Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp | 18
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp | 36
-rw-r--r--  lib/Analysis/Analysis.cpp | 3
-rw-r--r--  lib/Analysis/AssumptionCache.cpp | 28
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 54
-rw-r--r--  lib/Analysis/BlockFrequencyInfo.cpp | 65
-rw-r--r--  lib/Analysis/BlockFrequencyInfoImpl.cpp | 2
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp | 119
-rw-r--r--  lib/Analysis/CFLAndersAliasAnalysis.cpp | 4
-rw-r--r--  lib/Analysis/CGSCCPassManager.cpp | 55
-rw-r--r--  lib/Analysis/CMakeLists.txt | 2
-rw-r--r--  lib/Analysis/CallGraph.cpp | 12
-rw-r--r--  lib/Analysis/CallGraphSCCPass.cpp | 18
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 204
-rw-r--r--  lib/Analysis/CostModel.cpp | 18
-rw-r--r--  lib/Analysis/DemandedBits.cpp | 3
-rw-r--r--  lib/Analysis/DependenceAnalysis.cpp | 5
-rw-r--r--  lib/Analysis/DominanceFrontier.cpp | 10
-rw-r--r--  lib/Analysis/IVUsers.cpp | 88
-rw-r--r--  lib/Analysis/InlineCost.cpp | 271
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 680
-rw-r--r--  lib/Analysis/IteratedDominanceFrontier.cpp | 5
-rw-r--r--  lib/Analysis/LazyBlockFrequencyInfo.cpp | 2
-rw-r--r--  lib/Analysis/LazyCallGraph.cpp | 631
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp | 232
-rw-r--r--  lib/Analysis/Loads.cpp | 36
-rw-r--r--  lib/Analysis/LoopAccessAnalysis.cpp | 129
-rw-r--r--  lib/Analysis/LoopAnalysisManager.cpp | 23
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 24
-rw-r--r--  lib/Analysis/LoopPass.cpp | 2
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp | 136
-rw-r--r--  lib/Analysis/MemoryLocation.cpp | 4
-rw-r--r--  lib/Analysis/MemorySSA.cpp (renamed from lib/Transforms/Utils/MemorySSA.cpp) | 808
-rw-r--r--  lib/Analysis/MemorySSAUpdater.cpp | 494
-rw-r--r--  lib/Analysis/ModuleSummaryAnalysis.cpp | 145
-rw-r--r--  lib/Analysis/OptimizationDiagnosticInfo.cpp | 186
-rw-r--r--  lib/Analysis/PostDominators.cpp | 9
-rw-r--r--  lib/Analysis/ProfileSummaryInfo.cpp | 114
-rw-r--r--  lib/Analysis/RegionInfo.cpp | 9
-rw-r--r--  lib/Analysis/RegionPass.cpp | 2
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 649
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 3
-rw-r--r--  lib/Analysis/ScalarEvolutionNormalization.cpp | 299
-rw-r--r--  lib/Analysis/SparsePropagation.cpp | 2
-rw-r--r--  lib/Analysis/TargetLibraryInfo.cpp | 1152
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp | 56
-rw-r--r--  lib/Analysis/TypeMetadataUtils.cpp | 11
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 692
-rw-r--r--  lib/Analysis/VectorUtils.cpp | 85
-rw-r--r--  lib/AsmParser/LLLexer.cpp | 1
-rw-r--r--  lib/AsmParser/LLParser.cpp | 206
-rw-r--r--  lib/AsmParser/LLParser.h | 10
-rw-r--r--  lib/AsmParser/LLToken.h | 1
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 658
-rw-r--r--  lib/Bitcode/Reader/MetadataLoader.cpp | 36
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 155
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.cpp | 11
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.h | 22
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 6
-rw-r--r--  lib/CodeGen/Analysis.cpp | 24
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 195
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 68
-rw-r--r--  lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 59
-rw-r--r--  lib/CodeGen/AsmPrinter/CodeViewDebug.h | 13
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 107
-rw-r--r--  lib/CodeGen/AsmPrinter/DIEHash.cpp | 12
-rw-r--r--  lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 28
-rw-r--r--  lib/CodeGen/AsmPrinter/DebugHandlerBase.h | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/DebugLocEntry.h | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 212
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 9
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 123
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 17
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 248
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.h | 126
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 177
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.h | 40
-rw-r--r--  lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 21
-rw-r--r--  lib/CodeGen/AsmPrinter/WinException.cpp | 4
-rw-r--r--  lib/CodeGen/AtomicExpandPass.cpp | 4
-rw-r--r--  lib/CodeGen/BranchCoalescing.cpp | 758
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 163
-rw-r--r--  lib/CodeGen/BranchFolding.h | 43
-rw-r--r--  lib/CodeGen/BranchRelaxation.cpp | 4
-rw-r--r--  lib/CodeGen/BuiltinGCs.cpp | 9
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 8
-rw-r--r--  lib/CodeGen/CallingConvLower.cpp | 3
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 15
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp | 928
-rw-r--r--  lib/CodeGen/CountingFunctionInserter.cpp | 2
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 7
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp | 2
-rw-r--r--  lib/CodeGen/DetectDeadLanes.cpp | 2
-rw-r--r--  lib/CodeGen/ExecutionDepsFix.cpp | 472
-rw-r--r--  lib/CodeGen/FEntryInserter.cpp | 55
-rw-r--r--  lib/CodeGen/FaultMaps.cpp | 15
-rw-r--r--  lib/CodeGen/GCStrategy.cpp | 7
-rw-r--r--  lib/CodeGen/GlobalISel/CMakeLists.txt | 1
-rw-r--r--  lib/CodeGen/GlobalISel/CallLowering.cpp | 37
-rw-r--r--  lib/CodeGen/GlobalISel/IRTranslator.cpp | 755
-rw-r--r--  lib/CodeGen/GlobalISel/InstructionSelect.cpp | 98
-rw-r--r--  lib/CodeGen/GlobalISel/InstructionSelector.cpp | 41
-rw-r--r--  lib/CodeGen/GlobalISel/Legalizer.cpp | 84
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 399
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 31
-rw-r--r--  lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 266
-rw-r--r--  lib/CodeGen/GlobalISel/RegBankSelect.cpp | 27
-rw-r--r--  lib/CodeGen/GlobalISel/RegisterBank.cpp | 9
-rw-r--r--  lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 95
-rw-r--r--  lib/CodeGen/GlobalISel/Utils.cpp | 50
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 111
-rw-r--r--  lib/CodeGen/ImplicitNullChecks.cpp | 206
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp | 5
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 6
-rw-r--r--  lib/CodeGen/LLVMBuild.txt | 2
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 15
-rw-r--r--  lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp | 97
-rw-r--r--  lib/CodeGen/LexicalScopes.cpp | 41
-rw-r--r--  lib/CodeGen/LiveDebugValues.cpp | 168
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp | 4
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 33
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 162
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.cpp | 41
-rw-r--r--  lib/CodeGen/LivePhysRegs.cpp | 12
-rw-r--r--  lib/CodeGen/LiveRangeCalc.cpp | 41
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp | 13
-rw-r--r--  lib/CodeGen/LiveRegMatrix.cpp | 27
-rw-r--r--  lib/CodeGen/LiveRegUnits.cpp | 126
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 4
-rw-r--r--  lib/CodeGen/LowLevelType.cpp | 55
-rw-r--r--  lib/CodeGen/MIRParser/MIParser.cpp | 151
-rw-r--r--  lib/CodeGen/MIRParser/MIParser.h | 9
-rw-r--r--  lib/CodeGen/MIRParser/MIRParser.cpp | 52
-rw-r--r--  lib/CodeGen/MIRPrinter.cpp | 53
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 57
-rw-r--r--  lib/CodeGen/MachineBlockFrequencyInfo.cpp | 85
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp | 1047
-rw-r--r--  lib/CodeGen/MachineCombiner.cpp | 48
-rw-r--r--  lib/CodeGen/MachineCopyPropagation.cpp | 12
-rw-r--r--  lib/CodeGen/MachineDominators.cpp | 28
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 7
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 104
-rw-r--r--  lib/CodeGen/MachineLoopInfo.cpp | 16
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 4
-rw-r--r--  lib/CodeGen/MachineModuleInfoImpls.cpp | 1
-rw-r--r--  lib/CodeGen/MachineOptimizationRemarkEmitter.cpp | 100
-rw-r--r--  lib/CodeGen/MachineOutliner.cpp | 1251
-rw-r--r--  lib/CodeGen/MachinePipeliner.cpp | 25
-rw-r--r--  lib/CodeGen/MachineRegionInfo.cpp | 22
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 76
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp | 256
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.cpp | 66
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 33
-rw-r--r--  lib/CodeGen/PatchableFunction.cpp | 2
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 2
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 19
-rw-r--r--  lib/CodeGen/PseudoSourceValue.cpp | 5
-rw-r--r--  lib/CodeGen/RegAllocBasic.cpp | 2
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 92
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 60
-rw-r--r--  lib/CodeGen/RegUsageInfoCollector.cpp | 24
-rw-r--r--  lib/CodeGen/RegisterClassInfo.cpp | 50
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 289
-rw-r--r--  lib/CodeGen/RegisterPressure.cpp | 57
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 81
-rw-r--r--  lib/CodeGen/ResetMachineFunctionPass.cpp | 15
-rw-r--r--  lib/CodeGen/SafeStack.cpp | 2
-rw-r--r--  lib/CodeGen/SafeStackColoring.cpp | 2
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 442
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 209
-rw-r--r--  lib/CodeGen/ScoreboardHazardRecognizer.cpp | 9
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2429
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 101
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 120
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 41
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 24
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 15
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 29
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 25
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 308
-rw-r--r--  lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 15
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 42
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 561
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 996
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 87
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 20
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 613
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 172
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 30
-rw-r--r--  lib/CodeGen/SlotIndexes.cpp | 42
-rw-r--r--  lib/CodeGen/SplitKit.cpp | 136
-rw-r--r--  lib/CodeGen/SplitKit.h | 11
-rw-r--r--  lib/CodeGen/StackColoring.cpp | 28
-rw-r--r--  lib/CodeGen/StackMaps.cpp | 24
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 58
-rw-r--r--  lib/CodeGen/TailDuplicator.cpp | 3
-rw-r--r--  lib/CodeGen/TargetFrameLoweringImpl.cpp | 2
-rw-r--r--  lib/CodeGen/TargetInstrInfo.cpp | 10
-rw-r--r--  lib/CodeGen/TargetLoweringBase.cpp | 32
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 229
-rw-r--r--  lib/CodeGen/TargetOptionsImpl.cpp | 8
-rw-r--r--  lib/CodeGen/TargetPassConfig.cpp | 28
-rw-r--r--  lib/CodeGen/TargetRegisterInfo.cpp | 16
-rw-r--r--  lib/CodeGen/TargetSchedule.cpp | 110
-rw-r--r--  lib/CodeGen/TargetSubtargetInfo.cpp | 46
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 4
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 49
-rw-r--r--  lib/CodeGen/WinEHPrepare.cpp | 6
-rw-r--r--  lib/CodeGen/XRayInstrumentation.cpp | 7
-rw-r--r--  lib/DebugInfo/CodeView/CMakeLists.txt | 3
-rw-r--r--  lib/DebugInfo/CodeView/CVSymbolVisitor.cpp | 11
-rw-r--r--  lib/DebugInfo/CodeView/CVTypeDumper.cpp | 10
-rw-r--r--  lib/DebugInfo/CodeView/CVTypeVisitor.cpp | 77
-rw-r--r--  lib/DebugInfo/CodeView/CodeViewError.cpp | 2
-rw-r--r--  lib/DebugInfo/CodeView/CodeViewRecordIO.cpp | 36
-rw-r--r--  lib/DebugInfo/CodeView/Formatters.cpp | 37
-rw-r--r--  lib/DebugInfo/CodeView/ModuleSubstream.cpp | 12
-rw-r--r--  lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp | 36
-rw-r--r--  lib/DebugInfo/CodeView/RecordSerialization.cpp | 22
-rw-r--r--  lib/DebugInfo/CodeView/SymbolDumper.cpp | 4
-rw-r--r--  lib/DebugInfo/CodeView/SymbolSerializer.cpp | 52
-rw-r--r--  lib/DebugInfo/CodeView/TypeDatabase.cpp | 6
-rw-r--r--  lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp | 20
-rw-r--r--  lib/DebugInfo/CodeView/TypeDumpVisitor.cpp | 46
-rw-r--r--  lib/DebugInfo/CodeView/TypeRecord.cpp | 213
-rw-r--r--  lib/DebugInfo/CodeView/TypeRecordMapping.cpp | 26
-rw-r--r--  lib/DebugInfo/CodeView/TypeSerializer.cpp | 13
-rw-r--r--  lib/DebugInfo/CodeView/TypeStreamMerger.cpp | 416
-rw-r--r--  lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp | 9
-rw-r--r--  lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp | 14
-rw-r--r--  lib/DebugInfo/DWARF/DWARFCompileUnit.cpp | 6
-rw-r--r--  lib/DebugInfo/DWARF/DWARFContext.cpp | 296
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp | 6
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp | 7
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugAranges.cpp | 10
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 23
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp | 18
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 11
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugLoc.cpp | 14
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugMacro.cpp | 6
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp | 10
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp | 5
-rw-r--r--  lib/DebugInfo/DWARF/DWARFDie.cpp | 196
-rw-r--r--  lib/DebugInfo/DWARF/DWARFFormValue.cpp | 63
-rw-r--r--  lib/DebugInfo/DWARF/DWARFGdbIndex.cpp | 12
-rw-r--r--  lib/DebugInfo/DWARF/DWARFTypeUnit.cpp | 21
-rw-r--r--  lib/DebugInfo/DWARF/DWARFUnit.cpp | 59
-rw-r--r--  lib/DebugInfo/DWARF/DWARFUnitIndex.cpp | 14
-rw-r--r--  lib/DebugInfo/DWARF/SyntaxHighlighting.cpp | 18
-rw-r--r--  lib/DebugInfo/DWARF/SyntaxHighlighting.h | 25
-rw-r--r--  lib/DebugInfo/MSF/CMakeLists.txt | 2
-rw-r--r--  lib/DebugInfo/MSF/MappedBlockStream.cpp | 74
-rw-r--r--  lib/DebugInfo/MSF/StreamReader.cpp | 156
-rw-r--r--  lib/DebugInfo/MSF/StreamWriter.cpp | 98
-rw-r--r--  lib/DebugInfo/PDB/CMakeLists.txt | 56
-rw-r--r--  lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp | 23
-rw-r--r--  lib/DebugInfo/PDB/Native/DbiStream.cpp (renamed from lib/DebugInfo/PDB/Raw/DbiStream.cpp) | 56
-rw-r--r--  lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp (renamed from lib/DebugInfo/PDB/Raw/DbiStreamBuilder.cpp) | 135
-rw-r--r--  lib/DebugInfo/PDB/Native/EnumTables.cpp (renamed from lib/DebugInfo/PDB/Raw/EnumTables.cpp) | 4
-rw-r--r--  lib/DebugInfo/PDB/Native/GSI.cpp (renamed from lib/DebugInfo/PDB/Raw/GSI.cpp) | 20
-rw-r--r--  lib/DebugInfo/PDB/Native/GSI.h (renamed from lib/DebugInfo/PDB/Raw/GSI.h) | 20
-rw-r--r--  lib/DebugInfo/PDB/Native/GlobalsStream.cpp (renamed from lib/DebugInfo/PDB/Raw/GlobalsStream.cpp) | 6
-rw-r--r--  lib/DebugInfo/PDB/Native/Hash.cpp (renamed from lib/DebugInfo/PDB/Raw/Hash.cpp) | 2
-rw-r--r--  lib/DebugInfo/PDB/Native/HashTable.cpp | 302
-rw-r--r--  lib/DebugInfo/PDB/Native/InfoStream.cpp | 126
-rw-r--r--  lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp (renamed from lib/DebugInfo/PDB/Raw/InfoStreamBuilder.cpp) | 43
-rw-r--r--  lib/DebugInfo/PDB/Native/ModInfo.cpp (renamed from lib/DebugInfo/PDB/Raw/ModInfo.cpp) | 15
-rw-r--r--  lib/DebugInfo/PDB/Native/ModInfoBuilder.cpp | 136
-rw-r--r--  lib/DebugInfo/PDB/Native/ModStream.cpp (renamed from lib/DebugInfo/PDB/Raw/ModStream.cpp) | 20
-rw-r--r--  lib/DebugInfo/PDB/Native/NamedStreamMap.cpp | 135
-rw-r--r--  lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp | 43
-rw-r--r--  lib/DebugInfo/PDB/Native/NativeEnumModules.cpp | 52
-rw-r--r--  lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp | 79
-rw-r--r--  lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp | 706
-rw-r--r--  lib/DebugInfo/PDB/Native/NativeSession.cpp | 146
-rw-r--r--  lib/DebugInfo/PDB/Native/PDBFile.cpp (renamed from lib/DebugInfo/PDB/Raw/PDBFile.cpp) | 107
-rw-r--r--  lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp (renamed from lib/DebugInfo/PDB/Raw/PDBFileBuilder.cpp) | 64
-rw-r--r--  lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp | 119
-rw-r--r--  lib/DebugInfo/PDB/Native/PublicsStream.cpp (renamed from lib/DebugInfo/PDB/Raw/PublicsStream.cpp) | 12
-rw-r--r--  lib/DebugInfo/PDB/Native/RawError.cpp (renamed from lib/DebugInfo/PDB/Raw/RawError.cpp) | 4
-rw-r--r--  lib/DebugInfo/PDB/Native/StringTable.cpp (renamed from lib/DebugInfo/PDB/Raw/NameHashTable.cpp) | 45
-rw-r--r--  lib/DebugInfo/PDB/Native/StringTableBuilder.cpp | 102
-rw-r--r--  lib/DebugInfo/PDB/Native/SymbolStream.cpp (renamed from lib/DebugInfo/PDB/Raw/SymbolStream.cpp) | 13
-rw-r--r--  lib/DebugInfo/PDB/Native/TpiHashing.cpp (renamed from lib/DebugInfo/PDB/Raw/TpiHashing.cpp) | 6
-rw-r--r--  lib/DebugInfo/PDB/Native/TpiStream.cpp (renamed from lib/DebugInfo/PDB/Raw/TpiStream.cpp) | 51
-rw-r--r--  lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp (renamed from lib/DebugInfo/PDB/Raw/TpiStreamBuilder.cpp) | 108
-rw-r--r--  lib/DebugInfo/PDB/PDB.cpp | 10
-rw-r--r--  lib/DebugInfo/PDB/PDBExtras.cpp | 26
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbol.cpp | 36
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolBlock.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolCompiland.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolCustom.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolData.cpp | 6
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolExe.cpp | 15
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolFunc.cpp | 21
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolLabel.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolThunk.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp | 10
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp | 11
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp | 17
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp | 10
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp | 12
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/Raw/InfoStream.cpp | 77
-rw-r--r--  lib/DebugInfo/PDB/Raw/NameMap.cpp | 163
-rw-r--r--  lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp | 108
-rw-r--r--  lib/DebugInfo/PDB/Raw/RawSession.cpp | 136
-rw-r--r--  lib/DebugInfo/PDB/UDTLayout.cpp | 335
-rw-r--r--  lib/DebugInfo/Symbolize/DIPrinter.cpp | 14
-rw-r--r--  lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp | 35
-rw-r--r--  lib/DebugInfo/Symbolize/SymbolizableObjectFile.h | 20
-rw-r--r--  lib/Demangle/ItaniumDemangle.cpp | 84
-rw-r--r--  lib/ExecutionEngine/ExecutionEngine.cpp | 18
-rw-r--r--  lib/ExecutionEngine/ExecutionEngineBindings.cpp | 2
-rw-r--r--  lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt | 2
-rw-r--r--  lib/ExecutionEngine/Interpreter/Execution.cpp | 6
-rw-r--r--  lib/ExecutionEngine/Orc/CMakeLists.txt | 1
-rw-r--r--  lib/ExecutionEngine/Orc/OrcCBindingsStack.h | 4
-rw-r--r--  lib/ExecutionEngine/Orc/OrcError.cpp | 15
-rw-r--r--  lib/ExecutionEngine/Orc/OrcMCJITReplacement.h | 8
-rw-r--r--  lib/ExecutionEngine/Orc/RPCUtils.cpp | 55
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 30
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 211
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h | 21
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 13
-rw-r--r--  lib/Fuzzer/CMakeLists.txt | 9
-rw-r--r--  lib/Fuzzer/FuzzerCorpus.h | 26
-rw-r--r--  lib/Fuzzer/FuzzerDefs.h | 33
-rw-r--r--  lib/Fuzzer/FuzzerDictionary.h | 7
-rw-r--r--  lib/Fuzzer/FuzzerDriver.cpp | 195
-rw-r--r--  lib/Fuzzer/FuzzerExtFunctions.def | 10
-rw-r--r--  lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp | 60
-rw-r--r--  lib/Fuzzer/FuzzerExtraCounters.cpp | 41
-rw-r--r--  lib/Fuzzer/FuzzerFlags.def | 15
-rw-r--r--  lib/Fuzzer/FuzzerIO.cpp | 7
-rw-r--r--  lib/Fuzzer/FuzzerIO.h | 9
-rw-r--r--  lib/Fuzzer/FuzzerIOPosix.cpp | 29
-rw-r--r--  lib/Fuzzer/FuzzerIOWindows.cpp | 45
-rw-r--r--  lib/Fuzzer/FuzzerInterface.h | 2
-rw-r--r--  lib/Fuzzer/FuzzerInternal.h | 41
-rw-r--r--  lib/Fuzzer/FuzzerLoop.cpp | 243
-rw-r--r--  lib/Fuzzer/FuzzerMerge.cpp | 87
-rw-r--r--  lib/Fuzzer/FuzzerMerge.h | 12
-rw-r--r--  lib/Fuzzer/FuzzerMutate.cpp | 88
-rw-r--r--  lib/Fuzzer/FuzzerMutate.h | 13
-rw-r--r--  lib/Fuzzer/FuzzerOptions.h | 2
-rw-r--r--  lib/Fuzzer/FuzzerRandom.h | 10
-rw-r--r--  lib/Fuzzer/FuzzerShmem.h | 69
-rw-r--r--  lib/Fuzzer/FuzzerShmemPosix.cpp | 103
-rw-r--r--  lib/Fuzzer/FuzzerShmemWindows.cpp | 64
-rw-r--r--  lib/Fuzzer/FuzzerTracePC.cpp | 246
-rw-r--r--  lib/Fuzzer/FuzzerTracePC.h | 115
-rw-r--r--  lib/Fuzzer/FuzzerTraceState.cpp | 175
-rw-r--r--  lib/Fuzzer/FuzzerUtil.h | 4
-rw-r--r--  lib/Fuzzer/FuzzerUtilPosix.cpp | 8
-rw-r--r--  lib/Fuzzer/FuzzerUtilWindows.cpp | 18
-rw-r--r--  lib/Fuzzer/FuzzerValueBitMap.h | 21
-rw-r--r--  lib/Fuzzer/afl/afl_driver.cpp | 7
-rwxr-xr-x  lib/Fuzzer/build.sh | 3
-rw-r--r--  lib/Fuzzer/test/AbsNegAndConstant64Test.cpp | 2
-rw-r--r--  lib/Fuzzer/test/BadStrcmpTest.cpp | 19
-rw-r--r--  lib/Fuzzer/test/BogusInitializeTest.cpp | 15
-rw-r--r--  lib/Fuzzer/test/CMakeLists.txt | 107
-rw-r--r--  lib/Fuzzer/test/CustomCrossOverAndMutateTest.cpp | 34
-rw-r--r--  lib/Fuzzer/test/CxxStringEqTest.cpp | 25
-rw-r--r--  lib/Fuzzer/test/DSO1.cpp | 4
-rw-r--r--  lib/Fuzzer/test/DSO2.cpp | 4
-rw-r--r--  lib/Fuzzer/test/EquivalenceATest.cpp | 17
-rw-r--r--  lib/Fuzzer/test/EquivalenceBTest.cpp | 27
-rw-r--r--  lib/Fuzzer/test/FuzzerUnittest.cpp | 43
-rw-r--r--  lib/Fuzzer/test/LargeTest.cpp | 37
-rw-r--r--  lib/Fuzzer/test/LoadTest.cpp | 2
-rw-r--r--  lib/Fuzzer/test/Memcmp64BytesTest.cpp | 20
-rw-r--r--  lib/Fuzzer/test/NotinstrumentedTest.cpp (renamed from lib/Fuzzer/test/UninstrumentedTest.cpp) | 0
-rw-r--r--  lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp | 2
-rw-r--r--  lib/Fuzzer/test/RepeatedMemcmp.cpp | 17
-rw-r--r--  lib/Fuzzer/test/SimpleCmpTest.cpp | 9
-rw-r--r--  lib/Fuzzer/test/SingleByteInputTest.cpp | 17
-rw-r--r--  lib/Fuzzer/test/SingleStrcmpTest.cpp | 12
-rw-r--r--  lib/Fuzzer/test/SingleStrncmpTest.cpp | 3
-rw-r--r--  lib/Fuzzer/test/SwapCmpTest.cpp | 3
-rw-r--r--  lib/Fuzzer/test/TableLookupTest.cpp | 45
-rw-r--r--  lib/Fuzzer/test/TwoDifferentBugsTest.cpp | 22
-rw-r--r--  lib/Fuzzer/test/afl-driver-extra-stats.test | 2
-rw-r--r--  lib/Fuzzer/test/afl-driver-stderr.test | 2
-rw-r--r--  lib/Fuzzer/test/bad-strcmp.test | 1
-rw-r--r--  lib/Fuzzer/test/coverage.test | 4
-rw-r--r--  lib/Fuzzer/test/cxxstring.test | 2
-rw-r--r--  lib/Fuzzer/test/disable-leaks.test | 4
-rw-r--r--  lib/Fuzzer/test/dump_coverage.test | 26
-rw-r--r--  lib/Fuzzer/test/equivalence-signals.test | 9
-rw-r--r--  lib/Fuzzer/test/equivalence.test | 8
-rw-r--r--  lib/Fuzzer/test/extra-counters.test | 6
-rw-r--r--  lib/Fuzzer/test/fuzzer-customcrossover.test | 2
-rw-r--r--  lib/Fuzzer/test/fuzzer-customcrossoverandmutate.test | 1
-rw-r--r--  lib/Fuzzer/test/fuzzer-dirs.test | 4
-rw-r--r--  lib/Fuzzer/test/fuzzer-jobs.test | 29
-rw-r--r--  lib/Fuzzer/test/fuzzer-leak.test | 2
-rw-r--r--  lib/Fuzzer/test/fuzzer-oom.test | 6
-rw-r--r--  lib/Fuzzer/test/fuzzer-segv.test | 2
-rw-r--r--  lib/Fuzzer/test/fuzzer-singleinputs.test | 2
-rw-r--r--  lib/Fuzzer/test/fuzzer-traces-hooks.test | 26
-rw-r--r--  lib/Fuzzer/test/fuzzer.test | 9
-rw-r--r--  lib/Fuzzer/test/lit.cfg | 26
-rw-r--r--  lib/Fuzzer/test/lit.site.cfg.in | 1
-rw-r--r--  lib/Fuzzer/test/merge-posix.test | 23
-rw-r--r--  lib/Fuzzer/test/merge-summary.test | 15
-rw-r--r--  lib/Fuzzer/test/merge.test | 27
-rw-r--r--  lib/Fuzzer/test/minimize_crash.test | 9
-rw-r--r--  lib/Fuzzer/test/minimize_two_crashes.test | 16
-rw-r--r--  lib/Fuzzer/test/no-coverage/CMakeLists.txt | 20
-rw-r--r--  lib/Fuzzer/test/trace-malloc-2.test | 8
-rw-r--r--  lib/Fuzzer/test/trace-malloc.test | 5
-rw-r--r--  lib/Fuzzer/test/trace-pc.test | 2
-rw-r--r--  lib/Fuzzer/test/trace-pc/CMakeLists.txt | 13
-rw-r--r--  lib/Fuzzer/test/ubsan/CMakeLists.txt | 3
-rw-r--r--  lib/Fuzzer/test/ulimit.test | 2
-rw-r--r--  lib/Fuzzer/test/uninstrumented/CMakeLists.txt | 3
-rw-r--r--  lib/Fuzzer/test/value-profile-div.test | 2
-rw-r--r--  lib/Fuzzer/test/value-profile-mem.test | 2
-rw-r--r--  lib/Fuzzer/test/value-profile-strcmp.test | 2
-rw-r--r--  lib/Fuzzer/test/value-profile-strncmp.test | 2
-rw-r--r--  lib/IR/AsmWriter.cpp | 89
-rw-r--r--  lib/IR/AttributeImpl.h | 147
-rw-r--r--  lib/IR/AttributeSetNode.h | 106
-rw-r--r--  lib/IR/Attributes.cpp | 750
-rw-r--r--  lib/IR/AutoUpgrade.cpp | 867
-rw-r--r--  lib/IR/BasicBlock.cpp | 62
-rw-r--r--  lib/IR/Comdat.cpp | 8
-rw-r--r--  lib/IR/ConstantFold.cpp | 25
-rw-r--r--  lib/IR/ConstantRange.cpp | 40
-rw-r--r--  lib/IR/Constants.cpp | 2
-rw-r--r--  lib/IR/Core.cpp | 68
-rw-r--r--  lib/IR/DIBuilder.cpp | 55
-rw-r--r--  lib/IR/DataLayout.cpp | 119
-rw-r--r--  lib/IR/DebugInfo.cpp | 88
-rw-r--r--  lib/IR/DebugInfoMetadata.cpp | 32
-rw-r--r--  lib/IR/DebugLoc.cpp | 4
-rw-r--r--  lib/IR/DiagnosticInfo.cpp | 186
-rw-r--r--  lib/IR/Dominators.cpp | 13
-rw-r--r--  lib/IR/Function.cpp | 225
-rw-r--r--  lib/IR/GCOV.cpp | 54
-rw-r--r--  lib/IR/Globals.cpp | 20
-rw-r--r--  lib/IR/IRBuilder.cpp | 14
-rw-r--r--  lib/IR/IRPrintingPasses.cpp | 6
-rw-r--r--  lib/IR/InlineAsm.cpp | 49
-rw-r--r--  lib/IR/Instruction.cpp | 122
-rw-r--r--  lib/IR/Instructions.cpp | 127
-rw-r--r--  lib/IR/IntrinsicInst.cpp | 32
-rw-r--r--  lib/IR/LLVMContext.cpp | 1
-rw-r--r--  lib/IR/LLVMContextImpl.cpp | 7
-rw-r--r--  lib/IR/LLVMContextImpl.h | 30
-rw-r--r--  lib/IR/MDBuilder.cpp | 13
-rw-r--r--  lib/IR/Mangler.cpp | 32
-rw-r--r--  lib/IR/Metadata.cpp | 73
-rw-r--r--  lib/IR/Module.cpp | 60
-rw-r--r--  lib/IR/Operator.cpp | 14
-rw-r--r--  lib/IR/OptBisect.cpp | 18
-rw-r--r--  lib/IR/Pass.cpp | 2
-rw-r--r--  lib/IR/PassManager.cpp | 2
-rw-r--r--  lib/IR/Statepoint.cpp | 7
-rw-r--r--  lib/IR/Type.cpp | 6
-rw-r--r--  lib/IR/TypeFinder.cpp | 13
-rw-r--r--  lib/IR/Value.cpp | 47
-rw-r--r--  lib/IR/ValueSymbolTable.cpp | 13
-rw-r--r--  lib/IR/Verifier.cpp | 532
-rw-r--r--  lib/LTO/CMakeLists.txt | 51
-rw-r--r--  lib/LTO/Caching.cpp | 82
-rw-r--r--  lib/LTO/LTO.cpp | 424
-rw-r--r--  lib/LTO/LTOBackend.cpp | 117
-rw-r--r--  lib/LTO/LTOCodeGenerator.cpp | 32
-rw-r--r--  lib/LTO/LTOModule.cpp | 11
-rw-r--r--  lib/LTO/ThinLTOCodeGenerator.cpp | 59
-rw-r--r--  lib/LTO/UpdateCompilerUsed.cpp | 2
-rw-r--r--  lib/LibDriver/LibDriver.cpp | 2
-rw-r--r--  lib/Linker/IRMover.cpp | 24
-rw-r--r--  lib/Linker/LinkModules.cpp | 105
-rw-r--r--  lib/MC/CMakeLists.txt | 5
-rw-r--r--  lib/MC/ConstantPools.cpp | 7
-rw-r--r--  lib/MC/ELFObjectWriter.cpp | 113
-rw-r--r--  lib/MC/MCAsmBackend.cpp | 13
-rw-r--r--  lib/MC/MCAsmInfo.cpp | 60
-rw-r--r--  lib/MC/MCAsmInfoCOFF.cpp | 17
-rw-r--r--  lib/MC/MCAsmInfoDarwin.cpp | 7
-rw-r--r--  lib/MC/MCAsmInfoELF.cpp | 6
-rw-r--r--  lib/MC/MCAsmInfoWasm.cpp | 27
-rw-r--r--  lib/MC/MCAsmStreamer.cpp | 45
-rw-r--r--  lib/MC/MCAssembler.cpp | 54
-rw-r--r--  lib/MC/MCCodeEmitter.cpp | 8
-rw-r--r--  lib/MC/MCContext.cpp | 204
-rw-r--r--  lib/MC/MCDisassembler/MCDisassembler.cpp | 7
-rw-r--r--  lib/MC/MCDisassembler/MCRelocationInfo.cpp | 11
-rw-r--r--  lib/MC/MCDisassembler/MCSymbolizer.cpp | 5
-rw-r--r--  lib/MC/MCDwarf.cpp | 60
-rw-r--r--  lib/MC/MCELFObjectTargetWriter.cpp | 3
-rw-r--r--  lib/MC/MCELFStreamer.cpp | 88
-rw-r--r--  lib/MC/MCExpr.cpp | 37
-rw-r--r--  lib/MC/MCFragment.cpp | 40
-rw-r--r--  lib/MC/MCInst.cpp | 5
-rw-r--r--  lib/MC/MCInstPrinter.cpp | 13
-rw-r--r--  lib/MC/MCInstrAnalysis.cpp | 7
-rw-r--r--  lib/MC/MCLabel.cpp | 6
-rw-r--r--  lib/MC/MCLinkerOptimizationHint.cpp | 10
-rw-r--r--  lib/MC/MCMachOStreamer.cpp | 48
-rw-r--r--  lib/MC/MCMachObjectTargetWriter.cpp | 4
-rw-r--r--  lib/MC/MCNullStreamer.cpp | 4
-rw-r--r--  lib/MC/MCObjectFileInfo.cpp | 101
-rw-r--r--  lib/MC/MCObjectStreamer.cpp | 29
-rw-r--r--  lib/MC/MCObjectWriter.cpp | 6
-rw-r--r--  lib/MC/MCParser/AsmLexer.cpp | 10
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 95
-rw-r--r--  lib/MC/MCParser/COFFAsmParser.cpp | 32
-rw-r--r--  lib/MC/MCParser/DarwinAsmParser.cpp | 66
-rw-r--r--  lib/MC/MCParser/ELFAsmParser.cpp | 233
-rw-r--r--  lib/MC/MCParser/MCAsmLexer.cpp | 12
-rw-r--r--  lib/MC/MCParser/MCAsmParser.cpp | 23
-rw-r--r--  lib/MC/MCParser/MCAsmParserExtension.cpp | 10
-rw-r--r--  lib/MC/MCParser/MCTargetAsmParser.cpp | 13
-rw-r--r--  lib/MC/MCRegisterInfo.cpp | 9
-rw-r--r--  lib/MC/MCSection.cpp | 22
-rw-r--r--  lib/MC/MCSectionCOFF.cpp | 11
-rw-r--r--  lib/MC/MCSectionELF.cpp | 45
-rw-r--r--  lib/MC/MCSectionMachO.cpp | 84
-rw-r--r--  lib/MC/MCSectionWasm.cpp | 97
-rw-r--r--  lib/MC/MCStreamer.cpp | 51
-rw-r--r--  lib/MC/MCSubtargetInfo.cpp | 9
-rw-r--r--  lib/MC/MCSymbol.cpp | 14
-rw-r--r--  lib/MC/MCSymbolELF.cpp | 4
-rw-r--r--  lib/MC/MCTargetOptions.cpp | 15
-rw-r--r--  lib/MC/MCValue.cpp | 2
-rw-r--r--  lib/MC/MCWasmObjectTargetWriter.cpp | 27
-rw-r--r--  lib/MC/MCWasmStreamer.cpp | 216
-rw-r--r--  lib/MC/MachObjectWriter.cpp | 17
-rw-r--r--  lib/MC/StringTableBuilder.cpp | 18
-rw-r--r--  lib/MC/SubtargetFeature.cpp | 117
-rw-r--r--  lib/MC/WasmObjectWriter.cpp | 1149
-rw-r--r--  lib/MC/WinCOFFObjectWriter.cpp | 759
-rw-r--r--  lib/MC/WinCOFFStreamer.cpp | 31
-rw-r--r--  lib/Object/ArchiveWriter.cpp | 73
-rw-r--r--  lib/Object/CMakeLists.txt | 1
-rw-r--r--  lib/Object/Decompressor.cpp | 5
-rw-r--r--  lib/Object/ELFObjectFile.cpp | 262
-rw-r--r--  lib/Object/IRSymtab.cpp | 231
-rw-r--r--  lib/Object/MachOObjectFile.cpp | 856
-rw-r--r--  lib/Object/ModuleSummaryIndexObjectFile.cpp | 9
-rw-r--r--  lib/Object/ModuleSymbolTable.cpp | 97
-rw-r--r--  lib/Object/RecordStreamer.cpp | 11
-rw-r--r--  lib/Object/RecordStreamer.h | 23
-rw-r--r--  lib/Object/WasmObjectFile.cpp | 664
-rw-r--r--  lib/ObjectYAML/CMakeLists.txt | 7
-rw-r--r--  lib/ObjectYAML/DWARFEmitter.cpp | 321
-rw-r--r--  lib/ObjectYAML/DWARFVisitor.cpp | 178
-rw-r--r--  lib/ObjectYAML/DWARFVisitor.h | 97
-rw-r--r--  lib/ObjectYAML/DWARFYAML.cpp | 15
-rw-r--r--  lib/ObjectYAML/ELFYAML.cpp | 834
-rw-r--r--  lib/ObjectYAML/MachOYAML.cpp | 29
-rw-r--r--  lib/ObjectYAML/ObjectYAML.cpp | 3
-rw-r--r--  lib/ObjectYAML/WasmYAML.cpp | 357
-rw-r--r--  lib/Option/Arg.cpp | 2
-rw-r--r--  lib/Option/ArgList.cpp | 228
-rw-r--r--  lib/Option/Option.cpp | 2
-rw-r--r--  lib/Passes/PassBuilder.cpp | 344
-rw-r--r--  lib/Passes/PassRegistry.def | 8
-rw-r--r--  lib/ProfileData/Coverage/CoverageMapping.cpp | 55
-rw-r--r--  lib/ProfileData/Coverage/CoverageMappingReader.cpp | 74
-rw-r--r--  lib/ProfileData/Coverage/CoverageMappingWriter.cpp | 35
-rw-r--r--  lib/ProfileData/InstrProf.cpp | 174
-rw-r--r--  lib/ProfileData/InstrProfReader.cpp | 49
-rw-r--r--  lib/ProfileData/InstrProfWriter.cpp | 42
-rw-r--r--  lib/ProfileData/SampleProf.cpp | 34
-rw-r--r--  lib/ProfileData/SampleProfReader.cpp | 31
-rw-r--r--  lib/ProfileData/SampleProfWriter.cpp | 70
-rw-r--r--  lib/Support/APFloat.cpp | 622
-rw-r--r--  lib/Support/APInt.cpp | 938
-rw-r--r--  lib/Support/ARMAttributeParser.cpp | 708
-rw-r--r--  lib/Support/BinaryStreamError.cpp | 56
-rw-r--r--  lib/Support/BinaryStreamReader.cpp | 95
-rw-r--r--  lib/Support/BinaryStreamWriter.cpp | 68
-rw-r--r--  lib/Support/BranchProbability.cpp | 2
-rw-r--r--  lib/Support/CMakeLists.txt | 13
-rw-r--r--  lib/Support/CachePruning.cpp | 97
-rw-r--r--  lib/Support/Chrono.cpp | 7
-rw-r--r--  lib/Support/CommandLine.cpp | 66
-rw-r--r--  lib/Support/Compression.cpp | 83
-rw-r--r--  lib/Support/DebugCounter.cpp | 108
-rw-r--r--  lib/Support/Dwarf.cpp | 11
-rw-r--r--  lib/Support/DynamicLibrary.cpp | 21
-rw-r--r--  lib/Support/FileOutputBuffer.cpp | 2
-rw-r--r--  lib/Support/Host.cpp | 447
-rw-r--r--  lib/Support/LockFileManager.cpp | 6
-rw-r--r--  lib/Support/LowLevelType.cpp | 47
-rw-r--r--  lib/Support/MD5.cpp | 25
-rw-r--r--  lib/Support/ManagedStatic.cpp | 2
-rw-r--r--  lib/Support/MemoryBuffer.cpp | 34
-rw-r--r--  lib/Support/Path.cpp | 485
-rw-r--r--  lib/Support/RWMutex.cpp | 19
-rw-r--r--  lib/Support/Signals.cpp | 1
-rw-r--r--  lib/Support/SourceMgr.cpp | 27
-rw-r--r--  lib/Support/StringRef.cpp | 13
-rw-r--r--  lib/Support/TargetParser.cpp | 4
-rw-r--r--  lib/Support/Threading.cpp | 123
-rw-r--r--  lib/Support/Timer.cpp | 19
-rw-r--r--  lib/Support/Triple.cpp | 5
-rw-r--r--  lib/Support/Twine.cpp | 4
-rw-r--r--  lib/Support/Unix/Path.inc | 273
-rw-r--r--  lib/Support/Unix/Signals.inc | 18
-rw-r--r--  lib/Support/Unix/Threading.inc | 215
-rw-r--r--  lib/Support/Windows/DynamicLibrary.inc | 34
-rw-r--r--  lib/Support/Windows/Mutex.inc | 11
-rw-r--r--  lib/Support/Windows/Path.inc | 306
-rw-r--r--  lib/Support/Windows/Process.inc | 1
-rw-r--r--  lib/Support/Windows/Program.inc | 1
-rw-r--r--  lib/Support/Windows/RWMutex.inc | 13
-rw-r--r--  lib/Support/Windows/Signals.inc | 2
-rw-r--r--  lib/Support/Windows/ThreadLocal.inc | 11
-rw-r--r--  lib/Support/Windows/Threading.inc | 109
-rw-r--r--  lib/Support/YAMLTraits.cpp | 17
-rw-r--r--  lib/Support/raw_ostream.cpp | 10
-rw-r--r--  lib/TableGen/Record.cpp | 10
-rw-r--r--  lib/TableGen/TGParser.cpp | 4
-rw-r--r--  lib/Target/AArch64/AArch64.h | 6
-rw-r--r--  lib/Target/AArch64/AArch64.td | 116
-rw-r--r--  lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp | 49
-rw-r--r--  lib/Target/AArch64/AArch64AddressTypePromotion.cpp | 23
-rw-r--r--  lib/Target/AArch64/AArch64CallLowering.cpp | 182
-rw-r--r--  lib/Target/AArch64/AArch64CallLowering.h | 27
-rw-r--r--  lib/Target/AArch64/AArch64ConditionOptimizer.cpp | 8
-rw-r--r--  lib/Target/AArch64/AArch64ConditionalCompares.cpp | 11
-rw-r--r--  lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 65
-rw-r--r--  lib/Target/AArch64/AArch64FastISel.cpp | 102
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.cpp | 45
-rw-r--r--  lib/Target/AArch64/AArch64GenRegisterBankInfo.def | 419
-rw-r--r--  lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 47
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 338
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h | 21
-rw-r--r--  lib/Target/AArch64/AArch64InstrFormats.td | 69
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.cpp | 391
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.h | 39
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td | 49
-rw-r--r--  lib/Target/AArch64/AArch64InstructionSelector.cpp | 337
-rw-r--r--  lib/Target/AArch64/AArch64InstructionSelector.h | 49
-rw-r--r--  lib/Target/AArch64/AArch64LegalizerInfo.cpp | 133
-rw-r--r--  lib/Target/AArch64/AArch64LegalizerInfo.h | 7
-rw-r--r--  lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 73
-rw-r--r--  lib/Target/AArch64/AArch64MacroFusion.cpp | 272
-rw-r--r--  lib/Target/AArch64/AArch64MacroFusion.h | 29
-rw-r--r--  lib/Target/AArch64/AArch64RedundantCopyElimination.cpp | 358
-rw-r--r--  lib/Target/AArch64/AArch64RegisterBankInfo.cpp | 213
-rw-r--r--  lib/Target/AArch64/AArch64RegisterBankInfo.h | 77
-rw-r--r--  lib/Target/AArch64/AArch64RegisterBanks.td | 20
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.cpp | 16
-rw-r--r--  lib/Target/AArch64/AArch64SchedA53.td | 2
-rw-r--r--  lib/Target/AArch64/AArch64SchedA57.td | 4
-rw-r--r--  lib/Target/AArch64/AArch64SchedFalkor.td | 106
-rw-r--r--  lib/Target/AArch64/AArch64SchedFalkorDetails.td | 523
-rw-r--r--  lib/Target/AArch64/AArch64SchedFalkorWriteRes.td | 361
-rw-r--r--  lib/Target/AArch64/AArch64SchedKryoDetails.td | 62
-rw-r--r--  lib/Target/AArch64/AArch64SchedM1.td | 3
-rw-r--r--  lib/Target/AArch64/AArch64SchedThunderX.td | 352
-rw-r--r--  lib/Target/AArch64/AArch64SchedThunderX2T99.td (renamed from lib/Target/AArch64/AArch64SchedVulcan.td) | 354
-rw-r--r--  lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 14
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.cpp | 22
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.h | 33
-rw-r--r--  lib/Target/AArch64/AArch64SystemOperands.td | 134
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.cpp | 36
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.h | 2
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 56
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.h | 16
-rw-r--r--  lib/Target/AArch64/AArch64VectorByElementOpt.cpp | 25
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 475
-rw-r--r--  lib/Target/AArch64/CMakeLists.txt | 2
-rw-r--r--  lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp | 281
-rw-r--r--  lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h | 10
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 92
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 23
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 11
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp | 30
-rw-r--r--  lib/Target/AArch64/Utils/AArch64BaseInfo.h | 78
-rw-r--r--  lib/Target/AMDGPU/AMDGPU.h | 95
-rw-r--r--  lib/Target/AMDGPU/AMDGPU.td | 108
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp | 147
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAliasAnalysis.h | 102
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp | 14
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 52
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp | 6
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 212
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 123
-rw-r--r--  lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 134
-rw-r--r--  lib/Target/AMDGPU/AMDGPUCallLowering.h | 10
-rw-r--r--  lib/Target/AMDGPU/AMDGPUCallingConv.td | 30
-rw-r--r--  lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 120
-rw-r--r--  lib/Target/AMDGPU/AMDGPUFrameLowering.cpp | 36
-rw-r--r--  lib/Target/AMDGPU/AMDGPUFrameLowering.h | 3
-rw-r--r--  lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def | 62
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 151
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 446
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelLowering.h | 62
-rw-r--r--  lib/Target/AMDGPU/AMDGPUInstrInfo.cpp | 3
-rw-r--r--  lib/Target/AMDGPU/AMDGPUInstrInfo.h | 4
-rw-r--r--  lib/Target/AMDGPU/AMDGPUInstrInfo.td | 74
-rw-r--r--  lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 424
-rw-r--r--  lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 67
-rw-r--r--  lib/Target/AMDGPU/AMDGPUInstructions.td | 138
-rw-r--r--  lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp | 13
-rw-r--r--  lib/Target/AMDGPU/AMDGPUIntrinsics.td | 17
-rw-r--r--  lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 62
-rw-r--r--  lib/Target/AMDGPU/AMDGPULegalizerInfo.h | 30
-rw-r--r--  lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp | 160
-rw-r--r--  lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 33
-rw-r--r--  lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | 18
-rw-r--r--  lib/Target/AMDGPU/AMDGPUMachineFunction.h | 14
-rw-r--r--  lib/Target/AMDGPU/AMDGPUPTNote.h | 5
-rw-r--r--  lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 135
-rw-r--r--  lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 230
-rw-r--r--  lib/Target/AMDGPU/AMDGPURegisterBankInfo.h | 65
-rw-r--r--  lib/Target/AMDGPU/AMDGPURegisterBanks.td | 16
-rw-r--r--  lib/Target/AMDGPU/AMDGPURegisterInfo.h | 3
-rw-r--r--  lib/Target/AMDGPU/AMDGPURuntimeMetadata.h | 193
-rw-r--r--  lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 253
-rw-r--r--  lib/Target/AMDGPU/AMDGPUSubtarget.h | 293
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 230
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetMachine.h | 14
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp | 4
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetObjectFile.h | 1
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 241
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 24
-rw-r--r--  lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp | 225
-rw-r--r--  lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp | 27
-rw-r--r--  lib/Target/AMDGPU/AMDILCFGStructurizer.cpp | 105
-rw-r--r--  lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 856
-rw-r--r--  lib/Target/AMDGPU/BUFInstructions.td | 10
-rw-r--r--  lib/Target/AMDGPU/CMakeLists.txt | 15
-rw-r--r--  lib/Target/AMDGPU/DSInstructions.td | 174
-rw-r--r--  lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 39
-rw-r--r--  lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h | 2
-rw-r--r--  lib/Target/AMDGPU/EvergreenInstructions.td | 119
-rw-r--r--  lib/Target/AMDGPU/FLATInstructions.td | 12
-rw-r--r--  lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 109
-rw-r--r--  lib/Target/AMDGPU/GCNHazardRecognizer.h | 3
-rw-r--r--  lib/Target/AMDGPU/GCNIterativeScheduler.cpp | 528
-rw-r--r--  lib/Target/AMDGPU/GCNIterativeScheduler.h | 118
-rw-r--r--  lib/Target/AMDGPU/GCNMinRegStrategy.cpp | 266
-rw-r--r--  lib/Target/AMDGPU/GCNRegPressure.cpp | 355
-rw-r--r--  lib/Target/AMDGPU/GCNRegPressure.h | 170
-rw-r--r--  lib/Target/AMDGPU/GCNSchedStrategy.cpp | 344
-rw-r--r--  lib/Target/AMDGPU/GCNSchedStrategy.h | 62
-rw-r--r--  lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 129
-rw-r--r--  lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h | 10
-rw-r--r--  lib/Target/AMDGPU/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 19
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h | 422
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp | 625
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h | 99
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp | 14
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h | 6
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp | 408
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h | 26
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 72
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h | 32
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt | 7
-rw-r--r--  lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp | 30
-rw-r--r--  lib/Target/AMDGPU/MIMGInstructions.td | 100
-rw-r--r--  lib/Target/AMDGPU/Processors.td | 7
-rw-r--r--  lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp | 48
-rw-r--r--  lib/Target/AMDGPU/R600EmitClauseMarkers.cpp | 49
-rw-r--r--  lib/Target/AMDGPU/R600FrameLowering.cpp | 36
-rw-r--r--  lib/Target/AMDGPU/R600FrameLowering.h | 2
-rw-r--r--  lib/Target/AMDGPU/R600ISelLowering.cpp | 122
-rw-r--r--  lib/Target/AMDGPU/R600InstrInfo.cpp | 64
-rw-r--r--  lib/Target/AMDGPU/R600InstrInfo.h | 6
-rw-r--r--  lib/Target/AMDGPU/R600Instructions.td | 10
-rw-r--r--  lib/Target/AMDGPU/SIAnnotateControlFlow.cpp | 165
-rw-r--r--  lib/Target/AMDGPU/SIDefines.h | 33
-rw-r--r--  lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 35
-rw-r--r--  lib/Target/AMDGPU/SIFixVGPRCopies.cpp | 72
-rw-r--r--  lib/Target/AMDGPU/SIFoldOperands.cpp | 275
-rw-r--r--  lib/Target/AMDGPU/SIFrameLowering.cpp | 138
-rw-r--r--  lib/Target/AMDGPU/SIFrameLowering.h | 5
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.cpp | 1780
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.h | 44
-rw-r--r--  lib/Target/AMDGPU/SIInsertSkips.cpp | 66
-rw-r--r--  lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 1863
-rw-r--r--  lib/Target/AMDGPU/SIInsertWaits.cpp | 104
-rw-r--r--  lib/Target/AMDGPU/SIInstrFormats.td | 19
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.cpp | 455
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.h | 86
-rw-r--r--  lib/Target/AMDGPU/SIInstrInfo.td | 332
-rw-r--r--  lib/Target/AMDGPU/SIInstructions.td | 294
-rw-r--r--  lib/Target/AMDGPU/SIIntrinsics.td | 158
-rw-r--r--  lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 376
-rw-r--r--  lib/Target/AMDGPU/SILowerControlFlow.cpp | 67
-rw-r--r--  lib/Target/AMDGPU/SILowerI1Copies.cpp | 24
-rw-r--r--  lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 135
-rw-r--r--  lib/Target/AMDGPU/SIMachineFunctionInfo.h | 56
-rw-r--r--  lib/Target/AMDGPU/SIMachineScheduler.cpp | 251
-rw-r--r--  lib/Target/AMDGPU/SIMachineScheduler.h | 44
-rw-r--r--  lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 713
-rw-r--r--  lib/Target/AMDGPU/SIRegisterInfo.cpp | 414
-rw-r--r--  lib/Target/AMDGPU/SIRegisterInfo.h | 106
-rw-r--r--  lib/Target/AMDGPU/SIRegisterInfo.td | 37
-rw-r--r--  lib/Target/AMDGPU/SISchedule.td | 5
-rw-r--r--  lib/Target/AMDGPU/SIShrinkInstructions.cpp | 8
-rw-r--r--  lib/Target/AMDGPU/SMInstructions.td | 10
-rw-r--r--  lib/Target/AMDGPU/SOPInstructions.td | 66
-rw-r--r--  lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 448
-rw-r--r--  lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 217
-rw-r--r--  lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h | 2
-rw-r--r--  lib/Target/AMDGPU/VOP1Instructions.td | 107
-rw-r--r--  lib/Target/AMDGPU/VOP2Instructions.td | 40
-rw-r--r--  lib/Target/AMDGPU/VOP3Instructions.td | 92
-rw-r--r--  lib/Target/AMDGPU/VOP3PInstructions.td | 82
-rw-r--r--  lib/Target/AMDGPU/VOPCInstructions.td | 8
-rw-r--r--  lib/Target/AMDGPU/VOPInstructions.td | 80
-rw-r--r--  lib/Target/ARM/A15SDOptimizer.cpp | 24
-rw-r--r--  lib/Target/ARM/ARM.h | 13
-rw-r--r--  lib/Target/ARM/ARM.td | 64
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 15
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 832
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 52
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 43
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 17
-rw-r--r--  lib/Target/ARM/ARMBasicBlockInfo.h | 21
-rw-r--r--  lib/Target/ARM/ARMCallLowering.cpp | 323
-rw-r--r--  lib/Target/ARM/ARMCallLowering.h | 10
-rw-r--r--  lib/Target/ARM/ARMComputeBlockSize.cpp | 10
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 175
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.cpp | 10
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h | 16
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 255
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 172
-rw-r--r--  lib/Target/ARM/ARMFeatures.h | 4
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 471
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 278
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 1520
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 29
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 24
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 9
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 364
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 11
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 141
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 562
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 280
-rw-r--r--  lib/Target/ARM/ARMInstructionSelector.cpp | 270
-rw-r--r--  lib/Target/ARM/ARMInstructionSelector.h | 13
-rw-r--r--  lib/Target/ARM/ARMLegalizerInfo.cpp | 40
-rw-r--r--  lib/Target/ARM/ARMLegalizerInfo.h | 4
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 152
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.cpp | 26
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.cpp | 9
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 69
-rw-r--r--  lib/Target/ARM/ARMRegisterBankInfo.cpp | 208
-rw-r--r--  lib/Target/ARM/ARMRegisterBankInfo.h | 13
-rw-r--r--  lib/Target/ARM/ARMRegisterBanks.td | 14
-rw-r--r--  lib/Target/ARM/ARMSchedule.td | 58
-rw-r--r--  lib/Target/ARM/ARMScheduleA9.td | 43
-rw-r--r--  lib/Target/ARM/ARMScheduleR52.td | 104
-rw-r--r--  lib/Target/ARM/ARMScheduleSwift.td | 52
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.cpp | 39
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 37
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 108
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 101
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 20
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.cpp | 11
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.h | 12
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.cpp | 20
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.h | 12
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 387
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 1
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 139
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 9
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 68
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 6
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 52
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 188
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 53
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 30
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp | 13
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp | 8
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp | 21
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h | 21
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp | 15
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 141
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 47
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 21
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 71
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 90
-rw-r--r--  lib/Target/ARM/ThumbRegisterInfo.cpp | 48
-rw-r--r--  lib/Target/AVR/AVRAsmPrinter.cpp | 3
-rw-r--r--  lib/Target/AVR/AVRExpandPseudoInsts.cpp | 17
-rw-r--r--  lib/Target/AVR/AVRISelLowering.cpp | 31
-rw-r--r--  lib/Target/AVR/AVRInstrInfo.td | 4
-rw-r--r--  lib/Target/AVR/AVRInstrumentFunctions.cpp | 2
-rw-r--r--  lib/Target/AVR/AVRMCInstLower.cpp | 2
-rw-r--r--  lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp | 2
-rw-r--r--  lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h | 2
-rw-r--r--  lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp | 2
-rw-r--r--  lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp | 1
-rw-r--r--  lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp | 1
-rw-r--r--  lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h | 2
-rw-r--r--  lib/Target/BPF/BPFISelDAGToDAG.cpp | 6
-rw-r--r--  lib/Target/BPF/BPFISelLowering.cpp | 18
-rw-r--r--  lib/Target/BPF/BPFInstrInfo.td | 1
-rw-r--r--  lib/Target/BPF/BPFMCInstLower.cpp | 10
-rw-r--r--  lib/Target/BPF/BPFMCInstLower.h | 1
-rw-r--r--  lib/Target/BPF/BPFRegisterInfo.cpp | 28
-rw-r--r--  lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp | 6
-rw-r--r--  lib/Target/CMakeLists.txt | 6
-rw-r--r--  lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp | 367
-rw-r--r--  lib/Target/Hexagon/BitTracker.cpp | 16
-rw-r--r--  lib/Target/Hexagon/BitTracker.h | 3
-rw-r--r--  lib/Target/Hexagon/CMakeLists.txt | 5
-rw-r--r--  lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | 1332
-rw-r--r--  lib/Target/Hexagon/Hexagon.td | 40
-rw-r--r--  lib/Target/Hexagon/HexagonAsmPrinter.cpp | 238
-rw-r--r--  lib/Target/Hexagon/HexagonBitSimplify.cpp | 389
-rw-r--r--  lib/Target/Hexagon/HexagonBitTracker.cpp | 16
-rw-r--r--  lib/Target/Hexagon/HexagonBlockRanges.cpp | 77
-rw-r--r--  lib/Target/Hexagon/HexagonCallingConv.td | 35
-rw-r--r--  lib/Target/Hexagon/HexagonCommonGEP.cpp | 14
-rw-r--r--  lib/Target/Hexagon/HexagonCopyToCombine.cpp | 28
-rw-r--r--  lib/Target/Hexagon/HexagonDepArch.h | 10
-rw-r--r--  lib/Target/Hexagon/HexagonDepArch.td | 19
-rw-r--r--  lib/Target/Hexagon/HexagonDepDecoders.h | 64
-rw-r--r--  lib/Target/Hexagon/HexagonDepITypes.h | 53
-rw-r--r--  lib/Target/Hexagon/HexagonDepITypes.td | 48
-rw-r--r--  lib/Target/Hexagon/HexagonDepInstrFormats.td | 4182
-rw-r--r--  lib/Target/Hexagon/HexagonDepInstrInfo.td | 45573
-rw-r--r--  lib/Target/Hexagon/HexagonDepMappings.td | 654
-rw-r--r--  lib/Target/Hexagon/HexagonDepOperands.td | 132
-rw-r--r--  lib/Target/Hexagon/HexagonEarlyIfConv.cpp | 205
-rw-r--r--  lib/Target/Hexagon/HexagonExpandCondsets.cpp | 48
-rw-r--r--  lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.cpp | 52
-rw-r--r--  lib/Target/Hexagon/HexagonGenExtract.cpp | 11
-rw-r--r--  lib/Target/Hexagon/HexagonGenInsert.cpp | 13
-rw-r--r--  lib/Target/Hexagon/HexagonGenMux.cpp | 6
-rw-r--r--  lib/Target/Hexagon/HexagonHardwareLoops.cpp | 26
-rw-r--r--  lib/Target/Hexagon/HexagonIICHVX.td | 102
-rw-r--r--  lib/Target/Hexagon/HexagonIICScalar.td | 164
-rw-r--r--  lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 320
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp | 170
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.h | 11
-rw-r--r--  lib/Target/Hexagon/HexagonInstrAlias.td | 652
-rw-r--r--  lib/Target/Hexagon/HexagonInstrEnc.td | 1019
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormats.td | 169
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormatsV4.td | 22
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormatsV60.td | 22
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 225
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.h | 4
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.td | 4799
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV3.td | 215
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV4.td | 3301
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV5.td | 497
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV60.td | 2068
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoVector.td | 69
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsics.td | 19
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsicsV60.td | 2
-rw-r--r--  lib/Target/Hexagon/HexagonIsetDx.td | 728
-rw-r--r--  lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp | 2338
-rw-r--r--  lib/Target/Hexagon/HexagonMCInstLower.cpp | 7
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td | 204
-rw-r--r--  lib/Target/Hexagon/HexagonNewValueJump.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonOperands.td | 319
-rw-r--r--  lib/Target/Hexagon/HexagonOptAddrMode.cpp | 70
-rw-r--r--  lib/Target/Hexagon/HexagonPatterns.td | 147
-rw-r--r--  lib/Target/Hexagon/HexagonPseudo.td | 537
-rw-r--r--  lib/Target/Hexagon/HexagonRDFOpt.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.cpp | 55
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.h | 3
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.td | 109
-rw-r--r--  lib/Target/Hexagon/HexagonSchedule.td | 8
-rw-r--r--  lib/Target/Hexagon/HexagonScheduleV4.td | 12
-rw-r--r--  lib/Target/Hexagon/HexagonScheduleV55.td | 11
-rw-r--r--  lib/Target/Hexagon/HexagonScheduleV60.td | 13
-rw-r--r--  lib/Target/Hexagon/HexagonScheduleV62.td | 129
-rw-r--r--  lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp | 11
-rw-r--r--  lib/Target/Hexagon/HexagonSplitDouble.cpp | 7
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.cpp | 1
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.h | 7
-rw-r--r--  lib/Target/Hexagon/HexagonSystemInst.td | 134
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 28
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.h | 1
-rw-r--r--  lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 63
-rw-r--r--  lib/Target/Hexagon/HexagonVLIWPacketizer.h | 4
-rw-r--r--  lib/Target/Hexagon/LLVMBuild.txt | 1
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 224
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 129
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp | 40
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h | 8
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 3
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 89
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h | 4
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp | 299
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h | 11
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp | 22
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp | 45
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp | 66
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h | 15
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp | 46
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp | 343
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h | 76
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp | 83
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h | 18
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 214
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h | 20
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp | 382
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h | 28
-rw-r--r--  lib/Target/Hexagon/RDFCopy.cpp | 72
-rw-r--r--  lib/Target/Hexagon/RDFCopy.h | 11
-rw-r--r--  lib/Target/Hexagon/RDFDeadCode.cpp | 12
-rw-r--r--  lib/Target/Hexagon/RDFGraph.cpp | 405
-rw-r--r--  lib/Target/Hexagon/RDFGraph.h | 159
-rw-r--r--  lib/Target/Hexagon/RDFLiveness.cpp | 291
-rw-r--r--  lib/Target/Hexagon/RDFLiveness.h | 32
-rw-r--r--  lib/Target/Hexagon/RDFRegisters.cpp | 368
-rw-r--r--  lib/Target/Hexagon/RDFRegisters.h | 209
-rw-r--r--  lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp | 2
-rw-r--r--  lib/Target/Lanai/InstPrinter/CMakeLists.txt | 2
-rw-r--r--  lib/Target/Lanai/InstPrinter/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/Lanai/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/Lanai/LanaiInstrInfo.cpp | 4
-rw-r--r--  lib/Target/Lanai/LanaiMCInstLower.cpp | 2
-rw-r--r--  lib/Target/Lanai/MCTargetDesc/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp | 4
-rw-r--r--  lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp | 6
-rw-r--r--  lib/Target/MSP430/MSP430BranchSelector.cpp | 4
-rw-r--r--  lib/Target/MSP430/MSP430CallingConv.td | 8
-rw-r--r--  lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 87
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.h | 6
-rw-r--r--  lib/Target/MSP430/MSP430MCInstLower.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430MachineFunctionInfo.h | 10
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 766
-rw-r--r--  lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 87
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp | 12
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h | 39
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 74
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 7
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 68
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 8
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h | 15
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp | 4
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 60
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h | 27
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp | 8
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCExpr.h | 6
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp | 16
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp | 10
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp | 40
-rw-r--r--  lib/Target/Mips/MicroMips64r6InstrInfo.td | 28
-rw-r--r--  lib/Target/Mips/MicroMipsInstrInfo.td | 33
-rw-r--r--  lib/Target/Mips/Mips.td | 2
-rw-r--r--  lib/Target/Mips/Mips16HardFloat.cpp | 18
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.td | 2
-rw-r--r--  lib/Target/Mips/Mips32r6InstrInfo.td | 6
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 264
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.cpp | 173
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.h | 16
-rw-r--r--  lib/Target/Mips/MipsConstantIslandPass.cpp | 119
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 58
-rw-r--r--  lib/Target/Mips/MipsFastISel.cpp | 106
-rw-r--r--  lib/Target/Mips/MipsHazardSchedule.cpp | 27
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 190
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 54
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 32
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 3
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 243
-rw-r--r--  lib/Target/Mips/MipsLongBranch.cpp | 44
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.cpp | 13
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.h | 34
-rw-r--r--  lib/Target/Mips/MipsOptionRecord.h | 17
-rw-r--r--  lib/Target/Mips/MipsOs16.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.td | 39
-rw-r--r--  lib/Target/Mips/MipsSEFrameLowering.cpp | 43
-rw-r--r--  lib/Target/Mips/MipsSEFrameLowering.h | 9
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 241
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.cpp | 18
-rw-r--r--  lib/Target/Mips/MipsSEInstrInfo.cpp | 15
-rw-r--r--  lib/Target/Mips/MipsSubtarget.cpp | 7
-rw-r--r--  lib/Target/Mips/MipsSubtarget.h | 9
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 42
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 19
-rw-r--r--  lib/Target/NVPTX/CMakeLists.txt | 1
-rw-r--r--  lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp | 12
-rw-r--r--  lib/Target/NVPTX/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/NVPTX/NVPTX.h | 5
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 42
-rw-r--r--  lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 2449
-rw-r--r--  lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.cpp | 1694
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.h | 34
-rw-r--r--  lib/Target/NVPTX/NVPTXImageOptimizer.cpp | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.cpp | 5
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.td | 530
-rw-r--r--  lib/Target/NVPTX/NVPTXIntrinsics.td | 626
-rw-r--r--  lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 223
-rw-r--r--  lib/Target/NVPTX/NVPTXLowerArgs.cpp | 3
-rw-r--r--  lib/Target/NVPTX/NVPTXMCExpr.cpp | 7
-rw-r--r--  lib/Target/NVPTX/NVPTXMCExpr.h | 10
-rw-r--r--  lib/Target/NVPTX/NVPTXPeephole.cpp | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 52
-rw-r--r--  lib/Target/NVPTX/NVPTXRegisterInfo.td | 4
-rw-r--r--  lib/Target/NVPTX/NVPTXSection.h | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXSubtarget.cpp | 9
-rw-r--r--  lib/Target/NVPTX/NVPTXSubtarget.h | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.cpp | 15
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.h | 3
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetTransformInfo.h | 4
-rw-r--r--  lib/Target/NVPTX/NVVMReflect.cpp | 62
-rw-r--r--  lib/Target/PowerPC/CMakeLists.txt | 1
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 2
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 32
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 34
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 9
-rw-r--r--  lib/Target/PowerPC/PPC.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp | 98
-rw-r--r--  lib/Target/PowerPC/PPCBranchSelector.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCCTRLoops.cpp | 88
-rw-r--r--  lib/Target/PowerPC/PPCExpandISEL.cpp | 458
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp | 39
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 109
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 427
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 71
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 10
-rw-r--r--  lib/Target/PowerPC/PPCInstrAltivec.td | 8
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 31
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 29
-rw-r--r--  lib/Target/PowerPC/PPCInstrVSX.td | 28
-rw-r--r--  lib/Target/PowerPC/PPCLoopPreIncPrep.cpp | 6
-rw-r--r--  lib/Target/PowerPC/PPCMCInstLower.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCMIPeephole.cpp | 20
-rw-r--r--  lib/Target/PowerPC/PPCMachineFunctionInfo.cpp | 5
-rw-r--r--  lib/Target/PowerPC/PPCMachineFunctionInfo.h | 73
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 73
-rw-r--r--  lib/Target/PowerPC/PPCScheduleP8.td | 2
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.cpp | 24
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 43
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 1
-rw-r--r--  lib/Target/PowerPC/PPCTargetStreamer.h | 14
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 14
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.h | 8
-rw-r--r--  lib/Target/PowerPC/PPCVSXCopy.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 59
-rw-r--r--  lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 4
-rw-r--r--  lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp | 5
-rw-r--r--  lib/Target/RISCV/RISCVInstrFormats.td | 3
-rw-r--r--  lib/Target/RISCV/RISCVTargetMachine.cpp | 6
-rw-r--r--  lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 56
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 3
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp | 5
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h | 8
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp | 37
-rw-r--r--  lib/Target/Sparc/SparcFrameLowering.cpp | 11
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 5
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.h | 1
-rw-r--r--  lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp | 23
-rw-r--r--  lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp | 8
-rw-r--r--  lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp | 7
-rw-r--r--  lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h | 5
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp | 4
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp | 25
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp | 15
-rw-r--r--  lib/Target/SystemZ/SystemZElimCompare.cpp | 65
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 115
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.h | 1
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 135
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h | 31
-rw-r--r--  lib/Target/SystemZ/SystemZInstrVector.td | 40
-rw-r--r--  lib/Target/SystemZ/SystemZLongBranch.cpp | 83
-rw-r--r--  lib/Target/SystemZ/SystemZMachineScheduler.h | 26
-rw-r--r--  lib/Target/SystemZ/SystemZScheduleZ13.td | 2
-rw-r--r--  lib/Target/SystemZ/SystemZShortenInst.cpp | 8
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.cpp | 27
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.h | 17
-rw-r--r--  lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | 549
-rw-r--r--  lib/Target/SystemZ/SystemZTargetTransformInfo.h | 28
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 5
-rw-r--r--  lib/Target/TargetMachine.cpp | 11
-rw-r--r--  lib/Target/WebAssembly/CMakeLists.txt | 2
-rw-r--r--  lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp | 85
-rw-r--r--lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp14
-rw-r--r--lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h2
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt1
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp107
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h31
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp34
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h9
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp47
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp24
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h54
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp166
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h43
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp92
-rw-r--r--lib/Target/WebAssembly/README.txt21
-rw-r--r--lib/Target/WebAssembly/WebAssembly.h1
-rw-r--r--lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp107
-rw-r--r--lib/Target/WebAssembly/WebAssemblyAsmPrinter.h77
-rw-r--r--lib/Target/WebAssembly/WebAssemblyCFGSort.cpp277
-rw-r--r--lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp237
-rw-r--r--lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp25
-rw-r--r--lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp66
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFastISel.cpp30
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp7
-rw-r--r--lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp98
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelLowering.cpp8
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelLowering.h2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrCall.td12
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrControl.td5
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrFloat.td4
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp6
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInfo.td18
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrMemory.td4
-rw-r--r--lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp35
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp130
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMCInstLower.h6
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyPeephole.cpp44
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegStackify.cpp34
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp1302
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h37
-rw-r--r--lib/Target/WebAssembly/WebAssemblyStoreResults.cpp2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp26
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp10
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h8
-rw-r--r--lib/Target/WebAssembly/WebAssemblyUtilities.cpp26
-rw-r--r--lib/Target/WebAssembly/WebAssemblyUtilities.h9
-rw-r--r--lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp67
-rw-r--r--lib/Target/X86/AsmParser/X86AsmInstrumentation.h12
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp134
-rw-r--r--lib/Target/X86/AsmParser/X86Operand.h24
-rw-r--r--lib/Target/X86/CMakeLists.txt10
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp159
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp37
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h81
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp25
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.h7
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.cpp2
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp10
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.h4
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86BaseInfo.h7
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp27
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp40
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp27
-rw-r--r--lib/Target/X86/X86.h6
-rw-r--r--lib/Target/X86/X86.td52
-rw-r--r--lib/Target/X86/X86AsmPrinter.h4
-rw-r--r--lib/Target/X86/X86CallFrameOptimization.cpp75
-rw-r--r--lib/Target/X86/X86CallLowering.cpp183
-rw-r--r--lib/Target/X86/X86CallLowering.h8
-rw-r--r--lib/Target/X86/X86CallingConv.td2
-rwxr-xr-xlib/Target/X86/X86EvexToVex.cpp40
-rw-r--r--lib/Target/X86/X86ExpandPseudo.cpp26
-rw-r--r--lib/Target/X86/X86FastISel.cpp109
-rw-r--r--lib/Target/X86/X86FixupBWInsts.cpp53
-rw-r--r--lib/Target/X86/X86FixupLEAs.cpp18
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp117
-rw-r--r--lib/Target/X86/X86FrameLowering.h3
-rw-r--r--lib/Target/X86/X86GenRegisterBankInfo.def104
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp117
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp4020
-rw-r--r--lib/Target/X86/X86ISelLowering.h61
-rw-r--r--lib/Target/X86/X86Instr3DNow.td22
-rw-r--r--lib/Target/X86/X86InstrAVX512.td1290
-rw-r--r--lib/Target/X86/X86InstrBuilder.h2
-rw-r--r--lib/Target/X86/X86InstrCMovSetCC.td6
-rw-r--r--lib/Target/X86/X86InstrCompiler.td20
-rw-r--r--lib/Target/X86/X86InstrControl.td31
-rw-r--r--lib/Target/X86/X86InstrFMA.td16
-rw-r--r--lib/Target/X86/X86InstrFMA3Info.cpp5
-rw-r--r--lib/Target/X86/X86InstrFMA3Info.h11
-rw-r--r--lib/Target/X86/X86InstrFPStack.td16
-rw-r--r--lib/Target/X86/X86InstrFormats.td10
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td105
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp1565
-rw-r--r--lib/Target/X86/X86InstrInfo.h46
-rw-r--r--lib/Target/X86/X86InstrInfo.td64
-rw-r--r--lib/Target/X86/X86InstrMMX.td39
-rw-r--r--lib/Target/X86/X86InstrMPX.td10
-rw-r--r--lib/Target/X86/X86InstrSSE.td1266
-rw-r--r--lib/Target/X86/X86InstrShiftRotate.td82
-rw-r--r--lib/Target/X86/X86InstrSystem.td25
-rw-r--r--lib/Target/X86/X86InstrTSX.td10
-rwxr-xr-xlib/Target/X86/X86InstrTablesInfo.h1162
-rw-r--r--lib/Target/X86/X86InstrVMX.td20
-rw-r--r--lib/Target/X86/X86InstrXOP.td195
-rw-r--r--lib/Target/X86/X86InstructionSelector.cpp516
-rw-r--r--lib/Target/X86/X86InterleavedAccess.cpp3
-rw-r--r--lib/Target/X86/X86IntrinsicsInfo.h147
-rw-r--r--lib/Target/X86/X86LegalizerInfo.cpp142
-rw-r--r--lib/Target/X86/X86LegalizerInfo.h43
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp96
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.cpp8
-rw-r--r--lib/Target/X86/X86MacroFusion.cpp271
-rw-r--r--lib/Target/X86/X86MacroFusion.h30
-rw-r--r--lib/Target/X86/X86OptimizeLEAs.cpp18
-rw-r--r--lib/Target/X86/X86RegisterBankInfo.cpp243
-rw-r--r--lib/Target/X86/X86RegisterBankInfo.h81
-rw-r--r--lib/Target/X86/X86RegisterBanks.td17
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp10
-rw-r--r--lib/Target/X86/X86RegisterInfo.td36
-rw-r--r--lib/Target/X86/X86Schedule.td1
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp10
-rw-r--r--lib/Target/X86/X86ShuffleDecodeConstantPool.cpp78
-rw-r--r--lib/Target/X86/X86Subtarget.cpp32
-rw-r--r--lib/Target/X86/X86Subtarget.h71
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp121
-rw-r--r--lib/Target/X86/X86TargetMachine.h16
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp106
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.h19
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp118
-rw-r--r--lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp6
-rw-r--r--lib/Target/XCore/InstPrinter/XCoreInstPrinter.h6
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp16
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h8
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp2
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp3
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h1
-rw-r--r--lib/Target/XCore/XCoreMachineFunctionInfo.h44
-rw-r--r--lib/Target/XCore/XCoreSelectionDAGInfo.cpp10
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp18
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h9
-rw-r--r--lib/Transforms/Coroutines/CoroElide.cpp5
-rw-r--r--lib/Transforms/Coroutines/CoroFrame.cpp40
-rw-r--r--lib/Transforms/Coroutines/CoroInstr.h5
-rw-r--r--lib/Transforms/Coroutines/CoroSplit.cpp58
-rw-r--r--lib/Transforms/Coroutines/Coroutines.cpp6
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp1309
-rw-r--r--lib/Transforms/IPO/ConstantMerge.cpp26
-rw-r--r--lib/Transforms/IPO/CrossDSOCFI.cpp7
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp152
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp150
-rw-r--r--lib/Transforms/IPO/FunctionImport.cpp110
-rw-r--r--lib/Transforms/IPO/GlobalDCE.cpp152
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp10
-rw-r--r--lib/Transforms/IPO/GlobalSplit.cpp11
-rw-r--r--lib/Transforms/IPO/IPConstantPropagation.cpp8
-rw-r--r--lib/Transforms/IPO/InlineSimple.cpp13
-rw-r--r--lib/Transforms/IPO/Inliner.cpp205
-rw-r--r--lib/Transforms/IPO/LowerTypeTests.cpp308
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp299
-rw-r--r--lib/Transforms/IPO/PartialInlining.cpp2
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp87
-rw-r--r--lib/Transforms/IPO/SampleProfile.cpp271
-rw-r--r--lib/Transforms/IPO/StripSymbols.cpp24
-rw-r--r--lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp261
-rw-r--r--lib/Transforms/IPO/WholeProgramDevirt.cpp764
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp135
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp1192
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp1148
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp169
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp212
-rw-r--r--lib/Transforms/InstCombine/InstCombineInternal.h61
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp145
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp58
-rw-r--r--lib/Transforms/InstCombine/InstCombinePHI.cpp6
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp86
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp703
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp562
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp44
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp285
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp156
-rw-r--r--lib/Transforms/Instrumentation/DataFlowSanitizer.cpp78
-rw-r--r--lib/Transforms/Instrumentation/EfficiencySanitizer.cpp22
-rw-r--r--lib/Transforms/Instrumentation/IndirectCallPromotion.cpp506
-rw-r--r--lib/Transforms/Instrumentation/InstrProfiling.cpp170
-rw-r--r--lib/Transforms/Instrumentation/Instrumentation.cpp1
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp103
-rw-r--r--lib/Transforms/Instrumentation/PGOInstrumentation.cpp353
-rw-r--r--lib/Transforms/Instrumentation/SanitizerCoverage.cpp100
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp54
-rw-r--r--lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h14
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCContract.cpp1
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCOpts.cpp92
-rw-r--r--lib/Transforms/Scalar/ADCE.cpp43
-rw-r--r--lib/Transforms/Scalar/AlignmentFromAssumptions.cpp12
-rw-r--r--lib/Transforms/Scalar/BDCE.cpp4
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt2
-rw-r--r--lib/Transforms/Scalar/ConstantHoisting.cpp21
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp25
-rw-r--r--lib/Transforms/Scalar/DCE.cpp9
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp62
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp92
-rw-r--r--lib/Transforms/Scalar/Float2Int.cpp11
-rw-r--r--lib/Transforms/Scalar/GVN.cpp498
-rw-r--r--lib/Transforms/Scalar/GVNHoist.cpp89
-rw-r--r--lib/Transforms/Scalar/GuardWidening.cpp11
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp19
-rw-r--r--lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp44
-rw-r--r--lib/Transforms/Scalar/InferAddressSpaces.cpp (renamed from lib/Target/NVPTX/NVPTXInferAddressSpaces.cpp)568
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp293
-rw-r--r--lib/Transforms/Scalar/LICM.cpp170
-rw-r--r--lib/Transforms/Scalar/LoadCombine.cpp19
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp127
-rw-r--r--lib/Transforms/Scalar/LoopDistribute.cpp46
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp8
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp4
-rw-r--r--lib/Transforms/Scalar/LoopInterchange.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopLoadElimination.cpp105
-rw-r--r--lib/Transforms/Scalar/LoopPassManager.cpp7
-rw-r--r--lib/Transforms/Scalar/LoopPredication.cpp282
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp49
-rw-r--r--lib/Transforms/Scalar/LoopSimplifyCFG.cpp1
-rw-r--r--lib/Transforms/Scalar/LoopSink.cpp40
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp432
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp172
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp301
-rw-r--r--lib/Transforms/Scalar/LowerExpectIntrinsic.cpp4
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp147
-rw-r--r--lib/Transforms/Scalar/MergedLoadStoreMotion.cpp179
-rw-r--r--lib/Transforms/Scalar/NaryReassociate.cpp12
-rw-r--r--lib/Transforms/Scalar/NewGVN.cpp2750
-rw-r--r--lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp12
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp13
-rw-r--r--lib/Transforms/Scalar/RewriteStatepointsForGC.cpp59
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp171
-rw-r--r--lib/Transforms/Scalar/SROA.cpp24
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp9
-rw-r--r--lib/Transforms/Scalar/Scalarizer.cpp19
-rw-r--r--lib/Transforms/Scalar/SimplifyCFGPass.cpp76
-rw-r--r--lib/Transforms/Scalar/Sink.cpp12
-rw-r--r--lib/Transforms/Utils/AddDiscriminators.cpp24
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp9
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp448
-rw-r--r--lib/Transforms/Utils/BypassSlowDivision.cpp532
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt4
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp65
-rw-r--r--lib/Transforms/Utils/CloneModule.cpp13
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp19
-rw-r--r--lib/Transforms/Utils/DemoteRegToStack.cpp17
-rw-r--r--lib/Transforms/Utils/Evaluator.cpp3
-rw-r--r--lib/Transforms/Utils/FunctionComparator.cpp8
-rw-r--r--lib/Transforms/Utils/FunctionImportUtils.cpp14
-rw-r--r--lib/Transforms/Utils/GlobalStatus.cpp21
-rw-r--r--lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp2
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp152
-rw-r--r--lib/Transforms/Utils/LCSSA.cpp84
-rw-r--r--lib/Transforms/Utils/LibCallsShrinkWrap.cpp182
-rw-r--r--lib/Transforms/Utils/Local.cpp148
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp40
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp167
-rw-r--r--lib/Transforms/Utils/LoopUnrollPeel.cpp98
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp52
-rw-r--r--lib/Transforms/Utils/LoopUtils.cpp25
-rw-r--r--lib/Transforms/Utils/LowerMemIntrinsics.cpp231
-rw-r--r--lib/Transforms/Utils/LowerSwitch.cpp8
-rw-r--r--lib/Transforms/Utils/Mem2Reg.cpp7
-rw-r--r--lib/Transforms/Utils/MetaRenamer.cpp17
-rw-r--r--lib/Transforms/Utils/ModuleUtils.cpp23
-rw-r--r--lib/Transforms/Utils/PredicateInfo.cpp782
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp93
-rw-r--r--lib/Transforms/Utils/SSAUpdater.cpp27
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp171
-rw-r--r--lib/Transforms/Utils/SimplifyIndVar.cpp42
-rw-r--r--lib/Transforms/Utils/SimplifyInstructions.cpp22
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp399
-rw-r--r--lib/Transforms/Utils/Utils.cpp3
-rw-r--r--lib/Transforms/Utils/VNCoercion.cpp482
-rw-r--r--lib/Transforms/Utils/ValueMapper.cpp1
-rw-r--r--lib/Transforms/Vectorize/BBVectorize.cpp77
-rw-r--r--lib/Transforms/Vectorize/LoadStoreVectorizer.cpp13
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp2965
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp846
-rw-r--r--lib/XRay/CMakeLists.txt3
-rw-r--r--lib/XRay/InstrumentationMap.cpp198
-rw-r--r--lib/XRay/Trace.cpp373
1492 files changed, 159590 insertions, 64524 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 84da76be98bb..4c29aeaa622f 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -332,8 +332,8 @@ FunctionModRefBehavior AAResults::getModRefBehavior(const Function *F) {
ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
const MemoryLocation &Loc) {
- // Be conservative in the face of volatile/atomic.
- if (!L->isUnordered())
+ // Be conservative in the face of atomic.
+ if (isStrongerThan(L->getOrdering(), AtomicOrdering::Unordered))
return MRI_ModRef;
// If the load address doesn't alias the given address, it doesn't read
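A note on the new guard: it replaces a volatile-or-atomic check with an ordering-strength comparison, so a volatile but non-atomic load can still get a precise mod/ref answer. A minimal standalone sketch of the comparison against Unordered (hypothetical names, not the LLVM API; LLVM's real isStrongerThan consults a table because Acquire and Release are incomparable):

#include <cassert>

enum class AtomicOrdering { NotAtomic, Unordered, Monotonic, Acquire,
                            Release, AcquireRelease, SequentiallyConsistent };

// Comparing against Unordered is safe to do numerically here, since every
// ordering above it in this enum really is stronger than Unordered.
static bool isStrongerThanUnordered(AtomicOrdering AO) {
  return static_cast<int>(AO) > static_cast<int>(AtomicOrdering::Unordered);
}

int main() {
  // A volatile load with no atomic ordering is NotAtomic: not stronger than
  // Unordered, so the analysis no longer has to be conservative about it.
  assert(!isStrongerThanUnordered(AtomicOrdering::NotAtomic));
  assert(isStrongerThanUnordered(AtomicOrdering::Acquire));
}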
@@ -347,8 +347,8 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
const MemoryLocation &Loc) {
- // Be conservative in the face of volatile/atomic.
- if (!S->isUnordered())
+ // Be conservative in the face of atomic.
+ if (isStrongerThan(S->getOrdering(), AtomicOrdering::Unordered))
return MRI_ModRef;
if (Loc.Ptr) {
@@ -367,6 +367,14 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
return MRI_Mod;
}
+ModRefInfo AAResults::getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) {
+ // If we know that the location is a constant memory location, the fence
+ // cannot modify this location.
+ if (Loc.Ptr && pointsToConstantMemory(Loc))
+ return MRI_Ref;
+ return MRI_ModRef;
+}
+
ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
const MemoryLocation &Loc) {
@@ -689,7 +697,7 @@ AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
bool llvm::isNoAliasCall(const Value *V) {
if (auto CS = ImmutableCallSite(V))
- return CS.paramHasAttr(0, Attribute::NoAlias);
+ return CS.hasRetAttr(Attribute::NoAlias);
return false;
}
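Both this hunk and the BasicAliasAnalysis changes below retire the old convention where attribute index 0 meant the return value and parameter i had to be queried at i + 1. A small sketch of that encoding and the unambiguous accessors that hide it (hypothetical types, not the LLVM AttributeList API):

#include <bitset>
#include <cassert>
#include <vector>

enum Attr { NoAlias, ReadOnly, AttrCount };

struct CallSiteAttrs {
  // Slot 0 holds return-value attributes; parameter i lives in slot i + 1.
  // This is the encoding behind the old paramHasAttr(0, ...) and
  // paramHasAttr(ArgIdx + 1, ...) call sites that the hunks retire.
  std::vector<std::bitset<AttrCount>> Slots;

  bool hasRetAttr(Attr A) const { return Slots[0].test(A); }
  bool paramHasAttr(unsigned ArgIdx, Attr A) const {
    return Slots[ArgIdx + 1].test(A); // the off-by-one lives here, once
  }
};

int main() {
  CallSiteAttrs CS{std::vector<std::bitset<AttrCount>>(3)};
  CS.Slots[0].set(NoAlias);  // return value is noalias
  CS.Slots[2].set(ReadOnly); // parameter with 0-based index 1 is readonly
  assert(CS.hasRetAttr(NoAlias));
  assert(CS.paramHasAttr(1, ReadOnly));
}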
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 701b0e1a5925..16b711a69ec3 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -199,9 +199,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
// Check the unknown instructions...
if (!UnknownInsts.empty()) {
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i)
- if (AA.getModRefInfo(UnknownInsts[i],
- MemoryLocation(Ptr, Size, AAInfo)) != MRI_NoModRef)
- return true;
+ if (auto *Inst = getUnknownInst(i))
+ if (AA.getModRefInfo(Inst, MemoryLocation(Ptr, Size, AAInfo)) !=
+ MRI_NoModRef)
+ return true;
}
return false;
@@ -217,10 +218,12 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
return false;
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
- ImmutableCallSite C1(getUnknownInst(i)), C2(Inst);
- if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef ||
- AA.getModRefInfo(C2, C1) != MRI_NoModRef)
- return true;
+    if (auto *UnknownInst = getUnknownInst(i)) {
+      ImmutableCallSite C1(UnknownInst), C2(Inst);
+ if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef ||
+ AA.getModRefInfo(C2, C1) != MRI_NoModRef)
+ return true;
+ }
}
for (iterator I = begin(), E = end(); I != E; ++I)
@@ -471,7 +474,8 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
// If there are any call sites in the alias set, add them to this AST.
for (unsigned i = 0, e = AS.UnknownInsts.size(); i != e; ++i)
- add(AS.UnknownInsts[i]);
+ if (auto *Inst = AS.getUnknownInst(i))
+ add(Inst);
// Loop over all of the pointers in this alias set.
for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
@@ -489,19 +493,6 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
// dangling pointers to deleted instructions.
//
void AliasSetTracker::deleteValue(Value *PtrVal) {
- // If this is a call instruction, remove the callsite from the appropriate
- // AliasSet (if present).
- if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) {
- if (Inst->mayReadOrWriteMemory()) {
- // Scan all the alias sets to see if this call site is contained.
- for (iterator I = begin(), E = end(); I != E;) {
- iterator Cur = I++;
- if (!Cur->Forward)
- Cur->removeUnknownInst(*this, Inst);
- }
- }
- }
-
// First, look up the PointerRec for this pointer.
PointerMapType::iterator I = PointerMap.find_as(PtrVal);
if (I == PointerMap.end()) return; // Noop
@@ -633,7 +624,8 @@ void AliasSet::print(raw_ostream &OS) const {
OS << "\n " << UnknownInsts.size() << " Unknown instructions: ";
for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
if (i) OS << ", ";
- UnknownInsts[i]->printAsOperand(OS);
+ if (auto *I = getUnknownInst(i))
+ I->printAsOperand(OS);
}
}
OS << "\n";
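The pattern behind these hunks: UnknownInsts now holds weak value handles, so entries null out when an instruction is deleted, and every traversal re-checks getUnknownInst(i) for null. That is also why deleteValue no longer needs to scan the alias sets. A standalone analogue using std::weak_ptr in place of WeakVH:

#include <cassert>
#include <memory>
#include <vector>

struct Instruction { int Id; };

struct Tracker {
  std::vector<std::weak_ptr<Instruction>> UnknownInsts;

  std::shared_ptr<Instruction> getUnknownInst(unsigned i) const {
    return UnknownInsts[i].lock(); // null if the instruction was deleted
  }

  int countLive() const {
    int N = 0;
    for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i)
      if (auto Inst = getUnknownInst(i)) // skip nulled-out entries
        ++N;
    return N;
  }
};

int main() {
  auto A = std::make_shared<Instruction>(Instruction{1});
  auto B = std::make_shared<Instruction>(Instruction{2});
  Tracker T;
  T.UnknownInsts = {A, B};
  B.reset(); // "delete" one instruction; no eager tracker update needed
  assert(T.countLive() == 1);
}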
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 0e7cf402cdb5..0e0b5c92a918 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -57,6 +57,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeLazyBranchProbabilityInfoPassPass(Registry);
initializeLazyBlockFrequencyInfoPassPass(Registry);
initializeLazyValueInfoWrapperPassPass(Registry);
+ initializeLazyValueInfoPrinterPass(Registry);
initializeLintPass(Registry);
initializeLoopInfoWrapperPassPass(Registry);
initializeMemDepPrinterPass(Registry);
@@ -78,6 +79,8 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeTypeBasedAAWrapperPassPass(Registry);
initializeScopedNoAliasAAWrapperPassPass(Registry);
initializeLCSSAVerificationPassPass(Registry);
+ initializeMemorySSAWrapperPassPass(Registry);
+ initializeMemorySSAPrinterLegacyPassPass(Registry);
}
void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp
index 5851594700a4..1fae94724487 100644
--- a/lib/Analysis/AssumptionCache.cpp
+++ b/lib/Analysis/AssumptionCache.cpp
@@ -24,6 +24,11 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+static cl::opt<bool>
+ VerifyAssumptionCache("verify-assumption-cache", cl::Hidden,
+ cl::desc("Enable verification of assumption cache"),
+ cl::init(false));
+
SmallVector<WeakVH, 1> &AssumptionCache::getOrInsertAffectedValues(Value *V) {
// Try using find_as first to avoid creating extra value handles just for the
// purpose of doing the lookup.
@@ -47,9 +52,11 @@ void AssumptionCache::updateAffectedValues(CallInst *CI) {
} else if (auto *I = dyn_cast<Instruction>(V)) {
Affected.push_back(I);
- if (I->getOpcode() == Instruction::BitCast ||
- I->getOpcode() == Instruction::PtrToInt) {
- auto *Op = I->getOperand(0);
+ // Peek through unary operators to find the source of the condition.
+ Value *Op;
+ if (match(I, m_BitCast(m_Value(Op))) ||
+ match(I, m_PtrToInt(m_Value(Op))) ||
+ match(I, m_Not(m_Value(Op)))) {
if (isa<Instruction>(Op) || isa<Argument>(Op))
Affected.push_back(Op);
}
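The rewritten block generalizes the old two-opcode check: besides the value an assumption names directly, the value under a value-preserving wrapper (bitcast, ptrtoint, and now a 'not') is recorded as affected too. A toy sketch of the peek-through step over a hypothetical node type (LLVM's actual code uses PatternMatch matchers):

#include <cassert>
#include <vector>

enum class Op { Leaf, BitCast, PtrToInt, Not, Add };

struct Node {
  Op Opcode;
  Node *Operand; // single operand for the unary wrappers, else null
};

static void collectAffected(Node *V, std::vector<Node *> &Affected) {
  Affected.push_back(V);
  switch (V->Opcode) {
  case Op::BitCast:
  case Op::PtrToInt:
  case Op::Not:
    Affected.push_back(V->Operand); // the wrapped value is affected as well
    break;
  default:
    break;
  }
}

int main() {
  Node X{Op::Leaf, nullptr};
  Node NotX{Op::Not, &X};
  std::vector<Node *> Affected;
  collectAffected(&NotX, Affected);
  assert(Affected.size() == 2 && Affected[1] == &X);
}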
@@ -229,7 +236,13 @@ AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) {
}
void AssumptionCacheTracker::verifyAnalysis() const {
-#ifndef NDEBUG
+ // FIXME: In the long term the verifier should not be controllable with a
+ // flag. We should either fix all passes to correctly update the assumption
+ // cache and enable the verifier unconditionally or somehow arrange for the
+ // assumption list to be updated automatically by passes.
+ if (!VerifyAssumptionCache)
+ return;
+
SmallPtrSet<const CallInst *, 4> AssumptionSet;
for (const auto &I : AssumptionCaches) {
for (auto &VH : I.second->assumptions())
@@ -238,11 +251,10 @@ void AssumptionCacheTracker::verifyAnalysis() const {
for (const BasicBlock &B : cast<Function>(*I.first))
for (const Instruction &II : B)
- if (match(&II, m_Intrinsic<Intrinsic::assume>()))
- assert(AssumptionSet.count(cast<CallInst>(&II)) &&
- "Assumption in scanned function not in cache");
+ if (match(&II, m_Intrinsic<Intrinsic::assume>()) &&
+ !AssumptionSet.count(cast<CallInst>(&II)))
+ report_fatal_error("Assumption in scanned function not in cache");
}
-#endif
}
AssumptionCacheTracker::AssumptionCacheTracker() : ImmutablePass(ID) {
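The verifier change swaps a debug-only assert for an opt-in runtime check: verification now runs whenever the flag is set, even in release builds, and failures abort with a fatal error instead of an assert. A standalone sketch of that scheme, with a plain global standing in for the cl::opt:

#include <cstdio>
#include <cstdlib>
#include <set>

static bool VerifyAssumptionCache = false; // stands in for the new cl::opt

static void reportFatalError(const char *Msg) {
  std::fprintf(stderr, "fatal error: %s\n", Msg);
  std::exit(1);
}

// Scanned holds the assume calls found by walking the function; Cached is
// what the assumption cache believes it has registered.
static void verify(const std::set<int> &Cached, const std::set<int> &Scanned) {
  if (!VerifyAssumptionCache)
    return; // opt-in at runtime, no longer tied to NDEBUG
  for (int A : Scanned)
    if (!Cached.count(A))
      reportFatalError("Assumption in scanned function not in cache");
}

int main() {
  VerifyAssumptionCache = true;
  verify({1, 2, 3}, {1, 2}); // every scanned assumption is cached: OK
  // verify({1}, {1, 2});    // would abort: 2 was scanned but never cached
}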
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index c8d057949493..09582cf9a71d 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -127,7 +127,9 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
const TargetLibraryInfo &TLI,
bool RoundToAlign = false) {
uint64_t Size;
- if (getObjectSize(V, Size, DL, &TLI, RoundToAlign))
+ ObjectSizeOpts Opts;
+ Opts.RoundToAlign = RoundToAlign;
+ if (getObjectSize(V, Size, DL, &TLI, Opts))
return Size;
return MemoryLocation::UnknownSize;
}
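Replacing the trailing bool with ObjectSizeOpts is the usual options-struct migration: call sites set a named field instead of passing a bare true, and new knobs can be added later without touching every caller. A sketch under illustrative names (not the exact ObjectSizeOpts definition):

#include <cstdint>

struct ObjectSizeOpts {
  bool RoundToAlign = false;
  bool NullIsUnknownSize = false;
};

uint64_t getObjectSize(const void *V, const ObjectSizeOpts &Opts) {
  uint64_t Size = 16; // stand-in for the real computation
  if (Opts.RoundToAlign)
    Size = (Size + 7) & ~uint64_t(7); // round up to an 8-byte boundary
  return Size;
}

int main() {
  ObjectSizeOpts Opts;
  Opts.RoundToAlign = true; // self-documenting, unlike getObjectSize(V, true)
  return getObjectSize(nullptr, Opts) == 16 ? 0 : 1;
}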
@@ -635,7 +637,7 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
/// Returns true if this is a writeonly (i.e Mod only) parameter.
static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
const TargetLibraryInfo &TLI) {
- if (CS.paramHasAttr(ArgIdx + 1, Attribute::WriteOnly))
+ if (CS.paramHasAttr(ArgIdx, Attribute::WriteOnly))
return true;
// We can bound the aliasing properties of memset_pattern16 just as we can
@@ -644,9 +646,9 @@ static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
// whenever possible.
// FIXME Consider handling this in InferFunctionAttr.cpp together with other
// attributes.
- LibFunc::Func F;
+ LibFunc F;
if (CS.getCalledFunction() && TLI.getLibFunc(*CS.getCalledFunction(), F) &&
- F == LibFunc::memset_pattern16 && TLI.has(F))
+ F == LibFunc_memset_pattern16 && TLI.has(F))
if (ArgIdx == 0)
return true;
@@ -664,10 +666,10 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
if (isWriteOnlyParam(CS, ArgIdx, TLI))
return MRI_Mod;
- if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly))
+ if (CS.paramHasAttr(ArgIdx, Attribute::ReadOnly))
return MRI_Ref;
- if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadNone))
+ if (CS.paramHasAttr(ArgIdx, Attribute::ReadNone))
return MRI_NoModRef;
return AAResultBase::getArgModRefInfo(CS, ArgIdx);
@@ -749,7 +751,11 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// as an argument, and itself doesn't capture it.
if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
isNonEscapingLocalObject(Object)) {
- bool PassedAsArg = false;
+
+    // Optimistically assume that the call doesn't touch Object, and check
+    // this assumption in the loop below.
+ ModRefInfo Result = MRI_NoModRef;
+
unsigned OperandNo = 0;
for (auto CI = CS.data_operands_begin(), CE = CS.data_operands_end();
CI != CE; ++CI, ++OperandNo) {
@@ -761,20 +767,38 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
OperandNo < CS.getNumArgOperands() && !CS.isByValArgument(OperandNo)))
continue;
+      // The call doesn't access memory through this operand, so we don't
+      // care whether it aliases Object.
+ if (CS.doesNotAccessMemory(OperandNo))
+ continue;
+
// If this is a no-capture pointer argument, see if we can tell that it
- // is impossible to alias the pointer we're checking. If not, we have to
- // assume that the call could touch the pointer, even though it doesn't
- // escape.
+ // is impossible to alias the pointer we're checking.
AliasResult AR =
getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object));
- if (AR) {
- PassedAsArg = true;
- break;
+
+      // Operand doesn't alias 'Object'; continue looking for other aliases.
+ if (AR == NoAlias)
+ continue;
+      // Operand aliases 'Object', but the call doesn't modify it. Strengthen
+      // the initial assumption and keep looking in case there are more aliases.
+ if (CS.onlyReadsMemory(OperandNo)) {
+ Result = static_cast<ModRefInfo>(Result | MRI_Ref);
+ continue;
+ }
+      // Operand aliases 'Object', but the call only writes into it.
+ if (CS.doesNotReadMemory(OperandNo)) {
+ Result = static_cast<ModRefInfo>(Result | MRI_Mod);
+ continue;
}
+      // This operand aliases 'Object', and the call both reads and writes it.
+ Result = MRI_ModRef;
+ break;
}
- if (!PassedAsArg)
- return MRI_NoModRef;
+    // Return early if we improved the mod/ref information.
+ if (Result != MRI_ModRef)
+ return Result;
}
// If the CallSite is to malloc or calloc, we can assume that it doesn't
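The rewritten loop above is a small lattice computation: start from the optimistic bottom element MRI_NoModRef, OR in MRI_Ref or MRI_Mod for each aliasing operand, and bail out once the top element MRI_ModRef is reached. A minimal standalone model of that accumulation (simplified operand flags, not the real AA query):

#include <cassert>
#include <vector>

enum ModRefInfo { MRI_NoModRef = 0, MRI_Ref = 1, MRI_Mod = 2, MRI_ModRef = 3 };

struct Operand {
  bool AliasesObject;
  bool OnlyReads;
  bool OnlyWrites;
};

ModRefInfo computeModRef(const std::vector<Operand> &Ops) {
  ModRefInfo Result = MRI_NoModRef; // optimistic starting point
  for (const Operand &Op : Ops) {
    if (!Op.AliasesObject)
      continue; // this operand cannot touch the object
    if (Op.OnlyReads)
      Result = static_cast<ModRefInfo>(Result | MRI_Ref);
    else if (Op.OnlyWrites)
      Result = static_cast<ModRefInfo>(Result | MRI_Mod);
    else
      return MRI_ModRef; // read-write operand: no better answer possible
  }
  return Result;
}

int main() {
  assert(computeModRef({{true, true, false}, {false, false, false}}) == MRI_Ref);
  assert(computeModRef({{true, true, false}, {true, false, true}}) == MRI_ModRef);
}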
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 4cdbe4d0fcf6..07a2a9229fd5 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -26,7 +26,6 @@ using namespace llvm;
#define DEBUG_TYPE "block-freq"
-#ifndef NDEBUG
static cl::opt<GVDAGType> ViewBlockFreqPropagationDAG(
"view-block-freq-propagation-dags", cl::Hidden,
cl::desc("Pop up a window to show a dag displaying how block "
@@ -55,8 +54,29 @@ cl::opt<unsigned>
"is no less than the max frequency of the "
"function multiplied by this percent."));
+// Command line option to turn on CFG dot dump after profile annotation.
+cl::opt<bool>
+ PGOViewCounts("pgo-view-counts", cl::init(false), cl::Hidden,
+ cl::desc("A boolean option to show CFG dag with "
+ "block profile counts and branch probabilities "
+ "right after PGO profile annotation step. The "
+ "profile counts are computed using branch "
+ "probabilities from the runtime profile data and "
+ "block frequency propagation algorithm. To view "
+ "the raw counts from the profile, use option "
+ "-pgo-view-raw-counts instead. To limit graph "
+ "display to only one function, use filtering option "
+ "-view-bfi-func-name."));
+
namespace llvm {
+static GVDAGType getGVDT() {
+  // When -pgo-view-counts is in effect, render the DAG with profile counts.
+ if (PGOViewCounts)
+ return GVDT_Count;
+ return ViewBlockFreqPropagationDAG;
+}
+
template <>
struct GraphTraits<BlockFrequencyInfo *> {
typedef const BasicBlock *NodeRef;
@@ -89,8 +109,7 @@ struct DOTGraphTraits<BlockFrequencyInfo *> : public BFIDOTGTraitsBase {
std::string getNodeLabel(const BasicBlock *Node,
const BlockFrequencyInfo *Graph) {
- return BFIDOTGTraitsBase::getNodeLabel(Node, Graph,
- ViewBlockFreqPropagationDAG);
+ return BFIDOTGTraitsBase::getNodeLabel(Node, Graph, getGVDT());
}
std::string getNodeAttributes(const BasicBlock *Node,
@@ -107,7 +126,6 @@ struct DOTGraphTraits<BlockFrequencyInfo *> : public BFIDOTGTraitsBase {
};
} // end namespace llvm
-#endif
BlockFrequencyInfo::BlockFrequencyInfo() {}
@@ -132,19 +150,26 @@ BlockFrequencyInfo &BlockFrequencyInfo::operator=(BlockFrequencyInfo &&RHS) {
// template instantiated which is not available in the header.
BlockFrequencyInfo::~BlockFrequencyInfo() {}
+bool BlockFrequencyInfo::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &) {
+ // Check whether the analysis, all analyses on functions, or the function's
+ // CFG have been preserved.
+ auto PAC = PA.getChecker<BlockFrequencyAnalysis>();
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
+ PAC.preservedSet<CFGAnalyses>());
+}
+
void BlockFrequencyInfo::calculate(const Function &F,
const BranchProbabilityInfo &BPI,
const LoopInfo &LI) {
if (!BFI)
BFI.reset(new ImplType);
BFI->calculate(F, BPI, LI);
-#ifndef NDEBUG
if (ViewBlockFreqPropagationDAG != GVDT_None &&
(ViewBlockFreqFuncName.empty() ||
F.getName().equals(ViewBlockFreqFuncName))) {
view();
}
-#endif
}
BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
@@ -171,16 +196,32 @@ void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, uint64_t Freq) {
BFI->setBlockFreq(BB, Freq);
}
+void BlockFrequencyInfo::setBlockFreqAndScale(
+ const BasicBlock *ReferenceBB, uint64_t Freq,
+ SmallPtrSetImpl<BasicBlock *> &BlocksToScale) {
+ assert(BFI && "Expected analysis to be available");
+ // Use 128 bits APInt to avoid overflow.
+ APInt NewFreq(128, Freq);
+ APInt OldFreq(128, BFI->getBlockFreq(ReferenceBB).getFrequency());
+ APInt BBFreq(128, 0);
+ for (auto *BB : BlocksToScale) {
+ BBFreq = BFI->getBlockFreq(BB).getFrequency();
+ // Multiply first by NewFreq and then divide by OldFreq
+ // to minimize loss of precision.
+ BBFreq *= NewFreq;
+ // udiv is an expensive operation in the general case. If this ends up being
+ // a hot spot, one of the options proposed in
+ // https://reviews.llvm.org/D28535#650071 could be used to avoid this.
+ BBFreq = BBFreq.udiv(OldFreq);
+ BFI->setBlockFreq(BB, BBFreq.getLimitedValue());
+ }
+ BFI->setBlockFreq(ReferenceBB, Freq);
+}
+
/// Pop up a ghostview window with the current block frequency propagation
/// rendered using dot.
void BlockFrequencyInfo::view() const {
-// This code is only for debugging.
-#ifndef NDEBUG
ViewGraph(const_cast<BlockFrequencyInfo *>(this), "BlockFrequencyDAGs");
-#else
- errs() << "BlockFrequencyInfo::view is only available in debug builds on "
- "systems with Graphviz or gv!\n";
-#endif // NDEBUG
}
const Function *BlockFrequencyInfo::getFunction() const {
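The scaling in setBlockFreqAndScale multiplies before dividing and does the arithmetic in 128 bits, so the product of two 64-bit frequencies cannot overflow. A standalone sketch of the same math, using unsigned __int128 (a GCC/Clang extension on 64-bit targets) as a stand-in for the 128-bit APInt:

#include <cassert>
#include <cstdint>

// Multiply first, divide second, with a 128-bit intermediate so
// BBFreq * NewFreq cannot wrap even when both are large 64-bit counts.
static uint64_t scaleFreq(uint64_t BBFreq, uint64_t NewFreq, uint64_t OldFreq) {
  unsigned __int128 Wide = BBFreq;
  Wide *= NewFreq;
  Wide /= OldFreq; // the udiv the in-tree comment calls out as expensive
  return static_cast<uint64_t>(Wide);
}

int main() {
  // The product here is 2^80; a 64-bit intermediate would overflow.
  assert(scaleFreq(1ull << 40, 1ull << 40, 1ull << 40) == (1ull << 40));
  assert(scaleFreq(300, 50, 100) == 150); // reference frequency halved
}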
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 9850e02fca22..e5d8c3347c16 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -28,7 +28,9 @@ ScaledNumber<uint64_t> BlockMass::toScaled() const {
return ScaledNumber<uint64_t>(getMass() + 1, -64);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void BlockMass::dump() const { print(dbgs()); }
+#endif
static char getHexDigit(int N) {
assert(N < 16);
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 3eabb780398c..5935dec15c70 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -108,11 +108,9 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
/// instruction. This is essentially never taken.
static const uint32_t IH_NONTAKEN_WEIGHT = 1;
-/// \brief Calculate edge weights for successors lead to unreachable.
-///
-/// Predict that a successor which leads necessarily to an
-/// unreachable-terminated block as extremely unlikely.
-bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
+/// \brief Add \p BB to PostDominatedByUnreachable set if applicable.
+void
+BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) {
const TerminatorInst *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 0) {
if (isa<UnreachableInst>(TI) ||
@@ -122,38 +120,86 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
// never execute.
BB->getTerminatingDeoptimizeCall())
PostDominatedByUnreachable.insert(BB);
- return false;
+ return;
+ }
+
+  // If the terminator is an InvokeInst, check only the normal destination
+  // block, as the unwind edge of an InvokeInst is very unlikely to be taken.
+ if (auto *II = dyn_cast<InvokeInst>(TI)) {
+ if (PostDominatedByUnreachable.count(II->getNormalDest()))
+ PostDominatedByUnreachable.insert(BB);
+ return;
}
+ for (auto *I : successors(BB))
+    // If any successor is not post-dominated, then BB is not either.
+ if (!PostDominatedByUnreachable.count(I))
+ return;
+
+ PostDominatedByUnreachable.insert(BB);
+}
+
+/// \brief Add \p BB to PostDominatedByColdCall set if applicable.
+void
+BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
+ assert(!PostDominatedByColdCall.count(BB));
+ const TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0)
+ return;
+
+  // If all successors are post-dominated, then so is BB.
+ if (llvm::all_of(successors(BB), [&](const BasicBlock *SuccBB) {
+ return PostDominatedByColdCall.count(SuccBB);
+ })) {
+ PostDominatedByColdCall.insert(BB);
+ return;
+ }
+
+  // If the terminator is an InvokeInst, check only the normal destination
+  // block, as the unwind edge of an InvokeInst is very unlikely to be taken.
+ if (auto *II = dyn_cast<InvokeInst>(TI))
+ if (PostDominatedByColdCall.count(II->getNormalDest())) {
+ PostDominatedByColdCall.insert(BB);
+ return;
+ }
+
+ // Otherwise, if the block itself contains a cold function, add it to the
+ // set of blocks post-dominated by a cold call.
+ for (auto &I : *BB)
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
+ if (CI->hasFnAttr(Attribute::Cold)) {
+ PostDominatedByColdCall.insert(BB);
+ return;
+ }
+}
+
+/// \brief Calculate edge weights for successors that lead to unreachable blocks.
+///
+/// Predict that a successor which necessarily leads to an
+/// unreachable-terminated block is taken extremely rarely.
+bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
+ const TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0)
+ return false;
+
SmallVector<unsigned, 4> UnreachableEdges;
SmallVector<unsigned, 4> ReachableEdges;
- for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
if (PostDominatedByUnreachable.count(*I))
UnreachableEdges.push_back(I.getSuccessorIndex());
else
ReachableEdges.push_back(I.getSuccessorIndex());
- }
-
- // If all successors are in the set of blocks post-dominated by unreachable,
- // this block is too.
- if (UnreachableEdges.size() == TI->getNumSuccessors())
- PostDominatedByUnreachable.insert(BB);
// Skip probabilities if this block has a single successor or if all were
// reachable.
if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty())
return false;
- // If the terminator is an InvokeInst, check only the normal destination block
- // as the unwind edge of InvokeInst is also very unlikely taken.
- if (auto *II = dyn_cast<InvokeInst>(TI))
- if (PostDominatedByUnreachable.count(II->getNormalDest())) {
- PostDominatedByUnreachable.insert(BB);
- // Return false here so that edge weights for InvokeInst could be decided
- // in calcInvokeHeuristics().
- return false;
- }
+  // Return false here so that edge weights for InvokeInst can be decided
+  // in calcInvokeHeuristics().
+ if (isa<InvokeInst>(TI))
+ return false;
if (ReachableEdges.empty()) {
BranchProbability Prob(1, UnreachableEdges.size());
@@ -263,31 +309,10 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
else
NormalEdges.push_back(I.getSuccessorIndex());
- // If all successors are in the set of blocks post-dominated by cold calls,
- // this block is in the set post-dominated by cold calls.
- if (ColdEdges.size() == TI->getNumSuccessors())
- PostDominatedByColdCall.insert(BB);
- else {
- // Otherwise, if the block itself contains a cold function, add it to the
- // set of blocks postdominated by a cold call.
- assert(!PostDominatedByColdCall.count(BB));
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (const CallInst *CI = dyn_cast<CallInst>(I))
- if (CI->hasFnAttr(Attribute::Cold)) {
- PostDominatedByColdCall.insert(BB);
- break;
- }
- }
-
- if (auto *II = dyn_cast<InvokeInst>(TI)) {
- // If the terminator is an InvokeInst, consider only the normal destination
- // block.
- if (PostDominatedByColdCall.count(II->getNormalDest()))
- PostDominatedByColdCall.insert(BB);
- // Return false here so that edge weights for InvokeInst could be decided
- // in calcInvokeHeuristics().
+  // Return false here so that edge weights for InvokeInst can be decided
+  // in calcInvokeHeuristics().
+ if (isa<InvokeInst>(TI))
return false;
- }
// Skip probabilities if this block has a single successor.
if (TI->getNumSuccessors() == 1 || ColdEdges.empty())
@@ -671,6 +696,8 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI) {
// the successors of a block iteratively.
for (auto BB : post_order(&F.getEntryBlock())) {
DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n");
+ updatePostDominatedByUnreachable(BB);
+ updatePostDominatedByColdCall(BB);
if (calcUnreachableHeuristics(BB))
continue;
if (calcMetadataWeights(BB))
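The refactoring above separates set maintenance from the heuristics: because calculate() walks blocks in post-order, every successor is processed before its predecessor, so updatePostDominatedByUnreachable can use a simple all-successors membership test. A toy illustration with integers standing in for basic blocks:

#include <cassert>
#include <set>
#include <vector>

int main() {
  // CFG: 0 -> {1, 2}, 1 -> {3}, 2 -> {3}, 3 -> {} (3 ends in unreachable).
  std::vector<std::vector<int>> Succs = {{1, 2}, {3}, {3}, {}};
  std::set<int> PostDominatedByUnreachable;

  // A valid post-order for this CFG: every successor before its predecessor.
  for (int BB : {3, 1, 2, 0}) {
    if (Succs[BB].empty()) {
      PostDominatedByUnreachable.insert(BB); // terminator is unreachable here
      continue;
    }
    bool All = true;
    for (int S : Succs[BB])
      All = All && PostDominatedByUnreachable.count(S) != 0;
    if (All)
      PostDominatedByUnreachable.insert(BB);
  }
  // Every block funnels into the unreachable-terminated block 3.
  assert(PostDominatedByUnreachable.size() == 4);
}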
diff --git a/lib/Analysis/CFLAndersAliasAnalysis.cpp b/lib/Analysis/CFLAndersAliasAnalysis.cpp
index e48ff230f43c..ddd5123d0eff 100644
--- a/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -307,7 +307,7 @@ class CFLAndersAAResult::FunctionInfo {
public:
FunctionInfo(const Function &, const SmallVectorImpl<Value *> &,
- const ReachabilitySet &, AliasAttrMap);
+ const ReachabilitySet &, const AliasAttrMap &);
bool mayAlias(const Value *, uint64_t, const Value *, uint64_t) const;
const AliasSummary &getAliasSummary() const { return Summary; }
@@ -470,7 +470,7 @@ static void populateExternalAttributes(
CFLAndersAAResult::FunctionInfo::FunctionInfo(
const Function &Fn, const SmallVectorImpl<Value *> &RetVals,
- const ReachabilitySet &ReachSet, AliasAttrMap AMap) {
+ const ReachabilitySet &ReachSet, const AliasAttrMap &AMap) {
populateAttrMap(AttrMap, AMap);
populateExternalAttributes(Summary.RetParamAttributes, Fn, RetVals, AMap);
populateAliasMap(AliasMap, ReachSet);
diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp
index 054bdc45ad67..9d4521221f47 100644
--- a/lib/Analysis/CGSCCPassManager.cpp
+++ b/lib/Analysis/CGSCCPassManager.cpp
@@ -117,6 +117,7 @@ bool CGSCCAnalysisManagerModuleProxy::Result::invalidate(
PA.allAnalysesInSetPreserved<AllAnalysesOn<LazyCallGraph::SCC>>();
// Ok, we have a graph, so we can propagate the invalidation down into it.
+ G->buildRefSCCs();
for (auto &RC : G->postorder_ref_sccs())
for (auto &C : RC) {
Optional<PreservedAnalyses> InnerPA;
@@ -273,9 +274,9 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
// demoted edges.
SmallVector<Constant *, 16> Worklist;
SmallPtrSet<Constant *, 16> Visited;
- SmallPtrSet<Function *, 16> RetainedEdges;
- SmallSetVector<Function *, 4> PromotedRefTargets;
- SmallSetVector<Function *, 4> DemotedCallTargets;
+ SmallPtrSet<Node *, 16> RetainedEdges;
+ SmallSetVector<Node *, 4> PromotedRefTargets;
+ SmallSetVector<Node *, 4> DemotedCallTargets;
// First walk the function and handle all called functions. We do this first
// because if there is a single call edge, whether there are ref edges is
@@ -284,7 +285,8 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
if (auto CS = CallSite(&I))
if (Function *Callee = CS.getCalledFunction())
if (Visited.insert(Callee).second && !Callee->isDeclaration()) {
- const Edge *E = N.lookup(*Callee);
+ Node &CalleeN = *G.lookup(*Callee);
+ Edge *E = N->lookup(CalleeN);
// FIXME: We should really handle adding new calls. While it will
// make downstream usage more complex, there is no fundamental
// limitation and it will allow passes within the CGSCC to be a bit
@@ -293,9 +295,9 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
assert(E && "No function transformations should introduce *new* "
"call edges! Any new calls should be modeled as "
"promoted existing ref edges!");
- RetainedEdges.insert(Callee);
+ RetainedEdges.insert(&CalleeN);
if (!E->isCall())
- PromotedRefTargets.insert(Callee);
+ PromotedRefTargets.insert(&CalleeN);
}
// Now walk all references.
@@ -306,24 +308,25 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
Worklist.push_back(C);
LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &Referee) {
- const Edge *E = N.lookup(Referee);
+ Node &RefereeN = *G.lookup(Referee);
+ Edge *E = N->lookup(RefereeN);
// FIXME: Similarly to new calls, we also currently preclude
// introducing new references. See above for details.
assert(E && "No function transformations should introduce *new* ref "
"edges! Any new ref edges would require IPO which "
"function passes aren't allowed to do!");
- RetainedEdges.insert(&Referee);
+ RetainedEdges.insert(&RefereeN);
if (E->isCall())
- DemotedCallTargets.insert(&Referee);
+ DemotedCallTargets.insert(&RefereeN);
});
// First remove all of the edges that are no longer present in this function.
// We have to build a list of dead targets first and then remove them as the
// data structures will all be invalidated by removing them.
SmallVector<PointerIntPair<Node *, 1, Edge::Kind>, 4> DeadTargets;
- for (Edge &E : N)
- if (!RetainedEdges.count(&E.getFunction()))
- DeadTargets.push_back({E.getNode(), E.getKind()});
+ for (Edge &E : *N)
+ if (!RetainedEdges.count(&E.getNode()))
+ DeadTargets.push_back({&E.getNode(), E.getKind()});
for (auto DeadTarget : DeadTargets) {
Node &TargetN = *DeadTarget.getPointer();
bool IsCall = DeadTarget.getInt() == Edge::Call;
@@ -397,9 +400,8 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
// Next demote all the call edges that are now ref edges. This helps make
// the SCCs small which should minimize the work below as we don't want to
// form cycles that this would break.
- for (Function *RefTarget : DemotedCallTargets) {
- Node &TargetN = *G.lookup(*RefTarget);
- SCC &TargetC = *G.lookupSCC(TargetN);
+ for (Node *RefTarget : DemotedCallTargets) {
+ SCC &TargetC = *G.lookupSCC(*RefTarget);
RefSCC &TargetRC = TargetC.getOuterRefSCC();
// The easy case is when the target RefSCC is not this RefSCC. This is
@@ -407,10 +409,10 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
if (&TargetRC != RC) {
assert(RC->isAncestorOf(TargetRC) &&
"Cannot potentially form RefSCC cycles here!");
- RC->switchOutgoingEdgeToRef(N, TargetN);
+ RC->switchOutgoingEdgeToRef(N, *RefTarget);
if (DebugLogging)
dbgs() << "Switch outgoing call edge to a ref edge from '" << N
- << "' to '" << TargetN << "'\n";
+ << "' to '" << *RefTarget << "'\n";
continue;
}
@@ -418,7 +420,7 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
// some SCCs.
if (C != &TargetC) {
// For separate SCCs this is trivial.
- RC->switchTrivialInternalEdgeToRef(N, TargetN);
+ RC->switchTrivialInternalEdgeToRef(N, *RefTarget);
continue;
}
@@ -430,14 +432,13 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
// structure is changed.
AM.invalidate(*C, PreservedAnalyses::none());
// Now update the call graph.
- C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G,
- N, C, AM, UR, DebugLogging);
+ C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, *RefTarget), G, N,
+ C, AM, UR, DebugLogging);
}
// Now promote ref edges into call edges.
- for (Function *CallTarget : PromotedRefTargets) {
- Node &TargetN = *G.lookup(*CallTarget);
- SCC &TargetC = *G.lookupSCC(TargetN);
+ for (Node *CallTarget : PromotedRefTargets) {
+ SCC &TargetC = *G.lookupSCC(*CallTarget);
RefSCC &TargetRC = TargetC.getOuterRefSCC();
// The easy case is when the target RefSCC is not this RefSCC. This is
@@ -445,22 +446,22 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
if (&TargetRC != RC) {
assert(RC->isAncestorOf(TargetRC) &&
"Cannot potentially form RefSCC cycles here!");
- RC->switchOutgoingEdgeToCall(N, TargetN);
+ RC->switchOutgoingEdgeToCall(N, *CallTarget);
if (DebugLogging)
dbgs() << "Switch outgoing ref edge to a call edge from '" << N
- << "' to '" << TargetN << "'\n";
+ << "' to '" << *CallTarget << "'\n";
continue;
}
if (DebugLogging)
dbgs() << "Switch an internal ref edge to a call edge from '" << N
- << "' to '" << TargetN << "'\n";
+ << "' to '" << *CallTarget << "'\n";
// Otherwise we are switching an internal ref edge to a call edge. This
// may merge away some SCCs, and we add those to the UpdateResult. We also
// need to make sure to update the worklist in the event SCCs have moved
// before the current one in the post-order sequence.
auto InitialSCCIndex = RC->find(*C) - RC->begin();
- auto InvalidatedSCCs = RC->switchInternalEdgeToCall(N, TargetN);
+ auto InvalidatedSCCs = RC->switchInternalEdgeToCall(N, *CallTarget);
if (!InvalidatedSCCs.empty()) {
C = &TargetC;
assert(G.lookupSCC(N) == C && "Failed to update current SCC!");
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index d53364373d7b..161709a48466 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -53,6 +53,8 @@ add_llvm_library(LLVMAnalysis
MemoryBuiltins.cpp
MemoryDependenceAnalysis.cpp
MemoryLocation.cpp
+ MemorySSA.cpp
+ MemorySSAUpdater.cpp
ModuleDebugInfoPrinter.cpp
ModuleSummaryAnalysis.cpp
ObjCARCAliasAnalysis.cpp
diff --git a/lib/Analysis/CallGraph.cpp b/lib/Analysis/CallGraph.cpp
index 458b7bfae959..6942176ae6ae 100644
--- a/lib/Analysis/CallGraph.cpp
+++ b/lib/Analysis/CallGraph.cpp
@@ -125,8 +125,9 @@ void CallGraph::print(raw_ostream &OS) const {
CN->print(OS);
}
-LLVM_DUMP_METHOD
-void CallGraph::dump() const { print(dbgs()); }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void CallGraph::dump() const { print(dbgs()); }
+#endif
// removeFunctionFromModule - Unlink the function from this module, returning
// it. Because this removes the function from the module, the call graph node
@@ -194,8 +195,9 @@ void CallGraphNode::print(raw_ostream &OS) const {
OS << '\n';
}
-LLVM_DUMP_METHOD
-void CallGraphNode::dump() const { print(dbgs()); }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void CallGraphNode::dump() const { print(dbgs()); }
+#endif
/// removeCallEdgeFor - This method removes the edge in the node for the
/// specified call site. Note that this method takes linear time, so it
@@ -307,8 +309,10 @@ void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const {
G->print(OS);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); }
+#endif
namespace {
struct CallGraphPrinterLegacyPass : public ModulePass {
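These hunks standardize the dump() convention: the method is compiled in for asserts-enabled builds, and release builds can opt back in by defining LLVM_ENABLE_DUMP, instead of shipping dead dump code by default. A minimal illustration of the gating:

#include <cstdio>

struct CallGraphLike {
  void print() const { std::puts("call graph"); }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // Present in asserts builds, or when LLVM_ENABLE_DUMP is defined.
  void dump() const { print(); }
#endif
};

int main() {
  CallGraphLike CG;
  CG.print();
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  CG.dump(); // only callable when the method was compiled in
#endif
}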
diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp
index 9cef78144150..ea70f5752c61 100644
--- a/lib/Analysis/CallGraphSCCPass.cpp
+++ b/lib/Analysis/CallGraphSCCPass.cpp
@@ -609,16 +609,28 @@ namespace {
}
bool runOnSCC(CallGraphSCC &SCC) override {
- Out << Banner;
+ auto PrintBannerOnce = [&] () {
+ static bool BannerPrinted = false;
+ if (BannerPrinted)
+ return;
+ Out << Banner;
+ BannerPrinted = true;
+ };
for (CallGraphNode *CGN : SCC) {
if (CGN->getFunction()) {
- if (isFunctionInPrintList(CGN->getFunction()->getName()))
+ if (isFunctionInPrintList(CGN->getFunction()->getName())) {
+ PrintBannerOnce();
CGN->getFunction()->print(Out);
- } else
+ }
+ } else if (llvm::isFunctionInPrintList("*")) {
+ PrintBannerOnce();
Out << "\nPrinting <null> Function\n";
+ }
}
return false;
}
+
+ StringRef getPassName() const override { return "Print CallGraph IR"; }
};
} // end anonymous namespace.
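The runOnSCC change defers the banner through a latch-style lambda, so it is printed only if some function actually matches the print filter. A standalone version of the pattern; note that the hunk above latches through a function-local static, so its banner prints once per process rather than once per call, as this sketch does:

#include <iostream>
#include <string>
#include <vector>

static void printSelected(const std::vector<std::string> &Funcs,
                          const std::string &Filter, std::ostream &Out) {
  bool BannerPrinted = false;
  auto PrintBannerOnce = [&] {
    if (BannerPrinted)
      return;
    Out << "*** IR Dump ***\n";
    BannerPrinted = true;
  };
  for (const std::string &F : Funcs)
    if (Filter.empty() || F == Filter) {
      PrintBannerOnce(); // banner appears only if something matches
      Out << F << "\n";
    }
}

int main() {
  printSelected({"foo", "bar"}, "bar", std::cout);     // banner, then bar
  printSelected({"foo", "bar"}, "missing", std::cout); // prints nothing
}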
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 73867279abe4..14176dac2104 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1058,8 +1058,8 @@ ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
if (It == FoldedOps.end()) {
if (auto *FoldedC =
ConstantFoldConstantImpl(NewC, DL, TLI, FoldedOps)) {
- NewC = FoldedC;
FoldedOps.insert({NewC, FoldedC});
+ NewC = FoldedC;
} else {
FoldedOps.insert({NewC, NewC});
}
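The two reordered lines fix a subtle caching bug: the map entry must be inserted while NewC still names the original constant; otherwise the cache records {FoldedC, FoldedC} and later lookups of the original miss. A standalone reconstruction of the corrected pattern (integer "constants" and a trivial fold step as stand-ins):

#include <cassert>
#include <map>

static int foldOnce(int C) { return C / 2; } // stand-in for one fold step

static int foldCached(int NewC, std::map<int, int> &FoldedOps) {
  auto It = FoldedOps.find(NewC);
  if (It == FoldedOps.end()) {
    int FoldedC = foldOnce(NewC);
    FoldedOps.insert({NewC, FoldedC}); // insert first, keyed by the original
    NewC = FoldedC;                    // ...then advance to the folded value
  } else {
    NewC = It->second;
  }
  return NewC;
}

int main() {
  std::map<int, int> FoldedOps;
  assert(foldCached(8, FoldedOps) == 4);
  assert(FoldedOps.count(8) == 1); // the original constant is the cache key
}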
@@ -1401,7 +1401,7 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
return true;
default:
return false;
- case 0: break;
+ case Intrinsic::not_intrinsic: break;
}
if (!F->hasName())
@@ -1518,9 +1518,9 @@ Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
bool isExact = false;
APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero
: APFloat::rmNearestTiesToEven;
- APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth,
- /*isSigned=*/true, mode,
- &isExact);
+ APFloat::opStatus status =
+ Val.convertToInteger(makeMutableArrayRef(UIntVal), ResultWidth,
+ /*isSigned=*/true, mode, &isExact);
if (status != APFloat::opOK &&
(!roundTowardZero || status != APFloat::opInexact))
return nullptr;
@@ -1630,6 +1630,8 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
return ConstantFoldFP(sin, V, Ty);
case Intrinsic::cos:
return ConstantFoldFP(cos, V, Ty);
+ case Intrinsic::sqrt:
+ return ConstantFoldFP(sqrt, V, Ty);
}
if (!TLI)
@@ -1637,87 +1639,74 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
switch (Name[0]) {
case 'a':
- if ((Name == "acos" && TLI->has(LibFunc::acos)) ||
- (Name == "acosf" && TLI->has(LibFunc::acosf)))
+ if ((Name == "acos" && TLI->has(LibFunc_acos)) ||
+ (Name == "acosf" && TLI->has(LibFunc_acosf)))
return ConstantFoldFP(acos, V, Ty);
- else if ((Name == "asin" && TLI->has(LibFunc::asin)) ||
- (Name == "asinf" && TLI->has(LibFunc::asinf)))
+ else if ((Name == "asin" && TLI->has(LibFunc_asin)) ||
+ (Name == "asinf" && TLI->has(LibFunc_asinf)))
return ConstantFoldFP(asin, V, Ty);
- else if ((Name == "atan" && TLI->has(LibFunc::atan)) ||
- (Name == "atanf" && TLI->has(LibFunc::atanf)))
+ else if ((Name == "atan" && TLI->has(LibFunc_atan)) ||
+ (Name == "atanf" && TLI->has(LibFunc_atanf)))
return ConstantFoldFP(atan, V, Ty);
break;
case 'c':
- if ((Name == "ceil" && TLI->has(LibFunc::ceil)) ||
- (Name == "ceilf" && TLI->has(LibFunc::ceilf)))
+ if ((Name == "ceil" && TLI->has(LibFunc_ceil)) ||
+ (Name == "ceilf" && TLI->has(LibFunc_ceilf)))
return ConstantFoldFP(ceil, V, Ty);
- else if ((Name == "cos" && TLI->has(LibFunc::cos)) ||
- (Name == "cosf" && TLI->has(LibFunc::cosf)))
+ else if ((Name == "cos" && TLI->has(LibFunc_cos)) ||
+ (Name == "cosf" && TLI->has(LibFunc_cosf)))
return ConstantFoldFP(cos, V, Ty);
- else if ((Name == "cosh" && TLI->has(LibFunc::cosh)) ||
- (Name == "coshf" && TLI->has(LibFunc::coshf)))
+ else if ((Name == "cosh" && TLI->has(LibFunc_cosh)) ||
+ (Name == "coshf" && TLI->has(LibFunc_coshf)))
return ConstantFoldFP(cosh, V, Ty);
break;
case 'e':
- if ((Name == "exp" && TLI->has(LibFunc::exp)) ||
- (Name == "expf" && TLI->has(LibFunc::expf)))
+ if ((Name == "exp" && TLI->has(LibFunc_exp)) ||
+ (Name == "expf" && TLI->has(LibFunc_expf)))
return ConstantFoldFP(exp, V, Ty);
- if ((Name == "exp2" && TLI->has(LibFunc::exp2)) ||
- (Name == "exp2f" && TLI->has(LibFunc::exp2f)))
+ if ((Name == "exp2" && TLI->has(LibFunc_exp2)) ||
+ (Name == "exp2f" && TLI->has(LibFunc_exp2f)))
// Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
// C99 library.
return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
break;
case 'f':
- if ((Name == "fabs" && TLI->has(LibFunc::fabs)) ||
- (Name == "fabsf" && TLI->has(LibFunc::fabsf)))
+ if ((Name == "fabs" && TLI->has(LibFunc_fabs)) ||
+ (Name == "fabsf" && TLI->has(LibFunc_fabsf)))
return ConstantFoldFP(fabs, V, Ty);
- else if ((Name == "floor" && TLI->has(LibFunc::floor)) ||
- (Name == "floorf" && TLI->has(LibFunc::floorf)))
+ else if ((Name == "floor" && TLI->has(LibFunc_floor)) ||
+ (Name == "floorf" && TLI->has(LibFunc_floorf)))
return ConstantFoldFP(floor, V, Ty);
break;
case 'l':
- if ((Name == "log" && V > 0 && TLI->has(LibFunc::log)) ||
- (Name == "logf" && V > 0 && TLI->has(LibFunc::logf)))
+ if ((Name == "log" && V > 0 && TLI->has(LibFunc_log)) ||
+ (Name == "logf" && V > 0 && TLI->has(LibFunc_logf)))
return ConstantFoldFP(log, V, Ty);
- else if ((Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) ||
- (Name == "log10f" && V > 0 && TLI->has(LibFunc::log10f)))
+ else if ((Name == "log10" && V > 0 && TLI->has(LibFunc_log10)) ||
+ (Name == "log10f" && V > 0 && TLI->has(LibFunc_log10f)))
return ConstantFoldFP(log10, V, Ty);
- else if (IntrinsicID == Intrinsic::sqrt &&
- (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) {
- if (V >= -0.0)
- return ConstantFoldFP(sqrt, V, Ty);
- else {
- // Unlike the sqrt definitions in C/C++, POSIX, and IEEE-754 - which
- // all guarantee or favor returning NaN - the square root of a
- // negative number is not defined for the LLVM sqrt intrinsic.
- // This is because the intrinsic should only be emitted in place of
- // libm's sqrt function when using "no-nans-fp-math".
- return UndefValue::get(Ty);
- }
- }
break;
case 'r':
- if ((Name == "round" && TLI->has(LibFunc::round)) ||
- (Name == "roundf" && TLI->has(LibFunc::roundf)))
+ if ((Name == "round" && TLI->has(LibFunc_round)) ||
+ (Name == "roundf" && TLI->has(LibFunc_roundf)))
return ConstantFoldFP(round, V, Ty);
case 's':
- if ((Name == "sin" && TLI->has(LibFunc::sin)) ||
- (Name == "sinf" && TLI->has(LibFunc::sinf)))
+ if ((Name == "sin" && TLI->has(LibFunc_sin)) ||
+ (Name == "sinf" && TLI->has(LibFunc_sinf)))
return ConstantFoldFP(sin, V, Ty);
- else if ((Name == "sinh" && TLI->has(LibFunc::sinh)) ||
- (Name == "sinhf" && TLI->has(LibFunc::sinhf)))
+ else if ((Name == "sinh" && TLI->has(LibFunc_sinh)) ||
+ (Name == "sinhf" && TLI->has(LibFunc_sinhf)))
return ConstantFoldFP(sinh, V, Ty);
- else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) ||
- (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf)))
+ else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc_sqrt)) ||
+ (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc_sqrtf)))
return ConstantFoldFP(sqrt, V, Ty);
break;
case 't':
- if ((Name == "tan" && TLI->has(LibFunc::tan)) ||
- (Name == "tanf" && TLI->has(LibFunc::tanf)))
+ if ((Name == "tan" && TLI->has(LibFunc_tan)) ||
+ (Name == "tanf" && TLI->has(LibFunc_tanf)))
return ConstantFoldFP(tan, V, Ty);
- else if ((Name == "tanh" && TLI->has(LibFunc::tanh)) ||
- (Name == "tanhf" && TLI->has(LibFunc::tanhf)))
+ else if ((Name == "tanh" && TLI->has(LibFunc_tanh)) ||
+ (Name == "tanhf" && TLI->has(LibFunc_tanhf)))
return ConstantFoldFP(tanh, V, Ty);
break;
default:
@@ -1779,7 +1768,8 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
}
if (isa<UndefValue>(Operands[0])) {
- if (IntrinsicID == Intrinsic::bswap)
+ if (IntrinsicID == Intrinsic::bswap ||
+ IntrinsicID == Intrinsic::bitreverse)
return Operands[0];
return nullptr;
}
@@ -1822,14 +1812,14 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
if (!TLI)
return nullptr;
- if ((Name == "pow" && TLI->has(LibFunc::pow)) ||
- (Name == "powf" && TLI->has(LibFunc::powf)))
+ if ((Name == "pow" && TLI->has(LibFunc_pow)) ||
+ (Name == "powf" && TLI->has(LibFunc_powf)))
return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
- if ((Name == "fmod" && TLI->has(LibFunc::fmod)) ||
- (Name == "fmodf" && TLI->has(LibFunc::fmodf)))
+ if ((Name == "fmod" && TLI->has(LibFunc_fmod)) ||
+ (Name == "fmodf" && TLI->has(LibFunc_fmodf)))
return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
- if ((Name == "atan2" && TLI->has(LibFunc::atan2)) ||
- (Name == "atan2f" && TLI->has(LibFunc::atan2f)))
+ if ((Name == "atan2" && TLI->has(LibFunc_atan2)) ||
+ (Name == "atan2f" && TLI->has(LibFunc_atan2f)))
return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
} else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
@@ -2022,7 +2012,7 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
if (!F)
return false;
- LibFunc::Func Func;
+ LibFunc Func;
if (!TLI || !TLI->getLibFunc(*F, Func))
return false;
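
The LibFunc churn throughout this file is mechanical. My reconstruction of the two spellings (illustrative, not copied from the headers): the nested LibFunc::Func enum becomes a single top-level enum whose enumerators carry a LibFunc_ prefix, so declarations shrink from LibFunc::Func to plain LibFunc:

// Before: a namespace wrapping an inner enum, used as LibFunc::acos and
// declared as LibFunc::Func F.
namespace before {
namespace LibFunc {
enum Func { acos, acosf /*, ... */ };
}
}

// After: one flat enum, used as LibFunc_acos and declared as LibFunc F.
namespace after {
enum LibFunc { LibFunc_acos, LibFunc_acosf /*, ... */ };
}
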
@@ -2030,20 +2020,20 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
if (ConstantFP *OpC = dyn_cast<ConstantFP>(CS.getArgOperand(0))) {
const APFloat &Op = OpC->getValueAPF();
switch (Func) {
- case LibFunc::logl:
- case LibFunc::log:
- case LibFunc::logf:
- case LibFunc::log2l:
- case LibFunc::log2:
- case LibFunc::log2f:
- case LibFunc::log10l:
- case LibFunc::log10:
- case LibFunc::log10f:
+ case LibFunc_logl:
+ case LibFunc_log:
+ case LibFunc_logf:
+ case LibFunc_log2l:
+ case LibFunc_log2:
+ case LibFunc_log2f:
+ case LibFunc_log10l:
+ case LibFunc_log10:
+ case LibFunc_log10f:
return Op.isNaN() || (!Op.isZero() && !Op.isNegative());
- case LibFunc::expl:
- case LibFunc::exp:
- case LibFunc::expf:
+ case LibFunc_expl:
+ case LibFunc_exp:
+ case LibFunc_expf:
// FIXME: These boundaries are slightly conservative.
if (OpC->getType()->isDoubleTy())
return Op.compare(APFloat(-745.0)) != APFloat::cmpLessThan &&
@@ -2053,9 +2043,9 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
Op.compare(APFloat(88.0f)) != APFloat::cmpGreaterThan;
break;
- case LibFunc::exp2l:
- case LibFunc::exp2:
- case LibFunc::exp2f:
+ case LibFunc_exp2l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
// FIXME: These boundaries are slightly conservative.
if (OpC->getType()->isDoubleTy())
return Op.compare(APFloat(-1074.0)) != APFloat::cmpLessThan &&
@@ -2065,17 +2055,17 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
Op.compare(APFloat(127.0f)) != APFloat::cmpGreaterThan;
break;
- case LibFunc::sinl:
- case LibFunc::sin:
- case LibFunc::sinf:
- case LibFunc::cosl:
- case LibFunc::cos:
- case LibFunc::cosf:
+ case LibFunc_sinl:
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ case LibFunc_cosl:
+ case LibFunc_cos:
+ case LibFunc_cosf:
return !Op.isInfinity();
- case LibFunc::tanl:
- case LibFunc::tan:
- case LibFunc::tanf: {
+ case LibFunc_tanl:
+ case LibFunc_tan:
+ case LibFunc_tanf: {
// FIXME: Stop using the host math library.
// FIXME: The computation isn't done in the right precision.
Type *Ty = OpC->getType();
@@ -2086,23 +2076,23 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
break;
}
- case LibFunc::asinl:
- case LibFunc::asin:
- case LibFunc::asinf:
- case LibFunc::acosl:
- case LibFunc::acos:
- case LibFunc::acosf:
+ case LibFunc_asinl:
+ case LibFunc_asin:
+ case LibFunc_asinf:
+ case LibFunc_acosl:
+ case LibFunc_acos:
+ case LibFunc_acosf:
return Op.compare(APFloat(Op.getSemantics(), "-1")) !=
APFloat::cmpLessThan &&
Op.compare(APFloat(Op.getSemantics(), "1")) !=
APFloat::cmpGreaterThan;
- case LibFunc::sinh:
- case LibFunc::cosh:
- case LibFunc::sinhf:
- case LibFunc::coshf:
- case LibFunc::sinhl:
- case LibFunc::coshl:
+ case LibFunc_sinh:
+ case LibFunc_cosh:
+ case LibFunc_sinhf:
+ case LibFunc_coshf:
+ case LibFunc_sinhl:
+ case LibFunc_coshl:
// FIXME: These boundaries are slightly conservative.
if (OpC->getType()->isDoubleTy())
return Op.compare(APFloat(-710.0)) != APFloat::cmpLessThan &&
@@ -2112,9 +2102,9 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
Op.compare(APFloat(89.0f)) != APFloat::cmpGreaterThan;
break;
- case LibFunc::sqrtl:
- case LibFunc::sqrt:
- case LibFunc::sqrtf:
+ case LibFunc_sqrtl:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
return Op.isNaN() || Op.isZero() || !Op.isNegative();
// FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
@@ -2133,9 +2123,9 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
const APFloat &Op1 = Op1C->getValueAPF();
switch (Func) {
- case LibFunc::powl:
- case LibFunc::pow:
- case LibFunc::powf: {
+ case LibFunc_powl:
+ case LibFunc_pow:
+ case LibFunc_powf: {
// FIXME: Stop using the host math library.
// FIXME: The computation isn't done in the right precision.
Type *Ty = Op0C->getType();
@@ -2149,9 +2139,9 @@ bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
break;
}
- case LibFunc::fmodl:
- case LibFunc::fmod:
- case LibFunc::fmodf:
+ case LibFunc_fmodl:
+ case LibFunc_fmod:
+ case LibFunc_fmodf:
return Op0.isNaN() || Op1.isNaN() ||
(!Op0.isInfinity() && !Op1.isZero());
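
The exp/exp2 windows above look arbitrary but fall out of double's limits: exp stays finite while x <= log(DBL_MAX) ~ 709.78, and at x = -745 the result still rounds up to the smallest denormal (log of that denormal is ~ -744.44), which is why the FIXMEs call [-745, 709] slightly conservative. A quick host-side check, assuming the usual libm:

#include <cmath>
#include <cstdio>
#include <limits>

int main() {
  // The true saturation points that the [-745, 709] window approximates.
  std::printf("log(max double)   = %.4f\n",
              std::log(std::numeric_limits<double>::max()));        // 709.7827
  std::printf("log(min denormal) = %.4f\n",
              std::log(std::numeric_limits<double>::denorm_min())); // -744.4401
  std::printf("exp(-745)         = %g\n", std::exp(-745.0)); // still nonzero
}
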
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 6b77397956cd..32bfea58bf9d 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -447,25 +447,25 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
case Instruction::Select: {
const SelectInst *SI = cast<SelectInst>(I);
Type *CondTy = SI->getCondition()->getType();
- return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy);
+ return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
}
case Instruction::ICmp:
case Instruction::FCmp: {
Type *ValTy = I->getOperand(0)->getType();
- return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy);
+ return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
}
case Instruction::Store: {
const StoreInst *SI = cast<StoreInst>(I);
Type *ValTy = SI->getValueOperand()->getType();
return TTI->getMemoryOpCost(I->getOpcode(), ValTy,
- SI->getAlignment(),
- SI->getPointerAddressSpace());
+ SI->getAlignment(),
+ SI->getPointerAddressSpace(), I);
}
case Instruction::Load: {
const LoadInst *LI = cast<LoadInst>(I);
return TTI->getMemoryOpCost(I->getOpcode(), I->getType(),
- LI->getAlignment(),
- LI->getPointerAddressSpace());
+ LI->getAlignment(),
+ LI->getPointerAddressSpace(), I);
}
case Instruction::ZExt:
case Instruction::SExt:
@@ -481,7 +481,7 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
case Instruction::BitCast:
case Instruction::AddrSpaceCast: {
Type *SrcTy = I->getOperand(0)->getType();
- return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy);
+ return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
}
case Instruction::ExtractElement: {
const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
@@ -542,9 +542,7 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
}
case Instruction::Call:
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- SmallVector<Value *, 4> Args;
- for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
- Args.push_back(II->getArgOperand(J));
+ SmallVector<Value *, 4> Args(II->arg_operands());
FastMathFlags FMF;
if (auto *FPMO = dyn_cast<FPMathOperator>(II))
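
The argument-gathering change above relies on SmallVector's range constructor: arg_operands() returns an iterator_range, which can seed the vector directly instead of an index-and-push_back loop. A standalone sketch, assuming LLVM's ADT headers:

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include <iterator>

int countFour() {
  int Buf[] = {1, 2, 3, 4};
  // One-shot construction from any iterator range, mirroring
  // SmallVector<Value *, 4> Args(II->arg_operands());
  llvm::SmallVector<int, 4> Args(
      llvm::make_range(std::begin(Buf), std::end(Buf)));
  return static_cast<int>(Args.size()); // 4
}
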
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
index 688c1db534c1..151c0b0e6c93 100644
--- a/lib/Analysis/DemandedBits.cpp
+++ b/lib/Analysis/DemandedBits.cpp
@@ -110,6 +110,9 @@ void DemandedBits::determineLiveOperandBits(
// the output.
AB = AOut.byteSwap();
break;
+ case Intrinsic::bitreverse:
+ AB = AOut.reverseBits();
+ break;
case Intrinsic::ctlz:
if (OperandNo == 0) {
// We need some output bits, so we need all bits of the
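
The new bitreverse case is the mirror-image transfer function: if bit k of the output is demanded, bit Width-1-k of the input is, so the alive mask is just the reversed output mask (what APInt::reverseBits computes). A toy check on 32 bits:

#include <cstdint>
#include <cstdio>

// Plain-C++ stand-in for APInt::reverseBits.
static uint32_t reverse32(uint32_t X) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R |= ((X >> I) & 1u) << (31 - I);
  return R;
}

int main() {
  uint32_t AOut = 0x0000000Fu;   // caller only uses the low 4 output bits
  uint32_t AB = reverse32(AOut); // so only the top 4 input bits are alive
  std::printf("alive input mask: 0x%08X\n", AB); // 0xF0000000
}
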
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index a332a07ce864..a4672efeedd6 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -385,9 +385,9 @@ void DependenceInfo::Constraint::setAny(ScalarEvolution *NewSE) {
Kind = Any;
}
-
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// For debugging purposes. Dumps the constraint out to OS.
-void DependenceInfo::Constraint::dump(raw_ostream &OS) const {
+LLVM_DUMP_METHOD void DependenceInfo::Constraint::dump(raw_ostream &OS) const {
if (isEmpty())
OS << " Empty\n";
else if (isAny())
@@ -403,6 +403,7 @@ void DependenceInfo::Constraint::dump(raw_ostream &OS) const {
else
llvm_unreachable("unknown constraint type in Constraint::dump");
}
+#endif
// Updates X with the intersection
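
The guard added here is LLVM's standard dump idiom: the method exists only in asserts builds (or when LLVM_ENABLE_DUMP is defined), and LLVM_DUMP_METHOD marks it noinline/used so the symbol survives for debugger invocation. A standalone sketch with a stand-in attribute macro (clang/gcc syntax assumed):

#include <cstdio>

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
#define DUMP_METHOD __attribute__((noinline, used)) // stand-in for LLVM_DUMP_METHOD
#endif

struct Constraint {
  bool Empty = true;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  DUMP_METHOD void dump() const {
    std::fputs(Empty ? " Empty\n" : " Distance\n", stderr);
  }
#endif
};

int main() {
  Constraint C;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  C.dump(); // compiled out of release binaries entirely
#endif
}
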
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
index 15856c3f8b7a..5b6e2d0476e4 100644
--- a/lib/Analysis/DominanceFrontier.cpp
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -56,6 +56,16 @@ LLVM_DUMP_METHOD void DominanceFrontierWrapperPass::dump() const {
}
#endif
+/// Handle invalidation explicitly.
+bool DominanceFrontier::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &) {
+ // Check whether the analysis, all analyses on functions, or the function's
+ // CFG have been preserved.
+ auto PAC = PA.getChecker<DominanceFrontierAnalysis>();
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
+ PAC.preservedSet<CFGAnalyses>());
+}
+
AnalysisKey DominanceFrontierAnalysis::Key;
DominanceFrontier DominanceFrontierAnalysis::run(Function &F,
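
For the producer side of this contract, a hypothetical new-PM pass (the name is mine) that rewrites instructions but never edits the CFG would report preserveSet<CFGAnalyses>(), and the invalidate() above then returns false so the cached frontier is kept:

#include "llvm/IR/PassManager.h"

// Hypothetical illustration only: a transform that does in-place
// instruction rewrites and therefore leaves every CFG-only analysis valid.
struct NoCFGChangesPass : llvm::PassInfoMixin<NoCFGChangesPass> {
  llvm::PreservedAnalyses run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &) {
    // ... rewrite instructions in F; no blocks or edges are touched ...
    llvm::PreservedAnalyses PA;
    PA.preserveSet<llvm::CFGAnalyses>(); // keeps DominanceFrontier cached
    return PA;
  }
};
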
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index a661b0101e6a..fde805a5fde5 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -76,9 +76,8 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
// An add is interesting if exactly one of its operands is interesting.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
bool AnyInterestingYet = false;
- for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
- OI != OE; ++OI)
- if (isInteresting(*OI, I, L, SE, LI)) {
+ for (const auto *Op : Add->operands())
+ if (isInteresting(Op, I, L, SE, LI)) {
if (AnyInterestingYet)
return false;
AnyInterestingYet = true;
@@ -118,6 +117,50 @@ static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
return true;
}
+/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
+/// and now we need to decide whether the user should use the preinc or post-inc
+/// value. If this user should use the post-inc version of the IV, return true.
+///
+/// Choosing wrong here can break dominance properties (if we choose to use the
+/// post-inc value when we cannot) or it can end up adding extra live-ranges to
+/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
+/// should use the post-inc value).
+static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
+ const Loop *L, DominatorTree *DT) {
+ // If the user is in the loop, use the preinc value.
+ if (L->contains(User))
+ return false;
+
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock)
+ return false;
+
+ // Ok, the user is outside of the loop. If it is dominated by the latch
+ // block, use the post-inc value.
+ if (DT->dominates(LatchBlock, User->getParent()))
+ return true;
+
+ // There is one case we have to be careful of: PHI nodes. These little guys
+ // can live in blocks that are not dominated by the latch block, but (since
+ // their uses occur in the predecessor block, not the block the PHI lives in)
+ // should still use the post-inc value. Check for this case now.
+ PHINode *PN = dyn_cast<PHINode>(User);
+ if (!PN || !Operand)
+ return false; // not a phi, not dominated by latch block.
+
+ // Look at all of the uses of Operand by the PHI node. If any use corresponds
+ // to a block that is not dominated by the latch block, give up and use the
+ // preincremented value.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == Operand &&
+ !DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
+ return false;
+
+ // Okay, all uses of Operand by PN are in predecessor blocks that really are
+ // dominated by the latch block. Use the post-incremented value.
+ return true;
+}
+
/// AddUsersImpl - Inspect the specified instruction. If it is a
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
/// return true. Otherwise, return false.
@@ -208,10 +251,26 @@ bool IVUsers::AddUsersImpl(Instruction *I,
// The regular return value here is discarded; instead of recording
// it, we just recompute it when we need it.
const SCEV *OriginalISE = ISE;
- ISE = TransformForPostIncUse(NormalizeAutodetect,
- ISE, User, I,
- NewUse.PostIncLoops,
- *SE, *DT);
+
+ auto NormalizePred = [&](const SCEVAddRecExpr *AR) {
+ // We only allow affine AddRecs to be normalized, otherwise we would not
+ // be able to correctly denormalize.
+ // e.g. {1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2}
+ // Normalized form: {-2,+,1,+,2}
+ // Denormalized form: {1,+,3,+,2}
+ //
+ // However, denormalization would use a different step expression than
+ // normalization (see getPostIncExpr), generating the wrong final
+ // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2}
+ auto *L = AR->getLoop();
+ bool Result =
+ AR->isAffine() && IVUseShouldUsePostIncValue(User, I, L, DT);
+ if (Result)
+ NewUse.PostIncLoops.insert(L);
+ return Result;
+ };
+
+ ISE = normalizeForPostIncUseIf(ISE, NormalizePred, *SE);
// PostIncNormalization effectively simplifies the expression under
// pre-increment assumptions. Those assumptions (no wrapping) might not
@@ -219,8 +278,7 @@ bool IVUsers::AddUsersImpl(Instruction *I,
// transformation is invertible.
if (OriginalISE != ISE) {
const SCEV *DenormalizedISE =
- TransformForPostIncUse(Denormalize, ISE, User, I,
- NewUse.PostIncLoops, *SE, *DT);
+ denormalizeForPostIncUse(ISE, NewUse.PostIncLoops, *SE);
// If we normalized the expression, but denormalization doesn't give the
// original one, discard this user.
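
The affine-only restriction in the comment can be checked numerically. Writing a chrec {A,+,B,+,C} at iteration N in binomial form as A + B*N + C*N*(N-1)/2, the quadratic example folds back correctly with step {3,+,2} but not with the step {1,+,2} that denormalization would use:

#include <cstdio>

// {A,+,B,+,C} evaluated at iteration N (binomial form).
static long chrec3(long A, long B, long C, long N) {
  return A + B * N + C * N * (N - 1) / 2;
}
// {A,+,B} evaluated at iteration N.
static long chrec2(long A, long B, long N) { return A + B * N; }

int main() {
  for (long N = 0; N < 5; ++N) {
    long Orig = chrec3(1, 3, 2, N);                    // {1,+,3,+,2}
    long Norm = chrec3(-2, 1, 2, N) + chrec2(3, 2, N); // {-2,+,1,+,2} + {3,+,2}
    long Bad = chrec3(-2, 1, 2, N) + chrec2(1, 2, N);  // wrong step {1,+,2}
    std::printf("N=%ld orig=%ld norm=%ld bad=%ld\n", N, Orig, Norm, Bad);
  }
  // Orig == Norm everywhere; Bad matches {-1,+,3,+,2} instead, exactly as
  // the comment above predicts.
}
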
@@ -338,11 +396,8 @@ const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const {
/// getExpr - Return the expression for the use.
const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const {
- return
- TransformForPostIncUse(Normalize, getReplacementExpr(IU),
- IU.getUser(), IU.getOperandValToReplace(),
- const_cast<PostIncLoopSet &>(IU.getPostIncLoops()),
- *SE, *DT);
+ return normalizeForPostIncUse(getReplacementExpr(IU), IU.getPostIncLoops(),
+ *SE);
}
static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
@@ -353,9 +408,8 @@ static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
}
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I)
- if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L))
+ for (const auto *Op : Add->operands())
+ if (const SCEVAddRecExpr *AR = findAddRecForLoop(Op, L))
return AR;
return nullptr;
}
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 4109049ecabc..1f8dec2aed80 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -48,6 +49,11 @@ static cl::opt<int> HintThreshold(
"inlinehint-threshold", cl::Hidden, cl::init(325),
cl::desc("Threshold for inlining functions with inline hint"));
+static cl::opt<int>
+ ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
+ cl::init(45),
+ cl::desc("Threshold for inlining cold callsites"));
+
// We introduce this threshold to help performance of instrumentation-based
// PGO before we actually hook up the inliner with analysis passes such as
// BPI and BFI.
@@ -72,12 +78,18 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// Getter for the cache of @llvm.assume intrinsics.
std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
+ /// Getter for BlockFrequencyInfo
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI;
+
/// Profile summary information.
ProfileSummaryInfo *PSI;
/// The called function.
Function &F;
+ // Cache the DataLayout since we use it a lot.
+ const DataLayout &DL;
+
/// The candidate callsite being analyzed. Please do not use this to do
/// analysis in the caller function; we want the inline cost query to be
/// easily cacheable. Instead, use the cover function paramHasAttr.
@@ -133,9 +145,11 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
void disableSROA(Value *V);
void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
int InstructionCost);
- bool isGEPOffsetConstant(GetElementPtrInst &GEP);
+ bool isGEPFree(GetElementPtrInst &GEP);
bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
bool simplifyCallSite(Function *F, CallSite CS);
+ template <typename Callable>
+ bool simplifyInstruction(Instruction &I, Callable Evaluate);
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
/// Return true if the given argument to the function being considered for
@@ -202,9 +216,11 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
public:
CallAnalyzer(const TargetTransformInfo &TTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg,
const InlineParams &Params)
- : TTI(TTI), GetAssumptionCache(GetAssumptionCache), PSI(PSI), F(Callee),
+ : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
+ PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()),
CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
@@ -286,23 +302,11 @@ void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
SROACostSavings += InstructionCost;
}
-/// \brief Check whether a GEP's indices are all constant.
-///
-/// Respects any simplified values known during the analysis of this callsite.
-bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
- for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
- if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
- return false;
-
- return true;
-}
-
/// \brief Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
- const DataLayout &DL = F.getParent()->getDataLayout();
unsigned IntPtrWidth = DL.getPointerSizeInBits();
assert(IntPtrWidth == Offset.getBitWidth());
@@ -331,13 +335,27 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
return true;
}
+/// \brief Use TTI to check whether a GEP is free.
+///
+/// Respects any simplified values known during the analysis of this callsite.
+bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
+ SmallVector<Value *, 4> Indices;
+ for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
+ if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
+ Indices.push_back(SimpleOp);
+ else
+ Indices.push_back(*I);
+ return TargetTransformInfo::TCC_Free ==
+ TTI.getGEPCost(GEP.getSourceElementType(), GEP.getPointerOperand(),
+ Indices);
+}
+
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
// Check whether inlining will turn a dynamic alloca into a static
// alloca and handle that case.
if (I.isArrayAllocation()) {
Constant *Size = SimplifiedValues.lookup(I.getArraySize());
if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
- const DataLayout &DL = F.getParent()->getDataLayout();
Type *Ty = I.getAllocatedType();
AllocatedSize = SaturatingMultiplyAdd(
AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty), AllocatedSize);
@@ -347,7 +365,6 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
// Accumulate the allocated size.
if (I.isStaticAlloca()) {
- const DataLayout &DL = F.getParent()->getDataLayout();
Type *Ty = I.getAllocatedType();
AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty), AllocatedSize);
}
@@ -396,7 +413,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
// Non-constant GEPs aren't folded, and disable SROA.
if (SROACandidate)
disableSROA(CostIt);
- return false;
+ return isGEPFree(I);
}
// Add the result as a new mapping to Base + Offset.
@@ -411,7 +428,15 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
}
}
- if (isGEPOffsetConstant(I)) {
+ // Lambda to check whether a GEP's indices are all constant.
+ auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
+ for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
+ if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
+ return false;
+ return true;
+ };
+
+ if (IsGEPOffsetConstant(I)) {
if (SROACandidate)
SROAArgValues[&I] = SROAArg;
@@ -422,19 +447,36 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
// Variable GEPs will require math and will disable SROA.
if (SROACandidate)
disableSROA(CostIt);
- return false;
+ return isGEPFree(I);
+}
+
+/// Simplify \p I if its operands are constants and update SimplifiedValues.
+/// \p Evaluate is a callable specific to the instruction type that evaluates
+/// the instruction when all the operands are constants.
+template <typename Callable>
+bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
+ SmallVector<Constant *, 2> COps;
+ for (Value *Op : I.operands()) {
+ Constant *COp = dyn_cast<Constant>(Op);
+ if (!COp)
+ COp = SimplifiedValues.lookup(Op);
+ if (!COp)
+ return false;
+ COps.push_back(COp);
+ }
+ auto *C = Evaluate(COps);
+ if (!C)
+ return false;
+ SimplifiedValues[&I] = C;
+ return true;
}
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
// Propagate constants through bitcasts.
- Constant *COp = dyn_cast<Constant>(I.getOperand(0));
- if (!COp)
- COp = SimplifiedValues.lookup(I.getOperand(0));
- if (COp)
- if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
- SimplifiedValues[&I] = C;
- return true;
- }
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ return ConstantExpr::getBitCast(COps[0], I.getType());
+ }))
+ return true;
// Track base/offsets through casts
std::pair<Value *, APInt> BaseAndOffset =
@@ -455,19 +497,14 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) {
bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
// Propagate constants through ptrtoint.
- Constant *COp = dyn_cast<Constant>(I.getOperand(0));
- if (!COp)
- COp = SimplifiedValues.lookup(I.getOperand(0));
- if (COp)
- if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
- SimplifiedValues[&I] = C;
- return true;
- }
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ return ConstantExpr::getPtrToInt(COps[0], I.getType());
+ }))
+ return true;
// Track base/offset pairs when converted to a plain integer provided the
// integer is large enough to represent the pointer.
unsigned IntegerSize = I.getType()->getScalarSizeInBits();
- const DataLayout &DL = F.getParent()->getDataLayout();
if (IntegerSize >= DL.getPointerSizeInBits()) {
std::pair<Value *, APInt> BaseAndOffset =
ConstantOffsetPtrs.lookup(I.getOperand(0));
@@ -492,20 +529,15 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
// Propagate constants through inttoptr.
- Constant *COp = dyn_cast<Constant>(I.getOperand(0));
- if (!COp)
- COp = SimplifiedValues.lookup(I.getOperand(0));
- if (COp)
- if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
- SimplifiedValues[&I] = C;
- return true;
- }
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ return ConstantExpr::getIntToPtr(COps[0], I.getType());
+ }))
+ return true;
// Track base/offset pairs when round-tripped through a pointer without
// modifications provided the integer is not too large.
Value *Op = I.getOperand(0);
unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
- const DataLayout &DL = F.getParent()->getDataLayout();
if (IntegerSize <= DL.getPointerSizeInBits()) {
std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
if (BaseAndOffset.first)
@@ -523,14 +555,10 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
bool CallAnalyzer::visitCastInst(CastInst &I) {
// Propagate constants through casts.
- Constant *COp = dyn_cast<Constant>(I.getOperand(0));
- if (!COp)
- COp = SimplifiedValues.lookup(I.getOperand(0));
- if (COp)
- if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
- SimplifiedValues[&I] = C;
- return true;
- }
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
+ }))
+ return true;
// Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
disableSROA(I.getOperand(0));
@@ -540,16 +568,10 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
Value *Operand = I.getOperand(0);
- Constant *COp = dyn_cast<Constant>(Operand);
- if (!COp)
- COp = SimplifiedValues.lookup(Operand);
- if (COp) {
- const DataLayout &DL = F.getParent()->getDataLayout();
- if (Constant *C = ConstantFoldInstOperands(&I, COp, DL)) {
- SimplifiedValues[&I] = C;
- return true;
- }
- }
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ return ConstantFoldInstOperands(&I, COps[0], DL);
+ }))
+ return true;
// Disable any SROA on the argument to arbitrary unary operators.
disableSROA(Operand);
@@ -558,8 +580,7 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
}
bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
- unsigned ArgNo = A->getArgNo();
- return CandidateCS.paramHasAttr(ArgNo + 1, Attr);
+ return CandidateCS.paramHasAttr(A->getArgNo(), Attr);
}
bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
@@ -642,16 +663,21 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
if (Callee.hasFnAttribute(Attribute::InlineHint))
Threshold = MaxIfValid(Threshold, Params.HintThreshold);
if (PSI) {
- uint64_t TotalWeight;
- if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
- PSI->isHotCount(TotalWeight)) {
- Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold);
+ BlockFrequencyInfo *CallerBFI = GetBFI ? &((*GetBFI)(*Caller)) : nullptr;
+ if (PSI->isHotCallSite(CS, CallerBFI)) {
+ DEBUG(dbgs() << "Hot callsite.\n");
+ Threshold = Params.HotCallSiteThreshold.getValue();
} else if (PSI->isFunctionEntryHot(&Callee)) {
+ DEBUG(dbgs() << "Hot callee.\n");
// If callsite hotness can not be determined, we may still know
// that the callee is hot and treat it as a weaker hint for threshold
// increase.
Threshold = MaxIfValid(Threshold, Params.HintThreshold);
+ } else if (PSI->isColdCallSite(CS, CallerBFI)) {
+ DEBUG(dbgs() << "Cold callsite.\n");
+ Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
} else if (PSI->isFunctionEntryCold(&Callee)) {
+ DEBUG(dbgs() << "Cold callee.\n");
Threshold = MinIfValid(Threshold, Params.ColdThreshold);
}
}
@@ -665,20 +691,10 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
// First try to handle simplified comparisons.
- if (!isa<Constant>(LHS))
- if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
- LHS = SimpleLHS;
- if (!isa<Constant>(RHS))
- if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
- RHS = SimpleRHS;
- if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
- if (Constant *CRHS = dyn_cast<Constant>(RHS))
- if (Constant *C =
- ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
- SimplifiedValues[&I] = C;
- return true;
- }
- }
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
+ }))
+ return true;
if (I.getOpcode() == Instruction::FCmp)
return false;
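
All of these visitors now funnel through the simplifyInstruction template introduced above: collect a constant for every operand (directly or via the SimplifiedValues cache), hand them to a per-opcode Evaluate callable, and record the result. A toy reconstruction of the shape (the null-result check is elided; ints stand in for Value/Constant):

#include <cstdio>
#include <map>
#include <vector>

static std::map<int, long> SimplifiedValues; // value-id -> known constant

template <typename Callable>
bool simplifyToy(int InstId, const std::vector<int> &OpIds, Callable Evaluate) {
  std::vector<long> COps;
  for (int Op : OpIds) {
    auto It = SimplifiedValues.find(Op);
    if (It == SimplifiedValues.end())
      return false; // some operand isn't known-constant: give up
    COps.push_back(It->second);
  }
  SimplifiedValues[InstId] = Evaluate(COps); // cache for later users
  return true;
}

int main() {
  SimplifiedValues[1] = 6;
  SimplifiedValues[2] = 7;
  if (simplifyToy(3, {1, 2}, [](const std::vector<long> &C) {
        return C[0] * C[1]; // the "mul" evaluate callable
      }))
    std::printf("folded to %ld\n", SimplifiedValues[3]); // 42
}
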
@@ -756,24 +772,18 @@ bool CallAnalyzer::visitSub(BinaryOperator &I) {
bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- const DataLayout &DL = F.getParent()->getDataLayout();
- if (!isa<Constant>(LHS))
- if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
- LHS = SimpleLHS;
- if (!isa<Constant>(RHS))
- if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
- RHS = SimpleRHS;
- Value *SimpleV = nullptr;
- if (auto FI = dyn_cast<FPMathOperator>(&I))
- SimpleV =
- SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
- else
- SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
+ auto Evaluate = [&](SmallVectorImpl<Constant *> &COps) {
+ Value *SimpleV = nullptr;
+ if (auto FI = dyn_cast<FPMathOperator>(&I))
+ SimpleV = SimplifyFPBinOp(I.getOpcode(), COps[0], COps[1],
+ FI->getFastMathFlags(), DL);
+ else
+ SimpleV = SimplifyBinOp(I.getOpcode(), COps[0], COps[1], DL);
+ return dyn_cast_or_null<Constant>(SimpleV);
+ };
- if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
- SimplifiedValues[&I] = C;
+ if (simplifyInstruction(I, Evaluate))
return true;
- }
// Disable any SROA on arguments to arbitrary, unsimplified binary operators.
disableSROA(LHS);
@@ -814,13 +824,10 @@ bool CallAnalyzer::visitStore(StoreInst &I) {
bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
// Constant folding for extract value is trivial.
- Constant *C = dyn_cast<Constant>(I.getAggregateOperand());
- if (!C)
- C = SimplifiedValues.lookup(I.getAggregateOperand());
- if (C) {
- SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices());
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ return ConstantExpr::getExtractValue(COps[0], I.getIndices());
+ }))
return true;
- }
// SROA can look through these but give them a cost.
return false;
@@ -828,17 +835,12 @@ bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
// Constant folding for insert value is trivial.
- Constant *AggC = dyn_cast<Constant>(I.getAggregateOperand());
- if (!AggC)
- AggC = SimplifiedValues.lookup(I.getAggregateOperand());
- Constant *InsertedC = dyn_cast<Constant>(I.getInsertedValueOperand());
- if (!InsertedC)
- InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand());
- if (AggC && InsertedC) {
- SimplifiedValues[&I] =
- ConstantExpr::getInsertValue(AggC, InsertedC, I.getIndices());
+ if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
+ return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
+ /*InsertedValueOperand*/ COps[1],
+ I.getIndices());
+ }))
return true;
- }
// SROA can look through these but give them a cost.
return false;
@@ -959,7 +961,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// out. Pretend to inline the function, with a custom threshold.
auto IndirectCallParams = Params;
IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
- CallAnalyzer CA(TTI, GetAssumptionCache, PSI, *F, CS, IndirectCallParams);
+ CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, *F, CS,
+ IndirectCallParams);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// threshold to get the bonus we want to apply, but don't go below zero.
@@ -1006,8 +1009,8 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
// does not (yet) fire.
SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
SuccessorBlocks.insert(SI.getDefaultDest());
- for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
- SuccessorBlocks.insert(I.getCaseSuccessor());
+ for (auto Case : SI.cases())
+ SuccessorBlocks.insert(Case.getCaseSuccessor());
// Add cost corresponding to the number of distinct destinations. The first
// we model as free because of fallthrough.
Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
@@ -1098,19 +1101,10 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
// is expensive or the function has the "use-soft-float" attribute, this may
// eventually become a library call. Treat the cost as such.
if (I->getType()->isFloatingPointTy()) {
- bool hasSoftFloatAttr = false;
-
// If the function has the "use-soft-float" attribute, mark it as
// expensive.
- if (F.hasFnAttribute("use-soft-float")) {
- Attribute Attr = F.getFnAttribute("use-soft-float");
- StringRef Val = Attr.getValueAsString();
- if (Val == "true")
- hasSoftFloatAttr = true;
- }
-
if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
- hasSoftFloatAttr)
+ (F.getFnAttribute("use-soft-float").getValueAsString() == "true"))
Cost += InlineConstants::CallPenalty;
}
@@ -1155,7 +1149,6 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
if (!V->getType()->isPointerTy())
return nullptr;
- const DataLayout &DL = F.getParent()->getDataLayout();
unsigned IntPtrWidth = DL.getPointerSizeInBits();
APInt Offset = APInt::getNullValue(IntPtrWidth);
@@ -1212,7 +1205,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
FiftyPercentVectorBonus = 3 * Threshold / 2;
TenPercentVectorBonus = 3 * Threshold / 4;
- const DataLayout &DL = F.getParent()->getDataLayout();
// Track whether the post-inlining function would have more than one basic
// block. A single basic block is often intended for inlining. Balloon the
@@ -1371,7 +1363,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
Value *Cond = SI->getCondition();
if (ConstantInt *SimpleCond =
dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
- BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
+ BBWorklist.insert(SI->findCaseValue(SimpleCond)->getCaseSuccessor());
continue;
}
}
@@ -1430,13 +1422,6 @@ LLVM_DUMP_METHOD void CallAnalyzer::dump() {
}
#endif
-/// \brief Test that two functions either have or have not the given attribute
-/// at the same time.
-template <typename AttrKind>
-static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {
- return F1->getFnAttribute(Attr) == F2->getFnAttribute(Attr);
-}
-
/// \brief Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.
static bool functionsHaveCompatibleAttributes(Function *Caller,
@@ -1449,15 +1434,17 @@ static bool functionsHaveCompatibleAttributes(Function *Caller,
InlineCost llvm::getInlineCost(
CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI) {
return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
- GetAssumptionCache, PSI);
+ GetAssumptionCache, GetBFI, PSI);
}
InlineCost llvm::getInlineCost(
CallSite CS, Function *Callee, const InlineParams &Params,
TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI) {
// Cannot inline indirect calls.
@@ -1492,7 +1479,8 @@ InlineCost llvm::getInlineCost(
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(CalleeTTI, GetAssumptionCache, PSI, *Callee, CS, Params);
+ CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, *Callee, CS,
+ Params);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
@@ -1565,6 +1553,9 @@ InlineParams llvm::getInlineParams(int Threshold) {
// Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
Params.HotCallSiteThreshold = HotCallSiteThreshold;
+ // Set the ColdCallSiteThreshold knob from the -inline-cold-callsite-threshold.
+ Params.ColdCallSiteThreshold = ColdCallSiteThreshold;
+
  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
// -inlinehint-threshold commandline option is not explicitly given. If that
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 796e6e444980..e12f640394e6 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/ConstantRange.h"
@@ -140,10 +141,9 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
/// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS.
/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
/// Returns the simplified value, or null if no simplification was performed.
-static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- unsigned OpcToExpand, const Query &Q,
+static Value *ExpandBinOp(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
+ Instruction::BinaryOps OpcodeToExpand, const Query &Q,
unsigned MaxRecurse) {
- Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return nullptr;
@@ -199,9 +199,9 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// Generic simplifications for associative binary operations.
/// Returns the simpler value, or null if none was found.
-static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
- Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
+static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode,
+ Value *LHS, Value *RHS, const Query &Q,
+ unsigned MaxRecurse) {
assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -298,8 +298,9 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
/// try to simplify the binop by seeing whether evaluating it on both branches
/// of the select results in the same value. Returns the common value if so,
/// otherwise returns null.
-static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
+static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS,
+ Value *RHS, const Query &Q,
+ unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return nullptr;
@@ -451,8 +452,9 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
/// try to simplify the binop by seeing whether evaluating it on the incoming
/// phi values yields the same result for every value. If so returns the common
/// value, otherwise returns null.
-static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
+static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS,
+ Value *RHS, const Query &Q,
+ unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return nullptr;
@@ -527,17 +529,26 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
return CommonValue;
}
+static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode,
+ Value *&Op0, Value *&Op1,
+ const Query &Q) {
+ if (auto *CLHS = dyn_cast<Constant>(Op0)) {
+ if (auto *CRHS = dyn_cast<Constant>(Op1))
+ return ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, Q.DL);
+
+ // Canonicalize the constant to the RHS if this is a commutative operation.
+ if (Instruction::isCommutative(Opcode))
+ std::swap(Op0, Op1);
+ }
+ return nullptr;
+}
+
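
foldOrCommuteConstant also centralizes the canonicalization half of the old boilerplate: when only one operand is constant and the opcode commutes, the constant moves to the RHS so every later pattern only has to inspect Op1. A toy sketch of that shape:

#include <cstdio>
#include <utility>

struct Opnd {
  bool IsConst;
  long Val;
};

// Fold if both operands are constant (addition stands in for the real
// constant folder); otherwise move a lone constant rightward when the
// operation commutes. Returns true only when a fold happened.
static bool foldOrCommute(bool Commutative, Opnd &Op0, Opnd &Op1, long &Out) {
  if (Op0.IsConst) {
    if (Op1.IsConst) {
      Out = Op0.Val + Op1.Val;
      return true;
    }
    if (Commutative)
      std::swap(Op0, Op1); // canonicalize: constant goes to the RHS
  }
  return false;
}

int main() {
  Opnd A{true, 5}, B{false, 0};
  long Out;
  if (!foldOrCommute(/*Commutative=*/true, A, B, Out))
    std::printf("constant now on RHS: %s\n", B.IsConst ? "yes" : "no");
}
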
/// Given operands for an Add, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const Query &Q, unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::Add, CLHS, CRHS, Q.DL);
-
- // Canonicalize the constant to the RHS.
- std::swap(Op0, Op1);
- }
+ if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q))
+ return C;
// X + undef -> undef
if (match(Op1, m_Undef()))
@@ -556,12 +567,20 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
return Y;
// X + ~X -> -1 since ~X = -X-1
+ Type *Ty = Op0->getType();
if (match(Op0, m_Not(m_Specific(Op1))) ||
match(Op1, m_Not(m_Specific(Op0))))
- return Constant::getAllOnesValue(Op0->getType());
+ return Constant::getAllOnesValue(Ty);
+
+ // add nsw/nuw (xor Y, signbit), signbit --> Y
+ // The no-wrapping add guarantees that the top bit will be set by the add.
+ // Therefore, the xor must be clearing the already set sign bit of Y.
+ if ((isNSW || isNUW) && match(Op1, m_SignBit()) &&
+ match(Op0, m_Xor(m_Value(Y), m_SignBit())))
+ return Y;
/// i1 add -> xor.
- if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1))
if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
return V;
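
A sanity check on the new signbit fold: flipping the sign bit is the same as adding it modulo 2^W, so (Y ^ SignBit) + SignBit == Y + 2*SignBit == Y for every W-bit Y, and the nsw/nuw requirement then guarantees (per the comment) that the add really set a bit the xor had cleared. Exhaustive 8-bit verification:

#include <cstdio>

int main() {
  for (unsigned Y = 0; Y < 256; ++Y) {
    unsigned R = ((Y ^ 0x80u) + 0x80u) & 0xFFu; // 8-bit (xor SB) + SB
    if (R != Y) {
      std::printf("mismatch at Y=%u\n", Y);
      return 1;
    }
  }
  std::puts("(Y ^ 0x80) + 0x80 == Y for all 8-bit Y");
}
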
@@ -665,9 +684,8 @@ static Constant *computePointerDifference(const DataLayout &DL, Value *LHS,
/// If not, this returns null.
static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const Query &Q, unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0))
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::Sub, CLHS, CRHS, Q.DL);
+ if (Constant *C = foldOrCommuteConstant(Instruction::Sub, Op0, Op1, Q))
+ return C;
// X - undef -> undef
// undef - X -> undef
@@ -692,7 +710,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- if (KnownZero == ~APInt::getSignBit(BitWidth)) {
+ if (KnownZero.isMaxSignedValue()) {
// Op1 is either 0 or the minimum signed value. If the sub is NSW, then
// Op1 must be 0 because negating the minimum signed value is undefined.
if (isNSW)
@@ -779,7 +797,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
// i1 sub -> xor.
- if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1))
if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
return V;
@@ -807,13 +825,8 @@ Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// returns null.
static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const Query &Q, unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::FAdd, CLHS, CRHS, Q.DL);
-
- // Canonicalize the constant to the RHS.
- std::swap(Op0, Op1);
- }
+ if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q))
+ return C;
// fadd X, -0 ==> X
if (match(Op1, m_NegZero()))
@@ -846,10 +859,8 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
/// returns null.
static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const Query &Q, unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::FSub, CLHS, CRHS, Q.DL);
- }
+ if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q))
+ return C;
// fsub X, 0 ==> X
if (match(Op1, m_Zero()))
@@ -878,40 +889,28 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
}
/// Given the operands for an FMul, see if we can fold the result
-static Value *SimplifyFMulInst(Value *Op0, Value *Op1,
- FastMathFlags FMF,
- const Query &Q,
- unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::FMul, CLHS, CRHS, Q.DL);
-
- // Canonicalize the constant to the RHS.
- std::swap(Op0, Op1);
- }
+static Value *SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q))
+ return C;
- // fmul X, 1.0 ==> X
- if (match(Op1, m_FPOne()))
- return Op0;
+ // fmul X, 1.0 ==> X
+ if (match(Op1, m_FPOne()))
+ return Op0;
- // fmul nnan nsz X, 0 ==> 0
- if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero()))
- return Op1;
+ // fmul nnan nsz X, 0 ==> 0
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero()))
+ return Op1;
- return nullptr;
+ return nullptr;
}
/// Given operands for a Mul, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::Mul, CLHS, CRHS, Q.DL);
-
- // Canonicalize the constant to the RHS.
- std::swap(Op0, Op1);
- }
+ if (Constant *C = foldOrCommuteConstant(Instruction::Mul, Op0, Op1, Q))
+ return C;
// X * undef -> 0
if (match(Op1, m_Undef()))
@@ -932,7 +931,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
return X;
// i1 mul -> and.
- if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+ if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1))
if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1))
return V;
@@ -998,43 +997,68 @@ Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL,
RecursionLimit);
}
-/// Given operands for an SDiv or UDiv, see if we can fold the result.
-/// If not, this returns null.
-static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
- const Query &Q, unsigned MaxRecurse) {
- if (Constant *C0 = dyn_cast<Constant>(Op0))
- if (Constant *C1 = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Opcode, C0, C1, Q.DL);
-
- bool isSigned = Opcode == Instruction::SDiv;
+/// Check for common or similar folds of integer division or integer remainder.
+static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) {
+ Type *Ty = Op0->getType();
// X / undef -> undef
+ // X % undef -> undef
if (match(Op1, m_Undef()))
return Op1;
- // X / 0 -> undef, we don't need to preserve faults!
+ // X / 0 -> undef
+ // X % 0 -> undef
+ // We don't need to preserve faults!
if (match(Op1, m_Zero()))
- return UndefValue::get(Op1->getType());
+ return UndefValue::get(Ty);
+
+ // If any element of a constant divisor vector is zero, the whole op is undef.
+ auto *Op1C = dyn_cast<Constant>(Op1);
+ if (Op1C && Ty->isVectorTy()) {
+ unsigned NumElts = Ty->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Elt = Op1C->getAggregateElement(i);
+ if (Elt && Elt->isNullValue())
+ return UndefValue::get(Ty);
+ }
+ }
// undef / X -> 0
+ // undef % X -> 0
if (match(Op0, m_Undef()))
- return Constant::getNullValue(Op0->getType());
+ return Constant::getNullValue(Ty);
- // 0 / X -> 0, we don't need to preserve faults!
+ // 0 / X -> 0
+ // 0 % X -> 0
if (match(Op0, m_Zero()))
return Op0;
+ // X / X -> 1
+ // X % X -> 0
+ if (Op0 == Op1)
+ return IsDiv ? ConstantInt::get(Ty, 1) : Constant::getNullValue(Ty);
+
// X / 1 -> X
- if (match(Op1, m_One()))
- return Op0;
+ // X % 1 -> 0
+ // If this is a boolean op (single-bit element type), we can't have
+ // division-by-zero or remainder-by-zero, so assume the divisor is 1.
+ if (match(Op1, m_One()) || Ty->getScalarType()->isIntegerTy(1))
+ return IsDiv ? Op0 : Constant::getNullValue(Ty);
- if (Op0->getType()->isIntegerTy(1))
- // It can't be division by zero, hence it must be division by one.
- return Op0;
+ return nullptr;
+}
- // X / X -> 1
- if (Op0 == Op1)
- return ConstantInt::get(Op0->getType(), 1);
+/// Given operands for an SDiv or UDiv, see if we can fold the result.
+/// If not, this returns null.
+static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+ const Query &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
+ return C;
+
+ if (Value *V = simplifyDivRem(Op0, Op1, true))
+ return V;
+
+ bool isSigned = Opcode == Instruction::SDiv;
// (X * Y) / Y -> X if the multiplication does not overflow.
Value *X = nullptr, *Y = nullptr;
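
The boolean case in simplifyDivRem deserves the one-line justification it hints at: a 1-bit divisor is either 0 or 1, and dividing by 0 already yields undef, so in every defined execution the divisor is 1, giving X udiv Y == X and X urem Y == 0. Enumerated:

#include <cstdio>

int main() {
  // A 1-bit divisor with defined behavior can only be 1.
  for (int X = 0; X <= 1; ++X) {
    int Y = 1;
    std::printf("X=%d: X/Y=%d (==X), X%%Y=%d (==0)\n", X, X / Y, X % Y);
  }
}
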
@@ -1129,6 +1153,9 @@ Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const Query &Q, unsigned) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q))
+ return C;
+
// undef / X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
return Op0;
@@ -1178,37 +1205,11 @@ Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
/// If not, this returns null.
static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
const Query &Q, unsigned MaxRecurse) {
- if (Constant *C0 = dyn_cast<Constant>(Op0))
- if (Constant *C1 = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Opcode, C0, C1, Q.DL);
-
- // X % undef -> undef
- if (match(Op1, m_Undef()))
- return Op1;
-
- // undef % X -> 0
- if (match(Op0, m_Undef()))
- return Constant::getNullValue(Op0->getType());
-
- // 0 % X -> 0, we don't need to preserve faults!
- if (match(Op0, m_Zero()))
- return Op0;
+ if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
+ return C;
- // X % 0 -> undef, we don't need to preserve faults!
- if (match(Op1, m_Zero()))
- return UndefValue::get(Op0->getType());
-
- // X % 1 -> 0
- if (match(Op1, m_One()))
- return Constant::getNullValue(Op0->getType());
-
- if (Op0->getType()->isIntegerTy(1))
- // It can't be remainder by zero, hence it must be remainder by one.
- return Constant::getNullValue(Op0->getType());
-
- // X % X -> 0
- if (Op0 == Op1)
- return Constant::getNullValue(Op0->getType());
+ if (Value *V = simplifyDivRem(Op0, Op1, false))
+ return V;
// (X % Y) % Y -> X % Y
if ((Opcode == Instruction::SRem &&
@@ -1279,7 +1280,10 @@ Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout &DL,
}
static Value *SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
- const Query &, unsigned) {
+ const Query &Q, unsigned) {
+ if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q))
+ return C;
+
// undef % X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
return Op0;
@@ -1335,11 +1339,10 @@ static bool isUndefShift(Value *Amount) {
/// Given operands for an Shl, LShr or AShr, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
- const Query &Q, unsigned MaxRecurse) {
- if (Constant *C0 = dyn_cast<Constant>(Op0))
- if (Constant *C1 = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Opcode, C0, C1, Q.DL);
+static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
+ Value *Op1, const Query &Q, unsigned MaxRecurse) {
+ if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
+ return C;
// 0 shift by X -> 0
if (match(Op0, m_Zero()))
@@ -1386,8 +1389,8 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
/// \brief Given operands for an Shl, LShr or AShr, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyRightShift(unsigned Opcode, Value *Op0, Value *Op1,
- bool isExact, const Query &Q,
+static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
+ Value *Op1, bool isExact, const Query &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyShift(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
@@ -1636,13 +1639,8 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
/// If not, this returns null.
static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::And, CLHS, CRHS, Q.DL);
-
- // Canonicalize the constant to the RHS.
- std::swap(Op0, Op1);
- }
+ if (Constant *C = foldOrCommuteConstant(Instruction::And, Op0, Op1, Q))
+ return C;
// X & undef -> 0
if (match(Op1, m_Undef()))
@@ -1838,13 +1836,8 @@ static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
/// If not, this returns null.
static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::Or, CLHS, CRHS, Q.DL);
-
- // Canonicalize the constant to the RHS.
- std::swap(Op0, Op1);
- }
+ if (Constant *C = foldOrCommuteConstant(Instruction::Or, Op0, Op1, Q))
+ return C;
// X | undef -> -1
if (match(Op1, m_Undef()))
@@ -1971,13 +1964,8 @@ Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL,
/// If not, this returns null.
static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
- if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
- if (Constant *CRHS = dyn_cast<Constant>(Op1))
- return ConstantFoldBinaryOpOperands(Instruction::Xor, CLHS, CRHS, Q.DL);
-
- // Canonicalize the constant to the RHS.
- std::swap(Op0, Op1);
- }
+ if (Constant *C = foldOrCommuteConstant(Instruction::Xor, Op0, Op1, Q))
+ return C;
// A ^ undef -> undef
if (match(Op1, m_Undef()))
@@ -2377,6 +2365,163 @@ static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
return nullptr;
}
+/// Many binary operators with a constant operand have an easy-to-compute
+/// range of outputs. This can be used to fold a comparison to always true or
+/// always false.
+static void setLimitsForBinOp(BinaryOperator &BO, APInt &Lower, APInt &Upper) {
+ unsigned Width = Lower.getBitWidth();
+ const APInt *C;
+ switch (BO.getOpcode()) {
+ case Instruction::Add:
+ if (match(BO.getOperand(1), m_APInt(C)) && *C != 0) {
+ // FIXME: If we have both nuw and nsw, we should reduce the range further.
+ if (BO.hasNoUnsignedWrap()) {
+ // 'add nuw x, C' produces [C, UINT_MAX].
+ Lower = *C;
+ } else if (BO.hasNoSignedWrap()) {
+ if (C->isNegative()) {
+ // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
+ Lower = APInt::getSignedMinValue(Width);
+ Upper = APInt::getSignedMaxValue(Width) + *C + 1;
+ } else {
+ // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
+ Lower = APInt::getSignedMinValue(Width) + *C;
+ Upper = APInt::getSignedMaxValue(Width) + 1;
+ }
+ }
+ }
+ break;
+
+ case Instruction::And:
+ if (match(BO.getOperand(1), m_APInt(C)))
+ // 'and x, C' produces [0, C].
+ Upper = *C + 1;
+ break;
+
+ case Instruction::Or:
+ if (match(BO.getOperand(1), m_APInt(C)))
+ // 'or x, C' produces [C, UINT_MAX].
+ Lower = *C;
+ break;
+
+ case Instruction::AShr:
+ if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
+ // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
+ Lower = APInt::getSignedMinValue(Width).ashr(*C);
+ Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
+ } else if (match(BO.getOperand(0), m_APInt(C))) {
+ unsigned ShiftAmount = Width - 1;
+ if (*C != 0 && BO.isExact())
+ ShiftAmount = C->countTrailingZeros();
+ if (C->isNegative()) {
+ // 'ashr C, x' produces [C, C >> (Width-1)]
+ Lower = *C;
+ Upper = C->ashr(ShiftAmount) + 1;
+ } else {
+ // 'ashr C, x' produces [C >> (Width-1), C]
+ Lower = C->ashr(ShiftAmount);
+ Upper = *C + 1;
+ }
+ }
+ break;
+
+ case Instruction::LShr:
+ if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
+ // 'lshr x, C' produces [0, UINT_MAX >> C].
+ Upper = APInt::getAllOnesValue(Width).lshr(*C) + 1;
+ } else if (match(BO.getOperand(0), m_APInt(C))) {
+ // 'lshr C, x' produces [C >> (Width-1), C].
+ unsigned ShiftAmount = Width - 1;
+ if (*C != 0 && BO.isExact())
+ ShiftAmount = C->countTrailingZeros();
+ Lower = C->lshr(ShiftAmount);
+ Upper = *C + 1;
+ }
+ break;
+
+ case Instruction::Shl:
+ if (match(BO.getOperand(0), m_APInt(C))) {
+ if (BO.hasNoUnsignedWrap()) {
+ // 'shl nuw C, x' produces [C, C << CLZ(C)]
+ Lower = *C;
+ Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
+ } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
+ if (C->isNegative()) {
+ // 'shl nsw C, x' produces [C << CLO(C)-1, C]
+ unsigned ShiftAmount = C->countLeadingOnes() - 1;
+ Lower = C->shl(ShiftAmount);
+ Upper = *C + 1;
+ } else {
+ // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
+ unsigned ShiftAmount = C->countLeadingZeros() - 1;
+ Lower = *C;
+ Upper = C->shl(ShiftAmount) + 1;
+ }
+ }
+ }
+ break;
+
+ case Instruction::SDiv:
+ if (match(BO.getOperand(1), m_APInt(C))) {
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ if (C->isAllOnesValue()) {
+ // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
+ // (INT_MIN itself is excluded because INT_MIN / -1 overflows).
+ Lower = IntMin + 1;
+ Upper = IntMax + 1;
+ } else if (C->countLeadingZeros() < Width - 1) {
+ // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
+ // where C != -1 and C != 0 and C != 1
+ Lower = IntMin.sdiv(*C);
+ Upper = IntMax.sdiv(*C);
+ if (Lower.sgt(Upper))
+ std::swap(Lower, Upper);
+ Upper = Upper + 1;
+ assert(Upper != Lower && "Upper part of range has wrapped!");
+ }
+ } else if (match(BO.getOperand(0), m_APInt(C))) {
+ if (C->isMinSignedValue()) {
+ // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
+ Lower = *C;
+ Upper = Lower.lshr(1) + 1;
+ } else {
+ // 'sdiv C, x' produces [-|C|, |C|].
+ Upper = C->abs() + 1;
+ Lower = (-Upper) + 1;
+ }
+ }
+ break;
+
+ case Instruction::UDiv:
+ if (match(BO.getOperand(1), m_APInt(C)) && *C != 0) {
+ // 'udiv x, C' produces [0, UINT_MAX / C].
+ Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
+ } else if (match(BO.getOperand(0), m_APInt(C))) {
+ // 'udiv C, x' produces [0, C].
+ Upper = *C + 1;
+ }
+ break;
+
+ case Instruction::SRem:
+ if (match(BO.getOperand(1), m_APInt(C))) {
+ // 'srem x, C' produces (-|C|, |C|).
+ Upper = C->abs();
+ Lower = (-Upper) + 1;
+ }
+ break;
+
+ case Instruction::URem:
+ if (match(BO.getOperand(1), m_APInt(C)))
+ // 'urem x, C' produces [0, C).
+ Upper = *C;
+ break;
+
+ default:
+ break;
+ }
+}
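+
+// For illustration (hypothetical IR), given:
+//   %r = urem i32 %x, 42
+//   %c = icmp ult i32 %r, 42
+// the URem case above yields the range [0, 42) for %r, which is contained in
+// the range where 'ult 42' holds, so the compare folds to 'true' below.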
+
static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
Value *RHS) {
const APInt *C;
@@ -2390,114 +2535,12 @@ static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
if (RHS_CR.isFullSet())
return ConstantInt::getTrue(GetCompareTy(RHS));
- // Many binary operators with constant RHS have easy to compute constant
- // range. Use them to check whether the comparison is a tautology.
+ // Compute the range of possible values for the LHS when it is a binary
+ // operator with a constant operand.
unsigned Width = C->getBitWidth();
APInt Lower = APInt(Width, 0);
APInt Upper = APInt(Width, 0);
- const APInt *C2;
- if (match(LHS, m_URem(m_Value(), m_APInt(C2)))) {
- // 'urem x, C2' produces [0, C2).
- Upper = *C2;
- } else if (match(LHS, m_SRem(m_Value(), m_APInt(C2)))) {
- // 'srem x, C2' produces (-|C2|, |C2|).
- Upper = C2->abs();
- Lower = (-Upper) + 1;
- } else if (match(LHS, m_UDiv(m_APInt(C2), m_Value()))) {
- // 'udiv C2, x' produces [0, C2].
- Upper = *C2 + 1;
- } else if (match(LHS, m_UDiv(m_Value(), m_APInt(C2)))) {
- // 'udiv x, C2' produces [0, UINT_MAX / C2].
- APInt NegOne = APInt::getAllOnesValue(Width);
- if (*C2 != 0)
- Upper = NegOne.udiv(*C2) + 1;
- } else if (match(LHS, m_SDiv(m_APInt(C2), m_Value()))) {
- if (C2->isMinSignedValue()) {
- // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
- Lower = *C2;
- Upper = Lower.lshr(1) + 1;
- } else {
- // 'sdiv C2, x' produces [-|C2|, |C2|].
- Upper = C2->abs() + 1;
- Lower = (-Upper) + 1;
- }
- } else if (match(LHS, m_SDiv(m_Value(), m_APInt(C2)))) {
- APInt IntMin = APInt::getSignedMinValue(Width);
- APInt IntMax = APInt::getSignedMaxValue(Width);
- if (C2->isAllOnesValue()) {
- // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
- // where C2 != -1 and C2 != 0 and C2 != 1
- Lower = IntMin + 1;
- Upper = IntMax + 1;
- } else if (C2->countLeadingZeros() < Width - 1) {
- // 'sdiv x, C2' produces [INT_MIN / C2, INT_MAX / C2]
- // where C2 != -1 and C2 != 0 and C2 != 1
- Lower = IntMin.sdiv(*C2);
- Upper = IntMax.sdiv(*C2);
- if (Lower.sgt(Upper))
- std::swap(Lower, Upper);
- Upper = Upper + 1;
- assert(Upper != Lower && "Upper part of range has wrapped!");
- }
- } else if (match(LHS, m_NUWShl(m_APInt(C2), m_Value()))) {
- // 'shl nuw C2, x' produces [C2, C2 << CLZ(C2)]
- Lower = *C2;
- Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
- } else if (match(LHS, m_NSWShl(m_APInt(C2), m_Value()))) {
- if (C2->isNegative()) {
- // 'shl nsw C2, x' produces [C2 << CLO(C2)-1, C2]
- unsigned ShiftAmount = C2->countLeadingOnes() - 1;
- Lower = C2->shl(ShiftAmount);
- Upper = *C2 + 1;
- } else {
- // 'shl nsw C2, x' produces [C2, C2 << CLZ(C2)-1]
- unsigned ShiftAmount = C2->countLeadingZeros() - 1;
- Lower = *C2;
- Upper = C2->shl(ShiftAmount) + 1;
- }
- } else if (match(LHS, m_LShr(m_Value(), m_APInt(C2)))) {
- // 'lshr x, C2' produces [0, UINT_MAX >> C2].
- APInt NegOne = APInt::getAllOnesValue(Width);
- if (C2->ult(Width))
- Upper = NegOne.lshr(*C2) + 1;
- } else if (match(LHS, m_LShr(m_APInt(C2), m_Value()))) {
- // 'lshr C2, x' produces [C2 >> (Width-1), C2].
- unsigned ShiftAmount = Width - 1;
- if (*C2 != 0 && cast<BinaryOperator>(LHS)->isExact())
- ShiftAmount = C2->countTrailingZeros();
- Lower = C2->lshr(ShiftAmount);
- Upper = *C2 + 1;
- } else if (match(LHS, m_AShr(m_Value(), m_APInt(C2)))) {
- // 'ashr x, C2' produces [INT_MIN >> C2, INT_MAX >> C2].
- APInt IntMin = APInt::getSignedMinValue(Width);
- APInt IntMax = APInt::getSignedMaxValue(Width);
- if (C2->ult(Width)) {
- Lower = IntMin.ashr(*C2);
- Upper = IntMax.ashr(*C2) + 1;
- }
- } else if (match(LHS, m_AShr(m_APInt(C2), m_Value()))) {
- unsigned ShiftAmount = Width - 1;
- if (*C2 != 0 && cast<BinaryOperator>(LHS)->isExact())
- ShiftAmount = C2->countTrailingZeros();
- if (C2->isNegative()) {
- // 'ashr C2, x' produces [C2, C2 >> (Width-1)]
- Lower = *C2;
- Upper = C2->ashr(ShiftAmount) + 1;
- } else {
- // 'ashr C2, x' produces [C2 >> (Width-1), C2]
- Lower = C2->ashr(ShiftAmount);
- Upper = *C2 + 1;
- }
- } else if (match(LHS, m_Or(m_Value(), m_APInt(C2)))) {
- // 'or x, C2' produces [C2, UINT_MAX].
- Lower = *C2;
- } else if (match(LHS, m_And(m_Value(), m_APInt(C2)))) {
- // 'and x, C2' produces [0, C2].
- Upper = *C2 + 1;
- } else if (match(LHS, m_NUWAdd(m_Value(), m_APInt(C2)))) {
- // 'add nuw x, C2' produces [C2, UINT_MAX].
- Lower = *C2;
- }
+ if (auto *BO = dyn_cast<BinaryOperator>(LHS))
+ setLimitsForBinOp(*BO, Lower, Upper);
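+ // If no case in setLimitsForBinOp matched, Lower and Upper are both still
+ // zero, and the Lower != Upper test below falls back to a full range.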
ConstantRange LHS_CR =
Lower != Upper ? ConstantRange(Lower, Upper) : ConstantRange(Width, true);
@@ -3064,8 +3107,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If both operands have range metadata, use the metadata
// to simplify the comparison.
if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) {
- auto RHS_Instr = dyn_cast<Instruction>(RHS);
- auto LHS_Instr = dyn_cast<Instruction>(LHS);
+ auto RHS_Instr = cast<Instruction>(RHS);
+ auto LHS_Instr = cast<Instruction>(LHS);
if (RHS_Instr->getMetadata(LLVMContext::MD_range) &&
LHS_Instr->getMetadata(LLVMContext::MD_range)) {
@@ -4039,6 +4082,62 @@ Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
RecursionLimit);
}
+static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
+ Type *RetTy, const Query &Q,
+ unsigned MaxRecurse) {
+ Type *InVecTy = Op0->getType();
+ unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
+ unsigned InVecNumElts = InVecTy->getVectorNumElements();
+
+ auto *Op0Const = dyn_cast<Constant>(Op0);
+ auto *Op1Const = dyn_cast<Constant>(Op1);
+
+ // If all operands are constant, constant fold the shuffle.
+ if (Op0Const && Op1Const)
+ return ConstantFoldShuffleVectorInstruction(Op0Const, Op1Const, Mask);
+
+ // If only one of the operands is constant, constant fold the shuffle if the
+ // mask does not select elements from the variable operand.
+ bool MaskSelects0 = false, MaskSelects1 = false;
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = ShuffleVectorInst::getMaskValue(Mask, i);
+ if (Idx == -1)
+ continue;
+ if ((unsigned)Idx < InVecNumElts)
+ MaskSelects0 = true;
+ else
+ MaskSelects1 = true;
+ }
+ if (!MaskSelects0 && Op1Const)
+ return ConstantFoldShuffleVectorInstruction(UndefValue::get(InVecTy),
+ Op1Const, Mask);
+ if (!MaskSelects1 && Op0Const)
+ return ConstantFoldShuffleVectorInstruction(Op0Const,
+ UndefValue::get(InVecTy), Mask);
+
+ // A shuffle of a splat is always the splat itself. This is legal only if the
+ // shuffle's result type is the same as the input vectors' type.
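+ // For example (illustrative IR): if %s = shufflevector %v, undef,
+ // zeroinitializer, then any reshuffle of %s that selects lanes only from %s
+ // produces %s again, since every lane of %s is identical.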
+ if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op0))
+ if (!MaskSelects1 && RetTy == InVecTy &&
+ OpShuf->getMask()->getSplatValue())
+ return Op0;
+ if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op1))
+ if (!MaskSelects0 && RetTy == InVecTy &&
+ OpShuf->getMask()->getSplatValue())
+ return Op1;
+
+ return nullptr;
+}
+
+/// Given operands for a ShuffleVectorInst, fold the result or return null.
+Value *llvm::SimplifyShuffleVectorInst(
+ Value *Op0, Value *Op1, Constant *Mask, Type *RetTy,
+ const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT,
+ AssumptionCache *AC, const Instruction *CxtI) {
+ return ::SimplifyShuffleVectorInst(
+ Op0, Op1, Mask, RetTy, Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
+}
+
//=== Helper functions for higher up the class hierarchy.
/// Given operands for a BinaryOperator, see if we can fold the result.
@@ -4047,61 +4146,43 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::Add:
- return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- Q, MaxRecurse);
+ return SimplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse);
case Instruction::FAdd:
return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
-
case Instruction::Sub:
- return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- Q, MaxRecurse);
+ return SimplifySubInst(LHS, RHS, false, false, Q, MaxRecurse);
case Instruction::FSub:
return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
-
- case Instruction::Mul: return SimplifyMulInst (LHS, RHS, Q, MaxRecurse);
+ case Instruction::Mul:
+ return SimplifyMulInst(LHS, RHS, Q, MaxRecurse);
case Instruction::FMul:
- return SimplifyFMulInst (LHS, RHS, FastMathFlags(), Q, MaxRecurse);
- case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
- case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
+ return SimplifyFMulInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ case Instruction::SDiv:
+ return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::UDiv:
+ return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
case Instruction::FDiv:
- return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
- case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
- case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
+ return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ case Instruction::SRem:
+ return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::URem:
+ return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
case Instruction::FRem:
- return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
+ return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::Shl:
- return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- Q, MaxRecurse);
+ return SimplifyShlInst(LHS, RHS, false, false, Q, MaxRecurse);
case Instruction::LShr:
- return SimplifyLShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse);
+ return SimplifyLShrInst(LHS, RHS, false, Q, MaxRecurse);
case Instruction::AShr:
- return SimplifyAShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse);
- case Instruction::And: return SimplifyAndInst(LHS, RHS, Q, MaxRecurse);
- case Instruction::Or: return SimplifyOrInst (LHS, RHS, Q, MaxRecurse);
- case Instruction::Xor: return SimplifyXorInst(LHS, RHS, Q, MaxRecurse);
+ return SimplifyAShrInst(LHS, RHS, false, Q, MaxRecurse);
+ case Instruction::And:
+ return SimplifyAndInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::Or:
+ return SimplifyOrInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::Xor:
+ return SimplifyXorInst(LHS, RHS, Q, MaxRecurse);
default:
- if (Constant *CLHS = dyn_cast<Constant>(LHS))
- if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, Q.DL);
-
- // If the operation is associative, try some generic simplifications.
- if (Instruction::isAssociative(Opcode))
- if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, Q, MaxRecurse))
- return V;
-
- // If the operation is with the result of a select instruction check whether
- // operating on either branch of the select always yields the same value.
- if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, Q, MaxRecurse))
- return V;
-
- // If the operation is with the result of a phi instruction, check whether
- // operating on all incoming values of the phi always yields the same value.
- if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse))
- return V;
-
- return nullptr;
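+ // All BinaryOps opcodes are handled by the cases above; the generic
+ // constant folding and select/phi threading now live in the per-opcode
+ // helpers, so any other opcode here is a caller error.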
+ llvm_unreachable("Unexpected opcode");
}
}
@@ -4267,6 +4348,7 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
case Intrinsic::fabs: {
if (SignBitMustBeZero(*ArgBegin, Q.TLI))
return *ArgBegin;
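+ // Otherwise return null explicitly rather than falling through into the
+ // default case.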
+ return nullptr;
}
default:
return nullptr;
@@ -4396,7 +4478,8 @@ Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
/// If not, this returns null.
Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC) {
+ const DominatorTree *DT, AssumptionCache *AC,
+ OptimizationRemarkEmitter *ORE) {
Value *Result;
switch (I->getOpcode()) {
@@ -4522,6 +4605,13 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
EEI->getVectorOperand(), EEI->getIndexOperand(), DL, TLI, DT, AC, I);
break;
}
+ case Instruction::ShuffleVector: {
+ auto *SVI = cast<ShuffleVectorInst>(I);
+ Result = SimplifyShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
+ SVI->getMask(), SVI->getType(), DL, TLI,
+ DT, AC, I);
+ break;
+ }
case Instruction::PHI:
Result = SimplifyPHINode(cast<PHINode>(I), Query(DL, TLI, DT, AC, I));
break;
@@ -4537,6 +4627,10 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
Result = SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(),
DL, TLI, DT, AC, I);
break;
+ case Instruction::Alloca:
+ // There are no simplifications for Alloca, and it cannot be constant folded.
+ Result = nullptr;
+ break;
}
// In general, it is possible for computeKnownBits to determine all bits in a
@@ -4545,7 +4639,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
unsigned BitWidth = I->getType()->getScalarSizeInBits();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT);
+ computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT, ORE);
if ((KnownZero | KnownOne).isAllOnesValue())
Result = ConstantInt::get(I->getType(), KnownOne);
}
diff --git a/lib/Analysis/IteratedDominanceFrontier.cpp b/lib/Analysis/IteratedDominanceFrontier.cpp
index d1374acd963e..2a736ec0379c 100644
--- a/lib/Analysis/IteratedDominanceFrontier.cpp
+++ b/lib/Analysis/IteratedDominanceFrontier.cpp
@@ -64,10 +64,7 @@ void IDFCalculator<NodeTy>::calculate(
BasicBlock *BB = Node->getBlock();
// Succ is the successor in the direction we are calculating IDF, so it is
// successor for IDF, and predecessor for Reverse IDF.
- for (auto SuccIter = GraphTraits<NodeTy>::child_begin(BB),
- End = GraphTraits<NodeTy>::child_end(BB);
- SuccIter != End; ++SuccIter) {
- BasicBlock *Succ = *SuccIter;
+ for (auto *Succ : children<NodeTy>(BB)) {
DomTreeNode *SuccNode = DT.getNode(Succ);
// Quickly skip all CFG edges that are also dominator tree edges instead
diff --git a/lib/Analysis/LazyBlockFrequencyInfo.cpp b/lib/Analysis/LazyBlockFrequencyInfo.cpp
index 596b6fc1afb5..a8178ecc0a24 100644
--- a/lib/Analysis/LazyBlockFrequencyInfo.cpp
+++ b/lib/Analysis/LazyBlockFrequencyInfo.cpp
@@ -9,7 +9,7 @@
//
// This is an alternative analysis pass to BlockFrequencyInfoWrapperPass. The
// difference is that with this pass the block frequencies are not computed when
-// the analysis pass is executed but rather when the BFI results is explicitly
+// the analysis pass is executed but rather when the BFI result is explicitly
// requested by the analysis client.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index f7cf8c6729f2..eef56815f2e0 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -18,26 +18,50 @@
#include "llvm/IR/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "lcg"
+void LazyCallGraph::EdgeSequence::insertEdgeInternal(Node &TargetN,
+ Edge::Kind EK) {
+ EdgeIndexMap.insert({&TargetN, Edges.size()});
+ Edges.emplace_back(TargetN, EK);
+}
+
+void LazyCallGraph::EdgeSequence::setEdgeKind(Node &TargetN, Edge::Kind EK) {
+ Edges[EdgeIndexMap.find(&TargetN)->second].setKind(EK);
+}
+
+bool LazyCallGraph::EdgeSequence::removeEdgeInternal(Node &TargetN) {
+ auto IndexMapI = EdgeIndexMap.find(&TargetN);
+ if (IndexMapI == EdgeIndexMap.end())
+ return false;
+
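+ // Null out the edge rather than erasing it so that the indices of the
+ // remaining edges recorded in EdgeIndexMap stay valid.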
+ Edges[IndexMapI->second] = Edge();
+ EdgeIndexMap.erase(IndexMapI);
+ return true;
+}
+
static void addEdge(SmallVectorImpl<LazyCallGraph::Edge> &Edges,
- DenseMap<Function *, int> &EdgeIndexMap, Function &F,
- LazyCallGraph::Edge::Kind EK) {
- if (!EdgeIndexMap.insert({&F, Edges.size()}).second)
+ DenseMap<LazyCallGraph::Node *, int> &EdgeIndexMap,
+ LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK) {
+ if (!EdgeIndexMap.insert({&N, Edges.size()}).second)
return;
- DEBUG(dbgs() << " Added callable function: " << F.getName() << "\n");
- Edges.emplace_back(LazyCallGraph::Edge(F, EK));
+ DEBUG(dbgs() << " Added callable function: " << N.getName() << "\n");
+ Edges.emplace_back(LazyCallGraph::Edge(N, EK));
}
-LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
- : G(&G), F(F), DFSNumber(0), LowLink(0) {
- DEBUG(dbgs() << " Adding functions called by '" << F.getName()
+LazyCallGraph::EdgeSequence &LazyCallGraph::Node::populateSlow() {
+ assert(!Edges && "Must not have already populated the edges for this node!");
+
+ DEBUG(dbgs() << " Adding functions called by '" << getName()
<< "' to the graph.\n");
+ Edges = EdgeSequence();
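+ // From here on the node reports itself as populated; the scan below fills
+ // in the actual edges.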
+
SmallVector<Constant *, 16> Worklist;
SmallPtrSet<Function *, 4> Callees;
SmallPtrSet<Constant *, 16> Visited;
@@ -58,14 +82,15 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
// alias. Then a test of the address of the weak function against the new
// strong definition's address would be an effective way to determine the
// safety of optimizing a direct call edge.
- for (BasicBlock &BB : F)
+ for (BasicBlock &BB : *F)
for (Instruction &I : BB) {
if (auto CS = CallSite(&I))
if (Function *Callee = CS.getCalledFunction())
if (!Callee->isDeclaration())
if (Callees.insert(Callee).second) {
Visited.insert(Callee);
- addEdge(Edges, EdgeIndexMap, *Callee, LazyCallGraph::Edge::Call);
+ addEdge(Edges->Edges, Edges->EdgeIndexMap, G->get(*Callee),
+ LazyCallGraph::Edge::Call);
}
for (Value *Op : I.operand_values())
@@ -78,50 +103,33 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
// function containing) operands to all of the instructions in the function.
// Process them (recursively) collecting every function found.
visitReferences(Worklist, Visited, [&](Function &F) {
- addEdge(Edges, EdgeIndexMap, F, LazyCallGraph::Edge::Ref);
+ addEdge(Edges->Edges, Edges->EdgeIndexMap, G->get(F),
+ LazyCallGraph::Edge::Ref);
});
-}
-
-void LazyCallGraph::Node::insertEdgeInternal(Function &Target, Edge::Kind EK) {
- if (Node *N = G->lookup(Target))
- return insertEdgeInternal(*N, EK);
-
- EdgeIndexMap.insert({&Target, Edges.size()});
- Edges.emplace_back(Target, EK);
-}
-void LazyCallGraph::Node::insertEdgeInternal(Node &TargetN, Edge::Kind EK) {
- EdgeIndexMap.insert({&TargetN.getFunction(), Edges.size()});
- Edges.emplace_back(TargetN, EK);
+ return *Edges;
}
-void LazyCallGraph::Node::setEdgeKind(Function &TargetF, Edge::Kind EK) {
- Edges[EdgeIndexMap.find(&TargetF)->second].setKind(EK);
+void LazyCallGraph::Node::replaceFunction(Function &NewF) {
+ assert(F != &NewF && "Must not replace a function with itself!");
+ F = &NewF;
}
-void LazyCallGraph::Node::removeEdgeInternal(Function &Target) {
- auto IndexMapI = EdgeIndexMap.find(&Target);
- assert(IndexMapI != EdgeIndexMap.end() &&
- "Target not in the edge set for this caller?");
-
- Edges[IndexMapI->second] = Edge();
- EdgeIndexMap.erase(IndexMapI);
-}
-
-void LazyCallGraph::Node::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LazyCallGraph::Node::dump() const {
dbgs() << *this << '\n';
}
+#endif
-LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) {
+LazyCallGraph::LazyCallGraph(Module &M) {
DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier()
<< "\n");
for (Function &F : M)
- if (!F.isDeclaration() && !F.hasLocalLinkage())
- if (EntryIndexMap.insert({&F, EntryEdges.size()}).second) {
- DEBUG(dbgs() << " Adding '" << F.getName()
- << "' to entry set of the graph.\n");
- EntryEdges.emplace_back(F, Edge::Ref);
- }
+ if (!F.isDeclaration() && !F.hasLocalLinkage()) {
+ DEBUG(dbgs() << " Adding '" << F.getName()
+ << "' to entry set of the graph.\n");
+ addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F), Edge::Ref);
+ }
// Now add entry nodes for functions reachable via initializers to globals.
SmallVector<Constant *, 16> Worklist;
@@ -134,21 +142,15 @@ LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) {
DEBUG(dbgs() << " Adding functions referenced by global initializers to the "
"entry set.\n");
visitReferences(Worklist, Visited, [&](Function &F) {
- addEdge(EntryEdges, EntryIndexMap, F, LazyCallGraph::Edge::Ref);
+ addEdge(EntryEdges.Edges, EntryEdges.EdgeIndexMap, get(F),
+ LazyCallGraph::Edge::Ref);
});
-
- for (const Edge &E : EntryEdges)
- RefSCCEntryNodes.push_back(&E.getFunction());
}
LazyCallGraph::LazyCallGraph(LazyCallGraph &&G)
: BPA(std::move(G.BPA)), NodeMap(std::move(G.NodeMap)),
- EntryEdges(std::move(G.EntryEdges)),
- EntryIndexMap(std::move(G.EntryIndexMap)), SCCBPA(std::move(G.SCCBPA)),
- SCCMap(std::move(G.SCCMap)), LeafRefSCCs(std::move(G.LeafRefSCCs)),
- DFSStack(std::move(G.DFSStack)),
- RefSCCEntryNodes(std::move(G.RefSCCEntryNodes)),
- NextDFSNumber(G.NextDFSNumber) {
+ EntryEdges(std::move(G.EntryEdges)), SCCBPA(std::move(G.SCCBPA)),
+ SCCMap(std::move(G.SCCMap)), LeafRefSCCs(std::move(G.LeafRefSCCs)) {
updateGraphPtrs();
}
@@ -156,20 +158,18 @@ LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) {
BPA = std::move(G.BPA);
NodeMap = std::move(G.NodeMap);
EntryEdges = std::move(G.EntryEdges);
- EntryIndexMap = std::move(G.EntryIndexMap);
SCCBPA = std::move(G.SCCBPA);
SCCMap = std::move(G.SCCMap);
LeafRefSCCs = std::move(G.LeafRefSCCs);
- DFSStack = std::move(G.DFSStack);
- RefSCCEntryNodes = std::move(G.RefSCCEntryNodes);
- NextDFSNumber = G.NextDFSNumber;
updateGraphPtrs();
return *this;
}
-void LazyCallGraph::SCC::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LazyCallGraph::SCC::dump() const {
dbgs() << *this << '\n';
}
+#endif
#ifndef NDEBUG
void LazyCallGraph::SCC::verify() {
@@ -184,8 +184,8 @@ void LazyCallGraph::SCC::verify() {
"Must set DFS numbers to -1 when adding a node to an SCC!");
assert(N->LowLink == -1 &&
"Must set low link to -1 when adding a node to an SCC!");
- for (Edge &E : *N)
- assert(E.getNode() && "Can't have an edge to a raw function!");
+ for (Edge &E : **N)
+ assert(E.getNode() && "Can't have an unpopulated node!");
}
}
#endif
@@ -195,10 +195,9 @@ bool LazyCallGraph::SCC::isParentOf(const SCC &C) const {
return false;
for (Node &N : *this)
- for (Edge &E : N.calls())
- if (Node *CalleeN = E.getNode())
- if (OuterRefSCC->G->lookupSCC(*CalleeN) == &C)
- return true;
+ for (Edge &E : N->calls())
+ if (OuterRefSCC->G->lookupSCC(E.getNode()) == &C)
+ return true;
// No edges found.
return false;
@@ -218,11 +217,8 @@ bool LazyCallGraph::SCC::isAncestorOf(const SCC &TargetC) const {
do {
const SCC &C = *Worklist.pop_back_val();
for (Node &N : C)
- for (Edge &E : N.calls()) {
- Node *CalleeN = E.getNode();
- if (!CalleeN)
- continue;
- SCC *CalleeC = G.lookupSCC(*CalleeN);
+ for (Edge &E : N->calls()) {
+ SCC *CalleeC = G.lookupSCC(E.getNode());
if (!CalleeC)
continue;
@@ -243,9 +239,11 @@ bool LazyCallGraph::SCC::isAncestorOf(const SCC &TargetC) const {
LazyCallGraph::RefSCC::RefSCC(LazyCallGraph &G) : G(&G) {}
-void LazyCallGraph::RefSCC::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LazyCallGraph::RefSCC::dump() const {
dbgs() << *this << '\n';
}
+#endif
#ifndef NDEBUG
void LazyCallGraph::RefSCC::verify() {
@@ -279,10 +277,10 @@ void LazyCallGraph::RefSCC::verify() {
for (int i = 0, Size = SCCs.size(); i < Size; ++i) {
SCC &SourceSCC = *SCCs[i];
for (Node &N : SourceSCC)
- for (Edge &E : N) {
+ for (Edge &E : *N) {
if (!E.isCall())
continue;
- SCC &TargetSCC = *G->lookupSCC(*E.getNode());
+ SCC &TargetSCC = *G->lookupSCC(E.getNode());
if (&TargetSCC.getOuterRefSCC() == this) {
assert(SCCIndices.find(&TargetSCC)->second <= i &&
"Edge between SCCs violates post-order relationship.");
@@ -299,8 +297,8 @@ void LazyCallGraph::RefSCC::verify() {
auto HasConnectingEdge = [&] {
for (SCC &C : *ParentRC)
for (Node &N : C)
- for (Edge &E : N)
- if (G->lookupRefSCC(*E.getNode()) == this)
+ for (Edge &E : *N)
+ if (G->lookupRefSCC(E.getNode()) == this)
return true;
return false;
};
@@ -461,7 +459,7 @@ updatePostorderSequenceForEdgeInsertion(
SmallVector<LazyCallGraph::SCC *, 1>
LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
- assert(!SourceN[TargetN].isCall() && "Must start with a ref edge!");
+ assert(!(*SourceN)[TargetN].isCall() && "Must start with a ref edge!");
SmallVector<SCC *, 1> DeletedSCCs;
#ifndef NDEBUG
@@ -477,7 +475,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
// If the two nodes are already part of the same SCC, we're also done as
// we've just added more connectivity.
if (&SourceSCC == &TargetSCC) {
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
+ SourceN->setEdgeKind(TargetN, Edge::Call);
return DeletedSCCs;
}
@@ -490,7 +488,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
int SourceIdx = SCCIndices[&SourceSCC];
int TargetIdx = SCCIndices[&TargetSCC];
if (TargetIdx < SourceIdx) {
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
+ SourceN->setEdgeKind(TargetN, Edge::Call);
return DeletedSCCs;
}
@@ -504,11 +502,9 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
ConnectedSet.insert(&SourceSCC);
auto IsConnected = [&](SCC &C) {
for (Node &N : C)
- for (Edge &E : N.calls()) {
- assert(E.getNode() && "Must have formed a node within an SCC!");
- if (ConnectedSet.count(G->lookupSCC(*E.getNode())))
+ for (Edge &E : N->calls())
+ if (ConnectedSet.count(G->lookupSCC(E.getNode())))
return true;
- }
return false;
};
@@ -535,11 +531,10 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
do {
SCC &C = *Worklist.pop_back_val();
for (Node &N : C)
- for (Edge &E : N) {
- assert(E.getNode() && "Must have formed a node within an SCC!");
+ for (Edge &E : *N) {
if (!E.isCall())
continue;
- SCC &EdgeC = *G->lookupSCC(*E.getNode());
+ SCC &EdgeC = *G->lookupSCC(E.getNode());
if (&EdgeC.getOuterRefSCC() != this)
// Not in this RefSCC...
continue;
@@ -565,7 +560,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
// new cycles. We're done.
if (MergeRange.begin() == MergeRange.end()) {
// Now that the SCC structure is finalized, flip the kind to call.
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
+ SourceN->setEdgeKind(TargetN, Edge::Call);
return DeletedSCCs;
}
@@ -600,7 +595,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
SCCIndices[C] -= IndexOffset;
// Now that the SCC structure is finalized, flip the kind to call.
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
+ SourceN->setEdgeKind(TargetN, Edge::Call);
// And we're done!
return DeletedSCCs;
@@ -608,7 +603,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
void LazyCallGraph::RefSCC::switchTrivialInternalEdgeToRef(Node &SourceN,
Node &TargetN) {
- assert(SourceN[TargetN].isCall() && "Must start with a call edge!");
+ assert((*SourceN)[TargetN].isCall() && "Must start with a call edge!");
#ifndef NDEBUG
// In a debug build, verify the RefSCC is valid to start with and when this
@@ -625,12 +620,12 @@ void LazyCallGraph::RefSCC::switchTrivialInternalEdgeToRef(Node &SourceN,
"Source and Target must be in separate SCCs for this to be trivial!");
// Set the edge kind.
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref);
+ SourceN->setEdgeKind(TargetN, Edge::Ref);
}
iterator_range<LazyCallGraph::RefSCC::iterator>
LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
- assert(SourceN[TargetN].isCall() && "Must start with a call edge!");
+ assert((*SourceN)[TargetN].isCall() && "Must start with a call edge!");
#ifndef NDEBUG
// In a debug build, verify the RefSCC is valid to start with and when this
@@ -650,7 +645,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
"full CG update.");
// Set the edge kind.
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref);
+ SourceN->setEdgeKind(TargetN, Edge::Ref);
// Otherwise we are removing a call edge from a single SCC. This may break
// the cycle. In order to compute the new set of SCCs, we need to do a small
@@ -665,7 +660,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
// etc.
SCC &OldSCC = TargetSCC;
- SmallVector<std::pair<Node *, call_edge_iterator>, 16> DFSStack;
+ SmallVector<std::pair<Node *, EdgeSequence::call_iterator>, 16> DFSStack;
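+ // Note that a populated Node dereferences to its EdgeSequence, which is why
+ // the DFS below writes (*N)->call_begin() rather than N->call_begin().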
SmallVector<Node *, 16> PendingSCCStack;
SmallVector<SCC *, 4> NewSCCs;
@@ -706,14 +701,14 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
RootN->DFSNumber = RootN->LowLink = 1;
int NextDFSNumber = 2;
- DFSStack.push_back({RootN, RootN->call_begin()});
+ DFSStack.push_back({RootN, (*RootN)->call_begin()});
do {
Node *N;
- call_edge_iterator I;
+ EdgeSequence::call_iterator I;
std::tie(N, I) = DFSStack.pop_back_val();
- auto E = N->call_end();
+ auto E = (*N)->call_end();
while (I != E) {
- Node &ChildN = *I->getNode();
+ Node &ChildN = I->getNode();
if (ChildN.DFSNumber == 0) {
// We haven't yet visited this child, so descend, pushing the current
// node onto the stack.
@@ -723,8 +718,8 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
"Found a node with 0 DFS number but already in an SCC!");
ChildN.DFSNumber = ChildN.LowLink = NextDFSNumber++;
N = &ChildN;
- I = N->call_begin();
- E = N->call_end();
+ I = (*N)->call_begin();
+ E = (*N)->call_end();
continue;
}
@@ -817,17 +812,19 @@ LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
void LazyCallGraph::RefSCC::switchOutgoingEdgeToCall(Node &SourceN,
Node &TargetN) {
- assert(!SourceN[TargetN].isCall() && "Must start with a ref edge!");
+ assert(!(*SourceN)[TargetN].isCall() && "Must start with a ref edge!");
assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC.");
assert(G->lookupRefSCC(TargetN) != this &&
"Target must not be in this RefSCC.");
+#ifdef EXPENSIVE_CHECKS
assert(G->lookupRefSCC(TargetN)->isDescendantOf(*this) &&
"Target must be a descendant of the Source.");
+#endif
// Edges between RefSCCs are the same regardless of call or ref, so we can
// just flip the edge here.
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
+ SourceN->setEdgeKind(TargetN, Edge::Call);
#ifndef NDEBUG
// Check that the RefSCC is still valid.
@@ -837,17 +834,19 @@ void LazyCallGraph::RefSCC::switchOutgoingEdgeToCall(Node &SourceN,
void LazyCallGraph::RefSCC::switchOutgoingEdgeToRef(Node &SourceN,
Node &TargetN) {
- assert(SourceN[TargetN].isCall() && "Must start with a call edge!");
+ assert((*SourceN)[TargetN].isCall() && "Must start with a call edge!");
assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC.");
assert(G->lookupRefSCC(TargetN) != this &&
"Target must not be in this RefSCC.");
+#ifdef EXPENSIVE_CHECKS
assert(G->lookupRefSCC(TargetN)->isDescendantOf(*this) &&
"Target must be a descendant of the Source.");
+#endif
// Edges between RefSCCs are the same regardless of call or ref, so we can
// just flip the edge here.
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref);
+ SourceN->setEdgeKind(TargetN, Edge::Ref);
#ifndef NDEBUG
// Check that the RefSCC is still valid.
@@ -860,7 +859,7 @@ void LazyCallGraph::RefSCC::insertInternalRefEdge(Node &SourceN,
assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC.");
assert(G->lookupRefSCC(TargetN) == this && "Target must be in this RefSCC.");
- SourceN.insertEdgeInternal(TargetN, Edge::Ref);
+ SourceN->insertEdgeInternal(TargetN, Edge::Ref);
#ifndef NDEBUG
// Check that the RefSCC is still valid.
@@ -871,14 +870,16 @@ void LazyCallGraph::RefSCC::insertInternalRefEdge(Node &SourceN,
void LazyCallGraph::RefSCC::insertOutgoingEdge(Node &SourceN, Node &TargetN,
Edge::Kind EK) {
// First insert it into the caller.
- SourceN.insertEdgeInternal(TargetN, EK);
+ SourceN->insertEdgeInternal(TargetN, EK);
assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC.");
RefSCC &TargetC = *G->lookupRefSCC(TargetN);
assert(&TargetC != this && "Target must not be in this RefSCC.");
+#ifdef EXPENSIVE_CHECKS
assert(TargetC.isDescendantOf(*this) &&
"Target must be a descendant of the Source.");
+#endif
// The only change required is to add this SCC to the parent set of the
// callee.
@@ -895,8 +896,10 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
assert(G->lookupRefSCC(TargetN) == this && "Target must be in this RefSCC.");
RefSCC &SourceC = *G->lookupRefSCC(SourceN);
assert(&SourceC != this && "Source must not be in this RefSCC.");
+#ifdef EXPENSIVE_CHECKS
assert(SourceC.isDescendantOf(*this) &&
"Source must be a descendant of the Target.");
+#endif
SmallVector<RefSCC *, 1> DeletedRefSCCs;
@@ -951,9 +954,8 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
RefSCC &RC = *Worklist.pop_back_val();
for (SCC &C : RC)
for (Node &N : C)
- for (Edge &E : N) {
- assert(E.getNode() && "Must have formed a node!");
- RefSCC &EdgeRC = *G->lookupRefSCC(*E.getNode());
+ for (Edge &E : *N) {
+ RefSCC &EdgeRC = *G->lookupRefSCC(E.getNode());
if (G->getRefSCCIndex(EdgeRC) <= SourceIdx)
// Not in the postorder sequence between source and target.
continue;
@@ -1003,10 +1005,8 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
SCCIndices[&InnerC] = SCCIndex++;
for (Node &N : InnerC) {
G->SCCMap[&N] = &InnerC;
- for (Edge &E : N) {
- assert(E.getNode() &&
- "Cannot have a null node within a visited SCC!");
- RefSCC &ChildRC = *G->lookupRefSCC(*E.getNode());
+ for (Edge &E : *N) {
+ RefSCC &ChildRC = *G->lookupRefSCC(E.getNode());
if (MergeSet.count(&ChildRC))
continue;
ChildRC.Parents.erase(RC);
@@ -1042,7 +1042,7 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
// At this point we have a merged RefSCC with a post-order SCCs list, just
// connect the nodes to form the new edge.
- SourceN.insertEdgeInternal(TargetN, Edge::Ref);
+ SourceN->insertEdgeInternal(TargetN, Edge::Ref);
// We return the list of SCCs which were merged so that callers can
// invalidate any data they have associated with those SCCs. Note that these
@@ -1069,15 +1069,16 @@ void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) {
#endif
// First remove it from the node.
- SourceN.removeEdgeInternal(TargetN.getFunction());
+ bool Removed = SourceN->removeEdgeInternal(TargetN);
+ (void)Removed;
+ assert(Removed && "Target not in the edge set for this caller?");
bool HasOtherEdgeToChildRC = false;
bool HasOtherChildRC = false;
for (SCC *InnerC : SCCs) {
for (Node &N : *InnerC) {
- for (Edge &E : N) {
- assert(E.getNode() && "Cannot have a missing node in a visited SCC!");
- RefSCC &OtherChildRC = *G->lookupRefSCC(*E.getNode());
+ for (Edge &E : *N) {
+ RefSCC &OtherChildRC = *G->lookupRefSCC(E.getNode());
if (&OtherChildRC == &TargetRC) {
HasOtherEdgeToChildRC = true;
break;
@@ -1116,7 +1117,7 @@ void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) {
SmallVector<LazyCallGraph::RefSCC *, 1>
LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
- assert(!SourceN[TargetN].isCall() &&
+ assert(!(*SourceN)[TargetN].isCall() &&
"Cannot remove a call edge, it must first be made a ref edge");
#ifndef NDEBUG
@@ -1127,7 +1128,9 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
#endif
// First remove the actual edge.
- SourceN.removeEdgeInternal(TargetN.getFunction());
+ bool Removed = SourceN->removeEdgeInternal(TargetN);
+ (void)Removed;
+ assert(Removed && "Target not in the edge set for this caller?");
// We return a list of the resulting *new* RefSCCs in post-order.
SmallVector<RefSCC *, 1> Result;
@@ -1186,7 +1189,7 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
PostOrderMapping[&N] = Number;
};
- SmallVector<std::pair<Node *, edge_iterator>, 4> DFSStack;
+ SmallVector<std::pair<Node *, EdgeSequence::iterator>, 4> DFSStack;
SmallVector<Node *, 4> PendingRefSCCStack;
do {
assert(DFSStack.empty() &&
@@ -1205,18 +1208,18 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
RootN->DFSNumber = RootN->LowLink = 1;
int NextDFSNumber = 2;
- DFSStack.push_back({RootN, RootN->begin()});
+ DFSStack.push_back({RootN, (*RootN)->begin()});
do {
Node *N;
- edge_iterator I;
+ EdgeSequence::iterator I;
std::tie(N, I) = DFSStack.pop_back_val();
- auto E = N->end();
+ auto E = (*N)->end();
assert(N->DFSNumber != 0 && "We should always assign a DFS number "
"before processing a node.");
while (I != E) {
- Node &ChildN = I->getNode(*G);
+ Node &ChildN = I->getNode();
if (ChildN.DFSNumber == 0) {
// Mark that we should start at this child when next this node is the
// top of the stack. We don't start at the next child to ensure this
@@ -1226,8 +1229,8 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
// Continue, resetting to the child node.
ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++;
N = &ChildN;
- I = ChildN.begin();
- E = ChildN.end();
+ I = ChildN->begin();
+ E = ChildN->end();
continue;
}
if (ChildN.DFSNumber == -1) {
@@ -1382,9 +1385,8 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
#endif
for (SCC *C : SCCs)
for (Node &N : *C) {
- for (Edge &E : N) {
- assert(E.getNode() && "Cannot have a missing node in a visited SCC!");
- RefSCC &ChildRC = *G->lookupRefSCC(*E.getNode());
+ for (Edge &E : *N) {
+ RefSCC &ChildRC = *G->lookupRefSCC(E.getNode());
if (&ChildRC == this)
continue;
ChildRC.Parents.insert(this);
@@ -1408,9 +1410,8 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
for (RefSCC *ParentRC : OldParents)
for (SCC &ParentC : *ParentRC)
for (Node &ParentN : ParentC)
- for (Edge &E : ParentN) {
- assert(E.getNode() && "Cannot have a missing node in a visited SCC!");
- RefSCC &RC = *G->lookupRefSCC(*E.getNode());
+ for (Edge &E : *ParentN) {
+ RefSCC &RC = *G->lookupRefSCC(E.getNode());
if (&RC != ParentRC)
RC.Parents.insert(ParentRC);
}
@@ -1448,8 +1449,10 @@ void LazyCallGraph::RefSCC::handleTrivialEdgeInsertion(Node &SourceN,
return;
}
+#ifdef EXPENSIVE_CHECKS
assert(TargetRC.isDescendantOf(*this) &&
"Target must be a descendant of the Source.");
+#endif
// The only change required is to add this RefSCC to the parent set of the
// target. This is a set and so idempotent if the edge already existed.
TargetRC.Parents.insert(this);
@@ -1461,25 +1464,29 @@ void LazyCallGraph::RefSCC::insertTrivialCallEdge(Node &SourceN,
// Check that the RefSCC is still valid when we finish.
auto ExitVerifier = make_scope_exit([this] { verify(); });
- // Check that we aren't breaking some invariants of the SCC graph.
+#ifdef EXPENSIVE_CHECKS
+ // Check that we aren't breaking some invariants of the SCC graph. Note that
+ // this is quadratic in the number of edges in the call graph!
SCC &SourceC = *G->lookupSCC(SourceN);
SCC &TargetC = *G->lookupSCC(TargetN);
if (&SourceC != &TargetC)
assert(SourceC.isAncestorOf(TargetC) &&
"Call edge is not trivial in the SCC graph!");
-#endif
+#endif // EXPENSIVE_CHECKS
+#endif // NDEBUG
+
// First insert it into the source or find the existing edge.
- auto InsertResult = SourceN.EdgeIndexMap.insert(
- {&TargetN.getFunction(), SourceN.Edges.size()});
+ auto InsertResult =
+ SourceN->EdgeIndexMap.insert({&TargetN, SourceN->Edges.size()});
if (!InsertResult.second) {
// Already an edge, just update it.
- Edge &E = SourceN.Edges[InsertResult.first->second];
+ Edge &E = SourceN->Edges[InsertResult.first->second];
if (E.isCall())
return; // Nothing to do!
E.setKind(Edge::Call);
} else {
// Create the new edge.
- SourceN.Edges.emplace_back(TargetN, Edge::Call);
+ SourceN->Edges.emplace_back(TargetN, Edge::Call);
}
// Now that we have the edge, handle the graph fallout.
@@ -1491,39 +1498,75 @@ void LazyCallGraph::RefSCC::insertTrivialRefEdge(Node &SourceN, Node &TargetN) {
// Check that the RefSCC is still valid when we finish.
auto ExitVerifier = make_scope_exit([this] { verify(); });
+#ifdef EXPENSIVE_CHECKS
// Check that we aren't breaking some invariants of the RefSCC graph.
RefSCC &SourceRC = *G->lookupRefSCC(SourceN);
RefSCC &TargetRC = *G->lookupRefSCC(TargetN);
if (&SourceRC != &TargetRC)
assert(SourceRC.isAncestorOf(TargetRC) &&
"Ref edge is not trivial in the RefSCC graph!");
-#endif
+#endif // EXPENSIVE_CHECKS
+#endif // NDEBUG
+
// First insert it into the source or find the existing edge.
- auto InsertResult = SourceN.EdgeIndexMap.insert(
- {&TargetN.getFunction(), SourceN.Edges.size()});
+ auto InsertResult =
+ SourceN->EdgeIndexMap.insert({&TargetN, SourceN->Edges.size()});
if (!InsertResult.second)
// Already an edge, we're done.
return;
// Create the new edge.
- SourceN.Edges.emplace_back(TargetN, Edge::Ref);
+ SourceN->Edges.emplace_back(TargetN, Edge::Ref);
// Now that we have the edge, handle the graph fallout.
handleTrivialEdgeInsertion(SourceN, TargetN);
}
-void LazyCallGraph::insertEdge(Node &SourceN, Function &Target, Edge::Kind EK) {
- assert(SCCMap.empty() && DFSStack.empty() &&
+void LazyCallGraph::RefSCC::replaceNodeFunction(Node &N, Function &NewF) {
+ Function &OldF = N.getFunction();
+
+#ifndef NDEBUG
+ // Check that the RefSCC is still valid when we finish.
+ auto ExitVerifier = make_scope_exit([this] { verify(); });
+
+ assert(G->lookupRefSCC(N) == this &&
+ "Cannot replace the function of a node outside this RefSCC.");
+
+ assert(G->NodeMap.find(&NewF) == G->NodeMap.end() &&
+ "Must not have already walked the new function!'");
+
+ // It is important that this replacement not introduce graph changes so we
+ // insist that the caller has already removed every use of the original
+ // function and that all uses of the new function correspond to existing
+ // edges in the graph. The common and expected way to use this is when
+ // replacing the function itself in the IR without changing the call graph
+ // shape and just updating the analysis based on that.
+ assert(&OldF != &NewF && "Cannot replace a function with itself!");
+ assert(OldF.use_empty() &&
+ "Must have moved all uses from the old function to the new!");
+#endif
+
+ N.replaceFunction(NewF);
+
+ // Update various call graph maps.
+ G->NodeMap.erase(&OldF);
+ G->NodeMap[&NewF] = &N;
+}
+
+void LazyCallGraph::insertEdge(Node &SourceN, Node &TargetN, Edge::Kind EK) {
+ assert(SCCMap.empty() &&
"This method cannot be called after SCCs have been formed!");
- return SourceN.insertEdgeInternal(Target, EK);
+ return SourceN->insertEdgeInternal(TargetN, EK);
}
-void LazyCallGraph::removeEdge(Node &SourceN, Function &Target) {
- assert(SCCMap.empty() && DFSStack.empty() &&
+void LazyCallGraph::removeEdge(Node &SourceN, Node &TargetN) {
+ assert(SCCMap.empty() &&
"This method cannot be called after SCCs have been formed!");
- return SourceN.removeEdgeInternal(Target);
+ bool Removed = SourceN->removeEdgeInternal(TargetN);
+ (void)Removed;
+ assert(Removed && "Target not in the edge set for this caller?");
}
void LazyCallGraph::removeDeadFunction(Function &F) {
@@ -1532,19 +1575,6 @@ void LazyCallGraph::removeDeadFunction(Function &F) {
assert(F.use_empty() &&
"This routine should only be called on trivially dead functions!");
- auto EII = EntryIndexMap.find(&F);
- if (EII != EntryIndexMap.end()) {
- EntryEdges[EII->second] = Edge();
- EntryIndexMap.erase(EII);
- }
-
- // It's safe to just remove un-visited functions from the RefSCC entry list.
- // FIXME: This is a linear operation which could become hot and benefit from
- // an index map.
- auto RENI = find(RefSCCEntryNodes, &F);
- if (RENI != RefSCCEntryNodes.end())
- RefSCCEntryNodes.erase(RENI);
-
auto NI = NodeMap.find(&F);
if (NI == NodeMap.end())
// Not in the graph at all!
@@ -1553,22 +1583,16 @@ void LazyCallGraph::removeDeadFunction(Function &F) {
Node &N = *NI->second;
NodeMap.erase(NI);
- if (SCCMap.empty() && DFSStack.empty()) {
- // No SCC walk has begun, so removing this is fine and there is nothing
+ // Remove this from the entry edges if present.
+ EntryEdges.removeEdgeInternal(N);
+
+ if (SCCMap.empty()) {
+ // No SCCs have been formed, so removing this is fine and there is nothing
// else necessary at this point but clearing out the node.
N.clear();
return;
}
- // Check that we aren't going to break the DFS walk.
- assert(all_of(DFSStack,
- [&N](const std::pair<Node *, edge_iterator> &Element) {
- return Element.first != &N;
- }) &&
- "Tried to remove a function currently in the DFS stack!");
- assert(find(PendingRefSCCStack, &N) == PendingRefSCCStack.end() &&
- "Tried to remove a function currently pending to add to a RefSCC!");
-
// Cannot remove a function which has yet to be visited in the DFS walk, so
// if we have a node at all then we must have an SCC and RefSCC.
auto CI = SCCMap.find(&N);
@@ -1583,13 +1607,19 @@ void LazyCallGraph::removeDeadFunction(Function &F) {
// Validate these properties first.
assert(C.size() == 1 && "Dead functions must be in a singular SCC");
assert(RC.size() == 1 && "Dead functions must be in a singular RefSCC");
- assert(RC.Parents.empty() && "Cannot have parents of a dead RefSCC!");
+
+ // Clean up any remaining reference edges. Note that we walk an unordered set
+ // here but are just removing and so the order doesn't matter.
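+ // (An unpopulated parent node converts to false and has no edge list to
+ // update.)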
+ for (RefSCC &ParentRC : RC.parents())
+ for (SCC &ParentC : ParentRC)
+ for (Node &ParentN : ParentC)
+ if (ParentN)
+ ParentN->removeEdgeInternal(N);
// Now remove this RefSCC from any parents sets and the leaf list.
- for (Edge &E : N)
- if (Node *TargetN = E.getNode())
- if (RefSCC *TargetRC = lookupRefSCC(*TargetN))
- TargetRC->Parents.erase(&RC);
+ for (Edge &E : *N)
+ if (RefSCC *TargetRC = lookupRefSCC(E.getNode()))
+ TargetRC->Parents.erase(&RC);
// FIXME: This is a linear operation which could become hot and benefit from
// an index map.
auto LRI = find(LeafRefSCCs, &RC);
@@ -1622,15 +1652,14 @@ void LazyCallGraph::updateGraphPtrs() {
{
SmallVector<Node *, 16> Worklist;
for (Edge &E : EntryEdges)
- if (Node *EntryN = E.getNode())
- Worklist.push_back(EntryN);
+ Worklist.push_back(&E.getNode());
while (!Worklist.empty()) {
- Node *N = Worklist.pop_back_val();
- N->G = this;
- for (Edge &E : N->Edges)
- if (Node *TargetN = E.getNode())
- Worklist.push_back(TargetN);
+ Node &N = *Worklist.pop_back_val();
+ N.G = this;
+ if (N)
+ for (Edge &E : *N)
+ Worklist.push_back(&E.getNode());
}
}
@@ -1647,34 +1676,18 @@ void LazyCallGraph::updateGraphPtrs() {
}
}
-/// Build the internal SCCs for a RefSCC from a sequence of nodes.
-///
-/// Appends the SCCs to the provided vector and updates the map with their
-/// indices. Both the vector and map must be empty when passed into this
-/// routine.
-void LazyCallGraph::buildSCCs(RefSCC &RC, node_stack_range Nodes) {
- assert(RC.SCCs.empty() && "Already built SCCs!");
- assert(RC.SCCIndices.empty() && "Already mapped SCC indices!");
-
- for (Node *N : Nodes) {
- assert(N->LowLink >= (*Nodes.begin())->LowLink &&
- "We cannot have a low link in an SCC lower than its root on the "
- "stack!");
+template <typename RootsT, typename GetBeginT, typename GetEndT,
+ typename GetNodeT, typename FormSCCCallbackT>
+void LazyCallGraph::buildGenericSCCs(RootsT &&Roots, GetBeginT &&GetBegin,
+ GetEndT &&GetEnd, GetNodeT &&GetNode,
+ FormSCCCallbackT &&FormSCC) {
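+ // Deduce the edge iterator type from the GetBegin callable so that a single
+ // implementation serves both the ref-edge walk and the call-edge-only walk.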
+ typedef decltype(GetBegin(std::declval<Node &>())) EdgeItT;
- // This node will go into the next RefSCC, clear out its DFS and low link
- // as we scan.
- N->DFSNumber = N->LowLink = 0;
- }
-
- // Each RefSCC contains a DAG of the call SCCs. To build these, we do
- // a direct walk of the call edges using Tarjan's algorithm. We reuse the
- // internal storage as we won't need it for the outer graph's DFS any longer.
-
- SmallVector<std::pair<Node *, call_edge_iterator>, 16> DFSStack;
+ SmallVector<std::pair<Node *, EdgeItT>, 16> DFSStack;
SmallVector<Node *, 16> PendingSCCStack;
// Scan down the stack and DFS across the call edges.
- for (Node *RootN : Nodes) {
+ for (Node *RootN : Roots) {
assert(DFSStack.empty() &&
"Cannot begin a new root with a non-empty DFS stack!");
assert(PendingSCCStack.empty() &&
@@ -1690,25 +1703,23 @@ void LazyCallGraph::buildSCCs(RefSCC &RC, node_stack_range Nodes) {
RootN->DFSNumber = RootN->LowLink = 1;
int NextDFSNumber = 2;
- DFSStack.push_back({RootN, RootN->call_begin()});
+ DFSStack.push_back({RootN, GetBegin(*RootN)});
do {
Node *N;
- call_edge_iterator I;
+ EdgeItT I;
std::tie(N, I) = DFSStack.pop_back_val();
- auto E = N->call_end();
+ auto E = GetEnd(*N);
while (I != E) {
- Node &ChildN = *I->getNode();
+ Node &ChildN = GetNode(I);
if (ChildN.DFSNumber == 0) {
// We haven't yet visited this child, so descend, pushing the current
// node onto the stack.
DFSStack.push_back({N, I});
- assert(!lookupSCC(ChildN) &&
- "Found a node with 0 DFS number but already in an SCC!");
ChildN.DFSNumber = ChildN.LowLink = NextDFSNumber++;
N = &ChildN;
- I = N->call_begin();
- E = N->call_end();
+ I = GetBegin(*N);
+ E = GetEnd(*N);
continue;
}
@@ -1750,20 +1761,93 @@ void LazyCallGraph::buildSCCs(RefSCC &RC, node_stack_range Nodes) {
}));
// Form a new SCC out of these nodes and then clear them off our pending
// stack.
- RC.SCCs.push_back(createSCC(RC, SCCNodes));
- for (Node &N : *RC.SCCs.back()) {
- N.DFSNumber = N.LowLink = -1;
- SCCMap[&N] = RC.SCCs.back();
- }
+ FormSCC(SCCNodes);
PendingSCCStack.erase(SCCNodes.end().base(), PendingSCCStack.end());
} while (!DFSStack.empty());
}
+}
+
+/// Build the internal SCCs for a RefSCC from a sequence of nodes.
+///
+/// Appends the SCCs to the provided vector and updates the map with their
+/// indices. Both the vector and map must be empty when passed into this
+/// routine.
+void LazyCallGraph::buildSCCs(RefSCC &RC, node_stack_range Nodes) {
+ assert(RC.SCCs.empty() && "Already built SCCs!");
+ assert(RC.SCCIndices.empty() && "Already mapped SCC indices!");
+
+ for (Node *N : Nodes) {
+ assert(N->LowLink >= (*Nodes.begin())->LowLink &&
+ "We cannot have a low link in an SCC lower than its root on the "
+ "stack!");
+
+ // This node will go into the next RefSCC, clear out its DFS and low link
+ // as we scan.
+ N->DFSNumber = N->LowLink = 0;
+ }
+
+ // Each RefSCC contains a DAG of the call SCCs. To build these, we do
+ // a direct walk of the call edges using Tarjan's algorithm. We reuse the
+ // internal storage as we won't need it for the outer graph's DFS any longer.
+ buildGenericSCCs(
+ Nodes, [](Node &N) { return N->call_begin(); },
+ [](Node &N) { return N->call_end(); },
+ [](EdgeSequence::call_iterator I) -> Node & { return I->getNode(); },
+ [this, &RC](node_stack_range Nodes) {
+ RC.SCCs.push_back(createSCC(RC, Nodes));
+ for (Node &N : *RC.SCCs.back()) {
+ N.DFSNumber = N.LowLink = -1;
+ SCCMap[&N] = RC.SCCs.back();
+ }
+ });
// Wire up the SCC indices.
for (int i = 0, Size = RC.SCCs.size(); i < Size; ++i)
RC.SCCIndices[RC.SCCs[i]] = i;
}
+void LazyCallGraph::buildRefSCCs() {
+ if (EntryEdges.empty() || !PostOrderRefSCCs.empty())
+ // RefSCCs are either non-existent or already built!
+ return;
+
+ assert(RefSCCIndices.empty() && "Already mapped RefSCC indices!");
+
+ SmallVector<Node *, 16> Roots;
+ for (Edge &E : *this)
+ Roots.push_back(&E.getNode());
+
+  // The roots will be popped off a stack, so use reverse to get a less
+  // surprising order. This doesn't change any of the semantics anywhere.
+ std::reverse(Roots.begin(), Roots.end());
+
+ buildGenericSCCs(
+ Roots,
+ [](Node &N) {
+ // We need to populate each node as we begin to walk its edges.
+ N.populate();
+ return N->begin();
+ },
+ [](Node &N) { return N->end(); },
+ [](EdgeSequence::iterator I) -> Node & { return I->getNode(); },
+ [this](node_stack_range Nodes) {
+ RefSCC *NewRC = createRefSCC(*this);
+ buildSCCs(*NewRC, Nodes);
+ connectRefSCC(*NewRC);
+
+      // Push the new RefSCC into the postorder list and remember its position
+ // in the index map.
+ bool Inserted =
+ RefSCCIndices.insert({NewRC, PostOrderRefSCCs.size()}).second;
+ (void)Inserted;
+ assert(Inserted && "Cannot already have this RefSCC in the index map!");
+ PostOrderRefSCCs.push_back(NewRC);
+#ifndef NDEBUG
+ NewRC->verify();
+#endif
+ });
+}
+
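
The two Tarjan walks above now share one engine: buildGenericSCCs takes callables for edge enumeration (GetBegin/GetEnd/GetNode) and for consuming each completed SCC (FormSCC). A minimal self-contained sketch of the same shape, with plain ints for nodes; the real code is iterative with an explicit DFS stack, while this sketch recurses for brevity:

#include <algorithm>
#include <functional>
#include <vector>

// Sketch: Tarjan's SCC algorithm parameterized the same way as
// buildGenericSCCs -- over edge enumeration (GetSuccs) and a callback run
// once per completed SCC (FormSCC). All names here are illustrative.
template <typename GetSuccsT, typename FormSCCT>
void genericSCCs(int NumNodes, GetSuccsT GetSuccs, FormSCCT FormSCC) {
  std::vector<int> DFSNum(NumNodes, 0), LowLink(NumNodes, 0);
  std::vector<int> Pending; // pending-SCC stack
  std::vector<bool> OnStack(NumNodes, false);
  int NextDFS = 1;

  std::function<void(int)> Visit = [&](int N) {
    DFSNum[N] = LowLink[N] = NextDFS++;
    Pending.push_back(N);
    OnStack[N] = true;
    for (int Child : GetSuccs(N)) {
      if (DFSNum[Child] == 0) {
        Visit(Child); // unvisited: descend
        LowLink[N] = std::min(LowLink[N], LowLink[Child]);
      } else if (OnStack[Child]) {
        LowLink[N] = std::min(LowLink[N], DFSNum[Child]);
      }
    }
    if (LowLink[N] == DFSNum[N]) { // N roots an SCC: pop it off
      std::vector<int> SCC;
      int M;
      do {
        M = Pending.back();
        Pending.pop_back();
        OnStack[M] = false;
        SCC.push_back(M);
      } while (M != N);
      FormSCC(SCC); // SCCs are delivered in postorder
    }
  };

  for (int N = 0; N != NumNodes; ++N)
    if (DFSNum[N] == 0)
      Visit(N);
}

buildSCCs instantiates the engine with the call-edge iterators and an SCC-forming callback; buildRefSCCs instantiates it with the full edge sequence (populating each node lazily in GetBegin) and a callback that creates, builds, and connects a RefSCC.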
// FIXME: We should move callers of this to embed the parent linking and leaf
// tracking into their DFS in order to remove a full walk of all edges.
void LazyCallGraph::connectRefSCC(RefSCC &RC) {
@@ -1773,10 +1857,8 @@ void LazyCallGraph::connectRefSCC(RefSCC &RC) {
bool IsLeaf = true;
for (SCC &C : RC)
for (Node &N : C)
- for (Edge &E : N) {
- assert(E.getNode() &&
- "Cannot have a missing node in a visited part of the graph!");
- RefSCC &ChildRC = *lookupRefSCC(*E.getNode());
+ for (Edge &E : *N) {
+ RefSCC &ChildRC = *lookupRefSCC(E.getNode());
if (&ChildRC == &RC)
continue;
ChildRC.Parents.insert(&RC);
@@ -1788,113 +1870,13 @@ void LazyCallGraph::connectRefSCC(RefSCC &RC) {
LeafRefSCCs.push_back(&RC);
}
-bool LazyCallGraph::buildNextRefSCCInPostOrder() {
- if (DFSStack.empty()) {
- Node *N;
- do {
- // If we've handled all candidate entry nodes to the SCC forest, we're
- // done.
- if (RefSCCEntryNodes.empty())
- return false;
-
- N = &get(*RefSCCEntryNodes.pop_back_val());
- } while (N->DFSNumber != 0);
-
- // Found a new root, begin the DFS here.
- N->LowLink = N->DFSNumber = 1;
- NextDFSNumber = 2;
- DFSStack.push_back({N, N->begin()});
- }
-
- for (;;) {
- Node *N;
- edge_iterator I;
- std::tie(N, I) = DFSStack.pop_back_val();
-
- assert(N->DFSNumber > 0 && "We should always assign a DFS number "
- "before placing a node onto the stack.");
-
- auto E = N->end();
- while (I != E) {
- Node &ChildN = I->getNode(*this);
- if (ChildN.DFSNumber == 0) {
- // We haven't yet visited this child, so descend, pushing the current
- // node onto the stack.
- DFSStack.push_back({N, N->begin()});
-
- assert(!SCCMap.count(&ChildN) &&
- "Found a node with 0 DFS number but already in an SCC!");
- ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++;
- N = &ChildN;
- I = N->begin();
- E = N->end();
- continue;
- }
-
- // If the child has already been added to some child component, it
- // couldn't impact the low-link of this parent because it isn't
- // connected, and thus its low-link isn't relevant so skip it.
- if (ChildN.DFSNumber == -1) {
- ++I;
- continue;
- }
-
- // Track the lowest linked child as the lowest link for this node.
- assert(ChildN.LowLink > 0 && "Must have a positive low-link number!");
- if (ChildN.LowLink < N->LowLink)
- N->LowLink = ChildN.LowLink;
-
- // Move to the next edge.
- ++I;
- }
-
- // We've finished processing N and its descendents, put it on our pending
- // SCC stack to eventually get merged into an SCC of nodes.
- PendingRefSCCStack.push_back(N);
-
- // If this node is linked to some lower entry, continue walking up the
- // stack.
- if (N->LowLink != N->DFSNumber) {
- assert(!DFSStack.empty() &&
- "We never found a viable root for an SCC to pop off!");
- continue;
- }
-
- // Otherwise, form a new RefSCC from the top of the pending node stack.
- int RootDFSNumber = N->DFSNumber;
- // Find the range of the node stack by walking down until we pass the
- // root DFS number.
- auto RefSCCNodes = node_stack_range(
- PendingRefSCCStack.rbegin(),
- find_if(reverse(PendingRefSCCStack), [RootDFSNumber](const Node *N) {
- return N->DFSNumber < RootDFSNumber;
- }));
- // Form a new RefSCC out of these nodes and then clear them off our pending
- // stack.
- RefSCC *NewRC = createRefSCC(*this);
- buildSCCs(*NewRC, RefSCCNodes);
- connectRefSCC(*NewRC);
- PendingRefSCCStack.erase(RefSCCNodes.end().base(),
- PendingRefSCCStack.end());
-
- // Push the new node into the postorder list and return true indicating we
- // successfully grew the postorder sequence by one.
- bool Inserted =
- RefSCCIndices.insert({NewRC, PostOrderRefSCCs.size()}).second;
- (void)Inserted;
- assert(Inserted && "Cannot already have this RefSCC in the index map!");
- PostOrderRefSCCs.push_back(NewRC);
- return true;
- }
-}
-
AnalysisKey LazyCallGraphAnalysis::Key;
LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {}
static void printNode(raw_ostream &OS, LazyCallGraph::Node &N) {
OS << " Edges in function: " << N.getFunction().getName() << "\n";
- for (const LazyCallGraph::Edge &E : N)
+ for (LazyCallGraph::Edge &E : N.populate())
OS << " " << (E.isCall() ? "call" : "ref ") << " -> "
<< E.getFunction().getName() << "\n";
@@ -1929,6 +1911,7 @@ PreservedAnalyses LazyCallGraphPrinterPass::run(Module &M,
for (Function &F : M)
printNode(OS, G.get(F));
+ G.buildRefSCCs();
for (LazyCallGraph::RefSCC &C : G.postorder_ref_sccs())
printRefSCC(OS, C);
@@ -1941,7 +1924,7 @@ LazyCallGraphDOTPrinterPass::LazyCallGraphDOTPrinterPass(raw_ostream &OS)
static void printNodeDOT(raw_ostream &OS, LazyCallGraph::Node &N) {
std::string Name = "\"" + DOT::EscapeString(N.getFunction().getName()) + "\"";
- for (const LazyCallGraph::Edge &E : N) {
+ for (LazyCallGraph::Edge &E : N.populate()) {
OS << " " << Name << " -> \""
<< DOT::EscapeString(E.getFunction().getName()) << "\"";
if (!E.isCall()) // It is a ref edge.
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index d442310476cf..ad01f7f2f215 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
@@ -31,6 +32,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <stack>
@@ -39,6 +41,10 @@ using namespace PatternMatch;
#define DEBUG_TYPE "lazy-value-info"
+// This is the number of worklist items we will process to try to discover an
+// answer for a given value.
+static const unsigned MaxProcessedPerValue = 500;
+
char LazyValueInfoWrapperPass::ID = 0;
INITIALIZE_PASS_BEGIN(LazyValueInfoWrapperPass, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
@@ -358,6 +364,7 @@ namespace {
/// This is the cache kept by LazyValueInfo which
/// maintains information about queries across the clients' queries.
class LazyValueInfoCache {
+ friend class LazyValueInfoAnnotatedWriter;
/// This is all of the cached block information for exactly one Value*.
/// The entries are sorted by the BasicBlock* of the
/// entries, allowing us to do a lookup with a binary search.
@@ -366,22 +373,23 @@ namespace {
struct ValueCacheEntryTy {
ValueCacheEntryTy(Value *V, LazyValueInfoCache *P) : Handle(V, P) {}
LVIValueHandle Handle;
- SmallDenseMap<AssertingVH<BasicBlock>, LVILatticeVal, 4> BlockVals;
+ SmallDenseMap<PoisoningVH<BasicBlock>, LVILatticeVal, 4> BlockVals;
};
- /// This is all of the cached information for all values,
- /// mapped from Value* to key information.
- DenseMap<Value *, std::unique_ptr<ValueCacheEntryTy>> ValueCache;
-
/// This tracks, on a per-block basis, the set of values that are
/// over-defined at the end of that block.
- typedef DenseMap<AssertingVH<BasicBlock>, SmallPtrSet<Value *, 4>>
+ typedef DenseMap<PoisoningVH<BasicBlock>, SmallPtrSet<Value *, 4>>
OverDefinedCacheTy;
- OverDefinedCacheTy OverDefinedCache;
-
/// Keep track of all blocks that we have ever seen, so we
/// don't spend time removing unused blocks from our caches.
- DenseSet<AssertingVH<BasicBlock> > SeenBlocks;
+ DenseSet<PoisoningVH<BasicBlock> > SeenBlocks;
+
+ protected:
+ /// This is all of the cached information for all values,
+ /// mapped from Value* to key information.
+ DenseMap<Value *, std::unique_ptr<ValueCacheEntryTy>> ValueCache;
+ OverDefinedCacheTy OverDefinedCache;
+
public:
void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
@@ -435,6 +443,7 @@ namespace {
return BBI->second;
}
+ void printCache(Function &F, raw_ostream &OS);
/// clear - Empty the cache.
void clear() {
SeenBlocks.clear();
@@ -458,16 +467,71 @@ namespace {
};
}
+
+namespace {
+
+  /// An assembly annotator class to print LazyValueInfoCache information in
+  /// comments.
+ class LazyValueInfoAnnotatedWriter : public AssemblyAnnotationWriter {
+ const LazyValueInfoCache* LVICache;
+
+ public:
+ LazyValueInfoAnnotatedWriter(const LazyValueInfoCache *L) : LVICache(L) {}
+
+ virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
+ formatted_raw_ostream &OS) {
+ auto ODI = LVICache->OverDefinedCache.find(const_cast<BasicBlock*>(BB));
+ if (ODI == LVICache->OverDefinedCache.end())
+ return;
+ OS << "; OverDefined values for block are: \n";
+ for (auto *V : ODI->second)
+ OS << ";" << *V << "\n";
+
+      // Check whether lattice values are defined for the function's arguments.
+ auto *F = const_cast<Function *>(BB->getParent());
+ for (auto &Arg : F->args()) {
+ auto VI = LVICache->ValueCache.find_as(&Arg);
+ if (VI == LVICache->ValueCache.end())
+ continue;
+ auto BBI = VI->second->BlockVals.find(const_cast<BasicBlock *>(BB));
+ if (BBI != VI->second->BlockVals.end())
+ OS << "; CachedLatticeValue for: '" << *VI->first << "' is: '"
+ << BBI->second << "'\n";
+ }
+ }
+
+ virtual void emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS) {
+
+ auto VI = LVICache->ValueCache.find_as(const_cast<Instruction *>(I));
+ if (VI == LVICache->ValueCache.end())
+ return;
+ OS << "; CachedLatticeValues for: '" << *VI->first << "'\n";
+ for (auto &BV : VI->second->BlockVals) {
+ OS << "; at beginning of BasicBlock: '";
+ BV.first->printAsOperand(OS, false);
+ OS << "' LatticeVal: '" << BV.second << "' \n";
+ }
+ }
+};
+}
+
+void LazyValueInfoCache::printCache(Function &F, raw_ostream &OS) {
+ LazyValueInfoAnnotatedWriter Writer(this);
+ F.print(OS, &Writer);
+}
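
printCache relies on the AssemblyAnnotationWriter hook: override the emit callbacks and hand the writer to Function::print, which interleaves the annotations with the printed IR. A minimal sketch of the pattern with a trivial, purely illustrative annotation:

#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Sketch: emit a comment before every basic block and instruction.
struct CountingAnnotator : AssemblyAnnotationWriter {
  void emitBasicBlockStartAnnot(const BasicBlock *BB,
                                formatted_raw_ostream &OS) override {
    OS << "; block '" << BB->getName() << "'\n";
  }
  void emitInstructionAnnot(const Instruction *I,
                            formatted_raw_ostream &OS) override {
    OS << "; " << I->getNumUses() << " use(s)\n";
  }
};

void printAnnotated(Function &F) {
  CountingAnnotator Annot;
  F.print(errs(), &Annot); // annotations appear inline with the IR
}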
+
void LazyValueInfoCache::eraseValue(Value *V) {
- SmallVector<AssertingVH<BasicBlock>, 4> ToErase;
- for (auto &I : OverDefinedCache) {
- SmallPtrSetImpl<Value *> &ValueSet = I.second;
+ for (auto I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E;) {
+ // Copy and increment the iterator immediately so we can erase behind
+ // ourselves.
+ auto Iter = I++;
+ SmallPtrSetImpl<Value *> &ValueSet = Iter->second;
ValueSet.erase(V);
if (ValueSet.empty())
- ToErase.push_back(I.first);
+ OverDefinedCache.erase(Iter);
}
- for (auto &BB : ToErase)
- OverDefinedCache.erase(BB);
ValueCache.erase(V);
}
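
The rewritten eraseValue drops the ToErase side vector and erases during iteration instead; DenseMap::erase only tombstones the bucket, so iterators to other elements stay valid, and copying then advancing the iterator before erasing is sufficient. The idiom in isolation:

#include "llvm/ADT/DenseMap.h"

// Sketch: remove entries whose value hit zero while walking the map. We
// advance past the doomed entry before erasing it, exactly as above.
void pruneZeros(llvm::DenseMap<int, int> &M) {
  for (auto I = M.begin(), E = M.end(); I != E;) {
    auto Iter = I++; // copy, then advance, so we can erase behind ourselves
    if (Iter->second == 0)
      M.erase(Iter);
  }
}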
@@ -480,7 +544,7 @@ void LVIValueHandle::deleted() {
void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
// Shortcut if we have never seen this block.
- DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB);
+ DenseSet<PoisoningVH<BasicBlock> >::iterator I = SeenBlocks.find(BB);
if (I == SeenBlocks.end())
return;
SeenBlocks.erase(I);
@@ -563,7 +627,7 @@ namespace {
/// This stack holds the state of the value solver during a query.
/// It basically emulates the callstack of the naive
/// recursive value lookup process.
- std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack;
+ SmallVector<std::pair<BasicBlock*, Value*>, 8> BlockValueStack;
/// Keeps track of which block-value pairs are in BlockValueStack.
DenseSet<std::pair<BasicBlock*, Value*> > BlockValueSet;
@@ -576,7 +640,7 @@ namespace {
DEBUG(dbgs() << "PUSH: " << *BV.second << " in " << BV.first->getName()
<< "\n");
- BlockValueStack.push(BV);
+ BlockValueStack.push_back(BV);
return true;
}
@@ -629,6 +693,11 @@ namespace {
TheCache.clear();
}
+  /// Print the LazyValueInfoCache.
+ void printCache(Function &F, raw_ostream &OS) {
+ TheCache.printCache(F, OS);
+ }
+
/// This is part of the update interface to inform the cache
/// that a block has been deleted.
void eraseBlock(BasicBlock *BB) {
@@ -646,24 +715,50 @@ namespace {
} // end anonymous namespace
void LazyValueInfoImpl::solve() {
+ SmallVector<std::pair<BasicBlock *, Value *>, 8> StartingStack(
+ BlockValueStack.begin(), BlockValueStack.end());
+
+ unsigned processedCount = 0;
while (!BlockValueStack.empty()) {
- std::pair<BasicBlock*, Value*> &e = BlockValueStack.top();
+ processedCount++;
+    // Abort if we have to process too many values to get a result for this one.
+    // Because the overdefined cache is currently per-block (to avoid
+    // naming-related issues, i.e., so it can give different results for the
+    // same name in different blocks), overdefined results don't get cached
+    // globally, which in turn means we will often try to rediscover the same
+    // overdefined result again and again. Once something like PredicateInfo
+    // is used in LVI or CVP, we should be able to make the overdefined cache
+    // global and remove this throttle.
+ if (processedCount > MaxProcessedPerValue) {
+ DEBUG(dbgs() << "Giving up on stack because we are getting too deep\n");
+      // Mark the original queries as overdefined so we don't retry them.
+ while (!StartingStack.empty()) {
+ std::pair<BasicBlock *, Value *> &e = StartingStack.back();
+ TheCache.insertResult(e.second, e.first,
+ LVILatticeVal::getOverdefined());
+ StartingStack.pop_back();
+ }
+ BlockValueSet.clear();
+ BlockValueStack.clear();
+ return;
+ }
+ std::pair<BasicBlock *, Value *> e = BlockValueStack.back();
assert(BlockValueSet.count(e) && "Stack value should be in BlockValueSet!");
if (solveBlockValue(e.second, e.first)) {
// The work item was completely processed.
- assert(BlockValueStack.top() == e && "Nothing should have been pushed!");
+ assert(BlockValueStack.back() == e && "Nothing should have been pushed!");
assert(TheCache.hasCachedValueInfo(e.second, e.first) &&
"Result should be in cache!");
DEBUG(dbgs() << "POP " << *e.second << " in " << e.first->getName()
<< " = " << TheCache.getCachedValueInfo(e.second, e.first) << "\n");
- BlockValueStack.pop();
+ BlockValueStack.pop_back();
BlockValueSet.erase(e);
} else {
// More work needs to be done before revisiting.
- assert(BlockValueStack.top() != e && "Stack should have been pushed!");
+ assert(BlockValueStack.back() != e && "Stack should have been pushed!");
}
}
}
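
The new solve() is a worklist with a fuse: each iteration either retires the top item or pushes its prerequisites, and once the budget is exceeded the queries that started the walk are force-cached as overdefined so the same work is not redone on the next query. Reduced to its skeleton (names here are hypothetical, not the LVI types):

#include <vector>

// Sketch of solve()'s shape. Step() stands in for solveBlockValue (returns
// true when the top item is fully solved, pushes prerequisites otherwise);
// CacheConservative() stands in for caching an overdefined result.
template <typename Item, typename StepT, typename CacheT>
void solveBudgeted(std::vector<Item> &Stack, unsigned Budget, StepT Step,
                   CacheT CacheConservative) {
  std::vector<Item> Starting(Stack.begin(), Stack.end());
  unsigned Processed = 0;
  while (!Stack.empty()) {
    if (++Processed > Budget) {
      // Budget exhausted: pessimize only the queries that started this
      // walk, then drop all in-flight work.
      for (Item &I : Starting)
        CacheConservative(I);
      Stack.clear();
      return;
    }
    Item Top = Stack.back();
    if (Step(Top, Stack)) // solved: nothing new was pushed, retire it
      Stack.pop_back();
    // Otherwise Step pushed dependencies; revisit Top after they resolve.
  }
}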
@@ -839,13 +934,19 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV,
}
// Loop over all of our predecessors, merging what we know from them into
- // result.
- bool EdgesMissing = false;
+  // result. If we encounter an unexplored predecessor, we eagerly explore it
+  // in a depth-first manner. In practice, this has the effect of discovering
+  // paths we can't analyze eagerly without spending compile time analyzing
+  // other paths. This heuristic benefits from the fact that predecessors are
+  // frequently arranged such that dominating ones come first and we quickly
+  // find a path to function entry. TODO: We should consider explicitly
+  // canonicalizing to make this true rather than relying on this happy
+  // accident.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
LVILatticeVal EdgeResult;
- EdgesMissing |= !getEdgeValue(Val, *PI, BB, EdgeResult);
- if (EdgesMissing)
- continue;
+ if (!getEdgeValue(Val, *PI, BB, EdgeResult))
+ // Explore that input, then return here
+ return false;
Result.mergeIn(EdgeResult, DL);
@@ -866,8 +967,6 @@ bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV,
return true;
}
}
- if (EdgesMissing)
- return false;
// Return the merged value, which is more precise than 'overdefined'.
assert(!Result.isOverdefined());
@@ -880,8 +979,8 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV,
LVILatticeVal Result; // Start Undefined.
// Loop over all of our predecessors, merging what we know from them into
- // result.
- bool EdgesMissing = false;
+ // result. See the comment about the chosen traversal order in
+ // solveBlockValueNonLocal; the same reasoning applies here.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *PhiBB = PN->getIncomingBlock(i);
Value *PhiVal = PN->getIncomingValue(i);
@@ -889,9 +988,9 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV,
// Note that we can provide PN as the context value to getEdgeValue, even
// though the results will be cached, because PN is the value being used as
// the cache key in the caller.
- EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult, PN);
- if (EdgesMissing)
- continue;
+ if (!getEdgeValue(PhiVal, PhiBB, BB, EdgeResult, PN))
+ // Explore that input, then return here
+ return false;
Result.mergeIn(EdgeResult, DL);
@@ -905,8 +1004,6 @@ bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV,
return true;
}
}
- if (EdgesMissing)
- return false;
// Return the merged value, which is more precise than 'overdefined'.
assert(!Result.isOverdefined() && "Possible PHI in entry block?");
@@ -1333,14 +1430,14 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
unsigned BitWidth = Val->getType()->getIntegerBitWidth();
ConstantRange EdgesVals(BitWidth, DefaultCase/*isFullSet*/);
- for (SwitchInst::CaseIt i : SI->cases()) {
- ConstantRange EdgeVal(i.getCaseValue()->getValue());
+ for (auto Case : SI->cases()) {
+ ConstantRange EdgeVal(Case.getCaseValue()->getValue());
if (DefaultCase) {
// It is possible that the default destination is the destination of
// some cases. There is no need to perform difference for those cases.
- if (i.getCaseSuccessor() != BBTo)
+ if (Case.getCaseSuccessor() != BBTo)
EdgesVals = EdgesVals.difference(EdgeVal);
- } else if (i.getCaseSuccessor() == BBTo)
+ } else if (Case.getCaseSuccessor() == BBTo)
EdgesVals = EdgesVals.unionWith(EdgeVal);
}
Result = LVILatticeVal::getRange(std::move(EdgesVals));
@@ -1352,8 +1449,8 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
/// \brief Compute the value of Val on the edge BBFrom -> BBTo or the value at
/// the basic block if the edge does not constrain Val.
bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom,
- BasicBlock *BBTo, LVILatticeVal &Result,
- Instruction *CxtI) {
+ BasicBlock *BBTo, LVILatticeVal &Result,
+ Instruction *CxtI) {
// If already a constant, there is nothing to compute.
if (Constant *VC = dyn_cast<Constant>(Val)) {
Result = LVILatticeVal::get(VC);
@@ -1503,6 +1600,18 @@ void LazyValueInfo::releaseMemory() {
}
}
+bool LazyValueInfo::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+  // We need to invalidate if we have either failed to preserve this analysis's
+  // result directly or if any of its dependencies have been invalidated.
+ auto PAC = PA.getChecker<LazyValueAnalysis>();
+ if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
+ (DT && Inv.invalidate<DominatorTreeAnalysis>(F, PA)))
+ return true;
+
+ return false;
+}
+
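This is the new pass manager's standard invalidation idiom: a cached result survives only if its own analysis was preserved (directly or via a preserved set) and everything it depends on, here the dominator tree, also survived. The same skeleton for a hypothetical analysis:

#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

// Sketch: invalidate() for a result holding a DominatorTree pointer.
// "MyAnalysis"/"MyResult" are hypothetical names, not LLVM API.
struct MyResult {
  DominatorTree *DT = nullptr;
  bool invalidate(Function &F, const PreservedAnalyses &PA,
                  FunctionAnalysisManager::Invalidator &Inv);
};

struct MyAnalysis : AnalysisInfoMixin<MyAnalysis> {
  using Result = MyResult;
  static AnalysisKey Key;
  Result run(Function &, FunctionAnalysisManager &) { return {}; }
};
AnalysisKey MyAnalysis::Key;

bool MyResult::invalidate(Function &F, const PreservedAnalyses &PA,
                          FunctionAnalysisManager::Invalidator &Inv) {
  // Survive only if we were preserved and the dominator tree we cached
  // was not itself invalidated.
  auto PAC = PA.getChecker<MyAnalysis>();
  bool Preserved =
      PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>();
  return !Preserved || (DT && Inv.invalidate<DominatorTreeAnalysis>(F, PA));
}
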
void LazyValueInfoWrapperPass::releaseMemory() { Info.releaseMemory(); }
LazyValueInfo LazyValueAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
@@ -1510,7 +1619,7 @@ LazyValueInfo LazyValueAnalysis::run(Function &F, FunctionAnalysisManager &FAM)
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
- return LazyValueInfo(&AC, &TLI, DT);
+ return LazyValueInfo(&AC, &F.getParent()->getDataLayout(), &TLI, DT);
}
/// Returns true if we can statically tell that this value will never be a
@@ -1780,3 +1889,40 @@ void LazyValueInfo::eraseBlock(BasicBlock *BB) {
getImpl(PImpl, AC, &DL, DT).eraseBlock(BB);
}
}
+
+
+void LazyValueInfo::printCache(Function &F, raw_ostream &OS) {
+ if (PImpl) {
+ getImpl(PImpl, AC, DL, DT).printCache(F, OS);
+ }
+}
+
+namespace {
+// Printer class for LazyValueInfo results.
+class LazyValueInfoPrinter : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ LazyValueInfoPrinter() : FunctionPass(ID) {
+ initializeLazyValueInfoPrinterPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<LazyValueInfoWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ dbgs() << "LVI for function '" << F.getName() << "':\n";
+ auto &LVI = getAnalysis<LazyValueInfoWrapperPass>().getLVI();
+ LVI.printCache(F, dbgs());
+ return false;
+ }
+};
+}
+
+char LazyValueInfoPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(LazyValueInfoPrinter, "print-lazy-value-info",
+ "Lazy Value Info Printer Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
+INITIALIZE_PASS_END(LazyValueInfoPrinter, "print-lazy-value-info",
+ "Lazy Value Info Printer Pass", false, false)
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index e46541e6538d..96799a459bfc 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -312,21 +312,26 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan,
- AliasAnalysis *AA, bool *IsLoadCSE) {
- if (MaxInstsToScan == 0)
- MaxInstsToScan = ~0U;
-
- Value *Ptr = Load->getPointerOperand();
- Type *AccessTy = Load->getType();
-
- // We can never remove a volatile load
- if (Load->isVolatile())
- return nullptr;
-
- // Anything stronger than unordered is currently unimplemented.
+ AliasAnalysis *AA, bool *IsLoad,
+ unsigned *NumScanedInst) {
+  // Don't CSE a load that is volatile or has ordering stronger than unordered.
if (!Load->isUnordered())
return nullptr;
+ return FindAvailablePtrLoadStore(
+ Load->getPointerOperand(), Load->getType(), Load->isAtomic(), ScanBB,
+ ScanFrom, MaxInstsToScan, AA, IsLoad, NumScanedInst);
+}
+
+Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy,
+ bool AtLeastAtomic, BasicBlock *ScanBB,
+ BasicBlock::iterator &ScanFrom,
+ unsigned MaxInstsToScan,
+ AliasAnalysis *AA, bool *IsLoadCSE,
+ unsigned *NumScanedInst) {
+ if (MaxInstsToScan == 0)
+ MaxInstsToScan = ~0U;
+
const DataLayout &DL = ScanBB->getModule()->getDataLayout();
// Try to get the store size for the type.
@@ -344,6 +349,9 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
// Restore ScanFrom to expected value in case next test succeeds
ScanFrom++;
+ if (NumScanedInst)
+ ++(*NumScanedInst);
+
// Don't scan huge blocks.
if (MaxInstsToScan-- == 0)
return nullptr;
@@ -359,7 +367,7 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
// We can value forward from an atomic to a non-atomic, but not the
// other way around.
- if (LI->isAtomic() < Load->isAtomic())
+ if (LI->isAtomic() < AtLeastAtomic)
return nullptr;
if (IsLoadCSE)
@@ -378,7 +386,7 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
// We can value forward from an atomic to a non-atomic, but not the
// other way around.
- if (SI->isAtomic() < Load->isAtomic())
+ if (SI->isAtomic() < AtLeastAtomic)
return nullptr;
if (IsLoadCSE)
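
The split lets callers that do not yet have a LoadInst (e.g. when deciding whether a speculative load would be available) drive the scan with just a pointer and type, while the original entry point keeps its shape. A hedged usage sketch of the public entry point as declared in this hunk; the scan budget of 6 is arbitrary:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch: try to reuse an earlier load/store that already produced the
// value `Load` would read, scanning backwards within the load's block.
Value *tryForward(LoadInst *Load, AliasAnalysis *AA) {
  BasicBlock::iterator ScanFrom = Load->getIterator();
  bool IsLoadCSE = false;
  unsigned NumScanned = 0;
  if (Value *V = FindAvailableLoadedValue(Load, Load->getParent(), ScanFrom,
                                          /*MaxInstsToScan=*/6, AA, &IsLoadCSE,
                                          &NumScanned))
    return V; // IsLoadCSE says whether the source was another load
  return nullptr;
}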
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index bf8007213097..4ba12583ff83 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -135,21 +135,6 @@ bool VectorizerParams::isInterleaveForced() {
return ::VectorizationInterleave.getNumOccurrences() > 0;
}
-void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message,
- const Loop *TheLoop, const char *PassName,
- OptimizationRemarkEmitter &ORE) {
- DebugLoc DL = TheLoop->getStartLoc();
- const Value *V = TheLoop->getHeader();
- if (const Instruction *I = Message.getInstr()) {
- // If there is no debug location attached to the instruction, revert back to
- // using the loop's.
- if (I->getDebugLoc())
- DL = I->getDebugLoc();
- V = I->getParent();
- }
- ORE.emitOptimizationRemarkAnalysis(PassName, DL, V, Message.str());
-}
-
Value *llvm::stripIntegerCast(Value *V) {
if (auto *CI = dyn_cast<CastInst>(V))
if (CI->getOperand(0)->getType()->isIntegerTy())
@@ -172,11 +157,6 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
// Strip casts.
StrideVal = stripIntegerCast(StrideVal);
- // Replace symbolic stride by one.
- Value *One = ConstantInt::get(StrideVal->getType(), 1);
- ValueToValueMap RewriteMap;
- RewriteMap[StrideVal] = One;
-
ScalarEvolution *SE = PSE.getSE();
const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
const auto *CT =
@@ -518,7 +498,7 @@ class AccessAnalysis {
public:
/// \brief Read or write access location.
typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
- typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;
+ typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
MemoryDepChecker::DepCandidates &DA,
@@ -570,7 +550,7 @@ public:
DepChecker.clearDependences();
}
- MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }
+ MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
private:
typedef SetVector<MemAccessInfo> PtrAccessSet;
@@ -584,8 +564,8 @@ private:
const DataLayout &DL;
- /// Set of accesses that need a further dependence check.
- MemAccessInfoSet CheckDeps;
+ /// List of accesses that need a further dependence check.
+ MemAccessInfoList CheckDeps;
/// Set of pointers that are read only.
SmallPtrSet<Value*, 16> ReadOnlyPtr;
@@ -842,7 +822,7 @@ void AccessAnalysis::processMemAccesses() {
// there is no other write to the ptr - this is an optimization to
// catch "a[i] = a[i] + " without having to do a dependence check).
if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
- CheckDeps.insert(Access);
+ CheckDeps.push_back(Access);
IsRTCheckAnalysisNeeded = true;
}
@@ -1205,6 +1185,73 @@ bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
return false;
}
+/// Given a non-constant (unknown) dependence-distance \p Dist between two
+/// memory accesses that have the same stride, whose absolute value is given
+/// in \p Stride, and that have the same type size \p TypeByteSize,
+/// in a loop whose backedge-taken count is \p BackedgeTakenCount, check if
+/// it is possible to prove statically that the dependence distance is larger
+/// than the range that the accesses will travel through the execution of
+/// the loop. If so, return true; otherwise return false. This is useful, for
+/// example, in loops such as the following (PR31098):
+/// for (i = 0; i < D; ++i) {
+/// = out[i];
+/// out[i+D] =
+/// }
+static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
+ const SCEV &BackedgeTakenCount,
+ const SCEV &Dist, uint64_t Stride,
+ uint64_t TypeByteSize) {
+
+ // If we can prove that
+ // (**) |Dist| > BackedgeTakenCount * Step
+ // where Step is the absolute stride of the memory accesses in bytes,
+ // then there is no dependence.
+ //
+  // Rationale:
+  // We basically want to check if the absolute distance (|Dist/Step|)
+  // is >= the loop iteration count (or > BackedgeTakenCount).
+  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
+  // Section 4.2.1). Note that for vectorization it is sufficient to prove
+  // that the dependence distance is >= VF; this is checked elsewhere.
+  // But in some cases we can prune unknown dependence distances early, even
+  // before selecting the VF, and without a runtime test, by comparing the
+  // distance against the loop iteration count. Since the vectorized code
+  // will be executed only if LoopCount >= VF, proving distance >= LoopCount
+  // also guarantees that distance >= VF.
+  //
+ const uint64_t ByteStride = Stride * TypeByteSize;
+ const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
+ const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
+
+ const SCEV *CastedDist = &Dist;
+ const SCEV *CastedProduct = Product;
+ uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
+ uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
+
+  // The dependence distance can be positive/negative, so we sign extend Dist;
+  // the multiplication of the absolute stride in bytes and the
+  // BackedgeTakenCount is non-negative, so we zero extend Product.
+ if (DistTypeSize > ProductTypeSize)
+ CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
+ else
+ CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
+
+ // Is Dist - (BackedgeTakenCount * Step) > 0 ?
+ // (If so, then we have proven (**) because |Dist| >= Dist)
+ const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
+ if (SE.isKnownPositive(Minus))
+ return true;
+
+ // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
+ // (If so, then we have proven (**) because |Dist| >= -1*Dist)
+ const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
+ Minus = SE.getMinusSCEV(NegDist, CastedProduct);
+ if (SE.isKnownPositive(Minus))
+ return true;
+
+ return false;
+}
+
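Concretely, for the loop in the comment above with i32 elements and D = 8: Dist is 8*4 = 32 bytes, BackedgeTakenCount is 7, and the byte stride is 4, so 32 > 7*4 = 28 proves the accesses independent. The (**) predicate with plain integers standing in for the SCEV expressions:

#include <cstdint>
#include <cstdlib>

// Sketch of the (**) check with concrete integers in place of SCEVs: no
// dependence if |Dist| > BackedgeTakenCount * Stride * TypeByteSize.
bool safeDistance(int64_t Dist, uint64_t BackedgeTakenCount, uint64_t Stride,
                  uint64_t TypeByteSize) {
  uint64_t Span = BackedgeTakenCount * Stride * TypeByteSize;
  return static_cast<uint64_t>(std::llabs(Dist)) > Span;
}
// E.g. D = 8 with i32 elements: safeDistance(32, 7, 1, 4) is 32 > 28, so the
// two accesses can never overlap during the loop's execution.
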
/// \brief Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is type size in
/// bytes.
@@ -1292,21 +1339,26 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return Dependence::Unknown;
}
+ Type *ATy = APtr->getType()->getPointerElementType();
+ Type *BTy = BPtr->getType()->getPointerElementType();
+ auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
+ uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
+ uint64_t Stride = std::abs(StrideAPtr);
const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
if (!C) {
+ if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
+ isSafeDependenceDistance(DL, *(PSE.getSE()),
+ *(PSE.getBackedgeTakenCount()), *Dist, Stride,
+ TypeByteSize))
+ return Dependence::NoDep;
+
DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
ShouldRetryWithRuntimeCheck = true;
return Dependence::Unknown;
}
- Type *ATy = APtr->getType()->getPointerElementType();
- Type *BTy = BPtr->getType()->getPointerElementType();
- auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
- uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
-
const APInt &Val = C->getAPInt();
int64_t Distance = Val.getSExtValue();
- uint64_t Stride = std::abs(StrideAPtr);
// Attempt to prove strided accesses independent.
if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
@@ -1427,12 +1479,14 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
}
bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
- MemAccessInfoSet &CheckDeps,
+ MemAccessInfoList &CheckDeps,
const ValueToValueMap &Strides) {
MaxSafeDepDistBytes = -1;
- while (!CheckDeps.empty()) {
- MemAccessInfo CurAccess = *CheckDeps.begin();
+ SmallPtrSet<MemAccessInfo, 8> Visited;
+ for (MemAccessInfo CurAccess : CheckDeps) {
+ if (Visited.count(CurAccess))
+ continue;
// Get the relevant memory access set.
EquivalenceClasses<MemAccessInfo>::iterator I =
@@ -1446,7 +1500,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
// Check every access pair.
while (AI != AE) {
- CheckDeps.erase(*AI);
+ Visited.insert(*AI);
EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
while (OI != AE) {
// Check every accessing instruction pair in program order.
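
Switching CheckDeps from a SmallPtrSet to a SmallVector plus a separate Visited set keeps the de-duplication but makes the checks run in insertion order rather than pointer order, and replaces erase-while-draining with a mark. The pattern in isolation (Visit is a stand-in for the equivalence-class walk):

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"

// Sketch: process each unique item in first-insertion order. Duplicates in
// the list are harmless; the set filters them at visit time, and Visit may
// mark further items (e.g. a whole equivalence class) as already handled.
template <typename T, typename VisitT>
void visitInOrder(const llvm::SmallVectorImpl<T *> &Worklist, VisitT Visit) {
  llvm::SmallPtrSet<T *, 8> Visited;
  for (T *Item : Worklist) {
    if (!Visited.insert(Item).second)
      continue; // already handled earlier in the walk
    Visit(Item, Visited);
  }
}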
@@ -1885,7 +1939,10 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
Value *NewPtr = (Inst && TheLoop->contains(Inst))
? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
: Ptr;
- return {NewPtr, NewPtr};
+ // We must return a half-open range, which means incrementing Sc.
+ const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
+ Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
+ return {NewPtr, NewPtrPlusOne};
} else {
Value *Start = nullptr, *End = nullptr;
DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
diff --git a/lib/Analysis/LoopAnalysisManager.cpp b/lib/Analysis/LoopAnalysisManager.cpp
index 5be3ee341c9c..e4a0f90b2f71 100644
--- a/lib/Analysis/LoopAnalysisManager.cpp
+++ b/lib/Analysis/LoopAnalysisManager.cpp
@@ -31,24 +31,10 @@ bool LoopAnalysisManagerFunctionProxy::Result::invalidate(
FunctionAnalysisManager::Invalidator &Inv) {
// First compute the sequence of IR units covered by this proxy. We will want
// to visit this in postorder, but because this is a tree structure we can do
- // this by building a preorder sequence and walking it in reverse.
- SmallVector<Loop *, 4> PreOrderLoops, PreOrderWorklist;
- // Note that we want to walk the roots in reverse order because we will end
- // up reversing the preorder sequence. However, it happens that the loop nest
- // roots are in reverse order within the LoopInfo object. So we just walk
- // forward here.
- // FIXME: If we change the order of LoopInfo we will want to add a reverse
- // here.
- for (Loop *RootL : *LI) {
- assert(PreOrderWorklist.empty() &&
- "Must start with an empty preorder walk worklist.");
- PreOrderWorklist.push_back(RootL);
- do {
- Loop *L = PreOrderWorklist.pop_back_val();
- PreOrderWorklist.append(L->begin(), L->end());
- PreOrderLoops.push_back(L);
- } while (!PreOrderWorklist.empty());
- }
+  // this by building a preorder sequence and walking it backwards. We also
+  // want siblings in forward program order to match the LoopPassManager, so
+  // we get the preorder with siblings reversed.
+ SmallVector<Loop *, 4> PreOrderLoops = LI->getLoopsInReverseSiblingPreorder();
// If this proxy or the loop info is going to be invalidated, we also need
// to clear all the keys coming from that analysis. We also completely blow
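
getLoopsInReverseSiblingPreorder packages exactly the walk deleted above: popping a stack reverses sibling order once, so appending children in forward order yields a preorder with siblings reversed, and traversing that sequence backwards gives the postorder with siblings in program order. A toy version of the worklist trick:

#include <vector>

// Sketch: preorder with reversed siblings via an explicit worklist. SubLoops
// stands in for a loop's children; everything here is a toy stand-in.
struct ToyLoop {
  std::vector<ToyLoop *> SubLoops;
};

std::vector<ToyLoop *>
reverseSiblingPreorder(const std::vector<ToyLoop *> &Roots) {
  std::vector<ToyLoop *> Out, Worklist;
  for (ToyLoop *Root : Roots) {
    Worklist.push_back(Root);
    while (!Worklist.empty()) {
      ToyLoop *L = Worklist.back();
      Worklist.pop_back();
      // Children pushed in forward order pop in reverse order.
      Worklist.insert(Worklist.end(), L->SubLoops.begin(), L->SubLoops.end());
      Out.push_back(L);
    }
  }
  return Out; // walk this backwards for a sibling-forward postorder
}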
@@ -145,7 +131,6 @@ LoopAnalysisManagerFunctionProxy::run(Function &F,
PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
PreservedAnalyses PA;
- PA.preserve<AssumptionAnalysis>();
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
PA.preserve<LoopAnalysisManagerFunctionProxy>();
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index f449ce94d57c..ff68810abb82 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -40,9 +40,9 @@ template class llvm::LoopInfoBase<BasicBlock, Loop>;
// Always verify loopinfo if expensive checking is enabled.
#ifdef EXPENSIVE_CHECKS
-static bool VerifyLoopInfo = true;
+bool llvm::VerifyLoopInfo = true;
#else
-static bool VerifyLoopInfo = false;
+bool llvm::VerifyLoopInfo = false;
#endif
static cl::opt<bool,true>
VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
@@ -211,9 +211,11 @@ bool Loop::isSafeToClone() const {
MDNode *Loop::getLoopID() const {
MDNode *LoopID = nullptr;
- if (isLoopSimplifyForm()) {
- LoopID = getLoopLatch()->getTerminator()->getMetadata(LLVMContext::MD_loop);
+ if (BasicBlock *Latch = getLoopLatch()) {
+ LoopID = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop);
} else {
+ assert(!getLoopLatch() &&
+ "The loop should have no single latch at this point");
// Go through each predecessor of the loop header and check the
// terminator for the metadata.
BasicBlock *H = getHeader();
@@ -248,11 +250,12 @@ void Loop::setLoopID(MDNode *LoopID) const {
assert(LoopID->getNumOperands() > 0 && "Loop ID needs at least one operand");
assert(LoopID->getOperand(0) == LoopID && "Loop ID should refer to itself");
- if (isLoopSimplifyForm()) {
- getLoopLatch()->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
+ if (BasicBlock *Latch = getLoopLatch()) {
+ Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
return;
}
+  assert(!getLoopLatch() &&
+         "The loop should have no single latch at this point");
BasicBlock *H = getHeader();
for (BasicBlock *BB : this->blocks()) {
TerminatorInst *TI = BB->getTerminator();
@@ -610,6 +613,15 @@ LoopInfo::LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree) {
analyze(DomTree);
}
+bool LoopInfo::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &) {
+ // Check whether the analysis, all analyses on functions, or the function's
+ // CFG have been preserved.
+ auto PAC = PA.getChecker<LoopAnalysis>();
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
+ PAC.preservedSet<CFGAnalyses>());
+}
+
void LoopInfo::markAsRemoved(Loop *Unloop) {
assert(!Unloop->isInvalid() && "Loop has already been removed");
Unloop->invalidate();
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 3f4a07942154..0b5f6266e373 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -54,6 +54,8 @@ public:
}
return false;
}
+
+ StringRef getPassName() const override { return "Print Loop IR"; }
};
char PrintLoopPassWrapper::ID = 0;
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 2d8274040d39..b8c444904723 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -50,30 +50,30 @@ struct AllocFnsTy {
// FIXME: certain users need more information. E.g., SimplifyLibCalls needs to
// know which functions are nounwind, noalias, nocapture parameters, etc.
-static const std::pair<LibFunc::Func, AllocFnsTy> AllocationFnData[] = {
- {LibFunc::malloc, {MallocLike, 1, 0, -1}},
- {LibFunc::valloc, {MallocLike, 1, 0, -1}},
- {LibFunc::Znwj, {OpNewLike, 1, 0, -1}}, // new(unsigned int)
- {LibFunc::ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow)
- {LibFunc::Znwm, {OpNewLike, 1, 0, -1}}, // new(unsigned long)
- {LibFunc::ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned long, nothrow)
- {LibFunc::Znaj, {OpNewLike, 1, 0, -1}}, // new[](unsigned int)
- {LibFunc::ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow)
- {LibFunc::Znam, {OpNewLike, 1, 0, -1}}, // new[](unsigned long)
- {LibFunc::ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned long, nothrow)
- {LibFunc::msvc_new_int, {OpNewLike, 1, 0, -1}}, // new(unsigned int)
- {LibFunc::msvc_new_int_nothrow, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow)
- {LibFunc::msvc_new_longlong, {OpNewLike, 1, 0, -1}}, // new(unsigned long long)
- {LibFunc::msvc_new_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new(unsigned long long, nothrow)
- {LibFunc::msvc_new_array_int, {OpNewLike, 1, 0, -1}}, // new[](unsigned int)
- {LibFunc::msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow)
- {LibFunc::msvc_new_array_longlong, {OpNewLike, 1, 0, -1}}, // new[](unsigned long long)
- {LibFunc::msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned long long, nothrow)
- {LibFunc::calloc, {CallocLike, 2, 0, 1}},
- {LibFunc::realloc, {ReallocLike, 2, 1, -1}},
- {LibFunc::reallocf, {ReallocLike, 2, 1, -1}},
- {LibFunc::strdup, {StrDupLike, 1, -1, -1}},
- {LibFunc::strndup, {StrDupLike, 2, 1, -1}}
+static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = {
+ {LibFunc_malloc, {MallocLike, 1, 0, -1}},
+ {LibFunc_valloc, {MallocLike, 1, 0, -1}},
+ {LibFunc_Znwj, {OpNewLike, 1, 0, -1}}, // new(unsigned int)
+ {LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow)
+ {LibFunc_Znwm, {OpNewLike, 1, 0, -1}}, // new(unsigned long)
+ {LibFunc_ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new(unsigned long, nothrow)
+ {LibFunc_Znaj, {OpNewLike, 1, 0, -1}}, // new[](unsigned int)
+ {LibFunc_ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow)
+ {LibFunc_Znam, {OpNewLike, 1, 0, -1}}, // new[](unsigned long)
+ {LibFunc_ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1}}, // new[](unsigned long, nothrow)
+ {LibFunc_msvc_new_int, {OpNewLike, 1, 0, -1}}, // new(unsigned int)
+ {LibFunc_msvc_new_int_nothrow, {MallocLike, 2, 0, -1}}, // new(unsigned int, nothrow)
+ {LibFunc_msvc_new_longlong, {OpNewLike, 1, 0, -1}}, // new(unsigned long long)
+ {LibFunc_msvc_new_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new(unsigned long long, nothrow)
+ {LibFunc_msvc_new_array_int, {OpNewLike, 1, 0, -1}}, // new[](unsigned int)
+ {LibFunc_msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned int, nothrow)
+ {LibFunc_msvc_new_array_longlong, {OpNewLike, 1, 0, -1}}, // new[](unsigned long long)
+ {LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1}}, // new[](unsigned long long, nothrow)
+ {LibFunc_calloc, {CallocLike, 2, 0, 1}},
+ {LibFunc_realloc, {ReallocLike, 2, 1, -1}},
+ {LibFunc_reallocf, {ReallocLike, 2, 1, -1}},
+ {LibFunc_strdup, {StrDupLike, 1, -1, -1}},
+ {LibFunc_strndup, {StrDupLike, 2, 1, -1}}
// TODO: Handle "int posix_memalign(void **, size_t, size_t)"
};
@@ -106,12 +106,12 @@ getAllocationDataForFunction(const Function *Callee, AllocType AllocTy,
const TargetLibraryInfo *TLI) {
// Make sure that the function is available.
StringRef FnName = Callee->getName();
- LibFunc::Func TLIFn;
+ LibFunc TLIFn;
if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
return None;
const auto *Iter = find_if(
- AllocationFnData, [TLIFn](const std::pair<LibFunc::Func, AllocFnsTy> &P) {
+ AllocationFnData, [TLIFn](const std::pair<LibFunc, AllocFnsTy> &P) {
return P.first == TLIFn;
});
@@ -183,7 +183,7 @@ static Optional<AllocFnsTy> getAllocationSize(const Value *V,
static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) {
ImmutableCallSite CS(LookThroughBitCast ? V->stripPointerCasts() : V);
- return CS && CS.paramHasAttr(AttributeSet::ReturnIndex, Attribute::NoAlias);
+ return CS && CS.hasRetAttr(Attribute::NoAlias);
}
@@ -333,33 +333,33 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
return nullptr;
StringRef FnName = Callee->getName();
- LibFunc::Func TLIFn;
+ LibFunc TLIFn;
if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
return nullptr;
unsigned ExpectedNumParams;
- if (TLIFn == LibFunc::free ||
- TLIFn == LibFunc::ZdlPv || // operator delete(void*)
- TLIFn == LibFunc::ZdaPv || // operator delete[](void*)
- TLIFn == LibFunc::msvc_delete_ptr32 || // operator delete(void*)
- TLIFn == LibFunc::msvc_delete_ptr64 || // operator delete(void*)
- TLIFn == LibFunc::msvc_delete_array_ptr32 || // operator delete[](void*)
- TLIFn == LibFunc::msvc_delete_array_ptr64) // operator delete[](void*)
+ if (TLIFn == LibFunc_free ||
+ TLIFn == LibFunc_ZdlPv || // operator delete(void*)
+ TLIFn == LibFunc_ZdaPv || // operator delete[](void*)
+ TLIFn == LibFunc_msvc_delete_ptr32 || // operator delete(void*)
+ TLIFn == LibFunc_msvc_delete_ptr64 || // operator delete(void*)
+ TLIFn == LibFunc_msvc_delete_array_ptr32 || // operator delete[](void*)
+ TLIFn == LibFunc_msvc_delete_array_ptr64) // operator delete[](void*)
ExpectedNumParams = 1;
- else if (TLIFn == LibFunc::ZdlPvj || // delete(void*, uint)
- TLIFn == LibFunc::ZdlPvm || // delete(void*, ulong)
- TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)
- TLIFn == LibFunc::ZdaPvj || // delete[](void*, uint)
- TLIFn == LibFunc::ZdaPvm || // delete[](void*, ulong)
- TLIFn == LibFunc::ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow)
- TLIFn == LibFunc::msvc_delete_ptr32_int || // delete(void*, uint)
- TLIFn == LibFunc::msvc_delete_ptr64_longlong || // delete(void*, ulonglong)
- TLIFn == LibFunc::msvc_delete_ptr32_nothrow || // delete(void*, nothrow)
- TLIFn == LibFunc::msvc_delete_ptr64_nothrow || // delete(void*, nothrow)
- TLIFn == LibFunc::msvc_delete_array_ptr32_int || // delete[](void*, uint)
- TLIFn == LibFunc::msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong)
- TLIFn == LibFunc::msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow)
- TLIFn == LibFunc::msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow)
+ else if (TLIFn == LibFunc_ZdlPvj || // delete(void*, uint)
+ TLIFn == LibFunc_ZdlPvm || // delete(void*, ulong)
+ TLIFn == LibFunc_ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)
+ TLIFn == LibFunc_ZdaPvj || // delete[](void*, uint)
+ TLIFn == LibFunc_ZdaPvm || // delete[](void*, ulong)
+ TLIFn == LibFunc_ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow)
+ TLIFn == LibFunc_msvc_delete_ptr32_int || // delete(void*, uint)
+ TLIFn == LibFunc_msvc_delete_ptr64_longlong || // delete(void*, ulonglong)
+ TLIFn == LibFunc_msvc_delete_ptr32_nothrow || // delete(void*, nothrow)
+ TLIFn == LibFunc_msvc_delete_ptr64_nothrow || // delete(void*, nothrow)
+ TLIFn == LibFunc_msvc_delete_array_ptr32_int || // delete[](void*, uint)
+ TLIFn == LibFunc_msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong)
+ TLIFn == LibFunc_msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow)
+ TLIFn == LibFunc_msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow)
ExpectedNumParams = 2;
else
return nullptr;
@@ -394,10 +394,8 @@ static APInt getSizeWithOverflow(const SizeOffsetType &Data) {
/// If RoundToAlign is true, then Size is rounded up to the alignment of allocas,
/// byval arguments, and global variables.
bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
- const TargetLibraryInfo *TLI, bool RoundToAlign,
- llvm::ObjSizeMode Mode) {
- ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(),
- RoundToAlign, Mode);
+ const TargetLibraryInfo *TLI, ObjectSizeOpts Opts) {
+ ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), Opts);
SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
if (!Visitor.bothKnown(Data))
return false;
@@ -414,19 +412,23 @@ ConstantInt *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
"ObjectSize must be a call to llvm.objectsize!");
bool MaxVal = cast<ConstantInt>(ObjectSize->getArgOperand(1))->isZero();
- ObjSizeMode Mode;
+ ObjectSizeOpts EvalOptions;
// Unless we have to fold this to something, try to be as accurate as
// possible.
if (MustSucceed)
- Mode = MaxVal ? ObjSizeMode::Max : ObjSizeMode::Min;
+ EvalOptions.EvalMode =
+ MaxVal ? ObjectSizeOpts::Mode::Max : ObjectSizeOpts::Mode::Min;
else
- Mode = ObjSizeMode::Exact;
+ EvalOptions.EvalMode = ObjectSizeOpts::Mode::Exact;
+
+ EvalOptions.NullIsUnknownSize =
+ cast<ConstantInt>(ObjectSize->getArgOperand(2))->isOne();
// FIXME: Does it make sense to just return a failure value if the size won't
// fit in the output and `!MustSucceed`?
uint64_t Size;
auto *ResultType = cast<IntegerType>(ObjectSize->getType());
- if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, false, Mode) &&
+ if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, EvalOptions) &&
isUIntN(ResultType->getBitWidth(), Size))
return ConstantInt::get(ResultType, Size);
@@ -443,7 +445,7 @@ STATISTIC(ObjectVisitorLoad,
APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
- if (RoundToAlign && Align)
+ if (Options.RoundToAlign && Align)
return APInt(IntTyBits, alignTo(Size.getZExtValue(), Align));
return Size;
}
@@ -451,9 +453,8 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL,
const TargetLibraryInfo *TLI,
LLVMContext &Context,
- bool RoundToAlign,
- ObjSizeMode Mode)
- : DL(DL), TLI(TLI), RoundToAlign(RoundToAlign), Mode(Mode) {
+ ObjectSizeOpts Options)
+ : DL(DL), TLI(TLI), Options(Options) {
// Pointer size must be rechecked for each object visited since it could have
// a different address space.
}
@@ -596,7 +597,9 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) {
}
SizeOffsetType
-ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull&) {
+ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull& CPN) {
+ if (Options.NullIsUnknownSize && CPN.getType()->getAddressSpace() == 0)
+ return unknown();
return std::make_pair(Zero, Zero);
}
@@ -663,12 +666,12 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) {
if (TrueResult == FalseResult) {
return TrueSide;
}
- if (Mode == ObjSizeMode::Min) {
+ if (Options.EvalMode == ObjectSizeOpts::Mode::Min) {
if (TrueResult.slt(FalseResult))
return TrueSide;
return FalseSide;
}
- if (Mode == ObjSizeMode::Max) {
+ if (Options.EvalMode == ObjectSizeOpts::Mode::Max) {
if (TrueResult.sgt(FalseResult))
return TrueSide;
return FalseSide;
@@ -719,7 +722,10 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
- ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, RoundToAlign);
+ ObjectSizeOpts ObjSizeOptions;
+ ObjSizeOptions.RoundToAlign = RoundToAlign;
+
+ ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, ObjSizeOptions);
SizeOffsetType Const = Visitor.compute(V);
if (Visitor.bothKnown(Const))
return std::make_pair(ConstantInt::get(Context, Const.first),
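
With ObjectSizeOpts, call sites set named fields rather than threading positional booleans and a mode enum through every signature. A hedged sketch of a caller using the getObjectSize overload from this hunk; all option fields shown appear in the diff:

#include "llvm/Analysis/MemoryBuiltins.h"
using namespace llvm;

// Sketch: ask for a conservative (minimum) size, rounded to alignment,
// treating null in address space 0 as an unknown-size object.
bool minObjectSize(const Value *Ptr, const DataLayout &DL,
                   const TargetLibraryInfo *TLI, uint64_t &Size) {
  ObjectSizeOpts Opts;
  Opts.EvalMode = ObjectSizeOpts::Mode::Min;
  Opts.RoundToAlign = true;
  Opts.NullIsUnknownSize = true;
  return getObjectSize(Ptr, Size, DL, TLI, Opts);
}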
diff --git a/lib/Analysis/MemoryLocation.cpp b/lib/Analysis/MemoryLocation.cpp
index a0ae72f1415f..9db6c499129a 100644
--- a/lib/Analysis/MemoryLocation.cpp
+++ b/lib/Analysis/MemoryLocation.cpp
@@ -142,9 +142,9 @@ MemoryLocation MemoryLocation::getForArgument(ImmutableCallSite CS,
// for memcpy/memset. This is particularly important because the
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
// whenever possible.
- LibFunc::Func F;
+ LibFunc F;
if (CS.getCalledFunction() && TLI.getLibFunc(*CS.getCalledFunction(), F) &&
- F == LibFunc::memset_pattern16 && TLI.has(F)) {
+ F == LibFunc_memset_pattern16 && TLI.has(F)) {
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memset_pattern16");
if (ArgIdx == 1)
diff --git a/lib/Transforms/Utils/MemorySSA.cpp b/lib/Analysis/MemorySSA.cpp
index 1ce4225f09cc..910170561abf 100644
--- a/lib/Transforms/Utils/MemorySSA.cpp
+++ b/lib/Analysis/MemorySSA.cpp
@@ -10,7 +10,7 @@
// This file implements the MemorySSA class.
//
//===----------------------------------------------------------------===//
-#include "llvm/Transforms/Utils/MemorySSA.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
@@ -44,10 +44,6 @@
#define DEBUG_TYPE "memoryssa"
using namespace llvm;
-STATISTIC(NumClobberCacheLookups, "Number of Memory SSA version cache lookups");
-STATISTIC(NumClobberCacheHits, "Number of Memory SSA version cache hits");
-STATISTIC(NumClobberCacheInserts, "Number of MemorySSA version cache inserts");
-
INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
@@ -145,8 +141,8 @@ public:
private:
union {
- ImmutableCallSite CS;
- MemoryLocation Loc;
+ ImmutableCallSite CS;
+ MemoryLocation Loc;
};
};
}
@@ -218,12 +214,16 @@ static bool instructionClobbersQuery(MemoryDef *MD,
AliasAnalysis &AA) {
Instruction *DefInst = MD->getMemoryInst();
assert(DefInst && "Defining instruction not actually an instruction");
+ ImmutableCallSite UseCS(UseInst);
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
// These intrinsics will show up as affecting memory, but they are just
// markers.
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
+ if (UseCS)
+ return false;
+ return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), UseLoc);
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
@@ -234,7 +234,6 @@ static bool instructionClobbersQuery(MemoryDef *MD,
}
}
- ImmutableCallSite UseCS(UseInst);
if (UseCS) {
ModRefInfo I = AA.getModRefInfo(DefInst, UseCS);
return I != MRI_NoModRef;
@@ -269,8 +268,8 @@ static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU,
}
// Return true when MD may alias MU, return false otherwise.
-bool defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
- AliasAnalysis &AA) {
+bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
+ AliasAnalysis &AA) {
return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA);
}
}
@@ -302,7 +301,6 @@ static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
Instruction *Inst = MD->getMemoryInst();
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
switch (II->getIntrinsicID()) {
- case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc);
default:
@@ -320,95 +318,8 @@ static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
// FIXME: We should handle invariant groups, as well. It's a bit harder,
// because we need to pay close attention to invariant group barriers.
return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
- AA.pointsToConstantMemory(I));
-}
-
-/// Cache for our caching MemorySSA walker.
-class WalkerCache {
- DenseMap<ConstMemoryAccessPair, MemoryAccess *> Accesses;
- DenseMap<const MemoryAccess *, MemoryAccess *> Calls;
-
-public:
- MemoryAccess *lookup(const MemoryAccess *MA, const MemoryLocation &Loc,
- bool IsCall) const {
- ++NumClobberCacheLookups;
- MemoryAccess *R = IsCall ? Calls.lookup(MA) : Accesses.lookup({MA, Loc});
- if (R)
- ++NumClobberCacheHits;
- return R;
- }
-
- bool insert(const MemoryAccess *MA, MemoryAccess *To,
- const MemoryLocation &Loc, bool IsCall) {
- // This is fine for Phis, since there are times where we can't optimize
- // them. Making a def its own clobber is never correct, though.
- assert((MA != To || isa<MemoryPhi>(MA)) &&
- "Something can't clobber itself!");
-
- ++NumClobberCacheInserts;
- bool Inserted;
- if (IsCall)
- Inserted = Calls.insert({MA, To}).second;
- else
- Inserted = Accesses.insert({{MA, Loc}, To}).second;
-
- return Inserted;
- }
-
- bool remove(const MemoryAccess *MA, const MemoryLocation &Loc, bool IsCall) {
- return IsCall ? Calls.erase(MA) : Accesses.erase({MA, Loc});
- }
-
- void clear() {
- Accesses.clear();
- Calls.clear();
- }
-
- bool contains(const MemoryAccess *MA) const {
- for (auto &P : Accesses)
- if (P.first.first == MA || P.second == MA)
- return true;
- for (auto &P : Calls)
- if (P.first == MA || P.second == MA)
- return true;
- return false;
- }
-};
-
-/// Walks the defining uses of MemoryDefs. Stops after we hit something that has
-/// no defining use (e.g. a MemoryPhi or liveOnEntry). Note that, when comparing
-/// against a null def_chain_iterator, this will compare equal only after
-/// walking said Phi/liveOnEntry.
-struct def_chain_iterator
- : public iterator_facade_base<def_chain_iterator, std::forward_iterator_tag,
- MemoryAccess *> {
- def_chain_iterator() : MA(nullptr) {}
- def_chain_iterator(MemoryAccess *MA) : MA(MA) {}
-
- MemoryAccess *operator*() const { return MA; }
-
- def_chain_iterator &operator++() {
- // N.B. liveOnEntry has a null defining access.
- if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
- MA = MUD->getDefiningAccess();
- else
- MA = nullptr;
- return *this;
- }
-
- bool operator==(const def_chain_iterator &O) const { return MA == O.MA; }
-
-private:
- MemoryAccess *MA;
-};
-
-static iterator_range<def_chain_iterator>
-def_chain(MemoryAccess *MA, MemoryAccess *UpTo = nullptr) {
-#ifdef EXPENSIVE_CHECKS
- assert((!UpTo || find(def_chain(MA), UpTo) != def_chain_iterator()) &&
- "UpTo isn't in the def chain!");
-#endif
- return make_range(def_chain_iterator(MA), def_chain_iterator(UpTo));
+                               AA.pointsToConstantMemory(
+                                   cast<LoadInst>(I)->getPointerOperand()));
}
/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
@@ -512,91 +423,24 @@ class ClobberWalker {
const MemorySSA &MSSA;
AliasAnalysis &AA;
DominatorTree &DT;
- WalkerCache &WC;
UpwardsMemoryQuery *Query;
- bool UseCache;
// Phi optimization bookkeeping
SmallVector<DefPath, 32> Paths;
DenseSet<ConstMemoryAccessPair> VisitedPhis;
- DenseMap<const BasicBlock *, MemoryAccess *> WalkTargetCache;
-
- void setUseCache(bool Use) { UseCache = Use; }
- bool shouldIgnoreCache() const {
- // UseCache will only be false when we're debugging, or when expensive
- // checks are enabled. In either case, we don't care deeply about speed.
- return LLVM_UNLIKELY(!UseCache);
- }
-
- void addCacheEntry(const MemoryAccess *What, MemoryAccess *To,
- const MemoryLocation &Loc) const {
-// EXPENSIVE_CHECKS because most of these queries are redundant.
-#ifdef EXPENSIVE_CHECKS
- assert(MSSA.dominates(To, What));
-#endif
- if (shouldIgnoreCache())
- return;
- WC.insert(What, To, Loc, Query->IsCall);
- }
-
- MemoryAccess *lookupCache(const MemoryAccess *MA, const MemoryLocation &Loc) {
- return shouldIgnoreCache() ? nullptr : WC.lookup(MA, Loc, Query->IsCall);
- }
-
- void cacheDefPath(const DefPath &DN, MemoryAccess *Target) const {
- if (shouldIgnoreCache())
- return;
-
- for (MemoryAccess *MA : def_chain(DN.First, DN.Last))
- addCacheEntry(MA, Target, DN.Loc);
-
- // DefPaths only express the path we walked. So, DN.Last could either be a
- // thing we want to cache, or not.
- if (DN.Last != Target)
- addCacheEntry(DN.Last, Target, DN.Loc);
- }
/// Find the nearest def or phi that `From` can legally be optimized to.
- ///
- /// FIXME: Deduplicate this with MSSA::findDominatingDef. Ideally, MSSA should
- /// keep track of this information for us, and allow us O(1) lookups of this
- /// info.
- MemoryAccess *getWalkTarget(const MemoryPhi *From) {
+ const MemoryAccess *getWalkTarget(const MemoryPhi *From) const {
assert(From->getNumOperands() && "Phi with no operands?");
BasicBlock *BB = From->getBlock();
- auto At = WalkTargetCache.find(BB);
- if (At != WalkTargetCache.end())
- return At->second;
-
- SmallVector<const BasicBlock *, 8> ToCache;
- ToCache.push_back(BB);
-
MemoryAccess *Result = MSSA.getLiveOnEntryDef();
DomTreeNode *Node = DT.getNode(BB);
while ((Node = Node->getIDom())) {
- auto At = WalkTargetCache.find(BB);
- if (At != WalkTargetCache.end()) {
- Result = At->second;
- break;
- }
-
- auto *Accesses = MSSA.getBlockAccesses(Node->getBlock());
- if (Accesses) {
- auto Iter = find_if(reverse(*Accesses), [](const MemoryAccess &MA) {
- return !isa<MemoryUse>(MA);
- });
- if (Iter != Accesses->rend()) {
- Result = const_cast<MemoryAccess *>(&*Iter);
- break;
- }
- }
-
- ToCache.push_back(Node->getBlock());
+ auto *Defs = MSSA.getBlockDefs(Node->getBlock());
+ if (Defs)
+ return &*Defs->rbegin();
}
-
- for (const BasicBlock *BB : ToCache)
- WalkTargetCache.insert({BB, Result});
return Result;
}
@@ -606,7 +450,6 @@ class ClobberWalker {
/// both.
MemoryAccess *Result;
bool IsKnownClobber;
- bool FromCache;
};
/// Walk to the next Phi or Clobber in the def chain starting at Desc.Last.
@@ -614,29 +457,25 @@ class ClobberWalker {
/// StopAt.
///
/// This does not test for whether StopAt is a clobber
- UpwardsWalkResult walkToPhiOrClobber(DefPath &Desc,
- MemoryAccess *StopAt = nullptr) {
+ UpwardsWalkResult
+ walkToPhiOrClobber(DefPath &Desc,
+ const MemoryAccess *StopAt = nullptr) const {
assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world");
for (MemoryAccess *Current : def_chain(Desc.Last)) {
Desc.Last = Current;
if (Current == StopAt)
- return {Current, false, false};
+ return {Current, false};
if (auto *MD = dyn_cast<MemoryDef>(Current))
if (MSSA.isLiveOnEntryDef(MD) ||
instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA))
- return {MD, true, false};
-
- // Cache checks must be done last, because if Current is a clobber, the
- // cache will contain the clobber for Current.
- if (MemoryAccess *MA = lookupCache(Current, Desc.Loc))
- return {MA, true, true};
+ return {MD, true};
}
assert(isa<MemoryPhi>(Desc.Last) &&
"Ended at a non-clobber that's not a phi?");
- return {Desc.Last, false, false};
+ return {Desc.Last, false};
}
void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches,
@@ -666,7 +505,7 @@ class ClobberWalker {
/// If this returns None, NewPaused is a vector of searches that terminated
/// at StopWhere. Otherwise, NewPaused is left in an unspecified state.
Optional<TerminatedPath>
- getBlockingAccess(MemoryAccess *StopWhere,
+ getBlockingAccess(const MemoryAccess *StopWhere,
SmallVectorImpl<ListIndex> &PausedSearches,
SmallVectorImpl<ListIndex> &NewPaused,
SmallVectorImpl<TerminatedPath> &Terminated) {
@@ -701,11 +540,11 @@ class ClobberWalker {
UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere);
if (Res.IsKnownClobber) {
- assert(Res.Result != StopWhere || Res.FromCache);
+ assert(Res.Result != StopWhere);
// We hit a clobber when walking. If it doesn't dominate StopWhere, that's
// a failure.
TerminatedPath Term{Res.Result, PathIndex};
- if (!Res.FromCache || !MSSA.dominates(Res.Result, StopWhere))
+ if (!MSSA.dominates(Res.Result, StopWhere))
return Term;
// Otherwise, it's a valid thing to potentially optimize to.
@@ -830,7 +669,7 @@ class ClobberWalker {
assert(!MSSA.isLiveOnEntryDef(Current) &&
"liveOnEntry wasn't treated as a clobber?");
- MemoryAccess *Target = getWalkTarget(Current);
+ const auto *Target = getWalkTarget(Current);
// If a TerminatedPath doesn't dominate Target, then it wasn't a legal
// optimization for the prior phi.
assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) {
@@ -842,8 +681,6 @@ class ClobberWalker {
// For the moment, this is fine, since we do nothing with blocker info.
if (Optional<TerminatedPath> Blocker = getBlockingAccess(
Target, PausedSearches, NewPaused, TerminatedPaths)) {
- // Cache our work on the blocking node, since we know that's correct.
- cacheDefPath(Paths[Blocker->LastNode], Blocker->Clobber);
// Find the node we started at. We can't search based on N->Last, since
// we may have gone around a loop with a different MemoryLocation.
@@ -908,7 +745,7 @@ class ClobberWalker {
// If we couldn't find the dominating phi/liveOnEntry in the above loop,
// do it now.
if (!DefChainEnd)
- for (MemoryAccess *MA : def_chain(Target))
+ for (auto *MA : def_chain(const_cast<MemoryAccess *>(Target)))
DefChainEnd = MA;
// If any of the terminated paths don't dominate the phi we'll try to
@@ -946,35 +783,6 @@ class ClobberWalker {
}
}
- /// Caches everything in an OptznResult.
- void cacheOptResult(const OptznResult &R) {
- if (R.OtherClobbers.empty()) {
- // If we're not going to be caching OtherClobbers, don't bother with
- // marking visited/etc.
- for (const DefPath &N : const_def_path(R.PrimaryClobber.LastNode))
- cacheDefPath(N, R.PrimaryClobber.Clobber);
- return;
- }
-
- // PrimaryClobber is our answer. If we can cache anything back, we need to
- // stop caching when we visit PrimaryClobber.
- SmallBitVector Visited(Paths.size());
- for (const DefPath &N : const_def_path(R.PrimaryClobber.LastNode)) {
- Visited[defPathIndex(N)] = true;
- cacheDefPath(N, R.PrimaryClobber.Clobber);
- }
-
- for (const TerminatedPath &P : R.OtherClobbers) {
- for (const DefPath &N : const_def_path(P.LastNode)) {
- ListIndex NIndex = defPathIndex(N);
- if (Visited[NIndex])
- break;
- Visited[NIndex] = true;
- cacheDefPath(N, P.Clobber);
- }
- }
- }
-
void verifyOptResult(const OptznResult &R) const {
assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) {
return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber);
@@ -987,17 +795,14 @@ class ClobberWalker {
}
public:
- ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT,
- WalkerCache &WC)
- : MSSA(MSSA), AA(AA), DT(DT), WC(WC), UseCache(true) {}
+ ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT)
+ : MSSA(MSSA), AA(AA), DT(DT) {}
- void reset() { WalkTargetCache.clear(); }
+ void reset() {}
/// Finds the nearest clobber for the given query, optimizing phis if
/// possible.
- MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q,
- bool UseWalkerCache = true) {
- setUseCache(UseWalkerCache);
+ MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q) {
Query = &Q;
MemoryAccess *Current = Start;
@@ -1012,13 +817,11 @@ public:
UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc);
MemoryAccess *Result;
if (WalkResult.IsKnownClobber) {
- cacheDefPath(FirstDesc, WalkResult.Result);
Result = WalkResult.Result;
} else {
OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last),
Current, Q.StartingLoc);
verifyOptResult(OptRes);
- cacheOptResult(OptRes);
resetPhiOptznState();
Result = OptRes.PrimaryClobber.Clobber;
}
@@ -1049,41 +852,10 @@ struct RenamePassData {
} // anonymous namespace
namespace llvm {
-/// \brief A MemorySSAWalker that does AA walks and caching of lookups to
-/// disambiguate accesses.
-///
-/// FIXME: The current implementation of this can take quadratic space in rare
-/// cases. This can be fixed, but it is something to note until it is fixed.
-///
-/// In order to trigger this behavior, you need to store to N distinct locations
-/// (that AA can prove don't alias), perform M stores to other memory
-/// locations that AA can prove don't alias any of the initial N locations, and
-/// then load from all of the N locations. In this case, we insert M cache
-/// entries for each of the N loads.
-///
-/// For example:
-/// define i32 @foo() {
-/// %a = alloca i32, align 4
-/// %b = alloca i32, align 4
-/// store i32 0, i32* %a, align 4
-/// store i32 0, i32* %b, align 4
-///
-/// ; Insert M stores to other memory that doesn't alias %a or %b here
-///
-/// %c = load i32, i32* %a, align 4 ; Caches M entries in
-/// ; CachedUpwardsClobberingAccess for the
-/// ; MemoryLocation %a
-/// %d = load i32, i32* %b, align 4 ; Caches M entries in
-/// ; CachedUpwardsClobberingAccess for the
-/// ; MemoryLocation %b
-///
-/// ; For completeness' sake, loading %a or %b again would not cache *another*
-/// ; M entries.
-/// %r = add i32 %c, %d
-/// ret i32 %r
-/// }
+/// \brief A MemorySSAWalker that does AA walks to disambiguate accesses. It
+/// no longer does caching on its own, but the name has been retained for the
+/// moment.
class MemorySSA::CachingWalker final : public MemorySSAWalker {
- WalkerCache Cache;
ClobberWalker Walker;
bool AutoResetWalker;
@@ -1104,10 +876,7 @@ public:
/// answer a clobber query.
void setAutoResetWalker(bool AutoReset) { AutoResetWalker = AutoReset; }
- /// Drop the walker's persistent data structures. At the moment, this means
- /// "drop the walker's cache of BasicBlocks ->
- /// earliest-MemoryAccess-we-can-optimize-to". This is necessary if we're
- /// going to have DT updates, if we remove MemoryAccesses, etc.
+ /// Drop the walker's persistent data structures.
void resetClobberWalker() { Walker.reset(); }
void verify(const MemorySSA *MSSA) override {
@@ -1116,18 +885,37 @@ public:
}
};
+void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal,
+ bool RenameAllUses) {
+ // Pass through values to our successors
+ for (const BasicBlock *S : successors(BB)) {
+ auto It = PerBlockAccesses.find(S);
+ // Rename the phi nodes in our successor block
+ if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front()))
+ continue;
+ AccessList *Accesses = It->second.get();
+ auto *Phi = cast<MemoryPhi>(&Accesses->front());
+ if (RenameAllUses) {
+ int PhiIndex = Phi->getBasicBlockIndex(BB);
+ assert(PhiIndex != -1 && "Incomplete phi during partial rename");
+ Phi->setIncomingValue(PhiIndex, IncomingVal);
+ } else
+ Phi->addIncoming(IncomingVal, BB);
+ }
+}
+
/// \brief Rename a single basic block into MemorySSA form.
/// Uses the standard SSA renaming algorithm.
/// \returns The new incoming value.
-MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB,
- MemoryAccess *IncomingVal) {
+MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal,
+ bool RenameAllUses) {
auto It = PerBlockAccesses.find(BB);
// Skip most processing if the list is empty.
if (It != PerBlockAccesses.end()) {
AccessList *Accesses = It->second.get();
for (MemoryAccess &L : *Accesses) {
if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&L)) {
- if (MUD->getDefiningAccess() == nullptr)
+ if (MUD->getDefiningAccess() == nullptr || RenameAllUses)
MUD->setDefiningAccess(IncomingVal);
if (isa<MemoryDef>(&L))
IncomingVal = &L;
@@ -1136,18 +924,6 @@ MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB,
}
}
}
-
- // Pass through values to our successors
- for (const BasicBlock *S : successors(BB)) {
- auto It = PerBlockAccesses.find(S);
- // Rename the phi nodes in our successor block
- if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front()))
- continue;
- AccessList *Accesses = It->second.get();
- auto *Phi = cast<MemoryPhi>(&Accesses->front());
- Phi->addIncoming(IncomingVal, BB);
- }
-
return IncomingVal;
}
@@ -1156,11 +932,19 @@ MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB,
/// We walk the dominator tree in preorder, renaming accesses, and then filling
/// in phi nodes in our successors.
void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal,
- SmallPtrSet<BasicBlock *, 16> &Visited) {
+ SmallPtrSetImpl<BasicBlock *> &Visited,
+ bool SkipVisited, bool RenameAllUses) {
SmallVector<RenamePassData, 32> WorkStack;
- IncomingVal = renameBlock(Root->getBlock(), IncomingVal);
+ // Skip everything if we already renamed this block and we are skipping.
+ // Note: You can't sink this into the if, because we need it to occur
+ // regardless of whether we skip blocks or not.
+ bool AlreadyVisited = !Visited.insert(Root->getBlock()).second;
+ if (SkipVisited && AlreadyVisited)
+ return;
+
+ IncomingVal = renameBlock(Root->getBlock(), IncomingVal, RenameAllUses);
+ renameSuccessorPhis(Root->getBlock(), IncomingVal, RenameAllUses);
WorkStack.push_back({Root, Root->begin(), IncomingVal});
- Visited.insert(Root->getBlock());
while (!WorkStack.empty()) {
DomTreeNode *Node = WorkStack.back().DTN;
@@ -1173,20 +957,25 @@ void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal,
DomTreeNode *Child = *ChildIt;
++WorkStack.back().ChildIt;
BasicBlock *BB = Child->getBlock();
- Visited.insert(BB);
- IncomingVal = renameBlock(BB, IncomingVal);
+ // Note: You can't sink this into the if, because we need it to occur
+ // regardless of whether we skip blocks or not.
+ AlreadyVisited = !Visited.insert(BB).second;
+ if (SkipVisited && AlreadyVisited) {
+ // We already visited this during our renaming, which can happen when
+ // being asked to rename multiple blocks. Figure out the incoming val,
+ // which is the last def.
+ // Incoming value can only change if there is a block def, and in that
+ // case, it's the last block def in the list.
+ if (auto *BlockDefs = getWritableBlockDefs(BB))
+ IncomingVal = &*BlockDefs->rbegin();
+ } else
+ IncomingVal = renameBlock(BB, IncomingVal, RenameAllUses);
+ renameSuccessorPhis(BB, IncomingVal, RenameAllUses);
WorkStack.push_back({Child, Child->begin(), IncomingVal});
}
}
}
-/// \brief Compute dominator levels, used by the phi insertion algorithm above.
-void MemorySSA::computeDomLevels(DenseMap<DomTreeNode *, unsigned> &DomLevels) {
- for (auto DFI = df_begin(DT->getRootNode()), DFE = df_end(DT->getRootNode());
- DFI != DFE; ++DFI)
- DomLevels[*DFI] = DFI.getPathLength() - 1;
-}
-
/// \brief This handles unreachable block accesses by deleting phi nodes in
/// unreachable blocks, and marking all other unreachable MemoryAccess's as
/// being uses of the live on entry definition.
@@ -1247,6 +1036,13 @@ MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
Res.first->second = make_unique<AccessList>();
return Res.first->second.get();
}
+MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) {
+ auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr));
+
+ if (Res.second)
+ Res.first->second = make_unique<DefsList>();
+ return Res.first->second.get();
+}
/// This class is a batch walker of all MemoryUse's in the program, and points
/// their defining access at the thing that actually clobbers them. Because it
@@ -1315,7 +1111,10 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
// Pop everything that doesn't dominate the current block off the stack,
// increment the PopEpoch to account for this.
- while (!VersionStack.empty()) {
+ while (true) {
+ assert(
+ !VersionStack.empty() &&
+ "Version stack should have liveOnEntry sentinel dominating everything");
BasicBlock *BackBlock = VersionStack.back()->getBlock();
if (DT->dominates(BackBlock, BB))
break;
@@ -1323,6 +1122,7 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
VersionStack.pop_back();
++PopEpoch;
}
+
for (MemoryAccess &MA : *Accesses) {
auto *MU = dyn_cast<MemoryUse>(&MA);
if (!MU) {
@@ -1443,20 +1243,13 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
/// Optimize uses to point to their actual clobbering definitions.
void MemorySSA::OptimizeUses::optimizeUses() {
-
- // We perform a non-recursive top-down dominator tree walk
- struct StackInfo {
- const DomTreeNode *Node;
- DomTreeNode::const_iterator Iter;
- };
-
SmallVector<MemoryAccess *, 16> VersionStack;
- SmallVector<StackInfo, 16> DomTreeWorklist;
DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo;
VersionStack.push_back(MSSA->getLiveOnEntryDef());
unsigned long StackEpoch = 1;
unsigned long PopEpoch = 1;
+ // We perform a non-recursive top-down dominator tree walk.
for (const auto *DomNode : depth_first(DT->getRootNode()))
optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack,
LocStackInfo);
@@ -1477,14 +1270,8 @@ void MemorySSA::placePHINodes(
});
// Now place MemoryPhi nodes.
- for (auto &BB : IDFBlocks) {
- // Insert phi node
- AccessList *Accesses = getOrCreateAccessList(BB);
- MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
- ValueToMemoryAccess[BB] = Phi;
- // Phi's always are placed at the front of the block.
- Accesses->push_front(Phi);
- }
+ for (auto &BB : IDFBlocks)
+ createMemoryPhi(BB);
}
void MemorySSA::buildMemorySSA() {
@@ -1511,15 +1298,21 @@ void MemorySSA::buildMemorySSA() {
BBNumbers[&B] = NextBBNum++;
bool InsertIntoDef = false;
AccessList *Accesses = nullptr;
+ DefsList *Defs = nullptr;
for (Instruction &I : B) {
MemoryUseOrDef *MUD = createNewAccess(&I);
if (!MUD)
continue;
- InsertIntoDef |= isa<MemoryDef>(MUD);
if (!Accesses)
Accesses = getOrCreateAccessList(&B);
Accesses->push_back(MUD);
+ if (isa<MemoryDef>(MUD)) {
+ InsertIntoDef = true;
+ if (!Defs)
+ Defs = getOrCreateDefsList(&B);
+ Defs->push_back(*MUD);
+ }
}
if (InsertIntoDef)
DefiningBlocks.insert(&B);
@@ -1558,14 +1351,94 @@ MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
return Walker.get();
}
+// This is a helper function used by the creation routines. It places NewAccess
+// into the access and defs lists for a given basic block, at the given
+// insertion point.
+void MemorySSA::insertIntoListsForBlock(MemoryAccess *NewAccess,
+ const BasicBlock *BB,
+ InsertionPlace Point) {
+ auto *Accesses = getOrCreateAccessList(BB);
+ if (Point == Beginning) {
+ // If it's a phi node, it goes first, otherwise, it goes after any phi
+ // nodes.
+ if (isa<MemoryPhi>(NewAccess)) {
+ Accesses->push_front(NewAccess);
+ auto *Defs = getOrCreateDefsList(BB);
+ Defs->push_front(*NewAccess);
+ } else {
+ auto AI = find_if_not(
+ *Accesses, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); });
+ Accesses->insert(AI, NewAccess);
+ if (!isa<MemoryUse>(NewAccess)) {
+ auto *Defs = getOrCreateDefsList(BB);
+ auto DI = find_if_not(
+ *Defs, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); });
+ Defs->insert(DI, *NewAccess);
+ }
+ }
+ } else {
+ Accesses->push_back(NewAccess);
+ if (!isa<MemoryUse>(NewAccess)) {
+ auto *Defs = getOrCreateDefsList(BB);
+ Defs->push_back(*NewAccess);
+ }
+ }
+ BlockNumberingValid.erase(BB);
+}
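+// For instance (illustrative): inserting a non-phi MemoryDef at Beginning of
+// a block whose access list is [Phi, Use, Def] yields [Phi, NewDef, Use, Def]
+// there and [Phi, NewDef, Def] in the defs list, since non-phi accesses are
+// placed after any phis but before everything else.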
+
+void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB,
+ AccessList::iterator InsertPt) {
+ auto *Accesses = getWritableBlockAccesses(BB);
+ bool WasEnd = InsertPt == Accesses->end();
+ Accesses->insert(AccessList::iterator(InsertPt), What);
+ if (!isa<MemoryUse>(What)) {
+ auto *Defs = getOrCreateDefsList(BB);
+ // If we got asked to insert at the end, we have an easy job, just shove it
+ // at the end. If we got asked to insert before an existing def, we also get
+ // an iterator. If we got asked to insert before a use, we have to hunt for
+ // the next def.
+ if (WasEnd) {
+ Defs->push_back(*What);
+ } else if (isa<MemoryDef>(InsertPt)) {
+ Defs->insert(InsertPt->getDefsIterator(), *What);
+ } else {
+ while (InsertPt != Accesses->end() && !isa<MemoryDef>(InsertPt))
+ ++InsertPt;
+ // Either we found a def, or we are inserting at the end
+ if (InsertPt == Accesses->end())
+ Defs->push_back(*What);
+ else
+ Defs->insert(InsertPt->getDefsIterator(), *What);
+ }
+ }
+ BlockNumberingValid.erase(BB);
+}
+
+// Move What before Where in the IR. The end result is that What will belong to
+// the right lists and have the right Block set, but will not otherwise be
+// correct. It will not have the right defining access, and if it is a def,
+// things below it will not properly be updated.
+void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
+ AccessList::iterator Where) {
+ // Keep it in the lookup tables, remove from the lists
+ removeFromLists(What, false);
+ What->setBlock(BB);
+ insertIntoListsBefore(What, BB, Where);
+}
+
+void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
+ InsertionPlace Point) {
+ removeFromLists(What, false);
+ What->setBlock(BB);
+ insertIntoListsForBlock(What, BB, Point);
+}
+
MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB");
- AccessList *Accesses = getOrCreateAccessList(BB);
MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
- ValueToMemoryAccess[BB] = Phi;
// Phi's always are placed at the front of the block.
- Accesses->push_front(Phi);
- BlockNumberingValid.erase(BB);
+ insertIntoListsForBlock(Phi, BB, Beginning);
+ ValueToMemoryAccess[BB] = Phi;
return Phi;
}
@@ -1580,72 +1453,19 @@ MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
return NewAccess;
}
-MemoryAccess *MemorySSA::createMemoryAccessInBB(Instruction *I,
- MemoryAccess *Definition,
- const BasicBlock *BB,
- InsertionPlace Point) {
- MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
- auto *Accesses = getOrCreateAccessList(BB);
- if (Point == Beginning) {
- // It goes after any phi nodes
- auto AI = find_if(
- *Accesses, [](const MemoryAccess &MA) { return !isa<MemoryPhi>(MA); });
-
- Accesses->insert(AI, NewAccess);
- } else {
- Accesses->push_back(NewAccess);
- }
- BlockNumberingValid.erase(BB);
- return NewAccess;
-}
-
-MemoryUseOrDef *MemorySSA::createMemoryAccessBefore(Instruction *I,
- MemoryAccess *Definition,
- MemoryUseOrDef *InsertPt) {
- assert(I->getParent() == InsertPt->getBlock() &&
- "New and old access must be in the same block");
- MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
- auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
- Accesses->insert(AccessList::iterator(InsertPt), NewAccess);
- BlockNumberingValid.erase(InsertPt->getBlock());
- return NewAccess;
-}
-
-MemoryUseOrDef *MemorySSA::createMemoryAccessAfter(Instruction *I,
- MemoryAccess *Definition,
- MemoryAccess *InsertPt) {
- assert(I->getParent() == InsertPt->getBlock() &&
- "New and old access must be in the same block");
- MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
- auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
- Accesses->insertAfter(AccessList::iterator(InsertPt), NewAccess);
- BlockNumberingValid.erase(InsertPt->getBlock());
- return NewAccess;
-}
-
-void MemorySSA::spliceMemoryAccessAbove(MemoryDef *Where,
- MemoryUseOrDef *What) {
- assert(What != getLiveOnEntryDef() &&
- Where != getLiveOnEntryDef() && "Can't splice (above) LOE.");
- assert(dominates(Where, What) && "Only upwards splices are permitted.");
-
- if (Where == What)
- return;
- if (isa<MemoryDef>(What)) {
- // TODO: possibly use removeMemoryAccess' more efficient RAUW
- What->replaceAllUsesWith(What->getDefiningAccess());
- What->setDefiningAccess(Where->getDefiningAccess());
- Where->setDefiningAccess(What);
+// Return true if the instruction has ordering constraints.
+// Note specifically that this only considers stores and loads
+// because others are still considered ModRef by getModRefInfo.
+static inline bool isOrdered(const Instruction *I) {
+ if (auto *SI = dyn_cast<StoreInst>(I)) {
+ if (!SI->isUnordered())
+ return true;
+ } else if (auto *LI = dyn_cast<LoadInst>(I)) {
+ if (!LI->isUnordered())
+ return true;
}
- AccessList *Src = getWritableBlockAccesses(What->getBlock());
- AccessList *Dest = getWritableBlockAccesses(Where->getBlock());
- Dest->splice(AccessList::iterator(Where), *Src, What);
-
- BlockNumberingValid.erase(What->getBlock());
- if (What->getBlock() != Where->getBlock())
- BlockNumberingValid.erase(Where->getBlock());
+ return false;
}
-
/// \brief Helper function to create new memory accesses
MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
// The assume intrinsic has a control dependency which we model by claiming
@@ -1658,7 +1478,15 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
// Find out what effect this instruction has on memory.
ModRefInfo ModRef = AA->getModRefInfo(I);
- bool Def = bool(ModRef & MRI_Mod);
+ // The isOrdered check is used to ensure that volatiles end up as defs
+ // (atomics end up as ModRef right now anyway). Until we separate the
+ // ordering chain from the memory chain, this enables people to see at least
+ // some relative ordering to volatiles. Note that getClobberingMemoryAccess
+ // will still give an answer that bypasses other volatile loads. TODO:
+ // Separate memory aliasing and ordering into two different chains so that we
+ // can precisely represent both "what memory will this read/write/is clobbered
+ // by" and "what instructions can I move this past".
+ bool Def = bool(ModRef & MRI_Mod) || isOrdered(I);
bool Use = bool(ModRef & MRI_Ref);
// It's possible for an instruction to not modify memory at all. During
@@ -1678,33 +1506,6 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
return MUD;
}
-MemoryAccess *MemorySSA::findDominatingDef(BasicBlock *UseBlock,
- enum InsertionPlace Where) {
- // Handle the initial case
- if (Where == Beginning)
- // The only thing that could define us at the beginning is a phi node
- if (MemoryPhi *Phi = getMemoryAccess(UseBlock))
- return Phi;
-
- DomTreeNode *CurrNode = DT->getNode(UseBlock);
- // Need to be defined by our dominator
- if (Where == Beginning)
- CurrNode = CurrNode->getIDom();
- Where = End;
- while (CurrNode) {
- auto It = PerBlockAccesses.find(CurrNode->getBlock());
- if (It != PerBlockAccesses.end()) {
- auto &Accesses = It->second;
- for (MemoryAccess &RA : reverse(*Accesses)) {
- if (isa<MemoryDef>(RA) || isa<MemoryPhi>(RA))
- return &RA;
- }
- }
- CurrNode = CurrNode->getIDom();
- }
- return LiveOnEntryDef.get();
-}
-
/// \brief Returns true if \p Replacer dominates \p Replacee .
bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
const MemoryAccess *Replacee) const {
@@ -1722,24 +1523,7 @@ bool MemorySSA::dominatesUse(const MemoryAccess *Replacer,
return true;
}
-/// \brief If all arguments of a MemoryPHI are defined by the same incoming
-/// argument, return that argument.
-static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
- MemoryAccess *MA = nullptr;
-
- for (auto &Arg : MP->operands()) {
- if (!MA)
- MA = cast<MemoryAccess>(Arg);
- else if (MA != Arg)
- return nullptr;
- }
- return MA;
-}
-
/// \brief Properly remove \p MA from all of MemorySSA's lookup tables.
-///
-/// Because of the way the intrusive list and use lists work, it is important to
-/// do removal in the right order.
void MemorySSA::removeFromLookups(MemoryAccess *MA) {
assert(MA->use_empty() &&
"Trying to remove memory access that still has uses");
@@ -1760,69 +1544,46 @@ void MemorySSA::removeFromLookups(MemoryAccess *MA) {
auto VMA = ValueToMemoryAccess.find(MemoryInst);
if (VMA->second == MA)
ValueToMemoryAccess.erase(VMA);
+}
+/// \brief Properly remove \p MA from all of MemorySSA's lists.
+///
+/// Because of the way the intrusive list and use lists work, it is important to
+/// do removal in the right order.
+/// ShouldDelete defaults to true, and will cause the memory access to also be
+/// deleted, not just removed.
+void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) {
+ // The access list owns the reference, so we erase it from the non-owning list
+ // first.
+ if (!isa<MemoryUse>(MA)) {
+ auto DefsIt = PerBlockDefs.find(MA->getBlock());
+ std::unique_ptr<DefsList> &Defs = DefsIt->second;
+ Defs->remove(*MA);
+ if (Defs->empty())
+ PerBlockDefs.erase(DefsIt);
+ }
+
+ // The erase call here will delete it. If we don't want it deleted, we call
+ // remove instead.
auto AccessIt = PerBlockAccesses.find(MA->getBlock());
std::unique_ptr<AccessList> &Accesses = AccessIt->second;
- Accesses->erase(MA);
+ if (ShouldDelete)
+ Accesses->erase(MA);
+ else
+ Accesses->remove(MA);
+
if (Accesses->empty())
PerBlockAccesses.erase(AccessIt);
}
-void MemorySSA::removeMemoryAccess(MemoryAccess *MA) {
- assert(!isLiveOnEntryDef(MA) && "Trying to remove the live on entry def");
- // We can only delete phi nodes if they have no uses, or we can replace all
- // uses with a single definition.
- MemoryAccess *NewDefTarget = nullptr;
- if (MemoryPhi *MP = dyn_cast<MemoryPhi>(MA)) {
- // Note that it is sufficient to know that all edges of the phi node have
- // the same argument. If they do, by the definition of dominance frontiers
- // (which we used to place this phi), that argument must dominate this phi,
- // and thus, must dominate the phi's uses, and so we will not hit the assert
- // below.
- NewDefTarget = onlySingleValue(MP);
- assert((NewDefTarget || MP->use_empty()) &&
- "We can't delete this memory phi");
- } else {
- NewDefTarget = cast<MemoryUseOrDef>(MA)->getDefiningAccess();
- }
-
- // Re-point the uses at our defining access
- if (!MA->use_empty()) {
- // Reset optimized on users of this store, and reset the uses.
- // A few notes:
- // 1. This is a slightly modified version of RAUW to avoid walking the
- // uses twice here.
- // 2. If we wanted to be complete, we would have to reset the optimized
- // flags on users of phi nodes if doing the below makes a phi node have all
- // the same arguments. Instead, we prefer users to removeMemoryAccess those
- // phi nodes, because doing it here would be N^3.
- if (MA->hasValueHandle())
- ValueHandleBase::ValueIsRAUWd(MA, NewDefTarget);
- // Note: We assume MemorySSA is not used in metadata since it's not really
- // part of the IR.
-
- while (!MA->use_empty()) {
- Use &U = *MA->use_begin();
- if (MemoryUse *MU = dyn_cast<MemoryUse>(U.getUser()))
- MU->resetOptimized();
- U.set(NewDefTarget);
- }
- }
-
- // The call below to erase will destroy MA, so we can't change the order we
- // are doing things here
- removeFromLookups(MA);
-}
-
void MemorySSA::print(raw_ostream &OS) const {
MemorySSAAnnotatedWriter Writer(this);
F.print(OS, &Writer);
}
-void MemorySSA::dump() const {
- MemorySSAAnnotatedWriter Writer(this);
- F.print(dbgs(), &Writer);
-}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); }
+#endif
void MemorySSA::verifyMemorySSA() const {
verifyDefUses(F);
@@ -1838,26 +1599,41 @@ void MemorySSA::verifyOrdering(Function &F) const {
// lists think, as well as the order in the blocks vs the order in the access
// lists.
SmallVector<MemoryAccess *, 32> ActualAccesses;
+ SmallVector<MemoryAccess *, 32> ActualDefs;
for (BasicBlock &B : F) {
const AccessList *AL = getBlockAccesses(&B);
+ const auto *DL = getBlockDefs(&B);
MemoryAccess *Phi = getMemoryAccess(&B);
- if (Phi)
+ if (Phi) {
ActualAccesses.push_back(Phi);
+ ActualDefs.push_back(Phi);
+ }
+
for (Instruction &I : B) {
MemoryAccess *MA = getMemoryAccess(&I);
- assert((!MA || AL) && "We have memory affecting instructions "
- "in this block but they are not in the "
- "access list");
- if (MA)
+ assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) &&
+ "We have memory affecting instructions "
+ "in this block but they are not in the "
+ "access list or defs list");
+ if (MA) {
ActualAccesses.push_back(MA);
+ if (isa<MemoryDef>(MA))
+ ActualDefs.push_back(MA);
+ }
}
// Either we hit the assert, really have no accesses, or we have both
- // accesses and an access list
- if (!AL)
+ // accesses and an access list.
+ // Same with defs.
+ if (!AL && !DL)
continue;
assert(AL->size() == ActualAccesses.size() &&
"We don't have the same number of accesses in the block as on the "
"access list");
+ assert((DL || ActualDefs.size() == 0) &&
+ "Either we should have a defs list, or we should have no defs");
+ assert((!DL || DL->size() == ActualDefs.size()) &&
+ "We don't have the same number of defs in the block as on the "
+ "def list");
auto ALI = AL->begin();
auto AAI = ActualAccesses.begin();
while (ALI != AL->end() && AAI != ActualAccesses.end()) {
@@ -1866,6 +1642,16 @@ void MemorySSA::verifyOrdering(Function &F) const {
++AAI;
}
ActualAccesses.clear();
+ if (DL) {
+ auto DLI = DL->begin();
+ auto ADI = ActualDefs.begin();
+ while (DLI != DL->end() && ADI != ActualDefs.end()) {
+ assert(&*DLI == *ADI && "Not the same defs in the same order");
+ ++DLI;
+ ++ADI;
+ }
+ }
+ ActualDefs.clear();
}
}
@@ -2066,8 +1852,11 @@ void MemoryUse::print(raw_ostream &OS) const {
}
void MemoryAccess::dump() const {
+// Cannot completely remove virtual function even in release mode.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
print(dbgs());
dbgs() << "\n";
+#endif
}
char MemorySSAPrinterLegacyPass::ID = 0;
@@ -2145,35 +1934,13 @@ MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A,
DominatorTree *D)
- : MemorySSAWalker(M), Walker(*M, *A, *D, Cache), AutoResetWalker(true) {}
+ : MemorySSAWalker(M), Walker(*M, *A, *D), AutoResetWalker(true) {}
MemorySSA::CachingWalker::~CachingWalker() {}
void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
- // TODO: We can do much better cache invalidation with differently stored
- // caches. For now, for MemoryUses, we simply remove them
- // from the cache, and kill the entire call/non-call cache for everything
- // else. The problem is for phis or defs, currently we'd need to follow use
- // chains down and invalidate anything below us in the chain that currently
- // terminates at this access.
-
- // See if this is a MemoryUse, if so, just remove the cached info. MemoryUse
- // is by definition never a barrier, so nothing in the cache could point to
- // this use. In that case, we only need invalidate the info for the use
- // itself.
-
- if (MemoryUse *MU = dyn_cast<MemoryUse>(MA)) {
- UpwardsMemoryQuery Q(MU->getMemoryInst(), MU);
- Cache.remove(MU, Q.StartingLoc, Q.IsCall);
- MU->resetOptimized();
- } else {
- // If it is not a use, the best we can do right now is destroy the cache.
- Cache.clear();
- }
-
-#ifdef EXPENSIVE_CHECKS
- verifyRemoved(MA);
-#endif
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
+ MUD->resetOptimized();
}
/// \brief Walk the use-def chains starting at \p MA and find
@@ -2184,8 +1951,7 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) {
MemoryAccess *New = Walker.findClobber(StartingAccess, Q);
#ifdef EXPENSIVE_CHECKS
- MemoryAccess *NewNoCache =
- Walker.findClobber(StartingAccess, Q, /*UseWalkerCache=*/false);
+ MemoryAccess *NewNoCache = Walker.findClobber(StartingAccess, Q);
assert(NewNoCache == New && "Cache made us hand back a different result?");
#endif
if (AutoResetWalker)
@@ -2215,9 +1981,6 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
Q.Inst = I;
Q.IsCall = false;
- if (auto *CacheResult = Cache.lookup(StartingUseOrDef, Loc, Q.IsCall))
- return CacheResult;
-
// Unlike the other function, do not walk to the def of a def, because we are
// handed something we already believe is the clobbering access.
MemoryAccess *DefiningAccess = isa<MemoryUse>(StartingUseOrDef)
@@ -2242,9 +2005,9 @@ MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
// If this is an already optimized use or def, return the optimized result.
// Note: Currently, we do not store the optimized def result because we'd need
// a separate field, since we can't use it as the defining access.
- if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess))
- if (MU->isOptimized())
- return MU->getDefiningAccess();
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess))
+ if (MUD->isOptimized())
+ return MUD->getOptimized();
const Instruction *I = StartingAccess->getMemoryInst();
UpwardsMemoryQuery Q(I, StartingAccess);
@@ -2254,14 +2017,10 @@ MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
if (!Q.IsCall && I->isFenceLike())
return StartingAccess;
- if (auto *CacheResult = Cache.lookup(StartingAccess, Q.StartingLoc, Q.IsCall))
- return CacheResult;
-
if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) {
MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
- Cache.insert(StartingAccess, LiveOnEntry, Q.StartingLoc, Q.IsCall);
- if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess))
- MU->setDefiningAccess(LiveOnEntry, true);
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess))
+ MUD->setOptimized(LiveOnEntry);
return LiveOnEntry;
}
@@ -2278,17 +2037,12 @@ MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
DEBUG(dbgs() << *DefiningAccess << "\n");
DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
DEBUG(dbgs() << *Result << "\n");
- if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess))
- MU->setDefiningAccess(Result, true);
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(StartingAccess))
+ MUD->setOptimized(Result);
return Result;
}
-// Verify that MA doesn't exist in any of the caches.
-void MemorySSA::CachingWalker::verifyRemoved(MemoryAccess *MA) {
- assert(!Cache.contains(MA) && "Found removed MemoryAccess in cache.");
-}
-
MemoryAccess *
DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp
new file mode 100644
index 000000000000..c63677fe5502
--- /dev/null
+++ b/lib/Analysis/MemorySSAUpdater.cpp
@@ -0,0 +1,494 @@
+//===-- MemorySSAUpdater.cpp - Memory SSA Updater--------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------===//
+//
+// This file implements the MemorySSAUpdater class.
+//
+//===----------------------------------------------------------------===//
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include <algorithm>
+
+#define DEBUG_TYPE "memoryssa"
+using namespace llvm;
+namespace llvm {
+// This is the marker algorithm from "Simple and Efficient Construction of
+// Static Single Assignment Form".
+// The simple, non-marker algorithm places phi nodes at every join point.
+// Here, we place markers, and only place phi nodes if they end up necessary.
+// They are only necessary if they break a cycle (IE we recursively visit
+// ourselves again), or we discover, while getting the value of the operands,
+// that there are two or more definitions needing to be merged.
+// This will still leave non-minimal form in the case of irreducible control
+// flow, where phi nodes may be in cycles with themselves but be unnecessary.
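+//
+// Illustrative sketch (block names hypothetical): given a loop
+//   entry -> header; header -> body; body -> header; header -> exit
+// asking for the previous def at the end of `body` recurses into `header`,
+// whose predecessors include `body` itself. Reaching `header` again while it
+// is still marked visited inserts a MemoryPhi there to break the cycle; if
+// its operands later all turn out identical, tryRemoveTrivialPhi cleans it
+// up.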
+MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(BasicBlock *BB) {
+ // Single predecessor case, just recurse, we can only have one definition.
+ if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+ return getPreviousDefFromEnd(Pred);
+ } else if (VisitedBlocks.count(BB)) {
+ // We hit our node again, meaning we had a cycle, we must insert a phi
+ // node to break it so we have an operand. The only case this will
+ // insert useless phis is if we have irreducible control flow.
+ return MSSA->createMemoryPhi(BB);
+ } else if (VisitedBlocks.insert(BB).second) {
+ // Mark us visited so we can detect a cycle
+ SmallVector<MemoryAccess *, 8> PhiOps;
+
+ // Recurse to get the values in our predecessors for placement of a
+ // potential phi node. This will insert phi nodes if we cycle in order to
+ // break the cycle and have an operand.
+ for (auto *Pred : predecessors(BB))
+ PhiOps.push_back(getPreviousDefFromEnd(Pred));
+
+ // Now try to simplify the ops to avoid placing a phi.
+ // This may return null if we never created a phi yet; that's okay.
+ MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MSSA->getMemoryAccess(BB));
+ bool PHIExistsButNeedsUpdate = false;
+ // See if the existing phi operands match what we need.
+ // Unlike normal SSA, we only allow one phi node per block, so we can't just
+ // create a new one.
+ if (Phi && Phi->getNumOperands() != 0)
+ if (!std::equal(Phi->op_begin(), Phi->op_end(), PhiOps.begin())) {
+ PHIExistsButNeedsUpdate = true;
+ }
+
+ // See if we can avoid the phi by simplifying it.
+ auto *Result = tryRemoveTrivialPhi(Phi, PhiOps);
+ // If we couldn't simplify, we may have to create a phi
+ if (Result == Phi) {
+ if (!Phi)
+ Phi = MSSA->createMemoryPhi(BB);
+
+ // These will have been filled in by the recursive read we did above.
+ if (PHIExistsButNeedsUpdate) {
+ std::copy(PhiOps.begin(), PhiOps.end(), Phi->op_begin());
+ std::copy(pred_begin(BB), pred_end(BB), Phi->block_begin());
+ } else {
+ unsigned i = 0;
+ for (auto *Pred : predecessors(BB))
+ Phi->addIncoming(PhiOps[i++], Pred);
+ }
+
+ Result = Phi;
+ }
+ if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Result))
+ InsertedPHIs.push_back(MP);
+ // Set ourselves up for the next variable by resetting visited state.
+ VisitedBlocks.erase(BB);
+ return Result;
+ }
+ llvm_unreachable("Should have hit one of the three cases above");
+}
+
+// This starts at the memory access, and goes backwards in the block to find the
+// previous definition. If a definition is not found in the block of the access,
+// it continues globally, creating phi nodes to ensure we have a single
+// definition.
+MemoryAccess *MemorySSAUpdater::getPreviousDef(MemoryAccess *MA) {
+ auto *LocalResult = getPreviousDefInBlock(MA);
+
+ return LocalResult ? LocalResult : getPreviousDefRecursive(MA->getBlock());
+}
+
+// This starts at the memory access, and goes backwards in the block to find
+// the previous definition. If the definition is not found in the block of the
+// access, it returns nullptr.
+MemoryAccess *MemorySSAUpdater::getPreviousDefInBlock(MemoryAccess *MA) {
+ auto *Defs = MSSA->getWritableBlockDefs(MA->getBlock());
+
+ // It's possible there are no defs, or we got handed the first def to start.
+ if (Defs) {
+ // If this is a def, we can just use the def iterators.
+ if (!isa<MemoryUse>(MA)) {
+ auto Iter = MA->getReverseDefsIterator();
+ ++Iter;
+ if (Iter != Defs->rend())
+ return &*Iter;
+ } else {
+ // Otherwise, we have to walk the all-access iterator.
+ auto Iter = MA->getReverseIterator();
+ ++Iter;
+ while (&*Iter != &*Defs->begin()) {
+ if (!isa<MemoryUse>(*Iter))
+ return &*Iter;
+ --Iter;
+ }
+ // At this point it must be pointing at the first def
+ assert(&*Iter == &*Defs->begin() &&
+ "Should have hit first def walking backwards");
+ return &*Iter;
+ }
+ }
+ return nullptr;
+}
+
+// This starts at the end of the block
+MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd(BasicBlock *BB) {
+ auto *Defs = MSSA->getWritableBlockDefs(BB);
+
+ if (Defs)
+ return &*Defs->rbegin();
+
+ return getPreviousDefRecursive(BB);
+}
+// Recurse over a set of phi uses to eliminate the trivial ones
+MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) {
+ if (!Phi)
+ return nullptr;
+ TrackingVH<MemoryAccess> Res(Phi);
+ SmallVector<TrackingVH<Value>, 8> Uses;
+ std::copy(Phi->user_begin(), Phi->user_end(), std::back_inserter(Uses));
+ for (auto &U : Uses) {
+ if (MemoryPhi *UsePhi = dyn_cast<MemoryPhi>(&*U)) {
+ auto OperRange = UsePhi->operands();
+ tryRemoveTrivialPhi(UsePhi, OperRange);
+ }
+ }
+ return Res;
+}
+
+// Eliminate trivial phis.
+// Phis are trivial if they are defined either by themselves, or by all the
+// same argument.
+// IE phi(a, a) or b = phi(a, b) or c = phi(a, a, c)
+// We recursively try to remove them.
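+// Illustrative chain (hypothetical values): replacing b = phi(a, b) with a
+// rewrites a user c = phi(b, b) into c = phi(a, a), which is itself trivial;
+// recursePhi revisits such users so the simplification propagates.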
+template <class RangeType>
+MemoryAccess *MemorySSAUpdater::tryRemoveTrivialPhi(MemoryPhi *Phi,
+ RangeType &Operands) {
+ // Detect equal or self arguments
+ MemoryAccess *Same = nullptr;
+ for (auto &Op : Operands) {
+ // If the same or self, good so far
+ if (Op == Phi || Op == Same)
+ continue;
+ // not the same, return the phi since it's not eliminatable by us
+ if (Same)
+ return Phi;
+ Same = cast<MemoryAccess>(Op);
+ }
+ // Never found a non-self reference, the phi is undef
+ if (Same == nullptr)
+ return MSSA->getLiveOnEntryDef();
+ if (Phi) {
+ Phi->replaceAllUsesWith(Same);
+ removeMemoryAccess(Phi);
+ }
+
+ // We should only end up recursing in case we replaced something, in which
+ // case, we may have made other Phis trivial.
+ return recursePhi(Same);
+}
+
+void MemorySSAUpdater::insertUse(MemoryUse *MU) {
+ InsertedPHIs.clear();
+ MU->setDefiningAccess(getPreviousDef(MU));
+ // Unlike for defs, there is no extra work to do. Because uses do not create
+ // new may-defs, there are only two cases:
+ //
+ // 1. There was a def already below us, and therefore, we should not have
+ // created a phi node because it was already needed for the def.
+ //
+ // 2. There is no def below us, and therefore, there is no extra renaming work
+ // to do.
+}
+
+// Set every incoming edge {BB, MP->getBlock()} of MemoryPhi MP to NewDef.
+void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB,
+ MemoryAccess *NewDef) {
+ // Replace every phi operand whose incoming block is BB with the new
+ // defining access.
+ int i = MP->getBasicBlockIndex(BB);
+ assert(i != -1 && "Should have found the basic block in the phi");
+ // We can't just compare i against getNumOperands since one is signed and the
+ // other not. So use it to index into the block iterator.
+ for (auto BBIter = MP->block_begin() + i; BBIter != MP->block_end();
+ ++BBIter) {
+ if (*BBIter != BB)
+ break;
+ MP->setIncomingValue(i, NewDef);
+ ++i;
+ }
+}
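+
+// For example (hypothetical CFG): a switch in BB with two cases branching to
+// MP's block produces two phi entries whose incoming block is BB; the loop
+// above rewrites both of them to NewDef, which is why it keeps scanning while
+// the incoming block is still BB.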
+
+// A brief description of the algorithm:
+// First, we compute what should define the new def, using the SSA
+// construction algorithm.
+// Then, we update the defs below us (and any new phi nodes) in the graph to
+// point to the correct new defs, to ensure we only have one variable, and no
+// disconnected stores.
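+//
+// Illustrative same-block case (hypothetical defs list [D1, D2]): inserting a
+// new def MD between D1 and D2 makes D1 the defining access of MD, and every
+// MemoryDef/MemoryPhi user of D1 (here D2) is retargeted to MD. MemoryUses
+// are deliberately left alone below; they can be renamed afterwards when
+// RenameUses is set.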
+void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
+ InsertedPHIs.clear();
+
+ // See if we had a local def, and if not, go hunting.
+ MemoryAccess *DefBefore = getPreviousDefInBlock(MD);
+ bool DefBeforeSameBlock = DefBefore != nullptr;
+ if (!DefBefore)
+ DefBefore = getPreviousDefRecursive(MD->getBlock());
+
+ // There is a def before us, which means we can replace any store/phi uses
+ // of that thing with us, since we are in the way of whatever was there
+ // before.
+ // We now become the defining access for that def's MemoryDef and
+ // MemoryPhi users.
+ if (DefBeforeSameBlock) {
+ for (auto UI = DefBefore->use_begin(), UE = DefBefore->use_end();
+ UI != UE;) {
+ Use &U = *UI++;
+ // Leave the uses alone
+ if (isa<MemoryUse>(U.getUser()))
+ continue;
+ U.set(MD);
+ }
+ }
+
+ // and that def is now our defining access.
+ // We change them in this order otherwise we will appear in the use list
+ // above and reset ourselves.
+ MD->setDefiningAccess(DefBefore);
+
+ SmallVector<MemoryAccess *, 8> FixupList(InsertedPHIs.begin(),
+ InsertedPHIs.end());
+ if (!DefBeforeSameBlock) {
+ // If there was a local def before us, we must have the same effect it
+ // did. Because every may-def is the same, any phis/etc we would create, it
+ // would also have created. If there was no local def before us, we
+ // performed a global update, and have to search all successors and make
+ // sure we update the first def in each of them (following all paths until
+ // we hit the first def along each path). This may also insert phi nodes.
+ // TODO: There are other cases we can skip this work, such as when we have a
+ // single successor, and only used a straight line of single pred blocks
+ // backwards to find the def. To make that work, we'd have to track whether
+ // getDefRecursive only ever used the single predecessor case. These types
+ // of paths also only exist in between CFG simplifications.
+ FixupList.push_back(MD);
+ }
+
+ while (!FixupList.empty()) {
+ unsigned StartingPHISize = InsertedPHIs.size();
+ fixupDefs(FixupList);
+ FixupList.clear();
+ // Put any new phis on the fixup list, and process them
+ FixupList.append(InsertedPHIs.end() - StartingPHISize, InsertedPHIs.end());
+ }
+ // Now that all fixups are done, rename all uses if we are asked.
+ if (RenameUses) {
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ BasicBlock *StartBlock = MD->getBlock();
+ // We are guaranteed there is a def in the block, because we just got it
+ // handed to us in this function.
+ MemoryAccess *FirstDef = &*MSSA->getWritableBlockDefs(StartBlock)->begin();
+ // Convert to incoming value if it's a memorydef. A phi *is* already an
+ // incoming value.
+ if (auto *MD = dyn_cast<MemoryDef>(FirstDef))
+ FirstDef = MD->getDefiningAccess();
+
+ MSSA->renamePass(MD->getBlock(), FirstDef, Visited);
+ // We just inserted a phi into this block, so the incoming value will become
+ // the phi anyway, so it does not matter what we pass.
+ for (auto *MP : InsertedPHIs)
+ MSSA->renamePass(MP->getBlock(), nullptr, Visited);
+ }
+}
+
+void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<MemoryAccess *> &Vars) {
+ SmallPtrSet<const BasicBlock *, 8> Seen;
+ SmallVector<const BasicBlock *, 16> Worklist;
+ for (auto *NewDef : Vars) {
+ // First, see if there is a local def after the operand.
+ auto *Defs = MSSA->getWritableBlockDefs(NewDef->getBlock());
+ auto DefIter = NewDef->getDefsIterator();
+
+ // If there is a local def after us, we only have to rename that.
+ if (++DefIter != Defs->end()) {
+ cast<MemoryDef>(DefIter)->setDefiningAccess(NewDef);
+ continue;
+ }
+
+ // Otherwise, we need to search down through the CFG.
+ // For each of our successors, handle it directly if there is a phi, or
+ // place it on the fixup worklist.
+ for (const auto *S : successors(NewDef->getBlock())) {
+ if (auto *MP = MSSA->getMemoryAccess(S))
+ setMemoryPhiValueForBlock(MP, NewDef->getBlock(), NewDef);
+ else
+ Worklist.push_back(S);
+ }
+
+ while (!Worklist.empty()) {
+ const BasicBlock *FixupBlock = Worklist.back();
+ Worklist.pop_back();
+
+ // Get the first def in the block that isn't a phi node.
+ if (auto *Defs = MSSA->getWritableBlockDefs(FixupBlock)) {
+ auto *FirstDef = &*Defs->begin();
+ // The loop above and below should have taken care of phi nodes
+ assert(!isa<MemoryPhi>(FirstDef) &&
+ "Should have already handled phi nodes!");
+ // We are now this def's defining access, make sure we actually dominate
+ // it
+ assert(MSSA->dominates(NewDef, FirstDef) &&
+ "Should have dominated the new access");
+
+ // This may insert new phi nodes, because we are not guaranteed the
+ // block we are processing has a single pred, and depending on where the
+ // store was inserted, it may require phi nodes below it.
+ cast<MemoryDef>(FirstDef)->setDefiningAccess(getPreviousDef(FirstDef));
+ return;
+ }
+ // We didn't find a def, so we must continue.
+ for (const auto *S : successors(FixupBlock)) {
+ // If there is a phi node, handle it.
+ // Otherwise, put the block on the worklist
+ if (auto *MP = MSSA->getMemoryAccess(S))
+ setMemoryPhiValueForBlock(MP, FixupBlock, NewDef);
+ else {
+ // If we cycle, we should have ended up at a phi node that we already
+ // processed. FIXME: Double check this
+ if (!Seen.insert(S).second)
+ continue;
+ Worklist.push_back(S);
+ }
+ }
+ }
+ }
+}
+
+// Move What before Where in the MemorySSA IR.
+template <class WhereType>
+void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
+ WhereType Where) {
+ // Replace all our users with our defining access.
+ What->replaceAllUsesWith(What->getDefiningAccess());
+
+ // Let MemorySSA take care of moving it around in the lists.
+ MSSA->moveTo(What, BB, Where);
+
+ // Now reinsert it into the IR and do whatever fixups needed.
+ if (auto *MD = dyn_cast<MemoryDef>(What))
+ insertDef(MD);
+ else
+ insertUse(cast<MemoryUse>(What));
+}
+
+// Move What before Where in the MemorySSA IR.
+void MemorySSAUpdater::moveBefore(MemoryUseOrDef *What, MemoryUseOrDef *Where) {
+ moveTo(What, Where->getBlock(), Where->getIterator());
+}
+
+// Move What after Where in the MemorySSA IR.
+void MemorySSAUpdater::moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where) {
+ moveTo(What, Where->getBlock(), ++Where->getIterator());
+}
+
+void MemorySSAUpdater::moveToPlace(MemoryUseOrDef *What, BasicBlock *BB,
+ MemorySSA::InsertionPlace Where) {
+ return moveTo(What, BB, Where);
+}
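+
+// Illustrative usage (hypothetical caller): a pass hoisting a store SI out of
+// a loop body into the preheader could keep MemorySSA in sync with
+//   Updater.moveToPlace(cast<MemoryUseOrDef>(MSSA.getMemoryAccess(SI)),
+//                       Preheader, MemorySSA::End);
+// after moving the instruction itself.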
+
+/// \brief If all arguments of a MemoryPHI are defined by the same incoming
+/// argument, return that argument.
+static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
+ MemoryAccess *MA = nullptr;
+
+ for (auto &Arg : MP->operands()) {
+ if (!MA)
+ MA = cast<MemoryAccess>(Arg);
+ else if (MA != Arg)
+ return nullptr;
+ }
+ return MA;
+}
+void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) {
+ assert(!MSSA->isLiveOnEntryDef(MA) &&
+ "Trying to remove the live on entry def");
+ // We can only delete phi nodes if they have no uses, or we can replace all
+ // uses with a single definition.
+ MemoryAccess *NewDefTarget = nullptr;
+ if (MemoryPhi *MP = dyn_cast<MemoryPhi>(MA)) {
+ // Note that it is sufficient to know that all edges of the phi node have
+ // the same argument. If they do, by the definition of dominance frontiers
+ // (which we used to place this phi), that argument must dominate this phi,
+ // and thus, must dominate the phi's uses, and so we will not hit the assert
+ // below.
+ NewDefTarget = onlySingleValue(MP);
+ assert((NewDefTarget || MP->use_empty()) &&
+ "We can't delete this memory phi");
+ } else {
+ NewDefTarget = cast<MemoryUseOrDef>(MA)->getDefiningAccess();
+ }
+
+ // Re-point the uses at our defining access
+ if (!isa<MemoryUse>(MA) && !MA->use_empty()) {
+ // Reset optimized on users of this store, and reset the uses.
+ // A few notes:
+ // 1. This is a slightly modified version of RAUW to avoid walking the
+ // uses twice here.
+ // 2. If we wanted to be complete, we would have to reset the optimized
+ // flags on users of phi nodes if doing the below makes a phi node have all
+ // the same arguments. Instead, we prefer users to removeMemoryAccess those
+ // phi nodes, because doing it here would be N^3.
+ if (MA->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(MA, NewDefTarget);
+ // Note: We assume MemorySSA is not used in metadata since it's not really
+ // part of the IR.
+
+ while (!MA->use_empty()) {
+ Use &U = *MA->use_begin();
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(U.getUser()))
+ MUD->resetOptimized();
+ U.set(NewDefTarget);
+ }
+ }
+
+ // The call below to erase will destroy MA, so we can't change the order we
+ // are doing things here
+ MSSA->removeFromLookups(MA);
+ MSSA->removeFromLists(MA);
+}
+
+MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB(
+ Instruction *I, MemoryAccess *Definition, const BasicBlock *BB,
+ MemorySSA::InsertionPlace Point) {
+ MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess(I, Definition);
+ MSSA->insertIntoListsForBlock(NewAccess, BB, Point);
+ return NewAccess;
+}
+
+MemoryUseOrDef *MemorySSAUpdater::createMemoryAccessBefore(
+ Instruction *I, MemoryAccess *Definition, MemoryUseOrDef *InsertPt) {
+ assert(I->getParent() == InsertPt->getBlock() &&
+ "New and old access must be in the same block");
+ MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess(I, Definition);
+ MSSA->insertIntoListsBefore(NewAccess, InsertPt->getBlock(),
+ InsertPt->getIterator());
+ return NewAccess;
+}
+
+MemoryUseOrDef *MemorySSAUpdater::createMemoryAccessAfter(
+ Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt) {
+ assert(I->getParent() == InsertPt->getBlock() &&
+ "New and old access must be in the same block");
+ MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess(I, Definition);
+ MSSA->insertIntoListsBefore(NewAccess, InsertPt->getBlock(),
+ ++InsertPt->getIterator());
+ return NewAccess;
+}
+
+} // namespace llvm
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index f5ba637e58e2..f6d9a73e4e9a 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -28,7 +28,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/ValueSymbolTable.h"
-#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -84,6 +84,92 @@ static bool isNonRenamableLocal(const GlobalValue &GV) {
return GV.hasSection() && GV.hasLocalLinkage();
}
+/// Determine whether this call has all constant integer arguments (excluding
+/// "this") and summarize it to VCalls or ConstVCalls as appropriate.
+static void addVCallToSet(DevirtCallSite Call, GlobalValue::GUID Guid,
+ SetVector<FunctionSummary::VFuncId> &VCalls,
+ SetVector<FunctionSummary::ConstVCall> &ConstVCalls) {
+ std::vector<uint64_t> Args;
+ // Start from the second argument to skip the "this" pointer.
+ for (auto &Arg : make_range(Call.CS.arg_begin() + 1, Call.CS.arg_end())) {
+ auto *CI = dyn_cast<ConstantInt>(Arg);
+ if (!CI || CI->getBitWidth() > 64) {
+ VCalls.insert({Guid, Call.Offset});
+ return;
+ }
+ Args.push_back(CI->getZExtValue());
+ }
+ ConstVCalls.insert({{Guid, Call.Offset}, std::move(Args)});
+}
+
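The classification in addVCallToSet is all-or-nothing: a single non-constant or wider-than-64-bit argument demotes the whole call to the plain VCalls set. The same decision over toy argument records, as a standalone sketch (the types here are made up, not LLVM's):

#include <cstdint>
#include <optional>
#include <vector>

// Toy stand-in for one call argument after the "this" pointer.
struct Arg {
  bool IsConstInt;     // is the argument a constant integer?
  unsigned BitWidth;   // its integer width, if so
  uint64_t ZExtValue;  // its zero-extended value, if so
};

// Returns the argument vector for a ConstVCall, or nullopt if the call
// must be recorded as a plain VCall instead.
std::optional<std::vector<uint64_t>> classify(const std::vector<Arg> &Args) {
  std::vector<uint64_t> Out;
  for (const Arg &A : Args) {
    if (!A.IsConstInt || A.BitWidth > 64)
      return std::nullopt;
    Out.push_back(A.ZExtValue);
  }
  return Out;
}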
+/// If this intrinsic call requires that we add information to the function
+/// summary, do so via the non-constant reference arguments.
+static void addIntrinsicToSummary(
+ const CallInst *CI, SetVector<GlobalValue::GUID> &TypeTests,
+ SetVector<FunctionSummary::VFuncId> &TypeTestAssumeVCalls,
+ SetVector<FunctionSummary::VFuncId> &TypeCheckedLoadVCalls,
+ SetVector<FunctionSummary::ConstVCall> &TypeTestAssumeConstVCalls,
+ SetVector<FunctionSummary::ConstVCall> &TypeCheckedLoadConstVCalls) {
+ switch (CI->getCalledFunction()->getIntrinsicID()) {
+ case Intrinsic::type_test: {
+ auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
+ auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
+ if (!TypeId)
+ break;
+ GlobalValue::GUID Guid = GlobalValue::getGUID(TypeId->getString());
+
+ // Produce a summary from type.test intrinsics. We only summarize type.test
+ // intrinsics that are used other than by an llvm.assume intrinsic.
+ // Intrinsics that are assumed are relevant only to the devirtualization
+ // pass, not the type test lowering pass.
+ bool HasNonAssumeUses = llvm::any_of(CI->uses(), [](const Use &CIU) {
+ auto *AssumeCI = dyn_cast<CallInst>(CIU.getUser());
+ if (!AssumeCI)
+ return true;
+ Function *F = AssumeCI->getCalledFunction();
+ return !F || F->getIntrinsicID() != Intrinsic::assume;
+ });
+ if (HasNonAssumeUses)
+ TypeTests.insert(Guid);
+
+ SmallVector<DevirtCallSite, 4> DevirtCalls;
+ SmallVector<CallInst *, 4> Assumes;
+ findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI);
+ for (auto &Call : DevirtCalls)
+ addVCallToSet(Call, Guid, TypeTestAssumeVCalls,
+ TypeTestAssumeConstVCalls);
+
+ break;
+ }
+
+ case Intrinsic::type_checked_load: {
+ auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(2));
+ auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
+ if (!TypeId)
+ break;
+ GlobalValue::GUID Guid = GlobalValue::getGUID(TypeId->getString());
+
+ SmallVector<DevirtCallSite, 4> DevirtCalls;
+ SmallVector<Instruction *, 4> LoadedPtrs;
+ SmallVector<Instruction *, 4> Preds;
+ bool HasNonCallUses = false;
+ findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
+ HasNonCallUses, CI);
+ // Any non-call uses of the result of llvm.type.checked.load will
+ // prevent us from optimizing away the llvm.type.test.
+ if (HasNonCallUses)
+ TypeTests.insert(Guid);
+ for (auto &Call : DevirtCalls)
+ addVCallToSet(Call, Guid, TypeCheckedLoadVCalls,
+ TypeCheckedLoadConstVCalls);
+
+ break;
+ }
+ default:
+ break;
+ }
+}
+
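The HasNonAssumeUses test above is what keeps devirtualization-only type tests out of the lowering pass's summary. A reduced sketch of the same predicate over a toy list of users:

#include <algorithm>
#include <vector>

enum class UserKind { AssumeIntrinsic, Other };

// True if any user is something other than an llvm.assume call, i.e. the
// type test must survive for the type-test lowering pass.
bool hasNonAssumeUses(const std::vector<UserKind> &Users) {
  return std::any_of(Users.begin(), Users.end(),
                     [](UserKind K) { return K != UserKind::AssumeIntrinsic; });
}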
static void
computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
const Function &F, BlockFrequencyInfo *BFI,
@@ -99,6 +185,10 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
MapVector<ValueInfo, CalleeInfo> CallGraphEdges;
SetVector<ValueInfo> RefEdges;
SetVector<GlobalValue::GUID> TypeTests;
+ SetVector<FunctionSummary::VFuncId> TypeTestAssumeVCalls,
+ TypeCheckedLoadVCalls;
+ SetVector<FunctionSummary::ConstVCall> TypeTestAssumeConstVCalls,
+ TypeCheckedLoadConstVCalls;
ICallPromotionAnalysis ICallAnalysis;
bool HasInlineAsmMaybeReferencingInternal = false;
@@ -133,29 +223,15 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// Check if this is a direct call to a known function or a known
// intrinsic, or an indirect call with profile data.
if (CalledFunction) {
- if (CalledFunction->isIntrinsic()) {
- if (CalledFunction->getIntrinsicID() != Intrinsic::type_test)
- continue;
- // Produce a summary from type.test intrinsics. We only summarize
- // type.test intrinsics that are used other than by an llvm.assume
- // intrinsic. Intrinsics that are assumed are relevant only to the
- // devirtualization pass, not the type test lowering pass.
- bool HasNonAssumeUses = llvm::any_of(CI->uses(), [](const Use &CIU) {
- auto *AssumeCI = dyn_cast<CallInst>(CIU.getUser());
- if (!AssumeCI)
- return true;
- Function *F = AssumeCI->getCalledFunction();
- return !F || F->getIntrinsicID() != Intrinsic::assume;
- });
- if (HasNonAssumeUses) {
- auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
- if (auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata()))
- TypeTests.insert(GlobalValue::getGUID(TypeId->getString()));
- }
+ if (CI && CalledFunction->isIntrinsic()) {
+ addIntrinsicToSummary(
+ CI, TypeTests, TypeTestAssumeVCalls, TypeCheckedLoadVCalls,
+ TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls);
+ continue;
}
// We should have named any anonymous globals
assert(CalledFunction->hasName());
- auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None;
+ auto ScaledCount = ProfileSummaryInfo::getProfileCount(&I, BFI);
auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI)
: CalleeInfo::HotnessType::Unknown;
@@ -183,6 +259,11 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
}
}
+ // Explicitly add hot edges to enforce importing for designated GUIDs for
+ // sample PGO, to enable the same inlines as the profiled optimized binary.
+ for (auto &I : F.getImportGUIDs())
+ CallGraphEdges[I].updateHotness(CalleeInfo::HotnessType::Hot);
+
bool NonRenamableLocal = isNonRenamableLocal(F);
bool NotEligibleForImport =
NonRenamableLocal || HasInlineAsmMaybeReferencingInternal ||
@@ -193,7 +274,10 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
/* LiveRoot = */ false);
auto FuncSummary = llvm::make_unique<FunctionSummary>(
Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(),
- TypeTests.takeVector());
+ TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(),
+ TypeCheckedLoadVCalls.takeVector(),
+ TypeTestAssumeConstVCalls.takeVector(),
+ TypeCheckedLoadConstVCalls.takeVector());
if (NonRenamableLocal)
CantBePromoted.insert(F.getGUID());
Index.addGlobalValueSummary(F.getName(), std::move(FuncSummary));
@@ -326,9 +410,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
// be listed on the llvm.used or llvm.compiler.used global and marked as
// referenced from there.
ModuleSymbolTable::CollectAsmSymbols(
- Triple(M.getTargetTriple()), M.getModuleInlineAsm(),
- [&M, &Index, &CantBePromoted](StringRef Name,
- object::BasicSymbolRef::Flags Flags) {
+ M, [&M, &Index, &CantBePromoted](StringRef Name,
+ object::BasicSymbolRef::Flags Flags) {
// Symbols not marked as Weak or Global are local definitions.
if (Flags & (object::BasicSymbolRef::SF_Weak |
object::BasicSymbolRef::SF_Global))
@@ -347,7 +430,11 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
llvm::make_unique<FunctionSummary>(
GVFlags, 0, ArrayRef<ValueInfo>{},
ArrayRef<FunctionSummary::EdgeTy>{},
- ArrayRef<GlobalValue::GUID>{});
+ ArrayRef<GlobalValue::GUID>{},
+ ArrayRef<FunctionSummary::VFuncId>{},
+ ArrayRef<FunctionSummary::VFuncId>{},
+ ArrayRef<FunctionSummary::ConstVCall>{},
+ ArrayRef<FunctionSummary::ConstVCall>{});
Index.addGlobalValueSummary(Name, std::move(Summary));
} else {
std::unique_ptr<GlobalVarSummary> Summary =
@@ -364,6 +451,12 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
auto &Summary = GlobalList.second[0];
bool AllRefsCanBeExternallyReferenced =
llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) {
+ // If a global value definition references an unnamed global,
+ // be conservative. They're valid IR so we don't want to crash
+ // when we encounter any of them, but they're infrequent enough
+ // that we don't bother optimizing them.
+ if (!VI.getValue()->hasName())
+ return false;
return !CantBePromoted.count(VI.getValue()->getGUID());
});
if (!AllRefsCanBeExternallyReferenced) {
diff --git a/lib/Analysis/OptimizationDiagnosticInfo.cpp b/lib/Analysis/OptimizationDiagnosticInfo.cpp
index fa8b07d61b01..73245981b022 100644
--- a/lib/Analysis/OptimizationDiagnosticInfo.cpp
+++ b/lib/Analysis/OptimizationDiagnosticInfo.cpp
@@ -23,14 +23,14 @@
using namespace llvm;
-OptimizationRemarkEmitter::OptimizationRemarkEmitter(Function *F)
+OptimizationRemarkEmitter::OptimizationRemarkEmitter(const Function *F)
: F(F), BFI(nullptr) {
if (!F->getContext().getDiagnosticHotnessRequested())
return;
// First create a dominator tree.
DominatorTree DT;
- DT.recalculate(*F);
+ DT.recalculate(*const_cast<Function *>(F));
// Generate LoopInfo from it.
LoopInfo LI;
@@ -45,6 +45,18 @@ OptimizationRemarkEmitter::OptimizationRemarkEmitter(Function *F)
BFI = OwnedBFI.get();
}
+bool OptimizationRemarkEmitter::invalidate(
+ Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // This analysis has no state and so can be trivially preserved, but it
+ // needs a fresh view of BFI if it was constructed with one.
+ if (BFI && Inv.invalidate<BlockFrequencyAnalysis>(F, PA))
+ return true;
+
+ // Otherwise this analysis result remains valid.
+ return false;
+}
+
Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) {
if (!BFI)
return None;
@@ -55,53 +67,59 @@ Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) {
namespace llvm {
namespace yaml {
-template <> struct MappingTraits<DiagnosticInfoOptimizationBase *> {
- static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag) {
- assert(io.outputting() && "input not yet implemented");
+void MappingTraits<DiagnosticInfoOptimizationBase *>::mapping(
+ IO &io, DiagnosticInfoOptimizationBase *&OptDiag) {
+ assert(io.outputting() && "input not yet implemented");
+
+ if (io.mapTag("!Passed",
+ (OptDiag->getKind() == DK_OptimizationRemark ||
+ OptDiag->getKind() == DK_MachineOptimizationRemark)))
+ ;
+ else if (io.mapTag(
+ "!Missed",
+ (OptDiag->getKind() == DK_OptimizationRemarkMissed ||
+ OptDiag->getKind() == DK_MachineOptimizationRemarkMissed)))
+ ;
+ else if (io.mapTag(
+ "!Analysis",
+ (OptDiag->getKind() == DK_OptimizationRemarkAnalysis ||
+ OptDiag->getKind() == DK_MachineOptimizationRemarkAnalysis)))
+ ;
+ else if (io.mapTag("!AnalysisFPCommute",
+ OptDiag->getKind() ==
+ DK_OptimizationRemarkAnalysisFPCommute))
+ ;
+ else if (io.mapTag("!AnalysisAliasing",
+ OptDiag->getKind() ==
+ DK_OptimizationRemarkAnalysisAliasing))
+ ;
+ else if (io.mapTag("!Failure", OptDiag->getKind() == DK_OptimizationFailure))
+ ;
+ else
+ llvm_unreachable("Unknown remark type");
- if (io.mapTag("!Passed", OptDiag->getKind() == DK_OptimizationRemark))
- ;
- else if (io.mapTag("!Missed",
- OptDiag->getKind() == DK_OptimizationRemarkMissed))
- ;
- else if (io.mapTag("!Analysis",
- OptDiag->getKind() == DK_OptimizationRemarkAnalysis))
- ;
- else if (io.mapTag("!AnalysisFPCommute",
- OptDiag->getKind() ==
- DK_OptimizationRemarkAnalysisFPCommute))
- ;
- else if (io.mapTag("!AnalysisAliasing",
- OptDiag->getKind() ==
- DK_OptimizationRemarkAnalysisAliasing))
- ;
- else
- llvm_unreachable("todo");
-
- // These are read-only for now.
- DebugLoc DL = OptDiag->getDebugLoc();
- StringRef FN = GlobalValue::getRealLinkageName(
- OptDiag->getFunction().getName());
-
- StringRef PassName(OptDiag->PassName);
- io.mapRequired("Pass", PassName);
- io.mapRequired("Name", OptDiag->RemarkName);
- if (!io.outputting() || DL)
- io.mapOptional("DebugLoc", DL);
- io.mapRequired("Function", FN);
- io.mapOptional("Hotness", OptDiag->Hotness);
- io.mapOptional("Args", OptDiag->Args);
- }
-};
+ // These are read-only for now.
+ DiagnosticLocation DL = OptDiag->getLocation();
+ StringRef FN =
+ GlobalValue::getRealLinkageName(OptDiag->getFunction().getName());
+
+ StringRef PassName(OptDiag->PassName);
+ io.mapRequired("Pass", PassName);
+ io.mapRequired("Name", OptDiag->RemarkName);
+ if (!io.outputting() || DL.isValid())
+ io.mapOptional("DebugLoc", DL);
+ io.mapRequired("Function", FN);
+ io.mapOptional("Hotness", OptDiag->Hotness);
+ io.mapOptional("Args", OptDiag->Args);
+}
-template <> struct MappingTraits<DebugLoc> {
- static void mapping(IO &io, DebugLoc &DL) {
+template <> struct MappingTraits<DiagnosticLocation> {
+ static void mapping(IO &io, DiagnosticLocation &DL) {
assert(io.outputting() && "input not yet implemented");
- auto *Scope = cast<DIScope>(DL.getScope());
- StringRef File = Scope->getFilename();
+ StringRef File = DL.getFilename();
unsigned Line = DL.getLine();
- unsigned Col = DL.getCol();
+ unsigned Col = DL.getColumn();
io.mapRequired("File", File);
io.mapRequired("Line", Line);
@@ -116,8 +134,8 @@ template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> {
static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) {
assert(io.outputting() && "input not yet implemented");
io.mapRequired(A.Key.data(), A.Val);
- if (A.DLoc)
- io.mapOptional("DebugLoc", A.DLoc);
+ if (A.Loc.isValid())
+ io.mapOptional("DebugLoc", A.Loc);
}
};
@@ -127,18 +145,20 @@ template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> {
LLVM_YAML_IS_SEQUENCE_VECTOR(DiagnosticInfoOptimizationBase::Argument)
void OptimizationRemarkEmitter::computeHotness(
- DiagnosticInfoOptimizationBase &OptDiag) {
- Value *V = OptDiag.getCodeRegion();
+ DiagnosticInfoIROptimization &OptDiag) {
+ const Value *V = OptDiag.getCodeRegion();
if (V)
OptDiag.setHotness(computeHotness(V));
}
-void OptimizationRemarkEmitter::emit(DiagnosticInfoOptimizationBase &OptDiag) {
+void OptimizationRemarkEmitter::emit(
+ DiagnosticInfoOptimizationBase &OptDiagBase) {
+ auto &OptDiag = cast<DiagnosticInfoIROptimization>(OptDiagBase);
computeHotness(OptDiag);
yaml::Output *Out = F->getContext().getDiagnosticsOutputFile();
if (Out) {
- auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiag);
+ auto *P = const_cast<DiagnosticInfoOptimizationBase *>(&OptDiagBase);
*Out << P;
}
// FIXME: now that IsVerbose is part of DI, filtering for this will be moved
@@ -147,72 +167,6 @@ void OptimizationRemarkEmitter::emit(DiagnosticInfoOptimizationBase &OptDiag) {
F->getContext().diagnose(OptDiag);
}
-void OptimizationRemarkEmitter::emitOptimizationRemark(const char *PassName,
- const DebugLoc &DLoc,
- const Value *V,
- const Twine &Msg) {
- LLVMContext &Ctx = F->getContext();
- Ctx.diagnose(OptimizationRemark(PassName, *F, DLoc, Msg, computeHotness(V)));
-}
-
-void OptimizationRemarkEmitter::emitOptimizationRemark(const char *PassName,
- Loop *L,
- const Twine &Msg) {
- emitOptimizationRemark(PassName, L->getStartLoc(), L->getHeader(), Msg);
-}
-
-void OptimizationRemarkEmitter::emitOptimizationRemarkMissed(
- const char *PassName, const DebugLoc &DLoc, const Value *V,
- const Twine &Msg, bool IsVerbose) {
- LLVMContext &Ctx = F->getContext();
- if (!IsVerbose || shouldEmitVerbose())
- Ctx.diagnose(
- OptimizationRemarkMissed(PassName, *F, DLoc, Msg, computeHotness(V)));
-}
-
-void OptimizationRemarkEmitter::emitOptimizationRemarkMissed(
- const char *PassName, Loop *L, const Twine &Msg, bool IsVerbose) {
- emitOptimizationRemarkMissed(PassName, L->getStartLoc(), L->getHeader(), Msg,
- IsVerbose);
-}
-
-void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysis(
- const char *PassName, const DebugLoc &DLoc, const Value *V,
- const Twine &Msg, bool IsVerbose) {
- LLVMContext &Ctx = F->getContext();
- if (!IsVerbose || shouldEmitVerbose())
- Ctx.diagnose(
- OptimizationRemarkAnalysis(PassName, *F, DLoc, Msg, computeHotness(V)));
-}
-
-void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysis(
- const char *PassName, Loop *L, const Twine &Msg, bool IsVerbose) {
- emitOptimizationRemarkAnalysis(PassName, L->getStartLoc(), L->getHeader(),
- Msg, IsVerbose);
-}
-
-void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisFPCommute(
- const char *PassName, const DebugLoc &DLoc, const Value *V,
- const Twine &Msg) {
- LLVMContext &Ctx = F->getContext();
- Ctx.diagnose(OptimizationRemarkAnalysisFPCommute(PassName, *F, DLoc, Msg,
- computeHotness(V)));
-}
-
-void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisAliasing(
- const char *PassName, const DebugLoc &DLoc, const Value *V,
- const Twine &Msg) {
- LLVMContext &Ctx = F->getContext();
- Ctx.diagnose(OptimizationRemarkAnalysisAliasing(PassName, *F, DLoc, Msg,
- computeHotness(V)));
-}
-
-void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisAliasing(
- const char *PassName, Loop *L, const Twine &Msg) {
- emitOptimizationRemarkAnalysisAliasing(PassName, L->getStartLoc(),
- L->getHeader(), Msg);
-}
-
OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass()
: FunctionPass(ID) {
initializeOptimizationRemarkEmitterWrapperPassPass(
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index cb9438a2f928..1caf151546d9 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -31,6 +31,15 @@ char PostDominatorTreeWrapperPass::ID = 0;
INITIALIZE_PASS(PostDominatorTreeWrapperPass, "postdomtree",
"Post-Dominator Tree Construction", true, true)
+bool PostDominatorTree::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &) {
+ // Check whether the analysis, all analyses on functions, or the function's
+ // CFG have been preserved.
+ auto PAC = PA.getChecker<PostDominatorTreeAnalysis>();
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
+ PAC.preservedSet<CFGAnalyses>());
+}
+
bool PostDominatorTreeWrapperPass::runOnFunction(Function &F) {
DT.recalculate(F);
return false;
diff --git a/lib/Analysis/ProfileSummaryInfo.cpp b/lib/Analysis/ProfileSummaryInfo.cpp
index 16d3614c14c6..1a53a8ed4283 100644
--- a/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/lib/Analysis/ProfileSummaryInfo.cpp
@@ -12,9 +12,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ProfileSummary.h"
@@ -55,22 +56,40 @@ static uint64_t getMinCountForPercentile(SummaryEntryVector &DS,
// The profile summary metadata may be attached either by the frontend or by
// any backend passes (IR level instrumentation, for example). This method
// checks if the Summary is null and if so checks if the summary metadata is now
-// available in the module and parses it to get the Summary object.
-void ProfileSummaryInfo::computeSummary() {
+// available in the module and parses it to get the Summary object. Returns true
+// if a valid Summary is available.
+bool ProfileSummaryInfo::computeSummary() {
if (Summary)
- return;
+ return true;
auto *SummaryMD = M.getProfileSummary();
if (!SummaryMD)
- return;
+ return false;
Summary.reset(ProfileSummary::getFromMD(SummaryMD));
+ return true;
+}
+
+Optional<uint64_t>
+ProfileSummaryInfo::getProfileCount(const Instruction *Inst,
+ BlockFrequencyInfo *BFI) {
+ if (!Inst)
+ return None;
+ assert((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
+ "We can only get profile count for call/invoke instruction.");
+ // Check if there is a profile metadata on the instruction. If it is present,
+ // determine hotness solely based on that.
+ uint64_t TotalCount;
+ if (Inst->extractProfTotalWeight(TotalCount))
+ return TotalCount;
+ if (BFI)
+ return BFI->getBlockProfileCount(Inst->getParent());
+ return None;
}
/// Returns true if the function's entry is hot. If it returns false, it
/// either means it is not hot or it is unknown whether it is hot or not (for
/// example, no profile data is available).
bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) {
- computeSummary();
- if (!F || !Summary)
+ if (!F || !computeSummary())
return false;
auto FunctionCount = F->getEntryCount();
// FIXME: The heuristic used below for determining hotness is based on
@@ -79,17 +98,53 @@ bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) {
return FunctionCount && isHotCount(FunctionCount.getValue());
}
+/// Returns true if the function's entry or total call edge count is hot.
+/// If it returns false, it either means it is not hot or it is unknown
+/// whether it is hot or not (for example, no profile data is available).
+bool ProfileSummaryInfo::isFunctionHotInCallGraph(const Function *F) {
+ if (!F || !computeSummary())
+ return false;
+ if (auto FunctionCount = F->getEntryCount())
+ if (isHotCount(FunctionCount.getValue()))
+ return true;
+
+ uint64_t TotalCallCount = 0;
+ for (const auto &BB : *F)
+ for (const auto &I : BB)
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ if (auto CallCount = getProfileCount(&I, nullptr))
+ TotalCallCount += CallCount.getValue();
+ return isHotCount(TotalCallCount);
+}
+
+/// Returns true if the function's entry and total call edge count is cold.
+/// If it returns false, it either means it is not cold or it is unknown
+/// whether it is cold or not (for example, no profile data is available).
+bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F) {
+ if (!F || !computeSummary())
+ return false;
+ if (auto FunctionCount = F->getEntryCount())
+ if (!isColdCount(FunctionCount.getValue()))
+ return false;
+
+ uint64_t TotalCallCount = 0;
+ for (const auto &BB : *F)
+ for (const auto &I : BB)
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ if (auto CallCount = getProfileCount(&I, nullptr))
+ TotalCallCount += CallCount.getValue();
+ return isColdCount(TotalCallCount);
+}
+
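The two call-graph variants above sum every call and invoke count and compare both the entry count and that total against the summary thresholds. A reduced model of the hot case, substituting a plain numeric cutoff for isHotCount:

#include <cstdint>
#include <vector>

// Hot if the entry count is hot, or if the call counts sum to a hot total.
// HotThreshold is a stand-in for the detailed-summary-derived cutoff.
bool functionHotInCallGraph(uint64_t EntryCount,
                            const std::vector<uint64_t> &CallCounts,
                            uint64_t HotThreshold) {
  if (EntryCount >= HotThreshold)
    return true;
  uint64_t TotalCallCount = 0;
  for (uint64_t C : CallCounts)
    TotalCallCount += C;
  return TotalCallCount >= HotThreshold;
}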
/// Returns true if the function's entry is cold. If it returns false, it
/// either means it is not cold or it is unknown whether it is cold or not (for
/// example, no profile data is available).
bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) {
- computeSummary();
if (!F)
return false;
- if (F->hasFnAttribute(Attribute::Cold)) {
+ if (F->hasFnAttribute(Attribute::Cold))
return true;
- }
- if (!Summary)
+ if (!computeSummary())
return false;
auto FunctionCount = F->getEntryCount();
// FIXME: The heuristic used below for determining coldness is based on
@@ -100,9 +155,7 @@ bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) {
/// Compute the hot and cold thresholds.
void ProfileSummaryInfo::computeThresholds() {
- if (!Summary)
- computeSummary();
- if (!Summary)
+ if (!computeSummary())
return;
auto &DetailedSummary = Summary->getDetailedSummary();
HotCountThreshold =
@@ -125,20 +178,25 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) {
bool ProfileSummaryInfo::isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI) {
auto Count = BFI->getBlockProfileCount(B);
- if (Count && isHotCount(*Count))
- return true;
- // Use extractProfTotalWeight to get BB count.
- // For Sample PGO, BFI may not provide accurate BB count due to errors
- // magnified during sample count propagation. This serves as a backup plan
- // to ensure all hot BB will not be missed.
- // The query currently has false positives as branch instruction cloning does
- // not update/scale branch weights. Unlike false negatives, this will not cause
- // performance problem.
- uint64_t TotalCount;
- if (B->getTerminator()->extractProfTotalWeight(TotalCount) &&
- isHotCount(TotalCount))
- return true;
- return false;
+ return Count && isHotCount(*Count);
+}
+
+bool ProfileSummaryInfo::isColdBB(const BasicBlock *B,
+ BlockFrequencyInfo *BFI) {
+ auto Count = BFI->getBlockProfileCount(B);
+ return Count && isColdCount(*Count);
+}
+
+bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS,
+ BlockFrequencyInfo *BFI) {
+ auto C = getProfileCount(CS.getInstruction(), BFI);
+ return C && isHotCount(*C);
+}
+
+bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS,
+ BlockFrequencyInfo *BFI) {
+ auto C = getProfileCount(CS.getInstruction(), BFI);
+ return C && isColdCount(*C);
}
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 8c084ddd2266..63ef8d28d44a 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -83,6 +83,15 @@ RegionInfo::~RegionInfo() {
}
+bool RegionInfo::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &) {
+ // Check whether the analysis, all analyses on functions, or the function's
+ // CFG have been preserved.
+ auto PAC = PA.getChecker<RegionInfoAnalysis>();
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
+ PAC.preservedSet<CFGAnalyses>());
+}
+
void RegionInfo::updateStatistics(Region *R) {
++numRegions;
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 7358aa6810a1..82107cb18025 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -206,6 +206,8 @@ public:
return false;
}
+
+ StringRef getPassName() const override { return "Print Region IR"; }
};
char PrintRegionPass::ID = 0;
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index ed328f12c463..ca32cf3c7c34 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -127,16 +127,35 @@ static cl::opt<unsigned> MulOpsInlineThreshold(
cl::desc("Threshold for inlining multiplication operands into a SCEV"),
cl::init(1000));
+static cl::opt<unsigned> AddOpsInlineThreshold(
+ "scev-addops-inline-threshold", cl::Hidden,
+ cl::desc("Threshold for inlining addition operands into a SCEV"),
+ cl::init(500));
+
static cl::opt<unsigned> MaxSCEVCompareDepth(
"scalar-evolution-max-scev-compare-depth", cl::Hidden,
cl::desc("Maximum depth of recursive SCEV complexity comparisons"),
cl::init(32));
+static cl::opt<unsigned> MaxSCEVOperationsImplicationDepth(
+ "scalar-evolution-max-scev-operations-implication-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive SCEV operations implication analysis"),
+ cl::init(2));
+
static cl::opt<unsigned> MaxValueCompareDepth(
"scalar-evolution-max-value-compare-depth", cl::Hidden,
cl::desc("Maximum depth of recursive value complexity comparisons"),
cl::init(2));
+static cl::opt<unsigned>
+ MaxAddExprDepth("scalar-evolution-max-addexpr-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive AddExpr"),
+ cl::init(32));
+
+static cl::opt<unsigned> MaxConstantEvolvingDepth(
+ "scalar-evolution-max-constant-evolving-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive constant evolving"), cl::init(32));
+
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -145,11 +164,12 @@ static cl::opt<unsigned> MaxValueCompareDepth(
// Implementation of the SCEV class.
//
-LLVM_DUMP_METHOD
-void SCEV::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SCEV::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
void SCEV::print(raw_ostream &OS) const {
switch (static_cast<SCEVTypes>(getSCEVType())) {
@@ -2095,7 +2115,8 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
/// Get a canonical add expression, or something simpler if possible.
const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
- SCEV::NoWrapFlags Flags) {
+ SCEV::NoWrapFlags Flags,
+ unsigned Depth) {
assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
"only nuw or nsw allowed");
assert(!Ops.empty() && "Cannot get empty add!");
@@ -2134,6 +2155,10 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
if (Ops.size() == 1) return Ops[0];
}
+ // Limit the recursion depth.
+ if (Depth > MaxAddExprDepth)
+ return getOrCreateAddExpr(Ops, Flags);
+
// Okay, check to see if the same value occurs in the operand list more than
// once. If so, merge them together into a multiply expression. Since we
// sorted the list, these values are required to be adjacent.
@@ -2205,7 +2230,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
if (Ok) {
// Evaluate the expression in the larger type.
- const SCEV *Fold = getAddExpr(LargeOps, Flags);
+ const SCEV *Fold = getAddExpr(LargeOps, Flags, Depth + 1);
// If it folds to something simple, use it. Otherwise, don't.
if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
return getTruncateExpr(Fold, DstType);
@@ -2220,6 +2245,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
if (Idx < Ops.size()) {
bool DeletedAdd = false;
while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
+ if (Ops.size() > AddOpsInlineThreshold ||
+ Add->getNumOperands() > AddOpsInlineThreshold)
+ break;
// If we have an add, expand the add operands onto the end of the operands
// list.
Ops.erase(Ops.begin()+Idx);
@@ -2231,7 +2259,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// and they are not necessarily sorted. Recurse to resort and resimplify
// any operands we just acquired.
if (DeletedAdd)
- return getAddExpr(Ops);
+ return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
// Skip over the add expression until we get to a multiply.
@@ -2266,13 +2294,14 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
Ops.push_back(getConstant(AccumulatedConstant));
for (auto &MulOp : MulOpLists)
if (MulOp.first != 0)
- Ops.push_back(getMulExpr(getConstant(MulOp.first),
- getAddExpr(MulOp.second)));
+ Ops.push_back(getMulExpr(
+ getConstant(MulOp.first),
+ getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1)));
if (Ops.empty())
return getZero(Ty);
if (Ops.size() == 1)
return Ops[0];
- return getAddExpr(Ops);
+ return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
}
@@ -2297,8 +2326,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
InnerMul = getMulExpr(MulOps);
}
- const SCEV *One = getOne(Ty);
- const SCEV *AddOne = getAddExpr(One, InnerMul);
+ SmallVector<const SCEV *, 2> TwoOps = {getOne(Ty), InnerMul};
+ const SCEV *AddOne = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
if (Ops.size() == 2) return OuterMul;
if (AddOp < Idx) {
@@ -2309,7 +2338,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
Ops.erase(Ops.begin()+AddOp-1);
}
Ops.push_back(OuterMul);
- return getAddExpr(Ops);
+ return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
// Check this multiply against other multiplies being added together.
@@ -2337,13 +2366,15 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
InnerMul2 = getMulExpr(MulOps);
}
- const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
+ SmallVector<const SCEV *, 2> TwoOps = {InnerMul1, InnerMul2};
+ const SCEV *InnerMulSum =
+ getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
if (Ops.size() == 2) return OuterMul;
Ops.erase(Ops.begin()+Idx);
Ops.erase(Ops.begin()+OtherMulIdx-1);
Ops.push_back(OuterMul);
- return getAddExpr(Ops);
+ return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
}
}
@@ -2379,7 +2410,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// This follows from the fact that the no-wrap flags on the outer add
// expression are applicable on the 0th iteration, when the add recurrence
// will be equal to its start value.
- AddRecOps[0] = getAddExpr(LIOps, Flags);
+ AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1);
// Build the new addrec. Propagate the NUW and NSW flags if both the
// outer add and the inner addrec are guaranteed to have no overflow.
@@ -2396,7 +2427,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
Ops[i] = NewRec;
break;
}
- return getAddExpr(Ops);
+ return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
// Okay, if there weren't any loop invariants to be folded, check to see if
@@ -2420,14 +2451,15 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
OtherAddRec->op_end());
break;
}
- AddRecOps[i] = getAddExpr(AddRecOps[i],
- OtherAddRec->getOperand(i));
+ SmallVector<const SCEV *, 2> TwoOps = {
+ AddRecOps[i], OtherAddRec->getOperand(i)};
+ AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
}
Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
}
// Step size has changed, so we cannot guarantee no self-wraparound.
Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
- return getAddExpr(Ops);
+ return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
// Otherwise couldn't fold anything into this recurrence. Move onto the
@@ -2436,18 +2468,24 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// Okay, it looks like we really DO need an add expr. Check to see if we
// already have one, otherwise create a new one.
+ return getOrCreateAddExpr(Ops, Flags);
+}
+
+const SCEV *
+ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scAddExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
void *IP = nullptr;
SCEVAddExpr *S =
- static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+ static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
- S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator),
- O, Ops.size());
+ S = new (SCEVAllocator)
+ SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
}
S->setNoWrapFlags(Flags);
@@ -2889,7 +2927,7 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
// end of this file for inspiration.
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
- if (!Mul)
+ if (!Mul || !Mul->hasNoUnsignedWrap())
return getUDivExpr(LHS, RHS);
if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
@@ -3385,6 +3423,10 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
return getDataLayout().getIntPtrType(Ty);
}
+Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const {
+ return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1 : T2;
+}
+
const SCEV *ScalarEvolution::getCouldNotCompute() {
return CouldNotCompute.get();
}
@@ -4409,8 +4451,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
return getGEPExpr(GEP, IndexExprs);
}
-uint32_t
-ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
+uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
return C->getAPInt().countTrailingZeros();
@@ -4420,14 +4461,16 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
- return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
- getTypeSizeInBits(E->getType()) : OpRes;
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType())
+ ? getTypeSizeInBits(E->getType())
+ : OpRes;
}
if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
- return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
- getTypeSizeInBits(E->getType()) : OpRes;
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType())
+ ? getTypeSizeInBits(E->getType())
+ : OpRes;
}
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
@@ -4444,8 +4487,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
uint32_t BitWidth = getTypeSizeInBits(M->getType());
for (unsigned i = 1, e = M->getNumOperands();
SumOpRes != BitWidth && i != e; ++i)
- SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
- BitWidth);
+ SumOpRes =
+ std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), BitWidth);
return SumOpRes;
}
@@ -4486,6 +4529,17 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
return 0;
}
+uint32_t ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
+ auto I = MinTrailingZerosCache.find(S);
+ if (I != MinTrailingZerosCache.end())
+ return I->second;
+
+ uint32_t Result = GetMinTrailingZerosImpl(S);
+ auto InsertPair = MinTrailingZerosCache.insert({S, Result});
+ assert(InsertPair.second && "Should insert a new key");
+ return InsertPair.first->second;
+}
+
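GetMinTrailingZeros is now a thin memoizing wrapper around the recursive worker, and recursive calls inside the worker route back through the cache. The same shape on a self-contained toy recursion:

#include <cstdint>
#include <unordered_map>

static uint32_t trailingZerosImpl(uint64_t V);
static std::unordered_map<uint64_t, uint32_t> Cache;

// Cached wrapper, mirroring GetMinTrailingZeros over GetMinTrailingZerosImpl.
static uint32_t trailingZeros(uint64_t V) {
  auto It = Cache.find(V);
  if (It != Cache.end())
    return It->second;
  uint32_t Result = trailingZerosImpl(V);
  return Cache.insert({V, Result}).first->second;
}

static uint32_t trailingZerosImpl(uint64_t V) {
  if (V == 0)
    return 64;
  // Recurse through the cached entry point, as the SCEV code does.
  return (V & 1) ? 0 : 1 + trailingZeros(V >> 1);
}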
/// Helper method to assign a range to V from metadata present in the IR.
static Optional<ConstantRange> GetRangeFromMetadata(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
@@ -4668,6 +4722,77 @@ ScalarEvolution::getRange(const SCEV *S,
return setRange(S, SignHint, ConservativeResult);
}
+// Given a StartRange, Step and MaxBECount for an expression, compute a range
+// of values that the expression can take. Initially, the expression has a
+// value from StartRange and then is changed by Step up to MaxBECount times.
+// The Signed argument defines whether we treat Step as signed or unsigned.
+static ConstantRange getRangeForAffineARHelper(APInt Step,
+ ConstantRange StartRange,
+ APInt MaxBECount,
+ unsigned BitWidth, bool Signed) {
+ // If either Step or MaxBECount is 0, then the expression won't change, and we
+ // just need to return the initial range.
+ if (Step == 0 || MaxBECount == 0)
+ return StartRange;
+
+ // If we don't know anything about the initial value (i.e. StartRange is
+ // FullRange), then we don't know anything about the final range either.
+ // Return FullRange.
+ if (StartRange.isFullSet())
+ return ConstantRange(BitWidth, /* isFullSet = */ true);
+
+ // If Step is signed and negative, then we use its absolute value, but we also
+ // note that we're moving in the opposite direction.
+ bool Descending = Signed && Step.isNegative();
+
+ if (Signed)
+ // This is correct even for INT_SMIN. Let's look at i8 to illustrate this:
+ // abs(INT_SMIN) = abs(-128) = abs(0x80) = -0x80 = 0x80 = 128.
+ // These equations hold true due to the well-defined wrap-around behavior of
+ // APInt.
+ Step = Step.abs();
+
+ // Check if Offset (Step * MaxBECount) exceeds the full span of BitWidth.
+ // If it does, the expression is guaranteed to overflow.
+ if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount))
+ return ConstantRange(BitWidth, /* isFullSet = */ true);
+
+ // Offset is the maximum amount by which the expression can change. The
+ // check above guarantees that this multiplication doesn't overflow.
+ APInt Offset = Step * MaxBECount;
+
+ // Minimum value of the final range will match the minimal value of StartRange
+ // if the expression is increasing and will be decreased by Offset otherwise.
+ // Maximum value of the final range will match the maximal value of StartRange
+ // if the expression is decreasing and will be increased by Offset otherwise.
+ APInt StartLower = StartRange.getLower();
+ APInt StartUpper = StartRange.getUpper() - 1;
+ APInt MovedBoundary =
+ Descending ? (StartLower - Offset) : (StartUpper + Offset);
+
+ // It's possible that the new minimum/maximum value will fall into the initial
+ // range (due to wrap around). This means that the expression can take any
+ // value in this bitwidth, and we have to return full range.
+ if (StartRange.contains(MovedBoundary))
+ return ConstantRange(BitWidth, /* isFullSet = */ true);
+
+ APInt NewLower, NewUpper;
+ if (Descending) {
+ NewLower = MovedBoundary;
+ NewUpper = StartUpper;
+ } else {
+ NewLower = StartLower;
+ NewUpper = MovedBoundary;
+ }
+
+ // If we end up with full range, return a proper full range.
+ if (NewLower == NewUpper + 1)
+ return ConstantRange(BitWidth, /* isFullSet = */ true);
+
+ // No overflow detected, return the [NewLower, NewUpper + 1) range.
+ return ConstantRange(NewLower, NewUpper + 1);
+}
+
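A worked instance of the helper's arithmetic on i8, with hypothetical numbers and an unsigned step: StartRange = [0, 4), Step = 3, MaxBECount = 10. The overflow guard passes (255 / 3 = 85 >= 10), so the range simply widens by Offset = 30:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned StartLower = 0, StartUpper = 4; // StartRange = [0, 4)
  const unsigned Step = 3, MaxBECount = 10;
  // Overflow guard: Step * MaxBECount cannot wrap an 8-bit value.
  assert(255u / Step >= MaxBECount);
  unsigned Offset = Step * MaxBECount;           // 30
  // Ascending case: the lower bound stays, the upper bound moves by Offset.
  unsigned NewLower = StartLower;
  unsigned NewUpper = (StartUpper - 1) + Offset; // 3 + 30 = 33
  assert(NewLower == 0 && NewUpper + 1 == 34);   // result range [0, 34)
  return 0;
}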
ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
const SCEV *Step,
const SCEV *MaxBECount,
@@ -4676,60 +4801,30 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
"Precondition!");
- ConstantRange Result(BitWidth, /* isFullSet = */ true);
-
- // Check for overflow. This must be done with ConstantRange arithmetic
- // because we could be called from within the ScalarEvolution overflow
- // checking code.
-
MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType());
ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
- ConstantRange ZExtMaxBECountRange = MaxBECountRange.zextOrTrunc(BitWidth * 2);
+ APInt MaxBECountValue = MaxBECountRange.getUnsignedMax();
+ // First, consider step signed.
+ ConstantRange StartSRange = getSignedRange(Start);
ConstantRange StepSRange = getSignedRange(Step);
- ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2);
-
- ConstantRange StartURange = getUnsignedRange(Start);
- ConstantRange EndURange =
- StartURange.add(MaxBECountRange.multiply(StepSRange));
-
- // Check for unsigned overflow.
- ConstantRange ZExtStartURange = StartURange.zextOrTrunc(BitWidth * 2);
- ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2);
- if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
- ZExtEndURange) {
- APInt Min = APIntOps::umin(StartURange.getUnsignedMin(),
- EndURange.getUnsignedMin());
- APInt Max = APIntOps::umax(StartURange.getUnsignedMax(),
- EndURange.getUnsignedMax());
- bool IsFullRange = Min.isMinValue() && Max.isMaxValue();
- if (!IsFullRange)
- Result =
- Result.intersectWith(ConstantRange(Min, Max + 1));
- }
- ConstantRange StartSRange = getSignedRange(Start);
- ConstantRange EndSRange =
- StartSRange.add(MaxBECountRange.multiply(StepSRange));
-
- // Check for signed overflow. This must be done with ConstantRange
- // arithmetic because we could be called from within the ScalarEvolution
- // overflow checking code.
- ConstantRange SExtStartSRange = StartSRange.sextOrTrunc(BitWidth * 2);
- ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2);
- if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
- SExtEndSRange) {
- APInt Min =
- APIntOps::smin(StartSRange.getSignedMin(), EndSRange.getSignedMin());
- APInt Max =
- APIntOps::smax(StartSRange.getSignedMax(), EndSRange.getSignedMax());
- bool IsFullRange = Min.isMinSignedValue() && Max.isMaxSignedValue();
- if (!IsFullRange)
- Result =
- Result.intersectWith(ConstantRange(Min, Max + 1));
- }
+ // If Step can be both positive and negative, we need to find ranges for the
+ // maximum absolute step values in both directions and union them.
+ ConstantRange SR =
+ getRangeForAffineARHelper(StepSRange.getSignedMin(), StartSRange,
+ MaxBECountValue, BitWidth, /* Signed = */ true);
+ SR = SR.unionWith(getRangeForAffineARHelper(StepSRange.getSignedMax(),
+ StartSRange, MaxBECountValue,
+ BitWidth, /* Signed = */ true));
- return Result;
+ // Next, consider step unsigned.
+ ConstantRange UR = getRangeForAffineARHelper(
+ getUnsignedRange(Step).getUnsignedMax(), getUnsignedRange(Start),
+ MaxBECountValue, BitWidth, /* Signed = */ false);
+
+ // Finally, intersect signed and unsigned ranges.
+ return SR.intersectWith(UR);
}
ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
@@ -5148,12 +5243,27 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
APInt EffectiveMask =
APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) {
- const SCEV *MulCount = getConstant(ConstantInt::get(
- getContext(), APInt::getOneBitSet(BitWidth, TZ)));
+ const SCEV *MulCount = getConstant(APInt::getOneBitSet(BitWidth, TZ));
+ const SCEV *LHS = getSCEV(BO->LHS);
+ const SCEV *ShiftedLHS = nullptr;
+ if (auto *LHSMul = dyn_cast<SCEVMulExpr>(LHS)) {
+ if (auto *OpC = dyn_cast<SCEVConstant>(LHSMul->getOperand(0))) {
+ // For an expression like (x * 8) & 8, simplify the multiply.
+ unsigned MulZeros = OpC->getAPInt().countTrailingZeros();
+ unsigned GCD = std::min(MulZeros, TZ);
+ APInt DivAmt = APInt::getOneBitSet(BitWidth, TZ - GCD);
+ SmallVector<const SCEV*, 4> MulOps;
+ MulOps.push_back(getConstant(OpC->getAPInt().lshr(GCD)));
+ MulOps.append(LHSMul->op_begin() + 1, LHSMul->op_end());
+ auto *NewMul = getMulExpr(MulOps, LHSMul->getNoWrapFlags());
+ ShiftedLHS = getUDivExpr(NewMul, getConstant(DivAmt));
+ }
+ }
+ if (!ShiftedLHS)
+ ShiftedLHS = getUDivExpr(LHS, MulCount);
return getMulExpr(
getZeroExtendExpr(
- getTruncateExpr(
- getUDivExactExpr(getSCEV(BO->LHS), MulCount),
+ getTruncateExpr(ShiftedLHS,
IntegerType::get(getContext(), BitWidth - LZ - TZ)),
BO->LHS->getType()),
MulCount);
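For the "(x * 8) & 8" shape the new branch simplifies, the masked product depends only on x's low bit. A quick exhaustive property check of that identity over 16-bit values (this checks the arithmetic fact, not the SCEV rewrite itself):

#include <cassert>
#include <cstdint>

int main() {
  // Bit 3 of X * 8 is bit 0 of X, so masking with 8 isolates X's low bit.
  for (uint32_t X = 0; X < (1u << 16); ++X)
    assert(((X * 8) & 8) == ((X & 1) * 8));
  return 0;
}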
@@ -5211,7 +5321,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// If C is a low-bits mask, the zero extend is serving to
// mask off the high bits. Complement the operand and
// re-apply the zext.
- if (APIntOps::isMask(Z0TySize, CI->getValue()))
+ if (CI->getValue().isMask(Z0TySize))
return getZeroExtendExpr(getNotSCEV(Z0), UTy);
// If C is a single bit, it may be in the sign-bit position
@@ -5255,28 +5365,55 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
break;
case Instruction::AShr:
- // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS))
- if (Operator *L = dyn_cast<Operator>(BO->LHS))
- if (L->getOpcode() == Instruction::Shl &&
- L->getOperand(1) == BO->RHS) {
- uint64_t BitWidth = getTypeSizeInBits(BO->LHS->getType());
-
- // If the shift count is not less than the bitwidth, the result of
- // the shift is undefined. Don't try to analyze it, because the
- // resolution chosen here may differ from the resolution chosen in
- // other parts of the compiler.
- if (CI->getValue().uge(BitWidth))
- break;
+ // AShr X, C, where C is a constant.
+ ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS);
+ if (!CI)
+ break;
+
+ Type *OuterTy = BO->LHS->getType();
+ uint64_t BitWidth = getTypeSizeInBits(OuterTy);
+ // If the shift count is not less than the bitwidth, the result of
+ // the shift is undefined. Don't try to analyze it, because the
+ // resolution chosen here may differ from the resolution chosen in
+ // other parts of the compiler.
+ if (CI->getValue().uge(BitWidth))
+ break;
- uint64_t Amt = BitWidth - CI->getZExtValue();
- if (Amt == BitWidth)
- return getSCEV(L->getOperand(0)); // shift by zero --> noop
+ if (CI->isNullValue())
+ return getSCEV(BO->LHS); // shift by zero --> noop
+
+ uint64_t AShrAmt = CI->getZExtValue();
+ Type *TruncTy = IntegerType::get(getContext(), BitWidth - AShrAmt);
+
+ Operator *L = dyn_cast<Operator>(BO->LHS);
+ if (L && L->getOpcode() == Instruction::Shl) {
+ // X = Shl A, n
+ // Y = AShr X, m
+ // Both n and m are constant.
+
+ const SCEV *ShlOp0SCEV = getSCEV(L->getOperand(0));
+ if (L->getOperand(1) == BO->RHS)
+ // For a two-shift sext-inreg, i.e. n = m,
+ // use sext(trunc(x)) as the SCEV expression.
+ return getSignExtendExpr(
+ getTruncateExpr(ShlOp0SCEV, TruncTy), OuterTy);
+
+ ConstantInt *ShlAmtCI = dyn_cast<ConstantInt>(L->getOperand(1));
+ if (ShlAmtCI && ShlAmtCI->getValue().ult(BitWidth)) {
+ uint64_t ShlAmt = ShlAmtCI->getZExtValue();
+ if (ShlAmt > AShrAmt) {
+ // When n > m, use sext(mul(trunc(x), 2^(n-m))) as the SCEV
+ // expression. We already checked that ShlAmt < BitWidth, so
+ // the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy as
+ // ShlAmt - AShrAmt < BitWidth - AShrAmt.
+ APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt,
+ ShlAmt - AShrAmt);
return getSignExtendExpr(
- getTruncateExpr(getSCEV(L->getOperand(0)),
- IntegerType::get(getContext(), Amt)),
- BO->LHS->getType());
+ getMulExpr(getTruncateExpr(ShlOp0SCEV, TruncTy),
+ getConstant(Mul)), OuterTy);
}
+ }
+ }
break;
}
}
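The n > m case above can be sanity-checked exhaustively on i8. With n = 5 and m = 3, the claim is that (x << 5) >>s 3 equals sext(mul(trunc(x, i5), 2^(5-3)), i8); the sketch emulates the narrow i5 operations with masking and relies on the usual two's-complement narrowing (the concrete values are hypothetical):

#include <cassert>
#include <cstdint>

static int8_t viaShifts(int8_t X) {
  uint8_t Shl = (uint8_t)((uint8_t)X << 5); // shl wraps modulo 2^8
  return (int8_t)Shl >> 3;                  // arithmetic shift right
}

static int8_t viaSCEVForm(int8_t X) {
  uint8_t T = (uint8_t)X & 0x1F;            // trunc to i5
  T = (uint8_t)(T * 4) & 0x1F;              // mul by 2^(5-3), still in i5
  return (int8_t)(uint8_t)(T << 3) >> 3;    // sext i5 -> i8
}

int main() {
  for (int V = -128; V < 128; ++V)
    assert(viaShifts((int8_t)V) == viaSCEVForm((int8_t)V));
  return 0;
}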
@@ -5348,7 +5485,7 @@ static unsigned getConstantTripCount(const SCEVConstant *ExitCount) {
return ((unsigned)ExitConst->getZExtValue()) + 1;
}
-unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
+unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) {
if (BasicBlock *ExitingBB = L->getExitingBlock())
return getSmallConstantTripCount(L, ExitingBB);
@@ -5356,7 +5493,7 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
return 0;
}
-unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
+unsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L,
BasicBlock *ExitingBlock) {
assert(ExitingBlock && "Must pass a non-null exiting block!");
assert(L->isLoopExiting(ExitingBlock) &&
@@ -5366,13 +5503,13 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
return getConstantTripCount(ExitCount);
}
-unsigned ScalarEvolution::getSmallConstantMaxTripCount(Loop *L) {
+unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
const auto *MaxExitCount =
dyn_cast<SCEVConstant>(getMaxBackedgeTakenCount(L));
return getConstantTripCount(MaxExitCount);
}
-unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {
+unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
if (BasicBlock *ExitingBB = L->getExitingBlock())
return getSmallConstantTripMultiple(L, ExitingBB);
@@ -5393,7 +5530,7 @@ unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {
/// As explained in the comments for getSmallConstantTripCount, this assumes
/// that control exits the loop via ExitingBlock.
unsigned
-ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
+ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
BasicBlock *ExitingBlock) {
assert(ExitingBlock && "Must pass a non-null exiting block!");
assert(L->isLoopExiting(ExitingBlock) &&
@@ -5403,17 +5540,16 @@ ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
return 1;
// Get the trip count from the BE count by adding 1.
- const SCEV *TCMul = getAddExpr(ExitCount, getOne(ExitCount->getType()));
- // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt
- // to factor simple cases.
- if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul))
- TCMul = Mul->getOperand(0);
-
- const SCEVConstant *MulC = dyn_cast<SCEVConstant>(TCMul);
- if (!MulC)
- return 1;
+ const SCEV *TCExpr = getAddExpr(ExitCount, getOne(ExitCount->getType()));
- ConstantInt *Result = MulC->getValue();
+ const SCEVConstant *TC = dyn_cast<SCEVConstant>(TCExpr);
+ if (!TC)
+ // Attempt to factor more general cases. Return the greatest power-of-two
+ // divisor; even if the trip count computation overflowed, the expression
+ // is still divisible by the value returned.
+ return 1U << std::min((uint32_t)31, GetMinTrailingZeros(TCExpr));
+
+ ConstantInt *Result = TC->getValue();
// Guard against huge trip counts (this requires checking
// for zero to handle the case where the trip count == -1 and the
@@ -5428,7 +5564,8 @@ ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
/// Get the expression for the number of loop iterations for which this loop is
/// guaranteed not to exit via ExitingBlock. Otherwise return
/// SCEVCouldNotCompute.
-const SCEV *ScalarEvolution::getExitCount(Loop *L, BasicBlock *ExitingBlock) {
+const SCEV *ScalarEvolution::getExitCount(const Loop *L,
+ BasicBlock *ExitingBlock) {
return getBackedgeTakenInfo(L).getExact(ExitingBlock, this);
}
@@ -6408,7 +6545,10 @@ static bool canConstantEvolve(Instruction *I, const Loop *L) {
/// recursing through each instruction operand until reaching a loop header phi.
static PHINode *
getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
- DenseMap<Instruction *, PHINode *> &PHIMap) {
+ DenseMap<Instruction *, PHINode *> &PHIMap,
+ unsigned Depth) {
+ if (Depth > MaxConstantEvolvingDepth)
+ return nullptr;
// Otherwise, we can evaluate this instruction if all of its operands are
// constant or derived from a PHI node themselves.
@@ -6428,7 +6568,7 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
if (!P) {
// Recurse and memoize the results, whether a phi is found or not.
// This recursive call invalidates pointers into PHIMap.
- P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
+ P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap, Depth + 1);
PHIMap[OpInst] = P;
}
if (!P)
@@ -6455,7 +6595,7 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
// Record non-constant instructions contained by the loop.
DenseMap<Instruction *, PHINode *> PHIMap;
- return getConstantEvolvingPHIOperands(I, L, PHIMap);
+ return getConstantEvolvingPHIOperands(I, L, PHIMap, 0);
}
/// EvaluateExpression - Given an expression that passes the
@@ -7014,10 +7154,10 @@ const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
/// A and B isn't important.
///
/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
-static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
+static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
ScalarEvolution &SE) {
uint32_t BW = A.getBitWidth();
- assert(BW == B.getBitWidth() && "Bit widths must be the same.");
+ assert(BW == SE.getTypeSizeInBits(B->getType()) &&
+        "Bit widths must be the same.");
assert(A != 0 && "A must be non-zero.");
// 1. D = gcd(A, N)
@@ -7031,7 +7171,7 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
//
// B is divisible by D if and only if the multiplicity of prime factor 2 for B
// is not less than multiplicity of this prime factor for D.
- if (B.countTrailingZeros() < Mult2)
+ if (SE.GetMinTrailingZeros(B) < Mult2)
return SE.getCouldNotCompute();
// 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
@@ -7049,9 +7189,8 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B,
// I * (B / D) mod (N / D)
// To simplify the computation, we factor out the divide by D:
// (I * B mod N) / D
- APInt Result = (I * B).lshr(Mult2);
-
- return SE.getConstant(Result);
+ const SCEV *D = SE.getConstant(APInt::getOneBitSet(BW, Mult2));
+ return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D);
}
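The solver's modular arithmetic, checked on a small concrete case: solve 6 * X = 4 (mod 16). Mult2 = 1 since A = 2 * 3, B has at least one trailing zero so a solution exists, and the answer comes out as (I * B mod N) / D, matching the comment above (all numbers hypothetical):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t N = 16;               // 2^W with W = 4
  const uint32_t A = 6, B = 4;
  const uint32_t Mult2 = 1;            // trailing zeros of A, so D = 2
  const uint32_t AD = A >> Mult2;      // A / D = 3, odd and thus invertible
  const uint32_t I = 11;               // 3 * 11 = 33 = 1 (mod 16)
  assert((AD * I) % N == 1);
  uint32_t X = ((I * B) % N) >> Mult2; // (I * B mod N) / D = 12 / 2 = 6
  assert((A * X) % N == B);            // 6 * 6 = 36 = 4 (mod 16)
  return 0;
}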
/// Find the roots of the quadratic equation for the given quadratic chrec
@@ -7082,7 +7221,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
// Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
// The B coefficient is M-N/2
APInt B(M);
- B -= sdiv(N,Two);
+ B -= N.sdiv(Two);
// The A coefficient is N/2
APInt A(N.sdiv(Two));
@@ -7233,62 +7372,6 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
return ExitLimit(Distance, getConstant(MaxBECount), false, Predicates);
}
- // As a special case, handle the instance where Step is a positive power of
- // two. In this case, determining whether Step divides Distance evenly can be
- // done by counting and comparing the number of trailing zeros of Step and
- // Distance.
- if (!CountDown) {
- const APInt &StepV = StepC->getAPInt();
- // StepV.isPowerOf2() returns true if StepV is an positive power of two. It
- // also returns true if StepV is maximally negative (eg, INT_MIN), but that
- // case is not handled as this code is guarded by !CountDown.
- if (StepV.isPowerOf2() &&
- GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) {
- // Here we've constrained the equation to be of the form
- //
- // 2^(N + k) * Distance' = (StepV == 2^N) * X (mod 2^W) ... (0)
- //
- // where we're operating on a W bit wide integer domain and k is
- // non-negative. The smallest unsigned solution for X is the trip count.
- //
- // (0) is equivalent to:
- //
- // 2^(N + k) * Distance' - 2^N * X = L * 2^W
- // <=> 2^N(2^k * Distance' - X) = L * 2^(W - N) * 2^N
- // <=> 2^k * Distance' - X = L * 2^(W - N)
- // <=> 2^k * Distance' = L * 2^(W - N) + X ... (1)
- //
- // The smallest X satisfying (1) is unsigned remainder of dividing the LHS
- // by 2^(W - N).
- //
- // <=> X = 2^k * Distance' URem 2^(W - N) ... (2)
- //
- // E.g. say we're solving
- //
- // 2 * Val = 2 * X (in i8) ... (3)
- //
- // then from (2), we get X = Val URem i8 128 (k = 0 in this case).
- //
- // Note: It is tempting to solve (3) by setting X = Val, but Val is not
- // necessarily the smallest unsigned value of X that satisfies (3).
- // E.g. if Val is i8 -127 then the smallest value of X that satisfies (3)
- // is i8 1, not i8 -127
-
- const auto *ModuloResult = getUDivExactExpr(Distance, Step);
-
- // Since SCEV does not have a URem node, we construct one using a truncate
- // and a zero extend.
-
- unsigned NarrowWidth = StepV.getBitWidth() - StepV.countTrailingZeros();
- auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth);
- auto *WideTy = Distance->getType();
-
- const SCEV *Limit =
- getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
- return ExitLimit(Limit, Limit, false, Predicates);
- }
- }
-
// If the condition controls loop exit (the loop exits only if the expression
// is true) and the addition is no-wrap we can use unsigned divide to
// compute the backedge count. In this case, the step may not divide the
@@ -7301,13 +7384,10 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
return ExitLimit(Exact, Exact, false, Predicates);
}
- // Then, try to solve the above equation provided that Start is constant.
- if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) {
- const SCEV *E = SolveLinEquationWithOverflow(
- StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this);
- return ExitLimit(E, E, false, Predicates);
- }
- return getCouldNotCompute();
+ // Solve the general equation.
+ const SCEV *E = SolveLinEquationWithOverflow(
+ StepC->getAPInt(), getNegativeSCEV(Start), *this);
+ return ExitLimit(E, E, false, Predicates);
}
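
A concrete instance of the equation being solved (an illustrative example, not from the patch): for {-10,+,2}<%L> over i8, howFarToZero solves 2 * X == 10 (mod 256), and SolveLinEquationWithOverflow returns X = 5, the iteration at which the addrec first reaches zero.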
ScalarEvolution::ExitLimit
@@ -8488,19 +8568,161 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
llvm_unreachable("covered switch fell through?!");
}
+bool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS,
+ unsigned Depth) {
+ assert(getTypeSizeInBits(LHS->getType()) ==
+ getTypeSizeInBits(RHS->getType()) &&
+ "LHS and RHS have different sizes?");
+ assert(getTypeSizeInBits(FoundLHS->getType()) ==
+ getTypeSizeInBits(FoundRHS->getType()) &&
+ "FoundLHS and FoundRHS have different sizes?");
+ // We want to avoid spending too much compile time analyzing overly large trees.
+ if (Depth > MaxSCEVOperationsImplicationDepth)
+ return false;
+ // We only want to work with the ICMP_SGT comparison so far.
+ // TODO: Extend to ICMP_UGT?
+ if (Pred == ICmpInst::ICMP_SLT) {
+ Pred = ICmpInst::ICMP_SGT;
+ std::swap(LHS, RHS);
+ std::swap(FoundLHS, FoundRHS);
+ }
+ if (Pred != ICmpInst::ICMP_SGT)
+ return false;
+
+ auto GetOpFromSExt = [&](const SCEV *S) {
+ if (auto *Ext = dyn_cast<SCEVSignExtendExpr>(S))
+ return Ext->getOperand();
+ // TODO: If S is a SCEVConstant then you can cheaply "strip" the sext off
+ // the constant in some cases.
+ return S;
+ };
+
+ // Acquire values from extensions.
+ auto *OrigFoundLHS = FoundLHS;
+ LHS = GetOpFromSExt(LHS);
+ FoundLHS = GetOpFromSExt(FoundLHS);
+
+ // Whether the SGT predicate can be proved trivially or using the found context.
+ auto IsSGTViaContext = [&](const SCEV *S1, const SCEV *S2) {
+ return isKnownViaSimpleReasoning(ICmpInst::ICMP_SGT, S1, S2) ||
+ isImpliedViaOperations(ICmpInst::ICMP_SGT, S1, S2, OrigFoundLHS,
+ FoundRHS, Depth + 1);
+ };
+
+ if (auto *LHSAddExpr = dyn_cast<SCEVAddExpr>(LHS)) {
+ // We want to avoid creating any new non-constant SCEVs. Since we are going
+ // to compare the operands to RHS, we must be certain that no size
+ // extensions are needed, so we decline all cases where the types of LHS
+ // and RHS have different sizes.
+ // TODO: Maybe try to get RHS from sext to catch more cases?
+ if (getTypeSizeInBits(LHS->getType()) != getTypeSizeInBits(RHS->getType()))
+ return false;
+
+ // Should not overflow.
+ if (!LHSAddExpr->hasNoSignedWrap())
+ return false;
+
+ auto *LL = LHSAddExpr->getOperand(0);
+ auto *LR = LHSAddExpr->getOperand(1);
+ auto *MinusOne = getNegativeSCEV(getOne(RHS->getType()));
+
+ // Checks that S1 >= 0 && S2 > RHS, trivially or using the found context.
+ auto IsSumGreaterThanRHS = [&](const SCEV *S1, const SCEV *S2) {
+ return IsSGTViaContext(S1, MinusOne) && IsSGTViaContext(S2, RHS);
+ };
+ // Try to prove the following rules:
+ // (LHS = LL + LR) && (LL >= 0) && (LR > RHS) => (LHS > RHS).
+ // (LHS = LL + LR) && (LR >= 0) && (LL > RHS) => (LHS > RHS).
+ if (IsSumGreaterThanRHS(LL, LR) || IsSumGreaterThanRHS(LR, LL))
+ return true;
+ } else if (auto *LHSUnknownExpr = dyn_cast<SCEVUnknown>(LHS)) {
+ Value *LL, *LR;
+ // FIXME: Once we have SDiv implemented, we can get rid of this matching.
+ using namespace llvm::PatternMatch;
+ if (match(LHSUnknownExpr->getValue(), m_SDiv(m_Value(LL), m_Value(LR)))) {
+ // Rules for division.
+ // We are going to perform some comparisons with Denominator and its
+ // derivative expressions. In the general case, creating a SCEV for it may
+ // lead to a complex analysis of the entire graph, and in particular it can
+ // request trip count recalculation for the same loop, which would then be
+ // cached as SCEVCouldNotCompute to avoid infinite recursion. To avoid
+ // this, we only create SCEVs that are constants in this section. So we
+ // bail out if Denominator is not a constant.
+ if (!isa<ConstantInt>(LR))
+ return false;
+
+ auto *Denominator = cast<SCEVConstant>(getSCEV(LR));
+
+ // We want to make sure that LHS = FoundLHS / Denominator. If so, then a
+ // SCEV for the numerator already exists and matches FoundLHS.
+ auto *Numerator = getExistingSCEV(LL);
+ if (!Numerator || Numerator->getType() != FoundLHS->getType())
+ return false;
+
+ // Make sure that the numerator matches FoundLHS and that the denominator
+ // is positive.
+ if (!HasSameValue(Numerator, FoundLHS) || !isKnownPositive(Denominator))
+ return false;
+
+ auto *DTy = Denominator->getType();
+ auto *FRHSTy = FoundRHS->getType();
+ if (DTy->isPointerTy() != FRHSTy->isPointerTy())
+ // One of the types is a pointer and the other one is not. We cannot
+ // extend them properly to a wider type, so we just reject this case.
+ // TODO: Usage of getEffectiveSCEVType for DTy, FRHSTy etc should help
+ // to avoid this check.
+ return false;
+
+ // Given that:
+ // FoundLHS > FoundRHS, LHS = FoundLHS / Denominator, Denominator > 0.
+ auto *WTy = getWiderType(DTy, FRHSTy);
+ auto *DenominatorExt = getNoopOrSignExtend(Denominator, WTy);
+ auto *FoundRHSExt = getNoopOrSignExtend(FoundRHS, WTy);
+
+ // Try to prove the following rule:
+ // (FoundRHS > Denominator - 2) && (RHS <= 0) => (LHS > RHS).
+ // For example, given that FoundLHS > 2, FoundLHS is at least 3. If we
+ // divide it by a Denominator < 4, we will get at least 1.
+ auto *DenomMinusTwo = getMinusSCEV(DenominatorExt, getConstant(WTy, 2));
+ if (isKnownNonPositive(RHS) &&
+ IsSGTViaContext(FoundRHSExt, DenomMinusTwo))
+ return true;
+
+ // Try to prove the following rule:
+ // (FoundRHS > -1 - Denominator) && (RHS < 0) => (LHS > RHS).
+ // For example, given that FoundLHS > -3, FoundLHS is at least -2. If we
+ // divide it by Denominator > 2, then:
+ // 1. If FoundLHS is negative, the result is 0.
+ // 2. If FoundLHS is non-negative, the result is non-negative.
+ // Either way, the result is non-negative.
+ auto *MinusOne = getNegativeSCEV(getOne(WTy));
+ auto *NegDenomMinusOne = getMinusSCEV(MinusOne, DenominatorExt);
+ if (isKnownNegative(RHS) &&
+ IsSGTViaContext(FoundRHSExt, NegDenomMinusOne))
+ return true;
+ }
+ }
+
+ return false;
+}
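
The two division rules are easy to spot-check numerically. A hedged brute-force sketch (standalone C++, not part of the patch) over small signed values, using the same preconditions as the code above (FoundLHS > FoundRHS, Denominator > 0):

    #include <cassert>

    int main() {
      for (int Den = 1; Den <= 50; ++Den)
        for (int FoundLHS = -100; FoundLHS <= 100; ++FoundLHS)
          for (int FoundRHS = -100; FoundRHS <= 100; ++FoundRHS) {
            if (FoundLHS <= FoundRHS)
              continue;                  // context requires FoundLHS > FoundRHS
            int LHS = FoundLHS / Den;    // C++ '/' truncates like SCEV sdiv
            if (FoundRHS > Den - 2)
              assert(LHS >= 1);          // rule 1: LHS > any RHS <= 0
            if (FoundRHS > -1 - Den)
              assert(LHS >= 0);          // rule 2: LHS > any RHS < 0
          }
      return 0;
    }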
+
+bool
+ScalarEvolution::isKnownViaSimpleReasoning(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS) {
+ return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
+ IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
+ IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
+ isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
+}
+
bool
ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS) {
- auto IsKnownPredicateFull =
- [this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
- return isKnownPredicateViaConstantRanges(Pred, LHS, RHS) ||
- IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
- IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
- isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
- };
-
switch (Pred) {
default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
case ICmpInst::ICMP_EQ:
@@ -8510,30 +8732,34 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
break;
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- if (IsKnownPredicateFull(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
- IsKnownPredicateFull(ICmpInst::ICMP_SGE, RHS, FoundRHS))
+ if (isKnownViaSimpleReasoning(ICmpInst::ICMP_SLE, LHS, FoundLHS) &&
+ isKnownViaSimpleReasoning(ICmpInst::ICMP_SGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- if (IsKnownPredicateFull(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
- IsKnownPredicateFull(ICmpInst::ICMP_SLE, RHS, FoundRHS))
+ if (isKnownViaSimpleReasoning(ICmpInst::ICMP_SGE, LHS, FoundLHS) &&
+ isKnownViaSimpleReasoning(ICmpInst::ICMP_SLE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
- if (IsKnownPredicateFull(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
- IsKnownPredicateFull(ICmpInst::ICMP_UGE, RHS, FoundRHS))
+ if (isKnownViaSimpleReasoning(ICmpInst::ICMP_ULE, LHS, FoundLHS) &&
+ isKnownViaSimpleReasoning(ICmpInst::ICMP_UGE, RHS, FoundRHS))
return true;
break;
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
- if (IsKnownPredicateFull(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
- IsKnownPredicateFull(ICmpInst::ICMP_ULE, RHS, FoundRHS))
+ if (isKnownViaSimpleReasoning(ICmpInst::ICMP_UGE, LHS, FoundLHS) &&
+ isKnownViaSimpleReasoning(ICmpInst::ICMP_ULE, RHS, FoundRHS))
return true;
break;
}
+ // Maybe it can be proved via operations?
+ if (isImpliedViaOperations(Pred, LHS, RHS, FoundLHS, FoundRHS))
+ return true;
+
return false;
}
@@ -9524,6 +9750,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
ValueExprMap(std::move(Arg.ValueExprMap)),
PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
+ MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)),
BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
PredicatedBackedgeTakenCounts(
std::move(Arg.PredicatedBackedgeTakenCounts)),
@@ -9621,6 +9848,13 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
OS << "Unpredictable predicated backedge-taken count. ";
}
OS << "\n";
+
+ if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+ OS << "Loop ";
+ L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
+ OS << ": ";
+ OS << "Trip multiple is " << SE->getSmallConstantTripMultiple(L) << "\n";
+ }
}
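
With this addition, the ScalarEvolution analysis printout for a loop whose backedge-taken count is loop-invariant also reports something like "Loop %header: Trip multiple is 4" when every execution is known to run a multiple of 4 iterations (the multiple is 1 when nothing better is known).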
static StringRef loopDispositionToStr(ScalarEvolution::LoopDisposition LD) {
@@ -9929,6 +10163,7 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
SignedRanges.erase(S);
ExprValueMap.erase(S);
HasRecMap.erase(S);
+ MinTrailingZerosCache.erase(S);
auto RemoveSCEVFromBackedgeMap =
[S, this](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index d15a7dbd20e6..6dd10441c4cb 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1268,8 +1268,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
if (PostIncLoops.count(L)) {
PostIncLoopSet Loops;
Loops.insert(L);
- Normalized = cast<SCEVAddRecExpr>(TransformForPostIncUse(
- Normalize, S, nullptr, nullptr, Loops, SE, SE.DT));
+ Normalized = cast<SCEVAddRecExpr>(normalizeForPostIncUse(S, Loops, SE));
}
// Strip off any non-loop-dominating component from the addrec start.
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index c1f9503816ee..2aaa4c1ae117 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -12,243 +12,100 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
using namespace llvm;
-/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
-/// and now we need to decide whether the user should use the preinc or post-inc
-/// value. If this user should use the post-inc version of the IV, return true.
-///
-/// Choosing wrong here can break dominance properties (if we choose to use the
-/// post-inc value when we cannot) or it can end up adding extra live-ranges to
-/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
-/// should use the post-inc value).
-static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
- const Loop *L, DominatorTree *DT) {
- // If the user is in the loop, use the preinc value.
- if (L->contains(User)) return false;
-
- BasicBlock *LatchBlock = L->getLoopLatch();
- if (!LatchBlock)
- return false;
-
- // Ok, the user is outside of the loop. If it is dominated by the latch
- // block, use the post-inc value.
- if (DT->dominates(LatchBlock, User->getParent()))
- return true;
-
- // There is one case we have to be careful of: PHI nodes. These little guys
- // can live in blocks that are not dominated by the latch block, but (since
- // their uses occur in the predecessor block, not the block the PHI lives in)
- // should still use the post-inc value. Check for this case now.
- PHINode *PN = dyn_cast<PHINode>(User);
- if (!PN || !Operand) return false; // not a phi, not dominated by latch block.
-
- // Look at all of the uses of Operand by the PHI node. If any use corresponds
- // to a block that is not dominated by the latch block, give up and use the
- // preincremented value.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == Operand &&
- !DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
- return false;
-
- // Okay, all uses of Operand by PN are in predecessor blocks that really are
- // dominated by the latch block. Use the post-incremented value.
- return true;
-}
+/// TransformKind - Different types of transformations that
+/// NormalizeDenormalizeRewriter can do.
+enum TransformKind {
+ /// Normalize - Normalize according to the given loops.
+ Normalize,
+ /// Denormalize - Perform the inverse transform on the expression with the
+ /// given loop set.
+ Denormalize
+};
namespace {
-
-/// Hold the state used during post-inc expression transformation, including a
-/// map of transformed expressions.
-class PostIncTransform {
- TransformKind Kind;
- PostIncLoopSet &Loops;
- ScalarEvolution &SE;
- DominatorTree &DT;
-
- DenseMap<const SCEV*, const SCEV*> Transformed;
-
-public:
- PostIncTransform(TransformKind kind, PostIncLoopSet &loops,
- ScalarEvolution &se, DominatorTree &dt):
- Kind(kind), Loops(loops), SE(se), DT(dt) {}
-
- const SCEV *TransformSubExpr(const SCEV *S, Instruction *User,
- Value *OperandValToReplace);
-
-protected:
- const SCEV *TransformImpl(const SCEV *S, Instruction *User,
- Value *OperandValToReplace);
+struct NormalizeDenormalizeRewriter
+ : public SCEVRewriteVisitor<NormalizeDenormalizeRewriter> {
+ const TransformKind Kind;
+
+ // NB! Pred is a function_ref. Storing it here is okay only because
+ // we're careful about the lifetime of NormalizeDenormalizeRewriter.
+ const NormalizePredTy Pred;
+
+ NormalizeDenormalizeRewriter(TransformKind Kind, NormalizePredTy Pred,
+ ScalarEvolution &SE)
+ : SCEVRewriteVisitor<NormalizeDenormalizeRewriter>(SE), Kind(Kind),
+ Pred(Pred) {}
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr);
};
-
} // namespace
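
The "NB!" above is worth illustrating: function_ref is non-owning, so a stored one is safe only while the underlying callable lives. A minimal hedged sketch (illustrative only, not LLVM code):

    #include "llvm/ADT/STLExtras.h"

    struct Holder {
      llvm::function_ref<bool(int)> Pred; // non-owning view, like the rewriter
    };

    bool allPositive(int X) {
      auto L = [](int V) { return V > 0; };
      Holder H{L};      // fine: L outlives H within this scope
      return H.Pred(X); // OK
    }
    // Returning H, or constructing it from a temporary lambda, would leave
    // Pred dangling -- NormalizeDenormalizeRewriter avoids this by being a
    // short-lived local in the normalize/denormalize entry points below.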
-/// Implement post-inc transformation for all valid expression types.
-const SCEV *PostIncTransform::
-TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
-
- if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) {
- const SCEV *O = X->getOperand();
- const SCEV *N = TransformSubExpr(O, User, OperandValToReplace);
- if (O != N)
- switch (S->getSCEVType()) {
- case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType());
- case scSignExtend: return SE.getSignExtendExpr(N, S->getType());
- case scTruncate: return SE.getTruncateExpr(N, S->getType());
- default: llvm_unreachable("Unexpected SCEVCastExpr kind!");
- }
- return S;
- }
-
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- // An addrec. This is the interesting part.
- SmallVector<const SCEV *, 8> Operands;
- const Loop *L = AR->getLoop();
- // The addrec conceptually uses its operands at loop entry.
- Instruction *LUser = &L->getHeader()->front();
- // Transform each operand.
- for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
- I != E; ++I) {
- Operands.push_back(TransformSubExpr(*I, LUser, nullptr));
+const SCEV *
+NormalizeDenormalizeRewriter::visitAddRecExpr(const SCEVAddRecExpr *AR) {
+ SmallVector<const SCEV *, 8> Operands;
+
+ transform(AR->operands(), std::back_inserter(Operands),
+ [&](const SCEV *Op) { return visit(Op); });
+
+ // Conservatively use AnyWrap until/unless we need FlagNW.
+ const SCEV *Result =
+ SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap);
+ switch (Kind) {
+ case Normalize:
+ // We want to normalize the step expression, because otherwise we might not
+ // be able to denormalize back to the original expression.
+ //
+ // Here is an example of what will happen if we don't normalize the step:
+ // ORIGINAL ISE:
+ // {(100 /u {1,+,1}<%bb16>),+,(100 /u {1,+,1}<%bb16>)}<%bb25>
+ // NORMALIZED ISE:
+ // {((-1 * (100 /u {1,+,1}<%bb16>)) + (100 /u {0,+,1}<%bb16>)),+,
+ // (100 /u {0,+,1}<%bb16>)}<%bb25>
+ // DENORMALIZED BACK ISE:
+ // {((2 * (100 /u {1,+,1}<%bb16>)) + (-1 * (100 /u {2,+,1}<%bb16>))),+,
+ // (100 /u {1,+,1}<%bb16>)}<%bb25>
+ // Note that the initial value changes after normalization +
+ // denormalization, which isn't correct.
+ if (Pred(AR)) {
+ const SCEV *TransformedStep = visit(AR->getStepRecurrence(SE));
+ Result = SE.getMinusSCEV(Result, TransformedStep);
}
- // Conservatively use AnyWrap until/unless we need FlagNW.
- const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
- switch (Kind) {
- case NormalizeAutodetect:
- // Normalize this SCEV by subtracting the expression for the final step.
- // We only allow affine AddRecs to be normalized, otherwise we would not
- // be able to correctly denormalize.
- // e.g. {1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2}
- // Normalized form: {-2,+,1,+,2}
- // Denormalized form: {1,+,3,+,2}
- //
- // However, denormalization would use a different step expression than
- // normalization (see getPostIncExpr), generating the wrong final
- // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2}
- if (AR->isAffine() &&
- IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
- const SCEV *TransformedStep =
- TransformSubExpr(AR->getStepRecurrence(SE),
- User, OperandValToReplace);
- Result = SE.getMinusSCEV(Result, TransformedStep);
- Loops.insert(L);
- }
-#if 0
- // This assert is conceptually correct, but ScalarEvolution currently
- // sometimes fails to canonicalize two equal SCEVs to exactly the same
- // form. It's possibly a pessimization when this happens, but it isn't a
- // correctness problem, so disable this assert for now.
- assert(S == TransformSubExpr(Result, User, OperandValToReplace) &&
- "SCEV normalization is not invertible!");
-#endif
- break;
- case Normalize:
- // We want to normalize step expression, because otherwise we might not be
- // able to denormalize to the original expression.
- //
- // Here is an example what will happen if we don't normalize step:
- // ORIGINAL ISE:
- // {(100 /u {1,+,1}<%bb16>),+,(100 /u {1,+,1}<%bb16>)}<%bb25>
- // NORMALIZED ISE:
- // {((-1 * (100 /u {1,+,1}<%bb16>)) + (100 /u {0,+,1}<%bb16>)),+,
- // (100 /u {0,+,1}<%bb16>)}<%bb25>
- // DENORMALIZED BACK ISE:
- // {((2 * (100 /u {1,+,1}<%bb16>)) + (-1 * (100 /u {2,+,1}<%bb16>))),+,
- // (100 /u {1,+,1}<%bb16>)}<%bb25>
- // Note that the initial value changes after normalization +
- // denormalization, which isn't correct.
- if (Loops.count(L)) {
- const SCEV *TransformedStep =
- TransformSubExpr(AR->getStepRecurrence(SE),
- User, OperandValToReplace);
- Result = SE.getMinusSCEV(Result, TransformedStep);
- }
-#if 0
- // See the comment on the assert above.
- assert(S == TransformSubExpr(Result, User, OperandValToReplace) &&
- "SCEV normalization is not invertible!");
-#endif
- break;
- case Denormalize:
- // Here we want to normalize step expressions for the same reasons, as
- // stated above.
- if (Loops.count(L)) {
- const SCEV *TransformedStep =
- TransformSubExpr(AR->getStepRecurrence(SE),
- User, OperandValToReplace);
- Result = SE.getAddExpr(Result, TransformedStep);
- }
- break;
+ break;
+ case Denormalize:
+ // Here we want to normalize step expressions for the same reasons as
+ // stated above.
+ if (Pred(AR)) {
+ const SCEV *TransformedStep = visit(AR->getStepRecurrence(SE));
+ Result = SE.getAddExpr(Result, TransformedStep);
}
- return Result;
- }
-
- if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) {
- SmallVector<const SCEV *, 8> Operands;
- bool Changed = false;
- // Transform each operand.
- for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
- I != E; ++I) {
- const SCEV *O = *I;
- const SCEV *N = TransformSubExpr(O, User, OperandValToReplace);
- Changed |= N != O;
- Operands.push_back(N);
- }
- // If any operand actually changed, return a transformed result.
- if (Changed)
- switch (S->getSCEVType()) {
- case scAddExpr: return SE.getAddExpr(Operands);
- case scMulExpr: return SE.getMulExpr(Operands);
- case scSMaxExpr: return SE.getSMaxExpr(Operands);
- case scUMaxExpr: return SE.getUMaxExpr(Operands);
- default: llvm_unreachable("Unexpected SCEVNAryExpr kind!");
- }
- return S;
- }
-
- if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
- const SCEV *LO = X->getLHS();
- const SCEV *RO = X->getRHS();
- const SCEV *LN = TransformSubExpr(LO, User, OperandValToReplace);
- const SCEV *RN = TransformSubExpr(RO, User, OperandValToReplace);
- if (LO != LN || RO != RN)
- return SE.getUDivExpr(LN, RN);
- return S;
+ break;
}
-
- llvm_unreachable("Unexpected SCEV kind!");
+ return Result;
}
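
Concretely (an illustrative example in the chrec notation used above): normalizing the affine addrec {S,+,X}<%L> when Pred accepts %L subtracts the step, giving {(S - X),+,X}<%L>, and denormalizing adds it back. The step itself is rewritten by visit() first, which is what keeps the round trip exact when the step mentions another addrec, as in the /u example above.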
-/// Manage recursive transformation across an expression DAG. Revisiting
-/// expressions would lead to exponential recursion.
-const SCEV *PostIncTransform::
-TransformSubExpr(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
-
- if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
- return S;
-
- const SCEV *Result = Transformed.lookup(S);
- if (Result)
- return Result;
+const SCEV *llvm::normalizeForPostIncUse(const SCEV *S,
+ const PostIncLoopSet &Loops,
+ ScalarEvolution &SE) {
+ auto Pred = [&](const SCEVAddRecExpr *AR) {
+ return Loops.count(AR->getLoop());
+ };
+ return NormalizeDenormalizeRewriter(Normalize, Pred, SE).visit(S);
+}
- Result = TransformImpl(S, User, OperandValToReplace);
- Transformed[S] = Result;
- return Result;
+const SCEV *llvm::normalizeForPostIncUseIf(const SCEV *S, NormalizePredTy Pred,
+ ScalarEvolution &SE) {
+ return NormalizeDenormalizeRewriter(Normalize, Pred, SE).visit(S);
}
-/// Top level driver for transforming an expression DAG into its requested
-/// post-inc form (either "Normalized" or "Denormalized").
-const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
- const SCEV *S,
- Instruction *User,
- Value *OperandValToReplace,
- PostIncLoopSet &Loops,
- ScalarEvolution &SE,
- DominatorTree &DT) {
- PostIncTransform Transform(Kind, Loops, SE, DT);
- return Transform.TransformSubExpr(S, User, OperandValToReplace);
+const SCEV *llvm::denormalizeForPostIncUse(const SCEV *S,
+ const PostIncLoopSet &Loops,
+ ScalarEvolution &SE) {
+ auto Pred = [&](const SCEVAddRecExpr *AR) {
+ return Loops.count(AR->getLoop());
+ };
+ return NormalizeDenormalizeRewriter(Denormalize, Pred, SE).visit(S);
}
diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp
index 79dc84e25533..470f4bee1e0a 100644
--- a/lib/Analysis/SparsePropagation.cpp
+++ b/lib/Analysis/SparsePropagation.cpp
@@ -195,7 +195,7 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
Succs.assign(TI.getNumSuccessors(), true);
return;
}
- SwitchInst::CaseIt Case = SI.findCaseValue(cast<ConstantInt>(C));
+ SwitchInst::CaseHandle Case = *SI.findCaseValue(cast<ConstantInt>(C));
Succs[Case.getSuccessorIndex()] = true;
}
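
The change reflects SwitchInst's newer case API: findCaseValue now returns an iterator whose dereference yields a CaseHandle. A minimal sketch of the shape (assuming an in-scope SwitchInst &SI and ConstantInt *C):

    auto CaseIt = SI.findCaseValue(C);            // iterator (CaseIt)
    SwitchInst::CaseHandle Case = *CaseIt;        // handle to the matched case
    unsigned SuccIdx = Case.getSuccessorIndex();  // index into successors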
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index 112118ab77eb..be734fa91425 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -82,24 +82,24 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
if (T.getArch() == Triple::r600 ||
T.getArch() == Triple::amdgcn) {
- TLI.setUnavailable(LibFunc::ldexp);
- TLI.setUnavailable(LibFunc::ldexpf);
- TLI.setUnavailable(LibFunc::ldexpl);
- TLI.setUnavailable(LibFunc::exp10);
- TLI.setUnavailable(LibFunc::exp10f);
- TLI.setUnavailable(LibFunc::exp10l);
- TLI.setUnavailable(LibFunc::log10);
- TLI.setUnavailable(LibFunc::log10f);
- TLI.setUnavailable(LibFunc::log10l);
+ TLI.setUnavailable(LibFunc_ldexp);
+ TLI.setUnavailable(LibFunc_ldexpf);
+ TLI.setUnavailable(LibFunc_ldexpl);
+ TLI.setUnavailable(LibFunc_exp10);
+ TLI.setUnavailable(LibFunc_exp10f);
+ TLI.setUnavailable(LibFunc_exp10l);
+ TLI.setUnavailable(LibFunc_log10);
+ TLI.setUnavailable(LibFunc_log10f);
+ TLI.setUnavailable(LibFunc_log10l);
}
 // There are no library implementations of memcpy and memset for AMD GPUs and
// these can be difficult to lower in the backend.
if (T.getArch() == Triple::r600 ||
T.getArch() == Triple::amdgcn) {
- TLI.setUnavailable(LibFunc::memcpy);
- TLI.setUnavailable(LibFunc::memset);
- TLI.setUnavailable(LibFunc::memset_pattern16);
+ TLI.setUnavailable(LibFunc_memcpy);
+ TLI.setUnavailable(LibFunc_memset);
+ TLI.setUnavailable(LibFunc_memset_pattern16);
return;
}
@@ -107,21 +107,21 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// All versions of watchOS support it.
if (T.isMacOSX()) {
if (T.isMacOSXVersionLT(10, 5))
- TLI.setUnavailable(LibFunc::memset_pattern16);
+ TLI.setUnavailable(LibFunc_memset_pattern16);
} else if (T.isiOS()) {
if (T.isOSVersionLT(3, 0))
- TLI.setUnavailable(LibFunc::memset_pattern16);
+ TLI.setUnavailable(LibFunc_memset_pattern16);
} else if (!T.isWatchOS()) {
- TLI.setUnavailable(LibFunc::memset_pattern16);
+ TLI.setUnavailable(LibFunc_memset_pattern16);
}
if (!hasSinCosPiStret(T)) {
- TLI.setUnavailable(LibFunc::sinpi);
- TLI.setUnavailable(LibFunc::sinpif);
- TLI.setUnavailable(LibFunc::cospi);
- TLI.setUnavailable(LibFunc::cospif);
- TLI.setUnavailable(LibFunc::sincospi_stret);
- TLI.setUnavailable(LibFunc::sincospif_stret);
+ TLI.setUnavailable(LibFunc_sinpi);
+ TLI.setUnavailable(LibFunc_sinpif);
+ TLI.setUnavailable(LibFunc_cospi);
+ TLI.setUnavailable(LibFunc_cospif);
+ TLI.setUnavailable(LibFunc_sincospi_stret);
+ TLI.setUnavailable(LibFunc_sincospif_stret);
}
if (T.isMacOSX() && T.getArch() == Triple::x86 &&
@@ -131,179 +131,179 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// has a $UNIX2003 suffix. The two implementations are identical except
// for the return value in some edge cases. However, we don't want to
// generate code that depends on the old symbols.
- TLI.setAvailableWithName(LibFunc::fwrite, "fwrite$UNIX2003");
- TLI.setAvailableWithName(LibFunc::fputs, "fputs$UNIX2003");
+ TLI.setAvailableWithName(LibFunc_fwrite, "fwrite$UNIX2003");
+ TLI.setAvailableWithName(LibFunc_fputs, "fputs$UNIX2003");
}
// iprintf and friends are only available on XCore and TCE.
if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) {
- TLI.setUnavailable(LibFunc::iprintf);
- TLI.setUnavailable(LibFunc::siprintf);
- TLI.setUnavailable(LibFunc::fiprintf);
+ TLI.setUnavailable(LibFunc_iprintf);
+ TLI.setUnavailable(LibFunc_siprintf);
+ TLI.setUnavailable(LibFunc_fiprintf);
}
if (T.isOSWindows() && !T.isOSCygMing()) {
// Win32 does not support long double
- TLI.setUnavailable(LibFunc::acosl);
- TLI.setUnavailable(LibFunc::asinl);
- TLI.setUnavailable(LibFunc::atanl);
- TLI.setUnavailable(LibFunc::atan2l);
- TLI.setUnavailable(LibFunc::ceill);
- TLI.setUnavailable(LibFunc::copysignl);
- TLI.setUnavailable(LibFunc::cosl);
- TLI.setUnavailable(LibFunc::coshl);
- TLI.setUnavailable(LibFunc::expl);
- TLI.setUnavailable(LibFunc::fabsf); // Win32 and Win64 both lack fabsf
- TLI.setUnavailable(LibFunc::fabsl);
- TLI.setUnavailable(LibFunc::floorl);
- TLI.setUnavailable(LibFunc::fmaxl);
- TLI.setUnavailable(LibFunc::fminl);
- TLI.setUnavailable(LibFunc::fmodl);
- TLI.setUnavailable(LibFunc::frexpl);
- TLI.setUnavailable(LibFunc::ldexpf);
- TLI.setUnavailable(LibFunc::ldexpl);
- TLI.setUnavailable(LibFunc::logl);
- TLI.setUnavailable(LibFunc::modfl);
- TLI.setUnavailable(LibFunc::powl);
- TLI.setUnavailable(LibFunc::sinl);
- TLI.setUnavailable(LibFunc::sinhl);
- TLI.setUnavailable(LibFunc::sqrtl);
- TLI.setUnavailable(LibFunc::tanl);
- TLI.setUnavailable(LibFunc::tanhl);
+ TLI.setUnavailable(LibFunc_acosl);
+ TLI.setUnavailable(LibFunc_asinl);
+ TLI.setUnavailable(LibFunc_atanl);
+ TLI.setUnavailable(LibFunc_atan2l);
+ TLI.setUnavailable(LibFunc_ceill);
+ TLI.setUnavailable(LibFunc_copysignl);
+ TLI.setUnavailable(LibFunc_cosl);
+ TLI.setUnavailable(LibFunc_coshl);
+ TLI.setUnavailable(LibFunc_expl);
+ TLI.setUnavailable(LibFunc_fabsf); // Win32 and Win64 both lack fabsf
+ TLI.setUnavailable(LibFunc_fabsl);
+ TLI.setUnavailable(LibFunc_floorl);
+ TLI.setUnavailable(LibFunc_fmaxl);
+ TLI.setUnavailable(LibFunc_fminl);
+ TLI.setUnavailable(LibFunc_fmodl);
+ TLI.setUnavailable(LibFunc_frexpl);
+ TLI.setUnavailable(LibFunc_ldexpf);
+ TLI.setUnavailable(LibFunc_ldexpl);
+ TLI.setUnavailable(LibFunc_logl);
+ TLI.setUnavailable(LibFunc_modfl);
+ TLI.setUnavailable(LibFunc_powl);
+ TLI.setUnavailable(LibFunc_sinl);
+ TLI.setUnavailable(LibFunc_sinhl);
+ TLI.setUnavailable(LibFunc_sqrtl);
+ TLI.setUnavailable(LibFunc_tanl);
+ TLI.setUnavailable(LibFunc_tanhl);
// Win32 only has C89 math
- TLI.setUnavailable(LibFunc::acosh);
- TLI.setUnavailable(LibFunc::acoshf);
- TLI.setUnavailable(LibFunc::acoshl);
- TLI.setUnavailable(LibFunc::asinh);
- TLI.setUnavailable(LibFunc::asinhf);
- TLI.setUnavailable(LibFunc::asinhl);
- TLI.setUnavailable(LibFunc::atanh);
- TLI.setUnavailable(LibFunc::atanhf);
- TLI.setUnavailable(LibFunc::atanhl);
- TLI.setUnavailable(LibFunc::cbrt);
- TLI.setUnavailable(LibFunc::cbrtf);
- TLI.setUnavailable(LibFunc::cbrtl);
- TLI.setUnavailable(LibFunc::exp2);
- TLI.setUnavailable(LibFunc::exp2f);
- TLI.setUnavailable(LibFunc::exp2l);
- TLI.setUnavailable(LibFunc::expm1);
- TLI.setUnavailable(LibFunc::expm1f);
- TLI.setUnavailable(LibFunc::expm1l);
- TLI.setUnavailable(LibFunc::log2);
- TLI.setUnavailable(LibFunc::log2f);
- TLI.setUnavailable(LibFunc::log2l);
- TLI.setUnavailable(LibFunc::log1p);
- TLI.setUnavailable(LibFunc::log1pf);
- TLI.setUnavailable(LibFunc::log1pl);
- TLI.setUnavailable(LibFunc::logb);
- TLI.setUnavailable(LibFunc::logbf);
- TLI.setUnavailable(LibFunc::logbl);
- TLI.setUnavailable(LibFunc::nearbyint);
- TLI.setUnavailable(LibFunc::nearbyintf);
- TLI.setUnavailable(LibFunc::nearbyintl);
- TLI.setUnavailable(LibFunc::rint);
- TLI.setUnavailable(LibFunc::rintf);
- TLI.setUnavailable(LibFunc::rintl);
- TLI.setUnavailable(LibFunc::round);
- TLI.setUnavailable(LibFunc::roundf);
- TLI.setUnavailable(LibFunc::roundl);
- TLI.setUnavailable(LibFunc::trunc);
- TLI.setUnavailable(LibFunc::truncf);
- TLI.setUnavailable(LibFunc::truncl);
+ TLI.setUnavailable(LibFunc_acosh);
+ TLI.setUnavailable(LibFunc_acoshf);
+ TLI.setUnavailable(LibFunc_acoshl);
+ TLI.setUnavailable(LibFunc_asinh);
+ TLI.setUnavailable(LibFunc_asinhf);
+ TLI.setUnavailable(LibFunc_asinhl);
+ TLI.setUnavailable(LibFunc_atanh);
+ TLI.setUnavailable(LibFunc_atanhf);
+ TLI.setUnavailable(LibFunc_atanhl);
+ TLI.setUnavailable(LibFunc_cbrt);
+ TLI.setUnavailable(LibFunc_cbrtf);
+ TLI.setUnavailable(LibFunc_cbrtl);
+ TLI.setUnavailable(LibFunc_exp2);
+ TLI.setUnavailable(LibFunc_exp2f);
+ TLI.setUnavailable(LibFunc_exp2l);
+ TLI.setUnavailable(LibFunc_expm1);
+ TLI.setUnavailable(LibFunc_expm1f);
+ TLI.setUnavailable(LibFunc_expm1l);
+ TLI.setUnavailable(LibFunc_log2);
+ TLI.setUnavailable(LibFunc_log2f);
+ TLI.setUnavailable(LibFunc_log2l);
+ TLI.setUnavailable(LibFunc_log1p);
+ TLI.setUnavailable(LibFunc_log1pf);
+ TLI.setUnavailable(LibFunc_log1pl);
+ TLI.setUnavailable(LibFunc_logb);
+ TLI.setUnavailable(LibFunc_logbf);
+ TLI.setUnavailable(LibFunc_logbl);
+ TLI.setUnavailable(LibFunc_nearbyint);
+ TLI.setUnavailable(LibFunc_nearbyintf);
+ TLI.setUnavailable(LibFunc_nearbyintl);
+ TLI.setUnavailable(LibFunc_rint);
+ TLI.setUnavailable(LibFunc_rintf);
+ TLI.setUnavailable(LibFunc_rintl);
+ TLI.setUnavailable(LibFunc_round);
+ TLI.setUnavailable(LibFunc_roundf);
+ TLI.setUnavailable(LibFunc_roundl);
+ TLI.setUnavailable(LibFunc_trunc);
+ TLI.setUnavailable(LibFunc_truncf);
+ TLI.setUnavailable(LibFunc_truncl);
// Win32 provides some C99 math with mangled names
- TLI.setAvailableWithName(LibFunc::copysign, "_copysign");
+ TLI.setAvailableWithName(LibFunc_copysign, "_copysign");
if (T.getArch() == Triple::x86) {
// Win32 on x86 implements single-precision math functions as macros
- TLI.setUnavailable(LibFunc::acosf);
- TLI.setUnavailable(LibFunc::asinf);
- TLI.setUnavailable(LibFunc::atanf);
- TLI.setUnavailable(LibFunc::atan2f);
- TLI.setUnavailable(LibFunc::ceilf);
- TLI.setUnavailable(LibFunc::copysignf);
- TLI.setUnavailable(LibFunc::cosf);
- TLI.setUnavailable(LibFunc::coshf);
- TLI.setUnavailable(LibFunc::expf);
- TLI.setUnavailable(LibFunc::floorf);
- TLI.setUnavailable(LibFunc::fminf);
- TLI.setUnavailable(LibFunc::fmaxf);
- TLI.setUnavailable(LibFunc::fmodf);
- TLI.setUnavailable(LibFunc::logf);
- TLI.setUnavailable(LibFunc::log10f);
- TLI.setUnavailable(LibFunc::modff);
- TLI.setUnavailable(LibFunc::powf);
- TLI.setUnavailable(LibFunc::sinf);
- TLI.setUnavailable(LibFunc::sinhf);
- TLI.setUnavailable(LibFunc::sqrtf);
- TLI.setUnavailable(LibFunc::tanf);
- TLI.setUnavailable(LibFunc::tanhf);
+ TLI.setUnavailable(LibFunc_acosf);
+ TLI.setUnavailable(LibFunc_asinf);
+ TLI.setUnavailable(LibFunc_atanf);
+ TLI.setUnavailable(LibFunc_atan2f);
+ TLI.setUnavailable(LibFunc_ceilf);
+ TLI.setUnavailable(LibFunc_copysignf);
+ TLI.setUnavailable(LibFunc_cosf);
+ TLI.setUnavailable(LibFunc_coshf);
+ TLI.setUnavailable(LibFunc_expf);
+ TLI.setUnavailable(LibFunc_floorf);
+ TLI.setUnavailable(LibFunc_fminf);
+ TLI.setUnavailable(LibFunc_fmaxf);
+ TLI.setUnavailable(LibFunc_fmodf);
+ TLI.setUnavailable(LibFunc_logf);
+ TLI.setUnavailable(LibFunc_log10f);
+ TLI.setUnavailable(LibFunc_modff);
+ TLI.setUnavailable(LibFunc_powf);
+ TLI.setUnavailable(LibFunc_sinf);
+ TLI.setUnavailable(LibFunc_sinhf);
+ TLI.setUnavailable(LibFunc_sqrtf);
+ TLI.setUnavailable(LibFunc_tanf);
+ TLI.setUnavailable(LibFunc_tanhf);
}
 // Win32 does *not* provide these functions, but they are
// generally available on POSIX-compliant systems:
- TLI.setUnavailable(LibFunc::access);
- TLI.setUnavailable(LibFunc::bcmp);
- TLI.setUnavailable(LibFunc::bcopy);
- TLI.setUnavailable(LibFunc::bzero);
- TLI.setUnavailable(LibFunc::chmod);
- TLI.setUnavailable(LibFunc::chown);
- TLI.setUnavailable(LibFunc::closedir);
- TLI.setUnavailable(LibFunc::ctermid);
- TLI.setUnavailable(LibFunc::fdopen);
- TLI.setUnavailable(LibFunc::ffs);
- TLI.setUnavailable(LibFunc::fileno);
- TLI.setUnavailable(LibFunc::flockfile);
- TLI.setUnavailable(LibFunc::fseeko);
- TLI.setUnavailable(LibFunc::fstat);
- TLI.setUnavailable(LibFunc::fstatvfs);
- TLI.setUnavailable(LibFunc::ftello);
- TLI.setUnavailable(LibFunc::ftrylockfile);
- TLI.setUnavailable(LibFunc::funlockfile);
- TLI.setUnavailable(LibFunc::getc_unlocked);
- TLI.setUnavailable(LibFunc::getitimer);
- TLI.setUnavailable(LibFunc::getlogin_r);
- TLI.setUnavailable(LibFunc::getpwnam);
- TLI.setUnavailable(LibFunc::gettimeofday);
- TLI.setUnavailable(LibFunc::htonl);
- TLI.setUnavailable(LibFunc::htons);
- TLI.setUnavailable(LibFunc::lchown);
- TLI.setUnavailable(LibFunc::lstat);
- TLI.setUnavailable(LibFunc::memccpy);
- TLI.setUnavailable(LibFunc::mkdir);
- TLI.setUnavailable(LibFunc::ntohl);
- TLI.setUnavailable(LibFunc::ntohs);
- TLI.setUnavailable(LibFunc::open);
- TLI.setUnavailable(LibFunc::opendir);
- TLI.setUnavailable(LibFunc::pclose);
- TLI.setUnavailable(LibFunc::popen);
- TLI.setUnavailable(LibFunc::pread);
- TLI.setUnavailable(LibFunc::pwrite);
- TLI.setUnavailable(LibFunc::read);
- TLI.setUnavailable(LibFunc::readlink);
- TLI.setUnavailable(LibFunc::realpath);
- TLI.setUnavailable(LibFunc::rmdir);
- TLI.setUnavailable(LibFunc::setitimer);
- TLI.setUnavailable(LibFunc::stat);
- TLI.setUnavailable(LibFunc::statvfs);
- TLI.setUnavailable(LibFunc::stpcpy);
- TLI.setUnavailable(LibFunc::stpncpy);
- TLI.setUnavailable(LibFunc::strcasecmp);
- TLI.setUnavailable(LibFunc::strncasecmp);
- TLI.setUnavailable(LibFunc::times);
- TLI.setUnavailable(LibFunc::uname);
- TLI.setUnavailable(LibFunc::unlink);
- TLI.setUnavailable(LibFunc::unsetenv);
- TLI.setUnavailable(LibFunc::utime);
- TLI.setUnavailable(LibFunc::utimes);
- TLI.setUnavailable(LibFunc::write);
+ TLI.setUnavailable(LibFunc_access);
+ TLI.setUnavailable(LibFunc_bcmp);
+ TLI.setUnavailable(LibFunc_bcopy);
+ TLI.setUnavailable(LibFunc_bzero);
+ TLI.setUnavailable(LibFunc_chmod);
+ TLI.setUnavailable(LibFunc_chown);
+ TLI.setUnavailable(LibFunc_closedir);
+ TLI.setUnavailable(LibFunc_ctermid);
+ TLI.setUnavailable(LibFunc_fdopen);
+ TLI.setUnavailable(LibFunc_ffs);
+ TLI.setUnavailable(LibFunc_fileno);
+ TLI.setUnavailable(LibFunc_flockfile);
+ TLI.setUnavailable(LibFunc_fseeko);
+ TLI.setUnavailable(LibFunc_fstat);
+ TLI.setUnavailable(LibFunc_fstatvfs);
+ TLI.setUnavailable(LibFunc_ftello);
+ TLI.setUnavailable(LibFunc_ftrylockfile);
+ TLI.setUnavailable(LibFunc_funlockfile);
+ TLI.setUnavailable(LibFunc_getc_unlocked);
+ TLI.setUnavailable(LibFunc_getitimer);
+ TLI.setUnavailable(LibFunc_getlogin_r);
+ TLI.setUnavailable(LibFunc_getpwnam);
+ TLI.setUnavailable(LibFunc_gettimeofday);
+ TLI.setUnavailable(LibFunc_htonl);
+ TLI.setUnavailable(LibFunc_htons);
+ TLI.setUnavailable(LibFunc_lchown);
+ TLI.setUnavailable(LibFunc_lstat);
+ TLI.setUnavailable(LibFunc_memccpy);
+ TLI.setUnavailable(LibFunc_mkdir);
+ TLI.setUnavailable(LibFunc_ntohl);
+ TLI.setUnavailable(LibFunc_ntohs);
+ TLI.setUnavailable(LibFunc_open);
+ TLI.setUnavailable(LibFunc_opendir);
+ TLI.setUnavailable(LibFunc_pclose);
+ TLI.setUnavailable(LibFunc_popen);
+ TLI.setUnavailable(LibFunc_pread);
+ TLI.setUnavailable(LibFunc_pwrite);
+ TLI.setUnavailable(LibFunc_read);
+ TLI.setUnavailable(LibFunc_readlink);
+ TLI.setUnavailable(LibFunc_realpath);
+ TLI.setUnavailable(LibFunc_rmdir);
+ TLI.setUnavailable(LibFunc_setitimer);
+ TLI.setUnavailable(LibFunc_stat);
+ TLI.setUnavailable(LibFunc_statvfs);
+ TLI.setUnavailable(LibFunc_stpcpy);
+ TLI.setUnavailable(LibFunc_stpncpy);
+ TLI.setUnavailable(LibFunc_strcasecmp);
+ TLI.setUnavailable(LibFunc_strncasecmp);
+ TLI.setUnavailable(LibFunc_times);
+ TLI.setUnavailable(LibFunc_uname);
+ TLI.setUnavailable(LibFunc_unlink);
+ TLI.setUnavailable(LibFunc_unsetenv);
+ TLI.setUnavailable(LibFunc_utime);
+ TLI.setUnavailable(LibFunc_utimes);
+ TLI.setUnavailable(LibFunc_write);
 // Win32 does *not* provide these functions, but they are
// specified by C99:
- TLI.setUnavailable(LibFunc::atoll);
- TLI.setUnavailable(LibFunc::frexpf);
- TLI.setUnavailable(LibFunc::llabs);
+ TLI.setUnavailable(LibFunc_atoll);
+ TLI.setUnavailable(LibFunc_frexpf);
+ TLI.setUnavailable(LibFunc_llabs);
}
switch (T.getOS()) {
@@ -311,28 +311,28 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// exp10 and exp10f are not available on OS X until 10.9 and iOS until 7.0
// and their names are __exp10 and __exp10f. exp10l is not available on
// OS X or iOS.
- TLI.setUnavailable(LibFunc::exp10l);
+ TLI.setUnavailable(LibFunc_exp10l);
if (T.isMacOSXVersionLT(10, 9)) {
- TLI.setUnavailable(LibFunc::exp10);
- TLI.setUnavailable(LibFunc::exp10f);
+ TLI.setUnavailable(LibFunc_exp10);
+ TLI.setUnavailable(LibFunc_exp10f);
} else {
- TLI.setAvailableWithName(LibFunc::exp10, "__exp10");
- TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f");
+ TLI.setAvailableWithName(LibFunc_exp10, "__exp10");
+ TLI.setAvailableWithName(LibFunc_exp10f, "__exp10f");
}
break;
case Triple::IOS:
case Triple::TvOS:
case Triple::WatchOS:
- TLI.setUnavailable(LibFunc::exp10l);
+ TLI.setUnavailable(LibFunc_exp10l);
if (!T.isWatchOS() && (T.isOSVersionLT(7, 0) ||
(T.isOSVersionLT(9, 0) &&
(T.getArch() == Triple::x86 ||
T.getArch() == Triple::x86_64)))) {
- TLI.setUnavailable(LibFunc::exp10);
- TLI.setUnavailable(LibFunc::exp10f);
+ TLI.setUnavailable(LibFunc_exp10);
+ TLI.setUnavailable(LibFunc_exp10f);
} else {
- TLI.setAvailableWithName(LibFunc::exp10, "__exp10");
- TLI.setAvailableWithName(LibFunc::exp10f, "__exp10f");
+ TLI.setAvailableWithName(LibFunc_exp10, "__exp10");
+ TLI.setAvailableWithName(LibFunc_exp10f, "__exp10f");
}
break;
case Triple::Linux:
@@ -344,9 +344,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// Fall through to disable all of them.
LLVM_FALLTHROUGH;
default:
- TLI.setUnavailable(LibFunc::exp10);
- TLI.setUnavailable(LibFunc::exp10f);
- TLI.setUnavailable(LibFunc::exp10l);
+ TLI.setUnavailable(LibFunc_exp10);
+ TLI.setUnavailable(LibFunc_exp10f);
+ TLI.setUnavailable(LibFunc_exp10l);
}
// ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and
@@ -364,7 +364,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
case Triple::Linux:
break;
default:
- TLI.setUnavailable(LibFunc::ffsl);
+ TLI.setUnavailable(LibFunc_ffsl);
}
// ffsll is available on at least FreeBSD and Linux (GLIBC):
@@ -380,7 +380,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
case Triple::Linux:
break;
default:
- TLI.setUnavailable(LibFunc::ffsll);
+ TLI.setUnavailable(LibFunc_ffsll);
}
// The following functions are available on at least FreeBSD:
@@ -388,30 +388,30 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// http://svn.freebsd.org/base/head/lib/libc/string/flsl.c
// http://svn.freebsd.org/base/head/lib/libc/string/flsll.c
if (!T.isOSFreeBSD()) {
- TLI.setUnavailable(LibFunc::fls);
- TLI.setUnavailable(LibFunc::flsl);
- TLI.setUnavailable(LibFunc::flsll);
+ TLI.setUnavailable(LibFunc_fls);
+ TLI.setUnavailable(LibFunc_flsl);
+ TLI.setUnavailable(LibFunc_flsll);
}
// The following functions are available on at least Linux:
if (!T.isOSLinux()) {
- TLI.setUnavailable(LibFunc::dunder_strdup);
- TLI.setUnavailable(LibFunc::dunder_strtok_r);
- TLI.setUnavailable(LibFunc::dunder_isoc99_scanf);
- TLI.setUnavailable(LibFunc::dunder_isoc99_sscanf);
- TLI.setUnavailable(LibFunc::under_IO_getc);
- TLI.setUnavailable(LibFunc::under_IO_putc);
- TLI.setUnavailable(LibFunc::memalign);
- TLI.setUnavailable(LibFunc::fopen64);
- TLI.setUnavailable(LibFunc::fseeko64);
- TLI.setUnavailable(LibFunc::fstat64);
- TLI.setUnavailable(LibFunc::fstatvfs64);
- TLI.setUnavailable(LibFunc::ftello64);
- TLI.setUnavailable(LibFunc::lstat64);
- TLI.setUnavailable(LibFunc::open64);
- TLI.setUnavailable(LibFunc::stat64);
- TLI.setUnavailable(LibFunc::statvfs64);
- TLI.setUnavailable(LibFunc::tmpfile64);
+ TLI.setUnavailable(LibFunc_dunder_strdup);
+ TLI.setUnavailable(LibFunc_dunder_strtok_r);
+ TLI.setUnavailable(LibFunc_dunder_isoc99_scanf);
+ TLI.setUnavailable(LibFunc_dunder_isoc99_sscanf);
+ TLI.setUnavailable(LibFunc_under_IO_getc);
+ TLI.setUnavailable(LibFunc_under_IO_putc);
+ TLI.setUnavailable(LibFunc_memalign);
+ TLI.setUnavailable(LibFunc_fopen64);
+ TLI.setUnavailable(LibFunc_fseeko64);
+ TLI.setUnavailable(LibFunc_fstat64);
+ TLI.setUnavailable(LibFunc_fstatvfs64);
+ TLI.setUnavailable(LibFunc_ftello64);
+ TLI.setUnavailable(LibFunc_lstat64);
+ TLI.setUnavailable(LibFunc_open64);
+ TLI.setUnavailable(LibFunc_stat64);
+ TLI.setUnavailable(LibFunc_statvfs64);
+ TLI.setUnavailable(LibFunc_tmpfile64);
}
// As currently implemented in clang, NVPTX code has no standard library to
@@ -427,9 +427,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// optimizations, so this situation should be fixed.
if (T.isNVPTX()) {
TLI.disableAllFunctions();
- TLI.setAvailable(LibFunc::nvvm_reflect);
+ TLI.setAvailable(LibFunc_nvvm_reflect);
} else {
- TLI.setUnavailable(LibFunc::nvvm_reflect);
+ TLI.setUnavailable(LibFunc_nvvm_reflect);
}
TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary);
@@ -500,9 +500,9 @@ static StringRef sanitizeFunctionName(StringRef funcName) {
}
bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
- LibFunc::Func &F) const {
+ LibFunc &F) const {
StringRef const *Start = &StandardNames[0];
- StringRef const *End = &StandardNames[LibFunc::NumLibFuncs];
+ StringRef const *End = &StandardNames[NumLibFuncs];
funcName = sanitizeFunctionName(funcName);
if (funcName.empty())
@@ -513,14 +513,14 @@ bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
return LHS < RHS;
});
if (I != End && *I == funcName) {
- F = (LibFunc::Func)(I - Start);
+ F = (LibFunc)(I - Start);
return true;
}
return false;
}
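
A hedged sketch of how a caller typically consumes this (TLI and Callee are assumed to be in scope; not part of the patch):

    LibFunc F; // note: a plain top-level enum after this change
    if (TLI.getLibFunc(Callee->getName(), F) && TLI.has(F)) {
      // F is e.g. LibFunc_memcpy; availability was checked per-target above.
    }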
bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
- LibFunc::Func F,
+ LibFunc F,
const DataLayout *DL) const {
LLVMContext &Ctx = FTy.getContext();
Type *PCharTy = Type::getInt8PtrTy(Ctx);
@@ -531,504 +531,660 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
unsigned NumParams = FTy.getNumParams();
switch (F) {
- case LibFunc::strlen:
+ case LibFunc_strlen:
return (NumParams == 1 && FTy.getParamType(0)->isPointerTy() &&
FTy.getReturnType()->isIntegerTy());
- case LibFunc::strchr:
- case LibFunc::strrchr:
+ case LibFunc_strchr:
+ case LibFunc_strrchr:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0) == FTy.getReturnType() &&
FTy.getParamType(1)->isIntegerTy());
- case LibFunc::strtol:
- case LibFunc::strtod:
- case LibFunc::strtof:
- case LibFunc::strtoul:
- case LibFunc::strtoll:
- case LibFunc::strtold:
- case LibFunc::strtoull:
+ case LibFunc_strtol:
+ case LibFunc_strtod:
+ case LibFunc_strtof:
+ case LibFunc_strtoul:
+ case LibFunc_strtoll:
+ case LibFunc_strtold:
+ case LibFunc_strtoull:
return ((NumParams == 2 || NumParams == 3) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::strcat:
+ case LibFunc_strcat:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0) == FTy.getReturnType() &&
FTy.getParamType(1) == FTy.getReturnType());
- case LibFunc::strncat:
+ case LibFunc_strncat:
return (NumParams == 3 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0) == FTy.getReturnType() &&
FTy.getParamType(1) == FTy.getReturnType() &&
FTy.getParamType(2)->isIntegerTy());
- case LibFunc::strcpy_chk:
- case LibFunc::stpcpy_chk:
+ case LibFunc_strcpy_chk:
+ case LibFunc_stpcpy_chk:
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
LLVM_FALLTHROUGH;
- case LibFunc::strcpy:
- case LibFunc::stpcpy:
+ case LibFunc_strcpy:
+ case LibFunc_stpcpy:
return (NumParams == 2 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0) == FTy.getParamType(1) &&
FTy.getParamType(0) == PCharTy);
- case LibFunc::strncpy_chk:
- case LibFunc::stpncpy_chk:
+ case LibFunc_strncpy_chk:
+ case LibFunc_stpncpy_chk:
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
LLVM_FALLTHROUGH;
- case LibFunc::strncpy:
- case LibFunc::stpncpy:
+ case LibFunc_strncpy:
+ case LibFunc_stpncpy:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0) == FTy.getParamType(1) &&
FTy.getParamType(0) == PCharTy &&
FTy.getParamType(2)->isIntegerTy());
- case LibFunc::strxfrm:
+ case LibFunc_strxfrm:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::strcmp:
+ case LibFunc_strcmp:
return (NumParams == 2 && FTy.getReturnType()->isIntegerTy(32) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(0) == FTy.getParamType(1));
- case LibFunc::strncmp:
+ case LibFunc_strncmp:
return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(0) == FTy.getParamType(1) &&
FTy.getParamType(2)->isIntegerTy());
- case LibFunc::strspn:
- case LibFunc::strcspn:
+ case LibFunc_strspn:
+ case LibFunc_strcspn:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(0) == FTy.getParamType(1) &&
FTy.getReturnType()->isIntegerTy());
- case LibFunc::strcoll:
- case LibFunc::strcasecmp:
- case LibFunc::strncasecmp:
+ case LibFunc_strcoll:
+ case LibFunc_strcasecmp:
+ case LibFunc_strncasecmp:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::strstr:
+ case LibFunc_strstr:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::strpbrk:
+ case LibFunc_strpbrk:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0) == FTy.getParamType(1));
- case LibFunc::strtok:
- case LibFunc::strtok_r:
+ case LibFunc_strtok:
+ case LibFunc_strtok_r:
return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::scanf:
- case LibFunc::setbuf:
- case LibFunc::setvbuf:
+ case LibFunc_scanf:
+ case LibFunc_setbuf:
+ case LibFunc_setvbuf:
return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::strdup:
- case LibFunc::strndup:
+ case LibFunc_strdup:
+ case LibFunc_strndup:
return (NumParams >= 1 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy());
- case LibFunc::sscanf:
- case LibFunc::stat:
- case LibFunc::statvfs:
- case LibFunc::sprintf:
+ case LibFunc_sscanf:
+ case LibFunc_stat:
+ case LibFunc_statvfs:
+ case LibFunc_siprintf:
+ case LibFunc_sprintf:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::snprintf:
+ case LibFunc_snprintf:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
- case LibFunc::setitimer:
+ case LibFunc_setitimer:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
- case LibFunc::system:
+ case LibFunc_system:
return (NumParams == 1 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::malloc:
+ case LibFunc_malloc:
return (NumParams == 1 && FTy.getReturnType()->isPointerTy());
- case LibFunc::memcmp:
- return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
- FTy.getParamType(1)->isPointerTy() &&
- FTy.getReturnType()->isIntegerTy(32));
+ case LibFunc_memcmp:
+ return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
+ FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy());
- case LibFunc::memchr:
- case LibFunc::memrchr:
- return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
+ case LibFunc_memchr:
+ case LibFunc_memrchr:
+ return (NumParams == 3 && FTy.getReturnType()->isPointerTy() &&
+ FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(1)->isIntegerTy(32) &&
- FTy.getParamType(2)->isIntegerTy() &&
- FTy.getReturnType()->isPointerTy());
- case LibFunc::modf:
- case LibFunc::modff:
- case LibFunc::modfl:
+ IsSizeTTy(FTy.getParamType(2)));
+ case LibFunc_modf:
+ case LibFunc_modff:
+ case LibFunc_modfl:
return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::memcpy_chk:
- case LibFunc::memmove_chk:
+ case LibFunc_memcpy_chk:
+ case LibFunc_memmove_chk:
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
LLVM_FALLTHROUGH;
- case LibFunc::memcpy:
- case LibFunc::mempcpy:
- case LibFunc::memmove:
+ case LibFunc_memcpy:
+ case LibFunc_mempcpy:
+ case LibFunc_memmove:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy() &&
IsSizeTTy(FTy.getParamType(2)));
- case LibFunc::memset_chk:
+ case LibFunc_memset_chk:
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
LLVM_FALLTHROUGH;
- case LibFunc::memset:
+ case LibFunc_memset:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isIntegerTy() &&
IsSizeTTy(FTy.getParamType(2)));
- case LibFunc::memccpy:
+ case LibFunc_memccpy:
return (NumParams >= 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::memalign:
+ case LibFunc_memalign:
return (FTy.getReturnType()->isPointerTy());
- case LibFunc::realloc:
- return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
- FTy.getReturnType()->isPointerTy());
- case LibFunc::read:
+ case LibFunc_realloc:
+ case LibFunc_reallocf:
+ return (NumParams == 2 && FTy.getReturnType() == PCharTy &&
+ FTy.getParamType(0) == FTy.getReturnType() &&
+ IsSizeTTy(FTy.getParamType(1)));
+ case LibFunc_read:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::rewind:
- case LibFunc::rmdir:
- case LibFunc::remove:
- case LibFunc::realpath:
+ case LibFunc_rewind:
+ case LibFunc_rmdir:
+ case LibFunc_remove:
+ case LibFunc_realpath:
return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::rename:
+ case LibFunc_rename:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::readlink:
+ case LibFunc_readlink:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::write:
+ case LibFunc_write:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::bcopy:
- case LibFunc::bcmp:
+ case LibFunc_bcopy:
+ case LibFunc_bcmp:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::bzero:
+ case LibFunc_bzero:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::calloc:
+ case LibFunc_calloc:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy());
- case LibFunc::atof:
- case LibFunc::atoi:
- case LibFunc::atol:
- case LibFunc::atoll:
- case LibFunc::ferror:
- case LibFunc::getenv:
- case LibFunc::getpwnam:
- case LibFunc::pclose:
- case LibFunc::perror:
- case LibFunc::printf:
- case LibFunc::puts:
- case LibFunc::uname:
- case LibFunc::under_IO_getc:
- case LibFunc::unlink:
- case LibFunc::unsetenv:
+ case LibFunc_atof:
+ case LibFunc_atoi:
+ case LibFunc_atol:
+ case LibFunc_atoll:
+ case LibFunc_ferror:
+ case LibFunc_getenv:
+ case LibFunc_getpwnam:
+ case LibFunc_iprintf:
+ case LibFunc_pclose:
+ case LibFunc_perror:
+ case LibFunc_printf:
+ case LibFunc_puts:
+ case LibFunc_uname:
+ case LibFunc_under_IO_getc:
+ case LibFunc_unlink:
+ case LibFunc_unsetenv:
return (NumParams == 1 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::chmod:
- case LibFunc::chown:
- case LibFunc::clearerr:
- case LibFunc::closedir:
- case LibFunc::ctermid:
- case LibFunc::fclose:
- case LibFunc::feof:
- case LibFunc::fflush:
- case LibFunc::fgetc:
- case LibFunc::fileno:
- case LibFunc::flockfile:
- case LibFunc::free:
- case LibFunc::fseek:
- case LibFunc::fseeko64:
- case LibFunc::fseeko:
- case LibFunc::fsetpos:
- case LibFunc::ftell:
- case LibFunc::ftello64:
- case LibFunc::ftello:
- case LibFunc::ftrylockfile:
- case LibFunc::funlockfile:
- case LibFunc::getc:
- case LibFunc::getc_unlocked:
- case LibFunc::getlogin_r:
- case LibFunc::mkdir:
- case LibFunc::mktime:
- case LibFunc::times:
+ case LibFunc_access:
+ case LibFunc_chmod:
+ case LibFunc_chown:
+ case LibFunc_clearerr:
+ case LibFunc_closedir:
+ case LibFunc_ctermid:
+ case LibFunc_fclose:
+ case LibFunc_feof:
+ case LibFunc_fflush:
+ case LibFunc_fgetc:
+ case LibFunc_fileno:
+ case LibFunc_flockfile:
+ case LibFunc_free:
+ case LibFunc_fseek:
+ case LibFunc_fseeko64:
+ case LibFunc_fseeko:
+ case LibFunc_fsetpos:
+ case LibFunc_ftell:
+ case LibFunc_ftello64:
+ case LibFunc_ftello:
+ case LibFunc_ftrylockfile:
+ case LibFunc_funlockfile:
+ case LibFunc_getc:
+ case LibFunc_getc_unlocked:
+ case LibFunc_getlogin_r:
+ case LibFunc_mkdir:
+ case LibFunc_mktime:
+ case LibFunc_times:
return (NumParams != 0 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::access:
- return (NumParams == 2 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::fopen:
+ case LibFunc_fopen:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::fdopen:
+ case LibFunc_fdopen:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::fputc:
- case LibFunc::fstat:
- case LibFunc::frexp:
- case LibFunc::frexpf:
- case LibFunc::frexpl:
- case LibFunc::fstatvfs:
+ case LibFunc_fputc:
+ case LibFunc_fstat:
+ case LibFunc_frexp:
+ case LibFunc_frexpf:
+ case LibFunc_frexpl:
+ case LibFunc_fstatvfs:
return (NumParams == 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::fgets:
+ case LibFunc_fgets:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
- case LibFunc::fread:
+ case LibFunc_fread:
return (NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(3)->isPointerTy());
- case LibFunc::fwrite:
+ case LibFunc_fwrite:
return (NumParams == 4 && FTy.getReturnType()->isIntegerTy() &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isIntegerTy() &&
FTy.getParamType(2)->isIntegerTy() &&
FTy.getParamType(3)->isPointerTy());
- case LibFunc::fputs:
+ case LibFunc_fputs:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::fscanf:
- case LibFunc::fprintf:
- return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
+ case LibFunc_fscanf:
+ case LibFunc_fiprintf:
+ case LibFunc_fprintf:
+ return (NumParams >= 2 && FTy.getReturnType()->isIntegerTy() &&
+ FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::fgetpos:
+ case LibFunc_fgetpos:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::gets:
- case LibFunc::getchar:
- case LibFunc::getitimer:
+ case LibFunc_getchar:
+ return (NumParams == 0 && FTy.getReturnType()->isIntegerTy());
+ case LibFunc_gets:
+ return (NumParams == 1 && FTy.getParamType(0) == PCharTy);
+ case LibFunc_getitimer:
return (NumParams == 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::ungetc:
+ case LibFunc_ungetc:
return (NumParams == 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::utime:
- case LibFunc::utimes:
+ case LibFunc_utime:
+ case LibFunc_utimes:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::putc:
+ case LibFunc_putc:
return (NumParams == 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::pread:
- case LibFunc::pwrite:
+ case LibFunc_pread:
+ case LibFunc_pwrite:
return (NumParams == 4 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::popen:
+ case LibFunc_popen:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::vscanf:
+ case LibFunc_vscanf:
return (NumParams == 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::vsscanf:
+ case LibFunc_vsscanf:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
- case LibFunc::vfscanf:
+ case LibFunc_vfscanf:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
- case LibFunc::valloc:
+ case LibFunc_valloc:
return (FTy.getReturnType()->isPointerTy());
- case LibFunc::vprintf:
+ case LibFunc_vprintf:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::vfprintf:
- case LibFunc::vsprintf:
+ case LibFunc_vfprintf:
+ case LibFunc_vsprintf:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::vsnprintf:
+ case LibFunc_vsnprintf:
return (NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
- case LibFunc::open:
+ case LibFunc_open:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::opendir:
+ case LibFunc_opendir:
return (NumParams == 1 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy());
- case LibFunc::tmpfile:
+ case LibFunc_tmpfile:
return (FTy.getReturnType()->isPointerTy());
- case LibFunc::htonl:
- case LibFunc::htons:
- case LibFunc::ntohl:
- case LibFunc::ntohs:
- case LibFunc::lstat:
+ case LibFunc_htonl:
+ case LibFunc_ntohl:
+ return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) &&
+ FTy.getReturnType() == FTy.getParamType(0));
+ case LibFunc_htons:
+ case LibFunc_ntohs:
+ return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(16) &&
+ FTy.getReturnType() == FTy.getParamType(0));
+ case LibFunc_lstat:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::lchown:
+ case LibFunc_lchown:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::qsort:
+ case LibFunc_qsort:
return (NumParams == 4 && FTy.getParamType(3)->isPointerTy());
- case LibFunc::dunder_strdup:
- case LibFunc::dunder_strndup:
+ case LibFunc_dunder_strdup:
+ case LibFunc_dunder_strndup:
return (NumParams >= 1 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy());
- case LibFunc::dunder_strtok_r:
+ case LibFunc_dunder_strtok_r:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::under_IO_putc:
+ case LibFunc_under_IO_putc:
return (NumParams == 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::dunder_isoc99_scanf:
+ case LibFunc_dunder_isoc99_scanf:
return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::stat64:
- case LibFunc::lstat64:
- case LibFunc::statvfs64:
+ case LibFunc_stat64:
+ case LibFunc_lstat64:
+ case LibFunc_statvfs64:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::dunder_isoc99_sscanf:
+ case LibFunc_dunder_isoc99_sscanf:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::fopen64:
+ case LibFunc_fopen64:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::tmpfile64:
+ case LibFunc_tmpfile64:
return (FTy.getReturnType()->isPointerTy());
- case LibFunc::fstat64:
- case LibFunc::fstatvfs64:
+ case LibFunc_fstat64:
+ case LibFunc_fstatvfs64:
return (NumParams == 2 && FTy.getParamType(1)->isPointerTy());
- case LibFunc::open64:
+ case LibFunc_open64:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy());
- case LibFunc::gettimeofday:
+ case LibFunc_gettimeofday:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
- case LibFunc::Znwj: // new(unsigned int);
- case LibFunc::Znwm: // new(unsigned long);
- case LibFunc::Znaj: // new[](unsigned int);
- case LibFunc::Znam: // new[](unsigned long);
- case LibFunc::msvc_new_int: // new(unsigned int);
- case LibFunc::msvc_new_longlong: // new(unsigned long long);
- case LibFunc::msvc_new_array_int: // new[](unsigned int);
- case LibFunc::msvc_new_array_longlong: // new[](unsigned long long);
- return (NumParams == 1);
-
- case LibFunc::memset_pattern16:
+ // new(unsigned int);
+ case LibFunc_Znwj:
+ // new(unsigned long);
+ case LibFunc_Znwm:
+ // new[](unsigned int);
+ case LibFunc_Znaj:
+ // new[](unsigned long);
+ case LibFunc_Znam:
+ // new(unsigned int);
+ case LibFunc_msvc_new_int:
+ // new(unsigned long long);
+ case LibFunc_msvc_new_longlong:
+ // new[](unsigned int);
+ case LibFunc_msvc_new_array_int:
+ // new[](unsigned long long);
+ case LibFunc_msvc_new_array_longlong:
+ return (NumParams == 1 && FTy.getReturnType()->isPointerTy());
+
+ // new(unsigned int, nothrow);
+ case LibFunc_ZnwjRKSt9nothrow_t:
+ // new(unsigned long, nothrow);
+ case LibFunc_ZnwmRKSt9nothrow_t:
+ // new[](unsigned int, nothrow);
+ case LibFunc_ZnajRKSt9nothrow_t:
+ // new[](unsigned long, nothrow);
+ case LibFunc_ZnamRKSt9nothrow_t:
+ // new(unsigned int, nothrow);
+ case LibFunc_msvc_new_int_nothrow:
+ // new(unsigned long long, nothrow);
+ case LibFunc_msvc_new_longlong_nothrow:
+ // new[](unsigned int, nothrow);
+ case LibFunc_msvc_new_array_int_nothrow:
+ // new[](unsigned long long, nothrow);
+ case LibFunc_msvc_new_array_longlong_nothrow:
+ return (NumParams == 2 && FTy.getReturnType()->isPointerTy());
+
+ // void operator delete[](void*);
+ case LibFunc_ZdaPv:
+ // void operator delete(void*);
+ case LibFunc_ZdlPv:
+ // void operator delete[](void*);
+ case LibFunc_msvc_delete_array_ptr32:
+ // void operator delete[](void*);
+ case LibFunc_msvc_delete_array_ptr64:
+ // void operator delete(void*);
+ case LibFunc_msvc_delete_ptr32:
+ // void operator delete(void*);
+ case LibFunc_msvc_delete_ptr64:
+ return (NumParams == 1 && FTy.getParamType(0)->isPointerTy());
+
+ // void operator delete[](void*, nothrow);
+ case LibFunc_ZdaPvRKSt9nothrow_t:
+ // void operator delete[](void*, unsigned int);
+ case LibFunc_ZdaPvj:
+ // void operator delete[](void*, unsigned long);
+ case LibFunc_ZdaPvm:
+ // void operator delete(void*, nothrow);
+ case LibFunc_ZdlPvRKSt9nothrow_t:
+ // void operator delete(void*, unsigned int);
+ case LibFunc_ZdlPvj:
+ // void operator delete(void*, unsigned long);
+ case LibFunc_ZdlPvm:
+ // void operator delete[](void*, unsigned int);
+ case LibFunc_msvc_delete_array_ptr32_int:
+ // void operator delete[](void*, nothrow);
+ case LibFunc_msvc_delete_array_ptr32_nothrow:
+ // void operator delete[](void*, unsigned long long);
+ case LibFunc_msvc_delete_array_ptr64_longlong:
+ // void operator delete[](void*, nothrow);
+ case LibFunc_msvc_delete_array_ptr64_nothrow:
+ // void operator delete(void*, unsigned int);
+ case LibFunc_msvc_delete_ptr32_int:
+ // void operator delete(void*, nothrow);
+ case LibFunc_msvc_delete_ptr32_nothrow:
+ // void operator delete(void*, unsigned long long);
+ case LibFunc_msvc_delete_ptr64_longlong:
+ // void operator delete(void*, nothrow);
+ case LibFunc_msvc_delete_ptr64_nothrow:
+ return (NumParams == 2 && FTy.getParamType(0)->isPointerTy());
+
+ case LibFunc_memset_pattern16:
return (!FTy.isVarArg() && NumParams == 3 &&
- isa<PointerType>(FTy.getParamType(0)) &&
- isa<PointerType>(FTy.getParamType(1)) &&
- isa<IntegerType>(FTy.getParamType(2)));
-
- // int __nvvm_reflect(const char *);
- case LibFunc::nvvm_reflect:
- return (NumParams == 1 && isa<PointerType>(FTy.getParamType(0)));
-
- case LibFunc::sin:
- case LibFunc::sinf:
- case LibFunc::sinl:
- case LibFunc::cos:
- case LibFunc::cosf:
- case LibFunc::cosl:
- case LibFunc::tan:
- case LibFunc::tanf:
- case LibFunc::tanl:
- case LibFunc::exp:
- case LibFunc::expf:
- case LibFunc::expl:
- case LibFunc::exp2:
- case LibFunc::exp2f:
- case LibFunc::exp2l:
- case LibFunc::log:
- case LibFunc::logf:
- case LibFunc::logl:
- case LibFunc::log10:
- case LibFunc::log10f:
- case LibFunc::log10l:
- case LibFunc::log2:
- case LibFunc::log2f:
- case LibFunc::log2l:
- case LibFunc::fabs:
- case LibFunc::fabsf:
- case LibFunc::fabsl:
- case LibFunc::floor:
- case LibFunc::floorf:
- case LibFunc::floorl:
- case LibFunc::ceil:
- case LibFunc::ceilf:
- case LibFunc::ceill:
- case LibFunc::trunc:
- case LibFunc::truncf:
- case LibFunc::truncl:
- case LibFunc::rint:
- case LibFunc::rintf:
- case LibFunc::rintl:
- case LibFunc::nearbyint:
- case LibFunc::nearbyintf:
- case LibFunc::nearbyintl:
- case LibFunc::round:
- case LibFunc::roundf:
- case LibFunc::roundl:
- case LibFunc::sqrt:
- case LibFunc::sqrtf:
- case LibFunc::sqrtl:
+ FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy() &&
+ FTy.getParamType(2)->isIntegerTy());
+
+ case LibFunc_cxa_guard_abort:
+ case LibFunc_cxa_guard_acquire:
+ case LibFunc_cxa_guard_release:
+ case LibFunc_nvvm_reflect:
+ return (NumParams == 1 && FTy.getParamType(0)->isPointerTy());
+
+ case LibFunc_sincospi_stret:
+ case LibFunc_sincospif_stret:
+ return (NumParams == 1 && FTy.getParamType(0)->isFloatingPointTy());
+
+ case LibFunc_acos:
+ case LibFunc_acosf:
+ case LibFunc_acosh:
+ case LibFunc_acoshf:
+ case LibFunc_acoshl:
+ case LibFunc_acosl:
+ case LibFunc_asin:
+ case LibFunc_asinf:
+ case LibFunc_asinh:
+ case LibFunc_asinhf:
+ case LibFunc_asinhl:
+ case LibFunc_asinl:
+ case LibFunc_atan:
+ case LibFunc_atanf:
+ case LibFunc_atanh:
+ case LibFunc_atanhf:
+ case LibFunc_atanhl:
+ case LibFunc_atanl:
+ case LibFunc_cbrt:
+ case LibFunc_cbrtf:
+ case LibFunc_cbrtl:
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
+ case LibFunc_cos:
+ case LibFunc_cosf:
+ case LibFunc_cosh:
+ case LibFunc_coshf:
+ case LibFunc_coshl:
+ case LibFunc_cosl:
+ case LibFunc_exp10:
+ case LibFunc_exp10f:
+ case LibFunc_exp10l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2l:
+ case LibFunc_exp:
+ case LibFunc_expf:
+ case LibFunc_expl:
+ case LibFunc_expm1:
+ case LibFunc_expm1f:
+ case LibFunc_expm1l:
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
+ case LibFunc_log10:
+ case LibFunc_log10f:
+ case LibFunc_log10l:
+ case LibFunc_log1p:
+ case LibFunc_log1pf:
+ case LibFunc_log1pl:
+ case LibFunc_log2:
+ case LibFunc_log2f:
+ case LibFunc_log2l:
+ case LibFunc_log:
+ case LibFunc_logb:
+ case LibFunc_logbf:
+ case LibFunc_logbl:
+ case LibFunc_logf:
+ case LibFunc_logl:
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ case LibFunc_sinh:
+ case LibFunc_sinhf:
+ case LibFunc_sinhl:
+ case LibFunc_sinl:
+ case LibFunc_sqrt:
+ case LibFunc_sqrt_finite:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtf_finite:
+ case LibFunc_sqrtl:
+ case LibFunc_sqrtl_finite:
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanh:
+ case LibFunc_tanhf:
+ case LibFunc_tanhl:
+ case LibFunc_tanl:
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
return (NumParams == 1 && FTy.getReturnType()->isFloatingPointTy() &&
FTy.getReturnType() == FTy.getParamType(0));
- case LibFunc::fmin:
- case LibFunc::fminf:
- case LibFunc::fminl:
- case LibFunc::fmax:
- case LibFunc::fmaxf:
- case LibFunc::fmaxl:
- case LibFunc::copysign:
- case LibFunc::copysignf:
- case LibFunc::copysignl:
- case LibFunc::pow:
- case LibFunc::powf:
- case LibFunc::powl:
+ case LibFunc_atan2:
+ case LibFunc_atan2f:
+ case LibFunc_atan2l:
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
+ case LibFunc_fmod:
+ case LibFunc_fmodf:
+ case LibFunc_fmodl:
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
+ case LibFunc_copysignl:
+ case LibFunc_pow:
+ case LibFunc_powf:
+ case LibFunc_powl:
return (NumParams == 2 && FTy.getReturnType()->isFloatingPointTy() &&
FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getReturnType() == FTy.getParamType(1));
- case LibFunc::ffs:
- case LibFunc::ffsl:
- case LibFunc::ffsll:
- case LibFunc::fls:
- case LibFunc::flsl:
- case LibFunc::flsll:
+ case LibFunc_ldexp:
+ case LibFunc_ldexpf:
+ case LibFunc_ldexpl:
+ return (NumParams == 2 && FTy.getReturnType()->isFloatingPointTy() &&
+ FTy.getReturnType() == FTy.getParamType(0) &&
+ FTy.getParamType(1)->isIntegerTy(32));
+
+ case LibFunc_ffs:
+ case LibFunc_ffsl:
+ case LibFunc_ffsll:
+ case LibFunc_fls:
+ case LibFunc_flsl:
+ case LibFunc_flsll:
return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) &&
FTy.getParamType(0)->isIntegerTy());
- case LibFunc::isdigit:
- case LibFunc::isascii:
- case LibFunc::toascii:
+ case LibFunc_isdigit:
+ case LibFunc_isascii:
+ case LibFunc_toascii:
+ case LibFunc_putchar:
return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) &&
FTy.getReturnType() == FTy.getParamType(0));
- case LibFunc::abs:
- case LibFunc::labs:
- case LibFunc::llabs:
+ case LibFunc_abs:
+ case LibFunc_labs:
+ case LibFunc_llabs:
return (NumParams == 1 && FTy.getReturnType()->isIntegerTy() &&
FTy.getReturnType() == FTy.getParamType(0));
- case LibFunc::cxa_atexit:
+ case LibFunc_cxa_atexit:
return (NumParams == 3 && FTy.getReturnType()->isIntegerTy() &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
- case LibFunc::sinpi:
- case LibFunc::cospi:
+ case LibFunc_sinpi:
+ case LibFunc_cospi:
return (NumParams == 1 && FTy.getReturnType()->isDoubleTy() &&
FTy.getReturnType() == FTy.getParamType(0));
- case LibFunc::sinpif:
- case LibFunc::cospif:
+ case LibFunc_sinpif:
+ case LibFunc_cospif:
return (NumParams == 1 && FTy.getReturnType()->isFloatTy() &&
FTy.getReturnType() == FTy.getParamType(0));
- default:
- // Assume the other functions are correct.
- // FIXME: It'd be really nice to cover them all.
- return true;
+ case LibFunc_strnlen:
+ return (NumParams == 2 && FTy.getReturnType() == FTy.getParamType(1) &&
+ FTy.getParamType(0) == PCharTy &&
+ FTy.getParamType(1) == SizeTTy);
+
+ case LibFunc_posix_memalign:
+ return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
+ FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1) == SizeTTy && FTy.getParamType(2) == SizeTTy);
+
+ case LibFunc::NumLibFuncs:
+ break;
}
+
+ llvm_unreachable("Invalid libfunc");
}
bool TargetLibraryInfoImpl::getLibFunc(const Function &FDecl,
- LibFunc::Func &F) const {
+ LibFunc &F) const {
const DataLayout *DL =
FDecl.getParent() ? &FDecl.getParent()->getDataLayout() : nullptr;
return getLibFunc(FDecl.getName(), F) &&
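An aside for readers tracking the rename: a minimal caller-side sketch after this patch, assuming a Function and a precomputed TargetLibraryInfo (isPrintfCall is a hypothetical helper, not part of the change):

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// The enumerators moved from LibFunc::printf to LibFunc_printf, and the
// out-parameter is now the plain LibFunc enum instead of LibFunc::Func.
static bool isPrintfCall(const Function &F, const TargetLibraryInfo &TLI) {
  LibFunc TheLibFunc;
  return TLI.getLibFunc(F, TheLibFunc) && TheLibFunc == LibFunc_printf;
}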
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 5c0d1aac1b98..d73b1a128031 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -97,6 +97,10 @@ bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
return TTIImpl->isSourceOfDivergence(V);
}
+unsigned TargetTransformInfo::getFlatAddressSpace() const {
+ return TTIImpl->getFlatAddressSpace();
+}
+
bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return TTIImpl->isLoweredToCall(F);
}
@@ -182,6 +186,21 @@ bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const
return TTIImpl->shouldBuildLookupTablesForConstant(C);
}
+unsigned TargetTransformInfo::
+getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
+ return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
+}
+
+unsigned TargetTransformInfo::
+getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ unsigned VF) const {
+ return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
+}
+
+bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
+ return TTIImpl->supportsEfficientVectorElementLoadStore();
+}
+
bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}
@@ -254,6 +273,12 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
return TTIImpl->getRegisterBitWidth(Vector);
}
+bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
+ return TTIImpl->shouldConsiderAddressTypePromotion(
+ I, AllowPromotionWithoutCommonHeader);
+}
+
unsigned TargetTransformInfo::getCacheLineSize() const {
return TTIImpl->getCacheLineSize();
}
@@ -293,8 +318,10 @@ int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
}
int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const {
- int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src);
+ Type *Src, const Instruction *I) const {
+ assert ((I == nullptr || I->getOpcode() == Opcode) &&
+ "Opcode should reflect passed instruction.");
+ int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -314,8 +341,10 @@ int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
}
int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const {
- int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ Type *CondTy, const Instruction *I) const {
+ assert ((I == nullptr || I->getOpcode() == Opcode) &&
+ "Opcode should reflect passed instruction.");
+ int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -329,8 +358,11 @@ int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,
- unsigned AddressSpace) const {
- int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ unsigned AddressSpace,
+ const Instruction *I) const {
+ assert ((I == nullptr || I->getOpcode() == Opcode) &&
+ "Opcode should reflect passed instruction.");
+ int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
@@ -363,17 +395,17 @@ int TargetTransformInfo::getInterleavedMemoryOpCost(
}
int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys,
- FastMathFlags FMF) const {
- int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) const {
+ int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
+ ScalarizationCostPassed);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args,
- FastMathFlags FMF) const {
- int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF);
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
+ int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
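A sketch of the extended cost queries, assuming a TargetTransformInfo and a CastInst obtained elsewhere; the trailing instruction argument is optional, and passing nullptr keeps the old behavior:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// The concrete instruction lets targets inspect its context; the assert in
// getCastInstrCost checks that the opcode matches the instruction.
static int costOfCast(const TargetTransformInfo &TTI, const CastInst *Cast) {
  return TTI.getCastInstrCost(Cast->getOpcode(), Cast->getDestTy(),
                              Cast->getSrcTy(), Cast);
}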
diff --git a/lib/Analysis/TypeMetadataUtils.cpp b/lib/Analysis/TypeMetadataUtils.cpp
index f56754167360..6871e4887c9e 100644
--- a/lib/Analysis/TypeMetadataUtils.cpp
+++ b/lib/Analysis/TypeMetadataUtils.cpp
@@ -39,7 +39,7 @@ findCallsAtConstantOffset(SmallVectorImpl<DevirtCallSite> &DevirtCalls,
// Search for virtual calls that load from VPtr and add them to DevirtCalls.
static void
-findLoadCallsAtConstantOffset(Module *M,
+findLoadCallsAtConstantOffset(const Module *M,
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
Value *VPtr, int64_t Offset) {
for (const Use &U : VPtr->uses()) {
@@ -62,10 +62,10 @@ findLoadCallsAtConstantOffset(Module *M,
void llvm::findDevirtualizableCallsForTypeTest(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
- SmallVectorImpl<CallInst *> &Assumes, CallInst *CI) {
+ SmallVectorImpl<CallInst *> &Assumes, const CallInst *CI) {
assert(CI->getCalledFunction()->getIntrinsicID() == Intrinsic::type_test);
- Module *M = CI->getParent()->getParent()->getParent();
+ const Module *M = CI->getParent()->getParent()->getParent();
// Find llvm.assume intrinsics for this llvm.type.test call.
for (const Use &CIU : CI->uses()) {
@@ -86,7 +86,8 @@ void llvm::findDevirtualizableCallsForTypeTest(
void llvm::findDevirtualizableCallsForTypeCheckedLoad(
SmallVectorImpl<DevirtCallSite> &DevirtCalls,
SmallVectorImpl<Instruction *> &LoadedPtrs,
- SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses, CallInst *CI) {
+ SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses,
+ const CallInst *CI) {
assert(CI->getCalledFunction()->getIntrinsicID() ==
Intrinsic::type_checked_load);
@@ -96,7 +97,7 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad(
return;
}
- for (Use &U : CI->uses()) {
+ for (const Use &U : CI->uses()) {
auto CIU = U.getUser();
if (auto EVI = dyn_cast<ExtractValueInst>(CIU)) {
if (EVI->getNumIndices() == 1 && EVI->getIndices()[0] == 0) {
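With the entry points now taking const pointers, clients holding only const IR can call them without a const_cast; a hedged sketch, assuming CI is a call to llvm.type.test:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Collect devirtualizable call sites and the llvm.assume users of a
// const llvm.type.test call.
static void collect(const CallInst *CI) {
  SmallVector<DevirtCallSite, 4> DevirtCalls;
  SmallVector<CallInst *, 4> Assumes;
  findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI);
}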
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index b79370baad10..d871e83f222a 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/ConstantRange.h"
@@ -76,6 +77,9 @@ struct Query {
AssumptionCache *AC;
const Instruction *CxtI;
const DominatorTree *DT;
+ // Unlike the other analyses, this may be a nullptr because not all clients
+ // provide it currently.
+ OptimizationRemarkEmitter *ORE;
/// Set of assumptions that should be excluded from further queries.
/// This is because of the potential for mutual recursion to cause
@@ -90,11 +94,12 @@ struct Query {
unsigned NumExcluded;
Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT)
- : DL(DL), AC(AC), CxtI(CxtI), DT(DT), NumExcluded(0) {}
+ const DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr)
+ : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), NumExcluded(0) {}
Query(const Query &Q, const Value *NewExcl)
- : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), NumExcluded(Q.NumExcluded) {
+ : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE),
+ NumExcluded(Q.NumExcluded) {
Excluded = Q.Excluded;
Excluded[NumExcluded++] = NewExcl;
assert(NumExcluded <= Excluded.size());
@@ -131,9 +136,10 @@ static void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
void llvm::computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
+ const DominatorTree *DT,
+ OptimizationRemarkEmitter *ORE) {
::computeKnownBits(V, KnownZero, KnownOne, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT));
+ Query(DL, AC, safeCxtI(V, CxtI), DT, ORE));
}
bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
@@ -249,30 +255,6 @@ static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
unsigned Depth, const Query &Q) {
- if (!Add) {
- if (const ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
- // We know that the top bits of C-X are clear if X contains less bits
- // than C (i.e. no wrap-around can happen). For example, 20-X is
- // positive if we can prove that X is >= 0 and < 16.
- if (!CLHS->getValue().isNegative()) {
- unsigned BitWidth = KnownZero.getBitWidth();
- unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
- // NLZ can't be BitWidth with no sign bit
- APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- computeKnownBits(Op1, KnownZero2, KnownOne2, Depth + 1, Q);
-
- // If all of the MaskV bits are known to be zero, then we know the
- // output top bits are zero, because we now know that the output is
- // from [0-C].
- if ((KnownZero2 & MaskV) == MaskV) {
- unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
- // Top bits known zero.
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2);
- }
- }
- }
- }
-
unsigned BitWidth = KnownZero.getBitWidth();
// If an initial sequence of bits in the result is not needed, the
@@ -282,11 +264,11 @@ static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
computeKnownBits(Op1, KnownZero2, KnownOne2, Depth + 1, Q);
// Carry in a 1 for a subtract, rather than a 0.
- APInt CarryIn(BitWidth, 0);
+ uint64_t CarryIn = 0;
if (!Add) {
// Sum = LHS + ~RHS + 1
std::swap(KnownZero2, KnownOne2);
- CarryIn.setBit(0);
+ CarryIn = 1;
}
APInt PossibleSumZero = ~LHSKnownZero + ~KnownZero2 + CarryIn;
@@ -315,11 +297,11 @@ static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
// Adding two non-negative numbers, or subtracting a negative number from
// a non-negative one, can't wrap into negative.
if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
- KnownZero |= APInt::getSignBit(BitWidth);
+ KnownZero.setSignBit();
// Adding two negative numbers, or subtracting a non-negative number from
// a negative one, can't wrap into non-negative.
else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
- KnownOne |= APInt::getSignBit(BitWidth);
+ KnownOne.setSignBit();
}
}
}
@@ -370,8 +352,9 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
TrailZ = std::min(TrailZ, BitWidth);
LeadZ = std::min(LeadZ, BitWidth);
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
- APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero.clearAllBits();
+ KnownZero.setLowBits(TrailZ);
+ KnownZero.setHighBits(LeadZ);
// Only make use of no-wrap flags if we failed to compute the sign bit
// directly. This matters if the multiplication always overflows, in
@@ -379,9 +362,9 @@ static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
// though as the program is invoking undefined behaviour we can choose
// whatever we like here.
if (isKnownNonNegative && !KnownOne.isNegative())
- KnownZero.setBit(BitWidth - 1);
+ KnownZero.setSignBit();
else if (isKnownNegative && !KnownZero.isNegative())
- KnownOne.setBit(BitWidth - 1);
+ KnownOne.setSignBit();
}
void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
@@ -553,6 +536,13 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
KnownOne.setAllBits();
return;
}
+ if (match(Arg, m_Not(m_Specific(V))) &&
+ isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
+ assert(BitWidth == 1 && "assume operand is not i1?");
+ KnownZero.setAllBits();
+ KnownOne.clearAllBits();
+ return;
+ }
// The remaining tests are all recursive, so bail out if we hit the limit.
if (Depth == MaxDepth)
@@ -719,7 +709,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
if (RHSKnownZero.isNegative()) {
// We know that the sign bit is zero.
- KnownZero |= APInt::getSignBit(BitWidth);
+ KnownZero.setSignBit();
}
// assume(v >_s c) where c is at least -1.
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
@@ -730,7 +720,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
if (RHSKnownOne.isAllOnesValue() || RHSKnownZero.isNegative()) {
// We know that the sign bit is zero.
- KnownZero |= APInt::getSignBit(BitWidth);
+ KnownZero.setSignBit();
}
// assume(v <=_s c) where c is negative
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
@@ -741,7 +731,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
if (RHSKnownOne.isNegative()) {
// We know that the sign bit is one.
- KnownOne |= APInt::getSignBit(BitWidth);
+ KnownOne.setSignBit();
}
// assume(v <_s c) where c is non-positive
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
@@ -752,7 +742,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
if (RHSKnownZero.isAllOnesValue() || RHSKnownOne.isNegative()) {
// We know that the sign bit is one.
- KnownOne |= APInt::getSignBit(BitWidth);
+ KnownOne.setSignBit();
}
// assume(v <=_u c)
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
@@ -762,8 +752,7 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
computeKnownBits(A, RHSKnownZero, RHSKnownOne, Depth+1, Query(Q, I));
// Whatever high bits in c are zero are known to be zero.
- KnownZero |=
- APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes());
+ KnownZero.setHighBits(RHSKnownZero.countLeadingOnes());
// assume(v <_u c)
} else if (match(Arg, m_ICmp(Pred, m_V, m_Value(A))) &&
Pred == ICmpInst::ICMP_ULT &&
@@ -774,11 +763,27 @@ static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
// Whatever high bits in c are zero are known to be zero (if c is a power
// of 2, then one more).
if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, Query(Q, I)))
- KnownZero |=
- APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes()+1);
+ KnownZero.setHighBits(RHSKnownZero.countLeadingOnes()+1);
else
- KnownZero |=
- APInt::getHighBitsSet(BitWidth, RHSKnownZero.countLeadingOnes());
+ KnownZero.setHighBits(RHSKnownZero.countLeadingOnes());
+ }
+ }
+
+ // If assumptions conflict with each other or previous known bits, then we
+ // have a logical fallacy. It's possible that the assumption is not reachable,
+ // so this isn't a real bug. On the other hand, the program may have undefined
+ // behavior, or we might have a bug in the compiler. We can't assert/crash, so
+ // clear out the known bits, try to warn the user, and hope for the best.
+ if ((KnownZero & KnownOne) != 0) {
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
+
+ if (Q.ORE) {
+ auto *CxtI = const_cast<Instruction *>(Q.CxtI);
+ OptimizationRemarkAnalysis ORA("value-tracking", "BadAssumption", CxtI);
+ Q.ORE->emit(ORA << "Detected conflicting code assumptions. Program may "
+ "have undefined behavior, or compiler may have "
+ "internal error.");
}
}
}
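A worked example of the conflict test above, with assumed values: assume(x == 1) followed by assume(x == 2) on an i8 accumulates known bits that overlap, which is exactly what (KnownZero & KnownOne) != 0 detects:

#include "llvm/ADT/APInt.h"
using namespace llvm;

static bool hasConflict() {
  APInt KnownZero(8, 0xFE | 0xFD); // zeros implied by x == 1, then x == 2
  APInt KnownOne(8, 0x01 | 0x02);  // ones implied by x == 1, then x == 2
  return (KnownZero & KnownOne) != 0; // true: bits 0 and 1 contradict
}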
@@ -817,6 +822,14 @@ static void computeKnownBitsFromShiftOperator(
computeKnownBits(I->getOperand(1), KnownZero, KnownOne, Depth + 1, Q);
+ // If the shift amount could be greater than or equal to the bit-width of the LHS, the
+ // value could be undef, so we don't know anything about it.
+ if ((~KnownZero).uge(BitWidth)) {
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
+ return;
+ }
+
// Note: We cannot use KnownZero.getLimitedValue() here, because if
// BitWidth > 64 and any upper bits are known, we'll end up returning the
// limit value (which implies all bits are known).
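A numeric illustration of the early-out added above, with assumed known bits: even when the high nibble of an i8 shift amount is known zero, the amount can still be as large as 15, which reaches the bit width, so nothing can be concluded:

#include "llvm/ADT/APInt.h"
using namespace llvm;

static bool mustGiveUp() {
  APInt AmtKnownZero(8, 0xF0);   // only the top four bits known zero
  return (~AmtKnownZero).uge(8); // max possible amount is 0x0F, i.e. 15 >= 8
}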
@@ -905,14 +918,15 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// TODO: This could be generalized to clearing any bit set in y where the
// following bit is known to be unset in y.
Value *Y = nullptr;
- if (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)),
- m_Value(Y))) ||
- match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)),
- m_Value(Y)))) {
- APInt KnownZero3(BitWidth, 0), KnownOne3(BitWidth, 0);
- computeKnownBits(Y, KnownZero3, KnownOne3, Depth + 1, Q);
- if (KnownOne3.countTrailingOnes() > 0)
- KnownZero |= APInt::getLowBitsSet(BitWidth, 1);
+ if (!KnownZero[0] && !KnownOne[0] &&
+ (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)),
+ m_Value(Y))) ||
+ match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)),
+ m_Value(Y))))) {
+ KnownZero2.clearAllBits(); KnownOne2.clearAllBits();
+ computeKnownBits(Y, KnownZero2, KnownOne2, Depth + 1, Q);
+ if (KnownOne2.countTrailingOnes() > 0)
+ KnownZero.setBit(0);
}
break;
}
@@ -934,7 +948,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
- KnownZero = KnownZeroOut;
+ KnownZero = std::move(KnownZeroOut);
break;
}
case Instruction::Mul: {
@@ -958,15 +972,11 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
LeadZ = std::min(BitWidth,
LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
- KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero.setHighBits(LeadZ);
break;
}
case Instruction::Select: {
- computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q);
- computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q);
-
- const Value *LHS;
- const Value *RHS;
+ const Value *LHS, *RHS;
SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
if (SelectPatternResult::isMinOrMax(SPF)) {
computeKnownBits(RHS, KnownZero, KnownOne, Depth + 1, Q);
@@ -980,23 +990,23 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
unsigned MaxHighZeros = 0;
if (SPF == SPF_SMAX) {
// If both sides are negative, the result is negative.
- if (KnownOne[BitWidth - 1] && KnownOne2[BitWidth - 1])
+ if (KnownOne.isNegative() && KnownOne2.isNegative())
// We can derive a lower bound on the result by taking the max of the
// leading one bits.
MaxHighOnes =
std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes());
// If either side is non-negative, the result is non-negative.
- else if (KnownZero[BitWidth - 1] || KnownZero2[BitWidth - 1])
+ else if (KnownZero.isNegative() || KnownZero2.isNegative())
MaxHighZeros = 1;
} else if (SPF == SPF_SMIN) {
// If both sides are non-negative, the result is non-negative.
- if (KnownZero[BitWidth - 1] && KnownZero2[BitWidth - 1])
+ if (KnownZero.isNegative() && KnownZero2.isNegative())
// We can derive an upper bound on the result by taking the max of the
// leading zero bits.
MaxHighZeros = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
// If either side is negative, the result is negative.
- else if (KnownOne[BitWidth - 1] || KnownOne2[BitWidth - 1])
+ else if (KnownOne.isNegative() || KnownOne2.isNegative())
MaxHighOnes = 1;
} else if (SPF == SPF_UMAX) {
// We can derive a lower bound on the result by taking the max of the
@@ -1014,9 +1024,9 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
if (MaxHighOnes > 0)
- KnownOne |= APInt::getHighBitsSet(BitWidth, MaxHighOnes);
+ KnownOne.setHighBits(MaxHighOnes);
if (MaxHighZeros > 0)
- KnownZero |= APInt::getHighBitsSet(BitWidth, MaxHighZeros);
+ KnownZero.setHighBits(MaxHighZeros);
break;
}
case Instruction::FPTrunc:
@@ -1047,7 +1057,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
KnownOne = KnownOne.zextOrTrunc(BitWidth);
// Any top bits are known to be zero.
if (BitWidth > SrcBitWidth)
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ KnownZero.setBitsFrom(SrcBitWidth);
break;
}
case Instruction::BitCast: {
@@ -1068,35 +1078,29 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
- KnownZero = KnownZero.zext(BitWidth);
- KnownOne = KnownOne.zext(BitWidth);
-
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
- if (KnownZero[SrcBitWidth-1]) // Input sign bit known zero
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
- else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set
- KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ KnownZero = KnownZero.sext(BitWidth);
+ KnownOne = KnownOne.sext(BitWidth);
break;
}
case Instruction::Shl: {
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
- auto KZF = [BitWidth, NSW](const APInt &KnownZero, unsigned ShiftAmt) {
- APInt KZResult =
- (KnownZero << ShiftAmt) |
- APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0.
+ auto KZF = [NSW](const APInt &KnownZero, unsigned ShiftAmt) {
+ APInt KZResult = KnownZero << ShiftAmt;
+ KZResult.setLowBits(ShiftAmt); // Low bits known 0.
// If this shift has "nsw" keyword, then the result is either a poison
// value or has the same sign bit as the first operand.
if (NSW && KnownZero.isNegative())
- KZResult.setBit(BitWidth - 1);
+ KZResult.setSignBit();
return KZResult;
};
- auto KOF = [BitWidth, NSW](const APInt &KnownOne, unsigned ShiftAmt) {
+ auto KOF = [NSW](const APInt &KnownOne, unsigned ShiftAmt) {
APInt KOResult = KnownOne << ShiftAmt;
if (NSW && KnownOne.isNegative())
- KOResult.setBit(BitWidth - 1);
+ KOResult.setSignBit();
return KOResult;
};
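Why the explicit high-bit fixup in the SExt case above could be dropped: APInt::sext on the known-bit masks replicates a known sign bit by itself. A standalone sketch with assumed values:

#include "llvm/ADT/APInt.h"
using namespace llvm;

static APInt sextKnownOnes() {
  APInt KnownOne(8, 0x80);  // input sign bit known one
  return KnownOne.sext(32); // 0xFFFFFF80: bits 8..31 become known one
}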
@@ -1108,13 +1112,13 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
case Instruction::LShr: {
// (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
- return APIntOps::lshr(KnownZero, ShiftAmt) |
+ return KnownZero.lshr(ShiftAmt) |
// High bits known zero.
APInt::getHighBitsSet(BitWidth, ShiftAmt);
};
- auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
- return APIntOps::lshr(KnownOne, ShiftAmt);
+ auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
+ return KnownOne.lshr(ShiftAmt);
};
computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
@@ -1124,12 +1128,12 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
}
case Instruction::AShr: {
// (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
- return APIntOps::ashr(KnownZero, ShiftAmt);
+ auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) {
+ return KnownZero.ashr(ShiftAmt);
};
- auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
- return APIntOps::ashr(KnownOne, ShiftAmt);
+ auto KOF = [](const APInt &KnownOne, unsigned ShiftAmt) {
+ return KnownOne.ashr(ShiftAmt);
};
computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
@@ -1165,12 +1169,12 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// If the first operand is non-negative or has all low bits zero, then
// the upper bits are all zero.
- if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+ if (KnownZero2.isNegative() || ((KnownZero2 & LowBits) == LowBits))
KnownZero |= ~LowBits;
// If the first operand is negative and not all low bits are zero, then
// the upper bits are all one.
- if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
+ if (KnownOne2.isNegative() && ((KnownOne2 & LowBits) != 0))
KnownOne |= ~LowBits;
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
@@ -1185,7 +1189,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
Q);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
- KnownZero.setBit(BitWidth - 1);
+ KnownZero.setSignBit();
}
break;
@@ -1209,7 +1213,8 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
KnownOne.clearAllBits();
- KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
+ KnownZero.clearAllBits();
+ KnownZero.setHighBits(Leaders);
break;
}
@@ -1220,7 +1225,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
Align = Q.DL.getABITypeAlignment(AI->getAllocatedType());
if (Align > 0)
- KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
+ KnownZero.setLowBits(countTrailingZeros(Align));
break;
}
case Instruction::GetElementPtr: {
@@ -1267,7 +1272,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
}
}
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ);
+ KnownZero.setLowBits(TrailZ);
break;
}
case Instruction::PHI: {
@@ -1308,9 +1313,8 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
computeKnownBits(L, KnownZero3, KnownOne3, Depth + 1, Q);
- KnownZero = APInt::getLowBitsSet(
- BitWidth, std::min(KnownZero2.countTrailingOnes(),
- KnownZero3.countTrailingOnes()));
+ KnownZero.setLowBits(std::min(KnownZero2.countTrailingOnes(),
+ KnownZero3.countTrailingOnes()));
if (DontImproveNonNegativePhiBits)
break;
@@ -1328,24 +1332,24 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// (add negative, negative) --> negative
if (Opcode == Instruction::Add) {
if (KnownZero2.isNegative() && KnownZero3.isNegative())
- KnownZero.setBit(BitWidth - 1);
+ KnownZero.setSignBit();
else if (KnownOne2.isNegative() && KnownOne3.isNegative())
- KnownOne.setBit(BitWidth - 1);
+ KnownOne.setSignBit();
}
// (sub nsw non-negative, negative) --> non-negative
// (sub nsw negative, non-negative) --> negative
else if (Opcode == Instruction::Sub && LL == I) {
if (KnownZero2.isNegative() && KnownOne3.isNegative())
- KnownZero.setBit(BitWidth - 1);
+ KnownZero.setSignBit();
else if (KnownOne2.isNegative() && KnownZero3.isNegative())
- KnownOne.setBit(BitWidth - 1);
+ KnownOne.setSignBit();
}
// (mul nsw non-negative, non-negative) --> non-negative
else if (Opcode == Instruction::Mul && KnownZero2.isNegative() &&
KnownZero3.isNegative())
- KnownZero.setBit(BitWidth - 1);
+ KnownZero.setSignBit();
}
break;
@@ -1364,8 +1368,8 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
if (dyn_cast_or_null<UndefValue>(P->hasConstantValue()))
break;
- KnownZero = APInt::getAllOnesValue(BitWidth);
- KnownOne = APInt::getAllOnesValue(BitWidth);
+ KnownZero.setAllBits();
+ KnownOne.setAllBits();
for (Value *IncValue : P->incoming_values()) {
// Skip direct self references.
if (IncValue == P) continue;
@@ -1400,6 +1404,11 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: break;
+ case Intrinsic::bitreverse:
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
+ KnownZero |= KnownZero2.reverseBits();
+ KnownOne |= KnownOne2.reverseBits();
+ break;
case Intrinsic::bswap:
computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
KnownZero |= KnownZero2.byteSwap();
@@ -1411,7 +1420,7 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// If this call is undefined for 0, the result will be less than 2^n.
if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
LowBits -= 1;
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ KnownZero.setBitsFrom(LowBits);
break;
}
case Intrinsic::ctpop: {
@@ -1419,17 +1428,14 @@ static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
// We can bound the space the count needs. Also, bits known to be zero
// can't contribute to the population.
unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation();
- unsigned LeadingZeros =
- APInt(BitWidth, BitsPossiblySet).countLeadingZeros();
- assert(LeadingZeros <= BitWidth);
- KnownZero |= APInt::getHighBitsSet(BitWidth, LeadingZeros);
- KnownOne &= ~KnownZero;
+ unsigned LowBits = Log2_32(BitsPossiblySet)+1;
+ KnownZero.setBitsFrom(LowBits);
// TODO: we could bound KnownOne using the lower bound on the number
// of bits which might be set provided by popcnt KnownOne2.
break;
}
case Intrinsic::x86_sse42_crc32_64_64:
- KnownZero |= APInt::getHighBitsSet(64, 32);
+ KnownZero.setBitsFrom(32);
break;
}
}
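The tighter ctpop bound in plain arithmetic, with assumed inputs: if 24 of 32 input bits are known zero, at most 8 bits can be set, the count fits in Log2_32(8) + 1 = 4 bits, and setBitsFrom(4) marks everything above as known zero:

#include "llvm/Support/MathExtras.h"

static unsigned ctpopLowBits() {
  unsigned BitsPossiblySet = 32 - 24;        // at most 8 bits can be set
  return llvm::Log2_32(BitsPossiblySet) + 1; // = 4, so bits [4, 32) are zero
}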
@@ -1502,6 +1508,7 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero.getBitWidth() == BitWidth &&
KnownOne.getBitWidth() == BitWidth &&
"V, KnownOne and KnownZero should have same BitWidth");
+ (void)BitWidth;
const APInt *C;
if (match(V, m_APInt(C))) {
@@ -1513,7 +1520,7 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
// Null and aggregate-zero are all-zeros.
if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
KnownOne.clearAllBits();
- KnownZero = APInt::getAllOnesValue(BitWidth);
+ KnownZero.setAllBits();
return;
}
// Handle a constant vector by taking the intersection of the known bits of
@@ -1582,7 +1589,7 @@ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
if (V->getType()->isPointerTy()) {
unsigned Align = V->getPointerAlignment(Q.DL);
if (Align)
- KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
+ KnownZero.setLowBits(countTrailingZeros(Align));
}
// computeKnownBitsFromAssume strictly refines KnownZero and
@@ -1607,8 +1614,8 @@ void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
APInt ZeroBits(BitWidth, 0);
APInt OneBits(BitWidth, 0);
computeKnownBits(V, ZeroBits, OneBits, Depth, Q);
- KnownOne = OneBits[BitWidth - 1];
- KnownZero = ZeroBits[BitWidth - 1];
+ KnownOne = OneBits.isNegative();
+ KnownZero = ZeroBits.isNegative();
}
/// Return true if the given value is known to have exactly one
@@ -1788,10 +1795,12 @@ static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value)
return true;
}
-/// Return true if the given value is known to be non-zero when defined.
-/// For vectors return true if every element is known to be non-zero when
-/// defined. Supports values with integer or pointer type and vectors of
-/// integers.
+/// Return true if the given value is known to be non-zero when defined. For
+/// vectors, return true if every element is known to be non-zero when
+/// defined. For pointers, if the context instruction and dominator tree are
+/// specified, perform context-sensitive analysis and return true if the
+/// pointer couldn't possibly be null at the specified instruction.
+/// Supports values with integer or pointer type and vectors of integers.
bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (auto *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
@@ -1834,7 +1843,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
// Check for pointer simplifications.
if (V->getType()->isPointerTy()) {
- if (isKnownNonNull(V))
+ if (isKnownNonNullAt(V, Q.CxtI, Q.DT))
return true;
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
if (isGEPKnownNonNull(GEP, Depth, Q))
@@ -2075,13 +2084,29 @@ static unsigned computeNumSignBitsVectorConstant(const Value *V,
return MinSignBits;
}
+static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
+ const Query &Q);
+
+static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
+ const Query &Q) {
+ unsigned Result = ComputeNumSignBitsImpl(V, Depth, Q);
+ assert(Result > 0 && "At least one sign bit needs to be present!");
+ return Result;
+}
+
/// Return the number of times the sign bit of the register is replicated into
/// the other bits. We know that at least 1 bit is always equal to the sign bit
/// (itself), but other cases can give us information. For example, immediately
/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
/// other, so we return 3. For vectors, return the number of sign bits for the
/// vector element with the minimum number of known sign bits.
-unsigned ComputeNumSignBits(const Value *V, unsigned Depth, const Query &Q) {
+static unsigned ComputeNumSignBitsImpl(const Value *V, unsigned Depth,
+ const Query &Q) {
+
+ // We return the minimum number of sign bits that are guaranteed to be present
+ // in V, so for undef we have to conservatively return 1. We don't have the
+ // same behavior for poison though -- that's a FIXME today.
+
unsigned TyBits = Q.DL.getTypeSizeInBits(V->getType()->getScalarType());
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
@@ -2157,7 +2182,10 @@ unsigned ComputeNumSignBits(const Value *V, unsigned Depth, const Query &Q) {
// ashr X, C -> adds C sign bits. Vectors too.
const APInt *ShAmt;
if (match(U->getOperand(1), m_APInt(ShAmt))) {
- Tmp += ShAmt->getZExtValue();
+ unsigned ShAmtLimited = ShAmt->getZExtValue();
+ if (ShAmtLimited >= TyBits)
+ break; // Bad shift.
+ Tmp += ShAmtLimited;
if (Tmp > TyBits) Tmp = TyBits;
}
return Tmp;
@@ -2436,7 +2464,7 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS,
if (!TLI)
return Intrinsic::not_intrinsic;
- LibFunc::Func Func;
+ LibFunc Func;
// We're going to make assumptions on the semantics of the functions, check
// that the target knows that it's available in this environment and it does
// not have local linkage.
@@ -2451,81 +2479,81 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS,
switch (Func) {
default:
break;
- case LibFunc::sin:
- case LibFunc::sinf:
- case LibFunc::sinl:
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ case LibFunc_sinl:
return Intrinsic::sin;
- case LibFunc::cos:
- case LibFunc::cosf:
- case LibFunc::cosl:
+ case LibFunc_cos:
+ case LibFunc_cosf:
+ case LibFunc_cosl:
return Intrinsic::cos;
- case LibFunc::exp:
- case LibFunc::expf:
- case LibFunc::expl:
+ case LibFunc_exp:
+ case LibFunc_expf:
+ case LibFunc_expl:
return Intrinsic::exp;
- case LibFunc::exp2:
- case LibFunc::exp2f:
- case LibFunc::exp2l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2l:
return Intrinsic::exp2;
- case LibFunc::log:
- case LibFunc::logf:
- case LibFunc::logl:
+ case LibFunc_log:
+ case LibFunc_logf:
+ case LibFunc_logl:
return Intrinsic::log;
- case LibFunc::log10:
- case LibFunc::log10f:
- case LibFunc::log10l:
+ case LibFunc_log10:
+ case LibFunc_log10f:
+ case LibFunc_log10l:
return Intrinsic::log10;
- case LibFunc::log2:
- case LibFunc::log2f:
- case LibFunc::log2l:
+ case LibFunc_log2:
+ case LibFunc_log2f:
+ case LibFunc_log2l:
return Intrinsic::log2;
- case LibFunc::fabs:
- case LibFunc::fabsf:
- case LibFunc::fabsl:
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
return Intrinsic::fabs;
- case LibFunc::fmin:
- case LibFunc::fminf:
- case LibFunc::fminl:
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
return Intrinsic::minnum;
- case LibFunc::fmax:
- case LibFunc::fmaxf:
- case LibFunc::fmaxl:
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
return Intrinsic::maxnum;
- case LibFunc::copysign:
- case LibFunc::copysignf:
- case LibFunc::copysignl:
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
+ case LibFunc_copysignl:
return Intrinsic::copysign;
- case LibFunc::floor:
- case LibFunc::floorf:
- case LibFunc::floorl:
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
return Intrinsic::floor;
- case LibFunc::ceil:
- case LibFunc::ceilf:
- case LibFunc::ceill:
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
return Intrinsic::ceil;
- case LibFunc::trunc:
- case LibFunc::truncf:
- case LibFunc::truncl:
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
return Intrinsic::trunc;
- case LibFunc::rint:
- case LibFunc::rintf:
- case LibFunc::rintl:
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
return Intrinsic::rint;
- case LibFunc::nearbyint:
- case LibFunc::nearbyintf:
- case LibFunc::nearbyintl:
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
return Intrinsic::nearbyint;
- case LibFunc::round:
- case LibFunc::roundf:
- case LibFunc::roundl:
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
return Intrinsic::round;
- case LibFunc::pow:
- case LibFunc::powf:
- case LibFunc::powl:
+ case LibFunc_pow:
+ case LibFunc_powf:
+ case LibFunc_powl:
return Intrinsic::pow;
- case LibFunc::sqrt:
- case LibFunc::sqrtf:
- case LibFunc::sqrtl:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
if (ICS->hasNoNaNs())
return Intrinsic::sqrt;
return Intrinsic::not_intrinsic;
@@ -2590,6 +2618,11 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
const TargetLibraryInfo *TLI,
bool SignBitOnly,
unsigned Depth) {
+ // TODO: This function does not do the right thing when SignBitOnly is true
+ // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform
+ // which flips the sign bits of NaNs. See
+ // https://llvm.org/bugs/show_bug.cgi?id=31702.
+
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
return !CFP->getValueAPF().isNegative() ||
(!SignBitOnly && CFP->getValueAPF().isZero());
@@ -2633,7 +2666,8 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
Depth + 1);
case Instruction::Call:
- Intrinsic::ID IID = getIntrinsicForCallSite(cast<CallInst>(I), TLI);
+ const auto *CI = cast<CallInst>(I);
+ Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI);
switch (IID) {
default:
break;
@@ -2650,16 +2684,37 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::fabs:
- case Intrinsic::sqrt:
return true;
+
+ case Intrinsic::sqrt:
+ // sqrt(x) is always >= -0 or NaN. Moreover, sqrt(x) == -0 iff x == -0.
+ if (!SignBitOnly)
+ return true;
+ return CI->hasNoNaNs() && (CI->hasNoSignedZeros() ||
+ CannotBeNegativeZero(CI->getOperand(0), TLI));
+
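The sqrt edge case described above can be checked with plain libm (standard C++ only, independent of this patch):

#include <cmath>
#include <cstdio>

int main() {
  double R = std::sqrt(-0.0);           // IEEE 754: sqrt(-0) is -0
  std::printf("%d\n", std::signbit(R)); // prints 1: sign bit set, yet not < 0
}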
case Intrinsic::powi:
- if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) {
// powi(x,n) is non-negative if n is even.
- if (CI->getBitWidth() <= 64 && CI->getSExtValue() % 2u == 0)
+ if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0)
return true;
}
+ // TODO: This is not correct. Given that exp is an integer, here are the
+ // ways that pow can return a negative value:
+ //
+ // pow(x, exp) --> negative if exp is odd and x is negative.
+ // pow(-0, exp) --> -inf if exp is negative odd.
+ // pow(-0, exp) --> -0 if exp is positive odd.
+ // pow(-inf, exp) --> -0 if exp is negative odd.
+ // pow(-inf, exp) --> -inf if exp is positive odd.
+ //
+ // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN,
+ // but we must return false if x == -0. Unfortunately we do not currently
+ // have a way of expressing this constraint. See details in
+ // https://llvm.org/bugs/show_bug.cgi?id=31702.
return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
Depth + 1);
+
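The corner cases this TODO enumerates can likewise be confirmed numerically with plain libm (standard C++ only):

#include <cmath>
#include <cstdio>

int main() {
  std::printf("%g\n", std::pow(-0.0, 3.0));  // -0: positive odd exponent
  std::printf("%g\n", std::pow(-0.0, -3.0)); // -inf: negative odd exponent
  std::printf("%g\n", std::pow(-2.0, 3.0));  // -8: odd exponent, negative base
}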
case Intrinsic::fma:
case Intrinsic::fmuladd:
// x*x+y is non-negative if y is non-negative.
@@ -3150,6 +3205,9 @@ Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL,
if (GA->isInterposable())
return V;
V = GA->getAliasee();
+ } else if (isa<AllocaInst>(V)) {
+ // An alloca can't be further simplified.
+ return V;
} else {
if (auto CS = CallSite(V))
if (Value *RV = CS.getReturnedArgOperand()) {
@@ -3327,6 +3385,10 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
case Intrinsic::rint:
case Intrinsic::round:
return true;
+ // These intrinsics do not correspond to any libm function, and
+ // do not set errno.
+ case Intrinsic::powi:
+ return true;
// TODO: are convert_{from,to}_fp16 safe?
// TODO: can we list target-specific intrinsics here?
default: break;
@@ -3406,6 +3468,16 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
if (NumUsesExplored >= DomConditionsMaxUses)
break;
NumUsesExplored++;
+
+ // If the value is used as an argument to a call or invoke, then argument
+ // attributes may provide an answer about null-ness.
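+  // For example, given 'declare void @f(i8* nonnull)', a dominating call
+  // 'call void @f(i8* %p)' implies %p is non-null at CtxI (illustrative IR).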
+ if (auto CS = ImmutableCallSite(U))
+ if (auto *CalledFunc = CS.getCalledFunction())
+ for (const Argument &Arg : CalledFunc->args())
+ if (CS.getArgOperand(Arg.getArgNo()) == V &&
+ Arg.hasNonNullAttr() && DT->dominates(CS.getInstruction(), CtxI))
+ return true;
+
// Consider only compare instructions uniquely controlling a branch
CmpInst::Predicate Pred;
if (!match(const_cast<User *>(U),
@@ -3683,6 +3755,8 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
return false;
if (isa<ReturnInst>(I))
return false;
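+  // An 'unreachable' terminator has no successors to transfer execution to.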
+ if (isa<UnreachableInst>(I))
+ return false;
// Calls can throw, or contain an infinite loop, or kill the process.
if (auto CS = ImmutableCallSite(I)) {
@@ -3731,79 +3805,33 @@ bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
bool llvm::propagatesFullPoison(const Instruction *I) {
switch (I->getOpcode()) {
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Xor:
- case Instruction::Trunc:
- case Instruction::BitCast:
- case Instruction::AddrSpaceCast:
- // These operations all propagate poison unconditionally. Note that poison
- // is not any particular value, so xor or subtraction of poison with
- // itself still yields poison, not zero.
- return true;
-
- case Instruction::AShr:
- case Instruction::SExt:
- // For these operations, one bit of the input is replicated across
- // multiple output bits. A replicated poison bit is still poison.
- return true;
-
- case Instruction::Shl: {
- // Left shift *by* a poison value is poison. The number of
- // positions to shift is unsigned, so no negative values are
- // possible there. Left shift by zero places preserves poison. So
- // it only remains to consider left shift of poison by a positive
- // number of places.
- //
- // A left shift by a positive number of places leaves the lowest order bit
- // non-poisoned. However, if such a shift has a no-wrap flag, then we can
- // make the poison operand violate that flag, yielding a fresh full-poison
- // value.
- auto *OBO = cast<OverflowingBinaryOperator>(I);
- return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
- }
-
- case Instruction::Mul: {
- // A multiplication by zero yields a non-poison zero result, so we need to
- // rule out zero as an operand. Conservatively, multiplication by a
- // non-zero constant is not multiplication by zero.
- //
- // Multiplication by a non-zero constant can leave some bits
- // non-poisoned. For example, a multiplication by 2 leaves the lowest
- // order bit unpoisoned. So we need to consider that.
- //
- // Multiplication by 1 preserves poison. If the multiplication has a
- // no-wrap flag, then we can make the poison operand violate that flag
- // when multiplied by any integer other than 0 and 1.
- auto *OBO = cast<OverflowingBinaryOperator>(I);
- if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) {
- for (Value *V : OBO->operands()) {
- if (auto *CI = dyn_cast<ConstantInt>(V)) {
- // A ConstantInt cannot yield poison, so we can assume that it is
- // the other operand that is poison.
- return !CI->isZero();
- }
- }
- }
- return false;
- }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ case Instruction::Trunc:
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ case Instruction::GetElementPtr:
+ // These operations all propagate poison unconditionally. Note that poison
+ // is not any particular value, so xor or subtraction of poison with
+ // itself still yields poison, not zero.
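+    // For example, if %p is poison, then both 'xor i32 %p, %p' and
+    // 'sub i32 %p, %p' are poison rather than 0 (illustrative IR).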
+ return true;
- case Instruction::ICmp:
- // Comparing poison with any value yields poison. This is why, for
- // instance, x s< (x +nsw 1) can be folded to true.
- return true;
+ case Instruction::AShr:
+ case Instruction::SExt:
+ // For these operations, one bit of the input is replicated across
+ // multiple output bits. A replicated poison bit is still poison.
+ return true;
- case Instruction::GetElementPtr:
- // A GEP implicitly represents a sequence of additions, subtractions,
- // truncations, sign extensions and multiplications. The multiplications
- // are by the non-zero sizes of some set of types, so we do not have to be
- // concerned with multiplication by zero. If the GEP is in-bounds, then
- // these operations are implicitly no-signed-wrap so poison is propagated
- // by the arguments above for Add, Sub, Trunc, SExt and Mul.
- return cast<GEPOperator>(I)->isInBounds();
+ case Instruction::ICmp:
+ // Comparing poison with any value yields poison. This is why, for
+ // instance, x s< (x +nsw 1) can be folded to true.
+ return true;
- default:
- return false;
+ default:
+ return false;
}
}
@@ -3906,6 +3934,37 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
Value *CmpLHS, Value *CmpRHS,
Value *TrueVal, Value *FalseVal,
Value *&LHS, Value *&RHS) {
+ // Assume success. If there's no match, callers should not use these anyway.
+ LHS = TrueVal;
+ RHS = FalseVal;
+
+ // Recognize variations of:
+ // CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
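+  // For instance, with C1 = 0 and C2 = 255 the first pattern below matches
+  //   (X <s 0) ? 0 : SMIN(X, 255)
+  // and recognizes it as SMAX(SMIN(X, 255), 0), i.e. a clamp to [0, 255].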
+ const APInt *C1;
+ if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) {
+ const APInt *C2;
+
+ // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
+ if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) &&
+ C1->slt(*C2) && Pred == CmpInst::ICMP_SLT)
+ return {SPF_SMAX, SPNB_NA, false};
+
+ // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
+ if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) &&
+ C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT)
+ return {SPF_SMIN, SPNB_NA, false};
+
+ // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
+ if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) &&
+ C1->ult(*C2) && Pred == CmpInst::ICMP_ULT)
+ return {SPF_UMAX, SPNB_NA, false};
+
+ // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
+ if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) &&
+ C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT)
+ return {SPF_UMIN, SPNB_NA, false};
+ }
+
if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
return {SPF_UNKNOWN, SPNB_NA, false};
@@ -3913,23 +3972,16 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
// (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0)
// (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0)
if (match(TrueVal, m_Zero()) &&
- match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) {
- LHS = TrueVal;
- RHS = FalseVal;
+ match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
- }
// Z = X -nsw Y
// (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0)
// (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0)
if (match(FalseVal, m_Zero()) &&
- match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) {
- LHS = TrueVal;
- RHS = FalseVal;
+ match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS))))
return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
- }
- const APInt *C1;
if (!match(CmpRHS, m_APInt(C1)))
return {SPF_UNKNOWN, SPNB_NA, false};
@@ -3940,41 +3992,29 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
// Is the sign bit set?
// (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
// (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
- if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue()) {
- LHS = TrueVal;
- RHS = FalseVal;
+ if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue())
return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
- }
// Is the sign bit clear?
// (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
// (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() &&
- C2->isMinSignedValue()) {
- LHS = TrueVal;
- RHS = FalseVal;
+ C2->isMinSignedValue())
return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
- }
}
// Look through 'not' ops to find disguised signed min/max.
// (X >s C) ? ~X : ~C ==> (~X <s ~C) ? ~X : ~C ==> SMIN(~X, ~C)
// (X <s C) ? ~X : ~C ==> (~X >s ~C) ? ~X : ~C ==> SMAX(~X, ~C)
if (match(TrueVal, m_Not(m_Specific(CmpLHS))) &&
- match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2) {
- LHS = TrueVal;
- RHS = FalseVal;
+ match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2)
return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
- }
// (X >s C) ? ~C : ~X ==> (~X <s ~C) ? ~C : ~X ==> SMAX(~C, ~X)
// (X <s C) ? ~C : ~X ==> (~X >s ~C) ? ~C : ~X ==> SMIN(~C, ~X)
if (match(FalseVal, m_Not(m_Specific(CmpLHS))) &&
- match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2) {
- LHS = TrueVal;
- RHS = FalseVal;
+ match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2)
return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
- }
return {SPF_UNKNOWN, SPNB_NA, false};
}
@@ -4101,58 +4141,64 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
Instruction::CastOps *CastOp) {
- CastInst *CI = dyn_cast<CastInst>(V1);
- Constant *C = dyn_cast<Constant>(V2);
- if (!CI)
+ auto *Cast1 = dyn_cast<CastInst>(V1);
+ if (!Cast1)
return nullptr;
- *CastOp = CI->getOpcode();
-
- if (auto *CI2 = dyn_cast<CastInst>(V2)) {
- // If V1 and V2 are both the same cast from the same type, we can look
- // through V1.
- if (CI2->getOpcode() == CI->getOpcode() &&
- CI2->getSrcTy() == CI->getSrcTy())
- return CI2->getOperand(0);
- return nullptr;
- } else if (!C) {
+
+ *CastOp = Cast1->getOpcode();
+ Type *SrcTy = Cast1->getSrcTy();
+ if (auto *Cast2 = dyn_cast<CastInst>(V2)) {
+ // If V1 and V2 are both the same cast from the same type, look through V1.
+ if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
+ return Cast2->getOperand(0);
return nullptr;
}
- Constant *CastedTo = nullptr;
-
- if (isa<ZExtInst>(CI) && CmpI->isUnsigned())
- CastedTo = ConstantExpr::getTrunc(C, CI->getSrcTy());
-
- if (isa<SExtInst>(CI) && CmpI->isSigned())
- CastedTo = ConstantExpr::getTrunc(C, CI->getSrcTy(), true);
-
- if (isa<TruncInst>(CI))
- CastedTo = ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned());
-
- if (isa<FPTruncInst>(CI))
- CastedTo = ConstantExpr::getFPExtend(C, CI->getSrcTy(), true);
-
- if (isa<FPExtInst>(CI))
- CastedTo = ConstantExpr::getFPTrunc(C, CI->getSrcTy(), true);
-
- if (isa<FPToUIInst>(CI))
- CastedTo = ConstantExpr::getUIToFP(C, CI->getSrcTy(), true);
-
- if (isa<FPToSIInst>(CI))
- CastedTo = ConstantExpr::getSIToFP(C, CI->getSrcTy(), true);
-
- if (isa<UIToFPInst>(CI))
- CastedTo = ConstantExpr::getFPToUI(C, CI->getSrcTy(), true);
+ auto *C = dyn_cast<Constant>(V2);
+ if (!C)
+ return nullptr;
- if (isa<SIToFPInst>(CI))
- CastedTo = ConstantExpr::getFPToSI(C, CI->getSrcTy(), true);
+ Constant *CastedTo = nullptr;
+ switch (*CastOp) {
+ case Instruction::ZExt:
+ if (CmpI->isUnsigned())
+ CastedTo = ConstantExpr::getTrunc(C, SrcTy);
+ break;
+ case Instruction::SExt:
+ if (CmpI->isSigned())
+ CastedTo = ConstantExpr::getTrunc(C, SrcTy, true);
+ break;
+ case Instruction::Trunc:
+ CastedTo = ConstantExpr::getIntegerCast(C, SrcTy, CmpI->isSigned());
+ break;
+ case Instruction::FPTrunc:
+ CastedTo = ConstantExpr::getFPExtend(C, SrcTy, true);
+ break;
+ case Instruction::FPExt:
+ CastedTo = ConstantExpr::getFPTrunc(C, SrcTy, true);
+ break;
+ case Instruction::FPToUI:
+ CastedTo = ConstantExpr::getUIToFP(C, SrcTy, true);
+ break;
+ case Instruction::FPToSI:
+ CastedTo = ConstantExpr::getSIToFP(C, SrcTy, true);
+ break;
+ case Instruction::UIToFP:
+ CastedTo = ConstantExpr::getFPToUI(C, SrcTy, true);
+ break;
+ case Instruction::SIToFP:
+ CastedTo = ConstantExpr::getFPToSI(C, SrcTy, true);
+ break;
+ default:
+ break;
+ }
if (!CastedTo)
return nullptr;
- Constant *CastedBack =
- ConstantExpr::getCast(CI->getOpcode(), CastedTo, C->getType(), true);
// Make sure the cast doesn't lose any information.
+ Constant *CastedBack =
+ ConstantExpr::getCast(*CastOp, CastedTo, C->getType(), true);
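+  // E.g. for 'zext i8 %x to i32' compared against i32 300: the trunc to i8
+  // yields 44, and zext'ing 44 back gives 44 != 300, so we must bail out.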
if (CastedBack != C)
return nullptr;
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 7e598f435ff5..722f17a8067e 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -488,3 +488,88 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
return Inst;
}
+
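+/// Create an interleave shuffle mask. E.g. for VF = 4 and NumVecs = 2 this
+/// returns <0, 4, 1, 5, 2, 6, 3, 7>, which interleaves the lanes of two
+/// four-element vectors.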
+Constant *llvm::createInterleaveMask(IRBuilder<> &Builder, unsigned VF,
+ unsigned NumVecs) {
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < VF; i++)
+ for (unsigned j = 0; j < NumVecs; j++)
+ Mask.push_back(Builder.getInt32(j * VF + i));
+
+ return ConstantVector::get(Mask);
+}
+
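+/// Create a stride shuffle mask. E.g. for Start = 0, Stride = 2 and VF = 4
+/// this returns <0, 2, 4, 6>, selecting every Stride-th lane starting at
+/// Start.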
+Constant *llvm::createStrideMask(IRBuilder<> &Builder, unsigned Start,
+ unsigned Stride, unsigned VF) {
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < VF; i++)
+ Mask.push_back(Builder.getInt32(Start + i * Stride));
+
+ return ConstantVector::get(Mask);
+}
+
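+/// Create a sequential shuffle mask padded with undefs. E.g. for Start = 0,
+/// NumInts = 4 and NumUndefs = 4 this returns
+/// <0, 1, 2, 3, undef, undef, undef, undef>.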
+Constant *llvm::createSequentialMask(IRBuilder<> &Builder, unsigned Start,
+ unsigned NumInts, unsigned NumUndefs) {
+ SmallVector<Constant *, 16> Mask;
+ for (unsigned i = 0; i < NumInts; i++)
+ Mask.push_back(Builder.getInt32(Start + i));
+
+ Constant *Undef = UndefValue::get(Builder.getInt32Ty());
+ for (unsigned i = 0; i < NumUndefs; i++)
+ Mask.push_back(Undef);
+
+ return ConstantVector::get(Mask);
+}
+
+/// A helper function for concatenating vectors. This function concatenates two
+/// vectors having the same element type. If the second vector has fewer
+/// elements than the first, it is padded with undefs.
+static Value *concatenateTwoVectors(IRBuilder<> &Builder, Value *V1,
+ Value *V2) {
+ VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
+ VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
+ assert(VecTy1 && VecTy2 &&
+ VecTy1->getScalarType() == VecTy2->getScalarType() &&
+ "Expect two vectors with the same element type");
+
+ unsigned NumElts1 = VecTy1->getNumElements();
+ unsigned NumElts2 = VecTy2->getNumElements();
+  assert(NumElts1 >= NumElts2 &&
+         "Expect the first vector to have at least as many elements");
+
+ if (NumElts1 > NumElts2) {
+ // Extend with UNDEFs.
+ Constant *ExtMask =
+ createSequentialMask(Builder, 0, NumElts2, NumElts1 - NumElts2);
+ V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask);
+ }
+
+ Constant *Mask = createSequentialMask(Builder, 0, NumElts1 + NumElts2, 0);
+ return Builder.CreateShuffleVector(V1, V2, Mask);
+}
+
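+/// Concatenate a list of vectors into a single wide vector by pairwise
+/// concatenation until one value remains. Only the last input may have fewer
+/// elements; concatenateTwoVectors pads it with undefs first.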
+Value *llvm::concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) {
+ unsigned NumVecs = Vecs.size();
+ assert(NumVecs > 1 && "Should be at least two vectors");
+
+ SmallVector<Value *, 8> ResList;
+ ResList.append(Vecs.begin(), Vecs.end());
+ do {
+ SmallVector<Value *, 8> TmpList;
+ for (unsigned i = 0; i < NumVecs - 1; i += 2) {
+ Value *V0 = ResList[i], *V1 = ResList[i + 1];
+ assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
+ "Only the last vector may have a different type");
+
+ TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
+ }
+
+ // Push the last vector if the total number of vectors is odd.
+ if (NumVecs % 2 != 0)
+ TmpList.push_back(ResList[NumVecs - 1]);
+
+ ResList = TmpList;
+ NumVecs = ResList.size();
+ } while (NumVecs > 1);
+
+ return ResList[0];
+}
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 752942fc9fcc..49a8ce4bed0b 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -548,6 +548,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(ninf);
KEYWORD(nsz);
KEYWORD(arcp);
+ KEYWORD(contract);
KEYWORD(fast);
KEYWORD(nuw);
KEYWORD(nsw);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 4cd986e143b6..58ea9296afda 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -130,10 +130,9 @@ bool LLParser::ValidateEndOfModule() {
B.merge(NumberedAttrBuilders[Attr]);
if (Function *Fn = dyn_cast<Function>(V)) {
- AttributeSet AS = Fn->getAttributes();
- AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex);
- AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex,
- AS.getFnAttributes());
+ AttributeList AS = Fn->getAttributes();
+ AttrBuilder FnAttrs(AS.getFnAttributes());
+ AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
FnAttrs.merge(B);
@@ -144,32 +143,27 @@ bool LLParser::ValidateEndOfModule() {
FnAttrs.removeAttribute(Attribute::Alignment);
}
- AS = AS.addAttributes(Context, AttributeSet::FunctionIndex,
- AttributeSet::get(Context,
- AttributeSet::FunctionIndex,
- FnAttrs));
+ AS = AS.addAttributes(
+ Context, AttributeList::FunctionIndex,
+ AttributeList::get(Context, AttributeList::FunctionIndex, FnAttrs));
Fn->setAttributes(AS);
} else if (CallInst *CI = dyn_cast<CallInst>(V)) {
- AttributeSet AS = CI->getAttributes();
- AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex);
- AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex,
- AS.getFnAttributes());
+ AttributeList AS = CI->getAttributes();
+ AttrBuilder FnAttrs(AS.getFnAttributes());
+ AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
FnAttrs.merge(B);
- AS = AS.addAttributes(Context, AttributeSet::FunctionIndex,
- AttributeSet::get(Context,
- AttributeSet::FunctionIndex,
- FnAttrs));
+ AS = AS.addAttributes(
+ Context, AttributeList::FunctionIndex,
+ AttributeList::get(Context, AttributeList::FunctionIndex, FnAttrs));
CI->setAttributes(AS);
} else if (InvokeInst *II = dyn_cast<InvokeInst>(V)) {
- AttributeSet AS = II->getAttributes();
- AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex);
- AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex,
- AS.getFnAttributes());
+ AttributeList AS = II->getAttributes();
+ AttrBuilder FnAttrs(AS.getFnAttributes());
+ AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
FnAttrs.merge(B);
- AS = AS.addAttributes(Context, AttributeSet::FunctionIndex,
- AttributeSet::get(Context,
- AttributeSet::FunctionIndex,
- FnAttrs));
+ AS = AS.addAttributes(
+ Context, AttributeList::FunctionIndex,
+ AttributeList::get(Context, AttributeList::FunctionIndex, FnAttrs));
II->setAttributes(AS);
} else {
llvm_unreachable("invalid object with forward attribute group reference");
@@ -1855,6 +1849,34 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment,
return false;
}
+/// ParseOptionalCommaAddrSpace
+/// ::=
+/// ::= ',' addrspace(1)
+///
+/// This returns with AteExtraComma set to true if it ate an excess comma at the
+/// end.
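+///
+/// For example, this accepts the trailing ", addrspace(5)" in
+/// "%p = alloca i32, align 4, addrspace(5)" (illustrative syntax).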
+bool LLParser::ParseOptionalCommaAddrSpace(unsigned &AddrSpace,
+ LocTy &Loc,
+ bool &AteExtraComma) {
+ AteExtraComma = false;
+ while (EatIfPresent(lltok::comma)) {
+ // Metadata at the end is an early exit.
+ if (Lex.getKind() == lltok::MetadataVar) {
+ AteExtraComma = true;
+ return false;
+ }
+
+ Loc = Lex.getLoc();
+ if (Lex.getKind() != lltok::kw_addrspace)
+ return Error(Lex.getLoc(), "expected metadata or 'addrspace'");
+
+ if (ParseOptionalAddrSpace(AddrSpace))
+ return true;
+ }
+
+ return false;
+}
+
bool LLParser::parseAllocSizeArguments(unsigned &BaseSizeArg,
Optional<unsigned> &HowManyArg) {
Lex.Lex();
@@ -2098,7 +2120,6 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
if (ParseToken(lltok::lparen, "expected '(' in call"))
return true;
- unsigned AttrIndex = 1;
while (Lex.getKind() != lltok::rparen) {
// If this isn't the first argument, we need a comma.
if (!ArgList.empty() &&
@@ -2132,9 +2153,8 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
if (ParseOptionalParamAttrs(ArgAttrs) || ParseValue(ArgTy, V, PFS))
return true;
}
- ArgList.push_back(ParamInfo(ArgLoc, V, AttributeSet::get(V->getContext(),
- AttrIndex++,
- ArgAttrs)));
+ ArgList.push_back(ParamInfo(
+ ArgLoc, V, AttributeSet::get(V->getContext(), ArgAttrs)));
}
if (IsMustTailCall && InVarArgsFunc)
@@ -2239,9 +2259,8 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
if (!FunctionType::isValidArgumentType(ArgTy))
return Error(TypeLoc, "invalid type for function argument");
- unsigned AttrIndex = 1;
- ArgList.emplace_back(TypeLoc, ArgTy, AttributeSet::get(ArgTy->getContext(),
- AttrIndex++, Attrs),
+ ArgList.emplace_back(TypeLoc, ArgTy,
+ AttributeSet::get(ArgTy->getContext(), Attrs),
std::move(Name));
while (EatIfPresent(lltok::comma)) {
@@ -2268,10 +2287,9 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
if (!ArgTy->isFirstClassType())
return Error(TypeLoc, "invalid type for function argument");
- ArgList.emplace_back(
- TypeLoc, ArgTy,
- AttributeSet::get(ArgTy->getContext(), AttrIndex++, Attrs),
- std::move(Name));
+ ArgList.emplace_back(TypeLoc, ArgTy,
+ AttributeSet::get(ArgTy->getContext(), Attrs),
+ std::move(Name));
}
}
@@ -2295,7 +2313,7 @@ bool LLParser::ParseFunctionType(Type *&Result) {
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
if (!ArgList[i].Name.empty())
return Error(ArgList[i].Loc, "argument name invalid in function type");
- if (ArgList[i].Attrs.hasAttributes(i + 1))
+ if (ArgList[i].Attrs.hasAttributes())
return Error(ArgList[i].Loc,
"argument attributes invalid in function type");
}
@@ -3908,7 +3926,8 @@ bool LLParser::ParseDIBasicType(MDNode *&Result, bool IsDistinct) {
/// ParseDIDerivedType:
/// ::= !DIDerivedType(tag: DW_TAG_pointer_type, name: "int", file: !0,
/// line: 7, scope: !1, baseType: !2, size: 32,
-/// align: 32, offset: 0, flags: 0, extraData: !3)
+/// align: 32, offset: 0, flags: 0, extraData: !3,
+/// dwarfAddressSpace: 3)
bool LLParser::ParseDIDerivedType(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(tag, DwarfTagField, ); \
@@ -3921,14 +3940,20 @@ bool LLParser::ParseDIDerivedType(MDNode *&Result, bool IsDistinct) {
OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \
OPTIONAL(offset, MDUnsignedField, (0, UINT64_MAX)); \
OPTIONAL(flags, DIFlagField, ); \
- OPTIONAL(extraData, MDField, );
+ OPTIONAL(extraData, MDField, ); \
+ OPTIONAL(dwarfAddressSpace, MDUnsignedField, (UINT32_MAX, UINT32_MAX));
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
+ Optional<unsigned> DWARFAddressSpace;
+ if (dwarfAddressSpace.Val != UINT32_MAX)
+ DWARFAddressSpace = dwarfAddressSpace.Val;
+
Result = GET_OR_DISTINCT(DIDerivedType,
(Context, tag.Val, name.Val, file.Val, line.Val,
scope.Val, baseType.Val, size.Val, align.Val,
- offset.Val, flags.Val, extraData.Val));
+ offset.Val, DWARFAddressSpace, flags.Val,
+ extraData.Val));
return false;
}
@@ -4029,7 +4054,8 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
OPTIONAL(imports, MDField, ); \
OPTIONAL(macros, MDField, ); \
OPTIONAL(dwoId, MDUnsignedField, ); \
- OPTIONAL(splitDebugInlining, MDBoolField, = true);
+ OPTIONAL(splitDebugInlining, MDBoolField, = true); \
+ OPTIONAL(debugInfoForProfiling, MDBoolField, = false);
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
@@ -4037,7 +4063,7 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
Context, language.Val, file.Val, producer.Val, isOptimized.Val, flags.Val,
runtimeVersion.Val, splitDebugFilename.Val, emissionKind.Val, enums.Val,
retainedTypes.Val, globals.Val, imports.Val, macros.Val, dwoId.Val,
- splitDebugInlining.Val);
+ splitDebugInlining.Val, debugInfoForProfiling.Val);
return false;
}
@@ -4589,6 +4615,9 @@ bool LLParser::parseConstantValue(Type *Ty, Constant *&C) {
C = cast<Constant>(V);
return false;
}
+ case ValID::t_Null:
+ C = Constant::getNullValue(Ty);
+ return false;
default:
return Error(Loc, "expected a constant value");
}
@@ -4735,25 +4764,14 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
std::vector<Type*> ParamTypeList;
SmallVector<AttributeSet, 8> Attrs;
- if (RetAttrs.hasAttributes())
- Attrs.push_back(AttributeSet::get(RetType->getContext(),
- AttributeSet::ReturnIndex,
- RetAttrs));
-
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
ParamTypeList.push_back(ArgList[i].Ty);
- if (ArgList[i].Attrs.hasAttributes(i + 1)) {
- AttrBuilder B(ArgList[i].Attrs, i + 1);
- Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B));
- }
+ Attrs.push_back(ArgList[i].Attrs);
}
- if (FuncAttrs.hasAttributes())
- Attrs.push_back(AttributeSet::get(RetType->getContext(),
- AttributeSet::FunctionIndex,
- FuncAttrs));
-
- AttributeSet PAL = AttributeSet::get(Context, Attrs);
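+  // The AttributeList factory takes the function, return and per-argument
+  // attribute sets separately instead of index-tagged AttributeSets.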
+ AttributeList PAL =
+ AttributeList::get(Context, AttributeSet::get(Context, FuncAttrs),
+ AttributeSet::get(Context, RetAttrs), Attrs);
if (PAL.hasAttribute(1, Attribute::StructRet) && !RetType->isVoidTy())
return Error(RetTypeLoc, "functions with 'sret' argument must return void");
@@ -5363,13 +5381,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
return true;
// Set up the Attribute for the function.
- SmallVector<AttributeSet, 8> Attrs;
- if (RetAttrs.hasAttributes())
- Attrs.push_back(AttributeSet::get(RetType->getContext(),
- AttributeSet::ReturnIndex,
- RetAttrs));
-
- SmallVector<Value*, 8> Args;
+ SmallVector<Value *, 8> Args;
+ SmallVector<AttributeSet, 8> ArgAttrs;
// Loop through FunctionType's arguments and ensure they are specified
// correctly. Also, gather any parameter attributes.
@@ -5387,26 +5400,19 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
return Error(ArgList[i].Loc, "argument is not of expected type '" +
getTypeString(ExpectedTy) + "'");
Args.push_back(ArgList[i].V);
- if (ArgList[i].Attrs.hasAttributes(i + 1)) {
- AttrBuilder B(ArgList[i].Attrs, i + 1);
- Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B));
- }
+ ArgAttrs.push_back(ArgList[i].Attrs);
}
if (I != E)
return Error(CallLoc, "not enough parameters specified for call");
- if (FnAttrs.hasAttributes()) {
- if (FnAttrs.hasAlignmentAttr())
- return Error(CallLoc, "invoke instructions may not have an alignment");
-
- Attrs.push_back(AttributeSet::get(RetType->getContext(),
- AttributeSet::FunctionIndex,
- FnAttrs));
- }
+ if (FnAttrs.hasAlignmentAttr())
+ return Error(CallLoc, "invoke instructions may not have an alignment");
// Finish off the Attribute and check them
- AttributeSet PAL = AttributeSet::get(Context, Attrs);
+ AttributeList PAL =
+ AttributeList::get(Context, AttributeSet::get(Context, FnAttrs),
+ AttributeSet::get(Context, RetAttrs), ArgAttrs);
InvokeInst *II =
InvokeInst::Create(Ty, Callee, NormalBB, UnwindBB, Args, BundleList);
@@ -5968,10 +5974,6 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
// Set up the Attribute for the function.
SmallVector<AttributeSet, 8> Attrs;
- if (RetAttrs.hasAttributes())
- Attrs.push_back(AttributeSet::get(RetType->getContext(),
- AttributeSet::ReturnIndex,
- RetAttrs));
SmallVector<Value*, 8> Args;
@@ -5991,26 +5993,19 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
return Error(ArgList[i].Loc, "argument is not of expected type '" +
getTypeString(ExpectedTy) + "'");
Args.push_back(ArgList[i].V);
- if (ArgList[i].Attrs.hasAttributes(i + 1)) {
- AttrBuilder B(ArgList[i].Attrs, i + 1);
- Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B));
- }
+ Attrs.push_back(ArgList[i].Attrs);
}
if (I != E)
return Error(CallLoc, "not enough parameters specified for call");
- if (FnAttrs.hasAttributes()) {
- if (FnAttrs.hasAlignmentAttr())
- return Error(CallLoc, "call instructions may not have an alignment");
-
- Attrs.push_back(AttributeSet::get(RetType->getContext(),
- AttributeSet::FunctionIndex,
- FnAttrs));
- }
+ if (FnAttrs.hasAlignmentAttr())
+ return Error(CallLoc, "call instructions may not have an alignment");
// Finish off the Attribute and check them
- AttributeSet PAL = AttributeSet::get(Context, Attrs);
+ AttributeList PAL =
+ AttributeList::get(Context, AttributeSet::get(Context, FnAttrs),
+ AttributeSet::get(Context, RetAttrs), Attrs);
CallInst *CI = CallInst::Create(Ty, Callee, Args, BundleList);
CI->setTailCallKind(TCK);
@@ -6032,8 +6027,9 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
/// (',' 'align' i32)?
int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
Value *Size = nullptr;
- LocTy SizeLoc, TyLoc;
+ LocTy SizeLoc, TyLoc, ASLoc;
unsigned Alignment = 0;
+ unsigned AddrSpace = 0;
Type *Ty = nullptr;
bool IsInAlloca = EatIfPresent(lltok::kw_inalloca);
@@ -6047,12 +6043,21 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
bool AteExtraComma = false;
if (EatIfPresent(lltok::comma)) {
if (Lex.getKind() == lltok::kw_align) {
- if (ParseOptionalAlignment(Alignment)) return true;
+ if (ParseOptionalAlignment(Alignment))
+ return true;
+ if (ParseOptionalCommaAddrSpace(AddrSpace, ASLoc, AteExtraComma))
+ return true;
+ } else if (Lex.getKind() == lltok::kw_addrspace) {
+ ASLoc = Lex.getLoc();
+ if (ParseOptionalAddrSpace(AddrSpace))
+ return true;
} else if (Lex.getKind() == lltok::MetadataVar) {
AteExtraComma = true;
} else {
if (ParseTypeAndValue(Size, SizeLoc, PFS) ||
- ParseOptionalCommaAlign(Alignment, AteExtraComma))
+ ParseOptionalCommaAlign(Alignment, AteExtraComma) ||
+ (!AteExtraComma &&
+ ParseOptionalCommaAddrSpace(AddrSpace, ASLoc, AteExtraComma)))
return true;
}
}
@@ -6060,7 +6065,14 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
if (Size && !Size->getType()->isIntegerTy())
return Error(SizeLoc, "element count must have integer type");
- AllocaInst *AI = new AllocaInst(Ty, Size, Alignment);
+ const DataLayout &DL = M->getDataLayout();
+ unsigned AS = DL.getAllocaAddrSpace();
+ if (AS != AddrSpace) {
+ // TODO: In the future it should be possible to specify addrspace per-alloca.
+ return Error(ASLoc, "address space must match datalayout");
+ }
+
+ AllocaInst *AI = new AllocaInst(Ty, AS, Size, Alignment);
AI->setUsedWithInAlloca(IsInAlloca);
AI->setSwiftError(IsSwiftError);
Inst = AI;
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 16d4e8b5baa0..4616c2e86947 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -193,6 +193,10 @@ namespace llvm {
case lltok::kw_ninf: FMF.setNoInfs(); Lex.Lex(); continue;
case lltok::kw_nsz: FMF.setNoSignedZeros(); Lex.Lex(); continue;
case lltok::kw_arcp: FMF.setAllowReciprocal(); Lex.Lex(); continue;
+ case lltok::kw_contract:
+ FMF.setAllowContract(true);
+ Lex.Lex();
+ continue;
default: return FMF;
}
return FMF;
@@ -242,6 +246,8 @@ namespace llvm {
bool ParseOrdering(AtomicOrdering &Ordering);
bool ParseOptionalStackAlignment(unsigned &Alignment);
bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
+ bool ParseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc,
+ bool &AteExtraComma);
bool ParseOptionalCommaInAlloca(bool &IsInAlloca);
bool parseAllocSizeArguments(unsigned &ElemSizeArg,
Optional<unsigned> &HowManyArg);
@@ -393,7 +399,7 @@ namespace llvm {
Value *V;
AttributeSet Attrs;
ParamInfo(LocTy loc, Value *v, AttributeSet attrs)
- : Loc(loc), V(v), Attrs(attrs) {}
+ : Loc(loc), V(v), Attrs(attrs) {}
};
bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
PerFunctionState &PFS,
@@ -447,7 +453,7 @@ namespace llvm {
AttributeSet Attrs;
std::string Name;
ArgInfo(LocTy L, Type *ty, AttributeSet Attr, const std::string &N)
- : Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
+ : Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
};
bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
bool ParseFunctionHeader(Function *&Fn, bool isDefine);
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 048aeee90b35..33f8e63daa05 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -98,6 +98,7 @@ enum Kind {
kw_ninf,
kw_nsz,
kw_arcp,
+ kw_contract,
kw_fast,
kw_nuw,
kw_nsw,
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index a46e49ccde83..24ab7e9a950c 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -379,6 +379,8 @@ protected:
BitstreamBlockInfo BlockInfo;
BitstreamCursor Stream;
+ Expected<unsigned> parseVersionRecord(ArrayRef<uint64_t> Record);
+
bool readBlockInfo();
// Contains an arbitrary and optional string identifying the bitcode producer
@@ -395,6 +397,16 @@ Error BitcodeReaderBase::error(const Twine &Message) {
return ::error(FullMsg);
}
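+
+/// Parse a VERSION record. Only module versions 0 and 1 are currently
+/// understood; callers use the returned version to decide, for example,
+/// whether relative value IDs are in use.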
+Expected<unsigned>
+BitcodeReaderBase::parseVersionRecord(ArrayRef<uint64_t> Record) {
+ if (Record.size() < 1)
+ return error("Invalid record");
+ unsigned ModuleVersion = Record[0];
+ if (ModuleVersion > 1)
+ return error("Invalid value");
+ return ModuleVersion;
+}
+
class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
LLVMContext &Context;
Module *TheModule = nullptr;
@@ -405,6 +417,9 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
bool SeenValueSymbolTable = false;
uint64_t VSTOffset = 0;
+ std::vector<std::string> SectionTable;
+ std::vector<std::string> GCTable;
+
std::vector<Type*> TypeList;
BitcodeReaderValueList ValueList;
Optional<MetadataLoader> MDLoader;
@@ -419,10 +434,10 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
/// The set of attributes by index. Index zero in the file is for null, and
/// is thus not represented here. As such all indices are off by one.
- std::vector<AttributeSet> MAttributes;
+ std::vector<AttributeList> MAttributes;
/// The set of attribute groups.
- std::map<unsigned, AttributeSet> MAttributeGroups;
+ std::map<unsigned, AttributeList> MAttributeGroups;
/// While parsing a function body, this is a list of the basic blocks for the
/// function.
@@ -520,10 +535,10 @@ private:
return FunctionBBs[ID];
}
- AttributeSet getAttributes(unsigned i) const {
+ AttributeList getAttributes(unsigned i) const {
if (i-1 < MAttributes.size())
return MAttributes[i-1];
- return AttributeSet();
+ return AttributeList();
}
/// Read a value/type pair out of the specified record from slot 'Slot'.
@@ -598,6 +613,13 @@ private:
Error parseAlignmentValue(uint64_t Exponent, unsigned &Alignment);
Error parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
Error parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata = false);
+
+ Error parseComdatRecord(ArrayRef<uint64_t> Record);
+ Error parseGlobalVarRecord(ArrayRef<uint64_t> Record);
+ Error parseFunctionRecord(ArrayRef<uint64_t> Record);
+ Error parseGlobalIndirectSymbolRecord(unsigned BitCode,
+ ArrayRef<uint64_t> Record);
+
Error parseAttributeBlock();
Error parseAttributeGroupBlock();
Error parseTypeTable();
@@ -971,6 +993,8 @@ static FastMathFlags getDecodedFastMathFlags(unsigned Val) {
FMF.setNoSignedZeros();
if (0 != (Val & FastMathFlags::AllowReciprocal))
FMF.setAllowReciprocal();
+ if (0 != (Val & FastMathFlags::AllowContract))
+ FMF.setAllowContract(true);
return FMF;
}
@@ -1132,7 +1156,7 @@ Error BitcodeReader::parseAttributeBlock() {
SmallVector<uint64_t, 64> Record;
- SmallVector<AttributeSet, 8> Attrs;
+ SmallVector<AttributeList, 8> Attrs;
// Read all the records.
while (true) {
@@ -1162,10 +1186,10 @@ Error BitcodeReader::parseAttributeBlock() {
for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
AttrBuilder B;
decodeLLVMAttributesForBitcode(B, Record[i+1]);
- Attrs.push_back(AttributeSet::get(Context, Record[i], B));
+ Attrs.push_back(AttributeList::get(Context, Record[i], B));
}
- MAttributes.push_back(AttributeSet::get(Context, Attrs));
+ MAttributes.push_back(AttributeList::get(Context, Attrs));
Attrs.clear();
break;
}
@@ -1173,7 +1197,7 @@ Error BitcodeReader::parseAttributeBlock() {
for (unsigned i = 0, e = Record.size(); i != e; ++i)
Attrs.push_back(MAttributeGroups[Record[i]]);
- MAttributes.push_back(AttributeSet::get(Context, Attrs));
+ MAttributes.push_back(AttributeList::get(Context, Attrs));
Attrs.clear();
break;
}
@@ -1391,7 +1415,7 @@ Error BitcodeReader::parseAttributeGroupBlock() {
}
}
- MAttributeGroups[GrpID] = AttributeSet::get(Context, Idx, B);
+ MAttributeGroups[GrpID] = AttributeList::get(Context, Idx, B);
break;
}
}
@@ -1794,22 +1818,16 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
return Err;
Value *V = ValOrErr.get();
- auto *GO = dyn_cast<GlobalObject>(V);
- if (!GO) {
- // If this is an alias, need to get the actual Function object
- // it aliases, in order to set up the DeferredFunctionInfo entry below.
- auto *GA = dyn_cast<GlobalAlias>(V);
- if (GA)
- GO = GA->getBaseObject();
- assert(GO);
- }
+ auto *F = dyn_cast<Function>(V);
+ // Ignore function offsets emitted for aliases of functions in older
+ // versions of LLVM.
+ if (!F)
+ break;
// Note that we subtract 1 here because the offset is relative to one word
// before the start of the identification or module block, which was
// historically always the start of the regular bitcode header.
uint64_t FuncWordOffset = Record[1] - 1;
- Function *F = dyn_cast<Function>(GO);
- assert(F);
uint64_t FuncBitOffset = FuncWordOffset * 32;
DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta;
// Set the LastFunctionBlockBit to point to the last function block.
@@ -2607,6 +2625,246 @@ bool BitcodeReaderBase::readBlockInfo() {
return false;
}
+Error BitcodeReader::parseComdatRecord(ArrayRef<uint64_t> Record) {
+ // [selection_kind, name]
+ if (Record.size() < 2)
+ return error("Invalid record");
+ Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]);
+ std::string Name;
+ unsigned ComdatNameSize = Record[1];
+ Name.reserve(ComdatNameSize);
+ for (unsigned i = 0; i != ComdatNameSize; ++i)
+ Name += (char)Record[2 + i];
+ Comdat *C = TheModule->getOrInsertComdat(Name);
+ C->setSelectionKind(SK);
+ ComdatList.push_back(C);
+ return Error::success();
+}
+
+Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
+ // [pointer type, isconst, initid, linkage, alignment, section,
+ // visibility, threadlocal, unnamed_addr, externally_initialized,
+ // dllstorageclass, comdat]
+ if (Record.size() < 6)
+ return error("Invalid record");
+ Type *Ty = getTypeByID(Record[0]);
+ if (!Ty)
+ return error("Invalid record");
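+  // Record[1] packs three fields: bit 0 is 'isconst' and bit 1 says whether
+  // an explicit address space follows in bits 2 and up. E.g. a value of 11
+  // (0b1011) decodes to a constant global in address space 2.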
+ bool isConstant = Record[1] & 1;
+ bool explicitType = Record[1] & 2;
+ unsigned AddressSpace;
+ if (explicitType) {
+ AddressSpace = Record[1] >> 2;
+ } else {
+ if (!Ty->isPointerTy())
+ return error("Invalid type for value");
+ AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
+ Ty = cast<PointerType>(Ty)->getElementType();
+ }
+
+ uint64_t RawLinkage = Record[3];
+ GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage);
+ unsigned Alignment;
+ if (Error Err = parseAlignmentValue(Record[4], Alignment))
+ return Err;
+ std::string Section;
+ if (Record[5]) {
+ if (Record[5] - 1 >= SectionTable.size())
+ return error("Invalid ID");
+ Section = SectionTable[Record[5] - 1];
+ }
+ GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility;
+ // Local linkage must have default visibility.
+ if (Record.size() > 6 && !GlobalValue::isLocalLinkage(Linkage))
+ // FIXME: Change to an error if non-default in 4.0.
+ Visibility = getDecodedVisibility(Record[6]);
+
+ GlobalVariable::ThreadLocalMode TLM = GlobalVariable::NotThreadLocal;
+ if (Record.size() > 7)
+ TLM = getDecodedThreadLocalMode(Record[7]);
+
+ GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None;
+ if (Record.size() > 8)
+ UnnamedAddr = getDecodedUnnamedAddrType(Record[8]);
+
+ bool ExternallyInitialized = false;
+ if (Record.size() > 9)
+ ExternallyInitialized = Record[9];
+
+ GlobalVariable *NewGV =
+ new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, "",
+ nullptr, TLM, AddressSpace, ExternallyInitialized);
+ NewGV->setAlignment(Alignment);
+ if (!Section.empty())
+ NewGV->setSection(Section);
+ NewGV->setVisibility(Visibility);
+ NewGV->setUnnamedAddr(UnnamedAddr);
+
+ if (Record.size() > 10)
+ NewGV->setDLLStorageClass(getDecodedDLLStorageClass(Record[10]));
+ else
+ upgradeDLLImportExportLinkage(NewGV, RawLinkage);
+
+ ValueList.push_back(NewGV);
+
+ // Remember which value to use for the global initializer.
+ if (unsigned InitID = Record[2])
+ GlobalInits.push_back(std::make_pair(NewGV, InitID - 1));
+
+ if (Record.size() > 11) {
+ if (unsigned ComdatID = Record[11]) {
+ if (ComdatID > ComdatList.size())
+ return error("Invalid global variable comdat ID");
+ NewGV->setComdat(ComdatList[ComdatID - 1]);
+ }
+ } else if (hasImplicitComdat(RawLinkage)) {
+ NewGV->setComdat(reinterpret_cast<Comdat *>(1));
+ }
+ return Error::success();
+}
+
+Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
+ // [type, callingconv, isproto, linkage, paramattr, alignment, section,
+ // visibility, gc, unnamed_addr, prologuedata, dllstorageclass, comdat,
+ // prefixdata]
+ if (Record.size() < 8)
+ return error("Invalid record");
+ Type *Ty = getTypeByID(Record[0]);
+ if (!Ty)
+ return error("Invalid record");
+ if (auto *PTy = dyn_cast<PointerType>(Ty))
+ Ty = PTy->getElementType();
+ auto *FTy = dyn_cast<FunctionType>(Ty);
+ if (!FTy)
+ return error("Invalid type for value");
+ auto CC = static_cast<CallingConv::ID>(Record[1]);
+ if (CC & ~CallingConv::MaxID)
+ return error("Invalid calling convention ID");
+
+ Function *Func =
+ Function::Create(FTy, GlobalValue::ExternalLinkage, "", TheModule);
+
+ Func->setCallingConv(CC);
+ bool isProto = Record[2];
+ uint64_t RawLinkage = Record[3];
+ Func->setLinkage(getDecodedLinkage(RawLinkage));
+ Func->setAttributes(getAttributes(Record[4]));
+
+ unsigned Alignment;
+ if (Error Err = parseAlignmentValue(Record[5], Alignment))
+ return Err;
+ Func->setAlignment(Alignment);
+ if (Record[6]) {
+ if (Record[6] - 1 >= SectionTable.size())
+ return error("Invalid ID");
+ Func->setSection(SectionTable[Record[6] - 1]);
+ }
+ // Local linkage must have default visibility.
+ if (!Func->hasLocalLinkage())
+ // FIXME: Change to an error if non-default in 4.0.
+ Func->setVisibility(getDecodedVisibility(Record[7]));
+ if (Record.size() > 8 && Record[8]) {
+ if (Record[8] - 1 >= GCTable.size())
+ return error("Invalid ID");
+ Func->setGC(GCTable[Record[8] - 1]);
+ }
+ GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None;
+ if (Record.size() > 9)
+ UnnamedAddr = getDecodedUnnamedAddrType(Record[9]);
+ Func->setUnnamedAddr(UnnamedAddr);
+ if (Record.size() > 10 && Record[10] != 0)
+ FunctionPrologues.push_back(std::make_pair(Func, Record[10] - 1));
+
+ if (Record.size() > 11)
+ Func->setDLLStorageClass(getDecodedDLLStorageClass(Record[11]));
+ else
+ upgradeDLLImportExportLinkage(Func, RawLinkage);
+
+ if (Record.size() > 12) {
+ if (unsigned ComdatID = Record[12]) {
+ if (ComdatID > ComdatList.size())
+ return error("Invalid function comdat ID");
+ Func->setComdat(ComdatList[ComdatID - 1]);
+ }
+ } else if (hasImplicitComdat(RawLinkage)) {
+ Func->setComdat(reinterpret_cast<Comdat *>(1));
+ }
+
+ if (Record.size() > 13 && Record[13] != 0)
+ FunctionPrefixes.push_back(std::make_pair(Func, Record[13] - 1));
+
+ if (Record.size() > 14 && Record[14] != 0)
+ FunctionPersonalityFns.push_back(std::make_pair(Func, Record[14] - 1));
+
+ ValueList.push_back(Func);
+
+ // If this is a function with a body, remember the prototype we are
+ // creating now, so that we can match up the body with them later.
+ if (!isProto) {
+ Func->setIsMaterializable(true);
+ FunctionsWithBodies.push_back(Func);
+ DeferredFunctionInfo[Func] = 0;
+ }
+ return Error::success();
+}
+
+Error BitcodeReader::parseGlobalIndirectSymbolRecord(
+ unsigned BitCode, ArrayRef<uint64_t> Record) {
+ // ALIAS_OLD: [alias type, aliasee val#, linkage]
+ // ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility,
+ // dllstorageclass]
+ // IFUNC: [alias type, addrspace, aliasee val#, linkage,
+ // visibility, dllstorageclass]
+ bool NewRecord = BitCode != bitc::MODULE_CODE_ALIAS_OLD;
+ if (Record.size() < (3 + (unsigned)NewRecord))
+ return error("Invalid record");
+ unsigned OpNum = 0;
+ Type *Ty = getTypeByID(Record[OpNum++]);
+ if (!Ty)
+ return error("Invalid record");
+
+ unsigned AddrSpace;
+ if (!NewRecord) {
+ auto *PTy = dyn_cast<PointerType>(Ty);
+ if (!PTy)
+ return error("Invalid type for value");
+ Ty = PTy->getElementType();
+ AddrSpace = PTy->getAddressSpace();
+ } else {
+ AddrSpace = Record[OpNum++];
+ }
+
+ auto Val = Record[OpNum++];
+ auto Linkage = Record[OpNum++];
+ GlobalIndirectSymbol *NewGA;
+ if (BitCode == bitc::MODULE_CODE_ALIAS ||
+ BitCode == bitc::MODULE_CODE_ALIAS_OLD)
+ NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), "",
+ TheModule);
+ else
+ NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), "",
+ nullptr, TheModule);
+  // Old bitcode files didn't have a visibility field.
+ // Local linkage must have default visibility.
+ if (OpNum != Record.size()) {
+ auto VisInd = OpNum++;
+ if (!NewGA->hasLocalLinkage())
+ // FIXME: Change to an error if non-default in 4.0.
+ NewGA->setVisibility(getDecodedVisibility(Record[VisInd]));
+ }
+ if (OpNum != Record.size())
+ NewGA->setDLLStorageClass(getDecodedDLLStorageClass(Record[OpNum++]));
+ else
+ upgradeDLLImportExportLinkage(NewGA, Linkage);
+ if (OpNum != Record.size())
+ NewGA->setThreadLocalMode(getDecodedThreadLocalMode(Record[OpNum++]));
+ if (OpNum != Record.size())
+ NewGA->setUnnamedAddr(getDecodedUnnamedAddrType(Record[OpNum++]));
+ ValueList.push_back(NewGA);
+ IndirectSymbolInits.push_back(std::make_pair(NewGA, Val));
+ return Error::success();
+}
+
Error BitcodeReader::parseModule(uint64_t ResumeBit,
bool ShouldLazyLoadMetadata) {
if (ResumeBit)
@@ -2615,8 +2873,6 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
return error("Invalid record");
SmallVector<uint64_t, 64> Record;
- std::vector<std::string> SectionTable;
- std::vector<std::string> GCTable;
// Read all the records for this module.
while (true) {
@@ -2762,21 +3018,11 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
auto BitCode = Stream.readRecord(Entry.ID, Record);
switch (BitCode) {
default: break; // Default behavior, ignore unknown content.
- case bitc::MODULE_CODE_VERSION: { // VERSION: [version#]
- if (Record.size() < 1)
- return error("Invalid record");
- // Only version #0 and #1 are supported so far.
- unsigned module_version = Record[0];
- switch (module_version) {
- default:
- return error("Invalid value");
- case 0:
- UseRelativeIDs = false;
- break;
- case 1:
- UseRelativeIDs = true;
- break;
- }
+ case bitc::MODULE_CODE_VERSION: {
+ Expected<unsigned> VersionOrErr = parseVersionRecord(Record);
+ if (!VersionOrErr)
+ return VersionOrErr.takeError();
+ UseRelativeIDs = *VersionOrErr >= 1;
break;
}
case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
@@ -2822,249 +3068,28 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit,
GCTable.push_back(S);
break;
}
- case bitc::MODULE_CODE_COMDAT: { // COMDAT: [selection_kind, name]
- if (Record.size() < 2)
- return error("Invalid record");
- Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]);
- unsigned ComdatNameSize = Record[1];
- std::string ComdatName;
- ComdatName.reserve(ComdatNameSize);
- for (unsigned i = 0; i != ComdatNameSize; ++i)
- ComdatName += (char)Record[2 + i];
- Comdat *C = TheModule->getOrInsertComdat(ComdatName);
- C->setSelectionKind(SK);
- ComdatList.push_back(C);
- break;
- }
- // GLOBALVAR: [pointer type, isconst, initid,
- // linkage, alignment, section, visibility, threadlocal,
- // unnamed_addr, externally_initialized, dllstorageclass,
- // comdat]
+ case bitc::MODULE_CODE_COMDAT: {
+ if (Error Err = parseComdatRecord(Record))
+ return Err;
+ break;
+ }
case bitc::MODULE_CODE_GLOBALVAR: {
- if (Record.size() < 6)
- return error("Invalid record");
- Type *Ty = getTypeByID(Record[0]);
- if (!Ty)
- return error("Invalid record");
- bool isConstant = Record[1] & 1;
- bool explicitType = Record[1] & 2;
- unsigned AddressSpace;
- if (explicitType) {
- AddressSpace = Record[1] >> 2;
- } else {
- if (!Ty->isPointerTy())
- return error("Invalid type for value");
- AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
- Ty = cast<PointerType>(Ty)->getElementType();
- }
-
- uint64_t RawLinkage = Record[3];
- GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage);
- unsigned Alignment;
- if (Error Err = parseAlignmentValue(Record[4], Alignment))
+ if (Error Err = parseGlobalVarRecord(Record))
return Err;
- std::string Section;
- if (Record[5]) {
- if (Record[5]-1 >= SectionTable.size())
- return error("Invalid ID");
- Section = SectionTable[Record[5]-1];
- }
- GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility;
- // Local linkage must have default visibility.
- if (Record.size() > 6 && !GlobalValue::isLocalLinkage(Linkage))
- // FIXME: Change to an error if non-default in 4.0.
- Visibility = getDecodedVisibility(Record[6]);
-
- GlobalVariable::ThreadLocalMode TLM = GlobalVariable::NotThreadLocal;
- if (Record.size() > 7)
- TLM = getDecodedThreadLocalMode(Record[7]);
-
- GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None;
- if (Record.size() > 8)
- UnnamedAddr = getDecodedUnnamedAddrType(Record[8]);
-
- bool ExternallyInitialized = false;
- if (Record.size() > 9)
- ExternallyInitialized = Record[9];
-
- GlobalVariable *NewGV =
- new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, "", nullptr,
- TLM, AddressSpace, ExternallyInitialized);
- NewGV->setAlignment(Alignment);
- if (!Section.empty())
- NewGV->setSection(Section);
- NewGV->setVisibility(Visibility);
- NewGV->setUnnamedAddr(UnnamedAddr);
-
- if (Record.size() > 10)
- NewGV->setDLLStorageClass(getDecodedDLLStorageClass(Record[10]));
- else
- upgradeDLLImportExportLinkage(NewGV, RawLinkage);
-
- ValueList.push_back(NewGV);
-
- // Remember which value to use for the global initializer.
- if (unsigned InitID = Record[2])
- GlobalInits.push_back(std::make_pair(NewGV, InitID-1));
-
- if (Record.size() > 11) {
- if (unsigned ComdatID = Record[11]) {
- if (ComdatID > ComdatList.size())
- return error("Invalid global variable comdat ID");
- NewGV->setComdat(ComdatList[ComdatID - 1]);
- }
- } else if (hasImplicitComdat(RawLinkage)) {
- NewGV->setComdat(reinterpret_cast<Comdat *>(1));
- }
-
break;
}
- // FUNCTION: [type, callingconv, isproto, linkage, paramattr,
- // alignment, section, visibility, gc, unnamed_addr,
- // prologuedata, dllstorageclass, comdat, prefixdata]
case bitc::MODULE_CODE_FUNCTION: {
- if (Record.size() < 8)
- return error("Invalid record");
- Type *Ty = getTypeByID(Record[0]);
- if (!Ty)
- return error("Invalid record");
- if (auto *PTy = dyn_cast<PointerType>(Ty))
- Ty = PTy->getElementType();
- auto *FTy = dyn_cast<FunctionType>(Ty);
- if (!FTy)
- return error("Invalid type for value");
- auto CC = static_cast<CallingConv::ID>(Record[1]);
- if (CC & ~CallingConv::MaxID)
- return error("Invalid calling convention ID");
-
- Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage,
- "", TheModule);
-
- Func->setCallingConv(CC);
- bool isProto = Record[2];
- uint64_t RawLinkage = Record[3];
- Func->setLinkage(getDecodedLinkage(RawLinkage));
- Func->setAttributes(getAttributes(Record[4]));
-
- unsigned Alignment;
- if (Error Err = parseAlignmentValue(Record[5], Alignment))
+ if (Error Err = parseFunctionRecord(Record))
return Err;
- Func->setAlignment(Alignment);
- if (Record[6]) {
- if (Record[6]-1 >= SectionTable.size())
- return error("Invalid ID");
- Func->setSection(SectionTable[Record[6]-1]);
- }
- // Local linkage must have default visibility.
- if (!Func->hasLocalLinkage())
- // FIXME: Change to an error if non-default in 4.0.
- Func->setVisibility(getDecodedVisibility(Record[7]));
- if (Record.size() > 8 && Record[8]) {
- if (Record[8]-1 >= GCTable.size())
- return error("Invalid ID");
- Func->setGC(GCTable[Record[8] - 1]);
- }
- GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None;
- if (Record.size() > 9)
- UnnamedAddr = getDecodedUnnamedAddrType(Record[9]);
- Func->setUnnamedAddr(UnnamedAddr);
- if (Record.size() > 10 && Record[10] != 0)
- FunctionPrologues.push_back(std::make_pair(Func, Record[10]-1));
-
- if (Record.size() > 11)
- Func->setDLLStorageClass(getDecodedDLLStorageClass(Record[11]));
- else
- upgradeDLLImportExportLinkage(Func, RawLinkage);
-
- if (Record.size() > 12) {
- if (unsigned ComdatID = Record[12]) {
- if (ComdatID > ComdatList.size())
- return error("Invalid function comdat ID");
- Func->setComdat(ComdatList[ComdatID - 1]);
- }
- } else if (hasImplicitComdat(RawLinkage)) {
- Func->setComdat(reinterpret_cast<Comdat *>(1));
- }
-
- if (Record.size() > 13 && Record[13] != 0)
- FunctionPrefixes.push_back(std::make_pair(Func, Record[13]-1));
-
- if (Record.size() > 14 && Record[14] != 0)
- FunctionPersonalityFns.push_back(std::make_pair(Func, Record[14] - 1));
-
- ValueList.push_back(Func);
-
- // If this is a function with a body, remember the prototype we are
- // creating now, so that we can match up the body with them later.
- if (!isProto) {
- Func->setIsMaterializable(true);
- FunctionsWithBodies.push_back(Func);
- DeferredFunctionInfo[Func] = 0;
- }
break;
}
- // ALIAS: [alias type, addrspace, aliasee val#, linkage]
- // ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility, dllstorageclass]
- // IFUNC: [alias type, addrspace, aliasee val#, linkage, visibility, dllstorageclass]
case bitc::MODULE_CODE_IFUNC:
case bitc::MODULE_CODE_ALIAS:
case bitc::MODULE_CODE_ALIAS_OLD: {
- bool NewRecord = BitCode != bitc::MODULE_CODE_ALIAS_OLD;
- if (Record.size() < (3 + (unsigned)NewRecord))
- return error("Invalid record");
- unsigned OpNum = 0;
- Type *Ty = getTypeByID(Record[OpNum++]);
- if (!Ty)
- return error("Invalid record");
-
- unsigned AddrSpace;
- if (!NewRecord) {
- auto *PTy = dyn_cast<PointerType>(Ty);
- if (!PTy)
- return error("Invalid type for value");
- Ty = PTy->getElementType();
- AddrSpace = PTy->getAddressSpace();
- } else {
- AddrSpace = Record[OpNum++];
- }
-
- auto Val = Record[OpNum++];
- auto Linkage = Record[OpNum++];
- GlobalIndirectSymbol *NewGA;
- if (BitCode == bitc::MODULE_CODE_ALIAS ||
- BitCode == bitc::MODULE_CODE_ALIAS_OLD)
- NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage),
- "", TheModule);
- else
- NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage),
- "", nullptr, TheModule);
- // Old bitcode files didn't have visibility field.
- // Local linkage must have default visibility.
- if (OpNum != Record.size()) {
- auto VisInd = OpNum++;
- if (!NewGA->hasLocalLinkage())
- // FIXME: Change to an error if non-default in 4.0.
- NewGA->setVisibility(getDecodedVisibility(Record[VisInd]));
- }
- if (OpNum != Record.size())
- NewGA->setDLLStorageClass(getDecodedDLLStorageClass(Record[OpNum++]));
- else
- upgradeDLLImportExportLinkage(NewGA, Linkage);
- if (OpNum != Record.size())
- NewGA->setThreadLocalMode(getDecodedThreadLocalMode(Record[OpNum++]));
- if (OpNum != Record.size())
- NewGA->setUnnamedAddr(getDecodedUnnamedAddrType(Record[OpNum++]));
- ValueList.push_back(NewGA);
- IndirectSymbolInits.push_back(std::make_pair(NewGA, Val));
- break;
- }
- /// MODULE_CODE_PURGEVALS: [numvals]
- case bitc::MODULE_CODE_PURGEVALS:
- // Trim down the value list to the specified size.
- if (Record.size() < 1 || Record[0] > ValueList.size())
- return error("Invalid record");
- ValueList.shrinkTo(Record[0]);
+ if (Error Err = parseGlobalIndirectSymbolRecord(BitCode, Record))
+ return Err;
break;
+ }
/// MODULE_CODE_VSTOFFSET: [offset]
case bitc::MODULE_CODE_VSTOFFSET:
if (Record.size() < 1)
@@ -3840,7 +3865,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (Record.size() < 4)
return error("Invalid record");
unsigned OpNum = 0;
- AttributeSet PAL = getAttributes(Record[OpNum++]);
+ AttributeList PAL = getAttributes(Record[OpNum++]);
unsigned CCInfo = Record[OpNum++];
BasicBlock *NormalBB = getBasicBlock(Record[OpNum++]);
BasicBlock *UnwindBB = getBasicBlock(Record[OpNum++]);
@@ -4017,7 +4042,12 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
}
if (!Ty || !Size)
return error("Invalid record");
- AllocaInst *AI = new AllocaInst(Ty, Size, Align);
+
+ // FIXME: Make this an optional field.
+ const DataLayout &DL = TheModule->getDataLayout();
+ unsigned AS = DL.getAllocaAddrSpace();
+
+ AllocaInst *AI = new AllocaInst(Ty, AS, Size, Align);
AI->setUsedWithInAlloca(InAlloca);
AI->setSwiftError(SwiftError);
I = AI;
@@ -4225,7 +4255,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
return error("Invalid record");
unsigned OpNum = 0;
- AttributeSet PAL = getAttributes(Record[OpNum++]);
+ AttributeList PAL = getAttributes(Record[OpNum++]);
unsigned CCInfo = Record[OpNum++];
FastMathFlags FMF;
@@ -4753,33 +4783,13 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) {
// was historically always the start of the regular bitcode header.
VSTOffset = Record[0] - 1;
break;
- // GLOBALVAR: [pointer type, isconst, initid,
- // linkage, alignment, section, visibility, threadlocal,
- // unnamed_addr, externally_initialized, dllstorageclass,
- // comdat]
- case bitc::MODULE_CODE_GLOBALVAR: {
- if (Record.size() < 6)
- return error("Invalid record");
- uint64_t RawLinkage = Record[3];
- GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage);
- ValueIdToLinkageMap[ValueId++] = Linkage;
- break;
- }
- // FUNCTION: [type, callingconv, isproto, linkage, paramattr,
- // alignment, section, visibility, gc, unnamed_addr,
- // prologuedata, dllstorageclass, comdat, prefixdata]
- case bitc::MODULE_CODE_FUNCTION: {
- if (Record.size() < 8)
- return error("Invalid record");
- uint64_t RawLinkage = Record[3];
- GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage);
- ValueIdToLinkageMap[ValueId++] = Linkage;
- break;
- }
- // ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility,
- // dllstorageclass]
+ // GLOBALVAR: [pointer type, isconst, initid, linkage, ...]
+ // FUNCTION: [type, callingconv, isproto, linkage, ...]
+ // ALIAS: [alias type, addrspace, aliasee val#, linkage, ...]
+ case bitc::MODULE_CODE_GLOBALVAR:
+ case bitc::MODULE_CODE_FUNCTION:
case bitc::MODULE_CODE_ALIAS: {
- if (Record.size() < 6)
+ if (Record.size() <= 3)
return error("Invalid record");
uint64_t RawLinkage = Record[3];
GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage);
@@ -4846,8 +4856,17 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
// Keep around the last seen summary to be used when we see an optional
// "OriginalName" attachement.
GlobalValueSummary *LastSeenSummary = nullptr;
+ GlobalValue::GUID LastSeenGUID = 0;
bool Combined = false;
+
+  // We can expect to see any number of type ID information records before
+  // each function summary record; these variables store the information
+  // collected so far so that it can be used to create the summary object.
std::vector<GlobalValue::GUID> PendingTypeTests;
+ std::vector<FunctionSummary::VFuncId> PendingTypeTestAssumeVCalls,
+ PendingTypeCheckedLoadVCalls;
+ std::vector<FunctionSummary::ConstVCall> PendingTypeTestAssumeConstVCalls,
+ PendingTypeCheckedLoadConstVCalls;
while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
@@ -4914,8 +4933,15 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
IsOldProfileFormat, HasProfile);
auto FS = llvm::make_unique<FunctionSummary>(
Flags, InstCount, std::move(Refs), std::move(Calls),
- std::move(PendingTypeTests));
+ std::move(PendingTypeTests), std::move(PendingTypeTestAssumeVCalls),
+ std::move(PendingTypeCheckedLoadVCalls),
+ std::move(PendingTypeTestAssumeConstVCalls),
+ std::move(PendingTypeCheckedLoadConstVCalls));
PendingTypeTests.clear();
+ PendingTypeTestAssumeVCalls.clear();
+ PendingTypeCheckedLoadVCalls.clear();
+ PendingTypeTestAssumeConstVCalls.clear();
+ PendingTypeCheckedLoadConstVCalls.clear();
auto GUID = getGUIDFromValueId(ValueID);
FS->setModulePath(TheIndex.addModulePath(ModulePath, 0)->first());
FS->setOriginalName(GUID.second);
@@ -4989,9 +5015,17 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first;
auto FS = llvm::make_unique<FunctionSummary>(
Flags, InstCount, std::move(Refs), std::move(Edges),
- std::move(PendingTypeTests));
+ std::move(PendingTypeTests), std::move(PendingTypeTestAssumeVCalls),
+ std::move(PendingTypeCheckedLoadVCalls),
+ std::move(PendingTypeTestAssumeConstVCalls),
+ std::move(PendingTypeCheckedLoadConstVCalls));
PendingTypeTests.clear();
+ PendingTypeTestAssumeVCalls.clear();
+ PendingTypeCheckedLoadVCalls.clear();
+ PendingTypeTestAssumeConstVCalls.clear();
+ PendingTypeCheckedLoadConstVCalls.clear();
LastSeenSummary = FS.get();
+ LastSeenGUID = GUID;
FS->setModulePath(ModuleIdMap[ModuleId]);
TheIndex.addGlobalValueSummary(GUID, std::move(FS));
Combined = true;
@@ -5018,6 +5052,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
AS->setAliasee(AliaseeInModule);
GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first;
+ LastSeenGUID = GUID;
TheIndex.addGlobalValueSummary(GUID, std::move(AS));
Combined = true;
break;
@@ -5034,6 +5069,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
LastSeenSummary = FS.get();
FS->setModulePath(ModuleIdMap[ModuleId]);
GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first;
+ LastSeenGUID = GUID;
TheIndex.addGlobalValueSummary(GUID, std::move(FS));
Combined = true;
break;
@@ -5044,8 +5080,10 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
if (!LastSeenSummary)
return error("Name attachment that does not follow a combined record");
LastSeenSummary->setOriginalName(OriginalName);
+ TheIndex.addOriginalName(LastSeenGUID, OriginalName);
// Reset the LastSeenSummary
LastSeenSummary = nullptr;
+ LastSeenGUID = 0;
break;
}
case bitc::FS_TYPE_TESTS: {
@@ -5054,6 +5092,28 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
Record.end());
break;
}
+ case bitc::FS_TYPE_TEST_ASSUME_VCALLS: {
+ assert(PendingTypeTestAssumeVCalls.empty());
+ for (unsigned I = 0; I != Record.size(); I += 2)
+ PendingTypeTestAssumeVCalls.push_back({Record[I], Record[I+1]});
+ break;
+ }
+ case bitc::FS_TYPE_CHECKED_LOAD_VCALLS: {
+ assert(PendingTypeCheckedLoadVCalls.empty());
+ for (unsigned I = 0; I != Record.size(); I += 2)
+ PendingTypeCheckedLoadVCalls.push_back({Record[I], Record[I+1]});
+ break;
+ }
+ case bitc::FS_TYPE_TEST_ASSUME_CONST_VCALL: {
+ PendingTypeTestAssumeConstVCalls.push_back(
+ {{Record[0], Record[1]}, {Record.begin() + 2, Record.end()}});
+ break;
+ }
+ case bitc::FS_TYPE_CHECKED_LOAD_CONST_VCALL: {
+ PendingTypeCheckedLoadConstVCalls.push_back(
+ {{Record[0], Record[1]}, {Record.begin() + 2, Record.end()}});
+ break;
+ }
}
}
llvm_unreachable("Exit infinite loop");
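The FS_TYPE_*_VCALLS records handled at the end of the loop are flat lists of (GUID, offset) pairs. A self-contained sketch of the decode step with plain std types standing in for the LLVM ones; unlike the loop above, it also stops cleanly on an odd-length record instead of reading past the end:

#include <cstdint>
#include <vector>

struct VFuncId {
  uint64_t GUID;
  uint64_t Offset;
};

static std::vector<VFuncId>
decodeVFuncIds(const std::vector<uint64_t> &Record) {
  std::vector<VFuncId> Out;
  Out.reserve(Record.size() / 2);
  // Stop one short of the end so a truncated record cannot overrun.
  for (size_t I = 0; I + 1 < Record.size(); I += 2)
    Out.push_back({Record[I], Record[I + 1]});
  return Out;
}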
diff --git a/lib/Bitcode/Reader/MetadataLoader.cpp b/lib/Bitcode/Reader/MetadataLoader.cpp
index b89f5be4a369..274dfe89cce5 100644
--- a/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -358,6 +358,9 @@ class PlaceholderQueue {
std::deque<DistinctMDOperandPlaceholder> PHs;
public:
+ ~PlaceholderQueue() {
+ assert(empty() && "PlaceholderQueue hasn't been flushed before being destroyed");
+ }
bool empty() { return PHs.empty(); }
DistinctMDOperandPlaceholder &getPlaceholderOp(unsigned ID);
void flush(BitcodeReaderMetadataList &MetadataList);
@@ -457,7 +460,7 @@ class MetadataLoader::MetadataLoaderImpl {
PlaceholderQueue &Placeholders, StringRef Blob,
unsigned &NextMetadataNo);
Error parseMetadataStrings(ArrayRef<uint64_t> Record, StringRef Blob,
- std::function<void(StringRef)> CallBack);
+ function_ref<void(StringRef)> CallBack);
Error parseGlobalObjectAttachment(GlobalObject &GO,
ArrayRef<uint64_t> Record);
Error parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record);
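Swapping std::function for llvm::function_ref in the signature above drops type erasure and a possible heap allocation: function_ref is a non-owning, trivially copyable view of a callable, which suits a callback that never outlives the call. A small sketch (names are illustrative):

#include "llvm/ADT/STLExtras.h" // llvm::function_ref
#include "llvm/ADT/StringRef.h"

// The callee only borrows the callable for the duration of the call.
static void forEachString(llvm::function_ref<void(llvm::StringRef)> CallBack) {
  CallBack("alpha");
  CallBack("beta");
}

static size_t totalLength() {
  size_t Total = 0;
  forEachString([&](llvm::StringRef S) { Total += S.size(); });
  return Total;
}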
@@ -520,7 +523,7 @@ public:
bool IsImporting)
: MetadataList(TheModule.getContext()), ValueList(ValueList),
Stream(Stream), Context(TheModule.getContext()), TheModule(TheModule),
- getTypeByID(getTypeByID), IsImporting(IsImporting) {}
+ getTypeByID(std::move(getTypeByID)), IsImporting(IsImporting) {}
Error parseMetadata(bool ModuleLevel);
@@ -564,7 +567,7 @@ public:
void shrinkTo(unsigned N) { MetadataList.shrinkTo(N); }
};
-Error error(const Twine &Message) {
+static Error error(const Twine &Message) {
return make_error<StringError>(
Message, make_error_code(BitcodeError::CorruptedBitcode));
}
@@ -1107,9 +1110,15 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_DERIVED_TYPE: {
- if (Record.size() != 12)
+ if (Record.size() < 12 || Record.size() > 13)
return error("Invalid record");
+ // DWARF address space is encoded as N->getDWARFAddressSpace() + 1. 0 means
+ // that there is no DWARF address space associated with DIDerivedType.
+ Optional<unsigned> DWARFAddressSpace;
+ if (Record.size() > 12 && Record[12])
+ DWARFAddressSpace = Record[12] - 1;
+
IsDistinct = Record[0];
DINode::DIFlags Flags = static_cast<DINode::DIFlags>(Record[10]);
MetadataList.assignValue(
@@ -1118,7 +1127,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
getMDOrNull(Record[3]), Record[4],
getDITypeRefOrNull(Record[5]),
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
- Record[9], Flags, getDITypeRefOrNull(Record[11]))),
+ Record[9], DWARFAddressSpace, Flags,
+ getDITypeRefOrNull(Record[11]))),
NextMetadataNo);
NextMetadataNo++;
break;
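The +1 bias noted in the comment keeps the field a plain integer while reserving 0 for "no DWARF address space". A self-contained round-trip sketch (std::optional stands in for the llvm::Optional used in the tree at this point):

#include <cstdint>
#include <optional>

static uint64_t encodeDWARFAddressSpace(std::optional<unsigned> AS) {
  return AS ? uint64_t(*AS) + 1 : 0; // 0 means "absent"
}

static std::optional<unsigned> decodeDWARFAddressSpace(uint64_t Field) {
  if (Field == 0)
    return std::nullopt;
  return static_cast<unsigned>(Field - 1);
}
// decode(encode(X)) == X for any X, and decode(0) is empty.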
@@ -1240,7 +1250,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_COMPILE_UNIT: {
- if (Record.size() < 14 || Record.size() > 17)
+ if (Record.size() < 14 || Record.size() > 18)
return error("Invalid record");
// Ignore Record[0], which indicates whether this compile unit is
@@ -1253,7 +1263,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
getMDOrNull(Record[12]), getMDOrNull(Record[13]),
Record.size() <= 15 ? nullptr : getMDOrNull(Record[15]),
Record.size() <= 14 ? 0 : Record[14],
- Record.size() <= 16 ? true : Record[16]);
+ Record.size() <= 16 ? true : Record[16],
+ Record.size() <= 17 ? false : Record[17]);
MetadataList.assignValue(CU, NextMetadataNo);
NextMetadataNo++;
@@ -1433,6 +1444,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
} else if (Version == 0) {
// Upgrade old metadata, which stored a global variable reference or a
// ConstantInt here.
+ NeedUpgradeToDIGlobalVariableExpression = true;
Metadata *Expr = getMDOrNull(Record[9]);
uint32_t AlignInBits = 0;
if (Record.size() > 11) {
@@ -1463,8 +1475,6 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
DIGlobalVariableExpression *DGVE = nullptr;
if (Attach || Expr)
DGVE = DIGlobalVariableExpression::getDistinct(Context, DGV, Expr);
- else
- NeedUpgradeToDIGlobalVariableExpression = true;
if (Attach)
Attach->addDebugInfo(DGVE);
@@ -1485,7 +1495,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
bool HasAlignment = Record[0] & 2;
// 2nd field used to be an artificial tag, either DW_TAG_auto_variable or
// DW_TAG_arg_variable, if we have alignment flag encoded it means, that
- // this is newer version of record which doesn't have artifical tag.
+ // this is newer version of record which doesn't have artificial tag.
bool HasTag = !HasAlignment && Record.size() > 8;
DINode::DIFlags Flags = static_cast<DINode::DIFlags>(Record[7 + HasTag]);
uint32_t AlignInBits = 0;
@@ -1611,7 +1621,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Error MetadataLoader::MetadataLoaderImpl::parseMetadataStrings(
ArrayRef<uint64_t> Record, StringRef Blob,
- std::function<void(StringRef)> CallBack) {
+ function_ref<void(StringRef)> CallBack) {
// All the MDStrings in the block are emitted together in a single
// record. The strings are concatenated and stored in a blob along with
// their sizes.
@@ -1808,8 +1818,8 @@ MetadataLoader::MetadataLoader(BitstreamCursor &Stream, Module &TheModule,
BitcodeReaderValueList &ValueList,
bool IsImporting,
std::function<Type *(unsigned)> getTypeByID)
- : Pimpl(llvm::make_unique<MetadataLoaderImpl>(Stream, TheModule, ValueList,
- getTypeByID, IsImporting)) {}
+ : Pimpl(llvm::make_unique<MetadataLoaderImpl>(
+ Stream, TheModule, ValueList, std::move(getTypeByID), IsImporting)) {}
Error MetadataLoader::parseMetadata(bool ModuleLevel) {
return Pimpl->parseMetadata(ModuleLevel);
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index ebb2022551f7..043441bac4de 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -108,6 +108,14 @@ class ModuleBitcodeWriter : public BitcodeWriterBase {
/// True if a module hash record should be written.
bool GenerateHash;
+  /// If non-null and GenerateHash is true, the resulting hash is written
+  /// into ModHash. If GenerateHash is false, the value pointed to is used
+  /// as the hash instead of being computed from the generated bitcode.
+  /// This can be used to give a minimized bitcode file, used just for the
+  /// thin link, the same module hash as the regular full bitcode that will
+  /// be used in the backend.
+ ModuleHash *ModHash;
+
/// The start bit of the identification block.
uint64_t BitcodeStartBit;
@@ -124,10 +132,12 @@ public:
/// writing to the provided \p Buffer.
ModuleBitcodeWriter(const Module *M, SmallVectorImpl<char> &Buffer,
BitstreamWriter &Stream, bool ShouldPreserveUseListOrder,
- const ModuleSummaryIndex *Index, bool GenerateHash)
+ const ModuleSummaryIndex *Index, bool GenerateHash,
+ ModuleHash *ModHash = nullptr)
: BitcodeWriterBase(Stream), Buffer(Buffer), M(*M),
VE(*M, ShouldPreserveUseListOrder), Index(Index),
- GenerateHash(GenerateHash), BitcodeStartBit(Stream.GetCurrentBitNo()) {
+ GenerateHash(GenerateHash), ModHash(ModHash),
+ BitcodeStartBit(Stream.GetCurrentBitNo()) {
// Assign ValueIds to any callee values in the index that came from
// indirect call profiles and were recorded as a GUID not a Value*
// (which would have been assigned an ID by the ValueEnumerator).
@@ -466,7 +476,6 @@ public:
void write();
private:
- void writeIndex();
void writeModStrings();
void writeCombinedValueSymbolTable();
void writeCombinedGlobalValueSummary();
@@ -709,22 +718,22 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
}
void ModuleBitcodeWriter::writeAttributeGroupTable() {
- const std::vector<AttributeSet> &AttrGrps = VE.getAttributeGroups();
+ const std::vector<AttributeList> &AttrGrps = VE.getAttributeGroups();
if (AttrGrps.empty()) return;
Stream.EnterSubblock(bitc::PARAMATTR_GROUP_BLOCK_ID, 3);
SmallVector<uint64_t, 64> Record;
for (unsigned i = 0, e = AttrGrps.size(); i != e; ++i) {
- AttributeSet AS = AttrGrps[i];
+ AttributeList AS = AttrGrps[i];
for (unsigned i = 0, e = AS.getNumSlots(); i != e; ++i) {
- AttributeSet A = AS.getSlotAttributes(i);
+ AttributeList A = AS.getSlotAttributes(i);
Record.push_back(VE.getAttributeGroupID(A));
Record.push_back(AS.getSlotIndex(i));
- for (AttributeSet::iterator I = AS.begin(0), E = AS.end(0);
- I != E; ++I) {
+ for (AttributeList::iterator I = AS.begin(0), E = AS.end(0); I != E;
+ ++I) {
Attribute Attr = *I;
if (Attr.isEnumAttribute()) {
Record.push_back(0);
@@ -756,14 +765,14 @@ void ModuleBitcodeWriter::writeAttributeGroupTable() {
}
void ModuleBitcodeWriter::writeAttributeTable() {
- const std::vector<AttributeSet> &Attrs = VE.getAttributes();
+ const std::vector<AttributeList> &Attrs = VE.getAttributes();
if (Attrs.empty()) return;
Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3);
SmallVector<uint64_t, 64> Record;
for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
- const AttributeSet &A = Attrs[i];
+ const AttributeList &A = Attrs[i];
for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i)
Record.push_back(VE.getAttributeGroupID(A.getSlotAttributes(i)));
@@ -1326,6 +1335,8 @@ static uint64_t getOptimizationFlags(const Value *V) {
Flags |= FastMathFlags::NoSignedZeros;
if (FPMO->hasAllowReciprocal())
Flags |= FastMathFlags::AllowReciprocal;
+ if (FPMO->hasAllowContract())
+ Flags |= FastMathFlags::AllowContract;
}
return Flags;
@@ -1473,6 +1484,13 @@ void ModuleBitcodeWriter::writeDIDerivedType(const DIDerivedType *N,
Record.push_back(N->getFlags());
Record.push_back(VE.getMetadataOrNullID(N->getExtraData()));
+ // DWARF address space is encoded as N->getDWARFAddressSpace() + 1. 0 means
+ // that there is no DWARF address space associated with DIDerivedType.
+ if (const auto &DWARFAddressSpace = N->getDWARFAddressSpace())
+ Record.push_back(*DWARFAddressSpace + 1);
+ else
+ Record.push_back(0);
+
Stream.EmitRecord(bitc::METADATA_DERIVED_TYPE, Record, Abbrev);
Record.clear();
}
@@ -1549,6 +1567,7 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
Record.push_back(N->getDWOId());
Record.push_back(VE.getMetadataOrNullID(N->getMacros().get()));
Record.push_back(N->getSplitDebugInlining());
+ Record.push_back(N->getDebugInfoForProfiling());
Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev);
Record.clear();
@@ -2559,7 +2578,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
Vals.push_back(VE.getTypeID(SI.getCondition()->getType()));
pushValue(SI.getCondition(), InstID, Vals);
Vals.push_back(VE.getValueID(SI.getDefaultDest()));
- for (SwitchInst::ConstCaseIt Case : SI.cases()) {
+ for (auto Case : SI.cases()) {
Vals.push_back(VE.getValueID(Case.getCaseValue()));
Vals.push_back(VE.getValueID(Case.getCaseSuccessor()));
}
@@ -2905,13 +2924,6 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
NameVals.push_back(VE.getValueID(Name.getValue()));
Function *F = dyn_cast<Function>(Name.getValue());
- if (!F) {
- // If value is an alias, need to get the aliased base object to
- // see if it is a function.
- auto *GA = dyn_cast<GlobalAlias>(Name.getValue());
- if (GA && GA->getBaseObject())
- F = dyn_cast<Function>(GA->getBaseObject());
- }
// VST_CODE_ENTRY: [valueid, namechar x N]
// VST_CODE_FNENTRY: [valueid, funcoffset, namechar x N]
@@ -3367,6 +3379,49 @@ void IndexBitcodeWriter::writeModStrings() {
Stream.ExitBlock();
}
+/// Write the function type metadata related records that need to appear before
+/// a function summary entry (whether per-module or combined).
+static void writeFunctionTypeMetadataRecords(BitstreamWriter &Stream,
+ FunctionSummary *FS) {
+ if (!FS->type_tests().empty())
+ Stream.EmitRecord(bitc::FS_TYPE_TESTS, FS->type_tests());
+
+ SmallVector<uint64_t, 64> Record;
+
+ auto WriteVFuncIdVec = [&](uint64_t Ty,
+ ArrayRef<FunctionSummary::VFuncId> VFs) {
+ if (VFs.empty())
+ return;
+ Record.clear();
+ for (auto &VF : VFs) {
+ Record.push_back(VF.GUID);
+ Record.push_back(VF.Offset);
+ }
+ Stream.EmitRecord(Ty, Record);
+ };
+
+ WriteVFuncIdVec(bitc::FS_TYPE_TEST_ASSUME_VCALLS,
+ FS->type_test_assume_vcalls());
+ WriteVFuncIdVec(bitc::FS_TYPE_CHECKED_LOAD_VCALLS,
+ FS->type_checked_load_vcalls());
+
+ auto WriteConstVCallVec = [&](uint64_t Ty,
+ ArrayRef<FunctionSummary::ConstVCall> VCs) {
+ for (auto &VC : VCs) {
+ Record.clear();
+ Record.push_back(VC.VFunc.GUID);
+ Record.push_back(VC.VFunc.Offset);
+ Record.insert(Record.end(), VC.Args.begin(), VC.Args.end());
+ Stream.EmitRecord(Ty, Record);
+ }
+ };
+
+ WriteConstVCallVec(bitc::FS_TYPE_TEST_ASSUME_CONST_VCALL,
+ FS->type_test_assume_const_vcalls());
+ WriteConstVCallVec(bitc::FS_TYPE_CHECKED_LOAD_CONST_VCALL,
+ FS->type_checked_load_const_vcalls());
+}
+
// Helper to emit a single function summary record.
void ModuleBitcodeWriter::writePerModuleFunctionSummaryRecord(
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
@@ -3375,8 +3430,7 @@ void ModuleBitcodeWriter::writePerModuleFunctionSummaryRecord(
NameVals.push_back(ValueID);
FunctionSummary *FS = cast<FunctionSummary>(Summary);
- if (!FS->type_tests().empty())
- Stream.EmitRecord(bitc::FS_TYPE_TESTS, FS->type_tests());
+ writeFunctionTypeMetadataRecords(Stream, FS);
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
NameVals.push_back(FS->instCount());
@@ -3636,8 +3690,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
}
auto *FS = cast<FunctionSummary>(S);
- if (!FS->type_tests().empty())
- Stream.EmitRecord(bitc::FS_TYPE_TESTS, FS->type_tests());
+ writeFunctionTypeMetadataRecords(Stream, FS);
NameVals.push_back(ValueId);
NameVals.push_back(Index.getModuleId(FS->modulePath()));
@@ -3659,9 +3712,16 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
for (auto &EI : FS->calls()) {
// If this GUID doesn't have a value id, it doesn't have a function
// summary and we don't need to record any calls to it.
- if (!hasValueId(EI.first.getGUID()))
- continue;
- NameVals.push_back(getValueId(EI.first.getGUID()));
+ GlobalValue::GUID GUID = EI.first.getGUID();
+ if (!hasValueId(GUID)) {
+        // For SamplePGO, the indirect call targets for local functions have
+        // their original names annotated in the profile, so we retry with the
+        // GUID of the corresponding PGOFuncName.
+ GUID = Index.getGUIDFromOriginalID(GUID);
+ if (GUID == 0 || !hasValueId(GUID))
+ continue;
+ }
+ NameVals.push_back(getValueId(GUID));
if (HasProfileData)
NameVals.push_back(static_cast<uint8_t>(EI.second.Hotness));
}
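A hedged sketch of that fallback in isolation, using the ModuleSummaryIndex API the hunk relies on (the helper and its HasSummary parameter are illustrative):

#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/ModuleSummaryIndex.h"
using namespace llvm;

// If the plain-name GUID has no summary, retry with the GUID recorded for
// the original (PGO) name; getGUIDFromOriginalID returns 0 when unmapped.
static GlobalValue::GUID resolveCalleeGUID(GlobalValue::GUID GUID,
                                           const ModuleSummaryIndex &Index,
                                           bool HasSummary) {
  if (HasSummary)
    return GUID;
  return Index.getGUIDFromOriginalID(GUID);
}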
@@ -3697,7 +3757,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
/// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the
/// current llvm version, and a record for the epoch number.
-void writeIdentificationBlock(BitstreamWriter &Stream) {
+static void writeIdentificationBlock(BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5);
// Write the "user readable" string identifying the bitcode producer
@@ -3722,17 +3782,24 @@ void writeIdentificationBlock(BitstreamWriter &Stream) {
void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) {
// Emit the module's hash.
// MODULE_CODE_HASH: [5*i32]
- SHA1 Hasher;
- Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&(Buffer)[BlockStartPos],
- Buffer.size() - BlockStartPos));
- StringRef Hash = Hasher.result();
- uint32_t Vals[5];
- for (int Pos = 0; Pos < 20; Pos += 4) {
- Vals[Pos / 4] = support::endian::read32be(Hash.data() + Pos);
- }
+ if (GenerateHash) {
+ SHA1 Hasher;
+ uint32_t Vals[5];
+ Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&(Buffer)[BlockStartPos],
+ Buffer.size() - BlockStartPos));
+ StringRef Hash = Hasher.result();
+ for (int Pos = 0; Pos < 20; Pos += 4) {
+ Vals[Pos / 4] = support::endian::read32be(Hash.data() + Pos);
+ }
- // Emit the finished record.
- Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals);
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals);
+
+ if (ModHash)
+ // Save the written hash value.
+ std::copy(std::begin(Vals), std::end(Vals), std::begin(*ModHash));
+ } else if (ModHash)
+ Stream.EmitRecord(bitc::MODULE_CODE_HASH, ArrayRef<uint32_t>(*ModHash));
}
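MODULE_CODE_HASH stores the 20-byte SHA-1 digest as five 32-bit words read big-endian. A self-contained sketch of that packing, equivalent to the support::endian::read32be loop above:

#include <cstdint>

// Pack a 20-byte digest into five big-endian 32-bit words.
static void packDigest(const uint8_t Digest[20], uint32_t Vals[5]) {
  for (int Pos = 0; Pos < 20; Pos += 4)
    Vals[Pos / 4] = (uint32_t(Digest[Pos]) << 24) |
                    (uint32_t(Digest[Pos + 1]) << 16) |
                    (uint32_t(Digest[Pos + 2]) << 8) |
                    uint32_t(Digest[Pos + 3]);
}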
void ModuleBitcodeWriter::write() {
@@ -3793,9 +3860,7 @@ void ModuleBitcodeWriter::write() {
writeValueSymbolTable(M.getValueSymbolTable(),
/* IsModuleLevel */ true, &FunctionToBitcodeIndex);
- if (GenerateHash) {
- writeModuleHash(BlockStartPos);
- }
+ writeModuleHash(BlockStartPos);
Stream.ExitBlock();
}
@@ -3886,9 +3951,10 @@ BitcodeWriter::~BitcodeWriter() = default;
void BitcodeWriter::writeModule(const Module *M,
bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index,
- bool GenerateHash) {
- ModuleBitcodeWriter ModuleWriter(
- M, Buffer, *Stream, ShouldPreserveUseListOrder, Index, GenerateHash);
+ bool GenerateHash, ModuleHash *ModHash) {
+ ModuleBitcodeWriter ModuleWriter(M, Buffer, *Stream,
+ ShouldPreserveUseListOrder, Index,
+ GenerateHash, ModHash);
ModuleWriter.write();
}
@@ -3897,7 +3963,7 @@ void BitcodeWriter::writeModule(const Module *M,
void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index,
- bool GenerateHash) {
+ bool GenerateHash, ModuleHash *ModHash) {
SmallVector<char, 0> Buffer;
Buffer.reserve(256*1024);
@@ -3908,7 +3974,8 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0);
BitcodeWriter Writer(Buffer);
- Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash);
+ Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash,
+ ModHash);
if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
emitDarwinBCHeaderAndTrailer(Buffer, TT);
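With ModHash threaded through, a caller can capture the hash while writing the full module and later stamp the identical hash into a minimized thin-link module. A hedged usage sketch against the signature above; the header path and the reuse of the same Index are assumptions:

#include "llvm/Bitcode/BitcodeWriter.h" // header name assumed for this era
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void writeFullThenMinimized(const Module *M, const Module *MinM,
                                   const ModuleSummaryIndex *Index,
                                   raw_ostream &FullOS, raw_ostream &ThinOS) {
  ModuleHash Hash;
  // Compute the hash from the full bitcode and remember it.
  WriteBitcodeToFile(M, FullOS, /*ShouldPreserveUseListOrder=*/false, Index,
                     /*GenerateHash=*/true, &Hash);
  // Re-emit exactly that hash in the minimized module, without rehashing.
  WriteBitcodeToFile(MinM, ThinOS, /*ShouldPreserveUseListOrder=*/false, Index,
                     /*GenerateHash=*/false, &Hash);
}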
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 5d5bfab58b81..3800d9abd429 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -432,12 +432,14 @@ unsigned ValueEnumerator::getValueID(const Value *V) const {
return I->second-1;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ValueEnumerator::dump() const {
print(dbgs(), ValueMap, "Default");
dbgs() << '\n';
print(dbgs(), MetadataMap, "MetaData");
dbgs() << '\n';
}
+#endif
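This dump() gating pattern recurs throughout the patch (see the DIE.cpp hunks below): definitions exist only in asserts builds or when LLVM_ENABLE_DUMP is set, so release binaries shed the dead printing code. As it would look on a hypothetical class that already has a print() method:

#include "llvm/Support/Compiler.h" // LLVM_DUMP_METHOD
#include "llvm/Support/Debug.h"    // llvm::dbgs()

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MyAnalysis::dump() const {
  print(llvm::dbgs()); // reuse the always-available print()
}
#endif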
void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
const char *Name) const {
@@ -452,7 +454,8 @@ void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
OS << "Value: " << V->getName();
else
OS << "Value: [null]\n";
- V->dump();
+ V->print(errs());
+ errs() << '\n';
OS << " Uses(" << std::distance(V->use_begin(),V->use_end()) << "):";
for (const Use &U : V->uses()) {
@@ -549,7 +552,7 @@ void ValueEnumerator::EnumerateFunctionLocalMetadata(
void ValueEnumerator::dropFunctionFromMetadata(
MetadataMapType::value_type &FirstMD) {
SmallVector<const MDNode *, 64> Worklist;
- auto push = [this, &Worklist](MetadataMapType::value_type &MD) {
+ auto push = [&Worklist](MetadataMapType::value_type &MD) {
auto &Entry = MD.second;
// Nothing to do if this metadata isn't tagged.
@@ -884,7 +887,7 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) {
}
}
-void ValueEnumerator::EnumerateAttributes(AttributeSet PAL) {
+void ValueEnumerator::EnumerateAttributes(AttributeList PAL) {
if (PAL.isEmpty()) return; // null is always 0.
// Do a lookup.
@@ -897,7 +900,7 @@ void ValueEnumerator::EnumerateAttributes(AttributeSet PAL) {
// Do lookups for all attribute groups.
for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) {
- AttributeSet AS = PAL.getSlotAttributes(i);
+ AttributeList AS = PAL.getSlotAttributes(i);
unsigned &Entry = AttributeGroupMap[AS];
if (Entry == 0) {
AttributeGroups.push_back(AS);
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index a8d6cf965a4b..8a82aab29836 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -36,7 +36,7 @@ class LocalAsMetadata;
class MDNode;
class MDOperand;
class NamedMDNode;
-class AttributeSet;
+class AttributeList;
class ValueSymbolTable;
class MDSymbolTable;
class raw_ostream;
@@ -102,13 +102,13 @@ private:
bool ShouldPreserveUseListOrder;
- typedef DenseMap<AttributeSet, unsigned> AttributeGroupMapType;
+ typedef DenseMap<AttributeList, unsigned> AttributeGroupMapType;
AttributeGroupMapType AttributeGroupMap;
- std::vector<AttributeSet> AttributeGroups;
+ std::vector<AttributeList> AttributeGroups;
- typedef DenseMap<AttributeSet, unsigned> AttributeMapType;
+ typedef DenseMap<AttributeList, unsigned> AttributeMapType;
AttributeMapType AttributeMap;
- std::vector<AttributeSet> Attribute;
+ std::vector<AttributeList> Attribute;
/// GlobalBasicBlockIDs - This map memoizes the basic block ID's referenced by
/// the "getGlobalBasicBlockID" method.
@@ -166,14 +166,14 @@ public:
unsigned getInstructionID(const Instruction *I) const;
void setInstructionID(const Instruction *I);
- unsigned getAttributeID(AttributeSet PAL) const {
+ unsigned getAttributeID(AttributeList PAL) const {
if (PAL.isEmpty()) return 0; // Null maps to zero.
AttributeMapType::const_iterator I = AttributeMap.find(PAL);
assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!");
return I->second;
}
- unsigned getAttributeGroupID(AttributeSet PAL) const {
+ unsigned getAttributeGroupID(AttributeList PAL) const {
if (PAL.isEmpty()) return 0; // Null maps to zero.
AttributeGroupMapType::const_iterator I = AttributeGroupMap.find(PAL);
assert(I != AttributeGroupMap.end() && "Attribute not in ValueEnumerator!");
@@ -206,10 +206,8 @@ public:
const std::vector<const BasicBlock*> &getBasicBlocks() const {
return BasicBlocks;
}
- const std::vector<AttributeSet> &getAttributes() const {
- return Attribute;
- }
- const std::vector<AttributeSet> &getAttributeGroups() const {
+ const std::vector<AttributeList> &getAttributes() const { return Attribute; }
+ const std::vector<AttributeList> &getAttributeGroups() const {
return AttributeGroups;
}
@@ -283,7 +281,7 @@ private:
void EnumerateValue(const Value *V);
void EnumerateType(Type *T);
void EnumerateOperandType(const Value *V);
- void EnumerateAttributes(AttributeSet PAL);
+ void EnumerateAttributes(AttributeList PAL);
void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
void EnumerateNamedMetadata(const Module &M);
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index bb908618b679..955524c2a676 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -163,9 +163,11 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo &MFI = MF.getFrameInfo();
BitVector Pristine = MFI.getPristineRegs(MF);
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
+ for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I;
+ ++I) {
unsigned Reg = *I;
- if (!IsReturnBlock && !Pristine.test(Reg)) continue;
+ if (!IsReturnBlock && !(Pristine.test(Reg) || BB->isLiveIn(Reg)))
+ continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
unsigned AliasReg = *AI;
State->UnionGroups(AliasReg, 0);
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 79ecc4308fe7..09a37a77e9fb 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -516,10 +516,9 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS;
ADS = true;
- AttrBuilder CallerAttrs(F->getAttributes(),
- AttributeSet::ReturnIndex);
+ AttrBuilder CallerAttrs(F->getAttributes(), AttributeList::ReturnIndex);
AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
- AttributeSet::ReturnIndex);
+ AttributeList::ReturnIndex);
  // Noalias is completely benign as far as the calling convention goes; it
  // shouldn't affect whether the call is a tail call.
@@ -613,25 +612,6 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
return true;
}
-bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) {
- if (!GV->hasLinkOnceODRLinkage())
- return false;
-
- // We assume that anyone who sets global unnamed_addr on a non-constant knows
- // what they're doing.
- if (GV->hasGlobalUnnamedAddr())
- return true;
-
- // If it is a non constant variable, it needs to be uniqued across shared
- // objects.
- if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) {
- if (!Var->isConstant())
- return false;
- }
-
- return GV->hasAtLeastLocalUnnamedAddr();
-}
-
static void collectFuncletMembers(
DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet,
const MachineBasicBlock *MBB) {
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 24fdbfc901fd..6c18d56b8272 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -11,48 +11,102 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/AsmPrinter.h"
+#include "AsmPrinterHandler.h"
#include "CodeViewDebug.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
#include "WinException.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ObjectUtils.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
+#include "llvm/IR/GlobalIndirectSymbol.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Value.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/Timer.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cinttypes>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -69,6 +123,10 @@ static const char *const CodeViewLineTablesGroupDescription =
STATISTIC(EmittedInsts, "Number of machine instrs printed");
+static cl::opt<bool>
+ PrintSchedule("print-schedule", cl::Hidden, cl::init(false),
+ cl::desc("Print 'sched: [latency:throughput]' in .s output"));
+
char AsmPrinter::ID = 0;
typedef DenseMap<GCStrategy*, std::unique_ptr<GCMetadataPrinter>> gcp_map_type;
@@ -78,7 +136,6 @@ static gcp_map_type &getGCMap(void *&P) {
return *(gcp_map_type*)P;
}
-
/// getGVAlignmentLog2 - Return the alignment to use for the specified global
/// value in log2 form. This rounds up to the preferred alignment if possible
/// and legal.
@@ -107,16 +164,7 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL,
AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
: MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()),
- OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)),
- isCFIMoveForDebugging(false), LastMI(nullptr), LastFn(0), Counter(~0U) {
- DD = nullptr;
- MMI = nullptr;
- LI = nullptr;
- MF = nullptr;
- CurExceptionSym = CurrentFnSym = CurrentFnSymForSize = nullptr;
- CurrentFnBegin = nullptr;
- CurrentFnEnd = nullptr;
- GCMetadataPrinters = nullptr;
+ OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)) {
VerboseAsm = OutStreamer->isVerboseAsm();
}
@@ -171,6 +219,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<GCModuleInfo>();
if (isVerbose())
AU.addRequired<MachineLoopInfo>();
@@ -223,7 +272,7 @@ bool AsmPrinter::doInitialization(Module &M) {
// don't, this at least helps the user find where a global came from.
if (MAI->hasSingleParameterDotFile()) {
// .file "foo.c"
- OutStreamer->EmitFileDirective(M.getModuleIdentifier());
+ OutStreamer->EmitFileDirective(M.getSourceFileName());
}
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
@@ -571,7 +620,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
///
/// \p Value - The value to emit.
/// \p Size - The size of the integer (in bytes) to emit.
-void AsmPrinter::EmitDebugValue(const MCExpr *Value,
+void AsmPrinter::EmitDebugThreadLocal(const MCExpr *Value,
unsigned Size) const {
OutStreamer->EmitValue(Value, Size);
}
@@ -602,8 +651,23 @@ void AsmPrinter::EmitFunctionHeader() {
}
// Emit the prefix data.
- if (F->hasPrefixData())
- EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData());
+ if (F->hasPrefixData()) {
+ if (MAI->hasSubsectionsViaSymbols()) {
+ // Preserving prefix data on platforms which use subsections-via-symbols
+ // is a bit tricky. Here we introduce a symbol for the prefix data
+ // and use the .alt_entry attribute to mark the function's real entry point
+ // as an alternative entry point to the prefix-data symbol.
+ MCSymbol *PrefixSym = OutContext.createLinkerPrivateTempSymbol();
+ OutStreamer->EmitLabel(PrefixSym);
+
+ EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData());
+
+ // Emit an .alt_entry directive for the actual function symbol.
+ OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_AltEntry);
+ } else {
+ EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData());
+ }
+ }
// Emit the CurrentFnSym. This is a virtual function to allow targets to
// do their wild and crazy things as required.
@@ -660,7 +724,8 @@ void AsmPrinter::EmitFunctionEntryLabel() {
}
/// emitComments - Pretty-print comments for instructions.
-static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
+ AsmPrinter *AP) {
const MachineFunction *MF = MI.getParent()->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
@@ -668,6 +733,7 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
int FI;
const MachineFrameInfo &MFI = MF->getFrameInfo();
+ bool Commented = false;
// We assume a single instruction only has a spill or reload, not
// both.
@@ -675,24 +741,39 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
- CommentOS << MMO->getSize() << "-byte Reload\n";
+ CommentOS << MMO->getSize() << "-byte Reload";
+ Commented = true;
}
} else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI))
- CommentOS << MMO->getSize() << "-byte Folded Reload\n";
+ if (MFI.isSpillSlotObjectIndex(FI)) {
+ CommentOS << MMO->getSize() << "-byte Folded Reload";
+ Commented = true;
+ }
} else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
- CommentOS << MMO->getSize() << "-byte Spill\n";
+ CommentOS << MMO->getSize() << "-byte Spill";
+ Commented = true;
}
} else if (TII->hasStoreToStackSlot(MI, MMO, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI))
- CommentOS << MMO->getSize() << "-byte Folded Spill\n";
+ if (MFI.isSpillSlotObjectIndex(FI)) {
+ CommentOS << MMO->getSize() << "-byte Folded Spill";
+ Commented = true;
+ }
}
// Check for spill-induced copies
- if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
- CommentOS << " Reload Reuse\n";
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) {
+ Commented = true;
+ CommentOS << " Reload Reuse";
+ }
+
+ if (Commented && AP->EnablePrintSchedInfo)
+    // If any comment was added above and we also need a sched info comment,
+    // append it directly after that comment, with no newline between them.
+ CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n";
+ else if (Commented)
+ CommentOS << "\n";
}
/// emitImplicitDef - This method emits the specified machine instruction
@@ -883,6 +964,7 @@ void AsmPrinter::EmitFunctionBody() {
// Print out code for the function.
bool HasAnyRealCode = false;
+ int NumInstsInFunction = 0;
for (auto &MBB : *MF) {
// Print a label for the basic block.
EmitBasicBlockStart(MBB);
@@ -892,7 +974,7 @@ void AsmPrinter::EmitFunctionBody() {
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
!MI.isDebugValue()) {
HasAnyRealCode = true;
- ++EmittedInsts;
+ ++NumInstsInFunction;
}
if (ShouldPrintDebugScopes) {
@@ -905,7 +987,7 @@ void AsmPrinter::EmitFunctionBody() {
}
if (isVerbose())
- emitComments(MI, OutStreamer->GetCommentOS());
+ emitComments(MI, OutStreamer->GetCommentOS(), this);
switch (MI.getOpcode()) {
case TargetOpcode::CFI_INSTRUCTION:
@@ -953,6 +1035,14 @@ void AsmPrinter::EmitFunctionBody() {
EmitBasicBlockEnd(MBB);
}
+ EmittedInsts += NumInstsInFunction;
+ MachineOptimizationRemarkAnalysis R(DEBUG_TYPE, "InstructionCount",
+ MF->getFunction()->getSubprogram(),
+ &MF->front());
+ R << ore::NV("NumInstructions", NumInstsInFunction)
+ << " instructions in function";
+ ORE->emit(R);
+
// If the function is empty and the object file uses .subsections_via_symbols,
// then we need to emit *something* to the function body to prevent the
// labels from collapsing together. Just emit a noop.
@@ -1238,7 +1328,7 @@ bool AsmPrinter::doFinalization(Module &M) {
break;
AliasStack.push_back(Cur);
}
- for (const GlobalAlias *AncestorAlias : reverse(AliasStack))
+ for (const GlobalAlias *AncestorAlias : llvm::reverse(AliasStack))
emitGlobalIndirectSymbol(M, *AncestorAlias);
AliasStack.clear();
}
@@ -1311,19 +1401,28 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
CurrentFnSymForSize = CurrentFnBegin;
}
+ ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
if (isVerbose())
LI = &getAnalysis<MachineLoopInfo>();
+
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ EnablePrintSchedInfo = PrintSchedule.getNumOccurrences()
+ ? PrintSchedule
+ : STI.supportPrintSchedInfo();
}
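The EnablePrintSchedInfo initialization uses a common cl::opt idiom: an explicit occurrence on the command line always wins, and only in its absence does the subtarget's default apply. A minimal sketch of the idiom with an illustrative flag name:

#include "llvm/Support/CommandLine.h"

static llvm::cl::opt<bool>
    PrintSched("print-sched-example", llvm::cl::Hidden, llvm::cl::init(false),
               llvm::cl::desc("Illustrative flag for the override idiom"));

// getNumOccurrences() distinguishes "left at default" from "user said false".
static bool shouldPrintSchedInfo(bool SubtargetDefault) {
  return PrintSched.getNumOccurrences() ? PrintSched : SubtargetDefault;
}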
namespace {
+
// Keep track the alignment, constpool entries per Section.
struct SectionCPs {
MCSection *S;
unsigned Alignment;
SmallVector<unsigned, 4> CPEs;
+
SectionCPs(MCSection *s, unsigned a) : S(s), Alignment(a) {}
};
-}
+
+} // end anonymous namespace
/// EmitConstantPool - Print to the current output stream assembly
/// representations of the constants in the constant pool MCP. This is
@@ -1547,7 +1646,6 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
OutStreamer->EmitValue(Value, EntrySize);
}
-
/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
/// special global used by LLVM. If so, emit it and return true, otherwise
/// do nothing and return false.
@@ -1598,13 +1696,16 @@ void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
}
namespace {
+
struct Structor {
- Structor() : Priority(0), Func(nullptr), ComdatKey(nullptr) {}
- int Priority;
- llvm::Constant *Func;
- llvm::GlobalValue *ComdatKey;
+ int Priority = 0;
+ Constant *Func = nullptr;
+ GlobalValue *ComdatKey = nullptr;
+
+ Structor() = default;
};
-} // end namespace
+
+} // end anonymous namespace
/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
/// priority.
@@ -1653,8 +1754,11 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
const TargetLoweringObjectFile &Obj = getObjFileLowering();
const MCSymbol *KeySym = nullptr;
if (GlobalValue *GV = S.ComdatKey) {
- if (GV->hasAvailableExternallyLinkage())
- // If the associated variable is available_externally, some other TU
+ if (GV->isDeclarationForLinker())
+ // If the associated variable is not defined in this module
+ // (it might be available_externally, or have been an
+ // available_externally definition that was dropped by the
+ // EliminateAvailableExternally pass), some other TU
// will provide its dynamic initializer.
continue;
@@ -1931,7 +2035,6 @@ static int isRepeatedByteSequence(const ConstantDataSequential *V) {
return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1.
}
-
/// isRepeatedByteSequence - Determine whether the given value is
/// composed of a repeated sequence of identical bytes and return the
/// byte value. If it is not a repeated sequence, return -1.
@@ -1972,7 +2075,6 @@ static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) {
static void emitGlobalConstantDataSequential(const DataLayout &DL,
const ConstantDataSequential *CDS,
AsmPrinter &AP) {
-
// See if we can aggregate this into a .fill, if so, emit it as such.
int Value = isRepeatedByteSequence(CDS, DL);
if (Value != -1) {
@@ -2006,7 +2108,6 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL,
CDS->getNumElements();
if (unsigned Padding = Size - EmittedSize)
AP.OutStreamer->EmitZeros(Padding);
-
}
static void emitGlobalConstantArray(const DataLayout &DL,
@@ -2420,8 +2521,6 @@ MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
return OutContext.getOrCreateSymbol(NameStr);
}
-
-
/// PrintParentLoopComment - Print comments about parent loops of this one.
static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
unsigned FunctionNumber) {
@@ -2486,7 +2585,6 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
}
-
/// EmitBasicBlockStart - This method prints the label for the specified
/// MachineBasicBlock, an alignment (if present) and a comment describing
/// it if appropriate.
@@ -2607,8 +2705,6 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
return true;
}
-
-
GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
if (!S.usesMetadata())
return nullptr;
@@ -2639,7 +2735,7 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
}
/// Pin vtable to this file.
-AsmPrinterHandler::~AsmPrinterHandler() {}
+AsmPrinterHandler::~AsmPrinterHandler() = default;
void AsmPrinterHandler::markFunctionEnd() {}
@@ -2702,8 +2798,11 @@ void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
SledKind Kind) {
auto Fn = MI.getParent()->getParent()->getFunction();
auto Attr = Fn->getFnAttribute("function-instrument");
+ bool LogArgs = Fn->hasFnAttribute("xray-log-args");
bool AlwaysInstrument =
Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
+ if (Kind == SledKind::FUNCTION_ENTER && LogArgs)
+ Kind = SledKind::LOG_ARGS_ENTER;
Sleds.emplace_back(
XRayFunctionEntry{ Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn });
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 57864e4e4d4f..683e622e3d53 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -40,25 +40,24 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-namespace {
- struct SrcMgrDiagInfo {
- const MDNode *LocInfo;
- LLVMContext::InlineAsmDiagHandlerTy DiagHandler;
- void *DiagContext;
- };
-}
-
/// srcMgrDiagHandler - This callback is invoked when the SourceMgr for an
/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo
/// struct above.
static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
- SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo);
+ AsmPrinter::SrcMgrDiagInfo *DiagInfo =
+ static_cast<AsmPrinter::SrcMgrDiagInfo *>(diagInfo);
assert(DiagInfo && "Diagnostic context not passed down?");
+ // Look up a LocInfo for the buffer this diagnostic is coming from.
+ unsigned BufNum = DiagInfo->SrcMgr.FindBufferContainingLoc(Diag.getLoc());
+ const MDNode *LocInfo = nullptr;
+ if (BufNum > 0 && BufNum <= DiagInfo->LocInfos.size())
+ LocInfo = DiagInfo->LocInfos[BufNum-1];
+
// If the inline asm had metadata associated with it, pull out a location
// cookie corresponding to which line the error occurred on.
unsigned LocCookie = 0;
- if (const MDNode *LocInfo = DiagInfo->LocInfo) {
+ if (LocInfo) {
unsigned ErrorLine = Diag.getLineNo()-1;
if (ErrorLine >= LocInfo->getNumOperands())
ErrorLine = 0;
@@ -99,35 +98,39 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
return;
}
- SourceMgr SrcMgr;
- SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
+ if (!DiagInfo) {
+ DiagInfo = make_unique<SrcMgrDiagInfo>();
- SrcMgrDiagInfo DiagInfo;
-
- // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
- LLVMContext &LLVMCtx = MMI->getModule()->getContext();
- bool HasDiagHandler = false;
- if (LLVMCtx.getInlineAsmDiagnosticHandler() != nullptr) {
- // If the source manager has an issue, we arrange for srcMgrDiagHandler
- // to be invoked, getting DiagInfo passed into it.
- DiagInfo.LocInfo = LocMDNode;
- DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
- DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
- SrcMgr.setDiagHandler(srcMgrDiagHandler, &DiagInfo);
- HasDiagHandler = true;
+ MCContext &Context = MMI->getContext();
+ Context.setInlineSourceManager(&DiagInfo->SrcMgr);
+
+ LLVMContext &LLVMCtx = MMI->getModule()->getContext();
+ if (LLVMCtx.getInlineAsmDiagnosticHandler()) {
+ DiagInfo->DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+ DiagInfo->DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+ DiagInfo->SrcMgr.setDiagHandler(srcMgrDiagHandler, DiagInfo.get());
+ }
}
+ SourceMgr &SrcMgr = DiagInfo->SrcMgr;
+ SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths);
+
std::unique_ptr<MemoryBuffer> Buffer;
- if (isNullTerminated)
- Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>");
- else
- Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
+ // The inline asm source manager will outlive Str, so make a copy of the
+ // string for SourceMgr to own.
+ Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
// Tell SrcMgr about this buffer, it takes ownership of the buffer.
- SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
+ unsigned BufNum = SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
+
+ // Store LocMDNode in DiagInfo, using BufNum as an identifier.
+ if (LocMDNode) {
+ DiagInfo->LocInfos.resize(BufNum);
+ DiagInfo->LocInfos[BufNum-1] = LocMDNode;
+ }
std::unique_ptr<MCAsmParser> Parser(
- createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI));
+ createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum));
// We create a new MCInstrInfo here since we might be at the module level
// and not have a MachineFunction to initialize the TargetInstrInfo from and
@@ -151,7 +154,8 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
int Res = Parser->Run(/*NoInitialTextSection*/ true,
/*NoFinalize*/ true);
emitInlineAsmEnd(STI, &TAP->getSTI());
- if (Res && !HasDiagHandler)
+
+ if (Res && !DiagInfo->DiagHandler)
report_fatal_error("Error parsing inline asm\n");
}
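The per-buffer location bookkeeping above boils down to a 1-based table keyed by SourceMgr buffer numbers. A self-contained sketch of that table; a void pointer stands in for const MDNode *:

#include <vector>

using LocMD = const void *; // stand-in for const MDNode *

struct LocInfoTable {
  std::vector<LocMD> LocInfos;

  // Buffer numbers handed out by SourceMgr are 1-based.
  void record(unsigned BufNum, LocMD MD) {
    if (!MD)
      return;
    if (LocInfos.size() < BufNum)
      LocInfos.resize(BufNum);
    LocInfos[BufNum - 1] = MD;
  }

  LocMD lookup(unsigned BufNum) const {
    if (BufNum == 0 || BufNum > LocInfos.size())
      return nullptr;
    return LocInfos[BufNum - 1];
  }
};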
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 83440513225c..383b8cddb1a0 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -23,13 +23,13 @@
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
-#include "llvm/DebugInfo/MSF/ByteStream.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -38,7 +38,6 @@
using namespace llvm;
using namespace llvm::codeview;
-using namespace llvm::msf;
CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
: DebugHandlerBase(AP), OS(*Asm->OutStreamer), Allocator(),
@@ -495,9 +494,9 @@ void CodeViewDebug::emitTypeInformation() {
// comments. The MSVC linker doesn't do much type record validation,
// so the first link of an invalid type record can succeed while
// subsequent links will fail with LNK1285.
- ByteStream Stream(Record);
+ BinaryByteStream Stream(Record, llvm::support::little);
CVTypeArray Types;
- StreamReader Reader(Stream);
+ BinaryStreamReader Reader(Stream);
Error E = Reader.readArray(Types, Reader.getLength());
if (!E) {
TypeVisitorCallbacks C;
@@ -948,10 +947,10 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
// Handle fragments.
auto Fragment = DIExpr->getFragmentInfo();
- if (DIExpr && Fragment) {
+ if (Fragment) {
IsSubfield = true;
StructOffset = Fragment->OffsetInBits / 8;
- } else if (DIExpr && DIExpr->getNumElements() > 0) {
+ } else if (DIExpr->getNumElements() > 0) {
continue; // Ignore unrecognized exprs.
}
@@ -1014,14 +1013,7 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
}
}
-void CodeViewDebug::beginFunction(const MachineFunction *MF) {
- assert(!CurFn && "Can't process two functions at once!");
-
- if (!Asm || !MMI->hasDebugInfo() || !MF->getFunction()->getSubprogram())
- return;
-
- DebugHandlerBase::beginFunction(MF);
-
+void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
const Function *GV = MF->getFunction();
assert(FnDebugInfo.count(GV) == false);
CurFn = &FnDebugInfo[GV];
@@ -1150,27 +1142,6 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
uint64_t ElementSize = getBaseTypeSize(ElementTypeRef) / 8;
-
- // We want to assert that the element type multiplied by the array lengths
- // match the size of the overall array. However, if we don't have complete
- // type information for the base type, we can't make this assertion. This
- // happens if limited debug info is enabled in this case:
- // struct VTableOptzn { VTableOptzn(); virtual ~VTableOptzn(); };
- // VTableOptzn array[3];
- // The DICompositeType of VTableOptzn will have size zero, and the array will
- // have size 3 * sizeof(void*), and we should avoid asserting.
- //
- // There is a related bug in the front-end where an array of a structure,
- // which was declared as incomplete structure first, ends up not getting a
- // size assigned to it. (PR28303)
- // Example:
- // struct A(*p)[3];
- // struct A { int f; } a[3];
- bool PartiallyIncomplete = false;
- if (Ty->getSizeInBits() == 0 || ElementSize == 0) {
- PartiallyIncomplete = true;
- }
-
// Add subranges to array type.
DINodeArray Elements = Ty->getElements();
for (int i = Elements.size() - 1; i >= 0; --i) {
@@ -1185,16 +1156,14 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
// Variable Length Array (VLA) has Count equal to '-1'.
// Replace with Count '1', assume it is the minimum VLA length.
// FIXME: Make front-end support VLA subrange and emit LF_DIMVARLU.
- if (Count == -1) {
+ if (Count == -1)
Count = 1;
- PartiallyIncomplete = true;
- }
// Update the element size and element type index for subsequent subranges.
ElementSize *= Count;
// If this is the outermost array, use the size from the array. It will be
- // more accurate if PartiallyIncomplete is true.
+ // more accurate if we had a VLA or an incomplete element type size.
uint64_t ArraySize =
(i == 0 && ElementSize == 0) ? Ty->getSizeInBits() / 8 : ElementSize;
@@ -1203,9 +1172,6 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
ElementTypeIndex = TypeTable.writeKnownType(AR);
}
- (void)PartiallyIncomplete;
- assert(PartiallyIncomplete || ElementSize == (Ty->getSizeInBits() / 8));
-
return ElementTypeIndex;
}
@@ -2115,18 +2081,13 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
}
}
-void CodeViewDebug::endFunction(const MachineFunction *MF) {
- if (!Asm || !CurFn) // We haven't created any debug info for this function.
- return;
-
+void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
const Function *GV = MF->getFunction();
assert(FnDebugInfo.count(GV));
assert(CurFn == &FnDebugInfo[GV]);
collectVariableInfo(GV->getSubprogram());
- DebugHandlerBase::endFunction(MF);
-
// Don't emit anything if we don't have any line tables.
if (!CurFn->HaveLineInfo) {
FnDebugInfo.erase(GV);
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 3dd4315e4c2f..343384c51772 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -299,6 +299,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
unsigned getPointerSizeInBytes();
+protected:
+ /// \brief Gather pre-function debug information.
+ void beginFunctionImpl(const MachineFunction *MF) override;
+
+ /// \brief Gather post-function debug information.
+ void endFunctionImpl(const MachineFunction *) override;
+
public:
CodeViewDebug(AsmPrinter *Asm);
@@ -307,12 +314,6 @@ public:
/// \brief Emit the COFF section that holds the line table information.
void endModule() override;
- /// \brief Gather pre-function debug information.
- void beginFunction(const MachineFunction *MF) override;
-
- /// \brief Gather post-function debug information.
- void endFunction(const MachineFunction *) override;
-
/// \brief Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI) override;
};
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 879918995472..b510e0ef36ac 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -42,6 +42,8 @@ void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
// overloads. Otherwise MSVC 2010 thinks this call is ambiguous.
ID.AddInteger(unsigned(Attribute));
ID.AddInteger(unsigned(Form));
+ if (Form == dwarf::DW_FORM_implicit_const)
+ ID.AddInteger(Value);
}
//===----------------------------------------------------------------------===//
@@ -107,13 +109,20 @@ void DIEAbbrev::print(raw_ostream &O) {
O << " "
<< dwarf::AttributeString(Data[i].getAttribute())
<< " "
- << dwarf::FormEncodingString(Data[i].getForm())
- << '\n';
+ << dwarf::FormEncodingString(Data[i].getForm());
+
+ if (Data[i].getForm() == dwarf::DW_FORM_implicit_const)
+ O << " " << Data[i].getValue();
+
+ O << '\n';
}
}
-LLVM_DUMP_METHOD
-void DIEAbbrev::dump() { print(dbgs()); }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void DIEAbbrev::dump() {
+ print(dbgs());
+}
+#endif
//===----------------------------------------------------------------------===//
// DIEAbbrevSet Implementation
@@ -249,10 +258,11 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const {
O << "\n";
}
-LLVM_DUMP_METHOD
-void DIE::dump() {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void DIE::dump() {
print(dbgs());
}
+#endif
unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP,
DIEAbbrevSet &AbbrevSet,
@@ -340,10 +350,11 @@ void DIEValue::print(raw_ostream &O) const {
}
}
-LLVM_DUMP_METHOD
-void DIEValue::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void DIEValue::dump() const {
print(dbgs());
}
+#endif
//===----------------------------------------------------------------------===//
// DIEInteger Implementation
@@ -354,57 +365,42 @@ void DIEValue::dump() const {
void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_implicit_const:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_flag_present:
// Emit something to keep the lines and comments in sync.
// FIXME: Is there a better way to do this?
Asm->OutStreamer->AddBlankLine();
return;
case dwarf::DW_FORM_flag:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref1:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data1:
- LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_strx1:
+ case dwarf::DW_FORM_addrx1:
case dwarf::DW_FORM_ref2:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data2:
- LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_strx2:
+ case dwarf::DW_FORM_addrx2:
case dwarf::DW_FORM_strp:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref4:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data4:
- LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_sup4:
+ case dwarf::DW_FORM_strx4:
+ case dwarf::DW_FORM_addrx4:
case dwarf::DW_FORM_ref8:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref_sig8:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data8:
- LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_sup8:
case dwarf::DW_FORM_GNU_ref_alt:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_GNU_strp_alt:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_line_strp:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_sec_offset:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_strp_sup:
- LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_ref_sup:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_addr:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref_addr:
Asm->OutStreamer->EmitIntValue(Integer, SizeOf(Asm, Form));
return;
case dwarf::DW_FORM_GNU_str_index:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_GNU_addr_index:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref_udata:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_udata:
Asm->EmitULEB128(Integer);
return;
@@ -419,35 +415,41 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
///
unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
- case dwarf::DW_FORM_implicit_const: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_flag_present: return 0;
- case dwarf::DW_FORM_flag: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_ref1: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_data1: return sizeof(int8_t);
- case dwarf::DW_FORM_ref2: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_data2: return sizeof(int16_t);
- case dwarf::DW_FORM_ref4: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_data4: return sizeof(int32_t);
- case dwarf::DW_FORM_ref8: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_ref_sig8: LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_data8: return sizeof(int64_t);
+ case dwarf::DW_FORM_implicit_const:
+ case dwarf::DW_FORM_flag_present:
+ return 0;
+ case dwarf::DW_FORM_flag:
+ case dwarf::DW_FORM_ref1:
+ case dwarf::DW_FORM_data1:
+ case dwarf::DW_FORM_strx1:
+ case dwarf::DW_FORM_addrx1:
+ return sizeof(int8_t);
+ case dwarf::DW_FORM_ref2:
+ case dwarf::DW_FORM_data2:
+ case dwarf::DW_FORM_strx2:
+ case dwarf::DW_FORM_addrx2:
+ return sizeof(int16_t);
+ case dwarf::DW_FORM_ref4:
+ case dwarf::DW_FORM_data4:
+ case dwarf::DW_FORM_ref_sup4:
+ case dwarf::DW_FORM_strx4:
+ case dwarf::DW_FORM_addrx4:
+ return sizeof(int32_t);
+ case dwarf::DW_FORM_ref8:
+ case dwarf::DW_FORM_ref_sig8:
+ case dwarf::DW_FORM_data8:
+ case dwarf::DW_FORM_ref_sup8:
+ return sizeof(int64_t);
case dwarf::DW_FORM_ref_addr:
if (AP->getDwarfVersion() == 2)
return AP->getPointerSize();
LLVM_FALLTHROUGH;
case dwarf::DW_FORM_strp:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_GNU_ref_alt:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_GNU_strp_alt:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_line_strp:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_sec_offset:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_strp_sup:
- LLVM_FALLTHROUGH;
- case dwarf::DW_FORM_ref_sup:
switch (AP->OutStreamer->getContext().getDwarfFormat()) {
case dwarf::DWARF32:
return 4;
@@ -456,11 +458,8 @@ unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
}
llvm_unreachable("Invalid DWARF format");
case dwarf::DW_FORM_GNU_str_index:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_GNU_addr_index:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref_udata:
- LLVM_FALLTHROUGH;
case dwarf::DW_FORM_udata:
return getULEB128Size(Integer);
case dwarf::DW_FORM_sdata:
@@ -484,7 +483,7 @@ void DIEInteger::print(raw_ostream &O) const {
/// EmitValue - Emit expression value.
///
void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- AP->EmitDebugValue(Expr, SizeOf(AP, Form));
+ AP->EmitDebugThreadLocal(Expr, SizeOf(AP, Form));
}
/// SizeOf - Determine size of expression value in bytes.
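
The Profile() change at the top of this file matters because DW_FORM_implicit_const carries its constant in the abbreviation itself rather than in the DIE data, so the value has to participate in abbreviation uniquing. Here is a minimal sketch of why, using a plain tuple key as a hypothetical stand-in for FoldingSetNodeID; the attribute and form codes are illustrative.

    #include <cassert>
    #include <cstdint>
    #include <set>
    #include <tuple>

    // Hypothetical stand-in for an abbreviation entry: attribute, form, and
    // the implicit value (meaningful only for DW_FORM_implicit_const).
    struct AbbrevData {
      unsigned Attribute;
      unsigned Form;
      int64_t Value;
    };

    constexpr unsigned DW_FORM_implicit_const = 0x21;

    // Uniquing key: the value participates only for implicit_const, exactly
    // as the DIEAbbrevData::Profile change above arranges.
    std::tuple<unsigned, unsigned, int64_t> profileKey(const AbbrevData &D) {
      int64_t V = (D.Form == DW_FORM_implicit_const) ? D.Value : 0;
      return {D.Attribute, D.Form, V};
    }

    int main() {
      std::set<std::tuple<unsigned, unsigned, int64_t>> Unique;
      // Same attribute/form but different implicit constants must produce
      // two distinct abbreviations; without Value in the key they collapse.
      Unique.insert(profileKey({0x3a, DW_FORM_implicit_const, 1}));
      Unique.insert(profileKey({0x3a, DW_FORM_implicit_const, 2}));
      assert(Unique.size() == 2);
    }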
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index d8ecc7ccfb9b..8e3b88d0af0e 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -490,9 +490,9 @@ uint64_t DIEHash::computeCUSignature(const DIE &Die) {
Hash.final(Result);
// ... take the least significant 8 bytes and return those. Our MD5
- // implementation always returns its results in little endian, swap bytes
- // appropriately.
- return support::endian::read64le(Result + 8);
+ // implementation always returns its results in little endian, so we actually
+ // need the "high" word.
+ return Result.high();
}
/// This is based on the type signature computation given in section 7.27 of the
@@ -514,7 +514,7 @@ uint64_t DIEHash::computeTypeSignature(const DIE &Die) {
Hash.final(Result);
// ... take the least significant 8 bytes and return those. Our MD5
- // implementation always returns its results in little endian, swap bytes
- // appropriately.
- return support::endian::read64le(Result + 8);
+ // implementation always returns its results in little endian, so we actually
+ // need the "high" word.
+ return Result.high();
}
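
Both hunks above replace a manual little-endian read of bytes 8..15 with MD5Result::high(). The two are equivalent only because the digest buffer is laid out little-endian; below is a small self-contained check of that equivalence, with a made-up 16-byte digest, assuming high() denotes the second 8 bytes decoded little-endian.

    #include <cassert>
    #include <cstdint>

    // Decode 8 bytes starting at P as a little-endian 64-bit value -- what
    // support::endian::read64le(Result + 8) did before this change.
    uint64_t read64le(const uint8_t *P) {
      uint64_t V = 0;
      for (int i = 7; i >= 0; --i)
        V = (V << 8) | P[i];
      return V;
    }

    int main() {
      // Hypothetical 16-byte MD5 digest.
      uint8_t Digest[16];
      for (int i = 0; i < 16; ++i)
        Digest[i] = uint8_t(0x10 + i);
      // The "high" half is assumed to be the second 8 bytes of the buffer,
      // read little-endian -- i.e. exactly read64le(Digest + 8).
      uint64_t High = read64le(Digest + 8);
      assert(High == 0x1f1e1d1c1b1a1918ULL);
    }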
diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 94190981e88e..1d63e33a4d33 100644
--- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -115,12 +115,35 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
return getBaseTypeSize(BaseType);
}
+bool hasDebugInfo(const MachineModuleInfo *MMI, const MachineFunction *MF) {
+ if (!MMI->hasDebugInfo())
+ return false;
+ auto *SP = MF->getFunction()->getSubprogram();
+ if (!SP)
+ return false;
+ assert(SP->getUnit());
+ auto EK = SP->getUnit()->getEmissionKind();
+ if (EK == DICompileUnit::NoDebug)
+ return false;
+ return true;
+}
+
void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
+ assert(Asm);
+ PrevInstBB = nullptr;
+
+ if (!hasDebugInfo(MMI, MF)) {
+ skippedNonDebugFunction();
+ return;
+ }
+
// Grab the lexical scopes for the function, if we don't have any of those
// then we're not going to be able to do anything.
LScopes.initialize(*MF);
- if (LScopes.empty())
+ if (LScopes.empty()) {
+ beginFunctionImpl(MF);
return;
+ }
// Make sure that each lexical scope will have a begin/end label.
identifyScopeMarkers();
@@ -167,6 +190,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
PrevInstLoc = DebugLoc();
PrevLabel = Asm->getFunctionBegin();
+ beginFunctionImpl(MF);
}
void DebugHandlerBase::beginInstruction(const MachineInstr *MI) {
@@ -228,6 +252,8 @@ void DebugHandlerBase::endInstruction() {
}
void DebugHandlerBase::endFunction(const MachineFunction *MF) {
+ if (hasDebugInfo(MMI, MF))
+ endFunctionImpl(MF);
DbgValues.clear();
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
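
The beginFunction/endFunction changes above turn the base class into a template method: the has-debug-info gate lives in one place, and subclasses only implement the Impl hooks plus an optional skippedNonDebugFunction notification. A reduced sketch of that shape follows, with MachineFunction shrunk to a single bool purely for illustration.

    #include <iostream>

    struct MachineFunction { bool HasDebugInfo; };

    // Base class owns the gate; subclasses never re-check it.
    class DebugHandlerBase {
    public:
      void beginFunction(const MachineFunction *MF) {
        if (!MF->HasDebugInfo) {
          skippedNonDebugFunction();
          return;
        }
        // ...common setup (label maps, lexical scopes) would happen here...
        beginFunctionImpl(MF);
      }
      void endFunction(const MachineFunction *MF) {
        if (MF->HasDebugInfo)
          endFunctionImpl(MF);
        // ...common cleanup runs unconditionally, as in the real endFunction.
      }
    protected:
      virtual void beginFunctionImpl(const MachineFunction *MF) = 0;
      virtual void endFunctionImpl(const MachineFunction *MF) = 0;
      virtual void skippedNonDebugFunction() {}
    };

    class DwarfLikeHandler : public DebugHandlerBase {
      void beginFunctionImpl(const MachineFunction *) override {
        std::cout << "begin\n";
      }
      void endFunctionImpl(const MachineFunction *) override {
        std::cout << "end\n";
      }
      void skippedNonDebugFunction() override { std::cout << "skipped\n"; }
    };

    int main() {
      DwarfLikeHandler H;
      MachineFunction WithDI{true}, WithoutDI{false};
      H.beginFunction(&WithDI);    H.endFunction(&WithDI);    // begin / end
      H.beginFunction(&WithoutDI); H.endFunction(&WithoutDI); // skipped
    }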
diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
index c00fa189d94a..659a921e1fc5 100644
--- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
+++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
@@ -80,6 +80,10 @@ protected:
LabelsAfterInsn.insert(std::make_pair(MI, nullptr));
}
+ virtual void beginFunctionImpl(const MachineFunction *MF) = 0;
+ virtual void endFunctionImpl(const MachineFunction *MF) = 0;
+ virtual void skippedNonDebugFunction() {}
+
// AsmPrinterHandler overrides.
public:
void beginInstruction(const MachineInstr *MI) override;
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 36fb1507ddc6..a68e8cc6b4b3 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -76,7 +76,8 @@ public:
const DIExpression *getExpression() const { return Expression; }
friend bool operator==(const Value &, const Value &);
friend bool operator<(const Value &, const Value &);
- void dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const {
if (isLocation()) {
llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " ";
if (Loc.isIndirect())
@@ -90,6 +91,7 @@ public:
if (Expression)
Expression->dump();
}
+#endif
};
private:
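
The guard added around dump() here (and in DIE.cpp above) is the usual idiom for keeping dump methods out of release binaries while still allowing them via LLVM_ENABLE_DUMP. A tiny sketch of the pattern, with LLVM_DUMP_METHOD's noinline/used attributes omitted for brevity:

    #include <iostream>

    struct Entry {
      int Reg = 5;
    #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      // Compiled into asserts builds, or anywhere LLVM_ENABLE_DUMP is set.
      void dump() const { std::cerr << "Loc = { reg=" << Reg << " }\n"; }
    #endif
    };

    int main() {
    #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      Entry().dump(); // available for debugger use in such builds
    #endif
    }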
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index d904372af589..a550ff2fb90f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1,3 +1,16 @@
+//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for constructing a dwarf compile unit.
+//
+//===----------------------------------------------------------------------===//
+
#include "DwarfCompileUnit.h"
#include "DwarfExpression.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -129,67 +142,72 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
bool addToAccelTable = false;
DIELoc *Loc = nullptr;
std::unique_ptr<DIEDwarfExpression> DwarfExpr;
- bool AllConstant = std::all_of(
- GlobalExprs.begin(), GlobalExprs.end(),
- [&](const GlobalExpr GE) {
- return GE.Expr && GE.Expr->isConstant();
- });
-
for (const auto &GE : GlobalExprs) {
const GlobalVariable *Global = GE.Var;
const DIExpression *Expr = GE.Expr;
+
// For compatibility with DWARF 3 and earlier,
// DW_AT_location(DW_OP_constu, X, DW_OP_stack_value) becomes
// DW_AT_const_value(X).
if (GlobalExprs.size() == 1 && Expr && Expr->isConstant()) {
+ addToAccelTable = true;
addConstantValue(*VariableDIE, /*Unsigned=*/true, Expr->getElement(1));
- // We cannot describe the location of dllimport'd variables: the
- // computation of their address requires loads from the IAT.
- } else if ((Global && !Global->hasDLLImportStorageClass()) || AllConstant) {
- if (!Loc) {
- Loc = new (DIEValueAllocator) DIELoc;
- DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
- }
+ break;
+ }
+
+ // We cannot describe the location of dllimport'd variables: the
+ // computation of their address requires loads from the IAT.
+ if (Global && Global->hasDLLImportStorageClass())
+ continue;
+
+ // Nothing to describe without address or constant.
+ if (!Global && (!Expr || !Expr->isConstant()))
+ continue;
+
+ if (!Loc) {
addToAccelTable = true;
- if (Global) {
- const MCSymbol *Sym = Asm->getSymbol(Global);
- if (Global->isThreadLocal()) {
- if (Asm->TM.Options.EmulatedTLS) {
- // TODO: add debug info for emulated thread local mode.
- } else {
- // FIXME: Make this work with -gsplit-dwarf.
- unsigned PointerSize = Asm->getDataLayout().getPointerSize();
- assert((PointerSize == 4 || PointerSize == 8) &&
- "Add support for other sizes if necessary");
- // Based on GCC's support for TLS:
- if (!DD->useSplitDwarf()) {
- // 1) Start with a constNu of the appropriate pointer size
- addUInt(*Loc, dwarf::DW_FORM_data1,
- PointerSize == 4 ? dwarf::DW_OP_const4u
- : dwarf::DW_OP_const8u);
- // 2) containing the (relocated) offset of the TLS variable
- // within the module's TLS block.
- addExpr(*Loc, dwarf::DW_FORM_udata,
- Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
- } else {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
- addUInt(*Loc, dwarf::DW_FORM_udata,
- DD->getAddressPool().getIndex(Sym, /* TLS */ true));
- }
- // 3) followed by an OP to make the debugger do a TLS lookup.
+ Loc = new (DIEValueAllocator) DIELoc;
+ DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
+ }
+
+ if (Global) {
+ const MCSymbol *Sym = Asm->getSymbol(Global);
+ if (Global->isThreadLocal()) {
+ if (Asm->TM.Options.EmulatedTLS) {
+ // TODO: add debug info for emulated thread local mode.
+ } else {
+ // FIXME: Make this work with -gsplit-dwarf.
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ assert((PointerSize == 4 || PointerSize == 8) &&
+ "Add support for other sizes if necessary");
+ // Based on GCC's support for TLS:
+ if (!DD->useSplitDwarf()) {
+ // 1) Start with a constNu of the appropriate pointer size
addUInt(*Loc, dwarf::DW_FORM_data1,
- DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
- : dwarf::DW_OP_form_tls_address);
+ PointerSize == 4 ? dwarf::DW_OP_const4u
+ : dwarf::DW_OP_const8u);
+ // 2) containing the (relocated) offset of the TLS variable
+ // within the module's TLS block.
+ addExpr(*Loc, dwarf::DW_FORM_udata,
+ Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ } else {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ DD->getAddressPool().getIndex(Sym, /* TLS */ true));
}
- } else {
- DD->addArangeLabel(SymbolCU(this, Sym));
- addOpAddress(*Loc, Sym);
+ // 3) followed by an OP to make the debugger do a TLS lookup.
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
+ : dwarf::DW_OP_form_tls_address);
}
+ } else {
+ DD->addArangeLabel(SymbolCU(this, Sym));
+ addOpAddress(*Loc, Sym);
}
- if (Expr) {
- DwarfExpr->addFragmentOffset(Expr);
- DwarfExpr->AddExpression(Expr);
- }
+ }
+ if (Expr) {
+ DwarfExpr->addFragmentOffset(Expr);
+ DwarfExpr->addExpression(Expr);
}
}
if (Loc)
@@ -507,8 +525,8 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
// If there is an expression, emit raw unsigned bytes.
DwarfExpr.addFragmentOffset(Expr);
- DwarfExpr.AddUnsignedConstant(DVInsn->getOperand(0).getImm());
- DwarfExpr.AddExpression(Expr);
+ DwarfExpr.addUnsignedConstant(DVInsn->getOperand(0).getImm());
+ DwarfExpr.addExpression(Expr);
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
} else
addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
@@ -532,9 +550,15 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
DwarfExpr.addFragmentOffset(Fragment.Expr);
- DwarfExpr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
- FrameReg, Offset);
- DwarfExpr.AddExpression(Fragment.Expr);
+ SmallVector<uint64_t, 8> Ops;
+ Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(Offset);
+ Ops.push_back(dwarf::DW_OP_deref);
+ Ops.append(Fragment.Expr->elements_begin(), Fragment.Expr->elements_end());
+ DIExpressionCursor Expr(Ops);
+ DwarfExpr.addMachineRegExpression(
+ *Asm->MF->getSubtarget().getRegisterInfo(), Expr, FrameReg);
+ DwarfExpr.addExpression(std::move(Expr));
}
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
@@ -690,11 +714,14 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) {
Asm->OutStreamer->EmitLabel(LabelBegin);
}
- DwarfUnit::emitHeader(UseOffsets);
+ dwarf::UnitType UT = Skeleton ? dwarf::DW_UT_split_compile
+ : DD->useSplitDwarf() ? dwarf::DW_UT_skeleton
+ : dwarf::DW_UT_compile;
+ DwarfUnit::emitCommonHeader(UseOffsets, UT);
}
/// addGlobalName - Add a new global name to the compile unit.
-void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die,
+void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die,
const DIScope *Context) {
if (includeMinimalInlineScopes())
return;
@@ -702,6 +729,18 @@ void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die,
GlobalNames[FullName] = &Die;
}
+void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name,
+ const DIScope *Context) {
+ if (includeMinimalInlineScopes())
+ return;
+ std::string FullName = getParentContextString(Context) + Name.str();
+ // Insert, allowing the entry to remain as-is if it's already present
+ // This way the CU-level type DIE is preferred over the "can't describe this
+ // type as a unit offset because it's not really in the CU at all, it's only
+ // in a type unit"
+ GlobalNames.insert(std::make_pair(std::move(FullName), &getUnitDie()));
+}
+
/// Add a new global type to the unit.
void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) {
@@ -711,6 +750,18 @@ void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die,
GlobalTypes[FullName] = &Die;
}
+void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty,
+ const DIScope *Context) {
+ if (includeMinimalInlineScopes())
+ return;
+ std::string FullName = getParentContextString(Context) + Ty->getName().str();
+ // Insert, allowing the entry to remain as-is if it's already present
+ // This way the CU-level type DIE is preferred over the "can't describe this
+ // type as a unit offset because it's not really in the CU at all, it's only
+ // in a type unit"
+ GlobalTypes.insert(std::make_pair(std::move(FullName), &getUnitDie()));
+}
+
/// addVariableAddress - Add DW_AT_location attribute for a
/// DbgVariable based on provided MachineLocation.
void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
@@ -727,22 +778,22 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- DIEDwarfExpression Expr(*Asm, *this, *Loc);
-
- bool validReg;
- if (Location.isReg())
- validReg = Expr.AddMachineReg(*Asm->MF->getSubtarget().getRegisterInfo(),
- Location.getReg());
- else
- validReg =
- Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
- Location.getReg(), Location.getOffset());
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- if (!validReg)
+ SmallVector<uint64_t, 8> Ops;
+ if (Location.isIndirect()) {
+ Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(Location.getOffset());
+ Ops.push_back(dwarf::DW_OP_deref);
+ }
+ DIExpressionCursor Cursor(Ops);
+ const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
+ DwarfExpr.addExpression(std::move(Cursor));
// Now attach the location information to the DIE.
- addBlock(Die, Attribute, Expr.finalize());
+ addBlock(Die, Attribute, DwarfExpr.finalize());
}
/// Start with the address based on the location provided, and generate the
@@ -754,23 +805,24 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
- const DIExpression *Expr = DV.getSingleExpression();
- DIExpressionCursor ExprCursor(Expr);
+ const DIExpression *DIExpr = DV.getSingleExpression();
+ DwarfExpr.addFragmentOffset(DIExpr);
+
+ SmallVector<uint64_t, 8> Ops;
+ if (Location.isIndirect()) {
+ Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(Location.getOffset());
+ Ops.push_back(dwarf::DW_OP_deref);
+ }
+ Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ DIExpressionCursor Cursor(Ops);
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
- auto Reg = Location.getReg();
- DwarfExpr.addFragmentOffset(Expr);
- bool ValidReg =
- Location.getOffset()
- ? DwarfExpr.AddMachineRegIndirect(TRI, Reg, Location.getOffset())
- : DwarfExpr.AddMachineRegExpression(TRI, ExprCursor, Reg);
-
- if (!ValidReg)
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
-
- DwarfExpr.AddExpression(std::move(ExprCursor));
+ DwarfExpr.addExpression(std::move(Cursor));
// Now attach the location information to the DIE.
- addBlock(Die, Attribute, Loc);
+ addBlock(Die, Attribute, DwarfExpr.finalize());
}
/// Add a Dwarf loclistptr attribute data and value.
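
A recurring pattern in these hunks is that an indirect MachineLocation no longer goes through a dedicated AddMachineRegIndirect; instead a [DW_OP_plus, offset, DW_OP_deref] prefix is spliced ahead of the variable's DIExpression and the combined stream is handed to addMachineRegExpression/addExpression. A small sketch of just the splicing step, with illustrative opcode values:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    constexpr uint64_t DW_OP_plus = 0x22, DW_OP_deref = 0x06;

    // Build the operation stream the cursor will consume: location prefix
    // first, then the variable's own expression elements.
    std::vector<uint64_t> buildOps(bool Indirect, int64_t Offset,
                                   const std::vector<uint64_t> &DIExprElts) {
      std::vector<uint64_t> Ops;
      if (Indirect) {
        Ops.push_back(DW_OP_plus);
        Ops.push_back(uint64_t(Offset));
        Ops.push_back(DW_OP_deref);
      }
      // Append the DIExpression, as the diff does with
      // elements_begin()/elements_end().
      Ops.insert(Ops.end(), DIExprElts.begin(), DIExprElts.end());
      return Ops;
    }

    int main() {
      // A variable 8 bytes past the base register, empty DIExpression.
      for (uint64_t Op : buildOps(/*Indirect=*/true, 8, {}))
        std::cout << std::hex << Op << " "; // 22 8 6
      std::cout << "\n";
    }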
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index a8025f1d1521..9a64b4b76b06 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -210,12 +210,19 @@ public:
}
/// Add a new global name to the compile unit.
- void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) override;
+ void addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) override;
+
+ /// Add a new global name present in a type unit to this compile unit.
+ void addGlobalNameForTypeUnit(StringRef Name, const DIScope *Context);
/// Add a new global type to the compile unit.
void addGlobalType(const DIType *Ty, const DIE &Die,
const DIScope *Context) override;
+ /// Add a new global type present in a type unit to this compile unit.
+ void addGlobalTypeUnitType(const DIType *Ty, const DIScope *Context);
+
const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; }
const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; }
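
addGlobalNameForTypeUnit and addGlobalTypeUnitType rely on insert() leaving an existing entry untouched, so a real CU-level DIE recorded earlier always wins over the type-unit fallback that points at the CU DIE. A tiny model of that behavior, with std::map standing in for llvm::StringMap and dummy objects for the DIEs:

    #include <iostream>
    #include <map>
    #include <string>

    int main() {
      std::map<std::string, const void *> GlobalTypes;
      int TypeDIE, UnitDIE; // fake stand-ins for DIE objects
      GlobalTypes.insert({"ns::Foo", &TypeDIE}); // CU-level type DIE wins...
      GlobalTypes.insert({"ns::Foo", &UnitDIE}); // ...this no-ops, as intended
      std::cout << (GlobalTypes["ns::Foo"] == &TypeDIE) << "\n"; // 1
    }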
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 91a3d0989cc5..5ce111309208 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -39,7 +39,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
@@ -127,17 +126,17 @@ static const char *const DWARFGroupDescription = "DWARF Emission";
static const char *const DbgTimerName = "writer";
static const char *const DbgTimerDescription = "DWARF Debug Writer";
-void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) {
+void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
BS.EmitInt8(
Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
: dwarf::OperationEncodingString(Op));
}
-void DebugLocDwarfExpression::EmitSigned(int64_t Value) {
+void DebugLocDwarfExpression::emitSigned(int64_t Value) {
BS.EmitSLEB128(Value, Twine(Value));
}
-void DebugLocDwarfExpression::EmitUnsigned(uint64_t Value) {
+void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) {
BS.EmitULEB128(Value, Twine(Value));
}
@@ -200,6 +199,12 @@ const DIType *DbgVariable::getType() const {
}
ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
+ if (FrameIndexExprs.size() == 1)
+ return FrameIndexExprs;
+
+ assert(all_of(FrameIndexExprs,
+ [](const FrameIndexExpr &A) { return A.Expr->isFragment(); }) &&
+ "multiple FI expressions without DW_OP_LLVM_fragment");
std::sort(FrameIndexExprs.begin(), FrameIndexExprs.end(),
[](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool {
return A.Expr->getFragmentInfo()->OffsetInBits <
@@ -418,7 +423,14 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
Asm->OutStreamer->getContext().setMCLineTableCompilationDir(
NewCU.getUniqueID(), CompilationDir);
- NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit->getProducer());
+ StringRef Producer = DIUnit->getProducer();
+ StringRef Flags = DIUnit->getFlags();
+ if (!Flags.empty()) {
+ std::string ProducerWithFlags = Producer.str() + " " + Flags.str();
+ NewCU.addString(Die, dwarf::DW_AT_producer, ProducerWithFlags);
+ } else
+ NewCU.addString(Die, dwarf::DW_AT_producer, Producer);
+
NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
DIUnit->getSourceLanguage());
NewCU.addString(Die, dwarf::DW_AT_name, FN);
@@ -544,7 +556,6 @@ void DwarfDebug::beginModule() {
// The retained types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
if (DIType *RT = dyn_cast<DIType>(Ty))
- if (!RT->isExternalTypeRef())
// There is no point in force-emitting a forward declaration.
CU.getOrCreateTypeDIE(RT);
}
@@ -740,6 +751,7 @@ DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) {
void DwarfDebug::createAbstractVariable(const DILocalVariable *Var,
LexicalScope *Scope) {
+ assert(Scope && Scope->isAbstractScope());
auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr);
InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get());
AbstractVariables[Var] = std::move(AbsDbgVariable);
@@ -1137,20 +1149,9 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
// Gather pre-function debug information. Assumes being called immediately
// after the function entry point has been emitted.
-void DwarfDebug::beginFunction(const MachineFunction *MF) {
+void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
CurFn = MF;
- // If there's no debug info for the function we're not going to do anything.
- if (!MMI->hasDebugInfo())
- return;
-
- auto DI = MF->getFunction()->getSubprogram();
- if (!DI)
- return;
-
- // Grab the lexical scopes for the function, if we don't have any of those
- // then we're not going to be able to do anything.
- DebugHandlerBase::beginFunction(MF);
if (LScopes.empty())
return;
@@ -1189,23 +1190,21 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
}
}
+void DwarfDebug::skippedNonDebugFunction() {
+ // If we don't have a subprogram for this function then there will be a hole
+ // in the range information. Keep note of this by setting the previously used
+ // section to nullptr.
+ PrevCU = nullptr;
+ CurFn = nullptr;
+}
+
// Gather and emit post-function debug information.
-void DwarfDebug::endFunction(const MachineFunction *MF) {
+void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
+ const DISubprogram *SP = MF->getFunction()->getSubprogram();
+
assert(CurFn == MF &&
"endFunction should be called with the same function as beginFunction");
- const DISubprogram *SP = MF->getFunction()->getSubprogram();
- if (!MMI->hasDebugInfo() || !SP ||
- SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) {
- // If we don't have a subprogram for this function then there will be a hole
- // in the range information. Keep note of this by setting the previously
- // used section to nullptr.
- PrevCU = nullptr;
- CurFn = nullptr;
- DebugHandlerBase::endFunction(MF);
- return;
- }
-
// Set DwarfDwarfCompileUnitID in MCContext to default value.
Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
@@ -1220,17 +1219,14 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd()));
// Under -gmlt, skip building the subprogram if there are no inlined
- // subroutines inside it.
- if (TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly &&
+ // subroutines inside it. But with -fdebug-info-for-profiling, the subprogram
+ // is still needed as we need its source location.
+ if (!TheCU.getCUNode()->getDebugInfoForProfiling() &&
+ TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly &&
LScopes.getAbstractScopesList().empty() && !IsDarwin) {
assert(InfoHolder.getScopeVariables().empty());
- assert(DbgValues.empty());
- // FIXME: This wouldn't be true in LTO with a -g (with inlining) CU followed
- // by a -gmlt CU. Add a test and remove this assertion.
- assert(AbstractVariables.empty());
PrevLabel = nullptr;
CurFn = nullptr;
- DebugHandlerBase::endFunction(MF);
return;
}
@@ -1266,7 +1262,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
InfoHolder.getScopeVariables().clear();
PrevLabel = nullptr;
CurFn = nullptr;
- DebugHandlerBase::endFunction(MF);
}
// Register a source line with debug info. Returns the unique label that was
@@ -1361,6 +1356,18 @@ void DwarfDebug::emitAccelTypes() {
/// computeIndexValue - Compute the gdb index value for the DIE and CU.
static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
const DIE *Die) {
+ // Entities that ended up only in a Type Unit reference the CU instead (since
+ // the pub entry has offsets within the CU there's no real offset that can be
+ // provided anyway). As it happens all such entities (namespaces and types,
+ // types only in C++ at that) are rendered as TYPE+EXTERNAL. If this turns out
+ // not to be true it would be necessary to persist this information from the
+ // point at which the entry is added to the index data structure - since by
+ // the time the index is built from that, the original type/namespace DIE in a
+ // type unit has already been destroyed so it can't be queried for properties
+ // like tag, etc.
+ if (Die->getTag() == dwarf::DW_TAG_compile_unit)
+ return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE,
+ dwarf::GIEL_EXTERNAL);
dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC;
// We could have a specification DIE that has most of our knowledge,
@@ -1498,27 +1505,37 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
ByteStreamer &Streamer,
const DebugLocEntry::Value &Value,
DwarfExpression &DwarfExpr) {
- DIExpressionCursor ExprCursor(Value.getExpression());
- DwarfExpr.addFragmentOffset(Value.getExpression());
+ auto *DIExpr = Value.getExpression();
+ DIExpressionCursor ExprCursor(DIExpr);
+ DwarfExpr.addFragmentOffset(DIExpr);
// Regular entry.
if (Value.isInt()) {
if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed ||
BT->getEncoding() == dwarf::DW_ATE_signed_char))
- DwarfExpr.AddSignedConstant(Value.getInt());
+ DwarfExpr.addSignedConstant(Value.getInt());
else
- DwarfExpr.AddUnsignedConstant(Value.getInt());
+ DwarfExpr.addUnsignedConstant(Value.getInt());
} else if (Value.isLocation()) {
- MachineLocation Loc = Value.getLoc();
+ MachineLocation Location = Value.getLoc();
+
+ SmallVector<uint64_t, 8> Ops;
+ // FIXME: Should this condition be Location.isIndirect() instead?
+ if (Location.getOffset()) {
+ Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(Location.getOffset());
+ Ops.push_back(dwarf::DW_OP_deref);
+ }
+ Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ DIExpressionCursor Cursor(Ops);
const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
- if (Loc.getOffset())
- DwarfExpr.AddMachineRegIndirect(TRI, Loc.getReg(), Loc.getOffset());
- else
- DwarfExpr.AddMachineRegExpression(TRI, ExprCursor, Loc.getReg());
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
+ return;
+ return DwarfExpr.addExpression(std::move(Cursor));
} else if (Value.isConstantFP()) {
APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt();
- DwarfExpr.AddUnsignedConstant(RawBytes);
+ DwarfExpr.addUnsignedConstant(RawBytes);
}
- DwarfExpr.AddExpression(std::move(ExprCursor));
+ DwarfExpr.addExpression(std::move(ExprCursor));
}
void DebugLocEntry::finalize(const AsmPrinter &AP,
@@ -1940,11 +1957,11 @@ uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) {
MD5 Hash;
Hash.update(Identifier);
// ... take the least significant 8 bytes and return those. Our MD5
- // implementation always returns its results in little endian, swap bytes
- // appropriately.
+ // implementation always returns its results in little endian, so we actually
+ // need the "high" word.
MD5::MD5Result Result;
Hash.final(Result);
- return support::endian::read64le(Result + 8);
+ return Result.high();
}
void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
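
The getFrameIndexExprs change above asserts that when a variable has several frame-index expressions they are all fragments, then orders them by fragment offset so pieces are emitted low to high. A reduced sketch of that ordering, with FrameIndexExpr shrunk to the two fields that matter here:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    struct FrameIndexExpr {
      int FI;
      uint64_t OffsetInBits; // from the expression's DW_OP_LLVM_fragment
    };

    int main() {
      std::vector<FrameIndexExpr> FIEs = {{7, 64}, {3, 0}, {5, 32}};
      // Sort by fragment offset, mirroring the std::sort in the hunk.
      std::sort(FIEs.begin(), FIEs.end(),
                [](const FrameIndexExpr &A, const FrameIndexExpr &B) {
                  return A.OffsetInBits < B.OffsetInBits;
                });
      for (auto &FIE : FIEs)
        std::cout << FIE.FI << " "; // 3 5 7
      std::cout << "\n";
    }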
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 253e3f06200e..8a96e7867b6e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -89,7 +89,7 @@ public:
assert(!MInsn && "Already initialized?");
assert((!E || E->isValid()) && "Expected valid expression");
- assert(~FI && "Expected valid index");
+ assert(FI != INT_MAX && "Expected valid index");
FrameIndexExprs.push_back({FI, E});
}
@@ -448,6 +448,15 @@ class DwarfDebug : public DebugHandlerBase {
/// Collect variable information from the side table maintained by MF.
void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &P);
+protected:
+ /// Gather pre-function debug information.
+ void beginFunctionImpl(const MachineFunction *MF) override;
+
+ /// Gather and emit post-function debug information.
+ void endFunctionImpl(const MachineFunction *MF) override;
+
+ void skippedNonDebugFunction() override;
+
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -463,12 +472,6 @@ public:
/// Emit all Dwarf sections that should come after the content.
void endModule() override;
- /// Gather pre-function debug information.
- void beginFunction(const MachineFunction *MF) override;
-
- /// Gather and emit post-function debug information.
- void endFunction(const MachineFunction *MF) override;
-
/// Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI) override;
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 61b2c7e65842..debe88f3b1ee 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -22,79 +22,76 @@
using namespace llvm;
-void DwarfExpression::AddReg(int DwarfReg, const char *Comment) {
+void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
if (DwarfReg < 32) {
- EmitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
+ emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
} else {
- EmitOp(dwarf::DW_OP_regx, Comment);
- EmitUnsigned(DwarfReg);
+ emitOp(dwarf::DW_OP_regx, Comment);
+ emitUnsigned(DwarfReg);
}
}
-void DwarfExpression::AddRegIndirect(int DwarfReg, int Offset, bool Deref) {
+void DwarfExpression::addBReg(int DwarfReg, int Offset) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
if (DwarfReg < 32) {
- EmitOp(dwarf::DW_OP_breg0 + DwarfReg);
+ emitOp(dwarf::DW_OP_breg0 + DwarfReg);
} else {
- EmitOp(dwarf::DW_OP_bregx);
- EmitUnsigned(DwarfReg);
+ emitOp(dwarf::DW_OP_bregx);
+ emitUnsigned(DwarfReg);
}
- EmitSigned(Offset);
- if (Deref)
- EmitOp(dwarf::DW_OP_deref);
+ emitSigned(Offset);
}
-void DwarfExpression::AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits) {
+void DwarfExpression::addFBReg(int Offset) {
+ emitOp(dwarf::DW_OP_fbreg);
+ emitSigned(Offset);
+}
+
+void DwarfExpression::addOpPiece(unsigned SizeInBits, unsigned OffsetInBits) {
if (!SizeInBits)
return;
const unsigned SizeOfByte = 8;
if (OffsetInBits > 0 || SizeInBits % SizeOfByte) {
- EmitOp(dwarf::DW_OP_bit_piece);
- EmitUnsigned(SizeInBits);
- EmitUnsigned(OffsetInBits);
+ emitOp(dwarf::DW_OP_bit_piece);
+ emitUnsigned(SizeInBits);
+ emitUnsigned(OffsetInBits);
} else {
- EmitOp(dwarf::DW_OP_piece);
+ emitOp(dwarf::DW_OP_piece);
unsigned ByteSize = SizeInBits / SizeOfByte;
- EmitUnsigned(ByteSize);
+ emitUnsigned(ByteSize);
}
this->OffsetInBits += SizeInBits;
}
-void DwarfExpression::AddShr(unsigned ShiftBy) {
- EmitOp(dwarf::DW_OP_constu);
- EmitUnsigned(ShiftBy);
- EmitOp(dwarf::DW_OP_shr);
+void DwarfExpression::addShr(unsigned ShiftBy) {
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(ShiftBy);
+ emitOp(dwarf::DW_OP_shr);
}
-bool DwarfExpression::AddMachineRegIndirect(const TargetRegisterInfo &TRI,
- unsigned MachineReg, int Offset) {
- if (isFrameRegister(TRI, MachineReg)) {
- // If variable offset is based in frame register then use fbreg.
- EmitOp(dwarf::DW_OP_fbreg);
- EmitSigned(Offset);
- return true;
- }
-
- int DwarfReg = TRI.getDwarfRegNum(MachineReg, false);
- if (DwarfReg < 0)
- return false;
-
- AddRegIndirect(DwarfReg, Offset);
- return true;
+void DwarfExpression::addAnd(unsigned Mask) {
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(Mask);
+ emitOp(dwarf::DW_OP_and);
}
-bool DwarfExpression::AddMachineReg(const TargetRegisterInfo &TRI,
+bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
unsigned MachineReg, unsigned MaxSize) {
- if (!TRI.isPhysicalRegister(MachineReg))
+ if (!TRI.isPhysicalRegister(MachineReg)) {
+ if (isFrameRegister(TRI, MachineReg)) {
+ DwarfRegs.push_back({-1, 0, nullptr});
+ return true;
+ }
return false;
+ }
int Reg = TRI.getDwarfRegNum(MachineReg, false);
// If this is a valid register number, emit it.
if (Reg >= 0) {
- AddReg(Reg);
+ DwarfRegs.push_back({Reg, 0, nullptr});
return true;
}
@@ -106,7 +103,7 @@ bool DwarfExpression::AddMachineReg(const TargetRegisterInfo &TRI,
unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg);
unsigned Size = TRI.getSubRegIdxSize(Idx);
unsigned RegOffset = TRI.getSubRegIdxOffset(Idx);
- AddReg(Reg, "super-register");
+ DwarfRegs.push_back({Reg, 0, "super-register"});
// Use a DW_OP_bit_piece to describe the sub-register.
setSubRegisterPiece(Size, RegOffset);
return true;
@@ -136,72 +133,101 @@ bool DwarfExpression::AddMachineReg(const TargetRegisterInfo &TRI,
// If this sub-register has a DWARF number and we haven't covered
// its range, emit a DWARF piece for it.
if (Reg >= 0 && Intersection.any()) {
- AddReg(Reg, "sub-register");
+ // Emit a piece for any gap in the coverage.
+ if (Offset > CurPos)
+ DwarfRegs.push_back({-1, Offset - CurPos, nullptr});
+ DwarfRegs.push_back(
+ {Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"});
if (Offset >= MaxSize)
break;
- // Emit a piece for the any gap in the coverage.
- if (Offset > CurPos)
- AddOpPiece(Offset - CurPos);
- AddOpPiece(std::min<unsigned>(Size, MaxSize - Offset));
- CurPos = Offset + Size;
// Mark it as emitted.
Coverage.set(Offset, Offset + Size);
+ CurPos = Offset + Size;
}
}
return CurPos;
}
-void DwarfExpression::AddStackValue() {
+void DwarfExpression::addStackValue() {
if (DwarfVersion >= 4)
- EmitOp(dwarf::DW_OP_stack_value);
+ emitOp(dwarf::DW_OP_stack_value);
}
-void DwarfExpression::AddSignedConstant(int64_t Value) {
- EmitOp(dwarf::DW_OP_consts);
- EmitSigned(Value);
- AddStackValue();
+void DwarfExpression::addSignedConstant(int64_t Value) {
+ emitOp(dwarf::DW_OP_consts);
+ emitSigned(Value);
+ addStackValue();
}
-void DwarfExpression::AddUnsignedConstant(uint64_t Value) {
- EmitOp(dwarf::DW_OP_constu);
- EmitUnsigned(Value);
- AddStackValue();
+void DwarfExpression::addUnsignedConstant(uint64_t Value) {
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(Value);
+ addStackValue();
}
-void DwarfExpression::AddUnsignedConstant(const APInt &Value) {
+void DwarfExpression::addUnsignedConstant(const APInt &Value) {
unsigned Size = Value.getBitWidth();
const uint64_t *Data = Value.getRawData();
// Chop it up into 64-bit pieces, because that's the maximum that
- // AddUnsignedConstant takes.
+ // addUnsignedConstant takes.
unsigned Offset = 0;
while (Offset < Size) {
- AddUnsignedConstant(*Data++);
+ addUnsignedConstant(*Data++);
if (Offset == 0 && Size <= 64)
break;
- AddOpPiece(std::min(Size-Offset, 64u), Offset);
+ addOpPiece(std::min(Size-Offset, 64u), Offset);
Offset += 64;
}
}
-bool DwarfExpression::AddMachineRegExpression(const TargetRegisterInfo &TRI,
+bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
DIExpressionCursor &ExprCursor,
unsigned MachineReg,
unsigned FragmentOffsetInBits) {
- if (!ExprCursor)
- return AddMachineReg(TRI, MachineReg);
+ auto Fragment = ExprCursor.getFragmentInfo();
+ if (!addMachineReg(TRI, MachineReg, Fragment ? Fragment->SizeInBits : ~1U))
+ return false;
- // Pattern-match combinations for which more efficient representations exist
- // first.
- bool ValidReg = false;
+ bool HasComplexExpression = false;
auto Op = ExprCursor.peek();
+ if (Op && Op->getOp() != dwarf::DW_OP_LLVM_fragment)
+ HasComplexExpression = true;
+
+ // If the register can only be described by a complex expression (i.e.,
+ // multiple subregisters) it doesn't safely compose with another complex
+ // expression. For example, it is not possible to apply a DW_OP_deref
+ // operation to multiple DW_OP_pieces.
+ if (HasComplexExpression && DwarfRegs.size() > 1) {
+ DwarfRegs.clear();
+ return false;
+ }
+
+ // Handle simple register locations.
+ if (!HasComplexExpression) {
+ for (auto &Reg : DwarfRegs) {
+ if (Reg.DwarfRegNo >= 0)
+ addReg(Reg.DwarfRegNo, Reg.Comment);
+ addOpPiece(Reg.Size);
+ }
+ DwarfRegs.clear();
+ return true;
+ }
+
+ assert(DwarfRegs.size() == 1);
+ auto Reg = DwarfRegs[0];
+ bool FBReg = isFrameRegister(TRI, MachineReg);
+ assert(Reg.Size == 0 && "subregister has same size as superregister");
+
+ // Pattern-match combinations for which more efficient representations exist.
switch (Op->getOp()) {
default: {
- auto Fragment = ExprCursor.getFragmentInfo();
- ValidReg = AddMachineReg(TRI, MachineReg,
- Fragment ? Fragment->SizeInBits : ~1U);
+ if (FBReg)
+ addFBReg(0);
+ else
+ addReg(Reg.DwarfRegNo, 0);
break;
}
case dwarf::DW_OP_plus:
@@ -210,28 +236,42 @@ bool DwarfExpression::AddMachineRegExpression(const TargetRegisterInfo &TRI,
// [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset].
auto N = ExprCursor.peekNext();
if (N && N->getOp() == dwarf::DW_OP_deref) {
- unsigned Offset = Op->getArg(0);
- ValidReg = AddMachineRegIndirect(
- TRI, MachineReg, Op->getOp() == dwarf::DW_OP_plus ? Offset : -Offset);
+ int Offset = Op->getArg(0);
+ int SignedOffset = (Op->getOp() == dwarf::DW_OP_plus) ? Offset : -Offset;
+ if (FBReg)
+ addFBReg(SignedOffset);
+ else
+ addBReg(Reg.DwarfRegNo, SignedOffset);
+
ExprCursor.consume(2);
- } else
- ValidReg = AddMachineReg(TRI, MachineReg);
+ break;
+ }
+ addReg(Reg.DwarfRegNo, 0);
break;
}
case dwarf::DW_OP_deref:
// [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
- ValidReg = AddMachineRegIndirect(TRI, MachineReg);
+ if (FBReg)
+ addFBReg(0);
+ else
+ addBReg(Reg.DwarfRegNo, 0);
ExprCursor.take();
break;
}
-
- return ValidReg;
+ DwarfRegs.clear();
+ return true;
}
-void DwarfExpression::AddExpression(DIExpressionCursor &&ExprCursor,
+void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
unsigned FragmentOffsetInBits) {
while (ExprCursor) {
auto Op = ExprCursor.take();
+
+ // If we need to mask out a subregister, do it now, unless the next
+ // operation would emit an OpPiece anyway.
+ if (SubRegisterSizeInBits && Op->getOp() != dwarf::DW_OP_LLVM_fragment)
+ maskSubRegister();
+
switch (Op->getOp()) {
case dwarf::DW_OP_LLVM_fragment: {
unsigned SizeInBits = Op->getArg(1);
@@ -241,39 +281,45 @@ void DwarfExpression::AddExpression(DIExpressionCursor &&ExprCursor,
// location.
assert(OffsetInBits >= FragmentOffset && "fragment offset not added?");
- // If \a AddMachineReg already emitted DW_OP_piece operations to represent
+ // If \a addMachineReg already emitted DW_OP_piece operations to represent
// a super-register by splicing together sub-registers, subtract the size
// of the pieces that was already emitted.
SizeInBits -= OffsetInBits - FragmentOffset;
- // If \a AddMachineReg requested a DW_OP_bit_piece to stencil out a
+ // If \a addMachineReg requested a DW_OP_bit_piece to stencil out a
// sub-register that is smaller than the current fragment's size, use it.
if (SubRegisterSizeInBits)
SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits);
- AddOpPiece(SizeInBits, SubRegisterOffsetInBits);
+ addOpPiece(SizeInBits, SubRegisterOffsetInBits);
setSubRegisterPiece(0, 0);
break;
}
case dwarf::DW_OP_plus:
- EmitOp(dwarf::DW_OP_plus_uconst);
- EmitUnsigned(Op->getArg(0));
+ emitOp(dwarf::DW_OP_plus_uconst);
+ emitUnsigned(Op->getArg(0));
break;
case dwarf::DW_OP_minus:
// There is no OP_minus_uconst.
- EmitOp(dwarf::DW_OP_constu);
- EmitUnsigned(Op->getArg(0));
- EmitOp(dwarf::DW_OP_minus);
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(Op->getArg(0));
+ emitOp(dwarf::DW_OP_minus);
break;
case dwarf::DW_OP_deref:
- EmitOp(dwarf::DW_OP_deref);
+ emitOp(dwarf::DW_OP_deref);
break;
case dwarf::DW_OP_constu:
- EmitOp(dwarf::DW_OP_constu);
- EmitUnsigned(Op->getArg(0));
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(Op->getArg(0));
break;
case dwarf::DW_OP_stack_value:
- AddStackValue();
+ addStackValue();
+ break;
+ case dwarf::DW_OP_swap:
+ emitOp(dwarf::DW_OP_swap);
+ break;
+ case dwarf::DW_OP_xderef:
+ emitOp(dwarf::DW_OP_xderef);
break;
default:
llvm_unreachable("unhandled opcode found in expression");
@@ -281,9 +327,25 @@ void DwarfExpression::AddExpression(DIExpressionCursor &&ExprCursor,
}
}
+/// Add masking operations to stencil out a subregister.
+void DwarfExpression::maskSubRegister() {
+ assert(SubRegisterSizeInBits && "no subregister was registered");
+ if (SubRegisterOffsetInBits > 0)
+ addShr(SubRegisterOffsetInBits);
+ uint64_t Mask = (1ULL << (uint64_t)SubRegisterSizeInBits) - 1ULL;
+ addAnd(Mask);
+}
+
+
void DwarfExpression::finalize() {
- if (SubRegisterSizeInBits)
- AddOpPiece(SubRegisterSizeInBits, SubRegisterOffsetInBits);
+ assert(DwarfRegs.size() == 0 && "dwarf registers not emitted");
+ // Emit any outstanding DW_OP_piece operations to mask out subregisters.
+ if (SubRegisterSizeInBits == 0)
+ return;
+ // Don't emit a DW_OP_piece for a subregister at offset 0.
+ if (SubRegisterOffsetInBits == 0)
+ return;
+ addOpPiece(SubRegisterSizeInBits, SubRegisterOffsetInBits);
}
void DwarfExpression::addFragmentOffset(const DIExpression *Expr) {
@@ -294,6 +356,6 @@ void DwarfExpression::addFragmentOffset(const DIExpression *Expr) {
assert(FragmentOffset >= OffsetInBits &&
"overlapping or duplicate fragments");
if (FragmentOffset > OffsetInBits)
- AddOpPiece(FragmentOffset - OffsetInBits);
+ addOpPiece(FragmentOffset - OffsetInBits);
OffsetInBits = FragmentOffset;
}
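
The rewritten addMachineRegExpression keeps its old peepholes but now works from the collected DwarfRegs list: [reg, plus Off, deref] collapses to a breg with +Off, [reg, minus Off, deref] to a breg with -Off, and a bare deref to breg 0. A standalone model of just that pattern match; the real code emits through emitOp/emitSigned and consumes a DIExpressionCursor, and the breg/reg names below are printed rather than encoded.

    #include <cstdint>
    #include <iostream>
    #include <vector>

    enum Opcode { Plus, Minus, Deref, Other };
    struct ExprOp { Opcode Op; int64_t Arg; };

    void lowerRegExpression(int DwarfReg, std::vector<ExprOp> &Expr) {
      // [DW_OP_reg,Offset,DW_OP_plus/minus,DW_OP_deref] --> [DW_OP_breg,±Off]
      if (Expr.size() >= 2 && (Expr[0].Op == Plus || Expr[0].Op == Minus) &&
          Expr[1].Op == Deref) {
        int64_t Off = Expr[0].Op == Plus ? Expr[0].Arg : -Expr[0].Arg;
        std::cout << "DW_OP_breg" << DwarfReg << " " << Off << "\n";
        Expr.erase(Expr.begin(), Expr.begin() + 2); // cursor.consume(2)
        return;
      }
      // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg 0]
      if (!Expr.empty() && Expr[0].Op == Deref) {
        std::cout << "DW_OP_breg" << DwarfReg << " 0\n";
        Expr.erase(Expr.begin()); // cursor.take()
        return;
      }
      std::cout << "DW_OP_reg" << DwarfReg << "\n"; // no cheaper form exists
    }

    int main() {
      std::vector<ExprOp> E1 = {{Plus, 16}, {Deref, 0}};
      lowerRegExpression(6, E1); // DW_OP_breg6 16
      std::vector<ExprOp> E2 = {{Deref, 0}};
      lowerRegExpression(6, E2); // DW_OP_breg6 0
    }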
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index fd90fa05bc32..e8dc211eb3c2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -84,9 +84,19 @@ public:
/// entry.
class DwarfExpression {
protected:
- unsigned DwarfVersion;
+ /// Holds information about all subregisters comprising a register location.
+ struct Register {
+ int DwarfRegNo;
+ unsigned Size;
+ const char *Comment;
+ };
+
+ /// The register location, if any.
+ SmallVector<Register, 2> DwarfRegs;
+
/// Current Fragment Offset in Bits.
uint64_t OffsetInBits = 0;
+ unsigned DwarfVersion;
/// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.
unsigned SubRegisterSizeInBits = 0;
@@ -99,35 +109,54 @@ protected:
SubRegisterOffsetInBits = OffsetInBits;
}
-public:
- DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {}
- virtual ~DwarfExpression() {};
-
- /// This needs to be called last to commit any pending changes.
- void finalize();
+ /// Add masking operations to stencil out a subregister.
+ void maskSubRegister();
/// Output a dwarf operand and an optional assembler comment.
- virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0;
+ virtual void emitOp(uint8_t Op, const char *Comment = nullptr) = 0;
/// Emit a raw signed value.
- virtual void EmitSigned(int64_t Value) = 0;
+ virtual void emitSigned(int64_t Value) = 0;
/// Emit a raw unsigned value.
- virtual void EmitUnsigned(uint64_t Value) = 0;
+ virtual void emitUnsigned(uint64_t Value) = 0;
/// Return whether the given machine register is the frame register in the
/// current function.
virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0;
- /// Emit a dwarf register operation.
- void AddReg(int DwarfReg, const char *Comment = nullptr);
- /// Emit an (double-)indirect dwarf register operation.
- void AddRegIndirect(int DwarfReg, int Offset, bool Deref = false);
+ /// Emit a DW_OP_reg operation.
+ void addReg(int DwarfReg, const char *Comment = nullptr);
+ /// Emit a DW_OP_breg operation.
+ void addBReg(int DwarfReg, int Offset);
+ /// Emit DW_OP_fbreg <Offset>.
+ void addFBReg(int Offset);
+
+ /// Emit a partial DWARF register operation.
+ ///
+ /// \param MachineReg The register number.
+ /// \param MaxSize If the register must be composed from
+ /// sub-registers this is an upper bound
+ /// for how many bits the emitted DW_OP_piece
+ /// may cover.
+ ///
+ /// If size and offset are zero, an operation for the entire register is
+ /// emitted: Some targets do not provide a DWARF register number for every
+ /// register. If this is the case, this function will attempt to emit a DWARF
+ /// register by emitting a fragment of a super-register or by piecing together
+ /// multiple subregisters that alias the register.
+ ///
+ /// \return false if no DWARF register exists for MachineReg.
+ bool addMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg,
+ unsigned MaxSize = ~1U);
+
/// Emit a DW_OP_piece or DW_OP_bit_piece operation for a variable fragment.
/// \param OffsetInBits This is an optional offset into the location that
/// is at the top of the DWARF stack.
- void AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0);
+ void addOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0);
- /// Emit a shift-right dwarf expression.
- void AddShr(unsigned ShiftBy);
+ /// Emit a shift-right dwarf operation.
+ void addShr(unsigned ShiftBy);
+ /// Emit a bitwise and dwarf operation.
+ void addAnd(unsigned Mask);
/// Emit a DW_OP_stack_value, if supported.
///
@@ -140,37 +169,21 @@ public:
/// constant value, so the producers and consumers started to rely on
/// heuristics to disambiguate the value vs. location status of the
/// expression. See PR21176 for more details.
- void AddStackValue();
+ void addStackValue();
- /// Emit an indirect dwarf register operation for the given machine register.
- /// \return false if no DWARF register exists for MachineReg.
- bool AddMachineRegIndirect(const TargetRegisterInfo &TRI, unsigned MachineReg,
- int Offset = 0);
+ ~DwarfExpression() = default;
+public:
+ DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {}
- /// Emit a partial DWARF register operation.
- ///
- /// \param MachineReg The register number.
- /// \param MaxSize If the register must be composed from
- /// sub-registers this is an upper bound
- /// for how many bits the emitted DW_OP_piece
- /// may cover.
- ///
- /// If size and offset are zero, an operation for the entire register is
- /// emitted: Some targets do not provide a DWARF register number for every
- /// register. If this is the case, this function will attempt to emit a DWARF
- /// register by emitting a fragment of a super-register or by piecing together
- /// multiple subregisters that alias the register.
- ///
- /// \return false if no DWARF register exists for MachineReg.
- bool AddMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg,
- unsigned MaxSize = ~1U);
+ /// This needs to be called last to commit any pending changes.
+ void finalize();
/// Emit a signed constant.
- void AddSignedConstant(int64_t Value);
+ void addSignedConstant(int64_t Value);
/// Emit an unsigned constant.
- void AddUnsignedConstant(uint64_t Value);
+ void addUnsignedConstant(uint64_t Value);
/// Emit an unsigned constant.
- void AddUnsignedConstant(const APInt &Value);
+ void addUnsignedConstant(const APInt &Value);
/// Emit a machine register location. As an optimization this may also consume
/// the prefix of a DwarfExpression if a more efficient representation for
@@ -181,7 +194,7 @@ public:
/// fragment inside the entire variable.
/// \return false if no DWARF register exists
/// for MachineReg.
- bool AddMachineRegExpression(const TargetRegisterInfo &TRI,
+ bool addMachineRegExpression(const TargetRegisterInfo &TRI,
DIExpressionCursor &Expr, unsigned MachineReg,
unsigned FragmentOffsetInBits = 0);
/// Emit all remaining operations in the DIExpressionCursor.
@@ -189,7 +202,7 @@ public:
/// \param FragmentOffsetInBits If this is one fragment out of multiple
/// locations, this is the offset of the
/// fragment inside the entire variable.
- void AddExpression(DIExpressionCursor &&Expr,
+ void addExpression(DIExpressionCursor &&Expr,
unsigned FragmentOffsetInBits = 0);
/// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to
@@ -198,33 +211,32 @@ public:
};
/// DwarfExpression implementation for .debug_loc entries.
-class DebugLocDwarfExpression : public DwarfExpression {
+class DebugLocDwarfExpression final : public DwarfExpression {
ByteStreamer &BS;
+ void emitOp(uint8_t Op, const char *Comment = nullptr) override;
+ void emitSigned(int64_t Value) override;
+ void emitUnsigned(uint64_t Value) override;
+ bool isFrameRegister(const TargetRegisterInfo &TRI,
+ unsigned MachineReg) override;
public:
DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS)
: DwarfExpression(DwarfVersion), BS(BS) {}
-
- void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
- void EmitSigned(int64_t Value) override;
- void EmitUnsigned(uint64_t Value) override;
- bool isFrameRegister(const TargetRegisterInfo &TRI,
- unsigned MachineReg) override;
};
/// DwarfExpression implementation for singular DW_AT_location.
-class DIEDwarfExpression : public DwarfExpression {
+class DIEDwarfExpression final : public DwarfExpression {
const AsmPrinter &AP;
DwarfUnit &DU;
DIELoc &DIE;
-public:
- DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE);
- void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
- void EmitSigned(int64_t Value) override;
- void EmitUnsigned(uint64_t Value) override;
+ void emitOp(uint8_t Op, const char *Comment = nullptr) override;
+ void emitSigned(int64_t Value) override;
+ void emitUnsigned(uint64_t Value) override;
bool isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) override;
+public:
+ DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE);
DIELoc *finalize() {
DwarfExpression::finalize();
return &DIE;
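
The maskSubRegister helper added in DwarfExpression.cpp stencils a subregister out of its super-register by shifting the covered bits down and ANDing with a width mask, instead of relying on DW_OP_bit_piece. An arithmetic model of the effect; the real code emits DW_OP_constu/DW_OP_shr/DW_OP_and operations onto the expression stack.

    #include <cassert>
    #include <cstdint>

    uint64_t maskSubRegister(uint64_t SuperRegValue, unsigned OffsetInBits,
                             unsigned SizeInBits) {
      assert(SizeInBits > 0 && SizeInBits < 64 && "model limited to <64 bits");
      if (OffsetInBits > 0)
        SuperRegValue >>= OffsetInBits;          // addShr(OffsetInBits)
      uint64_t Mask = (1ULL << SizeInBits) - 1ULL;
      return SuperRegValue & Mask;               // addAnd(Mask)
    }

    int main() {
      // A hypothetical 16-bit subregister at bit offset 16 of a 64-bit reg.
      assert(maskSubRegister(0xAABBCCDDEEFF1122ULL, 16, 16) == 0xEEFFULL);
    }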
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 2a866c071f59..bad5b09553cd 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -54,15 +54,15 @@ DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU,
: DwarfExpression(AP.getDwarfVersion()), AP(AP), DU(DU),
DIE(DIE) {}
-void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) {
+void DIEDwarfExpression::emitOp(uint8_t Op, const char* Comment) {
DU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
}
-void DIEDwarfExpression::EmitSigned(int64_t Value) {
+void DIEDwarfExpression::emitSigned(int64_t Value) {
DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
}
-void DIEDwarfExpression::EmitUnsigned(uint64_t Value) {
+void DIEDwarfExpression::emitUnsigned(uint64_t Value) {
DU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
}
@@ -98,25 +98,35 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
default:
break;
- case dwarf::DW_LANG_C89:
- case dwarf::DW_LANG_C99:
+ // The languages below have valid values in all DWARF versions.
case dwarf::DW_LANG_C:
+ case dwarf::DW_LANG_C89:
case dwarf::DW_LANG_C_plus_plus:
- case dwarf::DW_LANG_ObjC:
- case dwarf::DW_LANG_ObjC_plus_plus:
return 0;
case dwarf::DW_LANG_Fortran77:
case dwarf::DW_LANG_Fortran90:
- case dwarf::DW_LANG_Fortran95:
return 1;
- // The languages below have valid values only if the DWARF version >= 4.
+ // The languages below have valid values only if the DWARF version >= 3.
+ case dwarf::DW_LANG_C99:
+ case dwarf::DW_LANG_ObjC:
+ case dwarf::DW_LANG_ObjC_plus_plus:
+ if (DD->getDwarfVersion() >= 3)
+ return 0;
+ break;
+
+ case dwarf::DW_LANG_Fortran95:
+ if (DD->getDwarfVersion() >= 3)
+ return 1;
+ break;
+
+ // Starting with DWARF v4, all defined languages have valid values.
+ case dwarf::DW_LANG_D:
case dwarf::DW_LANG_Java:
case dwarf::DW_LANG_Python:
case dwarf::DW_LANG_UPC:
- case dwarf::DW_LANG_D:
- if (dwarf::DWARF_VERSION >= 4)
+ if (DD->getDwarfVersion() >= 4)
return 0;
break;
@@ -127,31 +137,33 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
case dwarf::DW_LANG_Modula2:
case dwarf::DW_LANG_Pascal83:
case dwarf::DW_LANG_PLI:
- if (dwarf::DWARF_VERSION >= 4)
+ if (DD->getDwarfVersion() >= 4)
return 1;
break;
- // The languages below have valid values only if the DWARF version >= 5.
- case dwarf::DW_LANG_OpenCL:
- case dwarf::DW_LANG_Go:
- case dwarf::DW_LANG_Haskell:
+ // The languages below are new in DWARF v5.
+ case dwarf::DW_LANG_BLISS:
+ case dwarf::DW_LANG_C11:
case dwarf::DW_LANG_C_plus_plus_03:
case dwarf::DW_LANG_C_plus_plus_11:
+ case dwarf::DW_LANG_C_plus_plus_14:
+ case dwarf::DW_LANG_Dylan:
+ case dwarf::DW_LANG_Go:
+ case dwarf::DW_LANG_Haskell:
case dwarf::DW_LANG_OCaml:
+ case dwarf::DW_LANG_OpenCL:
+ case dwarf::DW_LANG_RenderScript:
case dwarf::DW_LANG_Rust:
- case dwarf::DW_LANG_C11:
case dwarf::DW_LANG_Swift:
- case dwarf::DW_LANG_Dylan:
- case dwarf::DW_LANG_C_plus_plus_14:
- if (dwarf::DWARF_VERSION >= 5)
+ if (DD->getDwarfVersion() >= 5)
return 0;
break;
- case dwarf::DW_LANG_Modula3:
- case dwarf::DW_LANG_Julia:
case dwarf::DW_LANG_Fortran03:
case dwarf::DW_LANG_Fortran08:
- if (dwarf::DWARF_VERSION >= 5)
+ case dwarf::DW_LANG_Julia:
+ case dwarf::DW_LANG_Modula3:
+ if (DD->getDwarfVersion() >= 5)
return 1;
break;
}
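Condensed, the rule after this hunk: C-family languages default to a lower bound of 0 and Fortran-family languages to 1, but a language code only yields a default from the DWARF version that introduced it, and that is now checked against the unit's actual version rather than the compile-time maximum dwarf::DWARF_VERSION. A hypothetical helper (not part of the patch) illustrating the behavior, with -1 meaning no default is known:

    // int64_t defaultLowerBound(dwarf::SourceLanguage Lang, unsigned Version);
    assert(defaultLowerBound(dwarf::DW_LANG_C89, 2) == 0);       // valid everywhere
    assert(defaultLowerBound(dwarf::DW_LANG_Fortran90, 2) == 1); // valid everywhere
    assert(defaultLowerBound(dwarf::DW_LANG_C99, 2) == -1);      // needs DWARF >= 3
    assert(defaultLowerBound(dwarf::DW_LANG_C99, 3) == 0);
    assert(defaultLowerBound(dwarf::DW_LANG_Rust, 5) == 0);      // new in DWARF v5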
@@ -285,13 +297,6 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, uint64_t Signature) {
dwarf::DW_FORM_ref_sig8, DIEInteger(Signature));
}
-void DwarfUnit::addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
- StringRef Identifier) {
- uint64_t Signature = DD->makeTypeSignature(Identifier);
- Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_ref_sig8,
- DIEInteger(Signature));
-}
-
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
DIEEntry Entry) {
const DIEUnit *CU = Die.getUnit();
@@ -465,50 +470,47 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// Decode the original location, and use that as the start of the byref
// variable's location.
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- SmallVector<uint64_t, 6> DIExpr;
- DIEDwarfExpression Expr(*Asm, *this, *Loc);
-
- bool validReg;
- if (Location.isReg())
- validReg = Expr.AddMachineReg(*Asm->MF->getSubtarget().getRegisterInfo(),
- Location.getReg());
- else
- validReg =
- Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
- Location.getReg(), Location.getOffset());
-
- if (!validReg)
- return;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ SmallVector<uint64_t, 9> Ops;
+ if (Location.isIndirect()) {
+ Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(Location.getOffset());
+ Ops.push_back(dwarf::DW_OP_deref);
+ }
// If we started with a pointer to the __Block_byref... struct, then
// the first thing we need to do is dereference the pointer (DW_OP_deref).
if (isPointer)
- DIExpr.push_back(dwarf::DW_OP_deref);
+ Ops.push_back(dwarf::DW_OP_deref);
// Next add the offset for the '__forwarding' field:
// DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
// adding the offset if it's 0.
if (forwardingFieldOffset > 0) {
- DIExpr.push_back(dwarf::DW_OP_plus);
- DIExpr.push_back(forwardingFieldOffset);
+ Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(forwardingFieldOffset);
}
// Now dereference the __forwarding field to get to the real __Block_byref
// struct: DW_OP_deref.
- DIExpr.push_back(dwarf::DW_OP_deref);
+ Ops.push_back(dwarf::DW_OP_deref);
// Now that we've got the real __Block_byref... struct, add the offset
// for the variable's field to get to the location of the actual variable:
// DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
if (varFieldOffset > 0) {
- DIExpr.push_back(dwarf::DW_OP_plus);
- DIExpr.push_back(varFieldOffset);
+ Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(varFieldOffset);
}
- Expr.AddExpression(makeArrayRef(DIExpr));
- Expr.finalize();
+
+ DIExpressionCursor Cursor(Ops);
+ const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
+ if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
+ return;
+ DwarfExpr.addExpression(std::move(Cursor));
// Now attach the location information to the DIE.
- addBlock(Die, Attribute, Loc);
+ addBlock(Die, Attribute, DwarfExpr.finalize());
}
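For a concrete picture of the buffer built above: with an indirect location at offset 4, isPointer set, forwardingFieldOffset 8 and varFieldOffset 16 (all values illustrative), the operands handed to the cursor are:

    SmallVector<uint64_t, 9> Ops = {
        dwarf::DW_OP_plus, 4,   // rebase the indirect location by its offset
        dwarf::DW_OP_deref,     // ...and load through it
        dwarf::DW_OP_deref,     // follow the pointer to the __Block_byref struct
        dwarf::DW_OP_plus, 8,   // offset of the '__forwarding' field
        dwarf::DW_OP_deref,     // follow __forwarding to the real struct
        dwarf::DW_OP_plus, 16,  // offset of the variable's field
    };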
/// Return true if type encoding is unsigned.
@@ -672,7 +674,7 @@ DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
return getDIE(Context);
}
-DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
+DIE *DwarfTypeUnit::createTypeDIE(const DICompositeType *Ty) {
auto *Context = resolve(Ty->getScope());
DIE *ContextDIE = getOrCreateContextDIE(Context);
@@ -684,8 +686,7 @@ DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
constructTypeDIE(TyDIE, cast<DICompositeType>(Ty));
- if (!Ty->isExternalTypeRef())
- updateAcceleratorTables(Context, Ty, TyDIE);
+ updateAcceleratorTables(Context, Ty, TyDIE);
return &TyDIE;
}
@@ -841,6 +842,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
// Add source line info if available and TyDesc is not a forward declaration.
if (!DTy->isForwardDecl())
addSourceLine(Buffer, DTy);
+
+  // If the DWARF address space value is other than None, add it for pointer and
+ // reference types as DW_AT_address_class.
+ if (DTy->getDWARFAddressSpace() && (Tag == dwarf::DW_TAG_pointer_type ||
+ Tag == dwarf::DW_TAG_reference_type))
+ addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4,
+ DTy->getDWARFAddressSpace().getValue());
}
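As an example of what feeds this path, a pointer type in a non-default address space carries the value through its type metadata, roughly as below (illustrative IR; only the dwarfAddressSpace field matters here):

    // !1 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !2,
    //                     size: 64, dwarfAddressSpace: 1)
    // ...which this hunk lowers to:
    //   DW_AT_address_class [DW_FORM_data4]  (0x00000001)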
void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) {
@@ -892,13 +900,6 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
}
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
- if (CTy->isExternalTypeRef()) {
- StringRef Identifier = CTy->getIdentifier();
- assert(!Identifier.empty() && "external type ref without identifier");
- addFlag(Buffer, dwarf::DW_AT_declaration);
- return addDIETypeSignature(Buffer, dwarf::DW_AT_signature, Identifier);
- }
-
// Add name if not anonymous or intermediate type.
StringRef Name = CTy->getName();
@@ -1180,8 +1181,12 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
}
void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
- bool Minimal) {
- if (!Minimal)
+ bool SkipSPAttributes) {
+  // If -fdebug-info-for-profiling is enabled, we need to emit the subprogram
+ // and its source location.
+ bool SkipSPSourceLocation = SkipSPAttributes &&
+ !CUNode->getDebugInfoForProfiling();
+ if (!SkipSPSourceLocation)
if (applySubprogramDefinitionAttributes(SP, SPDie))
return;
@@ -1189,12 +1194,13 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (!SP->getName().empty())
addString(SPDie, dwarf::DW_AT_name, SP->getName());
+ if (!SkipSPSourceLocation)
+ addSourceLine(SPDie, SP);
+
// Skip the rest of the attributes under -gmlt to save space.
- if (Minimal)
+ if (SkipSPAttributes)
return;
- addSourceLine(SPDie, SP);
-
// Add the prototype if we have a prototype and we have a C like
// language.
uint16_t Language = getLanguage();
@@ -1526,18 +1532,27 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
return &StaticMemberDIE;
}
-void DwarfUnit::emitHeader(bool UseOffsets) {
+void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) {
// Emit size of content not including length itself
Asm->OutStreamer->AddComment("Length of Unit");
Asm->EmitInt32(getHeaderSize() + getUnitDie().getSize());
Asm->OutStreamer->AddComment("DWARF version number");
- Asm->EmitInt16(DD->getDwarfVersion());
- Asm->OutStreamer->AddComment("Offset Into Abbrev. Section");
+ unsigned Version = DD->getDwarfVersion();
+ Asm->EmitInt16(Version);
+
+ // DWARF v5 reorders the address size and adds a unit type.
+ if (Version >= 5) {
+ Asm->OutStreamer->AddComment("DWARF Unit Type");
+ Asm->EmitInt8(UT);
+ Asm->OutStreamer->AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+ }
// We share one abbreviations table across all units so it's always at the
// start of the section. Use a relocatable offset where needed to ensure
// linking doesn't invalidate that offset.
+ Asm->OutStreamer->AddComment("Offset Into Abbrev. Section");
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
if (UseOffsets)
Asm->EmitInt32(0);
@@ -1545,12 +1560,16 @@ void DwarfUnit::emitHeader(bool UseOffsets) {
Asm->emitDwarfSymbolReference(
TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false);
- Asm->OutStreamer->AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+ if (Version <= 4) {
+ Asm->OutStreamer->AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+ }
}
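Byte for byte, the two unit-header layouts this function now emits, after the initial length field and assuming DWARF32 offsets:

    // DWARF v2-v4:                    DWARF v5:
    //   uint16 version                  uint16 version
    //   uint32 debug_abbrev_offset      uint8  unit_type (DW_UT_*)
    //   uint8  address_size             uint8  address_size
    //                                   uint32 debug_abbrev_offset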
void DwarfTypeUnit::emitHeader(bool UseOffsets) {
- DwarfUnit::emitHeader(UseOffsets);
+ DwarfUnit::emitCommonHeader(UseOffsets,
+ DD->useSplitDwarf() ? dwarf::DW_UT_split_type
+ : dwarf::DW_UT_type);
Asm->OutStreamer->AddComment("Type Signature");
Asm->OutStreamer->EmitIntValue(TypeSignature, sizeof(TypeSignature));
Asm->OutStreamer->AddComment("Type DIE Offset");
@@ -1564,3 +1583,13 @@ bool DwarfTypeUnit::isDwoUnit() const {
// when split DWARF is being used.
return DD->useSplitDwarf();
}
+
+void DwarfTypeUnit::addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) {
+ getCU().addGlobalNameForTypeUnit(Name, Context);
+}
+
+void DwarfTypeUnit::addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) {
+ getCU().addGlobalTypeUnitType(Ty, Context);
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 8654d6f0caf4..d626ef920f95 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -124,12 +124,12 @@ public:
std::string getParentContextString(const DIScope *Context) const;
/// Add a new global name to the compile unit.
- virtual void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) {
- }
+ virtual void addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) = 0;
/// Add a new global type to the compile unit.
virtual void addGlobalType(const DIType *Ty, const DIE &Die,
- const DIScope *Context) {}
+ const DIScope *Context) = 0;
/// Returns the DIE map slot for the specified debug variable.
///
@@ -198,9 +198,6 @@ public:
/// Add a type's DW_AT_signature and set the declaration flag.
void addDIETypeSignature(DIE &Die, uint64_t Signature);
- /// Add an attribute containing the type signature for a unique identifier.
- void addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
- StringRef Identifier);
/// Add block data.
void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block);
@@ -256,15 +253,12 @@ public:
DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal = false);
void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
- bool Minimal = false);
+ bool SkipSPAttributes = false);
/// Find existing DIE or create new DIE for the given type.
DIE *getOrCreateTypeDIE(const MDNode *N);
/// Get context owner's DIE.
- DIE *createTypeDIE(const DICompositeType *Ty);
-
- /// Get context owner's DIE.
DIE *getOrCreateContextDIE(const DIScope *Context);
/// Construct DIEs for types that contain vtables.
@@ -282,11 +276,13 @@ public:
virtual unsigned getHeaderSize() const {
return sizeof(int16_t) + // DWARF version number
sizeof(int32_t) + // Offset Into Abbrev. Section
- sizeof(int8_t); // Pointer Size (in bytes)
+ sizeof(int8_t) + // Pointer Size (in bytes)
+ (DD->getDwarfVersion() >= 5 ? sizeof(int8_t)
+ : 0); // DWARF v5 unit type
}
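Worked out, the sizes this returns, excluding the initial length field:

    //   DWARF v2-v4: 2 (version) + 4 (abbrev offset) + 1 (addr size)          = 7
    //   DWARF v5:    2 (version) + 4 (abbrev offset) + 1 (addr size) + 1 (UT) = 8
    // DwarfTypeUnit::getHeaderSize() adds 8 (type signature) + 4 (type DIE
    // offset) on top of this.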
/// Emit the header for this unit, not including the initial length field.
- virtual void emitHeader(bool UseOffsets);
+ virtual void emitHeader(bool UseOffsets) = 0;
virtual DwarfCompileUnit &getCU() = 0;
@@ -306,6 +302,14 @@ protected:
return Ref.resolve();
}
+ /// If this is a named finished type then include it in the list of types for
+ /// the accelerator tables.
+ void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
+ const DIE &TyDIE);
+
+ /// Emit the common part of the header for this unit.
+ void emitCommonHeader(bool UseOffsets, dwarf::UnitType UT);
+
private:
void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy);
void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy);
@@ -330,11 +334,6 @@ private:
/// Set D as anonymous type for index which can be reused later.
void setIndexTyDie(DIE *D) { IndexTyDie = D; }
- /// If this is a named finished type then include it in the list of types for
- /// the accelerator tables.
- void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
- const DIE &TyDIE);
-
virtual bool isDwoUnit() const = 0;
};
@@ -354,12 +353,19 @@ public:
void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; }
void setType(const DIE *Ty) { this->Ty = Ty; }
+ /// Get context owner's DIE.
+ DIE *createTypeDIE(const DICompositeType *Ty);
+
/// Emit the header for this unit, not including the initial length field.
void emitHeader(bool UseOffsets) override;
unsigned getHeaderSize() const override {
return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature
sizeof(uint32_t); // Type DIE Offset
}
+ void addGlobalName(StringRef Name, const DIE &Die,
+ const DIScope *Context) override;
+ void addGlobalType(const DIType *Ty, const DIE &Die,
+ const DIScope *Context) override;
DwarfCompileUnit &getCU() override { return CU; }
};
} // end llvm namespace
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 6a023b998b32..342efc3611c7 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter -----*- C++ -*-===//
+//===- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,21 +14,19 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
@@ -38,13 +36,12 @@ class ErlangGCPrinter : public GCMetadataPrinter {
public:
void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override;
};
-}
+
+} // end anonymous namespace
static GCMetadataPrinterRegistry::Add<ErlangGCPrinter>
X("erlang", "erlang-compatible garbage collector");
-void llvm::linkErlangGCPrinter() {}
-
void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AsmPrinter &AP) {
MCStreamer &OS = *AP.OutStreamer;
@@ -121,3 +118,5 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
}
}
}
+
+void llvm::linkErlangGCPrinter() {}
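Moving linkErlangGCPrinter() below the class is purely cosmetic; the function is an empty link-time anchor. A sketch of the consuming side, with a hypothetical helper name:

    #include "llvm/CodeGen/GCs.h"

    // Referencing the anchor pulls this object file into the link, which runs
    // the static GCMetadataPrinterRegistry::Add<ErlangGCPrinter> initializer
    // above and thereby registers the "erlang" printer.
    void initGCPrinters() { // hypothetical tool-side helper
      llvm::linkErlangGCPrinter();
    }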
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp
index 9d7c96a1b8ef..704f0ac2f191 100644
--- a/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -68,7 +68,7 @@ void WinException::beginFunction(const MachineFunction *MF) {
const Function *F = MF->getFunction();
- shouldEmitMoves = Asm->needsSEHMoves();
+ shouldEmitMoves = Asm->needsSEHMoves() && MF->hasWinCFI();
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
@@ -94,7 +94,7 @@ void WinException::beginFunction(const MachineFunction *MF) {
// If we're not using CFI, we don't want the CFI or the personality, but we
// might want EH tables if we had EH pads.
- if (!Asm->MAI->usesWindowsCFI() || (!MF->hasWinCFI() && !PerFn)) {
+ if (!Asm->MAI->usesWindowsCFI()) {
if (Per == EHPersonality::MSVC_X86SEH && !hasEHFunclets) {
// If this is 32-bit SEH and we don't have any funclets (really invokes),
// make sure we emit the parent offset label. Some unreferenced filter
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index bf5cf105a8f8..9c19a4fd3c3e 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -1532,7 +1532,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
Type *ResultTy;
SmallVector<Value *, 6> Args;
- AttributeSet Attr;
+ AttributeList Attr;
// 'size' argument.
if (!UseSizedLibcall) {
@@ -1593,7 +1593,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// Now, the return type.
if (CASExpected) {
ResultTy = Type::getInt1Ty(Ctx);
- Attr = Attr.addAttribute(Ctx, AttributeSet::ReturnIndex, Attribute::ZExt);
+ Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
} else if (HasResult && UseSizedLibcall)
ResultTy = SizedIntTy;
else
diff --git a/lib/CodeGen/BranchCoalescing.cpp b/lib/CodeGen/BranchCoalescing.cpp
new file mode 100644
index 000000000000..efdf300df850
--- /dev/null
+++ b/lib/CodeGen/BranchCoalescing.cpp
@@ -0,0 +1,758 @@
+//===-- CoalesceBranches.cpp - Coalesce blocks with the same condition ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Coalesce basic blocks guarded by the same branch condition into a single
+/// basic block.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "coal-branch"
+
+static cl::opt<cl::boolOrDefault>
+ EnableBranchCoalescing("enable-branch-coalesce", cl::Hidden,
+ cl::desc("enable coalescing of duplicate branches"));
+
+STATISTIC(NumBlocksCoalesced, "Number of blocks coalesced");
+STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged");
+STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced");
+
+//===----------------------------------------------------------------------===//
+// BranchCoalescing
+//===----------------------------------------------------------------------===//
+///
+/// Improve scheduling by coalescing branches that depend on the same condition.
+/// This pass looks for blocks that are guarded by the same branch condition
+/// and attempts to merge the blocks together. Such opportunities arise from
+/// the expansion of select statements in the IR.
+///
+/// For example, consider the following LLVM IR:
+///
+///     %test = icmp eq i32 %x, 0
+/// %tmp1 = select i1 %test, double %a, double 2.000000e-03
+/// %tmp2 = select i1 %test, double %b, double 5.000000e-03
+///
+/// This IR expands to the following machine code on PowerPC:
+///
+/// BB#0: derived from LLVM BB %entry
+/// Live Ins: %F1 %F3 %X6
+/// <SNIP1>
+/// %vreg0<def> = COPY %F1; F8RC:%vreg0
+/// %vreg5<def> = CMPLWI %vreg4<kill>, 0; CRRC:%vreg5 GPRC:%vreg4
+/// %vreg8<def> = LXSDX %ZERO8, %vreg7<kill>, %RM<imp-use>;
+/// mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7
+/// BCC 76, %vreg5, <BB#2>; CRRC:%vreg5
+/// Successors according to CFG: BB#1(?%) BB#2(?%)
+///
+/// BB#1: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#0
+/// Successors according to CFG: BB#2(?%)
+///
+/// BB#2: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#0 BB#1
+/// %vreg9<def> = PHI %vreg8, <BB#1>, %vreg0, <BB#0>;
+/// F8RC:%vreg9,%vreg8,%vreg0
+/// <SNIP2>
+/// BCC 76, %vreg5, <BB#4>; CRRC:%vreg5
+/// Successors according to CFG: BB#3(?%) BB#4(?%)
+///
+/// BB#3: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#2
+/// Successors according to CFG: BB#4(?%)
+///
+/// BB#4: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#2 BB#3
+/// %vreg13<def> = PHI %vreg12, <BB#3>, %vreg2, <BB#2>;
+/// F8RC:%vreg13,%vreg12,%vreg2
+/// <SNIP3>
+/// BLR8 %LR8<imp-use>, %RM<imp-use>, %F1<imp-use>
+///
+/// When this pattern is detected, branch coalescing will try to collapse
+/// it by moving code in BB#2 to BB#0 and/or BB#4 and removing BB#3.
+///
+/// If all conditions are met, the IR should collapse to:
+///
+/// BB#0: derived from LLVM BB %entry
+/// Live Ins: %F1 %F3 %X6
+/// <SNIP1>
+/// %vreg0<def> = COPY %F1; F8RC:%vreg0
+/// %vreg5<def> = CMPLWI %vreg4<kill>, 0; CRRC:%vreg5 GPRC:%vreg4
+/// %vreg8<def> = LXSDX %ZERO8, %vreg7<kill>, %RM<imp-use>;
+/// mem:LD8[ConstantPool] F8RC:%vreg8 G8RC:%vreg7
+/// <SNIP2>
+/// BCC 76, %vreg5, <BB#4>; CRRC:%vreg5
+/// Successors according to CFG: BB#1(0x2aaaaaaa / 0x80000000 = 33.33%)
+/// BB#4(0x55555554 / 0x80000000 = 66.67%)
+///
+/// BB#1: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#0
+/// Successors according to CFG: BB#4(0x40000000 / 0x80000000 = 50.00%)
+///
+/// BB#4: derived from LLVM BB %entry
+/// Predecessors according to CFG: BB#0 BB#1
+/// %vreg9<def> = PHI %vreg8, <BB#1>, %vreg0, <BB#0>;
+/// F8RC:%vreg9,%vreg8,%vreg0
+/// %vreg13<def> = PHI %vreg12, <BB#1>, %vreg2, <BB#0>;
+/// F8RC:%vreg13,%vreg12,%vreg2
+/// <SNIP3>
+/// BLR8 %LR8<imp-use>, %RM<imp-use>, %F1<imp-use>
+///
+/// Branch coalescing does not split blocks; it moves everything in the same
+/// direction, ensuring it does not break use/definition semantics.
+///
+/// PHI nodes and their corresponding use instructions are moved to the
+/// successor block if they have no uses within the successor block's PHI
+/// nodes. PHI node ordering cannot be assumed.
+///
+/// Non-PHI instructions can be moved up to the predecessor basic block or
+/// down into the successor basic block, following any PHI instructions.
+/// Whether an instruction moves up or down depends on whether the register(s)
+/// it defines are used in the current block or in any PHI instructions at the
+/// beginning of the successor block.
+
+namespace {
+
+class BranchCoalescing : public MachineFunctionPass {
+ struct CoalescingCandidateInfo {
+ MachineBasicBlock *BranchBlock; // Block containing the branch
+ MachineBasicBlock *BranchTargetBlock; // Block branched to
+ MachineBasicBlock *FallThroughBlock; // Fall-through if branch not taken
+ SmallVector<MachineOperand, 4> Cond;
+ bool MustMoveDown;
+ bool MustMoveUp;
+
+ CoalescingCandidateInfo();
+ void clear();
+ };
+
+ MachineDominatorTree *MDT;
+ MachinePostDominatorTree *MPDT;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+
+ void initialize(MachineFunction &F);
+ bool canCoalesceBranch(CoalescingCandidateInfo &Cand);
+ bool identicalOperands(ArrayRef<MachineOperand> OperandList1,
+ ArrayRef<MachineOperand> OperandList2) const;
+ bool validateCandidates(CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion) const;
+
+ static bool isBranchCoalescingEnabled() {
+ return EnableBranchCoalescing == cl::BOU_TRUE;
+ }
+
+public:
+ static char ID;
+
+ BranchCoalescing() : MachineFunctionPass(ID) {
+ initializeBranchCoalescingPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return "Branch Coalescing"; }
+
+ bool mergeCandidates(CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion);
+ bool canMoveToBeginning(const MachineInstr &MI,
+ const MachineBasicBlock &MBB) const;
+ bool canMoveToEnd(const MachineInstr &MI,
+ const MachineBasicBlock &MBB) const;
+ bool canMerge(CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion) const;
+ void moveAndUpdatePHIs(MachineBasicBlock *SourceRegionMBB,
+ MachineBasicBlock *TargetRegionMBB);
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // End anonymous namespace.
+
+char BranchCoalescing::ID = 0;
+char &llvm::BranchCoalescingID = BranchCoalescing::ID;
+
+INITIALIZE_PASS_BEGIN(BranchCoalescing, "branch-coalescing",
+ "Branch Coalescing", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_END(BranchCoalescing, "branch-coalescing", "Branch Coalescing",
+ false, false)
+
+BranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo()
+ : BranchBlock(nullptr), BranchTargetBlock(nullptr),
+ FallThroughBlock(nullptr), MustMoveDown(false), MustMoveUp(false) {}
+
+void BranchCoalescing::CoalescingCandidateInfo::clear() {
+ BranchBlock = nullptr;
+ BranchTargetBlock = nullptr;
+ FallThroughBlock = nullptr;
+ Cond.clear();
+ MustMoveDown = false;
+ MustMoveUp = false;
+}
+
+void BranchCoalescing::initialize(MachineFunction &MF) {
+ MDT = &getAnalysis<MachineDominatorTree>();
+ MPDT = &getAnalysis<MachinePostDominatorTree>();
+ TII = MF.getSubtarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+}
+
+///
+/// Analyze the branch statement of the given candidate to determine whether
+/// it can be coalesced. If the branch can be coalesced, the BranchTargetBlock
+/// and the FallThroughBlock are recorded in the specified Candidate.
+///
+///\param[in,out] Cand The coalescing candidate to analyze
+///\return true if and only if the branch can be coalesced, false otherwise
+///
+bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) {
+ DEBUG(dbgs() << "Determine if branch block " << Cand.BranchBlock->getNumber()
+ << " can be coalesced:");
+ MachineBasicBlock *FalseMBB = nullptr;
+
+ if (TII->analyzeBranch(*Cand.BranchBlock, Cand.BranchTargetBlock, FalseMBB,
+ Cand.Cond)) {
+ DEBUG(dbgs() << "TII unable to Analyze Branch - skip\n");
+ return false;
+ }
+
+ for (auto &I : Cand.BranchBlock->terminators()) {
+ DEBUG(dbgs() << "Looking at terminator : " << I << "\n");
+ if (!I.isBranch())
+ continue;
+
+ if (I.getNumOperands() != I.getNumExplicitOperands()) {
+ DEBUG(dbgs() << "Terminator contains implicit operands - skip : " << I
+ << "\n");
+ return false;
+ }
+ }
+
+ if (Cand.BranchBlock->isEHPad() || Cand.BranchBlock->hasEHPadSuccessor()) {
+ DEBUG(dbgs() << "EH Pad - skip\n");
+ return false;
+ }
+
+ // For now only consider triangles (i.e., BranchTargetBlock is set,
+ // FalseMBB is null, and BranchTargetBlock is a successor to BranchBlock)
+ if (!Cand.BranchTargetBlock || FalseMBB ||
+ !Cand.BranchBlock->isSuccessor(Cand.BranchTargetBlock)) {
+ DEBUG(dbgs() << "Does not form a triangle - skip\n");
+ return false;
+ }
+
+ // Ensure there are only two successors
+ if (Cand.BranchBlock->succ_size() != 2) {
+ DEBUG(dbgs() << "Does not have 2 successors - skip\n");
+ return false;
+ }
+
+ // Sanity check - the block must be able to fall through
+ assert(Cand.BranchBlock->canFallThrough() &&
+ "Expecting the block to fall through!");
+
+ // We have already ensured there are exactly two successors to
+ // BranchBlock and that BranchTargetBlock is a successor to BranchBlock.
+ // Ensure the single fall-through block is empty.
+ MachineBasicBlock *Succ =
+ (*Cand.BranchBlock->succ_begin() == Cand.BranchTargetBlock)
+ ? *Cand.BranchBlock->succ_rbegin()
+ : *Cand.BranchBlock->succ_begin();
+
+ assert(Succ && "Expecting a valid fall-through block\n");
+
+ if (!Succ->empty()) {
+ DEBUG(dbgs() << "Fall-through block contains code -- skip\n");
+ return false;
+ }
+
+ if (!Succ->isSuccessor(Cand.BranchTargetBlock)) {
+ DEBUG(dbgs()
+ << "Successor of fall-through block is not the branch-taken block\n");
+ return false;
+ }
+
+ Cand.FallThroughBlock = Succ;
+ DEBUG(dbgs() << "Valid Candidate\n");
+ return true;
+}
+
+///
+/// Determine if the two operand lists are identical
+///
+/// \param[in] OpList1 operand list
+/// \param[in] OpList2 operand list
+/// \return true if and only if the operand lists are identical
+///
+bool BranchCoalescing::identicalOperands(
+ ArrayRef<MachineOperand> OpList1, ArrayRef<MachineOperand> OpList2) const {
+
+ if (OpList1.size() != OpList2.size()) {
+ DEBUG(dbgs() << "Operand list is different size\n");
+ return false;
+ }
+
+ for (unsigned i = 0; i < OpList1.size(); ++i) {
+ const MachineOperand &Op1 = OpList1[i];
+ const MachineOperand &Op2 = OpList2[i];
+
+ DEBUG(dbgs() << "Op1: " << Op1 << "\n"
+ << "Op2: " << Op2 << "\n");
+
+ if (Op1.isIdenticalTo(Op2)) {
+ DEBUG(dbgs() << "Op1 and Op2 are identical!\n");
+ continue;
+ }
+
+ // If the operands are not identical, but are registers, check to see if the
+ // definition of the register produces the same value. If they produce the
+ // same value, consider them to be identical.
+ if (Op1.isReg() && Op2.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(Op1.getReg()) &&
+ TargetRegisterInfo::isVirtualRegister(Op2.getReg())) {
+ MachineInstr *Op1Def = MRI->getVRegDef(Op1.getReg());
+ MachineInstr *Op2Def = MRI->getVRegDef(Op2.getReg());
+ if (TII->produceSameValue(*Op1Def, *Op2Def, MRI)) {
+ DEBUG(dbgs() << "Op1Def: " << *Op1Def << " and " << *Op2Def
+ << " produce the same value!\n");
+ } else {
+ DEBUG(dbgs() << "Operands produce different values\n");
+ return false;
+ }
+ } else {
+ DEBUG(dbgs() << "The operands are not provably identical.\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+///
+/// Moves ALL PHI instructions in SourceMBB to the beginning of TargetMBB
+/// and updates them to refer to the new block. PHI node ordering cannot be
+/// assumed, so it does not matter where in TargetMBB the PHI instructions
+/// are moved to.
+///
+/// \param[in] SourceMBB block to move PHI instructions from
+/// \param[in] TargetMBB block to move PHI instructions to
+///
+void BranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB,
+ MachineBasicBlock *TargetMBB) {
+
+ MachineBasicBlock::iterator MI = SourceMBB->begin();
+ MachineBasicBlock::iterator ME = SourceMBB->getFirstNonPHI();
+
+ if (MI == ME) {
+ DEBUG(dbgs() << "SourceMBB contains no PHI instructions.\n");
+ return;
+ }
+
+ // Update all PHI instructions in SourceMBB and move to top of TargetMBB
+ for (MachineBasicBlock::iterator Iter = MI; Iter != ME; ++Iter) {
+ MachineInstr &PHIInst = *Iter;
+ for (unsigned i = 2, e = PHIInst.getNumOperands() + 1; i != e; i += 2) {
+ MachineOperand &MO = PHIInst.getOperand(i);
+ if (MO.getMBB() == SourceMBB)
+ MO.setMBB(TargetMBB);
+ }
+ }
+ TargetMBB->splice(TargetMBB->begin(), SourceMBB, MI, ME);
+}
+
+///
+/// This function checks if MI can be moved to the beginning of TargetMBB,
+/// following its PHI instructions. An instruction can be moved to the
+/// beginning of TargetMBB if none of its definitions are used by PHI nodes
+/// within TargetMBB.
+///
+/// \param[in] MI the machine instruction to move.
+/// \param[in] TargetMBB the machine basic block to move to
+/// \return true if it is safe to move MI to beginning of TargetMBB,
+/// false otherwise.
+///
+bool BranchCoalescing::canMoveToBeginning(const MachineInstr &MI,
+ const MachineBasicBlock &TargetMBB) const {
+
+ DEBUG(dbgs() << "Checking if " << MI << " can move to beginning of "
+ << TargetMBB.getNumber() << "\n");
+
+ for (auto &Def : MI.defs()) { // For each register MI defines
+ for (auto &Use : MRI->use_instructions(Def.getReg())) {
+ if (Use.isPHI() && Use.getParent() == &TargetMBB) {
+ DEBUG(dbgs() << " *** used in a PHI -- cannot move ***\n");
+ return false;
+ }
+ }
+ }
+
+ DEBUG(dbgs() << " Safe to move to the beginning.\n");
+ return true;
+}
+
+///
+/// This function checks if MI can be moved to the end of TargetMBB,
+/// immediately before the first terminator. An instruction can be moved to
+/// the end of TargetMBB if no PHI node in MI's own block defines a value
+/// that MI uses.
+///
+/// \param[in] MI the machine instruction to move.
+/// \param[in] TargetMBB the machine basic block to move to
+/// \return true if it is safe to move MI to end of TargetMBB,
+/// false otherwise.
+///
+bool BranchCoalescing::canMoveToEnd(const MachineInstr &MI,
+ const MachineBasicBlock &TargetMBB) const {
+
+ DEBUG(dbgs() << "Checking if " << MI << " can move to end of "
+ << TargetMBB.getNumber() << "\n");
+
+ for (auto &Use : MI.uses()) {
+ if (Use.isReg() && TargetRegisterInfo::isVirtualRegister(Use.getReg())) {
+ MachineInstr *DefInst = MRI->getVRegDef(Use.getReg());
+ if (DefInst->isPHI() && DefInst->getParent() == MI.getParent()) {
+ DEBUG(dbgs() << " *** Cannot move this instruction ***\n");
+ return false;
+ } else {
+ DEBUG(dbgs() << " *** def is in another block -- safe to move!\n");
+ }
+ }
+ }
+
+ DEBUG(dbgs() << " Safe to move to the end.\n");
+ return true;
+}
+
+///
+/// This method checks that the two coalescing candidates follow the
+/// expected pattern required for coalescing.
+///
+/// \param[in] SourceRegion The candidate to move statements from
+/// \param[in] TargetRegion The candidate to move statements to
+/// \return true if all instructions in SourceRegion.BranchBlock can be merged
+/// into a block in TargetRegion; false otherwise.
+///
+bool BranchCoalescing::validateCandidates(
+ CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion) const {
+
+ if (TargetRegion.BranchTargetBlock != SourceRegion.BranchBlock)
+ llvm_unreachable("Expecting SourceRegion to immediately follow TargetRegion");
+ else if (!MDT->dominates(TargetRegion.BranchBlock, SourceRegion.BranchBlock))
+ llvm_unreachable("Expecting TargetRegion to dominate SourceRegion");
+ else if (!MPDT->dominates(SourceRegion.BranchBlock, TargetRegion.BranchBlock))
+ llvm_unreachable("Expecting SourceRegion to post-dominate TargetRegion");
+ else if (!TargetRegion.FallThroughBlock->empty() ||
+ !SourceRegion.FallThroughBlock->empty())
+ llvm_unreachable("Expecting fall-through blocks to be empty");
+
+ return true;
+}
+
+///
+/// This method determines whether the two coalescing candidates can be merged.
+/// In order to be merged, every instruction must be able to either:
+/// 1. Move to the beginning of the SourceRegion.BranchTargetBlock; or
+/// 2. Move to the end of the TargetRegion.BranchBlock.
+/// Merging involves moving the instructions in the
+/// TargetRegion.BranchTargetBlock (also SourceRegion.BranchBlock).
+///
+/// This function first tries to move instructions from the
+/// TargetRegion.BranchTargetBlock down, to the beginning of the
+/// SourceRegion.BranchTargetBlock. This is not possible if any register defined
+/// in TargetRegion.BranchTargetBlock is used in a PHI node in the
+/// SourceRegion.BranchTargetBlock. In this case, check whether the statement
+/// can be moved up, to the end of the TargetRegion.BranchBlock (immediately
+/// before the branch statement). If it cannot move, then these blocks cannot
+/// be merged.
+///
+/// Note that there is no analysis for moving instructions past the fall-through
+/// blocks because they are confirmed to be empty. An assertion fires if they
+/// are not.
+///
+/// \param[in] SourceRegion The candidate to move statements from
+/// \param[in] TargetRegion The candidate to move statements to
+/// \return true if all instructions in SourceRegion.BranchBlock can be merged
+/// into a block in TargetRegion, false otherwise.
+///
+bool BranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion) const {
+ if (!validateCandidates(SourceRegion, TargetRegion))
+ return false;
+
+ // Walk through PHI nodes first and see if they force the merge into the
+ // SourceRegion.BranchTargetBlock.
+ for (MachineBasicBlock::iterator
+ I = SourceRegion.BranchBlock->instr_begin(),
+ E = SourceRegion.BranchBlock->getFirstNonPHI();
+ I != E; ++I) {
+ for (auto &Def : I->defs())
+ for (auto &Use : MRI->use_instructions(Def.getReg())) {
+ if (Use.isPHI() && Use.getParent() == SourceRegion.BranchTargetBlock) {
+ DEBUG(dbgs() << "PHI " << *I << " defines register used in another "
+ "PHI within branch target block -- can't merge\n");
+ NumPHINotMoved++;
+ return false;
+ }
+ if (Use.getParent() == SourceRegion.BranchBlock) {
+ DEBUG(dbgs() << "PHI " << *I
+ << " defines register used in this "
+ "block -- all must move down\n");
+ SourceRegion.MustMoveDown = true;
+ }
+ }
+ }
+
+ // Walk through the MI to see if they should be merged into
+ // TargetRegion.BranchBlock (up) or SourceRegion.BranchTargetBlock (down)
+ for (MachineBasicBlock::iterator
+ I = SourceRegion.BranchBlock->getFirstNonPHI(),
+ E = SourceRegion.BranchBlock->end();
+ I != E; ++I) {
+ if (!canMoveToBeginning(*I, *SourceRegion.BranchTargetBlock)) {
+ DEBUG(dbgs() << "Instruction " << *I
+ << " cannot move down - must move up!\n");
+ SourceRegion.MustMoveUp = true;
+ }
+ if (!canMoveToEnd(*I, *TargetRegion.BranchBlock)) {
+ DEBUG(dbgs() << "Instruction " << *I
+ << " cannot move up - must move down!\n");
+ SourceRegion.MustMoveDown = true;
+ }
+ }
+
+ return !(SourceRegion.MustMoveUp && SourceRegion.MustMoveDown);
+}
+
+/// Merge the instructions from SourceRegion.BranchBlock,
+/// SourceRegion.BranchTargetBlock, and SourceRegion.FallThroughBlock into
+/// TargetRegion.BranchBlock, TargetRegion.BranchTargetBlock and
+/// TargetRegion.FallThroughBlock respectively.
+///
+/// The successors for blocks in TargetRegion will be updated to use the
+/// successors from blocks in SourceRegion. Finally, the blocks in SourceRegion
+/// will be removed from the function.
+///
+/// A region consists of a BranchBlock, a FallThroughBlock, and a
+/// BranchTargetBlock. Branch coalescing works on patterns where the
+/// TargetRegion's BranchTargetBlock is also the SourceRegion's
+/// BranchBlock.
+///
+/// Before mergeCandidates:
+///
+/// +---------------------------+
+/// | TargetRegion.BranchBlock |
+/// +---------------------------+
+/// / |
+/// / +--------------------------------+
+/// | | TargetRegion.FallThroughBlock |
+/// \ +--------------------------------+
+/// \ |
+/// +----------------------------------+
+/// | TargetRegion.BranchTargetBlock |
+/// | SourceRegion.BranchBlock |
+/// +----------------------------------+
+/// / |
+/// / +--------------------------------+
+/// | | SourceRegion.FallThroughBlock |
+/// \ +--------------------------------+
+/// \ |
+/// +----------------------------------+
+/// | SourceRegion.BranchTargetBlock |
+/// +----------------------------------+
+///
+/// After mergeCandidates:
+///
+/// +-----------------------------+
+/// | TargetRegion.BranchBlock |
+/// | SourceRegion.BranchBlock |
+/// +-----------------------------+
+/// / |
+/// / +---------------------------------+
+/// | | TargetRegion.FallThroughBlock |
+/// | | SourceRegion.FallThroughBlock |
+/// \ +---------------------------------+
+/// \ |
+/// +----------------------------------+
+/// | SourceRegion.BranchTargetBlock |
+/// +----------------------------------+
+///
+/// \param[in] SourceRegion The candidate to move blocks from
+/// \param[in] TargetRegion The candidate to move blocks to
+///
+bool BranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion,
+ CoalescingCandidateInfo &TargetRegion) {
+
+ if (SourceRegion.MustMoveUp && SourceRegion.MustMoveDown)
+ llvm_unreachable("Cannot have both MustMoveDown and MustMoveUp set!");
+
+ if (!validateCandidates(SourceRegion, TargetRegion))
+ return false;
+
+ // Start the merging process by first handling the BranchBlock.
+ // Move any PHIs in SourceRegion.BranchBlock down to the branch-taken block
+ moveAndUpdatePHIs(SourceRegion.BranchBlock, SourceRegion.BranchTargetBlock);
+
+ // Move remaining instructions in SourceRegion.BranchBlock into
+ // TargetRegion.BranchBlock
+ MachineBasicBlock::iterator firstInstr =
+ SourceRegion.BranchBlock->getFirstNonPHI();
+ MachineBasicBlock::iterator lastInstr =
+ SourceRegion.BranchBlock->getFirstTerminator();
+
+ MachineBasicBlock *Source = SourceRegion.MustMoveDown
+ ? SourceRegion.BranchTargetBlock
+ : TargetRegion.BranchBlock;
+
+ MachineBasicBlock::iterator Target =
+ SourceRegion.MustMoveDown
+ ? SourceRegion.BranchTargetBlock->getFirstNonPHI()
+ : TargetRegion.BranchBlock->getFirstTerminator();
+
+ Source->splice(Target, SourceRegion.BranchBlock, firstInstr, lastInstr);
+
+ // Once PHI and instructions have been moved we need to clean up the
+ // control flow.
+
+ // Remove SourceRegion.FallThroughBlock before transferring successors of
+ // SourceRegion.BranchBlock to TargetRegion.BranchBlock.
+ SourceRegion.BranchBlock->removeSuccessor(SourceRegion.FallThroughBlock);
+ TargetRegion.BranchBlock->transferSuccessorsAndUpdatePHIs(
+ SourceRegion.BranchBlock);
+ // Update branch in TargetRegion.BranchBlock to jump to
+ // SourceRegion.BranchTargetBlock
+ // In this case, TargetRegion.BranchTargetBlock == SourceRegion.BranchBlock.
+ TargetRegion.BranchBlock->ReplaceUsesOfBlockWith(
+ SourceRegion.BranchBlock, SourceRegion.BranchTargetBlock);
+ // Remove the branch statement(s) in SourceRegion.BranchBlock
+ MachineBasicBlock::iterator I =
+ SourceRegion.BranchBlock->terminators().begin();
+ while (I != SourceRegion.BranchBlock->terminators().end()) {
+ MachineInstr &CurrInst = *I;
+ ++I;
+ if (CurrInst.isBranch())
+ CurrInst.eraseFromParent();
+ }
+
+ // Fall-through block should be empty since this is part of the condition
+ // to coalesce the branches.
+ assert(TargetRegion.FallThroughBlock->empty() &&
+ "FallThroughBlocks should be empty!");
+
+ // Transfer successor information and move PHIs down to the
+ // branch-taken block.
+ TargetRegion.FallThroughBlock->transferSuccessorsAndUpdatePHIs(
+ SourceRegion.FallThroughBlock);
+ TargetRegion.FallThroughBlock->removeSuccessor(SourceRegion.BranchBlock);
+
+ // Remove the blocks from the function.
+ assert(SourceRegion.BranchBlock->empty() &&
+ "Expecting branch block to be empty!");
+ SourceRegion.BranchBlock->eraseFromParent();
+
+ assert(SourceRegion.FallThroughBlock->empty() &&
+ "Expecting fall-through block to be empty!\n");
+ SourceRegion.FallThroughBlock->eraseFromParent();
+
+ NumBlocksCoalesced++;
+ return true;
+}
+
+bool BranchCoalescing::runOnMachineFunction(MachineFunction &MF) {
+
+ if (skipFunction(*MF.getFunction()) || MF.empty() ||
+ !isBranchCoalescingEnabled())
+ return false;
+
+ bool didSomething = false;
+
+ DEBUG(dbgs() << "******** Branch Coalescing ********\n");
+ initialize(MF);
+
+ DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n");
+
+ CoalescingCandidateInfo Cand1, Cand2;
+ // Walk over blocks and find candidates to merge
+ // Continue trying to merge with the first candidate found, as long as merging
+ // is successful.
+ for (MachineBasicBlock &MBB : MF) {
+ bool MergedCandidates = false;
+ do {
+ MergedCandidates = false;
+ Cand1.clear();
+ Cand2.clear();
+
+ Cand1.BranchBlock = &MBB;
+
+ // If unable to coalesce the branch, then continue to next block
+ if (!canCoalesceBranch(Cand1))
+ break;
+
+ Cand2.BranchBlock = Cand1.BranchTargetBlock;
+ if (!canCoalesceBranch(Cand2))
+ break;
+
+ // Sanity check
+ // The branch-taken block of the second candidate should post-dominate the
+ // first candidate
+ assert(MPDT->dominates(Cand2.BranchTargetBlock, Cand1.BranchBlock) &&
+ "Branch-taken block should post-dominate first candidate");
+
+ if (!identicalOperands(Cand1.Cond, Cand2.Cond)) {
+ DEBUG(dbgs() << "Blocks " << Cand1.BranchBlock->getNumber() << " and "
+ << Cand2.BranchBlock->getNumber()
+ << " have different branches\n");
+ break;
+ }
+ if (!canMerge(Cand2, Cand1)) {
+ DEBUG(dbgs() << "Cannot merge blocks " << Cand1.BranchBlock->getNumber()
+ << " and " << Cand2.BranchBlock->getNumber() << "\n");
+ NumBlocksNotCoalesced++;
+ continue;
+ }
+ DEBUG(dbgs() << "Merging blocks " << Cand1.BranchBlock->getNumber()
+ << " and " << Cand1.BranchTargetBlock->getNumber() << "\n");
+ MergedCandidates = mergeCandidates(Cand2, Cand1);
+ if (MergedCandidates)
+ didSomething = true;
+
+ DEBUG(dbgs() << "Function after merging: "; MF.dump(); dbgs() << "\n");
+ } while (MergedCandidates);
+ }
+
+#ifndef NDEBUG
+ // Verify MF is still valid after branch coalescing
+ if (didSomething)
+ MF.verify(nullptr, "Error in code produced by branch coalescing");
+#endif // NDEBUG
+
+ DEBUG(dbgs() << "Finished Branch Coalescing\n");
+ return didSomething;
+}
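The pass is registered but inert unless -enable-branch-coalesce is passed (see isBranchCoalescingEnabled above). A sketch of how a target could also schedule it, assuming the standard TargetPassConfig override points; MyTargetPassConfig is hypothetical:

    void MyTargetPassConfig::addMachineSSAOptimization() {
      TargetPassConfig::addMachineSSAOptimization();
      // Runs on SSA-form machine IR; still gated by -enable-branch-coalesce.
      addPass(&llvm::BranchCoalescingID);
    }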
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 6fba161033b0..2d01301402f0 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -49,6 +50,7 @@ STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumBranchOpts, "Number of branches optimized");
STATISTIC(NumTailMerge , "Number of block tails merged");
STATISTIC(NumHoist , "Number of times common instructions are hoisted");
+STATISTIC(NumTailCalls, "Number of tail calls optimized");
static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
cl::init(cl::BOU_UNSET), cl::Hidden);
@@ -123,8 +125,6 @@ BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
}
}
-/// RemoveDeadBlock - Remove the specified dead machine basic block from the
-/// function, updating the CFG.
void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
assert(MBB->pred_empty() && "MBB must be dead!");
DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
@@ -144,9 +144,6 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
MLI->removeBlock(MBB);
}
-/// OptimizeFunction - Perhaps branch folding, tail merging and other
-/// CFG optimizations on the given function. Block placement changes the layout
-/// and may create new tail merging opportunities.
bool BranchFolder::OptimizeFunction(MachineFunction &MF,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
@@ -348,8 +345,6 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
return TailLen;
}
-/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
-/// after it, replacing it with an unconditional branch to NewDest.
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
TII->ReplaceTailWithBranchTo(OldInst, NewDest);
@@ -362,9 +357,6 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
++NumTailMerge;
}
-/// SplitMBBAt - Given a machine basic block and an iterator into it, split the
-/// MBB so that the part before the iterator falls into the part starting at the
-/// iterator. This returns the new MBB.
MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
MachineBasicBlock::iterator BBI1,
const BasicBlock *BB) {
@@ -388,7 +380,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
// NewMBB belongs to the same loop as CurMBB.
- if (MLI)
+ if (MLI)
if (MachineLoop *ML = MLI->getLoopFor(&CurMBB))
ML->addBasicBlockToLoop(NewMBB, MLI->getBase());
@@ -436,7 +428,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB));
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- DebugLoc dl; // FIXME: this is nowhere
+ DebugLoc dl = CurMBB->findBranchDebugLoc();
if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
MachineBasicBlock *NextBB = &*I;
if (TBB == NextBB && !Cond.empty() && !FBB) {
@@ -497,6 +489,15 @@ BranchFolder::MBFIWrapper::printBlockFreq(raw_ostream &OS,
return MBFI.printBlockFreq(OS, Freq);
}
+void BranchFolder::MBFIWrapper::view(const Twine &Name, bool isSimple) {
+ MBFI.view(Name, isSimple);
+}
+
+uint64_t BranchFolder::MBFIWrapper::getEntryFreq() const {
+ return MBFI.getEntryFreq();
+}
+
/// CountTerminators - Count the number of terminators in the given
/// block and set I to the position of the first non-terminator, if there
/// is one, or MBB->end() otherwise.
@@ -516,6 +517,17 @@ static unsigned CountTerminators(MachineBasicBlock *MBB,
return NumTerms;
}
+/// A block with no successors and no return probably ends in unreachable and
+/// is cold. Also consider a block that ends in an indirect branch to be a
+/// return block, since many targets use plain indirect branches to return.
+static bool blockEndsInUnreachable(const MachineBasicBlock *MBB) {
+ if (!MBB->succ_empty())
+ return false;
+ if (MBB->empty())
+ return true;
+ return !(MBB->back().isReturn() || MBB->back().isIndirectBranch());
+}
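The shape this targets, sketched in pseudo machine code (a call to a noreturn function leaves the block with no terminator and no successors):

    // bb.1:  ...spills...  CALL64pcrel32 @abort   ; no successors
    // bb.2:  ...spills...  CALL64pcrel32 @abort   ; identical tail
    // With I1/I2 at the block starts, the check above lets bb.2 fold into
    // bb.1 even though neither block ends in a return or indirect branch.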
+
/// ProfitableToMerge - Check if two machine basic blocks have a common tail
/// and decide if it would be profitable to merge those tails. Return the
/// length of the common tail and iterators to the first common instruction
@@ -570,6 +582,15 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
return true;
}
+ // If these are identical non-return blocks with no successors, merge them.
+ // Such blocks are typically cold calls to noreturn functions like abort, and
+ // are unlikely to become a fallthrough target after machine block placement.
+ // Tail merging these blocks is unlikely to create additional unconditional
+ // branches, and will reduce the size of this cold code.
+ if (I1 == MBB1->begin() && I2 == MBB2->begin() &&
+ blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2))
+ return true;
+
// If one of the blocks can be completely merged and happens to be in
// a position where the other could fall through into it, merge any number
// of instructions, because it can be done without a branch.
@@ -579,6 +600,22 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
return true;
+ // If both blocks are identical and end in a branch, merge them unless they
+ // both have a fallthrough predecessor and successor.
+ // We can only do this after block placement because it depends on whether
+ // there are fallthroughs, and we don't know until after layout.
+ if (AfterPlacement && I1 == MBB1->begin() && I2 == MBB2->begin()) {
+ auto BothFallThrough = [](MachineBasicBlock *MBB) {
+ if (MBB->succ_size() != 0 && !MBB->canFallThrough())
+ return false;
+ MachineFunction::iterator I(MBB);
+ MachineFunction *MF = MBB->getParent();
+ return (MBB != &*MF->begin()) && std::prev(I)->canFallThrough();
+ };
+ if (!BothFallThrough(MBB1) || !BothFallThrough(MBB2))
+ return true;
+ }
+
// If both blocks have an unconditional branch temporarily stripped out,
// count that as an additional common instruction for the following
// heuristics. This heuristic is only accurate for single-succ blocks, so to
@@ -604,16 +641,6 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
(I1 == MBB1->begin() || I2 == MBB2->begin());
}
-/// ComputeSameTails - Look through all the blocks in MergePotentials that have
-/// hash CurHash (guaranteed to match the last element). Build the vector
-/// SameTails of all those that have the (same) largest number of instructions
-/// in common of any pair of these blocks. SameTails entries contain an
-/// iterator into MergePotentials (from which the MachineBasicBlock can be
-/// found) and a MachineBasicBlock::iterator into that MBB indicating the
-/// instruction where the matching code sequence begins.
-/// Order of elements in SameTails is the reverse of the order in which
-/// those blocks appear in MergePotentials (where they are not necessarily
-/// consecutive).
unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
unsigned MinCommonTailLength,
MachineBasicBlock *SuccBB,
@@ -650,8 +677,6 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
return maxCommonTailLength;
}
-/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
-/// MergePotentials, restoring branches at ends of blocks as appropriate.
void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB) {
@@ -671,8 +696,6 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
MergePotentials.erase(CurMPIter, MergePotentials.end());
}
-/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
-/// only of the common tail. Create a block that does by splitting one.
bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
MachineBasicBlock *SuccBB,
unsigned maxCommonTailLength,
@@ -723,6 +746,43 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
return true;
}
+void BranchFolder::MergeCommonTailDebugLocs(unsigned commonTailIndex) {
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
+
+ std::vector<MachineBasicBlock::iterator> NextCommonInsts(SameTails.size());
+ for (unsigned i = 0; i != SameTails.size(); ++i) {
+ if (i != commonTailIndex)
+ NextCommonInsts[i] = SameTails[i].getTailStartPos();
+ else {
+ assert(SameTails[i].getTailStartPos() == MBB->begin() &&
+ "MBB is not a common tail only block");
+ }
+ }
+
+ for (auto &MI : *MBB) {
+ if (MI.isDebugValue())
+ continue;
+ DebugLoc DL = MI.getDebugLoc();
+ for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) {
+ if (i == commonTailIndex)
+ continue;
+
+ auto &Pos = NextCommonInsts[i];
+ assert(Pos != SameTails[i].getBlock()->end() &&
+ "Reached BB end within common tail");
+ while (Pos->isDebugValue()) {
+ ++Pos;
+ assert(Pos != SameTails[i].getBlock()->end() &&
+ "Reached BB end within common tail");
+ }
+ assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!");
+ DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc());
+ NextCommonInsts[i] = ++Pos;
+ }
+ MI.setDebugLoc(DL);
+ }
+}
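The practical effect, sketched: rather than wiping locations in the kept tail, each instruction's location is merged with its counterparts, and DILocation::getMergedLocation keeps a location the copies agree on while differing locations collapse to an artificial line-0 location (as implemented around this time; the exact merge policy lives in DILocation):

    // kept tail:   MOV32mr ... !dbg (line 10)
    // other copy:  MOV32mr ... !dbg (line 10)  -> merged: line 10 kept
    // kept tail:   ADD32ri ... !dbg (line 11)
    // other copy:  ADD32ri ... !dbg (line 42)  -> merged: line 0 (artificial)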
+
static void
mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
MachineBasicBlock &MBBCommon) {
@@ -875,10 +935,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// Recompute common tail MBB's edge weights and block frequency.
setCommonTailEdgeWeights(*MBB);
- // Remove the original debug location from the common tail.
- for (auto &MI : *MBB)
- if (!MI.isDebugValue())
- MI.setDebugLoc(DebugLoc());
+ // Merge debug locations across identical instructions for common tail.
+ MergeCommonTailDebugLocs(commonTailIndex);
// MBB is common tail. Adjust all other BB's to jump to this one.
// Traversal must be forwards so erases work.
@@ -1043,7 +1101,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// Remove the unconditional branch at the end, if any.
if (TBB && (Cond.empty() || FBB)) {
- DebugLoc dl; // FIXME: this is nowhere
+ DebugLoc dl = PBB->findBranchDebugLoc();
TII->removeBranch(*PBB);
if (!Cond.empty())
// reinsert conditional branch only, for now
@@ -1193,8 +1251,6 @@ static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {
return DebugLoc();
}
-/// OptimizeBlock - Analyze and optimize control flow related to the specified
-/// block. This is never called on the entry block.
bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
bool MadeChange = false;
MachineFunction &MF = *MBB->getParent();
@@ -1386,6 +1442,42 @@ ReoptimizeBlock:
}
}
+ if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
+ MF.getFunction()->optForSize()) {
+ // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
+ // direction, thereby defeating careful block placement and regressing
+ // performance. Therefore, only consider this for optsize functions.
+ MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();
+ if (TII->isUnconditionalTailCall(TailCall)) {
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ bool PredAnalyzable =
+ !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
+
+ if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB) {
+ // The predecessor has a conditional branch to this block which consists
+ // of only a tail call. Try to fold the tail call into the conditional
+ // branch.
+ if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
+ // TODO: It would be nice if analyzeBranch() could provide a pointer
+ // to the branch instruction so replaceBranchWithTailCall() doesn't
+ // have to search for it.
+ TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
+ ++NumTailCalls;
+ Pred->removeSuccessor(MBB);
+ MadeChange = true;
+ return MadeChange;
+ }
+ }
+ // If the predecessor is falling through to this block, we could reverse
+ // the branch condition and fold the tail call into that. However, after
+ // that we might have to re-arrange the CFG to fall through to the other
+ // block and there is a high risk of regressing code size rather than
+ // improving it.
+ }
+ }
+
// Analyze the branch in the current block.
MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
SmallVector<MachineOperand, 4> CurCond;
@@ -1599,8 +1691,6 @@ ReoptimizeBlock:
// Hoist Common Code
//===----------------------------------------------------------------------===//
-/// HoistCommonCode - Hoist common instruction sequences at the start of basic
-/// blocks to their common predecessor.
bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
bool MadeChange = false;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
@@ -1734,9 +1824,6 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
return PI;
}
-/// HoistCommonCodeInSuccs - If the successors of MBB has common instruction
-/// sequence at the start of the function, move the instructions before MBB
-/// terminator if it's legal.
bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index fc48e484292d..4852721eea10 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -37,6 +37,9 @@ namespace llvm {
// flag. Ignored for optsize.
unsigned MinCommonTailLength = 0);
+ /// Perform branch folding, tail merging and other CFG optimizations on the
+ /// given function. Block placement changes the layout and may create new
+ /// tail merging opportunities.
bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii,
const TargetRegisterInfo *tri, MachineModuleInfo *mmi,
MachineLoopInfo *mli = nullptr,
@@ -122,6 +125,8 @@ namespace llvm {
const MachineBasicBlock *MBB) const;
raw_ostream &printBlockFreq(raw_ostream &OS,
const BlockFrequency Freq) const;
+ void view(const Twine &Name, bool isSimple = true);
+ uint64_t getEntryFreq() const;
private:
const MachineBlockFrequencyInfo &MBFI;
@@ -137,26 +142,64 @@ namespace llvm {
MachineBasicBlock* PredBB,
unsigned MinCommonTailLength);
void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB);
+
+ /// Delete the instruction OldInst and everything after it, replacing it
+ /// with an unconditional branch to NewDest.
void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest);
+
+ /// Given a machine basic block and an iterator into it, split the MBB so
+ /// that the part before the iterator falls through into the part starting
+ /// at the iterator. This returns the new MBB.
MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
MachineBasicBlock::iterator BBI1,
const BasicBlock *BB);
+
+ /// Look through all the blocks in MergePotentials that have hash CurHash
+ /// (guaranteed to match the last element). Build the vector SameTails of
+ /// all those that share the (same) largest number of trailing instructions
+ /// with another block in the group. SameTails entries contain an iterator into
+ /// MergePotentials (from which the MachineBasicBlock can be found) and a
+ /// MachineBasicBlock::iterator into that MBB indicating the instruction
+ /// where the matching code sequence begins. Order of elements in SameTails
+ /// is the reverse of the order in which those blocks appear in
+ /// MergePotentials (where they are not necessarily consecutive).
unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength,
MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB);
+
+ /// Remove all blocks with hash CurHash from MergePotentials, restoring
+ /// branches at ends of blocks as appropriate.
void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
MachineBasicBlock* PredBB);
+
+ /// None of the blocks to be tail-merged consist only of the common tail.
+ /// Create a block that does by splitting one.
bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
MachineBasicBlock *SuccBB,
unsigned maxCommonTailLength,
unsigned &commonTailIndex);
+
+ /// Create merged DebugLocs for the identical instructions across SameTails
+ /// and assign them to the instructions in the common tail.
+ void MergeCommonTailDebugLocs(unsigned commonTailIndex);
+
bool OptimizeBranches(MachineFunction &MF);
+
+ /// Analyze and optimize control flow related to the specified block. This
+ /// is never called on the entry block.
bool OptimizeBlock(MachineBasicBlock *MBB);
+
+ /// Remove the specified dead machine basic block from the function,
+ /// updating the CFG.
void RemoveDeadBlock(MachineBasicBlock *MBB);
+
+ /// Hoist common instruction sequences at the start of basic blocks to their
+ /// common predecessor.
bool HoistCommonCode(MachineFunction &MF);
+
+ /// If the successors of MBB have a common instruction sequence at the start
+ /// of their blocks, move these instructions before the MBB terminator if legal.
bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
};
}
diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp
index 8b27570a17f4..7af136941661 100644
--- a/lib/CodeGen/BranchRelaxation.cpp
+++ b/lib/CodeGen/BranchRelaxation.cpp
@@ -126,14 +126,16 @@ void BranchRelaxation::verify() {
#endif
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// print block size and offset information - debugging
-void BranchRelaxation::dumpBBs() {
+LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() {
for (auto &MBB : *MF) {
const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()];
dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
<< format("size=%#x\n", BBI.Size);
}
}
+#endif
/// scanFunction - Do the initial scan of the function, building up
/// information about each block.
diff --git a/lib/CodeGen/BuiltinGCs.cpp b/lib/CodeGen/BuiltinGCs.cpp
index ff7c99de0420..e4eab8c513d9 100644
--- a/lib/CodeGen/BuiltinGCs.cpp
+++ b/lib/CodeGen/BuiltinGCs.cpp
@@ -1,4 +1,4 @@
-//===-- BuiltinGCs.cpp - Boilerplate for our built in GC types --*- C++ -*-===//
+//===- BuiltinGCs.cpp - Boilerplate for our built in GC types -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,6 +14,8 @@
#include "llvm/CodeGen/GCs.h"
#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/Casting.h"
using namespace llvm;
@@ -77,6 +79,7 @@ public:
UsesMetadata = false;
CustomRoots = false;
}
+
Optional<bool> isGCManagedPointer(const Type *Ty) const override {
// Method is only valid on pointer typed values.
const PointerType *PT = cast<PointerType>(Ty);
@@ -110,6 +113,7 @@ public:
UsesMetadata = false;
CustomRoots = false;
}
+
Optional<bool> isGCManagedPointer(const Type *Ty) const override {
// Method is only valid on pointer typed values.
const PointerType *PT = cast<PointerType>(Ty);
@@ -117,7 +121,8 @@ public:
return (1 == PT->getAddressSpace());
}
};
-}
+
+} // end anonymous namespace
// Register all the above so that they can be found at runtime. Note that
// these static initializers are important since the registration list is
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 398ea88363b6..0912d9f68aff 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -4,6 +4,7 @@ add_llvm_library(LLVMCodeGen
Analysis.cpp
AtomicExpandPass.cpp
BasicTargetTransformInfo.cpp
+ BranchCoalescing.cpp
BranchFolding.cpp
BranchRelaxation.cpp
BuiltinGCs.cpp
@@ -23,6 +24,7 @@ add_llvm_library(LLVMCodeGen
ExpandISelPseudos.cpp
ExpandPostRAPseudos.cpp
FaultMaps.cpp
+ FEntryInserter.cpp
FuncletLayout.cpp
GCMetadata.cpp
GCMetadataPrinter.cpp
@@ -36,6 +38,7 @@ add_llvm_library(LLVMCodeGen
InterleavedAccessPass.cpp
IntrinsicLowering.cpp
LatencyPriorityQueue.cpp
+ LazyMachineBlockFrequencyInfo.cpp
LexicalScopes.cpp
LiveDebugValues.cpp
LiveDebugVariables.cpp
@@ -46,6 +49,7 @@ add_llvm_library(LLVMCodeGen
LiveRangeCalc.cpp
LiveRangeEdit.cpp
LiveRegMatrix.cpp
+ LiveRegUnits.cpp
LiveStackAnalysis.cpp
LiveVariables.cpp
LLVMTargetMachine.cpp
@@ -70,6 +74,8 @@ add_llvm_library(LLVMCodeGen
MachineLoopInfo.cpp
MachineModuleInfo.cpp
MachineModuleInfoImpls.cpp
+ MachineOptimizationRemarkEmitter.cpp
+ MachineOutliner.cpp
MachinePassRegistry.cpp
MachinePipeliner.cpp
MachinePostDominators.cpp
@@ -147,7 +153,7 @@ add_llvm_library(LLVMCodeGen
${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGen
${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGen/PBQP
- LINK_LIBS ${PTHREAD_LIB}
+ LINK_LIBS ${LLVM_PTHREAD_LIB}
DEPENDS
intrinsics_gen
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 2e33f14c7ee3..7cad4d031169 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -30,8 +30,7 @@ using namespace llvm;
CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
SmallVectorImpl<CCValAssign> &locs, LLVMContext &C)
: CallingConv(CC), IsVarArg(isVarArg), MF(mf),
- TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C),
- CallOrPrologue(Unknown) {
+ TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C) {
// No stack is used.
StackOffset = 0;
MaxStackArgAlign = 1;
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 4cf9b138f10d..3fc12ccc3b60 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -21,6 +21,7 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAtomicExpandPass(Registry);
+ initializeBranchCoalescingPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry);
initializeCodeGenPreparePass(Registry);
@@ -31,12 +32,15 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeEarlyIfConverterPass(Registry);
initializeExpandISelPseudosPass(Registry);
initializeExpandPostRAPass(Registry);
+ initializeFEntryInserterPass(Registry);
initializeFinalizeMachineBundlesPass(Registry);
initializeFuncletLayoutPass(Registry);
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
initializeIfConverterPass(Registry);
+ initializeImplicitNullChecksPass(Registry);
initializeInterleavedAccessPass(Registry);
+ initializeLiveDebugValuesPass(Registry);
initializeLiveDebugVariablesPass(Registry);
initializeLiveIntervalsPass(Registry);
initializeLiveStacksPass(Registry);
@@ -47,7 +51,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
initializeMachineCSEPass(Registry);
- initializeImplicitNullChecksPass(Registry);
initializeMachineCombinerPass(Registry);
initializeMachineCopyPropagationPass(Registry);
initializeMachineDominatorTreePass(Registry);
@@ -55,16 +58,18 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoPass(Registry);
+ initializeMachineOptimizationRemarkEmitterPassPass(Registry);
+ initializeMachineOutlinerPass(Registry);
initializeMachinePipelinerPass(Registry);
initializeMachinePostDominatorTreePass(Registry);
+ initializeMachineRegionInfoPassPass(Registry);
initializeMachineSchedulerPass(Registry);
initializeMachineSinkingPass(Registry);
initializeMachineVerifierPassPass(Registry);
- initializeXRayInstrumentationPass(Registry);
- initializePatchableFunctionPass(Registry);
initializeOptimizePHIsPass(Registry);
initializePEIPass(Registry);
initializePHIEliminationPass(Registry);
+ initializePatchableFunctionPass(Registry);
initializePeepholeOptimizerPass(Registry);
initializePostMachineSchedulerPass(Registry);
initializePostRAHazardRecognizerPass(Registry);
@@ -74,12 +79,11 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeRAGreedyPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeRenameIndependentSubregsPass(Registry);
+ initializeSafeStackPass(Registry);
initializeShrinkWrapPass(Registry);
initializeSlotIndexesPass(Registry);
initializeStackColoringPass(Registry);
initializeStackMapLivenessPass(Registry);
- initializeLiveDebugValuesPass(Registry);
- initializeSafeStackPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeTailDuplicatePassPass(Registry);
@@ -91,6 +95,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeVirtRegMapPass(Registry);
initializeVirtRegRewriterPass(Registry);
initializeWinEHPreparePass(Registry);
+ initializeXRayInstrumentationPass(Registry);
}
void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 934b470f13b5..2bdd189557b4 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -15,10 +15,12 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
@@ -53,8 +55,10 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -77,7 +81,6 @@ STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
-STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
static cl::opt<bool> DisableBranchOpts(
@@ -93,7 +96,7 @@ static cl::opt<bool> DisableSelectToBranch(
cl::desc("Disable select to branch conversion."));
static cl::opt<bool> AddrSinkUsingGEPs(
- "addr-sink-using-gep", cl::Hidden, cl::init(false),
+ "addr-sink-using-gep", cl::Hidden, cl::init(true),
cl::desc("Address sinking in CGP using GEPs."));
static cl::opt<bool> EnableAndCmpSinking(
@@ -135,15 +138,24 @@ static cl::opt<bool> ForceSplitStore(
"force-split-store", cl::Hidden, cl::init(false),
cl::desc("Force store splitting no matter what the target query says."));
+static cl::opt<bool>
+EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
+ cl::desc("Enable merging of redundant sexts when one is dominating"
+ " the other."), cl::init(true));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
+typedef SmallVector<Instruction *, 16> SExts;
+typedef DenseMap<Value *, SExts> ValueToSExts;
class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
const TargetMachine *TM;
+ const TargetSubtargetInfo *SubtargetInfo;
const TargetLowering *TLI;
+ const TargetRegisterInfo *TRI;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
const LoopInfo *LI;
@@ -165,6 +177,15 @@ class TypePromotionTransaction;
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
+ /// Keep track of instructions removed during promotion.
+ SetOfInstrs RemovedInsts;
+
+ /// Keep track of sext chains based on their initial value.
+ DenseMap<Value *, Instruction *> SeenChainsForSExt;
+
+ /// Keep track of SExt promoted.
+ ValueToSExts ValToSExtendedUses;
+
/// True if CFG is modified in any way.
bool ModifiedDT;
@@ -206,7 +227,7 @@ class TypePromotionTransaction;
Type *AccessTy, unsigned AS);
bool optimizeInlineAsmInst(CallInst *CS);
bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
- bool moveExtToFormExtLoad(Instruction *&I);
+ bool optimizeExt(Instruction *&I);
bool optimizeExtUses(Instruction *I);
bool optimizeLoadExt(LoadInst *I);
bool optimizeSelectInst(SelectInst *SI);
@@ -215,13 +236,21 @@ class TypePromotionTransaction;
bool optimizeExtractElementInst(Instruction *Inst);
bool dupRetToEnableTailCallOpts(BasicBlock *BB);
bool placeDbgValues(Function &F);
- bool sinkAndCmp(Function &F);
- bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
- Instruction *&Inst,
- const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInstCost);
+ bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
+ LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
+ bool tryToPromoteExts(TypePromotionTransaction &TPT,
+ const SmallVectorImpl<Instruction *> &Exts,
+ SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
+ unsigned CreatedInstsCost = 0);
+ bool mergeSExts(Function &F);
+ bool performAddressTypePromotion(
+ Instruction *&Inst,
+ bool AllowPromotionWithoutCommonHeader,
+ bool HasPromoted, TypePromotionTransaction &TPT,
+ SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
+ bool splitIndirectCriticalEdges(Function &F);
};
}
@@ -250,8 +279,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
BPI.reset();
ModifiedDT = false;
- if (TM)
- TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ if (TM) {
+ SubtargetInfo = TM->getSubtargetImpl(F);
+ TLI = SubtargetInfo->getTargetLowering();
+ TRI = SubtargetInfo->getRegisterInfo();
+ }
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
@@ -260,9 +292,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (ProfileGuidedSectionPrefix) {
ProfileSummaryInfo *PSI =
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- if (PSI->isFunctionEntryHot(&F))
+ if (PSI->isFunctionHotInCallGraph(&F))
F.setSectionPrefix(".hot");
- else if (PSI->isFunctionEntryCold(&F))
+ else if (PSI->isFunctionColdInCallGraph(&F))
F.setSectionPrefix(".cold");
}
@@ -290,18 +322,19 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// find a node corresponding to the value.
EverMadeChange |= placeDbgValues(F);
- // If there is a mask, compare against zero, and branch that can be combined
- // into a single target instruction, push the mask and compare into branch
- // users. Do this before OptimizeBlock -> OptimizeInst ->
- // OptimizeCmpExpression, which perturbs the pattern being searched for.
- if (!DisableBranchOpts) {
- EverMadeChange |= sinkAndCmp(F);
+ if (!DisableBranchOpts)
EverMadeChange |= splitBranchCondition(F);
- }
+
+ // Split some critical edges where one of the sources is an indirect branch,
+ // to help generate sane code for PHIs involving such edges.
+ EverMadeChange |= splitIndirectCriticalEdges(F);
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
+ SeenChainsForSExt.clear();
+ ValToSExtendedUses.clear();
+ RemovedInsts.clear();
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
@@ -311,6 +344,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (ModifiedDTOnIteration)
break;
}
+ if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
+ MadeChange |= mergeSExts(F);
+
+ // Really free removed instructions during promotion.
+ for (Instruction *I : RemovedInsts)
+ delete I;
+
EverMadeChange |= MadeChange;
}
@@ -432,6 +472,154 @@ BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
return DestBB;
}
+// Return the unique indirectbr predecessor of a block. This may return null
+// even if such a predecessor exists, if it's not useful for splitting.
+// If a predecessor is found, OtherPreds will contain all other (non-indirectbr)
+// predecessors of BB.
+static BasicBlock *
+findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
+ // If the block doesn't have any PHIs, we don't care about it, since there's
+ // no point in splitting it.
+ PHINode *PN = dyn_cast<PHINode>(BB->begin());
+ if (!PN)
+ return nullptr;
+
+ // Verify we have exactly one IBR predecessor.
+ // Conservatively bail out if one of the other predecessors is not a "regular"
+ // terminator (that is, not a switch or a br).
+ BasicBlock *IBB = nullptr;
+ for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
+ BasicBlock *PredBB = PN->getIncomingBlock(Pred);
+ TerminatorInst *PredTerm = PredBB->getTerminator();
+ switch (PredTerm->getOpcode()) {
+ case Instruction::IndirectBr:
+ if (IBB)
+ return nullptr;
+ IBB = PredBB;
+ break;
+ case Instruction::Br:
+ case Instruction::Switch:
+ OtherPreds.push_back(PredBB);
+ continue;
+ default:
+ return nullptr;
+ }
+ }
+
+ return IBB;
+}
+
+// Split critical edges where the source of the edge is an indirectbr
+// instruction. This isn't always possible, but we can handle some easy cases.
+// This is useful because MI is unable to split such critical edges,
+// which means it will not be able to sink instructions along those edges.
+// This is especially painful for indirect branches with many successors, where
+// we end up having to prepare all outgoing values in the origin block.
+//
+// Our normal algorithm for splitting critical edges requires us to update
+// the outgoing edges of the edge origin block, but for an indirectbr this
+// is hard, since it would require finding and updating the block addresses
+// the indirect branch uses. But if a block only has a single indirectbr
+// predecessor, with the others being regular branches, we can do it in a
+// different way.
+// Say we have A -> D, B -> D, I -> D where only I -> D is an indirectbr.
+// We can split D into D0 and D1, where D0 contains only the PHIs from D,
+// and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and
+// create the following structure:
+// A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1
+bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) {
+ // Check whether the function has any indirectbrs, and collect which blocks
+ // they may jump to. Since most functions don't have indirect branches,
+ // this lowers the common case's overhead to O(Blocks) instead of O(Edges).
+ SmallSetVector<BasicBlock *, 16> Targets;
+ for (auto &BB : F) {
+ auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator());
+ if (!IBI)
+ continue;
+
+ for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
+ Targets.insert(IBI->getSuccessor(Succ));
+ }
+
+ if (Targets.empty())
+ return false;
+
+ bool Changed = false;
+ for (BasicBlock *Target : Targets) {
+ SmallVector<BasicBlock *, 16> OtherPreds;
+ BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
+ // If we did not find an indirectbr, or the indirectbr is the only
+ // incoming edge, this isn't the kind of edge we're looking for.
+ if (!IBRPred || OtherPreds.empty())
+ continue;
+
+ // Don't even think about ehpads/landingpads.
+ Instruction *FirstNonPHI = Target->getFirstNonPHI();
+ if (FirstNonPHI->isEHPad() || Target->isLandingPad())
+ continue;
+
+ BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
+ // It's possible Target was its own successor through an indirectbr.
+ // In this case, the indirectbr now comes from BodyBlock.
+ if (IBRPred == Target)
+ IBRPred = BodyBlock;
+
+ // At this point Target only has PHIs, and BodyBlock has the rest of the
+ // block's body. Create a copy of Target that will be used by the "direct"
+ // preds.
+ ValueToValueMapTy VMap;
+ BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
+
+ for (BasicBlock *Pred : OtherPreds)
+ Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
+
+ // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
+ // they are clones, so the number of PHIs is the same.
+ // (a) Remove the edge coming from IBRPred from the "Direct" PHI
+ // (b) Leave that as the only edge in the "Indirect" PHI.
+ // (c) Merge the two in the body block.
+ BasicBlock::iterator Indirect = Target->begin(),
+ End = Target->getFirstNonPHI()->getIterator();
+ BasicBlock::iterator Direct = DirectSucc->begin();
+ BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();
+
+ assert(&*End == Target->getTerminator() &&
+ "Block was expected to only contain PHIs");
+
+ while (Indirect != End) {
+ PHINode *DirPHI = cast<PHINode>(Direct);
+ PHINode *IndPHI = cast<PHINode>(Indirect);
+
+ // Now, clean up - the direct block shouldn't get the indirect value,
+ // and vice versa.
+ DirPHI->removeIncomingValue(IBRPred);
+ Direct++;
+
+ // Advance the pointer here, to avoid invalidation issues when the old
+ // PHI is erased.
+ Indirect++;
+
+ PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
+ NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
+ IBRPred);
+
+ // Create a PHI in the body block, to merge the direct and indirect
+ // predecessors.
+ PHINode *MergePHI =
+ PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
+ MergePHI->addIncoming(NewIndPHI, Target);
+ MergePHI->addIncoming(DirPHI, DirectSucc);
+
+ IndPHI->replaceAllUsesWith(MergePHI);
+ IndPHI->eraseFromParent();
+ }
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
/// edges in ways that are non-optimal for isel. Start by eliminating these
@@ -1090,6 +1278,83 @@ static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
return false;
}
+/// Duplicate and sink the given 'and' instruction into user blocks where it is
+/// used in a compare to allow isel to generate better code for targets where
+/// this operation can be combined.
+///
+/// Return true if any changes are made.
+static bool sinkAndCmp0Expression(Instruction *AndI,
+ const TargetLowering &TLI,
+ SetOfInstrs &InsertedInsts) {
+ // Double-check that we're not trying to optimize an instruction that was
+ // already optimized by some other part of this pass.
+ assert(!InsertedInsts.count(AndI) &&
+ "Attempting to optimize already optimized and instruction");
+ (void) InsertedInsts;
+
+ // Nothing to do for single use in same basic block.
+ if (AndI->hasOneUse() &&
+ AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
+ return false;
+
+ // Try to avoid cases where sinking/duplicating is likely to increase register
+ // pressure.
+ if (!isa<ConstantInt>(AndI->getOperand(0)) &&
+ !isa<ConstantInt>(AndI->getOperand(1)) &&
+ AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
+ return false;
+
+ for (auto *U : AndI->users()) {
+ Instruction *User = cast<Instruction>(U);
+
+ // Only sink 'and' masks that feed an icmp with 0.
+ if (!isa<ICmpInst>(User))
+ return false;
+
+ auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
+ if (!CmpC || !CmpC->isZero())
+ return false;
+ }
+
+ if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
+ return false;
+
+ DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
+ DEBUG(AndI->getParent()->dump());
+
+ // Push the 'and' into the same block as the icmp 0. There should only be
+ // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
+ // others, so we don't need to keep track of which BBs we insert into.
+ for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
+
+ // Keep the 'and' in the same place if the use is already in the same block.
+ Instruction *InsertPt =
+ User->getParent() == AndI->getParent() ? AndI : User;
+ Instruction *InsertedAnd =
+ BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
+ AndI->getOperand(1), "", InsertPt);
+ // Propagate the debug info.
+ InsertedAnd->setDebugLoc(AndI->getDebugLoc());
+
+ // Replace a use of the 'and' with a use of the new 'and'.
+ TheUse = InsertedAnd;
+ ++NumAndUses;
+ DEBUG(User->getParent()->dump());
+ }
+
+ // We removed all uses, nuke the and.
+ AndI->eraseFromParent();
+ return true;
+}
+
/// Check if the candidates could be combined with a shift instruction, which
/// includes:
/// 1. Truncate instruction
@@ -2028,16 +2293,15 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
if (TLI) {
- // Unknown address space.
- // TODO: Target hook to pick which address space the intrinsic cares
- // about?
- unsigned AddrSpace = ~0u;
SmallVector<Value*, 2> PtrOps;
Type *AccessTy;
- if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace))
- while (!PtrOps.empty())
- if (optimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace))
+ if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
+ while (!PtrOps.empty()) {
+ Value *PtrVal = PtrOps.pop_back_val();
+ unsigned AS = PtrVal->getType()->getPointerAddressSpace();
+ if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
return true;
+ }
}
}
@@ -2168,11 +2432,11 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
// Conservatively require the attributes of the call to match those of the
// return. Ignore noalias because it doesn't affect the call sequence.
- AttributeSet CalleeAttrs = CS.getAttributes();
- if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
- removeAttribute(Attribute::NoAlias) !=
- AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
- removeAttribute(Attribute::NoAlias))
+ AttributeList CalleeAttrs = CS.getAttributes();
+ if (AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias) !=
+ AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias))
continue;
// Make sure the call instruction is followed by an unconditional branch to
@@ -2561,25 +2825,30 @@ class TypePromotionTransaction {
OperandsHider Hider;
/// Keep track of the uses replaced, if any.
UsesReplacer *Replacer;
+ /// Keep track of instructions removed.
+ SetOfInstrs &RemovedInsts;
public:
/// \brief Remove all references of \p Inst and optionally replace all its
/// uses with New.
+ /// \p RemovedInsts Keep track of the instructions removed by this Action.
/// \pre If !Inst->use_empty(), then New != nullptr
- InstructionRemover(Instruction *Inst, Value *New = nullptr)
+ InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
+ Value *New = nullptr)
: TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
- Replacer(nullptr) {
+ Replacer(nullptr), RemovedInsts(RemovedInsts) {
if (New)
Replacer = new UsesReplacer(Inst, New);
DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
+ RemovedInsts.insert(Inst);
+ // The instructions removed here will be freed after completing
+ // optimizeBlock() for all blocks as we need to keep track of the
+ // removed instructions during promotion.
Inst->removeFromParent();
}
~InstructionRemover() override { delete Replacer; }
- /// \brief Really remove the instruction.
- void commit() override { delete Inst; }
-
/// \brief Resurrect the instruction and reassign it to the proper uses if
/// new value was provided when build this action.
void undo() override {
@@ -2588,6 +2857,7 @@ class TypePromotionTransaction {
if (Replacer)
Replacer->undo();
Hider.undo();
+ RemovedInsts.erase(Inst);
}
};
@@ -2596,6 +2866,10 @@ public:
/// The restoration point is a pointer to an action instead of an iterator
/// because the iterator may be invalidated but not the pointer.
typedef const TypePromotionAction *ConstRestorationPt;
+
+ TypePromotionTransaction(SetOfInstrs &RemovedInsts)
+ : RemovedInsts(RemovedInsts) {}
+
/// Commit every change made in that transaction.
void commit();
/// Undo all the changes made after the given point.
@@ -2627,6 +2901,7 @@ private:
/// The ordered list of actions made so far.
SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
+ SetOfInstrs &RemovedInsts;
};
void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
@@ -2638,7 +2913,8 @@ void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
Value *NewVal) {
Actions.push_back(
- make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
+ make_unique<TypePromotionTransaction::InstructionRemover>(Inst,
+ RemovedInsts, NewVal));
}
void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
@@ -2705,8 +2981,8 @@ void TypePromotionTransaction::rollback(
/// This encapsulates the logic for matching the target-legal addressing modes.
class AddressingModeMatcher {
SmallVectorImpl<Instruction*> &AddrModeInsts;
- const TargetMachine &TM;
const TargetLowering &TLI;
+ const TargetRegisterInfo &TRI;
const DataLayout &DL;
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
@@ -2731,14 +3007,14 @@ class AddressingModeMatcher {
bool IgnoreProfitability;
AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
- const TargetMachine &TM, Type *AT, unsigned AS,
+ const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI,
+ Type *AT, unsigned AS,
Instruction *MI, ExtAddrMode &AM,
const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT)
- : AddrModeInsts(AMI), TM(TM),
- TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
- ->getTargetLowering()),
+ : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
PromotedInsts(PromotedInsts), TPT(TPT) {
@@ -2756,13 +3032,15 @@ public:
static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS,
Instruction *MemoryInst,
SmallVectorImpl<Instruction*> &AddrModeInsts,
- const TargetMachine &TM,
+ const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI,
const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT) {
ExtAddrMode Result;
- bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS,
+ bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI,
+ AccessTy, AS,
MemoryInst, Result, InsertedInsts,
PromotedInsts, TPT).matchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@@ -3583,18 +3861,18 @@ bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
/// Check to see if all uses of OpVal by the specified inline asm call are due
/// to memory operands. If so, return true, otherwise return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
- const TargetMachine &TM) {
+ const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI) {
const Function *F = CI->getParent()->getParent();
- const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();
TargetLowering::AsmOperandInfoVector TargetConstraints =
- TLI->ParseConstraints(F->getParent()->getDataLayout(), TRI,
+ TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
ImmutableCallSite(CI));
+
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
// Compute the constraint code and ConstraintType to use.
- TLI->ComputeConstraintToUse(OpInfo, SDValue());
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
// If this asm operand is our Value*, and if it isn't an indirect memory
// operand, we can't fold it!
@@ -3613,7 +3891,8 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
static bool FindAllMemoryUses(
Instruction *I,
SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
- SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) {
+ SmallPtrSetImpl<Instruction *> &ConsideredInsts,
+ const TargetLowering &TLI, const TargetRegisterInfo &TRI) {
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I).second)
return false;
@@ -3635,11 +3914,28 @@ static bool FindAllMemoryUses(
if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
unsigned opNo = U.getOperandNo();
- if (opNo == 0) return true; // Storing addr, not into addr.
+ if (opNo != StoreInst::getPointerOperandIndex())
+ return true; // Storing addr, not into addr.
MemoryUses.push_back(std::make_pair(SI, opNo));
continue;
}
+ if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
+ unsigned opNo = U.getOperandNo();
+ if (opNo != AtomicRMWInst::getPointerOperandIndex())
+ return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(RMW, opNo));
+ continue;
+ }
+
+ if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
+ unsigned opNo = U.getOperandNo();
+ if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
+ return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(CmpX, opNo));
+ continue;
+ }
+
if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
// If this is a cold call, we can sink the addressing calculation into
// the cold path. See optimizeCallInst
@@ -3650,12 +3946,12 @@ static bool FindAllMemoryUses(
if (!IA) return true;
// If this is a memory operand, we're cool, otherwise bail out.
- if (!IsOperandAMemoryOperand(CI, IA, I, TM))
+ if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
return true;
continue;
}
- if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM))
+ if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI))
return true;
}
@@ -3743,7 +4039,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// the use is just a particularly nice way of sinking it.
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI))
return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
@@ -3775,7 +4071,8 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
ExtAddrMode Result;
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS,
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI,
+ AddressAccessTy, AS,
MemoryInst, Result, InsertedInsts,
PromotedInsts, TPT);
Matcher.IgnoreProfitability = true;
@@ -3844,7 +4141,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
bool IsNumUsesConsensusValid = false;
SmallVector<Instruction*, 16> AddrModeInsts;
ExtAddrMode AddrMode;
- TypePromotionTransaction TPT;
+ TypePromotionTransaction TPT(RemovedInsts);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
while (!worklist.empty()) {
@@ -3869,7 +4166,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// addressing instructions might have.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
- V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM,
+ V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TLI, *TRI,
InsertedInsts, PromotedInsts, TPT);
// This check is broken into two cases with very similar code to avoid using
@@ -3935,11 +4232,10 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst << "\n");
if (SunkAddr->getType() != Addr->getType())
- SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+ SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
} else if (AddrSinkUsingGEPs ||
(!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
- TM->getSubtargetImpl(*MemoryInst->getParent()->getParent())
- ->useAA())) {
+ SubtargetInfo->useAA())) {
// By default, we use the GEP-based method when AA is used later. This
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
@@ -4042,7 +4338,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// We need to add this separately from the scale above to help with
// SDAG consecutive load/store merging.
if (ResultPtr->getType() != I8PtrTy)
- ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+ ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
@@ -4053,12 +4349,12 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
SunkAddr = ResultPtr;
} else {
if (ResultPtr->getType() != I8PtrTy)
- ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+ ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
if (SunkAddr->getType() != Addr->getType())
- SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+ SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
}
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
@@ -4185,14 +4481,14 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
return MadeChange;
}
-/// \brief Check if all the uses of \p Inst are equivalent (or free) zero or
+/// \brief Check if all the uses of \p Val are equivalent (or free) zero or
/// sign extensions.
-static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
- assert(!Inst->use_empty() && "Input must have at least one use");
- const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin());
+static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
+ assert(!Val->use_empty() && "Input must have at least one use");
+ const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
bool IsSExt = isa<SExtInst>(FirstUser);
Type *ExtTy = FirstUser->getType();
- for (const User *U : Inst->users()) {
+ for (const User *U : Val->users()) {
const Instruction *UI = cast<Instruction>(U);
if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
return false;
@@ -4202,11 +4498,11 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
continue;
// If IsSExt is true, we are in this situation:
- // a = Inst
+ // a = Val
// b = sext ty1 a to ty2
// c = sext ty1 a to ty3
// Assuming ty2 is shorter than ty3, this could be turned into:
- // a = Inst
+ // a = Val
// b = sext ty1 a to ty2
// c = sext ty2 b to ty3
// However, the last sext is not free.
@@ -4233,51 +4529,44 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
return true;
}
-/// \brief Try to form ExtLd by promoting \p Exts until they reach a
-/// load instruction.
-/// If an ext(load) can be formed, it is returned via \p LI for the load
-/// and \p Inst for the extension.
-/// Otherwise LI == nullptr and Inst == nullptr.
-/// When some promotion happened, \p TPT contains the proper state to
-/// revert them.
+/// \brief Try to speculatively promote extensions in \p Exts and continue
+/// promoting through newly promoted operands recursively as far as doing so is
+/// profitable. Save extensions profitably moved up in \p ProfitablyMovedExts.
+/// When some promotion happened, \p TPT contains the proper state to revert
+/// them.
///
-/// \return true when promoting was necessary to expose the ext(load)
-/// opportunity, false otherwise.
-///
-/// Example:
-/// \code
-/// %ld = load i32* %addr
-/// %add = add nuw i32 %ld, 4
-/// %zext = zext i32 %add to i64
-/// \endcode
-/// =>
-/// \code
-/// %ld = load i32* %addr
-/// %zext = zext i32 %ld to i64
-/// %add = add nuw i64 %zext, 4
-/// \encode
-/// Thanks to the promotion, we can match zext(load i32*) to i64.
-bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
- LoadInst *&LI, Instruction *&Inst,
- const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInstsCost = 0) {
- // Iterate over all the extensions to see if one form an ext(load).
+/// \return true if some promotion happened, false otherwise.
+bool CodeGenPrepare::tryToPromoteExts(
+ TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
+ SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
+ unsigned CreatedInstsCost) {
+ bool Promoted = false;
+
+ // Iterate over all the extensions to try to promote them.
for (auto I : Exts) {
- // Check if we directly have ext(load).
- if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
- Inst = I;
- // No promotion happened here.
- return false;
+ // Early check if we directly have ext(load).
+ if (isa<LoadInst>(I->getOperand(0))) {
+ ProfitablyMovedExts.push_back(I);
+ continue;
}
- // Check whether or not we want to do any promotion.
+
+ // Check whether or not we want to do any promotion. The reason we have
+ // this check inside the for loop is to catch the case where an extension
+ // is directly fed by a load, because in such a case the extension can be
+ // moved up without any promotion of its operands.
if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
- continue;
+ return false;
+
// Get the action to perform the promotion.
- TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
- I, InsertedInsts, *TLI, PromotedInsts);
+ TypePromotionHelper::Action TPH =
+ TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
// Check if we can promote.
- if (!TPH)
+ if (!TPH) {
+ // Save the current extension as we cannot move up through its operand.
+ ProfitablyMovedExts.push_back(I);
continue;
+ }
+
// Save the current state.
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
@@ -4297,110 +4586,293 @@ bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
// one extension but leave one. However, we optimistically keep going,
// because the new extension may be removed too.
long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
- TotalCreatedInstsCost -= ExtCost;
+ // FIXME: It would be possible to propagate a negative value instead of
+ // conservatively ceiling it to 0.
+ TotalCreatedInstsCost =
+ std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
if (!StressExtLdPromotion &&
(TotalCreatedInstsCost > 1 ||
!isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
- // The promotion is not profitable, rollback to the previous state.
+ // This promotion is not profitable; roll back to the previous state, and
+ // save the current extension in ProfitablyMovedExts as the latest
+ // speculative promotion turned out to be unprofitable.
TPT.rollback(LastKnownGood);
+ ProfitablyMovedExts.push_back(I);
+ continue;
+ }
+ // Continue promoting NewExts as far as doing so is profitable.
+ SmallVector<Instruction *, 2> NewlyMovedExts;
+ (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
+ bool NewPromoted = false;
+ for (auto ExtInst : NewlyMovedExts) {
+ Instruction *MovedExt = cast<Instruction>(ExtInst);
+ Value *ExtOperand = MovedExt->getOperand(0);
+ // If we have reached a load, we need this extra profitability check
+ // as it could potentially be merged into an ext(load).
+ if (isa<LoadInst>(ExtOperand) &&
+ !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
+ (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
+ continue;
+
+ ProfitablyMovedExts.push_back(MovedExt);
+ NewPromoted = true;
+ }
+
+ // If none of the speculative promotions for NewExts is profitable, roll back
+ // and save the current extension (I) as the last profitable extension.
+ if (!NewPromoted) {
+ TPT.rollback(LastKnownGood);
+ ProfitablyMovedExts.push_back(I);
continue;
}
// The promotion is profitable.
- // Check if it exposes an ext(load).
- (void)extLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
- if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
- // If we have created a new extension, i.e., now we have two
- // extensions. We must make sure one of them is merged with
- // the load, otherwise we may degrade the code quality.
- (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
- // Promotion happened.
- return true;
- // If this does not help to expose an ext(load) then, rollback.
- TPT.rollback(LastKnownGood);
+ Promoted = true;
}
- // None of the extension can form an ext(load).
- LI = nullptr;
- Inst = nullptr;
- return false;
+ return Promoted;
}
-/// Move a zext or sext fed by a load into the same basic block as the load,
-/// unless conditions are unfavorable. This allows SelectionDAG to fold the
-/// extend into the load.
-/// \p I[in/out] the extension may be modified during the process if some
-/// promotions apply.
-///
-bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
- // ExtLoad formation infrastructure requires TLI to be effective.
- if (!TLI)
- return false;
+/// Merge redundant sexts when one dominates the other.
+bool CodeGenPrepare::mergeSExts(Function &F) {
+ DominatorTree DT(F);
+ bool Changed = false;
+ for (auto &Entry : ValToSExtendedUses) {
+ SExts &Insts = Entry.second;
+ SExts CurPts;
+ for (Instruction *Inst : Insts) {
+ if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
+ Inst->getOperand(0) != Entry.first)
+ continue;
+ bool inserted = false;
+ for (auto &Pt : CurPts) {
+ if (DT.dominates(Inst, Pt)) {
+ Pt->replaceAllUsesWith(Inst);
+ RemovedInsts.insert(Pt);
+ Pt->removeFromParent();
+ Pt = Inst;
+ inserted = true;
+ Changed = true;
+ break;
+ }
+ if (!DT.dominates(Pt, Inst))
+ // Give up if we need to merge in a common dominator as the
+ // experiments show it is not profitable.
+ continue;
+ Inst->replaceAllUsesWith(Pt);
+ RemovedInsts.insert(Inst);
+ Inst->removeFromParent();
+ inserted = true;
+ Changed = true;
+ break;
+ }
+ if (!inserted)
+ CurPts.push_back(Inst);
+ }
+ }
+ return Changed;
+}
- // Try to promote a chain of computation if it allows to form
- // an extended load.
- TypePromotionTransaction TPT;
- TypePromotionTransaction::ConstRestorationPt LastKnownGood =
- TPT.getRestorationPoint();
- SmallVector<Instruction *, 1> Exts;
- Exts.push_back(I);
- // Look for a load being extended.
- LoadInst *LI = nullptr;
- Instruction *OldExt = I;
- bool HasPromoted = extLdPromotion(TPT, LI, I, Exts);
- if (!LI || !I) {
- assert(!HasPromoted && !LI && "If we did not match any load instruction "
- "the code must remain the same");
- I = OldExt;
- return false;
+/// Return true if an ext(load) can be formed from an extension in
+/// \p MovedExts.
+bool CodeGenPrepare::canFormExtLd(
+ const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
+ Instruction *&Inst, bool HasPromoted) {
+ for (auto *MovedExtInst : MovedExts) {
+ if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
+ LI = cast<LoadInst>(MovedExtInst->getOperand(0));
+ Inst = MovedExtInst;
+ break;
+ }
}
+ if (!LI)
+ return false;
// If they're already in the same block, there's nothing to do.
// Make the cheap checks first if we did not promote.
// If we promoted, we need to check if it is indeed profitable.
- if (!HasPromoted && LI->getParent() == I->getParent())
+ if (!HasPromoted && LI->getParent() == Inst->getParent())
return false;
- EVT VT = TLI->getValueType(*DL, I->getType());
+ EVT VT = TLI->getValueType(*DL, Inst->getType());
EVT LoadVT = TLI->getValueType(*DL, LI->getType());
// If the load has other users and the truncate is not free, this probably
// isn't worthwhile.
- if (!LI->hasOneUse() &&
- (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
- !TLI->isTruncateFree(I->getType(), LI->getType())) {
- I = OldExt;
- TPT.rollback(LastKnownGood);
+ if (!LI->hasOneUse() && (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
+ !TLI->isTruncateFree(Inst->getType(), LI->getType()))
return false;
- }
// Check whether the target supports casts folded into loads.
unsigned LType;
- if (isa<ZExtInst>(I))
+ if (isa<ZExtInst>(Inst))
LType = ISD::ZEXTLOAD;
else {
- assert(isa<SExtInst>(I) && "Unexpected ext type!");
+ assert(isa<SExtInst>(Inst) && "Unexpected ext type!");
LType = ISD::SEXTLOAD;
}
- if (!TLI->isLoadExtLegal(LType, VT, LoadVT)) {
- I = OldExt;
- TPT.rollback(LastKnownGood);
+
+ return TLI->isLoadExtLegal(LType, VT, LoadVT);
+}
+
+/// Move a zext or sext fed by a load into the same basic block as the load,
+/// unless conditions are unfavorable. This allows SelectionDAG to fold the
+/// extend into the load.
+///
+/// E.g.,
+/// \code
+/// %ld = load i32* %addr
+/// %add = add nuw i32 %ld, 4
+/// %zext = zext i32 %add to i64
+/// \endcode
+/// =>
+/// \code
+/// %ld = load i32* %addr
+/// %zext = zext i32 %ld to i64
+/// %add = add nuw i64 %zext, 4
+/// \endcode
+/// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
+/// allows us to match zext(load i32*) to i64.
+///
+/// Also, try to promote the computations used to obtain a sign extended
+/// value used into memory accesses.
+/// E.g.,
+/// \code
+/// a = add nsw i32 b, 3
+/// d = sext i32 a to i64
+/// e = getelementptr ..., i64 d
+/// \endcode
+/// =>
+/// \code
+/// f = sext i32 b to i64
+/// a = add nsw i64 f, 3
+/// e = getelementptr ..., i64 a
+/// \endcode
+///
+/// \p Inst[in/out] the extension may be modified during the process if some
+/// promotions apply.
+bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
+ // ExtLoad formation and address type promotion infrastructure requires TLI to
+ // be effective.
+ if (!TLI)
return false;
+
+ bool AllowPromotionWithoutCommonHeader = false;
+ // See if this is an interesting sext operation for address type promotion
+ // before trying to promote it, e.g., one with the right type that is used
+ // in memory accesses.
+ bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
+ *Inst, AllowPromotionWithoutCommonHeader);
+ TypePromotionTransaction TPT(RemovedInsts);
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ SmallVector<Instruction *, 1> Exts;
+ SmallVector<Instruction *, 2> SpeculativelyMovedExts;
+ Exts.push_back(Inst);
+
+ bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
+
+ // Look for a load being extended.
+ LoadInst *LI = nullptr;
+ Instruction *ExtFedByLoad;
+
+ // Try to promote a chain of computation if it allows to form an extended
+ // load.
+ if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
+ assert(LI && ExtFedByLoad && "Expect a valid load and extension");
+ TPT.commit();
+ // Move the extend into the same block as the load
+ ExtFedByLoad->removeFromParent();
+ ExtFedByLoad->insertAfter(LI);
+ // CGP does not check if the zext would be speculatively executed when moved
+ // to the same basic block as the load. Preserving its original location
+ // would pessimize the debugging experience, as well as negatively impact
+ // the quality of sample pgo. We don't want to use "line 0" as that has a
+ // size cost in the line-table section and logically the zext can be seen as
+ // part of the load. Therefore we conservatively reuse the same debug
+ // location for the load and the zext.
+ ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
+ ++NumExtsMoved;
+ Inst = ExtFedByLoad;
+ return true;
}
- // Move the extend into the same block as the load, so that SelectionDAG
- // can fold it.
- TPT.commit();
- I->removeFromParent();
- I->insertAfter(LI);
- // CGP does not check if the zext would be speculatively executed when moved
- // to the same basic block as the load. Preserving its original location would
- // pessimize the debugging experience, as well as negatively impact the
- // quality of sample pgo. We don't want to use "line 0" as that has a
- // size cost in the line-table section and logically the zext can be seen as
- // part of the load. Therefore we conservatively reuse the same debug location
- // for the load and the zext.
- I->setDebugLoc(LI->getDebugLoc());
- ++NumExtsMoved;
- return true;
+ // Continue promoting the sexts if the target considers this address type
+ // promotion worthwhile.
+ if (ATPConsiderable &&
+ performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
+ HasPromoted, TPT, SpeculativelyMovedExts))
+ return true;
+
+ TPT.rollback(LastKnownGood);
+ return false;
+}
+
+// Perform address type promotion if doing so is profitable.
+// If AllowPromotionWithoutCommonHeader == false, we should find other sext
+// instructions that sign-extend the same initial value. However, if
+// AllowPromotionWithoutCommonHeader == true, promoting the extension is
+// expected to be profitable on its own.
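+// For illustration (made-up IR, not taken from a test case): given two chains
+// \code
+//   %s1 = sext i32 %a to i64   ; first chain seen for header %a -> deferred
+//   ...
+//   %s2 = sext i32 %a to i64   ; second chain with the same header %a
+// \endcode
+// encountering %s2 makes both chains eligible for promotion.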
+bool CodeGenPrepare::performAddressTypePromotion(
+ Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
+ bool HasPromoted, TypePromotionTransaction &TPT,
+ SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
+ bool Promoted = false;
+ SmallPtrSet<Instruction *, 1> UnhandledExts;
+ bool AllSeenFirst = true;
+ for (auto I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ DenseMap<Value *, Instruction *>::iterator AlreadySeen =
+ SeenChainsForSExt.find(HeadOfChain);
+ // If there is an unhandled SExt which has the same header, try to promote
+ // it as well.
+ if (AlreadySeen != SeenChainsForSExt.end()) {
+ if (AlreadySeen->second != nullptr)
+ UnhandledExts.insert(AlreadySeen->second);
+ AllSeenFirst = false;
+ }
+ }
+
+ if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
+ SpeculativelyMovedExts.size() == 1)) {
+ TPT.commit();
+ if (HasPromoted)
+ Promoted = true;
+ for (auto I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ SeenChainsForSExt[HeadOfChain] = nullptr;
+ ValToSExtendedUses[HeadOfChain].push_back(I);
+ }
+ // Update Inst since a promotion happened.
+ Inst = SpeculativelyMovedExts.pop_back_val();
+ } else {
+ // This is the first chain visited from this header; keep the current chain
+ // as unhandled. Defer its promotion until we encounter another SExt chain
+ // derived from the same header.
+ for (auto I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ SeenChainsForSExt[HeadOfChain] = Inst;
+ }
+ return false;
+ }
+
+ if (!AllSeenFirst && !UnhandledExts.empty())
+ for (auto VisitedSExt : UnhandledExts) {
+ if (RemovedInsts.count(VisitedSExt))
+ continue;
+ TypePromotionTransaction TPT(RemovedInsts);
+ SmallVector<Instruction *, 1> Exts;
+ SmallVector<Instruction *, 2> Chains;
+ Exts.push_back(VisitedSExt);
+ bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
+ TPT.commit();
+ if (HasPromoted)
+ Promoted = true;
+ for (auto I : Chains) {
+ Value *HeadOfChain = I->getOperand(0);
+ // Mark this as handled.
+ SeenChainsForSExt[HeadOfChain] = nullptr;
+ ValToSExtendedUses[HeadOfChain].push_back(I);
+ }
+ }
+ return Promoted;
}
bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
@@ -4534,13 +5006,10 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
!(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy()))
return false;
- // Skip loads we've already transformed or have no reason to transform.
- if (Load->hasOneUse()) {
- User *LoadUser = *Load->user_begin();
- if (cast<Instruction>(LoadUser)->getParent() == Load->getParent() &&
- !dyn_cast<PHINode>(LoadUser))
- return false;
- }
+ // Skip loads we've already transformed.
+ if (Load->hasOneUse() &&
+ InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
+ return false;
// Look at all uses of Load, looking through phis, to determine how many bits
// of the loaded value are needed.
@@ -4620,7 +5089,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
//
// Also avoid hoisting if we didn't see any ands with the exact DemandBits
// mask, since these are the only ands that will be removed by isel.
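+  // (E.g., with ActiveBits == 8 we proceed only when DemandBits is exactly
+  // 0xFF; illustrative values.)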
- if (ActiveBits <= 1 || !APIntOps::isMask(ActiveBits, DemandBits) ||
+ if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
WidestAndBits != DemandBits)
return false;
@@ -4636,6 +5105,9 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
IRBuilder<> Builder(Load->getNextNode());
auto *NewAnd = dyn_cast<Instruction>(
Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
+ // Mark this instruction as "inserted by CGP", so that other
+ // optimizations don't touch it.
+ InsertedInsts.insert(NewAnd);
// Replace all uses of load with new and (except for the use of load in the
// new and itself).
@@ -4985,7 +5457,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
ExtInst->insertBefore(SI);
SI->setCondition(ExtInst);
- for (SwitchInst::CaseIt Case : SI->cases()) {
+ for (auto Case : SI->cases()) {
APInt NarrowConst = Case.getCaseValue()->getValue();
APInt WideConst = (ExtType == Instruction::ZExt) ?
NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
@@ -5514,7 +5986,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
TargetLowering::TypeExpandInteger) {
return SinkCast(CI);
} else {
- bool MadeChange = moveExtToFormExtLoad(I);
+ bool MadeChange = optimizeExt(I);
return MadeChange | optimizeExtUses(I);
}
}
@@ -5548,8 +6020,24 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
return false;
}
+ if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+ unsigned AS = RMW->getPointerAddressSpace();
+ return optimizeMemoryInst(I, RMW->getPointerOperand(),
+ RMW->getType(), AS);
+ }
+
+ if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
+ unsigned AS = CmpX->getPointerAddressSpace();
+ return optimizeMemoryInst(I, CmpX->getPointerOperand(),
+ CmpX->getCompareOperand()->getType(), AS);
+ }
+
BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
+ if (BinOp && (BinOp->getOpcode() == Instruction::And) &&
+ EnableAndCmpSinking && TLI)
+ return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
+
if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
BinOp->getOpcode() == Instruction::LShr)) {
ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
@@ -5679,68 +6167,6 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
return MadeChange;
}
-// If there is a sequence that branches based on comparing a single bit
-// against zero that can be combined into a single instruction, and the
-// target supports folding these into a single instruction, sink the
-// mask and compare into the branch uses. Do this before OptimizeBlock ->
-// OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
-// searched for.
-bool CodeGenPrepare::sinkAndCmp(Function &F) {
- if (!EnableAndCmpSinking)
- return false;
- if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
- return false;
- bool MadeChange = false;
- for (BasicBlock &BB : F) {
- // Does this BB end with the following?
- // %andVal = and %val, #single-bit-set
- // %icmpVal = icmp %andResult, 0
- // br i1 %cmpVal label %dest1, label %dest2"
- BranchInst *Brcc = dyn_cast<BranchInst>(BB.getTerminator());
- if (!Brcc || !Brcc->isConditional())
- continue;
- ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
- if (!Cmp || Cmp->getParent() != &BB)
- continue;
- ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
- if (!Zero || !Zero->isZero())
- continue;
- Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
- if (!And || And->getOpcode() != Instruction::And || And->getParent() != &BB)
- continue;
- ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1));
- if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
- continue;
- DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB.dump());
-
- // Push the "and; icmp" for any users that are conditional branches.
- // Since there can only be one branch use per BB, we don't need to keep
- // track of which BBs we insert into.
- for (Use &TheUse : Cmp->uses()) {
- // Find brcc use.
- BranchInst *BrccUser = dyn_cast<BranchInst>(TheUse);
- if (!BrccUser || !BrccUser->isConditional())
- continue;
- BasicBlock *UserBB = BrccUser->getParent();
- if (UserBB == &BB) continue;
- DEBUG(dbgs() << "found Brcc use\n");
-
- // Sink the "and; icmp" to use.
- MadeChange = true;
- BinaryOperator *NewAnd =
- BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "",
- BrccUser);
- CmpInst *NewCmp =
- CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero,
- "", BrccUser);
- TheUse = NewCmp;
- ++NumAndCmpsMoved;
- DEBUG(BrccUser->getParent()->dump());
- }
- }
- return MadeChange;
-}
-
/// \brief Scale down both weights to fit into uint32_t.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
diff --git a/lib/CodeGen/CountingFunctionInserter.cpp b/lib/CodeGen/CountingFunctionInserter.cpp
index 1e46a7a99e7e..7f7350f5fb5c 100644
--- a/lib/CodeGen/CountingFunctionInserter.cpp
+++ b/lib/CodeGen/CountingFunctionInserter.cpp
@@ -41,7 +41,7 @@ namespace {
Type *VoidTy = Type::getVoidTy(F.getContext());
Constant *CountingFn =
F.getParent()->getOrInsertFunction(CountingFunctionName,
- VoidTy, nullptr);
+ VoidTy);
CallInst::Create(CountingFn, "", &*F.begin()->getFirstInsertionPt());
return true;
}
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 5d60c3055456..e1eeddf0816c 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -71,8 +71,11 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo &MFI = MF.getFrameInfo();
BitVector Pristine = MFI.getPristineRegs(MF);
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
- if (!IsReturnBlock && !Pristine.test(*I)) continue;
+ for (const MCPhysReg *I = MF.getRegInfo().getCalleeSavedRegs(); *I; ++I) {
+ unsigned Reg = *I;
+ if (!IsReturnBlock && !(Pristine.test(Reg) || BB->isLiveIn(Reg)))
+ continue;
for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
unsigned Reg = *AI;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 17c229a216ae..7ac2e5445435 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -110,7 +110,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// Start out assuming that reserved registers are live out of this block.
LivePhysRegs = MRI->getReservedRegs();
- // Add live-ins from sucessors to LivePhysRegs. Normally, physregs are not
+ // Add live-ins from successors to LivePhysRegs. Normally, physregs are not
// live across blocks, but some targets (x86) can have flags live out of a
// block.
for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp
index a7ba694c144d..6f4ea1912cf4 100644
--- a/lib/CodeGen/DetectDeadLanes.cpp
+++ b/lib/CodeGen/DetectDeadLanes.cpp
@@ -441,7 +441,7 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
CrossCopy = isCrossCopy(*MRI, UseMI, DstRC, MO);
if (CrossCopy)
- DEBUG(dbgs() << "Copy accross incompatible classes: " << UseMI);
+ DEBUG(dbgs() << "Copy across incompatible classes: " << UseMI);
}
if (!CrossCopy)
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index 32c57e3e3705..e272d25047e6 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -6,21 +6,9 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file contains the execution dependency fix pass.
-//
-// Some X86 SSE instructions like mov, and, or, xor are available in different
-// variants for different operand types. These variant instructions are
-// equivalent, but on Nehalem and newer cpus there is extra latency
-// transferring data between integer and floating point domains. ARM cores
-// have similar issues when they are configured with both VFP and NEON
-// pipelines.
-//
-// This pass changes the variant instructions to minimize domain crossings.
-//
-//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ExecutionDepsFix.h"
+
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -35,193 +23,18 @@
using namespace llvm;
-#define DEBUG_TYPE "execution-fix"
-
-/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
-/// of execution domains.
-///
-/// An open DomainValue represents a set of instructions that can still switch
-/// execution domain. Multiple registers may refer to the same open
-/// DomainValue - they will eventually be collapsed to the same execution
-/// domain.
-///
-/// A collapsed DomainValue represents a single register that has been forced
-/// into one of more execution domains. There is a separate collapsed
-/// DomainValue for each register, but it may contain multiple execution
-/// domains. A register value is initially created in a single execution
-/// domain, but if we were forced to pay the penalty of a domain crossing, we
-/// keep track of the fact that the register is now available in multiple
-/// domains.
-namespace {
-struct DomainValue {
- // Basic reference counting.
- unsigned Refs;
-
- // Bitmask of available domains. For an open DomainValue, it is the still
- // possible domains for collapsing. For a collapsed DomainValue it is the
- // domains where the register is available for free.
- unsigned AvailableDomains;
-
- // Pointer to the next DomainValue in a chain. When two DomainValues are
- // merged, Victim.Next is set to point to Victor, so old DomainValue
- // references can be updated by following the chain.
- DomainValue *Next;
-
- // Twiddleable instructions using or defining these registers.
- SmallVector<MachineInstr*, 8> Instrs;
-
- // A collapsed DomainValue has no instructions to twiddle - it simply keeps
- // track of the domains where the registers are already available.
- bool isCollapsed() const { return Instrs.empty(); }
-
- // Is domain available?
- bool hasDomain(unsigned domain) const {
- assert(domain <
- static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
- "undefined behavior");
- return AvailableDomains & (1u << domain);
- }
-
- // Mark domain as available.
- void addDomain(unsigned domain) {
- AvailableDomains |= 1u << domain;
- }
-
- // Restrict to a single domain available.
- void setSingleDomain(unsigned domain) {
- AvailableDomains = 1u << domain;
- }
-
- // Return bitmask of domains that are available and in mask.
- unsigned getCommonDomains(unsigned mask) const {
- return AvailableDomains & mask;
- }
-
- // First domain available.
- unsigned getFirstDomain() const {
- return countTrailingZeros(AvailableDomains);
- }
-
- DomainValue() : Refs(0) { clear(); }
-
- // Clear this DomainValue and point to next which has all its data.
- void clear() {
- AvailableDomains = 0;
- Next = nullptr;
- Instrs.clear();
- }
-};
-}
-
-namespace {
-/// Information about a live register.
-struct LiveReg {
- /// Value currently in this register, or NULL when no value is being tracked.
- /// This counts as a DomainValue reference.
- DomainValue *Value;
-
- /// Instruction that defined this register, relative to the beginning of the
- /// current basic block. When a LiveReg is used to represent a live-out
- /// register, this value is relative to the end of the basic block, so it
- /// will be a negative number.
- int Def;
-};
-} // anonymous namespace
-
-namespace {
-class ExeDepsFix : public MachineFunctionPass {
- static char ID;
- SpecificBumpPtrAllocator<DomainValue> Allocator;
- SmallVector<DomainValue*,16> Avail;
-
- const TargetRegisterClass *const RC;
- MachineFunction *MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- RegisterClassInfo RegClassInfo;
- std::vector<SmallVector<int, 1>> AliasMap;
- const unsigned NumRegs;
- LiveReg *LiveRegs;
- typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap;
- LiveOutMap LiveOuts;
-
- /// List of undefined register reads in this block in forward order.
- std::vector<std::pair<MachineInstr*, unsigned> > UndefReads;
-
- /// Storage for register unit liveness.
- LivePhysRegs LiveRegSet;
-
- /// Current instruction number.
- /// The first instruction in each basic block is 0.
- int CurInstr;
-
- /// True when the current block has a predecessor that hasn't been visited
- /// yet.
- bool SeenUnknownBackEdge;
-
-public:
- ExeDepsFix(const TargetRegisterClass *rc)
- : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::NoVRegs);
- }
-
- StringRef getPassName() const override { return "Execution dependency fix"; }
-
-private:
- iterator_range<SmallVectorImpl<int>::const_iterator>
- regIndices(unsigned Reg) const;
-
- // DomainValue allocation.
- DomainValue *alloc(int domain = -1);
- DomainValue *retain(DomainValue *DV) {
- if (DV) ++DV->Refs;
- return DV;
- }
- void release(DomainValue*);
- DomainValue *resolve(DomainValue*&);
-
- // LiveRegs manipulations.
- void setLiveReg(int rx, DomainValue *DV);
- void kill(int rx);
- void force(int rx, unsigned domain);
- void collapse(DomainValue *dv, unsigned domain);
- bool merge(DomainValue *A, DomainValue *B);
-
- void enterBasicBlock(MachineBasicBlock*);
- void leaveBasicBlock(MachineBasicBlock*);
- void visitInstr(MachineInstr*);
- void processDefs(MachineInstr*, bool Kill);
- void visitSoftInstr(MachineInstr*, unsigned mask);
- void visitHardInstr(MachineInstr*, unsigned domain);
- void pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
- unsigned Pref);
- bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref);
- void processUndefReads(MachineBasicBlock*);
-};
-}
-
-char ExeDepsFix::ID = 0;
+#define DEBUG_TYPE "execution-deps-fix"
/// Translate TRI register number to a list of indices into our smaller tables
/// of interesting registers.
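+/// (E.g., in the x86 xmm pass an alias query for %ymm0 yields the index of
+/// the overlapping %xmm0, while an uninteresting gpr yields an empty range;
+/// illustrative.)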
iterator_range<SmallVectorImpl<int>::const_iterator>
-ExeDepsFix::regIndices(unsigned Reg) const {
+ExecutionDepsFix::regIndices(unsigned Reg) const {
assert(Reg < AliasMap.size() && "Invalid register");
const auto &Entry = AliasMap[Reg];
return make_range(Entry.begin(), Entry.end());
}
-DomainValue *ExeDepsFix::alloc(int domain) {
+DomainValue *ExecutionDepsFix::alloc(int domain) {
DomainValue *dv = Avail.empty() ?
new(Allocator.Allocate()) DomainValue :
Avail.pop_back_val();
@@ -234,7 +47,7 @@ DomainValue *ExeDepsFix::alloc(int domain) {
/// Release a reference to DV. When the last reference is released,
/// collapse if needed.
-void ExeDepsFix::release(DomainValue *DV) {
+void ExecutionDepsFix::release(DomainValue *DV) {
while (DV) {
assert(DV->Refs && "Bad DomainValue");
if (--DV->Refs)
@@ -254,7 +67,7 @@ void ExeDepsFix::release(DomainValue *DV) {
/// Follow the chain of dead DomainValues until a live DomainValue is reached.
/// Update the referenced pointer when necessary.
-DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
+DomainValue *ExecutionDepsFix::resolve(DomainValue *&DVRef) {
DomainValue *DV = DVRef;
if (!DV || !DV->Next)
return DV;
@@ -271,7 +84,7 @@ DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
}
/// Set LiveRegs[rx] = dv, updating reference counts.
-void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) {
+void ExecutionDepsFix::setLiveReg(int rx, DomainValue *dv) {
assert(unsigned(rx) < NumRegs && "Invalid index");
assert(LiveRegs && "Must enter basic block first.");
@@ -283,7 +96,7 @@ void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) {
}
// Kill register rx, recycle or collapse any DomainValue.
-void ExeDepsFix::kill(int rx) {
+void ExecutionDepsFix::kill(int rx) {
assert(unsigned(rx) < NumRegs && "Invalid index");
assert(LiveRegs && "Must enter basic block first.");
if (!LiveRegs[rx].Value)
@@ -294,7 +107,7 @@ void ExeDepsFix::kill(int rx) {
}
/// Force register rx into domain.
-void ExeDepsFix::force(int rx, unsigned domain) {
+void ExecutionDepsFix::force(int rx, unsigned domain) {
assert(unsigned(rx) < NumRegs && "Invalid index");
assert(LiveRegs && "Must enter basic block first.");
if (DomainValue *dv = LiveRegs[rx].Value) {
@@ -317,7 +130,7 @@ void ExeDepsFix::force(int rx, unsigned domain) {
/// Collapse open DomainValue into given domain. If there are multiple
/// registers using dv, they each get a unique collapsed DomainValue.
-void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
+void ExecutionDepsFix::collapse(DomainValue *dv, unsigned domain) {
assert(dv->hasDomain(domain) && "Cannot collapse");
// Collapse all the instructions.
@@ -333,7 +146,7 @@ void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
}
/// All instructions and registers in B are moved to A, and B is released.
-bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
+bool ExecutionDepsFix::merge(DomainValue *A, DomainValue *B) {
assert(!A->isCollapsed() && "Cannot merge into collapsed");
assert(!B->isCollapsed() && "Cannot merge from collapsed");
if (A == B)
@@ -359,10 +172,7 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
}
/// Set up LiveRegs by merging predecessor live-out values.
-void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
- // Detect back-edges from predecessors we haven't processed yet.
- SeenUnknownBackEdge = false;
-
+void ExecutionDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// Reset instruction counter in each basic block.
CurInstr = 0;
@@ -397,18 +207,21 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// Try to coalesce live-out registers from predecessors.
for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
pe = MBB->pred_end(); pi != pe; ++pi) {
- LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
- if (fi == LiveOuts.end()) {
- SeenUnknownBackEdge = true;
+ auto fi = MBBInfos.find(*pi);
+ assert(fi != MBBInfos.end() &&
+ "Should have pre-allocated MBBInfos for all MBBs");
+ LiveReg *Incoming = fi->second.OutRegs;
+ // Incoming is null if this is a backedge from a BB we haven't processed
+ // yet.
+ if (Incoming == nullptr) {
continue;
}
- assert(fi->second && "Can't have NULL entries");
for (unsigned rx = 0; rx != NumRegs; ++rx) {
// Use the most recent predecessor def for each register.
- LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, fi->second[rx].Def);
+ LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, Incoming[rx].Def);
- DomainValue *pdv = resolve(fi->second[rx].Value);
+ DomainValue *pdv = resolve(Incoming[rx].Value);
if (!pdv)
continue;
if (!LiveRegs[rx].Value) {
@@ -432,35 +245,34 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
force(rx, pdv->getFirstDomain());
}
}
- DEBUG(dbgs() << "BB#" << MBB->getNumber()
- << (SeenUnknownBackEdge ? ": incomplete\n" : ": all preds known\n"));
+ DEBUG(
+ dbgs() << "BB#" << MBB->getNumber()
+ << (!isBlockDone(MBB) ? ": incomplete\n" : ": all preds known\n"));
}
-void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
+void ExecutionDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
assert(LiveRegs && "Must enter basic block first.");
- // Save live registers at end of MBB - used by enterBasicBlock().
- // Also use LiveOuts as a visited set to detect back-edges.
- bool First = LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second;
-
- if (First) {
- // LiveRegs was inserted in LiveOuts. Adjust all defs to be relative to
- // the end of this block instead of the beginning.
- for (unsigned i = 0, e = NumRegs; i != e; ++i)
- LiveRegs[i].Def -= CurInstr;
- } else {
- // Insertion failed, this must be the second pass.
+ LiveReg *OldOutRegs = MBBInfos[MBB].OutRegs;
+ // Save register clearances at end of MBB - used by enterBasicBlock().
+ MBBInfos[MBB].OutRegs = LiveRegs;
+
+ // While processing the basic block, we kept `Def` relative to the start
+ // of the basic block for convenience. However, future use of this information
+ // only cares about the clearance from the end of the block, so adjust
+ // everything to be relative to the end of the basic block.
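+ // (E.g., a register defined by instruction 3 of a 10-instruction block ends
+ // up with Def = 3 - 10 = -7, i.e., a clearance of 7 from the block end.)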
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ LiveRegs[i].Def -= CurInstr;
+ if (OldOutRegs) {
+ // This must be the second pass.
// Release all the DomainValues instead of keeping them.
for (unsigned i = 0, e = NumRegs; i != e; ++i)
- release(LiveRegs[i].Value);
- delete[] LiveRegs;
+ release(OldOutRegs[i].Value);
+ delete[] OldOutRegs;
}
LiveRegs = nullptr;
}
-void ExeDepsFix::visitInstr(MachineInstr *MI) {
- if (MI->isDebugValue())
- return;
-
+bool ExecutionDepsFix::visitInstr(MachineInstr *MI) {
// Update instructions with explicit execution domains.
std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(*MI);
if (DomP.first) {
@@ -470,16 +282,16 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) {
visitHardInstr(MI, DomP.first);
}
- // Process defs to track register ages, and kill values clobbered by generic
- // instructions.
- processDefs(MI, !DomP.first);
+ return !DomP.first;
}
/// \brief Helps avoid false dependencies on undef registers by updating the
/// machine instructions' undef operand to use a register that the instruction
/// is truly dependent on, or use a register with clearance higher than Pref.
-void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
- unsigned Pref) {
+/// Returns true if it was able to find a true dependency, thus not requiring
+/// a dependency breaking instruction regardless of clearance.
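+/// (A classic x86 case, for illustration, is VCVTSI2SS, whose first xmm
+/// source operand is undef: naming a recently written register there would
+/// stall on a false dependency.)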
+bool ExecutionDepsFix::pickBestRegisterForUndef(MachineInstr *MI,
+ unsigned OpIdx, unsigned Pref) {
MachineOperand &MO = MI->getOperand(OpIdx);
assert(MO.isUndef() && "Expected undef machine operand");
@@ -487,7 +299,7 @@ void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
// Update only undef operands that are mapped to one register.
if (AliasMap[OriginalReg].size() != 1)
- return;
+ return false;
// Get the undef operand's register class
const TargetRegisterClass *OpRC =
@@ -502,7 +314,7 @@ void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
// We found a true dependency - replace the undef register with the true
// dependency.
MO.setReg(CurrMO.getReg());
- return;
+ return true;
}
// Go over all registers in the register class and find the register with
@@ -527,12 +339,14 @@ void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
// Update the operand if we found a register with better clearance.
if (MaxClearanceReg != OriginalReg)
MO.setReg(MaxClearanceReg);
+
+ return false;
}
/// \brief Return true if it makes sense to break dependence on a partial def
/// or undef use.
-bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
- unsigned Pref) {
+bool ExecutionDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
+ unsigned Pref) {
unsigned reg = MI->getOperand(OpIdx).getReg();
for (int rx : regIndices(reg)) {
unsigned Clearance = CurInstr - LiveRegs[rx].Def;
@@ -542,14 +356,7 @@ bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
DEBUG(dbgs() << ": Break dependency.\n");
continue;
}
- // The current clearance seems OK, but we may be ignoring a def from a
- // back-edge.
- if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) {
- DEBUG(dbgs() << ": OK .\n");
- return false;
- }
- // A def from an unprocessed back-edge may make us break this dependency.
- DEBUG(dbgs() << ": Wait for back-edge to resolve.\n");
+ DEBUG(dbgs() << ": OK .\n");
return false;
}
return true;
@@ -559,16 +366,22 @@ bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
// If Kill is set, also kill off DomainValues clobbered by the defs.
//
// Also break dependencies on partial defs and undef uses.
-void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
+void ExecutionDepsFix::processDefs(MachineInstr *MI, bool breakDependency,
+ bool Kill) {
assert(!MI->isDebugValue() && "Won't process debug values");
// Break dependence on undef uses. Do this before updating LiveRegs below.
unsigned OpNum;
- unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
- if (Pref) {
- pickBestRegisterForUndef(MI, OpNum, Pref);
- if (shouldBreakDependence(MI, OpNum, Pref))
- UndefReads.push_back(std::make_pair(MI, OpNum));
+ if (breakDependency) {
+ unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
+ if (Pref) {
+ bool HadTrueDependency = pickBestRegisterForUndef(MI, OpNum, Pref);
+ // We don't need to bother trying to break a dependency if this
+ // instruction has a true dependency on that register through another
+ // operand - we'll have to wait for it to be available regardless.
+ if (!HadTrueDependency && shouldBreakDependence(MI, OpNum, Pref))
+ UndefReads.push_back(std::make_pair(MI, OpNum));
+ }
}
const MCInstrDesc &MCID = MI->getDesc();
for (unsigned i = 0,
@@ -584,11 +397,13 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
<< '\t' << *MI);
- // Check clearance before partial register updates.
- // Call breakDependence before setting LiveRegs[rx].Def.
- unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);
- if (Pref && shouldBreakDependence(MI, i, Pref))
- TII->breakPartialRegDependency(*MI, i, TRI);
+ if (breakDependency) {
+ // Check clearance before partial register updates.
+ // Call breakDependence before setting LiveRegs[rx].Def.
+ unsigned Pref = TII->getPartialRegUpdateClearance(*MI, i, TRI);
+ if (Pref && shouldBreakDependence(MI, i, Pref))
+ TII->breakPartialRegDependency(*MI, i, TRI);
+ }
// How many instructions since rx was last written?
LiveRegs[rx].Def = CurInstr;
@@ -607,7 +422,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
/// only do it on demand. Note that the occurrence of undefined register reads
/// that should be broken is very rare, but when they occur we may have many in
/// a single block.
-void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
+void ExecutionDepsFix::processUndefReads(MachineBasicBlock *MBB) {
if (UndefReads.empty())
return;
@@ -640,7 +455,7 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
// A hard instruction only works in one domain. All input registers will be
// forced into that domain.
-void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
+void ExecutionDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
// Collapse all uses.
for (unsigned i = mi->getDesc().getNumDefs(),
e = mi->getDesc().getNumOperands(); i != e; ++i) {
@@ -663,7 +478,7 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
}
// A soft instruction can be changed to work in other domains given by mask.
-void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+void ExecutionDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// Bitmask of available domains for this instruction after taking collapsed
// operands into account.
unsigned available = mask;
@@ -774,7 +589,34 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
}
}
-bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
+void ExecutionDepsFix::processBasicBlock(MachineBasicBlock *MBB,
+ bool PrimaryPass) {
+ enterBasicBlock(MBB);
+ // If this block is not done, it makes little sense to make any decisions
+ // based on clearance information. We need to make a second pass anyway,
+ // and by then we'll have better information, so we can avoid doing the work
+ // to try and break dependencies now.
+ bool breakDependency = isBlockDone(MBB);
+ for (MachineInstr &MI : *MBB) {
+ if (!MI.isDebugValue()) {
+ bool Kill = false;
+ if (PrimaryPass)
+ Kill = visitInstr(&MI);
+ processDefs(&MI, breakDependency, Kill);
+ }
+ }
+ if (breakDependency)
+ processUndefReads(MBB);
+ leaveBasicBlock(MBB);
+}
+
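+// A block is "done" once its primary pass has run, every predecessor has been
+// processed, and as many predecessors have completed as had been processed
+// when the primary pass ran - i.e., the clearance information flowing into
+// this block is final.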
+bool ExecutionDepsFix::isBlockDone(MachineBasicBlock *MBB) {
+ return MBBInfos[MBB].PrimaryCompleted &&
+ MBBInfos[MBB].IncomingCompleted == MBBInfos[MBB].PrimaryIncoming &&
+ MBBInfos[MBB].IncomingProcessed == MBB->pred_size();
+}
+
+bool ExecutionDepsFix::runOnMachineFunction(MachineFunction &mf) {
if (skipFunction(*mf.getFunction()))
return false;
MF = &mf;
@@ -810,52 +652,104 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
AliasMap[*AI].push_back(i);
}
+ // Initialize the MBBInfos.
+ for (auto &MBB : mf) {
+ MBBInfo InitialInfo;
+ MBBInfos.insert(std::make_pair(&MBB, InitialInfo));
+ }
+
+ /*
+ * We want to visit every instruction in every basic block in order to update
+ * it's execution domain or break any false dependencies. However, for the
+ * dependency breaking, we need to know clearances from all predecessors
+ * (including any backedges). One way to do so would be to do two complete
+ * passes over all basic blocks/instructions, the first for recording
+ * clearances, the second to break the dependencies. However, for functions
+ * without backedges, or functions with a lot of straight-line code, and
+ * a small loop, that would be a lot of unnecessary work (since only the
+ * BBs that are part of the loop require two passes). As an example,
+ * consider the following loop.
+ *
+ *
+ * PH -> A -> B (xmm<Undef> -> xmm<Def>) -> C -> D -> EXIT
+ * ^ |
+ * +----------------------------------+
+ *
+ * The iteration order is as follows:
+ * Naive: PH A B C D A' B' C' D'
+ * Optimized: PH A B C A' B' C' D
+ *
+ * Note that we avoid processing D twice, because we can entirely process
+ * the predecessors before getting to D. We call a block that is ready
+ * for its second round of processing `done` (isBlockDone). Once we finish
+ * processing some block, we update the counters in MBBInfos and re-process
+ * any successors that are now done.
+ */
+
MachineBasicBlock *Entry = &*MF->begin();
ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
- SmallVector<MachineBasicBlock*, 16> Loops;
+ SmallVector<MachineBasicBlock *, 4> Workqueue;
for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
MachineBasicBlock *MBB = *MBBI;
- enterBasicBlock(MBB);
- if (SeenUnknownBackEdge)
- Loops.push_back(MBB);
- for (MachineInstr &MI : *MBB)
- visitInstr(&MI);
- processUndefReads(MBB);
- leaveBasicBlock(MBB);
+ // N.B: IncomingProcessed and IncomingCompleted were already updated while
+ // processing this block's predecessors.
+ MBBInfos[MBB].PrimaryCompleted = true;
+ MBBInfos[MBB].PrimaryIncoming = MBBInfos[MBB].IncomingProcessed;
+ bool Primary = true;
+ Workqueue.push_back(MBB);
+ while (!Workqueue.empty()) {
+ MachineBasicBlock *ActiveMBB = &*Workqueue.back();
+ Workqueue.pop_back();
+ processBasicBlock(ActiveMBB, Primary);
+ bool Done = isBlockDone(ActiveMBB);
+ for (auto *Succ : ActiveMBB->successors()) {
+ if (!isBlockDone(Succ)) {
+ if (Primary) {
+ MBBInfos[Succ].IncomingProcessed++;
+ }
+ if (Done) {
+ MBBInfos[Succ].IncomingCompleted++;
+ }
+ if (isBlockDone(Succ)) {
+ Workqueue.push_back(Succ);
+ }
+ }
+ }
+ Primary = false;
+ }
}
- // Visit all the loop blocks again in order to merge DomainValues from
- // back-edges.
- for (MachineBasicBlock *MBB : Loops) {
- enterBasicBlock(MBB);
- for (MachineInstr &MI : *MBB)
- if (!MI.isDebugValue())
- processDefs(&MI, false);
- processUndefReads(MBB);
- leaveBasicBlock(MBB);
+ // We need to go through again and finalize any blocks that are not done yet.
+ // This is possible if blocks have dead predecessors, so we didn't visit them
+ // above.
+ for (ReversePostOrderTraversal<MachineBasicBlock *>::rpo_iterator
+ MBBI = RPOT.begin(),
+ MBBE = RPOT.end();
+ MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock *MBB = *MBBI;
+ if (!isBlockDone(MBB)) {
+ processBasicBlock(MBB, false);
+ // Don't update successors here. We'll get to them anyway through this
+ // loop.
+ }
}
// Clear the LiveOuts vectors and collapse any remaining DomainValues.
for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
- LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
- if (FI == LiveOuts.end() || !FI->second)
+ auto FI = MBBInfos.find(*MBBI);
+ if (FI == MBBInfos.end() || !FI->second.OutRegs)
continue;
for (unsigned i = 0, e = NumRegs; i != e; ++i)
- if (FI->second[i].Value)
- release(FI->second[i].Value);
- delete[] FI->second;
+ if (FI->second.OutRegs[i].Value)
+ release(FI->second.OutRegs[i].Value);
+ delete[] FI->second.OutRegs;
}
- LiveOuts.clear();
+ MBBInfos.clear();
UndefReads.clear();
Avail.clear();
Allocator.DestroyAll();
return false;
}
-
-FunctionPass *
-llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) {
- return new ExeDepsFix(RC);
-}
diff --git a/lib/CodeGen/FEntryInserter.cpp b/lib/CodeGen/FEntryInserter.cpp
new file mode 100644
index 000000000000..0759bf6713e0
--- /dev/null
+++ b/lib/CodeGen/FEntryInserter.cpp
@@ -0,0 +1,55 @@
+//===-- FEntryInserter.cpp - Insert fentry calls -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file edits function bodies to insert fentry calls.
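+//
+// E.g., on x86 a function carrying the "fentry-call"="true" attribute will
+// begin with a call to __fentry__ (illustrative):
+// \code
+//   foo:
+//     call __fentry__
+// \endcode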
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+namespace {
+struct FEntryInserter : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ FEntryInserter() : MachineFunctionPass(ID) {
+ initializeFEntryInserterPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override;
+};
+}
+
+bool FEntryInserter::runOnMachineFunction(MachineFunction &MF) {
+ const std::string FEntryName =
+ MF.getFunction()->getFnAttribute("fentry-call").getValueAsString();
+ if (FEntryName != "true")
+ return false;
+
+ auto &FirstMBB = *MF.begin();
+ auto &FirstMI = *FirstMBB.begin();
+
+ auto *TII = MF.getSubtarget().getInstrInfo();
+ BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
+ TII->get(TargetOpcode::FENTRY_CALL));
+ return true;
+}
+
+char FEntryInserter::ID = 0;
+char &llvm::FEntryInserterID = FEntryInserter::ID;
+INITIALIZE_PASS(FEntryInserter, "fentry-insert", "Insert fentry calls", false,
+ false)
diff --git a/lib/CodeGen/FaultMaps.cpp b/lib/CodeGen/FaultMaps.cpp
index 2acafafdb9fc..43f364128978 100644
--- a/lib/CodeGen/FaultMaps.cpp
+++ b/lib/CodeGen/FaultMaps.cpp
@@ -1,4 +1,4 @@
-//===---------------------------- FaultMaps.cpp ---------------------------===//
+//===- FaultMaps.cpp ------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,14 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/FaultMaps.h"
-
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/FaultMaps.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -102,14 +105,16 @@ void FaultMaps::emitFunctionInfo(const MCSymbol *FnLabel,
}
}
-
const char *FaultMaps::faultTypeToString(FaultMaps::FaultKind FT) {
switch (FT) {
default:
llvm_unreachable("unhandled fault type!");
-
case FaultMaps::FaultingLoad:
return "FaultingLoad";
+ case FaultMaps::FaultingLoadStore:
+ return "FaultingLoadStore";
+ case FaultMaps::FaultingStore:
+ return "FaultingStore";
}
}
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 31ab86fdf276..6be4c16c6301 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -1,4 +1,4 @@
-//===-- GCStrategy.cpp - Garbage Collector Description --------------------===//
+//===- GCStrategy.cpp - Garbage Collector Description ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,7 +18,4 @@ using namespace llvm;
LLVM_INSTANTIATE_REGISTRY(GCRegistry)
-GCStrategy::GCStrategy()
- : UseStatepoints(false), NeededSafePoints(0), CustomReadBarriers(false),
- CustomWriteBarriers(false), CustomRoots(false), InitRoots(true),
- UsesMetadata(false) {}
+GCStrategy::GCStrategy() = default;
diff --git a/lib/CodeGen/GlobalISel/CMakeLists.txt b/lib/CodeGen/GlobalISel/CMakeLists.txt
index 76ab5d36047e..03a8c4f5f909 100644
--- a/lib/CodeGen/GlobalISel/CMakeLists.txt
+++ b/lib/CodeGen/GlobalISel/CMakeLists.txt
@@ -22,7 +22,6 @@ else()
set(LLVM_OPTIONAL_SOURCES LLVMGlobalISel ${GLOBAL_ISEL_FILES})
endif()
-
# In LLVMBuild.txt files, it is not possible to mark a dependency to a
# library as optional. So instead, generate an empty library if we did
# not ask for it.
diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp
index 13212212fa01..035a2ac78ed9 100644
--- a/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -24,40 +24,42 @@
using namespace llvm;
bool CallLowering::lowerCall(
- MachineIRBuilder &MIRBuilder, const CallInst &CI, unsigned ResReg,
+ MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, unsigned ResReg,
ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const {
- auto &DL = CI.getParent()->getParent()->getParent()->getDataLayout();
+ auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout();
// First step is to marshall all the function's parameters into the correct
// physregs and memory locations. Gather the sequence of argument types that
// we'll pass to the assigner function.
SmallVector<ArgInfo, 8> OrigArgs;
unsigned i = 0;
- for (auto &Arg : CI.arg_operands()) {
- ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}};
- setArgFlags(OrigArg, i + 1, DL, CI);
+ unsigned NumFixedArgs = CS.getFunctionType()->getNumParams();
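+  // For a variadic callee (e.g., printf), only the leading declared
+  // parameters are fixed; anything at or beyond NumFixedArgs is passed as a
+  // vararg.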
+ for (auto &Arg : CS.args()) {
+ ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{},
+ i < NumFixedArgs};
+ setArgFlags(OrigArg, i + 1, DL, CS);
OrigArgs.push_back(OrigArg);
++i;
}
MachineOperand Callee = MachineOperand::CreateImm(0);
- if (Function *F = CI.getCalledFunction())
+ if (const Function *F = CS.getCalledFunction())
Callee = MachineOperand::CreateGA(F, 0);
else
Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
- ArgInfo OrigRet{ResReg, CI.getType(), ISD::ArgFlagsTy{}};
+ ArgInfo OrigRet{ResReg, CS.getType(), ISD::ArgFlagsTy{}};
if (!OrigRet.Ty->isVoidTy())
- setArgFlags(OrigRet, AttributeSet::ReturnIndex, DL, CI);
+ setArgFlags(OrigRet, AttributeList::ReturnIndex, DL, CS);
- return lowerCall(MIRBuilder, Callee, OrigRet, OrigArgs);
+ return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs);
}
template <typename FuncInfoTy>
void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const DataLayout &DL,
const FuncInfoTy &FuncInfo) const {
- const AttributeSet &Attrs = FuncInfo.getAttributes();
+ const AttributeList &Attrs = FuncInfo.getAttributes();
if (Attrs.hasAttribute(OpIdx, Attribute::ZExt))
Arg.Flags.setZExt();
if (Attrs.hasAttribute(OpIdx, Attribute::SExt))
@@ -103,7 +105,6 @@ CallLowering::setArgFlags<CallInst>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const CallInst &FuncInfo) const;
bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
- CCAssignFn *AssignFn,
ArrayRef<ArgInfo> Args,
ValueHandler &Handler) const {
MachineFunction &MF = MIRBuilder.getMF();
@@ -116,12 +117,20 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
unsigned NumArgs = Args.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT CurVT = MVT::getVT(Args[i].Ty);
- if (AssignFn(i, CurVT, CurVT, CCValAssign::Full, Args[i].Flags, CCInfo))
+ if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo))
return false;
}
- for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
+ for (unsigned i = 0, e = Args.size(), j = 0; i != e; ++i, ++j) {
+ assert(j < ArgLocs.size() && "Skipped too many arg locs");
+
+ CCValAssign &VA = ArgLocs[j];
+ assert(VA.getValNo() == i && "Location doesn't correspond to current arg");
+
+ if (VA.needsCustom()) {
+ j += Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j));
+ continue;
+ }
if (VA.isRegLoc())
Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA);
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 89a042ffc477..766187378446 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -12,7 +12,10 @@
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -21,11 +24,13 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -40,11 +45,21 @@ INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
false, false)
-static void reportTranslationError(const Value &V, const Twine &Message) {
- std::string ErrStorage;
- raw_string_ostream Err(ErrStorage);
- Err << Message << ": " << V << '\n';
- report_fatal_error(Err.str());
+static void reportTranslationError(MachineFunction &MF,
+ const TargetPassConfig &TPC,
+ OptimizationRemarkEmitter &ORE,
+ OptimizationRemarkMissed &R) {
+ MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+
+ // Print the function name explicitly if we don't have a debug location (which
+ // makes the diagnostic less useful) or if we're going to emit a raw error.
+ if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled())
+ R << (" (in function: " + MF.getName() + ")").str();
+
+ if (TPC.isGlobalISelAbortEnabled())
+ report_fatal_error(R.getMsg());
+ else
+ ORE.emit(R);
}
IRTranslator::IRTranslator() : MachineFunctionPass(ID), MRI(nullptr) {
@@ -59,28 +74,31 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
unsigned &ValReg = ValToVReg[&Val];
- // Check if this is the first time we see Val.
- if (!ValReg) {
- // Fill ValRegsSequence with the sequence of registers
- // we need to concat together to produce the value.
- assert(Val.getType()->isSized() &&
- "Don't know how to create an empty vreg");
- unsigned VReg = MRI->createGenericVirtualRegister(LLT{*Val.getType(), *DL});
- ValReg = VReg;
-
- if (auto CV = dyn_cast<Constant>(&Val)) {
- bool Success = translate(*CV, VReg);
- if (!Success) {
- if (!TPC->isGlobalISelAbortEnabled()) {
- MF->getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
- return VReg;
- }
- reportTranslationError(Val, "unable to translate constant");
- }
+
+ if (ValReg)
+ return ValReg;
+
+ // Fill ValRegsSequence with the sequence of registers
+ // we need to concat together to produce the value.
+ assert(Val.getType()->isSized() &&
+ "Don't know how to create an empty vreg");
+ unsigned VReg =
+ MRI->createGenericVirtualRegister(getLLTForType(*Val.getType(), *DL));
+ ValReg = VReg;
+
+ if (auto CV = dyn_cast<Constant>(&Val)) {
+ bool Success = translate(*CV, VReg);
+ if (!Success) {
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ MF->getFunction()->getSubprogram(),
+ &MF->getFunction()->getEntryBlock());
+ R << "unable to translate constant: " << ore::NV("Type", Val.getType());
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return VReg;
}
}
- return ValReg;
+
+ return VReg;
}
int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
@@ -112,28 +130,27 @@ unsigned IRTranslator::getMemOpAlignment(const Instruction &I) {
} else if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
Alignment = LI->getAlignment();
ValTy = LI->getType();
- } else if (!TPC->isGlobalISelAbortEnabled()) {
- MF->getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
+ } else {
+ OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
+ R << "unable to translate memop: " << ore::NV("Opcode", &I);
+ reportTranslationError(*MF, *TPC, *ORE, R);
return 1;
- } else
- llvm_unreachable("unhandled memory instruction");
+ }
return Alignment ? Alignment : DL->getABITypeAlignment(ValTy);
}
-MachineBasicBlock &IRTranslator::getOrCreateBB(const BasicBlock &BB) {
+MachineBasicBlock &IRTranslator::getMBB(const BasicBlock &BB) {
MachineBasicBlock *&MBB = BBToMBB[&BB];
- if (!MBB) {
- MBB = MF->CreateMachineBasicBlock(&BB);
- MF->push_back(MBB);
-
- if (BB.hasAddressTaken())
- MBB->setHasAddressTaken();
- }
+ assert(MBB && "BasicBlock was not encountered before");
return *MBB;
}
+void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
+ assert(NewPred && "new predecessor must be a real MachineBasicBlock");
+ MachinePreds[Edge].push_back(NewPred);
+}
+
bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
// FIXME: handle signed/unsigned wrapping flags.
@@ -149,6 +166,18 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
return true;
}
+bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
+ // -0.0 - X --> G_FNEG
+ if (isa<Constant>(U.getOperand(0)) &&
+ U.getOperand(0) == ConstantFP::getZeroValueForNegation(U.getType())) {
+ MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
+ .addDef(getOrCreateVReg(U))
+ .addUse(getOrCreateVReg(*U.getOperand(1)));
+ return true;
+ }
+ return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
+}
+
bool IRTranslator::translateCompare(const User &U,
MachineIRBuilder &MIRBuilder) {
const CmpInst *CI = dyn_cast<CmpInst>(&U);
@@ -158,9 +187,14 @@ bool IRTranslator::translateCompare(const User &U,
CmpInst::Predicate Pred =
CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
cast<ConstantExpr>(U).getPredicate());
-
if (CmpInst::isIntPredicate(Pred))
MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
+ else if (Pred == CmpInst::FCMP_FALSE)
+ MIRBuilder.buildCopy(
+ Res, getOrCreateVReg(*Constant::getNullValue(CI->getType())));
+ else if (Pred == CmpInst::FCMP_TRUE)
+ MIRBuilder.buildCopy(
+ Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType())));
else
MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);
@@ -183,18 +217,21 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
// We want a G_BRCOND to the true BB followed by an unconditional branch.
unsigned Tst = getOrCreateVReg(*BrInst.getCondition());
const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
- MachineBasicBlock &TrueBB = getOrCreateBB(TrueTgt);
+ MachineBasicBlock &TrueBB = getMBB(TrueTgt);
MIRBuilder.buildBrCond(Tst, TrueBB);
}
const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ));
- MachineBasicBlock &TgtBB = getOrCreateBB(BrTgt);
- MIRBuilder.buildBr(TgtBB);
+ MachineBasicBlock &TgtBB = getMBB(BrTgt);
+ MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+
+ // If the unconditional target is the layout successor, fallthrough.
+ if (!CurBB.isLayoutSuccessor(&TgtBB))
+ MIRBuilder.buildBr(TgtBB);
// Link successors.
- MachineBasicBlock &CurBB = MIRBuilder.getMBB();
for (const BasicBlock *Succ : BrInst.successors())
- CurBB.addSuccessor(&getOrCreateBB(*Succ));
+ CurBB.addSuccessor(&getMBB(*Succ));
return true;
}
@@ -209,30 +246,52 @@ bool IRTranslator::translateSwitch(const User &U,
const SwitchInst &SwInst = cast<SwitchInst>(U);
const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition());
+ const BasicBlock *OrigBB = SwInst.getParent();
- LLT LLTi1 = LLT(*Type::getInt1Ty(U.getContext()), *DL);
+ LLT LLTi1 = getLLTForType(*Type::getInt1Ty(U.getContext()), *DL);
for (auto &CaseIt : SwInst.cases()) {
const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue());
const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1);
MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue);
- MachineBasicBlock &CurBB = MIRBuilder.getMBB();
- MachineBasicBlock &TrueBB = getOrCreateBB(*CaseIt.getCaseSuccessor());
+ MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
+ const BasicBlock *TrueBB = CaseIt.getCaseSuccessor();
+ MachineBasicBlock &TrueMBB = getMBB(*TrueBB);
- MIRBuilder.buildBrCond(Tst, TrueBB);
- CurBB.addSuccessor(&TrueBB);
+ MIRBuilder.buildBrCond(Tst, TrueMBB);
+ CurMBB.addSuccessor(&TrueMBB);
+ addMachineCFGPred({OrigBB, TrueBB}, &CurMBB);
- MachineBasicBlock *FalseBB =
+ MachineBasicBlock *FalseMBB =
MF->CreateMachineBasicBlock(SwInst.getParent());
- MF->push_back(FalseBB);
- MIRBuilder.buildBr(*FalseBB);
- CurBB.addSuccessor(FalseBB);
+ // Insert the comparison blocks one after the other.
+ MF->insert(std::next(CurMBB.getIterator()), FalseMBB);
+ MIRBuilder.buildBr(*FalseMBB);
+ CurMBB.addSuccessor(FalseMBB);
- MIRBuilder.setMBB(*FalseBB);
+ MIRBuilder.setMBB(*FalseMBB);
}
// handle default case
- MachineBasicBlock &DefaultBB = getOrCreateBB(*SwInst.getDefaultDest());
- MIRBuilder.buildBr(DefaultBB);
- MIRBuilder.getMBB().addSuccessor(&DefaultBB);
+ const BasicBlock *DefaultBB = SwInst.getDefaultDest();
+ MachineBasicBlock &DefaultMBB = getMBB(*DefaultBB);
+ MIRBuilder.buildBr(DefaultMBB);
+ MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
+ CurMBB.addSuccessor(&DefaultMBB);
+ addMachineCFGPred({OrigBB, DefaultBB}, &CurMBB);
+
+ return true;
+}
+
+bool IRTranslator::translateIndirectBr(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const IndirectBrInst &BrInst = cast<IndirectBrInst>(U);
+
+ const unsigned Tgt = getOrCreateVReg(*BrInst.getAddress());
+ MIRBuilder.buildBrIndirect(Tgt);
+
+ // Link successors.
+ MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+ for (const BasicBlock *Succ : BrInst.successors())
+ CurBB.addSuccessor(&getMBB(*Succ));
return true;
}
@@ -240,47 +299,38 @@ bool IRTranslator::translateSwitch(const User &U,
bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
const LoadInst &LI = cast<LoadInst>(U);
- if (!TPC->isGlobalISelAbortEnabled() && LI.isAtomic())
- return false;
-
- assert(!LI.isAtomic() && "only non-atomic loads are supported at the moment");
auto Flags = LI.isVolatile() ? MachineMemOperand::MOVolatile
: MachineMemOperand::MONone;
Flags |= MachineMemOperand::MOLoad;
unsigned Res = getOrCreateVReg(LI);
unsigned Addr = getOrCreateVReg(*LI.getPointerOperand());
- LLT VTy{*LI.getType(), *DL}, PTy{*LI.getPointerOperand()->getType(), *DL};
+
MIRBuilder.buildLoad(
Res, Addr,
*MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()),
Flags, DL->getTypeStoreSize(LI.getType()),
- getMemOpAlignment(LI)));
+ getMemOpAlignment(LI), AAMDNodes(), nullptr,
+ LI.getSynchScope(), LI.getOrdering()));
return true;
}
bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
const StoreInst &SI = cast<StoreInst>(U);
-
- if (!TPC->isGlobalISelAbortEnabled() && SI.isAtomic())
- return false;
-
- assert(!SI.isAtomic() && "only non-atomic stores supported at the moment");
auto Flags = SI.isVolatile() ? MachineMemOperand::MOVolatile
: MachineMemOperand::MONone;
Flags |= MachineMemOperand::MOStore;
unsigned Val = getOrCreateVReg(*SI.getValueOperand());
unsigned Addr = getOrCreateVReg(*SI.getPointerOperand());
- LLT VTy{*SI.getValueOperand()->getType(), *DL},
- PTy{*SI.getPointerOperand()->getType(), *DL};
MIRBuilder.buildStore(
Val, Addr,
*MF->getMachineMemOperand(
MachinePointerInfo(SI.getPointerOperand()), Flags,
DL->getTypeStoreSize(SI.getValueOperand()->getType()),
- getMemOpAlignment(SI)));
+ getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSynchScope(),
+ SI.getOrdering()));
return true;
}
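// With the ordering and synch scope threaded into the MachineMemOperand,
// atomic loads and stores no longer bail out. Roughly, the IR
// `store atomic i32 %v, i32* %p seq_cst, align 4` now yields
//
//   G_STORE %v(s32), %p(p0) :: (store seq_cst 4 into %ir.p)
//
// (a sketch of the printed MIR); the atomicity lives entirely in the memory
// operand, not in the opcode.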
@@ -305,7 +355,7 @@ bool IRTranslator::translateExtractValue(const User &U,
uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
unsigned Res = getOrCreateVReg(U);
- MIRBuilder.buildExtract(Res, Offset, getOrCreateVReg(*Src));
+ MIRBuilder.buildExtract(Res, getOrCreateVReg(*Src), Offset);
return true;
}
@@ -348,12 +398,18 @@ bool IRTranslator::translateSelect(const User &U,
bool IRTranslator::translateBitCast(const User &U,
MachineIRBuilder &MIRBuilder) {
- if (LLT{*U.getOperand(0)->getType(), *DL} == LLT{*U.getType(), *DL}) {
+ // If we're bitcasting to the source type, we can reuse the source vreg.
+ if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
+ getLLTForType(*U.getType(), *DL)) {
+ // Get the source vreg now, to avoid invalidating ValToVReg.
+ unsigned SrcReg = getOrCreateVReg(*U.getOperand(0));
unsigned &Reg = ValToVReg[&U];
+ // If we already assigned a vreg for this bitcast, we can't change that.
+ // Emit a copy to satisfy the users we already emitted.
if (Reg)
- MIRBuilder.buildCopy(Reg, getOrCreateVReg(*U.getOperand(0)));
+ MIRBuilder.buildCopy(Reg, SrcReg);
else
- Reg = getOrCreateVReg(*U.getOperand(0));
+ Reg = SrcReg;
return true;
}
return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
@@ -375,9 +431,10 @@ bool IRTranslator::translateGetElementPtr(const User &U,
Value &Op0 = *U.getOperand(0);
unsigned BaseReg = getOrCreateVReg(Op0);
- LLT PtrTy{*Op0.getType(), *DL};
- unsigned PtrSize = DL->getPointerSizeInBits(PtrTy.getAddressSpace());
- LLT OffsetTy = LLT::scalar(PtrSize);
+ Type *PtrIRTy = Op0.getType();
+ LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+ Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
+ LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
int64_t Offset = 0;
for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U);
@@ -399,8 +456,8 @@ bool IRTranslator::translateGetElementPtr(const User &U,
if (Offset != 0) {
unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
- unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
- MIRBuilder.buildConstant(OffsetReg, Offset);
+ unsigned OffsetReg =
+ getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg);
BaseReg = NewBaseReg;
@@ -408,8 +465,8 @@ bool IRTranslator::translateGetElementPtr(const User &U,
}
// N = N + Idx * ElementSize;
- unsigned ElementSizeReg = MRI->createGenericVirtualRegister(OffsetTy);
- MIRBuilder.buildConstant(ElementSizeReg, ElementSize);
+ unsigned ElementSizeReg =
+ getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize));
unsigned IdxReg = getOrCreateVReg(*Idx);
if (MRI->getType(IdxReg) != OffsetTy) {
@@ -428,8 +485,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
}
if (Offset != 0) {
- unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
- MIRBuilder.buildConstant(OffsetReg, Offset);
+ unsigned OffsetReg = getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetReg);
return true;
}
@@ -438,13 +494,12 @@ bool IRTranslator::translateGetElementPtr(const User &U,
return true;
}
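// The loop above computes the standard GEP address arithmetic,
//
//   addr = base + (folded constant offsets) + sum_i(Idx_i * ElemSize_i),
//
// emitting one G_MUL/G_GEP pair per variable index. A sketch for
// `getelementptr i32, i32* %p, i64 %n` (element size 4, names invented):
//
//   %off(s64) = G_MUL %n, 4
//   %addr(p0) = G_GEP %p, %off(s64)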
-bool IRTranslator::translateMemcpy(const CallInst &CI,
- MachineIRBuilder &MIRBuilder) {
- LLT SizeTy{*CI.getArgOperand(2)->getType(), *DL};
- if (cast<PointerType>(CI.getArgOperand(0)->getType())->getAddressSpace() !=
- 0 ||
- cast<PointerType>(CI.getArgOperand(1)->getType())->getAddressSpace() !=
- 0 ||
+bool IRTranslator::translateMemfunc(const CallInst &CI,
+ MachineIRBuilder &MIRBuilder,
+ unsigned ID) {
+ LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL);
+ Type *DstTy = CI.getArgOperand(0)->getType();
+ if (cast<PointerType>(DstTy)->getAddressSpace() != 0 ||
SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0))
return false;
@@ -454,14 +509,32 @@ bool IRTranslator::translateMemcpy(const CallInst &CI,
Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType());
}
- MachineOperand Callee = MachineOperand::CreateES("memcpy");
+ const char *Callee;
+ switch (ID) {
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy: {
+ Type *SrcTy = CI.getArgOperand(1)->getType();
+ if (cast<PointerType>(SrcTy)->getAddressSpace() != 0)
+ return false;
+ Callee = ID == Intrinsic::memcpy ? "memcpy" : "memmove";
+ break;
+ }
+ case Intrinsic::memset:
+ Callee = "memset";
+ break;
+ default:
+ return false;
+ }
- return CLI->lowerCall(MIRBuilder, Callee,
+ return CLI->lowerCall(MIRBuilder, CI.getCallingConv(),
+ MachineOperand::CreateES(Callee),
CallLowering::ArgInfo(0, CI.getType()), Args);
}
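// Sketch of the resulting lowering for `llvm.memcpy` (argument names
// invented): the intrinsic becomes an ordinary libcall,
//
//   CLI->lowerCall(MIRBuilder, CI.getCallingConv(),
//                  MachineOperand::CreateES("memcpy"),
//                  /*Res=*/{0, void}, {dst, src, len});
//
// memmove/memset use "memmove"/"memset" instead. Pointers in a non-zero
// address space, or a size type narrower than a pointer, are rejected so
// the call can fall back to SelectionDAG.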
void IRTranslator::getStackGuard(unsigned DstReg,
MachineIRBuilder &MIRBuilder) {
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD);
MIB.addDef(DstReg);
@@ -482,7 +555,7 @@ void IRTranslator::getStackGuard(unsigned DstReg,
bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
MachineIRBuilder &MIRBuilder) {
- LLT Ty{*CI.getOperand(0)->getType(), *DL};
+ LLT Ty = getLLTForType(*CI.getOperand(0)->getType(), *DL);
LLT s1 = LLT::scalar(1);
unsigned Width = Ty.getSizeInBits();
unsigned Res = MRI->createGenericVirtualRegister(Ty);
@@ -494,8 +567,8 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
.addUse(getOrCreateVReg(*CI.getOperand(1)));
if (Op == TargetOpcode::G_UADDE || Op == TargetOpcode::G_USUBE) {
- unsigned Zero = MRI->createGenericVirtualRegister(s1);
- EntryBuilder.buildConstant(Zero, 0);
+ unsigned Zero = getOrCreateVReg(
+ *Constant::getNullValue(Type::getInt1Ty(CI.getContext())));
MIB.addUse(Zero);
}
@@ -508,12 +581,83 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
switch (ID) {
default:
break;
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- // FIXME: these obviously need to be supported properly.
- MF->getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // Stack coloring is not enabled at -O0 (the only level we care about for
+ // now) so we can drop these. Make sure someone notices when we start
+ // compiling at higher opt levels though.
+ if (MF->getTarget().getOptLevel() != CodeGenOpt::None)
+ return false;
+ return true;
+ case Intrinsic::dbg_declare: {
+ const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
+ assert(DI.getVariable() && "Missing variable");
+
+ const Value *Address = DI.getAddress();
+ if (!Address || isa<UndefValue>(Address)) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ return true;
+ }
+
+ assert(DI.getVariable()->isValidLocationForIntrinsic(
+ MIRBuilder.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ auto AI = dyn_cast<AllocaInst>(Address);
+ if (AI && AI->isStaticAlloca()) {
+ // Static allocas are tracked at the MF level, no need for DBG_VALUE
+ // instructions (in fact, they get ignored if they *do* exist).
+ MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
+ getOrCreateFrameIndex(*AI), DI.getDebugLoc());
+ } else
+ MIRBuilder.buildDirectDbgValue(getOrCreateVReg(*Address),
+ DI.getVariable(), DI.getExpression());
+ return true;
+ }
+ case Intrinsic::vaend:
+ // No target I know of cares about va_end. Certainly no in-tree target
+ // does. Simplest intrinsic ever!
return true;
+ case Intrinsic::vastart: {
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ Value *Ptr = CI.getArgOperand(0);
+ unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
+
+ MIRBuilder.buildInstr(TargetOpcode::G_VASTART)
+ .addUse(getOrCreateVReg(*Ptr))
+ .addMemOperand(MF->getMachineMemOperand(
+ MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 0));
+ return true;
+ }
+ case Intrinsic::dbg_value: {
+ // This form of DBG_VALUE is target-independent.
+ const DbgValueInst &DI = cast<DbgValueInst>(CI);
+ const Value *V = DI.getValue();
+ assert(DI.getVariable()->isValidLocationForIntrinsic(
+ MIRBuilder.getDebugLoc()) &&
+ "Expected inlined-at fields to agree");
+ if (!V) {
+ // Currently the optimizer can produce this; insert an undef to
+ // help debugging. Probably the optimizer should not do this.
+ MIRBuilder.buildIndirectDbgValue(0, DI.getOffset(), DI.getVariable(),
+ DI.getExpression());
+ } else if (const auto *CI = dyn_cast<Constant>(V)) {
+ MIRBuilder.buildConstDbgValue(*CI, DI.getOffset(), DI.getVariable(),
+ DI.getExpression());
+ } else {
+ unsigned Reg = getOrCreateVReg(*V);
+ // FIXME: This does not handle register-indirect values at offset 0. The
+ // direct/indirect thing shouldn't really be handled by something as
+ // implicit as reg+noreg vs reg+imm in the first place, but it seems
+ // pretty baked in right now.
+ if (DI.getOffset() != 0)
+ MIRBuilder.buildIndirectDbgValue(Reg, DI.getOffset(), DI.getVariable(),
+ DI.getExpression());
+ else
+ MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(),
+ DI.getExpression());
+ }
+ return true;
+ }
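+ // Summary of the mapping above (a sketch, not exhaustive):
+ //   dbg.value(%v), offset == 0 -> direct DBG_VALUE of the vreg
+ //   dbg.value(%v), offset != 0 -> indirect DBG_VALUE (reg + imm offset)
+ //   dbg.value(constant)        -> DBG_VALUE of an immediate
+ //   dbg.value(null)            -> indirect DBG_VALUE of reg 0, as a marker
+ // The reg+noreg vs. reg+imm encoding is exactly what the FIXME complains
+ // about.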
case Intrinsic::uadd_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDE, MIRBuilder);
case Intrinsic::sadd_with_overflow:
@@ -526,8 +670,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
case Intrinsic::smul_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
+ case Intrinsic::pow:
+ MIRBuilder.buildInstr(TargetOpcode::G_FPOW)
+ .addDef(getOrCreateVReg(CI))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
+ .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
+ return true;
case Intrinsic::memcpy:
- return translateMemcpy(CI, MIRBuilder);
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ return translateMemfunc(CI, MIRBuilder, ID);
case Intrinsic::eh_typeid_for: {
GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
unsigned Reg = getOrCreateVReg(CI);
@@ -546,7 +698,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getStackGuard(getOrCreateVReg(CI), MIRBuilder);
return true;
case Intrinsic::stackprotector: {
- LLT PtrTy{*CI.getArgOperand(0)->getType(), *DL};
+ LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
unsigned GuardVal = MRI->createGenericVirtualRegister(PtrTy);
getStackGuard(GuardVal, MIRBuilder);
@@ -564,18 +716,41 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return false;
}
+bool IRTranslator::translateInlineAsm(const CallInst &CI,
+ MachineIRBuilder &MIRBuilder) {
+ const InlineAsm &IA = cast<InlineAsm>(*CI.getCalledValue());
+ if (!IA.getConstraintString().empty())
+ return false;
+
+ unsigned ExtraInfo = 0;
+ if (IA.hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA.getDialect() == InlineAsm::AD_Intel)
+ ExtraInfo |= InlineAsm::Extra_AsmDialect;
+
+ MIRBuilder.buildInstr(TargetOpcode::INLINEASM)
+ .addExternalSymbol(IA.getAsmString().c_str())
+ .addImm(ExtraInfo);
+
+ return true;
+}
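+// Only bare inline asm with an empty constraint string is handled, e.g.
+// `call void asm sideeffect "nop", ""()`. Anything with operands or
+// constraints returns false so the call falls back to SelectionDAG.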
+
bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
const CallInst &CI = cast<CallInst>(U);
auto TII = MF->getTarget().getIntrinsicInfo();
const Function *F = CI.getCalledFunction();
+ if (CI.isInlineAsm())
+ return translateInlineAsm(CI, MIRBuilder);
+
if (!F || !F->isIntrinsic()) {
unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
SmallVector<unsigned, 8> Args;
for (auto &Arg: CI.arg_operands())
Args.push_back(getOrCreateVReg(*Arg));
- return CLI->lowerCall(MIRBuilder, CI, Res, Args, [&]() {
+ MF->getFrameInfo().setHasCalls(true);
+ return CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
return getOrCreateVReg(*CI.getCalledValue());
});
}
@@ -594,10 +769,10 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory());
for (auto &Arg : CI.arg_operands()) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg))
- MIB.addImm(CI->getSExtValue());
- else
- MIB.addUse(getOrCreateVReg(*Arg));
+ // Some intrinsics take metadata parameters. Reject them.
+ if (isa<MetadataAsValue>(Arg))
+ return false;
+ MIB.addUse(getOrCreateVReg(*Arg));
}
return true;
}
@@ -610,7 +785,7 @@ bool IRTranslator::translateInvoke(const User &U,
const BasicBlock *ReturnBB = I.getSuccessor(0);
const BasicBlock *EHPadBB = I.getSuccessor(1);
- const Value *Callee(I.getCalledValue());
+ const Value *Callee = I.getCalledValue();
const Function *Fn = dyn_cast<Function>(Callee);
if (isa<InlineAsm>(Callee))
return false;
@@ -634,23 +809,24 @@ bool IRTranslator::translateInvoke(const User &U,
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
unsigned Res = I.getType()->isVoidTy() ? 0 : getOrCreateVReg(I);
- SmallVector<CallLowering::ArgInfo, 8> Args;
+ SmallVector<unsigned, 8> Args;
for (auto &Arg: I.arg_operands())
- Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType());
+ Args.push_back(getOrCreateVReg(*Arg));
- if (!CLI->lowerCall(MIRBuilder, MachineOperand::CreateGA(Fn, 0),
- CallLowering::ArgInfo(Res, I.getType()), Args))
+ if (!CLI->lowerCall(MIRBuilder, &I, Res, Args,
+ [&]() { return getOrCreateVReg(*I.getCalledValue()); }))
return false;
MCSymbol *EndSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
// FIXME: track probabilities.
- MachineBasicBlock &EHPadMBB = getOrCreateBB(*EHPadBB),
- &ReturnMBB = getOrCreateBB(*ReturnBB);
+ MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB),
+ &ReturnMBB = getMBB(*ReturnBB);
MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
MIRBuilder.getMBB().addSuccessor(&ReturnMBB);
MIRBuilder.getMBB().addSuccessor(&EHPadMBB);
+ MIRBuilder.buildBr(ReturnMBB);
return true;
}
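// The emitted block therefore looks roughly like:
//
//   EH_LABEL <begin>   ; start of the may-throw range
//   ...lowered call...
//   EH_LABEL <end>     ; end of the may-throw range
//   G_BR %bb.return    ; explicit branch; both successors are linked
//
// and MF->addInvoke records the label range against the landing pad so the
// exception tables can be emitted later.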
@@ -684,37 +860,158 @@ bool IRTranslator::translateLandingPad(const User &U,
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
.addSym(MF->addLandingPad(&MBB));
+ LLT Ty = getLLTForType(*LP.getType(), *DL);
+ unsigned Undef = MRI->createGenericVirtualRegister(Ty);
+ MIRBuilder.buildUndef(Undef);
+
+ SmallVector<LLT, 2> Tys;
+ for (Type *Ty : cast<StructType>(LP.getType())->elements())
+ Tys.push_back(getLLTForType(*Ty, *DL));
+ assert(Tys.size() == 2 && "Only two-valued landingpads are supported");
+
// Mark exception register as live in.
- SmallVector<unsigned, 2> Regs;
- SmallVector<uint64_t, 2> Offsets;
- LLT p0 = LLT::pointer(0, DL->getPointerSizeInBits());
- if (unsigned Reg = TLI.getExceptionPointerRegister(PersonalityFn)) {
- unsigned VReg = MRI->createGenericVirtualRegister(p0);
- MIRBuilder.buildCopy(VReg, Reg);
- Regs.push_back(VReg);
- Offsets.push_back(0);
+ unsigned ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
+ if (!ExceptionReg)
+ return false;
+
+ MBB.addLiveIn(ExceptionReg);
+ unsigned VReg = MRI->createGenericVirtualRegister(Tys[0]),
+ Tmp = MRI->createGenericVirtualRegister(Ty);
+ MIRBuilder.buildCopy(VReg, ExceptionReg);
+ MIRBuilder.buildInsert(Tmp, Undef, VReg, 0);
+
+ unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
+ if (!SelectorReg)
+ return false;
+
+ MBB.addLiveIn(SelectorReg);
+
+ // N.b. the exception selector register always has pointer type and may not
+ // match the actual IR-level type in the landingpad so an extra cast is
+ // needed.
+ unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
+ MIRBuilder.buildCopy(PtrVReg, SelectorReg);
+
+ VReg = MRI->createGenericVirtualRegister(Tys[1]);
+ MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT).addDef(VReg).addUse(PtrVReg);
+ MIRBuilder.buildInsert(getOrCreateVReg(LP), Tmp, VReg,
+ Tys[0].getSizeInBits());
+ return true;
+}
+
+bool IRTranslator::translateAlloca(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ auto &AI = cast<AllocaInst>(U);
+
+ if (AI.isStaticAlloca()) {
+ unsigned Res = getOrCreateVReg(AI);
+ int FI = getOrCreateFrameIndex(AI);
+ MIRBuilder.buildFrameIndex(Res, FI);
+ return true;
+ }
+
+ // Now we're in the harder dynamic case.
+ Type *Ty = AI.getAllocatedType();
+ unsigned Align =
+ std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI.getAlignment());
+
+ unsigned NumElts = getOrCreateVReg(*AI.getArraySize());
+
+ Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
+ LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL);
+ if (MRI->getType(NumElts) != IntPtrTy) {
+ unsigned ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
+ MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts);
+ NumElts = ExtElts;
}
- if (unsigned Reg = TLI.getExceptionSelectorRegister(PersonalityFn)) {
- unsigned VReg = MRI->createGenericVirtualRegister(p0);
- MIRBuilder.buildCopy(VReg, Reg);
- Regs.push_back(VReg);
- Offsets.push_back(p0.getSizeInBits());
+ unsigned AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
+ unsigned TySize =
+ getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty)));
+ MIRBuilder.buildMul(AllocSize, NumElts, TySize);
+
+ LLT PtrTy = getLLTForType(*AI.getType(), *DL);
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+
+ unsigned SPTmp = MRI->createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildCopy(SPTmp, SPReg);
+
+ unsigned AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize);
+
+ // Handle alignment. We have to realign if the allocation granule was smaller
+ // than stack alignment, or the specific alloca requires more than stack
+ // alignment.
+ unsigned StackAlign =
+ MF->getSubtarget().getFrameLowering()->getStackAlignment();
+ Align = std::max(Align, StackAlign);
+ if (Align > StackAlign || DL->getTypeAllocSize(Ty) % StackAlign != 0) {
+ // Align the allocation down to the requested alignment by masking off
+ // the low bits of the bumped stack pointer. The stack grows down, so
+ // this can only grow the allocation, never shrink it into live data.
+ unsigned AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align));
+ AllocTmp = AlignedAlloc;
}
- MIRBuilder.buildSequence(getOrCreateVReg(LP), Regs, Offsets);
+ MIRBuilder.buildCopy(SPReg, AllocTmp);
+ MIRBuilder.buildCopy(getOrCreateVReg(AI), AllocTmp);
+
+ MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, &AI);
+ assert(MF->getFrameInfo().hasVarSizedObjects());
return true;
}
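// Net effect for a dynamic `alloca Ty, iN %n` (a sketch; the stack is
// assumed to grow down, which is why TySize above is negated):
//
//   %size = %n * -alloc_size(Ty)     ; G_MUL
//   %sp   = copy of stack pointer
//   %tmp  = G_GEP %sp, %size         ; i.e. sp - n * alloc_size(Ty)
//   %tmp &= ~(Align - 1)             ; G_PTR_MASK, only when realigning
//   sp    = %tmp                     ; and %tmp is the alloca's value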
-bool IRTranslator::translateStaticAlloca(const AllocaInst &AI,
- MachineIRBuilder &MIRBuilder) {
- if (!TPC->isGlobalISelAbortEnabled() && !AI.isStaticAlloca())
- return false;
+bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
+ // FIXME: We may need more info about the type. Because of how LLT works,
+ // we're completely discarding the i64/double distinction here (amongst
+ // others). Fortunately the ABIs I know of where that matters don't use va_arg
+ // anyway but that's not guaranteed.
+ MIRBuilder.buildInstr(TargetOpcode::G_VAARG)
+ .addDef(getOrCreateVReg(U))
+ .addUse(getOrCreateVReg(*U.getOperand(0)))
+ .addImm(DL->getABITypeAlignment(U.getType()));
+ return true;
+}
- assert(AI.isStaticAlloca() && "only handle static allocas now");
- unsigned Res = getOrCreateVReg(AI);
- int FI = getOrCreateFrameIndex(AI);
- MIRBuilder.buildFrameIndex(Res, FI);
+bool IRTranslator::translateInsertElement(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // If it is a <1 x Ty> vector, use the scalar directly: single-element
+ // vectors are not legal vector types in LLT.
+ if (U.getType()->getVectorNumElements() == 1) {
+ unsigned Elt = getOrCreateVReg(*U.getOperand(1));
+ ValToVReg[&U] = Elt;
+ return true;
+ }
+ MIRBuilder.buildInsertVectorElement(
+ getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)),
+ getOrCreateVReg(*U.getOperand(1)), getOrCreateVReg(*U.getOperand(2)));
+ return true;
+}
+
+bool IRTranslator::translateExtractElement(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // If it is a <1 x Ty> vector, use the scalar directly: single-element
+ // vectors are not legal vector types in LLT.
+ if (U.getOperand(0)->getType()->getVectorNumElements() == 1) {
+ unsigned Elt = getOrCreateVReg(*U.getOperand(0));
+ ValToVReg[&U] = Elt;
+ return true;
+ }
+ MIRBuilder.buildExtractVectorElement(getOrCreateVReg(U),
+ getOrCreateVReg(*U.getOperand(0)),
+ getOrCreateVReg(*U.getOperand(1)));
+ return true;
+}
+
+bool IRTranslator::translateShuffleVector(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ MIRBuilder.buildInstr(TargetOpcode::G_SHUFFLE_VECTOR)
+ .addDef(getOrCreateVReg(U))
+ .addUse(getOrCreateVReg(*U.getOperand(0)))
+ .addUse(getOrCreateVReg(*U.getOperand(1)))
+ .addUse(getOrCreateVReg(*U.getOperand(2)));
return true;
}
@@ -736,11 +1033,21 @@ void IRTranslator::finishPendingPhis() {
// won't create extra control flow here, otherwise we need to find the
// dominating predecessor here (or perhaps force the weirder IRTranslators
// to provide a simple boundary).
+ SmallSet<const BasicBlock *, 4> HandledPreds;
+
for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
- assert(BBToMBB[PI->getIncomingBlock(i)]->isSuccessor(MIB->getParent()) &&
- "I appear to have misunderstood Machine PHIs");
- MIB.addUse(getOrCreateVReg(*PI->getIncomingValue(i)));
- MIB.addMBB(BBToMBB[PI->getIncomingBlock(i)]);
+ auto IRPred = PI->getIncomingBlock(i);
+ if (HandledPreds.count(IRPred))
+ continue;
+
+ HandledPreds.insert(IRPred);
+ unsigned ValReg = getOrCreateVReg(*PI->getIncomingValue(i));
+ for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
+ assert(Pred->isSuccessor(MIB->getParent()) &&
+ "incorrect CFG at MachineBasicBlock level");
+ MIB.addUse(ValReg);
+ MIB.addMBB(Pred);
+ }
}
}
}
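// The HandledPreds set matters because one IR edge can now expand into
// several machine edges (each switch case gets its own comparison block),
// while duplicate IR entries for the same predecessor must still yield one
// (value, MBB) operand pair each. E.g. `phi [%v, %bb1], [%v, %bb1]` adds
// %v once per machine predecessor of the bb1 edge, not twice.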
@@ -752,9 +1059,7 @@ bool IRTranslator::translate(const Instruction &Inst) {
case Instruction::OPCODE: return translate##OPCODE(Inst, CurBuilder);
#include "llvm/IR/Instruction.def"
default:
- if (!TPC->isGlobalISelAbortEnabled())
- return false;
- llvm_unreachable("unknown opcode");
+ return false;
}
}
@@ -764,25 +1069,43 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
else if (auto CF = dyn_cast<ConstantFP>(&C))
EntryBuilder.buildFConstant(Reg, *CF);
else if (isa<UndefValue>(C))
- EntryBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(Reg);
+ EntryBuilder.buildUndef(Reg);
else if (isa<ConstantPointerNull>(C))
EntryBuilder.buildConstant(Reg, 0);
else if (auto GV = dyn_cast<GlobalValue>(&C))
EntryBuilder.buildGlobalValue(Reg, GV);
- else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
+ else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
+ if (!CAZ->getType()->isVectorTy())
+ return false;
+ // Return the scalar if it is a <1 x Ty> vector.
+ if (CAZ->getNumElements() == 1)
+ return translate(*CAZ->getElementValue(0u), Reg);
+ std::vector<unsigned> Ops;
+ for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
+ Constant &Elt = *CAZ->getElementValue(i);
+ Ops.push_back(getOrCreateVReg(Elt));
+ }
+ EntryBuilder.buildMerge(Reg, Ops);
+ } else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
+ // Return the scalar if it is a <1 x Ty> vector.
+ if (CV->getNumElements() == 1)
+ return translate(*CV->getElementAsConstant(0), Reg);
+ std::vector<unsigned> Ops;
+ for (unsigned i = 0; i < CV->getNumElements(); ++i) {
+ Constant &Elt = *CV->getElementAsConstant(i);
+ Ops.push_back(getOrCreateVReg(Elt));
+ }
+ EntryBuilder.buildMerge(Reg, Ops);
+ } else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
switch(CE->getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE: return translate##OPCODE(*CE, EntryBuilder);
#include "llvm/IR/Instruction.def"
default:
- if (!TPC->isGlobalISelAbortEnabled())
- return false;
- llvm_unreachable("unknown opcode");
+ return false;
}
- } else if (!TPC->isGlobalISelAbortEnabled())
+ } else
return false;
- else
- llvm_unreachable("unhandled constant kind");
return true;
}
@@ -793,7 +1116,7 @@ void IRTranslator::finalizeFunction() {
PendingPHIs.clear();
ValToVReg.clear();
FrameIndices.clear();
- Constants.clear();
+ MachinePreds.clear();
}
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
@@ -807,85 +1130,101 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MRI = &MF->getRegInfo();
DL = &F.getParent()->getDataLayout();
TPC = &getAnalysis<TargetPassConfig>();
+ ORE = make_unique<OptimizationRemarkEmitter>(&F);
assert(PendingPHIs.empty() && "stale PHIs");
- // Setup a separate basic-block for the arguments and constants, falling
- // through to the IR-level Function's entry block.
+ // Release the per-function state when we return, whether we succeeded or not.
+ auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); });
+
+ // Set up a separate basic-block for the arguments and constants.
MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
MF->push_back(EntryBB);
- EntryBB->addSuccessor(&getOrCreateBB(F.front()));
EntryBuilder.setMBB(*EntryBB);
+ // Create all blocks, in IR order, to preserve the layout.
+ for (const BasicBlock &BB: F) {
+ auto *&MBB = BBToMBB[&BB];
+
+ MBB = MF->CreateMachineBasicBlock(&BB);
+ MF->push_back(MBB);
+
+ if (BB.hasAddressTaken())
+ MBB->setHasAddressTaken();
+ }
+
+ // Make our arguments/constants entry block fall through to the IR entry block.
+ EntryBB->addSuccessor(&getMBB(F.front()));
+
// Lower the actual args into this basic block.
SmallVector<unsigned, 8> VRegArgs;
for (const Argument &Arg: F.args())
VRegArgs.push_back(getOrCreateVReg(Arg));
- bool Succeeded = CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs);
- if (!Succeeded) {
- if (!TPC->isGlobalISelAbortEnabled()) {
- MF->getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
- finalizeFunction();
- return false;
- }
- report_fatal_error("Unable to lower arguments");
+ if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) {
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ MF->getFunction()->getSubprogram(),
+ &MF->getFunction()->getEntryBlock());
+ R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return false;
}
// And translate the function!
for (const BasicBlock &BB: F) {
- MachineBasicBlock &MBB = getOrCreateBB(BB);
+ MachineBasicBlock &MBB = getMBB(BB);
// Set the insertion point of all the following translations to
// the end of this basic block.
CurBuilder.setMBB(MBB);
for (const Instruction &Inst: BB) {
- Succeeded &= translate(Inst);
- if (!Succeeded) {
- if (TPC->isGlobalISelAbortEnabled())
- reportTranslationError(Inst, "unable to translate instruction");
- MF->getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
- break;
- }
- }
- }
-
- if (Succeeded) {
- finishPendingPhis();
-
- // Now that the MachineFrameInfo has been configured, no further changes to
- // the reserved registers are possible.
- MRI->freezeReservedRegs(*MF);
-
- // Merge the argument lowering and constants block with its single
- // successor, the LLVM-IR entry block. We want the basic block to
- // be maximal.
- assert(EntryBB->succ_size() == 1 &&
- "Custom BB used for lowering should have only one successor");
- // Get the successor of the current entry block.
- MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
- assert(NewEntryBB.pred_size() == 1 &&
- "LLVM-IR entry block has a predecessor!?");
- // Move all the instruction from the current entry block to the
- // new entry block.
- NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
- EntryBB->end());
-
- // Update the live-in information for the new entry block.
- for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
- NewEntryBB.addLiveIn(LiveIn);
- NewEntryBB.sortUniqueLiveIns();
+ if (translate(Inst))
+ continue;
- // Get rid of the now empty basic block.
- EntryBB->removeSuccessor(&NewEntryBB);
- MF->remove(EntryBB);
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << Inst;
- assert(&MF->front() == &NewEntryBB &&
- "New entry wasn't next in the list of basic block!");
+ OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+ Inst.getDebugLoc(), &BB);
+ R << "unable to translate instruction: " << ore::NV("Opcode", &Inst)
+ << ": '" << InstStr.str() << "'";
+ reportTranslationError(*MF, *TPC, *ORE, R);
+ return false;
+ }
}
- finalizeFunction();
+ finishPendingPhis();
+
+ // Now that the MachineFrameInfo has been configured, no further changes to
+ // the reserved registers are possible.
+ MRI->freezeReservedRegs(*MF);
+
+ // Merge the argument lowering and constants block with its single
+ // successor, the LLVM-IR entry block. We want the basic block to
+ // be maximal.
+ assert(EntryBB->succ_size() == 1 &&
+ "Custom BB used for lowering should have only one successor");
+ // Get the successor of the current entry block.
+ MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
+ assert(NewEntryBB.pred_size() == 1 &&
+ "LLVM-IR entry block has a predecessor!?");
+ // Move all the instructions from the current entry block to the
+ // new entry block.
+ NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
+ EntryBB->end());
+
+ // Update the live-in information for the new entry block.
+ for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
+ NewEntryBB.addLiveIn(LiveIn);
+ NewEntryBB.sortUniqueLiveIns();
+
+ // Get rid of the now empty basic block.
+ EntryBB->removeSuccessor(&NewEntryBB);
+ MF->remove(EntryBB);
+ MF->DeleteMachineBasicBlock(EntryBB);
+
+ assert(&MF->front() == &NewEntryBB &&
+ "New entry wasn't next in the list of basic block!");
return false;
}
diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 1d205cd6c9c8..26454c1ef00f 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -12,11 +12,15 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -44,17 +48,14 @@ void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-static void reportSelectionError(const MachineInstr *MI, const Twine &Message) {
- const MachineFunction &MF = *MI->getParent()->getParent();
- std::string ErrStorage;
- raw_string_ostream Err(ErrStorage);
- Err << Message << ":\nIn function: " << MF.getName() << '\n';
- if (MI)
- Err << *MI << '\n';
- report_fatal_error(Err.str());
-}
-
bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // No matter what happens, whether we successfully select the function or not,
+ // nothing is going to use the vreg types after us. Make sure they disappear.
+ auto ClearVRegTypesOnReturn =
+ make_scope_exit([&]() { MRI.getVRegToType().clear(); });
+
// If the ISel pipeline failed, do not bother running that pass.
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel))
@@ -66,11 +67,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
assert(ISel && "Cannot work without InstructionSelector");
+ // An optimization remark emitter. Used to report failures.
+ MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
+
// FIXME: freezeReservedRegs is now done in IRTranslator, but there are many
// other MF/MFI fields we need to initialize.
- const MachineRegisterInfo &MRI = MF.getRegInfo();
-
#ifndef NDEBUG
// Check that our input is fully legal: we require the function to have the
// Legalized property, so it should be.
@@ -80,17 +82,19 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
// that it has the same layering problem, but we only use inline methods so
// end up not needing to link against the GlobalISel library.
if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo())
- for (const MachineBasicBlock &MBB : MF)
- for (const MachineInstr &MI : MBB)
- if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI))
- reportSelectionError(&MI, "Instruction is not legal");
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineInstr &MI : MBB)
+ if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) {
+ reportGISelFailure(MF, TPC, MORE, "gisel-select",
+ "instruction is not legal", MI);
+ return false;
+ }
#endif
// FIXME: We could introduce new blocks and will need to fix the outer loop.
// Until then, keep track of the number of blocks to assert that we don't.
const size_t NumBlocks = MF.size();
- bool Failed = false;
for (MachineBasicBlock *MBB : post_order(&MF)) {
if (MBB->empty())
continue;
@@ -115,14 +119,19 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Selecting: \n " << MI);
+ // We could have folded this instruction away already, making it dead.
+ // If so, erase it.
+ if (isTriviallyDead(MI, MRI)) {
+ DEBUG(dbgs() << "Is dead; erasing.\n");
+ MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ continue;
+ }
+
if (!ISel->select(MI)) {
- if (TPC.isGlobalISelAbortEnabled())
- // FIXME: It would be nice to dump all inserted instructions. It's
- // not
- // obvious how, esp. considering select() can insert after MI.
- reportSelectionError(&MI, "Cannot select");
- Failed = true;
- break;
+ // FIXME: It would be nice to dump all inserted instructions. It's
+ // not obvious how, esp. considering select() can insert after MI.
+ reportGISelFailure(MF, TPC, MORE, "gisel-select", "cannot select", MI);
+ return false;
}
// Dump the range of instructions that MI expanded into.
@@ -142,33 +151,36 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
for (auto &VRegToType : MRI.getVRegToType()) {
unsigned VReg = VRegToType.first;
auto *RC = MRI.getRegClassOrNull(VReg);
- auto *MI = MRI.def_instr_begin(VReg) == MRI.def_instr_end()
- ? nullptr
- : &*MRI.def_instr_begin(VReg);
- if (!RC) {
- if (TPC.isGlobalISelAbortEnabled())
- reportSelectionError(MI, "VReg as no regclass after selection");
- Failed = true;
- break;
- }
+ MachineInstr *MI = nullptr;
+ if (!MRI.def_empty(VReg))
+ MI = &*MRI.def_instr_begin(VReg);
+ else if (!MRI.use_empty(VReg))
+ MI = &*MRI.use_instr_begin(VReg);
+
+ if (MI && !RC) {
+ reportGISelFailure(MF, TPC, MORE, "gisel-select",
+ "VReg has no regclass after selection", *MI);
+ return false;
+ } else if (!RC)
+ continue;
if (VRegToType.second.isValid() &&
VRegToType.second.getSizeInBits() > (RC->getSize() * 8)) {
- if (TPC.isGlobalISelAbortEnabled())
- reportSelectionError(
- MI, "VReg has explicit size different from class size");
- Failed = true;
- break;
+ reportGISelFailure(MF, TPC, MORE, "gisel-select",
+ "VReg has explicit size different from class size",
+ *MI);
+ return false;
}
}
- MRI.getVRegToType().clear();
-
- if (!TPC.isGlobalISelAbortEnabled() && (Failed || MF.size() != NumBlocks)) {
- MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+ if (MF.size() != NumBlocks) {
+ MachineOptimizationRemarkMissed R("gisel-select", "GISelFailure",
+ MF.getFunction()->getSubprogram(),
+ /*MBB=*/nullptr);
+ R << "inserting blocks is not supported yet";
+ reportGISelFailure(MF, TPC, MORE, R);
return false;
}
- assert(MF.size() == NumBlocks && "Inserting blocks is not supported yet");
// FIXME: Should we accurately track changes?
return true;
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 5c34da0dc557..fb9d01ef8542 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -14,6 +14,8 @@
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -55,6 +57,45 @@ bool InstructionSelector::constrainSelectedInstRegOperands(
// constrainOperandRegClass does that for us.
MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(),
Reg, OpI));
+
+ // Tie uses to defs as indicated in MCInstrDesc.
+ if (MO.isUse()) {
+ int DefIdx = I.getDesc().getOperandConstraint(OpI, MCOI::TIED_TO);
+ if (DefIdx != -1)
+ I.tieOperands(DefIdx, OpI);
+ }
}
return true;
}
+
+Optional<int64_t>
+InstructionSelector::getConstantVRegVal(unsigned VReg,
+ const MachineRegisterInfo &MRI) const {
+ MachineInstr *MI = MRI.getVRegDef(VReg);
+ if (MI->getOpcode() != TargetOpcode::G_CONSTANT)
+ return None;
+
+ if (MI->getOperand(1).isImm())
+ return MI->getOperand(1).getImm();
+
+ if (MI->getOperand(1).isCImm() &&
+ MI->getOperand(1).getCImm()->getBitWidth() <= 64)
+ return MI->getOperand(1).getCImm()->getSExtValue();
+
+ return None;
+}
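+// Typical use from a target's selector (a sketch; the operand index is
+// assumed):
+//
+//   if (auto Cst = getConstantVRegVal(I.getOperand(2).getReg(), MRI))
+//     if (*Cst == 0)
+//       /* select the compare-with-zero form */;
+//
+// Only G_CONSTANTs whose immediate fits in 64 bits are folded; wider CImms
+// and non-constant defs return None.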
+
+bool InstructionSelector::isOperandImmEqual(
+ const MachineOperand &MO, int64_t Value,
+ const MachineRegisterInfo &MRI) const {
+
+ if (MO.getReg())
+ if (auto VRegVal = getConstantVRegVal(MO.getReg(), MRI))
+ return *VRegVal == Value;
+ return false;
+}
+
+bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI) const {
+ return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
+ MI.implicit_operands().begin() == MI.implicit_operands().end();
+}
diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp
index e86356880e99..657ddb307919 100644
--- a/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -16,6 +16,8 @@
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
@@ -92,10 +94,7 @@ bool Legalizer::combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
"unexpected physical register in G_SEQUENCE");
// Finally we can replace the uses.
- for (auto &Use : MRI.use_operands(ExtractReg)) {
- Changed = true;
- Use.setReg(OrigReg);
- }
+ MRI.replaceRegWith(ExtractReg, OrigReg);
}
if (AllDefsReplaced) {
@@ -114,6 +113,36 @@ bool Legalizer::combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
return Changed;
}
+bool Legalizer::combineMerges(MachineInstr &MI, MachineRegisterInfo &MRI,
+ const TargetInstrInfo &TII) {
+ if (MI.getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
+ return false;
+
+ unsigned NumDefs = MI.getNumOperands() - 1;
+ unsigned SrcReg = MI.getOperand(NumDefs).getReg();
+ MachineInstr &MergeI = *MRI.def_instr_begin(SrcReg);
+ if (MergeI.getOpcode() != TargetOpcode::G_MERGE_VALUES)
+ return false;
+
+ if (MergeI.getNumOperands() - 1 != NumDefs)
+ return false;
+
+ // FIXME: is a COPY appropriate if the types mismatch? We know both registers
+ // are allocatable by now.
+ if (MRI.getType(MI.getOperand(0).getReg()) !=
+ MRI.getType(MergeI.getOperand(1).getReg()))
+ return false;
+
+ for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
+ MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
+ MergeI.getOperand(Idx + 1).getReg());
+
+ MI.eraseFromParent();
+ if (MRI.use_empty(MergeI.getOperand(0).getReg()))
+ MergeI.eraseFromParent();
+ return true;
+}
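+// Example of the cancellation this performs:
+//
+//   %x(s64) = G_MERGE_VALUES %lo(s32), %hi(s32)
+//   %a(s32), %b(s32) = G_UNMERGE_VALUES %x(s64)
+//
+// rewrites all uses of %a/%b to %lo/%hi and erases both instructions once
+// the merge is dead. Mismatched piece counts or piece types bail out.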
+
bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// If the ISel pipeline failed, do not bother running that pass.
if (MF.getProperties().hasProperty(
@@ -122,7 +151,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n');
init(MF);
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
- const LegalizerInfo &LegalizerInfo = *MF.getSubtarget().getLegalizerInfo();
+ MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
LegalizerHelper Helper(MF);
// FIXME: an instruction may need more than one pass before it is legal. For
@@ -142,27 +171,33 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// and are assumed to be legal.
if (!isPreISelGenericOpcode(MI->getOpcode()))
continue;
-
- auto Res = Helper.legalizeInstr(*MI, LegalizerInfo);
-
- // Error out if we couldn't legalize this instruction. We may want to fall
- // back to DAG ISel instead in the future.
- if (Res == LegalizerHelper::UnableToLegalize) {
- if (!TPC.isGlobalISelAbortEnabled()) {
- MF.getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
- return false;
+ SmallVector<MachineInstr *, 4> WorkList;
+ Helper.MIRBuilder.recordInsertions(
+ [&](MachineInstr *MI) { WorkList.push_back(MI); });
+ WorkList.push_back(&*MI);
+
+ LegalizerHelper::LegalizeResult Res;
+ unsigned Idx = 0;
+ do {
+ Res = Helper.legalizeInstrStep(*WorkList[Idx]);
+ // Error out if we couldn't legalize this instruction. We may want to
+ // fall back to DAG ISel instead in the future.
+ if (Res == LegalizerHelper::UnableToLegalize) {
+ Helper.MIRBuilder.stopRecordingInsertions();
+ reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
+ "unable to legalize instruction",
+ *WorkList[Idx]);
+ return false;
+ }
- std::string Msg;
- raw_string_ostream OS(Msg);
- OS << "unable to legalize instruction: ";
- MI->print(OS);
- report_fatal_error(OS.str());
- }
-
- Changed |= Res == LegalizerHelper::Legalized;
- }
+ Changed |= Res == LegalizerHelper::Legalized;
+ ++Idx;
+ } while (Idx < WorkList.size());
+ Helper.MIRBuilder.stopRecordingInsertions();
+ }
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
@@ -173,6 +208,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
NextMI = std::next(MI);
Changed |= combineExtracts(*MI, MRI, TII);
+ Changed |= combineMerges(*MI, MRI, TII);
}
}
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index eb25b6ca268f..20358f7ee6c2 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -29,14 +29,13 @@
using namespace llvm;
LegalizerHelper::LegalizerHelper(MachineFunction &MF)
- : MRI(MF.getRegInfo()) {
+ : MRI(MF.getRegInfo()), LI(*MF.getSubtarget().getLegalizerInfo()) {
MIRBuilder.setMF(MF);
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
- const LegalizerInfo &LegalizerInfo) {
- auto Action = LegalizerInfo.getAction(MI, MRI);
+LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
+ auto Action = LI.getAction(MI, MRI);
switch (std::get<0>(Action)) {
case LegalizerInfo::Legal:
return AlreadyLegal;
@@ -50,46 +49,32 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
return lower(MI, std::get<1>(Action), std::get<2>(Action));
case LegalizerInfo::FewerElements:
return fewerElementsVector(MI, std::get<1>(Action), std::get<2>(Action));
+ case LegalizerInfo::Custom:
+ return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized
+ : UnableToLegalize;
default:
return UnableToLegalize;
}
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::legalizeInstr(MachineInstr &MI,
- const LegalizerInfo &LegalizerInfo) {
- SmallVector<MachineInstr *, 4> WorkList;
- MIRBuilder.recordInsertions(
- [&](MachineInstr *MI) { WorkList.push_back(MI); });
- WorkList.push_back(&MI);
-
- bool Changed = false;
- LegalizeResult Res;
- unsigned Idx = 0;
- do {
- Res = legalizeInstrStep(*WorkList[Idx], LegalizerInfo);
- if (Res == UnableToLegalize) {
- MIRBuilder.stopRecordingInsertions();
- return UnableToLegalize;
- }
- Changed |= Res == Legalized;
- ++Idx;
- } while (Idx < WorkList.size());
-
- MIRBuilder.stopRecordingInsertions();
-
- return Changed ? Legalized : AlreadyLegal;
-}
-
void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
SmallVectorImpl<unsigned> &VRegs) {
- unsigned Size = Ty.getSizeInBits();
- SmallVector<uint64_t, 4> Indexes;
- for (int i = 0; i < NumParts; ++i) {
+ for (int i = 0; i < NumParts; ++i)
VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
- Indexes.push_back(i * Size);
+ MIRBuilder.buildUnmerge(VRegs, Reg);
+}
+
+static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
+ switch (Opcode) {
+ case TargetOpcode::G_FADD:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
+ case TargetOpcode::G_FREM:
+ return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
+ case TargetOpcode::G_FPOW:
+ return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
}
- MIRBuilder.buildExtract(VRegs, Indexes, Reg);
+ llvm_unreachable("Unknown libcall function");
}
LegalizerHelper::LegalizeResult
@@ -101,17 +86,19 @@ LegalizerHelper::libcall(MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FPOW:
case TargetOpcode::G_FREM: {
auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
Type *Ty = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
- const char *Name =
- TLI.getLibcallName(Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32);
-
+ auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
+ const char *Name = TLI.getLibcallName(Libcall);
+ MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
CLI.lowerCall(
- MIRBuilder, MachineOperand::CreateES(Name),
- {MI.getOperand(0).getReg(), Ty},
+ MIRBuilder, TLI.getLibcallCallingConv(Libcall),
+ MachineOperand::CreateES(Name), {MI.getOperand(0).getReg(), Ty},
{{MI.getOperand(1).getReg(), Ty}, {MI.getOperand(2).getReg(), Ty}});
MI.eraseFromParent();
return Legalized;
@@ -125,19 +112,18 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// FIXME: Don't know how to handle secondary types yet.
if (TypeIdx != 0)
return UnableToLegalize;
+
+ MIRBuilder.setInstr(MI);
+
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
case TargetOpcode::G_ADD: {
// Expand in terms of carry-setting/consuming G_ADDE instructions.
- unsigned NarrowSize = NarrowTy.getSizeInBits();
int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() /
NarrowTy.getSizeInBits();
- MIRBuilder.setInstr(MI);
-
SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
- SmallVector<uint64_t, 2> Indexes;
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
@@ -152,11 +138,138 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Src2Regs[i], CarryIn);
DstRegs.push_back(DstReg);
- Indexes.push_back(i * NarrowSize);
CarryIn = CarryOut;
}
unsigned DstReg = MI.getOperand(0).getReg();
- MIRBuilder.buildSequence(DstReg, DstRegs, Indexes);
+ MIRBuilder.buildMerge(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_INSERT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ int64_t NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts =
+ MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+
+ SmallVector<unsigned, 2> SrcRegs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ unsigned OpReg = MI.getOperand(2).getReg();
+ int64_t OpStart = MI.getOperand(3).getImm();
+ int64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstStart = i * NarrowSize;
+
+ if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
+ // No part of the insert affects this subregister, forward the original.
+ DstRegs.push_back(SrcRegs[i]);
+ continue;
+ } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+ // The entire subregister is defined by this insert, forward the new
+ // value.
+ DstRegs.push_back(OpReg);
+ continue;
+ }
+
+ // The insert only partially overlaps this part: compute where the
+ // overlapping segment starts in OpReg and in this destination part,
+ // and how many bits it covers.
+ int64_t ExtractOffset, InsertOffset, SegSize;
+ if (OpStart < DstStart) {
+ InsertOffset = 0;
+ ExtractOffset = DstStart - OpStart;
+ SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
+ } else {
+ InsertOffset = OpStart - DstStart;
+ ExtractOffset = 0;
+ SegSize =
+ std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
+ }
+
+ unsigned SegReg = OpReg;
+ if (ExtractOffset != 0 || SegSize != OpSize) {
+ // A genuine extract is needed.
+ SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
+ MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
+ }
+
+ unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
+ DstRegs.push_back(DstReg);
+ }
+
+ assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
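+ // Worked example (assumed values): narrowing a 128-bit G_INSERT into
+ // 64-bit parts with OpStart = 48 and OpSize = 32, so Op covers bits
+ // [48, 80):
+ //   part 0 = bits [0, 64):   overlap [48, 64) -> ExtractOffset = 0,
+ //            InsertOffset = 48, SegSize = 16
+ //   part 1 = bits [64, 128): overlap [64, 80) -> ExtractOffset = 16,
+ //            InsertOffset = 0,  SegSize = 16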
+ case TargetOpcode::G_LOAD: {
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts =
+ MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+ LLT NarrowPtrTy = LLT::pointer(
+ MRI.getType(MI.getOperand(1).getReg()).getAddressSpace(), NarrowSize);
+
+ SmallVector<unsigned, 2> DstRegs;
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ unsigned SrcReg = MRI.createGenericVirtualRegister(NarrowPtrTy);
+ unsigned Offset = MRI.createGenericVirtualRegister(LLT::scalar(64));
+
+ MIRBuilder.buildConstant(Offset, i * NarrowSize / 8);
+ MIRBuilder.buildGEP(SrcReg, MI.getOperand(1).getReg(), Offset);
+ // TODO: This is conservatively correct, but we probably want to split the
+ // memory operands in the future.
+ MIRBuilder.buildLoad(DstReg, SrcReg, **MI.memoperands_begin());
+
+ DstRegs.push_back(DstReg);
+ }
+ unsigned DstReg = MI.getOperand(0).getReg();
+ MIRBuilder.buildMerge(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_STORE: {
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts =
+ MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+ LLT NarrowPtrTy = LLT::pointer(
+ MRI.getType(MI.getOperand(1).getReg()).getAddressSpace(), NarrowSize);
+
+ SmallVector<unsigned, 2> SrcRegs;
+ extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstReg = MRI.createGenericVirtualRegister(NarrowPtrTy);
+ unsigned Offset = MRI.createGenericVirtualRegister(LLT::scalar(64));
+ MIRBuilder.buildConstant(Offset, i * NarrowSize / 8);
+ MIRBuilder.buildGEP(DstReg, MI.getOperand(1).getReg(), Offset);
+ // TODO: This is conservatively correct, but we probably want to split the
+ // memory operands in the future.
+ MIRBuilder.buildStore(SrcRegs[i], DstReg, **MI.memoperands_begin());
+ }
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_CONSTANT: {
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts =
+ MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize;
+ const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
+ LLVMContext &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+
+ SmallVector<unsigned, 2> DstRegs;
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ ConstantInt *CI =
+ ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
+ MIRBuilder.buildConstant(DstReg, *CI);
+ DstRegs.push_back(DstReg);
+ }
+ unsigned DstReg = MI.getOperand(0).getReg();
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
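// E.g. narrowing an s64 G_CONSTANT 0x1122334455667788 with NarrowTy = s32:
//
//   part 0 = Cst.lshr(0).trunc(32)  = 0x55667788   (low half)
//   part 1 = Cst.lshr(32).trunc(32) = 0x11223344   (high half)
//
// followed by a single G_MERGE_VALUES rebuilding the s64 value.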
@@ -175,7 +288,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_MUL:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
- case TargetOpcode::G_SUB: {
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_SHL: {
// Perform operation at larger width (any extension is fine here, high bits
// don't affect the result) and then truncate the result back to the
// original type.
@@ -195,10 +309,13 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
}
case TargetOpcode::G_SDIV:
- case TargetOpcode::G_UDIV: {
- unsigned ExtOp = MI.getOpcode() == TargetOpcode::G_SDIV
- ? TargetOpcode::G_SEXT
- : TargetOpcode::G_ZEXT;
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR: {
+ unsigned ExtOp = MI.getOpcode() == TargetOpcode::G_SDIV ||
+ MI.getOpcode() == TargetOpcode::G_ASHR
+ ? TargetOpcode::G_SEXT
+ : TargetOpcode::G_ZEXT;
unsigned LHSExt = MRI.createGenericVirtualRegister(WideTy);
MIRBuilder.buildInstr(ExtOp).addDef(LHSExt).addUse(
@@ -218,6 +335,85 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_SELECT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ // Perform operation at larger width (any extension is fine here, high bits
+ // don't affect the result) and then truncate the result back to the
+ // original type.
+ unsigned Src1Ext = MRI.createGenericVirtualRegister(WideTy);
+ unsigned Src2Ext = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildAnyExt(Src1Ext, MI.getOperand(2).getReg());
+ MIRBuilder.buildAnyExt(Src2Ext, MI.getOperand(3).getReg());
+
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildInstr(TargetOpcode::G_SELECT)
+ .addDef(DstExt)
+ .addReg(MI.getOperand(1).getReg())
+ .addUse(Src1Ext)
+ .addUse(Src2Ext);
+
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildInstr(MI.getOpcode())
+ .addDef(DstExt)
+ .addUse(MI.getOperand(1).getReg());
+
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ unsigned Src = MI.getOperand(1).getReg();
+ unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy);
+
+ if (MI.getOpcode() == TargetOpcode::G_SITOFP) {
+ MIRBuilder.buildSExt(SrcExt, Src);
+ } else {
+ assert(MI.getOpcode() == TargetOpcode::G_UITOFP && "Unexpected conv op");
+ MIRBuilder.buildZExt(SrcExt, Src);
+ }
+
+ MIRBuilder.buildInstr(MI.getOpcode())
+ .addDef(MI.getOperand(0).getReg())
+ .addUse(SrcExt);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_INSERT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ unsigned Src = MI.getOperand(1).getReg();
+ unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildAnyExt(SrcExt, Src);
+
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ auto MIB = MIRBuilder.buildInsert(DstExt, SrcExt, MI.getOperand(2).getReg(),
+ MI.getOperand(3).getImm());
+ for (unsigned OpNum = 4; OpNum < MI.getNumOperands(); OpNum += 2) {
+ MIB.addReg(MI.getOperand(OpNum).getReg());
+ MIB.addImm(MI.getOperand(OpNum + 1).getImm());
+ }
+
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_LOAD: {
assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) ==
WideTy.getSizeInBits() &&
@@ -231,12 +427,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
}
case TargetOpcode::G_STORE: {
- assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) ==
- WideTy.getSizeInBits() &&
- "illegal to increase number of bytes modified by a store");
+ if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
+ WideTy != LLT::scalar(8))
+ return UnableToLegalize;
+
+ auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+ auto Content = TLI.getBooleanContents(false, false);
+
+    unsigned ExtOp = TargetOpcode::G_ANYEXT;
+    if (Content == TargetLoweringBase::ZeroOrOneBooleanContent)
+      ExtOp = TargetOpcode::G_ZEXT;
+    else if (Content == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
+      ExtOp = TargetOpcode::G_SEXT;
unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy);
- MIRBuilder.buildAnyExt(SrcExt, MI.getOperand(0).getReg());
+ MIRBuilder.buildInstr(ExtOp).addDef(SrcExt).addUse(
+ MI.getOperand(0).getReg());
MIRBuilder.buildStore(SrcExt, MI.getOperand(1).getReg(),
**MI.memoperands_begin());
MI.eraseFromParent();
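
To make the boolean-contents logic above concrete, here is a standalone C++ model of how the chosen extension determines the byte stored for an s1 value; the enum is illustrative, not the TargetLoweringBase one.

    #include <cassert>
    #include <cstdint>

    enum class BoolContent { ZeroOrOne, ZeroOrNegOne, Undefined };

    // Widening an s1 store to s8: the target's boolean encoding picks the
    // extension, which in turn picks the in-memory byte.
    static uint8_t widenBoolForStore(bool B, BoolContent C) {
      switch (C) {
      case BoolContent::ZeroOrOne:    // G_ZEXT: store 0 or 1
        return B ? 1 : 0;
      case BoolContent::ZeroOrNegOne: // G_SEXT: store 0 or 0xFF
        return B ? 0xFF : 0;
      case BoolContent::Undefined:    // G_ANYEXT: only bit 0 is meaningful
        return B ? 0xA1 : 0xA0;       // arbitrary upper bits
      }
      return 0;
    }

    int main() {
      assert(widenBoolForStore(true, BoolContent::ZeroOrNegOne) == 0xFF);
      assert((widenBoolForStore(true, BoolContent::Undefined) & 1) == 1);
    }
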
@@ -315,6 +523,83 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_SMULO:
+ case TargetOpcode::G_UMULO: {
+ // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
+ // result.
+ unsigned Res = MI.getOperand(0).getReg();
+ unsigned Overflow = MI.getOperand(1).getReg();
+ unsigned LHS = MI.getOperand(2).getReg();
+ unsigned RHS = MI.getOperand(3).getReg();
+
+ MIRBuilder.buildMul(Res, LHS, RHS);
+
+ unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
+ ? TargetOpcode::G_SMULH
+ : TargetOpcode::G_UMULH;
+
+ unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildInstr(Opcode)
+ .addDef(HiPart)
+ .addUse(LHS)
+ .addUse(RHS);
+
+ unsigned Zero = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildConstant(Zero, 0);
+ MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
+ MI.eraseFromParent();
+ return Legalized;
+ }
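
A standalone model of the unsigned case of this lowering: a 32x32 multiply overflows exactly when the high half of the full 64-bit product, which is what G_UMULH computes, is nonzero. The signed lowering above substitutes G_SMULH for the high half.

    #include <cassert>
    #include <cstdint>

    static bool umulo32(uint32_t LHS, uint32_t RHS, uint32_t &Res) {
      uint64_t Full = static_cast<uint64_t>(LHS) * RHS;
      Res = static_cast<uint32_t>(Full);                   // the G_MUL result
      uint32_t HiPart = static_cast<uint32_t>(Full >> 32); // G_UMULH
      return HiPart != 0;                                  // G_ICMP NE zero
    }

    int main() {
      uint32_t R;
      assert(!umulo32(1u << 15, 1u << 15, R) && R == (1u << 30));
      assert(umulo32(1u << 16, 1u << 16, R)); // 2^32 overflows, wraps to 0
    }
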
+ case TargetOpcode::G_FNEG: {
+ // TODO: Handle vector types once we are able to
+ // represent them.
+ if (Ty.isVector())
+ return UnableToLegalize;
+ unsigned Res = MI.getOperand(0).getReg();
+ Type *ZeroTy;
+ LLVMContext &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+ switch (Ty.getSizeInBits()) {
+ case 16:
+ ZeroTy = Type::getHalfTy(Ctx);
+ break;
+ case 32:
+ ZeroTy = Type::getFloatTy(Ctx);
+ break;
+ case 64:
+ ZeroTy = Type::getDoubleTy(Ctx);
+ break;
+ default:
+ llvm_unreachable("unexpected floating-point type");
+ }
+ ConstantFP &ZeroForNegation =
+ *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
+ unsigned Zero = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildFConstant(Zero, ZeroForNegation);
+ MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
+ .addDef(Res)
+ .addUse(Zero)
+ .addUse(MI.getOperand(1).getReg());
+ MI.eraseFromParent();
+ return Legalized;
+ }
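
A standalone numeric check of why the lowering subtracts from the zero returned by getZeroValueForNegation (negative zero for FP types) rather than plain +0.0: subtracting from +0.0 would give the wrong sign for a +0.0 input.

    #include <cassert>
    #include <cmath>

    static double fnegViaSub(double X) {
      return -0.0 - X; // the G_FSUB with a -0.0 G_FCONSTANT
    }

    int main() {
      assert(fnegViaSub(1.5) == -1.5);
      assert(std::signbit(fnegViaSub(0.0)));   // +0.0 -> -0.0
      assert(!std::signbit(fnegViaSub(-0.0))); // -0.0 -> +0.0
    }
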
+ case TargetOpcode::G_FSUB: {
+ // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
+ // First, check if G_FNEG is marked as Lower. If so, we may
+ // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
+ if (LI.getAction({G_FNEG, Ty}).first == LegalizerInfo::Lower)
+ return UnableToLegalize;
+ unsigned Res = MI.getOperand(0).getReg();
+ unsigned LHS = MI.getOperand(1).getReg();
+ unsigned RHS = MI.getOperand(2).getReg();
+ unsigned Neg = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
+ MIRBuilder.buildInstr(TargetOpcode::G_FADD)
+ .addDef(Res)
+ .addUse(LHS)
+ .addUse(Neg);
+ MI.eraseFromParent();
+ return Legalized;
+ }
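
The guard above breaks the potential FSUB -> FNEG -> FSUB rewrite cycle. The rewrite itself is exact: IEEE subtraction is defined as addition of the negated operand, as this standalone check illustrates.

    #include <cassert>

    // a - b and a + (-b) are bit-identical for IEEE doubles, so lowering
    // G_FSUB to a G_FADD of a G_FNEG loses nothing.
    static double fsubViaFAdd(double A, double B) { return A + (-B); }

    int main() {
      assert(fsubViaFAdd(5.0, 3.0) == 5.0 - 3.0);
      assert(fsubViaFAdd(0.1, 0.3) == 0.1 - 0.3);
    }
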
}
}
@@ -335,7 +620,6 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
MIRBuilder.setInstr(MI);
SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
- SmallVector<uint64_t, 2> Indexes;
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
@@ -343,10 +627,9 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
DstRegs.push_back(DstReg);
- Indexes.push_back(i * NarrowSize);
}
- MIRBuilder.buildSequence(DstReg, DstRegs, Indexes);
+ MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
}
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index e49662075ed5..eaf4056e47ea 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -41,6 +41,8 @@ LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
DefaultActions[TargetOpcode::G_STORE] = NarrowScalar;
DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar;
+ DefaultActions[TargetOpcode::G_INSERT] = NarrowScalar;
+ DefaultActions[TargetOpcode::G_FNEG] = Lower;
}
void LegalizerInfo::computeTables() {
@@ -71,28 +73,36 @@ LegalizerInfo::getAction(const InstrAspect &Aspect) const {
// These *have* to be implemented for now, they're the fundamental basis of
// how everything else is transformed.
- // Nothing is going to go well with types that aren't a power of 2 yet, so
- // don't even try because we might make things worse.
- if (!isPowerOf2_64(Aspect.Type.getSizeInBits()))
- return std::make_pair(Unsupported, LLT());
-
// FIXME: the long-term plan calls for expansion in terms of load/store (if
// they're not legal).
if (Aspect.Opcode == TargetOpcode::G_SEQUENCE ||
- Aspect.Opcode == TargetOpcode::G_EXTRACT)
+ Aspect.Opcode == TargetOpcode::G_EXTRACT ||
+ Aspect.Opcode == TargetOpcode::G_MERGE_VALUES ||
+ Aspect.Opcode == TargetOpcode::G_UNMERGE_VALUES)
return std::make_pair(Legal, Aspect.Type);
+ LLT Ty = Aspect.Type;
LegalizeAction Action = findInActions(Aspect);
+ // LegalizerHelper is not able to handle non-power-of-2 types right now, so do
+ // not try to legalize them unless they are marked as Legal or Custom.
+ // FIXME: This is a temporary hack until the general non-power-of-2
+ // legalization works.
+ if (!isPowerOf2_64(Ty.getSizeInBits()) &&
+ !(Action == Legal || Action == Custom))
+ return std::make_pair(Unsupported, LLT());
+
if (Action != NotFound)
return findLegalAction(Aspect, Action);
unsigned Opcode = Aspect.Opcode;
- LLT Ty = Aspect.Type;
if (!Ty.isVector()) {
auto DefaultAction = DefaultActions.find(Aspect.Opcode);
if (DefaultAction != DefaultActions.end() && DefaultAction->second == Legal)
return std::make_pair(Legal, Ty);
+ if (DefaultAction != DefaultActions.end() && DefaultAction->second == Lower)
+ return std::make_pair(Lower, Ty);
+
if (DefaultAction == DefaultActions.end() ||
DefaultAction->second != NarrowScalar)
return std::make_pair(Unsupported, LLT());
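
A standalone sketch of the gate added above: non-power-of-2 sizes only pass through when the target explicitly marked them Legal or Custom, and everything else is rejected before the helper can mishandle it. The enum is illustrative, not the LegalizerInfo one.

    #include <cassert>
    #include <cstdint>

    enum class Action { Legal, Custom, NarrowScalar, Unsupported };

    static bool isPow2(uint64_t Bits) {
      return Bits != 0 && (Bits & (Bits - 1)) == 0;
    }

    static Action gate(uint64_t SizeInBits, Action Found) {
      if (!isPow2(SizeInBits) && Found != Action::Legal &&
          Found != Action::Custom)
        return Action::Unsupported; // e.g. an s24 add with no explicit rule
      return Found;
    }

    int main() {
      assert(gate(24, Action::NarrowScalar) == Action::Unsupported);
      assert(gate(24, Action::Custom) == Action::Custom);
      assert(gate(32, Action::NarrowScalar) == Action::NarrowScalar);
    }
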
@@ -160,6 +170,7 @@ LLT LegalizerInfo::findLegalType(const InstrAspect &Aspect,
case Legal:
case Lower:
case Libcall:
+ case Custom:
return Aspect.Type;
case NarrowScalar: {
return findLegalType(Aspect,
@@ -180,3 +191,9 @@ LLT LegalizerInfo::findLegalType(const InstrAspect &Aspect,
}
}
}
+
+bool LegalizerInfo::legalizeCustom(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ return false;
+}
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index c04f6e4ae897..8d1a263395a0 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -54,7 +55,7 @@ void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB,
void MachineIRBuilder::recordInsertions(
std::function<void(MachineInstr *)> Inserted) {
- InsertedInstr = Inserted;
+ InsertedInstr = std::move(Inserted);
}
void MachineIRBuilder::stopRecordingInsertions() {
@@ -82,6 +83,70 @@ MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
return MIB;
}
+MachineInstrBuilder MachineIRBuilder::buildDirectDbgValue(
+ unsigned Reg, const MDNode *Variable, const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return buildInstr(TargetOpcode::DBG_VALUE)
+ .addReg(Reg, RegState::Debug)
+ .addReg(0, RegState::Debug)
+ .addMetadata(Variable)
+ .addMetadata(Expr);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildIndirectDbgValue(
+ unsigned Reg, unsigned Offset, const MDNode *Variable, const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return buildInstr(TargetOpcode::DBG_VALUE)
+ .addReg(Reg, RegState::Debug)
+ .addImm(Offset)
+ .addMetadata(Variable)
+ .addMetadata(Expr);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
+ const MDNode *Variable,
+ const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ return buildInstr(TargetOpcode::DBG_VALUE)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMetadata(Variable)
+ .addMetadata(Expr);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
+ unsigned Offset,
+ const MDNode *Variable,
+ const MDNode *Expr) {
+ assert(isa<DILocalVariable>(Variable) && "not a variable");
+ assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
+ assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
+ "Expected inlined-at fields to agree");
+ auto MIB = buildInstr(TargetOpcode::DBG_VALUE);
+ if (auto *CI = dyn_cast<ConstantInt>(&C)) {
+ if (CI->getBitWidth() > 64)
+ MIB.addCImm(CI);
+ else
+ MIB.addImm(CI->getZExtValue());
+ } else if (auto *CFP = dyn_cast<ConstantFP>(&C)) {
+ MIB.addFPImm(CFP);
+ } else {
+ // Insert %noreg if we didn't find a usable constant and had to drop it.
+ MIB.addReg(0U);
+ }
+
+ return MIB.addImm(Offset).addMetadata(Variable).addMetadata(Expr);
+}
+
MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) {
assert(MRI->getType(Res).isPointer() && "invalid operand type");
return buildInstr(TargetOpcode::G_FRAME_INDEX)
@@ -126,6 +191,17 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
.addUse(Op1);
}
+MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0,
+ uint32_t NumBits) {
+ assert(MRI->getType(Res).isPointer() &&
+ MRI->getType(Res) == MRI->getType(Op0) && "type mismatch");
+
+ return buildInstr(TargetOpcode::G_PTR_MASK)
+ .addDef(Res)
+ .addUse(Op0)
+ .addImm(NumBits);
+}
+
MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0,
unsigned Op1) {
assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
@@ -152,10 +228,27 @@ MachineInstrBuilder MachineIRBuilder::buildMul(unsigned Res, unsigned Op0,
.addUse(Op1);
}
+MachineInstrBuilder MachineIRBuilder::buildAnd(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
+ "invalid operand type");
+ assert(MRI->getType(Res) == MRI->getType(Op0) &&
+ MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
+
+ return buildInstr(TargetOpcode::G_AND)
+ .addDef(Res)
+ .addUse(Op0)
+ .addUse(Op1);
+}
+
MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
}
+MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) {
+ return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
+}
+
MachineInstrBuilder MachineIRBuilder::buildCopy(unsigned Res, unsigned Op) {
return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op);
}
@@ -262,34 +355,56 @@ MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res,
return buildInstr(Opcode).addDef(Res).addUse(Op);
}
-MachineInstrBuilder MachineIRBuilder::buildExtract(ArrayRef<unsigned> Results,
- ArrayRef<uint64_t> Indices,
- unsigned Src) {
-#ifndef NDEBUG
- assert(Results.size() == Indices.size() && "inconsistent number of regs");
- assert(!Results.empty() && "invalid trivial extract");
- assert(std::is_sorted(Indices.begin(), Indices.end()) &&
- "extract offsets must be in ascending order");
+MachineInstrBuilder MachineIRBuilder::buildZExtOrTrunc(unsigned Res,
+ unsigned Op) {
+ unsigned Opcode = TargetOpcode::COPY;
+ if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
+ Opcode = TargetOpcode::G_ZEXT;
+ else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
+ Opcode = TargetOpcode::G_TRUNC;
- assert(MRI->getType(Src).isValid() && "invalid operand type");
- for (auto Res : Results)
- assert(MRI->getType(Res).isValid() && "invalid operand type");
-#endif
+ return buildInstr(Opcode).addDef(Res).addUse(Op);
+  return buildInstr(Opcode).addDef(Res).addUse(Op);
+}
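
The size comparison in buildZExtOrTrunc reduces to a three-way choice; a minimal standalone model:

    #include <cassert>

    enum class Op { Copy, ZExt, Trunc };

    static Op pickZExtOrTrunc(unsigned DstBits, unsigned SrcBits) {
      if (DstBits > SrcBits)
        return Op::ZExt;
      if (DstBits < SrcBits)
        return Op::Trunc;
      return Op::Copy; // same size: a plain COPY suffices
    }

    int main() {
      assert(pickZExtOrTrunc(64, 32) == Op::ZExt);
      assert(pickZExtOrTrunc(16, 32) == Op::Trunc);
      assert(pickZExtOrTrunc(32, 32) == Op::Copy);
    }
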
- auto MIB = BuildMI(getMF(), DL, getTII().get(TargetOpcode::G_EXTRACT));
- for (auto Res : Results)
- MIB.addDef(Res);
- MIB.addUse(Src);
+MachineInstrBuilder MachineIRBuilder::buildCast(unsigned Dst, unsigned Src) {
+ LLT SrcTy = MRI->getType(Src);
+ LLT DstTy = MRI->getType(Dst);
+ if (SrcTy == DstTy)
+ return buildCopy(Dst, Src);
+
+ unsigned Opcode;
+ if (SrcTy.isPointer() && DstTy.isScalar())
+ Opcode = TargetOpcode::G_PTRTOINT;
+ else if (DstTy.isPointer() && SrcTy.isScalar())
+ Opcode = TargetOpcode::G_INTTOPTR;
+ else {
+    assert(!SrcTy.isPointer() && !DstTy.isPointer() && "no G_ADDRCAST yet");
+ Opcode = TargetOpcode::G_BITCAST;
+ }
- for (auto Idx : Indices)
- MIB.addImm(Idx);
+ return buildInstr(Opcode).addDef(Dst).addUse(Src);
+}
- getMBB().insert(getInsertPt(), MIB);
- if (InsertedInstr)
- InsertedInstr(MIB);
+MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src,
+ uint64_t Index) {
+#ifndef NDEBUG
+ assert(MRI->getType(Src).isValid() && "invalid operand type");
+ assert(MRI->getType(Res).isValid() && "invalid operand type");
+ assert(Index + MRI->getType(Res).getSizeInBits() <=
+ MRI->getType(Src).getSizeInBits() &&
+ "extracting off end of register");
+#endif
- return MIB;
+ if (MRI->getType(Res).getSizeInBits() == MRI->getType(Src).getSizeInBits()) {
+    assert(Index == 0 && "extraction past the end of a register");
+ return buildCast(Res, Src);
+ }
+
+ return buildInstr(TargetOpcode::G_EXTRACT)
+ .addDef(Res)
+ .addUse(Src)
+ .addImm(Index);
}
MachineInstrBuilder
@@ -316,6 +431,64 @@ MachineIRBuilder::buildSequence(unsigned Res,
return MIB;
}
+MachineInstrBuilder MachineIRBuilder::buildUndef(unsigned Res) {
+ return buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(Res);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildMerge(unsigned Res,
+ ArrayRef<unsigned> Ops) {
+
+#ifndef NDEBUG
+ assert(!Ops.empty() && "invalid trivial sequence");
+ LLT Ty = MRI->getType(Ops[0]);
+ for (auto Reg : Ops)
+ assert(MRI->getType(Reg) == Ty && "type mismatch in input list");
+ assert(Ops.size() * MRI->getType(Ops[0]).getSizeInBits() ==
+ MRI->getType(Res).getSizeInBits() &&
+ "input operands do not cover output register");
+#endif
+
+ MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_MERGE_VALUES);
+ MIB.addDef(Res);
+ for (unsigned i = 0; i < Ops.size(); ++i)
+ MIB.addUse(Ops[i]);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res,
+ unsigned Op) {
+
+#ifndef NDEBUG
+ assert(!Res.empty() && "invalid trivial sequence");
+ LLT Ty = MRI->getType(Res[0]);
+ for (auto Reg : Res)
+ assert(MRI->getType(Reg) == Ty && "type mismatch in input list");
+ assert(Res.size() * MRI->getType(Res[0]).getSizeInBits() ==
+ MRI->getType(Op).getSizeInBits() &&
+         "destination registers do not cover the source register");
+#endif
+
+ MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_UNMERGE_VALUES);
+ for (unsigned i = 0; i < Res.size(); ++i)
+ MIB.addDef(Res[i]);
+ MIB.addUse(Op);
+ return MIB;
+}
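
A standalone bit-level model of what the size assertions in buildMerge and buildUnmerge guarantee, assuming the LSB-first operand order (part 0 carries the low bits): the parts concatenate exactly into the wide register and split back losslessly.

    #include <cassert>
    #include <cstdint>

    static uint64_t merge(uint32_t Lo, uint32_t Hi) {
      return static_cast<uint64_t>(Hi) << 32 | Lo; // G_MERGE_VALUES
    }

    static void unmerge(uint64_t V, uint32_t &Lo, uint32_t &Hi) {
      Lo = static_cast<uint32_t>(V);       // G_UNMERGE_VALUES, part 0
      Hi = static_cast<uint32_t>(V >> 32); // part 1
    }

    int main() {
      uint32_t Lo, Hi;
      unmerge(merge(0xDEADBEEFu, 0x12345678u), Lo, Hi);
      assert(Lo == 0xDEADBEEFu && Hi == 0x12345678u);
    }
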
+
+MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
+ unsigned Op, unsigned Index) {
+ if (MRI->getType(Res).getSizeInBits() == MRI->getType(Op).getSizeInBits()) {
+ assert(Index == 0 && "insertion past the end of a register");
+ return buildCast(Res, Op);
+ }
+
+ return buildInstr(TargetOpcode::G_INSERT)
+ .addDef(Res)
+ .addUse(Src)
+ .addUse(Op)
+ .addImm(Index);
+}
+
MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
unsigned Res,
bool HasSideEffects) {
@@ -395,9 +568,10 @@ MachineInstrBuilder MachineIRBuilder::buildSelect(unsigned Res, unsigned Tst,
if (ResTy.isScalar() || ResTy.isPointer())
assert(MRI->getType(Tst).isScalar() && "type mismatch");
else
- assert(MRI->getType(Tst).isVector() &&
- MRI->getType(Tst).getNumElements() ==
- MRI->getType(Op0).getNumElements() &&
+ assert((MRI->getType(Tst).isScalar() ||
+ (MRI->getType(Tst).isVector() &&
+ MRI->getType(Tst).getNumElements() ==
+ MRI->getType(Op0).getNumElements())) &&
"type mismatch");
#endif
@@ -408,6 +582,46 @@ MachineInstrBuilder MachineIRBuilder::buildSelect(unsigned Res, unsigned Tst,
.addUse(Op1);
}
+MachineInstrBuilder MachineIRBuilder::buildInsertVectorElement(unsigned Res,
+ unsigned Val,
+ unsigned Elt,
+ unsigned Idx) {
+#ifndef NDEBUG
+ LLT ResTy = MRI->getType(Res);
+ LLT ValTy = MRI->getType(Val);
+ LLT EltTy = MRI->getType(Elt);
+ LLT IdxTy = MRI->getType(Idx);
+ assert(ResTy.isVector() && ValTy.isVector() && "invalid operand type");
+ assert(EltTy.isScalar() && IdxTy.isScalar() && "invalid operand type");
+ assert(ResTy.getNumElements() == ValTy.getNumElements() && "type mismatch");
+ assert(ResTy.getElementType() == EltTy && "type mismatch");
+#endif
+
+ return buildInstr(TargetOpcode::G_INSERT_VECTOR_ELT)
+ .addDef(Res)
+ .addUse(Val)
+ .addUse(Elt)
+ .addUse(Idx);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildExtractVectorElement(unsigned Res,
+ unsigned Val,
+ unsigned Idx) {
+#ifndef NDEBUG
+ LLT ResTy = MRI->getType(Res);
+ LLT ValTy = MRI->getType(Val);
+ LLT IdxTy = MRI->getType(Idx);
+ assert(ValTy.isVector() && "invalid operand type");
+ assert(ResTy.isScalar() && IdxTy.isScalar() && "invalid operand type");
+ assert(ValTy.getElementType() == ResTy && "type mismatch");
+#endif
+
+ return buildInstr(TargetOpcode::G_EXTRACT_VECTOR_ELT)
+ .addDef(Res)
+ .addUse(Val)
+ .addUse(Idx);
+}
+
void MachineIRBuilder::validateTruncExt(unsigned Dst, unsigned Src,
bool IsExtend) {
#ifndef NDEBUG
diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index cc026ef27296..f935390a8d1b 100644
--- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -71,6 +72,7 @@ void RegBankSelect::init(MachineFunction &MF) {
MBPI = nullptr;
}
MIRBuilder.setMF(MF);
+ MORE = make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
}
void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -585,18 +587,12 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
// LegalizerInfo as it's currently in the separate GlobalISel library.
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) {
- for (const MachineBasicBlock &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) {
- if (!TPC->isGlobalISelAbortEnabled()) {
- MF.getProperties().set(
- MachineFunctionProperties::Property::FailedISel);
- return false;
- }
- std::string ErrStorage;
- raw_string_ostream Err(ErrStorage);
- Err << "Instruction is not legal: " << MI << '\n';
- report_fatal_error(Err.str());
+ reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
+ "instruction is not legal", MI);
+ return false;
}
}
}
@@ -622,9 +618,8 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
continue;
if (!assignInstr(MI)) {
- if (TPC->isGlobalISelAbortEnabled())
- report_fatal_error("Unable to map instruction");
- MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+ reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
+ "unable to map instruction", MI);
return false;
}
}
@@ -968,10 +963,12 @@ bool RegBankSelect::MappingCost::operator==(const MappingCost &Cost) const {
LocalFreq == Cost.LocalFreq;
}
-void RegBankSelect::MappingCost::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegBankSelect::MappingCost::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
void RegBankSelect::MappingCost::print(raw_ostream &OS) const {
if (*this == ImpossibleCost()) {
diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp
index 49d676f11da6..940957d02152 100644
--- a/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -19,10 +19,11 @@ using namespace llvm;
const unsigned RegisterBank::InvalidID = UINT_MAX;
-RegisterBank::RegisterBank(unsigned ID, const char *Name, unsigned Size,
- const uint32_t *CoveredClasses)
+RegisterBank::RegisterBank(
+ unsigned ID, const char *Name, unsigned Size,
+ const uint32_t *CoveredClasses, unsigned NumRegClasses)
: ID(ID), Name(Name), Size(Size) {
- ContainedRegClasses.resize(200);
+ ContainedRegClasses.resize(NumRegClasses);
ContainedRegClasses.setBitsInMask(CoveredClasses);
}
@@ -75,9 +76,11 @@ bool RegisterBank::operator==(const RegisterBank &OtherRB) const {
return &OtherRB == this;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegisterBank::dump(const TargetRegisterInfo *TRI) const {
print(dbgs(), /* IsForDebug */ true, TRI);
}
+#endif
void RegisterBank::print(raw_ostream &OS, bool IsForDebug,
const TargetRegisterInfo *TRI) const {
diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index da5ab0b9fb7b..b2df2f159676 100644
--- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -63,13 +63,6 @@ RegisterBankInfo::RegisterBankInfo(RegisterBank **RegBanks,
#endif // NDEBUG
}
-RegisterBankInfo::~RegisterBankInfo() {
- for (auto It : MapOfPartialMappings)
- delete It.second;
- for (auto It : MapOfValueMappings)
- delete It.second;
-}
-
bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
#ifndef NDEBUG
for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
@@ -133,15 +126,26 @@ const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister(
return &RC;
}
+/// Check whether or not \p MI should be treated like a copy
+/// for the mappings.
+/// Copy-like instructions are special for mapping because
+/// they don't have actual register constraints. Moreover,
+/// they sometimes have register classes assigned and we can
+/// just use that instead of failing to provide a generic mapping.
+static bool isCopyLike(const MachineInstr &MI) {
+ return MI.isCopy() || MI.isPHI() ||
+ MI.getOpcode() == TargetOpcode::REG_SEQUENCE;
+}
+
RegisterBankInfo::InstructionMapping
RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
// For copies we want to walk over the operands and try to find one
// that has a register bank since the instruction itself will not get
// us any constraint.
- bool isCopyLike = MI.isCopy() || MI.isPHI();
+ bool IsCopyLike = isCopyLike(MI);
  // For copy-like instructions, only the mapping of the definition
// is important. The rest is not constrained.
- unsigned NumOperandsForMapping = isCopyLike ? 1 : MI.getNumOperands();
+ unsigned NumOperandsForMapping = IsCopyLike ? 1 : MI.getNumOperands();
RegisterBankInfo::InstructionMapping Mapping(DefaultMappingID, /*Cost*/ 1,
/*OperandsMapping*/ nullptr,
@@ -175,7 +179,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
// For copy-like instruction, we want to reuse the register bank
// that is already set on Reg, if any, since those instructions do
// not have any constraints.
- const RegisterBank *CurRegBank = isCopyLike ? AltRegBank : nullptr;
+ const RegisterBank *CurRegBank = IsCopyLike ? AltRegBank : nullptr;
if (!CurRegBank) {
// If this is a target specific instruction, we can deduce
// the register bank from the encoding constraints.
@@ -184,7 +188,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
// All our attempts failed, give up.
CompleteMapping = false;
- if (!isCopyLike)
+ if (!IsCopyLike)
// MI does not carry enough information to guess the mapping.
return InstructionMapping();
continue;
@@ -192,7 +196,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
}
const ValueMapping *ValMapping =
&getValueMapping(0, getSizeInBits(Reg, MRI, TRI), *CurRegBank);
- if (isCopyLike) {
+ if (IsCopyLike) {
OperandsMapping[0] = ValMapping;
CompleteMapping = true;
break;
@@ -200,7 +204,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
OperandsMapping[OpIdx] = ValMapping;
}
- if (isCopyLike && !CompleteMapping)
+ if (IsCopyLike && !CompleteMapping)
// No way to deduce the type from what we have.
return InstructionMapping();
@@ -234,8 +238,8 @@ RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length,
++NumPartialMappingsCreated;
- const PartialMapping *&PartMapping = MapOfPartialMappings[Hash];
- PartMapping = new PartialMapping{StartIdx, Length, RegBank};
+ auto &PartMapping = MapOfPartialMappings[Hash];
+ PartMapping = llvm::make_unique<PartialMapping>(StartIdx, Length, RegBank);
return *PartMapping;
}
@@ -268,8 +272,8 @@ RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown,
++NumValueMappingsCreated;
- const ValueMapping *&ValMapping = MapOfValueMappings[Hash];
- ValMapping = new ValueMapping{BreakDown, NumBreakDowns};
+ auto &ValMapping = MapOfValueMappings[Hash];
+ ValMapping = llvm::make_unique<ValueMapping>(BreakDown, NumBreakDowns);
return *ValMapping;
}
@@ -282,9 +286,9 @@ RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const {
// The addresses of the value mapping are unique.
// Therefore, we can use them directly to hash the operand mapping.
hash_code Hash = hash_combine_range(Begin, End);
- const auto &It = MapOfOperandsMappings.find(Hash);
- if (It != MapOfOperandsMappings.end())
- return It->second;
+ auto &Res = MapOfOperandsMappings[Hash];
+ if (Res)
+ return Res.get();
++NumOperandsMappingsCreated;
@@ -293,8 +297,7 @@ RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const {
// mapping, because we use the pointer of the ValueMapping
// to hash and we expect them to uniquely identify an instance
// of value mapping.
- ValueMapping *&Res = MapOfOperandsMappings[Hash];
- Res = new ValueMapping[std::distance(Begin, End)];
+ Res = llvm::make_unique<ValueMapping[]>(std::distance(Begin, End));
unsigned Idx = 0;
for (Iterator It = Begin; It != End; ++It, ++Idx) {
const ValueMapping *ValMap = *It;
@@ -302,7 +305,7 @@ RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const {
continue;
Res[Idx] = *ValMap;
}
- return Res;
+ return Res.get();
}
const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping(
@@ -349,6 +352,7 @@ RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const {
void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
MachineInstr &MI = OpdMapper.getMI();
+ MachineRegisterInfo &MRI = OpdMapper.getMRI();
DEBUG(dbgs() << "Applying default-like mapping\n");
for (unsigned OpIdx = 0,
EndIdx = OpdMapper.getInstrMapping().getNumOperands();
@@ -359,6 +363,13 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
DEBUG(dbgs() << " is not a register, nothing to be done\n");
continue;
}
+ if (!MO.getReg()) {
+ DEBUG(dbgs() << " is %%noreg, nothing to be done\n");
+ continue;
+ }
+ assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns !=
+ 0 &&
+ "Invalid mapping");
assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns ==
1 &&
"This mapping is too complex for this function");
@@ -368,9 +379,25 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
continue;
}
- DEBUG(dbgs() << " changed, replace " << MO.getReg());
- MO.setReg(*NewRegs.begin());
- DEBUG(dbgs() << " with " << MO.getReg());
+ unsigned OrigReg = MO.getReg();
+ unsigned NewReg = *NewRegs.begin();
+ DEBUG(dbgs() << " changed, replace " << PrintReg(OrigReg, nullptr));
+ MO.setReg(NewReg);
+ DEBUG(dbgs() << " with " << PrintReg(NewReg, nullptr));
+
+ // The OperandsMapper creates plain scalar, we may have to fix that.
+ // Check if the types match and if not, fix that.
+ LLT OrigTy = MRI.getType(OrigReg);
+ LLT NewTy = MRI.getType(NewReg);
+ if (OrigTy != NewTy) {
+ assert(OrigTy.getSizeInBits() == NewTy.getSizeInBits() &&
+             "Types with different sizes cannot be handled by the default "
+ "mapping");
+ DEBUG(dbgs() << "\nChange type of new opd from " << NewTy << " to "
+ << OrigTy);
+ MRI.setType(NewReg, OrigTy);
+ }
+ DEBUG(dbgs() << '\n');
}
}
@@ -400,10 +427,12 @@ unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
//------------------------------------------------------------------------------
// Helper classes implementation.
//------------------------------------------------------------------------------
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegisterBankInfo::PartialMapping::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
bool RegisterBankInfo::PartialMapping::verify() const {
assert(RegBank && "Register bank not set");
@@ -451,10 +480,12 @@ bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const {
return true;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegisterBankInfo::ValueMapping::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const {
OS << "#BreakDown: " << NumBreakDowns << " ";
@@ -472,8 +503,7 @@ bool RegisterBankInfo::InstructionMapping::verify(
// Check that all the register operands are properly mapped.
// Check the constructor invariant.
// For PHI, we only care about mapping the definition.
- assert(NumOperands ==
- ((MI.isCopy() || MI.isPHI()) ? 1 : MI.getNumOperands()) &&
+ assert(NumOperands == (isCopyLike(MI) ? 1 : MI.getNumOperands()) &&
"NumOperands must match, see constructor");
assert(MI.getParent() && MI.getParent()->getParent() &&
"MI must be connected to a MachineFunction");
@@ -503,10 +533,12 @@ bool RegisterBankInfo::InstructionMapping::verify(
return true;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegisterBankInfo::InstructionMapping::dump() const {
print(dbgs());
dbgs() << '\n';
}
+#endif
void RegisterBankInfo::InstructionMapping::print(raw_ostream &OS) const {
OS << "ID: " << getID() << " Cost: " << getCost() << " Mapping: ";
@@ -576,6 +608,11 @@ void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
for (unsigned &NewVReg : NewVRegsForOpIdx) {
assert(PartMap != ValMapping.end() && "Out-of-bound access");
assert(NewVReg == 0 && "Register has already been created");
+    // The new registers are always bound to scalars of the right size.
+ // The actual type has to be set when the target does the mapping
+ // of the instruction.
+ // The rationale is that this generic code cannot guess how the
+ // target plans to split the input type.
NewVReg = MRI.createGenericVirtualRegister(LLT::scalar(PartMap->Length));
MRI.setRegBank(NewVReg, *PartMap->RegBank);
++PartMap;
@@ -619,10 +656,12 @@ RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx,
return Res;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegisterBankInfo::OperandsMapper::dump() const {
print(dbgs(), true);
dbgs() << '\n';
}
+#endif
void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS,
bool ForDebug) const {
diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp
index e50091833c26..606a59680a3d 100644
--- a/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/lib/CodeGen/GlobalISel/Utils.cpp
@@ -11,10 +11,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -43,3 +46,50 @@ unsigned llvm::constrainOperandRegClass(
return Reg;
}
+
+bool llvm::isTriviallyDead(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ // If we can move an instruction, we can remove it. Otherwise, it has
+ // a side-effect of some sort.
+ bool SawStore = false;
+ if (!MI.isSafeToMove(/*AA=*/nullptr, SawStore))
+ return false;
+
+ // Instructions without side-effects are dead iff they only define dead vregs.
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI.use_nodbg_empty(Reg))
+ return false;
+ }
+ return true;
+}
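
A standalone model of the deadness rule just implemented: a movable (side-effect-free) instruction is dead exactly when every register it defines is virtual and has no non-debug uses.

    #include <cassert>
    #include <vector>

    struct Def { bool IsPhysical; unsigned NonDebugUses; };

    static bool isTriviallyDead(bool SafeToMove, const std::vector<Def> &Defs) {
      if (!SafeToMove) // stores, calls, other side effects: never dead
        return false;
      for (const Def &D : Defs)
        if (D.IsPhysical || D.NonDebugUses != 0)
          return false;
      return true;
    }

    int main() {
      assert(isTriviallyDead(true, {{false, 0}}));  // unused vreg def
      assert(!isTriviallyDead(true, {{false, 3}})); // vreg still used
      assert(!isTriviallyDead(true, {{true, 0}}));  // physreg def
    }
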
+
+void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
+ MachineOptimizationRemarkEmitter &MORE,
+ MachineOptimizationRemarkMissed &R) {
+ MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+
+ // Print the function name explicitly if we don't have a debug location (which
+ // makes the diagnostic less useful) or if we're going to emit a raw error.
+ if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled())
+ R << (" (in function: " + MF.getName() + ")").str();
+
+ if (TPC.isGlobalISelAbortEnabled())
+ report_fatal_error(R.getMsg());
+ else
+ MORE.emit(R);
+}
+
+void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
+ MachineOptimizationRemarkEmitter &MORE,
+ const char *PassName, StringRef Msg,
+ const MachineInstr &MI) {
+ MachineOptimizationRemarkMissed R(PassName, "GISelFailure: ",
+ MI.getDebugLoc(), MI.getParent());
+ R << Msg << ": " << ore::MNV("Inst", MI);
+ reportGISelFailure(MF, TPC, MORE, R);
+}
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index b9f3d86eabd8..37fe41582333 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -588,19 +588,6 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
return TExit && TExit == FalseBBI.BB;
}
-/// Shrink the provided inclusive range by one instruction.
-/// If the range was one instruction (\p It == \p Begin), It is not modified,
-/// but \p Empty is set to true.
-static inline void shrinkInclusiveRange(
- MachineBasicBlock::iterator &Begin,
- MachineBasicBlock::iterator &It,
- bool &Empty) {
- if (It == Begin)
- Empty = true;
- else
- It--;
-}
-
/// Count duplicated instructions and move the iterators to show where they
/// are.
/// @param TIB True Iterator Begin
@@ -633,10 +620,8 @@ bool IfConverter::CountDuplicatedInstructions(
while (TIB != TIE && FIB != FIE) {
// Skip dbg_value instructions. These do not count.
TIB = skipDebugInstructionsForward(TIB, TIE);
- if(TIB == TIE)
- break;
FIB = skipDebugInstructionsForward(FIB, FIE);
- if(FIB == FIE)
+ if (TIB == TIE || FIB == FIE)
break;
if (!TIB->isIdenticalTo(*FIB))
break;
@@ -656,58 +641,42 @@ bool IfConverter::CountDuplicatedInstructions(
if (TIB == TIE || FIB == FIE)
return true;
// Now, in preparation for counting duplicate instructions at the ends of the
- // blocks, move the end iterators up past any branch instructions.
- --TIE;
- --FIE;
-
- // After this point TIB and TIE define an inclusive range, which means that
- // TIB == TIE is true when there is one more instruction to consider, not at
- // the end. Because we may not be able to go before TIB, we need a flag to
- // indicate a completely empty range.
- bool TEmpty = false, FEmpty = false;
-
- // Upon exit TIE and FIE will both point at the last non-shared instruction.
- // They need to be moved forward to point past the last non-shared
- // instruction if the range they delimit is non-empty.
- auto IncrementEndIteratorsOnExit = make_scope_exit([&]() {
- if (!TEmpty)
- ++TIE;
- if (!FEmpty)
- ++FIE;
- });
+ // blocks, switch to reverse_iterators. Note that getReverse() returns an
+ // iterator that points to the same instruction, unlike std::reverse_iterator.
+ // We have to do our own shifting so that we get the same range.
+ MachineBasicBlock::reverse_iterator RTIE = std::next(TIE.getReverse());
+ MachineBasicBlock::reverse_iterator RFIE = std::next(FIE.getReverse());
+ const MachineBasicBlock::reverse_iterator RTIB = std::next(TIB.getReverse());
+ const MachineBasicBlock::reverse_iterator RFIB = std::next(FIB.getReverse());
if (!TBB.succ_empty() || !FBB.succ_empty()) {
if (SkipUnconditionalBranches) {
- while (!TEmpty && TIE->isUnconditionalBranch())
- shrinkInclusiveRange(TIB, TIE, TEmpty);
- while (!FEmpty && FIE->isUnconditionalBranch())
- shrinkInclusiveRange(FIB, FIE, FEmpty);
+ while (RTIE != RTIB && RTIE->isUnconditionalBranch())
+ ++RTIE;
+ while (RFIE != RFIB && RFIE->isUnconditionalBranch())
+ ++RFIE;
}
}
- // If Dups1 includes all of a block, then don't count duplicate
- // instructions at the end of the blocks.
- if (TEmpty || FEmpty)
- return true;
-
// Count duplicate instructions at the ends of the blocks.
- while (!TEmpty && !FEmpty) {
+ while (RTIE != RTIB && RFIE != RFIB) {
// Skip dbg_value instructions. These do not count.
- TIE = skipDebugInstructionsBackward(TIE, TIB);
- FIE = skipDebugInstructionsBackward(FIE, FIB);
- TEmpty = TIE == TIB && TIE->isDebugValue();
- FEmpty = FIE == FIB && FIE->isDebugValue();
- if (TEmpty || FEmpty)
+ // Note that these are reverse iterators going forward.
+ RTIE = skipDebugInstructionsForward(RTIE, RTIB);
+ RFIE = skipDebugInstructionsForward(RFIE, RFIB);
+ if (RTIE == RTIB || RFIE == RFIB)
break;
- if (!TIE->isIdenticalTo(*FIE))
+ if (!RTIE->isIdenticalTo(*RFIE))
break;
// We have to verify that any branch instructions are the same, and then we
// don't count them toward the # of duplicate instructions.
- if (!TIE->isBranch())
+ if (!RTIE->isBranch())
++Dups2;
- shrinkInclusiveRange(TIB, TIE, TEmpty);
- shrinkInclusiveRange(FIB, FIE, FEmpty);
+ ++RTIE;
+ ++RFIE;
}
+ TIE = std::next(RTIE.getReverse());
+ FIE = std::next(RFIE.getReverse());
return true;
}
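
The getReverse() remark above is the crux of this rewrite. A standalone check of the standard-library half of the story: std::reverse_iterator dereferences to the element before its base, which is why converting the [TIB, TIE) bounds needs the extra std::next shift.

    #include <cassert>
    #include <iterator>
    #include <vector>

    int main() {
      std::vector<int> V{10, 20, 30, 40};
      auto It = V.begin() + 2;                  // points at 30
      std::vector<int>::reverse_iterator R(It); // dereferences to 20
      assert(*R == 20);
      assert(*std::prev(R.base()) == 20);       // base() undoes the shift
    }
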
@@ -741,25 +710,21 @@ bool IfConverter::RescanInstructions(
static void verifySameBranchInstructions(
MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2) {
- MachineBasicBlock::iterator B1 = MBB1->begin();
- MachineBasicBlock::iterator B2 = MBB2->begin();
- MachineBasicBlock::iterator E1 = std::prev(MBB1->end());
- MachineBasicBlock::iterator E2 = std::prev(MBB2->end());
- bool Empty1 = false, Empty2 = false;
- while (!Empty1 && !Empty2) {
- E1 = skipDebugInstructionsBackward(E1, B1);
- E2 = skipDebugInstructionsBackward(E2, B2);
- Empty1 = E1 == B1 && E1->isDebugValue();
- Empty2 = E2 == B2 && E2->isDebugValue();
-
- if (Empty1 && Empty2)
+ const MachineBasicBlock::reverse_iterator B1 = MBB1->rend();
+ const MachineBasicBlock::reverse_iterator B2 = MBB2->rend();
+ MachineBasicBlock::reverse_iterator E1 = MBB1->rbegin();
+ MachineBasicBlock::reverse_iterator E2 = MBB2->rbegin();
+ while (E1 != B1 && E2 != B2) {
+    E1 = skipDebugInstructionsForward(E1, B1);
+    E2 = skipDebugInstructionsForward(E2, B2);
+ if (E1 == B1 && E2 == B2)
break;
- if (Empty1) {
+ if (E1 == B1) {
assert(!E2->isBranch() && "Branch mis-match, one block is empty.");
break;
}
- if (Empty2) {
+ if (E2 == B2) {
assert(!E1->isBranch() && "Branch mis-match, one block is empty.");
break;
}
@@ -769,8 +734,8 @@ static void verifySameBranchInstructions(
"Branch mis-match, branch instructions don't match.");
else
break;
- shrinkInclusiveRange(B1, E1, Empty1);
- shrinkInclusiveRange(B2, E2, Empty2);
+ ++E1;
+ ++E2;
}
}
#endif
@@ -2183,7 +2148,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
// unknown probabilities into known ones.
// FIXME: This usage is too tricky and in the future we would like to
// eliminate all unknown probabilities in MBB.
- ToBBI.BB->normalizeSuccProbs();
+ if (ToBBI.IsBrAnalyzable)
+ ToBBI.BB->normalizeSuccProbs();
SmallVector<MachineBasicBlock *, 4> FromSuccs(FromMBB.succ_begin(),
FromMBB.succ_end());
@@ -2263,7 +2229,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
// Normalize the probabilities of ToBBI.BB's successors with all adjustment
// we've done above.
- ToBBI.BB->normalizeSuccProbs();
+ if (ToBBI.IsBrAnalyzable && FromBBI.IsBrAnalyzable)
+ ToBBI.BB->normalizeSuccProbs();
ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end());
FromBBI.Predicate.clear();
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index 9588dfb72058..920c2a372a9b 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -22,6 +22,7 @@
// With the help of a runtime that understands the .fault_maps section,
// faulting_load_op branches to throw_npe if executing movl (%r10), %esi incurs
// a page fault.
+// Store and LoadStore are also supported.
//
//===----------------------------------------------------------------------===//
@@ -29,6 +30,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/FaultMaps.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -151,25 +153,44 @@ class ImplicitNullChecks : public MachineFunctionPass {
const TargetRegisterInfo *TRI = nullptr;
AliasAnalysis *AA = nullptr;
MachineModuleInfo *MMI = nullptr;
+ MachineFrameInfo *MFI = nullptr;
bool analyzeBlockForNullChecks(MachineBasicBlock &MBB,
SmallVectorImpl<NullCheck> &NullCheckList);
- MachineInstr *insertFaultingLoad(MachineInstr *LoadMI, MachineBasicBlock *MBB,
- MachineBasicBlock *HandlerMBB);
+ MachineInstr *insertFaultingInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ MachineBasicBlock *HandlerMBB);
void rewriteNullChecks(ArrayRef<NullCheck> NullCheckList);
- /// Is \p MI a memory operation that can be used to implicitly null check the
- /// value in \p PointerReg? \p PrevInsts is the set of instruction seen since
+ enum AliasResult {
+ AR_NoAlias,
+ AR_MayAlias,
+ AR_WillAliasEverything
+ };
+  /// Returns AR_NoAlias if the memory operation in \p MI does not alias with
+  /// \p PrevMI, AR_MayAlias if they may alias, and AR_WillAliasEverything if
+  /// they may alias and any further memory operation may alias with \p PrevMI.
+ AliasResult areMemoryOpsAliased(MachineInstr &MI, MachineInstr *PrevMI);
+
+ enum SuitabilityResult {
+ SR_Suitable,
+ SR_Unsuitable,
+ SR_Impossible
+ };
+  /// Return SR_Suitable if \p MI is a memory operation that can be used to
+  /// implicitly null check the value in \p PointerReg, SR_Unsuitable if
+  /// \p MI cannot be used to null check, and SR_Impossible if there is no
+  /// point in continuing the search because no later instruction can be
+  /// used either. \p PrevInsts is the set of instructions seen since
/// the explicit null check on \p PointerReg.
- bool isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
- ArrayRef<MachineInstr *> PrevInsts);
+ SuitabilityResult isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
+ ArrayRef<MachineInstr *> PrevInsts);
  /// Return true if \p FaultingMI can be hoisted from after the
  /// instructions in \p InstsSeenSoFar to before them. Set \p Dependence to a
  /// non-null value if we also need to (and legally can) hoist a dependency.
- bool canHoistLoadInst(MachineInstr *FaultingMI, unsigned PointerReg,
- ArrayRef<MachineInstr *> InstsSeenSoFar,
- MachineBasicBlock *NullSucc, MachineInstr *&Dependence);
+ bool canHoistInst(MachineInstr *FaultingMI, unsigned PointerReg,
+ ArrayRef<MachineInstr *> InstsSeenSoFar,
+ MachineBasicBlock *NullSucc, MachineInstr *&Dependence);
public:
static char ID;
@@ -193,7 +214,7 @@ public:
}
bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
- if (MI->isCall() || MI->mayStore() || MI->hasUnmodeledSideEffects())
+ if (MI->isCall() || MI->hasUnmodeledSideEffects())
return false;
auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); };
(void)IsRegMask;
@@ -248,7 +269,7 @@ bool ImplicitNullChecks::canReorder(const MachineInstr *A,
unsigned RegB = MOB.getReg();
- if (TRI->regsOverlap(RegA, RegB))
+ if (TRI->regsOverlap(RegA, RegB) && (MOA.isDef() || MOB.isDef()))
return false;
}
}
@@ -260,6 +281,7 @@ bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getRegInfo().getTargetRegisterInfo();
MMI = &MF.getMMI();
+ MFI = &MF.getFrameInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
SmallVector<NullCheck, 16> NullCheckList;
@@ -283,36 +305,91 @@ static bool AnyAliasLiveIn(const TargetRegisterInfo *TRI,
return false;
}
-bool ImplicitNullChecks::isSuitableMemoryOp(
- MachineInstr &MI, unsigned PointerReg, ArrayRef<MachineInstr *> PrevInsts) {
+ImplicitNullChecks::AliasResult
+ImplicitNullChecks::areMemoryOpsAliased(MachineInstr &MI,
+ MachineInstr *PrevMI) {
+  // If it is not a memory access, skip the check.
+ if (!(PrevMI->mayStore() || PrevMI->mayLoad()))
+ return AR_NoAlias;
+  // Two loads can never conflict with each other.
+ if (!(MI.mayStore() || PrevMI->mayStore()))
+ return AR_NoAlias;
+  // We lost the memory operand info, so conservatively assume aliasing. If
+  // the unknown access is a store, there is no point in continuing because
+  // we will never be able to prove anything against it.
+ if (MI.memoperands_empty())
+ return MI.mayStore() ? AR_WillAliasEverything : AR_MayAlias;
+ if (PrevMI->memoperands_empty())
+ return PrevMI->mayStore() ? AR_WillAliasEverything : AR_MayAlias;
+
+ for (MachineMemOperand *MMO1 : MI.memoperands()) {
+    // MMO1 should have a Value because it comes from the operation we'd
+    // like to use as the implicit null check.
+ assert(MMO1->getValue() && "MMO1 should have a Value!");
+ for (MachineMemOperand *MMO2 : PrevMI->memoperands()) {
+ if (const PseudoSourceValue *PSV = MMO2->getPseudoValue()) {
+ if (PSV->mayAlias(MFI))
+ return AR_MayAlias;
+ continue;
+ }
+ llvm::AliasResult AAResult = AA->alias(
+ MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize,
+ MMO1->getAAInfo()),
+ MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize,
+ MMO2->getAAInfo()));
+ if (AAResult != NoAlias)
+ return AR_MayAlias;
+ }
+ }
+ return AR_NoAlias;
+}
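
A standalone decision-table model of areMemoryOpsAliased, where the booleans stand in for the mayLoad/mayStore/memoperands_empty queries: loads never block each other, and an access with lost memoperand info is only fatal to the whole scan when it is a store.

    #include <cassert>

    enum class AR { NoAlias, MayAlias, WillAliasEverything };

    static AR classify(bool CurIsStore, bool CurInfoLost, bool PrevIsLoad,
                       bool PrevIsStore, bool PrevInfoLost) {
      if (!PrevIsLoad && !PrevIsStore)
        return AR::NoAlias; // previous instruction touches no memory
      if (!CurIsStore && !PrevIsStore)
        return AR::NoAlias; // load vs. load is harmless
      if (CurInfoLost)
        return CurIsStore ? AR::WillAliasEverything : AR::MayAlias;
      if (PrevInfoLost)
        return PrevIsStore ? AR::WillAliasEverything : AR::MayAlias;
      return AR::MayAlias; // would fall through to the real AA query
    }

    int main() {
      assert(classify(false, false, true, false, false) == AR::NoAlias);
      assert(classify(true, true, false, true, false) ==
             AR::WillAliasEverything);
      assert(classify(true, false, false, true, true) ==
             AR::WillAliasEverything);
    }
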
+
+ImplicitNullChecks::SuitabilityResult
+ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
+ ArrayRef<MachineInstr *> PrevInsts) {
int64_t Offset;
unsigned BaseReg;
if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI) ||
BaseReg != PointerReg)
- return false;
-
- // We want the load to be issued at a sane offset from PointerReg, so that
- // if PointerReg is null then the load reliably page faults.
- if (!(MI.mayLoad() && !MI.isPredicable() && Offset < PageSize))
- return false;
-
- // Finally, we need to make sure that the load instruction actually is
- // loading from PointerReg, and there isn't some re-definition of PointerReg
- // between the compare and the load.
+ return SR_Unsuitable;
+
+ // We want the mem access to be issued at a sane offset from PointerReg,
+ // so that if PointerReg is null then the access reliably page faults.
+ if (!((MI.mayLoad() || MI.mayStore()) && !MI.isPredicable() &&
+ Offset < PageSize))
+ return SR_Unsuitable;
+
+ // Finally, we need to make sure that the access instruction actually is
+ // accessing from PointerReg, and there isn't some re-definition of PointerReg
+ // between the compare and the memory access.
+  // If PointerReg was redefined earlier, there is no point in continuing:
+  // this condition will fail for every later instruction as well.
+ SuitabilityResult Suitable = SR_Suitable;
for (auto *PrevMI : PrevInsts)
- for (auto &PrevMO : PrevMI->operands())
- if (PrevMO.isReg() && PrevMO.getReg() &&
+ for (auto &PrevMO : PrevMI->operands()) {
+ if (PrevMO.isReg() && PrevMO.getReg() && PrevMO.isDef() &&
TRI->regsOverlap(PrevMO.getReg(), PointerReg))
- return false;
-
- return true;
+ return SR_Impossible;
+
+      // Check whether the current memory access aliases with a previous one.
+      // If we already know it aliases, there is no need to check again, but
+      // we keep checking the base pointer since that can still yield
+      // SR_Impossible.
+ if (Suitable == SR_Suitable) {
+ AliasResult AR = areMemoryOpsAliased(MI, PrevMI);
+ if (AR == AR_WillAliasEverything)
+ return SR_Impossible;
+ if (AR == AR_MayAlias)
+ Suitable = SR_Unsuitable;
+ }
+ }
+ return Suitable;
}
-bool ImplicitNullChecks::canHoistLoadInst(
- MachineInstr *FaultingMI, unsigned PointerReg,
- ArrayRef<MachineInstr *> InstsSeenSoFar, MachineBasicBlock *NullSucc,
- MachineInstr *&Dependence) {
+bool ImplicitNullChecks::canHoistInst(MachineInstr *FaultingMI,
+ unsigned PointerReg,
+ ArrayRef<MachineInstr *> InstsSeenSoFar,
+ MachineBasicBlock *NullSucc,
+ MachineInstr *&Dependence) {
auto DepResult = computeDependence(FaultingMI, InstsSeenSoFar);
if (!DepResult.CanReorder)
return false;
@@ -359,7 +436,8 @@ bool ImplicitNullChecks::canHoistLoadInst(
// The Dependency can't be re-defining the base register -- then we won't
// get the memory operation on the address we want. This is already
// checked in \c IsSuitableMemoryOp.
- assert(!TRI->regsOverlap(DependenceMO.getReg(), PointerReg) &&
+ assert(!(DependenceMO.isDef() &&
+ TRI->regsOverlap(DependenceMO.getReg(), PointerReg)) &&
"Should have been checked before!");
}
@@ -481,9 +559,11 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
return false;
MachineInstr *Dependence;
- if (isSuitableMemoryOp(MI, PointerReg, InstsSeenSoFar) &&
- canHoistLoadInst(&MI, PointerReg, InstsSeenSoFar, NullSucc,
- Dependence)) {
+ SuitabilityResult SR = isSuitableMemoryOp(MI, PointerReg, InstsSeenSoFar);
+ if (SR == SR_Impossible)
+ return false;
+ if (SR == SR_Suitable &&
+ canHoistInst(&MI, PointerReg, InstsSeenSoFar, NullSucc, Dependence)) {
NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc,
NullSucc, Dependence);
return true;
@@ -495,36 +575,42 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
return false;
}
-/// Wrap a machine load instruction, LoadMI, into a FAULTING_LOAD_OP machine
-/// instruction. The FAULTING_LOAD_OP instruction does the same load as LoadMI
-/// (defining the same register), and branches to HandlerMBB if the load
-/// faults. The FAULTING_LOAD_OP instruction is inserted at the end of MBB.
-MachineInstr *
-ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI,
- MachineBasicBlock *MBB,
- MachineBasicBlock *HandlerMBB) {
+/// Wrap a machine instruction, MI, into a FAULTING machine instruction.
+/// The FAULTING instruction does the same load/store as MI
+/// (defining the same register), and branches to HandlerMBB if the mem access
+/// faults. The FAULTING instruction is inserted at the end of MBB.
+MachineInstr *ImplicitNullChecks::insertFaultingInstr(
+ MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *HandlerMBB) {
const unsigned NoRegister = 0; // Guaranteed to be the NoRegister value for
// all targets.
DebugLoc DL;
- unsigned NumDefs = LoadMI->getDesc().getNumDefs();
+ unsigned NumDefs = MI->getDesc().getNumDefs();
assert(NumDefs <= 1 && "other cases unhandled!");
unsigned DefReg = NoRegister;
if (NumDefs != 0) {
- DefReg = LoadMI->defs().begin()->getReg();
- assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 &&
+ DefReg = MI->defs().begin()->getReg();
+ assert(std::distance(MI->defs().begin(), MI->defs().end()) == 1 &&
"expected exactly one def!");
}
- auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_LOAD_OP), DefReg)
+ FaultMaps::FaultKind FK;
+ if (MI->mayLoad())
+ FK =
+ MI->mayStore() ? FaultMaps::FaultingLoadStore : FaultMaps::FaultingLoad;
+ else
+ FK = FaultMaps::FaultingStore;
+
+ auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_OP), DefReg)
+ .addImm(FK)
.addMBB(HandlerMBB)
- .addImm(LoadMI->getOpcode());
+ .addImm(MI->getOpcode());
- for (auto &MO : LoadMI->uses())
- MIB.addOperand(MO);
+ for (auto &MO : MI->uses())
+ MIB.add(MO);
- MIB.setMemRefs(LoadMI->memoperands_begin(), LoadMI->memoperands_end());
+ MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
return MIB;
}
@@ -545,18 +631,18 @@ void ImplicitNullChecks::rewriteNullChecks(
NC.getCheckBlock()->insert(NC.getCheckBlock()->end(), DepMI);
}
- // Insert a faulting load where the conditional branch was originally. We
- // check earlier ensures that this bit of code motion is legal. We do not
- // touch the successors list for any basic block since we haven't changed
- // control flow, we've just made it implicit.
- MachineInstr *FaultingLoad = insertFaultingLoad(
+    // Insert a faulting instruction where the conditional branch was
+    // originally. The check we did earlier ensures that this bit of code
+    // motion is legal. We do not touch the successors list for any basic
+    // block since we haven't changed control flow, we've just made it
+    // implicit.
+ MachineInstr *FaultingInstr = insertFaultingInstr(
NC.getMemOperation(), NC.getCheckBlock(), NC.getNullSucc());
// Now the values defined by MemOperation, if any, are live-in of
// the block of MemOperation.
- // The original load operation may define implicit-defs alongside
- // the loaded value.
+ // The original operation may define implicit-defs alongside
+ // the value.
MachineBasicBlock *MBB = NC.getMemOperation()->getParent();
- for (const MachineOperand &MO : FaultingLoad->operands()) {
+ for (const MachineOperand &MO : FaultingInstr->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 3d81184f774a..a1cb0a0695bf 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -558,7 +558,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI);
// We take the DebugLoc from MI, since OrigMI may be attributed to a
- // different source location.
+ // different source location.
auto *NewMI = LIS.getInstructionFromIndex(DefIdx);
NewMI->setDebugLoc(MI.getDebugLoc());
@@ -686,7 +686,8 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
return true;
}
-#if !defined(NDEBUG)
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
// Dump the range of instructions from B to E with their slot indexes.
static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B,
MachineBasicBlock::iterator E,
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index afd24067ace7..c6cc909e25d3 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -115,21 +115,21 @@ void IntrinsicLowering::AddPrototypes(Module &M) {
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
break;
case Intrinsic::memmove:
M.getOrInsertFunction("memmove",
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
break;
case Intrinsic::memset:
M.getOrInsertFunction("memset",
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
Type::getInt32Ty(M.getContext()),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
break;
case Intrinsic::sqrt:
EnsureFPIntrinsicsExist(M, F, "sqrtf", "sqrt", "sqrtl");
diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt
index 86d3624a9d6e..07ea9dcaea7a 100644
--- a/lib/CodeGen/LLVMBuild.txt
+++ b/lib/CodeGen/LLVMBuild.txt
@@ -22,4 +22,4 @@ subdirectories = AsmPrinter SelectionDAG MIRParser GlobalISel
type = Library
name = CodeGen
parent = Libraries
-required_libraries = Analysis BitReader BitWriter Core MC Scalar Support Target TransformUtils
+required_libraries = Analysis BitReader BitWriter Core MC ProfileData Scalar Support Target TransformUtils
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 26794e28020e..7b1706f0f4ba 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -42,8 +42,8 @@ static cl::opt<cl::boolOrDefault>
EnableFastISelOption("fast-isel", cl::Hidden,
cl::desc("Enable the \"fast\" instruction selector"));
-static cl::opt<bool>
- EnableGlobalISel("global-isel", cl::Hidden, cl::init(false),
+static cl::opt<cl::boolOrDefault>
+ EnableGlobalISel("global-isel", cl::Hidden,
cl::desc("Enable the \"global\" instruction selector"));
void LLVMTargetMachine::initAsmInfo() {
@@ -85,7 +85,7 @@ void LLVMTargetMachine::initAsmInfo() {
LLVMTargetMachine::LLVMTargetMachine(const Target &T,
StringRef DataLayoutString,
const Triple &TT, StringRef CPU,
- StringRef FS, TargetOptions Options,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: TargetMachine(T, DataLayoutString, TT, CPU, FS, Options) {
@@ -149,7 +149,9 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
TM->setFastISel(true);
// Ask the target for an isel.
- if (LLVM_UNLIKELY(EnableGlobalISel)) {
+ // Enable GlobalISel if the target wants to, but allow that to be overridden.
+ if (EnableGlobalISel == cl::BOU_TRUE || (EnableGlobalISel == cl::BOU_UNSET &&
+ PassConfig->isGlobalISelEnabled())) {
if (PassConfig->addIRTranslator())
return nullptr;
@@ -172,11 +174,12 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
// Pass to reset the MachineFunction if the ISel failed.
PM.add(createResetMachineFunctionPass(
- PassConfig->reportDiagnosticWhenGlobalISelFallback()));
+ PassConfig->reportDiagnosticWhenGlobalISelFallback(),
+ PassConfig->isGlobalISelAbortEnabled()));
// Provide a fallback path when we do not want to abort on
// not-yet-supported input.
- if (LLVM_UNLIKELY(!PassConfig->isGlobalISelAbortEnabled()) &&
+ if (!PassConfig->isGlobalISelAbortEnabled() &&
PassConfig->addInstSelector())
return nullptr;
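Side note on the option type change above: cl::opt<bool> cannot distinguish "not passed" from an explicit "=false", while cl::boolOrDefault can, which is what lets the target's preference win only when the user stays silent. A minimal tri-state sketch with a hypothetical flag name:

```cpp
#include "llvm/Support/CommandLine.h"
using namespace llvm;

static cl::opt<cl::boolOrDefault>
    EnableThing("enable-thing", cl::Hidden,
                cl::desc("Tri-state demo; unset defers to the default"));

// Resolve the three states: explicit true, explicit false, or unset.
static bool shouldEnableThing(bool TargetDefault) {
  if (EnableThing == cl::BOU_UNSET)
    return TargetDefault;             // user said nothing: target decides
  return EnableThing == cl::BOU_TRUE; // user's explicit choice wins
}
```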
diff --git a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
new file mode 100644
index 000000000000..996d40ca6e1e
--- /dev/null
+++ b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
@@ -0,0 +1,97 @@
+//===- LazyMachineBlockFrequencyInfo.cpp - Lazy Machine Block Frequency ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is an alternative analysis pass to MachineBlockFrequencyInfo. The
+/// difference is that with this pass the block frequencies are not computed
+/// when the analysis pass is executed but rather when the BFI result is
+/// explicitly requested by the analysis client.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lazy-machine-block-freq"
+
+INITIALIZE_PASS_BEGIN(LazyMachineBlockFrequencyInfoPass, DEBUG_TYPE,
+ "Lazy Machine Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(LazyMachineBlockFrequencyInfoPass, DEBUG_TYPE,
+ "Lazy Machine Block Frequency Analysis", true, true)
+
+char LazyMachineBlockFrequencyInfoPass::ID = 0;
+
+LazyMachineBlockFrequencyInfoPass::LazyMachineBlockFrequencyInfoPass()
+ : MachineFunctionPass(ID) {
+ initializeLazyMachineBlockFrequencyInfoPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+void LazyMachineBlockFrequencyInfoPass::print(raw_ostream &OS,
+ const Module *M) const {
+ getBFI().print(OS, M);
+}
+
+void LazyMachineBlockFrequencyInfoPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LazyMachineBlockFrequencyInfoPass::releaseMemory() {
+ OwnedMBFI.reset();
+ OwnedMLI.reset();
+ OwnedMDT.reset();
+}
+
+MachineBlockFrequencyInfo &
+LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const {
+ auto *MBFI = getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
+ if (MBFI) {
+ DEBUG(dbgs() << "MachineBlockFrequencyInfo is available\n");
+ return *MBFI;
+ }
+
+ auto &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
+ auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+ auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+ DEBUG(dbgs() << "Building MachineBlockFrequencyInfo on the fly\n");
+ DEBUG(if (MLI) dbgs() << "LoopInfo is available\n");
+
+ if (!MLI) {
+ DEBUG(dbgs() << "Building LoopInfo on the fly\n");
+ // First create a dominator tree.
+ DEBUG(if (MDT) dbgs() << "DominatorTree is available\n");
+
+ if (!MDT) {
+ DEBUG(dbgs() << "Building DominatorTree on the fly\n");
+ OwnedMDT = make_unique<MachineDominatorTree>();
+ OwnedMDT->getBase().recalculate(*MF);
+ MDT = OwnedMDT.get();
+ }
+
+ // Generate LoopInfo from it.
+ OwnedMLI = make_unique<MachineLoopInfo>();
+ OwnedMLI->getBase().analyze(MDT->getBase());
+ MLI = OwnedMLI.get();
+ }
+
+ OwnedMBFI = make_unique<MachineBlockFrequencyInfo>();
+ OwnedMBFI->calculate(*MF, MBPI, *MLI);
+ return *OwnedMBFI;
+}
+
+bool LazyMachineBlockFrequencyInfoPass::runOnMachineFunction(
+ MachineFunction &F) {
+ MF = &F;
+ return false;
+}
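A hypothetical consumer of the new pass, to show where the laziness pays off: requiring the pass is cheap, and block frequencies are only computed if getBFI() is actually reached (getBFI() is assumed here to forward to calculateIfNotAvailable(), as the print() method above suggests):

```cpp
// Sketch, not part of the patch. WorthOptimizing is a hypothetical gate.
void MyMachinePass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}

bool MyMachinePass::runOnMachineFunction(MachineFunction &MF) {
  if (!WorthOptimizing(MF)) // hypothetical cheap predicate
    return false;           // BFI is never computed on this path
  MachineBlockFrequencyInfo &MBFI =
      getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
  for (MachineBasicBlock &MBB : MF)
    (void)MBFI.getBlockFreq(&MBB); // first use triggers the computation
  return false;
}
```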
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index 834ed5f06c94..275d84e2c185 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -14,14 +14,23 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/Function.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <string>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "lexicalscopes"
@@ -38,6 +47,10 @@ void LexicalScopes::reset() {
/// initialize - Scan machine function and construct lexical scope nest.
void LexicalScopes::initialize(const MachineFunction &Fn) {
+ // Don't attempt any lexical scope creation for a NoDebug compile unit.
+ if (Fn.getFunction()->getSubprogram()->getUnit()->getEmissionKind() ==
+ DICompileUnit::NoDebug)
+ return;
reset();
MF = &Fn;
SmallVector<InsnRange, 4> MIRanges;
@@ -54,7 +67,6 @@ void LexicalScopes::initialize(const MachineFunction &Fn) {
void LexicalScopes::extractLexicalScopes(
SmallVectorImpl<InsnRange> &MIRanges,
DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
-
// Scan each instruction and create scopes. First build working set of scopes.
for (const auto &MBB : *MF) {
const MachineInstr *RangeBeginMI = nullptr;
@@ -127,6 +139,10 @@ LexicalScope *LexicalScopes::findLexicalScope(const DILocation *DL) {
LexicalScope *LexicalScopes::getOrCreateLexicalScope(const DILocalScope *Scope,
const DILocation *IA) {
if (IA) {
+ // Skip scopes inlined from a NoDebug compile unit.
+ if (Scope->getSubprogram()->getUnit()->getEmissionKind() ==
+ DICompileUnit::NoDebug)
+ return getOrCreateLexicalScope(IA);
// Create an abstract scope for inlined function.
getOrCreateAbstractScope(Scope);
// Create an inlined scope for inlined function.
@@ -181,10 +197,9 @@ LexicalScopes::getOrCreateInlinedScope(const DILocalScope *Scope,
else
Parent = getOrCreateLexicalScope(InlinedAt);
- I = InlinedLexicalScopeMap.emplace(std::piecewise_construct,
- std::forward_as_tuple(P),
- std::forward_as_tuple(Parent, Scope,
- InlinedAt, false))
+ I = InlinedLexicalScopeMap
+ .emplace(std::piecewise_construct, std::forward_as_tuple(P),
+ std::forward_as_tuple(Parent, Scope, InlinedAt, false))
.first;
return &I->second;
}
@@ -241,7 +256,6 @@ void LexicalScopes::constructScopeNest(LexicalScope *Scope) {
void LexicalScopes::assignInstructionRanges(
SmallVectorImpl<InsnRange> &MIRanges,
DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
-
LexicalScope *PrevLexicalScope = nullptr;
for (const auto &R : MIRanges) {
LexicalScope *S = MI2ScopeMap.lookup(R.first);
@@ -299,9 +313,8 @@ bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) {
return Result;
}
-/// dump - Print data structures.
-void LexicalScope::dump(unsigned Indent) const {
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LexicalScope::dump(unsigned Indent) const {
raw_ostream &err = dbgs();
err.indent(Indent);
err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
@@ -316,5 +329,5 @@ void LexicalScope::dump(unsigned Indent) const {
for (unsigned i = 0, e = Children.size(); i != e; ++i)
if (Children[i] != this)
Children[i]->dump(Indent + 2);
-#endif
}
+#endif
diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp
index c945376560f7..f956974b1aaf 100644
--- a/lib/CodeGen/LiveDebugValues.cpp
+++ b/lib/CodeGen/LiveDebugValues.cpp
@@ -24,13 +24,16 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/UniqueVector.h"
#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -61,6 +64,7 @@ class LiveDebugValues : public MachineFunctionPass {
private:
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
+ const TargetFrameLowering *TFI;
LexicalScopes LS;
/// Keeps track of lexical scopes associated with a user value's source
@@ -127,11 +131,13 @@ private:
if (int RegNo = isDbgValueDescribedByReg(MI)) {
Kind = RegisterKind;
Loc.RegisterLoc.RegNo = RegNo;
- uint64_t Offset =
+ int64_t Offset =
MI.isIndirectDebugValue() ? MI.getOperand(1).getImm() : 0;
// We don't support offsets larger than 4GiB here. They are
// slated to be replaced with DIExpressions anyway.
- if (Offset >= (1ULL << 32))
+ // With indirect debug values used for spill locations, Offset
+ // can be negative.
+ if (Offset == INT64_MIN || std::abs(Offset) >= (1LL << 32))
Kind = InvalidKind;
else
Loc.RegisterLoc.Offset = Offset;
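Two subtleties hide in the rewritten guard: the offset is now signed, and std::abs(INT64_MIN) is undefined behavior (the magnitude is not representable in int64_t), hence the explicit test. The predicate in isolation:

```cpp
#include <cstdint>
#include <cstdlib>

// True iff |Offset| fits in 32 bits. INT64_MIN is screened out first
// because std::abs(INT64_MIN) is undefined behavior.
static bool fitsInDbgValueOffset(int64_t Offset) {
  return Offset != INT64_MIN && std::abs(Offset) < (1LL << 32);
}
```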
@@ -150,7 +156,9 @@ private:
/// dominates MBB.
bool dominates(MachineBasicBlock &MBB) const { return UVS.dominates(&MBB); }
- void dump() const { MI.dump(); }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const { MI.dump(); }
+#endif
bool operator==(const VarLoc &Other) const {
return Var == Other.Var && Loc.Hash == Other.Loc.Hash;
@@ -167,6 +175,11 @@ private:
typedef UniqueVector<VarLoc> VarLocMap;
typedef SparseBitVector<> VarLocSet;
typedef SmallDenseMap<const MachineBasicBlock *, VarLocSet> VarLocInMBB;
+ struct SpillDebugPair {
+ MachineInstr *SpillInst;
+ MachineInstr *DebugInst;
+ };
+ typedef SmallVector<SpillDebugPair, 4> SpillMap;
/// This holds the working set of currently open ranges. For fast
/// access, this is done both as a set of VarLocIDs, and a map of
@@ -216,14 +229,21 @@ private:
}
};
+ bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF,
+ unsigned &Reg);
+ int extractSpillBaseRegAndOffset(const MachineInstr &MI, unsigned &Reg);
+
void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs);
+ void transferSpillInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, SpillMap &Spills);
void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
const VarLocMap &VarLocIDs);
bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
bool transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocInMBB &OutLocs, VarLocMap &VarLocIDs);
+ VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, SpillMap &Spills,
+ bool transferSpills);
bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
@@ -282,6 +302,7 @@ void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const {
// Debug Range Extension Implementation
//===----------------------------------------------------------------------===//
+#ifndef NDEBUG
void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
const VarLocInMBB &V,
const VarLocMap &VarLocIDs,
@@ -300,6 +321,22 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
}
Out << "\n";
}
+#endif
+
+/// Given a spill instruction, extract the register and offset used to
+/// address the spill location in a target-independent way.
+int LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI,
+ unsigned &Reg) {
+ assert(MI.hasOneMemOperand() &&
+ "Spill instruction does not have exactly one memory operand?");
+ auto MMOI = MI.memoperands_begin();
+ const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
+ assert(PVal->kind() == PseudoSourceValue::FixedStack &&
+ "Inconsistent memory operand in spill instruction");
+ int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex();
+ const MachineBasicBlock *MBB = MI.getParent();
+ return TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
+}
/// End all previous ranges related to @MI and start a new range from @MI
/// if it is a DBG_VALUE instr.
@@ -336,8 +373,12 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
SparseBitVector<> KillSet;
for (const MachineOperand &MO : MI.operands()) {
+ // Determine whether the operand is a register def. Assume that call
+ // instructions never clobber SP, because some backends (e.g., AArch64)
+ // never list SP in the regmask.
if (MO.isReg() && MO.isDef() && MO.getReg() &&
- TRI->isPhysicalRegister(MO.getReg())) {
+ TRI->isPhysicalRegister(MO.getReg()) &&
+ !(MI.isCall() && MO.getReg() == SP)) {
// Remove ranges of all aliased registers.
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
for (unsigned ID : OpenRanges.getVarLocs())
@@ -358,6 +399,91 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
OpenRanges.erase(KillSet, VarLocIDs);
}
+/// Decide if @MI is a spill instruction and return true if it is. We use two
+/// criteria to make this decision:
+/// - Is this instruction a store to a spill slot?
+/// - Is there a register operand that is both used and killed?
+/// TODO: Store optimization can fold spills into other stores (including
+/// other spills). We do not handle this yet (more than one memory operand).
+bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg) {
+ const MachineFrameInfo &FrameInfo = MF->getFrameInfo();
+ int FI;
+ const MachineMemOperand *MMO;
+
+ // TODO: Handle multiple stores folded into one.
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ // To identify a spill instruction, use the same criteria as in AsmPrinter.
+ if (!((TII->isStoreToStackSlotPostFE(MI, FI) ||
+ TII->hasStoreToStackSlot(MI, MMO, FI)) &&
+ FrameInfo.isSpillSlotObjectIndex(FI)))
+ return false;
+
+ // In a spill instruction generated by the InlineSpiller the spilled register
+ // has its kill flag set. Return false if we don't find such a register.
+ Reg = 0;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isUse() && MO.isKill()) {
+ Reg = MO.getReg();
+ break;
+ }
+ }
+ return Reg != 0;
+}
+
+/// A spilled register may indicate that we have to end the current range of
+/// a variable and create a new one for the spill location.
+/// We don't want to insert any instructions in transfer(), so we just create
+/// the DBG_VALUE without inserting it and keep track of it in @Spills.
+/// It will be inserted into the BB when we're done iterating over the
+/// instructions.
+void LiveDebugValues::transferSpillInst(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ SpillMap &Spills) {
+ unsigned Reg;
+ MachineFunction *MF = MI.getParent()->getParent();
+ if (!isSpillInstruction(MI, MF, Reg))
+ return;
+
+ // Check if the register is the location of a debug value.
+ for (unsigned ID : OpenRanges.getVarLocs()) {
+ if (VarLocIDs[ID].isDescribedByReg() == Reg) {
+ DEBUG(dbgs() << "Spilling Register " << PrintReg(Reg, TRI) << '('
+ << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
+
+ // Create a DBG_VALUE instruction to describe the Var in its spilled
+ // location, but don't insert it yet to avoid invalidating the
+ // iterator in our caller.
+ unsigned SpillBase;
+ int SpillOffset = extractSpillBaseRegAndOffset(MI, SpillBase);
+ const MachineInstr *DMI = &VarLocIDs[ID].MI;
+ MachineInstr *SpDMI =
+ BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase, 0,
+ DMI->getDebugVariable(), DMI->getDebugExpression());
+ SpDMI->getOperand(1).setImm(SpillOffset);
+ DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: ";
+ SpDMI->print(dbgs(), false, TII));
+
+ // The newly created DBG_VALUE instruction SpDMI must be inserted after
+ // MI. Keep track of the pairing.
+ SpillDebugPair MIP = {&MI, SpDMI};
+ Spills.push_back(MIP);
+
+ // End all previous ranges of Var.
+ OpenRanges.erase(VarLocIDs[ID].Var);
+
+ // Add the VarLoc to OpenRanges.
+ VarLoc VL(*SpDMI, LS);
+ unsigned SpillLocID = VarLocIDs.insert(VL);
+ OpenRanges.insert(SpillLocID, VL.Var);
+ return;
+ }
+ }
+}
+
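The pairing through SpillMap exists because transferSpillInst runs while its caller is iterating over the block, so inserting the new DBG_VALUE immediately would invalidate the iterator. Reduced to a pattern sketch, with maybeBuildSpillDbgValue standing in (hypothetically) for the BuildMI logic above:

```cpp
// Record insertions during the walk, replay them afterwards.
SmallVector<std::pair<MachineInstr *, MachineInstr *>, 4> Pending;
for (MachineInstr &MI : MBB)
  if (MachineInstr *DV = maybeBuildSpillDbgValue(MI)) // hypothetical helper
    Pending.push_back({&MI, DV}); // defer: keeps the iterator valid
for (auto &P : Pending)
  MBB.insertAfter(MachineBasicBlock::iterator(*P.first), P.second);
```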
/// Terminate all open ranges at the end of the current basic block.
bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
OpenRangesSet &OpenRanges,
@@ -383,10 +509,13 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
/// This routine creates OpenRanges and OutLocs.
bool LiveDebugValues::transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocInMBB &OutLocs, VarLocMap &VarLocIDs) {
+ VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
+ SpillMap &Spills, bool transferSpills) {
bool Changed = false;
transferDebugValue(MI, OpenRanges, VarLocIDs);
transferRegisterDef(MI, OpenRanges, VarLocIDs);
+ if (transferSpills)
+ transferSpillInst(MI, OpenRanges, VarLocIDs, Spills);
Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs);
return Changed;
}
@@ -475,10 +604,11 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
bool OLChanged = false;
bool MBBJoined = false;
- VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors.
+ VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors.
OpenRangesSet OpenRanges; // Ranges that are open until end of bb.
- VarLocInMBB OutLocs; // Ranges that exist beyond bb.
- VarLocInMBB InLocs; // Ranges that are incoming after joining.
+ VarLocInMBB OutLocs; // Ranges that exist beyond bb.
+ VarLocInMBB InLocs; // Ranges that are incoming after joining.
+ SpillMap Spills; // DBG_VALUEs associated with spills.
DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
@@ -490,9 +620,14 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
Pending;
// Initialize every mbb with OutLocs.
+ // We are not looking at any spill instructions during the initial pass
+ // over the BBs. The LiveDebugVariables pass has already created DBG_VALUE
+ // instructions for spills of registers that are known to hold user variables
+ // within the BB in which the spill occurs.
for (auto &MBB : MF)
for (auto &MI : MBB)
- transfer(MI, OpenRanges, OutLocs, VarLocIDs);
+ transfer(MI, OpenRanges, OutLocs, VarLocIDs, Spills,
+ /*transferSpills=*/false);
DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "OutLocs after initialization",
dbgs()));
@@ -524,8 +659,18 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
if (MBBJoined) {
MBBJoined = false;
Changed = true;
+ // Now that we have started to extend ranges across BBs we need to
+ // examine spill instructions to see whether they spill registers that
+ // correspond to user variables.
for (auto &MI : *MBB)
- OLChanged |= transfer(MI, OpenRanges, OutLocs, VarLocIDs);
+ OLChanged |= transfer(MI, OpenRanges, OutLocs, VarLocIDs, Spills,
+ /*transferSpills=*/true);
+
+ // Add any DBG_VALUE instructions necessitated by spills.
+ for (auto &SP : Spills)
+ MBB->insertAfter(MachineBasicBlock::iterator(*SP.SpillInst),
+ SP.DebugInst);
+ Spills.clear();
DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
"OutLocs after propagating", dbgs()));
@@ -559,6 +704,7 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
TII = MF.getSubtarget().getInstrInfo();
+ TFI = MF.getSubtarget().getFrameLowering();
LS.initialize(MF);
bool Changed = ExtendRanges(MF);
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 0934d8cfeaa1..bcf7c8e99c7f 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -944,7 +944,7 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
IsIndirect, Loc.getReg(), offset, Variable, Expression);
else
BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
- .addOperand(Loc)
+ .add(Loc)
.addImm(offset)
.addMetadata(Variable)
.addMetadata(Expression);
@@ -1005,7 +1005,7 @@ bool LiveDebugVariables::doInitialization(Module &M) {
return Pass::doInitialization(M);
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void LiveDebugVariables::dump() {
if (pImpl)
static_cast<LDVImpl*>(pImpl)->print(dbgs());
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 623af492fcd4..9ef9f238fdce 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -863,6 +863,37 @@ void LiveInterval::clearSubRanges() {
SubRanges = nullptr;
}
+void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator,
+ LaneBitmask LaneMask, std::function<void(LiveInterval::SubRange&)> Apply) {
+
+ LaneBitmask ToApply = LaneMask;
+ for (SubRange &SR : subranges()) {
+ LaneBitmask SRMask = SR.LaneMask;
+ LaneBitmask Matching = SRMask & LaneMask;
+ if (Matching.none())
+ continue;
+
+ SubRange *MatchingRange;
+ if (SRMask == Matching) {
+ // The subrange fits (it does not cover bits outside \p LaneMask).
+ MatchingRange = &SR;
+ } else {
+ // We have to split the subrange into a matching and non-matching part.
+ // Reduce lanemask of existing lane to non-matching part.
+ SR.LaneMask = SRMask & ~Matching;
+ // Create a new subrange for the matching part
+ MatchingRange = createSubRangeFrom(Allocator, Matching, SR);
+ }
+ Apply(*MatchingRange);
+ ToApply &= ~Matching;
+ }
+ // Create a new subrange if there are uncovered bits left.
+ if (ToApply.any()) {
+ SubRange *NewRange = createSubRange(Allocator, ToApply);
+ Apply(*NewRange);
+ }
+}
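The lane-mask bookkeeping above is easier to follow on bare integers. A toy model of the same refinement (lane masks as uint32_t, subranges as a vector); note it deliberately avoids revisiting ranges created during the loop, mirroring the real code's single pass:

```cpp
#include <cstdint>
#include <functional>
#include <vector>

// Toy model only: each "subrange" is reduced to its lane mask.
void refineToy(std::vector<uint32_t> &Ranges, uint32_t LaneMask,
               const std::function<void(uint32_t)> &Apply) {
  uint32_t ToApply = LaneMask;
  const size_t E = Ranges.size(); // don't revisit ranges added below
  for (size_t I = 0; I != E; ++I) {
    uint32_t Matching = Ranges[I] & LaneMask;
    if (!Matching)
      continue;
    if (Matching != Ranges[I]) {
      Ranges[I] &= ~Matching;     // non-matching part stays in place
      Ranges.push_back(Matching); // matching part becomes a new range
    }
    Apply(Matching);
    ToApply &= ~Matching;
  }
  if (ToApply) { // lanes no existing range covered
    Ranges.push_back(ToApply);
    Apply(ToApply);
  }
}
```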
+
unsigned LiveInterval::getSize() const {
unsigned Sum = 0;
for (const Segment &S : segments)
@@ -1032,6 +1063,7 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
// When they exist, Spills.back().start <= LastStart,
// and WriteI[-1].start <= LastStart.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void LiveRangeUpdater::print(raw_ostream &OS) const {
if (!isDirty()) {
if (LR)
@@ -1058,6 +1090,7 @@ void LiveRangeUpdater::print(raw_ostream &OS) const {
LLVM_DUMP_METHOD void LiveRangeUpdater::dump() const {
print(errs());
}
+#endif
// Determine if A and B should be coalesced.
static inline bool coalescable(const LiveRange::Segment &A,
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 70d34838b237..3f5b8e19d1f0 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the LiveInterval analysis pass which is used
-// by the Linear Scan Register allocator. This pass linearizes the
-// basic blocks of the function in DFS order and computes live intervals for
-// each virtual and physical register.
+/// \file This file implements the LiveInterval analysis pass which is used
+/// by the Linear Scan Register allocator. This pass linearizes the
+/// basic blocks of the function in DFS order and computes live intervals for
+/// each virtual and physical register.
//
//===----------------------------------------------------------------------===//
@@ -96,16 +96,14 @@ void LiveIntervals::releaseMemory() {
RegMaskBits.clear();
RegMaskBlocks.clear();
- for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i)
- delete RegUnitRanges[i];
+ for (LiveRange *LR : RegUnitRanges)
+ delete LR;
RegUnitRanges.clear();
// Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
VNInfoAllocator.Reset();
}
-/// runOnMachineFunction - calculates LiveIntervals
-///
bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
MF = &fn;
MRI = &MF->getRegInfo();
@@ -135,14 +133,13 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
return true;
}
-/// print - Implement the dump method.
void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
OS << "********** INTERVALS **********\n";
// Dump the regunits.
- for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i)
- if (LiveRange *LR = RegUnitRanges[i])
- OS << PrintRegUnit(i, TRI) << ' ' << *LR << '\n';
+ for (unsigned Unit = 0, UnitE = RegUnitRanges.size(); Unit != UnitE; ++Unit)
+ if (LiveRange *LR = RegUnitRanges[Unit])
+ OS << PrintRegUnit(Unit, TRI) << ' ' << *LR << '\n';
// Dump the virtregs.
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
@@ -152,8 +149,8 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
}
OS << "RegMasks:";
- for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i)
- OS << ' ' << RegMaskSlots[i];
+ for (SlotIndex Idx : RegMaskSlots)
+ OS << ' ' << Idx;
OS << '\n';
printInstrs(OS);
@@ -165,7 +162,7 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void LiveIntervals::dumpInstrs() const {
+LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const {
printInstrs(dbgs());
}
#endif
@@ -177,8 +174,7 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) {
}
-/// computeVirtRegInterval - Compute the live interval of a virtual register,
-/// based on defs and uses.
+/// Compute the live interval of a virtual register, based on defs and uses.
void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
assert(LRCalc && "LRCalc not initialized.");
assert(LI.empty() && "Should only compute empty intervals.");
@@ -200,7 +196,7 @@ void LiveIntervals::computeRegMasks() {
RegMaskBlocks.resize(MF->getNumBlockIDs());
// Find all instructions with regmask operands.
- for (MachineBasicBlock &MBB : *MF) {
+ for (const MachineBasicBlock &MBB : *MF) {
std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB.getNumber()];
RMB.first = RegMaskSlots.size();
@@ -210,7 +206,7 @@ void LiveIntervals::computeRegMasks() {
RegMaskBits.push_back(Mask);
}
- for (MachineInstr &MI : MBB) {
+ for (const MachineInstr &MI : MBB) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isRegMask())
continue;
@@ -245,9 +241,9 @@ void LiveIntervals::computeRegMasks() {
// interference.
//
-/// computeRegUnitInterval - Compute the live range of a register unit, based
-/// on the uses and defs of aliasing registers. The range should be empty,
-/// or contain only dead phi-defs from ABI blocks.
+/// Compute the live range of a register unit, based on the uses and defs of
+/// aliasing registers. The range should be empty, or contain only dead
+/// phi-defs from ABI blocks.
void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
assert(LRCalc && "LRCalc not initialized.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
@@ -257,22 +253,30 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
// may share super-registers. That's OK because createDeadDefs() is
// idempotent. It is very rare for a register unit to have multiple roots, so
// uniquing super-registers is probably not worthwhile.
- for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
- for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true);
- Supers.isValid(); ++Supers) {
- if (!MRI->reg_empty(*Supers))
- LRCalc->createDeadDefs(LR, *Supers);
+ bool IsReserved = true;
+ for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
+ for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
+ Super.isValid(); ++Super) {
+ unsigned Reg = *Super;
+ if (!MRI->reg_empty(Reg))
+ LRCalc->createDeadDefs(LR, Reg);
+ // A register unit is considered reserved if all its roots and all their
+ // super registers are reserved.
+ if (!MRI->isReserved(Reg))
+ IsReserved = false;
}
}
// Now extend LR to reach all uses.
// Ignore uses of reserved registers. We only track defs of those.
- for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) {
- for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true);
- Supers.isValid(); ++Supers) {
- unsigned Reg = *Supers;
- if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg))
- LRCalc->extendToUses(LR, Reg);
+ if (!IsReserved) {
+ for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
+ for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
+ Super.isValid(); ++Super) {
+ unsigned Reg = *Super;
+ if (!MRI->reg_empty(Reg))
+ LRCalc->extendToUses(LR, Reg);
+ }
}
}
@@ -281,11 +285,9 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
LR.flushSegmentSet();
}
-
-/// computeLiveInRegUnits - Precompute the live ranges of any register units
-/// that are live-in to an ABI block somewhere. Register values can appear
-/// without a corresponding def when entering the entry block or a landing pad.
-///
+/// Precompute the live ranges of any register units that are live-in to an ABI
+/// block somewhere. Register values can appear without a corresponding def when
+/// entering the entry block or a landing pad.
void LiveIntervals::computeLiveInRegUnits() {
RegUnitRanges.resize(TRI->getNumRegUnits());
DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n");
@@ -294,18 +296,15 @@ void LiveIntervals::computeLiveInRegUnits() {
SmallVector<unsigned, 8> NewRanges;
// Check all basic blocks for live-ins.
- for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
- MFI != MFE; ++MFI) {
- const MachineBasicBlock *MBB = &*MFI;
-
+ for (const MachineBasicBlock &MBB : *MF) {
// We only care about ABI blocks: Entry + landing pads.
- if ((MFI != MF->begin() && !MBB->isEHPad()) || MBB->livein_empty())
+ if ((&MBB != &MF->front() && !MBB.isEHPad()) || MBB.livein_empty())
continue;
// Create phi-defs at Begin for all live-in registers.
- SlotIndex Begin = Indexes->getMBBStartIdx(MBB);
- DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber());
- for (const auto &LI : MBB->liveins()) {
+ SlotIndex Begin = Indexes->getMBBStartIdx(&MBB);
+ DEBUG(dbgs() << Begin << "\tBB#" << MBB.getNumber());
+ for (const auto &LI : MBB.liveins()) {
for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) {
unsigned Unit = *Units;
LiveRange *LR = RegUnitRanges[Unit];
@@ -324,16 +323,13 @@ void LiveIntervals::computeLiveInRegUnits() {
DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n");
// Compute the 'normal' part of the ranges.
- for (unsigned i = 0, e = NewRanges.size(); i != e; ++i) {
- unsigned Unit = NewRanges[i];
+ for (unsigned Unit : NewRanges)
computeRegUnitRange(*RegUnitRanges[Unit], Unit);
- }
}
-
static void createSegmentsForValues(LiveRange &LR,
- iterator_range<LiveInterval::vni_iterator> VNIs) {
- for (auto VNI : VNIs) {
+ iterator_range<LiveInterval::vni_iterator> VNIs) {
+ for (VNInfo *VNI : VNIs) {
if (VNI->isUnused())
continue;
SlotIndex Def = VNI->def;
@@ -349,7 +345,7 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
// Keep track of the PHIs that are in use.
SmallPtrSet<VNInfo*, 8> UsedPHIs;
// Blocks that have already been added to WorkList as live-out.
- SmallPtrSet<MachineBasicBlock*, 16> LiveOut;
+ SmallPtrSet<const MachineBasicBlock*, 16> LiveOut;
// Extend intervals to reach all uses in WorkList.
while (!WorkList.empty()) {
@@ -368,7 +364,7 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
!UsedPHIs.insert(VNI).second)
continue;
// The PHI is live, make sure the predecessors are live-out.
- for (auto &Pred : MBB->predecessors()) {
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
if (!LiveOut.insert(Pred).second)
continue;
SlotIndex Stop = Indexes.getMBBEndIdx(Pred);
@@ -384,7 +380,7 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes,
LR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI));
// Make sure VNI is live-out from the predecessors.
- for (auto &Pred : MBB->predecessors()) {
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
if (!LiveOut.insert(Pred).second)
continue;
SlotIndex Stop = Indexes.getMBBEndIdx(Pred);
@@ -415,22 +411,20 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
ShrinkToUsesWorkList WorkList;
// Visit all instructions reading li->reg.
- for (MachineRegisterInfo::reg_instr_iterator
- I = MRI->reg_instr_begin(li->reg), E = MRI->reg_instr_end();
- I != E; ) {
- MachineInstr *UseMI = &*(I++);
- if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
+ unsigned Reg = li->reg;
+ for (MachineInstr &UseMI : MRI->reg_instructions(Reg)) {
+ if (UseMI.isDebugValue() || !UseMI.readsVirtualRegister(Reg))
continue;
- SlotIndex Idx = getInstructionIndex(*UseMI).getRegSlot();
+ SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
LiveQueryResult LRQ = li->Query(Idx);
VNInfo *VNI = LRQ.valueIn();
if (!VNI) {
// This shouldn't happen: readsVirtualRegister returns true, but there is
// no live value. It is likely caused by a target getting <undef> flags
// wrong.
- DEBUG(dbgs() << Idx << '\t' << *UseMI
+ DEBUG(dbgs() << Idx << '\t' << UseMI
<< "Warning: Instr claims to read non-existent value in "
- << *li << '\n');
+ << *li << '\n');
continue;
}
// Special case: An early-clobber tied operand reads and writes the
@@ -458,7 +452,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
bool LiveIntervals::computeDeadValues(LiveInterval &LI,
SmallVectorImpl<MachineInstr*> *dead) {
bool MayHaveSplitComponents = false;
- for (auto VNI : LI.valnos) {
+ for (VNInfo *VNI : LI.valnos) {
if (VNI->isUnused())
continue;
SlotIndex Def = VNI->def;
@@ -548,7 +542,7 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
SR.segments.swap(NewLR.segments);
// Remove dead PHI value numbers
- for (auto VNI : SR.valnos) {
+ for (VNInfo *VNI : SR.valnos) {
if (VNI->isUnused())
continue;
const LiveRange::Segment *Segment = SR.getSegmentContaining(VNI->def);
@@ -571,8 +565,8 @@ void LiveIntervals::extendToIndices(LiveRange &LR,
ArrayRef<SlotIndex> Undefs) {
assert(LRCalc && "LRCalc not initialized.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
- for (unsigned i = 0, e = Indices.size(); i != e; ++i)
- LRCalc->extend(LR, Indices[i], /*PhysReg=*/0, Undefs);
+ for (SlotIndex Idx : Indices)
+ LRCalc->extend(LR, Idx, /*PhysReg=*/0, Undefs);
}
void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
@@ -601,11 +595,9 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
// from each successor.
typedef df_iterator_default_set<MachineBasicBlock*,9> VisitedTy;
VisitedTy Visited;
- for (MachineBasicBlock::succ_iterator
- SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end();
- SuccI != SuccE; ++SuccI) {
+ for (MachineBasicBlock *Succ : KillMBB->successors()) {
for (df_ext_iterator<MachineBasicBlock*, VisitedTy>
- I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited);
+ I = df_ext_begin(Succ, Visited), E = df_ext_end(Succ, Visited);
I != E;) {
MachineBasicBlock *MBB = *I;
@@ -657,9 +649,9 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// Find the regunit intervals for the assigned register. They may overlap
// the virtual register live range, cancelling any kills.
RU.clear();
- for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid();
- ++Units) {
- const LiveRange &RURange = getRegUnit(*Units);
+ for (MCRegUnitIterator Unit(VRM->getPhys(Reg), TRI); Unit.isValid();
+ ++Unit) {
+ const LiveRange &RURange = getRegUnit(*Unit);
if (RURange.empty())
continue;
RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
@@ -802,9 +794,8 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const {
// Conservatively return true instead of scanning huge predecessor lists.
if (PHIMBB->pred_size() > 100)
return true;
- for (MachineBasicBlock::const_pred_iterator
- PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI)
- if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI)))
+ for (const MachineBasicBlock *Pred : PHIMBB->predecessors())
+ if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(Pred)))
return true;
}
return false;
@@ -895,7 +886,7 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
// IntervalUpdate class.
//===----------------------------------------------------------------------===//
-// HMEditor is a toolkit used by handleMove to trim or extend live intervals.
+/// Toolkit used by handleMove to trim or extend live intervals.
class LiveIntervals::HMEditor {
private:
LiveIntervals& LIS;
@@ -1241,10 +1232,12 @@ private:
LiveRange::iterator NewIdxIn = NewIdxOut;
assert(NewIdxIn == LR.find(NewIdx.getBaseIndex()));
const SlotIndex SplitPos = NewIdxDef;
+ OldIdxVNI = OldIdxIn->valno;
// Merge the OldIdxIn and OldIdxOut segments into OldIdxOut.
+ OldIdxOut->valno->def = OldIdxIn->start;
*OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end,
- OldIdxIn->valno);
+ OldIdxOut->valno);
// OldIdxIn and OldIdxVNI are now undef and can be overridden.
// We Slide [NewIdxIn, OldIdxIn) down one position.
// |- X0/NewIdxIn -| ... |- Xn-1 -||- Xn/OldIdxIn -||- OldIdxOut -|
@@ -1514,8 +1507,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
}
}
- for (unsigned i = 0, e = OrigRegs.size(); i != e; ++i) {
- unsigned Reg = OrigRegs[i];
+ for (unsigned Reg : OrigRegs) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
@@ -1524,16 +1516,16 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
if (!LI.hasAtLeastOneValue())
continue;
- for (LiveInterval::SubRange &S : LI.subranges()) {
+ for (LiveInterval::SubRange &S : LI.subranges())
repairOldRegInRange(Begin, End, endIdx, S, Reg, S.LaneMask);
- }
+
repairOldRegInRange(Begin, End, endIdx, LI, Reg);
}
}
void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) {
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
- if (LiveRange *LR = getCachedRegUnit(*Units))
+ for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
+ if (LiveRange *LR = getCachedRegUnit(*Unit))
if (VNInfo *VNI = LR->getVNInfoAt(Pos))
LR->removeValNo(VNI);
}
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index fc2f233f6d68..b4aa0dc326a5 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -1,4 +1,4 @@
-//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===//
+//===- LiveIntervalUnion.cpp - Live interval union data structure ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,19 +13,19 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/LiveIntervalUnion.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include <algorithm>
+#include <cassert>
+#include <cstdlib>
using namespace llvm;
#define DEBUG_TYPE "regalloc"
-
// Merge a LiveInterval's segments. Guarantee no overlaps.
void LiveIntervalUnion::unify(LiveInterval &VirtReg, const LiveRange &Range) {
if (Range.empty())
@@ -64,7 +64,7 @@ void LiveIntervalUnion::extract(LiveInterval &VirtReg, const LiveRange &Range) {
LiveRange::const_iterator RegEnd = Range.end();
SegmentIter SegPos = Segments.find(RegPos->start);
- for (;;) {
+ while (true) {
assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval");
SegPos.erase();
if (!SegPos.valid())
@@ -126,25 +126,24 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
CheckedFirstInterference = true;
// Quickly skip interference check for empty sets.
- if (VirtReg->empty() || LiveUnion->empty()) {
+ if (LR->empty() || LiveUnion->empty()) {
SeenAllInterferences = true;
return 0;
}
- // In most cases, the union will start before VirtReg.
- VirtRegI = VirtReg->begin();
+ // In most cases, the union will start before LR.
+ LRI = LR->begin();
LiveUnionI.setMap(LiveUnion->getMap());
- LiveUnionI.find(VirtRegI->start);
+ LiveUnionI.find(LRI->start);
}
- LiveInterval::iterator VirtRegEnd = VirtReg->end();
+ LiveRange::const_iterator LREnd = LR->end();
LiveInterval *RecentReg = nullptr;
while (LiveUnionI.valid()) {
- assert(VirtRegI != VirtRegEnd && "Reached end of VirtReg");
+ assert(LRI != LREnd && "Reached end of LR");
// Check for overlapping interference.
- while (VirtRegI->start < LiveUnionI.stop() &&
- VirtRegI->end > LiveUnionI.start()) {
+ while (LRI->start < LiveUnionI.stop() && LRI->end > LiveUnionI.start()) {
// This is an overlap, record the interfering register.
LiveInterval *VReg = LiveUnionI.value();
if (VReg != RecentReg && !isSeenInterference(VReg)) {
@@ -161,20 +160,20 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
}
// The iterators are now not overlapping, LiveUnionI has been advanced
- // beyond VirtRegI.
- assert(VirtRegI->end <= LiveUnionI.start() && "Expected non-overlap");
+ // beyond LRI.
+ assert(LRI->end <= LiveUnionI.start() && "Expected non-overlap");
// Advance the iterator that ends first.
- VirtRegI = VirtReg->advanceTo(VirtRegI, LiveUnionI.start());
- if (VirtRegI == VirtRegEnd)
+ LRI = LR->advanceTo(LRI, LiveUnionI.start());
+ if (LRI == LREnd)
break;
// Detect overlap, handle above.
- if (VirtRegI->start < LiveUnionI.stop())
+ if (LRI->start < LiveUnionI.stop())
continue;
// Still not overlapping. Catch up LiveUnionI.
- LiveUnionI.advanceTo(VirtRegI->start);
+ LiveUnionI.advanceTo(LRI->start);
}
SeenAllInterferences = true;
return InterferingVRegs.size();
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index dcc41c1718a6..9f7d7cf54848 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -120,12 +120,11 @@ void LivePhysRegs::print(raw_ostream &OS) const {
OS << "\n";
}
-/// Dumps the currently live registers to the debug output.
-LLVM_DUMP_METHOD void LivePhysRegs::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LivePhysRegs::dump() const {
dbgs() << " " << *this;
-#endif
}
+#endif
bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
unsigned Reg) const {
@@ -161,7 +160,9 @@ void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) {
static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF,
const MachineFrameInfo &MFI,
const TargetRegisterInfo &TRI) {
- for (const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR)
LiveRegs.addReg(*CSR);
for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
LiveRegs.removeReg(Info.getReg());
@@ -180,7 +181,8 @@ void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
if (MBB.isReturnBlock()) {
// The return block has no successors whose live-ins we could merge
// below. So instead we add the callee saved registers manually.
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *I = MRI.getCalleeSavedRegs(); *I; ++I)
addReg(*I);
} else {
addPristines(*this, MF, MFI, *TRI);
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index 012837608628..398066bf8903 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -75,34 +75,11 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
LI.createSubRangeFrom(*Alloc, ClassMask, LI);
}
- LaneBitmask Mask = SubMask;
- for (LiveInterval::SubRange &S : LI.subranges()) {
- // A Mask for subregs common to the existing subrange and current def.
- LaneBitmask Common = S.LaneMask & Mask;
- if (Common.none())
- continue;
- LiveInterval::SubRange *CommonRange;
- // A Mask for subregs covered by the subrange but not the current def.
- LaneBitmask RM = S.LaneMask & ~Mask;
- if (RM.any()) {
- // Split the subrange S into two parts: one covered by the current
- // def (CommonRange), and the one not affected by it (updated S).
- S.LaneMask = RM;
- CommonRange = LI.createSubRangeFrom(*Alloc, Common, S);
- } else {
- assert(Common == S.LaneMask);
- CommonRange = &S;
- }
+ LI.refineSubRanges(*Alloc, SubMask,
+ [&MO, this](LiveInterval::SubRange &SR) {
if (MO.isDef())
- createDeadDef(*Indexes, *Alloc, *CommonRange, MO);
- Mask &= ~Common;
- }
- // Create a new SubRange for subregs we did not cover yet.
- if (Mask.any()) {
- LiveInterval::SubRange *NewRange = LI.createSubRange(*Alloc, Mask);
- if (MO.isDef())
- createDeadDef(*Indexes, *Alloc, *NewRange, MO);
- }
+ createDeadDef(*Indexes, *Alloc, SR, MO);
+ });
}
// Create the def in the main liverange. We do not have to do this if
@@ -289,8 +266,7 @@ bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
if (UndefOnEntry[BN])
return false;
- auto MarkDefined =
- [this,BN,&DefOnEntry,&UndefOnEntry] (MachineBasicBlock &B) -> bool {
+ auto MarkDefined = [BN, &DefOnEntry](MachineBasicBlock &B) -> bool {
for (MachineBasicBlock *S : B.successors())
DefOnEntry[S->getNumber()] = true;
DefOnEntry[BN] = true;
@@ -311,7 +287,12 @@ bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
return MarkDefined(B);
SlotIndex Begin, End;
std::tie(Begin, End) = Indexes->getMBBRange(&B);
- LiveRange::iterator UB = std::upper_bound(LR.begin(), LR.end(), End);
+ // Treat End as not belonging to B.
+ // If LR has a segment S that starts at the next block, i.e. [End, ...),
+ // std::upper_bound will return the segment following S. Instead,
+ // S should be treated as the first segment that does not overlap B.
+ LiveRange::iterator UB = std::upper_bound(LR.begin(), LR.end(),
+ End.getPrevSlot());
if (UB != LR.begin()) {
LiveRange::Segment &Seg = *std::prev(UB);
if (Seg.end > Begin) {
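The off-by-one the new comment describes can be reproduced with plain integers standing in for slot indexes: searching for End itself steps past a segment that starts exactly at End, while searching for the previous slot keeps that segment in view as the first non-overlapping one.

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> SegStarts = {0, 10, 20}; // segment start points
  int End = 20; // first index after block B, not itself part of B
  // upper_bound(End) lands past the segment starting at 20 ...
  auto PastIt = std::upper_bound(SegStarts.begin(), SegStarts.end(), End);
  // ... upper_bound(End - 1) lands on it, which is what the fix wants.
  auto AtIt = std::upper_bound(SegStarts.begin(), SegStarts.end(), End - 1);
  assert(PastIt - AtIt == 1);
  return 0;
}
```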
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 7f1c69c0b4a2..92cca1a54951 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -37,6 +37,8 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) {
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
}
LiveInterval &LI = LIS.createEmptyInterval(VReg);
+ if (Parent && !Parent->isSpillable())
+ LI.markNotSpillable();
// Create empty subranges if the OldReg's interval has them. Do not create
// the main range here---it will be constructed later after the subranges
// have been finalized.
@@ -52,6 +54,14 @@ unsigned LiveRangeEdit::createFrom(unsigned OldReg) {
if (VRM) {
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
}
+ // FIXME: Getting the interval here actually computes it.
+ // In theory, this may not be what we want, but in practice
+ // the createEmptyIntervalFrom API is used when this is not
+ // the case. Generally speaking we just want to annotate the
+ // LiveInterval when it gets created but we cannot do that at
+ // the moment.
+ if (Parent && !Parent->isSpillable())
+ LIS.getInterval(VReg).markNotSpillable();
return VReg;
}
@@ -442,9 +452,6 @@ LiveRangeEdit::MRI_NoteNewVirtualRegister(unsigned VReg)
if (VRM)
VRM->grow();
- if (Parent && !Parent->isSpillable())
- LIS.getInterval(VReg).markNotSpillable();
-
NewRegs.push_back(VReg);
}
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index 7a51386aa9ca..882de1a3fad9 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -1,4 +1,4 @@
-//===-- LiveRegMatrix.cpp - Track register interference -------------------===//
+//===- LiveRegMatrix.cpp - Track register interference --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,15 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/LiveRegMatrix.h"
#include "RegisterCoalescer.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Pass.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
using namespace llvm;
@@ -36,8 +43,7 @@ INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_END(LiveRegMatrix, "liveregmatrix",
"Live Register Matrix", false, false)
-LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID),
- UserTag(0), RegMaskTag(0), RegMaskVirtReg(0) {}
+LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID) {}
void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -169,10 +175,10 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg,
return Result;
}
-LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg,
+LiveIntervalUnion::Query &LiveRegMatrix::query(const LiveRange &LR,
unsigned RegUnit) {
LiveIntervalUnion::Query &Q = Queries[RegUnit];
- Q.init(UserTag, &VirtReg, &Matrix[RegUnit]);
+ Q.init(UserTag, LR, Matrix[RegUnit]);
return Q;
}
@@ -190,9 +196,12 @@ LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) {
return IK_RegUnit;
// Check the matrix for virtual register interference.
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
- if (query(VirtReg, *Units).checkInterference())
- return IK_VirtReg;
+ bool Interference = foreachUnit(TRI, VirtReg, PhysReg,
+ [&](unsigned Unit, const LiveRange &LR) {
+ return query(LR, Unit).checkInterference();
+ });
+ if (Interference)
+ return IK_VirtReg;
return IK_Free;
}
diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp
new file mode 100644
index 000000000000..dff555f49565
--- /dev/null
+++ b/lib/CodeGen/LiveRegUnits.cpp
@@ -0,0 +1,126 @@
+//===- LiveRegUnits.cpp - Register Unit Set -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the LiveRegUnits set.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveRegUnits.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+void LiveRegUnits::removeRegsNotPreserved(const uint32_t *RegMask) {
+ for (unsigned U = 0, E = TRI->getNumRegUnits(); U != E; ++U) {
+ for (MCRegUnitRootIterator RootReg(U, TRI); RootReg.isValid(); ++RootReg) {
+ if (MachineOperand::clobbersPhysReg(RegMask, *RootReg))
+ Units.reset(U);
+ }
+ }
+}
+
+void LiveRegUnits::addRegsInMask(const uint32_t *RegMask) {
+ for (unsigned U = 0, E = TRI->getNumRegUnits(); U != E; ++U) {
+ for (MCRegUnitRootIterator RootReg(U, TRI); RootReg.isValid(); ++RootReg) {
+ if (MachineOperand::clobbersPhysReg(RegMask, *RootReg))
+ Units.set(U);
+ }
+ }
+}
+
+void LiveRegUnits::stepBackward(const MachineInstr &MI) {
+ // Remove defined registers and regmask kills from the set.
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ if (O->isReg()) {
+ if (!O->isDef())
+ continue;
+ unsigned Reg = O->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ removeReg(Reg);
+ } else if (O->isRegMask())
+ removeRegsNotPreserved(O->getRegMask());
+ }
+
+ // Add uses to the set.
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ if (!O->isReg() || !O->readsReg())
+ continue;
+ unsigned Reg = O->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ addReg(Reg);
+ }
+}
+
+void LiveRegUnits::accumulateBackward(const MachineInstr &MI) {
+ // Add defs, uses and regmask clobbers to the set.
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ if (O->isReg()) {
+ unsigned Reg = O->getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (!O->isDef() && !O->readsReg())
+ continue;
+ addReg(Reg);
+ } else if (O->isRegMask())
+ addRegsInMask(O->getRegMask());
+ }
+}
+
+/// Add live-in registers of basic block \p MBB to \p LiveUnits.
+static void addLiveIns(LiveRegUnits &LiveUnits, const MachineBasicBlock &MBB) {
+ for (const auto &LI : MBB.liveins())
+ LiveUnits.addRegMasked(LI.PhysReg, LI.LaneMask);
+}
+
+static void addLiveOuts(LiveRegUnits &LiveUnits, const MachineBasicBlock &MBB) {
+ // To get the live-outs we simply merge the live-ins of all successors.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ addLiveIns(LiveUnits, *Succ);
+}
+
+/// Remove the callee-saved registers that were actually saved, as recorded
+/// in \p MFI, from \p LiveUnits; what remains of a full callee-saved set are
+/// the pristine registers.
+static void removeSavedRegs(LiveRegUnits &LiveUnits, const MachineFunction &MF,
+ const MachineFrameInfo &MFI,
+ const TargetRegisterInfo &TRI) {
+ for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
+ LiveUnits.removeReg(Info.getReg());
+}
+
+void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid()) {
+ for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
+ addReg(*I);
+ if (!MBB.isReturnBlock())
+ removeSavedRegs(*this, MF, MFI, *TRI);
+ }
+ ::addLiveOuts(*this, MBB);
+}
+
+void LiveRegUnits::addLiveIns(const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid()) {
+ for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
+ addReg(*I);
+ if (&MBB != &MF.front())
+ removeSavedRegs(*this, MF, MFI, *TRI);
+ }
+ ::addLiveIns(*this, MBB);
+}
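A hedged usage sketch for the new set, assuming the available() query from the accompanying header: seed with the block's live-outs, then step backwards to the point of interest.

```cpp
// Sketch only. True if no unit of Reg is live just before Before.
static bool isRegFreeBefore(const MachineBasicBlock &MBB,
                            const MachineInstr &Before, unsigned Reg,
                            const TargetRegisterInfo &TRI) {
  LiveRegUnits Units(TRI);
  Units.addLiveOuts(MBB); // live-outs plus pristines, per the code above
  for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
    Units.stepBackward(MI); // Units now describes liveness just before MI
    if (&MI == &Before)
      break;
  }
  return Units.available(Reg); // assumed query from LiveRegUnits.h
}
```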
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 269b990a3149..3568b0294ad9 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -64,8 +64,8 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
return nullptr;
}
-LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
dbgs() << " Alive in blocks: ";
for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
E = AliveBlocks.end(); I != E; ++I)
@@ -78,8 +78,8 @@ LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
dbgs() << "\n #" << i << ": " << *Kills[i];
dbgs() << "\n";
}
-#endif
}
+#endif
/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
diff --git a/lib/CodeGen/LowLevelType.cpp b/lib/CodeGen/LowLevelType.cpp
index d74b7306e0f4..c4b9068fa905 100644
--- a/lib/CodeGen/LowLevelType.cpp
+++ b/lib/CodeGen/LowLevelType.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/GlobalISel/LowLevelType.cpp --------------------------===//
+//===-- llvm/CodeGen/LowLevelType.cpp -------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,54 +18,21 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-LLT::LLT(Type &Ty, const DataLayout &DL) {
+LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
if (auto VTy = dyn_cast<VectorType>(&Ty)) {
- SizeInBits = VTy->getElementType()->getPrimitiveSizeInBits();
- ElementsOrAddrSpace = VTy->getNumElements();
- Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector;
+ auto NumElements = VTy->getNumElements();
+ auto ScalarSizeInBits = VTy->getElementType()->getPrimitiveSizeInBits();
+ if (NumElements == 1)
+ return LLT::scalar(ScalarSizeInBits);
+ return LLT::vector(NumElements, ScalarSizeInBits);
} else if (auto PTy = dyn_cast<PointerType>(&Ty)) {
- Kind = Pointer;
- SizeInBits = DL.getTypeSizeInBits(&Ty);
- ElementsOrAddrSpace = PTy->getAddressSpace();
+ return LLT::pointer(PTy->getAddressSpace(), DL.getTypeSizeInBits(&Ty));
} else if (Ty.isSized()) {
// Aggregates are no different from real scalars as far as GlobalISel is
// concerned.
- Kind = Scalar;
- SizeInBits = DL.getTypeSizeInBits(&Ty);
- ElementsOrAddrSpace = 1;
+ auto SizeInBits = DL.getTypeSizeInBits(&Ty);
assert(SizeInBits != 0 && "invalid zero-sized type");
- } else {
- Kind = Invalid;
- SizeInBits = ElementsOrAddrSpace = 0;
+ return LLT::scalar(SizeInBits);
}
-}
-
-LLT::LLT(MVT VT) {
- if (VT.isVector()) {
- SizeInBits = VT.getVectorElementType().getSizeInBits();
- ElementsOrAddrSpace = VT.getVectorNumElements();
- Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector;
- } else if (VT.isValid()) {
- // Aggregates are no different from real scalars as far as GlobalISel is
- // concerned.
- Kind = Scalar;
- SizeInBits = VT.getSizeInBits();
- ElementsOrAddrSpace = 1;
- assert(SizeInBits != 0 && "invalid zero-sized type");
- } else {
- Kind = Invalid;
- SizeInBits = ElementsOrAddrSpace = 0;
- }
-}
-
-void LLT::print(raw_ostream &OS) const {
- if (isVector())
- OS << "<" << ElementsOrAddrSpace << " x s" << SizeInBits << ">";
- else if (isPointer())
- OS << "p" << getAddressSpace();
- else if (isValid()) {
- assert(isScalar() && "unexpected type");
- OS << "s" << getScalarSizeInBits();
- } else
- llvm_unreachable("trying to print an invalid type");
+ return LLT();
}
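For illustration (not part of the patch), the new free function maps IR types to low-level types as follows; the surrounding LLVMContext Ctx and DataLayout DL are assumed, on a target where DL reports 64-bit pointers:

    LLT S64 = getLLTForType(*Type::getInt64Ty(Ctx), DL);      // printed "s64"
    LLT V4S32 = getLLTForType(
        *VectorType::get(Type::getInt32Ty(Ctx), 4), DL);      // "<4 x s32>"
    LLT P0 = getLLTForType(*Type::getInt8PtrTy(Ctx), DL);     // "p0"
    LLT S16 = getLLTForType(
        *VectorType::get(Type::getInt16Ty(Ctx), 1), DL);      // <1 x s16> -> "s16"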
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
index c8bed0890dd6..cac22af32956 100644
--- a/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -41,8 +41,11 @@
using namespace llvm;
PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF,
- SourceMgr &SM, const SlotMapping &IRSlots)
- : MF(MF), SM(&SM), IRSlots(IRSlots) {
+ SourceMgr &SM, const SlotMapping &IRSlots,
+ const Name2RegClassMap &Names2RegClasses,
+ const Name2RegBankMap &Names2RegBanks)
+ : MF(MF), SM(&SM), IRSlots(IRSlots), Names2RegClasses(Names2RegClasses),
+ Names2RegBanks(Names2RegBanks) {
}
VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) {
@@ -139,6 +142,7 @@ public:
bool parseVirtualRegister(VRegInfo *&Info);
bool parseRegister(unsigned &Reg, VRegInfo *&VRegInfo);
bool parseRegisterFlag(unsigned &Flags);
+ bool parseRegisterClassOrBank(VRegInfo &RegInfo);
bool parseSubRegisterIndex(unsigned &SubReg);
bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx);
bool parseRegisterOperand(MachineOperand &Dest,
@@ -172,6 +176,7 @@ public:
bool parseIntrinsicOperand(MachineOperand &Dest);
bool parsePredicateOperand(MachineOperand &Dest);
bool parseTargetIndexOperand(MachineOperand &Dest);
+ bool parseCustomRegisterMaskOperand(MachineOperand &Dest);
bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest);
bool parseMachineOperand(MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx);
@@ -184,6 +189,7 @@ public:
bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags);
bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV);
bool parseMachinePointerInfo(MachinePointerInfo &Dest);
+ bool parseOptionalAtomicOrdering(AtomicOrdering &Order);
bool parseMachineMemoryOperand(MachineMemOperand *&Dest);
private:
@@ -878,6 +884,66 @@ bool MIParser::parseRegister(unsigned &Reg, VRegInfo *&Info) {
}
}
+bool MIParser::parseRegisterClassOrBank(VRegInfo &RegInfo) {
+ if (Token.isNot(MIToken::Identifier) && Token.isNot(MIToken::underscore))
+ return error("expected '_', register class, or register bank name");
+ StringRef::iterator Loc = Token.location();
+ StringRef Name = Token.stringValue();
+
+ // Was it a register class?
+ auto RCNameI = PFS.Names2RegClasses.find(Name);
+ if (RCNameI != PFS.Names2RegClasses.end()) {
+ lex();
+ const TargetRegisterClass &RC = *RCNameI->getValue();
+
+ switch (RegInfo.Kind) {
+ case VRegInfo::UNKNOWN:
+ case VRegInfo::NORMAL:
+ RegInfo.Kind = VRegInfo::NORMAL;
+ if (RegInfo.Explicit && RegInfo.D.RC != &RC) {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ return error(Loc, Twine("conflicting register classes, previously: ") +
+ Twine(TRI.getRegClassName(RegInfo.D.RC)));
+ }
+ RegInfo.D.RC = &RC;
+ RegInfo.Explicit = true;
+ return false;
+
+ case VRegInfo::GENERIC:
+ case VRegInfo::REGBANK:
+ return error(Loc, "register class specification on generic register");
+ }
+ llvm_unreachable("Unexpected register kind");
+ }
+
+ // Should be a register bank or a generic register.
+ const RegisterBank *RegBank = nullptr;
+ if (Name != "_") {
+ auto RBNameI = PFS.Names2RegBanks.find(Name);
+ if (RBNameI == PFS.Names2RegBanks.end())
+ return error(Loc, "expected '_', register class, or register bank name");
+ RegBank = RBNameI->getValue();
+ }
+
+ lex();
+
+ switch (RegInfo.Kind) {
+ case VRegInfo::UNKNOWN:
+ case VRegInfo::GENERIC:
+ case VRegInfo::REGBANK:
+ RegInfo.Kind = RegBank ? VRegInfo::REGBANK : VRegInfo::GENERIC;
+ if (RegInfo.Explicit && RegInfo.D.RegBank != RegBank)
+ return error(Loc, "conflicting generic register banks");
+ RegInfo.D.RegBank = RegBank;
+ RegInfo.Explicit = true;
+ return false;
+
+ case VRegInfo::NORMAL:
+ return error(Loc, "register bank specification on normal register");
+ }
+ llvm_unreachable("Unexpected register kind");
+}
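Schematically, the syntax this accepts after a virtual register's colon looks like the lines below (register, class, and bank names are illustrative; banks such as gpr exist only on GlobalISel targets):

    %0:gr32 = COPY %edi        ; register class on a normal virtual register
    %1:gpr(s32) = COPY %w0     ; register bank on a generic virtual register
    %2:_(s32) = G_ADD %1, %1   ; '_' leaves the generic register bank-less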
+
bool MIParser::parseRegisterFlag(unsigned &Flags) {
const unsigned OldFlags = Flags;
switch (Token.kind()) {
@@ -1004,6 +1070,13 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
if (!TargetRegisterInfo::isVirtualRegister(Reg))
return error("subregister index expects a virtual register");
}
+ if (Token.is(MIToken::colon)) {
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ return error("register class specification expects a virtual register");
+ lex();
+ if (parseRegisterClassOrBank(*RegInfo))
+ return true;
+ }
MachineRegisterInfo &MRI = MF.getRegInfo();
if ((Flags & RegState::Define) == 0) {
if (consumeIfPresent(MIToken::lparen)) {
@@ -1598,6 +1671,35 @@ bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
return false;
}
+bool MIParser::parseCustomRegisterMaskOperand(MachineOperand &Dest) {
+ assert(Token.stringValue() == "CustomRegMask" && "Expected a custom RegMask");
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return true;
+
+ uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs());
+ while (true) {
+ if (Token.isNot(MIToken::NamedRegister))
+ return error("expected a named register");
+ unsigned Reg;
+ if (parseNamedRegister(Reg))
+ return true;
+ lex();
+ Mask[Reg / 32] |= 1U << (Reg % 32);
+ // TODO: Report an error if the same register is used more than once.
+ if (Token.isNot(MIToken::comma))
+ break;
+ lex();
+ }
+
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+ Dest = MachineOperand::CreateRegMask(Mask);
+ return false;
+}
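A worked example of the word/bit indexing used above (illustration only):

    // Each uint32_t word of a register mask covers 32 registers, so register
    // number 37 lands in word 37 / 32 == 1 at bit 37 % 32 == 5.
    uint32_t Mask[2] = {0, 0};
    unsigned Reg = 37;
    Mask[Reg / 32] |= 1U << (Reg % 32);                   // Mask[1] == 0x20
    bool Preserved = Mask[Reg / 32] & (1U << (Reg % 32)); // true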
+
bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::kw_liveout));
const auto *TRI = MF.getSubtarget().getRegisterInfo();
@@ -1695,8 +1797,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
Dest = MachineOperand::CreateRegMask(RegMask);
lex();
break;
- }
- LLVM_FALLTHROUGH;
+ } else
+ return parseCustomRegisterMaskOperand(Dest);
default:
// FIXME: Parse the MCSymbol machine operand.
return error("expected a machine operand");
@@ -1969,6 +2071,28 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) {
return false;
}
+bool MIParser::parseOptionalAtomicOrdering(AtomicOrdering &Order) {
+ Order = AtomicOrdering::NotAtomic;
+ if (Token.isNot(MIToken::Identifier))
+ return false;
+
+ Order = StringSwitch<AtomicOrdering>(Token.stringValue())
+ .Case("unordered", AtomicOrdering::Unordered)
+ .Case("monotonic", AtomicOrdering::Monotonic)
+ .Case("acquire", AtomicOrdering::Acquire)
+ .Case("release", AtomicOrdering::Release)
+ .Case("acq_rel", AtomicOrdering::AcquireRelease)
+ .Case("seq_cst", AtomicOrdering::SequentiallyConsistent)
+ .Default(AtomicOrdering::NotAtomic);
+
+ if (Order != AtomicOrdering::NotAtomic) {
+ lex();
+ return false;
+ }
+
+ return error("expected an atomic scope, ordering or a size integer literal");
+}
+
bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (expectAndConsume(MIToken::lparen))
return true;
@@ -1986,6 +2110,21 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
Flags |= MachineMemOperand::MOStore;
lex();
+ // Optional "singlethread" scope.
+ SynchronizationScope Scope = SynchronizationScope::CrossThread;
+ if (Token.is(MIToken::Identifier) && Token.stringValue() == "singlethread") {
+ Scope = SynchronizationScope::SingleThread;
+ lex();
+ }
+
+ // Up to two atomic orderings (cmpxchg provides guarantees on failure).
+ AtomicOrdering Order, FailureOrder;
+ if (parseOptionalAtomicOrdering(Order))
+ return true;
+
+ if (parseOptionalAtomicOrdering(FailureOrder))
+ return true;
+
if (Token.isNot(MIToken::IntegerLiteral))
return error("expected the size integer literal after memory operation");
uint64_t Size;
@@ -2040,8 +2179,8 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
}
if (expectAndConsume(MIToken::rparen))
return true;
- Dest =
- MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range);
+ Dest = MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range,
+ Scope, Order, FailureOrder);
return false;
}
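Putting the grammar above together, a machine memory operand now admits forms like these (schematic; the trailing pointer text is abbreviated):

    (load 4 from %ir.p)                       ; non-atomic, as before
    (load singlethread acquire 4 from %ir.p)  ; scoped, acquire-ordered load
    (store release 4 into %ir.q)              ; atomic store
    (load store seq_cst monotonic 8 ...)      ; two orderings: success, failure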
diff --git a/lib/CodeGen/MIRParser/MIParser.h b/lib/CodeGen/MIRParser/MIParser.h
index 93a4d84ba62f..9b3879cf8377 100644
--- a/lib/CodeGen/MIRParser/MIParser.h
+++ b/lib/CodeGen/MIRParser/MIParser.h
@@ -45,11 +45,16 @@ struct VRegInfo {
unsigned PreferredReg = 0;
};
+typedef StringMap<const TargetRegisterClass*> Name2RegClassMap;
+typedef StringMap<const RegisterBank*> Name2RegBankMap;
+
struct PerFunctionMIParsingState {
BumpPtrAllocator Allocator;
MachineFunction &MF;
SourceMgr *SM;
const SlotMapping &IRSlots;
+ const Name2RegClassMap &Names2RegClasses;
+ const Name2RegBankMap &Names2RegBanks;
DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
DenseMap<unsigned, VRegInfo*> VRegInfos;
@@ -59,7 +64,9 @@ struct PerFunctionMIParsingState {
DenseMap<unsigned, unsigned> JumpTableSlots;
PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM,
- const SlotMapping &IRSlots);
+ const SlotMapping &IRSlots,
+ const Name2RegClassMap &Names2RegClasses,
+ const Name2RegBankMap &Names2RegBanks);
VRegInfo &getVRegInfo(unsigned VReg);
};
diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index 3dff1147631b..a2773cccc5db 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -55,9 +55,9 @@ class MIRParserImpl {
StringMap<std::unique_ptr<yaml::MachineFunction>> Functions;
SlotMapping IRSlots;
/// Maps from register class names to register classes.
- StringMap<const TargetRegisterClass *> Names2RegClasses;
+ Name2RegClassMap Names2RegClasses;
/// Maps from register bank names to register banks.
- StringMap<const RegisterBank *> Names2RegBanks;
+ Name2RegBankMap Names2RegBanks;
public:
MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename,
@@ -325,11 +325,15 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
return error(Twine("no machine function information for function '") +
MF.getName() + "' in the MIR file");
// TODO: Recreate the machine function.
+ initNames2RegClasses(MF);
+ initNames2RegBanks(MF);
const yaml::MachineFunction &YamlMF = *It->getValue();
if (YamlMF.Alignment)
MF.setAlignment(YamlMF.Alignment);
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
+ if (YamlMF.NoVRegs)
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
if (YamlMF.Legalized)
MF.getProperties().set(MachineFunctionProperties::Property::Legalized);
if (YamlMF.RegBankSelected)
@@ -338,7 +342,8 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
if (YamlMF.Selected)
MF.getProperties().set(MachineFunctionProperties::Property::Selected);
- PerFunctionMIParsingState PFS(MF, SM, IRSlots);
+ PerFunctionMIParsingState PFS(MF, SM, IRSlots, Names2RegClasses,
+ Names2RegBanks);
if (parseRegisterInfo(PFS, YamlMF))
return true;
if (!YamlMF.Constants.empty()) {
@@ -362,9 +367,6 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
}
PFS.SM = &SM;
- if (MF.empty())
- return error(Twine("machine function '") + Twine(MF.getName()) +
- "' requires at least one machine basic block in its body");
// Initialize the frame information after creating all the MBBs so that the
// MBB references in the frame information can be resolved.
if (initializeFrameInfo(PFS, YamlMF))
@@ -462,17 +464,19 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
RegInfo.addLiveIn(Reg, VReg);
}
- // Parse the callee saved register mask.
- BitVector CalleeSavedRegisterMask(RegInfo.getUsedPhysRegsMask().size());
- if (!YamlMF.CalleeSavedRegisters)
- return false;
- for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) {
- unsigned Reg = 0;
- if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error))
- return error(Error, RegSource.SourceRange);
- CalleeSavedRegisterMask[Reg] = true;
+  // Parse the callee saved registers (registers that the callee
+  // must preserve for its caller).
+ if (YamlMF.CalleeSavedRegisters) {
+ SmallVector<MCPhysReg, 16> CalleeSavedRegisters;
+ for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) {
+ unsigned Reg = 0;
+ if (parseNamedRegisterReference(PFS, Reg, RegSource.Value, Error))
+ return error(Error, RegSource.SourceRange);
+ CalleeSavedRegisters.push_back(Reg);
+ }
+ RegInfo.setCalleeSavedRegs(CalleeSavedRegisters);
}
- RegInfo.setUsedPhysRegMask(CalleeSavedRegisterMask.flip());
+
return false;
}
@@ -505,14 +509,12 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
}
// Compute MachineRegisterInfo::UsedPhysRegMask
- if (!YamlMF.CalleeSavedRegisters) {
- for (const MachineBasicBlock &MBB : MF) {
- for (const MachineInstr &MI : MBB) {
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isRegMask())
- continue;
- MRI.addPhysRegsUsedFromRegMask(MO.getRegMask());
- }
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isRegMask())
+ continue;
+ MRI.addPhysRegsUsedFromRegMask(MO.getRegMask());
}
}
}
@@ -818,7 +820,6 @@ void MIRParserImpl::initNames2RegBanks(const MachineFunction &MF) {
const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF,
StringRef Name) {
- initNames2RegClasses(MF);
auto RegClassInfo = Names2RegClasses.find(Name);
if (RegClassInfo == Names2RegClasses.end())
return nullptr;
@@ -827,7 +828,6 @@ const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF,
const RegisterBank *MIRParserImpl::getRegBank(const MachineFunction &MF,
StringRef Name) {
- initNames2RegBanks(MF);
auto RegBankInfo = Names2RegBanks.find(Name);
if (RegBankInfo == Names2RegBanks.end())
return nullptr;
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index db87092177ca..6da174a53666 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -175,6 +175,8 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.Alignment = MF.getAlignment();
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
+ YamlMF.NoVRegs = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoVRegs);
YamlMF.Legalized = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::Legalized);
YamlMF.RegBankSelected = MF.getProperties().hasProperty(
@@ -205,6 +207,25 @@ void MIRPrinter::print(const MachineFunction &MF) {
Out << YamlMF;
}
+static void printCustomRegMask(const uint32_t *RegMask, raw_ostream &OS,
+ const TargetRegisterInfo *TRI) {
+ assert(RegMask && "Can't print an empty register mask");
+ OS << StringRef("CustomRegMask(");
+
+ bool IsRegInRegMaskFound = false;
+ for (int I = 0, E = TRI->getNumRegs(); I < E; I++) {
+ // Check whether the register is asserted in regmask.
+ if (RegMask[I / 32] & (1u << (I % 32))) {
+ if (IsRegInRegMaskFound)
+ OS << ',';
+ printReg(I, OS, TRI);
+ IsRegInRegMaskFound = true;
+ }
+ }
+
+ OS << ')';
+}
+
void MIRPrinter::convert(yaml::MachineFunction &MF,
const MachineRegisterInfo &RegInfo,
const TargetRegisterInfo *TRI) {
@@ -239,20 +260,18 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
printReg(I->second, LiveIn.VirtualRegister, TRI);
MF.LiveIns.push_back(LiveIn);
}
- // The used physical register mask is printed as an inverted callee saved
- // register mask.
- const BitVector &UsedPhysRegMask = RegInfo.getUsedPhysRegsMask();
- if (UsedPhysRegMask.none())
- return;
- std::vector<yaml::FlowStringValue> CalleeSavedRegisters;
- for (unsigned I = 0, E = UsedPhysRegMask.size(); I != E; ++I) {
- if (!UsedPhysRegMask[I]) {
+
+  // Print the callee saved registers.
+ if (RegInfo.isUpdatedCSRsInitialized()) {
+ const MCPhysReg *CalleeSavedRegs = RegInfo.getCalleeSavedRegs();
+ std::vector<yaml::FlowStringValue> CalleeSavedRegisters;
+ for (const MCPhysReg *I = CalleeSavedRegs; *I; ++I) {
yaml::FlowStringValue Reg;
- printReg(I, Reg, TRI);
+ printReg(*I, Reg, TRI);
CalleeSavedRegisters.push_back(Reg);
}
+ MF.CalleeSavedRegisters = CalleeSavedRegisters;
}
- MF.CalleeSavedRegisters = CalleeSavedRegisters;
}
void MIRPrinter::convert(ModuleSlotTracker &MST,
@@ -860,7 +879,7 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
if (RegMaskInfo != RegisterMaskIds.end())
OS << StringRef(TRI->getRegMaskNames()[RegMaskInfo->second]).lower();
else
- llvm_unreachable("Can't print this machine register mask yet.");
+ printCustomRegMask(Op.getRegMask(), OS, TRI);
break;
}
case MachineOperand::MO_RegisterLiveOut: {
@@ -906,6 +925,9 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
<< CmpInst::getPredicateName(Pred) << ')';
break;
}
+ case MachineOperand::MO_Placeholder:
+ OS << "<placeholder>";
+ break;
}
}
@@ -926,6 +948,15 @@ void MIPrinter::print(const MachineMemOperand &Op) {
assert(Op.isStore() && "Non load machine operand must be a store");
OS << "store ";
}
+
+ if (Op.getSynchScope() == SynchronizationScope::SingleThread)
+ OS << "singlethread ";
+
+ if (Op.getOrdering() != AtomicOrdering::NotAtomic)
+ OS << toIRString(Op.getOrdering()) << ' ';
+ if (Op.getFailureOrdering() != AtomicOrdering::NotAtomic)
+ OS << toIRString(Op.getFailureOrdering()) << ' ';
+
OS << Op.getSize();
if (const Value *Val = Op.getValue()) {
OS << (Op.isLoad() ? " from " : " into ");
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 3869f976854d..06112723497b 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -148,8 +149,11 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
MachineBasicBlock::iterator
MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+
iterator E = end();
- while (I != E && (I->isPHI() || I->isPosition()))
+ while (I != E && (I->isPHI() || I->isPosition() ||
+ TII->isBasicBlockPrologue(*I)))
++I;
// FIXME: This needs to change if we wish to bundle labels
// inside the bundle.
@@ -160,8 +164,11 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator
MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) {
+ const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
+
iterator E = end();
- while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue()))
+ while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue() ||
+ TII->isBasicBlockPrologue(*I)))
++I;
// FIXME: This needs to change if we wish to bundle labels / dbg_values
// inside the bundle.
@@ -225,7 +232,7 @@ StringRef MachineBasicBlock::getName() const {
if (const BasicBlock *LBB = getBasicBlock())
return LBB->getName();
else
- return "(null)";
+ return StringRef("", 0);
}
/// Return a hopefully unique identifier for this block.
@@ -417,7 +424,7 @@ void MachineBasicBlock::updateTerminator() {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
- DebugLoc DL; // FIXME: this is nowhere
+ DebugLoc DL = findBranchDebugLoc();
bool B = TII->analyzeBranch(*this, TBB, FBB, Cond);
(void) B;
assert(!B && "UpdateTerminators requires analyzable predecessors!");
@@ -485,7 +492,7 @@ void MachineBasicBlock::updateTerminator() {
// FIXME: This does not seem like a reasonable pattern to support, but it
// has been seen in the wild coming out of degenerate ARM test cases.
TII->removeBranch(*this);
-
+
// Finally update the unconditional successor to be reached via a branch if
// it would not be reached by fallthrough.
if (!isLayoutSuccessor(TBB))
@@ -681,16 +688,16 @@ bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
return std::next(I) == MachineFunction::const_iterator(MBB);
}
-bool MachineBasicBlock::canFallThrough() {
+MachineBasicBlock *MachineBasicBlock::getFallThrough() {
MachineFunction::iterator Fallthrough = getIterator();
++Fallthrough;
// If FallthroughBlock is off the end of the function, it can't fall through.
if (Fallthrough == getParent()->end())
- return false;
+ return nullptr;
// If FallthroughBlock isn't a successor, no fallthrough is possible.
if (!isSuccessor(&*Fallthrough))
- return false;
+ return nullptr;
// Analyze the branches, if any, at the end of the block.
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
@@ -702,25 +709,31 @@ bool MachineBasicBlock::canFallThrough() {
// is possible. The isPredicated check is needed because this code can be
// called during IfConversion, where an instruction which is normally a
// Barrier is predicated and thus no longer an actual control barrier.
- return empty() || !back().isBarrier() || TII->isPredicated(back());
+ return (empty() || !back().isBarrier() || TII->isPredicated(back()))
+ ? &*Fallthrough
+ : nullptr;
}
// If there is no branch, control always falls through.
- if (!TBB) return true;
+ if (!TBB) return &*Fallthrough;
// If there is some explicit branch to the fallthrough block, it can obviously
// reach, even though the branch should get folded to fall through implicitly.
if (MachineFunction::iterator(TBB) == Fallthrough ||
MachineFunction::iterator(FBB) == Fallthrough)
- return true;
+ return &*Fallthrough;
// If it's an unconditional branch to some block not the fall through, it
// doesn't fall through.
- if (Cond.empty()) return false;
+ if (Cond.empty()) return nullptr;
// Otherwise, if it is conditional and has no explicit false block, it falls
// through.
- return FBB == nullptr;
+ return (FBB == nullptr) ? &*Fallthrough : nullptr;
+}
+
+bool MachineBasicBlock::canFallThrough() {
+ return getFallThrough() != nullptr;
}
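As a usage sketch (hypothetical helper, not part of this patch), getFallThrough lets callers that previously re-derived the fallthrough block act on it directly; it combines naturally with findBranchDebugLoc added below:

    // Sketch only: make an implicit fallthrough explicit before reordering
    // blocks. The helper name and call site are illustrative.
    static void makeFallthroughExplicit(MachineBasicBlock &MBB,
                                        const TargetInstrInfo &TII) {
      if (MachineBasicBlock *FT = MBB.getFallThrough())
        TII.insertBranch(MBB, FT, /*FBB=*/nullptr, /*Cond=*/{},
                         MBB.findBranchDebugLoc());
    }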
MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
@@ -1144,6 +1157,24 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
return {};
}
+/// Find and return the merged DebugLoc of the branch instructions of the block.
+/// Return UnknownLoc if there is none.
+DebugLoc
+MachineBasicBlock::findBranchDebugLoc() {
+ DebugLoc DL;
+ auto TI = getFirstTerminator();
+ while (TI != end() && !TI->isBranch())
+ ++TI;
+
+ if (TI != end()) {
+ DL = TI->getDebugLoc();
+ for (++TI ; TI != end() ; ++TI)
+ if (TI->isBranch())
+ DL = DILocation::getMergedLocation(DL, TI->getDebugLoc());
+ }
+ return DL;
+}
+
/// Return probability of the edge from this block to MBB.
BranchProbability
MachineBasicBlock::getSuccProbability(const_succ_iterator Succ) const {
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 7d5124d30a04..9c7367b4c780 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -28,7 +28,6 @@ using namespace llvm;
#define DEBUG_TYPE "block-freq"
-#ifndef NDEBUG
static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
"view-machine-block-freq-propagation-dags", cl::Hidden,
@@ -43,10 +42,37 @@ static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
"integer fractional block frequency representation."),
clEnumValN(GVDT_Count, "count", "display a graph using the real "
"profile count if available.")));
+// Similar to the option above, but used to control BFI display only after the
+// MBP pass.
+cl::opt<GVDAGType> ViewBlockLayoutWithBFI(
+ "view-block-layout-with-bfi", cl::Hidden,
+ cl::desc(
+ "Pop up a window to show a dag displaying MBP layout and associated "
+ "block frequencies of the CFG."),
+ cl::values(clEnumValN(GVDT_None, "none", "do not display graphs."),
+ clEnumValN(GVDT_Fraction, "fraction",
+ "display a graph using the "
+ "fractional block frequency representation."),
+ clEnumValN(GVDT_Integer, "integer",
+ "display a graph using the raw "
+ "integer fractional block frequency representation."),
+ clEnumValN(GVDT_Count, "count",
+ "display a graph using the real "
+ "profile count if available.")));
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
extern cl::opt<std::string> ViewBlockFreqFuncName;
+// Command line option to specify hot frequency threshold.
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-hot-freq-perc=
extern cl::opt<unsigned> ViewHotFreqPercent;
+static GVDAGType getGVDT() {
+ if (ViewBlockLayoutWithBFI != GVDT_None)
+ return ViewBlockLayoutWithBFI;
+
+ return ViewMachineBlockFreqPropagationDAG;
+}
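Usage note: with this change, the post-layout view can be requested independently of the generic one, e.g. (illustrative invocation using the two flags named above):

    llc -view-block-layout-with-bfi=count -view-bfi-func-name=foo input.ll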
+
namespace llvm {
template <> struct GraphTraits<MachineBlockFrequencyInfo *> {
@@ -80,12 +106,32 @@ template <>
struct DOTGraphTraits<MachineBlockFrequencyInfo *>
: public MBFIDOTGraphTraitsBase {
explicit DOTGraphTraits(bool isSimple = false)
- : MBFIDOTGraphTraitsBase(isSimple) {}
+ : MBFIDOTGraphTraitsBase(isSimple), CurFunc(nullptr), LayoutOrderMap() {}
+
+ const MachineFunction *CurFunc;
+ DenseMap<const MachineBasicBlock *, int> LayoutOrderMap;
std::string getNodeLabel(const MachineBasicBlock *Node,
const MachineBlockFrequencyInfo *Graph) {
- return MBFIDOTGraphTraitsBase::getNodeLabel(
- Node, Graph, ViewMachineBlockFreqPropagationDAG);
+
+ int layout_order = -1;
+ // Attach additional ordering information if 'isSimple' is false.
+ if (!isSimple()) {
+ const MachineFunction *F = Node->getParent();
+ if (!CurFunc || F != CurFunc) {
+ if (CurFunc)
+ LayoutOrderMap.clear();
+
+ CurFunc = F;
+ int O = 0;
+ for (auto MBI = F->begin(); MBI != F->end(); ++MBI, ++O) {
+ LayoutOrderMap[&*MBI] = O;
+ }
+ }
+ layout_order = LayoutOrderMap[Node];
+ }
+ return MBFIDOTGraphTraitsBase::getNodeLabel(Node, Graph, getGVDT(),
+ layout_order);
}
std::string getNodeAttributes(const MachineBasicBlock *Node,
@@ -102,7 +148,6 @@ struct DOTGraphTraits<MachineBlockFrequencyInfo *>
};
} // end namespace llvm
-#endif
INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq",
"Machine Block Frequency Analysis", true, true)
@@ -127,20 +172,24 @@ void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
- MachineBranchProbabilityInfo &MBPI =
- getAnalysis<MachineBranchProbabilityInfo>();
- MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+void MachineBlockFrequencyInfo::calculate(
+ const MachineFunction &F, const MachineBranchProbabilityInfo &MBPI,
+ const MachineLoopInfo &MLI) {
if (!MBFI)
MBFI.reset(new ImplType);
MBFI->calculate(F, MBPI, MLI);
-#ifndef NDEBUG
if (ViewMachineBlockFreqPropagationDAG != GVDT_None &&
(ViewBlockFreqFuncName.empty() ||
F.getName().equals(ViewBlockFreqFuncName))) {
- view();
+ view("MachineBlockFrequencyDAGS." + F.getName());
}
-#endif
+}
+
+bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
+ MachineBranchProbabilityInfo &MBPI =
+ getAnalysis<MachineBranchProbabilityInfo>();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ calculate(F, MBPI, MLI);
return false;
}
@@ -148,15 +197,9 @@ void MachineBlockFrequencyInfo::releaseMemory() { MBFI.reset(); }
/// Pop up a ghostview window with the current block frequency propagation
/// rendered using dot.
-void MachineBlockFrequencyInfo::view() const {
-// This code is only for debugging.
-#ifndef NDEBUG
- ViewGraph(const_cast<MachineBlockFrequencyInfo *>(this),
- "MachineBlockFrequencyDAGs");
-#else
- errs() << "MachineBlockFrequencyInfo::view is only available in debug builds "
- "on systems with Graphviz or gv!\n";
-#endif // NDEBUG
+void MachineBlockFrequencyInfo::view(const Twine &Name, bool isSimple) const {
+ // This code is only for debugging.
+ ViewGraph(const_cast<MachineBlockFrequencyInfo *>(this), Name, isSimple);
}
BlockFrequency
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 40e3840e6b0b..4cfc128a8c1d 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -32,14 +32,15 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/TailDuplicator.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
@@ -49,6 +50,8 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
+#include <functional>
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "block-placement"
@@ -82,19 +85,6 @@ static cl::opt<unsigned> ExitBlockBias(
// Definition:
// - Outlining: placement of a basic block outside the chain or hot path.
-static cl::opt<bool> OutlineOptionalBranches(
- "outline-optional-branches",
- cl::desc("Outlining optional branches will place blocks that are optional "
- "branches, i.e. branches with a common post dominator, outside "
- "the hot path or chain"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<unsigned> OutlineOptionalThreshold(
- "outline-optional-threshold",
- cl::desc("Don't outline optional branches that are a single block with an "
- "instruction count below this threshold"),
- cl::init(4), cl::Hidden);
-
static cl::opt<unsigned> LoopToColdBlockRatio(
"loop-to-cold-block-ratio",
cl::desc("Outline loop blocks from loop chain if (frequency of loop) / "
@@ -136,20 +126,47 @@ BranchFoldPlacement("branch-fold-placement",
cl::init(true), cl::Hidden);
// Heuristic for tail duplication.
-static cl::opt<unsigned> TailDuplicatePlacementThreshold(
+static cl::opt<unsigned> TailDupPlacementThreshold(
"tail-dup-placement-threshold",
cl::desc("Instruction cutoff for tail duplication during layout. "
"Tail merging during layout is forced to have a threshold "
"that won't conflict."), cl::init(2),
cl::Hidden);
+// Cost-penalty heuristic for tail duplication.
+static cl::opt<unsigned> TailDupPlacementPenalty(
+ "tail-dup-placement-penalty",
+ cl::desc("Cost penalty for blocks that can avoid breaking CFG by copying. "
+ "Copying can increase fallthrough, but it also increases icache "
+ "pressure. This parameter controls the penalty to account for that. "
+ "Percent as integer."),
+ cl::init(2),
+ cl::Hidden);
+
+// Heuristic for triangle chains.
+static cl::opt<unsigned> TriangleChainCount(
+ "triangle-chain-count",
+ cl::desc("Number of triangle-shaped-CFG's that need to be in a row for the "
+ "triangle tail duplication heuristic to kick in. 0 to disable."),
+ cl::init(2),
+ cl::Hidden);
+
extern cl::opt<unsigned> StaticLikelyProb;
extern cl::opt<unsigned> ProfileLikelyProb;
+// Internal option used to control BFI display only after MBP pass.
+// Defined in CodeGen/MachineBlockFrequencyInfo.cpp:
+// -view-block-layout-with-bfi=
+extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
+
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+
namespace {
class BlockChain;
/// \brief Type for our function-wide basic block -> block chain mapping.
-typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
+typedef DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChainMapType;
}
namespace {
@@ -193,12 +210,15 @@ public:
/// \brief Iterator over blocks within the chain.
typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator;
+ typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator const_iterator;
/// \brief Beginning of blocks within the chain.
iterator begin() { return Blocks.begin(); }
+ const_iterator begin() const { return Blocks.begin(); }
/// \brief End of blocks within the chain.
iterator end() { return Blocks.end(); }
+ const_iterator end() const { return Blocks.end(); }
bool remove(MachineBasicBlock* BB) {
for(iterator i = begin(); i != end(); ++i) {
@@ -264,12 +284,28 @@ public:
namespace {
class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A typedef for a block filter set.
- typedef SmallSetVector<MachineBasicBlock *, 16> BlockFilterSet;
+ typedef SmallSetVector<const MachineBasicBlock *, 16> BlockFilterSet;
+
+  /// Pair struct containing basic block and taildup profitability.
+ struct BlockAndTailDupResult {
+ MachineBasicBlock *BB;
+ bool ShouldTailDup;
+ };
+
+  /// Struct containing an edge's weight and its source and destination blocks.
+ struct WeightedEdge {
+ BlockFrequency Weight;
+ MachineBasicBlock *Src;
+ MachineBasicBlock *Dest;
+ };
/// \brief work lists of blocks that are ready to be laid out
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
SmallVector<MachineBasicBlock *, 16> EHPadWorkList;
+ /// Edges that have already been computed as optimal.
+ DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges;
+
/// \brief Machine Function
MachineFunction *F;
@@ -294,7 +330,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
const TargetLoweringBase *TLI;
/// \brief A handle to the post dominator tree.
- MachineDominatorTree *MDT;
+ MachinePostDominatorTree *MPDT;
/// \brief Duplicator used to duplicate tails during placement.
///
@@ -303,10 +339,6 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// must be done inline.
TailDuplicator TailDup;
- /// \brief A set of blocks that are unavoidably execute, i.e. they dominate
- /// all terminators of the MachineFunction.
- SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
-
/// \brief Allocator and owner of BlockChain structures.
///
/// We build BlockChains lazily while processing the loop structure of
@@ -322,7 +354,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// BlockChain it participates in, if any. We use it to, among other things,
/// allow implicitly defining edges between chains as the existing edges
/// between basic blocks.
- DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
+ DenseMap<const MachineBasicBlock *, BlockChain *> BlockToChain;
#ifndef NDEBUG
/// The set of basic blocks that have terminators that cannot be fully
@@ -334,75 +366,107 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// Decrease the UnscheduledPredecessors count for all blocks in chain, and
/// if the count goes to 0, add them to the appropriate work list.
- void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
- const BlockFilterSet *BlockFilter = nullptr);
+ void markChainSuccessors(
+ const BlockChain &Chain, const MachineBasicBlock *LoopHeaderBB,
+ const BlockFilterSet *BlockFilter = nullptr);
/// Decrease the UnscheduledPredecessors count for a single block, and
/// if the count goes to 0, add them to the appropriate work list.
void markBlockSuccessors(
- BlockChain &Chain, MachineBasicBlock *BB, MachineBasicBlock *LoopHeaderBB,
+ const BlockChain &Chain, const MachineBasicBlock *BB,
+ const MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter = nullptr);
-
BranchProbability
- collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain,
- const BlockFilterSet *BlockFilter,
- SmallVector<MachineBasicBlock *, 4> &Successors);
- bool shouldPredBlockBeOutlined(MachineBasicBlock *BB, MachineBasicBlock *Succ,
- BlockChain &Chain,
- const BlockFilterSet *BlockFilter,
- BranchProbability SuccProb,
- BranchProbability HotProb);
+ collectViableSuccessors(
+ const MachineBasicBlock *BB, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter,
+ SmallVector<MachineBasicBlock *, 4> &Successors);
+ bool shouldPredBlockBeOutlined(
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter,
+ BranchProbability SuccProb, BranchProbability HotProb);
bool repeatedlyTailDuplicateBlock(
MachineBasicBlock *BB, MachineBasicBlock *&LPred,
- MachineBasicBlock *LoopHeaderBB,
+ const MachineBasicBlock *LoopHeaderBB,
BlockChain &Chain, BlockFilterSet *BlockFilter,
MachineFunction::iterator &PrevUnplacedBlockIt);
- bool maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
- const BlockChain &Chain,
- BlockFilterSet *BlockFilter,
- MachineFunction::iterator &PrevUnplacedBlockIt,
- bool &DuplicatedToPred);
- bool
- hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ,
- BlockChain &SuccChain, BranchProbability SuccProb,
- BranchProbability RealSuccProb, BlockChain &Chain,
- const BlockFilterSet *BlockFilter);
- MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
- BlockChain &Chain,
- const BlockFilterSet *BlockFilter);
- MachineBasicBlock *
- selectBestCandidateBlock(BlockChain &Chain,
- SmallVectorImpl<MachineBasicBlock *> &WorkList);
- MachineBasicBlock *
- getFirstUnplacedBlock(const BlockChain &PlacedChain,
- MachineFunction::iterator &PrevUnplacedBlockIt,
- const BlockFilterSet *BlockFilter);
+ bool maybeTailDuplicateBlock(
+ MachineBasicBlock *BB, MachineBasicBlock *LPred,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ bool &DuplicatedToPred);
+ bool hasBetterLayoutPredecessor(
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ const BlockChain &SuccChain, BranchProbability SuccProb,
+ BranchProbability RealSuccProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ BlockAndTailDupResult selectBestSuccessor(
+ const MachineBasicBlock *BB, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *selectBestCandidateBlock(
+ const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList);
+ MachineBasicBlock *getFirstUnplacedBlock(
+ const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter);
/// \brief Add a basic block to the work list if it is appropriate.
///
/// If the optional parameter BlockFilter is provided, only MBB
/// present in the set will be added to the worklist. If nullptr
/// is provided, no filtering occurs.
- void fillWorkLists(MachineBasicBlock *MBB,
+ void fillWorkLists(const MachineBasicBlock *MBB,
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
const BlockFilterSet *BlockFilter);
- void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
+ void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
BlockFilterSet *BlockFilter = nullptr);
- MachineBasicBlock *findBestLoopTop(MachineLoop &L,
- const BlockFilterSet &LoopBlockSet);
- MachineBasicBlock *findBestLoopExit(MachineLoop &L,
- const BlockFilterSet &LoopBlockSet);
- BlockFilterSet collectLoopBlockSet(MachineLoop &L);
- void buildLoopChains(MachineLoop &L);
- void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB,
- const BlockFilterSet &LoopBlockSet);
- void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L,
- const BlockFilterSet &LoopBlockSet);
- void collectMustExecuteBBs();
+ MachineBasicBlock *findBestLoopTop(
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
+ MachineBasicBlock *findBestLoopExit(
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
+ BlockFilterSet collectLoopBlockSet(const MachineLoop &L);
+ void buildLoopChains(const MachineLoop &L);
+ void rotateLoop(
+ BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
+ const BlockFilterSet &LoopBlockSet);
+ void rotateLoopWithProfile(
+ BlockChain &LoopChain, const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
void buildCFGChains();
void optimizeBranches();
void alignBlocks();
+ /// Returns true if a block should be tail-duplicated to increase fallthrough
+ /// opportunities.
+ bool shouldTailDuplicate(MachineBasicBlock *BB);
+ /// Check the edge frequencies to see if tail duplication will increase
+ /// fallthroughs.
+ bool isProfitableToTailDup(
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ BranchProbability AdjustedSumProb,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+ /// Check for a trellis layout.
+ bool isTrellis(const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+ /// Get the best successor given a trellis layout.
+ BlockAndTailDupResult getBestTrellisSuccessor(
+ const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ BranchProbability AdjustedSumProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ /// Get the best pair of non-conflicting edges.
+ static std::pair<WeightedEdge, WeightedEdge> getBestNonConflictingEdges(
+ const MachineBasicBlock *BB,
+ MutableArrayRef<SmallVector<WeightedEdge, 8>> Edges);
+ /// Returns true if a block can tail duplicate into all unplaced
+ /// predecessors. Filters based on loop.
+ bool canTailDuplicateUnplacedPreds(
+ const MachineBasicBlock *BB, MachineBasicBlock *Succ,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter);
+ /// Find chains of triangles to tail-duplicate where a global analysis works,
+ /// but a local analysis would not find them.
+ void precomputeTriangleChains();
public:
static char ID; // Pass identification, replacement for typeid
@@ -415,7 +479,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineBlockFrequencyInfo>();
- AU.addRequired<MachineDominatorTree>();
+ if (TailDupPlacement)
+ AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -429,7 +494,7 @@ INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement",
"Branch Probability Basic Block Placement", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
"Branch Probability Basic Block Placement", false, false)
@@ -438,7 +503,7 @@ INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement",
/// \brief Helper to print the name of a MBB.
///
/// Only used by debug logging.
-static std::string getBlockName(MachineBasicBlock *BB) {
+static std::string getBlockName(const MachineBasicBlock *BB) {
std::string Result;
raw_string_ostream OS(Result);
OS << "BB#" << BB->getNumber();
@@ -455,7 +520,7 @@ static std::string getBlockName(MachineBasicBlock *BB) {
/// having one fewer active predecessor. It also adds any successors of this
/// chain which reach the zero-predecessor state to the appropriate worklist.
void MachineBlockPlacement::markChainSuccessors(
- BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
+ const BlockChain &Chain, const MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter) {
// Walk all the blocks in this chain, marking their successors as having
// a predecessor placed.
@@ -471,8 +536,8 @@ void MachineBlockPlacement::markChainSuccessors(
/// and was duplicated into the chain end, we need to redo markBlockSuccessors
/// for just that block.
void MachineBlockPlacement::markBlockSuccessors(
- BlockChain &Chain, MachineBasicBlock *MBB, MachineBasicBlock *LoopHeaderBB,
- const BlockFilterSet *BlockFilter) {
+ const BlockChain &Chain, const MachineBasicBlock *MBB,
+ const MachineBasicBlock *LoopHeaderBB, const BlockFilterSet *BlockFilter) {
// Add any successors for which this is the only un-placed in-loop
// predecessor to the worklist as a viable candidate for CFG-neutral
// placement. No subsequent placement of this block will violate the CFG
@@ -504,7 +569,8 @@ void MachineBlockPlacement::markBlockSuccessors(
/// the total branch probability of edges from \p BB to those
/// blocks.
BranchProbability MachineBlockPlacement::collectViableSuccessors(
- MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter,
+ const MachineBasicBlock *BB, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter,
SmallVector<MachineBasicBlock *, 4> &Successors) {
// Adjust edge probabilities by excluding edges pointing to blocks that is
// either not in BlockFilter or is already in the current chain. Consider the
@@ -561,46 +627,573 @@ getAdjustedProbability(BranchProbability OrigProb,
return SuccProb;
}
-/// When the option OutlineOptionalBranches is on, this method
-/// checks if the fallthrough candidate block \p Succ (of block
-/// \p BB) also has other unscheduled predecessor blocks which
-/// are also successors of \p BB (forming triangular shape CFG).
-/// If none of such predecessors are small, it returns true.
-/// The caller can choose to select \p Succ as the layout successors
-/// so that \p Succ's predecessors (optional branches) can be
-/// outlined.
-/// FIXME: fold this with more general layout cost analysis.
-bool MachineBlockPlacement::shouldPredBlockBeOutlined(
- MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &Chain,
- const BlockFilterSet *BlockFilter, BranchProbability SuccProb,
- BranchProbability HotProb) {
- if (!OutlineOptionalBranches)
+/// Check if \p BB has exactly the successors in \p Successors.
+static bool
+hasSameSuccessors(MachineBasicBlock &BB,
+ SmallPtrSetImpl<const MachineBasicBlock *> &Successors) {
+ if (BB.succ_size() != Successors.size())
return false;
- // If we outline optional branches, look whether Succ is unavoidable, i.e.
- // dominates all terminators of the MachineFunction. If it does, other
- // successors must be optional. Don't do this for cold branches.
- if (SuccProb > HotProb.getCompl() && UnavoidableBlocks.count(Succ) > 0) {
- for (MachineBasicBlock *Pred : Succ->predecessors()) {
- // Check whether there is an unplaced optional branch.
- if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||
- BlockToChain[Pred] == &Chain)
+ // We don't want to count self-loops
+ if (Successors.count(&BB))
+ return false;
+ for (MachineBasicBlock *Succ : BB.successors())
+ if (!Successors.count(Succ))
+ return false;
+ return true;
+}
+
+/// Check if a block should be tail duplicated to increase fallthrough
+/// opportunities.
+/// \p BB Block to check.
+bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) {
+ // Blocks with single successors don't create additional fallthrough
+ // opportunities. Don't duplicate them. TODO: When conditional exits are
+ // analyzable, allow them to be duplicated.
+ bool IsSimple = TailDup.isSimpleBB(BB);
+
+ if (BB->succ_size() == 1)
+ return false;
+ return TailDup.shouldTailDuplicate(IsSimple, *BB);
+}
+
+/// Compare two BlockFrequencies with a small penalty for \p A.
+/// In order to be conservative, we apply an X% penalty to account for
+/// increased icache pressure and static heuristics. For small frequencies
+/// we use only the numerators to improve accuracy. For simplicity, we assume
+/// the penalty is less than 100%.
+/// TODO(iteratee): Use 64-bit fixed point edge frequencies everywhere.
+static bool greaterWithBias(BlockFrequency A, BlockFrequency B,
+ uint64_t EntryFreq) {
+ BranchProbability ThresholdProb(TailDupPlacementPenalty, 100);
+ BlockFrequency Gain = A - B;
+ return (Gain / ThresholdProb).getFrequency() >= EntryFreq;
+}
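A worked instance of the comparison above (illustrative numbers, default penalty of 2):

    // ThresholdProb = 2/100, so (Gain / ThresholdProb) >= EntryFreq becomes
    // Gain * 50 >= EntryFreq: A must beat B by at least 2% of the entry
    // frequency. With EntryFreq = 1000:
    //   A = 515, B = 500 -> Gain = 15, 15 * 50 = 750  < 1000 -> not a win
    //   A = 525, B = 500 -> Gain = 25, 25 * 50 = 1250 >= 1000 -> a win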
+
+/// Check the edge frequencies to see if tail duplication will increase
+/// fallthroughs. It only makes sense to call this function when
+/// \p Succ would not be chosen otherwise. Tail duplication of \p Succ is
+/// always locally profitable if we would have picked \p Succ without
+/// considering duplication.
+bool MachineBlockPlacement::isProfitableToTailDup(
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ BranchProbability QProb,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+ // We need to do a probability calculation to make sure this is profitable.
+ // First: does succ have a successor that post-dominates? This affects the
+ // calculation. The 2 relevant cases are:
+ // BB BB
+ // | \Qout | \Qout
+ // P| C |P C
+ // = C' = C'
+ // | /Qin | /Qin
+ // | / | /
+ // Succ Succ
+ // / \ | \ V
+ // U/ =V |U \
+ // / \ = D
+ // D E | /
+ // | /
+ // |/
+ // PDom
+ // '=' : Branch taken for that CFG edge
+  // In the second case, placing Succ while duplicating it into C prevents the
+  // fallthrough of Succ into either D or PDom, because they now have C as an
+  // unplaced predecessor.
+
+ // Start by figuring out which case we fall into
+ MachineBasicBlock *PDom = nullptr;
+ SmallVector<MachineBasicBlock *, 4> SuccSuccs;
+ // Only scan the relevant successors
+ auto AdjustedSuccSumProb =
+ collectViableSuccessors(Succ, Chain, BlockFilter, SuccSuccs);
+ BranchProbability PProb = MBPI->getEdgeProbability(BB, Succ);
+ auto BBFreq = MBFI->getBlockFreq(BB);
+ auto SuccFreq = MBFI->getBlockFreq(Succ);
+ BlockFrequency P = BBFreq * PProb;
+ BlockFrequency Qout = BBFreq * QProb;
+ uint64_t EntryFreq = MBFI->getEntryFreq();
+ // If there are no more successors, it is profitable to copy, as it strictly
+ // increases fallthrough.
+ if (SuccSuccs.size() == 0)
+ return greaterWithBias(P, Qout, EntryFreq);
+
+ auto BestSuccSucc = BranchProbability::getZero();
+ // Find the PDom or the best Succ if no PDom exists.
+ for (MachineBasicBlock *SuccSucc : SuccSuccs) {
+ auto Prob = MBPI->getEdgeProbability(Succ, SuccSucc);
+ if (Prob > BestSuccSucc)
+ BestSuccSucc = Prob;
+ if (PDom == nullptr)
+ if (MPDT->dominates(SuccSucc, Succ)) {
+ PDom = SuccSucc;
+ break;
+ }
+ }
+ // For the comparisons, we need to know Succ's best incoming edge that isn't
+ // from BB.
+ auto SuccBestPred = BlockFrequency(0);
+ for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
+ if (SuccPred == Succ || SuccPred == BB
+ || BlockToChain[SuccPred] == &Chain
+ || (BlockFilter && !BlockFilter->count(SuccPred)))
+ continue;
+ auto Freq = MBFI->getBlockFreq(SuccPred)
+ * MBPI->getEdgeProbability(SuccPred, Succ);
+ if (Freq > SuccBestPred)
+ SuccBestPred = Freq;
+ }
+ // Qin is Succ's best unplaced incoming edge that isn't BB
+ BlockFrequency Qin = SuccBestPred;
+ // If it doesn't have a post-dominating successor, here is the calculation:
+ // BB BB
+ // | \Qout | \
+ // P| C | =
+ // = C' | C
+ // | /Qin | |
+ // | / | C' (+Succ)
+ // Succ Succ /|
+ // / \ | \/ |
+ // U/ =V | == |
+ // / \ | / \|
+ // D E D E
+ // '=' : Branch taken for that CFG edge
+ // Cost in the first case is: P + V
+  // For this calculation, we always assume P > Qout. If Qout > P,
+  // the result of this function will be ignored at the caller.
+ // Let F = SuccFreq - Qin
+ // Cost in the second case is: Qout + min(Qin, F) * U + max(Qin, F) * V
+
+ if (PDom == nullptr || !Succ->isSuccessor(PDom)) {
+ BranchProbability UProb = BestSuccSucc;
+ BranchProbability VProb = AdjustedSuccSumProb - UProb;
+ BlockFrequency F = SuccFreq - Qin;
+ BlockFrequency V = SuccFreq * VProb;
+ BlockFrequency QinU = std::min(Qin, F) * UProb;
+ BlockFrequency BaseCost = P + V;
+ BlockFrequency DupCost = Qout + QinU + std::max(Qin, F) * VProb;
+ return greaterWithBias(BaseCost, DupCost, EntryFreq);
+ }
+ BranchProbability UProb = MBPI->getEdgeProbability(Succ, PDom);
+ BranchProbability VProb = AdjustedSuccSumProb - UProb;
+ BlockFrequency U = SuccFreq * UProb;
+ BlockFrequency V = SuccFreq * VProb;
+ BlockFrequency F = SuccFreq - Qin;
+ // If there is a post-dominating successor, here is the calculation:
+ // BB BB BB BB
+ // | \Qout | \ | \Qout | \
+ // |P C | = |P C | =
+ // = C' |P C = C' |P C
+ // | /Qin | | | /Qin | |
+ // | / | C' (+Succ) | / | C' (+Succ)
+ // Succ Succ /| Succ Succ /|
+ // | \ V | \/ | | \ V | \/ |
+ // |U \ |U /\ =? |U = |U /\ |
+ // = D = = =?| | D | = =|
+ // | / |/ D | / |/ D
+ // | / | / | = | /
+ // |/ | / |/ | =
+ // Dom Dom Dom Dom
+ // '=' : Branch taken for that CFG edge
+ // The cost for taken branches in the first case is P + U
+ // Let F = SuccFreq - Qin
+ // The cost in the second case (assuming independence), given the layout:
+ // BB, Succ, (C+Succ), D, Dom or the layout:
+ // BB, Succ, D, Dom, (C+Succ)
+ // is Qout + max(F, Qin) * U + min(F, Qin)
+ // compare P + U vs Qout + P * U + Qin.
+ //
+ // The 3rd and 4th cases cover when Dom would be chosen to follow Succ.
+ //
+ // For the 3rd case, the cost is P + 2 * V
+ // For the 4th case, the cost is Qout + min(Qin, F) * U + max(Qin, F) * V + V
+ // We choose 4 over 3 when (P + V) > Qout + min(Qin, F) * U + max(Qin, F) * V
+ if (UProb > AdjustedSuccSumProb / 2 &&
+ !hasBetterLayoutPredecessor(Succ, PDom, *BlockToChain[PDom], UProb, UProb,
+ Chain, BlockFilter))
+ // Cases 3 & 4
+ return greaterWithBias(
+ (P + V), (Qout + std::max(Qin, F) * VProb + std::min(Qin, F) * UProb),
+ EntryFreq);
+ // Cases 1 & 2
+ return greaterWithBias((P + U),
+ (Qout + std::min(Qin, F) * AdjustedSuccSumProb +
+ std::max(Qin, F) * UProb),
+ EntryFreq);
+}
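A worked instance of the no-post-dominator case above (illustrative numbers):

    // Let BBFreq = 100, PProb = 0.6, QProb = 0.4, SuccFreq = 100, Qin = 30,
    // UProb = 0.7, VProb = 0.3 (AdjustedSuccSumProb = 1). Then:
    //   P = 60, Qout = 40, V = 30, F = SuccFreq - Qin = 70
    //   BaseCost = P + V = 90
    //   DupCost  = Qout + min(Qin, F) * UProb + max(Qin, F) * VProb
    //            = 40 + 30 * 0.7 + 70 * 0.3 = 82
    // BaseCost exceeds DupCost, so duplication wins if the 8-unit gain also
    // clears the greaterWithBias threshold.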
+
+/// Check for a trellis layout. \p BB is the upper part of a trellis if its
+/// successors form the lower part of a trellis. A successor set S forms the
+/// lower part of a trellis if all of the predecessors of S are either in S or
+/// have all of S as successors. We ignore trellises where BB doesn't have 2
+/// successors because for fewer than 2, it's trivial, and for 3 or greater they
+/// are very uncommon and complex to compute optimally. Allowing edges within S
+/// is not strictly a trellis, but the same algorithm works, so we allow it.
+bool MachineBlockPlacement::isTrellis(
+ const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+ // Technically BB could form a trellis with branching factor higher than 2.
+ // But that's extremely uncommon.
+ if (BB->succ_size() != 2 || ViableSuccs.size() != 2)
+ return false;
+
+ SmallPtrSet<const MachineBasicBlock *, 2> Successors(BB->succ_begin(),
+ BB->succ_end());
+ // To avoid reviewing the same predecessors twice.
+ SmallPtrSet<const MachineBasicBlock *, 8> SeenPreds;
+
+ for (MachineBasicBlock *Succ : ViableSuccs) {
+ int PredCount = 0;
+ for (auto SuccPred : Succ->predecessors()) {
+ // Allow triangle successors, but don't count them.
+ if (Successors.count(SuccPred)) {
+ // Make sure that it is actually a triangle.
+ for (MachineBasicBlock *CheckSucc : SuccPred->successors())
+ if (!Successors.count(CheckSucc))
+ return false;
continue;
- // Check whether the optional branch has exactly one BB.
- if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB)
+ }
+ const BlockChain *PredChain = BlockToChain[SuccPred];
+ if (SuccPred == BB || (BlockFilter && !BlockFilter->count(SuccPred)) ||
+ PredChain == &Chain || PredChain == BlockToChain[Succ])
continue;
- // Check whether the optional branch is small.
- if (Pred->size() < OutlineOptionalThreshold)
+ ++PredCount;
+ // Perform the successor check only once.
+ if (!SeenPreds.insert(SuccPred).second)
+ continue;
+ if (!hasSameSuccessors(*SuccPred, Successors))
return false;
}
- return true;
- } else
+ // If one of the successors has only BB as a predecessor, it is not a
+ // trellis.
+ if (PredCount < 1)
+ return false;
+ }
+ return true;
+}
+
+/// Pick the highest total weight pair of edges that can both be laid out.
+/// The edges in \p Edges[0] are assumed to have a different destination than
+/// the edges in \p Edges[1]. Simple counting shows that the best pair is either
+/// the individual highest weight edges to the 2 different destinations, or in
+/// case of a conflict, one of them should be replaced with a 2nd best edge.
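+/// For example (illustrative weights): given edges into S1 of {A: 10, B: 7}
+/// and into S2 of {A: 9, C: 4}, the individually best edges conflict at A, so
+/// we compare (A->S1) + (C->S2) = 14 against (B->S1) + (A->S2) = 16 and keep
+/// the second pair.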
+std::pair<MachineBlockPlacement::WeightedEdge,
+ MachineBlockPlacement::WeightedEdge>
+MachineBlockPlacement::getBestNonConflictingEdges(
+ const MachineBasicBlock *BB,
+ MutableArrayRef<SmallVector<MachineBlockPlacement::WeightedEdge, 8>>
+ Edges) {
+ // Sort the edges, and then for each successor, find the best incoming
+ // predecessor. If the best incoming predecessors aren't the same,
+ // then that is clearly the best layout. If there is a conflict, one of the
+ // successors will have to fallthrough from the second best predecessor. We
+ // compare which combination is better overall.
+
+ // Sort for highest frequency.
+ auto Cmp = [](WeightedEdge A, WeightedEdge B) { return A.Weight > B.Weight; };
+
+ std::stable_sort(Edges[0].begin(), Edges[0].end(), Cmp);
+ std::stable_sort(Edges[1].begin(), Edges[1].end(), Cmp);
+ auto BestA = Edges[0].begin();
+ auto BestB = Edges[1].begin();
+ // Arrange for the correct answer to be in BestA and BestB
+ // If the 2 best edges don't conflict, the answer is already there.
+ if (BestA->Src == BestB->Src) {
+ // Compare the total fallthrough of (Best + Second Best) for both pairs
+ auto SecondBestA = std::next(BestA);
+ auto SecondBestB = std::next(BestB);
+ BlockFrequency BestAScore = BestA->Weight + SecondBestB->Weight;
+ BlockFrequency BestBScore = BestB->Weight + SecondBestA->Weight;
+ if (BestAScore < BestBScore)
+ BestA = SecondBestA;
+ else
+ BestB = SecondBestB;
+ }
+ // Arrange for the BB edge to be in BestA if it exists.
+ if (BestB->Src == BB)
+ std::swap(BestA, BestB);
+ return std::make_pair(*BestA, *BestB);
+}
+
+/// Get the best successor from \p BB based on \p BB being part of a trellis.
+/// We only handle trellises with 2 successors, so the algorithm is
+/// straightforward: Find the best pair of edges that don't conflict. We find
+/// the best incoming edge for each successor in the trellis. If those conflict,
+/// we consider which of them should be replaced with the second best.
+/// The two best edges are computed by getBestNonConflictingEdges; if one of
+/// them comes from \p BB, it is returned first and becomes the successor
+/// chosen here.
+MachineBlockPlacement::BlockAndTailDupResult
+MachineBlockPlacement::getBestTrellisSuccessor(
+ const MachineBasicBlock *BB,
+ const SmallVectorImpl<MachineBasicBlock *> &ViableSuccs,
+ BranchProbability AdjustedSumProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+
+ BlockAndTailDupResult Result = {nullptr, false};
+ SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),
+ BB->succ_end());
+
+ // We assume size 2 because it's common. For general n, we would have to do
+ // the Hungarian algorithm, but it's not worth the complexity because more
+ // than 2 successors is fairly uncommon, and a trellis even more so.
+ if (Successors.size() != 2 || ViableSuccs.size() != 2)
+ return Result;
+
+ // Collect the edge frequencies of all edges that form the trellis.
+ SmallVector<WeightedEdge, 8> Edges[2];
+ int SuccIndex = 0;
+ for (auto Succ : ViableSuccs) {
+ for (MachineBasicBlock *SuccPred : Succ->predecessors()) {
+ // Skip any placed predecessors that are not BB
+ if (SuccPred != BB)
+ if ((BlockFilter && !BlockFilter->count(SuccPred)) ||
+ BlockToChain[SuccPred] == &Chain ||
+ BlockToChain[SuccPred] == BlockToChain[Succ])
+ continue;
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(SuccPred) *
+ MBPI->getEdgeProbability(SuccPred, Succ);
+ Edges[SuccIndex].push_back({EdgeFreq, SuccPred, Succ});
+ }
+ ++SuccIndex;
+ }
+
+ // Pick the best combination of 2 edges from all the edges in the trellis.
+ WeightedEdge BestA, BestB;
+ std::tie(BestA, BestB) = getBestNonConflictingEdges(BB, Edges);
+
+ if (BestA.Src != BB) {
+ // If we have a trellis, and BB doesn't have the best fallthrough edges,
+ // we shouldn't choose any successor. We've already looked and there's a
+ // better fallthrough edge for all the successors.
+ DEBUG(dbgs() << "Trellis, but not one of the chosen edges.\n");
+ return Result;
+ }
+
+ // Did we pick the triangle edge? If tail-duplication is profitable, do
+ // that instead. Otherwise merge the triangle edge now while we know it is
+ // optimal.
+ if (BestA.Dest == BestB.Src) {
+ // The edges are BB->Succ1->Succ2, and we're looking to see if BB->Succ2
+ // would be better.
+ MachineBasicBlock *Succ1 = BestA.Dest;
+ MachineBasicBlock *Succ2 = BestB.Dest;
+ // Check to see if tail-duplication would be profitable.
+ if (TailDupPlacement && shouldTailDuplicate(Succ2) &&
+ canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
+ isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),
+ Chain, BlockFilter)) {
+ DEBUG(BranchProbability Succ2Prob = getAdjustedProbability(
+ MBPI->getEdgeProbability(BB, Succ2), AdjustedSumProb);
+ dbgs() << " Selected: " << getBlockName(Succ2)
+ << ", probability: " << Succ2Prob << " (Tail Duplicate)\n");
+ Result.BB = Succ2;
+ Result.ShouldTailDup = true;
+ return Result;
+ }
+ }
+ // We have already computed the optimal edge for the other side of the
+ // trellis.
+ ComputedEdges[BestB.Src] = { BestB.Dest, false };
+
+ auto TrellisSucc = BestA.Dest;
+ DEBUG(BranchProbability SuccProb = getAdjustedProbability(
+ MBPI->getEdgeProbability(BB, TrellisSucc), AdjustedSumProb);
+ dbgs() << " Selected: " << getBlockName(TrellisSucc)
+ << ", probability: " << SuccProb << " (Trellis)\n");
+ Result.BB = TrellisSucc;
+ return Result;
+}
+
+/// When the option TailDupPlacement is on, this method checks if the
+/// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated
+/// into all of its unplaced, unfiltered predecessors that are not BB.
+bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
+ const MachineBasicBlock *BB, MachineBasicBlock *Succ,
+ const BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+ if (!shouldTailDuplicate(Succ))
return false;
+
+ // For CFG checking.
+ SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(),
+ BB->succ_end());
+ for (MachineBasicBlock *Pred : Succ->predecessors()) {
+ // Make sure all unplaced and unfiltered predecessors can be
+ // tail-duplicated into.
+ // Skip any blocks that are already placed or not in this loop.
+    if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred)) ||
+        BlockToChain[Pred] == &Chain)
+ continue;
+ if (!TailDup.canTailDuplicate(Succ, Pred)) {
+ if (Successors.size() > 1 && hasSameSuccessors(*Pred, Successors))
+ // This will result in a trellis after tail duplication, so we don't
+ // need to copy Succ into this predecessor. In the presence
+ // of a trellis tail duplication can continue to be profitable.
+ // For example:
+ // A A
+ // |\ |\
+ // | \ | \
+ // | C | C+BB
+ // | / | |
+ // |/ | |
+ // BB => BB |
+ // |\ |\/|
+ // | \ |/\|
+ // | D | D
+ // | / | /
+ // |/ |/
+ // Succ Succ
+ //
+ // After BB was duplicated into C, the layout looks like the one on the
+ // right. BB and C now have the same successors. When considering
+ // whether Succ can be duplicated into all its unplaced predecessors, we
+ // ignore C.
+ // We can do this because C already has a profitable fallthrough, namely
+ // D. TODO(iteratee): ignore sufficiently cold predecessors for
+ // duplication and for this test.
+ //
+      // This allows trellises to be laid out in 2 separate chains
+      // (A,BB,Succ,...) and later (C,D,...). This is a reasonable heuristic
+ // because it allows the creation of 2 fallthrough paths with links
+ // between them, and we correctly identify the best layout for these
+ // CFGs. We want to extend trellises that the user created in addition
+ // to trellises created by tail-duplication, so we just look for the
+ // CFG.
+ continue;
+ return false;
+ }
+ }
+ return true;
+}
+
+/// Find chains of triangles where we believe it would be profitable to
+/// tail-duplicate them all, but a local analysis would not find them.
+/// There are 3 ways this can be profitable:
+/// 1) The post-dominators marked 50% are actually taken 55% of the time (this
+///    margin shrinks with longer chains)
+/// 2) The chains are statically correlated. Branch probabilities have a very
+/// U-shaped distribution.
+/// [http://nrs.harvard.edu/urn-3:HUL.InstRepos:24015805]
+/// If the branches in a chain are likely to be from the same side of the
+/// distribution as their predecessor, but are independent at runtime, this
+/// transformation is profitable. (Because the cost of being wrong is a small
+/// fixed cost, unlike the standard triangle layout where the cost of being
+/// wrong scales with the # of triangles.)
+/// 3) The chains are dynamically correlated. If the probability that a
+///    previous branch was taken positively influences whether the next branch
+///    will be taken, duplicating the chain is profitable, as in 2.
+/// We believe that 2 and 3 are common enough to justify the small margin in 1.
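+/// As a sketch of the shape being matched: A branches to B and C where C
+/// post-dominates A, and C branches to D and E where E post-dominates C; the
+/// chain records (A, C, E), and the edges A->C and C->E become pre-computed
+/// layout decisions.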
+void MachineBlockPlacement::precomputeTriangleChains() {
+ struct TriangleChain {
+ std::vector<MachineBasicBlock *> Edges;
+ TriangleChain(MachineBasicBlock *src, MachineBasicBlock *dst)
+ : Edges({src, dst}) {}
+
+ void append(MachineBasicBlock *dst) {
+ assert(getKey()->isSuccessor(dst) &&
+ "Attempting to append a block that is not a successor.");
+ Edges.push_back(dst);
+ }
+
+ unsigned count() const { return Edges.size() - 1; }
+
+ MachineBasicBlock *getKey() const {
+ return Edges.back();
+ }
+ };
+
+ if (TriangleChainCount == 0)
+ return;
+
+ DEBUG(dbgs() << "Pre-computing triangle chains.\n");
+ // Map from last block to the chain that contains it. This allows us to extend
+ // chains as we find new triangles.
+ DenseMap<const MachineBasicBlock *, TriangleChain> TriangleChainMap;
+ for (MachineBasicBlock &BB : *F) {
+ // If BB doesn't have 2 successors, it doesn't start a triangle.
+ if (BB.succ_size() != 2)
+ continue;
+ MachineBasicBlock *PDom = nullptr;
+ for (MachineBasicBlock *Succ : BB.successors()) {
+ if (!MPDT->dominates(Succ, &BB))
+ continue;
+ PDom = Succ;
+ break;
+ }
+ // If BB doesn't have a post-dominating successor, it doesn't form a
+ // triangle.
+ if (PDom == nullptr)
+ continue;
+ // If PDom has a hint that it is low probability, skip this triangle.
+ if (MBPI->getEdgeProbability(&BB, PDom) < BranchProbability(50, 100))
+ continue;
+ // If PDom isn't eligible for duplication, this isn't the kind of triangle
+ // we're looking for.
+ if (!shouldTailDuplicate(PDom))
+ continue;
+ bool CanTailDuplicate = true;
+  // If PDom can't be tail-duplicated into its non-BB predecessors, then this
+  // isn't the kind of triangle we're looking for.
+ for (MachineBasicBlock* Pred : PDom->predecessors()) {
+ if (Pred == &BB)
+ continue;
+ if (!TailDup.canTailDuplicate(PDom, Pred)) {
+ CanTailDuplicate = false;
+ break;
+ }
+ }
+ // If we can't tail-duplicate PDom to its predecessors, then skip this
+ // triangle.
+ if (!CanTailDuplicate)
+ continue;
+
+    // Now we have an interesting triangle. Insert it if it's not part of an
+    // existing chain.
+    // Note: This cannot be replaced with a call to insert() or emplace()
+    // because the find key is BB, but the insert/emplace key is PDom.
+ auto Found = TriangleChainMap.find(&BB);
+ // If it is, remove the chain from the map, grow it, and put it back in the
+ // map with the end as the new key.
+ if (Found != TriangleChainMap.end()) {
+ TriangleChain Chain = std::move(Found->second);
+ TriangleChainMap.erase(Found);
+ Chain.append(PDom);
+ TriangleChainMap.insert(std::make_pair(Chain.getKey(), std::move(Chain)));
+ } else {
+ auto InsertResult = TriangleChainMap.try_emplace(PDom, &BB, PDom);
+ assert(InsertResult.second && "Block seen twice.");
+ (void)InsertResult;
+ }
+ }
+
+ // Iterating over a DenseMap is safe here, because the only thing in the body
+ // of the loop is inserting into another DenseMap (ComputedEdges).
+ // ComputedEdges is never iterated, so this doesn't lead to non-determinism.
+ for (auto &ChainPair : TriangleChainMap) {
+ TriangleChain &Chain = ChainPair.second;
+    // Benchmarking has shown that, due to branch correlation, duplicating 2 or
+    // more triangles is profitable, despite the calculations assuming
+    // independence.
+ if (Chain.count() < TriangleChainCount)
+ continue;
+ MachineBasicBlock *dst = Chain.Edges.back();
+ Chain.Edges.pop_back();
+ for (MachineBasicBlock *src : reverse(Chain.Edges)) {
+ DEBUG(dbgs() << "Marking edge: " << getBlockName(src) << "->" <<
+ getBlockName(dst) << " as pre-computed based on triangles.\n");
+
+ auto InsertResult = ComputedEdges.insert({src, {dst, true}});
+ assert(InsertResult.second && "Block seen twice.");
+ (void)InsertResult;
+
+ dst = src;
+ }
+ }
}
// When profile is not present, return the StaticLikelyProb.
  // When profile is available, we need to handle the triangle-shaped CFG.
static BranchProbability getLayoutSuccessorProbThreshold(
- MachineBasicBlock *BB) {
+ const MachineBasicBlock *BB) {
if (!BB->getParent()->getFunction()->getEntryCount())
return BranchProbability(StaticLikelyProb, 100);
if (BB->succ_size() == 2) {
@@ -609,11 +1202,11 @@ static BranchProbability getLayoutSuccessorProbThreshold(
if (Succ1->isSuccessor(Succ2) || Succ2->isSuccessor(Succ1)) {
/* See case 1 below for the cost analysis. For BB->Succ to
* be taken with smaller cost, the following needs to hold:
- * Prob(BB->Succ) > 2* Prob(BB->Pred)
- * So the threshold T
- * T = 2 * (1-Prob(BB->Pred). Since T + Prob(BB->Pred) == 1,
- * We have T + T/2 = 1, i.e. T = 2/3. Also adding user specified
- * branch bias, we have
+ * Prob(BB->Succ) > 2 * Prob(BB->Pred)
+ * So the threshold T in the calculation below
+ * (1-T) * Prob(BB->Succ) > T * Prob(BB->Pred)
+       * So T / (1 - T) = 2, yielding T = 2/3.
+       * Also adding the user-specified branch bias, we have
+       * T = (2/3) * (ProfileLikelyProb/50)
+       *   = (2*ProfileLikelyProb)/150
*/
@@ -625,10 +1218,17 @@ static BranchProbability getLayoutSuccessorProbThreshold(
/// Checks to see if the layout candidate block \p Succ has a better layout
/// predecessor than \c BB. If yes, returns true.
+/// \p SuccProb: The probability adjusted for only the remaining blocks.
+///    Only used for logging.
+/// \p RealSuccProb: The unadjusted probability.
+/// \p Chain: The chain that BB belongs to and Succ is being considered for.
+/// \p BlockFilter: If non-null, the set of blocks that make up the loop being
+///    considered.
bool MachineBlockPlacement::hasBetterLayoutPredecessor(
- MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain,
- BranchProbability SuccProb, BranchProbability RealSuccProb,
- BlockChain &Chain, const BlockFilterSet *BlockFilter) {
+ const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
+ const BlockChain &SuccChain, BranchProbability SuccProb,
+ BranchProbability RealSuccProb, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
// There isn't a better layout when there are no unscheduled predecessors.
if (SuccChain.UnscheduledPredecessors == 0)
@@ -734,11 +1334,12 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
// | Pred----| | S1----
// | | | |
// --(S1 or S2) ---Pred--
+ // |
+ // S2
//
// topo-cost = freq(S->Pred) + freq(BB->S1) + freq(BB->S2)
// + min(freq(Pred->S1), freq(Pred->S2))
// Non-topo-order cost:
- // In the worst case, S2 will not get laid out after Pred.
// non-topo-cost = 2 * freq(S->Pred) + freq(BB->S2).
// To be conservative, we can assume that min(freq(Pred->S1), freq(Pred->S2))
 // is 0. Then the non-topo layout is better when
@@ -756,13 +1357,15 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
for (MachineBasicBlock *Pred : Succ->predecessors()) {
if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
(BlockFilter && !BlockFilter->count(Pred)) ||
- BlockToChain[Pred] == &Chain)
+ BlockToChain[Pred] == &Chain ||
+        // This check is redundant except for lookahead. This function is
+ // called for lookahead by isProfitableToTailDup when BB hasn't been
+ // placed yet.
+ (Pred == BB))
continue;
// Do backward checking.
 // For all cases above, we need backward checking to filter out edges that
- // are not 'strongly' biased. With profile data available, the check is
- // mostly redundant for case 2 (when threshold prob is set at 50%) unless S
- // has more than two successors.
+ // are not 'strongly' biased.
// BB Pred
// \ /
// Succ
@@ -798,14 +1401,15 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
/// breaking CFG structure, but cave and break such structures in the case of
/// very hot successor edges.
///
-/// \returns The best successor block found, or null if none are viable.
-MachineBasicBlock *
-MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
- BlockChain &Chain,
- const BlockFilterSet *BlockFilter) {
+/// \returns The best successor block found, or null if none are viable, along
+/// with a boolean indicating if tail duplication is necessary.
+MachineBlockPlacement::BlockAndTailDupResult
+MachineBlockPlacement::selectBestSuccessor(
+ const MachineBasicBlock *BB, const BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
const BranchProbability HotProb(StaticLikelyProb, 100);
- MachineBasicBlock *BestSucc = nullptr;
+ BlockAndTailDupResult BestSucc = { nullptr, false };
auto BestProb = BranchProbability::getZero();
SmallVector<MachineBasicBlock *, 4> Successors;
@@ -813,22 +1417,45 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
collectViableSuccessors(BB, Chain, BlockFilter, Successors);
DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) << "\n");
+
+  // If we already precomputed the best successor for BB, return that if still
+  // applicable.
+  auto FoundEdge = ComputedEdges.find(BB);
+  if (FoundEdge != ComputedEdges.end()) {
+    // Copy the result out before erasing; the iterator and its mapped value
+    // are invalid once the entry is removed.
+    BlockAndTailDupResult Precomputed = FoundEdge->second;
+    MachineBasicBlock *Succ = Precomputed.BB;
+    ComputedEdges.erase(FoundEdge);
+    BlockChain *SuccChain = BlockToChain[Succ];
+    if (BB->isSuccessor(Succ) && (!BlockFilter || BlockFilter->count(Succ)) &&
+        SuccChain != &Chain && Succ == *SuccChain->begin())
+      return Precomputed;
+ }
+
+  // If BB is part of a trellis, use the trellis to determine the optimal
+  // fallthrough edges.
+ if (isTrellis(BB, Successors, Chain, BlockFilter))
+ return getBestTrellisSuccessor(BB, Successors, AdjustedSumProb, Chain,
+ BlockFilter);
+
+ // For blocks with CFG violations, we may be able to lay them out anyway with
+ // tail-duplication. We keep this vector so we can perform the probability
+ // calculations the minimum number of times.
+ SmallVector<std::tuple<BranchProbability, MachineBasicBlock *>, 4>
+ DupCandidates;
for (MachineBasicBlock *Succ : Successors) {
auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
BranchProbability SuccProb =
getAdjustedProbability(RealSuccProb, AdjustedSumProb);
- // This heuristic is off by default.
- if (shouldPredBlockBeOutlined(BB, Succ, Chain, BlockFilter, SuccProb,
- HotProb))
- return Succ;
-
BlockChain &SuccChain = *BlockToChain[Succ];
// Skip the edge \c BB->Succ if block \c Succ has a better layout
// predecessor that yields lower global cost.
if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
- Chain, BlockFilter))
+ Chain, BlockFilter)) {
+ // If tail duplication would make Succ profitable, place it.
+ if (TailDupPlacement && shouldTailDuplicate(Succ))
+ DupCandidates.push_back(std::make_tuple(SuccProb, Succ));
continue;
+ }
DEBUG(
dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: "
@@ -836,17 +1463,48 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
<< (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
<< "\n");
- if (BestSucc && BestProb >= SuccProb) {
+ if (BestSucc.BB && BestProb >= SuccProb) {
DEBUG(dbgs() << " Not the best candidate, continuing\n");
continue;
}
DEBUG(dbgs() << " Setting it as best candidate\n");
- BestSucc = Succ;
+ BestSucc.BB = Succ;
BestProb = SuccProb;
}
- if (BestSucc)
- DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc) << "\n");
+ // Handle the tail duplication candidates in order of decreasing probability.
+ // Stop at the first one that is profitable. Also stop if they are less
+ // profitable than BestSucc. Position is important because we preserve it and
+ // prefer first best match. Here we aren't comparing in order, so we capture
+ // the position instead.
+  if (!DupCandidates.empty()) {
+ auto cmp =
+ [](const std::tuple<BranchProbability, MachineBasicBlock *> &a,
+ const std::tuple<BranchProbability, MachineBasicBlock *> &b) {
+ return std::get<0>(a) > std::get<0>(b);
+ };
+ std::stable_sort(DupCandidates.begin(), DupCandidates.end(), cmp);
+ }
+  for (auto &Tup : DupCandidates) {
+ BranchProbability DupProb;
+ MachineBasicBlock *Succ;
+ std::tie(DupProb, Succ) = Tup;
+ if (DupProb < BestProb)
+ break;
+    if (canTailDuplicateUnplacedPreds(BB, Succ, Chain, BlockFilter) &&
+        isProfitableToTailDup(BB, Succ, BestProb, Chain, BlockFilter)) {
+ DEBUG(
+ dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: "
+ << DupProb
+ << " (Tail Duplicate)\n");
+ BestSucc.BB = Succ;
+ BestSucc.ShouldTailDup = true;
+ break;
+ }
+ }
+
+ if (BestSucc.BB)
+ DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc.BB) << "\n");
return BestSucc;
}
@@ -862,7 +1520,7 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
///
/// \returns The best block found, or null if none are viable.
MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
- BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
+ const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList) {
// Once we need to walk the worklist looking for a candidate, cleanup the
// worklist of already placed entries.
// FIXME: If this shows up on profiles, it could be folded (at the cost of
@@ -948,7 +1606,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
}
void MachineBlockPlacement::fillWorkLists(
- MachineBasicBlock *MBB,
+ const MachineBasicBlock *MBB,
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
const BlockFilterSet *BlockFilter = nullptr) {
BlockChain &Chain = *BlockToChain[MBB];
@@ -970,23 +1628,23 @@ void MachineBlockPlacement::fillWorkLists(
if (Chain.UnscheduledPredecessors != 0)
return;
- MBB = *Chain.begin();
- if (MBB->isEHPad())
- EHPadWorkList.push_back(MBB);
+ MachineBasicBlock *BB = *Chain.begin();
+ if (BB->isEHPad())
+ EHPadWorkList.push_back(BB);
else
- BlockWorkList.push_back(MBB);
+ BlockWorkList.push_back(BB);
}
void MachineBlockPlacement::buildChain(
- MachineBasicBlock *BB, BlockChain &Chain,
+ const MachineBasicBlock *HeadBB, BlockChain &Chain,
BlockFilterSet *BlockFilter) {
- assert(BB && "BB must not be null.\n");
- assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n");
+  assert(HeadBB && "HeadBB must not be null.\n");
+ assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n");
MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
- MachineBasicBlock *LoopHeaderBB = BB;
+ const MachineBasicBlock *LoopHeaderBB = HeadBB;
markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
- BB = *std::prev(Chain.end());
+ MachineBasicBlock *BB = *std::prev(Chain.end());
for (;;) {
assert(BB && "null block found at end of chain in loop.");
assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match in loop.");
@@ -995,7 +1653,11 @@ void MachineBlockPlacement::buildChain(
// Look for the best viable successor if there is one to place immediately
// after this block.
- MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
+ auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
+  MachineBasicBlock *BestSucc = Result.BB;
+ bool ShouldTailDup = Result.ShouldTailDup;
+ if (TailDupPlacement)
+ ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc));
// If an immediate successor isn't available, look for the best viable
// block among those we've identified as not violating the loop's CFG at
@@ -1016,7 +1678,7 @@ void MachineBlockPlacement::buildChain(
// Placement may have changed tail duplication opportunities.
// Check for that now.
- if (TailDupPlacement && BestSucc) {
+ if (TailDupPlacement && BestSucc && ShouldTailDup) {
// If the chosen successor was duplicated into all its predecessors,
// don't bother laying it out, just go round the loop again with BB as
// the chain end.
@@ -1052,7 +1714,7 @@ void MachineBlockPlacement::buildChain(
/// unconditional jump (for the backedge) rotating it in front of the loop
/// header is always profitable.
MachineBasicBlock *
-MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
+MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
// Placing the latch block before the header may introduce an extra branch
// that skips this block the first time the loop is executed, which we want
@@ -1116,7 +1778,7 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
/// block to layout at the top of the loop. Typically this is done to maximize
/// fallthrough opportunities.
MachineBasicBlock *
-MachineBlockPlacement::findBestLoopExit(MachineLoop &L,
+MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
// We don't want to layout the loop linearly in all cases. If the loop header
// is just a normal basic block in the loop, we want to look for what block
@@ -1235,7 +1897,7 @@ MachineBlockPlacement::findBestLoopExit(MachineLoop &L,
/// branches. For example, if the loop has fallthrough into its header and out
/// of its bottom already, don't rotate it.
void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
- MachineBasicBlock *ExitingBB,
+ const MachineBasicBlock *ExitingBB,
const BlockFilterSet &LoopBlockSet) {
if (!ExitingBB)
return;
@@ -1285,7 +1947,8 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
/// Therefore, the cost for a given rotation is the sum of costs listed above.
/// We select the best rotation with the smallest cost.
void MachineBlockPlacement::rotateLoopWithProfile(
- BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet) {
+ BlockChain &LoopChain, const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
auto HeaderBB = L.getHeader();
auto HeaderIter = find(LoopChain, HeaderBB);
auto RotationPos = LoopChain.end();
@@ -1422,7 +2085,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
/// When profile data is available, exclude cold blocks from the returned set;
/// otherwise, collect all blocks in the loop.
MachineBlockPlacement::BlockFilterSet
-MachineBlockPlacement::collectLoopBlockSet(MachineLoop &L) {
+MachineBlockPlacement::collectLoopBlockSet(const MachineLoop &L) {
BlockFilterSet LoopBlockSet;
// Filter cold blocks off from LoopBlockSet when profile data is available.
@@ -1459,10 +2122,10 @@ MachineBlockPlacement::collectLoopBlockSet(MachineLoop &L) {
/// as much as possible. We can then stitch the chains together in a way which
/// both preserves the topological structure and minimizes taken conditional
/// branches.
-void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
+void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// First recurse through any nested loops, building chains for those inner
// loops.
- for (MachineLoop *InnerLoop : L)
+ for (const MachineLoop *InnerLoop : L)
buildLoopChains(*InnerLoop);
assert(BlockWorkList.empty());
@@ -1499,7 +2162,7 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
assert(LoopChain.UnscheduledPredecessors == 0);
UpdatedPreds.insert(&LoopChain);
- for (MachineBasicBlock *LoopBB : LoopBlockSet)
+ for (const MachineBasicBlock *LoopBB : LoopBlockSet)
fillWorkLists(LoopBB, UpdatedPreds, &LoopBlockSet);
buildChain(LoopTop, LoopChain, &LoopBlockSet);
@@ -1533,7 +2196,7 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
if (!LoopBlockSet.empty()) {
BadLoop = true;
- for (MachineBasicBlock *LoopBB : LoopBlockSet)
+ for (const MachineBasicBlock *LoopBB : LoopBlockSet)
dbgs() << "Loop contains blocks never placed into a chain!\n"
<< " Loop header: " << getBlockName(*L.block_begin()) << "\n"
<< " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
@@ -1546,31 +2209,6 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
EHPadWorkList.clear();
}
-/// When OutlineOpitonalBranches is on, this method collects BBs that
-/// dominates all terminator blocks of the function \p F.
-void MachineBlockPlacement::collectMustExecuteBBs() {
- if (OutlineOptionalBranches) {
- // Find the nearest common dominator of all of F's terminators.
- MachineBasicBlock *Terminator = nullptr;
- for (MachineBasicBlock &MBB : *F) {
- if (MBB.succ_size() == 0) {
- if (Terminator == nullptr)
- Terminator = &MBB;
- else
- Terminator = MDT->findNearestCommonDominator(Terminator, &MBB);
- }
- }
-
- // MBBs dominating this common dominator are unavoidable.
- UnavoidableBlocks.clear();
- for (MachineBasicBlock &MBB : *F) {
- if (MDT->dominates(&MBB, Terminator)) {
- UnavoidableBlocks.insert(&MBB);
- }
- }
- }
-}
-
void MachineBlockPlacement::buildCFGChains() {
// Ensure that every BB in the function has an associated chain to simplify
// the assumptions of the remaining algorithm.
@@ -1605,9 +2243,6 @@ void MachineBlockPlacement::buildCFGChains() {
}
}
- // Turned on with OutlineOptionalBranches option
- collectMustExecuteBBs();
-
// Build any loop-based chains.
PreferredLoopExit = nullptr;
for (MachineLoop *L : *MLI)
@@ -1839,7 +2474,7 @@ void MachineBlockPlacement::alignBlocks() {
/// @return true if \p BB was removed.
bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
MachineBasicBlock *BB, MachineBasicBlock *&LPred,
- MachineBasicBlock *LoopHeaderBB,
+ const MachineBasicBlock *LoopHeaderBB,
BlockChain &Chain, BlockFilterSet *BlockFilter,
MachineFunction::iterator &PrevUnplacedBlockIt) {
bool Removed, DuplicatedToLPred;
@@ -1901,21 +2536,16 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
/// \return - True if the block was duplicated into all preds and removed.
bool MachineBlockPlacement::maybeTailDuplicateBlock(
MachineBasicBlock *BB, MachineBasicBlock *LPred,
- const BlockChain &Chain, BlockFilterSet *BlockFilter,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
MachineFunction::iterator &PrevUnplacedBlockIt,
bool &DuplicatedToLPred) {
-
DuplicatedToLPred = false;
+ if (!shouldTailDuplicate(BB))
+ return false;
+
DEBUG(dbgs() << "Redoing tail duplication for Succ#"
<< BB->getNumber() << "\n");
- bool IsSimple = TailDup.isSimpleBB(BB);
- // Blocks with single successors don't create additional fallthrough
- // opportunities. Don't duplicate them. TODO: When conditional exits are
- // analyzable, allow them to be duplicated.
- if (!IsSimple && BB->succ_size() == 1)
- return false;
- if (!TailDup.shouldTailDuplicate(IsSimple, *BB))
- return false;
+
// This has to be a callback because none of it can be done after
// BB is deleted.
bool Removed = false;
@@ -1967,6 +2597,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback);
SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
+ bool IsSimple = TailDup.isSimpleBB(BB);
TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred,
&DuplicatedPreds, &RemovalCallbackRef);
@@ -2006,21 +2637,24 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
MLI = &getAnalysis<MachineLoopInfo>();
TII = MF.getSubtarget().getInstrInfo();
TLI = MF.getSubtarget().getTargetLowering();
- MDT = &getAnalysis<MachineDominatorTree>();
+ MPDT = nullptr;
// Initialize PreferredLoopExit to nullptr here since it may never be set if
// there are no MachineLoops.
PreferredLoopExit = nullptr;
+ assert(BlockToChain.empty());
+ assert(ComputedEdges.empty());
+
if (TailDupPlacement) {
- unsigned TailDupSize = TailDuplicatePlacementThreshold;
+ MPDT = &getAnalysis<MachinePostDominatorTree>();
+ unsigned TailDupSize = TailDupPlacementThreshold;
if (MF.getFunction()->optForSize())
TailDupSize = 1;
TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
+ precomputeTriangleChains();
}
- assert(BlockToChain.empty());
-
buildCFGChains();
// Changing the layout can create new tail merging opportunities.
@@ -2032,7 +2666,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
BranchFoldPlacement;
// No tail merging opportunities if the block number is less than four.
if (MF.size() > 3 && EnableTailMerge) {
- unsigned TailMergeSize = TailDuplicatePlacementThreshold + 1;
+ unsigned TailMergeSize = TailDupPlacementThreshold + 1;
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
*MBPI, TailMergeSize);
@@ -2041,8 +2675,10 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
/*AfterBlockPlacement=*/true)) {
// Redo the layout if tail merging creates/removes/moves blocks.
BlockToChain.clear();
- // Must redo the dominator tree if blocks were changed.
- MDT->runOnMachineFunction(MF);
+ ComputedEdges.clear();
+ // Must redo the post-dominator tree if blocks were changed.
+ if (MPDT)
+ MPDT->runOnMachineFunction(MF);
ChainAllocator.DestroyAll();
buildCFGChains();
}
@@ -2052,6 +2688,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
alignBlocks();
BlockToChain.clear();
+ ComputedEdges.clear();
ChainAllocator.DestroyAll();
if (AlignAllBlock)
@@ -2067,6 +2704,12 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
MBI->setAlignment(AlignAllNonFallThruBlocks);
}
}
+ if (ViewBlockLayoutWithBFI != GVDT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ F->getFunction()->getName().equals(ViewBlockFreqFuncName))) {
+ MBFI->view("MBP." + MF.getName(), false);
+ }
+
// We always return true as we have no way to track whether the final order
// differs from the original order.
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index 5beed5f5dd08..50e453e4067c 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// The machine combiner pass uses machine trace metrics to ensure the combined
-// instructions does not lengthen the critical path or the resource depth.
+// instructions do not lengthen the critical path or the resource depth.
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "machine-combiner"
@@ -135,7 +135,9 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
// are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
for (auto *InstrPtr : InsInstrs) { // for each Use
unsigned IDepth = 0;
- DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(TII); dbgs() << "\n";);
+ DEBUG(dbgs() << "NEW INSTR ";
+ InstrPtr->print(dbgs(), TII);
+ dbgs() << "\n";);
for (const MachineOperand &MO : InstrPtr->operands()) {
// Check for virtual register operand.
if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
@@ -352,6 +354,19 @@ bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) {
return false;
}
+static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
+                                     SmallVectorImpl<MachineInstr *> &InsInstrs,
+                                     SmallVectorImpl<MachineInstr *> &DelInstrs,
+                                     MachineTraceMetrics *Traces) {
+ for (auto *InstrPtr : InsInstrs)
+ MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr);
+ for (auto *InstrPtr : DelInstrs)
+ InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
+ ++NumInstCombined;
+ Traces->invalidate(MBB);
+ Traces->verifyAnalysis();
+}
+
/// Substitute a slow code sequence with a faster one by
/// evaluating instruction combining pattern.
/// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction
@@ -406,7 +421,6 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
DenseMap<unsigned, unsigned> InstrIdxForVirtReg;
if (!MinInstr)
MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
- MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
Traces->verifyAnalysis();
TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs,
InstrIdxForVirtReg);
@@ -426,23 +440,23 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// fewer instructions OR
// the new sequence neither lengthens the critical path nor increases
// resource pressure.
- if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount) ||
- (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
- DelInstrs, InstrIdxForVirtReg, P) &&
- preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
- for (auto *InstrPtr : InsInstrs)
- MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr);
- for (auto *InstrPtr : DelInstrs)
- InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval();
-
- Changed = true;
- ++NumInstCombined;
-
- Traces->invalidate(MBB);
- Traces->verifyAnalysis();
+ if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) {
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
// Eagerly stop after the first pattern fires.
+ Changed = true;
break;
} else {
+ // Calculating the trace metrics may be expensive,
+ // so only do this when necessary.
+ MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+ if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
+ InstrIdxForVirtReg, P) &&
+ preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) {
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, Traces);
+ // Eagerly stop after the first pattern fires.
+ Changed = true;
+ break;
+ }
// Cleanup instructions of the alternative code sequence. There is no
// use for them.
MachineFunction *MF = MBB->getParent();
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 5de6dec29fb9..7312dc5e94bd 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -291,17 +291,9 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
if (MO.isDef()) {
Defs.push_back(Reg);
- } else {
+ continue;
+ } else if (MO.readsReg())
ReadRegister(Reg);
- }
- // Treat undef use like defs for copy propagation but not for
- // dead copy. We would need to do a liveness check to be sure the copy
- // is dead for undef uses.
- // The backends are allowed to do whatever they want with undef value
- // and we cannot be sure this register will not be rewritten to break
- // some false dependencies for the hardware for instance.
- if (MO.isUndef())
- Defs.push_back(Reg);
}
// The instruction has a register mask operand which means that it clobbers
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 303a6a9263be..e3a6c51c47ad 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -49,32 +49,29 @@ void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
CriticalEdgesToSplit.clear();
NewBBs.clear();
+ DT.reset(new DominatorTreeBase<MachineBasicBlock>(false));
DT->recalculate(F);
-
return false;
}
MachineDominatorTree::MachineDominatorTree()
: MachineFunctionPass(ID) {
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
- DT = new DominatorTreeBase<MachineBasicBlock>(false);
-}
-
-MachineDominatorTree::~MachineDominatorTree() {
- delete DT;
}
void MachineDominatorTree::releaseMemory() {
- DT->releaseMemory();
+ CriticalEdgesToSplit.clear();
+ DT.reset(nullptr);
}
void MachineDominatorTree::verifyAnalysis() const {
- if (VerifyMachineDomInfo)
+ if (DT && VerifyMachineDomInfo)
verifyDomTree();
}
void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
- DT->print(OS);
+ if (DT)
+ DT->print(OS);
}
void MachineDominatorTree::applySplitCriticalEdges() const {
@@ -143,15 +140,18 @@ void MachineDominatorTree::applySplitCriticalEdges() const {
}
void MachineDominatorTree::verifyDomTree() const {
+ if (!DT)
+ return;
MachineFunction &F = *getRoot()->getParent();
- MachineDominatorTree OtherDT;
- OtherDT.DT->recalculate(F);
- if (compare(OtherDT)) {
+ DominatorTreeBase<MachineBasicBlock> OtherDT(false);
+ OtherDT.recalculate(F);
+ if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() ||
+ DT->compare(OtherDT)) {
errs() << "MachineDominatorTree is not up to date!\nComputed:\n";
- print(errs(), nullptr);
+ DT->print(errs());
errs() << "\nActual:\n";
- OtherDT.print(errs(), nullptr);
+ OtherDT.print(errs());
abort();
}
}
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index c1d5ea96cd17..c9767a25e908 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -169,6 +169,7 @@ void MachineFunction::clear() {
InstructionRecycler.clear(Allocator);
OperandRecycler.clear(Allocator);
BasicBlockRecycler.clear(Allocator);
+ VariableDbgInfos.clear();
if (RegInfo) {
RegInfo->~MachineRegisterInfo();
Allocator.Deallocate(RegInfo);
@@ -859,7 +860,9 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
if (!isCalleeSavedInfoValid())
return BV;
- for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+  for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR)
BV.set(*CSR);
// Saved CSRs are not pristine.
@@ -956,7 +959,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void MachineFrameInfo::dump(const MachineFunction &MF) const {
+LLVM_DUMP_METHOD void MachineFrameInfo::dump(const MachineFunction &MF) const {
print(MF, dbgs());
}
#endif
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 2f2e3b3d8e9f..c0a8b95ed8a0 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -262,8 +262,21 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return getBlockAddress() == Other.getBlockAddress() &&
getOffset() == Other.getOffset();
case MachineOperand::MO_RegisterMask:
- case MachineOperand::MO_RegisterLiveOut:
- return getRegMask() == Other.getRegMask();
+ case MachineOperand::MO_RegisterLiveOut: {
+ // Shallow compare of the two RegMasks
+ const uint32_t *RegMask = getRegMask();
+ const uint32_t *OtherRegMask = Other.getRegMask();
+ if (RegMask == OtherRegMask)
+ return true;
+
+ // Calculate the size of the RegMask
+ const MachineFunction *MF = getParent()->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
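+    // A register mask stores one bit per physical register, packed into
+    // 32-bit words, hence the round-up division by 32.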
+ unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
+
+ // Deep compare of the two RegMasks
+ return std::equal(RegMask, RegMask + RegMaskSize, OtherRegMask);
+ }
case MachineOperand::MO_MCSymbol:
return getMCSymbol() == Other.getMCSymbol();
case MachineOperand::MO_CFIIndex:
@@ -274,6 +287,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return getIntrinsicID() == Other.getIntrinsicID();
case MachineOperand::MO_Predicate:
return getPredicate() == Other.getPredicate();
+ case MachineOperand::MO_Placeholder:
+ return true;
}
llvm_unreachable("Invalid machine operand type");
}
@@ -322,6 +337,8 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID());
case MachineOperand::MO_Predicate:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate());
+ case MachineOperand::MO_Placeholder:
+ return hash_combine();
}
llvm_unreachable("Invalid machine operand type");
}
@@ -403,6 +420,11 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
bool Unused;
APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Unused);
OS << "half " << APF.convertToFloat();
+ } else if (getFPImm()->getType()->isFP128Ty()) {
+    APFloat APF = getFPImm()->getValueAPF();
+    SmallString<16> Str;
+    APF.toString(Str);
+ OS << "quad " << Str;
} else {
OS << getFPImm()->getValueAPF().convertToDouble();
}
@@ -491,7 +513,11 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
auto Pred = static_cast<CmpInst::Predicate>(getPredicate());
OS << '<' << (CmpInst::isIntPredicate(Pred) ? "intpred" : "floatpred")
<< CmpInst::getPredicateName(Pred) << '>';
+ break;
}
+ case MachineOperand::MO_Placeholder:
+ OS << "<placeholder>";
+ break;
}
if (unsigned TF = getTargetFlags())
OS << "[TF=" << TF << ']';
@@ -1571,6 +1597,65 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
return true;
}
+bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
+ bool UseTBAA) {
+ const MachineFunction *MF = getParent()->getParent();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+ // If neither instruction stores to memory, they can't alias in any
+ // meaningful way, even if they read from the same address.
+ if (!mayStore() && !Other.mayStore())
+ return false;
+
+ // Let the target decide if memory accesses cannot possibly overlap.
+ if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA))
+ return false;
+
+ if (!AA)
+ return true;
+
+ // FIXME: Need to handle multiple memory operands to support all targets.
+ if (!hasOneMemOperand() || !Other.hasOneMemOperand())
+ return true;
+
+ MachineMemOperand *MMOa = *memoperands_begin();
+ MachineMemOperand *MMOb = *Other.memoperands_begin();
+
+ if (!MMOa->getValue() || !MMOb->getValue())
+ return true;
+
+ // The following interface to AA is fashioned after DAGCombiner::isAlias
+ // and operates with MachineMemOperand offset with some important
+ // assumptions:
+ // - LLVM fundamentally assumes flat address spaces.
+ // - MachineOperand offset can *only* result from legalization and
+ // cannot affect queries other than the trivial case of overlap
+ // checking.
+ // - These offsets never wrap and never step outside
+ // of allocated objects.
+ // - There should never be any negative offsets here.
+ //
+ // FIXME: Modify API to hide this math from "user"
+ // FIXME: Even before we go to AA we can reason locally about some
+ // memory objects. It can save compile time, and possibly catch some
+ // corner cases not currently covered.
+
+  assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
+  assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
+
+ int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
+ int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
+ int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
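+  // For example (illustrative values only): an 8-byte access at offset 16 and
+  // a 4-byte access at offset 0 give MinOffset = 0, Overlapa = 24, and
+  // Overlapb = 4, so each alias query below spans the full extent of its
+  // access measured from the common minimum offset.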
+
+ AliasResult AAResult =
+ AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
+ UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+ MemoryLocation(MMOb->getValue(), Overlapb,
+ UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
+
+ return (AAResult != NoAlias);
+}
+
/// hasOrderedMemoryRef - Return true if this instruction may have an ordered
/// or volatile memory reference, or if the information describing the memory
/// reference is not available. Return false if it is known to have no ordered
@@ -1692,14 +1777,14 @@ void MachineInstr::copyImplicitOps(MachineFunction &MF,
}
}
-LLVM_DUMP_METHOD void MachineInstr::dump(const TargetInstrInfo *TII) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineInstr::dump() const {
dbgs() << " ";
- print(dbgs(), false /* SkipOpers */, TII);
-#endif
+ print(dbgs());
}
+#endif
-void MachineInstr::print(raw_ostream &OS, bool SkipOpers,
+void MachineInstr::print(raw_ostream &OS, bool SkipOpers, bool SkipDebugLoc,
const TargetInstrInfo *TII) const {
const Module *M = nullptr;
if (const MachineBasicBlock *MBB = getParent())
@@ -1707,11 +1792,12 @@ void MachineInstr::print(raw_ostream &OS, bool SkipOpers,
M = MF->getFunction()->getParent();
ModuleSlotTracker MST(M);
- print(OS, MST, SkipOpers, TII);
+ print(OS, MST, SkipOpers, SkipDebugLoc, TII);
}
void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
- bool SkipOpers, const TargetInstrInfo *TII) const {
+ bool SkipOpers, bool SkipDebugLoc,
+ const TargetInstrInfo *TII) const {
// We can be a bit tidier if we know the MachineFunction.
const MachineFunction *MF = nullptr;
const TargetRegisterInfo *TRI = nullptr;
@@ -1987,6 +2073,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
if (isIndirectDebugValue())
OS << " indirect";
+ } else if (SkipDebugLoc) {
+ return;
} else if (debugLoc && MF) {
if (!HaveSemi)
OS << ";";
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index fdeaf7b71161..a9aa1d954e70 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -87,6 +87,22 @@ MachineBasicBlock *MachineLoop::findLoopControlBlock() {
return nullptr;
}
+DebugLoc MachineLoop::getStartLoc() const {
+ // Try the pre-header first.
+ if (MachineBasicBlock *PHeadMBB = getLoopPreheader())
+ if (const BasicBlock *PHeadBB = PHeadMBB->getBasicBlock())
+ if (DebugLoc DL = PHeadBB->getTerminator()->getDebugLoc())
+ return DL;
+
+  // If there is no pre-header, or its terminator carries no debug location,
+  // try the header.
+ if (MachineBasicBlock *HeadMBB = getHeader())
+ if (const BasicBlock *HeadBB = HeadMBB->getBasicBlock())
+ return HeadBB->getTerminator()->getDebugLoc();
+
+ return DebugLoc();
+}
+
MachineBasicBlock *
MachineLoopInfo::findLoopPreheader(MachineLoop *L,
bool SpeculativePreheader) const {
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 6618857477ed..2f0f4297ef5c 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -306,6 +306,10 @@ public:
MMI.deleteMachineFunctionFor(F);
return true;
}
+
+ StringRef getPassName() const override {
+ return "Free MachineFunction";
+ }
};
char FreeMachineFunction::ID;
} // end anonymous namespace
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index 22d519e5d88f..4c81fd91cb82 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -23,6 +23,7 @@ using namespace llvm;
// Out of line virtual method.
void MachineModuleInfoMachO::anchor() {}
void MachineModuleInfoELF::anchor() {}
+void MachineModuleInfoWasm::anchor() {}
static int SortSymbolPair(const void *LHS, const void *RHS) {
typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
new file mode 100644
index 000000000000..6b6b5f2814a9
--- /dev/null
+++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -0,0 +1,100 @@
+///===- MachineOptimizationRemarkEmitter.cpp - Opt Diagnostic -*- C++ -*---===//
+///
+/// The LLVM Compiler Infrastructure
+///
+/// This file is distributed under the University of Illinois Open Source
+/// License. See LICENSE.TXT for details.
+///
+///===---------------------------------------------------------------------===//
+/// \file
+/// Optimization diagnostic interfaces for machine passes. It's packaged as an
+/// analysis pass so that by using this service passes become dependent on MBFI
+/// as well. MBFI is used to compute the "hotness" of the diagnostic message.
+///
+///===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/LLVMContext.h"
+
+using namespace llvm;
+
+DiagnosticInfoMIROptimization::MachineArgument::MachineArgument(
+ StringRef MKey, const MachineInstr &MI)
+ : Argument() {
+ Key = MKey;
+
+ raw_string_ostream OS(Val);
+ MI.print(OS, /*SkipOpers=*/false, /*SkipDebugLoc=*/true);
+}
+
+Optional<uint64_t>
+MachineOptimizationRemarkEmitter::computeHotness(const MachineBasicBlock &MBB) {
+ if (!MBFI)
+ return None;
+
+ return MBFI->getBlockProfileCount(&MBB);
+}
+
+void MachineOptimizationRemarkEmitter::computeHotness(
+ DiagnosticInfoMIROptimization &Remark) {
+ const MachineBasicBlock *MBB = Remark.getBlock();
+ if (MBB)
+ Remark.setHotness(computeHotness(*MBB));
+}
+
+void MachineOptimizationRemarkEmitter::emit(
+ DiagnosticInfoOptimizationBase &OptDiagCommon) {
+ auto &OptDiag = cast<DiagnosticInfoMIROptimization>(OptDiagCommon);
+ computeHotness(OptDiag);
+
+ LLVMContext &Ctx = MF.getFunction()->getContext();
+ yaml::Output *Out = Ctx.getDiagnosticsOutputFile();
+ if (Out) {
+ auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiagCommon);
+ *Out << P;
+ }
+ // FIXME: now that IsVerbose is part of DI, filtering for this will be moved
+ // from here to clang.
+ if (!OptDiag.isVerbose() || shouldEmitVerbose())
+ Ctx.diagnose(OptDiag);
+}
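+
+// Typical use from a machine pass (a sketch; the pass name, remark name, and
+// variables are illustrative, not part of this file):
+//   MachineOptimizationRemarkEmitter &ORE =
+//       getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
+//   MachineOptimizationRemark R("my-pass", "Example", MI.getDebugLoc(),
+//                               MI.getParent());
+//   R << "example remark text";
+//   ORE.emit(R);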
+
+MachineOptimizationRemarkEmitterPass::MachineOptimizationRemarkEmitterPass()
+ : MachineFunctionPass(ID) {
+ initializeMachineOptimizationRemarkEmitterPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction(
+ MachineFunction &MF) {
+ MachineBlockFrequencyInfo *MBFI;
+
+ if (MF.getFunction()->getContext().getDiagnosticHotnessRequested())
+ MBFI = &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
+ else
+ MBFI = nullptr;
+
+ ORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
+ return false;
+}
+
+void MachineOptimizationRemarkEmitterPass::getAnalysisUsage(
+ AnalysisUsage &AU) const {
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+char MachineOptimizationRemarkEmitterPass::ID = 0;
+static const char ore_name[] = "Machine Optimization Remark Emitter";
+#define ORE_NAME "machine-opt-remark-emitter"
+
+INITIALIZE_PASS_BEGIN(MachineOptimizationRemarkEmitterPass, ORE_NAME, ore_name,
+ false, true)
+INITIALIZE_PASS_DEPENDENCY(LazyMachineBlockFrequencyInfoPass)
+INITIALIZE_PASS_END(MachineOptimizationRemarkEmitterPass, ORE_NAME, ore_name,
+ false, true)
diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp
new file mode 100644
index 000000000000..581a8ad81149
--- /dev/null
+++ b/lib/CodeGen/MachineOutliner.cpp
@@ -0,0 +1,1251 @@
+//===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Replaces repeated sequences of instructions with function calls.
+///
+/// This works by placing every instruction from every basic block in a
+/// suffix tree, and repeatedly querying that tree for repeated sequences of
+/// instructions. If a sequence of instructions appears often, then it ought
/// to be beneficial to pull it out into a function.
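+///
+/// For instance (an illustrative mapping, not from this file): if
+/// instructions are hashed to integers and two functions produce the
+/// sequences [1,2,3,7] and [9,1,2,3], the repeated subsequence [1,2,3] is a
+/// candidate for outlining.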
+///
+/// This was originally presented at the 2016 LLVM Developers' Meeting in the
+/// talk "Reducing Code Size Using Outlining". For a high-level overview of
+/// how this pass works, the talk is available on YouTube at
+///
+/// https://www.youtube.com/watch?v=yorld-WSOeU
+///
+/// The slides for the talk are available at
+///
+/// http://www.llvm.org/devmtg/2016-11/Slides/Paquette-Outliner.pdf
+///
+/// The talk provides an overview of how the outliner finds candidates and
+/// ultimately outlines them. It describes how the main data structure for this
+/// pass, the suffix tree, is queried and purged for candidates. It also gives
+/// a simplified suffix tree construction algorithm for suffix trees based off
+/// of the algorithm actually used here, Ukkonen's algorithm.
+///
+/// For the original RFC for this pass, please see
+///
+/// http://lists.llvm.org/pipermail/llvm-dev/2016-August/104170.html
+///
+/// For more information on the suffix tree data structure, please see
+/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
+///
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <functional>
+#include <map>
+#include <sstream>
+#include <tuple>
+#include <vector>
+
+#define DEBUG_TYPE "machine-outliner"
+
+using namespace llvm;
+
+STATISTIC(NumOutlined, "Number of candidates outlined");
+STATISTIC(FunctionsCreated, "Number of functions created");
+
+namespace {
+
+/// \brief An individual sequence of instructions to be replaced with a call to
+/// an outlined function.
+struct Candidate {
+
+ /// Set to false if the candidate overlapped with another candidate.
+ bool InCandidateList = true;
+
+ /// The start index of this \p Candidate.
+ size_t StartIdx;
+
+ /// The number of instructions in this \p Candidate.
+ size_t Len;
+
+ /// The index of this \p Candidate's \p OutlinedFunction in the list of
+ /// \p OutlinedFunctions.
+ size_t FunctionIdx;
+
+ /// \brief The number of instructions that would be saved by outlining every
+ /// candidate of this type.
+ ///
+ /// This is a fixed value which is not updated during the candidate pruning
+ /// process. It is only used for deciding which candidate to keep if two
+ /// candidates overlap. The true benefit is stored in the OutlinedFunction
+ /// for a given candidate.
+ unsigned Benefit = 0;
+
+ Candidate(size_t StartIdx, size_t Len, size_t FunctionIdx)
+ : StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx) {}
+
+ Candidate() {}
+
+ /// \brief Used to sort \p Candidates by descending start index, so that
+ /// outlining one candidate never shifts the start and end indices of the
+ /// \p Candidates still to be outlined.
+ bool operator<(const Candidate &RHS) const { return StartIdx > RHS.StartIdx; }
+};
+
+/// \brief The information necessary to create an outlined function for some
+/// class of candidate.
+struct OutlinedFunction {
+
+ /// The actual outlined function created.
+ /// This is initialized after we go through and create the actual function.
+ MachineFunction *MF = nullptr;
+
+ /// A number assigned to this function which appears at the end of its name.
+ size_t Name;
+
+ /// The number of candidates for this OutlinedFunction.
+ size_t OccurrenceCount = 0;
+
+ /// \brief The sequence of integers corresponding to the instructions in this
+ /// function.
+ std::vector<unsigned> Sequence;
+
+ /// The number of instructions this function would save.
+ unsigned Benefit = 0;
+
+ /// \brief Set to true if candidates for this outlined function should be
+ /// replaced with tail calls to this OutlinedFunction.
+ bool IsTailCall = false;
+
+ OutlinedFunction(size_t Name, size_t OccurrenceCount,
+ const std::vector<unsigned> &Sequence,
+ unsigned Benefit, bool IsTailCall)
+ : Name(Name), OccurrenceCount(OccurrenceCount), Sequence(Sequence),
+ Benefit(Benefit), IsTailCall(IsTailCall)
+ {}
+};
+
+/// Represents an undefined index in the suffix tree.
+const size_t EmptyIdx = -1;
+
+/// A node in a suffix tree which represents a substring or suffix.
+///
+/// Each node has either no children or at least two children, with the root
+/// being an exception in the empty tree.
+///
+/// Children are represented as a map between unsigned integers and nodes. If
+/// a node N has a child M on unsigned integer k, then the mapping represented
+/// by N is a proper prefix of the mapping represented by M. Note that this,
+/// although similar to a trie is somewhat different: each node stores a full
+/// substring of the full mapping rather than a single character state.
+///
+/// Each internal node contains a pointer to the internal node representing
+/// the same string, but with the first character chopped off. This is stored
+/// in \p Link. Each leaf node stores the start index of its respective
+/// suffix in \p SuffixIdx.
+struct SuffixTreeNode {
+
+ /// The children of this node.
+ ///
+ /// A child existing on an unsigned integer implies that from the mapping
+ /// represented by the current node, there is a way to reach another
+ /// mapping by tacking that character on the end of the current string.
+ DenseMap<unsigned, SuffixTreeNode *> Children;
+
+ /// A flag set to false if the node has been pruned from the tree.
+ bool IsInTree = true;
+
+ /// The start index of this node's substring in the main string.
+ size_t StartIdx = EmptyIdx;
+
+ /// The end index of this node's substring in the main string.
+ ///
+ /// Every leaf node must have its \p EndIdx incremented at the end of every
+ /// step in the construction algorithm. To avoid having to update O(N)
+ /// nodes individually at the end of every step, the end index is stored
+ /// as a pointer.
+ size_t *EndIdx = nullptr;
+
+ /// For leaves, the start index of the suffix represented by this node.
+ ///
+ /// For all other nodes, this is ignored.
+ size_t SuffixIdx = EmptyIdx;
+
+ /// \brief For internal nodes, a pointer to the internal node representing
+ /// the same sequence with the first character chopped off.
+ ///
+ /// This has two major purposes in the suffix tree. The first is as a
+ /// shortcut in Ukkonen's construction algorithm. One of the things that
+ /// Ukkonen's algorithm does to achieve linear-time construction is
+ /// keep track of which node the next insert should be at. This makes each
+ /// insert O(1), and there are a total of O(N) inserts. The suffix link
+ /// helps with inserting children of internal nodes.
+ ///
+/// Say we add a child to an internal node with associated mapping S. The
+/// next insertion must be at the node representing S minus its first
+/// character.
+ /// This is given by the way that we iteratively build the tree in Ukkonen's
+ /// algorithm. The main idea is to look at the suffixes of each prefix in the
+ /// string, starting with the longest suffix of the prefix, and ending with
+ /// the shortest. Therefore, if we keep pointers between such nodes, we can
+ /// move to the next insertion point in O(1) time. If we don't, then we'd
+ /// have to query from the root, which takes O(N) time. This would make the
+ /// construction algorithm O(N^2) rather than O(N).
+ ///
+ /// The suffix link is also used during the tree pruning process to let us
+ /// quickly throw out a bunch of potential overlaps. Say we have a sequence
+ /// S we want to outline. Then each of its suffixes contribute to at least
+ /// one overlapping case. Therefore, we can follow the suffix links
+ /// starting at the node associated with S to the root and "delete" those
+ /// nodes, save for the root. For each candidate, this removes
+ /// O(|candidate|) overlaps from the search space. We don't actually
+ /// completely invalidate these nodes though; doing that is far too
+ /// aggressive. Consider the following pathological string:
+ ///
+ /// 1 2 3 1 2 3 2 3 2 3 2 3 2 3 2 3 2 3
+ ///
+ /// If we, for the sake of example, outlined 1 2 3, then we would throw
+ /// out all instances of 2 3. This isn't desirable. To get around this,
+ /// when we visit a link node, we decrement its occurrence count by the
+ /// number of sequences we outlined in the current step. In the pathological
+ /// example, the 2 3 node would have an occurrence count of 8, while the
+ /// 1 2 3 node would have an occurrence count of 2. Thus, the 2 3 node
+ /// would survive to the next round allowing us to outline the extra
+ /// instances of 2 3.
+ SuffixTreeNode *Link = nullptr;
+
+ /// The parent of this node. Every node except for the root has a parent.
+ SuffixTreeNode *Parent = nullptr;
+
+ /// The number of times this node's string appears in the tree.
+ ///
+ /// This is equal to the number of leaf children of the string. It represents
+ /// the number of suffixes that the node's string is a prefix of.
+ size_t OccurrenceCount = 0;
+
+ /// The length of the string formed by concatenating the edge labels from the
+ /// root to this node.
+ size_t ConcatLen = 0;
+
+ /// Returns true if this node is a leaf.
+ bool isLeaf() const { return SuffixIdx != EmptyIdx; }
+
+ /// Returns true if this node is the root of its owning \p SuffixTree.
+ bool isRoot() const { return StartIdx == EmptyIdx; }
+
+ /// Return the number of elements in the substring associated with this node.
+ size_t size() const {
+
+ // Is it the root? If so, it's the empty string so return 0.
+ if (isRoot())
+ return 0;
+
+ assert(*EndIdx != EmptyIdx && "EndIdx is undefined!");
+
+ // Size = the number of elements in the string.
+ // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1.
+ return *EndIdx - StartIdx + 1;
+ }
+
+ SuffixTreeNode(size_t StartIdx, size_t *EndIdx, SuffixTreeNode *Link,
+ SuffixTreeNode *Parent)
+ : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {}
+
+ SuffixTreeNode() {}
+};
+
+/// A data structure for fast substring queries.
+///
+/// Suffix trees represent the suffixes of their input strings in their leaves.
+/// A suffix tree is a type of compressed trie structure where each node
+/// represents an entire substring rather than a single character. Each leaf
+/// of the tree is a suffix.
+///
+/// A suffix tree can be seen as a type of state machine where each state is a
+/// substring of the full string. The tree is structured so that, for a string
+/// of length N, there are exactly N leaves in the tree. This structure allows
+/// us to quickly find repeated substrings of the input string.
+///
+/// In this implementation, a "string" is a vector of unsigned integers.
+/// These integers may result from hashing some data type. A suffix tree can
+/// contain one or many strings, which can then be queried as one large
+/// string.
+///
+/// The suffix tree is implemented using Ukkonen's algorithm for linear-time
+/// suffix tree construction. Ukkonen's algorithm is explained in more detail
+/// in the paper by Esko Ukkonen, "On-line construction of suffix trees". The
+/// paper is available at
+///
+/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
+class SuffixTree {
+private:
+ /// Each element is an integer representing an instruction in the module.
+ ArrayRef<unsigned> Str;
+
+ /// Maintains each node in the tree.
+ SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator;
+
+ /// The root of the suffix tree.
+ ///
+ /// The root represents the empty string. It is maintained by the
+ /// \p NodeAllocator like every other node in the tree.
+ SuffixTreeNode *Root = nullptr;
+
+ /// Stores each leaf node in the tree.
+ ///
+ /// This is used for finding outlining candidates.
+ std::vector<SuffixTreeNode *> LeafVector;
+
+ /// Maintains the end indices of the internal nodes in the tree.
+ ///
+ /// Each internal node is guaranteed to never have its end index change
+ /// during the construction algorithm; however, leaves must be updated at
+ /// every step. Therefore, we need to store leaf end indices by reference
+ /// to avoid updating O(N) leaves at every step of construction. Thus,
+ /// every internal node must be allocated its own end index.
+ BumpPtrAllocator InternalEndIdxAllocator;
+
+ /// The end index of each leaf in the tree.
+ size_t LeafEndIdx = -1;
+
+ /// \brief Helper struct which keeps track of the next insertion point in
+ /// Ukkonen's algorithm.
+ struct ActiveState {
+ /// The next node to insert at.
+ SuffixTreeNode *Node;
+
+ /// The index of the first character in the substring currently being added.
+ size_t Idx = EmptyIdx;
+
+ /// The length of the substring we have to add at the current step.
+ size_t Len = 0;
+ };
+
+ /// \brief The point the next insertion will take place at in the
+ /// construction algorithm.
+ ActiveState Active;
+
+ /// Allocate a leaf node and add it to the tree.
+ ///
+ /// \param Parent The parent of this node.
+ /// \param StartIdx The start index of this node's associated string.
+ /// \param Edge The label on the edge leaving \p Parent to this node.
+ ///
+ /// \returns A pointer to the allocated leaf node.
+ SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, size_t StartIdx,
+ unsigned Edge) {
+
+ assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
+
+ SuffixTreeNode *N = new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx,
+ &LeafEndIdx,
+ nullptr,
+ &Parent);
+ Parent.Children[Edge] = N;
+
+ return N;
+ }
+
+ /// Allocate an internal node and add it to the tree.
+ ///
+ /// \param Parent The parent of this node. Only null when allocating the root.
+ /// \param StartIdx The start index of this node's associated string.
+ /// \param EndIdx The end index of this node's associated string.
+ /// \param Edge The label on the edge leaving \p Parent to this node.
+ ///
+ /// \returns A pointer to the allocated internal node.
+ SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, size_t StartIdx,
+ size_t EndIdx, unsigned Edge) {
+
+ assert(StartIdx <= EndIdx && "String can't start after it ends!");
+ assert(!(!Parent && StartIdx != EmptyIdx) &&
+ "Non-root internal nodes must have parents!");
+
+ size_t *E = new (InternalEndIdxAllocator) size_t(EndIdx);
+ SuffixTreeNode *N = new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx,
+ E,
+ Root,
+ Parent);
+ if (Parent)
+ Parent->Children[Edge] = N;
+
+ return N;
+ }
+
+ /// \brief Set the suffix indices of the leaves to the start indices of their
+ /// respective suffixes. Also stores each leaf in \p LeafVector at its
+ /// respective suffix index.
+ ///
+ /// \param[in] CurrNode The node currently being visited.
+ /// \param CurrIdx The current index of the string being visited.
+ void setSuffixIndices(SuffixTreeNode &CurrNode, size_t CurrIdx) {
+
+ bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot();
+
+ // Store the length of the concatenation of all strings from the root to
+ // this node.
+ if (!CurrNode.isRoot()) {
+ if (CurrNode.ConcatLen == 0)
+ CurrNode.ConcatLen = CurrNode.size();
+
+ if (CurrNode.Parent)
+ CurrNode.ConcatLen += CurrNode.Parent->ConcatLen;
+ }
+
+ // Traverse the tree depth-first.
+ for (auto &ChildPair : CurrNode.Children) {
+ assert(ChildPair.second && "Node had a null child!");
+ setSuffixIndices(*ChildPair.second,
+ CurrIdx + ChildPair.second->size());
+ }
+
+ // Is this node a leaf?
+ if (IsLeaf) {
+ // If yes, give it a suffix index and bump its parent's occurrence count.
+ CurrNode.SuffixIdx = Str.size() - CurrIdx;
+ assert(CurrNode.Parent && "CurrNode had no parent!");
+ CurrNode.Parent->OccurrenceCount++;
+
+ // Store the leaf in the leaf vector for pruning later.
+ LeafVector[CurrNode.SuffixIdx] = &CurrNode;
+ }
+ }
+
+ /// \brief Construct the suffix tree for the prefix of the input ending at
+ /// \p EndIdx.
+ ///
+ /// Used to construct the full suffix tree iteratively. At the end of each
+ /// step, the constructed suffix tree is either a valid suffix tree, or a
+ /// suffix tree with implicit suffixes. At the end of the final step, the
+ /// suffix tree is a valid tree.
+ ///
+ /// \param EndIdx The end index of the current prefix in the main string.
+ /// \param SuffixesToAdd The number of suffixes that must be added
+ /// to complete the suffix tree at the current phase.
+ ///
+ /// \returns The number of suffixes that have not been added at the end of
+ /// this step.
+ unsigned extend(size_t EndIdx, size_t SuffixesToAdd) {
+ SuffixTreeNode *NeedsLink = nullptr;
+
+ while (SuffixesToAdd > 0) {
+
+ // Are we waiting to add anything other than just the last character?
+ if (Active.Len == 0) {
+ // If not, then say the active index is the end index.
+ Active.Idx = EndIdx;
+ }
+
+ assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
+
+ // The first character in the current substring we're looking at.
+ unsigned FirstChar = Str[Active.Idx];
+
+ // Have we inserted anything starting with FirstChar at the current node?
+ if (Active.Node->Children.count(FirstChar) == 0) {
+ // If not, then we can just insert a leaf and move to the next step.
+ insertLeaf(*Active.Node, EndIdx, FirstChar);
+
+ // The active node is an internal node, and we visited it, so it must
+ // need a link if it doesn't have one.
+ if (NeedsLink) {
+ NeedsLink->Link = Active.Node;
+ NeedsLink = nullptr;
+ }
+ } else {
+ // There's a match with FirstChar, so look for the point in the tree to
+ // insert a new node.
+ SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
+
+ size_t SubstringLen = NextNode->size();
+
+ // Is the current suffix we're trying to insert longer than the size of
+ // the child we want to move to?
+ if (Active.Len >= SubstringLen) {
+ // If yes, then consume the characters we've seen and move to the next
+ // node.
+ Active.Idx += SubstringLen;
+ Active.Len -= SubstringLen;
+ Active.Node = NextNode;
+ continue;
+ }
+
+ // Otherwise, the suffix we're trying to insert must end partway along
+ // the edge to the next node.
+ unsigned LastChar = Str[EndIdx];
+
+ // Is the string we're trying to insert a substring of the next node?
+ if (Str[NextNode->StartIdx + Active.Len] == LastChar) {
+ // If yes, then we're done for this step. Remember our insertion point
+ // and move to the next end index. At this point, we have an implicit
+ // suffix tree.
+ if (NeedsLink && !Active.Node->isRoot()) {
+ NeedsLink->Link = Active.Node;
+ NeedsLink = nullptr;
+ }
+
+ Active.Len++;
+ break;
+ }
+
+ // The string we're trying to insert isn't a substring of the next node,
+ // but matches up to a point. Split the node.
+ //
+ // For example, say we ended our search at a node n and we're trying to
+ // insert ABD. Then we'll create a new node s for AB, reduce n to just
+ // representing C, and insert a new leaf node l to represent D. This
+ // allows us to ensure that if n was a leaf, it remains a leaf.
+ //
+ //      | ABC  ---split--->  | AB
+ //      n                    s
+ //                         C / \ D
+ //                         n    l
+
+ // The node s from the diagram
+ SuffixTreeNode *SplitNode =
+ insertInternalNode(Active.Node,
+ NextNode->StartIdx,
+ NextNode->StartIdx + Active.Len - 1,
+ FirstChar);
+
+ // Insert the new node representing the new substring into the tree as
+ // a child of the split node. This is the node l from the diagram.
+ insertLeaf(*SplitNode, EndIdx, LastChar);
+
+ // Make the old node a child of the split node and update its start
+ // index. This is the node n from the diagram.
+ NextNode->StartIdx += Active.Len;
+ NextNode->Parent = SplitNode;
+ SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
+
+ // SplitNode is an internal node, update the suffix link.
+ if (NeedsLink)
+ NeedsLink->Link = SplitNode;
+
+ NeedsLink = SplitNode;
+ }
+
+ // We've added something new to the tree, so there's one less suffix to
+ // add.
+ SuffixesToAdd--;
+
+ if (Active.Node->isRoot()) {
+ if (Active.Len > 0) {
+ Active.Len--;
+ Active.Idx = EndIdx - SuffixesToAdd + 1;
+ }
+ } else {
+ // Start the next phase at the next smallest suffix.
+ Active.Node = Active.Node->Link;
+ }
+ }
+
+ return SuffixesToAdd;
+ }
+
+public:
+
+ /// Find all repeated substrings that satisfy \p BenefitFn.
+ ///
+ /// If a substring appears at least twice, then it must be represented by
+ /// an internal node which appears in at least two suffixes. Each suffix is
+ /// represented by a leaf node. To do this, we visit each internal node in
+ /// the tree, using the leaf children of each internal node. If an internal
+ /// node represents a beneficial substring, then we use each of its leaf
+ /// children to find the locations of its substring.
+ ///
+ /// \param[out] CandidateList Filled with candidates representing each
+ /// beneficial substring.
+ /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions,
+ /// one for each type of candidate.
+ /// \param BenefitFn Returns the benefit of outlining a candidate; a result
+ /// of zero rejects the candidate.
+ ///
+ /// \returns The length of the longest candidate found.
+ size_t findCandidates(std::vector<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ const std::function<unsigned(SuffixTreeNode &, size_t, unsigned)>
+ &BenefitFn) {
+
+ CandidateList.clear();
+ FunctionList.clear();
+ size_t FnIdx = 0;
+ size_t MaxLen = 0;
+
+ for (SuffixTreeNode* Leaf : LeafVector) {
+ assert(Leaf && "Leaves in LeafVector cannot be null!");
+ if (!Leaf->IsInTree)
+ continue;
+
+ assert(Leaf->Parent && "All leaves must have parents!");
+ SuffixTreeNode &Parent = *(Leaf->Parent);
+
+ // If it doesn't appear enough, or we already outlined from it, skip it.
+ if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
+ continue;
+
+ size_t StringLen = Leaf->ConcatLen - Leaf->size();
+
+ // How many instructions would outlining this string save?
+ unsigned Benefit = BenefitFn(Parent,
+ StringLen, Str[Leaf->SuffixIdx + StringLen - 1]);
+
+ // If it's not beneficial, skip it.
+ if (Benefit < 1)
+ continue;
+
+ if (StringLen > MaxLen)
+ MaxLen = StringLen;
+
+ unsigned OccurrenceCount = 0;
+ for (auto &ChildPair : Parent.Children) {
+ SuffixTreeNode *M = ChildPair.second;
+
+ // Is it a leaf? If so, we have an occurrence of this candidate.
+ if (M && M->IsInTree && M->isLeaf()) {
+ OccurrenceCount++;
+ CandidateList.emplace_back(M->SuffixIdx, StringLen, FnIdx);
+ CandidateList.back().Benefit = Benefit;
+ M->IsInTree = false;
+ }
+ }
+
+ // Save the function for the new candidate sequence.
+ std::vector<unsigned> CandidateSequence;
+ for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++)
+ CandidateSequence.push_back(Str[i]);
+
+ FunctionList.emplace_back(FnIdx, OccurrenceCount, CandidateSequence,
+ Benefit, false);
+
+ // Move to the next function.
+ FnIdx++;
+ Parent.IsInTree = false;
+ }
+
+ return MaxLen;
+ }
+
+ /// Construct a suffix tree from a sequence of unsigned integers.
+ ///
+ /// \param Str The string to construct the suffix tree for.
+ SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
+ Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
+ Root->IsInTree = true;
+ Active.Node = Root;
+ LeafVector = std::vector<SuffixTreeNode*>(Str.size());
+
+ // Keep track of the number of suffixes we have to add of the current
+ // prefix.
+ size_t SuffixesToAdd = 0;
+ Active.Node = Root;
+
+ // Construct the suffix tree iteratively on each prefix of the string.
+ // PfxEndIdx is the end index of the current prefix.
+ // End is one past the last element in the string.
+ for (size_t PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) {
+ SuffixesToAdd++;
+ LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
+ SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
+ }
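+
+ // Construction sketch (conceptual): for Str = [1, 2, 1, 3], the first
+ // two phases insert leaves for "1" and "2" from the root. In the third
+ // phase the suffix "1" already exists implicitly along the edge labeled
+ // "1 2 ...", so that phase ends early with Active.Len = 1 and one
+ // suffix left over. The final phase splits that edge after "1", hangs a
+ // new leaf for the suffix "1 3" off the split node, and adds the leaf
+ // "3" from the root, leaving one leaf per suffix. The unique sentinel
+ // appended to each block (see InstructionMapper below) guarantees that
+ // no suffix stays implicit once construction finishes.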
+
+ // Set the suffix indices of each leaf.
+ assert(Root && "Root node can't be nullptr!");
+ setSuffixIndices(*Root, 0);
+ }
+};
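+
+// A minimal usage sketch for the class above (illustrative only; the real
+// query is built in buildCandidateList below). The values 91 and 92 stand in
+// for the unique block-terminating sentinels the pass appends; without them
+// some suffixes would stay implicit and never reach LeafVector.
+//
+//   std::vector<unsigned> Toy = {1, 2, 3, 91, 1, 2, 3, 92};
+//   SuffixTree ST(Toy);
+//   std::vector<Candidate> CL;
+//   std::vector<OutlinedFunction> FL;
+//   size_t MaxLen = ST.findCandidates(CL, FL,
+//       [](SuffixTreeNode &Parent, size_t StringLen, unsigned) {
+//         return StringLen >= 2 ? (unsigned)StringLen : 0u;
+//       });
+//
+// This reports the repeated sequences (1 2 3 and its suffix 2 3) along with
+// their start indices, and MaxLen comes back as 3.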
+
+/// \brief Maps \p MachineInstrs to unsigned integers and stores the mappings.
+struct InstructionMapper {
+
+ /// \brief The next available integer to assign to a \p MachineInstr that
+ /// cannot be outlined.
+ ///
+ /// Set to -3 for compatibility with \p DenseMapInfo<unsigned>.
+ unsigned IllegalInstrNumber = -3;
+
+ /// \brief The next available integer to assign to a \p MachineInstr that can
+ /// be outlined.
+ unsigned LegalInstrNumber = 0;
+
+ /// Correspondence from \p MachineInstrs to unsigned integers.
+ DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>
+ InstructionIntegerMap;
+
+ /// Correspondence from unsigned integers to \p MachineInstrs.
+ /// Inverse of \p InstructionIntegerMap.
+ DenseMap<unsigned, MachineInstr *> IntegerInstructionMap;
+
+ /// The vector of unsigned integers that the module is mapped to.
+ std::vector<unsigned> UnsignedVec;
+
+ /// \brief Stores the location of the instruction associated with the integer
+ /// at index i in \p UnsignedVec for each index i.
+ std::vector<MachineBasicBlock::iterator> InstrList;
+
+ /// \brief Maps \p *It to a legal integer.
+ ///
+ /// Updates \p InstrList, \p UnsignedVec, \p InstructionIntegerMap,
+ /// \p IntegerInstructionMap, and \p LegalInstrNumber.
+ ///
+ /// \returns The integer that \p *It was mapped to.
+ unsigned mapToLegalUnsigned(MachineBasicBlock::iterator &It) {
+
+ // Get the integer for this instruction or give it the current
+ // LegalInstrNumber.
+ InstrList.push_back(It);
+ MachineInstr &MI = *It;
+ bool WasInserted;
+ DenseMap<MachineInstr *, unsigned, MachineInstrExpressionTrait>::iterator
+ ResultIt;
+ std::tie(ResultIt, WasInserted) =
+ InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber));
+ unsigned MINumber = ResultIt->second;
+
+ // There was an insertion.
+ if (WasInserted) {
+ LegalInstrNumber++;
+ IntegerInstructionMap.insert(std::make_pair(MINumber, &MI));
+ }
+
+ UnsignedVec.push_back(MINumber);
+
+ // Make sure we don't overflow or use any integers reserved by the DenseMap.
+ if (LegalInstrNumber >= IllegalInstrNumber)
+ report_fatal_error("Instruction mapping overflow!");
+
+ assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey()
+ && "Tried to assign DenseMap tombstone or empty key to instruction.");
+ assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey()
+ && "Tried to assign DenseMap tombstone or empty key to instruction.");
+
+ return MINumber;
+ }
+
+ /// Maps \p *It to an illegal integer.
+ ///
+ /// Updates \p InstrList, \p UnsignedVec, and \p IllegalInstrNumber.
+ ///
+ /// \returns The integer that \p *It was mapped to.
+ unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It) {
+ unsigned MINumber = IllegalInstrNumber;
+
+ InstrList.push_back(It);
+ UnsignedVec.push_back(IllegalInstrNumber);
+ IllegalInstrNumber--;
+
+ assert(LegalInstrNumber < IllegalInstrNumber &&
+ "Instruction mapping overflow!");
+
+ assert(IllegalInstrNumber !=
+ DenseMapInfo<unsigned>::getEmptyKey() &&
+ "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
+
+ assert(IllegalInstrNumber !=
+ DenseMapInfo<unsigned>::getTombstoneKey() &&
+ "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
+
+ return MINumber;
+ }
+
+ /// \brief Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds
+ /// and appends it to \p UnsignedVec and \p InstrList.
+ ///
+ /// Two instructions are assigned the same integer if they are identical.
+ /// If an instruction is deemed unsafe to outline, then it will be assigned
+ /// a unique integer. The resulting mapping is placed into a suffix tree and
+ /// queried for candidates.
+ ///
+ /// \param MBB The \p MachineBasicBlock to be translated into integers.
+ /// \param TRI \p TargetRegisterInfo for the module.
+ /// \param TII \p TargetInstrInfo for the module.
+ void convertToUnsignedVec(MachineBasicBlock &MBB,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII) {
+ for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et;
+ It++) {
+
+ // Keep track of where this instruction is in the module.
+ switch(TII.getOutliningType(*It)) {
+ case TargetInstrInfo::MachineOutlinerInstrType::Illegal:
+ mapToIllegalUnsigned(It);
+ break;
+
+ case TargetInstrInfo::MachineOutlinerInstrType::Legal:
+ mapToLegalUnsigned(It);
+ break;
+
+ case TargetInstrInfo::MachineOutlinerInstrType::Invisible:
+ break;
+ }
+ }
+
+ // After every insertion is done, uniquely terminate this part of the
+ // "string". This makes sure we won't match across basic block or function
+ // boundaries since the "end" is encoded uniquely and thus appears in no
+ // repeated substring.
+ InstrList.push_back(MBB.end());
+ UnsignedVec.push_back(IllegalInstrNumber);
+ IllegalInstrNumber--;
+ }
+
+ InstructionMapper() {
+ // Make sure that the implementation of DenseMapInfo<unsigned> hasn't
+ // changed.
+ assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
+ "DenseMapInfo<unsigned>'s empty key isn't -1!");
+ assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
+ "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
+ }
+};
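+
+// Mapping sketch (conceptual values): if a block's instructions hash as
+// A, B, A, U, A, B, where U is unsafe to outline, convertToUnsignedVec
+// produces, writing the unsigned values as signed for readability,
+//
+//   UnsignedVec = [0, 1, 0, -3, 0, 1, -4]
+//
+// Identical legal instructions share a number counting up from 0, while the
+// unsafe instruction and the block-terminating sentinel each consume a fresh
+// number counting down from -3, so they can never be part of a repeated
+// substring.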
+
+/// \brief An interprocedural pass which finds repeated sequences of
+/// instructions and replaces them with calls to functions.
+///
+/// Each instruction is mapped to an unsigned integer and placed in a string.
+/// The resulting mapping is then placed in a \p SuffixTree. The \p SuffixTree
+/// is then repeatedly queried for repeated sequences of instructions. Each
+/// non-overlapping repeated sequence is then placed in its own
+/// \p MachineFunction and each instance is then replaced with a call to that
+/// function.
+struct MachineOutliner : public ModulePass {
+
+ static char ID;
+
+ StringRef getPassName() const override { return "Machine Outliner"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineModuleInfo>();
+ AU.addPreserved<MachineModuleInfo>();
+ AU.setPreservesAll();
+ ModulePass::getAnalysisUsage(AU);
+ }
+
+ MachineOutliner() : ModulePass(ID) {
+ initializeMachineOutlinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// \brief Replace the sequences of instructions represented by the
+ /// \p Candidates in \p CandidateList with calls to \p MachineFunctions
+ /// described in \p FunctionList.
+ ///
+ /// \param M The module we are outlining from.
+ /// \param CandidateList A list of candidates to be outlined.
+ /// \param FunctionList A list of functions to be inserted into the module.
+ /// \param Mapper Contains the instruction mappings for the module.
+ bool outline(Module &M, const ArrayRef<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ InstructionMapper &Mapper);
+
+ /// Creates a function for \p OF and inserts it into the module.
+ MachineFunction *createOutlinedFunction(Module &M, const OutlinedFunction &OF,
+ InstructionMapper &Mapper);
+
+ /// Find potential outlining candidates and store them in \p CandidateList.
+ ///
+ /// For each type of potential candidate, also build an \p OutlinedFunction
+ /// struct containing the information to build the function for that
+ /// candidate.
+ ///
+ /// \param[out] CandidateList Filled with outlining candidates for the module.
+ /// \param[out] FunctionList Filled with functions corresponding to each type
+ /// of \p Candidate.
+ /// \param ST The suffix tree for the module.
+ /// \param TII TargetInstrInfo for the module.
+ ///
+ /// \returns The length of the longest candidate found. 0 if there are none.
+ unsigned buildCandidateList(std::vector<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ SuffixTree &ST,
+ InstructionMapper &Mapper,
+ const TargetInstrInfo &TII);
+
+ /// \brief Remove any overlapping candidates that weren't handled by the
+ /// suffix tree's pruning method.
+ ///
+ /// Pruning from the suffix tree doesn't necessarily remove all overlaps.
+ /// If a short candidate is chosen for outlining, and then a longer
+ /// candidate which has that short candidate as a suffix is chosen, the
+ /// tree's pruning method will not find the overlap. Thus, we need to prune
+ /// before outlining as well.
+ ///
+ /// \param[in,out] CandidateList A list of outlining candidates.
+ /// \param[in,out] FunctionList A list of functions to be outlined.
+ /// \param MaxCandidateLen The length of the longest candidate.
+ /// \param TII TargetInstrInfo for the module.
+ void pruneOverlaps(std::vector<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ unsigned MaxCandidateLen,
+ const TargetInstrInfo &TII);
+
+ /// Construct a suffix tree on the instructions in \p M and outline repeated
+ /// strings from that tree.
+ bool runOnModule(Module &M) override;
+};
+
+} // Anonymous namespace.
+
+char MachineOutliner::ID = 0;
+
+namespace llvm {
+ModulePass *createMachineOutlinerPass() { return new MachineOutliner(); }
+}
+
+INITIALIZE_PASS(MachineOutliner, "machine-outliner",
+ "Machine Function Outliner", false, false)
+
+void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ unsigned MaxCandidateLen,
+ const TargetInstrInfo &TII) {
+ // TODO: Experiment with interval trees or other interval-checking structures
+ // to lower the time complexity of this function.
+ // TODO: Can we do better than the simple greedy choice?
+ // Check for overlaps in the range.
+ // This is O(MaxCandidateLen * CandidateList.size()).
+ for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et;
+ It++) {
+ Candidate &C1 = *It;
+ OutlinedFunction &F1 = FunctionList[C1.FunctionIdx];
+
+ // If we removed this candidate, skip it.
+ if (!C1.InCandidateList)
+ continue;
+
+ // Is it still worth it to outline C1?
+ if (F1.Benefit < 1 || F1.OccurrenceCount < 2) {
+ assert(F1.OccurrenceCount > 0 &&
+ "Can't remove OutlinedFunction with no occurrences!");
+ F1.OccurrenceCount--;
+ C1.InCandidateList = false;
+ continue;
+ }
+
+ // The minimum start index of any candidate that could overlap with this
+ // one.
+ unsigned FarthestPossibleIdx = 0;
+
+ // Either the index is 0, or it's at most MaxCandidateLen indices away.
+ if (C1.StartIdx > MaxCandidateLen)
+ FarthestPossibleIdx = C1.StartIdx - MaxCandidateLen;
+
+ // Compare against the candidates in the list that start at index
+ // FarthestPossibleIdx or later; anything starting earlier is too far
+ // away to overlap C1. There are at most MaxCandidateLen of these.
+ for (auto Sit = It + 1; Sit != Et; Sit++) {
+ Candidate &C2 = *Sit;
+ OutlinedFunction &F2 = FunctionList[C2.FunctionIdx];
+
+ // Is this candidate too far away to overlap?
+ if (C2.StartIdx < FarthestPossibleIdx)
+ break;
+
+ // Did we already remove this candidate in a previous step?
+ if (!C2.InCandidateList)
+ continue;
+
+ // Is the function beneficial to outline?
+ if (F2.OccurrenceCount < 2 || F2.Benefit < 1) {
+ // If not, remove this candidate and move to the next one.
+ assert(F2.OccurrenceCount > 0 &&
+ "Can't remove OutlinedFunction with no occurrences!");
+ F2.OccurrenceCount--;
+ C2.InCandidateList = false;
+ continue;
+ }
+
+ size_t C2End = C2.StartIdx + C2.Len - 1;
+
+ // Do C1 and C2 overlap?
+ //
+ // Not overlapping:
+ // High indices... [C1End ... C1Start][C2End ... C2Start] ...Low indices
+ //
+ // We sorted our candidate list so C2Start <= C1Start. We know that
+ // C2End > C2Start since each candidate has length >= 2. Therefore, all
+ // we have to check is whether C2End < C1Start; if it is, the two don't
+ // overlap.
+ if (C2End < C1.StartIdx)
+ continue;
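+
+ // For example (hypothetical indices): with C2.StartIdx = 10 and
+ // C2.Len = 3, C2End is 12, so a C1 starting at index 12 overlaps C2,
+ // while a C1 starting at index 13 does not.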
+
+ // C1 and C2 overlap.
+ // We need to choose the better of the two.
+ //
+ // Approximate this by picking the one which would have saved us the
+ // most instructions before any pruning.
+ if (C1.Benefit >= C2.Benefit) {
+
+ // C1 is better, so remove C2 and update C2's OutlinedFunction to
+ // reflect the removal.
+ assert(F2.OccurrenceCount > 0 &&
+ "Can't remove OutlinedFunction with no occurrences!");
+ F2.OccurrenceCount--;
+ F2.Benefit = TII.getOutliningBenefit(F2.Sequence.size(),
+ F2.OccurrenceCount,
+ F2.IsTailCall
+ );
+
+ C2.InCandidateList = false;
+
+ DEBUG (
+ dbgs() << "- Removed C2. \n";
+ dbgs() << "--- Num fns left for C2: " << F2.OccurrenceCount << "\n";
+ dbgs() << "--- C2's benefit: " << F2.Benefit << "\n";
+ );
+
+ } else {
+ // C2 is better, so remove C1 and update C1's OutlinedFunction to
+ // reflect the removal.
+ assert(F1.OccurrenceCount > 0 &&
+ "Can't remove OutlinedFunction with no occurrences!");
+ F1.OccurrenceCount--;
+ F1.Benefit = TII.getOutliningBenefit(F1.Sequence.size(),
+ F1.OccurrenceCount,
+ F1.IsTailCall
+ );
+ C1.InCandidateList = false;
+
+ DEBUG (
+ dbgs() << "- Removed C1. \n";
+ dbgs() << "--- Num fns left for C1: " << F1.OccurrenceCount << "\n";
+ dbgs() << "--- C1's benefit: " << F1.Benefit << "\n";
+ );
+
+ // C1 is out, so we don't have to compare it against anyone else.
+ break;
+ }
+ }
+ }
+}
+
+unsigned
+MachineOutliner::buildCandidateList(std::vector<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ SuffixTree &ST,
+ InstructionMapper &Mapper,
+ const TargetInstrInfo &TII) {
+
+ std::vector<unsigned> CandidateSequence; // Current outlining candidate.
+ size_t MaxCandidateLen = 0; // Length of the longest candidate.
+
+ // Benefit function used when querying the suffix tree. This allows the
+ // target to define more fine-grained types of things to outline without
+ // putting target-specific info in the suffix tree.
+ auto BenefitFn = [&TII, &Mapper](const SuffixTreeNode &Curr,
+ size_t StringLen, unsigned EndVal) {
+
+ // The root represents the empty string.
+ if (Curr.isRoot())
+ return 0u;
+
+ // Is this long enough to outline?
+ // TODO: Let the target decide how "long" a string is in terms of the
+ // sizes of the instructions in the string. For example, if a call
+ // instruction is smaller than a single instruction in the string, we
+ // should outline that single instruction.
+ if (StringLen < 2)
+ return 0u;
+
+ size_t Occurrences = Curr.OccurrenceCount;
+
+ // Anything we want to outline has to appear at least twice.
+ if (Occurrences < 2)
+ return 0u;
+
+ // Check if the last instruction in the sequence is a return.
+ MachineInstr *LastInstr =
+ Mapper.IntegerInstructionMap[EndVal];
+ assert(LastInstr && "Last instruction in sequence was unmapped!");
+
+ // The only way a terminator could be mapped as legal is if it was safe to
+ // tail call.
+ bool IsTailCall = LastInstr->isTerminator();
+ return TII.getOutliningBenefit(StringLen, Occurrences, IsTailCall);
+ };
+
+ MaxCandidateLen = ST.findCandidates(CandidateList, FunctionList, BenefitFn);
+
+ for (auto &OF : FunctionList)
+ OF.IsTailCall = Mapper.
+ IntegerInstructionMap[OF.Sequence.back()]->isTerminator();
+
+ // Sort the candidates in descending order by start index. This simplifies
+ // the outlining process: candidates are removed from the mapping back to
+ // front, so cutting one out never shifts the indices of the candidates
+ // that remain to be outlined.
+ std::stable_sort(CandidateList.begin(), CandidateList.end());
+
+ return MaxCandidateLen;
+}
+
+MachineFunction *
+MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF,
+ InstructionMapper &Mapper) {
+
+ // Create the function name. This should be unique. For now, just append
+ // the number assigned to this function to a common prefix.
+ std::ostringstream NameStream;
+ NameStream << "OUTLINED_FUNCTION" << "_" << OF.Name;
+
+ // Create the function using an IR-level function.
+ LLVMContext &C = M.getContext();
+ Function *F = dyn_cast<Function>(
+ M.getOrInsertFunction(NameStream.str(), Type::getVoidTy(C)));
+ assert(F && "Function was null!");
+
+ // NOTE: If this is linkonceodr, then we can take advantage of linker deduping
+ // which gives us better results when we outline from linkonceodr functions.
+ F->setLinkage(GlobalValue::PrivateLinkage);
+ F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+
+ BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
+ IRBuilder<> Builder(EntryBB);
+ Builder.CreateRetVoid();
+
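+ // The resulting IR is only a minimal shell (a sketch of its textual form):
+ //
+ //   define private void @OUTLINED_FUNCTION_<N>() unnamed_addr {
+ //   entry:
+ //     ret void
+ //   }
+ //
+ // The real body exists only at the MachineFunction level; the IR function
+ // gives the MachineModuleInfo machinery something to attach it to.
+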
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ MachineFunction &MF = MMI.getMachineFunction(*F);
+ MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+ // Insert the new function into the module.
+ MF.insert(MF.begin(), &MBB);
+
+ TII.insertOutlinerPrologue(MBB, MF, OF.IsTailCall);
+
+ // Copy over the instructions for the function using the integer mappings in
+ // its sequence.
+ for (unsigned Str : OF.Sequence) {
+ MachineInstr *NewMI =
+ MF.CloneMachineInstr(Mapper.IntegerInstructionMap.find(Str)->second);
+ NewMI->dropMemRefs();
+
+ // Don't keep debug information for outlined instructions.
+ // FIXME: This means outlined functions are currently undebuggable.
+ NewMI->setDebugLoc(DebugLoc());
+ MBB.insert(MBB.end(), NewMI);
+ }
+
+ TII.insertOutlinerEpilogue(MBB, MF, OF.IsTailCall);
+
+ return &MF;
+}
+
+bool MachineOutliner::outline(Module &M,
+ const ArrayRef<Candidate> &CandidateList,
+ std::vector<OutlinedFunction> &FunctionList,
+ InstructionMapper &Mapper) {
+
+ bool OutlinedSomething = false;
+
+ // Replace the candidates with calls to their respective outlined functions.
+ for (const Candidate &C : CandidateList) {
+
+ // Was the candidate removed during pruneOverlaps?
+ if (!C.InCandidateList)
+ continue;
+
+ // If not, then look at its OutlinedFunction.
+ OutlinedFunction &OF = FunctionList[C.FunctionIdx];
+
+ // Was its OutlinedFunction made unbeneficial during pruneOverlaps?
+ if (OF.OccurrenceCount < 2 || OF.Benefit < 1)
+ continue;
+
+ // If not, then outline it.
+ assert(C.StartIdx < Mapper.InstrList.size() && "Candidate out of bounds!");
+ MachineBasicBlock *MBB = (*Mapper.InstrList[C.StartIdx]).getParent();
+ MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.StartIdx];
+ unsigned EndIdx = C.StartIdx + C.Len - 1;
+
+ assert(EndIdx < Mapper.InstrList.size() && "Candidate out of bounds!");
+ MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
+ assert(EndIt != MBB->end() && "EndIt out of bounds!");
+
+ EndIt++; // Erase needs one past the end index.
+
+ // Does this candidate have a function yet?
+ if (!OF.MF) {
+ OF.MF = createOutlinedFunction(M, OF, Mapper);
+ FunctionsCreated++;
+ }
+
+ MachineFunction *MF = OF.MF;
+ const TargetSubtargetInfo &STI = MF->getSubtarget();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+ // Insert a call to the new function and erase the old sequence.
+ TII.insertOutlinedCall(M, *MBB, StartIt, *MF, OF.IsTailCall);
+ StartIt = Mapper.InstrList[C.StartIdx];
+ MBB->erase(StartIt, EndIt);
+
+ OutlinedSomething = true;
+
+ // Statistics.
+ NumOutlined++;
+ }
+
+ DEBUG (
+ dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";
+ );
+
+ return OutlinedSomething;
+}
+
+bool MachineOutliner::runOnModule(Module &M) {
+
+ // Is there anything in the module at all?
+ if (M.empty())
+ return false;
+
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ const TargetSubtargetInfo &STI = MMI.getMachineFunction(*M.begin())
+ .getSubtarget();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+
+ InstructionMapper Mapper;
+
+ // Build instruction mappings for each function in the module.
+ for (Function &F : M) {
+ MachineFunction &MF = MMI.getMachineFunction(F);
+
+ // Is the function empty? Safe to outline from?
+ if (F.empty() || !TII->isFunctionSafeToOutlineFrom(MF))
+ continue;
+
+ // If it is, look at each MachineBasicBlock in the function.
+ for (MachineBasicBlock &MBB : MF) {
+
+ // Is there anything in MBB?
+ if (MBB.empty())
+ continue;
+
+ // If yes, map it.
+ Mapper.convertToUnsignedVec(MBB, *TRI, *TII);
+ }
+ }
+
+ // Construct a suffix tree, use it to find candidates, and then outline them.
+ SuffixTree ST(Mapper.UnsignedVec);
+ std::vector<Candidate> CandidateList;
+ std::vector<OutlinedFunction> FunctionList;
+
+ // Find all of the outlining candidates.
+ unsigned MaxCandidateLen =
+ buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII);
+
+ // Remove candidates that overlap with other candidates.
+ pruneOverlaps(CandidateList, FunctionList, MaxCandidateLen, *TII);
+
+ // Outline each of the candidates and return true if something was outlined.
+ return outline(M, CandidateList, FunctionList, Mapper);
+}
diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp
index 43a18099d39a..d06c38cf4ed8 100644
--- a/lib/CodeGen/MachinePipeliner.cpp
+++ b/lib/CodeGen/MachinePipeliner.cpp
@@ -552,7 +552,9 @@ public:
os << "\n";
}
- void dump() const { print(dbgs()); }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
+#endif
};
/// This class repesents the scheduled code. The main data structure is a
@@ -593,7 +595,7 @@ private:
/// Virtual register information.
MachineRegisterInfo &MRI;
- DFAPacketizer *Resources;
+ std::unique_ptr<DFAPacketizer> Resources;
public:
SMSchedule(MachineFunction *mf)
@@ -604,13 +606,6 @@ public:
InitiationInterval = 0;
}
- ~SMSchedule() {
- ScheduledInstrs.clear();
- InstrToCycle.clear();
- RegToStageDiff.clear();
- delete Resources;
- }
-
void reset() {
ScheduledInstrs.clear();
InstrToCycle.clear();
@@ -738,7 +733,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
return false;
if (mf.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
+ AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
!EnableSWPOptSize.getPosition())
return false;
@@ -960,7 +955,7 @@ static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
if (Phi.getOperand(i + 1).getMBB() != Loop)
InitVal = Phi.getOperand(i).getReg();
- else if (Phi.getOperand(i + 1).getMBB() == Loop)
+ else
LoopVal = Phi.getOperand(i).getReg();
assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
@@ -2514,7 +2509,7 @@ void SwingSchedulerDAG::generateExistingPhis(
MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
bool IsLast) {
- // Compute the stage number for the inital value of the Phi, which
+ // Compute the stage number for the initial value of the Phi, which
// comes from the prolog. The prolog to use depends on to which kernel/
// epilog that we're adding the Phi.
unsigned PrologStage = 0;
@@ -3480,7 +3475,7 @@ bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep,
// increment value to determine if the accesses may be loop carried.
if (OffsetS >= OffsetD)
return OffsetS + AccessSizeS > DeltaS;
- else if (OffsetS < OffsetD)
+ else
return OffsetD + AccessSizeD > DeltaD;
return true;
@@ -3980,5 +3975,7 @@ void SMSchedule::print(raw_ostream &os) const {
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Utility function used for debugging to print the schedule.
-void SMSchedule::dump() const { print(dbgs()); }
+LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
+#endif
diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp
index fc32183c7f63..71ad4e6aa7f5 100644
--- a/lib/CodeGen/MachineRegionInfo.cpp
+++ b/lib/CodeGen/MachineRegionInfo.cpp
@@ -1,10 +1,9 @@
-
#include "llvm/CodeGen/MachineRegionInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/RegionInfoImpl.h"
#include "llvm/CodeGen/MachinePostDominators.h"
-#define DEBUG_TYPE "region"
+#define DEBUG_TYPE "machine-region-info"
using namespace llvm;
@@ -86,6 +85,9 @@ bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) {
auto DF = &getAnalysis<MachineDominanceFrontier>();
RI.recalculate(F, DT, PDT, DF);
+
+ DEBUG(RI.dump());
+
return false;
}
@@ -103,9 +105,10 @@ void MachineRegionInfoPass::verifyAnalysis() const {
void MachineRegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.addRequired<DominanceFrontierWrapperPass>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineDominanceFrontier>();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
void MachineRegionInfoPass::print(raw_ostream &OS, const Module *) const {
@@ -119,14 +122,15 @@ LLVM_DUMP_METHOD void MachineRegionInfoPass::dump() const {
#endif
char MachineRegionInfoPass::ID = 0;
+char &MachineRegionInfoPassID = MachineRegionInfoPass::ID;
-INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, "regions",
- "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, DEBUG_TYPE,
+ "Detect single entry single exit regions", true, true)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
-INITIALIZE_PASS_END(MachineRegionInfoPass, "regions",
- "Detect single entry single exit regions", true, true)
+INITIALIZE_PASS_END(MachineRegionInfoPass, DEBUG_TYPE,
+ "Detect single entry single exit regions", true, true)
// Create methods available outside of this file, to use them
// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 242cb0b80953..128910f8eb2a 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- lib/Codegen/MachineRegisterInfo.cpp -------------------------------===//
+//===- lib/Codegen/MachineRegisterInfo.cpp --------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,13 +11,27 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
using namespace llvm;
@@ -28,9 +42,9 @@ static cl::opt<bool> EnableSubRegLiveness("enable-subreg-liveness", cl::Hidden,
void MachineRegisterInfo::Delegate::anchor() {}
MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF)
- : MF(MF), TheDelegate(nullptr),
- TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() &&
- EnableSubRegLiveness) {
+ : MF(MF), TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() &&
+ EnableSubRegLiveness),
+ IsUpdatedCSRsInitialized(false) {
unsigned NumRegs = getTargetRegisterInfo()->getNumRegs();
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
@@ -444,8 +458,8 @@ LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const {
return TRC.getLaneMask();
}
-#ifndef NDEBUG
-void MachineRegisterInfo::dumpUses(unsigned Reg) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineRegisterInfo::dumpUses(unsigned Reg) const {
for (MachineInstr &I : use_instructions(Reg))
I.dump();
}
@@ -543,3 +557,47 @@ bool MachineRegisterInfo::isPhysRegUsed(unsigned PhysReg) const {
}
return false;
}
+
+void MachineRegisterInfo::disableCalleeSavedRegister(unsigned Reg) {
+
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ assert(Reg && (Reg < TRI->getNumRegs()) &&
+ "Trying to disable an invalid register");
+
+ if (!IsUpdatedCSRsInitialized) {
+ const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
+ for (const MCPhysReg *I = CSR; *I; ++I)
+ UpdatedCSRs.push_back(*I);
+
+ // Zero value represents the end of the register list
+ // (no more registers should be pushed).
+ UpdatedCSRs.push_back(0);
+
+ IsUpdatedCSRsInitialized = true;
+ }
+
+ // Remove the register (and its aliases from the list).
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ UpdatedCSRs.erase(std::remove(UpdatedCSRs.begin(), UpdatedCSRs.end(), *AI),
+ UpdatedCSRs.end());
+}
+
+const MCPhysReg *MachineRegisterInfo::getCalleeSavedRegs() const {
+ if (IsUpdatedCSRsInitialized)
+ return UpdatedCSRs.data();
+
+ return getTargetRegisterInfo()->getCalleeSavedRegs(MF);
+}
+
+void MachineRegisterInfo::setCalleeSavedRegs(ArrayRef<MCPhysReg> CSRs) {
+ if (IsUpdatedCSRsInitialized)
+ UpdatedCSRs.clear();
+
+ for (MCPhysReg Reg : CSRs)
+ UpdatedCSRs.push_back(Reg);
+
+ // Zero value represents the end of the register list
+ // (no more registers should be pushed).
+ UpdatedCSRs.push_back(0);
+ IsUpdatedCSRsInitialized = true;
+}
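+
+// Usage sketch (hypothetical register): a pass that wants a physical
+// register treated as no longer callee-saved in this function could do
+//
+//   MRI.disableCalleeSavedRegister(SomePhysReg);
+//   const MCPhysReg *CSRs = MRI.getCalleeSavedRegs(); // zero-terminated
+//
+// after which the returned list omits SomePhysReg and all of its aliases.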
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index e06bc517fa91..41e161f71e53 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -12,30 +12,67 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassRegistry.h"
+#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
using namespace llvm;
#define DEBUG_TYPE "misched"
namespace llvm {
+
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
@@ -43,7 +80,8 @@ cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
cl::opt<bool>
DumpCriticalPathLength("misched-dcpl", cl::Hidden,
cl::desc("Print critical path length to stdout"));
-}
+
+} // end namespace llvm
#ifndef NDEBUG
static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
@@ -80,10 +118,6 @@ static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
cl::desc("Enable memop clustering."),
cl::init(true));
-// Experimental heuristics
-static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
- cl::desc("Enable scheduling for macro fusion."), cl::init(true));
-
static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
cl::desc("Verify machine instrs before and after machine scheduling"));
@@ -92,14 +126,14 @@ static const unsigned MinSubtreeSize = 8;
// Pin the vtables to this file.
void MachineSchedStrategy::anchor() {}
+
void ScheduleDAGMutation::anchor() {}
//===----------------------------------------------------------------------===//
// Machine Instruction Scheduling Pass and Registry
//===----------------------------------------------------------------------===//
-MachineSchedContext::MachineSchedContext():
- MF(nullptr), MLI(nullptr), MDT(nullptr), PassConfig(nullptr), AA(nullptr), LIS(nullptr) {
+MachineSchedContext::MachineSchedContext() {
RegClassInfo = new RegisterClassInfo();
}
@@ -108,6 +142,7 @@ MachineSchedContext::~MachineSchedContext() {
}
namespace {
+
/// Base class for a machine scheduler class that can run at any point.
class MachineSchedulerBase : public MachineSchedContext,
public MachineFunctionPass {
@@ -149,7 +184,8 @@ public:
protected:
ScheduleDAGInstrs *createPostMachineScheduler();
};
-} // namespace
+
+} // end anonymous namespace
char MachineScheduler::ID = 0;
@@ -158,6 +194,7 @@ char &llvm::MachineSchedulerID = MachineScheduler::ID;
INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler",
"Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler",
@@ -211,7 +248,7 @@ static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
/// MachineSchedOpt allows command line selection of the scheduler.
static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
- RegisterPassParser<MachineSchedRegistry> >
+ RegisterPassParser<MachineSchedRegistry>>
MachineSchedOpt("misched",
cl::init(&useDefaultMachineSched), cl::Hidden,
cl::desc("Machine instruction scheduler to use"));
@@ -448,7 +485,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
// instruction stream until we find the nearest boundary.
unsigned NumRegionInstrs = 0;
MachineBasicBlock::iterator I = RegionEnd;
- for (;I != MBB->begin(); --I) {
+ for (; I != MBB->begin(); --I) {
MachineInstr &MI = *std::prev(I);
if (isSchedBoundary(&MI, &*MBB, MF, TII))
break;
@@ -504,13 +541,14 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
// unimplemented
}
-LLVM_DUMP_METHOD
-void ReadyQueue::dump() {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void ReadyQueue::dump() {
dbgs() << "Queue " << Name << ": ";
for (unsigned i = 0, e = Queue.size(); i < e; ++i)
dbgs() << Queue[i]->NodeNum << " ";
dbgs() << "\n";
}
+#endif
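
This hunk is the first of several in the diff that apply the same dump-method convention: the body is compiled only when asserts are on or LLVM_ENABLE_DUMP is defined, and LLVM_DUMP_METHOD keeps the otherwise-unreferenced helper from being stripped. A self-contained sketch of the pattern (DUMP_METHOD and ENABLE_DUMP are stand-ins for the LLVM macros, not the real ones):

#include <cstdio>

// Stand-in for LLVM_DUMP_METHOD: stops the compiler from discarding an
// apparently unused debug helper so it stays callable from a debugger.
#if defined(__GNUC__)
#define DUMP_METHOD __attribute__((noinline, used))
#else
#define DUMP_METHOD
#endif

struct QueueSketch {
  int Vals[3] = {1, 2, 3};

#if !defined(NDEBUG) || defined(ENABLE_DUMP)
  // Compiled only in asserts builds, or when dumps are forced on.
  DUMP_METHOD void dump() const {
    for (int V : Vals)
      std::printf("%d ", V);
    std::printf("\n");
  }
#endif
};
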
//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Basic machine instruction scheduling. This is
@@ -519,8 +557,7 @@ void ReadyQueue::dump() {
// ===----------------------------------------------------------------------===/
// Provide a vtable anchor.
-ScheduleDAGMI::~ScheduleDAGMI() {
-}
+ScheduleDAGMI::~ScheduleDAGMI() = default;
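
The "= default" rewrite here keeps the idiom intact: an out-of-line destructor definition still serves as the vtable anchor for the class, it just no longer needs an empty body. A minimal sketch (hypothetical names):

struct BaseSketch {
  virtual ~BaseSketch();    // declared in the header
  virtual void run() = 0;
};

// Defined in exactly one .cpp: the vtable is emitted here, same as with
// an empty-braces body, but with less noise.
BaseSketch::~BaseSketch() = default;
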
bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
@@ -825,7 +862,7 @@ void ScheduleDAGMI::placeDebugValues() {
RegionBegin = FirstDbgValue;
}
- for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
MachineInstr *DbgValue = P.first;
@@ -841,7 +878,7 @@ void ScheduleDAGMI::placeDebugValues() {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void ScheduleDAGMI::dumpSchedule() const {
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
if (SUnit *SU = getSUnit(&(*MI)))
SU->dump(this);
@@ -1012,7 +1049,7 @@ updateScheduledPressure(const SUnit *SU,
++CritIdx;
if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
- && NewMaxPressure[ID] <= INT16_MAX)
+ && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
}
unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
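
The replacement above swaps the INT16_MAX macro for std::numeric_limits, and the cast to unsigned keeps the comparison within one signedness, since NewMaxPressure[ID] is unsigned. A minimal standalone sketch of the same guard (function name hypothetical):

#include <cstdint>
#include <limits>

bool fitsInInt16(unsigned Pressure) {
  // Cast the signed limit to unsigned before comparing, as in the hunk
  // above; a mixed-signedness comparison draws -Wsign-compare and
  // implicitly converts the signed operand.
  return Pressure <= (unsigned)std::numeric_limits<int16_t>::max();
}
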
@@ -1136,6 +1173,12 @@ void ScheduleDAGMILive::schedule() {
dbgs() << " Pressure Diff : ";
getPressureDiff(&SU).dump(*TRI);
}
+ dbgs() << " Single Issue : ";
+ if (SchedModel.mustBeginGroup(SU.getInstr()) &&
+ SchedModel.mustEndGroup(SU.getInstr()))
+ dbgs() << "true;";
+ else
+ dbgs() << "false;";
dbgs() << '\n';
}
if (ExitSU.getInstr() != nullptr)
@@ -1396,6 +1439,7 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
//===----------------------------------------------------------------------===//
namespace {
+
/// \brief Post-process the DAG to create cluster edges between neighboring
/// loads or between neighboring stores.
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
@@ -1403,6 +1447,7 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
SUnit *SU;
unsigned BaseReg;
int64_t Offset;
+
MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
: SU(su), BaseReg(reg), Offset(ofs) {}
@@ -1439,25 +1484,26 @@ public:
LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
: BaseMemOpClusterMutation(tii, tri, true) {}
};
-} // anonymous
+
+} // end anonymous namespace
namespace llvm {
std::unique_ptr<ScheduleDAGMutation>
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- return EnableMemOpCluster ? make_unique<LoadClusterMutation>(TII, TRI)
+ return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
: nullptr;
}
std::unique_ptr<ScheduleDAGMutation>
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- return EnableMemOpCluster ? make_unique<StoreClusterMutation>(TII, TRI)
+ return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
: nullptr;
}
-} // namespace llvm
+} // end namespace llvm
void BaseMemOpClusterMutation::clusterNeighboringMemOps(
ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
@@ -1543,80 +1589,11 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
}
//===----------------------------------------------------------------------===//
-// MacroFusion - DAG post-processing to encourage fusion of macro ops.
-//===----------------------------------------------------------------------===//
-
-namespace {
-/// \brief Post-process the DAG to create cluster edges between instructions
-/// that may be fused by the processor into a single operation.
-class MacroFusion : public ScheduleDAGMutation {
- const TargetInstrInfo &TII;
-public:
- MacroFusion(const TargetInstrInfo &TII)
- : TII(TII) {}
-
- void apply(ScheduleDAGInstrs *DAGInstrs) override;
-};
-} // anonymous
-
-namespace llvm {
-
-std::unique_ptr<ScheduleDAGMutation>
-createMacroFusionDAGMutation(const TargetInstrInfo *TII) {
- return EnableMacroFusion ? make_unique<MacroFusion>(*TII) : nullptr;
-}
-
-} // namespace llvm
-
-/// \brief Callback from DAG postProcessing to create cluster edges to encourage
-/// fused operations.
-void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
- ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
-
- // For now, assume targets can only fuse with the branch.
- SUnit &ExitSU = DAG->ExitSU;
- MachineInstr *Branch = ExitSU.getInstr();
- if (!Branch)
- return;
-
- for (SDep &PredDep : ExitSU.Preds) {
- if (PredDep.isWeak())
- continue;
- SUnit &SU = *PredDep.getSUnit();
- MachineInstr &Pred = *SU.getInstr();
- if (!TII.shouldScheduleAdjacent(Pred, *Branch))
- continue;
-
- // Create a single weak edge from SU to ExitSU. The only effect is to cause
- // bottom-up scheduling to heavily prioritize the clustered SU. There is no
- // need to copy predecessor edges from ExitSU to SU, since top-down
- // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
- // of SU, we could create an artificial edge from the deepest root, but it
- // hasn't been needed yet.
- bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
- (void)Success;
- assert(Success && "No DAG nodes should be reachable from ExitSU");
-
- // Adjust latency of data deps between the nodes.
- for (SDep &PredDep : ExitSU.Preds) {
- if (PredDep.getSUnit() == &SU)
- PredDep.setLatency(0);
- }
- for (SDep &SuccDep : SU.Succs) {
- if (SuccDep.getSUnit() == &ExitSU)
- SuccDep.setLatency(0);
- }
-
- DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
- break;
- }
-}
-
-//===----------------------------------------------------------------------===//
// CopyConstrain - DAG post-processing to encourage copy elimination.
//===----------------------------------------------------------------------===//
namespace {
+
/// \brief Post-process the DAG to create weak edges from all uses of a copy to
/// the one use that defines the copy's source vreg, most likely an induction
/// variable increment.
@@ -1626,6 +1603,7 @@ class CopyConstrain : public ScheduleDAGMutation {
// RegionEndIdx is the slot index of the last non-debug instruction in the
// scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
SlotIndex RegionEndIdx;
+
public:
CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
@@ -1634,17 +1612,18 @@ public:
protected:
void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
};
-} // anonymous
+
+} // end anonymous namespace
namespace llvm {
std::unique_ptr<ScheduleDAGMutation>
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI) {
- return make_unique<CopyConstrain>(TII, TRI);
+ const TargetRegisterInfo *TRI) {
+ return llvm::make_unique<CopyConstrain>(TII, TRI);
}
-} // namespace llvm
+} // end namespace llvm
/// constrainLocalCopy handles two possibilities:
/// 1) Local src:
@@ -1836,7 +1815,7 @@ void SchedBoundary::reset() {
CheckPending = false;
CurrCycle = 0;
CurrMOps = 0;
- MinReadyCycle = UINT_MAX;
+ MinReadyCycle = std::numeric_limits<unsigned>::max();
ExpectedLatency = 0;
DependentLatency = 0;
RetiredMOps = 0;
@@ -1937,12 +1916,22 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
&& HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
return true;
}
+
unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
<< SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
return true;
}
+
+ if (CurrMOps > 0 &&
+ ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
+ (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
+ DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
+ << (isTop()? "begin" : "end") << " group\n");
+ return true;
+ }
+
if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
for (TargetSchedModel::ProcResIter
@@ -2039,7 +2028,8 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
/// Move the boundary of scheduled code by one cycle.
void SchedBoundary::bumpCycle(unsigned NextCycle) {
if (SchedModel->getMicroOpBufferSize() == 0) {
- assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
+ assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
+ "MinReadyCycle uninitialized");
if (MinReadyCycle > NextCycle)
NextCycle = MinReadyCycle;
}
@@ -2237,6 +2227,18 @@ void SchedBoundary::bumpNode(SUnit *SU) {
// one cycle. Since we commonly reach the max MOps here, opportunistically
// bump the cycle to avoid uselessly checking everything in the readyQ.
CurrMOps += IncMOps;
+
+ // Bump the cycle count for issue group constraints.
+ // This must be done after NextCycle has been adjusted for all other stalls.
+ // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
+ // CurrCycle to X.
+ if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
+ (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
+ DEBUG(dbgs() << " Bump cycle to "
+ << (isTop() ? "end" : "begin") << " group\n");
+ bumpCycle(++NextCycle);
+ }
+
while (CurrMOps >= SchedModel->getIssueWidth()) {
DEBUG(dbgs() << " *** Max MOps " << CurrMOps
<< " at cycle " << CurrCycle << '\n');
@@ -2250,7 +2252,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
void SchedBoundary::releasePending() {
// If the available queue is empty, it is safe to reset MinReadyCycle.
if (Available.empty())
- MinReadyCycle = UINT_MAX;
+ MinReadyCycle = std::numeric_limits<unsigned>::max();
// Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue.
@@ -2323,10 +2325,10 @@ SUnit *SchedBoundary::pickOnlyChoice() {
return nullptr;
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// This is useful information to dump after bumpNode.
// Note that the Queue contents are more useful before pickNodeFromQueue.
-void SchedBoundary::dumpScheduledState() {
+LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() {
unsigned ResFactor;
unsigned ResCount;
if (ZoneCritResIdx) {
@@ -2666,11 +2668,14 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
}
void GenericScheduler::dumpPolicy() {
+ // Cannot completely remove the virtual function even in release mode.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << "GenericScheduler RegionPolicy: "
<< " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
<< " OnlyTopDown=" << RegionPolicy.OnlyTopDown
<< " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
<< "\n";
+#endif
}
/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
@@ -2724,7 +2729,7 @@ void GenericScheduler::registerRoots() {
errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
}
- if (EnableCyclicPath) {
+ if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
checkAcyclicLatency();
}
@@ -3106,7 +3111,6 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
}
void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
-
MachineBasicBlock::iterator InsertPos = SU->getInstr();
if (!isTop)
++InsertPos;
@@ -3154,7 +3158,8 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
- ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
+ ScheduleDAGMILive *DAG =
+ new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
// Register DAG post-processors.
//
// FIXME: extend the mutation API to allow earlier mutations to instantiate
@@ -3195,7 +3200,6 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
}
}
-
void PostGenericScheduler::registerRoots() {
Rem.CriticalPath = DAG->ExitSU.getDepth();
@@ -3302,7 +3306,7 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
}
ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
- return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C),
+ return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
/*RemoveKillFlags=*/true);
}
@@ -3311,14 +3315,14 @@ ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
//===----------------------------------------------------------------------===//
namespace {
+
/// \brief Order nodes by the ILP metric.
struct ILPOrder {
- const SchedDFSResult *DFSResult;
- const BitVector *ScheduledTrees;
+ const SchedDFSResult *DFSResult = nullptr;
+ const BitVector *ScheduledTrees = nullptr;
bool MaximizeILP;
- ILPOrder(bool MaxILP)
- : DFSResult(nullptr), ScheduledTrees(nullptr), MaximizeILP(MaxILP) {}
+ ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
/// \brief Apply a less-than relation on node priority.
///
@@ -3347,12 +3351,13 @@ struct ILPOrder {
/// \brief Schedule based on the ILP metric.
class ILPScheduler : public MachineSchedStrategy {
- ScheduleDAGMILive *DAG;
+ ScheduleDAGMILive *DAG = nullptr;
ILPOrder Cmp;
std::vector<SUnit*> ReadyQ;
+
public:
- ILPScheduler(bool MaximizeILP): DAG(nullptr), Cmp(MaximizeILP) {}
+ ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
void initialize(ScheduleDAGMI *dag) override {
assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
@@ -3405,14 +3410,16 @@ public:
std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
}
};
-} // namespace
+
+} // end anonymous namespace
static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(true));
+ return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(false));
+ return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
}
+
static MachineSchedRegistry ILPMaxRegistry(
"ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
static MachineSchedRegistry ILPMinRegistry(
@@ -3424,6 +3431,7 @@ static MachineSchedRegistry ILPMinRegistry(
#ifndef NDEBUG
namespace {
+
/// Apply a less-than relation on the node order, which corresponds to the
/// instruction order prior to scheduling. IsReverse implements greater-than.
template<bool IsReverse>
@@ -3444,11 +3452,12 @@ class InstructionShuffler : public MachineSchedStrategy {
// Using a less-than relation (SUnitOrder<false>) for the TopQ priority
// gives nodes with a higher number higher priority causing the latest
// instructions to be scheduled first.
- PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
TopQ;
// When scheduling bottom-up, use greater-than as the queue priority.
- PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
BottomQ;
+
public:
InstructionShuffler(bool alternate, bool topdown)
: IsAlternating(alternate), IsTopDown(topdown) {}
@@ -3492,15 +3501,18 @@ public:
BottomQ.push(SU);
}
};
-} // namespace
+
+} // end anonymous namespace
static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
bool Alternate = !ForceTopDown && !ForceBottomUp;
bool TopDown = !ForceBottomUp;
assert((TopDown || !ForceTopDown) &&
"-misched-topdown incompatible with -misched-bottomup");
- return new ScheduleDAGMILive(C, make_unique<InstructionShuffler>(Alternate, TopDown));
+ return new ScheduleDAGMILive(
+ C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
}
+
static MachineSchedRegistry ShufflerRegistry(
"shuffle", "Shuffle machine instructions alternating directions",
createInstructionShuffler);
@@ -3518,8 +3530,7 @@ template<> struct GraphTraits<
template<>
struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
-
- DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getGraphName(const ScheduleDAG *G) {
return G->MF.getName();
@@ -3576,7 +3587,8 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
return Str;
}
};
-} // namespace llvm
+
+} // end namespace llvm
#endif // NDEBUG
/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index ef7e525e8165..998a9645e68b 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -1,4 +1,4 @@
-//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===//
+//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,21 +7,35 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <tuple>
+#include <utility>
using namespace llvm;
@@ -37,9 +51,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(MachineTraceMetrics,
"machine-trace-metrics", "Machine Trace Metrics", false, true)
-MachineTraceMetrics::MachineTraceMetrics()
- : MachineFunctionPass(ID), MF(nullptr), TII(nullptr), TRI(nullptr),
- MRI(nullptr), Loops(nullptr) {
+MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) {
std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
}
@@ -137,7 +149,6 @@ MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const {
return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds);
}
-
//===----------------------------------------------------------------------===//
// Ensemble utility functions
//===----------------------------------------------------------------------===//
@@ -151,7 +162,7 @@ MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct)
}
// Virtual destructor serves as an anchor.
-MachineTraceMetrics::Ensemble::~Ensemble() {}
+MachineTraceMetrics::Ensemble::~Ensemble() = default;
const MachineLoop*
MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const {
@@ -297,6 +308,7 @@ static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) {
// MinInstrCountEnsemble - Pick the trace that executes the least number of
// instructions.
namespace {
+
class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble {
const char *getName() const override { return "MinInstr"; }
const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override;
@@ -306,7 +318,8 @@ public:
MinInstrCountEnsemble(MachineTraceMetrics *mtm)
: MachineTraceMetrics::Ensemble(mtm) {}
};
-}
+
+} // end anonymous namespace
// Select the preferred predecessor for MBB.
const MachineBasicBlock*
@@ -409,25 +422,30 @@ void MachineTraceMetrics::verifyAnalysis() const {
// revisit blocks.
namespace {
+
struct LoopBounds {
MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks;
SmallPtrSet<const MachineBasicBlock*, 8> Visited;
const MachineLoopInfo *Loops;
- bool Downward;
+ bool Downward = false;
+
LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks,
- const MachineLoopInfo *loops)
- : Blocks(blocks), Loops(loops), Downward(false) {}
+ const MachineLoopInfo *loops) : Blocks(blocks), Loops(loops) {}
};
-}
+
+} // end anonymous namespace
// Specialize po_iterator_storage in order to prune the post-order traversal so
// it is limited to the current loop and doesn't traverse the loop back edges.
namespace llvm {
+
template<>
class po_iterator_storage<LoopBounds, true> {
LoopBounds &LB;
+
public:
po_iterator_storage(LoopBounds &lb) : LB(lb) {}
+
void finishPostorder(const MachineBasicBlock*) {}
bool insertEdge(Optional<const MachineBasicBlock *> From,
@@ -452,7 +470,8 @@ public:
return LB.Visited.insert(To).second;
}
};
-}
+
+} // end namespace llvm
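
The specialization above gives the post-order walker a veto over edges: insertEdge() returns false for edges that leave the current loop or traverse a back edge, so blocks beyond them are never visited. A generic sketch of the same idea with an explicit edge filter (stand-in types, not the po_iterator API):

#include <functional>
#include <set>
#include <vector>

// Post-order walk over a successor graph with a pluggable edge filter;
// rejected edges are simply never crossed, pruning the traversal.
void postOrder(int N, const std::vector<std::vector<int>> &Succs,
               const std::function<bool(int, int)> &AcceptEdge,
               std::set<int> &Visited, std::vector<int> &Out) {
  Visited.insert(N);
  for (int S : Succs[N])
    if (!Visited.count(S) && AcceptEdge(N, S))
      postOrder(S, Succs, AcceptEdge, Visited, Out);
  Out.push_back(N);   // emit N after all accepted successors
}
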
/// Compute the trace through MBB.
void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) {
@@ -603,6 +622,7 @@ void MachineTraceMetrics::Ensemble::verify() const {
// A data dependency is represented as a defining MI and operand numbers on the
// defining and using MI.
namespace {
+
struct DataDep {
const MachineInstr *DefMI;
unsigned DefOp;
@@ -622,7 +642,8 @@ struct DataDep {
assert((++DefI).atEnd() && "Register has multiple defs");
}
};
-}
+
+} // end anonymous namespace
// Get the input data dependencies that must be ready before UseMI can issue.
// Return true if UseMI has any physreg operands.
@@ -678,17 +699,19 @@ static void getPHIDeps(const MachineInstr &UseMI,
// direction instructions are scanned, it could be the operand that defined the
// regunit, or the highest operand to read the regunit.
namespace {
+
struct LiveRegUnit {
unsigned RegUnit;
- unsigned Cycle;
- const MachineInstr *MI;
- unsigned Op;
+ unsigned Cycle = 0;
+ const MachineInstr *MI = nullptr;
+ unsigned Op = 0;
unsigned getSparseSetIndex() const { return RegUnit; }
- LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {}
+ LiveRegUnit(unsigned RU) : RegUnit(RU) {}
};
-}
+
+} // end anonymous namespace
// Identify physreg dependencies for UseMI, and update the live regunit
// tracking set when scanning instructions downwards.
@@ -922,7 +945,6 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
return Height;
}
-
typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap;
// Push the height of DefMI upwards if required to match UseMI.
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index a98139f9e5af..d392c044bd71 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -260,8 +260,8 @@ namespace {
static char ID; // Pass ID, replacement for typeid
const std::string Banner;
- MachineVerifierPass(const std::string &banner = nullptr)
- : MachineFunctionPass(ID), Banner(banner) {
+ MachineVerifierPass(std::string banner = std::string())
+ : MachineFunctionPass(ID), Banner(std::move(banner)) {
initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
}
@@ -528,7 +528,8 @@ void MachineVerifier::visitMachineFunctionBefore() {
lastIndex = SlotIndex();
regsReserved = MRI->getReservedRegs();
- markReachable(&MF->front());
+ if (!MF->empty())
+ markReachable(&MF->front());
// Build a set of the basic blocks in the function.
FunctionBlocks.clear();
@@ -548,7 +549,8 @@ void MachineVerifier::visitMachineFunctionBefore() {
// Check that the register use lists are sane.
MRI->verifyUseLists();
- verifyStackFrame();
+ if (!MF->empty())
+ verifyStackFrame();
}
// Does iterator point to a and b as the first two elements?
@@ -572,7 +574,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
for (const auto &LI : MBB->liveins()) {
if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() &&
MBB->getIterator() != MBB->getParent()->begin()) {
- report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB);
+ report("MBB has allocatable live-in, but isn't entry or landing-pad.", MBB);
}
}
}
@@ -908,6 +910,14 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
}
+ // Generic loads and stores must have a single MachineMemOperand
+ // describing that access.
+ if ((MI->getOpcode() == TargetOpcode::G_LOAD ||
+ MI->getOpcode() == TargetOpcode::G_STORE) &&
+ !MI->hasOneMemOperand())
+ report("Generic instruction accessing memory must have one mem operand",
+ MI);
+
StringRef ErrorInfo;
if (!TII->verifyInstruction(*MI, ErrorInfo))
report(ErrorInfo.data(), MI);
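
The new verifier rule above can be read in isolation: any generic load or store must carry exactly one MachineMemOperand describing its access. A toy model of the check (stand-in types, not the verifier's real interfaces):

enum OpcodeSketch { G_LOAD, G_STORE, G_ADD };

struct MISketch {          // hypothetical MachineInstr stand-in
  OpcodeSketch Op;
  int NumMemOperands;
};

// Returns false where the verifier would report "Generic instruction
// accessing memory must have one mem operand".
bool verifyOneMemOperand(const MISketch &MI) {
  if ((MI.Op == G_LOAD || MI.Op == G_STORE) && MI.NumMemOperands != 1)
    return false;
  return true;
}
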
@@ -2047,23 +2057,14 @@ void MachineVerifier::verifyStackFrame() {
// Update stack state by checking contents of MBB.
for (const auto &I : *MBB) {
if (I.getOpcode() == FrameSetupOpcode) {
- // The first operand of a FrameOpcode should be i32.
- int Size = I.getOperand(0).getImm();
- assert(Size >= 0 &&
- "Value should be non-negative in FrameSetup and FrameDestroy.\n");
-
if (BBState.ExitIsSetup)
report("FrameSetup is after another FrameSetup", &I);
- BBState.ExitValue -= Size;
+ BBState.ExitValue -= TII->getFrameSize(I);
BBState.ExitIsSetup = true;
}
if (I.getOpcode() == FrameDestroyOpcode) {
- // The first operand of a FrameOpcode should be i32.
- int Size = I.getOperand(0).getImm();
- assert(Size >= 0 &&
- "Value should be non-negative in FrameSetup and FrameDestroy.\n");
-
+ int Size = TII->getFrameSize(I);
if (!BBState.ExitIsSetup)
report("FrameDestroy is not after a FrameSetup", &I);
int AbsSPAdj = BBState.ExitValue < 0 ? -BBState.ExitValue :
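
This hunk and the PrologEpilogInserter hunks below replace open-coded getOperand(0).getImm() reads with a TII->getFrameSize() call, so the operand layout of the frame pseudos is assumed in one place. A sketch of what such an accessor centralizes (hypothetical stand-in type, not the TargetInstrInfo signature):

#include <cassert>
#include <cstdint>

struct FrameInstrSketch {     // hypothetical MachineInstr stand-in
  int64_t FrameSizeImm;       // immediate operand 0 of the frame pseudo
};

// One accessor instead of N open-coded reads: callers no longer repeat
// the operand index or the non-negativity assertion.
inline int64_t getFrameSize(const FrameInstrSketch &I) {
  assert(I.FrameSizeImm >= 0 && "frame size must be non-negative");
  return I.FrameSizeImm;
}
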
diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp
index ad9166f1ed23..00e72971a01e 100644
--- a/lib/CodeGen/PatchableFunction.cpp
+++ b/lib/CodeGen/PatchableFunction.cpp
@@ -75,7 +75,7 @@ bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
.addImm(FirstActualI->getOpcode());
for (auto &MO : FirstActualI->operands())
- MIB.addOperand(MO);
+ MIB.add(MO);
FirstActualI->eraseFromParent();
MF.ensureAlignment(4);
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 6081916a6a82..61dccdde8f1d 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -253,7 +253,7 @@ void SchedulePostRATDList::exitRegion() {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dumpSchedule - dump the scheduled Sequence.
-void SchedulePostRATDList::dumpSchedule() const {
+LLVM_DUMP_METHOD void SchedulePostRATDList::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
if (SUnit *SU = Sequence[i])
SU->dump(this);
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 5fca7fa5536b..1354009794cb 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -265,11 +265,8 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
std::vector<MachineBasicBlock::iterator> FrameSDOps;
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
- if (I->getOpcode() == FrameSetupOpcode ||
- I->getOpcode() == FrameDestroyOpcode) {
- assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
- " instructions should have a single immediate argument!");
- unsigned Size = I->getOperand(0).getImm();
+ if (TII.isFrameInstr(*I)) {
+ unsigned Size = TII.getFrameSize(*I);
if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
AdjustsStack = true;
FrameSDOps.push_back(I);
@@ -336,7 +333,7 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
return;
const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F);
+ const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs();
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
@@ -1049,8 +1046,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
- unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
- unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
if (RS && FrameIndexEliminationScavenging)
RS->enterBasicBlock(*BB);
@@ -1059,11 +1054,9 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
- if (I->getOpcode() == FrameSetupOpcode ||
- I->getOpcode() == FrameDestroyOpcode) {
- InsideCallSequence = (I->getOpcode() == FrameSetupOpcode);
+ if (TII.isFrameInstr(*I)) {
+ InsideCallSequence = TII.isFrameSetup(*I);
SPAdj += TII.getSPAdjust(*I);
-
I = TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
continue;
}
@@ -1237,4 +1230,6 @@ doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS) {
++I;
}
}
+
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
}
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 804a4c3dad66..b29e62bf1aa3 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -29,7 +29,10 @@ PseudoSourceValue::PseudoSourceValue(PSVKind Kind) : Kind(Kind) {}
PseudoSourceValue::~PseudoSourceValue() {}
void PseudoSourceValue::printCustom(raw_ostream &O) const {
- O << PSVNames[Kind];
+ if (Kind < TargetCustom)
+ O << PSVNames[Kind];
+ else
+ O << "TargetCustom" << Kind;
}
bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index a558e371ad4c..a87fed3a687e 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -176,8 +176,6 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
Q.collectInterferingVRegs();
- if (Q.seenUnspillableVReg())
- return false;
for (unsigned i = Q.interferingVRegs().size(); i; --i) {
LiveInterval *Intf = Q.interferingVRegs()[i - 1];
if (!Intf->isSpillable() || Intf->weight > VirtReg.weight)
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index c47cfb1b986f..06500289c971 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -29,8 +29,10 @@
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
@@ -125,6 +127,7 @@ class RAGreedy : public MachineFunctionPass,
MachineBlockFrequencyInfo *MBFI;
MachineDominatorTree *DomTree;
MachineLoopInfo *Loops;
+ MachineOptimizationRemarkEmitter *ORE;
EdgeBundles *Bundles;
SpillPlacement *SpillPlacer;
LiveDebugVariables *DebugVars;
@@ -419,6 +422,20 @@ private:
void collectHintInfo(unsigned, HintsInfo &);
bool isUnusedCalleeSavedReg(unsigned PhysReg) const;
+
+ /// Compute and report the number of spills and reloads for a loop.
+ void reportNumberOfSpillsReloads(MachineLoop *L, unsigned &Reloads,
+ unsigned &FoldedReloads, unsigned &Spills,
+ unsigned &FoldedSpills);
+
+ /// Report the number of spills and reloads for each loop.
+ void reportNumberOfSpillsReloads() {
+ for (MachineLoop *L : *Loops) {
+ unsigned Reloads, FoldedReloads, Spills, FoldedSpills;
+ reportNumberOfSpillsReloads(L, Reloads, FoldedReloads, Spills,
+ FoldedSpills);
+ }
+ }
};
} // end anonymous namespace
@@ -439,6 +456,7 @@ INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
+INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(RAGreedy, "greedy",
"Greedy Register Allocator", false, false)
@@ -490,6 +508,7 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveRegMatrix>();
AU.addRequired<EdgeBundles>();
AU.addRequired<SpillPlacement>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -679,7 +698,7 @@ unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
MCRegUnitIterator Units(PhysReg, TRI);
for (; Units.isValid(); ++Units) {
// Instantiate a "subquery", not to be confused with the Queries array.
- LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]);
+ LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]);
if (subQ.checkInterference())
break;
}
@@ -830,7 +849,11 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
SmallVector<LiveInterval*, 8> Intfs;
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
- assert(Q.seenAllInterferences() && "Didn't check all interfererences.");
+ // We usually have the interfering VRegs cached, so collectInterferingVRegs()
+ // should be fast. We may need to recalculate when different physregs
+ // overlap the same register unit, since different SubRanges may then have
+ // been queried against it.
+ Q.collectInterferingVRegs();
ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
Intfs.append(IVR.begin(), IVR.end());
}
@@ -2611,6 +2634,69 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
return 0;
}
+void RAGreedy::reportNumberOfSpillsReloads(MachineLoop *L, unsigned &Reloads,
+ unsigned &FoldedReloads,
+ unsigned &Spills,
+ unsigned &FoldedSpills) {
+ Reloads = 0;
+ FoldedReloads = 0;
+ Spills = 0;
+ FoldedSpills = 0;
+
+ // Sum up the spill and reloads in subloops.
+ for (MachineLoop *SubLoop : *L) {
+ unsigned SubReloads;
+ unsigned SubFoldedReloads;
+ unsigned SubSpills;
+ unsigned SubFoldedSpills;
+
+ reportNumberOfSpillsReloads(SubLoop, SubReloads, SubFoldedReloads,
+ SubSpills, SubFoldedSpills);
+ Reloads += SubReloads;
+ FoldedReloads += SubFoldedReloads;
+ Spills += SubSpills;
+ FoldedSpills += SubFoldedSpills;
+ }
+
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ int FI;
+
+ for (MachineBasicBlock *MBB : L->getBlocks())
+ // Handle blocks that were not included in subloops.
+ if (Loops->getLoopFor(MBB) == L)
+ for (MachineInstr &MI : *MBB) {
+ const MachineMemOperand *MMO;
+
+ if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI))
+ ++Reloads;
+ else if (TII->hasLoadFromStackSlot(MI, MMO, FI) &&
+ MFI.isSpillSlotObjectIndex(FI))
+ ++FoldedReloads;
+ else if (TII->isStoreToStackSlot(MI, FI) &&
+ MFI.isSpillSlotObjectIndex(FI))
+ ++Spills;
+ else if (TII->hasStoreToStackSlot(MI, MMO, FI) &&
+ MFI.isSpillSlotObjectIndex(FI))
+ ++FoldedSpills;
+ }
+
+ if (Reloads || FoldedReloads || Spills || FoldedSpills) {
+ using namespace ore;
+ MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload",
+ L->getStartLoc(), L->getHeader());
+ if (Spills)
+ R << NV("NumSpills", Spills) << " spills ";
+ if (FoldedSpills)
+ R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
+ if (Reloads)
+ R << NV("NumReloads", Reloads) << " reloads ";
+ if (FoldedReloads)
+ R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
+ ORE->emit(R << "generated in loop");
+ }
+}
+
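
reportNumberOfSpillsReloads recurses into subloops first and folds their counts into the parent, then scans only the blocks the current loop owns directly, so no block is counted twice. A minimal sketch of that accumulation shape (hypothetical loop-tree type, counts reduced to one number):

#include <vector>

struct LoopSketch {                   // hypothetical loop-tree node
  std::vector<LoopSketch *> SubLoops;
  unsigned OwnSpills = 0;             // spills in blocks owned directly
};

// Post-order accumulation: a loop's total includes all of its subloops.
unsigned totalSpills(const LoopSketch *L) {
  unsigned N = L->OwnSpills;
  for (const LoopSketch *Sub : L->SubLoops)
    N += totalSpills(Sub);
  return N;
}
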
bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
<< "********** Function: " << mf.getName() << '\n');
@@ -2633,6 +2719,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
Indexes = &getAnalysis<SlotIndexes>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
DomTree = &getAnalysis<MachineDominatorTree>();
+ ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
Loops = &getAnalysis<MachineLoopInfo>();
Bundles = &getAnalysis<EdgeBundles>();
@@ -2658,6 +2745,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
allocatePhysRegs();
tryHintsRecoloring();
postOptimization();
+ reportNumberOfSpillsReloads();
releaseMemory();
return true;
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 101b30bf3b65..3b5964eef55e 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -1,4 +1,4 @@
-//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
+//===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -29,34 +29,61 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/RegAllocPBQP.h"
#include "RegisterCoalescer.h"
#include "Spiller.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/Solution.h"
+#include "llvm/CodeGen/PBQPRAConstraint.h"
+#include "llvm/CodeGen/RegAllocPBQP.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
#include <limits>
+#include <map>
#include <memory>
#include <queue>
#include <set>
#include <sstream>
+#include <string>
+#include <system_error>
+#include <tuple>
#include <vector>
+#include <utility>
using namespace llvm;
@@ -86,7 +113,6 @@ namespace {
/// Programming problems.
class RegAllocPBQP : public MachineFunctionPass {
public:
-
static char ID;
/// Construct a PBQP register allocator.
@@ -113,7 +139,6 @@ public:
}
private:
-
typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
typedef std::vector<const LiveInterval*> Node2LIMap;
typedef std::vector<unsigned> AllowedSet;
@@ -187,7 +212,6 @@ public:
/// @brief Add interference edges between overlapping vregs.
class Interference : public PBQPRAConstraint {
private:
-
typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr;
typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey;
typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
@@ -276,7 +300,6 @@ private:
}
public:
-
void apply(PBQPRAGraph &G) override {
// The following is loosely based on the linear scan algorithm introduced in
// "Linear Scan Register Allocation" by Poletto and Sarkar. This version
@@ -363,7 +386,6 @@ public:
}
private:
-
// Create an Interference edge and add it to the graph, unless it is
// a null matrix, meaning the nodes' allowed registers do not have any
// interference. This case occurs frequently between integer and floating
@@ -372,7 +394,6 @@ private:
bool createInterferenceEdge(PBQPRAGraph &G,
PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId,
IMatrixCache &C) {
-
const TargetRegisterInfo &TRI =
*G.getMetadata().MF.getSubtarget().getRegisterInfo();
const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
@@ -409,7 +430,6 @@ private:
}
};
-
class Coalescing : public PBQPRAConstraint {
public:
void apply(PBQPRAGraph &G) override {
@@ -421,7 +441,6 @@ public:
// gives the Ok.
for (const auto &MBB : MF) {
for (const auto &MI : MBB) {
-
// Skip not-coalescable or already coalesced copies.
if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg())
continue;
@@ -479,7 +498,6 @@ public:
}
private:
-
void addVirtRegCoalesce(
PBQPRAGraph::RawMatrix &CostMat,
const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1,
@@ -496,14 +514,15 @@ private:
}
}
}
-
};
-} // End anonymous namespace.
+} // end anonymous namespace
// Out-of-line destructor/anchor for PBQPRAConstraint.
-PBQPRAConstraint::~PBQPRAConstraint() {}
+PBQPRAConstraint::~PBQPRAConstraint() = default;
+
void PBQPRAConstraint::anchor() {}
+
void PBQPRAConstraintList::anchor() {}
void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
@@ -554,7 +573,7 @@ void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI,
const MachineFunction &MF) {
- const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs();
for (unsigned i = 0; CSR[i] != 0; ++i)
if (TRI.regsOverlap(reg, CSR[i]))
return true;
@@ -777,7 +796,6 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
// If there are non-empty intervals allocate them using pbqp.
if (!VRegsToAlloc.empty()) {
-
const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
llvm::make_unique<PBQPRAConstraintList>();
@@ -840,7 +858,8 @@ static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId,
});
}
-void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
for (auto NId : nodeIds()) {
const Vector &Costs = getNodeCosts(NId);
assert(Costs.getLength() != 0 && "Empty vector in graph.");
@@ -861,7 +880,10 @@ void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const {
}
}
-LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); }
+LLVM_DUMP_METHOD void PBQP::RegAlloc::PBQPRAGraph::dump() const {
+ dump(dbgs());
+}
+#endif
void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const {
OS << "graph {\n";
diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp
index ece44c28e9ed..855aa37ff3c3 100644
--- a/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -103,9 +103,27 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Clobbered Registers: ");
- for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
- if (MRI->isPhysRegModified(PReg, true))
- RegMask[PReg / 32] &= ~(1u << PReg % 32);
+ const BitVector &UsedPhysRegsMask = MRI->getUsedPhysRegsMask();
+ auto SetRegAsDefined = [&RegMask] (unsigned Reg) {
+ RegMask[Reg / 32] &= ~(1u << Reg % 32);
+ };
+ // Scan all the physical registers. When a register is defined in the current
+ // function, mark it and all its aliasing registers as defined in the regmask.
+ for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+ // If a register is in the UsedPhysRegsMask set, mark it as defined.
+ // All its aliases will also be in the set, so we can skip marking
+ // the aliases here.
+ if (UsedPhysRegsMask.test(PReg)) {
+ SetRegAsDefined(PReg);
+ continue;
+ }
+ // If a register is defined by an instruction, mark it as defined together
+ // with all its aliases.
+ if (!MRI->def_empty(PReg)) {
+ for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI)
+ SetRegAsDefined(*AI);
+ }
+ }
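
The regmask being edited here packs one bit per physical register into 32-bit words: register R maps to word R / 32, bit R % 32, and clearing the bit marks R as clobbered. A self-contained illustration of the indexing, mirroring the SetRegAsDefined lambda above:

#include <cstdint>
#include <vector>

// Clear the bit for Reg, marking it clobbered (defined) in the mask.
void setRegAsDefined(std::vector<uint32_t> &RegMask, unsigned Reg) {
  RegMask[Reg / 32] &= ~(1u << (Reg % 32));
}

// A set bit means the register is preserved across the call.
bool isPreserved(const std::vector<uint32_t> &RegMask, unsigned Reg) {
  return (RegMask[Reg / 32] >> (Reg % 32)) & 1u;
}
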
if (!TargetFrameLowering::isSafeForNoCSROpt(F)) {
const uint32_t *CallPreservedMask =
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 178fa18ac5a6..82a3bd9a0bd1 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -1,4 +1,4 @@
-//===-- RegisterClassInfo.cpp - Dynamic Register Class Info ---------------===//
+//===- RegisterClassInfo.cpp - Dynamic Register Class Info ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,12 +14,22 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -29,8 +39,7 @@ static cl::opt<unsigned>
StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"),
cl::desc("Limit all regclasses to N registers"));
-RegisterClassInfo::RegisterClassInfo()
- : Tag(0), MF(nullptr), TRI(nullptr), CalleeSaved(nullptr) {}
+RegisterClassInfo::RegisterClassInfo() = default;
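
Several constructors in this diff shrink the same way: default member initializers in the class definition take over from the constructor init list, and the constructor itself becomes "= default". A minimal sketch of the pattern (hypothetical names):

struct ContextSketch {
  void *MF = nullptr;     // default member initializers replace the
  void *TRI = nullptr;    // long constructor init list, so the
  unsigned Tag = 0;       // handwritten constructor can disappear:
  bool Dirty = false;     // ContextSketch() = default; suffices.
};
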
void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
bool Update = false;
@@ -48,18 +57,20 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
// Does this MF have different CSRs?
assert(TRI && "no register info set");
- const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
- if (Update || CSR != CalleeSaved) {
- // Build a CSRNum map. Every CSR alias gets an entry pointing to the last
+
+ // Get the callee saved registers.
+ const MCPhysReg *CSR = MF->getRegInfo().getCalleeSavedRegs();
+ if (Update || CSR != CalleeSavedRegs) {
+ // Build a CSRAlias map. Every CSR alias saves the last
// overlapping CSR.
- CSRNum.clear();
- CSRNum.resize(TRI->getNumRegs(), 0);
- for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
+ CalleeSavedAliases.resize(TRI->getNumRegs(), 0);
+ for (const MCPhysReg *I = CSR; *I; ++I)
+ for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI)
+ CalleeSavedAliases[*AI] = *I;
+
Update = true;
}
- CalleeSaved = CSR;
+ CalleeSavedRegs = CSR;
// Different reserved registers?
const BitVector &RR = MF->getRegInfo().getReservedRegs();
@@ -103,7 +114,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
unsigned Cost = TRI->getCostPerUse(PhysReg);
MinCost = std::min(MinCost, Cost);
- if (CSRNum[PhysReg])
+ if (CalleeSavedAliases[PhysReg])
// PhysReg aliases a CSR, save it for later.
CSRAlias.push_back(PhysReg);
else {
@@ -114,7 +125,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
}
}
RCI.NumRegs = N + CSRAlias.size();
- assert (RCI.NumRegs <= NumRegs && "Allocation order larger than regclass");
+ assert(RCI.NumRegs <= NumRegs && "Allocation order larger than regclass");
// CSR aliases go after the volatile registers, preserve the target's order.
for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) {
@@ -156,9 +167,8 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
const TargetRegisterClass *RC = nullptr;
unsigned NumRCUnits = 0;
- for (TargetRegisterInfo::regclass_iterator
- RI = TRI->regclass_begin(), RE = TRI->regclass_end(); RI != RE; ++RI) {
- const int *PSetID = TRI->getRegClassPressureSets(*RI);
+ for (const TargetRegisterClass *C : TRI->regclasses()) {
+ const int *PSetID = TRI->getRegClassPressureSets(C);
for (; *PSetID != -1; ++PSetID) {
if ((unsigned)*PSetID == Idx)
break;
@@ -168,9 +178,9 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
// Found a register class that counts against this pressure set.
// For efficiency, only compute the set order for the largest set.
- unsigned NUnits = TRI->getRegClassWeight(*RI).WeightLimit;
+ unsigned NUnits = TRI->getRegClassWeight(C).WeightLimit;
if (!RC || NUnits > NumRCUnits) {
- RC = *RI;
+ RC = C;
NumRCUnits = NUnits;
}
}
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 4bb3c229afc5..bf44ee8453b6 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -189,6 +190,9 @@ namespace {
/// This returns true if an interval was modified.
bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+ /// We found a copy which can be moved to its less frequent predecessor.
+ bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
+
/// If the source of a copy is defined by a
/// trivial computation, replace the copy by rematerializing the definition.
bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
@@ -811,42 +815,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
assert(ASubValNo != nullptr);
- LaneBitmask AMask = SA.LaneMask;
- for (LiveInterval::SubRange &SB : IntB.subranges()) {
- LaneBitmask BMask = SB.LaneMask;
- LaneBitmask Common = BMask & AMask;
- if (Common.none())
- continue;
-
- DEBUG( dbgs() << "\t\tCopy_Merge " << PrintLaneMask(BMask)
- << " into " << PrintLaneMask(Common) << '\n');
- LaneBitmask BRest = BMask & ~AMask;
- LiveInterval::SubRange *CommonRange;
- if (BRest.any()) {
- SB.LaneMask = BRest;
- DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(BRest)
- << '\n');
- // Duplicate SubRange for newly merged common stuff.
- CommonRange = IntB.createSubRangeFrom(Allocator, Common, SB);
- } else {
- // We van reuse the L SubRange.
- SB.LaneMask = Common;
- CommonRange = &SB;
- }
- LiveRange RangeCopy(SB, Allocator);
-
- VNInfo *BSubValNo = CommonRange->getVNInfoAt(CopyIdx);
- assert(BSubValNo->def == CopyIdx);
- BSubValNo->def = ASubValNo->def;
- addSegmentsWithValNo(*CommonRange, BSubValNo, SA, ASubValNo);
- AMask &= ~BMask;
- }
- if (AMask.any()) {
- DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(AMask) << '\n');
- LiveRange *NewRange = IntB.createSubRange(Allocator, AMask);
- VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator);
- addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo);
- }
+ IntB.refineSubRanges(Allocator, SA.LaneMask,
+ [&Allocator,&SA,CopyIdx,ASubValNo](LiveInterval::SubRange &SR) {
+ VNInfo *BSubValNo = SR.empty()
+ ? SR.getNextValue(CopyIdx, Allocator)
+ : SR.getVNInfoAt(CopyIdx);
+ assert(BSubValNo != nullptr);
+ addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
+ });
}
}
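
The lambda above replaces roughly thirty lines of manual lane-mask splitting with a refineSubRanges-style callback. A rough, simplified sketch of what such an API does, reduced to lane masks only (stand-in types; the real method also copies live ranges and takes an allocator):

#include <cstdint>
#include <functional>
#include <vector>

struct SubRangeSketch { uint64_t LaneMask; };

// Split each existing subrange against Mask and hand every piece that
// lies inside Mask to Apply; lanes with no subrange yet get a new one.
void refineSubRangesSketch(std::vector<SubRangeSketch> &Ranges, uint64_t Mask,
                           const std::function<void(SubRangeSketch &)> &Apply) {
  uint64_t Rest = Mask;
  const size_t E = Ranges.size();        // new splits are appended
  for (size_t I = 0; I != E; ++I) {
    uint64_t Common = Ranges[I].LaneMask & Mask;
    if (!Common)
      continue;
    uint64_t Remainder = Ranges[I].LaneMask & ~Mask;
    if (Remainder) {                     // split off the untouched lanes
      Ranges[I].LaneMask = Remainder;
      Ranges.push_back({Common});
      Apply(Ranges.back());
    } else {                             // whole subrange is inside Mask
      Apply(Ranges[I]);
    }
    Rest &= ~Common;
  }
  if (Rest) {                            // lanes no subrange covered yet
    Ranges.push_back({Rest});
    Apply(Ranges.back());
  }
}
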
@@ -861,6 +837,184 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
return true;
}
+/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
+/// predecessor of BB2, and if B is not redefined on the way from A = B
+/// in BB0 to B = A in BB2, then B = A in BB2 is partially redundant if the
+/// execution goes through the path from BB0 to BB2. We may move B = A
+/// to the predecessor that lacks such a reversed copy.
+/// So we will transform the program from:
+/// BB0:
+/// A = B; BB1:
+/// ... ...
+/// / \ /
+/// BB2:
+/// ...
+/// B = A;
+///
+/// to:
+///
+/// BB0: BB1:
+/// A = B; ...
+/// ... B = A;
+/// / \ /
+/// BB2:
+/// ...
+///
+/// A special case is when BB0 and BB2 are the same BB which is the only
+/// BB in a loop:
+/// BB1:
+/// ...
+/// BB0/BB2: ----
+/// B = A; |
+/// ... |
+/// A = B; |
+/// |-------
+/// |
+/// We may hoist B = A from BB0/BB2 to BB1.
+///
+/// The major preconditions for correctly removing such partial
+/// redundancy include:
+/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
+/// the PHI is defined by the reversed copy A = B in BB0.
+/// 2. No B is referenced from the start of BB2 to B = A.
+/// 3. No B is defined from A = B to the end of BB0.
+/// 4. BB1 has only one successor.
+///
+/// 2 and 4 implicitly ensure B is not live at the end of BB1.
+/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
+/// colder place, which not only prevents an endless loop but also makes
+/// sure the movement of the copy is beneficial.
+bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
+ MachineInstr &CopyMI) {
+ assert(!CP.isPhys());
+ if (!CopyMI.isFullCopy())
+ return false;
+
+ MachineBasicBlock &MBB = *CopyMI.getParent();
+ if (MBB.isEHPad())
+ return false;
+
+ if (MBB.pred_size() != 2)
+ return false;
+
+ LiveInterval &IntA =
+ LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+ // A is defined by PHI at the entry of MBB.
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
+ assert(AValNo && !AValNo->isUnused() && "COPY source not live");
+ if (!AValNo->isPHIDef())
+ return false;
+
+ // No B is referenced before CopyMI in MBB.
+ if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
+ return false;
+
+ // MBB has two predecessors: one contains A = B so no copy will be inserted
+ // for it. The other one will have a copy moved from MBB.
+ bool FoundReverseCopy = false;
+ MachineBasicBlock *CopyLeftBB = nullptr;
+ for (MachineBasicBlock *Pred : MBB.predecessors()) {
+ VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
+ MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def);
+ if (!DefMI || !DefMI->isFullCopy()) {
+ CopyLeftBB = Pred;
+ continue;
+ }
+ // Check that DefMI is a reverse copy and that it is in block Pred.
+ if (DefMI->getOperand(0).getReg() != IntA.reg ||
+ DefMI->getOperand(1).getReg() != IntB.reg ||
+ DefMI->getParent() != Pred) {
+ CopyLeftBB = Pred;
+ continue;
+ }
+ // If there is any other def of B after DefMI and before the end of Pred,
+ // we need to keep the copy of B = A at the end of Pred if we remove
+ // B = A from MBB.
+ bool ValB_Changed = false;
+ for (auto VNI : IntB.valnos) {
+ if (VNI->isUnused())
+ continue;
+ if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
+ ValB_Changed = true;
+ break;
+ }
+ }
+ if (ValB_Changed) {
+ CopyLeftBB = Pred;
+ continue;
+ }
+ FoundReverseCopy = true;
+ }
+
+ // If no reverse copy is found in predecessors, nothing to do.
+ if (!FoundReverseCopy)
+ return false;
+
+ // If CopyLeftBB is nullptr, every predecessor of MBB contains a reverse
+ // copy, and CopyMI can be removed trivially provided IntA/IntB are updated.
+ // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
+ // update IntA/IntB.
+ //
+  // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single successor
+  // so that MBB is at least as hot as CopyLeftBB.
+ if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
+ return false;
+
+  // Now it is OK to move the copy.
+ if (CopyLeftBB) {
+ DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to BB#"
+ << CopyLeftBB->getNumber() << '\t' << CopyMI);
+
+ // Insert new copy to CopyLeftBB.
+ auto InsPos = CopyLeftBB->getFirstTerminator();
+ MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY), IntB.reg)
+ .addReg(IntA.reg);
+ SlotIndex NewCopyIdx =
+ LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
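+  // The new copy starts out as a dead def; extendToIndices() below grows
+  // IntB again so the uses previously fed by the removed copy reach it.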
+ IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
+ for (LiveInterval::SubRange &SR : IntB.subranges())
+ SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
+ } else {
+ DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from BB#"
+ << MBB.getNumber() << '\t' << CopyMI);
+ }
+
+ // Remove CopyMI.
+  // Note: it is fine to remove the copy before updating the live-ranges;
+  // while updating them we only look at slot indices and never go back
+  // to the instruction itself.
+ LIS->RemoveMachineInstrFromMaps(CopyMI);
+ CopyMI.eraseFromParent();
+
+ // Update the liveness.
+ SmallVector<SlotIndex, 8> EndPoints;
+ VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
+ LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
+ &EndPoints);
+ BValNo->markUnused();
+ // Extend IntB to the EndPoints of its original live interval.
+ LIS->extendToIndices(IntB, EndPoints);
+
+ // Now, do the same for its subranges.
+ for (LiveInterval::SubRange &SR : IntB.subranges()) {
+ EndPoints.clear();
+    VNInfo *SubBValNo = SR.Query(CopyIdx).valueOutOrDead();
+    assert(SubBValNo && "All sublanes should be live");
+    LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
+    SubBValNo->markUnused();
+ LIS->extendToIndices(SR, EndPoints);
+ }
+
+ // Finally, update the live-range of IntA.
+ shrinkToUses(&IntA);
+ return true;
+}
+
/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
/// defining a subregister.
static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
@@ -1290,7 +1444,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
// If SrcReg wasn't read, it may still be the case that DstReg is live-in
// because SrcReg is a sub-register.
- if (DstInt && !Reads && SubIdx)
+ if (DstInt && !Reads && SubIdx && !UseMI->isDebugValue())
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
// Replace SrcReg with DstReg in all UseMI operands.
@@ -1486,6 +1640,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
}
}
+  // See if we can partially eliminate the copy by moving it to one of the
+  // predecessors.
+ if (!CP.isPartial() && !CP.isPhys())
+ if (removePartialRedundancy(CP, *CopyMI))
+ return true;
+
// Otherwise, we are unable to join the intervals.
DEBUG(dbgs() << "\tInterference!\n");
Again = true; // May be possible to coalesce later.
@@ -1583,6 +1743,14 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
return false;
}
}
+
+ // We must also check for overlaps with regmask clobbers.
+ BitVector RegMaskUsable;
+ if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
+ !RegMaskUsable.test(DstReg)) {
+ DEBUG(dbgs() << "\t\tRegMask interference\n");
+ return false;
+ }
}
// Skip any value computations, we are not adding new values to the
@@ -1636,14 +1804,6 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
return false;
}
-
- // We must also check for clobbers caused by regmasks.
- for (const auto &MO : MI->operands()) {
- if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) {
- DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI);
- return false;
- }
- }
}
}
@@ -2738,39 +2898,16 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
LaneBitmask LaneMask,
CoalescerPair &CP) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- for (LiveInterval::SubRange &R : LI.subranges()) {
- LaneBitmask RMask = R.LaneMask;
- // LaneMask of subregisters common to subrange R and ToMerge.
- LaneBitmask Common = RMask & LaneMask;
- // There is nothing to do without common subregs.
- if (Common.none())
- continue;
-
- DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(RMask) << " into "
- << PrintLaneMask(Common) << '\n');
- // LaneMask of subregisters contained in the R range but not in ToMerge,
- // they have to split into their own subrange.
- LaneBitmask LRest = RMask & ~LaneMask;
- LiveInterval::SubRange *CommonRange;
- if (LRest.any()) {
- R.LaneMask = LRest;
- DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(LRest) << '\n');
- // Duplicate SubRange for newly merged common stuff.
- CommonRange = LI.createSubRangeFrom(Allocator, Common, R);
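+  // refineSubRanges() splits the existing subranges so that LaneMask is
+  // covered exactly and calls the callback on each subrange lying inside
+  // LaneMask; subranges created for previously uncovered lanes arrive empty.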
+ LI.refineSubRanges(Allocator, LaneMask,
+ [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) {
+ if (SR.empty()) {
+ SR.assign(ToMerge, Allocator);
} else {
- // Reuse the existing range.
- R.LaneMask = Common;
- CommonRange = &R;
+      // joinSubRegRanges() destroys the merged range, so we need a copy.
+ LiveRange RangeCopy(ToMerge, Allocator);
+ joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
}
- LiveRange RangeCopy(ToMerge, Allocator);
- joinSubRegRanges(*CommonRange, RangeCopy, Common, CP);
- LaneMask &= ~RMask;
- }
-
- if (LaneMask.any()) {
- DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n');
- LI.createSubRangeFrom(Allocator, LaneMask, ToMerge);
- }
+ });
}
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index fc84aebb14d7..c726edc88b41 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -1,4 +1,4 @@
-//===-- RegisterPressure.cpp - Dynamic Register Pressure ------------------===//
+//===- RegisterPressure.cpp - Dynamic Register Pressure -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,13 +12,37 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iterator>
+#include <limits>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -52,6 +76,7 @@ static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD
void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure,
const TargetRegisterInfo *TRI) {
@@ -97,6 +122,7 @@ void RegPressureTracker::dump() const {
P.dump(TRI);
}
+LLVM_DUMP_METHOD
void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
const char *sep = "";
for (const PressureChange &Change : *this) {
@@ -108,6 +134,7 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
}
dbgs() << '\n';
}
+#endif
void RegPressureTracker::increaseRegPressure(unsigned RegUnit,
LaneBitmask PreviousMask,
@@ -264,7 +291,6 @@ bool RegPressureTracker::isBottomClosed() const {
MachineBasicBlock::const_iterator());
}
-
SlotIndex RegPressureTracker::getCurrSlot() const {
MachineBasicBlock::const_iterator IdxPos =
skipDebugInstructionsForward(CurrPos, MBB->end());
@@ -328,7 +354,7 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
unsigned RegUnit) {
- auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
});
if (I == RegUnits.end())
@@ -340,7 +366,7 @@ static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
RegisterMaskPair Pair) {
unsigned RegUnit = Pair.RegUnit;
assert(Pair.LaneMask.any());
- auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
});
if (I == RegUnits.end()) {
@@ -352,7 +378,7 @@ static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits,
unsigned RegUnit) {
- auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
});
if (I == RegUnits.end()) {
@@ -366,7 +392,7 @@ static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
RegisterMaskPair Pair) {
unsigned RegUnit = Pair.RegUnit;
assert(Pair.LaneMask.any());
- auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ auto I = llvm::find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
return Other.RegUnit == RegUnit;
});
if (I != RegUnits.end()) {
@@ -423,6 +449,8 @@ namespace {
///
/// FIXME: always ignore tied opers
class RegisterOperandsCollector {
+ friend class llvm::RegisterOperands;
+
RegisterOperands &RegOpers;
const TargetRegisterInfo &TRI;
const MachineRegisterInfo &MRI;
@@ -517,11 +545,9 @@ class RegisterOperandsCollector {
addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll()));
}
}
-
- friend class llvm::RegisterOperands;
};
-} // namespace
+} // end anonymous namespace
void RegisterOperands::collect(const MachineInstr &MI,
const TargetRegisterInfo &TRI,
@@ -674,7 +700,7 @@ void RegPressureTracker::discoverLiveInOrOut(RegisterMaskPair Pair,
assert(Pair.LaneMask.any());
unsigned RegUnit = Pair.RegUnit;
- auto I = find_if(LiveInOrOut, [RegUnit](const RegisterMaskPair &Other) {
+ auto I = llvm::find_if(LiveInOrOut, [RegUnit](const RegisterMaskPair &Other) {
return Other.RegUnit == RegUnit;
});
LaneBitmask PrevMask;
@@ -772,9 +798,10 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
if (!TrackLaneMasks) {
addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
} else {
- auto I = find_if(*LiveUses, [Reg](const RegisterMaskPair Other) {
- return Other.RegUnit == Reg;
- });
+ auto I =
+ llvm::find_if(*LiveUses, [Reg](const RegisterMaskPair Other) {
+ return Other.RegUnit == Reg;
+ });
bool IsRedef = I != LiveUses->end();
if (IsRedef) {
// ignore re-defs here...
@@ -1154,7 +1181,7 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff,
if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == PSetID) {
int CritInc = (int)MNew - (int)CriticalPSets[CritIdx].getUnitInc();
- if (CritInc > 0 && CritInc <= INT16_MAX) {
+ if (CritInc > 0 && CritInc <= std::numeric_limits<int16_t>::max()) {
Delta.CriticalMax = PressureChange(PSetID);
Delta.CriticalMax.setUnitInc(CritInc);
}
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index fdf741fd58f7..6392136fa290 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -1,4 +1,4 @@
-//===-- RegisterScavenging.cpp - Machine register scavenging --------------===//
+//===- RegisterScavenging.cpp - Machine register scavenging ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,28 +15,32 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <string>
+
using namespace llvm;
#define DEBUG_TYPE "reg-scavenging"
void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) {
- for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
- LaneBitmask UnitMask = (*RUI).second;
- if (UnitMask.none() || (LaneMask & UnitMask).any())
- RegUnitsAvailable.reset((*RUI).first);
- }
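+  // Mark only the register units of Reg whose unit mask overlaps LaneMask;
+  // units with an empty unit mask are always marked.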
+ LiveUnits.addRegMasked(Reg, LaneMask);
}
void RegScavenger::init(MachineBasicBlock &MBB) {
@@ -44,6 +48,7 @@ void RegScavenger::init(MachineBasicBlock &MBB) {
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
+ LiveUnits.init(*TRI);
assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) &&
"Target changed?");
@@ -51,7 +56,6 @@ void RegScavenger::init(MachineBasicBlock &MBB) {
// Self-initialize.
if (!this->MBB) {
NumRegUnits = TRI->getNumRegUnits();
- RegUnitsAvailable.resize(NumRegUnits);
KillRegUnits.resize(NumRegUnits);
DefRegUnits.resize(NumRegUnits);
TmpRegUnits.resize(NumRegUnits);
@@ -64,32 +68,17 @@ void RegScavenger::init(MachineBasicBlock &MBB) {
I->Restore = nullptr;
}
- // All register units start out unused.
- RegUnitsAvailable.set();
-
- // Pristine CSRs are not available.
- BitVector PR = MF.getFrameInfo().getPristineRegs(MF);
- for (int I = PR.find_first(); I>0; I = PR.find_next(I))
- setRegUsed(I);
-
Tracking = false;
}
-void RegScavenger::setLiveInsUsed(const MachineBasicBlock &MBB) {
- for (const auto &LI : MBB.liveins())
- setRegUsed(LI.PhysReg, LI.LaneMask);
-}
-
void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) {
init(MBB);
- setLiveInsUsed(MBB);
+ LiveUnits.addLiveIns(MBB);
}
void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) {
init(MBB);
- // Merge live-ins of successors to get live-outs.
- for (const MachineBasicBlock *Succ : MBB.successors())
- setLiveInsUsed(*Succ);
+ LiveUnits.addLiveOuts(MBB);
  // Move the internal iterator to the last instruction of the block.
if (MBB.begin() != MBB.end()) {
@@ -263,36 +252,7 @@ void RegScavenger::backward() {
assert(Tracking && "Must be tracking to determine kills and defs");
const MachineInstr &MI = *MBBI;
- // Defined or clobbered registers are available now.
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isRegMask()) {
- for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd;
- ++RU) {
- for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) {
- if (MO.clobbersPhysReg(*RURI)) {
- RegUnitsAvailable.set(RU);
- break;
- }
- }
- }
- } else if (MO.isReg() && MO.isDef()) {
- unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
- isReserved(Reg))
- continue;
- addRegUnits(RegUnitsAvailable, Reg);
- }
- }
- // Mark read registers as unavailable.
- for (const MachineOperand &MO : MI.uses()) {
- if (MO.isReg() && MO.readsReg()) {
- unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
- isReserved(Reg))
- continue;
- removeRegUnits(RegUnitsAvailable, Reg);
- }
- }
+ LiveUnits.stepBackward(MI);
if (MBBI == MBB->begin()) {
MBBI = MachineBasicBlock::iterator(nullptr);
@@ -302,12 +262,9 @@ void RegScavenger::backward() {
}
bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const {
- if (includeReserved && isReserved(Reg))
- return true;
- for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
- if (!RegUnitsAvailable.test(*RUI))
- return true;
- return false;
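+  // Reserved registers are reported as used only if includeReserved is set.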
+ if (isReserved(Reg))
+ return includeReserved;
+ return !LiveUnits.available(Reg);
}
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
@@ -441,7 +398,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
unsigned NeedSize = RC->getSize();
unsigned NeedAlign = RC->getAlignment();
- unsigned SI = Scavenged.size(), Diff = UINT_MAX;
+ unsigned SI = Scavenged.size(), Diff = std::numeric_limits<unsigned>::max();
int FIB = MFI.getObjectIndexBegin(), FIE = MFI.getObjectIndexEnd();
for (unsigned I = 0; I < Scavenged.size(); ++I) {
if (Scavenged[I].Reg != 0)
diff --git a/lib/CodeGen/ResetMachineFunctionPass.cpp b/lib/CodeGen/ResetMachineFunctionPass.cpp
index 451964199ba5..3e259927ac5c 100644
--- a/lib/CodeGen/ResetMachineFunctionPass.cpp
+++ b/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -30,17 +30,23 @@ namespace {
/// Tells whether or not this pass should emit a fallback
/// diagnostic when it resets a function.
bool EmitFallbackDiag;
+ /// Whether we should abort immediately instead of resetting the function.
+ bool AbortOnFailedISel;
public:
static char ID; // Pass identification, replacement for typeid
- ResetMachineFunction(bool EmitFallbackDiag = false)
- : MachineFunctionPass(ID), EmitFallbackDiag(EmitFallbackDiag) {}
+ ResetMachineFunction(bool EmitFallbackDiag = false,
+ bool AbortOnFailedISel = false)
+ : MachineFunctionPass(ID), EmitFallbackDiag(EmitFallbackDiag),
+ AbortOnFailedISel(AbortOnFailedISel) {}
StringRef getPassName() const override { return "ResetMachineFunction"; }
bool runOnMachineFunction(MachineFunction &MF) override {
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel)) {
+ if (AbortOnFailedISel)
+ report_fatal_error("Instruction selection failed");
DEBUG(dbgs() << "Reseting: " << MF.getName() << '\n');
++NumFunctionsReset;
MF.reset();
@@ -62,6 +68,7 @@ INITIALIZE_PASS(ResetMachineFunction, DEBUG_TYPE,
"reset machine function if ISel failed", false, false)
MachineFunctionPass *
-llvm::createResetMachineFunctionPass(bool EmitFallbackDiag = false) {
- return new ResetMachineFunction(EmitFallbackDiag);
+llvm::createResetMachineFunctionPass(bool EmitFallbackDiag = false,
+ bool AbortOnFailedISel = false) {
+ return new ResetMachineFunction(EmitFallbackDiag, AbortOnFailedISel);
}
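+
+// Illustrative use (hypothetical pipeline setup, not part of this patch):
+// abort outright rather than resetting the function for a fallback run:
+//   PM.add(createResetMachineFunctionPass(/*EmitFallbackDiag=*/false,
+//                                         /*AbortOnFailedISel=*/true));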
diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp
index 2b82df293c14..fa68411284e7 100644
--- a/lib/CodeGen/SafeStack.cpp
+++ b/lib/CodeGen/SafeStack.cpp
@@ -451,7 +451,7 @@ void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
IRBuilder<> IRBFail(CheckTerm);
// FIXME: respect -fsanitize-trap / -ftrap-function here?
Constant *StackChkFail = F.getParent()->getOrInsertFunction(
- "__stack_chk_fail", IRB.getVoidTy(), nullptr);
+ "__stack_chk_fail", IRB.getVoidTy());
IRBFail.CreateCall(StackChkFail, {});
}
diff --git a/lib/CodeGen/SafeStackColoring.cpp b/lib/CodeGen/SafeStackColoring.cpp
index 7fbeaddb38e8..09289f947dc9 100644
--- a/lib/CodeGen/SafeStackColoring.cpp
+++ b/lib/CodeGen/SafeStackColoring.cpp
@@ -236,6 +236,7 @@ void StackColoring::calculateLiveIntervals() {
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void StackColoring::dumpAllocas() {
dbgs() << "Allocas:\n";
for (unsigned AllocaNo = 0; AllocaNo < NumAllocas; ++AllocaNo)
@@ -262,6 +263,7 @@ LLVM_DUMP_METHOD void StackColoring::dumpLiveRanges() {
dbgs() << " " << AllocaNo << ": " << Range << "\n";
}
}
+#endif
void StackColoring::run() {
DEBUG(dumpAllocas());
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 427d95268c74..dc72ac073258 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -1,4 +1,4 @@
-//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//===- ScheduleDAG.cpp - Implement the ScheduleDAG class ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,22 +7,32 @@
//
//===----------------------------------------------------------------------===//
//
-// This implements the ScheduleDAG class, which is a base class used by
-// scheduling implementation classes.
+/// \file Implements the ScheduleDAG class, which is a base class used by
+/// scheduling implementation classes.
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include <climits>
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <limits>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"
@@ -33,58 +43,52 @@ static cl::opt<bool> StressSchedOpt(
cl::desc("Stress test instruction scheduling"));
#endif
-void SchedulingPriorityQueue::anchor() { }
+void SchedulingPriorityQueue::anchor() {}
ScheduleDAG::ScheduleDAG(MachineFunction &mf)
: TM(mf.getTarget()), TII(mf.getSubtarget().getInstrInfo()),
TRI(mf.getSubtarget().getRegisterInfo()), MF(mf),
- MRI(mf.getRegInfo()), EntrySU(), ExitSU() {
+ MRI(mf.getRegInfo()) {
#ifndef NDEBUG
StressSched = StressSchedOpt;
#endif
}
-ScheduleDAG::~ScheduleDAG() {}
+ScheduleDAG::~ScheduleDAG() = default;
-/// Clear the DAG state (e.g. between scheduling regions).
void ScheduleDAG::clearDAG() {
SUnits.clear();
EntrySU = SUnit();
ExitSU = SUnit();
}
-/// getInstrDesc helper to handle SDNodes.
const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
if (!Node || !Node->isMachineOpcode()) return nullptr;
return &TII->get(Node->getMachineOpcode());
}
-/// addPred - This adds the specified edge as a pred of the current node if
-/// not already. It also adds the current node as a successor of the
-/// specified node.
bool SUnit::addPred(const SDep &D, bool Required) {
// If this node already has this dependence, don't add a redundant one.
- for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end();
- I != E; ++I) {
+ for (SDep &PredDep : Preds) {
// Zero-latency weak edges may be added purely for heuristic ordering. Don't
// add them if another kind of edge already exists.
- if (!Required && I->getSUnit() == D.getSUnit())
+ if (!Required && PredDep.getSUnit() == D.getSUnit())
return false;
- if (I->overlaps(D)) {
- // Extend the latency if needed. Equivalent to removePred(I) + addPred(D).
- if (I->getLatency() < D.getLatency()) {
- SUnit *PredSU = I->getSUnit();
+ if (PredDep.overlaps(D)) {
+ // Extend the latency if needed. Equivalent to
+ // removePred(PredDep) + addPred(D).
+ if (PredDep.getLatency() < D.getLatency()) {
+ SUnit *PredSU = PredDep.getSUnit();
// Find the corresponding successor in N.
- SDep ForwardD = *I;
+ SDep ForwardD = PredDep;
ForwardD.setSUnit(this);
- for (SmallVectorImpl<SDep>::iterator II = PredSU->Succs.begin(),
- EE = PredSU->Succs.end(); II != EE; ++II) {
- if (*II == ForwardD) {
- II->setLatency(D.getLatency());
+ for (SDep &SuccDep : PredSU->Succs) {
+ if (SuccDep == ForwardD) {
+ SuccDep.setLatency(D.getLatency());
break;
}
}
- I->setLatency(D.getLatency());
+ PredDep.setLatency(D.getLatency());
}
return false;
}
@@ -95,8 +99,10 @@ bool SUnit::addPred(const SDep &D, bool Required) {
SUnit *N = D.getSUnit();
// Update the bookkeeping.
if (D.getKind() == SDep::Data) {
- assert(NumPreds < UINT_MAX && "NumPreds will overflow!");
- assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!");
+ assert(NumPreds < std::numeric_limits<unsigned>::max() &&
+ "NumPreds will overflow!");
+ assert(N->NumSuccs < std::numeric_limits<unsigned>::max() &&
+ "NumSuccs will overflow!");
++NumPreds;
++N->NumSuccs;
}
@@ -105,7 +111,8 @@ bool SUnit::addPred(const SDep &D, bool Required) {
++WeakPredsLeft;
}
else {
- assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
+ assert(NumPredsLeft < std::numeric_limits<unsigned>::max() &&
+ "NumPredsLeft will overflow!");
++NumPredsLeft;
}
}
@@ -114,7 +121,8 @@ bool SUnit::addPred(const SDep &D, bool Required) {
++N->WeakSuccsLeft;
}
else {
- assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ assert(N->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
+ "NumSuccsLeft will overflow!");
++N->NumSuccsLeft;
}
}
@@ -127,51 +135,46 @@ bool SUnit::addPred(const SDep &D, bool Required) {
return true;
}
-/// removePred - This removes the specified edge as a pred of the current
-/// node if it exists. It also removes the current node as a successor of
-/// the specified node.
void SUnit::removePred(const SDep &D) {
// Find the matching predecessor.
- for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end();
- I != E; ++I)
- if (*I == D) {
- // Find the corresponding successor in N.
- SDep P = D;
- P.setSUnit(this);
- SUnit *N = D.getSUnit();
- SmallVectorImpl<SDep>::iterator Succ = find(N->Succs, P);
- assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
- N->Succs.erase(Succ);
- Preds.erase(I);
- // Update the bookkeeping.
- if (P.getKind() == SDep::Data) {
- assert(NumPreds > 0 && "NumPreds will underflow!");
- assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
- --NumPreds;
- --N->NumSuccs;
- }
- if (!N->isScheduled) {
- if (D.isWeak())
- --WeakPredsLeft;
- else {
- assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
- --NumPredsLeft;
- }
- }
- if (!isScheduled) {
- if (D.isWeak())
- --N->WeakSuccsLeft;
- else {
- assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
- --N->NumSuccsLeft;
- }
- }
- if (P.getLatency() != 0) {
- this->setDepthDirty();
- N->setHeightDirty();
- }
- return;
+ SmallVectorImpl<SDep>::iterator I = llvm::find(Preds, D);
+ if (I == Preds.end())
+ return;
+ // Find the corresponding successor in N.
+ SDep P = D;
+ P.setSUnit(this);
+ SUnit *N = D.getSUnit();
+ SmallVectorImpl<SDep>::iterator Succ = llvm::find(N->Succs, P);
+ assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
+ N->Succs.erase(Succ);
+ Preds.erase(I);
+ // Update the bookkeeping.
+ if (P.getKind() == SDep::Data) {
+ assert(NumPreds > 0 && "NumPreds will underflow!");
+ assert(N->NumSuccs > 0 && "NumSuccs will underflow!");
+ --NumPreds;
+ --N->NumSuccs;
+ }
+ if (!N->isScheduled) {
+ if (D.isWeak())
+ --WeakPredsLeft;
+ else {
+ assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
+ --NumPredsLeft;
}
+ }
+ if (!isScheduled) {
+ if (D.isWeak())
+ --N->WeakSuccsLeft;
+ else {
+ assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
+ --N->NumSuccsLeft;
+ }
+ }
+ if (P.getLatency() != 0) {
+ this->setDepthDirty();
+ N->setHeightDirty();
+ }
}
void SUnit::setDepthDirty() {
@@ -181,9 +184,8 @@ void SUnit::setDepthDirty() {
do {
SUnit *SU = WorkList.pop_back_val();
SU->isDepthCurrent = false;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(),
- E = SU->Succs.end(); I != E; ++I) {
- SUnit *SuccSU = I->getSUnit();
+ for (SDep &SuccDep : SU->Succs) {
+ SUnit *SuccSU = SuccDep.getSUnit();
if (SuccSU->isDepthCurrent)
WorkList.push_back(SuccSU);
}
@@ -197,18 +199,14 @@ void SUnit::setHeightDirty() {
do {
SUnit *SU = WorkList.pop_back_val();
SU->isHeightCurrent = false;
- for (SUnit::const_pred_iterator I = SU->Preds.begin(),
- E = SU->Preds.end(); I != E; ++I) {
- SUnit *PredSU = I->getSUnit();
+ for (SDep &PredDep : SU->Preds) {
+ SUnit *PredSU = PredDep.getSUnit();
if (PredSU->isHeightCurrent)
WorkList.push_back(PredSU);
}
} while (!WorkList.empty());
}
-/// setDepthToAtLeast - Update this node's successors to reflect the
-/// fact that this node's depth just increased.
-///
void SUnit::setDepthToAtLeast(unsigned NewDepth) {
if (NewDepth <= getDepth())
return;
@@ -217,9 +215,6 @@ void SUnit::setDepthToAtLeast(unsigned NewDepth) {
isDepthCurrent = true;
}
-/// setHeightToAtLeast - Update this node's predecessors to reflect the
-/// fact that this node's height just increased.
-///
void SUnit::setHeightToAtLeast(unsigned NewHeight) {
if (NewHeight <= getHeight())
return;
@@ -228,8 +223,7 @@ void SUnit::setHeightToAtLeast(unsigned NewHeight) {
isHeightCurrent = true;
}
-/// ComputeDepth - Calculate the maximal path from the node to the exit.
-///
+/// Calculates the maximal path from the node to the entry.
void SUnit::ComputeDepth() {
SmallVector<SUnit*, 8> WorkList;
WorkList.push_back(this);
@@ -238,12 +232,11 @@ void SUnit::ComputeDepth() {
bool Done = true;
unsigned MaxPredDepth = 0;
- for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
- E = Cur->Preds.end(); I != E; ++I) {
- SUnit *PredSU = I->getSUnit();
+ for (const SDep &PredDep : Cur->Preds) {
+ SUnit *PredSU = PredDep.getSUnit();
if (PredSU->isDepthCurrent)
MaxPredDepth = std::max(MaxPredDepth,
- PredSU->Depth + I->getLatency());
+ PredSU->Depth + PredDep.getLatency());
else {
Done = false;
WorkList.push_back(PredSU);
@@ -261,8 +254,7 @@ void SUnit::ComputeDepth() {
} while (!WorkList.empty());
}
-/// ComputeHeight - Calculate the maximal path from the node to the entry.
-///
+/// Calculates the maximal path from the node to the exit.
void SUnit::ComputeHeight() {
SmallVector<SUnit*, 8> WorkList;
WorkList.push_back(this);
@@ -271,12 +263,11 @@ void SUnit::ComputeHeight() {
bool Done = true;
unsigned MaxSuccHeight = 0;
- for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
- E = Cur->Succs.end(); I != E; ++I) {
- SUnit *SuccSU = I->getSUnit();
+ for (const SDep &SuccDep : Cur->Succs) {
+ SUnit *SuccSU = SuccDep.getSUnit();
if (SuccSU->isHeightCurrent)
MaxSuccHeight = std::max(MaxSuccHeight,
- SuccSU->Height + I->getLatency());
+ SuccSU->Height + SuccDep.getLatency());
else {
Done = false;
WorkList.push_back(SuccSU);
@@ -310,6 +301,7 @@ void SUnit::biasCriticalPath() {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void SUnit::print(raw_ostream &OS, const ScheduleDAG *DAG) const {
if (this == &DAG->ExitSU)
OS << "ExitSU";
@@ -319,15 +311,13 @@ void SUnit::print(raw_ostream &OS, const ScheduleDAG *DAG) const {
OS << "SU(" << NodeNum << ")";
}
-/// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or
-/// a group of nodes flagged together.
-void SUnit::dump(const ScheduleDAG *G) const {
+LLVM_DUMP_METHOD void SUnit::dump(const ScheduleDAG *G) const {
print(dbgs(), G);
dbgs() << ": ";
G->dumpNode(this);
}
-void SUnit::dumpAll(const ScheduleDAG *G) const {
+LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const {
dump(G);
dbgs() << " # preds left : " << NumPredsLeft << "\n";
@@ -343,41 +333,39 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
if (Preds.size() != 0) {
dbgs() << " Predecessors:\n";
- for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
- I != E; ++I) {
+    for (const SDep &PredDep : Preds) {
dbgs() << " ";
- switch (I->getKind()) {
+      switch (PredDep.getKind()) {
case SDep::Data: dbgs() << "data "; break;
case SDep::Anti: dbgs() << "anti "; break;
case SDep::Output: dbgs() << "out "; break;
case SDep::Order: dbgs() << "ord "; break;
}
- I->getSUnit()->print(dbgs(), G);
- if (I->isArtificial())
+      PredDep.getSUnit()->print(dbgs(), G);
+      if (PredDep.isArtificial())
dbgs() << " *";
- dbgs() << ": Latency=" << I->getLatency();
- if (I->isAssignedRegDep())
- dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
+ dbgs() << ": Latency=" << SuccDep.getLatency();
+ if (SuccDep.isAssignedRegDep())
+ dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI);
dbgs() << "\n";
}
}
if (Succs.size() != 0) {
dbgs() << " Successors:\n";
- for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
- I != E; ++I) {
+ for (const SDep &SuccDep : Succs) {
dbgs() << " ";
- switch (I->getKind()) {
+ switch (SuccDep.getKind()) {
case SDep::Data: dbgs() << "data "; break;
case SDep::Anti: dbgs() << "anti "; break;
case SDep::Output: dbgs() << "out "; break;
case SDep::Order: dbgs() << "ord "; break;
}
- I->getSUnit()->print(dbgs(), G);
- if (I->isArtificial())
+ SuccDep.getSUnit()->print(dbgs(), G);
+ if (SuccDep.isArtificial())
dbgs() << " *";
- dbgs() << ": Latency=" << I->getLatency();
- if (I->isAssignedRegDep())
- dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
+ dbgs() << ": Latency=" << SuccDep.getLatency();
+ if (SuccDep.isAssignedRegDep())
+ dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI);
dbgs() << "\n";
}
}
@@ -385,47 +373,44 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
#endif
#ifndef NDEBUG
-/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that
-/// their state is consistent. Return the number of scheduled nodes.
-///
unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
bool AnyNotSched = false;
unsigned DeadNodes = 0;
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- if (!SUnits[i].isScheduled) {
- if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
+  for (const SUnit &SU : SUnits) {
+    if (!SU.isScheduled) {
+      if (SU.NumPreds == 0 && SU.NumSuccs == 0) {
++DeadNodes;
continue;
}
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnits[i].dump(this);
+      SU.dump(this);
dbgs() << "has not been scheduled!\n";
AnyNotSched = true;
}
- if (SUnits[i].isScheduled &&
- (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
- unsigned(INT_MAX)) {
+    if (SU.isScheduled &&
+        (isBottomUp ? SU.getHeight() : SU.getDepth()) >
+          unsigned(std::numeric_limits<int>::max())) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnits[i].dump(this);
+      SU.dump(this);
dbgs() << "has an unexpected "
<< (isBottomUp ? "Height" : "Depth") << " value!\n";
AnyNotSched = true;
}
if (isBottomUp) {
- if (SUnits[i].NumSuccsLeft != 0) {
+      if (SU.NumSuccsLeft != 0) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnits[i].dump(this);
+        SU.dump(this);
dbgs() << "has successors left!\n";
AnyNotSched = true;
}
} else {
- if (SUnits[i].NumPredsLeft != 0) {
+      if (SU.NumPredsLeft != 0) {
if (!AnyNotSched)
dbgs() << "*** Scheduling failed! ***\n";
- SUnits[i].dump(this);
+      SU.dump(this);
dbgs() << "has predecessors left!\n";
AnyNotSched = true;
}
@@ -436,36 +421,33 @@ unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
}
#endif
-/// InitDAGTopologicalSorting - create the initial topological
-/// ordering from the DAG to be scheduled.
-///
-/// The idea of the algorithm is taken from
-/// "Online algorithms for managing the topological order of
-/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
-/// This is the MNR algorithm, which was first introduced by
-/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
-/// "Maintaining a topological order under edge insertions".
-///
-/// Short description of the algorithm:
-///
-/// Topological ordering, ord, of a DAG maps each node to a topological
-/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
-///
-/// This means that if there is a path from the node X to the node Z,
-/// then ord(X) < ord(Z).
-///
-/// This property can be used to check for reachability of nodes:
-/// if Z is reachable from X, then an insertion of the edge Z->X would
-/// create a cycle.
-///
-/// The algorithm first computes a topological ordering for the DAG by
-/// initializing the Index2Node and Node2Index arrays and then tries to keep
-/// the ordering up-to-date after edge insertions by reordering the DAG.
-///
-/// On insertion of the edge X->Y, the algorithm first marks by calling DFS
-/// the nodes reachable from Y, and then shifts them using Shift to lie
-/// immediately after X in Index2Node.
void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
+ // The idea of the algorithm is taken from
+ // "Online algorithms for managing the topological order of
+ // a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
+ // This is the MNR algorithm, which was first introduced by
+ // A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+ // "Maintaining a topological order under edge insertions".
+ //
+ // Short description of the algorithm:
+ //
+ // Topological ordering, ord, of a DAG maps each node to a topological
+ // index so that for all edges X->Y it is the case that ord(X) < ord(Y).
+ //
+ // This means that if there is a path from the node X to the node Z,
+ // then ord(X) < ord(Z).
+ //
+ // This property can be used to check for reachability of nodes:
+ // if Z is reachable from X, then an insertion of the edge Z->X would
+ // create a cycle.
+ //
+ // The algorithm first computes a topological ordering for the DAG by
+ // initializing the Index2Node and Node2Index arrays and then tries to keep
+ // the ordering up-to-date after edge insertions by reordering the DAG.
+ //
+ // On insertion of the edge X->Y, the algorithm first marks by calling DFS
+ // the nodes reachable from Y, and then shifts them using Shift to lie
+ // immediately after X in Index2Node.
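+  //
+  // For example (illustrative only): with edges A->B and A->C, the
+  // ordering ord = {A:0, C:1, B:2} is valid. Inserting C->B preserves it,
+  // whereas inserting B->A would require ord(B) < ord(A), contradicting
+  // ord(A) < ord(B), i.e. the new edge would close a cycle.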
unsigned DAGSize = SUnits.size();
std::vector<SUnit*> WorkList;
WorkList.reserve(DAGSize);
@@ -476,18 +458,17 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
// Initialize the data structures.
if (ExitSU)
WorkList.push_back(ExitSU);
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &SUnits[i];
- int NodeNum = SU->NodeNum;
- unsigned Degree = SU->Succs.size();
+ for (SUnit &SU : SUnits) {
+ int NodeNum = SU.NodeNum;
+ unsigned Degree = SU.Succs.size();
// Temporarily use the Node2Index array as scratch space for degree counts.
Node2Index[NodeNum] = Degree;
// Is it a node without dependencies?
if (Degree == 0) {
- assert(SU->Succs.empty() && "SUnit should have no successors");
+ assert(SU.Succs.empty() && "SUnit should have no successors");
// Collect leaf nodes.
- WorkList.push_back(SU);
+ WorkList.push_back(&SU);
}
}
@@ -497,9 +478,8 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
WorkList.pop_back();
if (SU->NodeNum < DAGSize)
Allocate(SU->NodeNum, --Id);
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- SUnit *SU = I->getSUnit();
+ for (const SDep &PredDep : SU->Preds) {
+ SUnit *SU = PredDep.getSUnit();
if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum])
// If all dependencies of the node are processed already,
// then the node can be computed now.
@@ -511,19 +491,15 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
#ifndef NDEBUG
// Check correctness of the ordering
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &SUnits[i];
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+ for (SUnit &SU : SUnits) {
+ for (const SDep &PD : SU.Preds) {
+ assert(Node2Index[SU.NodeNum] > Node2Index[PD.getSUnit()->NodeNum] &&
"Wrong topological sorting");
}
}
#endif
}
-/// AddPred - Updates the topological ordering to accommodate an edge
-/// to be added from SUnit X to SUnit Y.
void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
int UpperBound, LowerBound;
LowerBound = Node2Index[Y->NodeNum];
@@ -540,16 +516,10 @@ void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
}
}
-/// RemovePred - Updates the topological ordering to accommodate an
-/// an edge to be removed from the specified node N from the predecessors
-/// of the current node M.
void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
// InitDAGTopologicalSorting();
}
-/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark
-/// all nodes affected by the edge insertion. These nodes will later get new
-/// topological indexes by means of the Shift method.
void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
bool &HasLoop) {
std::vector<const SUnit*> WorkList;
@@ -560,8 +530,9 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
SU = WorkList.back();
WorkList.pop_back();
Visited.set(SU->NodeNum);
- for (int I = SU->Succs.size()-1; I >= 0; --I) {
- unsigned s = SU->Succs[I].getSUnit()->NodeNum;
+ for (const SDep &SuccDep
+ : make_range(SU->Succs.rbegin(), SU->Succs.rend())) {
+ unsigned s = SuccDep.getSUnit()->NodeNum;
// Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
if (s >= Node2Index.size())
continue;
@@ -571,14 +542,93 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
}
// Visit successors if not already and in affected region.
if (!Visited.test(s) && Node2Index[s] < UpperBound) {
- WorkList.push_back(SU->Succs[I].getSUnit());
+ WorkList.push_back(SuccDep.getSUnit());
}
}
} while (!WorkList.empty());
}
-/// Shift - Renumber the nodes so that the topological ordering is
-/// preserved.
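+/// Returns the vector of node numbers lying on any path from StartSU to
+/// TargetSU (excluding the endpoints themselves), computed by intersecting
+/// a forward walk from StartSU with a backward walk from TargetSU, both
+/// bounded by the nodes' topological indices. Sets Success to false if
+/// TargetSU is not reachable from StartSU.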
+std::vector<int> ScheduleDAGTopologicalSort::GetSubGraph(const SUnit &StartSU,
+ const SUnit &TargetSU,
+ bool &Success) {
+ std::vector<const SUnit*> WorkList;
+ int LowerBound = Node2Index[StartSU.NodeNum];
+ int UpperBound = Node2Index[TargetSU.NodeNum];
+ bool Found = false;
+ BitVector VisitedBack;
+ std::vector<int> Nodes;
+
+ if (LowerBound > UpperBound) {
+ Success = false;
+ return Nodes;
+ }
+
+ WorkList.reserve(SUnits.size());
+ Visited.reset();
+
+ // Starting from StartSU, visit all successors up
+ // to UpperBound.
+ WorkList.push_back(&StartSU);
+ do {
+ const SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ for (int I = SU->Succs.size()-1; I >= 0; --I) {
+ const SUnit *Succ = SU->Succs[I].getSUnit();
+ unsigned s = Succ->NodeNum;
+ // Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
+ if (Succ->isBoundaryNode())
+ continue;
+ if (Node2Index[s] == UpperBound) {
+ Found = true;
+ continue;
+ }
+ // Visit successors if not already and in affected region.
+ if (!Visited.test(s) && Node2Index[s] < UpperBound) {
+ Visited.set(s);
+ WorkList.push_back(Succ);
+ }
+ }
+ } while (!WorkList.empty());
+
+ if (!Found) {
+ Success = false;
+ return Nodes;
+ }
+
+ WorkList.clear();
+ VisitedBack.resize(SUnits.size());
+ Found = false;
+
+ // Starting from TargetSU, visit all predecessors up
+ // to LowerBound. SUs that are visited by the two
+ // passes are added to Nodes.
+ WorkList.push_back(&TargetSU);
+ do {
+ const SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ for (int I = SU->Preds.size()-1; I >= 0; --I) {
+ const SUnit *Pred = SU->Preds[I].getSUnit();
+ unsigned s = Pred->NodeNum;
+ // Edges to non-SUnits are allowed but ignored (e.g. EntrySU).
+ if (Pred->isBoundaryNode())
+ continue;
+ if (Node2Index[s] == LowerBound) {
+ Found = true;
+ continue;
+ }
+ if (!VisitedBack.test(s) && Visited.test(s)) {
+ VisitedBack.set(s);
+ WorkList.push_back(Pred);
+ Nodes.push_back(s);
+ }
+ }
+ } while (!WorkList.empty());
+
+ assert(Found && "Error in SUnit Graph!");
+ Success = true;
+ return Nodes;
+}
+
void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
int UpperBound) {
std::vector<int> L;
@@ -598,28 +648,23 @@ void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
}
}
- for (unsigned j = 0; j < L.size(); ++j) {
- Allocate(L[j], i - shift);
+ for (unsigned LI : L) {
+ Allocate(LI, i - shift);
i = i + 1;
}
}
-
-/// WillCreateCycle - Returns true if adding an edge to TargetSU from SU will
-/// create a cycle. If so, it is not safe to call AddPred(TargetSU, SU).
bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
// Is SU reachable from TargetSU via successor edges?
if (IsReachable(SU, TargetSU))
return true;
- for (SUnit::pred_iterator
- I = TargetSU->Preds.begin(), E = TargetSU->Preds.end(); I != E; ++I)
- if (I->isAssignedRegDep() &&
- IsReachable(SU, I->getSUnit()))
+ for (const SDep &PredDep : TargetSU->Preds)
+ if (PredDep.isAssignedRegDep() &&
+ IsReachable(SU, PredDep.getSUnit()))
return true;
return false;
}
-/// IsReachable - Checks if SU is reachable from TargetSU.
bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
const SUnit *TargetSU) {
// If insertion of the edge SU->TargetSU would create a cycle
@@ -637,7 +682,6 @@ bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
return HasLoop;
}
-/// Allocate - assign the topological index to the node n.
void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
Node2Index[n] = index;
Index2Node[index] = n;
@@ -647,4 +691,4 @@ ScheduleDAGTopologicalSort::
ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu)
: SUnits(sunits), ExitSU(exitsu) {}
-ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
+ScheduleHazardRecognizer::~ScheduleHazardRecognizer() = default;
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 611c5a71bd5a..18823b74c47f 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This implements the ScheduleDAGInstrs class, which implements re-scheduling
-// of MachineInstrs.
+/// \file This implements the ScheduleDAGInstrs class, which handles
+/// re-scheduling of MachineInstrs.
//
//===----------------------------------------------------------------------===//
@@ -101,8 +101,8 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
SchedModel.init(ST.getSchedModel(), &ST, TII);
}
-/// getUnderlyingObjectFromInt - This is the function that does the work of
-/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
+/// This is the function that does the work of looking through basic
+/// ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
do {
if (const Operator *U = dyn_cast<Operator>(V)) {
@@ -129,8 +129,8 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
} while (1);
}
-/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
-/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
+/// This is a wrapper around GetUnderlyingObjects and adds support for basic
+/// ptrtoint+arithmetic+inttoptr sequences.
static void getUnderlyingObjects(const Value *V,
SmallVectorImpl<Value *> &Objects,
const DataLayout &DL) {
@@ -158,9 +158,8 @@ static void getUnderlyingObjects(const Value *V,
} while (!Working.empty());
}
-/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
-/// information and it can be tracked to a normal reference to a known
-/// object, return the Value for that object.
+/// If this machine instr has memory reference information and it can be tracked
+/// to a normal reference to a known object, return the Value for that object.
static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
const MachineFrameInfo &MFI,
UnderlyingObjectsVector &Objects,
@@ -216,10 +215,6 @@ void ScheduleDAGInstrs::finishBlock() {
BB = nullptr;
}
-/// Initialize the DAG and common scheduler state for the current scheduling
-/// region. This does not actually create the DAG, only clears it. The
-/// scheduling driver may call BuildSchedGraph multiple times per scheduling
-/// region.
void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
MachineBasicBlock::iterator begin,
MachineBasicBlock::iterator end,
@@ -230,20 +225,10 @@ void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
NumRegionInstrs = regioninstrs;
}
-/// Close the current scheduling region. Don't clear any state in case the
-/// driver wants to refer to the previous scheduling region.
void ScheduleDAGInstrs::exitRegion() {
// Nothing to do.
}
-/// addSchedBarrierDeps - Add dependencies from instructions in the current
-/// list of instructions being scheduled to scheduling barrier by adding
-/// the exit SU to the register defs and use list. This is because we want to
-/// make sure instructions which define registers that are either used by
-/// the terminator or are live-out are properly scheduled. This is
-/// especially important when the definition latency of the return value(s)
-/// are too high to be hidden by the branch or when the liveout registers
-/// used by instructions in the fallthrough block.
void ScheduleDAGInstrs::addSchedBarrierDeps() {
MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
ExitSU.setInstr(ExitMI);
@@ -271,7 +256,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
}
}
-/// MO is an operand of SU's instruction that defines a physical register. Add
+/// MO is an operand of SU's instruction that defines a physical register. Adds
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
@@ -313,9 +298,9 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
}
}
-/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
-/// this SUnit to following instructions in the same scheduling region that
-/// depend the physical register referenced at OperIdx.
+/// \brief Adds register dependencies (data, anti, and output) from this SUnit
+/// to following instructions in the same scheduling region that depend on the
+/// physical register referenced at OperIdx.
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
MachineInstr *MI = SU->getInstr();
MachineOperand &MO = MI->getOperand(OperIdx);
@@ -406,9 +391,9 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
return TRI->getSubRegIndexLaneMask(SubReg);
}
-/// addVRegDefDeps - Add register output and data dependencies from this SUnit
-/// to instructions that occur later in the same scheduling region if they read
-/// from or write to the virtual register defined at OperIdx.
+/// Adds register output and data dependencies from this SUnit to instructions
+/// that occur later in the same scheduling region if they read from or write to
+/// the virtual register defined at OperIdx.
///
/// TODO: Hoist loop induction variable increments. This has to be
/// reevaluated. Generally, IV scheduling should be done before coalescing.
@@ -515,10 +500,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
}
-/// addVRegUseDeps - Add a register data dependency if the instruction that
-/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
-/// register antidependency from this SUnit to instructions that occur later in
-/// the same scheduling region if they write the virtual register.
+/// \brief Adds a register data dependency if the instruction that defines the
+/// virtual register used at OperIdx is mapped to an SUnit. Adds a register
+/// antidependency from this SUnit to instructions that occur later in the same
+/// scheduling region if they write the virtual register.
///
/// TODO: Handle ExitSU "uses" properly.
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
@@ -545,87 +530,25 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
}
}
-/// Return true if MI is an instruction we are unable to reason about
+/// Returns true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA));
}
-/// This returns true if the two MIs need a chain edge between them.
-/// This is called on normal stores and loads.
-static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
- const DataLayout &DL, MachineInstr *MIa,
- MachineInstr *MIb) {
- const MachineFunction *MF = MIa->getParent()->getParent();
- const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
-
- assert ((MIa->mayStore() || MIb->mayStore()) &&
- "Dependency checked between two loads");
-
- // Let the target decide if memory accesses cannot possibly overlap.
- if (TII->areMemAccessesTriviallyDisjoint(*MIa, *MIb, AA))
- return false;
-
- // To this point analysis is generic. From here on we do need AA.
- if (!AA)
- return true;
-
- // FIXME: Need to handle multiple memory operands to support all targets.
- if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
- return true;
-
- MachineMemOperand *MMOa = *MIa->memoperands_begin();
- MachineMemOperand *MMOb = *MIb->memoperands_begin();
-
- if (!MMOa->getValue() || !MMOb->getValue())
- return true;
-
- // The following interface to AA is fashioned after DAGCombiner::isAlias
- // and operates with MachineMemOperand offset with some important
- // assumptions:
- // - LLVM fundamentally assumes flat address spaces.
- // - MachineOperand offset can *only* result from legalization and
- // cannot affect queries other than the trivial case of overlap
- // checking.
- // - These offsets never wrap and never step outside
- // of allocated objects.
- // - There should never be any negative offsets here.
- //
- // FIXME: Modify API to hide this math from "user"
- // FIXME: Even before we go to AA we can reason locally about some
- // memory objects. It can save compile time, and possibly catch some
- // corner cases not currently covered.
-
- assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
- assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");
-
- int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
- int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
- int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;
-
- AliasResult AAResult =
- AA->alias(MemoryLocation(MMOa->getValue(), Overlapa,
- UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
- MemoryLocation(MMOb->getValue(), Overlapb,
- UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
-
- return (AAResult != NoAlias);
-}
-
-/// Check whether two objects need a chain edge and add it if needed.
void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
unsigned Latency) {
- if (MIsNeedChainEdge(AAForDep, &MFI, MF.getDataLayout(), SUa->getInstr(),
- SUb->getInstr())) {
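+  // MachineInstr::mayAlias performs the target's trivially-disjoint check
+  // followed by the AA-based memory-operand overlap query.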
+ if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
SDep Dep(SUa, SDep::MayAliasMem);
Dep.setLatency(Latency);
SUb->addPred(Dep);
}
}
-/// Create an SUnit for each real instruction, numbered in top-down topological
-/// order. The instruction order A < B, implies that no edge exists from B to A.
+/// \brief Creates an SUnit for each real instruction, numbered in top-down
+/// topological order. The instruction order A < B implies that no edge exists
+/// from B to A.
///
/// Map each real instruction to its SUnit.
///
@@ -682,14 +605,13 @@ void ScheduleDAGInstrs::initSUnits() {
}
class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
-
/// Current total number of SUs in map.
unsigned NumNodes;
/// 1 for loads, 0 for stores. (see comment in SUList)
unsigned TrueMemOrderLatency;
-public:
+public:
Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {}
/// To keep NumNodes up to date, insert() is used instead of
@@ -697,8 +619,8 @@ public:
ValueType &operator[](const SUList &Key) {
llvm_unreachable("Don't use. Use insert() instead."); };
- /// Add SU to the SUList of V. If Map grows huge, reduce its size
- /// by calling reduce().
+ /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
+ /// reduce().
void inline insert(SUnit *SU, ValueType V) {
MapVector::operator[](V).push_back(SU);
NumNodes++;
@@ -723,7 +645,7 @@ public:
unsigned inline size() const { return NumNodes; }
- /// Count the number of SUs in this map after a reduction.
+ /// Counts the number of SUs in this map after a reduction.
void reComputeSize(void) {
NumNodes = 0;
for (auto &I : *this)
@@ -797,9 +719,6 @@ void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
map.reComputeSize();
}
-/// If RegPressure is non-null, compute register pressure as a side effect. The
-/// DAG builder is an efficient place to do it because it already visits
-/// operands.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
RegPressureTracker *RPTracker,
PressureDiffs *PDiffs,
@@ -1088,10 +1007,6 @@ void ScheduleDAGInstrs::Value2SUsMap::dump() {
}
}
-/// Reduce maps in FIFO order, by N SUs. This is better than turning
-/// every Nth memory SU into BarrierChain in buildSchedGraph(), since
-/// it avoids unnecessary edges between seen SUs above the new
-/// BarrierChain, and those below it.
void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
Value2SUsMap &loads, unsigned N) {
DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";
@@ -1142,7 +1057,6 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
loads.dump());
}
-/// \brief Initialize register live-range state for updating kills.
void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
// Start with no live registers.
LiveRegs.reset();
@@ -1178,32 +1092,35 @@ static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
return;
} else
- (--End)->clearRegisterKills(Reg, TRI);
+ (--End)->clearRegisterKills(Reg, TRI);
}
}
-bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
+void ScheduleDAGInstrs::toggleKillFlag(MachineInstr &MI, MachineOperand &MO) {
+ if (MO.isDebug())
+ return;
+
// Setting kill flag...
if (!MO.isKill()) {
MO.setIsKill(true);
- toggleBundleKillFlag(MI, MO.getReg(), true, TRI);
- return false;
+ toggleBundleKillFlag(&MI, MO.getReg(), true, TRI);
+ return;
}
// If MO itself is live, clear the kill flag...
if (LiveRegs.test(MO.getReg())) {
MO.setIsKill(false);
- toggleBundleKillFlag(MI, MO.getReg(), false, TRI);
- return false;
+ toggleBundleKillFlag(&MI, MO.getReg(), false, TRI);
+ return;
}
// If any subreg of MO is live, then create an imp-def for that
// subreg and keep MO marked as killed.
MO.setIsKill(false);
- toggleBundleKillFlag(MI, MO.getReg(), false, TRI);
+ toggleBundleKillFlag(&MI, MO.getReg(), false, TRI);
bool AllDead = true;
const unsigned SuperReg = MO.getReg();
- MachineInstrBuilder MIB(MF, MI);
+ MachineInstrBuilder MIB(MF, &MI);
for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) {
if (LiveRegs.test(*SubRegs)) {
MIB.addReg(*SubRegs, RegState::ImplicitDefine);
@@ -1213,13 +1130,12 @@ bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) {
if(AllDead) {
MO.setIsKill(true);
- toggleBundleKillFlag(MI, MO.getReg(), true, TRI);
+ toggleBundleKillFlag(&MI, MO.getReg(), true, TRI);
}
- return false;
}
-// FIXME: Reuse the LivePhysRegs utility for this.
void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
+ // FIXME: Reuse the LivePhysRegs utility for this.
DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
LiveRegs.resize(TRI->getNumRegs());
@@ -1289,7 +1205,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
if (MO.isKill() != kill) {
DEBUG(dbgs() << "Fixing " << MO << " in ");
- toggleKillFlag(&MI, MO);
+ toggleKillFlag(MI, MO);
DEBUG(MI.dump());
DEBUG({
if (MI.getOpcode() == TargetOpcode::BUNDLE) {
@@ -1319,6 +1235,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
}
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
+ // Cannot completely remove virtual function even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
SU->getInstr()->dump();
#endif
@@ -1347,7 +1264,7 @@ std::string ScheduleDAGInstrs::getDAGName() const {
//===----------------------------------------------------------------------===//
namespace llvm {
-/// \brief Internal state used to compute SchedDFSResult.
+/// Internal state used to compute SchedDFSResult.
class SchedDFSImpl {
SchedDFSResult &R;
@@ -1358,8 +1275,8 @@ class SchedDFSImpl {
struct RootData {
unsigned NodeID;
- unsigned ParentNodeID; // Parent node (member of the parent subtree).
- unsigned SubInstrCount; // Instr count in this tree only, not children.
+ unsigned ParentNodeID; ///< Parent node (member of the parent subtree).
+ unsigned SubInstrCount; ///< Instr count in this tree only, not children.
RootData(unsigned id): NodeID(id),
ParentNodeID(SchedDFSResult::InvalidSubtreeID),
@@ -1375,7 +1292,7 @@ public:
RootSet.setUniverse(R.DFSNodeData.size());
}
- /// Return true if this node been visited by the DFS traversal.
+ /// Returns true if this node has been visited by the DFS traversal.
///
/// During visitPostorderNode the Node's SubtreeID is assigned to the Node
/// ID. Later, SubtreeID is updated but remains valid.
@@ -1384,7 +1301,7 @@ public:
!= SchedDFSResult::InvalidSubtreeID;
}
- /// Initialize this node's instruction count. We don't need to flag the node
+ /// Initializes this node's instruction count. We don't need to flag the node
/// visited until visitPostorder because the DAG cannot have cycles.
void visitPreorder(const SUnit *SU) {
R.DFSNodeData[SU->NodeNum].InstrCount =
@@ -1433,8 +1350,8 @@ public:
RootSet[SU->NodeNum] = RData;
}
- /// Called once for each tree edge after calling visitPostOrderNode on the
- /// predecessor. Increment the parent node's instruction count and
+ /// \brief Called once for each tree edge after calling visitPostOrderNode on
+ /// the predecessor. Increments the parent node's instruction count and
/// preemptively joins this subtree to its parent's if it is small enough.
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
R.DFSNodeData[Succ->NodeNum].InstrCount
@@ -1442,13 +1359,13 @@ public:
joinPredSubtree(PredDep, Succ);
}
- /// Add a connection for cross edges.
+ /// Adds a connection for cross edges.
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
}
- /// Set each node's subtree ID to the representative ID and record connections
- /// between trees.
+ /// Sets each node's subtree ID to the representative ID and records
+ /// connections between trees.
void finalize() {
SubtreeClasses.compress();
R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
@@ -1484,8 +1401,8 @@ public:
}
protected:
- /// Join the predecessor subtree with the successor that is its DFS
- /// parent. Apply some heuristics before joining.
+ /// Joins the predecessor subtree with the successor that is its DFS parent.
+ /// Applies some heuristics before joining.
bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
bool CheckLimit = true) {
assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
@@ -1531,10 +1448,10 @@ protected:
} while (FromTree != SchedDFSResult::InvalidSubtreeID);
}
};
-} // namespace llvm
+} // end namespace llvm
namespace {
-/// \brief Manage the stack used by a reverse depth-first search over the DAG.
+/// Manages the stack used by a reverse depth-first search over the DAG.
class SchedDAGReverseDFS {
std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
public:
@@ -1569,7 +1486,7 @@ static bool hasDataSucc(const SUnit *SU) {
return false;
}
-/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
+/// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
/// search from this root.
void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
if (!IsBottomUp)
@@ -1626,8 +1543,8 @@ void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
}
}
-LLVM_DUMP_METHOD
-void ILPValue::print(raw_ostream &OS) const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
OS << InstrCount << " / " << Length << " = ";
if (!Length)
OS << "BADILP";
@@ -1635,8 +1552,7 @@ void ILPValue::print(raw_ostream &OS) const {
OS << format("%g", ((double)InstrCount / Length));
}
-LLVM_DUMP_METHOD
-void ILPValue::dump() const {
+LLVM_DUMP_METHOD void ILPValue::dump() const {
dbgs() << *this << '\n';
}
@@ -1648,4 +1564,5 @@ raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
return OS;
}
-} // namespace llvm
+} // end namespace llvm
+#endif
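
For reference, the ILP metric printed above is simply the subtree's instruction count over its critical-path length, with a zero-length path reported as BADILP. A tiny self-contained mirror of that printing logic:

    #include <cstdio>

    // The metric ILPValue prints: instructions per unit of critical-path
    // length, with a zero-length path reported as BADILP.
    struct ILP {
      unsigned InstrCount, Length;
      void print() const {
        if (!Length)
          std::printf("%u / %u = BADILP\n", InstrCount, Length);
        else
          std::printf("%u / %u = %g\n", InstrCount, Length,
                      (double)InstrCount / Length);
      }
    };

    int main() {
      ILP{8, 4}.print(); // 8 / 4 = 2
      ILP{5, 0}.print(); // 5 / 0 = BADILP
    }
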
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 83bc1ba7beb9..b3d83d5313af 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -1,4 +1,4 @@
-//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===//
+//===- ScoreboardHazardRecognizer.cpp - Scheduler Support -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,11 +15,13 @@
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include <cassert>
using namespace llvm;
@@ -29,8 +31,7 @@ ScoreboardHazardRecognizer::ScoreboardHazardRecognizer(
const InstrItineraryData *II, const ScheduleDAG *SchedDAG,
const char *ParentDebugType)
: ScheduleHazardRecognizer(), DebugType(ParentDebugType), ItinData(II),
- DAG(SchedDAG), IssueWidth(0), IssueCount(0) {
-
+ DAG(SchedDAG) {
// Determine the maximum depth of any itinerary. This determines the depth of
// the scoreboard. We always make the scoreboard at least 1 cycle deep to
// avoid dealing with the boundary condition.
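
The constructor keeps the scoreboard at least one cycle deep to sidestep that boundary condition, and (in the code following this hunk) rounds the depth up to a power of two so the per-cycle index can be masked rather than taken modulo. A simplified sketch of such a cyclic scoreboard; the class shape is illustrative, not the actual recognizer:

    #include <cstdint>
    #include <vector>

    // Cyclic scoreboard: one bitmask of reserved functional units per cycle.
    class MiniScoreboard {
      std::vector<uint32_t> Rows; // size is a power of two
      unsigned Head = 0;          // index of the current cycle

    public:
      explicit MiniScoreboard(unsigned MinDepth) {
        unsigned Depth = 1; // always at least 1 cycle deep
        while (Depth < MinDepth)
          Depth <<= 1; // power of two, so indexing can use a mask
        Rows.assign(Depth, 0);
      }
      uint32_t &operator[](unsigned Cycle) {
        return Rows[(Head + Cycle) & (Rows.size() - 1)];
      }
      void advance() { // retire the current cycle and rotate
        (*this)[0] = 0;
        Head = (Head + 1) & (Rows.size() - 1);
      }
    };
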
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2c7bffe76503..4d468551ae24 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -53,10 +53,6 @@ STATISTIC(SlicedLoads, "Number of load sliced");
namespace {
static cl::opt<bool>
- CombinerAA("combiner-alias-analysis", cl::Hidden,
- cl::desc("Enable DAG combiner alias-analysis heuristics"));
-
- static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
cl::desc("Enable DAG combiner's use of IR alias analysis"));
@@ -133,6 +129,9 @@ namespace {
/// Add to the worklist making sure its instance is at the back (next to be
/// processed).
void AddToWorklist(SDNode *N) {
+ assert(N->getOpcode() != ISD::DELETED_NODE &&
+ "Deleted Node added to Worklist");
+
// Skip handle nodes as they can't usefully be combined and confuse the
// zero-use deletion strategy.
if (N->getOpcode() == ISD::HANDLENODE)
@@ -177,6 +176,7 @@ namespace {
void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
private:
+ unsigned MaximumLegalStoreInBits;
/// Check the specified integer node value to see if it can be simplified or
/// if things it uses can be simplified by bit propagation.
@@ -232,9 +232,12 @@ namespace {
SDValue visitTokenFactor(SDNode *N);
SDValue visitMERGE_VALUES(SDNode *N);
SDValue visitADD(SDNode *N);
+ SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitSUB(SDNode *N);
SDValue visitADDC(SDNode *N);
+ SDValue visitUADDO(SDNode *N);
SDValue visitSUBC(SDNode *N);
+ SDValue visitUSUBO(SDNode *N);
SDValue visitADDE(SDNode *N);
SDValue visitSUBE(SDNode *N);
SDValue visitMUL(SDNode *N);
@@ -259,6 +262,7 @@ namespace {
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitRotate(SDNode *N);
+ SDValue visitABS(SDNode *N);
SDValue visitBSWAP(SDNode *N);
SDValue visitBITREVERSE(SDNode *N);
SDValue visitCTLZ(SDNode *N);
@@ -274,6 +278,7 @@ namespace {
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
+ SDValue visitAssertZext(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
@@ -336,6 +341,7 @@ namespace {
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
SDValue foldSelectOfConstants(SDNode *N);
+ SDValue foldBinOpIntoSelect(SDNode *BO);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
@@ -344,6 +350,8 @@ namespace {
bool NotExtCompare = false);
SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC);
+ SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
+ const SDLoc &DL);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, bool foldBooleans = true);
@@ -377,6 +385,7 @@ namespace {
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
+ SDValue MatchLoadCombine(SDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
@@ -384,9 +393,9 @@ namespace {
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
- SDValue createBuildVecShuffle(SDLoc DL, SDNode *N, ArrayRef<int> VectorMask,
- SDValue VecIn1, SDValue VecIn2,
- unsigned LeftIdx);
+ SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
+ ArrayRef<int> VectorMask, SDValue VecIn1,
+ SDValue VecIn2, unsigned LeftIdx);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -416,15 +425,12 @@ namespace {
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
- MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
- MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
+ MemOpLink(LSBaseSDNode *N, int64_t Offset)
+ : MemNode(N), OffsetFromBase(Offset) {}
// Ptr to the mem node.
LSBaseSDNode *MemNode;
// Offset from the base ptr.
int64_t OffsetFromBase;
- // What is the sequence number of this mem node.
- // Lowest mem operand in the DAG starts at zero.
- unsigned SequenceNum;
};
/// This is a helper function for visitMUL to check the profitability
@@ -435,12 +441,6 @@ namespace {
SDValue &AddNode,
SDValue &ConstNode);
- /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
- /// constant build_vector of the stored constant values in Stores.
- SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL,
- ArrayRef<MemOpLink> Stores,
- SmallVectorImpl<SDValue> &Chains,
- EVT Ty) const;
/// This is a helper function for visitAND and visitZERO_EXTEND. Returns
/// true if the (and (load x) c) pattern matches an extload. ExtVT returns
@@ -451,34 +451,35 @@ namespace {
EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
bool &NarrowLoad);
+ /// Helper function for MergeConsecutiveStores which merges the
+ /// component store chains.
+ SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumStores);
+
/// This is a helper function for MergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store.
- /// \return number of stores that were merged into a merged store (always
- /// a prefix of \p StoreNode).
- bool MergeStoresOfConstantsOrVecElts(
- SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
- bool IsConstantSrc, bool UseVector);
+ /// \return True if a merged store was created.
+ bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
+ EVT MemVT, unsigned NumStores,
+ bool IsConstantSrc, bool UseVector);
/// This is a helper function for MergeConsecutiveStores.
/// Stores that may be merged are placed in StoreNodes.
- /// Loads that may alias with those stores are placed in AliasLoadNodes.
- void getStoreMergeAndAliasCandidates(
- StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
- SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
+ void getStoreMergeCandidates(StoreSDNode *St,
+ SmallVectorImpl<MemOpLink> &StoreNodes);
/// Helper function for MergeConsecutiveStores. Checks if
/// Candidate stores have indirect dependency through their
/// operands. \return True if safe to merge
bool checkMergeStoreCandidatesForDependencies(
- SmallVectorImpl<MemOpLink> &StoreNodes);
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
/// \return True if stores were merged.
- bool MergeConsecutiveStores(StoreSDNode *N,
- SmallVectorImpl<MemOpLink> &StoreNodes);
+ bool MergeConsecutiveStores(StoreSDNode *N);
/// \brief Try to transform a truncation where C is a constant:
/// (trunc (and X, C)) -> (and (trunc X), (trunc C))
@@ -493,6 +494,13 @@ namespace {
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
+
+ MaximumLegalStoreInBits = 0;
+ for (MVT VT : MVT::all_valuetypes())
+ if (EVT(VT).isSimple() && VT != MVT::Other &&
+ TLI.isTypeLegal(EVT(VT)) &&
+ VT.getSizeInBits() >= MaximumLegalStoreInBits)
+ MaximumLegalStoreInBits = VT.getSizeInBits();
}
/// Runs the dag combiner on all nodes in the work list
@@ -607,10 +615,16 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
switch (Op.getOpcode()) {
default: return false;
- case ISD::ConstantFP:
- // Don't invert constant FP values after legalize. The negated constant
- // isn't necessarily legal.
- return LegalOperations ? 0 : 1;
+ case ISD::ConstantFP: {
+ if (!LegalOperations)
+ return 1;
+
+ // Don't invert constant FP values after legalization unless the target says
+ // the negated constant is legal.
+ EVT VT = Op.getValueType();
+ return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
+ }
case ISD::FADD:
// FIXME: determine better conditions for this xform.
if (!Options->UnsafeFPMath) return 0;
@@ -629,7 +643,8 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!Options->UnsafeFPMath && !Op.getNode()->getFlags()->hasNoSignedZeros())
+ if (!Options->NoSignedZerosFPMath &&
+ !Op.getNode()->getFlags()->hasNoSignedZeros())
return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
@@ -1079,37 +1094,36 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
+ DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+
bool Replace0 = false;
SDValue N0 = Op.getOperand(0);
SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
- if (!NN0.getNode())
- return SDValue();
bool Replace1 = false;
SDValue N1 = Op.getOperand(1);
- SDValue NN1;
- if (N0 == N1)
- NN1 = NN0;
- else {
- NN1 = PromoteOperand(N1, PVT, Replace1);
- if (!NN1.getNode())
- return SDValue();
- }
+ SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
+ SDLoc DL(Op);
- AddToWorklist(NN0.getNode());
- if (NN1.getNode())
- AddToWorklist(NN1.getNode());
+ SDValue RV =
+ DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
- if (Replace0)
+ // Now replace instances of N0 and N1 with the promoted values.
+ if (Replace0 && N0 && N0.getOpcode() != ISD::DELETED_NODE && NN0 &&
+ NN0.getOpcode() != ISD::DELETED_NODE) {
+ AddToWorklist(NN0.getNode());
ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
- if (Replace1)
+ }
+
+ if (Replace1 && N1 && N1.getOpcode() != ISD::DELETED_NODE && NN1 &&
+ NN1.getOpcode() != ISD::DELETED_NODE) {
+ AddToWorklist(NN1.getNode());
ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
+ }
- DEBUG(dbgs() << "\nPromoting ";
- Op.getNode()->dump(&DAG));
- SDLoc DL(Op);
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(Opc, DL, PVT, NN0, NN1));
+ // Deal with Op being deleted.
+ if (Op && Op.getOpcode() != ISD::DELETED_NODE)
+ return RV;
}
return SDValue();
}
@@ -1137,26 +1151,32 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
+ DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+
bool Replace = false;
SDValue N0 = Op.getOperand(0);
+ SDValue N1 = Op.getOperand(1);
if (Opc == ISD::SRA)
- N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
+ N0 = SExtPromoteOperand(N0, PVT);
else if (Opc == ISD::SRL)
- N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
+ N0 = ZExtPromoteOperand(N0, PVT);
else
N0 = PromoteOperand(N0, PVT, Replace);
+
if (!N0.getNode())
return SDValue();
+ SDLoc DL(Op);
+ SDValue RV =
+ DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
+
AddToWorklist(N0.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
- DEBUG(dbgs() << "\nPromoting ";
- Op.getNode()->dump(&DAG));
- SDLoc DL(Op);
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(Opc, DL, PVT, N0, Op.getOperand(1)));
+ // Deal with Op being deleted.
+ if (Op && Op.getOpcode() != ISD::DELETED_NODE)
+ return RV;
}
return SDValue();
}
@@ -1361,8 +1381,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
else {
assert(N->getValueType(0) == RV.getValueType() &&
N->getNumValues() == 1 && "Type mismatch");
- SDValue OpV = RV;
- DAG.ReplaceAllUsesWith(N, &OpV);
+ DAG.ReplaceAllUsesWith(N, &RV);
}
// Push the new node and any users onto the worklist
@@ -1389,7 +1408,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
case ISD::ADDC: return visitADDC(N);
+ case ISD::UADDO: return visitUADDO(N);
case ISD::SUBC: return visitSUBC(N);
+ case ISD::USUBO: return visitUSUBO(N);
case ISD::ADDE: return visitADDE(N);
case ISD::SUBE: return visitSUBE(N);
case ISD::MUL: return visitMUL(N);
@@ -1415,6 +1436,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SRL: return visitSRL(N);
case ISD::ROTR:
case ISD::ROTL: return visitRotate(N);
+ case ISD::ABS: return visitABS(N);
case ISD::BSWAP: return visitBSWAP(N);
case ISD::BITREVERSE: return visitBITREVERSE(N);
case ISD::CTLZ: return visitCTLZ(N);
@@ -1430,6 +1452,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
+ case ISD::AssertZext: return visitAssertZext(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
@@ -1574,7 +1597,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
- SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
+ SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
SmallPtrSet<SDNode*, 16> SeenOps;
bool Changed = false; // If we should replace this token factor.
@@ -1618,6 +1641,86 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
}
+ // Remove nodes that are chained to another node in the list. Do so by
+ // walking up chains breadth-first, stopping when we've seen another
+ // operand. In general we must climb to the EntryNode, but we can exit
+ // early if we find all remaining work is associated with just one operand,
+ // as no further pruning is possible.
+
+ // List of nodes to search through and original Ops from which they originate.
+ SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
+ SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
+ SmallPtrSet<SDNode *, 16> SeenChains;
+ bool DidPruneOps = false;
+
+ unsigned NumLeftToConsider = 0;
+ for (const SDValue &Op : Ops) {
+ Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
+ OpWorkCount.push_back(1);
+ }
+
+ auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
+ // If this is an Op, we can remove the op from the list. Re-mark any
+ // search associated with it as coming from the current OpNumber.
+ if (SeenOps.count(Op) != 0) {
+ Changed = true;
+ DidPruneOps = true;
+ unsigned OrigOpNumber = 0;
+ while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
+ OrigOpNumber++;
+ assert((OrigOpNumber != Ops.size()) &&
+ "expected to find TokenFactor Operand");
+ // Re-mark worklist from OrigOpNumber to OpNumber
+ for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
+ if (Worklist[i].second == OrigOpNumber) {
+ Worklist[i].second = OpNumber;
+ }
+ }
+ OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
+ OpWorkCount[OrigOpNumber] = 0;
+ NumLeftToConsider--;
+ }
+ // Add if it's a new chain
+ if (SeenChains.insert(Op).second) {
+ OpWorkCount[OpNumber]++;
+ Worklist.push_back(std::make_pair(Op, OpNumber));
+ }
+ };
+
+ for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
+ // We need to consider at least 2 Ops to do any pruning.
+ if (NumLeftToConsider <= 1)
+ break;
+ auto CurNode = Worklist[i].first;
+ auto CurOpNumber = Worklist[i].second;
+ assert((OpWorkCount[CurOpNumber] > 0) &&
+ "Node should not appear in worklist");
+ switch (CurNode->getOpcode()) {
+ case ISD::EntryToken:
+ // Hitting EntryToken is the only way for the search to terminate without
+ // hitting another operand's search. Prevents us from marking this operand
+ // as considered.
+ NumLeftToConsider++;
+ break;
+ case ISD::TokenFactor:
+ for (const SDValue &Op : CurNode->op_values())
+ AddToWorklist(i, Op.getNode(), CurOpNumber);
+ break;
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
+ break;
+ default:
+ if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
+ AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
+ break;
+ }
+ OpWorkCount[CurOpNumber]--;
+ if (OpWorkCount[CurOpNumber] == 0)
+ NumLeftToConsider--;
+ }
+
SDValue Result;
// If we've changed things around then replace token factor.
@@ -1626,15 +1729,22 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
// The entry token is the only possible outcome.
Result = DAG.getEntryNode();
} else {
- // New and improved token factor.
- Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
+ if (DidPruneOps) {
+ SmallVector<SDValue, 8> PrunedOps;
+ // Keep only the ops whose chains were not reached from another op.
+ for (const SDValue &Op : Ops) {
+ if (SeenChains.count(Op.getNode()) == 0)
+ PrunedOps.push_back(Op);
+ }
+ Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
+ } else {
+ Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
+ }
}
- // Add users to worklist if AA is enabled, since it may introduce
- // a lot of new chained token factors while removing memory deps.
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
- return CombineTo(N, Result, UseAA /*add to worklist*/);
+ // Add users to worklist, since we may introduce a lot of new
+ // chained token factors while removing memory deps.
+ return CombineTo(N, Result, true /*add to worklist*/);
}
return Result;
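
The pruning loop added above walks chains upward from every TokenFactor operand and drops any operand that another operand's chain already reaches. Stripped of the worklist-sharing and early-exit machinery, the idea reduces to a reachability test over chain edges; a sketch with a hypothetical Node type:

    #include <cstdio>
    #include <set>
    #include <vector>

    struct Node {
      std::vector<Node *> ChainOps; // nodes this node is chained after
    };

    // Drop every operand of a token factor that is reachable by walking
    // chains upward from a different operand.
    std::vector<Node *> pruneTokenFactorOps(const std::vector<Node *> &Ops) {
      std::set<Node *> OpSet(Ops.begin(), Ops.end());
      std::set<Node *> Seen, Redundant;
      for (Node *Op : Ops) {
        std::vector<Node *> Worklist(Op->ChainOps);
        for (size_t I = 0; I < Worklist.size(); ++I) {
          Node *N = Worklist[I];
          if (OpSet.count(N))
            Redundant.insert(N); // some other op's chain covers N
          if (Seen.insert(N).second) // expand each node only once
            Worklist.insert(Worklist.end(), N->ChainOps.begin(),
                            N->ChainOps.end());
        }
      }
      std::vector<Node *> Pruned;
      for (Node *Op : Ops)
        if (!Redundant.count(Op))
          Pruned.push_back(Op);
      return Pruned;
    }

    int main() {
      Node A, B{{&A}}; // B is chained after A
      std::printf("%zu\n", pruneTokenFactorOps({&A, &B}).size()); // 1: A pruned
    }
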
@@ -1664,6 +1774,60 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}
+SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
+ auto BinOpcode = BO->getOpcode();
+ assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
+ BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
+ BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
+ BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
+ BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
+ BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
+ BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
+ BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
+ BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
+ "Unexpected binary operator");
+
+ // Bail out if any constants are opaque because we can't constant fold those.
+ SDValue C1 = BO->getOperand(1);
+ if (!isConstantOrConstantVector(C1, true) &&
+ !isConstantFPBuildVectorOrConstantFP(C1))
+ return SDValue();
+
+ // Don't do this unless the old select is going away. We want to eliminate the
+ // binary operator, not replace a binop with a select.
+ // TODO: Handle ISD::SELECT_CC.
+ SDValue Sel = BO->getOperand(0);
+ if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
+ return SDValue();
+
+ SDValue CT = Sel.getOperand(1);
+ if (!isConstantOrConstantVector(CT, true) &&
+ !isConstantFPBuildVectorOrConstantFP(CT))
+ return SDValue();
+
+ SDValue CF = Sel.getOperand(2);
+ if (!isConstantOrConstantVector(CF, true) &&
+ !isConstantFPBuildVectorOrConstantFP(CF))
+ return SDValue();
+
+ // We have a select-of-constants followed by a binary operator with a
+ // constant. Eliminate the binop by pulling the constant math into the select.
+ // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
+ EVT VT = Sel.getValueType();
+ SDLoc DL(Sel);
+ SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
+ assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
+ isConstantFPBuildVectorOrConstantFP(NewCT)) &&
+ "Failed to constant fold a binop with constant operands");
+
+ SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
+ assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
+ isConstantFPBuildVectorOrConstantFP(NewCF)) &&
+ "Failed to constant fold a binop with constant operands");
+
+ return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
+}
+
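
foldBinOpIntoSelect trades a binop feeding off a one-use select-of-constants for a select of two folded constants, e.g. add (select Cond, 3, 5), 1 becomes select Cond, 4, 6, and the binop disappears. The equivalence, spelled out at the source level (the constants are arbitrary):

    #include <cassert>

    // add (select Cond, CT, CF), C1  -->  select Cond, CT + C1, CF + C1
    static int before(bool Cond) { return (Cond ? 3 : 5) + 1; }
    static int after(bool Cond) { return Cond ? 4 : 6; } // arms pre-folded

    int main() {
      for (bool C : {false, true})
        assert(before(C) == after(C));
    }
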
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1712,6 +1876,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
}
}
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// reassociate add
if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
return RADD;
@@ -1774,6 +1941,19 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1))
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+ if (SDValue Combined = visitADDLike(N0, N1, N))
+ return Combined;
+
+ if (SDValue Combined = visitADDLike(N1, N0, N))
+ return Combined;
+
+ return SDValue();
+}
+
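
Factoring visitADD's commutative folds into visitADDLike, called once per operand order, lets the mirrored copy of each pattern (such as the deleted shl-of-sub case below) go away. The identity behind that particular fold, checked on wrapping unsigned arithmetic:

    #include <cassert>
    #include <cstdint>

    // add x, shl(0 - y, n)  ==  sub x, shl(y, n)   (mod 2^32)
    int main() {
      for (uint32_t x : {0u, 7u, 0xFFFFFFFFu})
        for (uint32_t y : {1u, 13u, 0x80000000u})
          for (unsigned n : {0u, 3u, 31u})
            assert(x + ((0u - y) << n) == x - (y << n));
    }
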
+SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+ EVT VT = N0.getValueType();
+ SDLoc DL(LocReference);
+
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
@@ -1781,12 +1961,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
DAG.getNode(ISD::SHL, DL, VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
- if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0)))
- return DAG.getNode(ISD::SUB, DL, VT, N1,
- DAG.getNode(ISD::SHL, DL, VT,
- N0.getOperand(0).getOperand(1),
- N0.getOperand(1)));
if (N1.getOpcode() == ISD::AND) {
SDValue AndOp0 = N1.getOperand(0);
@@ -1797,7 +1971,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// and similar xforms where the inner op is either ~0 or 0.
if (NumSignBits == DestBits &&
isOneConstantOrOneSplatConstant(N1->getOperand(1)))
- return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
+ return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
}
// add (sext i1), X -> sub X, (zext i1)
@@ -1825,39 +1999,61 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// If the flag result is dead, turn this into an ADD.
if (!N->hasAnyUseOfValue(1))
- return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
- DAG.getNode(ISD::CARRY_FALSE,
- SDLoc(N), MVT::Glue));
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// canonicalize constant to RHS.
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
- return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
+ return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
// fold (addc x, 0) -> x + no carry out
if (isNullConstant(N1))
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
- SDLoc(N), MVT::Glue));
+ DL, MVT::Glue));
- // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
- APInt LHSZero, LHSOne;
- APInt RHSZero, RHSOne;
- DAG.computeKnownBits(N0, LHSZero, LHSOne);
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
- if (LHSZero.getBoolValue()) {
- DAG.computeKnownBits(N1, RHSZero, RHSOne);
+ return SDValue();
+}
- // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
- // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
- return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
- DAG.getNode(ISD::CARRY_FALSE,
- SDLoc(N), MVT::Glue));
- }
+SDValue DAGCombiner::visitUADDO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ if (VT.isVector())
+ return SDValue();
+
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+ // If the flag result is dead, turn this into an ADD.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getUNDEF(CarryVT));
+
+ // canonicalize constant to RHS.
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
+
+ // fold (uaddo x, 0) -> x + no carry out
+ if (isNullConstant(N1))
+ return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
+
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
return SDValue();
}
@@ -1920,6 +2116,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
N1.getNode());
}
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
// fold (sub x, c) -> (add x, -c)
@@ -2066,6 +2265,38 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitUSUBO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ if (VT.isVector())
+ return SDValue();
+
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+ // If the flag result is dead, turn this into a SUB.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+ DAG.getUNDEF(CarryVT));
+
+ // fold (usubo x, x) -> 0 + no borrow
+ if (N0 == N1)
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getConstant(0, DL, CarryVT));
+
+ // fold (usubo x, 0) -> x + no borrow
+ if (isNullConstant(N1))
+ return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
+
+ // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+ if (isAllOnesConstant(N0))
+ return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
+ DAG.getConstant(0, DL, CarryVT));
+
+ return SDValue();
+}
+
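
The new visitUADDO/visitUSUBO folds lean on plain unsigned wrap-around semantics: x+0 never carries, x-x and x-0 never borrow, and -1 - x never borrows and equals ~x (hence the xor rewrite). A quick source-level check using the GCC/Clang overflow builtins:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x : {0u, 1u, 0x7FFFFFFFu, 0xFFFFFFFFu}) {
        uint32_t r;
        assert(!__builtin_add_overflow(x, 0u, &r) && r == x); // uaddo x, 0
        assert(!__builtin_sub_overflow(x, x, &r) && r == 0u); // usubo x, x
        assert(!__builtin_sub_overflow(x, 0u, &r) && r == x); // usubo x, 0
        // usubo -1, x: never borrows, and the result is ~x (xor x, -1).
        assert(!__builtin_sub_overflow(0xFFFFFFFFu, x, &r) &&
               r == (x ^ 0xFFFFFFFFu));
      }
    }
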
SDValue DAGCombiner::visitSUBE(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2131,6 +2362,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, 1) -> x
if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (mul x, -1) -> 0-x
if (N1IsConst && ConstValue1.isAllOnesValue()) {
SDLoc DL(N);
@@ -2297,6 +2532,23 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
return combined;
}
+static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (DAG.isUndef(N->getOpcode(), {N0, N1}))
+ return DAG.getUNDEF(VT);
+
+ // undef / X -> 0
+ // undef % X -> 0
+ if (N0.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2319,8 +2571,13 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return N0;
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(0, DL, VT), N0);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
+
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
@@ -2372,7 +2629,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// If integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence. Targets may check function attributes for size/speed
// trade-offs.
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildSDIV(N))
return Op;
@@ -2384,13 +2641,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (SDValue DivRem = useDivRem(N))
return DivRem;
- // undef / X -> 0
- if (N0.isUndef())
- return DAG.getConstant(0, DL, VT);
- // X / undef -> undef
- if (N1.isUndef())
- return N1;
-
return SDValue();
}
@@ -2414,6 +2664,12 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
N0C, N1C))
return Folded;
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (udiv x, (1 << c)) -> x >>u c
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1)) {
@@ -2444,7 +2700,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
}
// fold (udiv x, c) -> alternate
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
@@ -2456,13 +2712,6 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue DivRem = useDivRem(N))
return DivRem;
- // undef / X -> 0
- if (N0.isUndef())
- return DAG.getConstant(0, DL, VT);
- // X / undef -> undef
- if (N1.isUndef())
- return N1;
-
return SDValue();
}
@@ -2482,32 +2731,35 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
return Folded;
+ if (SDValue V = simplifyDivRem(N, DAG))
+ return V;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
if (isSigned) {
// If we know the sign bits of both operands are zero, strength reduce to a
// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
} else {
- // fold (urem x, pow2) -> (and x, pow2-1)
+ SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
if (DAG.isKnownToBeAPowerOfTwo(N1)) {
- APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits());
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
+ // fold (urem x, pow2) -> (and x, pow2-1)
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
- // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
if (N1.getOpcode() == ISD::SHL &&
DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
- APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits());
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
+ // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
AddToWorklist(Add.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
}
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
@@ -2536,13 +2788,6 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (SDValue DivRem = useDivRem(N))
return DivRem.getValue(1);
- // undef % X -> 0
- if (N0.isUndef())
- return DAG.getConstant(0, DL, VT);
- // X % undef -> undef
- if (N1.isUndef())
- return N1;
-
return SDValue();
}
@@ -2932,95 +3177,139 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
return SDValue();
}
+/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
+SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
+ const SDLoc &DL) {
+ SDValue LL, LR, RL, RR, N0CC, N1CC;
+ if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
+ !isSetCCEquivalent(N1, RL, RR, N1CC))
+ return SDValue();
+
+ assert(N0.getValueType() == N1.getValueType() &&
+ "Unexpected operand types for bitwise logic op");
+ assert(LL.getValueType() == LR.getValueType() &&
+ RL.getValueType() == RR.getValueType() &&
+ "Unexpected operand types for setcc");
+
+ // If we're here post-legalization or the logic op type is not i1, the logic
+ // op type must match a setcc result type. Also, all folds require new
+ // operations on the left and right operands, so those types must match.
+ EVT VT = N0.getValueType();
+ EVT OpVT = LL.getValueType();
+ if (LegalOperations || VT != MVT::i1)
+ if (VT != getSetCCResultType(OpVT))
+ return SDValue();
+ if (OpVT != RL.getValueType())
+ return SDValue();
+
+ ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
+ ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
+ bool IsInteger = OpVT.isInteger();
+ if (LR == RR && CC0 == CC1 && IsInteger) {
+ bool IsZero = isNullConstantOrNullSplatConstant(LR);
+ bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
+
+ // All bits clear?
+ bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
+ // All sign bits clear?
+ bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
+ // Any bits set?
+ bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
+ // Any sign bits set?
+ bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
+
+ // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
+ // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
+ // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
+ // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
+ if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
+ SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
+ AddToWorklist(Or.getNode());
+ return DAG.getSetCC(DL, VT, Or, LR, CC1);
+ }
+
+ // All bits set?
+ bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
+ // All sign bits set?
+ bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
+ // Any bits clear?
+ bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
+ // Any sign bits clear?
+ bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
+
+ // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
+ // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
+ // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
+ // (or (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
+ if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
+ AddToWorklist(And.getNode());
+ return DAG.getSetCC(DL, VT, And, LR, CC1);
+ }
+ }
+
+ // TODO: What is the 'or' equivalent of this fold?
+ // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
+ if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
+ ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
+ (isAllOnesConstant(LR) && isNullConstant(RR)))) {
+ SDValue One = DAG.getConstant(1, DL, OpVT);
+ SDValue Two = DAG.getConstant(2, DL, OpVT);
+ SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
+ AddToWorklist(Add.getNode());
+ return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
+ }
+
+ // Try more general transforms if the predicates match and the only user of
+ // the compares is the 'and' or 'or'.
+ if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
+ N0.hasOneUse() && N1.hasOneUse()) {
+ // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
+ // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
+ if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
+ SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
+ SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
+ SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ return DAG.getSetCC(DL, VT, Or, Zero, CC1);
+ }
+ }
+
+ // Canonicalize equivalent operands to LL == RL.
+ if (LL == RR && LR == RL) {
+ CC1 = ISD::getSetCCSwappedOperands(CC1);
+ std::swap(RL, RR);
+ }
+
+ // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
+ // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
+ if (LL == RL && LR == RR) {
+ ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
+ : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
+ if (NewCC != ISD::SETCC_INVALID &&
+ (!LegalOperations ||
+ (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
+ TLI.isOperationLegal(ISD::SETCC, OpVT))))
+ return DAG.getSetCC(DL, VT, LL, LR, NewCC);
+ }
+
+ return SDValue();
+}
+
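
Every row in the fold tables above is a boolean identity over integer compares; for instance (X==0 && Y==0) holds exactly when (X|Y)==0, and (X<0 || Y<0) exactly when (X|Y)<0. An exhaustive check over a small signed domain (assumes two's complement integers, as SelectionDAG does):

    #include <cassert>

    int main() {
      for (int x = -2; x <= 2; ++x)
        for (int y = -2; y <= 2; ++y) {
          assert(((x == 0) && (y == 0)) == ((x | y) == 0));    // AndEqZero
          assert(((x > -1) && (y > -1)) == ((x | y) > -1));    // AndGtNeg1
          assert(((x != 0) || (y != 0)) == ((x | y) != 0));    // OrNeZero
          assert(((x < 0) || (y < 0)) == ((x | y) < 0));       // OrLtZero
          assert(((x == -1) && (y == -1)) == ((x & y) == -1)); // AndEqNeg1
          assert(((x < 0) && (y < 0)) == ((x & y) < 0));       // AndLtZero
          assert(((x != -1) || (y != -1)) == ((x & y) != -1)); // OrNeNeg1
          assert(((x > -1) || (y > -1)) == ((x & y) > -1));    // OrGtNeg1
          // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
          assert(((x != 0) && (x != -1)) == ((unsigned)x + 1u >= 2u));
        }
    }
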
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
/// visitSELECT() already handles those cases.
-SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
- SDNode *LocReference) {
+SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N1.getValueType();
+ SDLoc DL(N);
// fold (and x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
- return DAG.getConstant(0, SDLoc(LocReference), VT);
- // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
- SDValue LL, LR, RL, RR, CC0, CC1;
- if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
- ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
- ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
-
- if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
- LL.getValueType().isInteger()) {
- // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
- if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
- }
- }
- if (isAllOnesConstant(LR)) {
- // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
- if (Op1 == ISD::SETEQ) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
- }
- }
- // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
- if (Op1 == ISD::SETGT) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
- }
- }
- }
- }
- // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
- if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
- Op0 == Op1 && LL.getValueType().isInteger() &&
- Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
- (isAllOnesConstant(LR) && isNullConstant(RR)))) {
- EVT CCVT = getSetCCResultType(LL.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDLoc DL(N0);
- SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
- LL, DAG.getConstant(1, DL,
- LL.getValueType()));
- AddToWorklist(ADDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
- DAG.getConstant(2, DL, LL.getValueType()),
- ISD::SETUGE);
- }
- }
- // canonicalize equivalent to ll == rl
- if (LL == RR && LR == RL) {
- Op1 = ISD::getSetCCSwappedOperands(Op1);
- std::swap(RL, RR);
- }
- if (LL == RL && LR == RR) {
- bool isInteger = LL.getValueType().isInteger();
- ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
- if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations ||
- (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
- EVT CCVT = getSetCCResultType(LL.getValueType());
- if (N0.getValueType() == CCVT ||
- (!LegalOperations && N0.getValueType() == MVT::i1))
- return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
- LL, LR, Result);
- }
- }
- }
+ return DAG.getConstant(0, DL, VT);
+
+ if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
+ return V;
if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
VT.getSizeInBits() <= 64) {
@@ -3037,13 +3326,13 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
ADDC |= Mask;
if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
- SDLoc DL(N0);
+ SDLoc DL0(N0);
SDValue NewAdd =
- DAG.getNode(ISD::ADD, DL, VT,
+ DAG.getNode(ISD::ADD, DL0, VT,
N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
CombineTo(N0.getNode(), NewAdd);
// Return N so it doesn't get rechecked!
- return SDValue(LocReference, 0);
+ return SDValue(N, 0);
}
}
}
@@ -3068,7 +3357,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
unsigned MaskBits = AndMask.countTrailingOnes();
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
- if (APIntOps::isMask(AndMask) &&
+ if (AndMask.isMask() &&
// Required bits must not span the two halves of the integer and
// must fit in the half size type.
(ShiftBits + MaskBits <= Size / 2) &&
@@ -3108,7 +3397,7 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
bool &NarrowLoad) {
uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
- if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
+ if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits))
return false;
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
@@ -3191,6 +3480,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// reassociate and
if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
return RAND;
@@ -3299,6 +3592,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
// preserve semantics once we get rid of the AND.
SDValue NewLoad(Load, 0);
+
+ // Fold the AND away. NewLoad may get replaced immediately.
+ CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
+
if (Load->getExtensionType() == ISD::EXTLOAD) {
NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
Load->getValueType(0), SDLoc(Load),
@@ -3316,10 +3613,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
}
- // Fold the AND away, taking care not to fold to the old load node if we
- // replaced it.
- CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
-
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -3723,65 +4016,16 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
-SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
EVT VT = N1.getValueType();
+ SDLoc DL(N);
+
// fold (or x, undef) -> -1
- if (!LegalOperations &&
- (N0.isUndef() || N1.isUndef())) {
- EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
- return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
- SDLoc(LocReference), VT);
- }
- // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
- SDValue LL, LR, RL, RR, CC0, CC1;
- if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
- ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
- ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
-
- if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
- // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
- // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
- if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
- }
- }
- // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
- // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
- if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
- EVT CCVT = getSetCCResultType(LR.getValueType());
- if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
- }
- }
- }
- // canonicalize equivalent to ll == rl
- if (LL == RR && LR == RL) {
- Op1 = ISD::getSetCCSwappedOperands(Op1);
- std::swap(RL, RR);
- }
- if (LL == RL && LR == RR) {
- bool isInteger = LL.getValueType().isInteger();
- ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
- if (Result != ISD::SETCC_INVALID &&
- (!LegalOperations ||
- (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
- TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
- EVT CCVT = getSetCCResultType(LL.getValueType());
- if (N0.getValueType() == CCVT ||
- (!LegalOperations && N0.getValueType() == MVT::i1))
- return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
- LL, LR, Result);
- }
- }
- }
+ if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
+ return DAG.getAllOnesConstant(DL, VT);
+
+ if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
+ return V;
// (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
@@ -3802,7 +4046,6 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(0), N1.getOperand(0));
- SDLoc DL(LocReference);
return DAG.getNode(ISD::AND, DL, VT, X,
DAG.getConstant(LHSMask | RHSMask, DL, VT));
}
@@ -3818,7 +4061,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
(N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
N0.getOperand(1), N1.getOperand(1));
- return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
}
return SDValue();
@@ -3847,14 +4090,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold (or x, -1) -> -1, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
// do not return N0, because undef node may exist in N0
- return DAG.getConstant(
- APInt::getAllOnesValue(N0.getScalarValueSizeInBits()), SDLoc(N),
- N0.getValueType());
+ return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
if (ISD::isBuildVectorAllOnes(N1.getNode()))
// do not return N1, because undef node may exist in N1
- return DAG.getConstant(
- APInt::getAllOnesValue(N1.getScalarValueSizeInBits()), SDLoc(N),
- N1.getValueType());
+ return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
// Do this only if the resulting shuffle is legal.
@@ -3867,7 +4106,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
// Ensure both shuffles have a zero input.
- if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) {
+ if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
@@ -3939,6 +4178,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold (or x, -1) -> -1
if (isAllOnesConstant(N1))
return N1;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (or x, c) -> c iff (x & ~c) == 0
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
@@ -3956,7 +4199,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
- // iff (c1 & c2) == 0.
+ // iff (c1 & c2) != 0.
if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
@@ -3978,6 +4221,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
return SDValue(Rot, 0);
+ if (SDValue Load = MatchLoadCombine(N))
+ return Load;
+
// Simplify the operands using demanded-bits information.
if (!VT.isVector() &&
SimplifyDemandedBits(SDValue(N, 0)))
@@ -4190,8 +4436,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
- APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
- SDValue Mask = DAG.getConstant(AllBits, DL, VT);
+ SDValue Mask = DAG.getAllOnesConstant(DL, VT);
if (LHSMask.getNode()) {
APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
@@ -4349,6 +4594,299 @@ struct BaseIndexOffset {
};
} // namespace
+namespace {
+/// Represents the known origin of an individual byte in a load combine
+/// pattern. The value of the byte is either constant zero or comes from memory.
+struct ByteProvider {
+ // For constant zero providers Load is set to nullptr. For memory providers
+ // Load represents the node which loads the byte from memory.
+ // ByteOffset is the offset of the byte in the value produced by the load.
+ LoadSDNode *Load;
+ unsigned ByteOffset;
+
+ ByteProvider() : Load(nullptr), ByteOffset(0) {}
+
+ static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
+ return ByteProvider(Load, ByteOffset);
+ }
+ static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
+
+ bool isConstantZero() const { return !Load; }
+ bool isMemory() const { return Load; }
+
+ bool operator==(const ByteProvider &Other) const {
+ return Other.Load == Load && Other.ByteOffset == ByteOffset;
+ }
+
+private:
+ ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
+ : Load(Load), ByteOffset(ByteOffset) {}
+};
+
+/// Recursively traverses the expression calculating the origin of the requested
+/// byte of the given value. Returns None if the provider can't be calculated.
+///
+/// For all values except the root of the expression, verifies that the value
+/// has exactly one use; if it does not, returns None. This way, whenever the
+/// origin of the byte is returned, it is guaranteed that the values which
+/// contribute to the byte are not used outside of this expression.
+///
+/// Because the parts of the expression are not allowed to have more than one
+/// use this function iterates over trees, not DAGs. So it never visits the same
+/// node more than once.
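+///
+/// For example (illustrative, not taken from the patch itself): requesting
+/// byte 1 of (or (zext i8 L0 to i32), (shl (zext i8 L1 to i32), 8)) yields
+/// the provider {L1, 0}; the SHL maps byte 1 of the OR to byte 0 of its
+/// operand, and the ZEXT forwards the query to the load L1.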
+const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
+ unsigned Depth,
+ bool Root = false) {
+  // A typical i64-by-i8 pattern requires recursion up to a depth of 8 calls.
+ if (Depth == 10)
+ return None;
+
+ if (!Root && !Op.hasOneUse())
+ return None;
+
+ assert(Op.getValueType().isScalarInteger() && "can't handle other types");
+ unsigned BitWidth = Op.getValueSizeInBits();
+ if (BitWidth % 8 != 0)
+ return None;
+ unsigned ByteWidth = BitWidth / 8;
+ assert(Index < ByteWidth && "invalid index requested");
+ (void) ByteWidth;
+
+ switch (Op.getOpcode()) {
+ case ISD::OR: {
+ auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
+ if (!LHS)
+ return None;
+ auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
+ if (!RHS)
+ return None;
+
+ if (LHS->isConstantZero())
+ return RHS;
+ if (RHS->isConstantZero())
+ return LHS;
+ return None;
+ }
+ case ISD::SHL: {
+ auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!ShiftOp)
+ return None;
+
+ uint64_t BitShift = ShiftOp->getZExtValue();
+ if (BitShift % 8 != 0)
+ return None;
+ uint64_t ByteShift = BitShift / 8;
+
+ return Index < ByteShift
+ ? ByteProvider::getConstantZero()
+ : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
+ Depth + 1);
+ }
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND: {
+ SDValue NarrowOp = Op->getOperand(0);
+ unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return None;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+ if (Index >= NarrowByteWidth)
+ return Op.getOpcode() == ISD::ZERO_EXTEND
+ ? Optional<ByteProvider>(ByteProvider::getConstantZero())
+ : None;
+ return calculateByteProvider(NarrowOp, Index, Depth + 1);
+ }
+ case ISD::BSWAP:
+ return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
+ Depth + 1);
+ case ISD::LOAD: {
+ auto L = cast<LoadSDNode>(Op.getNode());
+ if (L->isVolatile() || L->isIndexed())
+ return None;
+
+ unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return None;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+ if (Index >= NarrowByteWidth)
+ return L->getExtensionType() == ISD::ZEXTLOAD
+ ? Optional<ByteProvider>(ByteProvider::getConstantZero())
+ : None;
+ return ByteProvider::getMemory(L, Index);
+ }
+ }
+
+ return None;
+}
+} // namespace
+
+/// Match a pattern where a wide-type scalar value is loaded by several narrow
+/// loads and combined by shifts and ors. Fold it into a single load, or into a
+/// load plus a BSWAP if the target supports it.
+///
+/// Assuming little endian target:
+/// i8 *a = ...
+/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
+/// =>
+/// i32 val = *((i32)a)
+///
+/// i8 *a = ...
+/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
+/// =>
+/// i32 val = BSWAP(*((i32)a))
+///
+/// TODO: This rule matches complex patterns with OR node roots and doesn't
+/// interact well with the worklist mechanism. When a part of the pattern is
+/// updated (e.g. one of the loads), its direct users are put into the worklist,
+/// but the root node of the pattern which triggers the load combine is not
+/// necessarily a direct user of the changed node. For example, once the address
+/// of the t28 load is reassociated, the load combine won't be triggered:
+/// t25: i32 = add t4, Constant:i32<2>
+/// t26: i64 = sign_extend t25
+/// t27: i64 = add t2, t26
+/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
+/// t29: i32 = zero_extend t28
+/// t32: i32 = shl t29, Constant:i8<8>
+/// t33: i32 = or t23, t32
+/// As a possible fix visitLoad can check if the load can be a part of a load
+/// combine pattern and add corresponding OR roots to the worklist.
+SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
+ assert(N->getOpcode() == ISD::OR &&
+ "Can only match load combining against OR nodes");
+
+ // Handles simple types only
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+ unsigned ByteWidth = VT.getSizeInBits() / 8;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  // Before legalization we can introduce overly wide illegal loads, which will
+  // later be split into legal-sized loads. This lets us combine i64-by-i8 load
+  // patterns into a pair of i32 loads on 32-bit targets.
+ if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
+ return SDValue();
+
+  std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt =
+      [](unsigned BW, unsigned i) { return i; };
+  std::function<unsigned(unsigned, unsigned)> BigEndianByteAt =
+      [](unsigned BW, unsigned i) { return BW - i - 1; };
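+  // For instance, in a 4-byte value, byte 0 lives at memory offset 0 on a
+  // little-endian target but at memory offset 3 on a big-endian one.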
+
+ bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
+ auto MemoryByteOffset = [&] (ByteProvider P) {
+ assert(P.isMemory() && "Must be a memory byte provider");
+ unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
+ assert(LoadBitWidth % 8 == 0 &&
+ "can only analyze providers for individual bytes not bit");
+ unsigned LoadByteWidth = LoadBitWidth / 8;
+ return IsBigEndianTarget
+ ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
+ : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
+ };
+
+ Optional<BaseIndexOffset> Base;
+ SDValue Chain;
+
+ SmallSet<LoadSDNode *, 8> Loads;
+ Optional<ByteProvider> FirstByteProvider;
+ int64_t FirstOffset = INT64_MAX;
+
+  // Check if all the bytes of the OR we are looking at are loaded from the same
+  // base address. Collect byte offsets from the Base address in ByteOffsets.
+ SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
+ for (unsigned i = 0; i < ByteWidth; i++) {
+ auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
+ if (!P || !P->isMemory()) // All the bytes must be loaded from memory
+ return SDValue();
+
+ LoadSDNode *L = P->Load;
+ assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
+ "Must be enforced by calculateByteProvider");
+ assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
+
+ // All loads must share the same chain
+ SDValue LChain = L->getChain();
+ if (!Chain)
+ Chain = LChain;
+ else if (Chain != LChain)
+ return SDValue();
+
+ // Loads must share the same base address
+ BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG);
+ if (!Base)
+ Base = Ptr;
+ else if (!Base->equalBaseIndex(Ptr))
+ return SDValue();
+
+ // Calculate the offset of the current byte from the base address
+ int64_t ByteOffsetFromBase = Ptr.Offset + MemoryByteOffset(*P);
+ ByteOffsets[i] = ByteOffsetFromBase;
+
+ // Remember the first byte load
+ if (ByteOffsetFromBase < FirstOffset) {
+ FirstByteProvider = P;
+ FirstOffset = ByteOffsetFromBase;
+ }
+
+ Loads.insert(L);
+ }
+ assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
+ "memory, so there must be at least one load which produces the value");
+ assert(Base && "Base address of the accessed memory location must be set");
+ assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+
+  // Check if the bytes of the OR we are looking at match either a big or a
+  // little endian value load.
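+  // E.g. byte offsets {0,1,2,3} relative to FirstOffset match a little-endian
+  // load, while {3,2,1,0} match a big-endian one.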
+ bool BigEndian = true, LittleEndian = true;
+ for (unsigned i = 0; i < ByteWidth; i++) {
+ int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
+ LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
+ BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
+ if (!BigEndian && !LittleEndian)
+ return SDValue();
+ }
+  assert((BigEndian != LittleEndian) && "should be either big or little endian");
+ assert(FirstByteProvider && "must be set");
+
+  // Ensure that the first byte is loaded from offset zero within the first
+  // load, so the combined value can be loaded from the first load's address.
+ if (MemoryByteOffset(*FirstByteProvider) != 0)
+ return SDValue();
+ LoadSDNode *FirstLoad = FirstByteProvider->Load;
+
+  // The node we are looking at matches the pattern; check whether we can
+  // replace it with a single load, plus a bswap if needed.
+
+  // If the load needs a byte swap, check that the target supports it.
+ bool NeedsBswap = IsBigEndianTarget != BigEndian;
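+  // A swap is needed exactly when the matched byte order disagrees with the
+  // target's endianness, e.g. a big-endian pattern on a little-endian target.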
+
+  // Before legalization we can introduce illegal bswaps, which will later be
+  // converted into an explicit bswap sequence. This way we end up with a single
+  // load and byte shuffling instead of several loads each followed by shuffling.
+ if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Check that a load of the wide type is both allowed and fast on the target
+ bool Fast = false;
+ bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ VT, FirstLoad->getAddressSpace(),
+ FirstLoad->getAlignment(), &Fast);
+ if (!Allowed || !Fast)
+ return SDValue();
+
+ SDValue NewLoad =
+ DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
+ FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
+
+ // Transfer chain users from old loads to the new load.
+ for (LoadSDNode *L : Loads)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
+
+ return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
+}
+
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4386,6 +4924,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold (xor x, 0) -> x
if (isNullConstant(N1))
return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// reassociate xor
if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
return RXOR;
@@ -4403,9 +4945,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
default:
llvm_unreachable("Unhandled SetCC Equivalent!");
case ISD::SETCC:
- return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
+ return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
case ISD::SELECT_CC:
- return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
+ return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
N0.getOperand(3), NotCC);
}
}
@@ -4470,6 +5012,17 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
N01C->getAPIntValue(), DL, VT));
}
}
+
+ // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
+ N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) &&
+ TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+ if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
+ if (C->getAPIntValue() == (OpSizeInBits - 1))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0));
+ }
+
// fold (xor x, x) -> 0
if (N0 == N1)
return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
@@ -4673,6 +5226,10 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl undef, x) -> 0
if (N0.isUndef())
return DAG.getConstant(0, SDLoc(N), VT);
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
@@ -4808,9 +5365,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
isConstantOrConstantVector(N1, /* No Opaques */ true)) {
- unsigned BitSize = VT.getScalarSizeInBits();
SDLoc DL(N);
- SDValue AllBits = DAG.getConstant(APInt::getAllOnesValue(BitSize), DL, VT);
+ SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
}
@@ -4877,6 +5433,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold (sra x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
// sext_inreg.
if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
@@ -5024,6 +5584,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
+
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// if (srl x, c) is known to be zero, return 0
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
@@ -5074,9 +5638,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
SDLoc DL(N);
- APInt AllBits = APInt::getAllOnesValue(N0.getScalarValueSizeInBits());
SDValue Mask =
- DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(AllBits, DL, VT), N1);
+ DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
AddToWorklist(Mask.getNode());
return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
}
@@ -5202,6 +5765,22 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitABS(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (abs c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
+ // fold (abs (abs x)) -> (abs x)
+ if (N0.getOpcode() == ISD::ABS)
+ return N0;
+  // fold (abs x) -> x iff x is known non-negative
+ if (DAG.SignBitIsZero(N0))
+ return N0;
+ return SDValue();
+}
+
SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -5217,7 +5796,11 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ // fold (bitreverse c1) -> c2
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
+ return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
// fold (bitreverse (bitreverse x)) -> x
if (N0.getOpcode() == ISD::BITREVERSE)
return N0.getOperand(0);
@@ -5311,7 +5894,6 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
}
}
-// TODO: We should handle other cases of selecting between {-1,0,1} here.
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
SDValue Cond = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5320,6 +5902,67 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
EVT CondVT = Cond.getValueType();
SDLoc DL(N);
+ if (!VT.isInteger())
+ return SDValue();
+
+ auto *C1 = dyn_cast<ConstantSDNode>(N1);
+ auto *C2 = dyn_cast<ConstantSDNode>(N2);
+ if (!C1 || !C2)
+ return SDValue();
+
+ // Only do this before legalization to avoid conflicting with target-specific
+ // transforms in the other direction (create a select from a zext/sext). There
+ // is also a target-independent combine here in DAGCombiner in the other
+ // direction for (select Cond, -1, 0) when the condition is not i1.
+ if (CondVT == MVT::i1 && !LegalOperations) {
+ if (C1->isNullValue() && C2->isOne()) {
+ // select Cond, 0, 1 --> zext (!Cond)
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ if (VT != MVT::i1)
+ NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
+ return NotCond;
+ }
+ if (C1->isNullValue() && C2->isAllOnesValue()) {
+ // select Cond, 0, -1 --> sext (!Cond)
+ SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
+ if (VT != MVT::i1)
+ NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
+ return NotCond;
+ }
+ if (C1->isOne() && C2->isNullValue()) {
+ // select Cond, 1, 0 --> zext (Cond)
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
+ return Cond;
+ }
+ if (C1->isAllOnesValue() && C2->isNullValue()) {
+ // select Cond, -1, 0 --> sext (Cond)
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
+ return Cond;
+ }
+
+ // For any constants that differ by 1, we can transform the select into an
+ // extend and add. Use a target hook because some targets may prefer to
+ // transform in the other direction.
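+    // E.g. (select Cond, 4, 3) becomes (add (zext Cond), 3), which is 4 when
+    // Cond is true and 3 otherwise.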
+ if (TLI.convertSelectOfConstantsToMath()) {
+ if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
+ // select Cond, C1, C1-1 --> add (zext Cond), C1-1
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
+ }
+ if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
+ // select Cond, C1, C1+1 --> add (sext Cond), C1+1
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
+ }
+ }
+
+ return SDValue();
+ }
+
// fold (select Cond, 0, 1) -> (xor Cond, 1)
// We can't do this reliably if integer based booleans have different contents
// to floating point based booleans. This is because we can't tell whether we
@@ -5329,15 +5972,14 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
// undiscoverable (or not reasonably discoverable). For example, it could be
// in another basic block or it could require searching a complicated
// expression.
- if (VT.isInteger() &&
- (CondVT == MVT::i1 || (CondVT.isInteger() &&
- TLI.getBooleanContents(false, true) ==
- TargetLowering::ZeroOrOneBooleanContent &&
- TLI.getBooleanContents(false, false) ==
- TargetLowering::ZeroOrOneBooleanContent)) &&
- isNullConstant(N1) && isOneConstant(N2)) {
- SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond,
- DAG.getConstant(1, DL, CondVT));
+ if (CondVT.isInteger() &&
+ TLI.getBooleanContents(false, true) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ TLI.getBooleanContents(false, false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ C1->isNullValue() && C2->isOne()) {
+ SDValue NotCond =
+ DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
if (VT.bitsEq(CondVT))
return NotCond;
return DAG.getZExtOrTrunc(NotCond, DL, VT);
@@ -5847,7 +6489,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
ISD::NON_EXTLOAD, MLD->isExpandingLoad());
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
- MLD->isExpandingLoad());
+ MLD->isExpandingLoad());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
@@ -5921,34 +6563,6 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SimplifySelectOps(N, N1, N2))
return SDValue(N, 0); // Don't revisit N.
- // If the VSELECT result requires splitting and the mask is provided by a
- // SETCC, then split both nodes and its operands before legalization. This
- // prevents the type legalizer from unrolling SETCC into scalar comparisons
- // and enables future optimizations (e.g. min/max pattern matching on X86).
- if (N0.getOpcode() == ISD::SETCC) {
- EVT VT = N->getValueType(0);
-
- // Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), VT) !=
- TargetLowering::TypeSplitVector)
- return SDValue();
-
- SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
- std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
- std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
- std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
-
- Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
- Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
-
- // Add the new VSELECT nodes to the work list in case they need to be split
- // again.
- AddToWorklist(Lo.getNode());
- AddToWorklist(Hi.getNode());
-
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
- }
-
// Fold (vselect (build_vector all_ones), N1, N2) -> N1
if (ISD::isBuildVectorAllOnes(N0.getNode()))
return N1;
@@ -6258,6 +6872,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
+  // Simplify the new TokenFactor.
+ AddToWorklist(NewChain.getNode());
+
CombineTo(N, NewValue);
// Replace uses of the original load (before extension)
@@ -6273,6 +6890,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
LegalOperations))
@@ -6281,8 +6899,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext (sext x)) -> (sext x)
// fold (sext (aext x)) -> (sext x)
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
- N0.getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
@@ -6314,12 +6931,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
// bits, just sext from i32.
if (NumSignBits > OpBits-MidBits)
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
} else {
// Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
// bits, just truncate to i32.
if (NumSignBits > OpBits-MidBits)
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
}
// fold (sext (truncate x)) -> (sextinreg x).
@@ -6329,7 +6946,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
else if (OpBits > DestBits)
Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
DAG.getValueType(N0.getValueType()));
}
}
@@ -6349,16 +6966,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
- LN0->getChain(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
- ISD::SIGN_EXTEND);
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -6376,8 +6991,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
- LN0->getChain(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(),
LN0->getBasePtr(), MemVT,
LN0->getMemOperand());
CombineTo(N, ExtLoad);
@@ -6411,7 +7025,6 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
LN0->getMemOperand());
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.sext(VT.getSizeInBits());
- SDLoc DL(N);
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
@@ -6419,24 +7032,27 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
N0.getOperand(0).getValueType(), ExtLoad);
CombineTo(N, And);
CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
- ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
- ISD::SIGN_EXTEND);
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
}
if (N0.getOpcode() == ISD::SETCC) {
- EVT N0VT = N0.getOperand(0).getValueType();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ EVT N00VT = N0.getOperand(0).getValueType();
+
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations &&
- TLI.getBooleanContents(N0VT) ==
+ TLI.getBooleanContents(N00VT) ==
TargetLowering::ZeroOrNegativeOneBooleanContent) {
// On some architectures (such as SSE/NEON/etc) the SETCC result type is
// of the same size as the compared operands. Only optimize sext(setcc())
// if this is the case.
- EVT SVT = getSetCCResultType(N0VT);
+ EVT SVT = getSetCCResultType(N00VT);
// We know that the # elements of the results is the same as the
// # elements of the compare (and the # elements of the compare result
@@ -6444,19 +7060,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == SVT.getSizeInBits())
- return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSetCC(DL, VT, N00, N01, CC);
// If the desired elements are smaller or larger than the source
- // elements we can use a matching integer vector type and then
- // truncate/sign extend
- EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
- if (SVT == MatchingVectorType) {
- SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
- N0.getOperand(0), N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
+ // elements, we can use a matching integer vector type and then
+ // truncate/sign extend.
+ EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
+ if (SVT == MatchingVecType) {
+ SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
+ return DAG.getSExtOrTrunc(VsetCC, DL, VT);
}
}
@@ -6465,36 +7077,30 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// getBooleanContents().
unsigned SetCCWidth = N0.getScalarValueSizeInBits();
- SDLoc DL(N);
// To determine the "true" side of the select, we need to know the high bit
// of the value returned by the setcc if it evaluates to true.
// If the type of the setcc is i1, then the true case of the select is just
// sext(i1 1), that is, -1.
// If the type of the setcc is larger (say, i8) then the value of the high
- // bit depends on getBooleanContents(). So, ask TLI for a real "true" value
+ // bit depends on getBooleanContents(), so ask TLI for a real "true" value
// of the appropriate width.
- SDValue ExtTrueVal =
- (SetCCWidth == 1)
- ? DAG.getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()),
- DL, VT)
- : TLI.getConstTrueVal(DAG, VT, DL);
-
- if (SDValue SCC = SimplifySelectCC(
- DL, N0.getOperand(0), N0.getOperand(1), ExtTrueVal,
- DAG.getConstant(0, DL, VT),
- cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
+ SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT)
+ : TLI.getConstTrueVal(DAG, VT, DL);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ if (SDValue SCC =
+ SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
return SCC;
if (!VT.isVector()) {
- EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
- if (!LegalOperations ||
- TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
- SDLoc DL(N);
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- SDValue SetCC =
- DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC);
- return DAG.getSelect(DL, VT, SetCC, ExtTrueVal,
- DAG.getConstant(0, DL, VT));
+ EVT SetCCVT = getSetCCResultType(N00VT);
+ // Don't do this transform for i1 because there's a select transform
+ // that would reverse it.
+      // TODO: Should we avoid this transform entirely without a target hook,
+      // given that a sext is likely cheaper than a select?
+ if (SetCCVT.getScalarSizeInBits() != 1 &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
+ SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
+ return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
}
}
}
@@ -6502,7 +7108,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext x) -> (zext x) if the sign bit is known zero.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
return SDValue();
}
@@ -6677,13 +7283,14 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
LN0->getChain(),
LN0->getBasePtr(), N0.getValueType(),
LN0->getMemOperand());
- CombineTo(N, ExtLoad);
+
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
ISD::ZERO_EXTEND);
+ CombineTo(N, ExtLoad);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -6991,9 +7598,25 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitAssertZext(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT EVT = cast<VTSDNode>(N1)->getVT();
+
+ // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
+ if (N0.getOpcode() == ISD::AssertZext &&
+ EVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
+ return N0;
+
+ return SDValue();
+}
+
/// See if the specified operand can be simplified with the knowledge that only
/// the bits specified by Mask are used. If so, return the simpler operand,
/// otherwise return a null SDValue.
+///
+/// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
+/// simplify nodes with multiple uses more aggressively.)
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
switch (V.getOpcode()) {
default: break;
@@ -7029,6 +7652,14 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
SimplifyLHS, V.getOperand(1));
}
+ break;
+ case ISD::AND: {
+ // X & -1 -> X (ignoring bits which aren't demanded).
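+    // E.g. with Mask == 0xFF, (and X, 0x12FF) simplifies to X because the AND
+    // constant covers every demanded bit.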
+ ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
+ if (AndVal && (AndVal->getAPIntValue() & Mask) == Mask)
+ return V.getOperand(0);
+ break;
+ }
}
return SDValue();
}
@@ -7244,6 +7875,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
}
+  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
+ if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+ N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
+ return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
+ }
+
// fold (sext_in_reg (zext x)) -> (sext x)
// iff we are extending the source sign bit.
if (N0.getOpcode() == ISD::ZERO_EXTEND) {
@@ -7254,7 +7895,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
}
// fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
- if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
+ if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
// fold operands of sext_in_reg based on knowledge that the top bits are not
@@ -7496,6 +8137,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
VT.getSizeInBits())))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
}
+
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
@@ -7517,6 +8159,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
}
+
// fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
// where ... are all 'undef'.
if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
@@ -7582,6 +8225,18 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+  // fold (trunc (adde X, Y, Carry)) -> (adde (trunc X), (trunc Y), Carry),
+  // when the adde's carry output is not used.
+ if (N0.getOpcode() == ISD::ADDE && N0.hasOneUse() &&
+ !N0.getNode()->hasAnyUseOfValue(1) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::ADDE, VT))) {
+ SDLoc SL(N);
+ auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
+ auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
+ return DAG.getNode(ISD::ADDE, SL, DAG.getVTList(VT, MVT::Glue),
+ X, Y, N0.getOperand(2));
+ }
+
return SDValue();
}
@@ -7672,6 +8327,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
// Only do this before legalize, since afterward the target may be depending
// on the bitconvert.
@@ -8040,6 +8698,11 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
return DAG.getBuildVector(VT, DL, Ops);
}
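+
+/// Returns true if floating-point contraction is allowed on this node, i.e.
+/// it carries either the 'contract' or the unsafe-algebra fast-math flag.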
+static bool isContractable(SDNode *N) {
+ SDNodeFlags F = cast<BinaryWithFlagsSDNode>(N)->Flags;
+ return F.hasAllowContract() || F.hasUnsafeAlgebra();
+}
+
/// Try to perform FMA combining on a given FADD node.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
@@ -8048,24 +8711,27 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- bool AllowFusion =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
// Floating-point multiply-add with intermediate rounding.
bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
+ bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath || HasFMAD);
+ // If the addition is not contractable, do not combine.
+ if (!AllowFusionGlobally && !isContractable(N))
+ return SDValue();
+
const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
- ;
- if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
+ if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
return SDValue();
// Always prefer FMAD to FMA for precision.
@@ -8073,35 +8739,39 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
bool LookThroughFPExt = TLI.isFPExtFree(VT);
+  // Returns true if the node is an FMUL and contractable, either due to the
+  // global flags or its own SDNodeFlags.
+ auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
+ if (N.getOpcode() != ISD::FMUL)
+ return false;
+ return AllowFusionGlobally || isContractable(N.getNode());
+ };
// If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
// prefer to fold the multiply with fewer uses.
- if (Aggressive && N0.getOpcode() == ISD::FMUL &&
- N1.getOpcode() == ISD::FMUL) {
+ if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
if (N0.getNode()->use_size() > N1.getNode()->use_size())
std::swap(N0, N1);
}
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
- if (N0.getOpcode() == ISD::FMUL &&
- (Aggressive || N0->hasOneUse())) {
+ if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1), N1);
}
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (Aggressive || N1->hasOneUse())) {
+ if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N1.getOperand(0), N1.getOperand(1), N0);
}
// Look through FP_EXTEND nodes to do more combining.
- if (AllowFusion && LookThroughFPExt) {
+ if (LookThroughFPExt) {
// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N00))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -8113,7 +8783,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// Note: Commutes FADD operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N10))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N10.getOperand(0)),
@@ -8154,7 +8824,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N0));
}
- if (AllowFusion && LookThroughFPExt) {
+ if (LookThroughFPExt) {
// fold (fadd (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&] (
@@ -8169,7 +8839,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
- if (N020.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N020))
return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
N020.getOperand(0), N020.getOperand(1),
N1);
@@ -8195,7 +8865,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
- if (N002.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N002))
return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
N002.getOperand(0), N002.getOperand(1),
N1);
@@ -8208,7 +8878,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N12 = N1.getOperand(2);
if (N12.getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N12.getOperand(0);
- if (N120.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N120))
return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
N120.getOperand(0), N120.getOperand(1),
N0);
@@ -8224,7 +8894,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N10 = N1.getOperand(0);
if (N10.getOpcode() == PreferredFusedOpcode) {
SDValue N102 = N10.getOperand(2);
- if (N102.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N102))
return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
N102.getOperand(0), N102.getOperand(1),
N0);
@@ -8244,23 +8914,26 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDLoc SL(N);
const TargetOptions &Options = DAG.getTarget().Options;
- bool AllowFusion =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
-
// Floating-point multiply-add with intermediate rounding.
bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
+ bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath || HasFMAD);
+ // If the subtraction is not contractable, do not combine.
+ if (!AllowFusionGlobally && !isContractable(N))
+ return SDValue();
+
const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
- if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
+ if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
return SDValue();
// Always prefer FMAD to FMA for precision.
@@ -8268,9 +8941,16 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
bool LookThroughFPExt = TLI.isFPExtFree(VT);
+  // Returns true if the node is an FMUL and contractable, either due to the
+  // global flags or its own SDNodeFlags.
+ auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
+ if (N.getOpcode() != ISD::FMUL)
+ return false;
+ return AllowFusionGlobally || isContractable(N.getNode());
+ };
+
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
- if (N0.getOpcode() == ISD::FMUL &&
- (Aggressive || N0->hasOneUse())) {
+ if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(ISD::FNEG, SL, VT, N1));
@@ -8278,16 +8958,14 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
// Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FMUL &&
- (Aggressive || N1->hasOneUse()))
+ if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse()))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
N1.getOperand(0)),
N1.getOperand(1), N0);
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
- if (N0.getOpcode() == ISD::FNEG &&
- N0.getOperand(0).getOpcode() == ISD::FMUL &&
+ if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
(Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
@@ -8297,12 +8975,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// Look through FP_EXTEND nodes to do more combining.
- if (AllowFusion && LookThroughFPExt) {
+ if (LookThroughFPExt) {
// fold (fsub (fpext (fmul x, y)), z)
// -> (fma (fpext x), (fpext y), (fneg z))
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N00))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -8316,7 +8994,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// Note: Commutes FSUB operands.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N10))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -8336,7 +9014,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FNEG) {
SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
+ if (isContractableFMUL(N000)) {
return DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -8358,7 +9036,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == ISD::FP_EXTEND) {
SDValue N000 = N00.getOperand(0);
- if (N000.getOpcode() == ISD::FMUL) {
+ if (isContractableFMUL(N000)) {
return DAG.getNode(ISD::FNEG, SL, VT,
DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
@@ -8378,10 +9056,9 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// -> (fma x, y (fma u, v, (fneg z)))
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
// are currently only supported on binary nodes.
- if (Options.UnsafeFPMath &&
- N0.getOpcode() == PreferredFusedOpcode &&
- N0.getOperand(2).getOpcode() == ISD::FMUL &&
- N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
+ if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode &&
+ isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
+ N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8395,9 +9072,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// -> (fma (fneg y), z, (fma (fneg u), v, x))
// FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
// are currently only supported on binary nodes.
- if (Options.UnsafeFPMath &&
- N1.getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FMUL) {
+ if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode &&
+ isContractableFMUL(N1.getOperand(2))) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8410,14 +9086,14 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N21, N0));
}
- if (AllowFusion && LookThroughFPExt) {
+ if (LookThroughFPExt) {
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
if (N0.getOpcode() == PreferredFusedOpcode) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
- if (N020.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N020))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8440,7 +9116,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == PreferredFusedOpcode) {
SDValue N002 = N00.getOperand(2);
- if (N002.getOpcode() == ISD::FMUL)
+ if (isContractableFMUL(N002))
return DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
N00.getOperand(0)),
@@ -8461,7 +9137,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (N1.getOpcode() == PreferredFusedOpcode &&
N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N1.getOperand(2).getOperand(0);
- if (N120.getOpcode() == ISD::FMUL) {
+ if (isContractableFMUL(N120)) {
SDValue N1200 = N120.getOperand(0);
SDValue N1201 = N120.getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8488,7 +9164,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N100 = N1.getOperand(0).getOperand(0);
SDValue N101 = N1.getOperand(0).getOperand(1);
SDValue N102 = N1.getOperand(0).getOperand(2);
- if (N102.getOpcode() == ISD::FMUL) {
+ if (isContractableFMUL(N102)) {
SDValue N1020 = N102.getOperand(0);
SDValue N1021 = N102.getOperand(1);
return DAG.getNode(PreferredFusedOpcode, SL, VT,
@@ -8624,6 +9300,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
@@ -8637,7 +9316,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
GetNegatedExpression(N0, DAG, LegalOperations), Flags);
// FIXME: Auto-upgrade the target/function-level option.
- if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
+ if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) {
// fold (fadd A, 0) -> A
if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
if (N1C->isZero())
@@ -8771,13 +9450,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return DAG.getNode(ISD::FADD, DL, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations), Flags);
// FIXME: Auto-upgrade the target/function-level option.
- if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
+ if (Options.NoSignedZerosFPMath || N->getFlags()->hasNoSignedZeros()) {
// (fsub 0, B) -> -B
if (N0CFP && N0CFP->isZero()) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
@@ -8850,6 +9532,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N1CFP && N1CFP->isExactlyValue(1.0))
return N0;
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
if (Options.UnsafeFPMath) {
// fold (fmul A, 0) -> 0
if (N1CFP && N1CFP->isZero())
@@ -9104,6 +9789,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (N0CFP && N1CFP)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
if (Options.UnsafeFPMath) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
if (N1CFP) {
@@ -9207,6 +9895,9 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
&cast<BinaryWithFlagsSDNode>(N)->Flags);
+ if (SDValue NewSel = foldBinOpIntoSelect(N))
+ return NewSel;
+
return SDValue();
}
@@ -10361,7 +11052,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
dbgs() << "\n");
WorklistRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
-
+ AddUsersToWorklist(Chain.getNode());
if (N->use_empty())
deleteAndRecombine(N);
@@ -10414,7 +11105,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
if (PrevST->getBasePtr() == Ptr &&
PrevST->getValue().getValueType() == N->getValueType(0))
- return CombineTo(N, Chain.getOperand(1), Chain);
+ return CombineTo(N, PrevST->getOperand(1), Chain);
}
}
@@ -10432,14 +11123,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
}
}
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
-#ifndef NDEBUG
- if (CombinerAAOnlyFunc.getNumOccurrences() &&
- CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
- UseAA = false;
-#endif
- if (UseAA && LD->isUnindexed()) {
+ if (LD->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
@@ -11021,6 +11705,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
ArgChains);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+ AddToWorklist(Chain.getNode());
return true;
}
@@ -11414,18 +12099,24 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
return false;
}
-SDValue DAGCombiner::getMergedConstantVectorStore(
- SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores,
- SmallVectorImpl<SDValue> &Chains, EVT Ty) const {
- SmallVector<SDValue, 8> BuildVector;
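+// Builds a TokenFactor over the input chains of the stores being merged,
+// skipping any chain that is itself one of the merged stores so the new chain
+// does not depend on the stores it replaces.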
+SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
+ unsigned NumStores) {
+ SmallVector<SDValue, 8> Chains;
+ SmallPtrSet<const SDNode *, 8> Visited;
+ SDLoc StoreDL(StoreNodes[0].MemNode);
+
+ for (unsigned i = 0; i < NumStores; ++i) {
+ Visited.insert(StoreNodes[i].MemNode);
+ }
- for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
- StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
- Chains.push_back(St->getChain());
- BuildVector.push_back(St->getValue());
+  // Don't include a store's chain when the chain node is itself one of the
+  // stores being merged; the TokenFactor must not depend on replaced stores.
+ for (unsigned i = 0; i < NumStores; ++i) {
+ if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
+ Chains.push_back(StoreNodes[i].MemNode->getChain());
}
- return DAG.getBuildVector(Ty, SL, BuildVector);
+  assert(Chains.size() > 0 && "Must gather at least one store chain");
+ return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
}
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
@@ -11436,22 +12127,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
return false;
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned LatestNodeUsed = 0;
-
- for (unsigned i=0; i < NumStores; ++i) {
- // Find a chain for the new wide-store operand. Notice that some
- // of the store nodes that we found may not be selected for inclusion
- // in the wide store. The chain we use needs to be the chain of the
- // latest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
- LatestNodeUsed = i;
- }
-
- SmallVector<SDValue, 8> Chains;
// The latest Node in the DAG.
- LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
SDLoc DL(StoreNodes[0].MemNode);
SDValue StoredVal;
@@ -11467,7 +12144,18 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
if (IsConstantSrc) {
- StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
+ SmallVector<SDValue, 8> BuildVector;
+ for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
+ SDValue Val = St->getValue();
+ if (MemVT.getScalarType().isInteger())
+ if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue()))
+ Val = DAG.getConstant(
+ (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
+ SDLoc(CFP), MemVT);
+ BuildVector.push_back(Val);
+ }
+ StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
} else {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumStores; ++i) {
@@ -11477,7 +12165,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
if (Val.getValueType() != MemVT)
return false;
Ops.push_back(Val);
- Chains.push_back(St->getChain());
}
// Build the extracted vector elements back into a vector.
@@ -11497,7 +12184,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
for (unsigned i = 0; i < NumStores; ++i) {
unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
- Chains.push_back(St->getChain());
SDValue Val = St->getValue();
StoreInt <<= ElementSizeBytes * 8;
@@ -11515,54 +12201,27 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
}
- assert(!Chains.empty());
-
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
FirstInChain->getAlignment());
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
- if (UseAA) {
- // Replace all merged stores with the new store.
- for (unsigned i = 0; i < NumStores; ++i)
- CombineTo(StoreNodes[i].MemNode, NewStore);
- } else {
- // Replace the last store with the new store.
- CombineTo(LatestOp, NewStore);
- // Erase all other stores.
- for (unsigned i = 0; i < NumStores; ++i) {
- if (StoreNodes[i].MemNode == LatestOp)
- continue;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- // ReplaceAllUsesWith will replace all uses that existed when it was
- // called, but graph optimizations may cause new ones to appear. For
- // example, the case in pr14333 looks like
- //
- // St's chain -> St -> another store -> X
- //
- // And the only difference from St to the other store is the chain.
- // When we change it's chain to be St's chain they become identical,
- // get CSEed and the net result is that X is now a use of St.
- // Since we know that St is redundant, just iterate.
- while (!St->use_empty())
- DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
- deleteAndRecombine(St);
- }
- }
+ // Replace all merged stores with the new store.
+ for (unsigned i = 0; i < NumStores; ++i)
+ CombineTo(StoreNodes[i].MemNode, NewStore);
- StoreNodes.erase(StoreNodes.begin() + NumStores, StoreNodes.end());
+ AddToWorklist(NewChain.getNode());
return true;
}
-void DAGCombiner::getStoreMergeAndAliasCandidates(
- StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
- SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
+void DAGCombiner::getStoreMergeCandidates(
+ StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
+ EVT MemVT = St->getMemoryVT();
// We must have a base and an offset.
if (!BasePtr.Base.getNode())
@@ -11572,104 +12231,71 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
if (BasePtr.Base.isUndef())
return;
- // Walk up the chain and look for nodes with offsets from the same
- // base pointer. Stop when reaching an instruction with a different kind
- // or instruction which has a different base pointer.
- EVT MemVT = St->getMemoryVT();
- unsigned Seq = 0;
- StoreSDNode *Index = St;
-
-
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
-
- if (UseAA) {
- // Look at other users of the same chain. Stores on the same chain do not
- // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized
- // to be on the same chain, so don't bother looking at adjacent chains.
-
- SDValue Chain = St->getChain();
- for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
- if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
- if (I.getOperandNo() != 0)
- continue;
-
- if (OtherST->isVolatile() || OtherST->isIndexed())
- continue;
-
- if (OtherST->getMemoryVT() != MemVT)
- continue;
-
- BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
-
- if (Ptr.equalBaseIndex(BasePtr))
- StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
- }
- }
-
- return;
- }
-
- while (Index) {
- // If the chain has more than one use, then we can't reorder the mem ops.
- if (Index != St && !SDValue(Index, 0)->hasOneUse())
- break;
-
- // Find the base pointer and offset for this memory node.
- BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
-
- // Check that the base pointer is the same as the original one.
- if (!Ptr.equalBaseIndex(BasePtr))
- break;
-
- // The memory operands must not be volatile.
- if (Index->isVolatile() || Index->isIndexed())
- break;
-
- // No truncation.
- if (Index->isTruncatingStore())
- break;
-
- // The stored memory type must be the same.
- if (Index->getMemoryVT() != MemVT)
- break;
-
- // We do not allow under-aligned stores in order to prevent
- // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
- // be irrelevant here; what MATTERS is that we not move memory
- // operations that potentially overlap past each-other.
- if (Index->getAlignment() < MemVT.getStoreSize())
- break;
-
- // We found a potential memory operand to merge.
- StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
-
- // Find the next memory operand in the chain. If the next operand in the
- // chain is a store then move up and continue the scan with the next
- // memory operand. If the next operand is a load save it and use alias
- // information to check if it interferes with anything.
- SDNode *NextInChain = Index->getChain().getNode();
- while (1) {
- if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
- // We found a store node. Use it for the next iteration.
- Index = STn;
- break;
- } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
- if (Ldn->isVolatile()) {
- Index = nullptr;
- break;
+ bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
+ bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
+ isa<ConstantFPSDNode>(St->getValue());
+ bool IsExtractVecSrc =
+ (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
+ auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr) -> bool {
+ if (Other->isVolatile() || Other->isIndexed())
+ return false;
+ // We can merge constant floats to equivalent integers
+ if (Other->getMemoryVT() != MemVT)
+ if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) &&
+ isa<ConstantFPSDNode>(Other->getValue())))
+ return false;
+ if (IsLoadSrc)
+ if (!isa<LoadSDNode>(Other->getValue()))
+ return false;
+ if (IsConstantSrc)
+ if (!(isa<ConstantSDNode>(Other->getValue()) ||
+ isa<ConstantFPSDNode>(Other->getValue())))
+ return false;
+ if (IsExtractVecSrc)
+ if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
+ return false;
+ Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
+ return (Ptr.equalBaseIndex(BasePtr));
+ };
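+ // For example, if St is a non-volatile i32 store of the constant 42,
+ // CandidateMatch accepts another non-volatile, unindexed i32 store of a
+ // ConstantSDNode or ConstantFPSDNode (constant floats can be stored as
+ // equivalent integers) with a matching base pointer, and rejects stores
+ // whose value is loaded or extracted.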
+ // We are looking for a root node that is an ancestor of all mergeable
+ // stores. We search up through a load, to our root, and then down
+ // through all children. For instance, we will find Store{1,2,3} if
+ // St is Store1, Store2 or Store3 and the root is not a load, which is
+ // always true for non-volatile ops. TODO: Expand the search to find
+ // all valid candidates through multiple layers of loads.
+ //
+ // Root
+ // |-------|-------|
+ // Load Load Store3
+ // | |
+ // Store1 Store2
+ //
+ // FIXME: We should be able to climb and
+ // descend TokenFactors to find candidates as well.
+
+ SDNode *RootNode = (St->getChain()).getNode();
+
+ if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
+ RootNode = Ldn->getChain().getNode();
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
+ if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
+ for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
+ if (I2.getOperandNo() == 0)
+ if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
+ BaseIndexOffset Ptr;
+ if (CandidateMatch(OtherST, Ptr))
+ StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
+ }
+ } else
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
+ if (I.getOperandNo() == 0)
+ if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
+ BaseIndexOffset Ptr;
+ if (CandidateMatch(OtherST, Ptr))
+ StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset));
}
-
- // Save the load node for later. Continue the scan.
- AliasLoadNodes.push_back(Ldn);
- NextInChain = Ldn->getChain().getNode();
- continue;
- } else {
- Index = nullptr;
- break;
- }
- }
- }
}
// We need to check that merging these stores does not cause a loop
@@ -11678,31 +12304,34 @@ void DAGCombiner::getStoreMergeAndAliasCandidates(
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates.
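// For instance, if one candidate's stored value comes from a load whose
// chain passes through another candidate store, replacing both stores with
// a single merged store would make that store a predecessor of itself.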
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
- SmallVectorImpl<MemOpLink> &StoreNodes) {
+ SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
SmallPtrSet<const SDNode *, 16> Visited;
SmallVector<const SDNode *, 8> Worklist;
// search ops of store candidates
- for (unsigned i = 0; i < StoreNodes.size(); ++i) {
+ for (unsigned i = 0; i < NumStores; ++i) {
SDNode *n = StoreNodes[i].MemNode;
// Potential loops may happen only through non-chain operands
for (unsigned j = 1; j < n->getNumOperands(); ++j)
Worklist.push_back(n->getOperand(j).getNode());
}
// search through DAG. We can stop early if we find a storenode
- for (unsigned i = 0; i < StoreNodes.size(); ++i) {
+ for (unsigned i = 0; i < NumStores; ++i) {
if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
return false;
}
return true;
}
-bool DAGCombiner::MergeConsecutiveStores(
- StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes) {
+bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (OptLevel == CodeGenOpt::None)
return false;
EVT MemVT = St->getMemoryVT();
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+
+ if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
+ return false;
+
bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
Attribute::NoImplicitFloat);
@@ -11731,145 +12360,137 @@ bool DAGCombiner::MergeConsecutiveStores(
if (MemVT.isVector() && IsLoadSrc)
return false;
- // Only look at ends of store sequences.
- SDValue Chain = SDValue(St, 0);
- if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
- return false;
-
- // Save the LoadSDNodes that we find in the chain.
- // We need to make sure that these nodes do not interfere with
- // any of the store nodes.
- SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
-
- getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
+ SmallVector<MemOpLink, 8> StoreNodes;
+ // Find potential store merge candidates by searching through chain sub-DAG
+ getStoreMergeCandidates(St, StoreNodes);
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
return false;
- // only do dependence check in AA case
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
- if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
- return false;
-
// Sort the memory operands according to their distance from the
- // base pointer. As a secondary criteria: make sure stores coming
- // later in the code come first in the list. This is important for
- // the non-UseAA case, because we're merging stores into the FINAL
- // store along a chain which potentially contains aliasing stores.
- // Thus, if there are multiple stores to the same address, the last
- // one can be considered for merging but not the others.
+ // base pointer.
std::sort(StoreNodes.begin(), StoreNodes.end(),
[](MemOpLink LHS, MemOpLink RHS) {
- return LHS.OffsetFromBase < RHS.OffsetFromBase ||
- (LHS.OffsetFromBase == RHS.OffsetFromBase &&
- LHS.SequenceNum < RHS.SequenceNum);
- });
+ return LHS.OffsetFromBase < RHS.OffsetFromBase;
+ });
// Scan the memory operations on the chain and find the first non-consecutive
// store memory address.
- unsigned LastConsecutiveStore = 0;
+ unsigned NumConsecutiveStores = 0;
int64_t StartAddress = StoreNodes[0].OffsetFromBase;
- for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
- // Check that the addresses are consecutive starting from the second
- // element in the list of stores.
- if (i > 0) {
- int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
- if (CurrAddress - StartAddress != (ElementSizeBytes * i))
- break;
- }
-
- // Check if this store interferes with any of the loads that we found.
- // If we find a load that alias with this store. Stop the sequence.
- if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) {
- return isAlias(Ldn, StoreNodes[i].MemNode);
- }))
+ // Check that the addresses are consecutive starting from the second
+ // element in the list of stores.
+ for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
+ int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
+ if (CurrAddress - StartAddress != (ElementSizeBytes * i))
break;
-
- // Mark this node as useful.
- LastConsecutiveStore = i;
+ NumConsecutiveStores = i + 1;
}
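+ // E.g. with 4-byte elements at offsets 0, 4, 8 and 16 from the base,
+ // the scan keeps the first three stores and stops at the gap before 16.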
+ if (NumConsecutiveStores < 2)
+ return false;
+
+ // Check that we can merge these candidates without causing a cycle
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumConsecutiveStores))
+ return false;
+
+
// The node with the lowest store address.
- LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
- unsigned FirstStoreAS = FirstInChain->getAddressSpace();
- unsigned FirstStoreAlign = FirstInChain->getAlignment();
LLVMContext &Context = *DAG.getContext();
const DataLayout &DL = DAG.getDataLayout();
// Store the constants into memory as one consecutive store.
if (IsConstantSrc) {
- unsigned LastLegalType = 0;
- unsigned LastLegalVectorType = 0;
- bool NonZero = false;
- for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue StoredVal = St->getValue();
-
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
- NonZero |= !C->isNullValue();
- } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
- NonZero |= !C->getConstantFPValue()->isNullValue();
- } else {
- // Non-constant.
- break;
- }
+ bool RV = false;
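+ // Merge as many leading consecutive stores as the legal types allow,
+ // then retry on the remainder. E.g. on a target whose widest legal
+ // integer type is i32, eight consecutive one-byte constant stores can
+ // be merged into two i32 stores over two iterations of this loop.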
+ while (NumConsecutiveStores > 1) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
+ unsigned LastLegalType = 0;
+ unsigned LastLegalVectorType = 0;
+ bool NonZero = false;
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
+ StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
+ SDValue StoredVal = ST->getValue();
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
+ NonZero |= !C->isNullValue();
+ } else if (ConstantFPSDNode *C =
+ dyn_cast<ConstantFPSDNode>(StoredVal)) {
+ NonZero |= !C->getConstantFPValue()->isNullValue();
+ } else {
+ // Non-constant.
+ break;
+ }
- // Find a legal type for the constant store.
- unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
- EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
- bool IsFast;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFast) && IsFast) {
- LastLegalType = i+1;
- // Or check whether a truncstore is legal.
- } else if (TLI.getTypeAction(Context, StoreTy) ==
- TargetLowering::TypePromoteInteger) {
- EVT LegalizedStoredValueTy =
- TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
- if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
- FirstStoreAS, FirstStoreAlign, &IsFast) &&
+ // Find a legal type for the constant store.
+ unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
+ EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
+ bool IsFast = false;
+ if (TLI.isTypeLegal(StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
IsFast) {
LastLegalType = i + 1;
+ // Or check whether a truncstore is legal.
+ } else if (TLI.getTypeAction(Context, StoreTy) ==
+ TargetLowering::TypePromoteInteger) {
+ EVT LegalizedStoredValueTy =
+ TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
+ if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
+ TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
+ FirstStoreAS, FirstStoreAlign, &IsFast) &&
+ IsFast) {
+ LastLegalType = i + 1;
+ }
}
- }
- // We only use vectors if the constant is known to be zero or the target
- // allows it and the function is not marked with the noimplicitfloat
- // attribute.
- if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
- FirstStoreAS)) &&
- !NoVectors) {
- // Find a legal type for the vector store.
- EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
- if (TLI.isTypeLegal(Ty) &&
- TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
- FirstStoreAlign, &IsFast) && IsFast)
- LastLegalVectorType = i + 1;
+ // We only use vectors if the constant is known to be zero or the target
+ // allows it and the function is not marked with the noimplicitfloat
+ // attribute.
+ if ((!NonZero ||
+ TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
+ !NoVectors) {
+ // Find a legal type for the vector store.
+ EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1);
+ if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(Ty) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
+ IsFast)
+ LastLegalVectorType = i + 1;
+ }
}
- }
- // Check if we found a legal integer type to store.
- if (LastLegalType == 0 && LastLegalVectorType == 0)
- return false;
+ // Check if we found a legal integer type that creates a meaningful merge.
+ if (LastLegalType < 2 && LastLegalVectorType < 2)
+ break;
- bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
- unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
+ bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
+ unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
- return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
- true, UseVector);
+ bool Merged = MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
+ true, UseVector);
+ if (!Merged)
+ break;
+ // Remove merged stores for next iteration.
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+ RV = true;
+ NumConsecutiveStores -= NumElem;
+ }
+ return RV;
}
// When extracting multiple vector elements, try to store them
// in one vector store rather than a sequence of scalar stores.
if (IsExtractVecSrc) {
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
unsigned NumStoresToMerge = 0;
bool IsVec = MemVT.isVector();
- for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
unsigned StoreValOpcode = St->getValue().getOpcode();
// This restriction could be loosened.
@@ -11909,7 +12530,7 @@ bool DAGCombiner::MergeConsecutiveStores(
// Find acceptable loads. Loads need to have the same chain (token factor),
// must not be zext, volatile, indexed, and they must be consecutive.
BaseIndexOffset LdBasePtr;
- for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
+ for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
if (!Ld) break;
@@ -11942,7 +12563,7 @@ bool DAGCombiner::MergeConsecutiveStores(
}
// We found a potential memory operand to merge.
- LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
+ LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset));
}
if (LoadNodes.size() < 2)
@@ -11954,7 +12575,9 @@ bool DAGCombiner::MergeConsecutiveStores(
if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
St->getAlignment() >= RequiredAlignment)
return false;
-
+ LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
+ unsigned FirstStoreAS = FirstInChain->getAddressSpace();
+ unsigned FirstStoreAlign = FirstInChain->getAlignment();
LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
unsigned FirstLoadAS = FirstLoad->getAddressSpace();
unsigned FirstLoadAlign = FirstLoad->getAlignment();
@@ -12023,31 +12646,12 @@ bool DAGCombiner::MergeConsecutiveStores(
// We add +1 here because the LastXXX variables refer to location while
// the NumElem refers to array/index size.
- unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
+ unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
NumElem = std::min(LastLegalType, NumElem);
if (NumElem < 2)
return false;
- // Collect the chains from all merged stores.
- SmallVector<SDValue, 8> MergeStoreChains;
- MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
-
- // The latest Node in the DAG.
- unsigned LatestNodeUsed = 0;
- for (unsigned i=1; i<NumElem; ++i) {
- // Find a chain for the new wide-store operand. Notice that some
- // of the store nodes that we found may not be selected for inclusion
- // in the wide store. The chain we use needs to be the chain of the
- // latest store node which is *used* and replaced by the wide store.
- if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
- LatestNodeUsed = i;
-
- MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
- }
-
- LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
-
// Find if it is better to use vectors or integers to load and store
// to memory.
EVT JointMemOpVT;
@@ -12067,8 +12671,9 @@ bool DAGCombiner::MergeConsecutiveStores(
FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), FirstLoadAlign);
- SDValue NewStoreChain =
- DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
+ SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
+
+ AddToWorklist(NewStoreChain.getNode());
SDValue NewStore =
DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
@@ -12081,25 +12686,9 @@ bool DAGCombiner::MergeConsecutiveStores(
SDValue(NewLoad.getNode(), 1));
}
- if (UseAA) {
- // Replace the all stores with the new store.
- for (unsigned i = 0; i < NumElem; ++i)
- CombineTo(StoreNodes[i].MemNode, NewStore);
- } else {
- // Replace the last store with the new store.
- CombineTo(LatestOp, NewStore);
- // Erase all other stores.
- for (unsigned i = 0; i < NumElem; ++i) {
- // Remove all Store nodes.
- if (StoreNodes[i].MemNode == LatestOp)
- continue;
- StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
- deleteAndRecombine(St);
- }
- }
-
- StoreNodes.erase(StoreNodes.begin() + NumElem, StoreNodes.end());
+ // Replace all of the merged stores with the new store.
+ for (unsigned i = 0; i < NumElem; ++i)
+ CombineTo(StoreNodes[i].MemNode, NewStore);
return true;
}
@@ -12256,19 +12845,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (SDValue NewST = TransformFPLoadStorePair(N))
return NewST;
- bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
- : DAG.getSubtarget().useAA();
-#ifndef NDEBUG
- if (CombinerAAOnlyFunc.getNumOccurrences() &&
- CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
- UseAA = false;
-#endif
- if (UseAA && ST->isUnindexed()) {
- // FIXME: We should do this even without AA enabled. AA will just allow
- // FindBetterChain to work in more situations. The problem with this is that
- // any combine that expects memory operations to be on consecutive chains
- // first needs to be updated to look for users of the same chain.
-
+ if (ST->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes, on this store and any
// adjacent stores.
if (findBetterNeighborChains(ST)) {
@@ -12302,8 +12879,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (SimplifyDemandedBits(
Value,
APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
- ST->getMemoryVT().getScalarSizeInBits())))
+ ST->getMemoryVT().getScalarSizeInBits()))) {
+ // Re-visit the store if anything changed and the store hasn't been merged
+ // with another node (in which case N is deleted). SimplifyDemandedBits
+ // will add Value's node back to the worklist if necessary, but we also
+ // need to re-visit the Store node itself.
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
return SDValue(N, 0);
+ }
}
// If this is a load followed by a store to the same location, then the store
@@ -12347,15 +12931,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
// or until we merge the last store on the chain.
- SmallVector<MemOpLink, 8> StoreNodes;
- bool Changed = MergeConsecutiveStores(ST, StoreNodes);
+ bool Changed = MergeConsecutiveStores(ST);
if (!Changed) break;
-
- if (any_of(StoreNodes,
- [ST](const MemOpLink &Link) { return Link.MemNode == ST; })) {
- // ST has been merged and no longer exists.
+ // Return N; the merge only uses CombineTo, so no further worklist
+ // cleanup is necessary.
+ if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
return SDValue(N, 0);
- }
}
}
@@ -12364,7 +12945,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Make sure to do this only after attempting to merge stores in order to
// avoid changing the types of some subset of stores due to visit order,
// preventing their merging.
- if (isa<ConstantFPSDNode>(Value)) {
+ if (isa<ConstantFPSDNode>(ST->getValue())) {
if (SDValue NewSt = replaceStoreOfFPConstant(ST))
return NewSt;
}
@@ -12493,10 +13074,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
EVT VT = InVec.getValueType();
- // If we can't generate a legal BUILD_VECTOR, exit
- if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
- return SDValue();
-
// Check that we know which element is being inserted
if (!isa<ConstantSDNode>(EltNo))
return SDValue();
@@ -12523,6 +13100,10 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
}
}
+ // If we can't generate a legal BUILD_VECTOR, exit
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return SDValue();
+
// Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
// be converted to a BUILD_VECTOR). Fill in the Ops vector with the
// vector elements.
@@ -12544,11 +13125,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
// All the operands of BUILD_VECTOR must have the same type;
// we enforce that here.
EVT OpVT = Ops[0].getValueType();
- if (InVal.getValueType() != OpVT)
- InVal = OpVT.bitsGT(InVal.getValueType()) ?
- DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
- DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
- Ops[Elt] = InVal;
+ Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
}
// Return the new vector
@@ -12568,6 +13145,11 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
return SDValue();
+ ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
+ ISD::NON_EXTLOAD : ISD::EXTLOAD;
+ if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
+ return SDValue();
+
Align = NewAlign;
SDValue NewPtr = OriginalLoad->getBasePtr();
@@ -12639,6 +13221,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
EVT VT = InVec.getValueType();
EVT NVT = N->getValueType(0);
+ if (InVec.isUndef())
+ return DAG.getUNDEF(NVT);
+
if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
@@ -13022,7 +13607,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
return DAG.getNode(Opcode, DL, VT, BV);
}
-SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N,
+SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask,
SDValue VecIn1, SDValue VecIn2,
unsigned LeftIdx) {
@@ -13300,6 +13885,35 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (ISD::allOperandsUndef(N))
return DAG.getUNDEF(VT);
+ // Check if we can express the BUILD_VECTOR via a subvector extract.
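+ // E.g. (v2i32 build_vector (extract_vector_elt V, 2),
+ // (extract_vector_elt V, 3)) with V : v4i32 becomes
+ // (v2i32 extract_subvector V, 2), and an in-order rebuild of all of V's
+ // elements folds to V itself.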
+ if (!LegalTypes && (N->getNumOperands() > 1)) {
+ SDValue Op0 = N->getOperand(0);
+ auto checkElem = [&](SDValue Op) -> uint64_t {
+ if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
+ (Op0.getOperand(0) == Op.getOperand(0)))
+ if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return CNode->getZExtValue();
+ return -1;
+ };
+
+ int Offset = checkElem(Op0);
+ for (unsigned i = 0; i < N->getNumOperands(); ++i) {
+ if (Offset + i != checkElem(N->getOperand(i))) {
+ Offset = -1;
+ break;
+ }
+ }
+
+ if ((Offset == 0) &&
+ (Op0.getOperand(0).getValueType() == N->getValueType(0)))
+ return Op0.getOperand(0);
+ if ((Offset != -1) &&
+ ((Offset % N->getValueType(0).getVectorNumElements()) ==
+ 0)) // IDX must be multiple of output size.
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
+ Op0.getOperand(0), Op0.getOperand(1));
+ }
+
if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
return V;
@@ -13491,8 +14105,11 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
return SDValue();
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
- VT.getSizeInBits() / SclTy.getSizeInBits());
+ unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
+ if (VNTNumElms < 2)
+ return SDValue();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
return SDValue();
@@ -13611,15 +14228,19 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
- if (V->getOpcode() == ISD::CONCAT_VECTORS) {
- // Combine:
- // (extract_subvec (concat V1, V2, ...), i)
- // Into:
- // Vi if possible
- // Only operand 0 is checked as 'concat' assumes all inputs of the same
- // type.
- if (V->getOperand(0).getValueType() != NVT)
- return SDValue();
+ // Extract from UNDEF is UNDEF.
+ if (V.isUndef())
+ return DAG.getUNDEF(NVT);
+
+ // Combine:
+ // (extract_subvec (concat V1, V2, ...), i)
+ // Into:
+ // Vi if possible
+ // Only operand 0 is checked as 'concat' assumes all inputs of the same
+ // type.
+ if (V->getOpcode() == ISD::CONCAT_VECTORS &&
+ isa<ConstantSDNode>(N->getOperand(1)) &&
+ V->getOperand(0).getValueType() == NVT) {
unsigned Idx = N->getConstantOperandVal(1);
unsigned NumElems = NVT.getVectorNumElements();
assert((Idx % NumElems) == 0 &&
@@ -13633,19 +14254,16 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
// Handle only simple case where vector being inserted and vector
- // being extracted are of same type, and are half size of larger vectors.
- EVT BigVT = V->getOperand(0).getValueType();
+ // being extracted are of same size.
EVT SmallVT = V->getOperand(1).getValueType();
- if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
+ if (!NVT.bitsEq(SmallVT))
return SDValue();
- // Only handle cases where both indexes are constants with the same type.
+ // Only handle cases where both indexes are constants.
ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
- if (InsIdx && ExtIdx &&
- InsIdx->getValueType(0).getSizeInBits() <= 64 &&
- ExtIdx->getValueType(0).getSizeInBits() <= 64) {
+ if (InsIdx && ExtIdx) {
// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
// Into:
@@ -13892,6 +14510,113 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}
+// Match shuffles that can be converted to any_vector_extend_in_reg.
+// This is often generated during legalization.
+// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
+// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
+SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool LegalOperations) {
+ EVT VT = SVN->getValueType(0);
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+ // TODO Add support for big-endian when we have a test case.
+ if (!VT.isInteger() || IsBigEndian)
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ ArrayRef<int> Mask = SVN->getMask();
+ SDValue N0 = SVN->getOperand(0);
+
+ // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
+ auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
+ continue;
+ return false;
+ }
+ return true;
+ };
+
+ // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
+ // power-of-2 extensions as they are the most likely.
+ for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
+ if (!isAnyExtend(Scale))
+ continue;
+
+ EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
+ return DAG.getBitcast(VT,
+ DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
+ }
+
+ return SDValue();
+}
+
+// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
+// each source element of a large type into the lowest elements of a smaller
+// destination type. This is often generated during legalization.
+// If the source node itself was a '*_extend_vector_inreg' node then we
+// should be able to remove it.
+SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) {
+ EVT VT = SVN->getValueType(0);
+ bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+ // TODO Add support for big-endian when we have a test case.
+ if (!VT.isInteger() || IsBigEndian)
+ return SDValue();
+
+ SDValue N0 = SVN->getOperand(0);
+ while (N0.getOpcode() == ISD::BITCAST)
+ N0 = N0.getOperand(0);
+
+ unsigned Opcode = N0.getOpcode();
+ if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
+ Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
+ Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ ArrayRef<int> Mask = SVN->getMask();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
+
+ // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
+ // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
+ // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
+ auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
+ continue;
+ return false;
+ }
+ return true;
+ };
+
+ // At the moment we just handle the case where we've truncated back to the
+ // same size as before the extension.
+ // TODO: handle more extension/truncation cases as cases arise.
+ if (EltSizeInBits != ExtSrcSizeInBits)
+ return SDValue();
+
+ // Attempt to match a 'truncate_vector_inreg' shuffle; we just search for
+ // power-of-2 truncations as they are the most likely.
+ for (unsigned Scale = 2; Scale < NumElts; Scale *= 2)
+ if (isTruncate(Scale))
+ return DAG.getBitcast(VT, N00);
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -13996,6 +14721,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
return S;
+ // Match shuffles that can be converted to any_vector_extend_in_reg.
+ if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
+ return V;
+
+ // Combine "truncate_vector_in_reg" style shuffles.
+ if (SDValue V = combineTruncationShuffle(SVN, DAG))
+ return V;
+
if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
Level < AfterLegalizeVectorOps &&
(N1.isUndef() ||
@@ -14253,6 +14986,16 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
+ // If inserting an UNDEF, just return the original vector.
+ if (N1.isUndef())
+ return N0;
+
+ // If this is an insert of an extracted vector into an undef vector, we can
+ // just use the input to the extract.
+ if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
+ return N1.getOperand(0);
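+ // E.g. (insert_subvector undef:v8i32, (extract_subvector V:v8i32, 4), 4)
+ // folds to V: the defined lanes match V and the remaining lanes are undef.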
+
// Combine INSERT_SUBVECTORs where we are inserting to the same index.
// INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
// --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
@@ -14262,26 +15005,39 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
N1, N2);
- if (N0.getValueType() != N1.getValueType())
+ if (!isa<ConstantSDNode>(N2))
return SDValue();
+ unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+ // Canonicalize insert_subvector dag nodes.
+ // Example:
+ // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
+ // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0)
+ // when Idx1 < Idx0, so the smaller index ends up innermost.
+ if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
+ N1.getValueType() == N0.getOperand(1).getValueType() &&
+ isa<ConstantSDNode>(N0.getOperand(2))) {
+ unsigned OtherIdx = cast<ConstantSDNode>(N0.getOperand(2))->getZExtValue();
+ if (InsIdx < OtherIdx) {
+ // Swap nodes.
+ SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
+ N0.getOperand(0), N1, N2);
+ AddToWorklist(NewOp.getNode());
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
+ VT, NewOp, N0.getOperand(1), N0.getOperand(2));
+ }
+ }
+
// If the input vector is a concatenation, and the insert replaces
- // one of the halves, we can optimize into a single concat_vectors.
- if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
- N2.getOpcode() == ISD::Constant) {
- APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
+ // one of the pieces, we can optimize into a single concat_vectors.
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
+ N0.getOperand(0).getValueType() == N1.getValueType()) {
+ unsigned Factor = N1.getValueType().getVectorNumElements();
- // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
- // (concat_vectors Z, Y)
- if (InsIdx == 0)
- return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N1,
- N0.getOperand(1));
+ SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
+ Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
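+ // E.g. inserting X:v2f64 at index 4 into (concat_vectors A, B, C, D) of
+ // v2f64 pieces overwrites Ops[4 / 2], giving (concat_vectors A, B, X, D).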
- // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
- // (concat_vectors X, Z)
- if (InsIdx == VT.getVectorNumElements() / 2)
- return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0),
- N1);
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
return SDValue();
@@ -15257,7 +16013,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
if (Base.getOpcode() == ISD::ADD) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
Base = Base.getOperand(0);
- Offset += C->getZExtValue();
+ Offset += C->getSExtValue();
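+ // Use the sign-extended value so that a negative offset,
+ // e.g. (add %p, -4), folds into Offset correctly.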
}
}
@@ -15454,6 +16210,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
++Depth;
break;
+ case ISD::CopyFromReg:
+ // Forward past CopyFromReg.
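+ // It reads a register, not memory, so it cannot alias any store.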
+ Chains.push_back(Chain.getOperand(0));
+ ++Depth;
+ break;
+
default:
// For all other instructions we will just have to take what we can get.
Aliases.push_back(Chain);
@@ -15482,6 +16244,18 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
+// This function tries to collect a bunch of potentially interesting
+// nodes to improve the chains of, all at once. This might seem
+// redundant, as this function gets called when visiting every store
+// node, so why not let the work be done on each store as it's visited?
+//
+// I believe this is mainly important because MergeConsecutiveStores
+// is unable to deal with merging stores of different sizes, so unless
+// we improve the chains of all the potential candidates up-front
+// before running MergeConsecutiveStores, it might only see some of
+// the nodes that will eventually be candidates, and then not be able
+// to go from a partially-merged state to the desired final
+// fully-merged state.
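+//
+// For example, a group of adjacent stores of mixed widths may share one
+// chain; improving every candidate's chain here, before any merging, lets
+// MergeConsecutiveStores later see the whole group rather than a fragment.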
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
@@ -15517,10 +16291,8 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
if (!Ptr.equalBaseIndex(BasePtr))
break;
- // Find the next memory operand in the chain. If the next operand in the
- // chain is a store then move up and continue the scan with the next
- // memory operand. If the next operand is a load save it and use alias
- // information to check if it interferes with anything.
+ // Walk up the chain to find the next store node, ignoring any
+ // intermediate loads. Any other kind of node will halt the loop.
SDNode *NextInChain = Index->getChain().getNode();
while (true) {
if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
@@ -15539,9 +16311,14 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
Index = nullptr;
break;
}
- }
+ } // end while
}
+ // At this point, ChainedStores lists all of the Store nodes
+ // reachable by iterating up through chain nodes matching the above
+ // conditions. For each such store identified, try to find an
+ // earlier chain to attach the store to that won't violate the
+ // required ordering.
bool MadeChangeToSt = false;
SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index e2f33bb433ba..0584ab9f60d1 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1,4 +1,4 @@
-//===-- FastISel.cpp - Implementation of the FastISel class ---------------===//
+//===- FastISel.cpp - Implementation of the FastISel class ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -39,35 +39,76 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "isel"
@@ -78,21 +119,6 @@ STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
"target-specific selector");
STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
-void FastISel::ArgListEntry::setAttributes(ImmutableCallSite *CS,
- unsigned AttrIdx) {
- IsSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt);
- IsZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt);
- IsInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg);
- IsSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
- IsNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
- IsByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
- IsInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
- IsReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
- IsSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf);
- IsSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError);
- Alignment = CS->getParamAlignment(AttrIdx);
-}
-
/// Set the current block to which generated machine instructions will be
/// appended, and clear the local CSE map.
void FastISel::startNewBlock() {
@@ -231,17 +257,13 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) {
// Try to emit the constant by using an integer constant with a cast.
const APFloat &Flt = CF->getValueAPF();
EVT IntVT = TLI.getPointerTy(DL);
-
- uint64_t x[2];
uint32_t IntBitWidth = IntVT.getSizeInBits();
+ APSInt SIntVal(IntBitWidth, /*isUnsigned=*/false);
bool isExact;
- (void)Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
- APFloat::rmTowardZero, &isExact);
+ (void)Flt.convertToInteger(SIntVal, APFloat::rmTowardZero, &isExact);
if (isExact) {
- APInt IntVal(IntBitWidth, x);
-
unsigned IntegerReg =
- getRegForValue(ConstantInt::get(V->getContext(), IntVal));
+ getRegForValue(ConstantInt::get(V->getContext(), SIntVal));
if (IntegerReg != 0)
Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg,
/*Kill=*/false);
@@ -646,7 +668,7 @@ bool FastISel::selectStackmap(const CallInst *I) {
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::STACKMAP));
for (auto const &MO : Ops)
- MIB.addOperand(MO);
+ MIB.add(MO);
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
@@ -672,10 +694,8 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
Args.reserve(NumArgs);
// Populate the argument list.
- // Attributes for args start at offset 1, after the return attribute.
ImmutableCallSite CS(CI);
- for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
- ArgI != ArgE; ++ArgI) {
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) {
Value *V = CI->getOperand(ArgI);
assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
@@ -683,7 +703,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
ArgListEntry Entry;
Entry.Val = V;
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, AttrI);
+ Entry.setAttributes(&CS, ArgIdx);
Args.push_back(Entry);
}
@@ -826,7 +846,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
TII.get(TargetOpcode::PATCHPOINT));
for (auto &MO : Ops)
- MIB.addOperand(MO);
+ MIB.add(MO);
MIB->setPhysRegsDeadExcept(CLI.InRegs, TRI);
@@ -841,9 +861,9 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
return true;
}
-/// Returns an AttributeSet representing the attributes applied to the return
+/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
-static AttributeSet getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
+static AttributeList getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
SmallVector<Attribute::AttrKind, 2> Attrs;
if (CLI.RetSExt)
Attrs.push_back(Attribute::SExt);
@@ -852,8 +872,8 @@ static AttributeSet getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
if (CLI.IsInReg)
Attrs.push_back(Attribute::InReg);
- return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex,
- Attrs);
+ return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
+ Attrs);
}
bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName,
@@ -885,9 +905,10 @@ bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol,
ArgListEntry Entry;
Entry.Val = V;
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, ArgI + 1);
+ Entry.setAttributes(&CS, ArgI);
Args.push_back(Entry);
}
+ TLI.markLibCallAttributes(MF, CS.getCallingConv(), Args);
CallLoweringInfo CLI;
CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), CS, NumArgs);
@@ -1021,7 +1042,7 @@ bool FastISel::lowerCall(const CallInst *CI) {
Entry.Ty = V->getType();
// Skip the first return-type Attribute to get to params.
- Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
+ Entry.setAttributes(&CS, i - CS.arg_begin());
Args.push_back(Entry);
}
@@ -1149,7 +1170,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
} else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::DBG_VALUE))
- .addOperand(*Op)
+ .add(*Op)
.addImm(0)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
@@ -1362,7 +1383,7 @@ bool FastISel::selectInstruction(const Instruction *I) {
if (const auto *Call = dyn_cast<CallInst>(I)) {
const Function *F = Call->getCalledFunction();
- LibFunc::Func Func;
+ LibFunc Func;
// As a special case, don't handle calls to builtin library functions that
// may be translated directly to target instructions.
@@ -1665,7 +1686,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
SkipTargetIndependentISel(SkipTargetIndependentISel) {}
-FastISel::~FastISel() {}
+FastISel::~FastISel() = default;
bool FastISel::fastLowerArguments() { return false; }
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 4a9042cfb3f4..e85d1951e3ae 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -235,7 +235,6 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
if (II.OpInfo[i].isOptionalDef()) {
// Optional def must be a physical register.
- unsigned NumResults = CountResults(Node);
VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
MIB.addReg(VRBase, RegState::Define);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index b0028252836a..fc7cd020fe2e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1192,8 +1192,11 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
// If the index is dependent on the store we will introduce a cycle when
// creating the load (the load uses the index, and by replacing the chain
- // we will make the index dependent on the load).
- if (SDNode::hasPredecessorHelper(ST, Visited, Worklist))
+ // we will make the index dependent on the load). Also, the store might be
+ // dependent on the extractelement and introduce a cycle when creating
+ // the load.
+ if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) ||
+ ST->hasPredecessor(Op.getNode()))
continue;
StackPtr = ST->getBasePtr();
@@ -1909,8 +1912,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op;
Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -1935,9 +1938,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
InChain = TCChain;
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(SDLoc(Node))
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setTailCall(isTailCall)
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -1960,8 +1967,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
for (unsigned i = 0; i != NumOps; ++i) {
Entry.Node = Ops[i];
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -1970,9 +1977,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -1994,8 +2004,8 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Node->getOperand(i);
Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -2004,9 +2014,12 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(SDLoc(Node))
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2081,8 +2094,8 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op;
Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
@@ -2090,8 +2103,8 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = FIPtr;
Entry.Ty = RetTy->getPointerTo();
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -2099,9 +2112,12 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
SDLoc dl(Node);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(dl)
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2185,24 +2201,24 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
// Pass the argument.
Entry.Node = Node->getOperand(0);
Entry.Ty = RetTy;
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
// Pass the return address of sin.
SDValue SinPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = SinPtr;
Entry.Ty = RetTy->getPointerTo();
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
// Also pass the return address of the cos.
SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
Entry.Node = CosPtr;
Entry.Ty = RetTy->getPointerTo();
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -2210,9 +2226,9 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
SDLoc dl(Node);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC),
- Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args));
+ CLI.setDebugLoc(dl).setChain(InChain).setLibCallee(
+ TLI.getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), Callee,
+ std::move(Args));
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2529,12 +2545,12 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
APInt MaskHi4(Sz, 0), MaskHi2(Sz, 0), MaskHi1(Sz, 0);
APInt MaskLo4(Sz, 0), MaskLo2(Sz, 0), MaskLo1(Sz, 0);
for (unsigned J = 0; J != Sz; J += 8) {
- MaskHi4 = MaskHi4.Or(APInt(Sz, 0xF0ull << J));
- MaskLo4 = MaskLo4.Or(APInt(Sz, 0x0Full << J));
- MaskHi2 = MaskHi2.Or(APInt(Sz, 0xCCull << J));
- MaskLo2 = MaskLo2.Or(APInt(Sz, 0x33ull << J));
- MaskHi1 = MaskHi1.Or(APInt(Sz, 0xAAull << J));
- MaskLo1 = MaskLo1.Or(APInt(Sz, 0x55ull << J));
+ MaskHi4 = MaskHi4 | (0xF0ull << J);
+ MaskLo4 = MaskLo4 | (0x0Full << J);
+ MaskHi2 = MaskHi2 | (0xCCull << J);
+ MaskLo2 = MaskLo2 | (0x33ull << J);
+ MaskHi1 = MaskHi1 | (0xAAull << J);
+ MaskLo1 = MaskLo1 | (0x55ull << J);
}
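The mask pairs built in this loop are the classic divide-and-conquer bit-reversal constants: 0xF0/0x0F swap nibbles, 0xCC/0x33 swap bit pairs, 0xAA/0x55 swap adjacent bits, and the BSWAP below handles byte order. A standalone C++ sketch of the same stages on one 32-bit word (illustrative only, not LLVM code; __builtin_bswap32 assumes GCC or Clang):

```cpp
#include <cstdint>
#include <cstdio>

// Reverse the bits of a 32-bit word using the same mask stages the expansion
// above builds per byte: swap byte order, then nibbles, bit pairs, and bits.
static uint32_t reverseBits32(uint32_t V) {
  V = __builtin_bswap32(V);                                 // byte order
  V = ((V & 0xF0F0F0F0u) >> 4) | ((V & 0x0F0F0F0Fu) << 4);  // nibbles
  V = ((V & 0xCCCCCCCCu) >> 2) | ((V & 0x33333333u) << 2);  // bit pairs
  V = ((V & 0xAAAAAAAAu) >> 1) | ((V & 0x55555555u) << 1);  // single bits
  return V;
}

int main() {
  printf("%08x\n", reverseBits32(0x00000001u)); // prints 80000000
}
```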
// BSWAP if the type is wider than a single byte.
@@ -3091,7 +3107,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.getVectorIdxTy(DAG.getDataLayout()))));
}
- Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ Tmp1 = DAG.getBuildVector(VT, dl, Ops);
// We may have changed the BUILD_VECTOR type. Cast it back to the Node type.
Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1);
Results.push_back(Tmp1);
@@ -3790,8 +3806,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
VT.getScalarType(), Ex, Sh));
}
- SDValue Result =
- DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Scalars);
+
+ SDValue Result = DAG.getBuildVector(Node->getValueType(0), dl, Scalars);
ReplaceNode(SDValue(Node, 0), Result);
break;
}
@@ -3830,10 +3846,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Node->getOperand(0))
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__sync_synchronize",
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args));
+ .setLibCallee(
+ CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
@@ -3870,10 +3887,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Node->getOperand(0))
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("abort",
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args));
+ .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(
+ "abort", TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
Results.push_back(CallResult.second);
@@ -4424,8 +4441,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
NewOps.push_back(Elt);
}
- SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, SL, MidVT, NewOps);
-
+ SDValue NewVec = DAG.getBuildVector(MidVT, SL, NewOps);
Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec));
break;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 72b56d84d945..6f2b1b94ce46 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -459,7 +459,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) {
Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op);
if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat)
- SoftenFloatResult(Op.getNode(), 0);
+ AddToWorklist(Op.getNode());
}
if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) {
@@ -472,8 +472,6 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
}
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
- if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat)
- Op = GetSoftenedFloat(Op);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
}
@@ -1054,15 +1052,15 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- assert(NVT.getSizeInBits() == integerPartWidth &&
+ assert(NVT.getSizeInBits() == 64 &&
"Do not know how to expand this float constant!");
APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
SDLoc dl(N);
Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
- APInt(integerPartWidth, C.getRawData()[1])),
+ APInt(64, C.getRawData()[1])),
dl, NVT);
Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT),
- APInt(integerPartWidth, C.getRawData()[0])),
+ APInt(64, C.getRawData()[0])),
dl, NVT);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index dc436ce04514..85068e890756 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -690,7 +690,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
case TargetLowering::TypePromoteInteger:
Res = GetPromotedInteger(InOp);
break;
- case TargetLowering::TypeSplitVector:
+ case TargetLowering::TypeSplitVector: {
EVT InVT = InOp.getValueType();
assert(InVT.isVector() && "Cannot split scalar types");
unsigned NumElts = InVT.getVectorNumElements();
@@ -709,6 +709,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
}
+ case TargetLowering::TypeWidenVector: {
+ SDValue WideInOp = GetWidenedVector(InOp);
+
+ // Truncate widened InOp.
+ unsigned NumElem = WideInOp.getValueType().getVectorNumElements();
+ EVT TruncVT = EVT::getVectorVT(*DAG.getContext(),
+ N->getValueType(0).getScalarType(), NumElem);
+ SDValue WideTrunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, WideInOp);
+
+      // Zero-extend so that the elements are of the same type as those of NVT.
+ EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), NVT.getVectorElementType(),
+ NumElem);
+ SDValue WideExt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, WideTrunc);
+
+ // Extract the low NVT subvector.
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, dl, IdxTy);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, WideExt, ZeroIdx);
+ }
+ }
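A hypothetical shape walk through the new TypeWidenVector path, assuming the node truncates v4i16 to v4i8, the promoted result type NVT is v4i32, and the input was widened to v8i16, sketched here with plain arrays standing in for vector lanes (illustrative only, not LLVM code):

```cpp
#include <array>
#include <cstdint>
#include <cstdio>

int main() {
  std::array<uint16_t, 8> WideInOp = {1, 2, 3, 4, 0, 0, 0, 0};

  // Step 1: truncate the widened input to TruncVT = v8i8.
  std::array<uint8_t, 8> WideTrunc;
  for (int i = 0; i < 8; ++i) WideTrunc[i] = uint8_t(WideInOp[i]);

  // Step 2: zero-extend to ExtVT = v8i32 so elements match NVT's i32.
  std::array<uint32_t, 8> WideExt;
  for (int i = 0; i < 8; ++i) WideExt[i] = WideTrunc[i];

  // Step 3: extract the low NVT = v4i32 subvector at index 0.
  for (int i = 0; i < 4; ++i) printf("%u ", WideExt[i]); // prints 1 2 3 4
}
```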
// Truncate to NVT instead of VT
return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
@@ -1089,6 +1109,10 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
SDValue Cond = N->getOperand(0);
EVT OpTy = N->getOperand(1).getValueType();
+ if (N->getOpcode() == ISD::VSELECT)
+ if (SDValue Res = WidenVSELECTAndMask(N))
+ return Res;
+
// Promote all the way up to the canonical SetCC type.
EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy;
Cond = PromoteTargetBoolean(Cond, OpVT);
@@ -2586,24 +2610,25 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Op;
Entry.Ty = ArgTy;
- Entry.isSExt = true;
- Entry.isZExt = false;
+ Entry.IsSExt = true;
+ Entry.IsZExt = false;
Args.push_back(Entry);
}
// Also pass the address of the overflow check.
Entry.Node = Temp;
Entry.Ty = PtrTy->getPointerTo();
- Entry.isSExt = true;
- Entry.isZExt = false;
+ Entry.IsSExt = true;
+ Entry.IsZExt = false;
Args.push_back(Entry);
SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args))
- .setSExtResult();
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args))
+ .setSExtResult();
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index cf19d75676cd..0a2b680e1c66 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -199,8 +199,7 @@ bool DAGTypeLegalizer::run() {
// non-leaves.
for (SDNode &Node : DAG.allnodes()) {
if (Node.getNumOperands() == 0) {
- Node.setNodeId(ReadyToProcess);
- Worklist.push_back(&Node);
+ AddToWorklist(&Node);
} else {
Node.setNodeId(Unanalyzed);
}
@@ -331,6 +330,12 @@ ScanOperands:
// to the worklist etc.
if (NeedsReanalyzing) {
assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?");
+
+      // Remove any result values from SoftenedFloats as N will be revisited.
+ for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i)
+ SoftenedFloats.erase(SDValue(N, i));
+
N->setNodeId(NewNode);
// Recompute the NodeId and correct processed operands, adding the node to
// the worklist if ready.
@@ -749,6 +754,8 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
// new uses of From due to CSE. If this happens, replace the new uses of
// From with To.
} while (!From.use_empty());
+
+ SoftenedFloats.erase(From);
}
void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
@@ -1077,8 +1084,8 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node,
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Node = Node->getOperand(i);
Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
@@ -1087,9 +1094,12 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node,
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(SDLoc(Node))
+ .setChain(InChain)
+ .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
+ std::move(Args))
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index ec55662d75c0..80c939700518 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -191,6 +191,11 @@ private:
void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
SDValue &Lo, SDValue &Hi);
+ void AddToWorklist(SDNode *N) {
+ N->setNodeId(ReadyToProcess);
+ Worklist.push_back(N);
+ }
+
//===--------------------------------------------------------------------===//
// Integer Promotion Support: LegalizeIntegerTypes.cpp
//===--------------------------------------------------------------------===//
@@ -597,6 +602,7 @@ private:
SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
+ SDValue ScalarizeVecRes_VecInregOp(SDNode *N);
SDValue ScalarizeVecRes_BITCAST(SDNode *N);
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
@@ -672,6 +678,7 @@ private:
SDValue SplitVecOp_BITCAST(SDNode *N);
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
@@ -713,6 +720,7 @@ private:
SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
SDValue WidenVecRes_SELECT(SDNode* N);
+ SDValue WidenVSELECTAndMask(SDNode *N);
SDValue WidenVecRes_SELECT_CC(SDNode* N);
SDValue WidenVecRes_SETCC(SDNode* N);
SDValue WidenVecRes_UNDEF(SDNode *N);
@@ -782,6 +790,13 @@ private:
/// By default, the vector will be widened with undefined values.
SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false);
+  /// Return a mask of vector type MaskVT to replace InMask. Also adjust the
+  /// new mask from MaskVT to ToMaskVT, if needed, with a vector extension or
+  /// truncation.
+ SDValue convertMask(SDValue InMask, EVT MaskVT, EVT ToMaskVT);
+
+ /// Get the target mask VT, and widen if needed.
+ EVT getSETCCWidenedResultTy(SDValue SetCC);
+
//===--------------------------------------------------------------------===//
// Generic Splitting: LegalizeTypesGeneric.cpp
//===--------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 3682c32460c6..c02b8960b36c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -512,8 +512,24 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
GetSplitOp(Op, Lo, Hi);
}
-void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+static std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N,
+ SelectionDAG &DAG) {
+ SDLoc DL(N);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+ // Split the inputs.
+ SDValue Lo, Hi, LL, LH, RL, RH;
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+
+ return std::make_pair(Lo, Hi);
+}
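Why SplitVSETCC is sound, sketched on plain arrays: SETCC is purely element-wise, so comparing the two halves independently yields the same lanes as comparing the whole vector (illustrative C++, not LLVM code):

```cpp
#include <array>
#include <cstdint>

// A lane-wise "v4" compare equals its two "v2" half-compares concatenated.
int main() {
  std::array<int32_t, 4> L = {1, 5, 3, 7}, R = {2, 4, 3, 9};
  std::array<int32_t, 4> Whole, Halves;
  for (int i = 0; i < 4; ++i) Whole[i] = L[i] < R[i] ? -1 : 0;
  for (int i = 0; i < 2; ++i) Halves[i] = L[i] < R[i] ? -1 : 0; // Lo half
  for (int i = 2; i < 4; ++i) Halves[i] = L[i] < R[i] ? -1 : 0; // Hi half
  return Whole == Halves ? 0 : 1;
}
```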
+
+void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue LL, LH, RL, RH, CL, CH;
SDLoc dl(N);
GetSplitOp(N->getOperand(1), LL, LH);
@@ -522,9 +538,16 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo,
SDValue Cond = N->getOperand(0);
CL = CH = Cond;
if (Cond.getValueType().isVector()) {
+ if (SDValue Res = WidenVSELECTAndMask(N))
+ std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl);
+ // It seems to improve code to generate two narrow SETCCs as opposed to
+ // splitting a wide result vector.
+ else if (Cond.getOpcode() == ISD::SETCC)
+ std::tie(CL, CH) = SplitVSETCC(Cond.getNode(), DAG);
// Check if there are already split versions of the vector available and
// use those instead of splitting the mask operand again.
- if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector)
+ else if (getTypeAction(Cond.getValueType()) ==
+ TargetLowering::TypeSplitVector)
GetSplitVector(Cond, CL, CH);
else
std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index d4fa20f35274..5f167f8de1cf 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -105,6 +105,7 @@ class VectorLegalizer {
SDValue ExpandLoad(SDValue Op);
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
+ SDValue ExpandFSUB(SDValue Op);
SDValue ExpandBITREVERSE(SDValue Op);
SDValue ExpandCTLZ(SDValue Op);
SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
@@ -621,8 +622,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
}
NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
- Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
- Op.getNode()->getValueType(0), Vals);
+ Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);
} else {
SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
@@ -692,6 +692,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return ExpandUINT_TO_FLOAT(Op);
case ISD::FNEG:
return ExpandFNEG(Op);
+ case ISD::FSUB:
+ return ExpandFSUB(Op);
case ISD::SETCC:
return UnrollVSETCC(Op);
case ISD::BITREVERSE:
@@ -720,8 +722,6 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
assert(VT.isVector() && !Mask.getValueType().isVector()
&& Op1.getValueType() == Op2.getValueType() && "Invalid type");
- unsigned NumElem = VT.getVectorNumElements();
-
// If we can't even use the basic vector operations of
// AND,OR,XOR, we will have to scalarize the op.
// Notice that the operation may be 'promoted' which means that it is
@@ -745,8 +745,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
DAG.getConstant(0, DL, BitTy));
// Broadcast the mask so that the entire vector is all ones or all zeros.
- SmallVector<SDValue, 8> Ops(NumElem, Mask);
- Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, Ops);
+ Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
// Bitcast the operands to be the same type as the mask.
// This is needed when we select between FP types because
@@ -1025,6 +1024,18 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
return DAG.UnrollVectorOp(Op.getNode());
}
+SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
+ // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
+ // we can defer this to operation legalization where it will be lowered as
+ // a+(-b).
+ EVT VT = Op.getValueType();
+ if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FADD, VT))
+ return Op; // Defer to LegalizeDAG
+
+ return DAG.UnrollVectorOp(Op.getNode());
+}
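The identity this expansion relies on (for IEEE floats, negation only flips the sign bit, so a - b and a + (-b) give bit-identical results) can be checked directly (illustrative only):

```cpp
#include <cassert>

// a - b == a + (-b) for IEEE floating point: FNEG is exact, so the
// FSUB -> FADD(FNEG) rewrite loses no precision.
int main() {
  float a = 1.5f, b = 0.25f;
  assert(a - b == a + (-b));
}
```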
+
SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
EVT VT = Op.getValueType();
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
@@ -1102,7 +1113,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
(EltVT.getSizeInBits()), dl, EltVT),
DAG.getConstant(0, dl, EltVT));
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ return DAG.getBuildVector(VT, dl, Ops);
}
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 6906f67ebacb..78fddb5ce8f5 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -65,6 +65,11 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ R = ScalarizeVecRes_VecInregOp(N);
+ break;
case ISD::ANY_EXTEND:
case ISD::BITREVERSE:
case ISD::BSWAP:
@@ -97,6 +102,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::ZERO_EXTEND:
+ case ISD::FCANONICALIZE:
R = ScalarizeVecRes_UnaryOp(N);
break;
@@ -257,6 +263,34 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
LHS, DAG.getValueType(ExtVT));
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(0);
+
+ EVT OpVT = Op.getValueType();
+ EVT OpEltVT = OpVT.getVectorElementType();
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ Op = GetScalarizedVector(Op);
+ } else {
+ Op = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ switch (N->getOpcode()) {
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ return DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Op);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return DAG.getNode(ISD::SIGN_EXTEND, DL, EltVT, Op);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, EltVT, Op);
+ }
+
+ llvm_unreachable("Illegal extend_vector_inreg opcode");
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
// If the operand is wider than the vector element type then it is implicitly
// truncated. Make that explicit here.
@@ -486,7 +520,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
SmallVector<SDValue, 8> Ops(N->getNumOperands());
for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
Ops[i] = GetScalarizedVector(N->getOperand(i));
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Ops);
+ return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops);
}
/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
@@ -637,6 +671,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SINT_TO_FP:
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
+ case ISD::FCANONICALIZE:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
@@ -781,10 +816,10 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
unsigned LoNumElts = LoVT.getVectorNumElements();
SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
- Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, LoOps);
+ Lo = DAG.getBuildVector(LoVT, dl, LoOps);
SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
- Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, HiOps);
+ Hi = DAG.getBuildVector(HiVT, dl, HiOps);
}
void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
@@ -928,7 +963,12 @@ void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo,
SDLoc dl(N);
SDValue InLo, InHi;
- GetSplitVector(N0, InLo, InHi);
+
+ if (getTypeAction(N0.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(N0, InLo, InHi);
+ else
+ std::tie(InLo, InHi) = DAG.SplitVectorOperand(N, 0);
+
EVT InLoVT = InLo.getValueType();
unsigned InNumElements = InLoVT.getVectorNumElements();
@@ -1372,7 +1412,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
}
// Construct the Lo/Hi output using a BUILD_VECTOR.
- Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, SVOps);
+ Output = DAG.getBuildVector(NewVT, dl, SVOps);
} else if (InputUsed[0] == -1U) {
// No input vectors were used! The result is undefined.
Output = DAG.getUNDEF(NewVT);
@@ -1466,8 +1506,15 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::FTRUNC:
+ case ISD::FCANONICALIZE:
Res = SplitVecOp_UnaryOp(N);
break;
+
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Res = SplitVecOp_ExtVecInRegOp(N);
+ break;
}
}
@@ -1615,7 +1662,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
EltVT = MVT::i8;
VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
VecVT.getVectorNumElements());
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, ElementOps);
+ Vec = DAG.getBuildVector(VecVT, dl, ElementOps);
}
// Store the vector to the stack.
@@ -1629,6 +1676,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
MachinePointerInfo(), EltVT);
}
+SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
+ SDValue Lo, Hi;
+
+  // *_EXTEND_VECTOR_INREG nodes only reference the lower half of the input, so
+ // splitting the result has the same effect as splitting the input operand.
+ SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi);
+}
+
SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
unsigned OpNo) {
EVT LoVT, HiVT;
@@ -1881,7 +1938,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
}
}
- return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts);
+ return DAG.getBuildVector(N->getValueType(0), DL, Elts);
}
SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
@@ -2323,6 +2380,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(Opcode, DL, WidenVT, InOp);
return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
}
+ if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) {
+    // If both input and result vector types are of the same width, extend
+ // operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which
+ // accepts fewer elements in the result than in the input.
+ if (Opcode == ISD::SIGN_EXTEND)
+ return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT);
+ if (Opcode == ISD::ZERO_EXTEND)
+ return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT);
+ }
}
if (TLI.isTypeLegal(InWidenVT)) {
@@ -2375,7 +2441,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
+ return DAG.getBuildVector(WidenVT, DL, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
@@ -2430,7 +2496,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
while (Ops.size() != WidenNumElts)
Ops.push_back(DAG.getUNDEF(WidenSVT));
- return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
+ return DAG.getBuildVector(WidenVT, DL, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
@@ -2593,7 +2659,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT));
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, NewOps);
+ return DAG.getBuildVector(WidenVT, dl, NewOps);
}
SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
@@ -2663,7 +2729,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; Idx < WidenNumElts; ++Idx)
Ops[Idx] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
+ return DAG.getBuildVector(WidenVT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
@@ -2704,7 +2770,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
+ return DAG.getBuildVector(WidenVT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
@@ -2814,6 +2880,212 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
WidenVT, N->getOperand(0));
}
+// Return true if this is a node that could have two SETCCs as operands.
+static inline bool isLogicalMaskOp(unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return true;
+ }
+ return false;
+}
+
+// This is used just for the assert in convertMask(). Check that this is
+// either a SETCC or a SETCC previously handled by convertMask().
+#ifndef NDEBUG
+static inline bool isSETCCorConvertedSETCC(SDValue N) {
+ if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ N = N.getOperand(0);
+ else if (N.getOpcode() == ISD::CONCAT_VECTORS) {
+ for (unsigned i = 1; i < N->getNumOperands(); ++i)
+ if (!N->getOperand(i)->isUndef())
+ return false;
+ N = N.getOperand(0);
+ }
+
+ if (N.getOpcode() == ISD::TRUNCATE)
+ N = N.getOperand(0);
+ else if (N.getOpcode() == ISD::SIGN_EXTEND)
+ N = N.getOperand(0);
+
+ return (N.getOpcode() == ISD::SETCC);
+}
+#endif
+
+// Return a mask of vector type MaskVT to replace InMask. Also adjust the new
+// mask from MaskVT to ToMaskVT, if needed, with a vector extension or
+// truncation.
+SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
+ EVT ToMaskVT) {
+ LLVMContext &Ctx = *DAG.getContext();
+
+  // Currently, only a SETCC or an AND/OR/XOR of two SETCCs is handled.
+ unsigned InMaskOpc = InMask->getOpcode();
+ assert((InMaskOpc == ISD::SETCC ||
+ (isLogicalMaskOp(InMaskOpc) &&
+ isSETCCorConvertedSETCC(InMask->getOperand(0)) &&
+ isSETCCorConvertedSETCC(InMask->getOperand(1)))) &&
+ "Unexpected mask argument.");
+
+ // Make a new Mask node, with a legal result VT.
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0; i < InMask->getNumOperands(); ++i)
+ Ops.push_back(InMask->getOperand(i));
+ SDValue Mask = DAG.getNode(InMaskOpc, SDLoc(InMask), MaskVT, Ops);
+
+ // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign
+ // extend or truncate is needed.
+ unsigned MaskScalarBits = MaskVT.getScalarSizeInBits();
+ unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits();
+ if (MaskScalarBits < ToMaskScalBits) {
+ EVT ExtVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
+ MaskVT.getVectorNumElements());
+ Mask = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Mask), ExtVT, Mask);
+ } else if (MaskScalarBits > ToMaskScalBits) {
+ EVT TruncVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
+ MaskVT.getVectorNumElements());
+ Mask = DAG.getNode(ISD::TRUNCATE, SDLoc(Mask), TruncVT, Mask);
+ }
+
+ assert(Mask->getValueType(0).getScalarSizeInBits() ==
+ ToMaskVT.getScalarSizeInBits() &&
+ "Mask should have the right element size by now.");
+
+ // Adjust Mask to the right number of elements.
+ unsigned CurrMaskNumEls = Mask->getValueType(0).getVectorNumElements();
+ if (CurrMaskNumEls > ToMaskVT.getVectorNumElements()) {
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, SDLoc(Mask), IdxTy);
+ Mask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Mask), ToMaskVT, Mask,
+ ZeroIdx);
+ } else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
+ unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls);
+ EVT SubVT = Mask->getValueType(0);
+ SmallVector<SDValue, 16> SubConcatOps(NumSubVecs);
+ SubConcatOps[0] = Mask;
+ for (unsigned i = 1; i < NumSubVecs; ++i)
+ SubConcatOps[i] = DAG.getUNDEF(SubVT);
+ Mask =
+ DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubConcatOps);
+ }
+
+ assert((Mask->getValueType(0) == ToMaskVT) &&
+ "A mask of ToMaskVT should have been produced by now.");
+
+ return Mask;
+}
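convertMask's two fixups, emulated on plain arrays under an assumed scenario: an all-ones/zeros v4i8 mask that must become v8i32 is first sign-extended lane-wise, then padded to the wider element count (the CONCAT_VECTORS-with-UNDEF step; zeros stand in for undef here). Illustrative only, not LLVM code:

```cpp
#include <array>
#include <cstdint>
#include <cstdio>

int main() {
  std::array<int8_t, 4> In = {-1, 0, -1, 0};   // lanes are all-ones or zero

  std::array<int32_t, 4> Ext;                  // SIGN_EXTEND: i8 -> i32
  for (int i = 0; i < 4; ++i) Ext[i] = In[i];

  std::array<int32_t, 8> Out{};                // concat with "undef" tail
  for (int i = 0; i < 4; ++i) Out[i] = Ext[i];

  for (int32_t V : Out) printf("%d ", V);      // prints -1 0 -1 0 0 0 0 0
}
```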
+
+// Get the target mask VT, and widen if needed.
+EVT DAGTypeLegalizer::getSETCCWidenedResultTy(SDValue SetCC) {
+ assert(SetCC->getOpcode() == ISD::SETCC);
+ LLVMContext &Ctx = *DAG.getContext();
+ EVT MaskVT = getSetCCResultType(SetCC->getOperand(0).getValueType());
+ if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
+ MaskVT = TLI.getTypeToTransformTo(Ctx, MaskVT);
+ return MaskVT;
+}
+
+// This method tries to handle VSELECT and its mask by legalizing operands
+// (which may require widening) and if needed adjusting the mask vector type
+// to match that of the VSELECT. Without it, many cases end up with
+// scalarization of the SETCC, with many unnecessary instructions.
+SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Cond = N->getOperand(0);
+
+ if (N->getOpcode() != ISD::VSELECT)
+ return SDValue();
+
+ if (Cond->getOpcode() != ISD::SETCC && !isLogicalMaskOp(Cond->getOpcode()))
+ return SDValue();
+
+  // If this is a split VSELECT that was previously handled, do nothing.
+ if (Cond->getValueType(0).getScalarSizeInBits() != 1)
+ return SDValue();
+
+ EVT VSelVT = N->getValueType(0);
+ // Only handle vector types which are a power of 2.
+  // Only handle vector types whose size in bits is a power of 2.
+ return SDValue();
+
+ // Don't touch if this will be scalarized.
+ EVT FinalVT = VSelVT;
+ while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
+ FinalVT = EVT::getVectorVT(Ctx, FinalVT.getVectorElementType(),
+ FinalVT.getVectorNumElements() / 2);
+ if (FinalVT.getVectorNumElements() == 1)
+ return SDValue();
+
+ // If there is support for an i1 vector mask, don't touch.
+ if (Cond.getOpcode() == ISD::SETCC) {
+ EVT SetCCOpVT = Cond->getOperand(0).getValueType();
+ while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal)
+ SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT);
+ EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
+ if (SetCCResVT.getScalarSizeInBits() == 1)
+ return SDValue();
+ }
+
+ // Get the VT and operands for VSELECT, and widen if needed.
+ SDValue VSelOp1 = N->getOperand(1);
+ SDValue VSelOp2 = N->getOperand(2);
+ if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector) {
+ VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT);
+ VSelOp1 = GetWidenedVector(VSelOp1);
+ VSelOp2 = GetWidenedVector(VSelOp2);
+ }
+
+ // The mask of the VSELECT should have integer elements.
+ EVT ToMaskVT = VSelVT;
+ if (!ToMaskVT.getScalarType().isInteger())
+ ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger();
+
+ SDValue Mask;
+ if (Cond->getOpcode() == ISD::SETCC) {
+ EVT MaskVT = getSETCCWidenedResultTy(Cond);
+ Mask = convertMask(Cond, MaskVT, ToMaskVT);
+ } else if (isLogicalMaskOp(Cond->getOpcode()) &&
+ Cond->getOperand(0).getOpcode() == ISD::SETCC &&
+ Cond->getOperand(1).getOpcode() == ISD::SETCC) {
+ // Cond is (AND/OR/XOR (SETCC, SETCC))
+ SDValue SETCC0 = Cond->getOperand(0);
+ SDValue SETCC1 = Cond->getOperand(1);
+ EVT VT0 = getSETCCWidenedResultTy(SETCC0);
+ EVT VT1 = getSETCCWidenedResultTy(SETCC1);
+ unsigned ScalarBits0 = VT0.getScalarSizeInBits();
+ unsigned ScalarBits1 = VT1.getScalarSizeInBits();
+ unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits();
+ EVT MaskVT;
+ // If the two SETCCs have different VTs, either extend/truncate one of
+ // them to the other "towards" ToMaskVT, or truncate one and extend the
+ // other to ToMaskVT.
+ if (ScalarBits0 != ScalarBits1) {
+ EVT NarrowVT = ((ScalarBits0 < ScalarBits1) ? VT0 : VT1);
+ EVT WideVT = ((NarrowVT == VT0) ? VT1 : VT0);
+ if (ScalarBits_ToMask >= WideVT.getScalarSizeInBits())
+ MaskVT = WideVT;
+ else if (ScalarBits_ToMask <= NarrowVT.getScalarSizeInBits())
+ MaskVT = NarrowVT;
+ else
+ MaskVT = ToMaskVT;
+ } else
+ // If the two SETCCs have the same VT, don't change it.
+ MaskVT = VT0;
+
+ // Make new SETCCs and logical nodes.
+ SETCC0 = convertMask(SETCC0, VT0, MaskVT);
+ SETCC1 = convertMask(SETCC1, VT1, MaskVT);
+ Cond = DAG.getNode(Cond->getOpcode(), SDLoc(Cond), MaskVT, SETCC0, SETCC1);
+
+ // Convert the logical op for VSELECT if needed.
+ Mask = convertMask(Cond, MaskVT, ToMaskVT);
+ } else
+ return SDValue();
+
+ return DAG.getNode(ISD::VSELECT, SDLoc(N), VSelVT, Mask, VSelOp1, VSelOp2);
+}
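The end state WidenVSELECTAndMask arranges: mask and data vectors of the same width, so the select is a plain lane-wise pick. A sketch assuming a v4f32 VSELECT whose mask was converted to v4i32 all-ones/zero lanes (illustrative only):

```cpp
#include <array>
#include <cstdint>
#include <cstdio>

int main() {
  std::array<int32_t, 4> Mask = {-1, 0, 0, -1};
  std::array<float, 4> A = {1, 2, 3, 4}, B = {9, 8, 7, 6}, R;
  for (int i = 0; i < 4; ++i) R[i] = Mask[i] ? A[i] : B[i];
  for (float V : R) printf("%g ", V);          // prints 1 8 7 4
}
```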
+
SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
@@ -2821,6 +3093,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
SDValue Cond1 = N->getOperand(0);
EVT CondVT = Cond1.getValueType();
if (CondVT.isVector()) {
+ if (SDValue Res = WidenVSELECTAndMask(N))
+ return Res;
+
EVT CondEltVT = CondVT.getVectorElementType();
EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
CondEltVT, WidenNumElts);
@@ -3093,7 +3368,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ return DAG.getBuildVector(VT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
@@ -3144,7 +3419,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ return DAG.getBuildVector(VT, dl, Ops);
}
SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
@@ -3565,10 +3840,9 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
for (; i != WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
+ return DAG.getBuildVector(WidenVT, dl, Ops);
}
-
void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StoreSDNode *ST) {
// The strategy assumes that we can efficiently store power-of-two widths.
@@ -3737,5 +4011,5 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
DAG.getUNDEF(EltVT);
for ( ; Idx < WidenNumElts; ++Idx)
Ops[Idx] = FillVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops);
+ return DAG.getBuildVector(NVT, dl, Ops);
}
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index ded8e68fcbce..a1d70ab6f036 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -57,10 +57,8 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
RegPressure.resize(NumRC);
std::fill(RegLimit.begin(), RegLimit.end(), 0);
std::fill(RegPressure.begin(), RegPressure.end(), 0);
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end();
- I != E; ++I)
- RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
+ for (const TargetRegisterClass *RC : TRI->regclasses())
+ RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);
ParallelLiveRanges = 0;
HorizontalVerticalBalance = 0;
@@ -364,16 +362,11 @@ int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
return RegBalance;
if (RawPressure) {
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I) {
- const TargetRegisterClass *RC = *I;
+ for (const TargetRegisterClass *RC : TRI->regclasses())
RegBalance += rawRegPressureDelta(SU, RC->getID());
- }
}
else {
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I) {
- const TargetRegisterClass *RC = *I;
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
if ((RegPressure[RC->getID()] +
rawRegPressureDelta(SU, RC->getID()) > 0) &&
(RegPressure[RC->getID()] +
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 3549ccd9e345..e923e30e5037 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -422,11 +422,9 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
}
// Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
if (N->isMachineOpcode()) {
- if (N->getMachineOpcode() ==
- (unsigned)TII->getCallFrameDestroyOpcode()) {
+ if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
++NestLevel;
- } else if (N->getMachineOpcode() ==
- (unsigned)TII->getCallFrameSetupOpcode()) {
+ } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
if (NestLevel == 0)
return false;
--NestLevel;
@@ -480,12 +478,10 @@ FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
}
// Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
if (N->isMachineOpcode()) {
- if (N->getMachineOpcode() ==
- (unsigned)TII->getCallFrameDestroyOpcode()) {
+ if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
++NestLevel;
MaxNest = std::max(MaxNest, NestLevel);
- } else if (N->getMachineOpcode() ==
- (unsigned)TII->getCallFrameSetupOpcode()) {
+ } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
assert(NestLevel != 0);
--NestLevel;
if (NestLevel == 0)
@@ -550,7 +546,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
if (!LiveRegDefs[CallResource])
for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
if (Node->isMachineOpcode() &&
- Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
unsigned NestLevel = 0;
unsigned MaxNest = 0;
SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
@@ -755,7 +751,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (SUNode->isMachineOpcode() &&
- SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
--NumLiveRegs;
LiveRegDefs[CallResource] = nullptr;
@@ -826,7 +822,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (SUNode->isMachineOpcode() &&
- SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
++NumLiveRegs;
LiveRegDefs[CallResource] = SU;
LiveRegGens[CallResource] = CallSeqEndForStart[SU];
@@ -839,7 +835,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (SUNode->isMachineOpcode() &&
- SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
--NumLiveRegs;
LiveRegDefs[CallResource] = nullptr;
@@ -1305,7 +1301,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
// If we're in the middle of scheduling a call, don't begin scheduling
// another call. Also, don't allow any physical registers to be live across
// the call.
- if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ if ((Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) ||
+ (Node->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
// Check the special calling-sequence resource.
unsigned CallResource = TRI->getNumRegs();
if (LiveRegDefs[CallResource]) {
@@ -1659,9 +1656,8 @@ public:
RegPressure.resize(NumRC);
std::fill(RegLimit.begin(), RegLimit.end(), 0);
std::fill(RegPressure.begin(), RegPressure.end(), 0);
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I)
- RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
+ for (const TargetRegisterClass *RC : TRI->regclasses())
+ RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
}
}
@@ -1788,7 +1784,7 @@ public:
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- void dump(ScheduleDAG *DAG) const override {
+ LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
// Emulate pop() without clobbering NodeQueueIds.
std::vector<SUnit*> DumpQueue = Queue;
SF DumpPicker = Picker;
@@ -1924,19 +1920,17 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
// Register Pressure Tracking
//===----------------------------------------------------------------------===//
-void RegReductionPQBase::dumpRegPressure() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I) {
- const TargetRegisterClass *RC = *I;
+LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
unsigned Id = RC->getID();
unsigned RP = RegPressure[Id];
if (!RP) continue;
DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
<< RegLimit[Id] << '\n');
}
-#endif
}
+#endif
bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
if (!TLI)
@@ -2092,7 +2086,7 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) {
RegPressure[RCId] -= Cost;
}
}
- dumpRegPressure();
+ DEBUG(dumpRegPressure());
}
void RegReductionPQBase::unscheduledNode(SUnit *SU) {
@@ -2172,7 +2166,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) {
}
}
- dumpRegPressure();
+ DEBUG(dumpRegPressure());
}
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 3be622f8c179..3c8526ebb702 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -650,6 +650,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
}
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+  // Cannot completely remove the virtual function, even in release mode.
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
if (!SU->getNode()) {
dbgs() << "PHYS REG COPY\n";
@@ -704,8 +705,8 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
if (!N->getHasDebugValue())
return;
- // Opportunistically insert immediate dbg_value uses, i.e. those with source
- // order number right after the N.
+ // Opportunistically insert immediate dbg_value uses, i.e. those with the same
+ // source order number as N.
MachineBasicBlock *BB = Emitter.getBlock();
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
@@ -713,7 +714,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
if (DVs[i]->isInvalidated())
continue;
unsigned DVOrder = DVs[i]->getOrder();
- if (!Order || DVOrder == ++Order) {
+ if (!Order || DVOrder == Order) {
MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
if (DbgMI) {
Orders.push_back(std::make_pair(DVOrder, DbgMI));
@@ -835,8 +836,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
GluedNodes.push_back(N);
while (!GluedNodes.empty()) {
SDNode *N = GluedNodes.back();
- Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
- VRBaseMap);
+ Emitter.EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
// Remember the source order of the inserted instruction.
if (HasDbg)
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e225ba8703b7..003ea5030bfc 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -289,28 +289,28 @@ static int isSignedOp(ISD::CondCode Opcode) {
}
ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2,
- bool isInteger) {
- if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ bool IsInteger) {
+ if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
// Cannot fold a signed integer setcc with an unsigned integer setcc.
return ISD::SETCC_INVALID;
unsigned Op = Op1 | Op2; // Combine all of the condition bits.
- // If the N and U bits get set then the resultant comparison DOES suddenly
- // care about orderedness, and is true when ordered.
+ // If the N and U bits get set, then the resultant comparison DOES suddenly
+ // care about orderedness, and it is true when ordered.
if (Op > ISD::SETTRUE2)
Op &= ~16; // Clear the U bit if the N bit is set.
// Canonicalize illegal integer setcc's.
- if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
+ if (IsInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT
Op = ISD::SETNE;
return ISD::CondCode(Op);
}
ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
- bool isInteger) {
- if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
+ bool IsInteger) {
+ if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3)
// Cannot fold a signed setcc with an unsigned setcc.
return ISD::SETCC_INVALID;
@@ -318,7 +318,7 @@ ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
ISD::CondCode Result = ISD::CondCode(Op1 & Op2);
// Canonicalize illegal integer setcc's.
- if (isInteger) {
+ if (IsInteger) {
switch (Result) {
default: break;
case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT
@@ -871,11 +871,13 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
DbgInfo = new SDDbgInfo();
}
-void SelectionDAG::init(MachineFunction &mf) {
- MF = &mf;
+void SelectionDAG::init(MachineFunction &NewMF,
+ OptimizationRemarkEmitter &NewORE) {
+ MF = &NewMF;
+ ORE = &NewORE;
TLI = getSubtarget().getTargetLowering();
TSI = getSubtarget().getSelectionDAGInfo();
- Context = &mf.getFunction()->getContext();
+ Context = &MF->getFunction()->getContext();
}
SelectionDAG::~SelectionDAG() {
@@ -1994,8 +1996,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
/// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows
/// us to only collect the known bits that are shared by the requested vector
/// elements.
-/// TODO: We only support DemandedElts on a few opcodes so far, the remainder
-/// should be added when they become necessary.
void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
APInt &KnownOne, const APInt &DemandedElts,
unsigned Depth) const {
@@ -2251,10 +2251,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZero2.countLeadingOnes(),
BitWidth) - BitWidth;
- TrailZ = std::min(TrailZ, BitWidth);
- LeadZ = std::min(LeadZ, BitWidth);
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
- APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero.clearAllBits();
+ KnownZero.setLowBits(std::min(TrailZ, BitWidth));
+ KnownZero.setHighBits(std::min(LeadZ, BitWidth));
break;
}
case ISD::UDIV: {
@@ -2272,7 +2271,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
LeadZ = std::min(BitWidth,
LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
- KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
+ KnownZero.setHighBits(LeadZ);
break;
}
case ISD::SELECT:
@@ -2297,10 +2296,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
break;
- case ISD::SADDO:
- case ISD::UADDO:
- case ISD::SSUBO:
- case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO:
if (Op.getResNo() != 1)
@@ -2312,14 +2307,14 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ KnownZero.setBitsFrom(1);
break;
case ISD::SETCC:
// If we know the result of a setcc has the top bits zero, use this info.
if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrOneBooleanContent &&
BitWidth > 1)
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ KnownZero.setBitsFrom(1);
break;
case ISD::SHL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
@@ -2328,7 +2323,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZero = KnownZero << *ShAmt;
KnownOne = KnownOne << *ShAmt;
// Low bits are known zero.
- KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt->getZExtValue());
+ KnownZero.setLowBits(ShAmt->getZExtValue());
}
break;
case ISD::SRL:
@@ -2338,8 +2333,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZero = KnownZero.lshr(*ShAmt);
KnownOne = KnownOne.lshr(*ShAmt);
// High bits are known zero.
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt->getZExtValue());
- KnownZero |= HighBits;
+ KnownZero.setHighBits(ShAmt->getZExtValue());
}
break;
case ISD::SRA:
@@ -2350,13 +2344,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownOne = KnownOne.lshr(*ShAmt);
// If we know the value of the sign bit, then we know it is copied across
// the high bits by the shift amount.
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt->getZExtValue());
APInt SignBit = APInt::getSignBit(BitWidth);
SignBit = SignBit.lshr(*ShAmt); // Adjust to where it is now in the mask.
if (KnownZero.intersects(SignBit)) {
- KnownZero |= HighBits; // New bits are known zero.
+ KnownZero.setHighBits(ShAmt->getZExtValue());// New bits are known zero.
} else if (KnownOne.intersects(SignBit)) {
- KnownOne |= HighBits; // New bits are known one.
+ KnownOne.setHighBits(ShAmt->getZExtValue()); // New bits are known one.
}
}
break;
@@ -2401,9 +2394,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP: {
- unsigned LowBits = Log2_32(BitWidth)+1;
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
- KnownOne.clearAllBits();
+ KnownZero.setBitsFrom(Log2_32(BitWidth)+1);
break;
}
case ISD::LOAD: {
@@ -2412,26 +2403,39 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+ KnownZero.setBitsFrom(MemBits);
} else if (const MDNode *Ranges = LD->getRanges()) {
if (LD->getExtensionType() == ISD::NON_EXTLOAD)
computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne);
}
break;
}
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
+ EVT InVT = Op.getOperand(0).getValueType();
+ unsigned InBits = InVT.getScalarSizeInBits();
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne,
+ DemandedElts.zext(InVT.getVectorNumElements()),
+ Depth + 1);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ KnownZero.setBitsFrom(InBits);
+ break;
+ }
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
Depth + 1);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
- KnownZero |= NewBits;
+ KnownZero.setBitsFrom(InBits);
break;
}
+ // TODO ISD::SIGN_EXTEND_VECTOR_INREG
case ISD::SIGN_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
@@ -2478,10 +2482,21 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
case ISD::FGETSIGN:
// All bits are zero except the low bit.
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ KnownZero.setBitsFrom(1);
break;
-
- case ISD::SUB: {
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ if (Op.getResNo() == 1) {
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ KnownZero.setBitsFrom(1);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+ case ISD::SUB:
+ case ISD::SUBC: {
if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) {
// We know that the top bits of C-X are clear if X contains less bits
// than C (i.e. no wrap-around can happen). For example, 20-X is
@@ -2499,13 +2514,40 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
if ((KnownZero2 & MaskV) == MaskV) {
unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
// Top bits known zero.
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2);
+ KnownZero.setHighBits(NLZ2);
}
}
}
- LLVM_FALLTHROUGH;
+
+  // If low bits are known to be zero in both operands, then we know they are
+ // going to be 0 in the result. Both addition and complement operations
+ // preserve the low zero bits.
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
+ unsigned KnownZeroLow = KnownZero2.countTrailingOnes();
+ if (KnownZeroLow == 0)
+ break;
+
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
+ KnownZeroLow = std::min(KnownZeroLow,
+ KnownZero2.countTrailingOnes());
+ KnownZero.setBits(0, KnownZeroLow);
+ break;
}
+ case ISD::UADDO:
+ case ISD::SADDO:
+ if (Op.getResNo() == 1) {
+ // If we know the result of a setcc has the top bits zero, use this info.
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ KnownZero.setBitsFrom(1);
+ break;
+ }
+ LLVM_FALLTHROUGH;
case ISD::ADD:
+ case ISD::ADDC:
case ISD::ADDE: {
// Output known-0 bits are known if clear or set in both the low clear bits
// common to both LHS & RHS. For example, 8+(X<<3) is known to have the
@@ -2526,19 +2568,19 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZeroLow = std::min(KnownZeroLow,
KnownZero2.countTrailingOnes());
- if (Opcode == ISD::ADD) {
- KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroLow);
- if (KnownZeroHigh > 1)
- KnownZero |= APInt::getHighBitsSet(BitWidth, KnownZeroHigh - 1);
+ if (Opcode == ISD::ADDE) {
+ // With ADDE, a carry bit may be added in, so we can only use this
+ // information if we know (at least) that the low two bits are clear.
+ // We then return to the caller that the low bit is unknown but that
+ // other bits are known zero.
+ if (KnownZeroLow >= 2)
+ KnownZero.setBits(1, KnownZeroLow);
break;
}
- // With ADDE, a carry bit may be added in, so we can only use this
- // information if we know (at least) that the low two bits are clear. We
- // then return to the caller that the low bit is unknown but that other bits
- // are known zero.
- if (KnownZeroLow >= 2) // ADDE
- KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroLow);
+ KnownZero.setLowBits(KnownZeroLow);
+ if (KnownZeroHigh > 1)
+ KnownZero.setHighBits(KnownZeroHigh - 1);
break;
}
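The ADDE restriction above in concrete numbers: with two low zero bits in both operands and an incoming carry of at most 1, only bit 0 of the sum can be perturbed, so bit 1 stays known zero (illustrative check):

```cpp
#include <cstdint>

int main() {
  uint32_t A = 8, B = 12;          // low two bits zero in both operands
  for (uint32_t Carry = 0; Carry <= 1; ++Carry) {
    uint32_t S = A + B + Carry;
    if (S & 2) return 1;           // bit 1 must stay known zero
  }
  return 0;
}
```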
case ISD::SREM:
@@ -2591,7 +2633,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
KnownOne.clearAllBits();
- KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
+ KnownZero.clearAllBits();
+ KnownZero.setHighBits(Leaders);
break;
}
case ISD::EXTRACT_ELEMENT: {
@@ -2673,6 +2716,13 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
break;
}
+ case ISD::BITREVERSE: {
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
+ KnownZero = KnownZero2.reverseBits();
+ KnownOne = KnownOne2.reverseBits();
+ break;
+ }
case ISD::BSWAP: {
computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
Depth + 1);
@@ -2680,12 +2730,62 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownOne = KnownOne2.byteSwap();
break;
}
- case ISD::SMIN:
- case ISD::SMAX:
- case ISD::UMIN:
+ case ISD::ABS: {
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
+
+ // If the source's MSB is zero then we know the rest of the bits already.
+ if (KnownZero2[BitWidth - 1]) {
+ KnownZero = KnownZero2;
+ KnownOne = KnownOne2;
+ break;
+ }
+
+  // We only know that the absolute value's MSB will be zero iff there is
+  // a set bit that isn't the sign bit (otherwise it could be INT_MIN).
+ KnownOne2.clearBit(BitWidth - 1);
+ if (KnownOne2.getBoolValue()) {
+ KnownZero = APInt::getSignBit(BitWidth);
+ break;
+ }
+ break;
+ }
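The INT_MIN caveat above reproduces in miniature at 8 bits; a hedged sketch (the helper is illustrative only):

#include <cstdint>

// Negating the minimum value wraps back to itself, so abs() alone
// cannot be assumed to clear the sign bit.
static uint8_t wrappingAbs8(int8_t X) {
  return X < 0 ? (uint8_t)(0u - (uint8_t)X) : (uint8_t)X;
}
// wrappingAbs8(-128) == 0x80: the MSB is still set. A known one bit
// anywhere below the sign bit rules out -128, which is exactly what
// the KnownOne2 test above checks before claiming a zero sign bit.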
+ case ISD::UMIN: {
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
+
+  // UMIN - the result will have at least the maximum of the inputs' known
+  // leading zero bits.
+ unsigned LeadZero = KnownZero.countLeadingOnes();
+ LeadZero = std::max(LeadZero, KnownZero2.countLeadingOnes());
+
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
+ KnownZero.setHighBits(LeadZero);
+ break;
+ }
case ISD::UMAX: {
computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
Depth + 1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
+
+  // UMAX - the result will have at least the maximum of the inputs' known
+  // leading one bits.
+ unsigned LeadOne = KnownOne.countLeadingOnes();
+ LeadOne = std::max(LeadOne, KnownOne2.countLeadingOnes());
+
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
+ KnownOne.setHighBits(LeadOne);
+ break;
+ }
+ case ISD::SMIN:
+ case ISD::SMAX: {
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
// If we don't know any bits, early out.
if (!KnownOne && !KnownZero)
break;
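The soundness argument for the new UMIN/UMAX bounds: the unsigned minimum is no larger than either input, so it inherits the better of their known leading zeros, and dually the maximum inherits the better known leading ones. A standalone sketch of the UMAX side (illustrative helper name):

#include "llvm/ADT/APInt.h"
#include <algorithm>
using namespace llvm;

// Leading one bits guaranteed in umax(A, B): the result is >= the
// larger input, so it keeps at least that input's leading ones, which
// the plain intersection KnownOne & KnownOne2 would lose.
static unsigned umaxLeadingOnes(const APInt &AOne, const APInt &BOne) {
  return std::max(AOne.countLeadingOnes(), BOne.countLeadingOnes());
}
// E.g. at 8 bits, AOne = 0x80 and BOne = 0xC0 give two known leading
// ones, while the intersection alone would keep only one.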
@@ -2699,7 +2799,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
case ISD::TargetFrameIndex:
if (unsigned Align = InferPtrAlignment(Op)) {
// The low bits are known zero if the pointer is aligned.
- KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
+ KnownZero.setLowBits(Log2_32(Align));
break;
}
break;
@@ -2712,13 +2812,48 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
// Allow the target to implement this method for its nodes.
- TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth);
+ TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, DemandedElts,
+ *this, Depth);
break;
}
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}
+SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
+ SDValue N1) const {
+  // X + 0 never overflows
+ if (isNullConstant(N1))
+ return OFK_Never;
+
+ APInt N1Zero, N1One;
+ computeKnownBits(N1, N1Zero, N1One);
+ if (N1Zero.getBoolValue()) {
+ APInt N0Zero, N0One;
+ computeKnownBits(N0, N0Zero, N0One);
+
+ bool overflow;
+ (~N0Zero).uadd_ov(~N1Zero, overflow);
+ if (!overflow)
+ return OFK_Never;
+ }
+
+  // mulhi + 1 never overflows
+ if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 &&
+ (~N1Zero & 0x01) == ~N1Zero)
+ return OFK_Never;
+
+ if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) {
+ APInt N0Zero, N0One;
+ computeKnownBits(N0, N0Zero, N0One);
+
+ if ((~N0Zero & 0x01) == ~N0Zero)
+ return OFK_Never;
+ }
+
+ return OFK_Sometime;
+}
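The core test in computeOverflowKind can be restated on its own: an unsigned add cannot wrap if even the largest values consistent with the known-zero masks add without carry-out. A hedged standalone version (the helper name is mine):

#include "llvm/ADT/APInt.h"
using namespace llvm;

// ~AZero is the maximum value A can take once its known-zero bits are
// forced to zero; likewise for B. If those maxima add without
// overflow, no pair of concrete values can overflow either.
static bool uaddNeverOverflows(const APInt &AZero, const APInt &BZero) {
  bool Overflow;
  (~AZero).uadd_ov(~BZero, Overflow);
  return !Overflow;
}

The mulhi special case rests on a separate fact: the high half of an NxN->2N unsigned multiply is at most 2^N - 2, so adding 1 to it can never wrap.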
+
bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
EVT OpVT = Val.getValueType();
unsigned BitWidth = OpVT.getScalarSizeInBits();
@@ -2745,7 +2880,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
// Are all operands of a build vector constant powers of two?
if (Val.getOpcode() == ISD::BUILD_VECTOR)
- if (llvm::all_of(Val->ops(), [this, BitWidth](SDValue E) {
+ if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E))
return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
return false;
@@ -2764,6 +2899,15 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return ComputeNumSignBits(Op, DemandedElts, Depth);
+}
+
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
assert(VT.isInteger() && "Invalid VT!");
unsigned VTBits = VT.getScalarSizeInBits();
unsigned Tmp, Tmp2;
@@ -2772,6 +2916,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
if (Depth == 6)
return 1; // Limit search depth.
+ if (!DemandedElts)
+ return 1; // No demanded elts, better to assume we don't know anything.
+
switch (Op.getOpcode()) {
default: break;
case ISD::AssertSext:
@@ -2786,7 +2933,28 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
return Val.getNumSignBits();
}
+ case ISD::BUILD_VECTOR:
+ Tmp = VTBits;
+ for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
+ if (!DemandedElts[i])
+ continue;
+
+ SDValue SrcOp = Op.getOperand(i);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1);
+
+      // BUILD_VECTOR can implicitly truncate sources, so we must handle this.
+ if (SrcOp.getValueSizeInBits() != VTBits) {
+ assert(SrcOp.getValueSizeInBits() > VTBits &&
+ "Expected BUILD_VECTOR implicit truncation");
+ unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits;
+ Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits : 1);
+ }
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ return Tmp;
+
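The implicit-truncation adjustment in the BUILD_VECTOR case is plain arithmetic on bit counts; sketched standalone (illustrative helper):

// Sign bits surviving an implicit truncation from SrcBits down to
// DstBits, given SignBits known at the source width: truncation drops
// the top ExtraBits, each of which consumed one known sign bit.
static unsigned truncatedSignBits(unsigned SignBits, unsigned SrcBits,
                                  unsigned DstBits) {
  unsigned ExtraBits = SrcBits - DstBits;
  return SignBits > ExtraBits ? SignBits - ExtraBits : 1;
}
// An i32 operand with 20 sign bits feeding a v4i16 build vector keeps
// truncatedSignBits(20, 32, 16) == 4 sign bits in its lane.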
case ISD::SIGN_EXTEND:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
@@ -2799,7 +2967,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
return std::max(Tmp, Tmp2);
case ISD::SRA:
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
// SRA X, C -> adds C sign bits.
if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
APInt ShiftVal = C->getAPIntValue();
@@ -2887,6 +3055,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
}
break;
case ISD::ADD:
+ case ISD::ADDC:
// Add can have at most one carry bit. Thus we know that the output
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
@@ -2961,19 +3130,63 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
    // result. Otherwise it gives either a negative or a greater-than-bitwidth result
return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue InVec = Op.getOperand(0);
+ SDValue InVal = Op.getOperand(1);
+ SDValue EltNo = Op.getOperand(2);
+ unsigned NumElts = InVec.getValueType().getVectorNumElements();
+
+ ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
+ // If we know the element index, split the demand between the
+ // source vector and the inserted element.
+ unsigned EltIdx = CEltNo->getZExtValue();
+
+ // If we demand the inserted element then get its sign bits.
+ Tmp = UINT_MAX;
+ if (DemandedElts[EltIdx])
+ Tmp = ComputeNumSignBits(InVal, Depth + 1);
+
+ // If we demand the source vector then get its sign bits, and determine
+ // the minimum.
+ APInt VectorElts = DemandedElts;
+ VectorElts.clearBit(EltIdx);
+ if (!!VectorElts) {
+ Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ } else {
+ // Unknown element index, so ignore DemandedElts and demand them all.
+ Tmp = ComputeNumSignBits(InVec, Depth + 1);
+ Tmp2 = ComputeNumSignBits(InVal, Depth + 1);
+ Tmp = std::min(Tmp, Tmp2);
+ }
+ assert(Tmp <= VTBits && "Failed to determine minimum sign bits");
+ return Tmp;
+ }
case ISD::EXTRACT_VECTOR_ELT: {
- // At the moment we keep this simple and skip tracking the specific
- // element. This way we get the lowest common denominator for all elements
- // of the vector.
- // TODO: get information for given vector element
+ SDValue InVec = Op.getOperand(0);
+ SDValue EltNo = Op.getOperand(1);
+ EVT VecVT = InVec.getValueType();
const unsigned BitWidth = Op.getValueSizeInBits();
const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
+ const unsigned NumSrcElts = VecVT.getVectorNumElements();
+
// If BitWidth > EltBitWidth the value is anyext:ed, and we do not know
// anything about sign bits. But if the sizes match we can derive knowledge
// about sign bits from the vector operand.
- if (BitWidth == EltBitWidth)
- return ComputeNumSignBits(Op.getOperand(0), Depth+1);
- break;
+ if (BitWidth != EltBitWidth)
+ break;
+
+    // If we know the element index, just demand that vector element; otherwise
+    // (unknown element index) ignore DemandedElts and demand them all.
+ APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
+ DemandedSrcElts =
+ APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
+
+ return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1);
}
case ISD::EXTRACT_SUBVECTOR:
return ComputeNumSignBits(Op.getOperand(0), Depth + 1);
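The demanded-elements masks threaded through this hunk all follow the pattern visible in EXTRACT_VECTOR_ELT; a sketch of that mask construction (helper name is illustrative):

#include "llvm/ADT/APInt.h"
using namespace llvm;

// Demanded lanes when extracting from a NumSrcElts-wide vector: a
// known in-range index demands exactly one lane; an unknown or
// out-of-range index must conservatively demand them all.
static APInt demandedForExtract(unsigned NumSrcElts, const APInt *EltIdx) {
  if (EltIdx && EltIdx->ult(NumSrcElts))
    return APInt::getOneBitSet(NumSrcElts, EltIdx->getZExtValue());
  return APInt::getAllOnesValue(NumSrcElts);
}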
@@ -3008,14 +3221,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) {
- unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, *this, Depth);
- if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits);
+ unsigned NumBits =
+ TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);
+ if (NumBits > 1)
+ FirstAnswer = std::max(FirstAnswer, NumBits);
}
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
APInt KnownZero, KnownOne;
- computeKnownBits(Op, KnownZero, KnownOne, Depth);
+ computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth);
APInt Mask;
if (KnownZero.isNegative()) { // sign bit is 0
@@ -3054,6 +3269,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
if (getTarget().Options.NoNaNsFPMath)
return true;
+ if (const BinaryWithFlagsSDNode *BF = dyn_cast<BinaryWithFlagsSDNode>(Op))
+ return BF->Flags.hasNoNaNs();
+
// If the value is a constant, we can obviously see if it is a NaN or not.
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
return !C->getValueAPF().isNaN();
@@ -3206,6 +3424,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
break;
+ case ISD::ABS:
+ return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
+ case ISD::BITREVERSE:
+ return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(),
+ C->isOpaque());
case ISD::BSWAP:
return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
@@ -3220,6 +3444,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::CTTZ_ZERO_UNDEF:
return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
+ case ISD::FP16_TO_FP: {
+ bool Ignored;
+ APFloat FPV(APFloat::IEEEhalf(),
+ (Val.getBitWidth() == 16) ? Val : Val.trunc(16));
+
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)FPV.convert(EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &Ignored);
+ return getConstantFP(FPV, DL, VT);
+ }
}
}
@@ -3261,17 +3496,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
- integerPart x[2];
bool ignored;
- static_assert(integerPartWidth >= 64, "APFloat parts too small!");
+ APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT);
// FIXME need to be more flexible about rounding mode.
- APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
- Opcode==ISD::FP_TO_SINT,
- APFloat::rmTowardZero, &ignored);
- if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual
+ APFloat::opStatus s =
+ V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored);
+ if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual
break;
- APInt api(VT.getSizeInBits(), x);
- return getConstant(api, DL, VT);
+ return getConstant(IntVal, DL, VT);
}
case ISD::BITCAST:
if (VT == MVT::i16 && C->getValueType(0) == MVT::f16)
@@ -3281,6 +3513,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT);
break;
+ case ISD::FP_TO_FP16: {
+ bool Ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(APFloat::IEEEhalf(),
+ APFloat::rmNearestTiesToEven, &Ignored);
+ return getConstant(V.bitcastToAPInt(), DL, VT);
+ }
}
}
@@ -3303,6 +3543,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::TRUNCATE:
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP:
+ case ISD::ABS:
+ case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
@@ -3420,6 +3662,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
+ case ISD::ABS:
+ assert(VT.isInteger() && VT == Operand.getValueType() &&
+ "Invalid ABS!");
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+ break;
case ISD::BSWAP:
assert(VT.isInteger() && VT == Operand.getValueType() &&
"Invalid BSWAP!");
@@ -3569,6 +3817,30 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
GA->getOffset() + uint64_t(Offset));
}
+bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
+ switch (Opcode) {
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM: {
+ // If a divisor is zero/undef or any element of a divisor vector is
+ // zero/undef, the whole op is undef.
+ assert(Ops.size() == 2 && "Div/rem should have 2 operands");
+ SDValue Divisor = Ops[1];
+ if (Divisor.isUndef() || isNullConstant(Divisor))
+ return true;
+
+ return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) &&
+ any_of(Divisor->op_values(),
+ [](SDValue V) { return V.isUndef() || isNullConstant(V); });
+ // TODO: Handle signed overflow.
+ }
+ // TODO: Handle oversized shifts.
+ default:
+ return false;
+ }
+}
+
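The folds this enables are the classic division hazards; the predicate restated over plain lane values (the helper is illustrative):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include <cstdint>

// sdiv/udiv/srem/urem by zero is undefined, and for a constant vector
// divisor a single zero (or undef) lane taints the whole operation,
// mirroring the any_of over Divisor->op_values() above.
static bool divRemFoldsToUndef(llvm::ArrayRef<uint64_t> DivisorLanes) {
  return llvm::any_of(DivisorLanes,
                      [](uint64_t Lane) { return Lane == 0; });
}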
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
EVT VT, SDNode *Cst1,
SDNode *Cst2) {
@@ -3578,6 +3850,9 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
+ if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)}))
+ return getUNDEF(VT);
+
// Handle the case of two scalars.
if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) {
if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) {
@@ -3645,6 +3920,9 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
+ if (isUndef(Opcode, Ops))
+ return getUNDEF(VT);
+
// We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
if (!VT.isVector())
return SDValue();
@@ -3676,7 +3954,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
// Find legal integer scalar type for constant promotion and
// ensure that its scalar size is at least as large as source.
EVT LegalSVT = VT.getScalarType();
- if (LegalSVT.isInteger()) {
+ if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
if (LegalSVT.bitsLT(VT.getScalarType()))
return SDValue();
@@ -3910,35 +4188,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(EVT.bitsLE(VT) && "Not extending!");
if (EVT == VT) return N1; // Not actually extending
- auto SignExtendInReg = [&](APInt Val) {
+ auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) {
unsigned FromBits = EVT.getScalarSizeInBits();
Val <<= Val.getBitWidth() - FromBits;
Val = Val.ashr(Val.getBitWidth() - FromBits);
- return getConstant(Val, DL, VT.getScalarType());
+ return getConstant(Val, DL, ConstantVT);
};
if (N1C) {
const APInt &Val = N1C->getAPIntValue();
- return SignExtendInReg(Val);
+ return SignExtendInReg(Val, VT);
}
if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
SmallVector<SDValue, 8> Ops;
+ llvm::EVT OpVT = N1.getOperand(0).getValueType();
for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
SDValue Op = N1.getOperand(i);
if (Op.isUndef()) {
- Ops.push_back(getUNDEF(VT.getScalarType()));
+ Ops.push_back(getUNDEF(OpVT));
continue;
}
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- APInt Val = C->getAPIntValue();
- Val = Val.zextOrTrunc(VT.getScalarSizeInBits());
- Ops.push_back(SignExtendInReg(Val));
- continue;
- }
- break;
+ ConstantSDNode *C = cast<ConstantSDNode>(Op);
+ APInt Val = C->getAPIntValue();
+ Ops.push_back(SignExtendInReg(Val, OpVT));
}
- if (Ops.size() == VT.getVectorNumElements())
- return getBuildVector(VT, DL, Ops);
+ return getBuildVector(VT, DL, Ops);
}
break;
}
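The shl/ashr pair inside SignExtendInReg is the standard in-register sign extension; a 32-bit scalar sketch of the same computation:

#include <cstdint>

// Sign-extend the low FromBits of V within 32 bits: shift the narrow
// value's sign bit up to the MSB, then arithmetic-shift it back down.
static int32_t signExtendInReg32(uint32_t V, unsigned FromBits) {
  unsigned Shift = 32 - FromBits;
  return (int32_t)(V << Shift) >> Shift;
}
// signExtendInReg32(0xAB, 8) == -85 (0xFFFFFFAB): bit 7 of the 8-bit
// value is replicated across the upper 24 bits.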
@@ -4040,6 +4314,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (VT.getSimpleVT() == N1.getSimpleValueType())
return N1;
+ // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF.
+ if (N1.isUndef())
+ return getUNDEF(VT);
+
+ // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of
+ // the concat have the same type as the extract.
+ if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N1.getNumOperands() > 0 &&
+ VT == N1.getOperand(0).getValueType()) {
+ unsigned Factor = VT.getVectorNumElements();
+ return N1.getOperand(N2C->getZExtValue() / Factor);
+ }
+
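The CONCAT_VECTORS fold relies on the DAG invariant that a subvector index is a multiple of the extracted width, so integer division recovers the concat operand directly. A sketch of the index arithmetic (assuming that invariant; the helper name is mine):

// extract_subvector (concat_vectors A, B), Idx with Factor lanes per
// piece selects operand Idx / Factor: e.g. extracting v4i32 at index 4
// from concat(v4i32 A, v4i32 B) yields B.
static unsigned concatPieceIndex(unsigned Idx, unsigned Factor) {
  return Idx / Factor;
}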
// EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
// during shuffle legalization.
if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
@@ -4943,11 +5230,11 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
- Dst.getValueType().getTypeForEVT(*getContext()),
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
- TLI->getPointerTy(getDataLayout())),
- std::move(Args))
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
@@ -5004,11 +5291,11 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
- Dst.getValueType().getTypeForEVT(*getContext()),
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
- TLI->getPointerTy(getDataLayout())),
- std::move(Args))
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
@@ -5066,11 +5353,11 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
TargetLowering::CallLoweringInfo CLI(*this);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
- Dst.getValueType().getTypeForEVT(*getContext()),
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
- TLI->getPointerTy(getDataLayout())),
- std::move(Args))
+ .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
+ Dst.getValueType().getTypeForEVT(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
+ TLI->getPointerTy(getDataLayout())),
+ std::move(Args))
.setDiscardResult()
.setTailCall(isTailCall);
@@ -7049,6 +7336,21 @@ bool SDNode::isOnlyUserOf(const SDNode *N) const {
return Seen;
}
+/// Return true if the only users of N are contained in Nodes.
+bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
+ bool Seen = false;
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *User = *I;
+ if (llvm::any_of(Nodes,
+ [&User](const SDNode *Node) { return User == Node; }))
+ Seen = true;
+ else
+ return false;
+ }
+
+ return Seen;
+}
+
/// isOperand - Return true if this node is an operand of N.
///
bool SDValue::isOperandOf(const SDNode *N) const {
@@ -7070,21 +7372,39 @@ bool SDNode::isOperandOf(const SDNode *N) const {
/// side-effecting instructions on any chain path. In practice, this looks
/// through token factors and non-volatile loads. In order to remain efficient,
/// this only looks a couple of nodes in, it does not do an exhaustive search.
+///
+/// Note that we only need to examine chains when we're searching for
+/// side-effects; SelectionDAG requires that all side-effects are represented
+/// by chains, even if another operand would force a specific ordering. This
+/// constraint is necessary to allow transformations like splitting loads.
bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
- unsigned Depth) const {
+ unsigned Depth) const {
if (*this == Dest) return true;
  // Don't search too deeply; we just want to be able to see through
  // TokenFactors etc.
if (Depth == 0) return false;
- // If this is a token factor, all inputs to the TF happen in parallel. If any
- // of the operands of the TF does not reach dest, then we cannot do the xform.
+ // If this is a token factor, all inputs to the TF happen in parallel.
if (getOpcode() == ISD::TokenFactor) {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
- return false;
- return true;
+ // First, try a shallow search.
+ if (is_contained((*this)->ops(), Dest)) {
+ // We found the chain we want as an operand of this TokenFactor.
+ // Essentially, we reach the chain without side-effects if we could
+ // serialize the TokenFactor into a simple chain of operations with
+ // Dest as the last operation. This is automatically true if the
+ // chain has one use: there are no other ordering constraints.
+ // If the chain has more than one use, we give up: some other
+ // use of Dest might force a side-effect between Dest and the current
+ // node.
+ if (Dest.hasOneUse())
+ return true;
+ }
+ // Next, try a deep search: check whether every operand of the TokenFactor
+ // reaches Dest.
+ return all_of((*this)->ops(), [=](SDValue Op) {
+ return Op.reachesChainWithoutSideEffects(Dest, Depth - 1);
+ });
}
// Loads don't have side effects, look through them.
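The two-tier search is easiest to see on concrete DAG shapes (purely illustrative):

// Shallow case: TokenFactor(Dest, OtherChain) where Dest.hasOneUse().
//   Dest is a direct operand and nothing else uses it, so no side
//   effect can be ordered between Dest and this node; answer true
//   without inspecting OtherChain at all.
// Deep case: otherwise, every operand of the TokenFactor must itself
//   reach Dest without side effects, as the code required before this
//   change.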
@@ -7102,11 +7422,6 @@ bool SDNode::hasPredecessor(const SDNode *N) const {
return hasPredecessorHelper(N, Visited, Worklist);
}
-uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
- assert(Num < NumOperands && "Invalid child # of SDNode!");
- return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
-}
-
const SDNodeFlags *SDNode::getFlags() const {
if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this))
return &FlagsNode->Flags;
@@ -7377,13 +7692,13 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
unsigned BitPos = j * EltBitSize;
if (OpVal.isUndef())
- SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
+ SplatUndef.setBits(BitPos, BitPos + EltBitSize);
else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
- SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
- zextOrTrunc(sz) << BitPos;
+ SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltBitSize),
+ BitPos);
else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
- SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
- else
+ SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos);
+ else
return false;
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 996c95bd5f07..8708f58f1e63 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -84,10 +84,6 @@ LimitFPPrecision("limit-float-precision",
cl::location(LimitFloatPrecision),
cl::init(0));
-static cl::opt<bool>
-EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
- cl::desc("Enable fast-math-flags for DAG nodes"));
-
/// Minimum jump table density for normal functions.
static cl::opt<unsigned>
JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden,
@@ -634,10 +630,6 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
}
}
-/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
-/// this value and returns the result as a ValueVT value. This uses
-/// Chain/Flag as the input and updates them for the output Chain/Flag.
-/// If the Flag pointer is NULL, no flag is used.
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
const SDLoc &dl, SDValue &Chain,
@@ -739,10 +731,6 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}
-/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
-/// specified value into the registers specified by this object. This uses
-/// Chain/Flag as the input and updates them for the output Chain/Flag.
-/// If the Flag pointer is NULL, no flag is used.
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
const SDLoc &dl, SDValue &Chain, SDValue *Flag,
const Value *V,
@@ -796,9 +784,6 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
-/// AddInlineAsmOperands - Add this value to the specified inlineasm node
-/// operand list. This adds the code marker and includes the number of
-/// values added into it.
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG,
@@ -850,12 +835,6 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
LPadToCallSiteMap.clear();
}
-/// clear - Clear out the current SelectionDAG and the associated
-/// state and prepare this SelectionDAGBuilder object to be used
-/// for a new block. This doesn't clear out information about
-/// additional blocks that are needed to complete switch lowering
-/// or PHI node updating; that information is cleared out as it is
-/// consumed.
void SelectionDAGBuilder::clear() {
NodeMap.clear();
UnusedArgNodeMap.clear();
@@ -867,21 +846,10 @@ void SelectionDAGBuilder::clear() {
StatepointLowering.clear();
}
-/// clearDanglingDebugInfo - Clear the dangling debug information
-/// map. This function is separated from the clear so that debug
-/// information that is dangling in a basic block can be properly
-/// resolved in a different basic block. This allows the
-/// SelectionDAG to resolve dangling debug information attached
-/// to PHI nodes.
void SelectionDAGBuilder::clearDanglingDebugInfo() {
DanglingDebugInfoMap.clear();
}
-/// getRoot - Return the current virtual root of the Selection DAG,
-/// flushing any PendingLoad items. This must be done before emitting
-/// a store or any other node that may need to be ordered after any
-/// prior load instructions.
-///
SDValue SelectionDAGBuilder::getRoot() {
if (PendingLoads.empty())
return DAG.getRoot();
@@ -901,10 +869,6 @@ SDValue SelectionDAGBuilder::getRoot() {
return Root;
}
-/// getControlRoot - Similar to getRoot, but instead of flushing all the
-/// PendingLoad items, flush all the PendingExports items. It is necessary
-/// to do this before emitting a terminator instruction.
-///
SDValue SelectionDAGBuilder::getControlRoot() {
SDValue Root = DAG.getRoot();
@@ -937,7 +901,9 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
HandlePHINodesInSuccessorBlocks(I.getParent());
}
- ++SDNodeOrder;
+ // Increase the SDNodeOrder if dealing with a non-debug instruction.
+ if (!isa<DbgInfoIntrinsic>(I))
+ ++SDNodeOrder;
CurInst = &I;
@@ -1403,16 +1369,16 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const Function *F = I.getParent()->getParent();
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
LLVMContext &Context = F->getContext();
- bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
- Attribute::InReg);
+ bool RetInReg = F->getAttributes().hasAttribute(
+ AttributeList::ReturnIndex, Attribute::InReg);
for (unsigned j = 0; j != NumValues; ++j) {
EVT VT = ValueVTs[j];
@@ -1582,7 +1548,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
BranchProbability TProb,
- BranchProbability FProb) {
+ BranchProbability FProb,
+ bool InvertCond) {
const BasicBlock *BB = CurBB->getBasicBlock();
// If the leaf of the tree is a comparison, merge the condition into
@@ -1596,10 +1563,14 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
ISD::CondCode Condition;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
- Condition = getICmpCondCode(IC->getPredicate());
+ ICmpInst::Predicate Pred =
+ InvertCond ? IC->getInversePredicate() : IC->getPredicate();
+ Condition = getICmpCondCode(Pred);
} else {
const FCmpInst *FC = cast<FCmpInst>(Cond);
- Condition = getFCmpCondCode(FC->getPredicate());
+ FCmpInst::Predicate Pred =
+ InvertCond ? FC->getInversePredicate() : FC->getPredicate();
+ Condition = getFCmpCondCode(Pred);
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
}
@@ -1612,7 +1583,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
}
// Create a CaseBlock record representing this branch.
- CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
+ ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
+ CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
nullptr, TBB, FBB, CurBB, TProb, FProb);
SwitchCases.push_back(CB);
}
@@ -1625,16 +1597,44 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *SwitchBB,
Instruction::BinaryOps Opc,
BranchProbability TProb,
- BranchProbability FProb) {
- // If this node is not part of the or/and tree, emit it as a branch.
+ BranchProbability FProb,
+ bool InvertCond) {
+  // Skip over nodes that are not part of the tree, remembering to invert the
+  // op and operands at the next level.
+ if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) {
+ const Value *CondOp = BinaryOperator::getNotArgument(Cond);
+ if (InBlock(CondOp, CurBB->getBasicBlock())) {
+ FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+ !InvertCond);
+ return;
+ }
+ }
+
const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ // Compute the effective opcode for Cond, taking into account whether it needs
+ // to be inverted, e.g.
+ // and (not (or A, B)), C
+ // gets lowered as
+ // and (and (not A, not B), C)
+ unsigned BOpc = 0;
+ if (BOp) {
+ BOpc = BOp->getOpcode();
+ if (InvertCond) {
+ if (BOpc == Instruction::And)
+ BOpc = Instruction::Or;
+ else if (BOpc == Instruction::Or)
+ BOpc = Instruction::And;
+ }
+ }
+
+ // If this node is not part of the or/and tree, emit it as a branch.
if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
- (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+ BOpc != Opc || !BOp->hasOneUse() ||
BOp->getParent() != CurBB->getBasicBlock() ||
!InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
!InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
- TProb, FProb);
+ TProb, FProb, InvertCond);
return;
}
@@ -1669,14 +1669,14 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
auto NewFalseProb = TProb / 2 + FProb;
// Emit the LHS condition.
FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
- NewTrueProb, NewFalseProb);
+ NewTrueProb, NewFalseProb, InvertCond);
// Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- Probs[0], Probs[1]);
+ Probs[0], Probs[1], InvertCond);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
@@ -1702,14 +1702,14 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
auto NewFalseProb = FProb / 2;
// Emit the LHS condition.
FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
- NewTrueProb, NewFalseProb);
+ NewTrueProb, NewFalseProb, InvertCond);
// Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- Probs[0], Probs[1]);
+ Probs[0], Probs[1], InvertCond);
}
}
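The InvertCond threading is De Morgan's law applied lazily: the not is never materialized; its subtree swaps and/or and each leaf compare is emitted with the inverse predicate. A sketch of the opcode flip (the helper name is mine):

#include "llvm/IR/Instruction.h"
using namespace llvm;

// Effective opcode of a boolean operator once an enclosing 'not' has
// been pushed onto it: not(or A, B) == and(not A, not B), and
// symmetrically for 'and'.
static unsigned opcodeUnderNot(unsigned Opc) {
  if (Opc == Instruction::And)
    return Instruction::Or;
  if (Opc == Instruction::Or)
    return Instruction::And;
  return Opc;
}

Leaves that are not compares fall back to a case block testing against true with SETEQ, or SETNE when inverted, as in EmitBranchForMergedCondition above.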
@@ -1793,7 +1793,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
- getEdgeProbability(BrMBB, Succ1MBB));
+ getEdgeProbability(BrMBB, Succ1MBB),
+ /*InvertCond=*/false);
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
@@ -2027,7 +2028,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
Entry.Node = StackSlot;
Entry.Ty = FnTy->getParamType(0);
if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
- Entry.isInReg = true;
+ Entry.IsInReg = true;
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
@@ -2581,13 +2582,13 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
Flags.setNoSignedWrap(nsw);
Flags.setNoUnsignedWrap(nuw);
Flags.setVectorReduction(vec_redux);
- if (EnableFMFInDAG) {
- Flags.setAllowReciprocal(FMF.allowReciprocal());
- Flags.setNoInfs(FMF.noInfs());
- Flags.setNoNaNs(FMF.noNaNs());
- Flags.setNoSignedZeros(FMF.noSignedZeros());
- Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
- }
+ Flags.setAllowReciprocal(FMF.allowReciprocal());
+ Flags.setAllowContract(FMF.allowContract());
+ Flags.setNoInfs(FMF.noInfs());
+ Flags.setNoNaNs(FMF.noNaNs());
+ Flags.setNoSignedZeros(FMF.noSignedZeros());
+ Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
+
SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
Op1, Op2, &Flags);
setValue(&I, BinNodeValue);
@@ -2914,7 +2915,7 @@ void SelectionDAGBuilder::visitBitCast(const User &I) {
DestVT, N)); // convert types.
// Check if the original LLVM IR Operand was a ConstantInt, because getValue()
// might fold any kind of constant expression to an integer constant and that
- // is not what we are looking for. Only regcognize a bitcast of a genuine
+ // is not what we are looking for. Only recognize a bitcast of a genuine
// constant integer as an opaque constant.
else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
@@ -3067,14 +3068,10 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
if (SrcNumElts > MaskNumElts) {
// Analyze the access pattern of the vector to see if we can extract
- // two subvectors and do the shuffle. The analysis is done by calculating
- // the range of elements the mask access on both vectors.
- int MinRange[2] = { static_cast<int>(SrcNumElts),
- static_cast<int>(SrcNumElts)};
- int MaxRange[2] = {-1, -1};
-
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- int Idx = Mask[i];
+ // two subvectors and do the shuffle.
+ int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
+ bool CanExtract = true;
+ for (int Idx : Mask) {
unsigned Input = 0;
if (Idx < 0)
continue;
@@ -3083,41 +3080,28 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
Input = 1;
Idx -= SrcNumElts;
}
- if (Idx > MaxRange[Input])
- MaxRange[Input] = Idx;
- if (Idx < MinRange[Input])
- MinRange[Input] = Idx;
- }
-
- // Check if the access is smaller than the vector size and can we find
- // a reasonable extract index.
- int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not
- // Extract.
- int StartIdx[2]; // StartIdx to extract from
- for (unsigned Input = 0; Input < 2; ++Input) {
- if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
- RangeUse[Input] = 0; // Unused
- StartIdx[Input] = 0;
- continue;
- }
- // Find a good start index that is a multiple of the mask length. Then
- // see if the rest of the elements are in range.
- StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
- if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
- StartIdx[Input] + MaskNumElts <= SrcNumElts)
- RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+ // If all the indices come from the same MaskNumElts sized portion of
+ // the sources we can use extract. Also make sure the extract wouldn't
+ // extract past the end of the source.
+ int NewStartIdx = alignDown(Idx, MaskNumElts);
+ if (NewStartIdx + MaskNumElts > SrcNumElts ||
+ (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
+ CanExtract = false;
+ // Make sure we always update StartIdx as we use it to track if all
+ // elements are undef.
+ StartIdx[Input] = NewStartIdx;
}
- if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+ if (StartIdx[0] < 0 && StartIdx[1] < 0) {
setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
return;
}
- if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
+ if (CanExtract) {
// Extract appropriate subvector and generate a vector shuffle
for (unsigned Input = 0; Input < 2; ++Input) {
SDValue &Src = Input == 0 ? Src1 : Src2;
- if (RangeUse[Input] == 0)
+ if (StartIdx[Input] < 0)
Src = DAG.getUNDEF(VT);
else {
Src = DAG.getNode(
@@ -3128,16 +3112,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
}
// Calculate new mask.
- SmallVector<int, 8> MappedOps;
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- int Idx = Mask[i];
- if (Idx >= 0) {
- if (Idx < (int)SrcNumElts)
- Idx -= StartIdx[0];
- else
- Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
- }
- MappedOps.push_back(Idx);
+ SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
+ for (int &Idx : MappedOps) {
+ if (Idx >= (int)SrcNumElts)
+ Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+ else if (Idx >= 0)
+ Idx -= StartIdx[0];
}
setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
@@ -3151,8 +3131,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
EVT EltVT = VT.getVectorElementType();
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
SmallVector<SDValue,8> Ops;
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- int Idx = Mask[i];
+ for (int Idx : Mask) {
SDValue Res;
if (Idx < 0) {
@@ -3281,7 +3260,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// N = N + Offset
uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
- // In an inbouds GEP with an offset that is nonnegative even when
+ // In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
@@ -4752,7 +4731,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
else
FuncInfo.ArgDbgValues.push_back(
BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
- .addOperand(*Op)
+ .add(*Op)
.addImm(Offset)
.addMetadata(Variable)
.addMetadata(Expr));
@@ -4764,7 +4743,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
DILocalVariable *Variable,
DIExpression *Expr, int64_t Offset,
- DebugLoc dl,
+ const DebugLoc &dl,
unsigned DbgSDNodeOrder) {
SDDbgValue *SDV;
auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode());
@@ -4794,9 +4773,9 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
# define setjmp_undefined_for_msvc
#endif
-/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
-/// we want to emit this as a call to a named external function, return the name
-/// otherwise lower it and return null.
+/// Lower the call to the specified intrinsic function. If we want to emit this
+/// as a call to a named external function, return the name. Otherwise, lower it
+/// and return null.
const char *
SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -4929,14 +4908,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
report_fatal_error("Unsupported element size");
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(sdl)
- .setChain(getRoot())
- .setCallee(TLI.getLibcallCallingConv(LibraryCall),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(
- TLI.getLibcallName(LibraryCall),
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args));
+ CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
+ TLI.getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
DAG.setRoot(CallResult.second);
@@ -5301,6 +5278,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
return nullptr;
+ case Intrinsic::experimental_constrained_fadd:
+ case Intrinsic::experimental_constrained_fsub:
+ case Intrinsic::experimental_constrained_fmul:
+ case Intrinsic::experimental_constrained_fdiv:
+ case Intrinsic::experimental_constrained_frem:
+ visitConstrainedFPIntrinsic(I, Intrinsic);
+ return nullptr;
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
@@ -5537,7 +5521,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::trap: {
StringRef TrapFuncName =
I.getAttributes()
- .getAttribute(AttributeSet::FunctionIndex, "trap-func-name")
+ .getAttribute(AttributeList::FunctionIndex, "trap-func-name")
.getValueAsString();
if (TrapFuncName.empty()) {
ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
@@ -5548,7 +5532,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
TargetLowering::ArgListTy Args;
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee(
+ CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
CallingConv::C, I.getType(),
DAG.getExternalSymbol(TrapFuncName.data(),
TLI.getPointerTy(DAG.getDataLayout())),
@@ -5749,6 +5733,46 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
}
+void SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I,
+ unsigned Intrinsic) {
+ SDLoc sdl = getCurSDLoc();
+ unsigned Opcode;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::experimental_constrained_fadd:
+ Opcode = ISD::STRICT_FADD;
+ break;
+ case Intrinsic::experimental_constrained_fsub:
+ Opcode = ISD::STRICT_FSUB;
+ break;
+ case Intrinsic::experimental_constrained_fmul:
+ Opcode = ISD::STRICT_FMUL;
+ break;
+ case Intrinsic::experimental_constrained_fdiv:
+ Opcode = ISD::STRICT_FDIV;
+ break;
+ case Intrinsic::experimental_constrained_frem:
+ Opcode = ISD::STRICT_FREM;
+ break;
+ }
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Chain = getRoot();
+ SDValue Ops[3] = { Chain, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)) };
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
+ ValueVTs.push_back(MVT::Other); // Out chain
+
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+ SDValue Result = DAG.getNode(Opcode, sdl, VTs, Ops);
+
+ assert(Result.getNode()->getNumValues() == 2);
+ SDValue OutChain = Result.getValue(1);
+ DAG.setRoot(OutChain);
+ SDValue FPResult = Result.getValue(0);
+ setValue(&I, FPResult);
+}
+
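The shape of the node this emits is worth spelling out; the IR line is illustrative only:

// %r = call double @llvm.experimental.constrained.fadd.f64(...)
//   lowers to: STRICT_FADD {f64, ch} (InChain, LHS, RHS)
// Result value 0 is the FP value handed to setValue(); result value 1
// is the out-chain installed as the new DAG root, which keeps the
// operation ordered against other side-effecting nodes, including
// accesses to the FP environment.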
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
const BasicBlock *EHPadBB) {
@@ -5827,7 +5851,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
Type *RetTy = CS.getType();
TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
Args.reserve(CS.arg_size());
const Value *SwiftErrorVal = nullptr;
@@ -5843,6 +5866,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
+ TargetLowering::ArgListEntry Entry;
const Value *V = *i;
// Skip empty types
@@ -5852,11 +5876,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode; Entry.Ty = V->getType();
- // Skip the first return-type Attribute to get to params.
- Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
+ Entry.setAttributes(&CS, i - CS.arg_begin());
// Use swifterror virtual register as input to the call.
- if (Entry.isSwiftError && TLI.supportSwiftError()) {
+ if (Entry.IsSwiftError && TLI.supportSwiftError()) {
SwiftErrorVal = V;
// We find the virtual register for the actual swifterror argument.
// Instead of using the Value, we use the virtual register instead.
@@ -5869,7 +5892,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// If we have an explicit sret argument that is an Instruction, (i.e., it
// might point to function-local memory), we can't meaningfully tail-call.
- if (Entry.isSRet && isa<Instruction>(V))
+ if (Entry.IsSRet && isa<Instruction>(V))
isTailCall = false;
}
@@ -5912,8 +5935,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
}
}
-/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
-/// value is equal or not-equal to zero.
+/// Return true if it only matters that the value is equal or not-equal to zero.
static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
for (const User *U : V->users()) {
if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
@@ -5928,13 +5950,17 @@ static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
}
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
- Type *LoadTy,
SelectionDAGBuilder &Builder) {
// Check to see if this load can be trivially constant folded, e.g. if the
// input is from a string literal.
if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
// Cast pointer to the type we really want to load.
+ Type *LoadTy =
+ Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
+ if (LoadVT.isVector())
+ LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements());
+
LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
PointerType::getUnqual(LoadTy));
@@ -5967,8 +5993,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
return LoadVal;
}
-/// processIntegerCallValue - Record the value for an instruction that
-/// produces an integer result, converting the type where necessary.
+/// Record the value for an instruction that produces an integer result,
+/// converting the type where necessary.
void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
SDValue Value,
bool IsSigned) {
@@ -5981,20 +6007,13 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
setValue(&I, Value);
}
-/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
-/// If so, return true and lower it, otherwise return false and it will be
-/// lowered like a normal call.
+/// See if we can lower a memcmp call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
- // Verify that the prototype makes sense. int memcmp(void*,void*,size_t)
- if (I.getNumArgOperands() != 3)
- return false;
-
const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
- if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
- !I.getArgOperand(2)->getType()->isIntegerTy() ||
- !I.getType()->isIntegerTy())
- return false;
-
const Value *Size = I.getArgOperand(2);
const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
if (CSize && CSize->getZExtValue() == 0) {
@@ -6005,11 +6024,9 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
}
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
- std::pair<SDValue, SDValue> Res =
- TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(),
- getValue(LHS), getValue(RHS), getValue(Size),
- MachinePointerInfo(LHS),
- MachinePointerInfo(RHS));
+ std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
+ DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
+ getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, true);
PendingLoads.push_back(Res.second);
@@ -6018,88 +6035,79 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
- if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) {
- bool ActuallyDoIt = true;
- MVT LoadVT;
- Type *LoadTy;
- switch (CSize->getZExtValue()) {
- default:
- LoadVT = MVT::Other;
- LoadTy = nullptr;
- ActuallyDoIt = false;
- break;
- case 2:
- LoadVT = MVT::i16;
- LoadTy = Type::getInt16Ty(CSize->getContext());
- break;
- case 4:
- LoadVT = MVT::i32;
- LoadTy = Type::getInt32Ty(CSize->getContext());
- break;
- case 8:
- LoadVT = MVT::i64;
- LoadTy = Type::getInt64Ty(CSize->getContext());
- break;
- /*
- case 16:
- LoadVT = MVT::v4i32;
- LoadTy = Type::getInt32Ty(CSize->getContext());
- LoadTy = VectorType::get(LoadTy, 4);
- break;
- */
- }
-
- // This turns into unaligned loads. We only do this if the target natively
- // supports the MVT we'll be loading or if it is small enough (<= 4) that
- // we'll only produce a small number of byte loads.
+ if (!CSize || !IsOnlyUsedInZeroEqualityComparison(&I))
+ return false;
- // Require that we can find a legal MVT, and only do this if the target
- // supports unaligned loads of that type. Expanding into byte loads would
- // bloat the code.
+ // If the target has a fast compare for the given size, it will return a
+ // preferred load type for that size. Require that the load VT is legal and
+ // that the target supports unaligned loads of that type. Otherwise, return
+ // INVALID.
+ auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (ActuallyDoIt && CSize->getZExtValue() > 4) {
- unsigned DstAS = LHS->getType()->getPointerAddressSpace();
- unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
+ MVT LVT = TLI.hasFastEqualityCompare(NumBits);
+ if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
// TODO: Check alignment of src and dest ptrs.
- if (!TLI.isTypeLegal(LoadVT) ||
- !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) ||
- !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS))
- ActuallyDoIt = false;
+ unsigned DstAS = LHS->getType()->getPointerAddressSpace();
+ unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
+ if (!TLI.isTypeLegal(LVT) ||
+ !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
+ !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
+ LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
}
- if (ActuallyDoIt) {
- SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
- SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
+ return LVT;
+ };
- SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal,
- ISD::SETNE);
- processIntegerCallValue(I, Res, false);
- return true;
- }
+ // This turns into unaligned loads. We only do this if the target natively
+ // supports the MVT we'll be loading or if it is small enough (<= 4) that
+ // we'll only produce a small number of byte loads.
+ MVT LoadVT;
+ unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
+ switch (NumBitsToCompare) {
+ default:
+ return false;
+ case 16:
+ LoadVT = MVT::i16;
+ break;
+ case 32:
+ LoadVT = MVT::i32;
+ break;
+ case 64:
+ case 128:
+ case 256:
+ LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
+ break;
}
+ if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return false;
- return false;
+ SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
+ SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);
+
+ // Bitcast to a wide integer type if the loads are vectors.
+ if (LoadVT.isVector()) {
+ EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
+ LoadL = DAG.getBitcast(CmpVT, LoadL);
+ LoadR = DAG.getBitcast(CmpVT, LoadR);
+ }
+
+ SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
+ processIntegerCallValue(I, Cmp, false);
+ return true;
}
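Restated as portable C++, here is what the rewritten path emits for a 4-byte memcmp used only in a zero-equality test (memcpy stands in for the misaligned loads the target must allow; the helper is illustrative):

#include <cstdint>
#include <cstring>

// memcmp(P, Q, 4) != 0 lowered as one wide load per side plus a single
// SETNE instead of a libcall.
static bool memcmp4NotEqual(const void *P, const void *Q) {
  uint32_t L, R;
  std::memcpy(&L, P, 4);
  std::memcpy(&R, Q, 4);
  return L != R;
}

Sizes of 8, 16, and 32 bytes additionally go through hasFastEqualityCompare, letting the target veto the transform or pick a vector type, which is then bitcast to a wide integer for the compare.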
-/// visitMemChrCall -- See if we can lower a memchr call into an optimized
-/// form. If so, return true and lower it, otherwise return false and it
-/// will be lowered like a normal call.
+/// See if we can lower a memchr call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
- // Verify that the prototype makes sense. void *memchr(void *, int, size_t)
- if (I.getNumArgOperands() != 3)
- return false;
-
const Value *Src = I.getArgOperand(0);
const Value *Char = I.getArgOperand(1);
const Value *Length = I.getArgOperand(2);
- if (!Src->getType()->isPointerTy() ||
- !Char->getType()->isIntegerTy() ||
- !Length->getType()->isIntegerTy() ||
- !I.getType()->isPointerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6115,15 +6123,12 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
return false;
}
-///
-/// visitMemPCpyCall -- lower a mempcpy call as a memcpy followed by code to
-/// to adjust the dst pointer by the size of the copied memory.
+/// See if we can lower a mempcpy call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
-
- // Verify argument count: void *mempcpy(void *, const void *, size_t)
- if (I.getNumArgOperands() != 3)
- return false;
-
SDValue Dst = getValue(I.getArgOperand(0));
SDValue Src = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
@@ -6158,19 +6163,13 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
return true;
}
-/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an
-/// optimized form. If so, return true and lower it, otherwise return false
-/// and it will be lowered like a normal call.
+/// See if we can lower a strcpy call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
- // Verify that the prototype makes sense. char *strcpy(char *, char *)
- if (I.getNumArgOperands() != 2)
- return false;
-
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
- if (!Arg0->getType()->isPointerTy() ||
- !Arg1->getType()->isPointerTy() ||
- !I.getType()->isPointerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6187,19 +6186,13 @@ bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
return false;
}
-/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form.
-/// If so, return true and lower it, otherwise return false and it will be
-/// lowered like a normal call.
+/// See if we can lower a strcmp call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
- // Verify that the prototype makes sense. int strcmp(void*,void*)
- if (I.getNumArgOperands() != 2)
- return false;
-
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
- if (!Arg0->getType()->isPointerTy() ||
- !Arg1->getType()->isPointerTy() ||
- !I.getType()->isIntegerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6216,17 +6209,13 @@ bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
return false;
}
-/// visitStrLenCall -- See if we can lower a strlen call into an optimized
-/// form. If so, return true and lower it, otherwise return false and it
-/// will be lowered like a normal call.
+/// See if we can lower a strlen call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
- // Verify that the prototype makes sense. size_t strlen(char *)
- if (I.getNumArgOperands() != 1)
- return false;
-
const Value *Arg0 = I.getArgOperand(0);
- if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6241,19 +6230,13 @@ bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
return false;
}
-/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized
-/// form. If so, return true and lower it, otherwise return false and it
-/// will be lowered like a normal call.
+/// See if we can lower a strnlen call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
- // Verify that the prototype makes sense. size_t strnlen(char *, size_t)
- if (I.getNumArgOperands() != 2)
- return false;
-
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
- if (!Arg0->getType()->isPointerTy() ||
- !Arg1->getType()->isIntegerTy() ||
- !I.getType()->isIntegerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6269,16 +6252,15 @@ bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
return false;
}
-/// visitUnaryFloatCall - If a call instruction is a unary floating-point
-/// operation (as expected), translate it to an SDNode with the specified opcode
-/// and return true.
+/// See if we can lower a unary floating-point operation into an SDNode with
+/// the specified Opcode. If so, return true and lower it, otherwise return
+/// false and it will be lowered like a normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
unsigned Opcode) {
- // Sanity check that it really is a unary floating-point call.
- if (I.getNumArgOperands() != 1 ||
- !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
- I.getType() != I.getArgOperand(0)->getType() ||
- !I.onlyReadsMemory())
+ // We already checked this call's prototype; verify it doesn't modify errno.
+ if (!I.onlyReadsMemory())
return false;
SDValue Tmp = getValue(I.getArgOperand(0));
@@ -6286,17 +6268,15 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
return true;
}
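
The onlyReadsMemory() check that replaces the old prototype test is load-bearing: a libm call that may set errno observably writes memory and has to stay a call. A small illustration in ordinary C++ (not patch code), assuming default math-errno semantics:

    #include <cerrno>
    #include <cmath>

    // Without -fno-math-errno, sqrt of a negative value sets errno to EDOM,
    // so the call writes memory and cannot be folded to a pure ISD::FSQRT
    // node; only calls the frontend marked read-only qualify.
    double sqrt_errno_demo() {
      errno = 0;
      double r = std::sqrt(-1.0);  // NaN; errno becomes EDOM
      return errno == EDOM ? 0.0 : r;
    }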
-/// visitBinaryFloatCall - If a call instruction is a binary floating-point
-/// operation (as expected), translate it to an SDNode with the specified opcode
-/// and return true.
+/// See if we can lower a binary floating-point operation into an SDNode with
+/// the specified Opcode. If so, return true and lower it. Otherwise return
+/// false, and it will be lowered like a normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
unsigned Opcode) {
- // Sanity check that it really is a binary floating-point call.
- if (I.getNumArgOperands() != 2 ||
- !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
- I.getType() != I.getArgOperand(0)->getType() ||
- I.getType() != I.getArgOperand(1)->getType() ||
- !I.onlyReadsMemory())
+ // We already checked this call's prototype; verify it doesn't modify errno.
+ if (!I.onlyReadsMemory())
return false;
SDValue Tmp0 = getValue(I.getArgOperand(0));
@@ -6336,20 +6316,18 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Check for well-known libc/libm calls. If the function is internal, it
// can't be a library call. Don't do the check if marked as nobuiltin for
// some reason.
- LibFunc::Func Func;
+ LibFunc Func;
if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() &&
- LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->getLibFunc(*F, Func) &&
LibInfo->hasOptimizedCodeGen(Func)) {
switch (Func) {
default: break;
- case LibFunc::copysign:
- case LibFunc::copysignf:
- case LibFunc::copysignl:
- if (I.getNumArgOperands() == 2 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.getType() == I.getArgOperand(1)->getType() &&
- I.onlyReadsMemory()) {
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
+ case LibFunc_copysignl:
+ // We already checked this call's prototype; verify it doesn't modify
+ // errno.
+ if (I.onlyReadsMemory()) {
SDValue LHS = getValue(I.getArgOperand(0));
SDValue RHS = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
@@ -6357,122 +6335,122 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
}
break;
- case LibFunc::fabs:
- case LibFunc::fabsf:
- case LibFunc::fabsl:
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
if (visitUnaryFloatCall(I, ISD::FABS))
return;
break;
- case LibFunc::fmin:
- case LibFunc::fminf:
- case LibFunc::fminl:
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
if (visitBinaryFloatCall(I, ISD::FMINNUM))
return;
break;
- case LibFunc::fmax:
- case LibFunc::fmaxf:
- case LibFunc::fmaxl:
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
if (visitBinaryFloatCall(I, ISD::FMAXNUM))
return;
break;
- case LibFunc::sin:
- case LibFunc::sinf:
- case LibFunc::sinl:
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ case LibFunc_sinl:
if (visitUnaryFloatCall(I, ISD::FSIN))
return;
break;
- case LibFunc::cos:
- case LibFunc::cosf:
- case LibFunc::cosl:
+ case LibFunc_cos:
+ case LibFunc_cosf:
+ case LibFunc_cosl:
if (visitUnaryFloatCall(I, ISD::FCOS))
return;
break;
- case LibFunc::sqrt:
- case LibFunc::sqrtf:
- case LibFunc::sqrtl:
- case LibFunc::sqrt_finite:
- case LibFunc::sqrtf_finite:
- case LibFunc::sqrtl_finite:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
+ case LibFunc_sqrt_finite:
+ case LibFunc_sqrtf_finite:
+ case LibFunc_sqrtl_finite:
if (visitUnaryFloatCall(I, ISD::FSQRT))
return;
break;
- case LibFunc::floor:
- case LibFunc::floorf:
- case LibFunc::floorl:
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
if (visitUnaryFloatCall(I, ISD::FFLOOR))
return;
break;
- case LibFunc::nearbyint:
- case LibFunc::nearbyintf:
- case LibFunc::nearbyintl:
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
return;
break;
- case LibFunc::ceil:
- case LibFunc::ceilf:
- case LibFunc::ceill:
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
if (visitUnaryFloatCall(I, ISD::FCEIL))
return;
break;
- case LibFunc::rint:
- case LibFunc::rintf:
- case LibFunc::rintl:
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
if (visitUnaryFloatCall(I, ISD::FRINT))
return;
break;
- case LibFunc::round:
- case LibFunc::roundf:
- case LibFunc::roundl:
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
if (visitUnaryFloatCall(I, ISD::FROUND))
return;
break;
- case LibFunc::trunc:
- case LibFunc::truncf:
- case LibFunc::truncl:
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
if (visitUnaryFloatCall(I, ISD::FTRUNC))
return;
break;
- case LibFunc::log2:
- case LibFunc::log2f:
- case LibFunc::log2l:
+ case LibFunc_log2:
+ case LibFunc_log2f:
+ case LibFunc_log2l:
if (visitUnaryFloatCall(I, ISD::FLOG2))
return;
break;
- case LibFunc::exp2:
- case LibFunc::exp2f:
- case LibFunc::exp2l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2l:
if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
break;
- case LibFunc::memcmp:
+ case LibFunc_memcmp:
if (visitMemCmpCall(I))
return;
break;
- case LibFunc::mempcpy:
+ case LibFunc_mempcpy:
if (visitMemPCpyCall(I))
return;
break;
- case LibFunc::memchr:
+ case LibFunc_memchr:
if (visitMemChrCall(I))
return;
break;
- case LibFunc::strcpy:
+ case LibFunc_strcpy:
if (visitStrCpyCall(I, false))
return;
break;
- case LibFunc::stpcpy:
+ case LibFunc_stpcpy:
if (visitStrCpyCall(I, true))
return;
break;
- case LibFunc::strcmp:
+ case LibFunc_strcmp:
if (visitStrCmpCall(I))
return;
break;
- case LibFunc::strlen:
+ case LibFunc_strlen:
if (visitStrLenCall(I))
return;
break;
- case LibFunc::strnlen:
+ case LibFunc_strnlen:
if (visitStrNLenCall(I))
return;
break;
@@ -7361,7 +7339,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
// Populate the argument list.
// Attributes for args start at offset 1, after the return attribute.
- for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
ArgI != ArgE; ++ArgI) {
const Value *V = CS->getOperand(ArgI);
@@ -7370,7 +7348,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
TargetLowering::ArgListEntry Entry;
Entry.Node = getValue(V);
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, AttrI);
+ Entry.setAttributes(&CS, ArgIdx);
Args.push_back(Entry);
}
@@ -7631,9 +7609,9 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
FuncInfo.MF->getFrameInfo().setHasPatchPoint();
}
-/// Returns an AttributeSet representing the attributes applied to the return
+/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
-static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
+static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
SmallVector<Attribute::AttrKind, 2> Attrs;
if (CLI.RetSExt)
Attrs.push_back(Attribute::SExt);
@@ -7642,8 +7620,8 @@ static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
if (CLI.IsInReg)
Attrs.push_back(Attribute::InReg);
- return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex,
- Attrs);
+ return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
+ Attrs);
}
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
@@ -7683,15 +7661,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ArgListEntry Entry;
Entry.Node = DemoteStackSlot;
Entry.Ty = StackSlotPtrType;
- Entry.isSExt = false;
- Entry.isZExt = false;
- Entry.isInReg = false;
- Entry.isSRet = true;
- Entry.isNest = false;
- Entry.isByVal = false;
- Entry.isReturned = false;
- Entry.isSwiftSelf = false;
- Entry.isSwiftError = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Entry.IsInReg = false;
+ Entry.IsSRet = true;
+ Entry.IsNest = false;
+ Entry.IsByVal = false;
+ Entry.IsReturned = false;
+ Entry.IsSwiftSelf = false;
+ Entry.IsSwiftError = false;
Entry.Alignment = Align;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
@@ -7724,7 +7702,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ArgListTy &Args = CLI.getArgs();
if (supportSwiftError()) {
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- if (Args[i].isSwiftError) {
+ if (Args[i].IsSwiftError) {
ISD::InputArg MyFlags;
MyFlags.VT = getPointerTy(DL);
MyFlags.ArgVT = EVT(getPointerTy(DL));
@@ -7741,7 +7719,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
Type *FinalType = Args[i].Ty;
- if (Args[i].isByVal)
+ if (Args[i].IsByVal)
FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
FinalType, CLI.CallConv, CLI.IsVarArg);
@@ -7754,11 +7732,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ISD::ArgFlagsTy Flags;
unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
- if (Args[i].isZExt)
+ if (Args[i].IsZExt)
Flags.setZExt();
- if (Args[i].isSExt)
+ if (Args[i].IsSExt)
Flags.setSExt();
- if (Args[i].isInReg) {
+ if (Args[i].IsInReg) {
// If we are using vectorcall calling convention, a structure that is
// passed InReg - is surely an HVA
if (CLI.CallConv == CallingConv::X86_VectorCall &&
@@ -7771,15 +7749,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Set InReg Flag
Flags.setInReg();
}
- if (Args[i].isSRet)
+ if (Args[i].IsSRet)
Flags.setSRet();
- if (Args[i].isSwiftSelf)
+ if (Args[i].IsSwiftSelf)
Flags.setSwiftSelf();
- if (Args[i].isSwiftError)
+ if (Args[i].IsSwiftError)
Flags.setSwiftError();
- if (Args[i].isByVal)
+ if (Args[i].IsByVal)
Flags.setByVal();
- if (Args[i].isInAlloca) {
+ if (Args[i].IsInAlloca) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
// inalloca. This way we can know how many bytes we should've allocated
@@ -7788,7 +7766,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// in the various CC lowering callbacks.
Flags.setByVal();
}
- if (Args[i].isByVal || Args[i].isInAlloca) {
+ if (Args[i].IsByVal || Args[i].IsInAlloca) {
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
@@ -7801,7 +7779,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
FrameAlign = getByValTypeAlignment(ElementTy, DL);
Flags.setByValAlign(FrameAlign);
}
- if (Args[i].isNest)
+ if (Args[i].IsNest)
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
@@ -7812,13 +7790,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SmallVector<SDValue, 4> Parts(NumParts);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (Args[i].isSExt)
+ if (Args[i].IsSExt)
ExtendKind = ISD::SIGN_EXTEND;
- else if (Args[i].isZExt)
+ else if (Args[i].IsZExt)
ExtendKind = ISD::ZERO_EXTEND;
// Conservatively only handle 'returned' on non-vectors for now
- if (Args[i].isReturned && !Op.getValueType().isVector()) {
+ if (Args[i].IsReturned && !Op.getValueType().isVector()) {
assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
"unexpected use of 'returned'");
// Before passing 'returned' to the target lowering code, ensure that
@@ -7832,9 +7810,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// parameter extension method is not compatible with the return
// extension method
if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
- (ExtendKind != ISD::ANY_EXTEND &&
- CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt))
- Flags.setReturned();
+ (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt &&
+ CLI.RetZExt == Args[i].IsZExt))
+ Flags.setReturned();
}
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
@@ -8010,6 +7988,173 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
return true;
}
+typedef DenseMap<const Argument *,
+ std::pair<const AllocaInst *, const StoreInst *>>
+ ArgCopyElisionMapTy;
+
+/// Scan the entry block of the function in FuncInfo for arguments that look
+/// like copies into a local alloca. Record any copied arguments in
+/// ArgCopyElisionCandidates.
+static void
+findArgumentCopyElisionCandidates(const DataLayout &DL,
+ FunctionLoweringInfo *FuncInfo,
+ ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
+ // Record the state of every static alloca used in the entry block. Argument
+ // allocas are all used in the entry block, so we need approximately as many
+ // entries as we have arguments.
+ enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
+ SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
+ unsigned NumArgs = FuncInfo->Fn->arg_size();
+ StaticAllocas.reserve(NumArgs * 2);
+
+ auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
+ if (!V)
+ return nullptr;
+ V = V->stripPointerCasts();
+ const auto *AI = dyn_cast<AllocaInst>(V);
+ if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
+ return nullptr;
+ auto Iter = StaticAllocas.insert({AI, Unknown});
+ return &Iter.first->second;
+ };
+
+ // Look for stores of arguments to static allocas. Look through bitcasts and
+ // GEPs to handle type coercions, as long as the alloca is fully initialized
+ // by the store. Any non-store use of an alloca escapes it and any subsequent
+ // unanalyzed store might write it.
+ // FIXME: Handle structs initialized with multiple stores.
+ for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
+ // Look for stores, and handle non-store uses conservatively.
+ const auto *SI = dyn_cast<StoreInst>(&I);
+ if (!SI) {
+ // We will look through cast uses, so ignore them completely.
+ if (I.isCast())
+ continue;
+ // Ignore debug info intrinsics, they don't escape or store to allocas.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ // This is an unknown instruction. Assume it escapes or writes to all
+ // static alloca operands.
+ for (const Use &U : I.operands()) {
+ if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
+ *Info = StaticAllocaInfo::Clobbered;
+ }
+ continue;
+ }
+
+ // If the stored value is a static alloca, mark it as escaped.
+ if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
+ *Info = StaticAllocaInfo::Clobbered;
+
+ // Check if the destination is a static alloca.
+ const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
+ StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
+ if (!Info)
+ continue;
+ const AllocaInst *AI = cast<AllocaInst>(Dst);
+
+ // Skip allocas that have been initialized or clobbered.
+ if (*Info != StaticAllocaInfo::Unknown)
+ continue;
+
+ // Check if the stored value is an argument, and that this store fully
+ // initializes the alloca. Don't elide copies from the same argument twice.
+ const Value *Val = SI->getValueOperand()->stripPointerCasts();
+ const auto *Arg = dyn_cast<Argument>(Val);
+ if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() ||
+ Arg->getType()->isEmptyTy() ||
+ DL.getTypeStoreSize(Arg->getType()) !=
+ DL.getTypeAllocSize(AI->getAllocatedType()) ||
+ ArgCopyElisionCandidates.count(Arg)) {
+ *Info = StaticAllocaInfo::Clobbered;
+ continue;
+ }
+
+ DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n');
+
+ // Mark this alloca and store for argument copy elision.
+ *Info = StaticAllocaInfo::Elidable;
+ ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
+
+ // Stop scanning if we've seen all arguments. This will happen early in -O0
+ // builds, which is useful, because -O0 builds have large entry blocks and
+ // many allocas.
+ if (ArgCopyElisionCandidates.size() == NumArgs)
+ break;
+ }
+}
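+
The shape this scan recognizes is the ordinary -O0 pattern of an argument spilled to its own alloca at function entry. As a sketch (illustrative source, not from the patch):

    // At -O0 the frontend emits an entry-block "store i64 %x, i64* %x.addr";
    // when %x arrives on the caller's stack (e.g. 32-bit x86), the candidate
    // recorded here lets %x.addr become the incoming stack slot itself,
    // eliding the copy.
    long long f(long long x) { return x; }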
+
+/// Try to elide argument copies from memory into a local alloca. Succeeds if
+/// ArgVal is a load from a suitable fixed stack object.
+static void tryToElideArgumentCopy(
+ FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains,
+ DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
+ SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
+ ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
+ SDValue ArgVal, bool &ArgHasUses) {
+ // Check if this is a load from a fixed stack object.
+ auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
+ if (!LNode)
+ return;
+ auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
+ if (!FINode)
+ return;
+
+ // Check that the fixed stack object is the right size and alignment.
+ // Look at the alignment that the user wrote on the alloca instead of looking
+ // at the stack object.
+ auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
+ assert(ArgCopyIter != ArgCopyElisionCandidates.end());
+ const AllocaInst *AI = ArgCopyIter->second.first;
+ int FixedIndex = FINode->getIndex();
+ int &AllocaIndex = FuncInfo->StaticAllocaMap[AI];
+ int OldIndex = AllocaIndex;
+ MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
+ if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
+ DEBUG(dbgs() << " argument copy elision failed due to bad fixed stack "
+ "object size\n");
+ return;
+ }
+ unsigned RequiredAlignment = AI->getAlignment();
+ if (!RequiredAlignment) {
+ RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment(
+ AI->getAllocatedType());
+ }
+ if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
+ DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
+ "greater than stack argument alignment ("
+ << RequiredAlignment << " vs "
+ << MFI.getObjectAlignment(FixedIndex) << ")\n");
+ return;
+ }
+
+ // Perform the elision. Delete the old stack object and replace its only use
+ // in the variable info map. Mark the stack object as mutable.
+ DEBUG({
+ dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
+ << " Replacing frame index " << OldIndex << " with " << FixedIndex
+ << '\n';
+ });
+ MFI.RemoveStackObject(OldIndex);
+ MFI.setIsImmutableObjectIndex(FixedIndex, false);
+ AllocaIndex = FixedIndex;
+ ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
+ Chains.push_back(ArgVal.getValue(1));
+
+ // Avoid emitting code for the store implementing the copy.
+ const StoreInst *SI = ArgCopyIter->second.second;
+ ElidedArgCopyInstrs.insert(SI);
+
+ // Check for uses of the argument again so that we can avoid exporting ArgVal
+  // if it isn't used by anything other than the store.
+ for (const Value *U : Arg.users()) {
+ if (U != SI) {
+ ArgHasUses = true;
+ break;
+ }
+ }
+}
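+
The alignment rule used above (an explicit alignment on the alloca wins, otherwise the ABI alignment of the allocated type) can be stated in isolation, with a hypothetical helper name:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instructions.h"

    // Illustrative restatement of the patch's alignment requirement for
    // reusing a fixed stack object as the alloca's storage.
    static unsigned requiredArgCopyAlignment(const llvm::AllocaInst *AI,
                                             const llvm::DataLayout &DL) {
      if (unsigned A = AI->getAlignment())
        return A;  // alignment the user wrote on the alloca
      return DL.getABITypeAlignment(AI->getAllocatedType());
    }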
+
void SelectionDAGISel::LowerArguments(const Function &F) {
SelectionDAG &DAG = SDB->DAG;
SDLoc dl = SDB->getCurSDLoc();
@@ -8032,15 +8177,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Ins.push_back(RetArg);
}
+ // Look for stores of arguments to static allocas. Mark such arguments with a
+ // flag to ask the target to give us the memory location of that argument if
+ // available.
+ ArgCopyElisionMapTy ArgCopyElisionCandidates;
+ findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);
+
// Set up the incoming argument description vector.
- unsigned Idx = 1;
- for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
- I != E; ++I, ++Idx) {
+ unsigned Idx = 0;
+ for (const Argument &Arg : F.args()) {
+ ++Idx;
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs);
- bool isArgValueUsed = !I->use_empty();
+ ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
+ bool isArgValueUsed = !Arg.use_empty();
unsigned PartBase = 0;
- Type *FinalType = I->getType();
+ Type *FinalType = Arg.getType();
if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
FinalType = cast<PointerType>(FinalType)->getElementType();
bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
@@ -8060,7 +8211,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// If we are using vectorcall calling convention, a structure that is
// passed InReg - is surely an HVA
if (F.getCallingConv() == CallingConv::X86_VectorCall &&
- isa<StructType>(I->getType())) {
+ isa<StructType>(Arg.getType())) {
// The first value of a structure is marked
if (0 == Value)
Flags.setHvaStart();
@@ -8092,7 +8243,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setByVal();
}
if (Flags.isByVal() || Flags.isInAlloca()) {
- PointerType *Ty = cast<PointerType>(I->getType());
+ PointerType *Ty = cast<PointerType>(Arg.getType());
Type *ElementTy = Ty->getElementType();
Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
// For ByVal, alignment should be passed from FE. BE will guess if
@@ -8109,6 +8260,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
Flags.setOrigAlign(OriginalAlignment);
+ if (ArgCopyElisionCandidates.count(&Arg))
+ Flags.setCopyElisionCandidate();
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
@@ -8155,7 +8308,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Set up the argument values.
unsigned i = 0;
- Idx = 1;
+ Idx = 0;
if (!FuncInfo->CanLowerReturn) {
// Create a virtual register for the sret pointer, and put in a copy
// from the sret argument into it.
@@ -8181,25 +8334,39 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
++i;
}
- for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
- ++I, ++Idx) {
+ SmallVector<SDValue, 4> Chains;
+ DenseMap<int, int> ArgCopyElisionFrameIndexMap;
+ for (const Argument &Arg : F.args()) {
+ ++Idx;
SmallVector<SDValue, 4> ArgValues;
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs);
+ ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ continue;
+
+ bool ArgHasUses = !Arg.use_empty();
+
+ // Elide the copying store if the target loaded this argument from a
+ // suitable fixed stack object.
+ if (Ins[i].Flags.isCopyElisionCandidate()) {
+ tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
+ ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
+ InVals[i], ArgHasUses);
+ }
// If this argument is unused then remember its value. It is used to generate
// debugging information.
bool isSwiftErrorArg =
TLI->supportSwiftError() &&
F.getAttributes().hasAttribute(Idx, Attribute::SwiftError);
- if (I->use_empty() && NumValues && !isSwiftErrorArg) {
- SDB->setUnusedArgValue(&*I, InVals[i]);
+ if (!ArgHasUses && !isSwiftErrorArg) {
+ SDB->setUnusedArgValue(&Arg, InVals[i]);
// Also remember any frame index for use in FastISel.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
- FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
for (unsigned Val = 0; Val != NumValues; ++Val) {
@@ -8210,16 +8377,15 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Even an apparent 'unused' swifterror argument needs to be returned. So
// we do generate a copy for it that can be used on return from the
// function.
- if (!I->use_empty() || isSwiftErrorArg) {
+ if (ArgHasUses || isSwiftErrorArg) {
Optional<ISD::NodeType> AssertOp;
if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
AssertOp = ISD::AssertSext;
else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
AssertOp = ISD::AssertZext;
- ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
- NumParts, PartVT, VT,
- nullptr, AssertOp));
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
+ PartVT, VT, nullptr, AssertOp));
}
i += NumParts;
@@ -8232,18 +8398,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Note down frame index.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
- FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
SDB->getCurSDLoc());
- SDB->setValue(&*I, Res);
+ SDB->setValue(&Arg, Res);
if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
- FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
// Update the SwiftErrorVRegDefMap.
@@ -8263,18 +8429,36 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// uses with vregs.
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- FuncInfo->ValueMap[&*I] = Reg;
+ FuncInfo->ValueMap[&Arg] = Reg;
continue;
}
}
- if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) {
- FuncInfo->InitializeRegForValue(&*I);
- SDB->CopyToExportRegsIfNeeded(&*I);
+ if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
+ FuncInfo->InitializeRegForValue(&Arg);
+ SDB->CopyToExportRegsIfNeeded(&Arg);
}
}
+ if (!Chains.empty()) {
+ Chains.push_back(NewRoot);
+ NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ }
+
+ DAG.setRoot(NewRoot);
+
assert(i == InVals.size() && "Argument register count mismatch!");
+ // If any argument copy elisions occurred and we have debug info, update the
+ // stale frame indices used in the dbg.declare variable info table.
+  MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo =
+      MF->getVariableDbgInfo();
+ if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
+ for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
+ auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
+ if (I != ArgCopyElisionFrameIndexMap.end())
+ VI.Slot = I->second;
+ }
+ }
+
// Finally, if the target has anything special to do, allow it to do so.
EmitFunctionEntryCode();
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index abde8a89befc..c6acc09b6602 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -616,33 +616,27 @@ public:
void init(GCFunctionInfo *gfi, AliasAnalysis &aa,
const TargetLibraryInfo *li);
- /// clear - Clear out the current SelectionDAG and the associated
- /// state and prepare this SelectionDAGBuilder object to be used
- /// for a new block. This doesn't clear out information about
- /// additional blocks that are needed to complete switch lowering
- /// or PHI node updating; that information is cleared out as it is
- /// consumed.
+ /// Clear out the current SelectionDAG and the associated state and prepare
+ /// this SelectionDAGBuilder object to be used for a new block. This doesn't
+ /// clear out information about additional blocks that are needed to complete
+ /// switch lowering or PHI node updating; that information is cleared out as
+ /// it is consumed.
void clear();
- /// clearDanglingDebugInfo - Clear the dangling debug information
- /// map. This function is separated from the clear so that debug
- /// information that is dangling in a basic block can be properly
- /// resolved in a different basic block. This allows the
- /// SelectionDAG to resolve dangling debug information attached
- /// to PHI nodes.
+ /// Clear the dangling debug information map. This function is separated from
+ /// the clear so that debug information that is dangling in a basic block can
+ /// be properly resolved in a different basic block. This allows the
+ /// SelectionDAG to resolve dangling debug information attached to PHI nodes.
void clearDanglingDebugInfo();
- /// getRoot - Return the current virtual root of the Selection DAG,
- /// flushing any PendingLoad items. This must be done before emitting
- /// a store or any other node that may need to be ordered after any
- /// prior load instructions.
- ///
+ /// Return the current virtual root of the Selection DAG, flushing any
+ /// PendingLoad items. This must be done before emitting a store or any other
+ /// node that may need to be ordered after any prior load instructions.
SDValue getRoot();
- /// getControlRoot - Similar to getRoot, but instead of flushing all the
- /// PendingLoad items, flush all the PendingExports items. It is necessary
- /// to do this before emitting a terminator instruction.
- ///
+ /// Similar to getRoot, but instead of flushing all the PendingLoad items,
+ /// flush all the PendingExports items. It is necessary to do this before
+ /// emitting a terminator instruction.
SDValue getControlRoot();
SDLoc getCurSDLoc() const {
@@ -688,12 +682,13 @@ public:
MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
Instruction::BinaryOps Opc, BranchProbability TW,
- BranchProbability FW);
+ BranchProbability FW, bool InvertCond);
void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
- BranchProbability TW, BranchProbability FW);
+ BranchProbability TW, BranchProbability FW,
+ bool InvertCond);
bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
void CopyToExportRegsIfNeeded(const Value *V);
@@ -900,6 +895,7 @@ private:
void visitInlineAsm(ImmutableCallSite CS);
const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
+ void visitConstrainedFPIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitVAStart(const CallInst &I);
void visitVAArg(const VAArgInst &I);
@@ -944,8 +940,8 @@ private:
/// Return the appropriate SDDbgValue based on N.
SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable,
- DIExpression *Expr, int64_t Offset, DebugLoc dl,
- unsigned DbgSDNodeOrder);
+ DIExpression *Expr, int64_t Offset,
+ const DebugLoc &dl, unsigned DbgSDNodeOrder);
};
/// RegsForValue - This struct represents the registers (physical or virtual)
@@ -958,26 +954,23 @@ private:
/// type.
///
struct RegsForValue {
- /// ValueVTs - The value types of the values, which may not be legal, and
+ /// The value types of the values, which may not be legal, and
/// may need be promoted or synthesized from one or more registers.
- ///
SmallVector<EVT, 4> ValueVTs;
- /// RegVTs - The value types of the registers. This is the same size as
- /// ValueVTs and it records, for each value, what the type of the assigned
- /// register or registers are. (Individual values are never synthesized
- /// from more than one type of register.)
+ /// The value types of the registers. This is the same size as ValueVTs and it
+ /// records, for each value, what the type of the assigned register or
+  /// registers is. (Individual values are never synthesized from more than one
+ /// type of register.)
///
/// With virtual registers, the contents of RegVTs is redundant with TLI's
/// getRegisterType member function, however when with physical registers
/// it is necessary to have a separate record of the types.
- ///
SmallVector<MVT, 4> RegVTs;
- /// Regs - This list holds the registers assigned to the values.
+ /// This list holds the registers assigned to the values.
/// Each legal or promoted value requires one register, and each
/// expanded value requires multiple registers.
- ///
SmallVector<unsigned, 4> Regs;
RegsForValue();
@@ -987,33 +980,33 @@ struct RegsForValue {
RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty);
- /// append - Add the specified values to this one.
+ /// Add the specified values to this one.
void append(const RegsForValue &RHS) {
ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
Regs.append(RHS.Regs.begin(), RHS.Regs.end());
}
- /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
- /// this value and returns the result as a ValueVTs value. This uses
- /// Chain/Flag as the input and updates them for the output Chain/Flag.
- /// If the Flag pointer is NULL, no flag is used.
+  /// Emit a series of CopyFromReg nodes that copy from this value and return
+ /// the result as a ValueVTs value. This uses Chain/Flag as the input and
+ /// updates them for the output Chain/Flag. If the Flag pointer is NULL, no
+ /// flag is used.
SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
const SDLoc &dl, SDValue &Chain, SDValue *Flag,
const Value *V = nullptr) const;
- /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the specified
- /// value into the registers specified by this object. This uses Chain/Flag
- /// as the input and updates them for the output Chain/Flag. If the Flag
- /// pointer is nullptr, no flag is used. If V is not nullptr, then it is used
- /// in printing better diagnostic messages on error.
+ /// Emit a series of CopyToReg nodes that copies the specified value into the
+ /// registers specified by this object. This uses Chain/Flag as the input and
+ /// updates them for the output Chain/Flag. If the Flag pointer is nullptr, no
+ /// flag is used. If V is not nullptr, then it is used in printing better
+ /// diagnostic messages on error.
void getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl,
SDValue &Chain, SDValue *Flag, const Value *V = nullptr,
ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
- /// AddInlineAsmOperands - Add this value to the specified inlineasm node
- /// operand list. This adds the code marker, matching input operand index
- /// (if applicable), and includes the number of values added into it.
+ /// Add this value to the specified inlineasm node operand list. This adds the
+ /// code marker, matching input operand index (if applicable), and includes
+ /// the number of values added into it.
void AddInlineAsmOperands(unsigned Kind, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 0faaad8a21b7..488c60a28ffb 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -300,6 +300,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
// Bit manipulation
+ case ISD::ABS: return "abs";
case ISD::BITREVERSE: return "bitreverse";
case ISD::BSWAP: return "bswap";
case ISD::CTPOP: return "ctpop";
@@ -366,11 +367,13 @@ static Printable PrintNodeId(const SDNode &Node) {
});
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); }
-void SDNode::dump(const SelectionDAG *G) const {
+LLVM_DUMP_METHOD void SDNode::dump(const SelectionDAG *G) const {
print(dbgs(), G);
dbgs() << '\n';
}
+#endif
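
The preprocessor gate here is the standard LLVM idiom for keeping dump methods out of release binaries while still allowing them via LLVM_ENABLE_DUMP; in its minimal form (hypothetical class name):

    #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    // Compiled only in asserts builds or when dumps are explicitly enabled,
    // so release binaries carry no dead dump code.
    LLVM_DUMP_METHOD void MyNode::dump() const { print(llvm::dbgs()); }
    #endif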
void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
@@ -416,7 +419,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
else {
OS << "<APFloat(";
- CSDN->getValueAPF().bitcastToAPInt().dump();
+ CSDN->getValueAPF().bitcastToAPInt().print(OS, false);
OS << ")>";
}
} else if (const GlobalAddressSDNode *GADN =
@@ -566,6 +569,7 @@ static bool shouldPrintInline(const SDNode &Node) {
return Node.getNumOperands() == 0;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
for (const SDValue &Op : N->op_values()) {
if (shouldPrintInline(*Op.getNode()))
@@ -592,6 +596,7 @@ LLVM_DUMP_METHOD void SelectionDAG::dump() const {
if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
dbgs() << "\n\n";
}
+#endif
void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
OS << PrintNodeId(*this) << ": ";
@@ -618,6 +623,7 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
typedef SmallPtrSet<const SDNode *, 32> VisitedSDNodeSet;
static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
const SelectionDAG *G, VisitedSDNodeSet &once) {
@@ -646,15 +652,16 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
DumpNodesr(OS, Op.getNode(), indent+2, G, once);
}
-void SDNode::dumpr() const {
+LLVM_DUMP_METHOD void SDNode::dumpr() const {
VisitedSDNodeSet once;
DumpNodesr(dbgs(), this, 0, nullptr, once);
}
-void SDNode::dumpr(const SelectionDAG *G) const {
+LLVM_DUMP_METHOD void SDNode::dumpr(const SelectionDAG *G) const {
VisitedSDNodeSet once;
DumpNodesr(dbgs(), this, 0, G, once);
}
+#endif
static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
const SelectionDAG *G, unsigned depth,
@@ -688,14 +695,17 @@ void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
printrWithDepth(OS, G, 10);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
printrWithDepth(dbgs(), G, depth);
}
-void SDNode::dumprFull(const SelectionDAG *G) const {
+LLVM_DUMP_METHOD void SDNode::dumprFull(const SelectionDAG *G) const {
// Don't print impossibly deep things.
dumprWithDepth(G, 10);
}
+#endif
void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
printr(OS, G);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 64e6c221229b..e21204dbb966 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -11,40 +11,65 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackProtector.h"
-#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -59,6 +84,13 @@
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -73,104 +105,6 @@ STATISTIC(NumEntryBlocks, "Number of entry blocks encountered");
STATISTIC(NumFastIselFailLowerArguments,
"Number of entry blocks where fast isel failed to lower arguments");
-#ifndef NDEBUG
-static cl::opt<bool>
-EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
- cl::desc("Enable extra verbose messages in the \"fast\" "
- "instruction selector"));
-
- // Terminators
-STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
-STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
-STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch");
-STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr");
-STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke");
-STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume");
-STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable");
-
- // Standard binary operators...
-STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add");
-STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd");
-STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub");
-STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub");
-STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul");
-STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul");
-STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv");
-STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv");
-STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv");
-STATISTIC(NumFastIselFailURem,"Fast isel fails on URem");
-STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem");
-STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem");
-
- // Logical operators...
-STATISTIC(NumFastIselFailAnd,"Fast isel fails on And");
-STATISTIC(NumFastIselFailOr,"Fast isel fails on Or");
-STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor");
-
- // Memory instructions...
-STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca");
-STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load");
-STATISTIC(NumFastIselFailStore,"Fast isel fails on Store");
-STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg");
-STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRWM");
-STATISTIC(NumFastIselFailFence,"Fast isel fails on Frence");
-STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr");
-
- // Convert instructions...
-STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc");
-STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt");
-STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt");
-STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc");
-STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt");
-STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI");
-STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI");
-STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP");
-STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP");
-STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr");
-STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt");
-STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast");
-
- // Other instructions...
-STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp");
-STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp");
-STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI");
-STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select");
-STATISTIC(NumFastIselFailCall,"Fast isel fails on Call");
-STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl");
-STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr");
-STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr");
-STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg");
-STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement");
-STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement");
-STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector");
-STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue");
-STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue");
-STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad");
-
-// Intrinsic instructions...
-STATISTIC(NumFastIselFailIntrinsicCall, "Fast isel fails on Intrinsic call");
-STATISTIC(NumFastIselFailSAddWithOverflow,
- "Fast isel fails on sadd.with.overflow");
-STATISTIC(NumFastIselFailUAddWithOverflow,
- "Fast isel fails on uadd.with.overflow");
-STATISTIC(NumFastIselFailSSubWithOverflow,
- "Fast isel fails on ssub.with.overflow");
-STATISTIC(NumFastIselFailUSubWithOverflow,
- "Fast isel fails on usub.with.overflow");
-STATISTIC(NumFastIselFailSMulWithOverflow,
- "Fast isel fails on smul.with.overflow");
-STATISTIC(NumFastIselFailUMulWithOverflow,
- "Fast isel fails on umul.with.overflow");
-STATISTIC(NumFastIselFailFrameaddress, "Fast isel fails on Frameaddress");
-STATISTIC(NumFastIselFailSqrt, "Fast isel fails on sqrt call");
-STATISTIC(NumFastIselFailStackMap, "Fast isel fails on StackMap call");
-STATISTIC(NumFastIselFailPatchPoint, "Fast isel fails on PatchPoint call");
-#endif
-
-static cl::opt<bool>
-EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
- cl::desc("Enable verbose messages in the \"fast\" "
- "instruction selector"));
static cl::opt<int> EnableFastISelAbort(
"fast-isel-abort", cl::Hidden,
cl::desc("Enable abort calls when \"fast\" instruction selection "
@@ -179,6 +113,11 @@ static cl::opt<int> EnableFastISelAbort(
"abort for argument lowering, and 3 will never fallback "
"to SelectionDAG."));
+static cl::opt<bool> EnableFastISelFallbackReport(
+ "fast-isel-report-on-fallback", cl::Hidden,
+ cl::desc("Emit a diagnostic when \"fast\" instruction selection "
+ "falls back to SelectionDAG."));
+
static cl::opt<bool>
UseMBPI("use-mbpi",
cl::desc("use Machine Branch Probability Info"),
@@ -238,7 +177,7 @@ MachinePassRegistry RegisterScheduler::Registry;
///
//===---------------------------------------------------------------------===//
static cl::opt<RegisterScheduler::FunctionPassCtor, false,
- RegisterPassParser<RegisterScheduler> >
+ RegisterPassParser<RegisterScheduler>>
ISHeuristic("pre-RA-sched",
cl::init(&createDefaultScheduler), cl::Hidden,
cl::desc("Instruction schedulers available (before register"
@@ -249,6 +188,7 @@ defaultListDAGScheduler("default", "Best scheduler for the target",
createDefaultScheduler);
namespace llvm {
+
//===--------------------------------------------------------------------===//
/// \brief This class is used by SelectionDAGISel to temporarily override
/// the optimization level on a per-function basis.
@@ -318,6 +258,7 @@ namespace llvm {
"Unknown sched type!");
return createILPListDAGScheduler(IS, OptLevel);
}
+
} // end namespace llvm
// EmitInstrWithCustomInserter - This method should be implemented by targets
@@ -431,8 +372,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MachineFunctionProperties::Property::Selected))
return false;
// Do some sanity-checking on the command-line options.
- assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
- "-fast-isel-verbose requires -fast-isel");
assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
"-fast-isel-abort > 0 requires -fast-isel");
@@ -457,12 +396,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
+ ORE = make_unique<OptimizationRemarkEmitter>(&Fn);
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
SplitCriticalSideEffectEdges(const_cast<Function &>(Fn));
- CurDAG->init(*MF);
+ CurDAG->init(*MF, *ORE);
FuncInfo->set(Fn, *MF, CurDAG);
if (UseMBPI && OptLevel != CodeGenOpt::None)
@@ -502,6 +442,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TLI->initializeSplitCSR(EntryMBB);
SelectAllBasicBlocks(Fn);
+ if (FastISelFailed && EnableFastISelFallbackReport) {
+ DiagnosticInfoISelFallback DiagFallback(Fn);
+ Fn.getContext().diagnose(DiagFallback);
+ }
// If the first basic block in the function has live ins that need to be
// copied into vregs, emit the copies into the top of the block before
@@ -628,7 +572,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
unsigned To = I->second;
// If To is also scheduled to be replaced, find what its ultimate
// replacement is.
- for (;;) {
+ while (true) {
DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
if (J == E) break;
To = J->second;
@@ -666,13 +610,30 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
return true;
}
+static void reportFastISelFailure(MachineFunction &MF,
+ OptimizationRemarkEmitter &ORE,
+ OptimizationRemarkMissed &R,
+ bool ShouldAbort) {
+ // Print the function name explicitly if we don't have a debug location (which
+ // makes the diagnostic less useful) or if we're going to emit a raw error.
+ if (!R.getLocation().isValid() || ShouldAbort)
+ R << (" (in function: " + MF.getName() + ")").str();
+
+ if (ShouldAbort)
+ report_fatal_error(R.getMsg());
+
+ ORE.emit(R);
+}
+
void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
BasicBlock::const_iterator End,
bool &HadTailCall) {
// Lower the instructions. If a call is emitted as a tail call, cease emitting
// nodes for this block.
- for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
- SDB->visit(*I);
+  for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall;
+       ++I) {
+ if (!ElidedArgCopyInstrs.count(&*I))
+ SDB->visit(*I);
+ }
// Make sure the root of the DAG is up-to-date.
CurDAG->setRoot(SDB->getControlRoot());
@@ -731,6 +692,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
int BlockNumber = -1;
(void)BlockNumber;
bool MatchFilterBB = false; (void)MatchFilterBB;
+
+ // Pre-type legalization allow creation of any node types.
+ CurDAG->NewNodesMustHaveLegalTypes = false;
+
#ifndef NDEBUG
MatchFilterBB = (FilterDAGBasicBlockName.empty() ||
FilterDAGBasicBlockName ==
@@ -777,6 +742,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
+ // Only allow creation of legal node types.
CurDAG->NewNodesMustHaveLegalTypes = true;
if (Changed) {
@@ -802,12 +768,18 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
}
if (Changed) {
+ DEBUG(dbgs() << "Vector-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
{
NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName,
GroupDescription, TimePassesIsEnabled);
CurDAG->LegalizeTypes();
}
+ DEBUG(dbgs() << "Vector/type-legalized selection DAG: BB#" << BlockNumber
+ << " '" << BlockName << "'\n"; CurDAG->dump());
+
if (ViewDAGCombineLT && MatchFilterBB)
CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
@@ -907,10 +879,12 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
}
namespace {
+
/// ISelUpdater - helper class to handle updates of the instruction selection
/// graph.
class ISelUpdater : public SelectionDAG::DAGUpdateListener {
SelectionDAG::allnodes_iterator &ISelPosition;
+
public:
ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp)
: SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {}
@@ -923,8 +897,53 @@ public:
++ISelPosition;
}
};
+
} // end anonymous namespace
+static bool isStrictFPOp(SDNode *Node, unsigned &NewOpc) {
+ unsigned OrigOpc = Node->getOpcode();
+ switch (OrigOpc) {
+ case ISD::STRICT_FADD: NewOpc = ISD::FADD; return true;
+ case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; return true;
+ case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; return true;
+ case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; return true;
+ case ISD::STRICT_FREM: NewOpc = ISD::FREM; return true;
+ default: return false;
+ }
+}
+
+SDNode* SelectionDAGISel::MutateStrictFPToFP(SDNode *Node, unsigned NewOpc) {
+ assert(((Node->getOpcode() == ISD::STRICT_FADD && NewOpc == ISD::FADD) ||
+ (Node->getOpcode() == ISD::STRICT_FSUB && NewOpc == ISD::FSUB) ||
+ (Node->getOpcode() == ISD::STRICT_FMUL && NewOpc == ISD::FMUL) ||
+ (Node->getOpcode() == ISD::STRICT_FDIV && NewOpc == ISD::FDIV) ||
+ (Node->getOpcode() == ISD::STRICT_FREM && NewOpc == ISD::FREM)) &&
+ "Unexpected StrictFP opcode!");
+
+ // We're taking this node out of the chain, so we need to re-link things.
+ SDValue InputChain = Node->getOperand(0);
+ SDValue OutputChain = SDValue(Node, 1);
+ CurDAG->ReplaceAllUsesOfValueWith(OutputChain, InputChain);
+
+ SDVTList VTs = CurDAG->getVTList(Node->getOperand(1).getValueType());
+ SDValue Ops[2] = { Node->getOperand(1), Node->getOperand(2) };
+ SDNode *Res = CurDAG->MorphNodeTo(Node, NewOpc, VTs, Ops);
+
+ // MorphNodeTo can operate in two ways: if an existing node with the
+ // specified operands exists, it can just return it. Otherwise, it
+ // updates the node in place to have the requested operands.
+ if (Res == Node) {
+ // If we updated the node in place, reset the node ID. To the isel,
+ // this should be just like a newly allocated machine node.
+ Res->setNodeId(-1);
+ } else {
+ CurDAG->ReplaceAllUsesWith(Node, Res);
+ CurDAG->RemoveDeadNode(Node);
+ }
+
+ return Res;
+}
+
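A minimal standalone sketch of the chain re-linking step above, using a toy node representation rather than the real SelectionDAG API (all types and names here are illustrative, not LLVM's):

#include <cstdio>
#include <vector>

// Toy stand-ins for SDNode/SDValue: each operand records which node and
// which result number it reads. Result 1 of the strict op is its chain.
struct Node;
struct Use { Node *N; unsigned ResNo; };
struct Node { const char *Name; std::vector<Use> Ops; };

// Redirect every operand in the graph reading (From, ResNo) to To.
static void replaceAllUsesOfValueWith(std::vector<Node *> &Graph, Node *From,
                                      unsigned ResNo, Use To) {
  for (Node *N : Graph)
    for (Use &U : N->Ops)
      if (U.N == From && U.ResNo == ResNo)
        U = To;
}

int main() {
  Node Entry{"EntryChain", {}};
  Node Strict{"STRICT_FADD", {{&Entry, 0}}}; // Ops[0] is the incoming chain.
  Node Ret{"RET", {{&Strict, 1}}};           // chained after the strict op
  std::vector<Node *> Graph{&Entry, &Strict, &Ret};

  // Forward users of the strict op's chain result to its input chain, as
  // MutateStrictFPToFP does before morphing the node.
  replaceAllUsesOfValueWith(Graph, &Strict, 1, Strict.Ops[0]);
  std::printf("RET is now chained to %s\n", Ret.Ops[0].N->Name);
}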
void SelectionDAGISel::DoInstructionSelection() {
DEBUG(dbgs() << "===== Instruction selection begins: BB#"
<< FuncInfo->MBB->getNumber()
@@ -960,7 +979,23 @@ void SelectionDAGISel::DoInstructionSelection() {
if (Node->use_empty())
continue;
+ // When we are using non-default rounding modes or FP exception behavior
+ // FP operations are represented by StrictFP pseudo-operations. They
+ // need to be simplified here so that the target-specific instruction
+ // selectors know how to handle them.
+ //
+ // If the current node is a strict FP pseudo-op, the isStrictFPOp()
+ // function will provide the corresponding normal FP opcode to which the
+ // node should be mutated.
+ unsigned NormalFPOpc = ISD::UNDEF;
+ bool IsStrictFPOp = isStrictFPOp(Node, NormalFPOpc);
+ if (IsStrictFPOp)
+ Node = MutateStrictFPToFP(Node, NormalFPOpc);
+
Select(Node);
+
+ // FIXME: Add code here to attach an implicit def and use of
+ // target-specific FP environment registers.
}
CurDAG->setRoot(Dummy.getValue());
@@ -1046,116 +1081,6 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
}
-#ifndef NDEBUG
-// Collect per Instruction statistics for fast-isel misses. Only those
-// instructions that cause the bail are accounted for. It does not account for
-// instructions higher in the block. Thus, summing the per instructions stats
-// will not add up to what is reported by NumFastIselFailures.
-static void collectFailStats(const Instruction *I) {
- switch (I->getOpcode()) {
- default: assert (0 && "<Invalid operator> ");
-
- // Terminators
- case Instruction::Ret: NumFastIselFailRet++; return;
- case Instruction::Br: NumFastIselFailBr++; return;
- case Instruction::Switch: NumFastIselFailSwitch++; return;
- case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return;
- case Instruction::Invoke: NumFastIselFailInvoke++; return;
- case Instruction::Resume: NumFastIselFailResume++; return;
- case Instruction::Unreachable: NumFastIselFailUnreachable++; return;
-
- // Standard binary operators...
- case Instruction::Add: NumFastIselFailAdd++; return;
- case Instruction::FAdd: NumFastIselFailFAdd++; return;
- case Instruction::Sub: NumFastIselFailSub++; return;
- case Instruction::FSub: NumFastIselFailFSub++; return;
- case Instruction::Mul: NumFastIselFailMul++; return;
- case Instruction::FMul: NumFastIselFailFMul++; return;
- case Instruction::UDiv: NumFastIselFailUDiv++; return;
- case Instruction::SDiv: NumFastIselFailSDiv++; return;
- case Instruction::FDiv: NumFastIselFailFDiv++; return;
- case Instruction::URem: NumFastIselFailURem++; return;
- case Instruction::SRem: NumFastIselFailSRem++; return;
- case Instruction::FRem: NumFastIselFailFRem++; return;
-
- // Logical operators...
- case Instruction::And: NumFastIselFailAnd++; return;
- case Instruction::Or: NumFastIselFailOr++; return;
- case Instruction::Xor: NumFastIselFailXor++; return;
-
- // Memory instructions...
- case Instruction::Alloca: NumFastIselFailAlloca++; return;
- case Instruction::Load: NumFastIselFailLoad++; return;
- case Instruction::Store: NumFastIselFailStore++; return;
- case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return;
- case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return;
- case Instruction::Fence: NumFastIselFailFence++; return;
- case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return;
-
- // Convert instructions...
- case Instruction::Trunc: NumFastIselFailTrunc++; return;
- case Instruction::ZExt: NumFastIselFailZExt++; return;
- case Instruction::SExt: NumFastIselFailSExt++; return;
- case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return;
- case Instruction::FPExt: NumFastIselFailFPExt++; return;
- case Instruction::FPToUI: NumFastIselFailFPToUI++; return;
- case Instruction::FPToSI: NumFastIselFailFPToSI++; return;
- case Instruction::UIToFP: NumFastIselFailUIToFP++; return;
- case Instruction::SIToFP: NumFastIselFailSIToFP++; return;
- case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return;
- case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return;
- case Instruction::BitCast: NumFastIselFailBitCast++; return;
-
- // Other instructions...
- case Instruction::ICmp: NumFastIselFailICmp++; return;
- case Instruction::FCmp: NumFastIselFailFCmp++; return;
- case Instruction::PHI: NumFastIselFailPHI++; return;
- case Instruction::Select: NumFastIselFailSelect++; return;
- case Instruction::Call: {
- if (auto const *Intrinsic = dyn_cast<IntrinsicInst>(I)) {
- switch (Intrinsic->getIntrinsicID()) {
- default:
- NumFastIselFailIntrinsicCall++; return;
- case Intrinsic::sadd_with_overflow:
- NumFastIselFailSAddWithOverflow++; return;
- case Intrinsic::uadd_with_overflow:
- NumFastIselFailUAddWithOverflow++; return;
- case Intrinsic::ssub_with_overflow:
- NumFastIselFailSSubWithOverflow++; return;
- case Intrinsic::usub_with_overflow:
- NumFastIselFailUSubWithOverflow++; return;
- case Intrinsic::smul_with_overflow:
- NumFastIselFailSMulWithOverflow++; return;
- case Intrinsic::umul_with_overflow:
- NumFastIselFailUMulWithOverflow++; return;
- case Intrinsic::frameaddress:
- NumFastIselFailFrameaddress++; return;
- case Intrinsic::sqrt:
- NumFastIselFailSqrt++; return;
- case Intrinsic::experimental_stackmap:
- NumFastIselFailStackMap++; return;
- case Intrinsic::experimental_patchpoint_void: // fall-through
- case Intrinsic::experimental_patchpoint_i64:
- NumFastIselFailPatchPoint++; return;
- }
- }
- NumFastIselFailCall++;
- return;
- }
- case Instruction::Shl: NumFastIselFailShl++; return;
- case Instruction::LShr: NumFastIselFailLShr++; return;
- case Instruction::AShr: NumFastIselFailAShr++; return;
- case Instruction::VAArg: NumFastIselFailVAArg++; return;
- case Instruction::ExtractElement: NumFastIselFailExtractElement++; return;
- case Instruction::InsertElement: NumFastIselFailInsertElement++; return;
- case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return;
- case Instruction::ExtractValue: NumFastIselFailExtractValue++; return;
- case Instruction::InsertValue: NumFastIselFailInsertValue++; return;
- case Instruction::LandingPad: NumFastIselFailLandingPad++; return;
- }
-}
-#endif // NDEBUG
-
/// Set up SwiftErrorVals by going through the function. If the function has
/// swifterror argument, it will be the first entry.
static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
@@ -1190,9 +1115,9 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
}
static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo,
+ FastISel *FastIS,
const TargetLowering *TLI,
const TargetInstrInfo *TII,
- const BasicBlock *LLVMBB,
SelectionDAGBuilder *SDB) {
if (!TLI->supportSwiftError())
return;
@@ -1202,22 +1127,27 @@ static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo,
if (FuncInfo->SwiftErrorVals.empty())
return;
- if (pred_begin(LLVMBB) == pred_end(LLVMBB)) {
- auto &DL = FuncInfo->MF->getDataLayout();
- auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
- for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
- // We will always generate a copy from the argument. It is always used at
- // least by the 'return' of the swifterror.
- if (FuncInfo->SwiftErrorArg && FuncInfo->SwiftErrorArg == SwiftErrorVal)
- continue;
- unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
- // Assign Undef to Vreg. We construct MI directly to make sure it works
- // with FastISel.
- BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(),
- SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
- VReg);
- FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg);
- }
+ assert(FuncInfo->MBB == &*FuncInfo->MF->begin() &&
+ "expected to insert into entry block");
+ auto &DL = FuncInfo->MF->getDataLayout();
+ auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
+ // We will always generate a copy from the argument. It is always used at
+ // least by the 'return' of the swifterror.
+ if (FuncInfo->SwiftErrorArg && FuncInfo->SwiftErrorArg == SwiftErrorVal)
+ continue;
+ unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
+ // Assign Undef to Vreg. We construct MI directly to make sure it works
+ // with FastISel.
+ BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(),
+ SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
+ VReg);
+
+ // Keep FastIS informed about the value we just inserted.
+ if (FastIS)
+ FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
+
+ FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg);
}
}
@@ -1340,6 +1270,7 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
}
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
+ FastISelFailed = false;
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = nullptr;
if (TM.Options.EnableFastISel)
@@ -1347,12 +1278,53 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
setupSwiftErrorVals(Fn, TLI, FuncInfo);
- // Iterate over all basic blocks in the function.
ReversePostOrderTraversal<const Function*> RPOT(&Fn);
- for (ReversePostOrderTraversal<const Function*>::rpo_iterator
- I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
- const BasicBlock *LLVMBB = *I;
+ // Lower arguments up front. An RPO iteration always visits the entry block
+ // first.
+ assert(*RPOT.begin() == &Fn.getEntryBlock());
+ ++NumEntryBlocks;
+
+ // Set up FuncInfo for ISel. Entry blocks never have PHIs.
+ FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()];
+ FuncInfo->InsertPt = FuncInfo->MBB->begin();
+
+ if (!FastIS) {
+ LowerArguments(Fn);
+ } else {
+ // See if fast isel can lower the arguments.
+ FastIS->startNewBlock();
+ if (!FastIS->lowerArguments()) {
+ FastISelFailed = true;
+ // Fast isel failed to lower these arguments
+ ++NumFastIselFailLowerArguments;
+
+ OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
+ Fn.getSubprogram(),
+ &Fn.getEntryBlock());
+ R << "FastISel didn't lower all arguments: "
+ << ore::NV("Prototype", Fn.getType());
+ reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 1);
+
+ // Use SelectionDAG argument lowering
+ LowerArguments(Fn);
+ CurDAG->setRoot(SDB->getControlRoot());
+ SDB->clear();
+ CodeGenAndEmitDAG();
+ }
+
+ // If we inserted any instructions at the beginning, make a note of
+ // where they are, so we can be sure to emit subsequent instructions
+ // after them.
+ if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
+ FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
+ else
+ FastIS->setLastLocalValue(nullptr);
+ }
+ createSwiftErrorEntriesInEntryBlock(FuncInfo, FastIS, TLI, TII, SDB);
+
+ // Iterate over all basic blocks in the function.
+ for (const BasicBlock *LLVMBB : RPOT) {
if (OptLevel != CodeGenOpt::None) {
bool AllPredsVisited = true;
for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
@@ -1384,8 +1356,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
if (!FuncInfo->MBB)
continue; // Some blocks like catchpads have no code or MBB.
- FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
- createSwiftErrorEntriesInEntryBlock(FuncInfo, TLI, TII, LLVMBB, SDB);
+
+ // Insert new instructions after any phi or argument setup code.
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
// Setup an EH landing-pad block.
FuncInfo->ExceptionPointerVirtReg = 0;
@@ -1396,35 +1369,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Before doing SelectionDAG ISel, see if FastISel has been requested.
if (FastIS) {
- FastIS->startNewBlock();
-
- // Emit code for any incoming arguments. This must happen before
- // beginning FastISel on the entry block.
- if (LLVMBB == &Fn.getEntryBlock()) {
- ++NumEntryBlocks;
-
- // Lower any arguments needed in this block if this is the entry block.
- if (!FastIS->lowerArguments()) {
- // Fast isel failed to lower these arguments
- ++NumFastIselFailLowerArguments;
- if (EnableFastISelAbort > 1)
- report_fatal_error("FastISel didn't lower all arguments");
-
- // Use SelectionDAG argument lowering
- LowerArguments(Fn);
- CurDAG->setRoot(SDB->getControlRoot());
- SDB->clear();
- CodeGenAndEmitDAG();
- }
-
- // If we inserted any instructions at the beginning, make a note of
- // where they are, so we can be sure to emit subsequent instructions
- // after them.
- if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
- FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
- else
- FastIS->setLastLocalValue(nullptr);
- }
+ if (LLVMBB != &Fn.getEntryBlock())
+ FastIS->startNewBlock();
unsigned NumFastIselRemaining = std::distance(Begin, End);
// Do FastISel on as many instructions as possible.
@@ -1432,7 +1378,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
const Instruction *Inst = &*std::prev(BI);
// If we no longer require this instruction, skip it.
- if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo) ||
+ ElidedArgCopyInstrs.count(Inst)) {
--NumFastIselRemaining;
continue;
}
@@ -1443,6 +1390,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Try to select the instruction with FastISel.
if (FastIS->selectInstruction(Inst)) {
--NumFastIselRemaining;
++NumFastIselSuccess;
// If fast isel succeeded, skip over all the folded instructions, and
@@ -1465,22 +1413,22 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
continue;
}
-#ifndef NDEBUG
- if (EnableFastISelVerbose2)
- collectFailStats(Inst);
-#endif
-
+ FastISelFailed = true;
+
// Then handle certain instructions as single-LLVM-Instruction blocks.
if (isa<CallInst>(Inst)) {
+ OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
+ Inst->getDebugLoc(), LLVMBB);
+
+ R << "FastISel missed call";
+
+ if (R.isEnabled() || EnableFastISelAbort) {
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << *Inst;
- if (EnableFastISelVerbose || EnableFastISelAbort) {
- dbgs() << "FastISel missed call: ";
- Inst->dump();
+ R << ": " << InstStr.str();
}
- if (EnableFastISelAbort > 2)
- // FastISel selector couldn't handle something and bailed.
- // For the purpose of debugging, just abort.
- report_fatal_error("FastISel didn't select the entire block");
+
+ reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 2);
if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() &&
!Inst->use_empty()) {
@@ -1509,35 +1457,35 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
continue;
}
+ OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
+ Inst->getDebugLoc(), LLVMBB);
+
bool ShouldAbort = EnableFastISelAbort;
- if (EnableFastISelVerbose || EnableFastISelAbort) {
- if (isa<TerminatorInst>(Inst)) {
- // Use a different message for terminator misses.
- dbgs() << "FastISel missed terminator: ";
- // Don't abort unless for terminator unless the level is really high
- ShouldAbort = (EnableFastISelAbort > 2);
- } else {
- dbgs() << "FastISel miss: ";
- }
- Inst->dump();
+ if (isa<TerminatorInst>(Inst)) {
+ // Use a different message for terminator misses.
+ R << "FastISel missed terminator";
+ // Don't abort for terminators unless the level is really high.
+ ShouldAbort = (EnableFastISelAbort > 2);
+ } else {
+ R << "FastISel missed";
+ }
+
+ if (R.isEnabled() || EnableFastISelAbort) {
+ std::string InstStrStorage;
+ raw_string_ostream InstStr(InstStrStorage);
+ InstStr << *Inst;
+ R << ": " << InstStr.str();
}
- if (ShouldAbort)
- // FastISel selector couldn't handle something and bailed.
- // For the purpose of debugging, just abort.
- report_fatal_error("FastISel didn't select the entire block");
+
+ reportFastISelFailure(*MF, *ORE, R, ShouldAbort);
NumFastIselFailures += NumFastIselRemaining;
break;
}
FastIS->recomputeInsertPt();
- } else {
- // Lower any arguments needed in this block if this is the entry block.
- if (LLVMBB == &Fn.getEntryBlock()) {
- ++NumEntryBlocks;
- LowerArguments(Fn);
- }
}
+
if (getAnalysis<StackProtector>().shouldEmitSDCheck(*LLVMBB)) {
bool FunctionBasedInstrumentation =
TLI->getSSPStackGuardCheck(*Fn.getParent());
@@ -1556,10 +1504,17 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// block.
bool HadTailCall;
SelectBasicBlock(Begin, BI, HadTailCall);
+
+ // But if FastISel was run, we already selected some of the block.
+ // If we emitted a tail-call, we need to delete any previously emitted
+ // instruction that follows it.
+ if (FastIS && HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end())
+ FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end());
}
FinishBasicBlock();
FuncInfo->PHINodesToUpdate.clear();
+ ElidedArgCopyInstrs.clear();
}
propagateSwiftErrorVRegs(FuncInfo);
@@ -2177,7 +2132,6 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
IgnoreChains = false;
}
-
SmallPtrSet<SDNode*, 16> Visited;
return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
}
@@ -2554,7 +2508,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N,
- const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+ const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) {
// Accept if it is exactly the same as a previously recorded node.
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
@@ -2564,9 +2518,9 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
/// CheckChildSame - Implements OP_CheckChildXSame.
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N,
- const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes,
- unsigned ChildNo) {
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes,
+ unsigned ChildNo) {
if (ChildNo >= N.getNumOperands())
return false; // Match fails if out of range child #.
return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo),
@@ -2688,7 +2642,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
unsigned Index, SDValue N,
bool &Result,
const SelectionDAGISel &SDISel,
- SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+ SmallVectorImpl<std::pair<SDValue, SDNode*>> &RecordedNodes) {
switch (Table[Index++]) {
default:
Result = false;
@@ -2756,6 +2710,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
}
namespace {
+
struct MatchScope {
/// FailIndex - If this match fails, this is the index to continue with.
unsigned FailIndex;
@@ -2785,6 +2740,7 @@ class MatchStateUpdater : public SelectionDAG::DAGUpdateListener
SDNode **NodeToMatch;
SmallVectorImpl<std::pair<SDValue, SDNode *>> &RecordedNodes;
SmallVectorImpl<MatchScope> &MatchScopes;
+
public:
MatchStateUpdater(SelectionDAG &DAG, SDNode **NodeToMatch,
SmallVectorImpl<std::pair<SDValue, SDNode *>> &RN,
@@ -2816,6 +2772,7 @@ public:
J.setNode(E);
}
};
+
} // end anonymous namespace
void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
@@ -2921,7 +2878,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// with an OPC_SwitchOpcode instruction. Populate the table now, since this
// is the first time we're selecting an instruction.
unsigned Idx = 1;
- while (1) {
+ while (true) {
// Get the size of this case.
unsigned CaseSize = MatcherTable[Idx++];
if (CaseSize & 128)
@@ -2942,7 +2899,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
MatcherIndex = OpcodeOffset[N.getOpcode()];
}
- while (1) {
+ while (true) {
assert(MatcherIndex < TableSize && "Invalid index");
#ifndef NDEBUG
unsigned CurrentOpcodeIndex = MatcherIndex;
@@ -2957,7 +2914,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// immediately fail, don't even bother pushing a scope for them.
unsigned FailIndex;
- while (1) {
+ while (true) {
unsigned NumToSkip = MatcherTable[MatcherIndex++];
if (NumToSkip & 128)
NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
@@ -3118,7 +3075,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
unsigned CurNodeOpcode = N.getOpcode();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
unsigned CaseSize;
- while (1) {
+ while (true) {
// Get the size of this case.
CaseSize = MatcherTable[MatcherIndex++];
if (CaseSize & 128)
@@ -3149,7 +3106,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
MVT CurNodeVT = N.getSimpleValueType();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
unsigned CaseSize;
- while (1) {
+ while (true) {
// Get the size of this case.
CaseSize = MatcherTable[MatcherIndex++];
if (CaseSize & 128)
@@ -3215,7 +3172,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// a single use.
bool HasMultipleUses = false;
for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
- if (!NodeStack[i].hasOneUse()) {
+ if (!NodeStack[i].getNode()->hasOneUse()) {
HasMultipleUses = true;
break;
}
@@ -3381,6 +3338,15 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, nullptr));
continue;
}
+ case OPC_Coverage: {
+ // This is emitted right before MorphNode/EmitNode.
+ // So it should be safe to assume that this node has been selected.
+ unsigned index = MatcherTable[MatcherIndex++];
+ index |= (MatcherTable[MatcherIndex++] << 8);
+ dbgs() << "COVERED: " << getPatternForIndex(index) << "\n";
+ dbgs() << "INCLUDED: " << getIncludePathForIndex(index) << "\n";
+ continue;
+ }
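The two matcher-table bytes above form one 16-bit pattern index, low byte first. A quick illustration of the decoding (table contents made up):

#include <cstdio>

int main() {
  const unsigned char MatcherTable[] = {0x34, 0x12}; // low byte, high byte
  unsigned Index = MatcherTable[0] | (MatcherTable[1] << 8);
  std::printf("pattern index = 0x%X\n", Index); // prints 0x1234
}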
case OPC_EmitNode: case OPC_MorphNodeTo:
case OPC_EmitNode0: case OPC_EmitNode1: case OPC_EmitNode2:
@@ -3473,7 +3439,6 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
nullptr));
}
-
} else {
assert(NodeToMatch->getOpcode() != ISD::DELETED_NODE &&
"NodeToMatch was removed partway through selection");
@@ -3610,7 +3575,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
// find a case to check.
DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n");
++NumDAGIselRetries;
- while (1) {
+ while (true) {
if (MatchScopes.empty()) {
CannotYetSelect(NodeToMatch);
return;
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 690f0d2c8082..2756e276c6a9 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -55,14 +55,15 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
- AttributeSet CallerAttrs = F->getAttributes();
- if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex)
- .removeAttribute(Attribute::NoAlias).hasAttributes())
+ AttributeList CallerAttrs = F->getAttributes();
+ if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias)
+ .hasAttributes())
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
- if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
- CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
return false;
// Check if the only use is a function return node.
@@ -96,19 +97,20 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
/// \brief Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
-void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS,
- unsigned AttrIdx) {
- isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt);
- isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt);
- isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg);
- isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
- isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
- isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
- isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca);
- isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
- isSwiftSelf = CS->paramHasAttr(AttrIdx, Attribute::SwiftSelf);
- isSwiftError = CS->paramHasAttr(AttrIdx, Attribute::SwiftError);
- Alignment = CS->getParamAlignment(AttrIdx);
+void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS,
+ unsigned ArgIdx) {
+ IsSExt = CS->paramHasAttr(ArgIdx, Attribute::SExt);
+ IsZExt = CS->paramHasAttr(ArgIdx, Attribute::ZExt);
+ IsInReg = CS->paramHasAttr(ArgIdx, Attribute::InReg);
+ IsSRet = CS->paramHasAttr(ArgIdx, Attribute::StructRet);
+ IsNest = CS->paramHasAttr(ArgIdx, Attribute::Nest);
+ IsByVal = CS->paramHasAttr(ArgIdx, Attribute::ByVal);
+ IsInAlloca = CS->paramHasAttr(ArgIdx, Attribute::InAlloca);
+ IsReturned = CS->paramHasAttr(ArgIdx, Attribute::Returned);
+ IsSwiftSelf = CS->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
+ IsSwiftError = CS->paramHasAttr(ArgIdx, Attribute::SwiftError);
+ // FIXME: getParamAlignment is off by one from argument index.
+ Alignment = CS->getParamAlignment(ArgIdx + 1);
}
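The FIXME above exists because attribute lists number the return value as slot 0 and the first parameter as slot 1, while ArgIdx here is zero-based; hence the +1. A toy illustration of the index mapping only, not the real API:

#include <cstdio>

int main() {
  // AttributeList slots: 0 = return value, 1..N = IR arguments 0..N-1.
  for (unsigned ArgIdx = 0; ArgIdx < 3; ++ArgIdx)
    std::printf("IR argument %u -> attribute slot %u\n", ArgIdx, ArgIdx + 1);
}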
/// Generate a libcall taking the given operands as arguments and returning a
@@ -125,8 +127,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
for (SDValue Op : Ops) {
Entry.Node = Op;
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
- Entry.isZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
+ Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
+ Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
Args.push_back(Entry);
}
@@ -138,10 +140,13 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
- .setSExtResult(signExtend).setZExtResult(!signExtend);
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
+ .setNoReturn(doesNotReturn)
+ .setDiscardResult(!isReturnValueUsed)
+ .setSExtResult(signExtend)
+ .setZExtResult(!signExtend);
return LowerCallTo(CLI);
}
@@ -334,34 +339,35 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// Optimization Methods
//===----------------------------------------------------------------------===//
-/// Check to see if the specified operand of the specified instruction is a
-/// constant integer. If so, check to see if there are any bits set in the
-/// constant that are not demanded. If so, shrink the constant and return true.
-bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
- const APInt &Demanded) {
- SDLoc dl(Op);
+/// If the specified instruction has a constant integer operand and there are
+/// bits set in that constant that are not demanded, then clear those bits and
+/// return true.
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(
+ SDValue Op, const APInt &Demanded) {
+ SDLoc DL(Op);
+ unsigned Opcode = Op.getOpcode();
// FIXME: ISD::SELECT, ISD::SELECT_CC
- switch (Op.getOpcode()) {
- default: break;
+ switch (Opcode) {
+ default:
+ break;
case ISD::XOR:
case ISD::AND:
case ISD::OR: {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (!C) return false;
+ auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!Op1C)
+ return false;
- if (Op.getOpcode() == ISD::XOR &&
- (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
+ // If this is a 'not' op, don't touch it because that's a canonical form.
+ const APInt &C = Op1C->getAPIntValue();
+ if (Opcode == ISD::XOR && (C | ~Demanded).isAllOnesValue())
return false;
- // if we can expand it to have all bits set, do it
- if (C->getAPIntValue().intersects(~Demanded)) {
+ if (C.intersects(~Demanded)) {
EVT VT = Op.getValueType();
- SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
- DAG.getConstant(Demanded &
- C->getAPIntValue(),
- dl, VT));
- return CombineTo(Op, New);
+ SDValue NewC = DAG.getConstant(Demanded & C, DL, VT);
+ SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
+ return CombineTo(Op, NewOp);
}
break;
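A worked miniature of the transform, with plain 64-bit masks standing in for APInt (illustrative only):

#include <cstdint>
#include <cstdio>

// If the constant has bits set outside the demanded mask, clear them.
// For XOR, a constant that is all-ones over the demanded bits acts as a
// 'not' and is deliberately left alone (canonical form).
static bool shrinkDemandedConstant(uint64_t &C, uint64_t Demanded, bool IsXor) {
  if (IsXor && (C | ~Demanded) == ~0ULL)
    return false;
  if ((C & ~Demanded) == 0)
    return false; // no undemanded bit is set
  C &= Demanded;
  return true;
}

int main() {
  uint64_t C = 0xFF; // e.g. (and x, 0xFF) where only the low 4 bits matter
  bool Changed = shrinkDemandedConstant(C, /*Demanded=*/0xF, /*IsXor=*/false);
  std::printf("changed=%d, constant is now 0x%llX\n", Changed,
              (unsigned long long)C); // changed=1, 0xF
}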
@@ -470,6 +476,21 @@ TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User,
return true;
}
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask,
+ DAGCombinerInfo &DCI) const {
+
+ SelectionDAG &DAG = DCI.DAG;
+ TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ APInt KnownZero, KnownOne;
+
+ bool Simplified = SimplifyDemandedBits(Op, DemandedMask, KnownZero, KnownOne,
+ TLO);
+ if (Simplified)
+ DCI.CommitTargetLoweringOpt(TLO);
+ return Simplified;
+}
+
/// Look at Op. At this point, we know that only the DemandedMask bits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
@@ -711,8 +732,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
}
- KnownZero = KnownZeroOut;
- KnownOne = KnownOneOut;
+ KnownZero = std::move(KnownZeroOut);
+ KnownOne = std::move(KnownOneOut);
break;
case ISD::SELECT:
if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
@@ -750,6 +771,33 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
break;
+ case ISD::SETCC: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // If (1) we only need the sign-bit, (2) the setcc operands are the same
+ // width as the setcc result, and (3) the result of a setcc conforms to 0 or
+ // -1, we may be able to bypass the setcc.
+ if (NewMask.isSignBit() && Op0.getScalarValueSizeInBits() == BitWidth &&
+ getBooleanContents(Op.getValueType()) ==
+ BooleanContent::ZeroOrNegativeOneBooleanContent) {
+ // If we're testing X < 0, then this compare isn't needed - just use X!
+ // FIXME: We're limiting to integer types here, but this should also work
+ // if we don't care about FP signed-zero. The use of SETLT with FP means
+ // that we don't care about NaNs.
+ if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
+ (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
+ return TLO.CombineTo(Op, Op0);
+
+ // TODO: Should we check for other forms of sign-bit comparisons?
+ // Examples: X <= -1, X >= 0
+ }
+ if (getBooleanContents(Op0.getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ KnownZero.setBitsFrom(1);
+ break;
+ }
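A small demonstration of why the bypass is sound under zero-or-negative-one booleans: the materialized compare result and X always agree on the sign bit (plain C++, illustrative only):

#include <cstdint>
#include <cstdio>

int main() {
  for (int32_t X : {-7, -1, 0, 1, 42}) {
    // ZeroOrNegativeOneBooleanContent: setlt(X, 0) materializes as -1 or 0.
    int32_t SetCC = (X < 0) ? -1 : 0;
    std::printf("X=%3d  signbit(setcc)=%u  signbit(X)=%u\n", X,
                (uint32_t)SetCC >> 31, (uint32_t)X >> 31); // always equal
  }
}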
case ISD::SHL:
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
unsigned ShAmt = SA->getZExtValue();
@@ -834,7 +882,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownZero <<= SA->getZExtValue();
KnownOne <<= SA->getZExtValue();
// low bits known zero.
- KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue());
+ KnownZero.setLowBits(SA->getZExtValue());
}
break;
case ISD::SRL:
@@ -853,7 +901,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact())
- InDemandedMask |= APInt::getLowBitsSet(BitWidth, ShAmt);
+ InDemandedMask.setLowBits(ShAmt);
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
@@ -884,8 +932,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
- KnownZero |= HighBits; // High bits known zero.
+ KnownZero.setHighBits(ShAmt); // High bits known zero.
}
break;
case ISD::SRA:
@@ -911,7 +958,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact())
- InDemandedMask |= APInt::getLowBitsSet(BitWidth, ShAmt);
+ InDemandedMask.setLowBits(ShAmt);
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
@@ -1075,7 +1122,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarSizeInBits();
APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
- APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
+ APInt InSignBit = APInt::getOneBitSet(BitWidth, InBits - 1);
APInt NewBits = ~InMask & NewMask;
// If none of the top bits are demanded, convert this into an any_extend.
@@ -1191,7 +1238,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return true;
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero |= ~InMask & NewMask;
+ KnownZero |= ~InMask;
break;
}
case ISD::BITCAST:
@@ -1281,6 +1328,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
@@ -1295,6 +1343,7 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ const APInt &,
const SelectionDAG &,
unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
@@ -2050,6 +2099,16 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETO || Cond == ISD::SETUO)
return DAG.getSetCC(dl, VT, N0, N0, Cond);
+ // setcc (fneg x), C -> setcc swap(pred) x, -C
+ if (N0.getOpcode() == ISD::FNEG) {
+ ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
+ if (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
+ SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
+ }
+ }
+
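A quick check of the identity behind the fold above: for IEEE doubles, (-x < c) and (x > -c) agree on every input, including NaNs and signed zeros (illustrative only):

#include <cmath>
#include <cstdio>

int main() {
  const double Tests[][2] = {
      {1.5, 2.0}, {-3.0, 2.0}, {0.0, -0.0}, {NAN, 1.0}, {1.0, NAN}};
  for (const auto &T : Tests) {
    double X = T[0], C = T[1];
    bool Before = (-X < C); // setcc (fneg x), C, setlt
    bool After = (X > -C);  // setcc x, (fneg C), setgt -- swapped predicate
    std::printf("x=%g c=%g  before=%d after=%d\n", X, C, Before, After);
  }
}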
// If the condition is not legal, see if we can find an equivalent one
// which is legal.
if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
@@ -2470,10 +2529,7 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
// Figure out which register class contains this reg.
- for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
- E = RI->regclass_end(); RCI != E; ++RCI) {
- const TargetRegisterClass *RC = *RCI;
-
+ for (const TargetRegisterClass *RC : RI->regclasses()) {
// If none of the value types for this register class are valid, we
// can't use it. For example, 64-bit reg classes on 32-bit targets.
if (!isLegalRC(RC))
@@ -2933,7 +2989,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
std::vector<SDNode *> *Created) const {
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
@@ -3808,7 +3864,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
- CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
+ CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 209bbe54ea23..ab578df4069d 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -64,6 +64,7 @@ public:
private:
bool setupEntryBlockAndCallSites(Function &F);
+ bool undoSwiftErrorSelect(Function &F);
void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal);
Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst *> LPads);
void lowerIncomingArguments(Function &F);
@@ -174,8 +175,8 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
// because the value needs to be added to the global context list.
auto &DL = F.getParent()->getDataLayout();
unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy);
- FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context",
- &EntryBB->front());
+ FuncCtx = new AllocaInst(FunctionContextTy, DL.getAllocaAddrSpace(),
+ nullptr, Align, "fn_context", &EntryBB->front());
// Fill in the function context structure.
for (LandingPadInst *LPI : LPads) {
@@ -458,14 +459,33 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
return true;
}
+bool SjLjEHPrepare::undoSwiftErrorSelect(Function &F) {
+ // We have inserted dummy copies 'select true, arg, undef' in the entry block
+ // for arguments to simplify this pass.
+ // swifterror arguments cannot be used in this way. Undo the select for the
+ // swifterror argument.
+ for (auto &AI : F.args()) {
+ if (AI.isSwiftError()) {
+ assert(AI.hasOneUse() && "Must have converted the argument to a select");
+ auto *Select = dyn_cast<SelectInst>(AI.use_begin()->getUser());
+ assert(Select && "There must be single select user");
+ auto *OrigSwiftError = cast<Argument>(Select->getTrueValue());
+ Select->replaceAllUsesWith(OrigSwiftError);
+ Select->eraseFromParent();
+ return true;
+ }
+ }
+ return false;
+}
+
bool SjLjEHPrepare::runOnFunction(Function &F) {
Module &M = *F.getParent();
RegisterFn = M.getOrInsertFunction(
"_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()),
- PointerType::getUnqual(FunctionContextTy), nullptr);
+ PointerType::getUnqual(FunctionContextTy));
UnregisterFn = M.getOrInsertFunction(
"_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()),
- PointerType::getUnqual(FunctionContextTy), nullptr);
+ PointerType::getUnqual(FunctionContextTy));
FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
@@ -476,5 +496,7 @@ bool SjLjEHPrepare::runOnFunction(Function &F) {
FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
bool Res = setupEntryBlockAndCallSites(F);
+ if (Res)
+ Res |= undoSwiftErrorSelect(F);
return Res;
}
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index dba103e9bfb1..bc2a1d09056b 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -103,6 +103,48 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
return false;
}
+void SlotIndexes::removeMachineInstrFromMaps(MachineInstr &MI) {
+ assert(!MI.isBundledWithPred() &&
+ "Use removeSingleMachineInstrFromMaps() instread");
+ Mi2IndexMap::iterator mi2iItr = mi2iMap.find(&MI);
+ if (mi2iItr == mi2iMap.end())
+ return;
+
+ SlotIndex MIIndex = mi2iItr->second;
+ IndexListEntry &MIEntry = *MIIndex.listEntry();
+ assert(MIEntry.getInstr() == &MI && "Instruction indexes broken.");
+ mi2iMap.erase(mi2iItr);
+ // FIXME: Eventually we want to actually delete these indexes.
+ MIEntry.setInstr(nullptr);
+}
+
+void SlotIndexes::removeSingleMachineInstrFromMaps(MachineInstr &MI) {
+ Mi2IndexMap::iterator mi2iItr = mi2iMap.find(&MI);
+ if (mi2iItr == mi2iMap.end())
+ return;
+
+ SlotIndex MIIndex = mi2iItr->second;
+ IndexListEntry &MIEntry = *MIIndex.listEntry();
+ assert(MIEntry.getInstr() == &MI && "Instruction indexes broken.");
+ mi2iMap.erase(mi2iItr);
+
+ // When removing the first instruction of a bundle, update the mapping to
+ // the next instruction.
+ if (MI.isBundledWithSucc()) {
+ // Only the first instruction of a bundle should have an index assigned.
+ assert(!MI.isBundledWithPred() && "Should have first bundle isntruction");
+
+ MachineBasicBlock::instr_iterator Next = std::next(MI.getIterator());
+ MachineInstr &NextMI = *Next;
+ MIEntry.setInstr(&NextMI);
+ mi2iMap.insert(std::make_pair(&NextMI, MIIndex));
+ return;
+ } else {
+ // FIXME: Eventually we want to actually delete these indexes.
+ MIEntry.setInstr(nullptr);
+ }
+}
+
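In miniature, the bundle case above just moves the slot mapping from the removed head to its bundled successor (toy data structures, not the real SlotIndexes API):

#include <cstdio>
#include <map>

struct Instr { const char *Name; Instr *BundledSucc; };

int main() {
  Instr Tail{"tail", nullptr};
  Instr Head{"head", &Tail}; // only the bundle head carries an index
  std::map<Instr *, unsigned> Mi2Index{{&Head, 16}};

  // removeSingleMachineInstrFromMaps(Head), reduced to its essence:
  unsigned Idx = Mi2Index[&Head];
  Mi2Index.erase(&Head);
  if (Head.BundledSucc)
    Mi2Index[Head.BundledSucc] = Idx; // the bundle keeps its slot index

  std::printf("tail is now at index %u\n", Mi2Index[&Tail]); // 16
}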
void SlotIndexes::renumberIndexes() {
// Renumber updates the index of every element of the index list.
DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 1c6a84e53944..3a50aaa69985 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -487,12 +488,126 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
VFP = ValueForcePair(nullptr, true);
}
+SlotIndex SplitEditor::buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def) {
+ const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+ bool FirstCopy = !Def.isValid();
+ MachineInstr *CopyMI = BuildMI(MBB, InsertBefore, DebugLoc(), Desc)
+ .addReg(ToReg, RegState::Define | getUndefRegState(FirstCopy)
+ | getInternalReadRegState(!FirstCopy), SubIdx)
+ .addReg(FromReg, 0, SubIdx);
+
+ BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ if (FirstCopy) {
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
+ Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
+ } else {
+ CopyMI->bundleWithPred();
+ }
+ LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx);
+ DestLI.refineSubRanges(Allocator, LaneMask,
+ [Def, &Allocator](LiveInterval::SubRange& SR) {
+ SR.createDeadDef(Def, Allocator);
+ });
+ return Def;
+}
+
+SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg,
+ LaneBitmask LaneMask, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
+ const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+ if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {
+ // The full vreg is copied.
+ MachineInstr *CopyMI =
+ BuildMI(MBB, InsertBefore, DebugLoc(), Desc, ToReg).addReg(FromReg);
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
+ return Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
+ }
+
+ // Only a subset of lanes needs to be copied. The following is a simple
+ // heuristic to construct a sequence of COPYs. We could add a target
+ // specific callback if this turns out to be suboptimal.
+ LiveInterval &DestLI = LIS.getInterval(Edit->get(RegIdx));
+
+ // First pass: Try to find a perfectly matching subregister index. If none
+ // exists find the one covering the most lanemask bits.
+ SmallVector<unsigned, 8> PossibleIndexes;
+ unsigned BestIdx = 0;
+ unsigned BestCover = 0;
+ const TargetRegisterClass *RC = MRI.getRegClass(FromReg);
+ assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class");
+ for (unsigned Idx = 1, E = TRI.getNumSubRegIndices(); Idx < E; ++Idx) {
+ // Is this index even compatible with the given class?
+ if (TRI.getSubClassWithSubReg(RC, Idx) != RC)
+ continue;
+ LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
+ // Early exit if we found a perfect match.
+ if (SubRegMask == LaneMask) {
+ BestIdx = Idx;
+ break;
+ }
+
+ // The index must not cover any lanes outside \p LaneMask.
+ if ((SubRegMask & ~LaneMask).any())
+ continue;
+
+ unsigned PopCount = countPopulation(SubRegMask.getAsInteger());
+ PossibleIndexes.push_back(Idx);
+ if (PopCount > BestCover) {
+ BestCover = PopCount;
+ BestIdx = Idx;
+ }
+ }
+
+ // Abort if we cannot possibly implement the COPY with the given indexes.
+ if (BestIdx == 0)
+ report_fatal_error("Impossible to implement partial COPY");
+
+ SlotIndex Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore,
+ BestIdx, DestLI, Late, SlotIndex());
+
+ // Greedy heuristic: keep iterating, picking the best covering subreg index
+ // each time.
+ LaneBitmask LanesLeft =
+ LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx));
+ while (LanesLeft.any()) {
+ unsigned BestIdx = 0;
+ int BestCover = INT_MIN;
+ for (unsigned Idx : PossibleIndexes) {
+ LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
+ // Early exit if we found a perfect match.
+ if (SubRegMask == LanesLeft) {
+ BestIdx = Idx;
+ break;
+ }
+
+ // Try to cover as many of the remaining lanes as possible while using
+ // as few of the already covered lanes as possible.
+ int Cover = countPopulation((SubRegMask & LanesLeft).getAsInteger())
+ - countPopulation((SubRegMask & ~LanesLeft).getAsInteger());
+ if (Cover > BestCover) {
+ BestCover = Cover;
+ BestIdx = Idx;
+ }
+ }
+
+ if (BestIdx == 0)
+ report_fatal_error("Impossible to implement partial COPY");
+
+ buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
+ DestLI, Late, Def);
+ LanesLeft &= ~TRI.getSubRegIndexLaneMask(BestIdx);
+ }
+
+ return Def;
+}
+
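The scoring rule in the second loop (newly covered lanes minus lanes outside the remaining set) is easy to exercise in isolation; a standalone sketch with made-up lane masks rather than the real TRI tables:

#include <bitset>
#include <climits>
#include <cstdint>
#include <cstdio>
#include <vector>

static int popcount(uint32_t V) { return (int)std::bitset<32>(V).count(); }

int main() {
  uint32_t LanesLeft = 0b1111; // lanes still to copy
  const std::vector<uint32_t> Candidates{0b0011, 0b0110, 0b1100};
  while (LanesLeft) {
    uint32_t Best = 0;
    int BestCover = INT_MIN;
    for (uint32_t M : Candidates) {
      int Cover = popcount(M & LanesLeft) - popcount(M & ~LanesLeft);
      if (Cover > BestCover) { BestCover = Cover; Best = M; }
    }
    if ((Best & LanesLeft) == 0)
      break; // cannot make progress; the real code reports a fatal error
    std::printf("emit COPY for lanes 0x%X\n", Best); // 0x3, then 0xC
    LanesLeft &= ~Best;
  }
}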
VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
VNInfo *ParentVNI,
SlotIndex UseIdx,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
- MachineInstr *CopyMI = nullptr;
SlotIndex Def;
LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
@@ -505,24 +620,29 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
+ unsigned Reg = LI->reg;
bool DidRemat = false;
if (OrigVNI) {
LiveRangeEdit::Remat RM(ParentVNI);
RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
- Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
+ Def = Edit->rematerializeAt(MBB, I, Reg, RM, TRI, Late);
++NumRemats;
DidRemat = true;
}
}
if (!DidRemat) {
- // Can't remat, just insert a copy from parent.
- CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
- .addReg(Edit->getReg());
- Def = LIS.getSlotIndexes()
- ->insertMachineInstrInMaps(*CopyMI, Late)
- .getRegSlot();
+ LaneBitmask LaneMask;
+ if (LI->hasSubRanges()) {
+ LaneMask = LaneBitmask::getNone();
+ for (LiveInterval::SubRange &S : LI->subranges())
+ LaneMask |= S.LaneMask;
+ } else {
+ LaneMask = LaneBitmask::getAll();
+ }
+
++NumCopies;
+ Def = buildCopy(Edit->getReg(), Reg, LaneMask, MBB, I, Late, RegIdx);
}
// Define the value in Reg.
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index a75738aaf446..9d409e924a3d 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -405,6 +405,17 @@ private:
/// deleteRematVictims - Delete defs that are dead after rematerializing.
void deleteRematVictims();
+ /// Add a copy instruction copying \p FromReg to \p ToReg before
+ /// \p InsertBefore. This can be invoked with a \p LaneMask which may make it
+ /// necessary to construct a sequence of copies to cover it exactly.
+ SlotIndex buildCopy(unsigned FromReg, unsigned ToReg, LaneBitmask LaneMask,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ bool Late, unsigned RegIdx);
+
+ SlotIndex buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
+ MachineBasicBlock &MB, MachineBasicBlock::iterator InsertBefore,
+ unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex PrevCopy);
+
public:
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
/// Newly created intervals will be appended to newIntervals.
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index 89c4b574f17f..f51d959a089a 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -23,7 +23,6 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -385,14 +384,13 @@ void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-#ifndef NDEBUG
-
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
const BitVector &BV) const {
- DEBUG(dbgs() << tag << " : { ");
+ dbgs() << tag << " : { ";
for (unsigned I = 0, E = BV.size(); I != E; ++I)
- DEBUG(dbgs() << BV.test(I) << " ");
- DEBUG(dbgs() << "}\n");
+ dbgs() << BV.test(I) << " ";
+ dbgs() << "}\n";
}
LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
@@ -408,20 +406,19 @@ LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
LLVM_DUMP_METHOD void StackColoring::dump() const {
for (MachineBasicBlock *MBB : depth_first(MF)) {
- DEBUG(dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
- << MBB->getName() << "]\n");
- DEBUG(dumpBB(MBB));
+ dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
+ << MBB->getName() << "]\n";
+ dumpBB(MBB);
}
}
LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
- DEBUG(dbgs() << "Interval[" << I << "]:\n");
- DEBUG(Intervals[I]->dump());
+ dbgs() << "Interval[" << I << "]:\n";
+ Intervals[I]->dump();
}
}
-
-#endif // not NDEBUG
+#endif
static inline int getStartOrEndSlot(const MachineInstr &MI)
{
@@ -570,9 +567,8 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
// Step 2: compute begin/end sets for each block
- // NOTE: We use a reverse-post-order iteration to ensure that we obtain a
- // deterministic numbering, and because we'll need a post-order iteration
- // later for solving the liveness dataflow problem.
+ // NOTE: We use a depth-first iteration to ensure that we obtain a
+ // deterministic numbering.
for (MachineBasicBlock *MBB : depth_first(MF)) {
// Assign a serial number to this basic block.
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 9b7dd400fc92..1a8ec5bff322 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -1,4 +1,4 @@
-//===---------------------------- StackMaps.cpp ---------------------------===//
+//===- StackMaps.cpp ------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,23 +7,34 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
#include <iterator>
+#include <utility>
using namespace llvm;
@@ -276,7 +287,8 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
}
LiveOuts.erase(
- remove_if(LiveOuts, [](const LiveOutReg &LO) { return LO.Reg == 0; }),
+ llvm::remove_if(LiveOuts,
+ [](const LiveOutReg &LO) { return LO.Reg == 0; }),
LiveOuts.end());
return LiveOuts;
@@ -286,7 +298,6 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
MachineInstr::const_mop_iterator MOI,
MachineInstr::const_mop_iterator MOE,
bool recordResult) {
-
MCContext &OutContext = AP.OutStreamer->getContext();
MCSymbol *MILabel = OutContext.createTempSymbol();
AP.OutStreamer->EmitLabel(MILabel);
@@ -378,6 +389,7 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) {
}
#endif
}
+
void StackMaps::recordStatepoint(const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint");
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index c2c010a29d44..a8aafe78748d 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -1,4 +1,4 @@
-//===-- StackProtector.cpp - Stack Protector Insertion --------------------===//
+//===- StackProtector.cpp - Stack Protector Insertion ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,30 +14,38 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/StackProtector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/EHPersonalities.h"
-#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include <cstdlib>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "stack-protector"
@@ -51,7 +59,7 @@ static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp",
char StackProtector::ID = 0;
INITIALIZE_TM_PASS(StackProtector, "stack-protector", "Insert stack protectors",
- false, true)
+ false, true)
FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) {
return new StackProtector(TM);
@@ -222,7 +230,16 @@ bool StackProtector::RequiresStackProtector() {
if (F->hasFnAttribute(Attribute::SafeStack))
return false;
+ // We are constructing the OptimizationRemarkEmitter on the fly rather than
+ // using the analysis pass to avoid building DominatorTree and LoopInfo,
+ // which are not available this late in the IR pipeline.
+ OptimizationRemarkEmitter ORE(F);
+
if (F->hasFnAttribute(Attribute::StackProtectReq)) {
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "StackProtectorRequested", F)
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to a function attribute or command-line switch");
NeedsProtector = true;
Strong = true; // Use the same heuristic as strong to determine SSPLayout
} else if (F->hasFnAttribute(Attribute::StackProtectStrong))
@@ -236,20 +253,29 @@ bool StackProtector::RequiresStackProtector() {
for (const Instruction &I : BB) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
if (AI->isArrayAllocation()) {
+ OptimizationRemark Remark(DEBUG_TYPE, "StackProtectorAllocaOrArray",
+ &I);
+ Remark
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to a call to alloca or use of a variable length array";
if (const auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) {
// A call to alloca with size >= SSPBufferSize requires
// stack protectors.
Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
+ ORE.emit(Remark);
NeedsProtector = true;
} else if (Strong) {
// Require protectors for all alloca calls in strong mode.
Layout.insert(std::make_pair(AI, SSPLK_SmallArray));
+ ORE.emit(Remark);
NeedsProtector = true;
}
} else {
// A call to alloca with a variable size requires protectors.
Layout.insert(std::make_pair(AI, SSPLK_LargeArray));
+ ORE.emit(Remark);
NeedsProtector = true;
}
continue;
@@ -259,6 +285,11 @@ bool StackProtector::RequiresStackProtector() {
if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) {
Layout.insert(std::make_pair(AI, IsLarge ? SSPLK_LargeArray
: SSPLK_SmallArray));
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I)
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to a stack allocated buffer or struct containing a "
+ "buffer");
NeedsProtector = true;
continue;
}
@@ -266,6 +297,11 @@ bool StackProtector::RequiresStackProtector() {
if (Strong && HasAddressTaken(AI)) {
++NumAddrTaken;
Layout.insert(std::make_pair(AI, SSPLK_AddrOf));
+ ORE.emit(
+ OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken", &I)
+ << "Stack protection applied to function "
+ << ore::NV("Function", F)
+ << " due to the address of a local variable being taken");
NeedsProtector = true;
}
}
@@ -448,13 +484,13 @@ BasicBlock *StackProtector::CreateFailBB() {
Constant *StackChkFail =
M->getOrInsertFunction("__stack_smash_handler",
Type::getVoidTy(Context),
- Type::getInt8PtrTy(Context), nullptr);
+ Type::getInt8PtrTy(Context));
B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
} else {
Constant *StackChkFail =
- M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context),
- nullptr);
+ M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context));
+
B.CreateCall(StackChkFail, {});
}
B.CreateUnreachable();
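The remark-emission pattern above can be reused by any pass that has a Function in hand; a minimal sketch using only the API the hunks themselves show (the pass name and remark name below are illustrative placeholders):

    // Build an OptimizationRemarkEmitter directly, without the analysis pass,
    // and stream a named value into the remark.
    OptimizationRemarkEmitter ORE(F);
    ORE.emit(OptimizationRemark("my-pass", "ExampleRemark", F)
             << "decision made for " << ore::NV("Function", F));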
diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp
index 7709236bbaa8..d2414200e9d5 100644
--- a/lib/CodeGen/TailDuplicator.cpp
+++ b/lib/CodeGen/TailDuplicator.cpp
@@ -725,6 +725,7 @@ bool TailDuplicator::duplicateSimpleBB(
if (PredTBB == NextBB && PredFBB == nullptr)
PredTBB = nullptr;
+ auto DL = PredBB->findBranchDebugLoc();
TII->removeBranch(*PredBB);
if (!PredBB->isSuccessor(NewTarget))
@@ -735,7 +736,7 @@ bool TailDuplicator::duplicateSimpleBB(
}
if (PredTBB)
- TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+ TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DL);
TDBBs.push_back(PredBB);
}
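The point of this change is to keep the branch's source location alive across removal and re-insertion; a minimal sketch of the pattern, restating only the calls visible in the hunk:

    // Capture the debug location before the branch is deleted, then reuse it
    // when the replacement branch is inserted.
    DebugLoc DL = PredBB->findBranchDebugLoc();
    TII->removeBranch(*PredBB);
    TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DL);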
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index f082add8c7dd..e5def6752e07 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -73,7 +73,7 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
return;
// Get the callee saved register list...
- const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
// Early exit if there are no callee saved registers.
if (!CSRegs || CSRegs[0] == 0)
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 01f91b96b58a..711144a34743 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -470,7 +470,7 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
// No need to fold return, the meta data, and function arguments
for (unsigned i = 0; i < StartIdx; ++i)
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
for (unsigned i = StartIdx; i < MI.getNumOperands(); ++i) {
MachineOperand &MO = MI.getOperand(i);
@@ -490,7 +490,7 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
MIB.addImm(SpillOffset);
}
else
- MIB.addOperand(MO);
+ MIB.add(MO);
}
return NewMI;
}
@@ -941,12 +941,10 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr &MI) const {
unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
- if (MI.getOpcode() != FrameSetupOpcode &&
- MI.getOpcode() != FrameDestroyOpcode)
+ if (!isFrameInstr(MI))
return 0;
- int SPAdj = MI.getOperand(0).getImm();
- SPAdj = TFI->alignSPAdjust(SPAdj);
+ int SPAdj = TFI->alignSPAdjust(getFrameSize(MI));
if ((!StackGrowsDown && MI.getOpcode() == FrameSetupOpcode) ||
(StackGrowsDown && MI.getOpcode() == FrameDestroyOpcode))
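With this rewrite, callers no longer compare opcodes against the call-frame pseudos by hand; a minimal sketch of the intended query idiom, assuming a MachineInstr MI and the isFrameInstr/getSPAdjust helpers used above:

    // Query the stack-pointer adjustment of a call-frame pseudo instruction.
    if (TII->isFrameInstr(MI)) {          // call-frame setup/destroy pseudo
      int SPAdj = TII->getSPAdjust(MI);   // frame size, aligned by the target
      (void)SPAdj;
    }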
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 003311b157fc..27630a3055cb 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -838,7 +838,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
HasExtractBitsInsn = false;
JumpIsExpensive = JumpIsExpensiveOverride;
PredictableSelectIsExpensive = false;
- MaskAndBranchFoldingIsLegal = false;
EnableExtLdPromotion = false;
HasFloatingPointExceptions = true;
StackPointerRegisterToSaveRestore = 0;
@@ -851,7 +850,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
MinFunctionAlignment = 0;
PrefFunctionAlignment = 0;
PrefLoopAlignment = 0;
- GatherAllAliasesMaxDepth = 6;
+ GatherAllAliasesMaxDepth = 18;
MinStackArgumentAlignment = 1;
// TODO: the default will be switched to 0 in the next commit, along
// with the Target-specific changes necessary.
@@ -901,6 +900,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SMAX, VT, Expand);
setOperationAction(ISD::UMIN, VT, Expand);
setOperationAction(ISD::UMAX, VT, Expand);
+ setOperationAction(ISD::ABS, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
@@ -1227,7 +1227,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
// Copy operands before the frame-index.
for (unsigned i = 0; i < OperIdx; ++i)
- MIB.addOperand(MI->getOperand(i));
+ MIB.add(MI->getOperand(i));
// Add frame index operands recognized by stackmaps.cpp
if (MFI.isStatepointSpillSlotObjectIndex(FI)) {
// indirect-mem-ref tag, size, #FI, offset.
@@ -1237,18 +1237,18 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity");
MIB.addImm(StackMaps::IndirectMemRefOp);
MIB.addImm(MFI.getObjectSize(FI));
- MIB.addOperand(MI->getOperand(OperIdx));
+ MIB.add(MI->getOperand(OperIdx));
MIB.addImm(0);
} else {
// direct-mem-ref tag, #FI, offset.
// Used by patchpoint, and direct alloca arguments to statepoints
MIB.addImm(StackMaps::DirectMemRefOp);
- MIB.addOperand(MI->getOperand(OperIdx));
+ MIB.add(MI->getOperand(OperIdx));
MIB.addImm(0);
}
// Copy the operands after the frame index.
for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i)
- MIB.addOperand(MI->getOperand(i));
+ MIB.add(MI->getOperand(i));
// Inherit previous memory operands.
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
@@ -1589,7 +1589,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
/// TODO: Move this out of TargetLowering.cpp.
-void llvm::GetReturnInfo(Type *ReturnType, AttributeSet attr,
+void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr,
SmallVectorImpl<ISD::OutputArg> &Outs,
const TargetLowering &TLI, const DataLayout &DL) {
SmallVector<EVT, 4> ValueVTs;
@@ -1601,9 +1601,9 @@ void llvm::GetReturnInfo(Type *ReturnType, AttributeSet attr,
EVT VT = ValueVTs[j];
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
// FIXME: C calling convention requires the return type to be promoted to
@@ -1621,13 +1621,13 @@ void llvm::GetReturnInfo(Type *ReturnType, AttributeSet attr,
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg))
+ if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::InReg))
Flags.setInReg();
// Propagate extension type if any
- if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
+ if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
Flags.setSExt();
- else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt))
+ else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i)
@@ -1818,7 +1818,7 @@ Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
Value *Fn = M->getOrInsertFunction("__safestack_pointer_address",
- StackPtrTy->getPointerTo(0), nullptr);
+ StackPtrTy->getPointerTo(0));
return IRB.CreateCall(Fn);
}
@@ -1918,11 +1918,7 @@ void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
/// override the target defaults.
static StringRef getRecipEstimateForFunc(MachineFunction &MF) {
const Function *F = MF.getFunction();
- StringRef RecipAttrName = "reciprocal-estimates";
- if (!F->hasFnAttribute(RecipAttrName))
- return StringRef();
-
- return F->getFnAttribute(RecipAttrName).getValueAsString();
+ return F->getFnAttribute("reciprocal-estimates").getValueAsString();
}
/// Construct a string for the given reciprocal operation of the given type.
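The simplification works because Function::getFnAttribute returns an empty Attribute when the key is absent, so getValueAsString() yields an empty StringRef and the explicit hasFnAttribute guard was redundant. A minimal sketch:

    // An absent string attribute reads back as "", so a single call suffices.
    StringRef Recips =
        F->getFnAttribute("reciprocal-estimates").getValueAsString();
    if (Recips.empty()) {
      // No per-function override; fall back to the target defaults.
    }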
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index eb2a28f574a5..34892680aceb 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===//
+//===- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info ---===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,36 +12,52 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SectionKind.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <string>
+
using namespace llvm;
using namespace dwarf;
@@ -53,10 +69,10 @@ MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
const GlobalValue *GV, const TargetMachine &TM,
MachineModuleInfo *MMI) const {
unsigned Encoding = getPersonalityEncoding();
- if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect)
+ if ((Encoding & 0x80) == DW_EH_PE_indirect)
return getContext().getOrCreateSymbol(StringRef("DW.ref.") +
TM.getSymbol(GV)->getName());
- if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr)
+ if ((Encoding & 0x70) == DW_EH_PE_absptr)
return TM.getSymbol(GV);
report_fatal_error("We do not support this DWARF encoding yet!");
}
@@ -86,8 +102,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
MachineModuleInfo *MMI, MCStreamer &Streamer) const {
-
- if (Encoding & dwarf::DW_EH_PE_indirect) {
+ if (Encoding & DW_EH_PE_indirect) {
MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", TM);
@@ -102,7 +117,7 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
return TargetLoweringObjectFile::
getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()),
- Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ Encoding & ~DW_EH_PE_indirect, Streamer);
}
return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM,
@@ -117,8 +132,9 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
// section(".eh_frame") gcc will produce:
//
// .section .eh_frame,"a",@progbits
-
- if (Name == getInstrProfCoverageSectionName(false))
+
+ if (Name == getInstrProfSectionName(IPSK_covmap, Triple::ELF,
+ /*AddSegmentInfo=*/false))
return SectionKind::getMetadata();
if (Name.empty() || Name[0] != '.') return K;
@@ -149,7 +165,6 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
return K;
}
-
static unsigned getELFSectionType(StringRef Name, SectionKind K) {
// Use SHT_NOTE for section whose name starts with ".note" to allow
// emitting ELF notes from C variable declaration.
@@ -211,6 +226,20 @@ static const Comdat *getELFComdat(const GlobalValue *GV) {
return C;
}
+static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO,
+ const TargetMachine &TM) {
+ MDNode *MD = GO->getMetadata(LLVMContext::MD_associated);
+ if (!MD)
+ return nullptr;
+
+ auto *VM = dyn_cast<ValueAsMetadata>(MD->getOperand(0));
+ if (!VM)
+ report_fatal_error("MD_associated operand is not ValueAsMetadata");
+
+ GlobalObject *OtherGO = dyn_cast<GlobalObject>(VM->getValue());
+ return OtherGO ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGO)) : nullptr;
+}
+
MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
StringRef SectionName = GO->getSection();
@@ -224,9 +253,23 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
Group = C->getName();
Flags |= ELF::SHF_GROUP;
}
- return getContext().getELFSection(SectionName,
- getELFSectionType(SectionName, Kind), Flags,
- /*EntrySize=*/0, Group);
+
+ // A section can have at most one associated section. Put each global with
+ // MD_associated in a unique section.
+ unsigned UniqueID = MCContext::GenericSectionID;
+ const MCSymbolELF *AssociatedSymbol = getAssociatedSymbol(GO, TM);
+ if (AssociatedSymbol) {
+ UniqueID = NextUniqueID++;
+ Flags |= ELF::SHF_LINK_ORDER;
+ }
+
+ MCSectionELF *Section = getContext().getELFSection(
+ SectionName, getELFSectionType(SectionName, Kind), Flags,
+ /*EntrySize=*/0, Group, UniqueID, AssociatedSymbol);
+ // Make sure that we did not get some other section with incompatible sh_link.
+ // This should not be possible due to the UniqueID code above.
+ assert(Section->getAssociatedSymbol() == AssociatedSymbol);
+ return Section;
}
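For a global to reach this path it must carry !associated metadata whose single operand wraps another GlobalObject; a hedged sketch of IR-building code that sets this up (variable names are illustrative):

    // Tie GV to Target so GV is emitted into a unique SHF_LINK_ORDER section
    // whose sh_link refers to Target's section.
    LLVMContext &Ctx = M.getContext();
    Metadata *Ops[] = {ValueAsMetadata::get(Target)}; // Target: GlobalObject*
    GV->setMetadata(LLVMContext::MD_associated, MDNode::get(Ctx, Ops));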
/// Return the section prefix name used by options FunctionsSections and
@@ -248,11 +291,10 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
return ".data.rel.ro";
}
-static MCSectionELF *
-selectELFSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM, bool EmitUniqueSection,
- unsigned Flags, unsigned *NextUniqueID) {
+static MCSectionELF *selectELFSectionForGlobal(
+ MCContext &Ctx, const GlobalObject *GO, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool EmitUniqueSection, unsigned Flags,
+ unsigned *NextUniqueID, const MCSymbolELF *AssociatedSymbol) {
unsigned EntrySize = 0;
if (Kind.isMergeableCString()) {
if (Kind.isMergeable2ByteCString()) {
@@ -319,7 +361,7 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
if (Kind.isExecuteOnly())
UniqueID = 0;
return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags,
- EntrySize, Group, UniqueID);
+ EntrySize, Group, UniqueID, AssociatedSymbol);
}
MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
@@ -337,8 +379,17 @@ MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
}
EmitUniqueSection |= GO->hasComdat();
- return selectELFSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
- EmitUniqueSection, Flags, &NextUniqueID);
+ const MCSymbolELF *AssociatedSymbol = getAssociatedSymbol(GO, TM);
+ if (AssociatedSymbol) {
+ EmitUniqueSection = true;
+ Flags |= ELF::SHF_LINK_ORDER;
+ }
+
+ MCSectionELF *Section = selectELFSectionForGlobal(
+ getContext(), GO, Kind, getMangler(), TM, EmitUniqueSection, Flags,
+ &NextUniqueID, AssociatedSymbol);
+ assert(Section->getAssociatedSymbol() == AssociatedSymbol);
+ return Section;
}
MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
@@ -351,8 +402,9 @@ MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
return ReadOnlySection;
return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(),
- getMangler(), TM, EmitUniqueSection, ELF::SHF_ALLOC,
- &NextUniqueID);
+ getMangler(), TM, EmitUniqueSection,
+ ELF::SHF_ALLOC, &NextUniqueID,
+ /* AssociatedSymbol */ nullptr);
}
bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection(
@@ -723,7 +775,7 @@ const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference(
return TargetLoweringObjectFile::
getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()),
- Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+ Encoding & ~DW_EH_PE_indirect, Streamer);
}
return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM,
@@ -1122,33 +1174,110 @@ MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
raw_ostream &OS, const GlobalValue *GV) const {
- if (!GV->hasDLLExportStorageClass() || GV->isDeclaration())
- return;
+ emitLinkerFlagsForGlobalCOFF(OS, GV, getTargetTriple(), getMangler());
+}
- const Triple &TT = getTargetTriple();
+//===----------------------------------------------------------------------===//
+// Wasm
+//===----------------------------------------------------------------------===//
- if (TT.isKnownWindowsMSVCEnvironment())
- OS << " /EXPORT:";
- else
- OS << " -export:";
-
- if (TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) {
- std::string Flag;
- raw_string_ostream FlagOS(Flag);
- getMangler().getNameWithPrefix(FlagOS, GV, false);
- FlagOS.flush();
- if (Flag[0] == GV->getParent()->getDataLayout().getGlobalPrefix())
- OS << Flag.substr(1);
- else
- OS << Flag;
- } else {
- getMangler().getNameWithPrefix(OS, GV, false);
+static const Comdat *getWasmComdat(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ if (!C)
+ return nullptr;
+
+ if (C->getSelectionKind() != Comdat::Any)
+ report_fatal_error("Wasm COMDATs only support SelectionKind::Any, '" +
+ C->getName() + "' cannot be lowered.");
+
+ return C;
+}
+
+MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ llvm_unreachable("getExplicitSectionGlobal not yet implemented");
+ return nullptr;
+}
+
+static MCSectionWasm *
+selectWasmSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM, bool EmitUniqueSection,
+ unsigned Flags, unsigned *NextUniqueID) {
+ StringRef Group = "";
+ if (getWasmComdat(GO))
+ llvm_unreachable("comdat not yet supported for wasm");
+
+ bool UniqueSectionNames = TM.getUniqueSectionNames();
+ SmallString<128> Name = getSectionPrefixForGlobal(Kind);
+
+ if (const auto *F = dyn_cast<Function>(GO)) {
+ const auto &OptionalPrefix = F->getSectionPrefix();
+ if (OptionalPrefix)
+ Name += *OptionalPrefix;
}
- if (!GV->getValueType()->isFunctionTy()) {
- if (TT.isKnownWindowsMSVCEnvironment())
- OS << ",DATA";
- else
- OS << ",data";
+ if (EmitUniqueSection && UniqueSectionNames) {
+ Name.push_back('.');
+ TM.getNameWithPrefix(Name, GO, Mang, true);
+ }
+ unsigned UniqueID = MCContext::GenericSectionID;
+ if (EmitUniqueSection && !UniqueSectionNames) {
+ UniqueID = *NextUniqueID;
+ (*NextUniqueID)++;
}
+ return Ctx.getWasmSection(Name, /*Type=*/0, Flags,
+ Group, UniqueID);
+}
+
+MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+
+ if (Kind.isCommon())
+ report_fatal_error("mergeable sections not supported yet on wasm");
+
+ // If we have -ffunction-section or -fdata-section then we should emit the
+ // global value to a uniqued section specifically for it.
+ bool EmitUniqueSection = false;
+ if (Kind.isText())
+ EmitUniqueSection = TM.getFunctionSections();
+ else
+ EmitUniqueSection = TM.getDataSections();
+ EmitUniqueSection |= GO->hasComdat();
+
+ return selectWasmSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
+ EmitUniqueSection, /*Flags=*/0,
+ &NextUniqueID);
+}
+
+bool TargetLoweringObjectFileWasm::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ // We can always create relative relocations, so use another section
+ // that can be marked non-executable.
+ return false;
+}
+
+const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference(
+ const GlobalValue *LHS, const GlobalValue *RHS,
+ const TargetMachine &TM) const {
+ // We may only use a PLT-relative relocation to refer to unnamed_addr
+ // functions.
+ if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy())
+ return nullptr;
+
+ // Basic sanity checks.
+ if (LHS->getType()->getPointerAddressSpace() != 0 ||
+ RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() ||
+ RHS->isThreadLocal())
+ return nullptr;
+
+ return MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(TM.getSymbol(LHS), MCSymbolRefExpr::VK_None,
+ getContext()),
+ MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext());
+}
+
+void
+TargetLoweringObjectFileWasm::InitializeWasm() {
+ // TODO: Initialize StaticCtorSection and StaticDtorSection.
}
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index b6da8e0aa60d..c20d5ab814f8 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -34,14 +34,6 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
return false;
}
-/// LessPreciseFPMAD - This flag return true when -enable-fp-mad option
-/// is specified on the command line. When this flag is off(default), the
-/// code generator is not allowed to generate mad (multiply add) if the
-/// result is "less precise" than doing those operations individually.
-bool TargetOptions::LessPreciseFPMAD() const {
- return UnsafeFPMath || LessPreciseFPMADOption;
-}
-
/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
/// that the rounding mode of the FPU can change from its default.
bool TargetOptions::HonorSignDependentRoundingFPMath() const {
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index e7ea2b4563f9..150195f5f85b 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -92,6 +92,9 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
cl::init(false),
cl::ZeroOrMore);
+static cl::opt<bool> EnableMachineOutliner("enable-machine-outliner",
+ cl::Hidden,
+ cl::desc("Enable machine outliner"));
static cl::opt<std::string>
PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
@@ -261,7 +264,8 @@ TargetPassConfig::~TargetPassConfig() {
TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
: ImmutablePass(ID), PM(&pm), Started(true), Stopped(false),
AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false),
- DisableVerify(false), EnableTailMerge(true) {
+ DisableVerify(false), EnableTailMerge(true),
+ RequireCodeGenSCCOrder(false) {
Impl = new PassConfigImpl();
@@ -279,6 +283,9 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
if (StringRef(PrintMachineInstrs.getValue()).equals(""))
TM->Options.PrintMachineCode = true;
+
+ if (TM->Options.EnableIPRA)
+ setRequiresCodeGenSCCOrder();
}
CodeGenOpt::Level TargetPassConfig::getOptLevel() const {
@@ -531,7 +538,7 @@ void TargetPassConfig::addISelPrepare() {
addPreISel();
// Force codegen to run according to the callgraph.
- if (TM->Options.EnableIPRA)
+ if (requiresCodeGenSCCOrder())
addPass(new DummyCGSCCPass);
// Add both the safe stack and the stack protection passes: each of them will
@@ -668,9 +675,15 @@ void TargetPassConfig::addMachinePasses() {
addPass(&StackMapLivenessID, false);
addPass(&LiveDebugValuesID, false);
+ // Insert before XRay Instrumentation.
+ addPass(&FEntryInserterID, false);
+
addPass(&XRayInstrumentationID, false);
addPass(&PatchableFunctionID, false);
+ if (EnableMachineOutliner)
+ PM->add(createMachineOutlinerPass());
+
AddingMachinePasses = false;
}
@@ -704,6 +717,10 @@ void TargetPassConfig::addMachineSSAOptimization() {
addPass(&MachineLICMID, false);
addPass(&MachineCSEID, false);
+
+ // Coalesce basic blocks with the same branch condition
+ addPass(&BranchCoalescingID);
+
addPass(&MachineSinkingID);
addPass(&PeepholeOptimizerID);
@@ -730,7 +747,7 @@ MachinePassRegistry RegisterRegAlloc::Registry;
/// A dummy default pass factory indicates whether the register allocator is
/// overridden on the command line.
-LLVM_DEFINE_ONCE_FLAG(InitializeDefaultRegisterAllocatorFlag);
+static llvm::once_flag InitializeDefaultRegisterAllocatorFlag;
static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
static RegisterRegAlloc
defaultRegAlloc("default",
@@ -903,6 +920,11 @@ void TargetPassConfig::addBlockPlacement() {
//===---------------------------------------------------------------------===//
/// GlobalISel Configuration
//===---------------------------------------------------------------------===//
+
+bool TargetPassConfig::isGlobalISelEnabled() const {
+ return false;
+}
+
bool TargetPassConfig::isGlobalISelAbortEnabled() const {
return EnableGlobalISelAbort == 1;
}
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index cd50c5b6571d..66cdad278e8d 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -155,8 +155,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const {
// Pick the most sub register class of the right type that contains
// this physreg.
const TargetRegisterClass* BestRC = nullptr;
- for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
- const TargetRegisterClass* RC = *I;
+ for (const TargetRegisterClass* RC : regclasses()) {
if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
(!BestRC || BestRC->hasSubClass(RC)))
BestRC = RC;
@@ -185,10 +184,9 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
if (SubClass)
getAllocatableSetForRC(MF, SubClass, Allocatable);
} else {
- for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
- E = regclass_end(); I != E; ++I)
- if ((*I)->isAllocatable())
- getAllocatableSetForRC(MF, *I, Allocatable);
+ for (const TargetRegisterClass *C : regclasses())
+ if (C->isAllocatable())
+ getAllocatableSetForRC(MF, C, Allocatable);
}
// Mask out the reserved registers
@@ -415,9 +413,9 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void
-TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
- const TargetRegisterInfo *TRI) {
+LLVM_DUMP_METHOD
+void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex,
+ const TargetRegisterInfo *TRI) {
dbgs() << PrintReg(Reg, TRI, SubRegIndex) << "\n";
}
#endif
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 83e52d335354..0df34ce43112 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===//
+//===- llvm/Target/TargetSchedule.cpp - Sched Machine Model ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,12 +12,22 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -37,13 +47,14 @@ bool TargetSchedModel::hasInstrItineraries() const {
static unsigned gcd(unsigned Dividend, unsigned Divisor) {
// Dividend and Divisor will be naturally swapped as needed.
- while(Divisor) {
+ while (Divisor) {
unsigned Rem = Dividend % Divisor;
Dividend = Divisor;
Divisor = Rem;
};
return Dividend;
}
+
static unsigned lcm(unsigned A, unsigned B) {
unsigned LCM = (uint64_t(A) * B) / gcd(A, B);
assert((LCM >= A && LCM >= B) && "LCM overflow");
@@ -73,6 +84,29 @@ void TargetSchedModel::init(const MCSchedModel &sm,
}
}
+/// Returns true only if the instruction is specified as single-issue.
+bool TargetSchedModel::mustBeginGroup(const MachineInstr *MI,
+ const MCSchedClassDesc *SC) const {
+ if (hasInstrSchedModel()) {
+ if (!SC)
+ SC = resolveSchedClass(MI);
+ if (SC->isValid())
+ return SC->BeginGroup;
+ }
+ return false;
+}
+
+bool TargetSchedModel::mustEndGroup(const MachineInstr *MI,
+ const MCSchedClassDesc *SC) const {
+ if (hasInstrSchedModel()) {
+ if (!SC)
+ SC = resolveSchedClass(MI);
+ if (SC->isValid())
+ return SC->EndGroup;
+ }
+ return false;
+}
+
unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
const MCSchedClassDesc *SC) const {
if (hasInstrItineraries()) {
@@ -100,7 +134,6 @@ static unsigned capLatency(int Cycles) {
/// evaluation of predicates that depend on instruction operands or flags.
const MCSchedClassDesc *TargetSchedModel::
resolveSchedClass(const MachineInstr *MI) const {
-
// Get the definition's scheduling class descriptor from this machine model.
unsigned SchedClass = MI->getDesc().getSchedClass();
const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
@@ -244,7 +277,11 @@ unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
if (SCDesc->isValid() && !SCDesc->isVariant())
return computeInstrLatency(*SCDesc);
- llvm_unreachable("No MI sched latency");
+ if (SCDesc->isValid()) {
+ assert(!SCDesc->isVariant() && "No MI sched latency: SCDesc->isVariant()");
+ return computeInstrLatency(*SCDesc);
+ }
+ return 0;
}
unsigned
@@ -298,3 +335,68 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
}
return 0;
}
+
+static Optional<double>
+getRTroughputFromItineraries(unsigned schedClass,
+ const InstrItineraryData *IID) {
+ double Unknown = std::numeric_limits<double>::infinity();
+ double Throughput = Unknown;
+
+ for (const InstrStage *IS = IID->beginStage(schedClass),
+ *E = IID->endStage(schedClass);
+ IS != E; ++IS) {
+ unsigned Cycles = IS->getCycles();
+ if (!Cycles)
+ continue;
+ Throughput =
+ std::min(Throughput, countPopulation(IS->getUnits()) * 1.0 / Cycles);
+ }
+ // Callers want the reciprocal throughput, so return the inverse.
+ return 1 / Throughput;
+}
+
+static Optional<double>
+getRTroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc,
+ const TargetSubtargetInfo *STI,
+ const MCSchedModel &SchedModel) {
+ double Unknown = std::numeric_limits<double>::infinity();
+ double Throughput = Unknown;
+
+ for (const MCWriteProcResEntry *WPR = STI->getWriteProcResBegin(SCDesc),
+ *WEnd = STI->getWriteProcResEnd(SCDesc);
+ WPR != WEnd; ++WPR) {
+ unsigned Cycles = WPR->Cycles;
+ if (!Cycles)
+ return Optional<double>();
+
+ unsigned NumUnits =
+ SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits;
+ Throughput = std::min(Throughput, NumUnits * 1.0 / Cycles);
+ }
+ // Callers want the reciprocal throughput, so return the inverse.
+ return 1 / Throughput;
+}
+
+Optional<double>
+TargetSchedModel::computeInstrRThroughput(const MachineInstr *MI) const {
+ if (hasInstrItineraries())
+ return getRTroughputFromItineraries(MI->getDesc().getSchedClass(),
+ getInstrItineraries());
+ if (hasInstrSchedModel())
+ return getRTroughputFromInstrSchedModel(resolveSchedClass(MI), STI,
+ SchedModel);
+ return Optional<double>();
+}
+
+Optional<double>
+TargetSchedModel::computeInstrRThroughput(unsigned Opcode) const {
+ unsigned SchedClass = TII->get(Opcode).getSchedClass();
+ if (hasInstrItineraries())
+ return getRTroughputFromItineraries(SchedClass, getInstrItineraries());
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+ if (SCDesc->isValid() && !SCDesc->isVariant())
+ return getRTroughputFromInstrSchedModel(SCDesc, STI, SchedModel);
+ }
+ return Optional<double>();
+}
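Both helpers compute the same quantity: for every resource the instruction occupies, the issue rate is NumUnits / Cycles, the bottleneck is the minimum rate across resources, and the returned value is its reciprocal. A worked example with illustrative numbers:

    // Two multiply units busy 3 cycles each and one store unit busy 1 cycle:
    // rates are 2/3 and 1/1; the bottleneck is min(2/3, 1) = 2/3, so the
    // reported reciprocal throughput is 1 / (2/3) = 1.5 cycles per instruction.
    double Throughput = std::min(2.0 / 3.0, 1.0 / 1.0);
    double RThroughput = 1.0 / Throughput; // 1.5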
diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp
index c74707d95b9e..0a444e0fff07 100644
--- a/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -11,6 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -52,3 +55,46 @@ bool TargetSubtargetInfo::enablePostRAScheduler() const {
bool TargetSubtargetInfo::useAA() const {
return false;
}
+
+static std::string createSchedInfoStr(unsigned Latency,
+ Optional<double> RThroughput) {
+ static const char *SchedPrefix = " sched: [";
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+ if (Latency > 0 && RThroughput.hasValue())
+ CS << SchedPrefix << Latency << format(":%2.2f", RThroughput.getValue())
+ << "]";
+ else if (Latency > 0)
+ CS << SchedPrefix << Latency << ":?]";
+ else if (RThroughput.hasValue())
+ CS << SchedPrefix << "?:" << RThroughput.getValue() << "]";
+ CS.flush();
+ return Comment;
+}
+
+/// Returns a string representation of the scheduler comment.
+std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const {
+ if (MI.isPseudo() || MI.isTerminator())
+ return std::string();
+ // We don't cache TSchedModel because it depends on TargetInstrInfo,
+ // which can change during compilation.
+ TargetSchedModel TSchedModel;
+ TSchedModel.init(getSchedModel(), this, getInstrInfo());
+ unsigned Latency = TSchedModel.computeInstrLatency(&MI);
+ Optional<double> RThroughput = TSchedModel.computeInstrRThroughput(&MI);
+ return createSchedInfoStr(Latency, RThroughput);
+}
+
+/// Returns a string representation of the scheduler comment.
+std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const {
+ // We don't cache TSchedModel because it depends on TargetInstrInfo,
+ // which can change during compilation.
+ TargetSchedModel TSchedModel;
+ TSchedModel.init(getSchedModel(), this, getInstrInfo());
+ if (!TSchedModel.hasInstrSchedModel())
+ return std::string();
+ unsigned Latency = TSchedModel.computeInstrLatency(MCI.getOpcode());
+ Optional<double> RThroughput =
+ TSchedModel.computeInstrRThroughput(MCI.getOpcode());
+ return createSchedInfoStr(Latency, RThroughput);
+}
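Given the formatting in createSchedInfoStr, the comment attached to an instruction takes one of three shapes depending on which numbers are known; a sketch of the expected output (values illustrative):

    std::string Note = STI.getSchedInfoStr(MI);
    //   latency 4, rthroughput 0.50    ->  " sched: [4:0.50]"
    //   latency 4, rthroughput unknown ->  " sched: [4:?]"
    //   latency unknown, known rthroughput -> " sched: [?:<value>]", where the
    //   value uses raw_ostream's default double formatting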
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 0f1b2ed994b7..75359fe3c0ea 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -905,7 +905,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
++End;
}
- // Check if the reschedule will not break depedencies.
+ // Check if the reschedule will not break dependencies.
unsigned NumVisited = 0;
MachineBasicBlock::iterator KillPos = KillMI;
++KillPos;
@@ -1785,7 +1785,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
MachineInstr *CopyMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
TII->get(TargetOpcode::COPY))
.addReg(DstReg, RegState::Define, SubIdx)
- .addOperand(UseMO);
+ .add(UseMO);
// The first def needs an <undef> flag because there is no live register
// before it.
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 0d506d646659..c8946010e9d1 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -167,6 +167,7 @@ class VirtRegRewriter : public MachineFunctionPass {
bool readsUndefSubreg(const MachineOperand &MO) const;
void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const;
void handleIdentityCopy(MachineInstr &MI) const;
+ void expandCopyBundle(MachineInstr &MI) const;
public:
static char ID;
@@ -367,11 +368,41 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
}
if (Indexes)
- Indexes->removeMachineInstrFromMaps(MI);
- MI.eraseFromParent();
+ Indexes->removeSingleMachineInstrFromMaps(MI);
+ MI.eraseFromBundle();
DEBUG(dbgs() << " deleted.\n");
}
+/// The live-range splitting logic sometimes produces bundles of copies when
+/// subregisters are involved. Expand these into a sequence of copy
+/// instructions after processing the last one in the bundle. This does not
+/// update LiveIntervals, which are no longer needed for these instructions.
+void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
+ if (!MI.isCopy())
+ return;
+
+ if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) {
+ // Only do this when the complete bundle is made out of COPYs.
+ MachineBasicBlock &MBB = *MI.getParent();
+ for (MachineBasicBlock::reverse_instr_iterator I =
+ std::next(MI.getReverseIterator()), E = MBB.instr_rend();
+ I != E && I->isBundledWithSucc(); ++I) {
+ if (!I->isCopy())
+ return;
+ }
+
+ for (MachineBasicBlock::reverse_instr_iterator I = MI.getReverseIterator();
+ I->isBundledWithPred(); ) {
+ MachineInstr &MI = *I;
+ ++I;
+
+ MI.unbundleFromPred();
+ if (Indexes)
+ Indexes->insertMachineInstrInMaps(MI);
+ }
+ }
+}
+
void VirtRegRewriter::rewrite() {
bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
SmallVector<unsigned, 8> SuperDeads;
@@ -431,12 +462,14 @@ void VirtRegRewriter::rewrite() {
}
}
- // The <def,undef> flag only makes sense for sub-register defs, and
- // we are substituting a full physreg. An <imp-use,kill> operand
- // from the SuperKills list will represent the partial read of the
- // super-register.
- if (MO.isDef())
+ // The <def,undef> and <def,internal> flags only make sense for
+ // sub-register defs, and we are substituting a full physreg. An
+ // <imp-use,kill> operand from the SuperKills list will represent the
+ // partial read of the super-register.
+ if (MO.isDef()) {
MO.setIsUndef(false);
+ MO.setIsInternalRead(false);
+ }
// PhysReg operands cannot have subregister indexes.
PhysReg = TRI->getSubReg(PhysReg, SubReg);
@@ -461,6 +494,8 @@ void VirtRegRewriter::rewrite() {
DEBUG(dbgs() << "> " << *MI);
+ expandCopyBundle(*MI);
+
// We can remove identity copies right now.
handleIdentityCopy(*MI);
}
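The unbundling loop above is easy to misread because the iterator must advance before the bundle links are mutated; a restated sketch with the shadowed name removed:

    // Walk the bundle from its last instruction toward the front, detach each
    // copy from its predecessor, and give it back its own slot index.
    for (auto I = MI.getReverseIterator(); I->isBundledWithPred();) {
      MachineInstr &Cur = *I;
      ++I;                     // advance first; unbundling edits the links
      Cur.unbundleFromPred();
      if (Indexes)
        Indexes->insertMachineInstrInMaps(Cur);
    }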
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 568720c66e55..ae07e8b2fa03 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -86,6 +86,7 @@ private:
// All fields are reset by runOnFunction.
EHPersonality Personality = EHPersonality::Unknown;
+ const DataLayout *DL = nullptr;
DenseMap<BasicBlock *, ColorVector> BlockColors;
MapVector<BasicBlock *, std::vector<BasicBlock *>> FuncletBlocks;
};
@@ -111,6 +112,7 @@ bool WinEHPrepare::runOnFunction(Function &Fn) {
if (!isFuncletEHPersonality(Personality))
return false;
+ DL = &Fn.getParent()->getDataLayout();
return prepareExplicitEH(Fn);
}
@@ -1070,7 +1072,7 @@ AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
if (!isa<TerminatorInst>(EHPad)) {
// If the EHPad isn't a terminator, then we can insert a load in this block
// that will dominate all uses.
- SpillSlot = new AllocaInst(PN->getType(), nullptr,
+ SpillSlot = new AllocaInst(PN->getType(), DL->getAllocaAddrSpace(), nullptr,
Twine(PN->getName(), ".wineh.spillslot"),
&F.getEntryBlock().front());
Value *V = new LoadInst(SpillSlot, Twine(PN->getName(), ".wineh.reload"),
@@ -1157,7 +1159,7 @@ void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
Function &F) {
// Lazily create the spill slot.
if (!SpillSlot)
- SpillSlot = new AllocaInst(V->getType(), nullptr,
+ SpillSlot = new AllocaInst(V->getType(), DL->getAllocaAddrSpace(), nullptr,
Twine(V->getName(), ".wineh.spillslot"),
&F.getEntryBlock().front());
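The AllocaInst constructor now takes the address space explicitly, which is why the pass caches the DataLayout; a minimal sketch of creating a spill slot under the signature used above (names illustrative):

    // Create a stack slot in the module's alloca address space.
    const DataLayout &DL = F.getParent()->getDataLayout();
    AllocaInst *Slot = new AllocaInst(Ty, DL.getAllocaAddrSpace(),
                                      /*ArraySize=*/nullptr, "spill.slot",
                                      &F.getEntryBlock().front());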
diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp
index 63bd762eeb2b..7d2848bdc13b 100644
--- a/lib/CodeGen/XRayInstrumentation.cpp
+++ b/lib/CodeGen/XRayInstrumentation.cpp
@@ -81,7 +81,7 @@ void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF,
auto MIB = BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc))
.addImm(T.getOpcode());
for (auto &MO : T.operands())
- MIB.addOperand(MO);
+ MIB.add(MO);
Terminators.push_back(&T);
}
}
@@ -157,6 +157,11 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
case Triple::ArchType::arm:
case Triple::ArchType::thumb:
case Triple::ArchType::aarch64:
+ case Triple::ArchType::ppc64le:
+ case Triple::ArchType::mips:
+ case Triple::ArchType::mipsel:
+ case Triple::ArchType::mips64:
+ case Triple::ArchType::mips64el:
// For the architectures which don't have a single return instruction
prependRetWithPatchableExit(MF, TII);
break;
diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt
index f9bff86b41c8..6e9214d72adc 100644
--- a/lib/DebugInfo/CodeView/CMakeLists.txt
+++ b/lib/DebugInfo/CodeView/CMakeLists.txt
@@ -5,16 +5,17 @@ add_llvm_library(LLVMDebugInfoCodeView
CVTypeDumper.cpp
CVTypeVisitor.cpp
EnumTables.cpp
+ Formatters.cpp
Line.cpp
ModuleSubstream.cpp
ModuleSubstreamVisitor.cpp
RecordSerialization.cpp
SymbolRecordMapping.cpp
SymbolDumper.cpp
+ SymbolSerializer.cpp
TypeDatabase.cpp
TypeDatabaseVisitor.cpp
TypeDumpVisitor.cpp
- TypeRecord.cpp
TypeRecordMapping.cpp
TypeSerializer.cpp
TypeStreamMerger.cpp
diff --git a/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp b/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
index 75cfd0dd184e..4c78caf03477 100644
--- a/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
+++ b/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
@@ -11,20 +11,11 @@
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
-#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/Support/BinaryByteStream.h"
using namespace llvm;
using namespace llvm::codeview;
-template <typename T>
-static Error takeObject(ArrayRef<uint8_t> &Data, const T *&Res) {
- if (Data.size() < sizeof(*Res))
- return llvm::make_error<CodeViewError>(cv_error_code::insufficient_buffer);
- Res = reinterpret_cast<const T *>(Data.data());
- Data = Data.drop_front(sizeof(*Res));
- return Error::success();
-}
-
CVSymbolVisitor::CVSymbolVisitor(SymbolVisitorCallbacks &Callbacks)
: Callbacks(Callbacks) {}
diff --git a/lib/DebugInfo/CodeView/CVTypeDumper.cpp b/lib/DebugInfo/CodeView/CVTypeDumper.cpp
index fcd239cce0dd..bcc8218d9446 100644
--- a/lib/DebugInfo/CodeView/CVTypeDumper.cpp
+++ b/lib/DebugInfo/CodeView/CVTypeDumper.cpp
@@ -14,7 +14,7 @@
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
-#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/Support/BinaryByteStream.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -28,6 +28,8 @@ Error CVTypeDumper::dump(const CVType &Record, TypeVisitorCallbacks &Dumper) {
Pipeline.addCallbackToPipeline(Dumper);
CVTypeVisitor Visitor(Pipeline);
+ if (Handler)
+ Visitor.addTypeServerHandler(*Handler);
CVType RecordCopy = Record;
if (auto EC = Visitor.visitTypeRecord(RecordCopy))
@@ -45,6 +47,8 @@ Error CVTypeDumper::dump(const CVTypeArray &Types,
Pipeline.addCallbackToPipeline(Dumper);
CVTypeVisitor Visitor(Pipeline);
+ if (Handler)
+ Visitor.addTypeServerHandler(*Handler);
if (auto EC = Visitor.visitTypeStream(Types))
return EC;
@@ -52,9 +56,9 @@ Error CVTypeDumper::dump(const CVTypeArray &Types,
}
Error CVTypeDumper::dump(ArrayRef<uint8_t> Data, TypeVisitorCallbacks &Dumper) {
- msf::ByteStream Stream(Data);
+ BinaryByteStream Stream(Data, llvm::support::little);
CVTypeArray Types;
- msf::StreamReader Reader(Stream);
+ BinaryStreamReader Reader(Stream);
if (auto EC = Reader.readArray(Types, Reader.getLength()))
return EC;
diff --git a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
index 5171e24f3aac..0069ee3cc904 100644
--- a/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
+++ b/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
@@ -10,9 +10,14 @@
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
+#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h"
+#include "llvm/DebugInfo/CodeView/TypeServerHandler.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
-#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/BinaryStreamReader.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -21,7 +26,8 @@ CVTypeVisitor::CVTypeVisitor(TypeVisitorCallbacks &Callbacks)
: Callbacks(Callbacks) {}
template <typename T>
-static Error visitKnownRecord(CVType &Record, TypeVisitorCallbacks &Callbacks) {
+static Error visitKnownRecord(CVTypeVisitor &Visitor, CVType &Record,
+ TypeVisitorCallbacks &Callbacks) {
TypeRecordKind RK = static_cast<TypeRecordKind>(Record.Type);
T KnownRecord(RK);
if (auto EC = Callbacks.visitKnownRecord(Record, KnownRecord))
@@ -39,7 +45,58 @@ static Error visitKnownMember(CVMemberRecord &Record,
return Error::success();
}
+static Expected<TypeServer2Record> deserializeTypeServerRecord(CVType &Record) {
+ class StealTypeServerVisitor : public TypeVisitorCallbacks {
+ public:
+ explicit StealTypeServerVisitor(TypeServer2Record &TR) : TR(TR) {}
+
+ Error visitKnownRecord(CVType &CVR, TypeServer2Record &Record) override {
+ TR = Record;
+ return Error::success();
+ }
+
+ private:
+ TypeServer2Record &TR;
+ };
+
+ TypeServer2Record R(TypeRecordKind::TypeServer2);
+ TypeDeserializer Deserializer;
+ StealTypeServerVisitor Thief(R);
+ TypeVisitorCallbackPipeline Pipeline;
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(Thief);
+ CVTypeVisitor Visitor(Pipeline);
+ if (auto EC = Visitor.visitTypeRecord(Record))
+ return std::move(EC);
+
+ return R;
+}
+
+void CVTypeVisitor::addTypeServerHandler(TypeServerHandler &Handler) {
+ Handlers.push_back(&Handler);
+}
+
Error CVTypeVisitor::visitTypeRecord(CVType &Record) {
+ if (Record.Type == TypeLeafKind::LF_TYPESERVER2 && !Handlers.empty()) {
+ auto TS = deserializeTypeServerRecord(Record);
+ if (!TS)
+ return TS.takeError();
+
+ for (auto Handler : Handlers) {
+ auto ExpectedResult = Handler->handle(*TS, Callbacks);
+ // If there was an error, return the error.
+ if (!ExpectedResult)
+ return ExpectedResult.takeError();
+
+ // If the handler processed the record, return success.
+ if (*ExpectedResult)
+ return Error::success();
+
+ // Otherwise keep searching for a handler, eventually falling out and
+ // using the default record handler.
+ }
+ }
+
if (auto EC = Callbacks.visitTypeBegin(Record))
return EC;
@@ -50,7 +107,7 @@ Error CVTypeVisitor::visitTypeRecord(CVType &Record) {
break;
#define TYPE_RECORD(EnumName, EnumVal, Name) \
case EnumName: { \
- if (auto EC = visitKnownRecord<Name##Record>(Record, Callbacks)) \
+ if (auto EC = visitKnownRecord<Name##Record>(*this, Record, Callbacks)) \
return EC; \
break; \
}
@@ -109,7 +166,15 @@ Error CVTypeVisitor::visitTypeStream(const CVTypeArray &Types) {
return Error::success();
}
-Error CVTypeVisitor::visitFieldListMemberStream(msf::StreamReader Reader) {
+Error CVTypeVisitor::visitTypeStream(CVTypeRange Types) {
+ for (auto I : Types) {
+ if (auto EC = visitTypeRecord(I))
+ return EC;
+ }
+ return Error::success();
+}
+
+Error CVTypeVisitor::visitFieldListMemberStream(BinaryStreamReader Reader) {
FieldListDeserializer Deserializer(Reader);
TypeVisitorCallbackPipeline Pipeline;
Pipeline.addCallbackToPipeline(Deserializer);
@@ -130,7 +195,7 @@ Error CVTypeVisitor::visitFieldListMemberStream(msf::StreamReader Reader) {
}
Error CVTypeVisitor::visitFieldListMemberStream(ArrayRef<uint8_t> Data) {
- msf::ByteStream S(Data);
- msf::StreamReader SR(S);
+ BinaryByteStream S(Data, llvm::support::little);
+ BinaryStreamReader SR(S);
return visitFieldListMemberStream(SR);
}
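Handlers registered via addTypeServerHandler are consulted in order: an error aborts the visit, true means the record was consumed, and false falls through to the next handler and finally to the default path. A hedged sketch of a minimal handler (the class is hypothetical; its signature is inferred from the call site above):

    // A do-nothing handler that always defers to the next one in the chain.
    class NullTypeServerHandler : public TypeServerHandler {
    public:
      Expected<bool> handle(TypeServer2Record &TS,
                            TypeVisitorCallbacks &Callbacks) override {
        return false; // not handled here
      }
    };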
diff --git a/lib/DebugInfo/CodeView/CodeViewError.cpp b/lib/DebugInfo/CodeView/CodeViewError.cpp
index 55c10c076eef..8de266b836b4 100644
--- a/lib/DebugInfo/CodeView/CodeViewError.cpp
+++ b/lib/DebugInfo/CodeView/CodeViewError.cpp
@@ -31,6 +31,8 @@ public:
"bytes.";
case cv_error_code::corrupt_record:
return "The CodeView record is corrupted.";
+ case cv_error_code::no_records:
+ return "There are no records";
case cv_error_code::operation_unsupported:
return "The requested operation is not supported.";
case cv_error_code::unknown_member_record:
diff --git a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
index 9bd85cf9dc68..282e3103adc9 100644
--- a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
+++ b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
@@ -10,8 +10,8 @@
#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/MSF/StreamWriter.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -145,10 +145,10 @@ Error CodeViewRecordIO::mapStringZ(StringRef &Value) {
if (isWriting()) {
// Truncate if we attempt to write too much.
StringRef S = Value.take_front(maxFieldLength() - 1);
- if (auto EC = Writer->writeZeroString(S))
+ if (auto EC = Writer->writeCString(S))
return EC;
} else {
- if (auto EC = Reader->readZeroString(Value))
+ if (auto EC = Reader->readCString(Value))
return EC;
}
return Error::success();
@@ -176,7 +176,7 @@ Error CodeViewRecordIO::mapStringZVectorZ(std::vector<StringRef> &Value) {
if (auto EC = mapStringZ(V))
return EC;
}
- if (auto EC = Writer->writeInteger(uint8_t(0)))
+ if (auto EC = Writer->writeInteger<uint8_t>(0))
return EC;
} else {
StringRef S;
@@ -194,22 +194,22 @@ Error CodeViewRecordIO::mapStringZVectorZ(std::vector<StringRef> &Value) {
Error CodeViewRecordIO::writeEncodedSignedInteger(const int64_t &Value) {
assert(Value < 0 && "Encoded integer is not signed!");
if (Value >= std::numeric_limits<int8_t>::min()) {
- if (auto EC = Writer->writeInteger(static_cast<uint16_t>(LF_CHAR)))
+ if (auto EC = Writer->writeInteger<uint16_t>(LF_CHAR))
return EC;
- if (auto EC = Writer->writeInteger(static_cast<int8_t>(Value)))
+ if (auto EC = Writer->writeInteger<int8_t>(Value))
return EC;
} else if (Value >= std::numeric_limits<int16_t>::min()) {
- if (auto EC = Writer->writeInteger(static_cast<uint16_t>(LF_SHORT)))
+ if (auto EC = Writer->writeInteger<uint16_t>(LF_SHORT))
return EC;
- if (auto EC = Writer->writeInteger(static_cast<int16_t>(Value)))
+ if (auto EC = Writer->writeInteger<int16_t>(Value))
return EC;
} else if (Value >= std::numeric_limits<int32_t>::min()) {
- if (auto EC = Writer->writeInteger(static_cast<uint16_t>(LF_LONG)))
+ if (auto EC = Writer->writeInteger<uint16_t>(LF_LONG))
return EC;
- if (auto EC = Writer->writeInteger(static_cast<int32_t>(Value)))
+ if (auto EC = Writer->writeInteger<int32_t>(Value))
return EC;
} else {
- if (auto EC = Writer->writeInteger(static_cast<uint16_t>(LF_QUADWORD)))
+ if (auto EC = Writer->writeInteger<uint16_t>(LF_QUADWORD))
return EC;
if (auto EC = Writer->writeInteger(Value))
return EC;
@@ -219,20 +219,20 @@ Error CodeViewRecordIO::writeEncodedSignedInteger(const int64_t &Value) {
Error CodeViewRecordIO::writeEncodedUnsignedInteger(const uint64_t &Value) {
if (Value < LF_NUMERIC) {
- if (auto EC = Writer->writeInteger(static_cast<uint16_t>(Value)))
+ if (auto EC = Writer->writeInteger<uint16_t>(Value))
return EC;
} else if (Value <= std::numeric_limits<uint16_t>::max()) {
- if (auto EC = Writer->writeInteger(static_cast<uint16_t>(LF_USHORT)))
+ if (auto EC = Writer->writeInteger<uint16_t>(LF_USHORT))
return EC;
- if (auto EC = Writer->writeInteger(static_cast<uint16_t>(Value)))
+ if (auto EC = Writer->writeInteger<uint16_t>(Value))
return EC;
} else if (Value <= std::numeric_limits<uint32_t>::max()) {
- if (auto EC = Writer->writeInteger(static_cast<uint16_t>(LF_ULONG)))
+ if (auto EC = Writer->writeInteger<uint16_t>(LF_ULONG))
return EC;
- if (auto EC = Writer->writeInteger(static_cast<uint32_t>(Value)))
+ if (auto EC = Writer->writeInteger<uint32_t>(Value))
return EC;
} else {
- if (auto EC = Writer->writeInteger(static_cast<uint16_t>(LF_UQUADWORD)))
+ if (auto EC = Writer->writeInteger<uint16_t>(LF_UQUADWORD))
return EC;
if (auto EC = Writer->writeInteger(Value))
return EC;
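Taken together, the two functions above implement the CodeView numeric-leaf encoding: values below LF_NUMERIC are stored inline, larger ones get a width marker followed by the value. A sketch of the resulting layouts, with hypothetical values:

    // writeEncodedUnsignedInteger:
    //   17     -> [0x0011]                       (value < LF_NUMERIC, stored directly)
    //   40000  -> [LF_USHORT][uint16_t 40000]
    //   70000  -> [LF_ULONG][uint32_t 70000]
    //   2^40   -> [LF_UQUADWORD][uint64_t 2^40]
    // writeEncodedSignedInteger (Value is always negative, per the assert):
    //   -5     -> [LF_CHAR][int8_t -5]
    //   -300   -> [LF_SHORT][int16_t -300]
    //   -70000 -> [LF_LONG][int32_t -70000]
    //   else   -> [LF_QUADWORD][int64_t Value]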
diff --git a/lib/DebugInfo/CodeView/Formatters.cpp b/lib/DebugInfo/CodeView/Formatters.cpp
new file mode 100644
index 000000000000..ef00bd8570fa
--- /dev/null
+++ b/lib/DebugInfo/CodeView/Formatters.cpp
@@ -0,0 +1,37 @@
+//===- Formatters.cpp -------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/Formatters.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::codeview::detail;
+
+GuidAdapter::GuidAdapter(StringRef Guid)
+ : FormatAdapter(makeArrayRef(Guid.bytes_begin(), Guid.bytes_end())) {}
+
+GuidAdapter::GuidAdapter(ArrayRef<uint8_t> Guid)
+ : FormatAdapter(std::move(Guid)) {}
+
+void GuidAdapter::format(llvm::raw_ostream &Stream, StringRef Style) {
+ static const char *Lookup = "0123456789ABCDEF";
+
+ assert(Item.size() == 16 && "Expected 16-byte GUID");
+ Stream << "{";
+ for (int i = 0; i < 16;) {
+ uint8_t Byte = Item[i];
+ uint8_t HighNibble = (Byte >> 4) & 0xF;
+ uint8_t LowNibble = Byte & 0xF;
+ Stream << Lookup[HighNibble] << Lookup[LowNibble];
+ ++i;
+ if (i >= 4 && i <= 10 && i % 2 == 0)
+ Stream << "-";
+ }
+ Stream << "}";
+}
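The dash placement above (after the 4th, 6th, 8th, and 10th bytes) yields the conventional 8-4-4-4-12 GUID grouping. A minimal usage sketch with hypothetical bytes, going through the fmt_guid adapter this file backs (see its use in TypeDumpVisitor.cpp later in this diff):

    // fmt_guid and formatv come from Formatters.h and FormatVariadic.h.
    uint8_t Bytes[16] = {0x3F, 0x25, 0x04, 0xE0, 0x4F, 0x89, 0x41, 0xD3,
                         0x9A, 0x0C, 0x03, 0x05, 0xE8, 0x2C, 0x33, 0x01};
    // Prints "{3F2504E0-4F89-41D3-9A0C-0305E82C3301}".
    llvm::outs() << llvm::formatv("{0}", fmt_guid(llvm::makeArrayRef(Bytes, 16)));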
diff --git a/lib/DebugInfo/CodeView/ModuleSubstream.cpp b/lib/DebugInfo/CodeView/ModuleSubstream.cpp
index 768ebaa1c980..69a7c59116cf 100644
--- a/lib/DebugInfo/CodeView/ModuleSubstream.cpp
+++ b/lib/DebugInfo/CodeView/ModuleSubstream.cpp
@@ -9,22 +9,20 @@
#include "llvm/DebugInfo/CodeView/ModuleSubstream.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/Support/BinaryStreamReader.h"
using namespace llvm;
using namespace llvm::codeview;
-using namespace llvm::msf;
ModuleSubstream::ModuleSubstream() : Kind(ModuleSubstreamKind::None) {}
-ModuleSubstream::ModuleSubstream(ModuleSubstreamKind Kind,
- ReadableStreamRef Data)
+ModuleSubstream::ModuleSubstream(ModuleSubstreamKind Kind, BinaryStreamRef Data)
: Kind(Kind), Data(Data) {}
-Error ModuleSubstream::initialize(ReadableStreamRef Stream,
+Error ModuleSubstream::initialize(BinaryStreamRef Stream,
ModuleSubstream &Info) {
const ModuleSubsectionHeader *Header;
- StreamReader Reader(Stream);
+ BinaryStreamReader Reader(Stream);
if (auto EC = Reader.readObject(Header))
return EC;
@@ -42,4 +40,4 @@ uint32_t ModuleSubstream::getRecordLength() const {
ModuleSubstreamKind ModuleSubstream::getSubstreamKind() const { return Kind; }
-ReadableStreamRef ModuleSubstream::getRecordData() const { return Data; }
+BinaryStreamRef ModuleSubstream::getRecordData() const { return Data; }
diff --git a/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp b/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp
index 524793277980..e490a78cadbc 100644
--- a/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp
+++ b/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp
@@ -8,54 +8,52 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/MSF/StreamRef.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamRef.h"
using namespace llvm;
using namespace llvm::codeview;
-using namespace llvm::msf;
-Error IModuleSubstreamVisitor::visitSymbols(ReadableStreamRef Data) {
+Error IModuleSubstreamVisitor::visitSymbols(BinaryStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::Symbols, Data);
}
-Error IModuleSubstreamVisitor::visitLines(ReadableStreamRef Data,
+Error IModuleSubstreamVisitor::visitLines(BinaryStreamRef Data,
const LineSubstreamHeader *Header,
const LineInfoArray &Lines) {
return visitUnknown(ModuleSubstreamKind::Lines, Data);
}
-Error IModuleSubstreamVisitor::visitStringTable(ReadableStreamRef Data) {
+Error IModuleSubstreamVisitor::visitStringTable(BinaryStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::StringTable, Data);
}
Error IModuleSubstreamVisitor::visitFileChecksums(
- ReadableStreamRef Data, const FileChecksumArray &Checksums) {
+ BinaryStreamRef Data, const FileChecksumArray &Checksums) {
return visitUnknown(ModuleSubstreamKind::FileChecksums, Data);
}
-Error IModuleSubstreamVisitor::visitFrameData(ReadableStreamRef Data) {
+Error IModuleSubstreamVisitor::visitFrameData(BinaryStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::FrameData, Data);
}
-Error IModuleSubstreamVisitor::visitInlineeLines(ReadableStreamRef Data) {
+Error IModuleSubstreamVisitor::visitInlineeLines(BinaryStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::InlineeLines, Data);
}
-Error IModuleSubstreamVisitor::visitCrossScopeImports(ReadableStreamRef Data) {
+Error IModuleSubstreamVisitor::visitCrossScopeImports(BinaryStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::CrossScopeExports, Data);
}
-Error IModuleSubstreamVisitor::visitCrossScopeExports(ReadableStreamRef Data) {
+Error IModuleSubstreamVisitor::visitCrossScopeExports(BinaryStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::CrossScopeImports, Data);
}
-Error IModuleSubstreamVisitor::visitILLines(ReadableStreamRef Data) {
+Error IModuleSubstreamVisitor::visitILLines(BinaryStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::ILLines, Data);
}
-Error IModuleSubstreamVisitor::visitFuncMDTokenMap(ReadableStreamRef Data) {
+Error IModuleSubstreamVisitor::visitFuncMDTokenMap(BinaryStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::FuncMDTokenMap, Data);
}
-Error IModuleSubstreamVisitor::visitTypeMDTokenMap(ReadableStreamRef Data) {
+Error IModuleSubstreamVisitor::visitTypeMDTokenMap(BinaryStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::TypeMDTokenMap, Data);
}
-Error IModuleSubstreamVisitor::visitMergedAssemblyInput(
- ReadableStreamRef Data) {
+Error IModuleSubstreamVisitor::visitMergedAssemblyInput(BinaryStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::MergedAssemblyInput, Data);
}
-Error IModuleSubstreamVisitor::visitCoffSymbolRVA(ReadableStreamRef Data) {
+Error IModuleSubstreamVisitor::visitCoffSymbolRVA(BinaryStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::CoffSymbolRVA, Data);
}
@@ -65,7 +63,7 @@ Error llvm::codeview::visitModuleSubstream(const ModuleSubstream &R,
case ModuleSubstreamKind::Symbols:
return V.visitSymbols(R.getRecordData());
case ModuleSubstreamKind::Lines: {
- StreamReader Reader(R.getRecordData());
+ BinaryStreamReader Reader(R.getRecordData());
const LineSubstreamHeader *Header;
if (auto EC = Reader.readObject(Header))
return EC;
@@ -78,7 +76,7 @@ Error llvm::codeview::visitModuleSubstream(const ModuleSubstream &R,
case ModuleSubstreamKind::StringTable:
return V.visitStringTable(R.getRecordData());
case ModuleSubstreamKind::FileChecksums: {
- StreamReader Reader(R.getRecordData());
+ BinaryStreamReader Reader(R.getRecordData());
FileChecksumArray Checksums;
if (auto EC = Reader.readArray(Checksums, Reader.bytesRemaining()))
return EC;
diff --git a/lib/DebugInfo/CodeView/RecordSerialization.cpp b/lib/DebugInfo/CodeView/RecordSerialization.cpp
index 6f29caa9bbfc..6446670f60d8 100644
--- a/lib/DebugInfo/CodeView/RecordSerialization.cpp
+++ b/lib/DebugInfo/CodeView/RecordSerialization.cpp
@@ -16,7 +16,7 @@
#include "llvm/ADT/APSInt.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/Support/BinaryByteStream.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -33,7 +33,7 @@ StringRef llvm::codeview::getBytesAsCString(ArrayRef<uint8_t> LeafData) {
return getBytesAsCharacters(LeafData).split('\0').first;
}
-Error llvm::codeview::consume(msf::StreamReader &Reader, APSInt &Num) {
+Error llvm::codeview::consume(BinaryStreamReader &Reader, APSInt &Num) {
// Used to avoid overload ambiguity on APInt constructor.
bool FalseVal = false;
uint16_t Short;
@@ -103,15 +103,15 @@ Error llvm::codeview::consume(msf::StreamReader &Reader, APSInt &Num) {
Error llvm::codeview::consume(StringRef &Data, APSInt &Num) {
ArrayRef<uint8_t> Bytes(Data.bytes_begin(), Data.bytes_end());
- msf::ByteStream S(Bytes);
- msf::StreamReader SR(S);
+ BinaryByteStream S(Bytes, llvm::support::little);
+ BinaryStreamReader SR(S);
auto EC = consume(SR, Num);
Data = Data.take_back(SR.bytesRemaining());
return EC;
}
/// Decode a numeric leaf value that is known to be a uint64_t.
-Error llvm::codeview::consume_numeric(msf::StreamReader &Reader,
+Error llvm::codeview::consume_numeric(BinaryStreamReader &Reader,
uint64_t &Num) {
APSInt N;
if (auto EC = consume(Reader, N))
@@ -123,27 +123,27 @@ Error llvm::codeview::consume_numeric(msf::StreamReader &Reader,
return Error::success();
}
-Error llvm::codeview::consume(msf::StreamReader &Reader, uint32_t &Item) {
+Error llvm::codeview::consume(BinaryStreamReader &Reader, uint32_t &Item) {
return Reader.readInteger(Item);
}
Error llvm::codeview::consume(StringRef &Data, uint32_t &Item) {
ArrayRef<uint8_t> Bytes(Data.bytes_begin(), Data.bytes_end());
- msf::ByteStream S(Bytes);
- msf::StreamReader SR(S);
+ BinaryByteStream S(Bytes, llvm::support::little);
+ BinaryStreamReader SR(S);
auto EC = consume(SR, Item);
Data = Data.take_back(SR.bytesRemaining());
return EC;
}
-Error llvm::codeview::consume(msf::StreamReader &Reader, int32_t &Item) {
+Error llvm::codeview::consume(BinaryStreamReader &Reader, int32_t &Item) {
return Reader.readInteger(Item);
}
-Error llvm::codeview::consume(msf::StreamReader &Reader, StringRef &Item) {
+Error llvm::codeview::consume(BinaryStreamReader &Reader, StringRef &Item) {
if (Reader.empty())
return make_error<CodeViewError>(cv_error_code::corrupt_record,
"Null terminated string buffer is empty!");
- return Reader.readZeroString(Item);
+ return Reader.readCString(Item);
}
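The StringRef overloads above share one pattern: wrap the bytes in a BinaryByteStream, consume through a BinaryStreamReader, then shrink Data to the unread tail via take_back(bytesRemaining()). A usage sketch with a hypothetical buffer:

    // Two-byte buffer holding the unsigned numeric leaf 18 (< LF_NUMERIC, so it
    // is stored directly as a little-endian uint16_t).
    StringRef Data("\x12\x00", 2);
    APSInt Num;
    if (auto EC = llvm::codeview::consume(Data, Num))
      return EC;   // hypothetical caller context
    // Num == 18, and Data is now empty: both bytes were consumed.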
diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp
index fd54fba13c76..134471e81cac 100644
--- a/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -468,8 +468,8 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
for (auto &Annotation : InlineSite.annotations()) {
switch (Annotation.OpCode) {
case BinaryAnnotationsOpCode::Invalid:
- return llvm::make_error<CodeViewError>(
- "Invalid binary annotation opcode!");
+ W.printString("(Annotation Padding)");
+ break;
case BinaryAnnotationsOpCode::CodeOffset:
case BinaryAnnotationsOpCode::ChangeCodeOffset:
case BinaryAnnotationsOpCode::ChangeCodeLength:
diff --git a/lib/DebugInfo/CodeView/SymbolSerializer.cpp b/lib/DebugInfo/CodeView/SymbolSerializer.cpp
new file mode 100644
index 000000000000..251cc431f52b
--- /dev/null
+++ b/lib/DebugInfo/CodeView/SymbolSerializer.cpp
@@ -0,0 +1,52 @@
+//===- SymbolSerializer.cpp -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator)
+ : Storage(Allocator), RecordBuffer(MaxRecordLength), Stream(RecordBuffer, llvm::support::little),
+ Writer(Stream), Mapping(Writer) { }
+
+Error SymbolSerializer::visitSymbolBegin(CVSymbol &Record) {
+ assert(!CurrentSymbol.hasValue() && "Already in a symbol mapping!");
+
+ Writer.setOffset(0);
+
+ if (auto EC = writeRecordPrefix(Record.kind()))
+ return EC;
+
+ CurrentSymbol = Record.kind();
+ if (auto EC = Mapping.visitSymbolBegin(Record))
+ return EC;
+
+ return Error::success();
+}
+
+Error SymbolSerializer::visitSymbolEnd(CVSymbol &Record) {
+ assert(CurrentSymbol.hasValue() && "Not in a symbol mapping!");
+
+ if (auto EC = Mapping.visitSymbolEnd(Record))
+ return EC;
+
+ uint32_t RecordEnd = Writer.getOffset();
+ uint16_t Length = RecordEnd - 2;
+ Writer.setOffset(0);
+ if (auto EC = Writer.writeInteger(Length))
+ return EC;
+
+ uint8_t *StableStorage = Storage.Allocate<uint8_t>(RecordEnd);
+ ::memcpy(StableStorage, &RecordBuffer[0], RecordEnd);
+ Record.RecordData = ArrayRef<uint8_t>(StableStorage, RecordEnd);
+ CurrentSymbol.reset();
+
+ return Error::success();
+}
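The Length arithmetic in visitSymbolEnd follows the CodeView record layout: every symbol record begins with a uint16_t length that counts the bytes after the length field itself, so the serializer writes the prefix last, once the final size is known. A worked example with a hypothetical 24-byte record:

    // [Len:2][Kind:2][payload:20]   RecordEnd == 24 after the mapping finishes.
    // Writer.setOffset(0); writeInteger(Length) back-patches Len = 24 - 2 = 22,
    // i.e. the record size excluding the length field itself.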
diff --git a/lib/DebugInfo/CodeView/TypeDatabase.cpp b/lib/DebugInfo/CodeView/TypeDatabase.cpp
index c7f72551dc8b..f9ded6ce2a86 100644
--- a/lib/DebugInfo/CodeView/TypeDatabase.cpp
+++ b/lib/DebugInfo/CodeView/TypeDatabase.cpp
@@ -71,7 +71,7 @@ TypeIndex TypeDatabase::getNextTypeIndex() const {
}
/// Records the name of a type, and reserves its type index.
-void TypeDatabase::recordType(StringRef Name, CVType Data) {
+void TypeDatabase::recordType(StringRef Name, const CVType &Data) {
CVUDTNames.push_back(Name);
TypeRecords.push_back(Data);
}
@@ -106,6 +106,10 @@ StringRef TypeDatabase::getTypeName(TypeIndex Index) const {
return "<unknown UDT>";
}
+const CVType &TypeDatabase::getTypeRecord(TypeIndex Index) const {
+ return TypeRecords[Index.getIndex() - TypeIndex::FirstNonSimpleIndex];
+}
+
bool TypeDatabase::containsTypeIndex(TypeIndex Index) const {
uint32_t I = Index.getIndex() - TypeIndex::FirstNonSimpleIndex;
return I < CVUDTNames.size();
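The subtraction of TypeIndex::FirstNonSimpleIndex appears throughout this patch: type indices below 0x1000 denote builtin ("simple") types with no record of their own, so the record tables begin at 0x1000. For illustration:

    // FirstNonSimpleIndex == 0x1000, so:
    //   TypeIndex 0x1000 -> TypeRecords[0] / CVUDTNames[0]
    //   TypeIndex 0x1234 -> TypeRecords[0x234]
    // Simple indices (e.g. 0x0074, 32-bit int) never reach these tables.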
diff --git a/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp b/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp
index d9d563902182..c234afd2288b 100644
--- a/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp
+++ b/lib/DebugInfo/CodeView/TypeDatabaseVisitor.cpp
@@ -83,6 +83,22 @@ Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ArgListRecord &Args) {
return Error::success();
}
+Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR,
+ StringListRecord &Strings) {
+ auto Indices = Strings.getIndices();
+ uint32_t Size = Indices.size();
+ SmallString<256> TypeName("\"");
+ for (uint32_t I = 0; I < Size; ++I) {
+ StringRef ArgTypeName = TypeDB.getTypeName(Indices[I]);
+ TypeName.append(ArgTypeName);
+ if (I + 1 != Size)
+ TypeName.append("\" \"");
+ }
+ TypeName.push_back('\"');
+ Name = TypeDB.saveTypeName(TypeName);
+ return Error::success();
+}
+
Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, ClassRecord &Class) {
Name = Class.getName();
return Error::success();
@@ -283,6 +299,10 @@ Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, BuildInfoRecord &BI) {
return Error::success();
}
+Error TypeDatabaseVisitor::visitKnownRecord(CVType &CVR, LabelRecord &R) {
+ return Error::success();
+}
+
Error TypeDatabaseVisitor::visitKnownMember(CVMemberRecord &CVR,
VFPtrRecord &VFP) {
return Error::success();
diff --git a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
index 033585ba8cc9..870d95221e7d 100644
--- a/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
+++ b/lib/DebugInfo/CodeView/TypeDumpVisitor.cpp
@@ -1,5 +1,4 @@
-//===-- TypeDumpVisitor.cpp - CodeView type info dumper -----------*- C++
-//-*-===//
+//===-- TypeDumpVisitor.cpp - CodeView type info dumper ----------*- C++-*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,13 +12,15 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/DebugInfo/CodeView/CVTypeDumper.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/Formatters.h"
#include "llvm/DebugInfo/CodeView/TypeDatabase.h"
#include "llvm/DebugInfo/CodeView/TypeDatabaseVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
-#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ScopedPrinter.h"
using namespace llvm;
@@ -145,6 +146,10 @@ static const EnumEntry<uint8_t> FunctionOptionEnum[] = {
ENUM_ENTRY(FunctionOptions, ConstructorWithVirtualBases),
};
+static const EnumEntry<uint16_t> LabelTypeEnum[] = {
+ ENUM_ENTRY(LabelType, Near), ENUM_ENTRY(LabelType, Far),
+};
+
#undef ENUM_ENTRY
static StringRef getLeafTypeName(TypeLeafKind LT) {
@@ -163,9 +168,14 @@ void TypeDumpVisitor::printTypeIndex(StringRef FieldName, TypeIndex TI) const {
CVTypeDumper::printTypeIndex(*W, FieldName, TI, TypeDB);
}
+void TypeDumpVisitor::printItemIndex(StringRef FieldName, TypeIndex TI) const {
+ CVTypeDumper::printTypeIndex(*W, FieldName, TI, getSourceDB());
+}
+
Error TypeDumpVisitor::visitTypeBegin(CVType &Record) {
W->startLine() << getLeafTypeName(Record.Type);
- W->getOStream() << " (" << HexNumber(TypeDB.getNextTypeIndex().getIndex())
+ W->getOStream() << " ("
+ << HexNumber(getSourceDB().getNextTypeIndex().getIndex())
<< ")";
W->getOStream() << " {\n";
W->indent();
@@ -211,7 +221,7 @@ Error TypeDumpVisitor::visitKnownRecord(CVType &CVR,
}
Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, StringIdRecord &String) {
- printTypeIndex("Id", String.getId());
+ printItemIndex("Id", String.getId());
W->printString("StringData", String.getString());
return Error::success();
}
@@ -227,6 +237,17 @@ Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, ArgListRecord &Args) {
return Error::success();
}
+Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, StringListRecord &Strs) {
+ auto Indices = Strs.getIndices();
+ uint32_t Size = Indices.size();
+ W->printNumber("NumStrings", Size);
+ ListScope Arguments(*W, "Strings");
+ for (uint32_t I = 0; I < Size; ++I) {
+ printTypeIndex("String", Indices[I]);
+ }
+ return Error::success();
+}
+
Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, ClassRecord &Class) {
uint16_t Props = static_cast<uint16_t>(Class.getOptions());
W->printNumber("MemberCount", Class.getMemberCount());
@@ -329,14 +350,14 @@ Error TypeDumpVisitor::visitKnownRecord(CVType &CVR,
}
Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, FuncIdRecord &Func) {
- printTypeIndex("ParentScope", Func.getParentScope());
+ printItemIndex("ParentScope", Func.getParentScope());
printTypeIndex("FunctionType", Func.getFunctionType());
W->printString("Name", Func.getName());
return Error::success();
}
Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, TypeServer2Record &TS) {
- W->printBinary("Signature", TS.getGuid());
+ W->printString("Guid", formatv("{0}", fmt_guid(TS.getGuid())).str());
W->printNumber("Age", TS.getAge());
W->printString("Name", TS.getName());
return Error::success();
@@ -390,7 +411,7 @@ Error TypeDumpVisitor::visitKnownRecord(CVType &CVR,
Error TypeDumpVisitor::visitKnownRecord(CVType &CVR,
UdtSourceLineRecord &Line) {
printTypeIndex("UDT", Line.getUDT());
- printTypeIndex("SourceFile", Line.getSourceFile());
+ printItemIndex("SourceFile", Line.getSourceFile());
W->printNumber("LineNumber", Line.getLineNumber());
return Error::success();
}
@@ -398,7 +419,7 @@ Error TypeDumpVisitor::visitKnownRecord(CVType &CVR,
Error TypeDumpVisitor::visitKnownRecord(CVType &CVR,
UdtModSourceLineRecord &Line) {
printTypeIndex("UDT", Line.getUDT());
- printTypeIndex("SourceFile", Line.getSourceFile());
+ printItemIndex("SourceFile", Line.getSourceFile());
W->printNumber("LineNumber", Line.getLineNumber());
W->printNumber("Module", Line.getModule());
return Error::success();
@@ -409,7 +430,7 @@ Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, BuildInfoRecord &Args) {
ListScope Arguments(*W, "Arguments");
for (auto Arg : Args.getArgs()) {
- printTypeIndex("ArgType", Arg);
+ printItemIndex("ArgType", Arg);
}
return Error::success();
}
@@ -530,3 +551,8 @@ Error TypeDumpVisitor::visitKnownMember(CVMemberRecord &CVR,
printTypeIndex("ContinuationIndex", Cont.getContinuationIndex());
return Error::success();
}
+
+Error TypeDumpVisitor::visitKnownRecord(CVType &CVR, LabelRecord &LR) {
+ W->printEnum("Mode", uint16_t(LR.Mode), makeArrayRef(LabelTypeEnum));
+ return Error::success();
+}
diff --git a/lib/DebugInfo/CodeView/TypeRecord.cpp b/lib/DebugInfo/CodeView/TypeRecord.cpp
deleted file mode 100644
index b951c068ca86..000000000000
--- a/lib/DebugInfo/CodeView/TypeRecord.cpp
+++ /dev/null
@@ -1,213 +0,0 @@
-//===-- TypeRecord.cpp ------------------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/DebugInfo/MSF/ByteStream.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-
-using namespace llvm;
-using namespace llvm::codeview;
-
-//===----------------------------------------------------------------------===//
-// Type index remapping
-//===----------------------------------------------------------------------===//
-
-static bool remapIndex(ArrayRef<TypeIndex> IndexMap, TypeIndex &Idx) {
- // Simple types are unchanged.
- if (Idx.isSimple())
- return true;
- unsigned MapPos = Idx.getIndex() - TypeIndex::FirstNonSimpleIndex;
- if (MapPos < IndexMap.size()) {
- Idx = IndexMap[MapPos];
- return true;
- }
-
- // This type index is invalid. Remap this to "not translated by cvpack",
- // and return failure.
- Idx = TypeIndex(SimpleTypeKind::NotTranslated, SimpleTypeMode::Direct);
- return false;
-}
-
-bool ModifierRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- return remapIndex(IndexMap, ModifiedType);
-}
-
-bool ProcedureRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- bool Success = true;
- Success &= remapIndex(IndexMap, ReturnType);
- Success &= remapIndex(IndexMap, ArgumentList);
- return Success;
-}
-
-bool MemberFunctionRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- bool Success = true;
- Success &= remapIndex(IndexMap, ReturnType);
- Success &= remapIndex(IndexMap, ClassType);
- Success &= remapIndex(IndexMap, ThisType);
- Success &= remapIndex(IndexMap, ArgumentList);
- return Success;
-}
-
-bool MemberFuncIdRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- bool Success = true;
- Success &= remapIndex(IndexMap, ClassType);
- Success &= remapIndex(IndexMap, FunctionType);
- return Success;
-}
-
-bool ArgListRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- bool Success = true;
- for (TypeIndex &Str : StringIndices)
- Success &= remapIndex(IndexMap, Str);
- return Success;
-}
-
-bool MemberPointerInfo::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- return remapIndex(IndexMap, ContainingType);
-}
-
-bool PointerRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- bool Success = true;
- Success &= remapIndex(IndexMap, ReferentType);
- if (isPointerToMember())
- Success &= MemberInfo->remapTypeIndices(IndexMap);
- return Success;
-}
-
-bool NestedTypeRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- return remapIndex(IndexMap, Type);
-}
-
-bool ArrayRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- bool Success = true;
- Success &= remapIndex(IndexMap, ElementType);
- Success &= remapIndex(IndexMap, IndexType);
- return Success;
-}
-
-bool TagRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- return remapIndex(IndexMap, FieldList);
-}
-
-bool ClassRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- bool Success = true;
- Success &= TagRecord::remapTypeIndices(IndexMap);
- Success &= remapIndex(IndexMap, DerivationList);
- Success &= remapIndex(IndexMap, VTableShape);
- return Success;
-}
-
-bool EnumRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- bool Success = true;
- Success &= TagRecord::remapTypeIndices(IndexMap);
- Success &= remapIndex(IndexMap, UnderlyingType);
- return Success;
-}
-
-bool BitFieldRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- return remapIndex(IndexMap, Type);
-}
-
-bool VFTableShapeRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- return true;
-}
-
-bool TypeServer2Record::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- return true;
-}
-
-bool StringIdRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- return remapIndex(IndexMap, Id);
-}
-
-bool FuncIdRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- bool Success = true;
- Success &= remapIndex(IndexMap, ParentScope);
- Success &= remapIndex(IndexMap, FunctionType);
- return Success;
-}
-
-bool UdtSourceLineRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- bool Success = true;
- Success &= remapIndex(IndexMap, UDT);
- Success &= remapIndex(IndexMap, SourceFile);
- return Success;
-}
-
-bool UdtModSourceLineRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- bool Success = true;
- Success &= remapIndex(IndexMap, UDT);
- Success &= remapIndex(IndexMap, SourceFile);
- return Success;
-}
-
-bool BuildInfoRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- bool Success = true;
- for (TypeIndex &Arg : ArgIndices)
- Success &= remapIndex(IndexMap, Arg);
- return Success;
-}
-
-bool VFTableRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- bool Success = true;
- Success &= remapIndex(IndexMap, CompleteClass);
- Success &= remapIndex(IndexMap, OverriddenVFTable);
- return Success;
-}
-
-bool OneMethodRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- bool Success = true;
- Success &= remapIndex(IndexMap, Type);
- return Success;
-}
-
-bool MethodOverloadListRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- bool Success = true;
- for (OneMethodRecord &Meth : Methods)
- if ((Success = Meth.remapTypeIndices(IndexMap)))
- return Success;
- return Success;
-}
-
-bool OverloadedMethodRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- return remapIndex(IndexMap, MethodList);
-}
-
-bool DataMemberRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- return remapIndex(IndexMap, Type);
-}
-
-bool StaticDataMemberRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- return remapIndex(IndexMap, Type);
-}
-
-bool EnumeratorRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- return true;
-}
-
-bool VFPtrRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- return remapIndex(IndexMap, Type);
-}
-
-bool BaseClassRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- return remapIndex(IndexMap, Type);
-}
-
-bool VirtualBaseClassRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- bool Success = true;
- Success &= remapIndex(IndexMap, BaseType);
- Success &= remapIndex(IndexMap, VBPtrType);
- return Success;
-}
-
-bool ListContinuationRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
- return remapIndex(IndexMap, ContinuationIndex);
-}
diff --git a/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
index f46e08d55429..114f6fd2897e 100644
--- a/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
+++ b/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
@@ -67,12 +67,9 @@ static Error mapNameAndUniqueName(CodeViewRecordIO &IO, StringRef &Name,
error(IO.mapStringZ(N));
error(IO.mapStringZ(U));
} else {
- size_t BytesNeeded = Name.size() + 1;
- StringRef N = Name;
- if (BytesNeeded > BytesLeft) {
- size_t BytesToDrop = std::min(N.size(), BytesToDrop);
- N = N.drop_back(BytesToDrop);
- }
+      // Cap the length of the string at however many bytes we have available,
+      // leaving one byte for the required null terminator.
+ auto N = StringRef(Name).take_front(BytesLeft - 1);
error(IO.mapStringZ(N));
}
} else {
@@ -174,6 +171,15 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ArgListRecord &Record) {
error(IO.mapVectorN<uint32_t>(
+ Record.ArgIndices,
+ [](CodeViewRecordIO &IO, TypeIndex &N) { return IO.mapInteger(N); }));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
+ StringListRecord &Record) {
+ error(IO.mapVectorN<uint32_t>(
Record.StringIndices,
[](CodeViewRecordIO &IO, TypeIndex &N) { return IO.mapInteger(N); }));
@@ -368,6 +374,14 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
TypeServer2Record &Record) {
+ error(IO.mapGuid(Record.Guid));
+ error(IO.mapInteger(Record.Age));
+ error(IO.mapStringZ(Record.Name));
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR, LabelRecord &Record) {
+ error(IO.mapEnum(Record.Mode));
return Error::success();
}
diff --git a/lib/DebugInfo/CodeView/TypeSerializer.cpp b/lib/DebugInfo/CodeView/TypeSerializer.cpp
index f24fcff86274..fd4d1853fa54 100644
--- a/lib/DebugInfo/CodeView/TypeSerializer.cpp
+++ b/lib/DebugInfo/CodeView/TypeSerializer.cpp
@@ -9,7 +9,7 @@
#include "llvm/DebugInfo/CodeView/TypeSerializer.h"
-#include "llvm/DebugInfo/MSF/StreamWriter.h"
+#include "llvm/Support/BinaryStreamWriter.h"
#include <string.h>
@@ -85,7 +85,8 @@ TypeSerializer::addPadding(MutableArrayRef<uint8_t> Record) {
TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage)
: RecordStorage(Storage), LastTypeIndex(),
- RecordBuffer(MaxRecordLength * 2), Stream(RecordBuffer), Writer(Stream),
+ RecordBuffer(MaxRecordLength * 2),
+ Stream(RecordBuffer, llvm::support::little), Writer(Stream),
Mapping(Writer) {
// RecordBuffer needs to be able to hold enough data so that if we are 1
// byte short of MaxRecordLen, and then we try to write MaxRecordLen bytes,
@@ -203,15 +204,15 @@ Error TypeSerializer::visitMemberEnd(CVMemberRecord &Record) {
uint8_t *SegmentBytes = RecordStorage.Allocate<uint8_t>(LengthWithSize);
auto SavedSegment = MutableArrayRef<uint8_t>(SegmentBytes, LengthWithSize);
- msf::MutableByteStream CS(SavedSegment);
- msf::StreamWriter CW(CS);
+ MutableBinaryByteStream CS(SavedSegment, llvm::support::little);
+ BinaryStreamWriter CW(CS);
if (auto EC = CW.writeBytes(CopyData))
return EC;
if (auto EC = CW.writeEnum(TypeLeafKind::LF_INDEX))
return EC;
- if (auto EC = CW.writeInteger(uint16_t(0)))
+ if (auto EC = CW.writeInteger<uint16_t>(0))
return EC;
- if (auto EC = CW.writeInteger(uint32_t(0xB0C0B0C0)))
+ if (auto EC = CW.writeInteger<uint32_t>(0xB0C0B0C0))
return EC;
FieldListSegments.push_back(SavedSegment);
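For orientation, the tail written above appends a continuation marker to a full field-list segment. In write order the layout is sketched below; 0xB0C0B0C0 reads as a deliberately recognizable placeholder for the continuation type index, presumably patched once the next segment's index is known (an inference from this hunk, not stated in it):

    // [copied member data][LF_INDEX : uint16][pad = 0 : uint16][0xB0C0B0C0 : uint32]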
diff --git a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index ed6cf5743a12..aad20ae6dda1 100644
--- a/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -52,12 +52,19 @@ namespace {
/// - If the type record already exists in the destination stream, discard it
/// and update the type index map to forward the source type index to the
/// existing destination type index.
+///
+/// As an additional complication, type stream merging actually produces two
+/// streams: an item (or IPI) stream and a type stream, as this is what is
+/// actually stored in the final PDB. We choose which records go where by
+/// looking at the record kind.
class TypeStreamMerger : public TypeVisitorCallbacks {
public:
- TypeStreamMerger(TypeTableBuilder &DestStream)
- : DestStream(DestStream), FieldListBuilder(DestStream) {
- assert(!hadError());
- }
+ TypeStreamMerger(TypeTableBuilder &DestIdStream,
+ TypeTableBuilder &DestTypeStream, TypeServerHandler *Handler)
+ : DestIdStream(DestIdStream), DestTypeStream(DestTypeStream),
+ FieldListBuilder(DestTypeStream), Handler(Handler) {}
+
+ static const TypeIndex Untranslated;
/// TypeVisitorCallbacks overrides.
#define TYPE_RECORD(EnumName, EnumVal, Name) \
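The IPI-versus-type-stream split described in the comment above is visible further down in this diff: the visitKnownRecord overloads route FuncId, MemberFuncId, StringId, StringList, BuildInfo, UdtSourceLine, and UdtModSourceLine records through writeIdRecord, and everything else through writeRecord. A hypothetical helper expressing the same routing by leaf kind:

    // Sketch only; the patch dispatches via overloads rather than a switch.
    static bool isIdRecord(TypeLeafKind K) {
      switch (K) {
      case LF_FUNC_ID: case LF_MFUNC_ID: case LF_STRING_ID:
      case LF_SUBSTR_LIST: case LF_BUILDINFO:
      case LF_UDT_SRC_LINE: case LF_UDT_MOD_SRC_LINE:
        return true;    // goes to the item (IPI) stream
      default:
        return false;   // goes to the type (TPI) stream
      }
    }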
@@ -74,42 +81,65 @@ public:
Error visitTypeEnd(CVType &Record) override;
Error visitMemberEnd(CVMemberRecord &Record) override;
- bool mergeStream(const CVTypeArray &Types);
+ Error mergeStream(const CVTypeArray &Types);
private:
+ void addMapping(TypeIndex Idx);
+
+ bool remapIndex(TypeIndex &Idx);
+
+ size_t slotForIndex(TypeIndex Idx) const {
+ assert(!Idx.isSimple() && "simple type indices have no slots");
+ return Idx.getIndex() - TypeIndex::FirstNonSimpleIndex;
+ }
+
+ Error errorCorruptRecord() const {
+ return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record);
+ }
+
template <typename RecordType>
- Error visitKnownRecordImpl(RecordType &Record) {
- FoundBadTypeIndex |= !Record.remapTypeIndices(IndexMap);
- IndexMap.push_back(DestStream.writeKnownType(Record));
+ Error writeRecord(RecordType &R, bool RemapSuccess) {
+ TypeIndex DestIdx = Untranslated;
+ if (RemapSuccess)
+ DestIdx = DestTypeStream.writeKnownType(R);
+ addMapping(DestIdx);
return Error::success();
}
- Error visitKnownRecordImpl(FieldListRecord &Record) {
- CVTypeVisitor Visitor(*this);
-
- if (auto EC = Visitor.visitFieldListMemberStream(Record.Data))
- return EC;
+ template <typename RecordType>
+ Error writeIdRecord(RecordType &R, bool RemapSuccess) {
+ TypeIndex DestIdx = Untranslated;
+ if (RemapSuccess)
+ DestIdx = DestIdStream.writeKnownType(R);
+ addMapping(DestIdx);
return Error::success();
}
template <typename RecordType>
- Error visitKnownMemberRecordImpl(RecordType &Record) {
- FoundBadTypeIndex |= !Record.remapTypeIndices(IndexMap);
- FieldListBuilder.writeMemberType(Record);
+ Error writeMember(RecordType &R, bool RemapSuccess) {
+ if (RemapSuccess)
+ FieldListBuilder.writeMemberType(R);
+ else
+ HadUntranslatedMember = true;
return Error::success();
}
- bool hadError() { return FoundBadTypeIndex; }
+ Optional<Error> LastError;
+
+ bool IsSecondPass = false;
+
+ bool HadUntranslatedMember = false;
- bool FoundBadTypeIndex = false;
+ unsigned NumBadIndices = 0;
BumpPtrAllocator Allocator;
- TypeTableBuilder &DestStream;
+ TypeTableBuilder &DestIdStream;
+ TypeTableBuilder &DestTypeStream;
FieldListRecordBuilder FieldListBuilder;
+ TypeServerHandler *Handler;
- bool IsInFieldList{false};
- size_t BeginIndexMapSize = 0;
+ TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex};
/// Map from source type index to destination type index. Indexed by source
/// type index minus 0x1000.
@@ -118,70 +148,346 @@ private:
} // end anonymous namespace
+const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated);
+
Error TypeStreamMerger::visitTypeBegin(CVRecord<TypeLeafKind> &Rec) {
- if (Rec.Type == TypeLeafKind::LF_FIELDLIST) {
- assert(!IsInFieldList);
- IsInFieldList = true;
- FieldListBuilder.begin();
- } else
- BeginIndexMapSize = IndexMap.size();
return Error::success();
}
Error TypeStreamMerger::visitTypeEnd(CVRecord<TypeLeafKind> &Rec) {
- if (Rec.Type == TypeLeafKind::LF_FIELDLIST) {
- TypeIndex Index = FieldListBuilder.end();
- IndexMap.push_back(Index);
- IsInFieldList = false;
- }
+ CurIndex = TypeIndex(CurIndex.getIndex() + 1);
+ if (!IsSecondPass)
+ assert(IndexMap.size() == slotForIndex(CurIndex) &&
+ "visitKnownRecord should add one index map entry");
return Error::success();
}
Error TypeStreamMerger::visitMemberEnd(CVMemberRecord &Rec) {
- assert(IndexMap.size() == BeginIndexMapSize + 1);
return Error::success();
}
-#define TYPE_RECORD(EnumName, EnumVal, Name) \
- Error TypeStreamMerger::visitKnownRecord(CVType &CVR, \
- Name##Record &Record) { \
- return visitKnownRecordImpl(Record); \
+void TypeStreamMerger::addMapping(TypeIndex Idx) {
+ if (!IsSecondPass) {
+ assert(IndexMap.size() == slotForIndex(CurIndex) &&
+ "visitKnownRecord should add one index map entry");
+ IndexMap.push_back(Idx);
+ } else {
+ assert(slotForIndex(CurIndex) < IndexMap.size());
+ IndexMap[slotForIndex(CurIndex)] = Idx;
}
-#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
-#define MEMBER_RECORD(EnumName, EnumVal, Name) \
- Error TypeStreamMerger::visitKnownMember(CVMemberRecord &CVR, \
- Name##Record &Record) { \
- return visitKnownMemberRecordImpl(Record); \
+}
+
+bool TypeStreamMerger::remapIndex(TypeIndex &Idx) {
+ // Simple types are unchanged.
+ if (Idx.isSimple())
+ return true;
+
+ // Check if this type index refers to a record we've already translated
+ // successfully. If it refers to a type later in the stream or a record we
+  // had to defer, defer it until a later pass.
+ unsigned MapPos = slotForIndex(Idx);
+ if (MapPos < IndexMap.size() && IndexMap[MapPos] != Untranslated) {
+ Idx = IndexMap[MapPos];
+ return true;
}
-#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
-#include "llvm/DebugInfo/CodeView/TypeRecords.def"
+
+ // If this is the second pass and this index isn't in the map, then it points
+ // outside the current type stream, and this is a corrupt record.
+ if (IsSecondPass && MapPos >= IndexMap.size()) {
+ // FIXME: Print a more useful error. We can give the current record and the
+    // index that we think it's pointing to.
+ LastError = joinErrors(std::move(*LastError), errorCorruptRecord());
+ }
+
+ ++NumBadIndices;
+
+ // This type index is invalid. Remap this to "not translated by cvpack",
+ // and return failure.
+ Idx = Untranslated;
+ return false;
+}
+
+//----------------------------------------------------------------------------//
+// Item records
+//----------------------------------------------------------------------------//
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, FuncIdRecord &R) {
+ bool Success = true;
+ Success &= remapIndex(R.ParentScope);
+ Success &= remapIndex(R.FunctionType);
+ return writeIdRecord(R, Success);
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, MemberFuncIdRecord &R) {
+ bool Success = true;
+ Success &= remapIndex(R.ClassType);
+ Success &= remapIndex(R.FunctionType);
+ return writeIdRecord(R, Success);
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, StringIdRecord &R) {
+ return writeIdRecord(R, remapIndex(R.Id));
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, StringListRecord &R) {
+ bool Success = true;
+ for (TypeIndex &Str : R.StringIndices)
+ Success &= remapIndex(Str);
+ return writeIdRecord(R, Success);
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, BuildInfoRecord &R) {
+ bool Success = true;
+ for (TypeIndex &Arg : R.ArgIndices)
+ Success &= remapIndex(Arg);
+ return writeIdRecord(R, Success);
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, UdtSourceLineRecord &R) {
+ bool Success = true;
+ Success &= remapIndex(R.UDT);
+ Success &= remapIndex(R.SourceFile);
+ // FIXME: Translate UdtSourceLineRecord into UdtModSourceLineRecords in the
+ // IPI stream.
+ return writeIdRecord(R, Success);
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, UdtModSourceLineRecord &R) {
+ bool Success = true;
+ Success &= remapIndex(R.UDT);
+ Success &= remapIndex(R.SourceFile);
+ return writeIdRecord(R, Success);
+}
+
+//----------------------------------------------------------------------------//
+// Type records
+//----------------------------------------------------------------------------//
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, ModifierRecord &R) {
+ return writeRecord(R, remapIndex(R.ModifiedType));
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, ProcedureRecord &R) {
+ bool Success = true;
+ Success &= remapIndex(R.ReturnType);
+ Success &= remapIndex(R.ArgumentList);
+ return writeRecord(R, Success);
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, MemberFunctionRecord &R) {
+ bool Success = true;
+ Success &= remapIndex(R.ReturnType);
+ Success &= remapIndex(R.ClassType);
+ Success &= remapIndex(R.ThisType);
+ Success &= remapIndex(R.ArgumentList);
+ return writeRecord(R, Success);
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &Type, ArgListRecord &R) {
+ bool Success = true;
+ for (TypeIndex &Arg : R.ArgIndices)
+ Success &= remapIndex(Arg);
+ if (auto EC = writeRecord(R, Success))
+ return EC;
+ return Error::success();
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, PointerRecord &R) {
+ bool Success = true;
+ Success &= remapIndex(R.ReferentType);
+ if (R.isPointerToMember())
+ Success &= remapIndex(R.MemberInfo->ContainingType);
+ return writeRecord(R, Success);
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, ArrayRecord &R) {
+ bool Success = true;
+ Success &= remapIndex(R.ElementType);
+ Success &= remapIndex(R.IndexType);
+ return writeRecord(R, Success);
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, ClassRecord &R) {
+ bool Success = true;
+ Success &= remapIndex(R.FieldList);
+ Success &= remapIndex(R.DerivationList);
+ Success &= remapIndex(R.VTableShape);
+ return writeRecord(R, Success);
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, UnionRecord &R) {
+ return writeRecord(R, remapIndex(R.FieldList));
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, EnumRecord &R) {
+ bool Success = true;
+ Success &= remapIndex(R.FieldList);
+ Success &= remapIndex(R.UnderlyingType);
+ return writeRecord(R, Success);
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, BitFieldRecord &R) {
+ return writeRecord(R, remapIndex(R.Type));
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, VFTableShapeRecord &R) {
+ return writeRecord(R, true);
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, TypeServer2Record &R) {
+ return writeRecord(R, true);
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, LabelRecord &R) {
+ return writeRecord(R, true);
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, VFTableRecord &R) {
+ bool Success = true;
+ Success &= remapIndex(R.CompleteClass);
+ Success &= remapIndex(R.OverriddenVFTable);
+ return writeRecord(R, Success);
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &,
+ MethodOverloadListRecord &R) {
+ bool Success = true;
+ for (OneMethodRecord &Meth : R.Methods)
+ Success &= remapIndex(Meth.Type);
+ return writeRecord(R, Success);
+}
+
+Error TypeStreamMerger::visitKnownRecord(CVType &, FieldListRecord &R) {
+ // Visit the members inside the field list.
+ HadUntranslatedMember = false;
+ FieldListBuilder.begin();
+ CVTypeVisitor Visitor(*this);
+ if (auto EC = Visitor.visitFieldListMemberStream(R.Data))
+ return EC;
+
+ // Write the record if we translated all field list members.
+ TypeIndex DestIdx = Untranslated;
+ if (!HadUntranslatedMember)
+ DestIdx = FieldListBuilder.end();
+ else
+ FieldListBuilder.reset();
+ addMapping(DestIdx);
+
+ return Error::success();
+}
+
+//----------------------------------------------------------------------------//
+// Member records
+//----------------------------------------------------------------------------//
+
+Error TypeStreamMerger::visitKnownMember(CVMemberRecord &,
+ NestedTypeRecord &R) {
+ return writeMember(R, remapIndex(R.Type));
+}
+
+Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, OneMethodRecord &R) {
+ bool Success = true;
+ Success &= remapIndex(R.Type);
+ return writeMember(R, Success);
+}
+
+Error TypeStreamMerger::visitKnownMember(CVMemberRecord &,
+ OverloadedMethodRecord &R) {
+ return writeMember(R, remapIndex(R.MethodList));
+}
+
+Error TypeStreamMerger::visitKnownMember(CVMemberRecord &,
+ DataMemberRecord &R) {
+ return writeMember(R, remapIndex(R.Type));
+}
+
+Error TypeStreamMerger::visitKnownMember(CVMemberRecord &,
+ StaticDataMemberRecord &R) {
+ return writeMember(R, remapIndex(R.Type));
+}
+
+Error TypeStreamMerger::visitKnownMember(CVMemberRecord &,
+ EnumeratorRecord &R) {
+ return writeMember(R, true);
+}
+
+Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, VFPtrRecord &R) {
+ return writeMember(R, remapIndex(R.Type));
+}
+
+Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, BaseClassRecord &R) {
+ return writeMember(R, remapIndex(R.Type));
+}
+
+Error TypeStreamMerger::visitKnownMember(CVMemberRecord &,
+ VirtualBaseClassRecord &R) {
+ bool Success = true;
+ Success &= remapIndex(R.BaseType);
+ Success &= remapIndex(R.VBPtrType);
+ return writeMember(R, Success);
+}
+
+Error TypeStreamMerger::visitKnownMember(CVMemberRecord &,
+ ListContinuationRecord &R) {
+ return writeMember(R, remapIndex(R.ContinuationIndex));
+}
Error TypeStreamMerger::visitUnknownType(CVType &Rec) {
// We failed to translate a type. Translate this index as "not translated".
- IndexMap.push_back(
- TypeIndex(SimpleTypeKind::NotTranslated, SimpleTypeMode::Direct));
- return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record);
+ addMapping(TypeIndex(SimpleTypeKind::NotTranslated));
+ return errorCorruptRecord();
}
-bool TypeStreamMerger::mergeStream(const CVTypeArray &Types) {
+Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) {
assert(IndexMap.empty());
TypeVisitorCallbackPipeline Pipeline;
+ LastError = Error::success();
TypeDeserializer Deserializer;
Pipeline.addCallbackToPipeline(Deserializer);
Pipeline.addCallbackToPipeline(*this);
CVTypeVisitor Visitor(Pipeline);
+ if (Handler)
+ Visitor.addTypeServerHandler(*Handler);
+
+ if (auto EC = Visitor.visitTypeStream(Types))
+ return EC;
+
+ // If we found bad indices but no other errors, try doing another pass and see
+ // if we can resolve the indices that weren't in the map on the first pass.
+ // This may require multiple passes, but we should always make progress. MASM
+ // is the only known CodeView producer that makes type streams that aren't
+ // topologically sorted. The standard library contains MASM-produced objects,
+ // so this is important to handle correctly, but we don't have to be too
+ // efficient. MASM type streams are usually very small.
+ while (!*LastError && NumBadIndices > 0) {
+ unsigned BadIndicesRemaining = NumBadIndices;
+ IsSecondPass = true;
+ NumBadIndices = 0;
+ CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex);
+ if (auto EC = Visitor.visitTypeStream(Types))
+ return EC;
- if (auto EC = Visitor.visitTypeStream(Types)) {
- consumeError(std::move(EC));
- return false;
+ assert(NumBadIndices <= BadIndicesRemaining &&
+ "second pass found more bad indices");
+ if (!*LastError && NumBadIndices == BadIndicesRemaining) {
+ return llvm::make_error<CodeViewError>(
+ cv_error_code::corrupt_record, "input type graph contains cycles");
+ }
}
+
IndexMap.clear();
- return !hadError();
+
+ Error Ret = std::move(*LastError);
+ LastError.reset();
+ return Ret;
}
-bool llvm::codeview::mergeTypeStreams(TypeTableBuilder &DestStream,
- const CVTypeArray &Types) {
- return TypeStreamMerger(DestStream).mergeStream(Types);
+Error llvm::codeview::mergeTypeStreams(TypeTableBuilder &DestIdStream,
+ TypeTableBuilder &DestTypeStream,
+ TypeServerHandler *Handler,
+ const CVTypeArray &Types) {
+ return TypeStreamMerger(DestIdStream, DestTypeStream, Handler)
+ .mergeStream(Types);
}
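The retry loop in mergeStream above is easiest to see with a tiny forward reference. Suppose, hypothetically, that the source stream holds two records and 0x1000 refers to 0x1001:

    // Pass 1: visiting 0x1000, remapIndex(0x1001) finds no map entry yet, so the
    //         record is written as Untranslated and NumBadIndices = 1. Visiting
    //         0x1001 succeeds and fills its IndexMap slot.
    // Pass 2: 0x1000 is revisited; 0x1001 now resolves, NumBadIndices drops to 0,
    //         and the loop exits. If a pass makes no progress (NumBadIndices stays
    //         equal to BadIndicesRemaining), the graph has a cycle and the
    //         "input type graph contains cycles" error above fires.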
diff --git a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index 08bc74a81e9a..e7b4b777b43f 100644
--- a/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFAbbreviationDeclaration.cpp ----------------------------------===//
+//===- DWARFAbbreviationDeclaration.cpp -----------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,18 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include <cstddef>
+#include <cstdint>
+
using namespace llvm;
using namespace dwarf;
@@ -86,7 +92,6 @@ DWARFAbbreviationDeclaration::extract(DataExtractor Data,
case DW_FORM_line_strp:
case DW_FORM_sec_offset:
case DW_FORM_strp_sup:
- case DW_FORM_ref_sup:
++FixedAttributeSize->NumDwarfOffsets;
break;
diff --git a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 7111ad3f9fc7..85e1eaedfc61 100644
--- a/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -1,4 +1,4 @@
-//===--- DWARFAcceleratorTable.cpp ----------------------------------------===//
+//===- DWARFAcceleratorTable.cpp ------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,19 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include <cstddef>
+#include <cstdint>
+#include <utility>
-namespace llvm {
+using namespace llvm;
bool DWARFAcceleratorTable::extract() {
uint32_t Offset = 0;
@@ -46,7 +53,7 @@ bool DWARFAcceleratorTable::extract() {
return true;
}
-void DWARFAcceleratorTable::dump(raw_ostream &OS) const {
+LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const {
// Dump the header.
OS << "Magic = " << format("0x%08x", Hdr.Magic) << '\n'
<< "Version = " << format("0x%04x", Hdr.Version) << '\n'
@@ -131,4 +138,3 @@ void DWARFAcceleratorTable::dump(raw_ostream &OS) const {
}
}
}
-}
diff --git a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
index 948972f8f136..6e550f2e9ec9 100644
--- a/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
@@ -18,8 +18,10 @@ using namespace llvm;
void DWARFCompileUnit::dump(raw_ostream &OS) {
OS << format("0x%08x", getOffset()) << ": Compile Unit:"
<< " length = " << format("0x%08x", getLength())
- << " version = " << format("0x%04x", getVersion())
- << " abbr_offset = " << format("0x%04x", getAbbreviations()->getOffset())
+ << " version = " << format("0x%04x", getVersion());
+ if (getVersion() >= 5)
+ OS << " unit_type = " << dwarf::UnitTypeString(getUnitType());
+ OS << " abbr_offset = " << format("0x%04x", getAbbreviations()->getOffset())
<< " addr_size = " << format("0x%02x", getAddressByteSize())
<< " (next unit at " << format("0x%08x", getNextUnitOffset())
<< ")\n";
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index 77f6f65ee131..cbce2dc89deb 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFContext.cpp --------------------------------------------------===//
+//===- DWARFContext.cpp ---------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,23 +7,45 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
+#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h"
+#include "llvm/DebugInfo/DWARF/DWARFSection.h"
#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include "llvm/Object/Decompressor.h"
#include "llvm/Object/MachO.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/RelocVisitor.h"
-#include "llvm/Support/Compression.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ELF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/Path.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
+
using namespace llvm;
using namespace dwarf;
using namespace object;
@@ -128,8 +150,7 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH,
auto CUDIE = CU->getUnitDIE();
if (!CUDIE)
continue;
- if (auto StmtOffset =
- CUDIE.getAttributeValueAsSectionOffset(DW_AT_stmt_list)) {
+ if (auto StmtOffset = toSectionOffset(CUDIE.find(DW_AT_stmt_list))) {
DataExtractor lineData(getLineSection().Data, isLittleEndian(),
savedAddressByteSize);
DWARFDebugLine::LineTable LineTable;
@@ -387,7 +408,7 @@ DWARFContext::getLineTableForUnit(DWARFUnit *U) {
if (!UnitDIE)
return nullptr;
- auto Offset = UnitDIE.getAttributeValueAsSectionOffset(DW_AT_stmt_list);
+ auto Offset = toSectionOffset(UnitDIE.find(DW_AT_stmt_list));
if (!Offset)
return nullptr; // No line table for this compile unit.
@@ -440,23 +461,32 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) {
return getCompileUnitForOffset(CUOffset);
}
-static bool getFunctionNameForAddress(DWARFCompileUnit *CU, uint64_t Address,
- FunctionNameKind Kind,
- std::string &FunctionName) {
- if (Kind == FunctionNameKind::None)
- return false;
+static bool getFunctionNameAndStartLineForAddress(DWARFCompileUnit *CU,
+ uint64_t Address,
+ FunctionNameKind Kind,
+ std::string &FunctionName,
+ uint32_t &StartLine) {
// The address may correspond to instruction in some inlined function,
// so we have to build the chain of inlined functions and take the
- // name of the topmost function in it.SmallVectorImpl<DWARFDie> &InlinedChain
+ // name of the topmost function in it.
SmallVector<DWARFDie, 4> InlinedChain;
CU->getInlinedChainForAddress(Address, InlinedChain);
- if (InlinedChain.size() == 0)
+ if (InlinedChain.empty())
return false;
- if (const char *Name = InlinedChain[0].getSubroutineName(Kind)) {
+
+ const DWARFDie &DIE = InlinedChain[0];
+ bool FoundResult = false;
+ const char *Name = nullptr;
+ if (Kind != FunctionNameKind::None && (Name = DIE.getSubroutineName(Kind))) {
FunctionName = Name;
- return true;
+ FoundResult = true;
+ }
+ if (auto DeclLineResult = DIE.getDeclLine()) {
+ StartLine = DeclLineResult;
+ FoundResult = true;
}
- return false;
+
+ return FoundResult;
}
DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address,
@@ -466,7 +496,9 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address,
DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
if (!CU)
return Result;
- getFunctionNameForAddress(CU, Address, Spec.FNKind, Result.FunctionName);
+ getFunctionNameAndStartLineForAddress(CU, Address, Spec.FNKind,
+ Result.FunctionName,
+ Result.StartLine);
if (Spec.FLIKind != FileLineInfoKind::None) {
if (const DWARFLineTable *LineTable = getLineTableForUnit(CU))
LineTable->getFileLineInfoForAddress(Address, CU->getCompilationDir(),
@@ -484,13 +516,16 @@ DWARFContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
return Lines;
std::string FunctionName = "<invalid>";
- getFunctionNameForAddress(CU, Address, Spec.FNKind, FunctionName);
+ uint32_t StartLine = 0;
+ getFunctionNameAndStartLineForAddress(CU, Address, Spec.FNKind, FunctionName,
+ StartLine);
// If the Specifier says we don't need FileLineInfo, just
// return the top-most function at the starting address.
if (Spec.FLIKind == FileLineInfoKind::None) {
DILineInfo Result;
Result.FunctionName = FunctionName;
+ Result.StartLine = StartLine;
Lines.push_back(std::make_pair(Address, Result));
return Lines;
}
@@ -511,6 +546,7 @@ DWARFContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
Result.FunctionName = FunctionName;
Result.Line = Row.Line;
Result.Column = Row.Column;
+ Result.StartLine = StartLine;
Lines.push_back(std::make_pair(Row.Address, Result));
}
@@ -550,6 +586,8 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address,
// Get function name if necessary.
if (const char *Name = FunctionDIE.getSubroutineName(Spec.FNKind))
Frame.FunctionName = Name;
+ if (auto DeclLineResult = FunctionDIE.getDeclLine())
+ Frame.StartLine = DeclLineResult;
if (Spec.FLIKind != FileLineInfoKind::None) {
if (i == 0) {
// For the topmost frame, initialize the line table of this
@@ -578,6 +616,66 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address,
return InliningInfo;
}
+static Error createError(const Twine &Reason, llvm::Error E) {
+ return make_error<StringError>(Reason + toString(std::move(E)),
+ inconvertibleErrorCode());
+}
+
+/// Returns the address of the symbol that the relocation is applied against.
+/// Used for further relocation computation. The symbol's section load address
+/// is taken into account if a LoadedObjectInfo interface is provided.
+static Expected<uint64_t> getSymbolAddress(const object::ObjectFile &Obj,
+ const RelocationRef &Reloc,
+ const LoadedObjectInfo *L) {
+ uint64_t Ret = 0;
+ object::section_iterator RSec = Obj.section_end();
+ object::symbol_iterator Sym = Reloc.getSymbol();
+
+ // First calculate the address of the symbol or section as it appears
+ // in the object file
+ if (Sym != Obj.symbol_end()) {
+ Expected<uint64_t> SymAddrOrErr = Sym->getAddress();
+ if (!SymAddrOrErr)
+ return createError("error: failed to compute symbol address: ",
+ SymAddrOrErr.takeError());
+
+ // Also remember what section this symbol is in for later
+ auto SectOrErr = Sym->getSection();
+ if (!SectOrErr)
+ return createError("error: failed to get symbol section: ",
+ SectOrErr.takeError());
+
+ RSec = *SectOrErr;
+ Ret = *SymAddrOrErr;
+ } else if (auto *MObj = dyn_cast<MachOObjectFile>(&Obj)) {
+ RSec = MObj->getRelocationSection(Reloc.getRawDataRefImpl());
+ Ret = RSec->getAddress();
+ }
+
+ // If we are given load addresses for the sections, we need to adjust:
+ // SymAddr = (Address of Symbol Or Section in File) -
+ // (Address of Section in File) +
+ // (Load Address of Section)
+ // RSec is now either the section being targeted or the section
+ // containing the symbol being targeted. In either case,
+ // we need to perform the same computation.
+ if (L && RSec != Obj.section_end())
+ if (uint64_t SectionLoadAddress = L->getSectionLoadAddress(*RSec))
+ Ret += SectionLoadAddress - RSec->getAddress();
+ return Ret;
+}
+
+static bool isRelocScattered(const object::ObjectFile &Obj,
+ const RelocationRef &Reloc) {
+ const MachOObjectFile *MachObj = dyn_cast<MachOObjectFile>(&Obj);
+ if (!MachObj)
+ return false;
+ // MachO also has relocations that point to sections and
+ // scattered relocations.
+ auto RelocInfo = MachObj->getRelocation(Reloc.getRawDataRefImpl());
+ return MachObj->isRelocationScattered(RelocInfo);
+}
+
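
A worked instance of the adjustment above (illustrative numbers only, not
from any real object file):

    //   symbol address in file   = 0x1040
    //   section address in file  = 0x1000
    //   section load address     = 0x7f001000
    //   adjusted SymAddr         = 0x1040 - 0x1000 + 0x7f001000 = 0x7f001040
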
DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
const LoadedObjectInfo *L)
: IsLittleEndian(Obj.isLittleEndian()),
@@ -618,40 +716,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
name = name.substr(
name.find_first_not_of("._z")); // Skip ".", "z" and "_" prefixes.
- StringRef *SectionData =
- StringSwitch<StringRef *>(name)
- .Case("debug_info", &InfoSection.Data)
- .Case("debug_abbrev", &AbbrevSection)
- .Case("debug_loc", &LocSection.Data)
- .Case("debug_line", &LineSection.Data)
- .Case("debug_aranges", &ARangeSection)
- .Case("debug_frame", &DebugFrameSection)
- .Case("eh_frame", &EHFrameSection)
- .Case("debug_str", &StringSection)
- .Case("debug_ranges", &RangeSection)
- .Case("debug_macinfo", &MacinfoSection)
- .Case("debug_pubnames", &PubNamesSection)
- .Case("debug_pubtypes", &PubTypesSection)
- .Case("debug_gnu_pubnames", &GnuPubNamesSection)
- .Case("debug_gnu_pubtypes", &GnuPubTypesSection)
- .Case("debug_info.dwo", &InfoDWOSection.Data)
- .Case("debug_abbrev.dwo", &AbbrevDWOSection)
- .Case("debug_loc.dwo", &LocDWOSection.Data)
- .Case("debug_line.dwo", &LineDWOSection.Data)
- .Case("debug_str.dwo", &StringDWOSection)
- .Case("debug_str_offsets.dwo", &StringOffsetDWOSection)
- .Case("debug_addr", &AddrSection)
- .Case("apple_names", &AppleNamesSection.Data)
- .Case("apple_types", &AppleTypesSection.Data)
- .Case("apple_namespaces", &AppleNamespacesSection.Data)
- .Case("apple_namespac", &AppleNamespacesSection.Data)
- .Case("apple_objc", &AppleObjCSection.Data)
- .Case("debug_cu_index", &CUIndexSection)
- .Case("debug_tu_index", &TUIndexSection)
- .Case("gdb_index", &GdbIndexSection)
- // Any more debug info sections go here.
- .Default(nullptr);
- if (SectionData) {
+ if (StringRef *SectionData = MapSectionToMember(name)) {
*SectionData = data;
if (name == "debug_ranges") {
// FIXME: Use the other dwo range section when we emit it.
@@ -716,73 +781,19 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
if (Section.relocation_begin() != Section.relocation_end()) {
uint64_t SectionSize = RelocatedSection->getSize();
for (const RelocationRef &Reloc : Section.relocations()) {
- uint64_t Address = Reloc.getOffset();
- uint64_t Type = Reloc.getType();
- uint64_t SymAddr = 0;
- uint64_t SectionLoadAddress = 0;
- object::symbol_iterator Sym = Reloc.getSymbol();
- object::section_iterator RSec = Obj.section_end();
-
- // First calculate the address of the symbol or section as it appears
- // in the objct file
- if (Sym != Obj.symbol_end()) {
- Expected<uint64_t> SymAddrOrErr = Sym->getAddress();
- if (!SymAddrOrErr) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymAddrOrErr.takeError(), OS, "");
- OS.flush();
- errs() << "error: failed to compute symbol address: "
- << Buf << '\n';
- continue;
- }
- SymAddr = *SymAddrOrErr;
- // Also remember what section this symbol is in for later
- auto SectOrErr = Sym->getSection();
- if (!SectOrErr) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SectOrErr.takeError(), OS, "");
- OS.flush();
- errs() << "error: failed to get symbol section: "
- << Buf << '\n';
- continue;
- }
- RSec = *SectOrErr;
- } else if (auto *MObj = dyn_cast<MachOObjectFile>(&Obj)) {
- // MachO also has relocations that point to sections and
- // scattered relocations.
- auto RelocInfo = MObj->getRelocation(Reloc.getRawDataRefImpl());
- if (MObj->isRelocationScattered(RelocInfo)) {
- // FIXME: it's not clear how to correctly handle scattered
- // relocations.
- continue;
- } else {
- RSec = MObj->getRelocationSection(Reloc.getRawDataRefImpl());
- SymAddr = RSec->getAddress();
- }
- }
+ // FIXME: it's not clear how to correctly handle scattered
+ // relocations.
+ if (isRelocScattered(Obj, Reloc))
+ continue;
- // If we are given load addresses for the sections, we need to adjust:
- // SymAddr = (Address of Symbol Or Section in File) -
- // (Address of Section in File) +
- // (Load Address of Section)
- if (L != nullptr && RSec != Obj.section_end()) {
- // RSec is now either the section being targeted or the section
- // containing the symbol being targeted. In either case,
- // we need to perform the same computation.
- StringRef SecName;
- RSec->getName(SecName);
-// llvm::dbgs() << "Name: '" << SecName
-// << "', RSec: " << RSec->getRawDataRefImpl()
-// << ", Section: " << Section.getRawDataRefImpl() << "\n";
- SectionLoadAddress = L->getSectionLoadAddress(*RSec);
- if (SectionLoadAddress != 0)
- SymAddr += SectionLoadAddress - RSec->getAddress();
+ Expected<uint64_t> SymAddrOrErr = getSymbolAddress(Obj, Reloc, L);
+ if (!SymAddrOrErr) {
+ errs() << toString(SymAddrOrErr.takeError()) << '\n';
+ continue;
}
object::RelocVisitor V(Obj);
- object::RelocToApply R(V.visit(Type, Reloc, SymAddr));
+ object::RelocToApply R(V.visit(Reloc.getType(), Reloc, *SymAddrOrErr));
if (V.error()) {
SmallString<32> Name;
Reloc.getTypeName(Name);
@@ -790,7 +801,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
<< Name << "\n";
continue;
}
-
+ uint64_t Address = Reloc.getOffset();
if (Address + R.Width > SectionSize) {
errs() << "error: " << R.Width << "-byte relocation starting "
<< Address << " bytes into section " << name << " which is "
@@ -812,4 +823,49 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
}
}
-void DWARFContextInMemory::anchor() { }
+DWARFContextInMemory::DWARFContextInMemory(
+ const StringMap<std::unique_ptr<MemoryBuffer>> &Sections, uint8_t AddrSize,
+ bool isLittleEndian)
+ : IsLittleEndian(isLittleEndian), AddressSize(AddrSize) {
+ for (const auto &SecIt : Sections) {
+ if (StringRef *SectionData = MapSectionToMember(SecIt.first()))
+ *SectionData = SecIt.second->getBuffer();
+ }
+}
+
+StringRef *DWARFContextInMemory::MapSectionToMember(StringRef Name) {
+ return StringSwitch<StringRef *>(Name)
+ .Case("debug_info", &InfoSection.Data)
+ .Case("debug_abbrev", &AbbrevSection)
+ .Case("debug_loc", &LocSection.Data)
+ .Case("debug_line", &LineSection.Data)
+ .Case("debug_aranges", &ARangeSection)
+ .Case("debug_frame", &DebugFrameSection)
+ .Case("eh_frame", &EHFrameSection)
+ .Case("debug_str", &StringSection)
+ .Case("debug_ranges", &RangeSection)
+ .Case("debug_macinfo", &MacinfoSection)
+ .Case("debug_pubnames", &PubNamesSection)
+ .Case("debug_pubtypes", &PubTypesSection)
+ .Case("debug_gnu_pubnames", &GnuPubNamesSection)
+ .Case("debug_gnu_pubtypes", &GnuPubTypesSection)
+ .Case("debug_info.dwo", &InfoDWOSection.Data)
+ .Case("debug_abbrev.dwo", &AbbrevDWOSection)
+ .Case("debug_loc.dwo", &LocDWOSection.Data)
+ .Case("debug_line.dwo", &LineDWOSection.Data)
+ .Case("debug_str.dwo", &StringDWOSection)
+ .Case("debug_str_offsets.dwo", &StringOffsetDWOSection)
+ .Case("debug_addr", &AddrSection)
+ .Case("apple_names", &AppleNamesSection.Data)
+ .Case("apple_types", &AppleTypesSection.Data)
+ .Case("apple_namespaces", &AppleNamespacesSection.Data)
+ .Case("apple_namespac", &AppleNamespacesSection.Data)
+ .Case("apple_objc", &AppleObjCSection.Data)
+ .Case("debug_cu_index", &CUIndexSection)
+ .Case("debug_tu_index", &TUIndexSection)
+ .Case("gdb_index", &GdbIndexSection)
+ // Any more debug info sections go here.
+ .Default(nullptr);
+}
+
+void DWARFContextInMemory::anchor() {}
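
A minimal usage sketch of the new section-map constructor (hypothetical
setup; `InfoBytes` and `AbbrevBytes` are assumed StringRefs holding raw
section contents, the constructor and MapSectionToMember are the ones added
above):

    StringMap<std::unique_ptr<MemoryBuffer>> Sections;
    Sections["debug_info"] = MemoryBuffer::getMemBuffer(InfoBytes);
    Sections["debug_abbrev"] = MemoryBuffer::getMemBuffer(AbbrevBytes);
    // Build a DWARF context straight from named buffers, with no object
    // file involved; names are matched by MapSectionToMember.
    DWARFContextInMemory DICtx(Sections, /*AddrSize=*/8,
                               /*isLittleEndian=*/true);
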
diff --git a/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
index e63e28997ed0..76dd2e4c21bc 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFDebugAbbrev.cpp ----------------------------------------------===//
+//===- DWARFDebugAbbrev.cpp -----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,6 +10,10 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cinttypes>
+#include <cstdint>
+
using namespace llvm;
DWARFAbbreviationDeclarationSet::DWARFAbbreviationDeclarationSet() {
diff --git a/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
index 67589cd01e55..ed5d726ae4e2 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFDebugArangeSet.cpp -------------------------------------------===//
+//===- DWARFDebugArangeSet.cpp --------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,8 +10,11 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
+#include <cinttypes>
+#include <cstdint>
+#include <cstring>
+
using namespace llvm;
void DWARFDebugArangeSet::clear() {
diff --git a/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
index 27a02c4c50d0..0cf71f530446 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFDebugAranges.cpp -----------------------------------*- C++ -*-===//
+//===- DWARFDebugAranges.cpp ----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,11 +11,13 @@
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/DataExtractor.h"
#include <algorithm>
#include <cassert>
+#include <cstdint>
#include <set>
+#include <vector>
+
using namespace llvm;
void DWARFDebugAranges::extract(DataExtractor DebugArangesData) {
@@ -81,7 +83,7 @@ void DWARFDebugAranges::construct() {
std::sort(Endpoints.begin(), Endpoints.end());
uint64_t PrevAddress = -1ULL;
for (const auto &E : Endpoints) {
- if (PrevAddress < E.Address && ValidCUs.size() > 0) {
+ if (PrevAddress < E.Address && !ValidCUs.empty()) {
// If the address range between two endpoints is described by some
// CU, first try to extend the last range in Aranges. If we can't
// do it, start a new range.
diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index 32b8320e26c5..b55ed6a46849 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFDebugFrame.h - Parsing of .debug_frame -------------*- C++ -*-===//
+//===- DWARFDebugFrame.h - Parsing of .debug_frame ------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
@@ -15,6 +14,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataExtractor.h"
@@ -465,8 +465,7 @@ void FrameEntry::dumpInstructions(raw_ostream &OS) const {
}
}
-DWARFDebugFrame::DWARFDebugFrame(bool IsEH) : IsEH(IsEH) {
-}
+DWARFDebugFrame::DWARFDebugFrame(bool IsEH) : IsEH(IsEH) {}
DWARFDebugFrame::~DWARFDebugFrame() = default;
@@ -485,17 +484,17 @@ static unsigned getSizeForEncoding(const DataExtractor &Data,
unsigned format = symbolEncoding & 0x0f;
switch (format) {
default: llvm_unreachable("Unknown Encoding");
- case dwarf::DW_EH_PE_absptr:
- case dwarf::DW_EH_PE_signed:
+ case DW_EH_PE_absptr:
+ case DW_EH_PE_signed:
return Data.getAddressSize();
- case dwarf::DW_EH_PE_udata2:
- case dwarf::DW_EH_PE_sdata2:
+ case DW_EH_PE_udata2:
+ case DW_EH_PE_sdata2:
return 2;
- case dwarf::DW_EH_PE_udata4:
- case dwarf::DW_EH_PE_sdata4:
+ case DW_EH_PE_udata4:
+ case DW_EH_PE_sdata4:
return 4;
- case dwarf::DW_EH_PE_udata8:
- case dwarf::DW_EH_PE_sdata8:
+ case DW_EH_PE_udata8:
+ case DW_EH_PE_sdata8:
return 8;
}
}
diff --git a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index c487e1dca7c6..35f673c7acc6 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFDebugInfoEntry.cpp -------------------------------------------===//
+//===- DWARFDebugInfoEntry.cpp --------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,20 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#include "SyntaxHighlighting.h"
-#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
-#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/Support/DataExtractor.h"
+#include <cstddef>
+#include <cstdint>
+
using namespace llvm;
using namespace dwarf;
-using namespace syntax;
bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U,
uint32_t *OffsetPtr) {
@@ -28,6 +25,7 @@ bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U,
const uint32_t UEndOffset = U.getNextUnitOffset();
return extractFast(U, OffsetPtr, DebugInfoData, UEndOffset, 0);
}
+
bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint32_t *OffsetPtr,
const DataExtractor &DebugInfoData,
uint32_t UEndOffset, uint32_t D) {
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 494059461fd7..e4670519b797 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFDebugLine.cpp ------------------------------------------------===//
+//===- DWARFDebugLine.cpp -------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,14 +7,23 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallString.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
+#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <cassert>
+#include <cinttypes>
+#include <cstdint>
+#include <cstdio>
+#include <utility>
+
using namespace llvm;
using namespace dwarf;
+
typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
DWARFDebugLine::Prologue::Prologue() { clear(); }
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index ae5b9d70a2eb..e2799ab2d243 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFDebugLoc.cpp -------------------------------------------------===//
+//===- DWARFDebugLoc.cpp --------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,10 +7,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
+#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cinttypes>
+#include <cstdint>
using namespace llvm;
@@ -71,7 +76,7 @@ void DWARFDebugLoc::parse(DataExtractor data, unsigned AddressSize) {
}
}
if (data.isValidOffset(Offset))
- llvm::errs() << "error: failed to consume entire .debug_loc section\n";
+ errs() << "error: failed to consume entire .debug_loc section\n";
}
void DWARFDebugLocDWO::parse(DataExtractor data) {
@@ -85,8 +90,8 @@ void DWARFDebugLocDWO::parse(DataExtractor data) {
data.getU8(&Offset))) != dwarf::DW_LLE_end_of_list) {
if (Kind != dwarf::DW_LLE_startx_length) {
- llvm::errs() << "error: dumping support for LLE of kind " << (int)Kind
- << " not implemented\n";
+ errs() << "error: dumping support for LLE of kind " << (int)Kind
+ << " not implemented\n";
return;
}
@@ -123,4 +128,3 @@ void DWARFDebugLocDWO::dump(raw_ostream &OS) const {
}
}
}
-
diff --git a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
index 7710a90b5e13..e0a9adde8e58 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFDebugMacro.cpp -----------------------------------------------===//
+//===- DWARFDebugMacro.cpp ------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,11 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
#include "SyntaxHighlighting.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
using namespace llvm;
using namespace dwarf;
diff --git a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
index 3c1fe93090c6..662e53d9d7e6 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFDebugPubTable.cpp ---------------------------------------------===//
+//===- DWARFDebugPubTable.cpp ---------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,16 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
using namespace llvm;
-using namespace llvm::dwarf;
+using namespace dwarf;
DWARFDebugPubTable::DWARFDebugPubTable(StringRef Data, bool LittleEndian,
bool GnuStyle)
@@ -54,7 +58,7 @@ void DWARFDebugPubTable::dump(StringRef Name, raw_ostream &OS) const {
OS << format("0x%8.8x ", E.SecOffset);
if (GnuStyle) {
StringRef EntryLinkage =
- dwarf::GDBIndexEntryLinkageString(E.Descriptor.Linkage);
+ GDBIndexEntryLinkageString(E.Descriptor.Linkage);
StringRef EntryKind = dwarf::GDBIndexEntryKindString(E.Descriptor.Kind);
OS << format("%-8s", EntryLinkage.data()) << ' '
<< format("%-8s", EntryKind.data()) << ' ';
diff --git a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
index d5df6885f5e9..f1d82fda8c06 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFDebugRangesList.cpp ------------------------------------------===//
+//===- DWARFDebugRangesList.cpp -------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,6 +10,9 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include <cinttypes>
+#include <cstdint>
+#include <utility>
using namespace llvm;
diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp
index 89b83b11ab68..4308cc2e2639 100644
--- a/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFDie.cpp ------------------------------------------------------===//
+//===- DWARFDie.cpp -------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,25 +7,33 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "SyntaxHighlighting.h"
-#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cinttypes>
+#include <cstdint>
+#include <string>
+#include <utility>
using namespace llvm;
using namespace dwarf;
using namespace syntax;
-namespace {
- static void dumpApplePropertyAttribute(raw_ostream &OS, uint64_t Val) {
+static void dumpApplePropertyAttribute(raw_ostream &OS, uint64_t Val) {
OS << " (";
do {
uint64_t Shift = countTrailingZeros(Val);
@@ -122,8 +130,6 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
OS << ")\n";
}
-} // end anonymous namespace
-
bool DWARFDie::isSubprogramDIE() const {
return getTag() == DW_TAG_subprogram;
}
@@ -134,7 +140,7 @@ bool DWARFDie::isSubroutineDIE() const {
}
Optional<DWARFFormValue>
-DWARFDie::getAttributeValue(dwarf::Attribute Attr) const {
+DWARFDie::find(dwarf::Attribute Attr) const {
if (!isValid())
return None;
auto AbbrevDecl = getAbbreviationDeclarationPtr();
@@ -143,54 +149,41 @@ DWARFDie::getAttributeValue(dwarf::Attribute Attr) const {
return None;
}
-const char *DWARFDie::getAttributeValueAsString(dwarf::Attribute Attr,
- const char *FailValue) const {
- auto FormValue = getAttributeValue(Attr);
- if (!FormValue)
- return FailValue;
- Optional<const char *> Result = FormValue->getAsCString();
- return Result.hasValue() ? Result.getValue() : FailValue;
-}
-
-Optional<uint64_t>
-DWARFDie::getAttributeValueAsAddress(dwarf::Attribute Attr) const {
- if (auto FormValue = getAttributeValue(Attr))
- return FormValue->getAsAddress();
- return None;
-}
-
-Optional<int64_t>
-DWARFDie::getAttributeValueAsSignedConstant(dwarf::Attribute Attr) const {
- if (auto FormValue = getAttributeValue(Attr))
- return FormValue->getAsSignedConstant();
- return None;
-}
-
-Optional<uint64_t>
-DWARFDie::getAttributeValueAsUnsignedConstant(dwarf::Attribute Attr) const {
- if (auto FormValue = getAttributeValue(Attr))
- return FormValue->getAsUnsignedConstant();
- return None;
-}
-
-Optional<uint64_t>
-DWARFDie::getAttributeValueAsReference(dwarf::Attribute Attr) const {
- if (auto FormValue = getAttributeValue(Attr))
- return FormValue->getAsReference();
+Optional<DWARFFormValue>
+DWARFDie::find(ArrayRef<dwarf::Attribute> Attrs) const {
+ if (!isValid())
+ return None;
+ auto AbbrevDecl = getAbbreviationDeclarationPtr();
+ if (AbbrevDecl) {
+ for (auto Attr : Attrs) {
+ if (auto Value = AbbrevDecl->getAttributeValue(getOffset(), Attr, *U))
+ return Value;
+ }
+ }
return None;
}
-Optional<uint64_t>
-DWARFDie::getAttributeValueAsSectionOffset(dwarf::Attribute Attr) const {
- if (auto FormValue = getAttributeValue(Attr))
- return FormValue->getAsSectionOffset();
+Optional<DWARFFormValue>
+DWARFDie::findRecursively(ArrayRef<dwarf::Attribute> Attrs) const {
+ if (!isValid())
+ return None;
+ auto Die = *this;
+ if (auto Value = Die.find(Attrs))
+ return Value;
+ if (auto D = Die.getAttributeValueAsReferencedDie(DW_AT_abstract_origin))
+ Die = D;
+ if (auto Value = Die.find(Attrs))
+ return Value;
+ if (auto D = Die.getAttributeValueAsReferencedDie(DW_AT_specification))
+ Die = D;
+ if (auto Value = Die.find(Attrs))
+ return Value;
return None;
}
-
DWARFDie
DWARFDie::getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const {
- auto SpecRef = getAttributeValueAsReference(Attr);
+ auto SpecRef = toReference(find(Attr));
if (SpecRef) {
auto SpecUnit = U->getUnitSection().getUnitForOffset(*SpecRef);
if (SpecUnit)
@@ -201,14 +194,11 @@ DWARFDie::getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const {
Optional<uint64_t>
DWARFDie::getRangesBaseAttribute() const {
- auto Result = getAttributeValueAsSectionOffset(DW_AT_rnglists_base);
- if (Result)
- return Result;
- return getAttributeValueAsSectionOffset(DW_AT_GNU_ranges_base);
+ return toSectionOffset(find({DW_AT_rnglists_base, DW_AT_GNU_ranges_base}));
}
Optional<uint64_t> DWARFDie::getHighPC(uint64_t LowPC) const {
- if (auto FormValue = getAttributeValue(DW_AT_high_pc)) {
+ if (auto FormValue = find(DW_AT_high_pc)) {
if (auto Address = FormValue->getAsAddress()) {
// High PC is an address.
return Address;
@@ -222,7 +212,7 @@ Optional<uint64_t> DWARFDie::getHighPC(uint64_t LowPC) const {
}
bool DWARFDie::getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC) const {
- auto LowPcAddr = getAttributeValueAsAddress(DW_AT_low_pc);
+ auto LowPcAddr = toAddress(find(DW_AT_low_pc));
if (!LowPcAddr)
return false;
if (auto HighPcAddr = getHighPC(*LowPcAddr)) {
@@ -243,7 +233,7 @@ DWARFDie::getAddressRanges() const {
return DWARFAddressRangesVector(1, std::make_pair(LowPC, HighPC));
}
// Multiple ranges from .debug_ranges section.
- auto RangesOffset = getAttributeValueAsSectionOffset(DW_AT_ranges);
+ auto RangesOffset = toSectionOffset(find(DW_AT_ranges));
if (RangesOffset) {
DWARFDebugRangeList RangeList;
if (U->extractRangeList(*RangesOffset, RangeList))
@@ -284,33 +274,26 @@ const char *
DWARFDie::getName(DINameKind Kind) const {
if (!isValid() || Kind == DINameKind::None)
return nullptr;
- const char *name = nullptr;
// Try to get mangled name only if it was asked for.
if (Kind == DINameKind::LinkageName) {
- if ((name = getAttributeValueAsString(DW_AT_MIPS_linkage_name, nullptr)))
- return name;
- if ((name = getAttributeValueAsString(DW_AT_linkage_name, nullptr)))
- return name;
+ if (auto Name = dwarf::toString(findRecursively({DW_AT_MIPS_linkage_name,
+ DW_AT_linkage_name}), nullptr))
+ return Name;
}
- if ((name = getAttributeValueAsString(DW_AT_name, nullptr)))
- return name;
- // Try to get name from specification DIE.
- DWARFDie SpecDie = getAttributeValueAsReferencedDie(DW_AT_specification);
- if (SpecDie && (name = SpecDie.getName(Kind)))
- return name;
- // Try to get name from abstract origin DIE.
- DWARFDie AbsDie = getAttributeValueAsReferencedDie(DW_AT_abstract_origin);
- if (AbsDie && (name = AbsDie.getName(Kind)))
- return name;
+ if (auto Name = dwarf::toString(findRecursively(DW_AT_name), nullptr))
+ return Name;
return nullptr;
}
+uint64_t DWARFDie::getDeclLine() const {
+ return toUnsigned(findRecursively(DW_AT_decl_line), 0);
+}
+
void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine,
uint32_t &CallColumn) const {
- CallFile = getAttributeValueAsUnsignedConstant(DW_AT_call_file).getValueOr(0);
- CallLine = getAttributeValueAsUnsignedConstant(DW_AT_call_line).getValueOr(0);
- CallColumn =
- getAttributeValueAsUnsignedConstant(DW_AT_call_column).getValueOr(0);
+ CallFile = toUnsigned(find(DW_AT_call_file), 0);
+ CallLine = toUnsigned(find(DW_AT_call_line), 0);
+ CallColumn = toUnsigned(find(DW_AT_call_column), 0);
}
void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth,
@@ -340,6 +323,12 @@ void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth,
// Dump all data in the DIE for the attributes.
for (const auto &AttrSpec : AbbrevDecl->attributes()) {
+ if (AttrSpec.Form == DW_FORM_implicit_const) {
+        // We are dumping the .debug_info section; implicit_const attribute
+        // values are not actually stored here but in the .debug_abbrev
+        // section, so we just skip such attributes.
+ continue;
+ }
dumpAttribute(OS, *this, &offset, AttrSpec.Attr, AttrSpec.Form,
Indent);
}
@@ -361,7 +350,6 @@ void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth,
}
}
-
void DWARFDie::getInlinedChainForAddress(
const uint64_t Address, SmallVectorImpl<DWARFDie> &InlinedChain) const {
if (isNULL())
@@ -399,3 +387,53 @@ DWARFDie DWARFDie::getSibling() const {
return U->getSibling(Die);
return DWARFDie();
}
+
+iterator_range<DWARFDie::attribute_iterator>
+DWARFDie::attributes() const {
+ return make_range(attribute_iterator(*this, false),
+ attribute_iterator(*this, true));
+}
+
+DWARFDie::attribute_iterator::attribute_iterator(DWARFDie D, bool End) :
+ Die(D), AttrValue(0), Index(0) {
+ auto AbbrDecl = Die.getAbbreviationDeclarationPtr();
+ assert(AbbrDecl && "Must have abbreviation declaration");
+ if (End) {
+ // This is the end iterator so we set the index to the attribute count.
+ Index = AbbrDecl->getNumAttributes();
+ } else {
+ // This is the begin iterator so we extract the value for this->Index.
+ AttrValue.Offset = D.getOffset() + AbbrDecl->getCodeByteSize();
+ updateForIndex(*AbbrDecl, 0);
+ }
+}
+
+void DWARFDie::attribute_iterator::updateForIndex(
+ const DWARFAbbreviationDeclaration &AbbrDecl, uint32_t I) {
+ Index = I;
+  // AbbrDecl must be valid before calling this function.
+ auto NumAttrs = AbbrDecl.getNumAttributes();
+ if (Index < NumAttrs) {
+ AttrValue.Attr = AbbrDecl.getAttrByIndex(Index);
+    // Advance the offset by the byte size of the previous attribute value.
+ AttrValue.Offset += AttrValue.ByteSize;
+ AttrValue.Value.setForm(AbbrDecl.getFormByIndex(Index));
+ uint32_t ParseOffset = AttrValue.Offset;
+ auto U = Die.getDwarfUnit();
+ assert(U && "Die must have valid DWARF unit");
+ bool b = AttrValue.Value.extractValue(U->getDebugInfoExtractor(),
+ &ParseOffset, U);
+ (void)b;
+ assert(b && "extractValue cannot fail on fully parsed DWARF");
+ AttrValue.ByteSize = ParseOffset - AttrValue.Offset;
+ } else {
+ assert(Index == NumAttrs && "Indexes should be [0, NumAttrs) only");
+ AttrValue.clear();
+ }
+}
+
+DWARFDie::attribute_iterator &DWARFDie::attribute_iterator::operator++() {
+ if (auto AbbrDecl = Die.getAbbreviationDeclarationPtr())
+ updateForIndex(*AbbrDecl, Index + 1);
+ return *this;
+}
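
A short sketch of the consolidated DWARFDie accessor API introduced above
(hypothetical `Die`; find/findRecursively/attributes and the dwarf::to*
helpers are the ones this patch adds or uses):

    // One getAttributeValueAs* method per result type becomes
    // find()/findRecursively() returning Optional<DWARFFormValue>,
    // decoded by dwarf::to* helpers that take an optional default.
    uint64_t DeclLine = toUnsigned(Die.findRecursively(DW_AT_decl_line), 0);
    auto LowPC = toAddress(Die.find(DW_AT_low_pc));   // Optional<uint64_t>
    auto RangesBase = toSectionOffset(
        Die.find({DW_AT_rnglists_base, DW_AT_GNU_ranges_base}));
    // The new attribute_iterator walks every attribute of the DIE; its
    // value exposes Attr, Value, Offset, and ByteSize as seen above.
    for (const auto &A : Die.attributes())
      (void)A;
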
diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index dc9310dc4e89..6de57b999adc 100644
--- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFFormValue.cpp ------------------------------------------------===//
+//===- DWARFFormValue.cpp -------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,16 +9,21 @@
#include "SyntaxHighlighting.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include <cassert>
+#include <cinttypes>
+#include <cstdint>
#include <limits>
+
using namespace llvm;
using namespace dwarf;
using namespace syntax;
@@ -66,13 +71,16 @@ class FormSizeHelper {
public:
FormSizeHelper(uint16_t V, uint8_t A, llvm::dwarf::DwarfFormat F)
- : Version(V), AddrSize(A), Format(F) {}
+ : Version(V), AddrSize(A), Format(F) {}
+
uint8_t getAddressByteSize() const { return AddrSize; }
+
uint8_t getRefAddrByteSize() const {
if (Version == 2)
return AddrSize;
return getDwarfOffsetByteSize();
}
+
uint8_t getDwarfOffsetByteSize() const {
switch (Format) {
case dwarf::DwarfFormat::DWARF32:
@@ -120,14 +128,21 @@ static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, const T *U) {
case DW_FORM_flag:
case DW_FORM_data1:
case DW_FORM_ref1:
+ case DW_FORM_strx1:
+ case DW_FORM_addrx1:
return 1;
case DW_FORM_data2:
case DW_FORM_ref2:
+ case DW_FORM_strx2:
+ case DW_FORM_addrx2:
return 2;
case DW_FORM_data4:
case DW_FORM_ref4:
+ case DW_FORM_ref_sup4:
+ case DW_FORM_strx4:
+ case DW_FORM_addrx4:
return 4;
case DW_FORM_strp:
@@ -136,7 +151,6 @@ static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, const T *U) {
case DW_FORM_line_strp:
case DW_FORM_sec_offset:
case DW_FORM_strp_sup:
- case DW_FORM_ref_sup:
if (U)
return U->getDwarfOffsetByteSize();
return None;
@@ -144,6 +158,7 @@ static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, const T *U) {
case DW_FORM_data8:
case DW_FORM_ref8:
case DW_FORM_ref_sig8:
+ case DW_FORM_ref_sup8:
return 8;
case DW_FORM_flag_present:
@@ -211,7 +226,14 @@ static bool skipFormValue(dwarf::Form Form, const DataExtractor &DebugInfoData,
case DW_FORM_ref4:
case DW_FORM_ref8:
case DW_FORM_ref_sig8:
- case DW_FORM_ref_sup:
+ case DW_FORM_ref_sup4:
+ case DW_FORM_ref_sup8:
+ case DW_FORM_strx1:
+ case DW_FORM_strx2:
+ case DW_FORM_strx4:
+ case DW_FORM_addrx1:
+ case DW_FORM_addrx2:
+ case DW_FORM_addrx4:
case DW_FORM_sec_offset:
case DW_FORM_strp:
case DW_FORM_strp_sup:
@@ -339,14 +361,21 @@ bool DWARFFormValue::extractValue(const DataExtractor &data,
case DW_FORM_data1:
case DW_FORM_ref1:
case DW_FORM_flag:
+ case DW_FORM_strx1:
+ case DW_FORM_addrx1:
Value.uval = data.getU8(offset_ptr);
break;
case DW_FORM_data2:
case DW_FORM_ref2:
+ case DW_FORM_strx2:
+ case DW_FORM_addrx2:
Value.uval = data.getU16(offset_ptr);
break;
case DW_FORM_data4:
- case DW_FORM_ref4: {
+ case DW_FORM_ref4:
+ case DW_FORM_ref_sup4:
+ case DW_FORM_strx4:
+ case DW_FORM_addrx4: {
Value.uval = data.getU32(offset_ptr);
if (!U)
break;
@@ -357,6 +386,7 @@ bool DWARFFormValue::extractValue(const DataExtractor &data,
}
case DW_FORM_data8:
case DW_FORM_ref8:
+ case DW_FORM_ref_sup8:
Value.uval = data.getU64(offset_ptr);
break;
case DW_FORM_sdata:
@@ -378,8 +408,7 @@ bool DWARFFormValue::extractValue(const DataExtractor &data,
case DW_FORM_GNU_ref_alt:
case DW_FORM_GNU_strp_alt:
case DW_FORM_line_strp:
- case DW_FORM_strp_sup:
- case DW_FORM_ref_sup: {
+ case DW_FORM_strp_sup: {
if (!U)
return false;
RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr);
@@ -400,7 +429,9 @@ bool DWARFFormValue::extractValue(const DataExtractor &data,
Value.uval = data.getULEB128(offset_ptr);
break;
default:
- return false;
+ // DWARFFormValue::skipValue() will have caught this and caused all
+      // DWARF DIEs to fail to be parsed, so this code should not be reachable.
+ llvm_unreachable("unsupported form");
}
} while (indirect);
@@ -495,21 +526,18 @@ DWARFFormValue::dump(raw_ostream &OS) const {
case DW_FORM_sdata: OS << Value.sval; break;
case DW_FORM_udata: OS << Value.uval; break;
- case DW_FORM_strp: {
+ case DW_FORM_strp:
OS << format(" .debug_str[0x%8.8x] = ", (uint32_t)uvalue);
dumpString(OS);
break;
- }
- case DW_FORM_GNU_str_index: {
+ case DW_FORM_GNU_str_index:
OS << format(" indexed (%8.8x) string = ", (uint32_t)uvalue);
dumpString(OS);
break;
- }
- case DW_FORM_GNU_strp_alt: {
+ case DW_FORM_GNU_strp_alt:
OS << format("alt indirect string, offset: 0x%" PRIx64 "", uvalue);
dumpString(OS);
break;
- }
case DW_FORM_ref_addr:
OS << format("0x%016" PRIx64, uvalue);
break;
@@ -674,4 +702,3 @@ Optional<uint64_t> DWARFFormValue::getAsReferenceUVal() const {
return None;
return Value.uval;
}
-
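
A condensed restatement of the fixed sizes the two hunks above assign to the
new DWARF v5 forms (this mirrors the switch cases, it is not a separate API):

    //   DW_FORM_strx1 / DW_FORM_addrx1                     -> 1 byte
    //   DW_FORM_strx2 / DW_FORM_addrx2                     -> 2 bytes
    //   DW_FORM_strx4 / DW_FORM_addrx4 / DW_FORM_ref_sup4  -> 4 bytes
    //   DW_FORM_ref_sup8                                   -> 8 bytes
    // DW_FORM_ref_sup itself is dropped in favor of the sized variants.
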
diff --git a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
index ebb996162f1b..76354a9b1ddb 100644
--- a/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
+++ b/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFGdbIndex.cpp -------------------------------------------------===//
+//===- DWARFGdbIndex.cpp --------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,10 +7,16 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cinttypes>
+#include <cstdint>
+#include <utility>
using namespace llvm;
diff --git a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
index 88fb20381f95..e0f819383289 100644
--- a/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFTypeUnit.cpp -------------------------------------------------===//
+//===- DWARFTypeUnit.cpp --------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,11 +7,14 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
-#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
-#include "llvm/Support/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include <cinttypes>
using namespace llvm;
@@ -26,22 +29,24 @@ bool DWARFTypeUnit::extractImpl(DataExtractor debug_info,
void DWARFTypeUnit::dump(raw_ostream &OS, bool SummarizeTypes) {
DWARFDie TD = getDIEForOffset(TypeOffset + getOffset());
- const char *Name = TD.getAttributeValueAsString(llvm::dwarf::DW_AT_name, "");
+ const char *Name = TD.getName(DINameKind::ShortName);
if (SummarizeTypes) {
OS << "name = '" << Name << "'"
- << " type_signature = " << format("0x%16" PRIx64, TypeHash)
+ << " type_signature = " << format("0x%016" PRIx64, TypeHash)
<< " length = " << format("0x%08x", getLength()) << '\n';
return;
}
OS << format("0x%08x", getOffset()) << ": Type Unit:"
<< " length = " << format("0x%08x", getLength())
- << " version = " << format("0x%04x", getVersion())
- << " abbr_offset = " << format("0x%04x", getAbbreviations()->getOffset())
+ << " version = " << format("0x%04x", getVersion());
+ if (getVersion() >= 5)
+ OS << " unit_type = " << dwarf::UnitTypeString(getUnitType());
+ OS << " abbr_offset = " << format("0x%04x", getAbbreviations()->getOffset())
<< " addr_size = " << format("0x%02x", getAddressByteSize())
<< " name = '" << Name << "'"
- << " type_signature = " << format("0x%16" PRIx64, TypeHash)
+ << " type_signature = " << format("0x%016" PRIx64, TypeHash)
<< " type_offset = " << format("0x%04x", TypeOffset)
<< " (next unit at " << format("0x%08x", getNextUnitOffset()) << ")\n";
diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp
index ee2c569b0bce..4ee8e8f46d2e 100644
--- a/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -13,6 +13,9 @@
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
@@ -20,12 +23,12 @@
#include "llvm/Support/Path.h"
#include <algorithm>
#include <cassert>
+#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>
-namespace llvm {
-
+using namespace llvm;
using namespace dwarf;
void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) {
@@ -87,7 +90,15 @@ bool DWARFUnit::getStringOffsetSectionItem(uint32_t Index,
bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) {
Length = debug_info.getU32(offset_ptr);
Version = debug_info.getU16(offset_ptr);
- uint64_t AbbrOffset = debug_info.getU32(offset_ptr);
+ uint64_t AbbrOffset;
+ if (Version >= 5) {
+ UnitType = debug_info.getU8(offset_ptr);
+ AddrSize = debug_info.getU8(offset_ptr);
+ AbbrOffset = debug_info.getU32(offset_ptr);
+ } else {
+ AbbrOffset = debug_info.getU32(offset_ptr);
+ AddrSize = debug_info.getU8(offset_ptr);
+ }
if (IndexEntry) {
if (AbbrOffset)
return false;
@@ -99,7 +110,6 @@ bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) {
return false;
AbbrOffset = AbbrEntry->Offset;
}
- AddrSize = debug_info.getU8(offset_ptr);
bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1);
bool VersionOK = DWARFContext::isSupportedVersion(Version);
@@ -151,11 +161,11 @@ void DWARFUnit::clear() {
}
const char *DWARFUnit::getCompilationDir() {
- return getUnitDIE().getAttributeValueAsString(DW_AT_comp_dir, nullptr);
+ return dwarf::toString(getUnitDIE().find(DW_AT_comp_dir), nullptr);
}
Optional<uint64_t> DWARFUnit::getDWOId() {
- return getUnitDIE().getAttributeValueAsUnsignedConstant(DW_AT_GNU_dwo_id);
+ return toUnsigned(getUnitDIE().find(DW_AT_GNU_dwo_id));
}
void DWARFUnit::extractDIEsToVector(
@@ -225,17 +235,11 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
// If CU DIE was just parsed, copy several attribute values from it.
if (!HasCUDie) {
DWARFDie UnitDie = getUnitDIE();
- auto BaseAddr = UnitDie.getAttributeValueAsAddress(DW_AT_low_pc);
- if (!BaseAddr)
- BaseAddr = UnitDie.getAttributeValueAsAddress(DW_AT_entry_pc);
+ auto BaseAddr = toAddress(UnitDie.find({DW_AT_low_pc, DW_AT_entry_pc}));
if (BaseAddr)
setBaseAddress(*BaseAddr);
- AddrOffsetSectionBase =
- UnitDie.getAttributeValueAsSectionOffset(DW_AT_GNU_addr_base)
- .getValueOr(0);
- RangeSectionBase =
- UnitDie.getAttributeValueAsSectionOffset(DW_AT_rnglists_base)
- .getValueOr(0);
+ AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base), 0);
+ RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0);
// Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
// skeleton CU DIE, so that DWARF users not aware of it are not broken.
}
@@ -243,8 +247,7 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
return DieArray.size();
}
-DWARFUnit::DWOHolder::DWOHolder(StringRef DWOPath)
- : DWOU(nullptr) {
+DWARFUnit::DWOHolder::DWOHolder(StringRef DWOPath) {
auto Obj = object::ObjectFile::createObjectFile(DWOPath);
if (!Obj) {
// TODO: Actually report errors helpfully.
@@ -266,17 +269,16 @@ bool DWARFUnit::parseDWO() {
DWARFDie UnitDie = getUnitDIE();
if (!UnitDie)
return false;
- const char *DWOFileName =
- UnitDie.getAttributeValueAsString(DW_AT_GNU_dwo_name, nullptr);
+ auto DWOFileName = dwarf::toString(UnitDie.find(DW_AT_GNU_dwo_name));
if (!DWOFileName)
return false;
- const char *CompilationDir =
- UnitDie.getAttributeValueAsString(DW_AT_comp_dir, nullptr);
+ auto CompilationDir = dwarf::toString(UnitDie.find(DW_AT_comp_dir));
SmallString<16> AbsolutePath;
- if (sys::path::is_relative(DWOFileName) && CompilationDir != nullptr) {
- sys::path::append(AbsolutePath, CompilationDir);
+ if (sys::path::is_relative(*DWOFileName) && CompilationDir &&
+ *CompilationDir) {
+ sys::path::append(AbsolutePath, *CompilationDir);
}
- sys::path::append(AbsolutePath, DWOFileName);
+ sys::path::append(AbsolutePath, *DWOFileName);
DWO = llvm::make_unique<DWOHolder>(AbsolutePath);
DWARFUnit *DWOCU = DWO->getUnit();
// Verify that compile unit in .dwo file is valid.
@@ -374,8 +376,8 @@ DWARFUnit::getInlinedChainForAddress(uint64_t Address,
InlinedChain.clear();
}
-const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
- DWARFSectionKind Kind) {
+const DWARFUnitIndex &llvm::getDWARFUnitIndex(DWARFContext &Context,
+ DWARFSectionKind Kind) {
if (Kind == DW_SECT_INFO)
return Context.getCUIndex();
assert(Kind == DW_SECT_TYPES);
@@ -413,11 +415,10 @@ DWARFDie DWARFUnit::getSibling(const DWARFDebugInfoEntry *Die) {
return DWARFDie();
// Find the next DIE whose depth is the same as the Die's depth.
- for (size_t I=getDIEIndex(Die)+1, EndIdx = DieArray.size(); I<EndIdx; ++I) {
+ for (size_t I = getDIEIndex(Die) + 1, EndIdx = DieArray.size(); I < EndIdx;
+ ++I) {
if (DieArray[I].getDepth() == Depth)
return DWARFDie(this, &DieArray[I]);
}
return DWARFDie();
}
-
-} // end namespace llvm
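
The extractImpl() change above tracks the different field orders of the two
unit-header layouts; schematically, for the 32-bit DWARF format:

    // DWARF v4 and earlier:            DWARF v5:
    //   unit_length          (4)         unit_length          (4)
    //   version              (2)         version              (2)
    //   debug_abbrev_offset  (4)         unit_type            (1)
    //   address_size         (1)         address_size         (1)
    //                                    debug_abbrev_offset  (4)
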
diff --git a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
index 96b316957dfd..0981a4dfdfa5 100644
--- a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
@@ -1,4 +1,4 @@
-//===-- DWARFUnitIndex.cpp ------------------------------------------------===//
+//===- DWARFUnitIndex.cpp -------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,16 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
-
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cinttypes>
+#include <cstdint>
-namespace llvm {
+using namespace llvm;
bool DWARFUnitIndex::Header::parse(DataExtractor IndexData,
uint32_t *OffsetPtr) {
@@ -152,6 +156,7 @@ DWARFUnitIndex::Entry::getOffset(DWARFSectionKind Sec) const {
return &Contributions[i];
return nullptr;
}
+
const DWARFUnitIndex::Entry::SectionContribution *
DWARFUnitIndex::Entry::getOffset() const {
return &Contributions[Index->InfoColumn];
@@ -165,4 +170,3 @@ DWARFUnitIndex::getFromOffset(uint32_t Offset) const {
return &Rows[i];
return nullptr;
}
-}
diff --git a/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp b/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp
index 4f561d062b12..d4f44e446954 100644
--- a/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp
+++ b/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp
@@ -1,4 +1,4 @@
-//===-- SyntaxHighlighting.cpp ----------------------------------*- C++ -*-===//
+//===- SyntaxHighlighting.cpp ---------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,6 +9,8 @@
#include "SyntaxHighlighting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
using namespace dwarf;
using namespace syntax;
@@ -18,16 +20,16 @@ static cl::opt<cl::boolOrDefault>
cl::desc("use colored syntax highlighting (default=autodetect)"),
cl::init(cl::BOU_UNSET));
-WithColor::WithColor(llvm::raw_ostream &OS, enum HighlightColor Type) : OS(OS) {
+WithColor::WithColor(raw_ostream &OS, enum HighlightColor Type) : OS(OS) {
// Detect color from terminal type unless the user passed the --color option.
if (UseColor == cl::BOU_UNSET ? OS.has_colors() : UseColor == cl::BOU_TRUE) {
switch (Type) {
- case Address: OS.changeColor(llvm::raw_ostream::YELLOW); break;
- case String: OS.changeColor(llvm::raw_ostream::GREEN); break;
- case Tag: OS.changeColor(llvm::raw_ostream::BLUE); break;
- case Attribute: OS.changeColor(llvm::raw_ostream::CYAN); break;
- case Enumerator: OS.changeColor(llvm::raw_ostream::MAGENTA); break;
- case Macro: OS.changeColor(llvm::raw_ostream::RED); break;
+ case Address: OS.changeColor(raw_ostream::YELLOW); break;
+ case String: OS.changeColor(raw_ostream::GREEN); break;
+ case Tag: OS.changeColor(raw_ostream::BLUE); break;
+ case Attribute: OS.changeColor(raw_ostream::CYAN); break;
+ case Enumerator: OS.changeColor(raw_ostream::MAGENTA); break;
+ case Macro: OS.changeColor(raw_ostream::RED); break;
}
}
}
diff --git a/lib/DebugInfo/DWARF/SyntaxHighlighting.h b/lib/DebugInfo/DWARF/SyntaxHighlighting.h
index 16e68351d5e1..277de973dbf0 100644
--- a/lib/DebugInfo/DWARF/SyntaxHighlighting.h
+++ b/lib/DebugInfo/DWARF/SyntaxHighlighting.h
@@ -1,4 +1,4 @@
-//===-- SyntaxHighlighting.h ------------------------------------*- C++ -*-===//
+//===- SyntaxHighlighting.h -------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,9 +10,10 @@
#ifndef LLVM_LIB_DEBUGINFO_SYNTAXHIGHLIGHTING_H
#define LLVM_LIB_DEBUGINFO_SYNTAXHIGHLIGHTING_H
-#include "llvm/Support/raw_ostream.h"
-
namespace llvm {
+
+class raw_ostream;
+
namespace dwarf {
namespace syntax {
@@ -22,18 +23,20 @@ enum HighlightColor { Address, String, Tag, Attribute, Enumerator, Macro };
/// An RAII object that temporarily switches an output stream to a
/// specific color.
class WithColor {
- llvm::raw_ostream &OS;
+ raw_ostream &OS;
public:
/// To be used like this: WithColor(OS, syntax::String) << "text";
- WithColor(llvm::raw_ostream &OS, enum HighlightColor Type);
+ WithColor(raw_ostream &OS, enum HighlightColor Type);
~WithColor();
- llvm::raw_ostream& get() { return OS; }
- operator llvm::raw_ostream& () { return OS; }
+ raw_ostream& get() { return OS; }
+ operator raw_ostream& () { return OS; }
};
-}
-}
-}
-#endif
+} // end namespace syntax
+} // end namespace dwarf
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_DEBUGINFO_SYNTAXHIGHLIGHTING_H
diff --git a/lib/DebugInfo/MSF/CMakeLists.txt b/lib/DebugInfo/MSF/CMakeLists.txt
index dcb2a8e0cc9c..6f38de336ee0 100644
--- a/lib/DebugInfo/MSF/CMakeLists.txt
+++ b/lib/DebugInfo/MSF/CMakeLists.txt
@@ -3,8 +3,6 @@ add_llvm_library(LLVMDebugInfoMSF
MSFBuilder.cpp
MSFCommon.cpp
MSFError.cpp
- StreamReader.cpp
- StreamWriter.cpp
ADDITIONAL_HEADER_DIRS
"${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/MSF"
)
diff --git a/lib/DebugInfo/MSF/MappedBlockStream.cpp b/lib/DebugInfo/MSF/MappedBlockStream.cpp
index e52c88a5bfb8..57953cfa338e 100644
--- a/lib/DebugInfo/MSF/MappedBlockStream.cpp
+++ b/lib/DebugInfo/MSF/MappedBlockStream.cpp
@@ -11,8 +11,8 @@
#include "llvm/DebugInfo/MSF/IMSFFile.h"
#include "llvm/DebugInfo/MSF/MSFCommon.h"
-#include "llvm/DebugInfo/MSF/MSFError.h"
#include "llvm/DebugInfo/MSF/MSFStreamLayout.h"
+#include "llvm/Support/BinaryStreamError.h"
using namespace llvm;
using namespace llvm::msf;
@@ -47,22 +47,20 @@ static Interval intersect(const Interval &I1, const Interval &I2) {
MappedBlockStream::MappedBlockStream(uint32_t BlockSize, uint32_t NumBlocks,
const MSFStreamLayout &Layout,
- const ReadableStream &MsfData)
+ BinaryStreamRef MsfData)
: BlockSize(BlockSize), NumBlocks(NumBlocks), StreamLayout(Layout),
MsfData(MsfData) {}
std::unique_ptr<MappedBlockStream>
MappedBlockStream::createStream(uint32_t BlockSize, uint32_t NumBlocks,
const MSFStreamLayout &Layout,
- const ReadableStream &MsfData) {
+ BinaryStreamRef MsfData) {
return llvm::make_unique<MappedBlockStreamImpl<MappedBlockStream>>(
BlockSize, NumBlocks, Layout, MsfData);
}
-std::unique_ptr<MappedBlockStream>
-MappedBlockStream::createIndexedStream(const MSFLayout &Layout,
- const ReadableStream &MsfData,
- uint32_t StreamIndex) {
+std::unique_ptr<MappedBlockStream> MappedBlockStream::createIndexedStream(
+ const MSFLayout &Layout, BinaryStreamRef MsfData, uint32_t StreamIndex) {
assert(StreamIndex < Layout.StreamMap.size() && "Invalid stream index");
MSFStreamLayout SL;
SL.Blocks = Layout.StreamMap[StreamIndex];
@@ -73,7 +71,7 @@ MappedBlockStream::createIndexedStream(const MSFLayout &Layout,
std::unique_ptr<MappedBlockStream>
MappedBlockStream::createDirectoryStream(const MSFLayout &Layout,
- const ReadableStream &MsfData) {
+ BinaryStreamRef MsfData) {
MSFStreamLayout SL;
SL.Blocks = Layout.DirectoryBlocks;
SL.Length = Layout.SB->NumDirectoryBytes;
@@ -82,19 +80,17 @@ MappedBlockStream::createDirectoryStream(const MSFLayout &Layout,
std::unique_ptr<MappedBlockStream>
MappedBlockStream::createFpmStream(const MSFLayout &Layout,
- const ReadableStream &MsfData) {
+ BinaryStreamRef MsfData) {
MSFStreamLayout SL;
initializeFpmStreamLayout(Layout, SL);
return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData);
}
Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
- ArrayRef<uint8_t> &Buffer) const {
+ ArrayRef<uint8_t> &Buffer) {
// Make sure we aren't trying to read beyond the end of the stream.
- if (Size > StreamLayout.Length)
- return make_error<MSFError>(msf_error_code::insufficient_buffer);
- if (Offset > StreamLayout.Length - Size)
- return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ if (auto EC = checkOffset(Offset, Size))
+ return EC;
if (tryReadContiguously(Offset, Size, Buffer))
return Error::success();
@@ -168,11 +164,12 @@ Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
return Error::success();
}
-Error MappedBlockStream::readLongestContiguousChunk(
- uint32_t Offset, ArrayRef<uint8_t> &Buffer) const {
+Error MappedBlockStream::readLongestContiguousChunk(uint32_t Offset,
+ ArrayRef<uint8_t> &Buffer) {
// Make sure we aren't trying to read beyond the end of the stream.
- if (Offset >= StreamLayout.Length)
- return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ if (auto EC = checkOffset(Offset, 1))
+ return EC;
+
uint32_t First = Offset / BlockSize;
uint32_t Last = First;
@@ -197,10 +194,10 @@ Error MappedBlockStream::readLongestContiguousChunk(
return Error::success();
}
-uint32_t MappedBlockStream::getLength() const { return StreamLayout.Length; }
+uint32_t MappedBlockStream::getLength() { return StreamLayout.Length; }
bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size,
- ArrayRef<uint8_t> &Buffer) const {
+ ArrayRef<uint8_t> &Buffer) {
if (Size == 0) {
Buffer = ArrayRef<uint8_t>();
return true;
@@ -241,15 +238,13 @@ bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size,
}
Error MappedBlockStream::readBytes(uint32_t Offset,
- MutableArrayRef<uint8_t> Buffer) const {
+ MutableArrayRef<uint8_t> Buffer) {
uint32_t BlockNum = Offset / BlockSize;
uint32_t OffsetInBlock = Offset % BlockSize;
// Make sure we aren't trying to read beyond the end of the stream.
- if (Buffer.size() > StreamLayout.Length)
- return make_error<MSFError>(msf_error_code::insufficient_buffer);
- if (Offset > StreamLayout.Length - Buffer.size())
- return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ if (auto EC = checkOffset(Offset, Buffer.size()))
+ return EC;
uint32_t BytesLeft = Buffer.size();
uint32_t BytesWritten = 0;
@@ -319,21 +314,21 @@ void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset,
WritableMappedBlockStream::WritableMappedBlockStream(
uint32_t BlockSize, uint32_t NumBlocks, const MSFStreamLayout &Layout,
- const WritableStream &MsfData)
+ WritableBinaryStreamRef MsfData)
: ReadInterface(BlockSize, NumBlocks, Layout, MsfData),
WriteInterface(MsfData) {}
std::unique_ptr<WritableMappedBlockStream>
WritableMappedBlockStream::createStream(uint32_t BlockSize, uint32_t NumBlocks,
const MSFStreamLayout &Layout,
- const WritableStream &MsfData) {
+ WritableBinaryStreamRef MsfData) {
return llvm::make_unique<MappedBlockStreamImpl<WritableMappedBlockStream>>(
BlockSize, NumBlocks, Layout, MsfData);
}
std::unique_ptr<WritableMappedBlockStream>
WritableMappedBlockStream::createIndexedStream(const MSFLayout &Layout,
- const WritableStream &MsfData,
+ WritableBinaryStreamRef MsfData,
uint32_t StreamIndex) {
assert(StreamIndex < Layout.StreamMap.size() && "Invalid stream index");
MSFStreamLayout SL;
@@ -344,7 +339,7 @@ WritableMappedBlockStream::createIndexedStream(const MSFLayout &Layout,
std::unique_ptr<WritableMappedBlockStream>
WritableMappedBlockStream::createDirectoryStream(
- const MSFLayout &Layout, const WritableStream &MsfData) {
+ const MSFLayout &Layout, WritableBinaryStreamRef MsfData) {
MSFStreamLayout SL;
SL.Blocks = Layout.DirectoryBlocks;
SL.Length = Layout.SB->NumDirectoryBytes;
@@ -353,34 +348,31 @@ WritableMappedBlockStream::createDirectoryStream(
std::unique_ptr<WritableMappedBlockStream>
WritableMappedBlockStream::createFpmStream(const MSFLayout &Layout,
- const WritableStream &MsfData) {
+ WritableBinaryStreamRef MsfData) {
MSFStreamLayout SL;
initializeFpmStreamLayout(Layout, SL);
return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData);
}
Error WritableMappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
- ArrayRef<uint8_t> &Buffer) const {
+ ArrayRef<uint8_t> &Buffer) {
return ReadInterface.readBytes(Offset, Size, Buffer);
}
Error WritableMappedBlockStream::readLongestContiguousChunk(
- uint32_t Offset, ArrayRef<uint8_t> &Buffer) const {
+ uint32_t Offset, ArrayRef<uint8_t> &Buffer) {
return ReadInterface.readLongestContiguousChunk(Offset, Buffer);
}
-uint32_t WritableMappedBlockStream::getLength() const {
+uint32_t WritableMappedBlockStream::getLength() {
return ReadInterface.getLength();
}
Error WritableMappedBlockStream::writeBytes(uint32_t Offset,
- ArrayRef<uint8_t> Buffer) const {
+ ArrayRef<uint8_t> Buffer) {
// Make sure we aren't trying to write beyond the end of the stream.
- if (Buffer.size() > getStreamLength())
- return make_error<MSFError>(msf_error_code::insufficient_buffer);
-
- if (Offset > getStreamLayout().Length - Buffer.size())
- return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ if (auto EC = checkOffset(Offset, Buffer.size()))
+ return EC;
uint32_t BlockNum = Offset / getBlockSize();
uint32_t OffsetInBlock = Offset % getBlockSize();
@@ -410,6 +402,4 @@ Error WritableMappedBlockStream::writeBytes(uint32_t Offset,
return Error::success();
}
-Error WritableMappedBlockStream::commit() const {
- return WriteInterface.commit();
-}
+Error WritableMappedBlockStream::commit() { return WriteInterface.commit(); }
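// The recurring pattern in this file: BinaryStreamRef and
// WritableBinaryStreamRef are small value types (stream pointer plus bounds),
// so they replace the old `const ReadableStream &` / `const WritableStream &`
// parameters, and the accessors drop their const qualifiers to match the new
// BinaryStream interface. A minimal sketch of building and slicing one,
// assuming llvm/Support/BinaryByteStream.h:
std::vector<uint8_t> Bytes(4096);
MutableBinaryByteStream Stream(Bytes, llvm::support::little);
WritableBinaryStreamRef Ref(Stream); // cheap value copy, shares the buffer
auto Sub = Ref.drop_front(64);       // views compose without copying data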
diff --git a/lib/DebugInfo/MSF/StreamReader.cpp b/lib/DebugInfo/MSF/StreamReader.cpp
deleted file mode 100644
index b85fd14a3b7f..000000000000
--- a/lib/DebugInfo/MSF/StreamReader.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-//===- StreamReader.cpp - Reads bytes and objects from a stream -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-
-#include "llvm/DebugInfo/MSF/MSFError.h"
-#include "llvm/DebugInfo/MSF/StreamRef.h"
-
-using namespace llvm;
-using namespace llvm::msf;
-
-StreamReader::StreamReader(ReadableStreamRef S) : Stream(S), Offset(0) {}
-
-Error StreamReader::readLongestContiguousChunk(ArrayRef<uint8_t> &Buffer) {
- if (auto EC = Stream.readLongestContiguousChunk(Offset, Buffer))
- return EC;
- Offset += Buffer.size();
- return Error::success();
-}
-
-Error StreamReader::readBytes(ArrayRef<uint8_t> &Buffer, uint32_t Size) {
- if (auto EC = Stream.readBytes(Offset, Size, Buffer))
- return EC;
- Offset += Size;
- return Error::success();
-}
-
-Error StreamReader::readInteger(uint8_t &Dest) {
- const uint8_t *P;
- if (auto EC = readObject(P))
- return EC;
- Dest = *P;
- return Error::success();
-}
-
-Error StreamReader::readInteger(uint16_t &Dest) {
- const support::ulittle16_t *P;
- if (auto EC = readObject(P))
- return EC;
- Dest = *P;
- return Error::success();
-}
-
-Error StreamReader::readInteger(uint32_t &Dest) {
- const support::ulittle32_t *P;
- if (auto EC = readObject(P))
- return EC;
- Dest = *P;
- return Error::success();
-}
-
-Error StreamReader::readInteger(uint64_t &Dest) {
- const support::ulittle64_t *P;
- if (auto EC = readObject(P))
- return EC;
- Dest = *P;
- return Error::success();
-}
-
-Error StreamReader::readInteger(int8_t &Dest) {
- const int8_t *P;
- if (auto EC = readObject(P))
- return EC;
- Dest = *P;
- return Error::success();
-}
-
-Error StreamReader::readInteger(int16_t &Dest) {
- const support::little16_t *P;
- if (auto EC = readObject(P))
- return EC;
- Dest = *P;
- return Error::success();
-}
-
-Error StreamReader::readInteger(int32_t &Dest) {
- const support::little32_t *P;
- if (auto EC = readObject(P))
- return EC;
- Dest = *P;
- return Error::success();
-}
-
-Error StreamReader::readInteger(int64_t &Dest) {
- const support::little64_t *P;
- if (auto EC = readObject(P))
- return EC;
- Dest = *P;
- return Error::success();
-}
-
-Error StreamReader::readZeroString(StringRef &Dest) {
- uint32_t Length = 0;
- // First compute the length of the string by reading 1 byte at a time.
- uint32_t OriginalOffset = getOffset();
- const char *C;
- do {
- if (auto EC = readObject(C))
- return EC;
- if (*C != '\0')
- ++Length;
- } while (*C != '\0');
- // Now go back and request a reference for that many bytes.
- uint32_t NewOffset = getOffset();
- setOffset(OriginalOffset);
-
- ArrayRef<uint8_t> Data;
- if (auto EC = readBytes(Data, Length))
- return EC;
- Dest = StringRef(reinterpret_cast<const char *>(Data.begin()), Data.size());
-
- // Now set the offset back to where it was after we calculated the length.
- setOffset(NewOffset);
- return Error::success();
-}
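// The same two-pass scan (find the null terminator, then re-read that span)
// lives on in BinaryStreamReader::readCString, which the rest of this patch
// switches to. A minimal usage sketch, assuming
// llvm/Support/BinaryStreamReader.h:
BinaryStreamReader Reader(Stream);
StringRef Name;
if (auto EC = Reader.readCString(Name))
  return EC; // Name now covers the bytes up to, not including, the null.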
-
-Error StreamReader::readFixedString(StringRef &Dest, uint32_t Length) {
- ArrayRef<uint8_t> Bytes;
- if (auto EC = readBytes(Bytes, Length))
- return EC;
- Dest = StringRef(reinterpret_cast<const char *>(Bytes.begin()), Bytes.size());
- return Error::success();
-}
-
-Error StreamReader::readStreamRef(ReadableStreamRef &Ref) {
- return readStreamRef(Ref, bytesRemaining());
-}
-
-Error StreamReader::readStreamRef(ReadableStreamRef &Ref, uint32_t Length) {
- if (bytesRemaining() < Length)
- return make_error<MSFError>(msf_error_code::insufficient_buffer);
- Ref = Stream.slice(Offset, Length);
- Offset += Length;
- return Error::success();
-}
-
-Error StreamReader::skip(uint32_t Amount) {
- if (Amount > bytesRemaining())
- return make_error<MSFError>(msf_error_code::insufficient_buffer);
- Offset += Amount;
- return Error::success();
-}
-
-uint8_t StreamReader::peek() const {
- ArrayRef<uint8_t> Buffer;
- auto EC = Stream.readBytes(Offset, 1, Buffer);
- assert(!EC && "Cannot peek an empty buffer!");
- llvm::consumeError(std::move(EC));
- return Buffer[0];
-}
diff --git a/lib/DebugInfo/MSF/StreamWriter.cpp b/lib/DebugInfo/MSF/StreamWriter.cpp
deleted file mode 100644
index cdae7c5acc04..000000000000
--- a/lib/DebugInfo/MSF/StreamWriter.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-//===- StreamWriter.cpp - Writes bytes and objects to a stream ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/MSF/StreamWriter.h"
-
-#include "llvm/DebugInfo/MSF/MSFError.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/MSF/StreamRef.h"
-
-using namespace llvm;
-using namespace llvm::msf;
-
-StreamWriter::StreamWriter(WritableStreamRef S) : Stream(S), Offset(0) {}
-
-Error StreamWriter::writeBytes(ArrayRef<uint8_t> Buffer) {
- if (auto EC = Stream.writeBytes(Offset, Buffer))
- return EC;
- Offset += Buffer.size();
- return Error::success();
-}
-
-Error StreamWriter::writeInteger(uint8_t Int) { return writeObject(Int); }
-
-Error StreamWriter::writeInteger(uint16_t Int) {
- return writeObject(support::ulittle16_t(Int));
-}
-
-Error StreamWriter::writeInteger(uint32_t Int) {
- return writeObject(support::ulittle32_t(Int));
-}
-
-Error StreamWriter::writeInteger(uint64_t Int) {
- return writeObject(support::ulittle64_t(Int));
-}
-
-Error StreamWriter::writeInteger(int8_t Int) { return writeObject(Int); }
-
-Error StreamWriter::writeInteger(int16_t Int) {
- return writeObject(support::little16_t(Int));
-}
-
-Error StreamWriter::writeInteger(int32_t Int) {
- return writeObject(support::little32_t(Int));
-}
-
-Error StreamWriter::writeInteger(int64_t Int) {
- return writeObject(support::little64_t(Int));
-}
-
-Error StreamWriter::writeZeroString(StringRef Str) {
- if (auto EC = writeFixedString(Str))
- return EC;
- if (auto EC = writeObject('\0'))
- return EC;
-
- return Error::success();
-}
-
-Error StreamWriter::writeFixedString(StringRef Str) {
- ArrayRef<uint8_t> Bytes(Str.bytes_begin(), Str.bytes_end());
- if (auto EC = Stream.writeBytes(Offset, Bytes))
- return EC;
-
- Offset += Str.size();
- return Error::success();
-}
-
-Error StreamWriter::writeStreamRef(ReadableStreamRef Ref) {
- if (auto EC = writeStreamRef(Ref, Ref.getLength()))
- return EC;
- // Don't increment Offset here, it is done by the overloaded call to
- // writeStreamRef.
- return Error::success();
-}
-
-Error StreamWriter::writeStreamRef(ReadableStreamRef Ref, uint32_t Length) {
- Ref = Ref.slice(0, Length);
-
- StreamReader SrcReader(Ref);
- // This is a bit tricky. If we just call readBytes, we are requiring that it
- // return us the entire stream as a contiguous buffer. For large streams this
- // will allocate a huge amount of space from the pool. Instead, iterate over
- // each contiguous chunk until we've consumed the entire stream.
- while (SrcReader.bytesRemaining() > 0) {
- ArrayRef<uint8_t> Chunk;
- if (auto EC = SrcReader.readLongestContiguousChunk(Chunk))
- return EC;
- if (auto EC = writeBytes(Chunk))
- return EC;
- }
- return Error::success();
-}
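// The chunked-copy loop above is the part worth keeping: it avoids
// materializing a large discontiguous stream as one contiguous allocation.
// The same pattern works against the new generic API; a minimal sketch
// assuming llvm/Support/BinaryStreamReader.h and BinaryStreamWriter.h:
Error copyStreamByChunks(BinaryStreamReader &Src, BinaryStreamWriter &Dest) {
  while (Src.bytesRemaining() > 0) {
    ArrayRef<uint8_t> Chunk;
    // Ask only for the longest run that is contiguous in the source.
    if (auto EC = Src.readLongestContiguousChunk(Chunk))
      return EC;
    if (auto EC = Dest.writeBytes(Chunk))
      return EC;
  }
  return Error::success();
}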
diff --git a/lib/DebugInfo/PDB/CMakeLists.txt b/lib/DebugInfo/PDB/CMakeLists.txt
index 599f01eaf74f..f87a0b0a72e2 100644
--- a/lib/DebugInfo/PDB/CMakeLists.txt
+++ b/lib/DebugInfo/PDB/CMakeLists.txt
@@ -27,31 +27,38 @@ if(LLVM_ENABLE_DIA_SDK)
set(LIBPDB_ADDITIONAL_HEADER_DIRS "${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/PDB/DIA")
endif()
-add_pdb_impl_folder(Raw
- Raw/DbiStream.cpp
- Raw/DbiStreamBuilder.cpp
- Raw/EnumTables.cpp
- Raw/GlobalsStream.cpp
- Raw/GSI.cpp
- Raw/Hash.cpp
- Raw/InfoStream.cpp
- Raw/InfoStreamBuilder.cpp
- Raw/ModInfo.cpp
- Raw/ModStream.cpp
- Raw/NameHashTable.cpp
- Raw/NameMap.cpp
- Raw/NameMapBuilder.cpp
- Raw/PDBFile.cpp
- Raw/PDBFileBuilder.cpp
- Raw/PublicsStream.cpp
- Raw/RawError.cpp
- Raw/RawSession.cpp
- Raw/SymbolStream.cpp
- Raw/TpiHashing.cpp
- Raw/TpiStream.cpp
- Raw/TpiStreamBuilder.cpp)
+add_pdb_impl_folder(Native
+ Native/DbiStream.cpp
+ Native/DbiStreamBuilder.cpp
+ Native/EnumTables.cpp
+ Native/GlobalsStream.cpp
+ Native/GSI.cpp
+ Native/Hash.cpp
+ Native/HashTable.cpp
+ Native/InfoStream.cpp
+ Native/InfoStreamBuilder.cpp
+ Native/ModInfo.cpp
+ Native/ModInfoBuilder.cpp
+ Native/ModStream.cpp
+ Native/NativeCompilandSymbol.cpp
+ Native/NativeEnumModules.cpp
+ Native/NativeExeSymbol.cpp
+ Native/NativeRawSymbol.cpp
+ Native/NamedStreamMap.cpp
+ Native/NativeSession.cpp
+ Native/PDBFile.cpp
+ Native/PDBFileBuilder.cpp
+ Native/PDBTypeServerHandler.cpp
+ Native/PublicsStream.cpp
+ Native/RawError.cpp
+ Native/StringTable.cpp
+ Native/StringTableBuilder.cpp
+ Native/SymbolStream.cpp
+ Native/TpiHashing.cpp
+ Native/TpiStream.cpp
+ Native/TpiStreamBuilder.cpp)
-list(APPEND LIBPDB_ADDITIONAL_HEADER_DIRS "${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/PDB/Raw")
+list(APPEND LIBPDB_ADDITIONAL_HEADER_DIRS "${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/PDB/Native")
list(APPEND LIBPDB_ADDITIONAL_HEADER_DIRS "${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/PDB")
add_llvm_library(LLVMDebugInfoPDB
@@ -94,6 +101,7 @@ add_llvm_library(LLVMDebugInfoPDB
PDBSymbolUnknown.cpp
PDBSymbolUsingNamespace.cpp
PDBSymDumper.cpp
+ UDTLayout.cpp
${PDB_IMPL_SOURCES}
ADDITIONAL_HEADER_DIRS
diff --git a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
index bba5b0f94dca..5e8c0bdc171d 100644
--- a/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
+++ b/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp
@@ -10,9 +10,13 @@
#include "llvm/DebugInfo/PDB/DIA/DIARawSymbol.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/CodeView/Formatters.h"
#include "llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h"
#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
#include "llvm/DebugInfo/PDB/PDBExtras.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/raw_ostream.h"
@@ -178,9 +182,10 @@ void DumpDIAValue(llvm::raw_ostream &OS, int Indent, StringRef Name,
}
namespace llvm {
-raw_ostream &operator<<(raw_ostream &OS, const GUID &Guid) {
- const PDB_UniqueId *Id = reinterpret_cast<const PDB_UniqueId *>(&Guid);
- OS << *Id;
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const GUID &G) {
+ StringRef GuidBytes(reinterpret_cast<const char *>(&G), sizeof(G));
+ codeview::detail::GuidAdapter A(GuidBytes);
+ A.format(OS, "");
return OS;
}
}
@@ -715,6 +720,18 @@ uint32_t DIARawSymbol::getVirtualTableShapeId() const {
return PrivateGetDIAValue(Symbol, &IDiaSymbol::get_virtualTableShapeId);
}
+std::unique_ptr<PDBSymbolTypeVTable>
+DIARawSymbol::getVirtualBaseTableType() const {
+ CComPtr<IDiaSymbol> TableType;
+ if (FAILED(Symbol->get_virtualBaseTableType(&TableType)) || !TableType)
+ return nullptr;
+
+ auto RawVT = llvm::make_unique<DIARawSymbol>(Session, TableType);
+ auto Pointer =
+ llvm::make_unique<PDBSymbolTypePointer>(Session, std::move(RawVT));
+ return unique_dyn_cast<PDBSymbolTypeVTable>(Pointer->getPointeeType());
+}
+
PDB_DataKind DIARawSymbol::getDataKind() const {
return PrivateGetDIAValue<DWORD, PDB_DataKind>(Symbol,
&IDiaSymbol::get_dataKind);
diff --git a/lib/DebugInfo/PDB/Raw/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp
index 4f4a0cf65785..b9f53578d326 100644
--- a/lib/DebugInfo/PDB/Raw/DbiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp
@@ -7,21 +7,20 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/MSF/StreamArray.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h"
+#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/ModInfo.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
-#include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
-#include "llvm/DebugInfo/PDB/Raw/ISectionContribVisitor.h"
-#include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
-#include "llvm/DebugInfo/PDB/Raw/ModInfo.h"
-#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h"
-#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
-#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Support/BinaryStreamArray.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Error.h"
#include <algorithm>
#include <cstddef>
@@ -35,7 +34,7 @@ using namespace llvm::support;
template <typename ContribType>
static Error loadSectionContribs(FixedStreamArray<ContribType> &Output,
- StreamReader &Reader) {
+ BinaryStreamReader &Reader) {
if (Reader.bytesRemaining() % sizeof(ContribType) != 0)
return make_error<RawError>(
raw_error_code::corrupt_file,
@@ -48,13 +47,12 @@ static Error loadSectionContribs(FixedStreamArray<ContribType> &Output,
}
DbiStream::DbiStream(PDBFile &File, std::unique_ptr<MappedBlockStream> Stream)
- : Pdb(File), Stream(std::move(Stream)), Header(nullptr) {
-}
+ : Pdb(File), Stream(std::move(Stream)), Header(nullptr) {}
DbiStream::~DbiStream() = default;
Error DbiStream::reload() {
- StreamReader Reader(*Stream);
+ BinaryStreamReader Reader(*Stream);
if (Stream->getLength() < sizeof(DbiStreamHeader))
return make_error<RawError>(raw_error_code::corrupt_file,
@@ -127,8 +125,8 @@ Error DbiStream::reload() {
return EC;
if (auto EC = Reader.readStreamRef(ECSubstream, Header->ECSubstreamSize))
return EC;
- if (auto EC = Reader.readArray(DbgStreams, Header->OptionalDbgHdrSize /
- sizeof(ulittle16_t)))
+ if (auto EC = Reader.readArray(
+ DbgStreams, Header->OptionalDbgHdrSize / sizeof(ulittle16_t)))
return EC;
if (auto EC = initializeSectionContributionData())
@@ -147,7 +145,7 @@ Error DbiStream::reload() {
"Found unexpected bytes in DBI Stream.");
if (ECSubstream.getLength() > 0) {
- StreamReader ECReader(ECSubstream);
+ BinaryStreamReader ECReader(ECSubstream);
if (auto EC = ECNames.load(ECReader))
return EC;
}
@@ -209,16 +207,16 @@ PDB_Machine DbiStream::getMachineType() const {
return static_cast<PDB_Machine>(Machine);
}
-msf::FixedStreamArray<object::coff_section> DbiStream::getSectionHeaders() {
+FixedStreamArray<object::coff_section> DbiStream::getSectionHeaders() {
return SectionHeaders;
}
-msf::FixedStreamArray<object::FpoData> DbiStream::getFpoRecords() {
+FixedStreamArray<object::FpoData> DbiStream::getFpoRecords() {
return FpoRecords;
}
ArrayRef<ModuleInfoEx> DbiStream::modules() const { return ModuleInfos; }
-msf::FixedStreamArray<SecMapEntry> DbiStream::getSectionMap() const {
+FixedStreamArray<SecMapEntry> DbiStream::getSectionMap() const {
return SectionMap;
}
@@ -237,7 +235,7 @@ Error DbiStream::initializeSectionContributionData() {
if (SecContrSubstream.getLength() == 0)
return Error::success();
- StreamReader SCReader(SecContrSubstream);
+ BinaryStreamReader SCReader(SecContrSubstream);
if (auto EC = SCReader.readEnum(SectionContribVersion))
return EC;
@@ -256,7 +254,7 @@ Error DbiStream::initializeModInfoArray() {
// Since each ModInfo in the stream is a variable length, we have to iterate
// them to know how many there actually are.
- StreamReader Reader(ModInfoSubstream);
+ BinaryStreamReader Reader(ModInfoSubstream);
VarStreamArray<ModInfo> ModInfoArray;
if (auto EC = Reader.readArray(ModInfoArray, ModInfoSubstream.getLength()))
@@ -286,7 +284,7 @@ Error DbiStream::initializeSectionHeadersData() {
"Corrupted section header stream.");
size_t NumSections = StreamLen / sizeof(object::coff_section);
- msf::StreamReader Reader(*SHS);
+ BinaryStreamReader Reader(*SHS);
if (auto EC = Reader.readArray(SectionHeaders, NumSections))
return make_error<RawError>(raw_error_code::corrupt_file,
"Could not read a bitmap.");
@@ -318,7 +316,7 @@ Error DbiStream::initializeFpoRecords() {
"Corrupted New FPO stream.");
size_t NumRecords = StreamLen / sizeof(object::FpoData);
- msf::StreamReader Reader(*FS);
+ BinaryStreamReader Reader(*FS);
if (auto EC = Reader.readArray(FpoRecords, NumRecords))
return make_error<RawError>(raw_error_code::corrupt_file,
"Corrupted New FPO stream.");
@@ -330,7 +328,7 @@ Error DbiStream::initializeSectionMapData() {
if (SecMapSubstream.getLength() == 0)
return Error::success();
- StreamReader SMReader(SecMapSubstream);
+ BinaryStreamReader SMReader(SecMapSubstream);
const SecMapHeader *Header;
if (auto EC = SMReader.readObject(Header))
return EC;
@@ -344,7 +342,7 @@ Error DbiStream::initializeFileInfo() {
return Error::success();
const FileInfoSubstreamHeader *FH;
- StreamReader FISR(FileInfoSubstream);
+ BinaryStreamReader FISR(FileInfoSubstream);
if (auto EC = FISR.readObject(FH))
return EC;
@@ -413,14 +411,14 @@ uint32_t DbiStream::getDebugStreamIndex(DbgHeaderType Type) const {
}
Expected<StringRef> DbiStream::getFileNameForIndex(uint32_t Index) const {
- StreamReader Names(NamesBuffer);
+ BinaryStreamReader Names(NamesBuffer);
if (Index >= FileNameOffsets.size())
return make_error<RawError>(raw_error_code::index_out_of_bounds);
uint32_t FileOffset = FileNameOffsets[Index];
Names.setOffset(FileOffset);
StringRef Name;
- if (auto EC = Names.readZeroString(Name))
+ if (auto EC = Names.readCString(Name))
return std::move(EC);
return Name;
}
diff --git a/lib/DebugInfo/PDB/Raw/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index 1d5b8d693b1e..a203aea60fe7 100644
--- a/lib/DebugInfo/PDB/Raw/DbiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -7,15 +7,16 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/DbiStreamBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/MSF/StreamWriter.h"
-#include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/ModInfoBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/COFF.h"
using namespace llvm;
@@ -23,15 +24,13 @@ using namespace llvm::codeview;
using namespace llvm::msf;
using namespace llvm::pdb;
-namespace {
-class ModiSubstreamBuilder {};
-}
-
DbiStreamBuilder::DbiStreamBuilder(msf::MSFBuilder &Msf)
: Msf(Msf), Allocator(Msf.getAllocator()), Age(1), BuildNumber(0),
PdbDllVersion(0), PdbDllRbld(0), Flags(0), MachineType(PDB_Machine::x86),
Header(nullptr), DbgStreams((int)DbgHeaderType::Max) {}
+DbiStreamBuilder::~DbiStreamBuilder() = default;
+
void DbiStreamBuilder::setVersionHeader(PdbRaw_DbiVer V) { VerHeader = V; }
void DbiStreamBuilder::setAge(uint32_t A) { Age = A; }
@@ -75,39 +74,37 @@ uint32_t DbiStreamBuilder::calculateSerializedLength() const {
calculateSectionMapStreamSize() + calculateDbgStreamsSize();
}
-Error DbiStreamBuilder::addModuleInfo(StringRef ObjFile, StringRef Module) {
- auto Entry = llvm::make_unique<ModuleInfo>();
- ModuleInfo *M = Entry.get();
- Entry->Mod = Module;
- Entry->Obj = ObjFile;
- auto Result = ModuleInfos.insert(std::make_pair(Module, std::move(Entry)));
+Expected<ModInfoBuilder &>
+DbiStreamBuilder::addModuleInfo(StringRef ModuleName) {
+ uint32_t Index = ModiList.size();
+ auto MIB = llvm::make_unique<ModInfoBuilder>(ModuleName, Index, Msf);
+ auto M = MIB.get();
+ auto Result = ModiMap.insert(std::make_pair(ModuleName, std::move(MIB)));
+
if (!Result.second)
return make_error<RawError>(raw_error_code::duplicate_entry,
"The specified module already exists");
- ModuleInfoList.push_back(M);
- return Error::success();
+ ModiList.push_back(M);
+ return *M;
}
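// Hypothetical call site for the new interface (the builder variable and the
// file names are invented for illustration): instead of passing two strings
// up front, the caller now receives a ModInfoBuilder and attaches the object
// file name and source files to it directly.
Expected<ModInfoBuilder &> ExpectedMod = DbiBuilder.addModuleInfo("a.obj");
if (!ExpectedMod)
  return ExpectedMod.takeError();
ModInfoBuilder &Mod = *ExpectedMod;
Mod.setObjFileName("a.obj");
Mod.addSourceFile("/src/a.cpp");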
Error DbiStreamBuilder::addModuleSourceFile(StringRef Module, StringRef File) {
- auto ModIter = ModuleInfos.find(Module);
- if (ModIter == ModuleInfos.end())
+ auto ModIter = ModiMap.find(Module);
+ if (ModIter == ModiMap.end())
return make_error<RawError>(raw_error_code::no_entry,
"The specified module was not found");
uint32_t Index = SourceFileNames.size();
SourceFileNames.insert(std::make_pair(File, Index));
auto &ModEntry = *ModIter;
- ModEntry.second->SourceFiles.push_back(File);
+ ModEntry.second->addSourceFile(File);
return Error::success();
}
uint32_t DbiStreamBuilder::calculateModiSubstreamSize() const {
uint32_t Size = 0;
- for (const auto &M : ModuleInfoList) {
- Size += sizeof(ModuleInfoHeader);
- Size += M->Mod.size() + 1;
- Size += M->Obj.size() + 1;
- }
- return alignTo(Size, sizeof(uint32_t));
+ for (const auto &M : ModiList)
+ Size += M->calculateSerializedLength();
+ return Size;
}
uint32_t DbiStreamBuilder::calculateSectionContribsStreamSize() const {
@@ -127,11 +124,11 @@ uint32_t DbiStreamBuilder::calculateFileInfoSubstreamSize() const {
uint32_t Size = 0;
Size += sizeof(ulittle16_t); // NumModules
Size += sizeof(ulittle16_t); // NumSourceFiles
- Size += ModuleInfoList.size() * sizeof(ulittle16_t); // ModIndices
- Size += ModuleInfoList.size() * sizeof(ulittle16_t); // ModFileCounts
+ Size += ModiList.size() * sizeof(ulittle16_t); // ModIndices
+ Size += ModiList.size() * sizeof(ulittle16_t); // ModFileCounts
uint32_t NumFileInfos = 0;
- for (const auto &M : ModuleInfoList)
- NumFileInfos += M->SourceFiles.size();
+ for (const auto &M : ModiList)
+ NumFileInfos += M->source_files().size();
Size += NumFileInfos * sizeof(ulittle32_t); // FileNameOffsets
Size += calculateNamesBufferSize();
return alignTo(Size, sizeof(uint32_t));
@@ -149,43 +146,20 @@ uint32_t DbiStreamBuilder::calculateDbgStreamsSize() const {
return DbgStreams.size() * sizeof(uint16_t);
}
-Error DbiStreamBuilder::generateModiSubstream() {
- uint32_t Size = calculateModiSubstreamSize();
- auto Data = Allocator.Allocate<uint8_t>(Size);
-
- ModInfoBuffer = MutableByteStream(MutableArrayRef<uint8_t>(Data, Size));
-
- StreamWriter ModiWriter(ModInfoBuffer);
- for (const auto &M : ModuleInfoList) {
- ModuleInfoHeader Layout = {};
- Layout.ModDiStream = kInvalidStreamIndex;
- Layout.NumFiles = M->SourceFiles.size();
- if (auto EC = ModiWriter.writeObject(Layout))
- return EC;
- if (auto EC = ModiWriter.writeZeroString(M->Mod))
- return EC;
- if (auto EC = ModiWriter.writeZeroString(M->Obj))
- return EC;
- }
- if (ModiWriter.bytesRemaining() > sizeof(uint32_t))
- return make_error<RawError>(raw_error_code::invalid_format,
- "Unexpected bytes in Modi Stream Data");
- return Error::success();
-}
-
Error DbiStreamBuilder::generateFileInfoSubstream() {
uint32_t Size = calculateFileInfoSubstreamSize();
uint32_t NameSize = calculateNamesBufferSize();
auto Data = Allocator.Allocate<uint8_t>(Size);
uint32_t NamesOffset = Size - NameSize;
- FileInfoBuffer = MutableByteStream(MutableArrayRef<uint8_t>(Data, Size));
+ FileInfoBuffer = MutableBinaryByteStream(MutableArrayRef<uint8_t>(Data, Size),
+ llvm::support::little);
- WritableStreamRef MetadataBuffer =
- WritableStreamRef(FileInfoBuffer).keep_front(NamesOffset);
- StreamWriter MetadataWriter(MetadataBuffer);
+ WritableBinaryStreamRef MetadataBuffer =
+ WritableBinaryStreamRef(FileInfoBuffer).keep_front(NamesOffset);
+ BinaryStreamWriter MetadataWriter(MetadataBuffer);
- uint16_t ModiCount = std::min<uint32_t>(UINT16_MAX, ModuleInfos.size());
+ uint16_t ModiCount = std::min<uint32_t>(UINT16_MAX, ModiList.size());
uint16_t FileCount = std::min<uint32_t>(UINT16_MAX, SourceFileNames.size());
if (auto EC = MetadataWriter.writeInteger(ModiCount)) // NumModules
return EC;
@@ -195,8 +169,8 @@ Error DbiStreamBuilder::generateFileInfoSubstream() {
if (auto EC = MetadataWriter.writeInteger(I)) // Mod Indices
return EC;
}
- for (const auto MI : ModuleInfoList) {
- FileCount = static_cast<uint16_t>(MI->SourceFiles.size());
+ for (const auto &MI : ModiList) {
+ FileCount = static_cast<uint16_t>(MI->source_files().size());
if (auto EC = MetadataWriter.writeInteger(FileCount)) // Mod File Counts
return EC;
}
@@ -205,16 +179,16 @@ Error DbiStreamBuilder::generateFileInfoSubstream() {
// A side effect of this is that this will actually compute the various
// file name offsets, so we can then go back and write the FileNameOffsets
// array to the other substream.
- NamesBuffer = WritableStreamRef(FileInfoBuffer).drop_front(NamesOffset);
- StreamWriter NameBufferWriter(NamesBuffer);
+ NamesBuffer = WritableBinaryStreamRef(FileInfoBuffer).drop_front(NamesOffset);
+ BinaryStreamWriter NameBufferWriter(NamesBuffer);
for (auto &Name : SourceFileNames) {
Name.second = NameBufferWriter.getOffset();
- if (auto EC = NameBufferWriter.writeZeroString(Name.getKey()))
+ if (auto EC = NameBufferWriter.writeCString(Name.getKey()))
return EC;
}
- for (const auto MI : ModuleInfoList) {
- for (StringRef Name : MI->SourceFiles) {
+ for (const auto &MI : ModiList) {
+ for (StringRef Name : MI->source_files()) {
auto Result = SourceFileNames.find(Name);
if (Result == SourceFileNames.end())
return make_error<RawError>(raw_error_code::no_entry,
@@ -240,13 +214,13 @@ Error DbiStreamBuilder::finalize() {
if (Header)
return Error::success();
- DbiStreamHeader *H = Allocator.Allocate<DbiStreamHeader>();
+ for (auto &MI : ModiList)
+ MI->finalize();
- if (auto EC = generateModiSubstream())
- return EC;
if (auto EC = generateFileInfoSubstream())
return EC;
+ DbiStreamHeader *H = Allocator.Allocate<DbiStreamHeader>();
H->VersionHeader = *VerHeader;
H->VersionSignature = -1;
H->Age = Age;
@@ -258,7 +232,7 @@ Error DbiStreamBuilder::finalize() {
H->ECSubstreamSize = 0;
H->FileInfoSize = FileInfoBuffer.getLength();
- H->ModiSubstreamSize = ModInfoBuffer.getLength();
+ H->ModiSubstreamSize = calculateModiSubstreamSize();
H->OptionalDbgHdrSize = DbgStreams.size() * sizeof(uint16_t);
H->SecContrSubstreamSize = calculateSectionContribsStreamSize();
H->SectionMapSize = calculateSectionMapStreamSize();
@@ -273,6 +247,11 @@ Error DbiStreamBuilder::finalize() {
}
Error DbiStreamBuilder::finalizeMsfLayout() {
+ for (auto &MI : ModiList) {
+ if (auto EC = MI->finalizeMsfLayout())
+ return EC;
+ }
+
uint32_t Length = calculateSerializedLength();
if (auto EC = Msf.setStreamSize(StreamDBI, Length))
return EC;
@@ -358,19 +337,21 @@ std::vector<SecMapEntry> DbiStreamBuilder::createSectionMap(
}
Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
- const msf::WritableStream &Buffer) {
+ WritableBinaryStreamRef MsfBuffer) {
if (auto EC = finalize())
return EC;
- auto InfoS =
- WritableMappedBlockStream::createIndexedStream(Layout, Buffer, StreamDBI);
+ auto DbiS = WritableMappedBlockStream::createIndexedStream(Layout, MsfBuffer,
+ StreamDBI);
- StreamWriter Writer(*InfoS);
+ BinaryStreamWriter Writer(*DbiS);
if (auto EC = Writer.writeObject(*Header))
return EC;
- if (auto EC = Writer.writeStreamRef(ModInfoBuffer))
- return EC;
+ for (auto &M : ModiList) {
+ if (auto EC = M->commit(Writer, Layout, MsfBuffer))
+ return EC;
+ }
if (!SectionContribs.empty()) {
if (auto EC = Writer.writeEnum(DbiSecContribVer60))
@@ -399,8 +380,8 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
if (Stream.StreamNumber == kInvalidStreamIndex)
continue;
auto WritableStream = WritableMappedBlockStream::createIndexedStream(
- Layout, Buffer, Stream.StreamNumber);
- StreamWriter DbgStreamWriter(*WritableStream);
+ Layout, MsfBuffer, Stream.StreamNumber);
+ BinaryStreamWriter DbgStreamWriter(*WritableStream);
if (auto EC = DbgStreamWriter.writeArray(Stream.Data))
return EC;
}
diff --git a/lib/DebugInfo/PDB/Raw/EnumTables.cpp b/lib/DebugInfo/PDB/Native/EnumTables.cpp
index fc9270c69947..b3837dc72e5b 100644
--- a/lib/DebugInfo/PDB/Raw/EnumTables.cpp
+++ b/lib/DebugInfo/PDB/Native/EnumTables.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/EnumTables.h"
-#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Native/EnumTables.h"
+#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
using namespace llvm;
using namespace llvm::pdb;
diff --git a/lib/DebugInfo/PDB/Raw/GSI.cpp b/lib/DebugInfo/PDB/Native/GSI.cpp
index 6ecbb5c8cfad..b219fe275f73 100644
--- a/lib/DebugInfo/PDB/Raw/GSI.cpp
+++ b/lib/DebugInfo/PDB/Native/GSI.cpp
@@ -9,10 +9,10 @@
#include "GSI.h"
-#include "llvm/DebugInfo/MSF/StreamArray.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
-#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamArray.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Error.h"
@@ -28,9 +28,9 @@ static Error checkHashHdrVersion(const GSIHashHeader *HashHdr) {
return Error::success();
}
-Error readGSIHashBuckets(
- msf::FixedStreamArray<support::ulittle32_t> &HashBuckets,
- const GSIHashHeader *HashHdr, msf::StreamReader &Reader) {
+Error readGSIHashBuckets(FixedStreamArray<support::ulittle32_t> &HashBuckets,
+ const GSIHashHeader *HashHdr,
+ BinaryStreamReader &Reader) {
if (auto EC = checkHashHdrVersion(HashHdr))
return EC;
@@ -57,7 +57,7 @@ Error readGSIHashBuckets(
}
Error readGSIHashHeader(const GSIHashHeader *&HashHdr,
- msf::StreamReader &Reader) {
+ BinaryStreamReader &Reader) {
if (Reader.readObject(HashHdr))
return make_error<RawError>(raw_error_code::corrupt_file,
"Stream does not contain a GSIHashHeader.");
@@ -70,9 +70,9 @@ Error readGSIHashHeader(const GSIHashHeader *&HashHdr,
return Error::success();
}
-Error readGSIHashRecords(msf::FixedStreamArray<PSHashRecord> &HashRecords,
+Error readGSIHashRecords(FixedStreamArray<PSHashRecord> &HashRecords,
const GSIHashHeader *HashHdr,
- msf::StreamReader &Reader) {
+ BinaryStreamReader &Reader) {
if (auto EC = checkHashHdrVersion(HashHdr))
return EC;
diff --git a/lib/DebugInfo/PDB/Raw/GSI.h b/lib/DebugInfo/PDB/Native/GSI.h
index 82cebd946538..9e63bc83548f 100644
--- a/lib/DebugInfo/PDB/Raw/GSI.h
+++ b/lib/DebugInfo/PDB/Native/GSI.h
@@ -25,17 +25,15 @@
#ifndef LLVM_LIB_DEBUGINFO_PDB_RAW_GSI_H
#define LLVM_LIB_DEBUGINFO_PDB_RAW_GSI_H
-#include "llvm/DebugInfo/MSF/StreamArray.h"
-#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
namespace llvm {
-namespace msf {
-class StreamReader;
-}
+class BinaryStreamReader;
namespace pdb {
@@ -56,14 +54,14 @@ struct GSIHashHeader {
support::ulittle32_t NumBuckets;
};
-Error readGSIHashBuckets(
- msf::FixedStreamArray<support::ulittle32_t> &HashBuckets,
- const GSIHashHeader *HashHdr, msf::StreamReader &Reader);
+Error readGSIHashBuckets(FixedStreamArray<support::ulittle32_t> &HashBuckets,
+ const GSIHashHeader *HashHdr,
+ BinaryStreamReader &Reader);
Error readGSIHashHeader(const GSIHashHeader *&HashHdr,
- msf::StreamReader &Reader);
-Error readGSIHashRecords(msf::FixedStreamArray<PSHashRecord> &HashRecords,
+ BinaryStreamReader &Reader);
+Error readGSIHashRecords(FixedStreamArray<PSHashRecord> &HashRecords,
const GSIHashHeader *HashHdr,
- msf::StreamReader &Reader);
+ BinaryStreamReader &Reader);
}
}
diff --git a/lib/DebugInfo/PDB/Raw/GlobalsStream.cpp b/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
index 31afc9200b1b..a2ee0f047c58 100644
--- a/lib/DebugInfo/PDB/Raw/GlobalsStream.cpp
+++ b/lib/DebugInfo/PDB/Native/GlobalsStream.cpp
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
#include "GSI.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/PDB/Raw/GlobalsStream.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Error.h"
#include <algorithm>
@@ -23,7 +23,7 @@ GlobalsStream::GlobalsStream(std::unique_ptr<MappedBlockStream> Stream)
GlobalsStream::~GlobalsStream() = default;
Error GlobalsStream::reload() {
- StreamReader Reader(*Stream);
+ BinaryStreamReader Reader(*Stream);
const GSIHashHeader *HashHdr;
if (auto EC = readGSIHashHeader(HashHdr, Reader))
diff --git a/lib/DebugInfo/PDB/Raw/Hash.cpp b/lib/DebugInfo/PDB/Native/Hash.cpp
index b9f685ec69d4..2ad3f55dc5c3 100644
--- a/lib/DebugInfo/PDB/Raw/Hash.cpp
+++ b/lib/DebugInfo/PDB/Native/Hash.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/Hash.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Endian.h"
diff --git a/lib/DebugInfo/PDB/Native/HashTable.cpp b/lib/DebugInfo/PDB/Native/HashTable.cpp
new file mode 100644
index 000000000000..ebf8c9c04db1
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/HashTable.cpp
@@ -0,0 +1,302 @@
+//===- HashTable.cpp - PDB Hash Table ---------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/HashTable.h"
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+
+#include <assert.h>
+
+using namespace llvm;
+using namespace llvm::pdb;
+
+HashTable::HashTable() : HashTable(8) {}
+
+HashTable::HashTable(uint32_t Capacity) { Buckets.resize(Capacity); }
+
+Error HashTable::load(BinaryStreamReader &Stream) {
+ const Header *H;
+ if (auto EC = Stream.readObject(H))
+ return EC;
+ if (H->Capacity == 0)
+ return make_error<RawError>(raw_error_code::corrupt_file,
+ "Invalid Hash Table Capacity");
+ if (H->Size > maxLoad(H->Capacity))
+ return make_error<RawError>(raw_error_code::corrupt_file,
+ "Invalid Hash Table Size");
+
+ Buckets.resize(H->Capacity);
+
+ if (auto EC = readSparseBitVector(Stream, Present))
+ return EC;
+ if (Present.count() != H->Size)
+ return make_error<RawError>(raw_error_code::corrupt_file,
+ "Present bit vector does not match size!");
+
+ if (auto EC = readSparseBitVector(Stream, Deleted))
+ return EC;
+ if (Present.intersects(Deleted))
+ return make_error<RawError>(raw_error_code::corrupt_file,
+ "Present bit vector interesects deleted!");
+
+ for (uint32_t P : Present) {
+ if (auto EC = Stream.readInteger(Buckets[P].first))
+ return EC;
+ if (auto EC = Stream.readInteger(Buckets[P].second))
+ return EC;
+ }
+
+ return Error::success();
+}
+
+uint32_t HashTable::calculateSerializedLength() const {
+ uint32_t Size = sizeof(Header);
+
+ int NumBitsP = Present.find_last() + 1;
+ int NumBitsD = Deleted.find_last() + 1;
+
+ // Number of words in the Present bit set, followed by that many actual words.
+ Size += sizeof(uint32_t);
+ Size += alignTo(NumBitsP, sizeof(uint32_t));
+
+ // Number of words in the Deleted bit set, followed by that many actual words.
+ Size += sizeof(uint32_t);
+ Size += alignTo(NumBitsD, sizeof(uint32_t));
+
+ // One (Key, Value) pair for each present entry.
+ Size += 2 * sizeof(uint32_t) * size();
+
+ return Size;
+}
+
+Error HashTable::commit(BinaryStreamWriter &Writer) const {
+ Header H;
+ H.Size = size();
+ H.Capacity = capacity();
+ if (auto EC = Writer.writeObject(H))
+ return EC;
+
+ if (auto EC = writeSparseBitVector(Writer, Present))
+ return EC;
+
+ if (auto EC = writeSparseBitVector(Writer, Deleted))
+ return EC;
+
+ for (const auto &Entry : *this) {
+ if (auto EC = Writer.writeInteger(Entry.first))
+ return EC;
+ if (auto EC = Writer.writeInteger(Entry.second))
+ return EC;
+ }
+ return Error::success();
+}
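// Round-trip sketch of the format load()/commit() implement, written as the
// body of some Error-returning test function and assuming the byte-stream
// helpers from llvm/Support/BinaryByteStream.h:
HashTable Table;
Table.set(42, 7);
std::vector<uint8_t> Data(Table.calculateSerializedLength());
MutableBinaryByteStream OutStream(Data, llvm::support::little);
BinaryStreamWriter Writer(OutStream);
if (auto EC = Table.commit(Writer))
  return EC;
HashTable Loaded;
BinaryByteStream InStream(Data, llvm::support::little);
BinaryStreamReader Reader(InStream);
if (auto EC = Loaded.load(Reader))
  return EC;
assert(Loaded.get(42) == 7); // header, bit vectors, and pairs all round-trip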
+
+void HashTable::clear() {
+ Buckets.resize(8);
+ Present.clear();
+ Deleted.clear();
+}
+
+uint32_t HashTable::capacity() const { return Buckets.size(); }
+uint32_t HashTable::size() const { return Present.count(); }
+
+HashTableIterator HashTable::begin() const { return HashTableIterator(*this); }
+HashTableIterator HashTable::end() const {
+ return HashTableIterator(*this, 0, true);
+}
+
+HashTableIterator HashTable::find(uint32_t K) {
+ uint32_t H = K % capacity();
+ uint32_t I = H;
+ Optional<uint32_t> FirstUnused;
+ do {
+ if (isPresent(I)) {
+ if (Buckets[I].first == K)
+ return HashTableIterator(*this, I, false);
+ } else {
+ if (!FirstUnused)
+ FirstUnused = I;
+ // Insertion occurs via linear probing from the slot hint, and will be
+ // inserted at the first empty / deleted location. Therefore, if we are
+ // probing and find a location that is neither present nor deleted, then
+ // nothing must have EVER been inserted at this location, and thus it is
+ // not possible for a matching value to occur later.
+ if (!isDeleted(I))
+ break;
+ }
+ I = (I + 1) % capacity();
+ } while (I != H);
+
+ // The only way FirstUnused would not be set is if every single entry in the
+ // table were Present. But this would violate the load factor constraints
+ // that we impose, so it should never happen.
+ assert(FirstUnused);
+ return HashTableIterator(*this, *FirstUnused, true);
+}
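// Tombstone behavior in practice (capacity 8, so keys 3 and 11 share the
// same home slot): a Deleted slot does not terminate the probe, but a
// never-used slot does.
HashTable T(8);
T.set(3, 100);  // lands in bucket 3
T.set(11, 200); // 11 % 8 == 3 is taken, probes forward to bucket 4
T.remove(3);    // bucket 3 becomes Deleted, not empty
assert(T.get(11) == 200); // find(11) probes through the tombstone at 3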
+
+void HashTable::set(uint32_t K, uint32_t V) {
+ auto Entry = find(K);
+ if (Entry != end()) {
+ assert(isPresent(Entry.index()));
+ assert(Buckets[Entry.index()].first == K);
+ // We're updating, no need to do anything special.
+ Buckets[Entry.index()].second = V;
+ return;
+ }
+
+ auto &B = Buckets[Entry.index()];
+ assert(!isPresent(Entry.index()));
+ assert(Entry.isEnd());
+ B.first = K;
+ B.second = V;
+ Present.set(Entry.index());
+ Deleted.reset(Entry.index());
+
+ grow();
+
+ assert(find(K) != end());
+}
+
+void HashTable::remove(uint32_t K) {
+ auto Iter = find(K);
+ // It wasn't here to begin with, just exit.
+ if (Iter == end())
+ return;
+
+ assert(Present.test(Iter.index()));
+ assert(!Deleted.test(Iter.index()));
+ Deleted.set(Iter.index());
+ Present.reset(Iter.index());
+}
+
+uint32_t HashTable::get(uint32_t K) {
+ auto I = find(K);
+ assert(I != end());
+ return (*I).second;
+}
+
+uint32_t HashTable::maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
+
+void HashTable::grow() {
+ uint32_t S = size();
+ if (S < maxLoad(capacity()))
+ return;
+ assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
+
+ uint32_t NewCapacity =
+ (capacity() <= INT32_MAX) ? capacity() * 2 : UINT32_MAX;
+
+ // Growing requires rebuilding the table and re-hashing every item. Make a
+ // copy with a larger capacity, insert everything into the copy, then swap
+ // it in.
+ HashTable NewMap(NewCapacity);
+ for (auto I : Present) {
+ NewMap.set(Buckets[I].first, Buckets[I].second);
+ }
+
+ Buckets.swap(NewMap.Buckets);
+ std::swap(Present, NewMap.Present);
+ std::swap(Deleted, NewMap.Deleted);
+ assert(capacity() == NewCapacity);
+ assert(size() == S);
+}
+
+Error HashTable::readSparseBitVector(BinaryStreamReader &Stream,
+ SparseBitVector<> &V) {
+ uint32_t NumWords;
+ if (auto EC = Stream.readInteger(NumWords))
+ return joinErrors(
+ std::move(EC),
+ make_error<RawError>(raw_error_code::corrupt_file,
+ "Expected hash table number of words"));
+
+ for (uint32_t I = 0; I != NumWords; ++I) {
+ uint32_t Word;
+ if (auto EC = Stream.readInteger(Word))
+ return joinErrors(std::move(EC),
+ make_error<RawError>(raw_error_code::corrupt_file,
+ "Expected hash table word"));
+ for (unsigned Idx = 0; Idx < 32; ++Idx)
+ if (Word & (1U << Idx))
+ V.set((I * 32) + Idx);
+ }
+ return Error::success();
+}
+
+Error HashTable::writeSparseBitVector(BinaryStreamWriter &Writer,
+ SparseBitVector<> &Vec) {
+ int ReqBits = Vec.find_last() + 1;
+ uint32_t NumWords = alignTo(ReqBits, sizeof(uint32_t)) / sizeof(uint32_t);
+ if (auto EC = Writer.writeInteger(NumWords))
+ return joinErrors(
+ std::move(EC),
+ make_error<RawError>(raw_error_code::corrupt_file,
+ "Could not write linear map number of words"));
+
+ uint32_t Idx = 0;
+ for (uint32_t I = 0; I != NumWords; ++I) {
+ uint32_t Word = 0;
+ for (uint32_t WordIdx = 0; WordIdx < 32; ++WordIdx, ++Idx) {
+ if (Vec.test(Idx))
+ Word |= (1 << WordIdx);
+ }
+ if (auto EC = Writer.writeInteger(Word))
+ return joinErrors(std::move(EC), make_error<RawError>(
+ raw_error_code::corrupt_file,
+ "Could not write linear map word"));
+ }
+ return Error::success();
+}
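// A worked example of the word layout above, assuming
// llvm/ADT/SparseBitVector.h:
SparseBitVector<> V;
V.set(0);
V.set(33);
// find_last() == 33, so ReqBits == 34 and NumWords == alignTo(34, 4) / 4 == 9.
// Bit 0 lands in word 0 (0x1); bit 33 lands in word 1 at position 1 (0x2);
// the remaining seven words are written as zero. readSparseBitVector above
// reverses this exactly, so reader and writer stay in sync.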
+
+HashTableIterator::HashTableIterator(const HashTable &Map, uint32_t Index,
+ bool IsEnd)
+ : Map(&Map), Index(Index), IsEnd(IsEnd) {}
+
+HashTableIterator::HashTableIterator(const HashTable &Map) : Map(&Map) {
+ int I = Map.Present.find_first();
+ if (I == -1) {
+ Index = 0;
+ IsEnd = true;
+ } else {
+ Index = static_cast<uint32_t>(I);
+ IsEnd = false;
+ }
+}
+
+HashTableIterator &HashTableIterator::operator=(const HashTableIterator &R) {
+ Map = R.Map;
+ Index = R.Index;
+ IsEnd = R.IsEnd;
+ return *this;
+}
+
+bool HashTableIterator::operator==(const HashTableIterator &R) const {
+ if (IsEnd && R.IsEnd)
+ return true;
+ if (IsEnd != R.IsEnd)
+ return false;
+
+ return (Map == R.Map) && (Index == R.Index);
+}
+
+const std::pair<uint32_t, uint32_t> &HashTableIterator::operator*() const {
+ assert(Map->Present.test(Index));
+ return Map->Buckets[Index];
+}
+
+HashTableIterator &HashTableIterator::operator++() {
+ while (Index < Map->Buckets.size()) {
+ ++Index;
+ if (Map->Present.test(Index))
+ return *this;
+ }
+
+ IsEnd = true;
+ return *this;
+}
diff --git a/lib/DebugInfo/PDB/Native/InfoStream.cpp b/lib/DebugInfo/PDB/Native/InfoStream.cpp
new file mode 100644
index 000000000000..2a1d12e82390
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/InfoStream.cpp
@@ -0,0 +1,126 @@
+//===- InfoStream.cpp - PDB Info Stream (Stream 1) Access -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::msf;
+using namespace llvm::pdb;
+
+InfoStream::InfoStream(std::unique_ptr<MappedBlockStream> Stream)
+ : Stream(std::move(Stream)) {}
+
+Error InfoStream::reload() {
+ BinaryStreamReader Reader(*Stream);
+
+ const InfoStreamHeader *H;
+ if (auto EC = Reader.readObject(H))
+ return joinErrors(
+ std::move(EC),
+ make_error<RawError>(raw_error_code::corrupt_file,
+ "PDB Stream does not contain a header."));
+
+ switch (H->Version) {
+ case PdbImplVC70:
+ case PdbImplVC80:
+ case PdbImplVC110:
+ case PdbImplVC140:
+ break;
+ default:
+ return make_error<RawError>(raw_error_code::corrupt_file,
+ "Unsupported PDB stream version.");
+ }
+
+ Version = H->Version;
+ Signature = H->Signature;
+ Age = H->Age;
+ Guid = H->Guid;
+
+ uint32_t Offset = Reader.getOffset();
+ if (auto EC = NamedStreams.load(Reader))
+ return EC;
+ uint32_t NewOffset = Reader.getOffset();
+ NamedStreamMapByteSize = NewOffset - Offset;
+
+ bool Stop = false;
+ while (!Stop && !Reader.empty()) {
+ PdbRaw_FeatureSig Sig;
+ if (auto EC = Reader.readEnum(Sig))
+ return EC;
+ // Since this value comes from a file, it's possible it holds some strange
+ // value that doesn't correspond to any known feature signature. We don't
+ // want to warn on
+ // -Wcovered-switch-default in this case, so switch on the integral value
+ // instead of the enumeration value.
+ switch (uint32_t(Sig)) {
+ case uint32_t(PdbRaw_FeatureSig::VC110):
+ // No other flags for VC110 PDB.
+ Stop = true;
+ LLVM_FALLTHROUGH;
+ case uint32_t(PdbRaw_FeatureSig::VC140):
+ Features |= PdbFeatureContainsIdStream;
+ break;
+ case uint32_t(PdbRaw_FeatureSig::NoTypeMerge):
+ Features |= PdbFeatureNoTypeMerging;
+ break;
+ case uint32_t(PdbRaw_FeatureSig::MinimalDebugInfo):
+ Features |= PdbFeatureMinimalDebugInfo;
+ break;
+ default:
+ continue;
+ }
+ FeatureSignatures.push_back(Sig);
+ }
+ return Error::success();
+}
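// A minimal consumer sketch for the flag mask assembled above (assuming the
// PdbRaw_Features values are single-bit flags, as the |= accumulation
// implies):
static bool containsIdStream(const InfoStream &Info) {
  return (Info.getFeatures() & PdbFeatureContainsIdStream) != 0;
}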
+
+uint32_t InfoStream::getStreamSize() const { return Stream->getLength(); }
+
+uint32_t InfoStream::getNamedStreamIndex(llvm::StringRef Name) const {
+ uint32_t Result;
+ if (!NamedStreams.get(Name, Result))
+ return 0;
+ return Result;
+}
+
+iterator_range<StringMapConstIterator<uint32_t>>
+InfoStream::named_streams() const {
+ return NamedStreams.entries();
+}
+
+PdbRaw_ImplVer InfoStream::getVersion() const {
+ return static_cast<PdbRaw_ImplVer>(Version);
+}
+
+uint32_t InfoStream::getSignature() const { return Signature; }
+
+uint32_t InfoStream::getAge() const { return Age; }
+
+PDB_UniqueId InfoStream::getGuid() const { return Guid; }
+
+uint32_t InfoStream::getNamedStreamMapByteSize() const {
+ return NamedStreamMapByteSize;
+}
+
+PdbRaw_Features InfoStream::getFeatures() const { return Features; }
+
+ArrayRef<PdbRaw_FeatureSig> InfoStream::getFeatureSignatures() const {
+ return FeatureSignatures;
+}
+
+const NamedStreamMap &InfoStream::getNamedStreams() const {
+ return NamedStreams;
+}
diff --git a/lib/DebugInfo/PDB/Raw/InfoStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
index 73fbf853b4f7..f019d410328a 100644
--- a/lib/DebugInfo/PDB/Raw/InfoStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
@@ -7,22 +7,26 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/InfoStreamBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/MSF/StreamWriter.h"
-#include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
-#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
+#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamWriter.h"
using namespace llvm;
using namespace llvm::codeview;
using namespace llvm::msf;
using namespace llvm::pdb;
-InfoStreamBuilder::InfoStreamBuilder(msf::MSFBuilder &Msf)
- : Msf(Msf), Ver(PdbRaw_ImplVer::PdbImplVC70), Sig(-1), Age(0) {}
+InfoStreamBuilder::InfoStreamBuilder(msf::MSFBuilder &Msf,
+ NamedStreamMap &NamedStreams)
+ : Msf(Msf), Ver(PdbRaw_ImplVer::PdbImplVC70), Sig(-1), Age(0),
+ NamedStreams(NamedStreams) {}
void InfoStreamBuilder::setVersion(PdbRaw_ImplVer V) { Ver = V; }
@@ -32,26 +36,23 @@ void InfoStreamBuilder::setAge(uint32_t A) { Age = A; }
void InfoStreamBuilder::setGuid(PDB_UniqueId G) { Guid = G; }
-NameMapBuilder &InfoStreamBuilder::getNamedStreamsBuilder() {
- return NamedStreams;
-}
-
-uint32_t InfoStreamBuilder::calculateSerializedLength() const {
- return sizeof(InfoStreamHeader) + NamedStreams.calculateSerializedLength();
+void InfoStreamBuilder::addFeature(PdbRaw_FeatureSig Sig) {
+ Features.push_back(Sig);
}
Error InfoStreamBuilder::finalizeMsfLayout() {
- uint32_t Length = calculateSerializedLength();
+ uint32_t Length = sizeof(InfoStreamHeader) + NamedStreams.finalize() +
+ (Features.size() + 1) * sizeof(uint32_t);
if (auto EC = Msf.setStreamSize(StreamPDB, Length))
return EC;
return Error::success();
}
Error InfoStreamBuilder::commit(const msf::MSFLayout &Layout,
- const msf::WritableStream &Buffer) const {
+ WritableBinaryStreamRef Buffer) const {
auto InfoS =
WritableMappedBlockStream::createIndexedStream(Layout, Buffer, StreamPDB);
- StreamWriter Writer(*InfoS);
+ BinaryStreamWriter Writer(*InfoS);
InfoStreamHeader H;
H.Age = Age;
@@ -61,5 +62,13 @@ Error InfoStreamBuilder::commit(const msf::MSFLayout &Layout,
if (auto EC = Writer.writeObject(H))
return EC;
- return NamedStreams.commit(Writer);
+ if (auto EC = NamedStreams.commit(Writer))
+ return EC;
+ if (auto EC = Writer.writeInteger(0))
+ return EC;
+ for (auto E : Features) {
+ if (auto EC = Writer.writeEnum(E))
+ return EC;
+ }
+ return Error::success();
}
diff --git a/lib/DebugInfo/PDB/Raw/ModInfo.cpp b/lib/DebugInfo/PDB/Native/ModInfo.cpp
index b34d7700d036..1405286fd088 100644
--- a/lib/DebugInfo/PDB/Raw/ModInfo.cpp
+++ b/lib/DebugInfo/PDB/Native/ModInfo.cpp
@@ -7,16 +7,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/PDB/Raw/ModInfo.h"
-#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
+#include "llvm/DebugInfo/PDB/Native/ModInfo.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>
using namespace llvm;
-using namespace llvm::msf;
using namespace llvm::pdb;
using namespace llvm::support;
@@ -26,15 +25,15 @@ ModInfo::ModInfo(const ModInfo &Info) = default;
ModInfo::~ModInfo() = default;
-Error ModInfo::initialize(ReadableStreamRef Stream, ModInfo &Info) {
- StreamReader Reader(Stream);
+Error ModInfo::initialize(BinaryStreamRef Stream, ModInfo &Info) {
+ BinaryStreamReader Reader(Stream);
if (auto EC = Reader.readObject(Info.Layout))
return EC;
- if (auto EC = Reader.readZeroString(Info.ModuleName))
+ if (auto EC = Reader.readCString(Info.ModuleName))
return EC;
- if (auto EC = Reader.readZeroString(Info.ObjFileName))
+ if (auto EC = Reader.readCString(Info.ObjFileName))
return EC;
return Error::success();
}
diff --git a/lib/DebugInfo/PDB/Native/ModInfoBuilder.cpp b/lib/DebugInfo/PDB/Native/ModInfoBuilder.cpp
new file mode 100644
index 000000000000..73c45a953520
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/ModInfoBuilder.cpp
@@ -0,0 +1,136 @@
+//===- ModInfoBuilder.cpp - PDB Module Info Stream Creation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/ModInfoBuilder.h"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/MSF/MSFBuilder.h"
+#include "llvm/DebugInfo/MSF/MSFCommon.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/ModInfo.h"
+#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/Support/BinaryItemStream.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/COFF.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::msf;
+using namespace llvm::pdb;
+
+namespace llvm {
+template <> struct BinaryItemTraits<CVSymbol> {
+ static size_t length(const CVSymbol &Item) { return Item.RecordData.size(); }
+
+ static ArrayRef<uint8_t> bytes(const CVSymbol &Item) {
+ return Item.RecordData;
+ }
+};
+}
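// With this trait in scope, BinaryItemStream<CVSymbol> (from
// llvm/Support/BinaryItemStream.h) can present a vector of symbol records as
// a single stream without copying them; commit() below uses exactly this to
// splice the records into the module's debug stream.
BinaryItemStream<CVSymbol> SymStream(llvm::support::endianness::little);
SymStream.setItems(Symbols);            // ArrayRef<CVSymbol>
uint32_t Total = SymStream.getLength(); // sum of RecordData sizes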
+
+static uint32_t calculateDiSymbolStreamSize(uint32_t SymbolByteSize) {
+ uint32_t Size = sizeof(uint32_t); // Signature
+ Size += SymbolByteSize; // Symbol Data
+ Size += 0; // TODO: Layout.LineBytes
+ Size += 0; // TODO: Layout.C13Bytes
+ Size += sizeof(uint32_t); // GlobalRefs substream size (always 0)
+ Size += 0; // GlobalRefs substream bytes
+ return Size;
+}
+
+ModInfoBuilder::ModInfoBuilder(StringRef ModuleName, uint32_t ModIndex,
+ msf::MSFBuilder &Msf)
+ : MSF(Msf), ModuleName(ModuleName) {
+ Layout.Mod = ModIndex;
+}
+
+uint16_t ModInfoBuilder::getStreamIndex() const { return Layout.ModDiStream; }
+
+void ModInfoBuilder::setObjFileName(StringRef Name) { ObjFileName = Name; }
+
+void ModInfoBuilder::addSymbol(CVSymbol Symbol) {
+ Symbols.push_back(Symbol);
+ SymbolByteSize += Symbol.data().size();
+}
+
+void ModInfoBuilder::addSourceFile(StringRef Path) {
+ SourceFiles.push_back(Path);
+}
+
+uint32_t ModInfoBuilder::calculateSerializedLength() const {
+ uint32_t L = sizeof(Layout);
+ uint32_t M = ModuleName.size() + 1;
+ uint32_t O = ObjFileName.size() + 1;
+ return alignTo(L + M + O, sizeof(uint32_t));
+}
+
+void ModInfoBuilder::finalize() {
+ Layout.C13Bytes = 0;
+ Layout.FileNameOffs = 0; // TODO: Fix this
+ Layout.Flags = 0; // TODO: Fix this
+ Layout.LineBytes = 0;
+ (void)Layout.Mod; // Set in constructor
+ (void)Layout.ModDiStream; // Set in finalizeMsfLayout
+ Layout.NumFiles = SourceFiles.size();
+ Layout.PdbFilePathNI = 0;
+ Layout.SrcFileNameNI = 0;
+
+ // This value includes both the signature field as well as the record bytes
+ // from the symbol stream.
+ Layout.SymBytes = SymbolByteSize + sizeof(uint32_t);
+}
+
+Error ModInfoBuilder::finalizeMsfLayout() {
+ this->Layout.ModDiStream = kInvalidStreamIndex;
+ auto ExpectedSN = MSF.addStream(calculateDiSymbolStreamSize(SymbolByteSize));
+ if (!ExpectedSN)
+ return ExpectedSN.takeError();
+ Layout.ModDiStream = *ExpectedSN;
+ return Error::success();
+}
+
+Error ModInfoBuilder::commit(BinaryStreamWriter &ModiWriter,
+ const msf::MSFLayout &MsfLayout,
+ WritableBinaryStreamRef MsfBuffer) {
+ // We write the Modi record to the `ModiWriter`, but we additionally write its
+ // symbol stream to a brand new stream.
+ if (auto EC = ModiWriter.writeObject(Layout))
+ return EC;
+ if (auto EC = ModiWriter.writeCString(ModuleName))
+ return EC;
+ if (auto EC = ModiWriter.writeCString(ObjFileName))
+ return EC;
+ if (auto EC = ModiWriter.padToAlignment(sizeof(uint32_t)))
+ return EC;
+
+ if (Layout.ModDiStream != kInvalidStreamIndex) {
+ auto NS = WritableMappedBlockStream::createIndexedStream(
+ MsfLayout, MsfBuffer, Layout.ModDiStream);
+ WritableBinaryStreamRef Ref(*NS);
+ BinaryStreamWriter SymbolWriter(Ref);
+ // Write the symbols.
+ if (auto EC =
+ SymbolWriter.writeInteger<uint32_t>(COFF::DEBUG_SECTION_MAGIC))
+ return EC;
+ BinaryItemStream<CVSymbol> Records(llvm::support::endianness::little);
+ Records.setItems(Symbols);
+ BinaryStreamRef RecordsRef(Records);
+ if (auto EC = SymbolWriter.writeStreamRef(RecordsRef))
+ return EC;
+ // TODO: Write C11 Line data
+ // TODO: Write C13 Line data
+ // TODO: Figure out what GlobalRefs substream actually is and populate it.
+ if (auto EC = SymbolWriter.writeInteger<uint32_t>(0))
+ return EC;
+ if (SymbolWriter.bytesRemaining() > 0)
+ return make_error<RawError>(raw_error_code::stream_too_long);
+ }
+ return Error::success();
+}
diff --git a/lib/DebugInfo/PDB/Raw/ModStream.cpp b/lib/DebugInfo/PDB/Native/ModStream.cpp
index 0ffc5b7d44aa..08798cf0ed28 100644
--- a/lib/DebugInfo/PDB/Raw/ModStream.cpp
+++ b/lib/DebugInfo/PDB/Native/ModStream.cpp
@@ -7,15 +7,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/DebugInfo/PDB/Native/ModStream.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/MSF/StreamRef.h"
-#include "llvm/DebugInfo/PDB/Raw/ModInfo.h"
-#include "llvm/DebugInfo/PDB/Raw/ModStream.h"
-#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
-#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
+#include "llvm/DebugInfo/PDB/Native/ModInfo.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Error.h"
#include <algorithm>
#include <cstdint>
@@ -31,7 +31,7 @@ ModStream::ModStream(const ModInfo &Module,
ModStream::~ModStream() = default;
Error ModStream::reload() {
- StreamReader Reader(*Stream);
+ BinaryStreamReader Reader(*Stream);
uint32_t SymbolSize = Mod.getSymbolDebugInfoByteSize();
uint32_t C11Size = Mod.getLineInfoByteSize();
@@ -41,7 +41,7 @@ Error ModStream::reload() {
return make_error<RawError>(raw_error_code::corrupt_file,
"Module has both C11 and C13 line info");
- ReadableStreamRef S;
+ BinaryStreamRef S;
if (auto EC = Reader.readInteger(Signature))
return EC;
@@ -53,7 +53,7 @@ Error ModStream::reload() {
if (auto EC = Reader.readStreamRef(C13LinesSubstream, C13Size))
return EC;
- StreamReader LineReader(C13LinesSubstream);
+ BinaryStreamReader LineReader(C13LinesSubstream);
if (auto EC = LineReader.readArray(LineInfo, LineReader.bytesRemaining()))
return EC;
diff --git a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
new file mode 100644
index 000000000000..c7ba32b82bc6
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
@@ -0,0 +1,135 @@
+//===- NamedStreamMap.cpp - PDB Named Stream Map ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
+
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/DebugInfo/PDB/Native/HashTable.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cstdint>
+
+using namespace llvm;
+using namespace llvm::pdb;
+
+NamedStreamMap::NamedStreamMap() = default;
+
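+// The named stream map is serialized as a 32-bit string buffer size, the
+// string buffer itself, and a hash table mapping each name's offset within
+// that buffer to its stream index. load() walks the hash table and rebuilds
+// the in-memory name-to-index StringMap.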
+Error NamedStreamMap::load(BinaryStreamReader &Stream) {
+ Mapping.clear();
+ FinalizedHashTable.clear();
+ FinalizedInfo.reset();
+
+ uint32_t StringBufferSize;
+ if (auto EC = Stream.readInteger(StringBufferSize))
+ return joinErrors(std::move(EC),
+ make_error<RawError>(raw_error_code::corrupt_file,
+ "Expected string buffer size"));
+
+ BinaryStreamRef StringsBuffer;
+ if (auto EC = Stream.readStreamRef(StringsBuffer, StringBufferSize))
+ return EC;
+
+ HashTable OffsetIndexMap;
+ if (auto EC = OffsetIndexMap.load(Stream))
+ return EC;
+
+ uint32_t NameOffset;
+ uint32_t NameIndex;
+ for (const auto &Entry : OffsetIndexMap) {
+ std::tie(NameOffset, NameIndex) = Entry;
+
+ // Compute the offset of the start of the string relative to the stream.
+ BinaryStreamReader NameReader(StringsBuffer);
+ NameReader.setOffset(NameOffset);
+ // Pump out our c-string from the stream.
+ StringRef Str;
+ if (auto EC = NameReader.readCString(Str))
+ return joinErrors(std::move(EC),
+ make_error<RawError>(raw_error_code::corrupt_file,
+ "Expected name map name"));
+
+ // Add this to a string-map from name to stream number.
+ Mapping.insert({Str, NameIndex});
+ }
+
+ return Error::success();
+}
+
+Error NamedStreamMap::commit(BinaryStreamWriter &Writer) const {
+ assert(FinalizedInfo.hasValue());
+
+ // The first field is the number of bytes of string data.
+ if (auto EC = Writer.writeInteger(FinalizedInfo->StringDataBytes))
+ return EC;
+
+ // Now all of the string data itself.
+ for (const auto &Item : Mapping) {
+ if (auto EC = Writer.writeCString(Item.getKey()))
+ return EC;
+ }
+
+ // And finally the Offset Index map.
+ if (auto EC = FinalizedHashTable.commit(Writer))
+ return EC;
+
+ return Error::success();
+}
+
+uint32_t NamedStreamMap::finalize() {
+ if (FinalizedInfo.hasValue())
+ return FinalizedInfo->SerializedLength;
+
+ // Build the finalized hash table.
+ FinalizedHashTable.clear();
+ FinalizedInfo.emplace();
+ for (const auto &Item : Mapping) {
+ FinalizedHashTable.set(FinalizedInfo->StringDataBytes, Item.getValue());
+ FinalizedInfo->StringDataBytes += Item.getKeyLength() + 1;
+ }
+
+ // Number of bytes of string data.
+ FinalizedInfo->SerializedLength += sizeof(support::ulittle32_t);
+ // Followed by that many actual bytes of string data.
+ FinalizedInfo->SerializedLength += FinalizedInfo->StringDataBytes;
+ // Followed by the mapping from Offset to Index.
+ FinalizedInfo->SerializedLength +=
+ FinalizedHashTable.calculateSerializedLength();
+ return FinalizedInfo->SerializedLength;
+}
+
+iterator_range<StringMapConstIterator<uint32_t>>
+NamedStreamMap::entries() const {
+ return make_range<StringMapConstIterator<uint32_t>>(Mapping.begin(),
+ Mapping.end());
+}
+
+uint32_t NamedStreamMap::size() const { return Mapping.size(); }
+
+bool NamedStreamMap::get(StringRef Stream, uint32_t &StreamNo) const {
+ auto Iter = Mapping.find(Stream);
+ if (Iter == Mapping.end())
+ return false;
+ StreamNo = Iter->second;
+ return true;
+}
+
+void NamedStreamMap::set(StringRef Stream, uint32_t StreamNo) {
+ FinalizedInfo.reset();
+ Mapping[Stream] = StreamNo;
+}
+
+void NamedStreamMap::remove(StringRef Stream) {
+ FinalizedInfo.reset();
+ Mapping.erase(Stream);
+}
diff --git a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
new file mode 100644
index 000000000000..9c0cc0bf8233
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
@@ -0,0 +1,43 @@
+//===- NativeCompilandSymbol.cpp - Native impl for compilands ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
+
+namespace llvm {
+namespace pdb {
+
+NativeCompilandSymbol::NativeCompilandSymbol(NativeSession &Session,
+ const ModuleInfoEx &MI)
+ : NativeRawSymbol(Session), Module(MI) {}
+
+PDB_SymType NativeCompilandSymbol::getSymTag() const {
+ return PDB_SymType::Compiland;
+}
+
+bool NativeCompilandSymbol::isEditAndContinueEnabled() const {
+ return Module.Info.hasECInfo();
+}
+
+uint32_t NativeCompilandSymbol::getLexicalParentId() const { return 0; }
+
+// The usage of getObjFileName for getLibraryName and getModuleName for getName
+// may seem backwards, but it is consistent with DIA, which is what this API
+// was modeled after. We may rename these methods later to try to eliminate
+// this potential confusion.
+
+std::string NativeCompilandSymbol::getLibraryName() const {
+ return Module.Info.getObjFileName();
+}
+
+std::string NativeCompilandSymbol::getName() const {
+ return Module.Info.getModuleName();
+}
+
+} // namespace pdb
+} // namespace llvm
diff --git a/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp b/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
new file mode 100644
index 000000000000..7532110d005c
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/NativeEnumModules.cpp
@@ -0,0 +1,52 @@
+//==- NativeEnumModules.cpp - Native Symbol Enumerator impl ------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeEnumModules.h"
+
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
+
+namespace llvm {
+namespace pdb {
+
+NativeEnumModules::NativeEnumModules(NativeSession &PDBSession,
+ ArrayRef<ModuleInfoEx> Modules,
+ uint32_t Index)
+ : Session(PDBSession), Modules(Modules), Index(Index) {}
+
+uint32_t NativeEnumModules::getChildCount() const {
+ return static_cast<uint32_t>(Modules.size());
+}
+
+std::unique_ptr<PDBSymbol>
+NativeEnumModules::getChildAtIndex(uint32_t Index) const {
+ if (Index >= Modules.size())
+ return nullptr;
+ return std::unique_ptr<PDBSymbol>(new PDBSymbolCompiland(Session,
+ std::unique_ptr<IPDBRawSymbol>(
+ new NativeCompilandSymbol(Session, Modules[Index]))));
+}
+
+std::unique_ptr<PDBSymbol> NativeEnumModules::getNext() {
+ if (Index >= Modules.size())
+ return nullptr;
+ return getChildAtIndex(Index++);
+}
+
+void NativeEnumModules::reset() { Index = 0; }
+
+NativeEnumModules *NativeEnumModules::clone() const {
+ return new NativeEnumModules(Session, Modules, Index);
+}
+
+} // namespace pdb
+} // namespace llvm
diff --git a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
new file mode 100644
index 000000000000..ec2a4b87457c
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp
@@ -0,0 +1,79 @@
+//===- NativeExeSymbol.cpp - native impl for PDBSymbolExe -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
+
+#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/NativeEnumModules.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+
+namespace llvm {
+namespace pdb {
+
+NativeExeSymbol::NativeExeSymbol(NativeSession &Session)
+ : NativeRawSymbol(Session), File(Session.getPDBFile()) {}
+
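+// Only compiland (module) children can be enumerated so far; any other
+// symbol type falls through to the default case and yields no enumerator.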
+std::unique_ptr<IPDBEnumSymbols>
+NativeExeSymbol::findChildren(PDB_SymType Type) const {
+ switch (Type) {
+ case PDB_SymType::Compiland: {
+ auto Dbi = File.getPDBDbiStream();
+ if (Dbi) {
+ const auto Modules = Dbi->modules();
+ return std::unique_ptr<IPDBEnumSymbols>(
+ new NativeEnumModules(Session, Modules));
+ }
+ consumeError(Dbi.takeError());
+ break;
+ }
+ default:
+ break;
+ }
+ return nullptr;
+}
+
+uint32_t NativeExeSymbol::getAge() const {
+ auto IS = File.getPDBInfoStream();
+ if (IS)
+ return IS->getAge();
+ consumeError(IS.takeError());
+ return 0;
+}
+
+std::string NativeExeSymbol::getSymbolsFileName() const {
+ return File.getFilePath();
+}
+
+PDB_UniqueId NativeExeSymbol::getGuid() const {
+ auto IS = File.getPDBInfoStream();
+ if (IS)
+ return IS->getGuid();
+ consumeError(IS.takeError());
+ return PDB_UniqueId{{0}};
+}
+
+bool NativeExeSymbol::hasCTypes() const {
+ auto Dbi = File.getPDBDbiStream();
+ if (Dbi)
+ return Dbi->hasCTypes();
+ consumeError(Dbi.takeError());
+ return false;
+}
+
+bool NativeExeSymbol::hasPrivateSymbols() const {
+ auto Dbi = File.getPDBDbiStream();
+ if (Dbi)
+ return !Dbi->isStripped();
+ consumeError(Dbi.takeError());
+ return false;
+}
+
+} // namespace pdb
+} // namespace llvm
diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
new file mode 100644
index 000000000000..3aba35adb53f
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp
@@ -0,0 +1,706 @@
+//===- NativeRawSymbol.cpp - Native impl for IPDBRawSymbol ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDBExtras.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
+#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::pdb;
+
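+// NativeRawSymbol provides neutral default implementations (zero, false,
+// empty, or null) for the entire IPDBRawSymbol interface; concrete symbols
+// such as NativeExeSymbol and NativeCompilandSymbol override only the
+// queries they can actually answer.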
+NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession)
+ : Session(PDBSession) {}
+
+void NativeRawSymbol::dump(raw_ostream &OS, int Indent) const {}
+
+std::unique_ptr<IPDBEnumSymbols>
+NativeRawSymbol::findChildren(PDB_SymType Type) const {
+ return nullptr;
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+NativeRawSymbol::findChildren(PDB_SymType Type, StringRef Name,
+ PDB_NameSearchFlags Flags) const {
+ return nullptr;
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+NativeRawSymbol::findChildrenByRVA(PDB_SymType Type, StringRef Name,
+ PDB_NameSearchFlags Flags, uint32_t RVA) const {
+ return nullptr;
+}
+
+std::unique_ptr<IPDBEnumSymbols>
+NativeRawSymbol::findInlineFramesByRVA(uint32_t RVA) const {
+ return nullptr;
+}
+
+void NativeRawSymbol::getDataBytes(llvm::SmallVector<uint8_t, 32> &bytes) const {
+ bytes.clear();
+}
+
+PDB_MemberAccess NativeRawSymbol::getAccess() const {
+ return PDB_MemberAccess::Private;
+}
+
+uint32_t NativeRawSymbol::getAddressOffset() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getAddressSection() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getAge() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getArrayIndexTypeId() const {
+ return 0;
+}
+
+void NativeRawSymbol::getBackEndVersion(VersionInfo &Version) const {
+ Version.Major = 0;
+ Version.Minor = 0;
+ Version.Build = 0;
+ Version.QFE = 0;
+}
+
+uint32_t NativeRawSymbol::getBaseDataOffset() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getBaseDataSlot() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getBaseSymbolId() const {
+ return 0;
+}
+
+PDB_BuiltinType NativeRawSymbol::getBuiltinType() const {
+ return PDB_BuiltinType::None;
+}
+
+uint32_t NativeRawSymbol::getBitPosition() const {
+ return 0;
+}
+
+PDB_CallingConv NativeRawSymbol::getCallingConvention() const {
+ return PDB_CallingConv::FarStdCall;
+}
+
+uint32_t NativeRawSymbol::getClassParentId() const {
+ return 0;
+}
+
+std::string NativeRawSymbol::getCompilerName() const {
+  return "";
+}
+
+uint32_t NativeRawSymbol::getCount() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getCountLiveRanges() const {
+ return 0;
+}
+
+void NativeRawSymbol::getFrontEndVersion(VersionInfo &Version) const {
+ Version.Major = 0;
+ Version.Minor = 0;
+ Version.Build = 0;
+ Version.QFE = 0;
+}
+
+PDB_Lang NativeRawSymbol::getLanguage() const {
+ return PDB_Lang::Cobol;
+}
+
+uint32_t NativeRawSymbol::getLexicalParentId() const {
+ return 0;
+}
+
+std::string NativeRawSymbol::getLibraryName() const {
+ return "";
+}
+
+uint32_t NativeRawSymbol::getLiveRangeStartAddressOffset() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getLiveRangeStartAddressSection() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getLiveRangeStartRelativeVirtualAddress() const {
+ return 0;
+}
+
+codeview::RegisterId NativeRawSymbol::getLocalBasePointerRegisterId() const {
+ return codeview::RegisterId::EAX;
+}
+
+uint32_t NativeRawSymbol::getLowerBoundId() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getMemorySpaceKind() const {
+ return 0;
+}
+
+std::string NativeRawSymbol::getName() const {
+  return "";
+}
+
+uint32_t NativeRawSymbol::getNumberOfAcceleratorPointerTags() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getNumberOfColumns() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getNumberOfModifiers() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getNumberOfRegisterIndices() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getNumberOfRows() const {
+ return 0;
+}
+
+std::string NativeRawSymbol::getObjectFileName() const {
+ return "";
+}
+
+uint32_t NativeRawSymbol::getOemId() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getOemSymbolId() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getOffsetInUdt() const {
+ return 0;
+}
+
+PDB_Cpu NativeRawSymbol::getPlatform() const {
+ return PDB_Cpu::Intel8080;
+}
+
+uint32_t NativeRawSymbol::getRank() const {
+ return 0;
+}
+
+codeview::RegisterId NativeRawSymbol::getRegisterId() const {
+ return codeview::RegisterId::EAX;
+}
+
+uint32_t NativeRawSymbol::getRegisterType() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getRelativeVirtualAddress() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getSamplerSlot() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getSignature() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getSizeInUdt() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getSlot() const {
+ return 0;
+}
+
+std::string NativeRawSymbol::getSourceFileName() const {
+  return "";
+}
+
+uint32_t NativeRawSymbol::getStride() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getSubTypeId() const {
+ return 0;
+}
+
+std::string NativeRawSymbol::getSymbolsFileName() const { return ""; }
+
+uint32_t NativeRawSymbol::getSymIndexId() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getTargetOffset() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getTargetRelativeVirtualAddress() const {
+ return 0;
+}
+
+uint64_t NativeRawSymbol::getTargetVirtualAddress() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getTargetSection() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getTextureSlot() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getTimeStamp() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getToken() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getTypeId() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getUavSlot() const {
+ return 0;
+}
+
+std::string NativeRawSymbol::getUndecoratedName() const {
+  return "";
+}
+
+uint32_t NativeRawSymbol::getUnmodifiedTypeId() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getUpperBoundId() const {
+ return 0;
+}
+
+Variant NativeRawSymbol::getValue() const {
+ return Variant();
+}
+
+uint32_t NativeRawSymbol::getVirtualBaseDispIndex() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getVirtualBaseOffset() const {
+ return 0;
+}
+
+uint32_t NativeRawSymbol::getVirtualTableShapeId() const {
+ return 0;
+}
+
+std::unique_ptr<PDBSymbolTypeVTable>
+NativeRawSymbol::getVirtualBaseTableType() const {
+ return nullptr;
+}
+
+PDB_DataKind NativeRawSymbol::getDataKind() const {
+ return PDB_DataKind::Unknown;
+}
+
+PDB_SymType NativeRawSymbol::getSymTag() const {
+ return PDB_SymType::None;
+}
+
+PDB_UniqueId NativeRawSymbol::getGuid() const {
+ return PDB_UniqueId{{0}};
+}
+
+int32_t NativeRawSymbol::getOffset() const {
+ return 0;
+}
+
+int32_t NativeRawSymbol::getThisAdjust() const {
+ return 0;
+}
+
+int32_t NativeRawSymbol::getVirtualBasePointerOffset() const {
+ return 0;
+}
+
+PDB_LocType NativeRawSymbol::getLocationType() const {
+ return PDB_LocType::Null;
+}
+
+PDB_Machine NativeRawSymbol::getMachineType() const {
+ return PDB_Machine::Invalid;
+}
+
+codeview::ThunkOrdinal NativeRawSymbol::getThunkOrdinal() const {
+ return codeview::ThunkOrdinal::Standard;
+}
+
+uint64_t NativeRawSymbol::getLength() const {
+ return 0;
+}
+
+uint64_t NativeRawSymbol::getLiveRangeLength() const {
+ return 0;
+}
+
+uint64_t NativeRawSymbol::getVirtualAddress() const {
+ return 0;
+}
+
+PDB_UdtType NativeRawSymbol::getUdtKind() const {
+ return PDB_UdtType::Struct;
+}
+
+bool NativeRawSymbol::hasConstructor() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasCustomCallingConvention() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasFarReturn() const {
+ return false;
+}
+
+bool NativeRawSymbol::isCode() const {
+ return false;
+}
+
+bool NativeRawSymbol::isCompilerGenerated() const {
+ return false;
+}
+
+bool NativeRawSymbol::isConstType() const {
+ return false;
+}
+
+bool NativeRawSymbol::isEditAndContinueEnabled() const {
+ return false;
+}
+
+bool NativeRawSymbol::isFunction() const {
+ return false;
+}
+
+bool NativeRawSymbol::getAddressTaken() const {
+ return false;
+}
+
+bool NativeRawSymbol::getNoStackOrdering() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasAlloca() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasAssignmentOperator() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasCTypes() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasCastOperator() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasDebugInfo() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasEH() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasEHa() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasInlAsm() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasInlineAttribute() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasInterruptReturn() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasFramePointer() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasLongJump() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasManagedCode() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasNestedTypes() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasNoInlineAttribute() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasNoReturnAttribute() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasOptimizedCodeDebugInfo() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasOverloadedOperator() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasSEH() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasSecurityChecks() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasSetJump() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasStrictGSCheck() const {
+ return false;
+}
+
+bool NativeRawSymbol::isAcceleratorGroupSharedLocal() const {
+ return false;
+}
+
+bool NativeRawSymbol::isAcceleratorPointerTagLiveRange() const {
+ return false;
+}
+
+bool NativeRawSymbol::isAcceleratorStubFunction() const {
+ return false;
+}
+
+bool NativeRawSymbol::isAggregated() const {
+ return false;
+}
+
+bool NativeRawSymbol::isIntroVirtualFunction() const {
+ return false;
+}
+
+bool NativeRawSymbol::isCVTCIL() const {
+ return false;
+}
+
+bool NativeRawSymbol::isConstructorVirtualBase() const {
+ return false;
+}
+
+bool NativeRawSymbol::isCxxReturnUdt() const {
+ return false;
+}
+
+bool NativeRawSymbol::isDataAligned() const {
+ return false;
+}
+
+bool NativeRawSymbol::isHLSLData() const {
+ return false;
+}
+
+bool NativeRawSymbol::isHotpatchable() const {
+ return false;
+}
+
+bool NativeRawSymbol::isIndirectVirtualBaseClass() const {
+ return false;
+}
+
+bool NativeRawSymbol::isInterfaceUdt() const {
+ return false;
+}
+
+bool NativeRawSymbol::isIntrinsic() const {
+ return false;
+}
+
+bool NativeRawSymbol::isLTCG() const {
+ return false;
+}
+
+bool NativeRawSymbol::isLocationControlFlowDependent() const {
+ return false;
+}
+
+bool NativeRawSymbol::isMSILNetmodule() const {
+ return false;
+}
+
+bool NativeRawSymbol::isMatrixRowMajor() const {
+ return false;
+}
+
+bool NativeRawSymbol::isManagedCode() const {
+ return false;
+}
+
+bool NativeRawSymbol::isMSILCode() const {
+ return false;
+}
+
+bool NativeRawSymbol::isMultipleInheritance() const {
+ return false;
+}
+
+bool NativeRawSymbol::isNaked() const {
+ return false;
+}
+
+bool NativeRawSymbol::isNested() const {
+ return false;
+}
+
+bool NativeRawSymbol::isOptimizedAway() const {
+ return false;
+}
+
+bool NativeRawSymbol::isPacked() const {
+ return false;
+}
+
+bool NativeRawSymbol::isPointerBasedOnSymbolValue() const {
+ return false;
+}
+
+bool NativeRawSymbol::isPointerToDataMember() const {
+ return false;
+}
+
+bool NativeRawSymbol::isPointerToMemberFunction() const {
+ return false;
+}
+
+bool NativeRawSymbol::isPureVirtual() const {
+ return false;
+}
+
+bool NativeRawSymbol::isRValueReference() const {
+ return false;
+}
+
+bool NativeRawSymbol::isRefUdt() const {
+ return false;
+}
+
+bool NativeRawSymbol::isReference() const {
+ return false;
+}
+
+bool NativeRawSymbol::isRestrictedType() const {
+ return false;
+}
+
+bool NativeRawSymbol::isReturnValue() const {
+ return false;
+}
+
+bool NativeRawSymbol::isSafeBuffers() const {
+ return false;
+}
+
+bool NativeRawSymbol::isScoped() const {
+ return false;
+}
+
+bool NativeRawSymbol::isSdl() const {
+ return false;
+}
+
+bool NativeRawSymbol::isSingleInheritance() const {
+ return false;
+}
+
+bool NativeRawSymbol::isSplitted() const {
+ return false;
+}
+
+bool NativeRawSymbol::isStatic() const {
+ return false;
+}
+
+bool NativeRawSymbol::hasPrivateSymbols() const {
+ return false;
+}
+
+bool NativeRawSymbol::isUnalignedType() const {
+ return false;
+}
+
+bool NativeRawSymbol::isUnreached() const {
+ return false;
+}
+
+bool NativeRawSymbol::isValueUdt() const {
+ return false;
+}
+
+bool NativeRawSymbol::isVirtual() const {
+ return false;
+}
+
+bool NativeRawSymbol::isVirtualBaseClass() const {
+ return false;
+}
+
+bool NativeRawSymbol::isVirtualInheritance() const {
+ return false;
+}
+
+bool NativeRawSymbol::isVolatileType() const {
+ return false;
+}
+
+bool NativeRawSymbol::wasInlined() const {
+ return false;
+}
+
+std::string NativeRawSymbol::getUnused() const {
+ return "";
+}
diff --git a/lib/DebugInfo/PDB/Native/NativeSession.cpp b/lib/DebugInfo/PDB/Native/NativeSession.cpp
new file mode 100644
index 000000000000..7e6843bceb7d
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -0,0 +1,146 @@
+//===- NativeSession.cpp - Native implementation of IPDBSession -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/PDB/GenericError.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
+#include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <algorithm>
+#include <memory>
+
+using namespace llvm;
+using namespace llvm::msf;
+using namespace llvm::pdb;
+
+NativeSession::NativeSession(std::unique_ptr<PDBFile> PdbFile,
+ std::unique_ptr<BumpPtrAllocator> Allocator)
+ : Pdb(std::move(PdbFile)), Allocator(std::move(Allocator)) {}
+
+NativeSession::~NativeSession() = default;
+
+Error NativeSession::createFromPdb(StringRef Path,
+ std::unique_ptr<IPDBSession> &Session) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer =
+ MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1,
+ /*RequiresNullTerminator=*/false);
+ if (!ErrorOrBuffer)
+ return make_error<GenericError>(generic_error_code::invalid_path);
+
+ std::unique_ptr<MemoryBuffer> Buffer = std::move(*ErrorOrBuffer);
+ auto Stream = llvm::make_unique<MemoryBufferByteStream>(
+ std::move(Buffer), llvm::support::little);
+
+ auto Allocator = llvm::make_unique<BumpPtrAllocator>();
+ auto File = llvm::make_unique<PDBFile>(Path, std::move(Stream), *Allocator);
+ if (auto EC = File->parseFileHeaders())
+ return EC;
+ if (auto EC = File->parseStreamData())
+ return EC;
+
+ Session =
+ llvm::make_unique<NativeSession>(std::move(File), std::move(Allocator));
+
+ return Error::success();
+}
+
+Error NativeSession::createFromExe(StringRef Path,
+ std::unique_ptr<IPDBSession> &Session) {
+ return make_error<RawError>(raw_error_code::feature_unsupported);
+}
+
+uint64_t NativeSession::getLoadAddress() const { return 0; }
+
+void NativeSession::setLoadAddress(uint64_t Address) {}
+
+std::unique_ptr<PDBSymbolExe> NativeSession::getGlobalScope() const {
+ auto RawSymbol =
+ llvm::make_unique<NativeExeSymbol>(const_cast<NativeSession &>(*this));
+ auto PdbSymbol(PDBSymbol::create(*this, std::move(RawSymbol)));
+ std::unique_ptr<PDBSymbolExe> ExeSymbol(
+ static_cast<PDBSymbolExe *>(PdbSymbol.release()));
+ return ExeSymbol;
+}
+
+std::unique_ptr<PDBSymbol>
+NativeSession::getSymbolById(uint32_t SymbolId) const {
+ return nullptr;
+}
+
+std::unique_ptr<PDBSymbol>
+NativeSession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) const {
+ return nullptr;
+}
+
+std::unique_ptr<IPDBEnumLineNumbers>
+NativeSession::findLineNumbers(const PDBSymbolCompiland &Compiland,
+ const IPDBSourceFile &File) const {
+ return nullptr;
+}
+
+std::unique_ptr<IPDBEnumLineNumbers>
+NativeSession::findLineNumbersByAddress(uint64_t Address,
+ uint32_t Length) const {
+ return nullptr;
+}
+
+std::unique_ptr<IPDBEnumSourceFiles>
+NativeSession::findSourceFiles(const PDBSymbolCompiland *Compiland,
+ StringRef Pattern,
+ PDB_NameSearchFlags Flags) const {
+ return nullptr;
+}
+
+std::unique_ptr<IPDBSourceFile>
+NativeSession::findOneSourceFile(const PDBSymbolCompiland *Compiland,
+ StringRef Pattern,
+ PDB_NameSearchFlags Flags) const {
+ return nullptr;
+}
+
+std::unique_ptr<IPDBEnumChildren<PDBSymbolCompiland>>
+NativeSession::findCompilandsForSourceFile(StringRef Pattern,
+ PDB_NameSearchFlags Flags) const {
+ return nullptr;
+}
+
+std::unique_ptr<PDBSymbolCompiland>
+NativeSession::findOneCompilandForSourceFile(StringRef Pattern,
+ PDB_NameSearchFlags Flags) const {
+ return nullptr;
+}
+
+std::unique_ptr<IPDBEnumSourceFiles> NativeSession::getAllSourceFiles() const {
+ return nullptr;
+}
+
+std::unique_ptr<IPDBEnumSourceFiles> NativeSession::getSourceFilesForCompiland(
+ const PDBSymbolCompiland &Compiland) const {
+ return nullptr;
+}
+
+std::unique_ptr<IPDBSourceFile>
+NativeSession::getSourceFileById(uint32_t FileId) const {
+ return nullptr;
+}
+
+std::unique_ptr<IPDBEnumDataStreams> NativeSession::getDebugStreams() const {
+ return nullptr;
+}
diff --git a/lib/DebugInfo/PDB/Raw/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp
index 53491518b8c7..943e7fa13ab7 100644
--- a/lib/DebugInfo/PDB/Raw/PDBFile.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -7,24 +7,25 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/MSF/MSFCommon.h"
-#include "llvm/DebugInfo/MSF/StreamArray.h"
-#include "llvm/DebugInfo/MSF/StreamInterface.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
-#include "llvm/DebugInfo/PDB/Raw/GlobalsStream.h"
-#include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
-#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h"
-#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Raw/PublicsStream.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
-#include "llvm/DebugInfo/PDB/Raw/SymbolStream.h"
-#include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
+#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/StringTable.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/Support/BinaryStream.h"
+#include "llvm/Support/BinaryStreamArray.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/Path.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -38,12 +39,18 @@ namespace {
typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
} // end anonymous namespace
-PDBFile::PDBFile(std::unique_ptr<ReadableStream> PdbFileBuffer,
+PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
BumpPtrAllocator &Allocator)
- : Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {}
+ : FilePath(Path), Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {}
PDBFile::~PDBFile() = default;
+StringRef PDBFile::getFilePath() const { return FilePath; }
+
+StringRef PDBFile::getFileDirectory() const {
+ return sys::path::parent_path(FilePath);
+}
+
uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
uint32_t PDBFile::getFreeBlockMapBlock() const {
@@ -106,7 +113,7 @@ Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
}
Error PDBFile::parseFileHeaders() {
- StreamReader Reader(*Buffer);
+ BinaryStreamReader Reader(*Buffer);
// Initialize SB.
const msf::SuperBlock *SB = nullptr;
@@ -140,7 +147,7 @@ Error PDBFile::parseFileHeaders() {
// See the function fpmPn() for more information:
// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
auto FpmStream = MappedBlockStream::createFpmStream(ContainerLayout, *Buffer);
- StreamReader FpmReader(*FpmStream);
+ BinaryStreamReader FpmReader(*FpmStream);
ArrayRef<uint8_t> FpmBytes;
if (auto EC = FpmReader.readBytes(FpmBytes,
msf::getFullFpmByteSize(ContainerLayout)))
@@ -178,7 +185,7 @@ Error PDBFile::parseStreamData() {
// subclass of IPDBStreamData which only accesses the fields that have already
// been parsed, we can avoid this and reuse MappedBlockStream.
auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer);
- StreamReader Reader(*DS);
+ BinaryStreamReader Reader(*DS);
if (auto EC = Reader.readInteger(NumStreams))
return EC;
@@ -229,7 +236,8 @@ Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
auto GlobalS = safelyCreateIndexedStream(
ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex());
- if (!GlobalS) return GlobalS.takeError();
+ if (!GlobalS)
+ return GlobalS.takeError();
auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS));
if (auto EC = TempGlobals->reload())
return std::move(EC);
@@ -241,7 +249,8 @@ Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
Expected<InfoStream &> PDBFile::getPDBInfoStream() {
if (!Info) {
auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB);
- if (!InfoS) return InfoS.takeError();
+ if (!InfoS)
+ return InfoS.takeError();
auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS));
if (auto EC = TempInfo->reload())
return std::move(EC);
@@ -253,7 +262,8 @@ Expected<InfoStream &> PDBFile::getPDBInfoStream() {
Expected<DbiStream &> PDBFile::getPDBDbiStream() {
if (!Dbi) {
auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI);
- if (!DbiS) return DbiS.takeError();
+ if (!DbiS)
+ return DbiS.takeError();
auto TempDbi = llvm::make_unique<DbiStream>(*this, std::move(*DbiS));
if (auto EC = TempDbi->reload())
return std::move(EC);
@@ -265,7 +275,8 @@ Expected<DbiStream &> PDBFile::getPDBDbiStream() {
Expected<TpiStream &> PDBFile::getPDBTpiStream() {
if (!Tpi) {
auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI);
- if (!TpiS) return TpiS.takeError();
+ if (!TpiS)
+ return TpiS.takeError();
auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS));
if (auto EC = TempTpi->reload())
return std::move(EC);
@@ -277,7 +288,8 @@ Expected<TpiStream &> PDBFile::getPDBTpiStream() {
Expected<TpiStream &> PDBFile::getPDBIpiStream() {
if (!Ipi) {
auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI);
- if (!IpiS) return IpiS.takeError();
+ if (!IpiS)
+ return IpiS.takeError();
auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS));
if (auto EC = TempIpi->reload())
return std::move(EC);
@@ -294,7 +306,8 @@ Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
auto PublicS = safelyCreateIndexedStream(
ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex());
- if (!PublicS) return PublicS.takeError();
+ if (!PublicS)
+ return PublicS.takeError();
auto TempPublics =
llvm::make_unique<PublicsStream>(*this, std::move(*PublicS));
if (auto EC = TempPublics->reload())
@@ -313,7 +326,8 @@ Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
auto SymbolS =
safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum);
- if (!SymbolS) return SymbolS.takeError();
+ if (!SymbolS)
+ return SymbolS.takeError();
auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS));
if (auto EC = TempSymbols->reload())
@@ -323,8 +337,8 @@ Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
return *Symbols;
}
-Expected<NameHashTable &> PDBFile::getStringTable() {
- if (!StringTable || !StringTableStream) {
+Expected<StringTable &> PDBFile::getStringTable() {
+ if (!Strings || !StringTableStream) {
auto IS = getPDBInfoStream();
if (!IS)
return IS.takeError();
@@ -333,23 +347,25 @@ Expected<NameHashTable &> PDBFile::getStringTable() {
auto NS =
safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex);
- if (!NS) return NS.takeError();
+ if (!NS)
+ return NS.takeError();
- StreamReader Reader(**NS);
- auto N = llvm::make_unique<NameHashTable>();
+ BinaryStreamReader Reader(**NS);
+ auto N = llvm::make_unique<StringTable>();
if (auto EC = N->load(Reader))
return std::move(EC);
- StringTable = std::move(N);
+ Strings = std::move(N);
StringTableStream = std::move(*NS);
}
- return *StringTable;
+ return *Strings;
}
bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); }
bool PDBFile::hasPDBGlobalsStream() {
auto DbiS = getPDBDbiStream();
- if (!DbiS) return false;
+ if (!DbiS)
+ return false;
return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
}
@@ -359,13 +375,15 @@ bool PDBFile::hasPDBIpiStream() const { return StreamIPI < getNumStreams(); }
bool PDBFile::hasPDBPublicsStream() {
auto DbiS = getPDBDbiStream();
- if (!DbiS) return false;
+ if (!DbiS)
+ return false;
return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
}
bool PDBFile::hasPDBSymbolStream() {
auto DbiS = getPDBDbiStream();
- if (!DbiS) return false;
+ if (!DbiS)
+ return false;
return DbiS->getSymRecordStreamIndex() < getNumStreams();
}
@@ -373,18 +391,19 @@ bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
bool PDBFile::hasStringTable() {
auto IS = getPDBInfoStream();
- if (!IS) return false;
+ if (!IS)
+ return false;
return IS->getNamedStreamIndex("/names") < getNumStreams();
}
-/// Wrapper around MappedBlockStream::createIndexedStream()
-/// that checks if a stream with that index actually exists.
-/// If it does not, the return value will have an MSFError with
-/// code msf_error_code::no_stream. Else, the return value will
-/// contain the stream returned by createIndexedStream().
-Expected<std::unique_ptr<MappedBlockStream>> PDBFile::safelyCreateIndexedStream(
- const MSFLayout &Layout, const ReadableStream &MsfData,
- uint32_t StreamIndex) const {
+/// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
+/// stream with that index actually exists. If it does not, the return value
+/// will have an MSFError with code msf_error_code::no_stream. Else, the return
+/// value will contain the stream returned by createIndexedStream().
+Expected<std::unique_ptr<MappedBlockStream>>
+PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout,
+ BinaryStreamRef MsfData,
+ uint32_t StreamIndex) const {
if (StreamIndex >= getNumStreams())
return make_error<RawError>(raw_error_code::no_stream);
return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex);
diff --git a/lib/DebugInfo/PDB/Raw/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index 6fec0e32a8ae..b3c84903bc7e 100644
--- a/lib/DebugInfo/PDB/Raw/PDBFileBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -7,21 +7,22 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/PDBFileBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
-#include "llvm/DebugInfo/MSF/StreamInterface.h"
-#include "llvm/DebugInfo/MSF/StreamWriter.h"
#include "llvm/DebugInfo/PDB/GenericError.h"
-#include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
-#include "llvm/DebugInfo/PDB/Raw/DbiStreamBuilder.h"
-#include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
-#include "llvm/DebugInfo/PDB/Raw/InfoStreamBuilder.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
-#include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
-#include "llvm/DebugInfo/PDB/Raw/TpiStreamBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h"
+#include "llvm/Support/BinaryStream.h"
+#include "llvm/Support/BinaryStreamWriter.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -44,7 +45,7 @@ MSFBuilder &PDBFileBuilder::getMsfBuilder() { return *Msf; }
InfoStreamBuilder &PDBFileBuilder::getInfoBuilder() {
if (!Info)
- Info = llvm::make_unique<InfoStreamBuilder>(*Msf);
+ Info = llvm::make_unique<InfoStreamBuilder>(*Msf, NamedStreams);
return *Info;
}
@@ -66,7 +67,26 @@ TpiStreamBuilder &PDBFileBuilder::getIpiBuilder() {
return *Ipi;
}
-Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() const {
+StringTableBuilder &PDBFileBuilder::getStringTableBuilder() { return Strings; }
+
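+// Reserves an MSF stream of the given size and records its index in the
+// named stream map under Name.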
+Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) {
+ auto ExpectedStream = Msf->addStream(Size);
+ if (!ExpectedStream)
+ return ExpectedStream.takeError();
+ NamedStreams.set(Name, *ExpectedStream);
+ return Error::success();
+}
+
+Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() {
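+  // The string table and the fixed named streams need stream indices before
+  // the MSF layout is frozen, so reserve them first.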
+ uint32_t StringTableSize = Strings.finalize();
+
+ if (auto EC = addNamedStream("/names", StringTableSize))
+ return std::move(EC);
+ if (auto EC = addNamedStream("/LinkInfo", 0))
+ return std::move(EC);
+ if (auto EC = addNamedStream("/src/headerblock", 0))
+ return std::move(EC);
+
if (Info) {
if (auto EC = Info->finalizeMsfLayout())
return std::move(EC);
@@ -98,8 +118,9 @@ Error PDBFileBuilder::commit(StringRef Filename) {
if (OutFileOrError.getError())
return llvm::make_error<pdb::GenericError>(generic_error_code::invalid_path,
Filename);
- FileBufferByteStream Buffer(std::move(*OutFileOrError));
- StreamWriter Writer(Buffer);
+ FileBufferByteStream Buffer(std::move(*OutFileOrError),
+ llvm::support::little);
+ BinaryStreamWriter Writer(Buffer);
if (auto EC = Writer.writeObject(*Layout.SB))
return EC;
@@ -111,9 +132,8 @@ Error PDBFileBuilder::commit(StringRef Filename) {
auto DirStream =
WritableMappedBlockStream::createDirectoryStream(Layout, Buffer);
- StreamWriter DW(*DirStream);
- if (auto EC =
- DW.writeInteger(static_cast<uint32_t>(Layout.StreamSizes.size())))
+ BinaryStreamWriter DW(*DirStream);
+ if (auto EC = DW.writeInteger<uint32_t>(Layout.StreamSizes.size()))
return EC;
if (auto EC = DW.writeArray(Layout.StreamSizes))
@@ -124,6 +144,16 @@ Error PDBFileBuilder::commit(StringRef Filename) {
return EC;
}
+ uint32_t StringTableStreamNo = 0;
+ if (!NamedStreams.get("/names", StringTableStreamNo))
+ return llvm::make_error<pdb::RawError>(raw_error_code::no_stream);
+
+ auto NS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer,
+ StringTableStreamNo);
+ BinaryStreamWriter NSWriter(*NS);
+ if (auto EC = Strings.commit(NSWriter))
+ return EC;
+
if (Info) {
if (auto EC = Info->commit(Layout, Buffer))
return EC;
diff --git a/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp b/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp
new file mode 100644
index 000000000000..629f3e80b0ed
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp
@@ -0,0 +1,119 @@
+//===- PDBTypeServerHandler.cpp ---------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Handles CodeView LF_TYPESERVER2 records by attempting to locate a matching
+// PDB file, then loading the PDB file and visiting all types from the
+// referenced PDB using the original supplied visitor.
+//
+// The net effect of this is that when visiting a PDB containing a TypeServer
+// record, the TypeServer record is "replaced" with all of the records in
+// the referenced PDB file. If a single instance of PDBTypeServerHandler
+// encounters the same TypeServer multiple times (for example, when it is
+// reused across visitations of distinct object files or PDB files), it will
+// either revisit all the records again or simply consume the record and do
+// nothing, depending on the RevisitAlways setting.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h"
+
+#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/PDB/GenericError.h"
+#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/DebugInfo/PDB/PDB.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+static void ignoreErrors(Error EC) {
+ llvm::handleAllErrors(std::move(EC), [&](ErrorInfoBase &EIB) {});
+}
+
+PDBTypeServerHandler::PDBTypeServerHandler(bool RevisitAlways)
+ : RevisitAlways(RevisitAlways) {}
+
+void PDBTypeServerHandler::addSearchPath(StringRef Path) {
+ if (Path.empty() || !sys::fs::is_directory(Path))
+ return;
+
+ SearchPaths.push_back(Path);
+}
+
+Expected<bool>
+PDBTypeServerHandler::handleInternal(PDBFile &File,
+ TypeVisitorCallbacks &Callbacks) {
+ auto ExpectedTpi = File.getPDBTpiStream();
+ if (!ExpectedTpi)
+ return ExpectedTpi.takeError();
+ CVTypeVisitor Visitor(Callbacks);
+
+ if (auto EC = Visitor.visitTypeStream(ExpectedTpi->types(nullptr)))
+ return std::move(EC);
+
+ return true;
+}
+
+Expected<bool> PDBTypeServerHandler::handle(TypeServer2Record &TS,
+ TypeVisitorCallbacks &Callbacks) {
+ if (Session) {
+ // If we've already handled this TypeServer and we only want to handle each
+ // TypeServer once, consume the record without doing anything.
+ if (!RevisitAlways)
+ return true;
+
+ return handleInternal(Session->getPDBFile(), Callbacks);
+ }
+
+ StringRef File = sys::path::filename(TS.Name);
+ if (File.empty())
+ return make_error<CodeViewError>(
+ cv_error_code::corrupt_record,
+ "TypeServer2Record does not contain filename!");
+
+ for (auto Path : SearchPaths) {
+ sys::path::append(Path, File);
+ if (!sys::fs::exists(Path))
+ continue;
+
+ std::unique_ptr<IPDBSession> ThisSession;
+ if (auto EC = loadDataForPDB(PDB_ReaderType::Native, Path, ThisSession)) {
+      // It is not an error if this PDB fails to load; it just means that it
+      // doesn't match, and we should continue searching.
+ ignoreErrors(std::move(EC));
+ continue;
+ }
+
+ std::unique_ptr<NativeSession> NS(
+ static_cast<NativeSession *>(ThisSession.release()));
+    PDBFile &PdbFile = NS->getPDBFile();
+    auto ExpectedInfo = PdbFile.getPDBInfoStream();
+ // All PDB Files should have an Info stream.
+ if (!ExpectedInfo)
+ return ExpectedInfo.takeError();
+
+    // Just because a file with a matching name was found and it was an actual
+    // PDB file doesn't mean it matches. For it to match, the InfoStream's
+    // GUID must match the GUID specified in the TypeServer2 record.
+ ArrayRef<uint8_t> GuidBytes(ExpectedInfo->getGuid().Guid);
+ StringRef GuidStr(reinterpret_cast<const char *>(GuidBytes.begin()),
+ GuidBytes.size());
+ if (GuidStr != TS.Guid)
+ continue;
+
+ Session = std::move(NS);
+    return handleInternal(PdbFile, Callbacks);
+ }
+
+ // We couldn't find a matching PDB, so let it be handled by someone else.
+ return false;
+}
diff --git a/lib/DebugInfo/PDB/Raw/PublicsStream.cpp b/lib/DebugInfo/PDB/Native/PublicsStream.cpp
index b31f605a078c..58202577672a 100644
--- a/lib/DebugInfo/PDB/Raw/PublicsStream.cpp
+++ b/lib/DebugInfo/PDB/Native/PublicsStream.cpp
@@ -22,15 +22,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
#include "GSI.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Raw/PublicsStream.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
-#include "llvm/DebugInfo/PDB/Raw/SymbolStream.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <algorithm>
@@ -69,7 +69,7 @@ uint32_t PublicsStream::getAddrMap() const { return Header->AddrMap; }
// we skip over the hash table which we believe contains information about
// public symbols.
Error PublicsStream::reload() {
- StreamReader Reader(*Stream);
+ BinaryStreamReader Reader(*Stream);
// Check stream size.
if (Reader.bytesRemaining() < sizeof(HeaderInfo) + sizeof(GSIHashHeader))
diff --git a/lib/DebugInfo/PDB/Raw/RawError.cpp b/lib/DebugInfo/PDB/Native/RawError.cpp
index f4a5057509eb..548289fff3df 100644
--- a/lib/DebugInfo/PDB/Raw/RawError.cpp
+++ b/lib/DebugInfo/PDB/Native/RawError.cpp
@@ -1,4 +1,4 @@
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
@@ -38,6 +38,8 @@ public:
return "The entry does not exist.";
case raw_error_code::not_writable:
return "The PDB does not support writing.";
+ case raw_error_code::stream_too_long:
+ return "The stream was longer than expected.";
case raw_error_code::invalid_tpi_hash:
return "The Type record has an invalid hash value.";
}
diff --git a/lib/DebugInfo/PDB/Raw/NameHashTable.cpp b/lib/DebugInfo/PDB/Native/StringTable.cpp
index 84cccb354bd8..7e28389b8383 100644
--- a/lib/DebugInfo/PDB/Raw/NameHashTable.cpp
+++ b/lib/DebugInfo/PDB/Native/StringTable.cpp
@@ -1,4 +1,4 @@
-//===- NameHashTable.cpp - PDB Name Hash Table ------------------*- C++ -*-===//
+//===- StringTable.cpp - PDB String Table -----------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,33 +7,29 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h"
+#include "llvm/DebugInfo/PDB/Native/StringTable.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/PDB/Raw/Hash.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Endian.h"
using namespace llvm;
-using namespace llvm::msf;
using namespace llvm::support;
using namespace llvm::pdb;
-NameHashTable::NameHashTable() : Signature(0), HashVersion(0), NameCount(0) {}
+StringTable::StringTable() {}
-Error NameHashTable::load(StreamReader &Stream) {
- struct Header {
- support::ulittle32_t Signature;
- support::ulittle32_t HashVersion;
- support::ulittle32_t ByteSize;
- };
+Error StringTable::load(BinaryStreamReader &Stream) {
+ ByteSize = Stream.getLength();
- const Header *H;
+ const StringTableHeader *H;
if (auto EC = Stream.readObject(H))
return EC;
- if (H->Signature != 0xEFFEEFFE)
+ if (H->Signature != StringTableSignature)
return make_error<RawError>(raw_error_code::corrupt_file,
"Invalid hash table signature");
if (H->HashVersion != 1 && H->HashVersion != 2)
@@ -62,10 +58,19 @@ Error NameHashTable::load(StreamReader &Stream) {
if (auto EC = Stream.readInteger(NameCount))
return EC;
+
+ if (Stream.bytesRemaining() > 0)
+ return make_error<RawError>(raw_error_code::stream_too_long,
+ "Unexpected bytes found in string table");
+
return Error::success();
}
-StringRef NameHashTable::getStringForID(uint32_t ID) const {
+uint32_t StringTable::getByteSize() const {
+ return ByteSize;
+}
+
+StringRef StringTable::getStringForID(uint32_t ID) const {
if (ID == IDs[0])
return StringRef();
@@ -73,14 +78,14 @@ StringRef NameHashTable::getStringForID(uint32_t ID) const {
// the starting offset of the string we're looking for. So just seek to
// the desired offset and read a null-terminated string from that offset.
StringRef Result;
- StreamReader NameReader(NamesBuffer);
+ BinaryStreamReader NameReader(NamesBuffer);
NameReader.setOffset(ID);
- if (auto EC = NameReader.readZeroString(Result))
+ if (auto EC = NameReader.readCString(Result))
consumeError(std::move(EC));
return Result;
}
-uint32_t NameHashTable::getIDForString(StringRef Str) const {
+uint32_t StringTable::getIDForString(StringRef Str) const {
uint32_t Hash = (HashVersion == 1) ? hashStringV1(Str) : hashStringV2(Str);
size_t Count = IDs.size();
uint32_t Start = Hash % Count;
@@ -99,6 +104,6 @@ uint32_t NameHashTable::getIDForString(StringRef Str) const {
return IDs[0];
}
-FixedStreamArray<support::ulittle32_t> NameHashTable::name_ids() const {
+FixedStreamArray<support::ulittle32_t> StringTable::name_ids() const {
return IDs;
}
diff --git a/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp b/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp
new file mode 100644
index 000000000000..e0f8370ab608
--- /dev/null
+++ b/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp
@@ -0,0 +1,102 @@
+//===- StringTableBuilder.cpp - PDB String Table ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace llvm::support;
+using namespace llvm::support::endian;
+using namespace llvm::pdb;
+
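+// Returns the offset of S within the string buffer. Inserting a string that
+// is already present returns its existing offset without growing the table.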
+uint32_t StringTableBuilder::insert(StringRef S) {
+ auto P = Strings.insert({S, StringSize});
+
+ // If a given string didn't exist in the string table, we want to increment
+ // the string table size.
+ if (P.second)
+ StringSize += S.size() + 1; // +1 for '\0'
+ return P.first->second;
+}
+
+static uint32_t computeBucketCount(uint32_t NumStrings) {
+ // The /names stream is basically an on-disk open-addressing hash table.
+ // Hash collisions are resolved by linear probing. We cannot make
+ // utilization 100% because it will make the linear probing extremely
+ // slow. But lower utilization wastes disk space. As a reasonable
+ // load factor, we choose 80%. We need +1 because slot 0 is reserved.
+ return (NumStrings + 1) * 1.25;
+}
+
+uint32_t StringTableBuilder::finalize() {
+ uint32_t Size = 0;
+ Size += sizeof(StringTableHeader);
+ Size += StringSize;
+ Size += sizeof(uint32_t); // Hash table begins with 4-byte size field.
+
+ uint32_t BucketCount = computeBucketCount(Strings.size());
+ Size += BucketCount * sizeof(uint32_t);
+
+ Size +=
+ sizeof(uint32_t); // The /names stream ends with the number of strings.
+ return Size;
+}
+
+Error StringTableBuilder::commit(BinaryStreamWriter &Writer) const {
+ // Write a header
+ StringTableHeader H;
+ H.Signature = StringTableSignature;
+ H.HashVersion = 1;
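+ // Hash version 1 corresponds to hashStringV1, which is also used to place
+ // the bucket entries written below.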
+ H.ByteSize = StringSize;
+ if (auto EC = Writer.writeObject(H))
+ return EC;
+
+ // Write a string table.
+ uint32_t StringStart = Writer.getOffset();
+ for (auto Pair : Strings) {
+ StringRef S = Pair.first;
+ uint32_t Offset = Pair.second;
+ Writer.setOffset(StringStart + Offset);
+ if (auto EC = Writer.writeCString(S))
+ return EC;
+ }
+ Writer.setOffset(StringStart + StringSize);
+
+ // Write a hash table.
+ uint32_t BucketCount = computeBucketCount(Strings.size());
+ if (auto EC = Writer.writeInteger(BucketCount))
+ return EC;
+ std::vector<ulittle32_t> Buckets(BucketCount);
+
+ for (auto Pair : Strings) {
+ StringRef S = Pair.first;
+ uint32_t Offset = Pair.second;
+ uint32_t Hash = hashStringV1(S);
+
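+ // Probe linearly from the hash position until a free, non-reserved slot
+ // is found.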
+ for (uint32_t I = 0; I != BucketCount; ++I) {
+ uint32_t Slot = (Hash + I) % BucketCount;
+ if (Slot == 0)
+ continue; // Skip reserved slot
+ if (Buckets[Slot] != 0)
+ continue;
+ Buckets[Slot] = Offset;
+ break;
+ }
+ }
+
+ if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
+ return EC;
+ if (auto EC = Writer.writeInteger(static_cast<uint32_t>(Strings.size())))
+ return EC;
+ return Error::success();
+}
diff --git a/lib/DebugInfo/PDB/Raw/SymbolStream.cpp b/lib/DebugInfo/PDB/Native/SymbolStream.cpp
index 2f3ac3497f39..9e9ebd11495b 100644
--- a/lib/DebugInfo/PDB/Raw/SymbolStream.cpp
+++ b/lib/DebugInfo/PDB/Native/SymbolStream.cpp
@@ -7,16 +7,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/SymbolStream.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
-
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Endian.h"
using namespace llvm;
@@ -30,7 +29,7 @@ SymbolStream::SymbolStream(std::unique_ptr<MappedBlockStream> Stream)
SymbolStream::~SymbolStream() {}
Error SymbolStream::reload() {
- StreamReader Reader(*Stream);
+ BinaryStreamReader Reader(*Stream);
if (auto EC = Reader.readArray(SymbolRecords, Stream->getLength()))
return EC;
diff --git a/lib/DebugInfo/PDB/Raw/TpiHashing.cpp b/lib/DebugInfo/PDB/Native/TpiHashing.cpp
index 6c3ddb3d57af..16904a5a27ed 100644
--- a/lib/DebugInfo/PDB/Raw/TpiHashing.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiHashing.cpp
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/TpiHashing.h"
+#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
-#include "llvm/DebugInfo/PDB/Raw/Hash.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
using namespace llvm;
using namespace llvm::codeview;
diff --git a/lib/DebugInfo/PDB/Raw/TpiStream.cpp b/lib/DebugInfo/PDB/Native/TpiStream.cpp
index a1167cd98454..5fef3edf8c2d 100644
--- a/lib/DebugInfo/PDB/Raw/TpiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStream.cpp
@@ -7,19 +7,20 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
-#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
-#include "llvm/DebugInfo/PDB/Raw/TpiHashing.h"
-#include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h"
+#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
+#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <algorithm>
@@ -53,7 +54,7 @@ Error TpiStream::verifyHashValues() {
}
Error TpiStream::reload() {
- StreamReader Reader(*Stream);
+ BinaryStreamReader Reader(*Stream);
if (Reader.bytesRemaining() < sizeof(TpiStreamHeader))
return make_error<RawError>(raw_error_code::corrupt_file,
@@ -92,11 +93,12 @@ Error TpiStream::reload() {
auto HS = MappedBlockStream::createIndexedStream(
Pdb.getMsfLayout(), Pdb.getMsfBuffer(), Header->HashStreamIndex);
- StreamReader HSR(*HS);
+ BinaryStreamReader HSR(*HS);
+ // There should be a hash value for every type record, or no hashes at all.
uint32_t NumHashValues =
Header->HashValueBuffer.Length / sizeof(ulittle32_t);
- if (NumHashValues != NumTypeRecords())
+ if (NumHashValues != NumTypeRecords() && NumHashValues != 0)
return make_error<RawError>(
raw_error_code::corrupt_file,
"TPI hash count does not match with the number of type records.");
@@ -113,18 +115,19 @@ Error TpiStream::reload() {
if (auto EC = HSR.readArray(TypeIndexOffsets, NumTypeIndexOffsets))
return EC;
- HSR.setOffset(Header->HashAdjBuffer.Off);
- uint32_t NumHashAdjustments =
- Header->HashAdjBuffer.Length / sizeof(TypeIndexOffset);
- if (auto EC = HSR.readArray(HashAdjustments, NumHashAdjustments))
- return EC;
+ if (Header->HashAdjBuffer.Length > 0) {
+ HSR.setOffset(Header->HashAdjBuffer.Off);
+ if (auto EC = HashAdjusters.load(HSR))
+ return EC;
+ }
HashStream = std::move(HS);
// The TPI hash table is a parallel array to the type records.
// Verify that the hash values match the type records.
- if (auto EC = verifyHashValues())
- return EC;
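+ // Hash values are optional; only verify them when the producer emitted any.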
+ if (NumHashValues > 0)
+ if (auto EC = verifyHashValues())
+ return EC;
}
return Error::success();
@@ -154,23 +157,17 @@ uint16_t TpiStream::getTypeHashStreamAuxIndex() const {
uint32_t TpiStream::NumHashBuckets() const { return Header->NumHashBuckets; }
uint32_t TpiStream::getHashKeySize() const { return Header->HashKeySize; }
-FixedStreamArray<support::ulittle32_t>
-TpiStream::getHashValues() const {
+FixedStreamArray<support::ulittle32_t> TpiStream::getHashValues() const {
return HashValues;
}
-FixedStreamArray<TypeIndexOffset>
-TpiStream::getTypeIndexOffsets() const {
+FixedStreamArray<TypeIndexOffset> TpiStream::getTypeIndexOffsets() const {
return TypeIndexOffsets;
}
-FixedStreamArray<TypeIndexOffset>
-TpiStream::getHashAdjustments() const {
- return HashAdjustments;
-}
+HashTable &TpiStream::getHashAdjusters() { return HashAdjusters; }
-iterator_range<CVTypeArray::Iterator>
-TpiStream::types(bool *HadError) const {
+CVTypeRange TpiStream::types(bool *HadError) const {
return make_range(TypeRecords.begin(HadError), TypeRecords.end());
}
diff --git a/lib/DebugInfo/PDB/Raw/TpiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
index c769321f18c1..375c35b11145 100644
--- a/lib/DebugInfo/PDB/Raw/TpiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
@@ -7,22 +7,22 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/MSF/ByteStream.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
-#include "llvm/DebugInfo/MSF/StreamArray.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/MSF/StreamWriter.h"
-#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
-#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
-#include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
-#include "llvm/DebugInfo/PDB/Raw/TpiStreamBuilder.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/BinaryStreamArray.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <algorithm>
@@ -43,9 +43,22 @@ void TpiStreamBuilder::setVersionHeader(PdbRaw_TpiVer Version) {
VerHeader = Version;
}
-void TpiStreamBuilder::addTypeRecord(const codeview::CVType &Record) {
+void TpiStreamBuilder::addTypeRecord(ArrayRef<uint8_t> Record,
+ Optional<uint32_t> Hash) {
+ // If we just crossed an 8KB threshold, add a type index offset.
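+ // Readers use these (TypeIndex, Offset) pairs to seek close to a record
+ // by index instead of scanning the stream from the beginning.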
+ size_t NewSize = TypeRecordBytes + Record.size();
+ constexpr size_t EightKB = 8 * 1024;
+ if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecords.empty()) {
+ TypeIndexOffsets.push_back(
+ {codeview::TypeIndex(codeview::TypeIndex::FirstNonSimpleIndex +
+ TypeRecords.size()),
+ ulittle32_t(TypeRecordBytes)});
+ }
+ TypeRecordBytes = NewSize;
+
TypeRecords.push_back(Record);
- TypeRecordStream.setItems(TypeRecords);
+ if (Hash)
+ TypeHashes.push_back(*Hash);
}
Error TpiStreamBuilder::finalize() {
@@ -55,13 +68,12 @@ Error TpiStreamBuilder::finalize() {
TpiStreamHeader *H = Allocator.Allocate<TpiStreamHeader>();
uint32_t Count = TypeRecords.size();
- uint32_t HashBufferSize = calculateHashBufferSize();
H->Version = *VerHeader;
H->HeaderSize = sizeof(TpiStreamHeader);
H->TypeIndexBegin = codeview::TypeIndex::FirstNonSimpleIndex;
H->TypeIndexEnd = H->TypeIndexBegin + Count;
- H->TypeRecordBytes = TypeRecordStream.getLength();
+ H->TypeRecordBytes = TypeRecordBytes;
H->HashStreamIndex = HashStreamIndex;
H->HashAuxStreamIndex = kInvalidStreamIndex;
@@ -72,24 +84,32 @@ Error TpiStreamBuilder::finalize() {
// the `HashStreamIndex` field of the `TpiStreamHeader`. Therefore, the data
// begins at offset 0 of this independent stream.
H->HashValueBuffer.Off = 0;
- H->HashValueBuffer.Length = HashBufferSize;
+ H->HashValueBuffer.Length = calculateHashBufferSize();
+
+ // We never write any hash adjustments into our PDBs, so this buffer is an
+ // offset with zero length.
H->HashAdjBuffer.Off = H->HashValueBuffer.Off + H->HashValueBuffer.Length;
H->HashAdjBuffer.Length = 0;
+
H->IndexOffsetBuffer.Off = H->HashAdjBuffer.Off + H->HashAdjBuffer.Length;
- H->IndexOffsetBuffer.Length = 0;
+ H->IndexOffsetBuffer.Length = calculateIndexOffsetSize();
Header = H;
return Error::success();
}
-uint32_t TpiStreamBuilder::calculateSerializedLength() const {
- return sizeof(TpiStreamHeader) + TypeRecordStream.getLength();
+uint32_t TpiStreamBuilder::calculateSerializedLength() {
+ return sizeof(TpiStreamHeader) + TypeRecordBytes;
}
uint32_t TpiStreamBuilder::calculateHashBufferSize() const {
- if (TypeRecords.empty() || !TypeRecords[0].Hash.hasValue())
- return 0;
- return TypeRecords.size() * sizeof(ulittle32_t);
+ assert((TypeRecords.size() == TypeHashes.size() || TypeHashes.empty()) &&
+ "either all or no type records should have hashes");
+ return TypeHashes.size() * sizeof(ulittle32_t);
+}
+
+uint32_t TpiStreamBuilder::calculateIndexOffsetSize() const {
+ return TypeIndexOffsets.size() * sizeof(TypeIndexOffset);
}
Error TpiStreamBuilder::finalizeMsfLayout() {
@@ -97,48 +117,60 @@ Error TpiStreamBuilder::finalizeMsfLayout() {
if (auto EC = Msf.setStreamSize(Idx, Length))
return EC;
- uint32_t HashBufferSize = calculateHashBufferSize();
+ uint32_t HashStreamSize =
+ calculateHashBufferSize() + calculateIndexOffsetSize();
- if (HashBufferSize == 0)
+ if (HashStreamSize == 0)
return Error::success();
- auto ExpectedIndex = Msf.addStream(HashBufferSize);
+ auto ExpectedIndex = Msf.addStream(HashStreamSize);
if (!ExpectedIndex)
return ExpectedIndex.takeError();
HashStreamIndex = *ExpectedIndex;
- ulittle32_t *H = Allocator.Allocate<ulittle32_t>(TypeRecords.size());
- MutableArrayRef<ulittle32_t> HashBuffer(H, TypeRecords.size());
- for (uint32_t I = 0; I < TypeRecords.size(); ++I) {
- HashBuffer[I] = *TypeRecords[I].Hash % MinTpiHashBuckets;
+ if (!TypeHashes.empty()) {
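+ // On-disk hash values are stored pre-reduced modulo the bucket count.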
+ ulittle32_t *H = Allocator.Allocate<ulittle32_t>(TypeHashes.size());
+ MutableArrayRef<ulittle32_t> HashBuffer(H, TypeHashes.size());
+ for (uint32_t I = 0; I < TypeHashes.size(); ++I) {
+ HashBuffer[I] = TypeHashes[I] % MinTpiHashBuckets;
+ }
+ ArrayRef<uint8_t> Bytes(
+ reinterpret_cast<const uint8_t *>(HashBuffer.data()),
+ calculateHashBufferSize());
+ HashValueStream =
+ llvm::make_unique<BinaryByteStream>(Bytes, llvm::support::little);
}
- ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(HashBuffer.data()),
- HashBufferSize);
- HashValueStream = llvm::make_unique<ByteStream>(Bytes);
return Error::success();
}
Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout,
- const msf::WritableStream &Buffer) {
+ WritableBinaryStreamRef Buffer) {
if (auto EC = finalize())
return EC;
auto InfoS =
WritableMappedBlockStream::createIndexedStream(Layout, Buffer, Idx);
- StreamWriter Writer(*InfoS);
+ BinaryStreamWriter Writer(*InfoS);
if (auto EC = Writer.writeObject(*Header))
return EC;
- auto RecordArray = VarStreamArray<codeview::CVType>(TypeRecordStream);
- if (auto EC = Writer.writeArray(RecordArray))
- return EC;
+ for (auto Rec : TypeRecords)
+ if (auto EC = Writer.writeBytes(Rec))
+ return EC;
if (HashStreamIndex != kInvalidStreamIndex) {
auto HVS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer,
HashStreamIndex);
- StreamWriter HW(*HVS);
- if (auto EC = HW.writeStreamRef(*HashValueStream))
- return EC;
+ BinaryStreamWriter HW(*HVS);
+ if (HashValueStream) {
+ if (auto EC = HW.writeStreamRef(*HashValueStream))
+ return EC;
+ }
+
+ for (auto &IndexOffset : TypeIndexOffsets) {
+ if (auto EC = HW.writeObject(IndexOffset))
+ return EC;
+ }
}
return Error::success();
diff --git a/lib/DebugInfo/PDB/PDB.cpp b/lib/DebugInfo/PDB/PDB.cpp
index 0d720591b81d..7e3acc1165f3 100644
--- a/lib/DebugInfo/PDB/PDB.cpp
+++ b/lib/DebugInfo/PDB/PDB.cpp
@@ -17,7 +17,7 @@
#if LLVM_ENABLE_DIA_SDK
#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
#endif
-#include "llvm/DebugInfo/PDB/Raw/RawSession.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
@@ -27,8 +27,8 @@ using namespace llvm::pdb;
Error llvm::pdb::loadDataForPDB(PDB_ReaderType Type, StringRef Path,
std::unique_ptr<IPDBSession> &Session) {
// Create the correct concrete instance type based on the value of Type.
- if (Type == PDB_ReaderType::Raw)
- return RawSession::createFromPdb(Path, Session);
+ if (Type == PDB_ReaderType::Native)
+ return NativeSession::createFromPdb(Path, Session);
#if LLVM_ENABLE_DIA_SDK
return DIASession::createFromPdb(Path, Session);
@@ -40,8 +40,8 @@ Error llvm::pdb::loadDataForPDB(PDB_ReaderType Type, StringRef Path,
Error llvm::pdb::loadDataForEXE(PDB_ReaderType Type, StringRef Path,
std::unique_ptr<IPDBSession> &Session) {
// Create the correct concrete instance type based on the value of Type.
- if (Type == PDB_ReaderType::Raw)
- return RawSession::createFromExe(Path, Session);
+ if (Type == PDB_ReaderType::Native)
+ return NativeSession::createFromExe(Path, Session);
#if LLVM_ENABLE_DIA_SDK
return DIASession::createFromExe(Path, Session);
diff --git a/lib/DebugInfo/PDB/PDBExtras.cpp b/lib/DebugInfo/PDB/PDBExtras.cpp
index b7eee6e53941..dc22a30facab 100644
--- a/lib/DebugInfo/PDB/PDBExtras.cpp
+++ b/lib/DebugInfo/PDB/PDBExtras.cpp
@@ -10,6 +10,7 @@
#include "llvm/DebugInfo/PDB/PDBExtras.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/CodeView/Formatters.h"
using namespace llvm;
using namespace llvm::pdb;
@@ -259,6 +260,12 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
return OS;
}
+raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_UniqueId &Guid) {
+ codeview::detail::GuidAdapter A(Guid.Guid);
+ A.format(OS, "");
+ return OS;
+}
+
raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_UdtType &Type) {
switch (Type) {
CASE_OUTPUT_ENUM_CLASS_STR(PDB_UdtType, Class, "class", OS)
@@ -269,25 +276,6 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_UdtType &Type) {
return OS;
}
-raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_UniqueId &Id) {
- static const char *Lookup = "0123456789ABCDEF";
-
- static_assert(sizeof(PDB_UniqueId) == 16, "Expected 16-byte GUID");
- ArrayRef<uint8_t> GuidBytes(reinterpret_cast<const uint8_t*>(&Id), 16);
- OS << "{";
- for (int i=0; i < 16;) {
- uint8_t Byte = GuidBytes[i];
- uint8_t HighNibble = (Byte >> 4) & 0xF;
- uint8_t LowNibble = Byte & 0xF;
- OS << Lookup[HighNibble] << Lookup[LowNibble];
- ++i;
- if (i>=4 && i<=10 && i%2==0)
- OS << "-";
- }
- OS << "}";
- return OS;
-}
-
raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
const PDB_Machine &Machine) {
switch (Machine) {
diff --git a/lib/DebugInfo/PDB/PDBSymbol.cpp b/lib/DebugInfo/PDB/PDBSymbol.cpp
index 633e11aacf12..74010c2dd7dd 100644
--- a/lib/DebugInfo/PDB/PDBSymbol.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbol.cpp
@@ -10,6 +10,7 @@
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
+#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBExtras.h"
#include "llvm/DebugInfo/PDB/PDBSymbolAnnotation.h"
#include "llvm/DebugInfo/PDB/PDBSymbolBlock.h"
@@ -53,6 +54,9 @@ PDBSymbol::PDBSymbol(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
: Session(PDBSession), RawSymbol(std::move(Symbol)) {}
+PDBSymbol::PDBSymbol(PDBSymbol &Symbol)
+ : Session(Symbol.Session), RawSymbol(std::move(Symbol.RawSymbol)) {}
+
PDBSymbol::~PDBSymbol() = default;
#define FACTORY_SYMTAG_CASE(Tag, Type) \
@@ -99,16 +103,30 @@ PDBSymbol::create(const IPDBSession &PDBSession,
}
}
-#define TRY_DUMP_TYPE(Type) \
- if (const Type *DerivedThis = dyn_cast<Type>(this)) \
- Dumper.dump(OS, Indent, *DerivedThis);
-
-#define ELSE_TRY_DUMP_TYPE(Type, Dumper) else TRY_DUMP_TYPE(Type, Dumper)
-
void PDBSymbol::defaultDump(raw_ostream &OS, int Indent) const {
RawSymbol->dump(OS, Indent);
}
+void PDBSymbol::dumpProperties() const {
+ outs() << "\n";
+ defaultDump(outs(), 0);
+ outs().flush();
+}
+
+void PDBSymbol::dumpChildStats() const {
+ TagStats Stats;
+ getChildStats(Stats);
+ outs() << "\n";
+ for (auto &Stat : Stats) {
+ outs() << Stat.first << ": " << Stat.second << "\n";
+ }
+ outs().flush();
+}
+
+std::unique_ptr<PDBSymbol> PDBSymbol::clone() const {
+ return Session.getSymbolById(getSymIndexId());
+}
+
PDB_SymType PDBSymbol::getSymTag() const { return RawSymbol->getSymTag(); }
uint32_t PDBSymbol::getSymIndexId() const { return RawSymbol->getSymIndexId(); }
@@ -141,6 +159,8 @@ PDBSymbol::findInlineFramesByRVA(uint32_t RVA) const {
std::unique_ptr<IPDBEnumSymbols>
PDBSymbol::getChildStats(TagStats &Stats) const {
std::unique_ptr<IPDBEnumSymbols> Result(findAllChildren());
+ if (!Result)
+ return nullptr;
Stats.clear();
while (auto Child = Result->getNext()) {
++Stats[Child->getSymTag()];
@@ -148,3 +168,7 @@ PDBSymbol::getChildStats(TagStats &Stats) const {
Result->reset();
return Result;
}
+
+std::unique_ptr<PDBSymbol> PDBSymbol::getSymbolByIdHelper(uint32_t Id) const {
+ return Session.getSymbolById(Id);
+}
diff --git a/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp b/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
index cdb167b6191c..3648272e1d0e 100644
--- a/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp
@@ -18,7 +18,9 @@ using namespace llvm::pdb;
PDBSymbolAnnotation::PDBSymbolAnnotation(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::Annotation);
+}
void PDBSymbolAnnotation::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/PDBSymbolBlock.cpp b/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
index fd5dc9427abf..7385d3ba1489 100644
--- a/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolBlock.cpp
@@ -19,6 +19,8 @@ using namespace llvm::pdb;
PDBSymbolBlock::PDBSymbolBlock(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::Block);
+}
void PDBSymbolBlock::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp b/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
index ebff08846cac..854cf42d1bae 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
@@ -19,7 +19,9 @@ using namespace llvm::pdb;
PDBSymbolCompiland::PDBSymbolCompiland(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::Compiland);
+}
void PDBSymbolCompiland::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp b/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
index 6dbd5228f2cd..e08450e0ad0c 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
@@ -19,7 +19,9 @@ using namespace llvm::pdb;
PDBSymbolCompilandDetails::PDBSymbolCompilandDetails(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::CompilandDetails);
+}
void PDBSymbolCompilandDetails::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp b/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
index 9c7f0b1be56f..2f1c43666ae5 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
@@ -20,7 +20,9 @@ using namespace llvm::pdb;
PDBSymbolCompilandEnv::PDBSymbolCompilandEnv(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::CompilandEnv);
+}
std::string PDBSymbolCompilandEnv::getValue() const {
Variant Value = RawSymbol->getValue();
diff --git a/lib/DebugInfo/PDB/PDBSymbolCustom.cpp b/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
index 0ea387a0eabb..9ec20bb62d75 100644
--- a/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolCustom.cpp
@@ -20,7 +20,9 @@ using namespace llvm::pdb;
PDBSymbolCustom::PDBSymbolCustom(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> CustomSymbol)
- : PDBSymbol(PDBSession, std::move(CustomSymbol)) {}
+ : PDBSymbol(PDBSession, std::move(CustomSymbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::Custom);
+}
void PDBSymbolCustom::getDataBytes(llvm::SmallVector<uint8_t, 32> &bytes) {
RawSymbol->getDataBytes(bytes);
diff --git a/lib/DebugInfo/PDB/PDBSymbolData.cpp b/lib/DebugInfo/PDB/PDBSymbolData.cpp
index 62bb6f3f41e2..60026689c6f1 100644
--- a/lib/DebugInfo/PDB/PDBSymbolData.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolData.cpp
@@ -19,10 +19,8 @@ using namespace llvm::pdb;
PDBSymbolData::PDBSymbolData(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> DataSymbol)
- : PDBSymbol(PDBSession, std::move(DataSymbol)) {}
-
-std::unique_ptr<PDBSymbol> PDBSymbolData::getType() const {
- return Session.getSymbolById(getTypeId());
+ : PDBSymbol(PDBSession, std::move(DataSymbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::Data);
}
void PDBSymbolData::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/lib/DebugInfo/PDB/PDBSymbolExe.cpp b/lib/DebugInfo/PDB/PDBSymbolExe.cpp
index 60101c168a79..7417167b61ad 100644
--- a/lib/DebugInfo/PDB/PDBSymbolExe.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolExe.cpp
@@ -10,6 +10,7 @@
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
#include <utility>
@@ -18,6 +19,18 @@ using namespace llvm::pdb;
PDBSymbolExe::PDBSymbolExe(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::Exe);
+}
void PDBSymbolExe::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
+
+uint32_t PDBSymbolExe::getPointerByteSize() const {
+ auto Pointer = findOneChild<PDBSymbolTypePointer>();
+ if (Pointer)
+ return Pointer->getLength();
+
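+ // No pointer type was found; infer the pointer size from the machine type.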
+ if (getMachineType() == PDB_Machine::x86)
+ return 4;
+ return 8;
+}
diff --git a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
index 35251c0cc1c1..0734a1f8314a 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFunc.cpp
@@ -85,10 +85,8 @@ private:
PDBSymbolFunc::PDBSymbolFunc(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
-
-std::unique_ptr<PDBSymbolTypeFunctionSig> PDBSymbolFunc::getSignature() const {
- return Session.getConcreteSymbolById<PDBSymbolTypeFunctionSig>(getTypeId());
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::Function);
}
std::unique_ptr<IPDBEnumChildren<PDBSymbolData>>
@@ -96,8 +94,15 @@ PDBSymbolFunc::getArguments() const {
return llvm::make_unique<FunctionArgEnumerator>(Session, *this);
}
-std::unique_ptr<PDBSymbolTypeUDT> PDBSymbolFunc::getClassParent() const {
- return Session.getConcreteSymbolById<PDBSymbolTypeUDT>(getClassParentId());
-}
-
void PDBSymbolFunc::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
+
+bool PDBSymbolFunc::isDestructor() const {
+ std::string Name = getName();
+ if (Name.empty())
+ return false;
+ if (Name[0] == '~')
+ return true;
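+ // MSVC names the vector deleting destructor __vecDelDtor.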
+ if (Name == "__vecDelDtor")
+ return true;
+ return false;
+}
diff --git a/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp b/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
index 77e996f651df..482c95e3a850 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
@@ -19,7 +19,9 @@ using namespace llvm::pdb;
PDBSymbolFuncDebugEnd::PDBSymbolFuncDebugEnd(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::FuncDebugEnd);
+}
void PDBSymbolFuncDebugEnd::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp b/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
index 9c653879176b..ae23c7619e2a 100644
--- a/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
@@ -19,7 +19,9 @@ using namespace llvm::pdb;
PDBSymbolFuncDebugStart::PDBSymbolFuncDebugStart(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::FuncDebugStart);
+}
void PDBSymbolFuncDebugStart::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/PDBSymbolLabel.cpp b/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
index d2cfd11c35e4..a67a20d8e352 100644
--- a/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolLabel.cpp
@@ -18,6 +18,8 @@ using namespace llvm::pdb;
PDBSymbolLabel::PDBSymbolLabel(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::Label);
+}
void PDBSymbolLabel::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp b/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
index 97d668740818..87bb4044216b 100644
--- a/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
@@ -19,7 +19,9 @@ using namespace llvm::pdb;
PDBSymbolPublicSymbol::PDBSymbolPublicSymbol(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::PublicSymbol);
+}
void PDBSymbolPublicSymbol::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/PDBSymbolThunk.cpp b/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
index ef8897d12af4..b2648197f9cc 100644
--- a/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolThunk.cpp
@@ -18,6 +18,8 @@ using namespace llvm::pdb;
PDBSymbolThunk::PDBSymbolThunk(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::Thunk);
+}
void PDBSymbolThunk::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
index c010cc5d7678..a8054a42d866 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp
@@ -19,12 +19,14 @@ using namespace llvm::pdb;
PDBSymbolTypeArray::PDBSymbolTypeArray(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
-
-std::unique_ptr<PDBSymbol> PDBSymbolTypeArray::getElementType() const {
- return Session.getSymbolById(getTypeId());
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::ArrayType);
}
void PDBSymbolTypeArray::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
+
+void PDBSymbolTypeArray::dumpRight(PDBSymDumper &Dumper) const {
+ Dumper.dumpRight(*this);
+}
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
index 382c397b24d2..0ee18d471624 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
@@ -19,7 +19,9 @@ using namespace llvm::pdb;
PDBSymbolTypeBaseClass::PDBSymbolTypeBaseClass(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::BaseClass);
+}
void PDBSymbolTypeBaseClass::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
index e5d65bf5d1fd..0bf563af7df5 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
@@ -18,7 +18,9 @@ using namespace llvm::pdb;
PDBSymbolTypeBuiltin::PDBSymbolTypeBuiltin(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::BuiltinType);
+}
void PDBSymbolTypeBuiltin::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
index 1d80c97f9ede..f617d8d0c2df 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp
@@ -19,7 +19,9 @@ using namespace llvm::pdb;
PDBSymbolTypeCustom::PDBSymbolTypeCustom(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::CustomType);
+}
void PDBSymbolTypeCustom::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
index 535d97dcd21e..68ba87c1cdf8 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp
@@ -20,7 +20,9 @@ using namespace llvm::pdb;
PDBSymbolTypeDimension::PDBSymbolTypeDimension(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::Dimension);
+}
void PDBSymbolTypeDimension::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
index 788f2b732aaa..2addea072c88 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp
@@ -21,15 +21,8 @@ using namespace llvm::pdb;
PDBSymbolTypeEnum::PDBSymbolTypeEnum(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
-
-std::unique_ptr<PDBSymbolTypeUDT> PDBSymbolTypeEnum::getClassParent() const {
- return Session.getConcreteSymbolById<PDBSymbolTypeUDT>(getClassParentId());
-}
-
-std::unique_ptr<PDBSymbolTypeBuiltin>
-PDBSymbolTypeEnum::getUnderlyingType() const {
- return Session.getConcreteSymbolById<PDBSymbolTypeBuiltin>(getTypeId());
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::Enum);
}
void PDBSymbolTypeEnum::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
index 5831baebb993..ec27985e91d1 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp
@@ -19,7 +19,9 @@ using namespace llvm::pdb;
PDBSymbolTypeFriend::PDBSymbolTypeFriend(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::Friend);
+}
void PDBSymbolTypeFriend::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
index c6f586db9e57..4d5cd63f6857 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
@@ -18,7 +18,9 @@ using namespace llvm::pdb;
PDBSymbolTypeFunctionArg::PDBSymbolTypeFunctionArg(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::FunctionArg);
+}
void PDBSymbolTypeFunctionArg::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
index 057ae260885f..473529d1b043 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
@@ -68,10 +68,8 @@ private:
PDBSymbolTypeFunctionSig::PDBSymbolTypeFunctionSig(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
-
-std::unique_ptr<PDBSymbol> PDBSymbolTypeFunctionSig::getReturnType() const {
- return Session.getSymbolById(getTypeId());
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::FunctionSig);
}
std::unique_ptr<IPDBEnumSymbols>
@@ -79,13 +77,10 @@ PDBSymbolTypeFunctionSig::getArguments() const {
return llvm::make_unique<FunctionArgEnumerator>(Session, *this);
}
-std::unique_ptr<PDBSymbol> PDBSymbolTypeFunctionSig::getClassParent() const {
- uint32_t ClassId = getClassParentId();
- if (ClassId == 0)
- return nullptr;
- return Session.getSymbolById(ClassId);
-}
-
void PDBSymbolTypeFunctionSig::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
+
+void PDBSymbolTypeFunctionSig::dumpRight(PDBSymDumper &Dumper) const {
+ Dumper.dumpRight(*this);
+}
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
index 072d2cfd42fb..86e0ec4f8565 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp
@@ -19,7 +19,9 @@ using namespace llvm::pdb;
PDBSymbolTypeManaged::PDBSymbolTypeManaged(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::ManagedType);
+}
void PDBSymbolTypeManaged::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp b/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
index 699771450a5d..69819811d61f 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp
@@ -19,12 +19,14 @@ using namespace llvm::pdb;
PDBSymbolTypePointer::PDBSymbolTypePointer(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
-
-std::unique_ptr<PDBSymbol> PDBSymbolTypePointer::getPointeeType() const {
- return Session.getSymbolById(getTypeId());
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::PointerType);
}
void PDBSymbolTypePointer::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
}
+
+void PDBSymbolTypePointer::dumpRight(PDBSymDumper &Dumper) const {
+ Dumper.dumpRight(*this);
+}
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
index 0f283b9e21a4..102b540e0fef 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
@@ -18,7 +18,9 @@ using namespace llvm::pdb;
PDBSymbolTypeTypedef::PDBSymbolTypeTypedef(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::Typedef);
+}
void PDBSymbolTypeTypedef::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
index c71838cc7a6f..15dc15352165 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp
@@ -9,7 +9,15 @@
#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
+#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
+#include "llvm/DebugInfo/PDB/UDTLayout.h"
#include <utility>
@@ -18,6 +26,8 @@ using namespace llvm::pdb;
PDBSymbolTypeUDT::PDBSymbolTypeUDT(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::UDT);
+}
void PDBSymbolTypeUDT::dump(PDBSymDumper &Dumper) const { Dumper.dump(*this); }
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
index 6b76db5912ce..9a21855f57f0 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp
@@ -18,7 +18,9 @@ using namespace llvm::pdb;
PDBSymbolTypeVTable::PDBSymbolTypeVTable(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::VTable);
+}
void PDBSymbolTypeVTable::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp b/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
index ef509d64bf60..a516a4d2c429 100644
--- a/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
@@ -19,7 +19,9 @@ using namespace llvm::pdb;
PDBSymbolTypeVTableShape::PDBSymbolTypeVTableShape(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::VTableShape);
+}
void PDBSymbolTypeVTableShape::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp b/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
index 6a62d554f42c..020aec9e98a8 100644
--- a/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
+++ b/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
@@ -19,7 +19,9 @@ using namespace llvm::pdb;
PDBSymbolUsingNamespace::PDBSymbolUsingNamespace(
const IPDBSession &PDBSession, std::unique_ptr<IPDBRawSymbol> Symbol)
- : PDBSymbol(PDBSession, std::move(Symbol)) {}
+ : PDBSymbol(PDBSession, std::move(Symbol)) {
+ assert(RawSymbol->getSymTag() == PDB_SymType::UsingNamespace);
+}
void PDBSymbolUsingNamespace::dump(PDBSymDumper &Dumper) const {
Dumper.dump(*this);
diff --git a/lib/DebugInfo/PDB/Raw/InfoStream.cpp b/lib/DebugInfo/PDB/Raw/InfoStream.cpp
deleted file mode 100644
index f19535d11806..000000000000
--- a/lib/DebugInfo/PDB/Raw/InfoStream.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-//===- InfoStream.cpp - PDB Info Stream (Stream 1) Access -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/MSF/StreamWriter.h"
-#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
-#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
-
-using namespace llvm;
-using namespace llvm::codeview;
-using namespace llvm::msf;
-using namespace llvm::pdb;
-
-InfoStream::InfoStream(std::unique_ptr<MappedBlockStream> Stream)
- : Stream(std::move(Stream)) {}
-
-Error InfoStream::reload() {
- StreamReader Reader(*Stream);
-
- const InfoStreamHeader *H;
- if (auto EC = Reader.readObject(H))
- return joinErrors(
- std::move(EC),
- make_error<RawError>(raw_error_code::corrupt_file,
- "PDB Stream does not contain a header."));
-
- switch (H->Version) {
- case PdbImplVC70:
- case PdbImplVC80:
- case PdbImplVC110:
- case PdbImplVC140:
- break;
- default:
- return make_error<RawError>(raw_error_code::corrupt_file,
- "Unsupported PDB stream version.");
- }
-
- Version = H->Version;
- Signature = H->Signature;
- Age = H->Age;
- Guid = H->Guid;
-
- return NamedStreams.load(Reader);
-}
-
-uint32_t InfoStream::getNamedStreamIndex(llvm::StringRef Name) const {
- uint32_t Result;
- if (!NamedStreams.tryGetValue(Name, Result))
- return 0;
- return Result;
-}
-
-iterator_range<StringMapConstIterator<uint32_t>>
-InfoStream::named_streams() const {
- return NamedStreams.entries();
-}
-
-PdbRaw_ImplVer InfoStream::getVersion() const {
- return static_cast<PdbRaw_ImplVer>(Version);
-}
-
-uint32_t InfoStream::getSignature() const { return Signature; }
-
-uint32_t InfoStream::getAge() const { return Age; }
-
-PDB_UniqueId InfoStream::getGuid() const { return Guid; }
diff --git a/lib/DebugInfo/PDB/Raw/NameMap.cpp b/lib/DebugInfo/PDB/Raw/NameMap.cpp
deleted file mode 100644
index 0f55f58da381..000000000000
--- a/lib/DebugInfo/PDB/Raw/NameMap.cpp
+++ /dev/null
@@ -1,163 +0,0 @@
-//===- NameMap.cpp - PDB Name Map -------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/iterator_range.h"
-#include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/PDB/Raw/NameMap.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
-#include "llvm/Support/Error.h"
-#include <algorithm>
-#include <cstdint>
-
-using namespace llvm;
-using namespace llvm::msf;
-using namespace llvm::pdb;
-
-NameMap::NameMap() = default;
-
-Error NameMap::load(StreamReader &Stream) {
- // This is some sort of weird string-set/hash table encoded in the stream.
- // It starts with the number of bytes in the table.
- uint32_t NumberOfBytes;
- if (auto EC = Stream.readInteger(NumberOfBytes))
- return joinErrors(std::move(EC),
- make_error<RawError>(raw_error_code::corrupt_file,
- "Expected name map length"));
- if (Stream.bytesRemaining() < NumberOfBytes)
- return make_error<RawError>(raw_error_code::corrupt_file,
- "Invalid name map length");
-
- // Following that field is the starting offset of strings in the name table.
- uint32_t StringsOffset = Stream.getOffset();
- Stream.setOffset(StringsOffset + NumberOfBytes);
-
- // This appears to be equivalent to the total number of strings *actually*
- // in the name table.
- uint32_t HashSize;
- if (auto EC = Stream.readInteger(HashSize))
- return joinErrors(std::move(EC),
- make_error<RawError>(raw_error_code::corrupt_file,
- "Expected name map hash size"));
-
- // This appears to be an upper bound on the number of strings in the name
- // table.
- uint32_t MaxNumberOfStrings;
- if (auto EC = Stream.readInteger(MaxNumberOfStrings))
- return joinErrors(std::move(EC),
- make_error<RawError>(raw_error_code::corrupt_file,
- "Expected name map max strings"));
-
- if (MaxNumberOfStrings > (UINT32_MAX / sizeof(uint32_t)))
- return make_error<RawError>(raw_error_code::corrupt_file,
- "Implausible number of strings");
-
- const uint32_t MaxNumberOfWords = UINT32_MAX / (sizeof(uint32_t) * 8);
-
- // This appears to be a hash table which uses bitfields to determine whether
- // or not a bucket is 'present'.
- uint32_t NumPresentWords;
- if (auto EC = Stream.readInteger(NumPresentWords))
- return joinErrors(std::move(EC),
- make_error<RawError>(raw_error_code::corrupt_file,
- "Expected name map num words"));
-
- if (NumPresentWords > MaxNumberOfWords)
- return make_error<RawError>(raw_error_code::corrupt_file,
- "Number of present words is too large");
-
- SparseBitVector<> Present;
- for (uint32_t I = 0; I != NumPresentWords; ++I) {
- uint32_t Word;
- if (auto EC = Stream.readInteger(Word))
- return joinErrors(std::move(EC),
- make_error<RawError>(raw_error_code::corrupt_file,
- "Expected name map word"));
- for (unsigned Idx = 0; Idx < 32; ++Idx)
- if (Word & (1U << Idx))
- Present.set((I * 32) + Idx);
- }
-
- // This appears to be a hash table which uses bitfields to determine whether
- // or not a bucket is 'deleted'.
- uint32_t NumDeletedWords;
- if (auto EC = Stream.readInteger(NumDeletedWords))
- return joinErrors(
- std::move(EC),
- make_error<RawError>(raw_error_code::corrupt_file,
- "Expected name map num deleted words"));
-
- if (NumDeletedWords > MaxNumberOfWords)
- return make_error<RawError>(raw_error_code::corrupt_file,
- "Number of deleted words is too large");
-
- SparseBitVector<> Deleted;
- for (uint32_t I = 0; I != NumDeletedWords; ++I) {
- uint32_t Word;
- if (auto EC = Stream.readInteger(Word))
- return joinErrors(std::move(EC),
- make_error<RawError>(raw_error_code::corrupt_file,
- "Expected name map word"));
- for (unsigned Idx = 0; Idx < 32; ++Idx)
- if (Word & (1U << Idx))
- Deleted.set((I * 32) + Idx);
- }
-
- for (unsigned I : Present) {
- // For all present entries, dump out their mapping.
- (void)I;
-
- // This appears to be an offset relative to the start of the strings.
- // It tells us where the null-terminated string begins.
- uint32_t NameOffset;
- if (auto EC = Stream.readInteger(NameOffset))
- return joinErrors(std::move(EC),
- make_error<RawError>(raw_error_code::corrupt_file,
- "Expected name map name offset"));
-
- // This appears to be a stream number into the stream directory.
- uint32_t NameIndex;
- if (auto EC = Stream.readInteger(NameIndex))
- return joinErrors(std::move(EC),
- make_error<RawError>(raw_error_code::corrupt_file,
- "Expected name map name index"));
-
- // Compute the offset of the start of the string relative to the stream.
- uint32_t StringOffset = StringsOffset + NameOffset;
- uint32_t OldOffset = Stream.getOffset();
- // Pump out our c-string from the stream.
- StringRef Str;
- Stream.setOffset(StringOffset);
- if (auto EC = Stream.readZeroString(Str))
- return joinErrors(std::move(EC),
- make_error<RawError>(raw_error_code::corrupt_file,
- "Expected name map name"));
-
- Stream.setOffset(OldOffset);
- // Add this to a string-map from name to stream number.
- Mapping.insert({Str, NameIndex});
- }
-
- return Error::success();
-}
-
-iterator_range<StringMapConstIterator<uint32_t>> NameMap::entries() const {
- return make_range<StringMapConstIterator<uint32_t>>(Mapping.begin(),
- Mapping.end());
-}
-
-bool NameMap::tryGetValue(StringRef Name, uint32_t &Value) const {
- auto Iter = Mapping.find(Name);
- if (Iter == Mapping.end())
- return false;
- Value = Iter->second;
- return true;
-}
diff --git a/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp b/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp
deleted file mode 100644
index f570d5931b0f..000000000000
--- a/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-//===- NameMapBuilder.cpp - PDB Name Map Builder ----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/DebugInfo/MSF/StreamWriter.h"
-#include "llvm/DebugInfo/PDB/Raw/NameMap.h"
-#include "llvm/DebugInfo/PDB/Raw/NameMapBuilder.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/Error.h"
-#include <algorithm>
-#include <cstdint>
-
-using namespace llvm;
-using namespace llvm::pdb;
-
-NameMapBuilder::NameMapBuilder() = default;
-
-void NameMapBuilder::addMapping(StringRef Name, uint32_t Mapping) {
- StringDataBytes += Name.size() + 1;
- Map.insert({Name, Mapping});
-}
-
-Expected<std::unique_ptr<NameMap>> NameMapBuilder::build() {
- auto Result = llvm::make_unique<NameMap>();
- Result->Mapping = Map;
- return std::move(Result);
-}
-
-uint32_t NameMapBuilder::calculateSerializedLength() const {
- uint32_t TotalLength = 0;
-
- TotalLength += sizeof(support::ulittle32_t); // StringDataBytes value
- TotalLength += StringDataBytes; // actual string data
-
- TotalLength += sizeof(support::ulittle32_t); // Hash Size
- TotalLength += sizeof(support::ulittle32_t); // Max Number of Strings
- TotalLength += sizeof(support::ulittle32_t); // Num Present Words
- // One bitmask word for each present entry
- TotalLength += Map.size() * sizeof(support::ulittle32_t);
- TotalLength += sizeof(support::ulittle32_t); // Num Deleted Words
-
- // For each present word, which we are treating as equivalent to the number of
- // entries in the table, we have a pair of integers. An offset into the
- // string data, and a corresponding stream number.
- TotalLength += Map.size() * 2 * sizeof(support::ulittle32_t);
-
- return TotalLength;
-}
-
-Error NameMapBuilder::commit(msf::StreamWriter &Writer) const {
- // The first field is the number of bytes of string data. So add
- // up the length of all strings plus a null terminator for each
- // one.
- uint32_t NumBytes = 0;
- for (auto B = Map.begin(), E = Map.end(); B != E; ++B) {
- NumBytes += B->getKeyLength() + 1;
- }
-
- if (auto EC = Writer.writeInteger(NumBytes)) // Number of bytes of string data
- return EC;
- // Now all of the string data itself.
- for (auto B = Map.begin(), E = Map.end(); B != E; ++B) {
- if (auto EC = Writer.writeZeroString(B->getKey()))
- return EC;
- }
-
- if (auto EC = Writer.writeInteger(Map.size())) // Hash Size
- return EC;
-
- if (auto EC = Writer.writeInteger(Map.size())) // Max Number of Strings
- return EC;
-
- if (auto EC = Writer.writeInteger(Map.size())) // Num Present Words
- return EC;
-
- // For each entry in the mapping, write a bit mask which represents a bucket
- // to store it in. We don't use this, so the value we write isn't important
- // to us, it just has to be there.
- for (auto B = Map.begin(), E = Map.end(); B != E; ++B) {
- if (auto EC = Writer.writeInteger(1U))
- return EC;
- }
-
- if (auto EC = Writer.writeInteger(0U)) // Num Deleted Words
- return EC;
-
- // Mappings of each word.
- uint32_t OffsetSoFar = 0;
- for (auto B = Map.begin(), E = Map.end(); B != E; ++B) {
- // This is a list of key value pairs where the key is the offset into the
- // strings buffer, and the value is a stream number. Write each pair.
- if (auto EC = Writer.writeInteger(OffsetSoFar))
- return EC;
-
- if (auto EC = Writer.writeInteger(B->second))
- return EC;
-
- OffsetSoFar += B->getKeyLength() + 1;
- }
-
- return Error::success();
-}
diff --git a/lib/DebugInfo/PDB/Raw/RawSession.cpp b/lib/DebugInfo/PDB/Raw/RawSession.cpp
deleted file mode 100644
index cd3a2064c717..000000000000
--- a/lib/DebugInfo/PDB/Raw/RawSession.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-//===- RawSession.cpp - Raw implementation of IPDBSession -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/DebugInfo/MSF/ByteStream.h"
-#include "llvm/DebugInfo/PDB/GenericError.h"
-#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
-#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
-#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
-#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
-#include "llvm/DebugInfo/PDB/Raw/RawSession.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include <algorithm>
-#include <memory>
-
-using namespace llvm;
-using namespace llvm::msf;
-using namespace llvm::pdb;
-
-RawSession::RawSession(std::unique_ptr<PDBFile> PdbFile,
- std::unique_ptr<BumpPtrAllocator> Allocator)
- : Pdb(std::move(PdbFile)), Allocator(std::move(Allocator)) {}
-
-RawSession::~RawSession() = default;
-
-Error RawSession::createFromPdb(StringRef Path,
- std::unique_ptr<IPDBSession> &Session) {
- ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer =
- MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1,
- /*RequiresNullTerminator=*/false);
- if (!ErrorOrBuffer)
- return make_error<GenericError>(generic_error_code::invalid_path);
-
- std::unique_ptr<MemoryBuffer> Buffer = std::move(*ErrorOrBuffer);
- auto Stream = llvm::make_unique<MemoryBufferByteStream>(std::move(Buffer));
-
- auto Allocator = llvm::make_unique<BumpPtrAllocator>();
- auto File = llvm::make_unique<PDBFile>(std::move(Stream), *Allocator);
- if (auto EC = File->parseFileHeaders())
- return EC;
- if (auto EC = File->parseStreamData())
- return EC;
-
- Session =
- llvm::make_unique<RawSession>(std::move(File), std::move(Allocator));
-
- return Error::success();
-}
-
-Error RawSession::createFromExe(StringRef Path,
- std::unique_ptr<IPDBSession> &Session) {
- return make_error<RawError>(raw_error_code::feature_unsupported);
-}
-
-uint64_t RawSession::getLoadAddress() const { return 0; }
-
-void RawSession::setLoadAddress(uint64_t Address) {}
-
-std::unique_ptr<PDBSymbolExe> RawSession::getGlobalScope() const {
- return nullptr;
-}
-
-std::unique_ptr<PDBSymbol> RawSession::getSymbolById(uint32_t SymbolId) const {
- return nullptr;
-}
-
-std::unique_ptr<PDBSymbol>
-RawSession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) const {
- return nullptr;
-}
-
-std::unique_ptr<IPDBEnumLineNumbers>
-RawSession::findLineNumbers(const PDBSymbolCompiland &Compiland,
- const IPDBSourceFile &File) const {
- return nullptr;
-}
-
-std::unique_ptr<IPDBEnumLineNumbers>
-RawSession::findLineNumbersByAddress(uint64_t Address, uint32_t Length) const {
- return nullptr;
-}
-
-std::unique_ptr<IPDBEnumSourceFiles>
-RawSession::findSourceFiles(const PDBSymbolCompiland *Compiland,
- StringRef Pattern,
- PDB_NameSearchFlags Flags) const {
- return nullptr;
-}
-
-std::unique_ptr<IPDBSourceFile>
-RawSession::findOneSourceFile(const PDBSymbolCompiland *Compiland,
- StringRef Pattern,
- PDB_NameSearchFlags Flags) const {
- return nullptr;
-}
-
-std::unique_ptr<IPDBEnumChildren<PDBSymbolCompiland>>
-RawSession::findCompilandsForSourceFile(StringRef Pattern,
- PDB_NameSearchFlags Flags) const {
- return nullptr;
-}
-
-std::unique_ptr<PDBSymbolCompiland>
-RawSession::findOneCompilandForSourceFile(StringRef Pattern,
- PDB_NameSearchFlags Flags) const {
- return nullptr;
-}
-
-std::unique_ptr<IPDBEnumSourceFiles> RawSession::getAllSourceFiles() const {
- return nullptr;
-}
-
-std::unique_ptr<IPDBEnumSourceFiles> RawSession::getSourceFilesForCompiland(
- const PDBSymbolCompiland &Compiland) const {
- return nullptr;
-}
-
-std::unique_ptr<IPDBSourceFile>
-RawSession::getSourceFileById(uint32_t FileId) const {
- return nullptr;
-}
-
-std::unique_ptr<IPDBEnumDataStreams> RawSession::getDebugStreams() const {
- return nullptr;
-}
diff --git a/lib/DebugInfo/PDB/UDTLayout.cpp b/lib/DebugInfo/PDB/UDTLayout.cpp
new file mode 100644
index 000000000000..61cef093d4ce
--- /dev/null
+++ b/lib/DebugInfo/PDB/UDTLayout.cpp
@@ -0,0 +1,335 @@
+//===- UDTLayout.cpp --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/UDTLayout.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/DebugInfo/PDB/PDBSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypePointer.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
+#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h"
+
+#include <utility>
+
+using namespace llvm;
+using namespace llvm::pdb;
+
+static std::unique_ptr<PDBSymbol> getSymbolType(const PDBSymbol &Symbol) {
+ const IPDBSession &Session = Symbol.getSession();
+ const IPDBRawSymbol &RawSymbol = Symbol.getRawSymbol();
+ uint32_t TypeId = RawSymbol.getTypeId();
+ return Session.getSymbolById(TypeId);
+}
+
+static uint32_t getTypeLength(const PDBSymbol &Symbol) {
+ auto SymbolType = getSymbolType(Symbol);
+ const IPDBRawSymbol &RawType = SymbolType->getRawSymbol();
+
+ return RawType.getLength();
+}
+
+StorageItemBase::StorageItemBase(const UDTLayoutBase &Parent,
+ const PDBSymbol &Symbol,
+ const std::string &Name,
+ uint32_t OffsetInParent, uint32_t Size)
+ : Parent(Parent), Symbol(Symbol), Name(Name),
+ OffsetInParent(OffsetInParent), SizeOf(Size) {
+ UsedBytes.resize(SizeOf, true);
+}
+
+uint32_t StorageItemBase::deepPaddingSize() const {
+ // sizeof(Field) - sizeof(typeof(Field)) is trailing padding.
+ return SizeOf - getTypeLength(Symbol);
+}
+
+DataMemberLayoutItem::DataMemberLayoutItem(
+ const UDTLayoutBase &Parent, std::unique_ptr<PDBSymbolData> DataMember)
+ : StorageItemBase(Parent, *DataMember, DataMember->getName(),
+ DataMember->getOffset(), getTypeLength(*DataMember)),
+ DataMember(std::move(DataMember)) {
+ auto Type = this->DataMember->getType();
+ if (auto UDT = unique_dyn_cast<PDBSymbolTypeUDT>(Type)) {
+ // UDT data members might have padding in between fields, but otherwise
+ // a member should occupy its entire storage.
+ UsedBytes.resize(SizeOf, false);
+ UdtLayout = llvm::make_unique<ClassLayout>(std::move(UDT));
+ }
+}
+
+const PDBSymbolData &DataMemberLayoutItem::getDataMember() {
+  return *cast<PDBSymbolData>(&Symbol);
+}
+
+bool DataMemberLayoutItem::hasUDTLayout() const { return UdtLayout != nullptr; }
+
+const ClassLayout &DataMemberLayoutItem::getUDTLayout() const {
+ return *UdtLayout;
+}
+
+uint32_t DataMemberLayoutItem::deepPaddingSize() const {
+ uint32_t Result = StorageItemBase::deepPaddingSize();
+ if (UdtLayout)
+ Result += UdtLayout->deepPaddingSize();
+ return Result;
+}
+
+VTableLayoutItem::VTableLayoutItem(const UDTLayoutBase &Parent,
+ std::unique_ptr<PDBSymbolTypeVTable> VTable)
+ : StorageItemBase(Parent, *VTable, "<vtbl>", 0, getTypeLength(*VTable)),
+ VTable(std::move(VTable)) {
+ auto VTableType = cast<PDBSymbolTypePointer>(this->VTable->getType());
+ ElementSize = VTableType->getLength();
+
+ Shape =
+ unique_dyn_cast<PDBSymbolTypeVTableShape>(VTableType->getPointeeType());
+ if (Shape)
+ VTableFuncs.resize(Shape->getCount());
+}
+
+UDTLayoutBase::UDTLayoutBase(const PDBSymbol &Symbol, const std::string &Name,
+ uint32_t Size)
+ : SymbolBase(Symbol), Name(Name), SizeOf(Size) {
+ UsedBytes.resize(Size);
+ ChildrenPerByte.resize(Size);
+ initializeChildren(Symbol);
+}
+
+ClassLayout::ClassLayout(const PDBSymbolTypeUDT &UDT)
+ : UDTLayoutBase(UDT, UDT.getName(), UDT.getLength()), UDT(UDT) {}
+
+ClassLayout::ClassLayout(std::unique_ptr<PDBSymbolTypeUDT> UDT)
+ : ClassLayout(*UDT) {
+ OwnedStorage = std::move(UDT);
+}
+
+BaseClassLayout::BaseClassLayout(const UDTLayoutBase &Parent,
+ std::unique_ptr<PDBSymbolTypeBaseClass> Base)
+ : UDTLayoutBase(*Base, Base->getName(), Base->getLength()),
+ StorageItemBase(Parent, *Base, Base->getName(), Base->getOffset(),
+ Base->getLength()),
+ Base(std::move(Base)) {
+ IsVirtualBase = this->Base->isVirtualBaseClass();
+}
+
+uint32_t UDTLayoutBase::shallowPaddingSize() const {
+ return UsedBytes.size() - UsedBytes.count();
+}
+
+uint32_t UDTLayoutBase::deepPaddingSize() const {
+ uint32_t Result = shallowPaddingSize();
+ for (auto &Child : ChildStorage)
+ Result += Child->deepPaddingSize();
+ return Result;
+}
+
+void UDTLayoutBase::initializeChildren(const PDBSymbol &Sym) {
+  // Handle bases first, then VTables, then data members, then functions,
+  // then everything else.  This ordering is necessary
+ // so that bases and vtables get initialized before any functions which
+ // may override them.
+
+ UniquePtrVector<PDBSymbolTypeBaseClass> Bases;
+ UniquePtrVector<PDBSymbolTypeVTable> VTables;
+ UniquePtrVector<PDBSymbolData> Members;
+ auto Children = Sym.findAllChildren();
+ while (auto Child = Children->getNext()) {
+ if (auto Base = unique_dyn_cast<PDBSymbolTypeBaseClass>(Child)) {
+ if (Base->isVirtualBaseClass())
+ VirtualBases.push_back(std::move(Base));
+ else
+ Bases.push_back(std::move(Base));
+ }
+
+ else if (auto Data = unique_dyn_cast<PDBSymbolData>(Child)) {
+ if (Data->getDataKind() == PDB_DataKind::Member)
+ Members.push_back(std::move(Data));
+ else
+ Other.push_back(std::move(Child));
+ } else if (auto VT = unique_dyn_cast<PDBSymbolTypeVTable>(Child))
+ VTables.push_back(std::move(VT));
+ else if (auto Func = unique_dyn_cast<PDBSymbolFunc>(Child))
+ Funcs.push_back(std::move(Func));
+ else
+ Other.push_back(std::move(Child));
+ }
+
+ for (auto &Base : Bases) {
+ auto BL = llvm::make_unique<BaseClassLayout>(*this, std::move(Base));
+ BaseClasses.push_back(BL.get());
+
+ addChildToLayout(std::move(BL));
+ }
+
+ for (auto &VT : VTables) {
+ auto VTLayout = llvm::make_unique<VTableLayoutItem>(*this, std::move(VT));
+
+ VTable = VTLayout.get();
+
+ addChildToLayout(std::move(VTLayout));
+ continue;
+ }
+
+ for (auto &Data : Members) {
+ auto DM = llvm::make_unique<DataMemberLayoutItem>(*this, std::move(Data));
+
+ addChildToLayout(std::move(DM));
+ }
+
+ for (auto &Func : Funcs) {
+ if (!Func->isVirtual())
+ continue;
+
+ if (Func->isIntroVirtualFunction())
+ addVirtualIntro(*Func);
+ else
+ addVirtualOverride(*Func);
+ }
+}
+
+void UDTLayoutBase::addVirtualIntro(PDBSymbolFunc &Func) {
+ // Kind of a hack, but we prefer the more common destructor name that people
+  // are familiar with, e.g. ~ClassName.  It seems there are always both, and
+ // the vector deleting destructor overwrites the nice destructor, so just
+ // ignore the vector deleting destructor.
+ if (Func.getName() == "__vecDelDtor")
+ return;
+
+ if (!VTable) {
+ // FIXME: Handle this. What's most likely happening is we have an intro
+ // virtual in a derived class where the base also has an intro virtual.
+ // In this case the vtable lives in the base. What we really need is
+ // for each UDTLayoutBase to contain a list of all its vtables, and
+ // then propagate this list up the hierarchy so that derived classes have
+ // direct access to their bases' vtables.
+ return;
+ }
+
+ uint32_t Stride = VTable->getElementSize();
+
+ uint32_t Index = Func.getVirtualBaseOffset();
+ assert(Index % Stride == 0);
+ Index /= Stride;
+
+ VTable->setFunction(Index, Func);
+}
+
+VTableLayoutItem *UDTLayoutBase::findVTableAtOffset(uint32_t RelativeOffset) {
+ if (VTable && VTable->getOffsetInParent() == RelativeOffset)
+ return VTable;
+ for (auto Base : BaseClasses) {
+ uint32_t Begin = Base->getOffsetInParent();
+ uint32_t End = Begin + Base->getSize();
+ if (RelativeOffset < Begin || RelativeOffset >= End)
+ continue;
+
+ return Base->findVTableAtOffset(RelativeOffset - Begin);
+ }
+
+ return nullptr;
+}
+
+void UDTLayoutBase::addVirtualOverride(PDBSymbolFunc &Func) {
+ auto Signature = Func.getSignature();
+ auto ThisAdjust = Signature->getThisAdjust();
+ // ThisAdjust tells us which VTable we're looking for. Specifically, it's
+ // the offset into the current class of the VTable we're looking for. So
+ // look through the base hierarchy until we find one such that
+ // AbsoluteOffset(VT) == ThisAdjust
+ VTableLayoutItem *VT = findVTableAtOffset(ThisAdjust);
+ if (!VT) {
+ // FIXME: There really should be a vtable here. If there's not it probably
+ // means that the vtable is in a virtual base, which we don't yet support.
+ assert(!VirtualBases.empty());
+ return;
+ }
+ int32_t OverrideIndex = -1;
+ // Now we've found the VTable. Func will not have a virtual base offset set,
+ // so instead we need to compare names and signatures. We iterate each item
+  // in the VTable.  All items should already have non-null entries because
+  // they were initialized by the intro virtuals, which are guaranteed to come
+  // first.
+ for (auto ItemAndIndex : enumerate(VT->funcs())) {
+ auto Item = ItemAndIndex.value();
+ assert(Item);
+ // If the name doesn't match, this isn't an override. Note that it's ok
+    // for the return type not to match (e.g. covariant return).
+ if (Item->getName() != Func.getName()) {
+ if (Item->isDestructor() && Func.isDestructor()) {
+ OverrideIndex = ItemAndIndex.index();
+ break;
+ }
+ continue;
+ }
+ // Now make sure it's the right overload. Get the signature of the existing
+ // vtable method and make sure it has the same arglist and the same cv-ness.
+ auto ExistingSig = Item->getSignature();
+ if (ExistingSig->isConstType() != Signature->isConstType())
+ continue;
+ if (ExistingSig->isVolatileType() != Signature->isVolatileType())
+ continue;
+
+    // Now compare arguments.  Using the raw bytes of the PDB this would be
+    // trivial because there is an ArgListId and they should be identical.
+    // But DIA doesn't expose this, so the best we can do is iterate each
+    // argument and confirm that each one is identical.
+ if (ExistingSig->getCount() != Signature->getCount())
+ continue;
+ bool IsMatch = true;
+ auto ExistingEnumerator = ExistingSig->getArguments();
+ auto NewEnumerator = Signature->getArguments();
+ for (uint32_t I = 0; I < ExistingEnumerator->getChildCount(); ++I) {
+ auto ExistingArg = ExistingEnumerator->getNext();
+ auto NewArg = NewEnumerator->getNext();
+ if (ExistingArg->getSymIndexId() != NewArg->getSymIndexId()) {
+ IsMatch = false;
+ break;
+ }
+ }
+ if (!IsMatch)
+ continue;
+
+ // It's a match! Stick the new function into the VTable.
+ OverrideIndex = ItemAndIndex.index();
+ break;
+ }
+ if (OverrideIndex == -1) {
+ // FIXME: This is probably due to one of the other FIXMEs in this file.
+ return;
+ }
+ VT->setFunction(OverrideIndex, Func);
+}
+
+void UDTLayoutBase::addChildToLayout(std::unique_ptr<StorageItemBase> Child) {
+ uint32_t Begin = Child->getOffsetInParent();
+ uint32_t End = Begin + Child->getSize();
+ // Due to the empty base optimization, End might point outside the bounds of
+ // the parent class. If that happens, just clamp the value.
+ End = std::min(End, getClassSize());
+
+ UsedBytes.set(Begin, End);
+  // Use a separate index so Begin still holds the child's start offset when
+  // we compute the sorted insertion point below.
+  for (uint32_t Byte = Begin; Byte != End; ++Byte)
+    ChildrenPerByte[Byte].push_back(Child.get());
+
+ auto Loc = std::upper_bound(
+ ChildStorage.begin(), ChildStorage.end(), Begin,
+ [](uint32_t Off, const std::unique_ptr<StorageItemBase> &Item) {
+ return Off < Item->getOffsetInParent();
+ });
+
+ ChildStorage.insert(Loc, std::move(Child));
+} \ No newline at end of file
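
The padding queries above reduce to bit counting over UsedBytes. A minimal
standalone sketch of the same arithmetic, using llvm::BitVector directly
(shallowPadding and the 16-byte layout are illustrative, not part of this
patch):

#include "llvm/ADT/BitVector.h"
#include <cstdint>

// Shallow padding is the number of bytes of a record that no child (base,
// vtable pointer, or data member) claims.
static uint32_t shallowPadding(const llvm::BitVector &UsedBytes) {
  return UsedBytes.size() - UsedBytes.count();
}

int main() {
  // Hypothetical 16-byte struct: an 8-byte member at offset 0 and a 4-byte
  // member at offset 12 leave bytes 8..11 as padding.
  llvm::BitVector UsedBytes(16);
  UsedBytes.set(0, 8);   // marks [0, 8)
  UsedBytes.set(12, 16); // marks [12, 16)
  return shallowPadding(UsedBytes) == 4 ? 0 : 1;
}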
diff --git a/lib/DebugInfo/Symbolize/DIPrinter.cpp b/lib/DebugInfo/Symbolize/DIPrinter.cpp
index be5c603a38ef..c1e2536d6e20 100644
--- a/lib/DebugInfo/Symbolize/DIPrinter.cpp
+++ b/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -78,8 +78,18 @@ void DIPrinter::print(const DILineInfo &Info, bool Inlined) {
std::string Filename = Info.FileName;
if (Filename == kDILineInfoBadString)
Filename = kBadString;
- OS << Filename << ":" << Info.Line << ":" << Info.Column << "\n";
- printContext(Filename, Info.Line);
+ if (!Verbose) {
+ OS << Filename << ":" << Info.Line << ":" << Info.Column << "\n";
+ printContext(Filename, Info.Line);
+ return;
+ }
+ OS << " Filename: " << Filename << "\n";
+ if (Info.StartLine)
+ OS << "Function start line: " << Info.StartLine << "\n";
+ OS << " Line: " << Info.Line << "\n";
+ OS << " Column: " << Info.Column << "\n";
+ if (Info.Discriminator)
+ OS << " Discriminator: " << Info.Discriminator << "\n";
}
DIPrinter &DIPrinter::operator<<(const DILineInfo &Info) {
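
For reference, the verbose branch above prints one labelled field per line.
For a hypothetical location (foo.cpp, function starting at line 10, hit at
12:5, no discriminator) it would emit:

  Filename: foo.cpp
Function start line: 10
  Line: 12
  Column: 5

The StartLine and Discriminator lines are printed only when nonzero.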
diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index f6940080089f..f672680cb9ea 100644
--- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -1,4 +1,4 @@
-//===-- SymbolizableObjectFile.cpp ----------------------------------------===//
+//===- SymbolizableObjectFile.cpp -----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,15 +12,29 @@
//===----------------------------------------------------------------------===//
#include "SymbolizableObjectFile.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolSize.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/DataExtractor.h"
-#include "llvm/DebugInfo/DWARF/DWARFContext.h"
-
-namespace llvm {
-namespace symbolize {
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <utility>
+#include <vector>
+using namespace llvm;
using namespace object;
+using namespace symbolize;
static DILineInfoSpecifier
getDILineInfoSpecifier(FunctionNameKind FNKind) {
@@ -73,14 +87,17 @@ SymbolizableObjectFile::SymbolizableObjectFile(ObjectFile *Obj,
: Module(Obj), DebugInfoContext(std::move(DICtx)) {}
namespace {
+
struct OffsetNamePair {
uint32_t Offset;
StringRef Name;
+
bool operator<(const OffsetNamePair &R) const {
return Offset < R.Offset;
}
};
-}
+
+} // end anonymous namespace
std::error_code SymbolizableObjectFile::addCoffExportSymbols(
const COFFObjectFile *CoffObj) {
@@ -147,7 +164,7 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
return errorToErrorCode(SymbolNameOrErr.takeError());
StringRef SymbolName = *SymbolNameOrErr;
// Mach-O symbol table names have leading underscore, skip it.
- if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_')
+ if (Module->isMachO() && !SymbolName.empty() && SymbolName[0] == '_')
SymbolName = SymbolName.drop_front();
// FIXME: If a function has alias, there are two entries in symbol table
// with same address size. Make sure we choose the correct one.
@@ -252,7 +269,3 @@ DIGlobal SymbolizableObjectFile::symbolizeData(uint64_t ModuleOffset) const {
Res.Size);
return Res;
}
-
-} // namespace symbolize
-} // namespace llvm
-
diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
index 8583b6a36e63..216cca8de4f5 100644
--- a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
+++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
@@ -1,4 +1,4 @@
-//===-- SymbolizableObjectFile.h -------------------------------- C++ -----===//
+//===- SymbolizableObjectFile.h ---------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,14 +13,20 @@
#ifndef LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
#define LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+#include "llvm/Support/ErrorOr.h"
+#include <cstdint>
#include <map>
+#include <memory>
+#include <string>
+#include <system_error>
namespace llvm {
+
class DataExtractor;
-}
-namespace llvm {
namespace symbolize {
class SymbolizableObjectFile : public SymbolizableModule {
@@ -65,6 +71,7 @@ private:
// If size is 0, assume that symbol occupies the whole memory range up to
// the following symbol.
uint64_t Size;
+
friend bool operator<(const SymbolDesc &s1, const SymbolDesc &s2) {
return s1.Addr < s2.Addr;
}
@@ -76,7 +83,8 @@ private:
std::unique_ptr<DIContext> DICtx);
};
-} // namespace symbolize
-} // namespace llvm
+} // end namespace symbolize
+
+} // end namespace llvm
-#endif // LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
+#endif // LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
diff --git a/lib/Demangle/ItaniumDemangle.cpp b/lib/Demangle/ItaniumDemangle.cpp
index 097b6ca2e083..49dbe74d25df 100644
--- a/lib/Demangle/ItaniumDemangle.cpp
+++ b/lib/Demangle/ItaniumDemangle.cpp
@@ -36,6 +36,12 @@ enum {
success
};
+enum {
+ CV_const = (1 << 0),
+ CV_volatile = (1 << 1),
+ CV_restrict = (1 << 2),
+};
+
template <class C>
static const char *parse_type(const char *first, const char *last, C &db);
template <class C>
@@ -436,15 +442,15 @@ static const char *parse_cv_qualifiers(const char *first, const char *last,
cv = 0;
if (first != last) {
if (*first == 'r') {
- cv |= 4;
+ cv |= CV_restrict;
++first;
}
if (*first == 'V') {
- cv |= 2;
+ cv |= CV_volatile;
++first;
}
if (*first == 'K') {
- cv |= 1;
+ cv |= CV_const;
++first;
}
}
@@ -1396,7 +1402,8 @@ static const char *parse_function_type(const char *first, const char *last,
int ref_qual = 0;
while (true) {
if (t == last) {
- db.names.pop_back();
+ if (!db.names.empty())
+ db.names.pop_back();
return first;
}
if (*t == 'E') {
@@ -1663,27 +1670,30 @@ static const char *parse_type(const char *first, const char *last, C &db) {
db.subs.emplace_back();
for (size_t k = k0; k < k1; ++k) {
if (is_function) {
- size_t p = db.names[k].second.size();
- if (db.names[k].second[p - 2] == '&')
- p -= 3;
- else if (db.names[k].second.back() == '&')
+ auto &name = db.names[k].second;
+ size_t p = name.size();
+
+ if (name[p - 2] == '&' && name[p - 1] == '&')
p -= 2;
- if (cv & 1) {
- db.names[k].second.insert(p, " const");
+ else if (name.back() == '&')
+ p -= 1;
+
+ if (cv & CV_const) {
+ name.insert(p, " const");
p += 6;
}
- if (cv & 2) {
- db.names[k].second.insert(p, " volatile");
+ if (cv & CV_volatile) {
+ name.insert(p, " volatile");
p += 9;
}
- if (cv & 4)
- db.names[k].second.insert(p, " restrict");
+ if (cv & CV_restrict)
+ name.insert(p, " restrict");
} else {
- if (cv & 1)
+ if (cv & CV_const)
db.names[k].first.append(" const");
- if (cv & 2)
+ if (cv & CV_volatile)
db.names[k].first.append(" volatile");
- if (cv & 4)
+ if (cv & CV_restrict)
db.names[k].first.append(" restrict");
}
db.subs.back().push_back(db.names[k]);
@@ -3826,6 +3836,8 @@ static const char *parse_call_offset(const char *first, const char *last) {
// ::= GV <object name> # Guard variable for one-time
// initialization
// # No <type>
+// ::= TW <object name> # Thread-local wrapper
+// ::= TH <object name> # Thread-local initialization
// extension ::= TC <first type> <number> _ <second type> # construction
// vtable for second-in-first
// extension ::= GR <object name> # reference temporary for object
@@ -3919,6 +3931,27 @@ static const char *parse_special_name(const char *first, const char *last,
}
}
break;
+ case 'W':
+ // TW <object name> # Thread-local wrapper
+ t = parse_name(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "thread-local wrapper routine for ");
+ first = t;
+ }
+ break;
+ case 'H':
+ // TH <object name> # Thread-local initialization
+ t = parse_name(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(
+ 0, "thread-local initialization routine for ");
+ first = t;
+ }
+ break;
default:
// T <call-offset> <base encoding>
{
@@ -4074,11 +4107,11 @@ static const char *parse_encoding(const char *first, const char *last, C &db) {
if (db.names.empty())
return first;
db.names.back().first += ')';
- if (cv & 1)
+ if (cv & CV_const)
db.names.back().first.append(" const");
- if (cv & 2)
+ if (cv & CV_volatile)
db.names.back().first.append(" volatile");
- if (cv & 4)
+ if (cv & CV_restrict)
db.names.back().first.append(" restrict");
if (ref == 1)
db.names.back().first.append(" &");
@@ -4225,20 +4258,11 @@ char *llvm::itaniumDemangle(const char *mangled_name, char *buf, size_t *n,
*status = invalid_args;
return nullptr;
}
-
- size_t len = std::strlen(mangled_name);
- if (len < 2 || strncmp(mangled_name, "_Z", 2)) {
- if (len < 4 || strncmp(mangled_name, "___Z", 4)) {
- if (status)
- *status = invalid_mangled_name;
- return nullptr;
- }
- }
-
size_t internal_size = buf != nullptr ? *n : 0;
Db db;
db.template_param.emplace_back();
int internal_status = success;
+ size_t len = std::strlen(mangled_name);
demangle(mangled_name, mangled_name + len, db, internal_status);
if (internal_status == success && db.fix_forward_references &&
!db.template_param.empty() && !db.template_param.front().empty()) {
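
The new 'TW'/'TH' cases can be exercised through the public entry point. A
small sketch, assuming the usual cxa_demangle-style contract (status 0 on
success, caller frees the returned buffer); _ZTW1x is the thread-local
wrapper for a variable x:

#include "llvm/Demangle/Demangle.h"
#include <cstdio>
#include <cstdlib>

int main() {
  int Status = 0;
  // With this patch the 'TW' special name is recognized.
  char *Demangled = llvm::itaniumDemangle("_ZTW1x", nullptr, nullptr, &Status);
  if (Status == 0 && Demangled)
    std::printf("%s\n", Demangled); // thread-local wrapper routine for x
  std::free(Demangled);
  return 0;
}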
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index b4bed325f491..2ee72f9a8c16 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -515,7 +515,7 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
// to the function tells DynamicLibrary to load the program, not a library.
if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr, ErrorStr))
return nullptr;
-
+
// If the user specified a memory manager but didn't specify which engine to
// create, we assume they only want the JIT, and we fail if they only want
// the interpreter.
@@ -616,7 +616,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
for (unsigned int i = 0; i < elemNum; ++i) {
Type *ElemTy = STy->getElementType(i);
if (ElemTy->isIntegerTy())
- Result.AggregateVal[i].IntVal =
+ Result.AggregateVal[i].IntVal =
APInt(ElemTy->getPrimitiveSizeInBits(), 0);
else if (ElemTy->isAggregateType()) {
const Constant *ElemUndef = UndefValue::get(ElemTy);
@@ -727,7 +727,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
APFloat apf = APFloat(APFloat::x87DoubleExtended(), GV.IntVal);
uint64_t v;
bool ignored;
- (void)apf.convertToInteger(&v, BitWidth,
+ (void)apf.convertToInteger(makeMutableArrayRef(v), BitWidth,
CE->getOpcode()==Instruction::FPToSI,
APFloat::rmTowardZero, &ignored);
GV.IntVal = v; // endian?
@@ -979,7 +979,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
// Check if vector holds integers.
if (ElemTy->isIntegerTy()) {
if (CAZ) {
- GenericValue intZero;
+ GenericValue intZero;
intZero.IntVal = APInt(ElemTy->getScalarSizeInBits(), 0ull);
std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(),
intZero);
@@ -1079,7 +1079,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
*(((float*)Ptr)+i) = Val.AggregateVal[i].FloatVal;
if (cast<VectorType>(Ty)->getElementType()->isIntegerTy()) {
unsigned numOfBytes =(Val.AggregateVal[i].IntVal.getBitWidth()+7)/8;
- StoreIntToMemory(Val.AggregateVal[i].IntVal,
+ StoreIntToMemory(Val.AggregateVal[i].IntVal,
(uint8_t*)Ptr + numOfBytes*i, numOfBytes);
}
}
@@ -1186,7 +1186,7 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
DEBUG(Init->dump());
if (isa<UndefValue>(Init))
return;
-
+
if (const ConstantVector *CP = dyn_cast<ConstantVector>(Init)) {
unsigned ElementSize =
getDataLayout().getTypeAllocSize(CP->getType()->getElementType());
@@ -1194,12 +1194,12 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
InitializeMemory(CP->getOperand(i), (char*)Addr+i*ElementSize);
return;
}
-
+
if (isa<ConstantAggregateZero>(Init)) {
memset(Addr, 0, (size_t)getDataLayout().getTypeAllocSize(Init->getType()));
return;
}
-
+
if (const ConstantArray *CPA = dyn_cast<ConstantArray>(Init)) {
unsigned ElementSize =
getDataLayout().getTypeAllocSize(CPA->getType()->getElementType());
@@ -1207,7 +1207,7 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
InitializeMemory(CPA->getOperand(i), (char*)Addr+i*ElementSize);
return;
}
-
+
if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(Init)) {
const StructLayout *SL =
getDataLayout().getStructLayout(cast<StructType>(CPS->getType()));
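
The convertToInteger change above tracks an APFloat API that now takes a
MutableArrayRef<integerPart> rather than a raw pointer; makeMutableArrayRef(v)
wraps the single uint64_t as a one-element buffer. The wrapper idiom in
isolation (toy example, unrelated to the interpreter):

#include "llvm/ADT/ArrayRef.h"
#include <cstdint>

int main() {
  uint64_t V = 0;
  // One-element mutable view over V: data() == &V, size() == 1.
  llvm::MutableArrayRef<uint64_t> Ref = llvm::makeMutableArrayRef(V);
  Ref[0] = 42;
  return V == 42 ? 0 : 1;
}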
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 1d7c6e714ed0..e956dbebaffe 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -188,7 +188,7 @@ LLVMBool LLVMCreateMCJITCompilerForModule(
for (auto &F : *Mod) {
auto Attrs = F.getAttributes();
StringRef Value(options.NoFramePointerElim ? "true" : "false");
- Attrs = Attrs.addAttribute(F.getContext(), AttributeSet::FunctionIndex,
+ Attrs = Attrs.addAttribute(F.getContext(), AttributeList::FunctionIndex,
"no-frame-pointer-elim", Value);
F.setAttributes(Attrs);
}
diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
index 3b8c4b973e68..e6c33b2ecc2a 100644
--- a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
+++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
@@ -4,7 +4,7 @@ if( HAVE_LIBDL )
set(LLVM_INTEL_JIT_LIBS ${CMAKE_DL_LIBS})
endif()
-set(LLVM_INTEL_JIT_LIBS ${PTHREAD_LIB} ${LLVM_INTEL_JIT_LIBS})
+set(LLVM_INTEL_JIT_LIBS ${LLVM_PTHREAD_LIB} ${LLVM_INTEL_JIT_LIBS})
add_llvm_library(LLVMIntelJITEvents
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 923f6e7147db..e29e9fc2c702 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -899,10 +899,10 @@ void Interpreter::visitSwitchInst(SwitchInst &I) {
// Check to see if any of the cases match...
BasicBlock *Dest = nullptr;
- for (SwitchInst::CaseIt i = I.case_begin(), e = I.case_end(); i != e; ++i) {
- GenericValue CaseVal = getOperandValue(i.getCaseValue(), SF);
+ for (auto Case : I.cases()) {
+ GenericValue CaseVal = getOperandValue(Case.getCaseValue(), SF);
if (executeICMP_EQ(CondVal, CaseVal, ElTy).IntVal != 0) {
- Dest = cast<BasicBlock>(i.getCaseSuccessor());
+ Dest = cast<BasicBlock>(Case.getCaseSuccessor());
break;
}
}
diff --git a/lib/ExecutionEngine/Orc/CMakeLists.txt b/lib/ExecutionEngine/Orc/CMakeLists.txt
index 685e882e4a83..f83e002c758f 100644
--- a/lib/ExecutionEngine/Orc/CMakeLists.txt
+++ b/lib/ExecutionEngine/Orc/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMOrcJIT
OrcCBindings.cpp
OrcError.cpp
OrcMCJITReplacement.cpp
+ RPCUtils.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/ExecutionEngine/Orc
diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
index a74fae775ac4..a79dd844bf4f 100644
--- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
+++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
@@ -16,7 +16,7 @@
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
-#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Error.h"
@@ -30,7 +30,7 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(TargetMachine, LLVMTargetMachineRef)
class OrcCBindingsStack {
public:
typedef orc::JITCompileCallbackManager CompileCallbackMgr;
- typedef orc::ObjectLinkingLayer<> ObjLayerT;
+ typedef orc::RTDyldObjectLinkingLayer<> ObjLayerT;
typedef orc::IRCompileLayer<ObjLayerT> CompileLayerT;
typedef orc::CompileOnDemandLayer<CompileLayerT, CompileCallbackMgr>
CODLayerT;
diff --git a/lib/ExecutionEngine/Orc/OrcError.cpp b/lib/ExecutionEngine/Orc/OrcError.cpp
index c531fe369920..9e70c4ac1dbf 100644
--- a/lib/ExecutionEngine/Orc/OrcError.cpp
+++ b/lib/ExecutionEngine/Orc/OrcError.cpp
@@ -39,14 +39,19 @@ public:
return "Remote indirect stubs owner does not exist";
case OrcErrorCode::RemoteIndirectStubsOwnerIdAlreadyInUse:
return "Remote indirect stubs owner Id already in use";
+ case OrcErrorCode::RPCConnectionClosed:
+ return "RPC connection closed";
+ case OrcErrorCode::RPCCouldNotNegotiateFunction:
+ return "Could not negotiate RPC function";
case OrcErrorCode::RPCResponseAbandoned:
return "RPC response abandoned";
case OrcErrorCode::UnexpectedRPCCall:
return "Unexpected RPC call";
case OrcErrorCode::UnexpectedRPCResponse:
return "Unexpected RPC response";
- case OrcErrorCode::UnknownRPCFunction:
- return "Unknown RPC function";
+ case OrcErrorCode::UnknownErrorCodeFromRemote:
+ return "Unknown error returned from remote RPC function "
+ "(Use StringError to get error message)";
}
llvm_unreachable("Unhandled error code");
}
@@ -58,10 +63,10 @@ static ManagedStatic<OrcErrorCategory> OrcErrCat;
namespace llvm {
namespace orc {
-Error orcError(OrcErrorCode ErrCode) {
+std::error_code orcError(OrcErrorCode ErrCode) {
typedef std::underlying_type<OrcErrorCode>::type UT;
- return errorCodeToError(
- std::error_code(static_cast<UT>(ErrCode), *OrcErrCat));
+ return std::error_code(static_cast<UT>(ErrCode), *OrcErrCat);
}
+
}
}
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index af70960a1f92..a5100a56bcf1 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -24,7 +24,7 @@
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
-#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Mangler.h"
#include "llvm/Object/Archive.h"
@@ -315,7 +315,7 @@ private:
NotifyObjectLoadedT(OrcMCJITReplacement &M) : M(M) {}
template <typename ObjListT>
- void operator()(ObjectLinkingLayerBase::ObjSetHandleT H,
+ void operator()(RTDyldObjectLinkingLayerBase::ObjSetHandleT H,
const ObjListT &Objects,
const LoadedObjInfoListT &Infos) const {
M.UnfinalizedSections[H] = std::move(M.SectionsAllocatedSinceLastLoad);
@@ -344,7 +344,7 @@ private:
public:
NotifyFinalizedT(OrcMCJITReplacement &M) : M(M) {}
- void operator()(ObjectLinkingLayerBase::ObjSetHandleT H) {
+ void operator()(RTDyldObjectLinkingLayerBase::ObjSetHandleT H) {
M.UnfinalizedSections.erase(H);
}
@@ -361,7 +361,7 @@ private:
return MangledName;
}
- typedef ObjectLinkingLayer<NotifyObjectLoadedT> ObjectLayerT;
+ typedef RTDyldObjectLinkingLayer<NotifyObjectLoadedT> ObjectLayerT;
typedef IRCompileLayer<ObjectLayerT> CompileLayerT;
typedef LazyEmittingLayer<CompileLayerT> LazyEmitLayerT;
diff --git a/lib/ExecutionEngine/Orc/RPCUtils.cpp b/lib/ExecutionEngine/Orc/RPCUtils.cpp
new file mode 100644
index 000000000000..2a7ab5ca8180
--- /dev/null
+++ b/lib/ExecutionEngine/Orc/RPCUtils.cpp
@@ -0,0 +1,55 @@
+//===--------------- RPCUtils.cpp - RPCUtils implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// RPCUtils implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/RPCUtils.h"
+
+char llvm::orc::rpc::RPCFatalError::ID = 0;
+char llvm::orc::rpc::ConnectionClosed::ID = 0;
+char llvm::orc::rpc::ResponseAbandoned::ID = 0;
+char llvm::orc::rpc::CouldNotNegotiate::ID = 0;
+
+namespace llvm {
+namespace orc {
+namespace rpc {
+
+std::error_code ConnectionClosed::convertToErrorCode() const {
+ return orcError(OrcErrorCode::RPCConnectionClosed);
+}
+
+void ConnectionClosed::log(raw_ostream &OS) const {
+ OS << "RPC connection already closed";
+}
+
+std::error_code ResponseAbandoned::convertToErrorCode() const {
+ return orcError(OrcErrorCode::RPCResponseAbandoned);
+}
+
+void ResponseAbandoned::log(raw_ostream &OS) const {
+ OS << "RPC response abandoned";
+}
+
+CouldNotNegotiate::CouldNotNegotiate(std::string Signature)
+ : Signature(std::move(Signature)) {}
+
+std::error_code CouldNotNegotiate::convertToErrorCode() const {
+ return orcError(OrcErrorCode::RPCCouldNotNegotiateFunction);
+}
+
+void CouldNotNegotiate::log(raw_ostream &OS) const {
+ OS << "Could not negotiate RPC function " << Signature;
+}
+
+} // end namespace rpc
+} // end namespace orc
+} // end namespace llvm
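
The three error classes above follow the standard llvm::ErrorInfo recipe: a
static ID whose address identifies the type at runtime, a log() override, and
convertToErrorCode() for interop with std::error_code. A condensed sketch with
a hypothetical class name:

#include "llvm/Support/Error.h"

using namespace llvm;

class MyRPCError : public ErrorInfo<MyRPCError> {
public:
  static char ID; // address serves as the runtime type id
  void log(raw_ostream &OS) const override { OS << "my RPC error"; }
  std::error_code convertToErrorCode() const override {
    return inconvertibleErrorCode();
  }
};

char MyRPCError::ID = 0;

// Usage: Error E = make_error<MyRPCError>();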
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 63b56f725209..df9d2ceba329 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -443,7 +443,7 @@ Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
SI != SE; ++SI) {
const SectionRef &Section = *SI;
- bool IsRequired = isRequiredForExecution(Section);
+ bool IsRequired = isRequiredForExecution(Section) || ProcessAllSections;
// Consider only the sections that are required to be loaded for execution
if (IsRequired) {
@@ -484,6 +484,14 @@ Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
}
}
+  // Compute the Global Offset Table size.  If it is not zero, we also update
+  // the alignment, which is equal to the size of a single GOT entry.
+ if (unsigned GotSize = computeGOTSize(Obj)) {
+ RWSectionSizes.push_back(GotSize);
+ RWDataAlign = std::max<uint32_t>(RWDataAlign, getGOTEntrySize());
+ }
+
// Compute the size of all common symbols
uint64_t CommonSize = 0;
uint32_t CommonAlign = 1;
@@ -518,6 +526,24 @@ Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
return Error::success();
}
+// compute GOT size
+unsigned RuntimeDyldImpl::computeGOTSize(const ObjectFile &Obj) {
+ size_t GotEntrySize = getGOTEntrySize();
+ if (!GotEntrySize)
+ return 0;
+
+ size_t GotSize = 0;
+ for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end();
+ SI != SE; ++SI) {
+ for (const RelocationRef &Reloc : SI->relocations())
+ if (relocationNeedsGot(Reloc))
+ GotSize += GotEntrySize;
+ }
+
+ return GotSize;
+}
+
// compute stub buffer size for the given section
unsigned RuntimeDyldImpl::computeSectionStubBufSize(const ObjectFile &Obj,
const SectionRef &Section) {
@@ -677,7 +703,7 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL;
unsigned PaddingSize = 0;
unsigned StubBufSize = 0;
- bool IsRequired = isRequiredForExecution(Section);
+ bool IsRequired = isRequiredForExecution(Section) || ProcessAllSections;
bool IsVirtual = Section.isVirtual();
bool IsZeroInit = isZeroInit(Section);
bool IsReadOnly = isReadOnlyData(Section);
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 05615d3cc6cf..f780137d0874 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -272,6 +272,8 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
default:
llvm_unreachable("Relocation type not implemented yet!");
break;
+ case ELF::R_X86_64_NONE:
+ break;
case ELF::R_X86_64_64: {
support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) =
Value + Addend;
@@ -419,6 +421,18 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
// from bits 11:0 of X
or32AArch64Imm(TargetPtr, Value + Addend);
break;
+ case ELF::R_AARCH64_LDST8_ABS_LO12_NC:
+ // Operation: S + A
+ // Immediate goes in bits 21:10 of LD/ST instruction, taken
+ // from bits 11:0 of X
+ or32AArch64Imm(TargetPtr, getBits(Value + Addend, 0, 11));
+ break;
+ case ELF::R_AARCH64_LDST16_ABS_LO12_NC:
+ // Operation: S + A
+ // Immediate goes in bits 21:10 of LD/ST instruction, taken
+ // from bits 11:1 of X
+ or32AArch64Imm(TargetPtr, getBits(Value + Addend, 1, 11));
+ break;
case ELF::R_AARCH64_LDST32_ABS_LO12_NC:
// Operation: S + A
// Immediate goes in bits 21:10 of LD/ST instruction, taken
@@ -431,6 +445,12 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
// from bits 11:3 of X
or32AArch64Imm(TargetPtr, getBits(Value + Addend, 3, 11));
break;
+ case ELF::R_AARCH64_LDST128_ABS_LO12_NC:
+ // Operation: S + A
+ // Immediate goes in bits 21:10 of LD/ST instruction, taken
+ // from bits 11:4 of X
+ or32AArch64Imm(TargetPtr, getBits(Value + Addend, 4, 11));
+ break;
}
}
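
The LDST8/16/128 cases above differ only in how many low bits of the target
address are implied by the access size: the scaled 12-bit immediate drops 1
bit for 2-byte accesses, 4 bits for 16-byte accesses, and so on. A sketch of
the bit extraction, mirroring the inclusive bit ranges named in the comments
(getBitsSketch is illustrative, not the helper used above):

#include <cassert>
#include <cstdint>

// Bits [Low, High] of Val, inclusive.
static uint64_t getBitsSketch(uint64_t Val, unsigned Low, unsigned High) {
  return (Val >> Low) & ((1ULL << (High - Low + 1)) - 1);
}

int main() {
  // A 16-byte access at byte offset 0x250 encodes 0x250 >> 4 == 0x25.
  assert(getBitsSketch(0x250, 4, 11) == 0x25);
  return 0;
}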
@@ -900,7 +920,7 @@ uint32_t RuntimeDyldELF::getMatchingLoRelocation(uint32_t RelType,
}
// Sometimes we don't need to create thunk for a branch.
-// This typically happens when branch target is located
+// This typically happens when branch target is located
// in the same object file. In such case target is either
// a weak symbol or symbol in a different executable section.
// This function checks if branch target is located in the
@@ -941,6 +961,61 @@ bool RuntimeDyldELF::resolveAArch64ShortBranch(
return true;
}
+void RuntimeDyldELF::resolveAArch64Branch(unsigned SectionID,
+ const RelocationValueRef &Value,
+ relocation_iterator RelI,
+ StubMap &Stubs) {
+
+ DEBUG(dbgs() << "\t\tThis is an AArch64 branch relocation.");
+ SectionEntry &Section = Sections[SectionID];
+
+ uint64_t Offset = RelI->getOffset();
+ unsigned RelType = RelI->getType();
+ // Look for an existing stub.
+ StubMap::const_iterator i = Stubs.find(Value);
+ if (i != Stubs.end()) {
+ resolveRelocation(Section, Offset,
+ (uint64_t)Section.getAddressWithOffset(i->second),
+ RelType, 0);
+ DEBUG(dbgs() << " Stub function found\n");
+ } else if (!resolveAArch64ShortBranch(SectionID, RelI, Value)) {
+ // Create a new stub function.
+ DEBUG(dbgs() << " Create a new stub function\n");
+ Stubs[Value] = Section.getStubOffset();
+ uint8_t *StubTargetAddr = createStubFunction(
+ Section.getAddressWithOffset(Section.getStubOffset()));
+
+ RelocationEntry REmovz_g3(SectionID, StubTargetAddr - Section.getAddress(),
+ ELF::R_AARCH64_MOVW_UABS_G3, Value.Addend);
+ RelocationEntry REmovk_g2(SectionID,
+ StubTargetAddr - Section.getAddress() + 4,
+ ELF::R_AARCH64_MOVW_UABS_G2_NC, Value.Addend);
+ RelocationEntry REmovk_g1(SectionID,
+ StubTargetAddr - Section.getAddress() + 8,
+ ELF::R_AARCH64_MOVW_UABS_G1_NC, Value.Addend);
+ RelocationEntry REmovk_g0(SectionID,
+ StubTargetAddr - Section.getAddress() + 12,
+ ELF::R_AARCH64_MOVW_UABS_G0_NC, Value.Addend);
+
+ if (Value.SymbolName) {
+ addRelocationForSymbol(REmovz_g3, Value.SymbolName);
+ addRelocationForSymbol(REmovk_g2, Value.SymbolName);
+ addRelocationForSymbol(REmovk_g1, Value.SymbolName);
+ addRelocationForSymbol(REmovk_g0, Value.SymbolName);
+ } else {
+ addRelocationForSection(REmovz_g3, Value.SectionID);
+ addRelocationForSection(REmovk_g2, Value.SectionID);
+ addRelocationForSection(REmovk_g1, Value.SectionID);
+ addRelocationForSection(REmovk_g0, Value.SectionID);
+ }
+ resolveRelocation(Section, Offset,
+ reinterpret_cast<uint64_t>(Section.getAddressWithOffset(
+ Section.getStubOffset())),
+ RelType, 0);
+ Section.advanceStubOffset(getMaxStubSize());
+ }
+}
+
Expected<relocation_iterator>
RuntimeDyldELF::processRelocationRef(
unsigned SectionID, relocation_iterator RelI, const ObjectFile &O,
@@ -1035,55 +1110,22 @@ RuntimeDyldELF::processRelocationRef(
DEBUG(dbgs() << "\t\tSectionID: " << SectionID << " Offset: " << Offset
<< "\n");
- if ((Arch == Triple::aarch64 || Arch == Triple::aarch64_be) &&
- (RelType == ELF::R_AARCH64_CALL26 || RelType == ELF::R_AARCH64_JUMP26)) {
- // This is an AArch64 branch relocation, need to use a stub function.
- DEBUG(dbgs() << "\t\tThis is an AArch64 branch relocation.");
- SectionEntry &Section = Sections[SectionID];
-
- // Look for an existing stub.
- StubMap::const_iterator i = Stubs.find(Value);
- if (i != Stubs.end()) {
- resolveRelocation(Section, Offset,
- (uint64_t)Section.getAddressWithOffset(i->second),
- RelType, 0);
- DEBUG(dbgs() << " Stub function found\n");
- } else if (!resolveAArch64ShortBranch(SectionID, RelI, Value)) {
- // Create a new stub function.
- DEBUG(dbgs() << " Create a new stub function\n");
- Stubs[Value] = Section.getStubOffset();
- uint8_t *StubTargetAddr = createStubFunction(
- Section.getAddressWithOffset(Section.getStubOffset()));
-
- RelocationEntry REmovz_g3(SectionID,
- StubTargetAddr - Section.getAddress(),
- ELF::R_AARCH64_MOVW_UABS_G3, Value.Addend);
- RelocationEntry REmovk_g2(SectionID, StubTargetAddr -
- Section.getAddress() + 4,
- ELF::R_AARCH64_MOVW_UABS_G2_NC, Value.Addend);
- RelocationEntry REmovk_g1(SectionID, StubTargetAddr -
- Section.getAddress() + 8,
- ELF::R_AARCH64_MOVW_UABS_G1_NC, Value.Addend);
- RelocationEntry REmovk_g0(SectionID, StubTargetAddr -
- Section.getAddress() + 12,
- ELF::R_AARCH64_MOVW_UABS_G0_NC, Value.Addend);
-
- if (Value.SymbolName) {
- addRelocationForSymbol(REmovz_g3, Value.SymbolName);
- addRelocationForSymbol(REmovk_g2, Value.SymbolName);
- addRelocationForSymbol(REmovk_g1, Value.SymbolName);
- addRelocationForSymbol(REmovk_g0, Value.SymbolName);
- } else {
- addRelocationForSection(REmovz_g3, Value.SectionID);
- addRelocationForSection(REmovk_g2, Value.SectionID);
- addRelocationForSection(REmovk_g1, Value.SectionID);
- addRelocationForSection(REmovk_g0, Value.SectionID);
- }
- resolveRelocation(Section, Offset,
- reinterpret_cast<uint64_t>(Section.getAddressWithOffset(
- Section.getStubOffset())),
- RelType, 0);
- Section.advanceStubOffset(getMaxStubSize());
+  if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) {
+ if (RelType == ELF::R_AARCH64_CALL26 || RelType == ELF::R_AARCH64_JUMP26) {
+ resolveAArch64Branch(SectionID, Value, RelI, Stubs);
+ } else if (RelType == ELF::R_AARCH64_ADR_GOT_PAGE) {
+      // Create a new GOT entry or find an existing one.  If a GOT entry has
+      // to be created, we also emit an ABS64 relocation for it.
+ uint64_t GOTOffset = findOrAllocGOTEntry(Value, ELF::R_AARCH64_ABS64);
+ resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend,
+ ELF::R_AARCH64_ADR_PREL_PG_HI21);
+ } else if (RelType == ELF::R_AARCH64_LD64_GOT_LO12_NC) {
+ uint64_t GOTOffset = findOrAllocGOTEntry(Value, ELF::R_AARCH64_ABS64);
+ resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend,
+ ELF::R_AARCH64_LDST64_ABS_LO12_NC);
+ } else {
+ processSimpleRelocation(SectionID, Offset, RelType, Value);
}
} else if (Arch == Triple::arm) {
if (RelType == ELF::R_ARM_PC24 || RelType == ELF::R_ARM_CALL ||
@@ -1232,7 +1274,7 @@ RuntimeDyldELF::processRelocationRef(
if (i != GOTSymbolOffsets.end())
RE.SymOffset = i->second;
else {
- RE.SymOffset = allocateGOTEntries(SectionID, 1);
+ RE.SymOffset = allocateGOTEntries(1);
GOTSymbolOffsets[TargetName] = RE.SymOffset;
}
}
@@ -1489,14 +1531,15 @@ RuntimeDyldELF::processRelocationRef(
Section.advanceStubOffset(getMaxStubSize());
// Allocate a GOT Entry
- uint64_t GOTOffset = allocateGOTEntries(SectionID, 1);
+ uint64_t GOTOffset = allocateGOTEntries(1);
// The load of the GOT address has an addend of -4
- resolveGOTOffsetRelocation(SectionID, StubOffset + 2, GOTOffset - 4);
+ resolveGOTOffsetRelocation(SectionID, StubOffset + 2, GOTOffset - 4,
+ ELF::R_X86_64_PC32);
// Fill in the value of the symbol we're targeting into the GOT
addRelocationForSymbol(
- computeGOTOffsetRE(SectionID, GOTOffset, 0, ELF::R_X86_64_64),
+ computeGOTOffsetRE(GOTOffset, 0, ELF::R_X86_64_64),
Value.SymbolName);
}
@@ -1511,11 +1554,13 @@ RuntimeDyldELF::processRelocationRef(
} else if (RelType == ELF::R_X86_64_GOTPCREL ||
RelType == ELF::R_X86_64_GOTPCRELX ||
RelType == ELF::R_X86_64_REX_GOTPCRELX) {
- uint64_t GOTOffset = allocateGOTEntries(SectionID, 1);
- resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend);
+ uint64_t GOTOffset = allocateGOTEntries(1);
+ resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend,
+ ELF::R_X86_64_PC32);
// Fill in the value of the symbol we're targeting into the GOT
- RelocationEntry RE = computeGOTOffsetRE(SectionID, GOTOffset, Value.Offset, ELF::R_X86_64_64);
+ RelocationEntry RE =
+ computeGOTOffsetRE(GOTOffset, Value.Offset, ELF::R_X86_64_64);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
else
@@ -1573,9 +1618,7 @@ size_t RuntimeDyldELF::getGOTEntrySize() {
return Result;
}
-uint64_t RuntimeDyldELF::allocateGOTEntries(unsigned SectionID, unsigned no)
-{
- (void)SectionID; // The GOT Section is the same for all section in the object file
+uint64_t RuntimeDyldELF::allocateGOTEntries(unsigned no) {
if (GOTSectionID == 0) {
GOTSectionID = Sections.size();
// Reserve a section id. We'll allocate the section later
@@ -1587,17 +1630,38 @@ uint64_t RuntimeDyldELF::allocateGOTEntries(unsigned SectionID, unsigned no)
return StartOffset;
}
-void RuntimeDyldELF::resolveGOTOffsetRelocation(unsigned SectionID, uint64_t Offset, uint64_t GOTOffset)
-{
+uint64_t RuntimeDyldELF::findOrAllocGOTEntry(const RelocationValueRef &Value,
+ unsigned GOTRelType) {
+ auto E = GOTOffsetMap.insert({Value, 0});
+ if (E.second) {
+ uint64_t GOTOffset = allocateGOTEntries(1);
+
+ // Create relocation for newly created GOT entry
+ RelocationEntry RE =
+ computeGOTOffsetRE(GOTOffset, Value.Offset, GOTRelType);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+
+ E.first->second = GOTOffset;
+ }
+
+ return E.first->second;
+}
+
+void RuntimeDyldELF::resolveGOTOffsetRelocation(unsigned SectionID,
+ uint64_t Offset,
+ uint64_t GOTOffset,
+ uint32_t Type) {
// Fill in the relative address of the GOT Entry into the stub
- RelocationEntry GOTRE(SectionID, Offset, ELF::R_X86_64_PC32, GOTOffset);
+ RelocationEntry GOTRE(SectionID, Offset, Type, GOTOffset);
addRelocationForSection(GOTRE, GOTSectionID);
}
-RelocationEntry RuntimeDyldELF::computeGOTOffsetRE(unsigned SectionID, uint64_t GOTOffset, uint64_t SymbolOffset,
- uint32_t Type)
-{
- (void)SectionID; // The GOT Section is the same for all section in the object file
+RelocationEntry RuntimeDyldELF::computeGOTOffsetRE(uint64_t GOTOffset,
+ uint64_t SymbolOffset,
+ uint32_t Type) {
return RelocationEntry(GOTSectionID, GOTOffset, Type, SymbolOffset);
}
@@ -1663,6 +1727,19 @@ bool RuntimeDyldELF::isCompatibleFile(const object::ObjectFile &Obj) const {
return Obj.isELF();
}
+bool RuntimeDyldELF::relocationNeedsGot(const RelocationRef &R) const {
+ unsigned RelTy = R.getType();
+ if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be)
+ return RelTy == ELF::R_AARCH64_ADR_GOT_PAGE ||
+ RelTy == ELF::R_AARCH64_LD64_GOT_LO12_NC;
+
+ if (Arch == Triple::x86_64)
+ return RelTy == ELF::R_X86_64_GOTPCREL ||
+ RelTy == ELF::R_X86_64_GOTPCRELX ||
+ RelTy == ELF::R_X86_64_REX_GOTPCRELX;
+ return false;
+}
+
bool RuntimeDyldELF::relocationNeedsStub(const RelocationRef &R) const {
if (Arch != Triple::x86_64)
return true; // Conservative answer
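
findOrAllocGOTEntry above leans on the map::insert probe: insert returns an
{iterator, inserted} pair, and the bool is true only for the first request, so
each distinct relocation value is given exactly one GOT slot. The idiom in
isolation (toy int key standing in for RelocationValueRef, 8-byte entries
assumed):

#include <cstdint>
#include <map>

static uint64_t NextGOTOffset = 0;

static uint64_t findOrAlloc(std::map<int, uint64_t> &GOTOffsetMap, int Key) {
  auto E = GOTOffsetMap.insert({Key, 0});
  if (E.second) {            // newly inserted -> allocate a fresh entry
    E.first->second = NextGOTOffset;
    NextGOTOffset += 8;      // one GOT entry
  }
  return E.first->second;
}

int main() {
  std::map<int, uint64_t> M;
  // Two relocations against the same value share one slot.
  return (findOrAlloc(M, 7) == findOrAlloc(M, 7)) ? 0 : 1;
}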
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index d1867d091fe2..498979705b77 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -43,6 +43,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
bool resolveAArch64ShortBranch(unsigned SectionID, relocation_iterator RelI,
const RelocationValueRef &Value);
+ void resolveAArch64Branch(unsigned SectionID, const RelocationValueRef &Value,
+ relocation_iterator RelI, StubMap &Stubs);
+
void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset,
uint32_t Value, uint32_t Type, int32_t Addend);
@@ -88,24 +91,26 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
ObjSectionToIDMap &LocalSections,
RelocationValueRef &Rel);
protected:
- size_t getGOTEntrySize();
+ size_t getGOTEntrySize() override;
private:
SectionEntry &getSection(unsigned SectionID) { return Sections[SectionID]; }
  // Allocate 'no' GOT entries; the GOT is shared by all sections in the object.
- uint64_t allocateGOTEntries(unsigned SectionID, unsigned no);
+ uint64_t allocateGOTEntries(unsigned no);
+
+  // Find the GOT entry corresponding to the relocation, or create a new one.
+ uint64_t findOrAllocGOTEntry(const RelocationValueRef &Value,
+ unsigned GOTRelType);
  // Resolve the relative address of GOTOffset in Section ID and place
// it at the given Offset
void resolveGOTOffsetRelocation(unsigned SectionID, uint64_t Offset,
- uint64_t GOTOffset);
+ uint64_t GOTOffset, uint32_t Type);
// For a GOT entry referenced from SectionID, compute a relocation entry
// that will place the final resolved value in the GOT slot
- RelocationEntry computeGOTOffsetRE(unsigned SectionID,
- uint64_t GOTOffset,
- uint64_t SymbolOffset,
+ RelocationEntry computeGOTOffsetRE(uint64_t GOTOffset, uint64_t SymbolOffset,
unsigned Type);
// Compute the address in memory where we can find the placeholder
@@ -146,6 +151,10 @@ private:
SmallVector<SID, 2> UnregisteredEHFrameSections;
SmallVector<SID, 2> RegisteredEHFrameSections;
+ // Map between GOT relocation value and corresponding GOT offset
+ std::map<RelocationValueRef, uint64_t> GOTOffsetMap;
+
+ bool relocationNeedsGot(const RelocationRef &R) const override;
bool relocationNeedsStub(const RelocationRef &R) const override;
public:
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 279d0de2da76..f5cc883d98fd 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -213,7 +213,7 @@ public:
}
};
-/// @brief Symbol info for RuntimeDyld.
+/// @brief Symbol info for RuntimeDyld.
class SymbolTableEntry {
public:
SymbolTableEntry()
@@ -426,6 +426,9 @@ protected:
uint64_t &RODataSize, uint32_t &RODataAlign,
uint64_t &RWDataSize, uint32_t &RWDataAlign);
+ // \brief Compute GOT size
+ unsigned computeGOTSize(const ObjectFile &Obj);
+
// \brief Compute the stub buffer size required for a section
unsigned computeSectionStubBufSize(const ObjectFile &Obj,
const SectionRef &Section);
@@ -433,6 +436,14 @@ protected:
// \brief Implementation of the generic part of the loadObject algorithm.
Expected<ObjSectionToIDMap> loadObjectImpl(const object::ObjectFile &Obj);
+  // \brief Return the size of a Global Offset Table (GOT) entry
+ virtual size_t getGOTEntrySize() { return 0; }
+
+ // \brief Return true if the relocation R may require allocating a GOT entry.
+ virtual bool relocationNeedsGot(const RelocationRef &R) const {
+ return false;
+ }
+
// \brief Return true if the relocation R may require allocating a stub.
virtual bool relocationNeedsStub(const RelocationRef &R) const {
return true; // Conservative answer
diff --git a/lib/Fuzzer/CMakeLists.txt b/lib/Fuzzer/CMakeLists.txt
index 70bd017bae6b..59cef04cdece 100644
--- a/lib/Fuzzer/CMakeLists.txt
+++ b/lib/Fuzzer/CMakeLists.txt
@@ -12,8 +12,9 @@ if( LLVM_USE_SANITIZE_COVERAGE )
FuzzerCrossOver.cpp
FuzzerDriver.cpp
FuzzerExtFunctionsDlsym.cpp
+ FuzzerExtFunctionsDlsymWin.cpp
FuzzerExtFunctionsWeak.cpp
- FuzzerExtFunctionsWeakAlias.cpp
+ FuzzerExtraCounters.cpp
FuzzerIO.cpp
FuzzerIOPosix.cpp
FuzzerIOWindows.cpp
@@ -21,6 +22,8 @@ if( LLVM_USE_SANITIZE_COVERAGE )
FuzzerMerge.cpp
FuzzerMutate.cpp
FuzzerSHA1.cpp
+ FuzzerShmemPosix.cpp
+ FuzzerShmemWindows.cpp
FuzzerTracePC.cpp
FuzzerTraceState.cpp
FuzzerUtil.cpp
@@ -32,12 +35,12 @@ if( LLVM_USE_SANITIZE_COVERAGE )
add_library(LLVMFuzzerNoMain STATIC
$<TARGET_OBJECTS:LLVMFuzzerNoMainObjects>
)
- target_link_libraries(LLVMFuzzerNoMain ${PTHREAD_LIB})
+ target_link_libraries(LLVMFuzzerNoMain ${LLVM_PTHREAD_LIB})
add_library(LLVMFuzzer STATIC
FuzzerMain.cpp
$<TARGET_OBJECTS:LLVMFuzzerNoMainObjects>
)
- target_link_libraries(LLVMFuzzer ${PTHREAD_LIB})
+ target_link_libraries(LLVMFuzzer ${LLVM_PTHREAD_LIB})
if( LLVM_INCLUDE_TESTS )
add_subdirectory(test)
diff --git a/lib/Fuzzer/FuzzerCorpus.h b/lib/Fuzzer/FuzzerCorpus.h
index 468d5e5ddc70..0f0573994a03 100644
--- a/lib/Fuzzer/FuzzerCorpus.h
+++ b/lib/Fuzzer/FuzzerCorpus.h
@@ -37,8 +37,8 @@ struct InputInfo {
};
class InputCorpus {
+ static const size_t kFeatureSetSize = 1 << 21;
public:
- static const size_t kFeatureSetSize = 1 << 16;
InputCorpus(const std::string &OutputCorpus) : OutputCorpus(OutputCorpus) {
memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature));
memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature));
@@ -68,7 +68,8 @@ class InputCorpus {
}
bool empty() const { return Inputs.empty(); }
const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
- void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile = false) {
+ void AddToCorpus(const Unit &U, size_t NumFeatures,
+ bool MayDeleteFile = false) {
assert(!U.empty());
uint8_t Hash[kSHA1NumBytes];
if (FeatureDebug)
@@ -82,7 +83,7 @@ class InputCorpus {
II.MayDeleteFile = MayDeleteFile;
memcpy(II.Sha1, Hash, kSHA1NumBytes);
UpdateCorpusDistribution();
- ValidateFeatureSet();
+ // ValidateFeatureSet();
}
bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); }
@@ -97,7 +98,7 @@ class InputCorpus {
// Hypothesis: units added to the corpus last are more likely to be
// interesting. This function gives more weight to the more recent units.
size_t ChooseUnitIdxToMutate(Random &Rand) {
- size_t Idx = static_cast<size_t>(CorpusDistribution(Rand.Get_mt19937()));
+ size_t Idx = static_cast<size_t>(CorpusDistribution(Rand));
assert(Idx < Inputs.size());
return Idx;
}
@@ -132,7 +133,7 @@ class InputCorpus {
Printf("EVICTED %zd\n", Idx);
}
- bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) {
+ void AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) {
assert(NewSize);
Idx = Idx % kFeatureSetSize;
uint32_t OldSize = GetFeature(Idx);
@@ -144,23 +145,20 @@ class InputCorpus {
II.NumFeatures--;
if (II.NumFeatures == 0)
DeleteInput(OldIdx);
+ } else {
+ NumAddedFeatures++;
}
+ NumUpdatedFeatures++;
if (FeatureDebug)
Printf("ADD FEATURE %zd sz %d\n", Idx, NewSize);
SmallestElementPerFeature[Idx] = Inputs.size();
InputSizesPerFeature[Idx] = NewSize;
CountingFeatures = true;
- return true;
}
- return false;
}
- size_t NumFeatures() const {
- size_t Res = 0;
- for (size_t i = 0; i < kFeatureSetSize; i++)
- Res += GetFeature(i) != 0;
- return Res;
- }
+ size_t NumFeatures() const { return NumAddedFeatures; }
+ size_t NumFeatureUpdates() const { return NumUpdatedFeatures; }
void ResetFeatureSet() {
assert(Inputs.empty());
@@ -213,6 +211,8 @@ private:
std::vector<InputInfo*> Inputs;
bool CountingFeatures = false;
+ size_t NumAddedFeatures = 0;
+ size_t NumUpdatedFeatures = 0;
uint32_t InputSizesPerFeature[kFeatureSetSize];
uint32_t SmallestElementPerFeature[kFeatureSetSize];
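
Sizing note on the kFeatureSetSize bump above: at 1 << 21 entries, each of the
two uint32_t tables (InputSizesPerFeature and SmallestElementPerFeature) grows
to 2^21 * 4 bytes = 8 MiB, i.e. 16 MiB per corpus, up from 512 KiB total at
the old 1 << 16.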
diff --git a/lib/Fuzzer/FuzzerDefs.h b/lib/Fuzzer/FuzzerDefs.h
index 0f5b8a7cf211..bd1827508002 100644
--- a/lib/Fuzzer/FuzzerDefs.h
+++ b/lib/Fuzzer/FuzzerDefs.h
@@ -47,8 +47,30 @@
#ifdef __clang__ // avoid gcc warning.
# define ATTRIBUTE_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory")))
+# define ALWAYS_INLINE __attribute__((always_inline))
#else
# define ATTRIBUTE_NO_SANITIZE_MEMORY
+# define ALWAYS_INLINE
+#endif // __clang__
+
+#define ATTRIBUTE_NO_SANITIZE_ADDRESS __attribute__((no_sanitize_address))
+
+#if defined(__has_feature)
+# if __has_feature(address_sanitizer)
+# define ATTRIBUTE_NO_SANITIZE_ALL ATTRIBUTE_NO_SANITIZE_ADDRESS
+# elif __has_feature(memory_sanitizer)
+# define ATTRIBUTE_NO_SANITIZE_ALL ATTRIBUTE_NO_SANITIZE_MEMORY
+# else
+# define ATTRIBUTE_NO_SANITIZE_ALL
+# endif
+#else
+# define ATTRIBUTE_NO_SANITIZE_ALL
+#endif
+
+#if LIBFUZZER_WINDOWS
+#define ATTRIBUTE_INTERFACE __declspec(dllexport)
+#else
+#define ATTRIBUTE_INTERFACE __attribute__((visibility("default")))
#endif
namespace fuzzer {
@@ -74,9 +96,10 @@ typedef int (*UserCallback)(const uint8_t *Data, size_t Size);
int FuzzerDriver(int *argc, char ***argv, UserCallback Callback);
-struct ScopedDoingMyOwnMemmem {
- ScopedDoingMyOwnMemmem();
- ~ScopedDoingMyOwnMemmem();
+struct ScopedDoingMyOwnMemOrStr {
+ ScopedDoingMyOwnMemOrStr() { DoingMyOwnMemOrStr++; }
+ ~ScopedDoingMyOwnMemOrStr() { DoingMyOwnMemOrStr--; }
+ static int DoingMyOwnMemOrStr;
};
inline uint8_t Bswap(uint8_t x) { return x; }
@@ -84,6 +107,10 @@ inline uint16_t Bswap(uint16_t x) { return __builtin_bswap16(x); }
inline uint32_t Bswap(uint32_t x) { return __builtin_bswap32(x); }
inline uint64_t Bswap(uint64_t x) { return __builtin_bswap64(x); }
+uint8_t *ExtraCountersBegin();
+uint8_t *ExtraCountersEnd();
+void ClearExtraCounters();
+
} // namespace fuzzer
#endif // LLVM_FUZZER_DEFS_H
diff --git a/lib/Fuzzer/FuzzerDictionary.h b/lib/Fuzzer/FuzzerDictionary.h
index eba0eabb6838..84cee87b8971 100644
--- a/lib/Fuzzer/FuzzerDictionary.h
+++ b/lib/Fuzzer/FuzzerDictionary.h
@@ -20,8 +20,9 @@
namespace fuzzer {
// A simple POD sized array of bytes.
-template <size_t kMaxSize> class FixedWord {
+template <size_t kMaxSizeT> class FixedWord {
public:
+ static const size_t kMaxSize = kMaxSizeT;
FixedWord() {}
FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); }
@@ -32,10 +33,12 @@ public:
}
bool operator==(const FixedWord<kMaxSize> &w) const {
+ ScopedDoingMyOwnMemOrStr scoped_doing_my_own_mem_or_str;
return Size == w.Size && 0 == memcmp(Data, w.Data, Size);
}
bool operator<(const FixedWord<kMaxSize> &w) const {
+ ScopedDoingMyOwnMemOrStr scoped_doing_my_own_mem_or_str;
if (Size != w.Size)
return Size < w.Size;
return memcmp(Data, w.Data, Size) < 0;
@@ -50,7 +53,7 @@ private:
uint8_t Data[kMaxSize];
};
-typedef FixedWord<27> Word; // 28 bytes.
+typedef FixedWord<64> Word;
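// Sketch, assuming the layout above (one uint8_t size field plus a
// kMaxSize-byte payload): sizeof(Word) grows from 28 to 65 bytes, and the
// new kMaxSizeT/kMaxSize indirection lets callers read the capacity back:
//   static_assert(Word::kMaxSize == 64, "dictionary words up to 64 bytes");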
class DictionaryEntry {
public:
diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp
index 2bbcb25275e4..0fb83ca64de6 100644
--- a/lib/Fuzzer/FuzzerDriver.cpp
+++ b/lib/Fuzzer/FuzzerDriver.cpp
@@ -15,6 +15,7 @@
#include "FuzzerIO.h"
#include "FuzzerMutate.h"
#include "FuzzerRandom.h"
+#include "FuzzerShmem.h"
#include "FuzzerTracePC.h"
#include <algorithm>
#include <atomic>
@@ -277,7 +278,19 @@ static bool AllInputsAreFiles() {
return true;
}
-int MinimizeCrashInput(const std::vector<std::string> &Args) {
+static std::string GetDedupTokenFromFile(const std::string &Path) {
+ auto S = FileToString(Path);
+ auto Beg = S.find("DEDUP_TOKEN:");
+ if (Beg == std::string::npos)
+ return "";
+ auto End = S.find('\n', Beg);
+ if (End == std::string::npos)
+ return "";
+ return S.substr(Beg, End - Beg);
+}
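// Worked example (log content assumed): if the child's log contains the line
//   DEDUP_TOKEN: heap-use-after-free in Foo::Bar
// the helper returns exactly "DEDUP_TOKEN: heap-use-after-free in Foo::Bar"
// (prefix kept, trailing newline dropped); an empty result means no token,
// and the minimization loop below only continues while both tokens match.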
+
+int MinimizeCrashInput(const std::vector<std::string> &Args,
+ const FuzzingOptions &Options) {
if (Inputs->size() != 1) {
Printf("ERROR: -minimize_crash should be given one input file\n");
exit(1);
@@ -294,19 +307,18 @@ int MinimizeCrashInput(const std::vector<std::string> &Args) {
"INFO: defaulting to -max_total_time=600\n");
BaseCmd += " -max_total_time=600";
}
- // BaseCmd += " > /dev/null 2>&1 ";
+
+ auto LogFilePath = DirPlusFile(
+ TmpDir(), "libFuzzerTemp." + std::to_string(GetPid()) + ".txt");
+ auto LogFileRedirect = " > " + LogFilePath + " 2>&1 ";
std::string CurrentFilePath = InputFilePath;
while (true) {
Unit U = FileToVector(CurrentFilePath);
- if (U.size() < 2) {
- Printf("CRASH_MIN: '%s' is small enough\n", CurrentFilePath.c_str());
- return 0;
- }
Printf("CRASH_MIN: minimizing crash input: '%s' (%zd bytes)\n",
CurrentFilePath.c_str(), U.size());
- auto Cmd = BaseCmd + " " + CurrentFilePath;
+ auto Cmd = BaseCmd + " " + CurrentFilePath + LogFileRedirect;
Printf("CRASH_MIN: executing: %s\n", Cmd.c_str());
int ExitCode = ExecuteCommand(Cmd);
@@ -317,12 +329,19 @@ int MinimizeCrashInput(const std::vector<std::string> &Args) {
Printf("CRASH_MIN: '%s' (%zd bytes) caused a crash. Will try to minimize "
"it further\n",
CurrentFilePath.c_str(), U.size());
-
- std::string ArtifactPath = "minimized-from-" + Hash(U);
+ auto DedupToken1 = GetDedupTokenFromFile(LogFilePath);
+ if (!DedupToken1.empty())
+ Printf("CRASH_MIN: DedupToken1: %s\n", DedupToken1.c_str());
+
+ std::string ArtifactPath =
+ Flags.exact_artifact_path
+ ? Flags.exact_artifact_path
+ : Options.ArtifactPrefix + "minimized-from-" + Hash(U);
Cmd += " -minimize_crash_internal_step=1 -exact_artifact_path=" +
ArtifactPath;
Printf("CRASH_MIN: executing: %s\n", Cmd.c_str());
ExitCode = ExecuteCommand(Cmd);
+ CopyFileToErr(LogFilePath);
if (ExitCode == 0) {
if (Flags.exact_artifact_path) {
CurrentFilePath = Flags.exact_artifact_path;
@@ -330,11 +349,26 @@ int MinimizeCrashInput(const std::vector<std::string> &Args) {
}
Printf("CRASH_MIN: failed to minimize beyond %s (%d bytes), exiting\n",
CurrentFilePath.c_str(), U.size());
- return 0;
+ break;
}
+ auto DedupToken2 = GetDedupTokenFromFile(LogFilePath);
+ if (!DedupToken2.empty())
+ Printf("CRASH_MIN: DedupToken2: %s\n", DedupToken2.c_str());
+
+ if (DedupToken1 != DedupToken2) {
+ if (Flags.exact_artifact_path) {
+ CurrentFilePath = Flags.exact_artifact_path;
+ WriteToFile(U, CurrentFilePath);
+ }
+ Printf("CRASH_MIN: mismatch in dedup tokens"
+ " (looks like a different bug). Won't minimize further\n");
+ break;
+ }
+
CurrentFilePath = ArtifactPath;
- Printf("\n\n\n\n\n\n*********************************\n");
+ Printf("*********************************\n");
}
+ RemoveFile(LogFilePath);
return 0;
}
@@ -342,8 +376,11 @@ int MinimizeCrashInputInternalStep(Fuzzer *F, InputCorpus *Corpus) {
assert(Inputs->size() == 1);
std::string InputFilePath = Inputs->at(0);
Unit U = FileToVector(InputFilePath);
- assert(U.size() > 2);
Printf("INFO: Starting MinimizeCrashInputInternalStep: %zd\n", U.size());
+ if (U.size() < 2) {
+ Printf("INFO: The input is small enough, exiting\n");
+ exit(0);
+ }
Corpus->AddToCorpus(U, 0);
F->SetMaxInputLen(U.size());
F->SetMaxMutationLen(U.size() - 1);
@@ -353,24 +390,94 @@ int MinimizeCrashInputInternalStep(Fuzzer *F, InputCorpus *Corpus) {
return 0;
}
+int AnalyzeDictionary(Fuzzer *F, const std::vector<Unit>& Dict,
+ UnitVector& Corpus) {
+ Printf("Started dictionary minimization (up to %d tests)\n",
+ Dict.size() * Corpus.size() * 2);
+
+ // Scores and usage count for each dictionary unit.
+ std::vector<int> Scores(Dict.size());
+ std::vector<int> Usages(Dict.size());
+
+ std::vector<size_t> InitialFeatures;
+ std::vector<size_t> ModifiedFeatures;
+ for (auto &C : Corpus) {
+ // Get coverage for the testcase without modifications.
+ F->ExecuteCallback(C.data(), C.size());
+ InitialFeatures.clear();
+ TPC.CollectFeatures([&](size_t Feature) -> bool {
+ InitialFeatures.push_back(Feature);
+ return true;
+ });
+
+ for (size_t i = 0; i < Dict.size(); ++i) {
+ auto Data = C;
+ auto StartPos = std::search(Data.begin(), Data.end(),
+ Dict[i].begin(), Dict[i].end());
+ // Skip the dictionary unit if the testcase does not contain it.
+ if (StartPos == Data.end())
+ continue;
+
+ ++Usages[i];
+ while (StartPos != Data.end()) {
+ // Replace all occurrences of the dictionary unit in the testcase.
+ auto EndPos = StartPos + Dict[i].size();
+ for (auto It = StartPos; It != EndPos; ++It)
+ *It ^= 0xFF;
+
+ StartPos = std::search(EndPos, Data.end(),
+ Dict[i].begin(), Dict[i].end());
+ }
+
+ // Get coverage for testcase with masked occurrences of dictionary unit.
+ F->ExecuteCallback(Data.data(), Data.size());
+ ModifiedFeatures.clear();
+ TPC.CollectFeatures([&](size_t Feature) -> bool {
+ ModifiedFeatures.push_back(Feature);
+ return true;
+ });
+
+ if (InitialFeatures == ModifiedFeatures)
+ --Scores[i];
+ else
+ Scores[i] += 2;
+ }
+ }
+
+ Printf("###### Useless dictionary elements. ######\n");
+ for (size_t i = 0; i < Dict.size(); ++i) {
+ // Dictionary units with positive score are treated as useful ones.
+ if (Scores[i] > 0)
+ continue;
+
+ Printf("\"");
+ PrintASCII(Dict[i].data(), Dict[i].size(), "\"");
+ Printf(" # Score: %d, Used: %d\n", Scores[i], Usages[i]);
+ }
+ Printf("###### End of useless dictionary elements. ######\n");
+ return 0;
+}
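// Worked scoring example (values assumed): a dictionary unit found in 3
// corpus inputs whose masking changes coverage for 2 of them scores
// 2*2 + (-1) = 3 and is kept; a unit whose masking never changes coverage
// ends at -Usages and lands in the useless-elements list above.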
+
int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
using namespace fuzzer;
assert(argc && argv && "Argument pointers cannot be nullptr");
+ std::string Argv0((*argv)[0]);
EF = new ExternalFunctions();
if (EF->LLVMFuzzerInitialize)
EF->LLVMFuzzerInitialize(argc, argv);
const std::vector<std::string> Args(*argv, *argv + *argc);
assert(!Args.empty());
ProgName = new std::string(Args[0]);
+ if (Argv0 != *ProgName) {
+ Printf("ERROR: argv[0] has been modified in LLVMFuzzerInitialize\n");
+ exit(1);
+ }
ParseFlags(Args);
if (Flags.help) {
PrintHelp();
return 0;
}
- if (Flags.minimize_crash)
- return MinimizeCrashInput(Args);
-
if (Flags.close_fd_mask & 2)
DupAndCloseStderr();
if (Flags.close_fd_mask & 1)
@@ -401,7 +508,6 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
Options.MutateDepth = Flags.mutate_depth;
Options.UseCounters = Flags.use_counters;
Options.UseIndirCalls = Flags.use_indir_calls;
- Options.UseMemcmp = Flags.use_memcmp;
Options.UseMemmem = Flags.use_memmem;
Options.UseCmp = Flags.use_cmp;
Options.UseValueProfile = Flags.use_value_profile;
@@ -471,9 +577,37 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
Options.HandleXfsz = Flags.handle_xfsz;
SetSignalHandler(Options);
+ if (Flags.minimize_crash)
+ return MinimizeCrashInput(Args, Options);
+
if (Flags.minimize_crash_internal_step)
return MinimizeCrashInputInternalStep(F, Corpus);
+ if (auto Name = Flags.run_equivalence_server) {
+ SMR.Destroy(Name);
+ if (!SMR.Create(Name)) {
+ Printf("ERROR: can't create shared memory region\n");
+ return 1;
+ }
+ Printf("INFO: EQUIVALENCE SERVER UP\n");
+ while (true) {
+ SMR.WaitClient();
+ size_t Size = SMR.ReadByteArraySize();
+ SMR.WriteByteArray(nullptr, 0);
+ F->RunOne(SMR.GetByteArray(), Size);
+ SMR.PostServer();
+ }
+ return 0;
+ }
+
+ if (auto Name = Flags.use_equivalence_server) {
+ if (!SMR.Open(Name)) {
+ Printf("ERROR: can't open shared memory region\n");
+ return 1;
+ }
+ Printf("INFO: EQUIVALENCE CLIENT UP\n");
+ }
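// Assumed invocation sketch for this experimental mode: build two fuzz
// targets for the same API, then run
//   ./fuzzer_v1 -run_equivalence_server=eq1 &
//   ./fuzzer_v2 -use_equivalence_server=eq1 corpus/
// The client publishes each input through SMR before executing it; the
// server replays it, and Fuzzer::AnnounceOutput compares the two outputs.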
+
if (DoPlainRun) {
Options.SaveArtifacts = false;
int Runs = std::max(1, Flags.runs);
@@ -499,14 +633,12 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
if (Flags.merge) {
if (Options.MaxLen == 0)
F->SetMaxInputLen(kMaxSaneLen);
- if (TPC.UsingTracePcGuard()) {
- if (Flags.merge_control_file)
- F->CrashResistantMergeInternalStep(Flags.merge_control_file);
- else
- F->CrashResistantMerge(Args, *Inputs);
- } else {
- F->Merge(*Inputs);
- }
+ if (Flags.merge_control_file)
+ F->CrashResistantMergeInternalStep(Flags.merge_control_file);
+ else
+ F->CrashResistantMerge(Args, *Inputs,
+ Flags.load_coverage_summary,
+ Flags.save_coverage_summary);
exit(0);
}
@@ -519,6 +651,19 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
TemporaryMaxLen, /*ExitOnError=*/false);
}
+ if (Flags.analyze_dict) {
+ if (Dictionary.empty() || Inputs->empty()) {
+ Printf("ERROR: can't analyze dict without dict and corpus provided\n");
+ return 1;
+ }
+ if (AnalyzeDictionary(F, Dictionary, InitialCorpus)) {
+ Printf("Dictionary analysis failed\n");
+ exit(1);
+ }
+ Printf("Dictionary analysis suceeded\n");
+ exit(0);
+ }
+
if (Options.MaxLen == 0) {
size_t MaxLen = 0;
for (auto &U : InitialCorpus)
@@ -536,7 +681,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
F->Loop();
if (Flags.verbosity)
- Printf("Done %d runs in %zd second(s)\n", F->getTotalNumberOfRuns(),
+ Printf("Done %zd runs in %zd second(s)\n", F->getTotalNumberOfRuns(),
F->secondsSinceProcessStartUp());
F->PrintFinalStats();
diff --git a/lib/Fuzzer/FuzzerExtFunctions.def b/lib/Fuzzer/FuzzerExtFunctions.def
index 61c72e4a209e..3bc5302c31c6 100644
--- a/lib/Fuzzer/FuzzerExtFunctions.def
+++ b/lib/Fuzzer/FuzzerExtFunctions.def
@@ -29,22 +29,18 @@ EXT_FUNC(LLVMFuzzerCustomCrossOver, size_t,
EXT_FUNC(__lsan_enable, void, (), false);
EXT_FUNC(__lsan_disable, void, (), false);
EXT_FUNC(__lsan_do_recoverable_leak_check, int, (), false);
-EXT_FUNC(__sanitizer_get_number_of_counters, size_t, (), false);
EXT_FUNC(__sanitizer_install_malloc_and_free_hooks, int,
(void (*malloc_hook)(const volatile void *, size_t),
void (*free_hook)(const volatile void *)),
false);
-EXT_FUNC(__sanitizer_get_total_unique_caller_callee_pairs, size_t, (), false);
-EXT_FUNC(__sanitizer_get_total_unique_coverage, size_t, (), true);
-EXT_FUNC(__sanitizer_print_memory_profile, int, (size_t), false);
+EXT_FUNC(__sanitizer_print_memory_profile, int, (size_t, size_t), false);
EXT_FUNC(__sanitizer_print_stack_trace, void, (), true);
EXT_FUNC(__sanitizer_symbolize_pc, void,
(void *, const char *fmt, char *out_buf, size_t out_buf_size), false);
EXT_FUNC(__sanitizer_get_module_and_offset_for_pc, int,
(void *pc, char *module_path,
size_t module_path_len,void **pc_offset), false);
-EXT_FUNC(__sanitizer_reset_coverage, void, (), true);
EXT_FUNC(__sanitizer_set_death_callback, void, (void (*)(void)), true);
EXT_FUNC(__sanitizer_set_report_fd, void, (void*), false);
-EXT_FUNC(__sanitizer_update_counter_bitset_and_clear_counters, uintptr_t,
- (uint8_t*), false);
+EXT_FUNC(__sanitizer_dump_coverage, void, (const uintptr_t *, uintptr_t),
+ false);
diff --git a/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp b/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp
new file mode 100644
index 000000000000..77521698c80a
--- /dev/null
+++ b/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp
@@ -0,0 +1,60 @@
+//===- FuzzerExtFunctionsDlsymWin.cpp - Interface to external functions ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Implementation using dynamic loading for Windows.
+//===----------------------------------------------------------------------===//
+#include "FuzzerDefs.h"
+#if LIBFUZZER_WINDOWS
+
+#include "FuzzerExtFunctions.h"
+#include "FuzzerIO.h"
+#include "Windows.h"
+#include "Psapi.h"
+
+namespace fuzzer {
+
+ExternalFunctions::ExternalFunctions() {
+ HMODULE Modules[1024];
+ DWORD BytesNeeded;
+ HANDLE CurrentProcess = GetCurrentProcess();
+
+ if (!EnumProcessModules(CurrentProcess, Modules, sizeof(Modules),
+ &BytesNeeded)) {
+ Printf("EnumProcessModules failed (error: %d).\n", GetLastError());
+ exit(1);
+ }
+
+ if (sizeof(Modules) < BytesNeeded) {
+ Printf("Error: the array is not big enough to hold all loaded modules.\n");
+ exit(1);
+ }
+
+ for (size_t i = 0; i < (BytesNeeded / sizeof(HMODULE)); i++)
+ {
+ FARPROC Fn;
+#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \
+ if (this->NAME == nullptr) { \
+ Fn = GetProcAddress(Modules[i], #NAME); \
+ if (Fn == nullptr) \
+ Fn = GetProcAddress(Modules[i], #NAME "__dll"); \
+ this->NAME = (decltype(ExternalFunctions::NAME)) Fn; \
+ }
+#include "FuzzerExtFunctions.def"
+#undef EXT_FUNC
+ }
+
+#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \
+ if (this->NAME == nullptr && WARN) \
+ Printf("WARNING: Failed to find function \"%s\".\n", #NAME);
+#include "FuzzerExtFunctions.def"
+#undef EXT_FUNC
+}
+
+} // namespace fuzzer
+
+#endif // LIBFUZZER_WINDOWS
diff --git a/lib/Fuzzer/FuzzerExtraCounters.cpp b/lib/Fuzzer/FuzzerExtraCounters.cpp
new file mode 100644
index 000000000000..07dbe0fdee76
--- /dev/null
+++ b/lib/Fuzzer/FuzzerExtraCounters.cpp
@@ -0,0 +1,41 @@
+//===- FuzzerExtraCounters.cpp - Extra coverage counters ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Extra coverage counters defined by user code.
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerDefs.h"
+
+#if LIBFUZZER_LINUX
+__attribute__((weak)) extern uint8_t __start___libfuzzer_extra_counters;
+__attribute__((weak)) extern uint8_t __stop___libfuzzer_extra_counters;
+
+namespace fuzzer {
+uint8_t *ExtraCountersBegin() { return &__start___libfuzzer_extra_counters; }
+uint8_t *ExtraCountersEnd() { return &__stop___libfuzzer_extra_counters; }
+ATTRIBUTE_NO_SANITIZE_ALL
+void ClearExtraCounters() { // hand-written memset, don't asan-ify.
+ uintptr_t *Beg = reinterpret_cast<uintptr_t*>(ExtraCountersBegin());
+ uintptr_t *End = reinterpret_cast<uintptr_t*>(ExtraCountersEnd());
+ for (; Beg < End; Beg++) {
+ *Beg = 0;
+ __asm__ __volatile__("" : : : "memory");
+ }
+}
+
+} // namespace fuzzer
+
+#else
+// TODO: implement for other platforms.
+namespace fuzzer {
+uint8_t *ExtraCountersBegin() { return nullptr; }
+uint8_t *ExtraCountersEnd() { return nullptr; }
+void ClearExtraCounters() {}
+} // namespace fuzzer
+
+#endif
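// Sketch of the user side of this mechanism (array name and size are
// assumptions; the section name comes from the symbols above): placing an
// array in the __libfuzzer_extra_counters section makes the linker provide
// the __start/__stop symbols returned by ExtraCountersBegin/End on Linux.
__attribute__((section("__libfuzzer_extra_counters")))
static uint8_t MyExtraCounters[1024];
// Inside the target, e.g.: MyExtraCounters[CurrentStateId]++;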
diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def
index 22aad353acec..28bf0ca8ce69 100644
--- a/lib/Fuzzer/FuzzerFlags.def
+++ b/lib/Fuzzer/FuzzerFlags.def
@@ -39,14 +39,19 @@ FUZZER_FLAG_INT(merge, 0, "If 1, the 2-nd, 3-rd, etc corpora will be "
"merged into the 1-st corpus. Only interesting units will be taken. "
"This flag can be used to minimize a corpus.")
FUZZER_FLAG_STRING(merge_control_file, "internal flag")
+FUZZER_FLAG_STRING(save_coverage_summary, "Experimental:"
+ " save coverage summary to a given file."
+ " Used with -merge=1")
+FUZZER_FLAG_STRING(load_coverage_summary, "Experimental:"
+ " load coverage summary from a given file."
+ " Treat this coverage as belonging to the first corpus. "
+ " Used with -merge=1")
FUZZER_FLAG_INT(minimize_crash, 0, "If 1, minimizes the provided"
" crash input. Use with -runs=N or -max_total_time=N to limit "
"the number attempts")
FUZZER_FLAG_INT(minimize_crash_internal_step, 0, "internal flag")
FUZZER_FLAG_INT(use_counters, 1, "Use coverage counters")
FUZZER_FLAG_INT(use_indir_calls, 1, "Use indirect caller-callee counters")
-FUZZER_FLAG_INT(use_memcmp, 1,
- "Use hints from intercepting memcmp, strcmp, etc")
FUZZER_FLAG_INT(use_memmem, 1,
"Use hints from intercepting memmem, strstr, etc")
FUZZER_FLAG_INT(use_value_profile, 0,
@@ -85,7 +90,7 @@ FUZZER_FLAG_INT(print_coverage, 0, "If 1, print coverage information at exit."
FUZZER_FLAG_INT(dump_coverage, 0, "If 1, dump coverage information at exit."
" Experimental, only with trace-pc-guard")
FUZZER_FLAG_INT(handle_segv, 1, "If 1, try to intercept SIGSEGV.")
-FUZZER_FLAG_INT(handle_bus, 1, "If 1, try to intercept SIGSEGV.")
+FUZZER_FLAG_INT(handle_bus, 1, "If 1, try to intercept SIGBUS.")
FUZZER_FLAG_INT(handle_abrt, 1, "If 1, try to intercept SIGABRT.")
FUZZER_FLAG_INT(handle_ill, 1, "If 1, try to intercept SIGILL.")
FUZZER_FLAG_INT(handle_fpe, 1, "If 1, try to intercept SIGFPE.")
@@ -108,6 +113,10 @@ FUZZER_FLAG_STRING(exit_on_item, "Exit if an item with a given sha1 sum"
" was added to the corpus. "
"Used primarily for testing libFuzzer itself.")
+FUZZER_FLAG_STRING(run_equivalence_server, "Experimental")
+FUZZER_FLAG_STRING(use_equivalence_server, "Experimental")
+FUZZER_FLAG_INT(analyze_dict, 0, "Experimental")
+
FUZZER_DEPRECATED_FLAG(exit_on_first)
FUZZER_DEPRECATED_FLAG(save_minimized_corpus)
FUZZER_DEPRECATED_FLAG(sync_command)
diff --git a/lib/Fuzzer/FuzzerIO.cpp b/lib/Fuzzer/FuzzerIO.cpp
index eda8e8772930..e3f609ed8a80 100644
--- a/lib/Fuzzer/FuzzerIO.cpp
+++ b/lib/Fuzzer/FuzzerIO.cpp
@@ -96,14 +96,15 @@ void DupAndCloseStderr() {
if (NewOutputFile) {
OutputFile = NewOutputFile;
if (EF->__sanitizer_set_report_fd)
- EF->__sanitizer_set_report_fd(reinterpret_cast<void *>(OutputFd));
- CloseFile(2);
+ EF->__sanitizer_set_report_fd(
+ reinterpret_cast<void *>(GetHandleFromFd(OutputFd)));
+ DiscardOutput(2);
}
}
}
void CloseStdout() {
- CloseFile(1);
+ DiscardOutput(1);
}
void Printf(const char *Fmt, ...) {
diff --git a/lib/Fuzzer/FuzzerIO.h b/lib/Fuzzer/FuzzerIO.h
index 15bfd3d34727..3b66a52d1a64 100644
--- a/lib/Fuzzer/FuzzerIO.h
+++ b/lib/Fuzzer/FuzzerIO.h
@@ -40,12 +40,17 @@ std::string DirName(const std::string &FileName);
// Returns path to a TmpDir.
std::string TmpDir();
+bool IsInterestingCoverageFile(const std::string &FileName);
+
void DupAndCloseStderr();
void CloseStdout();
void Printf(const char *Fmt, ...);
+// Print using raw syscalls, useful when printing at early init stages.
+void RawPrint(const char *Str);
+
// Platform specific functions:
bool IsFile(const std::string &Path);
@@ -62,6 +67,10 @@ int DuplicateFile(int Fd);
void RemoveFile(const std::string &Path);
+void DiscardOutput(int Fd);
+
+intptr_t GetHandleFromFd(int fd);
+
} // namespace fuzzer
#endif // LLVM_FUZZER_IO_H
diff --git a/lib/Fuzzer/FuzzerIOPosix.cpp b/lib/Fuzzer/FuzzerIOPosix.cpp
index 6d8edf6ff538..c5ebdbac467b 100644
--- a/lib/Fuzzer/FuzzerIOPosix.cpp
+++ b/lib/Fuzzer/FuzzerIOPosix.cpp
@@ -75,6 +75,18 @@ void RemoveFile(const std::string &Path) {
unlink(Path.c_str());
}
+void DiscardOutput(int Fd) {
+ FILE* Temp = fopen("/dev/null", "w");
+ if (!Temp)
+ return;
+ dup2(fileno(Temp), Fd);
+ fclose(Temp);
+}
+
+intptr_t GetHandleFromFd(int fd) {
+ return static_cast<intptr_t>(fd);
+}
+
std::string DirName(const std::string &FileName) {
char *Tmp = new char[FileName.size() + 1];
memcpy(Tmp, FileName.c_str(), FileName.size() + 1);
@@ -89,6 +101,23 @@ std::string TmpDir() {
return "/tmp";
}
+bool IsInterestingCoverageFile(const std::string &FileName) {
+ if (FileName.find("compiler-rt/lib/") != std::string::npos)
+ return false; // sanitizer internal.
+ if (FileName.find("/usr/lib/") != std::string::npos)
+ return false;
+ if (FileName.find("/usr/include/") != std::string::npos)
+ return false;
+ if (FileName == "<null>")
+ return false;
+ return true;
+}
+
+
+void RawPrint(const char *Str) {
+ write(2, Str, strlen(Str));
+}
+
} // namespace fuzzer
#endif // LIBFUZZER_POSIX
diff --git a/lib/Fuzzer/FuzzerIOWindows.cpp b/lib/Fuzzer/FuzzerIOWindows.cpp
index 056f0721a336..75d4e3a06071 100644
--- a/lib/Fuzzer/FuzzerIOWindows.cpp
+++ b/lib/Fuzzer/FuzzerIOWindows.cpp
@@ -89,8 +89,10 @@ void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
HANDLE FindHandle(FindFirstFileA(Path.c_str(), &FindInfo));
if (FindHandle == INVALID_HANDLE_VALUE)
{
- Printf("No file found in: %s.\n", Dir.c_str());
- return;
+ if (GetLastError() == ERROR_FILE_NOT_FOUND)
+ return;
+ Printf("No such directory: %s; exiting\n", Dir.c_str());
+ exit(1);
}
do {
@@ -139,6 +141,18 @@ void RemoveFile(const std::string &Path) {
_unlink(Path.c_str());
}
+void DiscardOutput(int Fd) {
+ FILE* Temp = fopen("nul", "w");
+ if (!Temp)
+ return;
+ _dup2(_fileno(Temp), Fd);
+ fclose(Temp);
+}
+
+intptr_t GetHandleFromFd(int fd) {
+ return _get_osfhandle(fd);
+}
+
static bool IsSeparator(char C) {
return C == '\\' || C == '/';
}
@@ -277,7 +291,32 @@ std::string DirName(const std::string &FileName) {
return FileName.substr(0, LocationLen + DirLen);
}
-std::string TmpDir() { return "TODO: implement TmpDir"; }
+std::string TmpDir() {
+ std::string Tmp;
+ Tmp.resize(MAX_PATH + 1);
+ DWORD Size = GetTempPathA(Tmp.size(), &Tmp[0]);
+ if (Size == 0) {
+ Printf("Couldn't get Tmp path.\n");
+ exit(1);
+ }
+ Tmp.resize(Size);
+ return Tmp;
+}
+
+bool IsInterestingCoverageFile(const std::string &FileName) {
+ if (FileName.find("Program Files") != std::string::npos)
+ return false;
+ if (FileName.find("compiler-rt\\lib\\") != std::string::npos)
+ return false; // sanitizer internal.
+ if (FileName == "<null>")
+ return false;
+ return true;
+}
+
+void RawPrint(const char *Str) {
+ // Not tested, may or may not work. Fix if needed.
+ Printf("%s", Str);
+}
} // namespace fuzzer
diff --git a/lib/Fuzzer/FuzzerInterface.h b/lib/Fuzzer/FuzzerInterface.h
index d47e20e3a2b9..c2c0a39843c0 100644
--- a/lib/Fuzzer/FuzzerInterface.h
+++ b/lib/Fuzzer/FuzzerInterface.h
@@ -55,7 +55,7 @@ size_t LLVMFuzzerCustomCrossOver(const uint8_t *Data1, size_t Size1,
unsigned int Seed);
// Experimental, may go away in future.
-// libFuzzer-provided function to be used inside LLVMFuzzerTestOneInput.
+// libFuzzer-provided function to be used inside LLVMFuzzerCustomMutator.
// Mutates raw data in [Data, Data+Size) inplace.
// Returns the new size, which is not greater than MaxSize.
size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize);
diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h
index 0d2c7a78aca8..c26615631ecd 100644
--- a/lib/Fuzzer/FuzzerInternal.h
+++ b/lib/Fuzzer/FuzzerInternal.h
@@ -32,26 +32,6 @@ using namespace std::chrono;
class Fuzzer {
public:
- // Aggregates all available coverage measurements.
- struct Coverage {
- Coverage() { Reset(); }
-
- void Reset() {
- BlockCoverage = 0;
- CallerCalleeCoverage = 0;
- CounterBitmapBits = 0;
- CounterBitmap.clear();
- VPMap.Reset();
- }
-
- size_t BlockCoverage;
- size_t CallerCalleeCoverage;
- // Precalculated number of bits in CounterBitmap.
- size_t CounterBitmapBits;
- std::vector<uint8_t> CounterBitmap;
- ValueBitMap VPMap;
- };
-
Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD,
FuzzingOptions Options);
~Fuzzer();
@@ -90,25 +70,23 @@ public:
// Merge Corpora[1:] into Corpora[0].
void Merge(const std::vector<std::string> &Corpora);
void CrashResistantMerge(const std::vector<std::string> &Args,
- const std::vector<std::string> &Corpora);
+ const std::vector<std::string> &Corpora,
+ const char *CoverageSummaryInputPathOrNull,
+ const char *CoverageSummaryOutputPathOrNull);
void CrashResistantMergeInternalStep(const std::string &ControlFilePath);
- // Returns a subset of 'Extra' that adds coverage to 'Initial'.
- UnitVector FindExtraUnits(const UnitVector &Initial, const UnitVector &Extra);
MutationDispatcher &GetMD() { return MD; }
void PrintFinalStats();
void SetMaxInputLen(size_t MaxInputLen);
void SetMaxMutationLen(size_t MaxMutationLen);
void RssLimitCallback();
- // Public for tests.
- void ResetCoverage();
-
bool InFuzzingThread() const { return IsMyThread; }
size_t GetCurrentUnitInFuzzingThead(const uint8_t **Data) const;
void TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size,
bool DuringInitialCorpusExecution);
void HandleMalloc(size_t Size);
+ void AnnounceOutput(const uint8_t *Data, size_t Size);
private:
void AlarmCallback();
@@ -134,16 +112,10 @@ private:
// Stop tracing.
void StopTraceRecording();
- void SetDeathCallback();
static void StaticDeathCallback();
void DumpCurrentUnit(const char *Prefix);
void DeathCallback();
- void ResetEdgeCoverage();
- void ResetCounters();
- void PrepareCounters(Fuzzer::Coverage *C);
- bool RecordMaxCoverage(Fuzzer::Coverage *C);
-
void AllocateCurrentUnitData();
uint8_t *CurrentUnitData = nullptr;
std::atomic<size_t> CurrentUnitSize;
@@ -166,16 +138,11 @@ private:
long TimeOfLongestUnitInSeconds = 0;
long EpochOfLastReadOfOutputCorpus = 0;
- // Maximum recorded coverage.
- Coverage MaxCoverage;
-
size_t MaxInputLen = 0;
size_t MaxMutationLen = 0;
// Need to know our own thread.
static thread_local bool IsMyThread;
-
- bool InMergeMode = false;
};
}; // namespace fuzzer
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp
index 9f49d1557990..704092896eb6 100644
--- a/lib/Fuzzer/FuzzerLoop.cpp
+++ b/lib/Fuzzer/FuzzerLoop.cpp
@@ -14,6 +14,7 @@
#include "FuzzerIO.h"
#include "FuzzerMutate.h"
#include "FuzzerRandom.h"
+#include "FuzzerShmem.h"
#include "FuzzerTracePC.h"
#include <algorithm>
#include <cstring>
@@ -42,73 +43,11 @@ static const size_t kMaxUnitSizeToPrint = 256;
thread_local bool Fuzzer::IsMyThread;
-static void MissingExternalApiFunction(const char *FnName) {
- Printf("ERROR: %s is not defined. Exiting.\n"
- "Did you use -fsanitize-coverage=... to build your code?\n",
- FnName);
- exit(1);
-}
-
-#define CHECK_EXTERNAL_FUNCTION(fn) \
- do { \
- if (!(EF->fn)) \
- MissingExternalApiFunction(#fn); \
- } while (false)
+SharedMemoryRegion SMR;
// Only one Fuzzer per process.
static Fuzzer *F;
-void Fuzzer::ResetEdgeCoverage() {
- CHECK_EXTERNAL_FUNCTION(__sanitizer_reset_coverage);
- EF->__sanitizer_reset_coverage();
-}
-
-void Fuzzer::ResetCounters() {
- if (Options.UseCounters)
- EF->__sanitizer_update_counter_bitset_and_clear_counters(0);
-}
-
-void Fuzzer::PrepareCounters(Fuzzer::Coverage *C) {
- if (Options.UseCounters) {
- size_t NumCounters = EF->__sanitizer_get_number_of_counters();
- C->CounterBitmap.resize(NumCounters);
- }
-}
-
-// Records data to a maximum coverage tracker. Returns true if additional
-// coverage was discovered.
-bool Fuzzer::RecordMaxCoverage(Fuzzer::Coverage *C) {
- bool Res = false;
-
- uint64_t NewBlockCoverage = EF->__sanitizer_get_total_unique_coverage();
- if (NewBlockCoverage > C->BlockCoverage) {
- Res = true;
- C->BlockCoverage = NewBlockCoverage;
- }
-
- if (Options.UseIndirCalls &&
- EF->__sanitizer_get_total_unique_caller_callee_pairs) {
- uint64_t NewCallerCalleeCoverage =
- EF->__sanitizer_get_total_unique_caller_callee_pairs();
- if (NewCallerCalleeCoverage > C->CallerCalleeCoverage) {
- Res = true;
- C->CallerCalleeCoverage = NewCallerCalleeCoverage;
- }
- }
-
- if (Options.UseCounters) {
- uint64_t CounterDelta =
- EF->__sanitizer_update_counter_bitset_and_clear_counters(
- C->CounterBitmap.data());
- if (CounterDelta > 0) {
- Res = true;
- C->CounterBitmapBits += CounterDelta;
- }
- }
-
- return Res;
-}
-
// Leak detection is expensive, so we first check if there were more mallocs
// than frees (using the sanitizer malloc hooks) and only then try to call lsan.
struct MallocFreeTracer {
@@ -176,12 +115,12 @@ void Fuzzer::HandleMalloc(size_t Size) {
Fuzzer::Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD,
FuzzingOptions Options)
: CB(CB), Corpus(Corpus), MD(MD), Options(Options) {
- SetDeathCallback();
+ if (EF->__sanitizer_set_death_callback)
+ EF->__sanitizer_set_death_callback(StaticDeathCallback);
InitializeTraceState();
assert(!F);
F = this;
TPC.ResetMaps();
- ResetCoverage();
IsMyThread = true;
if (Options.DetectLeaks && EF->__sanitizer_install_malloc_and_free_hooks)
EF->__sanitizer_install_malloc_and_free_hooks(MallocHook, FreeHook);
@@ -206,33 +145,12 @@ void Fuzzer::AllocateCurrentUnitData() {
CurrentUnitData = new uint8_t[MaxInputLen];
}
-void Fuzzer::SetDeathCallback() {
- CHECK_EXTERNAL_FUNCTION(__sanitizer_set_death_callback);
- EF->__sanitizer_set_death_callback(StaticDeathCallback);
-}
-
void Fuzzer::StaticDeathCallback() {
assert(F);
F->DeathCallback();
}
-static void WarnOnUnsuccessfullMerge(bool DoWarn) {
- if (!DoWarn) return;
- Printf(
- "***\n"
- "***\n"
- "***\n"
- "*** NOTE: merge did not succeed due to a failure on one of the inputs.\n"
- "*** You will need to filter out crashes from the corpus, e.g. like this:\n"
- "*** for f in WITH_CRASHES/*; do ./fuzzer $f && cp $f NO_CRASHES; done\n"
- "*** Future versions may have crash-resistant merge, stay tuned.\n"
- "***\n"
- "***\n"
- "***\n");
-}
-
void Fuzzer::DumpCurrentUnit(const char *Prefix) {
- WarnOnUnsuccessfullMerge(InMergeMode);
if (!CurrentUnitData) return; // Happens when running individual inputs.
MD.PrintMutationSequence();
Printf("; base unit: %s\n", Sha1ToString(BaseSha1).c_str());
@@ -293,7 +211,10 @@ void Fuzzer::InterruptCallback() {
NO_SANITIZE_MEMORY
void Fuzzer::AlarmCallback() {
assert(Options.UnitTimeoutSec > 0);
+ // On Windows, the alarm callback is executed by a different thread.
+#if !LIBFUZZER_WINDOWS
if (!InFuzzingThread()) return;
+#endif
if (!RunningCB)
return; // We have not started running units yet.
size_t Seconds =
@@ -323,7 +244,7 @@ void Fuzzer::RssLimitCallback() {
GetPid(), GetPeakRSSMb(), Options.RssLimitMb);
Printf(" To change the out-of-memory limit use -rss_limit_mb=<N>\n\n");
if (EF->__sanitizer_print_memory_profile)
- EF->__sanitizer_print_memory_profile(95);
+ EF->__sanitizer_print_memory_profile(95, 8);
DumpCurrentUnit("oom-");
Printf("SUMMARY: libFuzzer: out-of-memory\n");
PrintFinalStats();
@@ -338,26 +259,18 @@ void Fuzzer::PrintStats(const char *Where, const char *End, size_t Units) {
csvHeaderPrinted = true;
Printf("runs,block_cov,bits,cc_cov,corpus,execs_per_sec,tbms,reason\n");
}
- Printf("%zd,%zd,%zd,%zd,%zd,%zd,%s\n", TotalNumberOfRuns,
- MaxCoverage.BlockCoverage, MaxCoverage.CounterBitmapBits,
- MaxCoverage.CallerCalleeCoverage, Corpus.size(), ExecPerSec, Where);
+ Printf("%zd,%zd,%zd,%zd,%s\n", TotalNumberOfRuns,
+ TPC.GetTotalPCCoverage(),
+ Corpus.size(), ExecPerSec, Where);
}
if (!Options.Verbosity)
return;
Printf("#%zd\t%s", TotalNumberOfRuns, Where);
- if (MaxCoverage.BlockCoverage)
- Printf(" cov: %zd", MaxCoverage.BlockCoverage);
- if (size_t N = MaxCoverage.VPMap.GetNumBitsSinceLastMerge())
- Printf(" vp: %zd", N);
if (size_t N = TPC.GetTotalPCCoverage())
Printf(" cov: %zd", N);
- if (auto TB = MaxCoverage.CounterBitmapBits)
- Printf(" bits: %zd", TB);
if (size_t N = Corpus.NumFeatures())
Printf( " ft: %zd", N);
- if (MaxCoverage.CallerCalleeCoverage)
- Printf(" indir: %zd", MaxCoverage.CallerCalleeCoverage);
if (!Corpus.empty()) {
Printf(" corp: %zd", Corpus.NumActiveUnits());
if (size_t N = Corpus.SizeInBytes()) {
@@ -456,7 +369,7 @@ void Fuzzer::RereadOutputCorpus(size_t MaxSize) {
}
void Fuzzer::ShuffleCorpus(UnitVector *V) {
- std::random_shuffle(V->begin(), V->end(), MD.GetRand());
+ std::shuffle(V->begin(), V->end(), MD.GetRand());
if (Options.PreferSmall)
std::stable_sort(V->begin(), V->end(), [](const Unit &A, const Unit &B) {
return A.size() < B.size();
@@ -476,8 +389,6 @@ void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) {
if (size_t NumFeatures = RunOne(U)) {
CheckExitOnSrcPosOrItem();
Corpus.AddToCorpus(U, NumFeatures);
- if (Options.Verbosity >= 2)
- Printf("NEW0: %zd L %zd\n", MaxCoverage.BlockCoverage, U.size());
}
TryDetectingAMemoryLeak(U.data(), U.size(),
/*DuringInitialCorpusExecution*/ true);
@@ -496,18 +407,11 @@ size_t Fuzzer::RunOne(const uint8_t *Data, size_t Size) {
ExecuteCallback(Data, Size);
- size_t Res = 0;
- if (size_t NumFeatures = TPC.CollectFeatures([&](size_t Feature) -> bool {
- return Corpus.AddFeature(Feature, Size, Options.Shrink);
- }))
- Res = NumFeatures;
-
- if (!TPC.UsingTracePcGuard()) {
- if (TPC.UpdateValueProfileMap(&MaxCoverage.VPMap))
- Res = 1;
- if (!Res && RecordMaxCoverage(&MaxCoverage))
- Res = 1;
- }
+ size_t NumUpdatesBefore = Corpus.NumFeatureUpdates();
+ TPC.CollectFeatures([&](size_t Feature) {
+ Corpus.AddFeature(Feature, Size, Options.Shrink);
+ });
+ size_t NumUpdatesAfter = Corpus.NumFeatureUpdates();
auto TimeOfUnit =
duration_cast<seconds>(UnitStopTime - UnitStartTime).count();
@@ -520,7 +424,7 @@ size_t Fuzzer::RunOne(const uint8_t *Data, size_t Size) {
Printf("Slowest unit: %zd s:\n", TimeOfLongestUnitInSeconds);
WriteUnitToFileWithPrefix({Data, Data + Size}, "slow-unit-");
}
- return Res;
+ return NumUpdatesAfter - NumUpdatesBefore;
}
size_t Fuzzer::GetCurrentUnitInFuzzingThead(const uint8_t **Data) const {
@@ -531,6 +435,8 @@ size_t Fuzzer::GetCurrentUnitInFuzzingThead(const uint8_t **Data) const {
void Fuzzer::ExecuteCallback(const uint8_t *Data, size_t Size) {
assert(InFuzzingThread());
+ if (SMR.IsClient())
+ SMR.WriteByteArray(Data, Size);
// We copy the contents of Unit into a separate heap buffer
// so that we reliably find buffer overflows in it.
uint8_t *DataCopy = new uint8_t[Size];
@@ -540,7 +446,6 @@ void Fuzzer::ExecuteCallback(const uint8_t *Data, size_t Size) {
CurrentUnitSize = Size;
AllocTracer.Start(Options.TraceMalloc);
UnitStartTime = system_clock::now();
- ResetCounters(); // Reset coverage right before the callback.
TPC.ResetMaps();
RunningCB = true;
int Res = CB(DataCopy, Size);
@@ -597,77 +502,6 @@ void Fuzzer::ReportNewCoverage(InputInfo *II, const Unit &U) {
TPC.PrintNewPCs();
}
-// Finds minimal number of units in 'Extra' that add coverage to 'Initial'.
-// We do it by actually executing the units, sometimes more than once,
-// because we may be using different coverage-like signals and the only
-// common thing between them is that we can say "this unit found new stuff".
-UnitVector Fuzzer::FindExtraUnits(const UnitVector &Initial,
- const UnitVector &Extra) {
- UnitVector Res = Extra;
- UnitVector Tmp;
- size_t OldSize = Res.size();
- for (int Iter = 0; Iter < 10; Iter++) {
- ShuffleCorpus(&Res);
- TPC.ResetMaps();
- Corpus.ResetFeatureSet();
- ResetCoverage();
-
- for (auto &U : Initial) {
- TPC.ResetMaps();
- RunOne(U);
- }
-
- Tmp.clear();
- for (auto &U : Res) {
- TPC.ResetMaps();
- if (RunOne(U))
- Tmp.push_back(U);
- }
-
- char Stat[7] = "MIN ";
- Stat[3] = '0' + Iter;
- PrintStats(Stat, "\n", Tmp.size());
-
- size_t NewSize = Tmp.size();
- assert(NewSize <= OldSize);
- Res.swap(Tmp);
-
- if (NewSize + 5 >= OldSize)
- break;
- OldSize = NewSize;
- }
- return Res;
-}
-
-void Fuzzer::Merge(const std::vector<std::string> &Corpora) {
- if (Corpora.size() <= 1) {
- Printf("Merge requires two or more corpus dirs\n");
- return;
- }
- InMergeMode = true;
- std::vector<std::string> ExtraCorpora(Corpora.begin() + 1, Corpora.end());
-
- assert(MaxInputLen > 0);
- UnitVector Initial, Extra;
- ReadDirToVectorOfUnits(Corpora[0].c_str(), &Initial, nullptr, MaxInputLen,
- true);
- for (auto &C : ExtraCorpora)
- ReadDirToVectorOfUnits(C.c_str(), &Extra, nullptr, MaxInputLen, true);
-
- if (!Initial.empty()) {
- Printf("=== Minimizing the initial corpus of %zd units\n", Initial.size());
- Initial = FindExtraUnits({}, Initial);
- }
-
- Printf("=== Merging extra %zd units\n", Extra.size());
- auto Res = FindExtraUnits(Initial, Extra);
-
- for (auto &U: Res)
- WriteToOutputCorpus(U);
-
- Printf("=== Merge: written %zd units\n", Res.size());
-}
-
// Tries detecting a memory leak on the particular input that we have just
// executed before calling this function.
void Fuzzer::TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size,
@@ -762,12 +596,6 @@ void Fuzzer::MutateAndTestOne() {
}
}
-void Fuzzer::ResetCoverage() {
- ResetEdgeCoverage();
- MaxCoverage.Reset();
- PrepareCounters(&MaxCoverage);
-}
-
void Fuzzer::Loop() {
TPC.InitializePrintNewPCs();
system_clock::time_point LastCorpusReload = system_clock::now();
@@ -792,7 +620,7 @@ void Fuzzer::Loop() {
}
void Fuzzer::MinimizeCrashLoop(const Unit &U) {
- if (U.size() <= 2) return;
+ if (U.size() <= 1) return;
while (!TimedOut() && TotalNumberOfRuns < Options.MaxNumberOfRuns) {
MD.StartMutationSequence();
memcpy(CurrentUnitData, U.data(), U.size());
@@ -806,6 +634,29 @@ void Fuzzer::MinimizeCrashLoop(const Unit &U) {
}
}
+void Fuzzer::AnnounceOutput(const uint8_t *Data, size_t Size) {
+ if (SMR.IsServer()) {
+ SMR.WriteByteArray(Data, Size);
+ } else if (SMR.IsClient()) {
+ SMR.PostClient();
+ SMR.WaitServer();
+ size_t OtherSize = SMR.ReadByteArraySize();
+ uint8_t *OtherData = SMR.GetByteArray();
+ if (Size != OtherSize || memcmp(Data, OtherData, Size) != 0) {
+ size_t i = 0;
+ for (i = 0; i < Min(Size, OtherSize); i++)
+ if (Data[i] != OtherData[i])
+ break;
+ Printf("==%lu== ERROR: libFuzzer: equivalence-mismatch. Sizes: %zd %zd; "
+ "offset %zd\n", GetPid(), Size, OtherSize, i);
+ DumpCurrentUnit("mismatch-");
+ Printf("SUMMARY: libFuzzer: equivalence-mismatch\n");
+ PrintFinalStats();
+ _Exit(Options.ErrorExitCode);
+ }
+ }
+}
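// Handshake recap, reconstructed from the calls above and the server loop in
// FuzzerDriver: the client writes each input in ExecuteCallback, runs its own
// callback, posts the client semaphore, and waits; the server wakes, replays
// the input via RunOne, publishes its output through the server branch here,
// and posts back; the client then compares both outputs byte by byte and
// exits with ErrorExitCode on the first mismatch.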
+
} // namespace fuzzer
extern "C" {
@@ -814,4 +665,10 @@ size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) {
assert(fuzzer::F);
return fuzzer::F->GetMD().DefaultMutate(Data, Size, MaxSize);
}
+
+// Experimental
+void LLVMFuzzerAnnounceOutput(const uint8_t *Data, size_t Size) {
+ assert(fuzzer::F);
+ fuzzer::F->AnnounceOutput(Data, Size);
+}
} // extern "C"
diff --git a/lib/Fuzzer/FuzzerMerge.cpp b/lib/Fuzzer/FuzzerMerge.cpp
index 9e559115680c..e66460c29e2f 100644
--- a/lib/Fuzzer/FuzzerMerge.cpp
+++ b/lib/Fuzzer/FuzzerMerge.cpp
@@ -17,6 +17,7 @@
#include <fstream>
#include <iterator>
+#include <set>
#include <sstream>
namespace fuzzer {
@@ -73,6 +74,7 @@ bool Merger::Parse(std::istream &IS, bool ParseCoverage) {
size_t ExpectedStartMarker = 0;
const size_t kInvalidStartMarker = -1;
size_t LastSeenStartMarker = kInvalidStartMarker;
+ std::vector<uint32_t> TmpFeatures;
while (std::getline(IS, Line, '\n')) {
std::istringstream ISS1(Line);
std::string Marker;
@@ -88,17 +90,17 @@ bool Merger::Parse(std::istream &IS, bool ParseCoverage) {
assert(ExpectedStartMarker < Files.size());
ExpectedStartMarker++;
} else if (Marker == "DONE") {
- // DONE FILE_SIZE COV1 COV2 COV3 ...
+ // DONE FILE_ID COV1 COV2 COV3 ...
size_t CurrentFileIdx = N;
if (CurrentFileIdx != LastSeenStartMarker)
return false;
LastSeenStartMarker = kInvalidStartMarker;
if (ParseCoverage) {
- auto &V = Files[CurrentFileIdx].Features;
- V.clear();
+ TmpFeatures.clear(); // use a vector from outer scope to avoid resizes.
while (ISS1 >> std::hex >> N)
- V.push_back(N);
- std::sort(V.begin(), V.end());
+ TmpFeatures.push_back(N);
+ std::sort(TmpFeatures.begin(), TmpFeatures.end());
+ Files[CurrentFileIdx].Features = TmpFeatures;
}
} else {
return false;
@@ -111,12 +113,20 @@ bool Merger::Parse(std::istream &IS, bool ParseCoverage) {
return true;
}
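// Assumed control-file shape, reconstructed from the parser above (feature
// ids are hex after DONE):
//   3                  <- total number of files
//   1                  <- NumFilesInFirstCorpus
//   fileA
//   fileB
//   fileC
//   STARTED 0 1024     <- file id, file size
//   DONE 0 11 22 33    <- file id, then its features
// A STARTED with no matching DONE marks the input that crashed the inner
// process, which is what makes the merge crash-resistant.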
+size_t Merger::ApproximateMemoryConsumption() const {
+ size_t Res = 0;
+ for (const auto &F: Files)
+ Res += sizeof(F) + F.Features.size() * sizeof(F.Features[0]);
+ return Res;
+}
+
// Decides which files need to be merged (add those to NewFiles).
// Returns the number of new features added.
-size_t Merger::Merge(std::vector<std::string> *NewFiles) {
+size_t Merger::Merge(const std::set<uint32_t> &InitialFeatures,
+ std::vector<std::string> *NewFiles) {
NewFiles->clear();
assert(NumFilesInFirstCorpus <= Files.size());
- std::set<uint32_t> AllFeatures;
+ std::set<uint32_t> AllFeatures(InitialFeatures);
// What features are in the initial corpus?
for (size_t i = 0; i < NumFilesInFirstCorpus; i++) {
@@ -158,6 +168,42 @@ size_t Merger::Merge(std::vector<std::string> *NewFiles) {
return AllFeatures.size() - InitialNumFeatures;
}
+void Merger::PrintSummary(std::ostream &OS) {
+ for (auto &File : Files) {
+ OS << std::hex;
+ OS << File.Name << " size: " << File.Size << " features: ";
+ for (auto Feature : File.Features)
+ OS << " " << Feature;
+ OS << "\n";
+ }
+}
+
+std::set<uint32_t> Merger::AllFeatures() const {
+ std::set<uint32_t> S;
+ for (auto &File : Files)
+ S.insert(File.Features.begin(), File.Features.end());
+ return S;
+}
+
+std::set<uint32_t> Merger::ParseSummary(std::istream &IS) {
+ std::string Line, Tmp;
+ std::set<uint32_t> Res;
+ while (std::getline(IS, Line, '\n')) {
+ size_t N;
+ std::istringstream ISS1(Line);
+ ISS1 >> Tmp; // Name
+ ISS1 >> Tmp; // size:
+ assert(Tmp == "size:" && "Corrupt summary file");
+ ISS1 >> std::hex;
+ ISS1 >> N; // File Size
+ ISS1 >> Tmp; // features:
+ assert(Tmp == "features:" && "Corrupt summary file");
+ while (ISS1 >> std::hex >> N)
+ Res.insert(N);
+ }
+ return Res;
+}
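// Example of one summary line as written by PrintSummary and re-read here
// (file name assumed; size and feature ids are hex):
//   corpus/unit-1 size: 2c features:  1f 2a 3b
// ParseSummary collects the union of all feature ids, which the caller then
// feeds to Merge() as already-covered InitialFeatures.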
+
// Inner process. May crash if the target crashes.
void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
Printf("MERGE-INNER: using the control file '%s'\n", CFPath.c_str());
@@ -208,7 +254,9 @@ void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
// Outer process. Does not call the target code and thus should not fail.
void Fuzzer::CrashResistantMerge(const std::vector<std::string> &Args,
- const std::vector<std::string> &Corpora) {
+ const std::vector<std::string> &Corpora,
+ const char *CoverageSummaryInputPathOrNull,
+ const char *CoverageSummaryOutputPathOrNull) {
if (Corpora.size() <= 1) {
Printf("Merge requires two or more corpus dirs\n");
return;
@@ -239,15 +287,21 @@ void Fuzzer::CrashResistantMerge(const std::vector<std::string> &Args,
// Execute the inner process until it passes.
// Every inner process should execute at least one input.
std::string BaseCmd = CloneArgsWithoutX(Args, "keep-all-flags");
+ bool Success = false;
for (size_t i = 1; i <= AllFiles.size(); i++) {
Printf("MERGE-OUTER: attempt %zd\n", i);
auto ExitCode =
ExecuteCommand(BaseCmd + " -merge_control_file=" + CFPath);
if (!ExitCode) {
Printf("MERGE-OUTER: succesfull in %zd attempt(s)\n", i);
+ Success = true;
break;
}
}
+ if (!Success) {
+ Printf("MERGE-OUTER: zero succesfull attempts, exiting\n");
+ exit(1);
+ }
// Read the control file and do the merge.
Merger M;
std::ifstream IF(CFPath);
@@ -256,8 +310,23 @@ void Fuzzer::CrashResistantMerge(const std::vector<std::string> &Args,
IF.seekg(0, IF.beg);
M.ParseOrExit(IF, true);
IF.close();
+ Printf("MERGE-OUTER: consumed %zdMb (%zdMb rss) to parse the control file\n",
+ M.ApproximateMemoryConsumption() >> 20, GetPeakRSSMb());
+ if (CoverageSummaryOutputPathOrNull) {
+ Printf("MERGE-OUTER: writing coverage summary for %zd files to %s\n",
+ M.Files.size(), CoverageSummaryOutputPathOrNull);
+ std::ofstream SummaryOut(CoverageSummaryOutputPathOrNull);
+ M.PrintSummary(SummaryOut);
+ }
std::vector<std::string> NewFiles;
- size_t NumNewFeatures = M.Merge(&NewFiles);
+ std::set<uint32_t> InitialFeatures;
+ if (CoverageSummaryInputPathOrNull) {
+ std::ifstream SummaryIn(CoverageSummaryInputPathOrNull);
+ InitialFeatures = M.ParseSummary(SummaryIn);
+ Printf("MERGE-OUTER: coverage summary loaded from %s, %zd features found\n",
+ CoverageSummaryInputPathOrNull, InitialFeatures.size());
+ }
+ size_t NumNewFeatures = M.Merge(InitialFeatures, &NewFiles);
Printf("MERGE-OUTER: %zd new files with %zd new features added\n",
NewFiles.size(), NumNewFeatures);
for (auto &F: NewFiles)
diff --git a/lib/Fuzzer/FuzzerMerge.h b/lib/Fuzzer/FuzzerMerge.h
index 8a2fe5d74f88..cf4a0863571d 100644
--- a/lib/Fuzzer/FuzzerMerge.h
+++ b/lib/Fuzzer/FuzzerMerge.h
@@ -43,7 +43,9 @@
#include "FuzzerDefs.h"
#include <istream>
+#include <ostream>
#include <set>
+#include <vector>
namespace fuzzer {
@@ -62,7 +64,15 @@ struct Merger {
bool Parse(std::istream &IS, bool ParseCoverage);
bool Parse(const std::string &Str, bool ParseCoverage);
void ParseOrExit(std::istream &IS, bool ParseCoverage);
- size_t Merge(std::vector<std::string> *NewFiles);
+ void PrintSummary(std::ostream &OS);
+ std::set<uint32_t> ParseSummary(std::istream &IS);
+ size_t Merge(const std::set<uint32_t> &InitialFeatures,
+ std::vector<std::string> *NewFiles);
+ size_t Merge(std::vector<std::string> *NewFiles) {
+ return Merge({}, NewFiles);
+ }
+ size_t ApproximateMemoryConsumption() const;
+ std::set<uint32_t> AllFeatures() const;
};
} // namespace fuzzer
diff --git a/lib/Fuzzer/FuzzerMutate.cpp b/lib/Fuzzer/FuzzerMutate.cpp
index 96a87b879d6f..cd846c7deec5 100644
--- a/lib/Fuzzer/FuzzerMutate.cpp
+++ b/lib/Fuzzer/FuzzerMutate.cpp
@@ -81,8 +81,8 @@ size_t MutationDispatcher::Mutate_CustomCrossOver(uint8_t *Data, size_t Size,
const Unit &Other = (*Corpus)[Idx];
if (Other.empty())
return 0;
- MutateInPlaceHere.resize(MaxSize);
- auto &U = MutateInPlaceHere;
+ CustomCrossOverInPlaceHere.resize(MaxSize);
+ auto &U = CustomCrossOverInPlaceHere;
size_t NewSize = EF->LLVMFuzzerCustomCrossOver(
Data, Size, Other.data(), Other.size(), U.data(), U.size(), Rand.Rand());
if (!NewSize)
@@ -94,21 +94,18 @@ size_t MutationDispatcher::Mutate_CustomCrossOver(uint8_t *Data, size_t Size,
size_t MutationDispatcher::Mutate_ShuffleBytes(uint8_t *Data, size_t Size,
size_t MaxSize) {
- if (Size > MaxSize) return 0;
- assert(Size);
+ if (Size > MaxSize || Size == 0) return 0;
size_t ShuffleAmount =
Rand(std::min(Size, (size_t)8)) + 1; // [1,8] and <= Size.
size_t ShuffleStart = Rand(Size - ShuffleAmount);
assert(ShuffleStart + ShuffleAmount <= Size);
- std::random_shuffle(Data + ShuffleStart, Data + ShuffleStart + ShuffleAmount,
- Rand);
+ std::shuffle(Data + ShuffleStart, Data + ShuffleStart + ShuffleAmount, Rand);
return Size;
}
size_t MutationDispatcher::Mutate_EraseBytes(uint8_t *Data, size_t Size,
size_t MaxSize) {
- assert(Size);
- if (Size == 1) return 0;
+ if (Size <= 1) return 0;
size_t N = Rand(Size / 2) + 1;
assert(N < Size);
size_t Idx = Rand(Size - N + 1);
@@ -200,28 +197,27 @@ size_t MutationDispatcher::ApplyDictionaryEntry(uint8_t *Data, size_t Size,
// It first tries to find one of the arguments (possibly swapped) in the
// input and if it succeeds it creates a DE with a position hint.
// Otherwise it creates a DE with one of the arguments w/o a position hint.
-template <class T>
DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP(
- T Arg1, T Arg2, const uint8_t *Data, size_t Size) {
- ScopedDoingMyOwnMemmem scoped_doing_my_own_memmem;
+ const void *Arg1, const void *Arg2,
+ const void *Arg1Mutation, const void *Arg2Mutation,
+ size_t ArgSize, const uint8_t *Data,
+ size_t Size) {
+ ScopedDoingMyOwnMemOrStr scoped_doing_my_own_mem_or_str;
bool HandleFirst = Rand.RandBool();
- T ExistingBytes, DesiredBytes;
+ const void *ExistingBytes, *DesiredBytes;
Word W;
const uint8_t *End = Data + Size;
for (int Arg = 0; Arg < 2; Arg++) {
ExistingBytes = HandleFirst ? Arg1 : Arg2;
- DesiredBytes = HandleFirst ? Arg2 : Arg1;
- DesiredBytes += Rand(-1, 1);
- if (Rand.RandBool()) ExistingBytes = Bswap(ExistingBytes);
- if (Rand.RandBool()) DesiredBytes = Bswap(DesiredBytes);
+ DesiredBytes = HandleFirst ? Arg2Mutation : Arg1Mutation;
HandleFirst = !HandleFirst;
- W.Set(reinterpret_cast<uint8_t*>(&DesiredBytes), sizeof(T));
+ W.Set(reinterpret_cast<const uint8_t*>(DesiredBytes), ArgSize);
const size_t kMaxNumPositions = 8;
size_t Positions[kMaxNumPositions];
size_t NumPositions = 0;
for (const uint8_t *Cur = Data;
Cur < End && NumPositions < kMaxNumPositions; Cur++) {
- Cur = (uint8_t *)SearchMemory(Cur, End - Cur, &ExistingBytes, sizeof(T));
+ Cur = (uint8_t *)SearchMemory(Cur, End - Cur, ExistingBytes, ArgSize);
if (!Cur) break;
Positions[NumPositions++] = Cur - Data;
}
@@ -232,21 +228,48 @@ DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP(
return DE;
}
+
+template <class T>
+DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP(
+ T Arg1, T Arg2, const uint8_t *Data, size_t Size) {
+ if (Rand.RandBool()) Arg1 = Bswap(Arg1);
+ if (Rand.RandBool()) Arg2 = Bswap(Arg2);
+ T Arg1Mutation = Arg1 + Rand(-1, 1);
+ T Arg2Mutation = Arg2 + Rand(-1, 1);
+ return MakeDictionaryEntryFromCMP(&Arg1, &Arg2, &Arg1Mutation, &Arg2Mutation,
+ sizeof(Arg1), Data, Size);
+}
+
+DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP(
+ const Word &Arg1, const Word &Arg2, const uint8_t *Data, size_t Size) {
+ return MakeDictionaryEntryFromCMP(Arg1.data(), Arg2.data(), Arg1.data(),
+ Arg2.data(), Arg1.size(), Data, Size);
+}
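// Worked example (values assumed): for a recorded comparison pair
// (0xDEADBEEF, 0xDEADBEF0), the template overload above may byte-swap each
// argument and nudge it by -1, 0, or +1; it then searches the input for the
// "existing" value and emits a dictionary entry carrying the "desired" value
// plus a position hint, steering execution toward the branch the comparison
// guarded.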
+
size_t MutationDispatcher::Mutate_AddWordFromTORC(
uint8_t *Data, size_t Size, size_t MaxSize) {
Word W;
DictionaryEntry DE;
- if (Rand.RandBool()) {
+ switch (Rand(3)) {
+ case 0: {
auto X = TPC.TORC8.Get(Rand.Rand());
DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size);
- } else {
+ } break;
+ case 1: {
auto X = TPC.TORC4.Get(Rand.Rand());
if ((X.A >> 16) == 0 && (X.B >> 16) == 0 && Rand.RandBool())
- DE = MakeDictionaryEntryFromCMP((uint16_t)X.A, (uint16_t)X.B, Data,
- Size);
+ DE = MakeDictionaryEntryFromCMP((uint16_t)X.A, (uint16_t)X.B, Data, Size);
else
DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size);
+ } break;
+ case 2: {
+ auto X = TPC.TORCW.Get(Rand.Rand());
+ DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size);
+ } break;
+ default:
+ assert(0);
}
+ if (!DE.GetW().size()) return 0;
Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE);
if (!Size) return 0;
DictionaryEntry &DERef =
@@ -317,7 +340,7 @@ size_t MutationDispatcher::InsertPartOf(const uint8_t *From, size_t FromSize,
size_t MutationDispatcher::Mutate_CopyPart(uint8_t *Data, size_t Size,
size_t MaxSize) {
- if (Size > MaxSize) return 0;
+ if (Size > MaxSize || Size == 0) return 0;
if (Rand.RandBool())
return CopyPartOf(Data, Size, Data, Size);
else
@@ -413,9 +436,9 @@ size_t MutationDispatcher::Mutate_CrossOver(uint8_t *Data, size_t Size,
break;
case 1:
NewSize = InsertPartOf(O.data(), O.size(), U.data(), U.size(), MaxSize);
- if (NewSize)
- break;
- // LLVM_FALLTHROUGH;
+ if (!NewSize)
+ NewSize = CopyPartOf(O.data(), O.size(), U.data(), U.size());
+ break;
case 2:
NewSize = CopyPartOf(O.data(), O.size(), U.data(), U.size());
break;
@@ -437,6 +460,7 @@ void MutationDispatcher::RecordSuccessfulMutationSequence() {
for (auto DE : CurrentDictionaryEntrySequence) {
// PersistentAutoDictionary.AddWithSuccessCountOne(DE);
DE->IncSuccessCount();
+ assert(DE->GetW().size());
// Linear search is fine here as this happens seldom.
if (!PersistentAutoDictionary.ContainsWord(DE->GetW()))
PersistentAutoDictionary.push_back({DE->GetW(), 1});
@@ -451,6 +475,7 @@ void MutationDispatcher::PrintRecommendedDictionary() {
if (V.empty()) return;
Printf("###### Recommended dictionary. ######\n");
for (auto &DE: V) {
+ assert(DE.GetW().size());
Printf("\"");
PrintASCII(DE.GetW(), "\"");
Printf(" # Uses: %zd\n", DE.GetUseCount());
@@ -485,14 +510,6 @@ size_t MutationDispatcher::MutateImpl(uint8_t *Data, size_t Size,
size_t MaxSize,
const std::vector<Mutator> &Mutators) {
assert(MaxSize > 0);
- if (Size == 0) {
- for (size_t i = 0; i < Min(size_t(4), MaxSize); i++)
- Data[i] = RandCh(Rand);
- if (Options.OnlyASCII)
- ToASCII(Data, MaxSize);
- return MaxSize;
- }
- assert(Size > 0);
// Some mutations may fail (e.g. can't insert more bytes if Size == MaxSize),
// in which case they will return 0.
// Try several times before returning un-mutated data.
@@ -506,7 +523,8 @@ size_t MutationDispatcher::MutateImpl(uint8_t *Data, size_t Size,
return NewSize;
}
}
- return std::min(Size, MaxSize);
+ *Data = ' ';
+ return 1; // Fallback, should not happen frequently.
}
void MutationDispatcher::AddWordToManualDictionary(const Word &W) {
diff --git a/lib/Fuzzer/FuzzerMutate.h b/lib/Fuzzer/FuzzerMutate.h
index d3c0b0012468..8c8fb3fd74c7 100644
--- a/lib/Fuzzer/FuzzerMutate.h
+++ b/lib/Fuzzer/FuzzerMutate.h
@@ -14,6 +14,7 @@
#include "FuzzerDefs.h"
#include "FuzzerDictionary.h"
+#include "FuzzerOptions.h"
#include "FuzzerRandom.h"
namespace fuzzer {
@@ -113,9 +114,16 @@ private:
template <class T>
DictionaryEntry MakeDictionaryEntryFromCMP(T Arg1, T Arg2,
const uint8_t *Data, size_t Size);
+ DictionaryEntry MakeDictionaryEntryFromCMP(const Word &Arg1, const Word &Arg2,
+ const uint8_t *Data, size_t Size);
+ DictionaryEntry MakeDictionaryEntryFromCMP(const void *Arg1, const void *Arg2,
+ const void *Arg1Mutation,
+ const void *Arg2Mutation,
+ size_t ArgSize,
+ const uint8_t *Data, size_t Size);
Random &Rand;
- const FuzzingOptions &Options;
+ const FuzzingOptions Options;
// Dictionary provided by the user via -dict=DICT_FILE.
Dictionary ManualDictionary;
@@ -135,6 +143,9 @@ private:
const InputCorpus *Corpus = nullptr;
std::vector<uint8_t> MutateInPlaceHere;
+ // CustomCrossOver needs its own buffer as a custom implementation may call
+ // LLVMFuzzerMutate, which in turn may resize MutateInPlaceHere.
+ std::vector<uint8_t> CustomCrossOverInPlaceHere;
std::vector<Mutator> Mutators;
std::vector<Mutator> DefaultMutators;
diff --git a/lib/Fuzzer/FuzzerOptions.h b/lib/Fuzzer/FuzzerOptions.h
index 6f72205600b9..872def0326f0 100644
--- a/lib/Fuzzer/FuzzerOptions.h
+++ b/lib/Fuzzer/FuzzerOptions.h
@@ -1,4 +1,3 @@
-//===- FuzzerOptions.h - Internal header for the Fuzzer ---------*- C++ -* ===//
//
// The LLVM Compiler Infrastructure
//
@@ -29,7 +28,6 @@ struct FuzzingOptions {
int MutateDepth = 5;
bool UseCounters = false;
bool UseIndirCalls = true;
- bool UseMemcmp = true;
bool UseMemmem = true;
bool UseCmp = false;
bool UseValueProfile = false;
diff --git a/lib/Fuzzer/FuzzerRandom.h b/lib/Fuzzer/FuzzerRandom.h
index b1be0bb935fa..8a1aa3ef5fdc 100644
--- a/lib/Fuzzer/FuzzerRandom.h
+++ b/lib/Fuzzer/FuzzerRandom.h
@@ -15,10 +15,11 @@
#include <random>
namespace fuzzer {
-class Random {
+class Random : public std::mt19937 {
public:
- Random(unsigned int seed) : R(seed) {}
- size_t Rand() { return R(); }
+ Random(unsigned int seed) : std::mt19937(seed) {}
+ result_type operator()() { return this->std::mt19937::operator()(); }
+ size_t Rand() { return this->operator()(); }
size_t RandBool() { return Rand() % 2; }
size_t operator()(size_t n) { return n ? Rand() % n : 0; }
intptr_t operator()(intptr_t From, intptr_t To) {
@@ -26,9 +27,6 @@ class Random {
intptr_t RangeSize = To - From + 1;
return operator()(RangeSize) + From;
}
- std::mt19937 &Get_mt19937() { return R; }
- private:
- std::mt19937 R;
};
} // namespace fuzzer
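Deriving Random from std::mt19937 lets the fuzzer's generator be handed directly to standard algorithms that expect a UniformRandomBitGenerator, which the old Get_mt19937() accessor existed to work around. A sketch of that usage; the stand-in class below mirrors the patched one and is illustrative only:

#include <algorithm>
#include <cstddef>
#include <random>
#include <vector>

// Stand-in mirroring fuzzer::Random above.
class Random : public std::mt19937 {
public:
  Random(unsigned int seed) : std::mt19937(seed) {}
  size_t Rand() { return (*this)(); }
};

int main() {
  Random R(42);
  std::vector<int> V{1, 2, 3, 4, 5};
  std::shuffle(V.begin(), V.end(), R); // R satisfies the URBG requirements.
  return 0;
}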
diff --git a/lib/Fuzzer/FuzzerShmem.h b/lib/Fuzzer/FuzzerShmem.h
new file mode 100644
index 000000000000..53568e0acb69
--- /dev/null
+++ b/lib/Fuzzer/FuzzerShmem.h
@@ -0,0 +1,69 @@
+//===- FuzzerShmem.h - shared memory interface ------------------*- C++ -* ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// SharedMemoryRegion
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUZZER_SHMEM_H
+#define LLVM_FUZZER_SHMEM_H
+
+#include <algorithm>
+#include <cstring>
+#include <string>
+
+#include "FuzzerDefs.h"
+
+namespace fuzzer {
+
+class SharedMemoryRegion {
+ public:
+ bool Create(const char *Name);
+ bool Open(const char *Name);
+ bool Destroy(const char *Name);
+ uint8_t *GetData() { return Data; }
+  void PostServer() { Post(0); }
+  void WaitServer() { Wait(0); }
+  void PostClient() { Post(1); }
+  void WaitClient() { Wait(1); }
+
+ size_t WriteByteArray(const uint8_t *Bytes, size_t N) {
+ assert(N <= kShmemSize - sizeof(N));
+ memcpy(GetData(), &N, sizeof(N));
+ memcpy(GetData() + sizeof(N), Bytes, N);
+ assert(N == ReadByteArraySize());
+ return N;
+ }
+ size_t ReadByteArraySize() {
+ size_t Res;
+ memcpy(&Res, GetData(), sizeof(Res));
+ return Res;
+ }
+ uint8_t *GetByteArray() { return GetData() + sizeof(size_t); }
+
+ bool IsServer() const { return Data && IAmServer; }
+ bool IsClient() const { return Data && !IAmServer; }
+
+private:
+
+ static const size_t kShmemSize = 1 << 22;
+ bool IAmServer;
+ std::string Path(const char *Name);
+ std::string SemName(const char *Name, int Idx);
+ void Post(int Idx);
+ void Wait(int Idx);
+
+ bool Map(int fd);
+ uint8_t *Data = nullptr;
+ void *Semaphore[2];
+};
+
+extern SharedMemoryRegion SMR;
+
+} // namespace fuzzer
+
+#endif // LLVM_FUZZER_SHMEM_H
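The header above encodes one size-prefixed byte array plus two named semaphores for a ping-pong handshake. A hypothetical driver showing how the pieces fit together (the function names are illustrative, not part of the patch):

#include <cstdint>
#include <cstring>
#include "FuzzerShmem.h"

// Server side: publish a unit, wake the client, wait for its ack.
static void ServerSendUnit(fuzzer::SharedMemoryRegion &SMR,
                           const uint8_t *Bytes, size_t N) {
  SMR.WriteByteArray(Bytes, N); // Store N, then the bytes, into the region.
  SMR.PostClient();             // Post semaphore 1: data is ready.
  SMR.WaitServer();             // Block on semaphore 0 until acknowledged.
}

// Client side: wait for data, copy it out, acknowledge.
static size_t ClientReceiveUnit(fuzzer::SharedMemoryRegion &SMR, uint8_t *Out) {
  SMR.WaitClient();             // Block on semaphore 1 until data is ready.
  size_t N = SMR.ReadByteArraySize();
  memcpy(Out, SMR.GetByteArray(), N);
  SMR.PostServer();             // Post semaphore 0: region may be reused.
  return N;
}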
diff --git a/lib/Fuzzer/FuzzerShmemPosix.cpp b/lib/Fuzzer/FuzzerShmemPosix.cpp
new file mode 100644
index 000000000000..2723bdd86f48
--- /dev/null
+++ b/lib/Fuzzer/FuzzerShmemPosix.cpp
@@ -0,0 +1,103 @@
+//===- FuzzerShmemPosix.cpp - Posix shared memory ---------------*- C++ -* ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// SharedMemoryRegion
+//===----------------------------------------------------------------------===//
+#include "FuzzerDefs.h"
+#if LIBFUZZER_POSIX
+
+#include "FuzzerIO.h"
+#include "FuzzerShmem.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <semaphore.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+namespace fuzzer {
+
+std::string SharedMemoryRegion::Path(const char *Name) {
+ return DirPlusFile(TmpDir(), Name);
+}
+
+std::string SharedMemoryRegion::SemName(const char *Name, int Idx) {
+ std::string Res(Name);
+ return Res + (char)('0' + Idx);
+}
+
+bool SharedMemoryRegion::Map(int fd) {
+ Data =
+ (uint8_t *)mmap(0, kShmemSize, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
+ if (Data == (uint8_t*)-1)
+ return false;
+ return true;
+}
+
+bool SharedMemoryRegion::Create(const char *Name) {
+ int fd = open(Path(Name).c_str(), O_CREAT | O_RDWR, 0777);
+ if (fd < 0) return false;
+ if (ftruncate(fd, kShmemSize) < 0) return false;
+ if (!Map(fd))
+ return false;
+ for (int i = 0; i < 2; i++) {
+ sem_unlink(SemName(Name, i).c_str());
+ Semaphore[i] = sem_open(SemName(Name, i).c_str(), O_CREAT, 0644, 0);
+ if (Semaphore[i] == (void *)-1)
+ return false;
+ }
+ IAmServer = true;
+ return true;
+}
+
+bool SharedMemoryRegion::Open(const char *Name) {
+ int fd = open(Path(Name).c_str(), O_RDWR);
+ if (fd < 0) return false;
+ struct stat stat_res;
+ if (0 != fstat(fd, &stat_res))
+ return false;
+ assert(stat_res.st_size == kShmemSize);
+ if (!Map(fd))
+ return false;
+ for (int i = 0; i < 2; i++) {
+ Semaphore[i] = sem_open(SemName(Name, i).c_str(), 0);
+ if (Semaphore[i] == (void *)-1)
+ return false;
+ }
+ IAmServer = false;
+ return true;
+}
+
+bool SharedMemoryRegion::Destroy(const char *Name) {
+ return 0 == unlink(Path(Name).c_str());
+}
+
+void SharedMemoryRegion::Post(int Idx) {
+ assert(Idx == 0 || Idx == 1);
+ sem_post((sem_t*)Semaphore[Idx]);
+}
+
+void SharedMemoryRegion::Wait(int Idx) {
+ assert(Idx == 0 || Idx == 1);
+ for (int i = 0; i < 10 && sem_wait((sem_t*)Semaphore[Idx]); i++) {
+ // sem_wait may fail if interrupted by a signal.
+ sleep(i);
+ if (i)
+ Printf("%s: sem_wait[%d] failed %s\n", i < 9 ? "WARNING" : "ERROR", i,
+ strerror(errno));
+ if (i == 9) abort();
+ }
+}
+
+} // namespace fuzzer
+
+#endif // LIBFUZZER_POSIX
diff --git a/lib/Fuzzer/FuzzerShmemWindows.cpp b/lib/Fuzzer/FuzzerShmemWindows.cpp
new file mode 100644
index 000000000000..6325b4b8e5b4
--- /dev/null
+++ b/lib/Fuzzer/FuzzerShmemWindows.cpp
@@ -0,0 +1,64 @@
+//===- FuzzerShmemWindows.cpp - Windows shared memory -----------*- C++ -* ===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// SharedMemoryRegion
+//===----------------------------------------------------------------------===//
+#include "FuzzerDefs.h"
+#if LIBFUZZER_WINDOWS
+
+#include "FuzzerIO.h"
+#include "FuzzerShmem.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+
+namespace fuzzer {
+
+std::string SharedMemoryRegion::Path(const char *Name) {
+ return DirPlusFile(TmpDir(), Name);
+}
+
+std::string SharedMemoryRegion::SemName(const char *Name, int Idx) {
+ std::string Res(Name);
+ return Res + (char)('0' + Idx);
+}
+
+bool SharedMemoryRegion::Map(int fd) {
+ assert(0 && "UNIMPLEMENTED");
+ return false;
+}
+
+bool SharedMemoryRegion::Create(const char *Name) {
+ assert(0 && "UNIMPLEMENTED");
+ return false;
+}
+
+bool SharedMemoryRegion::Open(const char *Name) {
+ assert(0 && "UNIMPLEMENTED");
+ return false;
+}
+
+bool SharedMemoryRegion::Destroy(const char *Name) {
+ assert(0 && "UNIMPLEMENTED");
+ return false;
+}
+
+void SharedMemoryRegion::Post(int Idx) {
+ assert(0 && "UNIMPLEMENTED");
+}
+
+void SharedMemoryRegion::Wait(int Idx) {
+ Semaphore[1] = nullptr;
+ assert(0 && "UNIMPLEMENTED");
+}
+
+} // namespace fuzzer
+
+#endif // LIBFUZZER_WINDOWS
diff --git a/lib/Fuzzer/FuzzerTracePC.cpp b/lib/Fuzzer/FuzzerTracePC.cpp
index 39d6e6026210..ce0f7a47eee6 100644
--- a/lib/Fuzzer/FuzzerTracePC.cpp
+++ b/lib/Fuzzer/FuzzerTracePC.cpp
@@ -18,27 +18,37 @@
#include "FuzzerExtFunctions.h"
#include "FuzzerIO.h"
#include "FuzzerTracePC.h"
+#include "FuzzerUtil.h"
#include "FuzzerValueBitMap.h"
#include <map>
-#include <sanitizer/coverage_interface.h>
#include <set>
#include <sstream>
+// The coverage counters and PCs.
+// These are declared as global variables named "__sancov_*" to simplify
+// experiments with inlined instrumentation.
+alignas(64) ATTRIBUTE_INTERFACE
+uint8_t __sancov_trace_pc_guard_8bit_counters[fuzzer::TracePC::kNumPCs];
+
+ATTRIBUTE_INTERFACE
+uintptr_t __sancov_trace_pc_pcs[fuzzer::TracePC::kNumPCs];
+
namespace fuzzer {
TracePC TPC;
-void TracePC::HandleTrace(uint32_t *Guard, uintptr_t PC) {
- uint32_t Idx = *Guard;
- if (!Idx) return;
- PCs[Idx % kNumPCs] = PC;
- Counters[Idx % kNumCounters]++;
+uint8_t *TracePC::Counters() const {
+ return __sancov_trace_pc_guard_8bit_counters;
+}
+
+uintptr_t *TracePC::PCs() const {
+ return __sancov_trace_pc_pcs;
}
size_t TracePC::GetTotalPCCoverage() {
size_t Res = 0;
- for (size_t i = 1; i < GetNumPCs(); i++)
- if (PCs[i])
+ for (size_t i = 1, N = GetNumPCs(); i < N; i++)
+ if (PCs()[i])
Res++;
return Res;
}
@@ -46,8 +56,16 @@ size_t TracePC::GetTotalPCCoverage() {
void TracePC::HandleInit(uint32_t *Start, uint32_t *Stop) {
if (Start == Stop || *Start) return;
assert(NumModules < sizeof(Modules) / sizeof(Modules[0]));
- for (uint32_t *P = Start; P < Stop; P++)
- *P = ++NumGuards;
+ for (uint32_t *P = Start; P < Stop; P++) {
+ NumGuards++;
+ if (NumGuards == kNumPCs) {
+ RawPrint(
+ "WARNING: The binary has too many instrumented PCs.\n"
+ " You may want to reduce the size of the binary\n"
+ " for more efficient fuzzing and precise coverage data\n");
+ }
+ *P = NumGuards % kNumPCs;
+ }
Modules[NumModules].Start = Start;
Modules[NumModules].Stop = Stop;
NumModules++;
@@ -60,23 +78,12 @@ void TracePC::PrintModuleInfo() {
Printf("\n");
}
+ATTRIBUTE_NO_SANITIZE_ALL
void TracePC::HandleCallerCallee(uintptr_t Caller, uintptr_t Callee) {
const uintptr_t kBits = 12;
const uintptr_t kMask = (1 << kBits) - 1;
uintptr_t Idx = (Caller & kMask) | ((Callee & kMask) << kBits);
- HandleValueProfile(Idx);
-}
-
-static bool IsInterestingCoverageFile(std::string &File) {
- if (File.find("compiler-rt/lib/") != std::string::npos)
- return false; // sanitizer internal.
- if (File.find("/usr/lib/") != std::string::npos)
- return false;
- if (File.find("/usr/include/") != std::string::npos)
- return false;
- if (File == "<null>")
- return false;
- return true;
+ ValueProfileMap.AddValueModPrime(Idx);
}
void TracePC::InitializePrintNewPCs() {
@@ -84,16 +91,16 @@ void TracePC::InitializePrintNewPCs() {
assert(!PrintedPCs);
PrintedPCs = new std::set<uintptr_t>;
for (size_t i = 1; i < GetNumPCs(); i++)
- if (PCs[i])
- PrintedPCs->insert(PCs[i]);
+ if (PCs()[i])
+ PrintedPCs->insert(PCs()[i]);
}
void TracePC::PrintNewPCs() {
if (!DoPrintNewPCs) return;
assert(PrintedPCs);
for (size_t i = 1; i < GetNumPCs(); i++)
- if (PCs[i] && PrintedPCs->insert(PCs[i]).second)
- PrintPC("\tNEW_PC: %p %F %L\n", "\tNEW_PC: %p\n", PCs[i]);
+ if (PCs()[i] && PrintedPCs->insert(PCs()[i]).second)
+ PrintPC("\tNEW_PC: %p %F %L\n", "\tNEW_PC: %p\n", PCs()[i]);
}
void TracePC::PrintCoverage() {
@@ -110,20 +117,21 @@ void TracePC::PrintCoverage() {
CoveredLines;
Printf("COVERAGE:\n");
for (size_t i = 1; i < GetNumPCs(); i++) {
- if (!PCs[i]) continue;
- std::string FileStr = DescribePC("%s", PCs[i]);
+ uintptr_t PC = PCs()[i];
+ if (!PC) continue;
+ std::string FileStr = DescribePC("%s", PC);
if (!IsInterestingCoverageFile(FileStr)) continue;
- std::string FixedPCStr = DescribePC("%p", PCs[i]);
- std::string FunctionStr = DescribePC("%F", PCs[i]);
- std::string LineStr = DescribePC("%l", PCs[i]);
+ std::string FixedPCStr = DescribePC("%p", PC);
+ std::string FunctionStr = DescribePC("%F", PC);
+ std::string LineStr = DescribePC("%l", PC);
char ModulePathRaw[4096] = ""; // What's PATH_MAX in portable C++?
void *OffsetRaw = nullptr;
if (!EF->__sanitizer_get_module_and_offset_for_pc(
- reinterpret_cast<void *>(PCs[i]), ModulePathRaw,
+ reinterpret_cast<void *>(PC), ModulePathRaw,
sizeof(ModulePathRaw), &OffsetRaw))
continue;
std::string Module = ModulePathRaw;
- uintptr_t FixedPC = std::stol(FixedPCStr, 0, 16);
+ uintptr_t FixedPC = std::stoull(FixedPCStr, 0, 16);
uintptr_t PcOffset = reinterpret_cast<uintptr_t>(OffsetRaw);
ModuleOffsets[Module] = FixedPC - PcOffset;
CoveredPCsPerModule[Module].push_back(PcOffset);
@@ -154,8 +162,8 @@ void TracePC::PrintCoverage() {
Printf("MODULE_WITH_COVERAGE: %s\n", ModuleName.c_str());
// sancov does not yet fully support DSOs.
// std::string Cmd = "sancov -print-coverage-pcs " + ModuleName;
- std::string Cmd = "objdump -d " + ModuleName +
- " | grep 'call.*__sanitizer_cov_trace_pc_guard' | awk -F: '{print $1}'";
+ std::string Cmd = DisassembleCmd(ModuleName) + " | " +
+ SearchRegexCmd("call.*__sanitizer_cov_trace_pc_guard");
std::string SanCovOutput;
if (!ExecuteCommandAndReadOutput(Cmd, &SanCovOutput)) {
Printf("INFO: Command failed: %s\n", Cmd.c_str());
@@ -164,7 +172,11 @@ void TracePC::PrintCoverage() {
std::istringstream ISS(SanCovOutput);
std::string S;
while (std::getline(ISS, S, '\n')) {
- uintptr_t PcOffset = std::stol(S, 0, 16);
+ size_t PcOffsetEnd = S.find(':');
+ if (PcOffsetEnd == std::string::npos)
+ continue;
+ S.resize(PcOffsetEnd);
+ uintptr_t PcOffset = std::stoull(S, 0, 16);
if (!std::binary_search(CoveredOffsets.begin(), CoveredOffsets.end(),
PcOffset)) {
uintptr_t PC = ModuleOffset + PcOffset;
@@ -196,8 +208,19 @@ void TracePC::PrintCoverage() {
}
}
+inline ALWAYS_INLINE uintptr_t GetPreviousInstructionPc(uintptr_t PC) {
+  // TODO: this implementation is x86-only.
+  // See sanitizer_common's GetPreviousInstructionPc for a full implementation.
+ return PC - 1;
+}
+
void TracePC::DumpCoverage() {
- __sanitizer_dump_coverage(PCs, GetNumPCs());
+ if (EF->__sanitizer_dump_coverage) {
+ std::vector<uintptr_t> PCsCopy(GetNumPCs());
+ for (size_t i = 0; i < GetNumPCs(); i++)
+ PCsCopy[i] = PCs()[i] ? GetPreviousInstructionPc(PCs()[i]) : 0;
+ EF->__sanitizer_dump_coverage(PCsCopy.data(), PCsCopy.size());
+ }
}
// Value profile.
@@ -210,97 +233,118 @@ void TracePC::DumpCoverage() {
// For cmp instructions the interesting value is a XOR of the parameters.
// The interesting value is mixed up with the PC and is then added to the map.
-ATTRIBUTE_NO_SANITIZE_MEMORY
+ATTRIBUTE_NO_SANITIZE_ALL
void TracePC::AddValueForMemcmp(void *caller_pc, const void *s1, const void *s2,
- size_t n) {
+ size_t n, bool StopAtZero) {
if (!n) return;
- size_t Len = std::min(n, (size_t)32);
- const uint8_t *A1 = reinterpret_cast<const uint8_t *>(s1);
- const uint8_t *A2 = reinterpret_cast<const uint8_t *>(s2);
- size_t I = 0;
- for (; I < Len; I++)
- if (A1[I] != A2[I])
- break;
- size_t PC = reinterpret_cast<size_t>(caller_pc);
- size_t Idx = I;
- // if (I < Len)
- // Idx += __builtin_popcountl((A1[I] ^ A2[I])) - 1;
- TPC.HandleValueProfile((PC & 4095) | (Idx << 12));
-}
-
-ATTRIBUTE_NO_SANITIZE_MEMORY
-void TracePC::AddValueForStrcmp(void *caller_pc, const char *s1, const char *s2,
- size_t n) {
- if (!n) return;
- size_t Len = std::min(n, (size_t)32);
+ size_t Len = std::min(n, Word::GetMaxSize());
const uint8_t *A1 = reinterpret_cast<const uint8_t *>(s1);
const uint8_t *A2 = reinterpret_cast<const uint8_t *>(s2);
+ uint8_t B1[Word::kMaxSize];
+ uint8_t B2[Word::kMaxSize];
+ // Copy the data into locals in this non-msan-instrumented function
+ // to avoid msan complaining further.
+ size_t Hash = 0; // Compute some simple hash of both strings.
+ for (size_t i = 0; i < Len; i++) {
+ B1[i] = A1[i];
+ B2[i] = A2[i];
+ size_t T = B1[i];
+ Hash ^= (T << 8) | B2[i];
+ }
size_t I = 0;
for (; I < Len; I++)
- if (A1[I] != A2[I] || A1[I] == 0)
+ if (B1[I] != B2[I] || (StopAtZero && B1[I] == 0))
break;
size_t PC = reinterpret_cast<size_t>(caller_pc);
- size_t Idx = I;
- // if (I < Len && A1[I])
- // Idx += __builtin_popcountl((A1[I] ^ A2[I])) - 1;
- TPC.HandleValueProfile((PC & 4095) | (Idx << 12));
+ size_t Idx = (PC & 4095) | (I << 12);
+ ValueProfileMap.AddValue(Idx);
+ TORCW.Insert(Idx ^ Hash, Word(B1, Len), Word(B2, Len));
}
template <class T>
-ATTRIBUTE_TARGET_POPCNT
-#ifdef __clang__ // g++ can't handle this __attribute__ here :(
-__attribute__((always_inline))
-#endif // __clang__
-void TracePC::HandleCmp(void *PC, T Arg1, T Arg2) {
- uintptr_t PCuint = reinterpret_cast<uintptr_t>(PC);
+ATTRIBUTE_TARGET_POPCNT ALWAYS_INLINE
+ATTRIBUTE_NO_SANITIZE_ALL
+void TracePC::HandleCmp(uintptr_t PC, T Arg1, T Arg2) {
uint64_t ArgXor = Arg1 ^ Arg2;
- uint64_t ArgDistance = __builtin_popcountl(ArgXor) + 1; // [1,65]
- uintptr_t Idx = ((PCuint & 4095) + 1) * ArgDistance;
+ uint64_t ArgDistance = __builtin_popcountll(ArgXor) + 1; // [1,65]
+ uintptr_t Idx = ((PC & 4095) + 1) * ArgDistance;
if (sizeof(T) == 4)
TORC4.Insert(ArgXor, Arg1, Arg2);
else if (sizeof(T) == 8)
TORC8.Insert(ArgXor, Arg1, Arg2);
- HandleValueProfile(Idx);
+ ValueProfileMap.AddValue(Idx);
}
} // namespace fuzzer
extern "C" {
-__attribute__((visibility("default")))
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
void __sanitizer_cov_trace_pc_guard(uint32_t *Guard) {
- uintptr_t PC = (uintptr_t)__builtin_return_address(0);
- fuzzer::TPC.HandleTrace(Guard, PC);
+ uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+ uint32_t Idx = *Guard;
+ __sancov_trace_pc_pcs[Idx] = PC;
+ __sancov_trace_pc_guard_8bit_counters[Idx]++;
+}
+
+// Best-effort support for -fsanitize-coverage=trace-pc, which is available
+// in both Clang and GCC.
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+void __sanitizer_cov_trace_pc() {
+ uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+ uintptr_t Idx = PC & (((uintptr_t)1 << fuzzer::TracePC::kTracePcBits) - 1);
+ __sancov_trace_pc_pcs[Idx] = PC;
+ __sancov_trace_pc_guard_8bit_counters[Idx]++;
}
-__attribute__((visibility("default")))
+ATTRIBUTE_INTERFACE
void __sanitizer_cov_trace_pc_guard_init(uint32_t *Start, uint32_t *Stop) {
fuzzer::TPC.HandleInit(Start, Stop);
}
-__attribute__((visibility("default")))
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
void __sanitizer_cov_trace_pc_indir(uintptr_t Callee) {
- uintptr_t PC = (uintptr_t)__builtin_return_address(0);
+ uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
fuzzer::TPC.HandleCallerCallee(PC, Callee);
}
-__attribute__((visibility("default")))
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
void __sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) {
- fuzzer::TPC.HandleCmp(__builtin_return_address(0), Arg1, Arg2);
+ uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+ fuzzer::TPC.HandleCmp(PC, Arg1, Arg2);
}
-__attribute__((visibility("default")))
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
void __sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) {
- fuzzer::TPC.HandleCmp(__builtin_return_address(0), Arg1, Arg2);
+ uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+ fuzzer::TPC.HandleCmp(PC, Arg1, Arg2);
}
-__attribute__((visibility("default")))
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
void __sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) {
- fuzzer::TPC.HandleCmp(__builtin_return_address(0), Arg1, Arg2);
+ uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+ fuzzer::TPC.HandleCmp(PC, Arg1, Arg2);
}
-__attribute__((visibility("default")))
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
void __sanitizer_cov_trace_cmp1(uint8_t Arg1, uint8_t Arg2) {
- fuzzer::TPC.HandleCmp(__builtin_return_address(0), Arg1, Arg2);
+ uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+ fuzzer::TPC.HandleCmp(PC, Arg1, Arg2);
}
-__attribute__((visibility("default")))
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) {
uint64_t N = Cases[0];
uint64_t ValSizeInBits = Cases[1];
@@ -308,7 +352,7 @@ void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) {
// Skip the most common and the most boring case.
if (Vals[N - 1] < 256 && Val < 256)
return;
- char *PC = (char*)__builtin_return_address(0);
+ uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
size_t i;
uint64_t Token = 0;
for (i = 0; i < N; i++) {
@@ -325,17 +369,27 @@ void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) {
fuzzer::TPC.HandleCmp(PC + i, Token, (uint64_t)(0));
}
-__attribute__((visibility("default")))
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
void __sanitizer_cov_trace_div4(uint32_t Val) {
- fuzzer::TPC.HandleCmp(__builtin_return_address(0), Val, (uint32_t)0);
+ uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+ fuzzer::TPC.HandleCmp(PC, Val, (uint32_t)0);
}
-__attribute__((visibility("default")))
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
void __sanitizer_cov_trace_div8(uint64_t Val) {
- fuzzer::TPC.HandleCmp(__builtin_return_address(0), Val, (uint64_t)0);
+ uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+ fuzzer::TPC.HandleCmp(PC, Val, (uint64_t)0);
}
-__attribute__((visibility("default")))
+
+ATTRIBUTE_INTERFACE
+ATTRIBUTE_NO_SANITIZE_ALL
+ATTRIBUTE_TARGET_POPCNT
void __sanitizer_cov_trace_gep(uintptr_t Idx) {
- fuzzer::TPC.HandleCmp(__builtin_return_address(0), Idx, (uintptr_t)0);
+ uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+ fuzzer::TPC.HandleCmp(PC, Idx, (uintptr_t)0);
}
-
} // extern "C"
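A worked instance of the value-profile index computed by HandleCmp above (the constants match the patch; the inputs are illustrative):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Arg1 = 0x1234, Arg2 = 0x1235; // Comparison one bit away from equal.
  uint64_t ArgXor = Arg1 ^ Arg2;         // == 1
  // The popcount of the XOR counts the bits that still differ; +1 keeps the
  // distance in [1,65] so the multiplier never collapses to zero.
  uint64_t ArgDistance = __builtin_popcountll(ArgXor) + 1;
  uintptr_t PC = 0x4005d0;               // Hypothetical caller PC.
  uintptr_t Idx = ((PC & 4095) + 1) * ArgDistance;
  assert(ArgDistance == 2);
  assert(Idx == (0x5d0 + 1) * 2);        // Nearby arguments -> small indices.
  return 0;
}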
diff --git a/lib/Fuzzer/FuzzerTracePC.h b/lib/Fuzzer/FuzzerTracePC.h
index b6b26b6c9af8..6523fa06005c 100644
--- a/lib/Fuzzer/FuzzerTracePC.h
+++ b/lib/Fuzzer/FuzzerTracePC.h
@@ -13,7 +13,9 @@
#define LLVM_FUZZER_TRACE_PC
#include "FuzzerDefs.h"
+#include "FuzzerDictionary.h"
#include "FuzzerValueBitMap.h"
+
#include <set>
namespace fuzzer {
@@ -31,7 +33,8 @@ struct TableOfRecentCompares {
struct Pair {
T A, B;
};
- void Insert(size_t Idx, T Arg1, T Arg2) {
+ ATTRIBUTE_NO_SANITIZE_ALL
+ void Insert(size_t Idx, const T &Arg1, const T &Arg2) {
Idx = Idx % kSize;
Table[Idx].A = Arg1;
Table[Idx].B = Arg2;
@@ -44,25 +47,23 @@ struct TableOfRecentCompares {
class TracePC {
public:
- static const size_t kFeatureSetSize = ValueBitMap::kNumberOfItems;
+ static const size_t kNumPCs = 1 << 21;
+ // How many bits of PC are used from __sanitizer_cov_trace_pc.
+ static const size_t kTracePcBits = 18;
- void HandleTrace(uint32_t *guard, uintptr_t PC);
void HandleInit(uint32_t *start, uint32_t *stop);
void HandleCallerCallee(uintptr_t Caller, uintptr_t Callee);
- void HandleValueProfile(size_t Value) { ValueProfileMap.AddValue(Value); }
- template <class T> void HandleCmp(void *PC, T Arg1, T Arg2);
+ template <class T> void HandleCmp(uintptr_t PC, T Arg1, T Arg2);
size_t GetTotalPCCoverage();
void SetUseCounters(bool UC) { UseCounters = UC; }
void SetUseValueProfile(bool VP) { UseValueProfile = VP; }
void SetPrintNewPCs(bool P) { DoPrintNewPCs = P; }
- template <class Callback> size_t CollectFeatures(Callback CB);
- bool UpdateValueProfileMap(ValueBitMap *MaxValueProfileMap) {
- return UseValueProfile && MaxValueProfileMap->MergeFrom(ValueProfileMap);
- }
+ template <class Callback> void CollectFeatures(Callback CB) const;
void ResetMaps() {
ValueProfileMap.Reset();
- memset(Counters, 0, sizeof(Counters));
+ memset(Counters(), 0, GetNumPCs());
+ ClearExtraCounters();
}
void UpdateFeatureSet(size_t CurrentElementIdx, size_t CurrentElementSize);
@@ -74,22 +75,20 @@ class TracePC {
void DumpCoverage();
void AddValueForMemcmp(void *caller_pc, const void *s1, const void *s2,
- size_t n);
- void AddValueForStrcmp(void *caller_pc, const char *s1, const char *s2,
- size_t n);
-
- bool UsingTracePcGuard() const {return NumModules; }
+ size_t n, bool StopAtZero);
- static const size_t kTORCSize = 1 << 5;
- TableOfRecentCompares<uint32_t, kTORCSize> TORC4;
- TableOfRecentCompares<uint64_t, kTORCSize> TORC8;
+ TableOfRecentCompares<uint32_t, 32> TORC4;
+ TableOfRecentCompares<uint64_t, 32> TORC8;
+ TableOfRecentCompares<Word, 32> TORCW;
void PrintNewPCs();
void InitializePrintNewPCs();
- size_t GetNumPCs() const { return Min(kNumPCs, NumGuards + 1); }
+ size_t GetNumPCs() const {
+ return NumGuards == 0 ? (1 << kTracePcBits) : Min(kNumPCs, NumGuards + 1);
+ }
uintptr_t GetPC(size_t Idx) {
assert(Idx < GetNumPCs());
- return PCs[Idx];
+ return PCs()[Idx];
}
private:
@@ -105,51 +104,55 @@ private:
size_t NumModules; // linker-initialized.
size_t NumGuards; // linker-initialized.
- static const size_t kNumCounters = 1 << 14;
- alignas(8) uint8_t Counters[kNumCounters];
-
- static const size_t kNumPCs = 1 << 24;
- uintptr_t PCs[kNumPCs];
+ uint8_t *Counters() const;
+ uintptr_t *PCs() const;
std::set<uintptr_t> *PrintedPCs;
ValueBitMap ValueProfileMap;
};
-template <class Callback>
-size_t TracePC::CollectFeatures(Callback CB) {
- if (!UsingTracePcGuard()) return 0;
- size_t Res = 0;
- const size_t Step = 8;
- assert(reinterpret_cast<uintptr_t>(Counters) % Step == 0);
- size_t N = Min(kNumCounters, NumGuards + 1);
- N = (N + Step - 1) & ~(Step - 1); // Round up.
- for (size_t Idx = 0; Idx < N; Idx += Step) {
- uint64_t Bundle = *reinterpret_cast<uint64_t*>(&Counters[Idx]);
- if (!Bundle) continue;
- for (size_t i = Idx; i < Idx + Step; i++) {
- uint8_t Counter = (Bundle >> ((i - Idx) * 8)) & 0xff;
- if (!Counter) continue;
- Counters[i] = 0;
- unsigned Bit = 0;
- /**/ if (Counter >= 128) Bit = 7;
- else if (Counter >= 32) Bit = 6;
- else if (Counter >= 16) Bit = 5;
- else if (Counter >= 8) Bit = 4;
- else if (Counter >= 4) Bit = 3;
- else if (Counter >= 3) Bit = 2;
- else if (Counter >= 2) Bit = 1;
- size_t Feature = (i * 8 + Bit);
- if (CB(Feature))
- Res++;
- }
- }
+template <class Callback> // void Callback(size_t Idx, uint8_t Value);
+ATTRIBUTE_NO_SANITIZE_ALL
+void ForEachNonZeroByte(const uint8_t *Begin, const uint8_t *End,
+ size_t FirstFeature, Callback Handle8bitCounter) {
+ typedef uintptr_t LargeType;
+ const size_t Step = sizeof(LargeType) / sizeof(uint8_t);
+ assert(!(reinterpret_cast<uintptr_t>(Begin) % 64));
+ for (auto P = Begin; P < End; P += Step)
+ if (LargeType Bundle = *reinterpret_cast<const LargeType *>(P))
+ for (size_t I = 0; I < Step; I++, Bundle >>= 8)
+ if (uint8_t V = Bundle & 0xff)
+ Handle8bitCounter(FirstFeature + P - Begin + I, V);
+}
+
+template <class Callback> // bool Callback(size_t Feature)
+ATTRIBUTE_NO_SANITIZE_ALL
+__attribute__((noinline))
+void TracePC::CollectFeatures(Callback HandleFeature) const {
+ uint8_t *Counters = this->Counters();
+ size_t N = GetNumPCs();
+ auto Handle8bitCounter = [&](size_t Idx, uint8_t Counter) {
+ assert(Counter);
+ unsigned Bit = 0;
+ /**/ if (Counter >= 128) Bit = 7;
+ else if (Counter >= 32) Bit = 6;
+ else if (Counter >= 16) Bit = 5;
+ else if (Counter >= 8) Bit = 4;
+ else if (Counter >= 4) Bit = 3;
+ else if (Counter >= 3) Bit = 2;
+ else if (Counter >= 2) Bit = 1;
+ HandleFeature(Idx * 8 + Bit);
+ };
+
+ ForEachNonZeroByte(Counters, Counters + N, 0, Handle8bitCounter);
+ ForEachNonZeroByte(ExtraCountersBegin(), ExtraCountersEnd(), N * 8,
+ Handle8bitCounter);
+
if (UseValueProfile)
ValueProfileMap.ForEach([&](size_t Idx) {
- if (CB(NumGuards * 8 + Idx))
- Res++;
+ HandleFeature(N * 8 + Idx);
});
- return Res;
}
extern TracePC TPC;
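The if/else ladder inside CollectFeatures buckets each 8-bit counter into one of eight feature bits on a roughly logarithmic scale, so hit counts of 1, 3, and 200 on the same edge yield distinct features. A standalone sketch of the mapping (the ladder is copied from above; the checks are illustrative):

#include <cassert>
#include <cstddef>
#include <cstdint>

static unsigned CounterToBit(uint8_t Counter) {
  unsigned Bit = 0;
  /**/ if (Counter >= 128) Bit = 7;
  else if (Counter >= 32)  Bit = 6;
  else if (Counter >= 16)  Bit = 5;
  else if (Counter >= 8)   Bit = 4;
  else if (Counter >= 4)   Bit = 3;
  else if (Counter >= 3)   Bit = 2;
  else if (Counter >= 2)   Bit = 1;
  return Bit;
}

int main() {
  assert(CounterToBit(1) == 0);   // Buckets: 1,2,3,4-7,8-15,16-31,32-127,128+.
  assert(CounterToBit(3) == 2);
  assert(CounterToBit(200) == 7);
  size_t Idx = 42;                // Feature index = Idx * 8 + bucket bit.
  assert(Idx * 8 + CounterToBit(5) == 339);
  return 0;
}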
diff --git a/lib/Fuzzer/FuzzerTraceState.cpp b/lib/Fuzzer/FuzzerTraceState.cpp
index 2ad9702fab0e..a486223d650c 100644
--- a/lib/Fuzzer/FuzzerTraceState.cpp
+++ b/lib/Fuzzer/FuzzerTraceState.cpp
@@ -13,7 +13,6 @@
#include "FuzzerInternal.h"
#include "FuzzerIO.h"
#include "FuzzerMutate.h"
-#include "FuzzerRandom.h"
#include "FuzzerTracePC.h"
#include <algorithm>
#include <cstring>
@@ -23,19 +22,10 @@
namespace fuzzer {
-// For now, very simple: put Size bytes of Data at position Pos.
-struct TraceBasedMutation {
- uint32_t Pos;
- Word W;
-};
-
// Declared as static globals for faster checks inside the hooks.
-static bool RecordingMemcmp = false;
static bool RecordingMemmem = false;
-static bool DoingMyOwnMemmem = false;
-ScopedDoingMyOwnMemmem::ScopedDoingMyOwnMemmem() { DoingMyOwnMemmem = true; }
-ScopedDoingMyOwnMemmem::~ScopedDoingMyOwnMemmem() { DoingMyOwnMemmem = false; }
+int ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr;
class TraceState {
public:
@@ -43,64 +33,21 @@ public:
const Fuzzer *F)
: MD(MD), Options(Options), F(F) {}
- void TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
- const uint8_t *Data2);
-
- int TryToAddDesiredData(const uint8_t *PresentData,
- const uint8_t *DesiredData, size_t DataSize);
-
void StartTraceRecording() {
- if (!Options.UseMemcmp)
+ if (!Options.UseMemmem)
return;
- RecordingMemcmp = Options.UseMemcmp;
- RecordingMemmem = Options.UseMemmem;
- NumMutations = 0;
+ RecordingMemmem = true;
InterestingWords.clear();
MD.ClearAutoDictionary();
}
void StopTraceRecording() {
- if (!RecordingMemcmp)
+ if (!RecordingMemmem)
return;
- RecordingMemcmp = false;
- for (size_t i = 0; i < NumMutations; i++) {
- auto &M = Mutations[i];
- if (Options.Verbosity >= 2) {
- AutoDictUnitCounts[M.W]++;
- AutoDictAdds++;
- if ((AutoDictAdds & (AutoDictAdds - 1)) == 0) {
- typedef std::pair<size_t, Word> CU;
- std::vector<CU> CountedUnits;
- for (auto &I : AutoDictUnitCounts)
- CountedUnits.push_back(std::make_pair(I.second, I.first));
- std::sort(CountedUnits.begin(), CountedUnits.end(),
- [](const CU &a, const CU &b) { return a.first > b.first; });
- Printf("AutoDict:\n");
- for (auto &I : CountedUnits) {
- Printf(" %zd ", I.first);
- PrintASCII(I.second.data(), I.second.size());
- Printf("\n");
- }
- }
- }
- MD.AddWordToAutoDictionary({M.W, M.Pos});
- }
for (auto &W : InterestingWords)
MD.AddWordToAutoDictionary({W});
}
- void AddMutation(uint32_t Pos, uint32_t Size, const uint8_t *Data) {
- if (NumMutations >= kMaxMutations) return;
- auto &M = Mutations[NumMutations++];
- M.Pos = Pos;
- M.W.Set(Data, Size);
- }
-
- void AddMutation(uint32_t Pos, uint32_t Size, uint64_t Data) {
- assert(Size <= sizeof(Data));
- AddMutation(Pos, Size, reinterpret_cast<uint8_t*>(&Data));
- }
-
void AddInterestingWord(const uint8_t *Data, size_t Size) {
if (!RecordingMemmem || !F->InFuzzingThread()) return;
if (Size <= 1) return;
@@ -110,75 +57,14 @@ public:
}
private:
- bool IsTwoByteData(uint64_t Data) {
- int64_t Signed = static_cast<int64_t>(Data);
- Signed >>= 16;
- return Signed == 0 || Signed == -1L;
- }
-
- // We don't want to create too many trace-based mutations as it is both
- // expensive and useless. So after some number of mutations is collected,
- // start rejecting some of them. The more there are mutations the more we
- // reject.
- bool WantToHandleOneMoreMutation() {
- const size_t FirstN = 64;
- // Gladly handle first N mutations.
- if (NumMutations <= FirstN) return true;
- size_t Diff = NumMutations - FirstN;
- size_t DiffLog = sizeof(long) * 8 - __builtin_clzl((long)Diff);
- assert(DiffLog > 0 && DiffLog < 64);
- bool WantThisOne = MD.GetRand()(1 << DiffLog) == 0; // 1 out of DiffLog.
- return WantThisOne;
- }
- static const size_t kMaxMutations = 1 << 16;
- size_t NumMutations;
- TraceBasedMutation Mutations[kMaxMutations];
// TODO: std::set is too inefficient, need to have a custom DS here.
std::set<Word> InterestingWords;
MutationDispatcher &MD;
const FuzzingOptions Options;
const Fuzzer *F;
- std::map<Word, size_t> AutoDictUnitCounts;
- size_t AutoDictAdds = 0;
};
-int TraceState::TryToAddDesiredData(const uint8_t *PresentData,
- const uint8_t *DesiredData,
- size_t DataSize) {
- if (NumMutations >= kMaxMutations || !WantToHandleOneMoreMutation()) return 0;
- ScopedDoingMyOwnMemmem scoped_doing_my_own_memmem;
- const uint8_t *UnitData;
- auto UnitSize = F->GetCurrentUnitInFuzzingThead(&UnitData);
- int Res = 0;
- const uint8_t *Beg = UnitData;
- const uint8_t *End = Beg + UnitSize;
- for (const uint8_t *Cur = Beg; Cur < End; Cur++) {
- Cur = (uint8_t *)SearchMemory(Cur, End - Cur, PresentData, DataSize);
- if (!Cur)
- break;
- size_t Pos = Cur - Beg;
- assert(Pos < UnitSize);
- AddMutation(Pos, DataSize, DesiredData);
- Res++;
- }
- return Res;
-}
-
-void TraceState::TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
- const uint8_t *Data2) {
- if (!RecordingMemcmp || !F->InFuzzingThread()) return;
- CmpSize = std::min(CmpSize, Word::GetMaxSize());
- int Added2 = TryToAddDesiredData(Data1, Data2, CmpSize);
- int Added1 = TryToAddDesiredData(Data2, Data1, CmpSize);
- if ((Added1 || Added2) && Options.Verbosity >= 3) {
- Printf("MemCmp Added %d%d: ", Added1, Added2);
- if (Added1) PrintASCII(Data1, CmpSize);
- if (Added2) PrintASCII(Data2, CmpSize);
- Printf("\n");
- }
-}
-
static TraceState *TS;
void Fuzzer::StartTraceRecording() {
@@ -192,7 +78,7 @@ void Fuzzer::StopTraceRecording() {
}
void Fuzzer::InitializeTraceState() {
- if (!Options.UseMemcmp) return;
+ if (!Options.UseMemmem) return;
TS = new TraceState(MD, Options, this);
}
@@ -202,10 +88,17 @@ static size_t InternalStrnlen(const char *S, size_t MaxLen) {
return Len;
}
+// Finds min of (strlen(S1), strlen(S2)).
+// Needed bacause one of these strings may actually be non-zero terminated.
+static size_t InternalStrnlen2(const char *S1, const char *S2) {
+ size_t Len = 0;
+ for (; S1[Len] && S2[Len]; Len++) {}
+ return Len;
+}
+
} // namespace fuzzer
using fuzzer::TS;
-using fuzzer::RecordingMemcmp;
extern "C" {
@@ -215,62 +108,72 @@ extern "C" {
#endif
#if LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1,
const void *s2, size_t n, int result) {
- fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n);
- if (!RecordingMemcmp) return;
+ if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
if (result == 0) return; // No reason to mutate.
if (n <= 1) return; // Not interesting.
- TS->TraceMemcmpCallback(n, reinterpret_cast<const uint8_t *>(s1),
- reinterpret_cast<const uint8_t *>(s2));
+ fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n, /*StopAtZero*/false);
}
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1,
const char *s2, size_t n, int result) {
- fuzzer::TPC.AddValueForStrcmp(caller_pc, s1, s2, n);
- if (!RecordingMemcmp) return;
+ if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
if (result == 0) return; // No reason to mutate.
size_t Len1 = fuzzer::InternalStrnlen(s1, n);
size_t Len2 = fuzzer::InternalStrnlen(s2, n);
n = std::min(n, Len1);
n = std::min(n, Len2);
if (n <= 1) return; // Not interesting.
- TS->TraceMemcmpCallback(n, reinterpret_cast<const uint8_t *>(s1),
- reinterpret_cast<const uint8_t *>(s2));
+ fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n, /*StopAtZero*/true);
}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1,
const char *s2, int result) {
- fuzzer::TPC.AddValueForStrcmp(caller_pc, s1, s2, 64);
- if (!RecordingMemcmp) return;
+ if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
if (result == 0) return; // No reason to mutate.
- size_t Len1 = strlen(s1);
- size_t Len2 = strlen(s2);
- size_t N = std::min(Len1, Len2);
+ size_t N = fuzzer::InternalStrnlen2(s1, s2);
if (N <= 1) return; // Not interesting.
- TS->TraceMemcmpCallback(N, reinterpret_cast<const uint8_t *>(s1),
- reinterpret_cast<const uint8_t *>(s2));
+ fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, N, /*StopAtZero*/true);
}
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
void __sanitizer_weak_hook_strncasecmp(void *called_pc, const char *s1,
const char *s2, size_t n, int result) {
+ if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
return __sanitizer_weak_hook_strncmp(called_pc, s1, s2, n, result);
}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
void __sanitizer_weak_hook_strcasecmp(void *called_pc, const char *s1,
const char *s2, int result) {
+ if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
return __sanitizer_weak_hook_strcmp(called_pc, s1, s2, result);
}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
void __sanitizer_weak_hook_strstr(void *called_pc, const char *s1,
const char *s2, char *result) {
+ if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
TS->AddInterestingWord(reinterpret_cast<const uint8_t *>(s2), strlen(s2));
}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
void __sanitizer_weak_hook_strcasestr(void *called_pc, const char *s1,
const char *s2, char *result) {
+ if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
TS->AddInterestingWord(reinterpret_cast<const uint8_t *>(s2), strlen(s2));
}
+
+ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY
void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1,
const void *s2, size_t len2, void *result) {
- if (fuzzer::DoingMyOwnMemmem) return;
+ if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return;
TS->AddInterestingWord(reinterpret_cast<const uint8_t *>(s2), len2);
}
diff --git a/lib/Fuzzer/FuzzerUtil.h b/lib/Fuzzer/FuzzerUtil.h
index 08058c56e4c5..f84fd9ef0fce 100644
--- a/lib/Fuzzer/FuzzerUtil.h
+++ b/lib/Fuzzer/FuzzerUtil.h
@@ -67,6 +67,10 @@ inline std::string CloneArgsWithoutX(const std::vector<std::string> &Args,
return CloneArgsWithoutX(Args, X, X);
}
+std::string DisassembleCmd(const std::string &FileName);
+
+std::string SearchRegexCmd(const std::string &Regex);
+
} // namespace fuzzer
#endif // LLVM_FUZZER_UTIL_H
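These two helpers let PrintCoverage build one portable pipeline: objdump piped into grep on POSIX, dumpbin piped into findstr on Windows. A sketch of the composition as used in FuzzerTracePC.cpp above (the wrapper name is illustrative):

#include <string>
#include "FuzzerUtil.h"

std::string BuildCoverageCmd(const std::string &Module) {
  // On POSIX this expands to:
  //   objdump -d <Module> | grep 'call.*__sanitizer_cov_trace_pc_guard'
  return fuzzer::DisassembleCmd(Module) + " | " +
         fuzzer::SearchRegexCmd("call.*__sanitizer_cov_trace_pc_guard");
}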
diff --git a/lib/Fuzzer/FuzzerUtilPosix.cpp b/lib/Fuzzer/FuzzerUtilPosix.cpp
index e8d48dc81a3b..0161309fbf86 100644
--- a/lib/Fuzzer/FuzzerUtilPosix.cpp
+++ b/lib/Fuzzer/FuzzerUtilPosix.cpp
@@ -118,6 +118,14 @@ const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt,
return memmem(Data, DataLen, Patt, PattLen);
}
+std::string DisassembleCmd(const std::string &FileName) {
+ return "objdump -d " + FileName;
+}
+
+std::string SearchRegexCmd(const std::string &Regex) {
+ return "grep '" + Regex + "'";
+}
+
} // namespace fuzzer
#endif // LIBFUZZER_POSIX
diff --git a/lib/Fuzzer/FuzzerUtilWindows.cpp b/lib/Fuzzer/FuzzerUtilWindows.cpp
index 3ca1f2c8f562..08bb3cf3be15 100644
--- a/lib/Fuzzer/FuzzerUtilWindows.cpp
+++ b/lib/Fuzzer/FuzzerUtilWindows.cpp
@@ -28,7 +28,7 @@ namespace fuzzer {
static const FuzzingOptions* HandlerOpt = nullptr;
-LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) {
+static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) {
switch (ExceptionInfo->ExceptionRecord->ExceptionCode) {
case EXCEPTION_ACCESS_VIOLATION:
case EXCEPTION_ARRAY_BOUNDS_EXCEEDED:
@@ -126,10 +126,7 @@ void SetSignalHandler(const FuzzingOptions& Options) {
if (Options.HandleSegv || Options.HandleBus || Options.HandleIll ||
Options.HandleFpe)
- if (!AddVectoredExceptionHandler(1, ExceptionHandler)) {
- Printf("libFuzzer: AddVectoredExceptionHandler failed.\n");
- exit(1);
- }
+ SetUnhandledExceptionFilter(ExceptionHandler);
if (Options.HandleAbrt)
if (SIG_ERR == signal(SIGABRT, CrashHandler)) {
@@ -178,6 +175,17 @@ const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt,
return NULL;
}
+std::string DisassembleCmd(const std::string &FileName) {
+ if (ExecuteCommand("dumpbin /summary > nul") == 0)
+ return "dumpbin /disasm " + FileName;
+ Printf("libFuzzer: couldn't find tool to disassemble (dumpbin)\n");
+ exit(1);
+}
+
+std::string SearchRegexCmd(const std::string &Regex) {
+ return "findstr /r \"" + Regex + "\"";
+}
+
} // namespace fuzzer
#endif // LIBFUZZER_WINDOWS
diff --git a/lib/Fuzzer/FuzzerValueBitMap.h b/lib/Fuzzer/FuzzerValueBitMap.h
index 0692acd13ee3..8f7ff74300f4 100644
--- a/lib/Fuzzer/FuzzerValueBitMap.h
+++ b/lib/Fuzzer/FuzzerValueBitMap.h
@@ -18,19 +18,20 @@ namespace fuzzer {
// A bit map containing kMapSizeInWords bits.
struct ValueBitMap {
- static const size_t kMapSizeInBits = 65371; // Prime.
- static const size_t kMapSizeInBitsAligned = 65536; // 2^16
+ static const size_t kMapSizeInBits = 1 << 16;
+  static const size_t kMapPrimeMod = 65371;  // Largest prime < kMapSizeInBits.
static const size_t kBitsInWord = (sizeof(uintptr_t) * 8);
- static const size_t kMapSizeInWords = kMapSizeInBitsAligned / kBitsInWord;
+ static const size_t kMapSizeInWords = kMapSizeInBits / kBitsInWord;
public:
- static const size_t kNumberOfItems = kMapSizeInBits;
+
// Clears all bits.
void Reset() { memset(Map, 0, sizeof(Map)); }
// Computes a hash function of Value and sets the corresponding bit.
// Returns true if the bit was changed from 0 to 1.
+ ATTRIBUTE_NO_SANITIZE_ALL
inline bool AddValue(uintptr_t Value) {
- uintptr_t Idx = Value < kMapSizeInBits ? Value : Value % kMapSizeInBits;
+ uintptr_t Idx = Value % kMapSizeInBits;
uintptr_t WordIdx = Idx / kBitsInWord;
uintptr_t BitIdx = Idx % kBitsInWord;
uintptr_t Old = Map[WordIdx];
@@ -39,6 +40,11 @@ struct ValueBitMap {
return New != Old;
}
+ ATTRIBUTE_NO_SANITIZE_ALL
+ inline bool AddValueModPrime(uintptr_t Value) {
+ return AddValue(Value % kMapPrimeMod);
+ }
+
inline bool Get(uintptr_t Idx) {
assert(Idx < kMapSizeInBits);
uintptr_t WordIdx = Idx / kBitsInWord;
@@ -62,14 +68,15 @@ struct ValueBitMap {
Other.Map[i] = 0;
}
if (M)
- Res += __builtin_popcountl(M);
+ Res += __builtin_popcountll(M);
}
NumBits = Res;
return OldNumBits < NumBits;
}
template <class Callback>
- void ForEach(Callback CB) {
+ ATTRIBUTE_NO_SANITIZE_ALL
+ void ForEach(Callback CB) const {
for (size_t i = 0; i < kMapSizeInWords; i++)
if (uintptr_t M = Map[i])
for (size_t j = 0; j < sizeof(M) * 8; j++)
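Why AddValueModPrime exists alongside AddValue: reducing modulo 2^16 alone maps any power-of-two stride onto a handful of bits, while first reducing by the largest prime below 2^16 spreads such strides out. A small check of that property (constants as in the patch):

#include <cassert>
#include <cstdint>

int main() {
  const uintptr_t kMapSizeInBits = 1 << 16;
  const uintptr_t kMapPrimeMod = 65371;
  uintptr_t A = 0x10000, B = 0x20000;               // One 2^16 stride apart.
  assert(A % kMapSizeInBits == B % kMapSizeInBits); // Collide: both map to 0.
  assert(A % kMapPrimeMod != B % kMapPrimeMod);     // Spread: 165 vs. 330.
  return 0;
}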
diff --git a/lib/Fuzzer/afl/afl_driver.cpp b/lib/Fuzzer/afl/afl_driver.cpp
index fc9589552ba3..b3a54e57fceb 100644
--- a/lib/Fuzzer/afl/afl_driver.cpp
+++ b/lib/Fuzzer/afl/afl_driver.cpp
@@ -238,6 +238,13 @@ static void maybe_duplicate_stderr() {
}
}
+// Define LLVMFuzzerMutate to avoid link failures for targets that use it
+// with libFuzzer's LLVMFuzzerCustomMutator.
+extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) {
+ assert(false && "LLVMFuzzerMutate should not be called from afl_driver");
+ return 0;
+}
+
int main(int argc, char **argv) {
fprintf(stderr, "======================= INFO =========================\n"
"This binary is built for AFL-fuzz.\n"
diff --git a/lib/Fuzzer/build.sh b/lib/Fuzzer/build.sh
index 27c148ad43db..4556af5daf7d 100755
--- a/lib/Fuzzer/build.sh
+++ b/lib/Fuzzer/build.sh
@@ -1,7 +1,8 @@
#!/bin/bash
LIBFUZZER_SRC_DIR=$(dirname $0)
+CXX="${CXX:-clang}"
for f in $LIBFUZZER_SRC_DIR/*.cpp; do
- clang -g -O2 -fno-omit-frame-pointer -std=c++11 $f -c &
+ $CXX -g -O2 -fno-omit-frame-pointer -std=c++11 $f -c &
done
wait
rm -f libFuzzer.a
diff --git a/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp b/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp
index 577481431ae2..69b0d59fb8ef 100644
--- a/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp
+++ b/lib/Fuzzer/test/AbsNegAndConstant64Test.cpp
@@ -14,7 +14,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
uint64_t y;
memcpy(&x, Data, sizeof(x));
memcpy(&y, Data + sizeof(x), sizeof(y));
- if (labs(x) < 0 && y == 0xbaddcafedeadbeefUL) {
+ if (llabs(x) < 0 && y == 0xbaddcafedeadbeefULL) {
printf("BINGO; Found the target, exiting; x = 0x%lx y 0x%lx\n", x, y);
exit(1);
}
diff --git a/lib/Fuzzer/test/BadStrcmpTest.cpp b/lib/Fuzzer/test/BadStrcmpTest.cpp
new file mode 100644
index 000000000000..159cd7ea5f70
--- /dev/null
+++ b/lib/Fuzzer/test/BadStrcmpTest.cpp
@@ -0,0 +1,19 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test that we don't crash in case of bad strcmp params.
+#include <cstdint>
+#include <cstring>
+#include <cstddef>
+
+static volatile int Sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ if (Size != 10) return 0;
+ // Data is not zero-terminated, so this call is bad.
+  // Still, there are cases where such calls appear; see e.g.
+ // https://bugs.llvm.org/show_bug.cgi?id=32357
+ Sink = strcmp(reinterpret_cast<const char*>(Data), "123456789");
+ return 0;
+}
+
diff --git a/lib/Fuzzer/test/BogusInitializeTest.cpp b/lib/Fuzzer/test/BogusInitializeTest.cpp
new file mode 100644
index 000000000000..c7e81a5478b2
--- /dev/null
+++ b/lib/Fuzzer/test/BogusInitializeTest.cpp
@@ -0,0 +1,15 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Make sure LLVMFuzzerInitialize does not change argv[0].
+#include <stddef.h>
+#include <stdint.h>
+
+extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) {
+ ***argv = 'X';
+ return 0;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ return 0;
+}
diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt
index c0457746a0e7..f72bc3909a3c 100644
--- a/lib/Fuzzer/test/CMakeLists.txt
+++ b/lib/Fuzzer/test/CMakeLists.txt
@@ -11,21 +11,35 @@ set(variables_to_filter
LIBFUZZER_FLAGS_BASE
)
foreach (VARNAME ${variables_to_filter})
- string(REPLACE " " ";" BUILD_FLAGS_AS_LIST "${${VARNAME}}")
- set(new_flags "")
- foreach (flag ${BUILD_FLAGS_AS_LIST})
- # NOTE: Use of XX here is to avoid a CMake warning due to CMP0054
- if (NOT ("XX${flag}" MATCHES "XX-O[0123s]"))
- set(new_flags "${new_flags} ${flag}")
- else()
- set(new_flags "${new_flags} -O0")
- endif()
- endforeach()
- set(${VARNAME} "${new_flags}")
+ string(REGEX REPLACE "([-/]O)[123s]" "\\10" ${VARNAME} "${${VARNAME}}")
endforeach()
# Enable the coverage instrumentation (it is disabled for the Fuzzer lib).
-set(CMAKE_CXX_FLAGS "${LIBFUZZER_FLAGS_BASE} -fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp,trace-div,trace-gep -g")
+set(CMAKE_CXX_FLAGS "${LIBFUZZER_FLAGS_BASE} -fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp,trace-div,trace-gep -gline-tables-only")
+
+if(MSVC)
+  # For tests, use the CRT specified for the release build
+  # (ASan doesn't support MDd and MTd)
+ if ("${LLVM_USE_CRT_RELEASE}" STREQUAL "")
+ set(CRT_FLAG " /MD ")
+ else()
+ set(CRT_FLAG " /${LLVM_USE_CRT_RELEASE} ")
+ endif()
+  # In order to use the sanitizers on Windows, we need to link against many
+  # runtime libraries, which depend on the kind of target being created
+  # (executable or DLL) and the C runtime library used (MT/MD).
+  # By default, cmake uses link.exe for linking, which fails because we don't
+  # specify the appropriate dependencies.
+  # As we don't want to handle every possible situation, which depends
+  # on the implementation of compiler-rt, the simplest option is to change
+  # the rules for linking executables and shared libraries to use the compiler
+  # instead of link.exe. Clang will honor the sanitizer flags and
+  # automatically provide the required libraries to the linker.
+ set(CMAKE_CXX_LINK_EXECUTABLE "<CMAKE_CXX_COMPILER> <FLAGS> ${CMAKE_CXX_FLAGS} ${CRT_FLAG} <OBJECTS> -o <TARGET> <LINK_LIBRARIES> /link <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS>")
+ set(CMAKE_CXX_CREATE_SHARED_LIBRARY "<CMAKE_CXX_COMPILER> ${CMAKE_CXX_FLAGS} ${CRT_FLAG} /LD <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG> <TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES> /link <LINK_FLAGS>")
+endif()
+
+add_custom_target(TestBinaries)
# add_libfuzzer_test(<name>
# SOURCES source0.cpp [source1.cpp ...]
@@ -51,12 +65,9 @@ function(add_libfuzzer_test name)
PROPERTIES RUNTIME_OUTPUT_DIRECTORY
"${CMAKE_BINARY_DIR}/lib/Fuzzer/test"
)
- set(TestBinaries ${TestBinaries} LLVMFuzzer-${name} PARENT_SCOPE)
+ add_dependencies(TestBinaries LLVMFuzzer-${name})
endfunction()
-# Variable to keep track of all test targets
-set(TestBinaries)
-
###############################################################################
# Basic tests
###############################################################################
@@ -65,16 +76,23 @@ set(Tests
AbsNegAndConstantTest
AbsNegAndConstant64Test
AccumulateAllocationsTest
+ BadStrcmpTest
+ BogusInitializeTest
BufferOverflowOnInput
CallerCalleeTest
CounterTest
+ CustomCrossOverAndMutateTest
CustomCrossOverTest
CustomMutatorTest
+ CxxStringEqTest
DivTest
EmptyTest
+ EquivalenceATest
+ EquivalenceBTest
FourIndependentBranchesTest
FullCoverageSetTest
InitializeTest
+ Memcmp64BytesTest
MemcmpTest
LeakTest
LeakTimeoutTest
@@ -92,6 +110,7 @@ set(Tests
SimpleHashTest
SimpleTest
SimpleThreadedTest
+ SingleByteInputTest
SingleMemcmpTest
SingleStrcmpTest
SingleStrncmpTest
@@ -105,17 +124,19 @@ set(Tests
SwapCmpTest
SwitchTest
Switch2Test
+ TableLookupTest
ThreadedLeakTest
ThreadedTest
TimeoutTest
TimeoutEmptyTest
TraceMallocTest
+ TwoDifferentBugsTest
)
-if(APPLE)
- # LeakSanitizer is not supported on OSX right now
+if(APPLE OR MSVC)
+ # LeakSanitizer is not supported on OSX and Windows right now
set(HAS_LSAN 0)
- message(WARNING "LeakSanitizer is not supported on Apple platforms."
+ message(WARNING "LeakSanitizer is not supported."
" Building and running LibFuzzer LeakSanitizer tests is disabled."
)
else()
@@ -126,6 +147,17 @@ foreach(Test ${Tests})
add_libfuzzer_test(${Test} SOURCES ${Test}.cpp)
endforeach()
+function(test_export_symbol target symbol)
+ if(MSVC)
+ set_target_properties(LLVMFuzzer-${target} PROPERTIES LINK_FLAGS
+ "-export:${symbol}")
+ endif()
+endfunction()
+
+test_export_symbol(InitializeTest "LLVMFuzzerInitialize")
+test_export_symbol(BogusInitializeTest "LLVMFuzzerInitialize")
+test_export_symbol(CustomCrossOverTest "LLVMFuzzerCustomCrossOver")
+test_export_symbol(CustomMutatorTest "LLVMFuzzerCustomMutator")
###############################################################################
# Unit tests
@@ -150,13 +182,13 @@ target_include_directories(LLVMFuzzer-Unittest PRIVATE
"${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include"
)
-set(TestBinaries ${TestBinaries} LLVMFuzzer-Unittest)
+add_dependencies(TestBinaries LLVMFuzzer-Unittest)
set_target_properties(LLVMFuzzer-Unittest
PROPERTIES RUNTIME_OUTPUT_DIRECTORY
"${CMAKE_CURRENT_BINARY_DIR}"
)
-set(TestBinaries ${TestBinaries} LLVMFuzzer-StandaloneInitializeTest)
+add_dependencies(TestBinaries LLVMFuzzer-StandaloneInitializeTest)
set_target_properties(LLVMFuzzer-StandaloneInitializeTest
PROPERTIES RUNTIME_OUTPUT_DIRECTORY
"${CMAKE_CURRENT_BINARY_DIR}"
@@ -170,6 +202,7 @@ include_directories(..)
# add_subdirectory(uninstrumented)
add_subdirectory(no-coverage)
+add_subdirectory(trace-pc)
add_subdirectory(ubsan)
add_library(LLVMFuzzer-DSO1 SHARED DSO1.cpp)
@@ -187,12 +220,22 @@ target_link_libraries(LLVMFuzzer-DSOTest
set_target_properties(LLVMFuzzer-DSOTest PROPERTIES RUNTIME_OUTPUT_DIRECTORY
"${CMAKE_BINARY_DIR}/lib/Fuzzer/test")
-set_target_properties(LLVMFuzzer-DSO1 PROPERTIES LIBRARY_OUTPUT_DIRECTORY
- "${CMAKE_BINARY_DIR}/lib/Fuzzer/lib")
-set_target_properties(LLVMFuzzer-DSO2 PROPERTIES LIBRARY_OUTPUT_DIRECTORY
- "${CMAKE_BINARY_DIR}/lib/Fuzzer/lib")
-set(TestBinaries ${TestBinaries} LLVMFuzzer-DSOTest)
+if(MSVC)
+ set_output_directory(LLVMFuzzer-DSO1
+ BINARY_DIR "${CMAKE_BINARY_DIR}/lib/Fuzzer/test"
+ LIBRARY_DIR "${CMAKE_BINARY_DIR}/lib/Fuzzer/test")
+ set_output_directory(LLVMFuzzer-DSO2
+ BINARY_DIR "${CMAKE_BINARY_DIR}/lib/Fuzzer/test"
+ LIBRARY_DIR "${CMAKE_BINARY_DIR}/lib/Fuzzer/test")
+else(MSVC)
+ set_output_directory(LLVMFuzzer-DSO1
+ LIBRARY_DIR "${CMAKE_BINARY_DIR}/lib/Fuzzer/lib")
+ set_output_directory(LLVMFuzzer-DSO2
+ LIBRARY_DIR "${CMAKE_BINARY_DIR}/lib/Fuzzer/lib")
+endif()
+
+add_dependencies(TestBinaries LLVMFuzzer-DSOTest)
###############################################################################
# Configure lit to run the tests
@@ -200,6 +243,10 @@ set(TestBinaries ${TestBinaries} LLVMFuzzer-DSOTest)
# Note this is done after declaring all tests so we can inform lit if any tests
# need to be disabled.
###############################################################################
+set(LIBFUZZER_POSIX 1)
+if (MSVC)
+ set(LIBFUZZER_POSIX 0)
+endif()
configure_lit_site_cfg(
${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
@@ -213,5 +260,11 @@ configure_lit_site_cfg(
add_lit_testsuite(check-fuzzer "Running Fuzzer tests"
${CMAKE_CURRENT_BINARY_DIR}
- DEPENDS ${TestBinaries} FileCheck not
+ DEPENDS TestBinaries
)
+
+# Don't add dependencies on Windows. The linker step would fail on Windows,
+# since cmake will use link.exe for linking and won't include compiler-rt libs.
+if(NOT MSVC)
+ add_dependencies(check-fuzzer FileCheck sancov not)
+endif()
diff --git a/lib/Fuzzer/test/CustomCrossOverAndMutateTest.cpp b/lib/Fuzzer/test/CustomCrossOverAndMutateTest.cpp
new file mode 100644
index 000000000000..74fc939534ca
--- /dev/null
+++ b/lib/Fuzzer/test/CustomCrossOverAndMutateTest.cpp
@@ -0,0 +1,34 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test that libFuzzer does not crash when LLVMFuzzerMutate called from
+// LLVMFuzzerCustomCrossOver.
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <string.h>
+#include <string>
+#include <vector>
+
+#include "FuzzerInterface.h"
+
+static volatile int sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ std::string Str(reinterpret_cast<const char *>(Data), Size);
+ if (Size && Data[0] == '0')
+ sink++;
+ return 0;
+}
+
+extern "C" size_t LLVMFuzzerCustomCrossOver(const uint8_t *Data1, size_t Size1,
+ const uint8_t *Data2, size_t Size2,
+ uint8_t *Out, size_t MaxOutSize,
+ unsigned int Seed) {
+ std::vector<uint8_t> Buffer(MaxOutSize * 10);
+ LLVMFuzzerMutate(Buffer.data(), Buffer.size(), Buffer.size());
+ size_t Size = std::min(Size1, MaxOutSize);
+ memcpy(Out, Data1, Size);
+ return Size;
+}
diff --git a/lib/Fuzzer/test/CxxStringEqTest.cpp b/lib/Fuzzer/test/CxxStringEqTest.cpp
new file mode 100644
index 000000000000..e0e23c972ccb
--- /dev/null
+++ b/lib/Fuzzer/test/CxxStringEqTest.cpp
@@ -0,0 +1,25 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. Must find a specific string
+// used in std::string operator ==.
+#include <cstdint>
+#include <cstdlib>
+#include <cstddef>
+#include <string>
+#include <iostream>
+
+static volatile int Sink;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ std::string Str((const char*)Data, Size);
+ bool Eq = Str == "FooBar";
+ Sink = Str == "123456"; // Try to confuse the fuzzer
+ if (Eq) {
+ std::cout << "BINGO; Found the target, exiting\n";
+ std::cout.flush();
+ abort();
+ }
+ return 0;
+}
+
diff --git a/lib/Fuzzer/test/DSO1.cpp b/lib/Fuzzer/test/DSO1.cpp
index 4a293890f4b0..72a5ec4a0cde 100644
--- a/lib/Fuzzer/test/DSO1.cpp
+++ b/lib/Fuzzer/test/DSO1.cpp
@@ -2,7 +2,9 @@
// License. See LICENSE.TXT for details.
// Source code for a simple DSO.
-
+#ifdef _WIN32
+__declspec( dllexport )
+#endif
int DSO1(int a) {
if (a < 123456)
return 0;
diff --git a/lib/Fuzzer/test/DSO2.cpp b/lib/Fuzzer/test/DSO2.cpp
index 04b308d193ac..2967055dc227 100644
--- a/lib/Fuzzer/test/DSO2.cpp
+++ b/lib/Fuzzer/test/DSO2.cpp
@@ -2,7 +2,9 @@
// License. See LICENSE.TXT for details.
// Source code for a simple DSO.
-
+#ifdef _WIN32
+__declspec( dllexport )
+#endif
int DSO2(int a) {
if (a < 3598235)
return 0;
diff --git a/lib/Fuzzer/test/EquivalenceATest.cpp b/lib/Fuzzer/test/EquivalenceATest.cpp
new file mode 100644
index 000000000000..7d1ebb0f6a4a
--- /dev/null
+++ b/lib/Fuzzer/test/EquivalenceATest.cpp
@@ -0,0 +1,17 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+// Test for libFuzzer's "equivalence" fuzzing, part A.
+extern "C" void LLVMFuzzerAnnounceOutput(const uint8_t *Data, size_t Size);
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ // fprintf(stderr, "A %zd\n", Size);
+ uint8_t Result[50];
+ if (Size > 50) Size = 50;
+ for (size_t i = 0; i < Size; i++)
+ Result[Size - i - 1] = Data[i];
+ LLVMFuzzerAnnounceOutput(Result, Size);
+ return 0;
+}
diff --git a/lib/Fuzzer/test/EquivalenceBTest.cpp b/lib/Fuzzer/test/EquivalenceBTest.cpp
new file mode 100644
index 000000000000..b1de208b57f6
--- /dev/null
+++ b/lib/Fuzzer/test/EquivalenceBTest.cpp
@@ -0,0 +1,27 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+// Test for libFuzzer's "equivalence" fuzzing, part B.
+extern "C" void LLVMFuzzerAnnounceOutput(const uint8_t *Data, size_t Size);
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ // fprintf(stderr, "B %zd\n", Size);
+ uint8_t Result[50];
+ if (Size > 50) Size = 50;
+ for (size_t i = 0; i < Size; i++)
+ Result[Size - i - 1] = Data[i];
+
+ // Be a bit different from EquivalenceATest
+ if (Size > 10 && Data[5] == 'B' && Data[6] == 'C' && Data[7] == 'D') {
+ static int c;
+ if (!c)
+ fprintf(stderr, "ZZZZZZZ\n");
+ c = 1;
+ Result[2]++;
+ }
+
+ LLVMFuzzerAnnounceOutput(Result, Size);
+ return 0;
+}
diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp
index 4992ef57b6ca..78ea874f2ce2 100644
--- a/lib/Fuzzer/test/FuzzerUnittest.cpp
+++ b/lib/Fuzzer/test/FuzzerUnittest.cpp
@@ -10,10 +10,12 @@
#include "FuzzerDictionary.h"
#include "FuzzerMerge.h"
#include "FuzzerMutate.h"
+#include "FuzzerTracePC.h"
#include "FuzzerRandom.h"
#include "gtest/gtest.h"
#include <memory>
#include <set>
+#include <sstream>
using namespace fuzzer;
@@ -584,15 +586,15 @@ TEST(FuzzerUtil, Base64) {
TEST(Corpus, Distribution) {
Random Rand(0);
- InputCorpus C("");
+ std::unique_ptr<InputCorpus> C(new InputCorpus(""));
size_t N = 10;
size_t TriesPerUnit = 1<<16;
for (size_t i = 0; i < N; i++)
- C.AddToCorpus(Unit{ static_cast<uint8_t>(i) }, 0);
+ C->AddToCorpus(Unit{ static_cast<uint8_t>(i) }, 0);
std::vector<size_t> Hist(N);
for (size_t i = 0; i < N * TriesPerUnit; i++) {
- Hist[C.ChooseUnitIdxToMutate(Rand)]++;
+ Hist[C->ChooseUnitIdxToMutate(Rand)]++;
}
for (size_t i = 0; i < N; i++) {
// A weak sanity check that every unit gets invoked.
@@ -636,7 +638,10 @@ static void Merge(const std::string &Input,
Merger M;
std::vector<std::string> NewFiles;
EXPECT_TRUE(M.Parse(Input, true));
+ std::stringstream SS;
+ M.PrintSummary(SS);
EXPECT_EQ(NumNewFeatures, M.Merge(&NewFiles));
+ EXPECT_EQ(M.AllFeatures(), M.ParseSummary(SS));
EQ(NewFiles, Result);
}
@@ -706,6 +711,16 @@ TEST(Merge, Good) {
EQ(M.Files[2].Features, {1, 3, 6});
EXPECT_EQ(3U, M.Merge(&NewFiles));
EQ(NewFiles, {"B"});
+
+ // Same as the above, but with InitialFeatures.
+ EXPECT_TRUE(M.Parse("2\n0\nB\nC\n"
+ "STARTED 0 1001\nDONE 0 4 5 6 \n"
+ "STARTED 1 1002\nDONE 1 6 1 3\n"
+ "", true));
+ EQ(M.Files[0].Features, {4, 5, 6});
+ EQ(M.Files[1].Features, {1, 3, 6});
+ EXPECT_EQ(3U, M.Merge({1, 2, 3}, &NewFiles));
+ EQ(NewFiles, {"B"});
}
TEST(Merge, Merge) {
@@ -736,3 +751,25 @@ TEST(Merge, Merge) {
"STARTED 3 1000\nDONE 3 1 \n",
{"B", "D"}, 3);
}
+
+TEST(Fuzzer, ForEachNonZeroByte) {
+ const size_t N = 64;
+ alignas(64) uint8_t Ar[N + 8] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 2, 0, 0, 0, 0, 0, 0,
+ 0, 0, 3, 0, 4, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 5, 0, 6, 0, 0,
+ 0, 0, 0, 0, 0, 0, 7, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 8,
+ 9, 9, 9, 9, 9, 9, 9, 9,
+ };
+ typedef std::vector<std::pair<size_t, uint8_t> > Vec;
+ Vec Res, Expected;
+ auto CB = [&](size_t Idx, uint8_t V) { Res.push_back({Idx, V}); };
+ ForEachNonZeroByte(Ar, Ar + N, 100, CB);
+ Expected = {{108, 1}, {109, 2}, {118, 3}, {120, 4},
+ {135, 5}, {137, 6}, {146, 7}, {163, 8}};
+ EXPECT_EQ(Res, Expected);
+}
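The expected pairs above follow from the offset argument: each nonzero byte at index I in [Ar, Ar + N) is reported as (100 + I, value), so the 1 at index 8 becomes {108, 1}, and the trailing 9s at indices 64..71 lie past Ar + N and are never visited. A scalar reference sketch of that behavior, assuming the parameter order used by the call above (the real ForEachNonZeroByte in FuzzerTracePC.h is an optimized implementation):

#include <cstddef>
#include <cstdint>

// Reference semantics only: visit every nonzero byte and report its
// index shifted by FirstFeature, together with the byte's value.
template <class Callback>
void ForEachNonZeroByteRef(const uint8_t *Begin, const uint8_t *End,
                           size_t FirstFeature, Callback CB) {
  for (const uint8_t *P = Begin; P < End; P++)
    if (*P)
      CB(FirstFeature + static_cast<size_t>(P - Begin), *P);
}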
diff --git a/lib/Fuzzer/test/LargeTest.cpp b/lib/Fuzzer/test/LargeTest.cpp
new file mode 100644
index 000000000000..83ed61971801
--- /dev/null
+++ b/lib/Fuzzer/test/LargeTest.cpp
@@ -0,0 +1,37 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// A fuzz target with lots of edges.
+#include <cstdint>
+#include <cstdlib>
+
+static inline void break_optimization(const void *arg) {
+ __asm__ __volatile__("" : : "r" (arg) : "memory");
+}
+
+#define A \
+ do { \
+ i++; \
+ c++; \
+ if (Data[(i + __LINE__) % Size] == (c % 256)) \
+ break_optimization(Data); \
+ else \
+ break_optimization(0); \
+ } while (0)
+
+// for (int i = 0, n = Data[(__LINE__ - 1) % Size] % 16; i < n; i++)
+
+#define B do{A; A; A; A; A; A; A; A; A; A; A; A; A; A; A; A; A; A; }while(0)
+#define C do{B; B; B; B; B; B; B; B; B; B; B; B; B; B; B; B; B; B; }while(0)
+#define D do{C; C; C; C; C; C; C; C; C; C; C; C; C; C; C; C; C; C; }while(0)
+#define E do{D; D; D; D; D; D; D; D; D; D; D; D; D; D; D; D; D; D; }while(0)
+
+volatile int sink;
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ if (!Size) return 0;
+ int c = 0;
+ int i = 0;
+ D;
+ return 0;
+}
+
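For scale, the macro nesting above multiplies out as follows; each A is one data-dependent comparison, i.e. roughly one edge:

  A          = 1
  B = 18 * A = 18
  C = 18 * B = 324
  D = 18 * C = 5832    (the body of LLVMFuzzerTestOneInput above)
  E = 18 * D = 104976  (defined but not invoked)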
diff --git a/lib/Fuzzer/test/LoadTest.cpp b/lib/Fuzzer/test/LoadTest.cpp
index c1780d5c7bd9..eef16c7be51e 100644
--- a/lib/Fuzzer/test/LoadTest.cpp
+++ b/lib/Fuzzer/test/LoadTest.cpp
@@ -14,7 +14,7 @@ int array[kArraySize];
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
if (Size < 8) return 0;
- size_t a = 0;
+ uint64_t a = 0;
memcpy(&a, Data, 8);
Sink = array[a % (kArraySize + 1)];
return 0;
diff --git a/lib/Fuzzer/test/Memcmp64BytesTest.cpp b/lib/Fuzzer/test/Memcmp64BytesTest.cpp
new file mode 100644
index 000000000000..e81526b578a3
--- /dev/null
+++ b/lib/Fuzzer/test/Memcmp64BytesTest.cpp
@@ -0,0 +1,20 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. The fuzzer must find a particular string.
+#include <cassert>
+#include <cstring>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ const char kString64Bytes[] =
+ "123456789 123456789 123456789 123456789 123456789 123456789 1234";
+ assert(sizeof(kString64Bytes) == 65);
+ if (Size >= 64 && memcmp(Data, kString64Bytes, 64) == 0) {
+ fprintf(stderr, "BINGO\n");
+ exit(1);
+ }
+ return 0;
+}
diff --git a/lib/Fuzzer/test/UninstrumentedTest.cpp b/lib/Fuzzer/test/NotinstrumentedTest.cpp
index ffe952c749d2..ffe952c749d2 100644
--- a/lib/Fuzzer/test/UninstrumentedTest.cpp
+++ b/lib/Fuzzer/test/NotinstrumentedTest.cpp
diff --git a/lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp b/lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp
index ea23a601aa23..316b7682b8e6 100644
--- a/lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp
+++ b/lib/Fuzzer/test/OutOfMemorySingleLargeMallocTest.cpp
@@ -15,7 +15,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
if (Size > 0 && Data[0] == 'H') {
if (Size > 1 && Data[1] == 'i') {
if (Size > 2 && Data[2] == '!') {
- size_t kSize = 0xff000000U;
+ size_t kSize = 0x20000000U;
char *p = new char[kSize];
SinkPtr = p;
delete [] p;
diff --git a/lib/Fuzzer/test/RepeatedMemcmp.cpp b/lib/Fuzzer/test/RepeatedMemcmp.cpp
index a327bbee7815..7377f65ed76d 100644
--- a/lib/Fuzzer/test/RepeatedMemcmp.cpp
+++ b/lib/Fuzzer/test/RepeatedMemcmp.cpp
@@ -8,13 +8,16 @@
#include <cstdlib>
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
- int Matches = 0;
- for (size_t i = 0; i + 2 < Size; i += 3) {
- const char *Pat = i % 2 ? "foo" : "bar";
- if (!memcmp(Data + i, Pat, 3))
- Matches++;
- }
- if (Matches > 20) {
+ int Matches1 = 0;
+ for (size_t i = 0; i + 2 < Size; i += 3)
+ if (!memcmp(Data + i, "foo", 3))
+ Matches1++;
+ int Matches2 = 0;
+ for (size_t i = 0; i + 2 < Size; i += 3)
+ if (!memcmp(Data + i, "bar", 3))
+ Matches2++;
+
+ if (Matches1 > 10 && Matches2 > 10) {
fprintf(stderr, "BINGO!\n");
exit(1);
}
diff --git a/lib/Fuzzer/test/SimpleCmpTest.cpp b/lib/Fuzzer/test/SimpleCmpTest.cpp
index 0220c30f9a6b..12b5cdda0660 100644
--- a/lib/Fuzzer/test/SimpleCmpTest.cpp
+++ b/lib/Fuzzer/test/SimpleCmpTest.cpp
@@ -26,12 +26,13 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
memcpy(&y, Data + 8, 8); // 16
memcpy(&z, Data + 16, sizeof(z)); // 20
memcpy(&a, Data + 20, sizeof(a)); // 22
+ const bool k32bit = sizeof(void*) == 4;
- if (x > 1234567890 && PrintOnce(__LINE__) &&
- x < 1234567895 && PrintOnce(__LINE__) &&
+ if ((k32bit || x > 1234567890) && PrintOnce(__LINE__) &&
+ (k32bit || x < 1234567895) && PrintOnce(__LINE__) &&
a == 0x4242 && PrintOnce(__LINE__) &&
- y >= 987654321 && PrintOnce(__LINE__) &&
- y <= 987654325 && PrintOnce(__LINE__) &&
+ (k32bit || y >= 987654321) && PrintOnce(__LINE__) &&
+ (k32bit || y <= 987654325) && PrintOnce(__LINE__) &&
z < -10000 && PrintOnce(__LINE__) &&
z >= -10005 && PrintOnce(__LINE__) &&
z != -10003 && PrintOnce(__LINE__) &&
diff --git a/lib/Fuzzer/test/SingleByteInputTest.cpp b/lib/Fuzzer/test/SingleByteInputTest.cpp
new file mode 100644
index 000000000000..4ce819d230ce
--- /dev/null
+++ b/lib/Fuzzer/test/SingleByteInputTest.cpp
@@ -0,0 +1,17 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer; a single byte is enough to make it crash.
+#include <cstdint>
+#include <cstdlib>
+#include <cstddef>
+#include <cstdio>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ if (Size > 0 && Data[Size/2] == 42) {
+ fprintf(stderr, "BINGO\n");
+ abort();
+ }
+ return 0;
+}
+
diff --git a/lib/Fuzzer/test/SingleStrcmpTest.cpp b/lib/Fuzzer/test/SingleStrcmpTest.cpp
index 73470b527eeb..48f481dfc51a 100644
--- a/lib/Fuzzer/test/SingleStrcmpTest.cpp
+++ b/lib/Fuzzer/test/SingleStrcmpTest.cpp
@@ -8,10 +8,14 @@
#include <cstdlib>
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
- char *S = (char*)Data;
- if (Size >= 7 && !strcmp(S, "qwerty")) {
- fprintf(stderr, "BINGO\n");
- exit(1);
+ if (Size >= 7) {
+ char Copy[7];
+ memcpy(Copy, Data, 6);
+ Copy[6] = 0;
+ if (!strcmp(Copy, "qwerty")) {
+ fprintf(stderr, "BINGO\n");
+ exit(1);
+ }
}
return 0;
}
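The fix above is an instance of a general rule for string functions in fuzz targets: strcmp requires a NUL-terminated argument, but Data is a raw, possibly unterminated buffer, so a bounded prefix is copied into a local buffer and terminated first. A hedged sketch of the pattern (PrefixEquals and its buffer size are illustrative, not part of the test):

#include <cstddef>
#include <cstdint>
#include <cstring>

// Compare the first strlen(Lit) bytes of Data against Lit via strcmp,
// after copying them into a NUL-terminated local buffer.
static bool PrefixEquals(const uint8_t *Data, size_t Size, const char *Lit) {
  size_t N = std::strlen(Lit);
  if (Size < N + 1 || N >= 64) return false;  // mirrors the test's Size >= 7 guard
  char Buf[64];
  std::memcpy(Buf, Data, N);
  Buf[N] = '\0';
  return std::strcmp(Buf, Lit) == 0;
}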
diff --git a/lib/Fuzzer/test/SingleStrncmpTest.cpp b/lib/Fuzzer/test/SingleStrncmpTest.cpp
index dbcc464b0a78..e5601da86329 100644
--- a/lib/Fuzzer/test/SingleStrncmpTest.cpp
+++ b/lib/Fuzzer/test/SingleStrncmpTest.cpp
@@ -9,7 +9,8 @@
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
char *S = (char*)Data;
- if (Size >= 6 && !strncmp(S, "qwerty", 6)) {
+ volatile auto Strncmp = &(strncmp); // Make sure strncmp is not inlined.
+ if (Size >= 6 && !Strncmp(S, "qwerty", 6)) {
fprintf(stderr, "BINGO\n");
exit(1);
}
diff --git a/lib/Fuzzer/test/SwapCmpTest.cpp b/lib/Fuzzer/test/SwapCmpTest.cpp
index f79db4ccf714..b90ac72c22c4 100644
--- a/lib/Fuzzer/test/SwapCmpTest.cpp
+++ b/lib/Fuzzer/test/SwapCmpTest.cpp
@@ -19,8 +19,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
x = __builtin_bswap64(x);
y = __builtin_bswap32(y);
z = __builtin_bswap16(z);
+ const bool k32bit = sizeof(void*) == 4;
- if (x == 0x46555A5A5A5A5546ULL &&
+ if ((k32bit || x == 0x46555A5A5A5A5546ULL) &&
z == 0x4F4B &&
y == 0x66757A7A &&
true
diff --git a/lib/Fuzzer/test/TableLookupTest.cpp b/lib/Fuzzer/test/TableLookupTest.cpp
new file mode 100644
index 000000000000..f9d5610820ff
--- /dev/null
+++ b/lib/Fuzzer/test/TableLookupTest.cpp
@@ -0,0 +1,45 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Make sure the fuzzer eventually finds all possible values of a variable
+// within a range.
+#include <cstring>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cassert>
+#include <set>
+
+const size_t N = 1 << 12;
+
+// Define an array of counters that will be understood by libFuzzer
+// as extra coverage signal. The array must be:
+// * uint8_t
+// * aligned to 64 bytes
+// * in the section named __libfuzzer_extra_counters.
+// The target code may declare more than one such array.
+//
+// Use either `Counters[Idx] = 1` or `Counters[Idx]++;`
+// depending on whether multiple occurrences of the event 'Idx'
+// are important to distinguish from one occurrence.
+#ifdef __linux__
+alignas(64) __attribute__((section("__libfuzzer_extra_counters")))
+#endif
+static uint8_t Counters[N];
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ static std::set<uint16_t> SeenIdx;
+ if (Size != 4) return 0;
+ uint32_t Idx;
+ memcpy(&Idx, Data, 4);
+ Idx %= N;
+ assert(Counters[Idx] == 0); // libFuzzer should reset these between the runs.
+ // Or Counters[Idx]=1 if we don't care how many times this happened.
+ Counters[Idx]++;
+ SeenIdx.insert(Idx);
+ if (SeenIdx.size() == N) {
+ fprintf(stderr, "BINGO: found all values\n");
+ abort();
+ }
+ return 0;
+}
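The comment block above also mentions the one-shot form `Counters[Idx] = 1`. A minimal sketch of that variant, for targets where only the fact that event Idx fired matters (EventSeen is a hypothetical second array; the comment notes a target may declare several):

#include <cstddef>
#include <cstdint>

#ifdef __linux__
alignas(64) __attribute__((section("__libfuzzer_extra_counters")))
#endif
static uint8_t EventSeen[256];

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
  if (Size == 0) return 0;
  EventSeen[Data[0]] = 1;  // one-shot: repeated hits add no new signal
  return 0;
}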
diff --git a/lib/Fuzzer/test/TwoDifferentBugsTest.cpp b/lib/Fuzzer/test/TwoDifferentBugsTest.cpp
new file mode 100644
index 000000000000..42c0d192ba86
--- /dev/null
+++ b/lib/Fuzzer/test/TwoDifferentBugsTest.cpp
@@ -0,0 +1,22 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Simple test for a fuzzer. This test may trigger two different bugs.
+#include <cstdint>
+#include <cstdlib>
+#include <cstddef>
+#include <iostream>
+
+static volatile int *Null = 0;
+
+void Foo() { Null[1] = 0; }
+void Bar() { Null[2] = 0; }
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+ if (Size < 10 && Data[0] == 'H')
+ Foo();
+ if (Size >= 10 && Data[0] == 'H')
+ Bar();
+ return 0;
+}
+
diff --git a/lib/Fuzzer/test/afl-driver-extra-stats.test b/lib/Fuzzer/test/afl-driver-extra-stats.test
index 81e384e7dad2..1b0818e55ea5 100644
--- a/lib/Fuzzer/test/afl-driver-extra-stats.test
+++ b/lib/Fuzzer/test/afl-driver-extra-stats.test
@@ -1,3 +1,5 @@
+REQUIRES: posix
+
; Test that not specifying an extra stats file isn't broken.
RUN: unset AFL_DRIVER_EXTRA_STATS_FILENAME
RUN: AFLDriverTest
diff --git a/lib/Fuzzer/test/afl-driver-stderr.test b/lib/Fuzzer/test/afl-driver-stderr.test
index c0f9c8398c2a..e835acd4275b 100644
--- a/lib/Fuzzer/test/afl-driver-stderr.test
+++ b/lib/Fuzzer/test/afl-driver-stderr.test
@@ -1,3 +1,5 @@
+REQUIRES: posix
+
; Test that not specifying a stderr file isn't broken.
RUN: unset AFL_DRIVER_STDERR_DUPLICATE_FILENAME
RUN: AFLDriverTest
diff --git a/lib/Fuzzer/test/bad-strcmp.test b/lib/Fuzzer/test/bad-strcmp.test
new file mode 100644
index 000000000000..9a2f3742a5f4
--- /dev/null
+++ b/lib/Fuzzer/test/bad-strcmp.test
@@ -0,0 +1 @@
+RUN: LLVMFuzzer-BadStrcmpTest -runs=100000
diff --git a/lib/Fuzzer/test/coverage.test b/lib/Fuzzer/test/coverage.test
index fa11be502ef9..ff3fdff57a3d 100644
--- a/lib/Fuzzer/test/coverage.test
+++ b/lib/Fuzzer/test/coverage.test
@@ -1,9 +1,11 @@
+XFAIL: darwin
+
CHECK: COVERAGE:
CHECK-DAG: COVERED: {{.*}}in LLVMFuzzerTestOneInput {{.*}}NullDerefTest.cpp:13
CHECK-DAG: COVERED: {{.*}}in LLVMFuzzerTestOneInput {{.*}}NullDerefTest.cpp:14
CHECK-DAG: COVERED: {{.*}}in LLVMFuzzerTestOneInput {{.*}}NullDerefTest.cpp:16
CHECK-DAG: COVERED: {{.*}}in LLVMFuzzerTestOneInput {{.*}}NullDerefTest.cpp:19
-CHECK: COVERED_DIRS: {{.*}}lib/Fuzzer/test
+CHECK: COVERED_DIRS: {{.*}}lib{{[/\\]}}Fuzzer{{[/\\]}}test
RUN: not LLVMFuzzer-NullDerefTest -print_coverage=1 2>&1 | FileCheck %s
RUN: LLVMFuzzer-DSOTest -print_coverage=1 -runs=0 2>&1 | FileCheck %s --check-prefix=DSO
diff --git a/lib/Fuzzer/test/cxxstring.test b/lib/Fuzzer/test/cxxstring.test
new file mode 100644
index 000000000000..c60d7aee9686
--- /dev/null
+++ b/lib/Fuzzer/test/cxxstring.test
@@ -0,0 +1,2 @@
+RUN: not LLVMFuzzer-CxxStringEqTest -seed=1 -runs=1000000 2>&1 | FileCheck %s
+CHECK: BINGO
diff --git a/lib/Fuzzer/test/disable-leaks.test b/lib/Fuzzer/test/disable-leaks.test
new file mode 100644
index 000000000000..467b64ccc6f4
--- /dev/null
+++ b/lib/Fuzzer/test/disable-leaks.test
@@ -0,0 +1,4 @@
+REQUIRES: lsan
+RUN: LLVMFuzzer-AccumulateAllocationsTest -detect_leaks=1 -runs=100000 2>&1 | FileCheck %s --check-prefix=ACCUMULATE_ALLOCS
+ACCUMULATE_ALLOCS: INFO: libFuzzer disabled leak detection after every mutation
+
diff --git a/lib/Fuzzer/test/dump_coverage.test b/lib/Fuzzer/test/dump_coverage.test
index 9bd98daa3619..8acc8304fc60 100644
--- a/lib/Fuzzer/test/dump_coverage.test
+++ b/lib/Fuzzer/test/dump_coverage.test
@@ -1,16 +1,14 @@
-RUN: DIR=%t_workdir
-RUN: BUILD_DIR=$(pwd)
-RUN: rm -rf $DIR && mkdir -p $DIR && cd $DIR
-RUN: not $BUILD_DIR/LLVMFuzzer-NullDerefTest -dump_coverage=1 2>&1 | FileCheck %s
-RUN: $BUILD_DIR/LLVMFuzzer-DSOTest -dump_coverage=1 -runs=0 2>&1 | FileCheck %s --check-prefix=DSO
-RUN: not $BUILD_DIR/LLVMFuzzer-NullDerefTest -dump_coverage=0 2>&1 | FileCheck %s --check-prefix=NOCOV
-RUN: rm -rf $DIR
-
-
-CHECK: SanitizerCoverage: ./LLVMFuzzer-NullDerefTest.{{.*}}.sancov {{.*}} PCs written
-
-DSO: SanitizerCoverage: ./LLVMFuzzer-DSOTest.{{.*}}.sancov {{.*}} PCs written
-DSO-DAG: SanitizerCoverage: ./libLLVMFuzzer-DSO1.{{.*}}.sancov {{.*}} PCs written
-DSO-DAG: SanitizerCoverage: ./libLLVMFuzzer-DSO2.{{.*}}.sancov {{.*}} PCs written
+RUN: rm -rf %t_workdir && mkdir -p %t_workdir
+RUN: env ASAN_OPTIONS=coverage_dir='"%t_workdir"' not LLVMFuzzer-NullDerefTest -dump_coverage=1 2>&1 | FileCheck %s
+RUN: sancov -covered-functions LLVMFuzzer-NullDerefTest* %t_workdir/*.sancov | FileCheck %s --check-prefix=SANCOV
+RUN: env ASAN_OPTIONS=coverage_dir='"%t_workdir"' LLVMFuzzer-DSOTest -dump_coverage=1 -runs=0 2>&1 | FileCheck %s --check-prefix=DSO
+RUN: env ASAN_OPTIONS=coverage_dir='"%t_workdir"' not LLVMFuzzer-NullDerefTest -dump_coverage=0 2>&1 | FileCheck %s --check-prefix=NOCOV
+
+CHECK: SanitizerCoverage: {{.*}}LLVMFuzzer-NullDerefTest.{{.*}}.sancov {{.*}} PCs written
+SANCOV: LLVMFuzzerTestOneInput
+
+DSO: SanitizerCoverage: {{.*}}LLVMFuzzer-DSOTest.{{.*}}.sancov {{.*}} PCs written
+DSO-DAG: SanitizerCoverage: {{.*}}LLVMFuzzer-DSO1.{{.*}}.sancov {{.*}} PCs written
+DSO-DAG: SanitizerCoverage: {{.*}}LLVMFuzzer-DSO2.{{.*}}.sancov {{.*}} PCs written
NOCOV-NOT: SanitizerCoverage: {{.*}} PCs written
diff --git a/lib/Fuzzer/test/equivalence-signals.test b/lib/Fuzzer/test/equivalence-signals.test
new file mode 100644
index 000000000000..81a7f37602cc
--- /dev/null
+++ b/lib/Fuzzer/test/equivalence-signals.test
@@ -0,0 +1,9 @@
+REQUIRES: posix
+# Run EquivalenceATest against itself with a small timeout
+# to stress the signal handling and ensure that the shared-memory
+# channel tolerates the signals.
+
+RUN: LLVMFuzzer-EquivalenceATest -timeout=1 -run_equivalence_server=EQUIV_SIG_TEST & export APID=$!
+RUN: sleep 3
+RUN: LLVMFuzzer-EquivalenceATest -timeout=1 -use_equivalence_server=EQUIV_SIG_TEST -runs=500000 2>&1
+RUN: kill -9 $APID
diff --git a/lib/Fuzzer/test/equivalence.test b/lib/Fuzzer/test/equivalence.test
new file mode 100644
index 000000000000..015ba855c600
--- /dev/null
+++ b/lib/Fuzzer/test/equivalence.test
@@ -0,0 +1,8 @@
+REQUIRES: posix
+
+RUN: LLVMFuzzer-EquivalenceATest -run_equivalence_server=EQUIV_TEST & export APID=$!
+RUN: sleep 3
+RUN: not LLVMFuzzer-EquivalenceBTest -use_equivalence_server=EQUIV_TEST -max_len=4096 2>&1 | FileCheck %s
+CHECK: ERROR: libFuzzer: equivalence-mismatch. Sizes: {{.*}}; offset 2
+CHECK: SUMMARY: libFuzzer: equivalence-mismatch
+RUN: kill -9 $APID
diff --git a/lib/Fuzzer/test/extra-counters.test b/lib/Fuzzer/test/extra-counters.test
new file mode 100644
index 000000000000..61fce44784b7
--- /dev/null
+++ b/lib/Fuzzer/test/extra-counters.test
@@ -0,0 +1,6 @@
+REQUIRES: linux
+
+RUN: not LLVMFuzzer-TableLookupTest -print_final_stats=1 2>&1 | FileCheck %s
+CHECK: BINGO
+// Expecting >= 4096 new_units_added
+CHECK: stat::new_units_added:{{.*[4][0-9][0-9][0-9]}}
diff --git a/lib/Fuzzer/test/fuzzer-customcrossover.test b/lib/Fuzzer/test/fuzzer-customcrossover.test
index 28d39ce31dec..ccf8261af8ad 100644
--- a/lib/Fuzzer/test/fuzzer-customcrossover.test
+++ b/lib/Fuzzer/test/fuzzer-customcrossover.test
@@ -2,7 +2,7 @@ RUN: rm -rf %t/CustomCrossover
RUN: mkdir -p %t/CustomCrossover
RUN: echo "0123456789" > %t/CustomCrossover/digits
RUN: echo "abcdefghij" > %t/CustomCrossover/chars
-RUN: not LLVMFuzzer-CustomCrossOverTest -seed=1 -use_memcmp=0 -runs=100000 %t/CustomCrossover 2>&1 | FileCheck %s --check-prefix=LLVMFuzzerCustomCrossover
+RUN: not LLVMFuzzer-CustomCrossOverTest -seed=1 -runs=100000 %t/CustomCrossover 2>&1 | FileCheck %s --check-prefix=LLVMFuzzerCustomCrossover
RUN: rm -rf %t/CustomCrossover
LLVMFuzzerCustomCrossover: In LLVMFuzzerCustomCrossover
diff --git a/lib/Fuzzer/test/fuzzer-customcrossoverandmutate.test b/lib/Fuzzer/test/fuzzer-customcrossoverandmutate.test
new file mode 100644
index 000000000000..1e322ec0da63
--- /dev/null
+++ b/lib/Fuzzer/test/fuzzer-customcrossoverandmutate.test
@@ -0,0 +1 @@
+RUN: LLVMFuzzer-CustomCrossOverAndMutateTest -seed=1 -runs=100000
diff --git a/lib/Fuzzer/test/fuzzer-dirs.test b/lib/Fuzzer/test/fuzzer-dirs.test
index 63afe8dfcf9c..3de64f278f5d 100644
--- a/lib/Fuzzer/test/fuzzer-dirs.test
+++ b/lib/Fuzzer/test/fuzzer-dirs.test
@@ -5,9 +5,9 @@ RUN: echo b > %t/SUB1/SUB2/b
RUN: echo c > %t/SUB1/SUB2/SUB3/c
RUN: LLVMFuzzer-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=SUBDIRS
SUBDIRS: READ units: 3
-RUN: echo zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/long
+RUN: echo -n zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/long
RUN: LLVMFuzzer-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=LONG
-LONG: INFO: -max_len is not provided, using 94
+LONG: INFO: -max_len is not provided, using 93
RUN: rm -rf %t/SUB1
RUN: not LLVMFuzzer-SimpleTest NONEXISTENT_DIR 2>&1 | FileCheck %s --check-prefix=NONEXISTENT_DIR
diff --git a/lib/Fuzzer/test/fuzzer-jobs.test b/lib/Fuzzer/test/fuzzer-jobs.test
deleted file mode 100644
index 5bf8cfadfb75..000000000000
--- a/lib/Fuzzer/test/fuzzer-jobs.test
+++ /dev/null
@@ -1,29 +0,0 @@
-RUN: rm -rf %tmp
-RUN: mkdir %tmp && cd %tmp
-# Create a shared corpus directory
-RUN: rm -rf FuzzerJobsTestCORPUS
-RUN: mkdir FuzzerJobsTestCORPUS
-RUN: rm -f fuzz-{0,1}.log
-# Start fuzzer and in parallel check that the output files
-# that should be created exist.
-RUN: LLVMFuzzer-EmptyTest -max_total_time=4 -jobs=2 -workers=2 FuzzerJobsTestCORPUS > %t-fuzzer-jobs-test.log 2>&1 & export FUZZER_PID=$!
-# Wait a short while to give time for the child processes
-# to start fuzzing
-RUN: sleep 2
-# If the instances are running in parallel they should have created their log
-# files by now.
-RUN: ls fuzz-0.log
-RUN: ls fuzz-1.log
-# Wait for libfuzzer to finish.
-# This probably isn't portable but we need a way to block until
-# the fuzzer is done otherwise we might remove the files while
-# they are being used.
-RUN: while kill -0 ${FUZZER_PID}; do : ; done
-RUN: rm -f fuzz-{0,1}.log
-RUN: rm -rf FuzzerJobsTestCORPUS
-RUN: FileCheck -input-file=%t-fuzzer-jobs-test.log %s
-RUN: rm %t-fuzzer-jobs-test.log
-RUN: cd ../
-
-CHECK-DAG: Job 0 exited with exit code 0
-CHECK-DAG: Job 1 exited with exit code 0
diff --git a/lib/Fuzzer/test/fuzzer-leak.test b/lib/Fuzzer/test/fuzzer-leak.test
index 9cf5c743fff5..13e3ad740e6d 100644
--- a/lib/Fuzzer/test/fuzzer-leak.test
+++ b/lib/Fuzzer/test/fuzzer-leak.test
@@ -29,7 +29,5 @@ RUN: not LLVMFuzzer-LeakTimeoutTest -timeout=1 2>&1 | FileCheck %s --check-prefi
LEAK_TIMEOUT: ERROR: libFuzzer: timeout after
LEAK_TIMEOUT-NOT: LeakSanitizer
-RUN: LLVMFuzzer-AccumulateAllocationsTest -detect_leaks=1 -runs=100000 2>&1 | FileCheck %s --check-prefix=ACCUMULATE_ALLOCS
-ACCUMULATE_ALLOCS: INFO: libFuzzer disabled leak detection after every mutation
RUN: LLVMFuzzer-LeakTest -error_exitcode=0
diff --git a/lib/Fuzzer/test/fuzzer-oom.test b/lib/Fuzzer/test/fuzzer-oom.test
index 8caf649e9f04..e9d33552723e 100644
--- a/lib/Fuzzer/test/fuzzer-oom.test
+++ b/lib/Fuzzer/test/fuzzer-oom.test
@@ -1,10 +1,12 @@
+XFAIL: darwin
RUN: not LLVMFuzzer-OutOfMemoryTest -rss_limit_mb=300 2>&1 | FileCheck %s
+
CHECK: ERROR: libFuzzer: out-of-memory (used: {{.*}}; limit: 300Mb)
CHECK: Test unit written to ./oom-
SUMMARY: libFuzzer: out-of-memory
-RUN: not LLVMFuzzer-OutOfMemorySingleLargeMallocTest 2>&1 | FileCheck %s --check-prefix=SINGLE_LARGE_MALLOC
-SINGLE_LARGE_MALLOC: libFuzzer: out-of-memory (malloc(42{{.*}}))
+RUN: not LLVMFuzzer-OutOfMemorySingleLargeMallocTest -rss_limit_mb=300 2>&1 | FileCheck %s --check-prefix=SINGLE_LARGE_MALLOC
+SINGLE_LARGE_MALLOC: libFuzzer: out-of-memory (malloc(53{{.*}}))
SINGLE_LARGE_MALLOC: in LLVMFuzzerTestOneInput
# Check that -rss_limit_mb=0 means no limit.
diff --git a/lib/Fuzzer/test/fuzzer-segv.test b/lib/Fuzzer/test/fuzzer-segv.test
index 330f03bcc494..b9a6a5ce44ca 100644
--- a/lib/Fuzzer/test/fuzzer-segv.test
+++ b/lib/Fuzzer/test/fuzzer-segv.test
@@ -1,4 +1,4 @@
-RUN: ASAN_OPTIONS=handle_segv=0 not LLVMFuzzer-NullDerefTest 2>&1 | FileCheck %s --check-prefix=LIBFUZZER_OWN_SEGV_HANDLER
+RUN: env ASAN_OPTIONS=handle_segv=0 not LLVMFuzzer-NullDerefTest 2>&1 | FileCheck %s --check-prefix=LIBFUZZER_OWN_SEGV_HANDLER
LIBFUZZER_OWN_SEGV_HANDLER: == ERROR: libFuzzer: deadly signal
LIBFUZZER_OWN_SEGV_HANDLER: SUMMARY: libFuzzer: deadly signal
LIBFUZZER_OWN_SEGV_HANDLER: Test unit written to ./crash-
diff --git a/lib/Fuzzer/test/fuzzer-singleinputs.test b/lib/Fuzzer/test/fuzzer-singleinputs.test
index ca8403bff81f..500e5da8faa9 100644
--- a/lib/Fuzzer/test/fuzzer-singleinputs.test
+++ b/lib/Fuzzer/test/fuzzer-singleinputs.test
@@ -8,7 +8,7 @@ RUN: echo bbb > %tmp/SINGLE_INPUTS/bbb
RUN: LLVMFuzzer-SimpleTest %tmp/SINGLE_INPUTS/aaa %tmp/SINGLE_INPUTS/bbb 2>&1 | FileCheck %s --check-prefix=SINGLE_INPUTS
RUN: LLVMFuzzer-SimpleTest -max_len=2 %tmp/SINGLE_INPUTS/aaa %tmp/SINGLE_INPUTS/bbb 2>&1 | FileCheck %s --check-prefix=SINGLE_INPUTS
RUN: rm -rf %tmp/SINGLE_INPUTS
-SINGLE_INPUTS: LLVMFuzzer-SimpleTest: Running 2 inputs 1 time(s) each.
+SINGLE_INPUTS: LLVMFuzzer-SimpleTest{{.*}}: Running 2 inputs 1 time(s) each.
SINGLE_INPUTS: aaa in
SINGLE_INPUTS: bbb in
SINGLE_INPUTS: NOTE: fuzzing was not performed, you have only
diff --git a/lib/Fuzzer/test/fuzzer-traces-hooks.test b/lib/Fuzzer/test/fuzzer-traces-hooks.test
index 71fe6f2daf11..f93a8b7199e2 100644
--- a/lib/Fuzzer/test/fuzzer-traces-hooks.test
+++ b/lib/Fuzzer/test/fuzzer-traces-hooks.test
@@ -1,25 +1,17 @@
-// FIXME: Support sanitizer hooks for memcmp and strcmp need
-// to be implemented in the sanitizer runtime for platforms other
-// than linux
-REQUIRES: linux
+// FIXME: Support for sanitizer hooks for memcmp and strcmp needs to
+// be implemented in the sanitizer runtime for this test
+UNSUPPORTED: windows
CHECK: BINGO
-Done1000000: Done 1000000 runs in
-RUN: not LLVMFuzzer-MemcmpTest -seed=4294967295 -runs=100000 2>&1 | FileCheck %s
-RUN: LLVMFuzzer-MemcmpTest -use_memcmp=0 -seed=4294967295 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
+RUN: not LLVMFuzzer-MemcmpTest -seed=1 -runs=2000000 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-StrncmpTest -seed=1 -runs=2000000 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-StrcmpTest -seed=1 -runs=2000000 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-StrstrTest -seed=1 -runs=2000000 2>&1 | FileCheck %s
-RUN: not LLVMFuzzer-StrncmpTest -seed=2 -runs=100000 2>&1 | FileCheck %s
-RUN: LLVMFuzzer-StrncmpTest -use_memcmp=0 -seed=3 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
+RUN: not LLVMFuzzer-Memcmp64BytesTest -seed=1 -runs=1000000 2>&1 | FileCheck %s
-RUN: not LLVMFuzzer-StrcmpTest -seed=4 -runs=200000 2>&1 | FileCheck %s
-RUN: LLVMFuzzer-StrcmpTest -use_memcmp=0 -seed=5 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
-
-RUN: not LLVMFuzzer-StrstrTest -seed=6 -runs=200000 2>&1 | FileCheck %s
-RUN: LLVMFuzzer-StrstrTest -use_memmem=0 -seed=7 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
-
-RUN: LLVMFuzzer-RepeatedMemcmp -seed=10 -runs=100000 2>&1 | FileCheck %s --check-prefix=RECOMMENDED_DICT
+RUN: LLVMFuzzer-RepeatedMemcmp -seed=11 -runs=100000 2>&1 | FileCheck %s --check-prefix=RECOMMENDED_DICT
RECOMMENDED_DICT:###### Recommended dictionary. ######
RECOMMENDED_DICT-DAG: "foo"
RECOMMENDED_DICT-DAG: "bar"
RECOMMENDED_DICT:###### End of recommended dictionary. ######
-
diff --git a/lib/Fuzzer/test/fuzzer.test b/lib/Fuzzer/test/fuzzer.test
index 2f91c2195ca9..ff46d32b387d 100644
--- a/lib/Fuzzer/test/fuzzer.test
+++ b/lib/Fuzzer/test/fuzzer.test
@@ -11,7 +11,7 @@ MaxTotalTime: Done {{.*}} runs in {{.}} second(s)
RUN: not LLVMFuzzer-NullDerefTest 2>&1 | FileCheck %s --check-prefix=NullDerefTest
RUN: not LLVMFuzzer-NullDerefTest -close_fd_mask=3 2>&1 | FileCheck %s --check-prefix=NullDerefTest
-NullDerefTest: ERROR: AddressSanitizer: SEGV on unknown address
+NullDerefTest: ERROR: AddressSanitizer: {{SEGV|access-violation}} on unknown address
NullDerefTest: Test unit written to ./crash-
RUN: not LLVMFuzzer-NullDerefTest -artifact_prefix=ZZZ 2>&1 | FileCheck %s --check-prefix=NullDerefTestPrefix
NullDerefTestPrefix: Test unit written to ZZZcrash-
@@ -34,7 +34,7 @@ COUNTERS: BINGO
DISABLED: not LLVMFuzzer-UninstrumentedTest-Uninstrumented 2>&1 | FileCheck %s --check-prefix=UNINSTRUMENTED
UNINSTRUMENTED: ERROR: __sanitizer_set_death_callback is not defined. Exiting.
-RUN: not LLVMFuzzer-UninstrumentedTest-NoCoverage 2>&1 | FileCheck %s --check-prefix=NO_COVERAGE
+RUN: not LLVMFuzzer-NotinstrumentedTest-NoCoverage 2>&1 | FileCheck %s --check-prefix=NO_COVERAGE
NO_COVERAGE: ERROR: no interesting inputs were found. Is the code instrumented for coverage? Exiting
RUN: not LLVMFuzzer-BufferOverflowOnInput 2>&1 | FileCheck %s --check-prefix=OOB
@@ -51,7 +51,10 @@ RUN: LLVMFuzzer-SimpleTest -exit_on_src_pos=SimpleTest.cpp:17 2
RUN: LLVMFuzzer-ShrinkControlFlowTest -exit_on_src_pos=ShrinkControlFlowTest.cpp:23 2>&1 | FileCheck %s --check-prefix=EXIT_ON_SRC_POS
EXIT_ON_SRC_POS: INFO: found line matching '{{.*}}', exiting.
-RUN: ASAN_OPTIONS=strict_string_checks=1 not LLVMFuzzer-StrncmpOOBTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=STRNCMP
+RUN: env ASAN_OPTIONS=strict_string_checks=1 not LLVMFuzzer-StrncmpOOBTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=STRNCMP
STRNCMP: AddressSanitizer: heap-buffer-overflow
STRNCMP-NOT: __sanitizer_weak_hook_strncmp
STRNCMP: in LLVMFuzzerTestOneInput
+
+RUN: not LLVMFuzzer-BogusInitializeTest 2>&1 | FileCheck %s --check-prefix=BOGUS_INITIALIZE
+BOGUS_INITIALIZE: argv[0] has been modified in LLVMFuzzerInitialize
diff --git a/lib/Fuzzer/test/lit.cfg b/lib/Fuzzer/test/lit.cfg
index 745af0c38245..85c95b42d1ea 100644
--- a/lib/Fuzzer/test/lit.cfg
+++ b/lib/Fuzzer/test/lit.cfg
@@ -6,6 +6,23 @@ config.test_format = lit.formats.ShTest(True)
config.suffixes = ['.test']
config.test_source_root = os.path.dirname(__file__)
+# Choose between lit's internal shell pipeline runner and a real shell. If
+# LIT_USE_INTERNAL_SHELL is in the environment, we use that as an override.
+use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
+if use_lit_shell:
+ # 0 is external, "" is default, and everything else is internal.
+ execute_external = (use_lit_shell == "0")
+else:
+ # Otherwise we default to internal on Windows and external elsewhere, as
+ # bash on Windows is usually very slow.
+ execute_external = (not sys.platform in ['win32'])
+
+# testFormat: The test format to use to interpret tests.
+#
+# For now we require '&&' between commands, until they get globally killed and
+# the test runner updated.
+config.test_format = lit.formats.ShTest(execute_external)
+
# Tweak PATH to include llvm tools dir and current exec dir.
llvm_tools_dir = getattr(config, 'llvm_tools_dir', None)
if (not llvm_tools_dir) or (not os.path.exists(llvm_tools_dir)):
@@ -20,6 +37,15 @@ if config.has_lsan:
else:
lit_config.note('lsan feature unavailable')
+if sys.platform.startswith('win') or sys.platform.startswith('cygwin'):
+ config.available_features.add('windows')
+
+if sys.platform.startswith('darwin'):
+ config.available_features.add('darwin')
+
+if config.is_posix:
+ config.available_features.add('posix')
+
if sys.platform.startswith('linux'):
# Note the value of ``sys.platform`` is not consistent
# between python 2 and 3, hence the use of ``.startswith()``.
diff --git a/lib/Fuzzer/test/lit.site.cfg.in b/lib/Fuzzer/test/lit.site.cfg.in
index 03e86c487ca9..069f2b72c0d9 100644
--- a/lib/Fuzzer/test/lit.site.cfg.in
+++ b/lib/Fuzzer/test/lit.site.cfg.in
@@ -1,4 +1,5 @@
config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
config.has_lsan = True if @HAS_LSAN@ == 1 else False
+config.is_posix = @LIBFUZZER_POSIX@
lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg")
diff --git a/lib/Fuzzer/test/merge-posix.test b/lib/Fuzzer/test/merge-posix.test
new file mode 100644
index 000000000000..47b90b986791
--- /dev/null
+++ b/lib/Fuzzer/test/merge-posix.test
@@ -0,0 +1,23 @@
+REQUIRES: posix
+
+RUN: rm -rf %tmp/T1 %tmp/T2
+RUN: mkdir -p %tmp/T1 %tmp/T2
+
+RUN: echo F..... > %tmp/T1/1
+RUN: echo .U.... > %tmp/T1/2
+RUN: echo ..Z... > %tmp/T1/3
+
+RUN: echo .....F > %tmp/T2/1
+RUN: echo ....U. > %tmp/T2/2
+RUN: echo ...Z.. > %tmp/T2/3
+RUN: echo ...Z.. > %tmp/T2/4
+RUN: echo ....E. > %tmp/T2/5
+RUN: echo .....R > %tmp/T2/6
+
+# Check that we report an error if the file size limit is exceeded
+RUN: (ulimit -f 1; not LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=SIGXFSZ)
+SIGXFSZ: ERROR: libFuzzer: file size exceeded
+
+# Check that we honor TMPDIR
+RUN: TMPDIR=DIR_DOES_NOT_EXIST not LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=TMPDIR
+TMPDIR: MERGE-OUTER: failed to write to the control file: DIR_DOES_NOT_EXIST/libFuzzerTemp
diff --git a/lib/Fuzzer/test/merge-summary.test b/lib/Fuzzer/test/merge-summary.test
new file mode 100644
index 000000000000..df9d62dec636
--- /dev/null
+++ b/lib/Fuzzer/test/merge-summary.test
@@ -0,0 +1,15 @@
+RUN: rm -rf %t/T1 %t/T2
+RUN: mkdir -p %t/T0 %t/T1 %t/T2
+RUN: echo ...Z.. > %t/T2/1
+RUN: echo ....E. > %t/T2/2
+RUN: echo .....R > %t/T2/3
+RUN: echo F..... > %t/T2/a
+RUN: echo .U.... > %t/T2/b
+RUN: echo ..Z... > %t/T2/c
+
+RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %t/T1 %t/T2 -save_coverage_summary=%t/SUMMARY 2>&1 | FileCheck %s --check-prefix=SAVE_SUMMARY
+SAVE_SUMMARY: MERGE-OUTER: writing coverage summary for 6 files to {{.*}}SUMMARY
+RUN: rm %t/T1/*
+RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %t/T1 %t/T2 -load_coverage_summary=%t/SUMMARY 2>&1 | FileCheck %s --check-prefix=LOAD_SUMMARY
+LOAD_SUMMARY: MERGE-OUTER: coverage summary loaded from {{.*}}SUMMAR
+LOAD_SUMMARY: MERGE-OUTER: 0 new files with 0 new features added
diff --git a/lib/Fuzzer/test/merge.test b/lib/Fuzzer/test/merge.test
index 5c7d30e41caa..e59da8c3e091 100644
--- a/lib/Fuzzer/test/merge.test
+++ b/lib/Fuzzer/test/merge.test
@@ -1,12 +1,13 @@
CHECK: BINGO
-RUN: rm -rf %tmp/T1 %tmp/T2
-RUN: mkdir -p %tmp/T1 %tmp/T2
-RUN: echo F..... > %tmp/T1/1
-RUN: echo .U.... > %tmp/T1/2
-RUN: echo ..Z... > %tmp/T1/3
+RUN: rm -rf %tmp/T0 %tmp/T1 %tmp/T2
+RUN: mkdir -p %tmp/T0 %tmp/T1 %tmp/T2
+RUN: echo F..... > %tmp/T0/1
+RUN: echo .U.... > %tmp/T0/2
+RUN: echo ..Z... > %tmp/T0/3
# T1 has 3 elements, T2 is empty.
+RUN: cp %tmp/T0/* %tmp/T1/
RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK1
CHECK1: MERGE-OUTER: 3 files, 3 in the initial corpus
CHECK1: MERGE-OUTER: 0 new files with 0 new features added
@@ -29,13 +30,15 @@ CHECK3: MERGE-OUTER: 12 files, 6 in the initial corpus
CHECK3: MERGE-OUTER: 0 new files with 0 new features added
# Check that we respect max_len during the merge and don't crash.
-RUN: rm %tmp/T1/??*
+RUN: rm %tmp/T1/*
+RUN: cp %tmp/T0/* %tmp/T1/
RUN: echo looooooooong > %tmp/T2/looooooooong
RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 -max_len=6 2>&1 | FileCheck %s --check-prefix=MAX_LEN
MAX_LEN: MERGE-OUTER: 3 new files
# Check that merge tolerates failures.
-RUN: rm %tmp/T1/??*
+RUN: rm %tmp/T1/*
+RUN: cp %tmp/T0/* %tmp/T1/
RUN: echo 'FUZZER' > %tmp/T2/FUZZER
RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=MERGE_WITH_CRASH
MERGE_WITH_CRASH: MERGE-OUTER: succesfull in 2 attempt(s)
@@ -45,10 +48,6 @@ MERGE_WITH_CRASH: MERGE-OUTER: 3 new files
RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 -max_len=5 2>&1 | FileCheck %s --check-prefix=MERGE_LEN5
MERGE_LEN5: MERGE-OUTER: succesfull in 1 attempt(s)
-# Check that we honor TMPDIR
-RUN: TMPDIR=DIR_DOES_NOT_EXIST not LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=TMPDIR
-TMPDIR: MERGE-OUTER: failed to write to the control file: DIR_DOES_NOT_EXIST/libFuzzerTemp
-
-# Check that we can report an error if file size exceeded
-RUN: (ulimit -f 1; not LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=SIGXFSZ)
-SIGXFSZ: ERROR: libFuzzer: file size exceeded
+RUN: rm -rf %tmp/T1/* %tmp/T2/*
+RUN: not LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=EMPTY
+EMPTY: MERGE-OUTER: zero succesfull attempts, exiting
diff --git a/lib/Fuzzer/test/minimize_crash.test b/lib/Fuzzer/test/minimize_crash.test
index 7e5406598e4a..5643c6bacb09 100644
--- a/lib/Fuzzer/test/minimize_crash.test
+++ b/lib/Fuzzer/test/minimize_crash.test
@@ -1,6 +1,13 @@
RUN: echo 'Hi!rv349f34t3gg' > not_minimal_crash
RUN: LLVMFuzzer-NullDerefTest -minimize_crash=1 not_minimal_crash -max_total_time=2 2>&1 | FileCheck %s
-CHECK: CRASH_MIN: failed to minimize beyond minimized-from-{{.*}} (3 bytes), exiting
+CHECK: CRASH_MIN: failed to minimize beyond ./minimized-from-{{.*}} (3 bytes), exiting
RUN: LLVMFuzzer-NullDerefTest -minimize_crash=1 not_minimal_crash -max_total_time=2 -exact_artifact_path=exact_minimized_path 2>&1 | FileCheck %s --check-prefix=CHECK_EXACT
CHECK_EXACT: CRASH_MIN: failed to minimize beyond exact_minimized_path (3 bytes), exiting
RUN: rm not_minimal_crash minimized-from-* exact_minimized_path
+
+RUN: echo -n 'abcd*xyz' > not_minimal_crash
+RUN: LLVMFuzzer-SingleByteInputTest -minimize_crash=1 not_minimal_crash -exact_artifact_path=exact_minimized_path 2>&1 | FileCheck %s --check-prefix=MIN1
+MIN1: Test unit written to exact_minimized_path
+MIN1: Test unit written to exact_minimized_path
+MIN1: INFO: The input is small enough, exiting
+MIN1: CRASH_MIN: failed to minimize beyond exact_minimized_path (1 bytes), exiting
diff --git a/lib/Fuzzer/test/minimize_two_crashes.test b/lib/Fuzzer/test/minimize_two_crashes.test
new file mode 100644
index 000000000000..2358d8c2a92e
--- /dev/null
+++ b/lib/Fuzzer/test/minimize_two_crashes.test
@@ -0,0 +1,16 @@
+# Test that the minimizer stops when it sees a different bug.
+
+RUN: rm -rf %t && mkdir %t
+RUN: echo H12345678901234667888090 > %t/long_crash
+RUN: env ASAN_OPTIONS=dedup_token_length=3 LLVMFuzzer-TwoDifferentBugsTest -seed=1 -minimize_crash=1 %t/long_crash -exact_artifact_path=%t/result 2>&1 | FileCheck %s
+
+CHECK: DedupToken1: DEDUP_TOKEN: Bar
+CHECK: DedupToken2: DEDUP_TOKEN: Bar
+CHECK: DedupToken1: DEDUP_TOKEN: Bar
+CHECK: DedupToken2: DEDUP_TOKEN: Foo
+CHECK: CRASH_MIN: mismatch in dedup tokens
+
+RUN: not LLVMFuzzer-TwoDifferentBugsTest %t/result 2>&1 | FileCheck %s --check-prefix=VERIFY
+
+VERIFY: ERROR: AddressSanitizer:
+VERIFY: in Bar
diff --git a/lib/Fuzzer/test/no-coverage/CMakeLists.txt b/lib/Fuzzer/test/no-coverage/CMakeLists.txt
index d2f6f438ad79..52e7240333ee 100644
--- a/lib/Fuzzer/test/no-coverage/CMakeLists.txt
+++ b/lib/Fuzzer/test/no-coverage/CMakeLists.txt
@@ -5,7 +5,7 @@ set(CMAKE_CXX_FLAGS
"${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=edge,trace-cmp,indirect-calls,8bit-counters,trace-pc-guard")
set(NoCoverageTests
- UninstrumentedTest
+ NotinstrumentedTest
)
foreach(Test ${NoCoverageTests})
@@ -16,14 +16,14 @@ endforeach()
###############################################################################
# AFL Driver test
###############################################################################
+if(NOT MSVC)
+ add_executable(AFLDriverTest
+ ../AFLDriverTest.cpp ../../afl/afl_driver.cpp)
-add_executable(AFLDriverTest
- ../AFLDriverTest.cpp ../../afl/afl_driver.cpp)
+ set_target_properties(AFLDriverTest
+ PROPERTIES RUNTIME_OUTPUT_DIRECTORY
+ "${CMAKE_BINARY_DIR}/lib/Fuzzer/test"
+ )
-set_target_properties(AFLDriverTest
- PROPERTIES RUNTIME_OUTPUT_DIRECTORY
- "${CMAKE_BINARY_DIR}/lib/Fuzzer/test"
- )
-
-# Propagate value into parent directory
-set(TestBinaries ${TestBinaries} AFLDriverTest PARENT_SCOPE)
+ add_dependencies(TestBinaries AFLDriverTest)
+endif()
diff --git a/lib/Fuzzer/test/trace-malloc-2.test b/lib/Fuzzer/test/trace-malloc-2.test
new file mode 100644
index 000000000000..7719b650c791
--- /dev/null
+++ b/lib/Fuzzer/test/trace-malloc-2.test
@@ -0,0 +1,8 @@
+// FIXME: This test loops forever on darwin because it crashes
+// repeatedly while printing a stack trace
+UNSUPPORTED: darwin
+
+RUN: LLVMFuzzer-TraceMallocTest -seed=1 -trace_malloc=2 -runs=1000 2>&1 | FileCheck %s --check-prefix=TRACE2
+TRACE2-DAG: FREE[0]
+TRACE2-DAG: MALLOC[0]
+TRACE2-DAG: in LLVMFuzzerTestOneInput
diff --git a/lib/Fuzzer/test/trace-malloc.test b/lib/Fuzzer/test/trace-malloc.test
index c95147904d42..25694cc2de5c 100644
--- a/lib/Fuzzer/test/trace-malloc.test
+++ b/lib/Fuzzer/test/trace-malloc.test
@@ -3,8 +3,3 @@ CHECK-DAG: MallocFreeTracer: STOP 0 0 (same)
CHECK-DAG: MallocFreeTracer: STOP 0 1 (DIFFERENT)
CHECK-DAG: MallocFreeTracer: STOP 1 0 (DIFFERENT)
CHECK-DAG: MallocFreeTracer: STOP 1 1 (same)
-
-RUN: LLVMFuzzer-TraceMallocTest -seed=1 -trace_malloc=2 -runs=1000 2>&1 | FileCheck %s --check-prefix=TRACE2
-TRACE2-DAG: FREE[0]
-TRACE2-DAG: MALLOC[0]
-TRACE2-DAG: in LLVMFuzzerTestOneInput
diff --git a/lib/Fuzzer/test/trace-pc.test b/lib/Fuzzer/test/trace-pc.test
new file mode 100644
index 000000000000..3709677b71b6
--- /dev/null
+++ b/lib/Fuzzer/test/trace-pc.test
@@ -0,0 +1,2 @@
+CHECK: BINGO
+RUN: LLVMFuzzer-SimpleTest-TracePC -runs=100000 -seed=1 2>&1 | FileCheck %s
diff --git a/lib/Fuzzer/test/trace-pc/CMakeLists.txt b/lib/Fuzzer/test/trace-pc/CMakeLists.txt
new file mode 100644
index 000000000000..e800f82cc5dc
--- /dev/null
+++ b/lib/Fuzzer/test/trace-pc/CMakeLists.txt
@@ -0,0 +1,13 @@
+# These tests are not instrumented with the default coverage and don't
+# have the coverage runtime in the binary; they use bare trace-pc instead.
+
+set(CMAKE_CXX_FLAGS
+ "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=edge,trace-cmp,indirect-calls,8bit-counters,trace-pc-guard -fsanitize-coverage=trace-pc")
+
+set(TracePCTests
+ SimpleTest
+ )
+
+foreach(Test ${TracePCTests})
+ add_libfuzzer_test(${Test}-TracePC SOURCES ../${Test}.cpp)
+endforeach()
diff --git a/lib/Fuzzer/test/ubsan/CMakeLists.txt b/lib/Fuzzer/test/ubsan/CMakeLists.txt
index 7a9eacdbe7df..55e0a118186b 100644
--- a/lib/Fuzzer/test/ubsan/CMakeLists.txt
+++ b/lib/Fuzzer/test/ubsan/CMakeLists.txt
@@ -10,6 +10,3 @@ set(UbsanTests
foreach(Test ${UbsanTests})
add_libfuzzer_test(${Test}-Ubsan SOURCES ../${Test}.cpp)
endforeach()
-
-# Propagate value into parent directory
-set(TestBinaries ${TestBinaries} PARENT_SCOPE)
diff --git a/lib/Fuzzer/test/ulimit.test b/lib/Fuzzer/test/ulimit.test
index a60636c351bd..c2faca13f728 100644
--- a/lib/Fuzzer/test/ulimit.test
+++ b/lib/Fuzzer/test/ulimit.test
@@ -1,2 +1,4 @@
+REQUIRES: posix
+
RUN: ulimit -s 1000
RUN: LLVMFuzzer-SimpleTest
diff --git a/lib/Fuzzer/test/uninstrumented/CMakeLists.txt b/lib/Fuzzer/test/uninstrumented/CMakeLists.txt
index 29b66e6e586a..f4ab59e5b18d 100644
--- a/lib/Fuzzer/test/uninstrumented/CMakeLists.txt
+++ b/lib/Fuzzer/test/uninstrumented/CMakeLists.txt
@@ -11,6 +11,3 @@ set(UninstrumentedTests
foreach(Test ${UninstrumentedTests})
add_libfuzzer_test(${Test}-Uninstrumented SOURCES ../${Test}.cpp)
endforeach()
-
-# Propagate value into parent directory
-set(TestBinaries ${TestBinaries} PARENT_SCOPE)
diff --git a/lib/Fuzzer/test/value-profile-div.test b/lib/Fuzzer/test/value-profile-div.test
index ba45e4129d30..b966a8916512 100644
--- a/lib/Fuzzer/test/value-profile-div.test
+++ b/lib/Fuzzer/test/value-profile-div.test
@@ -1,3 +1,3 @@
-CHECK: AddressSanitizer: FPE
+CHECK: AddressSanitizer: {{FPE|int-divide-by-zero}}
RUN: not LLVMFuzzer-DivTest -seed=1 -use_value_profile=1 -runs=10000000 2>&1 | FileCheck %s
diff --git a/lib/Fuzzer/test/value-profile-mem.test b/lib/Fuzzer/test/value-profile-mem.test
index 09d737dbe736..880b2692910a 100644
--- a/lib/Fuzzer/test/value-profile-mem.test
+++ b/lib/Fuzzer/test/value-profile-mem.test
@@ -1,2 +1,2 @@
CHECK: BINGO
-RUN: not LLVMFuzzer-SingleMemcmpTest -seed=1 -use_cmp=0 -use_memcmp=0 -use_value_profile=1 -runs=10000000 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-SingleMemcmpTest -seed=1 -use_cmp=0 -use_value_profile=1 -runs=10000000 2>&1 | FileCheck %s
diff --git a/lib/Fuzzer/test/value-profile-strcmp.test b/lib/Fuzzer/test/value-profile-strcmp.test
index 1e7ef9b45e96..7f1047594548 100644
--- a/lib/Fuzzer/test/value-profile-strcmp.test
+++ b/lib/Fuzzer/test/value-profile-strcmp.test
@@ -1,2 +1,2 @@
CHECK: BINGO
-RUN: not LLVMFuzzer-SingleStrcmpTest -seed=1 -use_cmp=0 -use_memcmp=0 -use_value_profile=1 -runs=10000000 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-SingleStrcmpTest -seed=1 -use_cmp=0 -use_value_profile=1 -runs=10000000 2>&1 | FileCheck %s
diff --git a/lib/Fuzzer/test/value-profile-strncmp.test b/lib/Fuzzer/test/value-profile-strncmp.test
index 650973180c06..84a74c4f0ad2 100644
--- a/lib/Fuzzer/test/value-profile-strncmp.test
+++ b/lib/Fuzzer/test/value-profile-strncmp.test
@@ -1,2 +1,2 @@
CHECK: BINGO
-RUN: not LLVMFuzzer-SingleStrncmpTest -seed=1 -use_cmp=0 -use_memcmp=0 -use_value_profile=1 -runs=10000000 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-SingleStrncmpTest -seed=1 -use_cmp=0 -use_value_profile=1 -runs=100000000 2>&1 | FileCheck %s
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index eecef9423f2e..d0b77e7218b9 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
@@ -832,7 +833,7 @@ void SlotTracker::processModule() {
// Add all the function attributes to the table.
// FIXME: Add attributes of other objects?
AttributeSet FnAttrs = F.getAttributes().getFnAttributes();
- if (FnAttrs.hasAttributes(AttributeSet::FunctionIndex))
+ if (FnAttrs.hasAttributes())
CreateAttributeSetSlot(FnAttrs);
}
@@ -867,15 +868,10 @@ void SlotTracker::processFunction() {
// We allow direct calls to any llvm.foo function here, because the
// target may not be linked into the optimizer.
- if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
+ if (auto CS = ImmutableCallSite(&I)) {
// Add all the call attributes to the table.
- AttributeSet Attrs = CI->getAttributes().getFnAttributes();
- if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
- CreateAttributeSetSlot(Attrs);
- } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
- // Add all the call attributes to the table.
- AttributeSet Attrs = II->getAttributes().getFnAttributes();
- if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+ AttributeSet Attrs = CS.getAttributes().getFnAttributes();
+ if (Attrs.hasAttributes())
CreateAttributeSetSlot(Attrs);
}
}
@@ -1016,8 +1012,7 @@ void SlotTracker::CreateMetadataSlot(const MDNode *N) {
}
void SlotTracker::CreateAttributeSetSlot(AttributeSet AS) {
- assert(AS.hasAttributes(AttributeSet::FunctionIndex) &&
- "Doesn't need a slot!");
+ assert(AS.hasAttributes() && "Doesn't need a slot!");
as_iterator I = asMap.find(AS);
if (I != asMap.end())
@@ -1073,6 +1068,8 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
Out << " nsz";
if (FPO->hasAllowReciprocal())
Out << " arcp";
+ if (FPO->hasAllowContract())
+ Out << " contract";
}
}
@@ -1614,6 +1611,9 @@ static void writeDIDerivedType(raw_ostream &Out, const DIDerivedType *N,
Printer.printInt("offset", N->getOffsetInBits());
Printer.printDIFlags("flags", N->getFlags());
Printer.printMetadata("extraData", N->getRawExtraData());
+ if (const auto &DWARFAddressSpace = N->getDWARFAddressSpace())
+ Printer.printInt("dwarfAddressSpace", *DWARFAddressSpace,
+ /* ShouldSkipZero */ false);
Out << ")";
}
@@ -1688,6 +1688,8 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
Printer.printMetadata("macros", N->getRawMacros());
Printer.printInt("dwoId", N->getDWOId());
Printer.printBool("splitDebugInlining", N->getSplitDebugInlining(), true);
+ Printer.printBool("debugInfoForProfiling", N->getDebugInfoForProfiling(),
+ false);
Out << ")";
}
@@ -2083,7 +2085,8 @@ public:
void printModule(const Module *M);
void writeOperand(const Value *Op, bool PrintType);
- void writeParamOperand(const Value *Operand, AttributeSet Attrs,unsigned Idx);
+ void writeParamOperand(const Value *Operand, AttributeList Attrs,
+ unsigned Idx);
void writeOperandBundles(ImmutableCallSite CS);
void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope);
void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering,
@@ -2099,7 +2102,7 @@ public:
void printIndirectSymbol(const GlobalIndirectSymbol *GIS);
void printComdat(const Comdat *C);
void printFunction(const Function *F);
- void printArgument(const Argument *FA, AttributeSet Attrs, unsigned Idx);
+ void printArgument(const Argument *FA, AttributeList Attrs, unsigned Idx);
void printBasicBlock(const BasicBlock *BB);
void printInstructionLine(const Instruction &I);
void printInstruction(const Instruction &I);
@@ -2178,7 +2181,7 @@ void AssemblyWriter::writeAtomicCmpXchg(AtomicOrdering SuccessOrdering,
}
void AssemblyWriter::writeParamOperand(const Value *Operand,
- AttributeSet Attrs, unsigned Idx) {
+ AttributeList Attrs, unsigned Idx) {
if (!Operand) {
Out << "<null operand!>";
return;
@@ -2596,19 +2599,12 @@ void AssemblyWriter::printFunction(const Function *F) {
if (F->isMaterializable())
Out << "; Materializable\n";
- const AttributeSet &Attrs = F->getAttributes();
- if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) {
+ const AttributeList &Attrs = F->getAttributes();
+ if (Attrs.hasAttributes(AttributeList::FunctionIndex)) {
AttributeSet AS = Attrs.getFnAttributes();
std::string AttrStr;
- unsigned Idx = 0;
- for (unsigned E = AS.getNumSlots(); Idx != E; ++Idx)
- if (AS.getSlotIndex(Idx) == AttributeSet::FunctionIndex)
- break;
-
- for (AttributeSet::iterator I = AS.begin(Idx), E = AS.end(Idx);
- I != E; ++I) {
- Attribute Attr = *I;
+ for (const Attribute &Attr : AS) {
if (!Attr.isStringAttribute()) {
if (!AttrStr.empty()) AttrStr += ' ';
AttrStr += Attr.getAsString();
@@ -2641,8 +2637,8 @@ void AssemblyWriter::printFunction(const Function *F) {
}
FunctionType *FT = F->getFunctionType();
- if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
- Out << Attrs.getAsString(AttributeSet::ReturnIndex) << ' ';
+ if (Attrs.hasAttributes(AttributeList::ReturnIndex))
+ Out << Attrs.getAsString(AttributeList::ReturnIndex) << ' ';
TypePrinter.print(F->getReturnType(), Out);
Out << ' ';
WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
@@ -2681,7 +2677,7 @@ void AssemblyWriter::printFunction(const Function *F) {
StringRef UA = getUnnamedAddrEncoding(F->getUnnamedAddr());
if (!UA.empty())
Out << ' ' << UA;
- if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+ if (Attrs.hasAttributes(AttributeList::FunctionIndex))
Out << " #" << Machine.getAttributeGroupSlot(Attrs.getFnAttributes());
if (F->hasSection()) {
Out << " section \"";
@@ -2730,8 +2726,8 @@ void AssemblyWriter::printFunction(const Function *F) {
/// printArgument - This member is called for every argument that is passed into
/// the function. Simply print it out
///
-void AssemblyWriter::printArgument(const Argument *Arg,
- AttributeSet Attrs, unsigned Idx) {
+void AssemblyWriter::printArgument(const Argument *Arg, AttributeList Attrs,
+ unsigned Idx) {
// Output type...
TypePrinter.print(Arg->getType(), Out);
@@ -2901,12 +2897,11 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ", ";
writeOperand(SI.getDefaultDest(), true);
Out << " [";
- for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
- i != e; ++i) {
+ for (auto Case : SI.cases()) {
Out << "\n ";
- writeOperand(i.getCaseValue(), true);
+ writeOperand(Case.getCaseValue(), true);
Out << ", ";
- writeOperand(i.getCaseSuccessor(), true);
+ writeOperand(Case.getCaseSuccessor(), true);
}
Out << "\n ]";
} else if (isa<IndirectBrInst>(I)) {
@@ -3015,10 +3010,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Operand = CI->getCalledValue();
FunctionType *FTy = CI->getFunctionType();
Type *RetTy = FTy->getReturnType();
- const AttributeSet &PAL = CI->getAttributes();
+ const AttributeList &PAL = CI->getAttributes();
- if (PAL.hasAttributes(AttributeSet::ReturnIndex))
- Out << ' ' << PAL.getAsString(AttributeSet::ReturnIndex);
+ if (PAL.hasAttributes(AttributeList::ReturnIndex))
+ Out << ' ' << PAL.getAsString(AttributeList::ReturnIndex);
// If possible, print out the short form of the call instruction. We can
// only do this if the first argument is a pointer to a nonvararg function,
@@ -3043,7 +3038,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ", ...";
Out << ')';
- if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ if (PAL.hasAttributes(AttributeList::FunctionIndex))
Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
writeOperandBundles(CI);
@@ -3052,7 +3047,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Operand = II->getCalledValue();
FunctionType *FTy = II->getFunctionType();
Type *RetTy = FTy->getReturnType();
- const AttributeSet &PAL = II->getAttributes();
+ const AttributeList &PAL = II->getAttributes();
// Print the calling convention being used.
if (II->getCallingConv() != CallingConv::C) {
@@ -3060,8 +3055,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
PrintCallingConv(II->getCallingConv(), Out);
}
- if (PAL.hasAttributes(AttributeSet::ReturnIndex))
- Out << ' ' << PAL.getAsString(AttributeSet::ReturnIndex);
+ if (PAL.hasAttributes(AttributeList::ReturnIndex))
+ Out << ' ' << PAL.getAsString(AttributeList::ReturnIndex);
// If possible, print out the short form of the invoke instruction. We can
// only do this if the first argument is a pointer to a nonvararg function,
@@ -3079,7 +3074,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
}
Out << ')';
- if (PAL.hasAttributes(AttributeSet::FunctionIndex))
+ if (PAL.hasAttributes(AttributeList::FunctionIndex))
Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
writeOperandBundles(II);
@@ -3109,6 +3104,12 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
if (AI->getAlignment()) {
Out << ", align " << AI->getAlignment();
}
+
+ unsigned AddrSpace = AI->getType()->getAddressSpace();
+ if (AddrSpace != 0) {
+ Out << ", addrspace(" << AddrSpace << ')';
+ }
+
} else if (isa<CastInst>(I)) {
if (Operand) {
Out << ' ';
@@ -3242,7 +3243,7 @@ void AssemblyWriter::printMDNodeBody(const MDNode *Node) {
}
void AssemblyWriter::writeAllAttributeGroups() {
- std::vector<std::pair<AttributeSet, unsigned> > asVec;
+ std::vector<std::pair<AttributeSet, unsigned>> asVec;
asVec.resize(Machine.as_size());
for (SlotTracker::as_iterator I = Machine.as_begin(), E = Machine.as_end();
@@ -3251,7 +3252,7 @@ void AssemblyWriter::writeAllAttributeGroups() {
for (const auto &I : asVec)
Out << "attributes #" << I.second << " = { "
- << I.first.getAsString(AttributeSet::FunctionIndex, true) << " }\n";
+ << I.first.getAsString(true) << " }\n";
}
void AssemblyWriter::printUseListOrder(const UseListOrder &Order) {
@@ -3535,6 +3536,7 @@ void Metadata::print(raw_ostream &OS, ModuleSlotTracker &MST,
printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ false);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Value::dump - allow easy printing of Values from the debugger.
LLVM_DUMP_METHOD
void Value::dump() const { print(dbgs(), /*IsForDebug=*/true); dbgs() << '\n'; }
@@ -3566,3 +3568,4 @@ void Metadata::dump(const Module *M) const {
print(dbgs(), M, /*IsForDebug=*/true);
dbgs() << '\n';
}
+#endif
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index d0d27101aa86..09f037365793 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -16,7 +16,6 @@
#ifndef LLVM_LIB_IR_ATTRIBUTEIMPL_H
#define LLVM_LIB_IR_ATTRIBUTEIMPL_H
-#include "AttributeSetNode.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/StringRef.h"
@@ -144,16 +143,74 @@ public:
StringRef getStringValue() const { return Val; }
};
-typedef std::pair<unsigned, AttributeSetNode *> IndexAttrPair;
+//===----------------------------------------------------------------------===//
+/// \class
+/// \brief This class represents a group of attributes that apply to one
+/// element: function, return type, or parameter.
+class AttributeSetNode final
+ : public FoldingSetNode,
+ private TrailingObjects<AttributeSetNode, Attribute> {
+ friend TrailingObjects;
+
+ /// Bitset with one bit for each available Attribute::AttrKind.
+ uint64_t AvailableAttrs;
+ unsigned NumAttrs; ///< Number of attributes in this node.
+
+ AttributeSetNode(ArrayRef<Attribute> Attrs);
+
+public:
+ // AttributeSetNode is uniqued; these should not be available.
+ AttributeSetNode(const AttributeSetNode &) = delete;
+ AttributeSetNode &operator=(const AttributeSetNode &) = delete;
+
+ void operator delete(void *p) { ::operator delete(p); }
+
+ static AttributeSetNode *get(LLVMContext &C, const AttrBuilder &B);
+
+ static AttributeSetNode *get(LLVMContext &C, ArrayRef<Attribute> Attrs);
+
+ /// \brief Return the number of attributes this AttributeSetNode contains.
+ unsigned getNumAttributes() const { return NumAttrs; }
+
+ bool hasAttribute(Attribute::AttrKind Kind) const {
+ return AvailableAttrs & ((uint64_t)1) << Kind;
+ }
+ bool hasAttribute(StringRef Kind) const;
+ bool hasAttributes() const { return NumAttrs != 0; }
+
+ Attribute getAttribute(Attribute::AttrKind Kind) const;
+ Attribute getAttribute(StringRef Kind) const;
+
+ unsigned getAlignment() const;
+ unsigned getStackAlignment() const;
+ uint64_t getDereferenceableBytes() const;
+ uint64_t getDereferenceableOrNullBytes() const;
+ std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
+ std::string getAsString(bool InAttrGrp) const;
+
+ typedef const Attribute *iterator;
+ iterator begin() const { return getTrailingObjects<Attribute>(); }
+ iterator end() const { return begin() + NumAttrs; }
+
+ void Profile(FoldingSetNodeID &ID) const {
+ Profile(ID, makeArrayRef(begin(), end()));
+ }
+ static void Profile(FoldingSetNodeID &ID, ArrayRef<Attribute> AttrList) {
+ for (const auto &Attr : AttrList)
+ Attr.Profile(ID);
+ }
+};
+
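For orientation, a minimal usage sketch of the node API above (illustrative only; assumes an LLVMContext Ctx is in scope):

    AttrBuilder B;
    B.addAttribute(Attribute::NoUnwind);
    B.addAlignmentAttr(16);
    AttributeSetNode *Node = AttributeSetNode::get(Ctx, B);
    assert(Node->hasAttribute(Attribute::NoUnwind));
    assert(Node->getAlignment() == 16);
    assert(Node == AttributeSetNode::get(Ctx, B)); // nodes are uniqued per context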
+typedef std::pair<unsigned, AttributeSet> IndexAttrPair;
//===----------------------------------------------------------------------===//
/// \class
/// \brief This class represents a set of attributes that apply to the function,
/// return type, and parameters.
-class AttributeSetImpl final
+class AttributeListImpl final
: public FoldingSetNode,
- private TrailingObjects<AttributeSetImpl, IndexAttrPair> {
- friend class AttributeSet;
+ private TrailingObjects<AttributeListImpl, IndexAttrPair> {
+ friend class AttributeList;
friend TrailingObjects;
private:
@@ -166,52 +223,21 @@ private:
size_t numTrailingObjects(OverloadToken<IndexAttrPair>) { return NumSlots; }
/// \brief Return a pointer to the IndexAttrPair for the specified slot.
- const IndexAttrPair *getNode(unsigned Slot) const {
+ const IndexAttrPair *getSlotPair(unsigned Slot) const {
return getTrailingObjects<IndexAttrPair>() + Slot;
}
public:
- AttributeSetImpl(LLVMContext &C,
- ArrayRef<std::pair<unsigned, AttributeSetNode *>> Slots)
- : Context(C), NumSlots(Slots.size()), AvailableFunctionAttrs(0) {
- static_assert(Attribute::EndAttrKinds <=
- sizeof(AvailableFunctionAttrs) * CHAR_BIT,
- "Too many attributes");
-
-#ifndef NDEBUG
- if (Slots.size() >= 2) {
- for (const std::pair<unsigned, AttributeSetNode *> *i = Slots.begin() + 1,
- *e = Slots.end();
- i != e; ++i) {
- assert((i-1)->first <= i->first && "Attribute set not ordered!");
- }
- }
-#endif
- // There's memory after the node where we can store the entries in.
- std::copy(Slots.begin(), Slots.end(), getTrailingObjects<IndexAttrPair>());
-
- // Initialize AvailableFunctionAttrs summary bitset.
- if (NumSlots > 0) {
- static_assert(AttributeSet::FunctionIndex == ~0u,
- "FunctionIndex should be biggest possible index");
- const std::pair<unsigned, AttributeSetNode *> &Last = Slots.back();
- if (Last.first == AttributeSet::FunctionIndex) {
- const AttributeSetNode *Node = Last.second;
- for (Attribute I : *Node) {
- if (!I.isStringAttribute())
- AvailableFunctionAttrs |= ((uint64_t)1) << I.getKindAsEnum();
- }
- }
- }
- }
+ AttributeListImpl(LLVMContext &C,
+ ArrayRef<std::pair<unsigned, AttributeSet>> Slots);
// AttributeListImpl is uniqued; these should not be available.
- AttributeSetImpl(const AttributeSetImpl &) = delete;
- AttributeSetImpl &operator=(const AttributeSetImpl &) = delete;
+ AttributeListImpl(const AttributeListImpl &) = delete;
+ AttributeListImpl &operator=(const AttributeListImpl &) = delete;
void operator delete(void *p) { ::operator delete(p); }
- /// \brief Get the context that created this AttributeSetImpl.
+ /// \brief Get the context that created this AttributeListImpl.
LLVMContext &getContext() { return Context; }
/// \brief Return the number of slots used in this attribute list. This is
@@ -224,42 +250,35 @@ public:
/// attributes are applied to, not the index into the AttrNodes list where the
/// attributes reside.
unsigned getSlotIndex(unsigned Slot) const {
- return getNode(Slot)->first;
+ return getSlotPair(Slot)->first;
+ }
+
+ /// \brief Retrieve the attribute set node for the given "slot" in the
+ /// AttrNode list.
+ AttributeSet getSlotNode(unsigned Slot) const {
+ return getSlotPair(Slot)->second;
}
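The slot/index split above is the subtle part of this interface; an illustrative layout (hedged, not taken from the patch):

    // A list with attributes on parameter 2 and on the function has two slots:
    //   Slot 0 -> { 3,   <param set> }  // index 3 == ArgNo 2 + 1
    //   Slot 1 -> { ~0u, <fn set>    }  // AttributeList::FunctionIndex
    // getSlotIndex(1) == ~0u, while getSlotNode(1) is the function's set.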
/// \brief Retrieve the attributes for the given "slot" in the AttrNode list.
/// \p Slot is an index into the AttrNodes list, not the index of the
/// return/parameter/function to which the attributes apply.
- AttributeSet getSlotAttributes(unsigned Slot) const {
- return AttributeSet::get(Context, *getNode(Slot));
+ AttributeList getSlotAttributes(unsigned Slot) const {
+ return AttributeList::get(Context, *getSlotPair(Slot));
}
- /// \brief Retrieve the attribute set node for the given "slot" in the
- /// AttrNode list.
- AttributeSetNode *getSlotNode(unsigned Slot) const {
- return getNode(Slot)->second;
- }
-
- /// \brief Return true if the AttributeSetNode for the FunctionIndex has an
+ /// \brief Return true if the AttributeSet at the FunctionIndex has an
/// enum attribute of the given kind.
bool hasFnAttribute(Attribute::AttrKind Kind) const {
return AvailableFunctionAttrs & ((uint64_t)1) << Kind;
}
- typedef AttributeSetNode::iterator iterator;
- iterator begin(unsigned Slot) const { return getSlotNode(Slot)->begin(); }
- iterator end(unsigned Slot) const { return getSlotNode(Slot)->end(); }
+ typedef AttributeSet::iterator iterator;
+ iterator begin(unsigned Slot) const { return getSlotNode(Slot).begin(); }
+ iterator end(unsigned Slot) const { return getSlotNode(Slot).end(); }
- void Profile(FoldingSetNodeID &ID) const {
- Profile(ID, makeArrayRef(getNode(0), getNumSlots()));
- }
+ void Profile(FoldingSetNodeID &ID) const;
static void Profile(FoldingSetNodeID &ID,
- ArrayRef<std::pair<unsigned, AttributeSetNode*>> Nodes) {
- for (const auto &Node : Nodes) {
- ID.AddInteger(Node.first);
- ID.AddPointer(Node.second);
- }
- }
+ ArrayRef<std::pair<unsigned, AttributeSet>> Nodes);
void dump() const;
};
diff --git a/lib/IR/AttributeSetNode.h b/lib/IR/AttributeSetNode.h
deleted file mode 100644
index 23ce3713c20b..000000000000
--- a/lib/IR/AttributeSetNode.h
+++ /dev/null
@@ -1,106 +0,0 @@
-//===-- AttributeSetNode.h - AttributeSet Internal Node ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief This file defines the node class used internally by AttributeSet.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_IR_ATTRIBUTESETNODE_H
-#define LLVM_IR_ATTRIBUTESETNODE_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/Support/TrailingObjects.h"
-#include <algorithm>
-#include <climits>
-#include <cstdint>
-#include <string>
-#include <utility>
-
-namespace llvm {
-
-//===----------------------------------------------------------------------===//
-/// \class
-/// \brief This class represents a group of attributes that apply to one
-/// element: function, return type, or parameter.
-class AttributeSetNode final
- : public FoldingSetNode,
- private TrailingObjects<AttributeSetNode, Attribute> {
- friend TrailingObjects;
-
- unsigned NumAttrs; ///< Number of attributes in this node.
- /// Bitset with a bit for each available attribute Attribute::AttrKind.
- uint64_t AvailableAttrs;
-
- AttributeSetNode(ArrayRef<Attribute> Attrs)
- : NumAttrs(Attrs.size()), AvailableAttrs(0) {
- static_assert(Attribute::EndAttrKinds <= sizeof(AvailableAttrs) * CHAR_BIT,
- "Too many attributes for AvailableAttrs");
- // There's memory after the node where we can store the entries in.
- std::copy(Attrs.begin(), Attrs.end(), getTrailingObjects<Attribute>());
-
- for (Attribute I : *this) {
- if (!I.isStringAttribute()) {
- AvailableAttrs |= ((uint64_t)1) << I.getKindAsEnum();
- }
- }
- }
-
-public:
- // AttributesSetNode is uniqued, these should not be available.
- AttributeSetNode(const AttributeSetNode &) = delete;
- AttributeSetNode &operator=(const AttributeSetNode &) = delete;
-
- void operator delete(void *p) { ::operator delete(p); }
-
- static AttributeSetNode *get(LLVMContext &C, ArrayRef<Attribute> Attrs);
-
- static AttributeSetNode *get(AttributeSet AS, unsigned Index) {
- return AS.getAttributes(Index);
- }
-
- /// \brief Return the number of attributes this AttributeSet contains.
- unsigned getNumAttributes() const { return NumAttrs; }
-
- bool hasAttribute(Attribute::AttrKind Kind) const {
- return AvailableAttrs & ((uint64_t)1) << Kind;
- }
- bool hasAttribute(StringRef Kind) const;
- bool hasAttributes() const { return NumAttrs != 0; }
-
- Attribute getAttribute(Attribute::AttrKind Kind) const;
- Attribute getAttribute(StringRef Kind) const;
-
- unsigned getAlignment() const;
- unsigned getStackAlignment() const;
- uint64_t getDereferenceableBytes() const;
- uint64_t getDereferenceableOrNullBytes() const;
- std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
- std::string getAsString(bool InAttrGrp) const;
-
- typedef const Attribute *iterator;
- iterator begin() const { return getTrailingObjects<Attribute>(); }
- iterator end() const { return begin() + NumAttrs; }
-
- void Profile(FoldingSetNodeID &ID) const {
- Profile(ID, makeArrayRef(begin(), end()));
- }
- static void Profile(FoldingSetNodeID &ID, ArrayRef<Attribute> AttrList) {
- for (const auto &Attr : AttrList)
- Attr.Profile(ID);
- }
-};
-
-} // end namespace llvm
-
-#endif // LLVM_IR_ATTRIBUTESETNODE_H
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index 1ec53cf1e1d6..2b7359dab807 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -1,4 +1,4 @@
-//===-- Attributes.cpp - Implement AttributesList -------------------------===//
+//===- Attributes.cpp - Implement AttributeList ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,23 +9,38 @@
//
// \file
// \brief This file implements the Attribute, AttributeImpl, AttrBuilder,
-// AttributeSetImpl, and AttributeSet classes.
+// AttributeListImpl, and AttributeList classes.
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Function.h"
#include "AttributeImpl.h"
#include "LLVMContextImpl.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
-#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Mutex.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <limits>
+#include <map>
+#include <string>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -411,9 +426,12 @@ bool Attribute::operator<(Attribute A) const {
//===----------------------------------------------------------------------===//
// Pin the vtables to this file.
-AttributeImpl::~AttributeImpl() {}
+AttributeImpl::~AttributeImpl() = default;
+
void EnumAttributeImpl::anchor() {}
+
void IntAttributeImpl::anchor() {}
+
void StringAttributeImpl::anchor() {}
bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const {
@@ -473,9 +491,86 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const {
}
//===----------------------------------------------------------------------===//
+// AttributeSet Definition
+//===----------------------------------------------------------------------===//
+
+AttributeSet AttributeSet::get(LLVMContext &C, const AttrBuilder &B) {
+ return AttributeSet(AttributeSetNode::get(C, B));
+}
+
+AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef<Attribute> Attrs) {
+ return AttributeSet(AttributeSetNode::get(C, Attrs));
+}
+
+unsigned AttributeSet::getNumAttributes() const {
+ return SetNode ? SetNode->getNumAttributes() : 0;
+}
+
+bool AttributeSet::hasAttribute(Attribute::AttrKind Kind) const {
+ return SetNode ? SetNode->hasAttribute(Kind) : false;
+}
+
+bool AttributeSet::hasAttribute(StringRef Kind) const {
+ return SetNode ? SetNode->hasAttribute(Kind) : false;
+}
+
+Attribute AttributeSet::getAttribute(Attribute::AttrKind Kind) const {
+ return SetNode ? SetNode->getAttribute(Kind) : Attribute();
+}
+
+Attribute AttributeSet::getAttribute(StringRef Kind) const {
+ return SetNode ? SetNode->getAttribute(Kind) : Attribute();
+}
+
+unsigned AttributeSet::getAlignment() const {
+ return SetNode ? SetNode->getAlignment() : 0;
+}
+
+unsigned AttributeSet::getStackAlignment() const {
+ return SetNode ? SetNode->getStackAlignment() : 0;
+}
+
+uint64_t AttributeSet::getDereferenceableBytes() const {
+ return SetNode ? SetNode->getDereferenceableBytes() : 0;
+}
+
+uint64_t AttributeSet::getDereferenceableOrNullBytes() const {
+ return SetNode ? SetNode->getDereferenceableOrNullBytes() : 0;
+}
+
+std::pair<unsigned, Optional<unsigned>> AttributeSet::getAllocSizeArgs() const {
+ return SetNode ? SetNode->getAllocSizeArgs()
+ : std::pair<unsigned, Optional<unsigned>>(0, 0);
+}
+
+std::string AttributeSet::getAsString(bool InAttrGrp) const {
+ return SetNode ? SetNode->getAsString(InAttrGrp) : "";
+}
+
+AttributeSet::iterator AttributeSet::begin() const {
+ return SetNode ? SetNode->begin() : nullptr;
+}
+
+AttributeSet::iterator AttributeSet::end() const {
+ return SetNode ? SetNode->end() : nullptr;
+}
+
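These accessors all follow the same null-object pattern: a default-constructed AttributeSet carries a null SetNode, and every query degrades to a neutral value, so callers never have to test for null first. A minimal sketch (assumes an LLVMContext Ctx):

    AttributeSet Empty;                          // SetNode == nullptr
    assert(!Empty.hasAttributes() && Empty.getAlignment() == 0);

    AttrBuilder B;
    B.addAttribute(Attribute::ReadOnly);
    AttributeSet RO = AttributeSet::get(Ctx, B);
    assert(RO.hasAttribute(Attribute::ReadOnly));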
+//===----------------------------------------------------------------------===//
// AttributeSetNode Definition
//===----------------------------------------------------------------------===//
+AttributeSetNode::AttributeSetNode(ArrayRef<Attribute> Attrs)
+ : AvailableAttrs(0), NumAttrs(Attrs.size()) {
+ // There's memory after the node where we can store the entries.
+ std::copy(Attrs.begin(), Attrs.end(), getTrailingObjects<Attribute>());
+
+ for (Attribute I : *this) {
+ if (!I.isStringAttribute()) {
+ AvailableAttrs |= ((uint64_t)1) << I.getKindAsEnum();
+ }
+ }
+}
+
AttributeSetNode *AttributeSetNode::get(LLVMContext &C,
ArrayRef<Attribute> Attrs) {
if (Attrs.empty())
@@ -504,10 +599,52 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C,
pImpl->AttrsSetNodes.InsertNode(PA, InsertPoint);
}
- // Return the AttributesListNode that we found or created.
+ // Return the AttributeSetNode that we found or created.
return PA;
}
+AttributeSetNode *AttributeSetNode::get(LLVMContext &C, const AttrBuilder &B) {
+ // Add target-independent attributes.
+ SmallVector<Attribute, 8> Attrs;
+ for (Attribute::AttrKind Kind = Attribute::None;
+ Kind != Attribute::EndAttrKinds; Kind = Attribute::AttrKind(Kind + 1)) {
+ if (!B.contains(Kind))
+ continue;
+
+ Attribute Attr;
+ switch (Kind) {
+ case Attribute::Alignment:
+ Attr = Attribute::getWithAlignment(C, B.getAlignment());
+ break;
+ case Attribute::StackAlignment:
+ Attr = Attribute::getWithStackAlignment(C, B.getStackAlignment());
+ break;
+ case Attribute::Dereferenceable:
+ Attr = Attribute::getWithDereferenceableBytes(
+ C, B.getDereferenceableBytes());
+ break;
+ case Attribute::DereferenceableOrNull:
+ Attr = Attribute::getWithDereferenceableOrNullBytes(
+ C, B.getDereferenceableOrNullBytes());
+ break;
+ case Attribute::AllocSize: {
+ auto A = B.getAllocSizeArgs();
+ Attr = Attribute::getWithAllocSizeArgs(C, A.first, A.second);
+ break;
+ }
+ default:
+ Attr = Attribute::get(C, Kind);
+ }
+ Attrs.push_back(Attr);
+ }
+
+ // Add target-dependent (string) attributes.
+ for (const auto &TDA : B.td_attrs())
+ Attrs.emplace_back(Attribute::get(C, TDA.first, TDA.second));
+
+ return get(C, Attrs);
+}
+
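The switch above exists because a handful of kinds carry an integer payload and must be built through their dedicated getWith* constructor; every other kind is a bare enum. The two shapes, illustrated (hedged):

    Attribute Plain = Attribute::get(Ctx, Attribute::NoInline);       // enum only
    Attribute Deref = Attribute::getWithDereferenceableBytes(Ctx, 8); // payload 8
    assert(Plain.isEnumAttribute());
    assert(Deref.isIntAttribute() && Deref.getValueAsInt() == 8);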
bool AttributeSetNode::hasAttribute(StringRef Kind) const {
for (Attribute I : *this)
if (I.hasAttribute(Kind))
@@ -578,46 +715,106 @@ std::string AttributeSetNode::getAsString(bool InAttrGrp) const {
}
//===----------------------------------------------------------------------===//
-// AttributeSetImpl Definition
+// AttributeListImpl Definition
//===----------------------------------------------------------------------===//
-LLVM_DUMP_METHOD void AttributeSetImpl::dump() const {
- AttributeSet(const_cast<AttributeSetImpl *>(this)).dump();
+AttributeListImpl::AttributeListImpl(
+ LLVMContext &C, ArrayRef<std::pair<unsigned, AttributeSet>> Slots)
+ : Context(C), NumSlots(Slots.size()), AvailableFunctionAttrs(0) {
+#ifndef NDEBUG
+ assert(!Slots.empty() && "pointless AttributeListImpl");
+ if (Slots.size() >= 2) {
+ auto *PrevPair = &Slots.front();
+ for (auto &CurPair : Slots.drop_front()) {
+ assert(PrevPair->first <= CurPair.first && "Attribute set not ordered!");
+ PrevPair = &CurPair;
+ }
+ }
+#endif
+
+ // There's memory after the node where we can store the entries.
+ std::copy(Slots.begin(), Slots.end(), getTrailingObjects<IndexAttrPair>());
+
+ // Initialize AvailableFunctionAttrs summary bitset.
+ static_assert(Attribute::EndAttrKinds <=
+ sizeof(AvailableFunctionAttrs) * CHAR_BIT,
+ "Too many attributes");
+ static_assert(AttributeList::FunctionIndex == ~0u,
+ "FunctionIndex should be biggest possible index");
+ const auto &Last = Slots.back();
+ if (Last.first == AttributeList::FunctionIndex) {
+ AttributeSet Node = Last.second;
+ for (Attribute I : Node) {
+ if (!I.isStringAttribute())
+ AvailableFunctionAttrs |= ((uint64_t)1) << I.getKindAsEnum();
+ }
+ }
+}
+
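AvailableFunctionAttrs is a summary bitset computed once in this constructor so that hasFnAttribute can answer with a single bit test instead of walking the function slot; string attributes have no enum bit, which is why they are skipped. Roughly (names hypothetical):

    // Given some AttributeListImpl *Impl, Impl->hasFnAttribute(Kind)
    // reduces to:
    //   (AvailableFunctionAttrs & (uint64_t(1) << Kind)) != 0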
+void AttributeListImpl::Profile(FoldingSetNodeID &ID) const {
+ Profile(ID, makeArrayRef(getSlotPair(0), getNumSlots()));
+}
+
+void AttributeListImpl::Profile(
+ FoldingSetNodeID &ID, ArrayRef<std::pair<unsigned, AttributeSet>> Nodes) {
+ for (const auto &Node : Nodes) {
+ ID.AddInteger(Node.first);
+ ID.AddPointer(Node.second.SetNode);
+ }
}
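Profiling each slot as (index, node pointer) is sound because AttributeSetNode is itself uniqued per context, so pointer equality is set equality; whole lists therefore fold together too. A hedged consequence (assumes Ctx):

    AttrBuilder B;
    B.addAttribute(Attribute::NoUnwind);
    AttributeList A1 = AttributeList::get(Ctx, AttributeList::FunctionIndex, B);
    AttributeList A2 = AttributeList::get(Ctx, AttributeList::FunctionIndex, B);
    assert(A1 == A2); // same impl, found via FoldingSet lookup on Profile()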
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void AttributeListImpl::dump() const {
+ AttributeList(const_cast<AttributeListImpl *>(this)).dump();
+}
+#endif
+
//===----------------------------------------------------------------------===//
-// AttributeSet Construction and Mutation Methods
+// AttributeList Construction and Mutation Methods
//===----------------------------------------------------------------------===//
-AttributeSet
-AttributeSet::getImpl(LLVMContext &C,
- ArrayRef<std::pair<unsigned, AttributeSetNode*> > Attrs) {
+AttributeList AttributeList::getImpl(
+ LLVMContext &C, ArrayRef<std::pair<unsigned, AttributeSet>> Attrs) {
+ assert(!Attrs.empty() && "creating pointless AttributeList");
+#ifndef NDEBUG
+ unsigned LastIndex = 0;
+ bool IsFirst = true;
+ for (auto &&AttrPair : Attrs) {
+ assert((IsFirst || LastIndex < AttrPair.first) &&
+ "unsorted or duplicate AttributeList indices");
+ assert(AttrPair.second.hasAttributes() && "pointless AttributeList slot");
+ LastIndex = AttrPair.first;
+ IsFirst = false;
+ }
+#endif
+
LLVMContextImpl *pImpl = C.pImpl;
FoldingSetNodeID ID;
- AttributeSetImpl::Profile(ID, Attrs);
+ AttributeListImpl::Profile(ID, Attrs);
void *InsertPoint;
- AttributeSetImpl *PA = pImpl->AttrsLists.FindNodeOrInsertPos(ID, InsertPoint);
+ AttributeListImpl *PA =
+ pImpl->AttrsLists.FindNodeOrInsertPos(ID, InsertPoint);
// If we didn't find any existing attributes of the same shape then
// create a new one and insert it.
if (!PA) {
- // Coallocate entries after the AttributeSetImpl itself.
+ // Co-allocate entries after the AttributeListImpl itself.
void *Mem = ::operator new(
- AttributeSetImpl::totalSizeToAlloc<IndexAttrPair>(Attrs.size()));
- PA = new (Mem) AttributeSetImpl(C, Attrs);
+ AttributeListImpl::totalSizeToAlloc<IndexAttrPair>(Attrs.size()));
+ PA = new (Mem) AttributeListImpl(C, Attrs);
pImpl->AttrsLists.InsertNode(PA, InsertPoint);
}
// Return the AttributeList that we found or created.
- return AttributeSet(PA);
+ return AttributeList(PA);
}
-AttributeSet AttributeSet::get(LLVMContext &C,
- ArrayRef<std::pair<unsigned, Attribute> > Attrs){
+AttributeList
+AttributeList::get(LLVMContext &C,
+ ArrayRef<std::pair<unsigned, Attribute>> Attrs) {
// If there are no attributes then return an empty AttributeList.
if (Attrs.empty())
- return AttributeSet();
+ return AttributeList();
assert(std::is_sorted(Attrs.begin(), Attrs.end(),
[](const std::pair<unsigned, Attribute> &LHS,
@@ -632,8 +829,8 @@ AttributeSet AttributeSet::get(LLVMContext &C,
// Create a vector of (unsigned, AttributeSet) pairs from the attributes
// list.
- SmallVector<std::pair<unsigned, AttributeSetNode*>, 8> AttrPairVec;
- for (ArrayRef<std::pair<unsigned, Attribute> >::iterator I = Attrs.begin(),
+ SmallVector<std::pair<unsigned, AttributeSet>, 8> AttrPairVec;
+ for (ArrayRef<std::pair<unsigned, Attribute>>::iterator I = Attrs.begin(),
E = Attrs.end(); I != E; ) {
unsigned Index = I->first;
SmallVector<Attribute, 4> AttrVec;
@@ -642,103 +839,87 @@ AttributeSet AttributeSet::get(LLVMContext &C,
++I;
}
- AttrPairVec.emplace_back(Index, AttributeSetNode::get(C, AttrVec));
+ AttrPairVec.emplace_back(Index, AttributeSet::get(C, AttrVec));
}
return getImpl(C, AttrPairVec);
}
-AttributeSet AttributeSet::get(LLVMContext &C,
- ArrayRef<std::pair<unsigned,
- AttributeSetNode*> > Attrs) {
+AttributeList
+AttributeList::get(LLVMContext &C,
+ ArrayRef<std::pair<unsigned, AttributeSet>> Attrs) {
// If there are no attributes then return an empty AttributeList.
if (Attrs.empty())
- return AttributeSet();
+ return AttributeList();
return getImpl(C, Attrs);
}
-AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
- const AttrBuilder &B) {
- if (!B.hasAttributes())
- return AttributeSet();
-
- // Add target-independent attributes.
- SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
- for (Attribute::AttrKind Kind = Attribute::None;
- Kind != Attribute::EndAttrKinds; Kind = Attribute::AttrKind(Kind + 1)) {
- if (!B.contains(Kind))
- continue;
-
- Attribute Attr;
- switch (Kind) {
- case Attribute::Alignment:
- Attr = Attribute::getWithAlignment(C, B.getAlignment());
- break;
- case Attribute::StackAlignment:
- Attr = Attribute::getWithStackAlignment(C, B.getStackAlignment());
- break;
- case Attribute::Dereferenceable:
- Attr = Attribute::getWithDereferenceableBytes(
- C, B.getDereferenceableBytes());
- break;
- case Attribute::DereferenceableOrNull:
- Attr = Attribute::getWithDereferenceableOrNullBytes(
- C, B.getDereferenceableOrNullBytes());
- break;
- case Attribute::AllocSize: {
- auto A = B.getAllocSizeArgs();
- Attr = Attribute::getWithAllocSizeArgs(C, A.first, A.second);
- break;
- }
- default:
- Attr = Attribute::get(C, Kind);
- }
- Attrs.emplace_back(Index, Attr);
+AttributeList AttributeList::get(LLVMContext &C, AttributeSet FnAttrs,
+ AttributeSet RetAttrs,
+ ArrayRef<AttributeSet> ArgAttrs) {
+ SmallVector<std::pair<unsigned, AttributeSet>, 8> AttrPairs;
+ if (RetAttrs.hasAttributes())
+ AttrPairs.emplace_back(ReturnIndex, RetAttrs);
+ size_t Index = 1;
+ for (AttributeSet AS : ArgAttrs) {
+ if (AS.hasAttributes())
+ AttrPairs.emplace_back(Index, AS);
+ ++Index;
}
+ if (FnAttrs.hasAttributes())
+ AttrPairs.emplace_back(FunctionIndex, FnAttrs);
+ if (AttrPairs.empty())
+ return AttributeList();
+ return getImpl(C, AttrPairs);
+}
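This overload fixes the canonical pair order: return index 0 first, then parameters biased to 1..N, then FunctionIndex last, with empty sets dropped. A usage sketch (illustrative; assumes an LLVMContext Ctx):

    AttrBuilder FnB, Arg0B;
    FnB.addAttribute(Attribute::NoUnwind);
    Arg0B.addAttribute(Attribute::NonNull);
    AttributeList AL = AttributeList::get(
        Ctx, AttributeSet::get(Ctx, FnB), AttributeSet(), // no return attrs
        {AttributeSet::get(Ctx, Arg0B)});
    assert(AL.hasFnAttribute(Attribute::NoUnwind));
    assert(AL.hasParamAttribute(0, Attribute::NonNull)); // stored at index 1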
- // Add target-dependent (string) attributes.
- for (const auto &TDA : B.td_attrs())
- Attrs.emplace_back(Index, Attribute::get(C, TDA.first, TDA.second));
-
- return get(C, Attrs);
+AttributeList AttributeList::get(LLVMContext &C, unsigned Index,
+ const AttrBuilder &B) {
+ if (!B.hasAttributes())
+ return AttributeList();
+ AttributeSet AS = AttributeSet::get(C, B);
+ std::pair<unsigned, AttributeSet> Arr[1] = {{Index, AS}};
+ return getImpl(C, Arr);
}
-AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
- ArrayRef<Attribute::AttrKind> Kinds) {
+AttributeList AttributeList::get(LLVMContext &C, unsigned Index,
+ ArrayRef<Attribute::AttrKind> Kinds) {
SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
for (Attribute::AttrKind K : Kinds)
Attrs.emplace_back(Index, Attribute::get(C, K));
return get(C, Attrs);
}
-AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
- ArrayRef<StringRef> Kinds) {
+AttributeList AttributeList::get(LLVMContext &C, unsigned Index,
+ ArrayRef<StringRef> Kinds) {
SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
for (StringRef K : Kinds)
Attrs.emplace_back(Index, Attribute::get(C, K));
return get(C, Attrs);
}
-AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef<AttributeSet> Attrs) {
- if (Attrs.empty()) return AttributeSet();
+AttributeList AttributeList::get(LLVMContext &C,
+ ArrayRef<AttributeList> Attrs) {
+ if (Attrs.empty())
+ return AttributeList();
if (Attrs.size() == 1) return Attrs[0];
- SmallVector<std::pair<unsigned, AttributeSetNode*>, 8> AttrNodeVec;
- AttributeSetImpl *A0 = Attrs[0].pImpl;
+ SmallVector<std::pair<unsigned, AttributeSet>, 8> AttrNodeVec;
+ AttributeListImpl *A0 = Attrs[0].pImpl;
if (A0)
- AttrNodeVec.append(A0->getNode(0), A0->getNode(A0->getNumSlots()));
+ AttrNodeVec.append(A0->getSlotPair(0), A0->getSlotPair(A0->getNumSlots()));
// Copy all attributes from Attrs into AttrNodeVec while keeping AttrNodeVec
// ordered by index. Because we know that each list in Attrs is ordered by
// index we only need to merge each successive list in rather than doing a
// full sort.
for (unsigned I = 1, E = Attrs.size(); I != E; ++I) {
- AttributeSetImpl *AS = Attrs[I].pImpl;
- if (!AS) continue;
- SmallVector<std::pair<unsigned, AttributeSetNode *>, 8>::iterator
+ AttributeListImpl *ALI = Attrs[I].pImpl;
+ if (!ALI) continue;
+ SmallVector<std::pair<unsigned, AttributeSet>, 8>::iterator
ANVI = AttrNodeVec.begin(), ANVE;
- for (const IndexAttrPair *AI = AS->getNode(0),
- *AE = AS->getNode(AS->getNumSlots());
+ for (const IndexAttrPair *AI = ALI->getSlotPair(0),
+ *AE = ALI->getSlotPair(ALI->getNumSlots());
AI != AE; ++AI) {
ANVE = AttrNodeVec.end();
while (ANVI != ANVE && ANVI->first <= AI->first)
@@ -750,113 +931,123 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef<AttributeSet> Attrs) {
return getImpl(C, AttrNodeVec);
}
-AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index,
- Attribute::AttrKind Kind) const {
+AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index,
+ Attribute::AttrKind Kind) const {
if (hasAttribute(Index, Kind)) return *this;
- return addAttributes(C, Index, AttributeSet::get(C, Index, Kind));
+ return addAttributes(C, Index, AttributeList::get(C, Index, Kind));
}
-AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index,
- StringRef Kind, StringRef Value) const {
- llvm::AttrBuilder B;
+AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index,
+ StringRef Kind,
+ StringRef Value) const {
+ AttrBuilder B;
B.addAttribute(Kind, Value);
- return addAttributes(C, Index, AttributeSet::get(C, Index, B));
+ return addAttributes(C, Index, AttributeList::get(C, Index, B));
}
-AttributeSet AttributeSet::addAttribute(LLVMContext &C,
- ArrayRef<unsigned> Indices,
- Attribute A) const {
+AttributeList AttributeList::addAttribute(LLVMContext &C,
+ ArrayRef<unsigned> Indices,
+ Attribute A) const {
+ assert(std::is_sorted(Indices.begin(), Indices.end()));
+
unsigned I = 0, E = pImpl ? pImpl->getNumSlots() : 0;
- auto IdxI = Indices.begin(), IdxE = Indices.end();
- SmallVector<AttributeSet, 4> AttrSet;
-
- while (I != E && IdxI != IdxE) {
- if (getSlotIndex(I) < *IdxI)
- AttrSet.emplace_back(getSlotAttributes(I++));
- else if (getSlotIndex(I) > *IdxI)
- AttrSet.emplace_back(AttributeSet::get(C, std::make_pair(*IdxI++, A)));
- else {
- AttrBuilder B(getSlotAttributes(I), *IdxI);
- B.addAttribute(A);
- AttrSet.emplace_back(AttributeSet::get(C, *IdxI, B));
+ SmallVector<IndexAttrPair, 4> AttrVec;
+ for (unsigned Index : Indices) {
+ // Add all attribute slots before the current index.
+ for (; I < E && getSlotIndex(I) < Index; ++I)
+ AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I));
+
+ // Add the attribute at this index. If we already have attributes at this
+ // index, merge them into a new set.
+ AttrBuilder B;
+ if (I < E && getSlotIndex(I) == Index) {
+ B.merge(AttrBuilder(pImpl->getSlotNode(I)));
++I;
- ++IdxI;
}
+ B.addAttribute(A);
+ AttrVec.emplace_back(Index, AttributeSet::get(C, B));
}
- while (I != E)
- AttrSet.emplace_back(getSlotAttributes(I++));
-
- while (IdxI != IdxE)
- AttrSet.emplace_back(AttributeSet::get(C, std::make_pair(*IdxI++, A)));
+ // Add remaining attributes.
+ for (; I < E; ++I)
+ AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I));
- return get(C, AttrSet);
+ return get(C, AttrVec);
}
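The rewrite above replaces the old three-loop splice with a single merge walk over the existing slots and the sorted Indices, rebuilding any collided slot through AttrBuilder. The intended call pattern, as a hedged sketch (AL and Ctx assumed):

    // Mark both parameters non-null in one rebuild (param i lives at index i+1).
    unsigned Idxs[] = {1, 2};
    AttributeList NN =
        AL.addAttribute(Ctx, Idxs, Attribute::get(Ctx, Attribute::NonNull));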
-AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Index,
- AttributeSet Attrs) const {
+AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
+ AttributeList Attrs) const {
if (!pImpl) return Attrs;
if (!Attrs.pImpl) return *this;
+ return addAttributes(C, Index, Attrs.getAttributes(Index));
+}
+
+AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
+ AttributeSet AS) const {
+ if (!AS.hasAttributes())
+ return *this;
+
#ifndef NDEBUG
// FIXME it is not obvious how this should work for alignment. For now, say
// we can't change a known alignment.
unsigned OldAlign = getParamAlignment(Index);
- unsigned NewAlign = Attrs.getParamAlignment(Index);
+ unsigned NewAlign = AS.getAlignment();
assert((!OldAlign || !NewAlign || OldAlign == NewAlign) &&
"Attempt to change alignment!");
#endif
- // Add the attribute slots before the one we're trying to add.
- SmallVector<AttributeSet, 4> AttrSet;
+ SmallVector<std::pair<unsigned, AttributeSet>, 4> AttrSet;
uint64_t NumAttrs = pImpl->getNumSlots();
- AttributeSet AS;
- uint64_t LastIndex = 0;
- for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
- if (getSlotIndex(I) >= Index) {
- if (getSlotIndex(I) == Index) AS = getSlotAttributes(LastIndex++);
- break;
- }
- LastIndex = I + 1;
- AttrSet.push_back(getSlotAttributes(I));
- }
-
- // Now add the attribute into the correct slot. There may already be an
- // AttributeSet there.
- AttrBuilder B(AS, Index);
+ unsigned I;
- for (unsigned I = 0, E = Attrs.pImpl->getNumSlots(); I != E; ++I)
- if (Attrs.getSlotIndex(I) == Index) {
- for (AttributeSetImpl::iterator II = Attrs.pImpl->begin(I),
- IE = Attrs.pImpl->end(I); II != IE; ++II)
- B.addAttribute(*II);
+ // Add all the attribute slots before the one we need to merge.
+ for (I = 0; I < NumAttrs; ++I) {
+ if (getSlotIndex(I) >= Index)
break;
- }
+ AttrSet.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I));
+ }
- AttrSet.push_back(AttributeSet::get(C, Index, B));
+ if (I < NumAttrs && getSlotIndex(I) == Index) {
+ // We need to merge two AttributeSets.
+ AttributeSet Merged = AttributeSet::get(
+ C, AttrBuilder(pImpl->getSlotNode(I)).merge(AttrBuilder(AS)));
+ AttrSet.emplace_back(Index, Merged);
+ ++I;
+ } else {
+ // Otherwise, there were no attributes at this position in the original
+ // list. Add the set as is.
+ AttrSet.emplace_back(Index, AS);
+ }
- // Add the remaining attribute slots.
- for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
- AttrSet.push_back(getSlotAttributes(I));
+ // Add the remaining entries.
+ for (; I < NumAttrs; ++I)
+ AttrSet.emplace_back(getSlotIndex(I), pImpl->getSlotNode(I));
return get(C, AttrSet);
}
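The merge goes through AttrBuilder because an AttributeSet is immutable once uniqued: both sets are decomposed into one builder and re-uniqued as a fresh set. In isolation (hedged; Existing and Incoming are placeholder AttributeSets, using the AttrBuilder(AttributeSet) constructor added later in this file):

    AttrBuilder Merged(Existing);
    Merged.merge(AttrBuilder(Incoming));
    AttributeSet Combined = AttributeSet::get(Ctx, Merged);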
-AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Index,
- Attribute::AttrKind Kind) const {
+AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
+ const AttrBuilder &B) const {
+ return addAttributes(C, Index, AttributeSet::get(C, B));
+}
+
+AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index,
+ Attribute::AttrKind Kind) const {
if (!hasAttribute(Index, Kind)) return *this;
- return removeAttributes(C, Index, AttributeSet::get(C, Index, Kind));
+ return removeAttributes(C, Index, AttributeList::get(C, Index, Kind));
}
-AttributeSet AttributeSet::removeAttribute(LLVMContext &C, unsigned Index,
- StringRef Kind) const {
+AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index,
+ StringRef Kind) const {
if (!hasAttribute(Index, Kind)) return *this;
- return removeAttributes(C, Index, AttributeSet::get(C, Index, Kind));
+ return removeAttributes(C, Index, AttributeList::get(C, Index, Kind));
}
-AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index,
- AttributeSet Attrs) const {
- if (!pImpl) return AttributeSet();
+AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index,
+ AttributeList Attrs) const {
+ if (!pImpl)
+ return AttributeList();
if (!Attrs.pImpl) return *this;
// FIXME it is not obvious how this should work for alignment.
@@ -865,13 +1056,13 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index,
"Attempt to change alignment!");
// Add the attribute slots before the one we're trying to add.
- SmallVector<AttributeSet, 4> AttrSet;
+ SmallVector<AttributeList, 4> AttrSet;
uint64_t NumAttrs = pImpl->getNumSlots();
- AttributeSet AS;
+ AttributeList AL;
uint64_t LastIndex = 0;
for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
if (getSlotIndex(I) >= Index) {
- if (getSlotIndex(I) == Index) AS = getSlotAttributes(LastIndex++);
+ if (getSlotIndex(I) == Index) AL = getSlotAttributes(LastIndex++);
break;
}
LastIndex = I + 1;
@@ -879,8 +1070,8 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index,
}
// Now remove the attribute from the correct slot. There may already be an
- // AttributeSet there.
- AttrBuilder B(AS, Index);
+ // AttributeList there.
+ AttrBuilder B(AL, Index);
for (unsigned I = 0, E = Attrs.pImpl->getNumSlots(); I != E; ++I)
if (Attrs.getSlotIndex(I) == Index) {
@@ -888,7 +1079,7 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index,
break;
}
- AttrSet.push_back(AttributeSet::get(C, Index, B));
+ AttrSet.push_back(AttributeList::get(C, Index, B));
// Add the remaining attribute slots.
for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
@@ -897,22 +1088,23 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index,
return get(C, AttrSet);
}
-AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index,
- const AttrBuilder &Attrs) const {
- if (!pImpl) return AttributeSet();
+AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index,
+ const AttrBuilder &Attrs) const {
+ if (!pImpl)
+ return AttributeList();
// FIXME it is not obvious how this should work for alignment.
// For now, say we can't pass in alignment, which no current use does.
assert(!Attrs.hasAlignmentAttr() && "Attempt to change alignment!");
// Add the attribute slots before the one we're trying to add.
- SmallVector<AttributeSet, 4> AttrSet;
+ SmallVector<AttributeList, 4> AttrSet;
uint64_t NumAttrs = pImpl->getNumSlots();
- AttributeSet AS;
+ AttributeList AL;
uint64_t LastIndex = 0;
for (unsigned I = 0, E = NumAttrs; I != E; ++I) {
if (getSlotIndex(I) >= Index) {
- if (getSlotIndex(I) == Index) AS = getSlotAttributes(LastIndex++);
+ if (getSlotIndex(I) == Index) AL = getSlotAttributes(LastIndex++);
break;
}
LastIndex = I + 1;
@@ -920,11 +1112,11 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index,
}
// Now remove the attribute from the correct slot. There may already be an
- // AttributeSet there.
- AttrBuilder B(AS, Index);
+ // AttributeList there.
+ AttrBuilder B(AL, Index);
B.remove(Attrs);
- AttrSet.push_back(AttributeSet::get(C, Index, B));
+ AttrSet.push_back(AttributeList::get(C, Index, B));
// Add the remaining attribute slots.
for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I)
@@ -933,94 +1125,96 @@ AttributeSet AttributeSet::removeAttributes(LLVMContext &C, unsigned Index,
return get(C, AttrSet);
}
-AttributeSet AttributeSet::addDereferenceableAttr(LLVMContext &C, unsigned Index,
- uint64_t Bytes) const {
- llvm::AttrBuilder B;
+AttributeList AttributeList::removeAttributes(LLVMContext &C,
+ unsigned WithoutIndex) const {
+ if (!pImpl)
+ return AttributeList();
+
+ SmallVector<std::pair<unsigned, AttributeSet>, 4> AttrSet;
+ for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) {
+ unsigned Index = getSlotIndex(I);
+ if (Index != WithoutIndex)
+ AttrSet.push_back({Index, pImpl->getSlotNode(I)});
+ }
+ return get(C, AttrSet);
+}
+
+AttributeList AttributeList::addDereferenceableAttr(LLVMContext &C,
+ unsigned Index,
+ uint64_t Bytes) const {
+ AttrBuilder B;
B.addDereferenceableAttr(Bytes);
- return addAttributes(C, Index, AttributeSet::get(C, Index, B));
+ return addAttributes(C, Index, AttributeList::get(C, Index, B));
}
-AttributeSet AttributeSet::addDereferenceableOrNullAttr(LLVMContext &C,
- unsigned Index,
- uint64_t Bytes) const {
- llvm::AttrBuilder B;
+AttributeList
+AttributeList::addDereferenceableOrNullAttr(LLVMContext &C, unsigned Index,
+ uint64_t Bytes) const {
+ AttrBuilder B;
B.addDereferenceableOrNullAttr(Bytes);
- return addAttributes(C, Index, AttributeSet::get(C, Index, B));
+ return addAttributes(C, Index, AttributeList::get(C, Index, B));
}
-AttributeSet
-AttributeSet::addAllocSizeAttr(LLVMContext &C, unsigned Index,
- unsigned ElemSizeArg,
- const Optional<unsigned> &NumElemsArg) {
- llvm::AttrBuilder B;
+AttributeList
+AttributeList::addAllocSizeAttr(LLVMContext &C, unsigned Index,
+ unsigned ElemSizeArg,
+ const Optional<unsigned> &NumElemsArg) {
+ AttrBuilder B;
B.addAllocSizeAttr(ElemSizeArg, NumElemsArg);
- return addAttributes(C, Index, AttributeSet::get(C, Index, B));
+ return addAttributes(C, Index, AttributeList::get(C, Index, B));
}
//===----------------------------------------------------------------------===//
-// AttributeSet Accessor Methods
+// AttributeList Accessor Methods
//===----------------------------------------------------------------------===//
-LLVMContext &AttributeSet::getContext() const {
- return pImpl->getContext();
-}
+LLVMContext &AttributeList::getContext() const { return pImpl->getContext(); }
-AttributeSet AttributeSet::getParamAttributes(unsigned Index) const {
- return pImpl && hasAttributes(Index) ?
- AttributeSet::get(pImpl->getContext(),
- ArrayRef<std::pair<unsigned, AttributeSetNode*> >(
- std::make_pair(Index, getAttributes(Index)))) :
- AttributeSet();
+AttributeSet AttributeList::getParamAttributes(unsigned ArgNo) const {
+ return getAttributes(ArgNo + 1);
}
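The +1 bias is the entire index scheme in one line; spelled out (illustrative; AL assumed):

    //   ReturnIndex     == 0
    //   parameter ArgNo == ArgNo + 1
    //   FunctionIndex   == ~0u
    AttributeSet P0 = AL.getParamAttributes(0); // same as AL.getAttributes(1)
    AttributeSet Fn = AL.getFnAttributes();     // same as AL.getAttributes(~0u)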
-AttributeSet AttributeSet::getRetAttributes() const {
- return pImpl && hasAttributes(ReturnIndex) ?
- AttributeSet::get(pImpl->getContext(),
- ArrayRef<std::pair<unsigned, AttributeSetNode*> >(
- std::make_pair(ReturnIndex,
- getAttributes(ReturnIndex)))) :
- AttributeSet();
+AttributeSet AttributeList::getRetAttributes() const {
+ return getAttributes(ReturnIndex);
}
-AttributeSet AttributeSet::getFnAttributes() const {
- return pImpl && hasAttributes(FunctionIndex) ?
- AttributeSet::get(pImpl->getContext(),
- ArrayRef<std::pair<unsigned, AttributeSetNode*> >(
- std::make_pair(FunctionIndex,
- getAttributes(FunctionIndex)))) :
- AttributeSet();
+AttributeSet AttributeList::getFnAttributes() const {
+ return getAttributes(FunctionIndex);
}
-bool AttributeSet::hasAttribute(unsigned Index, Attribute::AttrKind Kind) const{
- AttributeSetNode *ASN = getAttributes(Index);
- return ASN && ASN->hasAttribute(Kind);
+bool AttributeList::hasAttribute(unsigned Index,
+ Attribute::AttrKind Kind) const {
+ return getAttributes(Index).hasAttribute(Kind);
}
-bool AttributeSet::hasAttribute(unsigned Index, StringRef Kind) const {
- AttributeSetNode *ASN = getAttributes(Index);
- return ASN && ASN->hasAttribute(Kind);
+bool AttributeList::hasAttribute(unsigned Index, StringRef Kind) const {
+ return getAttributes(Index).hasAttribute(Kind);
}
-bool AttributeSet::hasAttributes(unsigned Index) const {
- AttributeSetNode *ASN = getAttributes(Index);
- return ASN && ASN->hasAttributes();
+bool AttributeList::hasAttributes(unsigned Index) const {
+ return getAttributes(Index).hasAttributes();
}
-bool AttributeSet::hasFnAttribute(Attribute::AttrKind Kind) const {
+bool AttributeList::hasFnAttribute(Attribute::AttrKind Kind) const {
return pImpl && pImpl->hasFnAttribute(Kind);
}
-bool AttributeSet::hasFnAttribute(StringRef Kind) const {
- return hasAttribute(AttributeSet::FunctionIndex, Kind);
+bool AttributeList::hasFnAttribute(StringRef Kind) const {
+ return hasAttribute(AttributeList::FunctionIndex, Kind);
+}
+
+bool AttributeList::hasParamAttribute(unsigned ArgNo,
+ Attribute::AttrKind Kind) const {
+ return hasAttribute(ArgNo + 1, Kind);
}
-bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr,
- unsigned *Index) const {
+bool AttributeList::hasAttrSomewhere(Attribute::AttrKind Attr,
+ unsigned *Index) const {
if (!pImpl) return false;
for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I)
- for (AttributeSetImpl::iterator II = pImpl->begin(I),
- IE = pImpl->end(I); II != IE; ++II)
+ for (AttributeListImpl::iterator II = pImpl->begin(I), IE = pImpl->end(I);
+ II != IE; ++II)
if (II->hasAttribute(Attr)) {
if (Index) *Index = pImpl->getSlotIndex(I);
return true;
@@ -1029,93 +1223,85 @@ bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr,
return false;
}
-Attribute AttributeSet::getAttribute(unsigned Index,
- Attribute::AttrKind Kind) const {
- AttributeSetNode *ASN = getAttributes(Index);
- return ASN ? ASN->getAttribute(Kind) : Attribute();
+Attribute AttributeList::getAttribute(unsigned Index,
+ Attribute::AttrKind Kind) const {
+ return getAttributes(Index).getAttribute(Kind);
}
-Attribute AttributeSet::getAttribute(unsigned Index,
- StringRef Kind) const {
- AttributeSetNode *ASN = getAttributes(Index);
- return ASN ? ASN->getAttribute(Kind) : Attribute();
+Attribute AttributeList::getAttribute(unsigned Index, StringRef Kind) const {
+ return getAttributes(Index).getAttribute(Kind);
}
-unsigned AttributeSet::getParamAlignment(unsigned Index) const {
- AttributeSetNode *ASN = getAttributes(Index);
- return ASN ? ASN->getAlignment() : 0;
+unsigned AttributeList::getParamAlignment(unsigned Index) const {
+ return getAttributes(Index).getAlignment();
}
-unsigned AttributeSet::getStackAlignment(unsigned Index) const {
- AttributeSetNode *ASN = getAttributes(Index);
- return ASN ? ASN->getStackAlignment() : 0;
+unsigned AttributeList::getStackAlignment(unsigned Index) const {
+ return getAttributes(Index).getStackAlignment();
}
-uint64_t AttributeSet::getDereferenceableBytes(unsigned Index) const {
- AttributeSetNode *ASN = getAttributes(Index);
- return ASN ? ASN->getDereferenceableBytes() : 0;
+uint64_t AttributeList::getDereferenceableBytes(unsigned Index) const {
+ return getAttributes(Index).getDereferenceableBytes();
}
-uint64_t AttributeSet::getDereferenceableOrNullBytes(unsigned Index) const {
- AttributeSetNode *ASN = getAttributes(Index);
- return ASN ? ASN->getDereferenceableOrNullBytes() : 0;
+uint64_t AttributeList::getDereferenceableOrNullBytes(unsigned Index) const {
+ return getAttributes(Index).getDereferenceableOrNullBytes();
}
std::pair<unsigned, Optional<unsigned>>
-AttributeSet::getAllocSizeArgs(unsigned Index) const {
- AttributeSetNode *ASN = getAttributes(Index);
- return ASN ? ASN->getAllocSizeArgs() : std::make_pair(0u, Optional<unsigned>(0u));
+AttributeList::getAllocSizeArgs(unsigned Index) const {
+ return getAttributes(Index).getAllocSizeArgs();
}
-std::string AttributeSet::getAsString(unsigned Index, bool InAttrGrp) const {
- AttributeSetNode *ASN = getAttributes(Index);
- return ASN ? ASN->getAsString(InAttrGrp) : std::string("");
+std::string AttributeList::getAsString(unsigned Index, bool InAttrGrp) const {
+ return getAttributes(Index).getAsString(InAttrGrp);
}
-AttributeSetNode *AttributeSet::getAttributes(unsigned Index) const {
- if (!pImpl) return nullptr;
+AttributeSet AttributeList::getAttributes(unsigned Index) const {
+ if (!pImpl) return AttributeSet();
// Loop through to find the attribute node we want.
for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I)
if (pImpl->getSlotIndex(I) == Index)
return pImpl->getSlotNode(I);
- return nullptr;
+ return AttributeSet();
}
-AttributeSet::iterator AttributeSet::begin(unsigned Slot) const {
+AttributeList::iterator AttributeList::begin(unsigned Slot) const {
if (!pImpl)
return ArrayRef<Attribute>().begin();
return pImpl->begin(Slot);
}
-AttributeSet::iterator AttributeSet::end(unsigned Slot) const {
+AttributeList::iterator AttributeList::end(unsigned Slot) const {
if (!pImpl)
return ArrayRef<Attribute>().end();
return pImpl->end(Slot);
}
//===----------------------------------------------------------------------===//
-// AttributeSet Introspection Methods
+// AttributeList Introspection Methods
//===----------------------------------------------------------------------===//
-unsigned AttributeSet::getNumSlots() const {
+unsigned AttributeList::getNumSlots() const {
return pImpl ? pImpl->getNumSlots() : 0;
}
-unsigned AttributeSet::getSlotIndex(unsigned Slot) const {
+unsigned AttributeList::getSlotIndex(unsigned Slot) const {
assert(pImpl && Slot < pImpl->getNumSlots() &&
"Slot # out of range!");
return pImpl->getSlotIndex(Slot);
}
-AttributeSet AttributeSet::getSlotAttributes(unsigned Slot) const {
+AttributeList AttributeList::getSlotAttributes(unsigned Slot) const {
assert(pImpl && Slot < pImpl->getNumSlots() &&
"Slot # out of range!");
return pImpl->getSlotAttributes(Slot);
}
-LLVM_DUMP_METHOD void AttributeSet::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void AttributeList::dump() const {
dbgs() << "PAL[\n";
for (unsigned i = 0, e = getNumSlots(); i < e; ++i) {
@@ -1130,28 +1316,34 @@ LLVM_DUMP_METHOD void AttributeSet::dump() const {
dbgs() << "]\n";
}
+#endif
//===----------------------------------------------------------------------===//
// AttrBuilder Method Implementations
//===----------------------------------------------------------------------===//
-AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index)
- : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0),
- DerefOrNullBytes(0), AllocSizeArgs(0) {
- AttributeSetImpl *pImpl = AS.pImpl;
+AttrBuilder::AttrBuilder(AttributeList AL, unsigned Index) {
+ AttributeListImpl *pImpl = AL.pImpl;
if (!pImpl) return;
for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) {
if (pImpl->getSlotIndex(I) != Index) continue;
- for (AttributeSetImpl::iterator II = pImpl->begin(I),
- IE = pImpl->end(I); II != IE; ++II)
+ for (AttributeListImpl::iterator II = pImpl->begin(I), IE = pImpl->end(I);
+ II != IE; ++II)
addAttribute(*II);
break;
}
}
+AttrBuilder::AttrBuilder(AttributeSet AS) {
+ if (AS.hasAttributes()) {
+ for (const Attribute &A : AS)
+ addAttribute(A);
+ }
+}
+
void AttrBuilder::clear() {
Attrs.reset();
TargetDepAttrs.clear();
@@ -1213,7 +1405,7 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
return *this;
}
-AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) {
+AttrBuilder &AttrBuilder::removeAttributes(AttributeList A, uint64_t Index) {
unsigned Slot = ~0U;
for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I)
if (A.getSlotIndex(I) == Index) {
@@ -1221,9 +1413,10 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) {
break;
}
- assert(Slot != ~0U && "Couldn't find index in AttributeSet!");
+ assert(Slot != ~0U && "Couldn't find index in AttributeList!");
- for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot); I != E; ++I) {
+ for (AttributeList::iterator I = A.begin(Slot), E = A.end(Slot); I != E;
+ ++I) {
Attribute Attr = *I;
if (Attr.isEnumAttribute() || Attr.isIntAttribute()) {
removeAttribute(Attr.getKindAsEnum());
@@ -1359,7 +1552,7 @@ bool AttrBuilder::overlaps(const AttrBuilder &B) const {
return true;
// Then check if any target dependent ones do.
- for (auto I : td_attrs())
+ for (const auto &I : td_attrs())
if (B.contains(I.first))
return true;
@@ -1374,7 +1567,7 @@ bool AttrBuilder::hasAttributes() const {
return !Attrs.none() || !TargetDepAttrs.empty();
}
-bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const {
+bool AttrBuilder::hasAttributes(AttributeList A, uint64_t Index) const {
unsigned Slot = ~0U;
for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I)
if (A.getSlotIndex(I) == Index) {
@@ -1384,7 +1577,8 @@ bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const {
assert(Slot != ~0U && "Couldn't find the index!");
- for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot); I != E; ++I) {
+ for (AttributeList::iterator I = A.begin(Slot), E = A.end(Slot); I != E;
+ ++I) {
Attribute Attr = *I;
if (Attr.isEnumAttribute() || Attr.isIntAttribute()) {
if (Attrs[I->getKindAsEnum()])
@@ -1485,16 +1679,15 @@ static void adjustCallerSSPLevel(Function &Caller, const Function &Callee) {
B.addAttribute(Attribute::StackProtect)
.addAttribute(Attribute::StackProtectStrong)
.addAttribute(Attribute::StackProtectReq);
- AttributeSet OldSSPAttr = AttributeSet::get(Caller.getContext(),
- AttributeSet::FunctionIndex,
- B);
+ AttributeList OldSSPAttr =
+ AttributeList::get(Caller.getContext(), AttributeList::FunctionIndex, B);
if (Callee.hasFnAttribute(Attribute::StackProtectReq)) {
- Caller.removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+ Caller.removeAttributes(AttributeList::FunctionIndex, OldSSPAttr);
Caller.addFnAttr(Attribute::StackProtectReq);
} else if (Callee.hasFnAttribute(Attribute::StackProtectStrong) &&
!Caller.hasFnAttribute(Attribute::StackProtectReq)) {
- Caller.removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+ Caller.removeAttributes(AttributeList::FunctionIndex, OldSSPAttr);
Caller.addFnAttr(Attribute::StackProtectStrong);
} else if (Callee.hasFnAttribute(Attribute::StackProtect) &&
!Caller.hasFnAttribute(Attribute::StackProtectReq) &&
@@ -1510,7 +1703,6 @@ bool AttributeFuncs::areInlineCompatible(const Function &Caller,
return hasCompatibleFnAttrs(Caller, Callee);
}
-
void AttributeFuncs::mergeAttributesForInlining(Function &Caller,
const Function &Callee) {
mergeFnAttrs(Caller, Callee);
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index e3a7bae02e0a..2897434a2b8d 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -33,10 +34,10 @@ using namespace llvm;
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
-// Upgrade the declarations of the SSE4.1 functions whose arguments have
+// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
-static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
- Function *&NewFn) {
+static bool UpgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
+ Function *&NewFn) {
// Check whether this is an old version of the function, which received
// v4f32 arguments.
Type *Arg0Type = F->getFunctionType()->getParamType(0);
@@ -65,6 +66,265 @@ static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
return true;
}
+static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
+ // All of the intrinsic matches below should be marked with the LLVM
+ // version that started autoupgrading them. At some point in the future we
+ // would like to use this information to remove upgrade code for some older
+ // intrinsics. It is currently undecided how we will determine that future
+ // point.
+ if (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
+ Name.startswith("sse2.pcmpgt.") || // Added in 3.1
+ Name.startswith("avx2.pcmpeq.") || // Added in 3.1
+ Name.startswith("avx2.pcmpgt.") || // Added in 3.1
+ Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
+ Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
+ Name == "sse.add.ss" || // Added in 4.0
+ Name == "sse2.add.sd" || // Added in 4.0
+ Name == "sse.sub.ss" || // Added in 4.0
+ Name == "sse2.sub.sd" || // Added in 4.0
+ Name == "sse.mul.ss" || // Added in 4.0
+ Name == "sse2.mul.sd" || // Added in 4.0
+ Name == "sse.div.ss" || // Added in 4.0
+ Name == "sse2.div.sd" || // Added in 4.0
+ Name == "sse41.pmaxsb" || // Added in 3.9
+ Name == "sse2.pmaxs.w" || // Added in 3.9
+ Name == "sse41.pmaxsd" || // Added in 3.9
+ Name == "sse2.pmaxu.b" || // Added in 3.9
+ Name == "sse41.pmaxuw" || // Added in 3.9
+ Name == "sse41.pmaxud" || // Added in 3.9
+ Name == "sse41.pminsb" || // Added in 3.9
+ Name == "sse2.pmins.w" || // Added in 3.9
+ Name == "sse41.pminsd" || // Added in 3.9
+ Name == "sse2.pminu.b" || // Added in 3.9
+ Name == "sse41.pminuw" || // Added in 3.9
+ Name == "sse41.pminud" || // Added in 3.9
+ Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
+ Name.startswith("avx2.pmax") || // Added in 3.9
+ Name.startswith("avx2.pmin") || // Added in 3.9
+ Name.startswith("avx512.mask.pmax") || // Added in 4.0
+ Name.startswith("avx512.mask.pmin") || // Added in 4.0
+ Name.startswith("avx2.vbroadcast") || // Added in 3.8
+ Name.startswith("avx2.pbroadcast") || // Added in 3.8
+ Name.startswith("avx.vpermil.") || // Added in 3.1
+ Name.startswith("sse2.pshuf") || // Added in 3.9
+ Name.startswith("avx512.pbroadcast") || // Added in 3.9
+ Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
+ Name.startswith("avx512.mask.movddup") || // Added in 3.9
+ Name.startswith("avx512.mask.movshdup") || // Added in 3.9
+ Name.startswith("avx512.mask.movsldup") || // Added in 3.9
+ Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
+ Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
+ Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
+ Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
+ Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
+ Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
+ Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
+ Name.startswith("avx512.mask.punpckl") || // Added in 3.9
+ Name.startswith("avx512.mask.punpckh") || // Added in 3.9
+ Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
+ Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
+ Name.startswith("avx512.mask.pand.") || // Added in 3.9
+ Name.startswith("avx512.mask.pandn.") || // Added in 3.9
+ Name.startswith("avx512.mask.por.") || // Added in 3.9
+ Name.startswith("avx512.mask.pxor.") || // Added in 3.9
+ Name.startswith("avx512.mask.and.") || // Added in 3.9
+ Name.startswith("avx512.mask.andn.") || // Added in 3.9
+ Name.startswith("avx512.mask.or.") || // Added in 3.9
+ Name.startswith("avx512.mask.xor.") || // Added in 3.9
+ Name.startswith("avx512.mask.padd.") || // Added in 4.0
+ Name.startswith("avx512.mask.psub.") || // Added in 4.0
+ Name.startswith("avx512.mask.pmull.") || // Added in 4.0
+ Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
+ Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
+ Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
+ Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
+ Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
+ Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
+ Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
+ Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
+ Name == "avx512.mask.add.pd.128" || // Added in 4.0
+ Name == "avx512.mask.add.pd.256" || // Added in 4.0
+ Name == "avx512.mask.add.ps.128" || // Added in 4.0
+ Name == "avx512.mask.add.ps.256" || // Added in 4.0
+ Name == "avx512.mask.div.pd.128" || // Added in 4.0
+ Name == "avx512.mask.div.pd.256" || // Added in 4.0
+ Name == "avx512.mask.div.ps.128" || // Added in 4.0
+ Name == "avx512.mask.div.ps.256" || // Added in 4.0
+ Name == "avx512.mask.mul.pd.128" || // Added in 4.0
+ Name == "avx512.mask.mul.pd.256" || // Added in 4.0
+ Name == "avx512.mask.mul.ps.128" || // Added in 4.0
+ Name == "avx512.mask.mul.ps.256" || // Added in 4.0
+ Name == "avx512.mask.sub.pd.128" || // Added in 4.0
+ Name == "avx512.mask.sub.pd.256" || // Added in 4.0
+ Name == "avx512.mask.sub.ps.128" || // Added in 4.0
+ Name == "avx512.mask.sub.ps.256" || // Added in 4.0
+ Name == "avx512.mask.max.pd.128" || // Added in 5.0
+ Name == "avx512.mask.max.pd.256" || // Added in 5.0
+ Name == "avx512.mask.max.ps.128" || // Added in 5.0
+ Name == "avx512.mask.max.ps.256" || // Added in 5.0
+ Name == "avx512.mask.min.pd.128" || // Added in 5.0
+ Name == "avx512.mask.min.pd.256" || // Added in 5.0
+ Name == "avx512.mask.min.ps.128" || // Added in 5.0
+ Name == "avx512.mask.min.ps.256" || // Added in 5.0
+ Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
+ Name.startswith("avx512.mask.psll.d") || // Added in 4.0
+ Name.startswith("avx512.mask.psll.q") || // Added in 4.0
+ Name.startswith("avx512.mask.psll.w") || // Added in 4.0
+ Name.startswith("avx512.mask.psra.d") || // Added in 4.0
+ Name.startswith("avx512.mask.psra.q") || // Added in 4.0
+ Name.startswith("avx512.mask.psra.w") || // Added in 4.0
+ Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
+ Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
+ Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
+ Name.startswith("avx512.mask.pslli") || // Added in 4.0
+ Name.startswith("avx512.mask.psrai") || // Added in 4.0
+ Name.startswith("avx512.mask.psrli") || // Added in 4.0
+ Name.startswith("avx512.mask.psllv") || // Added in 4.0
+ Name.startswith("avx512.mask.psrav") || // Added in 4.0
+ Name.startswith("avx512.mask.psrlv") || // Added in 4.0
+ Name.startswith("sse41.pmovsx") || // Added in 3.8
+ Name.startswith("sse41.pmovzx") || // Added in 3.9
+ Name.startswith("avx2.pmovsx") || // Added in 3.9
+ Name.startswith("avx2.pmovzx") || // Added in 3.9
+ Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
+ Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
+ Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
+ Name == "sse2.cvtdq2pd" || // Added in 3.9
+ Name == "sse2.cvtps2pd" || // Added in 3.9
+ Name == "avx.cvtdq2.pd.256" || // Added in 3.9
+ Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
+ Name.startswith("avx.vinsertf128.") || // Added in 3.7
+ Name == "avx2.vinserti128" || // Added in 3.7
+ Name.startswith("avx512.mask.insert") || // Added in 4.0
+ Name.startswith("avx.vextractf128.") || // Added in 3.7
+ Name == "avx2.vextracti128" || // Added in 3.7
+ Name.startswith("avx512.mask.vextract") || // Added in 4.0
+ Name.startswith("sse4a.movnt.") || // Added in 3.9
+ Name.startswith("avx.movnt.") || // Added in 3.2
+ Name.startswith("avx512.storent.") || // Added in 3.9
+ Name == "sse41.movntdqa" || // Added in 5.0
+ Name == "avx2.movntdqa" || // Added in 5.0
+ Name == "avx512.movntdqa" || // Added in 5.0
+ Name == "sse2.storel.dq" || // Added in 3.9
+ Name.startswith("sse.storeu.") || // Added in 3.9
+ Name.startswith("sse2.storeu.") || // Added in 3.9
+ Name.startswith("avx.storeu.") || // Added in 3.9
+ Name.startswith("avx512.mask.storeu.") || // Added in 3.9
+ Name.startswith("avx512.mask.store.p") || // Added in 3.9
+ Name.startswith("avx512.mask.store.b.") || // Added in 3.9
+ Name.startswith("avx512.mask.store.w.") || // Added in 3.9
+ Name.startswith("avx512.mask.store.d.") || // Added in 3.9
+ Name.startswith("avx512.mask.store.q.") || // Added in 3.9
+ Name.startswith("avx512.mask.loadu.") || // Added in 3.9
+ Name.startswith("avx512.mask.load.") || // Added in 3.9
+ Name == "sse42.crc32.64.8" || // Added in 3.4
+ Name.startswith("avx.vbroadcast.s") || // Added in 3.5
+ Name.startswith("avx512.mask.palignr.") || // Added in 3.9
+ Name.startswith("avx512.mask.valign.") || // Added in 4.0
+ Name.startswith("sse2.psll.dq") || // Added in 3.7
+ Name.startswith("sse2.psrl.dq") || // Added in 3.7
+ Name.startswith("avx2.psll.dq") || // Added in 3.7
+ Name.startswith("avx2.psrl.dq") || // Added in 3.7
+ Name.startswith("avx512.psll.dq") || // Added in 3.9
+ Name.startswith("avx512.psrl.dq") || // Added in 3.9
+ Name == "sse41.pblendw" || // Added in 3.7
+ Name.startswith("sse41.blendp") || // Added in 3.7
+ Name.startswith("avx.blend.p") || // Added in 3.7
+ Name == "avx2.pblendw" || // Added in 3.7
+ Name.startswith("avx2.pblendd.") || // Added in 3.7
+ Name.startswith("avx.vbroadcastf128") || // Added in 4.0
+ Name == "avx2.vbroadcasti128" || // Added in 3.7
+ Name == "xop.vpcmov" || // Added in 3.8
+ Name == "xop.vpcmov.256" || // Added in 5.0
+ Name.startswith("avx512.mask.move.s") || // Added in 4.0
+ Name.startswith("avx512.cvtmask2") || // Added in 5.0
+ (Name.startswith("xop.vpcom") && // Added in 3.2
+ F->arg_size() == 2))
+ return true;
+
+ return false;
+}
+
+static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
+ Function *&NewFn) {
+ // Only handle intrinsics that start with "x86.".
+ if (!Name.startswith("x86."))
+ return false;
+ // Remove "x86." prefix.
+ Name = Name.substr(4);
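+  // e.g. "x86.sse2.pcmpeq.b" is looked up below as "sse2.pcmpeq.b".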
+
+ if (ShouldUpgradeX86Intrinsic(F, Name)) {
+ NewFn = nullptr;
+ return true;
+ }
+
+ // SSE4.1 ptest functions may have an old signature.
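+  // (Historically these were declared with <4 x float> operands; the call
+  // rewrite in UpgradeIntrinsicCall bitcasts them to the current <2 x i64>
+  // signature.)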
+ if (Name.startswith("sse41.ptest")) { // Added in 3.2
+ if (Name.substr(11) == "c")
+ return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
+ if (Name.substr(11) == "z")
+ return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
+ if (Name.substr(11) == "nzc")
+ return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
+ }
+ // Several blend and other instructions with masks used the wrong number of
+ // bits.
+ if (Name == "sse41.insertps") // Added in 3.6
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
+ NewFn);
+ if (Name == "sse41.dppd") // Added in 3.6
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
+ NewFn);
+ if (Name == "sse41.dpps") // Added in 3.6
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
+ NewFn);
+ if (Name == "sse41.mpsadbw") // Added in 3.6
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
+ NewFn);
+ if (Name == "avx.dp.ps.256") // Added in 3.6
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
+ NewFn);
+ if (Name == "avx2.mpsadbw") // Added in 3.6
+ return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
+ NewFn);
+
+ // frcz.ss/sd may need to have an argument dropped. Added in 3.2
+ if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::x86_xop_vfrcz_ss);
+ return true;
+ }
+ if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::x86_xop_vfrcz_sd);
+ return true;
+ }
+ // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
+ if (Name.startswith("xop.vpermil2")) { // Added in 3.9
+ auto Idx = F->getFunctionType()->getParamType(2);
+ if (Idx->isFPOrFPVectorTy()) {
+ rename(F);
+ unsigned IdxSize = Idx->getPrimitiveSizeInBits();
+ unsigned EltSize = Idx->getScalarSizeInBits();
+ Intrinsic::ID Permil2ID;
+ if (EltSize == 64 && IdxSize == 128)
+ Permil2ID = Intrinsic::x86_xop_vpermil2pd;
+ else if (EltSize == 32 && IdxSize == 128)
+ Permil2ID = Intrinsic::x86_xop_vpermil2ps;
+ else if (EltSize == 64 && IdxSize == 256)
+ Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
+ else
+ Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
+ return true;
+ }
+ }
+
+ return false;
+}
+
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
assert(F && "Illegal to upgrade a non-existent Function.");
@@ -155,26 +415,31 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
}
- case 'i': {
- if (Name.startswith("invariant.start")) {
+ case 'i':
+ case 'l': {
+ bool IsLifetimeStart = Name.startswith("lifetime.start");
+ if (IsLifetimeStart || Name.startswith("invariant.start")) {
+ Intrinsic::ID ID = IsLifetimeStart ?
+ Intrinsic::lifetime_start : Intrinsic::invariant_start;
auto Args = F->getFunctionType()->params();
Type* ObjectPtr[1] = {Args[1]};
- if (F->getName() !=
- Intrinsic::getName(Intrinsic::invariant_start, ObjectPtr)) {
+ if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
rename(F);
- NewFn = Intrinsic::getDeclaration(
- F->getParent(), Intrinsic::invariant_start, ObjectPtr);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
return true;
}
}
- if (Name.startswith("invariant.end")) {
+
+ bool IsLifetimeEnd = Name.startswith("lifetime.end");
+ if (IsLifetimeEnd || Name.startswith("invariant.end")) {
+ Intrinsic::ID ID = IsLifetimeEnd ?
+ Intrinsic::lifetime_end : Intrinsic::invariant_end;
+
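+      // lifetime.end is (i64 size, i8* ptr) while invariant.end is
+      // (start token, i64 size, i8* ptr), so the object pointer is operand
+      // 1 or 2 respectively.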
auto Args = F->getFunctionType()->params();
- Type* ObjectPtr[1] = {Args[2]};
- if (F->getName() !=
- Intrinsic::getName(Intrinsic::invariant_end, ObjectPtr)) {
+ Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
+ if (F->getName() != Intrinsic::getName(ID, ObjectPtr)) {
rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::invariant_end, ObjectPtr);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
return true;
}
}
@@ -204,16 +469,48 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
}
+ case 'n': {
+ if (Name.startswith("nvvm.")) {
+ Name = Name.substr(5);
+
+ // The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
+ Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
+ .Cases("brev32", "brev64", Intrinsic::bitreverse)
+ .Case("clz.i", Intrinsic::ctlz)
+ .Case("popc.i", Intrinsic::ctpop)
+ .Default(Intrinsic::not_intrinsic);
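+      // e.g. "llvm.nvvm.brev32" maps directly to "llvm.bitreverse.i32"; the
+      // overload below is taken from the return type.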
+ if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
+ NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
+ {F->getReturnType()});
+ return true;
+ }
+ // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
+ // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
+ //
+ // TODO: We could add lohi.i2d.
+ bool Expand = StringSwitch<bool>(Name)
+ .Cases("abs.i", "abs.ll", true)
+ .Cases("clz.ll", "popc.ll", "h2f", true)
+ .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
+ .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
+ .Default(false);
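+      // These have no single-intrinsic equivalent; e.g. "abs.i" is expanded
+      // in UpgradeIntrinsicCall into an icmp/select idiom.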
+ if (Expand) {
+ NewFn = nullptr;
+ return true;
+ }
+    }
+    break;
+  }
case 'o':
// We only need to change the name to match the mangling including the
// address space.
- if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
+ if (Name.startswith("objectsize.")) {
Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
- if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
+ if (F->arg_size() == 2 ||
+ F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::objectsize, Tys);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
+ Tys);
return true;
}
}
@@ -226,236 +523,15 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
- case 'x': {
- bool IsX86 = Name.startswith("x86.");
- if (IsX86)
- Name = Name.substr(4);
-
- // All of the intrinsics matches below should be marked with which llvm
- // version started autoupgrading them. At some point in the future we would
- // like to use this information to remove upgrade code for some older
- // intrinsics. It is currently undecided how we will determine that future
- // point.
- if (IsX86 &&
- (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
- Name.startswith("sse2.pcmpgt.") || // Added in 3.1
- Name.startswith("avx2.pcmpeq.") || // Added in 3.1
- Name.startswith("avx2.pcmpgt.") || // Added in 3.1
- Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
- Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
- Name == "sse.add.ss" || // Added in 4.0
- Name == "sse2.add.sd" || // Added in 4.0
- Name == "sse.sub.ss" || // Added in 4.0
- Name == "sse2.sub.sd" || // Added in 4.0
- Name == "sse.mul.ss" || // Added in 4.0
- Name == "sse2.mul.sd" || // Added in 4.0
- Name == "sse.div.ss" || // Added in 4.0
- Name == "sse2.div.sd" || // Added in 4.0
- Name == "sse41.pmaxsb" || // Added in 3.9
- Name == "sse2.pmaxs.w" || // Added in 3.9
- Name == "sse41.pmaxsd" || // Added in 3.9
- Name == "sse2.pmaxu.b" || // Added in 3.9
- Name == "sse41.pmaxuw" || // Added in 3.9
- Name == "sse41.pmaxud" || // Added in 3.9
- Name == "sse41.pminsb" || // Added in 3.9
- Name == "sse2.pmins.w" || // Added in 3.9
- Name == "sse41.pminsd" || // Added in 3.9
- Name == "sse2.pminu.b" || // Added in 3.9
- Name == "sse41.pminuw" || // Added in 3.9
- Name == "sse41.pminud" || // Added in 3.9
- Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
- Name.startswith("avx2.pmax") || // Added in 3.9
- Name.startswith("avx2.pmin") || // Added in 3.9
- Name.startswith("avx512.mask.pmax") || // Added in 4.0
- Name.startswith("avx512.mask.pmin") || // Added in 4.0
- Name.startswith("avx2.vbroadcast") || // Added in 3.8
- Name.startswith("avx2.pbroadcast") || // Added in 3.8
- Name.startswith("avx.vpermil.") || // Added in 3.1
- Name.startswith("sse2.pshuf") || // Added in 3.9
- Name.startswith("avx512.pbroadcast") || // Added in 3.9
- Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
- Name.startswith("avx512.mask.movddup") || // Added in 3.9
- Name.startswith("avx512.mask.movshdup") || // Added in 3.9
- Name.startswith("avx512.mask.movsldup") || // Added in 3.9
- Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
- Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
- Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
- Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
- Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
- Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
- Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
- Name.startswith("avx512.mask.punpckl") || // Added in 3.9
- Name.startswith("avx512.mask.punpckh") || // Added in 3.9
- Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
- Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
- Name.startswith("avx512.mask.pand.") || // Added in 3.9
- Name.startswith("avx512.mask.pandn.") || // Added in 3.9
- Name.startswith("avx512.mask.por.") || // Added in 3.9
- Name.startswith("avx512.mask.pxor.") || // Added in 3.9
- Name.startswith("avx512.mask.and.") || // Added in 3.9
- Name.startswith("avx512.mask.andn.") || // Added in 3.9
- Name.startswith("avx512.mask.or.") || // Added in 3.9
- Name.startswith("avx512.mask.xor.") || // Added in 3.9
- Name.startswith("avx512.mask.padd.") || // Added in 4.0
- Name.startswith("avx512.mask.psub.") || // Added in 4.0
- Name.startswith("avx512.mask.pmull.") || // Added in 4.0
- Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
- Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
- Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
- Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
- Name == "avx512.mask.add.pd.128" || // Added in 4.0
- Name == "avx512.mask.add.pd.256" || // Added in 4.0
- Name == "avx512.mask.add.ps.128" || // Added in 4.0
- Name == "avx512.mask.add.ps.256" || // Added in 4.0
- Name == "avx512.mask.div.pd.128" || // Added in 4.0
- Name == "avx512.mask.div.pd.256" || // Added in 4.0
- Name == "avx512.mask.div.ps.128" || // Added in 4.0
- Name == "avx512.mask.div.ps.256" || // Added in 4.0
- Name == "avx512.mask.mul.pd.128" || // Added in 4.0
- Name == "avx512.mask.mul.pd.256" || // Added in 4.0
- Name == "avx512.mask.mul.ps.128" || // Added in 4.0
- Name == "avx512.mask.mul.ps.256" || // Added in 4.0
- Name == "avx512.mask.sub.pd.128" || // Added in 4.0
- Name == "avx512.mask.sub.pd.256" || // Added in 4.0
- Name == "avx512.mask.sub.ps.128" || // Added in 4.0
- Name == "avx512.mask.sub.ps.256" || // Added in 4.0
- Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
- Name.startswith("avx512.mask.psll.d") || // Added in 4.0
- Name.startswith("avx512.mask.psll.q") || // Added in 4.0
- Name.startswith("avx512.mask.psll.w") || // Added in 4.0
- Name.startswith("avx512.mask.psra.d") || // Added in 4.0
- Name.startswith("avx512.mask.psra.q") || // Added in 4.0
- Name.startswith("avx512.mask.psra.w") || // Added in 4.0
- Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
- Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
- Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
- Name.startswith("avx512.mask.pslli") || // Added in 4.0
- Name.startswith("avx512.mask.psrai") || // Added in 4.0
- Name.startswith("avx512.mask.psrli") || // Added in 4.0
- Name.startswith("avx512.mask.psllv") || // Added in 4.0
- Name.startswith("avx512.mask.psrav") || // Added in 4.0
- Name.startswith("avx512.mask.psrlv") || // Added in 4.0
- Name.startswith("sse41.pmovsx") || // Added in 3.8
- Name.startswith("sse41.pmovzx") || // Added in 3.9
- Name.startswith("avx2.pmovsx") || // Added in 3.9
- Name.startswith("avx2.pmovzx") || // Added in 3.9
- Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
- Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
- Name == "sse2.cvtdq2pd" || // Added in 3.9
- Name == "sse2.cvtps2pd" || // Added in 3.9
- Name == "avx.cvtdq2.pd.256" || // Added in 3.9
- Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
- Name.startswith("avx.vinsertf128.") || // Added in 3.7
- Name == "avx2.vinserti128" || // Added in 3.7
- Name.startswith("avx512.mask.insert") || // Added in 4.0
- Name.startswith("avx.vextractf128.") || // Added in 3.7
- Name == "avx2.vextracti128" || // Added in 3.7
- Name.startswith("avx512.mask.vextract") || // Added in 4.0
- Name.startswith("sse4a.movnt.") || // Added in 3.9
- Name.startswith("avx.movnt.") || // Added in 3.2
- Name.startswith("avx512.storent.") || // Added in 3.9
- Name == "sse2.storel.dq" || // Added in 3.9
- Name.startswith("sse.storeu.") || // Added in 3.9
- Name.startswith("sse2.storeu.") || // Added in 3.9
- Name.startswith("avx.storeu.") || // Added in 3.9
- Name.startswith("avx512.mask.storeu.") || // Added in 3.9
- Name.startswith("avx512.mask.store.p") || // Added in 3.9
- Name.startswith("avx512.mask.store.b.") || // Added in 3.9
- Name.startswith("avx512.mask.store.w.") || // Added in 3.9
- Name.startswith("avx512.mask.store.d.") || // Added in 3.9
- Name.startswith("avx512.mask.store.q.") || // Added in 3.9
- Name.startswith("avx512.mask.loadu.") || // Added in 3.9
- Name.startswith("avx512.mask.load.") || // Added in 3.9
- Name == "sse42.crc32.64.8" || // Added in 3.4
- Name.startswith("avx.vbroadcast.s") || // Added in 3.5
- Name.startswith("avx512.mask.palignr.") || // Added in 3.9
- Name.startswith("avx512.mask.valign.") || // Added in 4.0
- Name.startswith("sse2.psll.dq") || // Added in 3.7
- Name.startswith("sse2.psrl.dq") || // Added in 3.7
- Name.startswith("avx2.psll.dq") || // Added in 3.7
- Name.startswith("avx2.psrl.dq") || // Added in 3.7
- Name.startswith("avx512.psll.dq") || // Added in 3.9
- Name.startswith("avx512.psrl.dq") || // Added in 3.9
- Name == "sse41.pblendw" || // Added in 3.7
- Name.startswith("sse41.blendp") || // Added in 3.7
- Name.startswith("avx.blend.p") || // Added in 3.7
- Name == "avx2.pblendw" || // Added in 3.7
- Name.startswith("avx2.pblendd.") || // Added in 3.7
- Name.startswith("avx.vbroadcastf128") || // Added in 4.0
- Name == "avx2.vbroadcasti128" || // Added in 3.7
- Name == "xop.vpcmov" || // Added in 3.8
- Name.startswith("avx512.mask.move.s") || // Added in 4.0
- (Name.startswith("xop.vpcom") && // Added in 3.2
- F->arg_size() == 2))) {
- NewFn = nullptr;
- return true;
- }
- // SSE4.1 ptest functions may have an old signature.
- if (IsX86 && Name.startswith("sse41.ptest")) { // Added in 3.2
- if (Name.substr(11) == "c")
- return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
- if (Name.substr(11) == "z")
- return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
- if (Name.substr(11) == "nzc")
- return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
- }
- // Several blend and other instructions with masks used the wrong number of
- // bits.
- if (IsX86 && Name == "sse41.insertps") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
- NewFn);
- if (IsX86 && Name == "sse41.dppd") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
- NewFn);
- if (IsX86 && Name == "sse41.dpps") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
- NewFn);
- if (IsX86 && Name == "sse41.mpsadbw") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
- NewFn);
- if (IsX86 && Name == "avx.dp.ps.256") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
- NewFn);
- if (IsX86 && Name == "avx2.mpsadbw") // Added in 3.6
- return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
- NewFn);
-
- // frcz.ss/sd may need to have an argument dropped. Added in 3.2
- if (IsX86 && Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::x86_xop_vfrcz_ss);
- return true;
- }
- if (IsX86 && Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
- rename(F);
- NewFn = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::x86_xop_vfrcz_sd);
+ case 'x':
+ if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
return true;
- }
- // Upgrade any XOP PERMIL2 index operand still using a float/double vector.
- if (IsX86 && Name.startswith("xop.vpermil2")) { // Added in 3.9
- auto Params = F->getFunctionType()->params();
- auto Idx = Params[2];
- if (Idx->getScalarType()->isFloatingPointTy()) {
- rename(F);
- unsigned IdxSize = Idx->getPrimitiveSizeInBits();
- unsigned EltSize = Idx->getScalarSizeInBits();
- Intrinsic::ID Permil2ID;
- if (EltSize == 64 && IdxSize == 128)
- Permil2ID = Intrinsic::x86_xop_vpermil2pd;
- else if (EltSize == 32 && IdxSize == 128)
- Permil2ID = Intrinsic::x86_xop_vpermil2ps;
- else if (EltSize == 64 && IdxSize == 256)
- Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
- else
- Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
- NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
- return true;
- }
- }
- break;
}
+  // Remangle our intrinsic since we upgraded the mangling.
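+  // (For example, a declaration whose mangled type suffix no longer matches
+  // the current overload rules is redeclared under the canonical name.)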
+ auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
+ if (Result != None) {
+ NewFn = Result.getValue();
+ return true;
}
// This may not belong here. This function is effectively being overloaded
@@ -733,6 +809,15 @@ static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
+
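+// Upgrade avx512.cvtmask2* by sign-extending each i1 mask bit into an
+// all-ones or all-zeros element of the result vector.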
+static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallInst &CI) {
+ Value* Op = CI.getArgOperand(0);
+ Type* ReturnOp = CI.getType();
+ unsigned NumElts = CI.getType()->getVectorNumElements();
+ Value *Mask = getX86MaskVec(Builder, Op, NumElts);
+ return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
+}
+
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
@@ -753,6 +838,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
bool IsX86 = Name.startswith("x86.");
if (IsX86)
Name = Name.substr(4);
+ bool IsNVVM = Name.startswith("nvvm.");
+ if (IsNVVM)
+ Name = Name.substr(5);
if (IsX86 && Name.startswith("sse4a.movnt.")) {
Module *M = F->getParent();
@@ -838,18 +926,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
return;
}
- if (IsX86 && (Name.startswith("avx512.mask.storeu."))) {
- UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), /*Aligned*/false);
-
- // Remove intrinsic.
- CI->eraseFromParent();
- return;
- }
-
- if (IsX86 && (Name.startswith("avx512.mask.store."))) {
+ if (IsX86 && (Name.startswith("avx512.mask.store"))) {
+ // "avx512.mask.storeu." or "avx512.mask.store."
+    bool Aligned = Name[17] != 'u'; // Unaligned form: "avx512.mask.storeu."
UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), /*Aligned*/true);
+ CI->getArgOperand(2), Aligned);
// Remove intrinsic.
CI->eraseFromParent();
@@ -858,15 +939,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Rep;
// Upgrade packed integer vector compare intrinsics to compare instructions.
- if (IsX86 && (Name.startswith("sse2.pcmpeq.") ||
- Name.startswith("avx2.pcmpeq."))) {
- Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
- "pcmpeq");
- Rep = Builder.CreateSExt(Rep, CI->getType(), "");
- } else if (IsX86 && (Name.startswith("sse2.pcmpgt.") ||
- Name.startswith("avx2.pcmpgt."))) {
- Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
- "pcmpgt");
+ if (IsX86 && (Name.startswith("sse2.pcmp") ||
+ Name.startswith("avx2.pcmp"))) {
+ // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
+ bool CmpEq = Name[9] == 'e';
+ Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
+ CI->getArgOperand(0), CI->getArgOperand(1));
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
} else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
Type *I32Ty = Type::getInt32Ty(C);
@@ -904,10 +982,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
Builder.CreateFDiv(Elt0, Elt1),
ConstantInt::get(I32Ty, 0));
- } else if (IsX86 && Name.startswith("avx512.mask.pcmpeq.")) {
- Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ);
- } else if (IsX86 && Name.startswith("avx512.mask.pcmpgt.")) {
- Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT);
+ } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
+ // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
+ bool CmpEq = Name[16] == 'e';
+ Rep = upgradeMaskedCompare(Builder, *CI,
+ CmpEq ? ICmpInst::ICMP_EQ
+ : ICmpInst::ICMP_SGT);
} else if (IsX86 && (Name == "sse41.pmaxsb" ||
Name == "sse2.pmaxs.w" ||
Name == "sse41.pmaxsd" ||
@@ -1019,15 +1099,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep =
Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
Builder.getInt8(Imm)});
- } else if (IsX86 && Name == "xop.vpcmov") {
- Value *Arg0 = CI->getArgOperand(0);
- Value *Arg1 = CI->getArgOperand(1);
+ } else if (IsX86 && Name.startswith("xop.vpcmov")) {
Value *Sel = CI->getArgOperand(2);
- unsigned NumElts = CI->getType()->getVectorNumElements();
- Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
- Value *NotSel = Builder.CreateXor(Sel, MinusOne);
- Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
- Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
+ Value *NotSel = Builder.CreateNot(Sel);
+ Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
+ Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
Rep = Builder.CreateOr(Sel0, Sel1);
} else if (IsX86 && Name == "sse42.crc32.64.8") {
Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
@@ -1461,6 +1537,43 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::ctlz,
+ CI->getType()),
+ { CI->getArgOperand(0), Builder.getInt1(false) });
+ Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
+ CI->getArgOperand(1));
+ } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
+ Name.startswith("avx512.mask.min.p"))) {
+ bool IsMin = Name[13] == 'i';
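+    // Map back to the unmasked SSE/AVX max/min intrinsic for this vector
+    // and element width; the mask is applied with a select afterwards.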
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
+ unsigned EltWidth = VecTy->getScalarSizeInBits();
+ Intrinsic::ID IID;
+ if (!IsMin && VecWidth == 128 && EltWidth == 32)
+ IID = Intrinsic::x86_sse_max_ps;
+ else if (!IsMin && VecWidth == 128 && EltWidth == 64)
+ IID = Intrinsic::x86_sse2_max_pd;
+ else if (!IsMin && VecWidth == 256 && EltWidth == 32)
+ IID = Intrinsic::x86_avx_max_ps_256;
+ else if (!IsMin && VecWidth == 256 && EltWidth == 64)
+ IID = Intrinsic::x86_avx_max_pd_256;
+ else if (IsMin && VecWidth == 128 && EltWidth == 32)
+ IID = Intrinsic::x86_sse_min_ps;
+ else if (IsMin && VecWidth == 128 && EltWidth == 64)
+ IID = Intrinsic::x86_sse2_min_pd;
+ else if (IsMin && VecWidth == 256 && EltWidth == 32)
+ IID = Intrinsic::x86_avx_min_ps_256;
+ else if (IsMin && VecWidth == 256 && EltWidth == 64)
+ IID = Intrinsic::x86_avx_min_pd_256;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+ { CI->getArgOperand(0), CI->getArgOperand(1) });
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
VectorType *VecTy = cast<VectorType>(CI->getType());
Intrinsic::ID IID;
@@ -1501,6 +1614,42 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
{ CI->getArgOperand(0), CI->getArgOperand(1) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.pack")) {
+ bool IsUnsigned = Name[16] == 'u';
+ bool IsDW = Name[18] == 'd';
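+    // Pick the unmasked pack intrinsic by signedness, source element width
+    // (words vs. dwords) and total vector width; the mask is applied below.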
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ Intrinsic::ID IID;
+ if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
+ IID = Intrinsic::x86_sse2_packsswb_128;
+ else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
+ IID = Intrinsic::x86_avx2_packsswb;
+ else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
+ IID = Intrinsic::x86_avx512_packsswb_512;
+ else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
+ IID = Intrinsic::x86_sse2_packssdw_128;
+ else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
+ IID = Intrinsic::x86_avx2_packssdw;
+ else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
+ IID = Intrinsic::x86_avx512_packssdw_512;
+ else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
+ IID = Intrinsic::x86_sse2_packuswb_128;
+ else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
+ IID = Intrinsic::x86_avx2_packuswb;
+ else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
+ IID = Intrinsic::x86_avx512_packuswb_512;
+ else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
+ IID = Intrinsic::x86_sse41_packusdw;
+ else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
+ IID = Intrinsic::x86_avx2_packusdw;
+ else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
+ IID = Intrinsic::x86_avx512_packusdw_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+ { CI->getArgOperand(0), CI->getArgOperand(1) });
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.psll")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
@@ -1705,6 +1854,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
} else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
Rep = upgradeMaskedMove(Builder, *CI);
+ } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
+ Rep = UpgradeMaskToInt(Builder, *CI);
} else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
Intrinsic::ID IID;
if (Name.endswith("ps.128"))
@@ -1727,6 +1878,64 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
{ CI->getArgOperand(0), CI->getArgOperand(1) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
+ } else if (IsX86 && Name.endswith(".movntdqa")) {
+ Module *M = F->getParent();
+ MDNode *Node = MDNode::get(
+ C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
+
+ Value *Ptr = CI->getArgOperand(0);
+ VectorType *VTy = cast<VectorType>(CI->getType());
+
+    // Cast the pointer to point at the loaded vector type.
+ Value *BC =
+ Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
+ LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8);
+ LI->setMetadata(M->getMDKindID("nontemporal"), Node);
+ Rep = LI;
+ } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
+ Value *Arg = CI->getArgOperand(0);
+ Value *Neg = Builder.CreateNeg(Arg, "neg");
+ Value *Cmp = Builder.CreateICmpSGE(
+ Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
+ Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
+ } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
+ Name == "max.ui" || Name == "max.ull")) {
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+ Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
+ ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
+ : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
+ Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
+ } else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
+ Name == "min.ui" || Name == "min.ull")) {
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+ Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
+ ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
+ : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
+ Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
+ } else if (IsNVVM && Name == "clz.ll") {
+    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
+ Value *Arg = CI->getArgOperand(0);
+ Value *Ctlz = Builder.CreateCall(
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ {Arg->getType()}),
+ {Arg, Builder.getFalse()}, "ctlz");
+ Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
+ } else if (IsNVVM && Name == "popc.ll") {
+    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
+ Value *Arg = CI->getArgOperand(0);
+ Value *Popc = Builder.CreateCall(
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
+ {Arg->getType()}),
+ Arg, "ctpop");
+ Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
+ } else if (IsNVVM && Name == "h2f") {
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(
+ F->getParent(), Intrinsic::convert_from_fp16,
+ {Builder.getFloatTy()}),
+ CI->getArgOperand(0), "h2f");
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
@@ -1737,13 +1946,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
return;
}
- std::string Name = CI->getName();
- if (!Name.empty())
- CI->setName(Name + ".old");
-
+ CallInst *NewCall = nullptr;
switch (NewFn->getIntrinsicID()) {
- default:
- llvm_unreachable("Unknown function for CallInst upgrade.");
+ default: {
+  // Handle a generic mangling change, but nothing else.
+ assert(
+ (CI->getCalledFunction()->getName() != NewFn->getName()) &&
+ "Unknown function for CallInst upgrade and isn't just a name change");
+ CI->setCalledFunction(NewFn);
+ return;
+ }
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
@@ -1761,43 +1973,43 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::arm_neon_vst4lane: {
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
CI->arg_operands().end());
- CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
- CI->eraseFromParent();
- return;
+ NewCall = Builder.CreateCall(NewFn, Args);
+ break;
}
case Intrinsic::bitreverse:
- CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
- CI->eraseFromParent();
- return;
+ NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
+ break;
case Intrinsic::ctlz:
case Intrinsic::cttz:
assert(CI->getNumArgOperands() == 1 &&
"Mismatch between function args and call args");
- CI->replaceAllUsesWith(Builder.CreateCall(
- NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
- CI->eraseFromParent();
- return;
-
- case Intrinsic::objectsize:
- CI->replaceAllUsesWith(Builder.CreateCall(
- NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
- CI->eraseFromParent();
- return;
+ NewCall =
+ Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
+ break;
- case Intrinsic::ctpop: {
- CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
- CI->eraseFromParent();
- return;
+ case Intrinsic::objectsize: {
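+    // Old two-operand calls gain a third "null is unknown size" flag;
+    // passing false preserves the previous semantics.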
+ Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
+ ? Builder.getFalse()
+ : CI->getArgOperand(2);
+ NewCall = Builder.CreateCall(
+ NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize});
+ break;
}
+ case Intrinsic::ctpop:
+ NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
+ break;
+
+ case Intrinsic::convert_from_fp16:
+ NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
+ break;
+
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
- CI->replaceAllUsesWith(
- Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
- CI->eraseFromParent();
- return;
+ NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
+ break;
case Intrinsic::x86_xop_vpermil2pd:
case Intrinsic::x86_xop_vpermil2ps:
@@ -1808,9 +2020,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
- CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args, Name));
- CI->eraseFromParent();
- return;
+ NewCall = Builder.CreateCall(NewFn, Args);
+ break;
}
case Intrinsic::x86_sse41_ptestc:
@@ -1832,10 +2043,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
- CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
- CI->replaceAllUsesWith(NewCall);
- CI->eraseFromParent();
- return;
+ NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
+ break;
}
case Intrinsic::x86_sse41_insertps:
@@ -1851,17 +2060,13 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Replace the last argument with a trunc.
Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
-
- CallInst *NewCall = Builder.CreateCall(NewFn, Args);
- CI->replaceAllUsesWith(NewCall);
- CI->eraseFromParent();
- return;
+ NewCall = Builder.CreateCall(NewFn, Args);
+ break;
}
case Intrinsic::thread_pointer: {
- CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {}));
- CI->eraseFromParent();
- return;
+ NewCall = Builder.CreateCall(NewFn, {});
+ break;
}
case Intrinsic::invariant_start:
@@ -1870,11 +2075,19 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::masked_store: {
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
CI->arg_operands().end());
- CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
- CI->eraseFromParent();
- return;
+ NewCall = Builder.CreateCall(NewFn, Args);
+ break;
+ }
}
+ assert(NewCall && "Should have either set this variable or returned through "
+ "the default case");
+ std::string Name = CI->getName();
+ if (!Name.empty()) {
+ CI->setName(Name + ".old");
+ NewCall->setName(Name);
}
+ CI->replaceAllUsesWith(NewCall);
+ CI->eraseFromParent();
}
void llvm::UpgradeCallsToIntrinsic(Function *F) {
diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp
index 19e784923658..90ca21ab91f8 100644
--- a/lib/IR/BasicBlock.cpp
+++ b/lib/IR/BasicBlock.cpp
@@ -117,28 +117,19 @@ const Module *BasicBlock::getModule() const {
return getParent()->getParent();
}
-Module *BasicBlock::getModule() {
- return getParent()->getParent();
-}
-
-TerminatorInst *BasicBlock::getTerminator() {
- if (InstList.empty()) return nullptr;
- return dyn_cast<TerminatorInst>(&InstList.back());
-}
-
const TerminatorInst *BasicBlock::getTerminator() const {
if (InstList.empty()) return nullptr;
return dyn_cast<TerminatorInst>(&InstList.back());
}
-CallInst *BasicBlock::getTerminatingMustTailCall() {
+const CallInst *BasicBlock::getTerminatingMustTailCall() const {
if (InstList.empty())
return nullptr;
- ReturnInst *RI = dyn_cast<ReturnInst>(&InstList.back());
+ const ReturnInst *RI = dyn_cast<ReturnInst>(&InstList.back());
if (!RI || RI == &InstList.front())
return nullptr;
- Instruction *Prev = RI->getPrevNode();
+ const Instruction *Prev = RI->getPrevNode();
if (!Prev)
return nullptr;
@@ -162,7 +153,7 @@ CallInst *BasicBlock::getTerminatingMustTailCall() {
return nullptr;
}
-CallInst *BasicBlock::getTerminatingDeoptimizeCall() {
+const CallInst *BasicBlock::getTerminatingDeoptimizeCall() const {
if (InstList.empty())
return nullptr;
auto *RI = dyn_cast<ReturnInst>(&InstList.back());
@@ -177,22 +168,22 @@ CallInst *BasicBlock::getTerminatingDeoptimizeCall() {
return nullptr;
}
-Instruction* BasicBlock::getFirstNonPHI() {
- for (Instruction &I : *this)
+const Instruction* BasicBlock::getFirstNonPHI() const {
+ for (const Instruction &I : *this)
if (!isa<PHINode>(I))
return &I;
return nullptr;
}
-Instruction* BasicBlock::getFirstNonPHIOrDbg() {
- for (Instruction &I : *this)
+const Instruction* BasicBlock::getFirstNonPHIOrDbg() const {
+ for (const Instruction &I : *this)
if (!isa<PHINode>(I) && !isa<DbgInfoIntrinsic>(I))
return &I;
return nullptr;
}
-Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() {
- for (Instruction &I : *this) {
+const Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() const {
+ for (const Instruction &I : *this) {
if (isa<PHINode>(I) || isa<DbgInfoIntrinsic>(I))
continue;
@@ -206,12 +197,12 @@ Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() {
return nullptr;
}
-BasicBlock::iterator BasicBlock::getFirstInsertionPt() {
- Instruction *FirstNonPHI = getFirstNonPHI();
+BasicBlock::const_iterator BasicBlock::getFirstInsertionPt() const {
+ const Instruction *FirstNonPHI = getFirstNonPHI();
if (!FirstNonPHI)
return end();
- iterator InsertPt = FirstNonPHI->getIterator();
+ const_iterator InsertPt = FirstNonPHI->getIterator();
if (InsertPt->isEHPad()) ++InsertPt;
return InsertPt;
}
@@ -223,10 +214,10 @@ void BasicBlock::dropAllReferences() {
/// If this basic block has a single predecessor block,
/// return the block, otherwise return a null pointer.
-BasicBlock *BasicBlock::getSinglePredecessor() {
- pred_iterator PI = pred_begin(this), E = pred_end(this);
+const BasicBlock *BasicBlock::getSinglePredecessor() const {
+ const_pred_iterator PI = pred_begin(this), E = pred_end(this);
if (PI == E) return nullptr; // No preds.
- BasicBlock *ThePred = *PI;
+ const BasicBlock *ThePred = *PI;
++PI;
return (PI == E) ? ThePred : nullptr /*multiple preds*/;
}
@@ -236,10 +227,10 @@ BasicBlock *BasicBlock::getSinglePredecessor() {
/// Note that unique predecessor doesn't mean single edge, there can be
/// multiple edges from the unique predecessor to this block (for example
/// a switch statement with multiple cases having the same destination).
-BasicBlock *BasicBlock::getUniquePredecessor() {
- pred_iterator PI = pred_begin(this), E = pred_end(this);
+const BasicBlock *BasicBlock::getUniquePredecessor() const {
+ const_pred_iterator PI = pred_begin(this), E = pred_end(this);
if (PI == E) return nullptr; // No preds.
- BasicBlock *PredBB = *PI;
+ const BasicBlock *PredBB = *PI;
++PI;
for (;PI != E; ++PI) {
if (*PI != PredBB)
@@ -250,18 +241,18 @@ BasicBlock *BasicBlock::getUniquePredecessor() {
return PredBB;
}
-BasicBlock *BasicBlock::getSingleSuccessor() {
- succ_iterator SI = succ_begin(this), E = succ_end(this);
+const BasicBlock *BasicBlock::getSingleSuccessor() const {
+ succ_const_iterator SI = succ_begin(this), E = succ_end(this);
if (SI == E) return nullptr; // no successors
- BasicBlock *TheSucc = *SI;
+ const BasicBlock *TheSucc = *SI;
++SI;
return (SI == E) ? TheSucc : nullptr /* multiple successors */;
}
-BasicBlock *BasicBlock::getUniqueSuccessor() {
- succ_iterator SI = succ_begin(this), E = succ_end(this);
+const BasicBlock *BasicBlock::getUniqueSuccessor() const {
+ succ_const_iterator SI = succ_begin(this), E = succ_end(this);
if (SI == E) return nullptr; // No successors
- BasicBlock *SuccBB = *SI;
+ const BasicBlock *SuccBB = *SI;
++SI;
for (;SI != E; ++SI) {
if (*SI != SuccBB)
@@ -438,9 +429,6 @@ bool BasicBlock::isLandingPad() const {
}
/// Return the landingpad instruction associated with the landing pad.
-LandingPadInst *BasicBlock::getLandingPadInst() {
- return dyn_cast<LandingPadInst>(getFirstNonPHI());
-}
const LandingPadInst *BasicBlock::getLandingPadInst() const {
return dyn_cast<LandingPadInst>(getFirstNonPHI());
}
diff --git a/lib/IR/Comdat.cpp b/lib/IR/Comdat.cpp
index fc1b48d1c190..e27ecad0a884 100644
--- a/lib/IR/Comdat.cpp
+++ b/lib/IR/Comdat.cpp
@@ -1,4 +1,4 @@
-//===-- Comdat.cpp - Implement Metadata classes --------------------------===//
+//===- Comdat.cpp - Implement Comdats -------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,12 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Comdat.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Comdat.h"
+
using namespace llvm;
Comdat::Comdat(Comdat &&C) : Name(C.Name), SK(C.SK) {}
-Comdat::Comdat() : Name(nullptr), SK(Comdat::Any) {}
+Comdat::Comdat() = default;
StringRef Comdat::getName() const { return Name->first(); }
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index 098ff90a0a95..bba230677ebf 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -18,6 +18,7 @@
//===----------------------------------------------------------------------===//
#include "ConstantFold.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -606,17 +607,15 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
const APFloat &V = FPC->getValueAPF();
bool ignored;
- uint64_t x[2];
uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
+ APSInt IntVal(DestBitWidth, opc == Instruction::FPToUI);
if (APFloat::opInvalidOp ==
- V.convertToInteger(x, DestBitWidth, opc==Instruction::FPToSI,
- APFloat::rmTowardZero, &ignored)) {
+ V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored)) {
// Undefined behavior invoked - the destination type can't represent
// the input constant.
return UndefValue::get(DestTy);
}
- APInt Val(DestBitWidth, x);
- return ConstantInt::get(FPC->getContext(), Val);
+ return ConstantInt::get(FPC->getContext(), IntVal);
}
return nullptr; // Can't fold.
case Instruction::IntToPtr: //always treated as unsigned
@@ -1209,10 +1208,15 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
SmallVector<Constant*, 16> Result;
Type *Ty = IntegerType::get(VTy->getContext(), 32);
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- Constant *LHS =
- ConstantExpr::getExtractElement(C1, ConstantInt::get(Ty, i));
- Constant *RHS =
- ConstantExpr::getExtractElement(C2, ConstantInt::get(Ty, i));
+ Constant *ExtractIdx = ConstantInt::get(Ty, i);
+ Constant *LHS = ConstantExpr::getExtractElement(C1, ExtractIdx);
+ Constant *RHS = ConstantExpr::getExtractElement(C2, ExtractIdx);
+
+ // If any element of a divisor vector is zero, the whole op is undef.
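+      // (e.g. folding udiv <2 x i32> <i32 8, i32 8>, <i32 2, i32 0>).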
+ if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
+ Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
+ RHS->isNullValue())
+ return UndefValue::get(VTy);
Result.push_back(ConstantExpr::get(Opcode, LHS, RHS));
}
@@ -2231,7 +2235,8 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
ConstantInt *Factor = ConstantInt::get(CI->getType(), NumElements);
NewIdxs[i] = ConstantExpr::getSRem(CI, Factor);
- Constant *PrevIdx = cast<Constant>(Idxs[i - 1]);
+        Constant *PrevIdx = NewIdxs[i - 1] ? NewIdxs[i - 1]
+                                           : cast<Constant>(Idxs[i - 1]);
Constant *Div = ConstantExpr::getSDiv(CI, Factor);
unsigned CommonExtendedWidth =
diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp
index a85ad465317c..8dfd6c8036c4 100644
--- a/lib/IR/ConstantRange.cpp
+++ b/lib/IR/ConstantRange.cpp
@@ -40,10 +40,10 @@ ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) {
/// Initialize a range to hold the single specified value.
///
-ConstantRange::ConstantRange(APIntMoveTy V)
+ConstantRange::ConstantRange(APInt V)
: Lower(std::move(V)), Upper(Lower + 1) {}
-ConstantRange::ConstantRange(APIntMoveTy L, APIntMoveTy U)
+ConstantRange::ConstantRange(APInt L, APInt U)
: Lower(std::move(L)), Upper(std::move(U)) {
assert(Lower.getBitWidth() == Upper.getBitWidth() &&
"ConstantRange with unequal bit widths");
@@ -272,6 +272,22 @@ APInt ConstantRange::getSetSize() const {
return (Upper - Lower).zext(getBitWidth()+1);
}
+/// isSizeStrictlySmallerThanOf - Compare the set size of this range with
+/// that of the range Other.
+/// This function is faster than comparing results of getSetSize for the two
+/// ranges, because we don't need to extend bitwidth of APInts we're operating
+/// with.
+///
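+/// For example, at equal bit width [0,4) is strictly smaller than [0,6);
+/// the comparison uses a same-width ult rather than a zext to width+1.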
+bool
+ConstantRange::isSizeStrictlySmallerThanOf(const ConstantRange &Other) const {
+ assert(getBitWidth() == Other.getBitWidth());
+ if (isFullSet())
+ return false;
+ if (Other.isFullSet())
+ return true;
+ return (Upper - Lower).ult(Other.Upper - Other.Lower);
+}
+
/// getUnsignedMax - Return the largest unsigned value contained in the
/// ConstantRange.
///
@@ -414,7 +430,7 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
if (CR.Upper.ule(Lower))
return ConstantRange(CR.Lower, Upper);
- if (getSetSize().ult(CR.getSetSize()))
+ if (isSizeStrictlySmallerThanOf(CR))
return *this;
return CR;
}
@@ -429,7 +445,7 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
if (CR.Upper.ult(Upper)) {
if (CR.Lower.ult(Upper)) {
- if (getSetSize().ult(CR.getSetSize()))
+ if (isSizeStrictlySmallerThanOf(CR))
return *this;
return CR;
}
@@ -445,7 +461,7 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
return ConstantRange(CR.Lower, Upper);
}
- if (getSetSize().ult(CR.getSetSize()))
+ if (isSizeStrictlySmallerThanOf(CR))
return *this;
return CR;
}
@@ -739,17 +755,16 @@ ConstantRange::add(const ConstantRange &Other) const {
if (isFullSet() || Other.isFullSet())
return ConstantRange(getBitWidth(), /*isFullSet=*/true);
- APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize();
APInt NewLower = getLower() + Other.getLower();
APInt NewUpper = getUpper() + Other.getUpper() - 1;
if (NewLower == NewUpper)
return ConstantRange(getBitWidth(), /*isFullSet=*/true);
ConstantRange X = ConstantRange(NewLower, NewUpper);
- if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y))
+ if (X.isSizeStrictlySmallerThanOf(*this) ||
+ X.isSizeStrictlySmallerThanOf(Other))
// We've wrapped, therefore, full set.
return ConstantRange(getBitWidth(), /*isFullSet=*/true);
-
return X;
}
@@ -773,17 +788,16 @@ ConstantRange::sub(const ConstantRange &Other) const {
if (isFullSet() || Other.isFullSet())
return ConstantRange(getBitWidth(), /*isFullSet=*/true);
- APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize();
APInt NewLower = getLower() - Other.getUpper() + 1;
APInt NewUpper = getUpper() - Other.getLower();
if (NewLower == NewUpper)
return ConstantRange(getBitWidth(), /*isFullSet=*/true);
ConstantRange X = ConstantRange(NewLower, NewUpper);
- if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y))
+ if (X.isSizeStrictlySmallerThanOf(*this) ||
+ X.isSizeStrictlySmallerThanOf(Other))
// We've wrapped, therefore, full set.
return ConstantRange(getBitWidth(), /*isFullSet=*/true);
-
return X;
}
@@ -837,7 +851,7 @@ ConstantRange::multiply(const ConstantRange &Other) const {
ConstantRange Result_sext(std::min(L, Compare), std::max(L, Compare) + 1);
ConstantRange SR = Result_sext.truncate(getBitWidth());
- return UR.getSetSize().ult(SR.getSetSize()) ? UR : SR;
+ return UR.isSizeStrictlySmallerThanOf(SR) ? UR : SR;
}
ConstantRange
@@ -996,11 +1010,13 @@ void ConstantRange::print(raw_ostream &OS) const {
OS << "[" << Lower << "," << Upper << ")";
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dump - Allow printing from a debugger easily...
///
LLVM_DUMP_METHOD void ConstantRange::dump() const {
print(dbgs());
}
+#endif
ConstantRange llvm::getConstantRangeFromMetadata(const MDNode &Ranges) {
const unsigned NumRanges = Ranges.getNumOperands() / 2;
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index 533b9245277f..c5f93c9f4db0 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -1027,7 +1027,7 @@ Constant *ConstantVector::getImpl(ArrayRef<Constant*> V) {
return getSequenceIfElementsMatch<ConstantDataVector>(C, V);
// Otherwise, the element type isn't compatible with ConstantDataVector, or
- // the operand list constants a ConstantExpr or something else strange.
+ // the operand list contains a ConstantExpr or something else strange.
return nullptr;
}
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index 00bb476c0b3c..b5ed30b85c8a 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Attributes.h"
-#include "AttributeSetNode.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -259,7 +258,8 @@ void LLVMSetTarget(LLVMModuleRef M, const char *Triple) {
}
void LLVMDumpModule(LLVMModuleRef M) {
- unwrap(M)->dump();
+ unwrap(M)->print(errs(), nullptr,
+ /*ShouldPreserveUseListOrder=*/false, /*IsForDebug=*/true);
}
LLVMBool LLVMPrintModuleToFile(LLVMModuleRef M, const char *Filename,
@@ -358,9 +358,11 @@ LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty) {
return wrap(&unwrap(Ty)->getContext());
}
-void LLVMDumpType(LLVMTypeRef Ty) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LLVMDumpType(LLVMTypeRef Ty) {
return unwrap(Ty)->dump();
}
+#endif
char *LLVMPrintTypeToString(LLVMTypeRef Ty) {
std::string buf;
@@ -640,8 +642,8 @@ void LLVMSetValueName(LLVMValueRef Val, const char *Name) {
unwrap(Val)->setName(Name);
}
-void LLVMDumpValue(LLVMValueRef Val) {
- unwrap(Val)->dump();
+LLVM_DUMP_METHOD void LLVMDumpValue(LLVMValueRef Val) {
+ unwrap(Val)->print(errs(), /*IsForDebug=*/true);
}
char* LLVMPrintValueToString(LLVMValueRef Val) {
@@ -1844,18 +1846,14 @@ void LLVMAddAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
}
unsigned LLVMGetAttributeCountAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx) {
- auto *ASN = AttributeSetNode::get(unwrap<Function>(F)->getAttributes(), Idx);
- if (!ASN)
- return 0;
- return ASN->getNumAttributes();
+ auto AS = unwrap<Function>(F)->getAttributes().getAttributes(Idx);
+ return AS.getNumAttributes();
}
void LLVMGetAttributesAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
LLVMAttributeRef *Attrs) {
- auto *ASN = AttributeSetNode::get(unwrap<Function>(F)->getAttributes(), Idx);
- if (!ASN)
- return;
- for (auto A: make_range(ASN->begin(), ASN->end()))
+ auto AS = unwrap<Function>(F)->getAttributes().getAttributes(Idx);
+ for (auto A : AS)
*Attrs++ = wrap(A);
}
@@ -1885,12 +1883,12 @@ void LLVMRemoveStringAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A,
const char *V) {
Function *Func = unwrap<Function>(Fn);
- AttributeSet::AttrIndex Idx =
- AttributeSet::AttrIndex(AttributeSet::FunctionIndex);
+ AttributeList::AttrIndex Idx =
+ AttributeList::AttrIndex(AttributeList::FunctionIndex);
AttrBuilder B;
B.addAttribute(A, V);
- AttributeSet Set = AttributeSet::get(Func->getContext(), Idx, B);
+ AttributeList Set = AttributeList::get(Func->getContext(), Idx, B);
Func->addAttributes(Idx, Set);
}
@@ -1910,10 +1908,8 @@ void LLVMGetParams(LLVMValueRef FnRef, LLVMValueRef *ParamRefs) {
}
LLVMValueRef LLVMGetParam(LLVMValueRef FnRef, unsigned index) {
- Function::arg_iterator AI = unwrap<Function>(FnRef)->arg_begin();
- while (index --> 0)
- AI++;
- return wrap(&*AI);
+ Function *Fn = unwrap<Function>(FnRef);
+ return wrap(&Fn->arg_begin()[index]);
}
LLVMValueRef LLVMGetParamParent(LLVMValueRef V) {
@@ -1938,25 +1934,24 @@ LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn) {
LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg) {
Argument *A = unwrap<Argument>(Arg);
- Function::arg_iterator I(A);
- if (++I == A->getParent()->arg_end())
+ Function *Fn = A->getParent();
+ if (A->getArgNo() + 1 >= Fn->arg_size())
return nullptr;
- return wrap(&*I);
+ return wrap(&Fn->arg_begin()[A->getArgNo() + 1]);
}
LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
Argument *A = unwrap<Argument>(Arg);
- Function::arg_iterator I(A);
- if (I == A->getParent()->arg_begin())
+ if (A->getArgNo() == 0)
return nullptr;
- return wrap(&*--I);
+ return wrap(&A->getParent()->arg_begin()[A->getArgNo() - 1]);
}
void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
Argument *A = unwrap<Argument>(Arg);
AttrBuilder B;
B.addAlignmentAttr(align);
- A->addAttr(AttributeSet::get(A->getContext(),A->getArgNo() + 1, B));
+ A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
}
/*--.. Operations on basic blocks ..........................................--*/
@@ -2165,10 +2160,9 @@ void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
CallSite Call = CallSite(unwrap<Instruction>(Instr));
AttrBuilder B;
B.addAlignmentAttr(align);
- Call.setAttributes(Call.getAttributes()
- .addAttributes(Call->getContext(), index,
- AttributeSet::get(Call->getContext(),
- index, B)));
+ Call.setAttributes(Call.getAttributes().addAttributes(
+ Call->getContext(), index,
+ AttributeList::get(Call->getContext(), index, B)));
}
void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
@@ -2179,19 +2173,15 @@ void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
unsigned LLVMGetCallSiteAttributeCount(LLVMValueRef C,
LLVMAttributeIndex Idx) {
auto CS = CallSite(unwrap<Instruction>(C));
- auto *ASN = AttributeSetNode::get(CS.getAttributes(), Idx);
- if (!ASN)
- return 0;
- return ASN->getNumAttributes();
+ auto AS = CS.getAttributes().getAttributes(Idx);
+ return AS.getNumAttributes();
}
void LLVMGetCallSiteAttributes(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef *Attrs) {
auto CS = CallSite(unwrap<Instruction>(C));
- auto *ASN = AttributeSetNode::get(CS.getAttributes(), Idx);
- if (!ASN)
- return;
- for (auto A: make_range(ASN->begin(), ASN->end()))
+ auto AS = CS.getAttributes().getAttributes(Idx);
+ for (auto A : AS)
*Attrs++ = wrap(A);
}
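The Core.cpp changes above drop the nullable AttributeSetNode lookup: AttributeList::getAttributes(Idx) returns a possibly empty AttributeSet that can be counted and iterated directly. A minimal sketch of the resulting C API usage, assuming an existing LLVMValueRef Fn (the variable names are hypothetical):

    // Count, then fetch, the function-level attributes of Fn.
    unsigned Count = LLVMGetAttributeCountAtIndex(Fn, LLVMAttributeFunctionIndex);
    std::vector<LLVMAttributeRef> Attrs(Count);
    LLVMGetAttributesAtIndex(Fn, LLVMAttributeFunctionIndex, Attrs.data());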
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index d06161067f5f..9407c805b92a 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -126,7 +126,7 @@ DICompileUnit *DIBuilder::createCompileUnit(
unsigned Lang, DIFile *File, StringRef Producer, bool isOptimized,
StringRef Flags, unsigned RunTimeVer, StringRef SplitName,
DICompileUnit::DebugEmissionKind Kind, uint64_t DWOId,
- bool SplitDebugInlining) {
+ bool SplitDebugInlining, bool DebugInfoForProfiling) {
assert(((Lang <= dwarf::DW_LANG_Fortran08 && Lang >= dwarf::DW_LANG_C89) ||
(Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) &&
@@ -136,7 +136,7 @@ DICompileUnit *DIBuilder::createCompileUnit(
CUNode = DICompileUnit::getDistinct(
VMContext, Lang, File, Producer, isOptimized, Flags, RunTimeVer,
SplitName, Kind, nullptr, nullptr, nullptr, nullptr, nullptr, DWOId,
- SplitDebugInlining);
+ SplitDebugInlining, DebugInfoForProfiling);
// Create a named metadata so that it is easier to find cu in a module.
NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
@@ -241,17 +241,20 @@ DIBasicType *DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
DIDerivedType *DIBuilder::createQualifiedType(unsigned Tag, DIType *FromTy) {
return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, FromTy, 0,
- 0, 0, DINode::FlagZero);
+ 0, 0, None, DINode::FlagZero);
}
-DIDerivedType *DIBuilder::createPointerType(DIType *PointeeTy,
- uint64_t SizeInBits,
- uint32_t AlignInBits,
- StringRef Name) {
+DIDerivedType *DIBuilder::createPointerType(
+ DIType *PointeeTy,
+ uint64_t SizeInBits,
+ uint32_t AlignInBits,
+ Optional<unsigned> DWARFAddressSpace,
+ StringRef Name) {
// FIXME: Why is there a name here?
return DIDerivedType::get(VMContext, dwarf::DW_TAG_pointer_type, Name,
nullptr, 0, nullptr, PointeeTy, SizeInBits,
- AlignInBits, 0, DINode::FlagZero);
+ AlignInBits, 0, DWARFAddressSpace,
+ DINode::FlagZero);
}
DIDerivedType *DIBuilder::createMemberPointerType(DIType *PointeeTy,
@@ -261,15 +264,18 @@ DIDerivedType *DIBuilder::createMemberPointerType(DIType *PointeeTy,
DINode::DIFlags Flags) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_ptr_to_member_type, "",
nullptr, 0, nullptr, PointeeTy, SizeInBits,
- AlignInBits, 0, Flags, Base);
+ AlignInBits, 0, None, Flags, Base);
}
-DIDerivedType *DIBuilder::createReferenceType(unsigned Tag, DIType *RTy,
- uint64_t SizeInBits,
- uint32_t AlignInBits) {
+DIDerivedType *DIBuilder::createReferenceType(
+ unsigned Tag, DIType *RTy,
+ uint64_t SizeInBits,
+ uint32_t AlignInBits,
+ Optional<unsigned> DWARFAddressSpace) {
assert(RTy && "Unable to create reference type");
return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, RTy,
- SizeInBits, AlignInBits, 0, DINode::FlagZero);
+ SizeInBits, AlignInBits, 0, DWARFAddressSpace,
+ DINode::FlagZero);
}
DIDerivedType *DIBuilder::createTypedef(DIType *Ty, StringRef Name,
@@ -277,14 +283,14 @@ DIDerivedType *DIBuilder::createTypedef(DIType *Ty, StringRef Name,
DIScope *Context) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name, File,
LineNo, getNonCompileUnitScope(Context), Ty, 0, 0,
- 0, DINode::FlagZero);
+ 0, None, DINode::FlagZero);
}
DIDerivedType *DIBuilder::createFriend(DIType *Ty, DIType *FriendTy) {
assert(Ty && "Invalid type!");
assert(FriendTy && "Invalid friend type!");
return DIDerivedType::get(VMContext, dwarf::DW_TAG_friend, "", nullptr, 0, Ty,
- FriendTy, 0, 0, 0, DINode::FlagZero);
+ FriendTy, 0, 0, 0, None, DINode::FlagZero);
}
DIDerivedType *DIBuilder::createInheritance(DIType *Ty, DIType *BaseTy,
@@ -292,7 +298,7 @@ DIDerivedType *DIBuilder::createInheritance(DIType *Ty, DIType *BaseTy,
DINode::DIFlags Flags) {
assert(Ty && "Unable to create inheritance");
return DIDerivedType::get(VMContext, dwarf::DW_TAG_inheritance, "", nullptr,
- 0, Ty, BaseTy, 0, 0, BaseOffset, Flags);
+ 0, Ty, BaseTy, 0, 0, BaseOffset, None, Flags);
}
DIDerivedType *DIBuilder::createMemberType(DIScope *Scope, StringRef Name,
@@ -303,7 +309,7 @@ DIDerivedType *DIBuilder::createMemberType(DIScope *Scope, StringRef Name,
DINode::DIFlags Flags, DIType *Ty) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File,
LineNumber, getNonCompileUnitScope(Scope), Ty,
- SizeInBits, AlignInBits, OffsetInBits, Flags);
+ SizeInBits, AlignInBits, OffsetInBits, None, Flags);
}
static ConstantAsMetadata *getConstantOrNull(Constant *C) {
@@ -320,7 +326,7 @@ DIDerivedType *DIBuilder::createBitFieldMemberType(
return DIDerivedType::get(
VMContext, dwarf::DW_TAG_member, Name, File, LineNumber,
getNonCompileUnitScope(Scope), Ty, SizeInBits, /* AlignInBits */ 0,
- OffsetInBits, Flags,
+ OffsetInBits, None, Flags,
ConstantAsMetadata::get(ConstantInt::get(IntegerType::get(VMContext, 64),
StorageOffsetInBits)));
}
@@ -333,7 +339,8 @@ DIBuilder::createStaticMemberType(DIScope *Scope, StringRef Name, DIFile *File,
Flags |= DINode::FlagStaticMember;
return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File,
LineNumber, getNonCompileUnitScope(Scope), Ty, 0,
- AlignInBits, 0, Flags, getConstantOrNull(Val));
+ AlignInBits, 0, None, Flags,
+ getConstantOrNull(Val));
}
DIDerivedType *
@@ -343,7 +350,7 @@ DIBuilder::createObjCIVar(StringRef Name, DIFile *File, unsigned LineNumber,
DIType *Ty, MDNode *PropertyNode) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File,
LineNumber, getNonCompileUnitScope(File), Ty,
- SizeInBits, AlignInBits, OffsetInBits, Flags,
+ SizeInBits, AlignInBits, OffsetInBits, None, Flags,
PropertyNode);
}
@@ -442,14 +449,6 @@ DISubroutineType *DIBuilder::createSubroutineType(DITypeRefArray ParameterTypes,
return DISubroutineType::get(VMContext, Flags, CC, ParameterTypes);
}
-DICompositeType *DIBuilder::createExternalTypeRef(unsigned Tag, DIFile *File,
- StringRef UniqueIdentifier) {
- assert(!UniqueIdentifier.empty() && "external type ref without uid");
- return DICompositeType::get(VMContext, Tag, "", nullptr, 0, nullptr, nullptr,
- 0, 0, 0, DINode::FlagExternalTypeRef, nullptr, 0,
- nullptr, nullptr, UniqueIdentifier);
-}
-
DICompositeType *DIBuilder::createEnumerationType(
DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
uint64_t SizeInBits, uint32_t AlignInBits, DINodeArray Elements,
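createPointerType and createReferenceType above gain an Optional<unsigned> DWARFAddressSpace parameter between the alignment and the name. A sketch of an updated call site, assuming a DIBuilder DIB and a DIType *IntTy already exist (both names hypothetical):

    // Pointer in the default address space: pass None for DWARFAddressSpace.
    DIDerivedType *PtrTy = DIB.createPointerType(IntTy, /*SizeInBits=*/64,
                                                 /*AlignInBits=*/0,
                                                 /*DWARFAddressSpace=*/None);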
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp
index d15a34c0b936..6f90ce598568 100644
--- a/lib/IR/DataLayout.cpp
+++ b/lib/IR/DataLayout.cpp
@@ -118,9 +118,6 @@ LayoutAlignElem::operator==(const LayoutAlignElem &rhs) const {
&& TypeBitWidth == rhs.TypeBitWidth);
}
-const LayoutAlignElem
-DataLayout::InvalidAlignmentElem = { INVALID_ALIGN, 0, 0, 0 };
-
//===----------------------------------------------------------------------===//
// PointerAlignElem, PointerAlign support
//===----------------------------------------------------------------------===//
@@ -145,9 +142,6 @@ PointerAlignElem::operator==(const PointerAlignElem &rhs) const {
&& TypeByteWidth == rhs.TypeByteWidth);
}
-const PointerAlignElem
-DataLayout::InvalidPointerElem = { 0U, 0U, 0U, ~0U };
-
//===----------------------------------------------------------------------===//
// DataLayout Class Implementation
//===----------------------------------------------------------------------===//
@@ -180,6 +174,7 @@ void DataLayout::reset(StringRef Desc) {
LayoutMap = nullptr;
BigEndian = false;
+ AllocaAddrSpace = 0;
StackNaturalAlign = 0;
ManglingMode = MM_None;
NonIntegralAddressSpaces.clear();
@@ -358,6 +353,12 @@ void DataLayout::parseSpecifier(StringRef Desc) {
StackNaturalAlign = inBytes(getInt(Tok));
break;
}
+ case 'A': { // Default stack/alloca address space.
+ AllocaAddrSpace = getInt(Tok);
+ if (!isUInt<24>(AllocaAddrSpace))
+ report_fatal_error("Invalid address space, must be a 24bit integer");
+ break;
+ }
case 'm':
if (!Tok.empty())
report_fatal_error("Unexpected trailing characters after mangling specifier in datalayout string");
@@ -400,6 +401,7 @@ void DataLayout::init(const Module *M) { *this = M->getDataLayout(); }
bool DataLayout::operator==(const DataLayout &Other) const {
bool Ret = BigEndian == Other.BigEndian &&
+ AllocaAddrSpace == Other.AllocaAddrSpace &&
StackNaturalAlign == Other.StackNaturalAlign &&
ManglingMode == Other.ManglingMode &&
LegalIntWidths == Other.LegalIntWidths &&
@@ -408,6 +410,18 @@ bool DataLayout::operator==(const DataLayout &Other) const {
return Ret;
}
+DataLayout::AlignmentsTy::iterator
+DataLayout::findAlignmentLowerBound(AlignTypeEnum AlignType,
+ uint32_t BitWidth) {
+ auto Pair = std::make_pair((unsigned)AlignType, BitWidth);
+ return std::lower_bound(Alignments.begin(), Alignments.end(), Pair,
+ [](const LayoutAlignElem &LHS,
+ const std::pair<unsigned, uint32_t> &RHS) {
+ return std::tie(LHS.AlignType, LHS.TypeBitWidth) <
+ std::tie(RHS.first, RHS.second);
+ });
+}
+
void
DataLayout::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
unsigned pref_align, uint32_t bit_width) {
@@ -426,18 +440,17 @@ DataLayout::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
report_fatal_error(
"Preferred alignment cannot be less than the ABI alignment");
- for (LayoutAlignElem &Elem : Alignments) {
- if (Elem.AlignType == (unsigned)align_type &&
- Elem.TypeBitWidth == bit_width) {
- // Update the abi, preferred alignments.
- Elem.ABIAlign = abi_align;
- Elem.PrefAlign = pref_align;
- return;
- }
+ AlignmentsTy::iterator I = findAlignmentLowerBound(align_type, bit_width);
+ if (I != Alignments.end() &&
+ I->AlignType == (unsigned)align_type && I->TypeBitWidth == bit_width) {
+    // Update the ABI and preferred alignments.
+ I->ABIAlign = abi_align;
+ I->PrefAlign = pref_align;
+ } else {
+ // Insert before I to keep the vector sorted.
+ Alignments.insert(I, LayoutAlignElem::get(align_type, abi_align,
+ pref_align, bit_width));
}
-
- Alignments.push_back(LayoutAlignElem::get(align_type, abi_align,
- pref_align, bit_width));
}
DataLayout::PointersTy::iterator
@@ -471,45 +484,29 @@ void DataLayout::setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign,
unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
uint32_t BitWidth, bool ABIInfo,
Type *Ty) const {
- // Check to see if we have an exact match and remember the best match we see.
- int BestMatchIdx = -1;
- int LargestInt = -1;
- for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
- if (Alignments[i].AlignType == (unsigned)AlignType &&
- Alignments[i].TypeBitWidth == BitWidth)
- return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign;
-
- // The best match so far depends on what we're looking for.
- if (AlignType == INTEGER_ALIGN &&
- Alignments[i].AlignType == INTEGER_ALIGN) {
- // The "best match" for integers is the smallest size that is larger than
- // the BitWidth requested.
- if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 ||
- Alignments[i].TypeBitWidth < Alignments[BestMatchIdx].TypeBitWidth))
- BestMatchIdx = i;
- // However, if there isn't one that's larger, then we must use the
- // largest one we have (see below)
- if (LargestInt == -1 ||
- Alignments[i].TypeBitWidth > Alignments[LargestInt].TypeBitWidth)
- LargestInt = i;
+ AlignmentsTy::const_iterator I = findAlignmentLowerBound(AlignType, BitWidth);
+  // See if we found an exact match. Or, if we are looking for an integer type
+  // but don't have an exact match, take the next largest integer. This is
+  // where lower_bound will point when it fails to find an exact match.
+ if (I != Alignments.end() && I->AlignType == (unsigned)AlignType &&
+ (I->TypeBitWidth == BitWidth || AlignType == INTEGER_ALIGN))
+ return ABIInfo ? I->ABIAlign : I->PrefAlign;
+
+ if (AlignType == INTEGER_ALIGN) {
+    // If we didn't have a larger value, try the largest value we have.
+    if (I != Alignments.begin()) {
+      --I; // Go to the previous entry and see if it's an integer.
+ if (I->AlignType == INTEGER_ALIGN)
+ return ABIInfo ? I->ABIAlign : I->PrefAlign;
}
- }
-
- // Okay, we didn't find an exact solution. Fall back here depending on what
- // is being looked for.
- if (BestMatchIdx == -1) {
- // If we didn't find an integer alignment, fall back on most conservative.
- if (AlignType == INTEGER_ALIGN) {
- BestMatchIdx = LargestInt;
- } else if (AlignType == VECTOR_ALIGN) {
- // By default, use natural alignment for vector types. This is consistent
- // with what clang and llvm-gcc do.
- unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
- Align *= cast<VectorType>(Ty)->getNumElements();
- Align = PowerOf2Ceil(Align);
- return Align;
- }
- }
+ } else if (AlignType == VECTOR_ALIGN) {
+ // By default, use natural alignment for vector types. This is consistent
+ // with what clang and llvm-gcc do.
+ unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
+ Align *= cast<VectorType>(Ty)->getNumElements();
+ Align = PowerOf2Ceil(Align);
+ return Align;
+ }
// If we still couldn't find a reasonable default alignment, fall back
// to a simple heuristic that the alignment is the first power of two
@@ -517,15 +514,9 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
// approximation of reality, and if the user wanted something less
// conservative, they should have specified it explicitly in the data
// layout.
- if (BestMatchIdx == -1) {
- unsigned Align = getTypeStoreSize(Ty);
- Align = PowerOf2Ceil(Align);
- return Align;
- }
-
- // Since we got a "best match" index, just return it.
- return ABIInfo ? Alignments[BestMatchIdx].ABIAlign
- : Alignments[BestMatchIdx].PrefAlign;
+ unsigned Align = getTypeStoreSize(Ty);
+ Align = PowerOf2Ceil(Align);
+ return Align;
}
namespace {
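The getAlignmentInfo rewrite above depends on setAlignment keeping Alignments sorted by (AlignType, TypeBitWidth), so lower_bound lands either on the exact entry or, for integers, on the next-largest width. The lookup idiom in isolation, as a self-contained sketch (the struct and function names are made up):

    #include <algorithm>
    #include <cstdint>
    #include <tuple>
    #include <utility>
    #include <vector>

    struct Elem { unsigned AlignType; uint32_t TypeBitWidth; };

    // Returns the first element not less than (Type, Width); an exact match,
    // if present, sits at this position, and the next-largest integer width
    // follows directly when there is none.
    std::vector<Elem>::iterator findLowerBound(std::vector<Elem> &V,
                                               unsigned Type, uint32_t Width) {
      auto Key = std::make_pair(Type, Width);
      return std::lower_bound(V.begin(), V.end(), Key,
                              [](const Elem &L,
                                 const std::pair<unsigned, uint32_t> &R) {
                                return std::tie(L.AlignType, L.TypeBitWidth) <
                                       std::tie(R.first, R.second);
                              });
    }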
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index 6b9bc689a446..c5d39c544304 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -79,9 +79,19 @@ void DebugInfoFinder::processModule(const Module &M) {
processScope(M->getScope());
}
}
- for (auto &F : M.functions())
+ for (auto &F : M.functions()) {
if (auto *SP = cast_or_null<DISubprogram>(F.getSubprogram()))
processSubprogram(SP);
+ // There could be subprograms from inlined functions referenced from
+ // instructions only. Walk the function to find them.
+ for (const BasicBlock &BB : F) {
+ for (const Instruction &I : BB) {
+ if (!I.getDebugLoc())
+ continue;
+ processLocation(M, I.getDebugLoc().get());
+ }
+ }
+ }
}
void DebugInfoFinder::processLocation(const Module &M, const DILocation *Loc) {
@@ -239,6 +249,38 @@ bool DebugInfoFinder::addScope(DIScope *Scope) {
return true;
}
+static llvm::MDNode *stripDebugLocFromLoopID(llvm::MDNode *N) {
+ assert(N->op_begin() != N->op_end() && "Missing self reference?");
+
+  // If there is no debug location, we do not have to rewrite this MDNode.
+ if (std::none_of(N->op_begin() + 1, N->op_end(), [](const MDOperand &Op) {
+ return isa<DILocation>(Op.get());
+ }))
+ return N;
+
+ // If there is only the debug location without any actual loop metadata, we
+ // can remove the metadata.
+ if (std::none_of(N->op_begin() + 1, N->op_end(), [](const MDOperand &Op) {
+ return !isa<DILocation>(Op.get());
+ }))
+ return nullptr;
+
+ SmallVector<Metadata *, 4> Args;
+  // Reserve operand 0 for the loop ID self-reference.
+ auto TempNode = MDNode::getTemporary(N->getContext(), None);
+ Args.push_back(TempNode.get());
+ // Add all non-debug location operands back.
+ for (auto Op = N->op_begin() + 1; Op != N->op_end(); Op++) {
+ if (!isa<DILocation>(*Op))
+ Args.push_back(*Op);
+ }
+
+ // Set the first operand to itself.
+ MDNode *LoopID = MDNode::get(N->getContext(), Args);
+ LoopID->replaceOperandWith(0, LoopID);
+ return LoopID;
+}
+
bool llvm::stripDebugInfo(Function &F) {
bool Changed = false;
if (F.getSubprogram()) {
@@ -246,6 +288,7 @@ bool llvm::stripDebugInfo(Function &F) {
F.setSubprogram(nullptr);
}
+ llvm::DenseMap<llvm::MDNode*, llvm::MDNode*> LoopIDsMap;
for (BasicBlock &BB : F) {
for (auto II = BB.begin(), End = BB.end(); II != End;) {
Instruction &I = *II++; // We may delete the instruction, increment now.
@@ -259,6 +302,15 @@ bool llvm::stripDebugInfo(Function &F) {
I.setDebugLoc(DebugLoc());
}
}
+
+ auto *TermInst = BB.getTerminator();
+ if (auto *LoopID = TermInst->getMetadata(LLVMContext::MD_loop)) {
+ auto *NewLoopID = LoopIDsMap.lookup(LoopID);
+ if (!NewLoopID)
+ NewLoopID = LoopIDsMap[LoopID] = stripDebugLocFromLoopID(LoopID);
+ if (NewLoopID != LoopID)
+ TermInst->setMetadata(LLVMContext::MD_loop, NewLoopID);
+ }
}
return Changed;
}
@@ -410,7 +462,8 @@ private:
CU->isOptimized(), CU->getFlags(), CU->getRuntimeVersion(),
CU->getSplitDebugFilename(), DICompileUnit::LineTablesOnly, EnumTypes,
RetainedTypes, GlobalVariables, ImportedEntities, CU->getMacros(),
- CU->getDWOId(), CU->getSplitDebugInlining());
+ CU->getDWOId(), CU->getSplitDebugInlining(),
+ CU->getDebugInfoForProfiling());
}
DILocation *getReplacementMDLocation(DILocation *MLD) {
@@ -558,17 +611,26 @@ bool llvm::stripNonLineTableDebugInfo(Module &M) {
}
for (auto &BB : F) {
for (auto &I : BB) {
- if (I.getDebugLoc() == DebugLoc())
- continue;
-
- // Make a replacement.
- auto &DL = I.getDebugLoc();
- auto *Scope = DL.getScope();
- MDNode *InlinedAt = DL.getInlinedAt();
- Scope = remap(Scope);
- InlinedAt = remap(InlinedAt);
- I.setDebugLoc(
- DebugLoc::get(DL.getLine(), DL.getCol(), Scope, InlinedAt));
+ auto remapDebugLoc = [&](DebugLoc DL) -> DebugLoc {
+ auto *Scope = DL.getScope();
+ MDNode *InlinedAt = DL.getInlinedAt();
+ Scope = remap(Scope);
+ InlinedAt = remap(InlinedAt);
+ return DebugLoc::get(DL.getLine(), DL.getCol(), Scope, InlinedAt);
+ };
+
+ if (I.getDebugLoc() != DebugLoc())
+ I.setDebugLoc(remapDebugLoc(I.getDebugLoc()));
+
+ // Remap DILocations in untyped MDNodes (e.g., llvm.loop).
+ SmallVector<std::pair<unsigned, MDNode *>, 2> MDs;
+ I.getAllMetadata(MDs);
+ for (auto Attachment : MDs)
+ if (auto *T = dyn_cast_or_null<MDTuple>(Attachment.second))
+ for (unsigned N = 0; N < T->getNumOperands(); ++N)
+ if (auto *Loc = dyn_cast_or_null<DILocation>(T->getOperand(N)))
+ if (Loc != DebugLoc())
+ T->replaceOperandWith(N, remapDebugLoc(Loc));
}
}
}
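stripDebugLocFromLoopID above relies on the standard self-referential shape of llvm.loop metadata, where operand 0 of the loop ID is the node itself. Rebuilding such a node follows a two-step pattern, sketched here with Ctx and the surviving operands assumed to be in scope:

    // A temporary node keeps slot 0 reserved while the real node is built.
    auto Temp = MDNode::getTemporary(Ctx, None);
    SmallVector<Metadata *, 4> Ops;
    Ops.push_back(Temp.get());
    // ... append the surviving (non-DILocation) operands to Ops here ...
    MDNode *LoopID = MDNode::get(Ctx, Ops);
    LoopID->replaceOperandWith(0, LoopID); // close the self-reference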
diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp
index 8e21a907e15e..d14c6018d409 100644
--- a/lib/IR/DebugInfoMetadata.cpp
+++ b/lib/IR/DebugInfoMetadata.cpp
@@ -245,16 +245,18 @@ DIBasicType *DIBasicType::getImpl(LLVMContext &Context, unsigned Tag,
DIDerivedType *DIDerivedType::getImpl(
LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
unsigned Line, Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
- uint32_t AlignInBits, uint64_t OffsetInBits, DIFlags Flags,
- Metadata *ExtraData, StorageType Storage, bool ShouldCreate) {
+ uint32_t AlignInBits, uint64_t OffsetInBits,
+ Optional<unsigned> DWARFAddressSpace, DIFlags Flags, Metadata *ExtraData,
+ StorageType Storage, bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DIDerivedType,
(Tag, Name, File, Line, Scope, BaseType, SizeInBits,
- AlignInBits, OffsetInBits, Flags, ExtraData));
+ AlignInBits, OffsetInBits, DWARFAddressSpace, Flags,
+ ExtraData));
Metadata *Ops[] = {File, Scope, Name, BaseType, ExtraData};
DEFINE_GETIMPL_STORE(
- DIDerivedType, (Tag, Line, SizeInBits, AlignInBits, OffsetInBits, Flags),
- Ops);
+ DIDerivedType, (Tag, Line, SizeInBits, AlignInBits, OffsetInBits,
+ DWARFAddressSpace, Flags), Ops);
}
DICompositeType *DICompositeType::getImpl(
@@ -383,8 +385,8 @@ DICompileUnit *DICompileUnit::getImpl(
unsigned RuntimeVersion, MDString *SplitDebugFilename,
unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes,
Metadata *GlobalVariables, Metadata *ImportedEntities, Metadata *Macros,
- uint64_t DWOId, bool SplitDebugInlining, StorageType Storage,
- bool ShouldCreate) {
+ uint64_t DWOId, bool SplitDebugInlining, bool DebugInfoForProfiling,
+ StorageType Storage, bool ShouldCreate) {
assert(Storage != Uniqued && "Cannot unique DICompileUnit");
assert(isCanonical(Producer) && "Expected canonical MDString");
assert(isCanonical(Flags) && "Expected canonical MDString");
@@ -397,7 +399,8 @@ DICompileUnit *DICompileUnit::getImpl(
return storeImpl(new (array_lengthof(Ops))
DICompileUnit(Context, Storage, SourceLanguage,
IsOptimized, RuntimeVersion, EmissionKind,
- DWOId, SplitDebugInlining, Ops),
+ DWOId, SplitDebugInlining,
+ DebugInfoForProfiling, Ops),
Storage);
}
@@ -611,10 +614,23 @@ bool DIExpression::isValid() const {
return false;
break;
}
+ case dwarf::DW_OP_swap: {
+      // There must be more than one implicit element on the stack.
+
+ // FIXME: A better way to implement this would be to add a local variable
+ // that keeps track of the stack depth and introduce something like a
+ // DW_LLVM_OP_implicit_location as a placeholder for the location this
+ // DIExpression is attached to, or else pass the number of implicit stack
+ // elements into isValid.
+ if (getNumElements() == 1)
+ return false;
+ break;
+ }
case dwarf::DW_OP_constu:
case dwarf::DW_OP_plus:
case dwarf::DW_OP_minus:
case dwarf::DW_OP_deref:
+ case dwarf::DW_OP_xderef:
break;
}
}
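With the DW_OP_swap case above, an expression consisting of the lone swap is rejected: the only value available is the single implicit element for the location the expression is attached to. A sketch, assuming an LLVMContext Ctx is in scope:

    // One element total, so DW_OP_swap has nothing to exchange with.
    DIExpression *Bad = DIExpression::get(Ctx, {dwarf::DW_OP_swap});
    assert(!Bad->isValid());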
diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp
index ffa7a6b40e2a..f31074a7ad44 100644
--- a/lib/IR/DebugLoc.cpp
+++ b/lib/IR/DebugLoc.cpp
@@ -66,8 +66,8 @@ DebugLoc DebugLoc::get(unsigned Line, unsigned Col, const MDNode *Scope,
const_cast<MDNode *>(InlinedAt));
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void DebugLoc::dump() const {
-#ifndef NDEBUG
if (!Loc)
return;
@@ -79,8 +79,8 @@ LLVM_DUMP_METHOD void DebugLoc::dump() const {
InlinedAtDL.dump();
} else
dbgs() << "\n";
-#endif
}
+#endif
void DebugLoc::print(raw_ostream &OS) const {
if (!Loc)
diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp
index ea71fde26e0e..395b6158e0c8 100644
--- a/lib/IR/DiagnosticInfo.cpp
+++ b/lib/IR/DiagnosticInfo.cpp
@@ -148,21 +148,31 @@ void DiagnosticInfoPGOProfile::print(DiagnosticPrinter &DP) const {
DP << getMsg();
}
-bool DiagnosticInfoWithDebugLocBase::isLocationAvailable() const {
- return getDebugLoc();
+DiagnosticLocation::DiagnosticLocation(const DebugLoc &DL) {
+ if (!DL)
+ return;
+ Filename = DL->getFilename();
+ Line = DL->getLine();
+ Column = DL->getColumn();
}
-void DiagnosticInfoWithDebugLocBase::getLocation(StringRef *Filename,
+DiagnosticLocation::DiagnosticLocation(const DISubprogram *SP) {
+ if (!SP)
+ return;
+ Filename = SP->getFilename();
+ Line = SP->getScopeLine();
+ Column = 0;
+}
+
+void DiagnosticInfoWithLocationBase::getLocation(StringRef *Filename,
unsigned *Line,
unsigned *Column) const {
- DILocation *L = getDebugLoc();
- assert(L != nullptr && "debug location is invalid");
- *Filename = L->getFilename();
- *Line = L->getLine();
- *Column = L->getColumn();
+ *Filename = Loc.getFilename();
+ *Line = Loc.getLine();
+ *Column = Loc.getColumn();
}
-const std::string DiagnosticInfoWithDebugLocBase::getLocationStr() const {
+const std::string DiagnosticInfoWithLocationBase::getLocationStr() const {
StringRef Filename("<unknown>");
unsigned Line = 0;
unsigned Column = 0;
@@ -171,14 +181,14 @@ const std::string DiagnosticInfoWithDebugLocBase::getLocationStr() const {
return (Filename + ":" + Twine(Line) + ":" + Twine(Column)).str();
}
-DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, Value *V)
+DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, const Value *V)
: Key(Key) {
if (auto *F = dyn_cast<Function>(V)) {
if (DISubprogram *SP = F->getSubprogram())
- DLoc = DebugLoc::get(SP->getScopeLine(), 0, SP);
+ Loc = SP;
}
else if (auto *I = dyn_cast<Instruction>(V))
- DLoc = I->getDebugLoc();
+ Loc = I->getDebugLoc();
// Only include names that correspond to user variables. FIXME: we should use
// debug info if available to get the name of the user variable.
@@ -191,7 +201,7 @@ DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, Value *V)
Val = I->getOpcodeName();
}
-DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, Type *T)
+DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, const Type *T)
: Key(Key) {
raw_string_ostream OS(Val);
OS << *T;
@@ -211,73 +221,83 @@ void DiagnosticInfoOptimizationBase::print(DiagnosticPrinter &DP) const {
OptimizationRemark::OptimizationRemark(const char *PassName,
StringRef RemarkName,
- const DebugLoc &DLoc, Value *CodeRegion)
- : DiagnosticInfoOptimizationBase(
+ const DiagnosticLocation &Loc,
+ const Value *CodeRegion)
+ : DiagnosticInfoIROptimization(
DK_OptimizationRemark, DS_Remark, PassName, RemarkName,
- *cast<BasicBlock>(CodeRegion)->getParent(), DLoc, CodeRegion) {}
+ *cast<BasicBlock>(CodeRegion)->getParent(), Loc, CodeRegion) {}
+
+OptimizationRemark::OptimizationRemark(const char *PassName,
+ StringRef RemarkName,
+ const Instruction *Inst)
+ : DiagnosticInfoIROptimization(DK_OptimizationRemark, DS_Remark, PassName,
+ RemarkName, *Inst->getParent()->getParent(),
+ Inst->getDebugLoc(), Inst->getParent()) {}
+
+// Helper to allow for an assert before attempting to return an invalid
+// reference.
+static const BasicBlock &getFirstFunctionBlock(const Function *Func) {
+ assert(!Func->empty() && "Function does not have a body");
+ return Func->front();
+}
OptimizationRemark::OptimizationRemark(const char *PassName,
- StringRef RemarkName, Instruction *Inst)
- : DiagnosticInfoOptimizationBase(DK_OptimizationRemark, DS_Remark, PassName,
- RemarkName,
- *Inst->getParent()->getParent(),
- Inst->getDebugLoc(), Inst->getParent()) {}
+ StringRef RemarkName,
+ const Function *Func)
+ : DiagnosticInfoIROptimization(DK_OptimizationRemark, DS_Remark, PassName,
+ RemarkName, *Func, Func->getSubprogram(),
+ &getFirstFunctionBlock(Func)) {}
-bool OptimizationRemark::isEnabled() const {
+bool OptimizationRemark::isEnabled(StringRef PassName) {
return PassRemarksOptLoc.Pattern &&
- PassRemarksOptLoc.Pattern->match(getPassName());
+ PassRemarksOptLoc.Pattern->match(PassName);
}
-OptimizationRemarkMissed::OptimizationRemarkMissed(const char *PassName,
- StringRef RemarkName,
- const DebugLoc &DLoc,
- Value *CodeRegion)
- : DiagnosticInfoOptimizationBase(
+OptimizationRemarkMissed::OptimizationRemarkMissed(
+ const char *PassName, StringRef RemarkName, const DiagnosticLocation &Loc,
+ const Value *CodeRegion)
+ : DiagnosticInfoIROptimization(
DK_OptimizationRemarkMissed, DS_Remark, PassName, RemarkName,
- *cast<BasicBlock>(CodeRegion)->getParent(), DLoc, CodeRegion) {}
+ *cast<BasicBlock>(CodeRegion)->getParent(), Loc, CodeRegion) {}
OptimizationRemarkMissed::OptimizationRemarkMissed(const char *PassName,
StringRef RemarkName,
- Instruction *Inst)
- : DiagnosticInfoOptimizationBase(DK_OptimizationRemarkMissed, DS_Remark,
- PassName, RemarkName,
- *Inst->getParent()->getParent(),
- Inst->getDebugLoc(), Inst->getParent()) {}
+ const Instruction *Inst)
+ : DiagnosticInfoIROptimization(DK_OptimizationRemarkMissed, DS_Remark,
+ PassName, RemarkName,
+ *Inst->getParent()->getParent(),
+ Inst->getDebugLoc(), Inst->getParent()) {}
-bool OptimizationRemarkMissed::isEnabled() const {
+bool OptimizationRemarkMissed::isEnabled(StringRef PassName) {
return PassRemarksMissedOptLoc.Pattern &&
- PassRemarksMissedOptLoc.Pattern->match(getPassName());
+ PassRemarksMissedOptLoc.Pattern->match(PassName);
}
-OptimizationRemarkAnalysis::OptimizationRemarkAnalysis(const char *PassName,
- StringRef RemarkName,
- const DebugLoc &DLoc,
- Value *CodeRegion)
- : DiagnosticInfoOptimizationBase(
+OptimizationRemarkAnalysis::OptimizationRemarkAnalysis(
+ const char *PassName, StringRef RemarkName, const DiagnosticLocation &Loc,
+ const Value *CodeRegion)
+ : DiagnosticInfoIROptimization(
DK_OptimizationRemarkAnalysis, DS_Remark, PassName, RemarkName,
- *cast<BasicBlock>(CodeRegion)->getParent(), DLoc, CodeRegion) {}
+ *cast<BasicBlock>(CodeRegion)->getParent(), Loc, CodeRegion) {}
OptimizationRemarkAnalysis::OptimizationRemarkAnalysis(const char *PassName,
StringRef RemarkName,
- Instruction *Inst)
- : DiagnosticInfoOptimizationBase(DK_OptimizationRemarkAnalysis, DS_Remark,
- PassName, RemarkName,
- *Inst->getParent()->getParent(),
- Inst->getDebugLoc(), Inst->getParent()) {}
-
-OptimizationRemarkAnalysis::OptimizationRemarkAnalysis(enum DiagnosticKind Kind,
- const char *PassName,
- StringRef RemarkName,
- const DebugLoc &DLoc,
- Value *CodeRegion)
- : DiagnosticInfoOptimizationBase(Kind, DS_Remark, PassName, RemarkName,
- *cast<BasicBlock>(CodeRegion)->getParent(),
- DLoc, CodeRegion) {}
+ const Instruction *Inst)
+ : DiagnosticInfoIROptimization(DK_OptimizationRemarkAnalysis, DS_Remark,
+ PassName, RemarkName,
+ *Inst->getParent()->getParent(),
+ Inst->getDebugLoc(), Inst->getParent()) {}
-bool OptimizationRemarkAnalysis::isEnabled() const {
- return shouldAlwaysPrint() ||
- (PassRemarksAnalysisOptLoc.Pattern &&
- PassRemarksAnalysisOptLoc.Pattern->match(getPassName()));
+OptimizationRemarkAnalysis::OptimizationRemarkAnalysis(
+ enum DiagnosticKind Kind, const char *PassName, StringRef RemarkName,
+ const DiagnosticLocation &Loc, const Value *CodeRegion)
+ : DiagnosticInfoIROptimization(Kind, DS_Remark, PassName, RemarkName,
+ *cast<BasicBlock>(CodeRegion)->getParent(),
+ Loc, CodeRegion) {}
+
+bool OptimizationRemarkAnalysis::isEnabled(StringRef PassName) {
+ return PassRemarksAnalysisOptLoc.Pattern &&
+ PassRemarksAnalysisOptLoc.Pattern->match(PassName);
}
void DiagnosticInfoMIRParser::print(DiagnosticPrinter &DP) const {
@@ -285,42 +305,48 @@ void DiagnosticInfoMIRParser::print(DiagnosticPrinter &DP) const {
}
void llvm::emitOptimizationRemark(LLVMContext &Ctx, const char *PassName,
- const Function &Fn, const DebugLoc &DLoc,
+ const Function &Fn,
+ const DiagnosticLocation &Loc,
const Twine &Msg) {
- Ctx.diagnose(OptimizationRemark(PassName, Fn, DLoc, Msg));
+ Ctx.diagnose(OptimizationRemark(PassName, Fn, Loc, Msg));
}
void llvm::emitOptimizationRemarkMissed(LLVMContext &Ctx, const char *PassName,
const Function &Fn,
- const DebugLoc &DLoc,
+ const DiagnosticLocation &Loc,
const Twine &Msg) {
- Ctx.diagnose(OptimizationRemarkMissed(PassName, Fn, DLoc, Msg));
+ Ctx.diagnose(OptimizationRemarkMissed(PassName, Fn, Loc, Msg));
}
void llvm::emitOptimizationRemarkAnalysis(LLVMContext &Ctx,
const char *PassName,
const Function &Fn,
- const DebugLoc &DLoc,
+ const DiagnosticLocation &Loc,
const Twine &Msg) {
- Ctx.diagnose(OptimizationRemarkAnalysis(PassName, Fn, DLoc, Msg));
+ Ctx.diagnose(OptimizationRemarkAnalysis(PassName, Fn, Loc, Msg));
}
-void llvm::emitOptimizationRemarkAnalysisFPCommute(LLVMContext &Ctx,
- const char *PassName,
- const Function &Fn,
- const DebugLoc &DLoc,
- const Twine &Msg) {
- Ctx.diagnose(OptimizationRemarkAnalysisFPCommute(PassName, Fn, DLoc, Msg));
+void llvm::emitOptimizationRemarkAnalysisFPCommute(
+ LLVMContext &Ctx, const char *PassName, const Function &Fn,
+ const DiagnosticLocation &Loc, const Twine &Msg) {
+ Ctx.diagnose(OptimizationRemarkAnalysisFPCommute(PassName, Fn, Loc, Msg));
}
void llvm::emitOptimizationRemarkAnalysisAliasing(LLVMContext &Ctx,
const char *PassName,
const Function &Fn,
- const DebugLoc &DLoc,
+ const DiagnosticLocation &Loc,
const Twine &Msg) {
- Ctx.diagnose(OptimizationRemarkAnalysisAliasing(PassName, Fn, DLoc, Msg));
+ Ctx.diagnose(OptimizationRemarkAnalysisAliasing(PassName, Fn, Loc, Msg));
}
+DiagnosticInfoOptimizationFailure::DiagnosticInfoOptimizationFailure(
+ const char *PassName, StringRef RemarkName, const DiagnosticLocation &Loc,
+ const Value *CodeRegion)
+ : DiagnosticInfoIROptimization(
+ DK_OptimizationFailure, DS_Warning, PassName, RemarkName,
+ *cast<BasicBlock>(CodeRegion)->getParent(), Loc, CodeRegion) {}
+
bool DiagnosticInfoOptimizationFailure::isEnabled() const {
// Only print warnings.
return getSeverity() == DS_Warning;
@@ -336,18 +362,6 @@ void DiagnosticInfoUnsupported::print(DiagnosticPrinter &DP) const {
DP << Str;
}
-void llvm::emitLoopVectorizeWarning(LLVMContext &Ctx, const Function &Fn,
- const DebugLoc &DLoc, const Twine &Msg) {
- Ctx.diagnose(DiagnosticInfoOptimizationFailure(
- Fn, DLoc, Twine("loop not vectorized: " + Msg)));
-}
-
-void llvm::emitLoopInterleaveWarning(LLVMContext &Ctx, const Function &Fn,
- const DebugLoc &DLoc, const Twine &Msg) {
- Ctx.diagnose(DiagnosticInfoOptimizationFailure(
- Fn, DLoc, Twine("loop not interleaved: " + Msg)));
-}
-
void DiagnosticInfoISelFallback::print(DiagnosticPrinter &DP) const {
DP << "Instruction selection used fallback path for " << getFunction();
}
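The Instruction-based constructors above pull both the location and the code region straight from the instruction, so call sites no longer thread a DebugLoc through by hand. A sketch of emitting one, where Ctx is the LLVMContext and I a const Instruction* (the pass and remark names are made up):

    OptimizationRemark R("my-pass", "Widened", I);
    R << "operation widened to 64 bits";
    Ctx.diagnose(R);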
diff --git a/lib/IR/Dominators.cpp b/lib/IR/Dominators.cpp
index 1880807da7eb..44948cc5831d 100644
--- a/lib/IR/Dominators.cpp
+++ b/lib/IR/Dominators.cpp
@@ -29,9 +29,9 @@ using namespace llvm;
// Always verify dominfo if expensive checking is enabled.
#ifdef EXPENSIVE_CHECKS
-static bool VerifyDomInfo = true;
+bool llvm::VerifyDomInfo = true;
#else
-static bool VerifyDomInfo = false;
+bool llvm::VerifyDomInfo = false;
#endif
static cl::opt<bool,true>
VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo),
@@ -73,6 +73,15 @@ template void llvm::Calculate<Function, Inverse<BasicBlock *>>(
GraphTraits<Inverse<BasicBlock *>>::NodeRef>::type> &DT,
Function &F);
+bool DominatorTree::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &) {
+ // Check whether the analysis, all analyses on functions, or the function's
+ // CFG have been preserved.
+ auto PAC = PA.getChecker<DominatorTreeAnalysis>();
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>() ||
+ PAC.preservedSet<CFGAnalyses>());
+}
+
// dominates - Return true if Def dominates a use in User. This performs
// the special checks necessary if Def and User are in the same basic block.
// Note that Def doesn't dominate a use in Def itself!
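DominatorTree::invalidate above keeps the tree alive whenever the CFG analysis set is preserved. A pass that rewrites instructions but never touches block structure would report that as follows (a minimal new-pass-manager sketch; the pass name is hypothetical):

    PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) {
      // ... transform instructions without changing the CFG ...
      PreservedAnalyses PA;
      PA.preserveSet<CFGAnalyses>(); // invalidate() above then returns false
      return PA;
    }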
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index 05419aa3d2bb..c4bb9e83acd7 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -30,7 +30,6 @@ using namespace llvm;
// Explicit instantiations of SymbolTableListTraits since some of the methods
// are not in the public header file...
-template class llvm::SymbolTableListTraits<Argument>;
template class llvm::SymbolTableListTraits<BasicBlock>;
//===----------------------------------------------------------------------===//
@@ -39,12 +38,8 @@ template class llvm::SymbolTableListTraits<BasicBlock>;
void Argument::anchor() { }
-Argument::Argument(Type *Ty, const Twine &Name, Function *Par)
- : Value(Ty, Value::ArgumentVal) {
- Parent = nullptr;
-
- if (Par)
- Par->getArgumentList().push_back(this);
+Argument::Argument(Type *Ty, const Twine &Name, Function *Par, unsigned ArgNo)
+ : Value(Ty, Value::ArgumentVal), Parent(Par), ArgNo(ArgNo) {
setName(Name);
}
@@ -52,27 +47,9 @@ void Argument::setParent(Function *parent) {
Parent = parent;
}
-/// getArgNo - Return the index of this formal argument in its containing
-/// function. For example in "void foo(int a, float b)" a is 0 and b is 1.
-unsigned Argument::getArgNo() const {
- const Function *F = getParent();
- assert(F && "Argument is not in a function");
-
- Function::const_arg_iterator AI = F->arg_begin();
- unsigned ArgIdx = 0;
- for (; &*AI != this; ++AI)
- ++ArgIdx;
-
- return ArgIdx;
-}
-
-/// hasNonNullAttr - Return true if this argument has the nonnull attribute on
-/// it in its containing function. Also returns true if at least one byte is
-/// known to be dereferenceable and the pointer is in addrspace(0).
bool Argument::hasNonNullAttr() const {
if (!getType()->isPointerTy()) return false;
- if (getParent()->getAttributes().
- hasAttribute(getArgNo()+1, Attribute::NonNull))
+ if (getParent()->hasParamAttribute(getArgNo(), Attribute::NonNull))
return true;
else if (getDereferenceableBytes() > 0 &&
getType()->getPointerAddressSpace() == 0)
@@ -80,25 +57,19 @@ bool Argument::hasNonNullAttr() const {
return false;
}
-/// hasByValAttr - Return true if this argument has the byval attribute on it
-/// in its containing function.
bool Argument::hasByValAttr() const {
if (!getType()->isPointerTy()) return false;
return hasAttribute(Attribute::ByVal);
}
bool Argument::hasSwiftSelfAttr() const {
- return getParent()->getAttributes().
- hasAttribute(getArgNo()+1, Attribute::SwiftSelf);
+ return getParent()->hasParamAttribute(getArgNo(), Attribute::SwiftSelf);
}
bool Argument::hasSwiftErrorAttr() const {
- return getParent()->getAttributes().
- hasAttribute(getArgNo()+1, Attribute::SwiftError);
+ return getParent()->hasParamAttribute(getArgNo(), Attribute::SwiftError);
}
-/// \brief Return true if this argument has the inalloca attribute on it in
-/// its containing function.
bool Argument::hasInAllocaAttr() const {
if (!getType()->isPointerTy()) return false;
return hasAttribute(Attribute::InAlloca);
@@ -106,9 +77,9 @@ bool Argument::hasInAllocaAttr() const {
bool Argument::hasByValOrInAllocaAttr() const {
if (!getType()->isPointerTy()) return false;
- AttributeSet Attrs = getParent()->getAttributes();
- return Attrs.hasAttribute(getArgNo() + 1, Attribute::ByVal) ||
- Attrs.hasAttribute(getArgNo() + 1, Attribute::InAlloca);
+ AttributeList Attrs = getParent()->getAttributes();
+ return Attrs.hasParamAttribute(getArgNo(), Attribute::ByVal) ||
+ Attrs.hasParamAttribute(getArgNo(), Attribute::InAlloca);
}
unsigned Argument::getParamAlignment() const {
@@ -129,116 +100,74 @@ uint64_t Argument::getDereferenceableOrNullBytes() const {
return getParent()->getDereferenceableOrNullBytes(getArgNo()+1);
}
-/// hasNestAttr - Return true if this argument has the nest attribute on
-/// it in its containing function.
bool Argument::hasNestAttr() const {
if (!getType()->isPointerTy()) return false;
return hasAttribute(Attribute::Nest);
}
-/// hasNoAliasAttr - Return true if this argument has the noalias attribute on
-/// it in its containing function.
bool Argument::hasNoAliasAttr() const {
if (!getType()->isPointerTy()) return false;
return hasAttribute(Attribute::NoAlias);
}
-/// hasNoCaptureAttr - Return true if this argument has the nocapture attribute
-/// on it in its containing function.
bool Argument::hasNoCaptureAttr() const {
if (!getType()->isPointerTy()) return false;
return hasAttribute(Attribute::NoCapture);
}
-/// hasSRetAttr - Return true if this argument has the sret attribute on
-/// it in its containing function.
bool Argument::hasStructRetAttr() const {
if (!getType()->isPointerTy()) return false;
return hasAttribute(Attribute::StructRet);
}
-/// hasReturnedAttr - Return true if this argument has the returned attribute on
-/// it in its containing function.
bool Argument::hasReturnedAttr() const {
return hasAttribute(Attribute::Returned);
}
-/// hasZExtAttr - Return true if this argument has the zext attribute on it in
-/// its containing function.
bool Argument::hasZExtAttr() const {
return hasAttribute(Attribute::ZExt);
}
-/// hasSExtAttr Return true if this argument has the sext attribute on it in its
-/// containing function.
bool Argument::hasSExtAttr() const {
return hasAttribute(Attribute::SExt);
}
-/// Return true if this argument has the readonly or readnone attribute on it
-/// in its containing function.
bool Argument::onlyReadsMemory() const {
- return getParent()->getAttributes().
- hasAttribute(getArgNo()+1, Attribute::ReadOnly) ||
- getParent()->getAttributes().
- hasAttribute(getArgNo()+1, Attribute::ReadNone);
+ AttributeList Attrs = getParent()->getAttributes();
+ return Attrs.hasParamAttribute(getArgNo(), Attribute::ReadOnly) ||
+ Attrs.hasParamAttribute(getArgNo(), Attribute::ReadNone);
}
-/// addAttr - Add attributes to an argument.
-void Argument::addAttr(AttributeSet AS) {
+void Argument::addAttr(AttributeList AS) {
assert(AS.getNumSlots() <= 1 &&
"Trying to add more than one attribute set to an argument!");
AttrBuilder B(AS, AS.getSlotIndex(0));
- getParent()->addAttributes(getArgNo() + 1,
- AttributeSet::get(Parent->getContext(),
- getArgNo() + 1, B));
+ getParent()->addAttributes(
+ getArgNo() + 1,
+ AttributeList::get(Parent->getContext(), getArgNo() + 1, B));
}
-/// removeAttr - Remove attributes from an argument.
-void Argument::removeAttr(AttributeSet AS) {
+void Argument::removeAttr(AttributeList AS) {
assert(AS.getNumSlots() <= 1 &&
"Trying to remove more than one attribute set from an argument!");
AttrBuilder B(AS, AS.getSlotIndex(0));
- getParent()->removeAttributes(getArgNo() + 1,
- AttributeSet::get(Parent->getContext(),
- getArgNo() + 1, B));
+ getParent()->removeAttributes(
+ getArgNo() + 1,
+ AttributeList::get(Parent->getContext(), getArgNo() + 1, B));
}
-/// hasAttribute - Checks if an argument has a given attribute.
bool Argument::hasAttribute(Attribute::AttrKind Kind) const {
- return getParent()->hasAttribute(getArgNo() + 1, Kind);
+ return getParent()->hasParamAttribute(getArgNo(), Kind);
}
//===----------------------------------------------------------------------===//
// Helper Methods in Function
//===----------------------------------------------------------------------===//
-bool Function::isMaterializable() const {
- return getGlobalObjectSubClassData() & (1 << IsMaterializableBit);
-}
-
-void Function::setIsMaterializable(bool V) {
- unsigned Mask = 1 << IsMaterializableBit;
- setGlobalObjectSubClassData((~Mask & getGlobalObjectSubClassData()) |
- (V ? Mask : 0u));
-}
-
LLVMContext &Function::getContext() const {
return getType()->getContext();
}
-FunctionType *Function::getFunctionType() const {
- return cast<FunctionType>(getValueType());
-}
-
-bool Function::isVarArg() const {
- return getFunctionType()->isVarArg();
-}
-
-Type *Function::getReturnType() const {
- return getFunctionType()->getReturnType();
-}
-
void Function::removeFromParent() {
getParent()->getFunctionList().remove(getIterator());
}
@@ -254,7 +183,8 @@ void Function::eraseFromParent() {
Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name,
Module *ParentModule)
: GlobalObject(Ty, Value::FunctionVal,
- OperandTraits<Function>::op_begin(this), 0, Linkage, name) {
+ OperandTraits<Function>::op_begin(this), 0, Linkage, name),
+ Arguments(nullptr), NumArgs(Ty->getNumParams()) {
assert(FunctionType::isValidReturnType(getReturnType()) &&
"invalid return type");
setGlobalObjectSubClassData(0);
@@ -282,7 +212,8 @@ Function::~Function() {
dropAllReferences(); // After this it is safe to delete instructions.
// Delete all of the method arguments and unlink from symbol table...
- ArgumentList.clear();
+ if (Arguments)
+ clearArguments();
// Remove the function from the on-the-side GC table.
clearGC();
@@ -290,16 +221,33 @@ Function::~Function() {
void Function::BuildLazyArguments() const {
// Create the arguments vector, all arguments start out unnamed.
- FunctionType *FT = getFunctionType();
- for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
- assert(!FT->getParamType(i)->isVoidTy() &&
- "Cannot have void typed arguments!");
- ArgumentList.push_back(new Argument(FT->getParamType(i)));
+ auto *FT = getFunctionType();
+ if (NumArgs > 0) {
+ Arguments = std::allocator<Argument>().allocate(NumArgs);
+ for (unsigned i = 0, e = NumArgs; i != e; ++i) {
+ Type *ArgTy = FT->getParamType(i);
+ assert(!ArgTy->isVoidTy() && "Cannot have void typed arguments!");
+ new (Arguments + i) Argument(ArgTy, "", const_cast<Function *>(this), i);
+ }
}
// Clear the lazy arguments bit.
unsigned SDC = getSubclassDataFromValue();
const_cast<Function*>(this)->setValueSubclassData(SDC &= ~(1<<0));
+ assert(!hasLazyArguments());
+}
+
+static MutableArrayRef<Argument> makeArgArray(Argument *Args, size_t Count) {
+ return MutableArrayRef<Argument>(Args, Count);
+}
+
+void Function::clearArguments() {
+ for (Argument &A : makeArgArray(Arguments, NumArgs)) {
+ A.setName("");
+ A.~Argument();
+ }
+ std::allocator<Argument>().deallocate(Arguments, NumArgs);
+ Arguments = nullptr;
}
void Function::stealArgumentListFrom(Function &Src) {
@@ -307,10 +255,10 @@ void Function::stealArgumentListFrom(Function &Src) {
// Drop the current arguments, if any, and set the lazy argument bit.
if (!hasLazyArguments()) {
- assert(llvm::all_of(ArgumentList,
+ assert(llvm::all_of(makeArgArray(Arguments, NumArgs),
[](const Argument &A) { return A.use_empty(); }) &&
"Expected arguments to be unused in declaration");
- ArgumentList.clear();
+ clearArguments();
setValueSubclassData(getSubclassDataFromValue() | (1 << 0));
}
@@ -319,18 +267,26 @@ void Function::stealArgumentListFrom(Function &Src) {
return;
// Steal arguments from Src, and fix the lazy argument bits.
- ArgumentList.splice(ArgumentList.end(), Src.ArgumentList);
+ assert(arg_size() == Src.arg_size());
+ Arguments = Src.Arguments;
+ Src.Arguments = nullptr;
+ for (Argument &A : makeArgArray(Arguments, NumArgs)) {
+ // FIXME: This does the work of transferNodesFromList inefficiently.
+ SmallString<128> Name;
+ if (A.hasName())
+ Name = A.getName();
+ if (!Name.empty())
+ A.setName("");
+ A.setParent(this);
+ if (!Name.empty())
+ A.setName(Name);
+ }
+
setValueSubclassData(getSubclassDataFromValue() & ~(1 << 0));
+ assert(!hasLazyArguments());
Src.setValueSubclassData(Src.getSubclassDataFromValue() | (1 << 0));
}
-size_t Function::arg_size() const {
- return getFunctionType()->getNumParams();
-}
-bool Function::arg_empty() const {
- return getFunctionType()->getNumParams() == 0;
-}
-
// dropAllReferences() - This function causes all the subinstructions to "let
// go" of all references that they are maintaining. This allows one to
// 'delete' a whole class at a time, even though there may be circular
@@ -362,49 +318,49 @@ void Function::dropAllReferences() {
}
void Function::addAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.addAttribute(getContext(), i, Kind);
setAttributes(PAL);
}
void Function::addAttribute(unsigned i, Attribute Attr) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.addAttribute(getContext(), i, Attr);
setAttributes(PAL);
}
-void Function::addAttributes(unsigned i, AttributeSet Attrs) {
- AttributeSet PAL = getAttributes();
+void Function::addAttributes(unsigned i, AttributeList Attrs) {
+ AttributeList PAL = getAttributes();
PAL = PAL.addAttributes(getContext(), i, Attrs);
setAttributes(PAL);
}
void Function::removeAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.removeAttribute(getContext(), i, Kind);
setAttributes(PAL);
}
void Function::removeAttribute(unsigned i, StringRef Kind) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.removeAttribute(getContext(), i, Kind);
setAttributes(PAL);
}
-void Function::removeAttributes(unsigned i, AttributeSet Attrs) {
- AttributeSet PAL = getAttributes();
+void Function::removeAttributes(unsigned i, AttributeList Attrs) {
+ AttributeList PAL = getAttributes();
PAL = PAL.removeAttributes(getContext(), i, Attrs);
setAttributes(PAL);
}
void Function::addDereferenceableAttr(unsigned i, uint64_t Bytes) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
setAttributes(PAL);
}
void Function::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
setAttributes(PAL);
}
@@ -533,10 +489,18 @@ static std::string getMangledTypeStr(Type* Ty) {
} else if (ArrayType* ATyp = dyn_cast<ArrayType>(Ty)) {
Result += "a" + llvm::utostr(ATyp->getNumElements()) +
getMangledTypeStr(ATyp->getElementType());
- } else if (StructType* STyp = dyn_cast<StructType>(Ty)) {
- assert(!STyp->isLiteral() && "TODO: implement literal types");
- Result += STyp->getName();
- } else if (FunctionType* FT = dyn_cast<FunctionType>(Ty)) {
+ } else if (StructType *STyp = dyn_cast<StructType>(Ty)) {
+ if (!STyp->isLiteral()) {
+ Result += "s_";
+ Result += STyp->getName();
+ } else {
+ Result += "sl_";
+ for (auto Elem : STyp->elements())
+ Result += getMangledTypeStr(Elem);
+ }
+ // Ensure nested structs are distinguishable.
+ Result += "s";
+ } else if (FunctionType *FT = dyn_cast<FunctionType>(Ty)) {
Result += "f_" + getMangledTypeStr(FT->getReturnType());
for (size_t i = 0; i < FT->getNumParams(); i++)
Result += getMangledTypeStr(FT->getParamType(i));
@@ -1279,9 +1243,10 @@ void Function::setValueSubclassDataBit(unsigned Bit, bool On) {
setValueSubclassData(getSubclassDataFromValue() & ~(1 << Bit));
}
-void Function::setEntryCount(uint64_t Count) {
+void Function::setEntryCount(uint64_t Count,
+ const DenseSet<GlobalValue::GUID> *S) {
MDBuilder MDB(getContext());
- setMetadata(LLVMContext::MD_prof, MDB.createFunctionEntryCount(Count));
+ setMetadata(LLVMContext::MD_prof, MDB.createFunctionEntryCount(Count, S));
}
Optional<uint64_t> Function::getEntryCount() const {
@@ -1298,6 +1263,18 @@ Optional<uint64_t> Function::getEntryCount() const {
return None;
}
+DenseSet<GlobalValue::GUID> Function::getImportGUIDs() const {
+ DenseSet<GlobalValue::GUID> R;
+ if (MDNode *MD = getMetadata(LLVMContext::MD_prof))
+ if (MDString *MDS = dyn_cast<MDString>(MD->getOperand(0)))
+ if (MDS->getString().equals("function_entry_count"))
+ for (unsigned i = 2; i < MD->getNumOperands(); i++)
+ R.insert(mdconst::extract<ConstantInt>(MD->getOperand(i))
+ ->getValue()
+ .getZExtValue());
+ return R;
+}
+
void Function::setSectionPrefix(StringRef Prefix) {
MDBuilder MDB(getContext());
setMetadata(LLVMContext::MD_section_prefix,
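setEntryCount and getImportGUIDs above extend the function_entry_count metadata with an optional set of GUIDs (operands 2 and up). A round-trip sketch, assuming a Function *F; the callee name is made up:

    DenseSet<GlobalValue::GUID> Imports;
    Imports.insert(GlobalValue::getGUID("hot_callee")); // hypothetical callee
    F->setEntryCount(1000, &Imports);
    DenseSet<GlobalValue::GUID> RoundTrip = F->getImportGUIDs();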
diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp
index 3bbcf781e5dd..ba92a91cc917 100644
--- a/lib/IR/GCOV.cpp
+++ b/lib/IR/GCOV.cpp
@@ -103,11 +103,17 @@ bool GCOVFile::readGCDA(GCOVBuffer &Buffer) {
return true;
}
+void GCOVFile::print(raw_ostream &OS) const {
+ for (const auto &FPtr : Functions)
+ FPtr->print(OS);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dump - Dump GCOVFile content to dbgs() for debugging purposes.
LLVM_DUMP_METHOD void GCOVFile::dump() const {
- for (const auto &FPtr : Functions)
- FPtr->dump();
+ print(dbgs());
}
+#endif
/// collectLineCounts - Collect line counts. This must be used after
/// reading .gcno and .gcda files.
@@ -343,13 +349,19 @@ uint64_t GCOVFunction::getExitCount() const {
return Blocks.back()->getCount();
}
+void GCOVFunction::print(raw_ostream &OS) const {
+ OS << "===== " << Name << " (" << Ident << ") @ " << Filename << ":"
+ << LineNumber << "\n";
+ for (const auto &Block : Blocks)
+ Block->print(OS);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dump - Dump GCOVFunction content to dbgs() for debugging purposes.
LLVM_DUMP_METHOD void GCOVFunction::dump() const {
- dbgs() << "===== " << Name << " (" << Ident << ") @ " << Filename << ":"
- << LineNumber << "\n";
- for (const auto &Block : Blocks)
- Block->dump();
+ print(dbgs());
}
+#endif
/// collectLineCounts - Collect line counts. This must be used after
/// reading .gcno and .gcda files.
@@ -400,29 +412,35 @@ void GCOVBlock::collectLineCounts(FileInfo &FI) {
FI.addBlockLine(Parent.getFilename(), N, this);
}
-/// dump - Dump GCOVBlock content to dbgs() for debugging purposes.
-LLVM_DUMP_METHOD void GCOVBlock::dump() const {
- dbgs() << "Block : " << Number << " Counter : " << Counter << "\n";
+void GCOVBlock::print(raw_ostream &OS) const {
+ OS << "Block : " << Number << " Counter : " << Counter << "\n";
if (!SrcEdges.empty()) {
- dbgs() << "\tSource Edges : ";
+ OS << "\tSource Edges : ";
for (const GCOVEdge *Edge : SrcEdges)
- dbgs() << Edge->Src.Number << " (" << Edge->Count << "), ";
- dbgs() << "\n";
+ OS << Edge->Src.Number << " (" << Edge->Count << "), ";
+ OS << "\n";
}
if (!DstEdges.empty()) {
- dbgs() << "\tDestination Edges : ";
+ OS << "\tDestination Edges : ";
for (const GCOVEdge *Edge : DstEdges)
- dbgs() << Edge->Dst.Number << " (" << Edge->Count << "), ";
- dbgs() << "\n";
+ OS << Edge->Dst.Number << " (" << Edge->Count << "), ";
+ OS << "\n";
}
if (!Lines.empty()) {
- dbgs() << "\tLines : ";
+ OS << "\tLines : ";
for (uint32_t N : Lines)
- dbgs() << (N) << ",";
- dbgs() << "\n";
+ OS << (N) << ",";
+ OS << "\n";
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+/// dump - Dump GCOVBlock content to dbgs() for debugging purposes.
+LLVM_DUMP_METHOD void GCOVBlock::dump() const {
+ print(dbgs());
+}
+#endif
+
//===----------------------------------------------------------------------===//
// FileInfo implementation.
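The GCOV changes repeat an idiom used throughout this patch: textual output moves into print(raw_ostream &), and dump() becomes a thin wrapper that is only compiled when it can actually be invoked. The general shape, for a hypothetical class Thing:

    void Thing::print(raw_ostream &OS) const {
      OS << "Thing(" << Name << ")\n";
    }

    #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    LLVM_DUMP_METHOD void Thing::dump() const { print(dbgs()); }
    #endif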
diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp
index 6f7356524d38..5f338f58d940 100644
--- a/lib/IR/Globals.cpp
+++ b/lib/IR/Globals.cpp
@@ -93,18 +93,6 @@ void GlobalObject::setAlignment(unsigned Align) {
assert(getAlignment() == Align && "Alignment representation error!");
}
-unsigned GlobalObject::getGlobalObjectSubClassData() const {
- unsigned ValueData = getGlobalValueSubClassData();
- return ValueData >> GlobalObjectBits;
-}
-
-void GlobalObject::setGlobalObjectSubClassData(unsigned Val) {
- unsigned OldData = getGlobalValueSubClassData();
- setGlobalValueSubClassData((OldData & GlobalObjectMask) |
- (Val << GlobalObjectBits));
- assert(getGlobalObjectSubClassData() == Val && "representation error");
-}
-
void GlobalObject::copyAttributesFrom(const GlobalValue *Src) {
GlobalValue::copyAttributesFrom(Src);
if (const auto *GV = dyn_cast<GlobalObject>(Src)) {
@@ -152,7 +140,7 @@ StringRef GlobalValue::getSection() const {
return cast<GlobalObject>(this)->getSection();
}
-Comdat *GlobalValue::getComdat() {
+const Comdat *GlobalValue::getComdat() const {
if (auto *GA = dyn_cast<GlobalAlias>(this)) {
// In general we cannot compute this at the IR level, but we try.
if (const GlobalObject *GO = GA->getBaseObject())
@@ -177,7 +165,9 @@ void GlobalObject::setSection(StringRef S) {
// Get or create a stable section name string and put it in the table in the
// context.
- S = getContext().pImpl->SectionStrings.insert(S).first->first();
+ if (!S.empty()) {
+ S = getContext().pImpl->SectionStrings.insert(S).first->first();
+ }
getContext().pImpl->GlobalObjectSections[this] = S;
// Update the HasSectionHashEntryBit. Setting the section to the empty string
@@ -240,7 +230,7 @@ bool GlobalValue::canIncreaseAlignment() const {
return true;
}
-GlobalObject *GlobalValue::getBaseObject() {
+const GlobalObject *GlobalValue::getBaseObject() const {
if (auto *GO = dyn_cast<GlobalObject>(this))
return GO;
if (auto *GA = dyn_cast<GlobalAlias>(this))
diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp
index d3e410d6d033..fd5ae71a2f3c 100644
--- a/lib/IR/IRBuilder.cpp
+++ b/lib/IR/IRBuilder.cpp
@@ -172,7 +172,8 @@ CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {
"lifetime.start requires the size to be an i64");
Value *Ops[] = { Size, Ptr };
Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_start);
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_start,
+ { Ptr->getType() });
return createCallHelper(TheFn, Ops, this);
}
@@ -187,7 +188,8 @@ CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) {
"lifetime.end requires the size to be an i64");
Value *Ops[] = { Size, Ptr };
Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_end);
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_end,
+ { Ptr->getType() });
return createCallHelper(TheFn, Ops, this);
}
@@ -482,3 +484,11 @@ CallInst *IRBuilderBase::CreateGCRelocate(Instruction *Statepoint,
getInt32(DerivedOffset)};
return createCallHelper(FnGCRelocate, Args, this, Name);
}
+
+CallInst *IRBuilderBase::CreateBinaryIntrinsic(Intrinsic::ID ID,
+ Value *LHS, Value *RHS,
+ const Twine &Name) {
+ Module *M = BB->getParent()->getParent();
+ Function *Fn = Intrinsic::getDeclaration(M, ID, { LHS->getType() });
+ return createCallHelper(Fn, { LHS, RHS }, this, Name);
+}
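CreateBinaryIntrinsic above resolves the intrinsic declaration from the type of its left operand, so overloaded two-operand intrinsics shrink to a single call. A sketch, assuming an IRBuilder<> Builder and two float Values A and B:

    // Emits a call to llvm.maxnum.f32, mangled on A's type.
    Value *Max = Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, A, B, "max");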
diff --git a/lib/IR/IRPrintingPasses.cpp b/lib/IR/IRPrintingPasses.cpp
index 05e206cfd6cb..955fdc749b2b 100644
--- a/lib/IR/IRPrintingPasses.cpp
+++ b/lib/IR/IRPrintingPasses.cpp
@@ -70,6 +70,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
+
+ StringRef getPassName() const override { return "Print Module IR"; }
};
class PrintFunctionPassWrapper : public FunctionPass {
@@ -91,6 +93,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
+
+ StringRef getPassName() const override { return "Print Function IR"; }
};
class PrintBasicBlockPass : public BasicBlockPass {
@@ -111,6 +115,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
+
+ StringRef getPassName() const override { return "Print BasicBlock IR"; }
};
}
diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp
index 5a9118571040..8feeeb65d445 100644
--- a/lib/IR/InlineAsm.cpp
+++ b/lib/IR/InlineAsm.cpp
@@ -1,4 +1,4 @@
-//===-- InlineAsm.cpp - Implement the InlineAsm class ---------------------===//
+//===- InlineAsm.cpp - Implement the InlineAsm class ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,27 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/InlineAsm.h"
#include "ConstantsContext.h"
#include "LLVMContextImpl.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include <algorithm>
+#include <cassert>
#include <cctype>
-using namespace llvm;
-
-// Implement the first virtual method in this class in this file so the
-// InlineAsm vtable is emitted here.
-InlineAsm::~InlineAsm() {
-}
+#include <cstddef>
+#include <cstdlib>
-InlineAsm *InlineAsm::get(FunctionType *FTy, StringRef AsmString,
- StringRef Constraints, bool hasSideEffects,
- bool isAlignStack, AsmDialect asmDialect) {
- InlineAsmKeyType Key(AsmString, Constraints, FTy, hasSideEffects,
- isAlignStack, asmDialect);
- LLVMContextImpl *pImpl = FTy->getContext().pImpl;
- return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(FTy), Key);
-}
+using namespace llvm;
InlineAsm::InlineAsm(FunctionType *FTy, const std::string &asmString,
const std::string &constraints, bool hasSideEffects,
@@ -40,12 +35,24 @@ InlineAsm::InlineAsm(FunctionType *FTy, const std::string &asmString,
AsmString(asmString), Constraints(constraints), FTy(FTy),
HasSideEffects(hasSideEffects), IsAlignStack(isAlignStack),
Dialect(asmDialect) {
-
// Do various checks on the constraint string and type.
assert(Verify(getFunctionType(), constraints) &&
"Function type not legal for constraints!");
}
+// Implement the first virtual method in this class in this file so the
+// InlineAsm vtable is emitted here.
+InlineAsm::~InlineAsm() = default;
+
+InlineAsm *InlineAsm::get(FunctionType *FTy, StringRef AsmString,
+ StringRef Constraints, bool hasSideEffects,
+ bool isAlignStack, AsmDialect asmDialect) {
+ InlineAsmKeyType Key(AsmString, Constraints, FTy, hasSideEffects,
+ isAlignStack, asmDialect);
+ LLVMContextImpl *pImpl = FTy->getContext().pImpl;
+ return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(FTy), Key);
+}
+
void InlineAsm::destroyConstant() {
getType()->getContext().pImpl->InlineAsms.remove(this);
delete this;
@@ -55,14 +62,6 @@ FunctionType *InlineAsm::getFunctionType() const {
return FTy;
}
-///Default constructor.
-InlineAsm::ConstraintInfo::ConstraintInfo() :
- Type(isInput), isEarlyClobber(false),
- MatchingInput(-1), isCommutative(false),
- isIndirect(false), isMultipleAlternative(false),
- currentAlternativeIndex(0) {
-}
-
/// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the
/// fields in this structure. If the constraint string is not understood,
/// return true, otherwise return false.
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
index 2fa03489081d..c26699eab4e2 100644
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@@ -17,6 +17,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
using namespace llvm;
@@ -59,12 +60,6 @@ const Module *Instruction::getModule() const {
return getParent()->getModule();
}
-Module *Instruction::getModule() {
- return getParent()->getModule();
-}
-
-Function *Instruction::getFunction() { return getParent()->getParent(); }
-
const Function *Instruction::getFunction() const {
return getParent()->getParent();
}
@@ -122,6 +117,29 @@ bool Instruction::hasNoSignedWrap() const {
return cast<OverflowingBinaryOperator>(this)->hasNoSignedWrap();
}
+void Instruction::dropPoisonGeneratingFlags() {
+ switch (getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ cast<OverflowingBinaryOperator>(this)->setHasNoUnsignedWrap(false);
+ cast<OverflowingBinaryOperator>(this)->setHasNoSignedWrap(false);
+ break;
+
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ cast<PossiblyExactOperator>(this)->setIsExact(false);
+ break;
+
+ case Instruction::GetElementPtr:
+ cast<GetElementPtrInst>(this)->setIsInBounds(false);
+ break;
+ }
+}
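dropPoisonGeneratingFlags strips nuw/nsw, exact, and inbounds, the flags whose violation yields poison, so an instruction can be moved to a point where those guarantees may no longer hold. A sketch of the intended use (I and InsertPt are illustrative names for the instruction being hoisted and its destination):

  // Hoisting I above a guarding branch: drop flags first so a value that
  // was only conditionally poison does not become unconditionally poison.
  I->dropPoisonGeneratingFlags();
  I->moveBefore(InsertPt);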
+
bool Instruction::isExact() const {
return cast<PossiblyExactOperator>(this)->isExact();
}
@@ -186,6 +204,11 @@ bool Instruction::hasAllowReciprocal() const {
return cast<FPMathOperator>(this)->hasAllowReciprocal();
}
+bool Instruction::hasAllowContract() const {
+ assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
+ return cast<FPMathOperator>(this)->hasAllowContract();
+}
+
FastMathFlags Instruction::getFastMathFlags() const {
assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
return cast<FPMathOperator>(this)->getFastMathFlags();
@@ -521,17 +544,6 @@ bool Instruction::mayThrow() const {
return isa<ResumeInst>(this);
}
-/// Return true if the instruction is associative:
-///
-/// Associative operators satisfy: x op (y op z) === (x op y) op z
-///
-/// In LLVM, the Add, Mul, And, Or, and Xor operators are associative.
-///
-bool Instruction::isAssociative(unsigned Opcode) {
- return Opcode == And || Opcode == Or || Opcode == Xor ||
- Opcode == Add || Opcode == Mul;
-}
-
bool Instruction::isAssociative() const {
unsigned Opcode = getOpcode();
if (isAssociative(Opcode))
@@ -546,51 +558,6 @@ bool Instruction::isAssociative() const {
}
}
-/// Return true if the instruction is commutative:
-///
-/// Commutative operators satisfy: (x op y) === (y op x)
-///
-/// In LLVM, these are the associative operators, plus SetEQ and SetNE, when
-/// applied to any type.
-///
-bool Instruction::isCommutative(unsigned op) {
- switch (op) {
- case Add:
- case FAdd:
- case Mul:
- case FMul:
- case And:
- case Or:
- case Xor:
- return true;
- default:
- return false;
- }
-}
-
-/// Return true if the instruction is idempotent:
-///
-/// Idempotent operators satisfy: x op x === x
-///
-/// In LLVM, the And and Or operators are idempotent.
-///
-bool Instruction::isIdempotent(unsigned Opcode) {
- return Opcode == And || Opcode == Or;
-}
-
-/// Return true if the instruction is nilpotent:
-///
-/// Nilpotent operators satisfy: x op x === Id,
-///
-/// where Id is the identity for the operator, i.e. a constant such that
-/// x op Id === x and Id op x === x for all x.
-///
-/// In LLVM, the Xor operator is nilpotent.
-///
-bool Instruction::isNilpotent(unsigned Opcode) {
- return Opcode == Xor;
-}
-
Instruction *Instruction::cloneImpl() const {
llvm_unreachable("Subclass of Instruction failed to implement cloneImpl");
}
@@ -651,3 +618,34 @@ Instruction *Instruction::clone() const {
New->copyMetadata(*this);
return New;
}
+
+void Instruction::updateProfWeight(uint64_t S, uint64_t T) {
+ auto *ProfileData = getMetadata(LLVMContext::MD_prof);
+ if (ProfileData == nullptr)
+ return;
+
+ auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
+ if (!ProfDataName || !ProfDataName->getString().equals("branch_weights"))
+ return;
+
+ SmallVector<uint32_t, 4> Weights;
+ for (unsigned i = 1; i < ProfileData->getNumOperands(); i++) {
+ // Using APInt::div may be expensive, but most cases should fit in 64 bits.
+ APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i))
+ ->getValue()
+ .getZExtValue());
+ Val *= APInt(128, S);
+ Weights.push_back(Val.udiv(APInt(128, T)).getLimitedValue());
+ }
+ MDBuilder MDB(getContext());
+ setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+}
+
+void Instruction::setProfWeight(uint64_t W) {
+ assert((isa<CallInst>(this) || isa<InvokeInst>(this)) &&
+ "Can only set weights for call and invoke instrucitons");
+ SmallVector<uint32_t, 1> Weights;
+ Weights.push_back(W);
+ MDBuilder MDB(getContext());
+ setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+}
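updateProfWeight rescales each branch weight by S/T, widening to 128 bits so the multiply cannot overflow. As a worked example: an existing weight of 3000000000 scaled with S = 1 and T = 3 becomes 3000000000 * 1 / 3 = 1000000000, whereas with 64-bit intermediates a large S could wrap the product before the division.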
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index b67926943429..c10c144122e2 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -307,7 +307,7 @@ CallInst::CallInst(const CallInst &CI)
: Instruction(CI.getType(), Instruction::Call,
OperandTraits<CallInst>::op_end(this) - CI.getNumOperands(),
CI.getNumOperands()),
- AttributeList(CI.AttributeList), FTy(CI.FTy) {
+ Attrs(CI.Attrs), FTy(CI.FTy) {
setTailCallKind(CI.getTailCallKind());
setCallingConv(CI.getCallingConv());
@@ -334,7 +334,7 @@ CallInst *CallInst::Create(CallInst *CI, ArrayRef<OperandBundleDef> OpB,
Value *CallInst::getReturnedArgOperand() const {
unsigned Index;
- if (AttributeList.hasAttrSomewhere(Attribute::Returned, &Index) && Index)
+ if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index)
return getArgOperand(Index-1);
if (const Function *F = getCalledFunction())
if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) &&
@@ -345,48 +345,58 @@ Value *CallInst::getReturnedArgOperand() const {
}
void CallInst::addAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.addAttribute(getContext(), i, Kind);
setAttributes(PAL);
}
void CallInst::addAttribute(unsigned i, Attribute Attr) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.addAttribute(getContext(), i, Attr);
setAttributes(PAL);
}
void CallInst::removeAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.removeAttribute(getContext(), i, Kind);
setAttributes(PAL);
}
void CallInst::removeAttribute(unsigned i, StringRef Kind) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.removeAttribute(getContext(), i, Kind);
setAttributes(PAL);
}
void CallInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
setAttributes(PAL);
}
void CallInst::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
setAttributes(PAL);
}
+bool CallInst::hasRetAttr(Attribute::AttrKind Kind) const {
+ if (Attrs.hasAttribute(AttributeList::ReturnIndex, Kind))
+ return true;
+
+ // Look at the callee, if available.
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Kind);
+ return false;
+}
+
bool CallInst::paramHasAttr(unsigned i, Attribute::AttrKind Kind) const {
- assert(i < (getNumArgOperands() + 1) && "Param index out of bounds!");
+ assert(i < getNumArgOperands() && "Param index out of bounds!");
- if (AttributeList.hasAttribute(i, Kind))
+ if (Attrs.hasParamAttribute(i, Kind))
return true;
if (const Function *F = getCalledFunction())
- return F->getAttributes().hasAttribute(i, Kind);
+ return F->getAttributes().hasParamAttribute(i, Kind);
return false;
}
@@ -400,8 +410,10 @@ bool CallInst::dataOperandHasImpliedAttr(unsigned i,
// question is a call argument; or be indirectly implied by the kind of its
// containing operand bundle, if the operand is a bundle operand.
+ // FIXME: Avoid these i - 1 calculations and update the API to use zero-based
+ // indices.
if (i < (getNumArgOperands() + 1))
- return paramHasAttr(i, Kind);
+ return paramHasAttr(i - 1, Kind);
assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) &&
"Must be either a call argument or an operand bundle!");
@@ -466,7 +478,7 @@ static Instruction *createMalloc(Instruction *InsertBefore,
Value *MallocFunc = MallocF;
if (!MallocFunc)
// prototype malloc as "void *malloc(size_t)"
- MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, nullptr);
+ MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy);
PointerType *AllocPtrType = PointerType::getUnqual(AllocTy);
CallInst *MCall = nullptr;
Instruction *Result = nullptr;
@@ -560,7 +572,7 @@ static Instruction *createFree(Value *Source,
Type *VoidTy = Type::getVoidTy(M->getContext());
Type *IntPtrTy = Type::getInt8PtrTy(M->getContext());
// prototype free as "void free(void*)"
- Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, nullptr);
+ Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy);
CallInst *Result = nullptr;
Value *PtrCast = Source;
if (InsertBefore) {
@@ -646,7 +658,7 @@ InvokeInst::InvokeInst(const InvokeInst &II)
OperandTraits<InvokeInst>::op_end(this) -
II.getNumOperands(),
II.getNumOperands()),
- AttributeList(II.AttributeList), FTy(II.FTy) {
+ Attrs(II.Attrs), FTy(II.FTy) {
setCallingConv(II.getCallingConv());
std::copy(II.op_begin(), II.op_end(), op_begin());
std::copy(II.bundle_op_info_begin(), II.bundle_op_info_end(),
@@ -681,7 +693,7 @@ void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
Value *InvokeInst::getReturnedArgOperand() const {
unsigned Index;
- if (AttributeList.hasAttrSomewhere(Attribute::Returned, &Index) && Index)
+ if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index)
return getArgOperand(Index-1);
if (const Function *F = getCalledFunction())
if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) &&
@@ -691,13 +703,23 @@ Value *InvokeInst::getReturnedArgOperand() const {
return nullptr;
}
+bool InvokeInst::hasRetAttr(Attribute::AttrKind Kind) const {
+ if (Attrs.hasAttribute(AttributeList::ReturnIndex, Kind))
+ return true;
+
+ // Look at the callee, if available.
+ if (const Function *F = getCalledFunction())
+ return F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Kind);
+ return false;
+}
+
bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind Kind) const {
- assert(i < (getNumArgOperands() + 1) && "Param index out of bounds!");
+ assert(i < getNumArgOperands() && "Param index out of bounds!");
- if (AttributeList.hasAttribute(i, Kind))
+ if (Attrs.hasParamAttribute(i, Kind))
return true;
if (const Function *F = getCalledFunction())
- return F->getAttributes().hasAttribute(i, Kind);
+ return F->getAttributes().hasParamAttribute(i, Kind);
return false;
}
@@ -711,8 +733,10 @@ bool InvokeInst::dataOperandHasImpliedAttr(unsigned i,
// question is an invoke argument; or be indirectly implied by the kind of its
// containing operand bundle, if the operand is a bundle operand.
+ // FIXME: Avoid these i - 1 calculations and update the API to use zero-based
+ // indices.
if (i < (getNumArgOperands() + 1))
- return paramHasAttr(i, Kind);
+ return paramHasAttr(i - 1, Kind);
assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) &&
"Must be either an invoke argument or an operand bundle!");
@@ -720,37 +744,37 @@ bool InvokeInst::dataOperandHasImpliedAttr(unsigned i,
}
void InvokeInst::addAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.addAttribute(getContext(), i, Kind);
setAttributes(PAL);
}
void InvokeInst::addAttribute(unsigned i, Attribute Attr) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.addAttribute(getContext(), i, Attr);
setAttributes(PAL);
}
void InvokeInst::removeAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.removeAttribute(getContext(), i, Kind);
setAttributes(PAL);
}
void InvokeInst::removeAttribute(unsigned i, StringRef Kind) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.removeAttribute(getContext(), i, Kind);
setAttributes(PAL);
}
void InvokeInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
setAttributes(PAL);
}
void InvokeInst::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
- AttributeSet PAL = getAttributes();
+ AttributeList PAL = getAttributes();
PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
setAttributes(PAL);
}
@@ -1199,34 +1223,38 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) {
return Amt;
}
-AllocaInst::AllocaInst(Type *Ty, const Twine &Name, Instruction *InsertBefore)
- : AllocaInst(Ty, /*ArraySize=*/nullptr, Name, InsertBefore) {}
-
-AllocaInst::AllocaInst(Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd)
- : AllocaInst(Ty, /*ArraySize=*/nullptr, Name, InsertAtEnd) {}
-
-AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, const Twine &Name,
+AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, const Twine &Name,
Instruction *InsertBefore)
- : AllocaInst(Ty, ArraySize, /*Align=*/0, Name, InsertBefore) {}
+ : AllocaInst(Ty, AddrSpace, /*ArraySize=*/nullptr, Name, InsertBefore) {}
-AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, const Twine &Name,
+AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, const Twine &Name,
BasicBlock *InsertAtEnd)
- : AllocaInst(Ty, ArraySize, /*Align=*/0, Name, InsertAtEnd) {}
+ : AllocaInst(Ty, AddrSpace, /*ArraySize=*/nullptr, Name, InsertAtEnd) {}
-AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
+AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
const Twine &Name, Instruction *InsertBefore)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), ArraySize), InsertBefore),
- AllocatedType(Ty) {
+ : AllocaInst(Ty, AddrSpace, ArraySize, /*Align=*/0, Name, InsertBefore) {}
+
+AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
+ const Twine &Name, BasicBlock *InsertAtEnd)
+ : AllocaInst(Ty, AddrSpace, ArraySize, /*Align=*/0, Name, InsertAtEnd) {}
+
+AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
+ unsigned Align, const Twine &Name,
+ Instruction *InsertBefore)
+ : UnaryInstruction(PointerType::get(Ty, AddrSpace), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertBefore),
+ AllocatedType(Ty) {
setAlignment(Align);
assert(!Ty->isVoidTy() && "Cannot allocate void!");
setName(Name);
}
-AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, unsigned Align,
- const Twine &Name, BasicBlock *InsertAtEnd)
- : UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), ArraySize), InsertAtEnd),
+AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
+ unsigned Align, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : UnaryInstruction(PointerType::get(Ty, AddrSpace), Alloca,
+ getAISize(Ty->getContext(), ArraySize), InsertAtEnd),
AllocatedType(Ty) {
setAlignment(Align);
assert(!Ty->isVoidTy() && "Cannot allocate void!");
@@ -3655,16 +3683,16 @@ void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
// Initialize some new operands.
assert(OpNo+1 < ReservedSpace && "Growing didn't work!");
setNumHungOffUseOperands(OpNo+2);
- CaseIt Case(this, NewCaseIdx);
+ CaseHandle Case(this, NewCaseIdx);
Case.setValue(OnVal);
Case.setSuccessor(Dest);
}
/// removeCase - This method removes the specified case and its successor
/// from the switch instruction.
-void SwitchInst::removeCase(CaseIt i) {
- unsigned idx = i.getCaseIndex();
-
+SwitchInst::CaseIt SwitchInst::removeCase(CaseIt I) {
+ unsigned idx = I->getCaseIndex();
+
assert(2 + idx*2 < getNumOperands() && "Case index out of range!!!");
unsigned NumOps = getNumOperands();
@@ -3680,6 +3708,8 @@ void SwitchInst::removeCase(CaseIt i) {
OL[NumOps-2].set(nullptr);
OL[NumOps-2+1].set(nullptr);
setNumHungOffUseOperands(NumOps-2);
+
+ return CaseIt(this, idx);
}
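removeCase now returns the iterator following the erased case, matching the usual container-erase idiom. An erase-while-iterating sketch (SI and DeadBB are illustrative: a SwitchInst and a successor block being deleted):

  for (auto CI = SI->case_begin(); CI != SI->case_end();) {
    if (CI->getCaseSuccessor() == DeadBB)
      CI = SI->removeCase(CI); // advances past the removed case
    else
      ++CI;
  }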
/// growOperands - grow operands - This grows the operand list in response
@@ -3826,6 +3856,7 @@ InsertValueInst *InsertValueInst::cloneImpl() const {
AllocaInst *AllocaInst::cloneImpl() const {
AllocaInst *Result = new AllocaInst(getAllocatedType(),
+ getType()->getAddressSpace(),
(Value *)getOperand(0), getAlignment());
Result->setUsedWithInAlloca(isUsedWithInAlloca());
Result->setSwiftError(isSwiftError());
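Every AllocaInst constructor now takes an explicit address space, and the result type becomes a pointer in that space. A construction sketch (Ctx and InsertBefore are assumed surrounding names for the LLVMContext and insertion point):

  // An i32 alloca in address space 5; getType() yields i32 addrspace(5)*.
  AllocaInst *AI =
      new AllocaInst(Type::getInt32Ty(Ctx), /*AddrSpace=*/5,
                     /*ArraySize=*/nullptr, "tmp", InsertBefore);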
diff --git a/lib/IR/IntrinsicInst.cpp b/lib/IR/IntrinsicInst.cpp
index 240250662aec..c9814a96bea6 100644
--- a/lib/IR/IntrinsicInst.cpp
+++ b/lib/IR/IntrinsicInst.cpp
@@ -21,6 +21,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
@@ -93,3 +94,34 @@ Value *InstrProfIncrementInst::getStep() const {
LLVMContext &Context = M->getContext();
return ConstantInt::get(Type::getInt64Ty(Context), 1);
}
+
+ConstrainedFPIntrinsic::RoundingMode
+ConstrainedFPIntrinsic::getRoundingMode() const {
+ Metadata *MD = dyn_cast<MetadataAsValue>(getOperand(2))->getMetadata();
+ if (!MD || !isa<MDString>(MD))
+ return rmInvalid;
+ StringRef RoundingArg = cast<MDString>(MD)->getString();
+
+ // For dynamic rounding mode, we use round to nearest but we will set the
+ // 'exact' SDNodeFlag so that the value will not be rounded.
+ return StringSwitch<RoundingMode>(RoundingArg)
+ .Case("round.dynamic", rmDynamic)
+ .Case("round.tonearest", rmToNearest)
+ .Case("round.downward", rmDownward)
+ .Case("round.upward", rmUpward)
+ .Case("round.towardzero", rmTowardZero)
+ .Default(rmInvalid);
+}
+
+ConstrainedFPIntrinsic::ExceptionBehavior
+ConstrainedFPIntrinsic::getExceptionBehavior() const {
+ Metadata *MD = dyn_cast<MetadataAsValue>(getOperand(3))->getMetadata();
+ if (!MD || !isa<MDString>(MD))
+ return ebInvalid;
+ StringRef ExceptionArg = cast<MDString>(MD)->getString();
+ return StringSwitch<ExceptionBehavior>(ExceptionArg)
+ .Case("fpexcept.ignore", ebIgnore)
+ .Case("fpexcept.maytrap", ebMayTrap)
+ .Case("fpexcept.strict", ebStrict)
+ .Default(ebInvalid);
+}
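The new accessors decode the constrained-FP metadata arguments into enums. A consumer sketch (CI is assumed to be a CallInst already known to be one of the llvm.experimental.constrained.* intrinsics):

  auto *FPI = cast<ConstrainedFPIntrinsic>(CI);
  if (FPI->getRoundingMode() == ConstrainedFPIntrinsic::rmDynamic &&
      FPI->getExceptionBehavior() == ConstrainedFPIntrinsic::ebStrict) {
    // Rounding comes from the runtime environment and FP exceptions must
    // be treated as observable; avoid speculating this call.
  }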
diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp
index dd66f144f04f..6c6383c22255 100644
--- a/lib/IR/LLVMContext.cpp
+++ b/lib/IR/LLVMContext.cpp
@@ -58,6 +58,7 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
{MD_type, "type"},
{MD_section_prefix, "section_prefix"},
{MD_absolute_symbol, "absolute_symbol"},
+ {MD_associated, "associated"},
};
for (auto &MDKind : MDKinds) {
diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp
index c43356c53826..343722463e5f 100644
--- a/lib/IR/LLVMContextImpl.cpp
+++ b/lib/IR/LLVMContextImpl.cpp
@@ -114,9 +114,10 @@ LLVMContextImpl::~LLVMContextImpl() {
}
// Destroy attribute lists.
- for (FoldingSetIterator<AttributeSetImpl> I = AttrsLists.begin(),
- E = AttrsLists.end(); I != E; ) {
- FoldingSetIterator<AttributeSetImpl> Elem = I++;
+ for (FoldingSetIterator<AttributeListImpl> I = AttrsLists.begin(),
+ E = AttrsLists.end();
+ I != E;) {
+ FoldingSetIterator<AttributeListImpl> Elem = I++;
delete &*Elem;
}
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index 850c81cfabb2..0ee0b9c0da25 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -352,22 +352,26 @@ template <> struct MDNodeKeyImpl<DIDerivedType> {
uint64_t SizeInBits;
uint64_t OffsetInBits;
uint32_t AlignInBits;
+ Optional<unsigned> DWARFAddressSpace;
unsigned Flags;
Metadata *ExtraData;
MDNodeKeyImpl(unsigned Tag, MDString *Name, Metadata *File, unsigned Line,
Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
- uint32_t AlignInBits, uint64_t OffsetInBits, unsigned Flags,
+ uint32_t AlignInBits, uint64_t OffsetInBits,
+ Optional<unsigned> DWARFAddressSpace, unsigned Flags,
Metadata *ExtraData)
: Tag(Tag), Name(Name), File(File), Line(Line), Scope(Scope),
BaseType(BaseType), SizeInBits(SizeInBits), OffsetInBits(OffsetInBits),
- AlignInBits(AlignInBits), Flags(Flags), ExtraData(ExtraData) {}
+ AlignInBits(AlignInBits), DWARFAddressSpace(DWARFAddressSpace),
+ Flags(Flags), ExtraData(ExtraData) {}
MDNodeKeyImpl(const DIDerivedType *N)
: Tag(N->getTag()), Name(N->getRawName()), File(N->getRawFile()),
Line(N->getLine()), Scope(N->getRawScope()),
BaseType(N->getRawBaseType()), SizeInBits(N->getSizeInBits()),
OffsetInBits(N->getOffsetInBits()), AlignInBits(N->getAlignInBits()),
- Flags(N->getFlags()), ExtraData(N->getRawExtraData()) {}
+ DWARFAddressSpace(N->getDWARFAddressSpace()), Flags(N->getFlags()),
+ ExtraData(N->getRawExtraData()) {}
bool isKeyOf(const DIDerivedType *RHS) const {
return Tag == RHS->getTag() && Name == RHS->getRawName() &&
@@ -375,7 +379,9 @@ template <> struct MDNodeKeyImpl<DIDerivedType> {
Scope == RHS->getRawScope() && BaseType == RHS->getRawBaseType() &&
SizeInBits == RHS->getSizeInBits() &&
AlignInBits == RHS->getAlignInBits() &&
- OffsetInBits == RHS->getOffsetInBits() && Flags == RHS->getFlags() &&
+ OffsetInBits == RHS->getOffsetInBits() &&
+ DWARFAddressSpace == RHS->getDWARFAddressSpace() &&
+ Flags == RHS->getFlags() &&
ExtraData == RHS->getRawExtraData();
}
unsigned getHashValue() const {
@@ -612,17 +618,19 @@ template <> struct MDNodeSubsetEqualImpl<DISubprogram> {
typedef MDNodeKeyImpl<DISubprogram> KeyTy;
static bool isSubsetEqual(const KeyTy &LHS, const DISubprogram *RHS) {
return isDeclarationOfODRMember(LHS.IsDefinition, LHS.Scope,
- LHS.LinkageName, RHS);
+ LHS.LinkageName, LHS.TemplateParams, RHS);
}
static bool isSubsetEqual(const DISubprogram *LHS, const DISubprogram *RHS) {
return isDeclarationOfODRMember(LHS->isDefinition(), LHS->getRawScope(),
- LHS->getRawLinkageName(), RHS);
+ LHS->getRawLinkageName(),
+ LHS->getRawTemplateParams(), RHS);
}
/// Subprograms compare equal if they declare the same function in an ODR
/// type.
static bool isDeclarationOfODRMember(bool IsDefinition, const Metadata *Scope,
const MDString *LinkageName,
+ const Metadata *TemplateParams,
const DISubprogram *RHS) {
// Check whether the LHS is eligible.
if (IsDefinition || !Scope || !LinkageName)
@@ -633,8 +641,14 @@ template <> struct MDNodeSubsetEqualImpl<DISubprogram> {
return false;
// Compare to the RHS.
+ // FIXME: We need to compare template parameters here to avoid incorrect
+ // collisions in mapMetadata when RF_MoveDistinctMDs is set and an
+ // ODR-DISubprogram has a non-ODR template parameter (i.e., a
+ // DICompositeType that does not have an identifier). Eventually we
+ // should decouple ODR logic from
+ // uniquing logic.
return IsDefinition == RHS->isDefinition() && Scope == RHS->getRawScope() &&
- LinkageName == RHS->getRawLinkageName();
+ LinkageName == RHS->getRawLinkageName() &&
+ TemplateParams == RHS->getRawTemplateParams();
}
};
@@ -1105,7 +1119,7 @@ public:
FPMapTy FPConstants;
FoldingSet<AttributeImpl> AttrsSet;
- FoldingSet<AttributeSetImpl> AttrsLists;
+ FoldingSet<AttributeListImpl> AttrsLists;
FoldingSet<AttributeSetNode> AttrsSetNodes;
StringMap<MDString, BumpPtrAllocator> MDStringCache;
diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp
index f4bfd5992151..b9c4f482adf5 100644
--- a/lib/IR/MDBuilder.cpp
+++ b/lib/IR/MDBuilder.cpp
@@ -56,11 +56,16 @@ MDNode *MDBuilder::createUnpredictable() {
return MDNode::get(Context, None);
}
-MDNode *MDBuilder::createFunctionEntryCount(uint64_t Count) {
+MDNode *MDBuilder::createFunctionEntryCount(
+ uint64_t Count, const DenseSet<GlobalValue::GUID> *Imports) {
Type *Int64Ty = Type::getInt64Ty(Context);
- return MDNode::get(Context,
- {createString("function_entry_count"),
- createConstant(ConstantInt::get(Int64Ty, Count))});
+ SmallVector<Metadata *, 8> Ops;
+ Ops.push_back(createString("function_entry_count"));
+ Ops.push_back(createConstant(ConstantInt::get(Int64Ty, Count)));
+ if (Imports)
+ for (auto ID : *Imports)
+ Ops.push_back(createConstant(ConstantInt::get(Int64Ty, ID)));
+ return MDNode::get(Context, Ops);
}
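createFunctionEntryCount can now record the GUIDs of imported functions after the count operand. A caller sketch (MDB, F, and the GUID values are all illustrative):

  DenseSet<GlobalValue::GUID> Imports = {0x1234, 0x5678}; // hypothetical GUIDs
  F->setMetadata(LLVMContext::MD_prof,
                 MDB.createFunctionEntryCount(/*Count=*/1000, &Imports));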
MDNode *MDBuilder::createFunctionSectionPrefix(StringRef Prefix) {
diff --git a/lib/IR/Mangler.cpp b/lib/IR/Mangler.cpp
index 41e11b3945e4..03723bfd2ddb 100644
--- a/lib/IR/Mangler.cpp
+++ b/lib/IR/Mangler.cpp
@@ -13,6 +13,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -172,3 +173,34 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
raw_svector_ostream OS(OutName);
getNameWithPrefix(OS, GV, CannotUsePrivateLabel);
}
+
+void llvm::emitLinkerFlagsForGlobalCOFF(raw_ostream &OS, const GlobalValue *GV,
+ const Triple &TT, Mangler &Mangler) {
+ if (!GV->hasDLLExportStorageClass() || GV->isDeclaration())
+ return;
+
+ if (TT.isKnownWindowsMSVCEnvironment())
+ OS << " /EXPORT:";
+ else
+ OS << " -export:";
+
+ if (TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) {
+ std::string Flag;
+ raw_string_ostream FlagOS(Flag);
+ Mangler.getNameWithPrefix(FlagOS, GV, false);
+ FlagOS.flush();
+ if (Flag[0] == GV->getParent()->getDataLayout().getGlobalPrefix())
+ OS << Flag.substr(1);
+ else
+ OS << Flag;
+ } else {
+ Mangler.getNameWithPrefix(OS, GV, false);
+ }
+
+ if (!GV->getValueType()->isFunctionTy()) {
+ if (TT.isKnownWindowsMSVCEnvironment())
+ OS << ",DATA";
+ else
+ OS << ",data";
+ }
+}
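As a worked example of the output: for a dllexport'ed global variable bar, emitLinkerFlagsForGlobalCOFF writes " /EXPORT:bar,DATA" on an MSVC target and " -export:bar,data" on a MinGW or Cygwin target, where a leading data-layout global prefix character is additionally stripped from the mangled name; functions omit the ,DATA suffix.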
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index 1d1930459239..7228de3d2370 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -11,20 +11,50 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Metadata.h"
#include "LLVMContextImpl.h"
#include "MetadataImpl.h"
#include "SymbolTableListTraitsImpl.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/TrackingMDRef.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <tuple>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -1027,8 +1057,7 @@ static SmallVector<TrackingMDRef, 4> &getNMDOps(void *Operands) {
}
NamedMDNode::NamedMDNode(const Twine &N)
- : Name(N.str()), Parent(nullptr),
- Operands(new SmallVector<TrackingMDRef, 4>()) {}
+ : Name(N.str()), Operands(new SmallVector<TrackingMDRef, 4>()) {}
NamedMDNode::~NamedMDNode() {
dropAllReferences();
@@ -1308,17 +1337,26 @@ bool Instruction::extractProfTotalWeight(uint64_t &TotalVal) const {
return false;
auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
- if (!ProfDataName || !ProfDataName->getString().equals("branch_weights"))
+ if (!ProfDataName)
return false;
- TotalVal = 0;
- for (unsigned i = 1; i < ProfileData->getNumOperands(); i++) {
- auto *V = mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i));
- if (!V)
- return false;
- TotalVal += V->getValue().getZExtValue();
+ if (ProfDataName->getString().equals("branch_weights")) {
+ TotalVal = 0;
+ for (unsigned i = 1; i < ProfileData->getNumOperands(); i++) {
+ auto *V = mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i));
+ if (!V)
+ return false;
+ TotalVal += V->getValue().getZExtValue();
+ }
+ return true;
+ } else if (ProfDataName->getString().equals("VP") &&
+ ProfileData->getNumOperands() > 3) {
+ TotalVal = mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2))
+ ->getValue()
+ .getZExtValue();
+ return true;
}
- return true;
+ return false;
}
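With this change, extractProfTotalWeight also understands value-profiling metadata, e.g. !prof !{!"VP", i32 0, i64 1600, i64 &lt;target-hash&gt;, i64 &lt;count&gt;} (operand shape illustrative): it reports 1600, the total count stored in operand 2, instead of returning false.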
void Instruction::clearMetadataHashEntries() {
@@ -1446,7 +1484,7 @@ void GlobalObject::addTypeMetadata(unsigned Offset, Metadata *TypeID) {
addMetadata(
LLVMContext::MD_type,
*MDTuple::get(getContext(),
- {llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
+ {ConstantAsMetadata::get(llvm::ConstantInt::get(
Type::getInt64Ty(getContext()), Offset)),
TypeID}));
}
@@ -1459,6 +1497,15 @@ DISubprogram *Function::getSubprogram() const {
return cast_or_null<DISubprogram>(getMetadata(LLVMContext::MD_dbg));
}
+bool Function::isDebugInfoForProfiling() const {
+ if (DISubprogram *SP = getSubprogram()) {
+ if (DICompileUnit *CU = SP->getUnit()) {
+ return CU->getDebugInfoForProfiling();
+ }
+ }
+ return false;
+}
+
void GlobalVariable::addDebugInfo(DIGlobalVariableExpression *GV) {
addMetadata(LLVMContext::MD_dbg, *GV);
}
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
index 1911f84340c6..fec9df193685 100644
--- a/lib/IR/Module.cpp
+++ b/lib/IR/Module.cpp
@@ -120,9 +120,8 @@ void Module::getOperandBundleTags(SmallVectorImpl<StringRef> &Result) const {
// it. This is nice because it allows most passes to get away with not handling
// the symbol table directly for this common task.
//
-Constant *Module::getOrInsertFunction(StringRef Name,
- FunctionType *Ty,
- AttributeSet AttributeList) {
+Constant *Module::getOrInsertFunction(StringRef Name, FunctionType *Ty,
+ AttributeList AttributeList) {
// See if we have a definition for the specified function already.
GlobalValue *F = getNamedValue(Name);
if (!F) {
@@ -145,49 +144,7 @@ Constant *Module::getOrInsertFunction(StringRef Name,
Constant *Module::getOrInsertFunction(StringRef Name,
FunctionType *Ty) {
- return getOrInsertFunction(Name, Ty, AttributeSet());
-}
-
-// getOrInsertFunction - Look up the specified function in the module symbol
-// table. If it does not exist, add a prototype for the function and return it.
-// This version of the method takes a null terminated list of function
-// arguments, which makes it easier for clients to use.
-//
-Constant *Module::getOrInsertFunction(StringRef Name,
- AttributeSet AttributeList,
- Type *RetTy, ...) {
- va_list Args;
- va_start(Args, RetTy);
-
- // Build the list of argument types...
- std::vector<Type*> ArgTys;
- while (Type *ArgTy = va_arg(Args, Type*))
- ArgTys.push_back(ArgTy);
-
- va_end(Args);
-
- // Build the function type and chain to the other getOrInsertFunction...
- return getOrInsertFunction(Name,
- FunctionType::get(RetTy, ArgTys, false),
- AttributeList);
-}
-
-Constant *Module::getOrInsertFunction(StringRef Name,
- Type *RetTy, ...) {
- va_list Args;
- va_start(Args, RetTy);
-
- // Build the list of argument types...
- std::vector<Type*> ArgTys;
- while (Type *ArgTy = va_arg(Args, Type*))
- ArgTys.push_back(ArgTy);
-
- va_end(Args);
-
- // Build the function type and chain to the other getOrInsertFunction...
- return getOrInsertFunction(Name,
- FunctionType::get(RetTy, ArgTys, false),
- AttributeSet());
+ return getOrInsertFunction(Name, Ty, AttributeList());
}
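With the C-style variadic overloads removed, the nullptr terminator goes away. A migration sketch (M, VoidTy, and Int8PtrTy are assumed to be in scope):

  // Before: M->getOrInsertFunction("free", VoidTy, Int8PtrTy, nullptr);
  // After: either spell the FunctionType explicitly ...
  Constant *FreeFn = M->getOrInsertFunction(
      "free", FunctionType::get(VoidTy, {Int8PtrTy}, /*isVarArg=*/false));
  // ... or pass the argument types directly, as the createFree change
  // above does: M->getOrInsertFunction("free", VoidTy, Int8PtrTy);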
// getFunction - Look up the specified function in the module symbol table.
@@ -208,7 +165,8 @@ Function *Module::getFunction(StringRef Name) const {
/// If AllowLocal is set to true, this function will return globals that
/// have local linkage. By default, these globals are not returned.
///
-GlobalVariable *Module::getGlobalVariable(StringRef Name, bool AllowLocal) {
+GlobalVariable *Module::getGlobalVariable(StringRef Name,
+ bool AllowLocal) const {
if (GlobalVariable *Result =
dyn_cast_or_null<GlobalVariable>(getNamedValue(Name)))
if (AllowLocal || !Result->hasLocalLinkage())
@@ -465,6 +423,14 @@ void Module::dropAllReferences() {
GIF.dropAllReferences();
}
+unsigned Module::getNumberRegisterParameters() const {
+ auto *Val =
+ cast_or_null<ConstantAsMetadata>(getModuleFlag("NumRegisterParameters"));
+ if (!Val)
+ return 0;
+ return cast<ConstantInt>(Val->getValue())->getZExtValue();
+}
+
unsigned Module::getDwarfVersion() const {
auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("Dwarf Version"));
if (!Val)
diff --git a/lib/IR/Operator.cpp b/lib/IR/Operator.cpp
index 2fba24d99b30..7d819f3aae8d 100644
--- a/lib/IR/Operator.cpp
+++ b/lib/IR/Operator.cpp
@@ -1,4 +1,18 @@
+//===-- Operator.cpp - Implement the LLVM operators -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the non-inline methods for the LLVM Operator classes.
+//
+//===----------------------------------------------------------------------===//
+
#include "llvm/IR/Operator.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
diff --git a/lib/IR/OptBisect.cpp b/lib/IR/OptBisect.cpp
index e9574ca81261..b670c817569a 100644
--- a/lib/IR/OptBisect.cpp
+++ b/lib/IR/OptBisect.cpp
@@ -39,14 +39,6 @@ static void printPassMessage(const StringRef &Name, int PassNum,
<< "(" << PassNum << ") " << Name << " on " << TargetDesc << "\n";
}
-static void printCaseMessage(int CaseNum, StringRef Msg, bool Running) {
- if (Running)
- errs() << "BISECT: running case (";
- else
- errs() << "BISECT: NOT running case (";
- errs() << CaseNum << "): " << Msg << "\n";
-}
-
static std::string getDescription(const Module &M) {
return "module (" + M.getName().str() + ")";
}
@@ -108,13 +100,3 @@ bool OptBisect::checkPass(const StringRef PassName,
printPassMessage(PassName, CurBisectNum, TargetDesc, ShouldRun);
return ShouldRun;
}
-
-bool OptBisect::shouldRunCase(const Twine &Msg) {
- if (!BisectEnabled)
- return true;
- int CurFuelNum = ++LastBisectNum;
- bool ShouldRun = (OptBisectLimit == -1 || CurFuelNum <= OptBisectLimit);
- printCaseMessage(CurFuelNum, Msg.str(), ShouldRun);
- return ShouldRun;
-}
-
diff --git a/lib/IR/Pass.cpp b/lib/IR/Pass.cpp
index a42945ef3fff..f1b5f2f108dc 100644
--- a/lib/IR/Pass.cpp
+++ b/lib/IR/Pass.cpp
@@ -118,10 +118,12 @@ void Pass::print(raw_ostream &O,const Module*) const {
O << "Pass::print not implemented for pass: '" << getPassName() << "'!\n";
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// dump - call print(cerr);
LLVM_DUMP_METHOD void Pass::dump() const {
print(dbgs(), nullptr);
}
+#endif
//===----------------------------------------------------------------------===//
// ImmutablePass Implementation
diff --git a/lib/IR/PassManager.cpp b/lib/IR/PassManager.cpp
index 8f68bb1daecf..47fdfedfdde8 100644
--- a/lib/IR/PassManager.cpp
+++ b/lib/IR/PassManager.cpp
@@ -91,4 +91,6 @@ bool FunctionAnalysisManagerModuleProxy::Result::invalidate(
}
}
+AnalysisSetKey CFGAnalyses::SetKey;
+
AnalysisSetKey PreservedAnalyses::AllAnalysesKey;
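CFGAnalyses is a new analysis set key: passes that mutate instructions without touching the control-flow graph can use it to keep CFG-based analyses alive. A sketch of the reporting side, inside a new-PM pass's run method:

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>(); // dominators, loop info, etc. survive
  return PA;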
diff --git a/lib/IR/Statepoint.cpp b/lib/IR/Statepoint.cpp
index 63be1e780d81..8c3f0f208cc6 100644
--- a/lib/IR/Statepoint.cpp
+++ b/lib/IR/Statepoint.cpp
@@ -53,18 +53,19 @@ bool llvm::isStatepointDirectiveAttr(Attribute Attr) {
Attr.hasAttribute("statepoint-num-patch-bytes");
}
-StatepointDirectives llvm::parseStatepointDirectivesFromAttrs(AttributeSet AS) {
+StatepointDirectives
+llvm::parseStatepointDirectivesFromAttrs(AttributeList AS) {
StatepointDirectives Result;
Attribute AttrID =
- AS.getAttribute(AttributeSet::FunctionIndex, "statepoint-id");
+ AS.getAttribute(AttributeList::FunctionIndex, "statepoint-id");
uint64_t StatepointID;
if (AttrID.isStringAttribute())
if (!AttrID.getValueAsString().getAsInteger(10, StatepointID))
Result.StatepointID = StatepointID;
uint32_t NumPatchBytes;
- Attribute AttrNumPatchBytes = AS.getAttribute(AttributeSet::FunctionIndex,
+ Attribute AttrNumPatchBytes = AS.getAttribute(AttributeList::FunctionIndex,
"statepoint-num-patch-bytes");
if (AttrNumPatchBytes.isStringAttribute())
if (!AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes))
diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp
index ca866738f882..b67b0a307861 100644
--- a/lib/IR/Type.cpp
+++ b/lib/IR/Type.cpp
@@ -41,12 +41,6 @@ Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
}
}
-Type *Type::getScalarType() const {
- if (auto *VTy = dyn_cast<VectorType>(this))
- return VTy->getElementType();
- return const_cast<Type*>(this);
-}
-
bool Type::isIntegerTy(unsigned Bitwidth) const {
return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth;
}
diff --git a/lib/IR/TypeFinder.cpp b/lib/IR/TypeFinder.cpp
index dc4c1cffb20c..a178b9ec0f09 100644
--- a/lib/IR/TypeFinder.cpp
+++ b/lib/IR/TypeFinder.cpp
@@ -1,4 +1,4 @@
-//===-- TypeFinder.cpp - Implement the TypeFinder class -------------------===//
+//===- TypeFinder.cpp - Implement the TypeFinder class --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,13 +11,22 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/TypeFinder.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/TypeFinder.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include <utility>
+
using namespace llvm;
void TypeFinder::run(const Module &M, bool onlyNamed) {
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index 91a999b58004..b07c57685a26 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -320,7 +320,7 @@ void Value::takeName(Value *V) {
ST->reinsertValue(this);
}
-void Value::assertModuleIsMaterialized() const {
+void Value::assertModuleIsMaterializedImpl() const {
#ifndef NDEBUG
const GlobalValue *GV = dyn_cast<GlobalValue>(this);
if (!GV)
@@ -437,17 +437,17 @@ enum PointerStripKind {
};
template <PointerStripKind StripKind>
-static Value *stripPointerCastsAndOffsets(Value *V) {
+static const Value *stripPointerCastsAndOffsets(const Value *V) {
if (!V->getType()->isPointerTy())
return V;
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
- SmallPtrSet<Value *, 4> Visited;
+ SmallPtrSet<const Value *, 4> Visited;
Visited.insert(V);
do {
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (auto *GEP = dyn_cast<GEPOperator>(V)) {
switch (StripKind) {
case PSK_ZeroIndicesAndAliases:
case PSK_ZeroIndices:
@@ -467,13 +467,13 @@ static Value *stripPointerCastsAndOffsets(Value *V) {
} else if (Operator::getOpcode(V) == Instruction::BitCast ||
Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
V = cast<Operator>(V)->getOperand(0);
- } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ } else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
if (StripKind == PSK_ZeroIndices || GA->isInterposable())
return V;
V = GA->getAliasee();
} else {
- if (auto CS = CallSite(V))
- if (Value *RV = CS.getReturnedArgOperand()) {
+ if (auto CS = ImmutableCallSite(V))
+ if (const Value *RV = CS.getReturnedArgOperand()) {
V = RV;
continue;
}
@@ -487,20 +487,21 @@ static Value *stripPointerCastsAndOffsets(Value *V) {
}
} // end anonymous namespace
-Value *Value::stripPointerCasts() {
+const Value *Value::stripPointerCasts() const {
return stripPointerCastsAndOffsets<PSK_ZeroIndicesAndAliases>(this);
}
-Value *Value::stripPointerCastsNoFollowAliases() {
+const Value *Value::stripPointerCastsNoFollowAliases() const {
return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
}
-Value *Value::stripInBoundsConstantOffsets() {
+const Value *Value::stripInBoundsConstantOffsets() const {
return stripPointerCastsAndOffsets<PSK_InBoundsConstantIndices>(this);
}
-Value *Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
- APInt &Offset) {
+const Value *
+Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
+ APInt &Offset) const {
if (!getType()->isPointerTy())
return this;
@@ -510,11 +511,11 @@ Value *Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
- SmallPtrSet<Value *, 4> Visited;
+ SmallPtrSet<const Value *, 4> Visited;
Visited.insert(this);
- Value *V = this;
+ const Value *V = this;
do {
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (auto *GEP = dyn_cast<GEPOperator>(V)) {
if (!GEP->isInBounds())
return V;
APInt GEPOffset(Offset);
@@ -524,11 +525,11 @@ Value *Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
V = GEP->getPointerOperand();
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
V = cast<Operator>(V)->getOperand(0);
- } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ } else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
V = GA->getAliasee();
} else {
- if (auto CS = CallSite(V))
- if (Value *RV = CS.getReturnedArgOperand()) {
+ if (auto CS = ImmutableCallSite(V))
+ if (const Value *RV = CS.getReturnedArgOperand()) {
V = RV;
continue;
}
@@ -541,7 +542,7 @@ Value *Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL,
return V;
}
-Value *Value::stripInBoundsOffsets() {
+const Value *Value::stripInBoundsOffsets() const {
return stripPointerCastsAndOffsets<PSK_InBounds>(this);
}
@@ -633,7 +634,7 @@ unsigned Value::getPointerAlignment(const DataLayout &DL) const {
Align = DL.getPrefTypeAlignment(AllocatedType);
}
} else if (auto CS = ImmutableCallSite(this))
- Align = CS.getAttributes().getParamAlignment(AttributeSet::ReturnIndex);
+ Align = CS.getAttributes().getParamAlignment(AttributeList::ReturnIndex);
else if (const LoadInst *LI = dyn_cast<LoadInst>(this))
if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) {
ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0));
@@ -643,9 +644,9 @@ unsigned Value::getPointerAlignment(const DataLayout &DL) const {
return Align;
}
-Value *Value::DoPHITranslation(const BasicBlock *CurBB,
- const BasicBlock *PredBB) {
- PHINode *PN = dyn_cast<PHINode>(this);
+const Value *Value::DoPHITranslation(const BasicBlock *CurBB,
+ const BasicBlock *PredBB) const {
+ auto *PN = dyn_cast<PHINode>(this);
if (PN && PN->getParent() == CurBB)
return PN->getIncomingValueForBlock(PredBB);
return this;
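The stripping and PHI-translation helpers are now implemented once against const Value. The usual companion, assumed to live in the header rather than shown here, is a thin non-const forwarder so existing callers keep compiling:

  // Hypothetical inline overload in Value.h, forwarding to the const version.
  Value *stripPointerCasts() {
    return const_cast<Value *>(
        static_cast<const Value *>(this)->stripPointerCasts());
  }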
diff --git a/lib/IR/ValueSymbolTable.cpp b/lib/IR/ValueSymbolTable.cpp
index 8a6a320fc2d1..0c3946c8661e 100644
--- a/lib/IR/ValueSymbolTable.cpp
+++ b/lib/IR/ValueSymbolTable.cpp
@@ -1,4 +1,4 @@
-//===-- ValueSymbolTable.cpp - Implement the ValueSymbolTable class -------===//
+//===- ValueSymbolTable.cpp - Implement the ValueSymbolTable class --------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
@@ -99,13 +100,15 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
return makeUniqueName(V, UniqueName);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// dump - print out the symbol table
//
LLVM_DUMP_METHOD void ValueSymbolTable::dump() const {
- //DEBUG(dbgs() << "ValueSymbolTable:\n");
+ //dbgs() << "ValueSymbolTable:\n";
for (const auto &I : *this) {
- //DEBUG(dbgs() << " '" << I->getKeyData() << "' = ");
+ //dbgs() << " '" << I->getKeyData() << "' = ";
I.getValue()->dump();
- //DEBUG(dbgs() << "\n");
+ //dbgs() << "\n";
}
}
+#endif
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 5855059a189c..4e04020f206e 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -277,6 +277,9 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
/// already.
bool SawFrameEscape;
+ /// Whether the current function has a DISubprogram attached to it.
+ bool HasDebugInfo = false;
+
/// Stores the count of how many objects were passed to llvm.localescape for a
/// given function and the largest index passed to llvm.localrecover.
DenseMap<Function *, std::pair<unsigned, unsigned>> FrameEscapeInfo;
@@ -297,6 +300,9 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
// constant expressions, we can arrive at a particular user many times.
SmallPtrSet<const Value *, 32> GlobalValueVisited;
+ // Keeps track of duplicate function argument debug info.
+ SmallVector<const DILocalVariable *, 16> DebugFnArgs;
+
TBAAVerifier TBAAVerifyHelper;
void checkAtomicMemAccessSize(Type *Ty, const Instruction *I);
@@ -342,6 +348,7 @@ public:
visit(const_cast<Function &>(F));
verifySiblingFuncletUnwinds();
InstsInThisBlock.clear();
+ DebugFnArgs.clear();
LandingPadResultTy = nullptr;
SawFrameEscape = false;
SiblingFuncletInfo.clear();
@@ -457,6 +464,7 @@ private:
void visitUserOp1(Instruction &I);
void visitUserOp2(Instruction &I) { visitUserOp1(I); }
void visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS);
+ void visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI);
template <class DbgIntrinsicTy>
void visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII);
void visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI);
@@ -481,12 +489,11 @@ private:
void verifyMustTailCall(CallInst &CI);
bool performTypeCheck(Intrinsic::ID ID, Function *F, Type *Ty, int VT,
unsigned ArgNo, std::string &Suffix);
- bool verifyAttributeCount(AttributeSet Attrs, unsigned Params);
- void verifyAttributeTypes(AttributeSet Attrs, unsigned Idx, bool isFunction,
+ bool verifyAttributeCount(AttributeList Attrs, unsigned Params);
+ void verifyAttributeTypes(AttributeSet Attrs, bool IsFunction,
const Value *V);
- void verifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
- bool isReturnValue, const Value *V);
- void verifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
+ void verifyParameterAttrs(AttributeSet Attrs, Type *Ty, const Value *V);
+ void verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
const Value *V);
void verifyFunctionMetadata(ArrayRef<std::pair<unsigned, MDNode *>> MDs);
@@ -497,6 +504,7 @@ private:
void verifySiblingFuncletUnwinds();
void verifyFragmentExpression(const DbgInfoIntrinsic &I);
+ void verifyFnArgs(const DbgInfoIntrinsic &I);
/// Module-level debug info verification...
void verifyCompileUnits();
@@ -652,7 +660,8 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
if (auto *GVE = dyn_cast<DIGlobalVariableExpression>(MD))
visitDIGlobalVariableExpression(*GVE);
else
- AssertDI(false, "!dbg attachment of global variable must be a DIGlobalVariableExpression");
+ AssertDI(false, "!dbg attachment of global variable must be a "
+ "DIGlobalVariableExpression");
}
if (!GV.hasInitializer()) {
@@ -822,28 +831,6 @@ static bool isType(const Metadata *MD) { return !MD || isa<DIType>(MD); }
static bool isScope(const Metadata *MD) { return !MD || isa<DIScope>(MD); }
static bool isDINode(const Metadata *MD) { return !MD || isa<DINode>(MD); }
-template <class Ty>
-static bool isValidMetadataArrayImpl(const MDTuple &N, bool AllowNull) {
- for (Metadata *MD : N.operands()) {
- if (MD) {
- if (!isa<Ty>(MD))
- return false;
- } else {
- if (!AllowNull)
- return false;
- }
- }
- return true;
-}
-
-template <class Ty> static bool isValidMetadataArray(const MDTuple &N) {
- return isValidMetadataArrayImpl<Ty>(N, /* AllowNull */ false);
-}
-
-template <class Ty> static bool isValidMetadataNullArray(const MDTuple &N) {
- return isValidMetadataArrayImpl<Ty>(N, /* AllowNull */ true);
-}
-
void Verifier::visitDILocation(const DILocation &N) {
AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
"location requires a valid scope", &N, N.getRawScope());
@@ -900,6 +887,13 @@ void Verifier::visitDIDerivedType(const DIDerivedType &N) {
AssertDI(isScope(N.getRawScope()), "invalid scope", &N, N.getRawScope());
AssertDI(isType(N.getRawBaseType()), "invalid base type", &N,
N.getRawBaseType());
+
+ if (N.getDWARFAddressSpace()) {
+ AssertDI(N.getTag() == dwarf::DW_TAG_pointer_type ||
+ N.getTag() == dwarf::DW_TAG_reference_type,
+ "DWARF address space only applies to pointer or reference types",
+ &N);
+ }
}
static bool hasConflictingReferenceFlags(unsigned Flags) {
@@ -1024,6 +1018,8 @@ void Verifier::visitDISubprogram(const DISubprogram &N) {
AssertDI(isScope(N.getRawScope()), "invalid scope", &N, N.getRawScope());
if (auto *F = N.getRawFile())
AssertDI(isa<DIFile>(F), "invalid file", &N, F);
+ else
+ AssertDI(N.getLine() == 0, "line specified with no file", &N, N.getLine());
if (auto *T = N.getRawType())
AssertDI(isa<DISubroutineType>(T), "invalid subroutine type", &N, T);
AssertDI(isType(N.getRawContainingType()), "invalid containing type", &N,
@@ -1312,71 +1308,73 @@ Verifier::visitModuleFlag(const MDNode *Op,
}
}
-void Verifier::verifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
- bool isFunction, const Value *V) {
- unsigned Slot = ~0U;
- for (unsigned I = 0, E = Attrs.getNumSlots(); I != E; ++I)
- if (Attrs.getSlotIndex(I) == Idx) {
- Slot = I;
- break;
- }
+/// Return true if this attribute kind only applies to functions.
+static bool isFuncOnlyAttr(Attribute::AttrKind Kind) {
+ switch (Kind) {
+ case Attribute::NoReturn:
+ case Attribute::NoUnwind:
+ case Attribute::NoInline:
+ case Attribute::AlwaysInline:
+ case Attribute::OptimizeForSize:
+ case Attribute::StackProtect:
+ case Attribute::StackProtectReq:
+ case Attribute::StackProtectStrong:
+ case Attribute::SafeStack:
+ case Attribute::NoRedZone:
+ case Attribute::NoImplicitFloat:
+ case Attribute::Naked:
+ case Attribute::InlineHint:
+ case Attribute::StackAlignment:
+ case Attribute::UWTable:
+ case Attribute::NonLazyBind:
+ case Attribute::ReturnsTwice:
+ case Attribute::SanitizeAddress:
+ case Attribute::SanitizeThread:
+ case Attribute::SanitizeMemory:
+ case Attribute::MinSize:
+ case Attribute::NoDuplicate:
+ case Attribute::Builtin:
+ case Attribute::NoBuiltin:
+ case Attribute::Cold:
+ case Attribute::OptimizeNone:
+ case Attribute::JumpTable:
+ case Attribute::Convergent:
+ case Attribute::ArgMemOnly:
+ case Attribute::NoRecurse:
+ case Attribute::InaccessibleMemOnly:
+ case Attribute::InaccessibleMemOrArgMemOnly:
+ case Attribute::AllocSize:
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
- assert(Slot != ~0U && "Attribute set inconsistency!");
+/// Return true if this is a function attribute that can also appear on
+/// arguments.
+static bool isFuncOrArgAttr(Attribute::AttrKind Kind) {
+ return Kind == Attribute::ReadOnly || Kind == Attribute::WriteOnly ||
+ Kind == Attribute::ReadNone;
+}
- for (AttributeSet::iterator I = Attrs.begin(Slot), E = Attrs.end(Slot);
- I != E; ++I) {
- if (I->isStringAttribute())
+void Verifier::verifyAttributeTypes(AttributeSet Attrs, bool IsFunction,
+ const Value *V) {
+ for (Attribute A : Attrs) {
+ if (A.isStringAttribute())
continue;
- if (I->getKindAsEnum() == Attribute::NoReturn ||
- I->getKindAsEnum() == Attribute::NoUnwind ||
- I->getKindAsEnum() == Attribute::NoInline ||
- I->getKindAsEnum() == Attribute::AlwaysInline ||
- I->getKindAsEnum() == Attribute::OptimizeForSize ||
- I->getKindAsEnum() == Attribute::StackProtect ||
- I->getKindAsEnum() == Attribute::StackProtectReq ||
- I->getKindAsEnum() == Attribute::StackProtectStrong ||
- I->getKindAsEnum() == Attribute::SafeStack ||
- I->getKindAsEnum() == Attribute::NoRedZone ||
- I->getKindAsEnum() == Attribute::NoImplicitFloat ||
- I->getKindAsEnum() == Attribute::Naked ||
- I->getKindAsEnum() == Attribute::InlineHint ||
- I->getKindAsEnum() == Attribute::StackAlignment ||
- I->getKindAsEnum() == Attribute::UWTable ||
- I->getKindAsEnum() == Attribute::NonLazyBind ||
- I->getKindAsEnum() == Attribute::ReturnsTwice ||
- I->getKindAsEnum() == Attribute::SanitizeAddress ||
- I->getKindAsEnum() == Attribute::SanitizeThread ||
- I->getKindAsEnum() == Attribute::SanitizeMemory ||
- I->getKindAsEnum() == Attribute::MinSize ||
- I->getKindAsEnum() == Attribute::NoDuplicate ||
- I->getKindAsEnum() == Attribute::Builtin ||
- I->getKindAsEnum() == Attribute::NoBuiltin ||
- I->getKindAsEnum() == Attribute::Cold ||
- I->getKindAsEnum() == Attribute::OptimizeNone ||
- I->getKindAsEnum() == Attribute::JumpTable ||
- I->getKindAsEnum() == Attribute::Convergent ||
- I->getKindAsEnum() == Attribute::ArgMemOnly ||
- I->getKindAsEnum() == Attribute::NoRecurse ||
- I->getKindAsEnum() == Attribute::InaccessibleMemOnly ||
- I->getKindAsEnum() == Attribute::InaccessibleMemOrArgMemOnly ||
- I->getKindAsEnum() == Attribute::AllocSize) {
- if (!isFunction) {
- CheckFailed("Attribute '" + I->getAsString() +
- "' only applies to functions!", V);
- return;
- }
- } else if (I->getKindAsEnum() == Attribute::ReadOnly ||
- I->getKindAsEnum() == Attribute::WriteOnly ||
- I->getKindAsEnum() == Attribute::ReadNone) {
- if (Idx == 0) {
- CheckFailed("Attribute '" + I->getAsString() +
- "' does not apply to function returns");
+ if (isFuncOnlyAttr(A.getKindAsEnum())) {
+ if (!IsFunction) {
+ CheckFailed("Attribute '" + A.getAsString() +
+ "' only applies to functions!",
+ V);
return;
}
- } else if (isFunction) {
- CheckFailed("Attribute '" + I->getAsString() +
- "' does not apply to functions!", V);
+ } else if (IsFunction && !isFuncOrArgAttr(A.getKindAsEnum())) {
+ CheckFailed("Attribute '" + A.getAsString() +
+ "' does not apply to functions!",
+ V);
return;
}
}
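
A minimal self-contained sketch of the pattern this hunk adopts, with hypothetical names rather than the LLVM API: one switch-based predicate classifies attribute kinds, and the validation loop consults it instead of repeating a long chain of comparisons.

    #include <cassert>
    #include <initializer_list>

    enum class AttrKind { NoReturn, ReadOnly, ByVal }; // hypothetical stand-ins

    // The switch is the single source of truth for function-only kinds.
    static bool isFuncOnly(AttrKind K) {
      switch (K) {
      case AttrKind::NoReturn:
        return true;
      default:
        return false;
      }
    }

    static bool verify(std::initializer_list<AttrKind> Attrs, bool IsFunction) {
      for (AttrKind K : Attrs)
        if (isFuncOnly(K) && !IsFunction)
          return false; // would report "only applies to functions"
      return true;
    }

    int main() {
      assert(verify({AttrKind::NoReturn}, /*IsFunction=*/true));
      assert(!verify({AttrKind::NoReturn}, /*IsFunction=*/false));
    }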
@@ -1384,106 +1382,91 @@ void Verifier::verifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
// VerifyParameterAttrs - Check the given attributes for an argument or return
// value of the specified type. The value V is printed in error messages.
-void Verifier::verifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
- bool isReturnValue, const Value *V) {
- if (!Attrs.hasAttributes(Idx))
+void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
+ const Value *V) {
+ if (!Attrs.hasAttributes())
return;
- verifyAttributeTypes(Attrs, Idx, false, V);
-
- if (isReturnValue)
- Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
- !Attrs.hasAttribute(Idx, Attribute::Nest) &&
- !Attrs.hasAttribute(Idx, Attribute::StructRet) &&
- !Attrs.hasAttribute(Idx, Attribute::NoCapture) &&
- !Attrs.hasAttribute(Idx, Attribute::Returned) &&
- !Attrs.hasAttribute(Idx, Attribute::InAlloca) &&
- !Attrs.hasAttribute(Idx, Attribute::SwiftSelf) &&
- !Attrs.hasAttribute(Idx, Attribute::SwiftError),
- "Attributes 'byval', 'inalloca', 'nest', 'sret', 'nocapture', "
- "'returned', 'swiftself', and 'swifterror' do not apply to return "
- "values!",
- V);
+ verifyAttributeTypes(Attrs, /*IsFunction=*/false, V);
// Check for mutually incompatible attributes. Only inreg is compatible with
// sret.
unsigned AttrCount = 0;
- AttrCount += Attrs.hasAttribute(Idx, Attribute::ByVal);
- AttrCount += Attrs.hasAttribute(Idx, Attribute::InAlloca);
- AttrCount += Attrs.hasAttribute(Idx, Attribute::StructRet) ||
- Attrs.hasAttribute(Idx, Attribute::InReg);
- AttrCount += Attrs.hasAttribute(Idx, Attribute::Nest);
+ AttrCount += Attrs.hasAttribute(Attribute::ByVal);
+ AttrCount += Attrs.hasAttribute(Attribute::InAlloca);
+ AttrCount += Attrs.hasAttribute(Attribute::StructRet) ||
+ Attrs.hasAttribute(Attribute::InReg);
+ AttrCount += Attrs.hasAttribute(Attribute::Nest);
Assert(AttrCount <= 1, "Attributes 'byval', 'inalloca', 'inreg', 'nest', "
"and 'sret' are incompatible!",
V);
- Assert(!(Attrs.hasAttribute(Idx, Attribute::InAlloca) &&
- Attrs.hasAttribute(Idx, Attribute::ReadOnly)),
+ Assert(!(Attrs.hasAttribute(Attribute::InAlloca) &&
+ Attrs.hasAttribute(Attribute::ReadOnly)),
"Attributes "
"'inalloca and readonly' are incompatible!",
V);
- Assert(!(Attrs.hasAttribute(Idx, Attribute::StructRet) &&
- Attrs.hasAttribute(Idx, Attribute::Returned)),
+ Assert(!(Attrs.hasAttribute(Attribute::StructRet) &&
+ Attrs.hasAttribute(Attribute::Returned)),
"Attributes "
"'sret and returned' are incompatible!",
V);
- Assert(!(Attrs.hasAttribute(Idx, Attribute::ZExt) &&
- Attrs.hasAttribute(Idx, Attribute::SExt)),
+ Assert(!(Attrs.hasAttribute(Attribute::ZExt) &&
+ Attrs.hasAttribute(Attribute::SExt)),
"Attributes "
"'zeroext and signext' are incompatible!",
V);
- Assert(!(Attrs.hasAttribute(Idx, Attribute::ReadNone) &&
- Attrs.hasAttribute(Idx, Attribute::ReadOnly)),
+ Assert(!(Attrs.hasAttribute(Attribute::ReadNone) &&
+ Attrs.hasAttribute(Attribute::ReadOnly)),
"Attributes "
"'readnone and readonly' are incompatible!",
V);
- Assert(!(Attrs.hasAttribute(Idx, Attribute::ReadNone) &&
- Attrs.hasAttribute(Idx, Attribute::WriteOnly)),
+ Assert(!(Attrs.hasAttribute(Attribute::ReadNone) &&
+ Attrs.hasAttribute(Attribute::WriteOnly)),
"Attributes "
"'readnone and writeonly' are incompatible!",
V);
- Assert(!(Attrs.hasAttribute(Idx, Attribute::ReadOnly) &&
- Attrs.hasAttribute(Idx, Attribute::WriteOnly)),
+ Assert(!(Attrs.hasAttribute(Attribute::ReadOnly) &&
+ Attrs.hasAttribute(Attribute::WriteOnly)),
"Attributes "
"'readonly and writeonly' are incompatible!",
V);
- Assert(!(Attrs.hasAttribute(Idx, Attribute::NoInline) &&
- Attrs.hasAttribute(Idx, Attribute::AlwaysInline)),
+ Assert(!(Attrs.hasAttribute(Attribute::NoInline) &&
+ Attrs.hasAttribute(Attribute::AlwaysInline)),
"Attributes "
"'noinline and alwaysinline' are incompatible!",
V);
- Assert(
- !AttrBuilder(Attrs, Idx).overlaps(AttributeFuncs::typeIncompatible(Ty)),
- "Wrong types for attribute: " +
- AttributeSet::get(Context, Idx, AttributeFuncs::typeIncompatible(Ty))
- .getAsString(Idx),
- V);
+ AttrBuilder IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty);
+ Assert(!AttrBuilder(Attrs).overlaps(IncompatibleAttrs),
+ "Wrong types for attribute: " +
+ AttributeSet::get(Context, IncompatibleAttrs).getAsString(),
+ V);
if (PointerType *PTy = dyn_cast<PointerType>(Ty)) {
SmallPtrSet<Type*, 4> Visited;
if (!PTy->getElementType()->isSized(&Visited)) {
- Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal) &&
- !Attrs.hasAttribute(Idx, Attribute::InAlloca),
+ Assert(!Attrs.hasAttribute(Attribute::ByVal) &&
+ !Attrs.hasAttribute(Attribute::InAlloca),
"Attributes 'byval' and 'inalloca' do not support unsized types!",
V);
}
if (!isa<PointerType>(PTy->getElementType()))
- Assert(!Attrs.hasAttribute(Idx, Attribute::SwiftError),
+ Assert(!Attrs.hasAttribute(Attribute::SwiftError),
"Attribute 'swifterror' only applies to parameters "
"with pointer to pointer type!",
V);
} else {
- Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal),
+ Assert(!Attrs.hasAttribute(Attribute::ByVal),
"Attribute 'byval' only applies to parameters with pointer type!",
V);
- Assert(!Attrs.hasAttribute(Idx, Attribute::SwiftError),
+ Assert(!Attrs.hasAttribute(Attribute::SwiftError),
"Attribute 'swifterror' only applies to parameters "
"with pointer type!",
V);
@@ -1492,7 +1475,7 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
// Check parameter attributes against a function type.
// The value V is printed in error messages.
-void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
+void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
const Value *V) {
if (Attrs.isEmpty())
return;
@@ -1503,122 +1486,124 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
bool SawSwiftSelf = false;
bool SawSwiftError = false;
- for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
- unsigned Idx = Attrs.getSlotIndex(i);
-
- Type *Ty;
- if (Idx == 0)
- Ty = FT->getReturnType();
- else if (Idx-1 < FT->getNumParams())
- Ty = FT->getParamType(Idx-1);
- else
- break; // VarArgs attributes, verified elsewhere.
+ // Verify return value attributes.
+ AttributeSet RetAttrs = Attrs.getRetAttributes();
+ Assert((!RetAttrs.hasAttribute(Attribute::ByVal) &&
+ !RetAttrs.hasAttribute(Attribute::Nest) &&
+ !RetAttrs.hasAttribute(Attribute::StructRet) &&
+ !RetAttrs.hasAttribute(Attribute::NoCapture) &&
+ !RetAttrs.hasAttribute(Attribute::Returned) &&
+ !RetAttrs.hasAttribute(Attribute::InAlloca) &&
+ !RetAttrs.hasAttribute(Attribute::SwiftSelf) &&
+ !RetAttrs.hasAttribute(Attribute::SwiftError)),
+ "Attributes 'byval', 'inalloca', 'nest', 'sret', 'nocapture', "
+ "'returned', 'swiftself', and 'swifterror' do not apply to return "
+ "values!",
+ V);
+ Assert((!RetAttrs.hasAttribute(Attribute::ReadOnly) &&
+ !RetAttrs.hasAttribute(Attribute::WriteOnly) &&
+ !RetAttrs.hasAttribute(Attribute::ReadNone)),
+ "Attribute '" + RetAttrs.getAsString() +
+ "' does not apply to function returns",
+ V);
+ verifyParameterAttrs(RetAttrs, FT->getReturnType(), V);
- verifyParameterAttrs(Attrs, Idx, Ty, Idx == 0, V);
+ // Verify parameter attributes.
+ for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+ Type *Ty = FT->getParamType(i);
+ AttributeSet ArgAttrs = Attrs.getParamAttributes(i);
- if (Idx == 0)
- continue;
+ verifyParameterAttrs(ArgAttrs, Ty, V);
- if (Attrs.hasAttribute(Idx, Attribute::Nest)) {
+ if (ArgAttrs.hasAttribute(Attribute::Nest)) {
Assert(!SawNest, "More than one parameter has attribute nest!", V);
SawNest = true;
}
- if (Attrs.hasAttribute(Idx, Attribute::Returned)) {
+ if (ArgAttrs.hasAttribute(Attribute::Returned)) {
Assert(!SawReturned, "More than one parameter has attribute returned!",
V);
Assert(Ty->canLosslesslyBitCastTo(FT->getReturnType()),
- "Incompatible "
- "argument and return types for 'returned' attribute",
+ "Incompatible argument and return types for 'returned' attribute",
V);
SawReturned = true;
}
- if (Attrs.hasAttribute(Idx, Attribute::StructRet)) {
+ if (ArgAttrs.hasAttribute(Attribute::StructRet)) {
Assert(!SawSRet, "Cannot have multiple 'sret' parameters!", V);
- Assert(Idx == 1 || Idx == 2,
+ Assert(i == 0 || i == 1,
"Attribute 'sret' is not on first or second parameter!", V);
SawSRet = true;
}
- if (Attrs.hasAttribute(Idx, Attribute::SwiftSelf)) {
+ if (ArgAttrs.hasAttribute(Attribute::SwiftSelf)) {
Assert(!SawSwiftSelf, "Cannot have multiple 'swiftself' parameters!", V);
SawSwiftSelf = true;
}
- if (Attrs.hasAttribute(Idx, Attribute::SwiftError)) {
+ if (ArgAttrs.hasAttribute(Attribute::SwiftError)) {
Assert(!SawSwiftError, "Cannot have multiple 'swifterror' parameters!",
V);
SawSwiftError = true;
}
- if (Attrs.hasAttribute(Idx, Attribute::InAlloca)) {
- Assert(Idx == FT->getNumParams(), "inalloca isn't on the last parameter!",
- V);
+ if (ArgAttrs.hasAttribute(Attribute::InAlloca)) {
+ Assert(i == FT->getNumParams() - 1,
+ "inalloca isn't on the last parameter!", V);
}
}
- if (!Attrs.hasAttributes(AttributeSet::FunctionIndex))
+ if (!Attrs.hasAttributes(AttributeList::FunctionIndex))
return;
- verifyAttributeTypes(Attrs, AttributeSet::FunctionIndex, true, V);
+ verifyAttributeTypes(Attrs.getFnAttributes(), /*IsFunction=*/true, V);
- Assert(
- !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly)),
- "Attributes 'readnone and readonly' are incompatible!", V);
+ Assert(!(Attrs.hasFnAttribute(Attribute::ReadNone) &&
+ Attrs.hasFnAttribute(Attribute::ReadOnly)),
+ "Attributes 'readnone and readonly' are incompatible!", V);
- Assert(
- !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::WriteOnly)),
- "Attributes 'readnone and writeonly' are incompatible!", V);
+ Assert(!(Attrs.hasFnAttribute(Attribute::ReadNone) &&
+ Attrs.hasFnAttribute(Attribute::WriteOnly)),
+ "Attributes 'readnone and writeonly' are incompatible!", V);
- Assert(
- !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::WriteOnly)),
- "Attributes 'readonly and writeonly' are incompatible!", V);
+ Assert(!(Attrs.hasFnAttribute(Attribute::ReadOnly) &&
+ Attrs.hasFnAttribute(Attribute::WriteOnly)),
+ "Attributes 'readonly and writeonly' are incompatible!", V);
- Assert(
- !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::InaccessibleMemOrArgMemOnly)),
- "Attributes 'readnone and inaccessiblemem_or_argmemonly' are incompatible!", V);
+ Assert(!(Attrs.hasFnAttribute(Attribute::ReadNone) &&
+ Attrs.hasFnAttribute(Attribute::InaccessibleMemOrArgMemOnly)),
+ "Attributes 'readnone and inaccessiblemem_or_argmemonly' are "
+ "incompatible!",
+ V);
- Assert(
- !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::InaccessibleMemOnly)),
- "Attributes 'readnone and inaccessiblememonly' are incompatible!", V);
+ Assert(!(Attrs.hasFnAttribute(Attribute::ReadNone) &&
+ Attrs.hasFnAttribute(Attribute::InaccessibleMemOnly)),
+ "Attributes 'readnone and inaccessiblememonly' are incompatible!", V);
- Assert(
- !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::NoInline) &&
- Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::AlwaysInline)),
- "Attributes 'noinline and alwaysinline' are incompatible!", V);
-
- if (Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeNone)) {
- Assert(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::NoInline),
+ Assert(!(Attrs.hasFnAttribute(Attribute::NoInline) &&
+ Attrs.hasFnAttribute(Attribute::AlwaysInline)),
+ "Attributes 'noinline and alwaysinline' are incompatible!", V);
+
+ if (Attrs.hasFnAttribute(Attribute::OptimizeNone)) {
+ Assert(Attrs.hasFnAttribute(Attribute::NoInline),
"Attribute 'optnone' requires 'noinline'!", V);
- Assert(!Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize),
+ Assert(!Attrs.hasFnAttribute(Attribute::OptimizeForSize),
"Attributes 'optsize and optnone' are incompatible!", V);
- Assert(!Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize),
+ Assert(!Attrs.hasFnAttribute(Attribute::MinSize),
"Attributes 'minsize and optnone' are incompatible!", V);
}
- if (Attrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::JumpTable)) {
+ if (Attrs.hasFnAttribute(Attribute::JumpTable)) {
const GlobalValue *GV = cast<GlobalValue>(V);
Assert(GV->hasGlobalUnnamedAddr(),
"Attribute 'jumptable' requires 'unnamed_addr'", V);
}
- if (Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::AllocSize)) {
+ if (Attrs.hasFnAttribute(Attribute::AllocSize)) {
std::pair<unsigned, Optional<unsigned>> Args =
- Attrs.getAllocSizeArgs(AttributeSet::FunctionIndex);
+ Attrs.getAllocSizeArgs(AttributeList::FunctionIndex);
auto CheckParam = [&](StringRef Name, unsigned ParamNo) {
if (ParamNo >= FT->getNumParams()) {
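
For readers tracking the index shift in this hunk, a standalone sketch (hypothetical structs, not AttributeList/AttributeSet) of the old flat index space, where 0 was the return value and parameters started at 1, versus the new zero-based per-parameter access:

    #include <cassert>
    #include <vector>

    // Old flat scheme: index 0 = return value, 1..N = parameters.
    struct FlatAttrs {
      std::vector<bool> Nest; // indexed by flat attribute index
      bool hasNest(unsigned Idx) const { return Idx < Nest.size() && Nest[Idx]; }
    };

    // New scheme: one attribute set per parameter, addressed zero-based.
    struct ParamAttrs { bool Nest; };
    struct AttrList { std::vector<ParamAttrs> Params; };

    int main() {
      FlatAttrs Old{{false, true}};     // parameter #0 has nest (flat index 1)
      AttrList New{{ParamAttrs{true}}}; // the same fact, zero-based
      assert(Old.hasNest(1) == New.Params[0].Nest);
    }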
@@ -1649,8 +1634,8 @@ void Verifier::verifyFunctionMetadata(
for (const auto &Pair : MDs) {
if (Pair.first == LLVMContext::MD_prof) {
MDNode *MD = Pair.second;
- Assert(MD->getNumOperands() == 2,
- "!prof annotations should have exactly 2 operands", MD);
+    Assert(MD->getNumOperands() >= 2,
+           "!prof annotations should have at least 2 operands", MD);
// Check first operand.
Assert(MD->getOperand(0) != nullptr, "first operand should not be null",
@@ -1725,15 +1710,15 @@ void Verifier::visitConstantExpr(const ConstantExpr *CE) {
}
}
-bool Verifier::verifyAttributeCount(AttributeSet Attrs, unsigned Params) {
+bool Verifier::verifyAttributeCount(AttributeList Attrs, unsigned Params) {
if (Attrs.getNumSlots() == 0)
return true;
unsigned LastSlot = Attrs.getNumSlots() - 1;
unsigned LastIndex = Attrs.getSlotIndex(LastSlot);
- if (LastIndex <= Params
- || (LastIndex == AttributeSet::FunctionIndex
- && (LastSlot == 0 || Attrs.getSlotIndex(LastSlot - 1) <= Params)))
+ if (LastIndex <= Params ||
+ (LastIndex == AttributeList::FunctionIndex &&
+ (LastSlot == 0 || Attrs.getSlotIndex(LastSlot - 1) <= Params)))
return true;
return false;
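
The invariant this helper relies on, sketched standalone (a hypothetical vector standing in for the slot API): slot indices are sorted ascending with the function index (~0U) last, so only the final one or two slots need checking.

    #include <cassert>
    #include <vector>

    static bool countOk(const std::vector<unsigned> &SlotIdx, unsigned Params) {
      const unsigned FunctionIndex = ~0U;
      if (SlotIdx.empty())
        return true;
      unsigned Last = SlotIdx.back();
      if (Last <= Params)
        return true;
      return Last == FunctionIndex &&
             (SlotIdx.size() == 1 || SlotIdx[SlotIdx.size() - 2] <= Params);
    }

    int main() {
      assert((countOk({1, 2, ~0U}, 2))); // params 1..2 plus function attributes
      assert((!countOk({3}, 2)));        // an attribute after the last parameter
    }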
@@ -1963,7 +1948,7 @@ void Verifier::visitFunction(const Function &F) {
Assert(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
"Invalid struct return type!", &F);
- AttributeSet Attrs = F.getAttributes();
+ AttributeList Attrs = F.getAttributes();
Assert(verifyAttributeCount(Attrs, FT->getNumParams()),
"Attribute after last parameter!", &F);
@@ -1974,7 +1959,7 @@ void Verifier::visitFunction(const Function &F) {
// On function declarations/definitions, we do not support the builtin
// attribute. We do not check this in VerifyFunctionAttrs since that is
// checking for Attributes that can/can not ever be on functions.
- Assert(!Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::Builtin),
+ Assert(!Attrs.hasFnAttribute(Attribute::Builtin),
"Attribute 'builtin' can only be applied to a callsite.", &F);
// Check that this function meets the restrictions on this calling convention.
@@ -1984,6 +1969,18 @@ void Verifier::visitFunction(const Function &F) {
default:
case CallingConv::C:
break;
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ Assert(F.getReturnType()->isVoidTy(),
+ "Calling convention requires void return type", &F);
+ LLVM_FALLTHROUGH;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ Assert(!F.hasStructRetAttr(),
+ "Calling convention does not allow sret", &F);
+ LLVM_FALLTHROUGH;
case CallingConv::Fast:
case CallingConv::Cold:
case CallingConv::Intel_OCL_BI:
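
The layered requirements in the new switch, restated as a runnable standalone sketch (hypothetical enum, not llvm::CallingConv): kernel conventions must pass both checks, the shader conventions only the sret check, via deliberate fallthrough.

    #include <cassert>

    enum class CC { C, AmdKernel, AmdPS };

    static bool isValid(CC Conv, bool ReturnsVoid, bool HasSRet) {
      switch (Conv) {
      case CC::AmdKernel:
        if (!ReturnsVoid)
          return false;
        [[fallthrough]]; // kernels also inherit the sret restriction
      case CC::AmdPS:
        if (HasSRet)
          return false;
        [[fallthrough]];
      default:
        return true;
      }
    }

    int main() {
      assert(isValid(CC::AmdKernel, true, false));
      assert(!isValid(CC::AmdKernel, false, false)); // non-void kernel
      assert(!isValid(CC::AmdPS, true, true));       // sret not allowed
      assert(isValid(CC::C, false, true));           // C allows both
    }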
@@ -2014,7 +2011,7 @@ void Verifier::visitFunction(const Function &F) {
}
// Check that swifterror argument is only used by loads and stores.
- if (Attrs.hasAttribute(i+1, Attribute::SwiftError)) {
+ if (Attrs.hasParamAttribute(i, Attribute::SwiftError)) {
verifySwiftErrorValue(&Arg);
}
++i;
@@ -2113,11 +2110,10 @@ void Verifier::visitFunction(const Function &F) {
"Function is marked as dllimport, but not external.", &F);
auto *N = F.getSubprogram();
- if (!N)
+ HasDebugInfo = (N != nullptr);
+ if (!HasDebugInfo)
return;
- visitDISubprogram(*N);
-
  // Check that all !dbg attachments lead back to N (or, at least, another
// subprogram that describes the same function).
//
@@ -2601,7 +2597,7 @@ void Verifier::verifyCallSite(CallSite CS) {
"Call parameter type does not match function signature!",
CS.getArgument(i), FTy->getParamType(i), I);
- AttributeSet Attrs = CS.getAttributes();
+ AttributeList Attrs = CS.getAttributes();
Assert(verifyAttributeCount(Attrs, CS.arg_size()),
"Attribute after last parameter!", I);
@@ -2623,7 +2619,7 @@ void Verifier::verifyCallSite(CallSite CS) {
// make sure the underlying alloca/parameter it comes from has a swifterror as
// well.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- if (CS.paramHasAttr(i+1, Attribute::SwiftError)) {
+ if (CS.paramHasAttr(i, Attribute::SwiftError)) {
Value *SwiftErrorArg = CS.getArgument(i);
if (auto AI = dyn_cast<AllocaInst>(SwiftErrorArg->stripInBoundsOffsets())) {
Assert(AI->isSwiftError(),
@@ -2641,24 +2637,25 @@ void Verifier::verifyCallSite(CallSite CS) {
bool SawNest = false;
bool SawReturned = false;
- for (unsigned Idx = 1; Idx < 1 + FTy->getNumParams(); ++Idx) {
- if (Attrs.hasAttribute(Idx, Attribute::Nest))
+ for (unsigned Idx = 0; Idx < FTy->getNumParams(); ++Idx) {
+ if (Attrs.hasParamAttribute(Idx, Attribute::Nest))
SawNest = true;
- if (Attrs.hasAttribute(Idx, Attribute::Returned))
+ if (Attrs.hasParamAttribute(Idx, Attribute::Returned))
SawReturned = true;
}
// Check attributes on the varargs part.
- for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) {
- Type *Ty = CS.getArgument(Idx-1)->getType();
- verifyParameterAttrs(Attrs, Idx, Ty, false, I);
+ for (unsigned Idx = FTy->getNumParams(); Idx < CS.arg_size(); ++Idx) {
+ Type *Ty = CS.getArgument(Idx)->getType();
+ AttributeSet ArgAttrs = Attrs.getParamAttributes(Idx);
+ verifyParameterAttrs(ArgAttrs, Ty, I);
- if (Attrs.hasAttribute(Idx, Attribute::Nest)) {
+ if (ArgAttrs.hasAttribute(Attribute::Nest)) {
Assert(!SawNest, "More than one parameter has attribute nest!", I);
SawNest = true;
}
- if (Attrs.hasAttribute(Idx, Attribute::Returned)) {
+ if (ArgAttrs.hasAttribute(Attribute::Returned)) {
Assert(!SawReturned, "More than one parameter has attribute returned!",
I);
Assert(Ty->canLosslesslyBitCastTo(FTy->getReturnType()),
@@ -2668,11 +2665,12 @@ void Verifier::verifyCallSite(CallSite CS) {
SawReturned = true;
}
- Assert(!Attrs.hasAttribute(Idx, Attribute::StructRet),
+ Assert(!ArgAttrs.hasAttribute(Attribute::StructRet),
"Attribute 'sret' cannot be used for vararg call arguments!", I);
- if (Attrs.hasAttribute(Idx, Attribute::InAlloca))
- Assert(Idx == CS.arg_size(), "inalloca isn't on the last argument!", I);
+ if (ArgAttrs.hasAttribute(Attribute::InAlloca))
+ Assert(Idx == CS.arg_size() - 1, "inalloca isn't on the last argument!",
+ I);
}
}
@@ -2726,9 +2724,9 @@ void Verifier::verifyCallSite(CallSite CS) {
// do so causes assertion failures when the inliner sets up inline scope info.
if (I->getFunction()->getSubprogram() && CS.getCalledFunction() &&
CS.getCalledFunction()->getSubprogram())
- Assert(I->getDebugLoc(), "inlinable function call in a function with debug "
- "info must have a !dbg location",
- I);
+ AssertDI(I->getDebugLoc(), "inlinable function call in a function with "
+ "debug info must have a !dbg location",
+ I);
visitInstruction(*I);
}
@@ -2745,17 +2743,17 @@ static bool isTypeCongruent(Type *L, Type *R) {
return PL->getAddressSpace() == PR->getAddressSpace();
}
-static AttrBuilder getParameterABIAttributes(int I, AttributeSet Attrs) {
+static AttrBuilder getParameterABIAttributes(int I, AttributeList Attrs) {
static const Attribute::AttrKind ABIAttrs[] = {
Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
Attribute::InReg, Attribute::Returned, Attribute::SwiftSelf,
Attribute::SwiftError};
AttrBuilder Copy;
for (auto AK : ABIAttrs) {
- if (Attrs.hasAttribute(I + 1, AK))
+ if (Attrs.hasParamAttribute(I, AK))
Copy.addAttribute(AK);
}
- if (Attrs.hasAttribute(I + 1, Attribute::Alignment))
+ if (Attrs.hasParamAttribute(I, Attribute::Alignment))
Copy.addAlignmentAttr(Attrs.getParamAlignment(I + 1));
return Copy;
}
@@ -2787,8 +2785,8 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
// - All ABI-impacting function attributes, such as sret, byval, inreg,
// returned, and inalloca, must match.
- AttributeSet CallerAttrs = F->getAttributes();
- AttributeSet CalleeAttrs = CI.getAttributes();
+ AttributeList CallerAttrs = F->getAttributes();
+ AttributeList CalleeAttrs = CI.getAttributes();
for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
AttrBuilder CallerABIAttrs = getParameterABIAttributes(I, CallerAttrs);
AttrBuilder CalleeABIAttrs = getParameterABIAttributes(I, CalleeAttrs);
@@ -3116,7 +3114,7 @@ void Verifier::verifySwiftErrorCallSite(CallSite CS,
for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
I != E; ++I, ++Idx) {
if (*I == SwiftErrorVal) {
- Assert(CS.paramHasAttr(Idx+1, Attribute::SwiftError),
+ Assert(CS.paramHasAttr(Idx, Attribute::SwiftError),
"swifterror value when used in a callsite should be marked "
"with swifterror attribute",
SwiftErrorVal, CS);
@@ -3148,8 +3146,9 @@ void Verifier::verifySwiftErrorValue(const Value *SwiftErrorVal) {
void Verifier::visitAllocaInst(AllocaInst &AI) {
SmallPtrSet<Type*, 4> Visited;
PointerType *PTy = AI.getType();
- Assert(PTy->getAddressSpace() == 0,
- "Allocation instruction pointer not in the generic address space!",
+ // TODO: Relax this restriction?
+ Assert(PTy->getAddressSpace() == DL.getAllocaAddrSpace(),
+ "Allocation instruction pointer not in the stack address space!",
&AI);
Assert(AI.getAllocatedType()->isSized(&Visited),
"Cannot allocate unsized type", &AI);
@@ -3929,6 +3928,14 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
"constant int",
CS);
break;
+ case Intrinsic::experimental_constrained_fadd:
+ case Intrinsic::experimental_constrained_fsub:
+ case Intrinsic::experimental_constrained_fmul:
+ case Intrinsic::experimental_constrained_fdiv:
+ case Intrinsic::experimental_constrained_frem:
+ visitConstrainedFPIntrinsic(
+ cast<ConstrainedFPIntrinsic>(*CS.getInstruction()));
+ break;
case Intrinsic::dbg_declare: // llvm.dbg.declare
Assert(isa<MetadataAsValue>(CS.getArgOperand(0)),
"invalid llvm.dbg.declare intrinsic call 1", CS);
@@ -4294,6 +4301,15 @@ static DISubprogram *getSubprogram(Metadata *LocalScope) {
return nullptr;
}
+void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
+ Assert(isa<MetadataAsValue>(FPI.getOperand(2)),
+ "invalid rounding mode argument", &FPI);
+ Assert(FPI.getRoundingMode() != ConstrainedFPIntrinsic::rmInvalid,
+ "invalid rounding mode argument", &FPI);
+ Assert(FPI.getExceptionBehavior() != ConstrainedFPIntrinsic::ebInvalid,
+ "invalid exception behavior argument", &FPI);
+}
+
template <class DbgIntrinsicTy>
void Verifier::visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII) {
auto *MD = cast<MetadataAsValue>(DII.getArgOperand(0))->getMetadata();
@@ -4330,6 +4346,8 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII) {
" variable and !dbg attachment",
&DII, BB, F, Var, Var->getScope()->getSubprogram(), Loc,
Loc->getScope()->getSubprogram());
+
+ verifyFnArgs(DII);
}
static uint64_t getVariableSize(const DILocalVariable &V) {
@@ -4398,15 +4416,49 @@ void Verifier::verifyFragmentExpression(const DbgInfoIntrinsic &I) {
AssertDI(FragSize != VarSize, "fragment covers entire variable", &I, V, E);
}
+void Verifier::verifyFnArgs(const DbgInfoIntrinsic &I) {
+  // This function does not take the scope of non-inlined function arguments
+  // into account. Don't run it if the current function is nodebug, because it
+  // may contain inlined debug intrinsics.
+ if (!HasDebugInfo)
+ return;
+
+ DILocalVariable *Var;
+ if (auto *DV = dyn_cast<DbgValueInst>(&I)) {
+ // For performance reasons only check non-inlined ones.
+ if (DV->getDebugLoc()->getInlinedAt())
+ return;
+ Var = DV->getVariable();
+ } else {
+ auto *DD = cast<DbgDeclareInst>(&I);
+ if (DD->getDebugLoc()->getInlinedAt())
+ return;
+ Var = DD->getVariable();
+ }
+ AssertDI(Var, "dbg intrinsic without variable");
+
+ unsigned ArgNo = Var->getArg();
+ if (!ArgNo)
+ return;
+
+ // Verify there are no duplicate function argument debug info entries.
+ // These will cause hard-to-debug assertions in the DWARF backend.
+ if (DebugFnArgs.size() < ArgNo)
+ DebugFnArgs.resize(ArgNo, nullptr);
+
+ auto *Prev = DebugFnArgs[ArgNo - 1];
+ DebugFnArgs[ArgNo - 1] = Var;
+ AssertDI(!Prev || (Prev == Var), "conflicting debug info for argument", &I,
+ Prev, Var);
+}
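
The duplicate-detection core of verifyFnArgs, as a self-contained sketch with hypothetical types: grow the per-argument table on demand, then compare the previously recorded variable against the current one.

    #include <cassert>
    #include <string>
    #include <vector>

    // Record the variable seen for a 1-based argument number; report a conflict
    // when a different variable was already recorded for the same argument.
    static bool recordArg(std::vector<const std::string *> &Seen, unsigned ArgNo,
                          const std::string *Var) {
      if (Seen.size() < ArgNo)
        Seen.resize(ArgNo, nullptr);
      const std::string *Prev = Seen[ArgNo - 1];
      Seen[ArgNo - 1] = Var;
      return !Prev || Prev == Var; // false => conflicting debug info
    }

    int main() {
      std::string X = "x", Y = "y";
      std::vector<const std::string *> Seen;
      assert(recordArg(Seen, 1, &X));  // first sighting of argument 1
      assert(recordArg(Seen, 1, &X));  // the same variable again is fine
      assert(!recordArg(Seen, 1, &Y)); // a different variable conflicts
    }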
+
void Verifier::verifyCompileUnits() {
auto *CUs = M.getNamedMetadata("llvm.dbg.cu");
SmallPtrSet<const Metadata *, 2> Listed;
if (CUs)
Listed.insert(CUs->op_begin(), CUs->op_end());
- AssertDI(
- all_of(CUVisited,
- [&Listed](const Metadata *CU) { return Listed.count(CU); }),
- "All DICompileUnits must be listed in llvm.dbg.cu");
+ for (auto *CU : CUVisited)
+ AssertDI(Listed.count(CU), "DICompileUnit not listed in llvm.dbg.cu", CU);
CUVisited.clear();
}
diff --git a/lib/LTO/CMakeLists.txt b/lib/LTO/CMakeLists.txt
index c73143eb330b..73b5662d4bc8 100644
--- a/lib/LTO/CMakeLists.txt
+++ b/lib/LTO/CMakeLists.txt
@@ -1,52 +1,3 @@
-# Figure out if we can track VC revisions.
-function(find_first_existing_file out_var)
- foreach(file ${ARGN})
- if(EXISTS "${file}")
- set(${out_var} "${file}" PARENT_SCOPE)
- return()
- endif()
- endforeach()
-endfunction()
-
-macro(find_first_existing_vc_file out_var path)
- find_first_existing_file(${out_var}
- "${path}/.git/logs/HEAD" # Git
- "${path}/.svn/wc.db" # SVN 1.7
- "${path}/.svn/entries" # SVN 1.6
- )
-endmacro()
-
-find_first_existing_vc_file(llvm_vc "${LLVM_MAIN_SRC_DIR}")
-
-# The VC revision include that we want to generate.
-set(version_inc "${CMAKE_CURRENT_BINARY_DIR}/LLVMLTORevision.h")
-
-set(get_svn_script "${LLVM_CMAKE_PATH}/GenerateVersionFromCVS.cmake")
-
-if(DEFINED llvm_vc)
- # Create custom target to generate the VC revision include.
- add_custom_command(OUTPUT "${version_inc}"
- DEPENDS "${llvm_vc}" "${get_svn_script}"
- COMMAND
- ${CMAKE_COMMAND} "-DSOURCE_DIR=${LLVM_MAIN_SRC_DIR}"
- "-DNAME=LLVM_REVISION"
- "-DHEADER_FILE=${version_inc}"
- -P "${get_svn_script}")
-
- # Mark the generated header as being generated.
- set_source_files_properties("${version_inc}"
- PROPERTIES GENERATED TRUE
- HEADER_FILE_ONLY TRUE)
-
- # Tell Version.cpp that it needs to build with -DHAVE_SVN_VERSION_INC.
- set_source_files_properties(Version.cpp
- PROPERTIES COMPILE_DEFINITIONS "HAVE_SVN_VERSION_INC")
-else()
- # Not producing a VC revision include.
- set(version_inc)
-endif()
-
-
add_llvm_library(LLVMLTO
Caching.cpp
LTO.cpp
@@ -55,11 +6,11 @@ add_llvm_library(LLVMLTO
LTOCodeGenerator.cpp
UpdateCompilerUsed.cpp
ThinLTOCodeGenerator.cpp
- ${version_inc}
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/LTO
DEPENDS
intrinsics_gen
+ llvm_vcsrevision_h
)
diff --git a/lib/LTO/Caching.cpp b/lib/LTO/Caching.cpp
index fd5bdb0bc01a..e32e46c4c3c8 100644
--- a/lib/LTO/Caching.cpp
+++ b/lib/LTO/Caching.cpp
@@ -13,6 +13,7 @@
#include "llvm/LTO/Caching.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
@@ -21,70 +22,71 @@
using namespace llvm;
using namespace llvm::lto;
-static void commitEntry(StringRef TempFilename, StringRef EntryPath) {
- // Rename to final destination (hopefully race condition won't matter here)
- auto EC = sys::fs::rename(TempFilename, EntryPath);
- if (EC) {
- // Renaming failed, probably not the same filesystem, copy and delete.
- // FIXME: Avoid needing to do this by creating the temporary file in the
- // cache directory.
- {
- auto ReloadedBufferOrErr = MemoryBuffer::getFile(TempFilename);
- if (auto EC = ReloadedBufferOrErr.getError())
- report_fatal_error(Twine("Failed to open temp file '") + TempFilename +
- "': " + EC.message() + "\n");
+Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath,
+ AddBufferFn AddBuffer) {
+ if (std::error_code EC = sys::fs::create_directories(CacheDirectoryPath))
+ return errorCodeToError(EC);
- raw_fd_ostream OS(EntryPath, EC, sys::fs::F_None);
- if (EC)
- report_fatal_error(Twine("Failed to open ") + EntryPath +
- " to save cached entry\n");
- // I'm not sure what are the guarantee if two processes are doing this
- // at the same time.
- OS << (*ReloadedBufferOrErr)->getBuffer();
- }
- sys::fs::remove(TempFilename);
- }
-}
-
-NativeObjectCache lto::localCache(std::string CacheDirectoryPath,
- AddFileFn AddFile) {
return [=](unsigned Task, StringRef Key) -> AddStreamFn {
- // First, see if we have a cache hit.
+ // This choice of file name allows the cache to be pruned (see pruneCache()
+ // in include/llvm/Support/CachePruning.h).
SmallString<64> EntryPath;
- sys::path::append(EntryPath, CacheDirectoryPath, Key);
- if (sys::fs::exists(EntryPath)) {
- AddFile(Task, EntryPath);
+ sys::path::append(EntryPath, CacheDirectoryPath, "llvmcache-" + Key);
+ // First, see if we have a cache hit.
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
+ MemoryBuffer::getFile(EntryPath);
+ if (MBOrErr) {
+ AddBuffer(Task, std::move(*MBOrErr));
return AddStreamFn();
}
+ if (MBOrErr.getError() != errc::no_such_file_or_directory)
+ report_fatal_error(Twine("Failed to open cache file ") + EntryPath +
+ ": " + MBOrErr.getError().message() + "\n");
+
    // This native object stream is responsible for committing the resulting
- // file to the cache and calling AddFile to add it to the link.
+ // file to the cache and calling AddBuffer to add it to the link.
struct CacheStream : NativeObjectStream {
- AddFileFn AddFile;
+ AddBufferFn AddBuffer;
std::string TempFilename;
std::string EntryPath;
unsigned Task;
- CacheStream(std::unique_ptr<raw_pwrite_stream> OS, AddFileFn AddFile,
+ CacheStream(std::unique_ptr<raw_pwrite_stream> OS, AddBufferFn AddBuffer,
std::string TempFilename, std::string EntryPath,
unsigned Task)
- : NativeObjectStream(std::move(OS)), AddFile(AddFile),
- TempFilename(TempFilename), EntryPath(EntryPath), Task(Task) {}
+ : NativeObjectStream(std::move(OS)), AddBuffer(std::move(AddBuffer)),
+ TempFilename(std::move(TempFilename)),
+ EntryPath(std::move(EntryPath)), Task(Task) {}
~CacheStream() {
+ // FIXME: This code could race with the cache pruner, but it is unlikely
+ // that the cache pruner will choose to remove a newly created file.
+
// Make sure the file is closed before committing it.
OS.reset();
- commitEntry(TempFilename, EntryPath);
- AddFile(Task, EntryPath);
+ // This is atomic on POSIX systems.
+ if (auto EC = sys::fs::rename(TempFilename, EntryPath))
+ report_fatal_error(Twine("Failed to rename temporary file ") +
+ TempFilename + ": " + EC.message() + "\n");
+
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
+ MemoryBuffer::getFile(EntryPath);
+ if (!MBOrErr)
+ report_fatal_error(Twine("Failed to open cache file ") + EntryPath +
+ ": " + MBOrErr.getError().message() + "\n");
+ AddBuffer(Task, std::move(*MBOrErr));
}
};
return [=](size_t Task) -> std::unique_ptr<NativeObjectStream> {
      // Write to a temporary to avoid a race condition
int TempFD;
- SmallString<64> TempFilename;
+ SmallString<64> TempFilenameModel, TempFilename;
+    sys::path::append(TempFilenameModel, CacheDirectoryPath,
+                      "Thin-%%%%%%.tmp.o");
std::error_code EC =
- sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename);
+ sys::fs::createUniqueFile(TempFilenameModel, TempFD, TempFilename,
+ sys::fs::owner_read | sys::fs::owner_write);
if (EC) {
errs() << "Error: " << EC.message() << "\n";
report_fatal_error("ThinLTO: Can't get a temporary file");
@@ -93,7 +95,7 @@ NativeObjectCache lto::localCache(std::string CacheDirectoryPath,
// This CacheStream will move the temporary file into the cache when done.
return llvm::make_unique<CacheStream>(
llvm::make_unique<raw_fd_ostream>(TempFD, /* ShouldClose */ true),
- AddFile, TempFilename.str(), EntryPath.str(), Task);
+ AddBuffer, TempFilename.str(), EntryPath.str(), Task);
};
};
}
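
A standalone sketch of the commit discipline the new CacheStream destructor depends on, assuming C++17 <filesystem> rather than llvm::sys::fs: write to a temporary in the same directory, close it, then rename it into place so readers never see a partial entry.

    #include <cstdio>
    #include <filesystem>
    #include <fstream>
    #include <string>

    namespace fs = std::filesystem;

    // On POSIX the rename atomically replaces the target, so a concurrent
    // reader never observes a half-written cache entry.
    static void commitAtomically(const fs::path &Final, const std::string &Data) {
      fs::path Tmp = Final;
      Tmp += ".tmp";
      {
        std::ofstream OS(Tmp, std::ios::binary);
        OS << Data;
      } // stream closed before the rename, as in the destructor above
      fs::rename(Tmp, Final);
    }

    int main() {
      commitAtomically("llvmcache-demo", "object bytes");
      std::remove("llvmcache-demo"); // clean up the demo entry
    }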
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index e3e2f9f806c8..9782c898bf50 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -20,9 +20,13 @@
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/LTO/LTOBackend.h"
#include "llvm/Linker/IRMover.h"
+#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
@@ -31,6 +35,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
+#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -46,6 +51,12 @@ using namespace object;
#define DEBUG_TYPE "lto"
+// The values are (type identifier, summary) pairs.
+typedef DenseMap<
+ GlobalValue::GUID,
+ TinyPtrVector<const std::pair<const std::string, TypeIdSummary> *>>
+ TypeIdSummariesByGuidTy;
+
// Returns a unique hash for the Module considering the current list of
// export/import and other global analysis results.
// The hash is produced in \p Key.
@@ -54,7 +65,8 @@ static void computeCacheKey(
StringRef ModuleID, const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
- const GVSummaryMapTy &DefinedGlobals) {
+ const GVSummaryMapTy &DefinedGlobals,
+ const TypeIdSummariesByGuidTy &TypeIdSummariesByGuid) {
// Compute the unique hash for this entry.
// This is based on the current compiler version, the module itself, the
// export list, the hash for every single module in the import list, the
@@ -63,7 +75,7 @@ static void computeCacheKey(
// Start with the compiler revision
Hasher.update(LLVM_VERSION_STRING);
-#ifdef HAVE_LLVM_REVISION
+#ifdef LLVM_REVISION
Hasher.update(LLVM_REVISION);
#endif
@@ -80,6 +92,18 @@ static void computeCacheKey(
Data[3] = I >> 24;
Hasher.update(ArrayRef<uint8_t>{Data, 4});
};
+ auto AddUint64 = [&](uint64_t I) {
+ uint8_t Data[8];
+ Data[0] = I;
+ Data[1] = I >> 8;
+ Data[2] = I >> 16;
+ Data[3] = I >> 24;
+ Data[4] = I >> 32;
+ Data[5] = I >> 40;
+ Data[6] = I >> 48;
+ Data[7] = I >> 56;
+ Hasher.update(ArrayRef<uint8_t>{Data, 8});
+ };
AddString(Conf.CPU);
// FIXME: Hash more of Options. For now all clients initialize Options from
// command-line flags (which is unsupported in production), but may set
@@ -94,6 +118,7 @@ static void computeCacheKey(
AddUnsigned(Conf.RelocModel);
AddUnsigned(Conf.CodeModel);
AddUnsigned(Conf.CGOptLevel);
+ AddUnsigned(Conf.CGFileType);
AddUnsigned(Conf.OptLevel);
AddString(Conf.OptPipeline);
AddString(Conf.AAPipeline);
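
AddUint64 above serializes bytes in a fixed order before hashing; a self-contained illustration with a toy FNV-1a hash (the patch feeds these bytes to its Hasher) of why that keeps cache keys endian-stable:

    #include <cassert>
    #include <cstdint>

    // Mix one byte into a toy FNV-1a state.
    static void addByte(uint64_t &H, uint8_t B) {
      H = (H ^ B) * 1099511628211ULL;
    }

    // Feed a uint64 low byte first, mirroring AddUint64 in the patch, so the
    // resulting key does not depend on the host's in-memory byte order.
    static void addUint64(uint64_t &H, uint64_t I) {
      for (int Shift = 0; Shift < 64; Shift += 8)
        addByte(H, static_cast<uint8_t>(I >> Shift));
    }

    int main() {
      uint64_t A = 14695981039346656037ULL; // FNV offset basis
      uint64_t B = A;
      addUint64(A, 0x0102030405060708ULL);
      for (uint8_t Byte : {0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01})
        addByte(B, Byte);
      assert(A == B); // matches an explicit little-endian byte stream
    }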
@@ -107,10 +132,16 @@ static void computeCacheKey(
// The export list can impact the internalization, be conservative here
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F)));
- // Include the hash for every module we import functions from
+ // Include the hash for every module we import functions from. The set of
+ // imported symbols for each module may affect code generation and is
+ // sensitive to link order, so include that as well.
for (auto &Entry : ImportList) {
auto ModHash = Index.getModuleHash(Entry.first());
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
+
+ AddUint64(Entry.second.size());
+ for (auto &Fn : Entry.second)
+ AddUint64(Fn.first);
}
// Include the hash for the resolved ODR.
@@ -121,12 +152,68 @@ static void computeCacheKey(
sizeof(GlobalValue::LinkageTypes)));
}
+ std::set<GlobalValue::GUID> UsedTypeIds;
+
+ auto AddUsedTypeIds = [&](GlobalValueSummary *GS) {
+ auto *FS = dyn_cast_or_null<FunctionSummary>(GS);
+ if (!FS)
+ return;
+ for (auto &TT : FS->type_tests())
+ UsedTypeIds.insert(TT);
+ for (auto &TT : FS->type_test_assume_vcalls())
+ UsedTypeIds.insert(TT.GUID);
+ for (auto &TT : FS->type_checked_load_vcalls())
+ UsedTypeIds.insert(TT.GUID);
+ for (auto &TT : FS->type_test_assume_const_vcalls())
+ UsedTypeIds.insert(TT.VFunc.GUID);
+ for (auto &TT : FS->type_checked_load_const_vcalls())
+ UsedTypeIds.insert(TT.VFunc.GUID);
+ };
+
// Include the hash for the linkage type to reflect internalization and weak
- // resolution.
+ // resolution, and collect any used type identifier resolutions.
for (auto &GS : DefinedGlobals) {
GlobalValue::LinkageTypes Linkage = GS.second->linkage();
Hasher.update(
ArrayRef<uint8_t>((const uint8_t *)&Linkage, sizeof(Linkage)));
+ AddUsedTypeIds(GS.second);
+ }
+
+ // Imported functions may introduce new uses of type identifier resolutions,
+ // so we need to collect their used resolutions as well.
+ for (auto &ImpM : ImportList)
+ for (auto &ImpF : ImpM.second)
+ AddUsedTypeIds(Index.findSummaryInModule(ImpF.first, ImpM.first()));
+
+ auto AddTypeIdSummary = [&](StringRef TId, const TypeIdSummary &S) {
+ AddString(TId);
+
+ AddUnsigned(S.TTRes.TheKind);
+ AddUnsigned(S.TTRes.SizeM1BitWidth);
+
+ AddUint64(S.WPDRes.size());
+ for (auto &WPD : S.WPDRes) {
+ AddUnsigned(WPD.first);
+ AddUnsigned(WPD.second.TheKind);
+ AddString(WPD.second.SingleImplName);
+
+ AddUint64(WPD.second.ResByArg.size());
+ for (auto &ByArg : WPD.second.ResByArg) {
+ AddUint64(ByArg.first.size());
+ for (uint64_t Arg : ByArg.first)
+ AddUint64(Arg);
+ AddUnsigned(ByArg.second.TheKind);
+ AddUint64(ByArg.second.Info);
+ }
+ }
+ };
+
+ // Include the hash for all type identifiers used by this module.
+ for (GlobalValue::GUID TId : UsedTypeIds) {
+ auto SummariesI = TypeIdSummariesByGuid.find(TId);
+ if (SummariesI != TypeIdSummariesByGuid.end())
+ for (auto *Summary : SummariesI->second)
+ AddTypeIdSummary(Summary->first, Summary->second);
}
if (!Conf.SampleProfile.empty()) {
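
A hypothetical standalone miniature of the GUID-to-summaries index used above (std::unordered_map standing in for DenseMap, std::hash for the GUID function): build the index once, then each cache-key computation is a lookup rather than a rehash.

    #include <cassert>
    #include <cstddef>
    #include <functional>
    #include <string>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    using Entry = std::pair<const std::string, int>; // int stands in for a summary

    int main() {
      std::vector<Entry> TypeIds = {{"typeid.a", 1}, {"typeid.b", 2}};
      std::unordered_map<std::size_t, std::vector<const Entry *>> ByGuid;
      auto Guid = [](const std::string &S) { return std::hash<std::string>{}(S); };
      for (const Entry &E : TypeIds)
        ByGuid[Guid(E.first)].push_back(&E);
      assert(ByGuid[Guid("typeid.a")].front()->second == 1);
    }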
@@ -164,9 +251,7 @@ static void thinLTOResolveWeakForLinkerGUID(
}
// Alias and aliasee can't be turned into available_externally.
else if (!isa<AliasSummary>(S.get()) &&
- !GlobalInvolvedWithAlias.count(S.get()) &&
- (GlobalValue::isLinkOnceODRLinkage(OriginalLinkage) ||
- GlobalValue::isWeakODRLinkage(OriginalLinkage)))
+ !GlobalInvolvedWithAlias.count(S.get()))
S->setLinkage(GlobalValue::AvailableExternallyLinkage);
if (S->linkage() != OriginalLinkage)
recordNewLinkage(S->modulePath(), GUID, S->linkage());
@@ -220,14 +305,6 @@ void llvm::thinLTOInternalizeAndPromoteInIndex(
thinLTOInternalizeAndPromoteGUID(I.second, I.first, isExported);
}
-struct InputFile::InputModule {
- BitcodeModule BM;
- std::unique_ptr<Module> Mod;
-
- // The range of ModuleSymbolTable entries for this input module.
- size_t SymBegin, SymEnd;
-};
-
-// Requires a destructor for std::vector<InputModule>.
+// Requires a destructor for std::vector<BitcodeModule>.
InputFile::~InputFile() = default;
@@ -248,61 +325,52 @@ Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) {
return make_error<StringError>("Bitcode file does not contain any modules",
inconvertibleErrorCode());
- // Create an InputModule for each module in the InputFile, and add it to the
- // ModuleSymbolTable.
+ File->Mods = *BMsOrErr;
+
+ LLVMContext Ctx;
+ std::vector<Module *> Mods;
+ std::vector<std::unique_ptr<Module>> OwnedMods;
for (auto BM : *BMsOrErr) {
Expected<std::unique_ptr<Module>> MOrErr =
- BM.getLazyModule(File->Ctx, /*ShouldLazyLoadMetadata*/ true,
+ BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true,
/*IsImporting*/ false);
if (!MOrErr)
return MOrErr.takeError();
- size_t SymBegin = File->SymTab.symbols().size();
- File->SymTab.addModule(MOrErr->get());
- size_t SymEnd = File->SymTab.symbols().size();
+ if ((*MOrErr)->getDataLayoutStr().empty())
+ return make_error<StringError>("input module has no datalayout",
+ inconvertibleErrorCode());
- for (const auto &C : (*MOrErr)->getComdatSymbolTable()) {
- auto P = File->ComdatMap.insert(
- std::make_pair(&C.second, File->Comdats.size()));
- assert(P.second);
- (void)P;
- File->Comdats.push_back(C.first());
- }
+ Mods.push_back(MOrErr->get());
+ OwnedMods.push_back(std::move(*MOrErr));
+ }
- File->Mods.push_back({BM, std::move(*MOrErr), SymBegin, SymEnd});
+ SmallVector<char, 0> Symtab;
+ if (Error E = irsymtab::build(Mods, Symtab, File->Strtab))
+ return std::move(E);
+
+ irsymtab::Reader R({Symtab.data(), Symtab.size()},
+ {File->Strtab.data(), File->Strtab.size()});
+ File->TargetTriple = R.getTargetTriple();
+ File->SourceFileName = R.getSourceFileName();
+ File->COFFLinkerOpts = R.getCOFFLinkerOpts();
+ File->ComdatTable = R.getComdatTable();
+
+ for (unsigned I = 0; I != Mods.size(); ++I) {
+ size_t Begin = File->Symbols.size();
+ for (const irsymtab::Reader::SymbolRef &Sym : R.module_symbols(I))
+ // Skip symbols that are irrelevant to LTO. Note that this condition needs
+ // to match the one in Skip() in LTO::addRegularLTO().
+ if (Sym.isGlobal() && !Sym.isFormatSpecific())
+ File->Symbols.push_back(Sym);
+ File->ModuleSymIndices.push_back({Begin, File->Symbols.size()});
}
return std::move(File);
}
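
The [Begin, End) bookkeeping at the end of InputFile::create, reduced to a self-contained sketch: filter each module's symbols into one flat vector and record the covered range, as ModuleSymIndices does above.

    #include <cassert>
    #include <cstddef>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::vector<int>> Modules = {{1, -2, 3}, {-4, 5}};
      std::vector<int> Symbols;
      std::vector<std::pair<std::size_t, std::size_t>> Ranges;
      for (const auto &M : Modules) {
        std::size_t Begin = Symbols.size();
        for (int S : M)
          if (S > 0) // stand-in for "global and not format-specific"
            Symbols.push_back(S);
        Ranges.push_back({Begin, Symbols.size()});
      }
      assert((Ranges[0] == std::pair<std::size_t, std::size_t>{0, 2}));
      assert((Ranges[1] == std::pair<std::size_t, std::size_t>{2, 3}));
    }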
-Expected<int> InputFile::Symbol::getComdatIndex() const {
- if (!isGV())
- return -1;
- const GlobalObject *GO = getGV()->getBaseObject();
- if (!GO)
- return make_error<StringError>("Unable to determine comdat of alias!",
- inconvertibleErrorCode());
- if (const Comdat *C = GO->getComdat()) {
- auto I = File->ComdatMap.find(C);
- assert(I != File->ComdatMap.end());
- return I->second;
- }
- return -1;
-}
-
StringRef InputFile::getName() const {
- return Mods[0].BM.getModuleIdentifier();
-}
-
-StringRef InputFile::getSourceFileName() const {
- return Mods[0].Mod->getSourceFileName();
-}
-
-iterator_range<InputFile::symbol_iterator>
-InputFile::module_symbols(InputModule &IM) {
- return llvm::make_range(
- symbol_iterator(SymTab.symbols().data() + IM.SymBegin, SymTab, this),
- symbol_iterator(SymTab.symbols().data() + IM.SymEnd, SymTab, this));
+ return Mods[0].getModuleIdentifier();
}
LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel,
@@ -326,21 +394,17 @@ LTO::LTO(Config Conf, ThinBackend Backend,
LTO::~LTO() = default;
// Add the given symbol to the GlobalResolutions map, and resolve its partition.
-void LTO::addSymbolToGlobalRes(SmallPtrSet<GlobalValue *, 8> &Used,
- const InputFile::Symbol &Sym,
+void LTO::addSymbolToGlobalRes(const InputFile::Symbol &Sym,
SymbolResolution Res, unsigned Partition) {
- GlobalValue *GV = Sym.isGV() ? Sym.getGV() : nullptr;
-
auto &GlobalRes = GlobalResolutions[Sym.getName()];
- if (GV) {
- GlobalRes.UnnamedAddr &= GV->hasGlobalUnnamedAddr();
- if (Res.Prevailing)
- GlobalRes.IRName = GV->getName();
- }
+ GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
+ if (Res.Prevailing)
+ GlobalRes.IRName = Sym.getIRName();
+
// Set the partition to external if we know it is used elsewhere, e.g.
// it is visible to a regular object, is referenced from llvm.compiler_used,
// or was already recorded as being referenced from a different partition.
- if (Res.VisibleToRegularObj || (GV && Used.count(GV)) ||
+ if (Res.VisibleToRegularObj || Sym.isUsed() ||
(GlobalRes.Partition != GlobalResolution::Unknown &&
GlobalRes.Partition != Partition)) {
GlobalRes.Partition = GlobalResolution::External;
@@ -372,6 +436,7 @@ static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
OS << 'x';
OS << '\n';
}
+ OS.flush();
assert(ResI == Res.end());
}
@@ -383,41 +448,32 @@ Error LTO::add(std::unique_ptr<InputFile> Input,
writeToResolutionFile(*Conf.ResolutionFile, Input.get(), Res);
const SymbolResolution *ResI = Res.begin();
- for (InputFile::InputModule &IM : Input->Mods)
- if (Error Err = addModule(*Input, IM, ResI, Res.end()))
+ for (unsigned I = 0; I != Input->Mods.size(); ++I)
+ if (Error Err = addModule(*Input, I, ResI, Res.end()))
return Err;
assert(ResI == Res.end());
return Error::success();
}
-Error LTO::addModule(InputFile &Input, InputFile::InputModule &IM,
+Error LTO::addModule(InputFile &Input, unsigned ModI,
const SymbolResolution *&ResI,
const SymbolResolution *ResE) {
- // FIXME: move to backend
- Module &M = *IM.Mod;
-
- if (M.getDataLayoutStr().empty())
- return make_error<StringError>("input module has no datalayout",
- inconvertibleErrorCode());
-
- if (!Conf.OverrideTriple.empty())
- M.setTargetTriple(Conf.OverrideTriple);
- else if (M.getTargetTriple().empty())
- M.setTargetTriple(Conf.DefaultTriple);
-
- Expected<bool> HasThinLTOSummary = IM.BM.hasSummary();
+ Expected<bool> HasThinLTOSummary = Input.Mods[ModI].hasSummary();
if (!HasThinLTOSummary)
return HasThinLTOSummary.takeError();
+ auto ModSyms = Input.module_symbols(ModI);
if (*HasThinLTOSummary)
- return addThinLTO(IM.BM, M, Input.module_symbols(IM), ResI, ResE);
+ return addThinLTO(Input.Mods[ModI], ModSyms, ResI, ResE);
else
- return addRegularLTO(IM.BM, ResI, ResE);
+ return addRegularLTO(Input.Mods[ModI], ModSyms, ResI, ResE);
}
// Add a regular LTO object to the link.
-Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI,
+Error LTO::addRegularLTO(BitcodeModule BM,
+ ArrayRef<InputFile::Symbol> Syms,
+ const SymbolResolution *&ResI,
const SymbolResolution *ResE) {
if (!RegularLTO.CombinedModule) {
RegularLTO.CombinedModule =
@@ -438,47 +494,84 @@ Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI,
ModuleSymbolTable SymTab;
SymTab.addModule(&M);
- SmallPtrSet<GlobalValue *, 8> Used;
- collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
-
std::vector<GlobalValue *> Keep;
for (GlobalVariable &GV : M.globals())
if (GV.hasAppendingLinkage())
Keep.push_back(&GV);
- for (const InputFile::Symbol &Sym :
- make_range(InputFile::symbol_iterator(SymTab.symbols().begin(), SymTab,
- nullptr),
- InputFile::symbol_iterator(SymTab.symbols().end(), SymTab,
- nullptr))) {
+ DenseSet<GlobalObject *> AliasedGlobals;
+ for (auto &GA : M.aliases())
+ if (GlobalObject *GO = GA.getBaseObject())
+ AliasedGlobals.insert(GO);
+
+ // In this function we need IR GlobalValues matching the symbols in Syms
+ // (which is not backed by a module), so we need to enumerate them in the same
+ // order. The symbol enumeration order of a ModuleSymbolTable intentionally
+ // matches the order of an irsymtab, but when we read the irsymtab in
+ // InputFile::create we omit some symbols that are irrelevant to LTO. The
+ // Skip() function skips the same symbols from the module as InputFile does
+ // from the symbol table.
+ auto MsymI = SymTab.symbols().begin(), MsymE = SymTab.symbols().end();
+ auto Skip = [&]() {
+ while (MsymI != MsymE) {
+ auto Flags = SymTab.getSymbolFlags(*MsymI);
+ if ((Flags & object::BasicSymbolRef::SF_Global) &&
+ !(Flags & object::BasicSymbolRef::SF_FormatSpecific))
+ return;
+ ++MsymI;
+ }
+ };
+ Skip();
+
+ for (const InputFile::Symbol &Sym : Syms) {
assert(ResI != ResE);
SymbolResolution Res = *ResI++;
- addSymbolToGlobalRes(Used, Sym, Res, 0);
-
- if (Sym.getFlags() & object::BasicSymbolRef::SF_Undefined)
- continue;
- if (Res.Prevailing && Sym.isGV()) {
- GlobalValue *GV = Sym.getGV();
- Keep.push_back(GV);
- switch (GV->getLinkage()) {
- default:
- break;
- case GlobalValue::LinkOnceAnyLinkage:
- GV->setLinkage(GlobalValue::WeakAnyLinkage);
- break;
- case GlobalValue::LinkOnceODRLinkage:
- GV->setLinkage(GlobalValue::WeakODRLinkage);
- break;
+ addSymbolToGlobalRes(Sym, Res, 0);
+
+ assert(MsymI != MsymE);
+ ModuleSymbolTable::Symbol Msym = *MsymI++;
+ Skip();
+
+ if (GlobalValue *GV = Msym.dyn_cast<GlobalValue *>()) {
+ if (Res.Prevailing) {
+ if (Sym.isUndefined())
+ continue;
+ Keep.push_back(GV);
+ switch (GV->getLinkage()) {
+ default:
+ break;
+ case GlobalValue::LinkOnceAnyLinkage:
+ GV->setLinkage(GlobalValue::WeakAnyLinkage);
+ break;
+ case GlobalValue::LinkOnceODRLinkage:
+ GV->setLinkage(GlobalValue::WeakODRLinkage);
+ break;
+ }
+ } else if (isa<GlobalObject>(GV) &&
+ (GV->hasLinkOnceODRLinkage() || GV->hasWeakODRLinkage() ||
+ GV->hasAvailableExternallyLinkage()) &&
+ !AliasedGlobals.count(cast<GlobalObject>(GV))) {
+ // Either of the above three types of linkage indicates that the
+ // chosen prevailing symbol will have the same semantics as this copy of
+ // the symbol, so we can link it with available_externally linkage. We
+ // only need to do this if the symbol is undefined.
+ GlobalValue *CombinedGV =
+ RegularLTO.CombinedModule->getNamedValue(GV->getName());
+ if (!CombinedGV || CombinedGV->isDeclaration()) {
+ Keep.push_back(GV);
+ GV->setLinkage(GlobalValue::AvailableExternallyLinkage);
+ cast<GlobalObject>(GV)->setComdat(nullptr);
+ }
}
}
// Common resolution: collect the maximum size/alignment over all commons.
// We also record if we see an instance of a common as prevailing, so that
// if none is prevailing we can ignore it later.
- if (Sym.getFlags() & object::BasicSymbolRef::SF_Common) {
+ if (Sym.isCommon()) {
// FIXME: We should figure out what to do about commons defined by asm.
// For now they aren't reported correctly by ModuleSymbolTable.
- auto &CommonRes = RegularLTO.Commons[Sym.getGV()->getName()];
+ auto &CommonRes = RegularLTO.Commons[Sym.getIRName()];
CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize());
CommonRes.Align = std::max(CommonRes.Align, Sym.getCommonAlignment());
CommonRes.Prevailing |= Res.Prevailing;
@@ -486,23 +579,18 @@ Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI,
// FIXME: use proposed local attribute for FinalDefinitionInLinkageUnit.
}
+ assert(MsymI == MsymE);
return RegularLTO.Mover->move(std::move(*MOrErr), Keep,
[](GlobalValue &, IRMover::ValueAdder) {},
- /* LinkModuleInlineAsm */ true,
/* IsPerformingImport */ false);
}
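
The Skip() technique above keeps two enumerations of the same symbols aligned; here is a standalone sketch of the idea with plain integers, negatives standing in for the format-specific symbols the irsymtab omitted:

    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> Full = {1, -7, 2, -9, 3}; // negatives were filtered out
      std::vector<int> Kept = {1, 2, 3};
      auto It = Full.begin();
      auto Skip = [&] { while (It != Full.end() && *It < 0) ++It; };
      Skip();
      for (int K : Kept) {
        assert(It != Full.end() && *It == K); // enumeration orders must match
        ++It;
        Skip();
      }
      assert(It == Full.end());
    }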
// Add a ThinLTO object to the link.
-// FIXME: This function should not need to take as many parameters once we have
-// a bitcode symbol table.
-Error LTO::addThinLTO(BitcodeModule BM, Module &M,
- iterator_range<InputFile::symbol_iterator> Syms,
+Error LTO::addThinLTO(BitcodeModule BM,
+ ArrayRef<InputFile::Symbol> Syms,
const SymbolResolution *&ResI,
const SymbolResolution *ResE) {
- SmallPtrSet<GlobalValue *, 8> Used;
- collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
-
Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr = BM.getSummary();
if (!SummaryOrErr)
return SummaryOrErr.takeError();
@@ -512,11 +600,15 @@ Error LTO::addThinLTO(BitcodeModule BM, Module &M,
for (const InputFile::Symbol &Sym : Syms) {
assert(ResI != ResE);
SymbolResolution Res = *ResI++;
- addSymbolToGlobalRes(Used, Sym, Res, ThinLTO.ModuleMap.size() + 1);
+ addSymbolToGlobalRes(Sym, Res, ThinLTO.ModuleMap.size() + 1);
- if (Res.Prevailing && Sym.isGV())
- ThinLTO.PrevailingModuleForGUID[Sym.getGV()->getGUID()] =
- BM.getModuleIdentifier();
+ if (Res.Prevailing) {
+ if (!Sym.getIRName().empty()) {
+ auto GUID = GlobalValue::getGUID(GlobalValue::getGlobalIdentifier(
+ Sym.getIRName(), GlobalValue::ExternalLinkage, ""));
+ ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier();
+ }
+ }
}
if (!ThinLTO.ModuleMap.insert({BM.getModuleIdentifier(), BM}).second)
@@ -602,7 +694,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
return Error::success();
}
return backend(Conf, AddStream, RegularLTO.ParallelCodeGenParallelismLevel,
- std::move(RegularLTO.CombinedModule));
+ std::move(RegularLTO.CombinedModule), ThinLTO.CombinedIndex);
}
/// This class defines the interface to the ThinLTO backend.
@@ -633,6 +725,7 @@ class InProcessThinBackend : public ThinBackendProc {
ThreadPool BackendThreadPool;
AddStreamFn AddStream;
NativeObjectCache Cache;
+ TypeIdSummariesByGuidTy TypeIdSummariesByGuid;
Optional<Error> Err;
std::mutex ErrMu;
@@ -645,7 +738,14 @@ public:
AddStreamFn AddStream, NativeObjectCache Cache)
: ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries),
BackendThreadPool(ThinLTOParallelismLevel),
- AddStream(std::move(AddStream)), Cache(std::move(Cache)) {}
+ AddStream(std::move(AddStream)), Cache(std::move(Cache)) {
+ // Create a mapping from type identifier GUIDs to type identifier summaries.
+ // This allows backends to use the type identifier GUIDs stored in the
+ // function summaries to determine which type identifier summaries affect
+ // each function without needing to compute GUIDs in each backend.
+ for (auto &TId : CombinedIndex.typeIds())
+ TypeIdSummariesByGuid[GlobalValue::getGUID(TId.first)].push_back(&TId);
+ }
Error runThinLTOBackendThread(
AddStreamFn AddStream, NativeObjectCache Cache, unsigned Task,
@@ -654,7 +754,8 @@ public:
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
const GVSummaryMapTy &DefinedGlobals,
- MapVector<StringRef, BitcodeModule> &ModuleMap) {
+ MapVector<StringRef, BitcodeModule> &ModuleMap,
+ const TypeIdSummariesByGuidTy &TypeIdSummariesByGuid) {
auto RunThinBackend = [&](AddStreamFn AddStream) {
LTOLLVMContext BackendContext(Conf);
Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(BackendContext);
@@ -677,7 +778,7 @@ public:
SmallString<40> Key;
// The module may be cached, this helps handling it.
computeCacheKey(Key, Conf, CombinedIndex, ModuleID, ImportList, ExportList,
- ResolvedODR, DefinedGlobals);
+ ResolvedODR, DefinedGlobals, TypeIdSummariesByGuid);
if (AddStreamFn CacheAddStream = Cache(Task, Key))
return RunThinBackend(CacheAddStream);
@@ -701,10 +802,11 @@ public:
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
&ResolvedODR,
const GVSummaryMapTy &DefinedGlobals,
- MapVector<StringRef, BitcodeModule> &ModuleMap) {
+ MapVector<StringRef, BitcodeModule> &ModuleMap,
+ const TypeIdSummariesByGuidTy &TypeIdSummariesByGuid) {
Error E = runThinLTOBackendThread(
- AddStream, Cache, Task, BM, CombinedIndex, ImportList,
- ExportList, ResolvedODR, DefinedGlobals, ModuleMap);
+ AddStream, Cache, Task, BM, CombinedIndex, ImportList, ExportList,
+ ResolvedODR, DefinedGlobals, ModuleMap, TypeIdSummariesByGuid);
if (E) {
std::unique_lock<std::mutex> L(ErrMu);
if (Err)
@@ -713,9 +815,9 @@ public:
Err = std::move(E);
}
},
- BM, std::ref(CombinedIndex), std::ref(ImportList),
- std::ref(ExportList), std::ref(ResolvedODR), std::ref(DefinedGlobals),
- std::ref(ModuleMap));
+ BM, std::ref(CombinedIndex), std::ref(ImportList), std::ref(ExportList),
+ std::ref(ResolvedODR), std::ref(DefinedGlobals), std::ref(ModuleMap),
+ std::ref(TypeIdSummariesByGuid));
return Error::success();
}
@@ -857,19 +959,6 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
if (!ModuleToDefinedGVSummaries.count(Mod.first))
ModuleToDefinedGVSummaries.try_emplace(Mod.first);
- // Compute "dead" symbols, we don't want to import/export these!
- DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
- for (auto &Res : GlobalResolutions) {
- if (Res.second.VisibleOutsideThinLTO &&
- // IRName will be defined if we have seen the prevailing copy of
- // this value. If not, no need to preserve any ThinLTO copies.
- !Res.second.IRName.empty())
- GUIDPreservedSymbols.insert(GlobalValue::getGUID(Res.second.IRName));
- }
-
- auto DeadSymbols =
- computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols);
-
StringMap<FunctionImporter::ImportMapTy> ImportLists(
ThinLTO.ModuleMap.size());
StringMap<FunctionImporter::ExportSetTy> ExportLists(
@@ -877,6 +966,20 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
if (Conf.OptLevel > 0) {
+    // Compute "dead" symbols; we don't want to import/export these!
+ DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
+ for (auto &Res : GlobalResolutions) {
+ if (Res.second.VisibleOutsideThinLTO &&
+ // IRName will be defined if we have seen the prevailing copy of
+ // this value. If not, no need to preserve any ThinLTO copies.
+ !Res.second.IRName.empty())
+ GUIDPreservedSymbols.insert(GlobalValue::getGUID(
+ GlobalValue::getRealLinkageName(Res.second.IRName)));
+ }
+
+ auto DeadSymbols =
+ computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols);
+
ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
ImportLists, ExportLists, &DeadSymbols);
@@ -890,10 +993,11 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
// partition (and we can't get the GUID).
if (Res.second.IRName.empty())
continue;
- auto GUID = GlobalValue::getGUID(Res.second.IRName);
+ auto GUID = GlobalValue::getGUID(
+ GlobalValue::getRealLinkageName(Res.second.IRName));
// Mark exported unless index-based analysis determined it to be dead.
if (!DeadSymbols.count(GUID))
- ExportedGUIDs.insert(GlobalValue::getGUID(Res.second.IRName));
+ ExportedGUIDs.insert(GUID);
}
auto isPrevailing = [&](GlobalValue::GUID GUID,
@@ -937,3 +1041,27 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
return BackendProc->wait();
}
+
+Expected<std::unique_ptr<tool_output_file>>
+lto::setupOptimizationRemarks(LLVMContext &Context,
+ StringRef LTORemarksFilename,
+ bool LTOPassRemarksWithHotness, int Count) {
+ if (LTORemarksFilename.empty())
+ return nullptr;
+
+ std::string Filename = LTORemarksFilename;
+ if (Count != -1)
+ Filename += ".thin." + llvm::utostr(Count) + ".yaml";
+
+ std::error_code EC;
+ auto DiagnosticFile =
+ llvm::make_unique<tool_output_file>(Filename, EC, sys::fs::F_None);
+ if (EC)
+ return errorCodeToError(EC);
+ Context.setDiagnosticsOutputFile(
+ llvm::make_unique<yaml::Output>(DiagnosticFile->os()));
+ if (LTOPassRemarksWithHotness)
+ Context.setDiagnosticHotnessRequested(true);
+ DiagnosticFile->keep();
+ return std::move(DiagnosticFile);
+}
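
A usage sketch for the new shared helper follows; the file name and early-exit handling are illustrative, and the Count parameter (used by the ThinLTO driver to get one remarks file per task) is presumably defaulted in the header:

    LLVMContext Ctx;
    auto FileOrErr =
        lto::setupOptimizationRemarks(Ctx, "remarks.yaml",
                                      /*LTOPassRemarksWithHotness=*/true);
    if (!FileOrErr)
      return FileOrErr.takeError(); // Expected<> carries the open error.
    std::unique_ptr<tool_output_file> Remarks = std::move(*FileOrErr);
    // ... run passes; remarks are streamed as YAML into Remarks->os() ...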
diff --git a/lib/LTO/LTOBackend.cpp b/lib/LTO/LTOBackend.cpp
index 809db80bc916..4bd251f727a4 100644
--- a/lib/LTO/LTOBackend.cpp
+++ b/lib/LTO/LTOBackend.cpp
@@ -27,6 +27,7 @@
#include "llvm/LTO/LTO.h"
#include "llvm/LTO/legacy/UpdateCompilerUsed.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
@@ -42,6 +43,11 @@
using namespace llvm;
using namespace lto;
+static cl::opt<bool>
+ LTOUseNewPM("lto-use-new-pm",
+ cl::desc("Run LTO passes using the new pass manager"),
+ cl::init(false), cl::Hidden);
+
LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) {
errs() << "failed to open " << Path << ": " << Msg << '\n';
errs().flush();
@@ -124,6 +130,56 @@ createTargetMachine(Config &Conf, StringRef TheTriple,
Conf.CodeModel, Conf.CGOptLevel));
}
+static void runNewPMPasses(Module &Mod, TargetMachine *TM, unsigned OptLevel) {
+ PassBuilder PB(TM);
+ AAManager AA;
+
+ // Parse the default AA pipeline. Keep the call outside assert() so it
+ // still runs in NDEBUG builds.
+ bool AAParsed = PB.parseAAPipeline(AA, "default");
+ (void)AAParsed; assert(AAParsed && "failed to parse AA pipeline");
+
+ LoopAnalysisManager LAM;
+ FunctionAnalysisManager FAM;
+ CGSCCAnalysisManager CGAM;
+ ModuleAnalysisManager MAM;
+
+ // Register the AA manager first so that our version is the one used.
+ FAM.registerPass([&] { return std::move(AA); });
+
+ // Register all the basic analyses with the managers.
+ PB.registerModuleAnalyses(MAM);
+ PB.registerCGSCCAnalyses(CGAM);
+ PB.registerFunctionAnalyses(FAM);
+ PB.registerLoopAnalyses(LAM);
+ PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
+
+ ModulePassManager MPM;
+ // FIXME (davide): verify the input.
+
+ PassBuilder::OptimizationLevel OL;
+
+ switch (OptLevel) {
+ default:
+ llvm_unreachable("Invalid optimization level");
+ case 0:
+ OL = PassBuilder::O0;
+ break;
+ case 1:
+ OL = PassBuilder::O1;
+ break;
+ case 2:
+ OL = PassBuilder::O2;
+ break;
+ case 3:
+ OL = PassBuilder::O3;
+ break;
+ }
+
+ MPM = PB.buildLTODefaultPipeline(OL, false /* DebugLogging */);
+ MPM.run(Mod, MAM);
+
+ // FIXME (davide): verify the output.
+}
+
static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
std::string PipelineDesc,
std::string AAPipelineDesc,
@@ -168,13 +224,16 @@ static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
}
static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
- bool IsThinLTO) {
+ bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary) {
legacy::PassManager passes;
passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
PassManagerBuilder PMB;
PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
PMB.Inliner = createFunctionInliningPass();
+ PMB.ExportSummary = ExportSummary;
+ PMB.ImportSummary = ImportSummary;
// Unconditionally verify input since it is not verified before this
// point and has unknown origin.
PMB.VerifyInput = true;
@@ -191,12 +250,21 @@ static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
}
bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
- bool IsThinLTO) {
- if (Conf.OptPipeline.empty())
- runOldPMPasses(Conf, Mod, TM, IsThinLTO);
- else
+ bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary) {
+ // There is no ThinLTO pipeline hooked up in the new pass manager yet;
+ // once there is one, this check can simply be removed.
+ if (LTOUseNewPM && IsThinLTO)
+ report_fatal_error("ThinLTO not supported with the new PM yet!");
+
+ // FIXME: Plumb the combined index into the new pass manager.
+ if (!Conf.OptPipeline.empty())
runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline,
Conf.DisableVerify);
+ else if (LTOUseNewPM)
+ runNewPMPasses(Mod, TM, Conf.OptLevel);
+ else
+ runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary);
return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
}
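
The two call sites later in this file pass the combined index through opposite parameters, which is worth seeing side by side (this merely restates the hunks below):

    // Regular LTO: this module may export definitions to other modules.
    opt(C, TM, 0, Mod, /*IsThinLTO=*/false,
        /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr);
    // ThinLTO: the module is compiled against the index it imports from.
    opt(Conf, TM, Task, Mod, /*IsThinLTO=*/true,
        /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex);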
@@ -207,8 +275,7 @@ void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
auto Stream = AddStream(Task);
legacy::PassManager CodeGenPasses;
- if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS,
- TargetMachine::CGFT_ObjectFile))
+ if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS, Conf.CGFileType))
report_fatal_error("Failed to setup codegen");
CodeGenPasses.run(Mod);
}
@@ -276,12 +343,22 @@ Expected<const Target *> initAndLookupTarget(Config &C, Module &Mod) {
}
+static void
+finalizeOptimizationRemarks(std::unique_ptr<tool_output_file> DiagOutputFile) {
+ // Make sure we flush the diagnostic remarks file in case the linker doesn't
+ // call the global destructors before exiting.
+ if (!DiagOutputFile)
+ return;
+ DiagOutputFile->keep();
+ DiagOutputFile->os().flush();
+}
+
static void handleAsmUndefinedRefs(Module &Mod, TargetMachine &TM) {
// Collect the list of undefined symbols used in asm and update
// llvm.compiler.used to prevent optimization to drop these from the output.
StringSet<> AsmUndefinedRefs;
ModuleSymbolTable::CollectAsmSymbols(
- Triple(Mod.getTargetTriple()), Mod.getModuleInlineAsm(),
+ Mod,
[&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
if (Flags & object::BasicSymbolRef::SF_Undefined)
AsmUndefinedRefs.insert(Name);
@@ -291,7 +368,8 @@ static void handleAsmUndefinedRefs(Module &Mod, TargetMachine &TM) {
Error lto::backend(Config &C, AddStreamFn AddStream,
unsigned ParallelCodeGenParallelismLevel,
- std::unique_ptr<Module> Mod) {
+ std::unique_ptr<Module> Mod,
+ ModuleSummaryIndex &CombinedIndex) {
Expected<const Target *> TOrErr = initAndLookupTarget(C, *Mod);
if (!TOrErr)
return TOrErr.takeError();
@@ -301,9 +379,20 @@ Error lto::backend(Config &C, AddStreamFn AddStream,
handleAsmUndefinedRefs(*Mod, *TM);
- if (!C.CodeGenOnly)
- if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false))
+ // Setup optimization remarks.
+ auto DiagFileOrErr = lto::setupOptimizationRemarks(
+ Mod->getContext(), C.RemarksFilename, C.RemarksWithHotness);
+ if (!DiagFileOrErr)
+ return DiagFileOrErr.takeError();
+ auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
+
+ if (!C.CodeGenOnly) {
+ if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false,
+ /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr)) {
+ finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
return Error::success();
+ }
+ }
if (ParallelCodeGenParallelismLevel == 1) {
codegen(C, TM.get(), AddStream, 0, *Mod);
@@ -311,11 +400,12 @@ Error lto::backend(Config &C, AddStreamFn AddStream,
splitCodeGen(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel,
std::move(Mod));
}
+ finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
return Error::success();
}
Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream,
- Module &Mod, ModuleSummaryIndex &CombinedIndex,
+ Module &Mod, const ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const GVSummaryMapTy &DefinedGlobals,
MapVector<StringRef, BitcodeModule> &ModuleMap) {
@@ -367,7 +457,8 @@ Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream,
if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod))
return Error::success();
- if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true))
+ if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true,
+ /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex))
return Error::success();
codegen(Conf, TM.get(), AddStream, Task, Mod);
diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp
index 6af31e61f946..86fba843e980 100644
--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
+#include "llvm/LTO/LTO.h"
#include "llvm/LTO/legacy/LTOModule.h"
#include "llvm/LTO/legacy/UpdateCompilerUsed.h"
#include "llvm/Linker/Linker.h"
@@ -140,6 +141,7 @@ void LTOCodeGenerator::initializeLTOPasses() {
initializeMemCpyOptLegacyPassPass(R);
initializeDCELegacyPassPass(R);
initializeCFGSimplifyPassPass(R);
+ initializeLateCFGSimplifyPassPass(R);
}
void LTOCodeGenerator::setAsmUndefinedRefs(LTOModule *Mod) {
@@ -506,25 +508,6 @@ void LTOCodeGenerator::verifyMergedModuleOnce() {
report_fatal_error("Broken module found, compilation aborted!");
}
-bool LTOCodeGenerator::setupOptimizationRemarks() {
- if (LTORemarksFilename != "") {
- std::error_code EC;
- DiagnosticOutputFile = llvm::make_unique<tool_output_file>(
- LTORemarksFilename, EC, sys::fs::F_None);
- if (EC) {
- emitError(EC.message());
- return false;
- }
- Context.setDiagnosticsOutputFile(
- llvm::make_unique<yaml::Output>(DiagnosticOutputFile->os()));
- }
-
- if (LTOPassRemarksWithHotness)
- Context.setDiagnosticHotnessRequested(true);
-
- return true;
-}
-
void LTOCodeGenerator::finishOptimizationRemarks() {
if (DiagnosticOutputFile) {
DiagnosticOutputFile->keep();
@@ -540,8 +523,13 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline,
if (!this->determineTarget())
return false;
- if (!setupOptimizationRemarks())
- return false;
+ auto DiagFileOrErr = lto::setupOptimizationRemarks(
+ Context, LTORemarksFilename, LTOPassRemarksWithHotness);
+ if (!DiagFileOrErr) {
+ errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n";
+ report_fatal_error("Can't get an output file for the remarks");
+ }
+ DiagnosticOutputFile = std::move(*DiagFileOrErr);
// We always run the verifier once on the merged module, the `DisableVerify`
// parameter only applies to subsequent verify.
@@ -567,6 +555,8 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline,
if (!DisableInline)
PMB.Inliner = createFunctionInliningPass();
PMB.LibraryInfo = new TargetLibraryInfoImpl(TargetTriple);
+ if (Freestanding)
+ PMB.LibraryInfo->disableAllFunctions();
PMB.OptLevel = OptLevel;
PMB.VerifyInput = !DisableVerify;
PMB.VerifyOutput = !DisableVerify;
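
disableAllFunctions() tells TargetLibraryInfo that no library routine may be assumed, which blocks libcall-based transforms (printf-to-puts and friends). A minimal illustration with a hypothetical triple; the enumerator spelling follows the LibFunc rename made elsewhere in this patch:

    TargetLibraryInfoImpl TLII(Triple("x86_64-unknown-linux-gnu"));
    TLII.disableAllFunctions();        // freestanding: assume no C library
    TargetLibraryInfo TLI(TLII);
    assert(!TLI.has(LibFunc_printf));  // printf is no longer a known libcall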
diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp
index 89aeb8000038..11f0982c6a60 100644
--- a/lib/LTO/LTOModule.cpp
+++ b/lib/LTO/LTOModule.cpp
@@ -14,11 +14,12 @@
#include "llvm/LTO/legacy/LTOModule.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/ObjectUtils.h"
#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCExpr.h"
@@ -647,11 +648,15 @@ void LTOModule::parseMetadata() {
}
}
- // Globals
+ // Globals - we only need to do this for COFF.
+ const Triple TT(_target->getTargetTriple());
+ if (!TT.isOSBinFormatCOFF())
+ return;
+ Mangler M;
for (const NameAndAttributes &Sym : _symbols) {
if (!Sym.symbol)
continue;
- _target->getObjFileLowering()->emitLinkerFlagsForGlobal(OS, Sym.symbol);
+ emitLinkerFlagsForGlobalCOFF(OS, Sym.symbol, TT, M);
}
// Add other interesting metadata here.
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index 40537e4fa784..0d845a26d0c2 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -14,10 +14,6 @@
#include "llvm/LTO/legacy/ThinLTOCodeGenerator.h"
-#ifdef HAVE_LLVM_REVISION
-#include "LLVMLTORevision.h"
-#endif
-
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
@@ -47,6 +43,7 @@
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/VCSRevision.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
@@ -73,27 +70,6 @@ namespace {
static cl::opt<int>
ThreadCount("threads", cl::init(llvm::heavyweight_hardware_concurrency()));
-Expected<std::unique_ptr<tool_output_file>>
-setupOptimizationRemarks(LLVMContext &Ctx, int Count) {
- if (LTOPassRemarksWithHotness)
- Ctx.setDiagnosticHotnessRequested(true);
-
- if (LTORemarksFilename.empty())
- return nullptr;
-
- std::string FileName =
- LTORemarksFilename + ".thin." + llvm::utostr(Count) + ".yaml";
- std::error_code EC;
- auto DiagnosticOutputFile =
- llvm::make_unique<tool_output_file>(FileName, EC, sys::fs::F_None);
- if (EC)
- return errorCodeToError(EC);
- Ctx.setDiagnosticsOutputFile(
- llvm::make_unique<yaml::Output>(DiagnosticOutputFile->os()));
- DiagnosticOutputFile->keep();
- return std::move(DiagnosticOutputFile);
-}
-
// Simple helper to save temporary files for debug.
static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
unsigned count, StringRef Suffix) {
@@ -208,10 +184,12 @@ crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
}
static void optimizeModule(Module &TheModule, TargetMachine &TM,
- unsigned OptLevel) {
+ unsigned OptLevel, bool Freestanding) {
// Populate the PassManager
PassManagerBuilder PMB;
PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());
+ if (Freestanding)
+ PMB.LibraryInfo->disableAllFunctions();
PMB.Inliner = createFunctionInliningPass();
// FIXME: should get it from the bitcode?
PMB.OptLevel = OptLevel;
@@ -285,7 +263,7 @@ public:
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
const GVSummaryMapTy &DefinedFunctions,
const DenseSet<GlobalValue::GUID> &PreservedSymbols, unsigned OptLevel,
- const TargetMachineBuilder &TMBuilder) {
+ bool Freestanding, const TargetMachineBuilder &TMBuilder) {
if (CachePath.empty())
return;
@@ -323,7 +301,7 @@ public:
// Start with the compiler revision
Hasher.update(LLVM_VERSION_STRING);
-#ifdef HAVE_LLVM_REVISION
+#ifdef LLVM_REVISION
Hasher.update(LLVM_REVISION);
#endif
@@ -342,6 +320,7 @@ public:
AddUnsigned(*TMBuilder.RelocModel);
AddUnsigned(TMBuilder.CGOptLevel);
AddUnsigned(OptLevel);
+ AddUnsigned(Freestanding);
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
for (auto F : ExportList)
@@ -369,7 +348,10 @@ public:
ArrayRef<uint8_t>((const uint8_t *)&Entry, sizeof(GlobalValue::GUID)));
}
- sys::path::append(EntryPath, CachePath, toHex(Hasher.result()));
+ // This choice of file name allows the cache to be pruned (see pruneCache()
+ // in include/llvm/Support/CachePruning.h).
+ sys::path::append(EntryPath, CachePath,
+ "llvmcache-" + toHex(Hasher.result()));
}
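
Concretely, cache entries now land at paths like the one below, and pruneCache() only touches files carrying that prefix (hash shortened for illustration):

    // <CachePath>/llvmcache-16EB2F9A0C...  -- one entry per module + cache key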
// Access the path to this entry in the cache.
@@ -422,7 +404,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
const GVSummaryMapTy &DefinedGlobals,
const ThinLTOCodeGenerator::CachingOptions &CacheOptions,
bool DisableCodeGen, StringRef SaveTempsDir,
- unsigned OptLevel, unsigned count) {
+ bool Freestanding, unsigned OptLevel, unsigned count) {
// "Benchmark"-like optimization: single-source case
bool SingleModule = (ModuleMap.size() == 1);
@@ -454,7 +436,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
}
- optimizeModule(TheModule, TM, OptLevel);
+ optimizeModule(TheModule, TM, OptLevel, Freestanding);
saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc");
@@ -780,7 +762,7 @@ void ThinLTOCodeGenerator::optimize(Module &TheModule) {
initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
// Optimize now
- optimizeModule(TheModule, *TMBuilder.create(), OptLevel);
+ optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding);
}
/**
@@ -966,7 +948,7 @@ void ThinLTOCodeGenerator::run() {
ImportLists[ModuleIdentifier], ExportList,
ResolvedODR[ModuleIdentifier],
DefinedFunctions, GUIDPreservedSymbols,
- OptLevel, TMBuilder);
+ OptLevel, Freestanding, TMBuilder);
auto CacheEntryPath = CacheEntry.getEntryPath();
{
@@ -990,7 +972,8 @@ void ThinLTOCodeGenerator::run() {
LLVMContext Context;
Context.setDiscardValueNames(LTODiscardValueNames);
Context.enableDebugTypeODRUniquing();
- auto DiagFileOrErr = setupOptimizationRemarks(Context, count);
+ auto DiagFileOrErr = lto::setupOptimizationRemarks(
+ Context, LTORemarksFilename, LTOPassRemarksWithHotness, count);
if (!DiagFileOrErr) {
errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n";
report_fatal_error("ThinLTO: Can't get an output file for the "
@@ -1011,7 +994,7 @@ void ThinLTOCodeGenerator::run() {
*TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList,
ExportList, GUIDPreservedSymbols,
ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions,
- DisableCodeGen, SaveTempsDir, OptLevel, count);
+ DisableCodeGen, SaveTempsDir, Freestanding, OptLevel, count);
// Commit to the cache (if enabled)
CacheEntry.write(*OutputBuffer);
@@ -1043,11 +1026,7 @@ void ThinLTOCodeGenerator::run() {
}
}
- CachePruning(CacheOptions.Path)
- .setPruningInterval(std::chrono::seconds(CacheOptions.PruningInterval))
- .setEntryExpiration(std::chrono::seconds(CacheOptions.Expiration))
- .setMaxSize(CacheOptions.MaxPercentageOfAvailableSpace)
- .prune();
+ pruneCache(CacheOptions.Path, CacheOptions.Policy);
// If statistics were requested, print them out now.
if (llvm::AreStatisticsEnabled())
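
The builder-style CachePruning calls are replaced by a policy struct. A hedged sketch of filling one, with field names per include/llvm/Support/CachePruning.h at this revision and illustrative values:

    CachePruningPolicy Policy;
    Policy.Interval = std::chrono::seconds(600);      // min time between scans
    Policy.Expiration = std::chrono::hours(7 * 24);   // drop week-old entries
    Policy.PercentageOfAvailableSpace = 75;           // size cap, % of free disk
    pruneCache(CachePath, Policy);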
diff --git a/lib/LTO/UpdateCompilerUsed.cpp b/lib/LTO/UpdateCompilerUsed.cpp
index b67d9ea5989d..5165cc965038 100644
--- a/lib/LTO/UpdateCompilerUsed.cpp
+++ b/lib/LTO/UpdateCompilerUsed.cpp
@@ -65,7 +65,7 @@ private:
// target.
for (unsigned I = 0, E = static_cast<unsigned>(LibFunc::NumLibFuncs);
I != E; ++I) {
- LibFunc::Func F = static_cast<LibFunc::Func>(I);
+ LibFunc F = static_cast<LibFunc>(I);
if (TLI.has(F))
Libcalls.insert(TLI.getName(F));
}
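
The same mechanical rename applies throughout the tree: the nested enum becomes a top-level one with prefixed enumerators, e.g.:

    LibFunc::Func F = LibFunc::printf;  // before this patch
    LibFunc F2 = LibFunc_printf;        // after this patch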
diff --git a/lib/LibDriver/LibDriver.cpp b/lib/LibDriver/LibDriver.cpp
index bcdec4f7a933..c50629d71501 100644
--- a/lib/LibDriver/LibDriver.cpp
+++ b/lib/LibDriver/LibDriver.cpp
@@ -121,7 +121,7 @@ int llvm::libDriverMain(llvm::ArrayRef<const char*> ArgsArr) {
for (auto *Arg : Args.filtered(OPT_UNKNOWN))
llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n";
- if (Args.filtered_begin(OPT_INPUT) == Args.filtered_end()) {
+ if (!Args.hasArgNoClaim(OPT_INPUT)) {
// No input files. To match lib.exe, silently do nothing.
return 0;
}
diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp
index 9f3cfc0eace4..15a46a2d0420 100644
--- a/lib/Linker/IRMover.cpp
+++ b/lib/Linker/IRMover.cpp
@@ -395,11 +395,12 @@ class IRLinker {
Worklist.push_back(GV);
}
- /// Flag whether the ModuleInlineAsm string in Src should be linked with
- /// (concatenated into) the ModuleInlineAsm string for the destination
- /// module. It should be true for full LTO, but not when importing for
- /// ThinLTO, otherwise we can have duplicate symbols.
- bool LinkModuleInlineAsm;
+ /// Whether we are importing globals for ThinLTO, as opposed to linking the
+ /// source module. If this flag is set, it means that we can rely on some
+ /// other object file to define any non-GlobalValue entities defined by the
+ /// source module. This currently causes us to not link retained types in
+ /// debug info metadata and module inline asm.
+ bool IsPerformingImport;
/// Set to true when all global value body linking is complete (including
/// lazy linking). Used to prevent metadata linking from creating new
@@ -491,10 +492,10 @@ public:
IRMover::IdentifiedStructTypeSet &Set, std::unique_ptr<Module> SrcM,
ArrayRef<GlobalValue *> ValuesToLink,
std::function<void(GlobalValue &, IRMover::ValueAdder)> AddLazyFor,
- bool LinkModuleInlineAsm, bool IsPerformingImport)
+ bool IsPerformingImport)
: DstM(DstM), SrcM(std::move(SrcM)), AddLazyFor(std::move(AddLazyFor)),
TypeMap(Set), GValMaterializer(*this), LValMaterializer(*this),
- SharedMDs(SharedMDs), LinkModuleInlineAsm(LinkModuleInlineAsm),
+ SharedMDs(SharedMDs), IsPerformingImport(IsPerformingImport),
Mapper(ValueMap, RF_MoveDistinctMDs | RF_IgnoreMissingLocals, &TypeMap,
&GValMaterializer),
AliasMCID(Mapper.registerAlternateMappingContext(AliasValueMap,
@@ -870,9 +871,6 @@ bool IRLinker::shouldLink(GlobalValue *DGV, GlobalValue &SGV) {
if (DGV && !DGV->isDeclarationForLinker())
return false;
- if (SGV.hasAvailableExternallyLinkage())
- return true;
-
if (SGV.isDeclaration() || DoneLinkingBodies)
return false;
@@ -1297,7 +1295,7 @@ Error IRLinker::run() {
DstM.setTargetTriple(mergeTriples(SrcTriple, DstTriple));
// Append the module inline asm string.
- if (LinkModuleInlineAsm && !SrcM->getModuleInlineAsm().empty()) {
+ if (!IsPerformingImport && !SrcM->getModuleInlineAsm().empty()) {
if (DstM.getModuleInlineAsm().empty())
DstM.setModuleInlineAsm(SrcM->getModuleInlineAsm());
else
@@ -1436,10 +1434,10 @@ IRMover::IRMover(Module &M) : Composite(M) {
Error IRMover::move(
std::unique_ptr<Module> Src, ArrayRef<GlobalValue *> ValuesToLink,
std::function<void(GlobalValue &, ValueAdder Add)> AddLazyFor,
- bool LinkModuleInlineAsm, bool IsPerformingImport) {
+ bool IsPerformingImport) {
IRLinker TheIRLinker(Composite, SharedMDs, IdentifiedStructTypes,
std::move(Src), ValuesToLink, std::move(AddLazyFor),
- LinkModuleInlineAsm, IsPerformingImport);
+ IsPerformingImport);
Error E = TheIRLinker.run();
Composite.dropTriviallyDeadConstantArrays();
return E;
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index cf2c4ccf523e..c0ce4bf76b9f 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -14,12 +14,13 @@
#include "LinkDiagnosticInfo.h"
#include "llvm-c/Linker.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/Comdat.h"
#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/Linker/Linker.h"
#include "llvm/Support/Error.h"
-#include "llvm/Transforms/Utils/FunctionImportUtils.h"
using namespace llvm;
namespace {
@@ -31,14 +32,17 @@ class ModuleLinker {
std::unique_ptr<Module> SrcM;
SetVector<GlobalValue *> ValuesToLink;
- StringSet<> Internalize;
/// For symbol clashes, prefer those from Src.
unsigned Flags;
- /// Functions to import from source module, all other functions are
- /// imported as declarations instead of definitions.
- DenseSet<const GlobalValue *> *GlobalsToImport;
+ /// List of global value names that should be internalized.
+ StringSet<> Internalize;
+
+ /// Function that will perform the actual internalization. The reason for a
+ /// callback is that the linker cannot call internalizeModule without
+ /// creating a circular dependency between IPO and the linker.
+ std::function<void(Module &, const StringSet<> &)> InternalizeCallback;
/// Used as the callback for lazy linking.
/// The mover has just hit GV and we have to decide if it, and other members
@@ -46,14 +50,8 @@ class ModuleLinker {
/// to Add.
void addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add);
- bool shouldLinkReferencedLinkOnce() {
- return !(Flags & Linker::DontForceLinkLinkonceODR);
- }
bool shouldOverrideFromSrc() { return Flags & Linker::OverrideFromSrc; }
bool shouldLinkOnlyNeeded() { return Flags & Linker::LinkOnlyNeeded; }
- bool shouldInternalizeLinkedSymbols() {
- return Flags & Linker::InternalizeLinkedSymbols;
- }
bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest,
const GlobalValue &Src);
@@ -108,31 +106,17 @@ class ModuleLinker {
bool linkIfNeeded(GlobalValue &GV);
- /// Helper method to check if we are importing from the current source
- /// module.
- bool isPerformingImport() const { return GlobalsToImport != nullptr; }
-
- /// If we are importing from the source module, checks if we should
- /// import SGV as a definition, otherwise import as a declaration.
- bool doImportAsDefinition(const GlobalValue *SGV);
-
public:
ModuleLinker(IRMover &Mover, std::unique_ptr<Module> SrcM, unsigned Flags,
- DenseSet<const GlobalValue *> *GlobalsToImport = nullptr)
+ std::function<void(Module &, const StringSet<> &)>
+ InternalizeCallback = {})
: Mover(Mover), SrcM(std::move(SrcM)), Flags(Flags),
- GlobalsToImport(GlobalsToImport) {}
+ InternalizeCallback(std::move(InternalizeCallback)) {}
bool run();
};
}
-bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) {
- if (!isPerformingImport())
- return false;
- return FunctionImportGlobalProcessing::doImportAsDefinition(SGV,
- GlobalsToImport);
-}
-
static GlobalValue::VisibilityTypes
getMinVisibility(GlobalValue::VisibilityTypes A,
GlobalValue::VisibilityTypes B) {
@@ -266,18 +250,10 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
// We always have to add Src if it has appending linkage.
if (Src.hasAppendingLinkage()) {
- // Should have prevented importing for appending linkage in linkIfNeeded.
- assert(!isPerformingImport());
LinkFromSrc = true;
return false;
}
- if (isPerformingImport()) {
- // LinkFromSrc iff this is a global requested for importing.
- LinkFromSrc = GlobalsToImport->count(&Src);
- return false;
- }
-
bool SrcIsDeclaration = Src.isDeclarationForLinker();
bool DestIsDeclaration = Dest.isDeclarationForLinker();
@@ -383,19 +359,9 @@ bool ModuleLinker::linkIfNeeded(GlobalValue &GV) {
GV.setUnnamedAddr(UnnamedAddr);
}
- // Don't want to append to global_ctors list, for example, when we
- // are importing for ThinLTO, otherwise the global ctors and dtors
- // get executed multiple times for local variables (the latter causing
- // double frees).
- if (GV.hasAppendingLinkage() && isPerformingImport())
- return false;
-
- if (isPerformingImport()) {
- if (!doImportAsDefinition(&GV))
- return false;
- } else if (!DGV && !shouldOverrideFromSrc() &&
- (GV.hasLocalLinkage() || GV.hasLinkOnceLinkage() ||
- GV.hasAvailableExternallyLinkage()))
+ if (!DGV && !shouldOverrideFromSrc() &&
+ (GV.hasLocalLinkage() || GV.hasLinkOnceLinkage() ||
+ GV.hasAvailableExternallyLinkage()))
return false;
if (GV.isDeclaration())
@@ -418,17 +384,12 @@ bool ModuleLinker::linkIfNeeded(GlobalValue &GV) {
}
void ModuleLinker::addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add) {
- if (!shouldLinkReferencedLinkOnce())
- // For ThinLTO we don't import more than what was required.
- // The client has to guarantee that the linkonce will be availabe at link
- // time (by promoting it to weak for instance).
- return;
-
// Add these to the internalize list
- if (!GV.hasLinkOnceLinkage() && !shouldLinkOnlyNeeded())
+ if (!GV.hasLinkOnceLinkage() && !GV.hasAvailableExternallyLinkage() &&
+ !shouldLinkOnlyNeeded())
return;
- if (shouldInternalizeLinkedSymbols())
+ if (InternalizeCallback)
Internalize.insert(GV.getName());
Add(GV);
@@ -442,7 +403,7 @@ void ModuleLinker::addLazyFor(GlobalValue &GV, const IRMover::ValueAdder &Add) {
return;
if (!LinkFromSrc)
continue;
- if (shouldInternalizeLinkedSymbols())
+ if (InternalizeCallback)
Internalize.insert(GV2->getName());
Add(*GV2);
}
@@ -571,7 +532,7 @@ bool ModuleLinker::run() {
}
}
- if (shouldInternalizeLinkedSymbols()) {
+ if (InternalizeCallback) {
for (GlobalValue *GV : ValuesToLink)
Internalize.insert(GV->getName());
}
@@ -583,8 +544,7 @@ bool ModuleLinker::run() {
[this](GlobalValue &GV, IRMover::ValueAdder Add) {
addLazyFor(GV, Add);
},
- /* LinkModuleInlineAsm */ !isPerformingImport(),
- /* IsPerformingImport */ isPerformingImport())) {
+ /* IsPerformingImport */ false)) {
handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
DstM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, EIB.message()));
HasErrors = true;
@@ -593,19 +553,19 @@ bool ModuleLinker::run() {
if (HasErrors)
return true;
- for (auto &P : Internalize) {
- GlobalValue *GV = DstM.getNamedValue(P.first());
- GV->setLinkage(GlobalValue::InternalLinkage);
- }
+ if (InternalizeCallback)
+ InternalizeCallback(DstM, Internalize);
return false;
}
Linker::Linker(Module &M) : Mover(M) {}
-bool Linker::linkInModule(std::unique_ptr<Module> Src, unsigned Flags,
- DenseSet<const GlobalValue *> *GlobalsToImport) {
- ModuleLinker ModLinker(Mover, std::move(Src), Flags, GlobalsToImport);
+bool Linker::linkInModule(
+ std::unique_ptr<Module> Src, unsigned Flags,
+ std::function<void(Module &, const StringSet<> &)> InternalizeCallback) {
+ ModuleLinker ModLinker(Mover, std::move(Src), Flags,
+ std::move(InternalizeCallback));
return ModLinker.run();
}
@@ -618,10 +578,11 @@ bool Linker::linkInModule(std::unique_ptr<Module> Src, unsigned Flags,
/// true is returned and ErrorMsg (if not null) is set to indicate the problem.
/// Upon failure, the Dest module could be in a modified state, and shouldn't be
/// relied on to be consistent.
-bool Linker::linkModules(Module &Dest, std::unique_ptr<Module> Src,
- unsigned Flags) {
+bool Linker::linkModules(
+ Module &Dest, std::unique_ptr<Module> Src, unsigned Flags,
+ std::function<void(Module &, const StringSet<> &)> InternalizeCallback) {
Linker L(Dest);
- return L.linkInModule(std::move(Src), Flags);
+ return L.linkInModule(std::move(Src), Flags, std::move(InternalizeCallback));
}
//===----------------------------------------------------------------------===//
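
A caller that wants the old InternalizeLinkedSymbols behavior now supplies the callback itself. The lambda below mirrors what a front end would plausibly pass; internalizeModule lives in Transforms/IPO, which is exactly the layering the comment above avoids pulling into the linker:

    Linker::linkModules(
        Dest, std::move(Src), Linker::LinkOnlyNeeded,
        [](Module &M, const StringSet<> &GVS) {
          internalizeModule(M, [&GVS](const GlobalValue &GV) {
            // Preserve anything not on the just-linked list.
            return !GV.hasName() || !GVS.count(GV.getName());
          });
        });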
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 2f1b39e58e33..a86fd383003d 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMMC
MCAsmInfoCOFF.cpp
MCAsmInfoDarwin.cpp
MCAsmInfoELF.cpp
+ MCAsmInfoWasm.cpp
MCAsmStreamer.cpp
MCAssembler.cpp
MCCodeEmitter.cpp
@@ -34,17 +35,21 @@ add_llvm_library(LLVMMC
MCSectionCOFF.cpp
MCSectionELF.cpp
MCSectionMachO.cpp
+ MCSectionWasm.cpp
MCStreamer.cpp
MCSubtargetInfo.cpp
MCSymbol.cpp
MCSymbolELF.cpp
MCTargetOptions.cpp
MCValue.cpp
+ MCWasmObjectTargetWriter.cpp
+ MCWasmStreamer.cpp
MCWin64EH.cpp
MCWinEH.cpp
MachObjectWriter.cpp
StringTableBuilder.cpp
SubtargetFeature.cpp
+ WasmObjectWriter.cpp
WinCOFFObjectWriter.cpp
WinCOFFStreamer.cpp
diff --git a/lib/MC/ConstantPools.cpp b/lib/MC/ConstantPools.cpp
index 9608c2c656b7..8c94e2780998 100644
--- a/lib/MC/ConstantPools.cpp
+++ b/lib/MC/ConstantPools.cpp
@@ -1,4 +1,4 @@
-//===- ConstantPools.cpp - ConstantPool class --*- C++ -*---------===//
+//===- ConstantPools.cpp - ConstantPool class -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,13 +10,16 @@
// This file implements the ConstantPool and AssemblerConstantPools classes.
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/MapVector.h"
+
#include "llvm/MC/ConstantPools.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Casting.h"
using namespace llvm;
+
//
// ConstantPool implementation
//
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index a8c88dda6936..ee9c25cda94f 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -11,29 +11,49 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Compression.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
#include <vector>
using namespace llvm;
@@ -42,6 +62,7 @@ using namespace llvm;
#define DEBUG_TYPE "reloc-info"
namespace {
+
typedef DenseMap<const MCSectionELF *, uint32_t> SectionIndexMapTy;
class ELFObjectWriter;
@@ -99,8 +120,7 @@ class ELFObjectWriter : public MCObjectWriter {
DenseMap<const MCSymbolELF *, const MCSymbolELF *> Renames;
- llvm::DenseMap<const MCSectionELF *, std::vector<ELFRelocationEntry>>
- Relocations;
+ DenseMap<const MCSectionELF *, std::vector<ELFRelocationEntry>> Relocations;
/// @}
/// @name Symbol Table Data
@@ -144,6 +164,8 @@ public:
bool IsLittleEndian)
: MCObjectWriter(OS, IsLittleEndian), TargetObjectWriter(MOTW) {}
+ ~ELFObjectWriter() override = default;
+
void reset() override {
Renames.clear();
Relocations.clear();
@@ -152,8 +174,6 @@ public:
MCObjectWriter::reset();
}
- ~ELFObjectWriter() override;
-
void WriteWord(uint64_t W) {
if (is64Bit())
write64(W);
@@ -222,18 +242,18 @@ public:
void writeRelocations(const MCAssembler &Asm, const MCSectionELF &Sec);
+ using MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl;
bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
const MCSymbol &SymA,
const MCFragment &FB, bool InSet,
bool IsPCRel) const override;
- bool isWeak(const MCSymbol &Sym) const override;
-
void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
void writeSection(const SectionIndexMapTy &SectionIndexMap,
uint32_t GroupSymbolIndex, uint64_t Offset, uint64_t Size,
const MCSectionELF &Section);
};
+
} // end anonymous namespace
void ELFObjectWriter::align(unsigned Alignment) {
@@ -297,9 +317,6 @@ void SymbolTableWriter::writeSymbol(uint32_t name, uint8_t info, uint64_t value,
++NumWritten;
}
-ELFObjectWriter::~ELFObjectWriter()
-{}
-
// Emit the ELF header.
void ELFObjectWriter::writeHeader(const MCAssembler &Asm) {
// ELF Header
@@ -370,22 +387,6 @@ uint64_t ELFObjectWriter::SymbolValue(const MCSymbol &Sym,
void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) {
- // Section symbols are used as definitions for undefined symbols with matching
- // names. If there are multiple sections with the same name, the first one is
- // used.
- for (const MCSection &Sec : Asm) {
- const MCSymbol *Begin = Sec.getBeginSymbol();
- if (!Begin)
- continue;
-
- const MCSymbol *Alias = Asm.getContext().lookupSymbol(Begin->getName());
- if (!Alias || !Alias->isUndefined())
- continue;
-
- Renames.insert(
- std::make_pair(cast<MCSymbolELF>(Alias), cast<MCSymbolELF>(Begin)));
- }
-
// The presence of symbol versions causes undefined symbols and
// versions declared with @@@ to be renamed.
for (const MCSymbol &A : Asm.symbols()) {
@@ -900,6 +901,8 @@ void ELFObjectWriter::computeSymbolTable(
StrTabBuilder.finalize();
+ // File symbols are emitted first and handled separately from normal symbols,
+ // i.e. a non-STT_FILE symbol with the same name may appear.
for (const std::string &Name : FileNames)
Writer.writeSymbol(StrTabBuilder.getOffset(Name),
ELF::STT_FILE | ELF::STB_LOCAL, 0, 0, ELF::STV_DEFAULT,
@@ -1037,10 +1040,10 @@ void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
setStream(OldStream);
SmallVector<char, 128> CompressedContents;
- zlib::Status Success = zlib::compress(
- StringRef(UncompressedData.data(), UncompressedData.size()),
- CompressedContents);
- if (Success != zlib::StatusOK) {
+ if (Error E = zlib::compress(
+ StringRef(UncompressedData.data(), UncompressedData.size()),
+ CompressedContents)) {
+ consumeError(std::move(E));
getStream() << UncompressedData;
return;
}
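
zlib::compress now reports failure as llvm::Error rather than a status enum, so a caller either consumes the error to fall back (as above) or propagates it:

    SmallVector<char, 128> Compressed;
    StringRef Input("example payload");
    if (Error E = zlib::compress(Input, Compressed))
      return std::move(E); // or consumeError(std::move(E)) and fall back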
@@ -1151,8 +1154,8 @@ void ELFObjectWriter::writeSection(const SectionIndexMapTy &SectionIndexMap,
case ELF::SHT_RELA: {
sh_link = SymbolTableIndex;
assert(sh_link && ".symtab not found");
- const MCSectionELF *InfoSection = Section.getAssociatedSection();
- sh_info = SectionIndexMap.lookup(InfoSection);
+ const MCSection *InfoSection = Section.getAssociatedSection();
+ sh_info = SectionIndexMap.lookup(cast<MCSectionELF>(InfoSection));
break;
}
@@ -1172,9 +1175,11 @@ void ELFObjectWriter::writeSection(const SectionIndexMapTy &SectionIndexMap,
break;
}
- if (TargetObjectWriter->getEMachine() == ELF::EM_ARM &&
- Section.getType() == ELF::SHT_ARM_EXIDX)
- sh_link = SectionIndexMap.lookup(Section.getAssociatedSection());
+ if (Section.getFlags() & ELF::SHF_LINK_ORDER) {
+ const MCSymbol *Sym = Section.getAssociatedSymbol();
+ const MCSectionELF *Sec = cast<MCSectionELF>(&Sym->getSection());
+ sh_link = SectionIndexMap.lookup(Sec);
+ }
WriteSecHdrEntry(StrTabBuilder.getOffset(Section.getSectionName()),
Section.getType(), Section.getFlags(), 0, Offset, Size,
@@ -1298,7 +1303,8 @@ void ELFObjectWriter::writeObject(MCAssembler &Asm,
// Remember the offset into the file for this section.
uint64_t SecStart = getStream().tell();
- writeRelocations(Asm, *RelSection->getAssociatedSection());
+ writeRelocations(Asm,
+ cast<MCSectionELF>(*RelSection->getAssociatedSection()));
uint64_t SecEnd = getStream().tell();
SectionOffsets[RelSection] = std::make_pair(SecStart, SecEnd);
@@ -1351,34 +1357,13 @@ bool ELFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
const auto &SymA = cast<MCSymbolELF>(SA);
if (IsPCRel) {
assert(!InSet);
- if (::isWeak(SymA))
+ if (isWeak(SymA))
return false;
}
return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB,
InSet, IsPCRel);
}
-bool ELFObjectWriter::isWeak(const MCSymbol &S) const {
- const auto &Sym = cast<MCSymbolELF>(S);
- if (::isWeak(Sym))
- return true;
-
- // It is invalid to replace a reference to a global in a comdat
- // with a reference to a local since out of comdat references
- // to a local are forbidden.
- // We could try to return false for more cases, like the reference
- // being in the same comdat or Sym being an alias to another global,
- // but it is not clear if it is worth the effort.
- if (Sym.getBinding() != ELF::STB_GLOBAL)
- return false;
-
- if (!Sym.isInSection())
- return false;
-
- const auto &Sec = cast<MCSectionELF>(Sym.getSection());
- return Sec.getGroup();
-}
-
MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
raw_pwrite_stream &OS,
bool IsLittleEndian) {
diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp
index 570f764f6642..fc0aa788f6d3 100644
--- a/lib/MC/MCAsmBackend.cpp
+++ b/lib/MC/MCAsmBackend.cpp
@@ -1,4 +1,4 @@
-//===-- MCAsmBackend.cpp - Target MC Assembly Backend ----------------------==//
+//===- MCAsmBackend.cpp - Target MC Assembly Backend ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,14 +7,19 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCFixupKindInfo.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+
using namespace llvm;
-MCAsmBackend::MCAsmBackend() {}
+MCAsmBackend::MCAsmBackend() = default;
-MCAsmBackend::~MCAsmBackend() {}
+MCAsmBackend::~MCAsmBackend() = default;
Optional<MCFixupKind> MCAsmBackend::getFixupKind(StringRef Name) const {
return None;
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 3eb8f50de5a8..b9be685cedc4 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -1,4 +1,4 @@
-//===-- MCAsmInfo.cpp - Asm Info -------------------------------------------==//
+//===- MCAsmInfo.cpp - Asm Info -------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,29 +16,14 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Dwarf.h"
-#include <cctype>
-#include <cstring>
+
using namespace llvm;
MCAsmInfo::MCAsmInfo() {
- PointerSize = 4;
- CalleeSaveStackSlotSize = 4;
-
- IsLittleEndian = true;
- StackGrowsUp = false;
- HasSubsectionsViaSymbols = false;
- HasMachoZeroFillDirective = false;
- HasMachoTBSSDirective = false;
- MaxInstLength = 4;
- MinInstAlignment = 1;
- DollarIsPC = false;
SeparatorString = ";";
CommentString = "#";
LabelSuffix = ":";
- UseAssignmentForEHBegin = false;
- NeedsLocalForSize = false;
PrivateGlobalPrefix = "L";
PrivateLabelPrefix = PrivateGlobalPrefix;
LinkerPrivateGlobalPrefix = "";
@@ -47,10 +32,6 @@ MCAsmInfo::MCAsmInfo() {
Code16Directive = ".code16";
Code32Directive = ".code32";
Code64Directive = ".code64";
- AssemblerDialect = 0;
- AllowAtInName = false;
- SupportsQuotedNames = true;
- UseDataRegionDirectives = false;
ZeroDirective = "\t.zero\t";
AsciiDirective = "\t.ascii\t";
AscizDirective = "\t.asciz\t";
@@ -58,40 +39,8 @@ MCAsmInfo::MCAsmInfo() {
Data16bitsDirective = "\t.short\t";
Data32bitsDirective = "\t.long\t";
Data64bitsDirective = "\t.quad\t";
- SunStyleELFSectionSwitchSyntax = false;
- UsesELFSectionDirectiveForBSS = false;
- AlignmentIsInBytes = true;
- TextAlignFillValue = 0;
- GPRel64Directive = nullptr;
- GPRel32Directive = nullptr;
GlobalDirective = "\t.globl\t";
- SetDirectiveSuppressesReloc = false;
- HasAggressiveSymbolFolding = true;
- COMMDirectiveAlignmentIsInBytes = true;
- LCOMMDirectiveAlignmentType = LCOMM::NoAlignment;
- HasFunctionAlignment = true;
- HasDotTypeDotSizeDirective = true;
- HasSingleParameterDotFile = true;
- HasIdentDirective = false;
- HasNoDeadStrip = false;
- HasAltEntry = false;
WeakDirective = "\t.weak\t";
- WeakRefDirective = nullptr;
- HasWeakDefDirective = false;
- HasWeakDefCanBeHiddenDirective = false;
- HasLinkOnceDirective = false;
- HiddenVisibilityAttr = MCSA_Hidden;
- HiddenDeclarationVisibilityAttr = MCSA_Hidden;
- ProtectedVisibilityAttr = MCSA_Protected;
- SupportsDebugInformation = false;
- ExceptionsType = ExceptionHandling::None;
- WinEHEncodingType = WinEH::EncodingType::Invalid;
- DwarfUsesRelocationsAcrossSections = true;
- DwarfFDESymbolsUseAbsDiff = false;
- DwarfRegNumForCFI = false;
- NeedsDwarfSectionOffsetDirective = false;
- UseParensForSymbolVariant = false;
- UseLogicalShr = true;
// FIXME: Clang's logic should be synced with the logic used to initialize
// this member and the two implementations should be merged.
@@ -107,12 +56,9 @@ MCAsmInfo::MCAsmInfo() {
// - The target subclasses for AArch64, ARM, and X86 handle these cases
UseIntegratedAssembler = false;
PreserveAsmComments = true;
-
- CompressDebugSections = DebugCompressionType::DCT_None;
}
-MCAsmInfo::~MCAsmInfo() {
-}
+MCAsmInfo::~MCAsmInfo() = default;
bool MCAsmInfo::isSectionAtomizableBySymbols(const MCSection &Section) const {
return false;
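
The deleted assignments did not disappear; they became in-class default member initializers in MCAsmInfo.h, so the constructor only keeps values that differ from the declared defaults. The header-side pattern, using names from the removed lines:

    class MCAsmInfo {
      // ...
      unsigned PointerSize = 4;
      bool IsLittleEndian = true;
      ExceptionHandling ExceptionsType = ExceptionHandling::None;
      bool UseLogicalShr = true;
      // ...
    };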
diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp
index 5b9dd2009f8b..85104484fd40 100644
--- a/lib/MC/MCAsmInfoCOFF.cpp
+++ b/lib/MC/MCAsmInfoCOFF.cpp
@@ -1,4 +1,4 @@
-//===-- MCAsmInfoCOFF.cpp - COFF asm properties -----------------*- C++ -*-===//
+//===- MCAsmInfoCOFF.cpp - COFF asm properties ----------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,9 +13,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfoCOFF.h"
+#include "llvm/MC/MCDirectives.h"
+
using namespace llvm;
-void MCAsmInfoCOFF::anchor() { }
+void MCAsmInfoCOFF::anchor() {}
MCAsmInfoCOFF::MCAsmInfoCOFF() {
// MingW 4.5 and later support .comm with log2 alignment, but .lcomm uses byte
@@ -41,13 +43,10 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
UseLogicalShr = false;
}
-void MCAsmInfoMicrosoft::anchor() { }
-
-MCAsmInfoMicrosoft::MCAsmInfoMicrosoft() {
-}
+void MCAsmInfoMicrosoft::anchor() {}
-void MCAsmInfoGNUCOFF::anchor() { }
+MCAsmInfoMicrosoft::MCAsmInfoMicrosoft() = default;
-MCAsmInfoGNUCOFF::MCAsmInfoGNUCOFF() {
+void MCAsmInfoGNUCOFF::anchor() {}
-}
+MCAsmInfoGNUCOFF::MCAsmInfoGNUCOFF() = default;
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index e95cf488cd30..4b2001764e97 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -1,4 +1,4 @@
-//===-- MCAsmInfoDarwin.cpp - Darwin asm properties -------------*- C++ -*-===//
+//===- MCAsmInfoDarwin.cpp - Darwin asm properties ------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,9 +13,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfoDarwin.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Support/MachO.h"
+
using namespace llvm;
bool MCAsmInfoDarwin::isSectionAtomizableBySymbols(
diff --git a/lib/MC/MCAsmInfoELF.cpp b/lib/MC/MCAsmInfoELF.cpp
index 26e5608d8733..e44c08b50d76 100644
--- a/lib/MC/MCAsmInfoELF.cpp
+++ b/lib/MC/MCAsmInfoELF.cpp
@@ -1,4 +1,4 @@
-//===-- MCAsmInfoELF.cpp - ELF asm properties -------------------*- C++ -*-===//
+//===- MCAsmInfoELF.cpp - ELF asm properties ------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,9 +16,10 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/ELF.h"
+
using namespace llvm;
-void MCAsmInfoELF::anchor() { }
+void MCAsmInfoELF::anchor() {}
MCSection *MCAsmInfoELF::getNonexecutableStackSection(MCContext &Ctx) const {
if (!UsesNonexecutableStackSection)
@@ -31,5 +32,4 @@ MCAsmInfoELF::MCAsmInfoELF() {
WeakRefDirective = "\t.weak\t";
PrivateGlobalPrefix = ".L";
PrivateLabelPrefix = ".L";
- UsesNonexecutableStackSection = true;
}
diff --git a/lib/MC/MCAsmInfoWasm.cpp b/lib/MC/MCAsmInfoWasm.cpp
new file mode 100644
index 000000000000..aa26616dda36
--- /dev/null
+++ b/lib/MC/MCAsmInfoWasm.cpp
@@ -0,0 +1,27 @@
+//===-- MCAsmInfoWasm.cpp - Wasm asm properties -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related what form asm statements
+// should take in general on Wasm-based targets
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfoWasm.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionWasm.h"
+using namespace llvm;
+
+void MCAsmInfoWasm::anchor() { }
+
+MCAsmInfoWasm::MCAsmInfoWasm() {
+ HasIdentDirective = true;
+ WeakRefDirective = "\t.weak\t";
+ PrivateGlobalPrefix = ".L";
+ PrivateLabelPrefix = ".L";
+}
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 817009a65363..9e5553fa8d42 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -103,7 +103,10 @@ public:
void AddComment(const Twine &T, bool EOL = true) override;
/// AddEncodingComment - Add a comment showing the encoding of an instruction.
- void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &);
+ /// If PrintSchedInfo is true, a "sched: [x:y]" comment is appended to the
+ /// output when the target supports it.
+ void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &,
+ bool PrintSchedInfo);
/// GetCommentOS - Return a raw_ostream that comments can be written to.
/// Unlike AddComment, you are required to terminate comments with \n if you
@@ -130,7 +133,7 @@ public:
void ChangeSection(MCSection *Section, const MCExpr *Subsection) override;
void EmitLOHDirective(MCLOHType Kind, const MCLOHArgs &Args) override;
- void EmitLabel(MCSymbol *Symbol) override;
+ void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
void EmitLinkerOptions(ArrayRef<std::string> Options) override;
@@ -278,7 +281,8 @@ public:
void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except) override;
void EmitWinEHHandlerData() override;
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool PrintSchedInfo) override;
void EmitBundleAlignMode(unsigned AlignPow2) override;
void EmitBundleLock(bool AlignToEnd) override;
@@ -392,12 +396,13 @@ void MCAsmStreamer::emitExplicitComments() {
void MCAsmStreamer::ChangeSection(MCSection *Section,
const MCExpr *Subsection) {
assert(Section && "Cannot switch to a null section!");
- Section->PrintSwitchToSection(*MAI, OS, Subsection);
+ Section->PrintSwitchToSection(
+ *MAI, getContext().getObjectFileInfo()->getTargetTriple(), OS,
+ Subsection);
}
-void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
- assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- MCStreamer::EmitLabel(Symbol);
+void MCAsmStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) {
+ MCStreamer::EmitLabel(Symbol, Loc);
Symbol->print(OS, MAI);
OS << MAI->getLabelSuffix();
@@ -1503,7 +1508,8 @@ void MCAsmStreamer::EmitWinCFIEndProlog() {
}
void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
- const MCSubtargetInfo &STI) {
+ const MCSubtargetInfo &STI,
+ bool PrintSchedInfo) {
raw_ostream &OS = GetCommentOS();
SmallString<256> Code;
SmallVector<MCFixup, 4> Fixups;
@@ -1576,7 +1582,11 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
}
}
}
- OS << "]\n";
+ OS << "]";
+ // If we are not going to add fixup or sched comments after this point, we
+ // have to end the current comment line with "\n".
+ if (Fixups.size() || !PrintSchedInfo)
+ OS << "\n";
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
MCFixup &F = Fixups[i];
@@ -1587,16 +1597,19 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
}
void MCAsmStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) {
+ const MCSubtargetInfo &STI,
+ bool PrintSchedInfo) {
assert(getCurrentSectionOnly() &&
"Cannot emit contents before setting section!");
// Show the encoding in a comment if we have a code emitter.
if (Emitter)
- AddEncodingComment(Inst, STI);
+ AddEncodingComment(Inst, STI, PrintSchedInfo);
// Show the MCInst if enabled.
if (ShowInst) {
+ if (PrintSchedInfo)
+ GetCommentOS() << "\n";
Inst.dump_pretty(GetCommentOS(), InstPrinter.get(), "\n ");
GetCommentOS() << "\n";
}
@@ -1606,6 +1619,16 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst,
else
InstPrinter->printInst(&Inst, OS, "", STI);
+ if (PrintSchedInfo) {
+ std::string SI = STI.getSchedInfoStr(Inst);
+ if (!SI.empty())
+ GetCommentOS() << SI;
+ }
+
+ StringRef Comments = CommentToEmit;
+ if (Comments.size() && Comments.back() != '\n')
+ GetCommentOS() << "\n";
+
EmitEOL();
}
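
With PrintSchedInfo enabled, each printed instruction gains a trailing scheduling comment. An illustrative line of output follows; the exact text comes from STI.getSchedInfoStr(), so the numbers here are invented:

    //   addl %esi, %edi   # sched: [1:0.25]   (latency : reciprocal throughput)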
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 83fcec92e2b5..c2bb7b277181 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -7,36 +7,49 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAssembler.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCCodeView.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCFragment.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <cstring>
#include <tuple>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "assembler"
namespace {
namespace stats {
+
STATISTIC(EmittedFragments, "Number of emitted assembler fragments - total");
STATISTIC(EmittedRelaxableFragments,
"Number of emitted assembler fragments - relaxable");
@@ -55,8 +68,9 @@ STATISTIC(FragmentLayouts, "Number of fragment layouts");
STATISTIC(ObjectBytes, "Number of emitted object file bytes");
STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps");
STATISTIC(RelaxedInstructions, "Number of relaxed instructions");
-}
-}
+
+} // end namespace stats
+} // end anonymous namespace
// FIXME FIXME FIXME: There are number of places in this file where we convert
// what is a 64-bit assembler value used for computation into a value in the
@@ -73,8 +87,7 @@ MCAssembler::MCAssembler(MCContext &Context, MCAsmBackend &Backend,
VersionMinInfo.Major = 0; // Major version == 0 for "none specified"
}
-MCAssembler::~MCAssembler() {
-}
+MCAssembler::~MCAssembler() = default;
void MCAssembler::reset() {
Sections.clear();
@@ -114,10 +127,16 @@ bool MCAssembler::isThumbFunc(const MCSymbol *Symbol) const {
if (!Symbol->isVariable())
return false;
- // FIXME: It looks like gas supports some cases of the form "foo + 2". It
- // is not clear if that is a bug or a feature.
const MCExpr *Expr = Symbol->getVariableValue();
- const MCSymbolRefExpr *Ref = dyn_cast<MCSymbolRefExpr>(Expr);
+
+ MCValue V;
+ if (!Expr->evaluateAsRelocatable(V, nullptr, nullptr))
+ return false;
+
+ if (V.getSymB() || V.getRefKind() != MCSymbolRefExpr::VK_None)
+ return false;
+
+ const MCSymbolRefExpr *Ref = V.getSymA();
if (!Ref)
return false;
@@ -219,7 +238,6 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
Value -= Layout.getSymbolOffset(Sym);
}
-
bool ShouldAlignPC = Backend.getFixupKindInfo(Fixup.getKind()).Flags &
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits;
assert((ShouldAlignPC ? IsPCRel : true) &&
@@ -641,7 +659,7 @@ std::pair<uint64_t, bool> MCAssembler::handleFixup(const MCAsmLayout &Layout,
void MCAssembler::layout(MCAsmLayout &Layout) {
DEBUG_WITH_TYPE("mc-dump", {
- llvm::errs() << "assembler backend - pre-layout\n--\n";
+ errs() << "assembler backend - pre-layout\n--\n";
dump(); });
// Create dummy fragments and assign section ordinals.
@@ -671,14 +689,14 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
return;
DEBUG_WITH_TYPE("mc-dump", {
- llvm::errs() << "assembler backend - post-relaxation\n--\n";
+ errs() << "assembler backend - post-relaxation\n--\n";
dump(); });
// Finalize the layout, including fragment lowering.
finishLayout(Layout);
DEBUG_WITH_TYPE("mc-dump", {
- llvm::errs() << "assembler backend - final-layout\n--\n";
+ errs() << "assembler backend - final-layout\n--\n";
dump(); });
// Allow the object writer a chance to perform post-layout binding (for
@@ -714,8 +732,8 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
uint64_t FixedValue;
bool IsPCRel;
std::tie(FixedValue, IsPCRel) = handleFixup(Layout, Frag, Fixup);
- getBackend().applyFixup(Fixup, Contents.data(),
- Contents.size(), FixedValue, IsPCRel);
+ getBackend().applyFixup(Fixup, Contents.data(), Contents.size(),
+ FixedValue, IsPCRel, getContext());
}
}
}
@@ -741,6 +759,10 @@ bool MCAssembler::fixupNeedsRelaxation(const MCFixup &Fixup,
MCValue Target;
uint64_t Value;
bool Resolved = evaluateFixup(Layout, Fixup, DF, Target, Value);
+ if (Target.getSymA() &&
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_X86_ABS8 &&
+ Fixup.getKind() == FK_Data_1)
+ return false;
return getBackend().fixupNeedsRelaxationAdvanced(Fixup, Resolved, Value, DF,
Layout);
}
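
The new early return above exempts one case from relaxation: a one-byte data fixup whose target symbol carries the X86 ABS8 variant (spelled something like `sym@ABS8` in assembly, per the getVariantKindName/getVariantKindForName entries added below) is known to fit in 8 bits, so the backend is never asked to widen it. A hedged sketch of the guard in isolation, with stand-in enums:

    // Sketch only; mirrors the shape of the guard, not the real MC types.
    enum VariantKind { VK_None, VK_X86_ABS8 };
    enum FixupKind { FK_Data_1, FK_Data_4 };

    static bool mayNeedRelaxation(bool HasSymA, VariantKind SymAKind,
                                  FixupKind Kind) {
      if (HasSymA && SymAKind == VK_X86_ABS8 && Kind == FK_Data_1)
        return false;                   // @ABS8 + 1-byte fixup always fits
      return true;                      // otherwise defer to the backend
    }
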
diff --git a/lib/MC/MCCodeEmitter.cpp b/lib/MC/MCCodeEmitter.cpp
index c122763b2fe5..ca69478ed10d 100644
--- a/lib/MC/MCCodeEmitter.cpp
+++ b/lib/MC/MCCodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- MCCodeEmitter.cpp - Instruction Encoding --------------------------===//
+//===- MCCodeEmitter.cpp - Instruction Encoding ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,6 @@
using namespace llvm;
-MCCodeEmitter::MCCodeEmitter() {
-}
+MCCodeEmitter::MCCodeEmitter() = default;
-MCCodeEmitter::~MCCodeEmitter() {
-}
+MCCodeEmitter::~MCCodeEmitter() = default;
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 4798991ceed6..4628d0ab88f3 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -7,30 +7,43 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCContext.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeView.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCLabel.h"
#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolCOFF.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSymbolMachO.h"
+#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
+#include <cassert>
+#include <cstdlib>
+#include <tuple>
+#include <utility>
using namespace llvm;
@@ -40,19 +53,14 @@ AsSecureLogFileName("as-secure-log-file-name",
"AS_SECURE_LOG_FILE env variable)"),
cl::init(getenv("AS_SECURE_LOG_FILE")), cl::Hidden);
-
MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
const MCObjectFileInfo *mofi, const SourceMgr *mgr,
bool DoAutoReset)
- : SrcMgr(mgr), MAI(mai), MRI(mri), MOFI(mofi), Allocator(),
+ : SrcMgr(mgr), InlineSrcMgr(nullptr), MAI(mai), MRI(mri), MOFI(mofi),
Symbols(Allocator), UsedNames(Allocator),
- CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), DwarfLocSeen(false),
- GenDwarfForAssembly(false), GenDwarfFileNumber(0), DwarfVersion(4),
- AllowTemporaryLabels(true), DwarfCompileUnitID(0),
- AutoReset(DoAutoReset), HadError(false) {
+ CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0),
+ AutoReset(DoAutoReset) {
SecureLogFile = AsSecureLogFileName;
- SecureLog = nullptr;
- SecureLogUsed = false;
if (SrcMgr && SrcMgr->getNumBuffers())
MainFileName =
@@ -80,7 +88,6 @@ void MCContext::reset() {
MCSubtargetAllocator.DestroyAll();
UsedNames.clear();
Symbols.clear();
- SectionSymbols.clear();
Allocator.Reset();
Instances.clear();
CompilationDir.clear();
@@ -124,18 +131,6 @@ MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
return Sym;
}
-MCSymbolELF *MCContext::getOrCreateSectionSymbol(const MCSectionELF &Section) {
- MCSymbol *&Sym = SectionSymbols[&Section];
- if (Sym)
- return cast<MCSymbolELF>(Sym);
-
- StringRef Name = Section.getSectionName();
- auto NameIter = UsedNames.insert(std::make_pair(Name, false)).first;
- Sym = new (&*NameIter, *this) MCSymbolELF(&*NameIter, /*isTemporary*/ false);
-
- return cast<MCSymbolELF>(Sym);
-}
-
MCSymbol *MCContext::getOrCreateFrameAllocSymbol(StringRef FuncName,
unsigned Idx) {
return getOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + FuncName +
@@ -162,6 +157,8 @@ MCSymbol *MCContext::createSymbolImpl(const StringMapEntry<bool> *Name,
return new (Name, *this) MCSymbolELF(Name, IsTemporary);
case MCObjectFileInfo::IsMachO:
return new (Name, *this) MCSymbolMachO(Name, IsTemporary);
+ case MCObjectFileInfo::IsWasm:
+ return new (Name, *this) MCSymbolWasm(Name, IsTemporary);
}
}
return new (Name, *this) MCSymbol(MCSymbol::SymbolKindUnset, Name,
@@ -182,7 +179,7 @@ MCSymbol *MCContext::createSymbol(StringRef Name, bool AlwaysAddSuffix,
SmallString<128> NewName = Name;
bool AddSuffix = AlwaysAddSuffix;
unsigned &NextUniqueID = NextID[Name];
- for (;;) {
+ while (true) {
if (AddSuffix) {
NewName.resize(Name.size());
raw_svector_ostream(NewName) << NextUniqueID++;
@@ -275,7 +272,6 @@ MCSectionMachO *MCContext::getMachOSection(StringRef Segment, StringRef Section,
unsigned TypeAndAttributes,
unsigned Reserved2, SectionKind Kind,
const char *BeginSymName) {
-
// We unique sections by their segment/section pair. The returned section
// may not have the same flags as the requested section; if so, the client
// should diagnose this as an error.
@@ -316,18 +312,53 @@ void MCContext::renameELFSection(MCSectionELF *Section, StringRef Name) {
const_cast<MCSectionELF *>(Section)->setSectionName(CachedName);
}
+MCSectionELF *MCContext::createELFSectionImpl(StringRef Section, unsigned Type,
+ unsigned Flags, SectionKind K,
+ unsigned EntrySize,
+ const MCSymbolELF *Group,
+ unsigned UniqueID,
+ const MCSymbolELF *Associated) {
+ MCSymbolELF *R;
+ MCSymbol *&Sym = Symbols[Section];
+  // A section symbol cannot redefine regular symbols. There may be multiple
+ // sections with the same name, in which case the first such section wins.
+ if (Sym && Sym->isDefined() &&
+ (!Sym->isInSection() || Sym->getSection().getBeginSymbol() != Sym))
+ reportError(SMLoc(), "invalid symbol redefinition");
+ if (Sym && Sym->isUndefined()) {
+ R = cast<MCSymbolELF>(Sym);
+ } else {
+ auto NameIter = UsedNames.insert(std::make_pair(Section, false)).first;
+ R = new (&*NameIter, *this) MCSymbolELF(&*NameIter, /*isTemporary*/ false);
+ if (!Sym)
+ Sym = R;
+ }
+ R->setBinding(ELF::STB_LOCAL);
+ R->setType(ELF::STT_SECTION);
+
+ auto *Ret = new (ELFAllocator.Allocate()) MCSectionELF(
+ Section, Type, Flags, K, EntrySize, Group, UniqueID, R, Associated);
+
+ auto *F = new MCDataFragment();
+ Ret->getFragmentList().insert(Ret->begin(), F);
+ F->setParent(Ret);
+ R->setFragment(F);
+
+ return Ret;
+}
+
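
createELFSectionImpl encodes a small naming policy: an undefined symbol with the section's name is adopted as the section symbol, a defined regular symbol of that name is a redefinition error, and when several sections share a name the first one keeps the symbol-table slot. A self-contained sketch of that policy (std::map standing in for the Symbols table; not the real MCContext code):

    #include <map>
    #include <string>

    struct Sym { bool Defined = false; bool IsSectionSym = false; };

    static Sym *takeSectionSymbol(std::map<std::string, Sym *> &Symbols,
                                  const std::string &Name, bool &Error) {
      Sym *&S = Symbols[Name];
      if (S && S->Defined && !S->IsSectionSym) {
        Error = true;                   // a regular symbol already owns the name
        return nullptr;
      }
      if (S && !S->Defined)
        return S;                       // adopt the forward reference
      Sym *R = new Sym();               // fresh symbol
      R->IsSectionSym = true;
      if (!S)
        S = R;                          // first section with this name wins
      return R;
    }
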
MCSectionELF *MCContext::createELFRelSection(const Twine &Name, unsigned Type,
unsigned Flags, unsigned EntrySize,
const MCSymbolELF *Group,
- const MCSectionELF *Associated) {
+ const MCSectionELF *RelInfoSection) {
StringMap<bool>::iterator I;
bool Inserted;
std::tie(I, Inserted) =
- ELFRelSecNames.insert(std::make_pair(Name.str(), true));
+ RelSecNames.insert(std::make_pair(Name.str(), true));
- return new (ELFAllocator.Allocate())
- MCSectionELF(I->getKey(), Type, Flags, SectionKind::getReadOnly(),
- EntrySize, Group, true, nullptr, Associated);
+ return createELFSectionImpl(
+ I->getKey(), Type, Flags, SectionKind::getReadOnly(), EntrySize, Group,
+ true, cast<MCSymbolELF>(RelInfoSection->getBeginSymbol()));
}
MCSectionELF *MCContext::getELFNamedSection(const Twine &Prefix,
@@ -340,21 +371,20 @@ MCSectionELF *MCContext::getELFNamedSection(const Twine &Prefix,
MCSectionELF *MCContext::getELFSection(const Twine &Section, unsigned Type,
unsigned Flags, unsigned EntrySize,
const Twine &Group, unsigned UniqueID,
- const char *BeginSymName) {
+ const MCSymbolELF *Associated) {
MCSymbolELF *GroupSym = nullptr;
if (!Group.isTriviallyEmpty() && !Group.str().empty())
GroupSym = cast<MCSymbolELF>(getOrCreateSymbol(Group));
return getELFSection(Section, Type, Flags, EntrySize, GroupSym, UniqueID,
- BeginSymName, nullptr);
+ Associated);
}
MCSectionELF *MCContext::getELFSection(const Twine &Section, unsigned Type,
unsigned Flags, unsigned EntrySize,
const MCSymbolELF *GroupSym,
unsigned UniqueID,
- const char *BeginSymName,
- const MCSectionELF *Associated) {
+ const MCSymbolELF *Associated) {
StringRef Group = "";
if (GroupSym)
Group = GroupSym->getName();
@@ -375,22 +405,16 @@ MCSectionELF *MCContext::getELFSection(const Twine &Section, unsigned Type,
else
Kind = SectionKind::getReadOnly();
- MCSymbol *Begin = nullptr;
- if (BeginSymName)
- Begin = createTempSymbol(BeginSymName, false);
-
- MCSectionELF *Result = new (ELFAllocator.Allocate())
- MCSectionELF(CachedName, Type, Flags, Kind, EntrySize, GroupSym, UniqueID,
- Begin, Associated);
+ MCSectionELF *Result = createELFSectionImpl(
+ CachedName, Type, Flags, Kind, EntrySize, GroupSym, UniqueID, Associated);
Entry.second = Result;
return Result;
}
MCSectionELF *MCContext::createELFGroupSection(const MCSymbolELF *Group) {
- MCSectionELF *Result = new (ELFAllocator.Allocate())
- MCSectionELF(".group", ELF::SHT_GROUP, 0, SectionKind::getReadOnly(), 4,
- Group, ~0, nullptr, nullptr);
- return Result;
+ return createELFSectionImpl(".group", ELF::SHT_GROUP, 0,
+ SectionKind::getReadOnly(), 4, Group, ~0,
+ nullptr);
}
MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
@@ -462,6 +486,80 @@ MCSectionCOFF *MCContext::getAssociativeCOFFSection(MCSectionCOFF *Sec,
"", 0, UniqueID);
}
+void MCContext::renameWasmSection(MCSectionWasm *Section, StringRef Name) {
+ StringRef GroupName;
+ assert(!Section->getGroup() && "not yet implemented");
+
+ unsigned UniqueID = Section->getUniqueID();
+ WasmUniquingMap.erase(
+ WasmSectionKey{Section->getSectionName(), GroupName, UniqueID});
+ auto I = WasmUniquingMap.insert(std::make_pair(
+ WasmSectionKey{Name, GroupName, UniqueID},
+ Section))
+ .first;
+ StringRef CachedName = I->first.SectionName;
+ const_cast<MCSectionWasm *>(Section)->setSectionName(CachedName);
+}
+
+MCSectionWasm *MCContext::createWasmRelSection(const Twine &Name, unsigned Type,
+ unsigned Flags,
+ const MCSymbolWasm *Group) {
+ StringMap<bool>::iterator I;
+ bool Inserted;
+ std::tie(I, Inserted) =
+ RelSecNames.insert(std::make_pair(Name.str(), true));
+
+ return new (WasmAllocator.Allocate())
+ MCSectionWasm(I->getKey(), Type, Flags, SectionKind::getReadOnly(),
+ Group, ~0, nullptr);
+}
+
+MCSectionWasm *MCContext::getWasmNamedSection(const Twine &Prefix,
+ const Twine &Suffix, unsigned Type,
+ unsigned Flags) {
+ return getWasmSection(Prefix + "." + Suffix, Type, Flags, Suffix);
+}
+
+MCSectionWasm *MCContext::getWasmSection(const Twine &Section, unsigned Type,
+ unsigned Flags,
+ const Twine &Group, unsigned UniqueID,
+ const char *BeginSymName) {
+ MCSymbolWasm *GroupSym = nullptr;
+ if (!Group.isTriviallyEmpty() && !Group.str().empty())
+ GroupSym = cast<MCSymbolWasm>(getOrCreateSymbol(Group));
+
+ return getWasmSection(Section, Type, Flags, GroupSym, UniqueID, BeginSymName);
+}
+
+MCSectionWasm *MCContext::getWasmSection(const Twine &Section, unsigned Type,
+ unsigned Flags,
+ const MCSymbolWasm *GroupSym,
+ unsigned UniqueID,
+ const char *BeginSymName) {
+ StringRef Group = "";
+ if (GroupSym)
+ Group = GroupSym->getName();
+ // Do the lookup, if we have a hit, return it.
+ auto IterBool = WasmUniquingMap.insert(
+ std::make_pair(WasmSectionKey{Section.str(), Group, UniqueID}, nullptr));
+ auto &Entry = *IterBool.first;
+ if (!IterBool.second)
+ return Entry.second;
+
+ StringRef CachedName = Entry.first.SectionName;
+
+ SectionKind Kind = SectionKind::getText();
+
+ MCSymbol *Begin = nullptr;
+ if (BeginSymName)
+ Begin = createTempSymbol(BeginSymName, false);
+
+ MCSectionWasm *Result = new (WasmAllocator.Allocate())
+ MCSectionWasm(CachedName, Type, Flags, Kind, GroupSym, UniqueID, Begin);
+ Entry.second = Result;
+ return Result;
+}
+
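
getWasmSection uses the same "insert first, fill on miss" idiom as the ELF path: insert a null placeholder keyed by (name, group, unique-id), and only construct the section when the insertion actually happened, so a single map lookup serves both the hit and the miss. The idiom in isolation (std::map standing in for WasmUniquingMap):

    #include <map>
    #include <string>
    #include <utility>

    struct Section { std::string Name; };

    // Hedged sketch of the uniquing idiom used above.
    static Section *getOrCreate(std::map<std::string, Section *> &M,
                                const std::string &Key) {
      auto IterBool = M.insert(std::make_pair(Key, nullptr));
      if (!IterBool.second)
        return IterBool.first->second;  // hit: reuse the cached section
      auto *S = new Section{Key};       // miss: create and cache it
      IterBool.first->second = S;
      return S;
    }
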
MCSubtargetInfo &MCContext::getSubtargetCopy(const MCSubtargetInfo &STI) {
return *new (MCSubtargetAllocator.Allocate()) MCSubtargetInfo(STI);
}
@@ -510,13 +608,15 @@ CodeViewContext &MCContext::getCVContext() {
void MCContext::reportError(SMLoc Loc, const Twine &Msg) {
HadError = true;
- // If we have a source manager use it. Otherwise just use the generic
- // report_fatal_error().
- if (!SrcMgr)
+  // If we have a source manager, use it. Otherwise, try the inline source
+  // manager. If that also fails, use the generic report_fatal_error().
+ if (SrcMgr)
+ SrcMgr->PrintMessage(Loc, SourceMgr::DK_Error, Msg);
+ else if (InlineSrcMgr)
+ InlineSrcMgr->PrintMessage(Loc, SourceMgr::DK_Error, Msg);
+ else
report_fatal_error(Msg, false);
-
- // Use the source manager to print the message.
- SrcMgr->PrintMessage(Loc, SourceMgr::DK_Error, Msg);
}
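
reportError now routes diagnostics through a three-way fallback: the primary source manager, then the inline-asm source manager, then a fatal error. Sketched with stand-in types:

    #include <cstdio>
    #include <cstdlib>

    struct SrcMgrLike {
      void print(const char *M) { std::fprintf(stderr, "error: %s\n", M); }
    };

    // Hedged sketch of the fallback order above.
    static void report(SrcMgrLike *SrcMgr, SrcMgrLike *InlineSrcMgr,
                       const char *Msg) {
      if (SrcMgr)
        SrcMgr->print(Msg);             // preferred: primary source manager
      else if (InlineSrcMgr)
        InlineSrcMgr->print(Msg);       // fallback: inline-asm source manager
      else {
        std::fprintf(stderr, "fatal: %s\n", Msg);
        std::abort();                   // last resort, like report_fatal_error
      }
    }
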
void MCContext::reportFatalError(SMLoc Loc, const Twine &Msg) {
diff --git a/lib/MC/MCDisassembler/MCDisassembler.cpp b/lib/MC/MCDisassembler/MCDisassembler.cpp
index 3a4f7382bd3c..2f1275d00b86 100644
--- a/lib/MC/MCDisassembler/MCDisassembler.cpp
+++ b/lib/MC/MCDisassembler/MCDisassembler.cpp
@@ -1,4 +1,4 @@
-//===-- MCDisassembler.cpp - Disassembler interface -----------------------===//
+//===- MCDisassembler.cpp - Disassembler interface ------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,13 +8,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
using namespace llvm;
-MCDisassembler::~MCDisassembler() {
-}
+MCDisassembler::~MCDisassembler() = default;
bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
uint64_t Address, bool IsBranch,
diff --git a/lib/MC/MCDisassembler/MCRelocationInfo.cpp b/lib/MC/MCDisassembler/MCRelocationInfo.cpp
index 1612562497d9..5805fd7007d2 100644
--- a/lib/MC/MCDisassembler/MCRelocationInfo.cpp
+++ b/lib/MC/MCDisassembler/MCRelocationInfo.cpp
@@ -1,4 +1,4 @@
-//==-- MCRelocationInfo.cpp ------------------------------------------------==//
+//===-- MCRelocationInfo.cpp ----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,17 +8,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
-#include "llvm-c/Disassembler.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm-c/Disassembler.h"
using namespace llvm;
-MCRelocationInfo::MCRelocationInfo(MCContext &Ctx)
- : Ctx(Ctx) {
-}
+MCRelocationInfo::MCRelocationInfo(MCContext &Ctx) : Ctx(Ctx) {}
-MCRelocationInfo::~MCRelocationInfo() {
-}
+MCRelocationInfo::~MCRelocationInfo() = default;
const MCExpr *
MCRelocationInfo::createExprForCAPIVariantKind(const MCExpr *SubExpr,
diff --git a/lib/MC/MCDisassembler/MCSymbolizer.cpp b/lib/MC/MCDisassembler/MCSymbolizer.cpp
index c0f707d356c1..78e611e3ddda 100644
--- a/lib/MC/MCDisassembler/MCSymbolizer.cpp
+++ b/lib/MC/MCDisassembler/MCSymbolizer.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/MC/MCSymbolizer.cpp - MCSymbolizer class -----------*- C++ -*-===//
+//===-- llvm/MC/MCSymbolizer.cpp - MCSymbolizer class ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,5 +11,4 @@
using namespace llvm;
-MCSymbolizer::~MCSymbolizer() {
-}
+MCSymbolizer::~MCSymbolizer() = default;
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index a7551a3283a3..cc32e90ad36e 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -7,27 +7,41 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCDwarf.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -592,7 +606,6 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS,
// And the pair of terminating zeros.
Length += 2 * AddrSize;
-
// Emit the header for this section.
// The 4 byte length not including the 4 byte value for the length.
MCOS->EmitIntValue(Length - 4, 4);
@@ -661,7 +674,14 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
// The 2 byte DWARF version.
MCOS->EmitIntValue(context.getDwarfVersion(), 2);
+ // The DWARF v5 header has unit type, address size, abbrev offset.
+ // Earlier versions have abbrev offset, address size.
const MCAsmInfo &AsmInfo = *context.getAsmInfo();
+ int AddrSize = AsmInfo.getPointerSize();
+ if (context.getDwarfVersion() >= 5) {
+ MCOS->EmitIntValue(dwarf::DW_UT_compile, 1);
+ MCOS->EmitIntValue(AddrSize, 1);
+ }
// The 4 byte offset to the debug abbrevs from the start of the .debug_abbrev,
// it is at the start of that section so this is zero.
if (AbbrevSectionSymbol == nullptr)
@@ -669,11 +689,8 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
else
MCOS->EmitSymbolValue(AbbrevSectionSymbol, 4,
AsmInfo.needsDwarfSectionOffsetDirective());
-
- const MCAsmInfo *asmInfo = context.getAsmInfo();
- int AddrSize = asmInfo->getPointerSize();
- // The 1 byte size of an address.
- MCOS->EmitIntValue(AddrSize, 1);
+ if (context.getDwarfVersion() <= 4)
+ MCOS->EmitIntValue(AddrSize, 1);
// Second part: the compile_unit DIE.
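
The reordering above matches the unit-header layouts in the standard: DWARF v2-v4 compile units put the abbrev offset before the address size, while v5 inserts a unit-type byte and moves the address size ahead of the offset. A hedged, self-contained byte-emitter showing the two shapes (length field omitted; DW_UT_compile is 0x01 in DWARF v5):

    #include <cstdint>
    #include <vector>

    // Sketch: emit a little-endian CU header body after the length field.
    static void emitCUHeader(std::vector<uint8_t> &Out, unsigned Version,
                             uint8_t AddrSize, uint32_t AbbrevOff) {
      auto Emit32 = [&Out](uint32_t V) {
        for (int I = 0; I < 4; ++I)
          Out.push_back(uint8_t(V >> (8 * I)));
      };
      Out.push_back(uint8_t(Version));  // 2-byte version
      Out.push_back(uint8_t(Version >> 8));
      if (Version >= 5) {
        Out.push_back(0x01);            // DW_UT_compile
        Out.push_back(AddrSize);        // address size comes first in v5
        Emit32(AbbrevOff);
      } else {
        Emit32(AbbrevOff);              // v2-v4: abbrev offset first,
        Out.push_back(AddrSize);        // then address size
      }
    }
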
@@ -885,7 +902,7 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) {
}
}
- assert((RangesSectionSymbol != NULL) || !UseRangesSection);
+ assert((RangesSectionSymbol != nullptr) || !UseRangesSection);
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
@@ -1003,6 +1020,7 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
}
namespace {
+
class FrameEmitterImpl {
int CFAOffset = 0;
int InitialCFAOffset = 0;
@@ -1050,10 +1068,10 @@ void FrameEmitterImpl::EmitCFIInstruction(const MCCFIInstruction &Instr) {
Streamer.EmitULEB128IntValue(Reg2);
return;
}
- case MCCFIInstruction::OpWindowSave: {
+ case MCCFIInstruction::OpWindowSave:
Streamer.EmitIntValue(dwarf::DW_CFA_GNU_window_save, 1);
return;
- }
+
case MCCFIInstruction::OpUndefined: {
unsigned Reg = Instr.getRegister();
Streamer.EmitIntValue(dwarf::DW_CFA_undefined, 1);
@@ -1087,7 +1105,6 @@ void FrameEmitterImpl::EmitCFIInstruction(const MCCFIInstruction &Instr) {
return;
}
-
case MCCFIInstruction::OpDefCfaRegister: {
unsigned Reg = Instr.getRegister();
if (!IsEH)
@@ -1097,7 +1114,6 @@ void FrameEmitterImpl::EmitCFIInstruction(const MCCFIInstruction &Instr) {
return;
}
-
case MCCFIInstruction::OpOffset:
case MCCFIInstruction::OpRelOffset: {
const bool IsRelative =
@@ -1145,11 +1161,11 @@ void FrameEmitterImpl::EmitCFIInstruction(const MCCFIInstruction &Instr) {
Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
return;
}
- case MCCFIInstruction::OpGnuArgsSize: {
+ case MCCFIInstruction::OpGnuArgsSize:
Streamer.EmitIntValue(dwarf::DW_CFA_GNU_args_size, 1);
Streamer.EmitULEB128IntValue(Instr.getOffset());
return;
- }
+
case MCCFIInstruction::OpEscape:
Streamer.EmitBytes(Instr.getValues());
return;
@@ -1444,10 +1460,12 @@ void FrameEmitterImpl::EmitFDE(const MCSymbol &cieStart,
}
namespace {
+
struct CIEKey {
static const CIEKey getEmptyKey() {
return CIEKey(nullptr, 0, -1, false, false);
}
+
static const CIEKey getTombstoneKey() {
return CIEKey(nullptr, -1, 0, false, false);
}
@@ -1457,23 +1475,28 @@ struct CIEKey {
: Personality(Personality), PersonalityEncoding(PersonalityEncoding),
LsdaEncoding(LsdaEncoding), IsSignalFrame(IsSignalFrame),
IsSimple(IsSimple) {}
+
const MCSymbol *Personality;
unsigned PersonalityEncoding;
unsigned LsdaEncoding;
bool IsSignalFrame;
bool IsSimple;
};
-} // anonymous namespace
+
+} // end anonymous namespace
namespace llvm {
+
template <> struct DenseMapInfo<CIEKey> {
static CIEKey getEmptyKey() { return CIEKey::getEmptyKey(); }
static CIEKey getTombstoneKey() { return CIEKey::getTombstoneKey(); }
+
static unsigned getHashValue(const CIEKey &Key) {
return static_cast<unsigned>(
hash_combine(Key.Personality, Key.PersonalityEncoding, Key.LsdaEncoding,
Key.IsSignalFrame, Key.IsSimple));
}
+
static bool isEqual(const CIEKey &LHS, const CIEKey &RHS) {
return LHS.Personality == RHS.Personality &&
LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
@@ -1482,7 +1505,8 @@ template <> struct DenseMapInfo<CIEKey> {
LHS.IsSimple == RHS.IsSimple;
}
};
-} // namespace llvm
+
+} // end namespace llvm
void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
bool IsEH) {
diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp
index de645cac7370..68fb5e7cbb3d 100644
--- a/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/lib/MC/MCELFObjectTargetWriter.cpp
@@ -7,10 +7,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCValue.h"
using namespace llvm;
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 0ef1b2a8bdca..c8e0223c0573 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -11,30 +11,31 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCELFStreamer.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCValue.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -42,9 +43,6 @@ bool MCELFStreamer::isBundleLocked() const {
return getCurrentSectionOnly()->isBundleLocked();
}
-MCELFStreamer::~MCELFStreamer() {
-}
-
void MCELFStreamer::mergeFragment(MCDataFragment *DF,
MCDataFragment *EF) {
MCAssembler &Assembler = getAssembler();
@@ -95,11 +93,19 @@ void MCELFStreamer::InitSections(bool NoExecStack) {
SwitchSection(Ctx.getAsmInfo()->getNonexecutableStackSection(Ctx));
}
-void MCELFStreamer::EmitLabel(MCSymbol *S) {
+void MCELFStreamer::EmitLabel(MCSymbol *S, SMLoc Loc) {
auto *Symbol = cast<MCSymbolELF>(S);
- assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ MCObjectStreamer::EmitLabel(Symbol, Loc);
+
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(*getCurrentSectionOnly());
+ if (Section.getFlags() & ELF::SHF_TLS)
+ Symbol->setType(ELF::STT_TLS);
+}
- MCObjectStreamer::EmitLabel(Symbol);
+void MCELFStreamer::EmitLabel(MCSymbol *S, SMLoc Loc, MCFragment *F) {
+ auto *Symbol = cast<MCSymbolELF>(S);
+ MCObjectStreamer::EmitLabel(Symbol, Loc, F);
const MCSectionELF &Section =
static_cast<const MCSectionELF &>(*getCurrentSectionOnly());
@@ -147,17 +153,8 @@ void MCELFStreamer::ChangeSection(MCSection *Section,
if (Grp)
Asm.registerSymbol(*Grp);
- this->MCObjectStreamer::ChangeSection(Section, Subsection);
- MCContext &Ctx = getContext();
- auto *Begin = cast_or_null<MCSymbolELF>(Section->getBeginSymbol());
- if (!Begin) {
- Begin = Ctx.getOrCreateSectionSymbol(*SectionELF);
- Section->setBeginSymbol(Begin);
- }
- if (Begin->isUndefined()) {
- Asm.registerSymbol(*Begin);
- Begin->setType(ELF::STT_SECTION);
- }
+ changeSectionImpl(Section, Subsection);
+ Asm.registerSymbol(*Section->getBeginSymbol());
}
void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
@@ -361,13 +358,6 @@ void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
ValueSize, MaxBytesToEmit);
}
-// Add a symbol for the file name of this module. They start after the
-// null symbol and don't count as normal symbol, i.e. a non-STT_FILE symbol
-// with the same name may appear.
-void MCELFStreamer::EmitFileDirective(StringRef Filename) {
- getAssembler().addFileName(Filename);
-}
-
void MCELFStreamer::EmitIdent(StringRef IdentString) {
MCSection *Comment = getAssembler().getContext().getELFSection(
".comment", ELF::SHT_PROGBITS, ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, "");
@@ -630,15 +620,6 @@ void MCELFStreamer::FinishImpl() {
this->MCObjectStreamer::FinishImpl();
}
-MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_pwrite_stream &OS, MCCodeEmitter *CE,
- bool RelaxAll) {
- MCELFStreamer *S = new MCELFStreamer(Context, MAB, OS, CE);
- if (RelaxAll)
- S->getAssembler().setRelaxAll(true);
- return S;
-}
-
void MCELFStreamer::EmitThumbFunc(MCSymbol *Func) {
llvm_unreachable("Generic ELF doesn't support this directive");
}
@@ -647,22 +628,6 @@ void MCELFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
llvm_unreachable("ELF doesn't support this directive");
}
-void MCELFStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {
- llvm_unreachable("ELF doesn't support this directive");
-}
-
-void MCELFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
- llvm_unreachable("ELF doesn't support this directive");
-}
-
-void MCELFStreamer::EmitCOFFSymbolType(int Type) {
- llvm_unreachable("ELF doesn't support this directive");
-}
-
-void MCELFStreamer::EndCOFFSymbolDef() {
- llvm_unreachable("ELF doesn't support this directive");
-}
-
void MCELFStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
llvm_unreachable("ELF doesn't support this directive");
@@ -672,3 +637,12 @@ void MCELFStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
llvm_unreachable("ELF doesn't support this directive");
}
+
+MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE,
+ bool RelaxAll) {
+ MCELFStreamer *S = new MCELFStreamer(Context, MAB, OS, CE);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ return S;
+}
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index bcc43a54d620..8149aa27327c 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -7,28 +7,35 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCExpr.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "mcexpr"
namespace {
namespace stats {
+
STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations");
-}
-}
+
+} // end namespace stats
+} // end anonymous namespace
void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const {
switch (getKind()) {
@@ -44,7 +51,7 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const {
// Parenthesize names that start with $ so that they don't look like
// absolute names.
bool UseParens =
- !InParens && Sym.getName().size() && Sym.getName()[0] == '$';
+ !InParens && !Sym.getName().empty() && Sym.getName()[0] == '$';
if (UseParens) {
OS << '(';
Sym.print(OS, MAI);
@@ -129,21 +136,24 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const {
llvm_unreachable("Invalid expression kind!");
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MCExpr::dump() const {
dbgs() << *this;
dbgs() << '\n';
}
+#endif
/* *** */
const MCBinaryExpr *MCBinaryExpr::create(Opcode Opc, const MCExpr *LHS,
- const MCExpr *RHS, MCContext &Ctx) {
- return new (Ctx) MCBinaryExpr(Opc, LHS, RHS);
+ const MCExpr *RHS, MCContext &Ctx,
+ SMLoc Loc) {
+ return new (Ctx) MCBinaryExpr(Opc, LHS, RHS, Loc);
}
const MCUnaryExpr *MCUnaryExpr::create(Opcode Opc, const MCExpr *Expr,
- MCContext &Ctx) {
- return new (Ctx) MCUnaryExpr(Opc, Expr);
+ MCContext &Ctx, SMLoc Loc) {
+ return new (Ctx) MCUnaryExpr(Opc, Expr, Loc);
}
const MCConstantExpr *MCConstantExpr::create(int64_t Value, MCContext &Ctx) {
@@ -153,8 +163,8 @@ const MCConstantExpr *MCConstantExpr::create(int64_t Value, MCContext &Ctx) {
/* *** */
MCSymbolRefExpr::MCSymbolRefExpr(const MCSymbol *Symbol, VariantKind Kind,
- const MCAsmInfo *MAI)
- : MCExpr(MCExpr::SymbolRef), Kind(Kind),
+ const MCAsmInfo *MAI, SMLoc Loc)
+ : MCExpr(MCExpr::SymbolRef, Loc), Kind(Kind),
UseParensForSymbolVariant(MAI->useParensForSymbolVariant()),
HasSubsectionsViaSymbols(MAI->hasSubsectionsViaSymbols()),
Symbol(Symbol) {
@@ -163,8 +173,8 @@ MCSymbolRefExpr::MCSymbolRefExpr(const MCSymbol *Symbol, VariantKind Kind,
const MCSymbolRefExpr *MCSymbolRefExpr::create(const MCSymbol *Sym,
VariantKind Kind,
- MCContext &Ctx) {
- return new (Ctx) MCSymbolRefExpr(Sym, Kind, Ctx.getAsmInfo());
+ MCContext &Ctx, SMLoc Loc) {
+ return new (Ctx) MCSymbolRefExpr(Sym, Kind, Ctx.getAsmInfo(), Loc);
}
const MCSymbolRefExpr *MCSymbolRefExpr::create(StringRef Name, VariantKind Kind,
@@ -205,6 +215,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_SECREL: return "SECREL32";
case VK_SIZE: return "SIZE";
case VK_WEAKREF: return "WEAKREF";
+ case VK_X86_ABS8: return "ABS8";
case VK_ARM_NONE: return "none";
case VK_ARM_GOT_PREL: return "GOT_PREL";
case VK_ARM_TARGET1: return "target1";
@@ -275,6 +286,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_Hexagon_IE: return "IE";
case VK_Hexagon_IE_GOT: return "IEGOT";
case VK_WebAssembly_FUNCTION: return "FUNCTION";
+ case VK_WebAssembly_TYPEINDEX: return "TYPEINDEX";
case VK_AMDGPU_GOTPCREL32_LO: return "gotpcrel32@lo";
case VK_AMDGPU_GOTPCREL32_HI: return "gotpcrel32@hi";
case VK_AMDGPU_REL32_LO: return "rel32@lo";
@@ -314,6 +326,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("imgrel", VK_COFF_IMGREL32)
.Case("secrel32", VK_SECREL)
.Case("size", VK_SIZE)
+ .Case("abs8", VK_X86_ABS8)
.Case("l", VK_PPC_LO)
.Case("h", VK_PPC_HI)
.Case("ha", VK_PPC_HA)
diff --git a/lib/MC/MCFragment.cpp b/lib/MC/MCFragment.cpp
index 8ff8f8aba1c1..90b44177cf5e 100644
--- a/lib/MC/MCFragment.cpp
+++ b/lib/MC/MCFragment.cpp
@@ -7,30 +7,29 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCFragment.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LEB128.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <utility>
+
using namespace llvm;
-MCAsmLayout::MCAsmLayout(MCAssembler &Asm)
- : Assembler(Asm), LastValidFragment()
- {
+MCAsmLayout::MCAsmLayout(MCAssembler &Asm) : Assembler(Asm) {
// Compute the section layout order. Virtual sections must go last.
for (MCSection &Sec : Asm)
if (!Sec.isVirtualSection())
@@ -145,14 +144,14 @@ const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const {
MCValue Value;
if (!Expr->evaluateAsValue(Value, *this)) {
Assembler.getContext().reportError(
- SMLoc(), "expression could not be evaluated");
+ Expr->getLoc(), "expression could not be evaluated");
return nullptr;
}
const MCSymbolRefExpr *RefB = Value.getSymB();
if (RefB) {
Assembler.getContext().reportError(
- SMLoc(), Twine("symbol '") + RefB->getSymbol().getName() +
+ Expr->getLoc(), Twine("symbol '") + RefB->getSymbol().getName() +
"' could not be evaluated in a subtraction expression");
return nullptr;
}
@@ -164,8 +163,7 @@ const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const {
const MCSymbol &ASym = A->getSymbol();
const MCAssembler &Asm = getAssembler();
if (ASym.isCommon()) {
- // FIXME: we should probably add a SMLoc to MCExpr.
- Asm.getContext().reportError(SMLoc(),
+ Asm.getContext().reportError(Expr->getLoc(),
"Common symbol '" + ASym.getName() +
"' cannot be used in assignment expr");
return nullptr;
@@ -234,7 +232,7 @@ uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler,
void ilist_alloc_traits<MCFragment>::deleteNode(MCFragment *V) { V->destroy(); }
-MCFragment::~MCFragment() { }
+MCFragment::~MCFragment() = default;
MCFragment::MCFragment(FragmentType Kind, bool HasInstructions,
uint8_t BundlePadding, MCSection *Parent)
@@ -295,8 +293,6 @@ void MCFragment::destroy() {
}
}
-/* *** */
-
// Debugging methods
namespace llvm {
@@ -308,10 +304,11 @@ raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) {
return OS;
}
-}
+} // end namespace llvm
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MCFragment::dump() {
- raw_ostream &OS = llvm::errs();
+ raw_ostream &OS = errs();
OS << "<";
switch (getKind()) {
@@ -449,7 +446,7 @@ LLVM_DUMP_METHOD void MCFragment::dump() {
}
LLVM_DUMP_METHOD void MCAssembler::dump() {
- raw_ostream &OS = llvm::errs();
+ raw_ostream &OS = errs();
OS << "<MCAssembler\n";
OS << " Sections:[\n ";
@@ -469,3 +466,4 @@ LLVM_DUMP_METHOD void MCAssembler::dump() {
}
OS << "]>\n";
}
+#endif
diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp
index 2da8ecc4ff6a..f6d1d3cffca0 100644
--- a/lib/MC/MCInst.cpp
+++ b/lib/MC/MCInst.cpp
@@ -10,6 +10,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -34,10 +35,12 @@ void MCOperand::print(raw_ostream &OS) const {
OS << ">";
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MCOperand::dump() const {
print(dbgs());
dbgs() << "\n";
}
+#endif
void MCInst::print(raw_ostream &OS) const {
OS << "<MCInst " << getOpcode();
@@ -63,7 +66,9 @@ void MCInst::dump_pretty(raw_ostream &OS, const MCInstPrinter *Printer,
OS << ">";
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MCInst::dump() const {
print(dbgs());
dbgs() << "\n";
}
+#endif
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
index 23afe8054840..912179095974 100644
--- a/lib/MC/MCInstPrinter.cpp
+++ b/lib/MC/MCInstPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- MCInstPrinter.cpp - Convert an MCInst to target assembly syntax ---===//
+//===- MCInstPrinter.cpp - Convert an MCInst to target assembly syntax ----===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,13 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include <cinttypes>
+#include <cstdint>
+
using namespace llvm;
void llvm::dumpBytes(ArrayRef<uint8_t> bytes, raw_ostream &OS) {
@@ -25,8 +29,7 @@ void llvm::dumpBytes(ArrayRef<uint8_t> bytes, raw_ostream &OS) {
}
}
-MCInstPrinter::~MCInstPrinter() {
-}
+MCInstPrinter::~MCInstPrinter() = default;
/// getOpcodeName - Return the name of the specified opcode enum (e.g.
/// "MOV32ri") or empty if we can't resolve it.
@@ -68,7 +71,7 @@ StringRef MCInstPrinter::markup(StringRef a, StringRef b) const {
// For asm-style hex (e.g. 0ffh) the first digit always has to be a number.
static bool needsLeadingZero(uint64_t Value)
{
- while(Value)
+ while (Value)
{
uint64_t digit = (Value >> 60) & 0xf;
if (digit != 0)
diff --git a/lib/MC/MCInstrAnalysis.cpp b/lib/MC/MCInstrAnalysis.cpp
index 2d8336d77ac7..566944c53548 100644
--- a/lib/MC/MCInstrAnalysis.cpp
+++ b/lib/MC/MCInstrAnalysis.cpp
@@ -1,4 +1,4 @@
-//===-- MCInstrAnalysis.cpp - InstrDesc target hooks ------------*- C++ -*-===//
+//===- MCInstrAnalysis.cpp - InstrDesc target hooks -----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,12 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include <cstdint>
+
using namespace llvm;
bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
diff --git a/lib/MC/MCLabel.cpp b/lib/MC/MCLabel.cpp
index b443cbbbf43e..db25a46fce18 100644
--- a/lib/MC/MCLabel.cpp
+++ b/lib/MC/MCLabel.cpp
@@ -1,4 +1,4 @@
-//===- lib/MC/MCLabel.cpp - MCLabel implementation ----------------------===//
+//===- lib/MC/MCLabel.cpp - MCLabel implementation ------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,14 +8,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCLabel.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
void MCLabel::print(raw_ostream &OS) const {
OS << '"' << getInstance() << '"';
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MCLabel::dump() const {
print(dbgs());
}
+#endif
diff --git a/lib/MC/MCLinkerOptimizationHint.cpp b/lib/MC/MCLinkerOptimizationHint.cpp
index f71fc7830129..97f95418e054 100644
--- a/lib/MC/MCLinkerOptimizationHint.cpp
+++ b/lib/MC/MCLinkerOptimizationHint.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/MC/MCLinkerOptimizationHint.cpp ----- LOH handling -*- C++ -*-===//
+//===- llvm/MC/MCLinkerOptimizationHint.cpp ----- LOH handling ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,9 +9,11 @@
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstddef>
+#include <cstdint>
using namespace llvm;
@@ -41,14 +43,14 @@ void MCLOHDirective::emit(MachObjectWriter &ObjWriter,
uint64_t MCLOHDirective::getEmitSize(const MachObjectWriter &ObjWriter,
const MCAsmLayout &Layout) const {
class raw_counting_ostream : public raw_ostream {
- uint64_t Count;
+ uint64_t Count = 0;
void write_impl(const char *, size_t size) override { Count += size; }
uint64_t current_pos() const override { return Count; }
public:
- raw_counting_ostream() : Count(0) {}
+ raw_counting_ostream() = default;
~raw_counting_ostream() override { flush(); }
};
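
getEmitSize's local raw_counting_ostream is a neat trick: run the emitter against a stream that only counts, and you get the size without buffering a single byte. The same pattern over std::streambuf, self-contained:

    #include <cstdint>
    #include <ostream>
    #include <streambuf>

    // Hedged sketch of the counting-stream trick with the standard library.
    class counting_buf : public std::streambuf {
      uint64_t Count = 0;

      std::streamsize xsputn(const char *, std::streamsize N) override {
        Count += N;                     // count instead of storing
        return N;
      }
      int overflow(int C) override {
        ++Count;
        return C;
      }

    public:
      uint64_t count() const { return Count; }
    };
    // Usage: counting_buf B; std::ostream OS(&B); run the emitter against
    // OS; then B.count() is the size that a real emission would produce.
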
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index bd425bb73093..1e9ef4163256 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -1,4 +1,4 @@
-//===-- MCMachOStreamer.cpp - MachO Streamer ------------------------------===//
+//===- MCMachOStreamer.cpp - MachO Streamer -------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,27 +7,35 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCStreamer.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolMachO.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <cassert>
+#include <vector>
using namespace llvm;
@@ -70,7 +78,7 @@ public:
/// @{
void ChangeSection(MCSection *Sect, const MCExpr *Subsect) override;
- void EmitLabel(MCSymbol *Symbol) override;
+ void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
void EmitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol) override;
void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
@@ -83,18 +91,7 @@ public:
void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override;
void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
- void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {
- llvm_unreachable("macho doesn't support this directive");
- }
- void EmitCOFFSymbolStorageClass(int StorageClass) override {
- llvm_unreachable("macho doesn't support this directive");
- }
- void EmitCOFFSymbolType(int Type) override {
- llvm_unreachable("macho doesn't support this directive");
- }
- void EndCOFFSymbolDef() override {
- llvm_unreachable("macho doesn't support this directive");
- }
+
void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
void EmitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
@@ -102,13 +99,6 @@ public:
void EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment = 0) override;
- void EmitFileDirective(StringRef Filename) override {
- // FIXME: Just ignore the .file; it isn't important enough to fail the
- // entire assembly.
-
- // report_fatal_error("unsupported directive: '.file'");
- }
-
void EmitIdent(StringRef IdentString) override {
llvm_unreachable("macho doesn't support this directive");
}
@@ -152,7 +142,7 @@ static bool canGoAfterDWARF(const MCSectionMachO &MSec) {
void MCMachOStreamer::ChangeSection(MCSection *Section,
const MCExpr *Subsection) {
// Change the section normally.
- bool Created = MCObjectStreamer::changeSectionImpl(Section, Subsection);
+ bool Created = changeSectionImpl(Section, Subsection);
const MCSectionMachO &MSec = *cast<MCSectionMachO>(Section);
StringRef SegName = MSec.getSegmentName();
if (SegName == "__DWARF")
@@ -181,15 +171,13 @@ void MCMachOStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern);
}
-void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
- assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
-
+void MCMachOStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) {
// We have to create a new fragment if this is an atom-defining symbol;
// fragments cannot span atoms.
if (getAssembler().isSymbolLinkerVisible(*Symbol))
insert(new MCDataFragment());
- MCObjectStreamer::EmitLabel(Symbol);
+ MCObjectStreamer::EmitLabel(Symbol, Loc);
// This causes the reference type flag to be cleared. Darwin 'as' was "trying"
// to clear the weak reference and weak definition bits too, but the
diff --git a/lib/MC/MCMachObjectTargetWriter.cpp b/lib/MC/MCMachObjectTargetWriter.cpp
index 4ffd6a78a61f..8809a3c320f8 100644
--- a/lib/MC/MCMachObjectTargetWriter.cpp
+++ b/lib/MC/MCMachObjectTargetWriter.cpp
@@ -1,4 +1,4 @@
-//===-- MCMachObjectTargetWriter.cpp - Mach-O Target Writer Subclass ------===//
+//===- MCMachObjectTargetWriter.cpp - Mach-O Target Writer Subclass -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,4 +16,4 @@ MCMachObjectTargetWriter::MCMachObjectTargetWriter(bool Is64Bit_,
uint32_t CPUSubtype_)
: Is64Bit(Is64Bit_), CPUType(CPUType_), CPUSubtype(CPUSubtype_) {}
-MCMachObjectTargetWriter::~MCMachObjectTargetWriter() {}
+MCMachObjectTargetWriter::~MCMachObjectTargetWriter() = default;
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index eb2d91254b34..d156f5d05a31 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -34,6 +34,10 @@ namespace {
void EmitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
uint64_t Size = 0, unsigned ByteAlignment = 0) override {}
void EmitGPRel32Value(const MCExpr *Value) override {}
+ void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {}
+ void EmitCOFFSymbolStorageClass(int StorageClass) override {}
+ void EmitCOFFSymbolType(int Type) override {}
+ void EndCOFFSymbolDef() override {}
};
}
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 8fd71f62e4e5..9f94264684f9 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -16,7 +16,9 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionWasm.h"
#include "llvm/Support/COFF.h"
+#include "llvm/Support/ELF.h"
using namespace llvm;
@@ -505,68 +507,75 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T) {
COFFDebugSymbolsSection = nullptr;
COFFDebugTypesSection = nullptr;
+ unsigned DebugSecType = ELF::SHT_PROGBITS;
+
+  // MIPS .debug_* sections should have the SHT_MIPS_DWARF section type to
+  // distinguish sections containing DWARF from those containing the obsolete
+  // ECOFF debug format, which are marked SHT_PROGBITS.
+ if (T.getArch() == Triple::mips || T.getArch() == Triple::mipsel ||
+ T.getArch() == Triple::mips64 || T.getArch() == Triple::mips64el)
+ DebugSecType = ELF::SHT_MIPS_DWARF;
+
// Debug Info Sections.
- DwarfAbbrevSection = Ctx->getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0,
- "section_abbrev");
- DwarfInfoSection =
- Ctx->getELFSection(".debug_info", ELF::SHT_PROGBITS, 0, "section_info");
- DwarfLineSection = Ctx->getELFSection(".debug_line", ELF::SHT_PROGBITS, 0);
- DwarfFrameSection = Ctx->getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0);
+ DwarfAbbrevSection =
+ Ctx->getELFSection(".debug_abbrev", DebugSecType, 0);
+ DwarfInfoSection = Ctx->getELFSection(".debug_info", DebugSecType, 0);
+ DwarfLineSection = Ctx->getELFSection(".debug_line", DebugSecType, 0);
+ DwarfFrameSection = Ctx->getELFSection(".debug_frame", DebugSecType, 0);
DwarfPubNamesSection =
- Ctx->getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0);
+ Ctx->getELFSection(".debug_pubnames", DebugSecType, 0);
DwarfPubTypesSection =
- Ctx->getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0);
+ Ctx->getELFSection(".debug_pubtypes", DebugSecType, 0);
DwarfGnuPubNamesSection =
- Ctx->getELFSection(".debug_gnu_pubnames", ELF::SHT_PROGBITS, 0);
+ Ctx->getELFSection(".debug_gnu_pubnames", DebugSecType, 0);
DwarfGnuPubTypesSection =
- Ctx->getELFSection(".debug_gnu_pubtypes", ELF::SHT_PROGBITS, 0);
+ Ctx->getELFSection(".debug_gnu_pubtypes", DebugSecType, 0);
DwarfStrSection =
- Ctx->getELFSection(".debug_str", ELF::SHT_PROGBITS,
+ Ctx->getELFSection(".debug_str", DebugSecType,
ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, "");
- DwarfLocSection = Ctx->getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0);
+ DwarfLocSection = Ctx->getELFSection(".debug_loc", DebugSecType, 0);
DwarfARangesSection =
- Ctx->getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0);
+ Ctx->getELFSection(".debug_aranges", DebugSecType, 0);
DwarfRangesSection =
- Ctx->getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0, "debug_range");
- DwarfMacinfoSection = Ctx->getELFSection(".debug_macinfo", ELF::SHT_PROGBITS,
- 0, "debug_macinfo");
+ Ctx->getELFSection(".debug_ranges", DebugSecType, 0);
+ DwarfMacinfoSection =
+ Ctx->getELFSection(".debug_macinfo", DebugSecType, 0);
// DWARF5 Experimental Debug Info
// Accelerator Tables
DwarfAccelNamesSection =
- Ctx->getELFSection(".apple_names", ELF::SHT_PROGBITS, 0, "names_begin");
+ Ctx->getELFSection(".apple_names", ELF::SHT_PROGBITS, 0);
DwarfAccelObjCSection =
- Ctx->getELFSection(".apple_objc", ELF::SHT_PROGBITS, 0, "objc_begin");
- DwarfAccelNamespaceSection = Ctx->getELFSection(
- ".apple_namespaces", ELF::SHT_PROGBITS, 0, "namespac_begin");
+ Ctx->getELFSection(".apple_objc", ELF::SHT_PROGBITS, 0);
+ DwarfAccelNamespaceSection =
+ Ctx->getELFSection(".apple_namespaces", ELF::SHT_PROGBITS, 0);
DwarfAccelTypesSection =
- Ctx->getELFSection(".apple_types", ELF::SHT_PROGBITS, 0, "types_begin");
+ Ctx->getELFSection(".apple_types", ELF::SHT_PROGBITS, 0);
// Fission Sections
DwarfInfoDWOSection =
- Ctx->getELFSection(".debug_info.dwo", ELF::SHT_PROGBITS, 0);
+ Ctx->getELFSection(".debug_info.dwo", DebugSecType, 0);
DwarfTypesDWOSection =
- Ctx->getELFSection(".debug_types.dwo", ELF::SHT_PROGBITS, 0);
+ Ctx->getELFSection(".debug_types.dwo", DebugSecType, 0);
DwarfAbbrevDWOSection =
- Ctx->getELFSection(".debug_abbrev.dwo", ELF::SHT_PROGBITS, 0);
+ Ctx->getELFSection(".debug_abbrev.dwo", DebugSecType, 0);
DwarfStrDWOSection =
- Ctx->getELFSection(".debug_str.dwo", ELF::SHT_PROGBITS,
+ Ctx->getELFSection(".debug_str.dwo", DebugSecType,
ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, "");
DwarfLineDWOSection =
- Ctx->getELFSection(".debug_line.dwo", ELF::SHT_PROGBITS, 0);
+ Ctx->getELFSection(".debug_line.dwo", DebugSecType, 0);
DwarfLocDWOSection =
- Ctx->getELFSection(".debug_loc.dwo", ELF::SHT_PROGBITS, 0, "skel_loc");
+ Ctx->getELFSection(".debug_loc.dwo", DebugSecType, 0);
DwarfStrOffDWOSection =
- Ctx->getELFSection(".debug_str_offsets.dwo", ELF::SHT_PROGBITS, 0);
- DwarfAddrSection =
- Ctx->getELFSection(".debug_addr", ELF::SHT_PROGBITS, 0, "addr_sec");
+ Ctx->getELFSection(".debug_str_offsets.dwo", DebugSecType, 0);
+ DwarfAddrSection = Ctx->getELFSection(".debug_addr", DebugSecType, 0);
// DWP Sections
DwarfCUIndexSection =
- Ctx->getELFSection(".debug_cu_index", ELF::SHT_PROGBITS, 0);
+ Ctx->getELFSection(".debug_cu_index", DebugSecType, 0);
DwarfTUIndexSection =
- Ctx->getELFSection(".debug_tu_index", ELF::SHT_PROGBITS, 0);
+ Ctx->getELFSection(".debug_tu_index", DebugSecType, 0);
StackMapSection =
Ctx->getELFSection(".llvm_stackmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
@@ -799,6 +808,30 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
SectionKind::getReadOnly());
}
+void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) {
+ // TODO: Set the section types and flags.
+ TextSection = Ctx->getWasmSection(".text", 0, 0);
+ DataSection = Ctx->getWasmSection(".data", 0, 0);
+
+ // TODO: Set the section types and flags.
+ DwarfLineSection = Ctx->getWasmSection(".debug_line", 0, 0);
+ DwarfStrSection = Ctx->getWasmSection(".debug_str", 0, 0);
+ DwarfLocSection = Ctx->getWasmSection(".debug_loc", 0, 0);
+  DwarfAbbrevSection =
+      Ctx->getWasmSection(".debug_abbrev", 0, 0, "section_abbrev");
+  DwarfARangesSection = Ctx->getWasmSection(".debug_aranges", 0, 0);
+  DwarfRangesSection =
+      Ctx->getWasmSection(".debug_ranges", 0, 0, "debug_range");
+  DwarfMacinfoSection =
+      Ctx->getWasmSection(".debug_macinfo", 0, 0, "debug_macinfo");
+ DwarfAddrSection = Ctx->getWasmSection(".debug_addr", 0, 0);
+ DwarfCUIndexSection = Ctx->getWasmSection(".debug_cu_index", 0, 0);
+ DwarfTUIndexSection = Ctx->getWasmSection(".debug_tu_index", 0, 0);
+ DwarfInfoSection = Ctx->getWasmSection(".debug_info", 0, 0, "section_info");
+ DwarfFrameSection = Ctx->getWasmSection(".debug_frame", 0, 0);
+ DwarfPubNamesSection = Ctx->getWasmSection(".debug_pubnames", 0, 0);
+ DwarfPubTypesSection = Ctx->getWasmSection(".debug_pubtypes", 0, 0);
+
+ // TODO: Define more sections.
+}
+
void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC,
CodeModel::Model cm,
MCContext &ctx) {
@@ -843,6 +876,10 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple, bool PIC,
Env = IsELF;
initELFMCObjectFileInfo(TT);
break;
+ case Triple::Wasm:
+ Env = IsWasm;
+ initWasmMCObjectFileInfo(TT);
+ break;
case Triple::UnknownObjectFormat:
report_fatal_error("Cannot initialize MC for unknown object file format.");
break;
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index cae5c1f8d156..f7f2253256eb 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -153,8 +153,8 @@ void MCObjectStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
EmitLabel(Frame.End);
}
-void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
- MCStreamer::EmitLabel(Symbol);
+void MCObjectStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) {
+ MCStreamer::EmitLabel(Symbol, Loc);
getAssembler().registerSymbol(*Symbol);
@@ -171,6 +171,16 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
}
}
+void MCObjectStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc, MCFragment *F) {
+ MCStreamer::EmitLabel(Symbol, Loc);
+ getAssembler().registerSymbol(*Symbol);
+ auto *DF = dyn_cast_or_null<MCDataFragment>(F);
+ if (DF)
+ Symbol->setFragment(F);
+ else
+ PendingLabels.push_back(Symbol);
+}
+
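
The new overload above binds a label to a caller-supplied fragment when that fragment is a data fragment, and otherwise queues it as a pending label to be attached to the next data fragment. The placement policy in miniature (stand-in types):

    #include <vector>

    struct Fragment { bool IsData = false; };
    struct Symbol { Fragment *Frag = nullptr; };

    // Hedged sketch of the placement policy in the new EmitLabel overload.
    static void placeLabel(Symbol &S, Fragment *F,
                           std::vector<Symbol *> &PendingLabels) {
      if (F && F->IsData)
        S.Frag = F;                     // attach directly to the given fragment
      else
        PendingLabels.push_back(&S);    // resolve at the next data fragment
    }
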
void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {
int64_t IntValue;
if (Value->evaluateAsAbsolute(IntValue, getAssembler())) {
@@ -203,6 +213,7 @@ bool MCObjectStreamer::changeSectionImpl(MCSection *Section,
const MCExpr *Subsection) {
assert(Section && "Cannot switch to a null section!");
flushPendingLabels(nullptr);
+ getContext().clearDwarfLocSeen();
bool Created = getAssembler().registerSection(*Section);
@@ -227,7 +238,7 @@ bool MCObjectStreamer::mayHaveInstructions(MCSection &Sec) const {
}
void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) {
+ const MCSubtargetInfo &STI, bool) {
MCStreamer::EmitInstruction(Inst, STI);
MCSection *Sec = getCurrentSectionOnly();
@@ -490,8 +501,8 @@ void MCObjectStreamer::EmitGPRel32Value(const MCExpr *Value) {
MCDataFragment *DF = getOrCreateDataFragment();
flushPendingLabels(DF, DF->getContents().size());
- DF->getFixups().push_back(MCFixup::create(DF->getContents().size(),
- Value, FK_GPRel_4));
+ DF->getFixups().push_back(
+ MCFixup::create(DF->getContents().size(), Value, FK_GPRel_4));
DF->getContents().resize(DF->getContents().size() + 4, 0);
}
@@ -500,8 +511,8 @@ void MCObjectStreamer::EmitGPRel64Value(const MCExpr *Value) {
MCDataFragment *DF = getOrCreateDataFragment();
flushPendingLabels(DF, DF->getContents().size());
- DF->getFixups().push_back(MCFixup::create(DF->getContents().size(),
- Value, FK_GPRel_4));
+ DF->getFixups().push_back(
+ MCFixup::create(DF->getContents().size(), Value, FK_GPRel_4));
DF->getContents().resize(DF->getContents().size() + 8, 0);
}
@@ -572,6 +583,10 @@ void MCObjectStreamer::emitFill(const MCExpr &NumValues, int64_t Size,
MCStreamer::emitFill(IntNumValues, Size, Expr);
}
+void MCObjectStreamer::EmitFileDirective(StringRef Filename) {
+ getAssembler().addFileName(Filename);
+}
+
void MCObjectStreamer::FinishImpl() {
// If we are generating dwarf for assembly source files, dump out the sections.
if (getContext().getGenDwarfForAssembly())
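
The fragment-aware EmitLabel overload above is the interesting piece of this file's changes. Reduced to its core (illustrative only; emitLabelSketch and the plain vector stand in for the streamer's internals, and flushPendingLabels is the consumer named in the surrounding code):

#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
#include <vector>

void emitLabelSketch(llvm::MCSymbol *Sym, llvm::MCFragment *F,
                     std::vector<llvm::MCSymbol *> &PendingLabels) {
  // If the caller already knows the data fragment the label lands in,
  // bind the symbol immediately; otherwise park it on PendingLabels so
  // flushPendingLabels() can attach it to the next fragment created.
  if (auto *DF = llvm::dyn_cast_or_null<llvm::MCDataFragment>(F))
    Sym->setFragment(DF);
  else
    PendingLabels.push_back(Sym);
}
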
diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp
index e84f74ae81d6..478b4e84e74a 100644
--- a/lib/MC/MCObjectWriter.cpp
+++ b/lib/MC/MCObjectWriter.cpp
@@ -8,14 +8,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSymbol.h"
using namespace llvm;
-MCObjectWriter::~MCObjectWriter() {
-}
+MCObjectWriter::~MCObjectWriter() = default;
bool MCObjectWriter::isSymbolRefDifferenceFullyResolved(
const MCAssembler &Asm, const MCSymbolRefExpr *A, const MCSymbolRefExpr *B,
@@ -51,5 +51,3 @@ bool MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
// On ELF and COFF A - B is absolute if A and B are in the same section.
return &SecA == &SecB;
}
-
-bool MCObjectWriter::isWeak(const MCSymbol &) const { return false; }
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 87ecf9e0227f..38dadfe62135 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SaveAndRestore.h"
@@ -30,15 +30,11 @@
using namespace llvm;
-AsmLexer::AsmLexer(const MCAsmInfo &MAI)
- : MAI(MAI), CurPtr(nullptr), IsAtStartOfLine(true),
- IsAtStartOfStatement(true), IsParsingMSInlineAsm(false),
- IsPeeking(false) {
+AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
}
-AsmLexer::~AsmLexer() {
-}
+AsmLexer::~AsmLexer() = default;
void AsmLexer::setBuffer(StringRef Buf, const char *ptr) {
CurBuf = Buf;
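
The constructor shrinkage here follows the C++11 pattern used throughout this patch: non-static data member initializers carry the defaults, so the remaining constructor bodies only touch members that need real work. In isolation (a sketch; LexerBits is an invented name):

struct LexerBits {
  const char *CurPtr = nullptr;  // was CurPtr(nullptr) in the init list
  bool IsAtStartOfLine = true;   // was IsAtStartOfLine(true)
  bool IsAtStartOfStatement = true;
  bool IsPeeking = false;
};
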
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index da54155b3b9d..e65ce9f0b936 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -35,6 +35,7 @@
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCAsmParserUtils.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
@@ -42,6 +43,7 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -55,6 +57,7 @@
#include <algorithm>
#include <cassert>
#include <cctype>
+#include <climits>
#include <cstddef>
#include <cstdint>
#include <deque>
@@ -67,7 +70,7 @@
using namespace llvm;
-MCAsmParserSemaCallback::~MCAsmParserSemaCallback() {}
+MCAsmParserSemaCallback::~MCAsmParserSemaCallback() = default;
static cl::opt<unsigned> AsmMacroMaxNestingDepth(
"asm-macro-max-nesting-depth", cl::init(20), cl::Hidden,
@@ -82,10 +85,10 @@ typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
struct MCAsmMacroParameter {
StringRef Name;
MCAsmMacroArgument Value;
- bool Required;
- bool Vararg;
+ bool Required = false;
+ bool Vararg = false;
- MCAsmMacroParameter() : Required(false), Vararg(false) {}
+ MCAsmMacroParameter() = default;
};
typedef std::vector<MCAsmMacroParameter> MCAsmMacroParameters;
@@ -124,23 +127,20 @@ struct ParseStatementInfo {
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> ParsedOperands;
/// \brief The opcode from the last parsed instruction.
- unsigned Opcode;
+ unsigned Opcode = ~0U;
/// \brief Was there an error parsing the inline assembly?
- bool ParseError;
+ bool ParseError = false;
- SmallVectorImpl<AsmRewrite> *AsmRewrites;
+ SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
- ParseStatementInfo() : Opcode(~0U), ParseError(false), AsmRewrites(nullptr) {}
+ ParseStatementInfo() = default;
ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
- : Opcode(~0), ParseError(false), AsmRewrites(rewrites) {}
+ : AsmRewrites(rewrites) {}
};
/// \brief The concrete assembly parser instance.
class AsmParser : public MCAsmParser {
- AsmParser(const AsmParser &) = delete;
- void operator=(const AsmParser &) = delete;
-
private:
AsmLexer Lexer;
MCContext &Ctx;
@@ -199,17 +199,19 @@ private:
unsigned LastQueryLine;
/// AssemblerDialect. ~0U means unset value and use value provided by MAI.
- unsigned AssemblerDialect;
+ unsigned AssemblerDialect = ~0U;
/// \brief is Darwin compatibility enabled?
- bool IsDarwin;
+ bool IsDarwin = false;
/// \brief Are we parsing ms-style inline assembly?
- bool ParsingInlineAsm;
+ bool ParsingInlineAsm = false;
public:
AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
- const MCAsmInfo &MAI);
+ const MCAsmInfo &MAI, unsigned CB);
+ AsmParser(const AsmParser &) = delete;
+ AsmParser &operator=(const AsmParser &) = delete;
~AsmParser() override;
bool Run(bool NoInitialTextSection, bool NoFinalize = false) override;
@@ -223,7 +225,6 @@ public:
DirectiveKindMap[Directive] = DirectiveKindMap[Alias];
}
-public:
/// @name MCAsmParser Interface
/// {
@@ -258,7 +259,7 @@ public:
bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
unsigned &NumOutputs, unsigned &NumInputs,
- SmallVectorImpl<std::pair<void *,bool> > &OpDecls,
+ SmallVectorImpl<std::pair<void *,bool>> &OpDecls,
SmallVectorImpl<std::string> &Constraints,
SmallVectorImpl<std::string> &Clobbers,
const MCInstrInfo *MII, const MCInstPrinter *IP,
@@ -572,11 +573,9 @@ extern MCAsmParserExtension *createCOFFAsmParser();
enum { DEFAULT_ADDRSPACE = 0 };
AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
- const MCAsmInfo &MAI)
+ const MCAsmInfo &MAI, unsigned CB = 0)
: Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
- PlatformParser(nullptr), CurBuffer(SM.getMainFileID()),
- MacrosEnabledFlag(true), CppHashInfo(), AssemblerDialect(~0U),
- IsDarwin(false), ParsingInlineAsm(false) {
+ CurBuffer(CB ? CB : SM.getMainFileID()), MacrosEnabledFlag(true) {
HadError = false;
// Save the old handler.
SavedDiagHandler = SrcMgr.getDiagHandler();
@@ -597,6 +596,9 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
case MCObjectFileInfo::IsELF:
PlatformParser.reset(createELFAsmParser());
break;
+ case MCObjectFileInfo::IsWasm:
+ llvm_unreachable("Wasm parsing not supported yet");
+ break;
}
PlatformParser->Initialize(*this);
@@ -608,6 +610,10 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
AsmParser::~AsmParser() {
assert((HadError || ActiveMacros.empty()) &&
"Unexpected active macro instantiation!");
+
+ // Restore the saved diagnostics handler and context for use during
+ // finalization.
+ SrcMgr.setDiagHandler(SavedDiagHandler, SavedDiagContext);
}
void AsmParser::printMacroInstantiations() {
@@ -918,7 +924,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
Lex(); // Eat the operator.
if (parsePrimaryExpr(Res, EndLoc))
return true;
- Res = MCUnaryExpr::createLNot(Res, getContext());
+ Res = MCUnaryExpr::createLNot(Res, getContext(), FirstTokenLoc);
return false;
case AsmToken::Dollar:
case AsmToken::At:
@@ -979,7 +985,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
// Lookup the symbol variant if used.
- if (Split.second.size()) {
+ if (!Split.second.empty()) {
Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
if (Variant != MCSymbolRefExpr::VK_Invalid) {
SymbolName = Split.first;
@@ -1005,7 +1011,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
}
// Otherwise create a symbol ref.
- Res = MCSymbolRefExpr::create(Sym, Variant, getContext());
+ Res = MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc);
return false;
}
case AsmToken::BigNum:
@@ -1071,19 +1077,19 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
Lex(); // Eat the operator.
if (parsePrimaryExpr(Res, EndLoc))
return true;
- Res = MCUnaryExpr::createMinus(Res, getContext());
+ Res = MCUnaryExpr::createMinus(Res, getContext(), FirstTokenLoc);
return false;
case AsmToken::Plus:
Lex(); // Eat the operator.
if (parsePrimaryExpr(Res, EndLoc))
return true;
- Res = MCUnaryExpr::createPlus(Res, getContext());
+ Res = MCUnaryExpr::createPlus(Res, getContext(), FirstTokenLoc);
return false;
case AsmToken::Tilde:
Lex(); // Eat the operator.
if (parsePrimaryExpr(Res, EndLoc))
return true;
- Res = MCUnaryExpr::createNot(Res, getContext());
+ Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc);
return false;
// MIPS unary expression operators. The lexer won't generate these tokens if
// MCAsmInfo::HasMipsExpressions is false for the target.
@@ -1436,6 +1442,7 @@ unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K,
/// Res contains the LHS of the expression on input.
bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
SMLoc &EndLoc) {
+ SMLoc StartLoc = Lexer.getLoc();
while (true) {
MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind);
@@ -1460,7 +1467,7 @@ bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
return true;
// Merge LHS and RHS according to operator.
- Res = MCBinaryExpr::create(Kind, Res, RHS, getContext());
+ Res = MCBinaryExpr::create(Kind, Res, RHS, getContext(), StartLoc);
}
}
@@ -1617,7 +1624,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
if (ParsingInlineAsm && SI) {
StringRef RewrittenLabel =
SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true);
- assert(RewrittenLabel.size() &&
+ assert(!RewrittenLabel.empty() &&
"We should have an internal name here.");
Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(),
RewrittenLabel);
@@ -1626,12 +1633,6 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
Sym = getContext().getOrCreateSymbol(IDVal);
} else
Sym = Ctx.createDirectionalLocalSymbol(LocalLabelVal);
-
- Sym->redefineIfPossible();
-
- if (!Sym->isUndefined() || Sym->isVariable())
- return Error(IDLoc, "invalid symbol redefinition");
-
// End of Labels should be treated as end of line for lexing
// purposes but that information is not available to the Lexer, which
// does not understand Labels. This may cause us to see a Hash
@@ -1650,7 +1651,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
// Emit the label.
if (!ParsingInlineAsm)
- Out.EmitLabel(Sym);
+ Out.EmitLabel(Sym, IDLoc);
// If we are generating dwarf for assembly source files then gather the
// info to make a dwarf label entry for this label if needed.
@@ -1979,7 +1980,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
if (ParsingInlineAsm && (IDVal == "align" || IDVal == "ALIGN"))
return parseDirectiveMSAlign(IDLoc, Info);
- if (ParsingInlineAsm && (IDVal == "even"))
+ if (ParsingInlineAsm && (IDVal == "even" || IDVal == "EVEN"))
Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
if (checkForValidSection())
return true;
@@ -2025,7 +2026,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
// If we previously parsed a cpp hash file line comment then make sure the
// current Dwarf File is for the CppHashFilename if not then emit the
// Dwarf File table for it and adjust the line number for the .loc.
- if (CppHashInfo.Filename.size()) {
+ if (!CppHashInfo.Filename.empty()) {
unsigned FileNumber = getStreamer().EmitDwarfFileDirective(
0, StringRef(), CppHashInfo.Filename);
getContext().setGenDwarfFileNumber(FileNumber);
@@ -3873,6 +3874,12 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
if (parseIdentifier(Parameter.Name))
return TokError("expected identifier in '.macro' directive");
+ // Emit an error if two (or more) named parameters share the same name.
+ for (const MCAsmMacroParameter &CurrParam : Parameters)
+ if (CurrParam.Name.equals(Parameter.Name))
+ return TokError("macro '" + Name + "' has multiple parameters"
+ " named '" + Parameter.Name + "'");
+
if (Lexer.is(AsmToken::Colon)) {
Lex(); // consume ':'
@@ -4191,7 +4198,6 @@ bool AsmParser::parseDirectiveBundleUnlock() {
/// parseDirectiveSpace
/// ::= (.skip | .space) expression [ , expression ]
bool AsmParser::parseDirectiveSpace(StringRef IDVal) {
-
SMLoc NumBytesLoc = Lexer.getLoc();
const MCExpr *NumBytes;
if (checkForValidSection() || parseExpression(NumBytes))
@@ -4287,7 +4293,6 @@ bool AsmParser::parseDirectiveRealDCB(StringRef IDVal, const fltSemantics &Seman
/// parseDirectiveDS
/// ::= .ds.{b, d, l, p, s, w, x} expression
bool AsmParser::parseDirectiveDS(StringRef IDVal, unsigned Size) {
-
SMLoc NumValuesLoc = Lexer.getLoc();
int64_t NumValues;
if (checkForValidSection() || parseAbsoluteExpression(NumValues))
@@ -4416,6 +4421,7 @@ bool AsmParser::parseDirectiveComm(bool IsLocal) {
return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
"alignment, can't be less than zero");
+ Sym->redefineIfPossible();
if (!Sym->isUndefined())
return Error(IDLoc, "invalid symbol redefinition");
@@ -5208,7 +5214,7 @@ static int rewritesSort(const AsmRewrite *AsmRewriteA,
bool AsmParser::parseMSInlineAsm(
void *AsmLoc, std::string &AsmString, unsigned &NumOutputs,
- unsigned &NumInputs, SmallVectorImpl<std::pair<void *, bool> > &OpDecls,
+ unsigned &NumInputs, SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
SmallVectorImpl<std::string> &Constraints,
SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII,
const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
@@ -5518,6 +5524,7 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
/// \brief Create an MCAsmParser instance.
MCAsmParser *llvm::createMCAsmParser(SourceMgr &SM, MCContext &C,
- MCStreamer &Out, const MCAsmInfo &MAI) {
- return new AsmParser(SM, C, Out, MAI);
+ MCStreamer &Out, const MCAsmInfo &MAI,
+ unsigned CB) {
+ return new AsmParser(SM, C, Out, MAI, CB);
}
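
A hedged usage sketch of the widened factory (runParser and BufferID are invented for illustration): the new trailing unsigned selects which SourceMgr buffer parsing starts from, and 0 preserves the old behavior of starting at SM.getMainFileID(), as the CurBuffer initializer above shows.

#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/Support/SourceMgr.h"
#include <memory>

bool runParser(llvm::SourceMgr &SM, llvm::MCContext &Ctx,
               llvm::MCStreamer &Out, const llvm::MCAsmInfo &MAI,
               unsigned BufferID) {
  std::unique_ptr<llvm::MCAsmParser> Parser(
      llvm::createMCAsmParser(SM, Ctx, Out, MAI, BufferID));
  return Parser->Run(/*NoInitialTextSection=*/false);
}
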
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index f4114795a92d..bec62ccb2f7f 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -7,19 +7,27 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/SectionKind.h"
#include "llvm/Support/COFF.h"
+#include "llvm/Support/SMLoc.h"
+#include <cassert>
+#include <cstdint>
+#include <limits>
+#include <utility>
+
using namespace llvm;
namespace {
@@ -98,12 +106,14 @@ class COFFAsmParser : public MCAsmParserExtension {
| COFF::IMAGE_SCN_MEM_READ,
SectionKind::getText());
}
+
bool ParseSectionDirectiveData(StringRef, SMLoc) {
return ParseSectionSwitch(".data", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_MEM_WRITE,
SectionKind::getData());
}
+
bool ParseSectionDirectiveBSS(StringRef, SMLoc) {
return ParseSectionSwitch(".bss",
COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA
@@ -141,8 +151,9 @@ class COFFAsmParser : public MCAsmParserExtension {
bool ParseAtUnwindOrAtExcept(bool &unwind, bool &except);
bool ParseSEHRegisterNumber(unsigned &RegNo);
bool ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc);
+
public:
- COFFAsmParser() {}
+ COFFAsmParser() = default;
};
} // end anonymous namespace
@@ -277,7 +288,7 @@ bool COFFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
.Default(MCSA_Invalid);
assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!");
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for (;;) {
+ while (true) {
StringRef Name;
if (getParser().parseIdentifier(Name))
@@ -466,10 +477,11 @@ bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
- if (Offset < 0 || Offset > UINT32_MAX)
- return Error(OffsetLoc,
- "invalid '.secrel32' directive offset, can't be less "
- "than zero or greater than UINT32_MAX");
+ if (Offset < 0 || Offset > std::numeric_limits<uint32_t>::max())
+ return Error(
+ OffsetLoc,
+ "invalid '.secrel32' directive offset, can't be less "
+ "than zero or greater than std::numeric_limits<uint32_t>::max()");
MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
@@ -817,4 +829,4 @@ MCAsmParserExtension *createCOFFAsmParser() {
return new COFFAsmParser;
}
-}
+} // end namespace llvm
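
The .secrel32 bound check above, restated standalone (illustrative; isValidSecRel32Offset is an invented name): offsets must fit an unsigned 32-bit relocation field, which is all the numeric_limits spelling changes relative to the old UINT32_MAX macro.

#include <cstdint>
#include <limits>

inline bool isValidSecRel32Offset(int64_t Offset) {
  // Same range test as ParseDirectiveSecRel32: [0, 2^32 - 1].
  return Offset >= 0 && Offset <= std::numeric_limits<uint32_t>::max();
}
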
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 94aa70ef0326..73a7ad0500c3 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -7,22 +7,35 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/SectionKind.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MachO.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <system_error>
+#include <utility>
+
using namespace llvm;
namespace {
@@ -44,7 +57,7 @@ class DarwinAsmParser : public MCAsmParserExtension {
SMLoc LastVersionMinDirective;
public:
- DarwinAsmParser() {}
+ DarwinAsmParser() = default;
void Initialize(MCAsmParser &Parser) override {
// Call the base implementation.
@@ -209,37 +222,47 @@ public:
bool parseSectionDirectiveConst(StringRef, SMLoc) {
return parseSectionSwitch("__TEXT", "__const");
}
+
bool parseSectionDirectiveStaticConst(StringRef, SMLoc) {
return parseSectionSwitch("__TEXT", "__static_const");
}
+
bool parseSectionDirectiveCString(StringRef, SMLoc) {
return parseSectionSwitch("__TEXT","__cstring",
MachO::S_CSTRING_LITERALS);
}
+
bool parseSectionDirectiveLiteral4(StringRef, SMLoc) {
return parseSectionSwitch("__TEXT", "__literal4",
MachO::S_4BYTE_LITERALS, 4);
}
+
bool parseSectionDirectiveLiteral8(StringRef, SMLoc) {
return parseSectionSwitch("__TEXT", "__literal8",
MachO::S_8BYTE_LITERALS, 8);
}
+
bool parseSectionDirectiveLiteral16(StringRef, SMLoc) {
return parseSectionSwitch("__TEXT","__literal16",
MachO::S_16BYTE_LITERALS, 16);
}
+
bool parseSectionDirectiveConstructor(StringRef, SMLoc) {
return parseSectionSwitch("__TEXT","__constructor");
}
+
bool parseSectionDirectiveDestructor(StringRef, SMLoc) {
return parseSectionSwitch("__TEXT","__destructor");
}
+
bool parseSectionDirectiveFVMLibInit0(StringRef, SMLoc) {
return parseSectionSwitch("__TEXT","__fvmlib_init0");
}
+
bool parseSectionDirectiveFVMLibInit1(StringRef, SMLoc) {
return parseSectionSwitch("__TEXT","__fvmlib_init1");
}
+
bool parseSectionDirectiveSymbolStub(StringRef, SMLoc) {
return parseSectionSwitch("__TEXT","__symbol_stub",
MachO::S_SYMBOL_STUBS |
@@ -247,144 +270,178 @@ public:
// FIXME: Different on PPC and ARM.
0, 16);
}
+
bool parseSectionDirectivePICSymbolStub(StringRef, SMLoc) {
return parseSectionSwitch("__TEXT","__picsymbol_stub",
MachO::S_SYMBOL_STUBS |
MachO::S_ATTR_PURE_INSTRUCTIONS, 0, 26);
}
+
bool parseSectionDirectiveData(StringRef, SMLoc) {
return parseSectionSwitch("__DATA", "__data");
}
+
bool parseSectionDirectiveStaticData(StringRef, SMLoc) {
return parseSectionSwitch("__DATA", "__static_data");
}
+
bool parseSectionDirectiveNonLazySymbolPointers(StringRef, SMLoc) {
return parseSectionSwitch("__DATA", "__nl_symbol_ptr",
MachO::S_NON_LAZY_SYMBOL_POINTERS, 4);
}
+
bool parseSectionDirectiveLazySymbolPointers(StringRef, SMLoc) {
return parseSectionSwitch("__DATA", "__la_symbol_ptr",
MachO::S_LAZY_SYMBOL_POINTERS, 4);
}
+
bool parseSectionDirectiveThreadLocalVariablePointers(StringRef, SMLoc) {
return parseSectionSwitch("__DATA", "__thread_ptr",
MachO::S_THREAD_LOCAL_VARIABLE_POINTERS, 4);
}
+
bool parseSectionDirectiveDyld(StringRef, SMLoc) {
return parseSectionSwitch("__DATA", "__dyld");
}
+
bool parseSectionDirectiveModInitFunc(StringRef, SMLoc) {
return parseSectionSwitch("__DATA", "__mod_init_func",
MachO::S_MOD_INIT_FUNC_POINTERS, 4);
}
+
bool parseSectionDirectiveModTermFunc(StringRef, SMLoc) {
return parseSectionSwitch("__DATA", "__mod_term_func",
MachO::S_MOD_TERM_FUNC_POINTERS, 4);
}
+
bool parseSectionDirectiveConstData(StringRef, SMLoc) {
return parseSectionSwitch("__DATA", "__const");
}
+
bool parseSectionDirectiveObjCClass(StringRef, SMLoc) {
return parseSectionSwitch("__OBJC", "__class",
MachO::S_ATTR_NO_DEAD_STRIP);
}
+
bool parseSectionDirectiveObjCMetaClass(StringRef, SMLoc) {
return parseSectionSwitch("__OBJC", "__meta_class",
MachO::S_ATTR_NO_DEAD_STRIP);
}
+
bool parseSectionDirectiveObjCCatClsMeth(StringRef, SMLoc) {
return parseSectionSwitch("__OBJC", "__cat_cls_meth",
MachO::S_ATTR_NO_DEAD_STRIP);
}
+
bool parseSectionDirectiveObjCCatInstMeth(StringRef, SMLoc) {
return parseSectionSwitch("__OBJC", "__cat_inst_meth",
MachO::S_ATTR_NO_DEAD_STRIP);
}
+
bool parseSectionDirectiveObjCProtocol(StringRef, SMLoc) {
return parseSectionSwitch("__OBJC", "__protocol",
MachO::S_ATTR_NO_DEAD_STRIP);
}
+
bool parseSectionDirectiveObjCStringObject(StringRef, SMLoc) {
return parseSectionSwitch("__OBJC", "__string_object",
MachO::S_ATTR_NO_DEAD_STRIP);
}
+
bool parseSectionDirectiveObjCClsMeth(StringRef, SMLoc) {
return parseSectionSwitch("__OBJC", "__cls_meth",
MachO::S_ATTR_NO_DEAD_STRIP);
}
+
bool parseSectionDirectiveObjCInstMeth(StringRef, SMLoc) {
return parseSectionSwitch("__OBJC", "__inst_meth",
MachO::S_ATTR_NO_DEAD_STRIP);
}
+
bool parseSectionDirectiveObjCClsRefs(StringRef, SMLoc) {
return parseSectionSwitch("__OBJC", "__cls_refs",
MachO::S_ATTR_NO_DEAD_STRIP |
MachO::S_LITERAL_POINTERS, 4);
}
+
bool parseSectionDirectiveObjCMessageRefs(StringRef, SMLoc) {
return parseSectionSwitch("__OBJC", "__message_refs",
MachO::S_ATTR_NO_DEAD_STRIP |
MachO::S_LITERAL_POINTERS, 4);
}
+
bool parseSectionDirectiveObjCSymbols(StringRef, SMLoc) {
return parseSectionSwitch("__OBJC", "__symbols",
MachO::S_ATTR_NO_DEAD_STRIP);
}
+
bool parseSectionDirectiveObjCCategory(StringRef, SMLoc) {
return parseSectionSwitch("__OBJC", "__category",
MachO::S_ATTR_NO_DEAD_STRIP);
}
+
bool parseSectionDirectiveObjCClassVars(StringRef, SMLoc) {
return parseSectionSwitch("__OBJC", "__class_vars",
MachO::S_ATTR_NO_DEAD_STRIP);
}
+
bool parseSectionDirectiveObjCInstanceVars(StringRef, SMLoc) {
return parseSectionSwitch("__OBJC", "__instance_vars",
MachO::S_ATTR_NO_DEAD_STRIP);
}
+
bool parseSectionDirectiveObjCModuleInfo(StringRef, SMLoc) {
return parseSectionSwitch("__OBJC", "__module_info",
MachO::S_ATTR_NO_DEAD_STRIP);
}
+
bool parseSectionDirectiveObjCClassNames(StringRef, SMLoc) {
return parseSectionSwitch("__TEXT", "__cstring",
MachO::S_CSTRING_LITERALS);
}
+
bool parseSectionDirectiveObjCMethVarTypes(StringRef, SMLoc) {
return parseSectionSwitch("__TEXT", "__cstring",
MachO::S_CSTRING_LITERALS);
}
+
bool parseSectionDirectiveObjCMethVarNames(StringRef, SMLoc) {
return parseSectionSwitch("__TEXT", "__cstring",
MachO::S_CSTRING_LITERALS);
}
+
bool parseSectionDirectiveObjCSelectorStrs(StringRef, SMLoc) {
return parseSectionSwitch("__OBJC", "__selector_strs",
MachO::S_CSTRING_LITERALS);
}
+
bool parseSectionDirectiveTData(StringRef, SMLoc) {
return parseSectionSwitch("__DATA", "__thread_data",
MachO::S_THREAD_LOCAL_REGULAR);
}
+
bool parseSectionDirectiveText(StringRef, SMLoc) {
return parseSectionSwitch("__TEXT", "__text",
MachO::S_ATTR_PURE_INSTRUCTIONS);
}
+
bool parseSectionDirectiveTLV(StringRef, SMLoc) {
return parseSectionSwitch("__DATA", "__thread_vars",
MachO::S_THREAD_LOCAL_VARIABLES);
}
+
bool parseSectionDirectiveIdent(StringRef, SMLoc) {
// Darwin silently ignores the .ident directive.
getParser().eatToEndOfStatement();
return false;
}
+
bool parseSectionDirectiveThreadInitFunc(StringRef, SMLoc) {
return parseSectionSwitch("__DATA", "__thread_init",
MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS);
}
- bool parseVersionMin(StringRef, SMLoc);
+ bool parseVersionMin(StringRef, SMLoc);
};
} // end anonymous namespace
@@ -526,7 +583,7 @@ bool DarwinAsmParser::parseDirectiveDumpOrLoad(StringRef Directive,
/// ::= .linker_option "string" ( , "string" )*
bool DarwinAsmParser::parseDirectiveLinkerOption(StringRef IDVal, SMLoc) {
SmallVector<std::string, 4> Args;
- for (;;) {
+ while (true) {
if (getLexer().isNot(AsmToken::String))
return TokError("expected string in '" + Twine(IDVal) + "' directive");
@@ -604,7 +661,6 @@ bool DarwinAsmParser::parseDirectiveSection(StringRef, SMLoc) {
return TokError("unexpected token in '.section' directive");
Lex();
-
StringRef Segment, Section;
unsigned StubSize;
unsigned TAA;
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index 8d7ba0d03362..401011a027f4 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -7,17 +7,29 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/SMLoc.h"
+#include <cassert>
+#include <cstdint>
+#include <utility>
+
using namespace llvm;
namespace {
@@ -142,9 +154,14 @@ private:
bool ParseSectionName(StringRef &SectionName);
bool ParseSectionArguments(bool IsPush, SMLoc loc);
unsigned parseSunStyleSectionFlags();
+ bool maybeParseSectionType(StringRef &TypeName);
+ bool parseMergeSize(int64_t &Size);
+ bool parseGroup(StringRef &GroupName);
+ bool parseMetadataSym(MCSymbolELF *&Associated);
+ bool maybeParseUniqueID(int64_t &UniqueID);
};
-}
+} // end anonymous namespace
/// ParseDirectiveSymbolAttribute
/// ::= { ".local", ".weak", ... } [ identifier ( , identifier )* ]
@@ -158,7 +175,7 @@ bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
.Default(MCSA_Invalid);
assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!");
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for (;;) {
+ while (true) {
StringRef Name;
if (getParser().parseIdentifier(Name))
@@ -230,8 +247,7 @@ bool ELFAsmParser::ParseSectionName(StringRef &SectionName) {
return false;
}
- for (;;) {
-
+ while (true) {
SMLoc PrevLoc = getLexer().getLoc();
if (getLexer().is(AsmToken::Comma) ||
getLexer().is(AsmToken::EndOfStatement))
@@ -282,6 +298,9 @@ static unsigned parseSectionFlags(StringRef flagsStr, bool *UseLastGroup) {
case 'w':
flags |= ELF::SHF_WRITE;
break;
+ case 'o':
+ flags |= ELF::SHF_LINK_ORDER;
+ break;
case 'M':
flags |= ELF::SHF_MERGE;
break;
@@ -366,6 +385,97 @@ bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc loc) {
return ParseSectionArguments(/*IsPush=*/false, loc);
}
+bool ELFAsmParser::maybeParseSectionType(StringRef &TypeName) {
+ MCAsmLexer &L = getLexer();
+ if (L.isNot(AsmToken::Comma))
+ return false;
+ Lex();
+ if (L.isNot(AsmToken::At) && L.isNot(AsmToken::Percent) &&
+ L.isNot(AsmToken::String)) {
+ if (L.getAllowAtInIdentifier())
+ return TokError("expected '@<type>', '%<type>' or \"<type>\"");
+ else
+ return TokError("expected '%<type>' or \"<type>\"");
+ }
+ if (!L.is(AsmToken::String))
+ Lex();
+ if (L.is(AsmToken::Integer)) {
+ TypeName = getTok().getString();
+ Lex();
+ } else if (getParser().parseIdentifier(TypeName))
+ return TokError("expected identifier in directive");
+ return false;
+}
+
+bool ELFAsmParser::parseMergeSize(int64_t &Size) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("expected the entry size");
+ Lex();
+ if (getParser().parseAbsoluteExpression(Size))
+ return true;
+ if (Size <= 0)
+ return TokError("entry size must be positive");
+ return false;
+}
+
+bool ELFAsmParser::parseGroup(StringRef &GroupName) {
+ MCAsmLexer &L = getLexer();
+ if (L.isNot(AsmToken::Comma))
+ return TokError("expected group name");
+ Lex();
+ if (getParser().parseIdentifier(GroupName))
+ return true;
+ if (L.is(AsmToken::Comma)) {
+ Lex();
+ StringRef Linkage;
+ if (getParser().parseIdentifier(Linkage))
+ return true;
+ if (Linkage != "comdat")
+ return TokError("Linkage must be 'comdat'");
+ }
+ return false;
+}
+
+bool ELFAsmParser::parseMetadataSym(MCSymbolELF *&Associated) {
+ MCAsmLexer &L = getLexer();
+ if (L.isNot(AsmToken::Comma))
+ return TokError("expected metadata symbol");
+ Lex();
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return true;
+ Associated = dyn_cast_or_null<MCSymbolELF>(getContext().lookupSymbol(Name));
+ if (!Associated || !Associated->isInSection())
+ return TokError("symbol is not in a section: " + Name);
+ return false;
+}
+
+bool ELFAsmParser::maybeParseUniqueID(int64_t &UniqueID) {
+ MCAsmLexer &L = getLexer();
+ if (L.isNot(AsmToken::Comma))
+ return false;
+ Lex();
+ StringRef UniqueStr;
+ if (getParser().parseIdentifier(UniqueStr))
+ return TokError("expected identifier in directive");
+ if (UniqueStr != "unique")
+ return TokError("expected 'unique'");
+ if (L.isNot(AsmToken::Comma))
+ return TokError("expected commma");
+ Lex();
+ if (getParser().parseAbsoluteExpression(UniqueID))
+ return true;
+ if (UniqueID < 0)
+ return TokError("unique id must be positive");
+ if (!isUInt<32>(UniqueID) || UniqueID == ~0U)
+ return TokError("unique id is too large");
+ return false;
+}
+
+static bool hasPrefix(StringRef SectionName, StringRef Prefix) {
+ return SectionName.startswith(Prefix) || SectionName == Prefix.drop_back();
+}
+
bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
StringRef SectionName;
@@ -379,14 +489,24 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
const MCExpr *Subsection = nullptr;
bool UseLastGroup = false;
StringRef UniqueStr;
+ MCSymbolELF *Associated = nullptr;
int64_t UniqueID = ~0;
// Set the defaults first.
- if (SectionName == ".fini" || SectionName == ".init" ||
- SectionName == ".rodata")
+ if (hasPrefix(SectionName, ".rodata.") || SectionName == ".rodata1")
Flags |= ELF::SHF_ALLOC;
- if (SectionName == ".fini" || SectionName == ".init")
- Flags |= ELF::SHF_EXECINSTR;
+ if (SectionName == ".fini" || SectionName == ".init" ||
+ hasPrefix(SectionName, ".text."))
+ Flags |= ELF::SHF_ALLOC | ELF::SHF_EXECINSTR;
+ if (hasPrefix(SectionName, ".data.") || SectionName == ".data1" ||
+ hasPrefix(SectionName, ".bss.") ||
+ hasPrefix(SectionName, ".init_array.") ||
+ hasPrefix(SectionName, ".fini_array.") ||
+ hasPrefix(SectionName, ".preinit_array."))
+ Flags |= ELF::SHF_ALLOC | ELF::SHF_WRITE;
+ if (hasPrefix(SectionName, ".tdata.") ||
+ hasPrefix(SectionName, ".tbss."))
+ Flags |= ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_TLS;
if (getLexer().is(AsmToken::Comma)) {
Lex();
@@ -422,65 +542,30 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
return TokError("Section cannot specifiy a group name while also acting "
"as a member of the last group");
- if (getLexer().isNot(AsmToken::Comma)) {
+ if (maybeParseSectionType(TypeName))
+ return true;
+
+ MCAsmLexer &L = getLexer();
+ if (TypeName.empty()) {
if (Mergeable)
return TokError("Mergeable section must specify the type");
if (Group)
return TokError("Group section must specify the type");
- } else {
- Lex();
- if (getLexer().is(AsmToken::At) || getLexer().is(AsmToken::Percent) ||
- getLexer().is(AsmToken::String)) {
- if (!getLexer().is(AsmToken::String))
- Lex();
- } else
- return TokError("expected '@<type>', '%<type>' or \"<type>\"");
-
- if (getParser().parseIdentifier(TypeName))
- return TokError("expected identifier in directive");
-
- if (Mergeable) {
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("expected the entry size");
- Lex();
- if (getParser().parseAbsoluteExpression(Size))
- return true;
- if (Size <= 0)
- return TokError("entry size must be positive");
- }
-
- if (Group) {
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("expected group name");
- Lex();
- if (getParser().parseIdentifier(GroupName))
- return true;
- if (getLexer().is(AsmToken::Comma)) {
- Lex();
- StringRef Linkage;
- if (getParser().parseIdentifier(Linkage))
- return true;
- if (Linkage != "comdat")
- return TokError("Linkage must be 'comdat'");
- }
- }
- if (getLexer().is(AsmToken::Comma)) {
- Lex();
- if (getParser().parseIdentifier(UniqueStr))
- return TokError("expected identifier in directive");
- if (UniqueStr != "unique")
- return TokError("expected 'unique'");
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("expected commma");
- Lex();
- if (getParser().parseAbsoluteExpression(UniqueID))
- return true;
- if (UniqueID < 0)
- return TokError("unique id must be positive");
- if (!isUInt<32>(UniqueID) || UniqueID == ~0U)
- return TokError("unique id is too large");
- }
+ if (L.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
}
+
+ if (Mergeable)
+ if (parseMergeSize(Size))
+ return true;
+ if (Group)
+ if (parseGroup(GroupName))
+ return true;
+ if (Flags & ELF::SHF_LINK_ORDER)
+ if (parseMetadataSym(Associated))
+ return true;
+ if (maybeParseUniqueID(UniqueID))
+ return true;
}
EndStmt:
@@ -493,11 +578,15 @@ EndStmt:
if (TypeName.empty()) {
if (SectionName.startswith(".note"))
Type = ELF::SHT_NOTE;
- else if (SectionName == ".init_array")
+ else if (hasPrefix(SectionName, ".init_array."))
Type = ELF::SHT_INIT_ARRAY;
- else if (SectionName == ".fini_array")
+ else if (hasPrefix(SectionName, ".bss."))
+ Type = ELF::SHT_NOBITS;
+ else if (hasPrefix(SectionName, ".tbss."))
+ Type = ELF::SHT_NOBITS;
+ else if (hasPrefix(SectionName, ".fini_array."))
Type = ELF::SHT_FINI_ARRAY;
- else if (SectionName == ".preinit_array")
+ else if (hasPrefix(SectionName, ".preinit_array."))
Type = ELF::SHT_PREINIT_ARRAY;
} else {
if (TypeName == "init_array")
@@ -514,7 +603,7 @@ EndStmt:
Type = ELF::SHT_NOTE;
else if (TypeName == "unwind")
Type = ELF::SHT_X86_64_UNWIND;
- else
+ else if (TypeName.getAsInteger(0, Type))
return TokError("unknown section type");
}
@@ -528,8 +617,9 @@ EndStmt:
}
}
- MCSection *ELFSection = getContext().getELFSection(SectionName, Type, Flags,
- Size, GroupName, UniqueID);
+ MCSection *ELFSection =
+ getContext().getELFSection(SectionName, Type, Flags, Size, GroupName,
+ UniqueID, Associated);
getStreamer().SwitchSection(ELFSection, Subsection);
if (getContext().getGenDwarfForAssembly()) {
@@ -677,6 +767,7 @@ bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) {
const MCExpr *Value = MCSymbolRefExpr::create(Sym, getContext());
getStreamer().EmitAssignment(Alias, Value);
+ getStreamer().emitELFSymverDirective(Alias, Sym);
return false;
}
@@ -752,4 +843,4 @@ MCAsmParserExtension *createELFAsmParser() {
return new ELFAsmParser;
}
-}
+} // end namespace llvm
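
To make the refactored argument grammar concrete, here are directives each new helper accepts (an editorial sketch, not test input from the patch; the section and symbol names are made up, and the 'o' example requires bar to already be defined in some section, per parseMetadataSym):

static const char *SectionDirectiveExamples[] = {
    ".section .rodata.str1.1,\"aMS\",@progbits,1",     // parseMergeSize
    ".section .text.hot,\"axG\",@progbits,grp,comdat", // parseGroup
    ".section .foo,\"ao\",@progbits,bar",              // parseMetadataSym
    ".section .bar,\"a\",@progbits,unique,1",          // maybeParseUniqueID
};
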
diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp
index 63c0daba09a0..f8fe78aece0c 100644
--- a/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/lib/MC/MCParser/MCAsmLexer.cpp
@@ -1,4 +1,4 @@
-//===-- MCAsmLexer.cpp - Abstract Asm Lexer Interface ---------------------===//
+//===- MCAsmLexer.cpp - Abstract Asm Lexer Interface ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,19 +7,17 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/SMLoc.h"
using namespace llvm;
-MCAsmLexer::MCAsmLexer()
- : TokStart(nullptr), SkipSpace(true), IsAtStartOfStatement(true),
- CommentConsumer(nullptr) {
+MCAsmLexer::MCAsmLexer() {
CurTok.emplace_back(AsmToken::Space, StringRef());
}
-MCAsmLexer::~MCAsmLexer() {
-}
+MCAsmLexer::~MCAsmLexer() = default;
SMLoc MCAsmLexer::getLoc() const {
return SMLoc::getFromPointer(TokStart);
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp
index 98f4daf972d6..27b37f3e2dfb 100644
--- a/lib/MC/MCParser/MCAsmParser.cpp
+++ b/lib/MC/MCParser/MCAsmParser.cpp
@@ -7,22 +7,22 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
using namespace llvm;
-MCAsmParser::MCAsmParser()
- : TargetParser(nullptr), ShowParsedOperands(0), HadError(false),
- PendingErrors() {}
+MCAsmParser::MCAsmParser() : ShowParsedOperands(0) {}
-MCAsmParser::~MCAsmParser() {
-}
+MCAsmParser::~MCAsmParser() = default;
void MCAsmParser::setTargetParser(MCTargetAsmParser &P) {
assert(!TargetParser && "Target parser is already initialized!");
@@ -118,10 +118,10 @@ bool MCAsmParser::addErrorSuffix(const Twine &Suffix) {
return true;
}
-bool MCAsmParser::parseMany(std::function<bool()> parseOne, bool hasComma) {
+bool MCAsmParser::parseMany(function_ref<bool()> parseOne, bool hasComma) {
if (parseOptionalToken(AsmToken::EndOfStatement))
return false;
- while (1) {
+ while (true) {
if (parseOne())
return true;
if (parseOptionalToken(AsmToken::EndOfStatement))
@@ -137,6 +137,9 @@ bool MCAsmParser::parseExpression(const MCExpr *&Res) {
return parseExpression(Res, L);
}
-LLVM_DUMP_METHOD void MCParsedAsmOperand::dump() const {
+void MCParsedAsmOperand::dump() const {
+ // Cannot completely remove virtual function even in release mode.
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << " " << *this;
+#endif
}
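
On the std::function-to-function_ref switch in parseMany: llvm::function_ref is a non-owning view of a callable, so passing a lambda costs no allocation; the only contract is that the callable must outlive the call, which holds for a parse-and-return API like this. A minimal sketch (forEachItem is invented):

#include "llvm/ADT/STLExtras.h"

static bool forEachItem(int N, llvm::function_ref<bool(int)> ParseOne) {
  for (int I = 0; I < N; ++I)
    if (ParseOne(I)) // true signals a parse error, as in parseMany
      return true;
  return false;
}

// Usage: forEachItem(4, [](int I) { return false; });
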
diff --git a/lib/MC/MCParser/MCAsmParserExtension.cpp b/lib/MC/MCParser/MCAsmParserExtension.cpp
index 3f25a14926b6..031f473dc5fe 100644
--- a/lib/MC/MCParser/MCAsmParserExtension.cpp
+++ b/lib/MC/MCParser/MCAsmParserExtension.cpp
@@ -1,4 +1,4 @@
-//===-- MCAsmParserExtension.cpp - Asm Parser Hooks -----------------------===//
+//===- MCAsmParserExtension.cpp - Asm Parser Hooks ------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,14 +8,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+
using namespace llvm;
-MCAsmParserExtension::MCAsmParserExtension() :
- BracketExpressionsSupported(false) {
-}
+MCAsmParserExtension::MCAsmParserExtension() = default;
-MCAsmParserExtension::~MCAsmParserExtension() {
-}
+MCAsmParserExtension::~MCAsmParserExtension() = default;
void MCAsmParserExtension::Initialize(MCAsmParser &Parser) {
this->Parser = &Parser;
diff --git a/lib/MC/MCParser/MCTargetAsmParser.cpp b/lib/MC/MCParser/MCTargetAsmParser.cpp
index 14a22c6b8a2f..5f821443bb96 100644
--- a/lib/MC/MCParser/MCTargetAsmParser.cpp
+++ b/lib/MC/MCParser/MCTargetAsmParser.cpp
@@ -1,4 +1,4 @@
-//===-- MCTargetAsmParser.cpp - Target Assembly Parser ---------------------==//
+//===-- MCTargetAsmParser.cpp - Target Assembly Parser --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,19 +7,16 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+
using namespace llvm;
MCTargetAsmParser::MCTargetAsmParser(MCTargetOptions const &MCOptions,
const MCSubtargetInfo &STI)
- : AvailableFeatures(0), ParsingInlineAsm(false), MCOptions(MCOptions),
- STI(&STI)
-{
-}
+ : MCOptions(MCOptions), STI(&STI) {}
-MCTargetAsmParser::~MCTargetAsmParser() {
-}
+MCTargetAsmParser::~MCTargetAsmParser() = default;
MCSubtargetInfo &MCTargetAsmParser::copySTI() {
MCSubtargetInfo &STICopy = getContext().getSubtargetCopy(getSTI());
diff --git a/lib/MC/MCRegisterInfo.cpp b/lib/MC/MCRegisterInfo.cpp
index ea117f3caa85..a75100a4876b 100644
--- a/lib/MC/MCRegisterInfo.cpp
+++ b/lib/MC/MCRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//=== MC/MCRegisterInfo.cpp - Target Register Description -------*- C++ -*-===//
+//===- MC/MCRegisterInfo.cpp - Target Register Description ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,9 +11,12 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
using namespace llvm;
diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp
index 9064cdf2f319..7986c0122043 100644
--- a/lib/MC/MCSection.cpp
+++ b/lib/MC/MCSection.cpp
@@ -7,17 +7,18 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCFragment.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
+#include <algorithm>
+#include <utility>
-//===----------------------------------------------------------------------===//
-// MCSection
-//===----------------------------------------------------------------------===//
+using namespace llvm;
MCSection::MCSection(SectionVariant V, SectionKind K, MCSymbol *Begin)
: Begin(Begin), BundleGroupBeforeFirstInst(false), HasInstructions(false),
@@ -31,8 +32,7 @@ MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) {
bool MCSection::hasEnded() const { return End && End->isInSection(); }
-MCSection::~MCSection() {
-}
+MCSection::~MCSection() = default;
void MCSection::setBundleLockState(BundleLockStateType NewState) {
if (NewState == NotBundleLocked) {
@@ -85,8 +85,9 @@ MCSection::getSubsectionInsertionPoint(unsigned Subsection) {
return IP;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MCSection::dump() {
- raw_ostream &OS = llvm::errs();
+ raw_ostream &OS = errs();
OS << "<MCSection";
OS << " Fragments:[\n ";
@@ -97,3 +98,4 @@ LLVM_DUMP_METHOD void MCSection::dump() {
}
OS << "]>";
}
+#endif
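
The #if guard added around MCSection::dump() is the standard LLVM gating for debug-only printers: the body disappears from release builds unless LLVM_ENABLE_DUMP is defined. The same pattern in isolation (dumpAnswer is an invented example):

#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD static void dumpAnswer() {
  llvm::errs() << "state: 42\n"; // only compiled into debug/dump builds
}
#endif
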
diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp
index f2dd47d81b7e..f0709cbc2515 100644
--- a/lib/MC/MCSectionCOFF.cpp
+++ b/lib/MC/MCSectionCOFF.cpp
@@ -8,14 +8,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCSectionCOFF.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
using namespace llvm;
-MCSectionCOFF::~MCSectionCOFF() {} // anchor.
+MCSectionCOFF::~MCSectionCOFF() = default; // anchor.
// ShouldOmitSectionDirective - Decides whether a '.section' directive
// should be printed before the section name
@@ -37,10 +37,9 @@ void MCSectionCOFF::setSelection(int Selection) const {
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
}
-void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
+void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const {
-
// standard sections don't require the '.section'
if (ShouldOmitSectionDirective(SectionName, MAI)) {
OS << '\t' << getSectionName() << '\n';
@@ -94,7 +93,7 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << "newest,";
break;
default:
- assert (0 && "unsupported COFF selection type");
+ assert(false && "unsupported COFF selection type");
break;
}
assert(COMDATSymbol);
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index 587b28f71b7d..78fe01cca24a 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -7,23 +7,23 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCSectionELF.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
using namespace llvm;
-MCSectionELF::~MCSectionELF() {} // anchor.
+MCSectionELF::~MCSectionELF() = default; // anchor.
// Decides whether a '.section' directive
// should be printed before the section name.
bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name,
const MCAsmInfo &MAI) const {
-
if (isUnique())
return false;
@@ -53,10 +53,9 @@ static void printName(raw_ostream &OS, StringRef Name) {
OS << '"';
}
-void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
+void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const {
-
if (ShouldOmitSectionDirective(SectionName, MAI)) {
OS << '\t' << getSectionName();
if (Subsection) {
@@ -104,14 +103,21 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << 'S';
if (Flags & ELF::SHF_TLS)
OS << 'T';
+ if (Flags & ELF::SHF_LINK_ORDER)
+ OS << 'o';
// If there are target-specific flags, print them.
- if (Flags & ELF::XCORE_SHF_CP_SECTION)
- OS << 'c';
- if (Flags & ELF::XCORE_SHF_DP_SECTION)
- OS << 'd';
- if (Flags & ELF::SHF_ARM_PURECODE)
- OS << 'y';
+ Triple::ArchType Arch = T.getArch();
+ if (Arch == Triple::xcore) {
+ if (Flags & ELF::XCORE_SHF_CP_SECTION)
+ OS << 'c';
+ if (Flags & ELF::XCORE_SHF_DP_SECTION)
+ OS << 'd';
+ } else if (Arch == Triple::arm || Arch == Triple::armeb ||
+ Arch == Triple::thumb || Arch == Triple::thumbeb) {
+ if (Flags & ELF::SHF_ARM_PURECODE)
+ OS << 'y';
+ }
OS << '"';
@@ -137,6 +143,13 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << "progbits";
else if (Type == ELF::SHT_X86_64_UNWIND)
OS << "unwind";
+ else if (Type == ELF::SHT_MIPS_DWARF)
+ // Print the hex value of the section type while there is no
+ // standard symbolic representation for it.
+ OS << "0x7000001e";
+ else
+ report_fatal_error("unsupported type 0x" + Twine::utohexstr(Type) +
+ " for section " + getSectionName());
if (EntrySize) {
assert(Flags & ELF::SHF_MERGE);
@@ -149,6 +162,12 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << ",comdat";
}
+ if (Flags & ELF::SHF_LINK_ORDER) {
+ assert(AssociatedSymbol);
+ OS << ",";
+ printName(OS, AssociatedSymbol->getName());
+ }
+
if (isUnique())
OS << ",unique," << UniqueID;
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index c2a772fdbdac..f40237231a2f 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -16,45 +16,57 @@ using namespace llvm;
/// SectionTypeDescriptors - These are strings that describe the various section
/// types. This *must* be kept in order with and stay synchronized with the
/// section type list.
-static const struct {
- StringRef AssemblerName, EnumName;
-} SectionTypeDescriptors[MachO::LAST_KNOWN_SECTION_TYPE+1] = {
- { "regular", "S_REGULAR" }, // 0x00
- { StringRef(), "S_ZEROFILL" }, // 0x01
- { "cstring_literals", "S_CSTRING_LITERALS" }, // 0x02
- { "4byte_literals", "S_4BYTE_LITERALS" }, // 0x03
- { "8byte_literals", "S_8BYTE_LITERALS" }, // 0x04
- { "literal_pointers", "S_LITERAL_POINTERS" }, // 0x05
- { "non_lazy_symbol_pointers", "S_NON_LAZY_SYMBOL_POINTERS" }, // 0x06
- { "lazy_symbol_pointers", "S_LAZY_SYMBOL_POINTERS" }, // 0x07
- { "symbol_stubs", "S_SYMBOL_STUBS" }, // 0x08
- { "mod_init_funcs", "S_MOD_INIT_FUNC_POINTERS" }, // 0x09
- { "mod_term_funcs", "S_MOD_TERM_FUNC_POINTERS" }, // 0x0A
- { "coalesced", "S_COALESCED" }, // 0x0B
- { StringRef(), /*FIXME??*/ "S_GB_ZEROFILL" }, // 0x0C
- { "interposing", "S_INTERPOSING" }, // 0x0D
- { "16byte_literals", "S_16BYTE_LITERALS" }, // 0x0E
- { StringRef(), /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F
- { StringRef(), /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" }, // 0x10
- { "thread_local_regular", "S_THREAD_LOCAL_REGULAR" }, // 0x11
- { "thread_local_zerofill", "S_THREAD_LOCAL_ZEROFILL" }, // 0x12
- { "thread_local_variables", "S_THREAD_LOCAL_VARIABLES" }, // 0x13
- { "thread_local_variable_pointers",
- "S_THREAD_LOCAL_VARIABLE_POINTERS" }, // 0x14
- { "thread_local_init_function_pointers",
- "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS"}, // 0x15
+static constexpr struct {
+ StringLiteral AssemblerName, EnumName;
+} SectionTypeDescriptors[MachO::LAST_KNOWN_SECTION_TYPE + 1] = {
+ {StringLiteral("regular"), StringLiteral("S_REGULAR")}, // 0x00
+ {StringLiteral(""), StringLiteral("S_ZEROFILL")}, // 0x01
+ {StringLiteral("cstring_literals"),
+ StringLiteral("S_CSTRING_LITERALS")}, // 0x02
+ {StringLiteral("4byte_literals"),
+ StringLiteral("S_4BYTE_LITERALS")}, // 0x03
+ {StringLiteral("8byte_literals"),
+ StringLiteral("S_8BYTE_LITERALS")}, // 0x04
+ {StringLiteral("literal_pointers"),
+ StringLiteral("S_LITERAL_POINTERS")}, // 0x05
+ {StringLiteral("non_lazy_symbol_pointers"),
+ StringLiteral("S_NON_LAZY_SYMBOL_POINTERS")}, // 0x06
+ {StringLiteral("lazy_symbol_pointers"),
+ StringLiteral("S_LAZY_SYMBOL_POINTERS")}, // 0x07
+ {StringLiteral("symbol_stubs"), StringLiteral("S_SYMBOL_STUBS")}, // 0x08
+ {StringLiteral("mod_init_funcs"),
+ StringLiteral("S_MOD_INIT_FUNC_POINTERS")}, // 0x09
+ {StringLiteral("mod_term_funcs"),
+ StringLiteral("S_MOD_TERM_FUNC_POINTERS")}, // 0x0A
+ {StringLiteral("coalesced"), StringLiteral("S_COALESCED")}, // 0x0B
+ {StringLiteral("") /*FIXME??*/, StringLiteral("S_GB_ZEROFILL")}, // 0x0C
+ {StringLiteral("interposing"), StringLiteral("S_INTERPOSING")}, // 0x0D
+ {StringLiteral("16byte_literals"),
+ StringLiteral("S_16BYTE_LITERALS")}, // 0x0E
+ {StringLiteral("") /*FIXME??*/, StringLiteral("S_DTRACE_DOF")}, // 0x0F
+ {StringLiteral("") /*FIXME??*/,
+ StringLiteral("S_LAZY_DYLIB_SYMBOL_POINTERS")}, // 0x10
+ {StringLiteral("thread_local_regular"),
+ StringLiteral("S_THREAD_LOCAL_REGULAR")}, // 0x11
+ {StringLiteral("thread_local_zerofill"),
+ StringLiteral("S_THREAD_LOCAL_ZEROFILL")}, // 0x12
+ {StringLiteral("thread_local_variables"),
+ StringLiteral("S_THREAD_LOCAL_VARIABLES")}, // 0x13
+ {StringLiteral("thread_local_variable_pointers"),
+ StringLiteral("S_THREAD_LOCAL_VARIABLE_POINTERS")}, // 0x14
+ {StringLiteral("thread_local_init_function_pointers"),
+ StringLiteral("S_THREAD_LOCAL_INIT_FUNCTION_POINTERS")}, // 0x15
};
-
/// SectionAttrDescriptors - This is an array of descriptors for section
/// attributes. Unlike the SectionTypeDescriptors, this is not directly indexed
/// by attribute, instead it is searched.
-static const struct {
+static constexpr struct {
unsigned AttrFlag;
- StringRef AssemblerName, EnumName;
+ StringLiteral AssemblerName, EnumName;
} SectionAttrDescriptors[] = {
#define ENTRY(ASMNAME, ENUM) \
- { MachO::ENUM, ASMNAME, #ENUM },
+ { MachO::ENUM, StringLiteral(ASMNAME), StringLiteral(#ENUM) },
ENTRY("pure_instructions", S_ATTR_PURE_INSTRUCTIONS)
ENTRY("no_toc", S_ATTR_NO_TOC)
ENTRY("strip_static_syms", S_ATTR_STRIP_STATIC_SYMS)
@@ -62,11 +74,11 @@ ENTRY("no_dead_strip", S_ATTR_NO_DEAD_STRIP)
ENTRY("live_support", S_ATTR_LIVE_SUPPORT)
ENTRY("self_modifying_code", S_ATTR_SELF_MODIFYING_CODE)
ENTRY("debug", S_ATTR_DEBUG)
-ENTRY(StringRef() /*FIXME*/, S_ATTR_SOME_INSTRUCTIONS)
-ENTRY(StringRef() /*FIXME*/, S_ATTR_EXT_RELOC)
-ENTRY(StringRef() /*FIXME*/, S_ATTR_LOC_RELOC)
+ENTRY("" /*FIXME*/, S_ATTR_SOME_INSTRUCTIONS)
+ENTRY("" /*FIXME*/, S_ATTR_EXT_RELOC)
+ENTRY("" /*FIXME*/, S_ATTR_LOC_RELOC)
#undef ENTRY
- { 0, "none", StringRef() }, // used if section has no attributes but has a stub size
+ { 0, StringLiteral("none"), StringLiteral("") }, // used if section has no attributes but has a stub size
};
MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
@@ -89,7 +101,7 @@ MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
}
}
-void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
+void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const {
OS << "\t.section\t" << getSegmentName() << ',' << getSectionName();
diff --git a/lib/MC/MCSectionWasm.cpp b/lib/MC/MCSectionWasm.cpp
new file mode 100644
index 000000000000..c61f28e129f5
--- /dev/null
+++ b/lib/MC/MCSectionWasm.cpp
@@ -0,0 +1,97 @@
+//===- lib/MC/MCSectionWasm.cpp - Wasm Code Section Representation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionWasm.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+MCSectionWasm::~MCSectionWasm() = default; // anchor.
+
+// Decides whether a '.section' directive
+// should be printed before the section name.
+bool MCSectionWasm::ShouldOmitSectionDirective(StringRef Name,
+ const MCAsmInfo &MAI) const {
+ return MAI.shouldOmitSectionDirective(Name);
+}
+
+static void printName(raw_ostream &OS, StringRef Name) {
+ if (Name.find_first_not_of("0123456789_."
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == Name.npos) {
+ OS << Name;
+ return;
+ }
+ OS << '"';
+ for (const char *B = Name.begin(), *E = Name.end(); B < E; ++B) {
+ if (*B == '"') // Unquoted "
+ OS << "\\\"";
+ else if (*B != '\\') // Neither " nor backslash
+ OS << *B;
+ else if (B + 1 == E) // Trailing backslash
+ OS << "\\\\";
+ else {
+ OS << B[0] << B[1]; // Quoted character
+ ++B;
+ }
+ }
+ OS << '"';
+}
+
+void MCSectionWasm::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
+ raw_ostream &OS,
+ const MCExpr *Subsection) const {
+
+ if (ShouldOmitSectionDirective(SectionName, MAI)) {
+ OS << '\t' << getSectionName();
+ if (Subsection) {
+ OS << '\t';
+ Subsection->print(OS, &MAI);
+ }
+ OS << '\n';
+ return;
+ }
+
+ OS << "\t.section\t";
+ printName(OS, getSectionName());
+ OS << ",\"";
+
+ // TODO: Print section flags.
+
+ OS << '"';
+
+ OS << ',';
+
+ // If the comment string is '@' (as on ARM, for example), use '%' instead.
+ if (MAI.getCommentString()[0] == '@')
+ OS << '%';
+ else
+ OS << '@';
+
+ // TODO: Print section type.
+
+ if (isUnique())
+ OS << ",unique," << UniqueID;
+
+ OS << '\n';
+
+ if (Subsection) {
+ OS << "\t.subsection\t";
+ Subsection->print(OS, &MAI);
+ OS << '\n';
+ }
+}
+
+bool MCSectionWasm::UseCodeAlign() const { return false; }
+
+bool MCSectionWasm::isVirtualSection() const { return false; }
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index fb28f856f671..c9a6f12b6a58 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -7,36 +7,44 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCStreamer.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeView.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCWin64EH.h"
+#include "llvm/MC/MCWinEH.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
-using namespace llvm;
+#include <cassert>
+#include <cstdint>
+#include <utility>
-// Pin the vtables to this file.
-MCTargetStreamer::~MCTargetStreamer() {}
+using namespace llvm;
MCTargetStreamer::MCTargetStreamer(MCStreamer &S) : Streamer(S) {
S.setTargetStreamer(this);
}
+// Pin the vtables to this file.
+MCTargetStreamer::~MCTargetStreamer() = default;
+
void MCTargetStreamer::emitLabel(MCSymbol *Symbol) {}
void MCTargetStreamer::finish() {}
@@ -290,10 +298,17 @@ void MCStreamer::AssignFragment(MCSymbol *Symbol, MCFragment *Fragment) {
SymbolOrdering[Symbol] = 1 + SymbolOrdering.size();
}
-void MCStreamer::EmitLabel(MCSymbol *Symbol) {
+void MCStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) {
+ Symbol->redefineIfPossible();
+
+ if (!Symbol->isUndefined() || Symbol->isVariable())
+ return getContext().reportError(Loc, "invalid symbol redefinition");
+
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
assert(getCurrentSectionOnly() && "Cannot emit before setting section!");
assert(!Symbol->getFragment() && "Unexpected fragment on symbol data!");
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
Symbol->setFragment(&getCurrentSectionOnly()->getDummyFragment());
MCTargetStreamer *TS = getTargetStreamer();
@@ -666,7 +681,7 @@ void MCStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset) {
void MCStreamer::EmitWinCFIPushFrame(bool Code) {
EnsureValidWinFrameInfo();
- if (CurrentWinFrameInfo->Instructions.size() > 0)
+ if (!CurrentWinFrameInfo->Instructions.empty())
report_fatal_error("If present, PushMachFrame must be the first UOP");
MCSymbol *Label = EmitCFILabel();
@@ -762,8 +777,8 @@ void MCStreamer::visitUsedExpr(const MCExpr &Expr) {
}
}
-void MCStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) {
+void MCStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) {
// Scan for values.
for (unsigned i = Inst.getNumOperands(); i--;)
if (Inst.getOperand(i).isExpr())
@@ -792,12 +807,22 @@ void MCStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo,
void MCStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {}
void MCStreamer::EmitThumbFunc(MCSymbol *Func) {}
void MCStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
-void MCStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
-void MCStreamer::EndCOFFSymbolDef() {}
+void MCStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ llvm_unreachable("this directive only supported on COFF targets");
+}
+void MCStreamer::EndCOFFSymbolDef() {
+ llvm_unreachable("this directive only supported on COFF targets");
+}
void MCStreamer::EmitFileDirective(StringRef Filename) {}
-void MCStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {}
-void MCStreamer::EmitCOFFSymbolType(int Type) {}
+void MCStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
+ llvm_unreachable("this directive only supported on COFF targets");
+}
+void MCStreamer::EmitCOFFSymbolType(int Type) {
+ llvm_unreachable("this directive only supported on COFF targets");
+}
void MCStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
+void MCStreamer::emitELFSymverDirective(MCSymbol *Alias,
+ const MCSymbol *Aliasee) {}
void MCStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {}
void MCStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
index 1b592504b1e4..777b4e3d6b67 100644
--- a/lib/MC/MCSubtargetInfo.cpp
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- MCSubtargetInfo.cpp - Subtarget Information -----------------------===//
+//===- MCSubtargetInfo.cpp - Subtarget Information ------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,13 +7,16 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <cassert>
+#include <cstring>
using namespace llvm;
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index 20d985df7ea0..cb262542b89f 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -7,13 +7,19 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFragment.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstddef>
+
using namespace llvm;
// Only the address of this fragment is ever actually used.
@@ -75,4 +81,8 @@ void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
OS << '"';
}
-LLVM_DUMP_METHOD void MCSymbol::dump() const { dbgs() << *this; }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MCSymbol::dump() const {
+ dbgs() << *this;
+}
+#endif
diff --git a/lib/MC/MCSymbolELF.cpp b/lib/MC/MCSymbolELF.cpp
index ec7ef447ff89..ffa8260d4342 100644
--- a/lib/MC/MCSymbolELF.cpp
+++ b/lib/MC/MCSymbolELF.cpp
@@ -42,6 +42,8 @@ enum {
void MCSymbolELF::setBinding(unsigned Binding) const {
setIsBindingSet();
+ if (getType() == ELF::STT_SECTION && Binding != ELF::STB_LOCAL)
+ setType(ELF::STT_NOTYPE);
unsigned Val;
switch (Binding) {
default:
@@ -93,6 +95,8 @@ unsigned MCSymbolELF::getBinding() const {
void MCSymbolELF::setType(unsigned Type) const {
unsigned Val;
+ if (Type == ELF::STT_SECTION && getBinding() != ELF::STB_LOCAL)
+ return;
switch (Type) {
default:
llvm_unreachable("Unsupported Binding");
diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp
index 419210537eea..5d666b67fddb 100644
--- a/lib/MC/MCTargetOptions.cpp
+++ b/lib/MC/MCTargetOptions.cpp
@@ -1,4 +1,4 @@
-//===- lib/MC/MCTargetOptions.cpp - MC Target Options --------------------===//
+//===- lib/MC/MCTargetOptions.cpp - MC Target Options ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,19 +10,16 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCTargetOptions.h"
-namespace llvm {
+using namespace llvm;
MCTargetOptions::MCTargetOptions()
: SanitizeAddress(false), MCRelaxAll(false), MCNoExecStack(false),
MCFatalWarnings(false), MCNoWarn(false), MCNoDeprecatedWarn(false),
- MCSaveTempLabels(false),
- MCUseDwarfDirectory(false), MCIncrementalLinkerCompatible(false),
- MCPIECopyRelocations(false), ShowMCEncoding(false),
- ShowMCInst(false), AsmVerbose(false),
- PreserveAsmComments(true), DwarfVersion(0), ABIName() {}
+ MCSaveTempLabels(false), MCUseDwarfDirectory(false),
+ MCIncrementalLinkerCompatible(false), MCPIECopyRelocations(false),
+ ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false),
+ PreserveAsmComments(true) {}
StringRef MCTargetOptions::getABIName() const {
return ABIName;
}
-
-} // end namespace llvm
diff --git a/lib/MC/MCValue.cpp b/lib/MC/MCValue.cpp
index c1336d6d1b49..32a6adbf224e 100644
--- a/lib/MC/MCValue.cpp
+++ b/lib/MC/MCValue.cpp
@@ -37,9 +37,11 @@ void MCValue::print(raw_ostream &OS) const {
OS << " + " << getConstant();
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MCValue::dump() const {
print(dbgs());
}
+#endif
MCSymbolRefExpr::VariantKind MCValue::getAccessVariant() const {
const MCSymbolRefExpr *B = getSymB();
diff --git a/lib/MC/MCWasmObjectTargetWriter.cpp b/lib/MC/MCWasmObjectTargetWriter.cpp
new file mode 100644
index 000000000000..a09a17d7a124
--- /dev/null
+++ b/lib/MC/MCWasmObjectTargetWriter.cpp
@@ -0,0 +1,27 @@
+//===-- MCWasmObjectTargetWriter.cpp - Wasm Target Writer Subclass --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCWasmObjectWriter.h"
+
+using namespace llvm;
+
+MCWasmObjectTargetWriter::MCWasmObjectTargetWriter(bool Is64Bit_)
+ : Is64Bit(Is64Bit_) {}
+
+bool MCWasmObjectTargetWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
+ unsigned Type) const {
+ return false;
+}
+
+void MCWasmObjectTargetWriter::sortRelocs(
+ const MCAssembler &Asm, std::vector<WasmRelocationEntry> &Relocs) {
+}
diff --git a/lib/MC/MCWasmStreamer.cpp b/lib/MC/MCWasmStreamer.cpp
new file mode 100644
index 000000000000..59b62b8d37c3
--- /dev/null
+++ b/lib/MC/MCWasmStreamer.cpp
@@ -0,0 +1,216 @@
+//===- lib/MC/MCWasmStreamer.cpp - Wasm Object Output ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits Wasm .o object files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCWasmStreamer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionWasm.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+MCWasmStreamer::~MCWasmStreamer() {}
+
+void MCWasmStreamer::mergeFragment(MCDataFragment *DF, MCDataFragment *EF) {
+ flushPendingLabels(DF, DF->getContents().size());
+
+ for (unsigned i = 0, e = EF->getFixups().size(); i != e; ++i) {
+ EF->getFixups()[i].setOffset(EF->getFixups()[i].getOffset() +
+ DF->getContents().size());
+ DF->getFixups().push_back(EF->getFixups()[i]);
+ }
+ DF->setHasInstructions(true);
+ DF->getContents().append(EF->getContents().begin(), EF->getContents().end());
+}
+
+void MCWasmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ // Let the target do whatever target specific stuff it needs to do.
+ getAssembler().getBackend().handleAssemblerFlag(Flag);
+
+ // Do any generic stuff we need to do.
+ llvm_unreachable("invalid assembler flag!");
+}
+
+void MCWasmStreamer::ChangeSection(MCSection *Section,
+ const MCExpr *Subsection) {
+ MCAssembler &Asm = getAssembler();
+ auto *SectionWasm = static_cast<const MCSectionWasm *>(Section);
+ const MCSymbol *Grp = SectionWasm->getGroup();
+ if (Grp)
+ Asm.registerSymbol(*Grp);
+
+ this->MCObjectStreamer::ChangeSection(Section, Subsection);
+}
+
+void MCWasmStreamer::EmitWeakReference(MCSymbol *Alias,
+ const MCSymbol *Symbol) {
+ getAssembler().registerSymbol(*Symbol);
+ const MCExpr *Value = MCSymbolRefExpr::create(
+ Symbol, MCSymbolRefExpr::VK_WEAKREF, getContext());
+ Alias->setVariableValue(Value);
+}
+
+bool MCWasmStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
+ assert(Attribute != MCSA_IndirectSymbol && "indirect symbols not supported");
+
+ auto *Symbol = cast<MCSymbolWasm>(S);
+
+  // Adding a symbol attribute always introduces the symbol; note that an
+  // important side effect of calling registerSymbol here is to register
+  // the symbol with the assembler.
+ getAssembler().registerSymbol(*Symbol);
+
+ switch (Attribute) {
+ case MCSA_LazyReference:
+ case MCSA_Reference:
+ case MCSA_SymbolResolver:
+ case MCSA_PrivateExtern:
+ case MCSA_WeakDefinition:
+ case MCSA_WeakDefAutoPrivate:
+ case MCSA_Invalid:
+ case MCSA_IndirectSymbol:
+ return false;
+ case MCSA_Global:
+ Symbol->setExternal(true);
+ break;
+ case MCSA_ELF_TypeFunction:
+ Symbol->setIsFunction(true);
+ break;
+ case MCSA_ELF_TypeObject:
+ Symbol->setIsFunction(false);
+ break;
+ default:
+ // unrecognized directive
+ return false;
+ }
+
+ return true;
+}
+
+void MCWasmStreamer::EmitCommonSymbol(MCSymbol *S, uint64_t Size,
+ unsigned ByteAlignment) {
+ llvm_unreachable("Common symbols are not yet implemented for Wasm");
+}
+
+void MCWasmStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ cast<MCSymbolWasm>(Symbol)->setSize(Value);
+}
+
+void MCWasmStreamer::EmitLocalCommonSymbol(MCSymbol *S, uint64_t Size,
+ unsigned ByteAlignment) {
+ llvm_unreachable("Local common symbols are not yet implemented for Wasm");
+}
+
+void MCWasmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ SMLoc Loc) {
+ MCObjectStreamer::EmitValueImpl(Value, Size, Loc);
+}
+
+void MCWasmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ MCObjectStreamer::EmitValueToAlignment(ByteAlignment, Value, ValueSize,
+ MaxBytesToEmit);
+}
+
+void MCWasmStreamer::EmitIdent(StringRef IdentString) {
+ MCSection *Comment = getAssembler().getContext().getWasmSection(
+ ".comment", 0, 0);
+ PushSection();
+ SwitchSection(Comment);
+ if (!SeenIdent) {
+ EmitIntValue(0, 1);
+ SeenIdent = true;
+ }
+ EmitBytes(IdentString);
+ EmitIntValue(0, 1);
+ PopSection();
+}
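+// Sketch of the resulting .comment payload, assuming idents "a" then "b"
+// are emitted: one leading NUL, then each string NUL-terminated:
+//   00 61 00 62 00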
+
+void MCWasmStreamer::EmitInstToFragment(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ this->MCObjectStreamer::EmitInstToFragment(Inst, STI);
+}
+
+void MCWasmStreamer::EmitInstToData(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ MCAssembler &Assembler = getAssembler();
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ Assembler.getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
+
+ // Append the encoded instruction to the current data fragment (or create a
+ // new such fragment if the current fragment is not a data fragment).
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ // Add the fixups and data.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->getFixups().push_back(Fixups[i]);
+ }
+ DF->setHasInstructions(true);
+ DF->getContents().append(Code.begin(), Code.end());
+}
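+// For example (hypothetical sizes): appending a 3-byte instruction whose
+// fixup is at offset 1 to a fragment already holding 10 bytes rebases the
+// fixup to offset 11, and the new bytes land at offsets 10..12.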
+
+void MCWasmStreamer::FinishImpl() {
+ EmitFrames(nullptr);
+
+ this->MCObjectStreamer::FinishImpl();
+}
+
+MCStreamer *llvm::createWasmStreamer(MCContext &Context, MCAsmBackend &MAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE,
+ bool RelaxAll) {
+ MCWasmStreamer *S = new MCWasmStreamer(Context, MAB, OS, CE);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ return S;
+}
+
+void MCWasmStreamer::EmitThumbFunc(MCSymbol *Func) {
+ llvm_unreachable("Generic Wasm doesn't support this directive");
+}
+
+void MCWasmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ llvm_unreachable("Wasm doesn't support this directive");
+}
+
+void MCWasmStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ llvm_unreachable("Wasm doesn't support this directive");
+}
+
+void MCWasmStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ llvm_unreachable("Wasm doesn't support this directive");
+}
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index c4b35f5db9b4..d9ccf0dd661f 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -7,23 +7,36 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCMachObjectWriter.h"
-#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCFragment.h"
+#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolMachO.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <string>
+#include <utility>
#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "mc"
diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp
index 1a501bcafc12..fbd7ba60bc90 100644
--- a/lib/MC/StringTableBuilder.cpp
+++ b/lib/MC/StringTableBuilder.cpp
@@ -1,4 +1,4 @@
-//===-- StringTableBuilder.cpp - String table building utility ------------===//
+//===- StringTableBuilder.cpp - String table building utility -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,18 +7,24 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/StringTableBuilder.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <utility>
#include <vector>
using namespace llvm;
-StringTableBuilder::~StringTableBuilder() {}
+StringTableBuilder::~StringTableBuilder() = default;
void StringTableBuilder::initSize() {
// Account for leading bytes in table so that offsets returned from add are
@@ -48,7 +54,7 @@ void StringTableBuilder::write(raw_ostream &OS) const {
assert(isFinalized());
SmallString<0> Data;
Data.resize(getSize());
- write((uint8_t *)&Data[0]);
+ write((uint8_t *)Data.data());
OS << Data;
}
diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp
index 32f06f8a7d6a..51aaa4b0aa25 100644
--- a/lib/MC/SubtargetFeature.cpp
+++ b/lib/MC/SubtargetFeature.cpp
@@ -7,28 +7,31 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the SubtargetFeature interface.
+/// \file Implements the SubtargetFeature interface.
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
-#include <cctype>
-#include <cstdlib>
-using namespace llvm;
+#include <cstddef>
+#include <cstring>
+#include <iterator>
+#include <string>
+#include <vector>
-//===----------------------------------------------------------------------===//
-// Static Helper Functions
-//===----------------------------------------------------------------------===//
+using namespace llvm;
-/// hasFlag - Determine if a feature has a flag; '+' or '-'
-///
+/// Determine if a feature has a flag; '+' or '-'
static inline bool hasFlag(StringRef Feature) {
assert(!Feature.empty() && "Empty string");
// Get first character
@@ -37,14 +40,12 @@ static inline bool hasFlag(StringRef Feature) {
  return Ch == '+' || Ch == '-';
}
-/// StripFlag - Return string stripped of flag.
-///
+/// Return string stripped of flag.
static inline std::string StripFlag(StringRef Feature) {
return hasFlag(Feature) ? Feature.substr(1) : Feature;
}
-/// isEnabled - Return true if enable flag; '+'.
-///
+/// Return true if the flag is an enable flag, i.e. '+'.
static inline bool isEnabled(StringRef Feature) {
assert(!Feature.empty() && "Empty string");
// Get first character
@@ -53,15 +54,13 @@ static inline bool isEnabled(StringRef Feature) {
return Ch == '+';
}
-/// Split - Splits a string of comma separated items in to a vector of strings.
-///
+/// Splits a string of comma-separated items into a vector of strings.
static void Split(std::vector<std::string> &V, StringRef S) {
SmallVector<StringRef, 3> Tmp;
S.split(Tmp, ',', -1, false /* KeepEmpty */);
V.assign(Tmp.begin(), Tmp.end());
}
-/// Adding features.
void SubtargetFeatures::AddFeature(StringRef String, bool Enable) {
// Don't add empty features.
if (!String.empty())
@@ -81,8 +80,7 @@ static const SubtargetFeatureKV *Find(StringRef S,
return F;
}
-/// getLongestEntryLength - Return the length of the longest entry in the table.
-///
+/// Return the length of the longest entry in the table.
static size_t getLongestEntryLength(ArrayRef<SubtargetFeatureKV> Table) {
size_t MaxLen = 0;
for (auto &I : Table)
@@ -91,7 +89,6 @@ static size_t getLongestEntryLength(ArrayRef<SubtargetFeatureKV> Table) {
}
/// Display help for feature choices.
-///
static void Help(ArrayRef<SubtargetFeatureKV> CPUTable,
ArrayRef<SubtargetFeatureKV> FeatTable) {
// Determine the length of the longest CPU and Feature entries.
@@ -114,58 +111,47 @@ static void Help(ArrayRef<SubtargetFeatureKV> CPUTable,
"For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
}
-//===----------------------------------------------------------------------===//
-// SubtargetFeatures Implementation
-//===----------------------------------------------------------------------===//
-
SubtargetFeatures::SubtargetFeatures(StringRef Initial) {
// Break up string into separate features
Split(Features, Initial);
}
-
std::string SubtargetFeatures::getString() const {
return join(Features.begin(), Features.end(), ",");
}
-/// SetImpliedBits - For each feature that is (transitively) implied by this
-/// feature, set it.
-///
+/// For each feature that is (transitively) implied by this feature, set it.
static
-void SetImpliedBits(FeatureBitset &Bits, const SubtargetFeatureKV *FeatureEntry,
+void SetImpliedBits(FeatureBitset &Bits, const SubtargetFeatureKV &FeatureEntry,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
- for (auto &FE : FeatureTable) {
- if (FeatureEntry->Value == FE.Value) continue;
+ for (const SubtargetFeatureKV &FE : FeatureTable) {
+ if (FeatureEntry.Value == FE.Value) continue;
- if ((FeatureEntry->Implies & FE.Value).any()) {
+ if ((FeatureEntry.Implies & FE.Value).any()) {
Bits |= FE.Value;
- SetImpliedBits(Bits, &FE, FeatureTable);
+ SetImpliedBits(Bits, FE, FeatureTable);
}
}
}
-/// ClearImpliedBits - For each feature that (transitively) implies this
-/// feature, clear it.
-///
+/// For each feature that (transitively) implies this feature, clear it.
static
-void ClearImpliedBits(FeatureBitset &Bits,
- const SubtargetFeatureKV *FeatureEntry,
+void ClearImpliedBits(FeatureBitset &Bits,
+ const SubtargetFeatureKV &FeatureEntry,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
- for (auto &FE : FeatureTable) {
- if (FeatureEntry->Value == FE.Value) continue;
+ for (const SubtargetFeatureKV &FE : FeatureTable) {
+ if (FeatureEntry.Value == FE.Value) continue;
- if ((FE.Implies & FeatureEntry->Value).any()) {
+ if ((FE.Implies & FeatureEntry.Value).any()) {
Bits &= ~FE.Value;
- ClearImpliedBits(Bits, &FE, FeatureTable);
+ ClearImpliedBits(Bits, FE, FeatureTable);
}
}
}
-/// ToggleFeature - Toggle a feature and update the feature bits.
void
SubtargetFeatures::ToggleFeature(FeatureBitset &Bits, StringRef Feature,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
-
// Find feature in table.
const SubtargetFeatureKV *FeatureEntry =
Find(StripFlag(Feature), FeatureTable);
@@ -174,23 +160,21 @@ SubtargetFeatures::ToggleFeature(FeatureBitset &Bits, StringRef Feature,
if ((Bits & FeatureEntry->Value) == FeatureEntry->Value) {
Bits &= ~FeatureEntry->Value;
// For each feature that implies this, clear it.
- ClearImpliedBits(Bits, FeatureEntry, FeatureTable);
+ ClearImpliedBits(Bits, *FeatureEntry, FeatureTable);
} else {
Bits |= FeatureEntry->Value;
// For each feature that this implies, set it.
- SetImpliedBits(Bits, FeatureEntry, FeatureTable);
+ SetImpliedBits(Bits, *FeatureEntry, FeatureTable);
}
} else {
- errs() << "'" << Feature
- << "' is not a recognized feature for this target"
+ errs() << "'" << Feature << "' is not a recognized feature for this target"
<< " (ignoring feature)\n";
}
}
void SubtargetFeatures::ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
-
assert(hasFlag(Feature));
// Find feature in table.
@@ -203,37 +187,30 @@ void SubtargetFeatures::ApplyFeatureFlag(FeatureBitset &Bits, StringRef Feature,
Bits |= FeatureEntry->Value;
// For each feature that this implies, set it.
- SetImpliedBits(Bits, FeatureEntry, FeatureTable);
+ SetImpliedBits(Bits, *FeatureEntry, FeatureTable);
} else {
Bits &= ~FeatureEntry->Value;
// For each feature that implies this, clear it.
- ClearImpliedBits(Bits, FeatureEntry, FeatureTable);
+ ClearImpliedBits(Bits, *FeatureEntry, FeatureTable);
}
} else {
- errs() << "'" << Feature
- << "' is not a recognized feature for this target"
+ errs() << "'" << Feature << "' is not a recognized feature for this target"
<< " (ignoring feature)\n";
}
}
-
-/// getFeatureBits - Get feature bits a CPU.
-///
FeatureBitset
SubtargetFeatures::getFeatureBits(StringRef CPU,
ArrayRef<SubtargetFeatureKV> CPUTable,
ArrayRef<SubtargetFeatureKV> FeatureTable) {
-
if (CPUTable.empty() || FeatureTable.empty())
return FeatureBitset();
-#ifndef NDEBUG
assert(std::is_sorted(std::begin(CPUTable), std::end(CPUTable)) &&
"CPU table is not sorted");
assert(std::is_sorted(std::begin(FeatureTable), std::end(FeatureTable)) &&
"CPU features table is not sorted");
-#endif
// Resulting bits
FeatureBitset Bits;
@@ -253,17 +230,16 @@ SubtargetFeatures::getFeatureBits(StringRef CPU,
// Set the feature implied by this CPU feature, if any.
for (auto &FE : FeatureTable) {
if ((CPUEntry->Value & FE.Value).any())
- SetImpliedBits(Bits, &FE, FeatureTable);
+ SetImpliedBits(Bits, FE, FeatureTable);
}
} else {
- errs() << "'" << CPU
- << "' is not a recognized processor for this target"
+ errs() << "'" << CPU << "' is not a recognized processor for this target"
<< " (ignoring processor)\n";
}
}
// Iterate through each feature
- for (auto &Feature : Features) {
+ for (const std::string &Feature : Features) {
// Check for help
if (Feature == "+help")
Help(CPUTable, FeatureTable);
@@ -274,27 +250,22 @@ SubtargetFeatures::getFeatureBits(StringRef CPU,
return Bits;
}
-/// print - Print feature string.
-///
void SubtargetFeatures::print(raw_ostream &OS) const {
for (auto &F : Features)
OS << F << " ";
OS << "\n";
}
-/// dump - Dump feature info.
-///
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SubtargetFeatures::dump() const {
print(dbgs());
}
+#endif
-/// Adds the default features for the specified target triple.
-///
-/// FIXME: This is an inelegant way of specifying the features of a
-/// subtarget. It would be better if we could encode this information
-/// into the IR. See <rdar://5972456>.
-///
void SubtargetFeatures::getDefaultSubtargetFeatures(const Triple& Triple) {
+ // FIXME: This is an inelegant way of specifying the features of a
+ // subtarget. It would be better if we could encode this information
+ // into the IR. See <rdar://5972456>.
if (Triple.getVendor() == Triple::Apple) {
if (Triple.getArch() == Triple::ppc) {
// powerpc-apple-*
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
new file mode 100644
index 000000000000..159cc3b4def2
--- /dev/null
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -0,0 +1,1149 @@
+//===- lib/MC/WasmObjectWriter.cpp - Wasm File Writer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Wasm object file writer information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionWasm.h"
+#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCWasmObjectWriter.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/Wasm.h"
+#include <vector>
+
+using namespace llvm;
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "reloc-info"
+
+namespace {
+// For patching purposes, we need to remember where each section starts, both
+// for patching up the section size field, and for patching up references to
+// locations within the section.
+struct SectionBookkeeping {
+ // Where the size of the section is written.
+ uint64_t SizeOffset;
+ // Where the contents of the section starts (after the header).
+ uint64_t ContentsOffset;
+};
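+// Byte-layout sketch for one section (offsets are illustrative):
+//   [section id]  [payload_len: 5-byte padded ULEB128]  [contents ...]
+//                 ^ SizeOffset                          ^ ContentsOffset
+// payload_len is rewritten by endSection once the real size is known.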
+
+class WasmObjectWriter : public MCObjectWriter {
+ /// Helper struct for containing some precomputed information on symbols.
+ struct WasmSymbolData {
+ const MCSymbolWasm *Symbol;
+ StringRef Name;
+
+ // Support lexicographic sorting.
+ bool operator<(const WasmSymbolData &RHS) const { return Name < RHS.Name; }
+ };
+
+ /// The target specific Wasm writer instance.
+ std::unique_ptr<MCWasmObjectTargetWriter> TargetObjectWriter;
+
+ // Relocations for fixing up references in the code section.
+ std::vector<WasmRelocationEntry> CodeRelocations;
+
+ // Relocations for fixing up references in the data section.
+ std::vector<WasmRelocationEntry> DataRelocations;
+
+ // Fixups for call_indirect type indices.
+ std::vector<WasmRelocationEntry> TypeIndexFixups;
+
+ // Index values to use for fixing up call_indirect type indices.
+ std::vector<uint32_t> TypeIndexFixupTypes;
+
+ // TargetObjectWriter wrappers.
+ bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const {
+ return TargetObjectWriter->getRelocType(Ctx, Target, Fixup, IsPCRel);
+ }
+
+ void startSection(SectionBookkeeping &Section, unsigned SectionId,
+ const char *Name = nullptr);
+ void endSection(SectionBookkeeping &Section);
+
+public:
+ WasmObjectWriter(MCWasmObjectTargetWriter *MOTW, raw_pwrite_stream &OS)
+ : MCObjectWriter(OS, /*IsLittleEndian=*/true), TargetObjectWriter(MOTW) {}
+
+private:
+ void reset() override {
+ MCObjectWriter::reset();
+ }
+
+ ~WasmObjectWriter() override;
+
+ void writeHeader(const MCAssembler &Asm);
+
+ void writeValueType(wasm::ValType Ty) {
+ encodeSLEB128(int32_t(Ty), getStream());
+ }
+
+ void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, bool &IsPCRel,
+ uint64_t &FixedValue) override;
+
+ void executePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) override;
+
+ void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+};
+} // end anonymous namespace
+
+WasmObjectWriter::~WasmObjectWriter() {}
+
+// Return the padding size to write a 32-bit value into a 5-byte ULEB128.
+static unsigned PaddingFor5ByteULEB128(uint32_t X) {
+ return X == 0 ? 4 : (4u - (31u - countLeadingZeros(X)) / 7u);
+}
+
+// Return the padding size to write a 32-bit value into a 5-byte SLEB128.
+static unsigned PaddingFor5ByteSLEB128(int32_t X) {
+ return 5 - getSLEB128Size(X);
+}
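+// Worked example: X = 300 encodes as AC 02 in two ULEB128 bytes, so
+// PaddingFor5ByteULEB128(300) == 3 and the padded 5-byte form is
+// AC 82 80 80 00 (continuation bits set on the filler bytes).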
+
+// Write out a section header and a patchable section size field.
+void WasmObjectWriter::startSection(SectionBookkeeping &Section,
+ unsigned SectionId,
+ const char *Name) {
+ assert((Name != nullptr) == (SectionId == wasm::WASM_SEC_CUSTOM) &&
+ "Only custom sections can have names");
+
+ encodeULEB128(SectionId, getStream());
+
+ Section.SizeOffset = getStream().tell();
+
+ // The section size. We don't know the size yet, so reserve enough space
+ // for any 32-bit value; we'll patch it later.
+ encodeULEB128(UINT32_MAX, getStream());
+
+ // The position where the section starts, for measuring its size.
+ Section.ContentsOffset = getStream().tell();
+
+ // Custom sections in wasm also have a string identifier.
+ if (SectionId == wasm::WASM_SEC_CUSTOM) {
+ encodeULEB128(strlen(Name), getStream());
+ writeBytes(Name);
+ }
+}
+
+// Now that the section is complete and we know how big it is, patch up the
+// section size field at the start of the section.
+void WasmObjectWriter::endSection(SectionBookkeeping &Section) {
+ uint64_t Size = getStream().tell() - Section.ContentsOffset;
+ if (uint32_t(Size) != Size)
+ report_fatal_error("section size does not fit in a uint32_t");
+
+ unsigned Padding = PaddingFor5ByteULEB128(Size);
+
+ // Write the final section size to the payload_len field, which follows
+ // the section id byte.
+ uint8_t Buffer[16];
+ unsigned SizeLen = encodeULEB128(Size, Buffer, Padding);
+ assert(SizeLen == 5);
+ getStream().pwrite((char *)Buffer, SizeLen, Section.SizeOffset);
+}
+
+// Emit the Wasm header.
+void WasmObjectWriter::writeHeader(const MCAssembler &Asm) {
+ writeBytes(StringRef(wasm::WasmMagic, sizeof(wasm::WasmMagic)));
+ writeLE32(wasm::WasmVersion);
+}
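+// The emitted header is 8 bytes; assuming wasm::WasmVersion == 1 it is
+//   00 61 73 6D 01 00 00 00
+// i.e. the "\0asm" magic followed by the version in little-endian order.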
+
+void WasmObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+}
+
+void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ bool &IsPCRel, uint64_t &FixedValue) {
+ MCSectionWasm &FixupSection = cast<MCSectionWasm>(*Fragment->getParent());
+ uint64_t C = Target.getConstant();
+ uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+ MCContext &Ctx = Asm.getContext();
+
+ if (const MCSymbolRefExpr *RefB = Target.getSymB()) {
+ assert(RefB->getKind() == MCSymbolRefExpr::VK_None &&
+ "Should not have constructed this");
+
+    // Let A, B and C be the components of Target and R be the location of
+ // the fixup. If the fixup is not pcrel, we want to compute (A - B + C).
+ // If it is pcrel, we want to compute (A - B + C - R).
+
+ // In general, Wasm has no relocations for -B. It can only represent (A + C)
+ // or (A + C - R). If B = R + K and the relocation is not pcrel, we can
+ // replace B to implement it: (A - R - K + C)
+ if (IsPCRel) {
+ Ctx.reportError(
+ Fixup.getLoc(),
+ "No relocation available to represent this relative expression");
+ return;
+ }
+
+ const auto &SymB = cast<MCSymbolWasm>(RefB->getSymbol());
+
+ if (SymB.isUndefined()) {
+ Ctx.reportError(Fixup.getLoc(),
+ Twine("symbol '") + SymB.getName() +
+                      "' cannot be undefined in a subtraction expression");
+ return;
+ }
+
+ assert(!SymB.isAbsolute() && "Should have been folded");
+ const MCSection &SecB = SymB.getSection();
+ if (&SecB != &FixupSection) {
+ Ctx.reportError(Fixup.getLoc(),
+ "Cannot represent a difference across sections");
+ return;
+ }
+
+ uint64_t SymBOffset = Layout.getSymbolOffset(SymB);
+ uint64_t K = SymBOffset - FixupOffset;
+ IsPCRel = true;
+ C -= K;
+ }
+
+ // We either rejected the fixup or folded B into C at this point.
+ const MCSymbolRefExpr *RefA = Target.getSymA();
+ const auto *SymA = RefA ? cast<MCSymbolWasm>(&RefA->getSymbol()) : nullptr;
+
+ bool ViaWeakRef = false;
+ if (SymA && SymA->isVariable()) {
+ const MCExpr *Expr = SymA->getVariableValue();
+ if (const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr)) {
+ if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) {
+ SymA = cast<MCSymbolWasm>(&Inner->getSymbol());
+ ViaWeakRef = true;
+ }
+ }
+ }
+
+ // Put any constant offset in an addend. Offsets can be negative, and
+ // LLVM expects wrapping, in contrast to wasm's immediates which can't
+ // be negative and don't wrap.
+ FixedValue = 0;
+
+ if (SymA) {
+ if (ViaWeakRef)
+ llvm_unreachable("weakref used in reloc not yet implemented");
+ else
+ SymA->setUsedInReloc();
+ }
+
+ if (RefA) {
+ if (RefA->getKind() == MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX) {
+ assert(C == 0);
+ WasmRelocationEntry Rec(FixupOffset, SymA, C,
+ wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB,
+ &FixupSection);
+ TypeIndexFixups.push_back(Rec);
+ return;
+ }
+ }
+
+ unsigned Type = getRelocType(Ctx, Target, Fixup, IsPCRel);
+
+ WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection);
+
+ if (FixupSection.hasInstructions())
+ CodeRelocations.push_back(Rec);
+ else
+ DataRelocations.push_back(Rec);
+}
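+// Worked example of the B-folding above (hypothetical offsets): a non-pcrel
+// fixup at section offset 40 computing A - B + 4, with B defined at offset
+// 16 of the same section, gives K = 16 - 40 = -24, so C becomes 28 and the
+// record is emitted in the pcrel form (A + 28 - R).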
+
+namespace {
+
+// The signature of a wasm function, in a struct capable of being used as a
+// DenseMap key.
+struct WasmFunctionType {
+ // Support empty and tombstone instances, needed by DenseMap.
+ enum { Plain, Empty, Tombstone } State;
+
+ // The return types of the function.
+ SmallVector<wasm::ValType, 1> Returns;
+
+ // The parameter types of the function.
+ SmallVector<wasm::ValType, 4> Params;
+
+ WasmFunctionType() : State(Plain) {}
+
+ bool operator==(const WasmFunctionType &Other) const {
+ return State == Other.State && Returns == Other.Returns &&
+ Params == Other.Params;
+ }
+};
+
+// Traits for using WasmFunctionType in a DenseMap.
+struct WasmFunctionTypeDenseMapInfo {
+ static WasmFunctionType getEmptyKey() {
+ WasmFunctionType FuncTy;
+ FuncTy.State = WasmFunctionType::Empty;
+ return FuncTy;
+ }
+ static WasmFunctionType getTombstoneKey() {
+ WasmFunctionType FuncTy;
+ FuncTy.State = WasmFunctionType::Tombstone;
+ return FuncTy;
+ }
+ static unsigned getHashValue(const WasmFunctionType &FuncTy) {
+ uintptr_t Value = FuncTy.State;
+ for (wasm::ValType Ret : FuncTy.Returns)
+ Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Ret));
+ for (wasm::ValType Param : FuncTy.Params)
+ Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Param));
+ return Value;
+ }
+ static bool isEqual(const WasmFunctionType &LHS,
+ const WasmFunctionType &RHS) {
+ return LHS == RHS;
+ }
+};
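+// Usage sketch: the traits above let signatures key a DenseMap directly,
+//   DenseMap<WasmFunctionType, int32_t, WasmFunctionTypeDenseMapInfo> Map;
+// so the first insertion of a given signature assigns it the next free type
+// index and later insertions of an equal signature reuse it.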
+
+// A wasm import to be written into the import section.
+struct WasmImport {
+ StringRef ModuleName;
+ StringRef FieldName;
+ unsigned Kind;
+ int32_t Type;
+};
+
+// A wasm function to be written into the function section.
+struct WasmFunction {
+ int32_t Type;
+ const MCSymbolWasm *Sym;
+};
+
+// A wasm export to be written into the export section.
+struct WasmExport {
+ StringRef FieldName;
+ unsigned Kind;
+ uint32_t Index;
+};
+
+// A wasm global to be written into the global section.
+struct WasmGlobal {
+ wasm::ValType Type;
+ bool IsMutable;
+ bool HasImport;
+ uint64_t InitialValue;
+ uint32_t ImportIndex;
+};
+
+} // end anonymous namespace
+
+// Write X as an (unsigned) LEB value at offset Offset in Stream, padded
+// to allow patching.
+static void
+WritePatchableLEB(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
+ uint8_t Buffer[5];
+ unsigned Padding = PaddingFor5ByteULEB128(X);
+ unsigned SizeLen = encodeULEB128(X, Buffer, Padding);
+ assert(SizeLen == 5);
+ Stream.pwrite((char *)Buffer, SizeLen, Offset);
+}
+
+// Write X as a signed LEB value at offset Offset in Stream, padded
+// to allow patching.
+static void
+WritePatchableSLEB(raw_pwrite_stream &Stream, int32_t X, uint64_t Offset) {
+ uint8_t Buffer[5];
+ unsigned Padding = PaddingFor5ByteSLEB128(X);
+ unsigned SizeLen = encodeSLEB128(X, Buffer, Padding);
+ assert(SizeLen == 5);
+ Stream.pwrite((char *)Buffer, SizeLen, Offset);
+}
+
+// Write X as a plain integer value at offset Offset in Stream.
+static void WriteI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
+ uint8_t Buffer[4];
+ support::endian::write32le(Buffer, X);
+ Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
+}
+
+// Compute a value to write into the code at the location covered
+// by RelEntry. This value isn't used by the static linker, since
+// we have addends; it just serves to make the code more readable
+// and to make standalone wasm modules directly usable.
+static uint32_t ProvisionalValue(const WasmRelocationEntry &RelEntry) {
+ const MCSymbolWasm *Sym = RelEntry.Symbol;
+
+ // For undefined symbols, use a hopefully invalid value.
+ if (!Sym->isDefined(false))
+ return UINT32_MAX;
+
+ MCSectionWasm &Section =
+ cast<MCSectionWasm>(RelEntry.Symbol->getSection(false));
+ uint64_t Address = Section.getSectionOffset() + RelEntry.Addend;
+
+ // Ignore overflow. LLVM allows address arithmetic to silently wrap.
+ uint32_t Value = Address;
+
+ return Value;
+}
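+// For example (hypothetical layout): a symbol whose single-variable data
+// section sits at offset 0x10, with addend 8, yields 0x18; an undefined
+// symbol yields UINT32_MAX so stray uses are easy to spot.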
+
+// Apply the portions of the relocation records that we can handle ourselves
+// directly.
+static void ApplyRelocations(
+ ArrayRef<WasmRelocationEntry> Relocations,
+ raw_pwrite_stream &Stream,
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices,
+ uint64_t ContentsOffset)
+{
+ for (const WasmRelocationEntry &RelEntry : Relocations) {
+ uint64_t Offset = ContentsOffset +
+ RelEntry.FixupSection->getSectionOffset() +
+ RelEntry.Offset;
+ switch (RelEntry.Type) {
+ case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: {
+ uint32_t Index = SymbolIndices[RelEntry.Symbol];
+ assert(RelEntry.Addend == 0);
+
+ WritePatchableLEB(Stream, Index, Offset);
+ break;
+ }
+ case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: {
+ uint32_t Index = SymbolIndices[RelEntry.Symbol];
+ assert(RelEntry.Addend == 0);
+
+ WritePatchableSLEB(Stream, Index, Offset);
+ break;
+ }
+ case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: {
+ uint32_t Value = ProvisionalValue(RelEntry);
+
+ WritePatchableSLEB(Stream, Value, Offset);
+ break;
+ }
+ case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB: {
+ uint32_t Value = ProvisionalValue(RelEntry);
+
+ WritePatchableLEB(Stream, Value, Offset);
+ break;
+ }
+ case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: {
+ uint32_t Index = SymbolIndices[RelEntry.Symbol];
+ assert(RelEntry.Addend == 0);
+
+ WriteI32(Stream, Index, Offset);
+ break;
+ }
+ case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: {
+ uint32_t Value = ProvisionalValue(RelEntry);
+
+ WriteI32(Stream, Value, Offset);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
+// Write out the portions of the relocation records that the linker will
+// need to handle.
+static void WriteRelocations(
+ ArrayRef<WasmRelocationEntry> Relocations,
+ raw_pwrite_stream &Stream,
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices)
+{
+  for (const WasmRelocationEntry &RelEntry : Relocations) {
+ encodeULEB128(RelEntry.Type, Stream);
+
+ uint64_t Offset = RelEntry.Offset +
+ RelEntry.FixupSection->getSectionOffset();
+ uint32_t Index = SymbolIndices[RelEntry.Symbol];
+ int64_t Addend = RelEntry.Addend;
+
+ switch (RelEntry.Type) {
+ case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
+ case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
+ case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
+ encodeULEB128(Offset, Stream);
+ encodeULEB128(Index, Stream);
+ assert(Addend == 0 && "addends not supported for functions");
+ break;
+ case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB:
+ case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB:
+ case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32:
+ encodeULEB128(Offset, Stream);
+ encodeULEB128(Index, Stream);
+ encodeSLEB128(Addend, Stream);
+ break;
+ default:
+ llvm_unreachable("unsupported relocation type");
+ }
+ }
+}
+
+// Write out the type relocation records that the linker will
+// need to handle.
+static void WriteTypeRelocations(
+ ArrayRef<WasmRelocationEntry> TypeIndexFixups,
+ ArrayRef<uint32_t> TypeIndexFixupTypes,
+ raw_pwrite_stream &Stream)
+{
+ for (size_t i = 0, e = TypeIndexFixups.size(); i < e; ++i) {
+ const WasmRelocationEntry &Fixup = TypeIndexFixups[i];
+ uint32_t Type = TypeIndexFixupTypes[i];
+
+ assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB);
+ assert(Fixup.Addend == 0);
+
+ uint64_t Offset = Fixup.Offset +
+ Fixup.FixupSection->getSectionOffset();
+
+ encodeULEB128(Fixup.Type, Stream);
+ encodeULEB128(Offset, Stream);
+ encodeULEB128(Type, Stream);
+ }
+}
+
+void WasmObjectWriter::writeObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ MCContext &Ctx = Asm.getContext();
+ wasm::ValType PtrType = is64Bit() ? wasm::ValType::I64 : wasm::ValType::I32;
+
+ // Collect information from the available symbols.
+ DenseMap<WasmFunctionType, int32_t, WasmFunctionTypeDenseMapInfo>
+ FunctionTypeIndices;
+ SmallVector<WasmFunctionType, 4> FunctionTypes;
+ SmallVector<WasmFunction, 4> Functions;
+ SmallVector<uint32_t, 4> TableElems;
+ SmallVector<WasmGlobal, 4> Globals;
+ SmallVector<WasmImport, 4> Imports;
+ SmallVector<WasmExport, 4> Exports;
+ DenseMap<const MCSymbolWasm *, uint32_t> SymbolIndices;
+ SmallPtrSet<const MCSymbolWasm *, 4> IsAddressTaken;
+ unsigned NumFuncImports = 0;
+ unsigned NumGlobalImports = 0;
+ SmallVector<char, 0> DataBytes;
+ uint32_t StackPointerGlobal = 0;
+ bool HasStackPointer = false;
+
+ // Populate the IsAddressTaken set.
+  for (const WasmRelocationEntry &RelEntry : CodeRelocations) {
+ switch (RelEntry.Type) {
+ case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
+ case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB:
+ IsAddressTaken.insert(RelEntry.Symbol);
+ break;
+ default:
+ break;
+ }
+ }
+  for (const WasmRelocationEntry &RelEntry : DataRelocations) {
+ switch (RelEntry.Type) {
+ case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
+ case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32:
+ IsAddressTaken.insert(RelEntry.Symbol);
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Populate the Imports set.
+ for (const MCSymbol &S : Asm.symbols()) {
+ const auto &WS = static_cast<const MCSymbolWasm &>(S);
+ int32_t Type;
+
+ if (WS.isFunction()) {
+ // Prepare the function's type, if we haven't seen it yet.
+ WasmFunctionType F;
+ F.Returns = WS.getReturns();
+ F.Params = WS.getParams();
+ auto Pair =
+ FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size()));
+ if (Pair.second)
+ FunctionTypes.push_back(F);
+
+ Type = Pair.first->second;
+ } else {
+ Type = int32_t(PtrType);
+ }
+
+ // If the symbol is not defined in this translation unit, import it.
+ if (!WS.isTemporary() && !WS.isDefined(/*SetUsed=*/false)) {
+ WasmImport Import;
+ Import.ModuleName = WS.getModuleName();
+ Import.FieldName = WS.getName();
+
+ if (WS.isFunction()) {
+ Import.Kind = wasm::WASM_EXTERNAL_FUNCTION;
+ Import.Type = Type;
+ SymbolIndices[&WS] = NumFuncImports;
+ ++NumFuncImports;
+ } else {
+ Import.Kind = wasm::WASM_EXTERNAL_GLOBAL;
+ Import.Type = Type;
+ SymbolIndices[&WS] = NumGlobalImports;
+ ++NumGlobalImports;
+ }
+
+ Imports.push_back(Import);
+ }
+ }
+
+ // In the special .global_variables section, we've encoded global
+ // variables used by the function. Translate them into the Globals
+ // list.
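+  // Each record is one type byte, one mutability byte and one has-import
+  // byte, followed either by two NUL-terminated names (module, base) for
+  // imports or by an SLEB128 initial value. For example, assuming the type
+  // byte encodes wasm::ValType::I32, the record <i32> 01 00 2A decodes as
+  // a mutable i32 initialized to 42.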
+ MCSectionWasm *GlobalVars = Ctx.getWasmSection(".global_variables", 0, 0);
+ if (!GlobalVars->getFragmentList().empty()) {
+ if (GlobalVars->getFragmentList().size() != 1)
+ report_fatal_error("only one .global_variables fragment supported");
+ const MCFragment &Frag = *GlobalVars->begin();
+ if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data)
+ report_fatal_error("only data supported in .global_variables");
+ const MCDataFragment &DataFrag = cast<MCDataFragment>(Frag);
+ if (!DataFrag.getFixups().empty())
+ report_fatal_error("fixups not supported in .global_variables");
+ const SmallVectorImpl<char> &Contents = DataFrag.getContents();
+ for (const uint8_t *p = (const uint8_t *)Contents.data(),
+ *end = (const uint8_t *)Contents.data() + Contents.size();
+ p != end; ) {
+ WasmGlobal G;
+ if (end - p < 3)
+ report_fatal_error("truncated global variable encoding");
+ G.Type = wasm::ValType(int8_t(*p++));
+ G.IsMutable = bool(*p++);
+ G.HasImport = bool(*p++);
+ if (G.HasImport) {
+ G.InitialValue = 0;
+
+ WasmImport Import;
+ Import.ModuleName = (const char *)p;
+ const uint8_t *nul = (const uint8_t *)memchr(p, '\0', end - p);
+ if (!nul)
+ report_fatal_error("global module name must be nul-terminated");
+ p = nul + 1;
+ nul = (const uint8_t *)memchr(p, '\0', end - p);
+ if (!nul)
+ report_fatal_error("global base name must be nul-terminated");
+ Import.FieldName = (const char *)p;
+ p = nul + 1;
+
+ Import.Kind = wasm::WASM_EXTERNAL_GLOBAL;
+ Import.Type = int32_t(G.Type);
+
+ G.ImportIndex = NumGlobalImports;
+ ++NumGlobalImports;
+
+ Imports.push_back(Import);
+ } else {
+ unsigned n;
+ G.InitialValue = decodeSLEB128(p, &n);
+ G.ImportIndex = 0;
+ if ((ptrdiff_t)n > end - p)
+ report_fatal_error("global initial value must be valid SLEB128");
+ p += n;
+ }
+ Globals.push_back(G);
+ }
+ }
+
+ // In the special .stack_pointer section, we've encoded the stack pointer
+ // index.
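+  // The encoding is a single int32 read in host byte order; for example
+  // (hypothetical contents, little-endian host): bytes 02 00 00 00 with
+  // three global imports already recorded give StackPointerGlobal = 3 + 2 = 5.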
+ MCSectionWasm *StackPtr = Ctx.getWasmSection(".stack_pointer", 0, 0);
+ if (!StackPtr->getFragmentList().empty()) {
+ if (StackPtr->getFragmentList().size() != 1)
+ report_fatal_error("only one .stack_pointer fragment supported");
+ const MCFragment &Frag = *StackPtr->begin();
+ if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data)
+ report_fatal_error("only data supported in .stack_pointer");
+ const MCDataFragment &DataFrag = cast<MCDataFragment>(Frag);
+ if (!DataFrag.getFixups().empty())
+ report_fatal_error("fixups not supported in .stack_pointer");
+ const SmallVectorImpl<char> &Contents = DataFrag.getContents();
+ if (Contents.size() != 4)
+ report_fatal_error("only one entry supported in .stack_pointer");
+ HasStackPointer = true;
+ StackPointerGlobal = NumGlobalImports + *(const int32_t *)Contents.data();
+ }
+
+ // Handle defined symbols.
+ for (const MCSymbol &S : Asm.symbols()) {
+ // Ignore unnamed temporary symbols, which aren't ever exported, imported,
+ // or used in relocations.
+ if (S.isTemporary() && S.getName().empty())
+ continue;
+ const auto &WS = static_cast<const MCSymbolWasm &>(S);
+ unsigned Index;
+ if (WS.isFunction()) {
+ // Prepare the function's type, if we haven't seen it yet.
+ WasmFunctionType F;
+ F.Returns = WS.getReturns();
+ F.Params = WS.getParams();
+ auto Pair =
+ FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size()));
+ if (Pair.second)
+ FunctionTypes.push_back(F);
+
+ int32_t Type = Pair.first->second;
+
+ if (WS.isDefined(/*SetUsed=*/false)) {
+ // A definition. Take the next available index.
+ Index = NumFuncImports + Functions.size();
+
+ // Prepare the function.
+ WasmFunction Func;
+ Func.Type = Type;
+ Func.Sym = &WS;
+ SymbolIndices[&WS] = Index;
+ Functions.push_back(Func);
+ } else {
+ // An import; the index was assigned above.
+ Index = SymbolIndices.find(&WS)->second;
+ }
+
+ // If needed, prepare the function to be called indirectly.
+ if (IsAddressTaken.count(&WS))
+ TableElems.push_back(Index);
+ } else {
+ // For now, ignore temporary non-function symbols.
+ if (S.isTemporary())
+ continue;
+
+ if (WS.getOffset() != 0)
+ report_fatal_error("data sections must contain one variable each");
+ if (!WS.getSize())
+ report_fatal_error("data symbols must have a size set with .size");
+
+ int64_t Size = 0;
+ if (!WS.getSize()->evaluateAsAbsolute(Size, Layout))
+ report_fatal_error(".size expression must be evaluatable");
+
+ if (WS.isDefined(false)) {
+ MCSectionWasm &DataSection =
+ static_cast<MCSectionWasm &>(WS.getSection());
+
+ if (uint64_t(Size) != Layout.getSectionFileSize(&DataSection))
+ report_fatal_error("data sections must contain at most one variable");
+
+ DataBytes.resize(alignTo(DataBytes.size(), DataSection.getAlignment()));
+
+ DataSection.setSectionOffset(DataBytes.size());
+
+ for (MCSection::iterator I = DataSection.begin(), E = DataSection.end();
+ I != E; ++I) {
+ const MCFragment &Frag = *I;
+ if (Frag.hasInstructions())
+ report_fatal_error("only data supported in data sections");
+
+ if (const MCAlignFragment *Align = dyn_cast<MCAlignFragment>(&Frag)) {
+ if (Align->getValueSize() != 1)
+ report_fatal_error("only byte values supported for alignment");
+ // If nops are requested, use zeros, as this is the data section.
+ uint8_t Value = Align->hasEmitNops() ? 0 : Align->getValue();
+ uint64_t Size = std::min<uint64_t>(alignTo(DataBytes.size(),
+ Align->getAlignment()),
+ DataBytes.size() +
+ Align->getMaxBytesToEmit());
+ DataBytes.resize(Size, Value);
+ } else if (const MCFillFragment *Fill =
+ dyn_cast<MCFillFragment>(&Frag)) {
+ DataBytes.insert(DataBytes.end(), Size, Fill->getValue());
+ } else {
+ const MCDataFragment &DataFrag = cast<MCDataFragment>(Frag);
+ const SmallVectorImpl<char> &Contents = DataFrag.getContents();
+
+ DataBytes.insert(DataBytes.end(), Contents.begin(), Contents.end());
+ }
+ }
+
+ // For each external global, prepare a corresponding wasm global
+ // holding its address.
+ if (WS.isExternal()) {
+ Index = NumGlobalImports + Globals.size();
+
+ WasmGlobal Global;
+ Global.Type = PtrType;
+ Global.IsMutable = false;
+ Global.HasImport = false;
+ Global.InitialValue = DataSection.getSectionOffset();
+ Global.ImportIndex = 0;
+ SymbolIndices[&WS] = Index;
+ Globals.push_back(Global);
+ }
+ }
+ }
+
+ // If the symbol is visible outside this translation unit, export it.
+ if (WS.isExternal()) {
+ assert(WS.isDefined(false));
+ WasmExport Export;
+ Export.FieldName = WS.getName();
+ Export.Index = Index;
+
+ if (WS.isFunction())
+ Export.Kind = wasm::WASM_EXTERNAL_FUNCTION;
+ else
+ Export.Kind = wasm::WASM_EXTERNAL_GLOBAL;
+
+ Exports.push_back(Export);
+ }
+ }
+
+ // Add types for indirect function calls.
+ for (const WasmRelocationEntry &Fixup : TypeIndexFixups) {
+ assert(Fixup.Addend == 0);
+ assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB);
+
+ WasmFunctionType F;
+ F.Returns = Fixup.Symbol->getReturns();
+ F.Params = Fixup.Symbol->getParams();
+ auto Pair =
+ FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size()));
+ if (Pair.second)
+ FunctionTypes.push_back(F);
+
+ TypeIndexFixupTypes.push_back(Pair.first->second);
+ }
+
+ // Write out the Wasm header.
+ writeHeader(Asm);
+
+ SectionBookkeeping Section;
+
+ // === Type Section =========================================================
+ if (!FunctionTypes.empty()) {
+ startSection(Section, wasm::WASM_SEC_TYPE);
+
+ encodeULEB128(FunctionTypes.size(), getStream());
+
+ for (WasmFunctionType &FuncTy : FunctionTypes) {
+ encodeSLEB128(wasm::WASM_TYPE_FUNC, getStream());
+ encodeULEB128(FuncTy.Params.size(), getStream());
+ for (wasm::ValType Ty : FuncTy.Params)
+ writeValueType(Ty);
+ encodeULEB128(FuncTy.Returns.size(), getStream());
+ for (wasm::ValType Ty : FuncTy.Returns)
+ writeValueType(Ty);
+ }
+
+ endSection(Section);
+ }
+
+ // === Import Section ========================================================
+ if (!Imports.empty()) {
+ startSection(Section, wasm::WASM_SEC_IMPORT);
+
+ encodeULEB128(Imports.size(), getStream());
+ for (const WasmImport &Import : Imports) {
+ StringRef ModuleName = Import.ModuleName;
+ encodeULEB128(ModuleName.size(), getStream());
+ writeBytes(ModuleName);
+
+ StringRef FieldName = Import.FieldName;
+ encodeULEB128(FieldName.size(), getStream());
+ writeBytes(FieldName);
+
+ encodeULEB128(Import.Kind, getStream());
+
+ switch (Import.Kind) {
+ case wasm::WASM_EXTERNAL_FUNCTION:
+ encodeULEB128(Import.Type, getStream());
+ break;
+ case wasm::WASM_EXTERNAL_GLOBAL:
+ encodeSLEB128(int32_t(Import.Type), getStream());
+ encodeULEB128(0, getStream()); // mutability
+ break;
+ default:
+ llvm_unreachable("unsupported import kind");
+ }
+ }
+
+ endSection(Section);
+ }
+
+ // === Function Section ======================================================
+ if (!Functions.empty()) {
+ startSection(Section, wasm::WASM_SEC_FUNCTION);
+
+ encodeULEB128(Functions.size(), getStream());
+ for (const WasmFunction &Func : Functions)
+ encodeULEB128(Func.Type, getStream());
+
+ endSection(Section);
+ }
+
+ // === Table Section =========================================================
+ // For now, always emit the table section, since indirect calls are not
+ // valid without it. In the future, we could perhaps be more clever and omit
+ // it if there are no indirect calls.
+ startSection(Section, wasm::WASM_SEC_TABLE);
+
+ // The number of tables, fixed to 1 for now.
+ encodeULEB128(1, getStream());
+
+ encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream());
+
+ encodeULEB128(0, getStream()); // flags
+ encodeULEB128(TableElems.size(), getStream()); // initial
+
+ endSection(Section);
+
+ // === Memory Section ========================================================
+ // For now, always emit the memory section, since loads and stores are not
+ // valid without it. In the future, we could perhaps be more clever and omit
+ // it if there are no loads or stores.
+ startSection(Section, wasm::WASM_SEC_MEMORY);
+
+ encodeULEB128(1, getStream()); // number of memory spaces
+
+ encodeULEB128(0, getStream()); // flags
+ encodeULEB128(DataBytes.size(), getStream()); // initial
+
+ endSection(Section);
+
+ // === Global Section ========================================================
+ if (!Globals.empty()) {
+ startSection(Section, wasm::WASM_SEC_GLOBAL);
+
+ encodeULEB128(Globals.size(), getStream());
+ for (const WasmGlobal &Global : Globals) {
+ writeValueType(Global.Type);
+ write8(Global.IsMutable);
+
+ if (Global.HasImport) {
+ assert(Global.InitialValue == 0);
+ write8(wasm::WASM_OPCODE_GET_GLOBAL);
+ encodeULEB128(Global.ImportIndex, getStream());
+ } else {
+ assert(Global.ImportIndex == 0);
+ write8(wasm::WASM_OPCODE_I32_CONST);
+ encodeSLEB128(Global.InitialValue, getStream()); // offset
+ }
+ write8(wasm::WASM_OPCODE_END);
+ }
+
+ endSection(Section);
+ }
+
+ // === Export Section ========================================================
+ if (!Exports.empty()) {
+ startSection(Section, wasm::WASM_SEC_EXPORT);
+
+ encodeULEB128(Exports.size(), getStream());
+ for (const WasmExport &Export : Exports) {
+ encodeULEB128(Export.FieldName.size(), getStream());
+ writeBytes(Export.FieldName);
+
+ encodeSLEB128(Export.Kind, getStream());
+
+ encodeULEB128(Export.Index, getStream());
+ }
+
+ endSection(Section);
+ }
+
+#if 0 // TODO: Start Section
+ if (HaveStartFunction) {
+ // === Start Section =========================================================
+ startSection(Section, wasm::WASM_SEC_START);
+
+ encodeSLEB128(StartFunction, getStream());
+
+ endSection(Section);
+ }
+#endif
+
+ // === Elem Section ==========================================================
+ if (!TableElems.empty()) {
+ startSection(Section, wasm::WASM_SEC_ELEM);
+
+ encodeULEB128(1, getStream()); // number of "segments"
+ encodeULEB128(0, getStream()); // the table index
+
+ // init expr for starting offset
+ write8(wasm::WASM_OPCODE_I32_CONST);
+ encodeSLEB128(0, getStream());
+ write8(wasm::WASM_OPCODE_END);
+
+ encodeULEB128(TableElems.size(), getStream());
+ for (uint32_t Elem : TableElems)
+ encodeULEB128(Elem, getStream());
+
+ endSection(Section);
+ }
+
+ // === Code Section ==========================================================
+ if (!Functions.empty()) {
+ startSection(Section, wasm::WASM_SEC_CODE);
+
+ encodeULEB128(Functions.size(), getStream());
+
+ for (const WasmFunction &Func : Functions) {
+ MCSectionWasm &FuncSection =
+ static_cast<MCSectionWasm &>(Func.Sym->getSection());
+
+ if (Func.Sym->isVariable())
+ report_fatal_error("weak symbols not supported yet");
+
+ if (Func.Sym->getOffset() != 0)
+ report_fatal_error("function sections must contain one function each");
+
+ if (!Func.Sym->getSize())
+ report_fatal_error("function symbols must have a size set with .size");
+
+ int64_t Size = 0;
+ if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout))
+ report_fatal_error(".size expression must be evaluatable");
+
+ encodeULEB128(Size, getStream());
+
+ FuncSection.setSectionOffset(getStream().tell() -
+ Section.ContentsOffset);
+
+ Asm.writeSectionData(&FuncSection, Layout);
+ }
+
+    // Apply the type index fixups for call_indirect and similar instructions.
+ for (size_t i = 0, e = TypeIndexFixups.size(); i < e; ++i) {
+ uint32_t Type = TypeIndexFixupTypes[i];
+ unsigned Padding = PaddingFor5ByteULEB128(Type);
+
+ const WasmRelocationEntry &Fixup = TypeIndexFixups[i];
+ assert(Fixup.Addend == 0);
+ assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB);
+ uint64_t Offset = Fixup.Offset +
+ Fixup.FixupSection->getSectionOffset();
+
+ uint8_t Buffer[16];
+ unsigned SizeLen = encodeULEB128(Type, Buffer, Padding);
+ assert(SizeLen == 5);
+ getStream().pwrite((char *)Buffer, SizeLen,
+ Section.ContentsOffset + Offset);
+ }
+
+ // Apply fixups.
+ ApplyRelocations(CodeRelocations, getStream(), SymbolIndices,
+ Section.ContentsOffset);
+
+ endSection(Section);
+ }
+
+ // === Data Section ==========================================================
+ if (!DataBytes.empty()) {
+ startSection(Section, wasm::WASM_SEC_DATA);
+
+ encodeULEB128(1, getStream()); // count
+ encodeULEB128(0, getStream()); // memory index
+ write8(wasm::WASM_OPCODE_I32_CONST);
+ encodeSLEB128(0, getStream()); // offset
+ write8(wasm::WASM_OPCODE_END);
+ encodeULEB128(DataBytes.size(), getStream()); // size
+ writeBytes(DataBytes); // data
+
+ // Apply fixups.
+ ApplyRelocations(DataRelocations, getStream(), SymbolIndices,
+ Section.ContentsOffset);
+
+ endSection(Section);
+ }
+
+ // === Name Section ==========================================================
+ uint32_t TotalFunctions = NumFuncImports + Functions.size();
+ if (TotalFunctions != 0) {
+ startSection(Section, wasm::WASM_SEC_CUSTOM, "name");
+ SectionBookkeeping SubSection;
+ startSection(SubSection, wasm::WASM_NAMES_FUNCTION);
+
+ encodeULEB128(TotalFunctions, getStream());
+ uint32_t Index = 0;
+ for (const WasmImport &Import : Imports) {
+ if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) {
+ encodeULEB128(Index, getStream());
+ encodeULEB128(Import.FieldName.size(), getStream());
+ writeBytes(Import.FieldName);
+ ++Index;
+ }
+ }
+ for (const WasmFunction &Func : Functions) {
+ encodeULEB128(Index, getStream());
+ encodeULEB128(Func.Sym->getName().size(), getStream());
+ writeBytes(Func.Sym->getName());
+ ++Index;
+ }
+
+ endSection(SubSection);
+ endSection(Section);
+ }
+
+ // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
+ // for descriptions of the reloc sections.
+
+ // === Code Reloc Section ====================================================
+ if (!CodeRelocations.empty()) {
+ startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.CODE");
+
+ encodeULEB128(wasm::WASM_SEC_CODE, getStream());
+
+ encodeULEB128(CodeRelocations.size(), getStream());
+
+ WriteRelocations(CodeRelocations, getStream(), SymbolIndices);
+ WriteTypeRelocations(TypeIndexFixups, TypeIndexFixupTypes, getStream());
+
+ endSection(Section);
+ }
+
+ // === Data Reloc Section ====================================================
+ if (!DataRelocations.empty()) {
+ startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.DATA");
+
+ encodeULEB128(wasm::WASM_SEC_DATA, getStream());
+
+ encodeULEB128(DataRelocations.size(), getStream());
+
+ WriteRelocations(DataRelocations, getStream(), SymbolIndices);
+
+ endSection(Section);
+ }
+
+ // === Linking Metadata Section ==============================================
+ if (HasStackPointer) {
+ startSection(Section, wasm::WASM_SEC_CUSTOM, "linking");
+
+ encodeULEB128(1, getStream()); // count
+
+ encodeULEB128(wasm::WASM_STACK_POINTER, getStream()); // type
+ encodeULEB128(StackPointerGlobal, getStream()); // id
+
+ endSection(Section);
+ }
+
+ // TODO: Translate the .comment section to the output.
+
+ // TODO: Translate debug sections to the output.
+}
+
+MCObjectWriter *llvm::createWasmObjectWriter(MCWasmObjectTargetWriter *MOTW,
+ raw_pwrite_stream &OS) {
+ return new WasmObjectWriter(MOTW, OS);
+}
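
Aside on the mechanism above: the type-index fixups work only because each patchable immediate was emitted as a fixed five-byte ULEB128 (PaddingFor5ByteULEB128 computes the filler), so the later pwrite can splice in the real index without moving any bytes. A minimal freestanding sketch of such a padded encoder; it is not LLVM's encodeULEB128, though that helper takes an equivalent padding argument:

    // Sketch only: encode Value as ULEB128, emitting continuation bytes
    // until the total length reaches PadTo.
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    static unsigned encodeULEB128Padded(uint64_t Value, uint8_t *Buf,
                                        unsigned PadTo) {
      unsigned N = 0;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0 || N + 1 < PadTo)
          Byte |= 0x80; // mark that more bytes follow
        Buf[N++] = Byte;
      } while (Value != 0 || N < PadTo);
      return N;
    }

    int main() {
      uint8_t Buf[5];
      // Even a tiny type index occupies all five bytes, so a later fixup
      // can overwrite the real value in place without shifting the code.
      unsigned Len = encodeULEB128Padded(3, Buf, 5);
      assert(Len == 5);
      for (unsigned I = 0; I < Len; ++I)
        std::printf("%02x ", Buf[I]); // 83 80 80 80 00
      std::printf("\n");
    }
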
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index afc5c6a14d11..da8fe73f823b 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/MC/WinCOFFObjectWriter.cpp -------------------------*- C++ -*-===//
+//===- llvm/MC/WinCOFFObjectWriter.cpp ------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,37 +11,49 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Config/config.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolCOFF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/COFF.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/JamCRC.h"
-#include <cstdio>
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
#include <ctime>
+#include <memory>
+#include <string>
+#include <vector>
using namespace llvm;
+using llvm::support::endian::write32le;
#define DEBUG_TYPE "WinCOFFObjectWriter"
namespace {
+
typedef SmallString<COFF::NameSize> name;
enum AuxiliaryType {
@@ -57,25 +69,24 @@ struct AuxSymbol {
COFF::Auxiliary Aux;
};
-class COFFSymbol;
class COFFSection;
class COFFSymbol {
public:
- COFF::symbol Data;
+ COFF::symbol Data = {};
typedef SmallVector<AuxSymbol, 1> AuxiliarySymbols;
name Name;
int Index;
AuxiliarySymbols Aux;
- COFFSymbol *Other;
- COFFSection *Section;
- int Relocations;
+ COFFSymbol *Other = nullptr;
+ COFFSection *Section = nullptr;
+ int Relocations = 0;
+ const MCSymbol *MC = nullptr;
- const MCSymbol *MC;
+ COFFSymbol(StringRef Name) : Name(Name) {}
- COFFSymbol(StringRef name);
void set_name_offset(uint32_t Offset);
int64_t getIndex() const { return Index; }
@@ -89,9 +100,10 @@ public:
// This class contains staging data for a COFF relocation entry.
struct COFFRelocation {
COFF::relocation Data;
- COFFSymbol *Symb;
+ COFFSymbol *Symb = nullptr;
+
+ COFFRelocation() = default;
- COFFRelocation() : Symb(nullptr) {}
static size_t size() { return COFF::RelocationSize; }
};
@@ -99,15 +111,15 @@ typedef std::vector<COFFRelocation> relocations;
class COFFSection {
public:
- COFF::section Header;
+ COFF::section Header = {};
std::string Name;
int Number;
- MCSectionCOFF const *MCSection;
- COFFSymbol *Symbol;
+ MCSectionCOFF const *MCSection = nullptr;
+ COFFSymbol *Symbol = nullptr;
relocations Relocations;
- COFFSection(StringRef name);
+ COFFSection(StringRef Name) : Name(Name) {}
};
class WinCOFFObjectWriter : public MCObjectWriter {
@@ -121,7 +133,7 @@ public:
std::unique_ptr<MCWinCOFFObjectTargetWriter> TargetObjectWriter;
// Root level file contents.
- COFF::header Header;
+ COFF::header Header = {};
sections Sections;
symbols Symbols;
StringTableBuilder Strings{StringTableBuilder::WinCOFF};
@@ -149,9 +161,6 @@ public:
COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol *Symbol);
COFFSection *createSection(StringRef Name);
- template <typename object_t, typename list_t>
- object_t *createCOFFEntity(StringRef Name, list_t &List);
-
void defineSection(MCSectionCOFF const &Sec);
COFFSymbol *getLinkedSymbol(const MCSymbol &Symbol);
@@ -168,8 +177,12 @@ public:
void WriteFileHeader(const COFF::header &Header);
void WriteSymbol(const COFFSymbol &S);
void WriteAuxiliarySymbols(const COFFSymbol::AuxiliarySymbols &S);
- void writeSectionHeader(const COFF::section &S);
+ void writeSectionHeaders();
void WriteRelocation(const COFF::relocation &R);
+ uint32_t writeSectionContents(MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCSection &MCSec);
+ void writeSection(MCAssembler &Asm, const MCAsmLayout &Layout,
+ const COFFSection &Sec, const MCSection &MCSec);
// MCObjectWriter interface implementation.
@@ -181,45 +194,29 @@ public:
const MCFragment &FB, bool InSet,
bool IsPCRel) const override;
- bool isWeak(const MCSymbol &Sym) const override;
-
void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
uint64_t &FixedValue) override;
+ void createFileSymbols(MCAssembler &Asm);
+ void assignSectionNumbers();
+ void assignFileOffsets(MCAssembler &Asm, const MCAsmLayout &Layout);
+
void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
};
-}
-static inline void write_uint32_le(void *Data, uint32_t Value) {
- support::endian::write<uint32_t, support::little, support::unaligned>(Data,
- Value);
-}
+} // end anonymous namespace
//------------------------------------------------------------------------------
// Symbol class implementation
-COFFSymbol::COFFSymbol(StringRef name)
- : Name(name.begin(), name.end()), Other(nullptr), Section(nullptr),
- Relocations(0), MC(nullptr) {
- memset(&Data, 0, sizeof(Data));
-}
-
// In the case that the name does not fit within 8 bytes, the offset
// into the string table is stored in the last 4 bytes instead, leaving
// the first 4 bytes as 0.
void COFFSymbol::set_name_offset(uint32_t Offset) {
- write_uint32_le(Data.Name + 0, 0);
- write_uint32_le(Data.Name + 4, Offset);
-}
-
-//------------------------------------------------------------------------------
-// Section class implementation
-
-COFFSection::COFFSection(StringRef name)
- : Name(name), MCSection(nullptr), Symbol(nullptr) {
- memset(&Header, 0, sizeof(Header));
+ write32le(Data.Name + 0, 0);
+ write32le(Data.Name + 4, Offset);
}
//------------------------------------------------------------------------------
@@ -228,115 +225,92 @@ COFFSection::COFFSection(StringRef name)
WinCOFFObjectWriter::WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
raw_pwrite_stream &OS)
: MCObjectWriter(OS, true), TargetObjectWriter(MOTW) {
- memset(&Header, 0, sizeof(Header));
-
Header.Machine = TargetObjectWriter->getMachine();
}
COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) {
- return createCOFFEntity<COFFSymbol>(Name, Symbols);
+ Symbols.push_back(make_unique<COFFSymbol>(Name));
+ return Symbols.back().get();
}
COFFSymbol *WinCOFFObjectWriter::GetOrCreateCOFFSymbol(const MCSymbol *Symbol) {
- symbol_map::iterator i = SymbolMap.find(Symbol);
- if (i != SymbolMap.end())
- return i->second;
- COFFSymbol *RetSymbol =
- createCOFFEntity<COFFSymbol>(Symbol->getName(), Symbols);
- SymbolMap[Symbol] = RetSymbol;
- return RetSymbol;
+ COFFSymbol *&Ret = SymbolMap[Symbol];
+ if (!Ret)
+ Ret = createSymbol(Symbol->getName());
+ return Ret;
}
COFFSection *WinCOFFObjectWriter::createSection(StringRef Name) {
- return createCOFFEntity<COFFSection>(Name, Sections);
+ Sections.emplace_back(make_unique<COFFSection>(Name));
+ return Sections.back().get();
}
-/// A template used to lookup or create a symbol/section, and initialize it if
-/// needed.
-template <typename object_t, typename list_t>
-object_t *WinCOFFObjectWriter::createCOFFEntity(StringRef Name, list_t &List) {
- List.push_back(make_unique<object_t>(Name));
-
- return List.back().get();
-}
-
-/// This function takes a section data object from the assembler
-/// and creates the associated COFF section staging object.
-void WinCOFFObjectWriter::defineSection(MCSectionCOFF const &Sec) {
- COFFSection *coff_section = createSection(Sec.getSectionName());
- COFFSymbol *coff_symbol = createSymbol(Sec.getSectionName());
- if (Sec.getSelection() != COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
- if (const MCSymbol *S = Sec.getCOMDATSymbol()) {
- COFFSymbol *COMDATSymbol = GetOrCreateCOFFSymbol(S);
- if (COMDATSymbol->Section)
- report_fatal_error("two sections have the same comdat");
- COMDATSymbol->Section = coff_section;
- }
- }
-
- coff_section->Symbol = coff_symbol;
- coff_symbol->Section = coff_section;
- coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
-
- // In this case the auxiliary symbol is a Section Definition.
- coff_symbol->Aux.resize(1);
- memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
- coff_symbol->Aux[0].AuxType = ATSectionDefinition;
- coff_symbol->Aux[0].Aux.SectionDefinition.Selection = Sec.getSelection();
-
- coff_section->Header.Characteristics = Sec.getCharacteristics();
-
- uint32_t &Characteristics = coff_section->Header.Characteristics;
+static uint32_t getAlignment(const MCSectionCOFF &Sec) {
switch (Sec.getAlignment()) {
case 1:
- Characteristics |= COFF::IMAGE_SCN_ALIGN_1BYTES;
- break;
+ return COFF::IMAGE_SCN_ALIGN_1BYTES;
case 2:
- Characteristics |= COFF::IMAGE_SCN_ALIGN_2BYTES;
- break;
+ return COFF::IMAGE_SCN_ALIGN_2BYTES;
case 4:
- Characteristics |= COFF::IMAGE_SCN_ALIGN_4BYTES;
- break;
+ return COFF::IMAGE_SCN_ALIGN_4BYTES;
case 8:
- Characteristics |= COFF::IMAGE_SCN_ALIGN_8BYTES;
- break;
+ return COFF::IMAGE_SCN_ALIGN_8BYTES;
case 16:
- Characteristics |= COFF::IMAGE_SCN_ALIGN_16BYTES;
- break;
+ return COFF::IMAGE_SCN_ALIGN_16BYTES;
case 32:
- Characteristics |= COFF::IMAGE_SCN_ALIGN_32BYTES;
- break;
+ return COFF::IMAGE_SCN_ALIGN_32BYTES;
case 64:
- Characteristics |= COFF::IMAGE_SCN_ALIGN_64BYTES;
- break;
+ return COFF::IMAGE_SCN_ALIGN_64BYTES;
case 128:
- Characteristics |= COFF::IMAGE_SCN_ALIGN_128BYTES;
- break;
+ return COFF::IMAGE_SCN_ALIGN_128BYTES;
case 256:
- Characteristics |= COFF::IMAGE_SCN_ALIGN_256BYTES;
- break;
+ return COFF::IMAGE_SCN_ALIGN_256BYTES;
case 512:
- Characteristics |= COFF::IMAGE_SCN_ALIGN_512BYTES;
- break;
+ return COFF::IMAGE_SCN_ALIGN_512BYTES;
case 1024:
- Characteristics |= COFF::IMAGE_SCN_ALIGN_1024BYTES;
- break;
+ return COFF::IMAGE_SCN_ALIGN_1024BYTES;
case 2048:
- Characteristics |= COFF::IMAGE_SCN_ALIGN_2048BYTES;
- break;
+ return COFF::IMAGE_SCN_ALIGN_2048BYTES;
case 4096:
- Characteristics |= COFF::IMAGE_SCN_ALIGN_4096BYTES;
- break;
+ return COFF::IMAGE_SCN_ALIGN_4096BYTES;
case 8192:
- Characteristics |= COFF::IMAGE_SCN_ALIGN_8192BYTES;
- break;
- default:
- llvm_unreachable("unsupported section alignment");
+ return COFF::IMAGE_SCN_ALIGN_8192BYTES;
+ }
+ llvm_unreachable("unsupported section alignment");
+}
+
+/// This function takes a section data object from the assembler
+/// and creates the associated COFF section staging object.
+void WinCOFFObjectWriter::defineSection(const MCSectionCOFF &MCSec) {
+ COFFSection *Section = createSection(MCSec.getSectionName());
+ COFFSymbol *Symbol = createSymbol(MCSec.getSectionName());
+ Section->Symbol = Symbol;
+ Symbol->Section = Section;
+ Symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
+
+ // Create a COMDAT symbol if needed.
+ if (MCSec.getSelection() != COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+ if (const MCSymbol *S = MCSec.getCOMDATSymbol()) {
+ COFFSymbol *COMDATSymbol = GetOrCreateCOFFSymbol(S);
+ if (COMDATSymbol->Section)
+ report_fatal_error("two sections have the same comdat");
+ COMDATSymbol->Section = Section;
+ }
}
+ // In this case the auxiliary symbol is a Section Definition.
+ Symbol->Aux.resize(1);
+ Symbol->Aux[0] = {};
+ Symbol->Aux[0].AuxType = ATSectionDefinition;
+ Symbol->Aux[0].Aux.SectionDefinition.Selection = MCSec.getSelection();
+
+ // Set section alignment.
+ Section->Header.Characteristics = MCSec.getCharacteristics();
+ Section->Header.Characteristics |= getAlignment(MCSec);
+
// Bind internal COFF section to MC section.
- coff_section->MCSection = &Sec;
- SectionMap[&Sec] = coff_section;
+ Section->MCSection = &MCSec;
+ SectionMap[&MCSec] = Section;
}
static uint64_t getSymbolValue(const MCSymbol &Symbol,
@@ -368,25 +342,25 @@ COFFSymbol *WinCOFFObjectWriter::getLinkedSymbol(const MCSymbol &Symbol) {
/// This function takes a symbol data object from the assembler
/// and creates the associated COFF symbol staging object.
-void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &Symbol,
+void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &MCSym,
MCAssembler &Assembler,
const MCAsmLayout &Layout) {
- COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&Symbol);
- const MCSymbol *Base = Layout.getBaseSymbol(Symbol);
+ COFFSymbol *Sym = GetOrCreateCOFFSymbol(&MCSym);
+ const MCSymbol *Base = Layout.getBaseSymbol(MCSym);
COFFSection *Sec = nullptr;
if (Base && Base->getFragment()) {
Sec = SectionMap[Base->getFragment()->getParent()];
- if (coff_symbol->Section && coff_symbol->Section != Sec)
+ if (Sym->Section && Sym->Section != Sec)
report_fatal_error("conflicting sections for symbol");
}
COFFSymbol *Local = nullptr;
- if (cast<MCSymbolCOFF>(Symbol).isWeakExternal()) {
- coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+ if (cast<MCSymbolCOFF>(MCSym).isWeakExternal()) {
+ Sym->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
- COFFSymbol *WeakDefault = getLinkedSymbol(Symbol);
+ COFFSymbol *WeakDefault = getLinkedSymbol(MCSym);
if (!WeakDefault) {
- std::string WeakName = (".weak." + Symbol.getName() + ".default").str();
+ std::string WeakName = (".weak." + MCSym.getName() + ".default").str();
WeakDefault = createSymbol(WeakName);
if (!Sec)
WeakDefault->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
@@ -395,41 +369,41 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &Symbol,
Local = WeakDefault;
}
- coff_symbol->Other = WeakDefault;
+ Sym->Other = WeakDefault;
// Setup the Weak External auxiliary symbol.
- coff_symbol->Aux.resize(1);
- memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
- coff_symbol->Aux[0].AuxType = ATWeakExternal;
- coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0;
- coff_symbol->Aux[0].Aux.WeakExternal.Characteristics =
+ Sym->Aux.resize(1);
+ memset(&Sym->Aux[0], 0, sizeof(Sym->Aux[0]));
+ Sym->Aux[0].AuxType = ATWeakExternal;
+ Sym->Aux[0].Aux.WeakExternal.TagIndex = 0;
+ Sym->Aux[0].Aux.WeakExternal.Characteristics =
COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
} else {
if (!Base)
- coff_symbol->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
+ Sym->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
else
- coff_symbol->Section = Sec;
- Local = coff_symbol;
+ Sym->Section = Sec;
+ Local = Sym;
}
if (Local) {
- Local->Data.Value = getSymbolValue(Symbol, Layout);
+ Local->Data.Value = getSymbolValue(MCSym, Layout);
- const MCSymbolCOFF &SymbolCOFF = cast<MCSymbolCOFF>(Symbol);
+ const MCSymbolCOFF &SymbolCOFF = cast<MCSymbolCOFF>(MCSym);
Local->Data.Type = SymbolCOFF.getType();
Local->Data.StorageClass = SymbolCOFF.getClass();
// If no storage class was specified in the streamer, define it here.
if (Local->Data.StorageClass == COFF::IMAGE_SYM_CLASS_NULL) {
- bool IsExternal = Symbol.isExternal() ||
- (!Symbol.getFragment() && !Symbol.isVariable());
+ bool IsExternal = MCSym.isExternal() ||
+ (!MCSym.getFragment() && !MCSym.isVariable());
Local->Data.StorageClass = IsExternal ? COFF::IMAGE_SYM_CLASS_EXTERNAL
: COFF::IMAGE_SYM_CLASS_STATIC;
}
}
- coff_symbol->MC = &Symbol;
+ Sym->MC = &MCSym;
}
// Maximum offsets for different string table entry encodings.
@@ -459,24 +433,25 @@ static void encodeBase64StringEntry(char *Buffer, uint64_t Value) {
}
void WinCOFFObjectWriter::SetSectionName(COFFSection &S) {
- if (S.Name.size() > COFF::NameSize) {
- uint64_t StringTableEntry = Strings.getOffset(S.Name);
-
- if (StringTableEntry <= Max7DecimalOffset) {
- SmallVector<char, COFF::NameSize> Buffer;
- Twine('/').concat(Twine(StringTableEntry)).toVector(Buffer);
- assert(Buffer.size() <= COFF::NameSize && Buffer.size() >= 2);
-
- std::memcpy(S.Header.Name, Buffer.data(), Buffer.size());
- } else if (StringTableEntry <= MaxBase64Offset) {
- // Starting with 10,000,000, offsets are encoded as base64.
- encodeBase64StringEntry(S.Header.Name, StringTableEntry);
- } else {
- report_fatal_error("COFF string table is greater than 64 GB.");
- }
- } else {
+ if (S.Name.size() <= COFF::NameSize) {
std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
+ return;
}
+
+ uint64_t StringTableEntry = Strings.getOffset(S.Name);
+ if (StringTableEntry <= Max7DecimalOffset) {
+ SmallVector<char, COFF::NameSize> Buffer;
+ Twine('/').concat(Twine(StringTableEntry)).toVector(Buffer);
+ assert(Buffer.size() <= COFF::NameSize && Buffer.size() >= 2);
+ std::memcpy(S.Header.Name, Buffer.data(), Buffer.size());
+ return;
+ }
+ if (StringTableEntry <= MaxBase64Offset) {
+ // Starting with 10,000,000, offsets are encoded as base64.
+ encodeBase64StringEntry(S.Header.Name, StringTableEntry);
+ return;
+ }
+ report_fatal_error("COFF string table is greater than 64 GB.");
}
void WinCOFFObjectWriter::SetSymbolName(COFFSymbol &S) {
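
The restructured SetSectionName above selects among three encodings of the 8-byte header name field: inline for short names, "/<decimal>" for string-table offsets up to Max7DecimalOffset, and "//<base64>" beyond that. A freestanding sketch of the same selection, assuming the usual A-Z/a-z/0-9/+// alphabet; encodeSectionName is a hypothetical name, not the writer's API:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <string>

    // Produce the 8 bytes stored in a COFF section header's Name field.
    static std::string encodeSectionName(const std::string &Name,
                                         uint64_t StrTabOffset) {
      char Field[8] = {};
      if (Name.size() <= 8) {
        std::memcpy(Field, Name.data(), Name.size()); // short name, inline
      } else if (StrTabOffset <= 9999999) {
        char Buf[16]; // "/<decimal offset>", at most 8 chars in this range
        int Len = std::snprintf(Buf, sizeof(Buf), "/%llu",
                                (unsigned long long)StrTabOffset);
        std::memcpy(Field, Buf, Len); // the field carries no terminator
      } else {
        // "//" plus six base64 digits, most significant first: reaches
        // 64^6 bytes, i.e. the 64 GB limit the writer reports as fatal.
        static const char Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                       "abcdefghijklmnopqrstuvwxyz"
                                       "0123456789+/";
        Field[0] = Field[1] = '/';
        for (int I = 7; I >= 2; --I, StrTabOffset /= 64)
          Field[I] = Alphabet[StrTabOffset % 64];
      }
      return std::string(Field, 8);
    }

    int main() {
      std::printf("%s\n", encodeSectionName(".debug_info", 4).c_str());
      // prints "/4"
      std::printf("%s\n", encodeSectionName(".debug_info", 10000000).c_str());
      // prints "//AAmJaA"
    }
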
@@ -583,18 +558,37 @@ void WinCOFFObjectWriter::WriteAuxiliarySymbols(
}
}
-void WinCOFFObjectWriter::writeSectionHeader(const COFF::section &S) {
- writeBytes(StringRef(S.Name, COFF::NameSize));
-
- writeLE32(S.VirtualSize);
- writeLE32(S.VirtualAddress);
- writeLE32(S.SizeOfRawData);
- writeLE32(S.PointerToRawData);
- writeLE32(S.PointerToRelocations);
- writeLE32(S.PointerToLineNumbers);
- writeLE16(S.NumberOfRelocations);
- writeLE16(S.NumberOfLineNumbers);
- writeLE32(S.Characteristics);
+// Write the section headers.
+void WinCOFFObjectWriter::writeSectionHeaders() {
+ // Section numbers must be monotonically increasing in the section
+ // header, but our Sections array is not sorted by section number,
+ // so make a copy of Sections and sort it.
+ std::vector<COFFSection *> Arr;
+ for (auto &Section : Sections)
+ Arr.push_back(Section.get());
+ std::sort(Arr.begin(), Arr.end(),
+ [](const COFFSection *A, const COFFSection *B) {
+ return A->Number < B->Number;
+ });
+
+ for (auto &Section : Arr) {
+ if (Section->Number == -1)
+ continue;
+
+ COFF::section &S = Section->Header;
+ if (Section->Relocations.size() >= 0xffff)
+ S.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
+ writeBytes(StringRef(S.Name, COFF::NameSize));
+ writeLE32(S.VirtualSize);
+ writeLE32(S.VirtualAddress);
+ writeLE32(S.SizeOfRawData);
+ writeLE32(S.PointerToRawData);
+ writeLE32(S.PointerToRelocations);
+ writeLE32(S.PointerToLineNumbers);
+ writeLE16(S.NumberOfRelocations);
+ writeLE16(S.NumberOfLineNumbers);
+ writeLE32(S.Characteristics);
+ }
}
void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) {
@@ -603,6 +597,87 @@ void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) {
writeLE16(R.Type);
}
+// Write MCSec's contents. This is essentially
+// "Asm.writeSectionData(&MCSec, Layout)", except that the bytes are first
+// staged in a buffer so that a CRC can be computed over them.
+uint32_t WinCOFFObjectWriter::writeSectionContents(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCSection &MCSec) {
+ // Save the contents of the section to a temporary buffer, we need this
+ // to CRC the data before we dump it into the object file.
+ SmallVector<char, 128> Buf;
+ raw_svector_ostream VecOS(Buf);
+ raw_pwrite_stream &OldStream = getStream();
+
+ // Redirect the output stream to our buffer and fill our buffer with
+ // the section data.
+ setStream(VecOS);
+ Asm.writeSectionData(&MCSec, Layout);
+
+ // Reset the stream back to what it was before.
+ setStream(OldStream);
+
+ // Write the section contents to the object file.
+ getStream() << Buf;
+
+  // Calculate our CRC with an initial value of '0'; this is not how
+  // JamCRC is specified, but it matches the expected output.
+ JamCRC JC(/*Init=*/0);
+ JC.update(Buf);
+ return JC.getCRC();
+}
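
For reference, the checksum in question is JAMCRC (reflected CRC-32, polynomial 0xEDB88320, no final xor), except seeded with 0 as the comment notes. A freestanding bit-at-a-time sketch; llvm::JamCRC is table-driven but computes the same value:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static uint32_t jamCRC(const char *Data, size_t Len, uint32_t CRC = 0) {
      for (size_t I = 0; I < Len; ++I) {
        CRC ^= (uint8_t)Data[I];
        for (int B = 0; B < 8; ++B) // reflected CRC-32 bit step
          CRC = (CRC >> 1) ^ (0xEDB88320u & (0u - (CRC & 1u)));
      }
      return CRC; // no final xor, unlike standard CRC-32
    }

    int main() {
      const char Buf[] = "hello";
      std::printf("%08x\n", jamCRC(Buf, std::strlen(Buf)));
    }
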
+
+void WinCOFFObjectWriter::writeSection(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const COFFSection &Sec,
+ const MCSection &MCSec) {
+ if (Sec.Number == -1)
+ return;
+
+ // Write the section contents.
+ if (Sec.Header.PointerToRawData != 0) {
+ assert(getStream().tell() <= Sec.Header.PointerToRawData &&
+ "Section::PointerToRawData is insane!");
+
+ unsigned PaddingSize = Sec.Header.PointerToRawData - getStream().tell();
+ assert(PaddingSize < 4 &&
+ "Should only need at most three bytes of padding!");
+ WriteZeros(PaddingSize);
+
+ uint32_t CRC = writeSectionContents(Asm, Layout, MCSec);
+
+ // Update the section definition auxiliary symbol to record the CRC.
+ COFFSection *Sec = SectionMap[&MCSec];
+ COFFSymbol::AuxiliarySymbols &AuxSyms = Sec->Symbol->Aux;
+ assert(AuxSyms.size() == 1 && AuxSyms[0].AuxType == ATSectionDefinition);
+ AuxSymbol &SecDef = AuxSyms[0];
+ SecDef.Aux.SectionDefinition.CheckSum = CRC;
+ }
+
+ // Write relocations for this section.
+ if (Sec.Relocations.empty()) {
+ assert(Sec.Header.PointerToRelocations == 0 &&
+ "Section::PointerToRelocations is insane!");
+ return;
+ }
+
+ assert(getStream().tell() == Sec.Header.PointerToRelocations &&
+ "Section::PointerToRelocations is insane!");
+
+ if (Sec.Relocations.size() >= 0xffff) {
+    // In case of overflow, write the actual relocation count as the first
+    // relocation; the count includes the synthetic reloc itself (hence + 1).
+ COFF::relocation R;
+ R.VirtualAddress = Sec.Relocations.size() + 1;
+ R.SymbolTableIndex = 0;
+ R.Type = 0;
+ WriteRelocation(R);
+ }
+
+ for (const auto &Relocation : Sec.Relocations)
+ WriteRelocation(Relocation.Data);
+}
+
////////////////////////////////////////////////////////////////////////////////
// MCObjectWriter interface implementations
@@ -632,23 +707,6 @@ bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
InSet, IsPCRel);
}
-bool WinCOFFObjectWriter::isWeak(const MCSymbol &Sym) const {
- if (!Sym.isExternal())
- return false;
-
- if (!Sym.isInSection())
- return false;
-
- const auto &Sec = cast<MCSectionCOFF>(Sym.getSection());
- if (!Sec.getCOMDATSymbol())
- return false;
-
- // It looks like for COFF it is invalid to replace a reference to a global
- // in a comdat with a reference to a local.
- // FIXME: Add a specification reference if available.
- return true;
-}
-
void WinCOFFObjectWriter::recordRelocation(
MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) {
@@ -668,13 +726,13 @@ void WinCOFFObjectWriter::recordRelocation(
return;
}
- MCSection *Section = Fragment->getParent();
+ MCSection *MCSec = Fragment->getParent();
// Mark this symbol as requiring an entry in the symbol table.
- assert(SectionMap.find(Section) != SectionMap.end() &&
+ assert(SectionMap.find(MCSec) != SectionMap.end() &&
"Section must already have been defined in executePostLayoutBinding!");
- COFFSection *coff_section = SectionMap[Section];
+ COFFSection *Sec = SectionMap[MCSec];
const MCSymbolRefExpr *SymB = Target.getSymB();
bool CrossSection = false;
@@ -796,46 +854,31 @@ void WinCOFFObjectWriter::recordRelocation(
FixedValue = 0;
if (TargetObjectWriter->recordRelocation(Fixup))
- coff_section->Relocations.push_back(Reloc);
+ Sec->Relocations.push_back(Reloc);
}
-void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
- const MCAsmLayout &Layout) {
- size_t SectionsSize = Sections.size();
- if (SectionsSize > static_cast<size_t>(INT32_MAX))
- report_fatal_error(
- "PE COFF object files can't have more than 2147483647 sections");
-
- // Assign symbol and section indexes and offsets.
- int32_t NumberOfSections = static_cast<int32_t>(SectionsSize);
-
- UseBigObj = NumberOfSections > COFF::MaxNumberOfSections16;
-
- // Assign section numbers.
- size_t Number = 1;
- for (const auto &Section : Sections) {
- Section->Number = Number;
- Section->Symbol->Data.SectionNumber = Number;
- Section->Symbol->Aux[0].Aux.SectionDefinition.Number = Number;
- ++Number;
- }
-
- Header.NumberOfSections = NumberOfSections;
- Header.NumberOfSymbols = 0;
+static std::time_t getTime() {
+ std::time_t Now = time(nullptr);
+ if (Now < 0 || !isUInt<32>(Now))
+ return UINT32_MAX;
+ return Now;
+}
+// Create .file symbols.
+void WinCOFFObjectWriter::createFileSymbols(MCAssembler &Asm) {
for (const std::string &Name : Asm.getFileNames()) {
// round up to calculate the number of auxiliary symbols required
unsigned SymbolSize = UseBigObj ? COFF::Symbol32Size : COFF::Symbol16Size;
unsigned Count = (Name.size() + SymbolSize - 1) / SymbolSize;
- COFFSymbol *file = createSymbol(".file");
- file->Data.SectionNumber = COFF::IMAGE_SYM_DEBUG;
- file->Data.StorageClass = COFF::IMAGE_SYM_CLASS_FILE;
- file->Aux.resize(Count);
+ COFFSymbol *File = createSymbol(".file");
+ File->Data.SectionNumber = COFF::IMAGE_SYM_DEBUG;
+ File->Data.StorageClass = COFF::IMAGE_SYM_CLASS_FILE;
+ File->Aux.resize(Count);
unsigned Offset = 0;
unsigned Length = Name.size();
- for (auto &Aux : file->Aux) {
+ for (auto &Aux : File->Aux) {
Aux.AuxType = ATFile;
if (Length > SymbolSize) {
@@ -850,6 +893,109 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
Offset += SymbolSize;
}
}
+}
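
The ceil division in createFileSymbols sizes the name spill across auxiliary records, one symbol-record-sized chunk each. A worked sketch of that arithmetic, assuming the classic 18-byte symbol record (COFF::Symbol16Size; big objects use the wider Symbol32Size):

    #include <cassert>
    #include <cstddef>

    // ceil(NameLen / SymbolSize): one aux record per record-sized chunk.
    static unsigned auxRecordsFor(std::size_t NameLen,
                                  unsigned SymbolSize = 18) {
      return (NameLen + SymbolSize - 1) / SymbolSize;
    }

    int main() {
      assert(auxRecordsFor(1) == 1);  // a short name fits in one record
      assert(auxRecordsFor(18) == 1); // exactly one record, no slack
      assert(auxRecordsFor(19) == 2); // one spilled byte costs a second one
    }
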
+
+static bool isAssociative(const COFFSection &Section) {
+ return Section.Symbol->Aux[0].Aux.SectionDefinition.Selection ==
+ COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE;
+}
+
+void WinCOFFObjectWriter::assignSectionNumbers() {
+ size_t I = 1;
+ auto Assign = [&](COFFSection &Section) {
+ Section.Number = I;
+ Section.Symbol->Data.SectionNumber = I;
+ Section.Symbol->Aux[0].Aux.SectionDefinition.Number = I;
+ ++I;
+ };
+
+  // Although it is not explicitly required by the Microsoft COFF spec,
+  // we should avoid emitting forward associative section references,
+  // because MSVC link.exe as of 2017 cannot handle them.
+ for (const std::unique_ptr<COFFSection> &Section : Sections)
+ if (!isAssociative(*Section))
+ Assign(*Section);
+ for (const std::unique_ptr<COFFSection> &Section : Sections)
+ if (isAssociative(*Section))
+ Assign(*Section);
+}
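
The two Assign loops amount to a stable partition: non-associative sections take the low numbers, associative sections follow, and relative order is otherwise preserved, so an associative section can only refer backwards. A freestanding sketch of the equivalent ordering (section names are made up):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Sec { const char *Name; bool Associative; int Number; };

    int main() {
      std::vector<Sec> Secs = {{".text", false, -1},
                               {".xdata$foo", true, -1},
                               {".data", false, -1}};
      // Keep non-associative sections first, preserving relative order.
      std::stable_partition(Secs.begin(), Secs.end(),
                            [](const Sec &S) { return !S.Associative; });
      int I = 1;
      for (Sec &S : Secs)
        S.Number = I++;
      for (const Sec &S : Secs)
        std::printf("%-10s -> %d\n", S.Name, S.Number);
      // .text -> 1, .data -> 2, .xdata$foo -> 3: the associative section
      // is numbered last, so it can only refer backwards.
    }
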
+
+// Assign file offsets to COFF object file structures.
+void WinCOFFObjectWriter::assignFileOffsets(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ unsigned Offset = getInitialOffset();
+
+ Offset += UseBigObj ? COFF::Header32Size : COFF::Header16Size;
+ Offset += COFF::SectionSize * Header.NumberOfSections;
+
+ for (const auto &Section : Asm) {
+ COFFSection *Sec = SectionMap[&Section];
+
+ if (Sec->Number == -1)
+ continue;
+
+ Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(&Section);
+
+ if (IsPhysicalSection(Sec)) {
+ // Align the section data to a four byte boundary.
+ Offset = alignTo(Offset, 4);
+ Sec->Header.PointerToRawData = Offset;
+
+ Offset += Sec->Header.SizeOfRawData;
+ }
+
+ if (!Sec->Relocations.empty()) {
+ bool RelocationsOverflow = Sec->Relocations.size() >= 0xffff;
+
+ if (RelocationsOverflow) {
+ // Signal overflow by setting NumberOfRelocations to max value. Actual
+ // size is found in reloc #0. Microsoft tools understand this.
+ Sec->Header.NumberOfRelocations = 0xffff;
+ } else {
+ Sec->Header.NumberOfRelocations = Sec->Relocations.size();
+ }
+ Sec->Header.PointerToRelocations = Offset;
+
+ if (RelocationsOverflow) {
+ // Reloc #0 will contain actual count, so make room for it.
+ Offset += COFF::RelocationSize;
+ }
+
+ Offset += COFF::RelocationSize * Sec->Relocations.size();
+
+ for (auto &Relocation : Sec->Relocations) {
+ assert(Relocation.Symb->getIndex() != -1);
+ Relocation.Data.SymbolTableIndex = Relocation.Symb->getIndex();
+ }
+ }
+
+ assert(Sec->Symbol->Aux.size() == 1 &&
+ "Section's symbol must have one aux!");
+ AuxSymbol &Aux = Sec->Symbol->Aux[0];
+ assert(Aux.AuxType == ATSectionDefinition &&
+ "Section's symbol's aux symbol must be a Section Definition!");
+ Aux.Aux.SectionDefinition.Length = Sec->Header.SizeOfRawData;
+ Aux.Aux.SectionDefinition.NumberOfRelocations =
+ Sec->Header.NumberOfRelocations;
+ Aux.Aux.SectionDefinition.NumberOfLinenumbers =
+ Sec->Header.NumberOfLineNumbers;
+ }
+
+ Header.PointerToSymbolTable = Offset;
+}
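
assignFileOffsets rounds each raw-data offset up to four bytes, which is exactly why writeSection may assert that at most three padding bytes are ever needed. A freestanding sketch of the rounding (llvm::alignTo generalizes this to any alignment):

    #include <cassert>
    #include <cstdint>

    // Equivalent of llvm::alignTo(Offset, 4) for this power-of-two case.
    static uint64_t alignTo4(uint64_t Offset) {
      return (Offset + 3) & ~uint64_t(3);
    }

    int main() {
      assert(alignTo4(20) == 20); // already aligned: zero padding
      assert(alignTo4(21) == 24); // hence "at most three bytes of padding"
      assert(alignTo4(23) == 24);
    }
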
+
+void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ if (Sections.size() > INT32_MAX)
+ report_fatal_error(
+ "PE COFF object files can't have more than 2147483647 sections");
+
+ UseBigObj = Sections.size() > COFF::MaxNumberOfSections16;
+ Header.NumberOfSections = Sections.size();
+ Header.NumberOfSymbols = 0;
+
+ assignSectionNumbers();
+ createFileSymbols(Asm);
for (auto &Symbol : Symbols) {
// Update section number & offset for symbols that have them.
@@ -912,78 +1058,12 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
Section->Symbol->Aux[0].Aux.SectionDefinition.Number = Assoc->Number;
}
- // Assign file offsets to COFF object file structures.
-
- unsigned offset = getInitialOffset();
-
- if (UseBigObj)
- offset += COFF::Header32Size;
- else
- offset += COFF::Header16Size;
- offset += COFF::SectionSize * Header.NumberOfSections;
-
- for (const auto &Section : Asm) {
- COFFSection *Sec = SectionMap[&Section];
-
- if (Sec->Number == -1)
- continue;
-
- Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(&Section);
-
- if (IsPhysicalSection(Sec)) {
- // Align the section data to a four byte boundary.
- offset = alignTo(offset, 4);
- Sec->Header.PointerToRawData = offset;
-
- offset += Sec->Header.SizeOfRawData;
- }
-
- if (Sec->Relocations.size() > 0) {
- bool RelocationsOverflow = Sec->Relocations.size() >= 0xffff;
-
- if (RelocationsOverflow) {
- // Signal overflow by setting NumberOfRelocations to max value. Actual
- // size is found in reloc #0. Microsoft tools understand this.
- Sec->Header.NumberOfRelocations = 0xffff;
- } else {
- Sec->Header.NumberOfRelocations = Sec->Relocations.size();
- }
- Sec->Header.PointerToRelocations = offset;
-
- if (RelocationsOverflow) {
- // Reloc #0 will contain actual count, so make room for it.
- offset += COFF::RelocationSize;
- }
-
- offset += COFF::RelocationSize * Sec->Relocations.size();
-
- for (auto &Relocation : Sec->Relocations) {
- assert(Relocation.Symb->getIndex() != -1);
- Relocation.Data.SymbolTableIndex = Relocation.Symb->getIndex();
- }
- }
-
- assert(Sec->Symbol->Aux.size() == 1 &&
- "Section's symbol must have one aux!");
- AuxSymbol &Aux = Sec->Symbol->Aux[0];
- assert(Aux.AuxType == ATSectionDefinition &&
- "Section's symbol's aux symbol must be a Section Definition!");
- Aux.Aux.SectionDefinition.Length = Sec->Header.SizeOfRawData;
- Aux.Aux.SectionDefinition.NumberOfRelocations =
- Sec->Header.NumberOfRelocations;
- Aux.Aux.SectionDefinition.NumberOfLinenumbers =
- Sec->Header.NumberOfLineNumbers;
- }
-
- Header.PointerToSymbolTable = offset;
+ assignFileOffsets(Asm, Layout);
// MS LINK expects to be able to use this timestamp to implement their
// /INCREMENTAL feature.
if (Asm.isIncrementalLinkerCompatible()) {
- std::time_t Now = time(nullptr);
- if (Now < 0 || !isUInt<32>(Now))
- Now = UINT32_MAX;
- Header.TimeDateStamp = Now;
+ Header.TimeDateStamp = getTime();
} else {
// Have deterministic output if /INCREMENTAL isn't needed. Also matches GNU.
Header.TimeDateStamp = 0;
@@ -991,96 +1071,25 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
// Write it all to disk...
WriteFileHeader(Header);
+ writeSectionHeaders();
- {
- sections::iterator i, ie;
- MCAssembler::iterator j, je;
-
- for (auto &Section : Sections) {
- if (Section->Number != -1) {
- if (Section->Relocations.size() >= 0xffff)
- Section->Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
- writeSectionHeader(Section->Header);
- }
- }
-
- SmallVector<char, 128> SectionContents;
- for (i = Sections.begin(), ie = Sections.end(), j = Asm.begin(),
- je = Asm.end();
- (i != ie) && (j != je); ++i, ++j) {
-
- if ((*i)->Number == -1)
- continue;
-
- if ((*i)->Header.PointerToRawData != 0) {
- assert(getStream().tell() <= (*i)->Header.PointerToRawData &&
- "Section::PointerToRawData is insane!");
-
- unsigned SectionDataPadding =
- (*i)->Header.PointerToRawData - getStream().tell();
- assert(SectionDataPadding < 4 &&
- "Should only need at most three bytes of padding!");
-
- WriteZeros(SectionDataPadding);
-
- // Save the contents of the section to a temporary buffer, we need this
- // to CRC the data before we dump it into the object file.
- SectionContents.clear();
- raw_svector_ostream VecOS(SectionContents);
- raw_pwrite_stream &OldStream = getStream();
- // Redirect the output stream to our buffer.
- setStream(VecOS);
- // Fill our buffer with the section data.
- Asm.writeSectionData(&*j, Layout);
- // Reset the stream back to what it was before.
- setStream(OldStream);
-
- // Calculate our CRC with an initial value of '0', this is not how
- // JamCRC is specified but it aligns with the expected output.
- JamCRC JC(/*Init=*/0x00000000U);
- JC.update(SectionContents);
-
- // Write the section contents to the object file.
- getStream() << SectionContents;
-
- // Update the section definition auxiliary symbol to record the CRC.
- COFFSection *Sec = SectionMap[&*j];
- COFFSymbol::AuxiliarySymbols &AuxSyms = Sec->Symbol->Aux;
- assert(AuxSyms.size() == 1 &&
- AuxSyms[0].AuxType == ATSectionDefinition);
- AuxSymbol &SecDef = AuxSyms[0];
- SecDef.Aux.SectionDefinition.CheckSum = JC.getCRC();
- }
-
- if ((*i)->Relocations.size() > 0) {
- assert(getStream().tell() == (*i)->Header.PointerToRelocations &&
- "Section::PointerToRelocations is insane!");
-
- if ((*i)->Relocations.size() >= 0xffff) {
- // In case of overflow, write actual relocation count as first
- // relocation. Including the synthetic reloc itself (+ 1).
- COFF::relocation r;
- r.VirtualAddress = (*i)->Relocations.size() + 1;
- r.SymbolTableIndex = 0;
- r.Type = 0;
- WriteRelocation(r);
- }
-
- for (const auto &Relocation : (*i)->Relocations)
- WriteRelocation(Relocation.Data);
- } else
- assert((*i)->Header.PointerToRelocations == 0 &&
- "Section::PointerToRelocations is insane!");
- }
- }
+ // Write section contents.
+ sections::iterator I = Sections.begin();
+ sections::iterator IE = Sections.end();
+ MCAssembler::iterator J = Asm.begin();
+ MCAssembler::iterator JE = Asm.end();
+ for (; I != IE && J != JE; ++I, ++J)
+ writeSection(Asm, Layout, **I, *J);
assert(getStream().tell() == Header.PointerToSymbolTable &&
"Header::PointerToSymbolTable is insane!");
+  // Write the symbol table.
for (auto &Symbol : Symbols)
if (Symbol->getIndex() != -1)
WriteSymbol(*Symbol);
+  // Write the string table, which completes the COFF file.
Strings.write(getStream());
}
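
Summarizing the overflow convention now split across assignFileOffsets and writeSection: NumberOfRelocations is a 16-bit header field, so a count of 0xffff or more saturates the field, sets IMAGE_SCN_LNK_NRELOC_OVFL, and stores the true count (including the synthetic entry itself) in relocation #0. A freestanding sketch with illustrative struct and helper names:

    #include <cassert>
    #include <cstdint>

    struct SecHdr {
      uint16_t NumberOfRelocations;
      bool OverflowFlag; // stands in for IMAGE_SCN_LNK_NRELOC_OVFL
    };

    // Returns how many relocation records are actually emitted.
    static uint32_t emittedRelocs(uint32_t Actual, SecHdr &H) {
      bool Overflow = Actual >= 0xffff;
      H.OverflowFlag = Overflow;
      H.NumberOfRelocations = Overflow ? 0xffff : Actual;
      return Overflow ? Actual + 1 : Actual; // +1 for synthetic reloc #0
    }

    int main() {
      SecHdr H;
      assert(emittedRelocs(100, H) == 100 && !H.OverflowFlag);
      assert(emittedRelocs(0xffff, H) == 0x10000 && H.OverflowFlag &&
             H.NumberOfRelocations == 0xffff);
    }
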
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index 6383d8794030..c26d87f36f83 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/MC/WinCOFFStreamer.cpp -----------------------------*- C++ -*-===//
+//===- llvm/MC/WinCOFFStreamer.cpp ----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,32 +11,36 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCSectionCOFF.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolCOFF.h"
-#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWinCOFFStreamer.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/COFF.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SMLoc.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
using namespace llvm;
#define DEBUG_TYPE "WinCOFFStreamer"
-namespace llvm {
MCWinCOFFStreamer::MCWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
MCCodeEmitter &CE, raw_pwrite_stream &OS)
: MCObjectStreamer(Context, MAB, OS, &CE), CurSymbol(nullptr) {}
@@ -75,10 +79,9 @@ void MCWinCOFFStreamer::InitSections(bool NoExecStack) {
SwitchSection(getContext().getObjectFileInfo()->getTextSection());
}
-void MCWinCOFFStreamer::EmitLabel(MCSymbol *S) {
+void MCWinCOFFStreamer::EmitLabel(MCSymbol *S, SMLoc Loc) {
auto *Symbol = cast<MCSymbolCOFF>(S);
- assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- MCObjectStreamer::EmitLabel(Symbol);
+ MCObjectStreamer::EmitLabel(Symbol, Loc);
}
void MCWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
@@ -275,10 +278,6 @@ void MCWinCOFFStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
llvm_unreachable("not implemented");
}
-void MCWinCOFFStreamer::EmitFileDirective(StringRef Filename) {
- getAssembler().addFileName(Filename);
-}
-
// TODO: Implement this if you want to emit .comment section in COFF obj files.
void MCWinCOFFStreamer::EmitIdent(StringRef IdentString) {
llvm_unreachable("not implemented");
@@ -295,5 +294,3 @@ void MCWinCOFFStreamer::FinishImpl() {
void MCWinCOFFStreamer::Error(const Twine &Msg) const {
getContext().reportError(SMLoc(), Msg);
}
-}
-
diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp
index f8e3c5a0a03f..5b233aab2018 100644
--- a/lib/Object/ArchiveWriter.cpp
+++ b/lib/Object/ArchiveWriter.cpp
@@ -122,12 +122,27 @@ static void printWithSpacePadding(raw_fd_ostream &OS, T Data, unsigned Size,
}
}
+static bool isBSDLike(object::Archive::Kind Kind) {
+ switch (Kind) {
+ case object::Archive::K_GNU:
+ return false;
+ case object::Archive::K_BSD:
+ case object::Archive::K_DARWIN:
+ return true;
+ case object::Archive::K_MIPS64:
+ case object::Archive::K_DARWIN64:
+ case object::Archive::K_COFF:
+ break;
+ }
+ llvm_unreachable("not supported for writting");
+}
+
static void print32(raw_ostream &Out, object::Archive::Kind Kind,
uint32_t Val) {
- if (Kind == object::Archive::K_GNU)
- support::endian::Writer<support::big>(Out).write(Val);
- else
+ if (isBSDLike(Kind))
support::endian::Writer<support::little>(Out).write(Val);
+ else
+ support::endian::Writer<support::big>(Out).write(Val);
}
static void printRestOfMemberHeader(
@@ -178,7 +193,7 @@ printMemberHeader(raw_fd_ostream &Out, object::Archive::Kind Kind, bool Thin,
std::vector<unsigned>::iterator &StringMapIndexIter,
const sys::TimePoint<std::chrono::seconds> &ModTime,
unsigned UID, unsigned GID, unsigned Perms, unsigned Size) {
- if (Kind == object::Archive::K_BSD)
+ if (isBSDLike(Kind))
return printBSDMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size);
if (!useStringTable(Thin, Name))
return printGNUSmallMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size);
@@ -285,10 +300,10 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind,
if (!HeaderStartOffset) {
HeaderStartOffset = Out.tell();
- if (Kind == object::Archive::K_GNU)
- printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, 0);
- else
+ if (isBSDLike(Kind))
printBSDMemberHeader(Out, "__.SYMDEF", now(Deterministic), 0, 0, 0, 0);
+ else
+ printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, 0);
BodyStartOffset = Out.tell();
print32(Out, Kind, 0); // number of entries or bytes
}
@@ -307,7 +322,7 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind,
return EC;
NameOS << '\0';
MemberOffsetRefs.push_back(MemberNum);
- if (Kind == object::Archive::K_BSD)
+ if (isBSDLike(Kind))
print32(Out, Kind, NameOffset);
print32(Out, Kind, 0); // member offset
}
@@ -316,10 +331,21 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind,
if (HeaderStartOffset == 0)
return 0;
+  // ld64 prefers the cctools-style archive, which pads its string table to a
+  // boundary of sizeof(int32_t).
+ if (isBSDLike(Kind))
+ for (unsigned P = OffsetToAlignment(NameOS.tell(), sizeof(int32_t)); P--;)
+ NameOS << '\0';
+
StringRef StringTable = NameOS.str();
- if (Kind == object::Archive::K_BSD)
+ if (isBSDLike(Kind))
print32(Out, Kind, StringTable.size()); // byte count of the string table
Out << StringTable;
+ // If there are no symbols, emit an empty symbol table, to satisfy Solaris
+ // tools, older versions of which expect a symbol table in a non-empty
+ // archive, regardless of whether there are any symbols in it.
+ if (StringTable.size() == 0)
+ print32(Out, Kind, 0);
// ld64 requires the next member header to start at an offset that is
// 4 bytes aligned.
@@ -336,10 +362,10 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind,
// Patch up the number of symbols.
Out.seek(BodyStartOffset);
unsigned NumSyms = MemberOffsetRefs.size();
- if (Kind == object::Archive::K_GNU)
- print32(Out, Kind, NumSyms);
- else
+ if (isBSDLike(Kind))
print32(Out, Kind, NumSyms * 8);
+ else
+ print32(Out, Kind, NumSyms);
Out.seek(Pos);
return BodyStartOffset + 4;
@@ -351,8 +377,7 @@ llvm::writeArchive(StringRef ArcName,
bool WriteSymtab, object::Archive::Kind Kind,
bool Deterministic, bool Thin,
std::unique_ptr<MemoryBuffer> OldArchiveBuf) {
- assert((!Thin || Kind == object::Archive::K_GNU) &&
- "Only the gnu format has a thin mode");
+ assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode");
SmallString<128> TmpArchive;
int TmpArchiveFD;
if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a",
@@ -368,10 +393,6 @@ llvm::writeArchive(StringRef ArcName,
std::vector<unsigned> MemberOffsetRefs;
- std::vector<std::unique_ptr<MemoryBuffer>> Buffers;
- std::vector<MemoryBufferRef> Members;
- std::vector<sys::fs::file_status> NewMemberStatus;
-
unsigned MemberReferenceOffset = 0;
if (WriteSymtab) {
ErrorOr<unsigned> MemberReferenceOffsetOrErr = writeSymbolTable(
@@ -382,25 +403,35 @@ llvm::writeArchive(StringRef ArcName,
}
std::vector<unsigned> StringMapIndexes;
- if (Kind != object::Archive::K_BSD)
+ if (!isBSDLike(Kind))
writeStringTable(Out, ArcName, NewMembers, StringMapIndexes, Thin);
std::vector<unsigned>::iterator StringMapIndexIter = StringMapIndexes.begin();
std::vector<unsigned> MemberOffset;
for (const NewArchiveMember &M : NewMembers) {
MemoryBufferRef File = M.Buf->getMemBufferRef();
+ unsigned Padding = 0;
unsigned Pos = Out.tell();
MemberOffset.push_back(Pos);
+ // ld64 expects the members to be 8-byte aligned for 64-bit content and at
+ // least 4-byte aligned for 32-bit content. Opt for the larger encoding
+    // uniformly. This matches the behaviour of cctools and ensures that ld64
+ // is happy with archives that we generate.
+ if (Kind == object::Archive::K_DARWIN)
+ Padding = OffsetToAlignment(M.Buf->getBufferSize(), 8);
+
printMemberHeader(Out, Kind, Thin,
sys::path::filename(M.Buf->getBufferIdentifier()),
StringMapIndexIter, M.ModTime, M.UID, M.GID, M.Perms,
- M.Buf->getBufferSize());
+ M.Buf->getBufferSize() + Padding);
if (!Thin)
Out << File.getBuffer();
+ while (Padding--)
+ Out << '\n';
if (Out.tell() % 2)
Out << '\n';
}
@@ -408,7 +439,7 @@ llvm::writeArchive(StringRef ArcName,
if (MemberReferenceOffset) {
Out.seek(MemberReferenceOffset);
for (unsigned MemberNum : MemberOffsetRefs) {
- if (Kind == object::Archive::K_BSD)
+ if (isBSDLike(Kind))
Out.seek(Out.tell() + 4); // skip over the string offset
print32(Out, Kind, MemberOffset[MemberNum]);
}
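
Both ld64 accommodations in this file reduce to the same arithmetic: OffsetToAlignment(Value, Align) yields the number of pad bytes needed to reach the next boundary, zero if already aligned. A freestanding sketch:

    #include <cassert>
    #include <cstdint>

    // Pad bytes needed to reach the next Align boundary (0 if already there).
    static uint64_t offsetToAlignment(uint64_t Value, uint64_t Align) {
      return (Align - Value % Align) % Align;
    }

    int main() {
      assert(offsetToAlignment(16, 8) == 0); // already 8-byte aligned
      assert(offsetToAlignment(17, 8) == 7); // seven '\n' pad bytes follow
      assert(offsetToAlignment(20, 4) == 0); // string table at int32 edge
    }
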
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index b895c3fcc050..2007f560c166 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_library(LLVMObject
ELFObjectFile.cpp
Error.cpp
IRObjectFile.cpp
+ IRSymtab.cpp
MachOObjectFile.cpp
MachOUniversal.cpp
ModuleSummaryIndexObjectFile.cpp
diff --git a/lib/Object/Decompressor.cpp b/lib/Object/Decompressor.cpp
index bca41fd9f487..0be602b1fc1a 100644
--- a/lib/Object/Decompressor.cpp
+++ b/lib/Object/Decompressor.cpp
@@ -95,8 +95,5 @@ Error Decompressor::decompress(SmallString<32> &Out) {
Error Decompressor::decompress(MutableArrayRef<char> Buffer) {
size_t Size = Buffer.size();
- zlib::Status Status = zlib::uncompress(SectionData, Buffer.data(), Size);
- if (Status != zlib::StatusOK)
- return createError("decompression failed");
- return Error::success();
+ return zlib::uncompress(SectionData, Buffer.data(), Size);
}
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index 4bd69e34e3c3..3f8c81c8e911 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/Support/ARMAttributeParser.h"
#include "llvm/Support/MathExtras.h"
namespace llvm {
@@ -55,71 +57,247 @@ ObjectFile::createELFObjectFile(MemoryBufferRef Obj) {
return std::move(R);
}
-SubtargetFeatures ELFObjectFileBase::getFeatures() const {
- switch (getEMachine()) {
- case ELF::EM_MIPS: {
- SubtargetFeatures Features;
- unsigned PlatformFlags;
- getPlatformFlags(PlatformFlags);
+SubtargetFeatures ELFObjectFileBase::getMIPSFeatures() const {
+ SubtargetFeatures Features;
+ unsigned PlatformFlags;
+ getPlatformFlags(PlatformFlags);
+
+ switch (PlatformFlags & ELF::EF_MIPS_ARCH) {
+ case ELF::EF_MIPS_ARCH_1:
+ break;
+ case ELF::EF_MIPS_ARCH_2:
+ Features.AddFeature("mips2");
+ break;
+ case ELF::EF_MIPS_ARCH_3:
+ Features.AddFeature("mips3");
+ break;
+ case ELF::EF_MIPS_ARCH_4:
+ Features.AddFeature("mips4");
+ break;
+ case ELF::EF_MIPS_ARCH_5:
+ Features.AddFeature("mips5");
+ break;
+ case ELF::EF_MIPS_ARCH_32:
+ Features.AddFeature("mips32");
+ break;
+ case ELF::EF_MIPS_ARCH_64:
+ Features.AddFeature("mips64");
+ break;
+ case ELF::EF_MIPS_ARCH_32R2:
+ Features.AddFeature("mips32r2");
+ break;
+ case ELF::EF_MIPS_ARCH_64R2:
+ Features.AddFeature("mips64r2");
+ break;
+ case ELF::EF_MIPS_ARCH_32R6:
+ Features.AddFeature("mips32r6");
+ break;
+ case ELF::EF_MIPS_ARCH_64R6:
+ Features.AddFeature("mips64r6");
+ break;
+ default:
+ llvm_unreachable("Unknown EF_MIPS_ARCH value");
+ }
+
+ switch (PlatformFlags & ELF::EF_MIPS_MACH) {
+ case ELF::EF_MIPS_MACH_NONE:
+ // No feature associated with this value.
+ break;
+ case ELF::EF_MIPS_MACH_OCTEON:
+ Features.AddFeature("cnmips");
+ break;
+ default:
+ llvm_unreachable("Unknown EF_MIPS_ARCH value");
+ }
- switch (PlatformFlags & ELF::EF_MIPS_ARCH) {
- case ELF::EF_MIPS_ARCH_1:
+ if (PlatformFlags & ELF::EF_MIPS_ARCH_ASE_M16)
+ Features.AddFeature("mips16");
+ if (PlatformFlags & ELF::EF_MIPS_MICROMIPS)
+ Features.AddFeature("micromips");
+
+ return Features;
+}
+
+SubtargetFeatures ELFObjectFileBase::getARMFeatures() const {
+ SubtargetFeatures Features;
+ ARMAttributeParser Attributes;
+ std::error_code EC = getBuildAttributes(Attributes);
+ if (EC)
+ return SubtargetFeatures();
+
+  // Both ARMv7-M and ARMv7-R have to support Thumb hardware divide.
+ bool isV7 = false;
+ if (Attributes.hasAttribute(ARMBuildAttrs::CPU_arch))
+ isV7 = Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch)
+ == ARMBuildAttrs::v7;
+
+ if (Attributes.hasAttribute(ARMBuildAttrs::CPU_arch_profile)) {
+ switch(Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile)) {
+ case ARMBuildAttrs::ApplicationProfile:
+ Features.AddFeature("aclass");
break;
- case ELF::EF_MIPS_ARCH_2:
- Features.AddFeature("mips2");
+ case ARMBuildAttrs::RealTimeProfile:
+ Features.AddFeature("rclass");
+ if (isV7)
+ Features.AddFeature("hwdiv");
break;
- case ELF::EF_MIPS_ARCH_3:
- Features.AddFeature("mips3");
+ case ARMBuildAttrs::MicroControllerProfile:
+ Features.AddFeature("mclass");
+ if (isV7)
+ Features.AddFeature("hwdiv");
break;
- case ELF::EF_MIPS_ARCH_4:
- Features.AddFeature("mips4");
+ }
+ }
+
+ if (Attributes.hasAttribute(ARMBuildAttrs::THUMB_ISA_use)) {
+ switch(Attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use)) {
+ default:
break;
- case ELF::EF_MIPS_ARCH_5:
- Features.AddFeature("mips5");
+ case ARMBuildAttrs::Not_Allowed:
+ Features.AddFeature("thumb", false);
+ Features.AddFeature("thumb2", false);
break;
- case ELF::EF_MIPS_ARCH_32:
- Features.AddFeature("mips32");
+ case ARMBuildAttrs::AllowThumb32:
+ Features.AddFeature("thumb2");
break;
- case ELF::EF_MIPS_ARCH_64:
- Features.AddFeature("mips64");
+ }
+ }
+
+ if (Attributes.hasAttribute(ARMBuildAttrs::FP_arch)) {
+ switch(Attributes.getAttributeValue(ARMBuildAttrs::FP_arch)) {
+ default:
break;
- case ELF::EF_MIPS_ARCH_32R2:
- Features.AddFeature("mips32r2");
+ case ARMBuildAttrs::Not_Allowed:
+ Features.AddFeature("vfp2", false);
+ Features.AddFeature("vfp3", false);
+ Features.AddFeature("vfp4", false);
break;
- case ELF::EF_MIPS_ARCH_64R2:
- Features.AddFeature("mips64r2");
+ case ARMBuildAttrs::AllowFPv2:
+ Features.AddFeature("vfp2");
break;
- case ELF::EF_MIPS_ARCH_32R6:
- Features.AddFeature("mips32r6");
+ case ARMBuildAttrs::AllowFPv3A:
+ case ARMBuildAttrs::AllowFPv3B:
+ Features.AddFeature("vfp3");
break;
- case ELF::EF_MIPS_ARCH_64R6:
- Features.AddFeature("mips64r6");
+ case ARMBuildAttrs::AllowFPv4A:
+ case ARMBuildAttrs::AllowFPv4B:
+ Features.AddFeature("vfp4");
break;
- default:
- llvm_unreachable("Unknown EF_MIPS_ARCH value");
}
+ }
- switch (PlatformFlags & ELF::EF_MIPS_MACH) {
- case ELF::EF_MIPS_MACH_NONE:
- // No feature associated with this value.
+ if (Attributes.hasAttribute(ARMBuildAttrs::Advanced_SIMD_arch)) {
+ switch (Attributes.getAttributeValue(ARMBuildAttrs::Advanced_SIMD_arch)) {
+ default:
+ break;
+ case ARMBuildAttrs::Not_Allowed:
+ Features.AddFeature("neon", false);
+ Features.AddFeature("fp16", false);
+ break;
+ case ARMBuildAttrs::AllowNeon:
+ Features.AddFeature("neon");
break;
- case ELF::EF_MIPS_MACH_OCTEON:
- Features.AddFeature("cnmips");
+ case ARMBuildAttrs::AllowNeon2:
+ Features.AddFeature("neon");
+ Features.AddFeature("fp16");
break;
+ }
+ }
+
+ if (Attributes.hasAttribute(ARMBuildAttrs::DIV_use)) {
+ switch (Attributes.getAttributeValue(ARMBuildAttrs::DIV_use)) {
default:
- llvm_unreachable("Unknown EF_MIPS_ARCH value");
+ break;
+ case ARMBuildAttrs::DisallowDIV:
+ Features.AddFeature("hwdiv", false);
+ Features.AddFeature("hwdiv-arm", false);
+ break;
+ case ARMBuildAttrs::AllowDIVExt:
+ Features.AddFeature("hwdiv");
+ Features.AddFeature("hwdiv-arm");
+ break;
}
+ }
- if (PlatformFlags & ELF::EF_MIPS_ARCH_ASE_M16)
- Features.AddFeature("mips16");
- if (PlatformFlags & ELF::EF_MIPS_MICROMIPS)
- Features.AddFeature("micromips");
+ return Features;
+}
- return Features;
- }
+SubtargetFeatures ELFObjectFileBase::getFeatures() const {
+ switch (getEMachine()) {
+ case ELF::EM_MIPS:
+ return getMIPSFeatures();
+ case ELF::EM_ARM:
+ return getARMFeatures();
default:
return SubtargetFeatures();
}
}
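+
+// Hypothetical caller sketch (not part of this patch): the returned features
+// are typically forwarded to the MC layer, e.g.
+//   SubtargetFeatures F = Obj->getFeatures();
+//   STI = TheTarget->createMCSubtargetInfo(TripleName, MCPU, F.getString());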
+// FIXME: Encode this from a tablegen description or the target parser.
+void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
+ if (TheTriple.getSubArch() != Triple::NoSubArch)
+ return;
+
+ ARMAttributeParser Attributes;
+ std::error_code EC = getBuildAttributes(Attributes);
+ if (EC)
+ return;
+
+ std::string Triple;
+ // Default to an "arm" prefix, but use "thumb" if the triple's arch is
+ // already a Thumb variant.
+ if (TheTriple.getArch() == Triple::thumb ||
+ TheTriple.getArch() == Triple::thumbeb)
+ Triple = "thumb";
+ else
+ Triple = "arm";
+
+ if (Attributes.hasAttribute(ARMBuildAttrs::CPU_arch)) {
+ switch (Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch)) {
+ case ARMBuildAttrs::v4:
+ Triple += "v4";
+ break;
+ case ARMBuildAttrs::v4T:
+ Triple += "v4t";
+ break;
+ case ARMBuildAttrs::v5T:
+ Triple += "v5t";
+ break;
+ case ARMBuildAttrs::v5TE:
+ Triple += "v5te";
+ break;
+ case ARMBuildAttrs::v5TEJ:
+ Triple += "v5tej";
+ break;
+ case ARMBuildAttrs::v6:
+ Triple += "v6";
+ break;
+ case ARMBuildAttrs::v6KZ:
+ Triple += "v6kz";
+ break;
+ case ARMBuildAttrs::v6T2:
+ Triple += "v6t2";
+ break;
+ case ARMBuildAttrs::v6K:
+ Triple += "v6k";
+ break;
+ case ARMBuildAttrs::v7:
+ Triple += "v7";
+ break;
+ case ARMBuildAttrs::v6_M:
+ Triple += "v6m";
+ break;
+ case ARMBuildAttrs::v6S_M:
+ Triple += "v6sm";
+ break;
+ case ARMBuildAttrs::v7E_M:
+ Triple += "v7em";
+ break;
+ }
+ }
+ if (!isLittleEndian())
+ Triple += "eb";
+
+ TheTriple.setArchName(Triple);
+}
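+
+// Hypothetical usage sketch (not in the source): given a bare triple,
+//   Triple T("arm-unknown-linux-gnueabi");
+//   Obj->setARMSubArch(T);  // T's arch may be refined to e.g. "armv7"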
+
} // end namespace llvm
diff --git a/lib/Object/IRSymtab.cpp b/lib/Object/IRSymtab.cpp
new file mode 100644
index 000000000000..da1ef9946b50
--- /dev/null
+++ b/lib/Object/IRSymtab.cpp
@@ -0,0 +1,231 @@
+//===- IRSymtab.cpp - implementation of IR symbol tables --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/IRSymtab.h"
+#include "llvm/Analysis/ObjectUtils.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Object/ModuleSymbolTable.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/StringSaver.h"
+
+using namespace llvm;
+using namespace irsymtab;
+
+namespace {
+
+/// Stores the temporary state that is required to build an IR symbol table.
+struct Builder {
+ SmallVector<char, 0> &Symtab;
+ SmallVector<char, 0> &Strtab;
+ Builder(SmallVector<char, 0> &Symtab, SmallVector<char, 0> &Strtab)
+ : Symtab(Symtab), Strtab(Strtab) {}
+
+ StringTableBuilder StrtabBuilder{StringTableBuilder::ELF};
+
+ BumpPtrAllocator Alloc;
+ StringSaver Saver{Alloc};
+
+ DenseMap<const Comdat *, unsigned> ComdatMap;
+ ModuleSymbolTable Msymtab;
+ SmallPtrSet<GlobalValue *, 8> Used;
+ Mangler Mang;
+ Triple TT;
+
+ std::vector<storage::Comdat> Comdats;
+ std::vector<storage::Module> Mods;
+ std::vector<storage::Symbol> Syms;
+ std::vector<storage::Uncommon> Uncommons;
+
+ std::string COFFLinkerOpts;
+ raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts};
+
+ void setStr(storage::Str &S, StringRef Value) {
+ S.Offset = StrtabBuilder.add(Value);
+ }
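+ // Copies Objs into the flat Symtab buffer and records where it landed as a
+ // Range (byte offset into Symtab plus element count) in the header field R.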
+ template <typename T>
+ void writeRange(storage::Range<T> &R, const std::vector<T> &Objs) {
+ R.Offset = Symtab.size();
+ R.Size = Objs.size();
+ Symtab.insert(Symtab.end(), reinterpret_cast<const char *>(Objs.data()),
+ reinterpret_cast<const char *>(Objs.data() + Objs.size()));
+ }
+
+ Error addModule(Module *M);
+ Error addSymbol(ModuleSymbolTable::Symbol Sym);
+
+ Error build(ArrayRef<Module *> Mods);
+};
+
+Error Builder::addModule(Module *M) {
+ collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false);
+
+ storage::Module Mod;
+ Mod.Begin = Msymtab.symbols().size();
+ Msymtab.addModule(M);
+ Mod.End = Msymtab.symbols().size();
+ Mods.push_back(Mod);
+
+ if (TT.isOSBinFormatCOFF()) {
+ if (auto E = M->materializeMetadata())
+ return E;
+ if (Metadata *Val = M->getModuleFlag("Linker Options")) {
+ MDNode *LinkerOptions = cast<MDNode>(Val);
+ for (const MDOperand &MDOptions : LinkerOptions->operands())
+ for (const MDOperand &MDOption : cast<MDNode>(MDOptions)->operands())
+ COFFLinkerOptsOS << " " << cast<MDString>(MDOption)->getString();
+ }
+ }
+
+ return Error::success();
+}
+
+Error Builder::addSymbol(ModuleSymbolTable::Symbol Msym) {
+ Syms.emplace_back();
+ storage::Symbol &Sym = Syms.back();
+ Sym = {};
+
+ Sym.UncommonIndex = -1;
+ storage::Uncommon *Unc = nullptr;
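+ // Lazily allocate an Uncommon record the first time one of the rarely-used
+ // fields is needed; most symbols never require one.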
+ auto Uncommon = [&]() -> storage::Uncommon & {
+ if (Unc)
+ return *Unc;
+ Sym.UncommonIndex = Uncommons.size();
+ Uncommons.emplace_back();
+ Unc = &Uncommons.back();
+ *Unc = {};
+ setStr(Unc->COFFWeakExternFallbackName, "");
+ return *Unc;
+ };
+
+ SmallString<64> Name;
+ {
+ raw_svector_ostream OS(Name);
+ Msymtab.printSymbolName(OS, Msym);
+ }
+ setStr(Sym.Name, Saver.save(StringRef(Name)));
+
+ auto Flags = Msymtab.getSymbolFlags(Msym);
+ if (Flags & object::BasicSymbolRef::SF_Undefined)
+ Sym.Flags |= 1 << storage::Symbol::FB_undefined;
+ if (Flags & object::BasicSymbolRef::SF_Weak)
+ Sym.Flags |= 1 << storage::Symbol::FB_weak;
+ if (Flags & object::BasicSymbolRef::SF_Common)
+ Sym.Flags |= 1 << storage::Symbol::FB_common;
+ if (Flags & object::BasicSymbolRef::SF_Indirect)
+ Sym.Flags |= 1 << storage::Symbol::FB_indirect;
+ if (Flags & object::BasicSymbolRef::SF_Global)
+ Sym.Flags |= 1 << storage::Symbol::FB_global;
+ if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
+ Sym.Flags |= 1 << storage::Symbol::FB_format_specific;
+ if (Flags & object::BasicSymbolRef::SF_Executable)
+ Sym.Flags |= 1 << storage::Symbol::FB_executable;
+
+ Sym.ComdatIndex = -1;
+ auto *GV = Msym.dyn_cast<GlobalValue *>();
+ if (!GV) {
+ setStr(Sym.IRName, "");
+ return Error::success();
+ }
+
+ setStr(Sym.IRName, GV->getName());
+
+ if (Used.count(GV))
+ Sym.Flags |= 1 << storage::Symbol::FB_used;
+ if (GV->isThreadLocal())
+ Sym.Flags |= 1 << storage::Symbol::FB_tls;
+ if (GV->hasGlobalUnnamedAddr())
+ Sym.Flags |= 1 << storage::Symbol::FB_unnamed_addr;
+ if (canBeOmittedFromSymbolTable(GV))
+ Sym.Flags |= 1 << storage::Symbol::FB_may_omit;
+ Sym.Flags |= unsigned(GV->getVisibility()) << storage::Symbol::FB_visibility;
+
+ if (Flags & object::BasicSymbolRef::SF_Common) {
+ Uncommon().CommonSize = GV->getParent()->getDataLayout().getTypeAllocSize(
+ GV->getType()->getElementType());
+ Uncommon().CommonAlign = GV->getAlignment();
+ }
+
+ const GlobalObject *Base = GV->getBaseObject();
+ if (!Base)
+ return make_error<StringError>("Unable to determine comdat of alias!",
+ inconvertibleErrorCode());
+ if (const Comdat *C = Base->getComdat()) {
+ auto P = ComdatMap.insert(std::make_pair(C, Comdats.size()));
+ Sym.ComdatIndex = P.first->second;
+
+ if (P.second) {
+ storage::Comdat Comdat;
+ setStr(Comdat.Name, C->getName());
+ Comdats.push_back(Comdat);
+ }
+ }
+
+ if (TT.isOSBinFormatCOFF()) {
+ emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS, GV, TT, Mang);
+
+ if ((Flags & object::BasicSymbolRef::SF_Weak) &&
+ (Flags & object::BasicSymbolRef::SF_Indirect)) {
+ std::string FallbackName;
+ raw_string_ostream OS(FallbackName);
+ Msymtab.printSymbolName(
+ OS, cast<GlobalValue>(
+ cast<GlobalAlias>(GV)->getAliasee()->stripPointerCasts()));
+ OS.flush();
+ setStr(Uncommon().COFFWeakExternFallbackName, Saver.save(FallbackName));
+ }
+ }
+
+ return Error::success();
+}
+
+Error Builder::build(ArrayRef<Module *> IRMods) {
+ storage::Header Hdr;
+
+ assert(!IRMods.empty());
+ setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple());
+ setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName());
+ TT = Triple(IRMods[0]->getTargetTriple());
+
+ // This adds the symbols for each module to Msymtab.
+ for (auto *M : IRMods)
+ if (Error Err = addModule(M))
+ return Err;
+
+ for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols())
+ if (Error Err = addSymbol(Msym))
+ return Err;
+
+ COFFLinkerOptsOS.flush();
+ setStr(Hdr.COFFLinkerOpts, COFFLinkerOpts);
+
+ // We are about to fill in the header's range fields, so reserve space for it
+ // and copy it in afterwards.
+ Symtab.resize(sizeof(storage::Header));
+ writeRange(Hdr.Modules, Mods);
+ writeRange(Hdr.Comdats, Comdats);
+ writeRange(Hdr.Symbols, Syms);
+ writeRange(Hdr.Uncommons, Uncommons);
+
+ *reinterpret_cast<storage::Header *>(Symtab.data()) = Hdr;
+
+ raw_svector_ostream OS(Strtab);
+ StrtabBuilder.finalizeInOrder();
+ StrtabBuilder.write(OS);
+
+ return Error::success();
+}
+
+} // anonymous namespace
+
+Error irsymtab::build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
+ SmallVector<char, 0> &Strtab) {
+ return Builder(Symtab, Strtab).build(Mods);
+}
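+
+// Hypothetical usage sketch (not part of the file): serializing one module:
+//   SmallVector<char, 0> Symtab, Strtab;
+//   if (Error E = irsymtab::build({&M}, Symtab, Strtab))
+//     return E;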
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 5b018676eba3..1753d2baaedd 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -106,13 +106,6 @@ static StringRef parseSegmentOrSectionName(const char *P) {
return StringRef(P, 16);
}
-// Helper to advance a section or symbol iterator multiple increments at a time.
-template<class T>
-static void advance(T &it, size_t Val) {
- while (Val--)
- ++it;
-}
-
static unsigned getCPUType(const MachOObjectFile &O) {
return O.getHeader().cputype;
}
@@ -368,7 +361,7 @@ static Error parseSegmentLoadCommand(
CmdName + " extends past the end of the file");
if (S.vmsize != 0 && S.filesize > S.vmsize)
return malformedError("load command " + Twine(LoadCommandIndex) +
- " fileoff field in " + CmdName +
+ " filesize field in " + CmdName +
" greater than vmsize field");
IsPageZeroSegment |= StringRef("__PAGEZERO").equals(S.segname);
} else
@@ -784,6 +777,52 @@ static Error checkVersCommand(const MachOObjectFile &Obj,
return Error::success();
}
+static Error checkNoteCommand(const MachOObjectFile &Obj,
+ const MachOObjectFile::LoadCommandInfo &Load,
+ uint32_t LoadCommandIndex,
+ std::list<MachOElement> &Elements) {
+ if (Load.C.cmdsize != sizeof(MachO::note_command))
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " LC_NOTE has incorrect cmdsize");
+ MachO::note_command Nt = getStruct<MachO::note_command>(Obj, Load.Ptr);
+ uint64_t FileSize = Obj.getData().size();
+ if (Nt.offset > FileSize)
+ return malformedError("offset field of LC_NOTE command " +
+ Twine(LoadCommandIndex) + " extends "
+ "past the end of the file");
+ uint64_t BigSize = Nt.offset;
+ BigSize += Nt.size;
+ if (BigSize > FileSize)
+ return malformedError("size field plus offset field of LC_NOTE command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ if (Error Err = checkOverlappingElement(Elements, Nt.offset, Nt.size,
+ "LC_NOTE data"))
+ return Err;
+ return Error::success();
+}
+
+static Error
+parseBuildVersionCommand(const MachOObjectFile &Obj,
+ const MachOObjectFile::LoadCommandInfo &Load,
+ SmallVectorImpl<const char*> &BuildTools,
+ uint32_t LoadCommandIndex) {
+ MachO::build_version_command BVC =
+ getStruct<MachO::build_version_command>(Obj, Load.Ptr);
+ if (Load.C.cmdsize !=
+ sizeof(MachO::build_version_command) +
+ BVC.ntools * sizeof(MachO::build_tool_version))
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " LC_BUILD_VERSION_COMMAND has incorrect cmdsize");
+
+ auto Start = Load.Ptr + sizeof(MachO::build_version_command);
+ BuildTools.resize(BVC.ntools);
+ for (unsigned i = 0; i < BVC.ntools; ++i)
+ BuildTools[i] = Start + i * sizeof(MachO::build_tool_version);
+
+ return Error::success();
+}
+
static Error checkRpathCommand(const MachOObjectFile &Obj,
const MachOObjectFile::LoadCommandInfo &Load,
uint32_t LoadCommandIndex) {
@@ -931,7 +970,26 @@ static Error checkThreadCommand(const MachOObjectFile &Obj,
sys::swapByteOrder(count);
state += sizeof(uint32_t);
- if (cputype == MachO::CPU_TYPE_X86_64) {
+ if (cputype == MachO::CPU_TYPE_I386) {
+ if (flavor == MachO::x86_THREAD_STATE32) {
+ if (count != MachO::x86_THREAD_STATE32_COUNT)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " count not x86_THREAD_STATE32_COUNT for "
+ "flavor number " + Twine(nflavor) + " which is "
+ "a x86_THREAD_STATE32 flavor in " + CmdName +
+ " command");
+ if (state + sizeof(MachO::x86_thread_state32_t) > end)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " x86_THREAD_STATE32 extends past end of "
+ "command in " + CmdName + " command");
+ state += sizeof(MachO::x86_thread_state32_t);
+ } else {
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " unknown flavor (" + Twine(flavor) + ") for "
+ "flavor number " + Twine(nflavor) + " in " +
+ CmdName + " command");
+ }
+ } else if (cputype == MachO::CPU_TYPE_X86_64) {
if (flavor == MachO::x86_THREAD_STATE64) {
if (count != MachO::x86_THREAD_STATE64_COUNT)
return malformedError("load command " + Twine(LoadCommandIndex) +
@@ -1280,6 +1338,12 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd,
"LC_VERSION_MIN_WATCHOS")))
return;
+ } else if (Load.C.cmd == MachO::LC_NOTE) {
+ if ((Err = checkNoteCommand(*this, Load, I, Elements)))
+ return;
+ } else if (Load.C.cmd == MachO::LC_BUILD_VERSION) {
+ if ((Err = parseBuildVersionCommand(*this, Load, BuildTools, I)))
+ return;
} else if (Load.C.cmd == MachO::LC_RPATH) {
if ((Err = checkRpathCommand(*this, Load, I)))
return;
@@ -2201,6 +2265,10 @@ std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index,
return std::error_code();
}
+uint32_t MachOObjectFile::getLibraryCount() const {
+ return Libraries.size();
+}
+
section_iterator
MachOObjectFile::getRelocationRelocatedSection(relocation_iterator Rel) const {
DataRefImpl Sec;
@@ -2383,6 +2451,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
*ArchFlag = "armv7em";
return Triple("thumbv7em-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V7K:
+ if (McpuDefault)
+ *McpuDefault = "cortex-a7";
if (ArchFlag)
*ArchFlag = "armv7k";
return Triple("armv7k-apple-darwin");
@@ -2393,6 +2463,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
*ArchFlag = "armv7m";
return Triple("thumbv7m-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V7S:
+ if (McpuDefault)
+ *McpuDefault = "cortex-a7";
if (ArchFlag)
*ArchFlag = "armv7s";
return Triple("armv7s-apple-darwin");
@@ -2402,6 +2474,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
case MachO::CPU_TYPE_ARM64:
switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
case MachO::CPU_SUBTYPE_ARM64_ALL:
+ if (McpuDefault)
+ *McpuDefault = "cyclone";
if (ArchFlag)
*ArchFlag = "arm64";
return Triple("arm64-apple-darwin");
@@ -2674,10 +2748,11 @@ iterator_range<export_iterator> MachOObjectFile::exports() const {
return exports(getDyldInfoExportsTrie());
}
-MachORebaseEntry::MachORebaseEntry(ArrayRef<uint8_t> Bytes, bool is64Bit)
- : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0),
- RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0),
- PointerSize(is64Bit ? 8 : 4), Malformed(false), Done(false) {}
+MachORebaseEntry::MachORebaseEntry(Error *E, const MachOObjectFile *O,
+ ArrayRef<uint8_t> Bytes, bool is64Bit)
+ : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0),
+ SegmentIndex(-1), RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0),
+ PointerSize(is64Bit ? 8 : 4), Done(false) {}
void MachORebaseEntry::moveToFirst() {
Ptr = Opcodes.begin();
@@ -2691,22 +2766,29 @@ void MachORebaseEntry::moveToEnd() {
}
void MachORebaseEntry::moveNext() {
+ ErrorAsOutParameter ErrAsOutParam(E);
// If in the middle of some loop, move to next rebasing in loop.
SegmentOffset += AdvanceAmount;
if (RemainingLoopCount) {
--RemainingLoopCount;
return;
}
+ // REBASE_OPCODE_DONE is only used for padding if we are not aligned to the
+ // pointer size, so it is possible to reach the end without ever having seen
+ // REBASE_OPCODE_DONE.
if (Ptr == Opcodes.end()) {
Done = true;
return;
}
bool More = true;
- while (More && !Malformed) {
+ while (More) {
// Parse next opcode and set up next loop.
+ const uint8_t *OpcodeStart = Ptr;
uint8_t Byte = *Ptr++;
uint8_t ImmValue = Byte & MachO::REBASE_IMMEDIATE_MASK;
uint8_t Opcode = Byte & MachO::REBASE_OPCODE_MASK;
+ uint32_t Count, Skip;
+ const char *error = nullptr;
switch (Opcode) {
case MachO::REBASE_OPCODE_DONE:
More = false;
@@ -2716,6 +2798,13 @@ void MachORebaseEntry::moveNext() {
break;
case MachO::REBASE_OPCODE_SET_TYPE_IMM:
RebaseType = ImmValue;
+ if (RebaseType > MachO::REBASE_TYPE_TEXT_PCREL32) {
+ *E = malformedError("for REBASE_OPCODE_SET_TYPE_IMM bad bind type: " +
+ Twine((int)RebaseType) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE(
"mach-o-rebase",
llvm::dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: "
@@ -2723,7 +2812,23 @@ void MachORebaseEntry::moveNext() {
break;
case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
SegmentIndex = ImmValue;
- SegmentOffset = readULEB128();
+ SegmentOffset = readULEB128(&error);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
+ true);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE(
"mach-o-rebase",
llvm::dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: "
@@ -2732,22 +2837,80 @@ void MachORebaseEntry::moveNext() {
<< "\n");
break;
case MachO::REBASE_OPCODE_ADD_ADDR_ULEB:
- SegmentOffset += readULEB128();
+ SegmentOffset += readULEB128(&error);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_ADD_ADDR_ULEB " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
+ true);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_ADD_ADDR_ULEB " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE("mach-o-rebase",
llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: "
<< format("SegmentOffset=0x%06X",
SegmentOffset) << "\n");
break;
case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED:
+ error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
+ true);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
SegmentOffset += ImmValue * PointerSize;
+ error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
+ false);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_ADD_ADDR_IMM_SCALED "
+ " (after adding immediate times the pointer size) " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE("mach-o-rebase",
llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: "
<< format("SegmentOffset=0x%06X",
SegmentOffset) << "\n");
break;
case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES:
+ error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
+ true);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_DO_REBASE_IMM_TIMES " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
AdvanceAmount = PointerSize;
- RemainingLoopCount = ImmValue - 1;
+ Skip = 0;
+ Count = ImmValue;
+ if (ImmValue != 0)
+ RemainingLoopCount = ImmValue - 1;
+ else
+ RemainingLoopCount = 0;
+ error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize,
+ SegmentIndex, SegmentOffset);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_DO_REBASE_IMM_TIMES "
+ + Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE(
"mach-o-rebase",
llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: "
@@ -2757,8 +2920,38 @@ void MachORebaseEntry::moveNext() {
<< "\n");
return;
case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES:
+ error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
+ true);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
AdvanceAmount = PointerSize;
- RemainingLoopCount = readULEB128() - 1;
+ Skip = 0;
+ Count = readULEB128(&error);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ if (Count != 0)
+ RemainingLoopCount = Count - 1;
+ else
+ RemainingLoopCount = 0;
+ error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize,
+ SegmentIndex, SegmentOffset);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES "
+ + Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE(
"mach-o-rebase",
llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: "
@@ -2768,8 +2961,35 @@ void MachORebaseEntry::moveNext() {
<< "\n");
return;
case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB:
- AdvanceAmount = readULEB128() + PointerSize;
+ error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
+ true);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ Skip = readULEB128(&error);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ AdvanceAmount = Skip + PointerSize;
+ Count = 1;
RemainingLoopCount = 0;
+ error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize,
+ SegmentIndex, SegmentOffset);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB "
+ + Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE(
"mach-o-rebase",
llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: "
@@ -2779,8 +2999,46 @@ void MachORebaseEntry::moveNext() {
<< "\n");
return;
case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
- RemainingLoopCount = readULEB128() - 1;
- AdvanceAmount = readULEB128() + PointerSize;
+ error = O->RebaseEntryCheckSegAndOffset(SegmentIndex, SegmentOffset,
+ true);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_"
+ "ULEB " + Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ Count = readULEB128(&error);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_"
+ "ULEB " + Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ if (Count != 0)
+ RemainingLoopCount = Count - 1;
+ else
+ RemainingLoopCount = 0;
+ Skip = readULEB128(&error);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_"
+ "ULEB " + Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ AdvanceAmount = Skip + PointerSize;
+
+ error = O->RebaseEntryCheckCountAndSkip(Count, Skip, PointerSize,
+ SegmentIndex, SegmentOffset);
+ if (error) {
+ *E = malformedError("for REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_"
+ "ULEB " + Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE(
"mach-o-rebase",
llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: "
@@ -2790,23 +3048,25 @@ void MachORebaseEntry::moveNext() {
<< "\n");
return;
default:
- Malformed = true;
+ *E = malformedError("bad rebase info (bad opcode value 0x" +
+ utohexstr(Opcode) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
}
}
}
-uint64_t MachORebaseEntry::readULEB128() {
+uint64_t MachORebaseEntry::readULEB128(const char **error) {
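+ // Let decodeULEB128 bounds-check against the end of the opcode stream and
+ // report truncated or malformed encodings through *error.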
unsigned Count;
- uint64_t Result = decodeULEB128(Ptr, &Count);
+ uint64_t Result = decodeULEB128(Ptr, &Count, Opcodes.end(), error);
Ptr += Count;
- if (Ptr > Opcodes.end()) {
+ if (Ptr > Opcodes.end())
Ptr = Opcodes.end();
- Malformed = true;
- }
return Result;
}
-uint32_t MachORebaseEntry::segmentIndex() const { return SegmentIndex; }
+int32_t MachORebaseEntry::segmentIndex() const { return SegmentIndex; }
uint64_t MachORebaseEntry::segmentOffset() const { return SegmentOffset; }
@@ -2822,6 +3082,24 @@ StringRef MachORebaseEntry::typeName() const {
return "unknown";
}
+// For use with the SegIndex of a checked Mach-O Rebase entry
+// to get the segment name.
+StringRef MachORebaseEntry::segmentName() const {
+ return O->BindRebaseSegmentName(SegmentIndex);
+}
+
+// For use with a SegIndex,SegOffset pair from a checked Mach-O Rebase entry
+// to get the section name.
+StringRef MachORebaseEntry::sectionName() const {
+ return O->BindRebaseSectionName(SegmentIndex, SegmentOffset);
+}
+
+// For use with a SegIndex,SegOffset pair from a checked Mach-O Rebase entry
+// to get the address.
+uint64_t MachORebaseEntry::address() const {
+ return O->BindRebaseAddress(SegmentIndex, SegmentOffset);
+}
+
bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const {
#ifdef EXPENSIVE_CHECKS
assert(Opcodes == Other.Opcodes && "compare iterators of different files");
@@ -2834,25 +3112,29 @@ bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const {
}
iterator_range<rebase_iterator>
-MachOObjectFile::rebaseTable(ArrayRef<uint8_t> Opcodes, bool is64) {
- MachORebaseEntry Start(Opcodes, is64);
+MachOObjectFile::rebaseTable(Error &Err, MachOObjectFile *O,
+ ArrayRef<uint8_t> Opcodes, bool is64) {
+ if (O->BindRebaseSectionTable == nullptr)
+ O->BindRebaseSectionTable = llvm::make_unique<BindRebaseSegInfo>(O);
+ MachORebaseEntry Start(&Err, O, Opcodes, is64);
Start.moveToFirst();
- MachORebaseEntry Finish(Opcodes, is64);
+ MachORebaseEntry Finish(&Err, O, Opcodes, is64);
Finish.moveToEnd();
return make_range(rebase_iterator(Start), rebase_iterator(Finish));
}
-iterator_range<rebase_iterator> MachOObjectFile::rebaseTable() const {
- return rebaseTable(getDyldInfoRebaseOpcodes(), is64Bit());
+iterator_range<rebase_iterator> MachOObjectFile::rebaseTable(Error &Err) {
+ return rebaseTable(Err, this, getDyldInfoRebaseOpcodes(), is64Bit());
}
-MachOBindEntry::MachOBindEntry(ArrayRef<uint8_t> Bytes, bool is64Bit, Kind BK)
- : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0),
- Ordinal(0), Flags(0), Addend(0), RemainingLoopCount(0), AdvanceAmount(0),
- BindType(0), PointerSize(is64Bit ? 8 : 4),
- TableKind(BK), Malformed(false), Done(false) {}
+MachOBindEntry::MachOBindEntry(Error *E, const MachOObjectFile *O,
+ ArrayRef<uint8_t> Bytes, bool is64Bit, Kind BK)
+ : E(E), O(O), Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0),
+ SegmentIndex(-1), LibraryOrdinalSet(false), Ordinal(0), Flags(0),
+ Addend(0), RemainingLoopCount(0), AdvanceAmount(0), BindType(0),
+ PointerSize(is64Bit ? 8 : 4), TableKind(BK), Done(false) {}
void MachOBindEntry::moveToFirst() {
Ptr = Opcodes.begin();
@@ -2866,24 +3148,31 @@ void MachOBindEntry::moveToEnd() {
}
void MachOBindEntry::moveNext() {
+ ErrorAsOutParameter ErrAsOutParam(E);
// If in the middle of some loop, move to next binding in loop.
SegmentOffset += AdvanceAmount;
if (RemainingLoopCount) {
--RemainingLoopCount;
return;
}
+ // BIND_OPCODE_DONE is only used for padding if we are not aligned to
+ // pointer size. Therefore it is possible to reach the end without ever having
+ // seen BIND_OPCODE_DONE.
if (Ptr == Opcodes.end()) {
Done = true;
return;
}
bool More = true;
- while (More && !Malformed) {
+ while (More) {
// Parse next opcode and set up next loop.
+ const uint8_t *OpcodeStart = Ptr;
uint8_t Byte = *Ptr++;
uint8_t ImmValue = Byte & MachO::BIND_IMMEDIATE_MASK;
uint8_t Opcode = Byte & MachO::BIND_OPCODE_MASK;
int8_t SignExtended;
const uint8_t *SymStart;
+ uint32_t Count, Skip;
+ const char *error = nullptr;
switch (Opcode) {
case MachO::BIND_OPCODE_DONE:
if (TableKind == Kind::Lazy) {
@@ -2899,28 +3188,81 @@ void MachOBindEntry::moveNext() {
break;
}
More = false;
- Done = true;
moveToEnd();
DEBUG_WITH_TYPE("mach-o-bind", llvm::dbgs() << "BIND_OPCODE_DONE\n");
break;
case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_IMM:
+ if (TableKind == Kind::Weak) {
+ *E = malformedError("BIND_OPCODE_SET_DYLIB_ORDINAL_IMM not allowed in "
+ "weak bind table for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
Ordinal = ImmValue;
+ LibraryOrdinalSet = true;
+ if (ImmValue > O->getLibraryCount()) {
+ *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad "
+ "library ordinal: " + Twine((int)ImmValue) + " (max " +
+ Twine((int)O->getLibraryCount()) + ") for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE(
"mach-o-bind",
llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: "
<< "Ordinal=" << Ordinal << "\n");
break;
case MachO::BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB:
- Ordinal = readULEB128();
+ if (TableKind == Kind::Weak) {
+ *E = malformedError("BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB not allowed in "
+ "weak bind table for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ Ordinal = readULEB128(&error);
+ LibraryOrdinalSet = true;
+ if (error) {
+ *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ if (Ordinal > (int)O->getLibraryCount()) {
+ *E = malformedError("for BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB bad "
+ "library ordinal: " + Twine((int)Ordinal) + " (max " +
+ Twine((int)O->getLibraryCount()) + ") for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE(
"mach-o-bind",
llvm::dbgs() << "BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: "
<< "Ordinal=" << Ordinal << "\n");
break;
case MachO::BIND_OPCODE_SET_DYLIB_SPECIAL_IMM:
+ if (TableKind == Kind::Weak) {
+ *E = malformedError("BIND_OPCODE_SET_DYLIB_SPECIAL_IMM not allowed in "
+ "weak bind table for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
if (ImmValue) {
SignExtended = MachO::BIND_OPCODE_MASK | ImmValue;
Ordinal = SignExtended;
+ LibraryOrdinalSet = true;
+ if (Ordinal < MachO::BIND_SPECIAL_DYLIB_FLAT_LOOKUP) {
+ *E = malformedError("for BIND_OPCODE_SET_DYLIB_SPECIAL_IMM unknown "
+ "special ordinal: " + Twine((int)Ordinal) + " for opcode at: "
+ "0x" + utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
} else
Ordinal = 0;
DEBUG_WITH_TYPE(
@@ -2931,9 +3273,16 @@ void MachOBindEntry::moveNext() {
case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
Flags = ImmValue;
SymStart = Ptr;
- while (*Ptr) {
+ while (*Ptr && (Ptr < Opcodes.end())) {
++Ptr;
}
+ if (Ptr == Opcodes.end()) {
+ *E = malformedError("for BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM "
+ "symbol name extends past opcodes for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
SymbolName = StringRef(reinterpret_cast<const char*>(SymStart),
Ptr-SymStart);
++Ptr;
@@ -2948,15 +3297,27 @@ void MachOBindEntry::moveNext() {
break;
case MachO::BIND_OPCODE_SET_TYPE_IMM:
BindType = ImmValue;
+ if (ImmValue > MachO::BIND_TYPE_TEXT_PCREL32) {
+ *E = malformedError("for BIND_OPCODE_SET_TYPE_IMM bad bind type: " +
+ Twine((int)ImmValue) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE(
"mach-o-bind",
llvm::dbgs() << "BIND_OPCODE_SET_TYPE_IMM: "
<< "BindType=" << (int)BindType << "\n");
break;
case MachO::BIND_OPCODE_SET_ADDEND_SLEB:
- Addend = readSLEB128();
- if (TableKind == Kind::Lazy)
- Malformed = true;
+ Addend = readSLEB128(&error);
+ if (error) {
+ *E = malformedError("for BIND_OPCODE_SET_ADDEND_SLEB " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE(
"mach-o-bind",
llvm::dbgs() << "BIND_OPCODE_SET_ADDEND_SLEB: "
@@ -2964,7 +3325,22 @@ void MachOBindEntry::moveNext() {
break;
case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
SegmentIndex = ImmValue;
- SegmentOffset = readULEB128();
+ SegmentOffset = readULEB128(&error);
+ if (error) {
+ *E = malformedError("for BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
+ if (error) {
+ *E = malformedError("for BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE(
"mach-o-bind",
llvm::dbgs() << "BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: "
@@ -2973,7 +3349,22 @@ void MachOBindEntry::moveNext() {
<< "\n");
break;
case MachO::BIND_OPCODE_ADD_ADDR_ULEB:
- SegmentOffset += readULEB128();
+ SegmentOffset += readULEB128(&error);
+ if (error) {
+ *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
+ if (error) {
+ *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE("mach-o-bind",
llvm::dbgs() << "BIND_OPCODE_ADD_ADDR_ULEB: "
<< format("SegmentOffset=0x%06X",
@@ -2982,16 +3373,83 @@ void MachOBindEntry::moveNext() {
case MachO::BIND_OPCODE_DO_BIND:
AdvanceAmount = PointerSize;
RemainingLoopCount = 0;
+ error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
+ if (error) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND " + Twine(error) +
+ " for opcode at: 0x" + utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ if (SymbolName == StringRef()) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND missing preceding "
+ "BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ if (!LibraryOrdinalSet && TableKind != Kind::Weak) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND missing preceding "
+ "BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE("mach-o-bind",
llvm::dbgs() << "BIND_OPCODE_DO_BIND: "
<< format("SegmentOffset=0x%06X",
SegmentOffset) << "\n");
return;
case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
- AdvanceAmount = readULEB128() + PointerSize;
+ if (TableKind == Kind::Lazy) {
+ *E = malformedError("BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB not allowed in "
+ "lazy bind table for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
+ if (error) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ if (SymbolName == StringRef()) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB missing "
+ "preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for opcode "
+ "at: 0x" + utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ if (!LibraryOrdinalSet && TableKind != Kind::Weak) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB missing "
+ "preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ AdvanceAmount = readULEB128(&error) + PointerSize;
+ if (error) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ // Note: this is not really an error until the next bind, but it makes no
+ // sense for a BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB not to be followed by
+ // another bind operation.
+ error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset +
+ AdvanceAmount, false);
+ if (error) {
+ *E = malformedError("for BIND_OPCODE_ADD_ADDR_ULEB (after adding "
+ "ULEB) " + Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
RemainingLoopCount = 0;
- if (TableKind == Kind::Lazy)
- Malformed = true;
DEBUG_WITH_TYPE(
"mach-o-bind",
llvm::dbgs() << "BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: "
@@ -3001,10 +3459,47 @@ void MachOBindEntry::moveNext() {
<< "\n");
return;
case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
+ if (TableKind == Kind::Lazy) {
+ *E = malformedError("BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED not "
+ "allowed in lazy bind table for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
+ if (error) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ if (SymbolName == StringRef()) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED "
+ "missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for "
+ "opcode at: 0x" + utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ if (!LibraryOrdinalSet && TableKind != Kind::Weak) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED "
+ "missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode "
+ "at: 0x" + utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
AdvanceAmount = ImmValue * PointerSize + PointerSize;
RemainingLoopCount = 0;
- if (TableKind == Kind::Lazy)
- Malformed = true;
+ error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset +
+ AdvanceAmount, false);
+ if (error) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED "
+ " (after adding immediate times the pointer size) " +
+ Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE("mach-o-bind",
llvm::dbgs()
<< "BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: "
@@ -3012,10 +3507,65 @@ void MachOBindEntry::moveNext() {
SegmentOffset) << "\n");
return;
case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
- RemainingLoopCount = readULEB128() - 1;
- AdvanceAmount = readULEB128() + PointerSize;
- if (TableKind == Kind::Lazy)
- Malformed = true;
+ if (TableKind == Kind::Lazy) {
+ *E = malformedError("BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB not "
+ "allowed in lazy bind table for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ Count = readULEB128(&error);
+ if (Count != 0)
+ RemainingLoopCount = Count - 1;
+ else
+ RemainingLoopCount = 0;
+ if (error) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB "
+ " (count value) " + Twine(error) + " for opcode at"
+ ": 0x" + utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ Skip = readULEB128(&error);
+ AdvanceAmount = Skip + PointerSize;
+ if (error) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB "
+ " (skip value) " + Twine(error) + " for opcode at"
+ ": 0x" + utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ error = O->BindEntryCheckSegAndOffset(SegmentIndex, SegmentOffset, true);
+ if (error) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB "
+ + Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ if (SymbolName == StringRef()) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB "
+ "missing preceding BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM for "
+ "opcode at: 0x" + utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ if (!LibraryOrdinalSet && TableKind != Kind::Weak) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB "
+ "missing preceding BIND_OPCODE_SET_DYLIB_ORDINAL_* for opcode "
+ "at: 0x" + utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
+ error = O->BindEntryCheckCountAndSkip(Count, Skip, PointerSize,
+ SegmentIndex, SegmentOffset);
+ if (error) {
+ *E = malformedError("for BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB "
+ + Twine(error) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
+ }
DEBUG_WITH_TYPE(
"mach-o-bind",
llvm::dbgs() << "BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: "
@@ -3025,34 +3575,34 @@ void MachOBindEntry::moveNext() {
<< "\n");
return;
default:
- Malformed = true;
+ *E = malformedError("bad bind info (bad opcode value 0x" +
+ utohexstr(Opcode) + " for opcode at: 0x" +
+ utohexstr(OpcodeStart - Opcodes.begin()));
+ moveToEnd();
+ return;
}
}
}
-uint64_t MachOBindEntry::readULEB128() {
+uint64_t MachOBindEntry::readULEB128(const char **error) {
unsigned Count;
- uint64_t Result = decodeULEB128(Ptr, &Count);
+ uint64_t Result = decodeULEB128(Ptr, &Count, Opcodes.end(), error);
Ptr += Count;
- if (Ptr > Opcodes.end()) {
+ if (Ptr > Opcodes.end())
Ptr = Opcodes.end();
- Malformed = true;
- }
return Result;
}
-int64_t MachOBindEntry::readSLEB128() {
+int64_t MachOBindEntry::readSLEB128(const char **error) {
unsigned Count;
- int64_t Result = decodeSLEB128(Ptr, &Count);
+ int64_t Result = decodeSLEB128(Ptr, &Count, Opcodes.end(), error);
Ptr += Count;
- if (Ptr > Opcodes.end()) {
+ if (Ptr > Opcodes.end())
Ptr = Opcodes.end();
- Malformed = true;
- }
return Result;
}
-uint32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; }
+int32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; }
uint64_t MachOBindEntry::segmentOffset() const { return SegmentOffset; }
@@ -3076,6 +3626,24 @@ uint32_t MachOBindEntry::flags() const { return Flags; }
int MachOBindEntry::ordinal() const { return Ordinal; }
+// For use with the SegIndex of a checked Mach-O Bind entry
+// to get the segment name.
+StringRef MachOBindEntry::segmentName() const {
+ return O->BindRebaseSegmentName(SegmentIndex);
+}
+
+// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind entry
+// to get the section name.
+StringRef MachOBindEntry::sectionName() const {
+ return O->BindRebaseSectionName(SegmentIndex, SegmentOffset);
+}
+
+// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind entry
+// to get the address.
+uint64_t MachOBindEntry::address() const {
+ return O->BindRebaseAddress(SegmentIndex, SegmentOffset);
+}
+
bool MachOBindEntry::operator==(const MachOBindEntry &Other) const {
#ifdef EXPENSIVE_CHECKS
assert(Opcodes == Other.Opcodes && "compare iterators of different files");
@@ -3087,30 +3655,149 @@ bool MachOBindEntry::operator==(const MachOBindEntry &Other) const {
(Done == Other.Done);
}
+// Build table of sections so SegIndex/SegOffset pairs can be translated.
+BindRebaseSegInfo::BindRebaseSegInfo(const object::MachOObjectFile *Obj) {
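+ // Sections are visited in segment order, so a change in the final segment
+ // name marks the start of a new segment. Starting CurSegIndex at 1 when a
+ // __PAGEZERO segment exists leaves segment index 0 for it, since it has no
+ // sections of its own.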
+ uint32_t CurSegIndex = Obj->hasPageZeroSegment() ? 1 : 0;
+ StringRef CurSegName;
+ uint64_t CurSegAddress;
+ for (const SectionRef &Section : Obj->sections()) {
+ SectionInfo Info;
+ Section.getName(Info.SectionName);
+ Info.Address = Section.getAddress();
+ Info.Size = Section.getSize();
+ Info.SegmentName =
+ Obj->getSectionFinalSegmentName(Section.getRawDataRefImpl());
+ if (!Info.SegmentName.equals(CurSegName)) {
+ ++CurSegIndex;
+ CurSegName = Info.SegmentName;
+ CurSegAddress = Info.Address;
+ }
+ Info.SegmentIndex = CurSegIndex - 1;
+ Info.OffsetInSegment = Info.Address - CurSegAddress;
+ Info.SegmentStartAddress = CurSegAddress;
+ Sections.push_back(Info);
+ }
+ MaxSegIndex = CurSegIndex;
+}
+
+// For use with a SegIndex,SegOffset pair in MachOBindEntry::moveNext() to
+// validate a MachOBindEntry or MachORebaseEntry.
+const char *BindRebaseSegInfo::checkSegAndOffset(int32_t SegIndex,
+ uint64_t SegOffset,
+ bool endInvalid) {
+ if (SegIndex == -1)
+ return "missing preceding *_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB";
+ if (SegIndex >= MaxSegIndex)
+ return "bad segIndex (too large)";
+ for (const SectionInfo &SI : Sections) {
+ if (SI.SegmentIndex != SegIndex)
+ continue;
+ if (SI.OffsetInSegment > SegOffset)
+ continue;
+ if (SegOffset > (SI.OffsetInSegment + SI.Size))
+ continue;
+ if (endInvalid && SegOffset >= (SI.OffsetInSegment + SI.Size))
+ continue;
+ return nullptr;
+ }
+ return "bad segOffset, too large";
+}
+
+// For use in MachOBindEntry::moveNext() to validate a MachOBindEntry for
+// the BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB opcode and for use in
+// MachORebaseEntry::moveNext() to validate a MachORebaseEntry for
+// REBASE_OPCODE_DO_*_TIMES* opcodes. The SegIndex and SegOffset must have
+// been already checked.
+const char *BindRebaseSegInfo::checkCountAndSkip(uint32_t Count, uint32_t Skip,
+ uint8_t PointerSize,
+ int32_t SegIndex,
+ uint64_t SegOffset) {
+ const SectionInfo &SI = findSection(SegIndex, SegOffset);
+ uint64_t addr = SI.SegmentStartAddress + SegOffset;
+ if (addr >= SI.Address + SI.Size)
+ return "bad segOffset, too large";
+ uint64_t i = 0;
+ if (Count > 1)
+ i = (Skip + PointerSize) * (Count - 1);
+ else if (Count == 1)
+ i = Skip + PointerSize;
+ if (addr + i >= SI.Address + SI.Size) {
+ // Rebase opcodes may legitimately step from one section into another.
+ uint64_t TrailingSegOffset = (addr + i) - SI.SegmentStartAddress;
+ const char *error = checkSegAndOffset(SegIndex, TrailingSegOffset, false);
+ if (error)
+ return "bad count and skip, too large";
+ }
+ return nullptr;
+}
+
+// For use with the SegIndex of a checked Mach-O Bind or Rebase entry
+// to get the segment name.
+StringRef BindRebaseSegInfo::segmentName(int32_t SegIndex) {
+ for (const SectionInfo &SI : Sections) {
+ if (SI.SegmentIndex == SegIndex)
+ return SI.SegmentName;
+ }
+ llvm_unreachable("invalid SegIndex");
+}
+
+// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase
+// to get the SectionInfo.
+const BindRebaseSegInfo::SectionInfo &BindRebaseSegInfo::findSection(
+ int32_t SegIndex, uint64_t SegOffset) {
+ for (const SectionInfo &SI : Sections) {
+ if (SI.SegmentIndex != SegIndex)
+ continue;
+ if (SI.OffsetInSegment > SegOffset)
+ continue;
+ if (SegOffset >= (SI.OffsetInSegment + SI.Size))
+ continue;
+ return SI;
+ }
+ llvm_unreachable("SegIndex and SegOffset not in any section");
+}
+
+// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase
+// entry to get the section name.
+StringRef BindRebaseSegInfo::sectionName(int32_t SegIndex,
+ uint64_t SegOffset) {
+ return findSection(SegIndex, SegOffset).SectionName;
+}
+
+// For use with a SegIndex,SegOffset pair from a checked Mach-O Bind or Rebase
+// entry to get the address.
+uint64_t BindRebaseSegInfo::address(uint32_t SegIndex, uint64_t OffsetInSeg) {
+ const SectionInfo &SI = findSection(SegIndex, OffsetInSeg);
+ return SI.SegmentStartAddress + OffsetInSeg;
+}
+
iterator_range<bind_iterator>
-MachOObjectFile::bindTable(ArrayRef<uint8_t> Opcodes, bool is64,
+MachOObjectFile::bindTable(Error &Err, MachOObjectFile *O,
+ ArrayRef<uint8_t> Opcodes, bool is64,
MachOBindEntry::Kind BKind) {
- MachOBindEntry Start(Opcodes, is64, BKind);
+ if (O->BindRebaseSectionTable == nullptr)
+ O->BindRebaseSectionTable = llvm::make_unique<BindRebaseSegInfo>(O);
+ MachOBindEntry Start(&Err, O, Opcodes, is64, BKind);
Start.moveToFirst();
- MachOBindEntry Finish(Opcodes, is64, BKind);
+ MachOBindEntry Finish(&Err, O, Opcodes, is64, BKind);
Finish.moveToEnd();
return make_range(bind_iterator(Start), bind_iterator(Finish));
}
-iterator_range<bind_iterator> MachOObjectFile::bindTable() const {
- return bindTable(getDyldInfoBindOpcodes(), is64Bit(),
+iterator_range<bind_iterator> MachOObjectFile::bindTable(Error &Err) {
+ return bindTable(Err, this, getDyldInfoBindOpcodes(), is64Bit(),
MachOBindEntry::Kind::Regular);
}
-iterator_range<bind_iterator> MachOObjectFile::lazyBindTable() const {
- return bindTable(getDyldInfoLazyBindOpcodes(), is64Bit(),
+iterator_range<bind_iterator> MachOObjectFile::lazyBindTable(Error &Err) {
+ return bindTable(Err, this, getDyldInfoLazyBindOpcodes(), is64Bit(),
MachOBindEntry::Kind::Lazy);
}
-iterator_range<bind_iterator> MachOObjectFile::weakBindTable() const {
- return bindTable(getDyldInfoWeakBindOpcodes(), is64Bit(),
+iterator_range<bind_iterator> MachOObjectFile::weakBindTable(Error &Err) {
+ return bindTable(Err, this, getDyldInfoWeakBindOpcodes(), is64Bit(),
MachOBindEntry::Kind::Weak);
}
@@ -3289,6 +3976,21 @@ MachOObjectFile::getVersionMinLoadCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::version_min_command>(*this, L.Ptr);
}
+MachO::note_command
+MachOObjectFile::getNoteLoadCommand(const LoadCommandInfo &L) const {
+ return getStruct<MachO::note_command>(*this, L.Ptr);
+}
+
+MachO::build_version_command
+MachOObjectFile::getBuildVersionLoadCommand(const LoadCommandInfo &L) const {
+ return getStruct<MachO::build_version_command>(*this, L.Ptr);
+}
+
+MachO::build_tool_version
+MachOObjectFile::getBuildToolVersion(unsigned index) const {
+ return getStruct<MachO::build_tool_version>(*this, BuildTools[index]);
+}
+
MachO::dylib_command
MachOObjectFile::getDylibIDLoadCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::dylib_command>(*this, L.Ptr);
diff --git a/lib/Object/ModuleSummaryIndexObjectFile.cpp b/lib/Object/ModuleSummaryIndexObjectFile.cpp
index 11ace84b9ceb..de1ddab88fd4 100644
--- a/lib/Object/ModuleSummaryIndexObjectFile.cpp
+++ b/lib/Object/ModuleSummaryIndexObjectFile.cpp
@@ -96,13 +96,18 @@ ModuleSummaryIndexObjectFile::create(MemoryBufferRef Object) {
// Parse the module summary index out of an IR file and return the summary
// index object if found, or nullptr if not.
Expected<std::unique_ptr<ModuleSummaryIndex>>
-llvm::getModuleSummaryIndexForFile(StringRef Path) {
+llvm::getModuleSummaryIndexForFile(StringRef Path, StringRef Identifier) {
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
MemoryBuffer::getFileOrSTDIN(Path);
std::error_code EC = FileOrErr.getError();
if (EC)
return errorCodeToError(EC);
- MemoryBufferRef BufferRef = (FileOrErr.get())->getMemBufferRef();
+ std::unique_ptr<MemoryBuffer> MemBuffer = std::move(FileOrErr.get());
+ // If Identifier is non-empty, use it as the buffer identifier, which
+ // will become the module path in the index.
+ if (Identifier.empty())
+ Identifier = MemBuffer->getBufferIdentifier();
+ MemoryBufferRef BufferRef(MemBuffer->getBuffer(), Identifier);
if (IgnoreEmptyThinLTOIndexFile && !BufferRef.getBufferSize())
return nullptr;
Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
diff --git a/lib/Object/ModuleSymbolTable.cpp b/lib/Object/ModuleSymbolTable.cpp
index 90488007ff59..9a935d8e0869 100644
--- a/lib/Object/ModuleSymbolTable.cpp
+++ b/lib/Object/ModuleSymbolTable.cpp
@@ -43,27 +43,98 @@ void ModuleSymbolTable::addModule(Module *M) {
else
FirstMod = M;
- for (Function &F : *M)
- SymTab.push_back(&F);
- for (GlobalVariable &GV : M->globals())
+ for (GlobalValue &GV : M->global_values())
SymTab.push_back(&GV);
- for (GlobalAlias &GA : M->aliases())
- SymTab.push_back(&GA);
-
- CollectAsmSymbols(Triple(M->getTargetTriple()), M->getModuleInlineAsm(),
- [this](StringRef Name, BasicSymbolRef::Flags Flags) {
- SymTab.push_back(new (AsmSymbols.Allocate())
- AsmSymbol(Name, Flags));
- });
+
+ CollectAsmSymbols(*M, [this](StringRef Name, BasicSymbolRef::Flags Flags) {
+ SymTab.push_back(new (AsmSymbols.Allocate()) AsmSymbol(Name, Flags));
+ });
+}
+
+// Ensure ELF .symver aliases get the same binding as the defined symbol
+// they alias with.
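+//
+// For example, inline asm such as
+//   .symver foo_impl, foo@@VERS_1.0
+// records foo@@VERS_1.0 as an alias of foo_impl; the alias must end up with
+// the same global/weak/local binding as foo_impl, whether that binding was
+// declared in the asm or only in the IR.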
+static void handleSymverAliases(const Module &M, RecordStreamer &Streamer) {
+ if (Streamer.symverAliases().empty())
+ return;
+
+ // The name in the assembler will be mangled, but the name in the IR
+ // might not, so we first compute a mapping from mangled name to GV.
+ Mangler Mang;
+ SmallString<64> MangledName;
+ StringMap<const GlobalValue *> MangledNameMap;
+ auto GetMangledName = [&](const GlobalValue &GV) {
+ if (!GV.hasName())
+ return;
+
+ MangledName.clear();
+ MangledName.reserve(GV.getName().size() + 1);
+ Mang.getNameWithPrefix(MangledName, &GV, /*CannotUsePrivateLabel=*/false);
+ MangledNameMap[MangledName] = &GV;
+ };
+ for (const Function &F : M)
+ GetMangledName(F);
+ for (const GlobalVariable &GV : M.globals())
+ GetMangledName(GV);
+ for (const GlobalAlias &GA : M.aliases())
+ GetMangledName(GA);
+
+ // Walk all the recorded .symver aliases, and set up the binding
+ // for each alias.
+ for (auto &Symver : Streamer.symverAliases()) {
+ const MCSymbol *Aliasee = Symver.first;
+ MCSymbolAttr Attr = MCSA_Invalid;
+
+ // First check if the aliasee binding was recorded in the asm.
+ RecordStreamer::State state = Streamer.getSymbolState(Aliasee);
+ switch (state) {
+ case RecordStreamer::Global:
+ case RecordStreamer::DefinedGlobal:
+ Attr = MCSA_Global;
+ break;
+ case RecordStreamer::UndefinedWeak:
+ case RecordStreamer::DefinedWeak:
+ Attr = MCSA_Weak;
+ break;
+ default:
+ break;
+ }
+
+ // If we don't have a symbol attribute from assembly, then check if
+ // the aliasee was defined in the IR.
+ if (Attr == MCSA_Invalid) {
+ const auto *GV = M.getNamedValue(Aliasee->getName());
+ if (!GV) {
+ auto MI = MangledNameMap.find(Aliasee->getName());
+ if (MI != MangledNameMap.end())
+ GV = MI->second;
+ else
+ continue;
+ }
+ if (GV->hasExternalLinkage())
+ Attr = MCSA_Global;
+ else if (GV->hasLocalLinkage())
+ Attr = MCSA_Local;
+ else if (GV->isWeakForLinker())
+ Attr = MCSA_Weak;
+ }
+ if (Attr == MCSA_Invalid)
+ continue;
+
+ // Set the detected binding on each alias with this aliasee.
+ for (auto &Alias : Symver.second)
+ Streamer.EmitSymbolAttribute(Alias, Attr);
+ }
}
void ModuleSymbolTable::CollectAsmSymbols(
- const Triple &TT, StringRef InlineAsm,
+ const Module &M,
function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) {
+ StringRef InlineAsm = M.getModuleInlineAsm();
if (InlineAsm.empty())
return;
std::string Err;
+ const Triple TT(M.getTargetTriple());
const Target *T = TargetRegistry::lookupTarget(TT.str(), Err);
assert(T && T->hasMCAsmParser());
@@ -106,6 +177,8 @@ void ModuleSymbolTable::CollectAsmSymbols(
if (Parser->Run(false))
return;
+ handleSymverAliases(M, Streamer);
+
for (auto &KV : Streamer) {
StringRef Key = KV.first();
RecordStreamer::State Value = KV.second;
diff --git a/lib/Object/RecordStreamer.cpp b/lib/Object/RecordStreamer.cpp
index 572b960bc85f..c9c27451f809 100644
--- a/lib/Object/RecordStreamer.cpp
+++ b/lib/Object/RecordStreamer.cpp
@@ -1,4 +1,4 @@
-//===-- RecordStreamer.cpp - Record asm definde and used symbols ----------===//
+//===-- RecordStreamer.cpp - Record asm defined and used symbols ----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -78,11 +78,11 @@ RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); }
RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {}
void RecordStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) {
+ const MCSubtargetInfo &STI, bool) {
MCStreamer::EmitInstruction(Inst, STI);
}
-void RecordStreamer::EmitLabel(MCSymbol *Symbol) {
+void RecordStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) {
MCStreamer::EmitLabel(Symbol);
markDefined(*Symbol);
}
@@ -110,3 +110,8 @@ void RecordStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {
markDefined(*Symbol);
}
+
+void RecordStreamer::emitELFSymverDirective(MCSymbol *Alias,
+ const MCSymbol *Aliasee) {
+ SymverAliasMap[Aliasee].push_back(Alias);
+}
diff --git a/lib/Object/RecordStreamer.h b/lib/Object/RecordStreamer.h
index 617d8a43fbd2..a845ecd786a8 100644
--- a/lib/Object/RecordStreamer.h
+++ b/lib/Object/RecordStreamer.h
@@ -20,6 +20,10 @@ public:
private:
StringMap<State> Symbols;
+ // Map of aliases created by .symver directives, saved so we can update
+ // their symbol binding after parsing completes. This maps from each
+ // aliasee to its list of aliases.
+ DenseMap<const MCSymbol *, std::vector<MCSymbol *>> SymverAliasMap;
void markDefined(const MCSymbol &Symbol);
void markGlobal(const MCSymbol &Symbol, MCSymbolAttr Attribute);
void markUsed(const MCSymbol &Symbol);
@@ -30,14 +34,29 @@ public:
const_iterator begin();
const_iterator end();
RecordStreamer(MCContext &Context);
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
- void EmitLabel(MCSymbol *Symbol) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) override;
+ void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
void EmitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
+ /// Record .symver aliases for later processing.
+ void emitELFSymverDirective(MCSymbol *Alias,
+ const MCSymbol *Aliasee) override;
+ /// Return the map of .symver aliasee to associated aliases.
+ DenseMap<const MCSymbol *, std::vector<MCSymbol *>> &symverAliases() {
+ return SymverAliasMap;
+ }
+ /// Get the state recorded for the given symbol.
+ State getSymbolState(const MCSymbol *Sym) {
+ auto SI = Symbols.find(Sym->getName());
+ if (SI == Symbols.end())
+ return NeverSeen;
+ return SI->second;
+ }
};
}
#endif
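
Taken together, the streamer records each aliasee's aliases while the asm is parsed, and the binding is propagated afterwards. A standalone sketch of that flow, with an illustrative Binding enum standing in for MCSymbolAttr (the names are not LLVM's API):

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    enum class Binding { Unknown, Global, Weak, Local };

    int main() {
      // What the streamer would record while parsing inline asm.
      std::map<std::string, std::vector<std::string>> SymverAliases = {
          {"foo", {"foo@VER1", "foo@@VER2"}}};
      std::map<std::string, Binding> SymbolState = {{"foo", Binding::Global}};

      for (const auto &KV : SymverAliases) {
        Binding B = Binding::Unknown;
        auto It = SymbolState.find(KV.first);
        if (It != SymbolState.end())
          B = It->second; // binding seen in the asm itself
        if (B == Binding::Unknown)
          continue; // the patch falls back to the IR definition here
        for (const auto &Alias : KV.second)
          std::cout << Alias << " gets the binding of " << KV.first << "\n";
      }
    }
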
diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp
index 2b61a8a034f6..fc1dca35424e 100644
--- a/lib/Object/WasmObjectFile.cpp
+++ b/lib/Object/WasmObjectFile.cpp
@@ -1,4 +1,4 @@
-//===- WasmObjectFile.cpp - Wasm object file implementation -----*- C++ -*-===//
+//===- WasmObjectFile.cpp - Wasm object file implementation ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,26 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolicFile.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/Wasm.h"
+#include <algorithm>
+#include <cstdint>
+#include <system_error>
-namespace llvm {
-namespace object {
+using namespace llvm;
+using namespace object;
Expected<std::unique_ptr<WasmObjectFile>>
ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) {
@@ -24,34 +38,139 @@ ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) {
return std::move(ObjectFile);
}
-namespace {
+#define VARINT7_MAX ((1<<7)-1)
+#define VARINT7_MIN (-(1<<7))
+#define VARUINT7_MAX (1<<7)
+#define VARUINT1_MAX (1)
-uint32_t readUint32(const uint8_t *&Ptr) {
+static uint8_t readUint8(const uint8_t *&Ptr) { return *Ptr++; }
+
+static uint32_t readUint32(const uint8_t *&Ptr) {
uint32_t Result = support::endian::read32le(Ptr);
Ptr += sizeof(Result);
return Result;
}
-uint64_t readULEB128(const uint8_t *&Ptr) {
+static int32_t readFloat32(const uint8_t *&Ptr) {
+ int32_t Result = 0;
+ memcpy(&Result, Ptr, sizeof(Result));
+ Ptr += sizeof(Result);
+ return Result;
+}
+
+static int64_t readFloat64(const uint8_t *&Ptr) {
+ int64_t Result = 0;
+ memcpy(&Result, Ptr, sizeof(Result));
+ Ptr += sizeof(Result);
+ return Result;
+}
+
+static uint64_t readULEB128(const uint8_t *&Ptr) {
unsigned Count;
uint64_t Result = decodeULEB128(Ptr, &Count);
Ptr += Count;
return Result;
}
-StringRef readString(const uint8_t *&Ptr) {
+static StringRef readString(const uint8_t *&Ptr) {
uint32_t StringLen = readULEB128(Ptr);
StringRef Return = StringRef(reinterpret_cast<const char *>(Ptr), StringLen);
Ptr += StringLen;
return Return;
}
-Error readSection(wasm::WasmSection &Section, const uint8_t *&Ptr,
- const uint8_t *Start) {
+static int64_t readLEB128(const uint8_t *&Ptr) {
+ unsigned Count;
+ uint64_t Result = decodeSLEB128(Ptr, &Count);
+ Ptr += Count;
+ return Result;
+}
+
+static uint8_t readVaruint1(const uint8_t *&Ptr) {
+ int64_t result = readLEB128(Ptr);
+ assert(result <= VARUINT1_MAX && result >= 0);
+ return result;
+}
+
+static int8_t readVarint7(const uint8_t *&Ptr) {
+ int64_t result = readLEB128(Ptr);
+ assert(result <= VARINT7_MAX && result >= VARINT7_MIN);
+ return result;
+}
+
+static uint8_t readVaruint7(const uint8_t *&Ptr) {
+ uint64_t result = readULEB128(Ptr);
+ assert(result <= VARUINT7_MAX);
+ return result;
+}
+
+static int32_t readVarint32(const uint8_t *&Ptr) {
+ int64_t result = readLEB128(Ptr);
+ assert(result <= INT32_MAX && result >= INT32_MIN);
+ return result;
+}
+
+static uint32_t readVaruint32(const uint8_t *&Ptr) {
+ uint64_t result = readULEB128(Ptr);
+ assert(result <= UINT32_MAX);
+ return result;
+}
+
+static int64_t readVarint64(const uint8_t *&Ptr) {
+ return readLEB128(Ptr);
+}
+
+static uint8_t readOpcode(const uint8_t *&Ptr) {
+ return readUint8(Ptr);
+}
+
+static Error readInitExpr(wasm::WasmInitExpr &Expr, const uint8_t *&Ptr) {
+ Expr.Opcode = readOpcode(Ptr);
+
+ switch (Expr.Opcode) {
+ case wasm::WASM_OPCODE_I32_CONST:
+ Expr.Value.Int32 = readVarint32(Ptr);
+ break;
+ case wasm::WASM_OPCODE_I64_CONST:
+ Expr.Value.Int64 = readVarint64(Ptr);
+ break;
+ case wasm::WASM_OPCODE_F32_CONST:
+ Expr.Value.Float32 = readFloat32(Ptr);
+ break;
+ case wasm::WASM_OPCODE_F64_CONST:
+ Expr.Value.Float64 = readFloat64(Ptr);
+ break;
+ case wasm::WASM_OPCODE_GET_GLOBAL:
+ Expr.Value.Global = readUint32(Ptr);
+ break;
+ default:
+ return make_error<GenericBinaryError>("Invalid opcode in init_expr",
+ object_error::parse_failed);
+ }
+
+ uint8_t EndOpcode = readOpcode(Ptr);
+ if (EndOpcode != wasm::WASM_OPCODE_END) {
+ return make_error<GenericBinaryError>("Invalid init_expr",
+ object_error::parse_failed);
+ }
+ return Error::success();
+}
+
+static wasm::WasmLimits readLimits(const uint8_t *&Ptr) {
+ wasm::WasmLimits Result;
+ Result.Flags = readVaruint1(Ptr);
+ Result.Initial = readVaruint32(Ptr);
+ if (Result.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX)
+ Result.Maximum = readVaruint32(Ptr);
+ return Result;
+}
+
+static Error readSection(WasmSection &Section, const uint8_t *&Ptr,
+ const uint8_t *Start) {
// TODO(sbc): Avoid reading past EOF in the case of malformed files.
Section.Offset = Ptr - Start;
- Section.Type = readULEB128(Ptr);
- uint32_t Size = readULEB128(Ptr);
+ Section.Type = readVaruint7(Ptr);
+ uint32_t Size = readVaruint32(Ptr);
if (Size == 0)
return make_error<StringError>("Zero length section",
object_error::parse_failed);
@@ -59,10 +178,9 @@ Error readSection(wasm::WasmSection &Section, const uint8_t *&Ptr,
Ptr += Size;
return Error::success();
}
-}
WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err)
- : ObjectFile(Binary::ID_Wasm, Buffer) {
+ : ObjectFile(Binary::ID_Wasm, Buffer), StartFunction(-1) {
ErrorAsOutParameter ErrAsOutParam(&Err);
Header.Magic = getData().substr(0, 4);
if (Header.Magic != StringRef("\0asm", 4)) {
@@ -79,21 +197,388 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err)
}
const uint8_t *Eof = getPtr(getData().size());
- wasm::WasmSection Sec;
+ WasmSection Sec;
while (Ptr < Eof) {
if ((Err = readSection(Sec, Ptr, getPtr(0))))
return;
- if (Sec.Type == wasm::WASM_SEC_USER) {
- if ((Err = parseUserSection(Sec, Sec.Content.data(), Sec.Content.size())))
- return;
- }
+ if ((Err = parseSection(Sec)))
+ return;
+
Sections.push_back(Sec);
}
}
-Error WasmObjectFile::parseUserSection(wasm::WasmSection &Sec,
- const uint8_t *Ptr, size_t Length) {
+Error WasmObjectFile::parseSection(WasmSection &Sec) {
+ const uint8_t* Start = Sec.Content.data();
+ const uint8_t* End = Start + Sec.Content.size();
+ switch (Sec.Type) {
+ case wasm::WASM_SEC_CUSTOM:
+ return parseCustomSection(Sec, Start, End);
+ case wasm::WASM_SEC_TYPE:
+ return parseTypeSection(Start, End);
+ case wasm::WASM_SEC_IMPORT:
+ return parseImportSection(Start, End);
+ case wasm::WASM_SEC_FUNCTION:
+ return parseFunctionSection(Start, End);
+ case wasm::WASM_SEC_TABLE:
+ return parseTableSection(Start, End);
+ case wasm::WASM_SEC_MEMORY:
+ return parseMemorySection(Start, End);
+ case wasm::WASM_SEC_GLOBAL:
+ return parseGlobalSection(Start, End);
+ case wasm::WASM_SEC_EXPORT:
+ return parseExportSection(Start, End);
+ case wasm::WASM_SEC_START:
+ return parseStartSection(Start, End);
+ case wasm::WASM_SEC_ELEM:
+ return parseElemSection(Start, End);
+ case wasm::WASM_SEC_CODE:
+ return parseCodeSection(Start, End);
+ case wasm::WASM_SEC_DATA:
+ return parseDataSection(Start, End);
+ default:
+ return make_error<GenericBinaryError>("Bad section type",
+ object_error::parse_failed);
+ }
+}
+
+Error WasmObjectFile::parseNameSection(const uint8_t *Ptr, const uint8_t *End) {
+ while (Ptr < End) {
+ uint8_t Type = readVarint7(Ptr);
+ uint32_t Size = readVaruint32(Ptr);
+ switch (Type) {
+ case wasm::WASM_NAMES_FUNCTION: {
+ uint32_t Count = readVaruint32(Ptr);
+ while (Count--) {
+ /*uint32_t Index =*/readVaruint32(Ptr);
+ StringRef Name = readString(Ptr);
+ if (Name.size())
+ Symbols.emplace_back(Name,
+ WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME);
+ }
+ break;
+ }
+ // Ignore local names for now
+ case wasm::WASM_NAMES_LOCAL:
+ default:
+ Ptr += Size;
+ break;
+ }
+ }
+
+ if (Ptr != End)
+ return make_error<GenericBinaryError>("Name section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
+WasmSection* WasmObjectFile::findCustomSectionByName(StringRef Name) {
+ for (WasmSection& Section : Sections) {
+ if (Section.Type == wasm::WASM_SEC_CUSTOM && Section.Name == Name)
+ return &Section;
+ }
+ return nullptr;
+}
+
+WasmSection* WasmObjectFile::findSectionByType(uint32_t Type) {
+ assert(Type != wasm::WASM_SEC_CUSTOM);
+ for (WasmSection& Section : Sections) {
+ if (Section.Type == Type)
+ return &Section;
+ }
+ return nullptr;
+}
+
+Error WasmObjectFile::parseRelocSection(StringRef Name, const uint8_t *Ptr,
+ const uint8_t *End) {
+ uint8_t SectionCode = readVarint7(Ptr);
+ WasmSection* Section = nullptr;
+ if (SectionCode == wasm::WASM_SEC_CUSTOM) {
+ StringRef Name = readString(Ptr);
+ Section = findCustomSectionByName(Name);
+ } else {
+ Section = findSectionByType(SectionCode);
+ }
+ if (!Section)
+ return make_error<GenericBinaryError>("Invalid section code",
+ object_error::parse_failed);
+ uint32_t RelocCount = readVaruint32(Ptr);
+ while (RelocCount--) {
+ wasm::WasmRelocation Reloc;
+ memset(&Reloc, 0, sizeof(Reloc));
+ Reloc.Type = readVaruint32(Ptr);
+ Reloc.Offset = readVaruint32(Ptr);
+ Reloc.Index = readVaruint32(Ptr);
+ switch (Reloc.Type) {
+ case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
+ case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB:
+ case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32:
+ break;
+ case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB:
+ case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB:
+ case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32:
+ Reloc.Addend = readVaruint32(Ptr);
+ break;
+ default:
+ return make_error<GenericBinaryError>("Bad relocation type",
+ object_error::parse_failed);
+ }
+ Section->Relocations.push_back(Reloc);
+ }
+ if (Ptr != End)
+ return make_error<GenericBinaryError>("Reloc section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
+Error WasmObjectFile::parseCustomSection(WasmSection &Sec,
+ const uint8_t *Ptr, const uint8_t *End) {
Sec.Name = readString(Ptr);
+ if (Sec.Name == "name") {
+ if (Error Err = parseNameSection(Ptr, End))
+ return Err;
+ } else if (Sec.Name.startswith("reloc.")) {
+ if (Error Err = parseRelocSection(Sec.Name, Ptr, End))
+ return Err;
+ }
+ return Error::success();
+}
+
+Error WasmObjectFile::parseTypeSection(const uint8_t *Ptr, const uint8_t *End) {
+ uint32_t Count = readVaruint32(Ptr);
+ Signatures.reserve(Count);
+ while (Count--) {
+ wasm::WasmSignature Sig;
+ Sig.ReturnType = wasm::WASM_TYPE_NORESULT;
+ int8_t Form = readVarint7(Ptr);
+ if (Form != wasm::WASM_TYPE_FUNC) {
+ return make_error<GenericBinaryError>("Invalid signature type",
+ object_error::parse_failed);
+ }
+ uint32_t ParamCount = readVaruint32(Ptr);
+ Sig.ParamTypes.reserve(ParamCount);
+ while (ParamCount--) {
+ uint32_t ParamType = readVarint7(Ptr);
+ Sig.ParamTypes.push_back(ParamType);
+ }
+ uint32_t ReturnCount = readVaruint32(Ptr);
+ if (ReturnCount) {
+ if (ReturnCount != 1) {
+ return make_error<GenericBinaryError>(
+ "Multiple return types not supported", object_error::parse_failed);
+ }
+ Sig.ReturnType = readVarint7(Ptr);
+ }
+ Signatures.push_back(Sig);
+ }
+ if (Ptr != End)
+ return make_error<GenericBinaryError>("Type section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
+Error WasmObjectFile::parseImportSection(const uint8_t *Ptr, const uint8_t *End) {
+ uint32_t Count = readVaruint32(Ptr);
+ Imports.reserve(Count);
+ while (Count--) {
+ wasm::WasmImport Im;
+ Im.Module = readString(Ptr);
+ Im.Field = readString(Ptr);
+ Im.Kind = readUint8(Ptr);
+ switch (Im.Kind) {
+ case wasm::WASM_EXTERNAL_FUNCTION:
+ Im.SigIndex = readVaruint32(Ptr);
+ Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::FUNCTION_IMPORT);
+ break;
+ case wasm::WASM_EXTERNAL_GLOBAL:
+ Im.GlobalType = readVarint7(Ptr);
+ Im.GlobalMutable = readVaruint1(Ptr);
+ Symbols.emplace_back(Im.Field, WasmSymbol::SymbolType::GLOBAL_IMPORT);
+ break;
+ default:
+ // TODO(sbc): Handle other kinds of imports
+ return make_error<GenericBinaryError>(
+ "Unexpected import kind", object_error::parse_failed);
+ }
+ Imports.push_back(Im);
+ }
+ if (Ptr != End)
+ return make_error<GenericBinaryError>("Import section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
+Error WasmObjectFile::parseFunctionSection(const uint8_t *Ptr, const uint8_t *End) {
+ uint32_t Count = readVaruint32(Ptr);
+ FunctionTypes.reserve(Count);
+ while (Count--) {
+ FunctionTypes.push_back(readVaruint32(Ptr));
+ }
+ if (Ptr != End)
+ return make_error<GenericBinaryError>("Function section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
+Error WasmObjectFile::parseTableSection(const uint8_t *Ptr, const uint8_t *End) {
+ uint32_t Count = readVaruint32(Ptr);
+ Tables.reserve(Count);
+ while (Count--) {
+ wasm::WasmTable Table;
+ Table.ElemType = readVarint7(Ptr);
+ if (Table.ElemType != wasm::WASM_TYPE_ANYFUNC) {
+ return make_error<GenericBinaryError>("Invalid table element type",
+ object_error::parse_failed);
+ }
+ Table.Limits = readLimits(Ptr);
+ Tables.push_back(Table);
+ }
+ if (Ptr != End)
+ return make_error<GenericBinaryError>("Table section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
+Error WasmObjectFile::parseMemorySection(const uint8_t *Ptr, const uint8_t *End) {
+ uint32_t Count = readVaruint32(Ptr);
+ Memories.reserve(Count);
+ while (Count--) {
+ Memories.push_back(readLimits(Ptr));
+ }
+ if (Ptr != End)
+ return make_error<GenericBinaryError>("Memory section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
+Error WasmObjectFile::parseGlobalSection(const uint8_t *Ptr, const uint8_t *End) {
+ uint32_t Count = readVaruint32(Ptr);
+ Globals.reserve(Count);
+ while (Count--) {
+ wasm::WasmGlobal Global;
+ Global.Type = readVarint7(Ptr);
+ Global.Mutable = readVaruint1(Ptr);
+ if (Error Err = readInitExpr(Global.InitExpr, Ptr))
+ return Err;
+ Globals.push_back(Global);
+ }
+ if (Ptr != End)
+ return make_error<GenericBinaryError>("Global section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
+Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) {
+ uint32_t Count = readVaruint32(Ptr);
+ Exports.reserve(Count);
+ while (Count--) {
+ wasm::WasmExport Ex;
+ Ex.Name = readString(Ptr);
+ Ex.Kind = readUint8(Ptr);
+ Ex.Index = readVaruint32(Ptr);
+ Exports.push_back(Ex);
+ switch (Ex.Kind) {
+ case wasm::WASM_EXTERNAL_FUNCTION:
+ Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::FUNCTION_EXPORT);
+ break;
+ case wasm::WASM_EXTERNAL_GLOBAL:
+ Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::GLOBAL_EXPORT);
+ break;
+ default:
+ // TODO(sbc): Handle other kinds of exports
+ return make_error<GenericBinaryError>(
+ "Unexpected export kind", object_error::parse_failed);
+ }
+ }
+ if (Ptr != End)
+ return make_error<GenericBinaryError>("Export section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
+Error WasmObjectFile::parseStartSection(const uint8_t *Ptr, const uint8_t *End) {
+ StartFunction = readVaruint32(Ptr);
+ if (StartFunction >= FunctionTypes.size())
+ return make_error<GenericBinaryError>("Invalid start function",
+ object_error::parse_failed);
+ return Error::success();
+}
+
+Error WasmObjectFile::parseCodeSection(const uint8_t *Ptr, const uint8_t *End) {
+ uint32_t FunctionCount = readVaruint32(Ptr);
+ if (FunctionCount != FunctionTypes.size()) {
+ return make_error<GenericBinaryError>("Invalid function count",
+ object_error::parse_failed);
+ }
+
+ CodeSection = ArrayRef<uint8_t>(Ptr, End - Ptr);
+
+ while (FunctionCount--) {
+ wasm::WasmFunction Function;
+ uint32_t FunctionSize = readVaruint32(Ptr);
+ const uint8_t *FunctionEnd = Ptr + FunctionSize;
+
+ uint32_t NumLocalDecls = readVaruint32(Ptr);
+ Function.Locals.reserve(NumLocalDecls);
+ while (NumLocalDecls--) {
+ wasm::WasmLocalDecl Decl;
+ Decl.Count = readVaruint32(Ptr);
+ Decl.Type = readVarint7(Ptr);
+ Function.Locals.push_back(Decl);
+ }
+
+ uint32_t BodySize = FunctionEnd - Ptr;
+ Function.Body = ArrayRef<uint8_t>(Ptr, BodySize);
+ Ptr += BodySize;
+ assert(Ptr == FunctionEnd);
+ Functions.push_back(Function);
+ }
+ if (Ptr != End)
+ return make_error<GenericBinaryError>("Code section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
+Error WasmObjectFile::parseElemSection(const uint8_t *Ptr, const uint8_t *End) {
+ uint32_t Count = readVaruint32(Ptr);
+ ElemSegments.reserve(Count);
+ while (Count--) {
+ wasm::WasmElemSegment Segment;
+ Segment.TableIndex = readVaruint32(Ptr);
+ if (Segment.TableIndex != 0) {
+ return make_error<GenericBinaryError>("Invalid TableIndex",
+ object_error::parse_failed);
+ }
+ if (Error Err = readInitExpr(Segment.Offset, Ptr))
+ return Err;
+ uint32_t NumElems = readVaruint32(Ptr);
+ while (NumElems--) {
+ Segment.Functions.push_back(readVaruint32(Ptr));
+ }
+ ElemSegments.push_back(Segment);
+ }
+ if (Ptr != End)
+ return make_error<GenericBinaryError>("Elem section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
+Error WasmObjectFile::parseDataSection(const uint8_t *Ptr, const uint8_t *End) {
+ uint32_t Count = readVaruint32(Ptr);
+ DataSegments.reserve(Count);
+ while (Count--) {
+ wasm::WasmDataSegment Segment;
+ Segment.Index = readVaruint32(Ptr);
+ if (Error Err = readInitExpr(Segment.Offset, Ptr))
+ return Err;
+ uint32_t Size = readVaruint32(Ptr);
+ Segment.Content = ArrayRef<uint8_t>(Ptr, Size);
+ Ptr += Size;
+ DataSegments.push_back(Segment);
+ }
+ if (Ptr != End)
+ return make_error<GenericBinaryError>("Data section ended prematurely",
+ object_error::parse_failed);
return Error::success();
}
@@ -105,37 +590,48 @@ const wasm::WasmObjectHeader &WasmObjectFile::getHeader() const {
return Header;
}
-void WasmObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
- llvm_unreachable("not yet implemented");
-}
-
-std::error_code WasmObjectFile::printSymbolName(raw_ostream &OS,
- DataRefImpl Symb) const {
- llvm_unreachable("not yet implemented");
- return object_error::invalid_symbol_index;
-}
+void WasmObjectFile::moveSymbolNext(DataRefImpl &Symb) const { Symb.d.a++; }
uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const {
- llvm_unreachable("not yet implemented");
- return 0;
+ const WasmSymbol &Sym = getWasmSymbol(Symb);
+ switch (Sym.Type) {
+ case WasmSymbol::SymbolType::FUNCTION_IMPORT:
+ return object::SymbolRef::SF_Undefined | SymbolRef::SF_Executable;
+ case WasmSymbol::SymbolType::FUNCTION_EXPORT:
+ return object::SymbolRef::SF_Global | SymbolRef::SF_Executable;
+ case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME:
+ return object::SymbolRef::SF_Executable;
+ case WasmSymbol::SymbolType::GLOBAL_IMPORT:
+ return object::SymbolRef::SF_Undefined;
+ case WasmSymbol::SymbolType::GLOBAL_EXPORT:
+ return object::SymbolRef::SF_Global;
+ }
+ llvm_unreachable("Unknown WasmSymbol::SymbolType");
}
basic_symbol_iterator WasmObjectFile::symbol_begin() const {
- return BasicSymbolRef(DataRefImpl(), this);
+ DataRefImpl Ref;
+ Ref.d.a = 0;
+ return BasicSymbolRef(Ref, this);
}
basic_symbol_iterator WasmObjectFile::symbol_end() const {
- return BasicSymbolRef(DataRefImpl(), this);
+ DataRefImpl Ref;
+ Ref.d.a = Symbols.size();
+ return BasicSymbolRef(Ref, this);
+}
+
+const WasmSymbol &WasmObjectFile::getWasmSymbol(DataRefImpl Symb) const {
+ return Symbols[Symb.d.a];
}
Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const {
- llvm_unreachable("not yet implemented");
- return errorCodeToError(object_error::invalid_symbol_index);
+ const WasmSymbol &Sym = getWasmSymbol(Symb);
+ return Sym.Name;
}
Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const {
- llvm_unreachable("not yet implemented");
- return errorCodeToError(object_error::invalid_symbol_index);
+ return (uint64_t)Symb.d.a;
}
uint64_t WasmObjectFile::getSymbolValueImpl(DataRefImpl Symb) const {
@@ -169,7 +665,7 @@ void WasmObjectFile::moveSectionNext(DataRefImpl &Sec) const { Sec.d.a++; }
std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec,
StringRef &Res) const {
- const wasm::WasmSection &S = Sections[Sec.d.a];
+ const WasmSection &S = Sections[Sec.d.a];
#define ECase(X) \
case wasm::WASM_SEC_##X: \
Res = #X; \
@@ -186,7 +682,7 @@ std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec,
ECase(ELEM);
ECase(CODE);
ECase(DATA);
- case wasm::WASM_SEC_USER:
+ case wasm::WASM_SEC_CUSTOM:
Res = S.Name;
break;
default:
@@ -199,13 +695,13 @@ std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec,
uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; }
uint64_t WasmObjectFile::getSectionSize(DataRefImpl Sec) const {
- const wasm::WasmSection &S = Sections[Sec.d.a];
+ const WasmSection &S = Sections[Sec.d.a];
return S.Content.size();
}
std::error_code WasmObjectFile::getSectionContents(DataRefImpl Sec,
StringRef &Res) const {
- const wasm::WasmSection &S = Sections[Sec.d.a];
+ const WasmSection &S = Sections[Sec.d.a];
// This will never fail since wasm sections can never be empty (user-sections
// must have a name and non-user sections each have a defined structure).
Res = StringRef(reinterpret_cast<const char *>(S.Content.data()),
@@ -222,13 +718,11 @@ bool WasmObjectFile::isSectionCompressed(DataRefImpl Sec) const {
}
bool WasmObjectFile::isSectionText(DataRefImpl Sec) const {
- const wasm::WasmSection &S = Sections[Sec.d.a];
- return S.Type == wasm::WASM_SEC_CODE;
+ return getWasmSection(Sec).Type == wasm::WASM_SEC_CODE;
}
bool WasmObjectFile::isSectionData(DataRefImpl Sec) const {
- const wasm::WasmSection &S = Sections[Sec.d.a];
- return S.Type == wasm::WASM_SEC_DATA;
+ return getWasmSection(Sec).Type == wasm::WASM_SEC_DATA;
}
bool WasmObjectFile::isSectionBSS(DataRefImpl Sec) const { return false; }
@@ -237,31 +731,28 @@ bool WasmObjectFile::isSectionVirtual(DataRefImpl Sec) const { return false; }
bool WasmObjectFile::isSectionBitcode(DataRefImpl Sec) const { return false; }
-relocation_iterator WasmObjectFile::section_rel_begin(DataRefImpl Sec) const {
- llvm_unreachable("not yet implemented");
- RelocationRef Rel;
- return relocation_iterator(Rel);
+relocation_iterator WasmObjectFile::section_rel_begin(DataRefImpl Ref) const {
+ DataRefImpl RelocRef;
+ RelocRef.d.a = Ref.d.a;
+ RelocRef.d.b = 0;
+ return relocation_iterator(RelocationRef(RelocRef, this));
}
-relocation_iterator WasmObjectFile::section_rel_end(DataRefImpl Sec) const {
- llvm_unreachable("not yet implemented");
- RelocationRef Rel;
- return relocation_iterator(Rel);
-}
-
-section_iterator WasmObjectFile::getRelocatedSection(DataRefImpl Sec) const {
- llvm_unreachable("not yet implemented");
- SectionRef Ref;
- return section_iterator(Ref);
+relocation_iterator WasmObjectFile::section_rel_end(DataRefImpl Ref) const {
+ const WasmSection &Sec = getWasmSection(Ref);
+ DataRefImpl RelocRef;
+ RelocRef.d.a = Ref.d.a;
+ RelocRef.d.b = Sec.Relocations.size();
+ return relocation_iterator(RelocationRef(RelocRef, this));
}
void WasmObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
- llvm_unreachable("not yet implemented");
+ Rel.d.b++;
}
-uint64_t WasmObjectFile::getRelocationOffset(DataRefImpl Rel) const {
- llvm_unreachable("not yet implemented");
- return 0;
+uint64_t WasmObjectFile::getRelocationOffset(DataRefImpl Ref) const {
+ const wasm::WasmRelocation &Rel = getWasmRelocation(Ref);
+ return Rel.Offset;
}
symbol_iterator WasmObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
@@ -270,14 +761,28 @@ symbol_iterator WasmObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
return symbol_iterator(Ref);
}
-uint64_t WasmObjectFile::getRelocationType(DataRefImpl Rel) const {
- llvm_unreachable("not yet implemented");
- return 0;
+uint64_t WasmObjectFile::getRelocationType(DataRefImpl Ref) const {
+ const wasm::WasmRelocation &Rel = getWasmRelocation(Ref);
+ return Rel.Type;
}
void WasmObjectFile::getRelocationTypeName(
- DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
- llvm_unreachable("not yet implemented");
+ DataRefImpl Ref, SmallVectorImpl<char> &Result) const {
+ const wasm::WasmRelocation& Rel = getWasmRelocation(Ref);
+ StringRef Res = "Unknown";
+
+#define WASM_RELOC(name, value) \
+ case wasm::name: \
+ Res = #name; \
+ break;
+
+ switch (Rel.Type) {
+#include "llvm/Support/WasmRelocs/WebAssembly.def"
+ }
+
+#undef WASM_RELOC
+
+ Result.append(Res.begin(), Res.end());
}
section_iterator WasmObjectFile::section_begin() const {
@@ -304,10 +809,25 @@ SubtargetFeatures WasmObjectFile::getFeatures() const {
bool WasmObjectFile::isRelocatableObject() const { return false; }
-const wasm::WasmSection *
+const WasmSection &WasmObjectFile::getWasmSection(DataRefImpl Ref) const {
+ assert(Ref.d.a < Sections.size());
+ return Sections[Ref.d.a];
+}
+
+const WasmSection &
WasmObjectFile::getWasmSection(const SectionRef &Section) const {
- return &Sections[Section.getRawDataRefImpl().d.a];
+ return getWasmSection(Section.getRawDataRefImpl());
}
-} // end namespace object
-} // end namespace llvm
+const wasm::WasmRelocation &
+WasmObjectFile::getWasmRelocation(const RelocationRef &Ref) const {
+ return getWasmRelocation(Ref.getRawDataRefImpl());
+}
+
+const wasm::WasmRelocation &
+WasmObjectFile::getWasmRelocation(DataRefImpl Ref) const {
+ assert(Ref.d.a < Sections.size());
+ const WasmSection& Sec = Sections[Ref.d.a];
+ assert(Ref.d.b < Sec.Relocations.size());
+ return Sec.Relocations[Ref.d.b];
+}
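
All of the readVar* helpers above bottom out in LEB128 decoding. A standalone sketch of the unsigned variant (LLVM's decodeULEB128 in Support/LEB128.h implements the same algorithm; this is just the idea, not the library code):

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    static uint64_t decodeULEB128(const uint8_t *&Ptr) {
      uint64_t Result = 0;
      unsigned Shift = 0;
      uint8_t Byte;
      do {
        Byte = *Ptr++;
        Result |= uint64_t(Byte & 0x7f) << Shift; // low 7 bits are payload
        Shift += 7;
      } while (Byte & 0x80); // high bit set means more bytes follow
      return Result;
    }

    int main() {
      const uint8_t Buf[] = {0xe5, 0x8e, 0x26}; // 624485 in ULEB128
      const uint8_t *P = Buf;
      uint64_t V = decodeULEB128(P);
      assert(V == 624485 && P == Buf + 3);
      std::cout << V << "\n";
    }
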
diff --git a/lib/ObjectYAML/CMakeLists.txt b/lib/ObjectYAML/CMakeLists.txt
index 2eee95b318db..37f8fd7bce1a 100644
--- a/lib/ObjectYAML/CMakeLists.txt
+++ b/lib/ObjectYAML/CMakeLists.txt
@@ -1,8 +1,11 @@
add_llvm_library(LLVMObjectYAML
- YAML.cpp
COFFYAML.cpp
+ DWARFEmitter.cpp
+ DWARFVisitor.cpp
+ DWARFYAML.cpp
ELFYAML.cpp
MachOYAML.cpp
ObjectYAML.cpp
- DWARFYAML.cpp
+ WasmYAML.cpp
+ YAML.cpp
)
diff --git a/lib/ObjectYAML/DWARFEmitter.cpp b/lib/ObjectYAML/DWARFEmitter.cpp
new file mode 100644
index 000000000000..1aa1519b708b
--- /dev/null
+++ b/lib/ObjectYAML/DWARFEmitter.cpp
@@ -0,0 +1,321 @@
+//===- DWARFEmitter - Convert YAML to DWARF binary data -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief The DWARF component of yaml2obj. Provided as library code for tests.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjectYAML/DWARFEmitter.h"
+#include "llvm/ObjectYAML/DWARFYAML.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SwapByteOrder.h"
+
+#include "DWARFVisitor.h"
+
+#include <algorithm>
+
+using namespace llvm;
+
+template <typename T>
+static void writeInteger(T Integer, raw_ostream &OS, bool IsLittleEndian) {
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ sys::swapByteOrder(Integer);
+ OS.write(reinterpret_cast<char *>(&Integer), sizeof(T));
+}
+
+static void writeVariableSizedInteger(uint64_t Integer, size_t Size,
+ raw_ostream &OS, bool IsLittleEndian) {
+ if (8 == Size)
+ writeInteger((uint64_t)Integer, OS, IsLittleEndian);
+ else if (4 == Size)
+ writeInteger((uint32_t)Integer, OS, IsLittleEndian);
+ else if (2 == Size)
+ writeInteger((uint16_t)Integer, OS, IsLittleEndian);
+ else if (1 == Size)
+ writeInteger((uint8_t)Integer, OS, IsLittleEndian);
+ else
+ assert(false && "Invalid integer write size.");
+}
+
+static void ZeroFillBytes(raw_ostream &OS, size_t Size) {
+ std::vector<uint8_t> FillData;
+ FillData.insert(FillData.begin(), Size, 0);
+ OS.write(reinterpret_cast<char *>(FillData.data()), Size);
+}
+
+static void writeInitialLength(const DWARFYAML::InitialLength &Length,
+ raw_ostream &OS, bool IsLittleEndian) {
+ writeInteger((uint32_t)Length.TotalLength, OS, IsLittleEndian);
+ if (Length.isDWARF64())
+ writeInteger((uint64_t)Length.TotalLength64, OS, IsLittleEndian);
+}
+
+void DWARFYAML::EmitDebugStr(raw_ostream &OS, const DWARFYAML::Data &DI) {
+ for (auto Str : DI.DebugStrings) {
+ OS.write(Str.data(), Str.size());
+ OS.write('\0');
+ }
+}
+
+void DWARFYAML::EmitDebugAbbrev(raw_ostream &OS, const DWARFYAML::Data &DI) {
+ for (auto AbbrevDecl : DI.AbbrevDecls) {
+ encodeULEB128(AbbrevDecl.Code, OS);
+ encodeULEB128(AbbrevDecl.Tag, OS);
+ OS.write(AbbrevDecl.Children);
+ for (auto Attr : AbbrevDecl.Attributes) {
+ encodeULEB128(Attr.Attribute, OS);
+ encodeULEB128(Attr.Form, OS);
+ if (Attr.Form == dwarf::DW_FORM_implicit_const)
+ encodeSLEB128(Attr.Value, OS);
+ }
+ encodeULEB128(0, OS);
+ encodeULEB128(0, OS);
+ }
+}
+
+void DWARFYAML::EmitDebugAranges(raw_ostream &OS, const DWARFYAML::Data &DI) {
+ for (auto Range : DI.ARanges) {
+ auto HeaderStart = OS.tell();
+ writeInitialLength(Range.Length, OS, DI.IsLittleEndian);
+ writeInteger((uint16_t)Range.Version, OS, DI.IsLittleEndian);
+ writeInteger((uint32_t)Range.CuOffset, OS, DI.IsLittleEndian);
+ writeInteger((uint8_t)Range.AddrSize, OS, DI.IsLittleEndian);
+ writeInteger((uint8_t)Range.SegSize, OS, DI.IsLittleEndian);
+
+ auto HeaderSize = OS.tell() - HeaderStart;
+ auto FirstDescriptor = alignTo(HeaderSize, Range.AddrSize * 2);
+ ZeroFillBytes(OS, FirstDescriptor - HeaderSize);
+
+ for (auto Descriptor : Range.Descriptors) {
+ writeVariableSizedInteger(Descriptor.Address, Range.AddrSize, OS,
+ DI.IsLittleEndian);
+ writeVariableSizedInteger(Descriptor.Length, Range.AddrSize, OS,
+ DI.IsLittleEndian);
+ }
+ ZeroFillBytes(OS, Range.AddrSize * 2);
+ }
+}
+
+void DWARFYAML::EmitPubSection(raw_ostream &OS,
+ const DWARFYAML::PubSection &Sect,
+ bool IsLittleEndian) {
+ writeInitialLength(Sect.Length, OS, IsLittleEndian);
+ writeInteger((uint16_t)Sect.Version, OS, IsLittleEndian);
+ writeInteger((uint32_t)Sect.UnitOffset, OS, IsLittleEndian);
+ writeInteger((uint32_t)Sect.UnitSize, OS, IsLittleEndian);
+ for (auto Entry : Sect.Entries) {
+ writeInteger((uint32_t)Entry.DieOffset, OS, IsLittleEndian);
+ if (Sect.IsGNUStyle)
+ writeInteger((uint32_t)Entry.Descriptor, OS, IsLittleEndian);
+ OS.write(Entry.Name.data(), Entry.Name.size());
+ OS.write('\0');
+ }
+}
+
+/// \brief An extension of the DWARFYAML::ConstVisitor which writes compile
+/// units and DIEs to a stream.
+class DumpVisitor : public DWARFYAML::ConstVisitor {
+ raw_ostream &OS;
+
+protected:
+ virtual void onStartCompileUnit(const DWARFYAML::Unit &CU) {
+ writeInitialLength(CU.Length, OS, DebugInfo.IsLittleEndian);
+ writeInteger((uint16_t)CU.Version, OS, DebugInfo.IsLittleEndian);
+ if (CU.Version >= 5) {
+ writeInteger((uint8_t)CU.Type, OS, DebugInfo.IsLittleEndian);
+ writeInteger((uint8_t)CU.AddrSize, OS, DebugInfo.IsLittleEndian);
+ writeInteger((uint32_t)CU.AbbrOffset, OS, DebugInfo.IsLittleEndian);
+ } else {
+ writeInteger((uint32_t)CU.AbbrOffset, OS, DebugInfo.IsLittleEndian);
+ writeInteger((uint8_t)CU.AddrSize, OS, DebugInfo.IsLittleEndian);
+ }
+ }
+
+ virtual void onStartDIE(const DWARFYAML::Unit &CU,
+ const DWARFYAML::Entry &DIE) {
+ encodeULEB128(DIE.AbbrCode, OS);
+ }
+
+ virtual void onValue(const uint8_t U) {
+ writeInteger(U, OS, DebugInfo.IsLittleEndian);
+ }
+
+ virtual void onValue(const uint16_t U) {
+ writeInteger(U, OS, DebugInfo.IsLittleEndian);
+ }
+ virtual void onValue(const uint32_t U) {
+ writeInteger(U, OS, DebugInfo.IsLittleEndian);
+ }
+ virtual void onValue(const uint64_t U, const bool LEB = false) {
+ if (LEB)
+ encodeULEB128(U, OS);
+ else
+ writeInteger(U, OS, DebugInfo.IsLittleEndian);
+ }
+
+ virtual void onValue(const int64_t S, const bool LEB = false) {
+ if (LEB)
+ encodeSLEB128(S, OS);
+ else
+ writeInteger(S, OS, DebugInfo.IsLittleEndian);
+ }
+
+ virtual void onValue(const StringRef String) {
+ OS.write(String.data(), String.size());
+ OS.write('\0');
+ }
+
+ virtual void onValue(const MemoryBufferRef MBR) {
+ OS.write(MBR.getBufferStart(), MBR.getBufferSize());
+ }
+
+public:
+ DumpVisitor(const DWARFYAML::Data &DI, raw_ostream &Out)
+ : DWARFYAML::ConstVisitor(DI), OS(Out) {}
+};
+
+void DWARFYAML::EmitDebugInfo(raw_ostream &OS, const DWARFYAML::Data &DI) {
+ DumpVisitor Visitor(DI, OS);
+ Visitor.traverseDebugInfo();
+}
+
+static void EmitFileEntry(raw_ostream &OS, const DWARFYAML::File &File) {
+ OS.write(File.Name.data(), File.Name.size());
+ OS.write('\0');
+ encodeULEB128(File.DirIdx, OS);
+ encodeULEB128(File.ModTime, OS);
+ encodeULEB128(File.Length, OS);
+}
+
+void DWARFYAML::EmitDebugLine(raw_ostream &OS, const DWARFYAML::Data &DI) {
+ for (const auto &LineTable : DI.DebugLines) {
+ writeInitialLength(LineTable.Length, OS, DI.IsLittleEndian);
+ uint64_t SizeOfPrologueLength = LineTable.Length.isDWARF64() ? 8 : 4;
+ writeInteger((uint16_t)LineTable.Version, OS, DI.IsLittleEndian);
+ writeVariableSizedInteger(LineTable.PrologueLength, SizeOfPrologueLength,
+ OS, DI.IsLittleEndian);
+ writeInteger((uint8_t)LineTable.MinInstLength, OS, DI.IsLittleEndian);
+ if (LineTable.Version >= 4)
+ writeInteger((uint8_t)LineTable.MaxOpsPerInst, OS, DI.IsLittleEndian);
+ writeInteger((uint8_t)LineTable.DefaultIsStmt, OS, DI.IsLittleEndian);
+ writeInteger((uint8_t)LineTable.LineBase, OS, DI.IsLittleEndian);
+ writeInteger((uint8_t)LineTable.LineRange, OS, DI.IsLittleEndian);
+ writeInteger((uint8_t)LineTable.OpcodeBase, OS, DI.IsLittleEndian);
+
+ for (auto OpcodeLength : LineTable.StandardOpcodeLengths)
+ writeInteger((uint8_t)OpcodeLength, OS, DI.IsLittleEndian);
+
+ for (auto IncludeDir : LineTable.IncludeDirs) {
+ OS.write(IncludeDir.data(), IncludeDir.size());
+ OS.write('\0');
+ }
+ OS.write('\0');
+
+ for (auto File : LineTable.Files)
+ EmitFileEntry(OS, File);
+ OS.write('\0');
+
+ for (auto Op : LineTable.Opcodes) {
+ writeInteger((uint8_t)Op.Opcode, OS, DI.IsLittleEndian);
+ if (Op.Opcode == 0) {
+ encodeULEB128(Op.ExtLen, OS);
+ writeInteger((uint8_t)Op.SubOpcode, OS, DI.IsLittleEndian);
+ switch (Op.SubOpcode) {
+ case dwarf::DW_LNE_set_address:
+ case dwarf::DW_LNE_set_discriminator:
+ writeVariableSizedInteger(Op.Data, DI.CompileUnits[0].AddrSize, OS,
+ DI.IsLittleEndian);
+ break;
+ case dwarf::DW_LNE_define_file:
+ EmitFileEntry(OS, Op.FileEntry);
+ break;
+ case dwarf::DW_LNE_end_sequence:
+ break;
+ default:
+ for (auto OpByte : Op.UnknownOpcodeData)
+ writeInteger((uint8_t)OpByte, OS, DI.IsLittleEndian);
+ }
+ } else if (Op.Opcode < LineTable.OpcodeBase) {
+ switch (Op.Opcode) {
+ case dwarf::DW_LNS_copy:
+ case dwarf::DW_LNS_negate_stmt:
+ case dwarf::DW_LNS_set_basic_block:
+ case dwarf::DW_LNS_const_add_pc:
+ case dwarf::DW_LNS_set_prologue_end:
+ case dwarf::DW_LNS_set_epilogue_begin:
+ break;
+
+ case dwarf::DW_LNS_advance_pc:
+ case dwarf::DW_LNS_set_file:
+ case dwarf::DW_LNS_set_column:
+ case dwarf::DW_LNS_set_isa:
+ encodeULEB128(Op.Data, OS);
+ break;
+
+ case dwarf::DW_LNS_advance_line:
+ encodeSLEB128(Op.SData, OS);
+ break;
+
+ case dwarf::DW_LNS_fixed_advance_pc:
+ writeInteger((uint16_t)Op.Data, OS, DI.IsLittleEndian);
+ break;
+
+ default:
+ for (auto OpData : Op.StandardOpcodeData) {
+ encodeULEB128(OpData, OS);
+ }
+ }
+ }
+ }
+ }
+}
+
+typedef void (*EmitFuncType)(raw_ostream &, const DWARFYAML::Data &);
+
+static void
+EmitDebugSectionImpl(const DWARFYAML::Data &DI, EmitFuncType EmitFunc,
+ StringRef Sec,
+ StringMap<std::unique_ptr<MemoryBuffer>> &OutputBuffers) {
+ std::string Data;
+ raw_string_ostream DebugInfoStream(Data);
+ EmitFunc(DebugInfoStream, DI);
+ DebugInfoStream.flush();
+ if (!Data.empty())
+ OutputBuffers[Sec] = MemoryBuffer::getMemBufferCopy(Data);
+}
+
+Expected<StringMap<std::unique_ptr<MemoryBuffer>>>
+DWARFYAML::EmitDebugSections(StringRef YAMLString,
+ bool IsLittleEndian) {
+ StringMap<std::unique_ptr<MemoryBuffer>> DebugSections;
+
+ yaml::Input YIn(YAMLString);
+
+ DWARFYAML::Data DI;
+ DI.IsLittleEndian = IsLittleEndian;
+ YIn >> DI;
+ if (YIn.error())
+ return errorCodeToError(YIn.error());
+
+ EmitDebugSectionImpl(DI, &DWARFYAML::EmitDebugInfo, "debug_info",
+ DebugSections);
+ EmitDebugSectionImpl(DI, &DWARFYAML::EmitDebugLine, "debug_line",
+ DebugSections);
+ EmitDebugSectionImpl(DI, &DWARFYAML::EmitDebugStr, "debug_str",
+ DebugSections);
+ EmitDebugSectionImpl(DI, &DWARFYAML::EmitDebugAbbrev, "debug_abbrev",
+ DebugSections);
+ EmitDebugSectionImpl(DI, &DWARFYAML::EmitDebugAranges, "debug_aranges",
+ DebugSections);
+ return std::move(DebugSections);
+}
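
The initial-length handling above follows DWARF's length convention: a 32-bit length field, where the reserved value 0xffffffff announces that a real 64-bit length follows (DWARF64). A standalone sketch of how such a length would be emitted when computed from scratch; note the patch's YAML form instead stores TotalLength and TotalLength64 explicitly, and a little-endian host is assumed here:

    #include <cstdint>
    #include <cstring>
    #include <iostream>
    #include <vector>

    static void writeInitialLength(uint64_t Length, std::vector<uint8_t> &Out) {
      auto Append = [&](const void *P, size_t N) {
        const uint8_t *B = static_cast<const uint8_t *>(P);
        Out.insert(Out.end(), B, B + N);
      };
      if (Length < 0xfffffff0u) { // fits in the 32-bit (DWARF32) form
        uint32_t L32 = uint32_t(Length);
        Append(&L32, sizeof(L32));
      } else { // escape value, then the full 64-bit length
        uint32_t Escape = 0xffffffffu;
        Append(&Escape, sizeof(Escape));
        Append(&Length, sizeof(Length));
      }
    }

    int main() {
      std::vector<uint8_t> Out;
      writeInitialLength(42, Out);             // 4 bytes
      writeInitialLength(0x123456789ull, Out); // 4 + 8 bytes
      std::cout << Out.size() << "\n";         // 16
    }
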
diff --git a/lib/ObjectYAML/DWARFVisitor.cpp b/lib/ObjectYAML/DWARFVisitor.cpp
new file mode 100644
index 000000000000..36a9f7638bd4
--- /dev/null
+++ b/lib/ObjectYAML/DWARFVisitor.cpp
@@ -0,0 +1,178 @@
+//===--- DWARFVisitor.cpp ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFVisitor.h"
+#include "llvm/ObjectYAML/DWARFYAML.h"
+
+using namespace llvm;
+
+template <typename T>
+void DWARFYAML::VisitorImpl<T>::onVariableSizeValue(uint64_t U, unsigned Size) {
+ switch (Size) {
+ case 8:
+ onValue((uint64_t)U);
+ break;
+ case 4:
+ onValue((uint32_t)U);
+ break;
+ case 2:
+ onValue((uint16_t)U);
+ break;
+ case 1:
+ onValue((uint8_t)U);
+ break;
+ default:
+ llvm_unreachable("Invalid integer write size.");
+ }
+}
+
+static unsigned getOffsetSize(const DWARFYAML::Unit &Unit) {
+ return Unit.Length.isDWARF64() ? 8 : 4;
+}
+
+static unsigned getRefSize(const DWARFYAML::Unit &Unit) {
+ if (Unit.Version == 2)
+ return Unit.AddrSize;
+ return getOffsetSize(Unit);
+}
+
+template <typename T> void DWARFYAML::VisitorImpl<T>::traverseDebugInfo() {
+ for (auto &Unit : DebugInfo.CompileUnits) {
+ onStartCompileUnit(Unit);
+ auto FirstAbbrevCode = Unit.Entries[0].AbbrCode;
+
+ for (auto &Entry : Unit.Entries) {
+ onStartDIE(Unit, Entry);
+ if (Entry.AbbrCode == 0u)
+ continue;
+ auto &Abbrev = DebugInfo.AbbrevDecls[Entry.AbbrCode - FirstAbbrevCode];
+ auto FormVal = Entry.Values.begin();
+ auto AbbrForm = Abbrev.Attributes.begin();
+ for (;
+ FormVal != Entry.Values.end() && AbbrForm != Abbrev.Attributes.end();
+ ++FormVal, ++AbbrForm) {
+ onForm(*AbbrForm, *FormVal);
+ dwarf::Form Form = AbbrForm->Form;
+ bool Indirect;
+ do {
+ Indirect = false;
+ switch (Form) {
+ case dwarf::DW_FORM_addr:
+ onVariableSizeValue(FormVal->Value, Unit.AddrSize);
+ break;
+ case dwarf::DW_FORM_ref_addr:
+ onVariableSizeValue(FormVal->Value, getRefSize(Unit));
+ break;
+ case dwarf::DW_FORM_exprloc:
+ case dwarf::DW_FORM_block:
+ onValue((uint64_t)FormVal->BlockData.size(), true);
+ onValue(
+ MemoryBufferRef(StringRef((const char *)&FormVal->BlockData[0],
+ FormVal->BlockData.size()),
+ ""));
+ break;
+ case dwarf::DW_FORM_block1: {
+ auto writeSize = FormVal->BlockData.size();
+ onValue((uint8_t)writeSize);
+ onValue(
+ MemoryBufferRef(StringRef((const char *)&FormVal->BlockData[0],
+ FormVal->BlockData.size()),
+ ""));
+ break;
+ }
+ case dwarf::DW_FORM_block2: {
+ auto writeSize = FormVal->BlockData.size();
+ onValue((uint16_t)writeSize);
+ onValue(
+ MemoryBufferRef(StringRef((const char *)&FormVal->BlockData[0],
+ FormVal->BlockData.size()),
+ ""));
+ break;
+ }
+ case dwarf::DW_FORM_block4: {
+ auto writeSize = FormVal->BlockData.size();
+ onValue((uint32_t)writeSize);
+ onValue(
+ MemoryBufferRef(StringRef((const char *)&FormVal->BlockData[0],
+ FormVal->BlockData.size()),
+ ""));
+ break;
+ }
+ case dwarf::DW_FORM_data1:
+ case dwarf::DW_FORM_ref1:
+ case dwarf::DW_FORM_flag:
+ case dwarf::DW_FORM_strx1:
+ case dwarf::DW_FORM_addrx1:
+ onValue((uint8_t)FormVal->Value);
+ break;
+ case dwarf::DW_FORM_data2:
+ case dwarf::DW_FORM_ref2:
+ case dwarf::DW_FORM_strx2:
+ case dwarf::DW_FORM_addrx2:
+ onValue((uint16_t)FormVal->Value);
+ break;
+ case dwarf::DW_FORM_data4:
+ case dwarf::DW_FORM_ref4:
+ case dwarf::DW_FORM_ref_sup4:
+ case dwarf::DW_FORM_strx4:
+ case dwarf::DW_FORM_addrx4:
+ onValue((uint32_t)FormVal->Value);
+ break;
+ case dwarf::DW_FORM_data8:
+ case dwarf::DW_FORM_ref8:
+ case dwarf::DW_FORM_ref_sup8:
+ onValue((uint64_t)FormVal->Value);
+ break;
+ case dwarf::DW_FORM_sdata:
+ onValue((int64_t)FormVal->Value, true);
+ break;
+ case dwarf::DW_FORM_udata:
+ case dwarf::DW_FORM_ref_udata:
+ onValue((uint64_t)FormVal->Value, true);
+ break;
+ case dwarf::DW_FORM_string:
+ onValue(FormVal->CStr);
+ break;
+ case dwarf::DW_FORM_indirect:
+ onValue((uint64_t)FormVal->Value, true);
+ Indirect = true;
+ Form = static_cast<dwarf::Form>((uint64_t)FormVal->Value);
+ ++FormVal;
+ break;
+ case dwarf::DW_FORM_strp:
+ case dwarf::DW_FORM_sec_offset:
+ case dwarf::DW_FORM_GNU_ref_alt:
+ case dwarf::DW_FORM_GNU_strp_alt:
+ case dwarf::DW_FORM_line_strp:
+ case dwarf::DW_FORM_strp_sup:
+ onVariableSizeValue(FormVal->Value, getOffsetSize(Unit));
+ break;
+ case dwarf::DW_FORM_ref_sig8:
+ onValue((uint64_t)FormVal->Value);
+ break;
+ case dwarf::DW_FORM_GNU_addr_index:
+ case dwarf::DW_FORM_GNU_str_index:
+ onValue((uint64_t)FormVal->Value, true);
+ break;
+ default:
+ break;
+ }
+ } while (Indirect);
+ }
+ onEndDIE(Unit, Entry);
+ }
+ onEndCompileUnit(Unit);
+ }
+}
+
+// Explicitly instantiate the two template expansions.
+template class DWARFYAML::VisitorImpl<DWARFYAML::Data>;
+template class DWARFYAML::VisitorImpl<const DWARFYAML::Data>;
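
The onVariableSizeValue dispatch above reduces every unit-dependent width (address size, DWARF32 vs DWARF64 offsets) to a truncation before the value is emitted. A tiny standalone sketch of the same reduction:

    #include <cstdint>
    #include <iostream>

    static uint64_t truncateToSize(uint64_t U, unsigned Size) {
      switch (Size) {
      case 8: return U;
      case 4: return uint32_t(U);
      case 2: return uint16_t(U);
      case 1: return uint8_t(U);
      default: return 0; // the patch hits llvm_unreachable here instead
      }
    }

    int main() {
      std::cout << std::hex << truncateToSize(0x1122334455667788ull, 4) << "\n";
      // prints 55667788: only the low four bytes survive a 32-bit emission
    }
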
diff --git a/lib/ObjectYAML/DWARFVisitor.h b/lib/ObjectYAML/DWARFVisitor.h
new file mode 100644
index 000000000000..263e36220a05
--- /dev/null
+++ b/lib/ObjectYAML/DWARFVisitor.h
@@ -0,0 +1,97 @@
+//===--- DWARFVisitor.h -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECTYAML_DWARFVISITOR_H
+#define LLVM_OBJECTYAML_DWARFVISITOR_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace llvm {
+
+namespace DWARFYAML {
+
+struct Data;
+struct Unit;
+struct Entry;
+struct FormValue;
+struct AttributeAbbrev;
+
+/// \brief A class that visits DWARFYAML Compile Units and DIEs in preorder.
+///
+/// Extensions of this class can maintain either const or non-const references
+/// to the DWARFYAML::Data object.
+template <typename T> class VisitorImpl {
+protected:
+ T &DebugInfo;
+
+ /// Visitor Functions
+ /// @{
+ virtual void onStartCompileUnit(Unit &CU) {}
+ virtual void onEndCompileUnit(Unit &CU) {}
+ virtual void onStartDIE(Unit &CU, Entry &DIE) {}
+ virtual void onEndDIE(Unit &CU, Entry &DIE) {}
+ virtual void onForm(AttributeAbbrev &AttAbbrev, FormValue &Value) {}
+ /// @}
+
+ /// Const Visitor Functions
+ /// @{
+ virtual void onStartCompileUnit(const Unit &CU) {}
+ virtual void onEndCompileUnit(const Unit &CU) {}
+ virtual void onStartDIE(const Unit &CU, const Entry &DIE) {}
+ virtual void onEndDIE(const Unit &CU, const Entry &DIE) {}
+ virtual void onForm(const AttributeAbbrev &AttAbbrev,
+ const FormValue &Value) {}
+ /// @}
+
+ /// Value visitors
+ /// @{
+ virtual void onValue(const uint8_t U) {}
+ virtual void onValue(const uint16_t U) {}
+ virtual void onValue(const uint32_t U) {}
+ virtual void onValue(const uint64_t U, const bool LEB = false) {}
+ virtual void onValue(const int64_t S, const bool LEB = false) {}
+ virtual void onValue(const StringRef String) {}
+ virtual void onValue(const MemoryBufferRef MBR) {}
+ /// @}
+
+public:
+ VisitorImpl(T &DI) : DebugInfo(DI) {}
+
+ virtual ~VisitorImpl() {}
+
+ void traverseDebugInfo();
+
+private:
+ void onVariableSizeValue(uint64_t U, unsigned Size);
+};
+
+// Make the visitor instantiations extern here and explicit in the cpp file.
+// This prevents them from being instantiated in every compilation unit that
+// uses the visitors.
+extern template class VisitorImpl<DWARFYAML::Data>;
+extern template class VisitorImpl<const DWARFYAML::Data>;
+
+class Visitor : public VisitorImpl<Data> {
+public:
+ Visitor(Data &DI) : VisitorImpl<Data>(DI) {}
+};
+
+class ConstVisitor : public VisitorImpl<const Data> {
+public:
+ ConstVisitor(const Data &DI) : VisitorImpl<const Data>(DI) {}
+};
+
+} // namespace DWARFYAML
+} // namespace llvm
+
+#endif
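
A standalone sketch of the extern-template pattern this header relies on, with illustrative types rather than the DWARFYAML classes: the template body is instantiated explicitly in one translation unit, and other units only see the extern declaration, so they never re-instantiate it:

    #include <iostream>

    template <typename T> class VisitorImpl {
    protected:
      T &Data;
      virtual void onValue(int) {} // hook for subclasses
    public:
      explicit VisitorImpl(T &D) : Data(D) {}
      virtual ~VisitorImpl() = default;
      void traverse() { onValue(Data); } // defined once, instantiated twice
    };

    // In a header this would read: extern template class VisitorImpl<int>;
    // In exactly one .cpp, the explicit instantiations:
    template class VisitorImpl<int>;
    template class VisitorImpl<const int>;

    struct Printer : VisitorImpl<const int> {
      using VisitorImpl<const int>::VisitorImpl;
      void onValue(int V) override { std::cout << V << "\n"; }
    };

    int main() {
      const int X = 7;
      Printer P(X);
      P.traverse(); // prints 7
    }
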
diff --git a/lib/ObjectYAML/DWARFYAML.cpp b/lib/ObjectYAML/DWARFYAML.cpp
index 014e63fe7d34..edb9545f14b1 100644
--- a/lib/ObjectYAML/DWARFYAML.cpp
+++ b/lib/ObjectYAML/DWARFYAML.cpp
@@ -54,6 +54,8 @@ void MappingTraits<DWARFYAML::AttributeAbbrev>::mapping(
IO &IO, DWARFYAML::AttributeAbbrev &AttAbbrev) {
IO.mapRequired("Attribute", AttAbbrev.Attribute);
IO.mapRequired("Form", AttAbbrev.Form);
+ if (AttAbbrev.Form == dwarf::DW_FORM_implicit_const)
+ IO.mapRequired("Value", AttAbbrev.Value);
}
void MappingTraits<DWARFYAML::ARangeDescriptor>::mapping(
@@ -97,6 +99,8 @@ void MappingTraits<DWARFYAML::PubSection>::mapping(
void MappingTraits<DWARFYAML::Unit>::mapping(IO &IO, DWARFYAML::Unit &Unit) {
IO.mapRequired("Length", Unit.Length);
IO.mapRequired("Version", Unit.Version);
+ if (Unit.Version >= 5)
+ IO.mapRequired("UnitType", Unit.Type);
IO.mapRequired("AbbrOffset", Unit.AbbrOffset);
IO.mapRequired("AddrSize", Unit.AddrSize);
IO.mapOptional("Entries", Unit.Entries);
@@ -144,9 +148,7 @@ void MappingTraits<DWARFYAML::LineTableOpcode>::mapping(
void MappingTraits<DWARFYAML::LineTable>::mapping(
IO &IO, DWARFYAML::LineTable &LineTable) {
- IO.mapRequired("TotalLength", LineTable.TotalLength);
- if (LineTable.TotalLength == UINT32_MAX)
- IO.mapRequired("TotalLength64", LineTable.TotalLength64);
+ IO.mapRequired("Length", LineTable.Length);
IO.mapRequired("Version", LineTable.Version);
IO.mapRequired("PrologueLength", LineTable.PrologueLength);
IO.mapRequired("MinInstLength", LineTable.MinInstLength);
@@ -162,6 +164,13 @@ void MappingTraits<DWARFYAML::LineTable>::mapping(
IO.mapRequired("Opcodes", LineTable.Opcodes);
}
+void MappingTraits<DWARFYAML::InitialLength>::mapping(
+ IO &IO, DWARFYAML::InitialLength &InitialLength) {
+ IO.mapRequired("TotalLength", InitialLength.TotalLength);
+ if (InitialLength.isDWARF64())
+ IO.mapRequired("TotalLength64", InitialLength.TotalLength64);
+}
+
} // namespace llvm::yaml
} // namespace llvm
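
The conditional mapRequired calls above make a field's presence depend on another field's value, e.g. TotalLength64 is only mapped when TotalLength holds the DWARF64 escape. A standalone sketch of that idea with a stand-in IO type (PrintIO here is illustrative, not llvm::yaml::IO):

    #include <cstdint>
    #include <iostream>

    struct InitialLength {
      uint32_t TotalLength = 0;
      uint64_t TotalLength64 = 0;
      bool isDWARF64() const { return TotalLength == 0xffffffffu; }
    };

    template <typename IOLike>
    void mapping(IOLike &IO, InitialLength &L) {
      IO.map("TotalLength", L.TotalLength);
      if (L.isDWARF64()) // only present in the DWARF64 form
        IO.map("TotalLength64", L.TotalLength64);
    }

    struct PrintIO { // trivial "output" IO: just dumps key/value pairs
      template <typename T> void map(const char *Key, T &V) {
        std::cout << Key << ": " << +V << "\n";
      }
    };

    int main() {
      InitialLength L;
      L.TotalLength = 0xffffffffu; // the escape value
      L.TotalLength64 = 123456789;
      PrintIO IO;
      mapping(IO, L); // prints both fields, since the escape is set
    }
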
diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp
index fe9af9f3ac76..3052901da45c 100644
--- a/lib/ObjectYAML/ELFYAML.cpp
+++ b/lib/ObjectYAML/ELFYAML.cpp
@@ -21,231 +21,229 @@ ELFYAML::Section::~Section() {}
namespace yaml {
-void
-ScalarEnumerationTraits<ELFYAML::ELF_ET>::enumeration(IO &IO,
- ELFYAML::ELF_ET &Value) {
-#define ECase(X) IO.enumCase(Value, #X, ELF::X);
- ECase(ET_NONE)
- ECase(ET_REL)
- ECase(ET_EXEC)
- ECase(ET_DYN)
- ECase(ET_CORE)
+void ScalarEnumerationTraits<ELFYAML::ELF_ET>::enumeration(
+ IO &IO, ELFYAML::ELF_ET &Value) {
+#define ECase(X) IO.enumCase(Value, #X, ELF::X)
+ ECase(ET_NONE);
+ ECase(ET_REL);
+ ECase(ET_EXEC);
+ ECase(ET_DYN);
+ ECase(ET_CORE);
#undef ECase
IO.enumFallback<Hex16>(Value);
}
-void
-ScalarEnumerationTraits<ELFYAML::ELF_EM>::enumeration(IO &IO,
- ELFYAML::ELF_EM &Value) {
-#define ECase(X) IO.enumCase(Value, #X, ELF::X);
- ECase(EM_NONE)
- ECase(EM_M32)
- ECase(EM_SPARC)
- ECase(EM_386)
- ECase(EM_68K)
- ECase(EM_88K)
- ECase(EM_IAMCU)
- ECase(EM_860)
- ECase(EM_MIPS)
- ECase(EM_S370)
- ECase(EM_MIPS_RS3_LE)
- ECase(EM_PARISC)
- ECase(EM_VPP500)
- ECase(EM_SPARC32PLUS)
- ECase(EM_960)
- ECase(EM_PPC)
- ECase(EM_PPC64)
- ECase(EM_S390)
- ECase(EM_SPU)
- ECase(EM_V800)
- ECase(EM_FR20)
- ECase(EM_RH32)
- ECase(EM_RCE)
- ECase(EM_ARM)
- ECase(EM_ALPHA)
- ECase(EM_SH)
- ECase(EM_SPARCV9)
- ECase(EM_TRICORE)
- ECase(EM_ARC)
- ECase(EM_H8_300)
- ECase(EM_H8_300H)
- ECase(EM_H8S)
- ECase(EM_H8_500)
- ECase(EM_IA_64)
- ECase(EM_MIPS_X)
- ECase(EM_COLDFIRE)
- ECase(EM_68HC12)
- ECase(EM_MMA)
- ECase(EM_PCP)
- ECase(EM_NCPU)
- ECase(EM_NDR1)
- ECase(EM_STARCORE)
- ECase(EM_ME16)
- ECase(EM_ST100)
- ECase(EM_TINYJ)
- ECase(EM_X86_64)
- ECase(EM_PDSP)
- ECase(EM_PDP10)
- ECase(EM_PDP11)
- ECase(EM_FX66)
- ECase(EM_ST9PLUS)
- ECase(EM_ST7)
- ECase(EM_68HC16)
- ECase(EM_68HC11)
- ECase(EM_68HC08)
- ECase(EM_68HC05)
- ECase(EM_SVX)
- ECase(EM_ST19)
- ECase(EM_VAX)
- ECase(EM_CRIS)
- ECase(EM_JAVELIN)
- ECase(EM_FIREPATH)
- ECase(EM_ZSP)
- ECase(EM_MMIX)
- ECase(EM_HUANY)
- ECase(EM_PRISM)
- ECase(EM_AVR)
- ECase(EM_FR30)
- ECase(EM_D10V)
- ECase(EM_D30V)
- ECase(EM_V850)
- ECase(EM_M32R)
- ECase(EM_MN10300)
- ECase(EM_MN10200)
- ECase(EM_PJ)
- ECase(EM_OPENRISC)
- ECase(EM_ARC_COMPACT)
- ECase(EM_XTENSA)
- ECase(EM_VIDEOCORE)
- ECase(EM_TMM_GPP)
- ECase(EM_NS32K)
- ECase(EM_TPC)
- ECase(EM_SNP1K)
- ECase(EM_ST200)
- ECase(EM_IP2K)
- ECase(EM_MAX)
- ECase(EM_CR)
- ECase(EM_F2MC16)
- ECase(EM_MSP430)
- ECase(EM_BLACKFIN)
- ECase(EM_SE_C33)
- ECase(EM_SEP)
- ECase(EM_ARCA)
- ECase(EM_UNICORE)
- ECase(EM_EXCESS)
- ECase(EM_DXP)
- ECase(EM_ALTERA_NIOS2)
- ECase(EM_CRX)
- ECase(EM_XGATE)
- ECase(EM_C166)
- ECase(EM_M16C)
- ECase(EM_DSPIC30F)
- ECase(EM_CE)
- ECase(EM_M32C)
- ECase(EM_TSK3000)
- ECase(EM_RS08)
- ECase(EM_SHARC)
- ECase(EM_ECOG2)
- ECase(EM_SCORE7)
- ECase(EM_DSP24)
- ECase(EM_VIDEOCORE3)
- ECase(EM_LATTICEMICO32)
- ECase(EM_SE_C17)
- ECase(EM_TI_C6000)
- ECase(EM_TI_C2000)
- ECase(EM_TI_C5500)
- ECase(EM_MMDSP_PLUS)
- ECase(EM_CYPRESS_M8C)
- ECase(EM_R32C)
- ECase(EM_TRIMEDIA)
- ECase(EM_HEXAGON)
- ECase(EM_8051)
- ECase(EM_STXP7X)
- ECase(EM_NDS32)
- ECase(EM_ECOG1)
- ECase(EM_ECOG1X)
- ECase(EM_MAXQ30)
- ECase(EM_XIMO16)
- ECase(EM_MANIK)
- ECase(EM_CRAYNV2)
- ECase(EM_RX)
- ECase(EM_METAG)
- ECase(EM_MCST_ELBRUS)
- ECase(EM_ECOG16)
- ECase(EM_CR16)
- ECase(EM_ETPU)
- ECase(EM_SLE9X)
- ECase(EM_L10M)
- ECase(EM_K10M)
- ECase(EM_AARCH64)
- ECase(EM_AVR32)
- ECase(EM_STM8)
- ECase(EM_TILE64)
- ECase(EM_TILEPRO)
- ECase(EM_CUDA)
- ECase(EM_TILEGX)
- ECase(EM_CLOUDSHIELD)
- ECase(EM_COREA_1ST)
- ECase(EM_COREA_2ND)
- ECase(EM_ARC_COMPACT2)
- ECase(EM_OPEN8)
- ECase(EM_RL78)
- ECase(EM_VIDEOCORE5)
- ECase(EM_78KOR)
- ECase(EM_56800EX)
- ECase(EM_AMDGPU)
- ECase(EM_RISCV)
- ECase(EM_LANAI)
- ECase(EM_BPF)
+void ScalarEnumerationTraits<ELFYAML::ELF_EM>::enumeration(
+ IO &IO, ELFYAML::ELF_EM &Value) {
+#define ECase(X) IO.enumCase(Value, #X, ELF::X)
+ ECase(EM_NONE);
+ ECase(EM_M32);
+ ECase(EM_SPARC);
+ ECase(EM_386);
+ ECase(EM_68K);
+ ECase(EM_88K);
+ ECase(EM_IAMCU);
+ ECase(EM_860);
+ ECase(EM_MIPS);
+ ECase(EM_S370);
+ ECase(EM_MIPS_RS3_LE);
+ ECase(EM_PARISC);
+ ECase(EM_VPP500);
+ ECase(EM_SPARC32PLUS);
+ ECase(EM_960);
+ ECase(EM_PPC);
+ ECase(EM_PPC64);
+ ECase(EM_S390);
+ ECase(EM_SPU);
+ ECase(EM_V800);
+ ECase(EM_FR20);
+ ECase(EM_RH32);
+ ECase(EM_RCE);
+ ECase(EM_ARM);
+ ECase(EM_ALPHA);
+ ECase(EM_SH);
+ ECase(EM_SPARCV9);
+ ECase(EM_TRICORE);
+ ECase(EM_ARC);
+ ECase(EM_H8_300);
+ ECase(EM_H8_300H);
+ ECase(EM_H8S);
+ ECase(EM_H8_500);
+ ECase(EM_IA_64);
+ ECase(EM_MIPS_X);
+ ECase(EM_COLDFIRE);
+ ECase(EM_68HC12);
+ ECase(EM_MMA);
+ ECase(EM_PCP);
+ ECase(EM_NCPU);
+ ECase(EM_NDR1);
+ ECase(EM_STARCORE);
+ ECase(EM_ME16);
+ ECase(EM_ST100);
+ ECase(EM_TINYJ);
+ ECase(EM_X86_64);
+ ECase(EM_PDSP);
+ ECase(EM_PDP10);
+ ECase(EM_PDP11);
+ ECase(EM_FX66);
+ ECase(EM_ST9PLUS);
+ ECase(EM_ST7);
+ ECase(EM_68HC16);
+ ECase(EM_68HC11);
+ ECase(EM_68HC08);
+ ECase(EM_68HC05);
+ ECase(EM_SVX);
+ ECase(EM_ST19);
+ ECase(EM_VAX);
+ ECase(EM_CRIS);
+ ECase(EM_JAVELIN);
+ ECase(EM_FIREPATH);
+ ECase(EM_ZSP);
+ ECase(EM_MMIX);
+ ECase(EM_HUANY);
+ ECase(EM_PRISM);
+ ECase(EM_AVR);
+ ECase(EM_FR30);
+ ECase(EM_D10V);
+ ECase(EM_D30V);
+ ECase(EM_V850);
+ ECase(EM_M32R);
+ ECase(EM_MN10300);
+ ECase(EM_MN10200);
+ ECase(EM_PJ);
+ ECase(EM_OPENRISC);
+ ECase(EM_ARC_COMPACT);
+ ECase(EM_XTENSA);
+ ECase(EM_VIDEOCORE);
+ ECase(EM_TMM_GPP);
+ ECase(EM_NS32K);
+ ECase(EM_TPC);
+ ECase(EM_SNP1K);
+ ECase(EM_ST200);
+ ECase(EM_IP2K);
+ ECase(EM_MAX);
+ ECase(EM_CR);
+ ECase(EM_F2MC16);
+ ECase(EM_MSP430);
+ ECase(EM_BLACKFIN);
+ ECase(EM_SE_C33);
+ ECase(EM_SEP);
+ ECase(EM_ARCA);
+ ECase(EM_UNICORE);
+ ECase(EM_EXCESS);
+ ECase(EM_DXP);
+ ECase(EM_ALTERA_NIOS2);
+ ECase(EM_CRX);
+ ECase(EM_XGATE);
+ ECase(EM_C166);
+ ECase(EM_M16C);
+ ECase(EM_DSPIC30F);
+ ECase(EM_CE);
+ ECase(EM_M32C);
+ ECase(EM_TSK3000);
+ ECase(EM_RS08);
+ ECase(EM_SHARC);
+ ECase(EM_ECOG2);
+ ECase(EM_SCORE7);
+ ECase(EM_DSP24);
+ ECase(EM_VIDEOCORE3);
+ ECase(EM_LATTICEMICO32);
+ ECase(EM_SE_C17);
+ ECase(EM_TI_C6000);
+ ECase(EM_TI_C2000);
+ ECase(EM_TI_C5500);
+ ECase(EM_MMDSP_PLUS);
+ ECase(EM_CYPRESS_M8C);
+ ECase(EM_R32C);
+ ECase(EM_TRIMEDIA);
+ ECase(EM_HEXAGON);
+ ECase(EM_8051);
+ ECase(EM_STXP7X);
+ ECase(EM_NDS32);
+ ECase(EM_ECOG1);
+ ECase(EM_ECOG1X);
+ ECase(EM_MAXQ30);
+ ECase(EM_XIMO16);
+ ECase(EM_MANIK);
+ ECase(EM_CRAYNV2);
+ ECase(EM_RX);
+ ECase(EM_METAG);
+ ECase(EM_MCST_ELBRUS);
+ ECase(EM_ECOG16);
+ ECase(EM_CR16);
+ ECase(EM_ETPU);
+ ECase(EM_SLE9X);
+ ECase(EM_L10M);
+ ECase(EM_K10M);
+ ECase(EM_AARCH64);
+ ECase(EM_AVR32);
+ ECase(EM_STM8);
+ ECase(EM_TILE64);
+ ECase(EM_TILEPRO);
+ ECase(EM_CUDA);
+ ECase(EM_TILEGX);
+ ECase(EM_CLOUDSHIELD);
+ ECase(EM_COREA_1ST);
+ ECase(EM_COREA_2ND);
+ ECase(EM_ARC_COMPACT2);
+ ECase(EM_OPEN8);
+ ECase(EM_RL78);
+ ECase(EM_VIDEOCORE5);
+ ECase(EM_78KOR);
+ ECase(EM_56800EX);
+ ECase(EM_AMDGPU);
+ ECase(EM_RISCV);
+ ECase(EM_LANAI);
+ ECase(EM_BPF);
#undef ECase
}
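The ECase idiom above is YAMLIO's standard way to expose a scalar enum: specialize ScalarEnumerationTraits and register one enumCase per accepted spelling. A minimal self-contained sketch of the same pattern, using a hypothetical enum that is not part of this patch:

#include "llvm/Support/YAMLTraits.h"

// Hypothetical enum, for illustration only.
enum class Fruit { Apple, Banana };

namespace llvm {
namespace yaml {
template <> struct ScalarEnumerationTraits<Fruit> {
  static void enumeration(IO &IO, Fruit &Value) {
    // One enumCase per legal YAML spelling, as in the ECase expansions above.
    IO.enumCase(Value, "Apple", Fruit::Apple);
    IO.enumCase(Value, "Banana", Fruit::Banana);
  }
};
} // end namespace yaml
} // end namespace llvm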
void ScalarEnumerationTraits<ELFYAML::ELF_ELFCLASS>::enumeration(
IO &IO, ELFYAML::ELF_ELFCLASS &Value) {
-#define ECase(X) IO.enumCase(Value, #X, ELF::X);
+#define ECase(X) IO.enumCase(Value, #X, ELF::X)
// Since the semantics of ELFCLASSNONE is "invalid", just don't accept it
// here.
- ECase(ELFCLASS32)
- ECase(ELFCLASS64)
+ ECase(ELFCLASS32);
+ ECase(ELFCLASS64);
#undef ECase
}
void ScalarEnumerationTraits<ELFYAML::ELF_ELFDATA>::enumeration(
IO &IO, ELFYAML::ELF_ELFDATA &Value) {
-#define ECase(X) IO.enumCase(Value, #X, ELF::X);
+#define ECase(X) IO.enumCase(Value, #X, ELF::X)
// Since the semantics of ELFDATANONE is "invalid", just don't accept it
// here.
- ECase(ELFDATA2LSB)
- ECase(ELFDATA2MSB)
+ ECase(ELFDATA2LSB);
+ ECase(ELFDATA2MSB);
#undef ECase
}
void ScalarEnumerationTraits<ELFYAML::ELF_ELFOSABI>::enumeration(
IO &IO, ELFYAML::ELF_ELFOSABI &Value) {
-#define ECase(X) IO.enumCase(Value, #X, ELF::X);
- ECase(ELFOSABI_NONE)
- ECase(ELFOSABI_HPUX)
- ECase(ELFOSABI_NETBSD)
- ECase(ELFOSABI_GNU)
- ECase(ELFOSABI_GNU)
- ECase(ELFOSABI_HURD)
- ECase(ELFOSABI_SOLARIS)
- ECase(ELFOSABI_AIX)
- ECase(ELFOSABI_IRIX)
- ECase(ELFOSABI_FREEBSD)
- ECase(ELFOSABI_TRU64)
- ECase(ELFOSABI_MODESTO)
- ECase(ELFOSABI_OPENBSD)
- ECase(ELFOSABI_OPENVMS)
- ECase(ELFOSABI_NSK)
- ECase(ELFOSABI_AROS)
- ECase(ELFOSABI_FENIXOS)
- ECase(ELFOSABI_CLOUDABI)
- ECase(ELFOSABI_C6000_ELFABI)
- ECase(ELFOSABI_AMDGPU_HSA)
- ECase(ELFOSABI_C6000_LINUX)
- ECase(ELFOSABI_ARM)
- ECase(ELFOSABI_STANDALONE)
+#define ECase(X) IO.enumCase(Value, #X, ELF::X)
+ ECase(ELFOSABI_NONE);
+ ECase(ELFOSABI_HPUX);
+ ECase(ELFOSABI_NETBSD);
+ ECase(ELFOSABI_GNU);
+ ECase(ELFOSABI_GNU);
+ ECase(ELFOSABI_HURD);
+ ECase(ELFOSABI_SOLARIS);
+ ECase(ELFOSABI_AIX);
+ ECase(ELFOSABI_IRIX);
+ ECase(ELFOSABI_FREEBSD);
+ ECase(ELFOSABI_TRU64);
+ ECase(ELFOSABI_MODESTO);
+ ECase(ELFOSABI_OPENBSD);
+ ECase(ELFOSABI_OPENVMS);
+ ECase(ELFOSABI_NSK);
+ ECase(ELFOSABI_AROS);
+ ECase(ELFOSABI_FENIXOS);
+ ECase(ELFOSABI_CLOUDABI);
+ ECase(ELFOSABI_C6000_ELFABI);
+ ECase(ELFOSABI_AMDGPU_HSA);
+ ECase(ELFOSABI_C6000_LINUX);
+ ECase(ELFOSABI_ARM);
+ ECase(ELFOSABI_STANDALONE);
#undef ECase
}
@@ -253,92 +251,92 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
ELFYAML::ELF_EF &Value) {
const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
assert(Object && "The IO context is not initialized");
-#define BCase(X) IO.bitSetCase(Value, #X, ELF::X);
-#define BCaseMask(X, M) IO.maskedBitSetCase(Value, #X, ELF::X, ELF::M);
+#define BCase(X) IO.bitSetCase(Value, #X, ELF::X)
+#define BCaseMask(X, M) IO.maskedBitSetCase(Value, #X, ELF::X, ELF::M)
switch (Object->Header.Machine) {
case ELF::EM_ARM:
- BCase(EF_ARM_SOFT_FLOAT)
- BCase(EF_ARM_VFP_FLOAT)
- BCaseMask(EF_ARM_EABI_UNKNOWN, EF_ARM_EABIMASK)
- BCaseMask(EF_ARM_EABI_VER1, EF_ARM_EABIMASK)
- BCaseMask(EF_ARM_EABI_VER2, EF_ARM_EABIMASK)
- BCaseMask(EF_ARM_EABI_VER3, EF_ARM_EABIMASK)
- BCaseMask(EF_ARM_EABI_VER4, EF_ARM_EABIMASK)
- BCaseMask(EF_ARM_EABI_VER5, EF_ARM_EABIMASK)
+ BCase(EF_ARM_SOFT_FLOAT);
+ BCase(EF_ARM_VFP_FLOAT);
+ BCaseMask(EF_ARM_EABI_UNKNOWN, EF_ARM_EABIMASK);
+ BCaseMask(EF_ARM_EABI_VER1, EF_ARM_EABIMASK);
+ BCaseMask(EF_ARM_EABI_VER2, EF_ARM_EABIMASK);
+ BCaseMask(EF_ARM_EABI_VER3, EF_ARM_EABIMASK);
+ BCaseMask(EF_ARM_EABI_VER4, EF_ARM_EABIMASK);
+ BCaseMask(EF_ARM_EABI_VER5, EF_ARM_EABIMASK);
break;
case ELF::EM_MIPS:
- BCase(EF_MIPS_NOREORDER)
- BCase(EF_MIPS_PIC)
- BCase(EF_MIPS_CPIC)
- BCase(EF_MIPS_ABI2)
- BCase(EF_MIPS_32BITMODE)
- BCase(EF_MIPS_FP64)
- BCase(EF_MIPS_NAN2008)
- BCase(EF_MIPS_MICROMIPS)
- BCase(EF_MIPS_ARCH_ASE_M16)
- BCase(EF_MIPS_ARCH_ASE_MDMX)
- BCaseMask(EF_MIPS_ABI_O32, EF_MIPS_ABI)
- BCaseMask(EF_MIPS_ABI_O64, EF_MIPS_ABI)
- BCaseMask(EF_MIPS_ABI_EABI32, EF_MIPS_ABI)
- BCaseMask(EF_MIPS_ABI_EABI64, EF_MIPS_ABI)
- BCaseMask(EF_MIPS_MACH_3900, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_4010, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_4100, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_4650, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_4120, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_4111, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_SB1, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_OCTEON, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_XLR, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_OCTEON2, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_OCTEON3, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_5400, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_5900, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_5500, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_9000, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_LS2E, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_LS2F, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_MACH_LS3A, EF_MIPS_MACH)
- BCaseMask(EF_MIPS_ARCH_1, EF_MIPS_ARCH)
- BCaseMask(EF_MIPS_ARCH_2, EF_MIPS_ARCH)
- BCaseMask(EF_MIPS_ARCH_3, EF_MIPS_ARCH)
- BCaseMask(EF_MIPS_ARCH_4, EF_MIPS_ARCH)
- BCaseMask(EF_MIPS_ARCH_5, EF_MIPS_ARCH)
- BCaseMask(EF_MIPS_ARCH_32, EF_MIPS_ARCH)
- BCaseMask(EF_MIPS_ARCH_64, EF_MIPS_ARCH)
- BCaseMask(EF_MIPS_ARCH_32R2, EF_MIPS_ARCH)
- BCaseMask(EF_MIPS_ARCH_64R2, EF_MIPS_ARCH)
- BCaseMask(EF_MIPS_ARCH_32R6, EF_MIPS_ARCH)
- BCaseMask(EF_MIPS_ARCH_64R6, EF_MIPS_ARCH)
+ BCase(EF_MIPS_NOREORDER);
+ BCase(EF_MIPS_PIC);
+ BCase(EF_MIPS_CPIC);
+ BCase(EF_MIPS_ABI2);
+ BCase(EF_MIPS_32BITMODE);
+ BCase(EF_MIPS_FP64);
+ BCase(EF_MIPS_NAN2008);
+ BCase(EF_MIPS_MICROMIPS);
+ BCase(EF_MIPS_ARCH_ASE_M16);
+ BCase(EF_MIPS_ARCH_ASE_MDMX);
+ BCaseMask(EF_MIPS_ABI_O32, EF_MIPS_ABI);
+ BCaseMask(EF_MIPS_ABI_O64, EF_MIPS_ABI);
+ BCaseMask(EF_MIPS_ABI_EABI32, EF_MIPS_ABI);
+ BCaseMask(EF_MIPS_ABI_EABI64, EF_MIPS_ABI);
+ BCaseMask(EF_MIPS_MACH_3900, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_4010, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_4100, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_4650, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_4120, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_4111, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_SB1, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_OCTEON, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_XLR, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_OCTEON2, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_OCTEON3, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_5400, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_5900, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_5500, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_9000, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_LS2E, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_LS2F, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_MACH_LS3A, EF_MIPS_MACH);
+ BCaseMask(EF_MIPS_ARCH_1, EF_MIPS_ARCH);
+ BCaseMask(EF_MIPS_ARCH_2, EF_MIPS_ARCH);
+ BCaseMask(EF_MIPS_ARCH_3, EF_MIPS_ARCH);
+ BCaseMask(EF_MIPS_ARCH_4, EF_MIPS_ARCH);
+ BCaseMask(EF_MIPS_ARCH_5, EF_MIPS_ARCH);
+ BCaseMask(EF_MIPS_ARCH_32, EF_MIPS_ARCH);
+ BCaseMask(EF_MIPS_ARCH_64, EF_MIPS_ARCH);
+ BCaseMask(EF_MIPS_ARCH_32R2, EF_MIPS_ARCH);
+ BCaseMask(EF_MIPS_ARCH_64R2, EF_MIPS_ARCH);
+ BCaseMask(EF_MIPS_ARCH_32R6, EF_MIPS_ARCH);
+ BCaseMask(EF_MIPS_ARCH_64R6, EF_MIPS_ARCH);
break;
case ELF::EM_HEXAGON:
- BCase(EF_HEXAGON_MACH_V2)
- BCase(EF_HEXAGON_MACH_V3)
- BCase(EF_HEXAGON_MACH_V4)
- BCase(EF_HEXAGON_MACH_V5)
- BCase(EF_HEXAGON_ISA_V2)
- BCase(EF_HEXAGON_ISA_V3)
- BCase(EF_HEXAGON_ISA_V4)
- BCase(EF_HEXAGON_ISA_V5)
+ BCase(EF_HEXAGON_MACH_V2);
+ BCase(EF_HEXAGON_MACH_V3);
+ BCase(EF_HEXAGON_MACH_V4);
+ BCase(EF_HEXAGON_MACH_V5);
+ BCase(EF_HEXAGON_ISA_V2);
+ BCase(EF_HEXAGON_ISA_V3);
+ BCase(EF_HEXAGON_ISA_V4);
+ BCase(EF_HEXAGON_ISA_V5);
break;
case ELF::EM_AVR:
- BCase(EF_AVR_ARCH_AVR1)
- BCase(EF_AVR_ARCH_AVR2)
- BCase(EF_AVR_ARCH_AVR25)
- BCase(EF_AVR_ARCH_AVR3)
- BCase(EF_AVR_ARCH_AVR31)
- BCase(EF_AVR_ARCH_AVR35)
- BCase(EF_AVR_ARCH_AVR4)
- BCase(EF_AVR_ARCH_AVR51)
- BCase(EF_AVR_ARCH_AVR6)
- BCase(EF_AVR_ARCH_AVRTINY)
- BCase(EF_AVR_ARCH_XMEGA1)
- BCase(EF_AVR_ARCH_XMEGA2)
- BCase(EF_AVR_ARCH_XMEGA3)
- BCase(EF_AVR_ARCH_XMEGA4)
- BCase(EF_AVR_ARCH_XMEGA5)
- BCase(EF_AVR_ARCH_XMEGA6)
- BCase(EF_AVR_ARCH_XMEGA7)
+ BCase(EF_AVR_ARCH_AVR1);
+ BCase(EF_AVR_ARCH_AVR2);
+ BCase(EF_AVR_ARCH_AVR25);
+ BCase(EF_AVR_ARCH_AVR3);
+ BCase(EF_AVR_ARCH_AVR31);
+ BCase(EF_AVR_ARCH_AVR35);
+ BCase(EF_AVR_ARCH_AVR4);
+ BCase(EF_AVR_ARCH_AVR51);
+ BCase(EF_AVR_ARCH_AVR6);
+ BCase(EF_AVR_ARCH_AVRTINY);
+ BCase(EF_AVR_ARCH_XMEGA1);
+ BCase(EF_AVR_ARCH_XMEGA2);
+ BCase(EF_AVR_ARCH_XMEGA3);
+ BCase(EF_AVR_ARCH_XMEGA4);
+ BCase(EF_AVR_ARCH_XMEGA5);
+ BCase(EF_AVR_ARCH_XMEGA6);
+ BCase(EF_AVR_ARCH_XMEGA7);
break;
case ELF::EM_AMDGPU:
case ELF::EM_X86_64:
@@ -354,51 +352,51 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
IO &IO, ELFYAML::ELF_SHT &Value) {
const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
assert(Object && "The IO context is not initialized");
-#define ECase(X) IO.enumCase(Value, #X, ELF::X);
- ECase(SHT_NULL)
- ECase(SHT_PROGBITS)
+#define ECase(X) IO.enumCase(Value, #X, ELF::X)
+ ECase(SHT_NULL);
+ ECase(SHT_PROGBITS);
// No SHT_SYMTAB. Use the top-level `Symbols` key instead.
// FIXME: Issue a diagnostic with this information.
- ECase(SHT_STRTAB)
- ECase(SHT_RELA)
- ECase(SHT_HASH)
- ECase(SHT_DYNAMIC)
- ECase(SHT_NOTE)
- ECase(SHT_NOBITS)
- ECase(SHT_REL)
- ECase(SHT_SHLIB)
- ECase(SHT_DYNSYM)
- ECase(SHT_INIT_ARRAY)
- ECase(SHT_FINI_ARRAY)
- ECase(SHT_PREINIT_ARRAY)
- ECase(SHT_GROUP)
- ECase(SHT_SYMTAB_SHNDX)
- ECase(SHT_LOOS)
- ECase(SHT_GNU_ATTRIBUTES)
- ECase(SHT_GNU_HASH)
- ECase(SHT_GNU_verdef)
- ECase(SHT_GNU_verneed)
- ECase(SHT_GNU_versym)
- ECase(SHT_HIOS)
- ECase(SHT_LOPROC)
+ ECase(SHT_STRTAB);
+ ECase(SHT_RELA);
+ ECase(SHT_HASH);
+ ECase(SHT_DYNAMIC);
+ ECase(SHT_NOTE);
+ ECase(SHT_NOBITS);
+ ECase(SHT_REL);
+ ECase(SHT_SHLIB);
+ ECase(SHT_DYNSYM);
+ ECase(SHT_INIT_ARRAY);
+ ECase(SHT_FINI_ARRAY);
+ ECase(SHT_PREINIT_ARRAY);
+ ECase(SHT_GROUP);
+ ECase(SHT_SYMTAB_SHNDX);
+ ECase(SHT_LOOS);
+ ECase(SHT_GNU_ATTRIBUTES);
+ ECase(SHT_GNU_HASH);
+ ECase(SHT_GNU_verdef);
+ ECase(SHT_GNU_verneed);
+ ECase(SHT_GNU_versym);
+ ECase(SHT_HIOS);
+ ECase(SHT_LOPROC);
switch (Object->Header.Machine) {
case ELF::EM_ARM:
- ECase(SHT_ARM_EXIDX)
- ECase(SHT_ARM_PREEMPTMAP)
- ECase(SHT_ARM_ATTRIBUTES)
- ECase(SHT_ARM_DEBUGOVERLAY)
- ECase(SHT_ARM_OVERLAYSECTION)
+ ECase(SHT_ARM_EXIDX);
+ ECase(SHT_ARM_PREEMPTMAP);
+ ECase(SHT_ARM_ATTRIBUTES);
+ ECase(SHT_ARM_DEBUGOVERLAY);
+ ECase(SHT_ARM_OVERLAYSECTION);
break;
case ELF::EM_HEXAGON:
- ECase(SHT_HEX_ORDERED)
+ ECase(SHT_HEX_ORDERED);
break;
case ELF::EM_X86_64:
- ECase(SHT_X86_64_UNWIND)
+ ECase(SHT_X86_64_UNWIND);
break;
case ELF::EM_MIPS:
- ECase(SHT_MIPS_REGINFO)
- ECase(SHT_MIPS_OPTIONS)
- ECase(SHT_MIPS_ABIFLAGS)
+ ECase(SHT_MIPS_REGINFO);
+ ECase(SHT_MIPS_OPTIONS);
+ ECase(SHT_MIPS_ABIFLAGS);
break;
default:
// Nothing to do.
@@ -410,43 +408,43 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
void ScalarBitSetTraits<ELFYAML::ELF_SHF>::bitset(IO &IO,
ELFYAML::ELF_SHF &Value) {
const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
-#define BCase(X) IO.bitSetCase(Value, #X, ELF::X);
- BCase(SHF_WRITE)
- BCase(SHF_ALLOC)
- BCase(SHF_EXCLUDE)
- BCase(SHF_EXECINSTR)
- BCase(SHF_MERGE)
- BCase(SHF_STRINGS)
- BCase(SHF_INFO_LINK)
- BCase(SHF_LINK_ORDER)
- BCase(SHF_OS_NONCONFORMING)
- BCase(SHF_GROUP)
- BCase(SHF_TLS)
- switch(Object->Header.Machine) {
+#define BCase(X) IO.bitSetCase(Value, #X, ELF::X)
+ BCase(SHF_WRITE);
+ BCase(SHF_ALLOC);
+ BCase(SHF_EXCLUDE);
+ BCase(SHF_EXECINSTR);
+ BCase(SHF_MERGE);
+ BCase(SHF_STRINGS);
+ BCase(SHF_INFO_LINK);
+ BCase(SHF_LINK_ORDER);
+ BCase(SHF_OS_NONCONFORMING);
+ BCase(SHF_GROUP);
+ BCase(SHF_TLS);
+ switch (Object->Header.Machine) {
case ELF::EM_ARM:
- BCase(SHF_ARM_PURECODE)
+ BCase(SHF_ARM_PURECODE);
break;
case ELF::EM_AMDGPU:
- BCase(SHF_AMDGPU_HSA_GLOBAL)
- BCase(SHF_AMDGPU_HSA_READONLY)
- BCase(SHF_AMDGPU_HSA_CODE)
- BCase(SHF_AMDGPU_HSA_AGENT)
+ BCase(SHF_AMDGPU_HSA_GLOBAL);
+ BCase(SHF_AMDGPU_HSA_READONLY);
+ BCase(SHF_AMDGPU_HSA_CODE);
+ BCase(SHF_AMDGPU_HSA_AGENT);
break;
case ELF::EM_HEXAGON:
- BCase(SHF_HEX_GPREL)
+ BCase(SHF_HEX_GPREL);
break;
case ELF::EM_MIPS:
- BCase(SHF_MIPS_NODUPES)
- BCase(SHF_MIPS_NAMES)
- BCase(SHF_MIPS_LOCAL)
- BCase(SHF_MIPS_NOSTRIP)
- BCase(SHF_MIPS_GPREL)
- BCase(SHF_MIPS_MERGE)
- BCase(SHF_MIPS_ADDR)
- BCase(SHF_MIPS_STRING)
+ BCase(SHF_MIPS_NODUPES);
+ BCase(SHF_MIPS_NAMES);
+ BCase(SHF_MIPS_LOCAL);
+ BCase(SHF_MIPS_NOSTRIP);
+ BCase(SHF_MIPS_GPREL);
+ BCase(SHF_MIPS_MERGE);
+ BCase(SHF_MIPS_ADDR);
+ BCase(SHF_MIPS_STRING);
break;
case ELF::EM_X86_64:
- BCase(SHF_X86_64_LARGE)
+ BCase(SHF_X86_64_LARGE);
break;
default:
// Nothing to do.
@@ -457,25 +455,25 @@ void ScalarBitSetTraits<ELFYAML::ELF_SHF>::bitset(IO &IO,
void ScalarEnumerationTraits<ELFYAML::ELF_STT>::enumeration(
IO &IO, ELFYAML::ELF_STT &Value) {
-#define ECase(X) IO.enumCase(Value, #X, ELF::X);
- ECase(STT_NOTYPE)
- ECase(STT_OBJECT)
- ECase(STT_FUNC)
- ECase(STT_SECTION)
- ECase(STT_FILE)
- ECase(STT_COMMON)
- ECase(STT_TLS)
- ECase(STT_GNU_IFUNC)
+#define ECase(X) IO.enumCase(Value, #X, ELF::X)
+ ECase(STT_NOTYPE);
+ ECase(STT_OBJECT);
+ ECase(STT_FUNC);
+ ECase(STT_SECTION);
+ ECase(STT_FILE);
+ ECase(STT_COMMON);
+ ECase(STT_TLS);
+ ECase(STT_GNU_IFUNC);
#undef ECase
}
void ScalarEnumerationTraits<ELFYAML::ELF_STV>::enumeration(
IO &IO, ELFYAML::ELF_STV &Value) {
-#define ECase(X) IO.enumCase(Value, #X, ELF::X);
- ECase(STV_DEFAULT)
- ECase(STV_INTERNAL)
- ECase(STV_HIDDEN)
- ECase(STV_PROTECTED)
+#define ECase(X) IO.enumCase(Value, #X, ELF::X)
+ ECase(STV_DEFAULT);
+ ECase(STV_INTERNAL);
+ ECase(STV_HIDDEN);
+ ECase(STV_PROTECTED);
#undef ECase
}
@@ -483,13 +481,13 @@ void ScalarBitSetTraits<ELFYAML::ELF_STO>::bitset(IO &IO,
ELFYAML::ELF_STO &Value) {
const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
assert(Object && "The IO context is not initialized");
-#define BCase(X) IO.bitSetCase(Value, #X, ELF::X);
+#define BCase(X) IO.bitSetCase(Value, #X, ELF::X)
switch (Object->Header.Machine) {
case ELF::EM_MIPS:
- BCase(STO_MIPS_OPTIONAL)
- BCase(STO_MIPS_PLT)
- BCase(STO_MIPS_PIC)
- BCase(STO_MIPS_MICROMIPS)
+ BCase(STO_MIPS_OPTIONAL);
+ BCase(STO_MIPS_PLT);
+ BCase(STO_MIPS_PIC);
+ BCase(STO_MIPS_MICROMIPS);
break;
default:
break; // Nothing to do
@@ -500,11 +498,11 @@ void ScalarBitSetTraits<ELFYAML::ELF_STO>::bitset(IO &IO,
void ScalarEnumerationTraits<ELFYAML::ELF_RSS>::enumeration(
IO &IO, ELFYAML::ELF_RSS &Value) {
-#define ECase(X) IO.enumCase(Value, #X, ELF::X);
- ECase(RSS_UNDEF)
- ECase(RSS_GP)
- ECase(RSS_GP0)
- ECase(RSS_LOC)
+#define ECase(X) IO.enumCase(Value, #X, ELF::X)
+ ECase(RSS_UNDEF);
+ ECase(RSS_GP);
+ ECase(RSS_GP0);
+ ECase(RSS_LOC);
#undef ECase
}
@@ -553,51 +551,51 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
void ScalarEnumerationTraits<ELFYAML::MIPS_AFL_REG>::enumeration(
IO &IO, ELFYAML::MIPS_AFL_REG &Value) {
-#define ECase(X) IO.enumCase(Value, #X, Mips::AFL_##X);
- ECase(REG_NONE)
- ECase(REG_32)
- ECase(REG_64)
- ECase(REG_128)
+#define ECase(X) IO.enumCase(Value, #X, Mips::AFL_##X)
+ ECase(REG_NONE);
+ ECase(REG_32);
+ ECase(REG_64);
+ ECase(REG_128);
#undef ECase
}
void ScalarEnumerationTraits<ELFYAML::MIPS_ABI_FP>::enumeration(
IO &IO, ELFYAML::MIPS_ABI_FP &Value) {
-#define ECase(X) IO.enumCase(Value, #X, Mips::Val_GNU_MIPS_ABI_##X);
- ECase(FP_ANY)
- ECase(FP_DOUBLE)
- ECase(FP_SINGLE)
- ECase(FP_SOFT)
- ECase(FP_OLD_64)
- ECase(FP_XX)
- ECase(FP_64)
- ECase(FP_64A)
+#define ECase(X) IO.enumCase(Value, #X, Mips::Val_GNU_MIPS_ABI_##X)
+ ECase(FP_ANY);
+ ECase(FP_DOUBLE);
+ ECase(FP_SINGLE);
+ ECase(FP_SOFT);
+ ECase(FP_OLD_64);
+ ECase(FP_XX);
+ ECase(FP_64);
+ ECase(FP_64A);
#undef ECase
}
void ScalarEnumerationTraits<ELFYAML::MIPS_AFL_EXT>::enumeration(
IO &IO, ELFYAML::MIPS_AFL_EXT &Value) {
-#define ECase(X) IO.enumCase(Value, #X, Mips::AFL_##X);
- ECase(EXT_NONE)
- ECase(EXT_XLR)
- ECase(EXT_OCTEON2)
- ECase(EXT_OCTEONP)
- ECase(EXT_LOONGSON_3A)
- ECase(EXT_OCTEON)
- ECase(EXT_5900)
- ECase(EXT_4650)
- ECase(EXT_4010)
- ECase(EXT_4100)
- ECase(EXT_3900)
- ECase(EXT_10000)
- ECase(EXT_SB1)
- ECase(EXT_4111)
- ECase(EXT_4120)
- ECase(EXT_5400)
- ECase(EXT_5500)
- ECase(EXT_LOONGSON_2E)
- ECase(EXT_LOONGSON_2F)
- ECase(EXT_OCTEON3)
+#define ECase(X) IO.enumCase(Value, #X, Mips::AFL_##X)
+ ECase(EXT_NONE);
+ ECase(EXT_XLR);
+ ECase(EXT_OCTEON2);
+ ECase(EXT_OCTEONP);
+ ECase(EXT_LOONGSON_3A);
+ ECase(EXT_OCTEON);
+ ECase(EXT_5900);
+ ECase(EXT_4650);
+ ECase(EXT_4010);
+ ECase(EXT_4100);
+ ECase(EXT_3900);
+ ECase(EXT_10000);
+ ECase(EXT_SB1);
+ ECase(EXT_4111);
+ ECase(EXT_4120);
+ ECase(EXT_5400);
+ ECase(EXT_5500);
+ ECase(EXT_LOONGSON_2E);
+ ECase(EXT_LOONGSON_2F);
+ ECase(EXT_OCTEON3);
#undef ECase
}
@@ -614,27 +612,27 @@ void ScalarEnumerationTraits<ELFYAML::MIPS_ISA>::enumeration(
void ScalarBitSetTraits<ELFYAML::MIPS_AFL_ASE>::bitset(
IO &IO, ELFYAML::MIPS_AFL_ASE &Value) {
-#define BCase(X) IO.bitSetCase(Value, #X, Mips::AFL_ASE_##X);
- BCase(DSP)
- BCase(DSPR2)
- BCase(EVA)
- BCase(MCU)
- BCase(MDMX)
- BCase(MIPS3D)
- BCase(MT)
- BCase(SMARTMIPS)
- BCase(VIRT)
- BCase(MSA)
- BCase(MIPS16)
- BCase(MICROMIPS)
- BCase(XPA)
+#define BCase(X) IO.bitSetCase(Value, #X, Mips::AFL_ASE_##X)
+ BCase(DSP);
+ BCase(DSPR2);
+ BCase(EVA);
+ BCase(MCU);
+ BCase(MDMX);
+ BCase(MIPS3D);
+ BCase(MT);
+ BCase(SMARTMIPS);
+ BCase(VIRT);
+ BCase(MSA);
+ BCase(MIPS16);
+ BCase(MICROMIPS);
+ BCase(XPA);
#undef BCase
}
void ScalarBitSetTraits<ELFYAML::MIPS_AFL_FLAGS1>::bitset(
IO &IO, ELFYAML::MIPS_AFL_FLAGS1 &Value) {
-#define BCase(X) IO.bitSetCase(Value, #X, Mips::AFL_FLAGS1_##X);
- BCase(ODDSPREG)
+#define BCase(X) IO.bitSetCase(Value, #X, Mips::AFL_FLAGS1_##X)
+ BCase(ODDSPREG);
#undef BCase
}
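The BCaseMask lines above use YAMLIO's maskedBitSetCase, which models an enumerated field embedded in a flags word: a named case applies only when the bits selected by the mask compare equal to the case's constant, which is how mutually exclusive values such as the EF_ARM_EABI_VER* family share the EF_ARM_EABIMASK bits. A sketch of the underlying test, with illustrative names:

// On output, maskedBitSetCase prints the name when the masked field matches;
// on input, it ORs the constant into the value when the name is present.
static bool matchesMaskedCase(unsigned Value, unsigned Constant, unsigned Mask) {
  return (Value & Mask) == Constant; // e.g. the EABI version field of e_flags
}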
diff --git a/lib/ObjectYAML/MachOYAML.cpp b/lib/ObjectYAML/MachOYAML.cpp
index a033a79189bd..6b0e4e3762d0 100644
--- a/lib/ObjectYAML/MachOYAML.cpp
+++ b/lib/ObjectYAML/MachOYAML.cpp
@@ -230,6 +230,12 @@ void mapLoadCommandData<MachO::dylinker_command>(
IO.mapOptional("PayloadString", LoadCommand.PayloadString);
}
+template <>
+void mapLoadCommandData<MachO::build_version_command>(
+ IO &IO, MachOYAML::LoadCommand &LoadCommand) {
+ IO.mapOptional("Tools", LoadCommand.Tools);
+}
+
void MappingTraits<MachOYAML::LoadCommand>::mapping(
IO &IO, MachOYAML::LoadCommand &LoadCommand) {
MachO::LoadCommandType TempCmd = static_cast<MachO::LoadCommandType>(
@@ -282,6 +288,12 @@ void MappingTraits<MachOYAML::Section>::mapping(IO &IO,
IO.mapOptional("reserved3", Section.reserved3);
}
+void MappingTraits<MachO::build_tool_version>::mapping(
+ IO &IO, MachO::build_tool_version &tool) {
+ IO.mapRequired("tool", tool.tool);
+ IO.mapRequired("version", tool.version);
+}
+
void MappingTraits<MachO::dylib>::mapping(IO &IO, MachO::dylib &DylibStruct) {
IO.mapRequired("name", DylibStruct.name);
IO.mapRequired("timestamp", DylibStruct.timestamp);
@@ -558,6 +570,23 @@ void MappingTraits<MachO::version_min_command>::mapping(
IO.mapRequired("sdk", LoadCommand.sdk);
}
+void MappingTraits<MachO::note_command>::mapping(
+ IO &IO, MachO::note_command &LoadCommand) {
+
+ IO.mapRequired("data_owner", LoadCommand.data_owner);
+ IO.mapRequired("offset", LoadCommand.offset);
+ IO.mapRequired("size", LoadCommand.size);
+}
+
+void MappingTraits<MachO::build_version_command>::mapping(
+ IO &IO, MachO::build_version_command &LoadCommand) {
+
+ IO.mapRequired("platform", LoadCommand.platform);
+ IO.mapRequired("minos", LoadCommand.minos);
+ IO.mapRequired("sdk", LoadCommand.sdk);
+ IO.mapRequired("ntools", LoadCommand.ntools);
+}
+
} // namespace llvm::yaml
} // namespace llvm
diff --git a/lib/ObjectYAML/ObjectYAML.cpp b/lib/ObjectYAML/ObjectYAML.cpp
index cbbaac6062a7..74581c1ecaac 100644
--- a/lib/ObjectYAML/ObjectYAML.cpp
+++ b/lib/ObjectYAML/ObjectYAML.cpp
@@ -43,6 +43,9 @@ void MappingTraits<YamlObjectFile>::mapping(IO &IO,
ObjectFile.FatMachO.reset(new MachOYAML::UniversalBinary());
MappingTraits<MachOYAML::UniversalBinary>::mapping(IO,
*ObjectFile.FatMachO);
+ } else if (IO.mapTag("!WASM")) {
+ ObjectFile.Wasm.reset(new WasmYAML::Object());
+ MappingTraits<WasmYAML::Object>::mapping(IO, *ObjectFile.Wasm);
} else {
Input &In = (Input &)IO;
std::string Tag = In.getCurrentNode()->getRawTag();
diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp
new file mode 100644
index 000000000000..3e1bed19d61f
--- /dev/null
+++ b/lib/ObjectYAML/WasmYAML.cpp
@@ -0,0 +1,357 @@
+//===- WasmYAML.cpp - Wasm YAMLIO implementation --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes for handling the YAML representation of wasm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjectYAML/WasmYAML.h"
+#include "llvm/Object/Wasm.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/MipsABIFlags.h"
+
+namespace llvm {
+
+namespace WasmYAML {
+
+// Declared here rather than in the header to comply with:
+// http://llvm.org/docs/CodingStandards.html#provide-a-virtual-method-anchor-for-classes-in-headers
+Section::~Section() {}
+
+} // end namespace WasmYAML
+
+namespace yaml {
+
+void MappingTraits<WasmYAML::FileHeader>::mapping(
+ IO &IO, WasmYAML::FileHeader &FileHdr) {
+ IO.mapRequired("Version", FileHdr.Version);
+}
+
+void MappingTraits<WasmYAML::Object>::mapping(IO &IO,
+ WasmYAML::Object &Object) {
+ IO.setContext(&Object);
+ IO.mapTag("!WASM", true);
+ IO.mapRequired("FileHeader", Object.Header);
+ IO.mapOptional("Sections", Object.Sections);
+ IO.setContext(nullptr);
+}
+
+static void commonSectionMapping(IO &IO, WasmYAML::Section &Section) {
+ IO.mapRequired("Type", Section.Type);
+ IO.mapOptional("Relocations", Section.Relocations);
+}
+
+static void sectionMapping(IO &IO, WasmYAML::CustomSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapRequired("Name", Section.Name);
+ IO.mapRequired("Payload", Section.Payload);
+}
+
+static void sectionMapping(IO &IO, WasmYAML::TypeSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Signatures", Section.Signatures);
+}
+
+static void sectionMapping(IO &IO, WasmYAML::ImportSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Imports", Section.Imports);
+}
+
+static void sectionMapping(IO &IO, WasmYAML::FunctionSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("FunctionTypes", Section.FunctionTypes);
+}
+
+static void sectionMapping(IO &IO, WasmYAML::TableSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Tables", Section.Tables);
+}
+
+static void sectionMapping(IO &IO, WasmYAML::MemorySection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Memories", Section.Memories);
+}
+
+static void sectionMapping(IO &IO, WasmYAML::GlobalSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Globals", Section.Globals);
+}
+
+static void sectionMapping(IO &IO, WasmYAML::ExportSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Exports", Section.Exports);
+}
+
+static void sectionMapping(IO &IO, WasmYAML::StartSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("StartFunction", Section.StartFunction);
+}
+
+static void sectionMapping(IO &IO, WasmYAML::ElemSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Segments", Section.Segments);
+}
+
+static void sectionMapping(IO &IO, WasmYAML::CodeSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapRequired("Functions", Section.Functions);
+}
+
+static void sectionMapping(IO &IO, WasmYAML::DataSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapRequired("Segments", Section.Segments);
+}
+
+void MappingTraits<std::unique_ptr<WasmYAML::Section>>::mapping(
+ IO &IO, std::unique_ptr<WasmYAML::Section> &Section) {
+ WasmYAML::SectionType SectionType;
+ if (IO.outputting())
+ SectionType = Section->Type;
+ else
+ IO.mapRequired("Type", SectionType);
+
+ switch (SectionType) {
+ case wasm::WASM_SEC_CUSTOM:
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::CustomSection());
+ sectionMapping(IO, *cast<WasmYAML::CustomSection>(Section.get()));
+ break;
+ case wasm::WASM_SEC_TYPE:
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::TypeSection());
+ sectionMapping(IO, *cast<WasmYAML::TypeSection>(Section.get()));
+ break;
+ case wasm::WASM_SEC_IMPORT:
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::ImportSection());
+ sectionMapping(IO, *cast<WasmYAML::ImportSection>(Section.get()));
+ break;
+ case wasm::WASM_SEC_FUNCTION:
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::FunctionSection());
+ sectionMapping(IO, *cast<WasmYAML::FunctionSection>(Section.get()));
+ break;
+ case wasm::WASM_SEC_TABLE:
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::TableSection());
+ sectionMapping(IO, *cast<WasmYAML::TableSection>(Section.get()));
+ break;
+ case wasm::WASM_SEC_MEMORY:
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::MemorySection());
+ sectionMapping(IO, *cast<WasmYAML::MemorySection>(Section.get()));
+ break;
+ case wasm::WASM_SEC_GLOBAL:
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::GlobalSection());
+ sectionMapping(IO, *cast<WasmYAML::GlobalSection>(Section.get()));
+ break;
+ case wasm::WASM_SEC_EXPORT:
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::ExportSection());
+ sectionMapping(IO, *cast<WasmYAML::ExportSection>(Section.get()));
+ break;
+ case wasm::WASM_SEC_START:
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::StartSection());
+ sectionMapping(IO, *cast<WasmYAML::StartSection>(Section.get()));
+ break;
+ case wasm::WASM_SEC_ELEM:
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::ElemSection());
+ sectionMapping(IO, *cast<WasmYAML::ElemSection>(Section.get()));
+ break;
+ case wasm::WASM_SEC_CODE:
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::CodeSection());
+ sectionMapping(IO, *cast<WasmYAML::CodeSection>(Section.get()));
+ break;
+ case wasm::WASM_SEC_DATA:
+ if (!IO.outputting())
+ Section.reset(new WasmYAML::DataSection());
+ sectionMapping(IO, *cast<WasmYAML::DataSection>(Section.get()));
+ break;
+ default:
+ llvm_unreachable("Unknown section type");
+ }
+}
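+This is the standard YAMLIO pattern for a polymorphic sequence: the discriminator key ("Type") is mapped first, the matching concrete Section subclass is allocated on input, and the remaining keys are handled by the per-type sectionMapping overload. A hedged sketch of the consuming side, built on the yaml::Input reader from llvm/Support/YAMLTraits.h (function name and buffer are illustrative):
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ObjectYAML/WasmYAML.h"
+#include "llvm/Support/YAMLTraits.h"
+
+static bool parseWasmYAML(llvm::StringRef Buffer, llvm::WasmYAML::Object &Obj) {
+  llvm::yaml::Input YIn(Buffer);
+  YIn >> Obj; // drives the MappingTraits above, allocating sections by Type
+  return !YIn.error();
+}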
+
+void ScalarEnumerationTraits<WasmYAML::SectionType>::enumeration(
+ IO &IO, WasmYAML::SectionType &Type) {
+#define ECase(X) IO.enumCase(Type, #X, wasm::WASM_SEC_##X);
+ ECase(CUSTOM);
+ ECase(TYPE);
+ ECase(IMPORT);
+ ECase(FUNCTION);
+ ECase(TABLE);
+ ECase(MEMORY);
+ ECase(GLOBAL);
+ ECase(EXPORT);
+ ECase(START);
+ ECase(ELEM);
+ ECase(CODE);
+ ECase(DATA);
+#undef ECase
+}
+
+void MappingTraits<WasmYAML::Signature>::mapping(
+ IO &IO, WasmYAML::Signature &Signature) {
+ IO.mapOptional("Index", Signature.Index);
+ IO.mapRequired("ReturnType", Signature.ReturnType);
+ IO.mapRequired("ParamTypes", Signature.ParamTypes);
+}
+
+void MappingTraits<WasmYAML::Table>::mapping(IO &IO, WasmYAML::Table &Table) {
+ IO.mapRequired("ElemType", Table.ElemType);
+ IO.mapRequired("Limits", Table.TableLimits);
+}
+
+void MappingTraits<WasmYAML::Function>::mapping(IO &IO,
+ WasmYAML::Function &Function) {
+ IO.mapRequired("Locals", Function.Locals);
+ IO.mapRequired("Body", Function.Body);
+}
+
+void MappingTraits<WasmYAML::Relocation>::mapping(
+ IO &IO, WasmYAML::Relocation &Relocation) {
+ IO.mapRequired("Type", Relocation.Type);
+ IO.mapRequired("Index", Relocation.Index);
+ IO.mapRequired("Offset", Relocation.Offset);
+ IO.mapRequired("Addend", Relocation.Addend);
+}
+
+void MappingTraits<WasmYAML::LocalDecl>::mapping(
+ IO &IO, WasmYAML::LocalDecl &LocalDecl) {
+ IO.mapRequired("Type", LocalDecl.Type);
+ IO.mapRequired("Count", LocalDecl.Count);
+}
+
+void MappingTraits<WasmYAML::Limits>::mapping(IO &IO,
+ WasmYAML::Limits &Limits) {
+ if (!IO.outputting() || Limits.Flags)
+ IO.mapOptional("Flags", Limits.Flags);
+ IO.mapRequired("Initial", Limits.Initial);
+ if (!IO.outputting() || Limits.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX)
+ IO.mapOptional("Maximum", Limits.Maximum);
+}
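+The Limits mapping just above illustrates the round-trip idiom for optional keys: always accept the key on input, but only emit it on output when it carries information (a non-zero Flags word, or a Maximum when WASM_LIMITS_FLAG_HAS_MAX is set). The same guard works for any optional scalar; a fragment with hypothetical names (S and Extra are illustrative):
+
+// Emit "Extra" only when it is meaningful; accept it unconditionally on input.
+if (!IO.outputting() || S.Extra != 0)
+  IO.mapOptional("Extra", S.Extra);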
+
+void MappingTraits<WasmYAML::ElemSegment>::mapping(
+ IO &IO, WasmYAML::ElemSegment &Segment) {
+ IO.mapRequired("Offset", Segment.Offset);
+ IO.mapRequired("Functions", Segment.Functions);
+}
+
+void MappingTraits<WasmYAML::Import>::mapping(IO &IO,
+ WasmYAML::Import &Import) {
+ IO.mapRequired("Module", Import.Module);
+ IO.mapRequired("Field", Import.Field);
+ IO.mapRequired("Kind", Import.Kind);
+ if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) {
+ IO.mapRequired("SigIndex", Import.SigIndex);
+ } else if (Import.Kind == wasm::WASM_EXTERNAL_GLOBAL) {
+ IO.mapRequired("GlobalType", Import.GlobalType);
+ IO.mapRequired("GlobalMutable", Import.GlobalMutable);
+ } else {
+ llvm_unreachable("unhandled import type");
+ }
+}
+
+void MappingTraits<WasmYAML::Export>::mapping(IO &IO,
+ WasmYAML::Export &Export) {
+ IO.mapRequired("Name", Export.Name);
+ IO.mapRequired("Kind", Export.Kind);
+ IO.mapRequired("Index", Export.Index);
+}
+
+void MappingTraits<WasmYAML::Global>::mapping(IO &IO,
+ WasmYAML::Global &Global) {
+ IO.mapRequired("Type", Global.Type);
+ IO.mapRequired("Mutable", Global.Mutable);
+ IO.mapRequired("InitExpr", Global.InitExpr);
+}
+
+void MappingTraits<wasm::WasmInitExpr>::mapping(IO &IO,
+ wasm::WasmInitExpr &Expr) {
+ WasmYAML::Opcode Op = Expr.Opcode;
+ IO.mapRequired("Opcode", Op);
+ Expr.Opcode = Op;
+ switch (Expr.Opcode) {
+ case wasm::WASM_OPCODE_I32_CONST:
+ IO.mapRequired("Value", Expr.Value.Int32);
+ break;
+ case wasm::WASM_OPCODE_I64_CONST:
+ IO.mapRequired("Value", Expr.Value.Int64);
+ break;
+ case wasm::WASM_OPCODE_F32_CONST:
+ IO.mapRequired("Value", Expr.Value.Float32);
+ break;
+ case wasm::WASM_OPCODE_F64_CONST:
+ IO.mapRequired("Value", Expr.Value.Float64);
+ break;
+ }
+}
+
+void MappingTraits<WasmYAML::DataSegment>::mapping(
+ IO &IO, WasmYAML::DataSegment &Segment) {
+ IO.mapRequired("Index", Segment.Index);
+ IO.mapRequired("Offset", Segment.Offset);
+ IO.mapRequired("Content", Segment.Content);
+}
+
+void ScalarEnumerationTraits<WasmYAML::ValueType>::enumeration(
+ IO &IO, WasmYAML::ValueType &Type) {
+#define ECase(X) IO.enumCase(Type, #X, wasm::WASM_TYPE_##X);
+ ECase(I32);
+ ECase(I64);
+ ECase(F32);
+ ECase(F64);
+ ECase(ANYFUNC);
+ ECase(FUNC);
+ ECase(NORESULT);
+#undef ECase
+}
+
+void ScalarEnumerationTraits<WasmYAML::ExportKind>::enumeration(
+ IO &IO, WasmYAML::ExportKind &Kind) {
+#define ECase(X) IO.enumCase(Kind, #X, wasm::WASM_EXTERNAL_##X);
+ ECase(FUNCTION);
+ ECase(TABLE);
+ ECase(MEMORY);
+ ECase(GLOBAL);
+#undef ECase
+}
+
+void ScalarEnumerationTraits<WasmYAML::Opcode>::enumeration(
+ IO &IO, WasmYAML::Opcode &Code) {
+#define ECase(X) IO.enumCase(Code, #X, wasm::WASM_OPCODE_##X);
+ ECase(END);
+ ECase(I32_CONST);
+ ECase(I64_CONST);
+ ECase(F64_CONST);
+ ECase(F32_CONST);
+ ECase(GET_GLOBAL);
+#undef ECase
+}
+
+void ScalarEnumerationTraits<WasmYAML::TableType>::enumeration(
+ IO &IO, WasmYAML::TableType &Type) {
+#define ECase(X) IO.enumCase(Type, #X, wasm::WASM_TYPE_##X);
+ ECase(ANYFUNC);
+#undef ECase
+}
+
+void ScalarEnumerationTraits<WasmYAML::RelocType>::enumeration(
+ IO &IO, WasmYAML::RelocType &Type) {
+#define WASM_RELOC(name, value) IO.enumCase(Type, #name, wasm::name);
+#include "llvm/Support/WasmRelocs/WebAssembly.def"
+#undef WASM_RELOC
+}
+
+} // end namespace yaml
+} // end namespace llvm
diff --git a/lib/Option/Arg.cpp b/lib/Option/Arg.cpp
index c3de2d1a4965..3e8a1d802314 100644
--- a/lib/Option/Arg.cpp
+++ b/lib/Option/Arg.cpp
@@ -61,7 +61,9 @@ void Arg::print(raw_ostream& O) const {
O << "]>\n";
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void Arg::dump() const { print(dbgs()); }
+#endif
std::string Arg::getAsString(const ArgList &Args) const {
SmallString<256> Res;
diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp
index f94de866ef34..39dbce87f9ae 100644
--- a/lib/Option/ArgList.cpp
+++ b/lib/Option/ArgList.cpp
@@ -19,203 +19,44 @@
using namespace llvm;
using namespace llvm::opt;
-void arg_iterator::SkipToNextArg() {
- for (; Current != Args.end(); ++Current) {
- // Done if there are no filters.
- if (!Id0.isValid())
- break;
-
- // Otherwise require a match.
- const Option &O = (*Current)->getOption();
- if (O.matches(Id0) ||
- (Id1.isValid() && O.matches(Id1)) ||
- (Id2.isValid() && O.matches(Id2)))
- break;
- }
-}
-
void ArgList::append(Arg *A) {
Args.push_back(A);
-}
-
-void ArgList::eraseArg(OptSpecifier Id) {
- Args.erase(
- remove_if(*this, [=](Arg *A) { return A->getOption().matches(Id); }),
- end());
-}
-
-Arg *ArgList::getLastArgNoClaim(OptSpecifier Id) const {
- // FIXME: Make search efficient?
- for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it)
- if ((*it)->getOption().matches(Id))
- return *it;
- return nullptr;
-}
-
-Arg *ArgList::getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1) const {
- // FIXME: Make search efficient?
- for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it)
- if ((*it)->getOption().matches(Id0) ||
- (*it)->getOption().matches(Id1))
- return *it;
- return nullptr;
-}
-
-Arg *ArgList::getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1,
- OptSpecifier Id2) const {
- // FIXME: Make search efficient?
- for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it)
- if ((*it)->getOption().matches(Id0) || (*it)->getOption().matches(Id1) ||
- (*it)->getOption().matches(Id2))
- return *it;
- return nullptr;
-}
-
-Arg *ArgList::getLastArgNoClaim(OptSpecifier Id0, OptSpecifier Id1,
- OptSpecifier Id2, OptSpecifier Id3) const {
- // FIXME: Make search efficient?
- for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it)
- if ((*it)->getOption().matches(Id0) || (*it)->getOption().matches(Id1) ||
- (*it)->getOption().matches(Id2) || (*it)->getOption().matches(Id3))
- return *it;
- return nullptr;
-}
-
-Arg *ArgList::getLastArg(OptSpecifier Id) const {
- Arg *Res = nullptr;
- for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
- if ((*it)->getOption().matches(Id)) {
- Res = *it;
- Res->claim();
- }
- }
- return Res;
-}
-
-Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1) const {
- Arg *Res = nullptr;
- for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
- if ((*it)->getOption().matches(Id0) ||
- (*it)->getOption().matches(Id1)) {
- Res = *it;
- Res->claim();
-
- }
+ // Update ranges for the option and all of its groups.
+ for (Option O = A->getOption().getUnaliasedOption(); O.isValid();
+ O = O.getGroup()) {
+ auto &R =
+ OptRanges.insert(std::make_pair(O.getID(), emptyRange())).first->second;
+ R.first = std::min<unsigned>(R.first, Args.size() - 1);
+ R.second = Args.size();
}
-
- return Res;
}
-Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
- OptSpecifier Id2) const {
- Arg *Res = nullptr;
- for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
- if ((*it)->getOption().matches(Id0) ||
- (*it)->getOption().matches(Id1) ||
- (*it)->getOption().matches(Id2)) {
- Res = *it;
- Res->claim();
- }
- }
-
- return Res;
-}
-
-Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
- OptSpecifier Id2, OptSpecifier Id3) const {
- Arg *Res = nullptr;
- for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
- if ((*it)->getOption().matches(Id0) ||
- (*it)->getOption().matches(Id1) ||
- (*it)->getOption().matches(Id2) ||
- (*it)->getOption().matches(Id3)) {
- Res = *it;
- Res->claim();
- }
- }
-
- return Res;
-}
-
-Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
- OptSpecifier Id2, OptSpecifier Id3,
- OptSpecifier Id4) const {
- Arg *Res = nullptr;
- for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
- if ((*it)->getOption().matches(Id0) ||
- (*it)->getOption().matches(Id1) ||
- (*it)->getOption().matches(Id2) ||
- (*it)->getOption().matches(Id3) ||
- (*it)->getOption().matches(Id4)) {
- Res = *it;
- Res->claim();
- }
- }
-
- return Res;
-}
-
-Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
- OptSpecifier Id2, OptSpecifier Id3,
- OptSpecifier Id4, OptSpecifier Id5) const {
- Arg *Res = nullptr;
- for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
- if ((*it)->getOption().matches(Id0) ||
- (*it)->getOption().matches(Id1) ||
- (*it)->getOption().matches(Id2) ||
- (*it)->getOption().matches(Id3) ||
- (*it)->getOption().matches(Id4) ||
- (*it)->getOption().matches(Id5)) {
- Res = *it;
- Res->claim();
- }
- }
-
- return Res;
-}
-
-Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
- OptSpecifier Id2, OptSpecifier Id3,
- OptSpecifier Id4, OptSpecifier Id5,
- OptSpecifier Id6) const {
- Arg *Res = nullptr;
- for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
- if ((*it)->getOption().matches(Id0) ||
- (*it)->getOption().matches(Id1) ||
- (*it)->getOption().matches(Id2) ||
- (*it)->getOption().matches(Id3) ||
- (*it)->getOption().matches(Id4) ||
- (*it)->getOption().matches(Id5) ||
- (*it)->getOption().matches(Id6)) {
- Res = *it;
- Res->claim();
- }
+void ArgList::eraseArg(OptSpecifier Id) {
+ // Zero out the removed entries but keep them around so that we don't
+ // need to invalidate OptRanges.
+ for (Arg *const &A : filtered(Id)) {
+ // Avoid the need for a non-const filtered iterator variant.
+ Arg **ArgsBegin = Args.data();
+ ArgsBegin[&A - ArgsBegin] = nullptr;
}
-
- return Res;
+ OptRanges.erase(Id.getID());
}
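Because OptRanges caches indices into Args, eraseArg can no longer compact the vector without invalidating every cached range; removed arguments become null tombstones instead, and any iteration over the list must skip them. A sketch of that skip step with illustrative names (the real filtered iterator is declared in ArgList.h by this patch):

// Illustrative only: advance past erased (null) slots.
static void skipTombstones(llvm::opt::Arg *const *&Current,
                           llvm::opt::Arg *const *End) {
  while (Current != End && *Current == nullptr)
    ++Current;
}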
-Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
- OptSpecifier Id2, OptSpecifier Id3,
- OptSpecifier Id4, OptSpecifier Id5,
- OptSpecifier Id6, OptSpecifier Id7) const {
- Arg *Res = nullptr;
- for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
- if ((*it)->getOption().matches(Id0) ||
- (*it)->getOption().matches(Id1) ||
- (*it)->getOption().matches(Id2) ||
- (*it)->getOption().matches(Id3) ||
- (*it)->getOption().matches(Id4) ||
- (*it)->getOption().matches(Id5) ||
- (*it)->getOption().matches(Id6) ||
- (*it)->getOption().matches(Id7)) {
- Res = *it;
- Res->claim();
+ArgList::OptRange
+ArgList::getRange(std::initializer_list<OptSpecifier> Ids) const {
+ OptRange R = emptyRange();
+ for (auto Id : Ids) {
+ auto I = OptRanges.find(Id.getID());
+ if (I != OptRanges.end()) {
+ R.first = std::min(R.first, I->second.first);
+ R.second = std::max(R.second, I->second.second);
}
}
-
- return Res;
+ // Map an empty {-1, 0} range to {0, 0} so it can be used to form iterators.
+ if (R.first == -1u)
+ R.first = 0;
+ return R;
}
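getRange merges the cached [first, last) windows of the requested options, so filtered() only has to walk the slice of Args where a match can occur rather than the whole command line. Call sites are unchanged by the new indexing; a hedged sketch (OPT_include is a hypothetical option ID):

// Collect the values of every -include-style option, in command-line order.
std::vector<std::string> Paths;
for (const llvm::opt::Arg *A : Args.filtered(OPT_include))
  Paths.push_back(A->getValue());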
bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const {
@@ -231,8 +72,7 @@ bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier PosAlias, OptSpecifier Neg,
return Default;
}
-StringRef ArgList::getLastArgValue(OptSpecifier Id,
- StringRef Default) const {
+StringRef ArgList::getLastArgValue(OptSpecifier Id, StringRef Default) const {
if (Arg *A = getLastArg(Id))
return A->getValue();
return Default;
@@ -262,7 +102,7 @@ void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id0,
void ArgList::AddAllArgsExcept(ArgStringList &Output,
ArrayRef<OptSpecifier> Ids,
ArrayRef<OptSpecifier> ExcludeIds) const {
- for (const Arg *Arg : Args) {
+ for (const Arg *Arg : *this) {
bool Excluded = false;
for (OptSpecifier Id : ExcludeIds) {
if (Arg->getOption().matches(Id)) {
@@ -325,14 +165,14 @@ void ArgList::AddAllArgsTranslated(ArgStringList &Output, OptSpecifier Id0,
}
void ArgList::ClaimAllArgs(OptSpecifier Id0) const {
- for (auto Arg : filtered(Id0))
+ for (auto *Arg : filtered(Id0))
Arg->claim();
}
void ArgList::ClaimAllArgs() const {
- for (const_iterator it = begin(), ie = end(); it != ie; ++it)
- if (!(*it)->isClaimed())
- (*it)->claim();
+ for (auto *Arg : *this)
+ if (!Arg->isClaimed())
+ Arg->claim();
}
const char *ArgList::GetOrMakeJoinedArgString(unsigned Index,
@@ -353,7 +193,9 @@ void ArgList::print(raw_ostream &O) const {
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ArgList::dump() const { print(dbgs()); }
+#endif
//
diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp
index 5eb179fbd257..736b939fe80b 100644
--- a/lib/Option/Option.cpp
+++ b/lib/Option/Option.cpp
@@ -83,7 +83,9 @@ void Option::print(raw_ostream &O) const {
O << ">\n";
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void Option::dump() const { print(dbgs()); }
+#endif
bool Option::matches(OptSpecifier Opt) const {
// Aliases are never considered in matching, look through them.
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index 2994a07b1ccf..0421946a32a6 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -39,6 +39,7 @@
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/PostDominators.h"
@@ -61,6 +62,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/GCOVProfiler.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/Transforms/IPO/ConstantMerge.h"
#include "llvm/Transforms/IPO/CrossDSOCFI.h"
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
@@ -104,9 +106,12 @@
#include "llvm/Transforms/Scalar/LoopDistribute.h"
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
+#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Scalar/LoopPredication.h"
#include "llvm/Transforms/Scalar/LoopRotation.h"
#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
+#include "llvm/Transforms/Scalar/LoopSink.h"
#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/Transforms/Scalar/LowerAtomic.h"
@@ -131,8 +136,8 @@
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LowerInvoke.h"
#include "llvm/Transforms/Utils/Mem2Reg.h"
-#include "llvm/Transforms/Utils/MemorySSA.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
+#include "llvm/Transforms/Utils/PredicateInfo.h"
#include "llvm/Transforms/Utils/SimplifyInstructions.h"
#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
@@ -142,6 +147,9 @@
using namespace llvm;
+static cl::opt<unsigned> MaxDevirtIterations("pm-max-devirt-iterations",
+ cl::ReallyHidden, cl::init(4));
+
static Regex DefaultAliasRegex("^(default|lto-pre-link|lto)<(O[0123sz])>$");
static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
@@ -316,19 +324,21 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// the other we have is `LoopInstSimplify`.
LoopPassManager LPM1(DebugLogging), LPM2(DebugLogging);
- // FIXME: Enable these when the loop pass manager can support enforcing loop
- // simplified and LCSSA form as well as updating the loop nest after
- // transformations and we finsih porting the loop passes.
-#if 0
// Rotate Loop - disable header duplication at -Oz
LPM1.addPass(LoopRotatePass(Level != Oz));
LPM1.addPass(LICMPass());
+#if 0
+ // The LoopUnswitch pass isn't yet ported to the new pass manager.
LPM1.addPass(LoopUnswitchPass(/* OptimizeForSize */ Level != O3));
+#endif
LPM2.addPass(IndVarSimplifyPass());
- LPM2.addPass(LoopIdiomPass());
+ LPM2.addPass(LoopIdiomRecognizePass());
LPM2.addPass(LoopDeletionPass());
- LPM2.addPass(SimpleLoopUnrollPass());
-#endif
+ LPM2.addPass(LoopUnrollPass::createFull(Level));
+
+ // We provide the opt remark emitter pass for LICM to use. We only need to do
+ // this once as it is immutable.
+ FPM.addPass(
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1)));
FPM.addPass(SimplifyCFGPass());
FPM.addPass(InstCombinePass());
@@ -363,12 +373,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(JumpThreadingPass());
FPM.addPass(CorrelatedValuePropagationPass());
FPM.addPass(DSEPass());
- // FIXME: Enable this when the loop pass manager can support enforcing loop
- // simplified and LCSSA form as well as updating the loop nest after
- // transformations and we finsih porting the loop passes.
-#if 0
FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
-#endif
// Finally, do an expensive DCE pass to catch all the dead code exposed by
// the simplifications and basic cleanup after all the simplifications.
@@ -379,6 +384,56 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
return FPM;
}
+static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
+ PassBuilder::OptimizationLevel Level,
+ bool RunProfileGen, std::string ProfileGenFile,
+ std::string ProfileUseFile) {
+ // Generally running simplification passes and the inliner with a high
+ // threshold results in smaller executables, but there may be cases where
+ // the size grows, so let's be conservative here and skip this simplification
+ // at -Os/Oz.
+ if (!isOptimizingForSize(Level)) {
+ InlineParams IP;
+
+ // In the old pass manager, this is a cl::opt. Should this still be one?
+ IP.DefaultThreshold = 75;
+
+ // FIXME: The hint threshold has the same value used by the regular inliner.
+ // This should probably be lowered after performance testing.
+ // FIXME: This comment is cargo-culted from the old pass manager; revisit.
+ IP.HintThreshold = 325;
+
+ CGSCCPassManager CGPipeline(DebugLogging);
+
+ CGPipeline.addPass(InlinerPass(IP));
+
+ FunctionPassManager FPM;
+ FPM.addPass(SROA());
+ FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
+ FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
+ FPM.addPass(InstCombinePass()); // Combine silly sequences.
+
+ // FIXME: Here the old pass manager inserts peephole extensions.
+ // Add them when they're supported.
+ CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
+
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline)));
+ }
+
+ if (RunProfileGen) {
+ MPM.addPass(PGOInstrumentationGen());
+
+ // Add the profile lowering pass.
+ InstrProfOptions Options;
+ if (!ProfileGenFile.empty())
+ Options.InstrProfileOutput = ProfileGenFile;
+ MPM.addPass(InstrProfiling(Options));
+ }
+
+ if (!ProfileUseFile.empty())
+ MPM.addPass(PGOInstrumentationUse(ProfileUseFile));
+}
+
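+addPGOInstrPasses is driven entirely by the PGOOptions the PassBuilder was constructed with; the code above reads only RunProfileGen, ProfileGenFile, ProfileUseFile, and SamplePGO. A hedged sketch of a client requesting instrumentation generation (the exact PGOOptions shape lives in PassBuilder.h; field names follow the uses above):
+
+PGOOptions Opts;
+Opts.RunProfileGen = true;               // instrument rather than apply a profile
+Opts.ProfileGenFile = "default.profraw"; // illustrative output path
+PassBuilder PB(/*TM=*/nullptr, Opts);    // pipelines built from PB now add PGO passes
+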
ModulePassManager
PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
bool DebugLogging) {
@@ -429,10 +484,20 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
GlobalCleanupPM.addPass(SimplifyCFGPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM)));
- // FIXME: Enable this when cross-IR-unit analysis invalidation is working.
-#if 0
- MPM.addPass(RequireAnalysisPass<GlobalsAA>());
-#endif
+ // Add all the requested passes for PGO Instrumentation, if requested.
+ if (PGOOpt) {
+ assert(PGOOpt->RunProfileGen || PGOOpt->SamplePGO ||
+ !PGOOpt->ProfileUseFile.empty());
+ addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen,
+ PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile);
+ }
+
+ // Indirect call promotion that promotes intra-module targets only.
+ MPM.addPass(PGOIndirectCallPromotion(false, PGOOpt && PGOOpt->SamplePGO));
+
+ // Require the GlobalsAA analysis for the module so we can query it within
+ // the CGSCC pipeline.
+ MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
// Now begin the main postorder CGSCC pipeline.
// FIXME: The current CGSCC pipeline has its origins in the legacy pass
@@ -454,13 +519,24 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
// Now deduce any function attributes based in the current code.
MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
+ // When at O3 add argument promotion to the pass pipeline.
+ // FIXME: It isn't at all clear why this should be limited to O3.
+ if (Level == O3)
+ MainCGPipeline.addPass(ArgumentPromotionPass());
+
// Lastly, add the core function simplification pipeline nested inside the
// CGSCC walk.
MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
buildFunctionSimplificationPipeline(Level, DebugLogging)));
+ // We wrap the CGSCC pipeline in a devirtualization repeater. This will try
+ // to detect when we devirtualize indirect calls and iterate the SCC passes
+ // in that case to try and catch knock-on inlining or function attrs
+ // opportunities. Then we add it to the module pipeline by walking the SCCs
+ // in postorder (or bottom-up).
MPM.addPass(
- createModuleToPostOrderCGSCCPassAdaptor(std::move(MainCGPipeline)));
+ createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass(
+ std::move(MainCGPipeline), MaxDevirtIterations, DebugLogging)));
// This ends the canonicalization and simplification phase of the pipeline.
// At this point, we expect to have canonical and simple IR which we begin
@@ -475,17 +551,14 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
// FIXME: Is this really an optimization rather than a canonicalization?
MPM.addPass(ReversePostOrderFunctionAttrsPass());
- // Recompute GloblasAA here prior to function passes. This is particularly
+ // Re-require GlobalsAA here prior to function passes. This is particularly
// useful as the above will have inlined, DCE'ed, and function-attr
// propagated everything. We should at this point have a reasonably minimal
// and richly annotated call graph. By computing aliasing and mod/ref
// information for all local globals here, the late loop passes and notably
// the vectorizer will be able to use them to help recognize vectorizable
// memory operations.
- // FIXME: Enable this once analysis invalidation is fully supported.
-#if 0
- MPM.addPass(Require<GlobalsAA>());
-#endif
+ MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
FunctionPassManager OptimizePM(DebugLogging);
OptimizePM.addPass(Float2IntPass());
@@ -495,36 +568,63 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
// Optimize the loop execution. These passes operate on entire loop nests
// rather than on each loop in an inside-out manner, and so they are actually
// function passes.
+
+ // First rotate loops that may have been un-rotated by prior passes.
+ OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
+
+ // Distribute loops to allow partial vectorization, i.e. isolate dependences
+ // that inhibit vectorization into a separate loop. This is
+ // currently only performed for loops marked with the metadata
+ // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
OptimizePM.addPass(LoopDistributePass());
-#if 0
- // FIXME: LoopVectorize relies on "requiring" LCSSA which isn't supported in
- // the new PM.
+
+ // Now run the core loop vectorizer.
OptimizePM.addPass(LoopVectorizePass());
-#endif
- // FIXME: Need to port Loop Load Elimination and add it here.
+
+ // Eliminate loads by forwarding stores from the previous iteration to loads
+ // of the current iteration.
+ OptimizePM.addPass(LoopLoadEliminationPass());
+
+ // Cleanup after the loop optimization passes.
OptimizePM.addPass(InstCombinePass());
+
+ // Now that we've formed fast-to-execute loop structures, we do further
+ // optimizations. These are run afterward, as they might otherwise block
+ // complex analyses and transforms such as those needed for loop vectorization.
+
// Optimize parallel scalar instruction chains into SIMD instructions.
OptimizePM.addPass(SLPVectorizerPass());
- // Cleanup after vectorizers.
+ // Cleanup after all of the vectorizers.
OptimizePM.addPass(SimplifyCFGPass());
OptimizePM.addPass(InstCombinePass());
// Unroll small loops to hide loop backedge latency and saturate any parallel
- // execution resources of an out-of-order processor.
- // FIXME: Need to add once loop pass pipeline is available.
-
- // FIXME: Add the loop sink pass when ported.
-
- // FIXME: Add cleanup from the loop pass manager when we're forming LCSSA
- // here.
+ // execution resources of an out-of-order processor. We also then need to
+ // clean up redundancies and loop invariant code.
+ // FIXME: It would be really good to use a loop-integrated instruction
+ // combiner for cleanup here so that the unrolling and LICM can be pipelined
+ // across the loop nests.
+ OptimizePM.addPass(
+ createFunctionToLoopPassAdaptor(LoopUnrollPass::create(Level)));
+ OptimizePM.addPass(InstCombinePass());
+ OptimizePM.addPass(
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+ OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
// Now that we've vectorized and unrolled loops, we may have more refined
// alignment information, try to re-derive it here.
OptimizePM.addPass(AlignmentFromAssumptionsPass());
- // ADd the core optimizing pipeline.
+ // The LoopSink pass sinks instructions hoisted by LICM, which serves as a
+ // canonicalization pass that enables other optimizations. As a result, the
+ // LoopSink pass needs to run very late in the IR pipeline to avoid undoing
+ // LICM's results too early.
+ OptimizePM.addPass(LoopSinkPass());
+
+ // And finally clean up LCSSA form before generating code.
+ OptimizePM.addPass(InstSimplifierPass());
+
+ // Add the core optimizing pipeline.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
// Now we need to do some global optimization transforms.
@@ -550,13 +650,167 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
assert(Level != O0 && "Must request optimizations for the default pipeline!");
ModulePassManager MPM(DebugLogging);
- // FIXME: Finish fleshing this out to match the legacy LTO pipelines.
- FunctionPassManager LateFPM(DebugLogging);
- LateFPM.addPass(InstCombinePass());
- LateFPM.addPass(SimplifyCFGPass());
+ // Remove unused virtual tables to improve the quality of code generated by
+ // whole-program devirtualization and bitset lowering.
+ MPM.addPass(GlobalDCEPass());
+
+ // Force any function attributes we want the rest of the pipeline to observe.
+ MPM.addPass(ForceFunctionAttrsPass());
+
+ // Do basic inference of function attributes from known properties of system
+ // libraries and other oracles.
+ MPM.addPass(InferFunctionAttrsPass());
+
+ if (Level > 1) {
+ // Indirect call promotion. This should promote all the targets that are
+ // left by the earlier promotion pass that promotes intra-module targets.
+ // This two-step promotion is to save compile time. For LTO, it should
+ // produce the same result as if we only did promotion here.
+ MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */,
+ PGOOpt && PGOOpt->SamplePGO));
+
+ // Propagate constants at call sites into the functions they call. This
+ // opens opportunities for globalopt (and inlining) by substituting function
+ // pointers passed as arguments to direct uses of functions.
+ MPM.addPass(IPSCCPPass());
+ }
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
+ // Now deduce any function attributes based in the current code.
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+ PostOrderFunctionAttrsPass()));
+ // Do RPO function attribute inference across the module to forward-propagate
+ // attributes where applicable.
+ // FIXME: Is this really an optimization rather than a canonicalization?
+ MPM.addPass(ReversePostOrderFunctionAttrsPass());
+
+ // Use inrange annotations on GEP indices to split globals where beneficial.
+ MPM.addPass(GlobalSplitPass());
+
+ // Run whole program optimization of virtual call when the list of callees
+ // is fixed.
+ MPM.addPass(WholeProgramDevirtPass());
+
+ // Stop here at -O1.
+ if (Level == 1)
+ return MPM;
+
+ // Optimize globals to try and fold them into constants.
+ MPM.addPass(GlobalOptPass());
+
+ // Promote any localized globals to SSA registers.
+ MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
+
+ // Linking modules together can lead to duplicate global constants; only
+ // keep one copy of each constant.
+ MPM.addPass(ConstantMergePass());
+
+ // Remove unused arguments from functions.
+ MPM.addPass(DeadArgumentEliminationPass());
+
+ // Reduce the code after globalopt and ipsccp. Both can open up significant
+ // simplification opportunities, and both can propagate functions through
+ // function pointers. When this happens, we often have to resolve varargs
+ // calls, etc, so let instcombine do this.
+ // FIXME: add peephole extensions here as the legacy PM does.
+ MPM.addPass(createModuleToFunctionPassAdaptor(InstCombinePass()));
+
+ // Note: historically, the PruneEH pass was run first to deduce nounwind and
+ // generally clean up exception handling overhead. It isn't clear this is
+ // valuable as the inliner doesn't currently care whether it is inlining an
+ // invoke or a call.
+ // Run the inliner now.
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(InlinerPass()));
+
+ // Optimize globals again after running the inliner.
+ MPM.addPass(GlobalOptPass());
+
+ // Garbage collect dead functions.
+ // FIXME: Add the ArgumentPromotion pass once it's ported.
+ MPM.addPass(GlobalDCEPass());
+
+ FunctionPassManager FPM(DebugLogging);
+
+ // The IPO passes may leave cruft around. Clean up after them.
+ // FIXME: add peephole extensions here as the legacy PM does.
+ FPM.addPass(InstCombinePass());
+ FPM.addPass(JumpThreadingPass());
+
+ // Break up allocas
+ FPM.addPass(SROA());
+
+ // Run a few AA-driven optimizations here and now to clean up the code.
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+ PostOrderFunctionAttrsPass()));
+ // FIXME: here we run IP alias analysis in the legacy PM.
+
+ FunctionPassManager MainFPM;
+
+ // FIXME: once we fix LoopPass Manager, add LICM here.
+ // FIXME: once we provide support for enabling MLSM, add it here.
+ // FIXME: once we provide support for enabling NewGVN, add it here.
+ MainFPM.addPass(GVN());
+
+ // Remove dead memcpy() calls.
+ MainFPM.addPass(MemCpyOptPass());
+
+ // Nuke dead stores.
+ MainFPM.addPass(DSEPass());
+
+ // FIXME: at this point, we run a bunch of loop passes:
+ // indVarSimplify, loopDeletion, loopInterchange, loopUnroll,
+ // loopVectorize. Enable them once the remaining issues with the LPM
+ // are sorted out.
+
+ MainFPM.addPass(InstCombinePass());
+ MainFPM.addPass(SimplifyCFGPass());
+ MainFPM.addPass(SCCPPass());
+ MainFPM.addPass(InstCombinePass());
+ MainFPM.addPass(BDCEPass());
+
+ // FIXME: We may want to run SLPVectorizer here.
+ // After vectorization, assume intrinsics may tell us more
+ // about pointer alignments.
+#if 0
+ MainFPM.addPass(AlignmentFromAssumptionsPass());
+#endif
+
+ // FIXME: Conditionally run LoadCombine here, after it's ported
+ // (in case we still have this pass, given its questionable usefulness).
+
+ // FIXME: add peephole extensions to the PM here.
+ MainFPM.addPass(InstCombinePass());
+ MainFPM.addPass(JumpThreadingPass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));
+
+ // Create a function that performs CFI checks for cross-DSO calls with
+ // targets in the current module.
+ MPM.addPass(CrossDSOCFIPass());
+
+ // Lower type metadata and the type.test intrinsic. This pass supports
+ // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
+ // to be run at link time if CFI is enabled. This pass does nothing if
+ // CFI is disabled.
+ // Enable once we add support for the summary in the new PM.
+#if 0
+ MPM.addPass(LowerTypeTestsPass(Summary ? PassSummaryAction::Export :
+ PassSummaryAction::None,
+ Summary));
+#endif
+
+ // Add late LTO optimization passes.
+ // Delete basic blocks, which optimization passes may have killed.
+ MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass()));
+
+ // Drop bodies of available-externally objects to improve GlobalDCE.
+ MPM.addPass(EliminateAvailableExternallyPass());
+
+ // Now that we have optimized the program, discard unreachable functions.
+ MPM.addPass(GlobalDCEPass());
+
+ // FIXME: Enable MergeFuncs, conditionally, once it's ported, maybe.
return MPM;
}
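For orientation, a minimal sketch of how a client would build and run this LTO pipeline with the new pass manager. The analysis-manager boilerplate is the standard new-PM setup; the buildLTODefaultPipeline arguments beyond the optimization level are assumed from this revision's signature:

  PassBuilder PB;
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  // Register all analyses and cross-register the proxies between managers.
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
  // Build the pipeline assembled above and run it over a parsed module M.
  ModulePassManager MPM =
      PB.buildLTODefaultPipeline(PassBuilder::O2, /*DebugLogging=*/false);
  MPM.run(M, MAM);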
@@ -579,12 +833,8 @@ AAManager PassBuilder::buildDefaultAAPipeline() {
// Add support for querying global aliasing information when available.
// Because the `AAManager` is a function analysis and `GlobalsAA` is a module
// analysis, all that the `AAManager` can do is query for any *cached*
- // results from `GlobalsAA` through a readonly proxy..
-#if 0
- // FIXME: Enable once the invalidation logic supports this. Currently, the
- // `AAManager` will hold stale references to the module analyses.
+ // results from `GlobalsAA` through a readonly proxy.
AA.registerModuleAnalysis<GlobalsAA>();
-#endif
return AA;
}
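A sketch of how the resulting AAManager is typically installed, assuming FAM is a FunctionAnalysisManager set up as above:

  AAManager AA = PB.buildDefaultAAPipeline();
  // Function-level AAs answer queries directly; module-level analyses such
  // as GlobalsAA are consulted only through their cached results.
  FAM.registerPass([&] { return std::move(AA); });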
diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def
index a9939fddb98c..efd4c097a675 100644
--- a/lib/Passes/PassRegistry.def
+++ b/lib/Passes/PassRegistry.def
@@ -85,6 +85,7 @@ CGSCC_ANALYSIS("fam-proxy", FunctionAnalysisManagerCGSCCProxy())
#ifndef CGSCC_PASS
#define CGSCC_PASS(NAME, CREATE_PASS)
#endif
+CGSCC_PASS("argpromotion", ArgumentPromotionPass())
CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass())
CGSCC_PASS("inline", InlinerPass())
@@ -159,6 +160,7 @@ FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
FUNCTION_PASS("guard-widening", GuardWideningPass())
FUNCTION_PASS("gvn", GVN())
FUNCTION_PASS("loop-simplify", LoopSimplifyPass())
+FUNCTION_PASS("loop-sink", LoopSinkPass())
FUNCTION_PASS("lowerinvoke", LowerInvokePass())
FUNCTION_PASS("mem2reg", PromotePass())
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
@@ -169,8 +171,10 @@ FUNCTION_PASS("jump-threading", JumpThreadingPass())
FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass())
FUNCTION_PASS("lcssa", LCSSAPass())
FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass())
+FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass())
FUNCTION_PASS("loop-distribute", LoopDistributePass())
FUNCTION_PASS("loop-vectorize", LoopVectorizePass())
+FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt())
FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(dbgs()))
@@ -223,7 +227,9 @@ LOOP_PASS("loop-deletion", LoopDeletionPass())
LOOP_PASS("simplify-cfg", LoopSimplifyCFGPass())
LOOP_PASS("strength-reduce", LoopStrengthReducePass())
LOOP_PASS("indvars", IndVarSimplifyPass())
-LOOP_PASS("unroll", LoopUnrollPass())
+LOOP_PASS("unroll", LoopUnrollPass::create())
+LOOP_PASS("unroll-full", LoopUnrollPass::createFull())
LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
LOOP_PASS("print<ivusers>", IVUsersPrinterPass(dbgs()))
+LOOP_PASS("loop-predication", LoopPredicationPass())
#undef LOOP_PASS
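For context, PassBuilder consumes these tables with the usual X-macro expansion when parsing a textual pipeline such as "cgscc(argpromotion)" or "loop(unroll-full)". A simplified sketch of the parsing side (not the literal code):

  static bool parseCGSCCPassName(CGSCCPassManager &CGPM, StringRef Name) {
  #define CGSCC_PASS(NAME, CREATE_PASS)                                        \
    if (Name == NAME) {                                                        \
      CGPM.addPass(CREATE_PASS);                                               \
      return true;                                                             \
    }
  #include "PassRegistry.def"
    return false;
  }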
diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp
index 6d907c7098e0..23999a5312c7 100644
--- a/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -1,4 +1,4 @@
-//=-- CoverageMapping.cpp - Code coverage mapping support ---------*- C++ -*-=//
+//===- CoverageMapping.cpp - Code coverage mapping support ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,18 +12,32 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ProfileData/Coverage/CoverageMapping.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ProfileData/Coverage/CoverageMapping.h"
#include "llvm/ProfileData/Coverage/CoverageMappingReader.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Path.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace coverage;
@@ -59,7 +73,7 @@ void CounterExpressionBuilder::extractTerms(
Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) {
// Gather constant terms.
- llvm::SmallVector<std::pair<unsigned, int>, 32> Terms;
+ SmallVector<std::pair<unsigned, int>, 32> Terms;
extractTerms(ExpressionTree, +1, Terms);
// If there are no terms, this is just a zero. The algorithm below assumes at
@@ -120,8 +134,7 @@ Counter CounterExpressionBuilder::subtract(Counter LHS, Counter RHS) {
get(CounterExpression(CounterExpression::Subtract, LHS, RHS)));
}
-void CounterMappingContext::dump(const Counter &C,
- llvm::raw_ostream &OS) const {
+void CounterMappingContext::dump(const Counter &C, raw_ostream &OS) const {
switch (C.getKind()) {
case Counter::Zero:
OS << '0';
@@ -145,7 +158,7 @@ void CounterMappingContext::dump(const Counter &C,
return;
Expected<int64_t> Value = evaluate(C);
if (auto E = Value.takeError()) {
- llvm::consumeError(std::move(E));
+ consumeError(std::move(E));
return;
}
OS << '[' << *Value << ']';
@@ -217,7 +230,7 @@ Error CoverageMapping::loadFunctionRecord(
for (const auto &Region : Record.MappingRegions) {
Expected<int64_t> ExecutionCount = Ctx.evaluate(Region.Count);
if (auto E = ExecutionCount.takeError()) {
- llvm::consumeError(std::move(E));
+ consumeError(std::move(E));
return Error::success();
}
Function.pushRegion(Region, *ExecutionCount);
@@ -281,6 +294,7 @@ CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames,
}
namespace {
+
/// \brief Distributes functions into instantiation sets.
///
/// An instantiation set is a collection of functions that have the same source
@@ -326,7 +340,7 @@ class SegmentBuilder {
Segments.pop_back();
DEBUG(dbgs() << "Segment at " << Line << ":" << Col);
// Set this region's count.
- if (Region.Kind != coverage::CounterMappingRegion::SkippedRegion) {
+ if (Region.Kind != CounterMappingRegion::SkippedRegion) {
DEBUG(dbgs() << " with count " << Region.ExecutionCount);
Segments.emplace_back(Line, Col, Region.ExecutionCount, IsRegionEntry);
} else
@@ -380,10 +394,10 @@ class SegmentBuilder {
// in combineRegions(). Because we accumulate counter values only from
// regions of the same kind as the first region of the area, prefer
// CodeRegion to ExpansionRegion and ExpansionRegion to SkippedRegion.
- static_assert(coverage::CounterMappingRegion::CodeRegion <
- coverage::CounterMappingRegion::ExpansionRegion &&
- coverage::CounterMappingRegion::ExpansionRegion <
- coverage::CounterMappingRegion::SkippedRegion,
+ static_assert(CounterMappingRegion::CodeRegion <
+ CounterMappingRegion::ExpansionRegion &&
+ CounterMappingRegion::ExpansionRegion <
+ CounterMappingRegion::SkippedRegion,
"Unexpected order of region kind values");
return LHS.Kind < RHS.Kind;
});
@@ -437,7 +451,8 @@ public:
return Segments;
}
};
-}
+
+} // end anonymous namespace
std::vector<StringRef> CoverageMapping::getUniqueSourceFiles() const {
std::vector<StringRef> Filenames;
@@ -487,7 +502,7 @@ static bool isExpansion(const CountedRegion &R, unsigned FileID) {
CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) const {
CoverageData FileCoverage(Filename);
- std::vector<coverage::CountedRegion> Regions;
+ std::vector<CountedRegion> Regions;
for (const auto &Function : Functions) {
auto MainFileID = findMainViewFileID(Filename, Function);
@@ -533,7 +548,7 @@ CoverageMapping::getCoverageForFunction(const FunctionRecord &Function) const {
return CoverageData();
CoverageData FunctionCoverage(Function.Filenames[*MainFileID]);
- std::vector<coverage::CountedRegion> Regions;
+ std::vector<CountedRegion> Regions;
for (const auto &CR : Function.CountedRegions)
if (CR.FileID == *MainFileID) {
Regions.push_back(CR);
@@ -551,7 +566,7 @@ CoverageData CoverageMapping::getCoverageForExpansion(
const ExpansionRecord &Expansion) const {
CoverageData ExpansionCoverage(
Expansion.Function.Filenames[Expansion.FileID]);
- std::vector<coverage::CountedRegion> Regions;
+ std::vector<CountedRegion> Regions;
for (const auto &CR : Expansion.Function.CountedRegions)
if (CR.FileID == Expansion.FileID) {
Regions.push_back(CR);
@@ -566,8 +581,7 @@ CoverageData CoverageMapping::getCoverageForExpansion(
return ExpansionCoverage;
}
-namespace {
-std::string getCoverageMapErrString(coveragemap_error Err) {
+static std::string getCoverageMapErrString(coveragemap_error Err) {
switch (Err) {
case coveragemap_error::success:
return "Success";
@@ -585,6 +599,8 @@ std::string getCoverageMapErrString(coveragemap_error Err) {
llvm_unreachable("A value of coveragemap_error has no message.");
}
+namespace {
+
// FIXME: This class is only here to support the transition to llvm::Error. It
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
@@ -594,6 +610,7 @@ class CoverageMappingErrorCategoryType : public std::error_category {
return getCoverageMapErrString(static_cast<coveragemap_error>(IE));
}
};
+
} // end anonymous namespace
std::string CoverageMapError::message() const {
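A sketch of the transition pattern this category supports, assuming the usual coveragemap_category() accessor over the ManagedStatic instance: callers see a plain std::error_code whose message() routes through getCoverageMapErrString():

  std::error_code EC(static_cast<int>(coveragemap_error::malformed),
                     coveragemap_category());
  // EC.message() returns the string produced by getCoverageMapErrString().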
diff --git a/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index a6c7031ccd3d..a34f359cd542 100644
--- a/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -1,4 +1,4 @@
-//=-- CoverageMappingReader.cpp - Code coverage mapping reader ----*- C++ -*-=//
+//===- CoverageMappingReader.cpp - Code coverage mapping reader -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,14 +13,34 @@
//===----------------------------------------------------------------------===//
#include "llvm/ProfileData/Coverage/CoverageMappingReader.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/Error.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace coverage;
@@ -226,9 +246,8 @@ Error RawCoverageMappingReader::readMappingRegionsSubArray(
}
Error RawCoverageMappingReader::read() {
-
// Read the virtual file mapping.
- llvm::SmallVector<unsigned, 8> VirtualFileMapping;
+ SmallVector<unsigned, 8> VirtualFileMapping;
uint64_t NumFileMappings;
if (auto Err = readSize(NumFileMappings))
return Err;
@@ -349,7 +368,10 @@ static Expected<bool> isCoverageMappingDummy(uint64_t Hash, StringRef Mapping) {
}
namespace {
+
struct CovMapFuncRecordReader {
+ virtual ~CovMapFuncRecordReader() = default;
+
// The interface to read coverage mapping function records for a module.
//
// \p Buf points to the buffer containing the \c CovHeader of the coverage
@@ -359,26 +381,24 @@ struct CovMapFuncRecordReader {
// greater than \p End if not.
virtual Expected<const char *> readFunctionRecords(const char *Buf,
const char *End) = 0;
- virtual ~CovMapFuncRecordReader() {}
+
template <class IntPtrT, support::endianness Endian>
static Expected<std::unique_ptr<CovMapFuncRecordReader>>
- get(coverage::CovMapVersion Version, InstrProfSymtab &P,
+ get(CovMapVersion Version, InstrProfSymtab &P,
std::vector<BinaryCoverageReader::ProfileMappingRecord> &R,
std::vector<StringRef> &F);
};
// A class for reading coverage mapping function records for a module.
-template <coverage::CovMapVersion Version, class IntPtrT,
- support::endianness Endian>
+template <CovMapVersion Version, class IntPtrT, support::endianness Endian>
class VersionedCovMapFuncRecordReader : public CovMapFuncRecordReader {
- typedef typename coverage::CovMapTraits<
+ typedef typename CovMapTraits<
Version, IntPtrT>::CovMapFuncRecordType FuncRecordType;
- typedef typename coverage::CovMapTraits<Version, IntPtrT>::NameRefType
- NameRefType;
+ typedef typename CovMapTraits<Version, IntPtrT>::NameRefType NameRefType;
// Maps function's name references to the indexes of their records
// in \c Records.
- llvm::DenseMap<NameRefType, size_t> FunctionRecords;
+ DenseMap<NameRefType, size_t> FunctionRecords;
InstrProfSymtab &ProfileNames;
std::vector<StringRef> &Filenames;
std::vector<BinaryCoverageReader::ProfileMappingRecord> &Records;
@@ -432,14 +452,16 @@ public:
std::vector<BinaryCoverageReader::ProfileMappingRecord> &R,
std::vector<StringRef> &F)
: ProfileNames(P), Filenames(F), Records(R) {}
- ~VersionedCovMapFuncRecordReader() override {}
+
+ ~VersionedCovMapFuncRecordReader() override = default;
Expected<const char *> readFunctionRecords(const char *Buf,
const char *End) override {
using namespace support;
+
if (Buf + sizeof(CovMapHeader) > End)
return make_error<CoverageMapError>(coveragemap_error::malformed);
- auto CovHeader = reinterpret_cast<const coverage::CovMapHeader *>(Buf);
+ auto CovHeader = reinterpret_cast<const CovMapHeader *>(Buf);
uint32_t NRecords = CovHeader->getNRecords<Endian>();
uint32_t FilenamesSize = CovHeader->getFilenamesSize<Endian>();
uint32_t CoverageSize = CovHeader->getCoverageSize<Endian>();
@@ -490,14 +512,16 @@ public:
return Buf;
}
};
+
} // end anonymous namespace
template <class IntPtrT, support::endianness Endian>
Expected<std::unique_ptr<CovMapFuncRecordReader>> CovMapFuncRecordReader::get(
- coverage::CovMapVersion Version, InstrProfSymtab &P,
+ CovMapVersion Version, InstrProfSymtab &P,
std::vector<BinaryCoverageReader::ProfileMappingRecord> &R,
std::vector<StringRef> &F) {
using namespace coverage;
+
switch (Version) {
case CovMapVersion::Version1:
return llvm::make_unique<VersionedCovMapFuncRecordReader<
@@ -518,11 +542,12 @@ static Error readCoverageMappingData(
std::vector<BinaryCoverageReader::ProfileMappingRecord> &Records,
std::vector<StringRef> &Filenames) {
using namespace coverage;
+
// Read the records in the coverage data section.
auto CovHeader =
- reinterpret_cast<const coverage::CovMapHeader *>(Data.data());
+ reinterpret_cast<const CovMapHeader *>(Data.data());
CovMapVersion Version = (CovMapVersion)CovHeader->getVersion<Endian>();
- if (Version > coverage::CovMapVersion::CurrentVersion)
+ if (Version > CovMapVersion::CurrentVersion)
return make_error<CoverageMapError>(coveragemap_error::unsupported_version);
Expected<std::unique_ptr<CovMapFuncRecordReader>> ReaderExpected =
CovMapFuncRecordReader::get<T, Endian>(Version, ProfileNames, Records,
@@ -538,6 +563,7 @@ static Error readCoverageMappingData(
}
return Error::success();
}
+
static const char *TestingFormatMagic = "llvmcovmtestdata";
static Error loadTestingFormat(StringRef Data, InstrProfSymtab &ProfileNames,
@@ -595,21 +621,21 @@ static Error loadBinaryFormat(MemoryBufferRef ObjectBuffer,
StringRef &CoverageMapping,
uint8_t &BytesInAddress,
support::endianness &Endian, StringRef Arch) {
- auto BinOrErr = object::createBinary(ObjectBuffer);
+ auto BinOrErr = createBinary(ObjectBuffer);
if (!BinOrErr)
return BinOrErr.takeError();
auto Bin = std::move(BinOrErr.get());
std::unique_ptr<ObjectFile> OF;
- if (auto *Universal = dyn_cast<object::MachOUniversalBinary>(Bin.get())) {
+ if (auto *Universal = dyn_cast<MachOUniversalBinary>(Bin.get())) {
// If we have a universal binary, try to look up the object for the
// appropriate architecture.
auto ObjectFileOrErr = Universal->getObjectForArch(Arch);
if (!ObjectFileOrErr)
return ObjectFileOrErr.takeError();
OF = std::move(ObjectFileOrErr.get());
- } else if (isa<object::ObjectFile>(Bin.get())) {
+ } else if (isa<ObjectFile>(Bin.get())) {
// For any other object file, upcast and take ownership.
- OF.reset(cast<object::ObjectFile>(Bin.release()));
+ OF.reset(cast<ObjectFile>(Bin.release()));
// If we've asked for a particular arch, make sure they match.
if (!Arch.empty() && OF->getArch() != Triple(Arch).getArch())
return errorCodeToError(object_error::arch_not_found);
@@ -623,11 +649,15 @@ static Error loadBinaryFormat(MemoryBufferRef ObjectBuffer,
: support::endianness::big;
// Look for the sections that we are interested in.
- auto NamesSection = lookupSection(*OF, getInstrProfNameSectionName(false));
+ auto ObjFormat = OF->getTripleObjectFormat();
+ auto NamesSection =
+ lookupSection(*OF, getInstrProfSectionName(IPSK_name, ObjFormat,
+ /*AddSegmentInfo=*/false));
if (auto E = NamesSection.takeError())
return E;
auto CoverageSection =
- lookupSection(*OF, getInstrProfCoverageSectionName(false));
+ lookupSection(*OF, getInstrProfSectionName(IPSK_covmap, ObjFormat,
+ /*AddSegmentInfo=*/false));
if (auto E = CoverageSection.takeError())
return E;
diff --git a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
index 82356333b937..f131be2cba49 100644
--- a/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
+++ b/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
@@ -1,4 +1,4 @@
-//=-- CoverageMappingWriter.cpp - Code coverage mapping writer -------------=//
+//===- CoverageMappingWriter.cpp - Code coverage mapping writer -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,8 +12,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ProfileData/Coverage/CoverageMappingWriter.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <vector>
using namespace llvm;
using namespace coverage;
@@ -27,14 +34,25 @@ void CoverageFilenamesSectionWriter::write(raw_ostream &OS) {
}
namespace {
+
/// \brief Gather only the expressions that are used by the mapping
/// regions in this function.
class CounterExpressionsMinimizer {
ArrayRef<CounterExpression> Expressions;
- llvm::SmallVector<CounterExpression, 16> UsedExpressions;
+ SmallVector<CounterExpression, 16> UsedExpressions;
std::vector<unsigned> AdjustedExpressionIDs;
public:
+ CounterExpressionsMinimizer(ArrayRef<CounterExpression> Expressions,
+ ArrayRef<CounterMappingRegion> MappingRegions)
+ : Expressions(Expressions) {
+ AdjustedExpressionIDs.resize(Expressions.size(), 0);
+ for (const auto &I : MappingRegions)
+ mark(I.Count);
+ for (const auto &I : MappingRegions)
+ gatherUsed(I.Count);
+ }
+
void mark(Counter C) {
if (!C.isExpression())
return;
@@ -54,16 +72,6 @@ public:
gatherUsed(E.RHS);
}
- CounterExpressionsMinimizer(ArrayRef<CounterExpression> Expressions,
- ArrayRef<CounterMappingRegion> MappingRegions)
- : Expressions(Expressions) {
- AdjustedExpressionIDs.resize(Expressions.size(), 0);
- for (const auto &I : MappingRegions)
- mark(I.Count);
- for (const auto &I : MappingRegions)
- gatherUsed(I.Count);
- }
-
ArrayRef<CounterExpression> getExpressions() const { return UsedExpressions; }
/// \brief Adjust the given counter to correctly transition from the old
@@ -74,7 +82,8 @@ public:
return C;
}
};
-}
+
+} // end anonymous namespace
/// \brief Encode the counter.
///
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index 74acd9e5e207..64a65ccc11a1 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -1,4 +1,4 @@
-//=-- InstrProf.cpp - Instrumented profiling format support -----------------=//
+//===- InstrProf.cpp - Instrumented profiling format support --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,29 +12,68 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/SwapByteOrder.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <utility>
+#include <vector>
using namespace llvm;
static cl::opt<bool> StaticFuncFullModulePrefix(
- "static-func-full-module-prefix", cl::init(false),
+ "static-func-full-module-prefix", cl::init(true),
cl::desc("Use full module build paths in the profile counter names for "
"static functions."));
-namespace {
-std::string getInstrProfErrString(instrprof_error Err) {
+// This option is tailored to users that have different top-level directories
+// in profile-gen and profile-use compilation. Users need to specify the number
+// of levels to strip. A value larger than the number of directories in the
+// source file path will strip all directory names and leave only the basename.
+//
+// Note that current ThinLTO module importing for indirect calls assumes the
+// source directory name is not stripped. A non-zero option value here can
+// potentially prevent some inter-module indirect call promotions.
+static cl::opt<unsigned> StaticFuncStripDirNamePrefix(
+ "static-func-strip-dirname-prefix", cl::init(0),
+ cl::desc("Strip specified level of directory name from source path in "
+ "the profile counter name for static functions."));
+
+static std::string getInstrProfErrString(instrprof_error Err) {
switch (Err) {
case instrprof_error::success:
return "Success";
@@ -76,15 +115,19 @@ std::string getInstrProfErrString(instrprof_error Err) {
llvm_unreachable("A value of instrprof_error has no message.");
}
+namespace {
+
// FIXME: This class is only here to support the transition to llvm::Error. It
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
class InstrProfErrorCategoryType : public std::error_category {
const char *name() const noexcept override { return "llvm.instrprof"; }
+
std::string message(int IE) const override {
return getInstrProfErrString(static_cast<instrprof_error>(IE));
}
};
+
} // end anonymous namespace
static ManagedStatic<InstrProfErrorCategoryType> ErrorCategory;
@@ -93,8 +136,49 @@ const std::error_category &llvm::instrprof_category() {
return *ErrorCategory;
}
+namespace {
+
+const char *InstrProfSectNameCommon[] = {
+#define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
+ SectNameCommon,
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
+const char *InstrProfSectNameCoff[] = {
+#define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
+ SectNameCoff,
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
+const char *InstrProfSectNamePrefix[] = {
+#define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
+ Prefix,
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
+} // namespace
+
namespace llvm {
+std::string getInstrProfSectionName(InstrProfSectKind IPSK,
+ Triple::ObjectFormatType OF,
+ bool AddSegmentInfo) {
+ std::string SectName;
+
+ if (OF == Triple::MachO && AddSegmentInfo)
+ SectName = InstrProfSectNamePrefix[IPSK];
+
+ if (OF == Triple::COFF)
+ SectName += InstrProfSectNameCoff[IPSK];
+ else
+ SectName += InstrProfSectNameCommon[IPSK];
+
+ if (OF == Triple::MachO && IPSK == IPSK_data && AddSegmentInfo)
+ SectName += ",regular,live_support";
+
+ return SectName;
+}
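An illustrative call. The concrete strings come from the INSTR_PROF_SECT_ENTRY tables in InstrProfData.inc, so the MachO value shown is an assumption based on those tables:

  Triple TT("x86_64-apple-darwin");
  std::string S = getInstrProfSectionName(IPSK_data, TT.getObjectFormat(),
                                          /*AddSegmentInfo=*/true);
  // Expected to be "__DATA,__llvm_prf_data,regular,live_support" on MachO.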
+
void SoftInstrProfErrors::addError(instrprof_error IE) {
if (IE == instrprof_error::success)
return;
@@ -133,6 +217,24 @@ std::string getPGOFuncName(StringRef RawFuncName,
return GlobalValue::getGlobalIdentifier(RawFuncName, Linkage, FileName);
}
+// Strip NumPrefix levels of directory names from PathNameStr. If the number
+// of directory separators is less than NumPrefix, strip all directories and
+// leave only the base file name.
+static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) {
+ uint32_t Count = NumPrefix;
+ uint32_t Pos = 0, LastPos = 0;
+ for (auto &CI : PathNameStr) {
+ ++Pos;
+ if (llvm::sys::path::is_separator(CI)) {
+ LastPos = Pos;
+ --Count;
+ }
+ if (Count == 0)
+ break;
+ }
+ return PathNameStr.substr(LastPos);
+}
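Illustrative behavior, with hypothetical paths, following the loop above:

  assert(stripDirPrefix("a/b/c/file.cpp", 2) == "c/file.cpp");
  // Fewer separators than requested: everything but the basename goes.
  assert(stripDirPrefix("a/b/file.cpp", 5) == "file.cpp");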
+
// Return the PGOFuncName. This function has some special handling when called
// in LTO optimization. The following only applies when calling in LTO passes
// (when \c InLTO is true): LTO's internalization privatizes many global linkage
@@ -151,6 +253,8 @@ std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) {
StringRef FileName = (StaticFuncFullModulePrefix
? F.getParent()->getName()
: sys::path::filename(F.getParent()->getName()));
+ if (StaticFuncFullModulePrefix && StaticFuncStripDirNamePrefix != 0)
+ FileName = stripDirPrefix(FileName, StaticFuncStripDirNamePrefix);
return getPGOFuncName(F.getName(), F.getLinkage(), FileName, Version);
}
@@ -198,7 +302,6 @@ std::string getPGOFuncNameVarName(StringRef FuncName,
GlobalVariable *createPGOFuncNameVar(Module &M,
GlobalValue::LinkageTypes Linkage,
StringRef PGOFuncName) {
-
// We generally want to match the function's linkage, but available_externally
// and extern_weak both have the wrong semantics, and anything that doesn't
// need to link across compilation units doesn't need to be visible at all.
@@ -236,6 +339,17 @@ void InstrProfSymtab::create(Module &M, bool InLTO) {
const std::string &PGOFuncName = getPGOFuncName(F, InLTO);
addFuncName(PGOFuncName);
MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+ // In ThinLTO, a local function may have been promoted to a global with a
+ // suffix added to its name. Add the stripped function name to the symbol
+ // table so that we can find a match in the profile.
+ if (InLTO) {
+ auto pos = PGOFuncName.find('.');
+ if (pos != std::string::npos) {
+ const std::string &OtherFuncName = PGOFuncName.substr(0, pos);
+ addFuncName(OtherFuncName);
+ MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
+ }
+ }
}
finalizeSymtab();
@@ -243,7 +357,7 @@ void InstrProfSymtab::create(Module &M, bool InLTO) {
Error collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
bool doCompression, std::string &Result) {
- assert(NameStrs.size() && "No name data to emit");
+ assert(!NameStrs.empty() && "No name data to emit");
uint8_t Header[16], *P = Header;
std::string UncompressedNameStrings =
@@ -271,12 +385,12 @@ Error collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs,
}
SmallString<128> CompressedNameStrings;
- zlib::Status Success =
- zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings,
- zlib::BestSizeCompression);
-
- if (Success != zlib::StatusOK)
+ Error E = zlib::compress(StringRef(UncompressedNameStrings),
+ CompressedNameStrings, zlib::BestSizeCompression);
+ if (E) {
+ consumeError(std::move(E));
return make_error<InstrProfError>(instrprof_error::compress_failed);
+ }
return WriteStringToResult(CompressedNameStrings.size(),
CompressedNameStrings);
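The same Error-based contract applies to the uncompress path below; in isolation, a sketch of the new zlib API as exercised by this file (buffer names and sizes are illustrative):

  SmallString<128> Compressed, Uncompressed;
  if (Error E = zlib::compress("hello hello hello", Compressed,
                               zlib::BestSizeCompression))
    consumeError(std::move(E)); // e.g. LLVM built without zlib
  if (Error E = zlib::uncompress(Compressed, Uncompressed,
                                 /*UncompressedSize=*/17))
    consumeError(std::move(E));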
@@ -315,9 +429,12 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
if (isCompressed) {
StringRef CompressedNameStrings(reinterpret_cast<const char *>(P),
CompressedSize);
- if (zlib::uncompress(CompressedNameStrings, UncompressedNameStrings,
- UncompressedSize) != zlib::StatusOK)
+ if (Error E =
+ zlib::uncompress(CompressedNameStrings, UncompressedNameStrings,
+ UncompressedSize)) {
+ consumeError(std::move(E));
return make_error<InstrProfError>(instrprof_error::uncompress_failed);
+ }
P += CompressedSize;
NameStrings = StringRef(UncompressedNameStrings.data(),
UncompressedNameStrings.size());
@@ -553,6 +670,7 @@ void ValueProfRecord::deserializeTo(InstrProfRecord &Record,
void ValueProfRecord::swapBytes(support::endianness Old,
support::endianness New) {
using namespace support;
+
if (Old == New)
return;
@@ -589,6 +707,7 @@ void ValueProfData::deserializeTo(InstrProfRecord &Record,
template <class T>
static T swapToHostOrder(const unsigned char *&D, support::endianness Orig) {
using namespace support;
+
if (Orig == little)
return endian::readNext<T, little, unaligned>(D);
else
@@ -623,6 +742,7 @@ ValueProfData::getValueProfData(const unsigned char *D,
const unsigned char *const BufferEnd,
support::endianness Endianness) {
using namespace support;
+
if (D + sizeof(ValueProfData) > BufferEnd)
return make_error<InstrProfError>(instrprof_error::truncated);
@@ -645,6 +765,7 @@ ValueProfData::getValueProfData(const unsigned char *D,
void ValueProfData::swapBytesToHost(support::endianness Endianness) {
using namespace support;
+
if (Endianness == getHostEndianness())
return;
@@ -660,6 +781,7 @@ void ValueProfData::swapBytesToHost(support::endianness Endianness) {
void ValueProfData::swapBytesFromHost(support::endianness Endianness) {
using namespace support;
+
if (Endianness == getHostEndianness())
return;
@@ -791,7 +913,7 @@ bool needsComdatForCounter(const Function &F, const Module &M) {
return true;
Triple TT(M.getTargetTriple());
- if (!TT.isOSBinFormatELF())
+ if (!TT.isOSBinFormatELF() && !TT.isOSBinFormatWasm())
return false;
// See createPGOFuncNameVar for more details. To avoid link errors, profile
@@ -854,4 +976,26 @@ bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) {
}
return true;
}
+
+// Parse the value profile options.
+void getMemOPSizeRangeFromOption(std::string MemOPSizeRange,
+ int64_t &RangeStart, int64_t &RangeLast) {
+ static const int64_t DefaultMemOPSizeRangeStart = 0;
+ static const int64_t DefaultMemOPSizeRangeLast = 8;
+ RangeStart = DefaultMemOPSizeRangeStart;
+ RangeLast = DefaultMemOPSizeRangeLast;
+
+ if (!MemOPSizeRange.empty()) {
+ auto Pos = MemOPSizeRange.find(":");
+ if (Pos != std::string::npos) {
+ if (Pos > 0)
+ RangeStart = atoi(MemOPSizeRange.substr(0, Pos).c_str());
+ if (Pos < MemOPSizeRange.size() - 1)
+ RangeLast = atoi(MemOPSizeRange.substr(Pos + 1).c_str());
+ } else
+ RangeLast = atoi(MemOPSizeRange.c_str());
+ }
+ assert(RangeLast >= RangeStart);
+}
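Illustrative results of the parsing above (the "start:last" option syntax):

  int64_t Start, Last;
  getMemOPSizeRangeFromOption("2:16", Start, Last); // Start == 2, Last == 16
  getMemOPSizeRangeFromOption(":16", Start, Last);  // Start == 0 (default), Last == 16
  getMemOPSizeRangeFromOption("24", Start, Last);   // Start == 0, Last == 24
  getMemOPSizeRangeFromOption("", Start, Last);     // defaults: 0 and 8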
+
} // end namespace llvm
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
index ad407f07957f..856f793363f7 100644
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -1,4 +1,4 @@
-//=-- InstrProfReader.cpp - Instrumented profiling reader -------------------=//
+//===- InstrProfReader.cpp - Instrumented profiling reader ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,9 +12,27 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
-#include <cassert>
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/ProfileSummary.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SwapByteOrder.h"
+#include <algorithm>
+#include <cctype>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <system_error>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -78,7 +96,6 @@ IndexedInstrProfReader::create(const Twine &Path) {
return IndexedInstrProfReader::create(std::move(BufferOrError.get()));
}
-
Expected<std::unique_ptr<IndexedInstrProfReader>>
IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
// Sanity check the buffer.
@@ -182,7 +199,7 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
CHECK_LINE_END(Line);
std::pair<StringRef, StringRef> VD = Line->rsplit(':');
uint64_t TakenCount, Value;
- if (VK == IPVK_IndirectCallTarget) {
+ if (ValueKind == IPVK_IndirectCallTarget) {
Symtab->addFuncName(VD.first);
Value = IndexedInstrProf::ComputeHash(VD.first);
} else {
@@ -192,7 +209,8 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
CurrentValues.push_back({Value, TakenCount});
Line++;
}
- Record.addValueData(VK, S, CurrentValues.data(), NumValueData, nullptr);
+ Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData,
+ nullptr);
}
}
return success();
@@ -232,7 +250,7 @@ Error TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
return error(instrprof_error::malformed);
// Read each counter and fill our internal storage with the values.
- Record.Counts.clear();
+ Record.Clear();
Record.Counts.reserve(NumCounters);
for (uint64_t I = 0; I < NumCounters; ++I) {
if (Line.is_at_end())
@@ -398,7 +416,6 @@ Error RawInstrProfReader<IntPtrT>::readRawCounts(
template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::readValueProfilingData(
InstrProfRecord &Record) {
-
Record.clearValueData();
CurValueDataSize = 0;
// Need to match the logic in value profile dumper code in compiler-rt:
@@ -454,9 +471,11 @@ Error RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) {
}
namespace llvm {
+
template class RawInstrProfReader<uint32_t>;
template class RawInstrProfReader<uint64_t>;
-}
+
+} // end namespace llvm
InstrProfLookupTrait::hash_value_type
InstrProfLookupTrait::ComputeHash(StringRef K) {
@@ -482,6 +501,8 @@ bool InstrProfLookupTrait::readValueProfilingData(
data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
offset_type N) {
+ using namespace support;
+
// Check if the data is corrupt. If so, don't try to read it.
if (N % sizeof(uint64_t))
return data_type();
@@ -489,7 +510,6 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
DataBuffer.clear();
std::vector<uint64_t> CounterBuffer;
- using namespace support;
const unsigned char *End = D + N;
while (D < End) {
// Read hash.
@@ -567,9 +587,10 @@ InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
}
bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
+ using namespace support;
+
if (DataBuffer.getBufferSize() < 8)
return false;
- using namespace support;
uint64_t Magic =
endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart());
// Verify that it's magical.
@@ -581,6 +602,7 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
const unsigned char *Cur) {
using namespace IndexedInstrProf;
using namespace support;
+
if (Version >= IndexedInstrProf::Version4) {
const IndexedInstrProf::Summary *SummaryInLE =
reinterpret_cast<const IndexedInstrProf::Summary *>(Cur);
@@ -617,6 +639,7 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
} else {
// For older version of profile data, we need to compute on the fly:
using namespace IndexedInstrProf;
+
InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
// FIXME: This only computes an empty summary. Need to call addRecord for
// all InstrProfRecords to get the correct summary.
@@ -626,14 +649,14 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version,
}
Error IndexedInstrProfReader::readHeader() {
+ using namespace support;
+
const unsigned char *Start =
(const unsigned char *)DataBuffer->getBufferStart();
const unsigned char *Cur = Start;
if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
return error(instrprof_error::truncated);
- using namespace support;
-
auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur);
Cur += sizeof(IndexedInstrProf::Header);
diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp
index 029d75660a73..6b7bd3b2fc0a 100644
--- a/lib/ProfileData/InstrProfWriter.cpp
+++ b/lib/ProfileData/InstrProfWriter.cpp
@@ -1,4 +1,4 @@
-//=-- InstrProfWriter.cpp - Instrumented profiling writer -------------------=//
+//===- InstrProfWriter.cpp - Instrumented profiling writer ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,15 +12,21 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ProfileData/InstrProfWriter.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/InstrProfWriter.h"
#include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/OnDiskHashTable.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <cstdint>
+#include <memory>
#include <string>
#include <tuple>
#include <utility>
@@ -41,10 +47,9 @@ namespace llvm {
// A wrapper class that abstracts the writer stream, with support for
// back-patching bytes.
class ProfOStream {
-
public:
- ProfOStream(llvm::raw_fd_ostream &FD) : IsFDOStream(true), OS(FD), LE(FD) {}
- ProfOStream(llvm::raw_string_ostream &STR)
+ ProfOStream(raw_fd_ostream &FD) : IsFDOStream(true), OS(FD), LE(FD) {}
+ ProfOStream(raw_string_ostream &STR)
: IsFDOStream(false), OS(STR), LE(STR) {}
uint64_t tell() { return OS.tell(); }
@@ -55,15 +60,16 @@ public:
// directly and it won't be reflected in the stream's internal buffer.
void patch(PatchItem *P, int NItems) {
using namespace support;
+
if (IsFDOStream) {
- llvm::raw_fd_ostream &FDOStream = static_cast<llvm::raw_fd_ostream &>(OS);
+ raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);
for (int K = 0; K < NItems; K++) {
FDOStream.seek(P[K].Pos);
for (int I = 0; I < P[K].N; I++)
write(P[K].D[I]);
}
} else {
- llvm::raw_string_ostream &SOStream =
+ raw_string_ostream &SOStream =
static_cast<llvm::raw_string_ostream &>(OS);
std::string &Data = SOStream.str(); // with flush
for (int K = 0; K < NItems; K++) {
@@ -94,17 +100,19 @@ public:
typedef uint64_t hash_value_type;
typedef uint64_t offset_type;
- support::endianness ValueProfDataEndianness;
+ support::endianness ValueProfDataEndianness = support::little;
InstrProfSummaryBuilder *SummaryBuilder;
- InstrProfRecordWriterTrait() : ValueProfDataEndianness(support::little) {}
+ InstrProfRecordWriterTrait() = default;
+
static hash_value_type ComputeHash(key_type_ref K) {
return IndexedInstrProf::ComputeHash(K);
}
static std::pair<offset_type, offset_type>
EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
- using namespace llvm::support;
+ using namespace support;
+
endian::Writer<little> LE(Out);
offset_type N = K.size();
@@ -130,7 +138,8 @@ public:
}
void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type) {
- using namespace llvm::support;
+ using namespace support;
+
endian::Writer<little> LE(Out);
for (const auto &ProfileData : *V) {
const InstrProfRecord &ProfRecord = ProfileData.second;
@@ -154,8 +163,7 @@ public:
} // end namespace llvm
InstrProfWriter::InstrProfWriter(bool Sparse)
- : Sparse(Sparse), FunctionData(), ProfileKind(PF_Unknown),
- InfoObj(new InstrProfRecordWriterTrait()) {}
+ : Sparse(Sparse), InfoObj(new InstrProfRecordWriterTrait()) {}
InstrProfWriter::~InstrProfWriter() { delete InfoObj; }
@@ -208,7 +216,7 @@ bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
return true;
for (const auto &Func : PD) {
const InstrProfRecord &IPR = Func.second;
- if (any_of(IPR.Counts, [](uint64_t Count) { return Count > 0; }))
+ if (llvm::any_of(IPR.Counts, [](uint64_t Count) { return Count > 0; }))
return true;
}
return false;
@@ -217,6 +225,7 @@ bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
static void setSummary(IndexedInstrProf::Summary *TheSummary,
ProfileSummary &PS) {
using namespace IndexedInstrProf;
+
std::vector<ProfileSummaryEntry> &Res = PS.getDetailedSummary();
TheSummary->NumSummaryFields = Summary::NumKinds;
TheSummary->NumCutoffEntries = Res.size();
@@ -231,9 +240,10 @@ static void setSummary(IndexedInstrProf::Summary *TheSummary,
}
void InstrProfWriter::writeImpl(ProfOStream &OS) {
+ using namespace IndexedInstrProf;
+
OnDiskChainedHashTableGenerator<InstrProfRecordWriterTrait> Generator;
- using namespace IndexedInstrProf;
InstrProfSummaryBuilder ISB(ProfileSummaryBuilder::DefaultCutoffs);
InfoObj->SummaryBuilder = &ISB;
@@ -301,7 +311,7 @@ void InstrProfWriter::write(raw_fd_ostream &OS) {
std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() {
std::string Data;
- llvm::raw_string_ostream OS(Data);
+ raw_string_ostream OS(Data);
ProfOStream POS(OS);
// Write the hash table.
writeImpl(POS);
diff --git a/lib/ProfileData/SampleProf.cpp b/lib/ProfileData/SampleProf.cpp
index 5bcfff0801d5..eafdd2154b7b 100644
--- a/lib/ProfileData/SampleProf.cpp
+++ b/lib/ProfileData/SampleProf.cpp
@@ -13,18 +13,25 @@
//===----------------------------------------------------------------------===//
#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+#include <system_error>
-using namespace llvm::sampleprof;
using namespace llvm;
+using namespace sampleprof;
namespace {
+
// FIXME: This class is only here to support the transition to llvm::Error. It
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
class SampleProfErrorCategoryType : public std::error_category {
const char *name() const noexcept override { return "llvm.sampleprof"; }
+
std::string message(int IE) const override {
sampleprof_error E = static_cast<sampleprof_error>(IE);
switch (E) {
@@ -54,7 +61,8 @@ class SampleProfErrorCategoryType : public std::error_category {
llvm_unreachable("A value of sampleprof_error has no message.");
}
};
-}
+
+} // end anonymous namespace
static ManagedStatic<SampleProfErrorCategoryType> ErrorCategory;
@@ -74,7 +82,9 @@ raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
return OS;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void LineLocation::dump() const { print(dbgs()); }
+#endif
/// \brief Print the sample record to the stream \p OS indented by \p Indent.
void SampleRecord::print(raw_ostream &OS, unsigned Indent) const {
@@ -87,7 +97,9 @@ void SampleRecord::print(raw_ostream &OS, unsigned Indent) const {
OS << "\n";
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SampleRecord::dump() const { print(dbgs(), 0); }
+#endif
raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
const SampleRecord &Sample) {
@@ -101,7 +113,7 @@ void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {
<< " sampled lines\n";
OS.indent(Indent);
- if (BodySamples.size() > 0) {
+ if (!BodySamples.empty()) {
OS << "Samples collected in the function's body {\n";
SampleSorter<LineLocation, SampleRecord> SortedBodySamples(BodySamples);
for (const auto &SI : SortedBodySamples.get()) {
@@ -115,14 +127,16 @@ void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {
}
OS.indent(Indent);
- if (CallsiteSamples.size() > 0) {
+ if (!CallsiteSamples.empty()) {
OS << "Samples collected in inlined callsites {\n";
- SampleSorter<LineLocation, FunctionSamples> SortedCallsiteSamples(
+ SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(
CallsiteSamples);
for (const auto &CS : SortedCallsiteSamples.get()) {
- OS.indent(Indent + 2);
- OS << CS->first << ": inlined callee: " << CS->second.getName() << ": ";
- CS->second.print(OS, Indent + 4);
+ for (const auto &FS : CS->second) {
+ OS.indent(Indent + 2);
+ OS << CS->first << ": inlined callee: " << FS.second.getName() << ": ";
+ FS.second.print(OS, Indent + 4);
+ }
}
OS << "}\n";
} else {
@@ -136,4 +150,6 @@ raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
return OS;
}
-void FunctionSamples::dump(void) const { print(dbgs(), 0); }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void FunctionSamples::dump() const { print(dbgs(), 0); }
+#endif
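The guard idiom applied throughout this file, in isolation (MyType is hypothetical): dump() bodies are compiled only in asserts builds or when LLVM_ENABLE_DUMP is defined, keeping them out of release binaries:

  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  LLVM_DUMP_METHOD void MyType::dump() const { print(dbgs(), 0); }
  #endif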
diff --git a/lib/ProfileData/SampleProfReader.cpp b/lib/ProfileData/SampleProfReader.cpp
index af80b036a5bb..234fe02ac8a8 100644
--- a/lib/ProfileData/SampleProfReader.cpp
+++ b/lib/ProfileData/SampleProfReader.cpp
@@ -23,14 +23,25 @@
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/ProfileSummary.h"
+#include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <system_error>
+#include <vector>
-using namespace llvm::sampleprof;
using namespace llvm;
+using namespace sampleprof;
/// \brief Dump the function profile for \p FName.
///
@@ -200,7 +211,7 @@ std::error_code SampleProfileReaderText::read() {
InlineStack.pop_back();
}
FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
- LineLocation(LineOffset, Discriminator));
+ LineLocation(LineOffset, Discriminator))[FName];
FSamples.setName(FName);
MergeResult(Result, FSamples.addTotalSamples(NumSamples));
InlineStack.push_back(&FSamples);
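The shape of the new nested lookup, as a sketch; ParentSamples, LineOffset, Discriminator, and CalleeName stand in for the reader's locals:

  // functionSamplesAt(Loc) now yields a FunctionSamplesMap keyed by callee
  // name, so distinct callees inlined at one location no longer collide.
  FunctionSamples &CalleeFS = ParentSamples.functionSamplesAt(
      LineLocation(LineOffset, Discriminator))[CalleeName];
  CalleeFS.setName(CalleeName);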
@@ -352,8 +363,8 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
if (std::error_code EC = FName.getError())
return EC;
- FunctionSamples &CalleeProfile =
- FProfile.functionSamplesAt(LineLocation(*LineOffset, *Discriminator));
+ FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
+ LineLocation(*LineOffset, *Discriminator))[*FName];
CalleeProfile.setName(*FName);
if (std::error_code EC = readProfile(CalleeProfile))
return EC;
@@ -625,7 +636,7 @@ std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
uint32_t LineOffset = Offset >> 16;
uint32_t Discriminator = Offset & 0xffff;
FProfile = &CallerProfile->functionSamplesAt(
- LineLocation(LineOffset, Discriminator));
+ LineLocation(LineOffset, Discriminator))[Name];
}
FProfile->setName(Name);
@@ -681,11 +692,9 @@ std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
if (!GcovBuffer.readInt64(TargetCount))
return sampleprof_error::truncated;
- if (Update) {
- FunctionSamples &TargetProfile = Profiles[TargetName];
- TargetProfile.addCalledTargetSamples(LineOffset, Discriminator,
- TargetName, TargetCount);
- }
+ if (Update)
+ FProfile->addCalledTargetSamples(LineOffset, Discriminator,
+ TargetName, TargetCount);
}
}
diff --git a/lib/ProfileData/SampleProfWriter.cpp b/lib/ProfileData/SampleProfWriter.cpp
index 4fa71288f8d9..b91b6fb7c7ad 100644
--- a/lib/ProfileData/SampleProfWriter.cpp
+++ b/lib/ProfileData/SampleProfWriter.cpp
@@ -18,16 +18,23 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/ProfileData/SampleProf.h"
#include "llvm/ProfileData/SampleProfWriter.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/LineIterator.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <system_error>
+#include <utility>
+#include <vector>
-using namespace llvm::sampleprof;
using namespace llvm;
+using namespace sampleprof;
/// \brief Write samples to a text file.
///
@@ -61,20 +68,21 @@ std::error_code SampleProfileWriterText::write(const FunctionSamples &S) {
OS << "\n";
}
- SampleSorter<LineLocation, FunctionSamples> SortedCallsiteSamples(
+ SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(
S.getCallsiteSamples());
Indent += 1;
- for (const auto &I : SortedCallsiteSamples.get()) {
- LineLocation Loc = I->first;
- const FunctionSamples &CalleeSamples = I->second;
- OS.indent(Indent);
- if (Loc.Discriminator == 0)
- OS << Loc.LineOffset << ": ";
- else
- OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
- if (std::error_code EC = write(CalleeSamples))
- return EC;
- }
+ for (const auto &I : SortedCallsiteSamples.get())
+ for (const auto &FS : I->second) {
+ LineLocation Loc = I->first;
+ const FunctionSamples &CalleeSamples = FS.second;
+ OS.indent(Indent);
+ if (Loc.Discriminator == 0)
+ OS << Loc.LineOffset << ": ";
+ else
+ OS << Loc.LineOffset << "." << Loc.Discriminator << ": ";
+ if (std::error_code EC = write(CalleeSamples))
+ return EC;
+ }
Indent -= 1;
return sampleprof_error::success;
@@ -102,11 +110,12 @@ void SampleProfileWriterBinary::addNames(const FunctionSamples &S) {
}
// Recursively add all the names for inlined callsites.
- for (const auto &J : S.getCallsiteSamples()) {
- const FunctionSamples &CalleeSamples = J.second;
- addName(CalleeSamples.getName());
- addNames(CalleeSamples);
- }
+ for (const auto &J : S.getCallsiteSamples())
+ for (const auto &FS : J.second) {
+ const FunctionSamples &CalleeSamples = FS.second;
+ addName(CalleeSamples.getName());
+ addNames(CalleeSamples);
+ }
}
std::error_code SampleProfileWriterBinary::writeHeader(
@@ -180,14 +189,15 @@ std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
// Recursively emit all the callsite samples.
encodeULEB128(S.getCallsiteSamples().size(), OS);
- for (const auto &J : S.getCallsiteSamples()) {
- LineLocation Loc = J.first;
- const FunctionSamples &CalleeSamples = J.second;
- encodeULEB128(Loc.LineOffset, OS);
- encodeULEB128(Loc.Discriminator, OS);
- if (std::error_code EC = writeBody(CalleeSamples))
- return EC;
- }
+ for (const auto &J : S.getCallsiteSamples())
+ for (const auto &FS : J.second) {
+ LineLocation Loc = J.first;
+ const FunctionSamples &CalleeSamples = FS.second;
+ encodeULEB128(Loc.LineOffset, OS);
+ encodeULEB128(Loc.Discriminator, OS);
+ if (std::error_code EC = writeBody(CalleeSamples))
+ return EC;
+ }
return sampleprof_error::success;
}
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 4cfbbf8645e0..9778628911cd 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -26,8 +26,21 @@
#include <cstring>
#include <limits.h>
+#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
+ do { \
+ if (usesLayout<IEEEFloat>(getSemantics())) \
+ return U.IEEE.METHOD_CALL; \
+ if (usesLayout<DoubleAPFloat>(getSemantics())) \
+ return U.Double.METHOD_CALL; \
+ llvm_unreachable("Unexpected semantics"); \
+ } while (false)
+
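A sketch of how the macro is used by APFloat methods declared in APFloat.h; isDenormal is shown as a representative, and the real accessors follow this shape:

  bool APFloat::isDenormal() const {
    // Forward to whichever union member matches the active semantics.
    APFLOAT_DISPATCH_ON_SEMANTICS(isDenormal());
  }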
using namespace llvm;
+// TODO: Remove these and use APInt qualified types directly.
+typedef APInt::WordType integerPart;
+const unsigned int integerPartWidth = APInt::APINT_BITS_PER_WORD;
+
/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.
@@ -66,33 +79,43 @@ namespace llvm {
static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
static const fltSemantics semBogus = {0, 0, 0, 0};
- /* The PowerPC format consists of two doubles. It does not map cleanly
- onto the usual format above. It is approximated using twice the
- mantissa bits. Note that for exponents near the double minimum,
- we no longer can represent the full 106 mantissa bits, so those
- will be treated as denormal numbers.
-
- FIXME: While this approximation is equivalent to what GCC uses for
- compile-time arithmetic on PPC double-double numbers, it is not able
- to represent all possible values held by a PPC double-double number,
- for example: (long double) 1.0 + (long double) 0x1p-106
- Should this be replaced by a full emulation of PPC double-double?
+ /* The IBM double-double semantics. Such a number consists of a pair of IEEE
+ 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
+ (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
+ Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
+ to each other, and two 11-bit exponents.
Note: we need to make the value different from semBogus as otherwise
an unsafe optimization may collapse both values to a single address,
and we heavily rely on them having distinct addresses. */
static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
- /* There are temporary semantics for the real PPCDoubleDouble implementation.
- Currently, APFloat of PPCDoubleDouble holds one PPCDoubleDoubleImpl as the
- high part of double double, and one IEEEdouble as the low part, so that
- the old operations operate on PPCDoubleDoubleImpl, while the newly added
- operations also populate the IEEEdouble.
+ /* These are legacy semantics for the fallback, inaccurate implementation of
+ IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
+ operation. It's equivalent to having an IEEE number with 106 consecutive
+ bits of mantissa and 11 bits of exponent.
+
+ It's not equivalent to IBM double-double. For example, a valid IBM
+ double-double, 1 + epsilon:
+
+ 1 + epsilon = 1 + (1 >> 1076)
- TODO: Once all functions support DoubleAPFloat mode, we'll change all
- PPCDoubleDoubleImpl to IEEEdouble and remove PPCDoubleDoubleImpl. */
- static const fltSemantics semPPCDoubleDoubleImpl = {1023, -1022 + 53, 53 + 53,
- 128};
+ is not representable with 106 consecutive bits of mantissa.
+
+ Currently, these semantics are used in the following way:
+
+ semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
+ (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
+ semPPCDoubleDoubleLegacy -> IEEE operations
+
+ We use bitcastToAPInt() to get the bit representation (in APInt) of the
+ underlying IEEEdouble, then use the APInt constructor to construct the
+ legacy IEEE float.
+
+ TODO: Implement all operations in semPPCDoubleDouble, and delete these
+ semantics. */
+ static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
+ 53 + 53, 128};
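/* Concretely: with Hi = 1.0 and Lo equal to a tiny subnormal such as 2^-1074,
   the pair is a valid double-double, but expressing Hi + Lo as one binary
   significand would need bits from position 0 down to position -1074, vastly
   more than the 106 consecutive bits these legacy semantics provide. */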
const fltSemantics &APFloatBase::IEEEhalf() {
return semIEEEhalf;
@@ -742,7 +765,7 @@ IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
bool IEEEFloat::isDenormal() const {
return isFiniteNonZero() && (exponent == semantics->minExponent) &&
- (APInt::tcExtractBit(significandParts(),
+ (APInt::tcExtractBit(significandParts(),
semantics->precision - 1) == 0);
}
@@ -862,11 +885,6 @@ IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
IEEEFloat::~IEEEFloat() { freeSignificand(); }
-// Profile - This method 'profiles' an APFloat for use with FoldingSet.
-void IEEEFloat::Profile(FoldingSetNodeID &ID) const {
- ID.Add(bitcastToAPInt());
-}
-
unsigned int IEEEFloat::partCount() const {
return partCountForBits(semantics->precision + 1);
}
@@ -966,14 +984,14 @@ lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
// rhs = b23 . b22 ... b0 * 2^e2
// the result of multiplication is:
// *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
- // Note that there are three significant bits at the left-hand side of the
+ // Note that there are three significant bits at the left-hand side of the
// radix point: two for the multiplication, and an overflow bit for the
// addition (that will always be zero at this point). Move the radix point
// toward left by two bits, and adjust exponent accordingly.
exponent += 2;
if (addend && addend->isNonZero()) {
- // The intermediate result of the multiplication has "2 * precision"
+ // The intermediate result of the multiplication has "2 * precision"
// significant bits; adjust the addend to be consistent with the mul result.
//
Significand savedSignificand = significand;
@@ -1025,7 +1043,7 @@ lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
}
// Convert the result having "2 * precision" significant-bits back to the one
- // having "precision" significant-bits. First, move the radix point from
+ // having "precision" significant-bits. First, move the radix point from
// position "2*precision - 1" to "precision - 1". The exponent needs to be
// adjusted by "2*precision - 1" - "precision - 1" = "precision".
exponent -= precision + 1;
@@ -1611,16 +1629,6 @@ void IEEEFloat::changeSign() {
sign = !sign;
}
-void IEEEFloat::clearSign() {
- /* So is this one. */
- sign = 0;
-}
-
-void IEEEFloat::copySign(const IEEEFloat &rhs) {
- /* And this one. */
- sign = rhs.sign;
-}
-
/* Normalized addition or subtraction. */
IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
roundingMode rounding_mode,
@@ -1712,9 +1720,10 @@ IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
int parts = partCount();
integerPart *x = new integerPart[parts];
bool ignored;
- fs = V.convertToInteger(x, parts * integerPartWidth, true,
- rmNearestTiesToEven, &ignored);
- if (fs==opInvalidOp) {
+ fs = V.convertToInteger(makeMutableArrayRef(x, parts),
+ parts * integerPartWidth, true, rmNearestTiesToEven,
+ &ignored);
+ if (fs == opInvalidOp) {
delete[] x;
return fs;
}
@@ -1735,43 +1744,20 @@ IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
return fs;
}
-/* Normalized llvm frem (C fmod).
- This is not currently correct in all cases. */
+/* Normalized llvm frem (C fmod). */
IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
opStatus fs;
fs = modSpecials(rhs);
- if (isFiniteNonZero() && rhs.isFiniteNonZero()) {
- IEEEFloat V = *this;
- unsigned int origSign = sign;
-
- fs = V.divide(rhs, rmNearestTiesToEven);
- if (fs == opDivByZero)
- return fs;
-
- int parts = partCount();
- integerPart *x = new integerPart[parts];
- bool ignored;
- fs = V.convertToInteger(x, parts * integerPartWidth, true,
- rmTowardZero, &ignored);
- if (fs==opInvalidOp) {
- delete[] x;
- return fs;
- }
-
- fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
- rmNearestTiesToEven);
- assert(fs==opOK); // should always work
-
- fs = V.multiply(rhs, rmNearestTiesToEven);
- assert(fs==opOK || fs==opInexact); // should not overflow or underflow
-
+ while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
+ compareAbsoluteValue(rhs) != cmpLessThan) {
+ IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
+ if (compareAbsoluteValue(V) == cmpLessThan)
+ V = scalbn(V, -1, rmNearestTiesToEven);
+ V.sign = sign;
+
fs = subtract(V, rmNearestTiesToEven);
- assert(fs==opOK || fs==opInexact); // likewise
-
- if (isZero())
- sign = origSign; // IEEE754 requires this
- delete[] x;
+ assert(fs==opOK);
}
return fs;
}
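// The new loop is shift-and-subtract long division that keeps only the
// remainder. The same idea on plain doubles, as a hedged sketch (finite,
// nonzero operands assumed; not the APFloat API):
//
//   #include <cmath>
//   double fmodSketch(double x, double y) {
//     double ax = std::fabs(x), ay = std::fabs(y);
//     while (ax >= ay) {
//       // Scale y up to x's binade; back off one binade on overshoot.
//       double v = std::scalbn(ay, std::ilogb(ax) - std::ilogb(ay));
//       if (v > ax)
//         v = std::scalbn(v, -1);
//       ax -= v;  // exact, since v <= ax < 2v, hence the assert(fs==opOK)
//     }
//     return std::copysign(ax, x);  // C fmod takes the sign of x
//   }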
@@ -1840,7 +1826,7 @@ IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
IEEEFloat MagicConstant(*semantics);
fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
rmNearestTiesToEven);
- MagicConstant.copySign(*this);
+ MagicConstant.sign = sign;
if (fs != opOK)
return fs;
@@ -2047,7 +2033,7 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
Note that for conversions to integer type the C standard requires
round-to-zero to always be used. */
IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
- integerPart *parts, unsigned int width, bool isSigned,
+ MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
roundingMode rounding_mode, bool *isExact) const {
lostFraction lost_fraction;
const integerPart *src;
@@ -2060,9 +2046,10 @@ IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
return opInvalidOp;
dstPartsCount = partCountForBits(width);
+ assert(dstPartsCount <= parts.size() && "Integer too big");
if (category == fcZero) {
- APInt::tcSet(parts, 0, dstPartsCount);
+ APInt::tcSet(parts.data(), 0, dstPartsCount);
// Negative zero can't be represented as an int.
*isExact = !sign;
return opOK;
@@ -2074,7 +2061,7 @@ IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
the destination. */
if (exponent < 0) {
/* Our absolute value is less than one; truncate everything. */
- APInt::tcSet(parts, 0, dstPartsCount);
+ APInt::tcSet(parts.data(), 0, dstPartsCount);
/* For exponent -1 the integer bit represents .5, look at that.
For smaller exponents leftmost truncated bit is 0. */
truncatedBits = semantics->precision -1U - exponent;
@@ -2090,11 +2077,13 @@ IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
if (bits < semantics->precision) {
/* We truncate (semantics->precision - bits) bits. */
truncatedBits = semantics->precision - bits;
- APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits);
+ APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
} else {
/* We want at least as many bits as are available. */
- APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0);
- APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision);
+ APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
+ 0);
+ APInt::tcShiftLeft(parts.data(), dstPartsCount,
+ bits - semantics->precision);
truncatedBits = 0;
}
}
@@ -2106,7 +2095,7 @@ IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
truncatedBits);
if (lost_fraction != lfExactlyZero &&
roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
- if (APInt::tcIncrement(parts, dstPartsCount))
+ if (APInt::tcIncrement(parts.data(), dstPartsCount))
return opInvalidOp; /* Overflow. */
}
} else {
@@ -2114,7 +2103,7 @@ IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
}
/* Step 3: check if we fit in the destination. */
- unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1;
+ unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
if (sign) {
if (!isSigned) {
@@ -2125,7 +2114,8 @@ IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
/* It takes omsb bits to represent the unsigned integer value.
We lose a bit for the sign, but care is needed as the
maximally negative integer is a special case. */
- if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb)
+ if (omsb == width &&
+ APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
return opInvalidOp;
/* This case can happen because of rounding. */
@@ -2133,7 +2123,7 @@ IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
return opInvalidOp;
}
- APInt::tcNegate (parts, dstPartsCount);
+ APInt::tcNegate (parts.data(), dstPartsCount);
} else {
if (omsb >= width + !isSigned)
return opInvalidOp;
@@ -2155,11 +2145,10 @@ IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
the original value. This is almost equivalent to result==opOK,
except for negative zeroes.
*/
-IEEEFloat::opStatus IEEEFloat::convertToInteger(integerPart *parts,
- unsigned int width,
- bool isSigned,
- roundingMode rounding_mode,
- bool *isExact) const {
+IEEEFloat::opStatus
+IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
+ unsigned int width, bool isSigned,
+ roundingMode rounding_mode, bool *isExact) const {
opStatus fs;
fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
@@ -2169,6 +2158,7 @@ IEEEFloat::opStatus IEEEFloat::convertToInteger(integerPart *parts,
unsigned int bits, dstPartsCount;
dstPartsCount = partCountForBits(width);
+ assert(dstPartsCount <= parts.size() && "Integer too big");
if (category == fcNaN)
bits = 0;
@@ -2177,30 +2167,14 @@ IEEEFloat::opStatus IEEEFloat::convertToInteger(integerPart *parts,
else
bits = width - isSigned;
- APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits);
+ APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
if (sign && isSigned)
- APInt::tcShiftLeft(parts, dstPartsCount, width - 1);
+ APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
}
return fs;
}
-/* Same as convertToInteger(integerPart*, ...), except the result is returned in
- an APSInt, whose initial bit-width and signed-ness are used to determine the
- precision of the conversion.
- */
-IEEEFloat::opStatus IEEEFloat::convertToInteger(APSInt &result,
- roundingMode rounding_mode,
- bool *isExact) const {
- unsigned bitWidth = result.getBitWidth();
- SmallVector<uint64_t, 4> parts(result.getNumWords());
- opStatus status = convertToInteger(
- parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
- // Keeps the original signed-ness.
- result = APInt(bitWidth, parts);
- return status;
-}
-
/* Convert an unsigned integer SRC to a floating point number,
rounding according to ROUNDING_MODE. The sign of the floating
point number is not modified. */
@@ -2484,7 +2458,7 @@ IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
// Test if we have a zero number allowing for strings with no null terminators
// and zero decimals with non-zero exponents.
- //
+ //
// We computed firstSigDigit by ignoring all zeros and dots. Thus if
// D->firstSigDigit equals str.end(), every digit must be a zero and there can
// be at most one dot. On the other hand, if we have a zero with a non-zero
@@ -2852,7 +2826,7 @@ APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
}
APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleImpl);
+ assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
assert(partCount()==2);
uint64_t words[2];
@@ -3033,7 +3007,7 @@ APInt IEEEFloat::bitcastToAPInt() const {
if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
return convertQuadrupleAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleImpl)
+ if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
return convertPPCDoubleDoubleAPFloatToAPInt();
assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
@@ -3103,14 +3077,14 @@ void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
// Get the first double and convert to our format.
initFromDoubleAPInt(APInt(64, i1));
- fs = convert(semPPCDoubleDoubleImpl, rmNearestTiesToEven, &losesInfo);
+ fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
// Unless we have a special case, add in second double.
if (isFiniteNonZero()) {
IEEEFloat v(semIEEEdouble, APInt(64, i2));
- fs = v.convert(semPPCDoubleDoubleImpl, rmNearestTiesToEven, &losesInfo);
+ fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
@@ -3264,7 +3238,7 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
return initFromF80LongDoubleAPInt(api);
if (Sem == &semIEEEquad)
return initFromQuadrupleAPInt(api);
- if (Sem == &semPPCDoubleDoubleImpl)
+ if (Sem == &semPPCDoubleDoubleLegacy)
return initFromPPCDoubleDoubleAPInt(api);
llvm_unreachable(nullptr);
@@ -3620,7 +3594,7 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
Str.push_back(buffer[NDigits-I-1]);
}
-bool IEEEFloat::getExactInverse(IEEEFloat *inv) const {
+bool IEEEFloat::getExactInverse(APFloat *inv) const {
// Special floats and denormals have no exact inverse.
if (!isFiniteNonZero())
return false;
@@ -3644,7 +3618,7 @@ bool IEEEFloat::getExactInverse(IEEEFloat *inv) const {
reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
if (inv)
- *inv = reciprocal;
+ *inv = APFloat(reciprocal, *semantics);
return true;
}
@@ -3856,28 +3830,29 @@ IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
}
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
- : Semantics(&S), Floats(new APFloat[2]{APFloat(semPPCDoubleDoubleImpl),
- APFloat(semIEEEdouble)}) {
+ : Semantics(&S),
+ Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
assert(Semantics == &semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
: Semantics(&S),
- Floats(new APFloat[2]{APFloat(semPPCDoubleDoubleImpl, uninitialized),
+ Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
APFloat(semIEEEdouble, uninitialized)}) {
assert(Semantics == &semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
- : Semantics(&S), Floats(new APFloat[2]{APFloat(semPPCDoubleDoubleImpl, I),
+ : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
APFloat(semIEEEdouble)}) {
assert(Semantics == &semPPCDoubleDouble);
}
DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
- : Semantics(&S), Floats(new APFloat[2]{
- APFloat(semPPCDoubleDoubleImpl, I),
- APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
+ : Semantics(&S),
+ Floats(new APFloat[2]{
+ APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
+ APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
assert(Semantics == &semPPCDoubleDouble);
}
@@ -3886,9 +3861,7 @@ DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
: Semantics(&S),
Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
assert(Semantics == &semPPCDoubleDouble);
- // TODO Check for First == &IEEEdouble once the transition is done.
- assert(&Floats[0].getSemantics() == &semPPCDoubleDoubleImpl ||
- &Floats[0].getSemantics() == &semIEEEdouble);
+ assert(&Floats[0].getSemantics() == &semIEEEdouble);
assert(&Floats[1].getSemantics() == &semIEEEdouble);
}
@@ -3917,6 +3890,7 @@ DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
return *this;
}
+// Implement addition, subtraction, multiplication and division based on:
// "Software for Doubled-Precision Floating-Point Computations",
// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
@@ -3928,7 +3902,7 @@ APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
if (!z.isFinite()) {
if (!z.isInfinity()) {
Floats[0] = std::move(z);
- Floats[1].makeZero(false);
+ Floats[1].makeZero(/* Neg = */ false);
return (opStatus)Status;
}
Status = opOK;
@@ -3946,7 +3920,7 @@ APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
}
if (!z.isFinite()) {
Floats[0] = std::move(z);
- Floats[1].makeZero(false);
+ Floats[1].makeZero(/* Neg = */ false);
return (opStatus)Status;
}
Floats[0] = z;
@@ -3982,13 +3956,13 @@ APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
Status |= zz.add(cc, RM);
if (zz.isZero() && !zz.isNegative()) {
Floats[0] = std::move(z);
- Floats[1].makeZero(false);
+ Floats[1].makeZero(/* Neg = */ false);
return opOK;
}
Floats[0] = z;
Status |= Floats[0].add(zz, RM);
if (!Floats[0].isFinite()) {
- Floats[1].makeZero(false);
+ Floats[1].makeZero(/* Neg = */ false);
return (opStatus)Status;
}
Floats[1] = std::move(z);
@@ -4033,25 +4007,15 @@ APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
}
assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
- // These conversions will go away once PPCDoubleDoubleImpl goes away.
- // (PPCDoubleDoubleImpl, IEEEDouble) -> (IEEEDouble, IEEEDouble)
- APFloat A(semIEEEdouble,
- APInt(64, LHS.Floats[0].bitcastToAPInt().getRawData()[0])),
- AA(LHS.Floats[1]),
- C(semIEEEdouble, APInt(64, RHS.Floats[0].bitcastToAPInt().getRawData()[0])),
+ APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
CC(RHS.Floats[1]);
+ assert(&A.getSemantics() == &semIEEEdouble);
assert(&AA.getSemantics() == &semIEEEdouble);
+ assert(&C.getSemantics() == &semIEEEdouble);
assert(&CC.getSemantics() == &semIEEEdouble);
- Out.Floats[0] = APFloat(semIEEEdouble);
+ assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
-
- auto Ret = Out.addImpl(A, AA, C, CC, RM);
-
- // (IEEEDouble, IEEEDouble) -> (PPCDoubleDoubleImpl, IEEEDouble)
- uint64_t Buffer[] = {Out.Floats[0].bitcastToAPInt().getRawData()[0],
- Out.Floats[1].bitcastToAPInt().getRawData()[0]};
- Out.Floats[0] = APFloat(semPPCDoubleDoubleImpl, APInt(128, 2, Buffer));
- return Ret;
+ return Out.addImpl(A, AA, C, CC, RM);
}
APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
@@ -4067,6 +4031,140 @@ APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
return Ret;
}
+APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
+ APFloat::roundingMode RM) {
+ const auto &LHS = *this;
+ auto &Out = *this;
+ /* Interesting observation: For special categories, finding the lowest
+ common ancestor of the following layered graph gives the correct
+ return category:
+
+ NaN
+ / \
+ Zero Inf
+ \ /
+ Normal
+
+ e.g. NaN * NaN = NaN
+ Zero * Inf = NaN
+ Normal * Zero = Zero
+ Normal * Inf = Inf
+ */
+ if (LHS.getCategory() == fcNaN) {
+ Out = LHS;
+ return opOK;
+ }
+ if (RHS.getCategory() == fcNaN) {
+ Out = RHS;
+ return opOK;
+ }
+ if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
+ (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
+ Out.makeNaN(false, false, nullptr);
+ return opOK;
+ }
+ if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
+ Out = LHS;
+ return opOK;
+ }
+ if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
+ Out = RHS;
+ return opOK;
+ }
+ assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
+ "Special cases not handled exhaustively");
+
+ int Status = opOK;
+ APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
+ // t = a * c
+ APFloat T = A;
+ Status |= T.multiply(C, RM);
+ if (!T.isFiniteNonZero()) {
+ Floats[0] = T;
+ Floats[1].makeZero(/* Neg = */ false);
+ return (opStatus)Status;
+ }
+
+ // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
+ APFloat Tau = A;
+ T.changeSign();
+ Status |= Tau.fusedMultiplyAdd(C, T, RM);
+ T.changeSign();
+ {
+ // v = a * d
+ APFloat V = A;
+ Status |= V.multiply(D, RM);
+ // w = b * c
+ APFloat W = B;
+ Status |= W.multiply(C, RM);
+ Status |= V.add(W, RM);
+ // tau += v + w
+ Status |= Tau.add(V, RM);
+ }
+ // u = t + tau
+ APFloat U = T;
+ Status |= U.add(Tau, RM);
+
+ Floats[0] = U;
+ if (!U.isFinite()) {
+ Floats[1].makeZero(/* Neg = */ false);
+ } else {
+ // Floats[1] = (t - u) + tau
+ Status |= T.subtract(U, RM);
+ Status |= T.add(Tau, RM);
+ Floats[1] = T;
+ }
+ return (opStatus)Status;
+}
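// The t/tau pair above is the classic error-free product: t is the rounded
// product and tau recovers its exact residual with one fused multiply-add.
// A sketch on plain doubles (illustrative only):
//
//   #include <cmath>
//   void twoProd(double a, double c, double &t, double &tau) {
//     t = a * c;
//     tau = std::fma(a, c, -t);  // t + tau == a * c, exactly
//   }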
+
+APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
+ APFloat::roundingMode RM) {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ auto Ret =
+ Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
+ *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ return Ret;
+}
+
+APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ auto Ret =
+ Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
+ *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ return Ret;
+}
+
+APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
+ *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ return Ret;
+}
+
+APFloat::opStatus
+DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
+ const DoubleAPFloat &Addend,
+ APFloat::roundingMode RM) {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ auto Ret = Tmp.fusedMultiplyAdd(
+ APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
+ APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
+ *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ return Ret;
+}
+
+APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ auto Ret = Tmp.roundToIntegral(RM);
+ *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ return Ret;
+}
+
void DoubleAPFloat::changeSign() {
Floats[0].changeSign();
Floats[1].changeSign();
@@ -4101,12 +4199,200 @@ bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
void DoubleAPFloat::makeInf(bool Neg) {
Floats[0].makeInf(Neg);
- Floats[1].makeZero(false);
+ Floats[1].makeZero(/* Neg = */ false);
+}
+
+void DoubleAPFloat::makeZero(bool Neg) {
+ Floats[0].makeZero(Neg);
+ Floats[1].makeZero(/* Neg = */ false);
+}
+
+void DoubleAPFloat::makeLargest(bool Neg) {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
+ Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
+ if (Neg)
+ changeSign();
+}
+
+void DoubleAPFloat::makeSmallest(bool Neg) {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ Floats[0].makeSmallest(Neg);
+ Floats[1].makeZero(/* Neg = */ false);
+}
+
+void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
+ if (Neg)
+ Floats[0].changeSign();
+ Floats[1].makeZero(/* Neg = */ false);
}
void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
Floats[0].makeNaN(SNaN, Neg, fill);
- Floats[1].makeZero(false);
+ Floats[1].makeZero(/* Neg = */ false);
+}
+
+APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
+ auto Result = Floats[0].compare(RHS.Floats[0]);
+ // |Floats[0]| > |Floats[1]|
+ if (Result == APFloat::cmpEqual)
+ return Floats[1].compare(RHS.Floats[1]);
+ return Result;
+}
+
+bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
+ return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
+ Floats[1].bitwiseIsEqual(RHS.Floats[1]);
+}
+
+hash_code hash_value(const DoubleAPFloat &Arg) {
+ if (Arg.Floats)
+ return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
+ return hash_combine(Arg.Semantics);
+}
+
+APInt DoubleAPFloat::bitcastToAPInt() const {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ uint64_t Data[] = {
+ Floats[0].bitcastToAPInt().getRawData()[0],
+ Floats[1].bitcastToAPInt().getRawData()[0],
+ };
+ return APInt(128, 2, Data);
+}
+
+APFloat::opStatus DoubleAPFloat::convertFromString(StringRef S,
+ roundingMode RM) {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ APFloat Tmp(semPPCDoubleDoubleLegacy);
+ auto Ret = Tmp.convertFromString(S, RM);
+ *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ return Ret;
+}
+
+APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ auto Ret = Tmp.next(nextDown);
+ *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ return Ret;
+}
+
+APFloat::opStatus
+DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
+ unsigned int Width, bool IsSigned,
+ roundingMode RM, bool *IsExact) const {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
+ .convertToInteger(Input, Width, IsSigned, RM, IsExact);
+}
+
+APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
+ bool IsSigned,
+ roundingMode RM) {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ APFloat Tmp(semPPCDoubleDoubleLegacy);
+ auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
+ *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ return Ret;
+}
+
+APFloat::opStatus
+DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
+ unsigned int InputSize,
+ bool IsSigned, roundingMode RM) {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ APFloat Tmp(semPPCDoubleDoubleLegacy);
+ auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
+ *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ return Ret;
+}
+
+APFloat::opStatus
+DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
+ unsigned int InputSize,
+ bool IsSigned, roundingMode RM) {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ APFloat Tmp(semPPCDoubleDoubleLegacy);
+ auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
+ *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
+ return Ret;
+}
+
+unsigned int DoubleAPFloat::convertToHexString(char *DST,
+ unsigned int HexDigits,
+ bool UpperCase,
+ roundingMode RM) const {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
+ .convertToHexString(DST, HexDigits, UpperCase, RM);
+}
+
+bool DoubleAPFloat::isDenormal() const {
+ return getCategory() == fcNormal &&
+ (Floats[0].isDenormal() || Floats[1].isDenormal() ||
+ // (double)(Hi + Lo) == Hi defines a normal number.
+ Floats[0].compare(Floats[0] + Floats[1]) != cmpEqual);
+}
+
+bool DoubleAPFloat::isSmallest() const {
+ if (getCategory() != fcNormal)
+ return false;
+ DoubleAPFloat Tmp(*this);
+ Tmp.makeSmallest(this->isNegative());
+ return Tmp.compare(*this) == cmpEqual;
+}
+
+bool DoubleAPFloat::isLargest() const {
+ if (getCategory() != fcNormal)
+ return false;
+ DoubleAPFloat Tmp(*this);
+ Tmp.makeLargest(this->isNegative());
+ return Tmp.compare(*this) == cmpEqual;
+}
+
+bool DoubleAPFloat::isInteger() const {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ APFloat Tmp(semPPCDoubleDoubleLegacy);
+ (void)Tmp.add(Floats[0], rmNearestTiesToEven);
+ (void)Tmp.add(Floats[1], rmNearestTiesToEven);
+ return Tmp.isInteger();
+}
+
+void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
+ unsigned FormatPrecision,
+ unsigned FormatMaxPadding) const {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
+ .toString(Str, FormatPrecision, FormatMaxPadding);
+}
+
+bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
+ assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
+ if (!inv)
+ return Tmp.getExactInverse(nullptr);
+ APFloat Inv(semPPCDoubleDoubleLegacy);
+ auto Ret = Tmp.getExactInverse(&Inv);
+ *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
+ return Ret;
+}
+
+DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) {
+ assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
+ scalbn(Arg.Floats[1], Exp, RM));
+}
+
+DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
+ APFloat::roundingMode RM) {
+ assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
+ APFloat First = frexp(Arg.Floats[0], Exp, RM);
+ APFloat Second = Arg.Floats[1];
+ if (Arg.getCategory() == APFloat::fcNormal)
+ Second = scalbn(Second, -Exp, RM);
+ return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
}
} // End detail namespace
@@ -4126,10 +4412,16 @@ APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
}
APFloat::opStatus APFloat::convertFromString(StringRef Str, roundingMode RM) {
- return getIEEE().convertFromString(Str, RM);
+ APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
}
-hash_code hash_value(const APFloat &Arg) { return hash_value(Arg.getIEEE()); }
+hash_code hash_value(const APFloat &Arg) {
+ if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
+ return hash_value(Arg.U.IEEE);
+ if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
+ return hash_value(Arg.U.Double);
+ llvm_unreachable("Unexpected semantics");
+}
APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
: APFloat(Semantics) {
@@ -4146,10 +4438,8 @@ APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
if (usesLayout<IEEEFloat>(getSemantics()) &&
usesLayout<DoubleAPFloat>(ToSemantics)) {
assert(&ToSemantics == &semPPCDoubleDouble);
- auto Ret = U.IEEE.convert(semPPCDoubleDoubleImpl, RM, losesInfo);
- *this = APFloat(DoubleAPFloat(semPPCDoubleDouble, std::move(*this),
- APFloat(semIEEEdouble)),
- ToSemantics);
+ auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
+ *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
return Ret;
}
if (usesLayout<DoubleAPFloat>(getSemantics()) &&
@@ -4189,6 +4479,30 @@ void APFloat::print(raw_ostream &OS) const {
OS << Buffer << "\n";
}
-void APFloat::dump() const { print(dbgs()); }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
+#endif
+
+void APFloat::Profile(FoldingSetNodeID &NID) const {
+ NID.Add(bitcastToAPInt());
+}
+
+/* Same as convertToInteger(integerPart*, ...), except the result is returned in
+ an APSInt, whose initial bit-width and signedness are used to determine the
+ precision of the conversion.
+ */
+APFloat::opStatus APFloat::convertToInteger(APSInt &result,
+ roundingMode rounding_mode,
+ bool *isExact) const {
+ unsigned bitWidth = result.getBitWidth();
+ SmallVector<uint64_t, 4> parts(result.getNumWords());
+ opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
+ rounding_mode, isExact);
+ // Keep the original signedness.
+ result = APInt(bitWidth, parts);
+ return status;
+}
} // End llvm namespace
+
+#undef APFLOAT_DISPATCH_ON_SEMANTICS
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index fb8b45166a41..0c7da1dad0d2 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -63,7 +63,7 @@ inline static unsigned getDigit(char cdigit, uint8_t radix) {
r = cdigit - 'a';
if (r <= radix - 11U)
return r + 10;
-
+
radix = 10;
}
@@ -76,14 +76,17 @@ inline static unsigned getDigit(char cdigit, uint8_t radix) {
void APInt::initSlowCase(uint64_t val, bool isSigned) {
+ VAL = 0;
pVal = getClearedMemory(getNumWords());
pVal[0] = val;
if (isSigned && int64_t(val) < 0)
for (unsigned i = 1; i < getNumWords(); ++i)
pVal[i] = -1ULL;
+ clearUnusedBits();
}
void APInt::initSlowCase(const APInt& that) {
+ VAL = 0;
pVal = getMemory(getNumWords());
memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
}
@@ -95,6 +98,7 @@ void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
VAL = bigVal[0];
else {
// Get memory, cleared to 0
+ VAL = 0;
pVal = getClearedMemory(getNumWords());
// Calculate the number of words to copy
unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
@@ -106,17 +110,17 @@ void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
}
APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
- : BitWidth(numBits), VAL(0) {
+ : BitWidth(numBits) {
initFromArray(bigVal);
}
APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
- : BitWidth(numBits), VAL(0) {
+ : BitWidth(numBits) {
initFromArray(makeArrayRef(bigVal, numWords));
}
APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
- : BitWidth(numbits), VAL(0) {
+ : VAL(0), BitWidth(numbits) {
assert(BitWidth && "Bitwidth too small");
fromString(numbits, Str, radix);
}
@@ -153,16 +157,6 @@ APInt& APInt::AssignSlowCase(const APInt& RHS) {
return clearUnusedBits();
}
-APInt& APInt::operator=(uint64_t RHS) {
- if (isSingleWord())
- VAL = RHS;
- else {
- pVal[0] = RHS;
- memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
- }
- return clearUnusedBits();
-}
-
/// This method 'profiles' an APInt for use with FoldingSet.
void APInt::Profile(FoldingSetNodeID& ID) const {
ID.AddInteger(BitWidth);
@@ -177,76 +171,24 @@ void APInt::Profile(FoldingSetNodeID& ID) const {
ID.AddInteger(pVal[i]);
}
-/// This function adds a single "digit" integer, y, to the multiple
-/// "digit" integer array, x[]. x[] is modified to reflect the addition and
-/// 1 is returned if there is a carry out, otherwise 0 is returned.
-/// @returns the carry of the addition.
-static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
- for (unsigned i = 0; i < len; ++i) {
- dest[i] = y + x[i];
- if (dest[i] < y)
- y = 1; // Carry one to next digit.
- else {
- y = 0; // No need to carry so exit early
- break;
- }
- }
- return y;
-}
-
/// @brief Prefix increment operator. Increments the APInt by one.
APInt& APInt::operator++() {
if (isSingleWord())
++VAL;
else
- add_1(pVal, pVal, getNumWords(), 1);
+ tcIncrement(pVal, getNumWords());
return clearUnusedBits();
}
-/// This function subtracts a single "digit" (64-bit word), y, from
-/// the multi-digit integer array, x[], propagating the borrowed 1 value until
-/// no further borrowing is needed or it runs out of "digits" in x. The result
-/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
-/// In other words, if y > x then this function returns 1, otherwise 0.
-/// @returns the borrow out of the subtraction
-static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
- for (unsigned i = 0; i < len; ++i) {
- uint64_t X = x[i];
- x[i] -= y;
- if (y > X)
- y = 1; // We have to "borrow 1" from next "digit"
- else {
- y = 0; // No need to borrow
- break; // Remaining digits are unchanged so exit early
- }
- }
- return bool(y);
-}
-
/// @brief Prefix decrement operator. Decrements the APInt by one.
APInt& APInt::operator--() {
if (isSingleWord())
--VAL;
else
- sub_1(pVal, getNumWords(), 1);
+ tcDecrement(pVal, getNumWords());
return clearUnusedBits();
}
-/// This function adds the integer array x to the integer array Y and
-/// places the result in dest.
-/// @returns the carry out from the addition
-/// @brief General addition of 64-bit integer arrays
-static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
- unsigned len) {
- bool carry = false;
- for (unsigned i = 0; i< len; ++i) {
- uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
- dest[i] = x[i] + y[i] + carry;
- carry = dest[i] < limit || (carry && dest[i] == limit);
- }
- return carry;
-}
-
/// Adds the RHS APint to this APInt.
/// @returns this, after addition of RHS.
/// @brief Addition assignment operator.
@@ -254,9 +196,8 @@ APInt& APInt::operator+=(const APInt& RHS) {
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
if (isSingleWord())
VAL += RHS.VAL;
- else {
- add(pVal, pVal, RHS.pVal, getNumWords());
- }
+ else
+ tcAdd(pVal, RHS.pVal, 0, getNumWords());
return clearUnusedBits();
}
@@ -264,24 +205,10 @@ APInt& APInt::operator+=(uint64_t RHS) {
if (isSingleWord())
VAL += RHS;
else
- add_1(pVal, pVal, getNumWords(), RHS);
+ tcAddPart(pVal, RHS, getNumWords());
return clearUnusedBits();
}
-/// Subtracts the integer array y from the integer array x
-/// @returns returns the borrow out.
-/// @brief Generalized subtraction of 64-bit integer arrays.
-static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
- unsigned len) {
- bool borrow = false;
- for (unsigned i = 0; i < len; ++i) {
- uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
- borrow = y[i] > x_tmp || (borrow && x[i] == 0);
- dest[i] = x_tmp - y[i];
- }
- return borrow;
-}
-
/// Subtracts the RHS APInt from this APInt
/// @returns this, after subtraction
/// @brief Subtraction assignment operator.
@@ -290,7 +217,7 @@ APInt& APInt::operator-=(const APInt& RHS) {
if (isSingleWord())
VAL -= RHS.VAL;
else
- sub(pVal, pVal, RHS.pVal, getNumWords());
+ tcSubtract(pVal, RHS.pVal, 0, getNumWords());
return clearUnusedBits();
}
@@ -298,7 +225,7 @@ APInt& APInt::operator-=(uint64_t RHS) {
if (isSingleWord())
VAL -= RHS;
else
- sub_1(pVal, getNumWords(), RHS);
+ tcSubtractPart(pVal, RHS, getNumWords());
return clearUnusedBits();
}
@@ -339,7 +266,7 @@ static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
/// Multiplies integer array x by integer array y and stores the result into
/// the integer array dest. Note that dest's size must be >= xlen + ylen.
-/// @brief Generalized multiplicate of integer arrays.
+/// @brief Generalized multiplication of integer arrays.
static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
unsigned ylen) {
dest[xlen] = mul_1(dest, x, xlen, y[0]);
@@ -412,69 +339,19 @@ APInt& APInt::operator*=(const APInt& RHS) {
return *this;
}
-APInt& APInt::operator&=(const APInt& RHS) {
- assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
- if (isSingleWord()) {
- VAL &= RHS.VAL;
- return *this;
- }
- unsigned numWords = getNumWords();
- for (unsigned i = 0; i < numWords; ++i)
- pVal[i] &= RHS.pVal[i];
+APInt& APInt::AndAssignSlowCase(const APInt& RHS) {
+ tcAnd(pVal, RHS.pVal, getNumWords());
return *this;
}
-APInt& APInt::operator|=(const APInt& RHS) {
- assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
- if (isSingleWord()) {
- VAL |= RHS.VAL;
- return *this;
- }
- unsigned numWords = getNumWords();
- for (unsigned i = 0; i < numWords; ++i)
- pVal[i] |= RHS.pVal[i];
+APInt& APInt::OrAssignSlowCase(const APInt& RHS) {
+ tcOr(pVal, RHS.pVal, getNumWords());
return *this;
}
-APInt& APInt::operator^=(const APInt& RHS) {
- assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
- if (isSingleWord()) {
- VAL ^= RHS.VAL;
- this->clearUnusedBits();
- return *this;
- }
- unsigned numWords = getNumWords();
- for (unsigned i = 0; i < numWords; ++i)
- pVal[i] ^= RHS.pVal[i];
- return clearUnusedBits();
-}
-
-APInt APInt::AndSlowCase(const APInt& RHS) const {
- unsigned numWords = getNumWords();
- uint64_t* val = getMemory(numWords);
- for (unsigned i = 0; i < numWords; ++i)
- val[i] = pVal[i] & RHS.pVal[i];
- return APInt(val, getBitWidth());
-}
-
-APInt APInt::OrSlowCase(const APInt& RHS) const {
- unsigned numWords = getNumWords();
- uint64_t *val = getMemory(numWords);
- for (unsigned i = 0; i < numWords; ++i)
- val[i] = pVal[i] | RHS.pVal[i];
- return APInt(val, getBitWidth());
-}
-
-APInt APInt::XorSlowCase(const APInt& RHS) const {
- unsigned numWords = getNumWords();
- uint64_t *val = getMemory(numWords);
- for (unsigned i = 0; i < numWords; ++i)
- val[i] = pVal[i] ^ RHS.pVal[i];
-
- APInt Result(val, getBitWidth());
- // 0^0==1 so clear the high bits in case they got set.
- Result.clearUnusedBits();
- return Result;
+APInt& APInt::XorAssignSlowCase(const APInt& RHS) {
+ tcXor(pVal, RHS.pVal, getNumWords());
+ return *this;
}
APInt APInt::operator*(const APInt& RHS) const {
@@ -511,11 +388,11 @@ bool APInt::ult(const APInt& RHS) const {
if (n1 < n2)
return true;
- // If magnitude of RHS is greather than LHS, return false.
+ // If magnitude of RHS is greater than LHS, return false.
if (n2 < n1)
return false;
- // If they bot fit in a word, just compare the low order word
+ // If they both fit in a word, just compare the low order word
if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
return pVal[0] < RHS.pVal[0];
@@ -545,7 +422,7 @@ bool APInt::slt(const APInt& RHS) const {
if (lhsNeg != rhsNeg)
return lhsNeg;
- // Otherwise we can just use an unsigned comparision, because even negative
+ // Otherwise we can just use an unsigned comparison, because even negative
// numbers compare correctly this way if both have the same signed-ness.
return ult(RHS);
}
@@ -557,6 +434,33 @@ void APInt::setBit(unsigned bitPosition) {
pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
}
+void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
+ unsigned loWord = whichWord(loBit);
+ unsigned hiWord = whichWord(hiBit);
+
+ // Create an initial mask for the low word with zeros below loBit.
+ uint64_t loMask = UINT64_MAX << whichBit(loBit);
+
+ // If hiBit is not aligned, we need a high mask.
+ unsigned hiShiftAmt = whichBit(hiBit);
+ if (hiShiftAmt != 0) {
+ // Create a high mask with zeros above hiBit.
+ uint64_t hiMask = UINT64_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
+ // If loWord and hiWord are equal, then we combine the masks. Otherwise,
+ // set the bits in hiWord.
+ if (hiWord == loWord)
+ loMask &= hiMask;
+ else
+ pVal[hiWord] |= hiMask;
+ }
+ // Apply the mask to the low word.
+ pVal[loWord] |= loMask;
+
+ // Fill any words between loWord and hiWord with all ones.
+ for (unsigned word = loWord + 1; word < hiWord; ++word)
+ pVal[word] = UINT64_MAX;
+}
+
/// Set the given bit to 0 whose position is given as "bitPosition".
/// @brief Set a given bit to 0.
void APInt::clearBit(unsigned bitPosition) {
@@ -567,6 +471,10 @@ void APInt::clearBit(unsigned bitPosition) {
}
/// @brief Toggle every bit to its opposite value.
+void APInt::flipAllBitsSlowCase() {
+ tcComplement(pVal, getNumWords());
+ clearUnusedBits();
+}
/// Toggle a given bit to its opposite value whose position is given
/// as "bitPosition".
@@ -577,9 +485,104 @@ void APInt::flipBit(unsigned bitPosition) {
else setBit(bitPosition);
}
+void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
+ unsigned subBitWidth = subBits.getBitWidth();
+ assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
+ "Illegal bit insertion");
+
+ // Insertion is a direct copy.
+ if (subBitWidth == BitWidth) {
+ *this = subBits;
+ return;
+ }
+
+ // Single word result can be done as a direct bitmask.
+ if (isSingleWord()) {
+ uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
+ VAL &= ~(mask << bitPosition);
+ VAL |= (subBits.VAL << bitPosition);
+ return;
+ }
+
+ unsigned loBit = whichBit(bitPosition);
+ unsigned loWord = whichWord(bitPosition);
+ unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
+
+ // Insertion within a single word can be done as a direct bitmask.
+ if (loWord == hi1Word) {
+ uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
+ pVal[loWord] &= ~(mask << loBit);
+ pVal[loWord] |= (subBits.VAL << loBit);
+ return;
+ }
+
+ // Insert on word boundaries.
+ if (loBit == 0) {
+ // Direct copy whole words.
+ unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
+ memcpy(pVal + loWord, subBits.getRawData(),
+ numWholeSubWords * APINT_WORD_SIZE);
+
+ // Mask+insert remaining bits.
+ unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
+ if (remainingBits != 0) {
+ uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - remainingBits);
+ pVal[hi1Word] &= ~mask;
+ pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
+ }
+ return;
+ }
+
+ // General case - set/clear individual bits in dst based on src.
+ // TODO - there is scope for optimization here, but at the moment this code
+ // path is barely used so prefer readability over performance.
+ for (unsigned i = 0; i != subBitWidth; ++i) {
+ if (subBits[i])
+ setBit(bitPosition + i);
+ else
+ clearBit(bitPosition + i);
+ }
+}
+
+APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
+ assert(numBits > 0 && "Can't extract zero bits");
+ assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
+ "Illegal bit extraction");
+
+ if (isSingleWord())
+ return APInt(numBits, VAL >> bitPosition);
+
+ unsigned loBit = whichBit(bitPosition);
+ unsigned loWord = whichWord(bitPosition);
+ unsigned hiWord = whichWord(bitPosition + numBits - 1);
+
+ // Single word result extracting bits from a single word source.
+ if (loWord == hiWord)
+ return APInt(numBits, pVal[loWord] >> loBit);
+
+ // Extracting bits that start on a source word boundary can be done
+ // as a fast memory copy.
+ if (loBit == 0)
+ return APInt(numBits, makeArrayRef(pVal + loWord, 1 + hiWord - loWord));
+
+ // General case - shift + copy source words directly into place.
+ APInt Result(numBits, 0);
+ unsigned NumSrcWords = getNumWords();
+ unsigned NumDstWords = Result.getNumWords();
+
+ for (unsigned word = 0; word < NumDstWords; ++word) {
+ uint64_t w0 = pVal[loWord + word];
+ uint64_t w1 =
+ (loWord + word + 1) < NumSrcWords ? pVal[loWord + word + 1] : 0;
+ Result.pVal[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
+ }
+
+ return Result.clearUnusedBits();
+}
+
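// Usage sketch for the two routines above, with values small enough to check
// by hand (assumes only the standard APInt constructors):
//
//   APInt V(16, 0xABCD);
//   APInt Sub = V.extractBits(8, 4);  // bits [4,12) of V, i.e. 0xBC
//   V.insertBits(APInt(8, 0xFF), 4);  // V becomes 0xAFFD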
unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
assert(!str.empty() && "Invalid string length");
- assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
+ assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
radix == 36) &&
"Radix should be 2, 8, 10, 16, or 36!");
@@ -604,7 +607,7 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
return slen * 4 + isNegative;
// FIXME: base 36
-
+
// This is grossly inefficient but accurate. We could probably do something
// with a computation of roughly slen*64/20 and then adjust by the value of
// the first few digits. But, I'm not sure how accurate that could be.
@@ -613,7 +616,7 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
// be too large. This avoids the assertion in the constructor. This
// calculation doesn't work appropriately for the numbers 0-9, so just use 4
// bits in that case.
- unsigned sufficient
+ unsigned sufficient
= radix == 10? (slen == 1 ? 4 : slen * 64/18)
: (slen == 1 ? 7 : slen * 16/3);
@@ -647,19 +650,20 @@ bool APInt::isSplat(unsigned SplatSizeInBits) const {
/// This function returns the high "numBits" bits of this APInt.
APInt APInt::getHiBits(unsigned numBits) const {
- return APIntOps::lshr(*this, BitWidth - numBits);
+ return this->lshr(BitWidth - numBits);
}
/// This function returns the low "numBits" bits of this APInt.
APInt APInt::getLoBits(unsigned numBits) const {
- return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
- BitWidth - numBits);
+ APInt Result(getLowBitsSet(BitWidth, numBits));
+ Result &= *this;
+ return Result;
}
unsigned APInt::countLeadingZerosSlowCase() const {
unsigned Count = 0;
for (int i = getNumWords()-1; i >= 0; --i) {
- integerPart V = pVal[i];
+ uint64_t V = pVal[i];
if (V == 0)
Count += APINT_BITS_PER_WORD;
else {
@@ -729,18 +733,6 @@ unsigned APInt::countPopulationSlowCase() const {
return Count;
}
-/// Perform a logical right-shift from Src to Dst, which must be equal or
-/// non-overlapping, of Words words, by Shift, which must be less than 64.
-static void lshrNear(uint64_t *Dst, uint64_t *Src, unsigned Words,
- unsigned Shift) {
- uint64_t Carry = 0;
- for (int I = Words - 1; I >= 0; --I) {
- uint64_t Tmp = Src[I];
- Dst[I] = (Tmp >> Shift) | Carry;
- Carry = Tmp << (64 - Shift);
- }
-}
-
APInt APInt::byteSwap() const {
assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
if (BitWidth == 16)
@@ -761,8 +753,7 @@ APInt APInt::byteSwap() const {
for (unsigned I = 0, N = getNumWords(); I != N; ++I)
Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]);
if (Result.BitWidth != BitWidth) {
- lshrNear(Result.pVal, Result.pVal, getNumWords(),
- Result.BitWidth - BitWidth);
+ Result.lshrInPlace(Result.BitWidth - BitWidth);
Result.BitWidth = BitWidth;
}
return Result;
@@ -798,14 +789,46 @@ APInt APInt::reverseBits() const {
return Reversed;
}
-APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
- const APInt& API2) {
- APInt A = API1, B = API2;
- while (!!B) {
- APInt T = B;
- B = APIntOps::urem(A, B);
- A = T;
+APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
+ // Fast-path a common case.
+ if (A == B) return A;
+
+ // Corner cases: if either operand is zero, the other is the gcd.
+ if (!A) return B;
+ if (!B) return A;
+
+ // Count common powers of 2 and remove all other powers of 2.
+ unsigned Pow2;
+ {
+ unsigned Pow2_A = A.countTrailingZeros();
+ unsigned Pow2_B = B.countTrailingZeros();
+ if (Pow2_A > Pow2_B) {
+ A.lshrInPlace(Pow2_A - Pow2_B);
+ Pow2 = Pow2_B;
+ } else if (Pow2_B > Pow2_A) {
+ B.lshrInPlace(Pow2_B - Pow2_A);
+ Pow2 = Pow2_A;
+ } else {
+ Pow2 = Pow2_A;
+ }
+ }
+
+ // Both operands are odd multiples of 2^Pow2:
+ //
+ // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
+ //
+ // This is a modified version of Stein's algorithm, taking advantage of
+ // efficient countTrailingZeros().
+ while (A != B) {
+ if (A.ugt(B)) {
+ A -= B;
+ A.lshrInPlace(A.countTrailingZeros() - Pow2);
+ } else {
+ B -= A;
+ B.lshrInPlace(B.countTrailingZeros() - Pow2);
+ }
}
+
return A;
}
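// Usage sketch with a hand-checkable pair (standard APInt API assumed):
//
//   APInt A(32, 48), B(32, 18);  // 48 = 2^4 * 3, 18 = 2 * 3^2
//   APInt G = APIntOps::GreatestCommonDivisor(A, B);  // G == 6
//
// Here one shared power of two is factored out up front (Pow2 == 1), and the
// loop runs Stein's subtract-and-shift steps on the remaining factors.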
@@ -1117,68 +1140,59 @@ APInt APInt::lshr(const APInt &shiftAmt) const {
return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
}
+/// Perform a logical right-shift from Src to Dst of Words words, by Shift,
+/// which must be less than 64. If the source and destination ranges overlap,
+/// we require that Src >= Dst (put another way, we require that the overall
+/// operation is a right shift on the combined range).
+static void lshrWords(APInt::WordType *Dst, APInt::WordType *Src,
+ unsigned Words, unsigned Shift) {
+ assert(Shift < APInt::APINT_BITS_PER_WORD);
+
+ if (!Words)
+ return;
+
+ if (Shift == 0) {
+ std::memmove(Dst, Src, Words * APInt::APINT_WORD_SIZE);
+ return;
+ }
+
+ uint64_t Low = Src[0];
+ for (unsigned I = 1; I != Words; ++I) {
+ uint64_t High = Src[I];
+ Dst[I - 1] =
+ (Low >> Shift) | (High << (APInt::APINT_BITS_PER_WORD - Shift));
+ Low = High;
+ }
+ Dst[Words - 1] = Low >> Shift;
+}
+
/// Logical right-shift this APInt by shiftAmt.
/// @brief Logical right-shift function.
-APInt APInt::lshr(unsigned shiftAmt) const {
+void APInt::lshrInPlace(unsigned shiftAmt) {
if (isSingleWord()) {
if (shiftAmt >= BitWidth)
- return APInt(BitWidth, 0);
+ VAL = 0;
else
- return APInt(BitWidth, this->VAL >> shiftAmt);
- }
-
- // If all the bits were shifted out, the result is 0. This avoids issues
- // with shifting by the size of the integer type, which produces undefined
- // results. We define these "undefined results" to always be 0.
- if (shiftAmt >= BitWidth)
- return APInt(BitWidth, 0);
-
- // If none of the bits are shifted out, the result is *this. This avoids
- // issues with shifting by the size of the integer type, which produces
- // undefined results in the code below. This is also an optimization.
- if (shiftAmt == 0)
- return *this;
-
- // Create some space for the result.
- uint64_t * val = new uint64_t[getNumWords()];
-
- // If we are shifting less than a word, compute the shift with a simple carry
- if (shiftAmt < APINT_BITS_PER_WORD) {
- lshrNear(val, pVal, getNumWords(), shiftAmt);
- APInt Result(val, BitWidth);
- Result.clearUnusedBits();
- return Result;
+ VAL >>= shiftAmt;
+ return;
}
- // Compute some values needed by the remaining shift algorithms
- unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
- unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
+ // Don't bother performing a no-op shift.
+ if (!shiftAmt)
+ return;
- // If we are shifting whole words, just move whole words
- if (wordShift == 0) {
- for (unsigned i = 0; i < getNumWords() - offset; ++i)
- val[i] = pVal[i+offset];
- for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
- val[i] = 0;
- APInt Result(val, BitWidth);
- Result.clearUnusedBits();
- return Result;
- }
+ // Find number of complete words being shifted out and zeroed.
+ const unsigned Words = getNumWords();
+ const unsigned ShiftFullWords =
+ std::min(shiftAmt / APINT_BITS_PER_WORD, Words);
- // Shift the low order words
- unsigned breakWord = getNumWords() - offset -1;
- for (unsigned i = 0; i < breakWord; ++i)
- val[i] = (pVal[i+offset] >> wordShift) |
- (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
- // Shift the break word.
- val[breakWord] = pVal[breakWord+offset] >> wordShift;
+ // Fill in first Words - ShiftFullWords by shifting.
+ lshrWords(pVal, pVal + ShiftFullWords, Words - ShiftFullWords,
+ shiftAmt % APINT_BITS_PER_WORD);
- // Remaining words are 0
- for (unsigned i = breakWord+1; i < getNumWords(); ++i)
- val[i] = 0;
- APInt Result(val, BitWidth);
- Result.clearUnusedBits();
- return Result;
+ // The remaining high words are all zero.
+ for (unsigned I = Words - ShiftFullWords; I != Words; ++I)
+ pVal[I] = 0;
}
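// In-place usage sketch (assumes the existing getHighBitsSet helper):
//
//   APInt X = APInt::getHighBitsSet(128, 8);  // bits [120,128) set
//   X.lshrInPlace(64);                        // now bits [56,64) set
//   // X.getRawData()[0] == 0xFF00000000000000; no temporary is allocated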
/// Left-shift this APInt by shiftAmt.
@@ -1244,8 +1258,21 @@ APInt APInt::shlSlowCase(unsigned shiftAmt) const {
return Result;
}
+// Calculate the rotate amount modulo the bit width.
+static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
+ unsigned rotBitWidth = rotateAmt.getBitWidth();
+ APInt rot = rotateAmt;
+ if (rotBitWidth < BitWidth) {
+ // Extend the rotate APInt, so that the urem doesn't divide by 0.
+ // e.g. APInt(1, 32) would give APInt(1, 0).
+ rot = rotateAmt.zext(BitWidth);
+ }
+ rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
+ return rot.getLimitedValue(BitWidth);
+}
+
APInt APInt::rotl(const APInt &rotateAmt) const {
- return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
+ return rotl(rotateModulo(BitWidth, rotateAmt));
}
APInt APInt::rotl(unsigned rotateAmt) const {
@@ -1256,7 +1283,7 @@ APInt APInt::rotl(unsigned rotateAmt) const {
}
APInt APInt::rotr(const APInt &rotateAmt) const {
- return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
+ return rotr(rotateModulo(BitWidth, rotateAmt));
}
APInt APInt::rotr(unsigned rotateAmt) const {
@@ -1618,7 +1645,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
if (r) {
// The value d is expressed by the "shift" value above since we avoided
// multiplication by d by using a shift left. So, all we have to do is
- // shift right here. In order to mak
+ // shift right here.
if (shift) {
unsigned carry = 0;
DEBUG(dbgs() << "KnuthDiv: remainder:");
@@ -2014,7 +2041,7 @@ APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
APInt Res = *this * RHS;
-
+
if (*this != 0 && RHS != 0)
Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
else
@@ -2041,7 +2068,7 @@ APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
Overflow = ShAmt.uge(countLeadingZeros());
else
Overflow = ShAmt.uge(countLeadingOnes());
-
+
return *this << ShAmt;
}
@@ -2061,7 +2088,7 @@ APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
// Check our assumptions here
assert(!str.empty() && "Invalid string length");
- assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
+ assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
radix == 36) &&
"Radix should be 2, 8, 10, 16, or 36!");
@@ -2086,9 +2113,8 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
// Figure out if we can shift instead of multiply
unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
- // Set up an APInt for the digit to add outside the loop so we don't
+ // Set up an APInt for the radix multiplier outside the loop so we don't
// constantly construct/destruct it.
- APInt apdigit(getBitWidth(), 0);
APInt apradix(getBitWidth(), radix);
// Enter digit traversal loop
@@ -2105,11 +2131,7 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
}
// Add in the digit we just interpreted
- if (apdigit.isSingleWord())
- apdigit.VAL = digit;
- else
- apdigit.pVal[0] = digit;
- *this += apdigit;
+ *this += digit;
}
// If its negative, put it in two's complement form
if (isNeg) {
@@ -2120,7 +2142,7 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
bool Signed, bool formatAsCLiteral) const {
- assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
+ assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
Radix == 36) &&
"Radix should be 2, 8, 10, 16, or 36!");
@@ -2208,7 +2230,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
// For the 2, 8 and 16 bit cases, we can just shift instead of divide
// because the number of bits per digit (1, 3 and 4 respectively) divides
- // equaly. We just shift until the value is zero.
+ // equally. We just shift until the value is zero.
if (Radix == 2 || Radix == 8 || Radix == 16) {
// Just shift tmp right for each digit width until it becomes zero
unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
@@ -2245,14 +2267,15 @@ std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
return S.str();
}
-
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void APInt::dump() const {
SmallString<40> S, U;
this->toStringUnsigned(U);
this->toStringSigned(S);
dbgs() << "APInt(" << BitWidth << "b, "
- << U << "u " << S << "s)";
+ << U << "u " << S << "s)\n";
}
+#endif
void APInt::print(raw_ostream &OS, bool isSigned) const {
SmallString<40> S;
@@ -2265,83 +2288,60 @@ void APInt::print(raw_ostream &OS, bool isSigned) const {
 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
 // and unrestrictive assumption.
-static_assert(integerPartWidth % 2 == 0, "Part width must be divisible by 2!");
+static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
+ "Part width must be divisible by 2!");
/* Some handy functions local to this file. */
-namespace {
- /* Returns the integer part with the least significant BITS set.
- BITS cannot be zero. */
- static inline integerPart
- lowBitMask(unsigned int bits)
- {
- assert(bits != 0 && bits <= integerPartWidth);
+/* Returns the integer part with the least significant BITS set.
+ BITS cannot be zero. */
+static inline APInt::WordType lowBitMask(unsigned bits) {
+ assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
- return ~(integerPart) 0 >> (integerPartWidth - bits);
- }
+ return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
+}
- /* Returns the value of the lower half of PART. */
- static inline integerPart
- lowHalf(integerPart part)
- {
- return part & lowBitMask(integerPartWidth / 2);
- }
+/* Returns the value of the lower half of PART. */
+static inline APInt::WordType lowHalf(APInt::WordType part) {
+ return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
+}
- /* Returns the value of the upper half of PART. */
- static inline integerPart
- highHalf(integerPart part)
- {
- return part >> (integerPartWidth / 2);
- }
+/* Returns the value of the upper half of PART. */
+static inline APInt::WordType highHalf(APInt::WordType part) {
+ return part >> (APInt::APINT_BITS_PER_WORD / 2);
+}
- /* Returns the bit number of the most significant set bit of a part.
- If the input number has no bits set -1U is returned. */
- static unsigned int
- partMSB(integerPart value)
- {
- return findLastSet(value, ZB_Max);
- }
+/* Returns the bit number of the most significant set bit of a part.
+ If the input number has no bits set -1U is returned. */
+static unsigned partMSB(APInt::WordType value) {
+ return findLastSet(value, ZB_Max);
+}
- /* Returns the bit number of the least significant set bit of a
- part. If the input number has no bits set -1U is returned. */
- static unsigned int
- partLSB(integerPart value)
- {
- return findFirstSet(value, ZB_Max);
- }
+/* Returns the bit number of the least significant set bit of a
+ part. If the input number has no bits set -1U is returned. */
+static unsigned partLSB(APInt::WordType value) {
+ return findFirstSet(value, ZB_Max);
}
/* Sets the least significant part of a bignum to the input value, and
zeroes out higher parts. */
-void
-APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
-{
- unsigned int i;
-
+void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
assert(parts > 0);
dst[0] = part;
- for (i = 1; i < parts; i++)
+ for (unsigned i = 1; i < parts; i++)
dst[i] = 0;
}
/* Assign one bignum to another. */
-void
-APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
-{
- unsigned int i;
-
- for (i = 0; i < parts; i++)
+void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
+ for (unsigned i = 0; i < parts; i++)
dst[i] = src[i];
}
/* Returns true if a bignum is zero, false otherwise. */
-bool
-APInt::tcIsZero(const integerPart *src, unsigned int parts)
-{
- unsigned int i;
-
- for (i = 0; i < parts; i++)
+bool APInt::tcIsZero(const WordType *src, unsigned parts) {
+ for (unsigned i = 0; i < parts; i++)
if (src[i])
return false;
@@ -2349,41 +2349,29 @@ APInt::tcIsZero(const integerPart *src, unsigned int parts)
}
/* Extract the given bit of a bignum; returns 0 or 1. */
-int
-APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
-{
- return (parts[bit / integerPartWidth] &
- ((integerPart) 1 << bit % integerPartWidth)) != 0;
+int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
+ return (parts[whichWord(bit)] & maskBit(bit)) != 0;
}
/* Set the given bit of a bignum. */
-void
-APInt::tcSetBit(integerPart *parts, unsigned int bit)
-{
- parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
+void APInt::tcSetBit(WordType *parts, unsigned bit) {
+ parts[whichWord(bit)] |= maskBit(bit);
}
/* Clears the given bit of a bignum. */
-void
-APInt::tcClearBit(integerPart *parts, unsigned int bit)
-{
- parts[bit / integerPartWidth] &=
- ~((integerPart) 1 << (bit % integerPartWidth));
+void APInt::tcClearBit(WordType *parts, unsigned bit) {
+ parts[whichWord(bit)] &= ~maskBit(bit);
}
/* Returns the bit number of the least significant set bit of a
number. If the input number has no bits set -1U is returned. */
-unsigned int
-APInt::tcLSB(const integerPart *parts, unsigned int n)
-{
- unsigned int i, lsb;
-
- for (i = 0; i < n; i++) {
- if (parts[i] != 0) {
- lsb = partLSB(parts[i]);
+unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
+ for (unsigned i = 0; i < n; i++) {
+ if (parts[i] != 0) {
+ unsigned lsb = partLSB(parts[i]);
- return lsb + i * integerPartWidth;
- }
+ return lsb + i * APINT_BITS_PER_WORD;
+ }
}
return -1U;
@@ -2391,18 +2379,14 @@ APInt::tcLSB(const integerPart *parts, unsigned int n)
/* Returns the bit number of the most significant set bit of a number.
If the input number has no bits set -1U is returned. */
-unsigned int
-APInt::tcMSB(const integerPart *parts, unsigned int n)
-{
- unsigned int msb;
-
+unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
do {
--n;
if (parts[n] != 0) {
- msb = partMSB(parts[n]);
+ unsigned msb = partMSB(parts[n]);
- return msb + n * integerPartWidth;
+ return msb + n * APINT_BITS_PER_WORD;
}
} while (n);
@@ -2414,31 +2398,28 @@ APInt::tcMSB(const integerPart *parts, unsigned int n)
the least significant bit of DST. All high bits above srcBITS in
DST are zero-filled. */
void
-APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
- unsigned int srcBits, unsigned int srcLSB)
-{
- unsigned int firstSrcPart, dstParts, shift, n;
-
- dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
+APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
+ unsigned srcBits, unsigned srcLSB) {
+ unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
assert(dstParts <= dstCount);
- firstSrcPart = srcLSB / integerPartWidth;
+ unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
tcAssign (dst, src + firstSrcPart, dstParts);
- shift = srcLSB % integerPartWidth;
+ unsigned shift = srcLSB % APINT_BITS_PER_WORD;
tcShiftRight (dst, dstParts, shift);
- /* We now have (dstParts * integerPartWidth - shift) bits from SRC
+ /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
    in DST. If this is less than srcBits, append the rest, else
clear the high bits. */
- n = dstParts * integerPartWidth - shift;
+ unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
if (n < srcBits) {
- integerPart mask = lowBitMask (srcBits - n);
+ WordType mask = lowBitMask (srcBits - n);
dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
- << n % integerPartWidth);
+ << n % APINT_BITS_PER_WORD);
} else if (n > srcBits) {
- if (srcBits % integerPartWidth)
- dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
+ if (srcBits % APINT_BITS_PER_WORD)
+ dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
}
/* Clear high parts. */
@@ -2447,18 +2428,12 @@ APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
}
/* DST += RHS + C where C is zero or one. Returns the carry flag. */
-integerPart
-APInt::tcAdd(integerPart *dst, const integerPart *rhs,
- integerPart c, unsigned int parts)
-{
- unsigned int i;
-
+APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
+ WordType c, unsigned parts) {
assert(c <= 1);
- for (i = 0; i < parts; i++) {
- integerPart l;
-
- l = dst[i];
+ for (unsigned i = 0; i < parts; i++) {
+ WordType l = dst[i];
if (c) {
dst[i] += rhs[i] + 1;
c = (dst[i] <= l);
@@ -2471,19 +2446,29 @@ APInt::tcAdd(integerPart *dst, const integerPart *rhs,
return c;
}
-/* DST -= RHS + C where C is zero or one. Returns the carry flag. */
-integerPart
-APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
- integerPart c, unsigned int parts)
-{
- unsigned int i;
+/// This function adds a single "word" integer, src, to the multiple
+/// "word" integer array, dst[]. dst[] is modified to reflect the addition and
+/// 1 is returned if there is a carry out, otherwise 0 is returned.
+/// @returns the carry of the addition.
+APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
+ unsigned parts) {
+ for (unsigned i = 0; i < parts; ++i) {
+ dst[i] += src;
+ if (dst[i] >= src)
+ return 0; // No need to carry so exit early.
+ src = 1; // Carry one to next digit.
+ }
- assert(c <= 1);
+ return 1;
+}
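
// A hand-worked sketch (not from the patch) exercising tcAddPart on a
// two-word bignum: the early exit when a word absorbs the carry, and the
// carry-out when every word wraps.
#include "llvm/ADT/APInt.h"

void tcAddPartExamples() {
  llvm::APInt::WordType A[2] = {~0ULL, 0}; // value 2^64 - 1
  llvm::APInt::WordType C = llvm::APInt::tcAddPart(A, 1, 2);
  // A is now {0, 1} (value 2^64); the carry out C is 0.
  llvm::APInt::WordType B[2] = {~0ULL, ~0ULL}; // value 2^128 - 1
  C = llvm::APInt::tcAddPart(B, 1, 2);
  // Every word wrapped to zero, so the carry out C is 1.
  (void)C;
}
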
- for (i = 0; i < parts; i++) {
- integerPart l;
+/* DST -= RHS + C where C is zero or one. Returns the carry flag. */
+APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
+ WordType c, unsigned parts) {
+ assert(c <= 1);
- l = dst[i];
+ for (unsigned i = 0; i < parts; i++) {
+ WordType l = dst[i];
if (c) {
dst[i] -= rhs[i] + 1;
c = (dst[i] >= l);
@@ -2496,10 +2481,28 @@ APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
return c;
}
+/// This function subtracts a single "word" (64-bit word), src, from
+/// the multi-word integer array, dst[], propagating the borrowed 1 value until
+/// no further borrowing is needed or it runs out of "words" in dst. The result
+/// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
+/// exhausted. In other words, if src > dst then this function returns 1,
+/// otherwise 0.
+/// @returns the borrow out of the subtraction
+APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
+ unsigned parts) {
+ for (unsigned i = 0; i < parts; ++i) {
+ WordType Dst = dst[i];
+ dst[i] -= src;
+ if (src <= Dst)
+ return 0; // No need to borrow so exit early.
+ src = 1; // We have to "borrow 1" from next "word"
+ }
+
+ return 1;
+}
+
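
// The mirror-image sketch (not from the patch) for tcSubtractPart: the
// borrow propagates until some word can absorb it, and the return value
// is the borrow out.
#include "llvm/ADT/APInt.h"

void tcSubtractPartExamples() {
  llvm::APInt::WordType A[2] = {0, 1}; // value 2^64
  llvm::APInt::WordType B = llvm::APInt::tcSubtractPart(A, 1, 2);
  // A is now {2^64 - 1, 0}; the borrow out B is 0.
  llvm::APInt::WordType Z[2] = {0, 0};
  B = llvm::APInt::tcSubtractPart(Z, 1, 2);
  // src > dst, so every word wrapped and the borrow out B is 1.
  (void)B;
}
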
/* Negate a bignum in-place. */
-void
-APInt::tcNegate(integerPart *dst, unsigned int parts)
-{
+void APInt::tcNegate(WordType *dst, unsigned parts) {
tcComplement(dst, parts);
tcIncrement(dst, parts);
}
@@ -2515,23 +2518,20 @@ APInt::tcNegate(integerPart *dst, unsigned int parts)
DSTPARTS parts of the result, and if all of the omitted higher
parts were zero return zero, otherwise overflow occurred and
return one. */
-int
-APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
- integerPart multiplier, integerPart carry,
- unsigned int srcParts, unsigned int dstParts,
- bool add)
-{
- unsigned int i, n;
-
+int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
+ WordType multiplier, WordType carry,
+ unsigned srcParts, unsigned dstParts,
+ bool add) {
/* Otherwise our writes of DST kill our later reads of SRC. */
assert(dst <= src || dst >= src + srcParts);
assert(dstParts <= srcParts + 1);
/* N loops; minimum of dstParts and srcParts. */
- n = dstParts < srcParts ? dstParts: srcParts;
+ unsigned n = dstParts < srcParts ? dstParts: srcParts;
+ unsigned i;
for (i = 0; i < n; i++) {
- integerPart low, mid, high, srcPart;
+ WordType low, mid, high, srcPart;
/* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
@@ -2543,7 +2543,7 @@ APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
srcPart = src[i];
- if (multiplier == 0 || srcPart == 0) {
+ if (multiplier == 0 || srcPart == 0) {
low = carry;
high = 0;
} else {
@@ -2552,14 +2552,14 @@ APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
mid = lowHalf(srcPart) * highHalf(multiplier);
high += highHalf(mid);
- mid <<= integerPartWidth / 2;
+ mid <<= APINT_BITS_PER_WORD / 2;
if (low + mid < low)
high++;
low += mid;
mid = highHalf(srcPart) * lowHalf(multiplier);
high += highHalf(mid);
- mid <<= integerPartWidth / 2;
+ mid <<= APINT_BITS_PER_WORD / 2;
if (low + mid < low)
high++;
low += mid;
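
// A standalone sketch (not from the patch) of the half-word technique used
// by tcMultiplyPart: a 64x64 -> 128 bit product assembled from four 32-bit
// partial products, with the same carry checks as the code above. The name
// mul64x64 is local to this illustration.
#include <cstdint>

static void mul64x64(uint64_t A, uint64_t B, uint64_t &Hi, uint64_t &Lo) {
  const uint64_t LoMask = 0xFFFFFFFFULL;
  uint64_t AL = A & LoMask, AH = A >> 32;
  uint64_t BL = B & LoMask, BH = B >> 32;
  Lo = AL * BL; // low partial product
  Hi = AH * BH; // high partial product
  for (uint64_t Mid : {AL * BH, AH * BL}) {
    Hi += Mid >> 32;   // upper half of the cross term
    Mid <<= 32;        // lower half, aligned into the low word
    if (Lo + Mid < Lo) // detect carry out of the low word
      ++Hi;
    Lo += Mid;
  }
}
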
@@ -2608,19 +2608,14 @@ APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
is filled with the least significant parts of the result. Returns
one if overflow occurred, otherwise zero. DST must be disjoint
from both operands. */
-int
-APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
- const integerPart *rhs, unsigned int parts)
-{
- unsigned int i;
- int overflow;
-
+int APInt::tcMultiply(WordType *dst, const WordType *lhs,
+ const WordType *rhs, unsigned parts) {
assert(dst != lhs && dst != rhs);
- overflow = 0;
+ int overflow = 0;
tcSet(dst, 0, parts);
- for (i = 0; i < parts; i++)
+ for (unsigned i = 0; i < parts; i++)
overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
parts - i, true);
@@ -2631,25 +2626,21 @@ APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
operands. No overflow occurs. DST must be disjoint from both
operands. Returns the number of parts required to hold the
result. */
-unsigned int
-APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
- const integerPart *rhs, unsigned int lhsParts,
- unsigned int rhsParts)
-{
+unsigned APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
+ const WordType *rhs, unsigned lhsParts,
+ unsigned rhsParts) {
/* Put the narrower number on the LHS for less loops below. */
if (lhsParts > rhsParts) {
return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
} else {
- unsigned int n;
-
assert(dst != lhs && dst != rhs);
tcSet(dst, 0, rhsParts);
- for (n = 0; n < lhsParts; n++)
- tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
+ for (unsigned i = 0; i < lhsParts; i++)
+ tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
- n = lhsParts + rhsParts;
+ unsigned n = lhsParts + rhsParts;
return n - (dst[n - 1] == 0);
}
@@ -2665,23 +2656,18 @@ APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
use by the routine; its contents need not be initialized and are
destroyed. LHS, REMAINDER and SCRATCH must be distinct.
*/
-int
-APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
- integerPart *remainder, integerPart *srhs,
- unsigned int parts)
-{
- unsigned int n, shiftCount;
- integerPart mask;
-
+int APInt::tcDivide(WordType *lhs, const WordType *rhs,
+ WordType *remainder, WordType *srhs,
+ unsigned parts) {
assert(lhs != remainder && lhs != srhs && remainder != srhs);
- shiftCount = tcMSB(rhs, parts) + 1;
+ unsigned shiftCount = tcMSB(rhs, parts) + 1;
if (shiftCount == 0)
return true;
- shiftCount = parts * integerPartWidth - shiftCount;
- n = shiftCount / integerPartWidth;
- mask = (integerPart) 1 << (shiftCount % integerPartWidth);
+ shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
+ unsigned n = shiftCount / APINT_BITS_PER_WORD;
+ WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
tcAssign(srhs, rhs, parts);
tcShiftLeft(srhs, parts, shiftCount);
@@ -2704,7 +2690,7 @@ APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
shiftCount--;
tcShiftRight(srhs, parts, 1);
if ((mask >>= 1) == 0) {
- mask = (integerPart) 1 << (integerPartWidth - 1);
+ mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
n--;
}
}
@@ -2714,18 +2700,14 @@ APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
/* Shift a bignum left COUNT bits in-place. Shifted in bits are zero.
There are no restrictions on COUNT. */
-void
-APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
-{
+void APInt::tcShiftLeft(WordType *dst, unsigned parts, unsigned count) {
if (count) {
- unsigned int jump, shift;
-
     /* Jump is the inter-part jump; shift is the intra-part shift. */
- jump = count / integerPartWidth;
- shift = count % integerPartWidth;
+ unsigned jump = count / APINT_BITS_PER_WORD;
+ unsigned shift = count % APINT_BITS_PER_WORD;
while (parts > jump) {
- integerPart part;
+ WordType part;
parts--;
@@ -2735,7 +2717,7 @@ APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
if (shift) {
part <<= shift;
if (parts >= jump + 1)
- part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
+ part |= dst[parts - jump - 1] >> (APINT_BITS_PER_WORD - shift);
}
dst[parts] = part;
@@ -2748,20 +2730,16 @@ APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
/* Shift a bignum right COUNT bits in-place. Shifted in bits are
zero. There are no restrictions on COUNT. */
-void
-APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
-{
+void APInt::tcShiftRight(WordType *dst, unsigned parts, unsigned count) {
if (count) {
- unsigned int i, jump, shift;
-
     /* Jump is the inter-part jump; shift is the intra-part shift. */
- jump = count / integerPartWidth;
- shift = count % integerPartWidth;
+ unsigned jump = count / APINT_BITS_PER_WORD;
+ unsigned shift = count % APINT_BITS_PER_WORD;
/* Perform the shift. This leaves the most significant COUNT bits
of the result at zero. */
- for (i = 0; i < parts; i++) {
- integerPart part;
+ for (unsigned i = 0; i < parts; i++) {
+ WordType part;
if (i + jump >= parts) {
part = 0;
@@ -2770,7 +2748,7 @@ APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
if (shift) {
part >>= shift;
if (i + jump + 1 < parts)
- part |= dst[i + jump + 1] << (integerPartWidth - shift);
+ part |= dst[i + jump + 1] << (APINT_BITS_PER_WORD - shift);
}
}
@@ -2780,107 +2758,55 @@ APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
}
/* Bitwise and of two bignums. */
-void
-APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
-{
- unsigned int i;
-
- for (i = 0; i < parts; i++)
+void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
+ for (unsigned i = 0; i < parts; i++)
dst[i] &= rhs[i];
}
/* Bitwise inclusive or of two bignums. */
-void
-APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
-{
- unsigned int i;
-
- for (i = 0; i < parts; i++)
+void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
+ for (unsigned i = 0; i < parts; i++)
dst[i] |= rhs[i];
}
/* Bitwise exclusive or of two bignums. */
-void
-APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
-{
- unsigned int i;
-
- for (i = 0; i < parts; i++)
+void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
+ for (unsigned i = 0; i < parts; i++)
dst[i] ^= rhs[i];
}
/* Complement a bignum in-place. */
-void
-APInt::tcComplement(integerPart *dst, unsigned int parts)
-{
- unsigned int i;
-
- for (i = 0; i < parts; i++)
+void APInt::tcComplement(WordType *dst, unsigned parts) {
+ for (unsigned i = 0; i < parts; i++)
dst[i] = ~dst[i];
}
/* Comparison (unsigned) of two bignums. */
-int
-APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
- unsigned int parts)
-{
+int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
+ unsigned parts) {
while (parts) {
- parts--;
- if (lhs[parts] == rhs[parts])
- continue;
+ parts--;
+ if (lhs[parts] == rhs[parts])
+ continue;
- if (lhs[parts] > rhs[parts])
- return 1;
- else
- return -1;
- }
+ return (lhs[parts] > rhs[parts]) ? 1 : -1;
+ }
return 0;
}
-/* Increment a bignum in-place, return the carry flag. */
-integerPart
-APInt::tcIncrement(integerPart *dst, unsigned int parts)
-{
- unsigned int i;
-
- for (i = 0; i < parts; i++)
- if (++dst[i] != 0)
- break;
-
- return i == parts;
-}
-
-/* Decrement a bignum in-place, return the borrow flag. */
-integerPart
-APInt::tcDecrement(integerPart *dst, unsigned int parts) {
- for (unsigned int i = 0; i < parts; i++) {
- // If the current word is non-zero, then the decrement has no effect on the
- // higher-order words of the integer and no borrow can occur. Exit early.
- if (dst[i]--)
- return 0;
- }
- // If every word was zero, then there is a borrow.
- return 1;
-}
-
-
/* Set the least significant BITS bits of a bignum, clear the
rest. */
-void
-APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
- unsigned int bits)
-{
- unsigned int i;
-
- i = 0;
- while (bits > integerPartWidth) {
- dst[i++] = ~(integerPart) 0;
- bits -= integerPartWidth;
+void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
+ unsigned bits) {
+ unsigned i = 0;
+ while (bits > APINT_BITS_PER_WORD) {
+ dst[i++] = ~(WordType) 0;
+ bits -= APINT_BITS_PER_WORD;
}
if (bits)
- dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
+ dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
while (i < parts)
dst[i++] = 0;
diff --git a/lib/Support/ARMAttributeParser.cpp b/lib/Support/ARMAttributeParser.cpp
new file mode 100644
index 000000000000..63e800a5b78b
--- /dev/null
+++ b/lib/Support/ARMAttributeParser.cpp
@@ -0,0 +1,708 @@
+//===--- ARMAttributeParser.cpp - ARM Attribute Information Printer -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ARMAttributeParser.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/ScopedPrinter.h"
+
+using namespace llvm;
+using namespace llvm::ARMBuildAttrs;
+
+static const EnumEntry<unsigned> TagNames[] = {
+ { "Tag_File", ARMBuildAttrs::File },
+ { "Tag_Section", ARMBuildAttrs::Section },
+ { "Tag_Symbol", ARMBuildAttrs::Symbol },
+};
+
+namespace llvm {
+#define ATTRIBUTE_HANDLER(Attr_) \
+ { ARMBuildAttrs::Attr_, &ARMAttributeParser::Attr_ }
+
+const ARMAttributeParser::DisplayHandler
+ARMAttributeParser::DisplayRoutines[] = {
+ { ARMBuildAttrs::CPU_raw_name, &ARMAttributeParser::StringAttribute, },
+ { ARMBuildAttrs::CPU_name, &ARMAttributeParser::StringAttribute },
+ ATTRIBUTE_HANDLER(CPU_arch),
+ ATTRIBUTE_HANDLER(CPU_arch_profile),
+ ATTRIBUTE_HANDLER(ARM_ISA_use),
+ ATTRIBUTE_HANDLER(THUMB_ISA_use),
+ ATTRIBUTE_HANDLER(FP_arch),
+ ATTRIBUTE_HANDLER(WMMX_arch),
+ ATTRIBUTE_HANDLER(Advanced_SIMD_arch),
+ ATTRIBUTE_HANDLER(PCS_config),
+ ATTRIBUTE_HANDLER(ABI_PCS_R9_use),
+ ATTRIBUTE_HANDLER(ABI_PCS_RW_data),
+ ATTRIBUTE_HANDLER(ABI_PCS_RO_data),
+ ATTRIBUTE_HANDLER(ABI_PCS_GOT_use),
+ ATTRIBUTE_HANDLER(ABI_PCS_wchar_t),
+ ATTRIBUTE_HANDLER(ABI_FP_rounding),
+ ATTRIBUTE_HANDLER(ABI_FP_denormal),
+ ATTRIBUTE_HANDLER(ABI_FP_exceptions),
+ ATTRIBUTE_HANDLER(ABI_FP_user_exceptions),
+ ATTRIBUTE_HANDLER(ABI_FP_number_model),
+ ATTRIBUTE_HANDLER(ABI_align_needed),
+ ATTRIBUTE_HANDLER(ABI_align_preserved),
+ ATTRIBUTE_HANDLER(ABI_enum_size),
+ ATTRIBUTE_HANDLER(ABI_HardFP_use),
+ ATTRIBUTE_HANDLER(ABI_VFP_args),
+ ATTRIBUTE_HANDLER(ABI_WMMX_args),
+ ATTRIBUTE_HANDLER(ABI_optimization_goals),
+ ATTRIBUTE_HANDLER(ABI_FP_optimization_goals),
+ ATTRIBUTE_HANDLER(compatibility),
+ ATTRIBUTE_HANDLER(CPU_unaligned_access),
+ ATTRIBUTE_HANDLER(FP_HP_extension),
+ ATTRIBUTE_HANDLER(ABI_FP_16bit_format),
+ ATTRIBUTE_HANDLER(MPextension_use),
+ ATTRIBUTE_HANDLER(DIV_use),
+ ATTRIBUTE_HANDLER(DSP_extension),
+ ATTRIBUTE_HANDLER(T2EE_use),
+ ATTRIBUTE_HANDLER(Virtualization_use),
+ ATTRIBUTE_HANDLER(nodefaults)
+};
+
+#undef ATTRIBUTE_HANDLER
+
+uint64_t ARMAttributeParser::ParseInteger(const uint8_t *Data,
+ uint32_t &Offset) {
+ unsigned Length;
+ uint64_t Value = decodeULEB128(Data + Offset, &Length);
+ Offset = Offset + Length;
+ return Value;
+}
+
+StringRef ARMAttributeParser::ParseString(const uint8_t *Data,
+ uint32_t &Offset) {
+ const char *String = reinterpret_cast<const char*>(Data + Offset);
+ size_t Length = std::strlen(String);
+ Offset = Offset + Length + 1;
+ return StringRef(String, Length);
+}
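
// A worked example (not from the patch) of the ULEB128 framing that
// ParseInteger relies on: each byte carries 7 payload bits and the high bit
// marks continuation. 0xE5 0x8E 0x26 is the classic DWARF example, decoding
// to 0x65 | (0x0E << 7) | (0x26 << 14) == 624485.
#include "llvm/Support/LEB128.h"
#include <cassert>
#include <cstdint>

void ulebExample() {
  const uint8_t Buf[] = {0xE5, 0x8E, 0x26};
  unsigned Len;
  uint64_t Val = llvm::decodeULEB128(Buf, &Len);
  assert(Val == 624485 && Len == 3);
  (void)Val; (void)Len;
}
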
+
+void ARMAttributeParser::IntegerAttribute(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ uint64_t Value = ParseInteger(Data, Offset);
+ Attributes.insert(std::make_pair(Tag, Value));
+
+ if (SW)
+ SW->printNumber(ARMBuildAttrs::AttrTypeAsString(Tag), Value);
+}
+
+void ARMAttributeParser::StringAttribute(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ StringRef TagName = ARMBuildAttrs::AttrTypeAsString(Tag, /*TagPrefix*/false);
+ StringRef ValueDesc = ParseString(Data, Offset);
+
+ if (SW) {
+ DictScope AS(*SW, "Attribute");
+ SW->printNumber("Tag", Tag);
+ if (!TagName.empty())
+ SW->printString("TagName", TagName);
+ SW->printString("Value", ValueDesc);
+ }
+}
+
+void ARMAttributeParser::PrintAttribute(unsigned Tag, unsigned Value,
+ StringRef ValueDesc) {
+ Attributes.insert(std::make_pair(Tag, Value));
+
+ if (SW) {
+ StringRef TagName = ARMBuildAttrs::AttrTypeAsString(Tag,
+ /*TagPrefix*/false);
+ DictScope AS(*SW, "Attribute");
+ SW->printNumber("Tag", Tag);
+ SW->printNumber("Value", Value);
+ if (!TagName.empty())
+ SW->printString("TagName", TagName);
+ if (!ValueDesc.empty())
+ SW->printString("Description", ValueDesc);
+ }
+}
+
+void ARMAttributeParser::CPU_arch(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "Pre-v4", "ARM v4", "ARM v4T", "ARM v5T", "ARM v5TE", "ARM v5TEJ", "ARM v6",
+ "ARM v6KZ", "ARM v6T2", "ARM v6K", "ARM v7", "ARM v6-M", "ARM v6S-M",
+ "ARM v7E-M", "ARM v8"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::CPU_arch_profile(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ uint64_t Encoded = ParseInteger(Data, Offset);
+
+ StringRef Profile;
+ switch (Encoded) {
+ default: Profile = "Unknown"; break;
+ case 'A': Profile = "Application"; break;
+ case 'R': Profile = "Real-time"; break;
+ case 'M': Profile = "Microcontroller"; break;
+ case 'S': Profile = "Classic"; break;
+ case 0: Profile = "None"; break;
+ }
+
+ PrintAttribute(Tag, Encoded, Profile);
+}
+
+void ARMAttributeParser::ARM_ISA_use(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = { "Not Permitted", "Permitted" };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::THUMB_ISA_use(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = { "Not Permitted", "Thumb-1", "Thumb-2" };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::FP_arch(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "Not Permitted", "VFPv1", "VFPv2", "VFPv3", "VFPv3-D16", "VFPv4",
+ "VFPv4-D16", "ARMv8-a FP", "ARMv8-a FP-D16"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::WMMX_arch(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = { "Not Permitted", "WMMXv1", "WMMXv2" };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::Advanced_SIMD_arch(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "Not Permitted", "NEONv1", "NEONv2+FMA", "ARMv8-a NEON", "ARMv8.1-a NEON"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::PCS_config(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "None", "Bare Platform", "Linux Application", "Linux DSO", "Palm OS 2004",
+ "Reserved (Palm OS)", "Symbian OS 2004", "Reserved (Symbian OS)"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_PCS_R9_use(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = { "v6", "Static Base", "TLS", "Unused" };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_PCS_RW_data(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "Absolute", "PC-relative", "SB-relative", "Not Permitted"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_PCS_RO_data(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "Absolute", "PC-relative", "Not Permitted"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_PCS_GOT_use(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "Not Permitted", "Direct", "GOT-Indirect"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_PCS_wchar_t(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "Not Permitted", "Unknown", "2-byte", "Unknown", "4-byte"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_FP_rounding(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = { "IEEE-754", "Runtime" };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_FP_denormal(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "Unsupported", "IEEE-754", "Sign Only"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_FP_exceptions(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = { "Not Permitted", "IEEE-754" };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_FP_user_exceptions(AttrType Tag,
+ const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = { "Not Permitted", "IEEE-754" };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_FP_number_model(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "Not Permitted", "Finite Only", "RTABI", "IEEE-754"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_align_needed(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "Not Permitted", "8-byte alignment", "4-byte alignment", "Reserved"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+
+ std::string Description;
+ if (Value < array_lengthof(Strings))
+ Description = std::string(Strings[Value]);
+ else if (Value <= 12)
+ Description = std::string("8-byte alignment, ") + utostr(1ULL << Value)
+ + std::string("-byte extended alignment");
+ else
+ Description = "Invalid";
+
+ PrintAttribute(Tag, Value, Description);
+}
+
+void ARMAttributeParser::ABI_align_preserved(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "Not Required", "8-byte data alignment", "8-byte data and code alignment",
+ "Reserved"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+
+ std::string Description;
+ if (Value < array_lengthof(Strings))
+ Description = std::string(Strings[Value]);
+ else if (Value <= 12)
+ Description = std::string("8-byte stack alignment, ") +
+ utostr(1ULL << Value) + std::string("-byte data alignment");
+ else
+ Description = "Invalid";
+
+ PrintAttribute(Tag, Value, Description);
+}
+
+void ARMAttributeParser::ABI_enum_size(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "Not Permitted", "Packed", "Int32", "External Int32"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_HardFP_use(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "Tag_FP_arch", "Single-Precision", "Reserved", "Tag_FP_arch (deprecated)"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_VFP_args(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "AAPCS", "AAPCS VFP", "Custom", "Not Permitted"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_WMMX_args(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = { "AAPCS", "iWMMX", "Custom" };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_optimization_goals(AttrType Tag,
+ const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "None", "Speed", "Aggressive Speed", "Size", "Aggressive Size", "Debugging",
+ "Best Debugging"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_FP_optimization_goals(AttrType Tag,
+ const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "None", "Speed", "Aggressive Speed", "Size", "Aggressive Size", "Accuracy",
+ "Best Accuracy"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::compatibility(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ uint64_t Integer = ParseInteger(Data, Offset);
+ StringRef String = ParseString(Data, Offset);
+
+ if (SW) {
+ DictScope AS(*SW, "Attribute");
+ SW->printNumber("Tag", Tag);
+ SW->startLine() << "Value: " << Integer << ", " << String << '\n';
+ SW->printString("TagName", AttrTypeAsString(Tag, /*TagPrefix*/false));
+ switch (Integer) {
+ case 0:
+ SW->printString("Description", StringRef("No Specific Requirements"));
+ break;
+ case 1:
+ SW->printString("Description", StringRef("AEABI Conformant"));
+ break;
+ default:
+ SW->printString("Description", StringRef("AEABI Non-Conformant"));
+ break;
+ }
+ }
+}
+
+void ARMAttributeParser::CPU_unaligned_access(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = { "Not Permitted", "v6-style" };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::FP_HP_extension(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = { "If Available", "Permitted" };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::ABI_FP_16bit_format(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = { "Not Permitted", "IEEE-754", "VFPv3" };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::MPextension_use(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = { "Not Permitted", "Permitted" };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::DIV_use(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "If Available", "Not Permitted", "Permitted"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::DSP_extension(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = { "Not Permitted", "Permitted" };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::T2EE_use(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = { "Not Permitted", "Permitted" };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::Virtualization_use(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ static const char *const Strings[] = {
+ "Not Permitted", "TrustZone", "Virtualization Extensions",
+ "TrustZone + Virtualization Extensions"
+ };
+
+ uint64_t Value = ParseInteger(Data, Offset);
+ StringRef ValueDesc =
+ (Value < array_lengthof(Strings)) ? Strings[Value] : nullptr;
+ PrintAttribute(Tag, Value, ValueDesc);
+}
+
+void ARMAttributeParser::nodefaults(AttrType Tag, const uint8_t *Data,
+ uint32_t &Offset) {
+ uint64_t Value = ParseInteger(Data, Offset);
+ PrintAttribute(Tag, Value, "Unspecified Tags UNDEFINED");
+}
+
+void ARMAttributeParser::ParseIndexList(const uint8_t *Data, uint32_t &Offset,
+ SmallVectorImpl<uint8_t> &IndexList) {
+ for (;;) {
+ unsigned Length;
+ uint64_t Value = decodeULEB128(Data + Offset, &Length);
+ Offset = Offset + Length;
+ if (Value == 0)
+ break;
+ IndexList.push_back(Value);
+ }
+}
+
+void ARMAttributeParser::ParseAttributeList(const uint8_t *Data,
+ uint32_t &Offset, uint32_t Length) {
+ while (Offset < Length) {
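+    // This inner Length is the ULEB128 byte count; it shadows the parameter.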
+ unsigned Length;
+ uint64_t Tag = decodeULEB128(Data + Offset, &Length);
+ Offset += Length;
+
+ bool Handled = false;
+ for (unsigned AHI = 0, AHE = array_lengthof(DisplayRoutines);
+ AHI != AHE && !Handled; ++AHI) {
+ if (DisplayRoutines[AHI].Attribute == Tag) {
+ (this->*DisplayRoutines[AHI].Routine)(ARMBuildAttrs::AttrType(Tag),
+ Data, Offset);
+ Handled = true;
+ break;
+ }
+ }
+ if (!Handled) {
+ if (Tag < 32) {
+ errs() << "unhandled AEABI Tag " << Tag
+ << " (" << ARMBuildAttrs::AttrTypeAsString(Tag) << ")\n";
+ continue;
+ }
+
+ if (Tag % 2 == 0)
+ IntegerAttribute(ARMBuildAttrs::AttrType(Tag), Data, Offset);
+ else
+ StringAttribute(ARMBuildAttrs::AttrType(Tag), Data, Offset);
+ }
+ }
+}
+
+void ARMAttributeParser::ParseSubsection(const uint8_t *Data, uint32_t Length) {
+ uint32_t Offset = sizeof(uint32_t); /* SectionLength */
+
+ const char *VendorName = reinterpret_cast<const char*>(Data + Offset);
+ size_t VendorNameLength = std::strlen(VendorName);
+ Offset = Offset + VendorNameLength + 1;
+
+ if (SW) {
+ SW->printNumber("SectionLength", Length);
+ SW->printString("Vendor", StringRef(VendorName, VendorNameLength));
+ }
+
+ if (StringRef(VendorName, VendorNameLength).lower() != "aeabi") {
+ return;
+ }
+
+ while (Offset < Length) {
+ /// Tag_File | Tag_Section | Tag_Symbol uleb128:byte-size
+ uint8_t Tag = Data[Offset];
+ Offset = Offset + sizeof(Tag);
+
+ uint32_t Size =
+ *reinterpret_cast<const support::ulittle32_t*>(Data + Offset);
+ Offset = Offset + sizeof(Size);
+
+ if (SW) {
+ SW->printEnum("Tag", Tag, makeArrayRef(TagNames));
+ SW->printNumber("Size", Size);
+ }
+
+ if (Size > Length) {
+ errs() << "subsection length greater than section length\n";
+ return;
+ }
+
+ StringRef ScopeName, IndexName;
+    SmallVector<uint8_t, 8> Indices;
+ switch (Tag) {
+ case ARMBuildAttrs::File:
+ ScopeName = "FileAttributes";
+ break;
+ case ARMBuildAttrs::Section:
+ ScopeName = "SectionAttributes";
+ IndexName = "Sections";
+      ParseIndexList(Data, Offset, Indices);
+ break;
+ case ARMBuildAttrs::Symbol:
+ ScopeName = "SymbolAttributes";
+ IndexName = "Symbols";
+      ParseIndexList(Data, Offset, Indices);
+ break;
+ default:
+ errs() << "unrecognised tag: 0x" << utohexstr(Tag) << '\n';
+ return;
+ }
+
+ if (SW) {
+ DictScope ASS(*SW, ScopeName);
+      if (!Indices.empty())
+        SW->printList(IndexName, Indices);
+ ParseAttributeList(Data, Offset, Length);
+ } else {
+ ParseAttributeList(Data, Offset, Length);
+ }
+ }
+}
+
+void ARMAttributeParser::Parse(ArrayRef<uint8_t> Section, bool isLittle) {
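+  // Skip the one-byte format version ('A') at the start of the section.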
+ size_t Offset = 1;
+ unsigned SectionNumber = 0;
+
+ while (Offset < Section.size()) {
+ uint32_t SectionLength = isLittle ?
+ support::endian::read32le(Section.data() + Offset) :
+ support::endian::read32be(Section.data() + Offset);
+
+ if (SW) {
+ SW->startLine() << "Section " << ++SectionNumber << " {\n";
+ SW->indent();
+ }
+
+ ParseSubsection(Section.data() + Offset, SectionLength);
+ Offset = Offset + SectionLength;
+
+ if (SW) {
+ SW->unindent();
+ SW->startLine() << "}\n";
+ }
+ }
+}
+}
+
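// A minimal, hand-assembled .ARM.attributes blob for Parse (not from the
// patch), assuming the default (null-ScopedPrinter) constructor from the
// accompanying header. Layout after the 'A' version byte: one subsection of
// <u32 length> "aeabi\0" <tag> <u32 size> <attribute bytes...>.
#include "llvm/Support/ARMAttributeParser.h"

void parseExample() {
  static const uint8_t Section[] = {
      'A',                        // format-version byte, skipped by Parse
      0x11, 0x00, 0x00, 0x00,     // subsection length = 17 (excludes 'A')
      'a', 'e', 'a', 'b', 'i', 0, // vendor name
      0x01,                       // Tag_File
      0x07, 0x00, 0x00, 0x00,     // file-attribute block size
      0x06, 0x0A,                 // Tag_CPU_arch = 10 -> "ARM v7"
  };
  llvm::ARMAttributeParser Parser;
  Parser.Parse(Section, /*isLittle=*/true);
  // The CPU_arch handler above records tag 6 with value 10.
}
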
diff --git a/lib/Support/BinaryStreamError.cpp b/lib/Support/BinaryStreamError.cpp
new file mode 100644
index 000000000000..60f5e21f041a
--- /dev/null
+++ b/lib/Support/BinaryStreamError.cpp
@@ -0,0 +1,56 @@
+//===- BinaryStreamError.cpp - Error extensions for streams -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/BinaryStreamError.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+char BinaryStreamError::ID = 0;
+
+BinaryStreamError::BinaryStreamError(stream_error_code C)
+ : BinaryStreamError(C, "") {}
+
+BinaryStreamError::BinaryStreamError(StringRef Context)
+ : BinaryStreamError(stream_error_code::unspecified, Context) {}
+
+BinaryStreamError::BinaryStreamError(stream_error_code C, StringRef Context)
+ : Code(C) {
+ ErrMsg = "Stream Error: ";
+ switch (C) {
+ case stream_error_code::unspecified:
+ ErrMsg += "An unspecified error has occurred.";
+ break;
+ case stream_error_code::stream_too_short:
+ ErrMsg += "The stream is too short to perform the requested operation.";
+ break;
+ case stream_error_code::invalid_array_size:
+ ErrMsg += "The buffer size is not a multiple of the array element size.";
+ break;
+ case stream_error_code::invalid_offset:
+ ErrMsg += "The specified offset is invalid for the current stream.";
+ break;
+ case stream_error_code::filesystem_error:
+ ErrMsg += "An I/O error occurred on the file system.";
+ break;
+ }
+
+ if (!Context.empty()) {
+ ErrMsg += " ";
+ ErrMsg += Context;
+ }
+}
+
+void BinaryStreamError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
+
+StringRef BinaryStreamError::getErrorMessage() const { return ErrMsg; }
+
+std::error_code BinaryStreamError::convertToErrorCode() const {
+ return inconvertibleErrorCode();
+}
diff --git a/lib/Support/BinaryStreamReader.cpp b/lib/Support/BinaryStreamReader.cpp
new file mode 100644
index 000000000000..c7a2e0ddb179
--- /dev/null
+++ b/lib/Support/BinaryStreamReader.cpp
@@ -0,0 +1,95 @@
+//===- BinaryStreamReader.cpp - Reads objects from a binary stream --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/BinaryStreamReader.h"
+
+#include "llvm/Support/BinaryStreamError.h"
+#include "llvm/Support/BinaryStreamRef.h"
+
+using namespace llvm;
+
+BinaryStreamReader::BinaryStreamReader(BinaryStreamRef S)
+ : Stream(S), Offset(0) {}
+
+Error BinaryStreamReader::readLongestContiguousChunk(
+ ArrayRef<uint8_t> &Buffer) {
+ if (auto EC = Stream.readLongestContiguousChunk(Offset, Buffer))
+ return EC;
+ Offset += Buffer.size();
+ return Error::success();
+}
+
+Error BinaryStreamReader::readBytes(ArrayRef<uint8_t> &Buffer, uint32_t Size) {
+ if (auto EC = Stream.readBytes(Offset, Size, Buffer))
+ return EC;
+ Offset += Size;
+ return Error::success();
+}
+
+Error BinaryStreamReader::readCString(StringRef &Dest) {
+ // TODO: This could be made more efficient by using readLongestContiguousChunk
+ // and searching for null terminators in the resulting buffer.
+
+ uint32_t Length = 0;
+ // First compute the length of the string by reading 1 byte at a time.
+ uint32_t OriginalOffset = getOffset();
+ const char *C;
+ while (true) {
+ if (auto EC = readObject(C))
+ return EC;
+ if (*C == '\0')
+ break;
+ ++Length;
+ }
+ // Now go back and request a reference for that many bytes.
+ uint32_t NewOffset = getOffset();
+ setOffset(OriginalOffset);
+
+ if (auto EC = readFixedString(Dest, Length))
+ return EC;
+
+ // Now set the offset back to where it was after we calculated the length.
+ setOffset(NewOffset);
+ return Error::success();
+}
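
// A minimal usage sketch for the two-pass readCString above. It assumes
// BinaryByteStream from the companion BinaryByteStream.h (a BinaryStream
// over one contiguous in-memory buffer), which is not part of this file.
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamReader.h"

llvm::Error readTwoStrings(llvm::ArrayRef<uint8_t> Data) {
  llvm::BinaryByteStream Stream(Data, llvm::support::little);
  llvm::BinaryStreamReader Reader(Stream);
  llvm::StringRef First, Second;
  if (auto EC = Reader.readCString(First))
    return EC;
  // The reader is now positioned one byte past the first NUL terminator.
  return Reader.readCString(Second);
}
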
+
+Error BinaryStreamReader::readFixedString(StringRef &Dest, uint32_t Length) {
+ ArrayRef<uint8_t> Bytes;
+ if (auto EC = readBytes(Bytes, Length))
+ return EC;
+ Dest = StringRef(reinterpret_cast<const char *>(Bytes.begin()), Bytes.size());
+ return Error::success();
+}
+
+Error BinaryStreamReader::readStreamRef(BinaryStreamRef &Ref) {
+ return readStreamRef(Ref, bytesRemaining());
+}
+
+Error BinaryStreamReader::readStreamRef(BinaryStreamRef &Ref, uint32_t Length) {
+ if (bytesRemaining() < Length)
+ return make_error<BinaryStreamError>(stream_error_code::stream_too_short);
+ Ref = Stream.slice(Offset, Length);
+ Offset += Length;
+ return Error::success();
+}
+
+Error BinaryStreamReader::skip(uint32_t Amount) {
+ if (Amount > bytesRemaining())
+ return make_error<BinaryStreamError>(stream_error_code::stream_too_short);
+ Offset += Amount;
+ return Error::success();
+}
+
+uint8_t BinaryStreamReader::peek() const {
+ ArrayRef<uint8_t> Buffer;
+ auto EC = Stream.readBytes(Offset, 1, Buffer);
+ assert(!EC && "Cannot peek an empty buffer!");
+ llvm::consumeError(std::move(EC));
+ return Buffer[0];
+}
diff --git a/lib/Support/BinaryStreamWriter.cpp b/lib/Support/BinaryStreamWriter.cpp
new file mode 100644
index 000000000000..d60b75642d0f
--- /dev/null
+++ b/lib/Support/BinaryStreamWriter.cpp
@@ -0,0 +1,68 @@
+//===- BinaryStreamWriter.cpp - Writes objects to a BinaryStream ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/BinaryStreamWriter.h"
+
+#include "llvm/Support/BinaryStreamError.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamRef.h"
+
+using namespace llvm;
+
+BinaryStreamWriter::BinaryStreamWriter(WritableBinaryStreamRef S)
+ : Stream(S), Offset(0) {}
+
+Error BinaryStreamWriter::writeBytes(ArrayRef<uint8_t> Buffer) {
+ if (auto EC = Stream.writeBytes(Offset, Buffer))
+ return EC;
+ Offset += Buffer.size();
+ return Error::success();
+}
+
+Error BinaryStreamWriter::writeCString(StringRef Str) {
+ if (auto EC = writeFixedString(Str))
+ return EC;
+ if (auto EC = writeObject('\0'))
+ return EC;
+
+ return Error::success();
+}
+
+Error BinaryStreamWriter::writeFixedString(StringRef Str) {
+ return writeBytes(ArrayRef<uint8_t>(Str.bytes_begin(), Str.bytes_end()));
+}
+
+Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref) {
+ return writeStreamRef(Ref, Ref.getLength());
+}
+
+Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref, uint32_t Length) {
+ BinaryStreamReader SrcReader(Ref.slice(0, Length));
+ // This is a bit tricky. If we just call readBytes, we are requiring that it
+ // return us the entire stream as a contiguous buffer. There is no guarantee
+ // this can be satisfied by returning a reference straight from the buffer, as
+ // an implementation may not store all data in a single contiguous buffer. So
+ // we iterate over each contiguous chunk, writing each one in succession.
+ while (SrcReader.bytesRemaining() > 0) {
+ ArrayRef<uint8_t> Chunk;
+ if (auto EC = SrcReader.readLongestContiguousChunk(Chunk))
+ return EC;
+ if (auto EC = writeBytes(Chunk))
+ return EC;
+ }
+ return Error::success();
+}
+
+Error BinaryStreamWriter::padToAlignment(uint32_t Align) {
+ uint32_t NewOffset = alignTo(Offset, Align);
+ if (NewOffset > getLength())
+ return make_error<BinaryStreamError>(stream_error_code::stream_too_short);
+ Offset = NewOffset;
+ return Error::success();
+}
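
// A usage sketch tying the writer together (not from the patch):
// MutableBinaryByteStream is assumed from the companion BinaryByteStream.h,
// and writeInteger from the writer's own header.
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamWriter.h"

llvm::Error writeRecord(llvm::MutableArrayRef<uint8_t> Buffer) {
  llvm::MutableBinaryByteStream Stream(Buffer, llvm::support::little);
  llvm::BinaryStreamWriter Writer(Stream);
  if (auto EC = Writer.writeCString("hdr")) // 4 bytes including the NUL
    return EC;
  if (auto EC = Writer.padToAlignment(4))   // a no-op here, already aligned
    return EC;
  return Writer.writeInteger<uint32_t>(42);
}
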
diff --git a/lib/Support/BranchProbability.cpp b/lib/Support/BranchProbability.cpp
index 1c41659cf8df..44ad110d456a 100644
--- a/lib/Support/BranchProbability.cpp
+++ b/lib/Support/BranchProbability.cpp
@@ -32,7 +32,9 @@ raw_ostream &BranchProbability::print(raw_ostream &OS) const {
Percent);
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void BranchProbability::dump() const { print(dbgs()) << '\n'; }
+#endif
BranchProbability::BranchProbability(uint32_t Numerator, uint32_t Denominator) {
assert(Denominator > 0 && "Denominator cannot be 0!");
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 15418ad2fd06..491614b4bf63 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -9,6 +9,9 @@ elseif( CMAKE_HOST_UNIX )
if( HAVE_LIBDL )
set(system_libs ${system_libs} ${CMAKE_DL_LIBS})
endif()
+ if( HAVE_BACKTRACE )
+ set(system_libs ${system_libs} ${Backtrace_LIBRARIES})
+ endif()
if(LLVM_ENABLE_TERMINFO)
if(HAVE_TERMINFO)
set(system_libs ${system_libs} ${TERMINFO_LIBS})
@@ -17,7 +20,7 @@ elseif( CMAKE_HOST_UNIX )
if( LLVM_ENABLE_THREADS AND HAVE_LIBATOMIC )
set(system_libs ${system_libs} atomic)
endif()
- set(system_libs ${system_libs} ${PTHREAD_LIB})
+ set(system_libs ${system_libs} ${LLVM_PTHREAD_LIB})
if ( LLVM_ENABLE_ZLIB AND HAVE_LIBZ )
set(system_libs ${system_libs} z)
endif()
@@ -31,8 +34,12 @@ add_llvm_library(LLVMSupport
APInt.cpp
APSInt.cpp
ARMBuildAttrs.cpp
+ ARMAttributeParser.cpp
ARMWinEH.cpp
Allocator.cpp
+ BinaryStreamError.cpp
+ BinaryStreamReader.cpp
+ BinaryStreamWriter.cpp
BlockFrequency.cpp
BranchProbability.cpp
CachePruning.cpp
@@ -46,6 +53,7 @@ add_llvm_library(LLVMSupport
CrashRecoveryContext.cpp
DataExtractor.cpp
Debug.cpp
+ DebugCounter.cpp
DeltaAlgorithm.cpp
DAGDeltaAlgorithm.cpp
Dwarf.cpp
@@ -66,6 +74,7 @@ add_llvm_library(LLVMSupport
LineIterator.cpp
Locale.cpp
LockFileManager.cpp
+ LowLevelType.cpp
ManagedStatic.cpp
MathExtras.cpp
MemoryBuffer.cpp
@@ -134,7 +143,7 @@ add_llvm_library(LLVMSupport
Windows
${LLVM_MAIN_INCLUDE_DIR}/llvm/ADT
${LLVM_MAIN_INCLUDE_DIR}/llvm/Support
-
+ ${Backtrace_INCLUDE_DIRS}
LINK_LIBS ${system_libs}
)
diff --git a/lib/Support/CachePruning.cpp b/lib/Support/CachePruning.cpp
index 3831625962ca..aca123639565 100644
--- a/lib/Support/CachePruning.cpp
+++ b/lib/Support/CachePruning.cpp
@@ -15,6 +15,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
@@ -33,8 +34,75 @@ static void writeTimestampFile(StringRef TimestampFile) {
raw_fd_ostream Out(TimestampFile.str(), EC, sys::fs::F_None);
}
+static Expected<std::chrono::seconds> parseDuration(StringRef Duration) {
+ if (Duration.empty())
+ return make_error<StringError>("Duration must not be empty",
+ inconvertibleErrorCode());
+
+ StringRef NumStr = Duration.slice(0, Duration.size()-1);
+ uint64_t Num;
+ if (NumStr.getAsInteger(0, Num))
+ return make_error<StringError>("'" + NumStr + "' not an integer",
+ inconvertibleErrorCode());
+
+ switch (Duration.back()) {
+ case 's':
+ return std::chrono::seconds(Num);
+ case 'm':
+ return std::chrono::minutes(Num);
+ case 'h':
+ return std::chrono::hours(Num);
+ default:
+ return make_error<StringError>("'" + Duration +
+ "' must end with one of 's', 'm' or 'h'",
+ inconvertibleErrorCode());
+ }
+}
+
+Expected<CachePruningPolicy>
+llvm::parseCachePruningPolicy(StringRef PolicyStr) {
+ CachePruningPolicy Policy;
+ std::pair<StringRef, StringRef> P = {"", PolicyStr};
+ while (!P.second.empty()) {
+ P = P.second.split(':');
+
+ StringRef Key, Value;
+ std::tie(Key, Value) = P.first.split('=');
+ if (Key == "prune_interval") {
+ auto DurationOrErr = parseDuration(Value);
+ if (!DurationOrErr)
+ return DurationOrErr.takeError();
+ Policy.Interval = *DurationOrErr;
+ } else if (Key == "prune_after") {
+ auto DurationOrErr = parseDuration(Value);
+ if (!DurationOrErr)
+ return DurationOrErr.takeError();
+ Policy.Expiration = *DurationOrErr;
+ } else if (Key == "cache_size") {
+ if (Value.back() != '%')
+ return make_error<StringError>("'" + Value + "' must be a percentage",
+ inconvertibleErrorCode());
+ StringRef SizeStr = Value.slice(0, Value.size() - 1);
+ uint64_t Size;
+ if (SizeStr.getAsInteger(0, Size))
+ return make_error<StringError>("'" + SizeStr + "' not an integer",
+ inconvertibleErrorCode());
+ if (Size > 100)
+ return make_error<StringError>("'" + SizeStr +
+ "' must be between 0 and 100",
+ inconvertibleErrorCode());
+ Policy.PercentageOfAvailableSpace = Size;
+ } else {
+ return make_error<StringError>("Unknown key: '" + Key + "'",
+ inconvertibleErrorCode());
+ }
+ }
+
+ return Policy;
+}
+
/// Prune the cache of files that haven't been accessed in a long time.
-bool CachePruning::prune() {
+bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) {
using namespace std::chrono;
if (Path.empty())
@@ -47,7 +115,11 @@ bool CachePruning::prune() {
if (!isPathDir)
return false;
- if (Expiration == seconds(0) && PercentageOfAvailableSpace == 0) {
+ Policy.PercentageOfAvailableSpace =
+ std::min(Policy.PercentageOfAvailableSpace, 100u);
+
+ if (Policy.Expiration == seconds(0) &&
+ Policy.PercentageOfAvailableSpace == 0) {
DEBUG(dbgs() << "No pruning settings set, exit early\n");
// Nothing will be pruned, early exit
return false;
@@ -67,12 +139,12 @@ bool CachePruning::prune() {
return false;
}
} else {
- if (Interval == seconds(0)) {
+ if (Policy.Interval == seconds(0)) {
// Check whether the time stamp is older than our pruning interval.
// If not, do nothing.
const auto TimeStampModTime = FileStatus.getLastModificationTime();
auto TimeStampAge = CurrentTime - TimeStampModTime;
- if (TimeStampAge <= Interval) {
+ if (TimeStampAge <= Policy.Interval) {
DEBUG(dbgs() << "Timestamp file too recent ("
<< duration_cast<seconds>(TimeStampAge).count()
<< "s old), do not prune.\n");
@@ -85,7 +157,7 @@ bool CachePruning::prune() {
writeTimestampFile(TimestampFile);
}
- bool ShouldComputeSize = (PercentageOfAvailableSpace > 0);
+ bool ShouldComputeSize = (Policy.PercentageOfAvailableSpace > 0);
// Keep track of space
std::set<std::pair<uint64_t, std::string>> FileSizes;
@@ -108,8 +180,11 @@ bool CachePruning::prune() {
// Walk all of the files within this directory.
for (sys::fs::directory_iterator File(CachePathNative, EC), FileEnd;
File != FileEnd && !EC; File.increment(EC)) {
- // Do not touch the timestamp.
- if (File->path() == TimestampFile)
+ // Ignore any files not beginning with the string "llvmcache-". This
+ // includes the timestamp file as well as any files created by the user.
+ // This acts as a safeguard against data loss if the user specifies the
+ // wrong directory as their cache directory.
+ if (!sys::path::filename(File->path()).startswith("llvmcache-"))
continue;
// Look at this file. If we can't stat it, there's nothing interesting
@@ -122,7 +197,7 @@ bool CachePruning::prune() {
// If the file hasn't been used recently enough, delete it
const auto FileAccessTime = FileStatus.getLastAccessedTime();
auto FileAge = CurrentTime - FileAccessTime;
- if (FileAge > Expiration) {
+ if (FileAge > Policy.Expiration) {
DEBUG(dbgs() << "Remove " << File->path() << " ("
<< duration_cast<seconds>(FileAge).count() << "s old)\n");
sys::fs::remove(File->path());
@@ -143,9 +218,11 @@ bool CachePruning::prune() {
auto AvailableSpace = TotalSize + SpaceInfo.free;
auto FileAndSize = FileSizes.rbegin();
DEBUG(dbgs() << "Occupancy: " << ((100 * TotalSize) / AvailableSpace)
- << "% target is: " << PercentageOfAvailableSpace << "\n");
+ << "% target is: " << Policy.PercentageOfAvailableSpace
+ << "\n");
// Remove the oldest accessed files first, till we get below the threshold
- while (((100 * TotalSize) / AvailableSpace) > PercentageOfAvailableSpace &&
+ while (((100 * TotalSize) / AvailableSpace) >
+ Policy.PercentageOfAvailableSpace &&
FileAndSize != FileSizes.rend()) {
// Remove the file.
sys::fs::remove(FileAndSize->second);
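
The change above splits the old CachePruning class into a policy-string parser and a free pruning function. A minimal caller sketch, assuming the declarations this diff adds to llvm/Support/CachePruning.h (the helper name pruneWithPolicy is illustrative):

    #include "llvm/Support/CachePruning.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Parse a policy string such as "prune_interval=1h:cache_size=50%" and
    // prune an LTO cache directory with it.
    static bool pruneWithPolicy(StringRef CacheDir, StringRef PolicyStr) {
      Expected<CachePruningPolicy> Policy = parseCachePruningPolicy(PolicyStr);
      if (!Policy) {
        logAllUnhandledErrors(Policy.takeError(), errs(), "cache policy: ");
        return false;
      }
      return pruneCache(CacheDir, *Policy);
    }
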
diff --git a/lib/Support/Chrono.cpp b/lib/Support/Chrono.cpp
index cdadbd879979..daccaf1fc103 100644
--- a/lib/Support/Chrono.cpp
+++ b/lib/Support/Chrono.cpp
@@ -16,6 +16,13 @@ namespace llvm {
using namespace sys;
+const char llvm::detail::unit<std::ratio<3600>>::value[] = "h";
+const char llvm::detail::unit<std::ratio<60>>::value[] = "m";
+const char llvm::detail::unit<std::ratio<1>>::value[] = "s";
+const char llvm::detail::unit<std::milli>::value[] = "ms";
+const char llvm::detail::unit<std::micro>::value[] = "us";
+const char llvm::detail::unit<std::nano>::value[] = "ns";
+
static inline struct tm getStructTM(TimePoint<> TP) {
struct tm Storage;
std::time_t OurTime = toTimeT(TP);
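
These unit strings back the formatv provider for std::chrono durations declared in llvm/Support/Chrono.h. A quick sketch (the exact spacing of the rendered value is the provider's choice):

    #include "llvm/Support/Chrono.h"
    #include "llvm/Support/FormatVariadic.h"
    #include "llvm/Support/raw_ostream.h"

    void printDurations() {
      // The unit suffixes ("ms", "h", ...) come from the detail::unit
      // specializations defined above.
      llvm::outs() << llvm::formatv("{0}", std::chrono::milliseconds(250)) << "\n";
      llvm::outs() << llvm::formatv("{0}", std::chrono::hours(2)) << "\n";
    }
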
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 3889902eea54..f4a9108b8544 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -123,7 +123,7 @@ public:
void ResetAllOptionOccurrences();
bool ParseCommandLineOptions(int argc, const char *const *argv,
- StringRef Overview, bool IgnoreErrors);
+ StringRef Overview, raw_ostream *Errs = nullptr);
void addLiteralOption(Option &Opt, SubCommand *SC, StringRef Name) {
if (Opt.hasArgStr())
@@ -1013,9 +1013,9 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
}
bool cl::ParseCommandLineOptions(int argc, const char *const *argv,
- StringRef Overview, bool IgnoreErrors) {
+ StringRef Overview, raw_ostream *Errs) {
return GlobalParser->ParseCommandLineOptions(argc, argv, Overview,
- IgnoreErrors);
+ Errs);
}
void CommandLineParser::ResetAllOptionOccurrences() {
@@ -1030,7 +1030,7 @@ void CommandLineParser::ResetAllOptionOccurrences() {
bool CommandLineParser::ParseCommandLineOptions(int argc,
const char *const *argv,
StringRef Overview,
- bool IgnoreErrors) {
+ raw_ostream *Errs) {
assert(hasOptions() && "No options specified!");
// Expand response files.
@@ -1045,6 +1045,9 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
ProgramName = sys::path::filename(StringRef(argv[0]));
ProgramOverview = Overview;
+ bool IgnoreErrors = Errs;
+ if (!Errs)
+ Errs = &errs();
bool ErrorParsing = false;
// Check out the positional arguments to collect information about them.
@@ -1097,15 +1100,14 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
// not specified after an option that eats all extra arguments, or this
// one will never get any!
//
- if (!IgnoreErrors) {
+ if (!IgnoreErrors)
Opt->error("error - option can never match, because "
"another positional argument will match an "
"unbounded number of values, and this option"
" does not require a value!");
- errs() << ProgramName << ": CommandLine Error: Option '"
- << Opt->ArgStr << "' is all messed up!\n";
- errs() << PositionalOpts.size();
- }
+ *Errs << ProgramName << ": CommandLine Error: Option '" << Opt->ArgStr
+ << "' is all messed up!\n";
+ *Errs << PositionalOpts.size();
ErrorParsing = true;
}
UnboundedFound |= EatsUnboundedNumberOfValues(Opt);
@@ -1200,15 +1202,13 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
if (!Handler) {
if (SinkOpts.empty()) {
- if (!IgnoreErrors) {
- errs() << ProgramName << ": Unknown command line argument '"
- << argv[i] << "'. Try: '" << argv[0] << " -help'\n";
-
- if (NearestHandler) {
- // If we know a near match, report it as well.
- errs() << ProgramName << ": Did you mean '-" << NearestHandlerString
- << "'?\n";
- }
+ *Errs << ProgramName << ": Unknown command line argument '" << argv[i]
+ << "'. Try: '" << argv[0] << " -help'\n";
+
+ if (NearestHandler) {
+ // If we know a near match, report it as well.
+ *Errs << ProgramName << ": Did you mean '-" << NearestHandlerString
+ << "'?\n";
}
ErrorParsing = true;
@@ -1231,22 +1231,18 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
// Check and handle positional arguments now...
if (NumPositionalRequired > PositionalVals.size()) {
- if (!IgnoreErrors) {
- errs() << ProgramName
+ *Errs << ProgramName
<< ": Not enough positional command line arguments specified!\n"
<< "Must specify at least " << NumPositionalRequired
<< " positional argument" << (NumPositionalRequired > 1 ? "s" : "")
<< ": See: " << argv[0] << " - help\n";
- }
ErrorParsing = true;
} else if (!HasUnlimitedPositionals &&
PositionalVals.size() > PositionalOpts.size()) {
- if (!IgnoreErrors) {
- errs() << ProgramName << ": Too many positional arguments specified!\n"
- << "Can specify at most " << PositionalOpts.size()
- << " positional arguments: See: " << argv[0] << " -help\n";
- }
+ *Errs << ProgramName << ": Too many positional arguments specified!\n"
+ << "Can specify at most " << PositionalOpts.size()
+ << " positional arguments: See: " << argv[0] << " -help\n";
ErrorParsing = true;
} else if (!ConsumeAfterOpt) {
@@ -1404,8 +1400,8 @@ static StringRef getValueStr(const Option &O, StringRef DefaultMsg) {
// Return the width of the option tag for printing...
size_t alias::getOptionWidth() const { return ArgStr.size() + 6; }
-static void printHelpStr(StringRef HelpStr, size_t Indent,
- size_t FirstLineIndentedBy) {
+void Option::printHelpStr(StringRef HelpStr, size_t Indent,
+ size_t FirstLineIndentedBy) {
std::pair<StringRef, StringRef> Split = HelpStr.split('\n');
outs().indent(Indent - FirstLineIndentedBy) << " - " << Split.first << "\n";
while (!Split.second.empty()) {
@@ -1448,7 +1444,7 @@ void basic_parser_impl::printOptionInfo(const Option &O,
if (!ValName.empty())
outs() << "=<" << getValueStr(O, ValName) << '>';
- printHelpStr(O.HelpStr, GlobalWidth, getOptionWidth(O));
+ Option::printHelpStr(O.HelpStr, GlobalWidth, getOptionWidth(O));
}
void basic_parser_impl::printOptionName(const Option &O,
@@ -1587,7 +1583,7 @@ void generic_parser_base::printOptionInfo(const Option &O,
size_t GlobalWidth) const {
if (O.hasArgStr()) {
outs() << " -" << O.ArgStr;
- printHelpStr(O.HelpStr, GlobalWidth, O.ArgStr.size() + 6);
+ Option::printHelpStr(O.HelpStr, GlobalWidth, O.ArgStr.size() + 6);
for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
size_t NumSpaces = GlobalWidth - getOption(i).size() - 8;
@@ -1600,7 +1596,7 @@ void generic_parser_base::printOptionInfo(const Option &O,
for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
auto Option = getOption(i);
outs() << " -" << Option;
- printHelpStr(getDescription(i), GlobalWidth, Option.size() + 8);
+ Option::printHelpStr(getDescription(i), GlobalWidth, Option.size() + 8);
}
}
}
@@ -1856,10 +1852,11 @@ public:
// Helper function for printOptions().
// It shall return a negative value if A's name should be lexicographically
- // ordered before B's name. It returns a value greater equal zero otherwise.
+ // ordered before B's name. It returns a value greater than zero if B's name
+ // should be ordered before A's name, and it returns 0 otherwise.
static int OptionCategoryCompare(OptionCategory *const *A,
OptionCategory *const *B) {
- return (*A)->getName() == (*B)->getName();
+ return (*A)->getName().compare((*B)->getName());
}
// Make sure we inherit our base class's operator=()
@@ -2182,5 +2179,6 @@ void cl::ResetAllOptionOccurrences() {
void LLVMParseCommandLineOptions(int argc, const char *const *argv,
const char *Overview) {
- llvm::cl::ParseCommandLineOptions(argc, argv, StringRef(Overview), true);
+ llvm::cl::ParseCommandLineOptions(argc, argv, StringRef(Overview),
+ &llvm::nulls());
}
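
With the bool parameter replaced by a stream, callers that used to pass IgnoreErrors=true now pass a sink; passing any non-null stream suppresses the hard Opt->error() path and routes the diagnostics to that stream instead. A sketch:

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    // Returns false on a parse failure; diagnostics land in Diags rather than
    // on stderr. Passing nullptr (the default) keeps the old errs() behavior.
    bool parseQuietly(int argc, const char *const *argv, std::string &Diags) {
      llvm::raw_string_ostream OS(Diags);
      return llvm::cl::ParseCommandLineOptions(argc, argv, "tool", &OS);
    }
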
diff --git a/lib/Support/Compression.cpp b/lib/Support/Compression.cpp
index 5d556462e89c..c279d10f6c61 100644
--- a/lib/Support/Compression.cpp
+++ b/lib/Support/Compression.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#if LLVM_ENABLE_ZLIB == 1 && HAVE_ZLIB_H
#include <zlib.h>
@@ -24,6 +25,10 @@
using namespace llvm;
#if LLVM_ENABLE_ZLIB == 1 && HAVE_LIBZ
+static Error createError(StringRef Err) {
+ return make_error<StringError>(Err, inconvertibleErrorCode());
+}
+
static int encodeZlibCompressionLevel(zlib::CompressionLevel Level) {
switch (Level) {
case zlib::NoCompression: return 0;
@@ -34,53 +39,59 @@ static int encodeZlibCompressionLevel(zlib::CompressionLevel Level) {
llvm_unreachable("Invalid zlib::CompressionLevel!");
}
-static zlib::Status encodeZlibReturnValue(int ReturnValue) {
- switch (ReturnValue) {
- case Z_OK: return zlib::StatusOK;
- case Z_MEM_ERROR: return zlib::StatusOutOfMemory;
- case Z_BUF_ERROR: return zlib::StatusBufferTooShort;
- case Z_STREAM_ERROR: return zlib::StatusInvalidArg;
- case Z_DATA_ERROR: return zlib::StatusInvalidData;
- default: llvm_unreachable("unknown zlib return status!");
+static StringRef convertZlibCodeToString(int Code) {
+ switch (Code) {
+ case Z_MEM_ERROR:
+ return "zlib error: Z_MEM_ERROR";
+ case Z_BUF_ERROR:
+ return "zlib error: Z_BUF_ERROR";
+ case Z_STREAM_ERROR:
+ return "zlib error: Z_STREAM_ERROR";
+ case Z_DATA_ERROR:
+ return "zlib error: Z_DATA_ERROR";
+ case Z_OK:
+ default:
+ llvm_unreachable("unknown or unexpected zlib status code");
}
}
bool zlib::isAvailable() { return true; }
-zlib::Status zlib::compress(StringRef InputBuffer,
- SmallVectorImpl<char> &CompressedBuffer,
- CompressionLevel Level) {
+
+Error zlib::compress(StringRef InputBuffer,
+ SmallVectorImpl<char> &CompressedBuffer,
+ CompressionLevel Level) {
unsigned long CompressedSize = ::compressBound(InputBuffer.size());
CompressedBuffer.resize(CompressedSize);
int CLevel = encodeZlibCompressionLevel(Level);
- Status Res = encodeZlibReturnValue(::compress2(
- (Bytef *)CompressedBuffer.data(), &CompressedSize,
- (const Bytef *)InputBuffer.data(), InputBuffer.size(), CLevel));
+ int Res = ::compress2((Bytef *)CompressedBuffer.data(), &CompressedSize,
+ (const Bytef *)InputBuffer.data(), InputBuffer.size(),
+ CLevel);
// Tell MemorySanitizer that zlib output buffer is fully initialized.
// This avoids a false report when running LLVM with uninstrumented ZLib.
__msan_unpoison(CompressedBuffer.data(), CompressedSize);
CompressedBuffer.resize(CompressedSize);
- return Res;
+ return Res ? createError(convertZlibCodeToString(Res)) : Error::success();
}
-zlib::Status zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer,
- size_t &UncompressedSize) {
- Status Res = encodeZlibReturnValue(
+Error zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer,
+ size_t &UncompressedSize) {
+ int Res =
::uncompress((Bytef *)UncompressedBuffer, (uLongf *)&UncompressedSize,
- (const Bytef *)InputBuffer.data(), InputBuffer.size()));
+ (const Bytef *)InputBuffer.data(), InputBuffer.size());
// Tell MemorySanitizer that zlib output buffer is fully initialized.
// This avoids a false report when running LLVM with uninstrumented ZLib.
__msan_unpoison(UncompressedBuffer, UncompressedSize);
- return Res;
+ return Res ? createError(convertZlibCodeToString(Res)) : Error::success();
}
-zlib::Status zlib::uncompress(StringRef InputBuffer,
- SmallVectorImpl<char> &UncompressedBuffer,
- size_t UncompressedSize) {
+Error zlib::uncompress(StringRef InputBuffer,
+ SmallVectorImpl<char> &UncompressedBuffer,
+ size_t UncompressedSize) {
UncompressedBuffer.resize(UncompressedSize);
- Status Res =
+ Error E =
uncompress(InputBuffer, UncompressedBuffer.data(), UncompressedSize);
UncompressedBuffer.resize(UncompressedSize);
- return Res;
+ return E;
}
uint32_t zlib::crc32(StringRef Buffer) {
@@ -89,19 +100,19 @@ uint32_t zlib::crc32(StringRef Buffer) {
#else
bool zlib::isAvailable() { return false; }
-zlib::Status zlib::compress(StringRef InputBuffer,
- SmallVectorImpl<char> &CompressedBuffer,
- CompressionLevel Level) {
- return zlib::StatusUnsupported;
+Error zlib::compress(StringRef InputBuffer,
+ SmallVectorImpl<char> &CompressedBuffer,
+ CompressionLevel Level) {
+ llvm_unreachable("zlib::compress is unavailable");
}
-zlib::Status zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer,
- size_t &UncompressedSize) {
- return zlib::StatusUnsupported;
+Error zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer,
+ size_t &UncompressedSize) {
+ llvm_unreachable("zlib::uncompress is unavailable");
}
-zlib::Status zlib::uncompress(StringRef InputBuffer,
- SmallVectorImpl<char> &UncompressedBuffer,
- size_t UncompressedSize) {
- return zlib::StatusUnsupported;
+Error zlib::uncompress(StringRef InputBuffer,
+ SmallVectorImpl<char> &UncompressedBuffer,
+ size_t UncompressedSize) {
+ llvm_unreachable("zlib::uncompress is unavailable");
}
uint32_t zlib::crc32(StringRef Buffer) {
llvm_unreachable("zlib::crc32 is unavailable");
diff --git a/lib/Support/DebugCounter.cpp b/lib/Support/DebugCounter.cpp
new file mode 100644
index 000000000000..29dae8a20f00
--- /dev/null
+++ b/lib/Support/DebugCounter.cpp
@@ -0,0 +1,108 @@
+#include "llvm/Support/DebugCounter.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Options.h"
+
+using namespace llvm;
+
+// This class overrides the default list implementation of printing so we
+// can pretty print the list of debug counter options. This type of
+// dynamic option is pretty rare (basically this and pass lists).
+class DebugCounterList : public cl::list<std::string, DebugCounter> {
+private:
+ using Base = cl::list<std::string, DebugCounter>;
+
+public:
+ template <class... Mods>
+ explicit DebugCounterList(Mods &&... Ms) : Base(std::forward<Mods>(Ms)...) {}
+
+private:
+ void printOptionInfo(size_t GlobalWidth) const override {
+    // This is a variant of generic_parser_base::printOptionInfo. Sadly,
+ // it's not easy to make it more usable. We could get it to print these as
+ // options if we were a cl::opt and registered them, but lists don't have
+ // options, nor does the parser for std::string. The other mechanisms for
+ // options are global and would pollute the global namespace with our
+ // counters. Rather than go that route, we have just overridden the
+ // printing, which only a few things call anyway.
+ outs() << " -" << ArgStr;
+ // All of the other options in CommandLine.cpp use ArgStr.size() + 6 for
+ // width, so we do the same.
+ Option::printHelpStr(HelpStr, GlobalWidth, ArgStr.size() + 6);
+ const auto &CounterInstance = DebugCounter::instance();
+ for (auto Name : CounterInstance) {
+ const auto Info =
+ CounterInstance.getCounterInfo(CounterInstance.getCounterId(Name));
+ size_t NumSpaces = GlobalWidth - Info.first.size() - 8;
+ outs() << " =" << Info.first;
+ outs().indent(NumSpaces) << " - " << Info.second << '\n';
+ }
+ }
+};
+
+// Create our command line option.
+static DebugCounterList DebugCounterOption(
+ "debug-counter",
+ cl::desc("Comma separated list of debug counter skip and count"),
+ cl::CommaSeparated, cl::ZeroOrMore, cl::location(DebugCounter::instance()));
+
+static ManagedStatic<DebugCounter> DC;
+
+DebugCounter &DebugCounter::instance() { return *DC; }
+
+// This is called by the command line parser when it sees a value for the
+// debug-counter option defined above.
+void DebugCounter::push_back(const std::string &Val) {
+ if (Val.empty())
+ return;
+ // The strings should come in as counter=value
+ auto CounterPair = StringRef(Val).split('=');
+ if (CounterPair.second.empty()) {
+ errs() << "DebugCounter Error: " << Val << " does not have an = in it\n";
+ return;
+ }
+ // Now we have counter=value.
+ // First, process value.
+ long CounterVal;
+ if (CounterPair.second.getAsInteger(0, CounterVal)) {
+ errs() << "DebugCounter Error: " << CounterPair.second
+ << " is not a number\n";
+ return;
+ }
+ // Now we need to see if this is the skip or the count, remove the suffix, and
+ // add it to the counter values.
+ if (CounterPair.first.endswith("-skip")) {
+ auto CounterName = CounterPair.first.drop_back(5);
+ unsigned CounterID = RegisteredCounters.idFor(CounterName);
+ if (!CounterID) {
+ errs() << "DebugCounter Error: " << CounterName
+ << " is not a registered counter\n";
+ return;
+ }
+
+ auto Res = Counters.insert({CounterID, {0, -1}});
+ Res.first->second.first = CounterVal;
+ } else if (CounterPair.first.endswith("-count")) {
+ auto CounterName = CounterPair.first.drop_back(6);
+ unsigned CounterID = RegisteredCounters.idFor(CounterName);
+ if (!CounterID) {
+ errs() << "DebugCounter Error: " << CounterName
+ << " is not a registered counter\n";
+ return;
+ }
+
+ auto Res = Counters.insert({CounterID, {0, -1}});
+ Res.first->second.second = CounterVal;
+ } else {
+ errs() << "DebugCounter Error: " << CounterPair.first
+ << " does not end with -skip or -count\n";
+ }
+}
+
+void DebugCounter::print(raw_ostream &OS) {
+ OS << "Counters and values:\n";
+ for (const auto &KV : Counters)
+ OS << left_justify(RegisteredCounters[KV.first], 32) << ": {"
+ << KV.second.first << "," << KV.second.second << "}\n";
+}
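
On the consumer side, a pass declares a counter with the DEBUG_COUNTER macro from llvm/Support/DebugCounter.h (added alongside this file) and guards individual transformations on it. A sketch with a hypothetical pass-local counter:

    #include "llvm/Support/DebugCounter.h"
    using namespace llvm;

    DEBUG_COUNTER(MyPassXform, "my-pass-transform",
                  "Controls which transformations my-pass performs");

    bool maybeTransform() {
      // With -debug-counter=my-pass-transform-skip=2,my-pass-transform-count=1
      // only the third candidate is transformed.
      if (!DebugCounter::shouldExecute(MyPassXform))
        return false;
      // ... perform the transformation ...
      return true;
    }
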
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp
index 8950e8c919a4..f13da62e4a87 100644
--- a/lib/Support/Dwarf.cpp
+++ b/lib/Support/Dwarf.cpp
@@ -304,6 +304,17 @@ StringRef llvm::dwarf::ApplePropertyString(unsigned Prop) {
}
}
+StringRef llvm::dwarf::UnitTypeString(unsigned UT) {
+ switch (UT) {
+ default:
+ return StringRef();
+#define HANDLE_DW_UT(ID, NAME) \
+ case DW_UT_##NAME: \
+ return "DW_UT_" #NAME;
+#include "llvm/Support/Dwarf.def"
+ }
+}
+
StringRef llvm::dwarf::AtomTypeString(unsigned AT) {
switch (AT) {
case dwarf::DW_ATOM_null:
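
The new helper follows the pattern of the other *String functions in this file; unknown codes yield an empty StringRef. A minimal sketch:

    #include "llvm/Support/Dwarf.h"
    #include "llvm/Support/raw_ostream.h"

    void printUnitType() {
      // Prints "DW_UT_compile"; an unhandled value would print nothing.
      llvm::outs() << llvm::dwarf::UnitTypeString(llvm::dwarf::DW_UT_compile)
                   << "\n";
    }
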
diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp
index ced21e46afe8..92ce6185306a 100644
--- a/lib/Support/DynamicLibrary.cpp
+++ b/lib/Support/DynamicLibrary.cpp
@@ -9,8 +9,6 @@
//
// This file implements the operating system DynamicLibrary concept.
//
-// FIXME: This file leaks ExplicitSymbols and OpenedHandles!
-//
//===----------------------------------------------------------------------===//
#include "llvm/Support/DynamicLibrary.h"
@@ -51,7 +49,7 @@ using namespace llvm::sys;
//=== independent code.
//===----------------------------------------------------------------------===//
-static DenseSet<void *> *OpenedHandles = nullptr;
+static llvm::ManagedStatic<DenseSet<void *> > OpenedHandles;
DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
std::string *errMsg) {
@@ -70,9 +68,6 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
handle = RTLD_DEFAULT;
#endif
- if (!OpenedHandles)
- OpenedHandles = new DenseSet<void *>();
-
// If we've already loaded this library, dlclose() the handle in order to
// keep the internal refcount at +1.
if (!OpenedHandles->insert(handle).second)
@@ -81,6 +76,18 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
return DynamicLibrary(handle);
}
+DynamicLibrary DynamicLibrary::addPermanentLibrary(void *handle,
+ std::string *errMsg) {
+ SmartScopedLock<true> lock(*SymbolsMutex);
+ // If we've already loaded this library, tell the caller.
+ if (!OpenedHandles->insert(handle).second) {
+ if (errMsg) *errMsg = "Library already loaded";
+ return DynamicLibrary();
+ }
+
+ return DynamicLibrary(handle);
+}
+
void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
if (!isValid())
return nullptr;
@@ -121,7 +128,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) {
#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN)
// Now search the libraries.
- if (OpenedHandles) {
+ if (OpenedHandles.isConstructed()) {
for (DenseSet<void *>::iterator I = OpenedHandles->begin(),
E = OpenedHandles->end(); I != E; ++I) {
//lt_ptr ptr = lt_dlsym(*I, symbolName);
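
The new addPermanentLibrary entry point lets an embedder register a handle it already opened itself. A POSIX-flavored sketch (the dlopen flags are illustrative):

    #include "llvm/Support/DynamicLibrary.h"
    #include <dlfcn.h>
    #include <string>

    llvm::sys::DynamicLibrary adoptHandle(const char *Path, std::string &Err) {
      void *Handle = ::dlopen(Path, RTLD_LAZY | RTLD_GLOBAL);
      if (!Handle) {
        Err = ::dlerror();
        return llvm::sys::DynamicLibrary();
      }
      // Fails with "Library already loaded" if the handle is already known.
      return llvm::sys::DynamicLibrary::addPermanentLibrary(Handle, &Err);
    }
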
diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp
index 57e5a8d7871c..731740d012d9 100644
--- a/lib/Support/FileOutputBuffer.cpp
+++ b/lib/Support/FileOutputBuffer.cpp
@@ -57,6 +57,8 @@ FileOutputBuffer::create(StringRef FilePath, size_t Size, unsigned Flags) {
// FIXME: In posix, you use the access() call to check this.
}
break;
+ case sys::fs::file_type::directory_file:
+ return errc::is_a_directory;
default:
if (EC)
return EC;
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index d1b40412a6fc..970ecfd7df90 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -52,25 +52,218 @@
using namespace llvm;
-#if defined(__linux__)
-static ssize_t LLVM_ATTRIBUTE_UNUSED readCpuInfo(void *Buf, size_t Size) {
- // Note: We cannot mmap /proc/cpuinfo here and then process the resulting
- // memory buffer because the 'file' has 0 size (it can be read from only
- // as a stream).
-
- int FD;
- std::error_code EC = sys::fs::openFileForRead("/proc/cpuinfo", FD);
- if (EC) {
- DEBUG(dbgs() << "Unable to open /proc/cpuinfo: " << EC.message() << "\n");
- return -1;
+static std::unique_ptr<llvm::MemoryBuffer>
+ LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() {
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
+ llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
+ if (std::error_code EC = Text.getError()) {
+ llvm::errs() << "Can't read "
+ << "/proc/cpuinfo: " << EC.message() << "\n";
+ return nullptr;
}
- int Ret = read(FD, Buf, Size);
- int CloseStatus = close(FD);
- if (CloseStatus)
- return -1;
- return Ret;
+ return std::move(*Text);
+}
+
+StringRef sys::detail::getHostCPUNameForPowerPC(
+ const StringRef &ProcCpuinfoContent) {
+ // Access to the Processor Version Register (PVR) on PowerPC is privileged,
+ // and so we must use an operating-system interface to determine the current
+ // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
+ const char *generic = "generic";
+
+  // The cpu line is second (after the 'processor: 0' line), so we scan the
+  // content from the beginning until we find it.
+ StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin();
+ StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end();
+
+ StringRef::const_iterator CIP = CPUInfoStart;
+
+ StringRef::const_iterator CPUStart = 0;
+ size_t CPULen = 0;
+
+ // We need to find the first line which starts with cpu, spaces, and a colon.
+ // After the colon, there may be some additional spaces and then the cpu type.
+ while (CIP < CPUInfoEnd && CPUStart == 0) {
+ if (CIP < CPUInfoEnd && *CIP == '\n')
+ ++CIP;
+
+ if (CIP < CPUInfoEnd && *CIP == 'c') {
+ ++CIP;
+ if (CIP < CPUInfoEnd && *CIP == 'p') {
+ ++CIP;
+ if (CIP < CPUInfoEnd && *CIP == 'u') {
+ ++CIP;
+ while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
+ ++CIP;
+
+ if (CIP < CPUInfoEnd && *CIP == ':') {
+ ++CIP;
+ while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
+ ++CIP;
+
+ if (CIP < CPUInfoEnd) {
+ CPUStart = CIP;
+ while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
+ *CIP != ',' && *CIP != '\n'))
+ ++CIP;
+ CPULen = CIP - CPUStart;
+ }
+ }
+ }
+ }
+ }
+
+ if (CPUStart == 0)
+ while (CIP < CPUInfoEnd && *CIP != '\n')
+ ++CIP;
+ }
+
+ if (CPUStart == 0)
+ return generic;
+
+ return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
+ .Case("604e", "604e")
+ .Case("604", "604")
+ .Case("7400", "7400")
+ .Case("7410", "7400")
+ .Case("7447", "7400")
+ .Case("7455", "7450")
+ .Case("G4", "g4")
+ .Case("POWER4", "970")
+ .Case("PPC970FX", "970")
+ .Case("PPC970MP", "970")
+ .Case("G5", "g5")
+ .Case("POWER5", "g5")
+ .Case("A2", "a2")
+ .Case("POWER6", "pwr6")
+ .Case("POWER7", "pwr7")
+ .Case("POWER8", "pwr8")
+ .Case("POWER8E", "pwr8")
+ .Case("POWER8NVL", "pwr8")
+ .Case("POWER9", "pwr9")
+ .Default(generic);
+}
+
+StringRef sys::detail::getHostCPUNameForARM(
+ const StringRef &ProcCpuinfoContent) {
+ // The cpuid register on arm is not accessible from user space. On Linux,
+ // it is exposed through the /proc/cpuinfo file.
+
+  // Split the cpuinfo text into lines; the CPU part line should appear within
+  // the first 32 lines in all cases, so reserve that much inline storage.
+ SmallVector<StringRef, 32> Lines;
+ ProcCpuinfoContent.split(Lines, "\n");
+
+ // Look for the CPU implementer line.
+ StringRef Implementer;
+ StringRef Hardware;
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
+ if (Lines[I].startswith("CPU implementer"))
+ Implementer = Lines[I].substr(15).ltrim("\t :");
+ if (Lines[I].startswith("Hardware"))
+ Hardware = Lines[I].substr(8).ltrim("\t :");
+ }
+
+ if (Implementer == "0x41") { // ARM Ltd.
+    // MSM8992/8994 may give the cpu part for whichever core the kernel is
+    // running on, which is nondeterministic and wrong. Always return
+    // cortex-a53 for these SoCs.
+    if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996"))
+      return "cortex-a53";
+
+ // Look for the CPU part line.
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I)
+ if (Lines[I].startswith("CPU part"))
+ // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
+ // values correspond to the "Part number" in the CP15/c0 register. The
+ // contents are specified in the various processor manuals.
+ return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
+ .Case("0x926", "arm926ej-s")
+ .Case("0xb02", "mpcore")
+ .Case("0xb36", "arm1136j-s")
+ .Case("0xb56", "arm1156t2-s")
+ .Case("0xb76", "arm1176jz-s")
+ .Case("0xc08", "cortex-a8")
+ .Case("0xc09", "cortex-a9")
+ .Case("0xc0f", "cortex-a15")
+ .Case("0xc20", "cortex-m0")
+ .Case("0xc23", "cortex-m3")
+ .Case("0xc24", "cortex-m4")
+ .Case("0xd04", "cortex-a35")
+ .Case("0xd03", "cortex-a53")
+ .Case("0xd07", "cortex-a57")
+ .Case("0xd08", "cortex-a72")
+ .Case("0xd09", "cortex-a73")
+ .Default("generic");
+ }
+
+ if (Implementer == "0x51") // Qualcomm Technologies, Inc.
+ // Look for the CPU part line.
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I)
+ if (Lines[I].startswith("CPU part"))
+ // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
+ // values correspond to the "Part number" in the CP15/c0 register. The
+ // contents are specified in the various processor manuals.
+ return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
+ .Case("0x06f", "krait") // APQ8064
+ .Case("0x201", "kryo")
+ .Case("0x205", "kryo")
+ .Default("generic");
+
+ return "generic";
+}
+
+StringRef sys::detail::getHostCPUNameForS390x(
+ const StringRef &ProcCpuinfoContent) {
+ // STIDP is a privileged operation, so use /proc/cpuinfo instead.
+
+ // The "processor 0:" line comes after a fair amount of other information,
+ // including a cache breakdown, but this should be plenty.
+ SmallVector<StringRef, 32> Lines;
+ ProcCpuinfoContent.split(Lines, "\n");
+
+ // Look for the CPU features.
+ SmallVector<StringRef, 32> CPUFeatures;
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I)
+ if (Lines[I].startswith("features")) {
+ size_t Pos = Lines[I].find(":");
+ if (Pos != StringRef::npos) {
+ Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
+ break;
+ }
+ }
+
+ // We need to check for the presence of vector support independently of
+ // the machine type, since we may only use the vector register set when
+ // supported by the kernel (and hypervisor).
+ bool HaveVectorSupport = false;
+ for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
+ if (CPUFeatures[I] == "vx")
+ HaveVectorSupport = true;
+ }
+
+ // Now check the processor machine type.
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
+ if (Lines[I].startswith("processor ")) {
+ size_t Pos = Lines[I].find("machine = ");
+ if (Pos != StringRef::npos) {
+ Pos += sizeof("machine = ") - 1;
+ unsigned int Id;
+ if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
+ if (Id >= 2964 && HaveVectorSupport)
+ return "z13";
+ if (Id >= 2827)
+ return "zEC12";
+ if (Id >= 2817)
+ return "z196";
+ }
+ }
+ break;
+ }
+ }
+
+ return "generic";
}
-#endif
#if defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64__) || defined(_M_X64)
@@ -1020,201 +1213,21 @@ StringRef sys::getHostCPUName() {
}
#elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__))
StringRef sys::getHostCPUName() {
- // Access to the Processor Version Register (PVR) on PowerPC is privileged,
- // and so we must use an operating-system interface to determine the current
- // processor type. On Linux, this is exposed through the /proc/cpuinfo file.
- const char *generic = "generic";
-
- // The cpu line is second (after the 'processor: 0' line), so if this
- // buffer is too small then something has changed (or is wrong).
- char buffer[1024];
- ssize_t CPUInfoSize = readCpuInfo(buffer, sizeof(buffer));
- if (CPUInfoSize == -1)
- return generic;
-
- const char *CPUInfoStart = buffer;
- const char *CPUInfoEnd = buffer + CPUInfoSize;
-
- const char *CIP = CPUInfoStart;
-
- const char *CPUStart = 0;
- size_t CPULen = 0;
-
- // We need to find the first line which starts with cpu, spaces, and a colon.
- // After the colon, there may be some additional spaces and then the cpu type.
- while (CIP < CPUInfoEnd && CPUStart == 0) {
- if (CIP < CPUInfoEnd && *CIP == '\n')
- ++CIP;
-
- if (CIP < CPUInfoEnd && *CIP == 'c') {
- ++CIP;
- if (CIP < CPUInfoEnd && *CIP == 'p') {
- ++CIP;
- if (CIP < CPUInfoEnd && *CIP == 'u') {
- ++CIP;
- while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
- ++CIP;
-
- if (CIP < CPUInfoEnd && *CIP == ':') {
- ++CIP;
- while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t'))
- ++CIP;
-
- if (CIP < CPUInfoEnd) {
- CPUStart = CIP;
- while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' &&
- *CIP != ',' && *CIP != '\n'))
- ++CIP;
- CPULen = CIP - CPUStart;
- }
- }
- }
- }
- }
-
- if (CPUStart == 0)
- while (CIP < CPUInfoEnd && *CIP != '\n')
- ++CIP;
- }
-
- if (CPUStart == 0)
- return generic;
-
- return StringSwitch<const char *>(StringRef(CPUStart, CPULen))
- .Case("604e", "604e")
- .Case("604", "604")
- .Case("7400", "7400")
- .Case("7410", "7400")
- .Case("7447", "7400")
- .Case("7455", "7450")
- .Case("G4", "g4")
- .Case("POWER4", "970")
- .Case("PPC970FX", "970")
- .Case("PPC970MP", "970")
- .Case("G5", "g5")
- .Case("POWER5", "g5")
- .Case("A2", "a2")
- .Case("POWER6", "pwr6")
- .Case("POWER7", "pwr7")
- .Case("POWER8", "pwr8")
- .Case("POWER8E", "pwr8")
- .Case("POWER8NVL", "pwr8")
- .Case("POWER9", "pwr9")
- .Default(generic);
+ std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
+ const StringRef& Content = P ? P->getBuffer() : "";
+ return detail::getHostCPUNameForPowerPC(Content);
}
-#elif defined(__linux__) && defined(__arm__)
+#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
StringRef sys::getHostCPUName() {
- // The cpuid register on arm is not accessible from user space. On Linux,
- // it is exposed through the /proc/cpuinfo file.
-
- // Read 1024 bytes from /proc/cpuinfo, which should contain the CPU part line
- // in all cases.
- char buffer[1024];
- ssize_t CPUInfoSize = readCpuInfo(buffer, sizeof(buffer));
- if (CPUInfoSize == -1)
- return "generic";
-
- StringRef Str(buffer, CPUInfoSize);
-
- SmallVector<StringRef, 32> Lines;
- Str.split(Lines, "\n");
-
- // Look for the CPU implementer line.
- StringRef Implementer;
- for (unsigned I = 0, E = Lines.size(); I != E; ++I)
- if (Lines[I].startswith("CPU implementer"))
- Implementer = Lines[I].substr(15).ltrim("\t :");
-
- if (Implementer == "0x41") // ARM Ltd.
- // Look for the CPU part line.
- for (unsigned I = 0, E = Lines.size(); I != E; ++I)
- if (Lines[I].startswith("CPU part"))
- // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
- // values correspond to the "Part number" in the CP15/c0 register. The
- // contents are specified in the various processor manuals.
- return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
- .Case("0x926", "arm926ej-s")
- .Case("0xb02", "mpcore")
- .Case("0xb36", "arm1136j-s")
- .Case("0xb56", "arm1156t2-s")
- .Case("0xb76", "arm1176jz-s")
- .Case("0xc08", "cortex-a8")
- .Case("0xc09", "cortex-a9")
- .Case("0xc0f", "cortex-a15")
- .Case("0xc20", "cortex-m0")
- .Case("0xc23", "cortex-m3")
- .Case("0xc24", "cortex-m4")
- .Default("generic");
-
- if (Implementer == "0x51") // Qualcomm Technologies, Inc.
- // Look for the CPU part line.
- for (unsigned I = 0, E = Lines.size(); I != E; ++I)
- if (Lines[I].startswith("CPU part"))
- // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
- // values correspond to the "Part number" in the CP15/c0 register. The
- // contents are specified in the various processor manuals.
- return StringSwitch<const char *>(Lines[I].substr(8).ltrim("\t :"))
- .Case("0x06f", "krait") // APQ8064
- .Default("generic");
-
- return "generic";
+ std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
+ const StringRef& Content = P ? P->getBuffer() : "";
+ return detail::getHostCPUNameForARM(Content);
}
#elif defined(__linux__) && defined(__s390x__)
StringRef sys::getHostCPUName() {
- // STIDP is a privileged operation, so use /proc/cpuinfo instead.
-
- // The "processor 0:" line comes after a fair amount of other information,
- // including a cache breakdown, but this should be plenty.
- char buffer[2048];
- ssize_t CPUInfoSize = readCpuInfo(buffer, sizeof(buffer));
- if (CPUInfoSize == -1)
- return "generic";
-
- StringRef Str(buffer, CPUInfoSize);
- SmallVector<StringRef, 32> Lines;
- Str.split(Lines, "\n");
-
- // Look for the CPU features.
- SmallVector<StringRef, 32> CPUFeatures;
- for (unsigned I = 0, E = Lines.size(); I != E; ++I)
- if (Lines[I].startswith("features")) {
- size_t Pos = Lines[I].find(":");
- if (Pos != StringRef::npos) {
- Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' ');
- break;
- }
- }
-
- // We need to check for the presence of vector support independently of
- // the machine type, since we may only use the vector register set when
- // supported by the kernel (and hypervisor).
- bool HaveVectorSupport = false;
- for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
- if (CPUFeatures[I] == "vx")
- HaveVectorSupport = true;
- }
-
- // Now check the processor machine type.
- for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
- if (Lines[I].startswith("processor ")) {
- size_t Pos = Lines[I].find("machine = ");
- if (Pos != StringRef::npos) {
- Pos += sizeof("machine = ") - 1;
- unsigned int Id;
- if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
- if (Id >= 2964 && HaveVectorSupport)
- return "z13";
- if (Id >= 2827)
- return "zEC12";
- if (Id >= 2817)
- return "z196";
- }
- }
- break;
- }
- }
-
- return "generic";
+ std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
+ const StringRef& Content = P ? P->getBuffer() : "";
+ return detail::getHostCPUNameForS390x(Content);
}
#else
StringRef sys::getHostCPUName() { return "generic"; }
@@ -1232,6 +1245,7 @@ static int computeHostNumPhysicalCores() {
if (std::error_code EC = Text.getError()) {
llvm::errs() << "Can't read "
<< "/proc/cpuinfo: " << EC.message() << "\n";
+ return -1;
}
SmallVector<StringRef, 8> strs;
(*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
@@ -1353,6 +1367,10 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1);
Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1);
+ bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
+ !getX86CpuIDAndInfoEx(0x80000008,0x0, &EAX, &EBX, &ECX, &EDX);
+ Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1);
+
bool HasLeaf7 =
MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
@@ -1362,14 +1380,10 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1);
Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1);
Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1);
- Features["hle"] = HasLeaf7 && ((EBX >> 4) & 1);
Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1);
- Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1);
Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1);
Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1);
Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1);
- Features["smap"] = HasLeaf7 && ((EBX >> 20) & 1);
- Features["pcommit"] = HasLeaf7 && ((EBX >> 22) & 1);
Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1);
Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
@@ -1401,17 +1415,12 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
}
#elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
- // Read 1024 bytes from /proc/cpuinfo, which should contain the Features line
- // in all cases.
- char buffer[1024];
- ssize_t CPUInfoSize = readCpuInfo(buffer, sizeof(buffer));
- if (CPUInfoSize == -1)
+ std::unique_ptr<llvm::MemoryBuffer> P = getProcCpuinfoContent();
+ if (!P)
return false;
- StringRef Str(buffer, CPUInfoSize);
-
SmallVector<StringRef, 32> Lines;
- Str.split(Lines, "\n");
+ P->getBuffer().split(Lines, "\n");
SmallVector<StringRef, 32> CPUFeatures;
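
Factoring the parsers out into sys::detail functions that take the cpuinfo text as a StringRef makes them unit-testable without a real /proc. A sketch (the sample cpuinfo text is made up):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Host.h"

    void checkPowerPCParsing() {
      llvm::StringRef Cpuinfo = "processor : 0\n"
                                "cpu       : POWER8E (raw), altivec supported\n";
      // Expected to map POWER8E to "pwr8" via the StringSwitch above.
      llvm::StringRef Name = llvm::sys::detail::getHostCPUNameForPowerPC(Cpuinfo);
      (void)Name;
    }
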
diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp
index 444aaa37c8c8..8be9879fbc24 100644
--- a/lib/Support/LockFileManager.cpp
+++ b/lib/Support/LockFileManager.cpp
@@ -304,9 +304,9 @@ LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
Interval.tv_sec = 0;
Interval.tv_nsec = 1000000;
#endif
- // Don't wait more than five minutes per iteration. Total timeout for the file
- // to appear is ~8.5 mins.
- const unsigned MaxSeconds = 5*60;
+ // Don't wait more than 40s per iteration. Total timeout for the file
+ // to appear is ~1.5 minutes.
+ const unsigned MaxSeconds = 40;
do {
// Sleep for the designated interval, to allow the owning process time to
// finish up and remove the lock file.
diff --git a/lib/Support/LowLevelType.cpp b/lib/Support/LowLevelType.cpp
new file mode 100644
index 000000000000..4290d69cd197
--- /dev/null
+++ b/lib/Support/LowLevelType.cpp
@@ -0,0 +1,47 @@
+//===-- llvm/Support/LowLevelType.cpp -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the more header-heavy bits of the LLT class to
+/// avoid polluting users' namespaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+LLT::LLT(MVT VT) {
+ if (VT.isVector()) {
+ SizeInBits = VT.getVectorElementType().getSizeInBits();
+ ElementsOrAddrSpace = VT.getVectorNumElements();
+ Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector;
+ } else if (VT.isValid()) {
+ // Aggregates are no different from real scalars as far as GlobalISel is
+ // concerned.
+ Kind = Scalar;
+ SizeInBits = VT.getSizeInBits();
+ ElementsOrAddrSpace = 1;
+ assert(SizeInBits != 0 && "invalid zero-sized type");
+ } else {
+ Kind = Invalid;
+ SizeInBits = ElementsOrAddrSpace = 0;
+ }
+}
+
+void LLT::print(raw_ostream &OS) const {
+ if (isVector())
+ OS << "<" << ElementsOrAddrSpace << " x s" << SizeInBits << ">";
+ else if (isPointer())
+ OS << "p" << getAddressSpace();
+ else if (isValid()) {
+ assert(isScalar() && "unexpected type");
+ OS << "s" << getScalarSizeInBits();
+ } else
+ llvm_unreachable("trying to print an invalid type");
+}
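
A small sketch of the MVT bridge and the printer (the MachineValueType.h include path is assumed for this point in the tree):

    #include "llvm/CodeGen/MachineValueType.h"
    #include "llvm/Support/LowLevelTypeImpl.h"
    #include "llvm/Support/raw_ostream.h"

    void printTypes() {
      llvm::LLT Vec(llvm::MVT::v4i32);
      Vec.print(llvm::errs()); // "<4 x s32>"
      llvm::errs() << "\n";
      llvm::LLT Scalar(llvm::MVT::i64);
      Scalar.print(llvm::errs()); // "s64"
    }
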
diff --git a/lib/Support/MD5.cpp b/lib/Support/MD5.cpp
index 942571eab0f3..bdbf1d677938 100644
--- a/lib/Support/MD5.cpp
+++ b/lib/Support/MD5.cpp
@@ -38,9 +38,13 @@
*/
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
+#include <array>
+#include <cstdint>
#include <cstring>
// The basic MD5 functions.
@@ -68,7 +72,7 @@
((MD5_u32plus) ptr[(n) * 4 + 3] << 24))
#define GET(n) (block[(n)])
-namespace llvm {
+using namespace llvm;
/// \brief This processes one or more 64-byte data blocks, but does NOT update
/// the bit counters. There are no alignment requirements.
@@ -179,9 +183,7 @@ const uint8_t *MD5::body(ArrayRef<uint8_t> Data) {
return ptr;
}
-MD5::MD5()
- : a(0x67452301), b(0xefcdab89), c(0x98badcfe), d(0x10325476), hi(0), lo(0) {
-}
+MD5::MD5() = default;
/// Incrementally add the bytes in \p Data to the hash.
void MD5::update(ArrayRef<uint8_t> Data) {
@@ -259,10 +261,16 @@ void MD5::final(MD5Result &Result) {
support::endian::write32le(&Result[12], d);
}
-void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) {
+SmallString<32> MD5::MD5Result::digest() const {
+ SmallString<32> Str;
raw_svector_ostream Res(Str);
for (int i = 0; i < 16; ++i)
- Res << format("%.2x", Result[i]);
+ Res << format("%.2x", Bytes[i]);
+ return Str;
+}
+
+void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) {
+ Str = Result.digest();
}
std::array<uint8_t, 16> MD5::hash(ArrayRef<uint8_t> Data) {
@@ -271,8 +279,5 @@ std::array<uint8_t, 16> MD5::hash(ArrayRef<uint8_t> Data) {
MD5::MD5Result Res;
Hash.final(Res);
- std::array<uint8_t, 16> Arr;
- memcpy(Arr.data(), Res, sizeof(Res));
- return Arr;
-}
+ return Res;
}
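
With the digest moved onto MD5Result, hashing a buffer to a hex string is now a one-liner at the end; a sketch:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/MD5.h"

    llvm::SmallString<32> md5Hex(llvm::StringRef Data) {
      llvm::MD5 Hash;
      Hash.update(Data);
      llvm::MD5::MD5Result Result;
      Hash.final(Result);
      return Result.digest(); // 32 lowercase hex characters
    }
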
diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp
index 7dd31315f90d..fb7cd070c42d 100644
--- a/lib/Support/ManagedStatic.cpp
+++ b/lib/Support/ManagedStatic.cpp
@@ -21,7 +21,7 @@ using namespace llvm;
static const ManagedStaticBase *StaticList = nullptr;
static sys::Mutex *ManagedStaticMutex = nullptr;
-LLVM_DEFINE_ONCE_FLAG(mutex_init_flag);
+static llvm::once_flag mutex_init_flag;
static void initializeMutex() {
ManagedStaticMutex = new sys::Mutex();
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index a3a18c9283ce..227e792d83dc 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -103,7 +103,7 @@ public:
static ErrorOr<std::unique_ptr<MemoryBuffer>>
getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize,
- uint64_t Offset, bool RequiresNullTerminator, bool IsVolatileSize);
+ uint64_t Offset, bool RequiresNullTerminator, bool IsVolatile);
std::unique_ptr<MemoryBuffer>
MemoryBuffer::getMemBuffer(StringRef InputData, StringRef BufferName,
@@ -178,8 +178,8 @@ MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize,
ErrorOr<std::unique_ptr<MemoryBuffer>>
MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize,
- uint64_t Offset) {
- return getFileAux(FilePath, -1, MapSize, Offset, false, false);
+ uint64_t Offset, bool IsVolatile) {
+ return getFileAux(FilePath, -1, MapSize, Offset, false, IsVolatile);
}
@@ -254,19 +254,19 @@ getMemoryBufferForStream(int FD, const Twine &BufferName) {
ErrorOr<std::unique_ptr<MemoryBuffer>>
MemoryBuffer::getFile(const Twine &Filename, int64_t FileSize,
- bool RequiresNullTerminator, bool IsVolatileSize) {
+ bool RequiresNullTerminator, bool IsVolatile) {
return getFileAux(Filename, FileSize, FileSize, 0,
- RequiresNullTerminator, IsVolatileSize);
+ RequiresNullTerminator, IsVolatile);
}
static ErrorOr<std::unique_ptr<MemoryBuffer>>
getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
- bool IsVolatileSize);
+ bool IsVolatile);
static ErrorOr<std::unique_ptr<MemoryBuffer>>
getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize,
- uint64_t Offset, bool RequiresNullTerminator, bool IsVolatileSize) {
+ uint64_t Offset, bool RequiresNullTerminator, bool IsVolatile) {
int FD;
std::error_code EC = sys::fs::openFileForRead(Filename, FD);
if (EC)
@@ -274,7 +274,7 @@ getFileAux(const Twine &Filename, int64_t FileSize, uint64_t MapSize,
ErrorOr<std::unique_ptr<MemoryBuffer>> Ret =
getOpenFileImpl(FD, Filename, FileSize, MapSize, Offset,
- RequiresNullTerminator, IsVolatileSize);
+ RequiresNullTerminator, IsVolatile);
close(FD);
return Ret;
}
@@ -285,11 +285,11 @@ static bool shouldUseMmap(int FD,
off_t Offset,
bool RequiresNullTerminator,
int PageSize,
- bool IsVolatileSize) {
+ bool IsVolatile) {
// mmap may leave the buffer without null terminator if the file size changed
// by the time the last page is mapped in, so avoid it if the file size is
// likely to change.
- if (IsVolatileSize)
+ if (IsVolatile)
return false;
// We don't use mmap for small files because this can severely fragment our
@@ -300,7 +300,6 @@ static bool shouldUseMmap(int FD,
if (!RequiresNullTerminator)
return true;
-
// If we don't know the file size, use fstat to find out. fstat on an open
// file descriptor is cheaper than stat on a random path.
// FIXME: this chunk of code is duplicated, but it avoids a fstat when
@@ -338,7 +337,7 @@ static bool shouldUseMmap(int FD,
static ErrorOr<std::unique_ptr<MemoryBuffer>>
getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
- bool IsVolatileSize) {
+ bool IsVolatile) {
static int PageSize = sys::Process::getPageSize();
// Default is to map the full file.
@@ -365,7 +364,7 @@ getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
}
if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
- PageSize, IsVolatileSize)) {
+ PageSize, IsVolatile)) {
std::error_code EC;
std::unique_ptr<MemoryBuffer> Result(
new (NamedBufferAlloc(Filename))
@@ -415,17 +414,16 @@ getOpenFileImpl(int FD, const Twine &Filename, uint64_t FileSize,
ErrorOr<std::unique_ptr<MemoryBuffer>>
MemoryBuffer::getOpenFile(int FD, const Twine &Filename, uint64_t FileSize,
- bool RequiresNullTerminator, bool IsVolatileSize) {
+ bool RequiresNullTerminator, bool IsVolatile) {
return getOpenFileImpl(FD, Filename, FileSize, FileSize, 0,
- RequiresNullTerminator, IsVolatileSize);
+ RequiresNullTerminator, IsVolatile);
}
ErrorOr<std::unique_ptr<MemoryBuffer>>
MemoryBuffer::getOpenFileSlice(int FD, const Twine &Filename, uint64_t MapSize,
- int64_t Offset) {
+ int64_t Offset, bool IsVolatile) {
assert(MapSize != uint64_t(-1));
- return getOpenFileImpl(FD, Filename, -1, MapSize, Offset, false,
- /*IsVolatileSize*/ false);
+ return getOpenFileImpl(FD, Filename, -1, MapSize, Offset, false, IsVolatile);
}
ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
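
The rename from IsVolatileSize to IsVolatile reflects what the flag actually guards (whether mmap is safe at all), and the slice APIs now take it too. A sketch:

    #include "llvm/ADT/Twine.h"
    #include "llvm/Support/MemoryBuffer.h"
    using namespace llvm;

    // Read 4 KiB at a given offset from a file another process may be
    // rewriting; IsVolatile=true forces a read() copy instead of mmap.
    ErrorOr<std::unique_ptr<MemoryBuffer>> readLiveSlice(const Twine &Path,
                                                         uint64_t Offset) {
      return MemoryBuffer::getFileSlice(Path, /*MapSize=*/4096, Offset,
                                        /*IsVolatile=*/true);
    }
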
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index 4bb035eeccca..9fd6652ce4b8 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -11,13 +11,14 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/Path.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/COFF.h"
-#include "llvm/Support/MachO.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Path.h"
+#include "llvm/Support/MachO.h"
#include "llvm/Support/Process.h"
#include <cctype>
#include <cstring>
@@ -34,16 +35,29 @@ using namespace llvm::support::endian;
namespace {
using llvm::StringRef;
using llvm::sys::path::is_separator;
+ using llvm::sys::path::Style;
+ inline Style real_style(Style style) {
#ifdef LLVM_ON_WIN32
- const char *separators = "\\/";
- const char preferred_separator = '\\';
+ return (style == Style::posix) ? Style::posix : Style::windows;
#else
- const char separators = '/';
- const char preferred_separator = '/';
+ return (style == Style::windows) ? Style::windows : Style::posix;
#endif
+ }
- StringRef find_first_component(StringRef path) {
+ inline const char *separators(Style style) {
+ if (real_style(style) == Style::windows)
+ return "\\/";
+ return "/";
+ }
+
+ inline char preferred_separator(Style style) {
+ if (real_style(style) == Style::windows)
+ return '\\';
+ return '/';
+ }
+
+ StringRef find_first_component(StringRef path, Style style) {
// Look for this first component in the following order.
// * empty (in this case we return an empty string)
// * either C: or {//,\\}net.
@@ -53,96 +67,85 @@ namespace {
if (path.empty())
return path;
-#ifdef LLVM_ON_WIN32
- // C:
- if (path.size() >= 2 && std::isalpha(static_cast<unsigned char>(path[0])) &&
- path[1] == ':')
- return path.substr(0, 2);
-#endif
+ if (real_style(style) == Style::windows) {
+ // C:
+ if (path.size() >= 2 &&
+ std::isalpha(static_cast<unsigned char>(path[0])) && path[1] == ':')
+ return path.substr(0, 2);
+ }
// //net
- if ((path.size() > 2) &&
- is_separator(path[0]) &&
- path[0] == path[1] &&
- !is_separator(path[2])) {
+ if ((path.size() > 2) && is_separator(path[0], style) &&
+ path[0] == path[1] && !is_separator(path[2], style)) {
// Find the next directory separator.
- size_t end = path.find_first_of(separators, 2);
+ size_t end = path.find_first_of(separators(style), 2);
return path.substr(0, end);
}
// {/,\}
- if (is_separator(path[0]))
+ if (is_separator(path[0], style))
return path.substr(0, 1);
// * {file,directory}name
- size_t end = path.find_first_of(separators);
+ size_t end = path.find_first_of(separators(style));
return path.substr(0, end);
}
- size_t filename_pos(StringRef str) {
- if (str.size() == 2 &&
- is_separator(str[0]) &&
- str[0] == str[1])
+ size_t filename_pos(StringRef str, Style style) {
+ if (str.size() == 2 && is_separator(str[0], style) && str[0] == str[1])
return 0;
- if (str.size() > 0 && is_separator(str[str.size() - 1]))
+ if (str.size() > 0 && is_separator(str[str.size() - 1], style))
return str.size() - 1;
- size_t pos = str.find_last_of(separators, str.size() - 1);
+ size_t pos = str.find_last_of(separators(style), str.size() - 1);
-#ifdef LLVM_ON_WIN32
- if (pos == StringRef::npos)
- pos = str.find_last_of(':', str.size() - 2);
-#endif
+ if (real_style(style) == Style::windows) {
+ if (pos == StringRef::npos)
+ pos = str.find_last_of(':', str.size() - 2);
+ }
- if (pos == StringRef::npos ||
- (pos == 1 && is_separator(str[0])))
+ if (pos == StringRef::npos || (pos == 1 && is_separator(str[0], style)))
return 0;
return pos + 1;
}
- size_t root_dir_start(StringRef str) {
+ size_t root_dir_start(StringRef str, Style style) {
// case "c:/"
-#ifdef LLVM_ON_WIN32
- if (str.size() > 2 &&
- str[1] == ':' &&
- is_separator(str[2]))
- return 2;
-#endif
+ if (real_style(style) == Style::windows) {
+ if (str.size() > 2 && str[1] == ':' && is_separator(str[2], style))
+ return 2;
+ }
// case "//"
- if (str.size() == 2 &&
- is_separator(str[0]) &&
- str[0] == str[1])
+ if (str.size() == 2 && is_separator(str[0], style) && str[0] == str[1])
return StringRef::npos;
// case "//net"
- if (str.size() > 3 &&
- is_separator(str[0]) &&
- str[0] == str[1] &&
- !is_separator(str[2])) {
- return str.find_first_of(separators, 2);
+ if (str.size() > 3 && is_separator(str[0], style) && str[0] == str[1] &&
+ !is_separator(str[2], style)) {
+ return str.find_first_of(separators(style), 2);
}
// case "/"
- if (str.size() > 0 && is_separator(str[0]))
+ if (str.size() > 0 && is_separator(str[0], style))
return 0;
return StringRef::npos;
}
- size_t parent_path_end(StringRef path) {
- size_t end_pos = filename_pos(path);
+ size_t parent_path_end(StringRef path, Style style) {
+ size_t end_pos = filename_pos(path, style);
- bool filename_was_sep = path.size() > 0 && is_separator(path[end_pos]);
+ bool filename_was_sep =
+ path.size() > 0 && is_separator(path[end_pos], style);
// Skip separators except for root dir.
- size_t root_dir_pos = root_dir_start(path.substr(0, end_pos));
+ size_t root_dir_pos = root_dir_start(path.substr(0, end_pos), style);
- while(end_pos > 0 &&
- (end_pos - 1) != root_dir_pos &&
- is_separator(path[end_pos - 1]))
+ while (end_pos > 0 && (end_pos - 1) != root_dir_pos &&
+ is_separator(path[end_pos - 1], style))
--end_pos;
if (end_pos == 1 && root_dir_pos == 0 && filename_was_sep)
@@ -230,11 +233,12 @@ namespace llvm {
namespace sys {
namespace path {
-const_iterator begin(StringRef path) {
+const_iterator begin(StringRef path, Style style) {
const_iterator i;
i.Path = path;
- i.Component = find_first_component(path);
+ i.Component = find_first_component(path, style);
i.Position = 0;
+ i.S = style;
return i;
}
@@ -259,27 +263,21 @@ const_iterator &const_iterator::operator++() {
// Both POSIX and Windows treat paths that begin with exactly two separators
// specially.
- bool was_net = Component.size() > 2 &&
- is_separator(Component[0]) &&
- Component[1] == Component[0] &&
- !is_separator(Component[2]);
+ bool was_net = Component.size() > 2 && is_separator(Component[0], S) &&
+ Component[1] == Component[0] && !is_separator(Component[2], S);
// Handle separators.
- if (is_separator(Path[Position])) {
+ if (is_separator(Path[Position], S)) {
// Root dir.
- if (was_net
-#ifdef LLVM_ON_WIN32
+ if (was_net ||
// c:/
- || Component.endswith(":")
-#endif
- ) {
+ (real_style(S) == Style::windows && Component.endswith(":"))) {
Component = Path.substr(Position, 1);
return *this;
}
// Skip extra separators.
- while (Position != Path.size() &&
- is_separator(Path[Position])) {
+ while (Position != Path.size() && is_separator(Path[Position], S)) {
++Position;
}
@@ -292,7 +290,7 @@ const_iterator &const_iterator::operator++() {
}
// Find next component.
- size_t end_pos = Path.find_first_of(separators, Position);
+ size_t end_pos = Path.find_first_of(separators(S), Position);
Component = Path.slice(Position, end_pos);
return *this;
@@ -306,10 +304,11 @@ ptrdiff_t const_iterator::operator-(const const_iterator &RHS) const {
return Position - RHS.Position;
}
-reverse_iterator rbegin(StringRef Path) {
+reverse_iterator rbegin(StringRef Path, Style style) {
reverse_iterator I;
I.Path = Path;
I.Position = Path.size();
+ I.S = style;
return ++I;
}
@@ -324,10 +323,9 @@ reverse_iterator rend(StringRef Path) {
reverse_iterator &reverse_iterator::operator++() {
// If we're at the end and the previous char was a '/', return '.' unless
// we are the root path.
- size_t root_dir_pos = root_dir_start(Path);
- if (Position == Path.size() &&
- Path.size() > root_dir_pos + 1 &&
- is_separator(Path[Position - 1])) {
+ size_t root_dir_pos = root_dir_start(Path, S);
+ if (Position == Path.size() && Path.size() > root_dir_pos + 1 &&
+ is_separator(Path[Position - 1], S)) {
--Position;
Component = ".";
return *this;
@@ -336,13 +334,12 @@ reverse_iterator &reverse_iterator::operator++() {
// Skip separators unless it's the root directory.
size_t end_pos = Position;
- while(end_pos > 0 &&
- (end_pos - 1) != root_dir_pos &&
- is_separator(Path[end_pos - 1]))
+ while (end_pos > 0 && (end_pos - 1) != root_dir_pos &&
+ is_separator(Path[end_pos - 1], S))
--end_pos;
// Find next separator.
- size_t start_pos = filename_pos(Path.substr(0, end_pos));
+ size_t start_pos = filename_pos(Path.substr(0, end_pos), S);
Component = Path.slice(start_pos, end_pos);
Position = start_pos;
return *this;
@@ -357,21 +354,15 @@ ptrdiff_t reverse_iterator::operator-(const reverse_iterator &RHS) const {
return Position - RHS.Position;
}
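
A sketch of the style-aware iterators above in use: with an explicit Style::windows, "C:\a\b" decomposes the same way on any host. The function name is illustrative; begin(StringRef, Style) is the overload added in this change:

#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"

void printComponents() {
  llvm::StringRef P = "C:\\a\\b";
  for (auto I = llvm::sys::path::begin(P, llvm::sys::path::Style::windows),
            E = llvm::sys::path::end(P);
       I != E; ++I)
    llvm::outs() << *I << '\n'; // prints "C:", "\", "a", "b"
}
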
-StringRef root_path(StringRef path) {
- const_iterator b = begin(path),
- pos = b,
- e = end(path);
+StringRef root_path(StringRef path, Style style) {
+ const_iterator b = begin(path, style), pos = b, e = end(path);
if (b != e) {
- bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
- bool has_drive =
-#ifdef LLVM_ON_WIN32
- b->endswith(":");
-#else
- false;
-#endif
+ bool has_net =
+ b->size() > 2 && is_separator((*b)[0], style) && (*b)[1] == (*b)[0];
+ bool has_drive = (real_style(style) == Style::windows) && b->endswith(":");
if (has_net || has_drive) {
- if ((++pos != e) && is_separator((*pos)[0])) {
+ if ((++pos != e) && is_separator((*pos)[0], style)) {
// {C:/,//net/}, so get the first two components.
return path.substr(0, b->size() + pos->size());
} else {
@@ -381,7 +372,7 @@ StringRef root_path(StringRef path) {
}
// POSIX style root directory.
- if (is_separator((*b)[0])) {
+ if (is_separator((*b)[0], style)) {
return *b;
}
}
@@ -389,17 +380,12 @@ StringRef root_path(StringRef path) {
return StringRef();
}
-StringRef root_name(StringRef path) {
- const_iterator b = begin(path),
- e = end(path);
+StringRef root_name(StringRef path, Style style) {
+ const_iterator b = begin(path, style), e = end(path);
if (b != e) {
- bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
- bool has_drive =
-#ifdef LLVM_ON_WIN32
- b->endswith(":");
-#else
- false;
-#endif
+ bool has_net =
+ b->size() > 2 && is_separator((*b)[0], style) && (*b)[1] == (*b)[0];
+ bool has_drive = (real_style(style) == Style::windows) && b->endswith(":");
if (has_net || has_drive) {
// just {C:,//net}, return the first component.
@@ -411,27 +397,21 @@ StringRef root_name(StringRef path) {
return StringRef();
}
-StringRef root_directory(StringRef path) {
- const_iterator b = begin(path),
- pos = b,
- e = end(path);
+StringRef root_directory(StringRef path, Style style) {
+ const_iterator b = begin(path, style), pos = b, e = end(path);
if (b != e) {
- bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0];
- bool has_drive =
-#ifdef LLVM_ON_WIN32
- b->endswith(":");
-#else
- false;
-#endif
+ bool has_net =
+ b->size() > 2 && is_separator((*b)[0], style) && (*b)[1] == (*b)[0];
+ bool has_drive = (real_style(style) == Style::windows) && b->endswith(":");
if ((has_net || has_drive) &&
// {C:,//net}, skip to the next component.
- (++pos != e) && is_separator((*pos)[0])) {
+ (++pos != e) && is_separator((*pos)[0], style)) {
return *pos;
}
// POSIX style root directory.
- if (!has_net && is_separator((*b)[0])) {
+ if (!has_net && is_separator((*b)[0], style)) {
return *b;
}
}
@@ -440,15 +420,13 @@ StringRef root_directory(StringRef path) {
return StringRef();
}
-StringRef relative_path(StringRef path) {
- StringRef root = root_path(path);
+StringRef relative_path(StringRef path, Style style) {
+ StringRef root = root_path(path, style);
return path.substr(root.size());
}
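
Putting the decomposition functions together, a minimal sketch with illustrative values, assuming the Style-taking overloads above:

#include "llvm/Support/Path.h"

void demoDecompose() {
  using namespace llvm::sys::path;
  llvm::StringRef P = "c:\\src\\file.cpp";
  llvm::StringRef RN = root_name(P, Style::windows);      // "c:"
  llvm::StringRef RD = root_directory(P, Style::windows); // "\"
  llvm::StringRef RP = root_path(P, Style::windows);      // "c:\"
  llvm::StringRef RL = relative_path(P, Style::windows);  // "src\file.cpp"
  (void)RN; (void)RD; (void)RP; (void)RL;
}
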
-void append(SmallVectorImpl<char> &path, const Twine &a,
- const Twine &b,
- const Twine &c,
- const Twine &d) {
+void append(SmallVectorImpl<char> &path, Style style, const Twine &a,
+ const Twine &b, const Twine &c, const Twine &d) {
SmallString<32> a_storage;
SmallString<32> b_storage;
SmallString<32> c_storage;
@@ -461,13 +439,15 @@ void append(SmallVectorImpl<char> &path, const Twine &a,
if (!d.isTriviallyEmpty()) components.push_back(d.toStringRef(d_storage));
for (auto &component : components) {
- bool path_has_sep = !path.empty() && is_separator(path[path.size() - 1]);
- bool component_has_sep = !component.empty() && is_separator(component[0]);
- bool is_root_name = has_root_name(component);
+ bool path_has_sep =
+ !path.empty() && is_separator(path[path.size() - 1], style);
+ bool component_has_sep =
+ !component.empty() && is_separator(component[0], style);
+ bool is_root_name = has_root_name(component, style);
if (path_has_sep) {
// Strip separators from beginning of component.
- size_t loc = component.find_first_not_of(separators);
+ size_t loc = component.find_first_not_of(separators(style));
StringRef c = component.substr(loc);
// Append it.
@@ -477,41 +457,47 @@ void append(SmallVectorImpl<char> &path, const Twine &a,
if (!component_has_sep && !(path.empty() || is_root_name)) {
// Add a separator.
- path.push_back(preferred_separator);
+ path.push_back(preferred_separator(style));
}
path.append(component.begin(), component.end());
}
}
-void append(SmallVectorImpl<char> &path,
- const_iterator begin, const_iterator end) {
+void append(SmallVectorImpl<char> &path, const Twine &a, const Twine &b,
+ const Twine &c, const Twine &d) {
+ append(path, Style::native, a, b, c, d);
+}
+
+void append(SmallVectorImpl<char> &path, const_iterator begin,
+ const_iterator end, Style style) {
for (; begin != end; ++begin)
- path::append(path, *begin);
+ path::append(path, style, *begin);
}
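
The new Style-taking append overload lets callers build a foreign-style path on any host. A short sketch, assuming the trailing Twine parameters default to empty as in the existing overload:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"

void demoAppend() {
  llvm::SmallString<64> P("c:");
  llvm::sys::path::append(P, llvm::sys::path::Style::windows, "tools", "clang");
  // P is now "c:\tools\clang" regardless of the host platform.
}
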
-StringRef parent_path(StringRef path) {
- size_t end_pos = parent_path_end(path);
+StringRef parent_path(StringRef path, Style style) {
+ size_t end_pos = parent_path_end(path, style);
if (end_pos == StringRef::npos)
return StringRef();
else
return path.substr(0, end_pos);
}
-void remove_filename(SmallVectorImpl<char> &path) {
- size_t end_pos = parent_path_end(StringRef(path.begin(), path.size()));
+void remove_filename(SmallVectorImpl<char> &path, Style style) {
+ size_t end_pos = parent_path_end(StringRef(path.begin(), path.size()), style);
if (end_pos != StringRef::npos)
path.set_size(end_pos);
}
-void replace_extension(SmallVectorImpl<char> &path, const Twine &extension) {
+void replace_extension(SmallVectorImpl<char> &path, const Twine &extension,
+ Style style) {
StringRef p(path.begin(), path.size());
SmallString<32> ext_storage;
StringRef ext = extension.toStringRef(ext_storage);
// Erase existing extension.
size_t pos = p.find_last_of('.');
- if (pos != StringRef::npos && pos >= filename_pos(p))
+ if (pos != StringRef::npos && pos >= filename_pos(p, style))
path.set_size(pos);
// Append '.' if needed.
@@ -523,8 +509,8 @@ void replace_extension(SmallVectorImpl<char> &path, const Twine &extension) {
}
void replace_path_prefix(SmallVectorImpl<char> &Path,
- const StringRef &OldPrefix,
- const StringRef &NewPrefix) {
+ const StringRef &OldPrefix, const StringRef &NewPrefix,
+ Style style) {
if (OldPrefix.empty() && NewPrefix.empty())
return;
@@ -540,53 +526,58 @@ void replace_path_prefix(SmallVectorImpl<char> &Path,
StringRef RelPath = OrigPath.substr(OldPrefix.size());
SmallString<256> NewPath;
- path::append(NewPath, NewPrefix);
- path::append(NewPath, RelPath);
+ path::append(NewPath, style, NewPrefix);
+ path::append(NewPath, style, RelPath);
Path.swap(NewPath);
}
-void native(const Twine &path, SmallVectorImpl<char> &result) {
+void native(const Twine &path, SmallVectorImpl<char> &result, Style style) {
assert((!path.isSingleStringRef() ||
path.getSingleStringRef().data() != result.data()) &&
"path and result are not allowed to overlap!");
// Clear result.
result.clear();
path.toVector(result);
- native(result);
+ native(result, style);
}
-void native(SmallVectorImpl<char> &Path) {
-#ifdef LLVM_ON_WIN32
- std::replace(Path.begin(), Path.end(), '/', '\\');
-#else
- for (auto PI = Path.begin(), PE = Path.end(); PI < PE; ++PI) {
- if (*PI == '\\') {
- auto PN = PI + 1;
- if (PN < PE && *PN == '\\')
- ++PI; // increment once, the for loop will move over the escaped slash
- else
- *PI = '/';
+void native(SmallVectorImpl<char> &Path, Style style) {
+ if (Path.empty())
+ return;
+ if (real_style(style) == Style::windows) {
+ std::replace(Path.begin(), Path.end(), '/', '\\');
+ if (Path[0] == '~' && (Path.size() == 1 || is_separator(Path[1], style))) {
+ SmallString<128> PathHome;
+ home_directory(PathHome);
+ PathHome.append(Path.begin() + 1, Path.end());
+ Path = PathHome;
+ }
+ } else {
+ for (auto PI = Path.begin(), PE = Path.end(); PI < PE; ++PI) {
+ if (*PI == '\\') {
+ auto PN = PI + 1;
+ if (PN < PE && *PN == '\\')
+ ++PI; // increment once; the for loop will move over the escaped slash
+ else
+ *PI = '/';
+ }
}
}
-#endif
}
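
A sketch of the rewritten native() with an explicit style; note that the '~' expansion branch applies only under the windows style:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"

void demoNative() {
  llvm::SmallString<32> P("a/b\\c");
  llvm::sys::path::native(P, llvm::sys::path::Style::windows);
  // P is now "a\b\c"; under Style::posix the lone '\' would become '/'.
}
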
-std::string convert_to_slash(StringRef path) {
-#ifdef LLVM_ON_WIN32
+std::string convert_to_slash(StringRef path, Style style) {
+ if (real_style(style) != Style::windows)
+ return path;
+
std::string s = path.str();
std::replace(s.begin(), s.end(), '\\', '/');
return s;
-#else
- return path;
-#endif
}
-StringRef filename(StringRef path) {
- return *rbegin(path);
-}
+StringRef filename(StringRef path, Style style) { return *rbegin(path, style); }
-StringRef stem(StringRef path) {
- StringRef fname = filename(path);
+StringRef stem(StringRef path, Style style) {
+ StringRef fname = filename(path, style);
size_t pos = fname.find_last_of('.');
if (pos == StringRef::npos)
return fname;
@@ -598,8 +589,8 @@ StringRef stem(StringRef path) {
return fname.substr(0, pos);
}
-StringRef extension(StringRef path) {
- StringRef fname = filename(path);
+StringRef extension(StringRef path, Style style) {
+ StringRef fname = filename(path, style);
size_t pos = fname.find_last_of('.');
if (pos == StringRef::npos)
return StringRef();
@@ -611,110 +602,109 @@ StringRef extension(StringRef path) {
return fname.substr(pos);
}
-bool is_separator(char value) {
- switch(value) {
-#ifdef LLVM_ON_WIN32
- case '\\': // fall through
-#endif
- case '/': return true;
- default: return false;
- }
+bool is_separator(char value, Style style) {
+ if (value == '/')
+ return true;
+ if (real_style(style) == Style::windows)
+ return value == '\\';
+ return false;
}
-static const char preferred_separator_string[] = { preferred_separator, '\0' };
-
-StringRef get_separator() {
- return preferred_separator_string;
+StringRef get_separator(Style style) {
+ if (real_style(style) == Style::windows)
+ return "\\";
+ return "/";
}
-bool has_root_name(const Twine &path) {
+bool has_root_name(const Twine &path, Style style) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
- return !root_name(p).empty();
+ return !root_name(p, style).empty();
}
-bool has_root_directory(const Twine &path) {
+bool has_root_directory(const Twine &path, Style style) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
- return !root_directory(p).empty();
+ return !root_directory(p, style).empty();
}
-bool has_root_path(const Twine &path) {
+bool has_root_path(const Twine &path, Style style) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
- return !root_path(p).empty();
+ return !root_path(p, style).empty();
}
-bool has_relative_path(const Twine &path) {
+bool has_relative_path(const Twine &path, Style style) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
- return !relative_path(p).empty();
+ return !relative_path(p, style).empty();
}
-bool has_filename(const Twine &path) {
+bool has_filename(const Twine &path, Style style) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
- return !filename(p).empty();
+ return !filename(p, style).empty();
}
-bool has_parent_path(const Twine &path) {
+bool has_parent_path(const Twine &path, Style style) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
- return !parent_path(p).empty();
+ return !parent_path(p, style).empty();
}
-bool has_stem(const Twine &path) {
+bool has_stem(const Twine &path, Style style) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
- return !stem(p).empty();
+ return !stem(p, style).empty();
}
-bool has_extension(const Twine &path) {
+bool has_extension(const Twine &path, Style style) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
- return !extension(p).empty();
+ return !extension(p, style).empty();
}
-bool is_absolute(const Twine &path) {
+bool is_absolute(const Twine &path, Style style) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
- bool rootDir = has_root_directory(p),
-#ifdef LLVM_ON_WIN32
- rootName = has_root_name(p);
-#else
- rootName = true;
-#endif
+ bool rootDir = has_root_directory(p, style);
+ bool rootName =
+ (real_style(style) != Style::windows) || has_root_name(p, style);
return rootDir && rootName;
}
-bool is_relative(const Twine &path) { return !is_absolute(path); }
+bool is_relative(const Twine &path, Style style) {
+ return !is_absolute(path, style);
+}
-StringRef remove_leading_dotslash(StringRef Path) {
+StringRef remove_leading_dotslash(StringRef Path, Style style) {
// Remove leading "./" (or ".//" or "././" etc.)
- while (Path.size() > 2 && Path[0] == '.' && is_separator(Path[1])) {
+ while (Path.size() > 2 && Path[0] == '.' && is_separator(Path[1], style)) {
Path = Path.substr(2);
- while (Path.size() > 0 && is_separator(Path[0]))
+ while (Path.size() > 0 && is_separator(Path[0], style))
Path = Path.substr(1);
}
return Path;
}
-static SmallString<256> remove_dots(StringRef path, bool remove_dot_dot) {
+static SmallString<256> remove_dots(StringRef path, bool remove_dot_dot,
+ Style style) {
SmallVector<StringRef, 16> components;
// Skip the root path, then look for traversal in the components.
- StringRef rel = path::relative_path(path);
- for (StringRef C : llvm::make_range(path::begin(rel), path::end(rel))) {
+ StringRef rel = path::relative_path(path, style);
+ for (StringRef C :
+ llvm::make_range(path::begin(rel, style), path::end(rel))) {
if (C == ".")
continue;
// Leading ".." will remain in the path unless it's at the root.
@@ -723,22 +713,23 @@ static SmallString<256> remove_dots(StringRef path, bool remove_dot_dot) {
components.pop_back();
continue;
}
- if (path::is_absolute(path))
+ if (path::is_absolute(path, style))
continue;
}
components.push_back(C);
}
- SmallString<256> buffer = path::root_path(path);
+ SmallString<256> buffer = path::root_path(path, style);
for (StringRef C : components)
- path::append(buffer, C);
+ path::append(buffer, style, C);
return buffer;
}
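
How the remove_dots pair (the static helper above and the public wrapper below) behaves, in a minimal sketch with illustrative values:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"

void demoRemoveDots() {
  llvm::SmallString<64> P("/a/./b/../c");
  llvm::sys::path::remove_dots(P, /*remove_dot_dot=*/true,
                               llvm::sys::path::Style::posix);
  // P is now "/a/c": "." is dropped and ".." pops the "b" component.
}
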
-bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot) {
+bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot,
+ Style style) {
StringRef p(path.data(), path.size());
- SmallString<256> result = remove_dots(p, remove_dot_dot);
+ SmallString<256> result = remove_dots(p, remove_dot_dot, style);
if (result == path)
return false;
@@ -776,7 +767,7 @@ createTemporaryFile(const Twine &Model, int &ResultFD,
llvm::SmallVectorImpl<char> &ResultPath, FSEntity Type) {
SmallString<128> Storage;
StringRef P = Model.toNullTerminatedStringRef(Storage);
- assert(P.find_first_of(separators) == StringRef::npos &&
+ assert(P.find_first_of(separators(Style::native)) == StringRef::npos &&
"Model must be a simple filename.");
// Use P.begin() so that createUniqueEntity doesn't need to recreate Storage.
return createUniqueEntity(P.begin(), ResultFD, ResultPath,
@@ -818,12 +809,9 @@ static std::error_code make_absolute(const Twine &current_directory,
bool use_current_directory) {
StringRef p(path.data(), path.size());
- bool rootDirectory = path::has_root_directory(p),
-#ifdef LLVM_ON_WIN32
- rootName = path::has_root_name(p);
-#else
- rootName = true;
-#endif
+ bool rootDirectory = path::has_root_directory(p);
+ bool rootName =
+ (real_style(Style::native) != Style::windows) || path::has_root_name(p);
// Already absolute.
if (rootName && rootDirectory)
@@ -937,6 +925,36 @@ std::error_code copy_file(const Twine &From, const Twine &To) {
return std::error_code();
}
+ErrorOr<MD5::MD5Result> md5_contents(int FD) {
+ MD5 Hash;
+
+ constexpr size_t BufSize = 4096;
+ std::vector<uint8_t> Buf(BufSize);
+ int BytesRead = 0;
+ for (;;) {
+ BytesRead = read(FD, Buf.data(), BufSize);
+ if (BytesRead <= 0)
+ break;
+ Hash.update(makeArrayRef(Buf.data(), BytesRead));
+ }
+
+ if (BytesRead < 0)
+ return std::error_code(errno, std::generic_category());
+ MD5::MD5Result Result;
+ Hash.final(Result);
+ return Result;
+}
+
+ErrorOr<MD5::MD5Result> md5_contents(const Twine &Path) {
+ int FD;
+ if (auto EC = openFileForRead(Path, FD))
+ return EC;
+
+ auto Result = md5_contents(FD);
+ close(FD);
+ return Result;
+}
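+
A usage sketch for the new md5_contents API; MD5::stringifyResult is assumed to be the existing helper in llvm/Support/MD5.h for rendering the digest:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"

void printMD5(const llvm::Twine &Path) {
  // ErrorOr converts to false when it holds an error.
  auto Res = llvm::sys::fs::md5_contents(Path);
  if (!Res)
    return;
  llvm::SmallString<32> Hex;
  llvm::MD5::stringifyResult(*Res, Hex);
  llvm::outs() << Hex.str() << '\n';
}
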
+
bool exists(file_status status) {
return status_known(status) && status.type() != file_type::file_not_found;
}
@@ -945,6 +963,13 @@ bool status_known(file_status s) {
return s.type() != file_type::status_error;
}
+file_type get_file_type(const Twine &Path, bool Follow) {
+ file_status st;
+ if (status(Path, st, Follow))
+ return file_type::status_error;
+ return st.type();
+}
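+
A sketch combining get_file_type with the new Follow parameter: with Follow=false the underlying status call uses lstat, so a symlink reports itself rather than its target (the helper name is illustrative):

#include "llvm/Support/FileSystem.h"

bool isSymlink(const llvm::Twine &P) {
  using namespace llvm::sys::fs;
  return get_file_type(P, /*Follow=*/false) == file_type::symlink_file;
}
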
+
bool is_directory(file_status status) {
return status.type() == file_type::directory_file;
}
@@ -969,6 +994,18 @@ std::error_code is_regular_file(const Twine &path, bool &result) {
return std::error_code();
}
+bool is_symlink_file(file_status status) {
+ return status.type() == file_type::symlink_file;
+}
+
+std::error_code is_symlink_file(const Twine &path, bool &result) {
+ file_status st;
+ if (std::error_code ec = status(path, st, false))
+ return ec;
+ result = is_symlink_file(st);
+ return std::error_code();
+}
+
bool is_other(file_status status) {
return exists(status) &&
!is_regular_file(status) &&
@@ -1162,7 +1199,15 @@ std::error_code identify_magic(const Twine &Path, file_magic &Result) {
}
std::error_code directory_entry::status(file_status &result) const {
- return fs::status(Path, result);
+ return fs::status(Path, result, FollowSymlinks);
+}
+
+ErrorOr<perms> getPermissions(const Twine &Path) {
+ file_status Status;
+ if (std::error_code EC = status(Path, Status))
+ return EC;
+
+ return Status.permissions();
}
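
A sketch pairing the new getPermissions accessor with setPermissions (whose Unix implementation appears later in this diff); the helper name is illustrative:

#include "llvm/Support/FileSystem.h"

std::error_code makeReadOnly(const llvm::Twine &P) {
  using namespace llvm::sys::fs;
  llvm::ErrorOr<perms> Cur = getPermissions(P);
  if (!Cur)
    return Cur.getError();
  // Clear every write bit while preserving the rest.
  return setPermissions(P, *Cur & ~(owner_write | group_write | others_write));
}
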
} // end namespace fs
diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp
index 3b6309cef21a..6c9781c4e2d6 100644
--- a/lib/Support/RWMutex.cpp
+++ b/lib/Support/RWMutex.cpp
@@ -13,7 +13,6 @@
#include "llvm/Config/config.h"
#include "llvm/Support/RWMutex.h"
-#include <cstring>
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only TRULY operating system
@@ -22,29 +21,31 @@
#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// Define all methods as no-ops if threading is explicitly disabled
-namespace llvm {
+
+using namespace llvm;
using namespace sys;
-RWMutexImpl::RWMutexImpl() { }
-RWMutexImpl::~RWMutexImpl() { }
+
+RWMutexImpl::RWMutexImpl() = default;
+RWMutexImpl::~RWMutexImpl() = default;
+
bool RWMutexImpl::reader_acquire() { return true; }
bool RWMutexImpl::reader_release() { return true; }
bool RWMutexImpl::writer_acquire() { return true; }
bool RWMutexImpl::writer_release() { return true; }
-}
+
#else
#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_RWLOCK_INIT)
#include <cassert>
+#include <cstdlib>
#include <pthread.h>
-#include <stdlib.h>
-namespace llvm {
+using namespace llvm;
using namespace sys;
// Construct a RWMutex using pthread calls
RWMutexImpl::RWMutexImpl()
- : data_(nullptr)
{
// Declare the pthread_rwlock data structures
pthread_rwlock_t* rwlock =
@@ -113,8 +114,6 @@ RWMutexImpl::writer_release()
return errorcode == 0;
}
-}
-
#elif defined(LLVM_ON_UNIX)
#include "Unix/RWMutex.inc"
#elif defined( LLVM_ON_WIN32)
diff --git a/lib/Support/Signals.cpp b/lib/Support/Signals.cpp
index e5e38f59c040..57f36bf175b3 100644
--- a/lib/Support/Signals.cpp
+++ b/lib/Support/Signals.cpp
@@ -29,7 +29,6 @@
#include <vector>
namespace llvm {
-using namespace sys;
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only TRULY operating system
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index 4cb9b2ff2cda..ca2391c10ff1 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -13,30 +13,43 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/Locale.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <utility>
+
using namespace llvm;
static const size_t TabStop = 8;
namespace {
+
struct LineNoCacheTy {
const char *LastQuery;
unsigned LastQueryBufferID;
unsigned LineNoOfQuery;
};
-}
+
+} // end anonymous namespace
static LineNoCacheTy *getCache(void *Ptr) {
return (LineNoCacheTy*)Ptr;
}
-
SourceMgr::~SourceMgr() {
// Delete the line # cache if allocated.
if (LineNoCacheTy *Cache = getCache(LineNoCache))
@@ -132,12 +145,10 @@ void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
<< ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
}
-
SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
const Twine &Msg,
ArrayRef<SMRange> Ranges,
ArrayRef<SMFixIt> FixIts) const {
-
// First thing to do: find the current buffer containing the specified
// location to pull out the source line.
SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges;
@@ -223,7 +234,7 @@ void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc,
void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
const Twine &Msg, ArrayRef<SMRange> Ranges,
ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
- PrintMessage(llvm::errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors);
+ PrintMessage(errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors);
}
//===----------------------------------------------------------------------===//
@@ -233,7 +244,7 @@ void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN,
int Line, int Col, SourceMgr::DiagKind Kind,
StringRef Msg, StringRef LineStr,
- ArrayRef<std::pair<unsigned,unsigned> > Ranges,
+ ArrayRef<std::pair<unsigned,unsigned>> Ranges,
ArrayRef<SMFixIt> Hints)
: SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Kind(Kind),
Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()),
@@ -286,7 +297,7 @@ static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
// FIXME: This assertion is intended to catch unintended use of multibyte
// characters in fixits. If we decide to do this, we'll have to track
// separate byte widths for the source and fixit lines.
- assert((size_t)llvm::sys::locale::columnWidth(I->getText()) ==
+ assert((size_t)sys::locale::columnWidth(I->getText()) ==
I->getText().size());
// This relies on one byte per column in our fixit hints.
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index d81250e48dde..9b7cc1c1d182 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/edit_distance.h"
@@ -595,6 +596,18 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
return false;
}
+bool StringRef::getAsDouble(double &Result, bool AllowInexact) const {
+ APFloat F(0.0);
+ APFloat::opStatus Status =
+ F.convertFromString(*this, APFloat::rmNearestTiesToEven);
+ if (Status != APFloat::opOK) {
+ if (!AllowInexact || Status != APFloat::opInexact)
+ return true;
+ }
+
+ Result = F.convertToDouble();
+ return false;
+}
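+
The new StringRef::getAsDouble follows the getAsInteger convention of returning true on failure; a minimal sketch:

#include "llvm/ADT/StringRef.h"

bool parseHalf(double &Out) {
  llvm::StringRef S = "0.5";
  return S.getAsDouble(Out); // returns false (success); Out == 0.5
}
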
// Implementation of StringRef hashing.
hash_code llvm::hash_value(StringRef S) {
diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp
index 42fab671a251..639d2ece263a 100644
--- a/lib/Support/TargetParser.cpp
+++ b/lib/Support/TargetParser.cpp
@@ -448,6 +448,8 @@ bool llvm::AArch64::getExtensionFeatures(unsigned Extensions,
Features.push_back("+spe");
if (Extensions & AArch64::AEK_RAS)
Features.push_back("+ras");
+ if (Extensions & AArch64::AEK_LSE)
+ Features.push_back("+lse");
return true;
}
@@ -725,6 +727,7 @@ unsigned llvm::ARM::parseArchProfile(StringRef Arch) {
case ARM::AK_ARMV8R:
return ARM::PK_R;
case ARM::AK_ARMV7A:
+ case ARM::AK_ARMV7VE:
case ARM::AK_ARMV7K:
case ARM::AK_ARMV8A:
case ARM::AK_ARMV8_1A:
@@ -761,6 +764,7 @@ unsigned llvm::ARM::parseArchVersion(StringRef Arch) {
case ARM::AK_ARMV6M:
return 6;
case ARM::AK_ARMV7A:
+ case ARM::AK_ARMV7VE:
case ARM::AK_ARMV7R:
case ARM::AK_ARMV7M:
case ARM::AK_ARMV7S:
diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp
index 760f9e2c388b..6a10b988d464 100644
--- a/lib/Support/Threading.cpp
+++ b/lib/Support/Threading.cpp
@@ -14,14 +14,20 @@
#include "llvm/Support/Threading.h"
#include "llvm/Config/config.h"
-#include "llvm/Support/Atomic.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/Mutex.h"
-#include "llvm/Support/thread.h"
+
#include <cassert>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
using namespace llvm;
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
bool llvm::llvm_is_multithreaded() {
#if LLVM_ENABLE_THREADS != 0
return true;
@@ -30,100 +36,47 @@ bool llvm::llvm_is_multithreaded() {
#endif
}
-#if LLVM_ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H)
-#include <pthread.h>
-
-struct ThreadInfo {
- void (*UserFn)(void *);
- void *UserData;
-};
-static void *ExecuteOnThread_Dispatch(void *Arg) {
- ThreadInfo *TI = reinterpret_cast<ThreadInfo*>(Arg);
- TI->UserFn(TI->UserData);
- return nullptr;
-}
-
-void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
+#if LLVM_ENABLE_THREADS == 0 || \
+ (!defined(LLVM_ON_WIN32) && !defined(HAVE_PTHREAD_H))
+// Support for non-Win32, non-pthread implementation.
+void llvm::llvm_execute_on_thread(void (*Fn)(void *), void *UserData,
unsigned RequestedStackSize) {
- ThreadInfo Info = { Fn, UserData };
- pthread_attr_t Attr;
- pthread_t Thread;
-
- // Construct the attributes object.
- if (::pthread_attr_init(&Attr) != 0)
- return;
-
- // Set the requested stack size, if given.
- if (RequestedStackSize != 0) {
- if (::pthread_attr_setstacksize(&Attr, RequestedStackSize) != 0)
- goto error;
- }
-
- // Construct and execute the thread.
- if (::pthread_create(&Thread, &Attr, ExecuteOnThread_Dispatch, &Info) != 0)
- goto error;
-
- // Wait for the thread and clean up.
- ::pthread_join(Thread, nullptr);
-
- error:
- ::pthread_attr_destroy(&Attr);
+ (void)RequestedStackSize;
+ Fn(UserData);
}
-#elif LLVM_ENABLE_THREADS!=0 && defined(LLVM_ON_WIN32)
-#include "Windows/WindowsSupport.h"
-#include <process.h>
-// Windows will at times define MemoryFence.
-#ifdef MemoryFence
-#undef MemoryFence
-#endif
+unsigned llvm::heavyweight_hardware_concurrency() { return 1; }
-struct ThreadInfo {
- void (*func)(void*);
- void *param;
-};
+uint64_t llvm::get_threadid() { return 0; }
-static unsigned __stdcall ThreadCallback(void *param) {
- struct ThreadInfo *info = reinterpret_cast<struct ThreadInfo *>(param);
- info->func(info->param);
+uint32_t llvm::get_max_thread_name_length() { return 0; }
- return 0;
-}
+void llvm::set_thread_name(const Twine &Name) {}
-void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
- unsigned RequestedStackSize) {
- struct ThreadInfo param = { Fn, UserData };
-
- HANDLE hThread = (HANDLE)::_beginthreadex(NULL,
- RequestedStackSize, ThreadCallback,
- &param, 0, NULL);
+void llvm::get_thread_name(SmallVectorImpl<char> &Name) { Name.clear(); }
- if (hThread) {
- // We actually don't care whether the wait succeeds or fails, in
- // the same way we don't care whether the pthread_join call succeeds
- // or fails. There's not much we could do if this were to fail. But
- // on success, this call will wait until the thread finishes executing
- // before returning.
- (void)::WaitForSingleObject(hThread, INFINITE);
- ::CloseHandle(hThread);
- }
-}
#else
-// Support for non-Win32, non-pthread implementation.
-void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
- unsigned RequestedStackSize) {
- (void) RequestedStackSize;
- Fn(UserData);
-}
-
-#endif
+#include <thread>
unsigned llvm::heavyweight_hardware_concurrency() {
-#if !LLVM_ENABLE_THREADS
- return 1;
-#endif
+ // Since we can't get here unless LLVM_ENABLE_THREADS == 1, it is safe to use
+ // `std::thread` directly instead of `llvm::thread` (and indeed, doing so
+ // allows us to not define `thread` in the llvm namespace, which conflicts
+ // with some platforms such as FreeBSD, whose headers also define a struct
+ // called `thread` in the global namespace that can cause ambiguity due to
+ // ADL).
int NumPhysical = sys::getHostNumPhysicalCores();
if (NumPhysical == -1)
- return thread::hardware_concurrency();
+ return std::thread::hardware_concurrency();
return NumPhysical;
}
+
+// Include the platform-specific parts of this class.
+#ifdef LLVM_ON_UNIX
+#include "Unix/Threading.inc"
+#endif
+#ifdef LLVM_ON_WIN32
+#include "Windows/Threading.inc"
+#endif
+
+#endif
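
With the reorganization above, llvm_execute_on_thread keeps one contract on every configuration: run Fn on a fresh thread where threading is available, inline otherwise, and return only after Fn completes. A small usage sketch (names illustrative):

#include "llvm/Support/Threading.h"

static void work(void *Data) {
  // Data round-trips as a void*, as in the ThreadInfo dispatch pattern.
  int *Payload = static_cast<int *>(Data);
  *Payload += 1;
}

void demoExecute() {
  int Payload = 41;
  llvm::llvm_execute_on_thread(work, &Payload, /*RequestedStackSize=*/0);
  // Payload == 42 here, since the call joins before returning.
}
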
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index fbd73d0b6b3b..8d68c6ae9682 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -72,22 +72,9 @@ std::unique_ptr<raw_fd_ostream> llvm::CreateInfoOutputFile() {
return llvm::make_unique<raw_fd_ostream>(2, false); // stderr.
}
-
-static TimerGroup *DefaultTimerGroup = nullptr;
static TimerGroup *getDefaultTimerGroup() {
- TimerGroup *tmp = DefaultTimerGroup;
- sys::MemoryFence();
- if (tmp) return tmp;
-
- sys::SmartScopedLock<true> Lock(*TimerLock);
- tmp = DefaultTimerGroup;
- if (!tmp) {
- tmp = new TimerGroup("misc", "Miscellaneous Ungrouped Timers");
- sys::MemoryFence();
- DefaultTimerGroup = tmp;
- }
-
- return tmp;
+ static TimerGroup DefaultTimerGroup("misc", "Miscellaneous Ungrouped Timers");
+ return &DefaultTimerGroup;
}
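
The pattern behind this simplification, in miniature: C++11 guarantees that a function-local static is initialized exactly once, even under concurrent calls, so the hand-rolled double-checked locking and memory fences become unnecessary (the type here is a hypothetical stand-in):

struct Registry {
  int Value = 0;
};

static Registry &getRegistry() {
  // Construction happens on the first call; the runtime provides the locking.
  static Registry R;
  return R;
}
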
//===----------------------------------------------------------------------===//
@@ -309,7 +296,7 @@ void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
// If this is not a collection of ungrouped timers, print the total time.
// Ungrouped timers don't really make sense to add up. We still print the
// TOTAL line to make the percentages make sense.
- if (this != DefaultTimerGroup)
+ if (this != getDefaultTimerGroup())
OS << format(" Total Execution Time: %5.4f seconds (%5.4f wall clock)\n",
Total.getProcessTime(), Total.getWallTime());
OS << '\n';
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 6783b40a125d..64d5977e2ebd 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -510,6 +510,7 @@ static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
.EndsWith("coff", Triple::COFF)
.EndsWith("elf", Triple::ELF)
.EndsWith("macho", Triple::MachO)
+ .EndsWith("wasm", Triple::Wasm)
.Default(Triple::UnknownObjectFormat);
}
@@ -550,6 +551,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
case ARM::AK_ARMV7A:
case ARM::AK_ARMV7R:
return Triple::ARMSubArch_v7;
+ case ARM::AK_ARMV7VE:
+ return Triple::ARMSubArch_v7ve;
case ARM::AK_ARMV7K:
return Triple::ARMSubArch_v7k;
case ARM::AK_ARMV7M:
@@ -581,6 +584,7 @@ static StringRef getObjectFormatTypeName(Triple::ObjectFormatType Kind) {
case Triple::COFF: return "coff";
case Triple::ELF: return "elf";
case Triple::MachO: return "macho";
+ case Triple::Wasm: return "wasm";
}
llvm_unreachable("unknown object format type");
}
@@ -1511,6 +1515,7 @@ StringRef Triple::getARMCPUForArch(StringRef MArch) const {
return "strongarm";
}
case llvm::Triple::NaCl:
+ case llvm::Triple::OpenBSD:
return "cortex-a8";
default:
switch (getEnvironment()) {
diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp
index 465c6e6b8c4c..d17cd4e66439 100644
--- a/lib/Support/Twine.cpp
+++ b/lib/Support/Twine.cpp
@@ -173,10 +173,12 @@ void Twine::printRepr(raw_ostream &OS) const {
OS << ")";
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void Twine::dump() const {
print(dbgs());
}
-void Twine::dumpRepr() const {
+LLVM_DUMP_METHOD void Twine::dumpRepr() const {
printRepr(dbgs());
}
+#endif
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index e0b11aaff007..93f8982196b3 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -48,6 +48,8 @@
# endif
#endif
+#include <pwd.h>
+
#ifdef __APPLE__
#include <mach-o/dyld.h>
#include <sys/attr.h>
@@ -65,23 +67,41 @@
#endif
#include <sys/types.h>
-#if !defined(__APPLE__) && !defined(__OpenBSD__) && !defined(__ANDROID__)
+#if !defined(__APPLE__) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && \
+ !defined(__linux__)
#include <sys/statvfs.h>
#define STATVFS statvfs
+#define FSTATVFS fstatvfs
#define STATVFS_F_FRSIZE(vfs) vfs.f_frsize
#else
-#ifdef __OpenBSD__
+#if defined(__OpenBSD__) || defined(__FreeBSD__)
#include <sys/param.h>
#include <sys/mount.h>
-#elif defined(__ANDROID__)
+#elif defined(__linux__)
+#if defined(HAVE_LINUX_MAGIC_H)
+#include <linux/magic.h>
+#else
+#if defined(HAVE_LINUX_NFS_FS_H)
+#include <linux/nfs_fs.h>
+#endif
+#if defined(HAVE_LINUX_SMB_H)
+#include <linux/smb.h>
+#endif
+#endif
#include <sys/vfs.h>
#else
#include <sys/mount.h>
#endif
#define STATVFS statfs
+#define FSTATVFS fstatfs
#define STATVFS_F_FRSIZE(vfs) static_cast<uint64_t>(vfs.f_bsize)
#endif
+#if defined(__NetBSD__)
+#define STATVFS_F_FLAG(vfs) (vfs).f_flag
+#else
+#define STATVFS_F_FLAG(vfs) (vfs).f_flags
+#endif
using namespace llvm;
@@ -180,7 +200,7 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
if (getprogpath(exe_path, argv0))
return exe_path;
}
-#elif defined(HAVE_DLFCN_H)
+#elif defined(HAVE_DLFCN_H) && defined(HAVE_DLADDR)
// Use dladdr to get executable path if available.
Dl_info DLInfo;
int err = dladdr(MainAddr, &DLInfo);
@@ -210,6 +230,10 @@ UniqueID file_status::getUniqueID() const {
return UniqueID(fs_st_dev, fs_st_ino);
}
+uint32_t file_status::getLinkCount() const {
+ return fs_st_nlinks;
+}
+
ErrorOr<space_info> disk_space(const Twine &Path) {
struct STATVFS Vfs;
if (::STATVFS(Path.str().c_str(), &Vfs))
@@ -257,6 +281,16 @@ std::error_code current_path(SmallVectorImpl<char> &result) {
return std::error_code();
}
+std::error_code set_current_path(const Twine &path) {
+ SmallString<128> path_storage;
+ StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+ if (::chdir(p.begin()) == -1)
+ return std::error_code(errno, std::generic_category());
+
+ return std::error_code();
+}
+
std::error_code create_directory(const Twine &path, bool IgnoreExisting,
perms Perms) {
SmallString<128> path_storage;
@@ -325,6 +359,51 @@ std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
return std::error_code();
}
+static bool is_local_impl(struct STATVFS &Vfs) {
+#if defined(__linux__)
+#ifndef NFS_SUPER_MAGIC
+#define NFS_SUPER_MAGIC 0x6969
+#endif
+#ifndef SMB_SUPER_MAGIC
+#define SMB_SUPER_MAGIC 0x517B
+#endif
+#ifndef CIFS_MAGIC_NUMBER
+#define CIFS_MAGIC_NUMBER 0xFF534D42
+#endif
+ switch ((uint32_t)Vfs.f_type) {
+ case NFS_SUPER_MAGIC:
+ case SMB_SUPER_MAGIC:
+ case CIFS_MAGIC_NUMBER:
+ return false;
+ default:
+ return true;
+ }
+#elif defined(__CYGWIN__)
+ // Cygwin doesn't expose this information; would need to use Win32 API.
+ return false;
+#else
+ return !!(STATVFS_F_FLAG(Vfs) & MNT_LOCAL);
+#endif
+}
+
+std::error_code is_local(const Twine &Path, bool &Result) {
+ struct STATVFS Vfs;
+ if (::STATVFS(Path.str().c_str(), &Vfs))
+ return std::error_code(errno, std::generic_category());
+
+ Result = is_local_impl(Vfs);
+ return std::error_code();
+}
+
+std::error_code is_local(int FD, bool &Result) {
+ struct STATVFS Vfs;
+ if (::FSTATVFS(FD, &Vfs))
+ return std::error_code(errno, std::generic_category());
+
+ Result = is_local_impl(Vfs);
+ return std::error_code();
+}
+
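A usage sketch for the new is_local queries (the helper name is illustrative; errors are treated conservatively):

#include "llvm/Support/FileSystem.h"

bool isOnLocalDisk(const llvm::Twine &P) {
  bool Local = false;
  if (llvm::sys::fs::is_local(P, Local))
    return false; // on error, don't claim the file is local
  return Local;
}
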
std::error_code rename(const Twine &from, const Twine &to) {
// Get arguments.
SmallString<128> from_storage;
@@ -405,6 +484,46 @@ std::error_code equivalent(const Twine &A, const Twine &B, bool &result) {
return std::error_code();
}
+static void expandTildeExpr(SmallVectorImpl<char> &Path) {
+ StringRef PathStr(Path.begin(), Path.size());
+ if (PathStr.empty() || !PathStr.startswith("~"))
+ return;
+
+ PathStr = PathStr.drop_front();
+ StringRef Expr =
+ PathStr.take_until([](char c) { return path::is_separator(c); });
+ StringRef Remainder = PathStr.substr(Expr.size() + 1);
+ SmallString<128> Storage;
+ if (Expr.empty()) {
+ // This is just ~/..., so resolve it to the current user's home directory.
+ if (!path::home_directory(Storage)) {
+ // For some reason we couldn't get the home directory. Just exit.
+ return;
+ }
+
+ // Overwrite the first character and insert the rest.
+ Path[0] = Storage[0];
+ Path.insert(Path.begin() + 1, Storage.begin() + 1, Storage.end());
+ return;
+ }
+
+ // This is a string of the form ~username/, look up this user's entry in the
+ // password database.
+ struct passwd *Entry = nullptr;
+ std::string User = Expr.str();
+ Entry = ::getpwnam(User.c_str());
+
+ if (!Entry) {
+ // Unable to look up the entry; just return the original path unchanged.
+ return;
+ }
+
+ Storage = Remainder;
+ Path.clear();
+ Path.append(Entry->pw_dir, Entry->pw_dir + strlen(Entry->pw_dir));
+ llvm::sys::path::append(Path, Storage);
+}
+
static std::error_code fillStatus(int StatRet, const struct stat &Status,
file_status &Result) {
if (StatRet != 0) {
@@ -430,22 +549,23 @@ static std::error_code fillStatus(int StatRet, const struct stat &Status,
Type = file_type::fifo_file;
else if (S_ISSOCK(Status.st_mode))
Type = file_type::socket_file;
+ else if (S_ISLNK(Status.st_mode))
+ Type = file_type::symlink_file;
- perms Perms = static_cast<perms>(Status.st_mode);
- Result =
- file_status(Type, Perms, Status.st_dev, Status.st_ino, Status.st_atime,
- Status.st_mtime, Status.st_uid, Status.st_gid,
- Status.st_size);
+ perms Perms = static_cast<perms>(Status.st_mode) & all_perms;
+ Result = file_status(Type, Perms, Status.st_dev, Status.st_nlink,
+ Status.st_ino, Status.st_atime, Status.st_mtime,
+ Status.st_uid, Status.st_gid, Status.st_size);
return std::error_code();
}
-std::error_code status(const Twine &Path, file_status &Result) {
+std::error_code status(const Twine &Path, file_status &Result, bool Follow) {
SmallString<128> PathStorage;
StringRef P = Path.toNullTerminatedStringRef(PathStorage);
struct stat Status;
- int StatRet = ::stat(P.begin(), &Status);
+ int StatRet = (Follow ? ::stat : ::lstat)(P.begin(), &Status);
return fillStatus(StatRet, Status, Result);
}
@@ -455,6 +575,15 @@ std::error_code status(int FD, file_status &Result) {
return fillStatus(StatRet, Status, Result);
}
+std::error_code setPermissions(const Twine &Path, perms Permissions) {
+ SmallString<128> PathStorage;
+ StringRef P = Path.toNullTerminatedStringRef(PathStorage);
+
+ if (::chmod(P.begin(), Permissions))
+ return std::error_code(errno, std::generic_category());
+ return std::error_code();
+}
+
std::error_code setLastModificationAndAccessTime(int FD, TimePoint<> Time) {
#if defined(HAVE_FUTIMENS)
timespec Times[2];
@@ -481,6 +610,26 @@ std::error_code mapped_file_region::init(int FD, uint64_t Offset,
int flags = (Mode == readwrite) ? MAP_SHARED : MAP_PRIVATE;
int prot = (Mode == readonly) ? PROT_READ : (PROT_READ | PROT_WRITE);
+#if defined(__APPLE__)
+ //----------------------------------------------------------------------
+ // Newer versions of macOS provide the MAP_RESILIENT_CODESIGN flag, which
+ // lets us read from binaries whose code signature is invalid without
+ // crashing. Similarly, MAP_RESILIENT_MEDIA makes a mapping of a file on
+ // removable media return zeroes for any pages we try to read after the
+ // media becomes unavailable, instead of crashing. These flags are only
+ // usable when mapping with PROT_READ, so take care not to specify them
+ // otherwise.
+ //----------------------------------------------------------------------
+ if (Mode == readonly) {
+#if defined(MAP_RESILIENT_CODESIGN)
+ flags |= MAP_RESILIENT_CODESIGN;
+#endif
+#if defined(MAP_RESILIENT_MEDIA)
+ flags |= MAP_RESILIENT_MEDIA;
+#endif
+ }
+#endif // #if defined (__APPLE__)
+
Mapping = ::mmap(nullptr, Size, prot, flags, FD, Offset);
if (Mapping == MAP_FAILED)
return std::error_code(errno, std::generic_category());
@@ -526,7 +675,8 @@ int mapped_file_region::alignment() {
}
std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
- StringRef path){
+ StringRef path,
+ bool follow_symlinks) {
SmallString<128> path_null(path);
DIR *directory = ::opendir(path_null.c_str());
if (!directory)
@@ -535,7 +685,7 @@ std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
it.IterationHandle = reinterpret_cast<intptr_t>(directory);
// Add something for replace_filename to replace.
path::append(path_null, ".");
- it.CurrentEntry = directory_entry(path_null.str());
+ it.CurrentEntry = directory_entry(path_null.str(), follow_symlinks);
return directory_iterator_increment(it);
}
@@ -577,10 +727,19 @@ std::error_code openFileForRead(const Twine &Name, int &ResultFD,
SmallVectorImpl<char> *RealPath) {
SmallString<128> Storage;
StringRef P = Name.toNullTerminatedStringRef(Storage);
- while ((ResultFD = open(P.begin(), O_RDONLY)) < 0) {
+ int OpenFlags = O_RDONLY;
+#ifdef O_CLOEXEC
+ OpenFlags |= O_CLOEXEC;
+#endif
+ while ((ResultFD = open(P.begin(), OpenFlags)) < 0) {
if (errno != EINTR)
return std::error_code(errno, std::generic_category());
}
+#ifndef O_CLOEXEC
+ int r = fcntl(ResultFD, F_SETFD, FD_CLOEXEC);
+ (void)r;
+ assert(r == 0 && "fcntl(F_SETFD, FD_CLOEXEC) failed");
+#endif
// Attempt to get the real name of the file, if the user asked
if(!RealPath)
return std::error_code();
@@ -616,6 +775,10 @@ std::error_code openFileForWrite(const Twine &Name, int &ResultFD,
int OpenFlags = O_CREAT;
+#ifdef O_CLOEXEC
+ OpenFlags |= O_CLOEXEC;
+#endif
+
if (Flags & F_RW)
OpenFlags |= O_RDWR;
else
@@ -635,6 +798,11 @@ std::error_code openFileForWrite(const Twine &Name, int &ResultFD,
if (errno != EINTR)
return std::error_code(errno, std::generic_category());
}
+#ifndef O_CLOEXEC
+ int r = fcntl(ResultFD, F_SETFD, FD_CLOEXEC);
+ (void)r;
+ assert(r == 0 && "fcntl(F_SETFD, FD_CLOEXEC) failed");
+#endif
return std::error_code();
}
@@ -685,18 +853,85 @@ std::error_code getPathFromOpenFD(int FD, SmallVectorImpl<char> &ResultPath) {
return std::error_code();
}
+template <typename T>
+static std::error_code remove_directories_impl(const T &Entry,
+ bool IgnoreErrors) {
+ std::error_code EC;
+ directory_iterator Begin(Entry, EC, false);
+ directory_iterator End;
+ while (Begin != End) {
+ auto &Item = *Begin;
+ file_status st;
+ EC = Item.status(st);
+ if (EC && !IgnoreErrors)
+ return EC;
+
+ if (is_directory(st)) {
+ EC = remove_directories_impl(Item, IgnoreErrors);
+ if (EC && !IgnoreErrors)
+ return EC;
+ }
+
+ EC = fs::remove(Item.path(), true);
+ if (EC && !IgnoreErrors)
+ return EC;
+
+ Begin.increment(EC);
+ if (EC && !IgnoreErrors)
+ return EC;
+ }
+ return std::error_code();
+}
+
+std::error_code remove_directories(const Twine &path, bool IgnoreErrors) {
+ auto EC = remove_directories_impl(path, IgnoreErrors);
+ if (EC && !IgnoreErrors)
+ return EC;
+ EC = fs::remove(path, true);
+ if (EC && !IgnoreErrors)
+ return EC;
+ return std::error_code();
+}
+
+std::error_code real_path(const Twine &path, SmallVectorImpl<char> &dest,
+ bool expand_tilde) {
+ dest.clear();
+ if (path.isTriviallyEmpty())
+ return std::error_code();
+
+ if (expand_tilde) {
+ SmallString<128> Storage;
+ path.toVector(Storage);
+ expandTildeExpr(Storage);
+ return real_path(Storage, dest, false);
+ }
+
+ int fd;
+ std::error_code EC = openFileForRead(path, fd, &dest);
+
+ if (EC)
+ return EC;
+ ::close(fd);
+ return std::error_code();
+}
+
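A sketch of the new real_path entry point with tilde expansion enabled; on Unix the expansion is performed by expandTildeExpr above before the path is resolved through an opened file descriptor (the helper name and "/home/<user>" are illustrative):

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FileSystem.h"

std::error_code resolveHomeRelative(llvm::SmallVectorImpl<char> &Out) {
  // "~/src" first becomes "/home/<user>/src", then is fully resolved.
  return llvm::sys::fs::real_path("~/src", Out, /*expand_tilde=*/true);
}
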
} // end namespace fs
namespace path {
bool home_directory(SmallVectorImpl<char> &result) {
- if (char *RequestedDir = getenv("HOME")) {
- result.clear();
- result.append(RequestedDir, RequestedDir + strlen(RequestedDir));
- return true;
+ char *RequestedDir = getenv("HOME");
+ if (!RequestedDir) {
+ struct passwd *pw = getpwuid(getuid());
+ if (pw && pw->pw_dir)
+ RequestedDir = pw->pw_dir;
}
+ if (!RequestedDir)
+ return false;
- return false;
+ result.clear();
+ result.append(RequestedDir, RequestedDir + strlen(RequestedDir));
+ return true;
}
static bool getDarwinConfDir(bool TempDir, SmallVectorImpl<char> &Result) {
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index 9752b70644c6..88ad21e9806e 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -25,8 +25,8 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <string>
-#if HAVE_EXECINFO_H
-# include <execinfo.h> // For backtrace().
+#ifdef HAVE_BACKTRACE
+# include BACKTRACE_HEADER // For backtrace().
#endif
#if HAVE_SIGNAL_H
#include <signal.h>
@@ -59,7 +59,7 @@ using namespace llvm;
static RETSIGTYPE SignalHandler(int Sig); // defined below.
-static ManagedStatic<SmartMutex<true> > SignalsMutex;
+static ManagedStatic<sys::SmartMutex<true> > SignalsMutex;
/// InterruptFunction - The function to call if ctrl-c is pressed.
static void (*InterruptFunction)() = nullptr;
@@ -149,11 +149,7 @@ static void CreateSigAltStack() {}
#endif
static void RegisterHandlers() {
- // We need to dereference the signals mutex during handler registration so
- // that we force its construction. This is to prevent the first use being
- // during handling an actual signal because you can't safely call new in a
- // signal handler.
- *SignalsMutex;
+ sys::SmartScopedLock<true> Guard(*SignalsMutex);
// If the handlers are already registered, we're done.
if (NumRegisteredSignals != 0) return;
@@ -223,7 +219,7 @@ static RETSIGTYPE SignalHandler(int Sig) {
sigprocmask(SIG_UNBLOCK, &SigMask, nullptr);
{
- unique_lock<SmartMutex<true>> Guard(*SignalsMutex);
+ unique_lock<sys::SmartMutex<true>> Guard(*SignalsMutex);
RemoveFilesToRemove();
if (std::find(std::begin(IntSigs), std::end(IntSigs), Sig)
@@ -412,7 +408,7 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS) {
if (printSymbolizedStackTrace(Argv0, StackTrace, depth, OS))
return;
-#if HAVE_DLFCN_H && __GNUG__ && !defined(__CYGWIN__)
+#if HAVE_DLFCN_H && HAVE_DLADDR
int width = 0;
for (int i = 0; i < depth; ++i) {
Dl_info dlinfo;
@@ -462,7 +458,7 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS) {
}
static void PrintStackTraceSignalHandler(void *) {
- PrintStackTrace(llvm::errs());
+ sys::PrintStackTrace(llvm::errs());
}
void llvm::sys::DisableSystemDialogsOnCrash() {}
diff --git a/lib/Support/Unix/Threading.inc b/lib/Support/Unix/Threading.inc
new file mode 100644
index 000000000000..407b194e1b6a
--- /dev/null
+++ b/lib/Support/Unix/Threading.inc
@@ -0,0 +1,215 @@
+//===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Unix specific implementation of Threading functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+
+#if defined(__APPLE__)
+#include <mach/mach_init.h>
+#include <mach/mach_port.h>
+#endif
+
+#include <pthread.h>
+
+#if defined(__FreeBSD__)
+#include <pthread_np.h> // For pthread_getthreadid_np()
+#endif
+
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+#include <sys/sysctl.h>
+#include <sys/user.h>
+#include <errno.h>
+#include <unistd.h>
+#endif
+
+#if defined(__NetBSD__)
+#include <lwp.h> // For _lwp_self()
+#endif
+
+#if defined(__linux__)
+#include <unistd.h> // For syscall()
+#include <sys/syscall.h> // For syscall codes
+#endif
+
+namespace {
+ struct ThreadInfo {
+ void(*UserFn)(void *);
+ void *UserData;
+ };
+}
+
+static void *ExecuteOnThread_Dispatch(void *Arg) {
+ ThreadInfo *TI = reinterpret_cast<ThreadInfo*>(Arg);
+ TI->UserFn(TI->UserData);
+ return nullptr;
+}
+
+void llvm::llvm_execute_on_thread(void(*Fn)(void*), void *UserData,
+ unsigned RequestedStackSize) {
+ ThreadInfo Info = { Fn, UserData };
+ pthread_attr_t Attr;
+ pthread_t Thread;
+
+ // Construct the attributes object.
+ if (::pthread_attr_init(&Attr) != 0)
+ return;
+
+ // Set the requested stack size, if given.
+ if (RequestedStackSize != 0) {
+ if (::pthread_attr_setstacksize(&Attr, RequestedStackSize) != 0)
+ goto error;
+ }
+
+ // Construct and execute the thread.
+ if (::pthread_create(&Thread, &Attr, ExecuteOnThread_Dispatch, &Info) != 0)
+ goto error;
+
+ // Wait for the thread and clean up.
+ ::pthread_join(Thread, nullptr);
+
+error:
+ ::pthread_attr_destroy(&Attr);
+}
+
+
+uint64_t llvm::get_threadid() {
+#if defined(__APPLE__)
+ // Calling "mach_thread_self()" bumps the reference count on the thread
+ // port, so we need to deallocate it. mach_task_self() doesn't bump the ref
+ // count.
+ thread_port_t Self = mach_thread_self();
+ mach_port_deallocate(mach_task_self(), Self);
+ return Self;
+#elif defined(__FreeBSD__)
+ return uint64_t(pthread_getthreadid_np());
+#elif defined(__NetBSD__)
+ return uint64_t(_lwp_self());
+#elif defined(__ANDROID__)
+ return uint64_t(gettid());
+#elif defined(__linux__)
+ return uint64_t(syscall(SYS_gettid));
+#elif defined(LLVM_ON_WIN32)
+ return uint64_t(::GetCurrentThreadId());
+#else
+ return uint64_t(pthread_self());
+#endif
+}
+
+
+static constexpr uint32_t get_max_thread_name_length_impl() {
+#if defined(__NetBSD__)
+ return PTHREAD_MAX_NAMELEN_NP;
+#elif defined(__APPLE__)
+ return 64;
+#elif defined(__linux__)
+#if HAVE_PTHREAD_SETNAME_NP
+ return 16;
+#else
+ return 0;
+#endif
+#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+ return 16;
+#else
+ return 0;
+#endif
+}
+
+uint32_t llvm::get_max_thread_name_length() {
+ return get_max_thread_name_length_impl();
+}
+
+void llvm::set_thread_name(const Twine &Name) {
+ // Make sure the input is null terminated.
+ SmallString<64> Storage;
+ StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
+
+ // Truncate from the beginning, not the end, if the specified name is too
+ // long. For one, this ensures that the resulting string is still null
+ // terminated, but additionally the end of a long thread name will usually
+ // be more unique than the beginning, since a common pattern is for similar
+ // threads to share a common prefix.
+ if (get_max_thread_name_length() > 0)
+ NameStr = NameStr.take_back(get_max_thread_name_length());
+ (void)NameStr;
+#if defined(__linux__)
+#if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || defined(__ANDROID__)
+#if HAVE_PTHREAD_SETNAME_NP
+ ::pthread_setname_np(::pthread_self(), NameStr.data());
+#endif
+#endif
+#elif defined(__FreeBSD__)
+ ::pthread_set_name_np(::pthread_self(), NameStr.data());
+#elif defined(__NetBSD__)
+ ::pthread_setname_np(::pthread_self(), "%s",
+ const_cast<char *>(NameStr.data()));
+#elif defined(__APPLE__)
+ ::pthread_setname_np(NameStr.data());
+#endif
+}
+
+void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
+ Name.clear();
+
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+ int pid = ::getpid();
+ uint64_t tid = get_threadid();
+
+ struct kinfo_proc *kp = nullptr, *nkp;
+ size_t len = 0;
+ int error;
+ int ctl[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
+ (int)pid };
+
+ while (1) {
+ error = sysctl(ctl, 4, kp, &len, nullptr, 0);
+ if (kp == nullptr || (error != 0 && errno == ENOMEM)) {
+ // Add extra space in case threads are added before next call.
+ len += sizeof(*kp) + len / 10;
+ nkp = (struct kinfo_proc *)realloc(kp, len);
+ if (nkp == nullptr) {
+ free(kp);
+ return;
+ }
+ kp = nkp;
+ continue;
+ }
+ if (error != 0)
+ len = 0;
+ break;
+ }
+
+ for (size_t i = 0; i < len / sizeof(*kp); i++) {
+ if (kp[i].ki_tid == (lwpid_t)tid) {
+ Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname));
+ break;
+ }
+ }
+ free(kp);
+ return;
+#elif defined(__NetBSD__)
+ constexpr uint32_t len = get_max_thread_name_length_impl();
+ char buf[len];
+ ::pthread_getname_np(::pthread_self(), buf, len);
+
+ Name.append(buf, buf + strlen(buf));
+#elif defined(__linux__)
+#if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || defined(__ANDROID__)
+#if HAVE_PTHREAD_GETNAME_NP
+ constexpr uint32_t len = get_max_thread_name_length_impl();
+ char Buffer[len];
+ if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len))
+ Name.append(Buffer, Buffer + strlen(Buffer));
+#endif
+#endif
+#endif
+}
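
A usage sketch for the thread-name helpers defined above (names illustrative); note the deliberate take_back truncation, which keeps the more distinctive tail of a long name:

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Threading.h"

void nameThisThread(unsigned Index) {
  llvm::set_thread_name("mypool.worker." + llvm::Twine(Index));
  llvm::SmallString<16> Current;
  llvm::get_thread_name(Current);
  // Current may be truncated, or empty on platforms without getname support.
}
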
diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc
index 050689483deb..709499deeafa 100644
--- a/lib/Support/Windows/DynamicLibrary.inc
+++ b/lib/Support/Windows/DynamicLibrary.inc
@@ -24,7 +24,6 @@
#endif
namespace llvm {
-using namespace sys;
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only Win32 specific code
@@ -33,7 +32,7 @@ using namespace sys;
typedef BOOL (WINAPI *fpEnumerateLoadedModules)(HANDLE,PENUMLOADED_MODULES_CALLBACK64,PVOID);
static fpEnumerateLoadedModules fEnumerateLoadedModules;
-static DenseSet<HMODULE> *OpenedHandles;
+static llvm::ManagedStatic<DenseSet<HMODULE> > OpenedHandles;
static bool loadDebugHelp(void) {
HMODULE hLib = ::LoadLibraryW(L"Dbghelp.dll");
@@ -51,15 +50,13 @@ ELM_Callback(PCSTR ModuleName, DWORD64 ModuleBase,
return TRUE;
}
-DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
- std::string *errMsg) {
+sys::DynamicLibrary
+sys::DynamicLibrary::getPermanentLibrary(const char *filename,
+ std::string *errMsg) {
SmartScopedLock<true> lock(*SymbolsMutex);
if (!filename) {
// When no file is specified, enumerate all DLLs and EXEs in the process.
- if (OpenedHandles == 0)
- OpenedHandles = new DenseSet<HMODULE>();
-
if (!fEnumerateLoadedModules) {
if (!loadDebugHelp()) {
assert(false && "These APIs should always be available");
@@ -79,7 +76,7 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16");
return DynamicLibrary();
}
-
+
HMODULE a_handle = LoadLibraryW(filenameUnicode.data());
if (a_handle == 0) {
@@ -87,9 +84,6 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
return DynamicLibrary();
}
- if (OpenedHandles == 0)
- OpenedHandles = new DenseSet<HMODULE>();
-
// If we've already loaded this library, FreeLibrary() the handle in order to
// keep the internal refcount at +1.
if (!OpenedHandles->insert(a_handle).second)
@@ -98,6 +92,18 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
return DynamicLibrary(a_handle);
}
+sys::DynamicLibrary
+sys::DynamicLibrary::addPermanentLibrary(void *handle, std::string *errMsg) {
+ SmartScopedLock<true> lock(*SymbolsMutex);
+ // If we've already loaded this library, tell the caller.
+ if (!OpenedHandles->insert((HMODULE)handle).second) {
+ MakeErrMsg(errMsg, "Library already loaded");
+ return DynamicLibrary();
+ }
+
+ return DynamicLibrary(handle);
+}
+
// Stack probing routines are in the support library (e.g. libgcc), but we don't
// have dynamic linking on windows. Provide a hook.
#define EXPLICIT_SYMBOL(SYM) \
@@ -123,7 +129,7 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
#undef INLINE_DEF_SYMBOL1
#undef INLINE_DEF_SYMBOL2
-void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
+void *sys::DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) {
SmartScopedLock<true> Lock(*SymbolsMutex);
// First check symbols added via AddSymbol().
@@ -135,7 +141,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
}
// Now search the libraries.
- if (OpenedHandles) {
+ if (OpenedHandles.isConstructed()) {
for (DenseSet<HMODULE>::iterator I = OpenedHandles->begin(),
E = OpenedHandles->end(); I != E; ++I) {
FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
@@ -171,7 +177,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
return 0;
}
-void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
+void *sys::DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
if (!isValid())
return NULL;
if (Data == &OpenedHandles)
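
For context, the ManagedStatic swap above is what lets the manual "if (OpenedHandles == 0) OpenedHandles = new DenseSet<HMODULE>();" guards be deleted. A simplified model of the behaviour being relied on; a sketch only, since the real llvm::ManagedStatic additionally constructs atomically and registers the object for llvm_shutdown() cleanup:

    template <typename T> class ManagedStaticModel {
      T *Obj = nullptr;

    public:
      // Construct lazily on first dereference, so a global instance is
      // cheap until someone actually uses it.
      T &operator*() {
        if (!Obj)
          Obj = new T();
        return *Obj;
      }
      T *operator->() { return &**this; }
      // Lets readers like SearchForAddressOfSymbol() skip the container
      // without forcing its construction.
      bool isConstructed() const { return Obj != nullptr; }
    };
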
diff --git a/lib/Support/Windows/Mutex.inc b/lib/Support/Windows/Mutex.inc
index ab79d079122f..0af145ec9a4e 100644
--- a/lib/Support/Windows/Mutex.inc
+++ b/lib/Support/Windows/Mutex.inc
@@ -20,15 +20,14 @@
#include "llvm/Support/Mutex.h"
namespace llvm {
-using namespace sys;
-MutexImpl::MutexImpl(bool /*recursive*/)
+sys::MutexImpl::MutexImpl(bool /*recursive*/)
{
data_ = new CRITICAL_SECTION;
InitializeCriticalSection((LPCRITICAL_SECTION)data_);
}
-MutexImpl::~MutexImpl()
+sys::MutexImpl::~MutexImpl()
{
DeleteCriticalSection((LPCRITICAL_SECTION)data_);
delete (LPCRITICAL_SECTION)data_;
@@ -36,21 +35,21 @@ MutexImpl::~MutexImpl()
}
bool
-MutexImpl::acquire()
+sys::MutexImpl::acquire()
{
EnterCriticalSection((LPCRITICAL_SECTION)data_);
return true;
}
bool
-MutexImpl::release()
+sys::MutexImpl::release()
{
LeaveCriticalSection((LPCRITICAL_SECTION)data_);
return true;
}
bool
-MutexImpl::tryacquire()
+sys::MutexImpl::tryacquire()
{
return TryEnterCriticalSection((LPCRITICAL_SECTION)data_);
}
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index 27b250b428a5..b00d3905f658 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -26,6 +26,7 @@
// These two headers must be included last, and make sure shlobj is required
// after Windows.h to make sure it picks up our definition of _WIN32_WINNT
#include "WindowsSupport.h"
+#include <shellapi.h>
#include <shlobj.h>
#undef max
@@ -178,6 +179,10 @@ TimePoint<> file_status::getLastModificationTime() const {
return toTimePoint(Time);
}
+uint32_t file_status::getLinkCount() const {
+ return NumLinks;
+}
+
std::error_code current_path(SmallVectorImpl<char> &result) {
SmallVector<wchar_t, MAX_PATH> cur_path;
DWORD len = MAX_PATH;
@@ -200,6 +205,18 @@ std::error_code current_path(SmallVectorImpl<char> &result) {
return UTF16ToUTF8(cur_path.begin(), cur_path.size(), result);
}
+std::error_code set_current_path(const Twine &path) {
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> wide_path;
+ if (std::error_code ec = widenPath(path, wide_path))
+ return ec;
+
+ if (!::SetCurrentDirectoryW(wide_path.begin()))
+ return mapWindowsError(::GetLastError());
+
+ return std::error_code();
+}
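
A usage sketch pairing the new set_current_path() with the existing current_path() to switch directories temporarily:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/Twine.h"
    #include "llvm/Support/FileSystem.h"

    static std::error_code withDirectory(const llvm::Twine &Dir) {
      llvm::SmallString<128> Saved;
      if (std::error_code EC = llvm::sys::fs::current_path(Saved))
        return EC;
      if (std::error_code EC = llvm::sys::fs::set_current_path(Dir))
        return EC;
      // ... do work relative to Dir ...
      return llvm::sys::fs::set_current_path(Saved);
    }
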
+
std::error_code create_directory(const Twine &path, bool IgnoreExisting,
perms Perms) {
SmallVector<wchar_t, 128> path_utf16;
@@ -265,6 +282,80 @@ std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
return std::error_code();
}
+static std::error_code is_local_internal(SmallVectorImpl<wchar_t> &Path,
+ bool &Result) {
+ SmallVector<wchar_t, 128> VolumePath;
+ size_t Len = 128;
+ while (true) {
+ VolumePath.resize(Len);
+ BOOL Success =
+ ::GetVolumePathNameW(Path.data(), VolumePath.data(), VolumePath.size());
+
+ if (Success)
+ break;
+
+ DWORD Err = ::GetLastError();
+ if (Err != ERROR_INSUFFICIENT_BUFFER)
+ return mapWindowsError(Err);
+
+ Len *= 2;
+ }
+ // If the output buffer has exactly enough space for the path name, but not
+ // the null terminator, it will leave the output unterminated. Push a null
+ // terminator onto the end to ensure that this never happens.
+ VolumePath.push_back(L'\0');
+ VolumePath.set_size(wcslen(VolumePath.data()));
+ const wchar_t *P = VolumePath.data();
+
+ UINT Type = ::GetDriveTypeW(P);
+ switch (Type) {
+ case DRIVE_FIXED:
+ Result = true;
+ return std::error_code();
+ case DRIVE_REMOTE:
+ case DRIVE_CDROM:
+ case DRIVE_RAMDISK:
+ case DRIVE_REMOVABLE:
+ Result = false;
+ return std::error_code();
+ default:
+ return make_error_code(errc::no_such_file_or_directory);
+ }
+ llvm_unreachable("Unreachable!");
+}
+
+std::error_code is_local(const Twine &path, bool &result) {
+ if (!llvm::sys::fs::exists(path) || !llvm::sys::path::has_root_path(path))
+ return make_error_code(errc::no_such_file_or_directory);
+
+ SmallString<128> Storage;
+ StringRef P = path.toStringRef(Storage);
+
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> WidePath;
+ if (std::error_code ec = widenPath(P, WidePath))
+ return ec;
+ return is_local_internal(WidePath, result);
+}
+
+std::error_code is_local(int FD, bool &Result) {
+ SmallVector<wchar_t, 128> FinalPath;
+ HANDLE Handle = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
+
+ size_t Len = 128;
+ do {
+ FinalPath.reserve(Len);
+ Len = ::GetFinalPathNameByHandleW(Handle, FinalPath.data(),
+ FinalPath.capacity() - 1, VOLUME_NAME_NT);
+ if (Len == 0)
+ return mapWindowsError(::GetLastError());
+ } while (Len > FinalPath.capacity());
+
+ FinalPath.set_size(Len);
+
+ return is_local_internal(FinalPath, Result);
+}
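
A usage sketch for the new is_local() overloads; note that nonexistent or rootless paths report an error rather than setting the flag:

    #include "llvm/ADT/Twine.h"
    #include "llvm/Support/FileSystem.h"

    // Returns true only for paths on a fixed (DRIVE_FIXED) volume; network,
    // removable, CD-ROM and RAM-disk media all report false, and errors are
    // treated conservatively as "not local".
    static bool isOnLocalDrive(const llvm::Twine &Path) {
      bool Local = false;
      if (llvm::sys::fs::is_local(Path, Local))
        return false;
      return Local;
    }
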
+
std::error_code rename(const Twine &from, const Twine &to) {
// Convert to utf-16.
SmallVector<wchar_t, 128> wide_from;
@@ -443,13 +534,16 @@ static std::error_code getStatus(HANDLE FileHandle, file_status &Result) {
file_type Type = (Info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
? file_type::directory_file
: file_type::regular_file;
- Result =
- file_status(Type, Info.ftLastAccessTime.dwHighDateTime,
- Info.ftLastAccessTime.dwLowDateTime,
- Info.ftLastWriteTime.dwHighDateTime,
- Info.ftLastWriteTime.dwLowDateTime,
- Info.dwVolumeSerialNumber, Info.nFileSizeHigh,
- Info.nFileSizeLow, Info.nFileIndexHigh, Info.nFileIndexLow);
+ perms Permissions = (Info.dwFileAttributes & FILE_ATTRIBUTE_READONLY)
+ ? (all_read | all_exe)
+ : all_all;
+ Result = file_status(
+ Type, Permissions, Info.nNumberOfLinks,
+ Info.ftLastAccessTime.dwHighDateTime,
+ Info.ftLastAccessTime.dwLowDateTime,
+ Info.ftLastWriteTime.dwHighDateTime, Info.ftLastWriteTime.dwLowDateTime,
+ Info.dwVolumeSerialNumber, Info.nFileSizeHigh, Info.nFileSizeLow,
+ Info.nFileIndexHigh, Info.nFileIndexLow);
return std::error_code();
}
@@ -465,7 +559,7 @@ handle_status_error:
return mapWindowsError(LastError);
}
-std::error_code status(const Twine &path, file_status &result) {
+std::error_code status(const Twine &path, file_status &result, bool Follow) {
SmallString<128> path_storage;
SmallVector<wchar_t, 128> path_utf16;
@@ -482,28 +576,19 @@ std::error_code status(const Twine &path, file_status &result) {
if (attr == INVALID_FILE_ATTRIBUTES)
return getStatus(INVALID_HANDLE_VALUE, result);
+ DWORD Flags = FILE_FLAG_BACKUP_SEMANTICS;
// Handle reparse points.
- if (attr & FILE_ATTRIBUTE_REPARSE_POINT) {
- ScopedFileHandle h(
- ::CreateFileW(path_utf16.begin(),
- 0, // Attributes only.
- FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
- NULL,
- OPEN_EXISTING,
- FILE_FLAG_BACKUP_SEMANTICS,
- 0));
- if (!h)
- return getStatus(INVALID_HANDLE_VALUE, result);
- }
+ if (!Follow && (attr & FILE_ATTRIBUTE_REPARSE_POINT))
+ Flags |= FILE_FLAG_OPEN_REPARSE_POINT;
ScopedFileHandle h(
::CreateFileW(path_utf16.begin(), 0, // Attributes only.
FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
- NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, 0));
- if (!h)
- return getStatus(INVALID_HANDLE_VALUE, result);
+ NULL, OPEN_EXISTING, Flags, 0));
+ if (!h)
+ return getStatus(INVALID_HANDLE_VALUE, result);
- return getStatus(h, result);
+ return getStatus(h, result);
}
std::error_code status(int FD, file_status &Result) {
@@ -511,6 +596,37 @@ std::error_code status(int FD, file_status &Result) {
return getStatus(FileHandle, Result);
}
+std::error_code setPermissions(const Twine &Path, perms Permissions) {
+ SmallVector<wchar_t, 128> PathUTF16;
+ if (std::error_code EC = widenPath(Path, PathUTF16))
+ return EC;
+
+ DWORD Attributes = ::GetFileAttributesW(PathUTF16.begin());
+ if (Attributes == INVALID_FILE_ATTRIBUTES)
+ return mapWindowsError(GetLastError());
+
+ // Many Windows file attributes have nothing to do with the file
+ // permissions (e.g. FILE_ATTRIBUTE_HIDDEN), so we must be careful to
+ // preserve them.
+ if (Permissions & all_write) {
+ Attributes &= ~FILE_ATTRIBUTE_READONLY;
+ if (Attributes == 0)
+ // FILE_ATTRIBUTE_NORMAL indicates no other attributes are set.
+ Attributes |= FILE_ATTRIBUTE_NORMAL;
+ } else {
+ Attributes |= FILE_ATTRIBUTE_READONLY;
+ // FILE_ATTRIBUTE_NORMAL is not compatible with any other attributes, so
+ // remove it, if it is present.
+ Attributes &= ~FILE_ATTRIBUTE_NORMAL;
+ }
+
+ if (!::SetFileAttributesW(PathUTF16.begin(), Attributes))
+ return mapWindowsError(GetLastError());
+
+ return std::error_code();
+}
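
A behavioural sketch of the mapping above: Windows models only a read-only bit, so the POSIX-style perms collapse to two observable states.

    // Any perms value containing a write bit clears FILE_ATTRIBUTE_READONLY;
    // every other value sets it. getStatus() above then reports all_all or
    // (all_read | all_exe) respectively.
    static void demoPermissions() {
      using namespace llvm::sys::fs;
      (void)setPermissions("out.txt", all_read | all_exe); // sets READONLY
      (void)setPermissions("out.txt", all_all);            // clears READONLY
    }
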
+
std::error_code setLastModificationAndAccessTime(int FD, TimePoint<> Time) {
FILETIME FT = toFILETIME(Time);
HANDLE FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
@@ -616,7 +732,8 @@ int mapped_file_region::alignment() {
}
std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
- StringRef path){
+ StringRef path,
+ bool follow_symlinks) {
SmallVector<wchar_t, 128> path_utf16;
if (std::error_code ec = widenPath(path, path_utf16))
@@ -661,7 +778,7 @@ std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
it.IterationHandle = intptr_t(FindHandle.take());
SmallString<128> directory_entry_path(path);
path::append(directory_entry_path, directory_entry_name_utf8);
- it.CurrentEntry = directory_entry(directory_entry_path);
+ it.CurrentEntry = directory_entry(directory_entry_path, follow_symlinks);
return std::error_code();
}
@@ -701,6 +818,52 @@ std::error_code detail::directory_iterator_increment(detail::DirIterState &it) {
return std::error_code();
}
+static std::error_code realPathFromHandle(HANDLE H,
+ SmallVectorImpl<char> &RealPath) {
+ RealPath.clear();
+ llvm::SmallVector<wchar_t, MAX_PATH> Buffer;
+ DWORD CountChars = ::GetFinalPathNameByHandleW(
+ H, Buffer.begin(), Buffer.capacity() - 1, FILE_NAME_NORMALIZED);
+ if (CountChars > Buffer.capacity()) {
+ // The buffer wasn't big enough; try again. In this case the return value
+ // *does* include the null terminator in its count.
+ Buffer.reserve(CountChars);
+ CountChars = ::GetFinalPathNameByHandleW(
+ H, Buffer.data(), Buffer.capacity() - 1, FILE_NAME_NORMALIZED);
+ }
+ if (CountChars == 0)
+ return mapWindowsError(GetLastError());
+
+ const wchar_t *Data = Buffer.data();
+ if (CountChars >= 4) {
+ if (0 == ::memcmp(Data, L"\\\\?\\", 8)) {
+ CountChars -= 4;
+ Data += 4;
+ }
+ }
+
+ // Convert the result from UTF-16 to UTF-8.
+ return UTF16ToUTF8(Data, CountChars, RealPath);
+}
+
+static std::error_code directoryRealPath(const Twine &Name,
+ SmallVectorImpl<char> &RealPath) {
+ SmallVector<wchar_t, 128> PathUTF16;
+
+ if (std::error_code EC = widenPath(Name, PathUTF16))
+ return EC;
+
+ HANDLE H =
+ ::CreateFileW(PathUTF16.begin(), GENERIC_READ,
+ FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+ NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL);
+ if (H == INVALID_HANDLE_VALUE)
+ return mapWindowsError(GetLastError());
+ std::error_code EC = realPathFromHandle(H, RealPath);
+ ::CloseHandle(H);
+ return EC;
+}
+
std::error_code openFileForRead(const Twine &Name, int &ResultFD,
SmallVectorImpl<char> *RealPath) {
SmallVector<wchar_t, 128> PathUTF16;
@@ -732,20 +895,8 @@ std::error_code openFileForRead(const Twine &Name, int &ResultFD,
}
// Fetch the real name of the file, if the user asked
- if (RealPath) {
- RealPath->clear();
- wchar_t RealPathUTF16[MAX_PATH];
- DWORD CountChars =
- ::GetFinalPathNameByHandleW(H, RealPathUTF16, MAX_PATH,
- FILE_NAME_NORMALIZED);
- if (CountChars > 0 && CountChars < MAX_PATH) {
- // Convert the result from UTF-16 to UTF-8.
- SmallString<MAX_PATH> RealPathUTF8;
- if (!UTF16ToUTF8(RealPathUTF16, CountChars, RealPathUTF8))
- RealPath->append(RealPathUTF8.data(),
- RealPathUTF8.data() + strlen(RealPathUTF8.data()));
- }
- }
+ if (RealPath)
+ realPathFromHandle(H, *RealPath);
ResultFD = FD;
return std::error_code();
@@ -843,6 +994,81 @@ std::error_code getPathFromOpenFD(int FD, SmallVectorImpl<char> &ResultPath) {
return windows::UTF16ToUTF8(TempPath.data(), CharCount, ResultPath);
}
+
+std::error_code remove_directories(const Twine &path, bool IgnoreErrors) {
+ // Convert to utf-16.
+ SmallVector<wchar_t, 128> Path16;
+ std::error_code EC = widenPath(path, Path16);
+ if (EC && !IgnoreErrors)
+ return EC;
+
+ // SHFileOperation() accepts a list of paths, so the buffer must be double
+ // null-terminated to mark the end of the list. The buffer is already null
+ // terminated, but since that null character is not counted in the vector's
+ // size, the first push_back() simply overwrites it. So we need to push two
+ // null terminators.
+ Path16.push_back(0);
+ Path16.push_back(0);
+
+ SHFILEOPSTRUCTW shfos = {};
+ shfos.wFunc = FO_DELETE;
+ shfos.pFrom = Path16.data();
+ shfos.fFlags = FOF_NO_UI;
+
+ int result = ::SHFileOperationW(&shfos);
+ if (result != 0 && !IgnoreErrors)
+ return mapWindowsError(result);
+ return std::error_code();
+}
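
For reference, SHFileOperationW's pFrom argument is a packed list of null-terminated paths closed by one extra null; the code above builds the single-entry form (path, '\0', '\0'). A sketch of the general multi-entry layout, with hypothetical example paths:

    #include "llvm/ADT/SmallVector.h"
    #include <cwchar>

    // For two entries the buffer ends up as L"C:\\tmp\\a\0C:\\tmp\\b\0\0".
    static void buildDoubleNullList(llvm::SmallVectorImpl<wchar_t> &From) {
      for (const wchar_t *P : {L"C:\\tmp\\a", L"C:\\tmp\\b"}) {
        From.append(P, P + std::wcslen(P));
        From.push_back(0); // terminate this entry
      }
      From.push_back(0); // terminate the list itself
    }
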
+
+static void expandTildeExpr(SmallVectorImpl<char> &Path) {
+ // Path does not begin with a tilde expression.
+ if (Path.empty() || Path[0] != '~')
+ return;
+
+ StringRef PathStr(Path.begin(), Path.size());
+ PathStr = PathStr.drop_front();
+ StringRef Expr = PathStr.take_until([](char c) { return path::is_separator(c); });
+
+ if (!Expr.empty()) {
+ // This is probably a ~username/ expression, which we don't support on
+ // Windows.
+ return;
+ }
+
+ SmallString<128> HomeDir;
+ if (!path::home_directory(HomeDir)) {
+ // For some reason we couldn't get the home directory; leave the path
+ // unchanged.
+ return;
+ }
+
+ // Overwrite the first character and insert the rest.
+ Path[0] = HomeDir[0];
+ Path.insert(Path.begin() + 1, HomeDir.begin() + 1, HomeDir.end());
+}
+
+std::error_code real_path(const Twine &path, SmallVectorImpl<char> &dest,
+ bool expand_tilde) {
+ dest.clear();
+ if (path.isTriviallyEmpty())
+ return std::error_code();
+
+ if (expand_tilde) {
+ SmallString<128> Storage;
+ path.toVector(Storage);
+ expandTildeExpr(Storage);
+ return real_path(Storage, dest, false);
+ }
+
+ if (is_directory(path))
+ return directoryRealPath(path, dest);
+
+ int fd;
+ if (std::error_code EC = llvm::sys::fs::openFileForRead(path, fd, &dest))
+ return EC;
+ ::close(fd);
+ return std::error_code();
+}
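
A usage sketch of the new real_path(): a leading "~/" is expanded against the home directory first, then the path is canonicalized (directories through the directory-handle route above, files through openFileForRead):

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/FileSystem.h"

    static std::error_code resolveInput(llvm::SmallVectorImpl<char> &Out) {
      return llvm::sys::fs::real_path("~/project/input.ll", Out,
                                      /*expand_tilde=*/true);
    }
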
+
} // end namespace fs
namespace path {
diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc
index 8d646b3217a0..18aef610d54a 100644
--- a/lib/Support/Windows/Process.inc
+++ b/lib/Support/Windows/Process.inc
@@ -47,7 +47,6 @@
#endif
using namespace llvm;
-using namespace sys;
// This function retrieves the page size using GetNativeSystemInfo() and is
// present solely so it can be called once to initialize the self_process member
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index 78fc538bd9bf..721167da5b15 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -29,7 +29,6 @@
//===----------------------------------------------------------------------===//
namespace llvm {
-using namespace sys;
ProcessInfo::ProcessInfo() : ProcessHandle(0), Pid(0), ReturnCode(0) {}
diff --git a/lib/Support/Windows/RWMutex.inc b/lib/Support/Windows/RWMutex.inc
index 2d1d25f67b8a..ac60c2fc05be 100644
--- a/lib/Support/Windows/RWMutex.inc
+++ b/lib/Support/Windows/RWMutex.inc
@@ -19,7 +19,6 @@
#include "WindowsSupport.h"
namespace llvm {
-using namespace sys;
// Windows has slim read-writer lock support on Vista and higher, so we
// will attempt to load the APIs. If they exist, we will use them, and
@@ -73,7 +72,7 @@ static bool loadSRW() {
return sHasSRW;
}
-RWMutexImpl::RWMutexImpl() {
+sys::RWMutexImpl::RWMutexImpl() {
if (loadSRW()) {
data_ = calloc(1, sizeof(SRWLOCK));
fpInitializeSRWLock(static_cast<PSRWLOCK>(data_));
@@ -83,14 +82,14 @@ RWMutexImpl::RWMutexImpl() {
}
}
-RWMutexImpl::~RWMutexImpl() {
+sys::RWMutexImpl::~RWMutexImpl() {
if (!sHasSRW)
DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
// Nothing to do in the case of slim reader/writers except free the memory.
free(data_);
}
-bool RWMutexImpl::reader_acquire() {
+bool sys::RWMutexImpl::reader_acquire() {
if (sHasSRW) {
fpAcquireSRWLockShared(static_cast<PSRWLOCK>(data_));
} else {
@@ -99,7 +98,7 @@ bool RWMutexImpl::reader_acquire() {
return true;
}
-bool RWMutexImpl::reader_release() {
+bool sys::RWMutexImpl::reader_release() {
if (sHasSRW) {
fpReleaseSRWLockShared(static_cast<PSRWLOCK>(data_));
} else {
@@ -108,7 +107,7 @@ bool RWMutexImpl::reader_release() {
return true;
}
-bool RWMutexImpl::writer_acquire() {
+bool sys::RWMutexImpl::writer_acquire() {
if (sHasSRW) {
fpAcquireSRWLockExclusive(static_cast<PSRWLOCK>(data_));
} else {
@@ -117,7 +116,7 @@ bool RWMutexImpl::writer_acquire() {
return true;
}
-bool RWMutexImpl::writer_release() {
+bool sys::RWMutexImpl::writer_release() {
if (sHasSRW) {
fpReleaseSRWLockExclusive(static_cast<PSRWLOCK>(data_));
} else {
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index f739421eece4..1ef51888baf3 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -776,7 +776,7 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
// the nasty sorts of crashes that aren't 100% reproducible from a set of
// inputs (or in the event that the user is unable or unwilling to provide a
// reproducible case).
- if (!llvm::Process::AreCoreFilesPrevented()) {
+ if (!llvm::sys::Process::AreCoreFilesPrevented()) {
MINIDUMP_EXCEPTION_INFORMATION ExceptionInfo;
ExceptionInfo.ThreadId = ::GetCurrentThreadId();
ExceptionInfo.ExceptionPointers = ep;
diff --git a/lib/Support/Windows/ThreadLocal.inc b/lib/Support/Windows/ThreadLocal.inc
index b9cb8ff9836e..8be1c3ecfbb9 100644
--- a/lib/Support/Windows/ThreadLocal.inc
+++ b/lib/Support/Windows/ThreadLocal.inc
@@ -20,33 +20,32 @@
#include "llvm/Support/ThreadLocal.h"
namespace llvm {
-using namespace sys;
-ThreadLocalImpl::ThreadLocalImpl() : data() {
+sys::ThreadLocalImpl::ThreadLocalImpl() : data() {
static_assert(sizeof(DWORD) <= sizeof(data), "size too big");
DWORD* tls = reinterpret_cast<DWORD*>(&data);
*tls = TlsAlloc();
assert(*tls != TLS_OUT_OF_INDEXES);
}
-ThreadLocalImpl::~ThreadLocalImpl() {
+sys::ThreadLocalImpl::~ThreadLocalImpl() {
DWORD* tls = reinterpret_cast<DWORD*>(&data);
TlsFree(*tls);
}
-void *ThreadLocalImpl::getInstance() {
+void *sys::ThreadLocalImpl::getInstance() {
DWORD* tls = reinterpret_cast<DWORD*>(&data);
return TlsGetValue(*tls);
}
-void ThreadLocalImpl::setInstance(const void* d){
+void sys::ThreadLocalImpl::setInstance(const void* d){
DWORD* tls = reinterpret_cast<DWORD*>(&data);
int errorcode = TlsSetValue(*tls, const_cast<void*>(d));
assert(errorcode != 0);
(void)errorcode;
}
-void ThreadLocalImpl::removeInstance() {
+void sys::ThreadLocalImpl::removeInstance() {
setInstance(0);
}
diff --git a/lib/Support/Windows/Threading.inc b/lib/Support/Windows/Threading.inc
new file mode 100644
index 000000000000..decb48887af2
--- /dev/null
+++ b/lib/Support/Windows/Threading.inc
@@ -0,0 +1,109 @@
+//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of Threading functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+
+#include "Windows/WindowsSupport.h"
+#include <process.h>
+
+// Windows will at times define MemoryFence.
+#ifdef MemoryFence
+#undef MemoryFence
+#endif
+
+namespace {
+struct ThreadInfo {
+ void (*func)(void *);
+ void *param;
+};
+} // end anonymous namespace
+
+static unsigned __stdcall ThreadCallback(void *param) {
+ struct ThreadInfo *info = reinterpret_cast<struct ThreadInfo *>(param);
+ info->func(info->param);
+
+ return 0;
+}
+
+void llvm::llvm_execute_on_thread(void(*Fn)(void*), void *UserData,
+ unsigned RequestedStackSize) {
+ struct ThreadInfo param = { Fn, UserData };
+
+ HANDLE hThread = (HANDLE)::_beginthreadex(NULL,
+ RequestedStackSize, ThreadCallback,
+ &param, 0, NULL);
+
+ if (hThread) {
+ // We actually don't care whether the wait succeeds or fails, in
+ // the same way we don't care whether the pthread_join call succeeds
+ // or fails. There's not much we could do if this were to fail. But
+ // on success, this call will wait until the thread finishes executing
+ // before returning.
+ (void)::WaitForSingleObject(hThread, INFINITE);
+ ::CloseHandle(hThread);
+ }
+}
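
A usage sketch; the important property (shared with the pthread implementation) is that the call is synchronous, so stack-allocated state is safe to hand to the callee:

    #include "llvm/Support/Threading.h"

    static void work(void *Ctx) { ++*static_cast<int *>(Ctx); }

    static void runOnFreshThread() {
      int Counter = 0;
      // Spawns a thread, runs work(&Counter), and joins before returning.
      llvm::llvm_execute_on_thread(work, &Counter, /*RequestedStackSize=*/0);
      // Counter == 1 here; the callee has already finished.
    }
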
+
+uint64_t llvm::get_threadid() {
+ return uint64_t(::GetCurrentThreadId());
+}
+
+uint32_t llvm::get_max_thread_name_length() { return 0; }
+
+#if defined(_MSC_VER)
+static void SetThreadName(DWORD Id, LPCSTR Name) {
+ constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;
+
+#pragma pack(push, 8)
+ struct THREADNAME_INFO {
+ DWORD dwType; // Must be 0x1000.
+ LPCSTR szName; // Pointer to thread name
+ DWORD dwThreadId; // Thread ID (-1 == current thread)
+ DWORD dwFlags; // Reserved. Do not use.
+ };
+#pragma pack(pop)
+
+ THREADNAME_INFO info;
+ info.dwType = 0x1000;
+ info.szName = Name;
+ info.dwThreadId = Id;
+ info.dwFlags = 0;
+
+ __try {
+ ::RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR),
+ (ULONG_PTR *)&info);
+ }
+ __except (EXCEPTION_EXECUTE_HANDLER) {
+ }
+}
+#endif
+
+void llvm::set_thread_name(const Twine &Name) {
+#if defined(_MSC_VER)
+ // Make sure the input is null terminated.
+ SmallString<64> Storage;
+ StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
+ SetThreadName(::GetCurrentThreadId(), NameStr.data());
+#endif
+}
+
+void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
+ // "Name" is not an inherent property of a thread on Windows. In fact, when
+ // you "set" the name, you are only firing a one-time message to a debugger
+ // which it interprets as a program setting its threads' name. We may be
+ // able to get fancy by creating a TLS entry when someone calls
+ // set_thread_name so that subsequent calls to get_thread_name return this
+ // value.
+ Name.clear();
+}
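
A hypothetical sketch of the TLS idea the comment above floats; nothing like this is part of the patch, and the names are invented:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/Twine.h"
    #include "llvm/Support/Threading.h"
    #include <string>

    static thread_local std::string SavedThreadName; // hypothetical cache

    void setThreadNameCached(const llvm::Twine &Name) {
      SavedThreadName = Name.str();
      llvm::set_thread_name(Name); // still fire the debugger notification
    }

    void getThreadNameCached(llvm::SmallVectorImpl<char> &Name) {
      Name.clear();
      Name.append(SavedThreadName.begin(), SavedThreadName.end());
    }
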
diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp
index 9849b3aa1ce9..c410b1d56086 100644
--- a/lib/Support/YAMLTraits.cpp
+++ b/lib/Support/YAMLTraits.cpp
@@ -398,17 +398,10 @@ bool Input::canElideEmptySequence() {
//===----------------------------------------------------------------------===//
Output::Output(raw_ostream &yout, void *context, int WrapColumn)
- : IO(context),
- Out(yout),
- WrapColumn(WrapColumn),
- Column(0),
- ColumnAtFlowStart(0),
- ColumnAtMapFlowStart(0),
- NeedBitValueComma(false),
- NeedFlowSequenceComma(false),
- EnumerationMatchFound(false),
- NeedsNewLine(false) {
-}
+ : IO(context), Out(yout), WrapColumn(WrapColumn), Column(0),
+ ColumnAtFlowStart(0), ColumnAtMapFlowStart(0), NeedBitValueComma(false),
+ NeedFlowSequenceComma(false), EnumerationMatchFound(false),
+ NeedsNewLine(false), WriteDefaultValues(false) {}
Output::~Output() {
}
@@ -462,7 +455,7 @@ std::vector<StringRef> Output::keys() {
bool Output::preflightKey(const char *Key, bool Required, bool SameAsDefault,
bool &UseDefault, void *&) {
UseDefault = false;
- if (Required || !SameAsDefault) {
+ if (Required || !SameAsDefault || WriteDefaultValues) {
auto State = StateStack.back();
if (State == inFlowMapFirstKey || State == inFlowMapOtherKey) {
flowKey(Key);
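
A sketch of the effect of the new WriteDefaultValues flag: keys whose value equals the mapOptional() default are emitted instead of skipped. The accessor name used here is an assumption; only the member and its use in preflightKey() appear in this diff.

    #include "llvm/Support/YAMLTraits.h"
    #include "llvm/Support/raw_ostream.h"

    struct Options { int Verbosity = 0; };

    namespace llvm {
    namespace yaml {
    template <> struct MappingTraits<Options> {
      static void mapping(IO &Io, Options &O) {
        Io.mapOptional("verbosity", O.Verbosity, 0); // default-valued key
      }
    };
    } // end namespace yaml
    } // end namespace llvm

    void emitWithDefaults() {
      Options O; // Verbosity still equals its default
      llvm::yaml::Output Out(llvm::outs());
      Out.setWriteDefaultValues(true); // hypothetical accessor for the flag
      Out << O; // now prints "verbosity: 0" instead of omitting the key
    }
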
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index d073802db932..1abc8ed8683d 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -465,8 +465,7 @@ void format_object_base::home() {
static int getFD(StringRef Filename, std::error_code &EC,
sys::fs::OpenFlags Flags) {
// Handle "-" as stdout. Note that when we do this, we consider ourself
- // the owner of stdout. This means that we can do things like close the
- // file descriptor when we're done and set the "binary" flag globally.
+ // the owner of stdout and may set the "binary" flag globally based on Flags.
if (Filename == "-") {
EC = std::error_code();
// If user requested binary then put stdout into binary mode if
@@ -497,6 +496,13 @@ raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
ShouldClose = false;
return;
}
+ // We do not want to close STDOUT, since several streams may wrap it, e.g.
+ // llc %s -o=- -pass-remarks-output=- -filetype=asm, which would otherwise
+ // close STDOUT_FILENO multiple times and/or use it after it was closed.
+ // Using dup() in getFD doesn't work as we end up with original STDOUT_FILENO
+ // open anyhow.
+ if (FD <= STDERR_FILENO)
+ ShouldClose = false;
// Get the starting position.
off_t loc = ::lseek(FD, 0, SEEK_CUR);
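
A sketch of the failure mode the new guard avoids: two streams wrapping the same standard descriptor, each believing it owns it:

    #include "llvm/Support/raw_ostream.h"

    void demoSharedStdout() {
      // e.g. "llc %s -o=- -pass-remarks-output=-": both outputs are fd 1.
      llvm::raw_fd_ostream Asm(/*fd=*/1, /*shouldClose=*/true);
      llvm::raw_fd_ostream Remarks(/*fd=*/1, /*shouldClose=*/true);
      Asm << "...asm...\n";
      Remarks << "...remarks...\n";
      // Without the FD <= STDERR_FILENO guard above, the first destructor
      // would close fd 1 and the second would write to a dead descriptor.
    }
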
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index ea9c9a19904e..33d3de5daf33 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -40,7 +40,9 @@ IntRecTy IntRecTy::Shared;
StringRecTy StringRecTy::Shared;
DagRecTy DagRecTy::Shared;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RecTy::dump() const { print(errs()); }
+#endif
ListRecTy *RecTy::getListTy() {
if (!ListTy)
@@ -161,7 +163,9 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
//===----------------------------------------------------------------------===//
void Init::anchor() { }
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void Init::dump() const { return print(errs()); }
+#endif
UnsetInit *UnsetInit::get() {
static UnsetInit TheInit;
@@ -1591,7 +1595,9 @@ StringRef RecordVal::getName() const {
return cast<StringInit>(getNameInit())->getValue();
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RecordVal::dump() const { errs() << *this; }
+#endif
void RecordVal::print(raw_ostream &OS, bool PrintSem) const {
if (getPrefix()) OS << "field ";
@@ -1673,7 +1679,9 @@ void Record::resolveReferencesTo(const RecordVal *RV) {
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void Record::dump() const { errs() << *this; }
+#endif
raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) {
OS << R.getNameInitAsString();
@@ -1865,6 +1873,7 @@ DagInit *Record::getValueAsDag(StringRef FieldName) const {
FieldName + "' does not have a dag initializer!");
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MultiClass::dump() const {
errs() << "Record:\n";
Rec.dump();
@@ -1875,6 +1884,7 @@ LLVM_DUMP_METHOD void MultiClass::dump() const {
}
LLVM_DUMP_METHOD void RecordKeeper::dump() const { errs() << *this; }
+#endif
raw_ostream &llvm::operator<<(raw_ostream &OS, const RecordKeeper &RK) {
OS << "------------- Classes -----------------\n";
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index 1a91b37b742b..96015b06d798 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -54,6 +54,7 @@ struct SubMultiClassReference {
void dump() const;
};
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SubMultiClassReference::dump() const {
errs() << "Multiclass:\n";
@@ -63,6 +64,7 @@ LLVM_DUMP_METHOD void SubMultiClassReference::dump() const {
for (Init *TA : TemplateArgs)
TA->dump();
}
+#endif
} // end namespace llvm
@@ -945,7 +947,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
else if (ListInit *Arg0 = dyn_cast<ListInit>(InitList[0]))
Type = Arg0->getType();
else {
- InitList[0]->dump();
+ InitList[0]->print(errs());
Error(OpLoc, "expected a list");
return nullptr;
}
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
index fd106a8d9b0b..b44b13e36e15 100644
--- a/lib/Target/AArch64/AArch64.h
+++ b/lib/Target/AArch64/AArch64.h
@@ -22,8 +22,11 @@
namespace llvm {
+class AArch64RegisterBankInfo;
+class AArch64Subtarget;
class AArch64TargetMachine;
class FunctionPass;
+class InstructionSelector;
class MachineFunctionPass;
FunctionPass *createAArch64DeadRegisterDefinitions();
@@ -45,6 +48,9 @@ FunctionPass *createAArch64A53Fix835769();
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
FunctionPass *createAArch64CollectLOHPass();
+InstructionSelector *
+createAArch64InstructionSelector(const AArch64TargetMachine &,
+ AArch64Subtarget &, AArch64RegisterBankInfo &);
void initializeAArch64A53Fix835769Pass(PassRegistry&);
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index 91c335fac32d..519ca2894683 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -27,7 +27,7 @@ def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable Advanced SIMD instructions", [FeatureFPARMv8]>;
def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
- "Enable cryptographic instructions">;
+ "Enable cryptographic instructions", [FeatureNEON]>;
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
"Enable ARMv8 CRC-32 checksum instructions">;
@@ -38,6 +38,9 @@ def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
"Enable ARMv8.1 Large System Extension (LSE) atomic instructions">;
+def FeatureRDM : SubtargetFeature<"rdm", "HasRDM", "true",
+ "Enable ARMv8.1 Rounding Double Multiply Add/Subtract instructions">;
+
def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
"Enable ARMv8 PMUv3 Performance Monitors extension">;
@@ -100,6 +103,14 @@ def FeatureArithmeticCbzFusion : SubtargetFeature<
"arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
"CPU fuses arithmetic + cbz/cbnz operations">;
+def FeatureFuseAES : SubtargetFeature<
+ "fuse-aes", "HasFuseAES", "true",
+ "CPU fuses AES crypto operations">;
+
+def FeatureFuseLiterals : SubtargetFeature<
+ "fuse-literals", "HasFuseLiterals", "true",
+ "CPU fuses literal generation operations">;
+
def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
@@ -108,12 +119,22 @@ def FeatureUseRSqrt : SubtargetFeature<
"use-reciprocal-square-root", "UseRSqrt", "true",
"Use the reciprocal square root approximation">;
+def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
+ "NegativeImmediates", "false",
+ "Convert immediates and instructions "
+ "to their negated or complemented "
+ "equivalent when the immediate does "
+ "not fit in the encoding.">;
+
+def FeatureLSLFast : SubtargetFeature<
+ "lsl-fast", "HasLSLFast", "true",
+ "CPU has a fastpath logical shift of up to 3 places">;
//===----------------------------------------------------------------------===//
// Architectures.
//
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
- "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE]>;
+ "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM]>;
def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
"Support ARM v8.2a instructions", [HasV8_1aOps, FeatureRAS]>;
@@ -123,6 +144,7 @@ def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
//===----------------------------------------------------------------------===//
include "AArch64RegisterInfo.td"
+include "AArch64RegisterBanks.td"
include "AArch64CallingConvention.td"
//===----------------------------------------------------------------------===//
@@ -149,7 +171,8 @@ include "AArch64SchedCyclone.td"
include "AArch64SchedFalkor.td"
include "AArch64SchedKryo.td"
include "AArch64SchedM1.td"
-include "AArch64SchedVulcan.td"
+include "AArch64SchedThunderX.td"
+include "AArch64SchedThunderX2T99.td"
def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
"Cortex-A35 ARM processors", [
@@ -180,6 +203,8 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
+ FeatureFuseAES,
+ FeatureFuseLiterals,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
@@ -226,6 +251,7 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
+ FeatureFuseAES,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
@@ -256,7 +282,8 @@ def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
- FeatureZCZeroing
+ FeatureZCZeroing,
+ FeatureLSLFast
]>;
def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
@@ -269,19 +296,66 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
- FeatureZCZeroing
+ FeatureRDM,
+ FeatureZCZeroing,
+ FeatureLSLFast
]>;
-def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan",
- "Broadcom Vulcan processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeatureArithmeticBccFusion,
- FeatureNEON,
- FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- HasV8_1aOps]>;
+def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily",
+ "ThunderX2T99",
+ "Cavium ThunderX2 processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureArithmeticBccFusion,
+ FeatureNEON,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureLSE,
+ HasV8_1aOps]>;
+
+def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX",
+ "Cavium ThunderX processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureNEON]>;
+
+def ProcThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily",
+ "ThunderXT88",
+ "Cavium ThunderX processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureNEON]>;
+
+def ProcThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily",
+ "ThunderXT81",
+ "Cavium ThunderX processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureNEON]>;
+
+def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
+ "ThunderXT83",
+ "Cavium ThunderX processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureNEON]>;
def : ProcessorModel<"generic", NoSchedModel, [
FeatureCRC,
@@ -291,11 +365,11 @@ def : ProcessorModel<"generic", NoSchedModel, [
FeaturePostRAScheduler
]>;
-// FIXME: Cortex-A35 is currently modelled as a Cortex-A53
+// FIXME: Cortex-A35 is currently modeled as a Cortex-A53.
def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
-// FIXME: Cortex-A72 and Cortex-A73 are currently modelled as an Cortex-A57.
+// FIXME: Cortex-A72 and Cortex-A73 are currently modeled as a Cortex-A57.
def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>;
def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
@@ -304,7 +378,13 @@ def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>;
def : ProcessorModel<"exynos-m3", ExynosM1Model, [ProcExynosM2]>;
def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
-def : ProcessorModel<"vulcan", VulcanModel, [ProcVulcan]>;
+// Cavium ThunderX/ThunderX T8X Processors
+def : ProcessorModel<"thunderx", ThunderXT8XModel, [ProcThunderX]>;
+def : ProcessorModel<"thunderxt88", ThunderXT8XModel, [ProcThunderXT88]>;
+def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>;
+def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>;
+// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan.
+def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
//===----------------------------------------------------------------------===//
// Assembly parser
diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 0aa597bcdc56..4a7e0b2b803e 100644
--- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -493,43 +493,30 @@ bool AArch64A57FPLoadBalancing::colorChainSet(std::vector<Chain*> GV,
int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C,
MachineBasicBlock &MBB) {
- RegScavenger RS;
- RS.enterBasicBlock(MBB);
- RS.forward(MachineBasicBlock::iterator(G->getStart()));
-
// Can we find an appropriate register that is available throughout the life
- // of the chain?
- unsigned RegClassID = G->getStart()->getDesc().OpInfo[0].RegClass;
- BitVector AvailableRegs = RS.getRegsAvailable(TRI->getRegClass(RegClassID));
- for (MachineBasicBlock::iterator I = G->begin(), E = G->end(); I != E; ++I) {
- RS.forward(I);
- AvailableRegs &= RS.getRegsAvailable(TRI->getRegClass(RegClassID));
-
- // Remove any registers clobbered by a regmask or any def register that is
- // immediately dead.
- for (auto J : I->operands()) {
- if (J.isRegMask())
- AvailableRegs.clearBitsNotInMask(J.getRegMask());
-
- if (J.isReg() && J.isDef()) {
- MCRegAliasIterator AI(J.getReg(), TRI, /*IncludeSelf=*/true);
- if (J.isDead())
- for (; AI.isValid(); ++AI)
- AvailableRegs.reset(*AI);
-#ifndef NDEBUG
- else
- for (; AI.isValid(); ++AI)
- assert(!AvailableRegs[*AI] &&
- "Non-dead def should have been removed by now!");
-#endif
- }
- }
+ // of the chain? Simulate liveness backwards until the end of the chain.
+ LiveRegUnits Units(*TRI);
+ Units.addLiveOuts(MBB);
+ MachineBasicBlock::iterator I = MBB.end();
+ MachineBasicBlock::iterator ChainEnd = G->end();
+ while (I != ChainEnd) {
+ --I;
+ Units.stepBackward(*I);
}
+ // Check which register units are alive throughout the chain.
+ MachineBasicBlock::iterator ChainBegin = G->begin();
+ assert(ChainBegin != ChainEnd && "Chain should contain instructions");
+ do {
+ --I;
+ Units.accumulateBackward(*I);
+ } while (I != ChainBegin);
+
// Make sure we allocate in-order, to get the cheapest registers first.
+ unsigned RegClassID = ChainBegin->getDesc().OpInfo[0].RegClass;
auto Ord = RCI.getOrder(TRI->getRegClass(RegClassID));
for (auto Reg : Ord) {
- if (!AvailableRegs[Reg])
+ if (!Units.available(Reg))
continue;
if (C == getColor(Reg))
return Reg;
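
The two loops above form a two-phase scan: stepBackward() maintains exact liveness while walking from the block end up to the chain's end, and from there accumulateBackward() only ever adds units, computing the union of everything live at any point inside the chain. A hedged sketch of that shape:

    #include "llvm/CodeGen/LiveRegUnits.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/Target/TargetRegisterInfo.h"

    // A register is safe to scavenge for the chain iff it is available in
    // Units after both phases. Assumes ChainBegin != ChainEnd.
    static bool freeAcrossChain(const llvm::TargetRegisterInfo &TRI,
                                llvm::MachineBasicBlock &MBB,
                                llvm::MachineBasicBlock::iterator ChainBegin,
                                llvm::MachineBasicBlock::iterator ChainEnd,
                                unsigned Reg) {
      llvm::LiveRegUnits Units(TRI);
      Units.addLiveOuts(MBB);
      auto I = MBB.end();
      while (I != ChainEnd)
        Units.stepBackward(*--I); // exact liveness at the chain's end
      do {
        Units.accumulateBackward(*--I); // union over the chain's interior
      } while (I != ChainBegin);
      return Units.available(Reg);
    }
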
diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 0cbb2db1134a..e1b8ee6d03c3 100644
--- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -31,16 +31,23 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
using namespace llvm;
@@ -59,12 +66,12 @@ EnableMerge("aarch64-type-promotion-merge", cl::Hidden,
//===----------------------------------------------------------------------===//
namespace {
-class AArch64AddressTypePromotion : public FunctionPass {
+class AArch64AddressTypePromotion : public FunctionPass {
public:
static char ID;
- AArch64AddressTypePromotion()
- : FunctionPass(ID), Func(nullptr), ConsideredSExtType(nullptr) {
+
+ AArch64AddressTypePromotion() : FunctionPass(ID) {
initializeAArch64AddressTypePromotionPass(*PassRegistry::getPassRegistry());
}
@@ -76,10 +83,11 @@ public:
private:
/// The current function.
- Function *Func;
+ Function *Func = nullptr;
+
/// Filter out all sexts that do not have this type.
/// Currently initialized with Int64Ty.
- Type *ConsideredSExtType;
+ Type *ConsideredSExtType = nullptr;
// This transformation requires dominator info.
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -129,7 +137,8 @@ private:
void mergeSExts(ValueToInsts &ValToSExtendedUses,
SetOfInstructions &ToRemove);
};
-} // end anonymous namespace.
+
+} // end anonymous namespace
char AArch64AddressTypePromotion::ID = 0;
diff --git a/lib/Target/AArch64/AArch64CallLowering.cpp b/lib/Target/AArch64/AArch64CallLowering.cpp
index a4950af32097..b2f55a7e1e09 100644
--- a/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/lib/Target/AArch64/AArch64CallLowering.cpp - Call lowering ---===//
+//===--- AArch64CallLowering.cpp - Call lowering --------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,15 +15,36 @@
#include "AArch64CallLowering.h"
#include "AArch64ISelLowering.h"
-
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+
using namespace llvm;
#ifndef LLVM_BUILD_GLOBAL_ISEL
@@ -31,12 +52,12 @@ using namespace llvm;
#endif
AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
- : CallLowering(&TLI) {
-}
+ : CallLowering(&TLI) {}
struct IncomingArgHandler : public CallLowering::ValueHandler {
- IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
- : ValueHandler(MIRBuilder, MRI) {}
+ IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : ValueHandler(MIRBuilder, MRI, AssignFn), StackUsed(0) {}
unsigned getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -45,6 +66,7 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
unsigned AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
MIRBuilder.buildFrameIndex(AddrReg, FI);
+ StackUsed = std::max(StackUsed, Size + Offset);
return AddrReg;
}
@@ -67,11 +89,14 @@ struct IncomingArgHandler : public CallLowering::ValueHandler {
/// parameters (it's a basic-block live-in), and a call instruction
/// (it's an implicit-def of the BL).
virtual void markPhysRegUsed(unsigned PhysReg) = 0;
+
+ uint64_t StackUsed;
};
struct FormalArgHandler : public IncomingArgHandler {
- FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
- : IncomingArgHandler(MIRBuilder, MRI) {}
+ FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn)
+ : IncomingArgHandler(MIRBuilder, MRI, AssignFn) {}
void markPhysRegUsed(unsigned PhysReg) override {
MIRBuilder.getMBB().addLiveIn(PhysReg);
@@ -80,8 +105,8 @@ struct FormalArgHandler : public IncomingArgHandler {
struct CallReturnHandler : public IncomingArgHandler {
CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- MachineInstrBuilder MIB)
- : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
+ MachineInstrBuilder MIB, CCAssignFn *AssignFn)
+ : IncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
void markPhysRegUsed(unsigned PhysReg) override {
MIB.addDef(PhysReg, RegState::Implicit);
@@ -92,8 +117,10 @@ struct CallReturnHandler : public IncomingArgHandler {
struct OutgoingArgHandler : public CallLowering::ValueHandler {
OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- MachineInstrBuilder MIB)
- : ValueHandler(MIRBuilder, MRI), MIB(MIB) {}
+ MachineInstrBuilder MIB, CCAssignFn *AssignFn,
+ CCAssignFn *AssignFnVarArg)
+ : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
+ AssignFnVarArg(AssignFnVarArg), StackSize(0) {}
unsigned getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
@@ -126,14 +153,29 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
MIRBuilder.buildStore(ValVReg, Addr, *MMO);
}
+ bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ const CallLowering::ArgInfo &Info,
+ CCState &State) override {
+ bool Res;
+ if (Info.IsFixed)
+ Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
+ else
+ Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
+
+ StackSize = State.getNextStackOffset();
+ return Res;
+ }
+
MachineInstrBuilder MIB;
+ CCAssignFn *AssignFnVarArg;
+ uint64_t StackSize;
};
-void AArch64CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
- SmallVectorImpl<ArgInfo> &SplitArgs,
- const DataLayout &DL,
- MachineRegisterInfo &MRI,
- SplitArgTy PerformArgSplit) const {
+void AArch64CallLowering::splitToValueTypes(
+ const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL, MachineRegisterInfo &MRI,
+ const SplitArgTy &PerformArgSplit) const {
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
LLVMContext &Ctx = OrigArg.Ty->getContext();
@@ -145,7 +187,7 @@ void AArch64CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
// No splitting to do, but we want to replace the original type (e.g. [1 x
// double] -> double).
SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx),
- OrigArg.Flags);
+ OrigArg.Flags, OrigArg.IsFixed);
return;
}
@@ -154,19 +196,12 @@ void AArch64CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
// FIXME: set split flags if they're actually used (e.g. i128 on AAPCS).
Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
SplitArgs.push_back(
- ArgInfo{MRI.createGenericVirtualRegister(LLT{*SplitTy, DL}), SplitTy,
- OrigArg.Flags});
+ ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)),
+ SplitTy, OrigArg.Flags, OrigArg.IsFixed});
}
- SmallVector<uint64_t, 4> BitOffsets;
- for (auto Offset : Offsets)
- BitOffsets.push_back(Offset * 8);
-
- SmallVector<unsigned, 8> SplitRegs;
- for (auto I = &SplitArgs[FirstRegIdx]; I != SplitArgs.end(); ++I)
- SplitRegs.push_back(I->Reg);
-
- PerformArgSplit(SplitRegs, BitOffsets);
+ for (unsigned i = 0; i < Offsets.size(); ++i)
+ PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8);
}
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
@@ -184,16 +219,16 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
auto &DL = F.getParent()->getDataLayout();
ArgInfo OrigArg{VReg, Val->getType()};
- setArgFlags(OrigArg, AttributeSet::ReturnIndex, DL, F);
+ setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
SmallVector<ArgInfo, 8> SplitArgs;
splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
- MIRBuilder.buildExtract(Regs, Offsets, VReg);
+ [&](unsigned Reg, uint64_t Offset) {
+ MIRBuilder.buildExtract(Reg, VReg, Offset);
});
- OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
- Success = handleAssignments(MIRBuilder, AssignFn, SplitArgs, Handler);
+ OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn);
+ Success = handleAssignments(MIRBuilder, SplitArgs, Handler);
}
MIRBuilder.insertInstr(MIB);
@@ -203,7 +238,6 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<unsigned> VRegs) const {
- auto &Args = F.getArgumentList();
MachineFunction &MF = MIRBuilder.getMF();
MachineBasicBlock &MBB = MIRBuilder.getMBB();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -211,13 +245,27 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 8> SplitArgs;
unsigned i = 0;
- for (auto &Arg : Args) {
+ for (auto &Arg : F.args()) {
ArgInfo OrigArg{VRegs[i], Arg.getType()};
setArgFlags(OrigArg, i + 1, DL, F);
+ bool Split = false;
+ LLT Ty = MRI.getType(VRegs[i]);
+ unsigned Dst = VRegs[i];
+
splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
- MIRBuilder.buildSequence(VRegs[i], Regs, Offsets);
+ [&](unsigned Reg, uint64_t Offset) {
+ if (!Split) {
+ Split = true;
+ Dst = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildUndef(Dst);
+ }
+ unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset);
+ Dst = Tmp;
});
+
+ if (Dst != VRegs[i])
+ MIRBuilder.buildCopy(VRegs[i], Dst);
++i;
}
@@ -228,10 +276,25 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
CCAssignFn *AssignFn =
TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
- FormalArgHandler Handler(MIRBuilder, MRI);
- if (!handleAssignments(MIRBuilder, AssignFn, SplitArgs, Handler))
+ FormalArgHandler Handler(MIRBuilder, MRI, AssignFn);
+ if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
return false;
+ if (F.isVarArg()) {
+ if (!MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) {
+ // FIXME: we need to reimplement saveVarArgsRegisters from
+ // AArch64ISelLowering.
+ return false;
+ }
+
+ // We currently pass all varargs at 8-byte alignment.
+ uint64_t StackOffset = alignTo(Handler.StackUsed, 8);
+
+ auto &MFI = MIRBuilder.getMF().getFrameInfo();
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
+ }
+
// Move back to the end of the basic block.
MIRBuilder.setMBB(MBB);
@@ -239,6 +302,7 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
}
bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
+ CallingConv::ID CallConv,
const MachineOperand &Callee,
const ArgInfo &OrigRet,
ArrayRef<ArgInfo> OrigArgs) const {
@@ -250,21 +314,25 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 8> SplitArgs;
for (auto &OrigArg : OrigArgs) {
splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
- MIRBuilder.buildExtract(Regs, Offsets, OrigArg.Reg);
+ [&](unsigned Reg, uint64_t Offset) {
+ MIRBuilder.buildExtract(Reg, OrigArg.Reg, Offset);
});
}
// Find out which ABI gets to decide where things go.
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
- CCAssignFn *CallAssignFn =
- TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
+ CCAssignFn *AssignFnFixed =
+ TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
+ CCAssignFn *AssignFnVarArg =
+ TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/true);
+
+ auto CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
// Create a temporarily-floating call instruction so we can add the implicit
// uses of arg registers.
auto MIB = MIRBuilder.buildInstrNoInsert(Callee.isReg() ? AArch64::BLR
: AArch64::BL);
- MIB.addOperand(Callee);
+ MIB.add(Callee);
// Tell the call which registers are clobbered.
auto TRI = MF.getSubtarget().getRegisterInfo();
@@ -272,8 +340,9 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Do the actual argument marshalling.
SmallVector<unsigned, 8> PhysRegs;
- OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
- if (!handleAssignments(MIRBuilder, CallAssignFn, SplitArgs, Handler))
+ OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
+ AssignFnVarArg);
+ if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
return false;
// Now we can add the actual call instruction to the correct basic block.
@@ -298,20 +367,23 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
SmallVector<uint64_t, 8> RegOffsets;
SmallVector<unsigned, 8> SplitRegs;
splitToValueTypes(OrigRet, SplitArgs, DL, MRI,
- [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
- std::copy(Offsets.begin(), Offsets.end(),
- std::back_inserter(RegOffsets));
- std::copy(Regs.begin(), Regs.end(),
- std::back_inserter(SplitRegs));
+ [&](unsigned Reg, uint64_t Offset) {
+ RegOffsets.push_back(Offset);
+ SplitRegs.push_back(Reg);
});
- CallReturnHandler Handler(MIRBuilder, MRI, MIB);
- if (!handleAssignments(MIRBuilder, RetAssignFn, SplitArgs, Handler))
+ CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
+ if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
return false;
if (!RegOffsets.empty())
MIRBuilder.buildSequence(OrigRet.Reg, SplitRegs, RegOffsets);
}
+ CallSeqStart.addImm(Handler.StackSize);
+ MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
+ .addImm(Handler.StackSize)
+ .addImm(0);
+
return true;
}
diff --git a/lib/Target/AArch64/AArch64CallLowering.h b/lib/Target/AArch64/AArch64CallLowering.h
index ce6676249df6..d96ce95c4de0 100644
--- a/lib/Target/AArch64/AArch64CallLowering.h
+++ b/lib/Target/AArch64/AArch64CallLowering.h
@@ -1,4 +1,4 @@
-//===-- llvm/lib/Target/AArch64/AArch64CallLowering.h - Call lowering -----===//
+//===--- AArch64CallLowering.h - Call lowering ------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,18 +12,20 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING
-#define LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/ValueTypes.h"
+#include <cstdint>
+#include <functional>
namespace llvm {
class AArch64TargetLowering;
class AArch64CallLowering: public CallLowering {
- public:
+public:
AArch64CallLowering(const AArch64TargetLowering &TLI);
bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
@@ -32,8 +34,8 @@ class AArch64CallLowering: public CallLowering {
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
- bool lowerCall(MachineIRBuilder &MIRBuilder, const MachineOperand &Callee,
- const ArgInfo &OrigRet,
+ bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
+ const MachineOperand &Callee, const ArgInfo &OrigRet,
ArrayRef<ArgInfo> OrigArgs) const override;
private:
@@ -44,13 +46,14 @@ private:
typedef std::function<void(MachineIRBuilder &, int, CCValAssign &)>
MemHandler;
- typedef std::function<void(ArrayRef<unsigned>, ArrayRef<uint64_t>)>
- SplitArgTy;
+ typedef std::function<void(unsigned, uint64_t)> SplitArgTy;
void splitToValueTypes(const ArgInfo &OrigArgInfo,
SmallVectorImpl<ArgInfo> &SplitArgs,
const DataLayout &DL, MachineRegisterInfo &MRI,
- SplitArgTy SplitArg) const;
+ const SplitArgTy &SplitArg) const;
};
-} // End of namespace llvm;
-#endif
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING_H
diff --git a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index 8b186328d125..2dfcd2d1c393 100644
--- a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -265,10 +265,10 @@ void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,
// Change immediate in comparison instruction (ADDS or SUBS).
BuildMI(*MBB, CmpMI, CmpMI->getDebugLoc(), TII->get(Opc))
- .addOperand(CmpMI->getOperand(0))
- .addOperand(CmpMI->getOperand(1))
+ .add(CmpMI->getOperand(0))
+ .add(CmpMI->getOperand(1))
.addImm(Imm)
- .addOperand(CmpMI->getOperand(3));
+ .add(CmpMI->getOperand(3));
CmpMI->eraseFromParent();
// The fact that this comparison was picked ensures that it's related to the
@@ -278,7 +278,7 @@ void AArch64ConditionOptimizer::modifyCmp(MachineInstr *CmpMI,
// Change condition in branch instruction.
BuildMI(*MBB, BrMI, BrMI.getDebugLoc(), TII->get(AArch64::Bcc))
.addImm(Cmp)
- .addOperand(BrMI.getOperand(1));
+ .add(BrMI.getOperand(1));
BrMI.eraseFromParent();
MBB->updateTerminator();
diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index da09b36cac9c..00a0111f2bd2 100644
--- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -594,7 +594,7 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
// Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
BuildMI(*Head, Head->end(), TermDL, MCID)
.addReg(DestReg, RegState::Define | RegState::Dead)
- .addOperand(HeadCond[2])
+ .add(HeadCond[2])
.addImm(0)
.addImm(0);
// SUBS uses the GPR*sp register classes.
@@ -650,13 +650,12 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
if (CmpMI->getOperand(FirstOp + 1).isReg())
MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
TII->getRegClass(MCID, 1, TRI, *MF));
- MachineInstrBuilder MIB =
- BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
- .addOperand(CmpMI->getOperand(FirstOp)); // Register Rn
+ MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
+ .add(CmpMI->getOperand(FirstOp)); // Register Rn
if (isZBranch)
MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
else
- MIB.addOperand(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
+ MIB.add(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
MIB.addImm(NZCV).addImm(HeadCmpBBCC);
// If CmpMI was a terminator, we need a new conditional branch to replace it.
@@ -666,7 +665,7 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
CmpMI->getOpcode() == AArch64::CBNZX;
BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
.addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
- .addOperand(CmpMI->getOperand(1)); // Branch target.
+ .add(CmpMI->getOperand(1)); // Branch target.
}
CmpMI->eraseFromParent();
Head->updateTerminator();
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index fe1c0beee0eb..d0c0956b87ca 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -17,6 +17,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
+#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -70,9 +71,9 @@ static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
const MachineOperand &MO = OldMI.getOperand(i);
assert(MO.isReg() && MO.getReg());
if (MO.isUse())
- UseMI.addOperand(MO);
+ UseMI.add(MO);
else
- DefMI.addOperand(MO);
+ DefMI.add(MO);
}
}
@@ -112,7 +113,7 @@ static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
// Create the ORR-immediate instruction.
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(AArch64::XZR)
.addImm(Encoding);
@@ -179,7 +180,7 @@ static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
// Create the ORR-immediate instruction.
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(AArch64::XZR)
.addImm(Encoding);
@@ -362,7 +363,7 @@ static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(AArch64::XZR)
.addImm(Encoding);
@@ -425,7 +426,7 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
.addImm(Encoding);
transferImpOps(MI, MIB, MIB);
@@ -539,15 +540,15 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
if (Imm != 0) {
unsigned LZ = countLeadingZeros(Imm);
unsigned TZ = countTrailingZeros(Imm);
- Shift = ((63 - LZ) / 16) * 16;
- LastShift = (TZ / 16) * 16;
+ Shift = (TZ / 16) * 16;
+ LastShift = ((63 - LZ) / 16) * 16;
}
unsigned Imm16 = (Imm >> Shift) & Mask;
bool DstIsDead = MI.getOperand(0).isDead();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
.addReg(DstReg, RegState::Define |
- getDeadRegState(DstIsDead && Shift == LastShift))
+ getDeadRegState(DstIsDead && Shift == LastShift))
.addImm(Imm16)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
@@ -564,15 +565,15 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
MachineInstrBuilder MIB2;
unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
- while (Shift != LastShift) {
- Shift -= 16;
+ while (Shift < LastShift) {
+ Shift += 16;
Imm16 = (Imm >> Shift) & Mask;
if (Imm16 == (isNeg ? Mask : 0))
continue; // This 16-bit portion is already set correctly.
MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
.addReg(DstReg,
RegState::Define |
- getDeadRegState(DstIsDead && Shift == LastShift))
+ getDeadRegState(DstIsDead && Shift == LastShift))
.addReg(DstReg)
.addImm(Imm16)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
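The swapped Shift/LastShift initialization above reverses the direction of the chunk walk: the immediate is now materialized from the lowest 16-bit chunk (derived from countTrailingZeros) up to the highest (from countLeadingZeros). A minimal standalone sketch of the positive-immediate path (the value and register are made up; the real code also handles the negated case, where MOVN seeds the value and all-ones chunks are skipped):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Imm = 0x0012000034560000ULL;       // hypothetical immediate
  unsigned LZ = __builtin_clzll(Imm);         // stands in for countLeadingZeros
  unsigned TZ = __builtin_ctzll(Imm);         // stands in for countTrailingZeros
  unsigned Shift = (TZ / 16) * 16;            // lowest chunk to materialize
  unsigned LastShift = ((63 - LZ) / 16) * 16; // highest chunk to materialize
  printf("movz x0, #0x%llx, lsl #%u\n",
         (unsigned long long)((Imm >> Shift) & 0xFFFF), Shift);
  while (Shift < LastShift) {
    Shift += 16;
    uint64_t Imm16 = (Imm >> Shift) & 0xFFFF;
    if (Imm16 == 0)
      continue; // already zero thanks to the zero-filling movz
    printf("movk x0, #0x%llx, lsl #%u\n", (unsigned long long)Imm16, Shift);
  }
  // Prints: movz x0, #0x3456, lsl #16
  //         movk x0, #0x12, lsl #48
}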
@@ -627,7 +628,7 @@ bool AArch64ExpandPseudo::expandCMP_SWAP(
.addReg(Addr.getReg());
BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
.addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
- .addOperand(Desired)
+ .add(Desired)
.addImm(ExtendImm);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
.addImm(AArch64CC::NE)
@@ -643,9 +644,7 @@ bool AArch64ExpandPseudo::expandCMP_SWAP(
StoreBB->addLiveIn(New.getReg());
addPostLoopLiveIns(StoreBB, LiveRegs);
- BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
- .addOperand(New)
- .addOperand(Addr);
+ BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg).add(New).add(Addr);
BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
.addReg(StatusReg, RegState::Kill)
.addMBB(LoadCmpBB);
@@ -710,7 +709,7 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
.addReg(Addr.getReg());
BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
.addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
- .addOperand(DesiredLo)
+ .add(DesiredLo)
.addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
.addUse(AArch64::WZR)
@@ -718,7 +717,7 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
.addImm(AArch64CC::EQ);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
.addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
- .addOperand(DesiredHi)
+ .add(DesiredHi)
.addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
.addUse(StatusReg, RegState::Kill)
@@ -738,9 +737,9 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
StoreBB->addLiveIn(NewHi.getReg());
addPostLoopLiveIns(StoreBB, LiveRegs);
BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
- .addOperand(NewLo)
- .addOperand(NewHi)
- .addOperand(Addr);
+ .add(NewLo)
+ .add(NewHi)
+ .add(Addr);
BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
.addReg(StatusReg, RegState::Kill)
.addMBB(LoadCmpBB);
@@ -825,8 +824,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1))
- .addOperand(MI.getOperand(2))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
transferImpOps(MI, MIB1, MIB1);
MI.eraseFromParent();
@@ -842,7 +841,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(DstReg);
if (MO1.isGlobal()) {
@@ -878,19 +877,31 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
unsigned DstReg = MI.getOperand(0).getReg();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
- .addOperand(MI.getOperand(1));
+ .add(MI.getOperand(1));
MachineInstrBuilder MIB2 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(DstReg)
- .addOperand(MI.getOperand(2))
+ .add(MI.getOperand(2))
.addImm(0);
transferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
}
+ case AArch64::MOVbaseTLS: {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ auto SysReg = AArch64SysReg::TPIDR_EL0;
+ MachineFunction *MF = MBB.getParent();
+ if (MF->getTarget().getTargetTriple().isOSFuchsia() &&
+ MF->getTarget().getCodeModel() == CodeModel::Kernel)
+ SysReg = AArch64SysReg::TPIDR_EL1;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
+ .addImm(SysReg);
+ MI.eraseFromParent();
+ return true;
+ }
case AArch64::MOVi32imm:
return expandMOVImm(MBB, MBBI, 32);
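For reference, the MOVbaseTLS case added above expands to a single system-register read, roughly (register choice illustrative):

    mrs x0, TPIDR_EL0    ; TPIDR_EL1 instead under the Fuchsia kernel code model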
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index fe2c2d4550a7..4e5e3e43a468 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -15,28 +15,62 @@
#include "AArch64.h"
#include "AArch64CallingConvention.h"
+#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
-#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
namespace {
@@ -50,48 +84,55 @@ class AArch64FastISel final : public FastISel {
} BaseKind;
private:
- BaseKind Kind;
- AArch64_AM::ShiftExtendType ExtType;
+ BaseKind Kind = RegBase;
+ AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
union {
unsigned Reg;
int FI;
} Base;
- unsigned OffsetReg;
- unsigned Shift;
- int64_t Offset;
- const GlobalValue *GV;
+ unsigned OffsetReg = 0;
+ unsigned Shift = 0;
+ int64_t Offset = 0;
+ const GlobalValue *GV = nullptr;
public:
- Address() : Kind(RegBase), ExtType(AArch64_AM::InvalidShiftExtend),
- OffsetReg(0), Shift(0), Offset(0), GV(nullptr) { Base.Reg = 0; }
+ Address() { Base.Reg = 0; }
+
void setKind(BaseKind K) { Kind = K; }
BaseKind getKind() const { return Kind; }
void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
bool isRegBase() const { return Kind == RegBase; }
bool isFIBase() const { return Kind == FrameIndexBase; }
+
void setReg(unsigned Reg) {
assert(isRegBase() && "Invalid base register access!");
Base.Reg = Reg;
}
+
unsigned getReg() const {
assert(isRegBase() && "Invalid base register access!");
return Base.Reg;
}
+
void setOffsetReg(unsigned Reg) {
OffsetReg = Reg;
}
+
unsigned getOffsetReg() const {
return OffsetReg;
}
+
void setFI(unsigned FI) {
assert(isFIBase() && "Invalid base frame index access!");
Base.FI = FI;
}
+
unsigned getFI() const {
assert(isFIBase() && "Invalid base frame index access!");
return Base.FI;
}
+
void setOffset(int64_t O) { Offset = O; }
int64_t getOffset() { return Offset; }
void setShift(unsigned S) { Shift = S; }
@@ -417,7 +458,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
// MachO still uses GOT for large code-model accesses, but ELF requires
// movz/movk sequences, which FastISel doesn't handle yet.
- if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
+ if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
return 0;
unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
@@ -531,23 +572,23 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
switch (Opcode) {
default:
break;
- case Instruction::BitCast: {
+ case Instruction::BitCast:
// Look through bitcasts.
return computeAddress(U->getOperand(0), Addr, Ty);
- }
- case Instruction::IntToPtr: {
+
+ case Instruction::IntToPtr:
// Look past no-op inttoptrs.
if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
TLI.getPointerTy(DL))
return computeAddress(U->getOperand(0), Addr, Ty);
break;
- }
- case Instruction::PtrToInt: {
+
+ case Instruction::PtrToInt:
// Look past no-op ptrtoints.
if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return computeAddress(U->getOperand(0), Addr, Ty);
break;
- }
+
case Instruction::GetElementPtr: {
Address SavedAddr = Addr;
uint64_t TmpOffset = Addr.getOffset();
@@ -563,7 +604,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
TmpOffset += SL->getElementOffset(Idx);
} else {
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
- for (;;) {
+ while (true) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
@@ -2813,8 +2854,8 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
MVT DestVT;
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
return false;
- assert ((DestVT == MVT::f32 || DestVT == MVT::f64) &&
- "Unexpected value type.");
+ assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
+ "Unexpected value type.");
unsigned SrcReg = getRegForValue(I->getOperand(0));
if (!SrcReg)
@@ -3106,8 +3147,8 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
return false;
CodeModel::Model CM = TM.getCodeModel();
- // Only support the small and large code model.
- if (CM != CodeModel::Small && CM != CodeModel::Large)
+ // Only support the small-addressing and large code models.
+ if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
return false;
// FIXME: Add large code model support for ELF.
@@ -3158,7 +3199,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Issue the call.
MachineInstrBuilder MIB;
- if (CM == CodeModel::Small) {
+ if (Subtarget->useSmallAddressing()) {
const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
if (Symbol)
@@ -3369,8 +3410,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
MFI.setFrameAddressIsTaken(true);
- const AArch64RegisterInfo *RegInfo =
- static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo());
+ const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -3521,11 +3561,11 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
updateValueMap(II, ResultReg);
return true;
}
- case Intrinsic::trap: {
+ case Intrinsic::trap:
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
.addImm(1);
return true;
- }
+
case Intrinsic::sqrt: {
Type *RetTy = II->getCalledFunction()->getReturnType();
@@ -5092,8 +5132,10 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
}
namespace llvm {
-llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
+
+FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) {
return new AArch64FastISel(FuncInfo, LibInfo);
}
-}
+
+} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index f5b8c35375f8..550174b22a89 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -90,21 +90,42 @@
#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <vector>
using namespace llvm;
@@ -245,14 +266,13 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
if (&MF->front() == MBB)
return AArch64::X9;
- const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
- LivePhysRegs LiveRegs(&TRI);
+ const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo *TRI = Subtarget.getRegisterInfo();
+ LivePhysRegs LiveRegs(TRI);
LiveRegs.addLiveIns(*MBB);
// Mark callee saved registers as used so we will not choose them.
- const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
- const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF);
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(MF);
for (unsigned i = 0; CSRegs[i]; ++i)
LiveRegs.addReg(CSRegs[i]);
@@ -319,7 +339,6 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
-
unsigned NewOpc;
bool NewIsUnscaled = false;
switch (MBBI->getOpcode()) {
@@ -362,7 +381,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
unsigned OpndIdx = 0;
for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
++OpndIdx)
- MIB.addOperand(MBBI->getOperand(OpndIdx));
+ MIB.add(MBBI->getOperand(OpndIdx));
assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
"Unexpected immediate offset in first/last callee-save save/restore "
@@ -863,22 +882,26 @@ static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
static bool produceCompactUnwindFrame(MachineFunction &MF) {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- AttributeSet Attrs = MF.getFunction()->getAttributes();
+ AttributeList Attrs = MF.getFunction()->getAttributes();
return Subtarget.isTargetMachO() &&
!(Subtarget.getTargetLowering()->supportSwiftError() &&
Attrs.hasAttrSomewhere(Attribute::SwiftError));
}
namespace {
+
struct RegPairInfo {
- RegPairInfo() : Reg1(AArch64::NoRegister), Reg2(AArch64::NoRegister) {}
- unsigned Reg1;
- unsigned Reg2;
+ unsigned Reg1 = AArch64::NoRegister;
+ unsigned Reg2 = AArch64::NoRegister;
int FrameIdx;
int Offset;
bool IsGPR;
+
+ RegPairInfo() = default;
+
bool isPaired() const { return Reg2 != AArch64::NoRegister; }
};
+
} // end anonymous namespace
static void computeCalleeSaveRegisterPairs(
diff --git a/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
index d472a54d9543..8b1c9740d2ad 100644
--- a/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
+++ b/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
@@ -16,281 +16,198 @@
#endif
namespace llvm {
-namespace AArch64 {
-
-const uint32_t GPRCoverageData[] = {
- // Classes 0-31
- (1u << AArch64::GPR32allRegClassID) | (1u << AArch64::GPR32RegClassID) |
- (1u << AArch64::GPR32spRegClassID) |
- (1u << AArch64::GPR32commonRegClassID) |
- (1u << AArch64::GPR32sponlyRegClassID) |
- (1u << AArch64::GPR64allRegClassID) | (1u << AArch64::GPR64RegClassID) |
- (1u << AArch64::GPR64spRegClassID) |
- (1u << AArch64::GPR64commonRegClassID) |
- (1u << AArch64::tcGPR64RegClassID) |
- (1u << AArch64::GPR64sponlyRegClassID),
- // Classes 32-63
- 0,
- // FIXME: The entries below this point can be safely removed once this is
- // tablegenerated. It's only needed because of the hardcoded register class
- // limit.
- // Classes 64-96
- 0,
- // Classes 97-128
- 0,
- // Classes 129-160
- 0,
- // Classes 161-192
- 0,
- // Classes 193-224
- 0,
-};
-
-const uint32_t FPRCoverageData[] = {
- // Classes 0-31
- (1u << AArch64::FPR8RegClassID) | (1u << AArch64::FPR16RegClassID) |
- (1u << AArch64::FPR32RegClassID) | (1u << AArch64::FPR64RegClassID) |
- (1u << AArch64::DDRegClassID) | (1u << AArch64::FPR128RegClassID) |
- (1u << AArch64::FPR128_loRegClassID) | (1u << AArch64::DDDRegClassID) |
- (1u << AArch64::DDDDRegClassID),
- // Classes 32-63
- (1u << (AArch64::QQRegClassID - 32)) |
- (1u << (AArch64::QQ_with_qsub0_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQ_with_qsub1_in_FPR128_loRegClassID - 32)) |
- (1u
- << (AArch64::
- QQQ_with_qsub1_in_FPR128_lo_and_QQQ_with_qsub2_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQ_with_qsub0_in_FPR128_lo_and_QQQ_with_qsub2_in_FPR128_loRegClassID -
- 32)) |
- (1u << (AArch64::QQQQRegClassID - 32)) |
- (1u << (AArch64::QQQQ_with_qsub0_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQQQ_with_qsub1_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQQQ_with_qsub2_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQQQ_with_qsub3_in_FPR128_loRegClassID - 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub0_in_FPR128_lo_and_QQQQ_with_qsub1_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub1_in_FPR128_lo_and_QQQQ_with_qsub2_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub2_in_FPR128_lo_and_QQQQ_with_qsub3_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub0_in_FPR128_lo_and_QQQQ_with_qsub2_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub1_in_FPR128_lo_and_QQQQ_with_qsub3_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQQQ_with_qsub0_in_FPR128_lo_and_QQQQ_with_qsub3_in_FPR128_loRegClassID -
- 32)) |
- (1u
- << (AArch64::
- QQ_with_qsub0_in_FPR128_lo_and_QQ_with_qsub1_in_FPR128_loRegClassID -
- 32)) |
- (1u << (AArch64::QQQRegClassID - 32)) |
- (1u << (AArch64::QQQ_with_qsub0_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQQ_with_qsub1_in_FPR128_loRegClassID - 32)) |
- (1u << (AArch64::QQQ_with_qsub2_in_FPR128_loRegClassID - 32)) |
- (1u
- << (AArch64::
- QQQ_with_qsub0_in_FPR128_lo_and_QQQ_with_qsub1_in_FPR128_loRegClassID -
- 32)),
- // FIXME: The entries below this point can be safely removed once this
- // is tablegenerated. It's only needed because of the hardcoded register
- // class limit.
- // Classes 64-96
- 0,
- // Classes 97-128
- 0,
- // Classes 129-160
- 0,
- // Classes 161-192
- 0,
- // Classes 193-224
- 0,
-};
-
-const uint32_t CCRCoverageData[] = {
- // Classes 0-31
- 1u << AArch64::CCRRegClassID,
- // Classes 32-63
- 0,
- // FIXME: The entries below this point can be safely removed once this
- // is tablegenerated. It's only needed because of the hardcoded register
- // class limit.
- // Classes 64-96
- 0,
- // Classes 97-128
- 0,
- // Classes 129-160
- 0,
- // Classes 161-192
- 0,
- // Classes 193-224
- 0,
-};
-
-RegisterBank GPRRegBank(AArch64::GPRRegBankID, "GPR", 64, GPRCoverageData);
-RegisterBank FPRRegBank(AArch64::FPRRegBankID, "FPR", 512, FPRCoverageData);
-RegisterBank CCRRegBank(AArch64::CCRRegBankID, "CCR", 32, CCRCoverageData);
-
-RegisterBank *RegBanks[] = {&GPRRegBank, &FPRRegBank, &CCRRegBank};
-
-// PartialMappings.
-enum PartialMappingIdx {
- PMI_None = -1,
- PMI_GPR32 = 1,
- PMI_GPR64,
- PMI_FPR32,
- PMI_FPR64,
- PMI_FPR128,
- PMI_FPR256,
- PMI_FPR512,
- PMI_FirstGPR = PMI_GPR32,
- PMI_LastGPR = PMI_GPR64,
- PMI_FirstFPR = PMI_FPR32,
- PMI_LastFPR = PMI_FPR512,
- PMI_Min = PMI_FirstGPR,
-};
-
-static unsigned getRegBankBaseIdxOffset(unsigned Size) {
- assert(Size && "0-sized type!!");
- // Make anything smaller than 32 gets 32
- Size = ((Size + 31) / 32) * 32;
- // 32 is 0, 64 is 1, 128 is 2, and so on.
- return Log2_32(Size) - /*Log2_32(32)=*/ 5;
-}
-
-RegisterBankInfo::PartialMapping PartMappings[] {
- /* StartIdx, Length, RegBank */
- // 0: GPR 32-bit value.
- {0, 32, GPRRegBank},
- // 1: GPR 64-bit value.
- {0, 64, GPRRegBank},
- // 2: FPR 32-bit value.
- {0, 32, FPRRegBank},
- // 3: FPR 64-bit value.
- {0, 64, FPRRegBank},
- // 4: FPR 128-bit value.
- {0, 128, FPRRegBank},
- // 5: FPR 256-bit value.
- {0, 256, FPRRegBank},
- // 6: FPR 512-bit value.
- {0, 512, FPRRegBank}
-};
-
-enum ValueMappingIdx {
- First3OpsIdx = 0,
- Last3OpsIdx = 18,
- DistanceBetweenRegBanks = 3,
- FirstCrossRegCpyIdx = 21,
- LastCrossRegCpyIdx = 27,
- DistanceBetweenCrossRegCpy = 2
+RegisterBankInfo::PartialMapping AArch64GenRegisterBankInfo::PartMappings[]{
+ /* StartIdx, Length, RegBank */
+ // 0: FPR 32-bit value.
+ {0, 32, AArch64::FPRRegBank},
+ // 1: FPR 64-bit value.
+ {0, 64, AArch64::FPRRegBank},
+ // 2: FPR 128-bit value.
+ {0, 128, AArch64::FPRRegBank},
+ // 3: FPR 256-bit value.
+ {0, 256, AArch64::FPRRegBank},
+ // 4: FPR 512-bit value.
+ {0, 512, AArch64::FPRRegBank},
+ // 5: GPR 32-bit value.
+ {0, 32, AArch64::GPRRegBank},
+ // 6: GPR 64-bit value.
+ {0, 64, AArch64::GPRRegBank},
};
// ValueMappings.
-RegisterBankInfo::ValueMapping ValMappings[]{
+RegisterBankInfo::ValueMapping AArch64GenRegisterBankInfo::ValMappings[]{
/* BreakDown, NumBreakDowns */
+ // 0: invalid
+ {nullptr, 0},
// 3-operand instructions (all binary operations should end up with one of
// those mappings).
- // 0: GPR 32-bit value. <-- This must match First3OpsIdx.
- {&PartMappings[PMI_GPR32 - PMI_Min], 1},
- {&PartMappings[PMI_GPR32 - PMI_Min], 1},
- {&PartMappings[PMI_GPR32 - PMI_Min], 1},
- // 3: GPR 64-bit value.
- {&PartMappings[PMI_GPR64 - PMI_Min], 1},
- {&PartMappings[PMI_GPR64 - PMI_Min], 1},
- {&PartMappings[PMI_GPR64 - PMI_Min], 1},
- // 6: FPR 32-bit value.
- {&PartMappings[PMI_FPR32 - PMI_Min], 1},
- {&PartMappings[PMI_FPR32 - PMI_Min], 1},
- {&PartMappings[PMI_FPR32 - PMI_Min], 1},
- // 9: FPR 64-bit value.
- {&PartMappings[PMI_FPR64 - PMI_Min], 1},
- {&PartMappings[PMI_FPR64 - PMI_Min], 1},
- {&PartMappings[PMI_FPR64 - PMI_Min], 1},
- // 12: FPR 128-bit value.
- {&PartMappings[PMI_FPR128 - PMI_Min], 1},
- {&PartMappings[PMI_FPR128 - PMI_Min], 1},
- {&PartMappings[PMI_FPR128 - PMI_Min], 1},
- // 15: FPR 256-bit value.
- {&PartMappings[PMI_FPR256 - PMI_Min], 1},
- {&PartMappings[PMI_FPR256 - PMI_Min], 1},
- {&PartMappings[PMI_FPR256 - PMI_Min], 1},
- // 18: FPR 512-bit value. <-- This must match Last3OpsIdx.
- {&PartMappings[PMI_FPR512 - PMI_Min], 1},
- {&PartMappings[PMI_FPR512 - PMI_Min], 1},
- {&PartMappings[PMI_FPR512 - PMI_Min], 1},
+ // 1: FPR 32-bit value. <-- This must match First3OpsIdx.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ // 4: FPR 64-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ // 7: FPR 128-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1},
+ // 10: FPR 256-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR256 - PMI_Min], 1},
+ // 13: FPR 512-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR512 - PMI_Min], 1},
+ // 16: GPR 32-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ // 19: GPR 64-bit value. <-- This must match Last3OpsIdx.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
// Cross register bank copies.
- // 21: GPR 32-bit value to FPR 32-bit value. <-- This must match
+ // 22: FPR 32-bit value to GPR 32-bit value. <-- This must match
// FirstCrossRegCpyIdx.
- {&PartMappings[PMI_GPR32 - PMI_Min], 1},
- {&PartMappings[PMI_FPR32 - PMI_Min], 1},
- // 23: GPR 64-bit value to FPR 64-bit value.
- {&PartMappings[PMI_GPR64 - PMI_Min], 1},
- {&PartMappings[PMI_FPR64 - PMI_Min], 1},
- // 25: FPR 32-bit value to GPR 32-bit value.
- {&PartMappings[PMI_FPR32 - PMI_Min], 1},
- {&PartMappings[PMI_GPR32 - PMI_Min], 1},
- // 27: FPR 64-bit value to GPR 64-bit value. <-- This must match
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ // 24: FPR 64-bit value to GPR 64-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+ // 26: FPR 128-bit value to GPR 128-bit value (invalid)
+ {nullptr, 1},
+ {nullptr, 1},
+ // 28: FPR 256-bit value to GPR 256-bit value (invalid)
+ {nullptr, 1},
+ {nullptr, 1},
+ // 30: FPR 512-bit value to GPR 512-bit value (invalid)
+ {nullptr, 1},
+ {nullptr, 1},
+ // 32: GPR 32-bit value to FPR 32-bit value.
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1},
+ // 34: GPR 64-bit value to FPR 64-bit value. <-- This must match
// LastCrossRegCpyIdx.
- {&PartMappings[PMI_FPR64 - PMI_Min], 1},
- {&PartMappings[PMI_GPR64 - PMI_Min], 1}
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1},
+ {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
};
-/// Get the pointer to the ValueMapping representing the RegisterBank
-/// at \p RBIdx with a size of \p Size.
-///
-/// The returned mapping works for instructions with the same kind of
-/// operands for up to 3 operands.
-///
-/// \pre \p RBIdx != PartialMappingIdx::None
+bool AArch64GenRegisterBankInfo::checkPartialMap(unsigned Idx,
+ unsigned ValStartIdx,
+ unsigned ValLength,
+ const RegisterBank &RB) {
+ const PartialMapping &Map = PartMappings[Idx - PartialMappingIdx::PMI_Min];
+ return Map.StartIdx == ValStartIdx && Map.Length == ValLength &&
+ Map.RegBank == &RB;
+}
+
+bool AArch64GenRegisterBankInfo::checkValueMapImpl(unsigned Idx,
+ unsigned FirstInBank,
+ unsigned Size,
+ unsigned Offset) {
+ unsigned PartialMapBaseIdx = Idx - PartialMappingIdx::PMI_Min;
+ const ValueMapping &Map =
+ AArch64GenRegisterBankInfo::getValueMapping(
+ (PartialMappingIdx)FirstInBank, Size)[Offset];
+ return Map.BreakDown == &PartMappings[PartialMapBaseIdx] &&
+ Map.NumBreakDowns == 1;
+}
+
+bool AArch64GenRegisterBankInfo::checkPartialMappingIdx(
+ PartialMappingIdx FirstAlias, PartialMappingIdx LastAlias,
+ ArrayRef<PartialMappingIdx> Order) {
+ if (Order.front() != FirstAlias)
+ return false;
+ if (Order.back() != LastAlias)
+ return false;
+ if (Order.front() > Order.back())
+ return false;
+
+ PartialMappingIdx Previous = Order.front();
+ bool First = true;
+ for (const auto &Current : Order) {
+ if (First) {
+ First = false;
+ continue;
+ }
+ if (Previous + 1 != Current)
+ return false;
+ Previous = Current;
+ }
+ return true;
+}
+
+unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
+ unsigned Size) {
+ if (RBIdx == PMI_FirstGPR) {
+ if (Size <= 32)
+ return 0;
+ if (Size <= 64)
+ return 1;
+ return -1;
+ }
+ if (RBIdx == PMI_FirstFPR) {
+ if (Size <= 32)
+ return 0;
+ if (Size <= 64)
+ return 1;
+ if (Size <= 128)
+ return 2;
+ if (Size <= 256)
+ return 3;
+ if (Size <= 512)
+ return 4;
+ return -1;
+ }
+ return -1;
+}
+
const RegisterBankInfo::ValueMapping *
-getValueMapping(PartialMappingIdx RBIdx, unsigned Size) {
+AArch64GenRegisterBankInfo::getValueMapping(PartialMappingIdx RBIdx,
+ unsigned Size) {
assert(RBIdx != PartialMappingIdx::PMI_None && "No mapping needed for that");
- unsigned ValMappingIdx = First3OpsIdx +
- (RBIdx - AArch64::PartialMappingIdx::PMI_Min +
- getRegBankBaseIdxOffset(Size)) *
- ValueMappingIdx::DistanceBetweenRegBanks;
- assert(ValMappingIdx >= AArch64::First3OpsIdx &&
- ValMappingIdx <= AArch64::Last3OpsIdx && "Mapping out of bound");
+ unsigned BaseIdxOffset = getRegBankBaseIdxOffset(RBIdx, Size);
+ if (BaseIdxOffset == -1u)
+ return &ValMappings[InvalidIdx];
+
+ unsigned ValMappingIdx =
+ First3OpsIdx + (RBIdx - PartialMappingIdx::PMI_Min + BaseIdxOffset) *
+ ValueMappingIdx::DistanceBetweenRegBanks;
+ assert(ValMappingIdx >= First3OpsIdx && ValMappingIdx <= Last3OpsIdx &&
+ "Mapping out of bound");
return &ValMappings[ValMappingIdx];
}
-/// Get the pointer to the ValueMapping of the operands of a copy
-/// instruction from a GPR or FPR register to a GPR or FPR register
-/// with a size of \p Size.
-///
-/// If \p DstIsGPR is true, the destination of the copy is on GPR,
-/// otherwise it is on FPR. Same thing for \p SrcIsGPR.
+AArch64GenRegisterBankInfo::PartialMappingIdx
+ AArch64GenRegisterBankInfo::BankIDToCopyMapIdx[]{
+ PMI_None, // CCR
+ PMI_FirstFPR, // FPR
+ PMI_FirstGPR, // GPR
+ };
+
const RegisterBankInfo::ValueMapping *
-getCopyMapping(bool DstIsGPR, bool SrcIsGPR, unsigned Size) {
- PartialMappingIdx DstRBIdx = DstIsGPR ? PMI_FirstGPR : PMI_FirstFPR;
- PartialMappingIdx SrcRBIdx = SrcIsGPR ? PMI_FirstGPR : PMI_FirstFPR;
+AArch64GenRegisterBankInfo::getCopyMapping(unsigned DstBankID,
+ unsigned SrcBankID, unsigned Size) {
+ assert(DstBankID < AArch64::NumRegisterBanks && "Invalid bank ID");
+ assert(SrcBankID < AArch64::NumRegisterBanks && "Invalid bank ID");
+ PartialMappingIdx DstRBIdx = BankIDToCopyMapIdx[DstBankID];
+ PartialMappingIdx SrcRBIdx = BankIDToCopyMapIdx[SrcBankID];
+ assert(DstRBIdx != PMI_None && "No such mapping");
+ assert(SrcRBIdx != PMI_None && "No such mapping");
+
if (DstRBIdx == SrcRBIdx)
return getValueMapping(DstRBIdx, Size);
+
assert(Size <= 64 && "GPR cannot handle that size");
unsigned ValMappingIdx =
FirstCrossRegCpyIdx +
- (DstRBIdx - PMI_Min + getRegBankBaseIdxOffset(Size)) *
+ (DstRBIdx - PMI_Min + getRegBankBaseIdxOffset(DstRBIdx, Size)) *
ValueMappingIdx::DistanceBetweenCrossRegCpy;
- assert(ValMappingIdx >= AArch64::FirstCrossRegCpyIdx &&
- ValMappingIdx <= AArch64::LastCrossRegCpyIdx &&
- "Mapping out of bound");
+ assert(ValMappingIdx >= FirstCrossRegCpyIdx &&
+ ValMappingIdx <= LastCrossRegCpyIdx && "Mapping out of bound");
return &ValMappings[ValMappingIdx];
}
-
-} // End AArch64 namespace.
} // End llvm namespace.
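A worked check of the new indexing, assuming the enum layout the table comments imply (First3OpsIdx = 1, DistanceBetweenRegBanks = 3, FirstCrossRegCpyIdx = 22, DistanceBetweenCrossRegCpy = 2, FPR partial mappings first with PMI_FirstGPR - PMI_Min == 5):

    // getValueMapping(PMI_FirstGPR, 64):
    //   BaseIdxOffset = 1, so ValMappingIdx = 1 + (5 + 1) * 3 = 19,
    //   the "GPR 64-bit value" triple in ValMappings.
    // getCopyMapping(GPR, FPR, 32):
    //   DstRBIdx = PMI_FirstGPR, so ValMappingIdx = 22 + (5 + 0) * 2 = 32,
    //   the {GPR32, FPR32} copy pair (operand 0 being the destination).

Both land on the entries the index comments in ValMappings call out.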
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 3099383e5b32..ae01ea477bb9 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -328,11 +328,52 @@ static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
}
}
+/// \brief Determine whether it is worth it to fold SHL into the addressing
+/// mode.
+static bool isWorthFoldingSHL(SDValue V) {
+ assert(V.getOpcode() == ISD::SHL && "invalid opcode");
+ // It is worth folding a logical shift of up to three places.
+ auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ if (!CSD)
+ return false;
+ unsigned ShiftVal = CSD->getZExtValue();
+ if (ShiftVal > 3)
+ return false;
+
+ // Check if this particular node is reused in any non-memory related
+ // operation. If yes, do not try to fold this node into the address
+ // computation, since the computation will be kept.
+ const SDNode *Node = V.getNode();
+ for (SDNode *UI : Node->uses())
+ if (!isa<MemSDNode>(*UI))
+ for (SDNode *UII : UI->uses())
+ if (!isa<MemSDNode>(*UII))
+ return false;
+ return true;
+}
+
/// \brief Determine whether it is worth it to fold V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
- // it hurts if the value is used at least twice, unless we are optimizing
- // for code size.
- return ForCodeSize || V.hasOneUse();
+ // Trivial if we are optimizing for code size or if there is only
+ // one use of the value.
+ if (ForCodeSize || V.hasOneUse())
+ return true;
+ // If a subtarget has a fast-path LSL, we can fold a logical shift into
+ // the addressing mode and save a cycle.
+ if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
+ isWorthFoldingSHL(V))
+ return true;
+ if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
+ const SDValue LHS = V.getOperand(0);
+ const SDValue RHS = V.getOperand(1);
+ if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
+ return true;
+ if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
+ return true;
+ }
+
+ // It hurts otherwise, since the value will be reused.
+ return false;
}
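An illustrative case for the new heuristic (registers made up; assumes a subtarget with the LSL-fast feature): a shifted index reused by two loads fails the hasOneUse() test and previously kept the shift as a separate instruction,

    lsl x8, x1, #3
    ldr x9, [x0, x8]
    ldr x10, [x2, x8]

whereas folding it into both addressing modes is now considered worthwhile:

    ldr x9, [x0, x1, lsl #3]
    ldr x10, [x2, x1, lsl #3]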
/// SelectShiftedRegister - Select a "shifted register" operand. If the value
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 849058bdfbdb..0d3289ac84c3 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -554,8 +555,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setSchedulingPreference(Sched::Hybrid);
- // Enable TBZ/TBNZ
- MaskAndBranchFoldingIsLegal = true;
EnableExtLdPromotion = true;
// Set required alignment.
@@ -793,7 +792,7 @@ EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
/// KnownZero/KnownOne bitsets.
void AArch64TargetLowering::computeKnownBitsForTargetNode(
const SDValue Op, APInt &KnownZero, APInt &KnownOne,
- const SelectionDAG &DAG, unsigned Depth) const {
+ const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
@@ -2113,8 +2112,8 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
Entry.Node = Arg;
Entry.Ty = ArgTy;
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
const char *LibcallName =
@@ -2124,8 +2123,9 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
@@ -2231,19 +2231,13 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
}
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
- if (N->getOpcode() == ISD::SIGN_EXTEND)
- return true;
- if (isExtendedBUILD_VECTOR(N, DAG, true))
- return true;
- return false;
+ return N->getOpcode() == ISD::SIGN_EXTEND ||
+ isExtendedBUILD_VECTOR(N, DAG, true);
}
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
- if (N->getOpcode() == ISD::ZERO_EXTEND)
- return true;
- if (isExtendedBUILD_VECTOR(N, DAG, false))
- return true;
- return false;
+ return N->getOpcode() == ISD::ZERO_EXTEND ||
+ isExtendedBUILD_VECTOR(N, DAG, false);
}
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
@@ -3578,7 +3572,7 @@ SDValue
AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
- assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
+ assert(Subtarget->useSmallAddressing() &&
"ELF TLS only supported in small memory model");
// Different choices can be made for the maximum size of the TLS area for a
// module. For the small address model, the default TLS size is 16MiB and the
@@ -3679,7 +3673,7 @@ SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
if (Subtarget->isTargetDarwin())
return LowerDarwinGlobalTLSAddress(Op, DAG);
- else if (Subtarget->isTargetELF())
+ if (Subtarget->isTargetELF())
return LowerELFGlobalTLSAddress(Op, DAG);
llvm_unreachable("Unexpected platform trying to use TLS");
@@ -4516,7 +4510,12 @@ unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
SelectionDAG &DAG) const {
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("sp", AArch64::SP)
+ .Case("x18", AArch64::X18)
+ .Case("w18", AArch64::W18)
.Default(0);
+ if ((Reg == AArch64::X18 || Reg == AArch64::W18) &&
+ !Subtarget->isX18Reserved())
+ Reg = 0;
if (Reg)
return Reg;
report_fatal_error(Twine("Invalid register name \""
@@ -6591,21 +6590,20 @@ FailedModImm:
if (!isConstant && !usesOnlyOneValue) {
SDValue Vec = DAG.getUNDEF(VT);
SDValue Op0 = Op.getOperand(0);
- unsigned ElemSize = VT.getScalarSizeInBits();
unsigned i = 0;
- // For 32 and 64 bit types, use INSERT_SUBREG for lane zero to
+
+ // Use SCALAR_TO_VECTOR for lane zero to
// a) Avoid a RMW dependency on the full vector register, and
// b) Allow the register coalescer to fold away the copy if the
- // value is already in an S or D register.
- // Do not do this for UNDEF/LOAD nodes because we have better patterns
- // for those avoiding the SCALAR_TO_VECTOR/BUILD_VECTOR.
- if (!Op0.isUndef() && Op0.getOpcode() != ISD::LOAD &&
- (ElemSize == 32 || ElemSize == 64)) {
- unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub;
- MachineSDNode *N =
- DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0,
- DAG.getTargetConstant(SubIdx, dl, MVT::i32));
- Vec = SDValue(N, 0);
+ // value is already in an S or D register, and we're forced to emit an
+ // INSERT_SUBREG that we can't fold anywhere.
+ //
+ // We also allow types like i8 and i16 which are illegal scalar but legal
+ // vector element types. After type-legalization the inserted value is
+ // extended (i32) and it is safe to cast them to the vector type by ignoring
+ // the upper bits of the lowest lane (e.g. v8i8, v4i16).
+ if (!Op0.isUndef()) {
+ Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
++i;
}
for (; i < NumElts; ++i) {
@@ -7249,6 +7247,33 @@ bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
return NumBits == 32 || NumBits == 64;
}
+/// A helper function for determining the number of interleaved accesses we
+/// will generate when lowering accesses of the given type.
+unsigned
+AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
+ const DataLayout &DL) const {
+ return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
+}
+
+bool AArch64TargetLowering::isLegalInterleavedAccessType(
+ VectorType *VecTy, const DataLayout &DL) const {
+
+ unsigned VecSize = DL.getTypeSizeInBits(VecTy);
+ unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
+
+ // Ensure the number of vector elements is greater than 1.
+ if (VecTy->getNumElements() < 2)
+ return false;
+
+ // Ensure the element type is legal.
+ if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
+ return false;
+
+ // Ensure the total vector size is 64 bits or a multiple of 128 bits. Types
+ // larger than 128 bits will be split into multiple interleaved accesses.
+ return VecSize == 64 || VecSize % 128 == 0;
+}
+
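Restating the two helpers above as plain arithmetic (a self-contained sketch; the free-function names are made up):

#include <cstdio>

// ceil(VecBits / 128): how many 128-bit ldN/stN groups a vector type needs.
static unsigned numInterleavedAccesses(unsigned VecBits) {
  return (VecBits + 127) / 128;
}

static bool isLegalInterleavedType(unsigned NumElts, unsigned ElBits) {
  unsigned VecBits = NumElts * ElBits;
  if (NumElts < 2)
    return false; // need more than one element
  if (ElBits != 8 && ElBits != 16 && ElBits != 32 && ElBits != 64)
    return false; // element type must be legal
  return VecBits == 64 || VecBits % 128 == 0;
}

int main() {
  // v16i32, 512 bits: legal, split into 4 interleaved accesses.
  printf("%d %u\n", isLegalInterleavedType(16, 32), numInterleavedAccesses(512));
  // v8i8, 64 bits: legal as a single access.
  printf("%d %u\n", isLegalInterleavedType(8, 8), numInterleavedAccesses(64));
  // v3i32, 96 bits: rejected (neither 64 nor a multiple of 128).
  printf("%d\n", isLegalInterleavedType(3, 32));
}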
/// \brief Lower an interleaved load into a ldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
@@ -7272,12 +7297,15 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
const DataLayout &DL = LI->getModule()->getDataLayout();
VectorType *VecTy = Shuffles[0]->getType();
- unsigned VecSize = DL.getTypeSizeInBits(VecTy);
- // Skip if we do not have NEON and skip illegal vector types.
- if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128))
+ // Skip if we do not have NEON, and skip illegal vector types. We can
+ // "legalize" wide vector types into multiple interleaved accesses as long
+ // as the vector size is a multiple of 128 bits.
+ if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
return false;
+ unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
+
// A pointer vector can not be the return type of the ldN intrinsics. Need to
// load integer vectors first and then convert to pointer vectors.
Type *EltTy = VecTy->getVectorElementType();
@@ -7285,6 +7313,25 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
VecTy =
VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
+ IRBuilder<> Builder(LI);
+
+ // The base address of the load.
+ Value *BaseAddr = LI->getPointerOperand();
+
+ if (NumLoads > 1) {
+ // If we're going to generate more than one load, reset the sub-vector type
+ // to something legal.
+ VecTy = VectorType::get(VecTy->getVectorElementType(),
+ VecTy->getVectorNumElements() / NumLoads);
+
+ // We will compute the pointer operand of each load from the original base
+ // address using GEPs. Cast the base address to a pointer to the scalar
+ // element type.
+ BaseAddr = Builder.CreateBitCast(
+ BaseAddr, VecTy->getVectorElementType()->getPointerTo(
+ LI->getPointerAddressSpace()));
+ }
+
Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
Type *Tys[2] = {VecTy, PtrTy};
static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
@@ -7293,39 +7340,49 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
Function *LdNFunc =
Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
- IRBuilder<> Builder(LI);
- Value *Ptr = Builder.CreateBitCast(LI->getPointerOperand(), PtrTy);
+ // Holds sub-vectors extracted from the load intrinsic return values. The
+ // sub-vectors are associated with the shufflevector instructions they will
+ // replace.
+ DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
- CallInst *LdN = Builder.CreateCall(LdNFunc, Ptr, "ldN");
+ for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
- // Replace uses of each shufflevector with the corresponding vector loaded
- // by ldN.
- for (unsigned i = 0; i < Shuffles.size(); i++) {
- ShuffleVectorInst *SVI = Shuffles[i];
- unsigned Index = Indices[i];
+ // If we're generating more than one load, compute the base address of
+ // subsequent loads as an offset from the previous.
+ if (LoadCount > 0)
+ BaseAddr = Builder.CreateConstGEP1_32(
+ BaseAddr, VecTy->getVectorNumElements() * Factor);
- Value *SubVec = Builder.CreateExtractValue(LdN, Index);
+ CallInst *LdN = Builder.CreateCall(
+ LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
- // Convert the integer vector to pointer vector if the element is pointer.
- if (EltTy->isPointerTy())
- SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType());
+ // Extract and store the sub-vectors returned by the load intrinsic.
+ for (unsigned i = 0; i < Shuffles.size(); i++) {
+ ShuffleVectorInst *SVI = Shuffles[i];
+ unsigned Index = Indices[i];
- SVI->replaceAllUsesWith(SubVec);
- }
+ Value *SubVec = Builder.CreateExtractValue(LdN, Index);
- return true;
-}
+ // Convert the integer vector to pointer vector if the element is pointer.
+ if (EltTy->isPointerTy())
+ SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType());
-/// \brief Get a mask consisting of sequential integers starting from \p Start.
-///
-/// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
-static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
- unsigned NumElts) {
- SmallVector<Constant *, 16> Mask;
- for (unsigned i = 0; i < NumElts; i++)
- Mask.push_back(Builder.getInt32(Start + i));
+ SubVecs[SVI].push_back(SubVec);
+ }
+ }
+
+ // Replace uses of the shufflevector instructions with the sub-vectors
+ // returned by the load intrinsic. If a shufflevector instruction is
+ // associated with more than one sub-vector, those sub-vectors will be
+ // concatenated into a single wide vector.
+ for (ShuffleVectorInst *SVI : Shuffles) {
+ auto &SubVec = SubVecs[SVI];
+ auto *WideVec =
+ SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
+ SVI->replaceAllUsesWith(WideVec);
+ }
- return ConstantVector::get(Mask);
+ return true;
}
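A hedged example of the splitting this enables (types and value names made up, intrinsic mangling elided): for a factor-2 interleave whose shuffles produce <8 x i32>, NumLoads is 2, the sub-vector type is reset to <4 x i32>, and two ld2 calls are emitted, the second based 4 * Factor = 8 elements past the first:

    %ldN.0 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2(...)(%base)
    %gep   = getelementptr i32, i32* %base, i32 8
    %ldN.1 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2(...)(%gep)

Each shuffle then gets its two sub-vectors concatenated back into the original <8 x i32> by concatenateVectors.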
/// \brief Lower an interleaved store into a stN intrinsic.
@@ -7369,12 +7426,15 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
- unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
- // Skip if we do not have NEON and skip illegal vector types.
- if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128))
+ // Skip if we do not have NEON, and skip illegal vector types. We can
+ // "legalize" wide vector types into multiple interleaved accesses as long
+ // as the vector size is a multiple of 128 bits.
+ if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
return false;
+ unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
+
Value *Op0 = SVI->getOperand(0);
Value *Op1 = SVI->getOperand(1);
IRBuilder<> Builder(SI);
@@ -7394,6 +7454,25 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
SubVecTy = VectorType::get(IntTy, LaneLen);
}
+ // The base address of the store.
+ Value *BaseAddr = SI->getPointerOperand();
+
+ if (NumStores > 1) {
+ // If we're going to generate more than one store, reset the lane length
+ // and sub-vector type to something legal.
+ LaneLen /= NumStores;
+ SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen);
+
+ // We will compute the pointer operand of each store from the original base
+ // address using GEPs. Cast the base address to a pointer to the scalar
+ // element type.
+ BaseAddr = Builder.CreateBitCast(
+ BaseAddr, SubVecTy->getVectorElementType()->getPointerTo(
+ SI->getPointerAddressSpace()));
+ }
+
+ auto Mask = SVI->getShuffleMask();
+
Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
Type *Tys[2] = {SubVecTy, PtrTy};
static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
@@ -7402,34 +7481,43 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
Function *StNFunc =
Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
- SmallVector<Value *, 5> Ops;
+ for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
- // Split the shufflevector operands into sub vectors for the new stN call.
- auto Mask = SVI->getShuffleMask();
- for (unsigned i = 0; i < Factor; i++) {
- if (Mask[i] >= 0) {
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
- } else {
- unsigned StartMask = 0;
- for (unsigned j = 1; j < LaneLen; j++) {
- if (Mask[j*Factor + i] >= 0) {
- StartMask = Mask[j*Factor + i] - j;
- break;
+ SmallVector<Value *, 5> Ops;
+
+ // Split the shufflevector operands into sub vectors for the new stN call.
+ for (unsigned i = 0; i < Factor; i++) {
+ unsigned IdxI = StoreCount * LaneLen * Factor + i;
+ if (Mask[IdxI] >= 0) {
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
+ } else {
+ unsigned StartMask = 0;
+ for (unsigned j = 1; j < LaneLen; j++) {
+ unsigned IdxJ = StoreCount * LaneLen * Factor + j;
+ if (Mask[IdxJ * Factor + IdxI] >= 0) {
+ StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
+ break;
+ }
}
+ // Note: Filling undef gaps with random elements is ok, since
+ // those elements were being written anyway (with undefs).
+ // If all elements are undef, we default to using elements from 0.
+ // Note: StartMask cannot be negative; that is checked in
+ // isReInterleaveMask.
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
}
- // Note: If all elements in a chunk are undefs, StartMask=0!
- // Note: Filling undef gaps with random elements is ok, since
- // those elements were being written anyway (with undefs).
- // In the case of all undefs we're defaulting to using elems from 0
- // Note: StartMask cannot be negative, it's checked in isReInterleaveMask
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen)));
}
- }
- Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), PtrTy));
- Builder.CreateCall(StNFunc, Ops);
+ // If we're generating more than one store, compute the base address of
+ // subsequent stores as an offset from the previous.
+ if (StoreCount > 0)
+ BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor);
+
+ Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
+ Builder.CreateCall(StNFunc, Ops);
+ }
return true;
}
@@ -7690,7 +7778,7 @@ SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
std::vector<SDNode *> *Created) const {
- AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
@@ -9267,7 +9355,7 @@ static SDValue performSTORECombine(SDNode *N,
return SDValue();
}
- /// This function handles the log2-shuffle pattern produced by the
+/// This function handles the log2-shuffle pattern produced by the
/// LoopVectorizer for across-vector reductions. It consists of
/// log2(NumVectorElements) steps and, in each step, 2^(s) elements
/// are reduced, where s is an induction variable from 0 to
@@ -10483,9 +10571,9 @@ void AArch64TargetLowering::ReplaceNodeResults(
}
bool AArch64TargetLowering::useLoadStackGuardNode() const {
- if (!Subtarget->isTargetAndroid())
- return true;
- return TargetLowering::useLoadStackGuardNode();
+ if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
+ return TargetLowering::useLoadStackGuardNode();
+ return true;
}
unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
@@ -10623,36 +10711,56 @@ bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
return false;
}
-Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
- if (!Subtarget->isTargetAndroid())
- return TargetLowering::getIRStackGuard(IRB);
-
- // Android provides a fixed TLS slot for the stack cookie. See the definition
- // of TLS_SLOT_STACK_GUARD in
- // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
- const unsigned TlsOffset = 0x28;
+static Value *UseTlsOffset(IRBuilder<> &IRB, unsigned Offset) {
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Function *ThreadPointerFunc =
Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
return IRB.CreatePointerCast(
- IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset),
+ IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), Offset),
Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
}
-Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
- if (!Subtarget->isTargetAndroid())
- return TargetLowering::getSafeStackPointerLocation(IRB);
+Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
+ // Android provides a fixed TLS slot for the stack cookie. See the definition
+ // of TLS_SLOT_STACK_GUARD in
+ // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
+ if (Subtarget->isTargetAndroid())
+ return UseTlsOffset(IRB, 0x28);
+ // Fuchsia is similar.
+ // <magenta/tls.h> defines MX_TLS_STACK_GUARD_OFFSET with this value.
+ if (Subtarget->isTargetFuchsia())
+ return UseTlsOffset(IRB, -0x10);
+
+ return TargetLowering::getIRStackGuard(IRB);
+}
+
+Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
- const unsigned TlsOffset = 0x48;
- Module *M = IRB.GetInsertBlock()->getParent()->getParent();
- Function *ThreadPointerFunc =
- Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
- return IRB.CreatePointerCast(
- IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset),
- Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
+ if (Subtarget->isTargetAndroid())
+ return UseTlsOffset(IRB, 0x48);
+
+ // Fuchsia is similar.
+ // <magenta/tls.h> defines MX_TLS_UNSAFE_SP_OFFSET with this value.
+ if (Subtarget->isTargetFuchsia())
+ return UseTlsOffset(IRB, -0x8);
+
+ return TargetLowering::getSafeStackPointerLocation(IRB);
+}
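Both hooks funnel through UseTlsOffset. As a sketch, the IR it produces for the Android stack-guard slot (offset 0x28) looks roughly like the following; the value names are illustrative, not taken from the patch:

    // Roughly what UseTlsOffset(IRB, 0x28) emits:
    //   %tp   = call i8* @llvm.thread.pointer()
    //   %slot = getelementptr i8, i8* %tp, i32 40   ; 0x28 bytes past TPIDR_EL0
    //   %gp   = bitcast i8* %slot to i8**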
+
+bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
+ const Instruction &AndI) const {
+ // Only sink an 'and' mask to the cmp use block if it is masking a single
+ // bit, since the and/cmp/br is then likely to fold into a single tbz. It
+ // may be beneficial to sink in other cases, but we would have to check that
+ // the cmp would not get folded into the br to form a cbz for these to be
+ // beneficial.
+ ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
+ if (!Mask)
+ return false;
+ return Mask->getUniqueInteger().isPowerOf2();
}
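For intuition, an assumed source-level example of the single-bit case:

    // For 'if (x & 8)', the mask is one bit, so sinking the 'and' into the
    // compare's block lets ISel fold and/cmp/br into a single instruction:
    //   tbnz w0, #3, .Ltarget
    // A multi-bit mask such as 0x6 would still need and + cbnz, so we
    // return false for it.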
void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
@@ -10702,7 +10810,7 @@ void AArch64TargetLowering::insertCopiesSplitCSR(
}
}
-bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
+bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// Integer division on AArch64 is expensive. However, when aggressively
// optimizing for code size, we prefer to use a div instruction, as it is
// usually smaller than the alternative sequence.
@@ -10711,6 +10819,14 @@ bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
bool OptSize =
- Attr.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ Attr.hasAttribute(AttributeList::FunctionIndex, Attribute::MinSize);
return OptSize && !VT.isVector();
}
+
+unsigned
+AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
+ if (Subtarget->isTargetDarwin())
+ return getPointerTy(DL).getSizeInBits();
+
+ return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
+}
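The 3-pointers-plus-2-ints figure is the standard AAPCS64 va_list layout; on Darwin, va_list is just a char*, hence a single pointer's width. A sketch of the AAPCS64 structure (the field names follow the ABI convention, not this patch):

    // 3 * 64 + 2 * 32 = 256 bits = 32 bytes on LP64 targets.
    struct va_list_aapcs64 {
      void *__stack;   // next stacked-argument address
      void *__gr_top;  // end of the general-register save area
      void *__vr_top;  // end of the FP/SIMD-register save area
      int __gr_offs;   // offset from __gr_top to the next GP register arg
      int __vr_offs;   // offset from __vr_top to the next FP/SIMD register arg
    };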
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 054ccc31674f..2ad6c8b23df8 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -251,7 +251,8 @@ public:
/// Determine which of the bits specified in Mask are known to be either zero
/// or one and return them in the KnownZero/KnownOne bitsets.
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
- APInt &KnownOne, const SelectionDAG &DAG,
+ APInt &KnownOne, const APInt &DemandedElts,
+ const SelectionDAG &DAG,
unsigned Depth = 0) const override;
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
@@ -402,7 +403,7 @@ public:
return AArch64::X1;
}
- bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;
+ bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool isCheapToSpeculateCttz() const override {
return true;
@@ -412,6 +413,8 @@ public:
return true;
}
+ bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+
bool hasAndNotCompare(SDValue) const override {
// 'bics'
return true;
@@ -435,6 +438,20 @@ public:
return true;
}
+ /// Returns the size of the platform's va_list object.
+ unsigned getVaListSizeInBits(const DataLayout &DL) const override;
+
+ /// Returns true if \p VecTy is a legal interleaved access type. This
+ /// function checks the vector element type and the overall width of the
+ /// vector.
+ bool isLegalInterleavedAccessType(VectorType *VecTy,
+ const DataLayout &DL) const;
+
+ /// Returns the number of interleaved accesses that will be generated when
+ /// lowering accesses of the given type.
+ unsigned getNumInterleavedAccesses(VectorType *VecTy,
+ const DataLayout &DL) const;
+
private:
bool isExtFreeImpl(const Instruction *Ext) const override;
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index cefdf51b50d2..16be4432b160 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -39,6 +39,9 @@ class AArch64Inst<Format f, string cstr> : Instruction {
let Constraints = cstr;
}
+class InstSubst<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>, Requires<[UseNegativeImmediates]>;
+
// Pseudo instructions (don't have encoding information)
class Pseudo<dag oops, dag iops, list<dag> pattern, string cstr = "">
: AArch64Inst<PseudoFrm, cstr> {
@@ -257,6 +260,7 @@ def am_indexed7s128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S128", []>;
class AsmImmRange<int Low, int High> : AsmOperandClass {
let Name = "Imm" # Low # "_" # High;
let DiagnosticType = "InvalidImm" # Low # "_" # High;
+ let PredicateMethod = "isImmInRange<" # Low # "," # High # ">";
}
def Imm1_8Operand : AsmImmRange<1, 8>;
@@ -264,6 +268,20 @@ def Imm1_16Operand : AsmImmRange<1, 16>;
def Imm1_32Operand : AsmImmRange<1, 32>;
def Imm1_64Operand : AsmImmRange<1, 64>;
+class BranchTarget<int N> : AsmOperandClass {
+ let Name = "BranchTarget" # N;
+ let DiagnosticType = "InvalidLabel";
+ let PredicateMethod = "isBranchTarget<" # N # ">";
+}
+
+class PCRelLabel<int N> : BranchTarget<N> {
+ let Name = "PCRelLabel" # N;
+}
+
+def BranchTarget14Operand : BranchTarget<14>;
+def BranchTarget26Operand : BranchTarget<26>;
+def PCRelLabel19Operand : PCRelLabel<19>;
+
def MovZSymbolG3AsmOperand : AsmOperandClass {
let Name = "MovZSymbolG3";
let RenderMethod = "addImmOperands";
@@ -500,7 +518,8 @@ def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
}
// imm0_255 predicate - True if the immediate is in the range [0,255].
-def Imm0_255Operand : AsmOperandClass { let Name = "Imm0_255"; }
+def Imm0_255Operand : AsmImmRange<0,255>;
+
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 256;
}]> {
@@ -673,6 +692,14 @@ def addsub_shifted_imm64 : addsub_shifted_imm<i64>;
def addsub_shifted_imm32_neg : addsub_shifted_imm_neg<i32>;
def addsub_shifted_imm64_neg : addsub_shifted_imm_neg<i64>;
+def gi_addsub_shifted_imm32 :
+ GIComplexOperandMatcher<s32, (ops i32imm, i32imm), "selectArithImmed">,
+ GIComplexPatternEquiv<addsub_shifted_imm32>;
+
+def gi_addsub_shifted_imm64 :
+ GIComplexOperandMatcher<s64, (ops i32imm, i32imm), "selectArithImmed">,
+ GIComplexPatternEquiv<addsub_shifted_imm64>;
+
class neg_addsub_shifted_imm<ValueType Ty>
: Operand<Ty>, ComplexPattern<Ty, 2, "SelectNegArithImmed", [imm]> {
let PrintMethod = "printAddSubImm";
@@ -1094,10 +1121,6 @@ def inv_ccode : Operand<i32> {
// Conditional branch target. 19-bit immediate. The low two bits of the target
// offset are implied zero and so are not part of the immediate.
-def PCRelLabel19Operand : AsmOperandClass {
- let Name = "PCRelLabel19";
- let DiagnosticType = "InvalidLabel";
-}
def am_brcond : Operand<OtherVT> {
let EncoderMethod = "getCondBranchTargetOpValue";
let DecoderMethod = "DecodePCRelLabel19";
@@ -1154,9 +1177,6 @@ multiclass CmpBranch<bit op, string asm, SDNode node> {
//---
// Test-and-branch target. 14-bit sign-extended immediate. The low two bits of
// the target offset are implied zero and so are not part of the immediate.
-def BranchTarget14Operand : AsmOperandClass {
- let Name = "BranchTarget14";
-}
def am_tbrcond : Operand<OtherVT> {
let EncoderMethod = "getTestBranchTargetOpValue";
let PrintMethod = "printAlignedLabel";
@@ -1166,11 +1186,12 @@ def am_tbrcond : Operand<OtherVT> {
// AsmOperand classes to emit (or not) special diagnostics
def TBZImm0_31Operand : AsmOperandClass {
let Name = "TBZImm0_31";
- let PredicateMethod = "isImm0_31";
+ let PredicateMethod = "isImmInRange<0,31>";
let RenderMethod = "addImm0_31Operands";
}
def TBZImm32_63Operand : AsmOperandClass {
let Name = "Imm32_63";
+ let PredicateMethod = "isImmInRange<32,63>";
let DiagnosticType = "InvalidImm0_63";
}
@@ -1232,10 +1253,6 @@ multiclass TestBranch<bit op, string asm, SDNode node> {
//---
// Unconditional branch (immediate) instructions.
//---
-def BranchTarget26Operand : AsmOperandClass {
- let Name = "BranchTarget26";
- let DiagnosticType = "InvalidLabel";
-}
def am_b_target : Operand<OtherVT> {
let EncoderMethod = "getBranchTargetOpValue";
let PrintMethod = "printAlignedLabel";
@@ -1784,10 +1801,10 @@ multiclass AddSub<bit isSub, string mnemonic, string alias,
}
// add Rd, Rb, -imm -> sub Rd, Rn, imm
- def : InstAlias<alias#"\t$Rd, $Rn, $imm",
+ def : InstSubst<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32sp:$Rn,
addsub_shifted_imm32_neg:$imm), 0>;
- def : InstAlias<alias#"\t$Rd, $Rn, $imm",
+ def : InstSubst<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64sp:$Rn,
addsub_shifted_imm64_neg:$imm), 0>;
@@ -1859,10 +1876,10 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
} // Defs = [NZCV]
// Support negative immediates, e.g. adds Rd, Rn, -imm -> subs Rd, Rn, imm
- def : InstAlias<alias#"\t$Rd, $Rn, $imm",
+ def : InstSubst<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32sp:$Rn,
addsub_shifted_imm32_neg:$imm), 0>;
- def : InstAlias<alias#"\t$Rd, $Rn, $imm",
+ def : InstSubst<alias#"\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64sp:$Rn,
addsub_shifted_imm64_neg:$imm), 0>;
@@ -1883,9 +1900,9 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp,
XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>;
// Support negative immediates, e.g. cmp Rn, -imm -> cmn Rn, imm
- def : InstAlias<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Wri")
+ def : InstSubst<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Wri")
WZR, GPR32sp:$src, addsub_shifted_imm32_neg:$imm), 0>;
- def : InstAlias<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Xri")
+ def : InstSubst<cmpAlias#"\t$src, $imm", (!cast<Instruction>(NAME#"Xri")
XZR, GPR64sp:$src, addsub_shifted_imm64_neg:$imm), 0>;
// Compare shorthands
@@ -2100,10 +2117,10 @@ multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode,
let Inst{31} = 1;
}
- def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
+ def : InstSubst<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn,
logical_imm32_not:$imm), 0>;
- def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
+ def : InstSubst<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn,
logical_imm64_not:$imm), 0>;
}
@@ -2122,10 +2139,10 @@ multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode,
}
} // end Defs = [NZCV]
- def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
+ def : InstSubst<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32:$Rn,
logical_imm32_not:$imm), 0>;
- def : InstAlias<Alias # "\t$Rd, $Rn, $imm",
+ def : InstSubst<Alias # "\t$Rd, $Rn, $imm",
(!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64:$Rn,
logical_imm64_not:$imm), 0>;
}
@@ -2454,7 +2471,7 @@ class PrefetchUI<bits<2> sz, bit V, bits<2> opc, string asm, list<dag> pat>
// Load literal address: 19-bit immediate. The low two bits of the target
// offset are implied zero and so are not part of the immediate.
-def am_ldrlit : Operand<OtherVT> {
+def am_ldrlit : Operand<iPTR> {
let EncoderMethod = "getLoadLiteralOpValue";
let DecoderMethod = "DecodePCRelLabel19";
let PrintMethod = "printAlignedLabel";
@@ -9060,7 +9077,7 @@ multiclass SIMDLdSt4SingleAliases<string asm> {
// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract
//----------------------------------------------------------------------------
-let Predicates = [HasNEON, HasV8_1a] in {
+let Predicates = [HasNEON, HasRDM] in {
class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode,
RegisterOperand regtype, string asm,
@@ -9221,7 +9238,7 @@ multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
let Inst{21} = idx{0};
}
}
-} // let Predicates = [HasNeon, HasV8_1a]
+} // let Predicates = [HasNeon, HasRDM]
//----------------------------------------------------------------------------
// Crypto extensions
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 4c789926e3e4..41fc8eceab5c 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
@@ -369,7 +370,7 @@ void AArch64InstrInfo::instantiateCondBranch(
// Folded compare-and-branch
// Note that we use addOperand instead of addReg to keep the flags.
const MachineInstrBuilder MIB =
- BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]);
+ BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
if (Cond.size() > 3)
MIB.addImm(Cond[3].getImm());
MIB.addMBB(TBB);
@@ -762,6 +763,17 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
+bool AArch64InstrInfo::isFalkorLSLFast(const MachineInstr &MI) const {
+ if (MI.getNumOperands() < 4)
+ return false;
+ unsigned ShOpVal = MI.getOperand(3).getImm();
+ unsigned ShImm = AArch64_AM::getShiftValue(ShOpVal);
+ if (AArch64_AM::getShiftType(ShOpVal) == AArch64_AM::LSL &&
+ ShImm < 4)
+ return true;
+ return false;
+}
+
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SubIdx) const {
@@ -1299,16 +1311,16 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addMemOperand(*MI.memoperands_begin());
} else if (TM.getCodeModel() == CodeModel::Large) {
BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
- .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
+ .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
+ .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
+ .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
+ .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
.addReg(Reg, RegState::Kill)
.addImm(0)
@@ -1345,14 +1357,6 @@ bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const {
case AArch64::BICSXrs:
case AArch64::BICWrs:
case AArch64::BICXrs:
- case AArch64::CRC32Brr:
- case AArch64::CRC32CBrr:
- case AArch64::CRC32CHrr:
- case AArch64::CRC32CWrr:
- case AArch64::CRC32CXrr:
- case AArch64::CRC32Hrr:
- case AArch64::CRC32Wrr:
- case AArch64::CRC32Xrr:
case AArch64::EONWrs:
case AArch64::EONXrs:
case AArch64::EORWrs:
@@ -1691,16 +1695,59 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
} else
return false;
- // Offset is calculated as the immediate operand multiplied by the scaling factor.
- // Unscaled instructions have scaling factor set to 1.
+ // Get the scaling factor and the access width for this instruction.
unsigned Scale = 0;
- switch (LdSt.getOpcode()) {
+ int64_t Dummy1, Dummy2;
+
+ // If this returns false, then it's an instruction we don't want to handle.
+ if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
+ return false;
+
+ // Compute the offset. Offset is calculated as the immediate operand
+ // multiplied by the scaling factor. Unscaled instructions have scaling factor
+ // set to 1.
+ if (LdSt.getNumExplicitOperands() == 3) {
+ BaseReg = LdSt.getOperand(1).getReg();
+ Offset = LdSt.getOperand(2).getImm() * Scale;
+ } else {
+ assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
+ BaseReg = LdSt.getOperand(2).getReg();
+ Offset = LdSt.getOperand(3).getImm() * Scale;
+ }
+ return true;
+}
+
+MachineOperand&
+AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
+ assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
+ MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands()-1);
+ assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
+ return OfsOp;
+}
+
+bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
+ unsigned &Width, int64_t &MinOffset,
+ int64_t &MaxOffset) const {
+ switch (Opcode) {
+ // Not a memory operation or something we want to handle.
default:
+ Scale = Width = 0;
+ MinOffset = MaxOffset = 0;
return false;
+ case AArch64::STRWpost:
+ case AArch64::LDRWpost:
+ Width = 32;
+ Scale = 4;
+ MinOffset = -256;
+ MaxOffset = 255;
+ break;
case AArch64::LDURQi:
case AArch64::STURQi:
Width = 16;
Scale = 1;
+ MinOffset = -256;
+ MaxOffset = 255;
break;
case AArch64::LDURXi:
case AArch64::LDURDi:
@@ -1708,6 +1755,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STURDi:
Width = 8;
Scale = 1;
+ MinOffset = -256;
+ MaxOffset = 255;
break;
case AArch64::LDURWi:
case AArch64::LDURSi:
@@ -1716,6 +1765,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STURSi:
Width = 4;
Scale = 1;
+ MinOffset = -256;
+ MaxOffset = 255;
break;
case AArch64::LDURHi:
case AArch64::LDURHHi:
@@ -1725,6 +1776,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STURHHi:
Width = 2;
Scale = 1;
+ MinOffset = -256;
+ MaxOffset = 255;
break;
case AArch64::LDURBi:
case AArch64::LDURBBi:
@@ -1734,6 +1787,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STURBBi:
Width = 1;
Scale = 1;
+ MinOffset = -256;
+ MaxOffset = 255;
break;
case AArch64::LDPQi:
case AArch64::LDNPQi:
@@ -1741,10 +1796,14 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STNPQi:
Scale = 16;
Width = 32;
+ MinOffset = -64;
+ MaxOffset = 63;
break;
case AArch64::LDRQui:
case AArch64::STRQui:
Scale = Width = 16;
+ MinOffset = 0;
+ MaxOffset = 4095;
break;
case AArch64::LDPXi:
case AArch64::LDPDi:
@@ -1756,12 +1815,16 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STNPDi:
Scale = 8;
Width = 16;
+ MinOffset = -64;
+ MaxOffset = 63;
break;
case AArch64::LDRXui:
case AArch64::LDRDui:
case AArch64::STRXui:
case AArch64::STRDui:
Scale = Width = 8;
+ MinOffset = 0;
+ MaxOffset = 4095;
break;
case AArch64::LDPWi:
case AArch64::LDPSi:
@@ -1773,6 +1836,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STNPSi:
Scale = 4;
Width = 8;
+ MinOffset = -64;
+ MaxOffset = 63;
break;
case AArch64::LDRWui:
case AArch64::LDRSui:
@@ -1780,29 +1845,27 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
case AArch64::STRWui:
case AArch64::STRSui:
Scale = Width = 4;
+ MinOffset = 0;
+ MaxOffset = 4095;
break;
case AArch64::LDRHui:
case AArch64::LDRHHui:
case AArch64::STRHui:
case AArch64::STRHHui:
Scale = Width = 2;
+ MinOffset = 0;
+ MaxOffset = 4095;
break;
case AArch64::LDRBui:
case AArch64::LDRBBui:
case AArch64::STRBui:
case AArch64::STRBBui:
Scale = Width = 1;
+ MinOffset = 0;
+ MaxOffset = 4095;
break;
}
- if (LdSt.getNumExplicitOperands() == 3) {
- BaseReg = LdSt.getOperand(1).getReg();
- Offset = LdSt.getOperand(2).getImm() * Scale;
- } else {
- assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
- BaseReg = LdSt.getOperand(2).getReg();
- Offset = LdSt.getOperand(3).getImm() * Scale;
- }
return true;
}
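A minimal usage sketch, assuming TII points at an AArch64InstrInfo; the values follow the table above:

    unsigned Scale, Width;
    int64_t MinOff, MaxOff;
    if (TII->getMemOpInfo(AArch64::LDRXui, Scale, Width, MinOff, MaxOff)) {
      // Scale = 8, Width = 8, MinOff = 0, MaxOff = 4095: an LDRXui with
      // immediate operand 3 addresses [base + 24], and the reachable byte
      // range is [0, 4095 * 8].
    }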
@@ -1903,88 +1966,6 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
return Offset1 + 1 == Offset2;
}
-bool AArch64InstrInfo::shouldScheduleAdjacent(
- const MachineInstr &First, const MachineInstr &Second) const {
- if (Subtarget.hasArithmeticBccFusion()) {
- // Fuse CMN, CMP, TST followed by Bcc.
- unsigned SecondOpcode = Second.getOpcode();
- if (SecondOpcode == AArch64::Bcc) {
- switch (First.getOpcode()) {
- default:
- return false;
- case AArch64::ADDSWri:
- case AArch64::ADDSWrr:
- case AArch64::ADDSXri:
- case AArch64::ADDSXrr:
- case AArch64::ANDSWri:
- case AArch64::ANDSWrr:
- case AArch64::ANDSXri:
- case AArch64::ANDSXrr:
- case AArch64::SUBSWri:
- case AArch64::SUBSWrr:
- case AArch64::SUBSXri:
- case AArch64::SUBSXrr:
- case AArch64::BICSWrr:
- case AArch64::BICSXrr:
- return true;
- case AArch64::ADDSWrs:
- case AArch64::ADDSXrs:
- case AArch64::ANDSWrs:
- case AArch64::ANDSXrs:
- case AArch64::SUBSWrs:
- case AArch64::SUBSXrs:
- case AArch64::BICSWrs:
- case AArch64::BICSXrs:
- // Shift value can be 0 making these behave like the "rr" variant...
- return !hasShiftedReg(Second);
- }
- }
- }
- if (Subtarget.hasArithmeticCbzFusion()) {
- // Fuse ALU operations followed by CBZ/CBNZ.
- unsigned SecondOpcode = Second.getOpcode();
- if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
- SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
- switch (First.getOpcode()) {
- default:
- return false;
- case AArch64::ADDWri:
- case AArch64::ADDWrr:
- case AArch64::ADDXri:
- case AArch64::ADDXrr:
- case AArch64::ANDWri:
- case AArch64::ANDWrr:
- case AArch64::ANDXri:
- case AArch64::ANDXrr:
- case AArch64::EORWri:
- case AArch64::EORWrr:
- case AArch64::EORXri:
- case AArch64::EORXrr:
- case AArch64::ORRWri:
- case AArch64::ORRWrr:
- case AArch64::ORRXri:
- case AArch64::ORRXrr:
- case AArch64::SUBWri:
- case AArch64::SUBWrr:
- case AArch64::SUBXri:
- case AArch64::SUBXrr:
- return true;
- case AArch64::ADDWrs:
- case AArch64::ADDXrs:
- case AArch64::ANDWrs:
- case AArch64::ANDXrs:
- case AArch64::SUBWrs:
- case AArch64::SUBXrs:
- case AArch64::BICWrs:
- case AArch64::BICXrs:
- // Shift value can be 0 making these behave like the "rr" variant...
- return !hasShiftedReg(Second);
- }
- }
- }
- return false;
-}
-
MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(
MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var,
const MDNode *Expr, const DebugLoc &DL) const {
@@ -3793,7 +3774,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
MachineInstrBuilder MIB1 =
BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
.addReg(ZeroReg)
- .addOperand(Root.getOperand(2));
+ .add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
@@ -4286,3 +4267,199 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
{MO_TLS, "aarch64-tls"}};
return makeArrayRef(TargetFlags);
}
+
+unsigned AArch64InstrInfo::getOutliningBenefit(size_t SequenceSize,
+ size_t Occurrences,
+ bool CanBeTailCall) const {
+ unsigned NotOutlinedSize = SequenceSize * Occurrences;
+ unsigned OutlinedSize;
+
+ // Is this candidate something we can outline as a tail call?
+ if (CanBeTailCall) {
+ // If yes, then we just outline the sequence and replace each of its
+ // occurrences with a branch instruction.
+ OutlinedSize = SequenceSize + Occurrences;
+ } else {
+ // If no, then we outline the sequence (SequenceSize), add a return (+1),
+ // and replace each occurrence with a save/restore to LR and a call
+ // (3 * Occurrences)
+ OutlinedSize = (SequenceSize + 1) + (3 * Occurrences);
+ }
+
+ // Return the number of instructions saved by outlining this sequence.
+ return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;
+}
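A quick check of the cost model with assumed numbers, mirroring the logic above:

    // A 10-instruction sequence occurring 3 times costs 30 instructions inline.
    unsigned SequenceSize = 10, Occurrences = 3;
    unsigned NotOutlined = SequenceSize * Occurrences;            // 30
    unsigned AsTailCall  = SequenceSize + Occurrences;            // 13 -> saves 17
    unsigned WithCallRet = (SequenceSize + 1) + 3 * Occurrences;  // 20 -> saves 10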
+
+bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
+ return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
+}
+
+AArch64GenInstrInfo::MachineOutlinerInstrType
+AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {
+
+ MachineFunction *MF = MI.getParent()->getParent();
+ AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
+
+ // Don't outline LOHs.
+ if (FuncInfo->getLOHRelated().count(&MI))
+ return MachineOutlinerInstrType::Illegal;
+
+ // Don't allow debug values to impact outlining type.
+ if (MI.isDebugValue() || MI.isIndirectDebugValue())
+ return MachineOutlinerInstrType::Invisible;
+
+ // Is this a terminator for a basic block?
+ if (MI.isTerminator()) {
+
+ // Is this the end of a function?
+ if (MI.getParent()->succ_empty())
+ return MachineOutlinerInstrType::Legal;
+
+ // It's not, so don't outline it.
+ return MachineOutlinerInstrType::Illegal;
+ }
+
+ // Don't outline positions.
+ if (MI.isPosition())
+ return MachineOutlinerInstrType::Illegal;
+
+ // Make sure none of the operands are un-outlinable.
+ for (const MachineOperand &MOP : MI.operands())
+ if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
+ MOP.isTargetIndex())
+ return MachineOutlinerInstrType::Illegal;
+
+ // Don't outline anything that uses the link register.
+ if (MI.modifiesRegister(AArch64::LR, &RI) ||
+ MI.readsRegister(AArch64::LR, &RI))
+ return MachineOutlinerInstrType::Illegal;
+
+ // Does this use the stack?
+ if (MI.modifiesRegister(AArch64::SP, &RI) ||
+ MI.readsRegister(AArch64::SP, &RI)) {
+
+ // Is it a memory operation?
+ if (MI.mayLoadOrStore()) {
+ unsigned Base; // Filled with the base register of MI.
+ int64_t Offset; // Filled with the offset of MI.
+ unsigned DummyWidth;
+
+ // Does it allow us to offset the base register and is the base SP?
+ if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
+ Base != AArch64::SP)
+ return MachineOutlinerInstrType::Illegal;
+
+ // Find the minimum/maximum offset for this instruction and check if
+ // fixing it up would be in range.
+ int64_t MinOffset, MaxOffset;
+ unsigned DummyScale;
+ getMemOpInfo(MI.getOpcode(), DummyScale, DummyWidth, MinOffset,
+ MaxOffset);
+
+ // TODO: We should really test what happens if an instruction overflows.
+ // This is tricky to test with IR tests, but when the outliner is moved
+ // to a MIR test, it really ought to be checked.
+ if (Offset + 16 < MinOffset || Offset + 16 > MaxOffset)
+ return MachineOutlinerInstrType::Illegal;
+
+ // It's in range, so we can outline it.
+ return MachineOutlinerInstrType::Legal;
+ }
+
+ // We can't fix it up, so don't outline it.
+ return MachineOutlinerInstrType::Illegal;
+ }
+
+ return MachineOutlinerInstrType::Legal;
+}
+
+void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
+ for (MachineInstr &MI : MBB) {
+ unsigned Base, Width;
+ int64_t Offset;
+
+ // Is this a load or store with an immediate offset with SP as the base?
+ if (!MI.mayLoadOrStore() ||
+ !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
+ Base != AArch64::SP)
+ continue;
+
+ // It is, so we have to fix it up.
+ unsigned Scale;
+ int64_t Dummy1, Dummy2;
+
+ MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
+ assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
+ getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
+ assert(Scale != 0 && "Unexpected opcode!");
+
+ // We've pushed the return address to the stack, so add 16 to the offset.
+ // This is safe, since we already checked if it would overflow when we
+ // checked if this instruction was legal to outline.
+ int64_t NewImm = (Offset + 16)/Scale;
+ StackOffsetOperand.setImm(NewImm);
+ }
+}
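A worked example of the fix-up with assumed values:

    // An outlined 'ldr x0, [sp, #8]' (LDRXui, immediate operand 1):
    int64_t Offset = 8;                      // byte offset from the instruction
    unsigned Scale = 8;                      // LDRXui scale from getMemOpInfo
    int64_t NewImm = (Offset + 16) / Scale;  // = 3, i.e. 'ldr x0, [sp, #24]'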
+
+void AArch64InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool IsTailCall) const {
+
+ // If this is a tail call outlined function, then there's already a return.
+ if (IsTailCall)
+ return;
+
+ // It's not a tail call, so we have to insert the return ourselves.
+ MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
+ .addReg(AArch64::LR, RegState::Undef);
+ MBB.insert(MBB.end(), ret);
+
+ // Walk over the basic block and fix up all the stack accesses.
+ fixupPostOutline(MBB);
+}
+
+void AArch64InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool IsTailCall) const {}
+
+MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
+ Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
+ MachineFunction &MF, bool IsTailCall) const {
+
+ // Are we tail calling?
+ if (IsTailCall) {
+ // If yes, then we can just branch to the label.
+ It = MBB.insert(It,
+ BuildMI(MF, DebugLoc(), get(AArch64::B))
+ .addGlobalAddress(M.getNamedValue(MF.getName())));
+ return It;
+ }
+
+ // We're not tail calling, so we have to save LR before the call and restore
+ // it after.
+ MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::SP)
+ .addImm(-16);
+ It = MBB.insert(It, STRXpre);
+ It++;
+
+ // Insert the call.
+ It = MBB.insert(It,
+ BuildMI(MF, DebugLoc(), get(AArch64::BL))
+ .addGlobalAddress(M.getNamedValue(MF.getName())));
+
+ It++;
+
+ // Restore the link register.
+ MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(AArch64::LR)
+ .addReg(AArch64::SP)
+ .addImm(16);
+ It = MBB.insert(It, LDRXpost);
+
+ return It;
+}
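Spelled as assembly, the non-tail-call sequence built above should come out roughly as follows (mnemonic spellings assumed, label illustrative):

    //   str x30, [sp, #-16]!     // STRXpre: spill LR, pre-decrementing SP
    //   bl  OUTLINED_FUNCTION_N  // BL to the outlined body
    //   ldr x30, [sp], #16       // LDRXpost: reload LR, post-incrementing SP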
+
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 5037866925d3..bacce441f6c5 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -133,12 +133,19 @@ public:
int64_t &Offset, unsigned &Width,
const TargetRegisterInfo *TRI) const;
+ /// Return the immediate offset operand of the load/store \p LdSt.
+ MachineOperand &getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const;
+
+ /// \brief Returns true if opcode \p Opcode is a memory operation. If it is, set
+ /// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
+ ///
+ /// For unscaled instructions, \p Scale is set to 1.
+ bool getMemOpInfo(unsigned Opcode, unsigned &Scale, unsigned &Width,
+ int64_t &MinOffset, int64_t &MaxOffset) const;
+
bool shouldClusterMemOps(MachineInstr &FirstLdSt, MachineInstr &SecondLdSt,
unsigned NumLoads) const override;
- bool shouldScheduleAdjacent(const MachineInstr &First,
- const MachineInstr &Second) const override;
-
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
uint64_t Offset, const MDNode *Var,
const MDNode *Expr,
@@ -245,7 +252,33 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableBitmaskMachineOperandTargetFlags() const override;
+ bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
+ unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences,
+ bool CanBeTailCall) const override;
+ AArch64GenInstrInfo::MachineOutlinerInstrType
+ getOutliningType(MachineInstr &MI) const override;
+ void insertOutlinerEpilogue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool IsTailCall) const override;
+ void insertOutlinerPrologue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool isTailCall) const override;
+ MachineBasicBlock::iterator
+ insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &It,
+ MachineFunction &MF,
+ bool IsTailCall) const override;
+ /// Returns true if the instruction has a shift-by-immediate (LSL 0-3) that
+ /// can be executed in one cycle less.
+ bool isFalkorLSLFast(const MachineInstr &MI) const;
private:
+
+ /// \brief Sets the offsets on outlined instructions in \p MBB which use SP
+ /// so that they will be valid post-outlining.
+ ///
+ /// \param MBB A \p MachineBasicBlock in an outlined function.
+ void fixupPostOutline(MachineBasicBlock &MBB) const;
+
void instantiateCondBranch(MachineBasicBlock &MBB, const DebugLoc &DL,
MachineBasicBlock *TBB,
ArrayRef<MachineOperand> Cond) const;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 2244baacca17..4449412532f3 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -30,6 +30,8 @@ def HasLSE : Predicate<"Subtarget->hasLSE()">,
AssemblerPredicate<"FeatureLSE", "lse">;
def HasRAS : Predicate<"Subtarget->hasRAS()">,
AssemblerPredicate<"FeatureRAS", "ras">;
+def HasRDM : Predicate<"Subtarget->hasRDM()">,
+ AssemblerPredicate<"FeatureRDM", "rdm">;
def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<"FeatureFullFP16", "fullfp16">;
@@ -41,6 +43,11 @@ def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def UseAlternateSExtLoadCVTF32
: Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
+def UseNegativeImmediates
+ : Predicate<"false">, AssemblerPredicate<"!FeatureNoNegativeImmediates",
+ "NegativeImmediates">;
+
+
//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//
@@ -424,8 +431,10 @@ def MSRpstateImm1 : MSRpstateImm0_1;
def MSRpstateImm4 : MSRpstateImm0_15;
// The thread pointer (on Linux, at least, where this has been implemented) is
-// TPIDR_EL0.
-def : Pat<(AArch64threadpointer), (MRS 0xde82)>;
+// TPIDR_EL0. Add pseudo op so we can mark it as not having any side effects.
+let hasSideEffects = 0 in
+def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
+ [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[]>;
// The cycle counter PMC register is PMCCNTR_EL0.
let Predicates = [HasPerfMon] in
@@ -574,31 +583,31 @@ def : Pat<(f64 fpimm:$in),
// sequences.
def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
tglobaladdr:$g1, tglobaladdr:$g0),
- (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48),
- tglobaladdr:$g2, 32),
- tglobaladdr:$g1, 16),
- tglobaladdr:$g0, 0)>;
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0),
+ tglobaladdr:$g1, 16),
+ tglobaladdr:$g2, 32),
+ tglobaladdr:$g3, 48)>;
def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
tblockaddress:$g1, tblockaddress:$g0),
- (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g3, 48),
- tblockaddress:$g2, 32),
- tblockaddress:$g1, 16),
- tblockaddress:$g0, 0)>;
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0),
+ tblockaddress:$g1, 16),
+ tblockaddress:$g2, 32),
+ tblockaddress:$g3, 48)>;
def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
tconstpool:$g1, tconstpool:$g0),
- (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g3, 48),
- tconstpool:$g2, 32),
- tconstpool:$g1, 16),
- tconstpool:$g0, 0)>;
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0),
+ tconstpool:$g1, 16),
+ tconstpool:$g2, 32),
+ tconstpool:$g3, 48)>;
def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
tjumptable:$g1, tjumptable:$g0),
- (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g3, 48),
- tjumptable:$g2, 32),
- tjumptable:$g1, 16),
- tjumptable:$g0, 0)>;
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0),
+ tjumptable:$g1, 16),
+ tjumptable:$g2, 32),
+ tjumptable:$g3, 48)>;
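With the reordered patterns, a large-code-model address is now materialized bottom-up; roughly (relocation spellings assumed):

    //   movz x0, #:abs_g0_nc:sym            // bits [15:0]
    //   movk x0, #:abs_g1_nc:sym, lsl #16   // bits [31:16]
    //   movk x0, #:abs_g2_nc:sym, lsl #32   // bits [47:32]
    //   movk x0, #:abs_g3:sym, lsl #48      // bits [63:48]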
//===----------------------------------------------------------------------===//
@@ -3284,7 +3293,7 @@ defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>
defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
-let Predicates = [HasV8_1a] in {
+let Predicates = [HasRDM] in {
defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
def : Pat<(i32 (int_aarch64_neon_sqadd
@@ -5029,7 +5038,7 @@ class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
0),
dsub)))>,
Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
-
+
def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index b51473524c72..878dac6bff1e 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -12,17 +12,19 @@
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
-#include "AArch64InstructionSelector.h"
#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
@@ -36,13 +38,61 @@ using namespace llvm;
#error "You shouldn't build this"
#endif
+namespace {
+
+class AArch64InstructionSelector : public InstructionSelector {
+public:
+ AArch64InstructionSelector(const AArch64TargetMachine &TM,
+ const AArch64Subtarget &STI,
+ const AArch64RegisterBankInfo &RBI);
+
+ bool select(MachineInstr &I) const override;
+
+private:
+ /// tblgen-erated 'select' implementation, used as the initial selector for
+ /// the patterns that don't require complex C++.
+ bool selectImpl(MachineInstr &I) const;
+
+ bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
+ MachineRegisterInfo &MRI) const;
+ bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
+ MachineRegisterInfo &MRI) const;
+
+ bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
+ MachineRegisterInfo &MRI) const;
+
+ bool selectArithImmed(MachineOperand &Root, MachineOperand &Result1,
+ MachineOperand &Result2) const;
+
+ const AArch64TargetMachine &TM;
+ const AArch64Subtarget &STI;
+ const AArch64InstrInfo &TII;
+ const AArch64RegisterInfo &TRI;
+ const AArch64RegisterBankInfo &RBI;
+
+// We declare the temporaries used by selectImpl() in the class to minimize the
+// cost of constructing placeholder values.
+#define GET_GLOBALISEL_TEMPORARIES_DECL
+#include "AArch64GenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_DECL
+};
+
+} // end anonymous namespace
+
+#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
+#undef GET_GLOBALISEL_IMPL
AArch64InstructionSelector::AArch64InstructionSelector(
const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
const AArch64RegisterBankInfo &RBI)
- : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
- TRI(*STI.getRegisterInfo()), RBI(RBI) {}
+ : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
+ TRI(*STI.getRegisterInfo()), RBI(RBI)
+#define GET_GLOBALISEL_TEMPORARIES_INIT
+#include "AArch64GenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_INIT
+{
+}
// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
@@ -119,67 +169,34 @@ static bool unsupportedBinOp(const MachineInstr &I,
}
/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
-/// (such as G_OR or G_ADD), appropriate for the register bank \p RegBankID
+/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
unsigned OpSize) {
switch (RegBankID) {
case AArch64::GPRRegBankID:
- if (OpSize <= 32) {
- assert((OpSize == 32 || (GenericOpc != TargetOpcode::G_SDIV &&
- GenericOpc != TargetOpcode::G_UDIV &&
- GenericOpc != TargetOpcode::G_LSHR &&
- GenericOpc != TargetOpcode::G_ASHR)) &&
- "operation should have been legalized before now");
-
+ if (OpSize == 32) {
switch (GenericOpc) {
- case TargetOpcode::G_OR:
- return AArch64::ORRWrr;
- case TargetOpcode::G_XOR:
- return AArch64::EORWrr;
- case TargetOpcode::G_AND:
- return AArch64::ANDWrr;
- case TargetOpcode::G_ADD:
- assert(OpSize != 32 && "s32 G_ADD should have been selected");
- return AArch64::ADDWrr;
- case TargetOpcode::G_SUB:
- return AArch64::SUBWrr;
case TargetOpcode::G_SHL:
return AArch64::LSLVWr;
case TargetOpcode::G_LSHR:
return AArch64::LSRVWr;
case TargetOpcode::G_ASHR:
return AArch64::ASRVWr;
- case TargetOpcode::G_SDIV:
- return AArch64::SDIVWr;
- case TargetOpcode::G_UDIV:
- return AArch64::UDIVWr;
default:
return GenericOpc;
}
} else if (OpSize == 64) {
switch (GenericOpc) {
- case TargetOpcode::G_OR:
- return AArch64::ORRXrr;
- case TargetOpcode::G_XOR:
- return AArch64::EORXrr;
- case TargetOpcode::G_AND:
- return AArch64::ANDXrr;
case TargetOpcode::G_GEP:
return AArch64::ADDXrr;
- case TargetOpcode::G_SUB:
- return AArch64::SUBXrr;
case TargetOpcode::G_SHL:
return AArch64::LSLVXr;
case TargetOpcode::G_LSHR:
return AArch64::LSRVXr;
case TargetOpcode::G_ASHR:
return AArch64::ASRVXr;
- case TargetOpcode::G_SDIV:
- return AArch64::SDIVXr;
- case TargetOpcode::G_UDIV:
- return AArch64::UDIVXr;
default:
return GenericOpc;
}
@@ -473,6 +490,82 @@ static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
}
}
+bool AArch64InstructionSelector::selectCompareBranch(
+ MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
+
+ const unsigned CondReg = I.getOperand(0).getReg();
+ MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
+ MachineInstr *CCMI = MRI.getVRegDef(CondReg);
+ if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
+ return false;
+
+ unsigned LHS = CCMI->getOperand(2).getReg();
+ unsigned RHS = CCMI->getOperand(3).getReg();
+ if (!getConstantVRegVal(RHS, MRI))
+ std::swap(RHS, LHS);
+
+ const auto RHSImm = getConstantVRegVal(RHS, MRI);
+ if (!RHSImm || *RHSImm != 0)
+ return false;
+
+ const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
+ if (RB.getID() != AArch64::GPRRegBankID)
+ return false;
+
+ const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
+ if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
+ return false;
+
+ const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
+ unsigned CBOpc = 0;
+ if (CmpWidth <= 32)
+ CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
+ else if (CmpWidth == 64)
+ CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
+ else
+ return false;
+
+ auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
+ .addUse(LHS)
+ .addMBB(DestMBB);
+
+ constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
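A sketch of the gMIR this matches, with assumed value names:

    //   %zero:gpr(s64) = G_CONSTANT i64 0
    //   %c:gpr(s1)     = G_ICMP intpred(eq), %x(s64), %zero
    //   G_BRCOND %c(s1), %bb.2
    // selects to a single compare-and-branch:
    //   CBZX %x, %bb.2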
+
+bool AArch64InstructionSelector::selectVaStartAAPCS(
+ MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
+ return false;
+}
+
+bool AArch64InstructionSelector::selectVaStartDarwin(
+ MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ unsigned ListReg = I.getOperand(0).getReg();
+
+ unsigned ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+
+ auto MIB =
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
+ .addDef(ArgsAddrReg)
+ .addFrameIndex(FuncInfo->getVarArgsStackIndex())
+ .addImm(0)
+ .addImm(0);
+
+ constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+
+ MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
+ .addUse(ArgsAddrReg)
+ .addUse(ListReg)
+ .addImm(0)
+ .addMemOperand(*I.memoperands_begin());
+
+ constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
bool AArch64InstructionSelector::select(MachineInstr &I) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -549,6 +642,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
const unsigned CondReg = I.getOperand(0).getReg();
MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
+ if (selectCompareBranch(I, MF, MRI))
+ return true;
+
auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
.addUse(CondReg)
.addImm(/*bit offset=*/0)
@@ -558,6 +654,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
}
+ case TargetOpcode::G_BRINDIRECT: {
+ I.setDesc(TII.get(AArch64::BR));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_CONSTANT: {
const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
@@ -629,9 +730,12 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
// FIXME: Is going through int64_t always correct?
ImmOp.ChangeToImmediate(
ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
- } else {
+ } else if (I.getOperand(1).isCImm()) {
uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
I.getOperand(1).ChangeToImmediate(Val);
+ } else if (I.getOperand(1).isImm()) {
+ uint64_t Val = I.getOperand(1).getImm();
+ I.getOperand(1).ChangeToImmediate(Val);
}
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
@@ -686,10 +790,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
return false;
}
-#ifndef NDEBUG
- // Sanity-check the pointer register.
+ auto &MemOp = **I.memoperands_begin();
+ if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
+ DEBUG(dbgs() << "Atomic load/store not supported yet\n");
+ return false;
+ }
+
const unsigned PtrReg = I.getOperand(1).getReg();
+#ifndef NDEBUG
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
+ // Sanity-check the pointer register.
assert(PtrRB.getID() == AArch64::GPRRegBankID &&
"Load/Store pointer operand isn't a GPR");
assert(MRI.getType(PtrReg).isPointer() &&
@@ -706,11 +816,46 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
I.setDesc(TII.get(NewOpc));
- I.addOperand(MachineOperand::CreateImm(0));
+ uint64_t Offset = 0;
+ auto *PtrMI = MRI.getVRegDef(PtrReg);
+
+ // Try to fold a GEP into our unsigned immediate addressing mode.
+ if (PtrMI->getOpcode() == TargetOpcode::G_GEP) {
+ if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
+ int64_t Imm = *COff;
+ const unsigned Size = MemTy.getSizeInBits() / 8;
+ const unsigned Scale = Log2_32(Size);
+ if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
+ unsigned Ptr2Reg = PtrMI->getOperand(1).getReg();
+ I.getOperand(1).setReg(Ptr2Reg);
+ PtrMI = MRI.getVRegDef(Ptr2Reg);
+ Offset = Imm / Size;
+ }
+ }
+ }
+
+ // If we haven't folded anything into our addressing mode yet, try to fold
+ // a frame index into the base+offset.
+ if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
+ I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
+
+ I.addOperand(MachineOperand::CreateImm(Offset));
+
+ // If we're storing a 0, use WZR/XZR.
+ if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
+ if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
+ if (I.getOpcode() == AArch64::STRWui)
+ I.getOperand(0).setReg(AArch64::WZR);
+ else if (I.getOpcode() == AArch64::STRXui)
+ I.getOperand(0).setReg(AArch64::XZR);
+ }
+ }
+
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
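For example, with assumed numbers:

    // An 8-byte G_LOAD through '%p = G_GEP %base, (G_CONSTANT i64 24)':
    // Size = 8, Scale = 3; 24 is a multiple of 8 and 24 < (0x1000 << 3),
    // so the GEP is folded away and the LDRXui immediate becomes 24 / 8 = 3.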
- case TargetOpcode::G_MUL: {
+ case TargetOpcode::G_SMULH:
+ case TargetOpcode::G_UMULH: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
return false;
@@ -719,48 +864,33 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
if (RB.getID() != AArch64::GPRRegBankID) {
- DEBUG(dbgs() << "G_MUL on bank: " << RB << ", expected: GPR\n");
+ DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
return false;
}
- unsigned ZeroReg;
- unsigned NewOpc;
- if (Ty.isScalar() && Ty.getSizeInBits() <= 32) {
- NewOpc = AArch64::MADDWrrr;
- ZeroReg = AArch64::WZR;
- } else if (Ty == LLT::scalar(64)) {
- NewOpc = AArch64::MADDXrrr;
- ZeroReg = AArch64::XZR;
- } else {
- DEBUG(dbgs() << "G_MUL has type: " << Ty << ", expected: "
- << LLT::scalar(32) << " or " << LLT::scalar(64) << '\n');
+ if (Ty != LLT::scalar(64)) {
+ DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
+ << ", expected: " << LLT::scalar(64) << '\n');
return false;
}
+ unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
+ : AArch64::UMULHrr;
I.setDesc(TII.get(NewOpc));
- I.addOperand(MachineOperand::CreateReg(ZeroReg, /*isDef=*/false));
-
// Now that we selected an opcode, we need to constrain the register
// operands to use appropriate classes.
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
-
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
case TargetOpcode::G_OR:
- case TargetOpcode::G_XOR:
- case TargetOpcode::G_AND:
case TargetOpcode::G_SHL:
case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR:
- case TargetOpcode::G_SDIV:
- case TargetOpcode::G_UDIV:
- case TargetOpcode::G_ADD:
- case TargetOpcode::G_SUB:
case TargetOpcode::G_GEP: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
@@ -783,6 +913,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
+ case TargetOpcode::G_PTR_MASK: {
+ uint64_t Align = I.getOperand(2).getImm();
+ if (Align >= 64 || Align == 0)
+ return false;
+
+ uint64_t Mask = ~((1ULL << Align) - 1);
+ I.setDesc(TII.get(AArch64::ANDXri));
+ I.getOperand(2).setImm(AArch64_AM::encodeLogicalImmediate(Mask, 64));
+
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
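A worked example of the mask computation:

    // 'G_PTR_MASK %p, 4' aligns %p down to 16 bytes:
    uint64_t Align = 4;
    uint64_t Mask = ~((1ULL << Align) - 1);  // 0xFFFFFFFFFFFFFFF0
    // Mask is then encoded via encodeLogicalImmediate(Mask, 64) for ANDXri.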
case TargetOpcode::G_PTRTOINT:
case TargetOpcode::G_TRUNC: {
const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
@@ -1026,7 +1167,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
if (Ty == LLT::scalar(32)) {
CSelOpc = AArch64::CSELWr;
- } else if (Ty == LLT::scalar(64)) {
+ } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
CSelOpc = AArch64::CSELXr;
} else {
return false;
@@ -1134,7 +1275,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
.addDef(Def1Reg)
.addUse(AArch64::WZR)
.addUse(AArch64::WZR)
- .addImm(CC1);
+ .addImm(getInvertedCondCode(CC1));
if (CC2 != AArch64CC::AL) {
unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
@@ -1143,7 +1284,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
.addDef(Def2Reg)
.addUse(AArch64::WZR)
.addUse(AArch64::WZR)
- .addImm(CC2);
+ .addImm(getInvertedCondCode(CC2));
MachineInstr &OrMI =
*BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
.addDef(DefReg)
@@ -1159,7 +1300,69 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const {
I.eraseFromParent();
return true;
}
+ case TargetOpcode::G_VASTART:
+ return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
+ : selectVaStartAAPCS(I, MF, MRI);
}
return false;
}
+
+/// SelectArithImmed - Select an immediate value that can be represented as
+/// a 12-bit value shifted left by either 0 or 12. If so, return true with
+/// Val set to the 12-bit value and Shift set to the shifter operand.
+bool AArch64InstructionSelector::selectArithImmed(
+ MachineOperand &Root, MachineOperand &Result1,
+ MachineOperand &Result2) const {
+ MachineInstr &MI = *Root.getParent();
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // This function is called from the addsub_shifted_imm ComplexPattern,
+ // which lists [imm] as the list of opcodes it's interested in; however,
+ // we still need to check whether the operand is actually an immediate
+ // here because the ComplexPattern opcode list is only used in
+ // root-level opcode matching.
+ uint64_t Immed;
+ if (Root.isImm())
+ Immed = Root.getImm();
+ else if (Root.isCImm())
+ Immed = Root.getCImm()->getZExtValue();
+ else if (Root.isReg()) {
+ MachineInstr *Def = MRI.getVRegDef(Root.getReg());
+ if (Def->getOpcode() != TargetOpcode::G_CONSTANT)
+ return false;
+ MachineOperand &Op1 = Def->getOperand(1);
+ if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64)
+ return false;
+ Immed = Op1.getCImm()->getZExtValue();
+ } else
+ return false;
+
+ unsigned ShiftAmt;
+
+ if (Immed >> 12 == 0) {
+ ShiftAmt = 0;
+ } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
+ ShiftAmt = 12;
+ Immed = Immed >> 12;
+ } else
+ return false;
+
+ unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
+ Result1.ChangeToImmediate(Immed);
+ Result1.clearParent();
+ Result2.ChangeToImmediate(ShVal);
+ Result2.clearParent();
+ return true;
+}
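Two assumed inputs to illustrate the checks:

    // Immed = 0x5000: the low 12 bits are zero and 0x5000 >> 24 == 0, so
    // ShiftAmt = 12 and Immed becomes 5 -- encoded as "#5, lsl #12".
    // Immed = 0x1001000: 0x1001000 >> 24 != 0, so both tests fail and the
    // pattern is rejected.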
+
+namespace llvm {
+InstructionSelector *
+createAArch64InstructionSelector(const AArch64TargetMachine &TM,
+ AArch64Subtarget &Subtarget,
+ AArch64RegisterBankInfo &RBI) {
+ return new AArch64InstructionSelector(TM, Subtarget, RBI);
+}
+}
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.h b/lib/Target/AArch64/AArch64InstructionSelector.h
deleted file mode 100644
index 2c6e5a912fb7..000000000000
--- a/lib/Target/AArch64/AArch64InstructionSelector.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===- AArch64InstructionSelector --------------------------------*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file declares the targeting of the InstructionSelector class for
-/// AArch64.
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64INSTRUCTIONSELECTOR_H
-#define LLVM_LIB_TARGET_AARCH64_AARCH64INSTRUCTIONSELECTOR_H
-
-#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-
-namespace llvm {
-
-class AArch64InstrInfo;
-class AArch64RegisterBankInfo;
-class AArch64RegisterInfo;
-class AArch64Subtarget;
-class AArch64TargetMachine;
-
-class AArch64InstructionSelector : public InstructionSelector {
-public:
- AArch64InstructionSelector(const AArch64TargetMachine &TM,
- const AArch64Subtarget &STI,
- const AArch64RegisterBankInfo &RBI);
-
- bool select(MachineInstr &I) const override;
-
-private:
- /// tblgen-erated 'select' implementation, used as the initial selector for
- /// the patterns that don't require complex C++.
- bool selectImpl(MachineInstr &I) const;
-
- const AArch64TargetMachine &TM;
- const AArch64Subtarget &STI;
- const AArch64InstrInfo &TII;
- const AArch64RegisterInfo &TRI;
- const AArch64RegisterBankInfo &RBI;
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_TARGET_AARCH64_AARCH64INSTRUCTIONSELECTOR_H
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 83f276a8161b..6e6daf812295 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -13,7 +13,10 @@
//===----------------------------------------------------------------------===//
#include "AArch64LegalizerInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Target/TargetOpcodes.h"
@@ -36,11 +39,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
const LLT v4s32 = LLT::vector(4, 32);
const LLT v2s64 = LLT::vector(2, 64);
- for (auto BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL}) {
+ for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL}) {
// These operations naturally get the right answer when used on
// GPR32, even if the actual type is narrower.
- for (auto Ty : {s1, s8, s16, s32, s64, v2s32, v4s32, v2s64})
+ for (auto Ty : {s32, s64, v2s32, v4s32, v2s64})
setAction({BinOp, Ty}, Legal);
+
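+ // Narrower scalars are widened to 32 bits; e.g. an s8 G_ADD becomes an
+ // s32 G_ADD whose extra high bits are simply ignored.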
+ for (auto Ty : {s1, s8, s16})
+ setAction({BinOp, Ty}, WidenScalar);
}
setAction({G_GEP, p0}, Legal);
@@ -49,7 +55,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
for (auto Ty : {s1, s8, s16, s32})
setAction({G_GEP, 1, Ty}, WidenScalar);
- for (auto BinOp : {G_LSHR, G_ASHR, G_SDIV, G_UDIV}) {
+ setAction({G_PTR_MASK, p0}, Legal);
+
+ for (unsigned BinOp : {G_LSHR, G_ASHR, G_SDIV, G_UDIV}) {
for (auto Ty : {s32, s64})
setAction({BinOp, Ty}, Legal);
@@ -57,25 +65,41 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({BinOp, Ty}, WidenScalar);
}
- for (auto BinOp : { G_SREM, G_UREM })
+ for (unsigned BinOp : {G_SREM, G_UREM})
for (auto Ty : { s1, s8, s16, s32, s64 })
setAction({BinOp, Ty}, Lower);
- for (auto Op : { G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_SMULO, G_UMULO }) {
+ for (unsigned Op : {G_SMULO, G_UMULO})
+ setAction({Op, s64}, Lower);
+
+ for (unsigned Op : {G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_SMULH, G_UMULH}) {
for (auto Ty : { s32, s64 })
setAction({Op, Ty}, Legal);
setAction({Op, 1, s1}, Legal);
}
- for (auto BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
+ for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
for (auto Ty : {s32, s64})
setAction({BinOp, Ty}, Legal);
- setAction({G_FREM, s32}, Libcall);
- setAction({G_FREM, s64}, Libcall);
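+ // Neither frem nor fpow has a native AArch64 instruction, so both become
+ // libcalls (fmod/fmodf and pow/powf).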
+ for (unsigned BinOp : {G_FREM, G_FPOW}) {
+ setAction({BinOp, s32}, Libcall);
+ setAction({BinOp, s64}, Libcall);
+ }
- for (auto MemOp : {G_LOAD, G_STORE}) {
+ for (auto Ty : {s32, s64, p0}) {
+ setAction({G_INSERT, Ty}, Legal);
+ setAction({G_INSERT, 1, Ty}, Legal);
+ }
+ for (auto Ty : {s1, s8, s16}) {
+ setAction({G_INSERT, Ty}, WidenScalar);
+ setAction({G_INSERT, 1, Ty}, Legal);
+ // FIXME: Can't widen the sources because that violates the constraints on
+ // G_INSERT (it seems entirely reasonable that the inputs shouldn't overlap).
+ }
+
+ for (unsigned MemOp : {G_LOAD, G_STORE}) {
for (auto Ty : {s8, s16, s32, s64, p0, v2s32})
setAction({MemOp, Ty}, Legal);
@@ -141,12 +165,18 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_TRUNC, 1, Ty}, Legal);
// Conversions
- for (auto Ty : { s1, s8, s16, s32, s64 }) {
+ for (auto Ty : { s32, s64 }) {
setAction({G_FPTOSI, 0, Ty}, Legal);
setAction({G_FPTOUI, 0, Ty}, Legal);
setAction({G_SITOFP, 1, Ty}, Legal);
setAction({G_UITOFP, 1, Ty}, Legal);
}
+ for (auto Ty : { s1, s8, s16 }) {
+ setAction({G_FPTOSI, 0, Ty}, WidenScalar);
+ setAction({G_FPTOUI, 0, Ty}, WidenScalar);
+ setAction({G_SITOFP, 1, Ty}, WidenScalar);
+ setAction({G_UITOFP, 1, Ty}, WidenScalar);
+ }
for (auto Ty : { s32, s64 }) {
setAction({G_FPTOSI, 1, Ty}, Legal);
@@ -158,9 +188,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
// Control-flow
for (auto Ty : {s1, s8, s16, s32})
setAction({G_BRCOND, Ty}, Legal);
+ setAction({G_BRINDIRECT, p0}, Legal);
// Select
- for (auto Ty : {s1, s8, s16, s32, s64})
+ for (auto Ty : {s1, s8, s16})
+ setAction({G_SELECT, Ty}, WidenScalar);
+
+ for (auto Ty : {s32, s64, p0})
setAction({G_SELECT, Ty}, Legal);
setAction({G_SELECT, 1, s1}, Legal);
@@ -200,5 +234,82 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() {
setAction({G_BITCAST, 1, LLT::vector(32/EltSize, EltSize)}, Legal);
}
+ setAction({G_VASTART, p0}, Legal);
+
+ // va_list must be a pointer, but most sized types are pretty easy to handle
+ // as the destination.
+ setAction({G_VAARG, 1, p0}, Legal);
+
+ for (auto Ty : {s8, s16, s32, s64, p0})
+ setAction({G_VAARG, Ty}, Custom);
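+ // (G_VAARG is expanded by legalizeVaArg below.)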
+
computeTables();
}
+
+bool AArch64LegalizerInfo::legalizeCustom(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ switch (MI.getOpcode()) {
+ default:
+ // No idea what to do.
+ return false;
+ case TargetOpcode::G_VAARG:
+ return legalizeVaArg(MI, MRI, MIRBuilder);
+ }
+
+ llvm_unreachable("expected switch to return");
+}
+
+bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ MIRBuilder.setInstr(MI);
+ MachineFunction &MF = MIRBuilder.getMF();
+ unsigned Align = MI.getOperand(2).getImm();
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned ListPtr = MI.getOperand(1).getReg();
+
+ LLT PtrTy = MRI.getType(ListPtr);
+ LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
+
+ const unsigned PtrSize = PtrTy.getSizeInBits() / 8;
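+ // Load the address of the next argument out of the va_list.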
+ unsigned List = MRI.createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildLoad(
+ List, ListPtr,
+ *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
+ PtrSize, /* Align = */ PtrSize));
+
+ unsigned DstPtr;
+ if (Align > PtrSize) {
+ // Realign the list to the actual required alignment.
+ unsigned AlignMinus1 = MRI.createGenericVirtualRegister(IntPtrTy);
+ MIRBuilder.buildConstant(AlignMinus1, Align - 1);
+
+ unsigned ListTmp = MRI.createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildGEP(ListTmp, List, AlignMinus1);
+
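+ // i.e. DstPtr = (List + Align - 1) & ~(Align - 1).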
+ DstPtr = MRI.createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
+ } else
+ DstPtr = List;
+
+ uint64_t ValSize = MRI.getType(Dst).getSizeInBits() / 8;
+ MIRBuilder.buildLoad(
+ Dst, DstPtr,
+ *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOLoad,
+ ValSize, std::max(Align, PtrSize)));
+
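+ // Bump the va_list pointer past this argument, rounded up to a multiple
+ // of the pointer size.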
+ unsigned SizeReg = MRI.createGenericVirtualRegister(IntPtrTy);
+ MIRBuilder.buildConstant(SizeReg, alignTo(ValSize, PtrSize));
+
+ unsigned NewList = MRI.createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildGEP(NewList, DstPtr, SizeReg);
+
+ MIRBuilder.buildStore(
+ NewList, ListPtr,
+ *MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
+ PtrSize, /* Align = */ PtrSize));
+
+ MI.eraseFromParent();
+ return true;
+}
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.h b/lib/Target/AArch64/AArch64LegalizerInfo.h
index feacbef9f147..42d4ac130c5c 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.h
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.h
@@ -25,6 +25,13 @@ class LLVMContext;
class AArch64LegalizerInfo : public LegalizerInfo {
public:
AArch64LegalizerInfo();
+
+ bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const override;
+
+private:
+ bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const;
};
} // End llvm namespace.
#endif
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8e312dcf276f..976498aa70d6 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -16,19 +16,29 @@
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+
using namespace llvm;
#define DEBUG_TYPE "aarch64-ldst-opt"
@@ -58,15 +68,15 @@ typedef struct LdStPairFlags {
// If a matching instruction is found, MergeForward is set to true if the
// merge is to remove the first instruction and replace the second with
// a pair-wise insn, and false if the reverse is true.
- bool MergeForward;
+ bool MergeForward = false;
// SExtIdx gives the index of the result of the load pair that must be
// extended. The value of SExtIdx assumes that the paired load produces the
// value in this order: (I, returned iterator), i.e., -1 means no value has
// to be extended, 0 means I, and 1 means the returned iterator.
- int SExtIdx;
+ int SExtIdx = -1;
- LdStPairFlags() : MergeForward(false), SExtIdx(-1) {}
+ LdStPairFlags() = default;
void setMergeForward(bool V = true) { MergeForward = V; }
bool getMergeForward() const { return MergeForward; }
@@ -78,10 +88,12 @@ typedef struct LdStPairFlags {
struct AArch64LoadStoreOpt : public MachineFunctionPass {
static char ID;
+
AArch64LoadStoreOpt() : MachineFunctionPass(ID) {
initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry());
}
+ AliasAnalysis *AA;
const AArch64InstrInfo *TII;
const TargetRegisterInfo *TRI;
const AArch64Subtarget *Subtarget;
@@ -89,6 +101,11 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Track which registers have been modified and used.
BitVector ModifiedRegs, UsedRegs;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
// Scan the instructions looking for a load/store that can be combined
// with the current instruction into a load/store pair.
// Return the matching instruction if one is found, else MBB->end().
@@ -162,8 +179,10 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
};
+
char AArch64LoadStoreOpt::ID = 0;
-} // namespace
+
+} // end anonymous namespace
INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
AARCH64_LOAD_STORE_OPT_NAME, false, false)
@@ -246,7 +265,7 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
default:
if (IsValidLdStrOpc)
*IsValidLdStrOpc = false;
- return UINT_MAX;
+ return std::numeric_limits<unsigned>::max();
case AArch64::STRDui:
case AArch64::STURDi:
case AArch64::STRQui:
@@ -595,7 +614,7 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
MachineInstrBuilder MIB;
MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
.addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
- .addOperand(BaseRegOp)
+ .add(BaseRegOp)
.addImm(OffsetImm)
.setMemRefs(I->mergeMemRefsWith(*MergeMI));
(void)MIB;
@@ -709,9 +728,9 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
}
}
MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingPairOpcode(Opc)))
- .addOperand(RegOp0)
- .addOperand(RegOp1)
- .addOperand(BaseRegOp)
+ .add(RegOp0)
+ .add(RegOp1)
+ .add(BaseRegOp)
.addImm(OffsetImm)
.setMemRefs(I->mergeMemRefsWith(*Paired));
@@ -923,7 +942,7 @@ static int alignTo(int Num, int PowOf2) {
}
static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb,
- const AArch64InstrInfo *TII) {
+ AliasAnalysis *AA) {
// One of the instructions must modify memory.
if (!MIa.mayStore() && !MIb.mayStore())
return false;
@@ -932,14 +951,14 @@ static bool mayAlias(MachineInstr &MIa, MachineInstr &MIb,
if (!MIa.mayLoadOrStore() && !MIb.mayLoadOrStore())
return false;
- return !TII->areMemAccessesTriviallyDisjoint(MIa, MIb);
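+ // Query AliasAnalysis, which can disambiguate accesses that the target's
+ // trivially-disjoint check cannot.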
+ return MIa.mayAlias(AA, MIb, /*UseTBAA*/false);
}
static bool mayAlias(MachineInstr &MIa,
SmallVectorImpl<MachineInstr *> &MemInsns,
- const AArch64InstrInfo *TII) {
+ AliasAnalysis *AA) {
for (MachineInstr *MIb : MemInsns)
- if (mayAlias(MIa, *MIb, TII))
+ if (mayAlias(MIa, *MIb, AA))
return true;
return false;
@@ -997,7 +1016,7 @@ bool AArch64LoadStoreOpt::findMatchingStore(
return false;
// If we encounter a store aliased with the load, return early.
- if (MI.mayStore() && mayAlias(LoadMI, MI, TII))
+ if (MI.mayStore() && mayAlias(LoadMI, MI, AA))
return false;
} while (MBBI != B && Count < Limit);
return false;
@@ -1167,7 +1186,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// first.
if (!ModifiedRegs[getLdStRegOp(MI).getReg()] &&
!(MI.mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) &&
- !mayAlias(MI, MemInsns, TII)) {
+ !mayAlias(MI, MemInsns, AA)) {
Flags.setMergeForward(false);
return MBBI;
}
@@ -1178,7 +1197,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// into the second.
if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] &&
!(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) &&
- !mayAlias(FirstMI, MemInsns, TII)) {
+ !mayAlias(FirstMI, MemInsns, AA)) {
Flags.setMergeForward(true);
return MBBI;
}
@@ -1233,19 +1252,19 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
if (!isPairedLdSt(*I)) {
// Non-paired instruction.
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(getLdStRegOp(*Update))
- .addOperand(getLdStRegOp(*I))
- .addOperand(getLdStBaseOp(*I))
+ .add(getLdStRegOp(*Update))
+ .add(getLdStRegOp(*I))
+ .add(getLdStBaseOp(*I))
.addImm(Value)
.setMemRefs(I->memoperands_begin(), I->memoperands_end());
} else {
// Paired instruction.
int Scale = getMemScale(*I);
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
- .addOperand(getLdStRegOp(*Update))
- .addOperand(getLdStRegOp(*I, 0))
- .addOperand(getLdStRegOp(*I, 1))
- .addOperand(getLdStBaseOp(*I))
+ .add(getLdStRegOp(*Update))
+ .add(getLdStRegOp(*I, 0))
+ .add(getLdStRegOp(*I, 1))
+ .add(getLdStBaseOp(*I))
.addImm(Value / Scale)
.setMemRefs(I->memoperands_begin(), I->memoperands_end());
}
@@ -1545,7 +1564,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
case AArch64::LDURBBi:
case AArch64::LDURHHi:
case AArch64::LDURWi:
- case AArch64::LDURXi: {
+ case AArch64::LDURXi:
if (tryToPromoteLoadFromStore(MBBI)) {
Modified = true;
break;
@@ -1553,7 +1572,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
++MBBI;
break;
}
- }
}
// 2) Merge adjacent zero stores into a wider store.
// e.g.,
@@ -1722,6 +1740,7 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget());
TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo());
TRI = Subtarget->getRegisterInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
// Resize the modified and used register bitfield trackers. We do this once
// per function and then clear the bitfield each time we optimize a load or
diff --git a/lib/Target/AArch64/AArch64MacroFusion.cpp b/lib/Target/AArch64/AArch64MacroFusion.cpp
new file mode 100644
index 000000000000..a6926a6700e1
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -0,0 +1,272 @@
+//===- AArch64MacroFusion.cpp - AArch64 Macro Fusion ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file This file contains the AArch64 implementation of the DAG scheduling
+// mutation to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MacroFusion.h"
+#include "AArch64Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define DEBUG_TYPE "misched"
+
+STATISTIC(NumFused, "Number of instr pairs fused");
+
+using namespace llvm;
+
+static cl::opt<bool> EnableMacroFusion("aarch64-misched-fusion", cl::Hidden,
+ cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+
+namespace {
+
+/// \brief Verify that the instr pair, FirstMI and SecondMI, should be fused
+/// together. Given an anchor instr, if the other instr is unspecified, check
+/// whether the anchor instr may be part of any fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &TSI,
+ const MachineInstr *FirstMI,
+ const MachineInstr *SecondMI) {
+ assert((FirstMI || SecondMI) && "At least one instr must be specified");
+
+ const AArch64InstrInfo &II = static_cast<const AArch64InstrInfo&>(TII);
+ const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI);
+
+ // Assume wildcards for unspecified instrs.
+ unsigned FirstOpcode =
+ FirstMI ? FirstMI->getOpcode()
+ : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
+ unsigned SecondOpcode =
+ SecondMI ? SecondMI->getOpcode()
+ : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
+
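+ // e.g. the query (ADRP, INSTRUCTION_LIST_END) asks whether an ADRP may
+ // begin some fused pair without committing to a second instruction.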
+ if (ST.hasArithmeticBccFusion())
+ // Fuse CMN, CMP, TST followed by Bcc.
+ if (SecondOpcode == AArch64::Bcc)
+ switch (FirstOpcode) {
+ default:
+ return false;
+ case AArch64::ADDSWri:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXri:
+ case AArch64::ADDSXrr:
+ case AArch64::ANDSWri:
+ case AArch64::ANDSWrr:
+ case AArch64::ANDSXri:
+ case AArch64::ANDSXrr:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXri:
+ case AArch64::SUBSXrr:
+ case AArch64::BICSWrr:
+ case AArch64::BICSXrr:
+ return true;
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
+ // Shift value can be 0, making these behave like the "rr" variant...
+ return !II.hasShiftedReg(*FirstMI);
+ case AArch64::INSTRUCTION_LIST_END:
+ return true;
+ }
+
+ if (ST.hasArithmeticCbzFusion())
+ // Fuse ALU operations followed by CBZ/CBNZ.
+ if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
+ SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX)
+ switch (FirstOpcode) {
+ default:
+ return false;
+ case AArch64::ADDWri:
+ case AArch64::ADDWrr:
+ case AArch64::ADDXri:
+ case AArch64::ADDXrr:
+ case AArch64::ANDWri:
+ case AArch64::ANDWrr:
+ case AArch64::ANDXri:
+ case AArch64::ANDXrr:
+ case AArch64::EORWri:
+ case AArch64::EORWrr:
+ case AArch64::EORXri:
+ case AArch64::EORXrr:
+ case AArch64::ORRWri:
+ case AArch64::ORRWrr:
+ case AArch64::ORRXri:
+ case AArch64::ORRXrr:
+ case AArch64::SUBWri:
+ case AArch64::SUBWrr:
+ case AArch64::SUBXri:
+ case AArch64::SUBXrr:
+ return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ // Shift value can be 0, making these behave like the "rr" variant...
+ return !II.hasShiftedReg(*FirstMI);
+ case AArch64::INSTRUCTION_LIST_END:
+ return true;
+ }
+
+ if (ST.hasFuseAES())
+ // Fuse AES crypto operations.
+ switch (FirstOpcode) {
+ // AES encode.
+ case AArch64::AESErr:
+ return SecondOpcode == AArch64::AESMCrr ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // AES decode.
+ case AArch64::AESDrr:
+ return SecondOpcode == AArch64::AESIMCrr ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ }
+
+ if (ST.hasFuseLiterals())
+ // Fuse literal generation operations.
+ switch (FirstOpcode) {
+ // PC relative address.
+ case AArch64::ADRP:
+ return SecondOpcode == AArch64::ADDXri ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // 32 bit immediate.
+ case AArch64::MOVZWi:
+ return (SecondOpcode == AArch64::MOVKWi &&
+ SecondMI->getOperand(3).getImm() == 16) ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // Lower half of 64 bit immediate.
+ case AArch64::MOVZXi:
+ return (SecondOpcode == AArch64::MOVKXi &&
+ SecondMI->getOperand(3).getImm() == 16) ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // Upper half of 64 bit immediate.
+ case AArch64::MOVKXi:
+ return FirstMI->getOperand(3).getImm() == 32 &&
+ ((SecondOpcode == AArch64::MOVKXi &&
+ SecondMI->getOperand(3).getImm() == 48) ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END);
+ }
+
+ return false;
+}
+
+/// \brief Implement the fusion of instr pairs in the scheduling DAG,
+/// anchored at the instr in AnchorSU.
+static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) {
+ const MachineInstr *AnchorMI = AnchorSU.getInstr();
+ if (!AnchorMI || AnchorMI->isPseudo() || AnchorMI->isTransient())
+ return false;
+
+ // If the anchor instr is the ExitSU, then consider its predecessors;
+ // otherwise, its successors.
+ bool Preds = (&AnchorSU == &DAG->ExitSU);
+ SmallVectorImpl<SDep> &AnchorDeps = Preds ? AnchorSU.Preds : AnchorSU.Succs;
+
+ const MachineInstr *FirstMI = Preds ? nullptr : AnchorMI;
+ const MachineInstr *SecondMI = Preds ? AnchorMI : nullptr;
+
+ // Check if the anchor instr may be fused.
+ if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(),
+ FirstMI, SecondMI))
+ return false;
+
+ // Explore fusion candidates among the dependencies of the anchor instr.
+ for (SDep &Dep : AnchorDeps) {
+ // Ignore dependencies that don't enforce ordering.
+ if (Dep.isWeak())
+ continue;
+
+ SUnit &DepSU = *Dep.getSUnit();
+ // Ignore the ExitSU if the dependents are successors.
+ if (!Preds && &DepSU == &DAG->ExitSU)
+ continue;
+
+ const MachineInstr *DepMI = DepSU.getInstr();
+ if (!DepMI || DepMI->isPseudo() || DepMI->isTransient())
+ continue;
+
+ FirstMI = Preds ? DepMI : AnchorMI;
+ SecondMI = Preds ? AnchorMI : DepMI;
+ if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(),
+ FirstMI, SecondMI))
+ continue;
+
+ // Create a single weak edge between the adjacent instrs. The only effect is
+ // to cause bottom-up scheduling to heavily prioritize the clustered instrs.
+ SUnit &FirstSU = Preds ? DepSU : AnchorSU;
+ SUnit &SecondSU = Preds ? AnchorSU : DepSU;
+ DAG->addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster));
+
+ // Adjust the latency between the anchor instr and its
+ // predecessors/successors.
+ for (SDep &IDep : AnchorDeps)
+ if (IDep.getSUnit() == &DepSU)
+ IDep.setLatency(0);
+
+ // Adjust the latency between the dependent instr and its
+ // successors/predecessors.
+ for (SDep &IDep : Preds ? DepSU.Succs : DepSU.Preds)
+ if (IDep.getSUnit() == &AnchorSU)
+ IDep.setLatency(0);
+
+ DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse ";
+ FirstSU.print(dbgs(), DAG); dbgs() << " - ";
+ SecondSU.print(dbgs(), DAG); dbgs() << " / ";
+ dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " <<
+ DAG->TII->getName(SecondMI->getOpcode()) << '\n'; );
+
+ ++NumFused;
+ return true;
+ }
+
+ return false;
+}
+
+/// \brief Post-process the DAG to create cluster edges between instrs that may
+/// be fused by the processor into a single operation.
+class AArch64MacroFusion : public ScheduleDAGMutation {
+public:
+ AArch64MacroFusion() = default;
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+};
+
+void AArch64MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+
+ // For each of the SUnits in the scheduling block, try to fuse the instr in it
+ // with one in its successors.
+ for (SUnit &ISU : DAG->SUnits)
+ scheduleAdjacentImpl(DAG, ISU);
+
+ // Try to fuse the instr in the ExitSU with one in its predecessors.
+ scheduleAdjacentImpl(DAG, DAG->ExitSU);
+}
+
+} // end anonymous namespace
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation() {
+ return EnableMacroFusion ? make_unique<AArch64MacroFusion>() : nullptr;
+}
+
+} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64MacroFusion.h b/lib/Target/AArch64/AArch64MacroFusion.h
new file mode 100644
index 000000000000..e5efedd9fbfd
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MacroFusion.h
@@ -0,0 +1,29 @@
+//===- AArch64MacroFusion.h - AArch64 Macro Fusion ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 definition of the DAG scheduling mutation
+// to pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+
+//===----------------------------------------------------------------------===//
+// AArch64MacroFusion - DAG post-processing to encourage fusion of macro ops.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+/// Note that you have to add:
+/// DAG.addMutation(createAArch64MacroFusionDAGMutation());
+/// to AArch64PassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation();
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_AARCH64MACROFUSION_H
diff --git a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
index 8f45e6a80a36..f3c8e7e9bdc2 100644
--- a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
+++ b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
@@ -12,13 +12,14 @@
// CBZW %W0, <BB#2>
// BB#2:
// %W0 = COPY %WZR
-// This pass should be run after register allocation.
+// Similarly, this pass also handles non-zero copies.
+// BB#0:
+// cmp x0, #1
+// b.eq .LBB0_1
+// .LBB0_1:
+// orr x0, xzr, #0x1
//
-// FIXME: This should be extended to handle any constant other than zero. E.g.,
-// cmp w0, #1
-// b.eq .BB1
-// BB1:
-// mov w0, #1
+// This pass should be run after register allocation.
//
// FIXME: This could also be extended to check the whole dominance subtree below
// the comparison if the compile time regression is acceptable.
@@ -26,6 +27,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
@@ -43,6 +45,7 @@ namespace {
class AArch64RedundantCopyElimination : public MachineFunctionPass {
const MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
+ BitVector ClobberedRegs;
public:
static char ID;
@@ -50,6 +53,16 @@ public:
initializeAArch64RedundantCopyEliminationPass(
*PassRegistry::getPassRegistry());
}
+
+ struct RegImm {
+ MCPhysReg Reg;
+ int32_t Imm;
+ RegImm(MCPhysReg Reg, int32_t Imm) : Reg(Reg), Imm(Imm) {}
+ };
+
+ Optional<RegImm> knownRegValInBlock(MachineInstr &CondBr,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &FirstUse);
bool optimizeCopy(MachineBasicBlock *MBB);
bool runOnMachineFunction(MachineFunction &MF) override;
MachineFunctionProperties getRequiredProperties() const override {
@@ -66,18 +79,120 @@ char AArch64RedundantCopyElimination::ID = 0;
INITIALIZE_PASS(AArch64RedundantCopyElimination, "aarch64-copyelim",
"AArch64 redundant copy elimination pass", false, false)
-static bool guaranteesZeroRegInBlock(MachineInstr &MI, MachineBasicBlock *MBB) {
- unsigned Opc = MI.getOpcode();
+/// Remember what registers the specified instruction modifies.
+static void trackRegDefs(const MachineInstr &MI, BitVector &ClobberedRegs,
+ const TargetRegisterInfo *TRI) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isRegMask()) {
+ ClobberedRegs.setBitsNotInMask(MO.getRegMask());
+ continue;
+ }
+
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (!MO.isDef())
+ continue;
+
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ ClobberedRegs.set(*AI);
+ }
+}
+
+/// It's possible to determine the value of a register based on a dominating
+/// condition. To do so, this function checks whether the basic block \p MBB
+/// is the target of a conditional branch \p CondBr whose equality comparison
+/// is against a constant. If so, return the known physical register and
+/// constant value pair. Otherwise, return None.
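+///
+/// For example, if the predecessor block ends in 'CMP w8, #7; B.EQ <MBB>',
+/// then on entry to \p MBB we know w8 == 7 and return RegImm(W8, 7).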
+Optional<AArch64RedundantCopyElimination::RegImm>
+AArch64RedundantCopyElimination::knownRegValInBlock(
+ MachineInstr &CondBr, MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &FirstUse) {
+ unsigned Opc = CondBr.getOpcode();
+
// Check if the current basic block is the target block to which the
// CBZ/CBNZ instruction jumps when its Wt/Xt is zero.
- if ((Opc == AArch64::CBZW || Opc == AArch64::CBZX) &&
- MBB == MI.getOperand(1).getMBB())
- return true;
- else if ((Opc == AArch64::CBNZW || Opc == AArch64::CBNZX) &&
- MBB != MI.getOperand(1).getMBB())
- return true;
-
- return false;
+ if (((Opc == AArch64::CBZW || Opc == AArch64::CBZX) &&
+ MBB == CondBr.getOperand(1).getMBB()) ||
+ ((Opc == AArch64::CBNZW || Opc == AArch64::CBNZX) &&
+ MBB != CondBr.getOperand(1).getMBB())) {
+ FirstUse = CondBr;
+ return RegImm(CondBr.getOperand(0).getReg(), 0);
+ }
+
+ // Otherwise, must be a conditional branch.
+ if (Opc != AArch64::Bcc)
+ return None;
+
+ // Must be an equality check (i.e., == or !=).
+ AArch64CC::CondCode CC = (AArch64CC::CondCode)CondBr.getOperand(0).getImm();
+ if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
+ return None;
+
+ MachineBasicBlock *BrTarget = CondBr.getOperand(1).getMBB();
+ if ((CC == AArch64CC::EQ && BrTarget != MBB) ||
+ (CC == AArch64CC::NE && BrTarget == MBB))
+ return None;
+
+ // Stop if we get to the beginning of PredMBB.
+ MachineBasicBlock *PredMBB = *MBB->pred_begin();
+ assert(PredMBB == CondBr.getParent() &&
+ "Conditional branch not in predecessor block!");
+ if (CondBr == PredMBB->begin())
+ return None;
+
+ // Registers clobbered in PredMBB between CondBr instruction and current
+ // instruction being checked in loop.
+ ClobberedRegs.reset();
+
+ // Find compare instruction that sets NZCV used by CondBr.
+ MachineBasicBlock::reverse_iterator RIt = CondBr.getReverseIterator();
+ for (MachineInstr &PredI : make_range(std::next(RIt), PredMBB->rend())) {
+
+ // Track clobbered registers.
+ trackRegDefs(PredI, ClobberedRegs, TRI);
+
+ bool IsCMN = false;
+ switch (PredI.getOpcode()) {
+ default:
+ break;
+
+ // CMN is an alias for ADDS with a dead destination register.
+ case AArch64::ADDSWri:
+ case AArch64::ADDSXri:
+ IsCMN = true;
+ LLVM_FALLTHROUGH;
+ // CMP is an alias for SUBS with a dead destination register.
+ case AArch64::SUBSWri:
+ case AArch64::SUBSXri: {
+ MCPhysReg SrcReg = PredI.getOperand(1).getReg();
+
+ // Must not be a symbolic immediate.
+ if (!PredI.getOperand(2).isImm())
+ return None;
+
+ // The src register must not be modified between the cmp and conditional
+ // branch. This includes a self-clobbering compare.
+ if (ClobberedRegs[SrcReg])
+ return None;
+
+ // We've found the Cmp that sets NZCV.
+ int32_t KnownImm = PredI.getOperand(2).getImm();
+ int32_t Shift = PredI.getOperand(3).getImm();
+ KnownImm <<= Shift;
+ if (IsCMN)
+ KnownImm = -KnownImm;
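+ // e.g. 'CMN w2, #5; B.EQ <MBB>' proves w2 == -5 on entry to MBB.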
+ FirstUse = PredI;
+ return RegImm(SrcReg, KnownImm);
+ }
+ }
+
+ // Bail if we see an instruction that defines NZCV that we don't handle.
+ if (PredI.definesRegister(AArch64::NZCV))
+ return None;
+ }
+ return None;
}
bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
@@ -85,79 +200,187 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
if (MBB->pred_size() != 1)
return false;
+ // Check if the predecessor has two successors, implying the block ends in a
+ // conditional branch.
MachineBasicBlock *PredMBB = *MBB->pred_begin();
- MachineBasicBlock::iterator CompBr = PredMBB->getLastNonDebugInstr();
- if (CompBr == PredMBB->end() || PredMBB->succ_size() != 2)
+ if (PredMBB->succ_size() != 2)
+ return false;
+
+ MachineBasicBlock::iterator CondBr = PredMBB->getLastNonDebugInstr();
+ if (CondBr == PredMBB->end())
return false;
- ++CompBr;
+ // Keep track of the earliest point in the PredMBB block where kill markers
+ // need to be removed if a COPY is removed.
+ MachineBasicBlock::iterator FirstUse;
+ // After calling knownRegValInBlock, FirstUse will either point to a CBZ/CBNZ
+ // or a compare (i.e., SUBS). In the latter case, we must take care when
+ // updating FirstUse when scanning for COPY instructions. In particular, if
+ // there's a COPY in between the compare and branch the COPY should not
+ // update FirstUse.
+ bool SeenFirstUse = false;
+ // Registers that contain a known value at the start of MBB.
+ SmallVector<RegImm, 4> KnownRegs;
+
+ MachineBasicBlock::iterator Itr = std::next(CondBr);
do {
- --CompBr;
- if (guaranteesZeroRegInBlock(*CompBr, MBB))
- break;
- } while (CompBr != PredMBB->begin() && CompBr->isTerminator());
+ --Itr;
- // We've not found a CBZ/CBNZ, time to bail out.
- if (!guaranteesZeroRegInBlock(*CompBr, MBB))
- return false;
+ Optional<RegImm> KnownRegImm = knownRegValInBlock(*Itr, MBB, FirstUse);
+ if (KnownRegImm == None)
+ continue;
- unsigned TargetReg = CompBr->getOperand(0).getReg();
- if (!TargetReg)
- return false;
- assert(TargetRegisterInfo::isPhysicalRegister(TargetReg) &&
- "Expect physical register");
+ KnownRegs.push_back(*KnownRegImm);
+
+ // Reset the clobber list, which is used by knownRegValInBlock.
+ ClobberedRegs.reset();
+
+ // Look backward in PredMBB for COPYs from the known reg to find other
+ // registers that are known to be a constant value.
+ for (auto PredI = Itr;; --PredI) {
+ if (FirstUse == PredI)
+ SeenFirstUse = true;
+
+ if (PredI->isCopy()) {
+ MCPhysReg CopyDstReg = PredI->getOperand(0).getReg();
+ MCPhysReg CopySrcReg = PredI->getOperand(1).getReg();
+ for (auto &KnownReg : KnownRegs) {
+ if (ClobberedRegs[KnownReg.Reg])
+ continue;
+ // If we have X = COPY Y, and Y is known to hold a constant, then X now
+ // holds the same constant.
+ if (CopySrcReg == KnownReg.Reg && !ClobberedRegs[CopyDstReg]) {
+ KnownRegs.push_back(RegImm(CopyDstReg, KnownReg.Imm));
+ if (SeenFirstUse)
+ FirstUse = PredI;
+ break;
+ }
+ // If we have X = COPY Y, and X is known to hold a constant, then Y now
+ // holds the same constant.
+ if (CopyDstReg == KnownReg.Reg && !ClobberedRegs[CopySrcReg]) {
+ KnownRegs.push_back(RegImm(CopySrcReg, KnownReg.Imm));
+ if (SeenFirstUse)
+ FirstUse = PredI;
+ break;
+ }
+ }
+ }
+
+ // Stop if we get to the beginning of PredMBB.
+ if (PredI == PredMBB->begin())
+ break;
+
+ trackRegDefs(*PredI, ClobberedRegs, TRI);
+ // Stop if all of the known regs have been clobbered.
+ if (all_of(KnownRegs, [&](RegImm KnownReg) {
+ return ClobberedRegs[KnownReg.Reg];
+ }))
+ break;
+ }
+ break;
+
+ } while (Itr != PredMBB->begin() && Itr->isTerminator());
- // Remember all registers aliasing with TargetReg.
- SmallSetVector<unsigned, 8> TargetRegs;
- for (MCRegAliasIterator AI(TargetReg, TRI, true); AI.isValid(); ++AI)
- TargetRegs.insert(*AI);
+ // We've not found a register with a known value; time to bail out.
+ if (KnownRegs.empty())
+ return false;
bool Changed = false;
+ // UsedKnownRegs is the set of KnownRegs that have had uses added to MBB.
+ SmallSetVector<unsigned, 4> UsedKnownRegs;
MachineBasicBlock::iterator LastChange = MBB->begin();
- unsigned SmallestDef = TargetReg;
- // Remove redundant Copy instructions unless TargetReg is modified.
+ // Remove redundant Copy instructions unless KnownReg is modified.
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
MachineInstr *MI = &*I;
++I;
- if (MI->isCopy() && MI->getOperand(0).isReg() &&
- MI->getOperand(1).isReg()) {
-
- unsigned DefReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
-
- if ((SrcReg == AArch64::XZR || SrcReg == AArch64::WZR) &&
- !MRI->isReserved(DefReg) &&
- (TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg))) {
- DEBUG(dbgs() << "Remove redundant Copy : ");
- DEBUG((MI)->print(dbgs()));
-
- MI->eraseFromParent();
- Changed = true;
- LastChange = I;
- NumCopiesRemoved++;
- SmallestDef =
- TRI->isSubRegister(SmallestDef, DefReg) ? DefReg : SmallestDef;
- continue;
+ bool RemovedMI = false;
+ bool IsCopy = MI->isCopy();
+ bool IsMoveImm = MI->isMoveImmediate();
+ if (IsCopy || IsMoveImm) {
+ MCPhysReg DefReg = MI->getOperand(0).getReg();
+ MCPhysReg SrcReg = IsCopy ? MI->getOperand(1).getReg() : 0;
+ int64_t SrcImm = IsMoveImm ? MI->getOperand(1).getImm() : 0;
+ if (!MRI->isReserved(DefReg) &&
+ ((IsCopy && (SrcReg == AArch64::XZR || SrcReg == AArch64::WZR)) ||
+ IsMoveImm)) {
+ for (RegImm &KnownReg : KnownRegs) {
+ if (KnownReg.Reg != DefReg &&
+ !TRI->isSuperRegister(DefReg, KnownReg.Reg))
+ continue;
+
+ // For a copy, the known value must be a zero.
+ if (IsCopy && KnownReg.Imm != 0)
+ continue;
+
+ if (IsMoveImm) {
+ // For a move immediate, the known immediate must match the source
+ // immediate.
+ if (KnownReg.Imm != SrcImm)
+ continue;
+
+ // Don't remove a move immediate that implicitly defines the upper
+ // bits when only the lower 32 bits are known.
+ MCPhysReg CmpReg = KnownReg.Reg;
+ if (any_of(MI->implicit_operands(), [CmpReg](MachineOperand &O) {
+ return !O.isDead() && O.isReg() && O.isDef() &&
+ O.getReg() != CmpReg;
+ }))
+ continue;
+ }
+
+ if (IsCopy)
+ DEBUG(dbgs() << "Remove redundant Copy : " << *MI);
+ else
+ DEBUG(dbgs() << "Remove redundant Move : " << *MI);
+
+ MI->eraseFromParent();
+ Changed = true;
+ LastChange = I;
+ NumCopiesRemoved++;
+ UsedKnownRegs.insert(KnownReg.Reg);
+ RemovedMI = true;
+ break;
+ }
}
}
- if (MI->modifiesRegister(TargetReg, TRI))
+ // Skip to the next instruction if we removed the COPY/MovImm.
+ if (RemovedMI)
+ continue;
+
+ // Remove any regs the MI clobbers from the KnownRegs set.
+ for (unsigned RI = 0; RI < KnownRegs.size();)
+ if (MI->modifiesRegister(KnownRegs[RI].Reg, TRI)) {
+ std::swap(KnownRegs[RI], KnownRegs[KnownRegs.size() - 1]);
+ KnownRegs.pop_back();
+ // Don't increment RI since we need to now check the swapped-in
+ // KnownRegs[RI].
+ } else {
+ ++RI;
+ }
+
+ // Continue until the KnownRegs set is empty.
+ if (KnownRegs.empty())
break;
}
if (!Changed)
return false;
- // Otherwise, we have to fixup the use-def chain, starting with the
- // CBZ/CBNZ. Conservatively mark as much as we can live.
- CompBr->clearRegisterKills(SmallestDef, TRI);
+ // Add newly used regs to the block's live-in list if they aren't there
+ // already.
+ for (MCPhysReg KnownReg : UsedKnownRegs)
+ if (!MBB->isLiveIn(KnownReg))
+ MBB->addLiveIn(KnownReg);
- if (none_of(TargetRegs, [&](unsigned Reg) { return MBB->isLiveIn(Reg); }))
- MBB->addLiveIn(TargetReg);
-
- // Clear any kills of TargetReg between CompBr and the last removed COPY.
+ // Clear kills in the range where changes were made. This is conservative,
+ // but should be okay since kill markers are being phased out.
+ DEBUG(dbgs() << "Clearing kill flags.\n\tFirstUse: " << *FirstUse
+ << "\tLastChange: " << *LastChange);
+ for (MachineInstr &MMI : make_range(FirstUse, PredMBB->end()))
+ MMI.clearKillInfo();
for (MachineInstr &MMI : make_range(MBB->begin(), LastChange))
- MMI.clearRegisterKills(SmallestDef, TRI);
+ MMI.clearKillInfo();
return true;
}
@@ -168,6 +391,11 @@ bool AArch64RedundantCopyElimination::runOnMachineFunction(
return false;
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
+
+ // Resize the clobber register bitfield tracker. We do this once per
+ // function and then clear the bitfield each time we optimize a copy.
+ ClobberedRegs.resize(TRI->getNumRegs());
+
bool Changed = false;
for (MachineBasicBlock &MBB : MF)
Changed |= optimizeCopy(&MBB);
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index b292c9c87dcd..20a5979f9b4b 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -1,4 +1,4 @@
-//===- AArch64RegisterBankInfo.cpp -------------------------------*- C++ -*-==//
+//===- AArch64RegisterBankInfo.cpp ----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,13 +13,24 @@
//===----------------------------------------------------------------------===//
#include "AArch64RegisterBankInfo.h"
-#include "AArch64InstrInfo.h" // For XXXRegClassID.
+#include "AArch64InstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+
+#define GET_TARGET_REGBANK_IMPL
+#include "AArch64GenRegisterBank.inc"
// This file will be TableGen'ed at some point.
#include "AArch64GenRegisterBankInfo.def"
@@ -31,7 +42,7 @@ using namespace llvm;
#endif
AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
- : RegisterBankInfo(AArch64::RegBanks, AArch64::NumRegisterBanks) {
+ : AArch64GenRegisterBankInfo() {
static bool AlreadyInit = false;
// We have only one set of register banks, whatever the subtarget
// is. Therefore, the initialization of the RegBanks table should be
@@ -78,44 +89,21 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
 // Check that the TableGen-like file is in sync with our expectations.
// First, the Idx.
- assert(AArch64::PartialMappingIdx::PMI_GPR32 ==
- AArch64::PartialMappingIdx::PMI_FirstGPR &&
- "GPR32 index not first in the GPR list");
- assert(AArch64::PartialMappingIdx::PMI_GPR64 ==
- AArch64::PartialMappingIdx::PMI_LastGPR &&
- "GPR64 index not last in the GPR list");
- assert(AArch64::PartialMappingIdx::PMI_FirstGPR <=
- AArch64::PartialMappingIdx::PMI_LastGPR &&
- "GPR list is backward");
- assert(AArch64::PartialMappingIdx::PMI_FPR32 ==
- AArch64::PartialMappingIdx::PMI_FirstFPR &&
- "FPR32 index not first in the FPR list");
- assert(AArch64::PartialMappingIdx::PMI_FPR512 ==
- AArch64::PartialMappingIdx::PMI_LastFPR &&
- "FPR512 index not last in the FPR list");
- assert(AArch64::PartialMappingIdx::PMI_FirstFPR <=
- AArch64::PartialMappingIdx::PMI_LastFPR &&
- "FPR list is backward");
- assert(AArch64::PartialMappingIdx::PMI_FPR32 + 1 ==
- AArch64::PartialMappingIdx::PMI_FPR64 &&
- AArch64::PartialMappingIdx::PMI_FPR64 + 1 ==
- AArch64::PartialMappingIdx::PMI_FPR128 &&
- AArch64::PartialMappingIdx::PMI_FPR128 + 1 ==
- AArch64::PartialMappingIdx::PMI_FPR256 &&
- AArch64::PartialMappingIdx::PMI_FPR256 + 1 ==
- AArch64::PartialMappingIdx::PMI_FPR512 &&
- "FPR indices not properly ordered");
+ assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR,
+ {PMI_GPR32, PMI_GPR64}) &&
+ "PartialMappingIdx's are incorrectly ordered");
+ assert(checkPartialMappingIdx(
+ PMI_FirstFPR, PMI_LastFPR,
+ {PMI_FPR32, PMI_FPR64, PMI_FPR128, PMI_FPR256, PMI_FPR512}) &&
+ "PartialMappingIdx's are incorrectly ordered");
// Now, the content.
// Check partial mapping.
#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB) \
do { \
- const PartialMapping &Map = \
- AArch64::PartMappings[AArch64::PartialMappingIdx::Idx - \
- AArch64::PartialMappingIdx::PMI_Min]; \
- (void)Map; \
- assert(Map.StartIdx == ValStartIdx && Map.Length == ValLength && \
- Map.RegBank == &RB && #Idx " is incorrectly initialized"); \
- } while (0)
+ assert( \
+ checkPartialMap(PartialMappingIdx::Idx, ValStartIdx, ValLength, RB) && \
+ #Idx " is incorrectly initialized"); \
+ } while (false)
CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
@@ -128,17 +116,11 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
// Check value mapping.
#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset) \
do { \
- unsigned PartialMapBaseIdx = \
- AArch64::PartialMappingIdx::PMI_##RBName##Size - \
- AArch64::PartialMappingIdx::PMI_Min; \
- (void)PartialMapBaseIdx; \
- const ValueMapping &Map = AArch64::getValueMapping( \
- AArch64::PartialMappingIdx::PMI_First##RBName, Size)[Offset]; \
- (void)Map; \
- assert(Map.BreakDown == &AArch64::PartMappings[PartialMapBaseIdx] && \
- Map.NumBreakDowns == 1 && #RBName #Size \
- " " #Offset " is incorrectly initialized"); \
- } while (0)
+ assert(checkValueMapImpl(PartialMappingIdx::PMI_##RBName##Size, \
+ PartialMappingIdx::PMI_First##RBName, Size, \
+ Offset) && \
+ #RBName #Size " " #Offset " is incorrectly initialized"); \
+ } while (false)
#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)
@@ -157,7 +139,7 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
CHECK_VALUEMAP_IMPL(RBName, Size, 0); \
CHECK_VALUEMAP_IMPL(RBName, Size, 1); \
CHECK_VALUEMAP_IMPL(RBName, Size, 2); \
- } while (0)
+ } while (false)
CHECK_VALUEMAP_3OPS(GPR, 32);
CHECK_VALUEMAP_3OPS(GPR, 64);
@@ -169,24 +151,23 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size) \
do { \
- unsigned PartialMapDstIdx = \
- AArch64::PMI_##RBNameDst##Size - AArch64::PMI_Min; \
- unsigned PartialMapSrcIdx = \
- AArch64::PMI_##RBNameSrc##Size - AArch64::PMI_Min; \
- (void) PartialMapDstIdx; \
- (void) PartialMapSrcIdx; \
- const ValueMapping *Map = AArch64::getCopyMapping( \
- AArch64::PMI_First##RBNameDst == AArch64::PMI_FirstGPR, \
- AArch64::PMI_First##RBNameSrc == AArch64::PMI_FirstGPR, Size); \
- (void) Map; \
- assert(Map[0].BreakDown == &AArch64::PartMappings[PartialMapDstIdx] && \
+ unsigned PartialMapDstIdx = PMI_##RBNameDst##Size - PMI_Min; \
+ unsigned PartialMapSrcIdx = PMI_##RBNameSrc##Size - PMI_Min; \
+ (void)PartialMapDstIdx; \
+ (void)PartialMapSrcIdx; \
+ const ValueMapping *Map = getCopyMapping( \
+ AArch64::RBNameDst##RegBankID, AArch64::RBNameSrc##RegBankID, Size); \
+ (void)Map; \
+ assert(Map[0].BreakDown == \
+ &AArch64GenRegisterBankInfo::PartMappings[PartialMapDstIdx] && \
Map[0].NumBreakDowns == 1 && #RBNameDst #Size \
" Dst is incorrectly initialized"); \
- assert(Map[1].BreakDown == &AArch64::PartMappings[PartialMapSrcIdx] && \
+ assert(Map[1].BreakDown == \
+ &AArch64GenRegisterBankInfo::PartMappings[PartialMapSrcIdx] && \
Map[1].NumBreakDowns == 1 && #RBNameSrc #Size \
" Src is incorrectly initialized"); \
\
- } while (0)
+ } while (false)
CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
@@ -280,12 +261,10 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
break;
InstructionMappings AltMappings;
InstructionMapping GPRMapping(
- /*ID*/ 1, /*Cost*/ 1,
- AArch64::getValueMapping(AArch64::PMI_FirstGPR, Size),
+ /*ID*/ 1, /*Cost*/ 1, getValueMapping(PMI_FirstGPR, Size),
/*NumOperands*/ 3);
InstructionMapping FPRMapping(
- /*ID*/ 2, /*Cost*/ 1,
- AArch64::getValueMapping(AArch64::PMI_FirstFPR, Size),
+ /*ID*/ 2, /*Cost*/ 1, getValueMapping(PMI_FirstFPR, Size),
/*NumOperands*/ 3);
AltMappings.emplace_back(std::move(GPRMapping));
@@ -305,21 +284,21 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
InstructionMappings AltMappings;
InstructionMapping GPRMapping(
/*ID*/ 1, /*Cost*/ 1,
- AArch64::getCopyMapping(/*DstIsGPR*/ true, /*SrcIsGPR*/ true, Size),
+ getCopyMapping(AArch64::GPRRegBankID, AArch64::GPRRegBankID, Size),
/*NumOperands*/ 2);
InstructionMapping FPRMapping(
/*ID*/ 2, /*Cost*/ 1,
- AArch64::getCopyMapping(/*DstIsGPR*/ false, /*SrcIsGPR*/ false, Size),
+ getCopyMapping(AArch64::FPRRegBankID, AArch64::FPRRegBankID, Size),
/*NumOperands*/ 2);
InstructionMapping GPRToFPRMapping(
/*ID*/ 3,
/*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
- AArch64::getCopyMapping(/*DstIsGPR*/ false, /*SrcIsGPR*/ true, Size),
+ getCopyMapping(AArch64::FPRRegBankID, AArch64::GPRRegBankID, Size),
/*NumOperands*/ 2);
InstructionMapping FPRToGPRMapping(
/*ID*/ 3,
/*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
- AArch64::getCopyMapping(/*DstIsGPR*/ true, /*SrcIsGPR*/ false, Size),
+ getCopyMapping(AArch64::GPRRegBankID, AArch64::FPRRegBankID, Size),
/*NumOperands*/ 2);
AltMappings.emplace_back(std::move(GPRMapping));
@@ -341,17 +320,15 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
InstructionMappings AltMappings;
InstructionMapping GPRMapping(
/*ID*/ 1, /*Cost*/ 1,
- getOperandsMapping(
- {AArch64::getValueMapping(AArch64::PMI_FirstGPR, Size),
- // Addresses are GPR 64-bit.
- AArch64::getValueMapping(AArch64::PMI_FirstGPR, 64)}),
+ getOperandsMapping({getValueMapping(PMI_FirstGPR, Size),
+ // Addresses are GPR 64-bit.
+ getValueMapping(PMI_FirstGPR, 64)}),
/*NumOperands*/ 2);
InstructionMapping FPRMapping(
/*ID*/ 2, /*Cost*/ 1,
- getOperandsMapping(
- {AArch64::getValueMapping(AArch64::PMI_FirstFPR, Size),
- // Addresses are GPR 64-bit.
- AArch64::getValueMapping(AArch64::PMI_FirstGPR, 64)}),
+ getOperandsMapping({getValueMapping(PMI_FirstFPR, Size),
+ // Addresses are GPR 64-bit.
+ getValueMapping(PMI_FirstGPR, 64)}),
/*NumOperands*/ 2);
AltMappings.emplace_back(std::move(GPRMapping));
@@ -369,13 +346,12 @@ void AArch64RegisterBankInfo::applyMappingImpl(
switch (OpdMapper.getMI().getOpcode()) {
case TargetOpcode::G_OR:
case TargetOpcode::G_BITCAST:
- case TargetOpcode::G_LOAD: {
+ case TargetOpcode::G_LOAD:
// Those ID must match getInstrAlternativeMappings.
assert((OpdMapper.getInstrMapping().getID() >= 1 &&
OpdMapper.getInstrMapping().getID() <= 4) &&
"Don't know how to handle that ID");
return applyDefaultMapping(OpdMapper);
- }
default:
llvm_unreachable("Don't know how to handle that operation");
}
@@ -411,6 +387,8 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(const MachineInstr &MI) {
unsigned Size = Ty.getSizeInBits();
bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
+ PartialMappingIdx RBIdx = IsFPR ? PMI_FirstFPR : PMI_FirstGPR;
+
#ifndef NDEBUG
// Make sure all the operands are using similar size and type.
// Should probably be checked by the machine verifier.
@@ -422,20 +400,19 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(const MachineInstr &MI) {
// for each types.
for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
- assert(AArch64::getRegBankBaseIdxOffset(OpTy.getSizeInBits()) ==
- AArch64::getRegBankBaseIdxOffset(Size) &&
- "Operand has incompatible size");
+ assert(
+ AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(
+ RBIdx, OpTy.getSizeInBits()) ==
+ AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(RBIdx, Size) &&
+ "Operand has incompatible size");
bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
(void)OpIsFPR;
assert(IsFPR == OpIsFPR && "Operand has incompatible type");
}
#endif // End NDEBUG.
- AArch64::PartialMappingIdx RBIdx =
- IsFPR ? AArch64::PMI_FirstFPR : AArch64::PMI_FirstGPR;
-
- return InstructionMapping{DefaultMappingID, 1,
- AArch64::getValueMapping(RBIdx, Size), NumOperands};
+ return InstructionMapping{DefaultMappingID, 1, getValueMapping(RBIdx, Size),
+ NumOperands};
}
RegisterBankInfo::InstructionMapping
@@ -485,9 +462,10 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
const RegisterBank &SrcRB =
SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
- return InstructionMapping{DefaultMappingID, copyCost(DstRB, SrcRB, Size),
- AArch64::getCopyMapping(DstIsGPR, SrcIsGPR, Size),
- /*NumOperands*/ 2};
+ return InstructionMapping{
+ DefaultMappingID, copyCost(DstRB, SrcRB, Size),
+ getCopyMapping(DstRB.getID(), SrcRB.getID(), Size),
+ /*NumOperands*/ 2};
}
case TargetOpcode::G_SEQUENCE:
// FIXME: support this, but the generic code is really not going to do
@@ -501,7 +479,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Track the size and bank of each register. We don't do partial mappings.
SmallVector<unsigned, 4> OpSize(NumOperands);
- SmallVector<AArch64::PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
+ SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
auto &MO = MI.getOperand(Idx);
if (!MO.isReg())
@@ -513,9 +491,9 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
// For floating-point instructions, scalars go in FPRs.
if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc))
- OpRegBankIdx[Idx] = AArch64::PMI_FirstFPR;
+ OpRegBankIdx[Idx] = PMI_FirstFPR;
else
- OpRegBankIdx[Idx] = AArch64::PMI_FirstGPR;
+ OpRegBankIdx[Idx] = PMI_FirstGPR;
}
unsigned Cost = 1;
@@ -523,49 +501,50 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// fine-tune the computed mapping.
switch (Opc) {
case TargetOpcode::G_SITOFP:
- case TargetOpcode::G_UITOFP: {
- OpRegBankIdx = {AArch64::PMI_FirstFPR, AArch64::PMI_FirstGPR};
+ case TargetOpcode::G_UITOFP:
+ OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
break;
- }
case TargetOpcode::G_FPTOSI:
- case TargetOpcode::G_FPTOUI: {
- OpRegBankIdx = {AArch64::PMI_FirstGPR, AArch64::PMI_FirstFPR};
+ case TargetOpcode::G_FPTOUI:
+ OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
break;
- }
- case TargetOpcode::G_FCMP: {
- OpRegBankIdx = {AArch64::PMI_FirstGPR,
- /* Predicate */ AArch64::PMI_None, AArch64::PMI_FirstFPR,
- AArch64::PMI_FirstFPR};
+ case TargetOpcode::G_FCMP:
+ OpRegBankIdx = {PMI_FirstGPR,
+ /* Predicate */ PMI_None, PMI_FirstFPR, PMI_FirstFPR};
break;
- }
- case TargetOpcode::G_BITCAST: {
+ case TargetOpcode::G_BITCAST:
// This is going to be a cross register bank copy and this is expensive.
if (OpRegBankIdx[0] != OpRegBankIdx[1])
- Cost =
- copyCost(*AArch64::PartMappings[OpRegBankIdx[0]].RegBank,
- *AArch64::PartMappings[OpRegBankIdx[1]].RegBank, OpSize[0]);
+ Cost = copyCost(
+ *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[0]].RegBank,
+ *AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
+ OpSize[0]);
break;
- }
- case TargetOpcode::G_LOAD: {
+ case TargetOpcode::G_LOAD:
// Loading in vector unit is slightly more expensive.
// This is actually only true for the LD1R and co instructions,
// but anyway for the fast mode this number does not matter and
// for the greedy mode the cost of the cross bank copy will
// offset this number.
// FIXME: Should be derived from the scheduling model.
- if (OpRegBankIdx[0] >= AArch64::PMI_FirstFPR)
+ if (OpRegBankIdx[0] >= PMI_FirstFPR)
Cost = 2;
- }
+ break;
}
// Finally construct the computed mapping.
RegisterBankInfo::InstructionMapping Mapping =
InstructionMapping{DefaultMappingID, Cost, nullptr, NumOperands};
SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
- for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
- if (MI.getOperand(Idx).isReg())
- OpdsMapping[Idx] =
- AArch64::getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
+ if (MI.getOperand(Idx).isReg()) {
+ auto Mapping = getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
+ if (!Mapping->isValid())
+ return InstructionMapping();
+
+ OpdsMapping[Idx] = Mapping;
+ }
+ }
Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
return Mapping;
diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.h b/lib/Target/AArch64/AArch64RegisterBankInfo.h
index f763235049d4..0a795a42c0b1 100644
--- a/lib/Target/AArch64/AArch64RegisterBankInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterBankInfo.h
@@ -16,25 +16,78 @@
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#define GET_REGBANK_DECLARATIONS
+#include "AArch64GenRegisterBank.inc"
+
namespace llvm {
class TargetRegisterInfo;
-namespace AArch64 {
-enum {
- GPRRegBankID = 0, /// General Purpose Registers: W, X.
- FPRRegBankID = 1, /// Floating Point/Vector Registers: B, H, S, D, Q.
- CCRRegBankID = 2, /// Conditional register: NZCV.
- NumRegisterBanks
-};
+class AArch64GenRegisterBankInfo : public RegisterBankInfo {
+protected:
+
+ enum PartialMappingIdx {
+ PMI_None = -1,
+ PMI_FPR32 = 1,
+ PMI_FPR64,
+ PMI_FPR128,
+ PMI_FPR256,
+ PMI_FPR512,
+ PMI_GPR32,
+ PMI_GPR64,
+ PMI_FirstGPR = PMI_GPR32,
+ PMI_LastGPR = PMI_GPR64,
+ PMI_FirstFPR = PMI_FPR32,
+ PMI_LastFPR = PMI_FPR512,
+ PMI_Min = PMI_FirstFPR,
+ };
+
+ static RegisterBankInfo::PartialMapping PartMappings[];
+ static RegisterBankInfo::ValueMapping ValMappings[];
+ static PartialMappingIdx BankIDToCopyMapIdx[];
+
+ enum ValueMappingIdx {
+ InvalidIdx = 0,
+ First3OpsIdx = 1,
+ Last3OpsIdx = 19,
+ DistanceBetweenRegBanks = 3,
+ FirstCrossRegCpyIdx = 22,
+ LastCrossRegCpyIdx = 34,
+ DistanceBetweenCrossRegCpy = 2
+ };
+
+ static bool checkPartialMap(unsigned Idx, unsigned ValStartIdx,
+ unsigned ValLength, const RegisterBank &RB);
+ static bool checkValueMapImpl(unsigned Idx, unsigned FirstInBank,
+ unsigned Size, unsigned Offset);
+ static bool checkPartialMappingIdx(PartialMappingIdx FirstAlias,
+ PartialMappingIdx LastAlias,
+ ArrayRef<PartialMappingIdx> Order);
-extern RegisterBank GPRRegBank;
-extern RegisterBank FPRRegBank;
-extern RegisterBank CCRRegBank;
-} // End AArch64 namespace.
+ static unsigned getRegBankBaseIdxOffset(unsigned RBIdx, unsigned Size);
+
+ /// Get the pointer to the ValueMapping representing the RegisterBank
+ /// at \p RBIdx with a size of \p Size.
+ ///
+ /// The returned mapping works for instructions with the same kind of
+ /// operands for up to 3 operands.
+ ///
+ /// \pre \p RBIdx != PartialMappingIdx::None
+ static const RegisterBankInfo::ValueMapping *
+ getValueMapping(PartialMappingIdx RBIdx, unsigned Size);
+
+ /// Get the pointer to the ValueMapping of the operands of a copy
+ /// instruction from the \p SrcBankID register bank to the \p DstBankID
+ /// register bank with a size of \p Size.
+ static const RegisterBankInfo::ValueMapping *
+ getCopyMapping(unsigned DstBankID, unsigned SrcBankID, unsigned Size);
+
+#define GET_TARGET_REGBANK_CLASS
+#include "AArch64GenRegisterBank.inc"
+};
/// This class provides the information for the target register banks.
-class AArch64RegisterBankInfo final : public RegisterBankInfo {
+class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
/// See RegisterBankInfo::applyMapping.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
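For concreteness, a minimal sketch of how the new helper is meant to be used from within AArch64RegisterBankInfo, assembled from the doc comment above and the getInstrMapping changes earlier in this patch (all identifiers come from the patch; this is an illustrative sketch, not the committed code):

    // Fetch the shared mapping for a 32-bit value in the GPR bank. Per the
    // doc comment, one ValueMapping covers up to 3 operands of the same kind.
    const RegisterBankInfo::ValueMapping *VM =
        getValueMapping(PMI_FirstGPR, /*Size=*/32);
    if (VM->isValid()) {
      OpdsMapping[0] = VM; // reuse the same mapping for each register operand
      OpdsMapping[1] = VM;
    }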
diff --git a/lib/Target/AArch64/AArch64RegisterBanks.td b/lib/Target/AArch64/AArch64RegisterBanks.td
new file mode 100644
index 000000000000..c2b6c0b04e9b
--- /dev/null
+++ b/lib/Target/AArch64/AArch64RegisterBanks.td
@@ -0,0 +1,20 @@
+//=- AArch64RegisterBanks.td - Describe the AArch64 Banks ----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the register banks available on AArch64.
+//
+//===----------------------------------------------------------------------===//
+
+/// General Purpose Registers: W, X.
+def GPRRegBank : RegisterBank<"GPR", [GPR64all]>;
+
+/// Floating Point/Vector Registers: B, H, S, D, Q.
+def FPRRegBank : RegisterBank<"FPR", [QQQQ]>;
+
+/// Conditional register: NZCV.
+def CCRRegBank : RegisterBank<"CCR", [CCR]>;
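For readers new to the RegisterBank TableGen class: each def gives the bank a name and a list of root register classes, and the bank covers those classes together with (roughly) all of their subclasses, which is why the single large classes GPR64all, QQQQ and CCR suffice above. A hypothetical additional bank would follow the same shape:

    /// Hypothetical example only -- not part of this patch.
    def ExampleRegBank : RegisterBank<"EXAMPLE", [GPR32, GPR64]>;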
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 98fad71aa18a..baf15ac540cf 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -118,25 +118,17 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
- markSuperRegs(Reserved, AArch64::SP);
- markSuperRegs(Reserved, AArch64::XZR);
markSuperRegs(Reserved, AArch64::WSP);
markSuperRegs(Reserved, AArch64::WZR);
- if (TFI->hasFP(MF) || TT.isOSDarwin()) {
- markSuperRegs(Reserved, AArch64::FP);
+ if (TFI->hasFP(MF) || TT.isOSDarwin())
markSuperRegs(Reserved, AArch64::W29);
- }
- if (MF.getSubtarget<AArch64Subtarget>().isX18Reserved()) {
- markSuperRegs(Reserved, AArch64::X18); // Platform register
- markSuperRegs(Reserved, AArch64::W18);
- }
+ if (MF.getSubtarget<AArch64Subtarget>().isX18Reserved())
+ markSuperRegs(Reserved, AArch64::W18); // Platform register
- if (hasBasePointer(MF)) {
- markSuperRegs(Reserved, AArch64::X19);
+ if (hasBasePointer(MF))
markSuperRegs(Reserved, AArch64::W19);
- }
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
diff --git a/lib/Target/AArch64/AArch64SchedA53.td b/lib/Target/AArch64/AArch64SchedA53.td
index 93ca079275c8..18d000ace94c 100644
--- a/lib/Target/AArch64/AArch64SchedA53.td
+++ b/lib/Target/AArch64/AArch64SchedA53.td
@@ -13,7 +13,7 @@
// ===---------------------------------------------------------------------===//
// The following definitions describe the simpler per-operand machine model.
-// This works with MachineScheduler. See MCSchedModel.h for details.
+// This works with MachineScheduler. See MCSchedule.h for details.
// Cortex-A53 machine model for scheduling and other instruction cost heuristics.
def CortexA53Model : SchedMachineModel {
diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td
index 99c48d0146e4..303398ea0b7f 100644
--- a/lib/Target/AArch64/AArch64SchedA57.td
+++ b/lib/Target/AArch64/AArch64SchedA57.td
@@ -162,7 +162,9 @@ def : InstRW<[A57Write_2cyc_1M], (instregex "BFM")>;
// Cryptography Extensions
// -----------------------------------------------------------------------------
-def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>;
+def A57ReadAES : SchedReadAdvance<3, [A57Write_3cyc_1W]>;
+def : InstRW<[A57Write_3cyc_1W], (instregex "^AES[DE]")>;
+def : InstRW<[A57Write_3cyc_1W, A57ReadAES], (instregex "^AESI?MC")>;
def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>;
def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>;
def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>;
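A worked reading of this A57 hunk: A57ReadAES advances the read of any operand produced by an A57Write_3cyc_1W write by 3 cycles, so an AESMC/AESIMC consuming the result of a preceding AESD/AESE sees an effective latency of 3 - 3 = 0 cycles, modeling the back-to-back issue of the fused AES pair.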
diff --git a/lib/Target/AArch64/AArch64SchedFalkor.td b/lib/Target/AArch64/AArch64SchedFalkor.td
index 19a6d6f2a1ad..eec089087fe0 100644
--- a/lib/Target/AArch64/AArch64SchedFalkor.td
+++ b/lib/Target/AArch64/AArch64SchedFalkor.td
@@ -17,10 +17,112 @@
// instruction cost model.
def FalkorModel : SchedMachineModel {
- let IssueWidth = 4; // 4-wide issue for expanded uops.
+ let IssueWidth = 8; // 8 uops are dispatched per cycle.
let MicroOpBufferSize = 128; // Out-of-order with temporary unified issue buffer.
let LoopMicroOpBufferSize = 16;
let LoadLatency = 3; // Optimistic load latency.
let MispredictPenalty = 11; // Minimum branch misprediction penalty.
- let CompleteModel = 0;
+ let CompleteModel = 1;
+}
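Note the effect of flipping CompleteModel on: the scheduling-model verifier now requires every AArch64 instruction to have scheduling information under this model, coverage that the per-instruction InstRW overrides in the Falkor details file included at the bottom of this file are there to provide.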
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Falkor.
+
+let SchedModel = FalkorModel in {
+
+ def FalkorUnitB : ProcResource<1>; // Branch
+ def FalkorUnitLD : ProcResource<1>; // Load pipe
+ def FalkorUnitSD : ProcResource<1>; // Store data
+ def FalkorUnitST : ProcResource<1>; // Store pipe
+ def FalkorUnitX : ProcResource<1>; // Complex arithmetic
+ def FalkorUnitY : ProcResource<1>; // Simple arithmetic
+ def FalkorUnitZ : ProcResource<1>; // Simple arithmetic
+
+ def FalkorUnitVSD : ProcResource<1>; // Vector store data
+ def FalkorUnitVX : ProcResource<1>; // Vector X-pipe
+ def FalkorUnitVY : ProcResource<1>; // Vector Y-pipe
+
+ def FalkorUnitGTOV : ProcResource<1>; // Scalar to Vector
+ def FalkorUnitVTOG : ProcResource<1>; // Vector to Scalar
+
+ // Define the resource groups.
+ def FalkorUnitXY : ProcResGroup<[FalkorUnitX, FalkorUnitY]>;
+ def FalkorUnitXYZ : ProcResGroup<[FalkorUnitX, FalkorUnitY, FalkorUnitZ]>;
+ def FalkorUnitXYZB : ProcResGroup<[FalkorUnitX, FalkorUnitY, FalkorUnitZ,
+ FalkorUnitB]>;
+ def FalkorUnitZB : ProcResGroup<[FalkorUnitZ, FalkorUnitB]>;
+ def FalkorUnitVXVY : ProcResGroup<[FalkorUnitVX, FalkorUnitVY]>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Map the target-defined scheduler read/write resources and latency for
+// Falkor.
+
+let SchedModel = FalkorModel in {
+
+def : WriteRes<WriteImm, [FalkorUnitXYZ]> { let Latency = 1; }
+def : WriteRes<WriteI, [FalkorUnitXYZ]> { let Latency = 1; }
+def : WriteRes<WriteISReg, [FalkorUnitVXVY, FalkorUnitVXVY]>
+ { let Latency = 1; let NumMicroOps = 2; }
+def : WriteRes<WriteIEReg, [FalkorUnitXYZ, FalkorUnitXYZ]>
+ { let Latency = 2; let NumMicroOps = 2; }
+def : WriteRes<WriteExtr, [FalkorUnitXYZ, FalkorUnitXYZ]>
+ { let Latency = 2; let NumMicroOps = 2; }
+def : WriteRes<WriteIS, [FalkorUnitXYZ]> { let Latency = 1; }
+def : WriteRes<WriteID32, [FalkorUnitX, FalkorUnitZ]>
+ { let Latency = 8; let NumMicroOps = 2; }
+def : WriteRes<WriteID64, [FalkorUnitX, FalkorUnitZ]>
+ { let Latency = 16; let NumMicroOps = 2; }
+def : WriteRes<WriteIM32, [FalkorUnitX]> { let Latency = 4; }
+def : WriteRes<WriteIM64, [FalkorUnitX]> { let Latency = 5; }
+def : WriteRes<WriteBr, [FalkorUnitB]> { let Latency = 1; }
+def : WriteRes<WriteBrReg, [FalkorUnitB]> { let Latency = 1; }
+def : WriteRes<WriteLD, [FalkorUnitLD]> { let Latency = 3; }
+def : WriteRes<WriteST, [FalkorUnitLD, FalkorUnitST, FalkorUnitSD]>
+ { let Latency = 3; let NumMicroOps = 3; }
+def : WriteRes<WriteSTP, [FalkorUnitST, FalkorUnitSD]>
+ { let Latency = 0; let NumMicroOps = 2; }
+def : WriteRes<WriteAdr, [FalkorUnitXYZ]> { let Latency = 5; }
+def : WriteRes<WriteLDIdx, [FalkorUnitLD]> { let Latency = 5; }
+def : WriteRes<WriteSTIdx, [FalkorUnitLD, FalkorUnitST, FalkorUnitSD]>
+ { let Latency = 4; let NumMicroOps = 3; }
+def : WriteRes<WriteF, [FalkorUnitVXVY, FalkorUnitVXVY]>
+ { let Latency = 3; let NumMicroOps = 2; }
+def : WriteRes<WriteFCmp, [FalkorUnitVXVY]> { let Latency = 2; }
+def : WriteRes<WriteFCvt, [FalkorUnitVXVY]> { let Latency = 4; }
+def : WriteRes<WriteFCopy, [FalkorUnitVXVY]> { let Latency = 4; }
+def : WriteRes<WriteFImm, [FalkorUnitVXVY]> { let Latency = 4; }
+def : WriteRes<WriteFMul, [FalkorUnitVXVY, FalkorUnitVXVY]>
+ { let Latency = 6; let NumMicroOps = 2; }
+def : WriteRes<WriteFDiv, [FalkorUnitVXVY, FalkorUnitVXVY]>
+  { let Latency = 12; let NumMicroOps = 2; } // Fragment -1 / NoRSV +1
+def : WriteRes<WriteV, [FalkorUnitVXVY]> { let Latency = 6; }
+def : WriteRes<WriteVLD, [FalkorUnitLD]> { let Latency = 3; }
+def : WriteRes<WriteVST, [FalkorUnitST, FalkorUnitVSD]>
+ { let Latency = 0; let NumMicroOps = 2; }
+
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+def : WriteRes<WriteLDHi, []> { let Latency = 3; }
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// No forwarding logic is modeled yet.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+// Detailed Refinements
+// -----------------------------------------------------------------------------
+include "AArch64SchedFalkorDetails.td"
+
}
diff --git a/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
new file mode 100644
index 000000000000..6bce4ef6b652
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
@@ -0,0 +1,523 @@
+//==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the uop and latency details for the machine model for the
+// Qualcomm Falkor subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+include "AArch64SchedFalkorWriteRes.td"
+
+//===----------------------------------------------------------------------===//
+// Specialize the coarse model by associating instruction groups with the
+// subtarget-defined types. As the model is refined, this will override most
+// of the earlier mappings.
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+// SIMD Floating-point Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f32|v4f16)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v4f16|v2i16p|v2i32p)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(16|32|64)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(16|32|64|v2f32|v4f16|v2i32|v4i16)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i16|v1i32|v1i64|v2i32|v4i16)rz$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f32|v4f16)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?V(v4i16|v4i32|v8i16)v$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)(v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i16p|v2i32p|v2i64p|v2f32|v4f16)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTXNv1i64)>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i32|v4i16)(_shift)?$")>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>;
+def : InstRW<[FalkorWr_1VXVY_5cyc], (instrs FMULX16, FMULX32)>;
+
+def : InstRW<[FalkorWr_1VXVY_6cyc], (instregex "^(FMUL|FMULX)v1i64_indexed$")>;
+def : InstRW<[FalkorWr_1VXVY_6cyc], (instrs FMULX64)>;
+
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32|v8f16)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v8f16|v2i64p)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32|v8i16)rz$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32|v8f16)$")>;
+
+def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^(FDIV|FSQRT)(v2f32|v4f16)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32|v8f16)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32|v8f16)$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(FCVTL|FCVTL2)(v2i32|v4i16|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32|v8i16)(_shift)?$")>;
+
+def : InstRW<[FalkorWr_2VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f64|v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^(FMUL|FMULX)v2i64_indexed$")>;
+
+def : InstRW<[FalkorWr_3VXVY_4cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>;
+
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v2i32|v4i16|v4i32|v8i16|v4f32)$")>;
+
+def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32|v8f16)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc, FalkorReadVMA],(instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc, FalkorReadVMA],(instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc, FalkorReadFMA],(instregex "^FML(A|S)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>;
+def : InstRW<[FalkorWr_1VXVY_6cyc, FalkorReadFMA],(instregex "^FML(A|S)v1i64_indexed$")>;
+def : InstRW<[FalkorWr_2VXVY_5cyc, FalkorReadFMA],(instregex "^FML(A|S)(v2f64|v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>;
+def : InstRW<[FalkorWr_2VXVY_6cyc, FalkorReadFMA],(instregex "^FML(A|S)v2i64_indexed$")>;
+// SIMD Integer Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^ADD(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs ADDPv2i64p)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v8i8$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIC|ORR)(v2i32|v4i16)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^NEG(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHLv1i64$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs PMULv8i8)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)(ABD|ADALP)(v8i8|v4i16|v2i32)(_v.*)?$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)ADDLVv4i16v$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(s|h|b)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs ADDVv4i16v)>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQABS(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)ADDLVv8i8v$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs ADDVv8i8v)>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^SQDMULL(i16|i32)$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^SQRDML(A|S)?H(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs ADDVv4i32v)>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs ADDVv8i16v)>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(ADD|SUB)HNv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)ABA(v2i32|v4i16|v8i8)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_5cyc], (instrs ADDVv16i8v)>;
+
+def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32)_shift?$")>;
+def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^R(ADD|SUB)HNv.*$")>;
+
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^ADD(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs ADDPv2i64)>; // sz==11
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v16i8$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIC|ORR)(v8i16|v4i32)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(NEG|SUB)(v16i8|v8i16|v4i32|v2i64)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)ADDLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v16i8|v2i64|v4i32|v8i16)(_v.*)?$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHR(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SUBLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS)(v16i8|v2i64|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; // sz!=11
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL2?(v8i8|v16i8)$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABD(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABDLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16|v4i32|v2i64)(_v.*)?$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL2?(v1i64|v2i64)$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^SQDMULLv.*$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>;
+
+def : InstRW<[FalkorWr_3VXVY_3cyc], (instregex "^(S|U)ADDLVv4i32v$")>;
+
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(S|U)ADDLVv8i16v$")>;
+
+def : InstRW<[FalkorWr_3VXVY_6cyc], (instregex "^(S|U)ADDLVv16i8v$")>;
+
+def : InstRW<[FalkorWr_4VXVY_2cyc], (instregex "^(S|U)(ADD|SUB)Wv.*$")>;
+
+def : InstRW<[FalkorWr_4VXVY_3cyc], (instregex "^(S|U)ABALv.*$")>;
+
+def : InstRW<[FalkorWr_4VXVY_4cyc], (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc, FalkorReadVMA],(instregex "^SQD(MLAL|MLSL)(i16|i32)$")>;
+def : InstRW<[FalkorWr_2VXVY_4cyc, FalkorReadVMA],(instregex "^SQD(MLAL|MLSL)v.*$")>;
+// SIMD Load Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[WriteVLD], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
+def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[WriteVLD], (instrs LD2i64)>;
+def : InstRW<[WriteVLD, WriteAdr], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
+def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[WriteVLD, WriteAdr], (instrs LD2i64_POST)>;
+
+def : InstRW<[FalkorWr_1LD_1VXVY_4cyc], (instregex "LD1i(8|16|32)$")>;
+def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, WriteAdr], (instregex "LD1i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD3i64)>;
+def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD4i64)>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instrs LD3i64_POST)>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instrs LD4i64_POST)>;
+
+def : InstRW<[FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)$")>;
+def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, WriteAdr], (instregex "^LD2i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_3LD_3cyc], (instrs LD3Threev2d)>;
+def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instrs LD3Threev2d_POST)>;
+def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_3VXVY_4cyc], (instregex "LD3i(8|16|32)$")>;
+def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, WriteAdr], (instregex "LD3i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_4LD_3cyc], (instrs LD4Fourv2d)>;
+def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instrs LD4Fourv2d_POST)>;
+def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)$")>;
+def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, WriteAdr], (instregex "^LD4i(8|16|32)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, WriteAdr],(instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc], (instregex "^LD4Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, WriteAdr],(instregex "^LD4Fourv(8b|4h|2s|1d)_POST$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "LD3Threev(16b|8h|4s)$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD4Fourv(16b|8h|4s)$")>;
+
+def : InstRW<[FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, WriteAdr],(instregex "LD3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, WriteAdr],(instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
+
+// Arithmetic and Logical Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_ADD], (instregex "^ADD(S)?(W|X)r(s|x)$")>;
+def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^SUB(S)?(W|X)r(s|x)$")>;
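Read together with the FalkorWr_ADD variant defined in AArch64SchedFalkorWriteRes.td: a shifted/extended ADD whose shift qualifies as LSLFast, or whose source is the zero register, is modeled as a single 1-cycle micro-op on one of the X/Y/Z pipes; any other form expands to two micro-ops with 2-cycle latency, matching the unconditional SUB mapping above.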
+
+// SIMD Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN|XTN2)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "(S|U)QXTU?Nv.*$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPXv1i32, FRECPXv1i64)>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs URECPEv2i32, URSQRTEv2i32)>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc], (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>;
+
+def : InstRW<[FalkorWr_1VXVY_6cyc], (instrs FRECPS64, FRSQRTS64)>;
+
+def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],(instregex "^INSv(i32|i64)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>;
+def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs TBLv16i8One)>;
+
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs URECPEv4i32, URSQRTEv4i32)>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs TBLv8i8Two)>;
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^TBX(v8|v16)i8One$")>;
+
+def : InstRW<[FalkorWr_2VXVY_5cyc], (instrs FRECPSv4f32, FRSQRTSv4f32)>;
+
+def : InstRW<[FalkorWr_2VXVY_6cyc], (instrs FRECPSv2f64, FRSQRTSv2f64)>;
+
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBL(v8i8Three|v16i8Two)$")>;
+def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBX(v8i8Two|v16i8Two)$")>;
+
+def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBL(v8i8Four|v16i8Three)$")>;
+def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBX(v8i8Three|v16i8Three)$")>;
+
+def : InstRW<[FalkorWr_5VXVY_7cyc], (instrs TBLv16i8Four)>;
+def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>;
+
+// SIMD Store Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[WriteVST], (instregex "^ST1(One(v8b|v4h|v2s|v1d)(_POST)?|(i8|i16|i32|i64)(_POST)?|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
+def : InstRW<[WriteVST], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>;
+def : InstRW<[WriteVST, WriteAdr], (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
+def : InstRW<[WriteVST, WriteAdr], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>;
+
+def : InstRW<[WriteVST, WriteVST], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
+def : InstRW<[WriteVST, WriteVST], (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[WriteVST, WriteVST], (instregex "^ST3(i8|i16|i32|i64)$")>;
+def : InstRW<[WriteVST, WriteVST], (instregex "^ST4(i8|i16|i32|i64)$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST3(i8|i16|i32|i64)_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST4(i8|i16|i32|i64)_POST$")>;
+
+def : InstRW<[WriteV, WriteVST, WriteVST], (instregex "^ST3Three(v8b|v4h|v2s|v1d)$")>;
+def : InstRW<[WriteV, WriteVST, WriteVST, WriteAdr], (instregex "^ST3Three(v8b|v4h|v2s|v1d)_POST$")>;
+
+def : InstRW<[WriteVST, WriteVST, WriteVST], (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST], (instrs ST3Threev2d)>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr], (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr], (instrs ST3Threev2d_POST)>;
+
+def : InstRW<[WriteV, WriteV, WriteVST, WriteVST], (instregex "^ST4Four(v8b|v4h|v2s|v1d)$")>;
+def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteAdr], (instregex "^ST4Four(v8b|v4h|v2s|v1d)_POST$")>;
+
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], (instrs ST4Fourv2d)>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr], (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
+def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr], (instrs ST4Fourv2d_POST)>;
+
+def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST3Three(v16b|v8h|v4s)$")>;
+def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST3Three(v16b|v8h|v4s)_POST$")>;
+
+def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST4Four(v16b|v8h|v4s)$")>;
+def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST4Four(v16b|v8h|v4s)_POST$")>;
+
+// Branch Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1none_0cyc], (instrs B)>;
+def : InstRW<[FalkorWr_1Z_0cyc], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>;
+def : InstRW<[FalkorWr_1ZB_0cyc], (instrs Bcc)>;
+def : InstRW<[FalkorWr_1XYZB_0cyc], (instrs BL)>;
+def : InstRW<[FalkorWr_1Z_1XY_0cyc], (instrs BLR)>;
+
+// Cryptography Extensions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs SHA1Hrr)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs AESIMCrr, AESMCrr)>;
+def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs AESDrr, AESErr)>;
+def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>;
+def : InstRW<[FalkorWr_1VX_1VY_4cyc], (instregex "^SHA1(C|M|P)rrr$")>;
+def : InstRW<[FalkorWr_1VX_1VY_5cyc], (instrs SHA256H2rrr, SHA256Hrrr)>;
+def : InstRW<[FalkorWr_4VXVY_3cyc], (instrs SHA256SU1rrr)>;
+
+// FP Load Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[WriteLD], (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
+def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
+def : InstRW<[WriteLD], (instregex "^LDUR(Q|D|S|H|B)i$")>;
+def : InstRW<[FalkorWr_LDR], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDNPQi)>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDPQi)>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDNP(D|S)i$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDP(D|S)i$")>;
+def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi, WriteAdr],(instregex "LDP(D|S)(pre|post)$")>;
+def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi, WriteAdr],(instregex "^LDPQ(pre|post)$")>;
+
+// FP Data Processing Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(H|S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(H|S|D)r(r|i)$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P)(S|U)U(W|X)(H|S|D)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(H|S|D)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(H|S|D)rrr$")>;
+
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(H|S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(16|32|64)p$")>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTHSr, FCVTHDr)>;
+def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(H|S|D)r$")>;
+
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(16|32|64)$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(H|S|D)rr$")>;
+def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTSHr, FCVTDHr)>;
+
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTSDr, FCVTDSr)>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^F(N)?MUL(H|S)rr$")>;
+
+def : InstRW<[FalkorWr_1VXVY_6cyc], (instregex "^F(N)?MULDrr$")>;
+
+def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^FDIV(H|S|D)rr$")>;
+def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(H|S|D)r$")>;
+
+def : InstRW<[FalkorWr_1VXVY_5cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>;
+def : InstRW<[FalkorWr_1VXVY_6cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)Drrr$")>;
+// FP Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(HW|HX|SW|DX|DXHigh)r$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(S|U)(W|X)(D|S)ri?$")>;
+def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(WH|WS|XH|XD|XDHigh)r$")>;
+def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FMOV(Hi|Hr|S0|Si|Sr|D0|Di|Dr|v.*_ns)$")>;
+
+def : InstRW<[FalkorWr_1GTOV_4cyc], (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>;
+def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i16|v1i32|v2i32|v1i64|v4i16|v2f32|v4f16|d|s)(_shift)?")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v8i16|v2f64|v4f32|v8f16)(_shift)?")>;
+
+// Load Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>;
+def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>;
+
+def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDNP(W|X)i$")>;
+def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDP(W|X)i$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(B|H|W|X)ui$")>;
+def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(B|H|W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(W|X)l$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDTR(B|H|W|X)i$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(B|H|W|X)i$")>;
+
+def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
+def : InstRW<[FalkorWr_1LD_4cyc], (instrs LDRSWl)>;
+def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
+def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
+
+def : InstRW<[FalkorWr_PRFM], (instregex "^PRFMro(W|X)$")>;
+def : InstRW<[FalkorWr_LDR], (instregex "^LDR(B|H|W|X)ro(W|X)$")>;
+
+def : InstRW<[FalkorWr_LDRS], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
+
+def : InstRW<[FalkorWr_1LD_4cyc, WriteAdr],(instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
+def : InstRW<[WriteLD, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>;
+def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi],(instrs LDPSWi)>;
+def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi, WriteAdr],(instregex "^LDPSW(post|pre)$")>;
+// Miscellaneous Data-Processing Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>;
+def : InstRW<[FalkorWr_1X_2cyc], (instregex "^CRC32.*$")>;
+def : InstRW<[FalkorWr_1XYZ_2cyc], (instregex "^(CLS|CLZ|RBIT|REV|REV16|REV32)(W|X)r$")>;
+def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^EXTR(W|X)rri$")>;
+
+// Divide and Multiply Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1X_4cyc], (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
+def : InstRW<[FalkorWr_1X_4cyc], (instregex "^M(ADD|SUB)Wrrr$")>;
+
+def : InstRW<[FalkorWr_1X_5cyc], (instregex "^(S|U)MULHrr$")>;
+def : InstRW<[FalkorWr_1X_5cyc], (instregex "^M(ADD|SUB)Xrrr$")>;
+
+def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>;
+def : InstRW<[FalkorWr_1X_1Z_16cyc], (instregex "^(S|U)DIVXr$")>;
+
+def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)(MLAL|MLSL|MULL)v.*$")>;
+
+// Move and Shift Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV|MOVK)(W|X).*")>;
+def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^ADRP?$")>;
+def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^MOVN(W|X)i$")>;
+def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>;
+
+// Other Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[FalkorWr_1LD_0cyc], (instrs CLREX, DMB, DSB)>;
+def : InstRW<[FalkorWr_1none_0cyc], (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HLT, HVC, ISB, SMC, SVC)>;
+def : InstRW<[FalkorWr_1ST_0cyc], (instrs SYSxt, SYSLxt)>;
+def : InstRW<[FalkorWr_1Z_0cyc], (instrs MSRpstateImm1, MSRpstateImm4)>;
+
+def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^(LDAR(B|H|W|X)|LDAXP(W|X)|LDAXR(B|H|W|X)|LDXP(W|X)|LDXR(B|H|W|X))$")>;
+def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS)>;
+
+def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>;
+
+def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>;
+def : InstRW<[WriteVST], (instrs STNPDi, STNPSi)>;
+def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>;
+def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>;
+
+def : InstRW<[WriteST], (instregex "^LDC.*$")>;
+def : InstRW<[WriteST], (instregex "^STLR(B|H|W|X)$")>;
+def : InstRW<[WriteST], (instregex "^STXP(W|X)$")>;
+def : InstRW<[WriteST], (instregex "^STXR(B|H|W|X)$")>;
+
+def : InstRW<[WriteSTX], (instregex "^STLXP(W|X)$")>;
+def : InstRW<[WriteSTX], (instregex "^STLXR(B|H|W|X)$")>;
+def : InstRW<[WriteVST, WriteVST], (instrs STNPQi)>;
+
+// Store Instructions
+// -----------------------------------------------------------------------------
+def : InstRW<[WriteVST], (instregex "^STP(D|S)(i|post|pre)$")>;
+def : InstRW<[WriteST], (instregex "^STP(W|X)(i|post|pre)$")>;
+def : InstRW<[WriteST], (instregex "^STR(Q|D|S|BB|HH)ui$")>;
+def : InstRW<[WriteST], (instregex "^STUR(Q|D|S|BB|HH)i$")>;
+def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)(post|pre|ui)$")>;
+def : InstRW<[WriteST], (instregex "^STTR(B|H|W|X)i$")>;
+def : InstRW<[WriteST], (instregex "^STUR(B|H|W|X)i$")>;
+
+def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)ro(W|X)$")>;
+
+def : InstRW<[WriteVST, WriteVST], (instregex "^STPQ(i|post|pre)$")>;
diff --git a/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td b/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td
new file mode 100644
index 000000000000..9cdb4be4246b
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td
@@ -0,0 +1,361 @@
+//==- AArch64SchedFalkorWriteRes.td - Falkor Write Res ------*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains all of the Falkor-specific SchedWriteRes types. The approach
+// below is to define a generic SchedWriteRes for every combination of
+// latency and micro-op count. The naming convention is a prefix, one or more
+// micro-op count/type designators, and one field for the latency.
+// Prefix: FalkorWr
+// MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD)
+// Latency: #cyc
+//
+// e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued
+// down one Z pipe, six SD pipes, four VX pipes and the total latency is
+// six cycles.
+//
+// Contains all of the Falkor-specific ReadAdvance types for forwarding logic.
+//
+// Contains all of the Falkor-specific WriteVariant types for immediate zero
+// and LSLFast.
+//===----------------------------------------------------------------------===//
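As a concrete illustration of the scheme, a def for one LD micro-op plus two VXVY micro-ops with the result ready after five cycles would read as follows (hypothetical example; this particular def is not used below):

    def FalkorWr_1LD_2VXVY_5cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
                                                 FalkorUnitVXVY]> {
      let Latency = 5;
      let NumMicroOps = 3;
    }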
+
+//===----------------------------------------------------------------------===//
+// Define 1 micro-op types
+
+def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; }
+def FalkorWr_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
+def FalkorWr_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; }
+def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; }
+def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; }
+def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; }
+def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; }
+def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; }
+def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; }
+def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; }
+def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; }
+def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; }
+
+def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; }
+def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; }
+def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; }
+def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
+def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
+def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
+
+def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; }
+def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; }
+def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; }
+
+def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; }
+def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; }
+def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; }
+
+//===----------------------------------------------------------------------===//
+// Define 2 micro-op types
+
+def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_1XYZ_1LD_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_2LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_2GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_2XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
+ let Latency = 8;
+ let ResourceCycles = [2, 8];
+}
+
+def FalkorWr_1X_1Z_16cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
+ let Latency = 16;
+ let ResourceCycles = [2, 16];
+}
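A note on the two divide writes above: they are the only defs in this file that set ResourceCycles, so in addition to the result latency they keep the X pipe occupied for 2 cycles and the Z pipe for 8 (respectively 16) cycles per operation, modeling an unpipelined divider that stalls a following divide.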
+
+def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 3 micro-op types
+
+def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitZ]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 4 micro-op types
+
+def FalkorWr_2VX_2VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
+ FalkorUnitVX, FalkorUnitVY]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_4VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+def FalkorWr_4VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def FalkorWr_4VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+def FalkorWr_4VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_4LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 5 micro-op types
+
+def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+}
+def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+}
+def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitVXVY]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 6 micro-op types
+
+def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 8 micro-op types
+
+def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY,
+ FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 8;
+}
+
+//===----------------------------------------------------------------------===//
+// Define 9 micro-op types
+
+def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitXYZ,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 9;
+}
+
+def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
+ FalkorUnitLD, FalkorUnitVXVY,
+ FalkorUnitVXVY, FalkorUnitXYZ,
+ FalkorUnitLD, FalkorUnitLD,
+ FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 4;
+ let NumMicroOps = 9;
+}
+
+// Forwarding logic is modeled for vector multiply and accumulate
+// -----------------------------------------------------------------------------
+def FalkorReadVMA : SchedReadAdvance<2, [FalkorWr_1VXVY_4cyc,
+ FalkorWr_2VXVY_4cyc]>;
+def FalkorReadFMA : SchedReadAdvance<3, [FalkorWr_1VXVY_5cyc,
+ FalkorWr_1VXVY_6cyc,
+ FalkorWr_2VXVY_5cyc,
+ FalkorWr_2VXVY_6cyc]>;
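A worked reading of these forwarding entries: in a chain of FMLAs each mapped to FalkorWr_1VXVY_5cyc, FalkorReadFMA lets the accumulator operand be read 3 cycles early, so back-to-back dependent FMAs issue 5 - 3 = 2 cycles apart; FalkorReadVMA likewise turns the 4-cycle integer multiply-accumulate writes into an effective 4 - 2 = 2 cycle chain.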
+
+// SchedPredicates and WriteVariants for Immediate Zero and LSLFast
+// -----------------------------------------------------------------------------
+def FalkorImmZPred : SchedPredicate<[{TII->isGPRZero(*MI)}]>;
+def FalkorLSLFastPred : SchedPredicate<[{TII->isFalkorLSLFast(*MI)}]>;
+
+def FalkorWr_FMOV : SchedWriteVariant<[
+ SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1GTOV_1cyc]>]>;
+
+def FalkorWr_MOVZ : SchedWriteVariant<[
+ SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZB_1cyc]>]>;
+
+def FalkorWr_LDR : SchedWriteVariant<[
+ SchedVar<FalkorLSLFastPred, [FalkorWr_1LD_3cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_4cyc]>]>;
+
+def FalkorWr_ADD : SchedWriteVariant<[
+ SchedVar<FalkorLSLFastPred, [FalkorWr_1XYZ_1cyc]>,
+ SchedVar<FalkorImmZPred, [FalkorWr_1XYZ_1cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_2XYZ_2cyc]>]>;
+
+def FalkorWr_PRFM : SchedWriteVariant<[
+ SchedVar<FalkorLSLFastPred, [FalkorWr_1ST_3cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1ST_4cyc]>]>;
+
+def FalkorWr_LDRS : SchedWriteVariant<[
+ SchedVar<FalkorLSLFastPred, [FalkorWr_1LD_4cyc]>,
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_5cyc]>]>;
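The two-arm pattern above extends naturally to other instruction groups; for instance, a hypothetical variant (not part of this patch) that treats a move of the zero register as free would be:

    def FalkorWr_ExampleMOV : SchedWriteVariant<[
        SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
        SchedVar<NoSchedPred,    [FalkorWr_1XYZ_1cyc]>]>;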
diff --git a/lib/Target/AArch64/AArch64SchedKryoDetails.td b/lib/Target/AArch64/AArch64SchedKryoDetails.td
index 426ae6103e4b..02cccccd3078 100644
--- a/lib/Target/AArch64/AArch64SchedKryoDetails.td
+++ b/lib/Target/AArch64/AArch64SchedKryoDetails.td
@@ -776,23 +776,29 @@ def KryoWrite_4cyc_X_X_115ln :
}
def : InstRW<[KryoWrite_4cyc_X_X_115ln],
(instregex "FCVTZ(S|U)(v2f64|v4f32|(v2i64|v4i32)(_shift)?)$")>;
-def KryoWrite_1cyc_XA_Y_noRSV_43ln :
+def KryoWrite_10cyc_XA_Y_noRSV_43ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 3;
+ let Latency = 10; let NumMicroOps = 3;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_noRSV_43ln],
- (instrs FDIVDrr, FDIVSrr)>;
-def KryoWrite_1cyc_XA_Y_noRSV_121ln :
+def : InstRW<[KryoWrite_10cyc_XA_Y_noRSV_43ln],
+ (instrs FDIVSrr)>;
+def KryoWrite_14cyc_XA_Y_noRSV_43ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 3;
+ let Latency = 14; let NumMicroOps = 3;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_noRSV_121ln],
+def : InstRW<[KryoWrite_14cyc_XA_Y_noRSV_43ln],
+ (instrs FDIVDrr)>;
+def KryoWrite_10cyc_XA_Y_noRSV_121ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
+ let Latency = 10; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_10cyc_XA_Y_noRSV_121ln],
(instrs FDIVv2f32)>;
-def KryoWrite_1cyc_XA_Y_XA_Y_123ln :
+def KryoWrite_14cyc_XA_Y_XA_Y_123ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 4;
+ let Latency = 14; let NumMicroOps = 4;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_XA_Y_123ln],
+def : InstRW<[KryoWrite_14cyc_XA_Y_XA_Y_123ln],
(instrs FDIVv2f64, FDIVv4f32)>;
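+// Note: both Q-form divides issue as two XA/Y micro-op pairs (4 micro-ops)
+// and are modeled at the 14-cycle double-precision latency.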
def KryoWrite_5cyc_X_noRSV_55ln :
SchedWriteRes<[KryoUnitX]> {
@@ -968,24 +974,36 @@ def KryoWrite_2cyc_XY_XY_109ln :
}
def : InstRW<[KryoWrite_2cyc_XY_XY_109ln],
(instregex "FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)")>;
-def KryoWrite_1cyc_XA_Y_noRSV_42ln :
+def KryoWrite_12cyc_XA_Y_noRSV_42ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 3;
+ let Latency = 12; let NumMicroOps = 3;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_noRSV_42ln],
- (instregex "FSQRT(S|D)r")>;
-def KryoWrite_1cyc_XA_Y_noRSV_120ln :
+def : InstRW<[KryoWrite_12cyc_XA_Y_noRSV_42ln],
+ (instrs FSQRTSr)>;
+def KryoWrite_21cyc_XA_Y_noRSV_42ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 3;
+ let Latency = 21; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_21cyc_XA_Y_noRSV_42ln],
+ (instrs FSQRTDr)>;
+def KryoWrite_12cyc_XA_Y_noRSV_120ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY]> {
+ let Latency = 12; let NumMicroOps = 3;
+}
+def : InstRW<[KryoWrite_12cyc_XA_Y_noRSV_120ln],
+ (instrs FSQRTv2f32)>;
+def KryoWrite_21cyc_XA_Y_XA_Y_122ln :
+ SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> {
+ let Latency = 21; let NumMicroOps = 4;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_noRSV_120ln],
- (instregex "FSQRTv2f32")>;
-def KryoWrite_1cyc_XA_Y_XA_Y_122ln :
+def : InstRW<[KryoWrite_21cyc_XA_Y_XA_Y_122ln],
+ (instrs FSQRTv4f32)>;
+def KryoWrite_36cyc_XA_Y_XA_Y_122ln :
SchedWriteRes<[KryoUnitXA, KryoUnitY, KryoUnitXA, KryoUnitY]> {
- let Latency = 1; let NumMicroOps = 4;
+ let Latency = 36; let NumMicroOps = 4;
}
-def : InstRW<[KryoWrite_1cyc_XA_Y_XA_Y_122ln],
- (instregex "FSQRT(v2f64|v4f32)")>;
+def : InstRW<[KryoWrite_36cyc_XA_Y_XA_Y_122ln],
+ (instrs FSQRTv2f64)>;
def KryoWrite_1cyc_X_201ln :
SchedWriteRes<[KryoUnitX]> {
let Latency = 1; let NumMicroOps = 1;
diff --git a/lib/Target/AArch64/AArch64SchedM1.td b/lib/Target/AArch64/AArch64SchedM1.td
index 14d6891253fa..3fbbc0be682d 100644
--- a/lib/Target/AArch64/AArch64SchedM1.td
+++ b/lib/Target/AArch64/AArch64SchedM1.td
@@ -366,7 +366,8 @@ def : InstRW<[M1WriteNALU1], (instregex "^ZIP[12]v")>;
// Cryptography instructions.
def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
-def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AES")>;
+def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
+def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;
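+// Only the mix-columns ops consume a forwarded AES round result, so the
+// M1ReadAES advance is attached to AESMC/AESIMC alone; AESE/AESD keep the
+// default operand reads.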
def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>;
def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>;
diff --git a/lib/Target/AArch64/AArch64SchedThunderX.td b/lib/Target/AArch64/AArch64SchedThunderX.td
new file mode 100644
index 000000000000..9a0cb702518d
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SchedThunderX.td
@@ -0,0 +1,352 @@
+//==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the Cavium ThunderX T8X
+// (T88, T81, T83) processors.
+// Loosely based on the Cortex-A53 model, which is microarchitecturally
+// similar.
+//
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.
+
+// Cavium ThunderX T8X scheduling machine model.
+def ThunderXT8XModel : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops dispatched per cycle.
+ let MicroOpBufferSize = 0; // ThunderX T88/T81/T83 are in-order.
+ let LoadLatency = 3; // Optimistic load latency.
+ let MispredictPenalty = 8; // Branch mispredict penalty.
+ let PostRAScheduler = 1; // Use PostRA scheduler.
+ let CompleteModel = 1;
+}
+
+// Modeling each pipeline with BufferSize == 0 since T8X is in-order.
+def THXT8XUnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
+def THXT8XUnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC
+def THXT8XUnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division
+def THXT8XUnitLdSt : ProcResource<1> { let BufferSize = 0; } // Load/Store
+def THXT8XUnitBr : ProcResource<1> { let BufferSize = 0; } // Branch
+def THXT8XUnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU
+def THXT8XUnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mul/Div/Sqrt
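+
+// With BufferSize = 0, a micro-op must begin consuming its unit in the cycle
+// it issues or the in-order pipeline stalls; there is no reservation queue.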
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types mapping the ProcResources and
+// latencies.
+
+let SchedModel = ThunderXT8XModel in {
+
+// ALU
+def : WriteRes<WriteImm, [THXT8XUnitALU]> { let Latency = 1; }
+def : WriteRes<WriteI, [THXT8XUnitALU]> { let Latency = 1; }
+def : WriteRes<WriteISReg, [THXT8XUnitALU]> { let Latency = 2; }
+def : WriteRes<WriteIEReg, [THXT8XUnitALU]> { let Latency = 2; }
+def : WriteRes<WriteIS, [THXT8XUnitALU]> { let Latency = 2; }
+def : WriteRes<WriteExtr, [THXT8XUnitALU]> { let Latency = 2; }
+
+// MAC
+def : WriteRes<WriteIM32, [THXT8XUnitMAC]> {
+ let Latency = 4;
+ let ResourceCycles = [1];
+}
+
+def : WriteRes<WriteIM64, [THXT8XUnitMAC]> {
+ let Latency = 4;
+ let ResourceCycles = [1];
+}
+
+// Div
+def : WriteRes<WriteID32, [THXT8XUnitDiv]> {
+ let Latency = 12;
+ let ResourceCycles = [6];
+}
+
+def : WriteRes<WriteID64, [THXT8XUnitDiv]> {
+ let Latency = 14;
+ let ResourceCycles = [8];
+}
+
+// Load
+def : WriteRes<WriteLD, [THXT8XUnitLdSt]> { let Latency = 3; }
+def : WriteRes<WriteLDIdx, [THXT8XUnitLdSt]> { let Latency = 3; }
+def : WriteRes<WriteLDHi, [THXT8XUnitLdSt]> { let Latency = 3; }
+
+// Vector Load
+def : WriteRes<WriteVLD, [THXT8XUnitLdSt]> {
+ let Latency = 8;
+ let ResourceCycles = [3];
+}
+
+def THXT8XWriteVLD1 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 6;
+ let ResourceCycles = [1];
+}
+
+def THXT8XWriteVLD2 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 11;
+ let ResourceCycles = [7];
+}
+
+def THXT8XWriteVLD3 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 12;
+ let ResourceCycles = [8];
+}
+
+def THXT8XWriteVLD4 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 13;
+ let ResourceCycles = [9];
+}
+
+def THXT8XWriteVLD5 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 13;
+ let ResourceCycles = [9];
+}
+
+// Pre/Post Indexing
+def : WriteRes<WriteAdr, []> { let Latency = 0; }
+
+// Store
+def : WriteRes<WriteST, [THXT8XUnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTP, [THXT8XUnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTIdx, [THXT8XUnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTX, [THXT8XUnitLdSt]> { let Latency = 1; }
+
+// Vector Store
+def : WriteRes<WriteVST, [THXT8XUnitLdSt]>;
+def THXT8XWriteVST1 : SchedWriteRes<[THXT8XUnitLdSt]>;
+
+def THXT8XWriteVST2 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 10;
+ let ResourceCycles = [9];
+}
+
+def THXT8XWriteVST3 : SchedWriteRes<[THXT8XUnitLdSt]> {
+ let Latency = 11;
+ let ResourceCycles = [10];
+}
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// Branch
+def : WriteRes<WriteBr, [THXT8XUnitBr]>;
+def THXT8XWriteBR : SchedWriteRes<[THXT8XUnitBr]>;
+def : WriteRes<WriteBrReg, [THXT8XUnitBr]>;
+def THXT8XWriteBRR : SchedWriteRes<[THXT8XUnitBr]>;
+def THXT8XWriteRET : SchedWriteRes<[THXT8XUnitALU]>;
+def : WriteRes<WriteSys, [THXT8XUnitBr]>;
+def : WriteRes<WriteBarrier, [THXT8XUnitBr]>;
+def : WriteRes<WriteHint, [THXT8XUnitBr]>;
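+
+// SchedWriteRes defaults to Latency = 1, so the branch and return writes
+// above are all modeled as single-cycle.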
+
+// FP ALU
+def : WriteRes<WriteF, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCmp, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCvt, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFImm, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteV, [THXT8XUnitFPALU]> { let Latency = 6; }
+
+// FP Mul, Div, Sqrt
+def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }
+def : WriteRes<WriteFDiv, [THXT8XUnitFPMDS]> {
+ let Latency = 22;
+ let ResourceCycles = [19];
+}
+
+def THXT8XWriteFMAC : SchedWriteRes<[THXT8XUnitFPMDS]> { let Latency = 10; }
+
+def THXT8XWriteFDivSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+ let Latency = 12;
+ let ResourceCycles = [9];
+}
+
+def THXT8XWriteFDivDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+ let Latency = 22;
+ let ResourceCycles = [19];
+}
+
+def THXT8XWriteFSqrtSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+ let Latency = 17;
+ let ResourceCycles = [14];
+}
+
+def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
+ let Latency = 31;
+ let ResourceCycles = [28];
+}
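+
+// ResourceCycles close to Latency marks these as (nearly) unpipelined: e.g.
+// a second double-precision FSQRT can start only 28 cycles after the first,
+// 3 cycles before the first result is available.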
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedRead types.
+
+// Forwarding is modeled for these reads: the operand is not needed until one
+// or two cycles after issue.
+def : ReadAdvance<ReadExtrHi, 1>;
+def : ReadAdvance<ReadAdrBase, 2>;
+def : ReadAdvance<ReadVLD, 2>;
+
+// FIXME: This needs more targeted benchmarking.
+// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
+// operands are needed one cycle later if and only if they are to be
+// shifted. Otherwise, they too are needed two cycles later. This same
+// ReadAdvance applies to Extended registers as well, even though there is
+// a separate SchedPredicate for them.
+def : ReadAdvance<ReadI, 2, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+def THXT8XReadShifted : SchedReadAdvance<1, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+def THXT8XReadNotShifted : SchedReadAdvance<2, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+def THXT8XReadISReg : SchedReadVariant<[
+ SchedVar<RegShiftedPred, [THXT8XReadShifted]>,
+ SchedVar<NoSchedPred, [THXT8XReadNotShifted]>]>;
+def : SchedAlias<ReadISReg, THXT8XReadISReg>;
+
+def THXT8XReadIEReg : SchedReadVariant<[
+ SchedVar<RegExtendedPred, [THXT8XReadShifted]>,
+ SchedVar<NoSchedPred, [THXT8XReadNotShifted]>]>;
+def : SchedAlias<ReadIEReg, THXT8XReadIEReg>;
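+
+// Illustrative reading of the variants above: in "add x0, x1, x2, lsl #2"
+// the shifted x2 operand takes the 1-cycle THXT8XReadShifted path (the
+// instruction's shift amount is non-zero), while an unshifted register gets
+// the 2-cycle THXT8XReadNotShifted advance, matching the FIXME note above.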
+
+// MAC - Operands are generally needed one cycle later in the MAC pipe.
+// Accumulator operands are needed two cycles later.
+def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadIMA, 2, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+
+// Div
+def : ReadAdvance<ReadID, 1, [WriteImm, WriteI,
+ WriteISReg, WriteIEReg, WriteIS,
+ WriteID32, WriteID64,
+ WriteIM32, WriteIM64]>;
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific InstRW.
+
+//---
+// Branch
+//---
+def : InstRW<[THXT8XWriteBR], (instregex "^B$")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^BL$")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^Bcc$")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^CBNZ")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^CBZ")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^TBNZ")>;
+def : InstRW<[THXT8XWriteBR], (instregex "^TBZ")>;
+def : InstRW<[THXT8XWriteBRR], (instregex "^BR$")>;
+def : InstRW<[THXT8XWriteBRR], (instregex "^BLR$")>;
+
+//---
+// Ret
+//---
+def : InstRW<[THXT8XWriteRET], (instregex "^RET")>;
+
+//---
+// Miscellaneous
+//---
+def : InstRW<[WriteI], (instrs COPY)>;
+
+//---
+// Vector Loads
+//---
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD1i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
+def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
+
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVLD3], (instregex "LD3Threev(2d)$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD4i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVLD4], (instregex "LD4Fourv(2d)$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
+
+//---
+// Vector Stores
+//---
+def : InstRW<[THXT8XWriteVST1], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVST1], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVST2], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST3Threev(2d)$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
+
+def : InstRW<[THXT8XWriteVST2], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[THXT8XWriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[THXT8XWriteVST2], (instregex "ST4Fourv(2d)$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+
+//---
+// Floating Point MAC, DIV, SQRT
+//---
+def : InstRW<[THXT8XWriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
+def : InstRW<[THXT8XWriteFMAC], (instregex "^FML(A|S).*")>;
+def : InstRW<[THXT8XWriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[THXT8XWriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[THXT8XWriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[THXT8XWriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[THXT8XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[THXT8XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+
+}
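+
+// The model is hooked up to the T88/T81/T83 processor definitions in
+// AArch64.td (not part of this hunk); illustratively:
+//   def : ProcessorModel<"thunderxt88", ThunderXT8XModel, [...]>;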
diff --git a/lib/Target/AArch64/AArch64SchedVulcan.td b/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index 35a40c314bf4..3654eeca530a 100644
--- a/lib/Target/AArch64/AArch64SchedVulcan.td
+++ b/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -1,4 +1,4 @@
-//=- AArch64SchedVulcan.td - Vulcan Scheduling Defs ----------*- tablegen -*-=//
+//=- AArch64SchedThunderX2T99.td - Cavium ThunderX2 T99 Scheduling -*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -6,23 +6,23 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-// 1. Introduction
//
-// This file defines the machine model for Broadcom Vulcan to support
-// instruction scheduling and other instruction cost heuristics.
+// This file defines the scheduling model for Cavium ThunderX2T99
+// processors.
+// Based on Broadcom Vulcan.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// 2. Pipeline Description.
-def VulcanModel : SchedMachineModel {
+def ThunderX2T99Model : SchedMachineModel {
let IssueWidth = 4; // 4 micro-ops dispatched at a time.
let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer.
let LoadLatency = 4; // Optimistic load latency.
let MispredictPenalty = 12; // Extra cycles for mispredicted branch.
// Determined via a mix of micro-arch details and experimentation.
- let LoopMicroOpBufferSize = 32;
+ let LoopMicroOpBufferSize = 32;
let PostRAScheduler = 1; // Using PostRA sched.
let CompleteModel = 1;
}
@@ -30,155 +30,155 @@ def VulcanModel : SchedMachineModel {
// Define the issue ports.
// Port 0: ALU, FP/SIMD.
-def VulcanP0 : ProcResource<1>;
+def THX2T99P0 : ProcResource<1>;
// Port 1: ALU, FP/SIMD, integer mul/div.
-def VulcanP1 : ProcResource<1>;
+def THX2T99P1 : ProcResource<1>;
// Port 2: ALU, Branch.
-def VulcanP2 : ProcResource<1>;
+def THX2T99P2 : ProcResource<1>;
// Port 3: Store data.
-def VulcanP3 : ProcResource<1>;
+def THX2T99P3 : ProcResource<1>;
// Port 4: Load/store.
-def VulcanP4 : ProcResource<1>;
+def THX2T99P4 : ProcResource<1>;
// Port 5: Load/store.
-def VulcanP5 : ProcResource<1>;
+def THX2T99P5 : ProcResource<1>;
-let SchedModel = VulcanModel in {
+let SchedModel = ThunderX2T99Model in {
// Define groups for the functional units on each issue port. Each group
// created will be used by a WriteRes later on.
//
// NOTE: Some groups only contain one member. This is a way to create names for
// the various functional units that share a single issue port. For example,
-// "VulcanI1" for ALU ops on port 1 and "VulcanF1" for FP ops on port 1.
+// "THX2T99I1" for ALU ops on port 1 and "THX2T99F1" for FP ops on port 1.
// Integer divide and multiply micro-ops only on port 1.
-def VulcanI1 : ProcResGroup<[VulcanP1]>;
+def THX2T99I1 : ProcResGroup<[THX2T99P1]>;
// Branch micro-ops only on port 2.
-def VulcanI2 : ProcResGroup<[VulcanP2]>;
+def THX2T99I2 : ProcResGroup<[THX2T99P2]>;
// ALU micro-ops on ports 0, 1, and 2.
-def VulcanI012 : ProcResGroup<[VulcanP0, VulcanP1, VulcanP2]>;
+def THX2T99I012 : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2]>;
// Crypto FP/SIMD micro-ops only on port 1.
-def VulcanF1 : ProcResGroup<[VulcanP1]>;
+def THX2T99F1 : ProcResGroup<[THX2T99P1]>;
// FP/SIMD micro-ops on ports 0 and 1.
-def VulcanF01 : ProcResGroup<[VulcanP0, VulcanP1]>;
+def THX2T99F01 : ProcResGroup<[THX2T99P0, THX2T99P1]>;
// Store data micro-ops only on port 3.
-def VulcanSD : ProcResGroup<[VulcanP3]>;
+def THX2T99SD : ProcResGroup<[THX2T99P3]>;
// Load/store micro-ops on ports 4 and 5.
-def VulcanLS01 : ProcResGroup<[VulcanP4, VulcanP5]>;
+def THX2T99LS01 : ProcResGroup<[THX2T99P4, THX2T99P5]>;
// 60 entry unified scheduler.
-def VulcanAny : ProcResGroup<[VulcanP0, VulcanP1, VulcanP2,
- VulcanP3, VulcanP4, VulcanP5]> {
+def THX2T99Any : ProcResGroup<[THX2T99P0, THX2T99P1, THX2T99P2,
+ THX2T99P3, THX2T99P4, THX2T99P5]> {
let BufferSize=60;
}
// Define commonly used write types for InstRW specializations.
-// All definitions follow the format: VulcanWrite_<NumCycles>Cyc_<Resources>.
+// All definitions follow the format: THX2T99Write_<NumCycles>Cyc_<Resources>.
// 3 cycles on I1.
-def VulcanWrite_3Cyc_I1 : SchedWriteRes<[VulcanI1]> { let Latency = 3; }
+def THX2T99Write_3Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 3; }
// 4 cycles on I1.
-def VulcanWrite_4Cyc_I1 : SchedWriteRes<[VulcanI1]> { let Latency = 4; }
+def THX2T99Write_4Cyc_I1 : SchedWriteRes<[THX2T99I1]> { let Latency = 4; }
// 1 cycle on I0, I1, or I2.
-def VulcanWrite_1Cyc_I012 : SchedWriteRes<[VulcanI012]> { let Latency = 1; }
+def THX2T99Write_1Cyc_I012 : SchedWriteRes<[THX2T99I012]> { let Latency = 1; }
// 5 cycles on F1.
-def VulcanWrite_5Cyc_F1 : SchedWriteRes<[VulcanF1]> { let Latency = 5; }
+def THX2T99Write_5Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 5; }
// 7 cycles on F1.
-def VulcanWrite_7Cyc_F1 : SchedWriteRes<[VulcanF1]> { let Latency = 7; }
+def THX2T99Write_7Cyc_F1 : SchedWriteRes<[THX2T99F1]> { let Latency = 7; }
// 4 cycles on F0 or F1.
-def VulcanWrite_4Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 4; }
+def THX2T99Write_4Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 4; }
// 5 cycles on F0 or F1.
-def VulcanWrite_5Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 5; }
+def THX2T99Write_5Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 5; }
// 6 cycles on F0 or F1.
-def VulcanWrite_6Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 6; }
+def THX2T99Write_6Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 6; }
// 7 cycles on F0 or F1.
-def VulcanWrite_7Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 7; }
+def THX2T99Write_7Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 7; }
// 8 cycles on F0 or F1.
-def VulcanWrite_8Cyc_F01 : SchedWriteRes<[VulcanF01]> { let Latency = 8; }
+def THX2T99Write_8Cyc_F01 : SchedWriteRes<[THX2T99F01]> { let Latency = 8; }
// 16 cycles on F0 or F1.
-def VulcanWrite_16Cyc_F01 : SchedWriteRes<[VulcanF01]> {
+def THX2T99Write_16Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
let Latency = 16;
let ResourceCycles = [8];
}
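+// ResourceCycles below Latency models a partially pipelined divider: the
+// F0/F1 slot frees after 8 cycles even though the result takes 16.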
// 23 cycles on F0 or F1.
-def VulcanWrite_23Cyc_F01 : SchedWriteRes<[VulcanF01]> {
+def THX2T99Write_23Cyc_F01 : SchedWriteRes<[THX2T99F01]> {
let Latency = 23;
let ResourceCycles = [11];
}
 // 1 cycle on LS0 or LS1.
-def VulcanWrite_1Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 1; }
+def THX2T99Write_1Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 1; }
// 4 cycles on LS0 or LS1.
-def VulcanWrite_4Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 4; }
+def THX2T99Write_4Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 4; }
// 5 cycles on LS0 or LS1.
-def VulcanWrite_5Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 5; }
+def THX2T99Write_5Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 5; }
// 6 cycles on LS0 or LS1.
-def VulcanWrite_6Cyc_LS01 : SchedWriteRes<[VulcanLS01]> { let Latency = 6; }
+def THX2T99Write_6Cyc_LS01 : SchedWriteRes<[THX2T99LS01]> { let Latency = 6; }
// 5 cycles on LS0 or LS1 and I0, I1, or I2.
-def VulcanWrite_5Cyc_LS01_I012 : SchedWriteRes<[VulcanLS01, VulcanI012]> {
+def THX2T99Write_5Cyc_LS01_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> {
let Latency = 5;
let NumMicroOps = 2;
}
 // 6 cycles on LS0 or LS1 and 2 of I0, I1, or I2.
-def VulcanWrite_6Cyc_LS01_I012_I012 :
- SchedWriteRes<[VulcanLS01, VulcanI012, VulcanI012]> {
+def THX2T99Write_6Cyc_LS01_I012_I012 :
+ SchedWriteRes<[THX2T99LS01, THX2T99I012, THX2T99I012]> {
let Latency = 6;
let NumMicroOps = 3;
}
 // 1 cycle on LS0 or LS1 and F0 or F1.
-def VulcanWrite_1Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
+def THX2T99Write_1Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 1;
let NumMicroOps = 2;
}
// 5 cycles on LS0 or LS1 and F0 or F1.
-def VulcanWrite_5Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
+def THX2T99Write_5Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 5;
let NumMicroOps = 2;
}
// 6 cycles on LS0 or LS1 and F0 or F1.
-def VulcanWrite_6Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
+def THX2T99Write_6Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 6;
let NumMicroOps = 2;
}
// 7 cycles on LS0 or LS1 and F0 or F1.
-def VulcanWrite_7Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
+def THX2T99Write_7Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 7;
let NumMicroOps = 2;
}
// 8 cycles on LS0 or LS1 and F0 or F1.
-def VulcanWrite_8Cyc_LS01_F01 : SchedWriteRes<[VulcanLS01, VulcanF01]> {
+def THX2T99Write_8Cyc_LS01_F01 : SchedWriteRes<[THX2T99LS01, THX2T99F01]> {
let Latency = 8;
let NumMicroOps = 2;
}
@@ -202,7 +202,7 @@ def : ReadAdvance<ReadVLD, 0>;
//===----------------------------------------------------------------------===//
// 3. Instruction Tables.
-let SchedModel = VulcanModel in {
+let SchedModel = ThunderX2T99Model in {
//---
// 3.1 Branch Instructions
@@ -211,7 +211,7 @@ let SchedModel = VulcanModel in {
// Branch, immed
// Branch and link, immed
// Compare and branch
-def : WriteRes<WriteBr, [VulcanI2]> { let Latency = 1; }
+def : WriteRes<WriteBr, [THX2T99I2]> { let Latency = 1; }
def : WriteRes<WriteSys, []> { let Latency = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
@@ -222,7 +222,7 @@ def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
// Branch, register
// Branch and link, register != LR
// Branch and link, register = LR
-def : WriteRes<WriteBrReg, [VulcanI2]> { let Latency = 1; }
+def : WriteRes<WriteBrReg, [THX2T99I2]> { let Latency = 1; }
//---
// 3.2 Arithmetic and Logical Instructions
@@ -233,25 +233,25 @@ def : WriteRes<WriteBrReg, [VulcanI2]> { let Latency = 1; }
// Conditional compare
// Conditional select
// Address generation
-def : WriteRes<WriteI, [VulcanI012]> { let Latency = 1; }
+def : WriteRes<WriteI, [THX2T99I012]> { let Latency = 1; }
def : InstRW<[WriteI], (instrs COPY)>;
// ALU, extend and/or shift
-def : WriteRes<WriteISReg, [VulcanI012]> {
+def : WriteRes<WriteISReg, [THX2T99I012]> {
let Latency = 2;
let ResourceCycles = [2];
}
-def : WriteRes<WriteIEReg, [VulcanI012]> {
+def : WriteRes<WriteIEReg, [THX2T99I012]> {
let Latency = 2;
let ResourceCycles = [2];
}
// Move immed
-def : WriteRes<WriteImm, [VulcanI012]> { let Latency = 1; }
+def : WriteRes<WriteImm, [THX2T99I012]> { let Latency = 1; }
// Variable shift
-def : WriteRes<WriteIS, [VulcanI012]> { let Latency = 1; }
+def : WriteRes<WriteIS, [THX2T99I012]> { let Latency = 1; }
//---
// 3.4 Divide and Multiply Instructions
@@ -259,33 +259,33 @@ def : WriteRes<WriteIS, [VulcanI012]> { let Latency = 1; }
// Divide, W-form
// Latency range of 13-23. Take the average.
-def : WriteRes<WriteID32, [VulcanI1]> {
+def : WriteRes<WriteID32, [THX2T99I1]> {
let Latency = 18;
let ResourceCycles = [18];
}
// Divide, X-form
// Latency range of 13-39. Take the average.
-def : WriteRes<WriteID64, [VulcanI1]> {
+def : WriteRes<WriteID64, [THX2T99I1]> {
let Latency = 26;
let ResourceCycles = [26];
}
// Multiply accumulate, W-form
-def : WriteRes<WriteIM32, [VulcanI012]> { let Latency = 5; }
+def : WriteRes<WriteIM32, [THX2T99I012]> { let Latency = 5; }
// Multiply accumulate, X-form
-def : WriteRes<WriteIM64, [VulcanI012]> { let Latency = 5; }
+def : WriteRes<WriteIM64, [THX2T99I012]> { let Latency = 5; }
// Bitfield extract, two reg
-def : WriteRes<WriteExtr, [VulcanI012]> { let Latency = 1; }
+def : WriteRes<WriteExtr, [THX2T99I012]> { let Latency = 1; }
// Bitfield move, basic
// Bitfield move, insert
// NOTE: Handled by WriteIS.
// Count leading
-def : InstRW<[VulcanWrite_3Cyc_I1], (instregex "^CLS(W|X)r$",
+def : InstRW<[THX2T99Write_3Cyc_I1], (instregex "^CLS(W|X)r$",
"^CLZ(W|X)r$")>;
// Reverse bits/bytes
@@ -300,13 +300,13 @@ def : InstRW<[VulcanWrite_3Cyc_I1], (instregex "^CLS(W|X)r$",
// Load register, unscaled immed
// Load register, immed unprivileged
// Load register, unsigned immed
-def : WriteRes<WriteLD, [VulcanLS01]> { let Latency = 4; }
+def : WriteRes<WriteLD, [THX2T99LS01]> { let Latency = 4; }
// Load register, immed post-index
// NOTE: Handled by WriteLD, WriteI.
// Load register, immed pre-index
// NOTE: Handled by WriteLD, WriteAdr.
-def : WriteRes<WriteAdr, [VulcanI012]> { let Latency = 1; }
+def : WriteRes<WriteAdr, [THX2T99I012]> { let Latency = 1; }
// Load register offset, basic
// Load register, register offset, scale by 4/8
@@ -314,15 +314,15 @@ def : WriteRes<WriteAdr, [VulcanI012]> { let Latency = 1; }
// Load register offset, extend
// Load register, register offset, extend, scale by 4/8
// Load register, register offset, extend, scale by 2
-def VulcanWriteLDIdx : SchedWriteVariant<[
- SchedVar<ScaledIdxPred, [VulcanWrite_6Cyc_LS01_I012_I012]>,
- SchedVar<NoSchedPred, [VulcanWrite_5Cyc_LS01_I012]>]>;
-def : SchedAlias<WriteLDIdx, VulcanWriteLDIdx>;
+def THX2T99WriteLDIdx : SchedWriteVariant<[
+ SchedVar<ScaledIdxPred, [THX2T99Write_6Cyc_LS01_I012_I012]>,
+ SchedVar<NoSchedPred, [THX2T99Write_5Cyc_LS01_I012]>]>;
+def : SchedAlias<WriteLDIdx, THX2T99WriteLDIdx>;
-def VulcanReadAdrBase : SchedReadVariant<[
+def THX2T99ReadAdrBase : SchedReadVariant<[
SchedVar<ScaledIdxPred, [ReadDefault]>,
SchedVar<NoSchedPred, [ReadDefault]>]>;
-def : SchedAlias<ReadAdrBase, VulcanReadAdrBase>;
+def : SchedAlias<ReadAdrBase, THX2T99ReadAdrBase>;
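+// Both arms of THX2T99ReadAdrBase currently resolve to ReadDefault, so the
+// variant is a no-op today; presumably kept so a scaled-index distinction
+// can be modeled later.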
// Load pair, immed offset, normal
// Load pair, immed offset, signed words, base != SP
@@ -347,7 +347,7 @@ def : WriteRes<WriteLDHi, []> {
// Store register, unscaled immed
// Store register, immed unprivileged
// Store register, unsigned immed
-def : WriteRes<WriteST, [VulcanLS01, VulcanSD]> {
+def : WriteRes<WriteST, [THX2T99LS01, THX2T99SD]> {
let Latency = 1;
let NumMicroOps = 2;
}
@@ -364,14 +364,14 @@ def : WriteRes<WriteST, [VulcanLS01, VulcanSD]> {
// Store register, register offset, extend
// Store register, register offset, extend, scale by 4/8
// Store register, register offset, extend, scale by 1
-def : WriteRes<WriteSTIdx, [VulcanLS01, VulcanSD, VulcanI012]> {
+def : WriteRes<WriteSTIdx, [THX2T99LS01, THX2T99SD, THX2T99I012]> {
let Latency = 1;
let NumMicroOps = 3;
}
// Store pair, immed offset, W-form
// Store pair, immed offset, X-form
-def : WriteRes<WriteSTP, [VulcanLS01, VulcanSD]> {
+def : WriteRes<WriteSTP, [THX2T99LS01, THX2T99SD]> {
let Latency = 1;
let NumMicroOps = 2;
}
@@ -389,35 +389,35 @@ def : WriteRes<WriteSTP, [VulcanLS01, VulcanSD]> {
// FP absolute value
// FP min/max
// FP negate
-def : WriteRes<WriteF, [VulcanF01]> { let Latency = 5; }
+def : WriteRes<WriteF, [THX2T99F01]> { let Latency = 5; }
// FP arithmetic
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FADD", "^FSUB")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADD", "^FSUB")>;
// FP compare
-def : WriteRes<WriteFCmp, [VulcanF01]> { let Latency = 5; }
+def : WriteRes<WriteFCmp, [THX2T99F01]> { let Latency = 5; }
// FP divide, S-form
// FP square root, S-form
-def : WriteRes<WriteFDiv, [VulcanF01]> {
+def : WriteRes<WriteFDiv, [THX2T99F01]> {
let Latency = 16;
let ResourceCycles = [8];
}
// FP divide, D-form
// FP square root, D-form
-def : InstRW<[VulcanWrite_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>;
+def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVDrr, FSQRTDr)>;
// FP multiply
// FP multiply accumulate
-def : WriteRes<WriteFMul, [VulcanF01]> { let Latency = 6; }
+def : WriteRes<WriteFMul, [THX2T99F01]> { let Latency = 6; }
// FP round to integral
-def : InstRW<[VulcanWrite_7Cyc_F01],
+def : InstRW<[THX2T99Write_7Cyc_F01],
(instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;
// FP select
-def : InstRW<[VulcanWrite_4Cyc_F01], (instregex "^FCSEL")>;
+def : InstRW<[THX2T99Write_4Cyc_F01], (instregex "^FCSEL")>;
//---
// 3.9 FP Miscellaneous Instructions
@@ -426,16 +426,16 @@ def : InstRW<[VulcanWrite_4Cyc_F01], (instregex "^FCSEL")>;
// FP convert, from vec to vec reg
// FP convert, from gen to vec reg
// FP convert, from vec to gen reg
-def : WriteRes<WriteFCvt, [VulcanF01]> { let Latency = 7; }
+def : WriteRes<WriteFCvt, [THX2T99F01]> { let Latency = 7; }
// FP move, immed
// FP move, register
-def : WriteRes<WriteFImm, [VulcanF01]> { let Latency = 4; }
+def : WriteRes<WriteFImm, [THX2T99F01]> { let Latency = 4; }
// FP transfer, from gen to vec reg
// FP transfer, from vec to gen reg
-def : WriteRes<WriteFCopy, [VulcanF01]> { let Latency = 4; }
-def : InstRW<[VulcanWrite_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
+def : WriteRes<WriteFCopy, [THX2T99F01]> { let Latency = 4; }
+def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
//---
// 3.12 ASIMD Integer Instructions
@@ -470,39 +470,39 @@ def : InstRW<[VulcanWrite_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [VulcanF01]> { let Latency = 7; }
+def : WriteRes<WriteV, [THX2T99F01]> { let Latency = 7; }
// ASIMD arith, reduce, 4H/4S
// ASIMD arith, reduce, 8B/8H
// ASIMD arith, reduce, 16B
-def : InstRW<[VulcanWrite_5Cyc_F01],
+def : InstRW<[THX2T99Write_5Cyc_F01],
(instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
// ASIMD logical (MOV, MVN, ORN, ORR)
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ORRv", "^ORNv", "^NOTv")>;
// ASIMD polynomial (8x8) multiply long
-def : InstRW<[VulcanWrite_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instrs PMULLv8i8, PMULLv16i8)>;
//---
// 3.13 ASIMD Floating-point Instructions
//---
// ASIMD FP absolute value
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FABSv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FABSv")>;
// ASIMD FP arith, normal, D-form
// ASIMD FP arith, normal, Q-form
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FABDv", "^FADDv", "^FSUBv")>;
// ASIMD FP arith,pairwise, D-form
// ASIMD FP arith, pairwise, Q-form
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FADDPv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FADDPv")>;
// ASIMD FP compare, D-form
// ASIMD FP compare, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>;
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FACGEv", "^FACGTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
"^FCMGTv", "^FCMLEv",
"^FCMLTv")>;
@@ -513,42 +513,42 @@ def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FCMEQv", "^FCMGEv",
// NOTE: Handled by WriteV.
// ASIMD FP divide, D-form, F32
-def : InstRW<[VulcanWrite_16Cyc_F01], (instrs FDIVv2f32)>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv2f32)>;
// ASIMD FP divide, Q-form, F32
-def : InstRW<[VulcanWrite_16Cyc_F01], (instrs FDIVv4f32)>;
+def : InstRW<[THX2T99Write_16Cyc_F01], (instrs FDIVv4f32)>;
// ASIMD FP divide, Q-form, F64
-def : InstRW<[VulcanWrite_23Cyc_F01], (instrs FDIVv2f64)>;
+def : InstRW<[THX2T99Write_23Cyc_F01], (instrs FDIVv2f64)>;
// ASIMD FP max/min, normal, D-form
// ASIMD FP max/min, normal, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXv", "^FMAXNMv",
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXv", "^FMAXNMv",
"^FMINv", "^FMINNMv")>;
// ASIMD FP max/min, pairwise, D-form
// ASIMD FP max/min, pairwise, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXPv", "^FMAXNMPv",
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXPv", "^FMAXNMPv",
"^FMINPv", "^FMINNMPv")>;
// ASIMD FP max/min, reduce
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv",
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMAXVv", "^FMAXNMVv",
"^FMINVv", "^FMINNMVv")>;
// ASIMD FP multiply, D-form, FZ
// ASIMD FP multiply, D-form, no FZ
// ASIMD FP multiply, Q-form, FZ
// ASIMD FP multiply, Q-form, no FZ
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMULv", "^FMULXv")>;
// ASIMD FP multiply accumulate, Dform, FZ
// ASIMD FP multiply accumulate, Dform, no FZ
// ASIMD FP multiply accumulate, Qform, FZ
// ASIMD FP multiply accumulate, Qform, no FZ
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FMLAv", "^FMLSv")>;
// ASIMD FP negate
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FNEGv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FNEGv")>;
// ASIMD FP round, D-form
// ASIMD FP round, Q-form
@@ -559,39 +559,39 @@ def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FNEGv")>;
//--
// ASIMD bit reverse
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^RBITv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^RBITv")>;
// ASIMD bitwise insert, D-form
// ASIMD bitwise insert, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^BIFv", "^BITv", "^BSLv")>;
// ASIMD count, D-form
// ASIMD count, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^CLSv", "^CLZv", "^CNTv")>;
// ASIMD duplicate, gen reg
// ASIMD duplicate, element
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^DUPv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^DUPv")>;
// ASIMD extract
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^EXTv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^EXTv")>;
// ASIMD extract narrow
// ASIMD extract narrow, saturating
// NOTE: Handled by WriteV.
// ASIMD insert, element to element
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^INSv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>;
// ASIMD move, integer immed
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^MOVIv", "^MOVIDv")>;
// ASIMD move, FP immed
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^FMOVv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^FMOVv")>;
// ASIMD reciprocal estimate, D-form
// ASIMD reciprocal estimate, Q-form
-def : InstRW<[VulcanWrite_5Cyc_F01],
+def : InstRW<[THX2T99Write_5Cyc_F01],
(instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
"^FRSQRTEv", "^URSQRTEv")>;
@@ -599,31 +599,31 @@ def : InstRW<[VulcanWrite_5Cyc_F01],
// ASIMD reciprocal step, D-form, no FZ
// ASIMD reciprocal step, Q-form, FZ
// ASIMD reciprocal step, Q-form, no FZ
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^FRECPSv", "^FRSQRTSv")>;
// ASIMD reverse
-def : InstRW<[VulcanWrite_5Cyc_F01],
+def : InstRW<[THX2T99Write_5Cyc_F01],
(instregex "^REV16v", "^REV32v", "^REV64v")>;
// ASIMD table lookup, D-form
// ASIMD table lookup, Q-form
-def : InstRW<[VulcanWrite_8Cyc_F01], (instregex "^TBLv", "^TBXv")>;
+def : InstRW<[THX2T99Write_8Cyc_F01], (instregex "^TBLv", "^TBXv")>;
 // ASIMD transfer, element to word or dword
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^UMOVv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^UMOVv")>;
// ASIMD transfer, element to gen reg
-def : InstRW<[VulcanWrite_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>;
+def : InstRW<[THX2T99Write_6Cyc_F01], (instregex "^SMOVv", "^UMOVv")>;
// ASIMD transfer gen reg to element
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^INSv")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^INSv")>;
// ASIMD transpose
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^TRN1v", "^TRN2v",
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^TRN1v", "^TRN2v",
"^UZP1v", "^UZP2v")>;
// ASIMD unzip/zip
-def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>;
+def : InstRW<[THX2T99Write_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>;
//--
// 3.15 ASIMD Load Instructions
@@ -631,114 +631,114 @@ def : InstRW<[VulcanWrite_5Cyc_F01], (instregex "^ZIP1v", "^ZIP2v")>;
// ASIMD load, 1 element, multiple, 1 reg, D-form
// ASIMD load, 1 element, multiple, 1 reg, Q-form
-def : InstRW<[VulcanWrite_4Cyc_LS01],
+def : InstRW<[THX2T99Write_4Cyc_LS01],
(instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_4Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
(instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 2 reg, D-form
// ASIMD load, 1 element, multiple, 2 reg, Q-form
-def : InstRW<[VulcanWrite_4Cyc_LS01],
+def : InstRW<[THX2T99Write_4Cyc_LS01],
(instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_4Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_4Cyc_LS01, WriteAdr],
(instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 3 reg, D-form
// ASIMD load, 1 element, multiple, 3 reg, Q-form
-def : InstRW<[VulcanWrite_5Cyc_LS01],
+def : InstRW<[THX2T99Write_5Cyc_LS01],
(instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01, WriteAdr],
(instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, multiple, 4 reg, D-form
// ASIMD load, 1 element, multiple, 4 reg, Q-form
-def : InstRW<[VulcanWrite_6Cyc_LS01],
+def : InstRW<[THX2T99Write_6Cyc_LS01],
(instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_6Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01, WriteAdr],
(instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD1i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD1i(8|16|32|64)_POST$")>;
// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
// ASIMD load, 1 element, all lanes, Q-form
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, multiple, D-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01], (instregex "^LD2i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2i(8|16|32|64)_POST$")>;
// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
// ASIMD load, 2 element, all lanes, Q-form
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01],
(instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_5Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_5Cyc_LS01_F01, WriteAdr],
(instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 3 element, multiple, D-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_8Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
(instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_8Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
(instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
 // ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
-def : InstRW<[VulcanWrite_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_7Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01], (instregex "^LD3i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
(instregex "^LD3i(8|16|32|64)_POST$")>;
// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
-def : InstRW<[VulcanWrite_7Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01],
(instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_7Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_7Cyc_LS01_F01, WriteAdr],
(instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, multiple, D-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_8Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01],
(instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_8Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_8Cyc_LS01_F01, WriteAdr],
(instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
-def : InstRW<[VulcanWrite_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01], (instregex "^LD4i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
(instregex "^LD4i(8|16|32|64)_POST$")>;
// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
-def : InstRW<[VulcanWrite_6Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01],
(instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_6Cyc_LS01_F01, WriteAdr],
(instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
//--
@@ -747,82 +747,82 @@ def : InstRW<[VulcanWrite_6Cyc_LS01_F01, WriteAdr],
// ASIMD store, 1 element, multiple, 1 reg, D-form
// ASIMD store, 1 element, multiple, 1 reg, Q-form
-def : InstRW<[VulcanWrite_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 2 reg, D-form
// ASIMD store, 1 element, multiple, 2 reg, Q-form
-def : InstRW<[VulcanWrite_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 3 reg, D-form
// ASIMD store, 1 element, multiple, 3 reg, Q-form
-def : InstRW<[VulcanWrite_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, multiple, 4 reg, D-form
// ASIMD store, 1 element, multiple, 4 reg, Q-form
-def : InstRW<[VulcanWrite_1Cyc_LS01],
+def : InstRW<[THX2T99Write_1Cyc_LS01],
(instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01, WriteAdr],
(instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST1i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST1i(8|16|32|64)_POST$")>;
// ASIMD store, 2 element, multiple, D-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST2i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST2i(8|16|32|64)_POST$")>;
// ASIMD store, 3 element, multiple, D-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 3 element, one lane, B/H
// ASIMD store, 3 element, one lane, S
// ASIMD store, 3 element, one lane, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST3i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST3i(8|16|32|64)_POST$")>;
// ASIMD store, 4 element, multiple, D-form, B/H/S
// ASIMD store, 4 element, multiple, Q-form, B/H/S
// ASIMD store, 4 element, multiple, Q-form, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01],
(instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
// ASIMD store, 4 element, one lane, B/H
// ASIMD store, 4 element, one lane, S
// ASIMD store, 4 element, one lane, D
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>;
-def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01], (instregex "^ST4i(8|16|32|64)$")>;
+def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr],
(instregex "^ST4i(8|16|32|64)_POST$")>;
//--
@@ -830,23 +830,23 @@ def : InstRW<[VulcanWrite_1Cyc_LS01_F01, WriteAdr],
//--
// Crypto AES ops
-def : InstRW<[VulcanWrite_5Cyc_F1], (instregex "^AES")>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instregex "^AES")>;
// Crypto polynomial (64x64) multiply long
-def : InstRW<[VulcanWrite_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>;
+def : InstRW<[THX2T99Write_5Cyc_F1], (instrs PMULLv1i64, PMULLv2i64)>;
// Crypto SHA1 xor ops
// Crypto SHA1 schedule acceleration ops
// Crypto SHA256 schedule acceleration op (1 u-op)
// Crypto SHA256 schedule acceleration op (2 u-ops)
// Crypto SHA256 hash acceleration ops
-def : InstRW<[VulcanWrite_7Cyc_F1], (instregex "^SHA")>;
+def : InstRW<[THX2T99Write_7Cyc_F1], (instregex "^SHA")>;
//--
// 3.18 CRC
//--
// CRC checksum ops
-def : InstRW<[VulcanWrite_4Cyc_I1], (instregex "^CRC32")>;
+def : InstRW<[THX2T99Write_4Cyc_I1], (instregex "^CRC32")>;
-} // SchedModel = VulcanModel
+} // SchedModel = ThunderX2T99Model
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 66a8f332513a..7f5507371fa0 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -42,10 +42,12 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
Entry.Node = Size;
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(bzeroEntry, IntPtr), std::move(Args))
- .setDiscardResult();
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(bzeroEntry, IntPtr),
+ std::move(Args))
+ .setDiscardResult();
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
return CallResult.second;
}
@@ -53,7 +55,5 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
}
bool AArch64SelectionDAGInfo::generateFMAsInMachineCombiner(
CodeGenOpt::Level OptLevel) const {
- if (OptLevel >= CodeGenOpt::Aggressive)
- return true;
- return false;
+ return OptLevel >= CodeGenOpt::Aggressive;
}
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index 03e01329e036..b3aba4781db8 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -81,8 +81,22 @@ void AArch64Subtarget::initializeProperties() {
MinPrefetchStride = 1024;
MaxPrefetchIterationsAhead = 11;
break;
- case Vulcan:
+ case ThunderX2T99:
+ CacheLineSize = 64;
+ PrefFunctionAlignment = 3;
+ PrefLoopAlignment = 2;
MaxInterleaveFactor = 4;
+ PrefetchDistance = 128;
+ MinPrefetchStride = 1024;
+ MaxPrefetchIterationsAhead = 4;
+ break;
+ case ThunderX:
+ case ThunderXT88:
+ case ThunderXT81:
+ case ThunderXT83:
+ CacheLineSize = 128;
+ PrefFunctionAlignment = 3;
+ PrefLoopAlignment = 2;
break;
case CortexA35: break;
case CortexA53: break;
@@ -133,9 +147,9 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
return AArch64II::MO_GOT;
- // The small code mode's direct accesses use ADRP, which cannot necessarily
- // produce the value 0 (if the code is above 4GB).
- if (TM.getCodeModel() == CodeModel::Small && GV->hasExternalWeakLinkage())
+ // The small code model's direct accesses use ADRP, which cannot
+ // necessarily produce the value 0 (if the code is above 4GB).
+ if (useSmallAddressing() && GV->hasExternalWeakLinkage())
return AArch64II::MO_GOT;
return AArch64II::MO_NO_FLAG;
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index a99340225082..40ad9185012c 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -45,7 +45,11 @@ public:
ExynosM1,
Falkor,
Kryo,
- Vulcan
+ ThunderX2T99,
+ ThunderX,
+ ThunderXT81,
+ ThunderXT83,
+ ThunderXT88
};
protected:
@@ -61,9 +65,11 @@ protected:
bool HasCRC = false;
bool HasLSE = false;
bool HasRAS = false;
+ bool HasRDM = false;
bool HasPerfMon = false;
bool HasFullFP16 = false;
bool HasSPE = false;
+ bool HasLSLFast = false;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove = false;
@@ -73,6 +79,10 @@ protected:
// StrictAlign - Disallow unaligned memory accesses.
bool StrictAlign = false;
+
+  // NegativeImmediates - Transform instructions with negative immediates
+  // into equivalent forms using positive immediates.
+ bool NegativeImmediates = true;
+
bool UseAA = false;
bool PredictableSelectIsExpensive = false;
bool BalanceFPOps = false;
@@ -83,6 +93,8 @@ protected:
bool UseAlternateSExtLoadCVTF32Pattern = false;
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
+ bool HasFuseAES = false;
+ bool HasFuseLiterals = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
uint8_t MaxInterleaveFactor = 2;
@@ -183,6 +195,7 @@ public:
bool hasCRC() const { return HasCRC; }
bool hasLSE() const { return HasLSE; }
bool hasRAS() const { return HasRAS; }
+ bool hasRDM() const { return HasRDM; }
bool balanceFPOps() const { return BalanceFPOps; }
bool predictableSelectIsExpensive() const {
return PredictableSelectIsExpensive;
@@ -195,6 +208,8 @@ public:
}
bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
+ bool hasFuseAES() const { return HasFuseAES; }
+ bool hasFuseLiterals() const { return HasFuseLiterals; }
bool useRSqrt() const { return UseRSqrt; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
@@ -218,6 +233,7 @@ public:
bool hasPerfMon() const { return HasPerfMon; }
bool hasFullFP16() const { return HasFullFP16; }
bool hasSPE() const { return HasSPE; }
+ bool hasLSLFast() const { return HasLSLFast; }
bool isLittleEndian() const { return IsLittle; }
@@ -226,6 +242,7 @@ public:
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
+ bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
@@ -233,9 +250,17 @@ public:
bool useAA() const override { return UseAA; }
- /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
- /// that still makes it profitable to inline the call.
- unsigned getMaxInlineSizeThreshold() const { return 64; }
+ bool useSmallAddressing() const {
+ switch (TLInfo.getTargetMachine().getCodeModel()) {
+ case CodeModel::Kernel:
+ // Kernel is currently allowed only for Fuchsia targets,
+ // where it is the same as Small for almost all purposes.
+ case CodeModel::Small:
+ return true;
+ default:
+ return false;
+ }
+ }
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/lib/Target/AArch64/AArch64SystemOperands.td b/lib/Target/AArch64/AArch64SystemOperands.td
index a3736c0868fb..7c5dcb0853eb 100644
--- a/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/lib/Target/AArch64/AArch64SystemOperands.td
@@ -18,35 +18,37 @@ include "llvm/TableGen/SearchableTable.td"
// AT (address translate) instruction options.
//===----------------------------------------------------------------------===//
-class AT<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+class AT<string name, bits<3> op1, bits<4> crn, bits<4> crm,
bits<3> op2> : SearchableTable {
let SearchableFields = ["Name", "Encoding"];
let EnumValueField = "Encoding";
string Name = name;
- bits<16> Encoding;
- let Encoding{15-14} = op0;
+ bits<14> Encoding;
let Encoding{13-11} = op1;
let Encoding{10-7} = crn;
let Encoding{6-3} = crm;
let Encoding{2-0} = op2;
+ code Requires = [{ {} }];
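+  // Subtarget features required for this operation; empty by default and
+  // overridden for architecture-gated entries (e.g. the v8.2a ones below).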
}
-def : AT<"S1E1R", 0b01, 0b000, 0b0111, 0b1000, 0b000>;
-def : AT<"S1E2R", 0b01, 0b100, 0b0111, 0b1000, 0b000>;
-def : AT<"S1E3R", 0b01, 0b110, 0b0111, 0b1000, 0b000>;
-def : AT<"S1E1W", 0b01, 0b000, 0b0111, 0b1000, 0b001>;
-def : AT<"S1E2W", 0b01, 0b100, 0b0111, 0b1000, 0b001>;
-def : AT<"S1E3W", 0b01, 0b110, 0b0111, 0b1000, 0b001>;
-def : AT<"S1E0R", 0b01, 0b000, 0b0111, 0b1000, 0b010>;
-def : AT<"S1E0W", 0b01, 0b000, 0b0111, 0b1000, 0b011>;
-def : AT<"S12E1R", 0b01, 0b100, 0b0111, 0b1000, 0b100>;
-def : AT<"S12E1W", 0b01, 0b100, 0b0111, 0b1000, 0b101>;
-def : AT<"S12E0R", 0b01, 0b100, 0b0111, 0b1000, 0b110>;
-def : AT<"S12E0W", 0b01, 0b100, 0b0111, 0b1000, 0b111>;
-def : AT<"S1E1RP", 0b01, 0b000, 0b0111, 0b1001, 0b000>;
-def : AT<"S1E1WP", 0b01, 0b000, 0b0111, 0b1001, 0b001>;
-
+def : AT<"S1E1R", 0b000, 0b0111, 0b1000, 0b000>;
+def : AT<"S1E2R", 0b100, 0b0111, 0b1000, 0b000>;
+def : AT<"S1E3R", 0b110, 0b0111, 0b1000, 0b000>;
+def : AT<"S1E1W", 0b000, 0b0111, 0b1000, 0b001>;
+def : AT<"S1E2W", 0b100, 0b0111, 0b1000, 0b001>;
+def : AT<"S1E3W", 0b110, 0b0111, 0b1000, 0b001>;
+def : AT<"S1E0R", 0b000, 0b0111, 0b1000, 0b010>;
+def : AT<"S1E0W", 0b000, 0b0111, 0b1000, 0b011>;
+def : AT<"S12E1R", 0b100, 0b0111, 0b1000, 0b100>;
+def : AT<"S12E1W", 0b100, 0b0111, 0b1000, 0b101>;
+def : AT<"S12E0R", 0b100, 0b0111, 0b1000, 0b110>;
+def : AT<"S12E0W", 0b100, 0b0111, 0b1000, 0b111>;
+
+let Requires = [{ {AArch64::HasV8_2aOps} }] in {
+def : AT<"S1E1RP", 0b000, 0b0111, 0b1001, 0b000>;
+def : AT<"S1E1WP", 0b000, 0b0111, 0b1001, 0b001>;
+}
//===----------------------------------------------------------------------===//
// DMB/DSB (data barrier) instruction options.
@@ -77,28 +79,31 @@ def : DB<"sy", 0xf>;
// DC (data cache maintenance) instruction options.
//===----------------------------------------------------------------------===//
-class DC<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+class DC<string name, bits<3> op1, bits<4> crn, bits<4> crm,
bits<3> op2> : SearchableTable {
let SearchableFields = ["Name", "Encoding"];
let EnumValueField = "Encoding";
string Name = name;
- bits<16> Encoding;
- let Encoding{15-14} = op0;
+ bits<14> Encoding;
let Encoding{13-11} = op1;
let Encoding{10-7} = crn;
let Encoding{6-3} = crm;
let Encoding{2-0} = op2;
+ code Requires = [{ {} }];
}
-def : DC<"ZVA", 0b01, 0b011, 0b0111, 0b0100, 0b001>;
-def : DC<"IVAC", 0b01, 0b000, 0b0111, 0b0110, 0b001>;
-def : DC<"ISW", 0b01, 0b000, 0b0111, 0b0110, 0b010>;
-def : DC<"CVAC", 0b01, 0b011, 0b0111, 0b1010, 0b001>;
-def : DC<"CSW", 0b01, 0b000, 0b0111, 0b1010, 0b010>;
-def : DC<"CVAU", 0b01, 0b011, 0b0111, 0b1011, 0b001>;
-def : DC<"CIVAC", 0b01, 0b011, 0b0111, 0b1110, 0b001>;
-def : DC<"CISW", 0b01, 0b000, 0b0111, 0b1110, 0b010>;
+def : DC<"ZVA", 0b011, 0b0111, 0b0100, 0b001>;
+def : DC<"IVAC", 0b000, 0b0111, 0b0110, 0b001>;
+def : DC<"ISW", 0b000, 0b0111, 0b0110, 0b010>;
+def : DC<"CVAC", 0b011, 0b0111, 0b1010, 0b001>;
+def : DC<"CSW", 0b000, 0b0111, 0b1010, 0b010>;
+def : DC<"CVAU", 0b011, 0b0111, 0b1011, 0b001>;
+def : DC<"CIVAC", 0b011, 0b0111, 0b1110, 0b001>;
+def : DC<"CISW", 0b000, 0b0111, 0b1110, 0b010>;
+
+let Requires = [{ {AArch64::HasV8_2aOps} }] in
+def : DC<"CVAP", 0b011, 0b0111, 0b1100, 0b001>;
//===----------------------------------------------------------------------===//
// IC (instruction cache maintenance) instruction options.
@@ -120,7 +125,7 @@ class IC<string name, bits<3> op1, bits<4> crn, bits<4> crm, bits<3> op2,
def : IC<"IALLUIS", 0b000, 0b0111, 0b0001, 0b000, 0>;
def : IC<"IALLU", 0b000, 0b0111, 0b0101, 0b000, 0>;
-def : IC<"IVAU", 0b000, 0b0111, 0b0001, 0b000, 1>;
+def : IC<"IVAU", 0b011, 0b0111, 0b0101, 0b001, 1>;
//===----------------------------------------------------------------------===//
// ISB (instruction-fetch barrier) instruction options.
@@ -213,14 +218,13 @@ def : PSB<"csync", 0x11>;
// TLBI (translation lookaside buffer invalidate) instruction options.
//===----------------------------------------------------------------------===//
-class TLBI<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
+class TLBI<string name, bits<3> op1, bits<4> crn, bits<4> crm,
bits<3> op2, bit needsreg = 1> : SearchableTable {
let SearchableFields = ["Name", "Encoding"];
let EnumValueField = "Encoding";
string Name = name;
- bits<16> Encoding;
- let Encoding{15-14} = op0;
+ bits<14> Encoding;
let Encoding{13-11} = op1;
let Encoding{10-7} = crn;
let Encoding{6-3} = crm;
@@ -228,38 +232,38 @@ class TLBI<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
bit NeedsReg = needsreg;
}
-def : TLBI<"IPAS2E1IS", 0b01, 0b100, 0b1000, 0b0000, 0b001>;
-def : TLBI<"IPAS2LE1IS", 0b01, 0b100, 0b1000, 0b0000, 0b101>;
-def : TLBI<"VMALLE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b000, 0>;
-def : TLBI<"ALLE2IS", 0b01, 0b100, 0b1000, 0b0011, 0b000, 0>;
-def : TLBI<"ALLE3IS", 0b01, 0b110, 0b1000, 0b0011, 0b000, 0>;
-def : TLBI<"VAE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b001>;
-def : TLBI<"VAE2IS", 0b01, 0b100, 0b1000, 0b0011, 0b001>;
-def : TLBI<"VAE3IS", 0b01, 0b110, 0b1000, 0b0011, 0b001>;
-def : TLBI<"ASIDE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b010>;
-def : TLBI<"VAAE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b011>;
-def : TLBI<"ALLE1IS", 0b01, 0b100, 0b1000, 0b0011, 0b100, 0>;
-def : TLBI<"VALE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b101>;
-def : TLBI<"VALE2IS", 0b01, 0b100, 0b1000, 0b0011, 0b101>;
-def : TLBI<"VALE3IS", 0b01, 0b110, 0b1000, 0b0011, 0b101>;
-def : TLBI<"VMALLS12E1IS", 0b01, 0b100, 0b1000, 0b0011, 0b110, 0>;
-def : TLBI<"VAALE1IS", 0b01, 0b000, 0b1000, 0b0011, 0b111>;
-def : TLBI<"IPAS2E1", 0b01, 0b100, 0b1000, 0b0100, 0b001>;
-def : TLBI<"IPAS2LE1", 0b01, 0b100, 0b1000, 0b0100, 0b101>;
-def : TLBI<"VMALLE1", 0b01, 0b000, 0b1000, 0b0111, 0b000, 0>;
-def : TLBI<"ALLE2", 0b01, 0b100, 0b1000, 0b0111, 0b000, 0>;
-def : TLBI<"ALLE3", 0b01, 0b110, 0b1000, 0b0111, 0b000, 0>;
-def : TLBI<"VAE1", 0b01, 0b000, 0b1000, 0b0111, 0b001>;
-def : TLBI<"VAE2", 0b01, 0b100, 0b1000, 0b0111, 0b001>;
-def : TLBI<"VAE3", 0b01, 0b110, 0b1000, 0b0111, 0b001>;
-def : TLBI<"ASIDE1", 0b01, 0b000, 0b1000, 0b0111, 0b010>;
-def : TLBI<"VAAE1", 0b01, 0b000, 0b1000, 0b0111, 0b011>;
-def : TLBI<"ALLE1", 0b01, 0b100, 0b1000, 0b0111, 0b100, 0>;
-def : TLBI<"VALE1", 0b01, 0b000, 0b1000, 0b0111, 0b101>;
-def : TLBI<"VALE2", 0b01, 0b100, 0b1000, 0b0111, 0b101>;
-def : TLBI<"VALE3", 0b01, 0b110, 0b1000, 0b0111, 0b101>;
-def : TLBI<"VMALLS12E1", 0b01, 0b100, 0b1000, 0b0111, 0b110, 0>;
-def : TLBI<"VAALE1", 0b01, 0b000, 0b1000, 0b0111, 0b111>;
+def : TLBI<"IPAS2E1IS", 0b100, 0b1000, 0b0000, 0b001>;
+def : TLBI<"IPAS2LE1IS", 0b100, 0b1000, 0b0000, 0b101>;
+def : TLBI<"VMALLE1IS", 0b000, 0b1000, 0b0011, 0b000, 0>;
+def : TLBI<"ALLE2IS", 0b100, 0b1000, 0b0011, 0b000, 0>;
+def : TLBI<"ALLE3IS", 0b110, 0b1000, 0b0011, 0b000, 0>;
+def : TLBI<"VAE1IS", 0b000, 0b1000, 0b0011, 0b001>;
+def : TLBI<"VAE2IS", 0b100, 0b1000, 0b0011, 0b001>;
+def : TLBI<"VAE3IS", 0b110, 0b1000, 0b0011, 0b001>;
+def : TLBI<"ASIDE1IS", 0b000, 0b1000, 0b0011, 0b010>;
+def : TLBI<"VAAE1IS", 0b000, 0b1000, 0b0011, 0b011>;
+def : TLBI<"ALLE1IS", 0b100, 0b1000, 0b0011, 0b100, 0>;
+def : TLBI<"VALE1IS", 0b000, 0b1000, 0b0011, 0b101>;
+def : TLBI<"VALE2IS", 0b100, 0b1000, 0b0011, 0b101>;
+def : TLBI<"VALE3IS", 0b110, 0b1000, 0b0011, 0b101>;
+def : TLBI<"VMALLS12E1IS", 0b100, 0b1000, 0b0011, 0b110, 0>;
+def : TLBI<"VAALE1IS", 0b000, 0b1000, 0b0011, 0b111>;
+def : TLBI<"IPAS2E1", 0b100, 0b1000, 0b0100, 0b001>;
+def : TLBI<"IPAS2LE1", 0b100, 0b1000, 0b0100, 0b101>;
+def : TLBI<"VMALLE1", 0b000, 0b1000, 0b0111, 0b000, 0>;
+def : TLBI<"ALLE2", 0b100, 0b1000, 0b0111, 0b000, 0>;
+def : TLBI<"ALLE3", 0b110, 0b1000, 0b0111, 0b000, 0>;
+def : TLBI<"VAE1", 0b000, 0b1000, 0b0111, 0b001>;
+def : TLBI<"VAE2", 0b100, 0b1000, 0b0111, 0b001>;
+def : TLBI<"VAE3", 0b110, 0b1000, 0b0111, 0b001>;
+def : TLBI<"ASIDE1", 0b000, 0b1000, 0b0111, 0b010>;
+def : TLBI<"VAAE1", 0b000, 0b1000, 0b0111, 0b011>;
+def : TLBI<"ALLE1", 0b100, 0b1000, 0b0111, 0b100, 0>;
+def : TLBI<"VALE1", 0b000, 0b1000, 0b0111, 0b101>;
+def : TLBI<"VALE2", 0b100, 0b1000, 0b0111, 0b101>;
+def : TLBI<"VALE3", 0b110, 0b1000, 0b0111, 0b101>;
+def : TLBI<"VMALLS12E1", 0b100, 0b1000, 0b0111, 0b110, 0>;
+def : TLBI<"VAALE1", 0b000, 0b1000, 0b0111, 0b111>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index d2883941e2c4..dcc51bf02329 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -12,9 +12,11 @@
#include "AArch64.h"
#include "AArch64CallLowering.h"
-#include "AArch64InstructionSelector.h"
#include "AArch64LegalizerInfo.h"
+#include "AArch64MacroFusion.h"
+#ifdef LLVM_BUILD_GLOBAL_ISEL
#include "AArch64RegisterBankInfo.h"
+#endif
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
@@ -115,7 +117,7 @@ EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
static cl::opt<bool>
EnableAddressTypePromotion("aarch64-enable-type-promotion", cl::Hidden,
cl::desc("Enable the type promotion pass"),
- cl::init(true));
+ cl::init(false));
static cl::opt<bool>
EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
@@ -136,6 +138,11 @@ static cl::opt<bool>
cl::desc("Enable the loop data prefetch pass"),
cl::init(true));
+static cl::opt<int> EnableGlobalISelAtO(
+ "aarch64-enable-global-isel-at-O", cl::Hidden,
+ cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
+ cl::init(-1));
+
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
@@ -278,7 +285,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
// FIXME: At this point, we can't rely on Subtarget having RBI.
// It's awkward to mix passing RBI and the Subtarget; should we pass
// TII/TRI as well?
- GISel->InstSelector.reset(new AArch64InstructionSelector(*this, *I, *RBI));
+ GISel->InstSelector.reset(
+ createAArch64InstructionSelector(*this, *I, *RBI));
GISel->RegBankInfo.reset(RBI);
#endif
@@ -323,10 +331,24 @@ public:
ScheduleDAGMILive *DAG = createGenericSchedLive(C);
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
- DAG->addMutation(createMacroFusionDAGMutation(DAG->TII));
+ DAG->addMutation(createAArch64MacroFusionDAGMutation());
return DAG;
}
+ ScheduleDAGInstrs *
+ createPostMachineScheduler(MachineSchedContext *C) const override {
+ const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
+ if (ST.hasFuseLiterals()) {
+      // Run macro fusion again after RA, since literals are only expanded
+      // from pseudos at that point (see addPreSched2()).
+ ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+ DAG->addMutation(createAArch64MacroFusionDAGMutation());
+ return DAG;
+ }
+
+ return nullptr;
+ }
+
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
@@ -341,6 +363,8 @@ public:
void addPostRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
+
+ bool isGlobalISelEnabled() const override;
};
} // end anonymous namespace
@@ -450,6 +474,10 @@ bool AArch64PassConfig::addGlobalInstructionSelect() {
}
#endif
+bool AArch64PassConfig::isGlobalISelEnabled() const {
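+  // With the default of -1, no optimization level satisfies this, so
+  // GlobalISel stays disabled unless the flag is passed explicitly.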
+ return TM->getOptLevel() <= EnableGlobalISelAtO;
+}
+
bool AArch64PassConfig::addILPOpts() {
if (EnableCondOpt)
addPass(createAArch64ConditionOptimizerPass());
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 6fa5e83957e1..2c75a3258c1c 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -21,6 +21,8 @@
namespace llvm {
+class AArch64RegisterBankInfo;
+
class AArch64TargetMachine : public LLVMTargetMachine {
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b8833e5a5552..4d59da0c646d 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -176,7 +176,8 @@ AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
-int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -436,7 +437,7 @@ int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
}
int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) {
+ Type *CondTy, const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// We don't lower some vector selects well that are wider than the register
@@ -463,11 +464,12 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return Entry->Cost;
}
}
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
- unsigned Alignment, unsigned AddressSpace) {
+ unsigned Alignment, unsigned AddressSpace,
+ const Instruction *I) {
auto LT = TLI->getTypeLegalizationCost(DL, Ty);
if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
@@ -505,12 +507,14 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
unsigned NumElts = VecTy->getVectorNumElements();
- Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
- unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
+ auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
// ldN/stN only support legal vector types of size 64 or 128 in bits.
- if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
- return Factor;
+ // Accesses having vector types that are a multiple of 128 bits can be
+ // matched to more than one ldN/stN instruction.
+ if (NumElts % Factor == 0 &&
+ TLI->isLegalInterleavedAccessType(SubVecTy, DL))
+ return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
@@ -594,8 +598,6 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
case Intrinsic::aarch64_neon_ld4:
Info.ReadMem = true;
Info.WriteMem = false;
- Info.IsSimple = true;
- Info.NumMemRefs = 1;
Info.PtrVal = Inst->getArgOperand(0);
break;
case Intrinsic::aarch64_neon_st2:
@@ -603,8 +605,6 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
case Intrinsic::aarch64_neon_st4:
Info.ReadMem = false;
Info.WriteMem = true;
- Info.IsSimple = true;
- Info.NumMemRefs = 1;
Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
break;
}
@@ -628,6 +628,38 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
return true;
}
+/// See if \p I should be considered for address type promotion. We check if
+/// \p I is a sext with the right type that is used in memory accesses. If it
+/// is used in a "complex" getelementptr, we allow it to be promoted without
+/// finding other sext instructions that sign-extended the same initial value.
+/// A getelementptr is considered "complex" if it has more than 2 operands.
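+/// Note: this hook is consulted by CodeGenPrepare when it decides whether to
+/// promote sign extensions that feed address computations.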
+bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
+ bool Considerable = false;
+ AllowPromotionWithoutCommonHeader = false;
+ if (!isa<SExtInst>(&I))
+ return false;
+ Type *ConsideredSExtType =
+ Type::getInt64Ty(I.getParent()->getParent()->getContext());
+ if (I.getType() != ConsideredSExtType)
+ return false;
+  // The type was checked above; now see if the sext is used in at least one
+  // GetElementPtrInst.
+ for (const User *U : I.users()) {
+ if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
+ Considerable = true;
+      // A getelementptr is considered "complex" if it has more than 2
+      // operands. We will promote a SExt used in such a complex GEP, as we
+      // expect some computations to be merged if they are done on 64 bits.
+ if (GEPInst->getNumOperands() > 2) {
+ AllowPromotionWithoutCommonHeader = true;
+ break;
+ }
+ }
+ }
+ return Considerable;
+}
+
unsigned AArch64TTIImpl::getCacheLineSize() {
return ST->getCacheLineSize();
}
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 18287ed6653f..e37c003e064c 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -34,10 +34,6 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
const AArch64Subtarget *ST;
const AArch64TargetLowering *TLI;
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
-
const AArch64Subtarget *getST() const { return ST; }
const AArch64TargetLowering *getTLI() const { return TLI; }
@@ -90,7 +86,8 @@ public:
unsigned getMaxInterleaveFactor(unsigned VF);
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr);
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
unsigned Index);
@@ -107,10 +104,11 @@ public:
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
- int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I = nullptr);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, const Instruction *I = nullptr);
int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
@@ -125,6 +123,10 @@ public:
ArrayRef<unsigned> Indices, unsigned Alignment,
unsigned AddressSpace);
+ bool
+ shouldConsiderAddressTypePromotion(const Instruction &I,
+ bool &AllowPromotionWithoutCommonHeader);
+
unsigned getCacheLineSize();
unsigned getPrefetchDistance();
diff --git a/lib/Target/AArch64/AArch64VectorByElementOpt.cpp b/lib/Target/AArch64/AArch64VectorByElementOpt.cpp
index e3b1d7cea48d..f53af2315ec9 100644
--- a/lib/Target/AArch64/AArch64VectorByElementOpt.cpp
+++ b/lib/Target/AArch64/AArch64VectorByElementOpt.cpp
@@ -19,13 +19,27 @@
// is rewritten into
// dup v3.4s, v2.s[1]
// fmla v0.4s, v1.4s, v3.4s
+//
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <map>
using namespace llvm;
@@ -41,14 +55,15 @@ namespace {
struct AArch64VectorByElementOpt : public MachineFunctionPass {
static char ID;
- AArch64VectorByElementOpt() : MachineFunctionPass(ID) {
- initializeAArch64VectorByElementOptPass(*PassRegistry::getPassRegistry());
- }
const TargetInstrInfo *TII;
MachineRegisterInfo *MRI;
TargetSchedModel SchedModel;
+ AArch64VectorByElementOpt() : MachineFunctionPass(ID) {
+ initializeAArch64VectorByElementOptPass(*PassRegistry::getPassRegistry());
+ }
+
/// Based only on latency of instructions, determine if it is cost efficient
/// to replace the instruction InstDesc by the two instructions InstDescRep1
/// and InstDescRep2.
@@ -90,8 +105,10 @@ struct AArch64VectorByElementOpt : public MachineFunctionPass {
return AARCH64_VECTOR_BY_ELEMENT_OPT_NAME;
}
};
+
char AArch64VectorByElementOpt::ID = 0;
-} // namespace
+
+} // end anonymous namespace
INITIALIZE_PASS(AArch64VectorByElementOpt, "aarch64-vectorbyelement-opt",
AARCH64_VECTOR_BY_ELEMENT_OPT_NAME, false, false)
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index b86a283b40d4..cbab68979c56 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -74,6 +74,7 @@ private:
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
+ void createSysAlias(uint16_t Encoding, OperandVector &Operands, SMLoc S);
AArch64CC::CondCode parseCondCodeString(StringRef Cond);
bool parseCondCode(OperandVector &Operands, bool invertCondCode);
unsigned matchRegisterNameAlias(StringRef Name, bool isVector);
@@ -537,154 +538,15 @@ public:
return (Val % Scale) == 0 && Val >= 0 && (Val / Scale) < 0x1000;
}
- bool isImm0_1() const {
+ template <int N, int M>
+ bool isImmInRange() const {
if (!isImm())
return false;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
if (!MCE)
return false;
int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 2);
- }
-
- bool isImm0_7() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 8);
- }
-
- bool isImm1_8() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val > 0 && Val < 9);
- }
-
- bool isImm0_15() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 16);
- }
-
- bool isImm1_16() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val > 0 && Val < 17);
- }
-
- bool isImm0_31() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 32);
- }
-
- bool isImm1_31() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 1 && Val < 32);
- }
-
- bool isImm1_32() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 1 && Val < 33);
- }
-
- bool isImm0_63() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 64);
- }
-
- bool isImm1_63() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 1 && Val < 64);
- }
-
- bool isImm1_64() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 1 && Val < 65);
- }
-
- bool isImm0_127() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 128);
- }
-
- bool isImm0_255() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 256);
- }
-
- bool isImm0_65535() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 65536);
- }
-
- bool isImm32_63() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 32 && Val < 64);
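+    // The range check is inclusive on both ends; e.g. isImmInRange<1, 8>()
+    // replaces the old isImm1_8() check (Val > 0 && Val < 9).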
+ return (Val >= N && Val <= M);
}
bool isLogicalImm32() const {
@@ -804,31 +666,8 @@ public:
return AArch64_AM::isAdvSIMDModImmType10(MCE->getValue());
}
- bool isBranchTarget26() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return true;
- int64_t Val = MCE->getValue();
- if (Val & 0x3)
- return false;
- return (Val >= -(0x2000000 << 2) && Val <= (0x1ffffff << 2));
- }
-
- bool isPCRelLabel19() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return true;
- int64_t Val = MCE->getValue();
- if (Val & 0x3)
- return false;
- return (Val >= -(0x40000 << 2) && Val <= (0x3ffff << 2));
- }
-
- bool isBranchTarget14() const {
+ template<int N>
+ bool isBranchTarget() const {
if (!isImm())
return false;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
@@ -837,7 +676,8 @@ public:
int64_t Val = MCE->getValue();
if (Val & 0x3)
return false;
- return (Val >= -(0x2000 << 2) && Val <= (0x1fff << 2));
+ assert(N > 0 && "Branch target immediate cannot be 0 bits!");
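+      // e.g. with N = 26 this accepts word-aligned offsets in
+      // [-(1 << 25) << 2, ((1 << 25) - 1) << 2], matching the old
+      // isBranchTarget26().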
+ return (Val >= -((1<<(N-1)) << 2) && Val <= (((1<<(N-1))-1) << 2));
}
bool
@@ -2494,6 +2334,35 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
return MatchOperand_Success;
}
+static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
+ if (FBS[AArch64::HasV8_1aOps])
+ Str += "ARMv8.1a";
+ else if (FBS[AArch64::HasV8_2aOps])
+ Str += "ARMv8.2a";
+ else
+ Str += "(unknown)";
+}
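+// Used below to build diagnostics such as "AT S1E1RP requires ARMv8.2a",
+// matching the messages the removed hand-written checks produced.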
+
+void AArch64AsmParser::createSysAlias(uint16_t Encoding, OperandVector &Operands,
+ SMLoc S) {
+ const uint16_t Op2 = Encoding & 7;
+ const uint16_t Cm = (Encoding & 0x78) >> 3;
+ const uint16_t Cn = (Encoding & 0x780) >> 7;
+ const uint16_t Op1 = (Encoding & 0x3800) >> 11;
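+  // The extraction above mirrors the 14-bit searchable-table encoding:
+  // op2 in bits 2-0, CRm in 6-3, CRn in 10-7, op1 in 13-11.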
+
+ const MCExpr *Expr = MCConstantExpr::create(Op1, getContext());
+
+ Operands.push_back(
+ AArch64Operand::CreateImm(Expr, S, getLoc(), getContext()));
+ Operands.push_back(
+ AArch64Operand::CreateSysCR(Cn, S, getLoc(), getContext()));
+ Operands.push_back(
+ AArch64Operand::CreateSysCR(Cm, S, getLoc(), getContext()));
+ Expr = MCConstantExpr::create(Op2, getContext());
+ Operands.push_back(
+ AArch64Operand::CreateImm(Expr, S, getLoc(), getContext()));
+}
+
/// parseSysAlias - The IC, DC, AT, and TLBI instructions are simple aliases for
/// the SYS instruction. Parse them specially so that we create a SYS MCInst.
bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
@@ -2510,228 +2379,48 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
StringRef Op = Tok.getString();
SMLoc S = Tok.getLoc();
- const MCExpr *Expr = nullptr;
-
-#define SYS_ALIAS(op1, Cn, Cm, op2) \
- do { \
- Expr = MCConstantExpr::create(op1, getContext()); \
- Operands.push_back( \
- AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \
- Operands.push_back( \
- AArch64Operand::CreateSysCR(Cn, S, getLoc(), getContext())); \
- Operands.push_back( \
- AArch64Operand::CreateSysCR(Cm, S, getLoc(), getContext())); \
- Expr = MCConstantExpr::create(op2, getContext()); \
- Operands.push_back( \
- AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \
- } while (false)
-
if (Mnemonic == "ic") {
- if (!Op.compare_lower("ialluis")) {
- // SYS #0, C7, C1, #0
- SYS_ALIAS(0, 7, 1, 0);
- } else if (!Op.compare_lower("iallu")) {
- // SYS #0, C7, C5, #0
- SYS_ALIAS(0, 7, 5, 0);
- } else if (!Op.compare_lower("ivau")) {
- // SYS #3, C7, C5, #1
- SYS_ALIAS(3, 7, 5, 1);
- } else {
+ const AArch64IC::IC *IC = AArch64IC::lookupICByName(Op);
+ if (!IC)
return TokError("invalid operand for IC instruction");
+ else if (!IC->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("IC " + std::string(IC->Name) + " requires ");
+ setRequiredFeatureString(IC->getRequiredFeatures(), Str);
+ return TokError(Str.c_str());
}
+ createSysAlias(IC->Encoding, Operands, S);
} else if (Mnemonic == "dc") {
- if (!Op.compare_lower("zva")) {
- // SYS #3, C7, C4, #1
- SYS_ALIAS(3, 7, 4, 1);
- } else if (!Op.compare_lower("ivac")) {
- // SYS #3, C7, C6, #1
- SYS_ALIAS(0, 7, 6, 1);
- } else if (!Op.compare_lower("isw")) {
- // SYS #0, C7, C6, #2
- SYS_ALIAS(0, 7, 6, 2);
- } else if (!Op.compare_lower("cvac")) {
- // SYS #3, C7, C10, #1
- SYS_ALIAS(3, 7, 10, 1);
- } else if (!Op.compare_lower("csw")) {
- // SYS #0, C7, C10, #2
- SYS_ALIAS(0, 7, 10, 2);
- } else if (!Op.compare_lower("cvau")) {
- // SYS #3, C7, C11, #1
- SYS_ALIAS(3, 7, 11, 1);
- } else if (!Op.compare_lower("civac")) {
- // SYS #3, C7, C14, #1
- SYS_ALIAS(3, 7, 14, 1);
- } else if (!Op.compare_lower("cisw")) {
- // SYS #0, C7, C14, #2
- SYS_ALIAS(0, 7, 14, 2);
- } else if (!Op.compare_lower("cvap")) {
- if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) {
- // SYS #3, C7, C12, #1
- SYS_ALIAS(3, 7, 12, 1);
- } else {
- return TokError("DC CVAP requires ARMv8.2a");
- }
- } else {
+ const AArch64DC::DC *DC = AArch64DC::lookupDCByName(Op);
+ if (!DC)
return TokError("invalid operand for DC instruction");
+ else if (!DC->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("DC " + std::string(DC->Name) + " requires ");
+ setRequiredFeatureString(DC->getRequiredFeatures(), Str);
+ return TokError(Str.c_str());
}
+ createSysAlias(DC->Encoding, Operands, S);
} else if (Mnemonic == "at") {
- if (!Op.compare_lower("s1e1r")) {
- // SYS #0, C7, C8, #0
- SYS_ALIAS(0, 7, 8, 0);
- } else if (!Op.compare_lower("s1e2r")) {
- // SYS #4, C7, C8, #0
- SYS_ALIAS(4, 7, 8, 0);
- } else if (!Op.compare_lower("s1e3r")) {
- // SYS #6, C7, C8, #0
- SYS_ALIAS(6, 7, 8, 0);
- } else if (!Op.compare_lower("s1e1w")) {
- // SYS #0, C7, C8, #1
- SYS_ALIAS(0, 7, 8, 1);
- } else if (!Op.compare_lower("s1e2w")) {
- // SYS #4, C7, C8, #1
- SYS_ALIAS(4, 7, 8, 1);
- } else if (!Op.compare_lower("s1e3w")) {
- // SYS #6, C7, C8, #1
- SYS_ALIAS(6, 7, 8, 1);
- } else if (!Op.compare_lower("s1e0r")) {
- // SYS #0, C7, C8, #3
- SYS_ALIAS(0, 7, 8, 2);
- } else if (!Op.compare_lower("s1e0w")) {
- // SYS #0, C7, C8, #3
- SYS_ALIAS(0, 7, 8, 3);
- } else if (!Op.compare_lower("s12e1r")) {
- // SYS #4, C7, C8, #4
- SYS_ALIAS(4, 7, 8, 4);
- } else if (!Op.compare_lower("s12e1w")) {
- // SYS #4, C7, C8, #5
- SYS_ALIAS(4, 7, 8, 5);
- } else if (!Op.compare_lower("s12e0r")) {
- // SYS #4, C7, C8, #6
- SYS_ALIAS(4, 7, 8, 6);
- } else if (!Op.compare_lower("s12e0w")) {
- // SYS #4, C7, C8, #7
- SYS_ALIAS(4, 7, 8, 7);
- } else if (!Op.compare_lower("s1e1rp")) {
- if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) {
- // SYS #0, C7, C9, #0
- SYS_ALIAS(0, 7, 9, 0);
- } else {
- return TokError("AT S1E1RP requires ARMv8.2a");
- }
- } else if (!Op.compare_lower("s1e1wp")) {
- if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) {
- // SYS #0, C7, C9, #1
- SYS_ALIAS(0, 7, 9, 1);
- } else {
- return TokError("AT S1E1WP requires ARMv8.2a");
- }
- } else {
+ const AArch64AT::AT *AT = AArch64AT::lookupATByName(Op);
+ if (!AT)
return TokError("invalid operand for AT instruction");
+ else if (!AT->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("AT " + std::string(AT->Name) + " requires ");
+ setRequiredFeatureString(AT->getRequiredFeatures(), Str);
+ return TokError(Str.c_str());
}
+ createSysAlias(AT->Encoding, Operands, S);
} else if (Mnemonic == "tlbi") {
- if (!Op.compare_lower("vmalle1is")) {
- // SYS #0, C8, C3, #0
- SYS_ALIAS(0, 8, 3, 0);
- } else if (!Op.compare_lower("alle2is")) {
- // SYS #4, C8, C3, #0
- SYS_ALIAS(4, 8, 3, 0);
- } else if (!Op.compare_lower("alle3is")) {
- // SYS #6, C8, C3, #0
- SYS_ALIAS(6, 8, 3, 0);
- } else if (!Op.compare_lower("vae1is")) {
- // SYS #0, C8, C3, #1
- SYS_ALIAS(0, 8, 3, 1);
- } else if (!Op.compare_lower("vae2is")) {
- // SYS #4, C8, C3, #1
- SYS_ALIAS(4, 8, 3, 1);
- } else if (!Op.compare_lower("vae3is")) {
- // SYS #6, C8, C3, #1
- SYS_ALIAS(6, 8, 3, 1);
- } else if (!Op.compare_lower("aside1is")) {
- // SYS #0, C8, C3, #2
- SYS_ALIAS(0, 8, 3, 2);
- } else if (!Op.compare_lower("vaae1is")) {
- // SYS #0, C8, C3, #3
- SYS_ALIAS(0, 8, 3, 3);
- } else if (!Op.compare_lower("alle1is")) {
- // SYS #4, C8, C3, #4
- SYS_ALIAS(4, 8, 3, 4);
- } else if (!Op.compare_lower("vale1is")) {
- // SYS #0, C8, C3, #5
- SYS_ALIAS(0, 8, 3, 5);
- } else if (!Op.compare_lower("vaale1is")) {
- // SYS #0, C8, C3, #7
- SYS_ALIAS(0, 8, 3, 7);
- } else if (!Op.compare_lower("vmalle1")) {
- // SYS #0, C8, C7, #0
- SYS_ALIAS(0, 8, 7, 0);
- } else if (!Op.compare_lower("alle2")) {
- // SYS #4, C8, C7, #0
- SYS_ALIAS(4, 8, 7, 0);
- } else if (!Op.compare_lower("vale2is")) {
- // SYS #4, C8, C3, #5
- SYS_ALIAS(4, 8, 3, 5);
- } else if (!Op.compare_lower("vale3is")) {
- // SYS #6, C8, C3, #5
- SYS_ALIAS(6, 8, 3, 5);
- } else if (!Op.compare_lower("alle3")) {
- // SYS #6, C8, C7, #0
- SYS_ALIAS(6, 8, 7, 0);
- } else if (!Op.compare_lower("vae1")) {
- // SYS #0, C8, C7, #1
- SYS_ALIAS(0, 8, 7, 1);
- } else if (!Op.compare_lower("vae2")) {
- // SYS #4, C8, C7, #1
- SYS_ALIAS(4, 8, 7, 1);
- } else if (!Op.compare_lower("vae3")) {
- // SYS #6, C8, C7, #1
- SYS_ALIAS(6, 8, 7, 1);
- } else if (!Op.compare_lower("aside1")) {
- // SYS #0, C8, C7, #2
- SYS_ALIAS(0, 8, 7, 2);
- } else if (!Op.compare_lower("vaae1")) {
- // SYS #0, C8, C7, #3
- SYS_ALIAS(0, 8, 7, 3);
- } else if (!Op.compare_lower("alle1")) {
- // SYS #4, C8, C7, #4
- SYS_ALIAS(4, 8, 7, 4);
- } else if (!Op.compare_lower("vale1")) {
- // SYS #0, C8, C7, #5
- SYS_ALIAS(0, 8, 7, 5);
- } else if (!Op.compare_lower("vale2")) {
- // SYS #4, C8, C7, #5
- SYS_ALIAS(4, 8, 7, 5);
- } else if (!Op.compare_lower("vale3")) {
- // SYS #6, C8, C7, #5
- SYS_ALIAS(6, 8, 7, 5);
- } else if (!Op.compare_lower("vaale1")) {
- // SYS #0, C8, C7, #7
- SYS_ALIAS(0, 8, 7, 7);
- } else if (!Op.compare_lower("ipas2e1")) {
- // SYS #4, C8, C4, #1
- SYS_ALIAS(4, 8, 4, 1);
- } else if (!Op.compare_lower("ipas2le1")) {
- // SYS #4, C8, C4, #5
- SYS_ALIAS(4, 8, 4, 5);
- } else if (!Op.compare_lower("ipas2e1is")) {
- // SYS #4, C8, C4, #1
- SYS_ALIAS(4, 8, 0, 1);
- } else if (!Op.compare_lower("ipas2le1is")) {
- // SYS #4, C8, C4, #5
- SYS_ALIAS(4, 8, 0, 5);
- } else if (!Op.compare_lower("vmalls12e1")) {
- // SYS #4, C8, C7, #6
- SYS_ALIAS(4, 8, 7, 6);
- } else if (!Op.compare_lower("vmalls12e1is")) {
- // SYS #4, C8, C3, #6
- SYS_ALIAS(4, 8, 3, 6);
- } else {
+ const AArch64TLBI::TLBI *TLBI = AArch64TLBI::lookupTLBIByName(Op);
+ if (!TLBI)
return TokError("invalid operand for TLBI instruction");
+ else if (!TLBI->haveFeatures(getSTI().getFeatureBits())) {
+ std::string Str("TLBI " + std::string(TLBI->Name) + " requires ");
+ setRequiredFeatureString(TLBI->getRequiredFeatures(), Str);
+ return TokError(Str.c_str());
}
+ createSysAlias(TLBI->Encoding, Operands, S);
}
-#undef SYS_ALIAS
-
Parser.Lex(); // Eat operand.
bool ExpectRegister = (Op.lower().find("all") == StringRef::npos);
@@ -2744,12 +2433,10 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
HasRegister = true;
}
- if (ExpectRegister && !HasRegister) {
+ if (ExpectRegister && !HasRegister)
return TokError("specified " + Mnemonic + " op requires a register");
- }
- else if (!ExpectRegister && HasRegister) {
+ else if (!ExpectRegister && HasRegister)
return TokError("specified " + Mnemonic + " op does not use a register");
- }
if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
return true;
@@ -2884,7 +2571,6 @@ bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
/// parseRegister - Parse a non-vector register operand.
bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
// Try for a vector register.
if (!tryParseVectorRegister(Operands))
@@ -2897,30 +2583,6 @@ bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
Operands.push_back(
AArch64Operand::CreateReg(Reg, false, S, getLoc(), getContext()));
- // A small number of instructions (FMOVXDhighr, for example) have "[1]"
- // as a string token in the instruction itself.
- SMLoc LBracS = getLoc();
- const AsmToken &Tok = Parser.getTok();
- if (parseOptionalToken(AsmToken::LBrac)) {
- if (Tok.is(AsmToken::Integer)) {
- SMLoc IntS = getLoc();
- int64_t Val = Tok.getIntVal();
- if (Val == 1) {
- Parser.Lex();
- SMLoc RBracS = getLoc();
- if (parseOptionalToken(AsmToken::RBrac)) {
- Operands.push_back(
- AArch64Operand::CreateToken("[", false, LBracS, getContext()));
- Operands.push_back(
- AArch64Operand::CreateToken("1", false, IntS, getContext()));
- Operands.push_back(
- AArch64Operand::CreateToken("]", false, RBracS, getContext()));
- return false;
- }
- }
- }
- }
-
return false;
}
@@ -3696,6 +3358,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
return Error(Loc, "immediate must be an integer in range [0, 63].");
case Match_InvalidImm0_127:
return Error(Loc, "immediate must be an integer in range [0, 127].");
+ case Match_InvalidImm0_255:
+ return Error(Loc, "immediate must be an integer in range [0, 255].");
case Match_InvalidImm0_65535:
return Error(Loc, "immediate must be an integer in range [0, 65535].");
case Match_InvalidImm1_8:
@@ -4120,6 +3784,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidImm0_31:
case Match_InvalidImm0_63:
case Match_InvalidImm0_127:
+ case Match_InvalidImm0_255:
case Match_InvalidImm0_65535:
case Match_InvalidImm1_8:
case Match_InvalidImm1_16:
diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt
index 6bcf67fb3fef..6d0930c358f1 100644
--- a/lib/Target/AArch64/CMakeLists.txt
+++ b/lib/Target/AArch64/CMakeLists.txt
@@ -14,6 +14,7 @@ tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM AArch64GenSystemOperands.inc -gen-searchable-tables)
if(LLVM_BUILD_GLOBAL_ISEL)
+ tablegen(LLVM AArch64GenRegisterBank.inc -gen-register-bank)
tablegen(LLVM AArch64GenGlobalISel.inc -gen-global-isel)
endif()
@@ -55,6 +56,7 @@ add_llvm_target(AArch64CodeGen
AArch64ISelLowering.cpp
AArch64InstrInfo.cpp
AArch64LoadStoreOptimizer.cpp
+ AArch64MacroFusion.cpp
AArch64MCInstLower.cpp
AArch64PromoteConstant.cpp
AArch64PBQPRegAlloc.cpp
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index b4f85204714f..41ae70f85e58 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -16,12 +16,20 @@
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <string>
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -451,8 +459,8 @@ static const LdStNInstrDesc LdStNInstInfo[] = {
{ AArch64::LD3i64, "ld3", ".d", 1, true, 0 },
{ AArch64::LD3i8_POST, "ld3", ".b", 2, true, 3 },
{ AArch64::LD3i16_POST, "ld3", ".h", 2, true, 6 },
- { AArch64::LD3i32_POST, "ld3", ".s", 2, true, 12 },
- { AArch64::LD3i64_POST, "ld3", ".d", 2, true, 24 },
+ { AArch64::LD3i32_POST, "ld3", ".s", 2, true, 12 },
+ { AArch64::LD3i64_POST, "ld3", ".d", 2, true, 24 },
{ AArch64::LD3Rv16b, "ld3r", ".16b", 0, false, 0 },
{ AArch64::LD3Rv8h, "ld3r", ".8h", 0, false, 0 },
{ AArch64::LD3Rv4s, "ld3r", ".4s", 0, false, 0 },
@@ -731,7 +739,6 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
assert(Opcode == AArch64::SYSxt && "Invalid opcode for SYS alias!");
#endif
- const char *Asm = nullptr;
const MCOperand &Op1 = MI->getOperand(0);
const MCOperand &Cn = MI->getOperand(1);
const MCOperand &Cm = MI->getOperand(2);
@@ -742,230 +749,74 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI,
unsigned CmVal = Cm.getImm();
unsigned Op2Val = Op2.getImm();
+ uint16_t Encoding = Op2Val;
+ Encoding |= CmVal << 3;
+ Encoding |= CnVal << 7;
+ Encoding |= Op1Val << 11;
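+  // Reassemble the 14-bit encoding used by the searchable tables so the
+  // operands can be matched back to a named IC/DC/AT/TLBI operation.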
+
+ bool NeedsReg;
+ std::string Ins;
+ std::string Name;
+
if (CnVal == 7) {
switch (CmVal) {
- default:
- break;
-
+ default: return false;
// IC aliases
- case 1:
- if (Op1Val == 0 && Op2Val == 0)
- Asm = "ic\tialluis";
- break;
- case 5:
- if (Op1Val == 0 && Op2Val == 0)
- Asm = "ic\tiallu";
- else if (Op1Val == 3 && Op2Val == 1)
- Asm = "ic\tivau";
- break;
-
+ case 1: case 5: {
+ const AArch64IC::IC *IC = AArch64IC::lookupICByEncoding(Encoding);
+ if (!IC || !IC->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = IC->NeedsReg;
+ Ins = "ic\t";
+ Name = std::string(IC->Name);
+ }
+ break;
// DC aliases
- case 4:
- if (Op1Val == 3 && Op2Val == 1)
- Asm = "dc\tzva";
- break;
- case 6:
- if (Op1Val == 0 && Op2Val == 1)
- Asm = "dc\tivac";
- if (Op1Val == 0 && Op2Val == 2)
- Asm = "dc\tisw";
- break;
- case 10:
- if (Op1Val == 3 && Op2Val == 1)
- Asm = "dc\tcvac";
- else if (Op1Val == 0 && Op2Val == 2)
- Asm = "dc\tcsw";
- break;
- case 11:
- if (Op1Val == 3 && Op2Val == 1)
- Asm = "dc\tcvau";
- break;
- case 12:
- if (Op1Val == 3 && Op2Val == 1 &&
- (STI.getFeatureBits()[AArch64::HasV8_2aOps]))
- Asm = "dc\tcvap";
- break;
- case 14:
- if (Op1Val == 3 && Op2Val == 1)
- Asm = "dc\tcivac";
- else if (Op1Val == 0 && Op2Val == 2)
- Asm = "dc\tcisw";
- break;
-
+    case 4: case 6: case 10: case 11: case 12: case 14: {
+ const AArch64DC::DC *DC = AArch64DC::lookupDCByEncoding(Encoding);
+ if (!DC || !DC->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = true;
+ Ins = "dc\t";
+ Name = std::string(DC->Name);
+ }
+ break;
// AT aliases
- case 8:
- switch (Op1Val) {
- default:
- break;
- case 0:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "at\ts1e1r"; break;
- case 1: Asm = "at\ts1e1w"; break;
- case 2: Asm = "at\ts1e0r"; break;
- case 3: Asm = "at\ts1e0w"; break;
- }
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "at\ts1e2r"; break;
- case 1: Asm = "at\ts1e2w"; break;
- case 4: Asm = "at\ts12e1r"; break;
- case 5: Asm = "at\ts12e1w"; break;
- case 6: Asm = "at\ts12e0r"; break;
- case 7: Asm = "at\ts12e0w"; break;
- }
- break;
- case 6:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "at\ts1e3r"; break;
- case 1: Asm = "at\ts1e3w"; break;
- }
- break;
- }
- break;
- case 9:
- switch (Op1Val) {
- default:
- break;
- case 0:
- if (STI.getFeatureBits()[AArch64::HasV8_2aOps]) {
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "at\ts1e1rp"; break;
- case 1: Asm = "at\ts1e1wp"; break;
- }
- }
- break;
- }
+ case 8: case 9: {
+ const AArch64AT::AT *AT = AArch64AT::lookupATByEncoding(Encoding);
+ if (!AT || !AT->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = true;
+ Ins = "at\t";
+ Name = std::string(AT->Name);
+ }
+ break;
}
} else if (CnVal == 8) {
// TLBI aliases
- switch (CmVal) {
- default:
- break;
- case 3:
- switch (Op1Val) {
- default:
- break;
- case 0:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\tvmalle1is"; break;
- case 1: Asm = "tlbi\tvae1is"; break;
- case 2: Asm = "tlbi\taside1is"; break;
- case 3: Asm = "tlbi\tvaae1is"; break;
- case 5: Asm = "tlbi\tvale1is"; break;
- case 7: Asm = "tlbi\tvaale1is"; break;
- }
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\talle2is"; break;
- case 1: Asm = "tlbi\tvae2is"; break;
- case 4: Asm = "tlbi\talle1is"; break;
- case 5: Asm = "tlbi\tvale2is"; break;
- case 6: Asm = "tlbi\tvmalls12e1is"; break;
- }
- break;
- case 6:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\talle3is"; break;
- case 1: Asm = "tlbi\tvae3is"; break;
- case 5: Asm = "tlbi\tvale3is"; break;
- }
- break;
- }
- break;
- case 0:
- switch (Op1Val) {
- default:
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 1: Asm = "tlbi\tipas2e1is"; break;
- case 5: Asm = "tlbi\tipas2le1is"; break;
- }
- break;
- }
- break;
- case 4:
- switch (Op1Val) {
- default:
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 1: Asm = "tlbi\tipas2e1"; break;
- case 5: Asm = "tlbi\tipas2le1"; break;
- }
- break;
- }
- break;
- case 7:
- switch (Op1Val) {
- default:
- break;
- case 0:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\tvmalle1"; break;
- case 1: Asm = "tlbi\tvae1"; break;
- case 2: Asm = "tlbi\taside1"; break;
- case 3: Asm = "tlbi\tvaae1"; break;
- case 5: Asm = "tlbi\tvale1"; break;
- case 7: Asm = "tlbi\tvaale1"; break;
- }
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\talle2"; break;
- case 1: Asm = "tlbi\tvae2"; break;
- case 4: Asm = "tlbi\talle1"; break;
- case 5: Asm = "tlbi\tvale2"; break;
- case 6: Asm = "tlbi\tvmalls12e1"; break;
- }
- break;
- case 6:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\talle3"; break;
- case 1: Asm = "tlbi\tvae3"; break;
- case 5: Asm = "tlbi\tvale3"; break;
- }
- break;
- }
- break;
- }
+ const AArch64TLBI::TLBI *TLBI = AArch64TLBI::lookupTLBIByEncoding(Encoding);
+ if (!TLBI || !TLBI->haveFeatures(STI.getFeatureBits()))
+ return false;
+
+ NeedsReg = TLBI->NeedsReg;
+ Ins = "tlbi\t";
+ Name = std::string(TLBI->Name);
}
+ else
+ return false;
- if (Asm) {
- unsigned Reg = MI->getOperand(4).getReg();
+ std::string Str = Ins + Name;
+ std::transform(Str.begin(), Str.end(), Str.begin(), ::tolower);
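+  // Table names are stored upper-case (e.g. "VMALLE1IS"); printed assembly
+  // uses lower-case, as the old hand-written strings did.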
- O << '\t' << Asm;
- if (StringRef(Asm).lower().find("all") == StringRef::npos)
- O << ", " << getRegisterName(Reg);
- }
+ O << '\t' << Str;
+ if (NeedsReg)
+ O << ", " << getRegisterName(MI->getOperand(4).getReg());
- return Asm != nullptr;
+ return true;
}
void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 65dca99ed04e..a45258cb97b7 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
@@ -37,9 +38,11 @@ public:
unsigned PrintMethodIdx,
const MCSubtargetInfo &STI,
raw_ostream &O);
+
virtual StringRef getRegName(unsigned RegNo) const {
return getRegisterName(RegNo);
}
+
static const char *getRegisterName(unsigned RegNo,
unsigned AltIdx = AArch64::NoRegAltName);
@@ -177,12 +180,15 @@ public:
unsigned PrintMethodIdx,
const MCSubtargetInfo &STI,
raw_ostream &O) override;
+
StringRef getRegName(unsigned RegNo) const override {
return getRegisterName(RegNo);
}
+
static const char *getRegisterName(unsigned RegNo,
unsigned AltIdx = AArch64::NoRegAltName);
};
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 14c0327f5fa8..ebf05ae303dd 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -73,7 +73,7 @@ public:
}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
bool mayNeedRelaxation(const MCInst &Inst) const override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
@@ -138,15 +138,15 @@ static unsigned AdrImmBits(unsigned Value) {
}
static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- MCContext *Ctx) {
+ MCContext &Ctx) {
unsigned Kind = Fixup.getKind();
int64_t SignedValue = static_cast<int64_t>(Value);
switch (Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
case AArch64::fixup_aarch64_pcrel_adr_imm21:
- if (Ctx && (SignedValue > 2097151 || SignedValue < -2097152))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
+ if (SignedValue > 2097151 || SignedValue < -2097152)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
return AdrImmBits(Value & 0x1fffffULL);
case AArch64::fixup_aarch64_pcrel_adrp_imm21:
return AdrImmBits((Value & 0x1fffff000ULL) >> 12);
@@ -154,66 +154,65 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case AArch64::fixup_aarch64_pcrel_branch19:
// Signed 21-bit immediate
if (SignedValue > 2097151 || SignedValue < -2097152)
- if (Ctx) Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
- if (Ctx && (Value & 0x3))
- Ctx->reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value & 0x3)
+ Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
// Low two bits are not encoded.
return (Value >> 2) & 0x7ffff;
case AArch64::fixup_aarch64_add_imm12:
case AArch64::fixup_aarch64_ldst_imm12_scale1:
// Unsigned 12-bit immediate
- if (Ctx && Value >= 0x1000)
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value >= 0x1000)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
return Value;
case AArch64::fixup_aarch64_ldst_imm12_scale2:
// Unsigned 12-bit immediate which gets multiplied by 2
- if (Ctx && (Value >= 0x2000))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
- if (Ctx && (Value & 0x1))
- Ctx->reportError(Fixup.getLoc(), "fixup must be 2-byte aligned");
+ if (Value >= 0x2000)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value & 0x1)
+ Ctx.reportError(Fixup.getLoc(), "fixup must be 2-byte aligned");
return Value >> 1;
case AArch64::fixup_aarch64_ldst_imm12_scale4:
// Unsigned 12-bit immediate which gets multiplied by 4
- if (Ctx && (Value >= 0x4000))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
- if (Ctx && (Value & 0x3))
- Ctx->reportError(Fixup.getLoc(), "fixup must be 4-byte aligned");
+ if (Value >= 0x4000)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value & 0x3)
+ Ctx.reportError(Fixup.getLoc(), "fixup must be 4-byte aligned");
return Value >> 2;
case AArch64::fixup_aarch64_ldst_imm12_scale8:
// Unsigned 12-bit immediate which gets multiplied by 8
- if (Ctx && (Value >= 0x8000))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
- if (Ctx && (Value & 0x7))
- Ctx->reportError(Fixup.getLoc(), "fixup must be 8-byte aligned");
+ if (Value >= 0x8000)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value & 0x7)
+ Ctx.reportError(Fixup.getLoc(), "fixup must be 8-byte aligned");
return Value >> 3;
case AArch64::fixup_aarch64_ldst_imm12_scale16:
// Unsigned 12-bit immediate which gets multiplied by 16
- if (Ctx && (Value >= 0x10000))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
- if (Ctx && (Value & 0xf))
- Ctx->reportError(Fixup.getLoc(), "fixup must be 16-byte aligned");
+ if (Value >= 0x10000)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
+ if (Value & 0xf)
+ Ctx.reportError(Fixup.getLoc(), "fixup must be 16-byte aligned");
return Value >> 4;
case AArch64::fixup_aarch64_movw:
- if (Ctx)
- Ctx->reportError(Fixup.getLoc(),
- "no resolvable MOVZ/MOVK fixups supported yet");
+ Ctx.reportError(Fixup.getLoc(),
+ "no resolvable MOVZ/MOVK fixups supported yet");
return Value;
case AArch64::fixup_aarch64_pcrel_branch14:
// Signed 16-bit immediate
- if (Ctx && (SignedValue > 32767 || SignedValue < -32768))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
+ if (SignedValue > 32767 || SignedValue < -32768)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
// Low two bits are not encoded (4-byte alignment assumed).
- if (Ctx && (Value & 0x3))
- Ctx->reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
+ if (Value & 0x3)
+ Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
return (Value >> 2) & 0x3fff;
case AArch64::fixup_aarch64_pcrel_branch26:
case AArch64::fixup_aarch64_pcrel_call26:
// Signed 28-bit immediate
- if (Ctx && (SignedValue > 134217727 || SignedValue < -134217728))
- Ctx->reportError(Fixup.getLoc(), "fixup value out of range");
+ if (SignedValue > 134217727 || SignedValue < -134217728)
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
// Low two bits are not encoded (4-byte alignment assumed).
- if (Ctx && (Value & 0x3))
- Ctx->reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
+ if (Value & 0x3)
+ Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
return (Value >> 2) & 0x3ffffff;
case FK_Data_1:
case FK_Data_2:
@@ -264,13 +263,13 @@ unsigned AArch64AsmBackend::getFixupKindContainereSizeInBytes(unsigned Kind) con
void AArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ bool IsPCRel, MCContext &Ctx) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
if (!Value)
return; // Doesn't change encoding.
MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
// Apply any target-specific value adjustments.
- Value = adjustFixupValue(Fixup, Value, nullptr);
+ Value = adjustFixupValue(Fixup, Value, Ctx);
// Shift the value into position.
Value <<= Info.TargetOffset;
@@ -521,17 +520,6 @@ public:
return CompactUnwindEncoding;
}
-
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override {
- // Try to get the encoded value for the fixup as-if we're mapping it into
- // the instruction. This allows adjustFixupValue() to issue a diagnostic
- // if the value is invalid.
- if (IsResolved)
- (void)adjustFixupValue(Fixup, Value, &Asm.getContext());
- }
};
} // end anonymous namespace
@@ -575,12 +563,6 @@ void ELFAArch64AsmBackend::processFixupValue(
// to the linker -- a relocation!
if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21)
IsResolved = false;
-
- // Try to get the encoded value for the fixup as-if we're mapping it into
- // the instruction. This allows adjustFixupValue() to issue a diagnostic
- // if the value is invalid.
- if (IsResolved)
- (void)adjustFixupValue(Fixup, Value, &Asm.getContext());
}
}
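The adjustFixupValue hunks above all follow one pattern for the scaled load/store immediates: reject values that exceed the unsigned 12-bit field after scaling, reject values not aligned to the access size, then drop the implied low bits. A minimal standalone restatement of that check (hypothetical helper, not part of this patch):

    #include <cstdint>
    #include <cstdio>

    // Hypothetical standalone version of the ldst_imm12_scaleN checks above;
    // Scale is the access size in bytes: 1, 2, 4, 8 or 16.
    static bool encodeScaledImm12(uint64_t Value, unsigned Scale,
                                  uint64_t &Encoded) {
      if (Value >= 0x1000ULL * Scale)
        return false; // out of range for the unsigned 12-bit field
      if (Value & (Scale - 1))
        return false; // not aligned to the access size
      Encoded = Value / Scale; // the low log2(Scale) bits are implicit
      return true;
    }

    int main() {
      uint64_t Enc;
      // 0x48 is 8-byte aligned and in range for the 8-byte-scaled case.
      if (encodeScaledImm12(0x48, 8, Enc))
        printf("encoded imm12 = 0x%llx\n", (unsigned long long)Enc);
      return 0;
    }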
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 685907a2178e..271263507ae1 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -14,27 +14,23 @@
//===----------------------------------------------------------------------===//
#include "AArch64TargetStreamer.h"
-#include "llvm/MC/MCELFStreamer.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MCValue.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ELF.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
@@ -106,8 +102,8 @@ public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
- void EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) override {
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) override {
EmitA64MappingSymbol();
MCELFStreamer::EmitInstruction(Inst, STI);
}
@@ -180,6 +176,7 @@ private:
DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
ElfMappingSymbol LastEMS;
};
+
} // end anonymous namespace
AArch64ELFStreamer &AArch64TargetELFStreamer::getStreamer() {
@@ -191,6 +188,7 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) {
}
namespace llvm {
+
MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
MCInstPrinter *InstPrint,
@@ -214,4 +212,5 @@ createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
return new AArch64TargetELFStreamer(S);
return nullptr;
}
-}
+
+} // end namespace llvm
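The EmitInstruction override above inserts an A64 mapping symbol before the instruction bytes; the LastEMS fields visible in the class suggest the usual ELF mapping-symbol state machine, where "$x" is only emitted on a transition into a code run. A hedged sketch of that bookkeeping (names hypothetical):

    // Hypothetical sketch of per-section mapping-symbol state: emit "$x"
    // only on a transition into code, so back-to-back instructions do not
    // produce redundant mapping symbols.
    enum ElfMappingSymbol { EMS_None, EMS_A64, EMS_Data };

    static bool shouldEmitA64MappingSymbol(ElfMappingSymbol &LastEMS) {
      if (LastEMS == EMS_A64)
        return false; // already inside a code run
      LastEMS = EMS_A64;
      return true; // caller emits the "$x" mapping symbol here
    }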
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index e9d38d3dcf10..f710065d9bc7 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -84,9 +84,14 @@ static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM,
// no matter how far away they are.
else if (CM == CodeModel::JITDefault)
CM = CodeModel::Large;
- else if (CM != CodeModel::Small && CM != CodeModel::Large)
- report_fatal_error(
- "Only small and large code models are allowed on AArch64");
+ else if (CM != CodeModel::Small && CM != CodeModel::Large) {
+ if (!TT.isOSFuchsia())
+ report_fatal_error(
+ "Only small and large code models are allowed on AArch64");
+ else if (CM != CodeModel::Kernel)
+ report_fatal_error(
+ "Only small, kernel, and large code models are allowed on AArch64");
+ }
}
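Restated outside the patch, the Fuchsia change above means an explicitly requested CodeModel::Kernel is now accepted, but only for Fuchsia triples; everywhere else the Small/Large restriction stands. A sketch of the equivalent predicate (hypothetical helper, assuming LLVM's Triple and CodeGen headers):

    #include "llvm/ADT/Triple.h"
    #include "llvm/Support/CodeGen.h"

    using namespace llvm;

    // Hypothetical restatement of the check above: which explicitly
    // requested code models are accepted for an AArch64 target.
    static bool isAllowedAArch64CodeModel(const Triple &TT,
                                          CodeModel::Model CM) {
      if (CM == CodeModel::Small || CM == CodeModel::Large)
        return true;
      // Fuchsia additionally accepts the kernel code model.
      return CM == CodeModel::Kernel && TT.isOSFuchsia();
    }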
static MCInstPrinter *createAArch64MCInstPrinter(const Triple &T,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 53a68527ee8e..3d296ba4806b 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -16,14 +16,22 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/MachO.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
namespace {
+
class AArch64MachObjectWriter : public MCMachObjectTargetWriter {
bool getAArch64FixupKindMachOInfo(const MCFixup &Fixup, unsigned &RelocType,
const MCSymbolRefExpr *Sym,
@@ -38,7 +46,8 @@ public:
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) override;
};
-}
+
+} // end anonymous namespace
bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym,
@@ -51,18 +60,18 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
return false;
case FK_Data_1:
- Log2Size = llvm::Log2_32(1);
+ Log2Size = Log2_32(1);
return true;
case FK_Data_2:
- Log2Size = llvm::Log2_32(2);
+ Log2Size = Log2_32(2);
return true;
case FK_Data_4:
- Log2Size = llvm::Log2_32(4);
+ Log2Size = Log2_32(4);
if (Sym->getKind() == MCSymbolRefExpr::VK_GOT)
RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT);
return true;
case FK_Data_8:
- Log2Size = llvm::Log2_32(8);
+ Log2Size = Log2_32(8);
if (Sym->getKind() == MCSymbolRefExpr::VK_GOT)
RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT);
return true;
@@ -72,7 +81,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
case AArch64::fixup_aarch64_ldst_imm12_scale4:
case AArch64::fixup_aarch64_ldst_imm12_scale8:
case AArch64::fixup_aarch64_ldst_imm12_scale16:
- Log2Size = llvm::Log2_32(4);
+ Log2Size = Log2_32(4);
switch (Sym->getKind()) {
default:
return false;
@@ -87,14 +96,13 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
return true;
}
case AArch64::fixup_aarch64_pcrel_adrp_imm21:
- Log2Size = llvm::Log2_32(4);
+ Log2Size = Log2_32(4);
// This encompasses the relocation for the whole 21-bit value.
switch (Sym->getKind()) {
- default: {
+ default:
Asm.getContext().reportError(Fixup.getLoc(),
"ADR/ADRP relocations must be GOT relative");
return false;
- }
case MCSymbolRefExpr::VK_PAGE:
RelocType = unsigned(MachO::ARM64_RELOC_PAGE21);
return true;
@@ -108,7 +116,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
return true;
case AArch64::fixup_aarch64_pcrel_branch26:
case AArch64::fixup_aarch64_pcrel_call26:
- Log2Size = llvm::Log2_32(4);
+ Log2Size = Log2_32(4);
RelocType = unsigned(MachO::ARM64_RELOC_BRANCH26);
return true;
}
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index dcc39176031c..5d76681cd97b 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -266,82 +266,86 @@ inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) {
}
} // end namespace AArch64CC
+struct SysAlias {
+ const char *Name;
+ uint16_t Encoding;
+ FeatureBitset FeaturesRequired;
+
+  SysAlias(const char *N, uint16_t E) : Name(N), Encoding(E) {}
+  SysAlias(const char *N, uint16_t E, FeatureBitset F) :
+    Name(N), Encoding(E), FeaturesRequired(F) {}
+
+ bool haveFeatures(FeatureBitset ActiveFeatures) const {
+ return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
+ }
+
+ FeatureBitset getRequiredFeatures() const { return FeaturesRequired; }
+};
+
+struct SysAliasReg : SysAlias {
+ bool NeedsReg;
+  SysAliasReg(const char *N, uint16_t E, bool R)
+    : SysAlias(N, E), NeedsReg(R) {}
+};
+
namespace AArch64AT{
- struct AT {
- const char *Name;
- uint16_t Encoding;
+ struct AT : SysAlias {
+ using SysAlias::SysAlias;
};
-
#define GET_AT_DECL
#include "AArch64GenSystemOperands.inc"
-
}
+
namespace AArch64DB {
- struct DB {
- const char *Name;
- uint16_t Encoding;
+ struct DB : SysAlias {
+ using SysAlias::SysAlias;
};
-
#define GET_DB_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64DC {
- struct DC {
- const char *Name;
- uint16_t Encoding;
+ struct DC : SysAlias {
+ using SysAlias::SysAlias;
};
-
#define GET_DC_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64IC {
- struct IC {
- const char *Name;
- uint16_t Encoding;
- bool NeedsReg;
+ struct IC : SysAliasReg {
+ using SysAliasReg::SysAliasReg;
};
#define GET_IC_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64ISB {
- struct ISB {
- const char *Name;
- uint16_t Encoding;
+ struct ISB : SysAlias {
+ using SysAlias::SysAlias;
};
#define GET_ISB_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64PRFM {
- struct PRFM {
- const char *Name;
- uint16_t Encoding;
+ struct PRFM : SysAlias {
+ using SysAlias::SysAlias;
};
#define GET_PRFM_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64PState {
- struct PState {
- const char *Name;
- uint16_t Encoding;
- FeatureBitset FeaturesRequired;
-
- bool haveFeatures(FeatureBitset ActiveFeatures) const {
- return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
- }
+  struct PState : SysAlias {
+ using SysAlias::SysAlias;
};
#define GET_PSTATE_DECL
#include "AArch64GenSystemOperands.inc"
}
namespace AArch64PSBHint {
- struct PSB {
- const char *Name;
- uint16_t Encoding;
+ struct PSB : SysAlias {
+ using SysAlias::SysAlias;
};
#define GET_PSB_DECL
#include "AArch64GenSystemOperands.inc"
@@ -451,10 +455,8 @@ namespace AArch64SysReg {
}
namespace AArch64TLBI {
- struct TLBI {
- const char *Name;
- uint16_t Encoding;
- bool NeedsReg;
+ struct TLBI : SysAliasReg {
+ using SysAliasReg::SysAliasReg;
};
#define GET_TLBI_DECL
#include "AArch64GenSystemOperands.inc"
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 7b0a7f4b6058..8f6e1e7d8846 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -11,6 +11,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -23,6 +24,7 @@ class Pass;
class Target;
class TargetMachine;
class PassRegistry;
+class Module;
// R600 Passes
FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
@@ -37,6 +39,7 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
FunctionPass *createSITypeRewriter();
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSIFoldOperandsPass();
+FunctionPass *createSIPeepholeSDWAPass();
FunctionPass *createSILowerI1CopiesPass();
FunctionPass *createSIShrinkInstructionsPass();
FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm);
@@ -45,21 +48,32 @@ FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIFixSGPRCopiesPass();
FunctionPass *createSIDebuggerInsertNopsPass();
FunctionPass *createSIInsertWaitsPass();
+FunctionPass *createSIInsertWaitcntsPass();
FunctionPass *createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM = nullptr);
-ModulePass *createAMDGPUAnnotateKernelFeaturesPass();
+ModulePass *createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM = nullptr);
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
+ModulePass *createAMDGPULowerIntrinsicsPass(const TargetMachine *TM = nullptr);
+void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
+extern char &AMDGPULowerIntrinsicsID;
+
void initializeSIFoldOperandsPass(PassRegistry &);
extern char &SIFoldOperandsID;
+void initializeSIPeepholeSDWAPass(PassRegistry &);
+extern char &SIPeepholeSDWAID;
+
void initializeSIShrinkInstructionsPass(PassRegistry&);
extern char &SIShrinkInstructionsID;
void initializeSIFixSGPRCopiesPass(PassRegistry &);
extern char &SIFixSGPRCopiesID;
+void initializeSIFixVGPRCopiesPass(PassRegistry &);
+extern char &SIFixVGPRCopiesID;
+
void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
@@ -86,11 +100,11 @@ extern char &AMDGPUPromoteAllocaID;
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(TargetMachine &TM,
CodeGenOpt::Level OptLevel);
-ModulePass *createAMDGPUAlwaysInlinePass();
+ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
ModulePass *createAMDGPUOpenCLImageTypeLoweringPass();
FunctionPass *createAMDGPUAnnotateUniformValues();
-FunctionPass* createAMDGPUUnifyMetadataPass();
+ModulePass* createAMDGPUUnifyMetadataPass();
void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
extern char &AMDGPUUnifyMetadataID;
@@ -112,6 +126,15 @@ extern char &SIDebuggerInsertNopsID;
void initializeSIInsertWaitsPass(PassRegistry&);
extern char &SIInsertWaitsID;
+void initializeSIInsertWaitcntsPass(PassRegistry&);
+extern char &SIInsertWaitcntsID;
+
+void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
+extern char &AMDGPUUnifyDivergentExitNodesID;
+
+ImmutablePass *createAMDGPUAAWrapperPass();
+void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
+
Target &getTheAMDGPUTarget();
Target &getTheGCNTarget();
@@ -133,43 +156,53 @@ enum TargetIndex {
/// however on the GPU, each address space points to
/// a separate piece of memory that is unique from other
/// memory locations.
-namespace AMDGPUAS {
-enum AddressSpaces : unsigned {
- PRIVATE_ADDRESS = 0, ///< Address space for private memory.
- GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
- CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2)
- LOCAL_ADDRESS = 3, ///< Address space for local memory.
- FLAT_ADDRESS = 4, ///< Address space for flat memory.
- REGION_ADDRESS = 5, ///< Address space for region memory.
- PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0)
- PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1)
+struct AMDGPUAS {
+ // The following address space values depend on the triple environment.
+ unsigned PRIVATE_ADDRESS; ///< Address space for private memory.
+ unsigned FLAT_ADDRESS; ///< Address space for flat memory.
+ unsigned REGION_ADDRESS; ///< Address space for region memory.
+
+ // The maximum value for flat, generic, local, private, constant and region.
+ const static unsigned MAX_COMMON_ADDRESS = 5;
+
+ const static unsigned GLOBAL_ADDRESS = 1; ///< Address space for global memory (RAT0, VTX0).
+ const static unsigned CONSTANT_ADDRESS = 2; ///< Address space for constant memory (VTX2)
+ const static unsigned LOCAL_ADDRESS = 3; ///< Address space for local memory.
+  const static unsigned PARAM_D_ADDRESS = 6; ///< Address space for direct addressable parameter memory (CONST0)
+  const static unsigned PARAM_I_ADDRESS = 7; ///< Address space for indirect addressable parameter memory (VTX1)
// Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on this
// order to be able to dynamically index a constant buffer, for example:
//
// ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
- CONSTANT_BUFFER_0 = 8,
- CONSTANT_BUFFER_1 = 9,
- CONSTANT_BUFFER_2 = 10,
- CONSTANT_BUFFER_3 = 11,
- CONSTANT_BUFFER_4 = 12,
- CONSTANT_BUFFER_5 = 13,
- CONSTANT_BUFFER_6 = 14,
- CONSTANT_BUFFER_7 = 15,
- CONSTANT_BUFFER_8 = 16,
- CONSTANT_BUFFER_9 = 17,
- CONSTANT_BUFFER_10 = 18,
- CONSTANT_BUFFER_11 = 19,
- CONSTANT_BUFFER_12 = 20,
- CONSTANT_BUFFER_13 = 21,
- CONSTANT_BUFFER_14 = 22,
- CONSTANT_BUFFER_15 = 23,
+ const static unsigned CONSTANT_BUFFER_0 = 8;
+ const static unsigned CONSTANT_BUFFER_1 = 9;
+ const static unsigned CONSTANT_BUFFER_2 = 10;
+ const static unsigned CONSTANT_BUFFER_3 = 11;
+ const static unsigned CONSTANT_BUFFER_4 = 12;
+ const static unsigned CONSTANT_BUFFER_5 = 13;
+ const static unsigned CONSTANT_BUFFER_6 = 14;
+ const static unsigned CONSTANT_BUFFER_7 = 15;
+ const static unsigned CONSTANT_BUFFER_8 = 16;
+ const static unsigned CONSTANT_BUFFER_9 = 17;
+ const static unsigned CONSTANT_BUFFER_10 = 18;
+ const static unsigned CONSTANT_BUFFER_11 = 19;
+ const static unsigned CONSTANT_BUFFER_12 = 20;
+ const static unsigned CONSTANT_BUFFER_13 = 21;
+ const static unsigned CONSTANT_BUFFER_14 = 22;
+ const static unsigned CONSTANT_BUFFER_15 = 23;
// Some places use this if the address space can't be determined.
- UNKNOWN_ADDRESS_SPACE = ~0u
+ const static unsigned UNKNOWN_ADDRESS_SPACE = ~0u;
};
-} // namespace AMDGPUAS
+namespace llvm {
+namespace AMDGPU {
+AMDGPUAS getAMDGPUAS(const Module &M);
+AMDGPUAS getAMDGPUAS(const TargetMachine &TM);
+AMDGPUAS getAMDGPUAS(Triple T);
+} // namespace AMDGPU
+} // namespace llvm
#endif
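With AMDGPUAS turned into a struct, the triple-dependent numbers (private, flat, region) become runtime fields filled in by the getAMDGPUAS overloads declared above, while the stable ones remain static constants. A hedged usage sketch, assuming those declarations:

    // Sketch only: assumes the AMDGPUAS declarations added above.
    #include "AMDGPU.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    static bool isFlatOrPrivate(const Module &M, unsigned AddrSpace) {
      AMDGPUAS AS = AMDGPU::getAMDGPUAS(M); // fills triple-dependent fields
      return AddrSpace == AS.FLAT_ADDRESS || AddrSpace == AS.PRIVATE_ADDRESS;
    }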
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index 13022009af16..2c7a2d8962d0 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -67,12 +67,24 @@ def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
"Support unaligned global loads and stores"
>;
+def FeatureTrapHandler : SubtargetFeature<"trap-handler",
+ "TrapHandler",
+ "true",
+ "Trap handler support"
+>;
+
def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
"UnalignedScratchAccess",
"true",
"Support unaligned scratch loads and stores"
>;
+def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
+ "HasApertureRegs",
+ "true",
+ "Has Memory Aperture Base and Size Registers"
+>;
+
// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
// XNACK. The current default kernel driver setting is:
// - graphics ring: XNACK disabled
@@ -154,6 +166,12 @@ def FeatureCIInsts : SubtargetFeature<"ci-insts",
"Additional intstructions for CI+"
>;
+def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts",
+ "GFX9Insts",
+ "true",
+ "Additional intstructions for GFX9+"
+>;
+
def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
"HasSMemRealTime",
"true",
@@ -172,6 +190,12 @@ def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
"Has i16/f16 instructions"
>;
+def FeatureVOP3P : SubtargetFeature<"vop3p",
+ "HasVOP3PInsts",
+ "true",
+ "Has VOP3P packed instructions"
+>;
+
def FeatureMovrel : SubtargetFeature<"movrel",
"HasMovrel",
"true",
@@ -190,16 +214,22 @@ def FeatureScalarStores : SubtargetFeature<"scalar-stores",
"Has store scalar memory instructions"
>;
-//===------------------------------------------------------------===//
-// Subtarget Features (options and debugging)
-//===------------------------------------------------------------===//
+def FeatureSDWA : SubtargetFeature<"sdwa",
+ "HasSDWA",
+ "true",
+ "Support SDWA (Sub-DWORD Addressing) extension"
+>;
-def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
- "FP16Denormals",
+def FeatureDPP : SubtargetFeature<"dpp",
+ "HasDPP",
"true",
- "Enable half precision denormal handling"
+ "Support DPP (Data Parallel Primitives) extension"
>;
+//===------------------------------------------------------------===//
+// Subtarget Features (options and debugging)
+//===------------------------------------------------------------===//
+
// Some instructions do not support denormals despite this flag. Using
// fp32 denormals also causes instructions to run at the double
// precision rate for the device.
@@ -209,13 +239,36 @@ def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
"Enable single precision denormal handling"
>;
-def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
- "FP64Denormals",
+// Denormal handling for fp64 and fp16 is controlled by the same
+// config register when fp16 is supported.
+// TODO: Do we need a separate f16 setting when not legal?
+def FeatureFP64FP16Denormals : SubtargetFeature<"fp64-fp16-denormals",
+ "FP64FP16Denormals",
"true",
- "Enable double precision denormal handling",
+ "Enable double and half precision denormal handling",
[FeatureFP64]
>;
+def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
+ "FP64FP16Denormals",
+ "true",
+ "Enable double and half precision denormal handling",
+ [FeatureFP64, FeatureFP64FP16Denormals]
+>;
+
+def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
+ "FP64FP16Denormals",
+ "true",
+ "Enable half precision denormal handling",
+ [FeatureFP64FP16Denormals]
+>;
+
+def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
+ "DX10Clamp",
+ "true",
+ "clamp modifier clamps NaNs to 0.0"
+>;
+
def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
"FPExceptions",
"true",
@@ -343,7 +396,17 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
- FeatureScalarStores, FeatureInv2PiInlineImm
+ FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA,
+ FeatureDPP
+ ]
+>;
+
+def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
+ [FeatureFP64, FeatureLocalMemorySize65536,
+ FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
+ FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
+ FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
+ FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode
]
>;
@@ -399,6 +462,9 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
FeatureLDSBankCount16,
FeatureXNACK]>;
+def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,[]>;
+def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,[]>;
+
//===----------------------------------------------------------------------===//
// Debugger related subtarget features.
//===----------------------------------------------------------------------===//
@@ -504,14 +570,27 @@ def isVI : Predicate <
"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
AssemblerPredicate<"FeatureGCN3Encoding">;
+def isGFX9 : Predicate <
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
+ AssemblerPredicate<"FeatureGFX9Insts">;
+
+// TODO: Either change the name or simply use IsCI!
def isCIVI : Predicate <
- "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
- "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
->, AssemblerPredicate<"FeatureCIInsts">;
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
+ AssemblerPredicate<"FeatureCIInsts">;
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
-def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">;
+def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
+ AssemblerPredicate<"Feature16BitInsts">;
+def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
+ AssemblerPredicate<"FeatureVOP3P">;
+
+def HasSDWA : Predicate<"Subtarget->hasSDWA()">,
+ AssemblerPredicate<"FeatureSDWA">;
+
+def HasDPP : Predicate<"Subtarget->hasDPP()">,
+ AssemblerPredicate<"FeatureDPP">;
class PredicateControl {
Predicate SubtargetPredicate;
@@ -532,5 +611,6 @@ include "Processors.td"
include "AMDGPUInstrInfo.td"
include "AMDGPUIntrinsics.td"
include "AMDGPURegisterInfo.td"
+include "AMDGPURegisterBanks.td"
include "AMDGPUInstructions.td"
include "AMDGPUCallingConv.td"
diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
new file mode 100644
index 000000000000..3c99f48e818a
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -0,0 +1,147 @@
+//===- AMDGPUAliasAnalysis ---------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the AMDGPU address space based alias analysis pass.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUAliasAnalysis.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-aa"
+
+// Register this pass...
+char AMDGPUAAWrapperPass::ID = 0;
+INITIALIZE_PASS(AMDGPUAAWrapperPass, "amdgpu-aa",
+ "AMDGPU Address space based Alias Analysis", false, true)
+
+ImmutablePass *llvm::createAMDGPUAAWrapperPass() {
+ return new AMDGPUAAWrapperPass();
+}
+
+void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+}
+
+// Must match the table in getAliasResult.
+AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_)
+ : Arch(Arch_), AS(AS_) {
+  // These arrays are indexed by address space values 0 to 5.
+ static const AliasResult ASAliasRulesPrivIsZero[6][6] = {
+ /* Private Global Constant Group Flat Region*/
+ /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
+ /* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias, NoAlias},
+ /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, NoAlias},
+ /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias},
+ /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
+ /* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}
+ };
+ static const AliasResult ASAliasRulesGenIsZero[6][6] = {
+ /* Flat Global Constant Group Region Private */
+ /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
+ /* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , NoAlias},
+  /* Constant */ {MayAlias, NoAlias , MayAlias, NoAlias , NoAlias , NoAlias},
+  /* Group    */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias},
+  /* Region   */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias},
+ /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias}
+ };
+ assert(AS.MAX_COMMON_ADDRESS <= 5);
+ if (AS.FLAT_ADDRESS == 0) {
+ assert(AS.GLOBAL_ADDRESS == 1 &&
+ AS.REGION_ADDRESS == 4 &&
+ AS.LOCAL_ADDRESS == 3 &&
+ AS.CONSTANT_ADDRESS == 2 &&
+ AS.PRIVATE_ADDRESS == 5);
+ ASAliasRules = &ASAliasRulesGenIsZero;
+ } else {
+ assert(AS.PRIVATE_ADDRESS == 0 &&
+ AS.GLOBAL_ADDRESS == 1 &&
+ AS.CONSTANT_ADDRESS == 2 &&
+ AS.LOCAL_ADDRESS == 3 &&
+ AS.FLAT_ADDRESS == 4 &&
+ AS.REGION_ADDRESS == 5);
+ ASAliasRules = &ASAliasRulesPrivIsZero;
+ }
+}
+
+AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1,
+ unsigned AS2) const {
+ if (AS1 > AS.MAX_COMMON_ADDRESS || AS2 > AS.MAX_COMMON_ADDRESS) {
+ if (Arch == Triple::amdgcn)
+ report_fatal_error("Pointer address space out of range");
+ return AS1 == AS2 ? MayAlias : NoAlias;
+ }
+
+ return (*ASAliasRules)[AS1][AS2];
+}
+
+AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
+ const MemoryLocation &LocB) {
+ unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
+ unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();
+
+ AliasResult Result = ASAliasRules.getAliasResult(asA, asB);
+ if (Result == NoAlias) return Result;
+
+ // Forward the query to the next alias analysis.
+ return AAResultBase::alias(LocA, LocB);
+}
+
+bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+ bool OrLocal) {
+ const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
+
+ if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS) {
+ return true;
+ }
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
+ if (GV->isConstant())
+ return true;
+ } else if (const Argument *Arg = dyn_cast<Argument>(Base)) {
+ const Function *F = Arg->getParent();
+
+ // Only assume constant memory for arguments on kernels.
+ switch (F->getCallingConv()) {
+ default:
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ break;
+ }
+
+ unsigned ArgNo = Arg->getArgNo();
+    /* On an argument, the ReadOnly attribute indicates that the function
+       does not write through this pointer argument, even though it may
+       write to the memory that the pointer points to.
+       On an argument, the ReadNone attribute indicates that the function
+       does not dereference that pointer argument, even though it may read
+       or write the memory that the pointer points to if accessed through
+       other pointers. */
+ if (F->hasParamAttribute(ArgNo, Attribute::NoAlias) &&
+ (F->hasParamAttribute(ArgNo, Attribute::ReadNone) ||
+ F->hasParamAttribute(ArgNo, Attribute::ReadOnly))) {
+ return true;
+ }
+ }
+ return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
+}
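Since alias(A, B) and alias(B, A) must agree, the two 6x6 tables above have to be symmetric (they are), and getAliasResult is a plain table lookup after the range check. A small standalone sanity check of that invariant:

    #include <cassert>

    enum AliasResult { NoAlias, MayAlias };

    // The alias tables above must be symmetric: alias(A, B) == alias(B, A).
    static void checkSymmetric(const AliasResult (&Rules)[6][6]) {
      for (unsigned I = 0; I != 6; ++I)
        for (unsigned J = 0; J != 6; ++J)
          assert(Rules[I][J] == Rules[J][I] && "alias table must be symmetric");
    }

    int main() {
      // A trivially symmetric example table (identity-like).
      static const AliasResult T[6][6] = {
        {MayAlias, NoAlias,  NoAlias,  NoAlias,  NoAlias,  NoAlias},
        {NoAlias,  MayAlias, NoAlias,  NoAlias,  NoAlias,  NoAlias},
        {NoAlias,  NoAlias,  MayAlias, NoAlias,  NoAlias,  NoAlias},
        {NoAlias,  NoAlias,  NoAlias,  MayAlias, NoAlias,  NoAlias},
        {NoAlias,  NoAlias,  NoAlias,  NoAlias,  MayAlias, NoAlias},
        {NoAlias,  NoAlias,  NoAlias,  NoAlias,  NoAlias,  MayAlias},
      };
      checkSymmetric(T);
      return 0;
    }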
diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
new file mode 100644
index 000000000000..5f8ed9b1f9a3
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -0,0 +1,102 @@
+//===- AMDGPUAliasAnalysis ---------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the AMDGPU address space based alias analysis pass.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
+#define LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
+
+#include "AMDGPU.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// A simple AA result that answers queries based on AMDGPU address spaces.
+class AMDGPUAAResult : public AAResultBase<AMDGPUAAResult> {
+ friend AAResultBase<AMDGPUAAResult>;
+
+ const DataLayout &DL;
+ AMDGPUAS AS;
+
+public:
+ explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(),
+ DL(DL), AS(AMDGPU::getAMDGPUAS(T)), ASAliasRules(AS, T.getArch()) {}
+ AMDGPUAAResult(AMDGPUAAResult &&Arg)
+ : AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS),
+      ASAliasRules(Arg.ASAliasRules) {}
+
+ /// Handle invalidation events from the new pass manager.
+ ///
+ /// By definition, this result is stateless and so remains valid.
+ bool invalidate(Function &, const PreservedAnalyses &) { return false; }
+
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+ bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+
+private:
+ bool Aliases(const MDNode *A, const MDNode *B) const;
+ bool PathAliases(const MDNode *A, const MDNode *B) const;
+
+ class ASAliasRulesTy {
+ public:
+ ASAliasRulesTy(AMDGPUAS AS_, Triple::ArchType Arch_);
+ AliasResult getAliasResult(unsigned AS1, unsigned AS2) const;
+ private:
+ Triple::ArchType Arch;
+ AMDGPUAS AS;
+ const AliasResult (*ASAliasRules)[6][6];
+ } ASAliasRules;
+};
+
+/// Analysis pass providing a never-invalidated alias analysis result.
+class AMDGPUAA : public AnalysisInfoMixin<AMDGPUAA> {
+ friend AnalysisInfoMixin<AMDGPUAA>;
+ static char PassID;
+
+public:
+ typedef AMDGPUAAResult Result;
+
+ AMDGPUAAResult run(Function &F, AnalysisManager<Function> &AM) {
+ return AMDGPUAAResult(F.getParent()->getDataLayout(),
+ Triple(F.getParent()->getTargetTriple()));
+ }
+};
+
+/// Legacy wrapper pass to provide the AMDGPUAAResult object.
+class AMDGPUAAWrapperPass : public ImmutablePass {
+ std::unique_ptr<AMDGPUAAResult> Result;
+
+public:
+ static char ID;
+
+ AMDGPUAAWrapperPass() : ImmutablePass(ID) {
+ initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ AMDGPUAAResult &getResult() { return *Result; }
+ const AMDGPUAAResult &getResult() const { return *Result; }
+
+ bool doInitialization(Module &M) override {
+ Result.reset(new AMDGPUAAResult(M.getDataLayout(),
+ Triple(M.getTargetTriple())));
+ return false;
+ }
+ bool doFinalization(Module &M) override {
+ Result.reset();
+ return false;
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+} // end namespace llvm
+#endif // LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H
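Under the legacy pass manager, an analysis like this is typically wired in by adding the wrapper pass plus an ExternalAAWrapperPass callback that splices the result into each AAResults chain. A hedged sketch of that wiring (assuming the header above and LLVM's legacy PM API):

    // Sketch only: assumes AMDGPUAliasAnalysis.h above.
    #include "AMDGPUAliasAnalysis.h"
    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/IR/LegacyPassManager.h"

    using namespace llvm;

    static void addAMDGPUAA(legacy::PassManagerBase &PM) {
      PM.add(createAMDGPUAAWrapperPass());
      // Hook the result into every AAResults built later in the pipeline.
      PM.add(createExternalAAWrapperPass(
          [](Pass &P, Function &, AAResults &AAR) {
            if (auto *WP = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
              AAR.addAAResult(WP->getResult());
          }));
    }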
diff --git a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 067a16a2af7f..1d03714874e2 100644
--- a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -24,8 +24,10 @@ namespace {
class AMDGPUAlwaysInline : public ModulePass {
static char ID;
+ bool GlobalOpt;
+
public:
- AMDGPUAlwaysInline() : ModulePass(ID) { }
+ AMDGPUAlwaysInline(bool GlobalOpt) : ModulePass(ID), GlobalOpt(GlobalOpt) { }
bool runOnModule(Module &M) override;
StringRef getPassName() const override { return "AMDGPU Always Inline Pass"; }
};
@@ -45,8 +47,10 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
}
}
- for (GlobalAlias* A : AliasesToRemove) {
- A->eraseFromParent();
+ if (GlobalOpt) {
+ for (GlobalAlias* A : AliasesToRemove) {
+ A->eraseFromParent();
+ }
}
for (Function &F : M) {
@@ -70,6 +74,6 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) {
return false;
}
-ModulePass *llvm::createAMDGPUAlwaysInlinePass() {
- return new AMDGPUAlwaysInline();
+ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
+ return new AMDGPUAlwaysInline(GlobalOpt);
}
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index c98d25e20185..3d8db7cd8af5 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
@@ -26,7 +27,9 @@ namespace {
class AMDGPUAnnotateKernelFeatures : public ModulePass {
private:
- static bool hasAddrSpaceCast(const Function &F);
+ const TargetMachine *TM;
+ AMDGPUAS AS;
+ static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS);
void addAttrToCallers(Function *Intrin, StringRef AttrName);
bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>);
@@ -34,7 +37,8 @@ private:
public:
static char ID;
- AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { }
+ AMDGPUAnnotateKernelFeatures(const TargetMachine *TM_ = nullptr) :
+ ModulePass(ID), TM(TM_) {}
bool runOnModule(Module &M) override;
StringRef getPassName() const override {
return "AMDGPU Annotate Kernel Features";
@@ -45,10 +49,11 @@ public:
ModulePass::getAnalysisUsage(AU);
}
- static bool visitConstantExpr(const ConstantExpr *CE);
+ static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS);
static bool visitConstantExprsRecursively(
const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
+ SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
+ AMDGPUAS AS);
};
}
@@ -62,18 +67,20 @@ INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
// The queue ptr is only needed when casting to flat, not from it.
-static bool castRequiresQueuePtr(unsigned SrcAS) {
- return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
+static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) {
+ return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
}
-static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
- return castRequiresQueuePtr(ASC->getSrcAddressSpace());
+static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC,
+ const AMDGPUAS &AS) {
+ return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS);
}
-bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
+bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE,
+ AMDGPUAS AS) {
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
- return castRequiresQueuePtr(SrcAS);
+ return castRequiresQueuePtr(SrcAS, AS);
}
return false;
@@ -81,7 +88,8 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
+ SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
+ AMDGPUAS AS) {
if (!ConstantExprVisited.insert(EntryC).second)
return false;
@@ -94,7 +102,7 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
// Check this constant expression.
if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
- if (visitConstantExpr(CE))
+ if (visitConstantExpr(CE, AS))
return true;
}
@@ -115,13 +123,14 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
}
// Return true if an addrspacecast is used that requires the queue ptr.
-bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
+bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F,
+ AMDGPUAS AS) {
SmallPtrSet<const Constant *, 8> ConstantExprVisited;
for (const BasicBlock &BB : F) {
for (const Instruction &I : BB) {
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
- if (castRequiresQueuePtr(ASC))
+ if (castRequiresQueuePtr(ASC, AS))
return true;
}
@@ -130,7 +139,7 @@ bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) {
if (!OpC)
continue;
- if (visitConstantExprsRecursively(OpC, ConstantExprVisited))
+ if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS))
return true;
}
}
@@ -170,6 +179,7 @@ bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics(
bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
Triple TT(M.getTargetTriple());
+ AS = AMDGPU::getAMDGPUAS(M);
static const StringRef IntrinsicToAttr[][2] = {
// .x omitted
@@ -190,7 +200,9 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
static const StringRef HSAIntrinsicToAttr[][2] = {
{ "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
{ "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
- { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" }
+ { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" },
+ { "llvm.trap", "amdgpu-queue-ptr" },
+ { "llvm.debugtrap", "amdgpu-queue-ptr" }
};
// TODO: We should not add the attributes if the known compile time workgroup
@@ -209,7 +221,9 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
if (F.hasFnAttribute("amdgpu-queue-ptr"))
continue;
- if (hasAddrSpaceCast(F))
+ bool HasApertureRegs =
+ TM && TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
+ if (!HasApertureRegs && hasAddrSpaceCast(F, AS))
F.addFnAttr("amdgpu-queue-ptr");
}
}
@@ -217,6 +231,6 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
return Changed;
}
-ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
- return new AMDGPUAnnotateKernelFeatures();
+ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM) {
+ return new AMDGPUAnnotateKernelFeatures(TM);
}
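The net effect of threading the TargetMachine through this pass: a cast into the flat address space from local or private needs the aperture base addresses, which come either from dedicated aperture registers (the new FeatureApertureRegs) or, failing that, from the queue pointer, so amdgpu-queue-ptr is only added when aperture registers are absent. Restated as a hypothetical predicate:

    // Hypothetical restatement of the decision made in runOnModule above.
    static bool needsQueuePtrForCast(unsigned SrcAS, const AMDGPUAS &AS,
                                     bool HasApertureRegs) {
      // Only casts from local/private to flat need the aperture bases.
      bool RequiresApertures =
          SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS;
      // Hardware aperture registers make the queue pointer unnecessary.
      return RequiresApertures && !HasApertureRegs;
    }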
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index c011be6fa169..91b3649f5c39 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -37,6 +37,7 @@ class AMDGPUAnnotateUniformValues : public FunctionPass,
LoopInfo *LI;
DenseMap<Value*, GetElementPtrInst*> noClobberClones;
bool isKernelFunc;
+ AMDGPUAS AMDGPUASI;
public:
static char ID;
@@ -130,8 +131,8 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
Value *Ptr = I.getPointerOperand();
if (!DA->isUniform(Ptr))
return;
- auto isGlobalLoad = [](LoadInst &Load)->bool {
- return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+ auto isGlobalLoad = [&](LoadInst &Load)->bool {
+ return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
};
// We're tracking up to the Function boundaries
// We cannot go beyond because of FunctionPass restrictions
@@ -166,6 +167,7 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
}
bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
+ AMDGPUASI = AMDGPU::getAMDGPUAS(M);
return false;
}
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 974e79fff3d7..0446655830d1 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -17,11 +17,11 @@
//
#include "AMDGPUAsmPrinter.h"
+#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "AMDGPU.h"
-#include "AMDKernelCodeT.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
@@ -93,33 +93,40 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)) {}
+ : AsmPrinter(TM, std::move(Streamer)) {
+ AMDGPUASI = static_cast<AMDGPUTargetMachine*>(&TM)->getAMDGPUAS();
+ }
StringRef AMDGPUAsmPrinter::getPassName() const {
return "AMDGPU Assembly Printer";
}
+const MCSubtargetInfo* AMDGPUAsmPrinter::getSTI() const {
+ return TM.getMCSubtargetInfo();
+}
+
+AMDGPUTargetStreamer& AMDGPUAsmPrinter::getTargetStreamer() const {
+ return static_cast<AMDGPUTargetStreamer&>(*OutStreamer->getTargetStreamer());
+}
+
void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
return;
- // Need to construct an MCSubtargetInfo here in case we have no functions
- // in the module.
- std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
- TM.getTargetTriple().str(), TM.getTargetCPU(),
- TM.getTargetFeatureString()));
-
- AMDGPUTargetStreamer *TS =
- static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
- TS->EmitDirectiveHSACodeObjectVersion(2, 1);
+ getTargetStreamer().EmitDirectiveHSACodeObjectVersion(2, 1);
+ getTargetStreamer().EmitDirectiveHSACodeObjectISA(
+ ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
+ getTargetStreamer().EmitStartOfCodeObjectMetadata(M);
+}
- AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
- TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
- "AMD", "AMDGPU");
+void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
+ return;
- // Emit runtime metadata.
- TS->EmitRuntimeMetadata(M);
+ getTargetStreamer().EmitEndOfCodeObjectMetadata();
}
bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
@@ -136,25 +143,32 @@ bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
}
-
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
+ amd_kernel_code_t KernelCode;
if (STM.isAmdCodeObjectV2(*MF)) {
getSIProgramInfo(KernelInfo, *MF);
- EmitAmdKernelCodeT(*MF, KernelInfo);
+ getAmdKernelCode(KernelCode, KernelInfo, *MF);
+
+ OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
+ getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
}
+
+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
+ return;
+ getTargetStreamer().EmitKernelCodeObjectMetadata(*MF->getFunction(),
+ KernelCode);
}
void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
- if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) {
- AMDGPUTargetStreamer *TS =
- static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+ if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(*MF)) {
SmallString<128> SymbolName;
getNameWithPrefix(SymbolName, MF->getFunction()),
- TS->EmitAMDGPUSymbolType(SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
+ getTargetStreamer().EmitAMDGPUSymbolType(
+ SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
}
AsmPrinter::EmitFunctionEntryLabel();
@@ -163,7 +177,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Group segment variables aren't emitted in HSA.
- if (AMDGPU::isGroupSegment(GV))
+ if (AMDGPU::isGroupSegment(GV, AMDGPUASI))
return;
AsmPrinter::EmitGlobalVariable(GV);
@@ -247,6 +261,9 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
+ Twine(G_00B84C_TRAP_HANDLER(KernelInfo.ComputePGMRSrc2)),
+ false);
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)),
false);
@@ -382,6 +399,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
case AMDGPU::EXEC_HI:
case AMDGPU::SCC:
case AMDGPU::M0:
+ case AMDGPU::SRC_SHARED_BASE:
+ case AMDGPU::SRC_SHARED_LIMIT:
+ case AMDGPU::SRC_PRIVATE_BASE:
+ case AMDGPU::SRC_PRIVATE_LIMIT:
continue;
case AMDGPU::VCC:
@@ -478,33 +499,20 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ExtraSGPRs = 6;
}
- // Record first reserved register and reserved register count fields, and
- // update max register counts if "amdgpu-debugger-reserve-regs" attribute was
- // requested.
- ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
- ProgInfo.ReservedVGPRCount = RI->getNumDebuggerReservedVGPRs(STM);
-
- // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
- // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
- // attribute was requested.
- if (STM.debuggerEmitPrologue()) {
- ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
- RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
- ProgInfo.DebuggerPrivateSegmentBufferSGPR =
- RI->getHWRegIndex(MFI->getScratchRSrcReg());
- }
+ unsigned ExtraVGPRs = STM.getReservedNumVGPRs(MF);
// Check the addressable register limit before we add ExtraSGPRs.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
!STM.hasSGPRInitBug()) {
- unsigned MaxAddressableNumSGPRs = STM.getMaxNumSGPRs();
+ unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
if (MaxSGPR + 1 > MaxAddressableNumSGPRs) {
// This can happen due to a compiler bug or when using inline asm.
LLVMContext &Ctx = MF.getFunction()->getContext();
DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
"addressable scalar registers",
MaxSGPR + 1, DS_Error,
- DK_ResourceLimit, MaxAddressableNumSGPRs);
+ DK_ResourceLimit,
+ MaxAddressableNumSGPRs);
Ctx.diagnose(Diag);
MaxSGPR = MaxAddressableNumSGPRs - 1;
}
@@ -512,41 +520,43 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// Account for extra SGPRs and VGPRs reserved for debugger use.
MaxSGPR += ExtraSGPRs;
- MaxVGPR += RI->getNumDebuggerReservedVGPRs(STM);
+ MaxVGPR += ExtraVGPRs;
// We found the maximum register index. They start at 0, so add one to get the
// number of registers.
- ProgInfo.NumVGPR = MaxVGPR + 1;
ProgInfo.NumSGPR = MaxSGPR + 1;
+ ProgInfo.NumVGPR = MaxVGPR + 1;
// Adjust number of registers used to meet default/requested minimum/maximum
// number of waves per execution unit request.
ProgInfo.NumSGPRsForWavesPerEU = std::max(
- ProgInfo.NumSGPR, RI->getMinNumSGPRs(STM, MFI->getMaxWavesPerEU()));
+ ProgInfo.NumSGPR, STM.getMinNumSGPRs(MFI->getMaxWavesPerEU()));
ProgInfo.NumVGPRsForWavesPerEU = std::max(
- ProgInfo.NumVGPR, RI->getMinNumVGPRs(MFI->getMaxWavesPerEU()));
+ ProgInfo.NumVGPR, STM.getMinNumVGPRs(MFI->getMaxWavesPerEU()));
if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
STM.hasSGPRInitBug()) {
- unsigned MaxNumSGPRs = STM.getMaxNumSGPRs();
- if (ProgInfo.NumSGPR > MaxNumSGPRs) {
- // This can happen due to a compiler bug or when using inline asm to use the
- // registers which are usually reserved for vcc etc.
-
+ unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
+ if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
+ // This can happen due to a compiler bug or when using inline asm to use
+ // the registers which are usually reserved for vcc etc.
LLVMContext &Ctx = MF.getFunction()->getContext();
DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
"scalar registers",
ProgInfo.NumSGPR, DS_Error,
- DK_ResourceLimit, MaxNumSGPRs);
+ DK_ResourceLimit,
+ MaxAddressableNumSGPRs);
Ctx.diagnose(Diag);
- ProgInfo.NumSGPR = MaxNumSGPRs;
- ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs;
+ ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
+ ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
}
}
if (STM.hasSGPRInitBug()) {
- ProgInfo.NumSGPR = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
- ProgInfo.NumSGPRsForWavesPerEU = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ ProgInfo.NumSGPR =
+ AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+ ProgInfo.NumSGPRsForWavesPerEU =
+ AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
}
if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {
@@ -565,13 +575,27 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// SGPRBlocks is actual number of SGPR blocks minus 1.
ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU,
- RI->getSGPRAllocGranule());
- ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / RI->getSGPRAllocGranule() - 1;
+ STM.getSGPREncodingGranule());
+ ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / STM.getSGPREncodingGranule() - 1;
// VGPRBlocks is actual number of VGPR blocks minus 1.
ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU,
- RI->getVGPRAllocGranule());
- ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / RI->getVGPRAllocGranule() - 1;
+ STM.getVGPREncodingGranule());
+ ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPREncodingGranule() - 1;
+
+ // Record first reserved VGPR and number of reserved VGPRs.
+ ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
+ ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF);
+
+ // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
+ // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
+ // attribute was requested.
+ if (STM.debuggerEmitPrologue()) {
+ ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
+ RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
+ ProgInfo.DebuggerPrivateSegmentBufferSGPR =
+ RI->getHWRegIndex(MFI->getScratchRSrcReg());
+ }
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
// register.
@@ -580,7 +604,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.IEEEMode = STM.enableIEEEBit(MF);
// Make the clamp modifier return 0 on NaN input.
- ProgInfo.DX10Clamp = 1;
+ ProgInfo.DX10Clamp = STM.enableDX10Clamp();
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
ProgInfo.ScratchSize = FrameInfo.getStackSize();
@@ -635,6 +659,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.ComputePGMRSrc2 =
S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) |
S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
+ S_00B84C_TRAP_HANDLER(STM.isTrapHandlerEnabled()) |
S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
@@ -688,7 +713,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
- OutStreamer->EmitIntValue(MFI->PSInputEna, 4);
+ OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4);
OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
}
@@ -713,97 +738,88 @@ static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
}
}
-void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
- const SIProgramInfo &KernelInfo) const {
+void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
+ const SIProgramInfo &KernelInfo,
+ const MachineFunction &MF) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const SISubtarget &STM = MF.getSubtarget<SISubtarget>();
- amd_kernel_code_t header;
- AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
+ AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
- header.compute_pgm_resource_registers =
+ Out.compute_pgm_resource_registers =
KernelInfo.ComputePGMRSrc1 |
(KernelInfo.ComputePGMRSrc2 << 32);
- header.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
-
+ Out.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
- AMD_HSA_BITS_SET(header.code_properties,
+ AMD_HSA_BITS_SET(Out.code_properties,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
getElementByteSizeValue(STM.getMaxPrivateElementSize()));
if (MFI->hasPrivateSegmentBuffer()) {
- header.code_properties |=
+ Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
}
if (MFI->hasDispatchPtr())
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (MFI->hasQueuePtr())
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
if (MFI->hasKernargSegmentPtr())
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
if (MFI->hasDispatchID())
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
if (MFI->hasFlatScratchInit())
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
-
- // TODO: Private segment size
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
if (MFI->hasGridWorkgroupCountX()) {
- header.code_properties |=
+ Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X;
}
if (MFI->hasGridWorkgroupCountY()) {
- header.code_properties |=
+ Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y;
}
if (MFI->hasGridWorkgroupCountZ()) {
- header.code_properties |=
+ Out.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z;
}
if (MFI->hasDispatchPtr())
- header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
if (STM.debuggerSupported())
- header.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
+ Out.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
if (STM.isXNACKEnabled())
- header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
+ Out.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
// FIXME: Should use getKernArgSize
- header.kernarg_segment_byte_size =
+ Out.kernarg_segment_byte_size =
STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
- header.wavefront_sgpr_count = KernelInfo.NumSGPR;
- header.workitem_vgpr_count = KernelInfo.NumVGPR;
- header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
- header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
- header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
- header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
+ Out.wavefront_sgpr_count = KernelInfo.NumSGPR;
+ Out.workitem_vgpr_count = KernelInfo.NumVGPR;
+ Out.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
+ Out.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
+ Out.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
+ Out.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
// These alignment values are specified in powers of two, so alignment =
// 2^n. The minimum alignment is 2^4 = 16.
- header.kernarg_segment_alignment = std::max((size_t)4,
+ Out.kernarg_segment_alignment = std::max((size_t)4,
countTrailingZeros(MFI->getMaxKernArgAlign()));
if (STM.debuggerEmitPrologue()) {
- header.debug_wavefront_private_segment_offset_sgpr =
+ Out.debug_wavefront_private_segment_offset_sgpr =
KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
- header.debug_private_segment_buffer_sgpr =
+ Out.debug_private_segment_buffer_sgpr =
KernelInfo.DebuggerPrivateSegmentBufferSGPR;
}
-
- AMDGPUTargetStreamer *TS =
- static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
-
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
- TS->EmitAMDKernelCodeT(header);
}
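
// A minimal standalone sketch (illustrative, not the LLVM API) of the
// kernarg_segment_alignment encoding computed above: the field stores log2 of
// the alignment, clamped to a minimum of 4 (2^4 = 16 bytes), and
// countTrailingZeros of a power-of-two alignment is exactly its log2.
#include <algorithm>
#include <cassert>
#include <cstdint>

static uint32_t encodeKernargAlignment(uint32_t MaxAlign) {
  assert(MaxAlign != 0 && (MaxAlign & (MaxAlign - 1)) == 0 &&
         "alignment must be a power of two");
  uint32_t Log2 = 0;
  while ((MaxAlign & 1) == 0) { // countTrailingZeros of a power of two
    MaxAlign >>= 1;
    ++Log2;
  }
  return std::max<uint32_t>(4, Log2); // minimum alignment is 2^4 = 16
}
// encodeKernargAlignment(8) == 4 (clamped); encodeKernargAlignment(64) == 6.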
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 9a4bafef3a25..13425c8b2a0f 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -15,95 +15,84 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
-#include "AMDGPUMCInstLower.h"
-
+#include "AMDKernelCodeT.h"
+#include "AMDGPU.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
#include <vector>
namespace llvm {
+
+class AMDGPUTargetStreamer;
class MCOperand;
class AMDGPUAsmPrinter final : public AsmPrinter {
private:
struct SIProgramInfo {
- SIProgramInfo() :
- VGPRBlocks(0),
- SGPRBlocks(0),
- Priority(0),
- FloatMode(0),
- Priv(0),
- DX10Clamp(0),
- DebugMode(0),
- IEEEMode(0),
- ScratchSize(0),
- ComputePGMRSrc1(0),
- LDSBlocks(0),
- ScratchBlocks(0),
- ComputePGMRSrc2(0),
- NumVGPR(0),
- NumSGPR(0),
- FlatUsed(false),
- NumSGPRsForWavesPerEU(0),
- NumVGPRsForWavesPerEU(0),
- ReservedVGPRFirst(0),
- ReservedVGPRCount(0),
- DebuggerWavefrontPrivateSegmentOffsetSGPR((uint16_t)-1),
- DebuggerPrivateSegmentBufferSGPR((uint16_t)-1),
- VCCUsed(false),
- CodeLen(0) {}
-
// Fields set in PGM_RSRC1 pm4 packet.
- uint32_t VGPRBlocks;
- uint32_t SGPRBlocks;
- uint32_t Priority;
- uint32_t FloatMode;
- uint32_t Priv;
- uint32_t DX10Clamp;
- uint32_t DebugMode;
- uint32_t IEEEMode;
- uint32_t ScratchSize;
-
- uint64_t ComputePGMRSrc1;
+ uint32_t VGPRBlocks = 0;
+ uint32_t SGPRBlocks = 0;
+ uint32_t Priority = 0;
+ uint32_t FloatMode = 0;
+ uint32_t Priv = 0;
+ uint32_t DX10Clamp = 0;
+ uint32_t DebugMode = 0;
+ uint32_t IEEEMode = 0;
+ uint32_t ScratchSize = 0;
+
+ uint64_t ComputePGMRSrc1 = 0;
// Fields set in PGM_RSRC2 pm4 packet.
- uint32_t LDSBlocks;
- uint32_t ScratchBlocks;
+ uint32_t LDSBlocks = 0;
+ uint32_t ScratchBlocks = 0;
- uint64_t ComputePGMRSrc2;
+ uint64_t ComputePGMRSrc2 = 0;
- uint32_t NumVGPR;
- uint32_t NumSGPR;
+ uint32_t NumVGPR = 0;
+ uint32_t NumSGPR = 0;
uint32_t LDSSize;
- bool FlatUsed;
+ bool FlatUsed = false;
// Number of SGPRs that meets number of waves per execution unit request.
- uint32_t NumSGPRsForWavesPerEU;
+ uint32_t NumSGPRsForWavesPerEU = 0;
// Number of VGPRs that meets number of waves per execution unit request.
- uint32_t NumVGPRsForWavesPerEU;
+ uint32_t NumVGPRsForWavesPerEU = 0;
// If ReservedVGPRCount is 0 then this value must also be 0. Otherwise, this
// is the first fixed VGPR number reserved.
- uint16_t ReservedVGPRFirst;
+ uint16_t ReservedVGPRFirst = 0;
// The number of consecutive VGPRs reserved.
- uint16_t ReservedVGPRCount;
+ uint16_t ReservedVGPRCount = 0;
// Fixed SGPR number used to hold wave scratch offset for entire kernel
- // execution, or uint16_t(-1) if the register is not used or not known.
- uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR;
+ // execution, or std::numeric_limits<uint16_t>::max() if the register is not
+ // used or not known.
+ uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR =
+ std::numeric_limits<uint16_t>::max();
// Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
- // kernel execution, or uint16_t(-1) if the register is not used or not
- // known.
- uint16_t DebuggerPrivateSegmentBufferSGPR;
+ // kernel execution, or std::numeric_limits<uint16_t>::max() if the register
+ // is not used or not known.
+ uint16_t DebuggerPrivateSegmentBufferSGPR =
+ std::numeric_limits<uint16_t>::max();
// Bonus information for debugging.
- bool VCCUsed;
- uint64_t CodeLen;
+ bool VCCUsed = false;
+ uint64_t CodeLen = 0;
+
+ SIProgramInfo() = default;
};
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF) const;
+ void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
+ const MachineFunction &MF) const;
void findNumUsedRegistersSI(const MachineFunction &MF,
unsigned &NumSGPR,
unsigned &NumVGPR) const;
@@ -112,21 +101,28 @@ private:
/// can correctly setup the GPU state.
void EmitProgramInfoR600(const MachineFunction &MF);
void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
- void EmitAmdKernelCodeT(const MachineFunction &MF,
- const SIProgramInfo &KernelInfo) const;
public:
explicit AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer);
- bool runOnMachineFunction(MachineFunction &MF) override;
-
StringRef getPassName() const override;
+ const MCSubtargetInfo* getSTI() const;
+
+ AMDGPUTargetStreamer& getTargetStreamer() const;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
/// \brief Wrapper for MCInstLowering.lowerOperand() for the tblgen'erated
/// pseudo lowering.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
+ /// \brief Lower the specified LLVM Constant to an MCExpr.
+ /// The default AsmPrinter::lowerConstant does not know how to lower an
+ /// addrspacecast, so such constants are lowered by this function instead.
+ const MCExpr *lowerConstant(const Constant *CV) override;
+
/// \brief tblgen'erated driver function for lowering simple MI->MC pseudo
/// instructions.
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
@@ -143,6 +139,8 @@ public:
void EmitStartOfAsmFile(Module &M) override;
+ void EmitEndOfAsmFile(Module &M) override;
+
bool isBlockOnlyReachableByFallthrough(
const MachineBasicBlock *MBB) const override;
@@ -153,8 +151,9 @@ public:
protected:
std::vector<std::string> DisasmLines, HexLines;
size_t DisasmLineMaxLen;
+ AMDGPUAS AMDGPUASI;
};
-} // End anonymous llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
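
// Aside: a minimal standalone sketch of the C++11 pattern applied to
// SIProgramInfo in this hunk: in-class default member initializers plus a
// defaulted constructor replace the long constructor init-list. Field names
// below are illustrative only, not the real struct.
#include <cstdint>
#include <limits>

struct ProgramInfoSketch {
  uint32_t NumVGPR = 0;
  uint32_t NumSGPR = 0;
  // Sentinel meaning "register not used or not known".
  uint16_t DebugSGPR = std::numeric_limits<uint16_t>::max();
  bool VCCUsed = false;

  ProgramInfoSketch() = default; // every field already has an initializer
};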
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index d53cc153dc9a..e67ae092fdda 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -14,8 +14,13 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUCallLowering.h"
+#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
-
+#include "AMDGPUSubtarget.h"
+#include "SIISelLowering.h"
+#include "SIRegisterInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -26,17 +31,138 @@ using namespace llvm;
#endif
AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
- : CallLowering(&TLI) {
+ : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
}
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
+ const Value *Val, unsigned VReg) const {
+ MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
return true;
}
+unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
+ Type *ParamTy,
+ unsigned Offset) const {
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const Function &F = *MF.getFunction();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
+ LLT PtrType = getLLTForType(*PtrTy, DL);
+ unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
+ unsigned KernArgSegmentPtr =
+ TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
+ unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
+
+ unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
+ MIRBuilder.buildConstant(OffsetReg, Offset);
+
+ MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
+
+ return DstReg;
+}
+
+void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
+ Type *ParamTy, unsigned Offset,
+ unsigned DstReg) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = *MF.getFunction();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+ unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
+ unsigned Align = DL.getABITypeAlignment(ParamTy);
+ unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
+ MachineMemOperand::MONonTemporal |
+ MachineMemOperand::MOInvariant,
+ TypeSize, Align);
+
+ MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
+}
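+
// A rough standalone model (an assumed simplification, not the GlobalISel
// API) of what lowerParameterPtr/lowerParameter express in MIR above: each
// kernel argument lives at kernarg_base + offset in the constant address
// space and is read with an invariant load of the argument's store size at
// its ABI alignment.
#include <cstdint>
#include <cstring>

static uint32_t loadKernArgU32(const uint8_t *KernArgBase, unsigned Offset) {
  uint32_t V;
  std::memcpy(&V, KernArgBase + Offset, sizeof(V)); // the GEP + load pair
  return V;
}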
+
bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<unsigned> VRegs) const {
- // TODO: Implement once there are generic loads/stores.
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ const SISubtarget *Subtarget = static_cast<const SISubtarget *>(&MF.getSubtarget());
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+
+ // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
+ if (Info->hasPrivateSegmentBuffer()) {
+ unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
+ MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
+ CCInfo.AllocateReg(PrivateSegmentBufferReg);
+ }
+
+ if (Info->hasDispatchPtr()) {
+ unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
+ // FIXME: Need to add reg as live-in
+ CCInfo.AllocateReg(DispatchPtrReg);
+ }
+
+ if (Info->hasQueuePtr()) {
+ unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
+ // FIXME: Need to add reg as live-in
+ CCInfo.AllocateReg(QueuePtrReg);
+ }
+
+ if (Info->hasKernargSegmentPtr()) {
+ unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
+ const LLT P2 = LLT::pointer(2, 64);
+ unsigned VReg = MRI.createGenericVirtualRegister(P2);
+ MRI.addLiveIn(InputPtrReg, VReg);
+ MIRBuilder.getMBB().addLiveIn(InputPtrReg);
+ MIRBuilder.buildCopy(VReg, InputPtrReg);
+ CCInfo.AllocateReg(InputPtrReg);
+ }
+
+ if (Info->hasDispatchID()) {
+ unsigned DispatchIDReg = Info->addDispatchID(*TRI);
+ // FIXME: Need to add reg as live-in
+ CCInfo.AllocateReg(DispatchIDReg);
+ }
+
+ if (Info->hasFlatScratchInit()) {
+ unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
+ // FIXME: Need to add reg as live-in
+ CCInfo.AllocateReg(FlatScratchInitReg);
+ }
+
+ unsigned NumArgs = F.arg_size();
+ Function::const_arg_iterator CurOrigArg = F.arg_begin();
+ const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
+ for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
+ MVT ValVT = TLI.getValueType(DL, CurOrigArg->getType()).getSimpleVT();
+ ISD::ArgFlagsTy Flags;
+ Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
+ CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
+ /*IsVarArg=*/false);
+ bool Res =
+ AssignFn(i, ValVT, ValVT, CCValAssign::Full, Flags, CCInfo);
+ assert(!Res && "Call operand has unhandled type");
+ (void)Res;
+ }
+
+ Function::const_arg_iterator Arg = F.arg_begin();
+ for (unsigned i = 0; i != NumArgs; ++i, ++Arg) {
+ // FIXME: We should be getting DebugInfo from the arguments somehow.
+ CCValAssign &VA = ArgLocs[i];
+ lowerParameter(MIRBuilder, Arg->getType(),
+ VA.getLocMemOffset() +
+ Subtarget->getExplicitKernelArgOffset(MF), VRegs[i]);
+ }
+
return true;
}
diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.h b/lib/Target/AMDGPU/AMDGPUCallLowering.h
index 9ae87c9397ab..09bdf8ffcde7 100644
--- a/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H
+#include "AMDGPU.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
namespace llvm {
@@ -22,6 +23,14 @@ namespace llvm {
class AMDGPUTargetLowering;
class AMDGPUCallLowering: public CallLowering {
+ AMDGPUAS AMDGPUASI;
+
+ unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
+ unsigned Offset) const;
+
+ void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy,
+ unsigned Offset, unsigned DstReg) const;
+
public:
AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
@@ -29,6 +38,7 @@ class AMDGPUCallLowering: public CallLowering {
unsigned VReg) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
};
} // end namespace llvm
#endif
diff --git a/lib/Target/AMDGPU/AMDGPUCallingConv.td b/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 47dfa4992068..d308f718aae1 100644
--- a/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -17,7 +17,7 @@ class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A> {}
// Calling convention for SI
def CC_SI : CallingConv<[
- CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
+ CCIfInReg<CCIfType<[f32, i32, f16] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
@@ -25,17 +25,13 @@ def CC_SI : CallingConv<[
SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
]>>>,
- CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
- [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14,
- SGPR16, SGPR18, SGPR20, SGPR22, SGPR24, SGPR26, SGPR28, SGPR30,
- SGPR32, SGPR34, SGPR36, SGPR38 ],
- [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15,
- SGPR17, SGPR19, SGPR21, SGPR23, SGPR25, SGPR27, SGPR29, SGPR31,
- SGPR33, SGPR35, SGPR37, SGPR39 ]
- >>>,
+ // We have no way of referring to the generated register tuples
+ // here, so use a custom function.
+ CCIfInReg<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,
+ CCIfByVal<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,
// 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
- CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[
+ CCIfNotInReg<CCIfType<[f32, i32, f16] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -53,17 +49,7 @@ def CC_SI : CallingConv<[
VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
- ]>>>,
-
- CCIfByVal<CCIfType<[i64] , CCAssignToRegWithShadow<
- [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14,
- SGPR16, SGPR18, SGPR20, SGPR22, SGPR24, SGPR26, SGPR28, SGPR30,
- SGPR32, SGPR34, SGPR36, SGPR38 ],
- [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15,
- SGPR17, SGPR19, SGPR21, SGPR23, SGPR25, SGPR27, SGPR29, SGPR31,
- SGPR33, SGPR35, SGPR37, SGPR39 ]
- >>>
-
+ ]>>>
]>;
def RetCC_SI : CallingConv<[
@@ -76,7 +62,7 @@ def RetCC_SI : CallingConv<[
]>>,
// 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
- CCIfType<[f32] , CCAssignToReg<[
+ CCIfType<[f32, f16] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index e6230547a9b3..e19314fe0a6c 100644
--- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -14,16 +14,31 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
-
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <iterator>
#define DEBUG_TYPE "amdgpu-codegenprepare"
@@ -34,17 +49,15 @@ namespace {
class AMDGPUCodeGenPrepare : public FunctionPass,
public InstVisitor<AMDGPUCodeGenPrepare, bool> {
const GCNTargetMachine *TM;
- const SISubtarget *ST;
- DivergenceAnalysis *DA;
- Module *Mod;
- bool HasUnsafeFPMath;
+ const SISubtarget *ST = nullptr;
+ DivergenceAnalysis *DA = nullptr;
+ Module *Mod = nullptr;
+ bool HasUnsafeFPMath = false;
/// \brief Copies exact/nsw/nuw flags (if any) from binary operation \p I to
/// binary operation \p V.
///
/// \returns Binary operation \p V.
- Value *copyFlags(const BinaryOperator &I, Value *V) const;
-
/// \returns \p T's base element bit width.
unsigned getBaseElementBitWidth(const Type *T) const;
@@ -113,13 +126,9 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
public:
static char ID;
+
AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
- FunctionPass(ID),
- TM(static_cast<const GCNTargetMachine *>(TM)),
- ST(nullptr),
- DA(nullptr),
- Mod(nullptr),
- HasUnsafeFPMath(false) { }
+ FunctionPass(ID), TM(static_cast<const GCNTargetMachine *>(TM)) {}
bool visitFDiv(BinaryOperator &I);
@@ -142,22 +151,7 @@ public:
}
};
-} // End anonymous namespace
-
-Value *AMDGPUCodeGenPrepare::copyFlags(
- const BinaryOperator &I, Value *V) const {
- BinaryOperator *BinOp = dyn_cast<BinaryOperator>(V);
- if (!BinOp) // Possibly constant expression.
- return V;
-
- if (isa<OverflowingBinaryOperator>(BinOp)) {
- BinOp->setHasNoSignedWrap(I.hasNoSignedWrap());
- BinOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
- } else if (isa<PossiblyExactOperator>(BinOp))
- BinOp->setIsExact(I.isExact());
-
- return V;
-}
+} // end anonymous namespace
unsigned AMDGPUCodeGenPrepare::getBaseElementBitWidth(const Type *T) const {
assert(needsPromotionToI32(T) && "T does not need promotion to i32");
@@ -186,12 +180,48 @@ bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
}
bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const {
- if (T->isIntegerTy() && T->getIntegerBitWidth() > 1 &&
- T->getIntegerBitWidth() <= 16)
+ const IntegerType *IntTy = dyn_cast<IntegerType>(T);
+ if (IntTy && IntTy->getBitWidth() > 1 && IntTy->getBitWidth() <= 16)
+ return true;
+
+ if (const VectorType *VT = dyn_cast<VectorType>(T)) {
+ // TODO: The set of packed operations is more limited, so may want to
+ // promote some anyway.
+ if (ST->hasVOP3PInsts())
+ return false;
+
+ return needsPromotionToI32(VT->getElementType());
+ }
+
+ return false;
+}
+
+// Return true if the op promoted to i32 should have nsw set.
+static bool promotedOpIsNSW(const Instruction &I) {
+ switch (I.getOpcode()) {
+ case Instruction::Shl:
+ case Instruction::Add:
+ case Instruction::Sub:
+ return true;
+ case Instruction::Mul:
+ return I.hasNoUnsignedWrap();
+ default:
+ return false;
+ }
+}
+
+// Return true if the op promoted to i32 should have nuw set.
+static bool promotedOpIsNUW(const Instruction &I) {
+ switch (I.getOpcode()) {
+ case Instruction::Shl:
+ case Instruction::Add:
+ case Instruction::Mul:
return true;
- if (!T->isVectorTy())
+ case Instruction::Sub:
+ return I.hasNoUnsignedWrap();
+ default:
return false;
- return needsPromotionToI32(cast<VectorType>(T)->getElementType());
+ }
}
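
// A standalone check (not LLVM code) of why the promoted i32 op may carry
// these flags: after zero-extending 16-bit operands, add stays below 2^17 and
// mul stays below 2^32, so neither can wrap unsigned in 32 bits, while sub
// can still wrap (0 - 1) and therefore only forwards the original nuw flag.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xFFFFu, B = 0xFFFFu;       // maximal zero-extended i16 values
  assert(A + B == 0x1FFFEu);               // < 2^17: no unsigned wrap
  assert(A * B == 0xFFFE0001u);            // < 2^32: no unsigned wrap
  assert(uint32_t(0) - 1u == 0xFFFFFFFFu); // sub does wrap unsigned
}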
bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
@@ -218,7 +248,19 @@ bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
}
- ExtRes = copyFlags(I, Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1));
+
+ ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1);
+ if (Instruction *Inst = dyn_cast<Instruction>(ExtRes)) {
+ if (promotedOpIsNSW(cast<Instruction>(I)))
+ Inst->setHasNoSignedWrap();
+
+ if (promotedOpIsNUW(cast<Instruction>(I)))
+ Inst->setHasNoUnsignedWrap();
+
+ if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
+ Inst->setIsExact(ExactOp->isExact());
+ }
+
TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
I.replaceAllUsesWith(TruncRes);
@@ -346,9 +388,7 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
Builder.setFastMathFlags(FMF);
Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
- const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo();
- Function *Decl
- = II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {});
+ Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast);
Value *Num = FDiv.getOperand(0);
Value *Den = FDiv.getOperand(1);
diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
index 805fb7102a35..e32ca9653b3a 100644
--- a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
@@ -12,11 +12,6 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUFrameLowering.h"
-#include "AMDGPURegisterInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Support/MathExtras.h"
using namespace llvm;
AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
@@ -69,34 +64,3 @@ unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
// T1.W = stack[1].w
return 1;
}
-
-/// \returns The number of registers allocated for \p FI.
-int AMDGPUFrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI,
- unsigned &FrameReg) const {
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- const AMDGPURegisterInfo *RI
- = MF.getSubtarget<AMDGPUSubtarget>().getRegisterInfo();
-
- // Fill in FrameReg output argument.
- FrameReg = RI->getFrameRegister(MF);
-
- // Start the offset at 2 so we don't overwrite work group information.
- // XXX: We should only do this when the shader actually uses this
- // information.
- unsigned OffsetBytes = 2 * (getStackWidth(MF) * 4);
- int UpperBound = FI == -1 ? MFI.getNumObjects() : FI;
-
- for (int i = MFI.getObjectIndexBegin(); i < UpperBound; ++i) {
- OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(i));
- OffsetBytes += MFI.getObjectSize(i);
- // Each register holds 4 bytes, so we must always align the offset to at
- // least 4 bytes, so that 2 frame objects won't share the same register.
- OffsetBytes = alignTo(OffsetBytes, 4);
- }
-
- if (FI != -1)
- OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(FI));
-
- return OffsetBytes / (getStackWidth(MF) * 4);
-}
diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/lib/Target/AMDGPU/AMDGPUFrameLowering.h
index 5d51351a00d2..8e187c7e56c1 100644
--- a/lib/Target/AMDGPU/AMDGPUFrameLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.h
@@ -34,9 +34,6 @@ public:
/// values to the stack.
unsigned getStackWidth(const MachineFunction &MF) const;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const override;
-
bool hasFP(const MachineFunction &MF) const override {
return false;
}
diff --git a/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
new file mode 100644
index 000000000000..5cb9036f4823
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -0,0 +1,62 @@
+//===- AMDGPUGenRegisterBankInfo.def -----------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines all the static objects used by AMDGPURegisterBankInfo.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+namespace llvm {
+namespace AMDGPU {
+
+enum PartialMappingIdx {
+ None = -1,
+ PM_SGPR32 = 0,
+ PM_SGPR64 = 1,
+ PM_VGPR32 = 2,
+ PM_VGPR64 = 3
+};
+
+const RegisterBankInfo::PartialMapping PartMappings[] {
+ // StartIdx, Length, RegBank
+ {0, 32, SGPRRegBank},
+ {0, 64, SGPRRegBank},
+ {0, 32, VGPRRegBank},
+ {0, 64, VGPRRegBank}
+};
+
+const RegisterBankInfo::ValueMapping ValMappings[] {
+ // SGPR 32-bit
+ {&PartMappings[0], 1},
+ // SGPR 64-bit
+ {&PartMappings[1], 1},
+ // VGPR 32-bit
+ {&PartMappings[2], 1},
+ // VGPR 64-bit
+ {&PartMappings[3], 1}
+};
+
+enum ValueMappingIdx {
+ SGPRStartIdx = 0,
+ VGPRStartIdx = 2
+};
+
+const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
+ unsigned Size) {
+ assert(Size % 32 == 0);
+ unsigned Idx = BankID == AMDGPU::SGPRRegBankID ? SGPRStartIdx : VGPRStartIdx;
+ Idx += (Size / 32) - 1;
+ return &ValMappings[Idx];
+}
+
+} // end namespace AMDGPU
+} // end namespace llvm
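
// A standalone sketch of the table lookup in getValueMapping above: the
// mapping array is laid out [SGPR32, SGPR64, VGPR32, VGPR64], so the entry
// index is the bank's start index plus (Size / 32) - 1.
#include <cassert>

static unsigned valueMappingIndex(bool IsSGPR, unsigned Size) {
  const unsigned SGPRStart = 0, VGPRStart = 2;
  assert(Size % 32 == 0 && (Size == 32 || Size == 64));
  return (IsSGPR ? SGPRStart : VGPRStart) + (Size / 32) - 1;
}
// valueMappingIndex(true, 64) == 1; valueMappingIndex(false, 32) == 2.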
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 5bf347e48650..318de7f2e3d2 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -67,10 +67,13 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
// make the right decision when generating code for different targets.
const AMDGPUSubtarget *Subtarget;
+ AMDGPUAS AMDGPUASI;
public:
explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel) {}
+ : SelectionDAGISel(TM, OptLevel) {
+ AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
+ }
~AMDGPUDAGToDAGISel() override = default;
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -80,6 +83,7 @@ public:
private:
SDValue foldFrameIndex(SDValue N) const;
+ bool isNoNanSrc(SDValue N) const;
bool isInlineImmediate(const SDNode *N) const;
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
const R600InstrInfo *TII);
@@ -143,6 +147,8 @@ private:
bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const;
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
+
+ bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
@@ -156,7 +162,15 @@ private:
SDValue &Clamp,
SDValue &Omod) const;
+ bool SelectVOP3OMods(SDValue In, SDValue &Src,
+ SDValue &Clamp, SDValue &Omod) const;
+
+ bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp) const;
+
void SelectADD_SUB_I64(SDNode *N);
+ void SelectUADDO_USUBO(SDNode *N);
void SelectDIV_SCALE(SDNode *N);
void SelectFMA_W_CHAIN(SDNode *N);
void SelectFMUL_W_CHAIN(SDNode *N);
@@ -187,6 +201,17 @@ bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
return SelectionDAGISel::runOnMachineFunction(MF);
}
+bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
+ if (TM.Options.NoNaNsFPMath)
+ return true;
+
+ // TODO: Move into isKnownNeverNaN
+ if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(N))
+ return BO->Flags.hasNoNaNs();
+
+ return CurDAG->isKnownNeverNaN(N);
+}
+
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
const SIInstrInfo *TII
= static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
@@ -250,7 +275,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
- cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+ cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS)
return N;
const SITargetLowering& Lowering =
@@ -290,6 +315,20 @@ static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
llvm_unreachable("invalid vector size");
}
+static bool getConstantValue(SDValue N, uint32_t &Out) {
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ Out = C->getAPIntValue().getZExtValue();
+ return true;
+ }
+
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
+ Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
+ return true;
+ }
+
+ return false;
+}
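
// A standalone sketch of the v2i16/v2f16 BUILD_VECTOR fold in Select() below:
// two constant 16-bit elements (integers or raw half bits, extracted via
// getConstantValue above) are packed into a single 32-bit immediate for one
// S_MOV_B32, low element in bits [15:0] and high element in bits [31:16].
#include <cassert>
#include <cstdint>

static uint32_t packV2x16(uint16_t Lo, uint16_t Hi) {
  return uint32_t(Lo) | (uint32_t(Hi) << 16);
}

int main() {
  assert(packV2x16(0x3C00, 0xBC00) == 0xBC003C00u); // <half 1.0, half -1.0>
}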
+
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
@@ -319,6 +358,11 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectADD_SUB_I64(N);
return;
}
+ case ISD::UADDO:
+ case ISD::USUBO: {
+ SelectUADDO_USUBO(N);
+ return;
+ }
case AMDGPUISD::FMUL_W_CHAIN: {
SelectFMUL_W_CHAIN(N);
return;
@@ -336,7 +380,24 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
+
+ if (VT == MVT::v2i16 || VT == MVT::v2f16) {
+ if (Opc == ISD::BUILD_VECTOR) {
+ uint32_t LHSVal, RHSVal;
+ if (getConstantValue(N->getOperand(0), LHSVal) &&
+ getConstantValue(N->getOperand(1), RHSVal)) {
+ uint32_t K = LHSVal | (RHSVal << 16);
+ CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
+ CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
+ return;
+ }
+ }
+
+ break;
+ }
+
assert(EltVT.bitsEq(MVT::i32));
+
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
} else {
@@ -502,7 +563,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::CopyToReg: {
const SITargetLowering& Lowering =
*static_cast<const SITargetLowering*>(getTargetLowering());
- Lowering.legalizeTargetIndependentNode(N, *CurDAG);
+ N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
break;
}
case ISD::AND:
@@ -531,9 +592,9 @@ bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
if (!N->readMem())
return false;
if (CbId == -1)
- return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+ return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
- return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
+ return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
@@ -689,6 +750,17 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
CurDAG->RemoveDeadNode(N);
}
+void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
+ // The names of these opcodes are misleading: v_add_i32/v_sub_i32 produce an
+ // unsigned carry out despite the _i32 suffix. They were renamed to _U32 on VI.
+ // FIXME: We should probably rename the opcodes here.
+ unsigned Opc = N->getOpcode() == ISD::UADDO ?
+ AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
+
+ CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
+ { N->getOperand(0), N->getOperand(1) });
+}
+
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
SDLoc SL(N);
// src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
@@ -1176,16 +1248,6 @@ bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr,
return true;
}
-///
-/// \param EncodedOffset This is the immediate value that will be encoded
-/// directly into the instruction. On SI/CI the \p EncodedOffset
-/// will be in units of dwords and on VI+ it will be units of bytes.
-static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST,
- int64_t EncodedOffset) {
- return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
- isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
-}
-
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
SDValue &Offset, bool &Imm) const {
@@ -1197,10 +1259,9 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
SDLoc SL(ByteOffsetNode);
AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
int64_t ByteOffset = C->getSExtValue();
- int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
- ByteOffset >> 2 : ByteOffset;
+ int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);
- if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
+ if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
Imm = true;
return true;
@@ -1481,7 +1542,7 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
MemSDNode *Mem = cast<MemSDNode>(N);
unsigned AS = Mem->getAddressSpace();
- if (AS == AMDGPUAS::FLAT_ADDRESS) {
+ if (AS == AMDGPUASI.FLAT_ADDRESS) {
SelectCode(N);
return;
}
@@ -1545,7 +1606,6 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = 0;
-
Src = In;
if (Src.getOpcode() == ISD::FNEG) {
@@ -1559,10 +1619,15 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
}
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
-
return true;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ SelectVOP3Mods(In, Src, SrcMods);
+ return isNoNanSrc(Src);
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
bool Res = SelectVOP3Mods(In, Src, SrcMods);
@@ -1607,6 +1672,50 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}
+bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
+ SDValue &Clamp, SDValue &Omod) const {
+ Src = In;
+
+ SDLoc DL(In);
+ // FIXME: Handle Clamp and Omod
+ Clamp = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ Omod = CurDAG->getTargetConstant(0, DL, MVT::i32);
+
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ unsigned Mods = 0;
+ Src = In;
+
+ // FIXME: Look for fneg on separate components
+ if (Src.getOpcode() == ISD::FNEG) {
+ Mods |= (SISrcMods::NEG | SISrcMods::NEG_HI);
+ Src = Src.getOperand(0);
+ }
+
+ // Packed instructions do not have abs modifiers.
+
+ // FIXME: Handle abs/neg of individual components.
+ // FIXME: Handle swizzling with op_sel
+ Mods |= SISrcMods::OP_SEL_1;
+
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
+ SDValue &SrcMods,
+ SDValue &Clamp) const {
+ SDLoc SL(In);
+
+ // FIXME: Handle clamp and op_sel
+ Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);
+
+ return SelectVOP3PMods(In, Src, SrcMods);
+}
+
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 54caa2c5dfad..c0f336e082bd 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -15,6 +15,7 @@
#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
+#include "AMDGPUCallLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPURegisterInfo.h"
@@ -43,6 +44,37 @@ static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
return true;
}
+static bool allocateCCRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State,
+ const TargetRegisterClass *RC,
+ unsigned NumRegs) {
+ ArrayRef<MCPhysReg> RegList = makeArrayRef(RC->begin(), NumRegs);
+ unsigned RegResult = State.AllocateReg(RegList);
+ if (RegResult == AMDGPU::NoRegister)
+ return false;
+
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, RegResult, LocVT, LocInfo));
+ return true;
+}
+
+static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ switch (LocVT.SimpleTy) {
+ case MVT::i64:
+ case MVT::f64:
+ case MVT::v2i32:
+ case MVT::v2f32: {
+ // Up to SGPR0-SGPR39
+ return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
+ &AMDGPU::SGPR_64RegClass, 20);
+ }
+ default:
+ return false;
+ }
+}
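
// A rough standalone model (not the CCState API) of allocateSGPRTuple above:
// a 64-bit value takes the first free pair from the first 20 SGPR_64
// registers (SGPR0_SGPR1 .. SGPR38_SGPR39); past that window allocation
// fails, mirroring the AMDGPU::NoRegister case.
#include <bitset>

static int allocateSGPRPair(std::bitset<20> &UsedPairs) {
  for (int I = 0; I < 20; ++I)
    if (!UsedPairs[I]) {
      UsedPairs.set(I);
      return I; // pair I covers SGPR(2*I) and SGPR(2*I + 1)
    }
  return -1; // no SGPR tuple left
}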
+
#include "AMDGPUGenCallingConv.inc"
// Find a larger type to do a load / store of a vector with.
@@ -58,6 +90,7 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
const AMDGPUSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
+ AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::LOAD, MVT::f32, Promote);
@@ -211,10 +244,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// This is totally unsupported, just custom lower to produce an error.
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
- // We need to custom lower some of the intrinsics
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
-
// Library functions. These default to Expand, but we have instructions
// for them.
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
@@ -270,6 +299,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
for (MVT VT : ScalarIntVTs) {
@@ -460,10 +490,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// N > 4 stores on the same chain.
GatherAllAliasesMaxDepth = 16;
- // FIXME: Need to really handle these.
- MaxStoresPerMemcpy = 4096;
- MaxStoresPerMemmove = 4096;
- MaxStoresPerMemset = 4096;
+ // memcpy/memmove/memset are expanded in the IR, so we shouldn't need to worry
+ // about these during lowering.
+ MaxStoresPerMemcpy = 0xffffffff;
+ MaxStoresPerMemmove = 0xffffffff;
+ MaxStoresPerMemset = 0xffffffff;
setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::SHL);
@@ -478,12 +509,14 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FNEG);
+ setTargetDAGCombine(ISD::FABS);
}
//===----------------------------------------------------------------------===//
// Target Information
//===----------------------------------------------------------------------===//
+LLVM_READNONE
static bool fnegFoldsIntoOp(unsigned Opc) {
switch (Opc) {
case ISD::FADD:
@@ -491,17 +524,77 @@ static bool fnegFoldsIntoOp(unsigned Opc) {
case ISD::FMUL:
case ISD::FMA:
case ISD::FMAD:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
case ISD::FSIN:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::SIN_HW:
case AMDGPUISD::FMUL_LEGACY:
+ case AMDGPUISD::FMIN_LEGACY:
+ case AMDGPUISD::FMAX_LEGACY:
return true;
default:
return false;
}
}
+/// \returns true if the operation will definitely need to use a 64-bit
+/// encoding, and thus will use a VOP3 encoding regardless of the source
+/// modifiers.
+LLVM_READONLY
+static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) {
+ return N->getNumOperands() > 2 || VT == MVT::f64;
+}
+
+// Most FP instructions support source modifiers, but this could be refined
+// slightly.
+LLVM_READONLY
+static bool hasSourceMods(const SDNode *N) {
+ if (isa<MemSDNode>(N))
+ return false;
+
+ switch (N->getOpcode()) {
+ case ISD::CopyToReg:
+ case ISD::SELECT:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::INLINEASM:
+ case AMDGPUISD::INTERP_P1:
+ case AMDGPUISD::INTERP_P2:
+ case AMDGPUISD::DIV_SCALE:
+ return false;
+ default:
+ return true;
+ }
+}
+
+static bool allUsesHaveSourceMods(const SDNode *N, unsigned CostThreshold = 4) {
+ // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
+ // it is truly free to use a source modifier in all cases. If there are
+ // multiple users, and a source modifier would force each of them into a
+ // VOP3 encoding, there will be a code size increase. Try to avoid increasing
+ // code size unless we know it will save on the instruction count.
+ unsigned NumMayIncreaseSize = 0;
+ MVT VT = N->getValueType(0).getScalarType().getSimpleVT();
+
+ // XXX - Should this limit number of uses to check?
+ for (const SDNode *U : N->uses()) {
+ if (!hasSourceMods(U))
+ return false;
+
+ if (!opMustUseVOP3Encoding(U, VT)) {
+ if (++NumMayIncreaseSize > CostThreshold)
+ return false;
+ }
+ }
+
+ return true;
+}
+
MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const {
return MVT::i32;
}
@@ -580,12 +673,17 @@ bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const {
bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
assert(VT.isFloatingPoint());
- return VT == MVT::f32 || VT == MVT::f64 || (Subtarget->has16BitInsts() &&
- VT == MVT::f16);
+
+ // Packed operations do not have a fabs modifier.
+ return VT == MVT::f32 || VT == MVT::f64 ||
+ (Subtarget->has16BitInsts() && VT == MVT::f16);
}
bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
- return isFAbsFree(VT);
+ assert(VT.isFloatingPoint());
+ return VT == MVT::f32 || VT == MVT::f64 ||
+ (Subtarget->has16BitInsts() && VT == MVT::f16) ||
+ (Subtarget->hasVOP3PInsts() && VT == MVT::v2f16);
}
bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT,
@@ -667,6 +765,11 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
// TargetLowering Callbacks
//===---------------------------------------------------------------------===//
+CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
+ bool IsVarArg) const {
+ return CC_AMDGPU;
+}
+
/// The SelectionDAGBuilder will automatically promote function arguments
/// with illegal types. However, this does not work for the AMDGPU targets
/// since the function arguments are stored in memory as these illegal types.
@@ -764,11 +867,6 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(CCState &State,
}
}
-void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
- const SmallVectorImpl<ISD::InputArg> &Ins) const {
- State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
-}
-
void AMDGPUTargetLowering::AnalyzeReturn(CCState &State,
const SmallVectorImpl<ISD::OutputArg> &Outs) const {
@@ -788,6 +886,24 @@ AMDGPUTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Target specific lowering
//===---------------------------------------------------------------------===//
+/// Selects the correct CCAssignFn for a given CallingConvention value.
+CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
+ bool IsVarArg) {
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return CC_AMDGPU_Kernel;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ return CC_AMDGPU;
+ default:
+ report_fatal_error("Unsupported calling convention.");
+ }
+}
+
SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SDValue Callee = CLI.Callee;
@@ -829,14 +945,13 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
- Op->dump(&DAG);
+ Op->print(errs(), &DAG);
llvm_unreachable("Custom lowering code for this"
"instruction is not implemented yet!");
break;
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
- case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
case ISD::FREM: return LowerFREM(Op, DAG);
@@ -892,19 +1007,16 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();
- switch (G->getAddressSpace()) {
- case AMDGPUAS::LOCAL_ADDRESS: {
+ if (G->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
// XXX: What does the value of G->getOffset() mean?
assert(G->getOffset() == 0 &&
"Do not know what to do with an non-zero offset");
// TODO: We could emit code to handle the initialization somewhere.
- if (hasDefinedInitializer(GV))
- break;
-
- unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
- return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
- }
+ if (!hasDefinedInitializer(GV)) {
+ unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
+ return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
+ }
}
const Function &Fn = *DAG.getMachineFunction().getFunction();
@@ -936,41 +1048,12 @@ SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args);
}
-SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
- SelectionDAG &DAG) const {
- unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
-
- switch (IntrinsicID) {
- default: return Op;
- case AMDGPUIntrinsic::AMDGPU_clamp: // Legacy name.
- return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
- Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
-
- case AMDGPUIntrinsic::AMDGPU_bfe_i32:
- return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
- Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3));
-
- case AMDGPUIntrinsic::AMDGPU_bfe_u32:
- return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
- Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3));
- }
-}
-
/// \brief Generate Min/Max node
-SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(const SDLoc &DL, EVT VT,
+SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT,
SDValue LHS, SDValue RHS,
SDValue True, SDValue False,
SDValue CC,
DAGCombinerInfo &DCI) const {
- if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return SDValue();
-
if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
return SDValue();
@@ -1228,7 +1311,10 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
// float fr = mad(fqneg, fb, fa);
- SDValue fr = DAG.getNode(ISD::FMAD, DL, FltVT, fqneg, fb, fa);
+ unsigned OpCode = Subtarget->hasFP32Denormals() ?
+ (unsigned)AMDGPUISD::FMAD_FTZ :
+ (unsigned)ISD::FMAD;
+ SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);
// int iq = (int)fq;
SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
@@ -1662,32 +1748,37 @@ SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) con
}
// XXX - May require not supporting f32 denormals?
-SDValue AMDGPUTargetLowering::LowerFROUND32(SDValue Op, SelectionDAG &DAG) const {
+
+// Don't handle v2f16. The extra instructions to scalarize and repack around the
+// compare and vselect end up producing worse code than scalarizing the whole
+// operation.
+SDValue AMDGPUTargetLowering::LowerFROUND32_16(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue X = Op.getOperand(0);
+ EVT VT = Op.getValueType();
- SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X);
+ SDValue T = DAG.getNode(ISD::FTRUNC, SL, VT, X);
// TODO: Should this propagate fast-math-flags?
- SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T);
+ SDValue Diff = DAG.getNode(ISD::FSUB, SL, VT, X, T);
- SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff);
+ SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, VT, Diff);
- const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f32);
- const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
- const SDValue Half = DAG.getConstantFP(0.5, SL, MVT::f32);
+ const SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
+ const SDValue One = DAG.getConstantFP(1.0, SL, VT);
+ const SDValue Half = DAG.getConstantFP(0.5, SL, VT);
- SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f32, One, X);
+ SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, VT, One, X);
EVT SetCCVT =
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
- SDValue Sel = DAG.getNode(ISD::SELECT, SL, MVT::f32, Cmp, SignOne, Zero);
+ SDValue Sel = DAG.getNode(ISD::SELECT, SL, VT, Cmp, SignOne, Zero);
- return DAG.getNode(ISD::FADD, SL, MVT::f32, T, Sel);
+ return DAG.getNode(ISD::FADD, SL, VT, T, Sel);
}
SDValue AMDGPUTargetLowering::LowerFROUND64(SDValue Op, SelectionDAG &DAG) const {
@@ -1750,8 +1841,8 @@ SDValue AMDGPUTargetLowering::LowerFROUND64(SDValue Op, SelectionDAG &DAG) const
SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- if (VT == MVT::f32)
- return LowerFROUND32(Op, DAG);
+ if (VT == MVT::f32 || VT == MVT::f16)
+ return LowerFROUND32_16(Op, DAG);
if (VT == MVT::f64)
return LowerFROUND64(Op, DAG);
@@ -2030,15 +2121,19 @@ SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
}
SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue N0 = Op.getOperand(0);
+
+ // Convert to target node to get known bits
+ if (N0.getValueType() == MVT::f32)
+ return DAG.getNode(AMDGPUISD::FP_TO_FP16, DL, Op.getValueType(), N0);
if (getTargetMachine().Options.UnsafeFPMath) {
// There is a generic expand for FP_TO_FP16 with unsafe fast math.
return SDValue();
}
- SDLoc DL(Op);
- SDValue N0 = Op.getOperand(0);
- assert (N0.getSimpleValueType() == MVT::f64);
+ assert(N0.getSimpleValueType() == MVT::f64);
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
const unsigned ExpMask = 0x7ff;
@@ -2379,6 +2474,28 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
SN->getBasePtr(), SN->getMemOperand());
}
+SDValue AMDGPUTargetLowering::performClampCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ ConstantFPSDNode *CSrc = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
+ if (!CSrc)
+ return SDValue();
+
+ const APFloat &F = CSrc->getValueAPF();
+ APFloat Zero = APFloat::getZero(F.getSemantics());
+ APFloat::cmpResult Cmp0 = F.compare(Zero);
+ if (Cmp0 == APFloat::cmpLessThan ||
+ (Cmp0 == APFloat::cmpUnordered && Subtarget->enableDX10Clamp())) {
+ return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0));
+ }
+
+ APFloat One(F.getSemantics(), "1.0");
+ APFloat::cmpResult Cmp1 = F.compare(One);
+ if (Cmp1 == APFloat::cmpGreaterThan)
+ return DCI.DAG.getConstantFP(One, SDLoc(N), N->getValueType(0));
+
+ return SDValue(CSrc, 0);
+}
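
// A standalone check (not LLVM code) of the constant fold in
// performClampCombine above: clamp(x) = median(0.0, x, 1.0), with NaN folding
// to 0.0 only when the DX10 clamp mode is enabled, and surviving otherwise.
#include <cassert>
#include <cmath>

static float foldClamp(float X, bool DX10Clamp) {
  if (X < 0.0f || (std::isnan(X) && DX10Clamp))
    return 0.0f;
  if (X > 1.0f)
    return 1.0f;
  return X; // NaN passes through when DX10 clamp is off
}

int main() {
  assert(foldClamp(-2.5f, false) == 0.0f);
  assert(foldClamp(3.0f, true) == 1.0f);
  assert(foldClamp(NAN, true) == 0.0f);
  assert(std::isnan(foldClamp(NAN, false)));
}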
+
/// Split the 64-bit value \p LHS into two 32-bit components, and perform the
/// binary operation \p Opc to it with the corresponding constant operands.
SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
@@ -2821,20 +2938,41 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC);
return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True);
}
- }
- if (VT == MVT::f32 && Cond.hasOneUse()) {
- SDValue MinMax
- = CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
- // Revisit this node so we can catch min3/max3/med3 patterns.
- //DCI.AddToWorklist(MinMax.getNode());
- return MinMax;
+ if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) {
+ SDValue MinMax
+ = combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
+ // Revisit this node so we can catch min3/max3/med3 patterns.
+ //DCI.AddToWorklist(MinMax.getNode());
+ return MinMax;
+ }
}
// There's no reason to not do this if the condition has other uses.
return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
}
+static bool isConstantFPZero(SDValue N) {
+ if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N))
+ return C->isZero() && !C->isNegative();
+ return false;
+}
+
+static unsigned inverseMinMax(unsigned Opc) {
+ switch (Opc) {
+ case ISD::FMAXNUM:
+ return ISD::FMINNUM;
+ case ISD::FMINNUM:
+ return ISD::FMAXNUM;
+ case AMDGPUISD::FMAX_LEGACY:
+ return AMDGPUISD::FMIN_LEGACY;
+ case AMDGPUISD::FMIN_LEGACY:
+ return AMDGPUISD::FMAX_LEGACY;
+ default:
+ llvm_unreachable("invalid min/max opcode");
+ }
+}
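
// A standalone check of the identity behind the fneg(min/max) combine that
// uses inverseMinMax below: -max(x, y) == min(-x, -y) and
// -min(x, y) == max(-x, -y).
#include <algorithm>
#include <cassert>

int main() {
  float X = 1.5f, Y = -2.0f;
  assert(-std::max(X, Y) == std::min(-X, -Y));
  assert(-std::min(X, Y) == std::max(-X, -Y));
}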
+
SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -2847,10 +2985,16 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
// the other uses cannot, give up. This both prevents unprofitable
// transformations and infinite loops: we won't repeatedly try to fold around
// a negate that has no 'good' form.
- //
- // TODO: Check users can fold
- if (fnegFoldsIntoOp(Opc) && !N0.hasOneUse())
- return SDValue();
+ if (N0.hasOneUse()) {
+ // This may be able to fold into the source, but at a code size cost. Don't
+ // fold if the fold into the user is free.
+ if (allUsesHaveSourceMods(N, 0))
+ return SDValue();
+ } else {
+ if (fnegFoldsIntoOp(Opc) &&
+ (allUsesHaveSourceMods(N) || !allUsesHaveSourceMods(N0.getNode())))
+ return SDValue();
+ }
SDLoc SL(N);
switch (Opc) {
@@ -2872,7 +3016,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
else
RHS = RHS.getOperand(0);
- SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS);
+ SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS, N0->getFlags());
if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
@@ -2891,7 +3035,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
else
RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
- SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS);
+ SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS, N0->getFlags());
if (!N0.hasOneUse())
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
@@ -2923,10 +3067,40 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
return Res;
}
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM:
+ case AMDGPUISD::FMAX_LEGACY:
+ case AMDGPUISD::FMIN_LEGACY: {
+ // fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
+ // fneg (fminnum x, y) -> fmaxnum (fneg x), (fneg y)
+ // fneg (fmax_legacy x, y) -> fmin_legacy (fneg x), (fneg y)
+ // fneg (fmin_legacy x, y) -> fmax_legacy (fneg x), (fneg y)
+
+ SDValue LHS = N0.getOperand(0);
+ SDValue RHS = N0.getOperand(1);
+
+ // 0 doesn't have a negated inline immediate.
+ // TODO: Shouldn't fold 1/2pi either, and should be generalized to other
+ // operations.
+ if (isConstantFPZero(RHS))
+ return SDValue();
+
+ SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
+ SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
+ unsigned Opposite = inverseMinMax(Opc);
+
+ SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags());
+ if (!N0.hasOneUse())
+ DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
+ return Res;
+ }
case ISD::FP_EXTEND:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT: // XXX - Should fround be handled?
+ case ISD::FSIN:
case AMDGPUISD::RCP:
case AMDGPUISD::RCP_LEGACY:
- case ISD::FSIN:
case AMDGPUISD::SIN_HW: {
SDValue CvtSrc = N0.getOperand(0);
if (CvtSrc.getOpcode() == ISD::FNEG) {
@@ -2941,7 +3115,7 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
// (fneg (fp_extend x)) -> (fp_extend (fneg x))
// (fneg (rcp x)) -> (rcp (fneg x))
SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
- return DAG.getNode(Opc, SL, VT, Neg);
+ return DAG.getNode(Opc, SL, VT, Neg, N0->getFlags());
}
case ISD::FP_ROUND: {
SDValue CvtSrc = N0.getOperand(0);
@@ -2959,6 +3133,45 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
return DAG.getNode(ISD::FP_ROUND, SL, VT, Neg, N0.getOperand(1));
}
+ case ISD::FP16_TO_FP: {
+ // v_cvt_f32_f16 supports source modifiers on pre-VI targets without legal
+ // f16, but legalization of f16 fneg ends up pulling it out of the source.
+ // Put the fneg back as a legal source operation that can be matched later.
+ SDLoc SL(N);
+
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // fneg (fp16_to_fp x) -> fp16_to_fp (xor x, 0x8000)
+ SDValue IntFNeg = DAG.getNode(ISD::XOR, SL, SrcVT, Src,
+ DAG.getConstant(0x8000, SL, SrcVT));
+ return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg);
+ }
+ default:
+ return SDValue();
+ }
+}
+
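(Editorial sketch.) The FP16_TO_FP case above works because an IEEE half stores its sign in bit 15, so fneg in integer form is a single XOR. A minimal standalone illustration, with a hypothetical helper name:

#include <cassert>
#include <cstdint>

// fneg on raw IEEE half bits: flip the sign bit.
static uint16_t fnegF16Bits(uint16_t Bits) { return Bits ^ 0x8000; }

int main() {
  assert(fnegF16Bits(0x3C00) == 0xBC00); // 1.0h -> -1.0h
  assert(fnegF16Bits(0x8000) == 0x0000); // -0.0h -> +0.0h
  return 0;
}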
+SDValue AMDGPUTargetLowering::performFAbsCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+
+ if (!N0.hasOneUse())
+ return SDValue();
+
+ switch (N0.getOpcode()) {
+ case ISD::FP16_TO_FP: {
+ assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal");
+ SDLoc SL(N);
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // fabs (fp16_to_fp x) -> fp16_to_fp (and x, 0x7fff)
+ SDValue IntFAbs = DAG.getNode(ISD::AND, SL, SrcVT, Src,
+ DAG.getConstant(0x7fff, SL, SrcVT));
+ return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs);
+ }
default:
return SDValue();
}
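(Editorial sketch.) The FABS case is the bitwise complement of the fneg trick above: clearing bit 15 of the raw half yields the absolute value. Same assumptions as the previous sketch:

#include <cassert>
#include <cstdint>

// fabs on raw IEEE half bits: clear the sign bit.
static uint16_t fabsF16Bits(uint16_t Bits) { return Bits & 0x7fff; }

int main() {
  assert(fabsF16Bits(0xBC00) == 0x3C00); // |-1.0h| == 1.0h
  return 0;
}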
@@ -3071,6 +3284,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return performSelectCombine(N, DCI);
case ISD::FNEG:
return performFNegCombine(N, DCI);
+ case ISD::FABS:
+ return performFAbsCombine(N, DCI);
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
assert(!N->getValueType(0).isVector() &&
@@ -3159,6 +3374,18 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return performLoadCombine(N, DCI);
case ISD::STORE:
return performStoreCombine(N, DCI);
+ case AMDGPUISD::CLAMP:
+ return performClampCombine(N, DCI);
+ case AMDGPUISD::RCP: {
+ if (const auto *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) {
+ // XXX - Should this flush denormals?
+ const APFloat &Val = CFP->getValueAPF();
+ APFloat One(Val.getSemantics(), "1.0");
+ return DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0));
+ }
+
+ break;
+ }
}
return SDValue();
}
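(Editorial note.) In the AMDGPUISD::RCP case above, a constant operand lets the reciprocal be computed at compile time; the denormal-flushing question flagged by the XXX comment is ignored here. In miniature:

// rcp of a known constant folds to an ordinary division at compile time,
// e.g. foldRcpConstant(2.0f) == 0.5f.
static float foldRcpConstant(float C) { return 1.0f / C; }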
@@ -3201,13 +3428,17 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((AMDGPUISD::NodeType)Opcode) {
case AMDGPUISD::FIRST_NUMBER: break;
// AMDIL DAG nodes
- NODE_NAME_CASE(CALL);
NODE_NAME_CASE(UMUL);
NODE_NAME_CASE(BRANCH_COND);
// AMDGPU DAG nodes
+ NODE_NAME_CASE(IF)
+ NODE_NAME_CASE(ELSE)
+ NODE_NAME_CASE(LOOP)
+ NODE_NAME_CASE(CALL)
+ NODE_NAME_CASE(RET_FLAG)
+ NODE_NAME_CASE(RETURN_TO_EPILOG)
NODE_NAME_CASE(ENDPGM)
- NODE_NAME_CASE(RETURN)
NODE_NAME_CASE(DWORDADDR)
NODE_NAME_CASE(FRACT)
NODE_NAME_CASE(SETCC)
@@ -3232,6 +3463,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(DIV_SCALE)
NODE_NAME_CASE(DIV_FMAS)
NODE_NAME_CASE(DIV_FIXUP)
+ NODE_NAME_CASE(FMAD_FTZ)
NODE_NAME_CASE(TRIG_PREOP)
NODE_NAME_CASE(RCP)
NODE_NAME_CASE(RSQ)
@@ -3265,7 +3497,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CONST_ADDRESS)
NODE_NAME_CASE(REGISTER_LOAD)
NODE_NAME_CASE(REGISTER_STORE)
- NODE_NAME_CASE(LOAD_INPUT)
NODE_NAME_CASE(SAMPLE)
NODE_NAME_CASE(SAMPLEB)
NODE_NAME_CASE(SAMPLED)
@@ -3274,6 +3505,9 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CVT_F32_UBYTE1)
NODE_NAME_CASE(CVT_F32_UBYTE2)
NODE_NAME_CASE(CVT_F32_UBYTE3)
+ NODE_NAME_CASE(CVT_PKRTZ_F16_F32)
+ NODE_NAME_CASE(FP_TO_FP16)
+ NODE_NAME_CASE(FP16_ZEXT)
NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
@@ -3338,13 +3572,11 @@ SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
}
void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
- const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const {
+ const SDValue Op, APInt &KnownZero, APInt &KnownOne,
+ const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
- KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
+ unsigned BitWidth = KnownZero.getBitWidth();
+ KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
APInt KnownZero2;
APInt KnownOne2;
@@ -3365,21 +3597,27 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
if (!CWidth)
return;
- unsigned BitWidth = 32;
uint32_t Width = CWidth->getZExtValue() & 0x1f;
if (Opc == AMDGPUISD::BFE_U32)
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width);
+ KnownZero = APInt::getHighBitsSet(32, 32 - Width);
break;
}
+ case AMDGPUISD::FP_TO_FP16:
+ case AMDGPUISD::FP16_ZEXT: {
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ // High bits are zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
+ break;
+ }
}
}
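(Editorial sketch.) The known-bits claim in the FP_TO_FP16/FP16_ZEXT case amounts to the following, assuming a 32-bit result width for illustration:

#include <cstdint>

// The f16 payload occupies the low 16 bits, so the high bits of the integer
// result are always zero; 0xFFFF0000 is APInt::getHighBitsSet(32, 16).
static bool highHalfIsZero(uint32_t ResultBits) {
  return (ResultBits & 0xFFFF0000u) == 0;
}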
unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
- SDValue Op,
- const SelectionDAG &DAG,
- unsigned Depth) const {
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+ unsigned Depth) const {
switch (Op.getOpcode()) {
case AMDGPUISD::BFE_I32: {
ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
@@ -3403,7 +3641,9 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
case AMDGPUISD::CARRY:
case AMDGPUISD::BORROW:
return 31;
-
+ case AMDGPUISD::FP_TO_FP16:
+ case AMDGPUISD::FP16_ZEXT:
+ return 16;
default:
return 1;
}
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index f6adceac6f11..d6aa0ba92bf7 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -16,6 +16,8 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
+#include "AMDGPU.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
@@ -34,10 +36,10 @@ private:
protected:
const AMDGPUSubtarget *Subtarget;
+ AMDGPUAS AMDGPUASI;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
/// \brief Split a vector store into multiple scalar stores.
/// \returns The resulting chain.
@@ -47,7 +49,7 @@ protected:
SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFROUND32_16(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
@@ -70,6 +72,7 @@ protected:
bool shouldCombineMemoryType(EVT VT) const;
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,
unsigned Opc, SDValue LHS,
@@ -85,6 +88,7 @@ protected:
SDValue RHS, DAGCombinerInfo &DCI) const;
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
@@ -111,8 +115,6 @@ protected:
SmallVectorImpl<SDValue> &Results) const;
void analyzeFormalArgumentsCompute(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
- void AnalyzeFormalArguments(CCState &State,
- const SmallVectorImpl<ISD::InputArg> &Ins) const;
void AnalyzeReturn(CCState &State,
const SmallVectorImpl<ISD::OutputArg> &Outs) const;
@@ -120,7 +122,7 @@ public:
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
bool mayIgnoreSignedZero(SDValue Op) const {
- if (getTargetMachine().Options.UnsafeFPMath) // FIXME: nsz only
+ if (getTargetMachine().Options.NoSignedZerosFPMath)
return true;
if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(Op))
@@ -158,6 +160,7 @@ public:
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+ static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
@@ -174,7 +177,7 @@ public:
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
- SDValue CombineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
SDValue CC, DAGCombinerInfo &DCI) const;
@@ -198,10 +201,12 @@ public:
void computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
- unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG,
+ unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts,
+ const SelectionDAG &DAG,
unsigned Depth = 0) const override;
/// \brief Helper function that adds Reg to the LiveIn list of the DAG's
@@ -222,6 +227,10 @@ public:
/// type of implicit parameter.
uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
const ImplicitParameter Param) const;
+
+ AMDGPUAS getAMDGPUAS() const {
+ return AMDGPUASI;
+ }
};
namespace AMDGPUISD {
@@ -229,15 +238,34 @@ namespace AMDGPUISD {
enum NodeType : unsigned {
// AMDIL ISD Opcodes
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- CALL, // Function call based on a single integer
UMUL, // 32bit unsigned multiplication
BRANCH_COND,
// End AMDIL ISD Opcodes
+
+ // Function call.
+ CALL,
+
+ // Masked control flow nodes.
+ IF,
+ ELSE,
+ LOOP,
+
+ // A uniform kernel return that terminates the wavefront.
ENDPGM,
- RETURN,
+
+ // Return to a shader part's epilog code.
+ RETURN_TO_EPILOG,
+
+ // Return with values from a non-entry function.
+ RET_FLAG,
+
DWORDADDR,
FRACT,
+
+ /// CLAMP a value between 0.0 and 1.0. NaN is clamped to 0, following the
+ /// clamp output modifier behavior with dx10_enable.
CLAMP,
+
// This is SETCC with the full mask result which is used for a compare with a
// result bit per item in the wavefront.
SETCC,
@@ -265,6 +293,9 @@ enum NodeType : unsigned {
DIV_SCALE,
DIV_FMAS,
DIV_FIXUP,
+ // Emitted instead of ISD::FMAD when f32 denormals are enabled, because
+ // mac/mad is treated as an illegal operation in that mode.
+ FMAD_FTZ,
TRIG_PREOP, // 1 ULP max error for f64
// RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
@@ -301,7 +332,6 @@ enum NodeType : unsigned {
CONST_ADDRESS,
REGISTER_LOAD,
REGISTER_STORE,
- LOAD_INPUT,
SAMPLE,
SAMPLEB,
SAMPLED,
@@ -312,6 +342,18 @@ enum NodeType : unsigned {
CVT_F32_UBYTE1,
CVT_F32_UBYTE2,
CVT_F32_UBYTE3,
+
+ // Convert two f32 values into a single register holding two packed f16
+ // values, with round to zero.
+ CVT_PKRTZ_F16_F32,
+
+ // Same as the standard node, except the high bits of the resulting integer
+ // are known 0.
+ FP_TO_FP16,
+
+ // Wrapper around fp16 results that are known to zero the high bits.
+ FP16_ZEXT,
+
/// This node is for VLIW targets and it is used to represent a vector
/// that is stored in consecutive registers with the same channel.
/// For example:
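(Editorial sketch.) For CVT_PKRTZ_F16_F32 above, the packed layout reduces to plain bit manipulation; the f32-to-f16 round-to-zero conversion itself is omitted, and src0 is assumed to land in the low half:

#include <cstdint>

// Pack two already-converted f16 bit patterns into one 32-bit register.
static uint32_t packF16Pair(uint16_t Src0, uint16_t Src1) {
  return uint32_t(Src0) | (uint32_t(Src1) << 16);
}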
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index e4dc6599e156..a01f5d37c7c1 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -30,7 +30,7 @@ using namespace llvm;
void AMDGPUInstrInfo::anchor() {}
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
- : AMDGPUGenInstrInfo(-1, -1), ST(ST) {}
+ : AMDGPUGenInstrInfo(-1, -1), ST(ST), AMDGPUASI(ST.getAMDGPUAS()) {}
// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
@@ -86,6 +86,7 @@ static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
case AMDGPUSubtarget::SEA_ISLANDS:
return SIEncodingFamily::SI;
case AMDGPUSubtarget::VOLCANIC_ISLANDS:
+ case AMDGPUSubtarget::GFX9:
return SIEncodingFamily::VI;
// FIXME: This should never be called for r600 GPUs.
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index bd8e389639f5..12caa5118342 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -16,11 +16,11 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
+#include "AMDGPU.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#define GET_INSTRINFO_HEADER
-#define GET_INSTRINFO_ENUM
#include "AMDGPUGenInstrInfo.inc"
namespace llvm {
@@ -35,6 +35,8 @@ private:
const AMDGPUSubtarget &ST;
virtual void anchor();
+protected:
+ AMDGPUAS AMDGPUASI;
public:
explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index d7fa28bdc001..56f060984f08 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -31,6 +31,10 @@ def AMDGPUFPClassOp : SDTypeProfile<1, 2,
[SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>]
>;
+def AMDGPUFPPackOp : SDTypeProfile<1, 2,
+ [SDTCisFP<1>, SDTCisSameAs<1, 2>]
+>;
+
def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
[SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
>;
@@ -42,10 +46,38 @@ def AMDGPUFmasOp : SDTypeProfile<1, 4,
def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def AMDGPUIfOp : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>]
+>;
+
+def AMDGPUElseOp : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, OtherVT>]
+>;
+
+def AMDGPULoopOp : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, OtherVT>]
+>;
+
+def AMDGPUBreakOp : SDTypeProfile<1, 1,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i64>]
+>;
+
+def AMDGPUIfBreakOp : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>]
+>;
+
+def AMDGPUElseBreakOp : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, i64>]
+>;
+
//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//
+def AMDGPUif : SDNode<"AMDGPUISD::IF", AMDGPUIfOp, [SDNPHasChain]>;
+def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>;
+def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>;
+
def AMDGPUconstdata_ptr : SDNode<
"AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>,
SDTCisVT<0, iPTR>]>
@@ -78,6 +110,11 @@ def AMDGPUrsq_clamp : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;
def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;
+def AMDGPUpkrtz_f16_f32 : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>;
+def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16", SDTFPToIntOp>;
+def AMDGPUfp16_zext : SDNode<"AMDGPUISD::FP16_ZEXT", SDTFPToIntOp>;
+
def AMDGPUfp_class : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;
// out = max(a, b) a and b are floats, where a nan comparison fails.
@@ -92,17 +129,7 @@ def AMDGPUfmul_legacy : SDNode<"AMDGPUISD::FMUL_LEGACY", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]
>;
-def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>;
-
-// out = max(a, b) a and b are signed ints
-def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// out = max(a, b) a and b are unsigned ints
-def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
+def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
// out = min(a, b) a and b are floats, where a nan comparison fails.
def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,
@@ -194,6 +221,8 @@ def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp>;
// Denominator, src2 = Numerator).
def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
+def AMDGPUfmad_ftz : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;
+
// Look Up 2.0 / pi src0 with segment select src1[4:0]
def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
@@ -291,15 +320,16 @@ def AMDGPUkill : SDNode<"AMDGPUISD::KILL", AMDGPUKillSDT,
// SI+ export
def AMDGPUExportOp : SDTypeProfile<0, 8, [
- SDTCisInt<0>, // i8 en
- SDTCisInt<1>, // i1 vm
+ SDTCisInt<0>, // i8 tgt
+ SDTCisInt<1>, // i8 en
+ // i32 or f32 src0
+ SDTCisSameAs<3, 2>, // f32 src1
+ SDTCisSameAs<4, 2>, // f32 src2
+ SDTCisSameAs<5, 2>, // f32 src3
+ SDTCisInt<6>, // i1 compr
// skip done
- SDTCisInt<2>, // i8 tgt
- SDTCisSameAs<3, 1>, // i1 compr
- SDTCisFP<4>, // f32 src0
- SDTCisSameAs<5, 4>, // f32 src1
- SDTCisSameAs<6, 4>, // f32 src2
- SDTCisSameAs<7, 4> // f32 src3
+ SDTCisInt<1> // i1 vm
]>;
def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp,
@@ -333,5 +363,9 @@ def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChai
def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
-def AMDGPUreturn : SDNode<"AMDGPUISD::RETURN", SDTNone,
+def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
+>;
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
new file mode 100644
index 000000000000..8867ed689a31
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -0,0 +1,424 @@
+//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the InstructionSelector class for
+/// AMDGPU.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstructionSelector.h"
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPURegisterBankInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "amdgpu-isel"
+
+using namespace llvm;
+
+AMDGPUInstructionSelector::AMDGPUInstructionSelector(
+ const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
+ : InstructionSelector(), TII(*STI.getInstrInfo()),
+ TRI(*STI.getRegisterInfo()), RBI(RBI), AMDGPUASI(STI.getAMDGPUAS()) {}
+
+MachineOperand
+AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
+ unsigned SubIdx) const {
+
+ MachineInstr *MI = MO.getParent();
+ MachineBasicBlock *BB = MO.getParent()->getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+
+ if (MO.isReg()) {
+ unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
+ unsigned Reg = MO.getReg();
+ BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
+ .addReg(Reg, 0, ComposedSubIdx);
+
+ return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
+ MO.isKill(), MO.isDead(), MO.isUndef(),
+ MO.isEarlyClobber(), 0, MO.isDebug(),
+ MO.isInternalRead());
+ }
+
+ assert(MO.isImm());
+
+ APInt Imm(64, MO.getImm());
+
+ switch (SubIdx) {
+ default:
+ llvm_unreachable("do not know to split immediate with this sub index.");
+ case AMDGPU::sub0:
+ return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
+ case AMDGPU::sub1:
+ return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
+ }
+}
+
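(Editorial sketch.) The immediate path of getSubOperand64 in miniature: sub0 selects the low 32 bits of the 64-bit immediate and sub1 the high 32 bits.

#include <cstdint>

static uint32_t subOperandLo(uint64_t Imm) { return uint32_t(Imm); }       // sub0
static uint32_t subOperandHi(uint64_t Imm) { return uint32_t(Imm >> 32); } // sub1
// e.g. Imm = 0x1122334455667788: sub0 == 0x55667788, sub1 == 0x11223344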
+bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
+ unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+ if (Size != 64)
+ return false;
+
+ DebugLoc DL = I.getDebugLoc();
+
+ MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
+ MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
+ .add(Lo1)
+ .add(Lo2);
+
+ MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
+ MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
+ .add(Hi1)
+ .add(Hi2);
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
+ .addReg(DstLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(DstHi)
+ .addImm(AMDGPU::sub1);
+
+ for (MachineOperand &MO : I.explicit_operands()) {
+ if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ continue;
+ RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
+ }
+
+ I.eraseFromParent();
+ return true;
+}
+
+bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
+ return selectG_ADD(I);
+}
+
+bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ DebugLoc DL = I.getDebugLoc();
+
+ // FIXME: Select store instruction based on address space
+ MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
+ .add(I.getOperand(1))
+ .add(I.getOperand(0))
+ .addImm(0)
+ .addImm(0)
+ .addImm(0);
+
+ // Now that we selected an opcode, we need to constrain the register
+ // operands to use appropriate classes.
+ bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
+
+ I.eraseFromParent();
+ return Ret;
+}
+
+bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned DstReg = I.getOperand(0).getReg();
+ unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+
+ if (Size == 32) {
+ I.setDesc(TII.get(AMDGPU::S_MOV_B32));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+
+ assert(Size == 64);
+
+ DebugLoc DL = I.getDebugLoc();
+ unsigned LoReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ unsigned HiReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ const APInt &Imm = I.getOperand(1).getCImm()->getValue();
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), LoReg)
+ .addImm(Imm.trunc(32).getZExtValue());
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
+ .addImm(Imm.ashr(32).getZExtValue());
+
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
+ .addReg(LoReg)
+ .addImm(AMDGPU::sub0)
+ .addReg(HiReg)
+ .addImm(AMDGPU::sub1);
+ // We can't call constrainSelectedInstRegOperands here, because it doesn't
+ // work for target-independent opcodes.
+ I.eraseFromParent();
+ return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
+}
+
+static bool isConstant(const MachineInstr &MI) {
+ return MI.getOpcode() == TargetOpcode::G_CONSTANT;
+}
+
+void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
+ const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
+
+ const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());
+
+ assert(PtrMI);
+
+ if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
+ return;
+
+ GEPInfo GEPInfo(*PtrMI);
+
+ for (unsigned i = 1, e = 3; i < e; ++i) {
+ const MachineOperand &GEPOp = PtrMI->getOperand(i);
+ const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
+ assert(OpDef);
+ if (isConstant(*OpDef)) {
+ // FIXME: Is it possible to have multiple Imm parts? Maybe if we
+ // are lacking other optimizations.
+ assert(GEPInfo.Imm == 0);
+ GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
+ continue;
+ }
+ const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
+ if (OpBank->getID() == AMDGPU::SGPRRegBankID)
+ GEPInfo.SgprParts.push_back(GEPOp.getReg());
+ else
+ GEPInfo.VgprParts.push_back(GEPOp.getReg());
+ }
+
+ AddrInfo.push_back(GEPInfo);
+ getAddrModeInfo(*PtrMI, MRI, AddrInfo);
+}
+
+static bool isInstrUniform(const MachineInstr &MI) {
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ const Value *Ptr = MMO->getValue();
+
+ // UndefValue means this is a load of a kernel input. These are uniform.
+ // Sometimes LDS instructions have constant pointers.
+ // If Ptr is null, then that means this mem operand contains a
+ // PseudoSourceValue like GOT.
+ if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
+ isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
+ return true;
+
+ const Instruction *I = dyn_cast<Instruction>(Ptr);
+ return I && I->getMetadata("amdgpu.uniform");
+}
+
+static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {
+
+ if (LoadSize == 32)
+ return BaseOpcode;
+
+ switch (BaseOpcode) {
+ case AMDGPU::S_LOAD_DWORD_IMM:
+ switch (LoadSize) {
+ case 64:
+ return AMDGPU::S_LOAD_DWORDX2_IMM;
+ case 128:
+ return AMDGPU::S_LOAD_DWORDX4_IMM;
+ case 256:
+ return AMDGPU::S_LOAD_DWORDX8_IMM;
+ case 512:
+ return AMDGPU::S_LOAD_DWORDX16_IMM;
+ }
+ break;
+ case AMDGPU::S_LOAD_DWORD_IMM_ci:
+ switch (LoadSize) {
+ case 64:
+ return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
+ case 128:
+ return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
+ case 256:
+ return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
+ case 512:
+ return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
+ }
+ break;
+ case AMDGPU::S_LOAD_DWORD_SGPR:
+ switch (LoadSize) {
+ case 64:
+ return AMDGPU::S_LOAD_DWORDX2_SGPR;
+ case 128:
+ return AMDGPU::S_LOAD_DWORDX4_SGPR;
+ case 256:
+ return AMDGPU::S_LOAD_DWORDX8_SGPR;
+ case 512:
+ return AMDGPU::S_LOAD_DWORDX16_SGPR;
+ }
+ break;
+ }
+ llvm_unreachable("Invalid base smrd opcode or size");
+}
+
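A couple of usage examples for getSmrdOpcode, read straight off the switch above:

// getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, 128) -> AMDGPU::S_LOAD_DWORDX4_IMM
// getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, 64) -> AMDGPU::S_LOAD_DWORDX2_SGPR
// A LoadSize of 32 returns the base opcode unchanged; any other unhandled
// combination hits the llvm_unreachable.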
+bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
+ for (const GEPInfo &GEPInfo : AddrInfo) {
+ if (!GEPInfo.VgprParts.empty())
+ return true;
+ }
+ return false;
+}
+
+bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
+ ArrayRef<GEPInfo> AddrInfo) const {
+
+ if (!I.hasOneMemOperand())
+ return false;
+
+ if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS)
+ return false;
+
+ if (!isInstrUniform(I))
+ return false;
+
+ if (hasVgprParts(AddrInfo))
+ return false;
+
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned DstReg = I.getOperand(0).getReg();
+ const DebugLoc &DL = I.getDebugLoc();
+ unsigned Opcode;
+ unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
+
+ if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {
+
+ const GEPInfo &GEPInfo = AddrInfo[0];
+
+ unsigned PtrReg = GEPInfo.SgprParts[0];
+ int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
+ if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
+ Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
+
+ MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
+ .addReg(PtrReg)
+ .addImm(EncodedImm)
+ .addImm(0); // glc
+ return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
+ }
+
+ if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
+ isUInt<32>(EncodedImm)) {
+ Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
+ MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
+ .addReg(PtrReg)
+ .addImm(EncodedImm)
+ .addImm(0); // glc
+ return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
+ }
+
+ if (isUInt<32>(GEPInfo.Imm)) {
+ Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
+ unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
+ .addImm(GEPInfo.Imm);
+
+ MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
+ .addReg(PtrReg)
+ .addReg(OffsetReg)
+ .addImm(0); // glc
+ return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
+ }
+ }
+
+ unsigned PtrReg = I.getOperand(1).getReg();
+ Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
+ MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
+ .addReg(PtrReg)
+ .addImm(0)
+ .addImm(0); // glc
+ return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
+}
+
+bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ DebugLoc DL = I.getDebugLoc();
+ unsigned DstReg = I.getOperand(0).getReg();
+ unsigned PtrReg = I.getOperand(1).getReg();
+ unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
+ unsigned Opcode;
+
+ SmallVector<GEPInfo, 4> AddrInfo;
+
+ getAddrModeInfo(I, MRI, AddrInfo);
+
+ if (selectSMRD(I, AddrInfo)) {
+ I.eraseFromParent();
+ return true;
+ }
+
+ switch (LoadSize) {
+ default:
+ llvm_unreachable("Load size not supported\n");
+ case 32:
+ Opcode = AMDGPU::FLAT_LOAD_DWORD;
+ break;
+ case 64:
+ Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
+ break;
+ }
+
+ MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
+ .add(I.getOperand(0))
+ .addReg(PtrReg)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0);
+
+ bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
+ I.eraseFromParent();
+ return Ret;
+}
+
+bool AMDGPUInstructionSelector::select(MachineInstr &I) const {
+
+ if (!isPreISelGenericOpcode(I.getOpcode()))
+ return true;
+
+ switch (I.getOpcode()) {
+ default:
+ break;
+ case TargetOpcode::G_ADD:
+ return selectG_ADD(I);
+ case TargetOpcode::G_CONSTANT:
+ return selectG_CONSTANT(I);
+ case TargetOpcode::G_GEP:
+ return selectG_GEP(I);
+ case TargetOpcode::G_LOAD:
+ return selectG_LOAD(I);
+ case TargetOpcode::G_STORE:
+ return selectG_STORE(I);
+ }
+ return false;
+}
diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
new file mode 100644
index 000000000000..c87102e55dfb
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -0,0 +1,67 @@
+//===- AMDGPUInstructionSelector --------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the InstructionSelector class for
+/// AMDGPU.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
+
+#include "AMDGPU.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class AMDGPUInstrInfo;
+class AMDGPURegisterBankInfo;
+class MachineInstr;
+class MachineOperand;
+class MachineRegisterInfo;
+class SIInstrInfo;
+class SIRegisterInfo;
+class SISubtarget;
+
+class AMDGPUInstructionSelector : public InstructionSelector {
+public:
+ AMDGPUInstructionSelector(const SISubtarget &STI,
+ const AMDGPURegisterBankInfo &RBI);
+
+ bool select(MachineInstr &I) const override;
+private:
+ struct GEPInfo {
+ const MachineInstr &GEP;
+ SmallVector<unsigned, 2> SgprParts;
+ SmallVector<unsigned, 2> VgprParts;
+ int64_t Imm;
+ GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { }
+ };
+
+ MachineOperand getSubOperand64(MachineOperand &MO, unsigned SubIdx) const;
+ bool selectG_CONSTANT(MachineInstr &I) const;
+ bool selectG_ADD(MachineInstr &I) const;
+ bool selectG_GEP(MachineInstr &I) const;
+ bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
+ void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
+ SmallVectorImpl<GEPInfo> &AddrInfo) const;
+ bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
+ bool selectG_LOAD(MachineInstr &I) const;
+ bool selectG_STORE(MachineInstr &I) const;
+
+ const SIInstrInfo &TII;
+ const SIRegisterInfo &TRI;
+ const AMDGPURegisterBankInfo &RBI;
+protected:
+ AMDGPUAS AMDGPUASI;
+};
+
+} // End llvm namespace.
+#endif
diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td
index 59cba636c586..b8d681298dee 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -72,6 +72,49 @@ def u8imm : Operand<i8> {
def brtarget : Operand<OtherVT>;
//===----------------------------------------------------------------------===//
+// Misc. PatFrags
+//===----------------------------------------------------------------------===//
+
+class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
+ (ops node:$src0),
+ (op $src0),
+ [{ return N->hasOneUse(); }]
+>;
+
+class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
+ (ops node:$src0, node:$src1),
+ (op $src0, $src1),
+ [{ return N->hasOneUse(); }]
+>;
+
+class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
+ (ops node:$src0, node:$src1, node:$src2),
+ (op $src0, $src1, $src2),
+ [{ return N->hasOneUse(); }]
+>;
+
+def trunc_oneuse : HasOneUseUnaryOp<trunc>;
+
+let Properties = [SDNPCommutative, SDNPAssociative] in {
+def smax_oneuse : HasOneUseBinOp<smax>;
+def smin_oneuse : HasOneUseBinOp<smin>;
+def umax_oneuse : HasOneUseBinOp<umax>;
+def umin_oneuse : HasOneUseBinOp<umin>;
+def fminnum_oneuse : HasOneUseBinOp<fminnum>;
+def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
+def and_oneuse : HasOneUseBinOp<and>;
+def or_oneuse : HasOneUseBinOp<or>;
+def xor_oneuse : HasOneUseBinOp<xor>;
+} // Properties = [SDNPCommutative, SDNPAssociative]
+
+def sub_oneuse : HasOneUseBinOp<sub>;
+
+def srl_oneuse : HasOneUseBinOp<srl>;
+def shl_oneuse : HasOneUseBinOp<shl>;
+
+def select_oneuse : HasOneUseTernaryOp<select>;
+
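(Editorial note.) Each *_oneuse fragment above matches its node only when that node has a single use, which keeps multi-use intermediates from being duplicated into combined instructions. For example:

// (smax (smin_oneuse x, y), z) matches only if the smin result feeds nothing
// but this smax; otherwise the intermediate value would still need computing,
// defeating the point of fusing it (see the med3 and BFE patterns below).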
+//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//
@@ -157,27 +200,11 @@ def COND_NULL : PatLeaf <
//===----------------------------------------------------------------------===//
-// Misc. PatFrags
-//===----------------------------------------------------------------------===//
-
-class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
- (ops node:$src0, node:$src1),
- (op $src0, $src1),
- [{ return N->hasOneUse(); }]
->;
-
-class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
- (ops node:$src0, node:$src1, node:$src2),
- (op $src0, $src1, $src2),
- [{ return N->hasOneUse(); }]
->;
-
-//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//
class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS;
}]>;
class PrivateLoad <SDPatternOperator op> : PrivateMemOp <
@@ -195,7 +222,7 @@ def truncstorei16_private : PrivateStore <truncstorei16>;
def store_private : PrivateStore <store>;
class GlobalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
// Global address space loads
@@ -215,7 +242,7 @@ def global_store_atomic : GlobalStore<atomic_store>;
class ConstantMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
}]>;
// Constant address space loads
@@ -226,7 +253,7 @@ class ConstantLoad <SDPatternOperator op> : ConstantMemOp <
def constant_load : ConstantLoad<load>;
class LocalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
// Local address space loads
@@ -239,7 +266,7 @@ class LocalStore <SDPatternOperator op> : LocalMemOp <
>;
class FlatMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
- return cast<MemSDNode>(N)->getAddressSPace() == AMDGPUAS::FLAT_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSPace() == AMDGPUASI.FLAT_ADDRESS;
}]>;
class FlatLoad <SDPatternOperator op> : FlatMemOp <
@@ -321,7 +348,7 @@ def local_store_aligned8bytes : Aligned8Bytes <
class local_binary_atomic_op<SDNode atomic_op> :
PatFrag<(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
@@ -339,7 +366,7 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
@@ -349,7 +376,7 @@ multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
(cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
AtomicSDNode *AN = cast<AtomicSDNode>(N);
return AN->getMemoryVT() == MVT::i32 &&
- AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
def _64_local : PatFrag<
@@ -357,7 +384,7 @@ multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
(cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
AtomicSDNode *AN = cast<AtomicSDNode>(N);
return AN->getMemoryVT() == MVT::i64 &&
- AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
}
@@ -367,17 +394,17 @@ multiclass global_binary_atomic_op<SDNode atomic_op> {
def "" : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
def _noret : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
def _ret : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
}
defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
@@ -395,22 +422,22 @@ defm atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
def AMDGPUatomic_cmp_swap_global : PatFrag<
(ops node:$ptr, node:$value),
(AMDGPUatomic_cmp_swap node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
def atomic_cmp_swap_global : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>;
def atomic_cmp_swap_global_noret : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
def atomic_cmp_swap_global_ret : PatFrag<
(ops node:$ptr, node:$cmp, node:$value),
(atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
@@ -422,6 +449,7 @@ int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
int FP16_ONE = 0x3C00;
+int V2FP16_ONE = 0x3C003C00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
@@ -452,7 +480,7 @@ class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"CLAMP $dst, $src0",
- [(set f32:$dst, (AMDGPUclamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+ [(set f32:$dst, (AMDGPUclamp f32:$src0))]
>;
class FABS <RegisterClass rc> : AMDGPUShaderInst <
@@ -565,6 +593,12 @@ multiclass BFIPatterns <Instruction BFI_INT,
>;
def : Pat <
+ (f32 (fcopysign f32:$src0, f64:$src1)),
+ (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0,
+ (i32 (EXTRACT_SUBREG $src1, sub1)))
+ >;
+
+ def : Pat <
(f64 (fcopysign f64:$src0, f64:$src1)),
(REG_SEQUENCE RC64,
(i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
@@ -602,10 +636,22 @@ def IMMPopCount : SDNodeXForm<imm, [{
MVT::i32);
}]>;
-class BFEPattern <Instruction BFE, Instruction MOV> : Pat <
- (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
- (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
->;
+multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
+ def : Pat <
+ (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
+ (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
+ >;
+
+ def : Pat <
+ (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+ (UBFE $src, (i32 0), $width)
+ >;
+
+ def : Pat <
+ (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
+ (SBFE $src, (i32 0), $width)
+ >;
+}
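(Editorial sketch.) Why the shift-pair patterns above are bitfield extracts, as a standalone check, valid for widths 0 < W <= 32:

#include <cassert>
#include <cstdint>

// (srl (shl x, 32-w), 32-w) keeps bits [w-1:0] and zeroes the rest: an
// unsigned bitfield extract at offset 0. The sra form sign-extends from
// bit w-1 instead.
static uint32_t ubfeLow(uint32_t X, unsigned W) {
  return (X << (32 - W)) >> (32 - W);
}

int main() {
  assert(ubfeLow(0xDEADBEEFu, 8) == 0xEFu);
  return 0;
}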
// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : Pat <
@@ -618,23 +664,13 @@ class ROTRPattern <Instruction BIT_ALIGN> : Pat <
class IntMed3Pat<Instruction med3Inst,
SDPatternOperator max,
SDPatternOperator max_oneuse,
- SDPatternOperator min_oneuse> : Pat<
- (max (min_oneuse i32:$src0, i32:$src1),
- (min_oneuse (max_oneuse i32:$src0, i32:$src1), i32:$src2)),
+ SDPatternOperator min_oneuse,
+ ValueType vt = i32> : Pat<
+ (max (min_oneuse vt:$src0, vt:$src1),
+ (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
(med3Inst $src0, $src1, $src2)
>;
-let Properties = [SDNPCommutative, SDNPAssociative] in {
-def smax_oneuse : HasOneUseBinOp<smax>;
-def smin_oneuse : HasOneUseBinOp<smin>;
-def umax_oneuse : HasOneUseBinOp<umax>;
-def umin_oneuse : HasOneUseBinOp<umin>;
-} // Properties = [SDNPCommutative, SDNPAssociative]
-
-def sub_oneuse : HasOneUseBinOp<sub>;
-
-def select_oneuse : HasOneUseTernaryOp<select>;
-
// Special conversion patterns
def cvt_rpi_i32_f32 : PatFrag <
diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp b/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
index 8e3471bd2083..86dc9bd9ea74 100644
--- a/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUIntrinsicInfo.cpp
@@ -54,14 +54,7 @@ std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
FunctionType *AMDGPUIntrinsicInfo::getType(LLVMContext &Context, unsigned ID,
ArrayRef<Type*> Tys) const {
// FIXME: Re-use Intrinsic::getType machinery
- switch (ID) {
- case AMDGPUIntrinsic::amdgcn_fdiv_fast: {
- Type *F32Ty = Type::getFloatTy(Context);
- return FunctionType::get(F32Ty, { F32Ty, F32Ty }, false);
- }
- default:
- llvm_unreachable("unhandled intrinsic");
- }
+ llvm_unreachable("unhandled intrinsic");
}
unsigned AMDGPUIntrinsicInfo::lookupName(const char *NameData,
@@ -97,8 +90,8 @@ Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
Function *F
= cast<Function>(M->getOrInsertFunction(getName(IntrID, Tys), FTy));
- AttributeSet AS = getAttributes(M->getContext(),
- static_cast<AMDGPUIntrinsic::ID>(IntrID));
+ AttributeList AS =
+ getAttributes(M->getContext(), static_cast<AMDGPUIntrinsic::ID>(IntrID));
F->setAttributes(AS);
return F;
}
diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/lib/Target/AMDGPU/AMDGPUIntrinsics.td
index ceae0b575395..18c9bd933af2 100644
--- a/lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ b/lib/Target/AMDGPU/AMDGPUIntrinsics.td
@@ -12,25 +12,8 @@
//===----------------------------------------------------------------------===//
let TargetPrefix = "AMDGPU", isTarget = 1 in {
- def int_AMDGPU_clamp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
-
def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
def int_AMDGPU_kilp : Intrinsic<[], [], []>;
-
- // Deprecated in favor of llvm.amdgcn.sffbh
- def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
-
- // Deprecated in favor of separate int_amdgcn_cube* intrinsics.
- def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-
- // Deprecated in favor of expanded bit operations
- def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-
- // Deprecated in favor of llvm.amdgcn.rsq
- def int_AMDGPU_rsq : Intrinsic<
- [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
- >;
}
include "SIIntrinsics.td"
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
new file mode 100644
index 000000000000..a2567a549028
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -0,0 +1,62 @@
+//===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the MachineLegalizer class for
+/// AMDGPU.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPULegalizerInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
+ using namespace TargetOpcode;
+
+ const LLT S32 = LLT::scalar(32);
+ const LLT S64 = LLT::scalar(64);
+ const LLT P1 = LLT::pointer(1, 64);
+ const LLT P2 = LLT::pointer(2, 64);
+
+ setAction({G_CONSTANT, S64}, Legal);
+
+ setAction({G_GEP, P1}, Legal);
+ setAction({G_GEP, P2}, Legal);
+ setAction({G_GEP, 1, S64}, Legal);
+
+ setAction({G_LOAD, P1}, Legal);
+ setAction({G_LOAD, P2}, Legal);
+ setAction({G_LOAD, S32}, Legal);
+ setAction({G_LOAD, 1, P1}, Legal);
+ setAction({G_LOAD, 1, P2}, Legal);
+
+ setAction({G_STORE, S32}, Legal);
+ setAction({G_STORE, 1, P1}, Legal);
+
+ // FIXME: When RegBankSelect inserts copies, it will only create new
+ // registers with scalar types. This means we can end up with
+ // G_LOAD/G_STORE/G_GEP instructions with scalar types for their pointer
+ // operands. In assert builds, the instruction selector will assert
+ // if it sees a generic instruction which isn't legal, so we need to
+ // tell it that scalar types are legal for pointer operands.
+ setAction({G_GEP, S64}, Legal);
+ setAction({G_LOAD, 1, S64}, Legal);
+ setAction({G_STORE, 1, S64}, Legal);
+
+ computeTables();
+}
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
new file mode 100644
index 000000000000..291e3361f163
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -0,0 +1,30 @@
+//===- AMDGPULegalizerInfo ---------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the MachineLegalizer class for
+/// AMDGPU.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+namespace llvm {
+
+class LLVMContext;
+
+/// This class provides the information for the target register banks.
+class AMDGPULegalizerInfo : public LegalizerInfo {
+public:
+ AMDGPULegalizerInfo();
+};
+} // End llvm namespace.
+#endif
diff --git a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
new file mode 100644
index 000000000000..dcb6670621ee
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -0,0 +1,160 @@
+//===-- AMDGPULowerIntrinsics.cpp -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+
+#define DEBUG_TYPE "amdgpu-lower-intrinsics"
+
+using namespace llvm;
+
+namespace {
+
+const unsigned MaxStaticSize = 1024;
+
+class AMDGPULowerIntrinsics : public ModulePass {
+private:
+ const TargetMachine *TM;
+
+ bool makeLIDRangeMetadata(Function &F) const;
+
+public:
+ static char ID;
+
+ AMDGPULowerIntrinsics(const TargetMachine *TM = nullptr)
+ : ModulePass(ID), TM(TM) { }
+ bool runOnModule(Module &M) override;
+ StringRef getPassName() const override {
+ return "AMDGPU Lower Intrinsics";
+ }
+};
+
+}
+
+char AMDGPULowerIntrinsics::ID = 0;
+
+char &llvm::AMDGPULowerIntrinsicsID = AMDGPULowerIntrinsics::ID;
+
+INITIALIZE_TM_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE,
+ "Lower intrinsics", false, false)
+
+// TODO: Should refine based on estimated number of accesses (e.g. does it
+// require splitting based on alignment)
+static bool shouldExpandOperationWithSize(Value *Size) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(Size);
+ return !CI || (CI->getZExtValue() > MaxStaticSize);
+}
+
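Behavior of shouldExpandOperationWithSize at a glance, per the code above with MaxStaticSize = 1024:

// Size not a ConstantInt (unknown at compile time) -> true (expand to a loop)
// Constant size 4096 (> 1024)                      -> true
// Constant size 1024 or smaller                    -> false (keep the intrinsic)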
+static bool expandMemIntrinsicUses(Function &F) {
+ Intrinsic::ID ID = F.getIntrinsicID();
+ bool Changed = false;
+
+ for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
+ Instruction *Inst = cast<Instruction>(*I);
+ ++I;
+
+ switch (ID) {
+ case Intrinsic::memcpy: {
+ auto *Memcpy = cast<MemCpyInst>(Inst);
+ if (shouldExpandOperationWithSize(Memcpy->getLength())) {
+ expandMemCpyAsLoop(Memcpy);
+ Changed = true;
+ Memcpy->eraseFromParent();
+ }
+
+ break;
+ }
+ case Intrinsic::memmove: {
+ auto *Memmove = cast<MemMoveInst>(Inst);
+ if (shouldExpandOperationWithSize(Memmove->getLength())) {
+ expandMemMoveAsLoop(Memmove);
+ Changed = true;
+ Memmove->eraseFromParent();
+ }
+
+ break;
+ }
+ case Intrinsic::memset: {
+ auto *Memset = cast<MemSetInst>(Inst);
+ if (shouldExpandOperationWithSize(Memset->getLength())) {
+ expandMemSetAsLoop(Memset);
+ Changed = true;
+ Memset->eraseFromParent();
+ }
+
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
+
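(Editorial note.) In the loop above the iterator is advanced before anything that might erase the instruction, since erasing a user invalidates an iterator pointing at it. The idiom in isolation:

#include <list>

// Erase-while-iterating: bump the iterator first so it stays valid even if
// the element just visited is removed.
static void eraseAll(std::list<int> &Users) {
  for (auto I = Users.begin(), E = Users.end(); I != E;) {
    auto Cur = I++;   // advance before erasing
    Users.erase(Cur); // safe: I no longer points at *Cur
  }
}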
+bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {
+ if (!TM)
+ return false;
+
+ bool Changed = false;
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+
+ for (auto *U : F.users()) {
+ auto *CI = dyn_cast<CallInst>(U);
+ if (!CI)
+ continue;
+
+ Changed |= ST.makeLIDRangeMetadata(CI);
+ }
+ return Changed;
+}
+
+bool AMDGPULowerIntrinsics::runOnModule(Module &M) {
+ bool Changed = false;
+
+ for (Function &F : M) {
+ if (!F.isDeclaration())
+ continue;
+
+ switch (F.getIntrinsicID()) {
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ if (expandMemIntrinsicUses(F))
+ Changed = true;
+ break;
+
+ case Intrinsic::amdgcn_workitem_id_x:
+ case Intrinsic::r600_read_tidig_x:
+ case Intrinsic::amdgcn_workitem_id_y:
+ case Intrinsic::r600_read_tidig_y:
+ case Intrinsic::amdgcn_workitem_id_z:
+ case Intrinsic::r600_read_tidig_z:
+ case Intrinsic::r600_read_local_size_x:
+ case Intrinsic::r600_read_local_size_y:
+ case Intrinsic::r600_read_local_size_z:
+ Changed |= makeLIDRangeMetadata(F);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
+
+ModulePass *llvm::createAMDGPULowerIntrinsicsPass(const TargetMachine *TM) {
+ return new AMDGPULowerIntrinsics(TM);
+}
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 7d56355074b1..14ee1c81f8fa 100644
--- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -151,6 +151,28 @@ bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
return MCInstLowering.lowerOperand(MO, MCOp);
}
+const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
+ // TargetMachine does not support LLVM-style casts, so use a C++-style
+ // static_cast. This is safe because TM is always an AMDGPUTargetMachine
+ // or a class derived from it.
+ auto *AT = static_cast<AMDGPUTargetMachine*>(&TM);
+ auto *CE = dyn_cast<ConstantExpr>(CV);
+
+ // Lower null pointers in the private and local address spaces: Clang
+ // generates an addrspacecast to them for null pointers, and the cast
+ // needs to be lowered to the target's null value here.
+ if (CE && CE->getOpcode() == Instruction::AddrSpaceCast) {
+ auto Op = CE->getOperand(0);
+ auto SrcAddr = Op->getType()->getPointerAddressSpace();
+ if (Op->isNullValue() && AT->getNullPointerValue(SrcAddr) == 0) {
+ auto DstAddr = CE->getType()->getPointerAddressSpace();
+ return MCConstantExpr::create(AT->getNullPointerValue(DstAddr),
+ OutContext);
+ }
+ }
+ return AsmPrinter::lowerConstant(CV);
+}
+
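(Editorial note.) The interesting path of lowerConstant above, spelled out; concrete null-pointer values are target-defined and not assumed here:

// Given: an addrspacecast of a null pointer into address space AS
// When:  the source address space's null value is 0
// Emit:  an MCConstantExpr of getNullPointerValue(AS), which may be nonzero
// Else:  defer to AsmPrinter::lowerConstant as before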
void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
@@ -162,7 +184,7 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
LLVMContext &C = MI->getParent()->getParent()->getFunction()->getContext();
C.emitError("Illegal instruction detected: " + Err);
- MI->dump();
+ MI->print(errs());
}
if (MI->isBundle()) {
@@ -173,8 +195,9 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
++I;
}
} else {
- // We don't want SI_MASK_BRANCH/SI_RETURN encoded. They are placeholder
- // terminator instructions and should only be printed as comments.
+ // We don't want SI_MASK_BRANCH/SI_RETURN_TO_EPILOG encoded. They are
+ // placeholder terminator instructions and should only be printed as
+ // comments.
if (MI->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
if (isVerbose()) {
SmallVector<char, 16> BBStr;
@@ -190,9 +213,9 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
- if (MI->getOpcode() == AMDGPU::SI_RETURN) {
+ if (MI->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
if (isVerbose())
- OutStreamer->emitRawComment(" return");
+ OutStreamer->emitRawComment(" return to shader part epilog");
return;
}
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 40c3327a98db..27fe639e3d4b 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -12,6 +12,20 @@
using namespace llvm;
+static bool isEntryFunctionCC(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ return true;
+ default:
+ return false;
+ }
+}
+
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo(),
LocalMemoryObjects(),
@@ -19,8 +33,8 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MaxKernArgAlign(0),
LDSSize(0),
ABIArgOffset(0),
- IsKernel(MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_KERNEL ||
- MF.getFunction()->getCallingConv() == CallingConv::SPIR_KERNEL) {
+ IsEntryFunction(isEntryFunctionCC(MF.getFunction()->getCallingConv())),
+ NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
// FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
// except reserved size is not correctly aligned.
}
diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 5d0640b816f3..8bfeb67ad4ec 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -30,7 +30,11 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
/// Start of implicit kernel args
unsigned ABIArgOffset;
- bool IsKernel;
+  // Kernels + shaders, i.e. functions called by the driver and not called
+  // by other functions.
+ bool IsEntryFunction;
+
+ bool NoSignedZerosFPMath;
public:
AMDGPUMachineFunction(const MachineFunction &MF);
@@ -66,8 +70,12 @@ public:
return LDSSize;
}
- bool isKernel() const {
- return IsKernel;
+ bool isEntryFunction() const {
+ return IsEntryFunction;
+ }
+
+ bool hasNoSignedZerosFPMath() const {
+ return NoSignedZerosFPMath;
}
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalValue &GV);
diff --git a/lib/Target/AMDGPU/AMDGPUPTNote.h b/lib/Target/AMDGPU/AMDGPUPTNote.h
index 947d45b66969..71b9ab699b96 100644
--- a/lib/Target/AMDGPU/AMDGPUPTNote.h
+++ b/lib/Target/AMDGPU/AMDGPUPTNote.h
@@ -19,12 +19,13 @@
namespace AMDGPU {
-namespace PT_NOTE {
+namespace ElfNote {
const char SectionName[] = ".note";
const char NoteName[] = "AMD";
+// TODO: Move this enum to include/llvm/Support so it can be used in tools?
enum NoteType{
NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1,
NT_AMDGPU_HSA_HSAIL = 2,
@@ -32,7 +33,7 @@ enum NoteType{
NT_AMDGPU_HSA_PRODUCER = 4,
NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5,
NT_AMDGPU_HSA_EXTENSION = 6,
- NT_AMDGPU_HSA_RUNTIME_METADATA = 7,
+ NT_AMDGPU_HSA_CODE_OBJECT_METADATA = 10,
NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101,
NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
};
diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index baa28de7a770..4fb262c6277c 100644
--- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -14,12 +14,49 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <map>
+#include <tuple>
+#include <utility>
+#include <vector>
#define DEBUG_TYPE "amdgpu-promote-alloca"
@@ -31,16 +68,16 @@ namespace {
class AMDGPUPromoteAlloca : public FunctionPass {
private:
const TargetMachine *TM;
- Module *Mod;
- const DataLayout *DL;
- MDNode *MaxWorkGroupSizeRange;
+ Module *Mod = nullptr;
+ const DataLayout *DL = nullptr;
+ AMDGPUAS AS;
// FIXME: This should be per-kernel.
- uint32_t LocalMemLimit;
- uint32_t CurrentLocalMemUsage;
+ uint32_t LocalMemLimit = 0;
+ uint32_t CurrentLocalMemUsage = 0;
- bool IsAMDGCN;
- bool IsAMDHSA;
+ bool IsAMDGCN = false;
+ bool IsAMDHSA = false;
std::pair<Value *, Value *> getLocalSizeYZ(IRBuilder<> &Builder);
Value *getWorkitemID(IRBuilder<> &Builder, unsigned N);
@@ -63,15 +100,7 @@ public:
static char ID;
AMDGPUPromoteAlloca(const TargetMachine *TM_ = nullptr) :
- FunctionPass(ID),
- TM(TM_),
- Mod(nullptr),
- DL(nullptr),
- MaxWorkGroupSizeRange(nullptr),
- LocalMemLimit(0),
- CurrentLocalMemUsage(0),
- IsAMDGCN(false),
- IsAMDHSA(false) { }
+ FunctionPass(ID), TM(TM_) {}
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
@@ -86,7 +115,7 @@ public:
}
};
-} // End anonymous namespace
+} // end anonymous namespace
char AMDGPUPromoteAlloca::ID = 0;
@@ -95,7 +124,6 @@ INITIALIZE_TM_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE,
char &llvm::AMDGPUPromoteAllocaID = AMDGPUPromoteAlloca::ID;
-
bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
if (!TM)
return false;
@@ -103,13 +131,6 @@ bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
Mod = &M;
DL = &Mod->getDataLayout();
- // The maximum workitem id.
- //
- // FIXME: Should get as subtarget property. Usually runtime enforced max is
- // 256.
- MDBuilder MDB(Mod->getContext());
- MaxWorkGroupSizeRange = MDB.createRange(APInt(32, 0), APInt(32, 2048));
-
const Triple &TT = TM->getTargetTriple();
IsAMDGCN = TT.getArch() == Triple::amdgcn;
@@ -125,6 +146,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
if (!ST.isPromoteAllocaEnabled())
return false;
+ AS = AMDGPU::getAMDGPUAS(*F.getParent());
FunctionType *FTy = F.getFunctionType();
@@ -133,7 +155,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
// we cannot use local memory in the pass.
for (Type *ParamTy : FTy->params()) {
PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
- if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
LocalMemLimit = 0;
DEBUG(dbgs() << "Function has local memory argument. Promoting to "
"local memory disabled.\n");
@@ -150,7 +172,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
// Check how much local memory is being used by global objects
CurrentLocalMemUsage = 0;
for (GlobalVariable &GV : Mod->globals()) {
- if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+ if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS)
continue;
for (const User *U : GV.users()) {
@@ -175,7 +197,8 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
}
}
- unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage);
+ unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
+ F);
// Restrict local memory usage so that we don't drastically reduce occupancy,
// unless it is already significantly reduced.
@@ -196,7 +219,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
// Round up to the next tier of usage.
unsigned MaxSizeWithWaveCount
- = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy);
+ = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);
// Program is possibly broken by using more local mem than available.
if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
@@ -226,6 +249,9 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
std::pair<Value *, Value *>
AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(
+ *Builder.GetInsertBlock()->getParent());
+
if (!IsAMDHSA) {
Function *LocalSizeYFn
= Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_y);
@@ -235,8 +261,8 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
CallInst *LocalSizeY = Builder.CreateCall(LocalSizeYFn, {});
CallInst *LocalSizeZ = Builder.CreateCall(LocalSizeZFn, {});
- LocalSizeY->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
- LocalSizeZ->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
+ ST.makeLIDRangeMetadata(LocalSizeY);
+ ST.makeLIDRangeMetadata(LocalSizeZ);
return std::make_pair(LocalSizeY, LocalSizeZ);
}
@@ -279,15 +305,15 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
= Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_dispatch_ptr);
CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
- DispatchPtr->addAttribute(AttributeSet::ReturnIndex, Attribute::NoAlias);
- DispatchPtr->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+ DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
// Size of the dispatch packet struct.
- DispatchPtr->addDereferenceableAttr(AttributeSet::ReturnIndex, 64);
+ DispatchPtr->addDereferenceableAttr(AttributeList::ReturnIndex, 64);
Type *I32Ty = Type::getInt32Ty(Mod->getContext());
Value *CastDispatchPtr = Builder.CreateBitCast(
- DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));
+ DispatchPtr, PointerType::get(I32Ty, AS.CONSTANT_ADDRESS));
// We could do a single 64-bit load here, but it's likely that the basic
// 32-bit and extract sequence is already present, and it is probably easier
@@ -298,10 +324,10 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 2);
LoadInst *LoadZU = Builder.CreateAlignedLoad(GEPZU, 4);
- MDNode *MD = llvm::MDNode::get(Mod->getContext(), None);
+ MDNode *MD = MDNode::get(Mod->getContext(), None);
LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD);
- LoadZU->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
+ ST.makeLIDRangeMetadata(LoadZU);
// Extract y component. Upper half of LoadZU should be zero already.
Value *Y = Builder.CreateLShr(LoadXY, 16);
@@ -310,6 +336,8 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
}
Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(
+ *Builder.GetInsertBlock()->getParent());
Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic;
switch (N) {
@@ -332,7 +360,7 @@ Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
CallInst *CI = Builder.CreateCall(WorkitemIdFn);
- CI->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
+ ST.makeLIDRangeMetadata(CI);
return CI;
}
@@ -383,7 +411,7 @@ static bool canVectorizeInst(Instruction *Inst, User *User) {
}
}
-static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
+static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());
DEBUG(dbgs() << "Alloca candidate for vectorization\n");
@@ -438,7 +466,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
IRBuilder<> Builder(Inst);
switch (Inst->getOpcode()) {
case Instruction::Load: {
- Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
+ Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
Value *Ptr = Inst->getOperand(0);
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
@@ -450,7 +478,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
break;
}
case Instruction::Store: {
- Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
+ Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
Value *Ptr = Inst->getOperand(1);
Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
@@ -580,6 +608,9 @@ bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
}
if (UseInst->getOpcode() == Instruction::AddrSpaceCast) {
+ // Give up if the pointer may be captured.
+ if (PointerMayBeCaptured(UseInst, true, true))
+ return false;
// Don't collect the users of this.
WorkList.push_back(User);
continue;
@@ -640,7 +671,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
DEBUG(dbgs() << "Trying to promote " << I << '\n');
- if (tryPromoteAllocaToVector(&I)) {
+ if (tryPromoteAllocaToVector(&I, AS)) {
DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
return;
}
@@ -655,8 +686,6 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
const AMDGPUSubtarget &ST =
TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction);
- // FIXME: We should also try to get this value from the reqd_work_group_size
- // function attribute if it is available.
unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;
const DataLayout &DL = Mod->getDataLayout();
@@ -701,7 +730,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
Twine(F->getName()) + Twine('.') + I.getName(),
nullptr,
GlobalVariable::NotThreadLocal,
- AMDGPUAS::LOCAL_ADDRESS);
+ AS.LOCAL_ADDRESS);
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
GV->setAlignment(I.getAlignment());
@@ -734,7 +763,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
Value *Src0 = CI->getOperand(0);
Type *EltTy = Src0->getType()->getPointerElementType();
- PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+ PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
if (isa<ConstantPointerNull>(CI->getOperand(0)))
CI->setOperand(0, ConstantPointerNull::get(NewTy));
@@ -751,7 +780,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
continue;
Type *EltTy = V->getType()->getPointerElementType();
- PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+ PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);
// FIXME: It doesn't really make sense to try to do this for all
// instructions.
@@ -819,17 +848,17 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
Type *SrcTy = Src->getType()->getPointerElementType();
Function *ObjectSize = Intrinsic::getDeclaration(Mod,
Intrinsic::objectsize,
- { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
+ { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
);
- CallInst *NewCall
- = Builder.CreateCall(ObjectSize, { Src, Intr->getOperand(1) });
+ CallInst *NewCall = Builder.CreateCall(
+ ObjectSize, {Src, Intr->getOperand(1), Intr->getOperand(2)});
Intr->replaceAllUsesWith(NewCall);
Intr->eraseFromParent();
continue;
}
default:
- Intr->dump();
+ Intr->print(errs());
llvm_unreachable("Don't know how to promote alloca intrinsic use.");
}
}
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
new file mode 100644
index 000000000000..a5edc0c3b937
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -0,0 +1,230 @@
+//===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the RegisterBankInfo class for
+/// AMDGPU.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPURegisterBankInfo.h"
+#include "AMDGPUInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_TARGET_REGBANK_IMPL
+#include "AMDGPUGenRegisterBank.inc"
+
+// This file will be TableGen'ed at some point.
+#include "AMDGPUGenRegisterBankInfo.def"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
+ : AMDGPUGenRegisterBankInfo(),
+ TRI(static_cast<const SIRegisterInfo*>(&TRI)) {
+
+ // HACK: Until this is fully tablegen'd
+ static bool AlreadyInit = false;
+ if (AlreadyInit)
+ return;
+
+ AlreadyInit = true;
+
+ const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID);
+ (void)RBSGPR;
+ assert(&RBSGPR == &AMDGPU::SGPRRegBank);
+
+ const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID);
+ (void)RBVGPR;
+ assert(&RBVGPR == &AMDGPU::VGPRRegBank);
+
+}
+
+unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &A,
+ const RegisterBank &B,
+ unsigned Size) const {
+ return RegisterBankInfo::copyCost(A, B, Size);
+}
+
+const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
+ const TargetRegisterClass &RC) const {
+
+ if (TRI->isSGPRClass(&RC))
+ return getRegBank(AMDGPU::SGPRRegBankID);
+
+ return getRegBank(AMDGPU::VGPRRegBankID);
+}
+
+RegisterBankInfo::InstructionMappings
+AMDGPURegisterBankInfo::getInstrAlternativeMappings(
+ const MachineInstr &MI) const {
+
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+
+ InstructionMappings AltMappings;
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_LOAD: {
+ // FIXME: Should we be hard coding the size for these mappings?
+ InstructionMapping SSMapping(1, 1,
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
+ 2); // Num Operands
+ AltMappings.emplace_back(std::move(SSMapping));
+
+ InstructionMapping VVMapping(2, 1,
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
+ 2); // Num Operands
+ AltMappings.emplace_back(std::move(VVMapping));
+
+    // FIXME: Should this be the pointer size (64 bits) or the size of the
+    // register that will hold the buffer resource (128 bits)?
+ InstructionMapping VSMapping(3, 1,
+ getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
+ 2); // Num Operands
+ AltMappings.emplace_back(std::move(VSMapping));
+
+ return AltMappings;
+
+ }
+ default:
+ break;
+ }
+ return RegisterBankInfo::getInstrAlternativeMappings(MI);
+}
+
+void AMDGPURegisterBankInfo::applyMappingImpl(
+ const OperandsMapper &OpdMapper) const {
+ return applyDefaultMapping(OpdMapper);
+}
+
+static bool isInstrUniform(const MachineInstr &MI) {
+ if (!MI.hasOneMemOperand())
+ return false;
+
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ return AMDGPU::isUniformMMO(MMO);
+}
+
+RegisterBankInfo::InstructionMapping
+AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
+
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ RegisterBankInfo::InstructionMapping Mapping =
+ InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
+ SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
+
+ const ValueMapping *ValMapping;
+ const ValueMapping *PtrMapping;
+
+ if (isInstrUniform(MI)) {
+ // We have a uniform instruction so we want to use an SMRD load
+ ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
+ } else {
+ ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ // FIXME: What would happen if we used SGPRRegBankID here?
+ PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
+ }
+
+ OpdsMapping[0] = ValMapping;
+ OpdsMapping[1] = PtrMapping;
+ Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+ return Mapping;
+
+ // FIXME: Do we want to add a mapping for FLAT load, or should we just
+ // handle that during instruction selection?
+}
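+
+// Illustrative sketch (hypothetical MIR): for
+//   %val(s32) = G_LOAD %ptr(p0)
+// whose single memory operand is uniform (e.g. a kernel-argument load), both
+// operands map to the SGPR bank so the load can later select to an SMRD
+// instruction; otherwise both map to the VGPR bank.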
+
+RegisterBankInfo::InstructionMapping
+AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+ RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
+
+ if (Mapping.isValid())
+ return Mapping;
+
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
+ SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
+
+ switch (MI.getOpcode()) {
+ default: break;
+ case AMDGPU::G_CONSTANT: {
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+ return Mapping;
+ }
+ case AMDGPU::G_GEP: {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (!MI.getOperand(i).isReg())
+ continue;
+
+ unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits();
+ OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ }
+ Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+ return Mapping;
+ }
+ case AMDGPU::G_STORE: {
+ assert(MI.getOperand(0).isReg());
+ unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ // FIXME: We need to specify a different reg bank once scalar stores
+ // are supported.
+ const ValueMapping *ValMapping =
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ // FIXME: Depending on the type of store, the pointer could be in
+ // the SGPR Reg bank.
+ // FIXME: Pointer size should be based on the address space.
+ const ValueMapping *PtrMapping =
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
+
+ OpdsMapping[0] = ValMapping;
+ OpdsMapping[1] = PtrMapping;
+ Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+ return Mapping;
+ }
+
+ case AMDGPU::G_LOAD:
+ return getInstrMappingForLoad(MI);
+ }
+
+ unsigned BankID = AMDGPU::SGPRRegBankID;
+
+ Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
+ unsigned Size = 0;
+ for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx) {
+    // If the operand is not a register, default to the size of the previous
+    // operand.
+    // FIXME: Can't we pull the types from the MachineInstr rather than the
+    // operands?
+ if (MI.getOperand(Idx).isReg())
+ Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI);
+    // OpdsMapping was already sized to the operand count above, so assign in
+    // place rather than push_back(), which would append past the preallocated
+    // null entries.
+    OpdsMapping[Idx] = AMDGPU::getValueMapping(BankID, Size);
+ }
+ Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+
+ return Mapping;
+}
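+
+// Illustrative sketch of the default path above (hypothetical MIR): for an
+// unhandled operation such as
+//   %2(s32) = G_ADD %0(s32), %1(s32)
+// each register operand receives an SGPR-bank value mapping of its own size,
+// giving the mapping { SGPR 32, SGPR 32, SGPR 32 }.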
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
new file mode 100644
index 000000000000..f13bde87ef2d
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -0,0 +1,65 @@
+//===- AMDGPURegisterBankInfo -----------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the RegisterBankInfo class for AMDGPU.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
+
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+
+namespace llvm {
+
+class SIRegisterInfo;
+class TargetRegisterInfo;
+
+namespace AMDGPU {
+enum {
+ SGPRRegBankID = 0,
+ VGPRRegBankID = 1,
+ NumRegisterBanks
+};
+} // End AMDGPU namespace.
+
+/// This class provides the information for the target register banks.
+class AMDGPUGenRegisterBankInfo : public RegisterBankInfo {
+
+protected:
+
+#define GET_TARGET_REGBANK_CLASS
+#include "AMDGPUGenRegisterBank.inc"
+
+};
+class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
+ const SIRegisterInfo *TRI;
+
+ /// See RegisterBankInfo::applyMapping.
+ void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+
+ RegisterBankInfo::InstructionMapping
+ getInstrMappingForLoad(const MachineInstr &MI) const;
+
+public:
+ AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI);
+
+ unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
+ unsigned Size) const override;
+
+ const RegisterBank &
+ getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
+
+ InstructionMappings
+ getInstrAlternativeMappings(const MachineInstr &MI) const override;
+
+ InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
+};
+} // End llvm namespace.
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/lib/Target/AMDGPU/AMDGPURegisterBanks.td
new file mode 100644
index 000000000000..f4428e56035f
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPURegisterBanks.td
@@ -0,0 +1,16 @@
+//=- AMDGPURegisterBank.td - Describe the AMDGPU Banks -------*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def SGPRRegBank : RegisterBank<"SGPR",
+ [SReg_32, SReg_64, SReg_128, SReg_256, SReg_512]
+>;
+
+def VGPRRegBank : RegisterBank<"VGPR",
+ [VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512]
+>;
diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
index ef51aad95dce..22b1663821d9 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -16,10 +16,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERINFO_H
-#include "llvm/Target/TargetRegisterInfo.h"
-
#define GET_REGINFO_HEADER
-#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
namespace llvm {
diff --git a/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h b/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
deleted file mode 100644
index ecd2ac72bf1b..000000000000
--- a/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
+++ /dev/null
@@ -1,193 +0,0 @@
-//===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// Enums and structure types used by runtime metadata.
-///
-/// Runtime requests certain information (metadata) about kernels to be able
-/// to execute the kernels and answer the queries about the kernels.
-/// The metadata is represented as a note element in the .note ELF section of a
-/// binary (code object). The desc field of the note element is a YAML string
-/// consisting of key-value pairs. Each key is a string. Each value can be
-/// an integer, a string, or an YAML sequence. There are 3 levels of YAML maps.
-/// At the beginning of the YAML string is the module level YAML map. A
-/// kernel-level YAML map is in the amd.Kernels sequence. A
-/// kernel-argument-level map is in the amd.Args sequence.
-///
-/// The format should be kept backward compatible. New enum values and bit
-/// fields should be appended at the end. It is suggested to bump up the
-/// revision number whenever the format changes and document the change
-/// in the revision in this header.
-///
-//
-//===----------------------------------------------------------------------===//
-//
-#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
-#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
-
-#include <cstdint>
-#include <vector>
-#include <string>
-
-namespace AMDGPU {
-
-namespace RuntimeMD {
-
- // Version and revision of runtime metadata
- const unsigned char MDVersion = 2;
- const unsigned char MDRevision = 0;
-
- // Name of keys for runtime metadata.
- namespace KeyName {
- const char MDVersion[] = "amd.MDVersion"; // Runtime metadata version
- const char Language[] = "amd.Language"; // Language
- const char LanguageVersion[] = "amd.LanguageVersion"; // Language version
- const char Kernels[] = "amd.Kernels"; // Kernels
- const char KernelName[] = "amd.KernelName"; // Kernel name
- const char Args[] = "amd.Args"; // Kernel arguments
- const char ArgSize[] = "amd.ArgSize"; // Kernel arg size
- const char ArgAlign[] = "amd.ArgAlign"; // Kernel arg alignment
- const char ArgTypeName[] = "amd.ArgTypeName"; // Kernel type name
- const char ArgName[] = "amd.ArgName"; // Kernel name
- const char ArgKind[] = "amd.ArgKind"; // Kernel argument kind
- const char ArgValueType[] = "amd.ArgValueType"; // Kernel argument value type
- const char ArgAddrQual[] = "amd.ArgAddrQual"; // Kernel argument address qualifier
- const char ArgAccQual[] = "amd.ArgAccQual"; // Kernel argument access qualifier
- const char ArgIsConst[] = "amd.ArgIsConst"; // Kernel argument is const qualified
- const char ArgIsRestrict[] = "amd.ArgIsRestrict"; // Kernel argument is restrict qualified
- const char ArgIsVolatile[] = "amd.ArgIsVolatile"; // Kernel argument is volatile qualified
- const char ArgIsPipe[] = "amd.ArgIsPipe"; // Kernel argument is pipe qualified
- const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize"; // Required work group size
- const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint"; // Work group size hint
- const char VecTypeHint[] = "amd.VecTypeHint"; // Vector type hint
- const char KernelIndex[] = "amd.KernelIndex"; // Kernel index for device enqueue
- const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups"; // No partial work groups
- const char PrintfInfo[] = "amd.PrintfInfo"; // Prinf function call information
- const char ArgActualAcc[] = "amd.ArgActualAcc"; // The actual kernel argument access qualifier
- const char ArgPointeeAlign[] = "amd.ArgPointeeAlign"; // Alignment of pointee type
- }
-
- namespace KernelArg {
- enum Kind : uint8_t {
- ByValue = 0,
- GlobalBuffer = 1,
- DynamicSharedPointer = 2,
- Sampler = 3,
- Image = 4,
- Pipe = 5,
- Queue = 6,
- HiddenGlobalOffsetX = 7,
- HiddenGlobalOffsetY = 8,
- HiddenGlobalOffsetZ = 9,
- HiddenNone = 10,
- HiddenPrintfBuffer = 11,
- HiddenDefaultQueue = 12,
- HiddenCompletionAction = 13,
- };
-
- enum ValueType : uint16_t {
- Struct = 0,
- I8 = 1,
- U8 = 2,
- I16 = 3,
- U16 = 4,
- F16 = 5,
- I32 = 6,
- U32 = 7,
- F32 = 8,
- I64 = 9,
- U64 = 10,
- F64 = 11,
- };
-
- // Avoid using 'None' since it conflicts with a macro in X11 header file.
- enum AccessQualifer : uint8_t {
- AccNone = 0,
- ReadOnly = 1,
- WriteOnly = 2,
- ReadWrite = 3,
- };
-
- enum AddressSpaceQualifer : uint8_t {
- Private = 0,
- Global = 1,
- Constant = 2,
- Local = 3,
- Generic = 4,
- Region = 5,
- };
- } // namespace KernelArg
-
- // Invalid values are used to indicate an optional key should not be emitted.
- const uint8_t INVALID_ADDR_QUAL = 0xff;
- const uint8_t INVALID_ACC_QUAL = 0xff;
- const uint32_t INVALID_KERNEL_INDEX = ~0U;
-
- namespace KernelArg {
- // In-memory representation of kernel argument information.
- struct Metadata {
- uint32_t Size;
- uint32_t Align;
- uint32_t PointeeAlign;
- uint8_t Kind;
- uint16_t ValueType;
- std::string TypeName;
- std::string Name;
- uint8_t AddrQual;
- uint8_t AccQual;
- uint8_t IsVolatile;
- uint8_t IsConst;
- uint8_t IsRestrict;
- uint8_t IsPipe;
- Metadata() : Size(0), Align(0), PointeeAlign(0), Kind(0), ValueType(0),
- AddrQual(INVALID_ADDR_QUAL), AccQual(INVALID_ACC_QUAL), IsVolatile(0),
- IsConst(0), IsRestrict(0), IsPipe(0) {}
- };
- }
-
- namespace Kernel {
- // In-memory representation of kernel information.
- struct Metadata {
- std::string Name;
- std::string Language;
- std::vector<uint8_t> LanguageVersion;
- std::vector<uint32_t> ReqdWorkGroupSize;
- std::vector<uint32_t> WorkGroupSizeHint;
- std::string VecTypeHint;
- uint32_t KernelIndex;
- uint8_t NoPartialWorkGroups;
- std::vector<KernelArg::Metadata> Args;
- Metadata() : KernelIndex(INVALID_KERNEL_INDEX), NoPartialWorkGroups(0) {}
- };
- }
-
- namespace Program {
- // In-memory representation of program information.
- struct Metadata {
- std::vector<uint8_t> MDVersionSeq;
- std::vector<std::string> PrintfInfo;
- std::vector<Kernel::Metadata> Kernels;
-
- explicit Metadata(){}
-
- // Construct from an YAML string.
- explicit Metadata(const std::string &YAML);
-
- // Convert to YAML string.
- std::string toYAML();
-
- // Convert from YAML string.
- static Metadata fromYAML(const std::string &S);
- };
- }
-} // namespace RuntimeMD
-} // namespace AMDGPU
-
-#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index c35a67de1d7f..972c28579f7a 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -13,8 +13,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/Target/TargetFrameLowering.h"
#include <algorithm>
@@ -22,7 +24,6 @@ using namespace llvm;
#define DEBUG_TYPE "amdgpu-subtarget"
-#define GET_SUBTARGETINFO_ENUM
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"
@@ -41,9 +42,10 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
// for SI has the unhelpful behavior that it unsets everything else if you
// disable it.
- SmallString<256> FullFS("+promote-alloca,+fp64-denormals,+load-store-opt,");
+ SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+dx10-clamp,+load-store-opt,");
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
- FullFS += "+flat-for-global,+unaligned-buffer-access,";
+ FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
+
FullFS += FS;
ParseSubtargetFeatures(GPU, FullFS);
@@ -59,9 +61,8 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
// denormals, but should be checked. Should we issue a warning somewhere
// if someone tries to enable these?
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- FP16Denormals = false;
+ FP64FP16Denormals = false;
FP32Denormals = false;
- FP64Denormals = false;
}
// Set defaults if needed.
@@ -85,15 +86,17 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FastFMAF32(false),
HalfRate64Ops(false),
- FP16Denormals(false),
FP32Denormals(false),
- FP64Denormals(false),
+ FP64FP16Denormals(false),
FPExceptions(false),
+ DX10Clamp(false),
FlatForGlobal(false),
UnalignedScratchAccess(false),
UnalignedBufferAccess(false),
+ HasApertureRegs(false),
EnableXNACK(false),
+ TrapHandler(false),
DebuggerInsertNops(false),
DebuggerReserveRegs(false),
DebuggerEmitPrologue(false),
@@ -110,13 +113,17 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
GCN1Encoding(false),
GCN3Encoding(false),
CIInsts(false),
+ GFX9Insts(false),
SGPRInitBug(false),
HasSMemRealTime(false),
Has16BitInsts(false),
+ HasVOP3PInsts(false),
HasMovrel(false),
HasVGPRIndexMode(false),
HasScalarStores(false),
HasInv2PiInlineImm(false),
+ HasSDWA(false),
+ HasDPP(false),
FlatAddressSpace(false),
R600ALUInst(false),
@@ -128,65 +135,30 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FeatureDisable(false),
InstrItins(getInstrItineraryForCPU(GPU)) {
+ AS = AMDGPU::getAMDGPUAS(TT);
initializeSubtargetDependencies(TT, GPU, FS);
}
-// FIXME: These limits are for SI. Did they change with the larger maximum LDS
-// size?
-unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves) const {
- switch (NWaves) {
- case 10:
- return 1638;
- case 9:
- return 1820;
- case 8:
- return 2048;
- case 7:
- return 2340;
- case 6:
- return 2730;
- case 5:
- return 3276;
- case 4:
- return 4096;
- case 3:
- return 5461;
- case 2:
- return 8192;
- default:
+unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
+ const Function &F) const {
+ if (NWaves == 1)
return getLocalMemorySize();
- }
+ unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
+ unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
+ unsigned MaxWaves = getMaxWavesPerEU();
+ return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}
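+
+// Worked example (hypothetical numbers): with 64 KiB of LDS, 10 waves per EU,
+// a flat work group size giving 16 work groups per CU, and NWaves = 2, this
+// returns 65536 * 10 / 16 / 2 = 20480 bytes.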
-unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const {
- if (Bytes <= 1638)
- return 10;
-
- if (Bytes <= 1820)
- return 9;
-
- if (Bytes <= 2048)
- return 8;
-
- if (Bytes <= 2340)
- return 7;
-
- if (Bytes <= 2730)
- return 6;
-
- if (Bytes <= 3276)
- return 5;
-
- if (Bytes <= 4096)
- return 4;
-
- if (Bytes <= 5461)
- return 3;
-
- if (Bytes <= 8192)
- return 2;
-
- return 1;
+unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
+ const Function &F) const {
+ unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
+ unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
+ unsigned MaxWaves = getMaxWavesPerEU();
+ unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
+ unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
+ NumWaves = std::min(NumWaves, MaxWaves);
+ NumWaves = std::max(NumWaves, 1u);
+ return NumWaves;
}
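+
+// Worked example (same hypothetical numbers as above): Limit =
+// 65536 * 10 / 16 = 40960, so Bytes = 20480 gives NumWaves = 40960 / 20480 = 2,
+// the inverse of getMaxLocalMemSizeWithWaveCount() for NWaves = 2.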
std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
@@ -224,7 +196,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
const Function &F) const {
// Default minimum/maximum number of waves per execution unit.
- std::pair<unsigned, unsigned> Default(1, 0);
+ std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
// Default/requested minimum/maximum flat work group sizes.
std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
@@ -269,6 +241,65 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
return Requested;
}
+bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
+ Function *Kernel = I->getParent()->getParent();
+ unsigned MinSize = 0;
+ unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
+ bool IdQuery = false;
+
+ // If reqd_work_group_size is present it narrows value down.
+ if (auto *CI = dyn_cast<CallInst>(I)) {
+ const Function *F = CI->getCalledFunction();
+ if (F) {
+ unsigned Dim = UINT_MAX;
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::amdgcn_workitem_id_x:
+ case Intrinsic::r600_read_tidig_x:
+      IdQuery = true;
+      // Fall through to the matching local size dimension.
+ case Intrinsic::r600_read_local_size_x:
+ Dim = 0;
+ break;
+ case Intrinsic::amdgcn_workitem_id_y:
+ case Intrinsic::r600_read_tidig_y:
+      IdQuery = true;
+      // Fall through to the matching local size dimension.
+ case Intrinsic::r600_read_local_size_y:
+ Dim = 1;
+ break;
+ case Intrinsic::amdgcn_workitem_id_z:
+ case Intrinsic::r600_read_tidig_z:
+      IdQuery = true;
+      // Fall through to the matching local size dimension.
+ case Intrinsic::r600_read_local_size_z:
+ Dim = 2;
+ break;
+ default:
+ break;
+ }
+ if (Dim <= 3) {
+ if (auto Node = Kernel->getMetadata("reqd_work_group_size"))
+ if (Node->getNumOperands() == 3)
+ MinSize = MaxSize = mdconst::extract<ConstantInt>(
+ Node->getOperand(Dim))->getZExtValue();
+ }
+ }
+ }
+
+ if (!MaxSize)
+ return false;
+
+  // Range metadata is [Lo, Hi). An ID query result is in [0, MaxSize), so
+  // pass MaxSize as Hi. A size query result is in [MinSize, MaxSize], so pass
+  // MaxSize + 1 as Hi.
+ if (IdQuery)
+ MinSize = 0;
+ else
+ ++MaxSize;
+
+ MDBuilder MDB(I->getContext());
+ MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize),
+ APInt(32, MaxSize));
+ I->setMetadata(LLVMContext::MD_range, MaxWorkGroupSizeRange);
+ return true;
+}
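+
+// Illustrative sketch (hypothetical kernel): for
+//   %id = call i32 @llvm.amdgcn.workitem.id.x()
+// in a kernel carrying !reqd_work_group_size metadata of {64, 1, 1}, this
+// attaches !range !{i32 0, i32 64}, since an ID query lies in [0, size). The
+// corresponding r600.read.local.size.x call would instead get
+// !range !{i32 64, i32 65}.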
+
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
const TargetMachine &TM) :
AMDGPUSubtarget(TT, GPU, FS, TM),
@@ -305,7 +336,7 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
}
unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
- unsigned ExplicitArgBytes) const {
+ unsigned ExplicitArgBytes) const {
unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
if (ImplicitBytes == 0)
return ExplicitArgBytes;
@@ -359,12 +390,100 @@ unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
return 1;
}
-unsigned SISubtarget::getMaxNumSGPRs() const {
+unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ if (MFI.hasFlatScratchInit()) {
+ if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
+ if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
+ return 4; // FLAT_SCRATCH, VCC (in that order).
+ }
+
+ if (isXNACKEnabled())
+ return 4; // XNACK, VCC (in that order).
+ return 2; // VCC.
+}
+
+unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
+ const Function &F = *MF.getFunction();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+
+ // Compute maximum number of SGPRs function can use using default/requested
+ // minimum number of waves per execution unit.
+ std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
+ unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
+ unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);
+
+ // Check if maximum number of SGPRs was explicitly requested using
+ // "amdgpu-num-sgpr" attribute.
+ if (F.hasFnAttribute("amdgpu-num-sgpr")) {
+ unsigned Requested = AMDGPU::getIntegerAttribute(
+ F, "amdgpu-num-sgpr", MaxNumSGPRs);
+
+ // Make sure requested value does not violate subtarget's specifications.
+ if (Requested && (Requested <= getReservedNumSGPRs(MF)))
+ Requested = 0;
+
+ // If more SGPRs are required to support the input user/system SGPRs,
+ // increase to accommodate them.
+ //
+ // FIXME: This really ends up using the requested number of SGPRs + number
+ // of reserved special registers in total. Theoretically you could re-use
+ // the last input registers for these special registers, but this would
+ // require a lot of complexity to deal with the weird aliasing.
+ unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
+ if (Requested && Requested < InputNumSGPRs)
+ Requested = InputNumSGPRs;
+
+ // Make sure requested value is compatible with values implied by
+ // default/requested minimum/maximum number of waves per execution unit.
+ if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
+ Requested = 0;
+ if (WavesPerEU.second &&
+ Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
+ Requested = 0;
+
+ if (Requested)
+ MaxNumSGPRs = Requested;
+ }
+
if (hasSGPRInitBug())
- return SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+
+ return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
+ MaxAddressableNumSGPRs);
+}
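+
+// Illustrative sketch (hypothetical attribute): a function annotated with
+//   "amdgpu-num-sgpr"="48"
+// requests a cap of 48 SGPRs. The request is raised to cover any preloaded
+// input SGPRs, and dropped entirely if it does not exceed the reserved count
+// or conflicts with the waves-per-EU bounds; the reserved SGPRs are then
+// subtracted from the final result.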
- if (getGeneration() >= VOLCANIC_ISLANDS)
- return 102;
+unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
+ const Function &F = *MF.getFunction();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+
+ // Compute maximum number of VGPRs function can use using default/requested
+ // minimum number of waves per execution unit.
+ std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
+ unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
+
+ // Check if maximum number of VGPRs was explicitly requested using
+ // "amdgpu-num-vgpr" attribute.
+ if (F.hasFnAttribute("amdgpu-num-vgpr")) {
+ unsigned Requested = AMDGPU::getIntegerAttribute(
+ F, "amdgpu-num-vgpr", MaxNumVGPRs);
+
+ // Make sure requested value does not violate subtarget's specifications.
+ if (Requested && Requested <= getReservedNumVGPRs(MF))
+ Requested = 0;
+
+ // Make sure requested value is compatible with values implied by
+ // default/requested minimum/maximum number of waves per execution unit.
+ if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
+ Requested = 0;
+ if (WavesPerEU.second &&
+ Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
+ Requested = 0;
+
+ if (Requested)
+ MaxNumVGPRs = Requested;
+ }
- return 104;
+ return MaxNumVGPRs - getReservedNumVGPRs(MF);
}
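+
+// Illustrative sketch: the VGPR path mirrors the SGPR one, e.g. a hypothetical
+// "amdgpu-num-vgpr"="32" is honored only if it exceeds the reserved VGPR count
+// and is consistent with the waves-per-EU bounds, with the reserved VGPRs
+// subtracted from the result.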
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 0e3cb7dc1f87..36bc2498781f 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -22,6 +22,7 @@
#include "SIInstrInfo.h"
#include "SIISelLowering.h"
#include "SIFrameLowering.h"
+#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
@@ -51,6 +52,7 @@ public:
SOUTHERN_ISLANDS,
SEA_ISLANDS,
VOLCANIC_ISLANDS,
+ GFX9,
};
enum {
@@ -64,6 +66,28 @@ public:
ISAVersion8_0_3,
ISAVersion8_0_4,
ISAVersion8_1_0,
+ ISAVersion9_0_0,
+ ISAVersion9_0_1
+ };
+
+ enum TrapHandlerAbi {
+ TrapHandlerAbiNone = 0,
+ TrapHandlerAbiHsa = 1
+ };
+
+ enum TrapID {
+ TrapIDHardwareReserved = 0,
+ TrapIDHSADebugTrap = 1,
+ TrapIDLLVMTrap = 2,
+ TrapIDLLVMDebugTrap = 3,
+ TrapIDDebugBreakpoint = 7,
+ TrapIDDebugReserved8 = 8,
+ TrapIDDebugReservedFE = 0xfe,
+ TrapIDDebugReservedFF = 0xff
+ };
+
+ enum TrapRegValues {
+ LLVMTrapHandlerRegValue = 1
};
protected:
@@ -81,14 +105,16 @@ protected:
bool HalfRate64Ops;
// Dynamically set bits that enable features.
- bool FP16Denormals;
bool FP32Denormals;
- bool FP64Denormals;
+ bool FP64FP16Denormals;
bool FPExceptions;
+ bool DX10Clamp;
bool FlatForGlobal;
bool UnalignedScratchAccess;
bool UnalignedBufferAccess;
+ bool HasApertureRegs;
bool EnableXNACK;
+ bool TrapHandler;
bool DebuggerInsertNops;
bool DebuggerReserveRegs;
bool DebuggerEmitPrologue;
@@ -107,13 +133,17 @@ protected:
bool GCN1Encoding;
bool GCN3Encoding;
bool CIInsts;
+ bool GFX9Insts;
bool SGPRInitBug;
bool HasSMemRealTime;
bool Has16BitInsts;
+ bool HasVOP3PInsts;
bool HasMovrel;
bool HasVGPRIndexMode;
bool HasScalarStores;
bool HasInv2PiInlineImm;
+ bool HasSDWA;
+ bool HasDPP;
bool FlatAddressSpace;
bool R600ALUInst;
bool CaymanISA;
@@ -127,6 +157,7 @@ protected:
InstrItineraryData InstrItins;
SelectionDAGTargetInfo TSInfo;
+ AMDGPUAS AS;
public:
AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
@@ -184,10 +215,18 @@ public:
return MaxPrivateElementSize;
}
+ AMDGPUAS getAMDGPUAS() const {
+ return AS;
+ }
+
bool has16BitInsts() const {
return Has16BitInsts;
}
+ bool hasVOP3PInsts() const {
+ return HasVOP3PInsts;
+ }
+
bool hasHWFP64() const {
return FP64;
}
@@ -243,6 +282,10 @@ public:
return (getGeneration() >= EVERGREEN);
}
+ bool hasMed3_16() const {
+ return getGeneration() >= GFX9;
+ }
+
bool hasCARRY() const {
return (getGeneration() >= EVERGREEN);
}
@@ -255,6 +298,10 @@ public:
return CaymanISA;
}
+ TrapHandlerAbi getTrapHandlerAbi() const {
+ return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
+ }
+
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
@@ -267,20 +314,22 @@ public:
return DumpCode;
}
- bool enableIEEEBit(const MachineFunction &MF) const {
- return AMDGPU::isCompute(MF.getFunction()->getCallingConv());
- }
-
/// Return the amount of LDS that can be used that will not restrict the
/// occupancy lower than WaveCount.
- unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const;
+ unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
+ const Function &) const;
/// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
/// the given LDS memory size is the only constraint.
- unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
+ unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
+
+ unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
+ const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ return getOccupancyWithLocalMemSize(MFI->getLDSSize(), *MF.getFunction());
+ }
bool hasFP16Denormals() const {
- return FP16Denormals;
+ return FP64FP16Denormals;
}
bool hasFP32Denormals() const {
@@ -288,13 +337,21 @@ public:
}
bool hasFP64Denormals() const {
- return FP64Denormals;
+ return FP64FP16Denormals;
}
bool hasFPExceptions() const {
return FPExceptions;
}
+ bool enableDX10Clamp() const {
+ return DX10Clamp;
+ }
+
+ bool enableIEEEBit(const MachineFunction &MF) const {
+ return AMDGPU::isCompute(MF.getFunction()->getCallingConv());
+ }
+
bool useFlatForGlobal() const {
return FlatForGlobal;
}
@@ -307,10 +364,22 @@ public:
return UnalignedScratchAccess;
}
+ bool hasApertureRegs() const {
+ return HasApertureRegs;
+ }
+
+ bool isTrapHandlerEnabled() const {
+ return TrapHandler;
+ }
+
bool isXNACKEnabled() const {
return EnableXNACK;
}
+ bool hasFlatAddressSpace() const {
+ return FlatAddressSpace;
+ }
+
bool isMesaKernel(const MachineFunction &MF) const {
return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
}
@@ -324,6 +393,10 @@ public:
return isAmdHsaOS() || isMesaKernel(MF);
}
+ bool hasFminFmaxLegacy() const {
+ return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
+ }
+
/// \brief Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
@@ -355,72 +428,71 @@ public:
return true;
}
+ void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
+ bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
+
/// \returns Number of execution units per compute unit supported by the
/// subtarget.
unsigned getEUsPerCU() const {
- return 4;
+ return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits());
}
/// \returns Maximum number of work groups per compute unit supported by the
- /// subtarget and limited by given flat work group size.
+ /// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
- if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
- return 8;
- return getWavesPerWorkGroup(FlatWorkGroupSize) == 1 ? 40 : 16;
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(),
+ FlatWorkGroupSize);
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerCU() const {
- return getMaxWavesPerEU() * getEUsPerCU();
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits());
}
/// \returns Maximum number of waves per compute unit supported by the
- /// subtarget and limited by given flat work group size.
+ /// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
- return getWavesPerWorkGroup(FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(),
+ FlatWorkGroupSize);
}
/// \returns Minimum number of waves per execution unit supported by the
/// subtarget.
unsigned getMinWavesPerEU() const {
- return 1;
+ return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits());
}
/// \returns Maximum number of waves per execution unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerEU() const {
- if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
- return 8;
- // FIXME: Need to take scratch memory into account.
- return 10;
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits());
}
/// \returns Maximum number of waves per execution unit supported by the
- /// subtarget and limited by given flat work group size.
+ /// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
- return alignTo(getMaxWavesPerCU(FlatWorkGroupSize), getEUsPerCU()) /
- getEUsPerCU();
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(),
+ FlatWorkGroupSize);
}
/// \returns Minimum flat work group size supported by the subtarget.
unsigned getMinFlatWorkGroupSize() const {
- return 1;
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits());
}
/// \returns Maximum flat work group size supported by the subtarget.
unsigned getMaxFlatWorkGroupSize() const {
- return 2048;
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits());
}
- /// \returns Number of waves per work group given the flat work group size.
+ /// \returns Number of waves per work group supported by the subtarget and
+ /// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
- return alignTo(FlatWorkGroupSize, getWavefrontSize()) / getWavefrontSize();
+ return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(),
+ FlatWorkGroupSize);
}
- void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
- bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
-
/// \returns Subtarget's default pair of minimum/maximum flat work group sizes
/// for function \p F, or minimum/maximum flat work group sizes explicitly
/// requested using "amdgpu-flat-work-group-size" attribute attached to
@@ -440,6 +512,9 @@ public:
/// compatible with minimum/maximum number of waves limited by flat work group
/// size, register usage, and/or lds usage.
std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
+
+  /// Creates value range metadata on a workitemid.* intrinsic call or load.
+ bool makeLIDRangeMetadata(Instruction *I) const;
};
class R600Subtarget final : public AMDGPUSubtarget {
@@ -482,13 +557,6 @@ public:
};
class SISubtarget final : public AMDGPUSubtarget {
-public:
- enum {
- // The closed Vulkan driver sets 96, which limits the wave count to 8 but
- // doesn't spill SGPRs as much as when 80 is set.
- FIXED_SGPR_COUNT_FOR_INIT_BUG = 96
- };
-
private:
SIInstrInfo InstrInfo;
SIFrameLowering FrameLowering;
@@ -516,6 +584,21 @@ public:
return GISel->getCallLowering();
}
+ const InstructionSelector *getInstructionSelector() const override {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getInstructionSelector();
+ }
+
+ const LegalizerInfo *getLegalizerInfo() const override {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getLegalizerInfo();
+ }
+
+ const RegisterBankInfo *getRegBankInfo() const override {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getRegBankInfo();
+ }
+
const SIRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
@@ -524,6 +607,11 @@ public:
this->GISel.reset(&GISel);
}
+ // XXX - Why is this here if it isn't in the default pass set?
+ bool enableEarlyIfConversion() const override {
+ return true;
+ }
+
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
@@ -533,10 +621,6 @@ public:
return 16;
}
- bool hasFlatAddressSpace() const {
- return FlatAddressSpace;
- }
-
bool hasSMemRealTime() const {
return HasSMemRealTime;
}
@@ -549,6 +633,10 @@ public:
return HasVGPRIndexMode;
}
+ bool useVGPRIndexMode(bool UserEnable) const {
+ return !hasMovrel() || (UserEnable && hasVGPRIndexMode());
+ }
+
bool hasScalarCompareEq64() const {
return getGeneration() >= VOLCANIC_ISLANDS;
}
@@ -561,6 +649,14 @@ public:
return HasInv2PiInlineImm;
}
+ bool hasSDWA() const {
+ return HasSDWA;
+ }
+
+ bool hasDPP() const {
+ return HasDPP;
+ }
+
bool enableSIScheduler() const {
return EnableSIScheduler;
}
@@ -594,6 +690,14 @@ public:
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
+ bool hasSMovFedHazard() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
+ }
+
+ bool hasReadM0Hazard() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
+ }
+
unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplicitArgBytes) const;
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
@@ -605,10 +709,107 @@ public:
/// \returns True if waitcnt instruction is needed before barrier instruction,
/// false otherwise.
bool needWaitcntBeforeBarrier() const {
- return true;
+ return getGeneration() < GFX9;
+ }
+
+ /// \returns true if the flat_scratch register should be initialized with the
+ /// pointer to the wave's scratch memory rather than a size and offset.
+ bool flatScratchIsPointer() const {
+ return getGeneration() >= GFX9;
+ }
+
+ /// \returns SGPR allocation granularity supported by the subtarget.
+ unsigned getSGPRAllocGranule() const {
+ return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits());
+ }
+
+ /// \returns SGPR encoding granularity supported by the subtarget.
+ unsigned getSGPREncodingGranule() const {
+ return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits());
}
- unsigned getMaxNumSGPRs() const;
+ /// \returns Total number of SGPRs supported by the subtarget.
+ unsigned getTotalNumSGPRs() const {
+ return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits());
+ }
+
+ /// \returns Addressable number of SGPRs supported by the subtarget.
+ unsigned getAddressableNumSGPRs() const {
+ return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits());
+ }
+
+ /// \returns Minimum number of SGPRs that meets the given number of waves per
+ /// execution unit requirement supported by the subtarget.
+ unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
+ return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU);
+ }
+
+ /// \returns Maximum number of SGPRs that meets the given number of waves per
+ /// execution unit requirement supported by the subtarget.
+ unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
+ return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU,
+ Addressable);
+ }
+
+ /// \returns Reserved number of SGPRs for given function \p MF.
+ unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
+
+ /// \returns Maximum number of SGPRs that meets number of waves per execution
+ /// unit requirement for function \p MF, or number of SGPRs explicitly
+ /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
+ ///
+ /// \returns A value that meets the number of waves per execution unit
+ /// requirement if the explicitly requested value cannot be converted to an
+ /// integer, violates the subtarget's specifications, or does not itself meet
+ /// the requirement.
+ unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
+
+ /// \returns VGPR allocation granularity supported by the subtarget.
+ unsigned getVGPRAllocGranule() const {
+ return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());
+ }
+
+ /// \returns VGPR encoding granularity supported by the subtarget.
+ unsigned getVGPREncodingGranule() const {
+ return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits());
+ }
+
+ /// \returns Total number of VGPRs supported by the subtarget.
+ unsigned getTotalNumVGPRs() const {
+ return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits());
+ }
+
+ /// \returns Addressable number of VGPRs supported by the subtarget.
+ unsigned getAddressableNumVGPRs() const {
+ return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits());
+ }
+
+ /// \returns Minimum number of VGPRs that meets given number of waves per
+ /// execution unit requirement supported by the subtarget.
+ unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
+ return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU);
+ }
+
+ /// \returns Maximum number of VGPRs that meets given number of waves per
+ /// execution unit requirement supported by the subtarget.
+ unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
+ return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
+ }
+
+ /// \returns Reserved number of VGPRs for given function \p MF.
+ unsigned getReservedNumVGPRs(const MachineFunction &MF) const {
+ return debuggerReserveRegs() ? 4 : 0;
+ }
+
+ /// \returns Maximum number of VGPRs that meets number of waves per execution
+ /// unit requirement for function \p MF, or number of VGPRs explicitly
+ /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
+ ///
+ /// \returns A value that meets the number of waves per execution unit
+ /// requirement if the explicitly requested value cannot be converted to an
+ /// integer, violates the subtarget's specifications, or does not itself meet
+ /// the requirement.
+ unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
};
} // end namespace llvm
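The SGPR/VGPR accessors above all delegate to AMDGPU::IsaInfo so that the feature-bit-dependent limits live in a single place. A minimal sketch of how a client might combine them to bound occupancy by register pressure; the function name computeOccupancyBound and its locals are illustrative, not part of this patch:

#include "AMDGPUSubtarget.h"

// Sketch only: walk down from the subtarget's maximum wave count until the
// given register usage fits the per-wave budget. Assumes ST comes from
// MachineFunction::getSubtarget<SISubtarget>().
static unsigned computeOccupancyBound(const SISubtarget &ST,
                                      unsigned NumSGPRs, unsigned NumVGPRs) {
  unsigned Waves = ST.getMaxWavesPerEU();
  while (Waves > 1 &&
         (NumSGPRs > ST.getMaxNumSGPRs(Waves, /*Addressable=*/true) ||
          NumVGPRs > ST.getMaxNumVGPRs(Waves)))
    --Waves;
  return Waves;
}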
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index d8a0c716279c..0202220b8011 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -15,24 +15,29 @@
#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
+#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUCallLowering.h"
+#include "AMDGPUInstructionSelector.h"
+#include "AMDGPULegalizerInfo.h"
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "AMDGPURegisterBankInfo.h"
+#endif
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
+#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "R600MachineScheduler.h"
#include "SIMachineScheduler.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
-#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Vectorize.h"
@@ -58,6 +63,11 @@ static cl::opt<bool> EnableSROA(
cl::ReallyHidden,
cl::init(true));
+static cl::opt<bool>
+EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
+ cl::desc("Run early if-conversion"),
+ cl::init(false));
+
static cl::opt<bool> EnableR600IfConvert(
"r600-if-convert",
cl::desc("Use if conversion pass"),
@@ -78,6 +88,36 @@ static cl::opt<bool> ScalarizeGlobal(
cl::init(false),
cl::Hidden);
+// Option to run internalize pass.
+static cl::opt<bool> InternalizeSymbols(
+ "amdgpu-internalize-symbols",
+ cl::desc("Enable elimination of non-kernel functions and unused globals"),
+ cl::init(false),
+ cl::Hidden);
+
+// Option to inline all early.
+static cl::opt<bool> EarlyInlineAll(
+ "amdgpu-early-inline-all",
+ cl::desc("Inline all functions early"),
+ cl::init(false),
+ cl::Hidden);
+
+static cl::opt<bool> EnableSDWAPeephole(
+ "amdgpu-sdwa-peephole",
+ cl::desc("Enable SDWA peepholer"),
+ cl::init(true));
+
+// Enable address space based alias analysis
+static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
+ cl::desc("Enable AMDGPU Alias Analysis"),
+ cl::init(true));
+
+// Option to enable new waitcnt insertion pass.
+static cl::opt<bool> EnableSIInsertWaitcntsPass(
+ "enable-si-insert-waitcnts",
+ cl::desc("Use new waitcnt insertion pass"),
+ cl::init(false));
+
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -86,22 +126,28 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
PassRegistry *PR = PassRegistry::getPassRegistry();
initializeSILowerI1CopiesPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
+ initializeSIFixVGPRCopiesPass(*PR);
initializeSIFoldOperandsPass(*PR);
+ initializeSIPeepholeSDWAPass(*PR);
initializeSIShrinkInstructionsPass(*PR);
initializeSIFixControlFlowLiveIntervalsPass(*PR);
initializeSILoadStoreOptimizerPass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
initializeAMDGPUAnnotateUniformValuesPass(*PR);
+ initializeAMDGPULowerIntrinsicsPass(*PR);
initializeAMDGPUPromoteAllocaPass(*PR);
initializeAMDGPUCodeGenPreparePass(*PR);
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);
initializeSIInsertWaitsPass(*PR);
+ initializeSIInsertWaitcntsPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSILowerControlFlowPass(*PR);
initializeSIInsertSkipsPass(*PR);
initializeSIDebuggerInsertNopsPass(*PR);
initializeSIOptimizeExecMaskingPass(*PR);
+ initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
+ initializeAMDGPUAAWrapperPassPass(*PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -119,13 +165,26 @@ static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
ScheduleDAGMILive *DAG =
- new ScheduleDAGMILive(C,
- llvm::make_unique<GCNMaxOccupancySchedStrategy>(C));
+ new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
return DAG;
}
+static ScheduleDAGInstrs *
+createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
+ auto DAG = new GCNIterativeScheduler(C,
+ GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+ return DAG;
+}
+
+static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
+ return new GCNIterativeScheduler(C,
+ GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
+}
+
static MachineSchedRegistry
R600SchedRegistry("r600", "Run R600's custom scheduler",
createR600MachineScheduler);
@@ -139,6 +198,16 @@ GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
"Run GCN scheduler to maximize occupancy",
createGCNMaxOccupancyMachineScheduler);
+static MachineSchedRegistry
+IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
+ "Run GCN scheduler to maximize occupancy (experimental)",
+ createIterativeGCNMaxOccupancyMachineScheduler);
+
+static MachineSchedRegistry
+GCNMinRegSchedRegistry("gcn-minreg",
+ "Run GCN iterative scheduler for minimal register usage (experimental)",
+ createMinRegScheduler);
+
static StringRef computeDataLayout(const Triple &TT) {
if (TT.getArch() == Triple::r600) {
// 32-bit pointers.
@@ -148,9 +217,14 @@ static StringRef computeDataLayout(const Triple &TT) {
// 32-bit private, local, and region pointers. 64-bit global, constant and
// flat.
- return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
+ if (TT.getEnvironmentName() == "amdgiz" ||
+ TT.getEnvironmentName() == "amdgizcl")
+ return "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32"
"-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
- "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
+ "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
+ return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
+ "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
+ "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
}
LLVM_READNONE
@@ -180,6 +254,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
TLOF(createTLOF(getTargetTriple())) {
+ AS = AMDGPU::getAMDGPUAS(TT);
initAsmInfo();
}
@@ -199,8 +274,65 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
FSAttr.getValueAsString();
}
-void AMDGPUTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) {
- PM.add(createAMDGPUUnifyMetadataPass());
+static ImmutablePass *createAMDGPUExternalAAWrapperPass() {
+ return createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ });
+}
+
+void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
+ Builder.DivergentTarget = true;
+
+ bool Internalize = InternalizeSymbols &&
+ (getOptLevel() > CodeGenOpt::None) &&
+ (getTargetTriple().getArch() == Triple::amdgcn);
+ bool EarlyInline = EarlyInlineAll &&
+ (getOptLevel() > CodeGenOpt::None);
+ bool AMDGPUAA = EnableAMDGPUAliasAnalysis && getOptLevel() > CodeGenOpt::None;
+
+ Builder.addExtension(
+ PassManagerBuilder::EP_ModuleOptimizerEarly,
+ [Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
+ legacy::PassManagerBase &PM) {
+ if (AMDGPUAA) {
+ PM.add(createAMDGPUAAWrapperPass());
+ PM.add(createAMDGPUExternalAAWrapperPass());
+ }
+ PM.add(createAMDGPUUnifyMetadataPass());
+ if (Internalize) {
+ PM.add(createInternalizePass([=](const GlobalValue &GV) -> bool {
+ if (const Function *F = dyn_cast<Function>(&GV)) {
+ if (F->isDeclaration())
+ return true;
+ switch (F->getCallingConv()) {
+ default:
+ return false;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return true;
+ }
+ }
+ return !GV.use_empty();
+ }));
+ PM.add(createGlobalDCEPass());
+ }
+ if (EarlyInline)
+ PM.add(createAMDGPUAlwaysInlinePass(false));
+ });
+
+ Builder.addExtension(
+ PassManagerBuilder::EP_EarlyAsPossible,
+ [AMDGPUAA](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ if (AMDGPUAA) {
+ PM.add(createAMDGPUAAWrapperPass());
+ PM.add(createAMDGPUExternalAAWrapperPass());
+ }
+ });
}
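For context, adjustPassManager is invoked by frontends that build their optimization pipeline with PassManagerBuilder. A minimal sketch of the call sequence, assuming TM is an already-constructed AMDGPUTargetMachine and M the module being compiled:

// Sketch only: let the target install its extensions (AMDGPU AA, metadata
// unification, optional internalization) before populating the pipeline.
legacy::PassManager PM;
PassManagerBuilder Builder;
Builder.OptLevel = 2;
TM->adjustPassManager(Builder);      // installs the extensions added above
Builder.populateModulePassManager(PM);
PM.run(M);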
//===----------------------------------------------------------------------===//
@@ -245,9 +377,21 @@ namespace {
struct SIGISelActualAccessor : public GISelAccessor {
std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
const AMDGPUCallLowering *getCallLowering() const override {
return CallLoweringInfo.get();
}
+ const InstructionSelector *getInstructionSelector() const override {
+ return InstSelector.get();
+ }
+ const LegalizerInfo *getLegalizerInfo() const override {
+ return Legalizer.get();
+ }
+ const RegisterBankInfo *getRegBankInfo() const override {
+ return RegBankInfo.get();
+ }
};
} // end anonymous namespace
@@ -281,6 +425,11 @@ const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
GISel->CallLoweringInfo.reset(
new AMDGPUCallLowering(*I->getTargetLowering()));
+ GISel->Legalizer.reset(new AMDGPULegalizerInfo());
+
+ GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*I->getRegisterInfo()));
+ GISel->InstSelector.reset(new AMDGPUInstructionSelector(*I,
+ *static_cast<AMDGPURegisterBankInfo*>(GISel->RegBankInfo.get())));
#endif
I->setGISelAccessor(*GISel);
@@ -356,9 +505,9 @@ public:
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override;
- void addIRPasses() override;
bool addPreISel() override;
void addMachineSSAOptimization() override;
+ bool addILPOpts() override;
bool addInstSelector() override;
#ifdef LLVM_BUILD_GLOBAL_ISEL
bool addIRTranslator() override;
@@ -406,11 +555,15 @@ void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
}
void AMDGPUPassConfig::addIRPasses() {
+ const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
+
// There is no reason to run these.
disablePass(&StackMapLivenessID);
disablePass(&FuncletLayoutID);
disablePass(&PatchableFunctionID);
+ addPass(createAMDGPULowerIntrinsicsPass(&TM));
+
// Function calls are not supported, so make sure we inline everything.
addPass(createAMDGPUAlwaysInlinePass());
addPass(createAlwaysInlinerLegacyPass());
@@ -421,17 +574,33 @@ void AMDGPUPassConfig::addIRPasses() {
// without ever running any passes on the second.
addPass(createBarrierNoopPass());
+ if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
+ // TODO: May want to move later or split into an early and late one.
+
+ addPass(createAMDGPUCodeGenPreparePass(
+ static_cast<const GCNTargetMachine *>(&TM)));
+ }
+
// Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
addPass(createAMDGPUOpenCLImageTypeLoweringPass());
- const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
if (TM.getOptLevel() > CodeGenOpt::None) {
+ addPass(createInferAddressSpacesPass());
addPass(createAMDGPUPromoteAlloca(&TM));
if (EnableSROA)
addPass(createSROAPass());
addStraightLineScalarOptimizationPasses();
+
+ if (EnableAMDGPUAliasAnalysis) {
+ addPass(createAMDGPUAAWrapperPass());
+ addPass(createExternalAAWrapperPass([](Pass &P, Function &,
+ AAResults &AAR) {
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ }));
+ }
}
TargetPassConfig::addIRPasses();
@@ -526,7 +695,12 @@ bool GCNPassConfig::addPreISel() {
// FIXME: We need to run a pass to propagate the attributes when calls are
// supported.
- addPass(&AMDGPUAnnotateKernelFeaturesID);
+ const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
+ addPass(createAMDGPUAnnotateKernelFeaturesPass(&TM));
+
+ // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
+ // regions formed by them.
+ addPass(&AMDGPUUnifyDivergentExitNodesID);
addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
addPass(createSinkingPass());
addPass(createSITypeRewriter());
@@ -549,13 +723,19 @@ void GCNPassConfig::addMachineSSAOptimization() {
addPass(&SIFoldOperandsID);
addPass(&DeadMachineInstructionElimID);
addPass(&SILoadStoreOptimizerID);
+ addPass(createSIShrinkInstructionsPass());
+ if (EnableSDWAPeephole) {
+ addPass(&SIPeepholeSDWAID);
+ addPass(&DeadMachineInstructionElimID);
+ }
}
-void GCNPassConfig::addIRPasses() {
- // TODO: May want to move later or split into an early and late one.
- addPass(createAMDGPUCodeGenPreparePass(&getGCNTargetMachine()));
+bool GCNPassConfig::addILPOpts() {
+ if (EnableEarlyIfConversion)
+ addPass(&EarlyIfConverterID);
- AMDGPUPassConfig::addIRPasses();
+ TargetPassConfig::addILPOpts();
+ return false;
}
bool GCNPassConfig::addInstSelector() {
@@ -572,20 +752,23 @@ bool GCNPassConfig::addIRTranslator() {
}
bool GCNPassConfig::addLegalizeMachineIR() {
+ addPass(new Legalizer());
return false;
}
bool GCNPassConfig::addRegBankSelect() {
+ addPass(new RegBankSelect());
return false;
}
bool GCNPassConfig::addGlobalInstructionSelect() {
+ addPass(new InstructionSelect());
return false;
}
+
#endif
void GCNPassConfig::addPreRegAlloc() {
- addPass(createSIShrinkInstructionsPass());
addPass(createSIWholeQuadModePass());
}
@@ -615,6 +798,7 @@ void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
}
void GCNPassConfig::addPostRegAlloc() {
+ addPass(&SIFixVGPRCopiesID);
addPass(&SIOptimizeExecMaskingID);
TargetPassConfig::addPostRegAlloc();
}
@@ -633,7 +817,10 @@ void GCNPassConfig::addPreEmitPass() {
// cases.
addPass(&PostRAHazardRecognizerID);
- addPass(createSIInsertWaitsPass());
+ if (EnableSIInsertWaitcntsPass)
+ addPass(createSIInsertWaitcntsPass());
+ else
+ addPass(createSIInsertWaitsPass());
addPass(createSIShrinkInstructionsPass());
addPass(&SIInsertSkipsPassID);
addPass(createSIDebuggerInsertNopsPass());
@@ -643,3 +830,4 @@ void GCNPassConfig::addPreEmitPass() {
TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
return new GCNPassConfig(this, PM);
}
+
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 9496773a073f..934bf7f31bab 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -35,6 +35,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
AMDGPUIntrinsicInfo IntrinsicInfo;
+ AMDGPUAS AS;
StringRef getGPUName(const Function &F) const;
StringRef getFeatureString(const Function &F) const;
@@ -57,7 +58,18 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
- void addEarlyAsPossiblePasses(PassManagerBase &PM) override;
+ AMDGPUAS getAMDGPUAS() const {
+ return AS;
+ }
+
+ void adjustPassManager(PassManagerBuilder &) override;
+ /// Get the integer value of a null pointer in the given address space.
+ uint64_t getNullPointerValue(unsigned AddrSpace) const {
+ if (AddrSpace == AS.LOCAL_ADDRESS || AddrSpace == AS.REGION_ADDRESS)
+ return -1;
+ return 0;
+ }
+
};
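The local/region special case exists because the zero bit pattern is a valid address in LDS. A quick illustration, assuming the conventional numbering where LOCAL_ADDRESS is 3 and GLOBAL_ADDRESS is 1:

// Illustration only: local/region null is the all-ones pattern; every other
// address space keeps the usual zero null.
uint64_t LocalNull  = TM.getNullPointerValue(/*AddrSpace=*/3); // 0xFFFFFFFFFFFFFFFF
uint64_t GlobalNull = TM.getNullPointerValue(/*AddrSpace=*/1); // 0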
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
index 1fddc88a705a..c96761c0b04e 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPUTargetMachine.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPU.h"
#include "llvm/MC/MCContext.h"
@@ -22,7 +23,8 @@ using namespace llvm;
MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
- if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO) &&
+ auto AS = static_cast<const AMDGPUTargetMachine*>(&TM)->getAMDGPUAS();
+ if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO, AS) &&
AMDGPU::shouldEmitConstantsToTextSection(TM.getTargetTriple()))
return TextSection;
diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
index de327786dff6..ca6210f69298 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
@@ -16,6 +16,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H
+#include "AMDGPU.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index e90487065992..01ac9968181a 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -29,6 +29,39 @@ using namespace llvm;
#define DEBUG_TYPE "AMDGPUtti"
+static cl::opt<unsigned> UnrollThresholdPrivate(
+ "amdgpu-unroll-threshold-private",
+ cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"),
+ cl::init(2500), cl::Hidden);
+
+static cl::opt<unsigned> UnrollThresholdLocal(
+ "amdgpu-unroll-threshold-local",
+ cl::desc("Unroll threshold for AMDGPU if local memory used in a loop"),
+ cl::init(1000), cl::Hidden);
+
+static cl::opt<unsigned> UnrollThresholdIf(
+ "amdgpu-unroll-threshold-if",
+ cl::desc("Unroll threshold increment for AMDGPU for each if statement inside loop"),
+ cl::init(150), cl::Hidden);
+
+static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
+ unsigned Depth = 0) {
+ const Instruction *I = dyn_cast<Instruction>(Cond);
+ if (!I)
+ return false;
+
+ for (const Value *V : I->operand_values()) {
+ if (!L->contains(I))
+ continue;
+ if (const PHINode *PHI = dyn_cast<PHINode>(V)) {
+ if (none_of(L->getSubLoops(), [PHI](const Loop* SubLoop) {
+ return SubLoop->contains(PHI); }))
+ return true;
+ } else if (Depth < 10 && dependsOnLocalPhi(L, V, Depth+1))
+ return true;
+ }
+ return false;
+}
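The shape of loop dependsOnLocalPhi is looking for is easiest to see in source form; an illustrative kernel fragment, not taken from the patch:

// Illustration only: a branch whose condition is computed from a PHI that
// lives in this very loop. Unrolling (even by 2) makes `phase` a constant
// in each iteration copy, so the branch and the PHI both fold away.
float f(const float *in, int n) {
  float acc = 0.f;
  int phase = 0;                 // becomes a loop PHI
  for (int i = 0; i < n; ++i) {
    if (phase)                   // condition depends on the PHI
      acc += in[i];
    else
      acc -= in[i];
    phase ^= 1;
  }
  return acc;
}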
void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
TTI::UnrollingPreferences &UP) {
@@ -38,29 +71,115 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
// TODO: Do we want runtime unrolling?
+ // Maximum alloca size that can fit in registers. Reserve 16 registers.
+ const unsigned MaxAlloca = (256 - 16) * 4;
+ unsigned ThresholdPrivate = UnrollThresholdPrivate;
+ unsigned ThresholdLocal = UnrollThresholdLocal;
+ unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
+ AMDGPUAS ASST = ST->getAMDGPUAS();
for (const BasicBlock *BB : L->getBlocks()) {
const DataLayout &DL = BB->getModule()->getDataLayout();
+ unsigned LocalGEPsSeen = 0;
+
+ if (any_of(L->getSubLoops(), [BB](const Loop* SubLoop) {
+ return SubLoop->contains(BB); }))
+ continue; // Block belongs to an inner loop.
+
for (const Instruction &I : *BB) {
+
+ // Unroll a loop which contains an "if" statement whose condition is
+ // defined by a PHI belonging to the loop. This may help to eliminate the
+ // if region and potentially even the PHI itself, saving on both divergence
+ // and registers used for the PHI.
+ // Add a small bonus for each such "if" statement.
+ if (const BranchInst *Br = dyn_cast<BranchInst>(&I)) {
+ if (UP.Threshold < MaxBoost && Br->isConditional()) {
+ if (L->isLoopExiting(Br->getSuccessor(0)) ||
+ L->isLoopExiting(Br->getSuccessor(1)))
+ continue;
+ if (dependsOnLocalPhi(L, Br->getCondition())) {
+ UP.Threshold += UnrollThresholdIf;
+ DEBUG(dbgs() << "Set unroll threshold " << UP.Threshold
+ << " for loop:\n" << *L << " due to " << *Br << '\n');
+ if (UP.Threshold >= MaxBoost)
+ return;
+ }
+ }
+ continue;
+ }
+
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I);
- if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+ if (!GEP)
+ continue;
+
+ unsigned AS = GEP->getAddressSpace();
+ unsigned Threshold = 0;
+ if (AS == ASST.PRIVATE_ADDRESS)
+ Threshold = ThresholdPrivate;
+ else if (AS == ASST.LOCAL_ADDRESS)
+ Threshold = ThresholdLocal;
+ else
+ continue;
+
+ if (UP.Threshold >= Threshold)
continue;
- const Value *Ptr = GEP->getPointerOperand();
- const AllocaInst *Alloca =
- dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
- if (Alloca) {
- // We want to do whatever we can to limit the number of alloca
- // instructions that make it through to the code generator. allocas
- // require us to use indirect addressing, which is slow and prone to
- // compiler bugs. If this loop does an address calculation on an
- // alloca ptr, then we want to use a higher than normal loop unroll
- // threshold. This will give SROA a better chance to eliminate these
- // allocas.
- //
- // Don't use the maximum allowed value here as it will make some
- // programs way too big.
- UP.Threshold = 800;
+ if (AS == ASST.PRIVATE_ADDRESS) {
+ const Value *Ptr = GEP->getPointerOperand();
+ const AllocaInst *Alloca =
+ dyn_cast<AllocaInst>(GetUnderlyingObject(Ptr, DL));
+ if (!Alloca || !Alloca->isStaticAlloca())
+ continue;
+ Type *Ty = Alloca->getAllocatedType();
+ unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
+ if (AllocaSize > MaxAlloca)
+ continue;
+ } else if (AS == ASST.LOCAL_ADDRESS) {
+ LocalGEPsSeen++;
+ // Inhibit unrolling for local memory if we have seen addressing of
+ // anything other than a variable (a global or an argument); most likely
+ // we will be unable to combine such accesses.
+ // Also, do not unroll inner loops nested too deeply for local memory,
+ // to leave a chance to unroll an outer loop for a more important reason.
+ if (LocalGEPsSeen > 1 || L->getLoopDepth() > 2 ||
+ (!isa<GlobalVariable>(GEP->getPointerOperand()) &&
+ !isa<Argument>(GEP->getPointerOperand())))
+ continue;
}
+
+ // Check if GEP depends on a value defined by this loop itself.
+ bool HasLoopDef = false;
+ for (const Value *Op : GEP->operands()) {
+ const Instruction *Inst = dyn_cast<Instruction>(Op);
+ if (!Inst || L->isLoopInvariant(Op))
+ continue;
+
+ if (any_of(L->getSubLoops(), [Inst](const Loop* SubLoop) {
+ return SubLoop->contains(Inst); }))
+ continue;
+ HasLoopDef = true;
+ break;
+ }
+ if (!HasLoopDef)
+ continue;
+
+ // We want to do whatever we can to limit the number of alloca
+ // instructions that make it through to the code generator. allocas
+ // require us to use indirect addressing, which is slow and prone to
+ // compiler bugs. If this loop does an address calculation on an
+ // alloca ptr, then we want to use a higher than normal loop unroll
+ // threshold. This will give SROA a better chance to eliminate these
+ // allocas.
+ //
+ // We also want to have more unrolling for local memory to let ds
+ // instructions with different offsets combine.
+ //
+ // Don't use the maximum allowed value here as it will make some
+ // programs way too big.
+ UP.Threshold = Threshold;
+ DEBUG(dbgs() << "Set unroll threshold " << Threshold << " for loop:\n"
+ << *L << " due to " << *GEP << '\n');
+ if (UP.Threshold >= MaxBoost)
+ return;
}
}
}
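The private-memory threshold is aimed at loops that index into a small alloca; an illustrative source pattern, not taken from the patch:

// Illustration only: after full unrolling every index into tmp is a
// constant, so SROA can replace the private-memory alloca with registers.
void kernel(float *out, const float *in) {
  float tmp[16];                     // lowered to a private (scratch) alloca
  for (int i = 0; i < 16; ++i)       // loop-defined GEP index into the alloca
    tmp[i] = in[i] * 2.0f;
  for (int i = 0; i < 16; ++i)
    out[i] += tmp[i];
}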
@@ -81,28 +200,56 @@ unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) {
}
unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
- switch (AddrSpace) {
- case AMDGPUAS::GLOBAL_ADDRESS:
- case AMDGPUAS::CONSTANT_ADDRESS:
- case AMDGPUAS::FLAT_ADDRESS:
+ AMDGPUAS AS = ST->getAMDGPUAS();
+ if (AddrSpace == AS.GLOBAL_ADDRESS ||
+ AddrSpace == AS.CONSTANT_ADDRESS ||
+ AddrSpace == AS.FLAT_ADDRESS)
return 128;
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS:
+ if (AddrSpace == AS.LOCAL_ADDRESS ||
+ AddrSpace == AS.REGION_ADDRESS)
return 64;
- case AMDGPUAS::PRIVATE_ADDRESS:
+ if (AddrSpace == AS.PRIVATE_ADDRESS)
return 8 * ST->getMaxPrivateElementSize();
- default:
- if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS &&
- (AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
- AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
- (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
- AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
- return 128;
- llvm_unreachable("unhandled address space");
+
+ if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS &&
+ (AddrSpace == AS.PARAM_D_ADDRESS ||
+ AddrSpace == AS.PARAM_I_ADDRESS ||
+ (AddrSpace >= AS.CONSTANT_BUFFER_0 &&
+ AddrSpace <= AS.CONSTANT_BUFFER_15)))
+ return 128;
+ llvm_unreachable("unhandled address space");
+}
+
+bool AMDGPUTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ // We allow vectorization of flat stores, even though we may need to decompose
+ // them later if they may access private memory. We don't have enough context
+ // here, and legalization can handle it.
+ if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS) {
+ return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) &&
+ ChainSizeInBytes <= ST->getMaxPrivateElementSize();
}
+ return true;
+}
+
+bool AMDGPUTTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
+}
+
+bool AMDGPUTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}
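What the private-address-space rule amounts to in numbers, assuming a subtarget whose getMaxPrivateElementSize() is 4 and which lacks unaligned scratch access; a sketch, not a test from the patch:

#include <cassert>

// Sketch only: chains wider than the max private element size, or
// underaligned ones, are rejected for private (scratch) memory.
void checkPrivateVectorizeRules(const AMDGPUTTIImpl &TTI, unsigned PrivateAS) {
  assert(TTI.isLegalToVectorizeLoadChain(4, 4, PrivateAS));    // fits, aligned
  assert(!TTI.isLegalToVectorizeLoadChain(16, 4, PrivateAS));  // chain too wide
  assert(!TTI.isLegalToVectorizeLoadChain(4, 2, PrivateAS));   // underaligned
}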
unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+ // Disable unrolling if the loop is not vectorized.
+ if (VF == 1)
+ return 1;
+
// Semi-arbitrary large amount.
return 64;
}
@@ -228,16 +375,8 @@ int AMDGPUTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
}
}
-static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII,
- const IntrinsicInst *I) {
+static bool isIntrinsicSourceOfDivergence(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::not_intrinsic:
- // This means we have an intrinsic that isn't defined in
- // IntrinsicsAMDGPU.td
- break;
-
case Intrinsic::amdgcn_workitem_id_x:
case Intrinsic::amdgcn_workitem_id_y:
case Intrinsic::amdgcn_workitem_id_z:
@@ -249,6 +388,8 @@ static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII,
case Intrinsic::r600_read_tidig_x:
case Intrinsic::r600_read_tidig_y:
case Intrinsic::r600_read_tidig_z:
+ case Intrinsic::amdgcn_atomic_inc:
+ case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_image_atomic_swap:
case Intrinsic::amdgcn_image_atomic_add:
case Intrinsic::amdgcn_image_atomic_sub:
@@ -274,16 +415,10 @@ static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII,
case Intrinsic::amdgcn_buffer_atomic_xor:
case Intrinsic::amdgcn_buffer_atomic_cmpswap:
case Intrinsic::amdgcn_ps_live:
+ case Intrinsic::amdgcn_ds_swizzle:
return true;
- }
-
- StringRef Name = I->getCalledFunction()->getName();
- switch (TII->lookupName((const char *)Name.bytes_begin(), Name.size())) {
default:
return false;
- case AMDGPUIntrinsic::SI_fs_interp:
- case AMDGPUIntrinsic::SI_fs_constant:
- return true;
}
}
@@ -295,8 +430,8 @@ static bool isArgPassedInSGPR(const Argument *A) {
return true;
// For non-compute shaders, SGPR inputs are marked with either inreg or byval.
- if (F->getAttributes().hasAttribute(A->getArgNo() + 1, Attribute::InReg) ||
- F->getAttributes().hasAttribute(A->getArgNo() + 1, Attribute::ByVal))
+ if (F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
+ F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal))
return true;
// Everything else is in VGPRs.
@@ -318,7 +453,7 @@ bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
// All other loads are not divergent, because if threads issue loads with the
// same arguments, they will always get the same result.
if (const LoadInst *Load = dyn_cast<LoadInst>(V))
- return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+ return Load->getPointerAddressSpace() == ST->getAMDGPUAS().PRIVATE_ADDRESS;
// Atomics are divergent because they are executed sequentially: when an
// atomic operation refers to the same address in each thread, then each
@@ -327,10 +462,8 @@ bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
if (isa<AtomicRMWInst>(V) || isa<AtomicCmpXchgInst>(V))
return true;
- if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
- const TargetMachine &TM = getTLI()->getTargetMachine();
- return isIntrinsicSourceOfDivergence(TM.getIntrinsicInfo(), Intrinsic);
- }
+ if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V))
+ return isIntrinsicSourceOfDivergence(Intrinsic);
// Assume all function calls are a source of divergence.
if (isa<CallInst>(V) || isa<InvokeInst>(V))
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 0d83b2a585bf..71d6306bc1a5 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -32,6 +32,7 @@ class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
const AMDGPUSubtarget *ST;
const AMDGPUTargetLowering *TLI;
+ bool IsGraphicsShader;
const AMDGPUSubtarget *getST() const { return ST; }
const AMDGPUTargetLowering *getTLI() const { return TLI; }
@@ -62,7 +63,8 @@ public:
explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()),
ST(TM->getSubtargetImpl(F)),
- TLI(ST->getTargetLowering()) {}
+ TLI(ST->getTargetLowering()),
+ IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
bool hasBranchDivergence() { return true; }
@@ -76,6 +78,17 @@ public:
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
+
+ bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const;
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const;
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const;
+
unsigned getMaxInterleaveFactor(unsigned VF);
int getArithmeticInstrCost(
@@ -91,6 +104,15 @@ public:
int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
bool isSourceOfDivergence(const Value *V) const;
+ unsigned getFlatAddressSpace() const {
+ // Don't bother running InferAddressSpaces pass on graphics shaders which
+ // don't use flat addressing.
+ if (IsGraphicsShader)
+ return -1;
+ return ST->hasFlatAddressSpace() ?
+ ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE;
+ }
+
unsigned getVectorSplitCost() { return 0; }
};
diff --git a/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
new file mode 100644
index 000000000000..309913f87fb6
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -0,0 +1,225 @@
+//===- AMDGPUUnifyDivergentExitNodes.cpp ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a variant of the UnifyDivergentExitNodes pass. Rather than ensuring
+// there is at most one ret and one unreachable instruction, it ensures there is
+// at most one divergent exiting block.
+//
+// StructurizeCFG can't deal with multi-exit regions formed by branches to
+// multiple return nodes. It is not desirable to structurize regions with
+// uniform branches, so unifying those to the same return block as divergent
+// branches inhibits use of scalar branching. It still can't deal with the case
+// where one branch goes to a return and another to an unreachable, so
+// unreachable is replaced with a return in this case.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-unify-divergent-exit-nodes"
+
+namespace {
+
+class AMDGPUUnifyDivergentExitNodes : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ AMDGPUUnifyDivergentExitNodes() : FunctionPass(ID) {
+ initializeAMDGPUUnifyDivergentExitNodesPass(*PassRegistry::getPassRegistry());
+ }
+
+ // We can preserve non-critical-edgeness when we unify function exit nodes
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+};
+
+}
+
+char AMDGPUUnifyDivergentExitNodes::ID = 0;
+INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
+ "Unify divergent function exit nodes", false, false)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
+ "Unify divergent function exit nodes", false, false)
+
+char &llvm::AMDGPUUnifyDivergentExitNodesID = AMDGPUUnifyDivergentExitNodes::ID;
+
+void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
+ // TODO: Preserve dominator tree.
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+
+ AU.addRequired<DivergenceAnalysis>();
+
+ // No divergent values are changed, only blocks and branch edges.
+ AU.addPreserved<DivergenceAnalysis>();
+
+ // We preserve the non-critical-edgeness property
+ AU.addPreservedID(BreakCriticalEdgesID);
+
+ // This is a cluster of orthogonal Transforms
+ AU.addPreservedID(LowerSwitchID);
+ FunctionPass::getAnalysisUsage(AU);
+
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+}
+
+/// \returns true if \p BB is reachable through only uniform branches.
+/// XXX - Is there a more efficient way to find this?
+static bool isUniformlyReached(const DivergenceAnalysis &DA,
+ BasicBlock &BB) {
+ SmallVector<BasicBlock *, 8> Stack;
+ SmallPtrSet<BasicBlock *, 8> Visited;
+
+ for (BasicBlock *Pred : predecessors(&BB))
+ Stack.push_back(Pred);
+
+ while (!Stack.empty()) {
+ BasicBlock *Top = Stack.pop_back_val();
+ if (!DA.isUniform(Top->getTerminator()))
+ return false;
+
+ for (BasicBlock *Pred : predecessors(Top)) {
+ if (Visited.insert(Pred).second)
+ Stack.push_back(Pred);
+ }
+ }
+
+ return true;
+}
+
+static BasicBlock *unifyReturnBlockSet(Function &F,
+ ArrayRef<BasicBlock *> ReturningBlocks,
+ const TargetTransformInfo &TTI,
+ StringRef Name) {
+ // We need to insert a new basic block into the function, add PHI nodes
+ // (if the function returns values), and convert all of the return
+ // instructions into unconditional branches.
+ //
+ BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), Name, &F);
+
+ PHINode *PN = nullptr;
+ if (F.getReturnType()->isVoidTy()) {
+ ReturnInst::Create(F.getContext(), nullptr, NewRetBlock);
+ } else {
+ // If the function doesn't return void... add a PHI node to the block...
+ PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
+ "UnifiedRetVal");
+ NewRetBlock->getInstList().push_back(PN);
+ ReturnInst::Create(F.getContext(), PN, NewRetBlock);
+ }
+
+ // Loop over all of the blocks, replacing the return instruction with an
+ // unconditional branch.
+ //
+ for (BasicBlock *BB : ReturningBlocks) {
+ // Add an incoming element to the PHI node for every return instruction that
+ // is merging into this new block...
+ if (PN)
+ PN->addIncoming(BB->getTerminator()->getOperand(0), BB);
+
+ BB->getInstList().pop_back(); // Remove the return insn
+ BranchInst::Create(NewRetBlock, BB);
+ }
+
+ for (BasicBlock *BB : ReturningBlocks) {
+ // Cleanup possible branch to unconditional branch to the return.
+ SimplifyCFG(BB, TTI, 2);
+ }
+
+ return NewRetBlock;
+}
+
+bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
+ auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+ if (PDT.getRoots().size() <= 1)
+ return false;
+
+ DivergenceAnalysis &DA = getAnalysis<DivergenceAnalysis>();
+
+ // Loop over all of the blocks in a function, tracking all of the blocks that
+ // return.
+ //
+ SmallVector<BasicBlock *, 4> ReturningBlocks;
+ SmallVector<BasicBlock *, 4> UnreachableBlocks;
+
+ for (BasicBlock *BB : PDT.getRoots()) {
+ if (isa<ReturnInst>(BB->getTerminator())) {
+ if (!isUniformlyReached(DA, *BB))
+ ReturningBlocks.push_back(BB);
+ } else if (isa<UnreachableInst>(BB->getTerminator())) {
+ if (!isUniformlyReached(DA, *BB))
+ UnreachableBlocks.push_back(BB);
+ }
+ }
+
+ if (!UnreachableBlocks.empty()) {
+ BasicBlock *UnreachableBlock = nullptr;
+
+ if (UnreachableBlocks.size() == 1) {
+ UnreachableBlock = UnreachableBlocks.front();
+ } else {
+ UnreachableBlock = BasicBlock::Create(F.getContext(),
+ "UnifiedUnreachableBlock", &F);
+ new UnreachableInst(F.getContext(), UnreachableBlock);
+
+ for (BasicBlock *BB : UnreachableBlocks) {
+ BB->getInstList().pop_back(); // Remove the unreachable inst.
+ BranchInst::Create(UnreachableBlock, BB);
+ }
+ }
+
+ if (!ReturningBlocks.empty()) {
+ // Don't create a new unreachable inst if we have a return. The
+ // structurizer/annotator can't handle the multiple exits.
+
+ Type *RetTy = F.getReturnType();
+ Value *RetVal = RetTy->isVoidTy() ? nullptr : UndefValue::get(RetTy);
+ UnreachableBlock->getInstList().pop_back(); // Remove the unreachable inst.
+
+ Function *UnreachableIntrin =
+ Intrinsic::getDeclaration(F.getParent(), Intrinsic::amdgcn_unreachable);
+
+ // Insert a call to an intrinsic tracking that this is an unreachable
+ // point, in case we want to kill the active lanes or something later.
+ CallInst::Create(UnreachableIntrin, {}, "", UnreachableBlock);
+
+ // Don't create a scalar trap. We would only want to trap if this code was
+ // really reached, but a scalar trap would happen even if no lanes
+ // actually reached here.
+ ReturnInst::Create(F.getContext(), RetVal, UnreachableBlock);
+ ReturningBlocks.push_back(UnreachableBlock);
+ }
+ }
+
+ // Now handle return blocks.
+ if (ReturningBlocks.empty())
+ return false; // No blocks return
+
+ if (ReturningBlocks.size() == 1)
+ return false; // Already has a single return block
+
+ const TargetTransformInfo &TTI
+ = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ unifyReturnBlockSet(F, ReturningBlocks, TTI, "UnifiedReturnBlock");
+ return true;
+}
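A minimal sketch of driving the new pass standalone over a module. The creator function createAMDGPUUnifyDivergentExitNodesPass is hypothetical here (the patch only exposes the pass ID), as is the input file name:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/SourceMgr.h"

// Sketch only: parse a module and run the unify-divergent-exit-nodes pass.
// Error handling elided; the pass creator below is hypothetical.
void runUnify(llvm::LLVMContext &Ctx) {
  llvm::SMDiagnostic Err;
  std::unique_ptr<llvm::Module> M = llvm::parseIRFile("kernel.ll", Err, Ctx);
  llvm::legacy::PassManager PM;
  PM.add(llvm::createAMDGPUUnifyDivergentExitNodesPass()); // hypothetical creator
  PM.run(*M);
}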
diff --git a/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp b/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
index bf501a1e8405..3a0c3ede08f4 100644
--- a/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
+++ b/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
@@ -13,38 +13,39 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include <algorithm>
+#include <cassert>
using namespace llvm;
namespace {
+
namespace kOCLMD {
+
const char SpirVer[] = "opencl.spir.version";
const char OCLVer[] = "opencl.ocl.version";
const char UsedExt[] = "opencl.used.extensions";
const char UsedOptCoreFeat[] = "opencl.used.optional.core.features";
const char CompilerOptions[] = "opencl.compiler.options";
const char LLVMIdent[] = "llvm.ident";
- }
+
+ } // end namespace kOCLMD
/// \brief Unify multiple OpenCL metadata due to linking.
- class AMDGPUUnifyMetadata : public FunctionPass {
+ class AMDGPUUnifyMetadata : public ModulePass {
public:
static char ID;
- explicit AMDGPUUnifyMetadata() : FunctionPass(ID) {};
+ explicit AMDGPUUnifyMetadata() : ModulePass(ID) {}
private:
- // This should really be a module pass but we have to run it as early
- // as possible, so given function passes are executed first and
- // TargetMachine::addEarlyAsPossiblePasses() expects only function passes
- // it has to be a function pass.
virtual bool runOnModule(Module &M);
- // \todo: Convert to a module pass.
- virtual bool runOnFunction(Function &F);
-
/// \brief Unify version metadata.
/// \return true if changes are made.
/// Assume the named metadata has operands each of which is a pair of
@@ -117,7 +118,7 @@ INITIALIZE_PASS(AMDGPUUnifyMetadata, "amdgpu-unify-metadata",
"Unify multiple OpenCL metadata due to linking",
false, false)
-FunctionPass* llvm::createAMDGPUUnifyMetadataPass() {
+ModulePass* llvm::createAMDGPUUnifyMetadataPass() {
return new AMDGPUUnifyMetadata();
}
@@ -143,7 +144,3 @@ bool AMDGPUUnifyMetadata::runOnModule(Module &M) {
return Changed;
}
-
-bool AMDGPUUnifyMetadata::runOnFunction(Function &F) {
- return runOnModule(*F.getParent());
-}
diff --git a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 7faeccdc5df3..1a393845a822 100644
--- a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -9,27 +9,40 @@
//==-----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
+#include "R600RegisterInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Dominators.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstddef>
#include <deque>
+#include <iterator>
+#include <map>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -53,15 +66,19 @@ STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
namespace llvm {
+
void initializeAMDGPUCFGStructurizerPass(PassRegistry&);
-}
+
+} // end namespace llvm
+
+namespace {
//===----------------------------------------------------------------------===//
//
// Miscellaneous utility for CFGStructurizer.
//
//===----------------------------------------------------------------------===//
-namespace {
+
#define SHOWNEWINSTR(i) \
DEBUG(dbgs() << "New instr: " << *i << "\n");
@@ -82,35 +99,19 @@ DEBUG( \
#define INVALIDSCCNUM -1
-template<class NodeT>
-void ReverseVector(SmallVectorImpl<NodeT *> &Src) {
- size_t sz = Src.size();
- for (size_t i = 0; i < sz/2; ++i) {
- NodeT *t = Src[i];
- Src[i] = Src[sz - i - 1];
- Src[sz - i - 1] = t;
- }
-}
-
-} // end anonymous namespace
-
//===----------------------------------------------------------------------===//
//
// supporting data structure for CFGStructurizer
//
//===----------------------------------------------------------------------===//
-
-namespace {
-
class BlockInformation {
public:
- bool IsRetired;
- int SccNum;
- BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
-};
+ bool IsRetired = false;
+ int SccNum = INVALIDSCCNUM;
-} // end anonymous namespace
+ BlockInformation() = default;
+};
//===----------------------------------------------------------------------===//
//
@@ -118,7 +119,6 @@ public:
//
//===----------------------------------------------------------------------===//
-namespace {
class AMDGPUCFGStructurizer : public MachineFunctionPass {
public:
typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
@@ -133,8 +133,7 @@ public:
static char ID;
- AMDGPUCFGStructurizer() :
- MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {
+ AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
}
@@ -167,7 +166,7 @@ public:
MLI = &getAnalysis<MachineLoopInfo>();
DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
MDT = &getAnalysis<MachineDominatorTree>();
- DEBUG(MDT->print(dbgs(), (const llvm::Module*)nullptr););
+ DEBUG(MDT->print(dbgs(), (const Module*)nullptr););
PDT = &getAnalysis<MachinePostDominatorTree>();
DEBUG(PDT->print(dbgs()););
prepare();
@@ -180,8 +179,8 @@ protected:
MachineDominatorTree *MDT;
MachinePostDominatorTree *PDT;
MachineLoopInfo *MLI;
- const R600InstrInfo *TII;
- const R600RegisterInfo *TRI;
+ const R600InstrInfo *TII = nullptr;
+ const R600RegisterInfo *TRI = nullptr;
// PRINT FUNCTIONS
/// Print the ordered Blocks.
@@ -198,6 +197,7 @@ protected:
}
}
}
+
static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
for (MachineLoop::iterator iter = LoopInfo.begin(),
iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
@@ -263,7 +263,6 @@ protected:
MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
static void wrapup(MachineBasicBlock *MBB);
-
int patternMatch(MachineBasicBlock *MBB);
int patternMatchGroup(MachineBasicBlock *MBB);
int serialPatternMatch(MachineBasicBlock *MBB);
@@ -328,7 +327,6 @@ protected:
void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
void retireBlock(MachineBasicBlock *MBB);
-
private:
MBBInfoMap BlockInfoMap;
LoopLandInfoMap LLInfoMap;
@@ -337,6 +335,10 @@ private:
SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
};
+char AMDGPUCFGStructurizer::ID = 0;
+
+} // end anonymous namespace
+
int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
if (It == BlockInfoMap.end())
@@ -379,6 +381,7 @@ bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
}
return false;
}
+
AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
bool AllowSideEntry) const {
@@ -697,10 +700,8 @@ void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
// (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
// there isn't such an interface yet. alternatively, replace all the other
// blocks in the jump table with the entryBlk //}
-
}
-
bool AMDGPUCFGStructurizer::prepare() {
bool Changed = false;
@@ -748,7 +749,6 @@ bool AMDGPUCFGStructurizer::prepare() {
}
bool AMDGPUCFGStructurizer::run() {
-
//Assume reducible CFG...
DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
@@ -886,8 +886,6 @@ bool AMDGPUCFGStructurizer::run() {
return true;
}
-
-
void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
int SccNum = 0;
MachineBasicBlock *MBB;
@@ -903,11 +901,8 @@ void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
}
}
- //walk through all the block in func to check for unreachable
- typedef GraphTraits<MachineFunction *> GTM;
- auto It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
- for (; It != E; ++It) {
- MachineBasicBlock *MBB = *It;
+ // walk through all the block in func to check for unreachable
+ for (auto *MBB : nodes(MF)) {
SccNum = getSCCNum(MBB);
if (SccNum == INVALIDSCCNUM)
dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
@@ -941,7 +936,6 @@ int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
return NumMatch;
}
-
int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
if (MBB->succ_size() != 1)
return 0;
@@ -1039,7 +1033,7 @@ int AMDGPUCFGStructurizer::loopendPatternMatch() {
for (MachineLoop *ML : depth_first(It))
NestedLoops.push_front(ML);
- if (NestedLoops.size() == 0)
+ if (NestedLoops.empty())
return 0;
// Process nested loop outside->inside (we did push_front),
@@ -1074,13 +1068,9 @@ int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
MachineBasicBlock *ExitBlk = *ExitBlks.begin();
assert(ExitBlk && "Loop has several exit block");
MBBVector LatchBlks;
- typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
- InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
- PE = InvMBBTraits::child_end(LoopHeader);
- for (; PI != PE; PI++) {
- if (LoopRep->contains(*PI))
- LatchBlks.push_back(*PI);
- }
+ for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader))
+ if (LoopRep->contains(LB))
+ LatchBlks.push_back(LB);
for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
@@ -1217,7 +1207,7 @@ void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
}
}
- dbgs() << "\n";
+ dbgs() << "\n";
}
int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
@@ -1478,7 +1468,6 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
if (LandMBB && TrueMBB && FalseMBB)
MBB->addSuccessor(LandMBB);
-
}
void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
@@ -1491,7 +1480,6 @@ void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
DstBlk->replaceSuccessor(DstBlk, LandMBB);
}
-
void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
MachineBasicBlock *LandMBB) {
DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
@@ -1727,11 +1715,6 @@ void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
&& "can't retire block yet");
}
-char AMDGPUCFGStructurizer::ID = 0;
-
-} // end anonymous namespace
-
-
INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
"AMDGPU CFG Structurizer", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 3cf9a1d92469..961f7186f373 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -16,6 +16,7 @@
#include "Utils/AMDGPUAsmUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/STLExtras.h"
@@ -39,15 +40,12 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -56,7 +54,6 @@
#include <map>
#include <memory>
#include <string>
-#include <vector>
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -83,7 +80,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
const AMDGPUAsmParser *AsmParser;
public:
- AMDGPUOperand(enum KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
+ AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
: MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
typedef std::unique_ptr<AMDGPUOperand> Ptr;
@@ -160,7 +157,11 @@ public:
ImmTySendMsg,
ImmTyInterpSlot,
ImmTyInterpAttr,
- ImmTyAttrChan
+ ImmTyAttrChan,
+ ImmTyOpSel,
+ ImmTyOpSelHi,
+ ImmTyNegLo,
+ ImmTyNegHi
};
struct TokOp {
@@ -297,6 +298,10 @@ public:
bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
+ bool isOpSel() const { return isImmTy(ImmTyOpSel); }
+ bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
+ bool isNegLo() const { return isImmTy(ImmTyNegLo); }
+ bool isNegHi() const { return isImmTy(ImmTyNegHi); }
bool isMod() const {
return isClampSI() || isOModSI();
@@ -316,6 +321,10 @@ public:
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
}
+ bool isSCSrcV2B16() const {
+ return isSCSrcB16();
+ }
+
bool isSCSrcB32() const {
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
}
@@ -328,6 +337,10 @@ public:
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
}
+ bool isSCSrcV2F16() const {
+ return isSCSrcF16();
+ }
+
bool isSCSrcF32() const {
return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
}
@@ -344,6 +357,11 @@ public:
return isSCSrcB16() || isLiteralImm(MVT::i16);
}
+ bool isSSrcV2B16() const {
+ llvm_unreachable("cannot happen");
+ return isSSrcB16();
+ }
+
bool isSSrcB64() const {
// TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
// See isVSrc64().
@@ -362,6 +380,11 @@ public:
return isSCSrcB16() || isLiteralImm(MVT::f16);
}
+ bool isSSrcV2F16() const {
+ llvm_unreachable("cannot happen");
+ return isSSrcF16();
+ }
+
bool isVCSrcB32() const {
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
}
@@ -374,6 +397,10 @@ public:
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
}
+ bool isVCSrcV2B16() const {
+ return isVCSrcB16();
+ }
+
bool isVCSrcF32() const {
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
}
@@ -386,6 +413,10 @@ public:
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
}
+ bool isVCSrcV2F16() const {
+ return isVCSrcF16();
+ }
+
bool isVSrcB32() const {
return isVCSrcF32() || isLiteralImm(MVT::i32);
}
@@ -398,6 +429,11 @@ public:
return isVCSrcF16() || isLiteralImm(MVT::i16);
}
+ bool isVSrcV2B16() const {
+ llvm_unreachable("cannot happen");
+ return isVSrcB16();
+ }
+
bool isVSrcF32() const {
return isVCSrcF32() || isLiteralImm(MVT::f32);
}
@@ -410,6 +446,11 @@ public:
return isVCSrcF16() || isLiteralImm(MVT::f16);
}
+ bool isVSrcV2F16() const {
+ llvm_unreachable("cannot happen");
+ return isVSrcF16();
+ }
+
bool isKImmFP32() const {
return isLiteralImm(MVT::f32);
}
@@ -459,7 +500,7 @@ public:
return Imm.Val;
}
- enum ImmTy getImmTy() const {
+ ImmTy getImmTy() const {
assert(isImm());
return Imm.Type;
}
@@ -501,9 +542,11 @@ public:
return getModifiers().hasIntModifiers();
}
+ uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
+
void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
- void addLiteralImmOperand(MCInst &Inst, int64_t Val) const;
+ void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
template <unsigned Bitwidth>
void addKImmFPOperands(MCInst &Inst, unsigned N) const;
@@ -610,6 +653,10 @@ public:
case ImmTyInterpSlot: OS << "InterpSlot"; break;
case ImmTyInterpAttr: OS << "InterpAttr"; break;
case ImmTyAttrChan: OS << "AttrChan"; break;
+ case ImmTyOpSel: OS << "OpSel"; break;
+ case ImmTyOpSelHi: OS << "OpSelHi"; break;
+ case ImmTyNegLo: OS << "NegLo"; break;
+ case ImmTyNegHi: OS << "NegHi"; break;
}
}
@@ -636,7 +683,7 @@ public:
static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
int64_t Val, SMLoc Loc,
- enum ImmTy Type = ImmTyNone,
+ ImmTy Type = ImmTyNone,
bool IsFPImm = false) {
auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
Op->Imm.Val = Val;
@@ -695,9 +742,9 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
- int SgprIndexUnusedMin;
- int VgprIndexUnusedMin;
- MCContext *Ctx;
+ int SgprIndexUnusedMin = -1;
+ int VgprIndexUnusedMin = -1;
+ MCContext *Ctx = nullptr;
void usesSgprAt(int i) {
if (i >= SgprIndexUnusedMin) {
@@ -708,6 +755,7 @@ class KernelScopeInfo {
}
}
}
+
void usesVgprAt(int i) {
if (i >= VgprIndexUnusedMin) {
VgprIndexUnusedMin = ++i;
@@ -717,14 +765,16 @@ class KernelScopeInfo {
}
}
}
+
public:
- KernelScopeInfo() : SgprIndexUnusedMin(-1), VgprIndexUnusedMin(-1), Ctx(nullptr)
- {}
+ KernelScopeInfo() = default;
+
void initialize(MCContext &Context) {
Ctx = &Context;
usesSgprAt(SgprIndexUnusedMin = -1);
usesVgprAt(VgprIndexUnusedMin = -1);
}
+
void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
switch (RegKind) {
case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
@@ -738,9 +788,9 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
const MCInstrInfo &MII;
MCAsmParser &Parser;
- unsigned ForcedEncodingSize;
- bool ForcedDPP;
- bool ForcedSDWA;
+ unsigned ForcedEncodingSize = 0;
+ bool ForcedDPP = false;
+ bool ForcedSDWA = false;
KernelScopeInfo KernelScope;
/// @name Auto-generated Match Functions
@@ -756,7 +806,7 @@ private:
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
bool ParseDirectiveHSACodeObjectVersion();
bool ParseDirectiveHSACodeObjectISA();
- bool ParseDirectiveRuntimeMetadata();
+ bool ParseDirectiveCodeObjectMetadata();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseDirectiveAMDKernelCodeT();
bool ParseSectionDirectiveHSAText();
@@ -767,44 +817,52 @@ private:
bool ParseSectionDirectiveHSADataGlobalAgent();
bool ParseSectionDirectiveHSADataGlobalProgram();
bool ParseSectionDirectiveHSARodataReadonlyAgent();
- bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum);
- bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex);
- void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, bool IsAtomicReturn);
+ bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
+ RegisterKind RegKind, unsigned Reg1,
+ unsigned RegNum);
+ bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
+ unsigned& RegNum, unsigned& RegWidth,
+ unsigned *DwordRegIndex);
+ void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
+ bool IsAtomic, bool IsAtomicReturn);
+ void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
+ bool IsGdsHardcoded);
public:
enum AMDGPUMatchResultTy {
Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
};
+ typedef std::map<AMDGPUOperand::ImmTy, unsigned> OptionalImmIndexMap;
+
AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
const MCInstrInfo &MII,
const MCTargetOptions &Options)
- : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser),
- ForcedEncodingSize(0),
- ForcedDPP(false),
- ForcedSDWA(false) {
+ : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser) {
MCAsmParserExtension::Initialize(Parser);
- if (getSTI().getFeatureBits().none()) {
+ if (getFeatureBits().none()) {
// Set default features.
copySTI().ToggleFeature("SOUTHERN_ISLANDS");
}
- setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
+ setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
{
// TODO: make those pre-defined variables read-only.
// Currently there is no suitable machinery in core llvm-mc for this.
// MCSymbol::isRedefinable is intended for another purpose, and
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
- AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
MCContext &Ctx = getContext();
- MCSymbol *Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
- Sym->setVariableValue(MCConstantExpr::create(Isa.Major, Ctx));
+ MCSymbol *Sym =
+ Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
- Sym->setVariableValue(MCConstantExpr::create(Isa.Minor, Ctx));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
- Sym->setVariableValue(MCConstantExpr::create(Isa.Stepping, Ctx));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
}
KernelScope.initialize(getContext());
}
@@ -822,7 +880,7 @@ public:
}
bool hasInv2PiInlineImm() const {
- return getSTI().getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
+ return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}
bool hasSGPR102_SGPR103() const {
@@ -844,6 +902,10 @@ public:
return &MII;
}
+ const FeatureBitset &getFeatureBits() const {
+ return getSTI().getFeatureBits();
+ }
+
void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
@@ -871,19 +933,28 @@ public:
//bool ProcessInstruction(MCInst &Inst);
OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
+
OperandMatchResultTy
parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
- enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
+ AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
bool (*ConvertResult)(int64_t &) = nullptr);
+
+ OperandMatchResultTy parseOperandArrayWithPrefix(
+ const char *Prefix,
+ OperandVector &Operands,
+ AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
+ bool (*ConvertResult)(int64_t&) = nullptr);
+
OperandMatchResultTy
parseNamedBit(const char *Name, OperandVector &Operands,
- enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
+ AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
StringRef &Value);
- OperandMatchResultTy parseImm(OperandVector &Operands);
+ bool parseAbsoluteExpr(int64_t &Val, bool AbsMod = false);
+ OperandMatchResultTy parseImm(OperandVector &Operands, bool AbsMod = false);
OperandMatchResultTy parseReg(OperandVector &Operands);
- OperandMatchResultTy parseRegOrImm(OperandVector &Operands);
+ OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false);
OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
@@ -891,7 +962,8 @@ public:
OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
- void cvtDS(MCInst &Inst, const OperandVector &Operands);
+ void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
+ void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
void cvtExp(MCInst &Inst, const OperandVector &Operands);
bool parseCnt(int64_t &IntVal);
@@ -911,6 +983,12 @@ private:
void errorExpTgt();
OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
+ bool validateOperandLimitations(const MCInst &Inst);
+ bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
+ bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
+ unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
+ bool isSGPR(unsigned Reg);
+
public:
OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
@@ -940,7 +1018,13 @@ public:
void cvtId(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands);
+
+ void cvtVOP3Impl(MCInst &Inst,
+ const OperandVector &Operands,
+ OptionalImmIndexMap &OptionalIdx);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
+ void cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands);
+ void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
void cvtMIMG(MCInst &Inst, const OperandVector &Operands);
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
@@ -988,6 +1072,30 @@ static const fltSemantics *getFltSemantics(MVT VT) {
return getFltSemantics(VT.getSizeInBits() / 8);
}
+static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
+ switch (OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ return &APFloat::IEEEsingle();
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ return &APFloat::IEEEdouble();
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ return &APFloat::IEEEhalf();
+ default:
+ llvm_unreachable("unsupported fp type");
+ }
+}
+
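(Aside: a minimal standalone sketch, assuming LLVM's APFloat and MathExtras APIs, of the narrowing that getOpFltSemantics() feeds below; precision loss is tolerated but is reported through the losesInfo out-parameter:)

    #include "llvm/ADT/APFloat.h"
    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/MathExtras.h"
    #include <cstdio>
    using namespace llvm;

    int main() {
      // 0.1 is not exactly representable; narrowing to half loses bits.
      APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, DoubleToBits(0.1)));
      bool LosesInfo = false;
      FPLiteral.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
                        &LosesInfo);
      std::printf("half bits = 0x%llx, lossy = %d\n",
                  (unsigned long long)FPLiteral.bitcastToAPInt().getZExtValue(),
                  (int)LosesInfo);
      return 0;
    }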
//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//
@@ -1031,13 +1139,18 @@ bool AMDGPUOperand::isInlinableImm(MVT type) const {
if (!canLosslesslyConvertToFPType(FPLiteral, type))
return false;
+ if (type.getScalarSizeInBits() == 16) {
+ return AMDGPU::isInlinableLiteral16(
+ static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
+ AsmParser->hasInv2PiInlineImm());
+ }
+
// Check if single precision literal is inlinable
return AMDGPU::isInlinableLiteral32(
static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
AsmParser->hasInv2PiInlineImm());
}
-
// We got int literal token.
if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
return AMDGPU::isInlinableLiteral64(Imm.Val,
@@ -1064,6 +1177,13 @@ bool AMDGPUOperand::isLiteralImm(MVT type) const {
if (!Imm.IsFPImm) {
// We got int literal token.
+ if (type == MVT::f64 && hasFPModifiers()) {
+ // Cannot apply fp modifiers to int literals while preserving the same
+ // semantics for VOP1/2/C and VOP3, because of integer truncation.
+ // To avoid ambiguity, these cases are disabled.
+ return false;
+ }
+
unsigned Size = type.getSizeInBits();
if (Size == 64)
Size = 32;
@@ -1093,40 +1213,57 @@ bool AMDGPUOperand::isRegClass(unsigned RCID) const {
return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}
-void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
- int64_t Val = Imm.Val;
- if (isImmTy(ImmTyNone) && ApplyModifiers && Imm.Mods.hasFPModifiers() && Imm.Mods.Neg) {
- // Apply modifiers to immediate value. Only negate can get here
- if (Imm.IsFPImm) {
- APFloat F(BitsToDouble(Val));
- F.changeSign();
- Val = F.bitcastToAPInt().getZExtValue();
- } else {
- Val = -Val;
- }
+uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
+{
+ assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
+ assert(Size == 2 || Size == 4 || Size == 8);
+
+ const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
+
+ if (Imm.Mods.Abs) {
+ Val &= ~FpSignMask;
}
+ if (Imm.Mods.Neg) {
+ Val ^= FpSignMask;
+ }
+
+ return Val;
+}
+
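(Aside: the sign-bit arithmetic above in a standalone, hypothetical form: abs clears the IEEE sign bit of the Size-byte encoding, neg flips it, so abs-then-neg forces a negative value:)

    #include <cassert>
    #include <cstdint>

    // Hypothetical standalone helper mirroring the masking logic above.
    static uint64_t applyFPMods(uint64_t Val, unsigned Size, bool Abs, bool Neg) {
      const uint64_t FpSignMask = 1ULL << (Size * 8 - 1);
      if (Abs)
        Val &= ~FpSignMask; // |x|: clear the sign bit
      if (Neg)
        Val ^= FpSignMask;  // -x: flip the sign bit
      return Val;
    }

    int main() {
      // 0xBF800000 encodes -1.0f: abs gives +1.0f, abs-then-neg gives -1.0f.
      assert(applyFPMods(0xBF800000u, 4, true, false) == 0x3F800000u);
      assert(applyFPMods(0xBF800000u, 4, true, true) == 0xBF800000u);
      return 0;
    }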
+void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
Inst.getNumOperands())) {
- addLiteralImmOperand(Inst, Val);
+ addLiteralImmOperand(Inst, Imm.Val,
+ ApplyModifiers &&
+ isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
} else {
- Inst.addOperand(MCOperand::createImm(Val));
+ assert(!isImmTy(ImmTyNone) || !hasModifiers());
+ Inst.addOperand(MCOperand::createImm(Imm.Val));
}
}
-void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
+void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
auto OpNum = Inst.getNumOperands();
// Check that this operand accepts literals
assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
- auto OpSize = AMDGPU::getOperandSize(InstDesc, OpNum); // expected operand size
+ if (ApplyModifiers) {
+ assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
+ const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
+ Val = applyInputFPModifiers(Val, Size);
+ }
+
+ APInt Literal(64, Val);
+ uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
if (Imm.IsFPImm) { // We got fp literal token
- APInt Literal(64, Val);
-
- switch (OpSize) {
- case 8: {
+ switch (OpTy) {
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64: {
if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
@@ -1151,16 +1288,31 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
// in predicate methods (isLiteralImm())
llvm_unreachable("fp literal in 64-bit integer instruction.");
}
- case 4:
- case 2: {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
bool lost;
APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
// Convert literal to single precision
- FPLiteral.convert(*getFltSemantics(OpSize),
+ FPLiteral.convert(*getOpFltSemantics(OpTy),
APFloat::rmNearestTiesToEven, &lost);
// We allow precision lost but not overflow or underflow. This should be
// checked earlier in isLiteralImm()
- Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
+
+ uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
+ if (OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
+ OpTy == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
+ ImmVal |= (ImmVal << 16);
+ }
+
+ Inst.addOperand(MCOperand::createImm(ImmVal));
return;
}
default:
@@ -1173,8 +1325,11 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
// We got int literal token.
// Only sign extend inline immediates.
// FIXME: No errors on truncation
- switch (OpSize) {
- case 4: {
+ switch (OpTy) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
if (isInt<32>(Val) &&
AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
@@ -1185,9 +1340,11 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
return;
}
- case 8: {
- if (AMDGPU::isInlinableLiteral64(Val,
- AsmParser->hasInv2PiInlineImm())) {
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64: {
+ if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
Inst.addOperand(MCOperand::createImm(Val));
return;
}
@@ -1195,7 +1352,10 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
return;
}
- case 2: {
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
if (isInt<16>(Val) &&
AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
AsmParser->hasInv2PiInlineImm())) {
@@ -1206,6 +1366,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
Inst.addOperand(MCOperand::createImm(Val & 0xffff));
return;
}
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+ auto LiteralVal = static_cast<uint16_t>(Literal.getLoBits(16).getZExtValue());
+ assert(AMDGPU::isInlinableLiteral16(LiteralVal,
+ AsmParser->hasInv2PiInlineImm()));
+
+ uint32_t ImmVal = static_cast<uint32_t>(LiteralVal) << 16 |
+ static_cast<uint32_t>(LiteralVal);
+ Inst.addOperand(MCOperand::createImm(ImmVal));
+ return;
+ }
default:
llvm_unreachable("invalid operand size");
}
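(Aside: a standalone illustration of the V2INT16/V2FP16 replication performed in both branches above; the 16-bit literal is duplicated into each half of the 32-bit packed immediate:)

    #include <cassert>
    #include <cstdint>

    // Replicates a 16-bit literal into both lanes of a packed 32-bit operand.
    static uint32_t replicate16(uint16_t Lo16) {
      return (static_cast<uint32_t>(Lo16) << 16) | Lo16;
    }

    int main() {
      // 0x3C00 is 1.0 in IEEE half, so the packed immediate encodes <1.0, 1.0>.
      assert(replicate16(0x3C00) == 0x3C003C00u);
      return 0;
    }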
@@ -1289,7 +1460,8 @@ static unsigned getSpecialRegForName(StringRef RegName) {
.Default(0);
}
-bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
auto R = parseRegister();
if (!R) return true;
assert(R->isReg());
@@ -1299,20 +1471,43 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &End
return false;
}
-bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum)
-{
+bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
+ RegisterKind RegKind, unsigned Reg1,
+ unsigned RegNum) {
switch (RegKind) {
case IS_SPECIAL:
- if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { Reg = AMDGPU::EXEC; RegWidth = 2; return true; }
- if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { Reg = AMDGPU::FLAT_SCR; RegWidth = 2; return true; }
- if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { Reg = AMDGPU::VCC; RegWidth = 2; return true; }
- if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { Reg = AMDGPU::TBA; RegWidth = 2; return true; }
- if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { Reg = AMDGPU::TMA; RegWidth = 2; return true; }
+ if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
+ Reg = AMDGPU::EXEC;
+ RegWidth = 2;
+ return true;
+ }
+ if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
+ Reg = AMDGPU::FLAT_SCR;
+ RegWidth = 2;
+ return true;
+ }
+ if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
+ Reg = AMDGPU::VCC;
+ RegWidth = 2;
+ return true;
+ }
+ if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
+ Reg = AMDGPU::TBA;
+ RegWidth = 2;
+ return true;
+ }
+ if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
+ Reg = AMDGPU::TMA;
+ RegWidth = 2;
+ return true;
+ }
return false;
case IS_VGPR:
case IS_SGPR:
case IS_TTMP:
- if (Reg1 != Reg + RegWidth) { return false; }
+ if (Reg1 != Reg + RegWidth) {
+ return false;
+ }
RegWidth++;
return true;
default:
@@ -1320,8 +1515,9 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, R
}
}
-bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex)
-{
+bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
+ unsigned &RegNum, unsigned &RegWidth,
+ unsigned *DwordRegIndex) {
if (DwordRegIndex) { *DwordRegIndex = 0; }
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
if (getLexer().is(AsmToken::Identifier)) {
@@ -1462,8 +1658,33 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
}
+bool
+AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool AbsMod) {
+ if (AbsMod && getLexer().peekTok().is(AsmToken::Pipe) &&
+ (getLexer().getKind() == AsmToken::Integer ||
+ getLexer().getKind() == AsmToken::Real)) {
+
+ // This is a workaround for handling operands like these:
+ // |1.0|
+ // |-1|
+ // This syntax is not compatible with the syntax of standard
+ // MC expressions (due to the trailing '|').
+
+ SMLoc EndLoc;
+ const MCExpr *Expr;
+
+ if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
+ return true;
+ }
+
+ return !Expr->evaluateAsAbsolute(Val);
+ }
+
+ return getParser().parseAbsoluteExpression(Val);
+}
+
OperandMatchResultTy
-AMDGPUAsmParser::parseImm(OperandVector &Operands) {
+AMDGPUAsmParser::parseImm(OperandVector &Operands, bool AbsMod) {
// TODO: add syntactic sugar for 1/(2*PI)
bool Minus = false;
if (getLexer().getKind() == AsmToken::Minus) {
@@ -1475,7 +1696,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands) {
switch(getLexer().getKind()) {
case AsmToken::Integer: {
int64_t IntVal;
- if (getParser().parseAbsoluteExpression(IntVal))
+ if (parseAbsoluteExpr(IntVal, AbsMod))
return MatchOperand_ParseFail;
if (Minus)
IntVal *= -1;
@@ -1484,7 +1705,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands) {
}
case AsmToken::Real: {
int64_t IntVal;
- if (getParser().parseAbsoluteExpression(IntVal))
+ if (parseAbsoluteExpr(IntVal, AbsMod))
return MatchOperand_ParseFail;
APFloat F(BitsToDouble(IntVal));
@@ -1512,8 +1733,8 @@ AMDGPUAsmParser::parseReg(OperandVector &Operands) {
}
OperandMatchResultTy
-AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands) {
- auto res = parseImm(Operands);
+AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) {
+ auto res = parseImm(Operands, AbsMod);
if (res != MatchOperand_NoMatch) {
return res;
}
@@ -1522,18 +1743,50 @@ AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands) {
}
OperandMatchResultTy
-AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm) {
- // XXX: During parsing we can't determine if minus sign means
- // negate-modifier or negative immediate value.
- // By default we suppose it is modifier.
- bool Negate = false, Abs = false, Abs2 = false;
+AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
+ bool AllowImm) {
+ bool Negate = false, Negate2 = false, Abs = false, Abs2 = false;
if (getLexer().getKind()== AsmToken::Minus) {
+ const AsmToken NextToken = getLexer().peekTok();
+
+ // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
+ if (NextToken.is(AsmToken::Minus)) {
+ Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier");
+ return MatchOperand_ParseFail;
+ }
+
+ // '-' followed by an integer literal N should be interpreted as integer
+ // negation rather than a floating-point NEG modifier applied to N.
+ // Besides being counter-intuitive, such use of the floating-point NEG
+ // modifier results in different meanings of integer literals used with
+ // VOP1/2/C and VOP3, for example:
+ // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
+ // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
+ // Negative fp literals should be handled likewise for uniformity.
+ if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) {
+ Parser.Lex();
+ Negate = true;
+ }
+ }
+
+ if (getLexer().getKind() == AsmToken::Identifier &&
+ Parser.getTok().getString() == "neg") {
+ if (Negate) {
+ Error(Parser.getTok().getLoc(), "expected register or immediate");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+ Negate2 = true;
+ if (getLexer().isNot(AsmToken::LParen)) {
+ Error(Parser.getTok().getLoc(), "expected left paren after neg");
+ return MatchOperand_ParseFail;
+ }
Parser.Lex();
- Negate = true;
}
- if (getLexer().getKind() == AsmToken::Identifier && Parser.getTok().getString() == "abs") {
+ if (getLexer().getKind() == AsmToken::Identifier &&
+ Parser.getTok().getString() == "abs") {
Parser.Lex();
Abs2 = true;
if (getLexer().isNot(AsmToken::LParen)) {
@@ -1554,7 +1807,7 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo
OperandMatchResultTy Res;
if (AllowImm) {
- Res = parseRegOrImm(Operands);
+ Res = parseRegOrImm(Operands, Abs);
} else {
Res = parseReg(Operands);
}
@@ -1563,9 +1816,6 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo
}
AMDGPUOperand::Modifiers Mods;
- if (Negate) {
- Mods.Neg = true;
- }
if (Abs) {
if (getLexer().getKind() != AsmToken::Pipe) {
Error(Parser.getTok().getLoc(), "expected vertical bar");
@@ -1583,6 +1833,17 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo
Mods.Abs = true;
}
+ if (Negate) {
+ Mods.Neg = true;
+ } else if (Negate2) {
+ if (getLexer().isNot(AsmToken::RParen)) {
+ Error(Parser.getTok().getLoc(), "expected closing parentheses");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex();
+ Mods.Neg = true;
+ }
+
if (Mods.hasFPModifiers()) {
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
Op.setModifiers(Mods);
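(Aside: a toy model of the disambiguation rule described in the comments above; the token kinds are simplified assumptions of the sketch, not the real AsmToken set:)

    #include <cassert>

    enum Tok { Integer, Real, Register, Identifier };

    // '-v0' or '-abs(v0)' -> NEG modifier; '-1' or '-1.0' -> negative literal.
    static bool minusIsNegModifier(Tok Next) {
      return Next != Integer && Next != Real;
    }

    int main() {
      assert(minusIsNegModifier(Register));
      assert(!minusIsNegModifier(Integer));
      assert(!minusIsNegModifier(Real));
      return 0;
    }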
@@ -1591,10 +1852,12 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, bool Allo
}
OperandMatchResultTy
-AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm) {
+AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
+ bool AllowImm) {
bool Sext = false;
- if (getLexer().getKind() == AsmToken::Identifier && Parser.getTok().getString() == "sext") {
+ if (getLexer().getKind() == AsmToken::Identifier &&
+ Parser.getTok().getString() == "sext") {
Parser.Lex();
Sext = true;
if (getLexer().isNot(AsmToken::LParen)) {
@@ -1661,7 +1924,6 @@ OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands)
}
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
-
uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
@@ -1719,6 +1981,128 @@ ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
return makeArrayRef(Variants);
}
+unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
+ const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+ const unsigned Num = Desc.getNumImplicitUses();
+ for (unsigned i = 0; i < Num; ++i) {
+ unsigned Reg = Desc.ImplicitUses[i];
+ switch (Reg) {
+ case AMDGPU::FLAT_SCR:
+ case AMDGPU::VCC:
+ case AMDGPU::M0:
+ return Reg;
+ default:
+ break;
+ }
+ }
+ return AMDGPU::NoRegister;
+}
+
+bool AMDGPUAsmParser::isSGPR(unsigned Reg) {
+ const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+ const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
+ const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
+ return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
+ Reg == AMDGPU::SCC;
+}
+
+// NB: This code is correct only when used to check constant
+// bus limitations because GFX7 supports no f16 inline constants.
+// Note that there are no cases when a GFX7 opcode violates
+// constant bus limitations due to the use of an f16 constant.
+bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
+ unsigned OpIdx) const {
+ const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+
+ if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
+ return false;
+ }
+
+ const MCOperand &MO = Inst.getOperand(OpIdx);
+
+ int64_t Val = MO.getImm();
+ auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
+
+ switch (OpSize) { // expected operand size
+ case 8:
+ return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
+ case 4:
+ return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
+ case 2: {
+ const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
+ if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
+ OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) {
+ return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
+ } else {
+ return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
+ }
+ }
+ default:
+ llvm_unreachable("invalid operand size");
+ }
+}
+
+bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
+ const MCOperand &MO = Inst.getOperand(OpIdx);
+ if (MO.isImm()) {
+ return !isInlineConstant(Inst, OpIdx);
+ }
+ return !MO.isReg() || isSGPR(mc2PseudoReg(MO.getReg()));
+}
+
+bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) {
+ const unsigned Opcode = Inst.getOpcode();
+ const MCInstrDesc &Desc = MII.get(Opcode);
+ unsigned ConstantBusUseCount = 0;
+
+ if (Desc.TSFlags &
+ (SIInstrFlags::VOPC |
+ SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
+ SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) {
+
+ // Check special imm operands (used by madmk, etc)
+ if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
+ ++ConstantBusUseCount;
+ }
+
+ unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
+ if (SGPRUsed != AMDGPU::NoRegister) {
+ ++ConstantBusUseCount;
+ }
+
+ const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
+ const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
+ const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
+
+ const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
+
+ for (int OpIdx : OpIndices) {
+ if (OpIdx == -1) break;
+
+ const MCOperand &MO = Inst.getOperand(OpIdx);
+ if (usesConstantBus(Inst, OpIdx)) {
+ if (MO.isReg()) {
+ const unsigned Reg = mc2PseudoReg(MO.getReg());
+ // Pairs of registers with a partial intersection like these
+ // s0, s[0:1]
+ // flat_scratch_lo, flat_scratch
+ // flat_scratch_lo, flat_scratch_hi
+ // are theoretically valid but they are disabled anyway.
+ // Note that this code mimics SIInstrInfo::verifyInstruction
+ if (Reg != SGPRUsed) {
+ ++ConstantBusUseCount;
+ }
+ SGPRUsed = Reg;
+ } else { // Expression or a literal
+ ++ConstantBusUseCount;
+ }
+ }
+ }
+ }
+
+ return ConstantBusUseCount <= 1;
+}
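(Aside: a toy model of the rule validateOperandLimitations() enforces; the operand representation is a simplified assumption, not MCOperand. At most one scalar value may be read over the constant bus, and repeated reads of the same SGPR count once:)

    #include <cassert>

    struct Src {
      bool IsLiteral;  // literal constant or expression
      bool IsSGPR;     // scalar register
      unsigned Reg;    // register id, meaningful when IsSGPR
    };

    static bool obeysConstantBus(const Src *Srcs, int N) {
      unsigned Count = 0, SGPRUsed = ~0u;
      for (int I = 0; I < N; ++I) {
        if (Srcs[I].IsLiteral) {
          ++Count;                 // every literal is one bus read
        } else if (Srcs[I].IsSGPR && Srcs[I].Reg != SGPRUsed) {
          ++Count;                 // a newly seen SGPR is one bus read
          SGPRUsed = Srcs[I].Reg;
        }
      }
      return Count <= 1;
    }

    int main() {
      Src Ok[]  = {{false, true, 0}, {false, true, 0}}; // s0, s0: one read
      Src Bad[] = {{false, true, 0}, {false, true, 1}}; // s0, s1: two reads
      assert(obeysConstantBus(Ok, 2));
      assert(!obeysConstantBus(Bad, 2));
      return 0;
    }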
+
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
@@ -1751,6 +2135,10 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
switch (Result) {
default: break;
case Match_Success:
+ if (!validateOperandLimitations(Inst)) {
+ return Error(IDLoc,
+ "invalid operand (violates constant bus restrictions)");
+ }
Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst, getSTI());
return false;
@@ -1793,7 +2181,6 @@ bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
return false;
}
-
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
uint32_t &Minor) {
if (ParseAsAbsoluteExpression(Major))
@@ -1810,7 +2197,6 @@ bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
}
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
-
uint32_t Major;
uint32_t Minor;
@@ -1831,9 +2217,10 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
// If this directive has no arguments, then use the ISA version for the
// targeted GPU.
if (getLexer().is(AsmToken::EndOfStatement)) {
- AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits());
- getTargetStreamer().EmitDirectiveHSACodeObjectISA(Isa.Major, Isa.Minor,
- Isa.Stepping,
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
+ ISA.Stepping,
"AMD", "AMDGPU");
return false;
}
@@ -1873,42 +2260,45 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
return false;
}
-bool AMDGPUAsmParser::ParseDirectiveRuntimeMetadata() {
- std::string Metadata;
- raw_string_ostream MS(Metadata);
+bool AMDGPUAsmParser::ParseDirectiveCodeObjectMetadata() {
+ std::string YamlString;
+ raw_string_ostream YamlStream(YamlString);
getLexer().setSkipSpace(false);
bool FoundEnd = false;
while (!getLexer().is(AsmToken::Eof)) {
while (getLexer().is(AsmToken::Space)) {
- MS << ' ';
+ YamlStream << getLexer().getTok().getString();
Lex();
}
if (getLexer().is(AsmToken::Identifier)) {
StringRef ID = getLexer().getTok().getIdentifier();
- if (ID == ".end_amdgpu_runtime_metadata") {
+ if (ID == AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd) {
Lex();
FoundEnd = true;
break;
}
}
- MS << Parser.parseStringToEndOfStatement()
- << getContext().getAsmInfo()->getSeparatorString();
+ YamlStream << Parser.parseStringToEndOfStatement()
+ << getContext().getAsmInfo()->getSeparatorString();
Parser.eatToEndOfStatement();
}
getLexer().setSkipSpace(true);
- if (getLexer().is(AsmToken::Eof) && !FoundEnd)
- return TokError("expected directive .end_amdgpu_runtime_metadata not found");
+ if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
+ return TokError(
+ "expected directive .end_amdgpu_code_object_metadata not found");
+ }
- MS.flush();
+ YamlStream.flush();
- getTargetStreamer().EmitRuntimeMetadata(Metadata);
+ if (!getTargetStreamer().EmitCodeObjectMetadata(YamlString))
+ return Error(getParser().getTok().getLoc(), "invalid code object metadata");
return false;
}
@@ -1926,7 +2316,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
amd_kernel_code_t Header;
- AMDGPU::initDefaultAMDKernelCodeT(Header, getSTI().getFeatureBits());
+ AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
while (true) {
// Lex EndOfStatement. This is in a while loop, because lexing a comment
@@ -2020,8 +2410,8 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".hsa_code_object_isa")
return ParseDirectiveHSACodeObjectISA();
- if (IDVal == ".amdgpu_runtime_metadata")
- return ParseDirectiveRuntimeMetadata();
+ if (IDVal == AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin)
+ return ParseDirectiveCodeObjectMetadata();
if (IDVal == ".amd_kernel_code_t")
return ParseDirectiveAMDKernelCodeT();
@@ -2080,7 +2470,6 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
-
// Try to parse with a custom parser
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
@@ -2208,7 +2597,7 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
- enum AMDGPUOperand::ImmTy ImmTy,
+ AMDGPUOperand::ImmTy ImmTy,
bool (*ConvertResult)(int64_t&)) {
SMLoc S = Parser.getTok().getLoc();
int64_t Value = 0;
@@ -2225,9 +2614,59 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
return MatchOperand_Success;
}
+OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
+ const char *Prefix,
+ OperandVector &Operands,
+ AMDGPUOperand::ImmTy ImmTy,
+ bool (*ConvertResult)(int64_t&)) {
+ StringRef Name = Parser.getTok().getString();
+ if (!Name.equals(Prefix))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Colon))
+ return MatchOperand_ParseFail;
+
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::LBrac))
+ return MatchOperand_ParseFail;
+ Parser.Lex();
+
+ unsigned Val = 0;
+ SMLoc S = Parser.getTok().getLoc();
+
+ // FIXME: How to verify the number of elements matches the number of src
+ // operands?
+ for (int I = 0; I < 3; ++I) {
+ if (I != 0) {
+ if (getLexer().is(AsmToken::RBrac))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return MatchOperand_ParseFail;
+ Parser.Lex();
+ }
+
+ if (getLexer().isNot(AsmToken::Integer))
+ return MatchOperand_ParseFail;
+
+ int64_t Op;
+ if (getParser().parseAbsoluteExpression(Op))
+ return MatchOperand_ParseFail;
+
+ if (Op != 0 && Op != 1)
+ return MatchOperand_ParseFail;
+ Val |= (Op << I);
+ }
+
+ Parser.Lex();
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
+ return MatchOperand_Success;
+}
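(Aside: the bit packing performed by the loop above, as a standalone sketch; element I of the 0/1 array lands in bit I of the immediate, so op_sel:[1,0,1] encodes as 0b101:)

    #include <cassert>

    // Packs up to three 0/1 array elements into consecutive bits.
    static unsigned packOperandArray(const int *Ops, int N) {
      unsigned Val = 0;
      for (int I = 0; I < N; ++I)
        Val |= (Ops[I] & 1) << I;
      return Val;
    }

    int main() {
      const int OpSel[] = {1, 0, 1}; // written as op_sel:[1,0,1]
      assert(packOperandArray(OpSel, 3) == 5u); // 0b101
      return 0;
    }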
+
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
- enum AMDGPUOperand::ImmTy ImmTy) {
+ AMDGPUOperand::ImmTy ImmTy) {
int64_t Bit = 0;
SMLoc S = Parser.getTok().getLoc();
@@ -2257,11 +2696,11 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
return MatchOperand_Success;
}
-typedef std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalImmIndexMap;
-
-void addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands,
- OptionalImmIndexMap& OptionalIdx,
- enum AMDGPUOperand::ImmTy ImmT, int64_t Default = 0) {
+static void addOptionalImmOperand(
+ MCInst& Inst, const OperandVector& Operands,
+ AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
+ AMDGPUOperand::ImmTy ImmT,
+ int64_t Default = 0) {
auto i = OptionalIdx.find(ImmT);
if (i != OptionalIdx.end()) {
unsigned Idx = i->second;
@@ -2323,9 +2762,9 @@ void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}
-void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
- std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
- bool GDSOnly = false;
+void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
+ bool IsGdsHardcoded) {
+ OptionalImmIndexMap OptionalIdx;
for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
@@ -2337,7 +2776,7 @@ void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
}
if (Op.isToken() && Op.getToken() == "gds") {
- GDSOnly = true;
+ IsGdsHardcoded = true;
continue;
}
@@ -2346,9 +2785,7 @@ void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
-
- if (!GDSOnly) {
+ if (!IsGdsHardcoded) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
}
Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
@@ -2421,13 +2858,14 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma))
Parser.Lex();
- IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
if (CntName == "vmcnt")
- IntVal = encodeVmcnt(IV, IntVal, CntVal);
+ IntVal = encodeVmcnt(ISA, IntVal, CntVal);
else if (CntName == "expcnt")
- IntVal = encodeExpcnt(IV, IntVal, CntVal);
+ IntVal = encodeExpcnt(ISA, IntVal, CntVal);
else if (CntName == "lgkmcnt")
- IntVal = encodeLgkmcnt(IV, IntVal, CntVal);
+ IntVal = encodeLgkmcnt(ISA, IntVal, CntVal);
else
return true;
@@ -2436,8 +2874,9 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
- IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
- int64_t Waitcnt = getWaitcntBitMask(IV);
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ int64_t Waitcnt = getWaitcntBitMask(ISA);
SMLoc S = Parser.getTok().getLoc();
switch(getLexer().getKind()) {
@@ -2459,7 +2898,8 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
return MatchOperand_Success;
}
-bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width) {
+bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
+ int64_t &Width) {
using namespace llvm::AMDGPU::Hwreg;
if (Parser.getTok().getString() != "hwreg")
@@ -2520,8 +2960,7 @@ bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
return false;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
+OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
using namespace llvm::AMDGPU::Hwreg;
int64_t Imm16Val = 0;
@@ -3170,6 +3609,10 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
{"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
{"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
+ {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
+ {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
+ {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
+ {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}
};
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
@@ -3186,6 +3629,12 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operan
res = parseSDWASel(Operands, Op.Name, Op.Type);
} else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
res = parseSDWADstUnused(Operands);
+ } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
+ Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
+ Op.Type == AMDGPUOperand::ImmTyNegLo ||
+ Op.Type == AMDGPUOperand::ImmTyNegHi) {
+ res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
+ Op.ConvertResult);
} else {
res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
}
@@ -3241,8 +3690,8 @@ static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
&& Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}
-void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
- OptionalImmIndexMap OptionalIdx;
+void AMDGPUAsmParser::cvtVOP3Impl(MCInst &Inst, const OperandVector &Operands,
+ OptionalImmIndexMap &OptionalIdx) {
unsigned I = 1;
const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
@@ -3253,12 +3702,20 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
- } else if (Op.isImm()) {
+ } else if (Op.isImmModifier()) {
OptionalIdx[Op.getImmTy()] = I;
+ } else if (Op.isRegOrImm()) {
+ Op.addRegOrImmOperands(Inst, 1);
} else {
llvm_unreachable("unhandled operand type");
}
}
+}
+
+void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
+ OptionalImmIndexMap OptionalIdx;
+
+ cvtVOP3Impl(Inst, Operands, OptionalIdx);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
@@ -3283,6 +3740,96 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
}
}
+void AMDGPUAsmParser::cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands) {
+ OptionalImmIndexMap OptionalIdx;
+
+ unsigned I = 1;
+ const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
+ for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
+ ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
+ }
+
+ for (unsigned E = Operands.size(); I != E; ++I) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
+ if (Op.isMod()) {
+ OptionalIdx[Op.getImmTy()] = I;
+ } else {
+ Op.addRegOrImmOperands(Inst, 1);
+ }
+ }
+
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
+}
+
+void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
+ OptionalImmIndexMap OptIdx;
+
+ cvtVOP3Impl(Inst, Operands, OptIdx);
+
+ // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
+ // instruction, and then figure out where to actually put the modifiers
+ int Opc = Inst.getOpcode();
+
+ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);
+ }
+
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1);
+
+ int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
+ if (NegLoIdx != -1) {
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
+ addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
+ }
+
+ const int Ops[] = { AMDGPU::OpName::src0,
+ AMDGPU::OpName::src1,
+ AMDGPU::OpName::src2 };
+ const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src1_modifiers,
+ AMDGPU::OpName::src2_modifiers };
+
+ int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
+ int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
+
+ unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
+ unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
+ unsigned NegLo = 0;
+ unsigned NegHi = 0;
+
+ if (NegLoIdx != -1) {
+ int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
+ NegLo = Inst.getOperand(NegLoIdx).getImm();
+ NegHi = Inst.getOperand(NegHiIdx).getImm();
+ }
+
+ for (int J = 0; J < 3; ++J) {
+ int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
+ if (OpIdx == -1)
+ break;
+
+ uint32_t ModVal = 0;
+
+ if ((OpSel & (1 << J)) != 0)
+ ModVal |= SISrcMods::OP_SEL_0;
+
+ if ((OpSelHi & (1 << J)) != 0)
+ ModVal |= SISrcMods::OP_SEL_1;
+
+ if ((NegLo & (1 << J)) != 0)
+ ModVal |= SISrcMods::NEG;
+
+ if ((NegHi & (1 << J)) != 0)
+ ModVal |= SISrcMods::NEG_HI;
+
+ int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
+
+ Inst.getOperand(ModIdx).setImm(ModVal);
+ }
+}
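(Aside: a standalone sketch of the scatter loop in cvtVOP3P() above; the flag values are illustrative assumptions, not the real SISrcMods encoding. Bit J of each packed immediate selects the matching flag in source J's modifier word:)

    #include <cassert>

    // Illustrative flag values (assumed for the sketch, not SIDefines.h).
    enum : unsigned { NEG = 1u << 0, NEG_HI = 1u << 1,
                      OP_SEL_0 = 1u << 2, OP_SEL_1 = 1u << 3 };

    static unsigned srcMods(unsigned J, unsigned OpSel, unsigned OpSelHi,
                            unsigned NegLo, unsigned NegHi) {
      unsigned ModVal = 0;
      if (OpSel & (1u << J))   ModVal |= OP_SEL_0;
      if (OpSelHi & (1u << J)) ModVal |= OP_SEL_1;
      if (NegLo & (1u << J))   ModVal |= NEG;
      if (NegHi & (1u << J))   ModVal |= NEG_HI;
      return ModVal;
    }

    int main() {
      // op_sel:[1,0,0] op_sel_hi:[1,1,1] neg_lo:[0,1,0] neg_hi:[0,0,0],
      // looking at src1 (J == 1):
      assert(srcMods(1, 0b001, 0b111, 0b010, 0b000) == (OP_SEL_1 | NEG));
      return 0;
    }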
+
//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
@@ -3436,7 +3983,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
- // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token.
+ // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
// Skip it.
continue;
} if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
@@ -3547,6 +4094,7 @@ void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
uint64_t BasicInstType) {
+ using namespace llvm::AMDGPU::SDWA;
OptionalImmIndexMap OptionalIdx;
unsigned I = 1;
@@ -3581,21 +4129,21 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
// V_NOP_sdwa_vi has no optional sdwa arguments
switch (BasicInstType) {
case SIInstrFlags::VOP1:
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
break;
case SIInstrFlags::VOP2:
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
break;
case SIInstrFlags::VOPC:
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
break;
default:
diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td
index 45a7fe6d3439..a6609f0725ab 100644
--- a/lib/Target/AMDGPU/BUFInstructions.td
+++ b/lib/Target/AMDGPU/BUFInstructions.td
@@ -21,8 +21,8 @@ def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset"
class MubufLoad <SDPatternOperator op> : PatFrag <
(ops node:$ptr), (op node:$ptr), [{
auto const AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS;
+ return AS == AMDGPUASI.GLOBAL_ADDRESS ||
+ AS == AMDGPUASI.CONSTANT_ADDRESS;
}]>;
def mubuf_load : MubufLoad <load>;
@@ -705,12 +705,6 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
let Predicates = [isGCN] in {
-// int_SI_vs_load_input
-def : Pat<
- (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
- (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, (i32 0), imm:$attr_offset, 0, 0, 0)
->;
-
// Offset in an 32-bit VGPR
def : Pat <
(SIload_constant v4i32:$sbase, i32:$voff),
diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt
index 02d441756c85..7c0ef4aeac3c 100644
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -12,11 +12,17 @@ tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM AMDGPUGenMCPseudoLowering.inc -gen-pseudo-lowering)
+if(LLVM_BUILD_GLOBAL_ISEL)
+ tablegen(LLVM AMDGPUGenRegisterBank.inc -gen-register-bank)
+endif()
add_public_tablegen_target(AMDGPUCommonTableGen)
# List of all GlobalISel files.
set(GLOBAL_ISEL_FILES
AMDGPUCallLowering.cpp
+ AMDGPUInstructionSelector.cpp
+ AMDGPULegalizerInfo.cpp
+ AMDGPURegisterBankInfo.cpp
)
# Add GlobalISel files to the dependencies if the user wants to build it.
@@ -30,6 +36,7 @@ endif()
add_llvm_target(AMDGPUCodeGen
AMDILCFGStructurizer.cpp
+ AMDGPUAliasAnalysis.cpp
AMDGPUAlwaysInlinePass.cpp
AMDGPUAnnotateKernelFeatures.cpp
AMDGPUAnnotateUniformValues.cpp
@@ -39,6 +46,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUTargetObjectFile.cpp
AMDGPUIntrinsicInfo.cpp
AMDGPUISelDAGToDAG.cpp
+ AMDGPULowerIntrinsics.cpp
AMDGPUMCInstLower.cpp
AMDGPUMachineFunction.cpp
AMDGPUUnifyMetadata.cpp
@@ -50,6 +58,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUInstrInfo.cpp
AMDGPUPromoteAlloca.cpp
AMDGPURegisterInfo.cpp
+ AMDGPUUnifyDivergentExitNodes.cpp
GCNHazardRecognizer.cpp
GCNSchedStrategy.cpp
R600ClauseMergePass.cpp
@@ -68,10 +77,12 @@ add_llvm_target(AMDGPUCodeGen
SIDebuggerInsertNops.cpp
SIFixControlFlowLiveIntervals.cpp
SIFixSGPRCopies.cpp
+ SIFixVGPRCopies.cpp
SIFoldOperands.cpp
SIFrameLowering.cpp
SIInsertSkips.cpp
SIInsertWaits.cpp
+ SIInsertWaitcnts.cpp
SIInstrInfo.cpp
SIISelLowering.cpp
SILoadStoreOptimizer.cpp
@@ -80,10 +91,14 @@ add_llvm_target(AMDGPUCodeGen
SIMachineFunctionInfo.cpp
SIMachineScheduler.cpp
SIOptimizeExecMasking.cpp
+ SIPeepholeSDWA.cpp
SIRegisterInfo.cpp
SIShrinkInstructions.cpp
SITypeRewriter.cpp
SIWholeQuadMode.cpp
+ GCNIterativeScheduler.cpp
+ GCNMinRegStrategy.cpp
+ GCNRegPressure.cpp
${GLOBAL_ISEL_BUILD_FILES}
)
diff --git a/lib/Target/AMDGPU/DSInstructions.td b/lib/Target/AMDGPU/DSInstructions.td
index a077001df6bd..a9f64589fa5e 100644
--- a/lib/Target/AMDGPU/DSInstructions.td
+++ b/lib/Target/AMDGPU/DSInstructions.td
@@ -88,18 +88,6 @@ class DS_1A1D_NORET<string opName, RegisterClass rc = VGPR_32>
let has_vdst = 0;
}
-class DS_1A_Off8_NORET<string opName> : DS_Pseudo<opName,
- (outs),
- (ins VGPR_32:$addr, offset0:$offset0, offset1:$offset1, gds:$gds),
- "$addr $offset0$offset1$gds"> {
-
- let has_data0 = 0;
- let has_data1 = 0;
- let has_vdst = 0;
- let has_offset = 0;
- let AsmMatchConverter = "cvtDSOffset01";
-}
-
class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32>
: DS_Pseudo<opName,
(outs),
@@ -143,6 +131,20 @@ class DS_1A2D_RET<string opName,
let hasPostISelHook = 1;
}
+class DS_1A2D_Off8_RET<string opName,
+ RegisterClass rc = VGPR_32,
+ RegisterClass src = rc>
+: DS_Pseudo<opName,
+ (outs rc:$vdst),
+ (ins VGPR_32:$addr, src:$data0, src:$data1, offset0:$offset0, offset1:$offset1, gds:$gds),
+ "$vdst, $addr, $data0, $data1$offset0$offset1$gds"> {
+
+ let has_offset = 0;
+ let AsmMatchConverter = "cvtDSOffset01";
+
+ let hasPostISelHook = 1;
+}
+
class DS_1A_RET<string opName, RegisterClass rc = VGPR_32>
: DS_Pseudo<opName,
(outs rc:$vdst),
@@ -174,6 +176,7 @@ class DS_1A_RET_GDS <string opName> : DS_Pseudo<opName,
let has_data1 = 0;
let has_gds = 0;
let gdsValue = 1;
+ let AsmMatchConverter = "cvtDSGds";
}
class DS_0A_RET <string opName> : DS_Pseudo<opName,
@@ -202,20 +205,46 @@ class DS_1A <string opName> : DS_Pseudo<opName,
let has_data1 = 0;
}
-class DS_1A_GDS <string opName> : DS_Pseudo<opName,
- (outs),
- (ins VGPR_32:$addr),
- "$addr gds"> {
+class DS_GWS <string opName, dag ins, string asmOps>
+: DS_Pseudo<opName, (outs), ins, asmOps> {
+
+ let has_vdst = 0;
+ let has_addr = 0;
+ let has_data0 = 0;
+ let has_data1 = 0;
+
+ let has_gds = 0;
+ let gdsValue = 1;
+ let AsmMatchConverter = "cvtDSGds";
+}
+
+class DS_GWS_0D <string opName>
+: DS_GWS<opName,
+ (ins offset:$offset, gds:$gds), "$offset gds">;
- let has_vdst = 0;
- let has_data0 = 0;
- let has_data1 = 0;
- let has_offset = 0;
+class DS_GWS_1D <string opName>
+: DS_GWS<opName,
+ (ins VGPR_32:$data0, offset:$offset, gds:$gds), "$data0$offset gds"> {
+
+ let has_data0 = 1;
+}
+
+class DS_VOID <string opName> : DS_Pseudo<opName,
+ (outs), (ins), ""> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 1;
+ let UseNamedOperandTable = 0;
+ let AsmMatchConverter = "";
+
+ let has_vdst = 0;
+ let has_addr = 0;
+ let has_data0 = 0;
+ let has_data1 = 0;
+ let has_offset = 0;
let has_offset0 = 0;
let has_offset1 = 0;
-
- let has_gds = 0;
- let gdsValue = 1;
+ let has_gds = 0;
}
class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag>
@@ -226,6 +255,8 @@ class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag>
[(set i32:$vdst,
(node (DS1Addr1Offset i32:$addr, i16:$offset), i32:$data0))] > {
+ let LGKM_CNT = 0;
+
let mayLoad = 0;
let mayStore = 0;
let isConvergent = 1;
@@ -324,9 +355,9 @@ def DS_MAX_RTN_F32 : DS_1A1D_RET <"ds_max_rtn_f32">,
def DS_WRXCHG_RTN_B32 : DS_1A1D_RET<"ds_wrxchg_rtn_b32">,
AtomicNoRet<"", 1>;
-def DS_WRXCHG2_RTN_B32 : DS_1A2D_RET<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>,
+def DS_WRXCHG2_RTN_B32 : DS_1A2D_Off8_RET<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>,
AtomicNoRet<"", 1>;
-def DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_RET<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>,
+def DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_Off8_RET<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>,
AtomicNoRet<"", 1>;
def DS_ADD_RTN_U64 : DS_1A1D_RET<"ds_add_rtn_u64", VReg_64>,
@@ -365,17 +396,17 @@ def DS_MAX_RTN_F64 : DS_1A1D_RET<"ds_max_rtn_f64", VReg_64>,
AtomicNoRet<"ds_max_f64", 1>;
def DS_WRXCHG_RTN_B64 : DS_1A1D_RET<"ds_wrxchg_rtn_b64", VReg_64>,
- AtomicNoRet<"ds_wrxchg_b64", 1>;
-def DS_WRXCHG2_RTN_B64 : DS_1A2D_RET<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>,
- AtomicNoRet<"ds_wrxchg2_b64", 1>;
-def DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_RET<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>,
- AtomicNoRet<"ds_wrxchg2st64_b64", 1>;
-
-def DS_GWS_INIT : DS_1A_GDS<"ds_gws_init">;
-def DS_GWS_SEMA_V : DS_1A_GDS<"ds_gws_sema_v">;
-def DS_GWS_SEMA_BR : DS_1A_GDS<"ds_gws_sema_br">;
-def DS_GWS_SEMA_P : DS_1A_GDS<"ds_gws_sema_p">;
-def DS_GWS_BARRIER : DS_1A_GDS<"ds_gws_barrier">;
+ AtomicNoRet<"", 1>;
+def DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>,
+ AtomicNoRet<"", 1>;
+def DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>,
+ AtomicNoRet<"", 1>;
+
+def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init">;
+def DS_GWS_SEMA_V : DS_GWS_0D<"ds_gws_sema_v">;
+def DS_GWS_SEMA_BR : DS_GWS_1D<"ds_gws_sema_br">;
+def DS_GWS_SEMA_P : DS_GWS_0D<"ds_gws_sema_p">;
+def DS_GWS_BARRIER : DS_GWS_1D<"ds_gws_barrier">;
def DS_ADD_SRC2_U32 : DS_1A<"ds_add_src2_u32">;
def DS_SUB_SRC2_U32 : DS_1A<"ds_sub_src2_u32">;
@@ -386,7 +417,7 @@ def DS_MIN_SRC2_I32 : DS_1A<"ds_min_src2_i32">;
def DS_MAX_SRC2_I32 : DS_1A<"ds_max_src2_i32">;
def DS_MIN_SRC2_U32 : DS_1A<"ds_min_src2_u32">;
def DS_MAX_SRC2_U32 : DS_1A<"ds_max_src2_u32">;
-def DS_AND_SRC2_B32 : DS_1A<"ds_and_src_b32">;
+def DS_AND_SRC2_B32 : DS_1A<"ds_and_src2_b32">;
def DS_OR_SRC2_B32 : DS_1A<"ds_or_src2_b32">;
def DS_XOR_SRC2_B32 : DS_1A<"ds_xor_src2_b32">;
def DS_MIN_SRC2_F32 : DS_1A<"ds_min_src2_f32">;
@@ -407,8 +438,8 @@ def DS_XOR_SRC2_B64 : DS_1A<"ds_xor_src2_b64">;
def DS_MIN_SRC2_F64 : DS_1A<"ds_min_src2_f64">;
def DS_MAX_SRC2_F64 : DS_1A<"ds_max_src2_f64">;
-def DS_WRITE_SRC2_B32 : DS_1A_Off8_NORET<"ds_write_src2_b32">;
-def DS_WRITE_SRC2_B64 : DS_1A_Off8_NORET<"ds_write_src2_b64">;
+def DS_WRITE_SRC2_B32 : DS_1A<"ds_write_src2_b32">;
+def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">;
let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in {
def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32">;
@@ -429,30 +460,34 @@ def DS_READ2_B64 : DS_1A_Off8_RET<"ds_read2_b64", VReg_128>;
def DS_READ2ST64_B64 : DS_1A_Off8_RET<"ds_read2st64_b64", VReg_128>;
}
-let SubtargetPredicate = isSICI in {
def DS_CONSUME : DS_0A_RET<"ds_consume">;
def DS_APPEND : DS_0A_RET<"ds_append">;
def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">;
-}
//===----------------------------------------------------------------------===//
// Instruction definitions for CI and newer.
//===----------------------------------------------------------------------===//
-// Remaining instructions:
-// DS_NOP
-// DS_GWS_SEMA_RELEASE_ALL
-// DS_WRAP_RTN_B32
-// DS_CNDXCHG32_RTN_B64
-// DS_WRITE_B96
-// DS_WRITE_B128
-// DS_CONDXCHG32_RTN_B128
-// DS_READ_B96
-// DS_READ_B128
let SubtargetPredicate = isCIVI in {
-def DS_WRAP_RTN_F32 : DS_1A1D_RET <"ds_wrap_rtn_f32">,
- AtomicNoRet<"ds_wrap_f32", 1>;
+def DS_WRAP_RTN_B32 : DS_1A2D_RET<"ds_wrap_rtn_b32">, AtomicNoRet<"", 1>;
+
+def DS_CONDXCHG32_RTN_B64 : DS_1A1D_RET<"ds_condxchg32_rtn_b64", VReg_64>,
+ AtomicNoRet<"", 1>;
+
+def DS_GWS_SEMA_RELEASE_ALL : DS_GWS_0D<"ds_gws_sema_release_all">;
+
+let mayStore = 0 in {
+def DS_READ_B96 : DS_1A_RET<"ds_read_b96", VReg_96>;
+def DS_READ_B128: DS_1A_RET<"ds_read_b128", VReg_128>;
+} // End mayStore = 0
+
+let mayLoad = 0 in {
+def DS_WRITE_B96 : DS_1A1D_NORET<"ds_write_b96", VReg_96>;
+def DS_WRITE_B128 : DS_1A1D_NORET<"ds_write_b128", VReg_128>;
+} // End mayLoad = 0
+
+def DS_NOP : DS_VOID<"ds_nop">;
} // let SubtargetPredicate = isCIVI
@@ -623,6 +658,7 @@ def DS_CMPST_B32_si : DS_Real_si<0x10, DS_CMPST_B32>;
def DS_CMPST_F32_si : DS_Real_si<0x11, DS_CMPST_F32>;
def DS_MIN_F32_si : DS_Real_si<0x12, DS_MIN_F32>;
def DS_MAX_F32_si : DS_Real_si<0x13, DS_MAX_F32>;
+def DS_NOP_si : DS_Real_si<0x14, DS_NOP>;
def DS_GWS_INIT_si : DS_Real_si<0x19, DS_GWS_INIT>;
def DS_GWS_SEMA_V_si : DS_Real_si<0x1a, DS_GWS_SEMA_V>;
def DS_GWS_SEMA_BR_si : DS_Real_si<0x1b, DS_GWS_SEMA_BR>;
@@ -651,8 +687,10 @@ def DS_CMPST_RTN_F32_si : DS_Real_si<0x31, DS_CMPST_RTN_F32>;
def DS_MIN_RTN_F32_si : DS_Real_si<0x32, DS_MIN_RTN_F32>;
def DS_MAX_RTN_F32_si : DS_Real_si<0x33, DS_MAX_RTN_F32>;
-// FIXME: this instruction is actually CI/VI
-def DS_WRAP_RTN_F32_si : DS_Real_si<0x34, DS_WRAP_RTN_F32>;
+// These instructions are CI/VI only
+def DS_WRAP_RTN_B32_si : DS_Real_si<0x34, DS_WRAP_RTN_B32>;
+def DS_CONDXCHG32_RTN_B64_si : DS_Real_si<0x7e, DS_CONDXCHG32_RTN_B64>;
+def DS_GWS_SEMA_RELEASE_ALL_si : DS_Real_si<0x18, DS_GWS_SEMA_RELEASE_ALL>;
def DS_SWIZZLE_B32_si : DS_Real_si<0x35, DS_SWIZZLE_B32>;
def DS_READ_B32_si : DS_Real_si<0x36, DS_READ_B32>;
@@ -744,6 +782,10 @@ def DS_WRITE_SRC2_B64_si : DS_Real_si<0xcd, DS_WRITE_SRC2_B64>;
def DS_MIN_SRC2_F64_si : DS_Real_si<0xd2, DS_MIN_SRC2_F64>;
def DS_MAX_SRC2_F64_si : DS_Real_si<0xd3, DS_MAX_SRC2_F64>;
+def DS_WRITE_B96_si : DS_Real_si<0xde, DS_WRITE_B96>;
+def DS_WRITE_B128_si : DS_Real_si<0xdf, DS_WRITE_B128>;
+def DS_READ_B96_si : DS_Real_si<0xfe, DS_READ_B96>;
+def DS_READ_B128_si : DS_Real_si<0xff, DS_READ_B128>;
//===----------------------------------------------------------------------===//
// VIInstructions.td
@@ -787,12 +829,13 @@ def DS_CMPST_B32_vi : DS_Real_vi<0x10, DS_CMPST_B32>;
def DS_CMPST_F32_vi : DS_Real_vi<0x11, DS_CMPST_F32>;
def DS_MIN_F32_vi : DS_Real_vi<0x12, DS_MIN_F32>;
def DS_MAX_F32_vi : DS_Real_vi<0x13, DS_MAX_F32>;
+def DS_NOP_vi : DS_Real_vi<0x14, DS_NOP>;
def DS_ADD_F32_vi : DS_Real_vi<0x15, DS_ADD_F32>;
-def DS_GWS_INIT_vi : DS_Real_vi<0x19, DS_GWS_INIT>;
-def DS_GWS_SEMA_V_vi : DS_Real_vi<0x1a, DS_GWS_SEMA_V>;
-def DS_GWS_SEMA_BR_vi : DS_Real_vi<0x1b, DS_GWS_SEMA_BR>;
-def DS_GWS_SEMA_P_vi : DS_Real_vi<0x1c, DS_GWS_SEMA_P>;
-def DS_GWS_BARRIER_vi : DS_Real_vi<0x1d, DS_GWS_BARRIER>;
+def DS_GWS_INIT_vi : DS_Real_vi<0x99, DS_GWS_INIT>;
+def DS_GWS_SEMA_V_vi : DS_Real_vi<0x9a, DS_GWS_SEMA_V>;
+def DS_GWS_SEMA_BR_vi : DS_Real_vi<0x9b, DS_GWS_SEMA_BR>;
+def DS_GWS_SEMA_P_vi : DS_Real_vi<0x9c, DS_GWS_SEMA_P>;
+def DS_GWS_BARRIER_vi : DS_Real_vi<0x9d, DS_GWS_BARRIER>;
def DS_WRITE_B8_vi : DS_Real_vi<0x1e, DS_WRITE_B8>;
def DS_WRITE_B16_vi : DS_Real_vi<0x1f, DS_WRITE_B16>;
def DS_ADD_RTN_U32_vi : DS_Real_vi<0x20, DS_ADD_RTN_U32>;
@@ -815,7 +858,7 @@ def DS_CMPST_RTN_B32_vi : DS_Real_vi<0x30, DS_CMPST_RTN_B32>;
def DS_CMPST_RTN_F32_vi : DS_Real_vi<0x31, DS_CMPST_RTN_F32>;
def DS_MIN_RTN_F32_vi : DS_Real_vi<0x32, DS_MIN_RTN_F32>;
def DS_MAX_RTN_F32_vi : DS_Real_vi<0x33, DS_MAX_RTN_F32>;
-def DS_WRAP_RTN_F32_vi : DS_Real_vi<0x34, DS_WRAP_RTN_F32>;
+def DS_WRAP_RTN_B32_vi : DS_Real_vi<0x34, DS_WRAP_RTN_B32>;
def DS_ADD_RTN_F32_vi : DS_Real_vi<0x35, DS_ADD_RTN_F32>;
def DS_READ_B32_vi : DS_Real_vi<0x36, DS_READ_B32>;
def DS_READ2_B32_vi : DS_Real_vi<0x37, DS_READ2_B32>;
@@ -824,6 +867,9 @@ def DS_READ_I8_vi : DS_Real_vi<0x39, DS_READ_I8>;
def DS_READ_U8_vi : DS_Real_vi<0x3a, DS_READ_U8>;
def DS_READ_I16_vi : DS_Real_vi<0x3b, DS_READ_I16>;
def DS_READ_U16_vi : DS_Real_vi<0x3c, DS_READ_U16>;
+def DS_CONSUME_vi : DS_Real_vi<0xbd, DS_CONSUME>;
+def DS_APPEND_vi : DS_Real_vi<0xbe, DS_APPEND>;
+def DS_ORDERED_COUNT_vi : DS_Real_vi<0xbf, DS_ORDERED_COUNT>;
def DS_SWIZZLE_B32_vi : DS_Real_vi<0x3d, DS_SWIZZLE_B32>;
def DS_PERMUTE_B32_vi : DS_Real_vi<0x3e, DS_PERMUTE_B32>;
def DS_BPERMUTE_B32_vi : DS_Real_vi<0x3f, DS_BPERMUTE_B32>;
@@ -865,6 +911,8 @@ def DS_MSKOR_RTN_B64_vi : DS_Real_vi<0x6c, DS_MSKOR_RTN_B64>;
def DS_WRXCHG_RTN_B64_vi : DS_Real_vi<0x6d, DS_WRXCHG_RTN_B64>;
def DS_WRXCHG2_RTN_B64_vi : DS_Real_vi<0x6e, DS_WRXCHG2_RTN_B64>;
def DS_WRXCHG2ST64_RTN_B64_vi : DS_Real_vi<0x6f, DS_WRXCHG2ST64_RTN_B64>;
+def DS_CONDXCHG32_RTN_B64_vi : DS_Real_vi<0x7e, DS_CONDXCHG32_RTN_B64>;
+def DS_GWS_SEMA_RELEASE_ALL_vi : DS_Real_vi<0x98, DS_GWS_SEMA_RELEASE_ALL>;
def DS_CMPST_RTN_B64_vi : DS_Real_vi<0x70, DS_CMPST_RTN_B64>;
def DS_CMPST_RTN_F64_vi : DS_Real_vi<0x71, DS_CMPST_RTN_F64>;
def DS_MIN_RTN_F64_vi : DS_Real_vi<0x72, DS_MIN_RTN_F64>;
@@ -904,3 +952,7 @@ def DS_XOR_SRC2_B64_vi : DS_Real_vi<0xcb, DS_XOR_SRC2_B64>;
def DS_WRITE_SRC2_B64_vi : DS_Real_vi<0xcd, DS_WRITE_SRC2_B64>;
def DS_MIN_SRC2_F64_vi : DS_Real_vi<0xd2, DS_MIN_SRC2_F64>;
def DS_MAX_SRC2_F64_vi : DS_Real_vi<0xd3, DS_MAX_SRC2_F64>;
+def DS_WRITE_B96_vi : DS_Real_vi<0xde, DS_WRITE_B96>;
+def DS_WRITE_B128_vi : DS_Real_vi<0xdf, DS_WRITE_B128>;
+def DS_READ_B96_vi : DS_Real_vi<0xfe, DS_READ_B96>;
+def DS_READ_B128_vi : DS_Real_vi<0xff, DS_READ_B128>;
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 2247cad7bb51..4fb03b62bba9 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -22,6 +22,7 @@
#include "AMDGPURegisterInfo.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
@@ -97,9 +98,13 @@ static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
}
-#define GET_SUBTARGETINFO_ENUM
-#include "AMDGPUGenSubtargetInfo.inc"
-#undef GET_SUBTARGETINFO_ENUM
+static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
+}
#include "AMDGPUGenDisassemblerTables.inc"
@@ -138,7 +143,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
CommentStream = &CS;
// ToDo: AMDGPUDisassembler supports only VI ISA.
- assert(AMDGPU::isVI(STI) && "Can disassemble only VI ISA.");
+ if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding])
+ report_fatal_error("Disassembly not yet supported for subtarget");
const unsigned MaxInstBytesNum = (std::min)((size_t)8, Bytes_.size());
Bytes = Bytes_.slice(0, MaxInstBytesNum);
@@ -179,6 +185,17 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
} while (false);
+ if (Res && (MI.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
+ MI.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
+ MI.getOpcode() == AMDGPU::V_MAC_F16_e64_vi)) {
+ // Insert dummy unused src2_modifiers.
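+      // The src2_modifiers operand is not encoded for these opcodes, but the
+      // MCInst operand list expects it at its named operand index.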
+ int Src2ModIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::src2_modifiers);
+ auto I = MI.begin();
+ std::advance(I, Src2ModIdx);
+ MI.insert(I, MCOperand::createImm(0));
+ }
+
Size = Res ? (MaxInstBytesNum - Bytes.size()) : 0;
return Res;
}
@@ -263,6 +280,10 @@ MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
return decodeSrcOp(OPW16, Val);
}
+MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
+ return decodeSrcOp(OPWV216, Val);
+}
+
MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
// Some instructions have operand restrictions beyond what the encoding
// allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
@@ -423,6 +444,7 @@ MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
case OPW64:
return MCOperand::createImm(getInlineImmVal64(Imm));
case OPW16:
+ case OPWV216:
return MCOperand::createImm(getInlineImmVal16(Imm));
default:
llvm_unreachable("implement me");
@@ -436,6 +458,7 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
default: // fall
case OPW32:
case OPW16:
+ case OPWV216:
return VGPR_32RegClassID;
case OPW64: return VReg_64RegClassID;
case OPW128: return VReg_128RegClassID;
@@ -449,6 +472,7 @@ unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
default: // fall
case OPW32:
case OPW16:
+ case OPWV216:
return SGPR_32RegClassID;
case OPW64: return SGPR_64RegClassID;
case OPW128: return SGPR_128RegClassID;
@@ -462,6 +486,7 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
default: // fall
case OPW32:
case OPW16:
+ case OPWV216:
return TTMP_32RegClassID;
case OPW64: return TTMP_64RegClassID;
case OPW128: return TTMP_128RegClassID;
@@ -497,6 +522,7 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
switch (Width) {
case OPW32:
case OPW16:
+ case OPWV216:
return decodeSpecialReg32(Val);
case OPW64:
return decodeSpecialReg64(Val);
@@ -522,6 +548,11 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
case 124: return createRegOperand(M0);
case 126: return createRegOperand(EXEC_LO);
case 127: return createRegOperand(EXEC_HI);
+ case 235: return createRegOperand(SRC_SHARED_BASE);
+ case 236: return createRegOperand(SRC_SHARED_LIMIT);
+ case 237: return createRegOperand(SRC_PRIVATE_BASE);
+ case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
+ // TODO: SRC_POPS_EXITING_WAVE_ID
// ToDo: no support for vccz register
case 251: break;
// ToDo: no support for execz register
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index ee5883a984e0..d50665187e10 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -67,6 +67,7 @@ public:
MCOperand decodeOperand_VS_32(unsigned Val) const;
MCOperand decodeOperand_VS_64(unsigned Val) const;
MCOperand decodeOperand_VSrc16(unsigned Val) const;
+ MCOperand decodeOperand_VSrcV216(unsigned Val) const;
MCOperand decodeOperand_VReg_64(unsigned Val) const;
MCOperand decodeOperand_VReg_96(unsigned Val) const;
@@ -85,6 +86,7 @@ public:
OPW64,
OPW128,
OPW16,
+ OPWV216,
OPW_LAST_,
OPW_FIRST_ = OPW32
};
diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td
index 48c6592ca5b2..5480110d8315 100644
--- a/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -35,28 +35,59 @@ class CF_MEM_RAT_CACHELESS <bits<6> rat_inst, bits<4> rat_id, bits<4> mask, dag
: EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins,
"MEM_RAT_CACHELESS "#name, pattern>;
-class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, dag ins, string name,
- list<dag> pattern>
- : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins,
+class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, bits<4> mask, dag ins,
+ dag outs, string name, list<dag> pattern>
+ : EG_CF_RAT <0x56, rat_inst, rat_id, mask, outs, ins,
"MEM_RAT "#name, pattern>;
class CF_MEM_RAT_STORE_TYPED<bits<1> has_eop>
- : CF_MEM_RAT <0x1, ?, (ins R600_Reg128:$rw_gpr, R600_Reg128:$index_gpr,
- i32imm:$rat_id, InstFlag:$eop),
+ : CF_MEM_RAT <0x1, ?, 0xf, (ins R600_Reg128:$rw_gpr, R600_Reg128:$index_gpr,
+ i32imm:$rat_id, InstFlag:$eop), (outs),
"STORE_TYPED RAT($rat_id) $rw_gpr, $index_gpr"
#!if(has_eop, ", $eop", ""),
[(int_r600_rat_store_typed R600_Reg128:$rw_gpr,
R600_Reg128:$index_gpr,
(i32 imm:$rat_id))]>;
-def RAT_MSKOR : CF_MEM_RAT <0x11, 0,
- (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
+def RAT_MSKOR : CF_MEM_RAT <0x11, 0, 0xf,
+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr), (outs),
"MSKOR $rw_gpr.XW, $index_gpr",
[(mskor_global v4i32:$rw_gpr, i32:$index_gpr)]
> {
let eop = 0;
}
+
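+// Each RAT atomic comes in a returning flavor (_RTN, old value written to
+// $out_gpr) and a discarding one (_NORET); both tie $rw_gpr to $out_gpr.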
+multiclass RAT_ATOMIC<bits<6> op_ret, bits<6> op_noret, string name> {
+ let Constraints = "$rw_gpr = $out_gpr", eop = 0, mayStore = 1 in {
+ def _RTN: CF_MEM_RAT <op_ret, 0, 0xf,
+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
+ (outs R600_Reg128:$out_gpr),
+ name ## "_RTN" ## " $rw_gpr, $index_gpr", [] >;
+ def _NORET: CF_MEM_RAT <op_noret, 0, 0xf,
+ (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
+ (outs R600_Reg128:$out_gpr),
+ name ## " $rw_gpr, $index_gpr", [] >;
+ }
+}
+
+// Swap no-ret is just a store. A raw store to a cached target
+// can only store a dword, which exactly matches swap_no_ret.
+defm RAT_ATOMIC_XCHG_INT : RAT_ATOMIC<1, 34, "ATOMIC_XCHG_INT">;
+defm RAT_ATOMIC_CMPXCHG_INT : RAT_ATOMIC<4, 36, "ATOMIC_CMPXCHG_INT">;
+defm RAT_ATOMIC_ADD : RAT_ATOMIC<7, 39, "ATOMIC_ADD">;
+defm RAT_ATOMIC_SUB : RAT_ATOMIC<8, 40, "ATOMIC_SUB">;
+defm RAT_ATOMIC_RSUB : RAT_ATOMIC<9, 41, "ATOMIC_RSUB">;
+defm RAT_ATOMIC_MIN_INT : RAT_ATOMIC<10, 42, "ATOMIC_MIN_INT">;
+defm RAT_ATOMIC_MIN_UINT : RAT_ATOMIC<11, 43, "ATOMIC_MIN_UINT">;
+defm RAT_ATOMIC_MAX_INT : RAT_ATOMIC<12, 44, "ATOMIC_MAX_INT">;
+defm RAT_ATOMIC_MAX_UINT : RAT_ATOMIC<13, 45, "ATOMIC_MAX_UINT">;
+defm RAT_ATOMIC_AND : RAT_ATOMIC<14, 46, "ATOMIC_AND">;
+defm RAT_ATOMIC_OR : RAT_ATOMIC<15, 47, "ATOMIC_OR">;
+defm RAT_ATOMIC_XOR : RAT_ATOMIC<16, 48, "ATOMIC_XOR">;
+defm RAT_ATOMIC_INC_UINT : RAT_ATOMIC<18, 50, "ATOMIC_INC_UINT">;
+defm RAT_ATOMIC_DEC_UINT : RAT_ATOMIC<19, 51, "ATOMIC_DEC_UINT">;
+
} // End let Predicates = [isEGorCayman]
//===----------------------------------------------------------------------===//
@@ -257,6 +288,76 @@ def : Pat<(v4i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)),
let Predicates = [isEGorCayman] in {
+multiclass AtomicPat<Instruction inst_ret, Instruction inst_noret,
+ SDPatternOperator node_ret, SDPatternOperator node_noret> {
+  // FIXME: Add _RTN version. We need a per-WI scratch location to store the
+  // old value.
+  // EXTRACT_SUBREG here is a dummy; we know the node has no uses.
+ def : Pat<(i32 (node_noret i32:$ptr, i32:$data)),
+ (EXTRACT_SUBREG (inst_noret
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), $data, sub0), $ptr), sub1)>;
+}
+multiclass AtomicIncDecPat<Instruction inst_ret, Instruction inst_noret,
+ SDPatternOperator node_ret, SDPatternOperator node_noret, int C> {
+  // FIXME: Add _RTN version. We need a per-WI scratch location to store the
+  // old value.
+  // EXTRACT_SUBREG here is a dummy; we know the node has no uses.
+ def : Pat<(i32 (node_noret i32:$ptr, C)),
+ (EXTRACT_SUBREG (inst_noret
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (MOV_IMM_I32 -1), sub0), $ptr), sub1)>;
+}
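+// Note: assuming the usual RAT inc/dec wrap-at-data semantics, the all-ones
+// (-1) data operand makes INC/DEC behave as a plain unsigned +1/-1, which is
+// why an add/sub of the constant C can be lowered to them.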
+
+// The CMPSWAP pattern is special.
+// EXTRACT_SUBREG here is a dummy; we know the node has no uses.
+// FIXME: Add _RTN version. We need a per-WI scratch location to store the
+// old value.
+def : Pat<(i32 (atomic_cmp_swap_global_noret i32:$ptr, i32:$cmp, i32:$data)),
+ (EXTRACT_SUBREG (RAT_ATOMIC_CMPXCHG_INT_NORET
+ (INSERT_SUBREG
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), $cmp, sub3),
+ $data, sub0),
+ $ptr), sub1)>;
+
+defm AtomicSwapPat : AtomicPat <RAT_ATOMIC_XCHG_INT_RTN,
+ RAT_ATOMIC_XCHG_INT_NORET,
+ atomic_swap_global_ret,
+ atomic_swap_global_noret>;
+defm AtomicAddPat : AtomicPat <RAT_ATOMIC_ADD_RTN, RAT_ATOMIC_ADD_NORET,
+ atomic_add_global_ret, atomic_add_global_noret>;
+defm AtomicSubPat : AtomicPat <RAT_ATOMIC_SUB_RTN, RAT_ATOMIC_SUB_NORET,
+ atomic_sub_global_ret, atomic_sub_global_noret>;
+defm AtomicMinPat : AtomicPat <RAT_ATOMIC_MIN_INT_RTN,
+ RAT_ATOMIC_MIN_INT_NORET,
+ atomic_min_global_ret, atomic_min_global_noret>;
+defm AtomicUMinPat : AtomicPat <RAT_ATOMIC_MIN_UINT_RTN,
+ RAT_ATOMIC_MIN_UINT_NORET,
+ atomic_umin_global_ret, atomic_umin_global_noret>;
+defm AtomicMaxPat : AtomicPat <RAT_ATOMIC_MAX_INT_RTN,
+ RAT_ATOMIC_MAX_INT_NORET,
+ atomic_max_global_ret, atomic_max_global_noret>;
+defm AtomicUMaxPat : AtomicPat <RAT_ATOMIC_MAX_UINT_RTN,
+ RAT_ATOMIC_MAX_UINT_NORET,
+ atomic_umax_global_ret, atomic_umax_global_noret>;
+defm AtomicAndPat : AtomicPat <RAT_ATOMIC_AND_RTN, RAT_ATOMIC_AND_NORET,
+ atomic_and_global_ret, atomic_and_global_noret>;
+defm AtomicOrPat : AtomicPat <RAT_ATOMIC_OR_RTN, RAT_ATOMIC_OR_NORET,
+ atomic_or_global_ret, atomic_or_global_noret>;
+defm AtomicXorPat : AtomicPat <RAT_ATOMIC_XOR_RTN, RAT_ATOMIC_XOR_NORET,
+ atomic_xor_global_ret, atomic_xor_global_noret>;
+defm AtomicIncAddPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_RTN,
+ RAT_ATOMIC_INC_UINT_NORET,
+ atomic_add_global_ret,
+ atomic_add_global_noret, 1>;
+defm AtomicIncSubPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_RTN,
+ RAT_ATOMIC_INC_UINT_NORET,
+ atomic_sub_global_ret,
+ atomic_sub_global_noret, -1>;
+defm AtomicDecAddPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_RTN,
+ RAT_ATOMIC_DEC_UINT_NORET,
+ atomic_add_global_ret,
+ atomic_add_global_noret, -1>;
+defm AtomicDecSubPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_RTN,
+ RAT_ATOMIC_DEC_UINT_NORET,
+ atomic_sub_global_ret,
+ atomic_sub_global_noret, 1>;
+
// Should be predicated on FeatureFP64
// def FMA_64 : R600_3OP <
// 0xA, "FMA_64",
@@ -287,7 +388,7 @@ def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
VecALU
>;
-def : BFEPattern <BFE_UINT_eg, MOV_IMM_I32>;
+defm : BFEPattern <BFE_UINT_eg, BFE_INT_eg, MOV_IMM_I32>;
def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
[(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))],
@@ -337,7 +438,7 @@ defm CUBE_eg : CUBE_Common<0xC0>;
def ADDC_UINT : R600_2OP_Helper <0x52, "ADDC_UINT", AMDGPUcarry>;
def SUBB_UINT : R600_2OP_Helper <0x53, "SUBB_UINT", AMDGPUborrow>;
-def FLT32_TO_FLT16 : R600_1OP_Helper <0xA2, "FLT32_TO_FLT16", fp_to_f16, VecALU>;
+def FLT32_TO_FLT16 : R600_1OP_Helper <0xA2, "FLT32_TO_FLT16", AMDGPUfp_to_f16, VecALU>;
def FLT16_TO_FLT32 : R600_1OP_Helper <0xA3, "FLT16_TO_FLT32", f16_to_fp, VecALU>;
def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>;
def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", AMDGPUffbh_u32, VecALU>;
diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td
index 849fb8ad50f5..b0ac0e689a0b 100644
--- a/lib/Target/AMDGPU/FLATInstructions.td
+++ b/lib/Target/AMDGPU/FLATInstructions.td
@@ -136,7 +136,7 @@ multiclass FLAT_Atomic_Pseudo<
class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
(ops node:$ptr, node:$value),
(atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}]
>;
def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
@@ -284,16 +284,16 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
(ld node:$ptr), [{
auto const AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS;
+ return AS == AMDGPUASI.FLAT_ADDRESS ||
+ AS == AMDGPUASI.GLOBAL_ADDRESS ||
+ AS == AMDGPUASI.CONSTANT_ADDRESS;
}]>;
class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
(st node:$val, node:$ptr), [{
auto const AS = cast<MemSDNode>(N)->getAddressSpace();
- return AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::GLOBAL_ADDRESS;
+ return AS == AMDGPUASI.FLAT_ADDRESS ||
+ AS == AMDGPUASI.GLOBAL_ADDRESS;
}]>;
def atomic_flat_load : flat_ld <atomic_load>;
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index dd3b46f13921..80fc4ac9d2a3 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -11,11 +11,24 @@
//
//===----------------------------------------------------------------------===//
-#include "GCNHazardRecognizer.h"
#include "AMDGPUSubtarget.h"
+#include "GCNHazardRecognizer.h"
+#include "SIDefines.h"
#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <set>
+#include <vector>
using namespace llvm;
@@ -26,7 +39,8 @@ using namespace llvm;
GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF) :
CurrCycleInstr(nullptr),
MF(MF),
- ST(MF.getSubtarget<SISubtarget>()) {
+ ST(MF.getSubtarget<SISubtarget>()),
+ TII(*ST.getInstrInfo()) {
MaxLookAhead = 5;
}
@@ -58,8 +72,19 @@ static bool isRFE(unsigned Opcode) {
return Opcode == AMDGPU::S_RFE_B64;
}
-static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
+static bool isSMovRel(unsigned Opcode) {
+ switch (Opcode) {
+ case AMDGPU::S_MOVRELS_B32:
+ case AMDGPU::S_MOVRELS_B64:
+ case AMDGPU::S_MOVRELD_B32:
+ case AMDGPU::S_MOVRELD_B64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
AMDGPU::OpName::simm16);
return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
@@ -96,6 +121,13 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
return NoopHazard;
+ if ((TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode())) &&
+ checkReadM0Hazards(MI) > 0)
+ return NoopHazard;
+
+ if (checkAnyInstHazards(MI) > 0)
+ return NoopHazard;
+
return NoHazard;
}
@@ -104,11 +136,13 @@ unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
}
unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
+ int WaitStates = std::max(0, checkAnyInstHazards(MI));
+
if (SIInstrInfo::isSMRD(*MI))
- return std::max(0, checkSMRDHazards(MI));
+ return std::max(WaitStates, checkSMRDHazards(MI));
if (SIInstrInfo::isVALU(*MI)) {
- int WaitStates = std::max(0, checkVALUHazards(MI));
+ WaitStates = std::max(WaitStates, checkVALUHazards(MI));
if (SIInstrInfo::isVMEM(*MI))
WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
@@ -122,19 +156,25 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
if (isRWLane(MI->getOpcode()))
WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
+ if (TII.isVINTRP(*MI))
+ WaitStates = std::max(WaitStates, checkReadM0Hazards(MI));
+
return WaitStates;
}
if (isSGetReg(MI->getOpcode()))
- return std::max(0, checkGetRegHazards(MI));
+ return std::max(WaitStates, checkGetRegHazards(MI));
if (isSSetReg(MI->getOpcode()))
- return std::max(0, checkSetRegHazards(MI));
+ return std::max(WaitStates, checkSetRegHazards(MI));
if (isRFE(MI->getOpcode()))
- return std::max(0, checkRFEHazards(MI));
+ return std::max(WaitStates, checkRFEHazards(MI));
- return 0;
+ if (TII.isVINTRP(*MI) || isSMovRel(MI->getOpcode()))
+ return std::max(WaitStates, checkReadM0Hazards(MI));
+
+ return WaitStates;
}
void GCNHazardRecognizer::EmitNoop() {
@@ -142,14 +182,12 @@ void GCNHazardRecognizer::EmitNoop() {
}
void GCNHazardRecognizer::AdvanceCycle() {
-
// When the scheduler detects a stall, it will call AdvanceCycle() without
// emitting any instructions.
if (!CurrCycleInstr)
return;
- const SIInstrInfo *TII = ST.getInstrInfo();
- unsigned NumWaitStates = TII->getNumWaitStates(*CurrCycleInstr);
+ unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
// Keep track of emitted instructions
EmittedInstrs.push_front(CurrCycleInstr);
@@ -180,7 +218,6 @@ void GCNHazardRecognizer::RecedeCycle() {
int GCNHazardRecognizer::getWaitStatesSince(
function_ref<bool(MachineInstr *)> IsHazard) {
-
int WaitStates = -1;
for (MachineInstr *MI : EmittedInstrs) {
++WaitStates;
@@ -204,7 +241,6 @@ int GCNHazardRecognizer::getWaitStatesSinceDef(
int GCNHazardRecognizer::getWaitStatesSinceSetReg(
function_ref<bool(MachineInstr *)> IsHazard) {
-
auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
return isSSetReg(MI->getOpcode()) && IsHazard(MI);
};
@@ -281,7 +317,6 @@ int GCNHazardRecognizer::checkSMEMSoftClauseHazards(MachineInstr *SMEM) {
int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- const SIInstrInfo *TII = ST.getInstrInfo();
int WaitStatesNeeded = 0;
WaitStatesNeeded = checkSMEMSoftClauseHazards(SMRD);
@@ -293,7 +328,7 @@ int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
// A read of an SGPR by SMRD instruction requires 4 wait states when the
// SGPR was written by a VALU instruction.
int SmrdSgprWaitStates = 4;
- auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
+ auto IsHazardDefFn = [this] (MachineInstr *MI) { return TII.isVALU(*MI); };
for (const MachineOperand &Use : SMRD->uses()) {
if (!Use.isReg())
@@ -486,7 +521,6 @@ int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
}
int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
-
if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 0;
@@ -500,3 +534,42 @@ int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
return RFEWaitStates - WaitStatesNeeded;
}
+
+int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
+ if (MI->isDebugValue())
+ return 0;
+
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ if (!ST.hasSMovFedHazard())
+ return 0;
+
+ // Check for any instruction reading an SGPR after a write from
+ // s_mov_fed_b32.
+ int MovFedWaitStates = 1;
+ int WaitStatesNeeded = 0;
+
+ for (const MachineOperand &Use : MI->uses()) {
+ if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
+ continue;
+ auto IsHazardFn = [] (MachineInstr *MI) {
+ return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
+ };
+ int WaitStatesNeededForUse =
+ MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+ }
+
+ return WaitStatesNeeded;
+}
+
+int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
+ if (!ST.hasReadM0Hazard())
+ return 0;
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ int SMovRelWaitStates = 1;
+ auto IsHazardFn = [TII] (MachineInstr *MI) {
+ return TII->isSALU(*MI);
+ };
+ return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn);
+}
diff --git a/lib/Target/AMDGPU/GCNHazardRecognizer.h b/lib/Target/AMDGPU/GCNHazardRecognizer.h
index 0ab82ff4635b..5680c3de6a1a 100644
--- a/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -34,6 +34,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
std::list<MachineInstr*> EmittedInstrs;
const MachineFunction &MF;
const SISubtarget &ST;
+ const SIInstrInfo &TII;
int getWaitStatesSince(function_ref<bool(MachineInstr *)> IsHazard);
int getWaitStatesSinceDef(unsigned Reg,
@@ -52,6 +53,8 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
int checkVALUHazards(MachineInstr *VALU);
int checkRWLaneHazards(MachineInstr *RWLane);
int checkRFEHazards(MachineInstr *RFE);
+ int checkAnyInstHazards(MachineInstr *MI);
+ int checkReadM0Hazards(MachineInstr *SMovRel);
public:
GCNHazardRecognizer(const MachineFunction &MF);
// We can only issue one instruction per cycle.
diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
new file mode 100644
index 000000000000..3bb5c9bc22b7
--- /dev/null
+++ b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -0,0 +1,528 @@
+//===--------------------- GCNIterativeScheduler.cpp - --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#include "GCNIterativeScheduler.h"
+#include "GCNSchedStrategy.h"
+#include "SIMachineFunctionInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+namespace llvm {
+ std::vector<const SUnit*> makeMinRegSchedule(ArrayRef<const SUnit*> TopRoots,
+ const ScheduleDAG &DAG);
+}
+
+// shim accessors for different order containers
+static inline MachineInstr *getMachineInstr(MachineInstr *MI) {
+ return MI;
+}
+static inline MachineInstr *getMachineInstr(const SUnit *SU) {
+ return SU->getInstr();
+}
+static inline MachineInstr *getMachineInstr(const SUnit &SU) {
+ return SU.getInstr();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
+static void printRegion(raw_ostream &OS,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ const LiveIntervals *LIS,
+ unsigned MaxInstNum =
+ std::numeric_limits<unsigned>::max()) {
+ auto BB = Begin->getParent();
+ OS << BB->getParent()->getName() << ":BB#" << BB->getNumber()
+ << ' ' << BB->getName() << ":\n";
+ auto I = Begin;
+ MaxInstNum = std::max(MaxInstNum, 1u);
+ for (; I != End && MaxInstNum; ++I, --MaxInstNum) {
+ if (!I->isDebugValue() && LIS)
+ OS << LIS->getInstructionIndex(*I);
+ OS << '\t' << *I;
+ }
+ if (I != End) {
+ OS << "\t...\n";
+ I = std::prev(End);
+ if (!I->isDebugValue() && LIS)
+ OS << LIS->getInstructionIndex(*I);
+ OS << '\t' << *I;
+ }
+ if (End != BB->end()) { // print boundary inst if present
+ OS << "----\n";
+ if (LIS) OS << LIS->getInstructionIndex(*End) << '\t';
+ OS << *End;
+ }
+}
+
+LLVM_DUMP_METHOD
+static void printLivenessInfo(raw_ostream &OS,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ const LiveIntervals *LIS) {
+ const auto BB = Begin->getParent();
+ const auto &MRI = BB->getParent()->getRegInfo();
+
+ const auto LiveIns = getLiveRegsBefore(*Begin, *LIS);
+ OS << "LIn RP: ";
+ getRegPressure(MRI, LiveIns).print(OS);
+
+ const auto BottomMI = End == BB->end() ? std::prev(End) : End;
+ const auto LiveOuts = getLiveRegsAfter(*BottomMI, *LIS);
+ OS << "LOt RP: ";
+ getRegPressure(MRI, LiveOuts).print(OS);
+}
+
+LLVM_DUMP_METHOD
+void GCNIterativeScheduler::printRegions(raw_ostream &OS) const {
+ const auto &ST = MF.getSubtarget<SISubtarget>();
+ for (const auto R : Regions) {
+ OS << "Region to schedule ";
+ printRegion(OS, R->Begin, R->End, LIS, 1);
+ printLivenessInfo(OS, R->Begin, R->End, LIS);
+ OS << "Max RP: ";
+ R->MaxPressure.print(OS, &ST);
+ }
+}
+
+LLVM_DUMP_METHOD
+void GCNIterativeScheduler::printSchedResult(raw_ostream &OS,
+ const Region *R,
+ const GCNRegPressure &RP) const {
+ OS << "\nAfter scheduling ";
+ printRegion(OS, R->Begin, R->End, LIS);
+ printSchedRP(OS, R->MaxPressure, RP);
+ OS << '\n';
+}
+
+LLVM_DUMP_METHOD
+void GCNIterativeScheduler::printSchedRP(raw_ostream &OS,
+ const GCNRegPressure &Before,
+ const GCNRegPressure &After) const {
+ const auto &ST = MF.getSubtarget<SISubtarget>();
+ OS << "RP before: ";
+ Before.print(OS, &ST);
+ OS << "RP after: ";
+ After.print(OS, &ST);
+}
+
+#endif
+
+// RAII DAG builder helper: enters the region and builds the scheduling DAG
+// on construction, exits the region on destruction.
+class GCNIterativeScheduler::BuildDAG {
+ GCNIterativeScheduler &Sch;
+ SmallVector<SUnit*, 8> TopRoots;
+public:
+ BuildDAG(const Region &R, GCNIterativeScheduler &_Sch)
+ : Sch(_Sch) {
+ auto BB = R.Begin->getParent();
+ Sch.BaseClass::startBlock(BB);
+ Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
+
+ Sch.buildSchedGraph(Sch.AA, nullptr, nullptr, nullptr,
+ /*TrackLaneMask*/true);
+ Sch.Topo.InitDAGTopologicalSorting();
+
+ SmallVector<SUnit*, 8> BotRoots;
+ Sch.findRootsAndBiasEdges(TopRoots, BotRoots);
+ }
+ ~BuildDAG() {
+ Sch.BaseClass::exitRegion();
+ Sch.BaseClass::finishBlock();
+ }
+ ArrayRef<const SUnit*> getTopRoots() const {
+ return TopRoots;
+ }
+};
+
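+// RAII helper that temporarily installs the given strategy in the scheduler,
+// entering the region on construction and restoring the saved strategy (and
+// exiting the region) on destruction.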
+class GCNIterativeScheduler::OverrideLegacyStrategy {
+ GCNIterativeScheduler &Sch;
+ Region &Rgn;
+ std::unique_ptr<MachineSchedStrategy> SaveSchedImpl;
+ GCNRegPressure SaveMaxRP;
+public:
+ OverrideLegacyStrategy(Region &R,
+ MachineSchedStrategy &OverrideStrategy,
+ GCNIterativeScheduler &_Sch)
+ : Sch(_Sch)
+ , Rgn(R)
+ , SaveSchedImpl(std::move(_Sch.SchedImpl))
+ , SaveMaxRP(R.MaxPressure) {
+ Sch.SchedImpl.reset(&OverrideStrategy);
+ auto BB = R.Begin->getParent();
+ Sch.BaseClass::startBlock(BB);
+ Sch.BaseClass::enterRegion(BB, R.Begin, R.End, R.NumRegionInstrs);
+ }
+ ~OverrideLegacyStrategy() {
+ Sch.BaseClass::exitRegion();
+ Sch.BaseClass::finishBlock();
+ Sch.SchedImpl.release();
+ Sch.SchedImpl = std::move(SaveSchedImpl);
+ }
+ void schedule() {
+ assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
+ DEBUG(dbgs() << "\nScheduling ";
+ printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2));
+ Sch.BaseClass::schedule();
+
+    // Unfortunately placeDebugValues incorrectly modifies RegionEnd; restore it.
+ Sch.RegionEnd = Rgn.End;
+ //assert(Rgn.End == Sch.RegionEnd);
+ Rgn.Begin = Sch.RegionBegin;
+ Rgn.MaxPressure.clear();
+ }
+ void restoreOrder() {
+ assert(Sch.RegionBegin == Rgn.Begin && Sch.RegionEnd == Rgn.End);
+    // DAG SUnits are stored in the original region's order,
+    // so just use SUnits as the restoring schedule.
+ Sch.scheduleRegion(Rgn, Sch.SUnits, SaveMaxRP);
+ }
+};
+
+// Just a stub to make the base class happy.
+class SchedStrategyStub : public MachineSchedStrategy {
+public:
+ bool shouldTrackPressure() const override { return false; }
+ bool shouldTrackLaneMasks() const override { return false; }
+ void initialize(ScheduleDAGMI *DAG) override {}
+ SUnit *pickNode(bool &IsTopNode) override { return nullptr; }
+ void schedNode(SUnit *SU, bool IsTopNode) override {}
+ void releaseTopNode(SUnit *SU) override {}
+ void releaseBottomNode(SUnit *SU) override {}
+};
+
+GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C,
+ StrategyKind S)
+ : BaseClass(C, llvm::make_unique<SchedStrategyStub>())
+ , Context(C)
+ , Strategy(S)
+ , UPTracker(*LIS) {
+}
+
+// returns max pressure for a region
+GCNRegPressure
+GCNIterativeScheduler::getRegionPressure(MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End)
+ const {
+  // For the purpose of pressure tracking, the bottom instruction of the
+  // region should also be processed. End is either the BB end, the BB
+  // terminator instruction, or a scheduling boundary instruction.
+ auto const BBEnd = Begin->getParent()->end();
+ auto const BottomMI = End == BBEnd ? std::prev(End) : End;
+
+  // scheduleRegions walks bottom to top, so it's likely we just get the
+  // next instruction to track.
+ auto AfterBottomMI = std::next(BottomMI);
+ if (AfterBottomMI == BBEnd ||
+ &*AfterBottomMI != UPTracker.getLastTrackedMI()) {
+ UPTracker.reset(*BottomMI);
+ } else {
+ assert(UPTracker.isValid());
+ }
+
+ for (auto I = BottomMI; I != Begin; --I)
+ UPTracker.recede(*I);
+
+ UPTracker.recede(*Begin);
+
+ assert(UPTracker.isValid() ||
+ (dbgs() << "Tracked region ",
+ printRegion(dbgs(), Begin, End, LIS), false));
+ return UPTracker.moveMaxPressure();
+}
+
+// returns max pressure for a tentative schedule
+template <typename Range> GCNRegPressure
+GCNIterativeScheduler::getSchedulePressure(const Region &R,
+ Range &&Schedule) const {
+ auto const BBEnd = R.Begin->getParent()->end();
+ GCNUpwardRPTracker RPTracker(*LIS);
+ if (R.End != BBEnd) {
+ // R.End points to the boundary instruction but the
+ // schedule doesn't include it
+ RPTracker.reset(*R.End);
+ RPTracker.recede(*R.End);
+ } else {
+    // R.End is the basic block end; there is no boundary instruction,
+    // so start tracking from the last instruction of the block.
+ RPTracker.reset(*std::prev(BBEnd));
+ }
+ for (auto I = Schedule.end(), B = Schedule.begin(); I != B;) {
+ RPTracker.recede(*getMachineInstr(*--I));
+ }
+ return RPTracker.moveMaxPressure();
+}
+
+void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overridden
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned NumRegionInstrs) {
+ BaseClass::enterRegion(BB, Begin, End, NumRegionInstrs);
+ if (NumRegionInstrs > 2) {
+ Regions.push_back(
+ new (Alloc.Allocate())
+ Region { Begin, End, NumRegionInstrs,
+ getRegionPressure(Begin, End), nullptr });
+ }
+}
+
+void GCNIterativeScheduler::schedule() { // overridden
+  // Do nothing here: regions are recorded in enterRegion() and are
+  // actually scheduled in finalizeSchedule().
+ DEBUG(
+ printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
+ if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
+ dbgs() << "Max RP: ";
+ Regions.back()->MaxPressure.print(dbgs(), &MF.getSubtarget<SISubtarget>());
+ }
+ dbgs() << '\n';
+ );
+}
+
+void GCNIterativeScheduler::finalizeSchedule() { // overridden
+ if (Regions.empty())
+ return;
+ switch (Strategy) {
+ case SCHEDULE_MINREGONLY: scheduleMinReg(); break;
+ case SCHEDULE_MINREGFORCED: scheduleMinReg(true); break;
+ case SCHEDULE_LEGACYMAXOCCUPANCY: scheduleLegacyMaxOccupancy(); break;
+ }
+}
+
+// Detach schedule from SUnits and interleave it with debug values.
+// Returned schedule becomes independent of DAG state.
+std::vector<MachineInstr*>
+GCNIterativeScheduler::detachSchedule(ScheduleRef Schedule) const {
+ std::vector<MachineInstr*> Res;
+ Res.reserve(Schedule.size() * 2);
+
+ if (FirstDbgValue)
+ Res.push_back(FirstDbgValue);
+
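+  // After each instruction, re-attach the DBG_VALUE paired with it, if any.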
+ const auto DbgB = DbgValues.begin(), DbgE = DbgValues.end();
+ for (auto SU : Schedule) {
+ Res.push_back(SU->getInstr());
+ const auto &D = std::find_if(DbgB, DbgE, [SU](decltype(*DbgB) &P) {
+ return P.second == SU->getInstr();
+ });
+ if (D != DbgE)
+ Res.push_back(D->first);
+ }
+ return Res;
+}
+
+void GCNIterativeScheduler::setBestSchedule(Region &R,
+ ScheduleRef Schedule,
+ const GCNRegPressure &MaxRP) {
+ R.BestSchedule.reset(
+ new TentativeSchedule{ detachSchedule(Schedule), MaxRP });
+}
+
+void GCNIterativeScheduler::scheduleBest(Region &R) {
+ assert(R.BestSchedule.get() && "No schedule specified");
+ scheduleRegion(R, R.BestSchedule->Schedule, R.BestSchedule->MaxPressure);
+ R.BestSchedule.reset();
+}
+
+// Minimal required region scheduler; works for ranges of SUnit*,
+// SUnit, or MachineInstr*.
+template <typename Range>
+void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
+ const GCNRegPressure &MaxRP) {
+ assert(RegionBegin == R.Begin && RegionEnd == R.End);
+ assert(LIS != nullptr);
+#ifndef NDEBUG
+ const auto SchedMaxRP = getSchedulePressure(R, Schedule);
+#endif
+ auto BB = R.Begin->getParent();
+ auto Top = R.Begin;
+ for (const auto &I : Schedule) {
+ auto MI = getMachineInstr(I);
+ if (MI != &*Top) {
+ BB->remove(MI);
+ BB->insert(Top, MI);
+ if (!MI->isDebugValue())
+ LIS->handleMove(*MI, true);
+ }
+ if (!MI->isDebugValue()) {
+ // Reset read - undef flags and update them later.
+ for (auto &Op : MI->operands())
+ if (Op.isReg() && Op.isDef())
+ Op.setIsUndef(false);
+
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI, /*ShouldTrackLaneMasks*/true,
+ /*IgnoreDead*/false);
+ // Adjust liveness and add missing dead+read-undef flags.
+ auto SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+ }
+ Top = std::next(MI->getIterator());
+ }
+ RegionBegin = getMachineInstr(Schedule.front());
+
+ // Schedule consisting of MachineInstr* is considered 'detached'
+ // and already interleaved with debug values
+  // Note: *Schedule.begin() yields a reference type, so strip the reference
+  // before comparing against the plain pointer type.
+  if (!std::is_same<typename std::remove_reference<
+        decltype(*Schedule.begin())>::type, MachineInstr*>::value) {
+ placeDebugValues();
+    // Unfortunately placeDebugValues incorrectly modifies RegionEnd; restore it.
+ //assert(R.End == RegionEnd);
+ RegionEnd = R.End;
+ }
+
+ R.Begin = RegionBegin;
+ R.MaxPressure = MaxRP;
+
+#ifndef NDEBUG
+ const auto RegionMaxRP = getRegionPressure(R);
+ const auto &ST = MF.getSubtarget<SISubtarget>();
+#endif
+ assert((SchedMaxRP == RegionMaxRP && (MaxRP.empty() || SchedMaxRP == MaxRP))
+ || (dbgs() << "Max RP mismatch!!!\n"
+ "RP for schedule (calculated): ",
+ SchedMaxRP.print(dbgs(), &ST),
+ dbgs() << "RP for schedule (reported): ",
+ MaxRP.print(dbgs(), &ST),
+ dbgs() << "RP after scheduling: ",
+ RegionMaxRP.print(dbgs(), &ST),
+ false));
+}
+
+// Sort recorded regions by pressure - highest at the front
+void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
+ const auto &ST = MF.getSubtarget<SISubtarget>();
+ std::sort(Regions.begin(), Regions.end(),
+ [&ST, TargetOcc](const Region *R1, const Region *R2) {
+ return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc);
+ });
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Legacy MaxOccupancy Strategy
+
+// Tries to increase occupancy by applying the minreg scheduler to a sequence
+// of the most demanding regions. Obtained schedules are saved as the
+// BestSchedule of a region.
+// TargetOcc is the best achievable occupancy for the kernel.
+// Returns the better occupancy on success, or the current occupancy on
+// failure. BestSchedules aren't deleted on failure.
+unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
+ // TODO: assert Regions are sorted descending by pressure
+ const auto &ST = MF.getSubtarget<SISubtarget>();
+ const auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
+  DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
+ << ", current = " << Occ << '\n');
+
+ auto NewOcc = TargetOcc;
+ for (auto R : Regions) {
+ if (R->MaxPressure.getOccupancy(ST) >= NewOcc)
+ break;
+
+ DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
+ printLivenessInfo(dbgs(), R->Begin, R->End, LIS));
+
+ BuildDAG DAG(*R, *this);
+ const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
+ const auto MaxRP = getSchedulePressure(*R, MinSchedule);
+ DEBUG(dbgs() << "Occupancy improvement attempt:\n";
+ printSchedRP(dbgs(), R->MaxPressure, MaxRP));
+
+ NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST));
+ if (NewOcc <= Occ)
+ break;
+
+ setBestSchedule(*R, MinSchedule, MaxRP);
+ }
+ DEBUG(dbgs() << "New occupancy = " << NewOcc
+ << ", prev occupancy = " << Occ << '\n');
+ return std::max(NewOcc, Occ);
+}
+
+void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
+ bool TryMaximizeOccupancy) {
+ const auto &ST = MF.getSubtarget<SISubtarget>();
+ auto TgtOcc = ST.getOccupancyWithLocalMemSize(MF);
+
+ sortRegionsByPressure(TgtOcc);
+ auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
+
+ if (TryMaximizeOccupancy && Occ < TgtOcc)
+ Occ = tryMaximizeOccupancy(TgtOcc);
+
+  // This is really weird, but for some magic reason scheduling regions
+  // twice gives a performance improvement.
+ const int NumPasses = Occ < TgtOcc ? 2 : 1;
+
+ TgtOcc = std::min(Occ, TgtOcc);
+ DEBUG(dbgs() << "Scheduling using default scheduler, "
+ "target occupancy = " << TgtOcc << '\n');
+ GCNMaxOccupancySchedStrategy LStrgy(Context);
+
+ for (int I = 0; I < NumPasses; ++I) {
+    // Running the first pass with TargetOccupancy = 0 mimics the previous
+    // scheduling approach and is performance magic.
+ LStrgy.setTargetOccupancy(I == 0 ? 0 : TgtOcc);
+ for (auto R : Regions) {
+ OverrideLegacyStrategy Ovr(*R, LStrgy, *this);
+
+ Ovr.schedule();
+ const auto RP = getRegionPressure(*R);
+ DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
+
+ if (RP.getOccupancy(ST) < TgtOcc) {
+ DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
+ if (R->BestSchedule.get() &&
+ R->BestSchedule->MaxPressure.getOccupancy(ST) >= TgtOcc) {
+ DEBUG(dbgs() << ", scheduling minimal register\n");
+ scheduleBest(*R);
+ } else {
+ DEBUG(dbgs() << ", restoring\n");
+ Ovr.restoreOrder();
+ assert(R->MaxPressure.getOccupancy(ST) >= TgtOcc);
+ }
+ }
+ }
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// Minimal Register Strategy
+
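+// Reschedule regions in decreasing pressure order using the min-reg schedule.
+// Unless forced, stop once a region's pressure is already below the last
+// accepted maximum, or once the min-reg result would be worse than it.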
+void GCNIterativeScheduler::scheduleMinReg(bool force) {
+ const auto &ST = MF.getSubtarget<SISubtarget>();
+ const auto TgtOcc = ST.getOccupancyWithLocalMemSize(MF);
+ sortRegionsByPressure(TgtOcc);
+
+ auto MaxPressure = Regions.front()->MaxPressure;
+ for (auto R : Regions) {
+ if (!force && R->MaxPressure.less(ST, MaxPressure, TgtOcc))
+ break;
+
+ BuildDAG DAG(*R, *this);
+ const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
+
+ const auto RP = getSchedulePressure(*R, MinSchedule);
+ DEBUG(if (R->MaxPressure.less(ST, RP, TgtOcc)) {
+ dbgs() << "\nWarning: Pressure becomes worse after minreg!";
+ printSchedRP(dbgs(), R->MaxPressure, RP);
+ });
+
+ if (!force && MaxPressure.less(ST, RP, TgtOcc))
+ break;
+
+ scheduleRegion(*R, MinSchedule, RP);
+ DEBUG(printSchedResult(dbgs(), R, RP));
+
+ MaxPressure = RP;
+ }
+}
diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.h b/lib/Target/AMDGPU/GCNIterativeScheduler.h
new file mode 100644
index 000000000000..df3afce21ebc
--- /dev/null
+++ b/lib/Target/AMDGPU/GCNIterativeScheduler.h
@@ -0,0 +1,118 @@
+//===--------- GCNIterativeScheduler.h - GCN Scheduler -*- C++ -*----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNITERATIVESCHEDULER_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNITERATIVESCHEDULER_H
+
+#include "GCNRegPressure.h"
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+class GCNIterativeScheduler : public ScheduleDAGMILive {
+ typedef ScheduleDAGMILive BaseClass;
+public:
+ enum StrategyKind {
+ SCHEDULE_MINREGONLY,
+ SCHEDULE_MINREGFORCED,
+ SCHEDULE_LEGACYMAXOCCUPANCY
+ };
+
+ GCNIterativeScheduler(MachineSchedContext *C,
+ StrategyKind S);
+
+ void schedule() override;
+
+ void enterRegion(MachineBasicBlock *BB,
+ MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End,
+ unsigned RegionInstrs) override;
+
+ void finalizeSchedule() override;
+
+protected:
+
+ typedef ArrayRef<const SUnit*> ScheduleRef;
+
+ struct TentativeSchedule {
+ std::vector<MachineInstr*> Schedule;
+ GCNRegPressure MaxPressure;
+ };
+
+ struct Region {
+    // Fields except for BestSchedule are supposed to reflect current IR state.
+    // `const` fields are to emphasize they shouldn't change for any schedule.
+ MachineBasicBlock::iterator Begin;
+    // End is either a boundary instruction or the end of the basic block.
+ const MachineBasicBlock::iterator End;
+ const unsigned NumRegionInstrs;
+ GCNRegPressure MaxPressure;
+
+ // best schedule for the region so far (not scheduled yet)
+ std::unique_ptr<TentativeSchedule> BestSchedule;
+ };
+
+ SpecificBumpPtrAllocator<Region> Alloc;
+ std::vector<Region*> Regions;
+
+ MachineSchedContext *Context;
+ const StrategyKind Strategy;
+ mutable GCNUpwardRPTracker UPTracker;
+
+ class BuildDAG;
+ class OverrideLegacyStrategy;
+
+ template <typename Range>
+ GCNRegPressure getSchedulePressure(const Region &R,
+ Range &&Schedule) const;
+
+ GCNRegPressure getRegionPressure(MachineBasicBlock::iterator Begin,
+ MachineBasicBlock::iterator End) const;
+
+ GCNRegPressure getRegionPressure(const Region &R) const {
+ return getRegionPressure(R.Begin, R.End);
+ }
+
+ void setBestSchedule(Region &R,
+ ScheduleRef Schedule,
+ const GCNRegPressure &MaxRP = GCNRegPressure());
+
+ void scheduleBest(Region &R);
+
+ std::vector<MachineInstr*> detachSchedule(ScheduleRef Schedule) const;
+
+ void sortRegionsByPressure(unsigned TargetOcc);
+
+ template <typename Range>
+ void scheduleRegion(Region &R, Range &&Schedule,
+ const GCNRegPressure &MaxRP = GCNRegPressure());
+
+ unsigned tryMaximizeOccupancy(unsigned TargetOcc =
+ std::numeric_limits<unsigned>::max());
+
+ void scheduleLegacyMaxOccupancy(bool TryMaximizeOccupancy = true);
+ void scheduleMinReg(bool force = false);
+
+ void printRegions(raw_ostream &OS) const;
+ void printSchedResult(raw_ostream &OS,
+ const Region *R,
+ const GCNRegPressure &RP) const;
+ void printSchedRP(raw_ostream &OS,
+ const GCNRegPressure &Before,
+ const GCNRegPressure &After) const;
+};
+
+} // End namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNITERATIVESCHEDULER_H
diff --git a/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
new file mode 100644
index 000000000000..c6d0f2179950
--- /dev/null
+++ b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
@@ -0,0 +1,266 @@
+//===----------------------- GCNMinRegStrategy.cpp - ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
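+// Greedy scheduler aiming at minimal register usage: among ready candidates
+// it prefers the highest priority, then the one leaving the fewest non-ready
+// successors, then the one making the most successors ready, falling back to
+// original program order.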
+class GCNMinRegScheduler {
+ struct Candidate : ilist_node<Candidate> {
+ const SUnit *SU;
+ int Priority;
+
+ Candidate(const SUnit *SU_, int Priority_ = 0)
+ : SU(SU_), Priority(Priority_) {}
+ };
+
+ SpecificBumpPtrAllocator<Candidate> Alloc;
+ typedef simple_ilist<Candidate> Queue;
+ Queue RQ; // Ready queue
+
+ std::vector<unsigned> NumPreds;
+
+ bool isScheduled(const SUnit *SU) const {
+ assert(!SU->isBoundaryNode());
+ return NumPreds[SU->NodeNum] == std::numeric_limits<unsigned>::max();
+ }
+
+ void setIsScheduled(const SUnit *SU) {
+ assert(!SU->isBoundaryNode());
+ NumPreds[SU->NodeNum] = std::numeric_limits<unsigned>::max();
+ }
+
+ unsigned getNumPreds(const SUnit *SU) const {
+ assert(!SU->isBoundaryNode());
+ assert(NumPreds[SU->NodeNum] != std::numeric_limits<unsigned>::max());
+ return NumPreds[SU->NodeNum];
+ }
+
+ unsigned decNumPreds(const SUnit *SU) {
+ assert(!SU->isBoundaryNode());
+ assert(NumPreds[SU->NodeNum] != std::numeric_limits<unsigned>::max());
+ return --NumPreds[SU->NodeNum];
+ }
+
+ void initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits);
+
+ int getReadySuccessors(const SUnit *SU) const;
+ int getNotReadySuccessors(const SUnit *SU) const;
+
+ template <typename Calc>
+ unsigned findMax(unsigned Num, Calc C);
+
+ Candidate* pickCandidate();
+
+ void bumpPredsPriority(const SUnit *SchedSU, int Priority);
+ void releaseSuccessors(const SUnit* SU, int Priority);
+
+public:
+ std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
+ const ScheduleDAG &DAG);
+};
+
+void GCNMinRegScheduler::initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits) {
+ NumPreds.resize(SUnits.size());
+ for (unsigned I = 0; I < SUnits.size(); ++I)
+ NumPreds[I] = SUnits[I].NumPredsLeft;
+}
+
+int GCNMinRegScheduler::getReadySuccessors(const SUnit *SU) const {
+ unsigned NumSchedSuccs = 0;
+ for (auto SDep : SU->Succs) {
+ bool wouldBeScheduled = true;
+ for (auto PDep : SDep.getSUnit()->Preds) {
+ auto PSU = PDep.getSUnit();
+ assert(!PSU->isBoundaryNode());
+ if (PSU != SU && !isScheduled(PSU)) {
+ wouldBeScheduled = false;
+ break;
+ }
+ }
+ NumSchedSuccs += wouldBeScheduled ? 1 : 0;
+ }
+ return NumSchedSuccs;
+}
+
+int GCNMinRegScheduler::getNotReadySuccessors(const SUnit *SU) const {
+ return SU->Succs.size() - getReadySuccessors(SU);
+}
+
+template <typename Calc>
+unsigned GCNMinRegScheduler::findMax(unsigned Num, Calc C) {
+ assert(!RQ.empty() && Num <= RQ.size());
+ typedef decltype(C(*RQ.begin())) T;
+ T Max = std::numeric_limits<T>::min();
+ unsigned NumMax = 0;
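+  // Candidates matching the running maximum are rotated to the front of the
+  // queue, so when the scan finishes the first NumMax entries hold the best
+  // candidates for the next, finer-grained filter.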
+ for (auto I = RQ.begin(); Num; --Num) {
+ T Cur = C(*I);
+ if (Cur >= Max) {
+ if (Cur > Max) {
+ Max = Cur;
+ NumMax = 1;
+ } else
+ ++NumMax;
+ auto &Cand = *I++;
+ RQ.remove(Cand);
+ RQ.push_front(Cand);
+ continue;
+ }
+ ++I;
+ }
+ return NumMax;
+}
+
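+// Successively filters the ready queue: highest priority first, then the
+// candidate that would leave the fewest non-ready successors, then the one
+// making the most successors ready, and finally the lowest NodeNum, i.e.
+// program order. After each findMax() pass the winners sit at the queue head.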
+GCNMinRegScheduler::Candidate* GCNMinRegScheduler::pickCandidate() {
+ do {
+ unsigned Num = RQ.size();
+ if (Num == 1) break;
+
+ DEBUG(dbgs() << "\nSelecting max priority candidates among " << Num << '\n');
+ Num = findMax(Num, [=](const Candidate &C) { return C.Priority; });
+ if (Num == 1) break;
+
+ DEBUG(dbgs() << "\nSelecting min non-ready producing candidate among "
+ << Num << '\n');
+ Num = findMax(Num, [=](const Candidate &C) {
+ auto SU = C.SU;
+ int Res = getNotReadySuccessors(SU);
+ DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would left non-ready "
+ << Res << " successors, metric = " << -Res << '\n');
+ return -Res;
+ });
+ if (Num == 1) break;
+
+ DEBUG(dbgs() << "\nSelecting most producing candidate among "
+ << Num << '\n');
+ Num = findMax(Num, [=](const Candidate &C) {
+ auto SU = C.SU;
+ auto Res = getReadySuccessors(SU);
+ DEBUG(dbgs() << "SU(" << SU->NodeNum << ") would make ready "
+ << Res << " successors, metric = " << Res << '\n');
+ return Res;
+ });
+ if (Num == 1) break;
+
+ Num = Num ? Num : RQ.size();
+ DEBUG(dbgs() << "\nCan't find best candidate, selecting in program order among "
+ << Num << '\n');
+ Num = findMax(Num, [=](const Candidate &C) { return -(int64_t)C.SU->NodeNum; });
+ assert(Num == 1);
+ } while (false);
+
+ return &RQ.front();
+}
+
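+// Called when scheduling SchedSU made no successor ready: raises the queue
+// priority of every unscheduled transitive predecessor of SchedSU's
+// non-ready data successors, so the remaining inputs of those successors
+// get produced soon and their partial results do not stay live for long.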
+void GCNMinRegScheduler::bumpPredsPriority(const SUnit *SchedSU, int Priority) {
+ SmallPtrSet<const SUnit*, 32> Set;
+ for (const auto &S : SchedSU->Succs) {
+ if (S.getSUnit()->isBoundaryNode() || isScheduled(S.getSUnit()) ||
+ S.getKind() != SDep::Data)
+ continue;
+ for (const auto &P : S.getSUnit()->Preds) {
+ auto PSU = P.getSUnit();
+ assert(!PSU->isBoundaryNode());
+ if (PSU != SchedSU && !isScheduled(PSU)) {
+ Set.insert(PSU);
+ }
+ }
+ }
+ SmallVector<const SUnit*, 32> Worklist(Set.begin(), Set.end());
+ while (!Worklist.empty()) {
+ auto SU = Worklist.pop_back_val();
+ assert(!SU->isBoundaryNode());
+ for (const auto &P : SU->Preds) {
+ if (!P.getSUnit()->isBoundaryNode() && !isScheduled(P.getSUnit()) &&
+ Set.insert(P.getSUnit()).second)
+ Worklist.push_back(P.getSUnit());
+ }
+ }
+ DEBUG(dbgs() << "Make the predecessors of SU(" << SchedSU->NodeNum
+ << ")'s non-ready successors of " << Priority
+ << " priority in ready queue: ");
+ const auto SetEnd = Set.end();
+ for (auto &C : RQ) {
+ if (Set.find(C.SU) != SetEnd) {
+ C.Priority = Priority;
+ DEBUG(dbgs() << " SU(" << C.SU->NodeNum << ')');
+ }
+ }
+ DEBUG(dbgs() << '\n');
+}
+
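+// Decrements the pending-predecessor count of each strong successor of SU;
+// a successor whose count drops to zero becomes ready and is pushed onto the
+// ready queue with the given priority.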
+void GCNMinRegScheduler::releaseSuccessors(const SUnit* SU, int Priority) {
+ for (const auto &S : SU->Succs) {
+ auto SuccSU = S.getSUnit();
+ if (S.isWeak())
+ continue;
+ assert(SuccSU->isBoundaryNode() || getNumPreds(SuccSU) > 0);
+ if (!SuccSU->isBoundaryNode() && decNumPreds(SuccSU) == 0)
+ RQ.push_front(*new (Alloc.Allocate()) Candidate(SuccSU, Priority));
+ }
+}
+
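+// Seeds the ready queue with the top roots, then repeatedly picks a
+// candidate, releases its successors, and bumps predecessor priorities
+// whenever a pick makes no new successor ready.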
+std::vector<const SUnit*>
+GCNMinRegScheduler::schedule(ArrayRef<const SUnit*> TopRoots,
+ const ScheduleDAG &DAG) {
+ const auto &SUnits = DAG.SUnits;
+ std::vector<const SUnit*> Schedule;
+ Schedule.reserve(SUnits.size());
+
+ initNumPreds(SUnits);
+
+ int StepNo = 0;
+
+ for (auto SU : TopRoots) {
+ RQ.push_back(*new (Alloc.Allocate()) Candidate(SU, StepNo));
+ }
+ releaseSuccessors(&DAG.EntrySU, StepNo);
+
+ while (!RQ.empty()) {
+ DEBUG(
+ dbgs() << "\n=== Picking candidate, Step = " << StepNo << "\n"
+ "Ready queue:";
+ for (auto &C : RQ)
+ dbgs() << ' ' << C.SU->NodeNum << "(P" << C.Priority << ')';
+ dbgs() << '\n';
+ );
+
+ auto C = pickCandidate();
+ assert(C);
+ RQ.remove(*C);
+ auto SU = C->SU;
+ DEBUG(dbgs() << "Selected "; SU->dump(&DAG));
+
+ releaseSuccessors(SU, StepNo);
+ Schedule.push_back(SU);
+ setIsScheduled(SU);
+
+ if (getReadySuccessors(SU) == 0)
+ bumpPredsPriority(SU, StepNo);
+
+ ++StepNo;
+ }
+ assert(SUnits.size() == Schedule.size());
+
+ return Schedule;
+}
+
+namespace llvm {
+std::vector<const SUnit*> makeMinRegSchedule(ArrayRef<const SUnit*> TopRoots,
+ const ScheduleDAG &DAG) {
+ GCNMinRegScheduler S;
+ return S.schedule(TopRoots, DAG);
+}
+}
diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp
new file mode 100644
index 000000000000..4ecfa118fb27
--- /dev/null
+++ b/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -0,0 +1,355 @@
+//===------------------------- GCNRegPressure.cpp - -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief GCN register pressure bookkeeping: GCNRegPressure arithmetic and
+/// the upward register pressure tracker declared in GCNRegPressure.h.
+//
+//===----------------------------------------------------------------------===//
+
+#include "GCNRegPressure.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
+void llvm::printLivesAt(SlotIndex SI,
+ const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI) {
+ dbgs() << "Live regs at " << SI << ": "
+ << *LIS.getInstructionFromIndex(SI);
+ unsigned Num = 0;
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ const unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+ const auto &LI = LIS.getInterval(Reg);
+ if (LI.hasSubRanges()) {
+ bool firstTime = true;
+ for (const auto &S : LI.subranges()) {
+ if (!S.liveAt(SI)) continue;
+ if (firstTime) {
+ dbgs() << " " << PrintReg(Reg, MRI.getTargetRegisterInfo())
+ << '\n';
+ firstTime = false;
+ }
+ dbgs() << " " << S << '\n';
+ ++Num;
+ }
+ } else if (LI.liveAt(SI)) {
+ dbgs() << " " << LI << '\n';
+ ++Num;
+ }
+ }
+ if (!Num) dbgs() << " <none>\n";
+}
+
+static bool isEqual(const GCNRPTracker::LiveRegSet &S1,
+ const GCNRPTracker::LiveRegSet &S2) {
+ if (S1.size() != S2.size())
+ return false;
+
+ for (const auto &P : S1) {
+ auto I = S2.find(P.first);
+ if (I == S2.end() || I->second != P.second)
+ return false;
+ }
+ return true;
+}
+
+static GCNRPTracker::LiveRegSet
+stripEmpty(const GCNRPTracker::LiveRegSet &LR) {
+ GCNRPTracker::LiveRegSet Res;
+ for (const auto &P : LR) {
+ if (P.second.any())
+ Res.insert(P);
+ }
+ return Res;
+}
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// GCNRegPressure
+
+unsigned GCNRegPressure::getRegKind(unsigned Reg,
+ const MachineRegisterInfo &MRI) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ const auto RC = MRI.getRegClass(Reg);
+ auto STI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
+ return STI->isSGPRClass(RC) ?
+ (RC->getSize() == 4 ? SGPR32 : SGPR_TUPLE) :
+ (RC->getSize() == 4 ? VGPR32 : VGPR_TUPLE);
+}
+
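+// Updates the pressure values for a lane mask transition of Reg. If the mask
+// shrank, the masks are swapped and the sign negated, so only the growing
+// case needs to be handled below.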
+void GCNRegPressure::inc(unsigned Reg,
+ LaneBitmask PrevMask,
+ LaneBitmask NewMask,
+ const MachineRegisterInfo &MRI) {
+ if (NewMask == PrevMask)
+ return;
+
+ int Sign = 1;
+ if (NewMask < PrevMask) {
+ std::swap(NewMask, PrevMask);
+ Sign = -1;
+ }
+#ifndef NDEBUG
+ const auto MaxMask = MRI.getMaxLaneMaskForVReg(Reg);
+#endif
+ switch (auto Kind = getRegKind(Reg, MRI)) {
+ case SGPR32:
+ case VGPR32:
+ assert(PrevMask.none() && NewMask == MaxMask);
+ Value[Kind] += Sign;
+ break;
+
+ case SGPR_TUPLE:
+ case VGPR_TUPLE:
+ assert(NewMask < MaxMask || NewMask == MaxMask);
+ assert(PrevMask < NewMask);
+
+ Value[Kind == SGPR_TUPLE ? SGPR32 : VGPR32] +=
+ Sign * countPopulation((~PrevMask & NewMask).getAsInteger());
+
+ if (PrevMask.none()) {
+ assert(NewMask.any());
+ Value[Kind] += Sign * MRI.getPressureSets(Reg).getWeight();
+ }
+ break;
+
+ default: llvm_unreachable("Unknown register kind");
+ }
+}
+
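+// Returns true if this pressure is considered lower than O's: higher
+// achievable occupancy wins first; ties are broken by the smaller register
+// tuple weight and finally by the raw count of the more occupancy-critical
+// register file.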
+bool GCNRegPressure::less(const SISubtarget &ST,
+ const GCNRegPressure& O,
+ unsigned MaxOccupancy) const {
+ const auto SGPROcc = std::min(MaxOccupancy,
+ ST.getOccupancyWithNumSGPRs(getSGPRNum()));
+ const auto VGPROcc = std::min(MaxOccupancy,
+ ST.getOccupancyWithNumVGPRs(getVGPRNum()));
+ const auto OtherSGPROcc = std::min(MaxOccupancy,
+ ST.getOccupancyWithNumSGPRs(O.getSGPRNum()));
+ const auto OtherVGPROcc = std::min(MaxOccupancy,
+ ST.getOccupancyWithNumVGPRs(O.getVGPRNum()));
+
+ const auto Occ = std::min(SGPROcc, VGPROcc);
+ const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc);
+ if (Occ != OtherOcc)
+ return Occ > OtherOcc;
+
+ bool SGPRImportant = SGPROcc < VGPROcc;
+ const bool OtherSGPRImportant = OtherSGPROcc < OtherVGPROcc;
+
+ // If the two pressures disagree on what is more important, compare VGPRs.
+ if (SGPRImportant != OtherSGPRImportant) {
+ SGPRImportant = false;
+ }
+
+ // Compare pressure of the large register tuples.
+ bool SGPRFirst = SGPRImportant;
+ for (int I = 2; I > 0; --I, SGPRFirst = !SGPRFirst) {
+ if (SGPRFirst) {
+ auto SW = getSGPRTuplesWeight();
+ auto OtherSW = O.getSGPRTuplesWeight();
+ if (SW != OtherSW)
+ return SW < OtherSW;
+ } else {
+ auto VW = getVGPRTuplesWeight();
+ auto OtherVW = O.getVGPRTuplesWeight();
+ if (VW != OtherVW)
+ return VW < OtherVW;
+ }
+ }
+ return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()) :
+ (getVGPRNum() < O.getVGPRNum());
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
+void GCNRegPressure::print(raw_ostream &OS, const SISubtarget *ST) const {
+ OS << "VGPRs: " << getVGRPNum();
+ if (ST) OS << "(O" << ST->getOccupancyWithNumVGPRs(getVGRPNum()) << ')';
+ OS << ", SGPRs: " << getSGRPNum();
+ if (ST) OS << "(O" << ST->getOccupancyWithNumSGPRs(getSGRPNum()) << ')';
+ OS << ", LVGPR WT: " << getVGPRTuplesWeight()
+ << ", LSGPR WT: " << getSGPRTuplesWeight();
+ if (ST) OS << " -> Occ: " << getOccupancy(*ST);
+ OS << '\n';
+}
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// GCNRPTracker
+
+LaneBitmask llvm::getLiveLaneMask(unsigned Reg,
+ SlotIndex SI,
+ const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI) {
+ assert(!MRI.reg_nodbg_empty(Reg));
+ LaneBitmask LiveMask;
+ const auto &LI = LIS.getInterval(Reg);
+ if (LI.hasSubRanges()) {
+ for (const auto &S : LI.subranges())
+ if (S.liveAt(SI)) {
+ LiveMask |= S.LaneMask;
+ assert(LiveMask < MRI.getMaxLaneMaskForVReg(Reg) ||
+ LiveMask == MRI.getMaxLaneMaskForVReg(Reg));
+ }
+ } else if (LI.liveAt(SI)) {
+ LiveMask = MRI.getMaxLaneMaskForVReg(Reg);
+ }
+ return LiveMask;
+}
+
+GCNRPTracker::LiveRegSet llvm::getLiveRegs(SlotIndex SI,
+ const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI) {
+ GCNRPTracker::LiveRegSet LiveRegs;
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ auto Reg = TargetRegisterInfo::index2VirtReg(I);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+ auto LiveMask = getLiveLaneMask(Reg, SI, LIS, MRI);
+ if (LiveMask.any())
+ LiveRegs[Reg] = LiveMask;
+ }
+ return LiveRegs;
+}
+
+void GCNUpwardRPTracker::reset(const MachineInstr &MI) {
+ MRI = &MI.getParent()->getParent()->getRegInfo();
+ LiveRegs = getLiveRegsAfter(MI, LIS);
+ MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
+}
+
+LaneBitmask GCNUpwardRPTracker::getDefRegMask(const MachineOperand &MO) const {
+ assert(MO.isDef() && MO.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()));
+
+ // We don't rely on the read-undef flag because it isn't set correctly yet
+ // when tracking a tentative schedule. This still works because the use
+ // masks have already been tracked using LIS before this point.
+ return MO.getSubReg() == 0 ?
+ MRI->getMaxLaneMaskForVReg(MO.getReg()) :
+ MRI->getTargetRegisterInfo()->getSubRegIndexLaneMask(MO.getSubReg());
+}
+
+LaneBitmask GCNUpwardRPTracker::getUsedRegMask(const MachineOperand &MO) const {
+ assert(MO.isUse() && MO.isReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()));
+
+ if (auto SubReg = MO.getSubReg())
+ return MRI->getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg);
+
+ auto MaxMask = MRI->getMaxLaneMaskForVReg(MO.getReg());
+ if (MaxMask.getAsInteger() == 1) // cannot have subregs
+ return MaxMask;
+
+ // For a tentative schedule LIS isn't updated yet, but the live mask must
+ // remain the same for any schedule: subreg defs can be reordered, yet they
+ // must all dominate the uses anyway.
+ auto SI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
+ return getLiveLaneMask(MO.getReg(), SI, LIS, *MRI);
+}
+
+void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
+ assert(MRI && "call reset first");
+
+ LastTrackedMI = &MI;
+
+ if (MI.isDebugValue())
+ return;
+
+ // Process all defs first to ensure early clobbers are handled correctly,
+ // iterating over operands() to also catch implicit defs.
+ for (const auto &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef() ||
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+
+ auto Reg = MO.getReg();
+ auto &LiveMask = LiveRegs[Reg];
+ auto PrevMask = LiveMask;
+ LiveMask &= ~getDefRegMask(MO);
+ CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
+ }
+
+ // Then process all uses.
+ for (const auto &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.readsReg() ||
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+
+ auto Reg = MO.getReg();
+ auto &LiveMask = LiveRegs[Reg];
+ auto PrevMask = LiveMask;
+ LiveMask |= getUsedRegMask(MO);
+ CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
+ }
+
+ MaxPressure = max(MaxPressure, CurPressure);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
+static void reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
+ const GCNRPTracker::LiveRegSet &TrackedLR,
+ const TargetRegisterInfo *TRI) {
+ for (auto const &P : TrackedLR) {
+ auto I = LISLR.find(P.first);
+ if (I == LISLR.end()) {
+ dbgs() << " " << PrintReg(P.first, TRI)
+ << ":L" << PrintLaneMask(P.second)
+ << " isn't found in LIS reported set\n";
+ } else if (I->second != P.second) {
+ dbgs() << " " << PrintReg(P.first, TRI)
+ << " masks don't match: LIS reported "
+ << PrintLaneMask(I->second)
+ << ", tracked "
+ << PrintLaneMask(P.second)
+ << '\n';
+ }
+ }
+ for (auto const &P : LISLR) {
+ auto I = TrackedLR.find(P.first);
+ if (I == TrackedLR.end()) {
+ dbgs() << " " << PrintReg(P.first, TRI)
+ << ":L" << PrintLaneMask(P.second)
+ << " isn't found in tracked set\n";
+ }
+ }
+}
+
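+// Recomputes the live register set and the pressure from LIS at the last
+// tracked instruction and verifies they match the incrementally tracked
+// state.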
+bool GCNUpwardRPTracker::isValid() const {
+ const auto &SI = LIS.getInstructionIndex(*LastTrackedMI).getBaseIndex();
+ const auto LISLR = llvm::getLiveRegs(SI, LIS, *MRI);
+ const auto TrackedLR = stripEmpty(LiveRegs);
+
+ if (!isEqual(LISLR, TrackedLR)) {
+ dbgs() << "\nGCNUpwardRPTracker error: Tracked and"
+ " LIS reported livesets mismatch:\n";
+ printLivesAt(SI, LIS, *MRI);
+ reportMismatch(LISLR, TrackedLR, MRI->getTargetRegisterInfo());
+ return false;
+ }
+
+ auto LISPressure = getRegPressure(*MRI, LISLR);
+ if (LISPressure != CurPressure) {
+ dbgs() << "GCNUpwardRPTracker error: Pressure sets different\nTracked: ";
+ CurPressure.print(dbgs());
+ dbgs() << "LIS rpt: ";
+ LISPressure.print(dbgs());
+ return false;
+ }
+ return true;
+}
+
+#endif
diff --git a/lib/Target/AMDGPU/GCNRegPressure.h b/lib/Target/AMDGPU/GCNRegPressure.h
new file mode 100644
index 000000000000..82e76a7bfddc
--- /dev/null
+++ b/lib/Target/AMDGPU/GCNRegPressure.h
@@ -0,0 +1,170 @@
+//===---------------------- GCNRegPressure.h -*- C++ -*--------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Data structures for tracking SGPR/VGPR pressure and live register
+/// sets for GCN scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
+
+#include "AMDGPUSubtarget.h"
+
+#include <limits>
+
+namespace llvm {
+
+struct GCNRegPressure {
+ enum RegKind {
+ SGPR32,
+ SGPR_TUPLE,
+ VGPR32,
+ VGPR_TUPLE,
+ TOTAL_KINDS
+ };
+
+ GCNRegPressure() {
+ clear();
+ }
+
+ bool empty() const { return getSGPRNum() == 0 && getVGPRNum() == 0; }
+
+ void clear() { std::fill(&Value[0], &Value[TOTAL_KINDS], 0); }
+
+ unsigned getSGPRNum() const { return Value[SGPR32]; }
+ unsigned getVGPRNum() const { return Value[VGPR32]; }
+
+ unsigned getVGPRTuplesWeight() const { return Value[VGPR_TUPLE]; }
+ unsigned getSGPRTuplesWeight() const { return Value[SGPR_TUPLE]; }
+
+ unsigned getOccupancy(const SISubtarget &ST) const {
+ return std::min(ST.getOccupancyWithNumSGPRs(getSGPRNum()),
+ ST.getOccupancyWithNumVGPRs(getVGPRNum()));
+ }
+
+ void inc(unsigned Reg,
+ LaneBitmask PrevMask,
+ LaneBitmask NewMask,
+ const MachineRegisterInfo &MRI);
+
+ bool higherOccupancy(const SISubtarget &ST, const GCNRegPressure& O) const {
+ return getOccupancy(ST) > O.getOccupancy(ST);
+ }
+
+ bool less(const SISubtarget &ST, const GCNRegPressure& O,
+ unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
+
+ bool operator==(const GCNRegPressure &O) const {
+ return std::equal(&Value[0], &Value[TOTAL_KINDS], O.Value);
+ }
+
+ bool operator!=(const GCNRegPressure &O) const {
+ return !(*this == O);
+ }
+
+ void print(raw_ostream &OS, const SISubtarget *ST=nullptr) const;
+ void dump() const { print(dbgs()); }
+
+private:
+ unsigned Value[TOTAL_KINDS];
+
+ static unsigned getRegKind(unsigned Reg, const MachineRegisterInfo &MRI);
+
+ friend GCNRegPressure max(const GCNRegPressure &P1,
+ const GCNRegPressure &P2);
+};
+
+inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) {
+ GCNRegPressure Res;
+ for (unsigned I = 0; I < GCNRegPressure::TOTAL_KINDS; ++I)
+ Res.Value[I] = std::max(P1.Value[I], P2.Value[I]);
+ return Res;
+}
+
+class GCNRPTracker {
+public:
+ typedef DenseMap<unsigned, LaneBitmask> LiveRegSet;
+
+protected:
+ LiveRegSet LiveRegs;
+ GCNRegPressure CurPressure, MaxPressure;
+ const MachineInstr *LastTrackedMI = nullptr;
+ mutable const MachineRegisterInfo *MRI = nullptr;
+ GCNRPTracker() {}
+public:
+ // Live registers for the current state.
+ const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; }
+ const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }
+
+ // Returns MaxPressure, resetting it.
+ decltype(MaxPressure) moveMaxPressure() {
+ auto Res = MaxPressure;
+ MaxPressure.clear();
+ return Res;
+ }
+ decltype(LiveRegs) moveLiveRegs() {
+ return std::move(LiveRegs);
+ }
+};
+
+class GCNUpwardRPTracker : public GCNRPTracker {
+ const LiveIntervals &LIS;
+ LaneBitmask getDefRegMask(const MachineOperand &MO) const;
+ LaneBitmask getUsedRegMask(const MachineOperand &MO) const;
+public:
+ GCNUpwardRPTracker(const LiveIntervals &LIS_) : LIS(LIS_) {}
+ // Resets the tracker to the point just below MI,
+ // filling the live regs at this point using LIS.
+ void reset(const MachineInstr &MI);
+
+ // Moves to the state just above MI.
+ void recede(const MachineInstr &MI);
+
+ // Checks whether the tracker's state after receding MI corresponds
+ // to the one reported by LIS.
+ bool isValid() const;
+};
+
+LaneBitmask getLiveLaneMask(unsigned Reg,
+ SlotIndex SI,
+ const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI);
+
+GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI,
+ const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI);
+
+inline GCNRPTracker::LiveRegSet getLiveRegsAfter(const MachineInstr &MI,
+ const LiveIntervals &LIS) {
+ return getLiveRegs(LIS.getInstructionIndex(MI).getDeadSlot(), LIS,
+ MI.getParent()->getParent()->getRegInfo());
+}
+
+inline GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI,
+ const LiveIntervals &LIS) {
+ return getLiveRegs(LIS.getInstructionIndex(MI).getBaseIndex(), LIS,
+ MI.getParent()->getParent()->getRegInfo());
+}
+
+template <typename Range>
+GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI,
+ Range &&LiveRegs) {
+ GCNRegPressure Res;
+ for (const auto &RM : LiveRegs)
+ Res.inc(RM.first, LaneBitmask::getNone(), RM.second, MRI);
+ return Res;
+}
+
+void printLivesAt(SlotIndex SI,
+ const LiveIntervals &LIS,
+ const MachineRegisterInfo &MRI);
+
+} // End namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNREGPRESSURE_H
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 2f88033c807f..ea305a92fc60 100644
--- a/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -18,6 +18,7 @@
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/Support/MathExtras.h"
#define DEBUG_TYPE "misched"
@@ -25,7 +26,7 @@ using namespace llvm;
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
const MachineSchedContext *C) :
- GenericScheduler(C) { }
+ GenericScheduler(C), TargetOccupancy(0), MF(nullptr) { }
static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs,
const MachineFunction &MF) {
@@ -35,18 +36,46 @@ static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs,
unsigned MinRegOccupancy = std::min(ST.getOccupancyWithNumSGPRs(SGPRs),
ST.getOccupancyWithNumVGPRs(VGPRs));
return std::min(MinRegOccupancy,
- ST.getOccupancyWithLocalMemSize(MFI->getLDSSize()));
+ ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
+ *MF.getFunction()));
+}
+
+void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
+ GenericScheduler::initialize(DAG);
+
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
+
+ MF = &DAG->MF;
+
+ const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+
+ // FIXME: This is also necessary, because some passes that run after
+ // scheduling and before regalloc increase register pressure.
+ const int ErrorMargin = 3;
+
+ SGPRExcessLimit = Context->RegClassInfo
+ ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin;
+ VGPRExcessLimit = Context->RegClassInfo
+ ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin;
+ if (TargetOccupancy) {
+ SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true);
+ VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy);
+ } else {
+ SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
+ SRI->getSGPRPressureSet());
+ VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
+ SRI->getVGPRPressureSet());
+ }
+
+ SGPRCriticalLimit -= ErrorMargin;
+ VGPRCriticalLimit -= ErrorMargin;
}
void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
bool AtTop, const RegPressureTracker &RPTracker,
const SIRegisterInfo *SRI,
- int SGPRPressure,
- int VGPRPressure,
- int SGPRExcessLimit,
- int VGPRExcessLimit,
- int SGPRCriticalLimit,
- int VGPRCriticalLimit) {
+ unsigned SGPRPressure,
+ unsigned VGPRPressure) {
Cand.SU = SU;
Cand.AtTop = AtTop;
@@ -66,8 +95,8 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
}
- int NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
- int NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];
+ unsigned NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
+ unsigned NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];
// If two instructions increase the pressure of different register sets
// by the same amount, the generic scheduler will prefer to schedule the
@@ -77,7 +106,7 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
// only for VGPRs or only for SGPRs.
// FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
- const int MaxVGPRPressureInc = 16;
+ const unsigned MaxVGPRPressureInc = 16;
bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
@@ -86,11 +115,6 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
// to increase the likelihood we don't go over the limits. We should improve
// the analysis to look through dependencies to find the path with the least
// register pressure.
- // FIXME: This is also necessary, because some passes that run after
- // scheduling and before regalloc increase register pressure.
- const int ErrorMargin = 3;
- VGPRExcessLimit -= ErrorMargin;
- SGPRExcessLimit -= ErrorMargin;
// We only need to update the RPDelata for instructions that increase
// register pressure. Instructions that decrease or keep reg pressure
@@ -103,7 +127,7 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
- Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure = SGPRExcessLimit);
+ Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
}
// Register pressure is considered 'CRITICAL' if it is approaching a value
@@ -111,9 +135,6 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
// register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
// has the same cost, so we don't need to prefer one over the other.
- VGPRCriticalLimit -= ErrorMargin;
- SGPRCriticalLimit -= ErrorMargin;
-
int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
@@ -134,27 +155,16 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
const CandPolicy &ZonePolicy,
const RegPressureTracker &RPTracker,
SchedCandidate &Cand) {
- const SISubtarget &ST = DAG->MF.getSubtarget<SISubtarget>();
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()];
unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()];
- unsigned SGPRExcessLimit =
- Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
- unsigned VGPRExcessLimit =
- Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
- unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF);
- unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves, true);
- unsigned VGPRCriticalLimit = SRI->getMaxNumVGPRs(MaxWaves);
-
ReadyQueue &Q = Zone.Available;
for (SUnit *SU : Q) {
SchedCandidate TryCand(ZonePolicy);
initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
- SGPRPressure, VGPRPressure,
- SGPRExcessLimit, VGPRExcessLimit,
- SGPRCriticalLimit, VGPRCriticalLimit);
+ SGPRPressure, VGPRPressure);
// Pass SchedBoundary only when comparing nodes from the same boundary.
SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
@@ -167,16 +177,6 @@ void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
}
}
-static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason) {
- switch (Reason) {
- default:
- return Reason;
- case GenericSchedulerBase::RegCritical:
- case GenericSchedulerBase::RegExcess:
- return -Reason;
- }
-}
-
// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
@@ -224,9 +224,9 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
// Pick best from BotCand and TopCand.
DEBUG(
dbgs() << "Top Cand: ";
- traceCandidate(BotCand);
- dbgs() << "Bot Cand: ";
traceCandidate(TopCand);
+ dbgs() << "Bot Cand: ";
+ traceCandidate(BotCand);
);
SchedCandidate Cand;
if (TopCand.Reason == BotCand.Reason) {
@@ -249,9 +249,7 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
} else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
Cand = BotCand;
} else {
- int TopRank = getBidirectionalReasonRank(TopCand.Reason);
- int BotRank = getBidirectionalReasonRank(BotCand.Reason);
- if (TopRank > BotRank) {
+ if (BotCand.Reason > TopCand.Reason) {
Cand = TopCand;
} else {
Cand = BotCand;
@@ -310,3 +308,255 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
return SU;
}
+
+GCNScheduleDAGMILive::GCNScheduleDAGMILive(MachineSchedContext *C,
+ std::unique_ptr<MachineSchedStrategy> S) :
+ ScheduleDAGMILive(C, std::move(S)),
+ ST(MF.getSubtarget<SISubtarget>()),
+ MFI(*MF.getInfo<SIMachineFunctionInfo>()),
+ StartingOccupancy(ST.getOccupancyWithLocalMemSize(MFI.getLDSSize(),
+ *MF.getFunction())),
+ MinOccupancy(StartingOccupancy), Stage(0) {
+
+ DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
+}
+
+void GCNScheduleDAGMILive::schedule() {
+ std::vector<MachineInstr*> Unsched;
+ Unsched.reserve(NumRegionInstrs);
+ for (auto &I : *this)
+ Unsched.push_back(&I);
+
+ std::pair<unsigned, unsigned> PressureBefore;
+ if (LIS) {
+ DEBUG(dbgs() << "Pressure before scheduling:\n");
+ discoverLiveIns();
+ PressureBefore = getRealRegPressure();
+ }
+
+ ScheduleDAGMILive::schedule();
+ if (Stage == 0)
+ Regions.push_back(std::make_pair(RegionBegin, RegionEnd));
+
+ if (!LIS)
+ return;
+
+ // Check the results of scheduling.
+ GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
+ DEBUG(dbgs() << "Pressure after scheduling:\n");
+ auto PressureAfter = getRealRegPressure();
+ LiveIns.clear();
+
+ if (PressureAfter.first <= S.SGPRCriticalLimit &&
+ PressureAfter.second <= S.VGPRCriticalLimit) {
+ DEBUG(dbgs() << "Pressure in desired limits, done.\n");
+ return;
+ }
+ unsigned WavesAfter = getMaxWaves(PressureAfter.first,
+ PressureAfter.second, MF);
+ unsigned WavesBefore = getMaxWaves(PressureBefore.first,
+ PressureBefore.second, MF);
+ DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore <<
+ ", after " << WavesAfter << ".\n");
+
+ // We could not keep the current target occupancy because of the just
+ // scheduled region. Record the new occupancy for the next scheduling cycle.
+ unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
+ if (NewOccupancy < MinOccupancy) {
+ MinOccupancy = NewOccupancy;
+ DEBUG(dbgs() << "Occupancy lowered for the function to "
+ << MinOccupancy << ".\n");
+ }
+
+ if (WavesAfter >= WavesBefore)
+ return;
+
+ DEBUG(dbgs() << "Attempting to revert scheduling.\n");
+ RegionEnd = RegionBegin;
+ for (MachineInstr *MI : Unsched) {
+ if (MI->getIterator() != RegionEnd) {
+ BB->remove(MI);
+ BB->insert(RegionEnd, MI);
+ LIS->handleMove(*MI, true);
+ }
+ // Reset read-undef flags and update them later.
+ for (auto &Op : MI->operands())
+ if (Op.isReg() && Op.isDef())
+ Op.setIsUndef(false);
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
+ if (ShouldTrackLaneMasks) {
+ // Adjust liveness and add missing dead+read-undef flags.
+ SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
+ RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
+ } else {
+ // Adjust for missing dead-def flags.
+ RegOpers.detectDeadDefs(*MI, *LIS);
+ }
+ RegionEnd = MI->getIterator();
+ ++RegionEnd;
+ DEBUG(dbgs() << "Scheduling " << *MI);
+ }
+ RegionBegin = Unsched.front()->getIterator();
+ if (Stage == 0)
+ Regions.back() = std::make_pair(RegionBegin, RegionEnd);
+
+ placeDebugValues();
+}
+
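+// Records the lane mask transition of Reg in PrevMask and adjusts the
+// running SGPR/VGPR counts by the number of 32-bit lanes added or removed.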
+static inline void setMask(const MachineRegisterInfo &MRI,
+ const SIRegisterInfo *SRI, unsigned Reg,
+ LaneBitmask &PrevMask, LaneBitmask NewMask,
+ unsigned &SGPRs, unsigned &VGPRs) {
+ int NewRegs = countPopulation(NewMask.getAsInteger()) -
+ countPopulation(PrevMask.getAsInteger());
+ if (SRI->isSGPRReg(MRI, Reg))
+ SGPRs += NewRegs;
+ if (SRI->isVGPR(MRI, Reg))
+ VGPRs += NewRegs;
+ assert((int)SGPRs >= 0 && (int)VGPRs >= 0);
+ PrevMask = NewMask;
+}
+
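+// Walks all virtual registers and records those live at the first
+// instruction of the region, together with their live lane masks, in
+// LiveIns; the resulting SGPR/VGPR counts form the region live-in pressure.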
+void GCNScheduleDAGMILive::discoverLiveIns() {
+ unsigned SGPRs = 0;
+ unsigned VGPRs = 0;
+
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
+ SlotIndex SI = LIS->getInstructionIndex(*begin()).getBaseIndex();
+ assert(SI.isValid());
+
+ DEBUG(dbgs() << "Region live-ins:");
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ if (MRI.reg_nodbg_empty(Reg))
+ continue;
+ const LiveInterval &LI = LIS->getInterval(Reg);
+ LaneBitmask LaneMask = LaneBitmask::getNone();
+ if (LI.hasSubRanges()) {
+ for (const auto &S : LI.subranges())
+ if (S.liveAt(SI))
+ LaneMask |= S.LaneMask;
+ } else if (LI.liveAt(SI)) {
+ LaneMask = MRI.getMaxLaneMaskForVReg(Reg);
+ }
+
+ if (LaneMask.any()) {
+ setMask(MRI, SRI, Reg, LiveIns[Reg], LaneMask, SGPRs, VGPRs);
+
+ DEBUG(dbgs() << ' ' << PrintVRegOrUnit(Reg, SRI) << ':'
+ << PrintLaneMask(LiveIns[Reg]));
+ }
+ }
+
+ LiveInPressure = std::make_pair(SGPRs, VGPRs);
+
+ DEBUG(dbgs() << "\nLive-in pressure:\nSGPR = " << SGPRs
+ << "\nVGPR = " << VGPRs << '\n');
+}
+
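+// Computes the maximum SGPR/VGPR pressure over the region by replaying
+// liveness from the recorded live-ins: masks are pruned where intervals die
+// and extended at defs, taking the running maximum after each instruction.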
+std::pair<unsigned, unsigned>
+GCNScheduleDAGMILive::getRealRegPressure() const {
+ unsigned SGPRs, MaxSGPRs, VGPRs, MaxVGPRs;
+ SGPRs = MaxSGPRs = LiveInPressure.first;
+ VGPRs = MaxVGPRs = LiveInPressure.second;
+
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
+ DenseMap<unsigned, LaneBitmask> LiveRegs(LiveIns);
+
+ for (const MachineInstr &MI : *this) {
+ if (MI.isDebugValue())
+ continue;
+ SlotIndex SI = LIS->getInstructionIndex(MI).getBaseIndex();
+ assert(SI.isValid());
+
+ // Remove dead registers or mask bits.
+ for (auto &It : LiveRegs) {
+ if (It.second.none())
+ continue;
+ const LiveInterval &LI = LIS->getInterval(It.first);
+ if (LI.hasSubRanges()) {
+ for (const auto &S : LI.subranges())
+ if (!S.liveAt(SI))
+ setMask(MRI, SRI, It.first, It.second, It.second & ~S.LaneMask,
+ SGPRs, VGPRs);
+ } else if (!LI.liveAt(SI)) {
+ setMask(MRI, SRI, It.first, It.second, LaneBitmask::getNone(),
+ SGPRs, VGPRs);
+ }
+ }
+
+ // Add new registers or mask bits.
+ for (const auto &MO : MI.defs()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ unsigned SubRegIdx = MO.getSubReg();
+ LaneBitmask LaneMask = SubRegIdx != 0
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : MRI.getMaxLaneMaskForVReg(Reg);
+ LaneBitmask &LM = LiveRegs[Reg];
+ setMask(MRI, SRI, Reg, LM, LM | LaneMask, SGPRs, VGPRs);
+ }
+ MaxSGPRs = std::max(MaxSGPRs, SGPRs);
+ MaxVGPRs = std::max(MaxVGPRs, VGPRs);
+ }
+
+ DEBUG(dbgs() << "Real region's register pressure:\nSGPR = " << MaxSGPRs
+ << "\nVGPR = " << MaxVGPRs << '\n');
+
+ return std::make_pair(MaxSGPRs, MaxVGPRs);
+}
+
+void GCNScheduleDAGMILive::finalizeSchedule() {
+ // Retry function scheduling if the resulting occupancy turned out to be
+ // lower than the one used for the first scheduling pass. This gives more
+ // freedom to schedule low register pressure blocks.
+ // Code is partially copied from MachineSchedulerBase::scheduleRegions().
+
+ if (!LIS || StartingOccupancy <= MinOccupancy)
+ return;
+
+ DEBUG(dbgs() << "Retrying function scheduling with lowest recorded occupancy "
+ << MinOccupancy << ".\n");
+
+ Stage++;
+ GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
+ S.setTargetOccupancy(MinOccupancy);
+
+ MachineBasicBlock *MBB = nullptr;
+ for (auto Region : Regions) {
+ RegionBegin = Region.first;
+ RegionEnd = Region.second;
+
+ if (RegionBegin->getParent() != MBB) {
+ if (MBB) finishBlock();
+ MBB = RegionBegin->getParent();
+ startBlock(MBB);
+ }
+
+ unsigned NumRegionInstrs = std::distance(begin(), end());
+ enterRegion(MBB, begin(), end(), NumRegionInstrs);
+
+ // Skip empty scheduling regions (0 or 1 schedulable instructions).
+ if (begin() == end() || begin() == std::prev(end())) {
+ exitRegion();
+ continue;
+ }
+ DEBUG(dbgs() << "********** MI Scheduling **********\n");
+ DEBUG(dbgs() << MF.getName()
+ << ":BB#" << MBB->getNumber() << " " << MBB->getName()
+ << "\n From: " << *begin() << " To: ";
+ if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
+ else dbgs() << "End";
+ dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
+
+ schedule();
+
+ exitRegion();
+ }
+ finishBlock();
+ LiveIns.shrink_and_clear();
+}
diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.h b/lib/Target/AMDGPU/GCNSchedStrategy.h
index 4cfc0cea81fb..15af232704ff 100644
--- a/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -18,13 +18,16 @@
namespace llvm {
+class SIMachineFunctionInfo;
class SIRegisterInfo;
+class SISubtarget;
/// This is a minimal scheduler strategy. The main difference between this
/// and the GenericScheduler is that GCNSchedStrategy uses different
/// heuristics to determine excess/critical pressure sets. Its goal is to
/// maximize kernel occupancy (i.e. maximum number of waves per simd).
class GCNMaxOccupancySchedStrategy : public GenericScheduler {
+ friend class GCNScheduleDAGMILive;
SUnit *pickNodeBidirectional(bool &IsTopNode);
@@ -35,18 +38,65 @@ class GCNMaxOccupancySchedStrategy : public GenericScheduler {
void initCandidate(SchedCandidate &Cand, SUnit *SU,
bool AtTop, const RegPressureTracker &RPTracker,
const SIRegisterInfo *SRI,
- int SGPRPressure, int VGPRPressure,
- int SGPRExcessLimit, int VGPRExcessLimit,
- int SGPRCriticalLimit, int VGPRCriticalLimit);
+ unsigned SGPRPressure, unsigned VGPRPressure);
- void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
- SchedBoundary *Zone, const SIRegisterInfo *SRI,
- unsigned SGPRPressure, unsigned VGPRPressure);
+ unsigned SGPRExcessLimit;
+ unsigned VGPRExcessLimit;
+ unsigned SGPRCriticalLimit;
+ unsigned VGPRCriticalLimit;
+
+ unsigned TargetOccupancy;
+
+ MachineFunction *MF;
public:
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
SUnit *pickNode(bool &IsTopNode) override;
+
+ void initialize(ScheduleDAGMI *DAG) override;
+
+ void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
+};
+
+class GCNScheduleDAGMILive : public ScheduleDAGMILive {
+
+ const SISubtarget &ST;
+
+ const SIMachineFunctionInfo &MFI;
+
+ // Occupancy target at the beginning of the function scheduling cycle.
+ unsigned StartingOccupancy;
+
+ // Minimal real occupancy recorded for the function.
+ unsigned MinOccupancy;
+
+ // Scheduling stage number.
+ unsigned Stage;
+
+ // Vector of regions recorded for later rescheduling.
+ SmallVector<std::pair<MachineBasicBlock::iterator,
+ MachineBasicBlock::iterator>, 32> Regions;
+
+ // Region live-ins.
+ DenseMap<unsigned, LaneBitmask> LiveIns;
+
+ // Number of live-ins to the current region, first SGPR then VGPR.
+ std::pair<unsigned, unsigned> LiveInPressure;
+
+ // Collect current region live-ins.
+ void discoverLiveIns();
+
+ // Return current region pressure. First value is SGPR number, second is VGPR.
+ std::pair<unsigned, unsigned> getRealRegPressure() const;
+
+public:
+ GCNScheduleDAGMILive(MachineSchedContext *C,
+ std::unique_ptr<MachineSchedStrategy> S);
+
+ void schedule() override;
+
+ void finalizeSchedule() override;
};
} // End namespace llvm
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 7172a0aa7167..a817ff3cbaf0 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -113,7 +113,7 @@ void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
uint16_t Imm = MI->getOperand(OpNo).getImm();
if (Imm != 0) {
- O << " offset:";
+ O << ((OpNo == 0)? "offset:" : " offset:");
printU16ImmDecOperand(MI, OpNo, O);
}
}
@@ -375,6 +375,14 @@ void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
O << formatHex(static_cast<uint64_t>(Imm));
}
+void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ uint16_t Lo16 = static_cast<uint16_t>(Imm);
+ assert(Lo16 == static_cast<uint16_t>(Imm >> 16));
+ printImmediate16(Lo16, STI, O);
+}
+
void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -489,6 +497,10 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::OPERAND_REG_IMM_FP16:
printImmediate16(Op.getImm(), STI, O);
break;
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ printImmediateV216(Op.getImm(), STI, O);
+ break;
case MCOI::OPERAND_UNKNOWN:
case MCOI::OPERAND_PCREL:
O << formatDec(Op.getImm());
@@ -531,13 +543,34 @@ void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned InputModifiers = MI->getOperand(OpNo).getImm();
- if (InputModifiers & SISrcMods::NEG)
- O << '-';
+
+ // Use 'neg(...)' instead of '-' to avoid ambiguity.
+ // This is important for integer literals because
+ // -1 is not the same value as neg(1).
+ bool NegMnemo = false;
+
+ if (InputModifiers & SISrcMods::NEG) {
+ if (OpNo + 1 < MI->getNumOperands() &&
+ (InputModifiers & SISrcMods::ABS) == 0) {
+ const MCOperand &Op = MI->getOperand(OpNo + 1);
+ NegMnemo = Op.isImm() || Op.isFPImm();
+ }
+ if (NegMnemo) {
+ O << "neg(";
+ } else {
+ O << '-';
+ }
+ }
+
if (InputModifiers & SISrcMods::ABS)
O << '|';
printOperand(MI, OpNo + 1, STI, O);
if (InputModifiers & SISrcMods::ABS)
O << '|';
+
+ if (NegMnemo) {
+ O << ')';
+ }
}
void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
@@ -672,11 +705,19 @@ template <unsigned N>
void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- int EnIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::en);
+ unsigned Opc = MI->getOpcode();
+ int EnIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::en);
unsigned En = MI->getOperand(EnIdx).getImm();
- // FIXME: What do we do with compr? The meaning of en changes depending on if
- // compr is set.
+ int ComprIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::compr);
+
+ // If compr is set, print as src0, src0, src1, src1
+ if (MI->getOperand(ComprIdx).getImm()) {
+ if (N == 1 || N == 2)
+ --OpNo;
+ else if (N == 3)
+ OpNo -= 2;
+ }
if (En & (1 << N))
printRegOperand(MI->getOperand(OpNo).getReg(), O, MRI);
@@ -730,6 +771,71 @@ void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
}
}
+static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod) {
+ int DefaultValue = (Mod == SISrcMods::OP_SEL_1);
+
+ for (int I = 0; I < NumOps; ++I) {
+ if (!!(Ops[I] & Mod) != DefaultValue)
+ return false;
+ }
+
+ return true;
+}
+
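+// Prints a packed-math modifier list such as " op_sel:[0,1,0]". Nothing is
+// printed when every present source operand carries the default value for
+// Mod.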
+static void printPackedModifier(const MCInst *MI, StringRef Name, unsigned Mod,
+ raw_ostream &O) {
+ unsigned Opc = MI->getOpcode();
+ int NumOps = 0;
+ int Ops[3];
+
+ for (int OpName : { AMDGPU::OpName::src0_modifiers,
+ AMDGPU::OpName::src1_modifiers,
+ AMDGPU::OpName::src2_modifiers }) {
+ int Idx = AMDGPU::getNamedOperandIdx(Opc, OpName);
+ if (Idx == -1)
+ break;
+
+ Ops[NumOps++] = MI->getOperand(Idx).getImm();
+ }
+
+ if (allOpsDefaultValue(Ops, NumOps, Mod))
+ return;
+
+ O << Name;
+ for (int I = 0; I < NumOps; ++I) {
+ if (I != 0)
+ O << ',';
+
+ O << !!(Ops[I] & Mod);
+ }
+
+ O << ']';
+}
+
+void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
+}
+
+void AMDGPUInstPrinter::printOpSelHi(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printPackedModifier(MI, " op_sel_hi:[", SISrcMods::OP_SEL_1, O);
+}
+
+void AMDGPUInstPrinter::printNegLo(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printPackedModifier(MI, " neg_lo:[", SISrcMods::NEG, O);
+}
+
+void AMDGPUInstPrinter::printNegHi(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printPackedModifier(MI, " neg_hi:[", SISrcMods::NEG_HI, O);
+}
+
void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -1057,27 +1163,28 @@ void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- IsaVersion IV = getIsaVersion(STI.getFeatureBits());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits());
unsigned SImm16 = MI->getOperand(OpNo).getImm();
unsigned Vmcnt, Expcnt, Lgkmcnt;
- decodeWaitcnt(IV, SImm16, Vmcnt, Expcnt, Lgkmcnt);
+ decodeWaitcnt(ISA, SImm16, Vmcnt, Expcnt, Lgkmcnt);
bool NeedSpace = false;
- if (Vmcnt != getVmcntBitMask(IV)) {
+ if (Vmcnt != getVmcntBitMask(ISA)) {
O << "vmcnt(" << Vmcnt << ')';
NeedSpace = true;
}
- if (Expcnt != getExpcntBitMask(IV)) {
+ if (Expcnt != getExpcntBitMask(ISA)) {
if (NeedSpace)
O << ' ';
O << "expcnt(" << Expcnt << ')';
NeedSpace = true;
}
- if (Lgkmcnt != getLgkmcntBitMask(IV)) {
+ if (Lgkmcnt != getLgkmcntBitMask(ISA)) {
if (NeedSpace)
O << ' ';
O << "lgkmcnt(" << Lgkmcnt << ')';
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index a6d348ff0f12..c0b8e5c51089 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -90,6 +90,8 @@ private:
raw_ostream &O);
void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printImmediateV216(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
raw_ostream &O);
void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
@@ -117,6 +119,14 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printSDWADstUnused(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printOpSel(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printOpSelHi(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNegLo(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNegHi(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printInterpSlot(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printInterpAttr(const MCInst *MI, unsigned OpNo,
diff --git a/lib/Target/AMDGPU/LLVMBuild.txt b/lib/Target/AMDGPU/LLVMBuild.txt
index bbdd17737cf0..c54a13c4b4d8 100644
--- a/lib/Target/AMDGPU/LLVMBuild.txt
+++ b/lib/Target/AMDGPU/LLVMBuild.txt
@@ -30,5 +30,5 @@ has_disassembler = 1
type = Library
name = AMDGPUCodeGen
parent = AMDGPU
-required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils Vectorize
+required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils Vectorize GlobalISel
add_to_library_groups = AMDGPU
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index ffb92aae599e..f3266fe82955 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -37,7 +37,7 @@ public:
bool &IsResolved) override;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override {
@@ -131,7 +131,7 @@ void AMDGPUAsmBackend::processFixupValue(const MCAssembler &Asm,
void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ bool IsPCRel, MCContext &Ctx) const {
if (!Value)
return; // Doesn't change encoding.
@@ -164,7 +164,20 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
}
bool AMDGPUAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
- OW->WriteZeros(Count);
+ // If the count is not 4-byte aligned, we must be writing data into the text
+ // section (otherwise we have unaligned instructions, and thus have far
+ // bigger problems), so just write zeros instead.
+ OW->WriteZeros(Count % 4);
+
+ // We are properly aligned, so write NOPs as requested.
+ Count /= 4;
+
+ // FIXME: R600 support.
+ // s_nop 0
+ const uint32_t Encoded_S_NOP_0 = 0xbf800000;
+
+ for (uint64_t I = 0; I != Count; ++I)
+ OW->write32(Encoded_S_NOP_0);
return true;
}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
new file mode 100644
index 000000000000..816e8c744b27
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
@@ -0,0 +1,422 @@
+//===--- AMDGPUCodeObjectMetadata.h -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Code Object Metadata definitions and in-memory
+/// representations.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
+
+#include <cstdint>
+#include <string>
+#include <system_error>
+#include <vector>
+
+namespace llvm {
+namespace AMDGPU {
+
+//===----------------------------------------------------------------------===//
+// Code Object Metadata.
+//===----------------------------------------------------------------------===//
+namespace CodeObject {
+
+/// \brief Code object metadata major version.
+constexpr uint32_t MetadataVersionMajor = 1;
+/// \brief Code object metadata minor version.
+constexpr uint32_t MetadataVersionMinor = 0;
+
+/// \brief Code object metadata beginning assembler directive.
+constexpr char MetadataAssemblerDirectiveBegin[] =
+ ".amdgpu_code_object_metadata";
+/// \brief Code object metadata ending assembler directive.
+constexpr char MetadataAssemblerDirectiveEnd[] =
+ ".end_amdgpu_code_object_metadata";
+
+/// \brief Access qualifiers.
+enum class AccessQualifier : uint8_t {
+ Default = 0,
+ ReadOnly = 1,
+ WriteOnly = 2,
+ ReadWrite = 3,
+ Unknown = 0xff
+};
+
+/// \brief Address space qualifiers.
+enum class AddressSpaceQualifier : uint8_t {
+ Private = 0,
+ Global = 1,
+ Constant = 2,
+ Local = 3,
+ Generic = 4,
+ Region = 5,
+ Unknown = 0xff
+};
+
+/// \brief Value kinds.
+enum class ValueKind : uint8_t {
+ ByValue = 0,
+ GlobalBuffer = 1,
+ DynamicSharedPointer = 2,
+ Sampler = 3,
+ Image = 4,
+ Pipe = 5,
+ Queue = 6,
+ HiddenGlobalOffsetX = 7,
+ HiddenGlobalOffsetY = 8,
+ HiddenGlobalOffsetZ = 9,
+ HiddenNone = 10,
+ HiddenPrintfBuffer = 11,
+ HiddenDefaultQueue = 12,
+ HiddenCompletionAction = 13,
+ Unknown = 0xff
+};
+
+/// \brief Value types.
+enum class ValueType : uint8_t {
+ Struct = 0,
+ I8 = 1,
+ U8 = 2,
+ I16 = 3,
+ U16 = 4,
+ F16 = 5,
+ I32 = 6,
+ U32 = 7,
+ F32 = 8,
+ I64 = 9,
+ U64 = 10,
+ F64 = 11,
+ Unknown = 0xff
+};
+
+//===----------------------------------------------------------------------===//
+// Kernel Metadata.
+//===----------------------------------------------------------------------===//
+namespace Kernel {
+
+//===----------------------------------------------------------------------===//
+// Kernel Attributes Metadata.
+//===----------------------------------------------------------------------===//
+namespace Attrs {
+
+namespace Key {
+/// \brief Key for Kernel::Attr::Metadata::mReqdWorkGroupSize.
+constexpr char ReqdWorkGroupSize[] = "ReqdWorkGroupSize";
+/// \brief Key for Kernel::Attr::Metadata::mWorkGroupSizeHint.
+constexpr char WorkGroupSizeHint[] = "WorkGroupSizeHint";
+/// \brief Key for Kernel::Attr::Metadata::mVecTypeHint.
+constexpr char VecTypeHint[] = "VecTypeHint";
+} // end namespace Key
+
+/// \brief In-memory representation of kernel attributes metadata.
+struct Metadata final {
+ /// \brief 'reqd_work_group_size' attribute. Optional.
+ std::vector<uint32_t> mReqdWorkGroupSize = std::vector<uint32_t>();
+ /// \brief 'work_group_size_hint' attribute. Optional.
+ std::vector<uint32_t> mWorkGroupSizeHint = std::vector<uint32_t>();
+ /// \brief 'vec_type_hint' attribute. Optional.
+ std::string mVecTypeHint = std::string();
+
+ /// \brief Default constructor.
+ Metadata() = default;
+
+ /// \returns True if kernel attributes metadata is empty, false otherwise.
+ bool empty() const {
+ return mReqdWorkGroupSize.empty() &&
+ mWorkGroupSizeHint.empty() &&
+ mVecTypeHint.empty();
+ }
+
+ /// \returns True if kernel attributes metadata is not empty, false otherwise.
+ bool notEmpty() const {
+ return !empty();
+ }
+};
+
+} // end namespace Attrs
+
+//===----------------------------------------------------------------------===//
+// Kernel Argument Metadata.
+//===----------------------------------------------------------------------===//
+namespace Arg {
+
+namespace Key {
+/// \brief Key for Kernel::Arg::Metadata::mSize.
+constexpr char Size[] = "Size";
+/// \brief Key for Kernel::Arg::Metadata::mAlign.
+constexpr char Align[] = "Align";
+/// \brief Key for Kernel::Arg::Metadata::mValueKind.
+constexpr char ValueKind[] = "ValueKind";
+/// \brief Key for Kernel::Arg::Metadata::mValueType.
+constexpr char ValueType[] = "ValueType";
+/// \brief Key for Kernel::Arg::Metadata::mPointeeAlign.
+constexpr char PointeeAlign[] = "PointeeAlign";
+/// \brief Key for Kernel::Arg::Metadata::mAccQual.
+constexpr char AccQual[] = "AccQual";
+/// \brief Key for Kernel::Arg::Metadata::mAddrSpaceQual.
+constexpr char AddrSpaceQual[] = "AddrSpaceQual";
+/// \brief Key for Kernel::Arg::Metadata::mIsConst.
+constexpr char IsConst[] = "IsConst";
+/// \brief Key for Kernel::Arg::Metadata::mIsPipe.
+constexpr char IsPipe[] = "IsPipe";
+/// \brief Key for Kernel::Arg::Metadata::mIsRestrict.
+constexpr char IsRestrict[] = "IsRestrict";
+/// \brief Key for Kernel::Arg::Metadata::mIsVolatile.
+constexpr char IsVolatile[] = "IsVolatile";
+/// \brief Key for Kernel::Arg::Metadata::mName.
+constexpr char Name[] = "Name";
+/// \brief Key for Kernel::Arg::Metadata::mTypeName.
+constexpr char TypeName[] = "TypeName";
+} // end namespace Key
+
+/// \brief In-memory representation of kernel argument metadata.
+struct Metadata final {
+ /// \brief Size in bytes. Required.
+ uint32_t mSize = 0;
+ /// \brief Alignment in bytes. Required.
+ uint32_t mAlign = 0;
+ /// \brief Value kind. Required.
+ ValueKind mValueKind = ValueKind::Unknown;
+ /// \brief Value type. Required.
+ ValueType mValueType = ValueType::Unknown;
+ /// \brief Pointee alignment in bytes. Optional.
+ uint32_t mPointeeAlign = 0;
+ /// \brief Access qualifier. Optional.
+ AccessQualifier mAccQual = AccessQualifier::Unknown;
+ /// \brief Address space qualifier. Optional.
+ AddressSpaceQualifier mAddrSpaceQual = AddressSpaceQualifier::Unknown;
+ /// \brief True if 'const' qualifier is specified. Optional.
+ bool mIsConst = false;
+ /// \brief True if 'pipe' qualifier is specified. Optional.
+ bool mIsPipe = false;
+ /// \brief True if 'restrict' qualifier is specified. Optional.
+ bool mIsRestrict = false;
+ /// \brief True if 'volatile' qualifier is specified. Optional.
+ bool mIsVolatile = false;
+ /// \brief Name. Optional.
+ std::string mName = std::string();
+ /// \brief Type name. Optional.
+ std::string mTypeName = std::string();
+
+ /// \brief Default constructor.
+ Metadata() = default;
+};
+
+} // end namespace Arg
+
+//===----------------------------------------------------------------------===//
+// Kernel Code Properties Metadata.
+//===----------------------------------------------------------------------===//
+namespace CodeProps {
+
+namespace Key {
+/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentSize.
+constexpr char KernargSegmentSize[] = "KernargSegmentSize";
+/// \brief Key for Kernel::CodeProps::Metadata::mWorkgroupGroupSegmentSize.
+constexpr char WorkgroupGroupSegmentSize[] = "WorkgroupGroupSegmentSize";
+/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemPrivateSegmentSize.
+constexpr char WorkitemPrivateSegmentSize[] = "WorkitemPrivateSegmentSize";
+/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontNumSGPRs.
+constexpr char WavefrontNumSGPRs[] = "WavefrontNumSGPRs";
+/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemNumVGPRs.
+constexpr char WorkitemNumVGPRs[] = "WorkitemNumVGPRs";
+/// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentAlign.
+constexpr char KernargSegmentAlign[] = "KernargSegmentAlign";
+/// \brief Key for Kernel::CodeProps::Metadata::mGroupSegmentAlign.
+constexpr char GroupSegmentAlign[] = "GroupSegmentAlign";
+/// \brief Key for Kernel::CodeProps::Metadata::mPrivateSegmentAlign.
+constexpr char PrivateSegmentAlign[] = "PrivateSegmentAlign";
+/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontSize.
+constexpr char WavefrontSize[] = "WavefrontSize";
+} // end namespace Key
+
+/// \brief In-memory representation of kernel code properties metadata.
+struct Metadata final {
+ /// \brief Size in bytes of the kernarg segment memory. Kernarg segment memory
+ /// holds the values of the arguments to the kernel. Optional.
+ uint64_t mKernargSegmentSize = 0;
+ /// \brief Size in bytes of the group segment memory required by a workgroup.
+ /// This value does not include any dynamically allocated group segment memory
+ /// that may be added when the kernel is dispatched. Optional.
+ uint32_t mWorkgroupGroupSegmentSize = 0;
+ /// \brief Size in bytes of the private segment memory required by a workitem.
+ /// Private segment memory includes arg, spill and private segments. Optional.
+ uint32_t mWorkitemPrivateSegmentSize = 0;
+ /// \brief Total number of SGPRs used by a wavefront. Optional.
+ uint16_t mWavefrontNumSGPRs = 0;
+ /// \brief Total number of VGPRs used by a workitem. Optional.
+ uint16_t mWorkitemNumVGPRs = 0;
+ /// \brief Maximum byte alignment of variables used by the kernel in the
+ /// kernarg memory segment. Expressed as a power of two. Optional.
+ uint8_t mKernargSegmentAlign = 0;
+ /// \brief Maximum byte alignment of variables used by the kernel in the
+ /// group memory segment. Expressed as a power of two. Optional.
+ uint8_t mGroupSegmentAlign = 0;
+ /// \brief Maximum byte alignment of variables used by the kernel in the
+ /// private memory segment. Expressed as a power of two. Optional.
+ uint8_t mPrivateSegmentAlign = 0;
+ /// \brief Wavefront size. Expressed as a power of two. Optional.
+ uint8_t mWavefrontSize = 0;
+
+ /// \brief Default constructor.
+ Metadata() = default;
+
+ /// \returns True if kernel code properties metadata is empty, false
+ /// otherwise.
+ bool empty() const {
+ return !notEmpty();
+ }
+
+ /// \returns True if kernel code properties metadata is not empty, false
+ /// otherwise.
+ bool notEmpty() const {
+ return mKernargSegmentSize || mWorkgroupGroupSegmentSize ||
+ mWorkitemPrivateSegmentSize || mWavefrontNumSGPRs ||
+ mWorkitemNumVGPRs || mKernargSegmentAlign || mGroupSegmentAlign ||
+ mPrivateSegmentAlign || mWavefrontSize;
+ }
+};
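+
+// Note: assuming these fields mirror the amd_kernel_code_t fields they are
+// copied from (see MetadataStreamer::emitKernelCodeProps), "expressed as a
+// power of two" means the stored value is the exponent: e.g.
+// mKernargSegmentAlign == 4 denotes 16-byte alignment.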
+
+} // end namespace CodeProps
+
+//===----------------------------------------------------------------------===//
+// Kernel Debug Properties Metadata.
+//===----------------------------------------------------------------------===//
+namespace DebugProps {
+
+namespace Key {
+/// \brief Key for Kernel::DebugProps::Metadata::mDebuggerABIVersion.
+constexpr char DebuggerABIVersion[] = "DebuggerABIVersion";
+/// \brief Key for Kernel::DebugProps::Metadata::mReservedNumVGPRs.
+constexpr char ReservedNumVGPRs[] = "ReservedNumVGPRs";
+/// \brief Key for Kernel::DebugProps::Metadata::mReservedFirstVGPR.
+constexpr char ReservedFirstVGPR[] = "ReservedFirstVGPR";
+/// \brief Key for Kernel::DebugProps::Metadata::mPrivateSegmentBufferSGPR.
+constexpr char PrivateSegmentBufferSGPR[] = "PrivateSegmentBufferSGPR";
+/// \brief Key for
+/// Kernel::DebugProps::Metadata::mWavefrontPrivateSegmentOffsetSGPR.
+constexpr char WavefrontPrivateSegmentOffsetSGPR[] =
+ "WavefrontPrivateSegmentOffsetSGPR";
+} // end namespace Key
+
+/// \brief In-memory representation of kernel debug properties metadata.
+struct Metadata final {
+ /// \brief Debugger ABI version. Optional.
+ std::vector<uint32_t> mDebuggerABIVersion = std::vector<uint32_t>();
+  /// \brief Number of consecutive VGPRs reserved for debugger use. Must be 0
+  /// if mDebuggerABIVersion is not set. Optional.
+ uint16_t mReservedNumVGPRs = 0;
+  /// \brief First fixed VGPR reserved for debugger use. Must be uint16_t(-1)
+  /// if mDebuggerABIVersion is not set or mReservedNumVGPRs is 0. Optional.
+ uint16_t mReservedFirstVGPR = uint16_t(-1);
+  /// \brief Fixed SGPR of the first of 4 SGPRs used to hold the scratch V#
+  /// used for the entire kernel execution. Must be uint16_t(-1) if
+  /// mDebuggerABIVersion is not set or the SGPR is not used or not known.
+  /// Optional.
+ uint16_t mPrivateSegmentBufferSGPR = uint16_t(-1);
+ /// \brief Fixed SGPR used to hold the wave scratch offset for the entire
+ /// kernel execution. Must be uint16_t(-1) if mDebuggerABIVersion is not set
+  /// or the SGPR is not used or not known. Optional.
+ uint16_t mWavefrontPrivateSegmentOffsetSGPR = uint16_t(-1);
+
+ /// \brief Default constructor.
+ Metadata() = default;
+
+ /// \returns True if kernel debug properties metadata is empty, false
+ /// otherwise.
+ bool empty() const {
+ return !notEmpty();
+ }
+
+ /// \returns True if kernel debug properties metadata is not empty, false
+ /// otherwise.
+ bool notEmpty() const {
+ return !mDebuggerABIVersion.empty();
+ }
+};
+
+} // end namespace DebugProps
+
+namespace Key {
+/// \brief Key for Kernel::Metadata::mName.
+constexpr char Name[] = "Name";
+/// \brief Key for Kernel::Metadata::mLanguage.
+constexpr char Language[] = "Language";
+/// \brief Key for Kernel::Metadata::mLanguageVersion.
+constexpr char LanguageVersion[] = "LanguageVersion";
+/// \brief Key for Kernel::Metadata::mAttrs.
+constexpr char Attrs[] = "Attrs";
+/// \brief Key for Kernel::Metadata::mArgs.
+constexpr char Args[] = "Args";
+/// \brief Key for Kernel::Metadata::mCodeProps.
+constexpr char CodeProps[] = "CodeProps";
+/// \brief Key for Kernel::Metadata::mDebugProps.
+constexpr char DebugProps[] = "DebugProps";
+} // end namespace Key
+
+/// \brief In-memory representation of kernel metadata.
+struct Metadata final {
+ /// \brief Name. Required.
+ std::string mName = std::string();
+ /// \brief Language. Optional.
+ std::string mLanguage = std::string();
+ /// \brief Language version. Optional.
+ std::vector<uint32_t> mLanguageVersion = std::vector<uint32_t>();
+ /// \brief Attributes metadata. Optional.
+ Attrs::Metadata mAttrs = Attrs::Metadata();
+ /// \brief Arguments metadata. Optional.
+ std::vector<Arg::Metadata> mArgs = std::vector<Arg::Metadata>();
+ /// \brief Code properties metadata. Optional.
+ CodeProps::Metadata mCodeProps = CodeProps::Metadata();
+ /// \brief Debug properties metadata. Optional.
+ DebugProps::Metadata mDebugProps = DebugProps::Metadata();
+
+ /// \brief Default constructor.
+ Metadata() = default;
+};
+
+} // end namespace Kernel
+
+namespace Key {
+/// \brief Key for CodeObject::Metadata::mVersion.
+constexpr char Version[] = "Version";
+/// \brief Key for CodeObject::Metadata::mPrintf.
+constexpr char Printf[] = "Printf";
+/// \brief Key for CodeObject::Metadata::mKernels.
+constexpr char Kernels[] = "Kernels";
+} // end namespace Key
+
+/// \brief In-memory representation of code object metadata.
+struct Metadata final {
+ /// \brief Code object metadata version. Required.
+ std::vector<uint32_t> mVersion = std::vector<uint32_t>();
+ /// \brief Printf metadata. Optional.
+ std::vector<std::string> mPrintf = std::vector<std::string>();
+ /// \brief Kernels metadata. Optional.
+ std::vector<Kernel::Metadata> mKernels = std::vector<Kernel::Metadata>();
+
+ /// \brief Default constructor.
+ Metadata() = default;
+
+ /// \brief Converts \p YamlString to \p CodeObjectMetadata.
+ static std::error_code fromYamlString(std::string YamlString,
+ Metadata &CodeObjectMetadata);
+
+ /// \brief Converts \p CodeObjectMetadata to \p YamlString.
+ static std::error_code toYamlString(Metadata CodeObjectMetadata,
+ std::string &YamlString);
+};
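+
+// Minimal round-trip sketch (illustrative only; Yaml is a std::string assumed
+// to hold output of the MetadataStreamer):
+//
+//   CodeObject::Metadata MD;
+//   if (auto EC = CodeObject::Metadata::fromYamlString(Yaml, MD))
+//     return EC;                          // malformed metadata
+//   std::string RoundTripped;
+//   if (auto EC = CodeObject::Metadata::toYamlString(MD, RoundTripped))
+//     return EC;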
+
+} // end namespace CodeObject
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
new file mode 100644
index 000000000000..29a6ab9fbe93
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
@@ -0,0 +1,625 @@
+//===--- AMDGPUCodeObjectMetadataStreamer.cpp -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Code Object Metadata Streamer.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUCodeObjectMetadataStreamer.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/YAMLTraits.h"
+
+using namespace llvm::AMDGPU;
+using namespace llvm::AMDGPU::CodeObject;
+
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
+LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata)
+LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
+
+namespace llvm {
+
+static cl::opt<bool> DumpCodeObjectMetadata(
+ "amdgpu-dump-comd",
+ cl::desc("Dump AMDGPU Code Object Metadata"));
+static cl::opt<bool> VerifyCodeObjectMetadata(
+ "amdgpu-verify-comd",
+ cl::desc("Verify AMDGPU Code Object Metadata"));
+
+namespace yaml {
+
+template <>
+struct ScalarEnumerationTraits<AccessQualifier> {
+ static void enumeration(IO &YIO, AccessQualifier &EN) {
+ YIO.enumCase(EN, "Default", AccessQualifier::Default);
+ YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly);
+ YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly);
+ YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite);
+ }
+};
+
+template <>
+struct ScalarEnumerationTraits<AddressSpaceQualifier> {
+ static void enumeration(IO &YIO, AddressSpaceQualifier &EN) {
+ YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private);
+ YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global);
+ YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant);
+ YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local);
+ YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic);
+ YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region);
+ }
+};
+
+template <>
+struct ScalarEnumerationTraits<ValueKind> {
+ static void enumeration(IO &YIO, ValueKind &EN) {
+ YIO.enumCase(EN, "ByValue", ValueKind::ByValue);
+ YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer);
+ YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer);
+ YIO.enumCase(EN, "Sampler", ValueKind::Sampler);
+ YIO.enumCase(EN, "Image", ValueKind::Image);
+ YIO.enumCase(EN, "Pipe", ValueKind::Pipe);
+ YIO.enumCase(EN, "Queue", ValueKind::Queue);
+ YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX);
+ YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY);
+ YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ);
+ YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone);
+ YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer);
+ YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue);
+ YIO.enumCase(EN, "HiddenCompletionAction",
+ ValueKind::HiddenCompletionAction);
+ }
+};
+
+template <>
+struct ScalarEnumerationTraits<ValueType> {
+ static void enumeration(IO &YIO, ValueType &EN) {
+ YIO.enumCase(EN, "Struct", ValueType::Struct);
+ YIO.enumCase(EN, "I8", ValueType::I8);
+ YIO.enumCase(EN, "U8", ValueType::U8);
+ YIO.enumCase(EN, "I16", ValueType::I16);
+ YIO.enumCase(EN, "U16", ValueType::U16);
+ YIO.enumCase(EN, "F16", ValueType::F16);
+ YIO.enumCase(EN, "I32", ValueType::I32);
+ YIO.enumCase(EN, "U32", ValueType::U32);
+ YIO.enumCase(EN, "F32", ValueType::F32);
+ YIO.enumCase(EN, "I64", ValueType::I64);
+ YIO.enumCase(EN, "U64", ValueType::U64);
+ YIO.enumCase(EN, "F64", ValueType::F64);
+ }
+};
+
+template <>
+struct MappingTraits<Kernel::Attrs::Metadata> {
+ static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) {
+ YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize,
+ MD.mReqdWorkGroupSize, std::vector<uint32_t>());
+ YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint,
+ MD.mWorkGroupSizeHint, std::vector<uint32_t>());
+ YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint,
+ MD.mVecTypeHint, std::string());
+ }
+};
+
+template <>
+struct MappingTraits<Kernel::Arg::Metadata> {
+ static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) {
+ YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize);
+ YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign);
+ YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind);
+ YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType);
+ YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign,
+ uint32_t(0));
+ YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual,
+ AccessQualifier::Unknown);
+ YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual,
+ AddressSpaceQualifier::Unknown);
+ YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false);
+ YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false);
+ YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false);
+ YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false);
+ YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string());
+ YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string());
+ }
+};
+
+template <>
+struct MappingTraits<Kernel::CodeProps::Metadata> {
+ static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) {
+ YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize,
+ MD.mKernargSegmentSize, uint64_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize,
+ MD.mWorkgroupGroupSegmentSize, uint32_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize,
+ MD.mWorkitemPrivateSegmentSize, uint32_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs,
+ MD.mWavefrontNumSGPRs, uint16_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs,
+ MD.mWorkitemNumVGPRs, uint16_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign,
+ MD.mKernargSegmentAlign, uint8_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign,
+ MD.mGroupSegmentAlign, uint8_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign,
+ MD.mPrivateSegmentAlign, uint8_t(0));
+ YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize,
+ MD.mWavefrontSize, uint8_t(0));
+ }
+};
+
+template <>
+struct MappingTraits<Kernel::DebugProps::Metadata> {
+ static void mapping(IO &YIO, Kernel::DebugProps::Metadata &MD) {
+ YIO.mapOptional(Kernel::DebugProps::Key::DebuggerABIVersion,
+ MD.mDebuggerABIVersion, std::vector<uint32_t>());
+ YIO.mapOptional(Kernel::DebugProps::Key::ReservedNumVGPRs,
+ MD.mReservedNumVGPRs, uint16_t(0));
+ YIO.mapOptional(Kernel::DebugProps::Key::ReservedFirstVGPR,
+ MD.mReservedFirstVGPR, uint16_t(-1));
+ YIO.mapOptional(Kernel::DebugProps::Key::PrivateSegmentBufferSGPR,
+ MD.mPrivateSegmentBufferSGPR, uint16_t(-1));
+ YIO.mapOptional(Kernel::DebugProps::Key::WavefrontPrivateSegmentOffsetSGPR,
+ MD.mWavefrontPrivateSegmentOffsetSGPR, uint16_t(-1));
+ }
+};
+
+template <>
+struct MappingTraits<Kernel::Metadata> {
+ static void mapping(IO &YIO, Kernel::Metadata &MD) {
+ YIO.mapRequired(Kernel::Key::Name, MD.mName);
+ YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string());
+ YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion,
+ std::vector<uint32_t>());
+ if (!MD.mAttrs.empty() || !YIO.outputting())
+ YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs);
+ if (!MD.mArgs.empty() || !YIO.outputting())
+ YIO.mapOptional(Kernel::Key::Args, MD.mArgs);
+ if (!MD.mCodeProps.empty() || !YIO.outputting())
+ YIO.mapOptional(Kernel::Key::CodeProps, MD.mCodeProps);
+ if (!MD.mDebugProps.empty() || !YIO.outputting())
+ YIO.mapOptional(Kernel::Key::DebugProps, MD.mDebugProps);
+ }
+};
+
+template <>
+struct MappingTraits<CodeObject::Metadata> {
+ static void mapping(IO &YIO, CodeObject::Metadata &MD) {
+ YIO.mapRequired(Key::Version, MD.mVersion);
+ YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector<std::string>());
+ if (!MD.mKernels.empty() || !YIO.outputting())
+ YIO.mapOptional(Key::Kernels, MD.mKernels);
+ }
+};
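+
+// With the traits above, the serialized document takes this general shape
+// (all values illustrative):
+//
+//   ---
+//   Version:         [ 1, 0 ]
+//   Kernels:
+//     - Name:            example_kernel
+//       Language:        OpenCL C
+//       LanguageVersion: [ 2, 0 ]
+//       Args:
+//         - Size:          4
+//           Align:         4
+//           ValueKind:     ByValue
+//           ValueType:     I32
+//   ...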
+
+} // end namespace yaml
+
+namespace AMDGPU {
+
+/* static */
+std::error_code CodeObject::Metadata::fromYamlString(
+ std::string YamlString, CodeObject::Metadata &CodeObjectMetadata) {
+ yaml::Input YamlInput(YamlString);
+ YamlInput >> CodeObjectMetadata;
+ return YamlInput.error();
+}
+
+/* static */
+std::error_code CodeObject::Metadata::toYamlString(
+ CodeObject::Metadata CodeObjectMetadata, std::string &YamlString) {
+ raw_string_ostream YamlStream(YamlString);
+ yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits<int>::max());
+ YamlOutput << CodeObjectMetadata;
+ return std::error_code();
+}
+
+namespace CodeObject {
+
+void MetadataStreamer::dump(StringRef YamlString) const {
+ errs() << "AMDGPU Code Object Metadata:\n" << YamlString << '\n';
+}
+
+void MetadataStreamer::verify(StringRef YamlString) const {
+ errs() << "AMDGPU Code Object Metadata Parser Test: ";
+
+ CodeObject::Metadata FromYamlString;
+ if (Metadata::fromYamlString(YamlString, FromYamlString)) {
+ errs() << "FAIL\n";
+ return;
+ }
+
+ std::string ToYamlString;
+ if (Metadata::toYamlString(FromYamlString, ToYamlString)) {
+ errs() << "FAIL\n";
+ return;
+ }
+
+ errs() << (YamlString == ToYamlString ? "PASS" : "FAIL") << '\n';
+ if (YamlString != ToYamlString) {
+ errs() << "Original input: " << YamlString << '\n'
+ << "Produced output: " << ToYamlString << '\n';
+ }
+}
+
+AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
+ if (AccQual.empty())
+ return AccessQualifier::Unknown;
+
+ return StringSwitch<AccessQualifier>(AccQual)
+ .Case("read_only", AccessQualifier::ReadOnly)
+ .Case("write_only", AccessQualifier::WriteOnly)
+ .Case("read_write", AccessQualifier::ReadWrite)
+ .Default(AccessQualifier::Default);
+}
+
+AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifier(
+ unsigned AddressSpace) const {
+ if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS)
+ return AddressSpaceQualifier::Private;
+ if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS)
+ return AddressSpaceQualifier::Global;
+ if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS)
+ return AddressSpaceQualifier::Constant;
+ if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS)
+ return AddressSpaceQualifier::Local;
+ if (AddressSpace == AMDGPUASI.FLAT_ADDRESS)
+ return AddressSpaceQualifier::Generic;
+ if (AddressSpace == AMDGPUASI.REGION_ADDRESS)
+ return AddressSpaceQualifier::Region;
+
+ llvm_unreachable("Unknown address space qualifier");
+}
+
+ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
+ StringRef BaseTypeName) const {
+ if (TypeQual.find("pipe") != StringRef::npos)
+ return ValueKind::Pipe;
+
+ return StringSwitch<ValueKind>(BaseTypeName)
+ .Case("sampler_t", ValueKind::Sampler)
+ .Case("queue_t", ValueKind::Queue)
+ .Cases("image1d_t",
+ "image1d_array_t",
+ "image1d_buffer_t",
+ "image2d_t" ,
+ "image2d_array_t",
+ "image2d_array_depth_t",
+ "image2d_array_msaa_t"
+ "image2d_array_msaa_depth_t"
+ "image2d_depth_t",
+ "image2d_msaa_t",
+ "image2d_msaa_depth_t",
+ "image3d_t", ValueKind::Image)
+ .Default(isa<PointerType>(Ty) ?
+ (Ty->getPointerAddressSpace() ==
+ AMDGPUASI.LOCAL_ADDRESS ?
+ ValueKind::DynamicSharedPointer :
+ ValueKind::GlobalBuffer) :
+ ValueKind::ByValue);
+}
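+
+// For example: an "image2d_t" base type maps to ValueKind::Image, a pointer
+// into the local address space to ValueKind::DynamicSharedPointer, any other
+// pointer to ValueKind::GlobalBuffer, and a plain "int" to ValueKind::ByValue.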
+
+ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: {
+ auto Signed = !TypeName.startswith("u");
+ switch (Ty->getIntegerBitWidth()) {
+ case 8:
+ return Signed ? ValueType::I8 : ValueType::U8;
+ case 16:
+ return Signed ? ValueType::I16 : ValueType::U16;
+ case 32:
+ return Signed ? ValueType::I32 : ValueType::U32;
+ case 64:
+ return Signed ? ValueType::I64 : ValueType::U64;
+ default:
+ return ValueType::Struct;
+ }
+ }
+ case Type::HalfTyID:
+ return ValueType::F16;
+ case Type::FloatTyID:
+ return ValueType::F32;
+ case Type::DoubleTyID:
+ return ValueType::F64;
+ case Type::PointerTyID:
+ return getValueType(Ty->getPointerElementType(), TypeName);
+ case Type::VectorTyID:
+ return getValueType(Ty->getVectorElementType(), TypeName);
+ default:
+ return ValueType::Struct;
+ }
+}
+
+std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: {
+ if (!Signed)
+ return (Twine('u') + getTypeName(Ty, true)).str();
+
+ auto BitWidth = Ty->getIntegerBitWidth();
+ switch (BitWidth) {
+ case 8:
+ return "char";
+ case 16:
+ return "short";
+ case 32:
+ return "int";
+ case 64:
+ return "long";
+ default:
+ return (Twine('i') + Twine(BitWidth)).str();
+ }
+ }
+ case Type::HalfTyID:
+ return "half";
+ case Type::FloatTyID:
+ return "float";
+ case Type::DoubleTyID:
+ return "double";
+ case Type::VectorTyID: {
+ auto VecTy = cast<VectorType>(Ty);
+ auto ElTy = VecTy->getElementType();
+ auto NumElements = VecTy->getVectorNumElements();
+ return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
+ }
+ default:
+ return "unknown";
+ }
+}
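+
+// For example: a <4 x i32> vector yields "int4" when Signed is true and
+// "uint4" otherwise, while a non-standard width such as i48 falls back to
+// "i48".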
+
+std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
+ MDNode *Node) const {
+ std::vector<uint32_t> Dims;
+ if (Node->getNumOperands() != 3)
+ return Dims;
+
+ for (auto &Op : Node->operands())
+ Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
+ return Dims;
+}
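+
+// The node consumed here is the frontend-emitted form, e.g. (illustrative):
+//   !{i32 256, i32 1, i32 1}
+// for __attribute__((reqd_work_group_size(256, 1, 1))); nodes without exactly
+// three operands yield an empty vector.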
+
+void MetadataStreamer::emitVersion() {
+ auto &Version = CodeObjectMetadata.mVersion;
+
+ Version.push_back(MetadataVersionMajor);
+ Version.push_back(MetadataVersionMinor);
+}
+
+void MetadataStreamer::emitPrintf(const Module &Mod) {
+ auto &Printf = CodeObjectMetadata.mPrintf;
+
+ auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
+ if (!Node)
+ return;
+
+ for (auto Op : Node->operands())
+ if (Op->getNumOperands())
+ Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
+}
+
+void MetadataStreamer::emitKernelLanguage(const Function &Func) {
+ auto &Kernel = CodeObjectMetadata.mKernels.back();
+
+ // TODO: What about other languages?
+ auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
+ if (!Node || !Node->getNumOperands())
+ return;
+ auto Op0 = Node->getOperand(0);
+ if (Op0->getNumOperands() <= 1)
+ return;
+
+ Kernel.mLanguage = "OpenCL C";
+ Kernel.mLanguageVersion.push_back(
+ mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
+ Kernel.mLanguageVersion.push_back(
+ mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
+}
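+
+// The module-level metadata consumed above looks like (illustrative):
+//   !opencl.ocl.version = !{!0}
+//   !0 = !{i32 2, i32 0}  ; yields mLanguageVersion == [2, 0]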
+
+void MetadataStreamer::emitKernelAttrs(const Function &Func) {
+ auto &Attrs = CodeObjectMetadata.mKernels.back().mAttrs;
+
+ if (auto Node = Func.getMetadata("reqd_work_group_size"))
+ Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
+ if (auto Node = Func.getMetadata("work_group_size_hint"))
+ Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
+ if (auto Node = Func.getMetadata("vec_type_hint")) {
+ Attrs.mVecTypeHint = getTypeName(
+ cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
+ mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
+ }
+}
+
+void MetadataStreamer::emitKernelArgs(const Function &Func) {
+ for (auto &Arg : Func.args())
+ emitKernelArg(Arg);
+
+ // TODO: What about other languages?
+ if (!Func.getParent()->getNamedMetadata("opencl.ocl.version"))
+ return;
+
+ auto &DL = Func.getParent()->getDataLayout();
+ auto Int64Ty = Type::getInt64Ty(Func.getContext());
+
+ emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
+ emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
+ emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
+
+ if (!Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
+ return;
+
+ auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
+ AMDGPUASI.GLOBAL_ADDRESS);
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
+}
+
+void MetadataStreamer::emitKernelArg(const Argument &Arg) {
+ auto Func = Arg.getParent();
+ auto ArgNo = Arg.getArgNo();
+ const MDNode *Node;
+
+ StringRef TypeQual;
+ Node = Func->getMetadata("kernel_arg_type_qual");
+ if (Node && ArgNo < Node->getNumOperands())
+ TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef BaseTypeName;
+ Node = Func->getMetadata("kernel_arg_base_type");
+ if (Node && ArgNo < Node->getNumOperands())
+ BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef AccQual;
+ if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
+ Arg.hasNoAliasAttr()) {
+ AccQual = "read_only";
+ } else {
+ Node = Func->getMetadata("kernel_arg_access_qual");
+ if (Node && ArgNo < Node->getNumOperands())
+ AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+ }
+
+ StringRef Name;
+ Node = Func->getMetadata("kernel_arg_name");
+ if (Node && ArgNo < Node->getNumOperands())
+ Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef TypeName;
+ Node = Func->getMetadata("kernel_arg_type");
+ if (Node && ArgNo < Node->getNumOperands())
+ TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
+ getValueKind(Arg.getType(), TypeQual, BaseTypeName), TypeQual,
+ BaseTypeName, AccQual, Name, TypeName);
+}
+
+void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
+ ValueKind ValueKind, StringRef TypeQual,
+ StringRef BaseTypeName, StringRef AccQual,
+ StringRef Name, StringRef TypeName) {
+ CodeObjectMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
+ auto &Arg = CodeObjectMetadata.mKernels.back().mArgs.back();
+
+ Arg.mSize = DL.getTypeAllocSize(Ty);
+ Arg.mAlign = DL.getABITypeAlignment(Ty);
+ Arg.mValueKind = ValueKind;
+ Arg.mValueType = getValueType(Ty, BaseTypeName);
+
+ if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
+ auto ElTy = PtrTy->getElementType();
+ if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS && ElTy->isSized())
+ Arg.mPointeeAlign = DL.getABITypeAlignment(ElTy);
+ }
+
+ Arg.mAccQual = getAccessQualifier(AccQual);
+
+ if (auto PtrTy = dyn_cast<PointerType>(Ty))
+    Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
+
+ SmallVector<StringRef, 1> SplitTypeQuals;
+ TypeQual.split(SplitTypeQuals, " ", -1, false);
+ for (StringRef Key : SplitTypeQuals) {
+ auto P = StringSwitch<bool*>(Key)
+ .Case("const", &Arg.mIsConst)
+ .Case("pipe", &Arg.mIsPipe)
+ .Case("restrict", &Arg.mIsRestrict)
+ .Case("volatile", &Arg.mIsVolatile)
+ .Default(nullptr);
+ if (P)
+ *P = true;
+ }
+
+ Arg.mName = Name;
+ Arg.mTypeName = TypeName;
+}
+
+void MetadataStreamer::emitKernelCodeProps(
+ const amd_kernel_code_t &KernelCode) {
+ auto &CodeProps = CodeObjectMetadata.mKernels.back().mCodeProps;
+
+ CodeProps.mKernargSegmentSize = KernelCode.kernarg_segment_byte_size;
+ CodeProps.mWorkgroupGroupSegmentSize =
+ KernelCode.workgroup_group_segment_byte_size;
+ CodeProps.mWorkitemPrivateSegmentSize =
+ KernelCode.workitem_private_segment_byte_size;
+ CodeProps.mWavefrontNumSGPRs = KernelCode.wavefront_sgpr_count;
+ CodeProps.mWorkitemNumVGPRs = KernelCode.workitem_vgpr_count;
+ CodeProps.mKernargSegmentAlign = KernelCode.kernarg_segment_alignment;
+ CodeProps.mGroupSegmentAlign = KernelCode.group_segment_alignment;
+ CodeProps.mPrivateSegmentAlign = KernelCode.private_segment_alignment;
+ CodeProps.mWavefrontSize = KernelCode.wavefront_size;
+}
+
+void MetadataStreamer::emitKernelDebugProps(
+ const amd_kernel_code_t &KernelCode) {
+ if (!(KernelCode.code_properties & AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED))
+ return;
+
+ auto &DebugProps = CodeObjectMetadata.mKernels.back().mDebugProps;
+
+ // FIXME: Need to pass down debugger ABI version through features. This is ok
+ // for now because we only have one version.
+ DebugProps.mDebuggerABIVersion.push_back(1);
+ DebugProps.mDebuggerABIVersion.push_back(0);
+ DebugProps.mReservedNumVGPRs = KernelCode.reserved_vgpr_count;
+ DebugProps.mReservedFirstVGPR = KernelCode.reserved_vgpr_first;
+ DebugProps.mPrivateSegmentBufferSGPR =
+ KernelCode.debug_private_segment_buffer_sgpr;
+ DebugProps.mWavefrontPrivateSegmentOffsetSGPR =
+ KernelCode.debug_wavefront_private_segment_offset_sgpr;
+}
+
+void MetadataStreamer::begin(const Module &Mod) {
+ AMDGPUASI = getAMDGPUAS(Mod);
+ emitVersion();
+ emitPrintf(Mod);
+}
+
+void MetadataStreamer::emitKernel(const Function &Func,
+ const amd_kernel_code_t &KernelCode) {
+ if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
+ return;
+
+ CodeObjectMetadata.mKernels.push_back(Kernel::Metadata());
+ auto &Kernel = CodeObjectMetadata.mKernels.back();
+
+ Kernel.mName = Func.getName();
+ emitKernelLanguage(Func);
+ emitKernelAttrs(Func);
+ emitKernelArgs(Func);
+ emitKernelCodeProps(KernelCode);
+ emitKernelDebugProps(KernelCode);
+}
+
+ErrorOr<std::string> MetadataStreamer::toYamlString() {
+ std::string YamlString;
+ if (auto Error = Metadata::toYamlString(CodeObjectMetadata, YamlString))
+ return Error;
+
+ if (DumpCodeObjectMetadata)
+ dump(YamlString);
+ if (VerifyCodeObjectMetadata)
+ verify(YamlString);
+
+ return YamlString;
+}
+
+ErrorOr<std::string> MetadataStreamer::toYamlString(StringRef YamlString) {
+ if (auto Error = Metadata::fromYamlString(YamlString, CodeObjectMetadata))
+ return Error;
+
+ return toYamlString();
+}
+
+} // end namespace CodeObject
+} // end namespace AMDGPU
+} // end namespace llvm
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
new file mode 100644
index 000000000000..8d4c51763f63
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
@@ -0,0 +1,99 @@
+//===--- AMDGPUCodeObjectMetadataStreamer.h ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Code Object Metadata Streamer.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
+
+#include "AMDGPU.h"
+#include "AMDGPUCodeObjectMetadata.h"
+#include "AMDKernelCodeT.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorOr.h"
+
+namespace llvm {
+
+class Argument;
+class DataLayout;
+class Function;
+class MDNode;
+class Module;
+class Type;
+
+namespace AMDGPU {
+namespace CodeObject {
+
+class MetadataStreamer final {
+private:
+ Metadata CodeObjectMetadata;
+ AMDGPUAS AMDGPUASI;
+
+ void dump(StringRef YamlString) const;
+
+ void verify(StringRef YamlString) const;
+
+ AccessQualifier getAccessQualifier(StringRef AccQual) const;
+
+  AddressSpaceQualifier getAddressSpaceQualifier(unsigned AddressSpace) const;
+
+ ValueKind getValueKind(Type *Ty, StringRef TypeQual,
+ StringRef BaseTypeName) const;
+
+ ValueType getValueType(Type *Ty, StringRef TypeName) const;
+
+ std::string getTypeName(Type *Ty, bool Signed) const;
+
+ std::vector<uint32_t> getWorkGroupDimensions(MDNode *Node) const;
+
+ void emitVersion();
+
+ void emitPrintf(const Module &Mod);
+
+ void emitKernelLanguage(const Function &Func);
+
+ void emitKernelAttrs(const Function &Func);
+
+ void emitKernelArgs(const Function &Func);
+
+ void emitKernelArg(const Argument &Arg);
+
+ void emitKernelArg(const DataLayout &DL, Type *Ty, ValueKind ValueKind,
+ StringRef TypeQual = "", StringRef BaseTypeName = "",
+ StringRef AccQual = "", StringRef Name = "",
+ StringRef TypeName = "");
+
+ void emitKernelCodeProps(const amd_kernel_code_t &KernelCode);
+
+ void emitKernelDebugProps(const amd_kernel_code_t &KernelCode);
+
+public:
+ MetadataStreamer() = default;
+ ~MetadataStreamer() = default;
+
+ void begin(const Module &Mod);
+
+ void end() {}
+
+ void emitKernel(const Function &Func, const amd_kernel_code_t &KernelCode);
+
+ ErrorOr<std::string> toYamlString();
+
+ ErrorOr<std::string> toYamlString(StringRef YamlString);
+};
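+
+// Typical driving sequence, as used by AMDGPUTargetStreamer (sketch only):
+//
+//   MetadataStreamer MS;
+//   MS.begin(Mod);                    // emits version and printf metadata
+//   MS.emitKernel(Func, KernelCode);  // once per kernel function
+//   MS.end();
+//   ErrorOr<std::string> Yaml = MS.toYamlString();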
+
+} // end namespace CodeObject
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 1847d7a67328..073d19422e86 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -1,16 +1,20 @@
-//===-- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ----------------------==//
+//===- AMDGPUELFObjectWriter.cpp - AMDGPU ELF Writer ----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-/// \file
//===----------------------------------------------------------------------===//
#include "AMDGPUMCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -19,20 +23,21 @@ namespace {
class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
public:
AMDGPUELFObjectWriter(bool Is64Bit, bool HasRelocationAddend);
+
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
};
-} // End anonymous namespace
+} // end anonymous namespace
AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit,
bool HasRelocationAddend)
: MCELFObjectTargetWriter(Is64Bit,
ELF::ELFOSABI_AMDGPU_HSA,
ELF::EM_AMDGPU,
- HasRelocationAddend) { }
+ HasRelocationAddend) {}
unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
@@ -77,7 +82,6 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
llvm_unreachable("unhandled relocation type");
}
-
MCObjectWriter *llvm::createAMDGPUELFObjectWriter(bool Is64Bit,
bool HasRelocationAddend,
raw_pwrite_stream &OS) {
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index 548bad56e174..f80b5f3a6dba 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -54,11 +54,17 @@ MCObjectWriter *createAMDGPUELFObjectWriter(bool Is64Bit,
#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
+#undef GET_REGINFO_ENUM
#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_OPERAND_ENUM
#include "AMDGPUGenInstrInfo.inc"
+#undef GET_INSTRINFO_OPERAND_ENUM
+#undef GET_INSTRINFO_ENUM
+
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
+#undef GET_SUBTARGETINFO_ENUM
#endif
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
deleted file mode 100644
index 95387ad1627c..000000000000
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
+++ /dev/null
@@ -1,408 +0,0 @@
-//===-- AMDGPURuntimeMD.cpp - Generates runtime metadata ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// Generates AMDGPU runtime metadata for YAML mapping.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#include "AMDGPU.h"
-#include "AMDGPURuntimeMetadata.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/YAMLTraits.h"
-#include <vector>
-#include "AMDGPURuntimeMD.h"
-
-using namespace llvm;
-using namespace ::AMDGPU::RuntimeMD;
-
-static cl::opt<bool>
-DumpRuntimeMD("amdgpu-dump-rtmd",
- cl::desc("Dump AMDGPU runtime metadata"));
-
-static cl::opt<bool>
-CheckRuntimeMDParser("amdgpu-check-rtmd-parser", cl::Hidden,
- cl::desc("Check AMDGPU runtime metadata YAML parser"));
-
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
-LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
-LLVM_YAML_IS_SEQUENCE_VECTOR(KernelArg::Metadata)
-
-namespace llvm {
-namespace yaml {
-
-template <> struct MappingTraits<KernelArg::Metadata> {
- static void mapping(IO &YamlIO, KernelArg::Metadata &A) {
- YamlIO.mapRequired(KeyName::ArgSize, A.Size);
- YamlIO.mapRequired(KeyName::ArgAlign, A.Align);
- YamlIO.mapOptional(KeyName::ArgPointeeAlign, A.PointeeAlign, 0U);
- YamlIO.mapRequired(KeyName::ArgKind, A.Kind);
- YamlIO.mapRequired(KeyName::ArgValueType, A.ValueType);
- YamlIO.mapOptional(KeyName::ArgTypeName, A.TypeName, std::string());
- YamlIO.mapOptional(KeyName::ArgName, A.Name, std::string());
- YamlIO.mapOptional(KeyName::ArgAddrQual, A.AddrQual, INVALID_ADDR_QUAL);
- YamlIO.mapOptional(KeyName::ArgAccQual, A.AccQual, INVALID_ACC_QUAL);
- YamlIO.mapOptional(KeyName::ArgIsVolatile, A.IsVolatile, uint8_t(0));
- YamlIO.mapOptional(KeyName::ArgIsConst, A.IsConst, uint8_t(0));
- YamlIO.mapOptional(KeyName::ArgIsRestrict, A.IsRestrict, uint8_t(0));
- YamlIO.mapOptional(KeyName::ArgIsPipe, A.IsPipe, uint8_t(0));
- }
- static const bool flow = true;
-};
-
-template <> struct MappingTraits<Kernel::Metadata> {
- static void mapping(IO &YamlIO, Kernel::Metadata &K) {
- YamlIO.mapRequired(KeyName::KernelName, K.Name);
- YamlIO.mapOptional(KeyName::Language, K.Language, std::string());
- YamlIO.mapOptional(KeyName::LanguageVersion, K.LanguageVersion);
- YamlIO.mapOptional(KeyName::ReqdWorkGroupSize, K.ReqdWorkGroupSize);
- YamlIO.mapOptional(KeyName::WorkGroupSizeHint, K.WorkGroupSizeHint);
- YamlIO.mapOptional(KeyName::VecTypeHint, K.VecTypeHint, std::string());
- YamlIO.mapOptional(KeyName::KernelIndex, K.KernelIndex,
- INVALID_KERNEL_INDEX);
- YamlIO.mapOptional(KeyName::NoPartialWorkGroups, K.NoPartialWorkGroups,
- uint8_t(0));
- YamlIO.mapRequired(KeyName::Args, K.Args);
- }
- static const bool flow = true;
-};
-
-template <> struct MappingTraits<Program::Metadata> {
- static void mapping(IO &YamlIO, Program::Metadata &Prog) {
- YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq);
- YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo);
- YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels);
- }
- static const bool flow = true;
-};
-
-} // end namespace yaml
-} // end namespace llvm
-
-// Get a vector of three integer values from MDNode \p Node;
-static std::vector<uint32_t> getThreeInt32(MDNode *Node) {
- assert(Node->getNumOperands() == 3);
- std::vector<uint32_t> V;
- for (const MDOperand &Op : Node->operands()) {
- const ConstantInt *CI = mdconst::extract<ConstantInt>(Op);
- V.push_back(CI->getZExtValue());
- }
- return V;
-}
-
-static std::string getOCLTypeName(Type *Ty, bool Signed) {
- switch (Ty->getTypeID()) {
- case Type::HalfTyID:
- return "half";
- case Type::FloatTyID:
- return "float";
- case Type::DoubleTyID:
- return "double";
- case Type::IntegerTyID: {
- if (!Signed)
- return (Twine('u') + getOCLTypeName(Ty, true)).str();
- unsigned BW = Ty->getIntegerBitWidth();
- switch (BW) {
- case 8:
- return "char";
- case 16:
- return "short";
- case 32:
- return "int";
- case 64:
- return "long";
- default:
- return (Twine('i') + Twine(BW)).str();
- }
- }
- case Type::VectorTyID: {
- VectorType *VecTy = cast<VectorType>(Ty);
- Type *EleTy = VecTy->getElementType();
- unsigned Size = VecTy->getVectorNumElements();
- return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str();
- }
- default:
- return "unknown";
- }
-}
-
-static KernelArg::ValueType getRuntimeMDValueType(
- Type *Ty, StringRef TypeName) {
- switch (Ty->getTypeID()) {
- case Type::HalfTyID:
- return KernelArg::F16;
- case Type::FloatTyID:
- return KernelArg::F32;
- case Type::DoubleTyID:
- return KernelArg::F64;
- case Type::IntegerTyID: {
- bool Signed = !TypeName.startswith("u");
- switch (Ty->getIntegerBitWidth()) {
- case 8:
- return Signed ? KernelArg::I8 : KernelArg::U8;
- case 16:
- return Signed ? KernelArg::I16 : KernelArg::U16;
- case 32:
- return Signed ? KernelArg::I32 : KernelArg::U32;
- case 64:
- return Signed ? KernelArg::I64 : KernelArg::U64;
- default:
- // Runtime does not recognize other integer types. Report as struct type.
- return KernelArg::Struct;
- }
- }
- case Type::VectorTyID:
- return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName);
- case Type::PointerTyID:
- return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName);
- default:
- return KernelArg::Struct;
- }
-}
-
-static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(
- AMDGPUAS::AddressSpaces A) {
- switch (A) {
- case AMDGPUAS::GLOBAL_ADDRESS:
- return KernelArg::Global;
- case AMDGPUAS::CONSTANT_ADDRESS:
- return KernelArg::Constant;
- case AMDGPUAS::LOCAL_ADDRESS:
- return KernelArg::Local;
- case AMDGPUAS::FLAT_ADDRESS:
- return KernelArg::Generic;
- case AMDGPUAS::REGION_ADDRESS:
- return KernelArg::Region;
- default:
- return KernelArg::Private;
- }
-}
-
-static KernelArg::Metadata getRuntimeMDForKernelArg(const DataLayout &DL,
- Type *T, KernelArg::Kind Kind, StringRef BaseTypeName = "",
- StringRef TypeName = "", StringRef ArgName = "", StringRef TypeQual = "",
- StringRef AccQual = "") {
-
- KernelArg::Metadata Arg;
-
- // Set ArgSize and ArgAlign.
- Arg.Size = DL.getTypeAllocSize(T);
- Arg.Align = DL.getABITypeAlignment(T);
- if (auto PT = dyn_cast<PointerType>(T)) {
- auto ET = PT->getElementType();
- if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized())
- Arg.PointeeAlign = DL.getABITypeAlignment(ET);
- }
-
- // Set ArgTypeName.
- Arg.TypeName = TypeName;
-
- // Set ArgName.
- Arg.Name = ArgName;
-
- // Set ArgIsVolatile, ArgIsRestrict, ArgIsConst and ArgIsPipe.
- SmallVector<StringRef, 1> SplitQ;
- TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */);
-
- for (StringRef KeyName : SplitQ) {
- auto *P = StringSwitch<uint8_t *>(KeyName)
- .Case("volatile", &Arg.IsVolatile)
- .Case("restrict", &Arg.IsRestrict)
- .Case("const", &Arg.IsConst)
- .Case("pipe", &Arg.IsPipe)
- .Default(nullptr);
- if (P)
- *P = 1;
- }
-
- // Set ArgKind.
- Arg.Kind = Kind;
-
- // Set ArgValueType.
- Arg.ValueType = getRuntimeMDValueType(T, BaseTypeName);
-
- // Set ArgAccQual.
- if (!AccQual.empty()) {
- Arg.AccQual = StringSwitch<KernelArg::AccessQualifer>(AccQual)
- .Case("read_only", KernelArg::ReadOnly)
- .Case("write_only", KernelArg::WriteOnly)
- .Case("read_write", KernelArg::ReadWrite)
- .Default(KernelArg::AccNone);
- }
-
- // Set ArgAddrQual.
- if (auto *PT = dyn_cast<PointerType>(T)) {
- Arg.AddrQual = getRuntimeAddrSpace(static_cast<AMDGPUAS::AddressSpaces>(
- PT->getAddressSpace()));
- }
-
- return Arg;
-}
-
-static Kernel::Metadata getRuntimeMDForKernel(const Function &F) {
- Kernel::Metadata Kernel;
- Kernel.Name = F.getName();
- auto &M = *F.getParent();
-
- // Set Language and LanguageVersion.
- if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
- if (MD->getNumOperands() != 0) {
- auto Node = MD->getOperand(0);
- if (Node->getNumOperands() > 1) {
- Kernel.Language = "OpenCL C";
- uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
- ->getZExtValue();
- uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
- ->getZExtValue();
- Kernel.LanguageVersion.push_back(Major);
- Kernel.LanguageVersion.push_back(Minor);
- }
- }
- }
-
- const DataLayout &DL = F.getParent()->getDataLayout();
- for (auto &Arg : F.args()) {
- unsigned I = Arg.getArgNo();
- Type *T = Arg.getType();
- auto TypeName = dyn_cast<MDString>(F.getMetadata(
- "kernel_arg_type")->getOperand(I))->getString();
- auto BaseTypeName = cast<MDString>(F.getMetadata(
- "kernel_arg_base_type")->getOperand(I))->getString();
- StringRef ArgName;
- if (auto ArgNameMD = F.getMetadata("kernel_arg_name"))
- ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString();
- auto TypeQual = cast<MDString>(F.getMetadata(
- "kernel_arg_type_qual")->getOperand(I))->getString();
- auto AccQual = cast<MDString>(F.getMetadata(
- "kernel_arg_access_qual")->getOperand(I))->getString();
- KernelArg::Kind Kind;
- if (TypeQual.find("pipe") != StringRef::npos)
- Kind = KernelArg::Pipe;
- else Kind = StringSwitch<KernelArg::Kind>(BaseTypeName)
- .Case("sampler_t", KernelArg::Sampler)
- .Case("queue_t", KernelArg::Queue)
- .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
- "image2d_t" , "image2d_array_t", KernelArg::Image)
- .Cases("image2d_depth_t", "image2d_array_depth_t",
- "image2d_msaa_t", "image2d_array_msaa_t",
- "image2d_msaa_depth_t", KernelArg::Image)
- .Cases("image2d_array_msaa_depth_t", "image3d_t",
- KernelArg::Image)
- .Default(isa<PointerType>(T) ?
- (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ?
- KernelArg::DynamicSharedPointer :
- KernelArg::GlobalBuffer) :
- KernelArg::ByValue);
- Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, T, Kind,
- BaseTypeName, TypeName, ArgName, TypeQual, AccQual));
- }
-
- // Emit hidden kernel arguments for OpenCL kernels.
- if (F.getParent()->getNamedMetadata("opencl.ocl.version")) {
- auto Int64T = Type::getInt64Ty(F.getContext());
- Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
- KernelArg::HiddenGlobalOffsetX));
- Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
- KernelArg::HiddenGlobalOffsetY));
- Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
- KernelArg::HiddenGlobalOffsetZ));
- if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) {
- auto Int8PtrT = Type::getInt8PtrTy(F.getContext(),
- KernelArg::Global);
- Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int8PtrT,
- KernelArg::HiddenPrintfBuffer));
- }
- }
-
- // Set ReqdWorkGroupSize, WorkGroupSizeHint, and VecTypeHint.
- if (auto RWGS = F.getMetadata("reqd_work_group_size"))
- Kernel.ReqdWorkGroupSize = getThreeInt32(RWGS);
-
- if (auto WGSH = F.getMetadata("work_group_size_hint"))
- Kernel.WorkGroupSizeHint = getThreeInt32(WGSH);
-
- if (auto VTH = F.getMetadata("vec_type_hint"))
- Kernel.VecTypeHint = getOCLTypeName(cast<ValueAsMetadata>(
- VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
- VTH->getOperand(1))->getZExtValue());
-
- return Kernel;
-}
-
-Program::Metadata::Metadata(const std::string &YAML) {
- yaml::Input Input(YAML);
- Input >> *this;
-}
-
-std::string Program::Metadata::toYAML(void) {
- std::string Text;
- raw_string_ostream Stream(Text);
- yaml::Output Output(Stream, nullptr, INT_MAX /* do not wrap line */);
- Output << *this;
- return Stream.str();
-}
-
-Program::Metadata Program::Metadata::fromYAML(const std::string &S) {
- return Program::Metadata(S);
-}
-
-// Check if the YAML string can be parsed.
-static void checkRuntimeMDYAMLString(const std::string &YAML) {
- auto P = Program::Metadata::fromYAML(YAML);
- auto S = P.toYAML();
- llvm::errs() << "AMDGPU runtime metadata parser test "
- << (YAML == S ? "passes" : "fails") << ".\n";
- if (YAML != S) {
- llvm::errs() << "First output: " << YAML << '\n'
- << "Second output: " << S << '\n';
- }
-}
-
-std::string llvm::getRuntimeMDYAMLString(Module &M) {
- Program::Metadata Prog;
- Prog.MDVersionSeq.push_back(MDVersion);
- Prog.MDVersionSeq.push_back(MDRevision);
-
- // Set PrintfInfo.
- if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) {
- for (unsigned I = 0; I < MD->getNumOperands(); ++I) {
- auto Node = MD->getOperand(I);
- if (Node->getNumOperands() > 0)
- Prog.PrintfInfo.push_back(cast<MDString>(Node->getOperand(0))
- ->getString());
- }
- }
-
- // Set Kernels.
- for (auto &F: M.functions()) {
- if (!F.getMetadata("kernel_arg_type"))
- continue;
- Prog.Kernels.emplace_back(getRuntimeMDForKernel(F));
- }
-
- auto YAML = Prog.toYAML();
-
- if (DumpRuntimeMD)
- llvm::errs() << "AMDGPU runtime metadata:\n" << YAML << '\n';
-
- if (CheckRuntimeMDParser)
- checkRuntimeMDYAMLString(YAML);
-
- return YAML;
-}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
deleted file mode 100644
index a92fdd4bebc2..000000000000
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
+++ /dev/null
@@ -1,26 +0,0 @@
-//===- AMDGPURuntimeMD.h - Generate runtime metadata ---------------*- C++ -*-//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares functions for generating runtime metadata.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H
-#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H
-
-#include <string>
-
-namespace llvm {
-class Module;
-
-// Get runtime metadata as YAML string.
-std::string getRuntimeMDYAMLString(Module &M);
-
-}
-#endif
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 3392183d33c3..8dc863f723e2 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -27,7 +27,6 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/FormattedStream.h"
-#include "AMDGPURuntimeMD.h"
namespace llvm {
#include "AMDGPUPTNote.h"
@@ -36,9 +35,27 @@ namespace llvm {
using namespace llvm;
using namespace llvm::AMDGPU;
+//===----------------------------------------------------------------------===//
+// AMDGPUTargetStreamer
+//===----------------------------------------------------------------------===//
+
AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S)
: MCTargetStreamer(S) {}
+void AMDGPUTargetStreamer::EmitStartOfCodeObjectMetadata(const Module &Mod) {
+ CodeObjectMetadataStreamer.begin(Mod);
+}
+
+void AMDGPUTargetStreamer::EmitKernelCodeObjectMetadata(
+ const Function &Func, const amd_kernel_code_t &KernelCode) {
+ CodeObjectMetadataStreamer.emitKernel(Func, KernelCode);
+}
+
+void AMDGPUTargetStreamer::EmitEndOfCodeObjectMetadata() {
+ CodeObjectMetadataStreamer.end();
+ EmitCodeObjectMetadata(CodeObjectMetadataStreamer.toYamlString().get());
+}
+
//===----------------------------------------------------------------------===//
// AMDGPUTargetAsmStreamer
//===----------------------------------------------------------------------===//
@@ -93,16 +110,16 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
}
-void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(Module &M) {
- OS << "\t.amdgpu_runtime_metadata\n";
- OS << getRuntimeMDYAMLString(M);
- OS << "\n\t.end_amdgpu_runtime_metadata\n";
-}
+bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(StringRef YamlString) {
+ auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString);
+ if (!VerifiedYamlString)
+ return false;
-void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(StringRef Metadata) {
- OS << "\t.amdgpu_runtime_metadata";
- OS << Metadata;
- OS << "\t.end_amdgpu_runtime_metadata\n";
+ OS << '\t' << AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin << '\n';
+ OS << VerifiedYamlString.get();
+ OS << '\t' << AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd << '\n';
+
+ return true;
}
//===----------------------------------------------------------------------===//
@@ -116,22 +133,21 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
return static_cast<MCELFStreamer &>(Streamer);
}
-void
-AMDGPUTargetELFStreamer::EmitAMDGPUNote(const MCExpr* DescSZ,
- PT_NOTE::NoteType Type,
- std::function<void(MCELFStreamer &)> EmitDesc) {
+void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
+ const MCExpr *DescSZ, ElfNote::NoteType Type,
+ function_ref<void(MCELFStreamer &)> EmitDesc) {
auto &S = getStreamer();
auto &Context = S.getContext();
- auto NameSZ = sizeof(PT_NOTE::NoteName);
+ auto NameSZ = sizeof(ElfNote::NoteName);
S.PushSection();
S.SwitchSection(Context.getELFSection(
- PT_NOTE::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
+ ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
S.EmitIntValue(NameSZ, 4); // namesz
S.EmitValue(DescSZ, 4); // descz
- S.EmitIntValue(Type, 4); // type
- S.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name
+ S.EmitIntValue(Type, 4); // type
+ S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ)); // name
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
EmitDesc(S); // desc
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
@@ -144,7 +160,7 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
EmitAMDGPUNote(
MCConstantExpr::create(8, getContext()),
- PT_NOTE::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
+ ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
[&](MCELFStreamer &OS){
OS.EmitIntValue(Major, 4);
OS.EmitIntValue(Minor, 4);
@@ -160,14 +176,14 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
StringRef ArchName) {
uint16_t VendorNameSize = VendorName.size() + 1;
uint16_t ArchNameSize = ArchName.size() + 1;
-
+
unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
VendorNameSize + ArchNameSize;
EmitAMDGPUNote(
MCConstantExpr::create(DescSZ, getContext()),
- PT_NOTE::NT_AMDGPU_HSA_ISA,
+ ElfNote::NT_AMDGPU_HSA_ISA,
[&](MCELFStreamer &OS) {
OS.EmitIntValue(VendorNameSize, 2);
OS.EmitIntValue(ArchNameSize, 2);
@@ -216,7 +232,11 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
Symbol->setBinding(ELF::STB_GLOBAL);
}
-void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(StringRef Metadata) {
+bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) {
+ auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString);
+ if (!VerifiedYamlString)
+ return false;
+
// Create two labels to mark the beginning and end of the desc field
// and a MCExpr to calculate the size of the desc field.
auto &Context = getContext();
@@ -228,15 +248,13 @@ void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(StringRef Metadata) {
EmitAMDGPUNote(
DescSZ,
- PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA,
+ ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_METADATA,
[&](MCELFStreamer &OS) {
OS.EmitLabel(DescBegin);
- OS.EmitBytes(Metadata);
+ OS.EmitBytes(VerifiedYamlString.get());
OS.EmitLabel(DescEnd);
}
);
-}
-void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(Module &M) {
- EmitRuntimeMetadata(getRuntimeMDYAMLString(M));
+ return true;
}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index e2f20586903d..5c588bbded9c 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -10,6 +10,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
+#include "AMDGPUCodeObjectMetadataStreamer.h"
#include "AMDKernelCodeT.h"
#include "llvm/MC/MCStreamer.h"
@@ -26,6 +27,7 @@ class Type;
class AMDGPUTargetStreamer : public MCTargetStreamer {
protected:
+ AMDGPU::CodeObject::MetadataStreamer CodeObjectMetadataStreamer;
MCContext &getContext() const { return Streamer.getContext(); }
public:
@@ -46,12 +48,18 @@ public:
virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
- virtual void EmitRuntimeMetadata(Module &M) = 0;
+ virtual void EmitStartOfCodeObjectMetadata(const Module &Mod);
- virtual void EmitRuntimeMetadata(StringRef Metadata) = 0;
+ virtual void EmitKernelCodeObjectMetadata(
+ const Function &Func, const amd_kernel_code_t &KernelCode);
+
+ virtual void EmitEndOfCodeObjectMetadata();
+
+ /// \returns True on success, false on failure.
+ virtual bool EmitCodeObjectMetadata(StringRef YamlString) = 0;
};
-class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
+class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
formatted_raw_ostream &OS;
public:
AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
@@ -70,17 +78,16 @@ public:
void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
- void EmitRuntimeMetadata(Module &M) override;
-
- void EmitRuntimeMetadata(StringRef Metadata) override;
+ /// \returns True on success, false on failure.
+ bool EmitCodeObjectMetadata(StringRef YamlString) override;
};
-class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
+class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
MCStreamer &Streamer;
- void EmitAMDGPUNote(const MCExpr* DescSize,
- AMDGPU::PT_NOTE::NoteType Type,
- std::function<void(MCELFStreamer &)> EmitDesc);
+ void EmitAMDGPUNote(const MCExpr *DescSize,
+ AMDGPU::ElfNote::NoteType Type,
+ function_ref<void(MCELFStreamer &)> EmitDesc);
public:
AMDGPUTargetELFStreamer(MCStreamer &S);
@@ -102,9 +109,8 @@ public:
void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
- void EmitRuntimeMetadata(Module &M) override;
-
- void EmitRuntimeMetadata(StringRef Metadata) override;
+ /// \returns True on success, false on failure.
+ bool EmitCodeObjectMetadata(StringRef YamlString) override;
};
}
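The header changes replace the two EmitRuntimeMetadata overloads with a three-phase protocol: open module-level metadata, append one record per kernel alongside its amd_kernel_code_t, then serialize the whole thing. A sketch of the call order this interface suggests; the Kernels container and the driver function are illustrative, not from the patch:

#include "AMDGPUTargetStreamer.h"
#include "AMDKernelCodeT.h"
#include "llvm/ADT/ArrayRef.h"
#include <utility>
using namespace llvm;

// Assumed driver-side ordering for the new streamer interface.
static void emitAllCodeObjectMetadata(
    AMDGPUTargetStreamer &TS, const Module &M,
    ArrayRef<std::pair<const Function *, amd_kernel_code_t>> Kernels) {
  TS.EmitStartOfCodeObjectMetadata(M);          // module-level fields
  for (const auto &K : Kernels)                 // one entry per kernel
    TS.EmitKernelCodeObjectMetadata(*K.first, K.second);
  TS.EmitEndOfCodeObjectMetadata();             // serialize and emit note
}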
diff --git a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 8a6d00ce69ed..09e3efad10af 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -1,13 +1,12 @@
-
add_llvm_library(LLVMAMDGPUDesc
AMDGPUAsmBackend.cpp
+ AMDGPUCodeObjectMetadataStreamer.cpp
AMDGPUELFObjectWriter.cpp
AMDGPUELFStreamer.cpp
+ AMDGPUMCAsmInfo.cpp
AMDGPUMCCodeEmitter.cpp
AMDGPUMCTargetDesc.cpp
- AMDGPUMCAsmInfo.cpp
- AMDGPURuntimeMD.cpp
AMDGPUTargetStreamer.cpp
R600MCCodeEmitter.cpp
SIMCCodeEmitter.cpp
- )
+)
diff --git a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index 0c5bb0648a16..bda0928036fd 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -220,13 +220,35 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
Imm = MO.getImm();
}
- switch (AMDGPU::getOperandSize(OpInfo)) {
- case 4:
+ switch (OpInfo.OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
- case 8:
+
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
- case 2:
+
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+    // FIXME: Is this correct? What do inline immediates do for an f16 src
+    // on SI, which has no f16 support?
return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
+
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+ uint16_t Lo16 = static_cast<uint16_t>(Imm);
+ assert(Lo16 == static_cast<uint16_t>(Imm >> 16));
+ uint32_t Encoding = getLit16Encoding(Lo16, STI);
+ assert(Encoding != 255 && "packed constants can only be inline immediates");
+ return Encoding;
+ }
default:
llvm_unreachable("invalid operand size");
}
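The new packed-operand cases rely on an invariant rather than computing anything: both 16-bit halves of the immediate must match and must themselves be inline-encodable, since packed operands have no literal slot (encoding 255 is the literal escape). A standalone restatement of that check, with getLit16Encoding passed in as a stand-in for the member function:

#include <cstdint>

// True iff Imm is usable as a packed v2i16/v2f16 inline immediate:
// identical halves, and the half encodes as an inline constant
// (255 would mean "use a literal", which packed operands lack).
static bool isPackedInlineImm(uint32_t Imm,
                              uint32_t (*getLit16Encoding)(uint16_t)) {
  uint16_t Lo16 = static_cast<uint16_t>(Imm);
  uint16_t Hi16 = static_cast<uint16_t>(Imm >> 16);
  return Lo16 == Hi16 && getLit16Encoding(Lo16) != 255;
}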
diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td
index 46803e555711..a515eecc222a 100644
--- a/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/lib/Target/AMDGPU/MIMGInstructions.td
@@ -475,106 +475,6 @@ class ImageAtomicCmpSwapPattern<MIMG opcode, ValueType vt> : Pat <
sub0)
>;
-// ======= SI Image Intrinsics ================
-
-// Image load
-defm : ImagePatterns<int_SI_image_load, "IMAGE_LOAD">;
-defm : ImagePatterns<int_SI_image_load_mip, "IMAGE_LOAD_MIP">;
-def : ImagePattern<int_SI_getresinfo, IMAGE_GET_RESINFO_V4_V1, i32>;
-
-// Basic sample
-defm : SampleRawPatterns<int_SI_image_sample, "IMAGE_SAMPLE">;
-defm : SampleRawPatterns<int_SI_image_sample_cl, "IMAGE_SAMPLE_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_d, "IMAGE_SAMPLE_D">;
-defm : SampleRawPatterns<int_SI_image_sample_d_cl, "IMAGE_SAMPLE_D_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_l, "IMAGE_SAMPLE_L">;
-defm : SampleRawPatterns<int_SI_image_sample_b, "IMAGE_SAMPLE_B">;
-defm : SampleRawPatterns<int_SI_image_sample_b_cl, "IMAGE_SAMPLE_B_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_lz, "IMAGE_SAMPLE_LZ">;
-defm : SampleRawPatterns<int_SI_image_sample_cd, "IMAGE_SAMPLE_CD">;
-defm : SampleRawPatterns<int_SI_image_sample_cd_cl, "IMAGE_SAMPLE_CD_CL">;
-
-// Sample with comparison
-defm : SampleRawPatterns<int_SI_image_sample_c, "IMAGE_SAMPLE_C">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cl, "IMAGE_SAMPLE_C_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_c_d, "IMAGE_SAMPLE_C_D">;
-defm : SampleRawPatterns<int_SI_image_sample_c_d_cl, "IMAGE_SAMPLE_C_D_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_c_l, "IMAGE_SAMPLE_C_L">;
-defm : SampleRawPatterns<int_SI_image_sample_c_b, "IMAGE_SAMPLE_C_B">;
-defm : SampleRawPatterns<int_SI_image_sample_c_b_cl, "IMAGE_SAMPLE_C_B_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_c_lz, "IMAGE_SAMPLE_C_LZ">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cd, "IMAGE_SAMPLE_C_CD">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cd_cl, "IMAGE_SAMPLE_C_CD_CL">;
-
-// Sample with offsets
-defm : SampleRawPatterns<int_SI_image_sample_o, "IMAGE_SAMPLE_O">;
-defm : SampleRawPatterns<int_SI_image_sample_cl_o, "IMAGE_SAMPLE_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_d_o, "IMAGE_SAMPLE_D_O">;
-defm : SampleRawPatterns<int_SI_image_sample_d_cl_o, "IMAGE_SAMPLE_D_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_l_o, "IMAGE_SAMPLE_L_O">;
-defm : SampleRawPatterns<int_SI_image_sample_b_o, "IMAGE_SAMPLE_B_O">;
-defm : SampleRawPatterns<int_SI_image_sample_b_cl_o, "IMAGE_SAMPLE_B_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_lz_o, "IMAGE_SAMPLE_LZ_O">;
-defm : SampleRawPatterns<int_SI_image_sample_cd_o, "IMAGE_SAMPLE_CD_O">;
-defm : SampleRawPatterns<int_SI_image_sample_cd_cl_o, "IMAGE_SAMPLE_CD_CL_O">;
-
-// Sample with comparison and offsets
-defm : SampleRawPatterns<int_SI_image_sample_c_o, "IMAGE_SAMPLE_C_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cl_o, "IMAGE_SAMPLE_C_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_d_o, "IMAGE_SAMPLE_C_D_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_d_cl_o, "IMAGE_SAMPLE_C_D_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_l_o, "IMAGE_SAMPLE_C_L_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_b_o, "IMAGE_SAMPLE_C_B_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_b_cl_o, "IMAGE_SAMPLE_C_B_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_lz_o, "IMAGE_SAMPLE_C_LZ_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cd_o, "IMAGE_SAMPLE_C_CD_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cd_cl_o, "IMAGE_SAMPLE_C_CD_CL_O">;
-
-// Gather opcodes
-// Only the variants which make sense are defined.
-def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V2, v2i32>;
-def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_cl, IMAGE_GATHER4_CL_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_l, IMAGE_GATHER4_L_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_b, IMAGE_GATHER4_B_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V2, v2i32>;
-def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V4, v4i32>;
-
-def : SampleRawPattern<int_SI_gather4_c, IMAGE_GATHER4_C_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_b_cl, IMAGE_GATHER4_C_B_CL_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_lz, IMAGE_GATHER4_C_LZ_V4_V4, v4i32>;
-
-def : SampleRawPattern<int_SI_gather4_o, IMAGE_GATHER4_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_b_cl_o, IMAGE_GATHER4_B_CL_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_lz_o, IMAGE_GATHER4_LZ_O_V4_V4, v4i32>;
-
-def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_cl_o, IMAGE_GATHER4_C_CL_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_l_o, IMAGE_GATHER4_C_L_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_b_o, IMAGE_GATHER4_C_B_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_b_cl_o, IMAGE_GATHER4_C_B_CL_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V8, v8i32>;
-
-def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V1, i32>;
-def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V2, v2i32>;
-def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V4, v4i32>;
-
// ======= amdgcn Image Intrinsics ==============
// Image load
diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td
index 3c07cc76b9a1..0e4eda982139 100644
--- a/lib/Target/AMDGPU/Processors.td
+++ b/lib/Target/AMDGPU/Processors.td
@@ -187,3 +187,10 @@ def : ProcessorModel<"gfx810", SIQuarterSpeedModel,
[FeatureISAVersion8_1_0]
>;
+def : ProcessorModel<"gfx900", SIQuarterSpeedModel,
+ [FeatureGFX9, FeatureISAVersion9_0_0, FeatureLDSBankCount32]
+>;
+
+def : ProcessorModel<"gfx901", SIQuarterSpeedModel,
+ [FeatureGFX9, FeatureXNACK, FeatureISAVersion9_0_1, FeatureLDSBankCount32]
+>;
diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index 45b36d3d3ebb..811b905588b4 100644
--- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -19,10 +19,26 @@
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <new>
+#include <set>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -43,13 +59,12 @@ struct CFStack {
std::vector<StackItem> BranchStack;
std::vector<StackItem> LoopStack;
unsigned MaxStackSize;
- unsigned CurrentEntries;
- unsigned CurrentSubEntries;
+ unsigned CurrentEntries = 0;
+ unsigned CurrentSubEntries = 0;
CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
// We need to reserve a stack entry for CALL_FS in vertex shaders.
- MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0),
- CurrentEntries(0), CurrentSubEntries(0) { }
+ MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}
unsigned getLoopDepth();
bool branchStackContains(CFStack::StackItem);
@@ -198,9 +213,8 @@ void CFStack::popLoop() {
}
class R600ControlFlowFinalizer : public MachineFunctionPass {
-
private:
- typedef std::pair<MachineInstr *, std::vector<MachineInstr *> > ClauseFile;
+ typedef std::pair<MachineInstr *, std::vector<MachineInstr *>> ClauseFile;
enum ControlFlowInstruction {
CF_TC,
@@ -217,10 +231,10 @@ private:
};
static char ID;
- const R600InstrInfo *TII;
- const R600RegisterInfo *TRI;
+ const R600InstrInfo *TII = nullptr;
+ const R600RegisterInfo *TRI = nullptr;
unsigned MaxFetchInst;
- const R600Subtarget *ST;
+ const R600Subtarget *ST = nullptr;
bool IsTrivialInst(MachineInstr &MI) const {
switch (MI.getOpcode()) {
@@ -355,7 +369,7 @@ private:
continue;
int64_t Imm = Src.second;
std::vector<MachineOperand *>::iterator It =
- find_if(Lits, [&](MachineOperand *val) {
+ llvm::find_if(Lits, [&](MachineOperand *val) {
return val->isImm() && (val->getImm() == Imm);
});
@@ -485,8 +499,7 @@ private:
}
public:
- R600ControlFlowFinalizer(TargetMachine &tm)
- : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), ST(nullptr) {}
+ R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override {
ST = &MF.getSubtarget<R600Subtarget>();
@@ -501,7 +514,7 @@ public:
++MB) {
MachineBasicBlock &MBB = *MB;
unsigned CfCount = 0;
- std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack;
+ std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
std::vector<MachineInstr * > IfThenElseStack;
if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_VS) {
BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
@@ -554,7 +567,7 @@ public:
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_WHILE_LOOP))
.addImm(1);
- std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount,
+ std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
std::set<MachineInstr *>());
Pair.second.insert(MIb);
LoopStack.push_back(std::move(Pair));
@@ -564,7 +577,7 @@ public:
}
case AMDGPU::ENDLOOP: {
CFStack.popLoop();
- std::pair<unsigned, std::set<MachineInstr *> > Pair =
+ std::pair<unsigned, std::set<MachineInstr *>> Pair =
std::move(LoopStack.back());
LoopStack.pop_back();
CounterPropagateAddr(Pair.second, CfCount);
@@ -693,7 +706,6 @@ char R600ControlFlowFinalizer::ID = 0;
} // end anonymous namespace
-
-llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
+FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) {
return new R600ControlFlowFinalizer(TM);
}
diff --git a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index 9a5db6ccc672..03fc1aff5ec1 100644
--- a/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -17,26 +17,37 @@
#include "AMDGPU.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
-#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "AMDGPUSubtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
+#include <utility>
+#include <vector>
using namespace llvm;
namespace llvm {
+
void initializeR600EmitClauseMarkersPass(PassRegistry&);
-}
+
+} // end namespace llvm
namespace {
class R600EmitClauseMarkers : public MachineFunctionPass {
-
private:
- const R600InstrInfo *TII;
- int Address;
+ const R600InstrInfo *TII = nullptr;
+ int Address = 0;
unsigned OccupiedDwords(MachineInstr &MI) const {
switch (MI.getOpcode()) {
@@ -118,7 +129,7 @@ private:
SubstituteKCacheBank(MachineInstr &MI,
std::vector<std::pair<unsigned, unsigned>> &CachedConsts,
bool UpdateInstr = true) const {
- std::vector<std::pair<unsigned, unsigned> > UsedKCache;
+ std::vector<std::pair<unsigned, unsigned>> UsedKCache;
if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != AMDGPU::DOT_4)
return true;
@@ -181,10 +192,11 @@ private:
bool canClauseLocalKillFitInClause(
unsigned AluInstCount,
- std::vector<std::pair<unsigned, unsigned> > KCacheBanks,
+ std::vector<std::pair<unsigned, unsigned>> KCacheBanks,
MachineBasicBlock::iterator Def,
MachineBasicBlock::iterator BBEnd) {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
+    // TODO: Change this to defs?
for (MachineInstr::const_mop_iterator
MOI = Def->operands_begin(),
MOE = Def->operands_end(); MOI != MOE; ++MOI) {
@@ -207,15 +219,17 @@ private:
if (AluInstCount >= TII->getMaxAlusPerClause())
return false;
+ // TODO: Is this true? kill flag appears to work OK below
// Register kill flags have been cleared by the time we get to this
// pass, but it is safe to assume that all uses of this register
// occur in the same basic block as its definition, because
// it is illegal for the scheduler to schedule them in
// different blocks.
- if (UseI->findRegisterUseOperandIdx(MOI->getReg()))
+ if (UseI->readsRegister(MOI->getReg()))
LastUseCount = AluInstCount;
- if (UseI != Def && UseI->findRegisterDefOperandIdx(MOI->getReg()) != -1)
+ // Exit early if the current use kills the register
+ if (UseI != Def && UseI->killsRegister(MOI->getReg()))
break;
}
if (LastUseCount)
@@ -228,7 +242,7 @@ private:
MachineBasicBlock::iterator
MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
MachineBasicBlock::iterator ClauseHead = I;
- std::vector<std::pair<unsigned, unsigned> > KCacheBanks;
+ std::vector<std::pair<unsigned, unsigned>> KCacheBanks;
bool PushBeforeModifier = false;
unsigned AluInstCount = 0;
for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
@@ -294,8 +308,8 @@ private:
public:
static char ID;
- R600EmitClauseMarkers() : MachineFunctionPass(ID), TII(nullptr), Address(0) {
+ R600EmitClauseMarkers() : MachineFunctionPass(ID) {
initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry());
}
@@ -310,9 +324,11 @@ public:
if (I != MBB.end() && I->getOpcode() == AMDGPU::CF_ALU)
continue; // BB was already parsed
for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
- if (isALU(*I))
- I = MakeALUClause(MBB, I);
- else
+ if (isALU(*I)) {
+ auto next = MakeALUClause(MBB, I);
+ assert(next != I);
+ I = next;
+ } else
++I;
}
}
@@ -333,7 +349,6 @@ INITIALIZE_PASS_BEGIN(R600EmitClauseMarkers, "emitclausemarkers",
INITIALIZE_PASS_END(R600EmitClauseMarkers, "emitclausemarkers",
"R600 Emit Clause Markters", false, false)
-llvm::FunctionPass *llvm::createR600EmitClauseMarkers() {
+FunctionPass *llvm::createR600EmitClauseMarkers() {
return new R600EmitClauseMarkers();
}
-
diff --git a/lib/Target/AMDGPU/R600FrameLowering.cpp b/lib/Target/AMDGPU/R600FrameLowering.cpp
index 5813786abe01..1f01ad732e00 100644
--- a/lib/Target/AMDGPU/R600FrameLowering.cpp
+++ b/lib/Target/AMDGPU/R600FrameLowering.cpp
@@ -8,7 +8,43 @@
//==-----------------------------------------------------------------------===//
#include "R600FrameLowering.h"
+#include "AMDGPUSubtarget.h"
+#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/MathExtras.h"
using namespace llvm;
R600FrameLowering::~R600FrameLowering() = default;
+
+/// \returns The register index at which frame object \p FI begins.
+int R600FrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const R600RegisterInfo *RI
+ = MF.getSubtarget<R600Subtarget>().getRegisterInfo();
+
+ // Fill in FrameReg output argument.
+ FrameReg = RI->getFrameRegister(MF);
+
+  // Start the offset two stack slots in (2 * stack width * 4 bytes) so we
+  // don't overwrite work group information.
+ // FIXME: We should only do this when the shader actually uses this
+ // information.
+ unsigned OffsetBytes = 2 * (getStackWidth(MF) * 4);
+ int UpperBound = FI == -1 ? MFI.getNumObjects() : FI;
+
+ for (int i = MFI.getObjectIndexBegin(); i < UpperBound; ++i) {
+ OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(i));
+ OffsetBytes += MFI.getObjectSize(i);
+ // Each register holds 4 bytes, so we must always align the offset to at
+ // least 4 bytes, so that 2 frame objects won't share the same register.
+ OffsetBytes = alignTo(OffsetBytes, 4);
+ }
+
+ if (FI != -1)
+ OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(FI));
+
+ return OffsetBytes / (getStackWidth(MF) * 4);
+}
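As a concrete check of the arithmetic above, assume a stack width of 1 (one register row is 4 bytes): the offset starts at 8 bytes to skip the work-group information, a single 8-byte object then occupies register indices 2 and 3, and a query for the next frame index yields index 4. A self-contained sketch reproducing the computation under those assumptions; the 4-byte object alignment is a hypothetical input, the real pass reads sizes and alignments from MachineFrameInfo:

#include <cassert>
#include <cstdint>

// Mirrors the loop in getFrameIndexReference for StackWidth == 1 and
// 4-byte-aligned frame objects.
static uint64_t frameIndexToRegIndex(const uint64_t *Sizes, int FI) {
  const uint64_t StackWidthBytes = 1 * 4;
  uint64_t OffsetBytes = 2 * StackWidthBytes;         // skip work-group info
  for (int I = 0; I < FI; ++I) {
    OffsetBytes += Sizes[I];
    OffsetBytes = (OffsetBytes + 3) & ~uint64_t(3);   // alignTo(x, 4)
  }
  return OffsetBytes / StackWidthBytes;
}

int main() {
  const uint64_t Sizes[] = {8};
  assert(frameIndexToRegIndex(Sizes, 1) == 4);  // object 0 sits in regs 2-3
  return 0;
}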
diff --git a/lib/Target/AMDGPU/R600FrameLowering.h b/lib/Target/AMDGPU/R600FrameLowering.h
index 874435f35ce4..142f70967eda 100644
--- a/lib/Target/AMDGPU/R600FrameLowering.h
+++ b/lib/Target/AMDGPU/R600FrameLowering.h
@@ -25,6 +25,8 @@ public:
MachineBasicBlock &MBB) const override {}
void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const override {}
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
};
} // end namespace llvm
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 77fee4356b65..3590a9b05e1d 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -221,6 +221,15 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SUBE, VT, Expand);
}
+ // LLVM will expand these to atomic_cmp_swap(0)
+ // and atomic_swap, respectively.
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+
+ // We need to custom lower some of the intrinsics
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
setSchedulingPreference(Sched::Source);
setTargetDAGCombine(ISD::FP_ROUND);
@@ -266,7 +275,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode())));
for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
- NewMI.addOperand(MI.getOperand(i));
+ NewMI.add(MI.getOperand(i));
}
} else {
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
@@ -339,34 +348,34 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
.addImm(isEOP(I)); // Set End of program bit
break;
case AMDGPU::RAT_STORE_TYPED_eg:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
- .addOperand(MI.getOperand(2))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
.addImm(isEOP(I)); // Set End of program bit
break;
case AMDGPU::BRANCH:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
- .addOperand(MI.getOperand(0));
+ .add(MI.getOperand(0));
break;
case AMDGPU::BRANCH_COND_f32: {
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
AMDGPU::PREDICATE_BIT)
- .addOperand(MI.getOperand(1))
+ .add(MI.getOperand(1))
.addImm(AMDGPU::PRED_SETNE)
.addImm(0); // Flags
TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
}
@@ -375,12 +384,12 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineInstr *NewMI =
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
AMDGPU::PREDICATE_BIT)
- .addOperand(MI.getOperand(1))
+ .add(MI.getOperand(1))
.addImm(AMDGPU::PRED_SETNE_INT)
.addImm(0); // Flags
TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
}
@@ -408,13 +417,13 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return BB;
unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
- .addOperand(MI.getOperand(2))
- .addOperand(MI.getOperand(3))
- .addOperand(MI.getOperand(4))
- .addOperand(MI.getOperand(5))
- .addOperand(MI.getOperand(6))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5))
+ .add(MI.getOperand(6))
.addImm(CfInst)
.addImm(EOP);
break;
@@ -490,8 +499,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT VT = Op.getValueType();
SDLoc DL(Op);
- switch(IntrinsicID) {
- default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ switch (IntrinsicID) {
case AMDGPUIntrinsic::r600_tex:
case AMDGPUIntrinsic::r600_texc: {
unsigned TextureOp;
@@ -552,7 +560,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
}
case Intrinsic::r600_implicitarg_ptr: {
- MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
+ MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
return DAG.getConstant(ByteOffset, DL, PtrVT);
}
@@ -599,6 +607,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::r600_recipsqrt_clamped:
return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
+ default:
+ return Op;
}
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
@@ -702,12 +712,12 @@ SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
- if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
+ if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
const DataLayout &DL = DAG.getDataLayout();
const GlobalValue *GV = GSD->getGlobal();
- MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
+ MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
@@ -864,7 +874,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
unsigned DwordOffset) const {
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUAS::CONSTANT_BUFFER_0);
+ AMDGPUASI.CONSTANT_BUFFER_0);
// We shouldn't be using an offset wider than 16-bits for implicit parameters.
assert(isInt<16>(ByteOffset));
@@ -911,7 +921,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
if (VT == MVT::f32) {
DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
- SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
+ SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
if (MinMax)
return MinMax;
}
@@ -1102,7 +1112,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
//TODO: Who creates the i8 stores?
assert(Store->isTruncatingStore()
|| Store->getValue().getValueType() == MVT::i8);
- assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
+ assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS);
SDValue Mask;
if (Store->getMemoryVT() == MVT::i8) {
@@ -1200,9 +1210,10 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
// Neither LOCAL nor PRIVATE can do vectors at the moment
- if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
+ if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
VT.isVector()) {
- if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
+ if ((AS == AMDGPUASI.PRIVATE_ADDRESS) &&
+ StoreNode->isTruncatingStore()) {
// Add an extra level of chain to isolate this vector
SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
// TODO: can the chain be replaced without creating a new store?
@@ -1225,7 +1236,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
DAG.getConstant(2, DL, PtrVT));
- if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
+ if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
// It is beneficial to create MSKOR here instead of combiner to avoid
// artificial dependencies introduced by RMW
if (StoreNode->isTruncatingStore()) {
@@ -1278,7 +1289,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
}
// GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
- if (AS != AMDGPUAS::PRIVATE_ADDRESS)
+ if (AS != AMDGPUASI.PRIVATE_ADDRESS)
return SDValue();
if (MemVT.bitsLT(MVT::i32))
@@ -1297,39 +1308,39 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// return (512 + (kc_bank << 12))
static int
-ConstantAddressBlock(unsigned AddressSpace) {
+ConstantAddressBlock(unsigned AddressSpace, AMDGPUAS AMDGPUASI) {
switch (AddressSpace) {
- case AMDGPUAS::CONSTANT_BUFFER_0:
+ case AMDGPUASI.CONSTANT_BUFFER_0:
return 512;
- case AMDGPUAS::CONSTANT_BUFFER_1:
+ case AMDGPUASI.CONSTANT_BUFFER_1:
return 512 + 4096;
- case AMDGPUAS::CONSTANT_BUFFER_2:
+ case AMDGPUASI.CONSTANT_BUFFER_2:
return 512 + 4096 * 2;
- case AMDGPUAS::CONSTANT_BUFFER_3:
+ case AMDGPUASI.CONSTANT_BUFFER_3:
return 512 + 4096 * 3;
- case AMDGPUAS::CONSTANT_BUFFER_4:
+ case AMDGPUASI.CONSTANT_BUFFER_4:
return 512 + 4096 * 4;
- case AMDGPUAS::CONSTANT_BUFFER_5:
+ case AMDGPUASI.CONSTANT_BUFFER_5:
return 512 + 4096 * 5;
- case AMDGPUAS::CONSTANT_BUFFER_6:
+ case AMDGPUASI.CONSTANT_BUFFER_6:
return 512 + 4096 * 6;
- case AMDGPUAS::CONSTANT_BUFFER_7:
+ case AMDGPUASI.CONSTANT_BUFFER_7:
return 512 + 4096 * 7;
- case AMDGPUAS::CONSTANT_BUFFER_8:
+ case AMDGPUASI.CONSTANT_BUFFER_8:
return 512 + 4096 * 8;
- case AMDGPUAS::CONSTANT_BUFFER_9:
+ case AMDGPUASI.CONSTANT_BUFFER_9:
return 512 + 4096 * 9;
- case AMDGPUAS::CONSTANT_BUFFER_10:
+ case AMDGPUASI.CONSTANT_BUFFER_10:
return 512 + 4096 * 10;
- case AMDGPUAS::CONSTANT_BUFFER_11:
+ case AMDGPUASI.CONSTANT_BUFFER_11:
return 512 + 4096 * 11;
- case AMDGPUAS::CONSTANT_BUFFER_12:
+ case AMDGPUASI.CONSTANT_BUFFER_12:
return 512 + 4096 * 12;
- case AMDGPUAS::CONSTANT_BUFFER_13:
+ case AMDGPUASI.CONSTANT_BUFFER_13:
return 512 + 4096 * 13;
- case AMDGPUAS::CONSTANT_BUFFER_14:
+ case AMDGPUASI.CONSTANT_BUFFER_14:
return 512 + 4096 * 14;
- case AMDGPUAS::CONSTANT_BUFFER_15:
+ case AMDGPUASI.CONSTANT_BUFFER_15:
return 512 + 4096 * 15;
default:
return -1;
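The switch above encodes an affine map: kcache bank N begins at 512 + 4096*N, i.e. the (512 + (kc_bank << 12)) formula in the comment. A closed-form equivalent is only safe if the sixteen CONSTANT_BUFFER_* members of AMDGPUAS are numbered contiguously, which the struct does not promise, hence the explicit switch in the real code; the sketch below makes that assumption loudly:

// Hypothetical closed form of ConstantAddressBlock(); valid only when
// CONSTANT_BUFFER_0..CONSTANT_BUFFER_15 are contiguous in AMDGPUASI.
static int constantAddressBlock(unsigned AS, unsigned ConstBuf0) {
  if (AS < ConstBuf0 || AS > ConstBuf0 + 15)
    return -1;                              // not a kcache constant buffer
  return 512 + ((AS - ConstBuf0) << 12);    // 4096 bytes per bank
}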
@@ -1397,7 +1408,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
EVT MemVT = LoadNode->getMemoryVT();
ISD::LoadExtType ExtType = LoadNode->getExtensionType();
- if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
+ if (AS == AMDGPUASI.PRIVATE_ADDRESS &&
ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
return lowerPrivateExtLoad(Op, DAG);
}
@@ -1407,13 +1418,14 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = LoadNode->getChain();
SDValue Ptr = LoadNode->getBasePtr();
- if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
- LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
+ if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
+ LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) &&
VT.isVector()) {
return scalarizeVectorLoad(LoadNode, DAG);
}
- int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
+ int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace(),
+ AMDGPUASI);
if (ConstantBlock > -1 &&
((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
(LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
@@ -1445,7 +1457,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
DAG.getConstant(4, DL, MVT::i32)),
DAG.getConstant(LoadNode->getAddressSpace() -
- AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
+ AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32)
);
}
@@ -1481,7 +1493,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(MergedValues, DL);
}
- if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
+ if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) {
return SDValue();
}
@@ -1535,7 +1547,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
SmallVector<ISD::InputArg, 8> LocalIns;
if (AMDGPU::isShader(CallConv)) {
- AnalyzeFormalArguments(CCInfo, Ins);
+ CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
} else {
analyzeFormalArgumentsCompute(CCInfo, Ins);
}
@@ -1558,7 +1570,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
}
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
- AMDGPUAS::CONSTANT_BUFFER_0);
+ AMDGPUASI.CONSTANT_BUFFER_0);
// i64 isn't a legal type, so the register type used ends up as i32, which
// isn't expected here. It attempts to create this sextload, but it ends up
diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp
index e88bd076718e..2422d57269eb 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -12,16 +12,34 @@
//
//===----------------------------------------------------------------------===//
-#include "R600InstrInfo.h"
#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
#include "AMDGPUSubtarget.h"
-#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
-#include "R600MachineFunctionInfo.h"
+#include "R600FrameLowering.h"
+#include "R600InstrInfo.h"
#include "R600RegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -191,7 +209,7 @@ bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const {
const MachineFunction *MF = MI.getParent()->getParent();
return (AMDGPU::isCompute(MF->getFunction()->getCallingConv()) &&
usesVertexCache(MI.getOpcode())) ||
- usesTextureCache(MI.getOpcode());
+ usesTextureCache(MI.getOpcode());
}
bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
@@ -321,7 +339,7 @@ R600InstrInfo::ExtractSrcs(MachineInstr &MI,
unsigned &ConstCount) const {
ConstCount = 0;
const std::pair<int, unsigned> DummyPair(-1, 0);
- std::vector<std::pair<int, unsigned> > Result;
+ std::vector<std::pair<int, unsigned>> Result;
unsigned i = 0;
for (const auto &Src : getSrcs(MI)) {
++i;
@@ -348,8 +366,8 @@ R600InstrInfo::ExtractSrcs(MachineInstr &MI,
return Result;
}
-static std::vector<std::pair<int, unsigned> >
-Swizzle(std::vector<std::pair<int, unsigned> > Src,
+static std::vector<std::pair<int, unsigned>>
+Swizzle(std::vector<std::pair<int, unsigned>> Src,
R600InstrInfo::BankSwizzle Swz) {
if (Src[0] == Src[1])
Src[1].first = -1;
@@ -404,14 +422,14 @@ static unsigned getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
/// in the same Instruction Group while meeting read port limitations given a
/// Swz swizzle sequence.
unsigned R600InstrInfo::isLegalUpTo(
- const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs,
const std::vector<R600InstrInfo::BankSwizzle> &Swz,
- const std::vector<std::pair<int, unsigned> > &TransSrcs,
+ const std::vector<std::pair<int, unsigned>> &TransSrcs,
R600InstrInfo::BankSwizzle TransSwz) const {
int Vector[4][3];
memset(Vector, -1, sizeof(Vector));
for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
- const std::vector<std::pair<int, unsigned> > &Srcs =
+ const std::vector<std::pair<int, unsigned>> &Srcs =
Swizzle(IGSrcs[i], Swz[i]);
for (unsigned j = 0; j < 3; j++) {
const std::pair<int, unsigned> &Src = Srcs[j];
@@ -473,9 +491,9 @@ NextPossibleSolution(
/// Enumerate all possible Swizzle sequence to find one that can meet all
/// read port requirements.
bool R600InstrInfo::FindSwizzleForVectorSlot(
- const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
+ const std::vector<std::vector<std::pair<int, unsigned>>> &IGSrcs,
std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
- const std::vector<std::pair<int, unsigned> > &TransSrcs,
+ const std::vector<std::pair<int, unsigned>> &TransSrcs,
R600InstrInfo::BankSwizzle TransSwz) const {
unsigned ValidUpTo = 0;
do {
@@ -490,7 +508,7 @@ bool R600InstrInfo::FindSwizzleForVectorSlot(
/// a const, and can't read a gpr at cycle 1 if they read 2 const.
static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
- const std::vector<std::pair<int, unsigned> > &TransOps,
+ const std::vector<std::pair<int, unsigned>> &TransOps,
unsigned ConstCount) {
// TransALU can't read 3 constants
if (ConstCount > 2)
@@ -516,7 +534,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
const {
// TODO: Support shared src0 - src1 operand
- std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
+ std::vector<std::vector<std::pair<int, unsigned>>> IGSrcs;
ValidSwizzle.clear();
unsigned ConstCount;
BankSwizzle TransBS = ALU_VEC_012_SCL_210;
@@ -527,7 +545,7 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
IG[i]->getOperand(Op).getImm());
}
- std::vector<std::pair<int, unsigned> > TransOps;
+ std::vector<std::pair<int, unsigned>> TransOps;
if (!isLastAluTrans)
return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
@@ -556,7 +574,6 @@ R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
return false;
}
-
bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
const {
@@ -780,7 +797,7 @@ unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,
unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
- assert(!BytesRemoved && "code size not handled");
+ assert(!BytesRemoved && "code size not handled");
// Note : we leave PRED* instructions there.
// They may be needed when predicating instructions.
@@ -852,7 +869,7 @@ bool R600InstrInfo::isPredicated(const MachineInstr &MI) const {
}
}
-bool R600InstrInfo::isPredicable(MachineInstr &MI) const {
+bool R600InstrInfo::isPredicable(const MachineInstr &MI) const {
// XXX: KILL* instructions can be predicated, but they must be the last
// instruction in a clause, so this means any instructions after them cannot
// be predicated. Until we have proper support for instruction clauses in the
@@ -863,7 +880,7 @@ bool R600InstrInfo::isPredicable(MachineInstr &MI) const {
} else if (MI.getOpcode() == AMDGPU::CF_ALU) {
// If the clause start in the middle of MBB then the MBB has more
// than a single clause, unable to predicate several clauses.
- if (MI.getParent()->begin() != MachineBasicBlock::iterator(MI))
+ if (MI.getParent()->begin() != MachineBasicBlock::const_iterator(MI))
return false;
// TODO: We don't support KC merging atm
return MI.getOperand(3).getImm() == 0 && MI.getOperand(4).getImm() == 0;
@@ -874,10 +891,9 @@ bool R600InstrInfo::isPredicable(MachineInstr &MI) const {
}
}
-
bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumCyles,
+ unsigned NumCycles,
unsigned ExtraPredCycles,
BranchProbability Probability) const{
return true;
@@ -896,7 +912,7 @@ R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
- unsigned NumCyles,
+ unsigned NumCycles,
BranchProbability Probability)
const {
return true;
@@ -908,7 +924,6 @@ R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
return false;
}
-
bool
R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
MachineOperand &MO = Cond[1];
@@ -948,7 +963,6 @@ bool R600InstrInfo::DefinesPredicate(MachineInstr &MI,
return isPredicateSetter(MI.getOpcode());
}
-
bool R600InstrInfo::PredicateInstruction(MachineInstr &MI,
ArrayRef<MachineOperand> Pred) const {
int PIdx = MI.findFirstPredOperandIdx();
@@ -1067,7 +1081,7 @@ bool R600InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
-void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
+void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const {
const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
const R600FrameLowering *TFL = ST.getFrameLowering();
diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h
index a280052dbd4a..3b828006807e 100644
--- a/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/lib/Target/AMDGPU/R600InstrInfo.h
@@ -177,12 +177,12 @@ public:
bool isPredicated(const MachineInstr &MI) const override;
- bool isPredicable(MachineInstr &MI) const override;
+ bool isPredicable(const MachineInstr &MI) const override;
- bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
BranchProbability Probability) const override;
- bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,
BranchProbability Probability) const override;
diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td
index 9210e66b0fe7..bac557ba989e 100644
--- a/lib/Target/AMDGPU/R600Instructions.td
+++ b/lib/Target/AMDGPU/R600Instructions.td
@@ -316,7 +316,7 @@ class VTX_READ <string name, dag outs, list<dag> pattern>
class LoadParamFrag <PatFrag load_type> : PatFrag <
(ops node:$ptr), (load_type node:$ptr),
[{ return isConstantLoad(cast<LoadSDNode>(N), 0) ||
- (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }]
+ (cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUASI.PARAM_I_ADDRESS); }]
>;
def vtx_id3_az_extloadi8 : LoadParamFrag<az_extloadi8>;
@@ -326,8 +326,8 @@ def vtx_id3_load : LoadParamFrag<load>;
class LoadVtxId1 <PatFrag load> : PatFrag <
(ops node:$ptr), (load node:$ptr), [{
const MemSDNode *LD = cast<MemSDNode>(N);
- return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
- (LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ return LD->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
+ (LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
!isa<GlobalValue>(GetUnderlyingObject(
LD->getMemOperand()->getValue(), CurDAG->getDataLayout())));
}]>;
@@ -339,7 +339,7 @@ def vtx_id1_load : LoadVtxId1 <load>;
class LoadVtxId2 <PatFrag load> : PatFrag <
(ops node:$ptr), (load node:$ptr), [{
const MemSDNode *LD = cast<MemSDNode>(N);
- return LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ return LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
isa<GlobalValue>(GetUnderlyingObject(
LD->getMemOperand()->getValue(), CurDAG->getDataLayout()));
}]>;
@@ -1013,7 +1013,7 @@ multiclass CUBE_Common <bits<11> inst> {
(outs R600_Reg128:$dst),
(ins R600_Reg128:$src0),
"CUBE $dst $src0",
- [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src0))],
+ [(set v4f32:$dst, (int_r600_cube v4f32:$src0))],
VecALU
> {
let isPseudo = 1;
diff --git a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index d70f52e0f295..b7e62075244b 100644
--- a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
@@ -34,15 +35,6 @@ namespace {
typedef std::pair<BasicBlock *, Value *> StackEntry;
typedef SmallVector<StackEntry, 16> StackVector;
-// Intrinsic names the control flow is annotated with
-static const char *const IfIntrinsic = "llvm.amdgcn.if";
-static const char *const ElseIntrinsic = "llvm.amdgcn.else";
-static const char *const BreakIntrinsic = "llvm.amdgcn.break";
-static const char *const IfBreakIntrinsic = "llvm.amdgcn.if.break";
-static const char *const ElseBreakIntrinsic = "llvm.amdgcn.else.break";
-static const char *const LoopIntrinsic = "llvm.amdgcn.loop";
-static const char *const EndCfIntrinsic = "llvm.amdgcn.end.cf";
-
class SIAnnotateControlFlow : public FunctionPass {
DivergenceAnalysis *DA;
@@ -56,13 +48,13 @@ class SIAnnotateControlFlow : public FunctionPass {
UndefValue *BoolUndef;
Constant *Int64Zero;
- Constant *If;
- Constant *Else;
- Constant *Break;
- Constant *IfBreak;
- Constant *ElseBreak;
- Constant *Loop;
- Constant *EndCf;
+ Function *If;
+ Function *Else;
+ Function *Break;
+ Function *IfBreak;
+ Function *ElseBreak;
+ Function *Loop;
+ Function *EndCf;
DominatorTree *DT;
StackVector Stack;
@@ -86,7 +78,8 @@ class SIAnnotateControlFlow : public FunctionPass {
void insertElse(BranchInst *Term);
Value *handleLoopCondition(Value *Cond, PHINode *Broken,
- llvm::Loop *L, BranchInst *Term);
+ llvm::Loop *L, BranchInst *Term,
+ SmallVectorImpl<WeakVH> &LoopPhiConditions);
void handleLoop(BranchInst *Term);
@@ -118,6 +111,7 @@ public:
INITIALIZE_PASS_BEGIN(SIAnnotateControlFlow, DEBUG_TYPE,
"Annotate SI Control Flow", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_END(SIAnnotateControlFlow, DEBUG_TYPE,
"Annotate SI Control Flow", false, false)
@@ -138,30 +132,13 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) {
BoolUndef = UndefValue::get(Boolean);
Int64Zero = ConstantInt::get(Int64, 0);
- If = M.getOrInsertFunction(
- IfIntrinsic, ReturnStruct, Boolean, (Type *)nullptr);
-
- Else = M.getOrInsertFunction(
- ElseIntrinsic, ReturnStruct, Int64, (Type *)nullptr);
-
- Break = M.getOrInsertFunction(
- BreakIntrinsic, Int64, Int64, (Type *)nullptr);
- cast<Function>(Break)->setDoesNotAccessMemory();
-
- IfBreak = M.getOrInsertFunction(
- IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)nullptr);
- cast<Function>(IfBreak)->setDoesNotAccessMemory();;
-
- ElseBreak = M.getOrInsertFunction(
- ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)nullptr);
- cast<Function>(ElseBreak)->setDoesNotAccessMemory();
-
- Loop = M.getOrInsertFunction(
- LoopIntrinsic, Boolean, Int64, (Type *)nullptr);
-
- EndCf = M.getOrInsertFunction(
- EndCfIntrinsic, Void, Int64, (Type *)nullptr);
-
+ If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if);
+ Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else);
+ Break = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_break);
+ IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break);
+ ElseBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else_break);
+ Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop);
+ EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf);
return false;
}
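Replacing the name-based getOrInsertFunction calls with Intrinsic::getDeclaration means the prototypes and attributes now come from the intrinsic definitions themselves, which is why the hand-written setDoesNotAccessMemory calls disappear. A minimal sketch of the lookup pattern; the wrapper function is invented for illustration:

#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Fetch (or materialize) a declaration for a target intrinsic; the
// signature is taken from the intrinsic table, not spelled out by hand.
static Function *getAmdgcnIfDecl(Module &M) {
  return Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if);
}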
@@ -208,15 +185,16 @@ bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
// \brief Erase "Phi" if it is not used any more
void SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
- if (!Phi->hasNUsesOrMore(1))
- Phi->eraseFromParent();
+ if (llvm::RecursivelyDeleteDeadPHINode(Phi)) {
+ DEBUG(dbgs() << "Erased unused condition phi\n");
+ }
}
/// \brief Open a new "If" block
void SIAnnotateControlFlow::openIf(BranchInst *Term) {
- if (isUniform(Term)) {
+ if (isUniform(Term))
return;
- }
+
Value *Ret = CallInst::Create(If, Term->getCondition(), "", Term);
Term->setCondition(ExtractValueInst::Create(Ret, 0, "", Term));
push(Term->getSuccessor(1), ExtractValueInst::Create(Ret, 1, "", Term));
@@ -233,8 +211,10 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
}
/// \brief Recursively handle the condition leading to a loop
-Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
- llvm::Loop *L, BranchInst *Term) {
+Value *SIAnnotateControlFlow::handleLoopCondition(
+ Value *Cond, PHINode *Broken,
+ llvm::Loop *L, BranchInst *Term,
+ SmallVectorImpl<WeakVH> &LoopPhiConditions) {
// Only search through PHI nodes which are inside the loop. If we try this
// with PHI nodes that are outside of the loop, we end up inserting new PHI
@@ -245,7 +225,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
if ((Phi = dyn_cast<PHINode>(Cond)) && L->contains(Phi)) {
BasicBlock *Parent = Phi->getParent();
- PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front());
+ PHINode *NewPhi = PHINode::Create(Int64, 0, "loop.phi", &Parent->front());
Value *Ret = NewPhi;
// Handle all non-constant incoming values first
@@ -258,14 +238,14 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
}
Phi->setIncomingValue(i, BoolFalse);
- Value *PhiArg = handleLoopCondition(Incoming, Broken, L, Term);
+ Value *PhiArg = handleLoopCondition(Incoming, Broken, L,
+ Term, LoopPhiConditions);
NewPhi->addIncoming(PhiArg, From);
}
BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();
for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
-
Value *Incoming = Phi->getIncomingValue(i);
if (Incoming != BoolTrue)
continue;
@@ -295,14 +275,17 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
continue;
}
}
+
TerminatorInst *Insert = From->getTerminator();
Value *PhiArg = CallInst::Create(Break, Broken, "", Insert);
NewPhi->setIncomingValue(i, PhiArg);
}
- eraseIfUnused(Phi);
+
+ LoopPhiConditions.push_back(WeakVH(Phi));
return Ret;
+ }
- } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
+ if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
BasicBlock *Parent = Inst->getParent();
Instruction *Insert;
if (L->contains(Inst)) {
@@ -310,46 +293,55 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
} else {
Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
}
+
Value *Args[] = { Cond, Broken };
return CallInst::Create(IfBreak, Args, "", Insert);
+ }
- // Insert IfBreak before TERM for constant COND.
- } else if (isa<ConstantInt>(Cond)) {
- Value *Args[] = { Cond, Broken };
- return CallInst::Create(IfBreak, Args, "", Term);
+  // For a constant COND other than true, insert IfBreak at the loop
+  // header's terminator; for true, insert it right before TERM.
+ if (isa<Constant>(Cond)) {
+ Instruction *Insert = Cond == BoolTrue ?
+ Term : L->getHeader()->getTerminator();
- } else {
- llvm_unreachable("Unhandled loop condition!");
+ Value *Args[] = { Cond, Broken };
+ return CallInst::Create(IfBreak, Args, "", Insert);
}
- return nullptr;
+
+ llvm_unreachable("Unhandled loop condition!");
}
/// \brief Handle a back edge (loop)
void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
- if (isUniform(Term)) {
+ if (isUniform(Term))
return;
- }
BasicBlock *BB = Term->getParent();
llvm::Loop *L = LI->getLoopFor(BB);
if (!L)
return;
+
BasicBlock *Target = Term->getSuccessor(1);
- PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
+ PHINode *Broken = PHINode::Create(Int64, 0, "phi.broken", &Target->front());
+ SmallVector<WeakVH, 8> LoopPhiConditions;
Value *Cond = Term->getCondition();
Term->setCondition(BoolTrue);
- Value *Arg = handleLoopCondition(Cond, Broken, L, Term);
+ Value *Arg = handleLoopCondition(Cond, Broken, L, Term, LoopPhiConditions);
- for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
- PI != PE; ++PI) {
+ for (BasicBlock *Pred : predecessors(Target))
+ Broken->addIncoming(Pred == BB ? Arg : Int64Zero, Pred);
+
+ Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
- Broken->addIncoming(*PI == BB ? Arg : Int64Zero, *PI);
+ for (WeakVH Val : reverse(LoopPhiConditions)) {
+ if (PHINode *Cond = cast_or_null<PHINode>(Val))
+ eraseIfUnused(Cond);
}
- Term->setCondition(CallInst::Create(Loop, Arg, "", Term));
push(Term->getSuccessor(0), Arg);
-}/// \brief Close the last opened control flow
+}
+
+/// \brief Close the last opened control flow
void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
llvm::Loop *L = LI->getLoopFor(BB);
@@ -359,59 +351,62 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
// We can't insert an EndCF call into a loop header, because it will
// get executed on every iteration of the loop, when it should be
// executed only once before the loop.
- SmallVector <BasicBlock*, 8> Latches;
+ SmallVector <BasicBlock *, 8> Latches;
L->getLoopLatches(Latches);
- std::vector<BasicBlock*> Preds;
- for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
- if (!is_contained(Latches, *PI))
- Preds.push_back(*PI);
+ SmallVector<BasicBlock *, 2> Preds;
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (!is_contained(Latches, Pred))
+ Preds.push_back(Pred);
}
+
BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, false);
}
Value *Exec = popSaved();
- if (!isa<UndefValue>(Exec))
- CallInst::Create(EndCf, Exec, "", &*BB->getFirstInsertionPt());
+ Instruction *FirstInsertionPt = &*BB->getFirstInsertionPt();
+ if (!isa<UndefValue>(Exec) && !isa<UnreachableInst>(FirstInsertionPt))
+ CallInst::Create(EndCf, Exec, "", FirstInsertionPt);
}
/// \brief Annotate the control flow with intrinsics so the backend can
/// recognize if/then/else and loops.
bool SIAnnotateControlFlow::runOnFunction(Function &F) {
-
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DA = &getAnalysis<DivergenceAnalysis>();
for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
E = df_end(&F.getEntryBlock()); I != E; ++I) {
-
- BranchInst *Term = dyn_cast<BranchInst>((*I)->getTerminator());
+ BasicBlock *BB = *I;
+ BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
if (!Term || Term->isUnconditional()) {
- if (isTopOfStack(*I))
- closeControlFlow(*I);
+ if (isTopOfStack(BB))
+ closeControlFlow(BB);
continue;
}
if (I.nodeVisited(Term->getSuccessor(1))) {
- if (isTopOfStack(*I))
- closeControlFlow(*I);
+ if (isTopOfStack(BB))
+ closeControlFlow(BB);
handleLoop(Term);
continue;
}
- if (isTopOfStack(*I)) {
+ if (isTopOfStack(BB)) {
PHINode *Phi = dyn_cast<PHINode>(Term->getCondition());
- if (Phi && Phi->getParent() == *I && isElse(Phi)) {
+ if (Phi && Phi->getParent() == BB && isElse(Phi)) {
insertElse(Term);
eraseIfUnused(Phi);
continue;
}
- closeControlFlow(*I);
+
+ closeControlFlow(BB);
}
+
openIf(Term);
}
diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h
index ff4e32147184..3dd372b32866 100644
--- a/lib/Target/AMDGPU/SIDefines.h
+++ b/lib/Target/AMDGPU/SIDefines.h
@@ -36,6 +36,7 @@ enum : uint64_t {
// TODO: Should this be split into VOP3 a and b?
VOP3 = 1 << 10,
+ VOP3P = 1 << 12,
VINTRP = 1 << 13,
SDWA = 1 << 14,
@@ -65,8 +66,8 @@ enum : uint64_t {
SOPK_ZEXT = UINT64_C(1) << 38,
SCALAR_STORE = UINT64_C(1) << 39,
FIXED_SIZE = UINT64_C(1) << 40,
- VOPAsmPrefer32Bit = UINT64_C(1) << 41
-
+ VOPAsmPrefer32Bit = UINT64_C(1) << 41,
+ HasFPClamp = UINT64_C(1) << 42
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
@@ -102,12 +103,14 @@ namespace AMDGPU {
OPERAND_REG_INLINE_C_FP16,
OPERAND_REG_INLINE_C_FP32,
OPERAND_REG_INLINE_C_FP64,
+ OPERAND_REG_INLINE_C_V2FP16,
+ OPERAND_REG_INLINE_C_V2INT16,
OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_FP16,
OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
- OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_FP64,
+ OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_V2INT16,
OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
@@ -125,9 +128,12 @@ namespace AMDGPU {
// NEG and SEXT share same bit-mask because they can't be set simultaneously.
namespace SISrcMods {
enum {
- NEG = 1 << 0, // Floating-point negate modifier
- ABS = 1 << 1, // Floating-point absolute modifier
- SEXT = 1 << 0 // Integer sign-extend modifier
+ NEG = 1 << 0, // Floating-point negate modifier
+ ABS = 1 << 1, // Floating-point absolute modifier
+ SEXT = 1 << 0, // Integer sign-extend modifier
+ NEG_HI = ABS, // Floating-point negate high packed component modifier.
+ OP_SEL_0 = 1 << 2,
+ OP_SEL_1 = 1 << 3
};
}
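Since bit 0 doubles as NEG (floating point) and SEXT (integer), and NEG_HI reuses the ABS bit, a modifier value can only be decoded relative to the operand's type; the new OP_SEL bits extend the same field for packed operands. A small decoding sketch under those stated assumptions; the SrcKind classifier is invented for illustration:

#include <cstdio>

namespace SISrcMods {
enum {
  NEG = 1 << 0, ABS = 1 << 1, SEXT = 1 << 0,
  NEG_HI = ABS, OP_SEL_0 = 1 << 2, OP_SEL_1 = 1 << 3
};
} // namespace SISrcMods

enum class SrcKind { FP, Int, PackedFP };   // hypothetical classifier

static void describeMods(unsigned Mods, SrcKind Kind) {
  if (Kind == SrcKind::Int) {               // bit 0 means sign-extend
    std::printf("sext=%d\n", !!(Mods & SISrcMods::SEXT));
    return;
  }
  if (Kind == SrcKind::FP) {                // bit 0 negate, bit 1 abs
    std::printf("neg=%d abs=%d\n", !!(Mods & SISrcMods::NEG),
                !!(Mods & SISrcMods::ABS));
    return;
  }
  // Packed: bit 1 becomes the high-half negate, bits 2-3 select halves.
  std::printf("neg_lo=%d neg_hi=%d op_sel=[%d,%d]\n",
              !!(Mods & SISrcMods::NEG), !!(Mods & SISrcMods::NEG_HI),
              !!(Mods & SISrcMods::OP_SEL_0), !!(Mods & SISrcMods::OP_SEL_1));
}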
@@ -242,6 +248,7 @@ enum Id { // HwRegCode, (6) [5:0]
ID_LDS_ALLOC = 6,
ID_IB_STS = 7,
ID_SYMBOLIC_LAST_ = 8,
+ ID_MEM_BASES = 15,
ID_SHIFT_ = 0,
ID_WIDTH_ = 6,
ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_)
@@ -251,14 +258,20 @@ enum Offset { // Offset, (5) [10:6]
OFFSET_DEFAULT_ = 0,
OFFSET_SHIFT_ = 6,
OFFSET_WIDTH_ = 5,
- OFFSET_MASK_ = (((1 << OFFSET_WIDTH_) - 1) << OFFSET_SHIFT_)
+ OFFSET_MASK_ = (((1 << OFFSET_WIDTH_) - 1) << OFFSET_SHIFT_),
+
+ OFFSET_SRC_SHARED_BASE = 16,
+ OFFSET_SRC_PRIVATE_BASE = 0
};
enum WidthMinusOne { // WidthMinusOne, (5) [15:11]
WIDTH_M1_DEFAULT_ = 31,
WIDTH_M1_SHIFT_ = 11,
WIDTH_M1_WIDTH_ = 5,
- WIDTH_M1_MASK_ = (((1 << WIDTH_M1_WIDTH_) - 1) << WIDTH_M1_SHIFT_)
+ WIDTH_M1_MASK_ = (((1 << WIDTH_M1_WIDTH_) - 1) << WIDTH_M1_SHIFT_),
+
+ WIDTH_M1_SRC_SHARED_BASE = 15,
+ WIDTH_M1_SRC_PRIVATE_BASE = 15
};
} // namespace Hwreg
@@ -300,6 +313,9 @@ enum DstUnused {
#define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B84C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B84C_USER_SGPR 0xFFFFFFC1
+#define S_00B84C_TRAP_HANDLER(x) (((x) & 0x1) << 6)
+#define G_00B84C_TRAP_HANDLER(x) (((x) >> 6) & 0x1)
+#define C_00B84C_TRAP_HANDLER 0xFFFFFFBF
#define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7)
#define G_00B84C_TGID_X_EN(x) (((x) >> 7) & 0x1)
#define C_00B84C_TGID_X_EN 0xFFFFFF7F
@@ -387,7 +403,6 @@ enum DstUnused {
#define R_SPILLED_SGPRS 0x4
#define R_SPILLED_VGPRS 0x8
-
} // End namespace llvm
#endif
diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 6a422e70fe1f..f9d258f44a62 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -65,6 +65,7 @@
/// ultimately led to the creation of an illegal COPY.
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseSet.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
@@ -198,6 +199,10 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
if (!CopyUse.isCopy())
return false;
+ // It is illegal to have vreg inputs to a physreg-defining reg_sequence.
+ if (TargetRegisterInfo::isPhysicalRegister(CopyUse.getOperand(0).getReg()))
+ return false;
+
const TargetRegisterClass *SrcRC, *DstRC;
std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);
@@ -234,8 +239,9 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
- BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg)
- .addOperand(MI.getOperand(I));
+ BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
+ TmpReg)
+ .add(MI.getOperand(I));
MI.getOperand(I).setReg(TmpReg);
}
@@ -326,6 +332,27 @@ static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
return true;
}
+static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
+ const TargetRegisterInfo *TRI) {
+ DenseSet<MachineBasicBlock*> Visited;
+ SmallVector<MachineBasicBlock*, 4> Worklist(MBB->pred_begin(),
+ MBB->pred_end());
+
+ while (!Worklist.empty()) {
+ MachineBasicBlock *mbb = Worklist.back();
+ Worklist.pop_back();
+
+ if (!Visited.insert(mbb).second)
+ continue;
+ if (hasTerminatorThatModifiesExec(*mbb, *TRI))
+ return true;
+
+ Worklist.insert(Worklist.end(), mbb->pred_begin(), mbb->pred_end());
+ }
+
+ return false;
+}
+
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -382,8 +409,8 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
- MachineBasicBlock *NCD = MDT->findNearestCommonDominator(MBB0, MBB1);
- if (NCD && !hasTerminatorThatModifiesExec(*NCD, *TRI)) {
+ if (!predsHasDivergentTerminator(MBB0, TRI) &&
+ !predsHasDivergentTerminator(MBB1, TRI)) {
DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n');
break;
}
diff --git a/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
new file mode 100644
index 000000000000..3d3121788b5e
--- /dev/null
+++ b/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
@@ -0,0 +1,72 @@
+//===-- SIFixVGPRCopies.cpp - Fix VGPR Copies after regalloc --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Add implicit use of exec to vector register copies.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-fix-vgpr-copies"
+
+namespace {
+
+class SIFixVGPRCopies : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIFixVGPRCopies() : MachineFunctionPass(ID) {
+ initializeSIFixVGPRCopiesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return "SI Fix VGPR copies"; }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIFixVGPRCopies, DEBUG_TYPE, "SI Fix VGPR copies", false, false)
+
+char SIFixVGPRCopies::ID = 0;
+
+char &llvm::SIFixVGPRCopiesID = SIFixVGPRCopies::ID;
+
+bool SIFixVGPRCopies::runOnMachineFunction(MachineFunction &MF) {
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::COPY:
+ if (TII->isVGPRCopy(MI) && !MI.readsRegister(AMDGPU::EXEC, TRI)) {
+ MI.addOperand(MF,
+ MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
+ DEBUG(dbgs() << "Add exec use to " << MI);
+ Changed = true;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index a5c0d4923d6b..d63414735b95 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -12,6 +12,7 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -66,6 +67,7 @@ public:
MachineRegisterInfo *MRI;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
+ const SISubtarget *ST;
void foldOperand(MachineOperand &OpToFold,
MachineInstr *UseMI,
@@ -75,6 +77,12 @@ public:
void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
+ const MachineOperand *isClamp(const MachineInstr &MI) const;
+ bool tryFoldClamp(MachineInstr &MI);
+
+ std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
+ bool tryFoldOMod(MachineInstr &MI);
+
public:
SIFoldOperands() : MachineFunctionPass(ID) {
initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
@@ -131,27 +139,6 @@ FunctionPass *llvm::createSIFoldOperandsPass() {
return new SIFoldOperands();
}
-static bool isSafeToFold(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case AMDGPU::V_MOV_B32_e32:
- case AMDGPU::V_MOV_B32_e64:
- case AMDGPU::V_MOV_B64_PSEUDO: {
- // If there are additional implicit register operands, this may be used for
- // register indexing so the source register operand isn't simply copied.
- unsigned NumOps = MI.getDesc().getNumOperands() +
- MI.getDesc().getNumImplicitUses();
-
- return MI.getNumOperands() == NumOps;
- }
- case AMDGPU::S_MOV_B32:
- case AMDGPU::S_MOV_B64:
- case AMDGPU::COPY:
- return true;
- default:
- return false;
- }
-}
-
static bool updateOperand(FoldCandidate &Fold,
const TargetRegisterInfo &TRI) {
MachineInstr *MI = Fold.UseMI;
@@ -359,8 +346,6 @@ void SIFoldOperands::foldOperand(
const TargetRegisterClass *FoldRC =
TRI->getRegClass(FoldDesc.OpInfo[0].RegClass);
- APInt Imm(TII->operandBitWidth(FoldDesc.OpInfo[1].OperandType),
- OpToFold.getImm());
// Split 64-bit constants into 32 bits for folding.
if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
@@ -370,21 +355,25 @@ void SIFoldOperands::foldOperand(
MRI->getRegClass(UseReg) :
TRI->getPhysRegClass(UseReg);
- assert(Imm.getBitWidth() == 64);
-
if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
return;
+ APInt Imm(64, OpToFold.getImm());
if (UseOp.getSubReg() == AMDGPU::sub0) {
Imm = Imm.getLoBits(32);
} else {
assert(UseOp.getSubReg() == AMDGPU::sub1);
Imm = Imm.getHiBits(32);
}
+
+ MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
+ tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
+ return;
}
- MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
- tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
+
+ tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
}
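As a worked example of the sub0/sub1 split above, consider the following standalone sketch (not part of the patch, with a hypothetical immediate value): a 64-bit immediate folds into a 32-bit use through either half, matching what Imm.getLoBits(32) and Imm.getHiBits(32) extract.

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical immediate; the real value comes from OpToFold.getImm().
  uint64_t Imm = 0x123456789ABCDEF0ULL;
  uint32_t Lo = static_cast<uint32_t>(Imm);       // sub0 use: getLoBits(32)
  uint32_t Hi = static_cast<uint32_t>(Imm >> 32); // sub1 use: getHiBits(32)
  std::printf("sub0 = 0x%08x, sub1 = 0x%08x\n", Lo, Hi);
  return 0;
}

This prints sub0 = 0x9abcdef0 and sub1 = 0x12345678, i.e. the two 32-bit halves that get offered to the fold list.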
static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
@@ -581,6 +570,32 @@ static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
return false;
}
+// Try to fold an instruction into a simpler one
+static bool tryFoldInst(const SIInstrInfo *TII,
+ MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+
+ if (Opc == AMDGPU::V_CNDMASK_B32_e32 ||
+ Opc == AMDGPU::V_CNDMASK_B32_e64 ||
+ Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
+ const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
+ const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
+ if (Src1->isIdenticalTo(*Src0)) {
+ DEBUG(dbgs() << "Folded " << *MI << " into ");
+ int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+ if (Src2Idx != -1)
+ MI->RemoveOperand(Src2Idx);
+ MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
+ mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
+ : getMovOpc(false)));
+ DEBUG(dbgs() << *MI << '\n');
+ return true;
+ }
+ }
+
+ return false;
+}
+
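The V_CNDMASK fold above relies on the identity select(cond, x, x) == x for any condition value. A minimal scalar model of the per-lane select (a sketch for illustration only; the hardware instruction operates per lane under a mask):

#include <cassert>

// Scalar model of V_CNDMASK: the condition picks between src0 and src1.
static int cndmask(bool Cond, int Src0, int Src1) {
  return Cond ? Src1 : Src0;
}

int main() {
  int X = 42;
  // With identical sources the result is independent of the condition,
  // so the instruction can be rewritten as a plain copy/move of src0.
  assert(cndmask(false, X, X) == X && cndmask(true, X, X) == X);
  return 0;
}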
void SIFoldOperands::foldInstOperand(MachineInstr &MI,
MachineOperand &OpToFold) const {
// We need to mutate the operands of new mov instructions to add implicit
@@ -682,20 +697,213 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
}
DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
+ tryFoldInst(TII, Fold.UseMI);
}
}
}
+const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
+ unsigned Op = MI.getOpcode();
+ switch (Op) {
+ case AMDGPU::V_MAX_F32_e64:
+ case AMDGPU::V_MAX_F16_e64:
+ case AMDGPU::V_MAX_F64: {
+ if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
+ return nullptr;
+
+ // Make sure sources are identical.
+ const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (!Src0->isReg() || Src0->getSubReg() != Src1->getSubReg() ||
+ Src0->getSubReg() != AMDGPU::NoSubRegister)
+ return nullptr;
+
+ // Can't fold up if we have modifiers.
+ if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
+ return nullptr;
+ return Src0;
+ }
+ default:
+ return nullptr;
+ }
+}
+
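A sketch of the semantics behind isClamp, assuming the usual AMDGPU behavior where the clamp modifier saturates a float result to [0.0, 1.0]: since max(x, x) == x, a v_max with identical sources and the clamp bit set computes clamp(x), so the clamp bit can migrate onto the defining instruction instead.

#include <algorithm>
#include <cstdio>

// Model of the clamp output modifier: saturate the result to [0.0, 1.0].
static float clampMod(float X) {
  return std::min(std::max(X, 0.0f), 1.0f);
}

int main() {
  for (float X : {-0.5f, 0.25f, 2.0f}) {
    // v_max_f32 x, x clamp  ==  clamp(max(x, x))  ==  clamp(x)
    std::printf("clamp(max(%g, %g)) = %g\n", X, X, clampMod(std::max(X, X)));
  }
  return 0;
}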
+// We obviously have multiple uses in a clamp since the register is used twice
+// in the same instruction.
+static bool hasOneNonDBGUseInst(const MachineRegisterInfo &MRI, unsigned Reg) {
+ int Count = 0;
+ for (auto I = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
+ I != E; ++I) {
+ if (++Count > 1)
+ return false;
+ }
+
+ return true;
+}
+
+bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
+ const MachineOperand *ClampSrc = isClamp(MI);
+ if (!ClampSrc || !hasOneNonDBGUseInst(*MRI, ClampSrc->getReg()))
+ return false;
+
+ MachineInstr *Def = MRI->getVRegDef(ClampSrc->getReg());
+ if (!TII->hasFPClamp(*Def))
+ return false;
+ MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
+ if (!DefClamp)
+ return false;
+
+ DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def << '\n');
+
+ // Clamp is applied after omod, so it is OK if omod is set.
+ DefClamp->setImm(1);
+ MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
+ MI.eraseFromParent();
+ return true;
+}
+
+static int getOModValue(unsigned Opc, int64_t Val) {
+ switch (Opc) {
+ case AMDGPU::V_MUL_F32_e64: {
+ switch (static_cast<uint32_t>(Val)) {
+ case 0x3f000000: // 0.5
+ return SIOutMods::DIV2;
+ case 0x40000000: // 2.0
+ return SIOutMods::MUL2;
+ case 0x40800000: // 4.0
+ return SIOutMods::MUL4;
+ default:
+ return SIOutMods::NONE;
+ }
+ }
+ case AMDGPU::V_MUL_F16_e64: {
+ switch (static_cast<uint16_t>(Val)) {
+ case 0x3800: // 0.5
+ return SIOutMods::DIV2;
+ case 0x4000: // 2.0
+ return SIOutMods::MUL2;
+ case 0x4400: // 4.0
+ return SIOutMods::MUL4;
+ default:
+ return SIOutMods::NONE;
+ }
+ }
+ default:
+ llvm_unreachable("invalid mul opcode");
+ }
+}
+
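For reference, the magic numbers in getOModValue are just the IEEE-754 encodings of 0.5, 2.0, and 4.0, the three multipliers omod can express (the f16 cases 0x3800/0x4000/0x4400 are the half-precision equivalents). A standalone sketch, not part of the patch, that reproduces the f32 bit patterns:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Bit-cast a float to its IEEE-754 representation (pre-C++20 idiom).
static uint32_t f32Bits(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  return Bits;
}

int main() {
  std::printf("0.5 -> 0x%08x\n", f32Bits(0.5f)); // 0x3f000000 -> DIV2
  std::printf("2.0 -> 0x%08x\n", f32Bits(2.0f)); // 0x40000000 -> MUL2
  std::printf("4.0 -> 0x%08x\n", f32Bits(4.0f)); // 0x40800000 -> MUL4
  return 0;
}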
+// FIXME: Does this really not support denormals with f16?
+// FIXME: Does this need to check the IEEE mode bit? SNaNs are generally not
+// handled, so will anything other than that break?
+std::pair<const MachineOperand *, int>
+SIFoldOperands::isOMod(const MachineInstr &MI) const {
+ unsigned Op = MI.getOpcode();
+ switch (Op) {
+ case AMDGPU::V_MUL_F32_e64:
+ case AMDGPU::V_MUL_F16_e64: {
+ // If output denormals are enabled, omod is ignored.
+ if ((Op == AMDGPU::V_MUL_F32_e64 && ST->hasFP32Denormals()) ||
+ (Op == AMDGPU::V_MUL_F16_e64 && ST->hasFP16Denormals()))
+ return std::make_pair(nullptr, SIOutMods::NONE);
+
+ const MachineOperand *RegOp = nullptr;
+ const MachineOperand *ImmOp = nullptr;
+ const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (Src0->isImm()) {
+ ImmOp = Src0;
+ RegOp = Src1;
+ } else if (Src1->isImm()) {
+ ImmOp = Src1;
+ RegOp = Src0;
+ } else
+ return std::make_pair(nullptr, SIOutMods::NONE);
+
+ int OMod = getOModValue(Op, ImmOp->getImm());
+ if (OMod == SIOutMods::NONE ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
+ TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
+ return std::make_pair(nullptr, SIOutMods::NONE);
+
+ return std::make_pair(RegOp, OMod);
+ }
+ case AMDGPU::V_ADD_F32_e64:
+ case AMDGPU::V_ADD_F16_e64: {
+ // If output denormals are enabled, omod is ignored.
+ if ((Op == AMDGPU::V_ADD_F32_e64 && ST->hasFP32Denormals()) ||
+ (Op == AMDGPU::V_ADD_F16_e64 && ST->hasFP16Denormals()))
+ return std::make_pair(nullptr, SIOutMods::NONE);
+
+ // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x
+ const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+
+ if (Src0->isReg() && Src1->isReg() && Src0->getReg() == Src1->getReg() &&
+ Src0->getSubReg() == Src1->getSubReg() &&
+ !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
+ !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
+ !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
+ !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
+ return std::make_pair(Src0, SIOutMods::MUL2);
+
+ return std::make_pair(nullptr, SIOutMods::NONE);
+ }
+ default:
+ return std::make_pair(nullptr, SIOutMods::NONE);
+ }
+}
+
+// FIXME: Does this need to check the IEEE bit on the function?
+bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
+ const MachineOperand *RegOp;
+ int OMod;
+ std::tie(RegOp, OMod) = isOMod(MI);
+ if (OMod == SIOutMods::NONE || !RegOp->isReg() ||
+ RegOp->getSubReg() != AMDGPU::NoSubRegister ||
+ !hasOneNonDBGUseInst(*MRI, RegOp->getReg()))
+ return false;
+
+ MachineInstr *Def = MRI->getVRegDef(RegOp->getReg());
+ MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod);
+ if (!DefOMod || DefOMod->getImm() != SIOutMods::NONE)
+ return false;
+
+ // Clamp is applied after omod. If the source already has clamp set, don't
+ // fold it.
+ if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
+ return false;
+
+ DEBUG(dbgs() << "Folding omod " << MI << " into " << *Def << '\n');
+
+ DefOMod->setImm(OMod);
+ MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
+ MI.eraseFromParent();
+ return true;
+}
+
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
-
MRI = &MF.getRegInfo();
- TII = ST.getInstrInfo();
+ ST = &MF.getSubtarget<SISubtarget>();
+ TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ // omod is ignored by the hardware if the IEEE bit is enabled. omod also
+ // does not correctly handle signed zeros.
+ //
+ // TODO: Check nsz on instructions when fast math flags are preserved to MI
+ // level.
+ bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath();
+
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
@@ -705,8 +913,13 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
Next = std::next(I);
MachineInstr &MI = *I;
- if (!isSafeToFold(MI))
+ tryFoldInst(TII, &MI);
+
+ if (!TII->isFoldableCopy(MI)) {
+ if (IsIEEEMode || !tryFoldOMod(MI))
+ tryFoldClamp(MI);
continue;
+ }
MachineOperand &OpToFold = MI.getOperand(1);
bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index 0b5715515880..abe6af9a6d3f 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -21,22 +21,24 @@
using namespace llvm;
-static ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF,
- const SIRegisterInfo *TRI) {
+static ArrayRef<MCPhysReg> getAllSGPR128(const SISubtarget &ST,
+ const MachineFunction &MF) {
return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
- TRI->getMaxNumSGPRs(MF) / 4);
+ ST.getMaxNumSGPRs(MF) / 4);
}
-static ArrayRef<MCPhysReg> getAllSGPRs(const MachineFunction &MF,
- const SIRegisterInfo *TRI) {
+static ArrayRef<MCPhysReg> getAllSGPRs(const SISubtarget &ST,
+ const MachineFunction &MF) {
return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
- TRI->getMaxNumSGPRs(MF));
+ ST.getMaxNumSGPRs(MF));
}
-void SIFrameLowering::emitFlatScratchInit(const SIInstrInfo *TII,
- const SIRegisterInfo* TRI,
+void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST,
MachineFunction &MF,
MachineBasicBlock &MBB) const {
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo* TRI = &TII->getRegisterInfo();
+
// We don't need this if we only have spills since there is no user facing
// scratch.
@@ -59,16 +61,28 @@ void SIFrameLowering::emitFlatScratchInit(const SIInstrInfo *TII,
MRI.addLiveIn(FlatScratchInitReg);
MBB.addLiveIn(FlatScratchInitReg);
- // Copy the size in bytes.
- unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
- .addReg(FlatScrInitHi, RegState::Kill);
-
unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
+ unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
+ // Do a 64-bit pointer add.
+ if (ST.flatScratchIsPointer()) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
+ .addReg(FlatScrInitLo)
+ .addReg(ScratchWaveOffsetReg);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
+ .addReg(FlatScrInitHi)
+ .addImm(0);
+
+ return;
+ }
+
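The block above performs a 64-bit pointer add as S_ADD_U32 followed by S_ADDC_U32: the first adds the low halves and sets SCC on carry-out, the second folds that carry into the high half. A standalone model with hypothetical values (the real operands are SGPR pairs):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t FlatScrInit = 0x00000001FFFFFFF0ULL; // hypothetical {hi, lo} input
  uint32_t WaveOffset = 0x40;                   // hypothetical wave offset
  uint32_t Lo = static_cast<uint32_t>(FlatScrInit);
  uint32_t Hi = static_cast<uint32_t>(FlatScrInit >> 32);
  uint32_t SumLo = Lo + WaveOffset; // S_ADD_U32: low add, carry-out -> SCC
  uint32_t Carry = SumLo < Lo;      // SCC models the carry bit
  uint32_t SumHi = Hi + 0 + Carry;  // S_ADDC_U32: high add with carry-in
  std::printf("FLAT_SCR = 0x%08x%08x\n", SumHi, SumLo);
  return 0;
}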
+ // Copy the size in bytes.
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
+ .addReg(FlatScrInitHi, RegState::Kill);
+
// Add wave offset in bytes to private base offset.
// See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
@@ -111,16 +125,15 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
- ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(MF, TRI);
+ ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
- // Skip the last 2 elements because the last one is reserved for VCC, and
- // this is the 2nd to last element already.
+ // Skip the last N reserved elements because they should have already been
+ // reserved for VCC etc.
for (MCPhysReg Reg : AllSGPR128s) {
// Pick the first unallocated one. Make sure we don't clobber the other
// reserved input we needed.
if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
- //assert(MRI.isAllocatable(Reg));
MRI.replaceRegWith(ScratchRsrcReg, Reg);
MFI->setScratchRSrcReg(Reg);
return Reg;
@@ -143,10 +156,9 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
MachineRegisterInfo &MRI = MF.getRegInfo();
-
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
- ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(MF, TRI);
+ ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
if (NumPreloaded > AllSGPRs.size())
return ScratchWaveOffsetReg;
@@ -190,6 +202,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
// Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
// specified.
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ auto AMDGPUASI = ST.getAMDGPUAS();
if (ST.debuggerEmitPrologue())
emitDebuggerPrologue(MF, MBB);
@@ -229,7 +242,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
// emitted after frame indices are eliminated.
if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit())
- emitFlatScratchInit(TII, TRI, MF, MBB);
+ emitFlatScratchInit(ST, MF, MBB);
// We need to insert initialization of the scratch resource descriptor.
unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
@@ -328,7 +341,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
PointerType *PtrTy =
PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
- AMDGPUAS::CONSTANT_ADDRESS);
+ AMDGPUASI.CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
auto MMO = MF.getMachineMemOperand(PtrInfo,
MachineMemOperand::MOLoad |
@@ -371,6 +384,24 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
}
+static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
+ for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
+ I != E; ++I) {
+ if (!MFI.isDeadObjectIndex(I))
+ return false;
+ }
+
+ return true;
+}
+
+int SIFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const {
+ const SIRegisterInfo *RI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
+
+ FrameReg = RI->getFrameRegister(MF);
+ return MF.getFrameInfo().getObjectOffset(FI);
+}
+
void SIFrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF,
RegScavenger *RS) const {
@@ -379,15 +410,66 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
if (!MFI.hasStackObjects())
return;
- bool MayNeedScavengingEmergencySlot = MFI.hasStackObjects();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ bool AllSGPRSpilledToVGPRs = false;
+
+ if (TRI.spillSGPRToVGPR() && FuncInfo->hasSpilledSGPRs()) {
+ AllSGPRSpilledToVGPRs = true;
+
+ // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
+ // are spilled to VGPRs, in which case we can eliminate the stack usage.
+ //
+ // XXX - This operates under the assumption that only other SGPR spills are
+ // users of the frame index. I'm not 100% sure this is correct. The
+ // StackColoring pass has a comment saying a future improvement would be to
+ // merging of allocas with spill slots, but for now according to
+ // MachineFrameInfo isSpillSlot can't alias any other object.
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::iterator Next;
+ for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
+ MachineInstr &MI = *I;
+ Next = std::next(I);
+
+ if (TII->isSGPRSpill(MI)) {
+ int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
+ if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
+ bool Spilled = TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
+ (void)Spilled;
+ assert(Spilled && "failed to spill SGPR to VGPR when allocated");
+ } else
+ AllSGPRSpilledToVGPRs = false;
+ }
+ }
+ }
- assert((RS || !MayNeedScavengingEmergencySlot) &&
- "RegScavenger required if spilling");
+ FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
+ }
- if (MayNeedScavengingEmergencySlot) {
- int ScavengeFI = MFI.CreateStackObject(
- AMDGPU::SGPR_32RegClass.getSize(),
- AMDGPU::SGPR_32RegClass.getAlignment(), false);
+ // FIXME: The other checks should be redundant with allStackObjectsAreDead,
+ // but currently hasNonSpillStackObjects is set only from source
+ // allocas. Stack temps produced from legalization are not counted currently.
+ if (FuncInfo->hasNonSpillStackObjects() || FuncInfo->hasSpilledVGPRs() ||
+ !AllSGPRSpilledToVGPRs || !allStackObjectsAreDead(MFI)) {
+ assert(RS && "RegScavenger required if spilling");
+
+ // We force this to be at offset 0 so no user object ever has 0 as an
+ // address, so we may use 0 as an invalid pointer value. This is because
+ // LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
+ // is required to be in address space 0, we are forced to accept this for
+ // now. Ideally we could have the stack in another address space with 0 as a
+ // valid pointer, and -1 as the null value.
+ //
+ // This will also waste additional space when user stack objects require
+ // more than 4-byte alignment.
+ //
+ // The main cost here is losing the offset for addressing modes. However,
+ // this also ensures we shouldn't need a register for the offset during
+ // emergency scavenging.
+ int ScavengeFI = MFI.CreateFixedObject(
+ AMDGPU::SGPR_32RegClass.getSize(), 0, false);
RS->addScavengingFrameIndex(ScavengeFI);
}
}
diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h
index 7657b4e03864..1bfc08093da2 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/lib/Target/AMDGPU/SIFrameLowering.h
@@ -30,14 +30,15 @@ public:
MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
void processFunctionBeforeFrameFinalized(
MachineFunction &MF,
RegScavenger *RS = nullptr) const override;
private:
- void emitFlatScratchInit(const SIInstrInfo *TII,
- const SIRegisterInfo* TRI,
+ void emitFlatScratchInit(const SISubtarget &ST,
MachineFunction &MF,
MachineBasicBlock &MBB) const;
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index b98f9f400ee7..7268131396dc 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15,26 +15,70 @@
#ifdef _MSC_VER
// Provide M_PI.
#define _USE_MATH_DEFINES
-#include <cmath>
#endif
#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
+#include "AMDGPUTargetMachine.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/DAGCombine.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetCallingConv.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <iterator>
+#include <tuple>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -43,7 +87,6 @@ static cl::opt<bool> EnableVGPRIndexMode(
cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
cl::init(false));
-
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
@@ -84,6 +127,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::f16, &AMDGPU::SReg_32_XM0RegClass);
}
+ if (Subtarget->hasVOP3PInsts()) {
+ addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32_XM0RegClass);
+ addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32_XM0RegClass);
+ }
+
computeRegisterProperties(STI.getRegisterInfo());
// We need to custom lower vector stores from local memory
@@ -110,7 +158,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
setTruncStoreAction(MVT::v32i32, MVT::v32i8, Expand);
-
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand);
@@ -142,10 +189,17 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom);
+
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::v2i16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::v2f16, Custom);
+
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
@@ -153,9 +207,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::UADDO, MVT::i32, Legal);
+ setOperationAction(ISD::USUBO, MVT::i32, Legal);
+
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
- for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64}) {
+ for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
+ MVT::v2i64, MVT::v2f64}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
@@ -202,6 +260,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
+ // Avoid stack access for these.
+ // TODO: Generalize to more vector types.
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
+
// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling,
// and output demarshalling
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
@@ -222,7 +287,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// On SI this is s_memtime; on VI it is s_memrealtime.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
- setOperationAction(ISD::TRAP, MVT::Other, Custom);
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
@@ -303,6 +369,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::f16, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::f16, Promote);
setOperationAction(ISD::UINT_TO_FP, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::f16, Custom);
// F16 - VOP2 Actions.
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
@@ -317,6 +384,85 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMAD, MVT::f16, Legal);
}
+ if (Subtarget->hasVOP3PInsts()) {
+ for (MVT VT : {MVT::v2i16, MVT::v2f16}) {
+ for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
+ switch (Op) {
+ case ISD::LOAD:
+ case ISD::STORE:
+ case ISD::BUILD_VECTOR:
+ case ISD::BITCAST:
+ case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::INSERT_VECTOR_ELT:
+ case ISD::INSERT_SUBVECTOR:
+ case ISD::EXTRACT_SUBVECTOR:
+ case ISD::SCALAR_TO_VECTOR:
+ break;
+ case ISD::CONCAT_VECTORS:
+ setOperationAction(Op, VT, Custom);
+ break;
+ default:
+ setOperationAction(Op, VT, Expand);
+ break;
+ }
+ }
+ }
+
+ // XXX - Do these do anything? Vector constants turn into build_vector.
+ setOperationAction(ISD::Constant, MVT::v2i16, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::v2f16, Legal);
+
+ setOperationAction(ISD::STORE, MVT::v2i16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v2i16, MVT::i32);
+ setOperationAction(ISD::STORE, MVT::v2f16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v2f16, MVT::i32);
+
+ setOperationAction(ISD::LOAD, MVT::v2i16, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v2i16, MVT::i32);
+ setOperationAction(ISD::LOAD, MVT::v2f16, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v2f16, MVT::i32);
+
+ setOperationAction(ISD::AND, MVT::v2i16, Promote);
+ AddPromotedToType(ISD::AND, MVT::v2i16, MVT::i32);
+ setOperationAction(ISD::OR, MVT::v2i16, Promote);
+ AddPromotedToType(ISD::OR, MVT::v2i16, MVT::i32);
+ setOperationAction(ISD::XOR, MVT::v2i16, Promote);
+ AddPromotedToType(ISD::XOR, MVT::v2i16, MVT::i32);
+ setOperationAction(ISD::SELECT, MVT::v2i16, Promote);
+ AddPromotedToType(ISD::SELECT, MVT::v2i16, MVT::i32);
+ setOperationAction(ISD::SELECT, MVT::v2f16, Promote);
+ AddPromotedToType(ISD::SELECT, MVT::v2f16, MVT::i32);
+
+ setOperationAction(ISD::ADD, MVT::v2i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v2i16, Legal);
+ setOperationAction(ISD::MUL, MVT::v2i16, Legal);
+ setOperationAction(ISD::SHL, MVT::v2i16, Legal);
+ setOperationAction(ISD::SRL, MVT::v2i16, Legal);
+ setOperationAction(ISD::SRA, MVT::v2i16, Legal);
+ setOperationAction(ISD::SMIN, MVT::v2i16, Legal);
+ setOperationAction(ISD::UMIN, MVT::v2i16, Legal);
+ setOperationAction(ISD::SMAX, MVT::v2i16, Legal);
+ setOperationAction(ISD::UMAX, MVT::v2i16, Legal);
+
+ setOperationAction(ISD::FADD, MVT::v2f16, Legal);
+ setOperationAction(ISD::FNEG, MVT::v2f16, Legal);
+ setOperationAction(ISD::FMUL, MVT::v2f16, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f16, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::v2f16, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::v2f16, Legal);
+
+ // This isn't really legal, but this avoids the legalizer unrolling it (and
+ // allows matching fneg (fabs x) patterns)
+ setOperationAction(ISD::FABS, MVT::v2f16, Legal);
+
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
+
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2i32, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Expand);
+ }
+
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMINNUM);
@@ -332,6 +478,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::FCANONICALIZE);
+ setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
// All memory operations. Some folding on the pointer operand is done to help
// match the constant offsets in the addressing modes.
@@ -364,30 +512,49 @@ const SISubtarget *SITargetLowering::getSubtarget() const {
// TargetLowering queries
//===----------------------------------------------------------------------===//
+bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
+ EVT) const {
+ // SI has some legal vector types, but no legal vector operations. Say no
+ // shuffles are legal in order to prefer scalarizing some vector operations.
+ return false;
+}
+
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
unsigned IntrID) const {
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
- case Intrinsic::amdgcn_atomic_dec:
+ case Intrinsic::amdgcn_atomic_dec: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align = 0;
- Info.vol = false;
+
+ const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
+ Info.vol = !Vol || !Vol->isNullValue();
Info.readMem = true;
Info.writeMem = true;
return true;
+ }
default:
return false;
}
}
-bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
- EVT) const {
- // SI has some legal vector types, but no legal vector operations. Say no
- // shuffles are legal in order to prefer scalarizing some vector operations.
- return false;
+bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
+ SmallVectorImpl<Value*> &Ops,
+ Type *&AccessTy) const {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::amdgcn_atomic_inc:
+ case Intrinsic::amdgcn_atomic_dec: {
+ Value *Ptr = II->getArgOperand(0);
+ AccessTy = II->getType();
+ Ops.push_back(Ptr);
+ return true;
+ }
+ default:
+ return false;
+ }
}
bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
@@ -438,8 +605,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (AM.BaseGV)
return false;
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS: {
+ if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
// Assume that we will use FLAT for all global memory accesses
// on VI.
@@ -454,8 +620,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
}
return isLegalMUBUFAddressingMode(AM);
- }
- case AMDGPUAS::CONSTANT_ADDRESS: {
+ } else if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
// If the offset isn't a multiple of 4, it probably isn't going to be
// correctly aligned.
// FIXME: Can we get the real alignment here?
@@ -478,7 +643,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
// in 8-bits, it can use a smaller encoding.
if (!isUInt<32>(AM.BaseOffs / 4))
return false;
- } else if (Subtarget->getGeneration() == SISubtarget::VOLCANIC_ISLANDS) {
+ } else if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
// On VI, these use the SMEM format and the offset is 20-bit in bytes.
if (!isUInt<20>(AM.BaseOffs))
return false;
@@ -492,13 +657,11 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return true;
return false;
- }
- case AMDGPUAS::PRIVATE_ADDRESS:
+ } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
return isLegalMUBUFAddressingMode(AM);
-
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS: {
+ } else if (AS == AMDGPUASI.LOCAL_ADDRESS ||
+ AS == AMDGPUASI.REGION_ADDRESS) {
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
// field.
// XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
@@ -513,17 +676,15 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return true;
return false;
- }
- case AMDGPUAS::FLAT_ADDRESS:
- case AMDGPUAS::UNKNOWN_ADDRESS_SPACE:
+ } else if (AS == AMDGPUASI.FLAT_ADDRESS ||
+ AS == AMDGPUASI.UNKNOWN_ADDRESS_SPACE) {
// For an unknown address space, this usually means that this is for some
// reason being used for pure arithmetic, and not based on some addressing
// computation. We don't have instructions that compute pointers with any
// addressing modes, so treat them as having no offset like flat
// instructions.
return isLegalFlatAddressingMode(AM);
-
- default:
+ } else {
llvm_unreachable("unhandled address space");
}
}
@@ -544,8 +705,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return false;
}
- if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
- AddrSpace == AMDGPUAS::REGION_ADDRESS) {
+ if (AddrSpace == AMDGPUASI.LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUASI.REGION_ADDRESS) {
// ds_read/write_b64 require 8-byte alignment, but we can do a 4-byte
// aligned, 8-byte access in a single operation using ds_read2/write2_b32
// with adjacent offsets.
@@ -560,8 +721,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// will access scratch. If we had access to the IR function, then we
// could determine if any private memory was used in the function.
if (!Subtarget->hasUnalignedScratchAccess() &&
- (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
- AddrSpace == AMDGPUAS::FLAT_ADDRESS)) {
+ (AddrSpace == AMDGPUASI.PRIVATE_ADDRESS ||
+ AddrSpace == AMDGPUASI.FLAT_ADDRESS)) {
return false;
}
@@ -569,7 +730,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// If we have a uniform constant load, it still requires using a slow
// buffer instruction if unaligned.
if (IsFast) {
- *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) ?
+ *IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS) ?
(Align % 4 == 0) : true;
}
@@ -609,15 +770,16 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
return MVT::Other;
}
-static bool isFlatGlobalAddrSpace(unsigned AS) {
- return AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS;
+static bool isFlatGlobalAddrSpace(unsigned AS, AMDGPUAS AMDGPUASI) {
+ return AS == AMDGPUASI.GLOBAL_ADDRESS ||
+ AS == AMDGPUASI.FLAT_ADDRESS ||
+ AS == AMDGPUASI.CONSTANT_ADDRESS;
}
bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
- return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
+ return isFlatGlobalAddrSpace(SrcAS, AMDGPUASI) &&
+ isFlatGlobalAddrSpace(DestAS, AMDGPUASI);
}
bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
@@ -631,7 +793,7 @@ bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
// Flat -> private/local is a simple truncate.
// Flat -> global is no-op
- if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
+ if (SrcAS == AMDGPUASI.FLAT_ADDRESS)
return true;
return isNoopAddrSpaceCast(SrcAS, DestAS);
@@ -639,18 +801,8 @@ bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
const MemSDNode *MemNode = cast<MemSDNode>(N);
- const Value *Ptr = MemNode->getMemOperand()->getValue();
- // UndefValue means this is a load of a kernel input. These are uniform.
- // Sometimes LDS instructions have constant pointers.
- // If Ptr is null, then that means this mem operand contains a
- // PseudoSourceValue like GOT.
- if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
- isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
- return true;
-
- const Instruction *I = dyn_cast<Instruction>(Ptr);
- return I && I->getMetadata("amdgpu.uniform");
+ return AMDGPU::isUniformMMO(MemNode->getMemOperand());
}
TargetLoweringBase::LegalizeTypeAction
@@ -693,40 +845,28 @@ bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const {
return TargetLowering::isTypeDesirableForOp(Op, VT);
}
-SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG,
- const SDLoc &SL, SDValue Chain,
- unsigned Offset) const {
+SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
+ const SDLoc &SL,
+ SDValue Chain,
+ uint64_t Offset) const {
const DataLayout &DL = DAG.getDataLayout();
MachineFunction &MF = DAG.getMachineFunction();
const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
- unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
+ unsigned InputPtrReg = TRI->getPreloadedValue(MF,
+ SIRegisterInfo::KERNARG_SEGMENT_PTR);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
+ MVT PtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg), PtrVT);
return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
DAG.getConstant(Offset, SL, PtrVT));
}
-SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
- const SDLoc &SL, SDValue Chain,
- unsigned Offset, bool Signed,
+SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
+ const SDLoc &SL, SDValue Val,
+ bool Signed,
const ISD::InputArg *Arg) const {
- const DataLayout &DL = DAG.getDataLayout();
- Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
- MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
-
- unsigned Align = DL.getABITypeAlignment(Ty);
-
- SDValue Ptr = LowerParameterPtr(DAG, SL, Chain, Offset);
- SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Align,
- MachineMemOperand::MONonTemporal |
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant);
-
- SDValue Val = Load;
if (Arg && (Arg->Flags.isSExt() || Arg->Flags.isZExt()) &&
VT.bitsLT(MemVT)) {
unsigned Opc = Arg->Flags.isZExt() ? ISD::AssertZext : ISD::AssertSext;
@@ -740,373 +880,434 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
else
Val = DAG.getZExtOrTrunc(Val, SL, VT);
- return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
+ return Val;
}
-SDValue SITargetLowering::LowerFormalArguments(
- SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
-
- MachineFunction &MF = DAG.getMachineFunction();
- FunctionType *FType = MF.getFunction()->getFunctionType();
- SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+SDValue SITargetLowering::lowerKernargMemParameter(
+ SelectionDAG &DAG, EVT VT, EVT MemVT,
+ const SDLoc &SL, SDValue Chain,
+ uint64_t Offset, bool Signed,
+ const ISD::InputArg *Arg) const {
+ const DataLayout &DL = DAG.getDataLayout();
+ Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
- if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
- const Function *Fn = MF.getFunction();
- DiagnosticInfoUnsupported NoGraphicsHSA(
- *Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
- DAG.getContext()->diagnose(NoGraphicsHSA);
- return DAG.getEntryNode();
- }
+ unsigned Align = DL.getABITypeAlignment(Ty);
- // Create stack objects that are used for emitting debugger prologue if
- // "amdgpu-debugger-emit-prologue" attribute was specified.
- if (ST.debuggerEmitPrologue())
- createDebuggerPrologueStackObjects(MF);
+ SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, Offset);
+ SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Align,
+ MachineMemOperand::MONonTemporal |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
- SmallVector<ISD::InputArg, 16> Splits;
- BitVector Skipped(Ins.size());
+ SDValue Val = convertArgType(DAG, VT, MemVT, SL, Load, Signed, Arg);
+ return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
+}
- for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
- const ISD::InputArg &Arg = Ins[i];
+static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
+ CallingConv::ID CallConv,
+ ArrayRef<ISD::InputArg> Ins,
+ BitVector &Skipped,
+ FunctionType *FType,
+ SIMachineFunctionInfo *Info) {
+ for (unsigned I = 0, E = Ins.size(), PSInputNum = 0; I != E; ++I) {
+ const ISD::InputArg &Arg = Ins[I];
- // First check if it's a PS input addr
+ // First check if it's a PS input addr.
if (CallConv == CallingConv::AMDGPU_PS && !Arg.Flags.isInReg() &&
!Arg.Flags.isByVal() && PSInputNum <= 15) {
if (!Arg.Used && !Info->isPSInputAllocated(PSInputNum)) {
- // We can safely skip PS inputs
- Skipped.set(i);
+ // We can safely skip PS inputs.
+ Skipped.set(I);
++PSInputNum;
continue;
}
Info->markPSInputAllocated(PSInputNum);
if (Arg.Used)
- Info->PSInputEna |= 1 << PSInputNum;
+ Info->markPSInputEnabled(PSInputNum);
++PSInputNum;
}
- if (AMDGPU::isShader(CallConv)) {
- // Second split vertices into their elements
- if (Arg.VT.isVector()) {
- ISD::InputArg NewArg = Arg;
- NewArg.Flags.setSplit();
- NewArg.VT = Arg.VT.getVectorElementType();
-
- // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
- // three or five element vertex only needs three or five registers,
- // NOT four or eight.
- Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
- unsigned NumElements = ParamType->getVectorNumElements();
-
- for (unsigned j = 0; j != NumElements; ++j) {
- Splits.push_back(NewArg);
- NewArg.PartOffset += NewArg.VT.getStoreSize();
- }
- } else {
- Splits.push_back(Arg);
+ // Second split vertices into their elements.
+ if (Arg.VT.isVector()) {
+ ISD::InputArg NewArg = Arg;
+ NewArg.Flags.setSplit();
+ NewArg.VT = Arg.VT.getVectorElementType();
+
+ // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
+ // three or five element vertex only needs three or five registers,
+ // NOT four or eight.
+ Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
+ unsigned NumElements = ParamType->getVectorNumElements();
+
+ for (unsigned J = 0; J != NumElements; ++J) {
+ Splits.push_back(NewArg);
+ NewArg.PartOffset += NewArg.VT.getStoreSize();
}
+ } else {
+ Splits.push_back(Arg);
}
}
+}
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext());
+// Allocate special inputs passed in VGPRs.
+static void allocateSpecialInputVGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) {
+ if (Info.hasWorkItemIDX()) {
+ unsigned Reg = TRI.getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X);
+ MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
- // At least one interpolation mode must be enabled or else the GPU will hang.
- //
- // Check PSInputAddr instead of PSInputEna. The idea is that if the user set
- // PSInputAddr, the user wants to enable some bits after the compilation
- // based on run-time states. Since we can't know what the final PSInputEna
- // will look like, so we shouldn't do anything here and the user should take
- // responsibility for the correct programming.
- //
- // Otherwise, the following restrictions apply:
- // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
- // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
- // enabled too.
- if (CallConv == CallingConv::AMDGPU_PS &&
- ((Info->getPSInputAddr() & 0x7F) == 0 ||
- ((Info->getPSInputAddr() & 0xF) == 0 && Info->isPSInputAllocated(11)))) {
- CCInfo.AllocateReg(AMDGPU::VGPR0);
- CCInfo.AllocateReg(AMDGPU::VGPR1);
- Info->markPSInputAllocated(0);
- Info->PSInputEna |= 1;
- }
-
- if (!AMDGPU::isShader(CallConv)) {
- assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
- } else {
- assert(!Info->hasDispatchPtr() &&
- !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
- !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
- !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
- !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
- !Info->hasWorkItemIDZ());
+ if (Info.hasWorkItemIDY()) {
+ unsigned Reg = TRI.getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y);
+ MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
}
- if (Info->hasPrivateMemoryInputPtr()) {
- unsigned PrivateMemoryPtrReg = Info->addPrivateMemoryPtr(*TRI);
- MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SReg_64RegClass);
+ if (Info.hasWorkItemIDZ()) {
+ unsigned Reg = TRI.getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z);
+ MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
+ CCInfo.AllocateReg(Reg);
+ }
+}
+
+// Allocate special inputs passed in user SGPRs.
+static void allocateHSAUserSGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) {
+ if (Info.hasPrivateMemoryInputPtr()) {
+ unsigned PrivateMemoryPtrReg = Info.addPrivateMemoryPtr(TRI);
+ MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(PrivateMemoryPtrReg);
}
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
- if (Info->hasPrivateSegmentBuffer()) {
- unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
- MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
+ if (Info.hasPrivateSegmentBuffer()) {
+ unsigned PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
+ MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
CCInfo.AllocateReg(PrivateSegmentBufferReg);
}
- if (Info->hasDispatchPtr()) {
- unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
+ if (Info.hasDispatchPtr()) {
+ unsigned DispatchPtrReg = Info.addDispatchPtr(TRI);
MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchPtrReg);
}
- if (Info->hasQueuePtr()) {
- unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
+ if (Info.hasQueuePtr()) {
+ unsigned QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
}
- if (Info->hasKernargSegmentPtr()) {
- unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
+ if (Info.hasKernargSegmentPtr()) {
+ unsigned InputPtrReg = Info.addKernargSegmentPtr(TRI);
MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(InputPtrReg);
}
- if (Info->hasDispatchID()) {
- unsigned DispatchIDReg = Info->addDispatchID(*TRI);
+ if (Info.hasDispatchID()) {
+ unsigned DispatchIDReg = Info.addDispatchID(TRI);
MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchIDReg);
}
- if (Info->hasFlatScratchInit()) {
- unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
+ if (Info.hasFlatScratchInit()) {
+ unsigned FlatScratchInitReg = Info.addFlatScratchInit(TRI);
MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(FlatScratchInitReg);
}
- if (!AMDGPU::isShader(CallConv))
- analyzeFormalArgumentsCompute(CCInfo, Ins);
- else
- AnalyzeFormalArguments(CCInfo, Splits);
-
- SmallVector<SDValue, 16> Chains;
-
- for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
-
- const ISD::InputArg &Arg = Ins[i];
- if (Skipped[i]) {
- InVals.push_back(DAG.getUNDEF(Arg.VT));
- continue;
- }
-
- CCValAssign &VA = ArgLocs[ArgIdx++];
- MVT VT = VA.getLocVT();
-
- if (VA.isMemLoc()) {
- VT = Ins[i].VT;
- EVT MemVT = VA.getLocVT();
- const unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) +
- VA.getLocMemOffset();
- // The first 36 bytes of the input buffer contains information about
- // thread group and global sizes.
- SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, Chain,
- Offset, Ins[i].Flags.isSExt(),
- &Ins[i]);
- Chains.push_back(Arg.getValue(1));
-
- auto *ParamTy =
- dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
- if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
- ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
- // On SI local pointers are just offsets into LDS, so they are always
- // less than 16-bits. On CI and newer they could potentially be
- // real pointers, so we can't guarantee their size.
- Arg = DAG.getNode(ISD::AssertZext, DL, Arg.getValueType(), Arg,
- DAG.getValueType(MVT::i16));
- }
-
- InVals.push_back(Arg);
- Info->setABIArgOffset(Offset + MemVT.getStoreSize());
- continue;
- }
- assert(VA.isRegLoc() && "Parameter must be in a register!");
-
- unsigned Reg = VA.getLocReg();
-
- if (VT == MVT::i64) {
- // For now assume it is a pointer
- Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
- &AMDGPU::SGPR_64RegClass);
- Reg = MF.addLiveIn(Reg, &AMDGPU::SGPR_64RegClass);
- SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
- InVals.push_back(Copy);
- continue;
- }
-
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
-
- Reg = MF.addLiveIn(Reg, RC);
- SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
-
- if (Arg.VT.isVector()) {
-
- // Build a vector from the registers
- Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
- unsigned NumElements = ParamType->getVectorNumElements();
-
- SmallVector<SDValue, 4> Regs;
- Regs.push_back(Val);
- for (unsigned j = 1; j != NumElements; ++j) {
- Reg = ArgLocs[ArgIdx++].getLocReg();
- Reg = MF.addLiveIn(Reg, RC);
-
- SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
- Regs.push_back(Copy);
- }
-
- // Fill up the missing vector elements
- NumElements = Arg.VT.getVectorNumElements() - NumElements;
- Regs.append(NumElements, DAG.getUNDEF(VT));
-
- InVals.push_back(DAG.getBuildVector(Arg.VT, DL, Regs));
- continue;
- }
-
- InVals.push_back(Val);
- }
-
// TODO: Add GridWorkGroupCount user SGPRs when used. For now, with HSA, we
// read these from the dispatch pointer.
+}
- // Start adding system SGPRs.
- if (Info->hasWorkGroupIDX()) {
- unsigned Reg = Info->addWorkGroupIDX();
+// Allocate special input registers that are initialized per-wave.
+static void allocateSystemSGPRs(CCState &CCInfo,
+ MachineFunction &MF,
+ SIMachineFunctionInfo &Info,
+ bool IsShader) {
+ if (Info.hasWorkGroupIDX()) {
+ unsigned Reg = Info.addWorkGroupIDX();
MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
CCInfo.AllocateReg(Reg);
}
- if (Info->hasWorkGroupIDY()) {
- unsigned Reg = Info->addWorkGroupIDY();
+ if (Info.hasWorkGroupIDY()) {
+ unsigned Reg = Info.addWorkGroupIDY();
MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
CCInfo.AllocateReg(Reg);
}
- if (Info->hasWorkGroupIDZ()) {
- unsigned Reg = Info->addWorkGroupIDZ();
+ if (Info.hasWorkGroupIDZ()) {
+ unsigned Reg = Info.addWorkGroupIDZ();
MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
CCInfo.AllocateReg(Reg);
}
- if (Info->hasWorkGroupInfo()) {
- unsigned Reg = Info->addWorkGroupInfo();
+ if (Info.hasWorkGroupInfo()) {
+ unsigned Reg = Info.addWorkGroupInfo();
MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
CCInfo.AllocateReg(Reg);
}
- if (Info->hasPrivateSegmentWaveByteOffset()) {
+ if (Info.hasPrivateSegmentWaveByteOffset()) {
// Scratch wave offset passed in system SGPR.
unsigned PrivateSegmentWaveByteOffsetReg;
- if (AMDGPU::isShader(CallConv)) {
+ if (IsShader) {
PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
- Info->setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
+ Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
} else
- PrivateSegmentWaveByteOffsetReg = Info->addPrivateSegmentWaveByteOffset();
+ PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
}
+}
+static void reservePrivateMemoryRegs(const TargetMachine &TM,
+ MachineFunction &MF,
+ const SIRegisterInfo &TRI,
+ SIMachineFunctionInfo &Info) {
// Now that we've figured out where the scratch register inputs are, see if we
// should reserve the arguments and use them directly.
bool HasStackObjects = MF.getFrameInfo().hasStackObjects();
+
// Record that we know we have non-spill stack objects so we don't need to
// check all stack objects later.
if (HasStackObjects)
- Info->setHasNonSpillStackObjects(true);
+ Info.setHasNonSpillStackObjects(true);
// Everything live out of a block is spilled with fast regalloc, so it's
// almost certain that spilling will be required.
- if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ if (TM.getOptLevel() == CodeGenOpt::None)
HasStackObjects = true;
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
if (ST.isAmdCodeObjectV2(MF)) {
if (HasStackObjects) {
// If we have stack objects, we unquestionably need the private buffer
// resource. For the Code Object V2 ABI, this will be the first 4 user
// SGPR inputs. We can reserve those and use them directly.
- unsigned PrivateSegmentBufferReg = TRI->getPreloadedValue(
+ unsigned PrivateSegmentBufferReg = TRI.getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
- Info->setScratchRSrcReg(PrivateSegmentBufferReg);
+ Info.setScratchRSrcReg(PrivateSegmentBufferReg);
- unsigned PrivateSegmentWaveByteOffsetReg = TRI->getPreloadedValue(
+ unsigned PrivateSegmentWaveByteOffsetReg = TRI.getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
- Info->setScratchWaveOffsetReg(PrivateSegmentWaveByteOffsetReg);
+ Info.setScratchWaveOffsetReg(PrivateSegmentWaveByteOffsetReg);
} else {
unsigned ReservedBufferReg
- = TRI->reservedPrivateSegmentBufferReg(MF);
+ = TRI.reservedPrivateSegmentBufferReg(MF);
unsigned ReservedOffsetReg
- = TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
+ = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
// We tentatively reserve the last registers (skipping the last two
// which may contain VCC). After register allocation, we'll replace
// these with the ones immediately after those which were really
// allocated. In the prologue, copies will be inserted from the argument
// to these reserved registers.
- Info->setScratchRSrcReg(ReservedBufferReg);
- Info->setScratchWaveOffsetReg(ReservedOffsetReg);
+ Info.setScratchRSrcReg(ReservedBufferReg);
+ Info.setScratchWaveOffsetReg(ReservedOffsetReg);
}
} else {
- unsigned ReservedBufferReg = TRI->reservedPrivateSegmentBufferReg(MF);
+ unsigned ReservedBufferReg = TRI.reservedPrivateSegmentBufferReg(MF);
// Without HSA, relocations are used for the scratch pointer, and the
// buffer resource setup is always inserted in the prologue. Scratch wave
// offset is still in an input SGPR.
- Info->setScratchRSrcReg(ReservedBufferReg);
+ Info.setScratchRSrcReg(ReservedBufferReg);
if (HasStackObjects) {
- unsigned ScratchWaveOffsetReg = TRI->getPreloadedValue(
+ unsigned ScratchWaveOffsetReg = TRI.getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
- Info->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
+ Info.setScratchWaveOffsetReg(ScratchWaveOffsetReg);
} else {
unsigned ReservedOffsetReg
- = TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
- Info->setScratchWaveOffsetReg(ReservedOffsetReg);
+ = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
+ Info.setScratchWaveOffsetReg(ReservedOffsetReg);
}
}
+}
- if (Info->hasWorkItemIDX()) {
- unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X);
- MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
- CCInfo.AllocateReg(Reg);
+SDValue SITargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+ const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ FunctionType *FType = MF.getFunction()->getFunctionType();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+
+ if (Subtarget->isAmdHsaOS() && AMDGPU::isShader(CallConv)) {
+ const Function *Fn = MF.getFunction();
+ DiagnosticInfoUnsupported NoGraphicsHSA(
+ *Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
+ DAG.getContext()->diagnose(NoGraphicsHSA);
+ return DAG.getEntryNode();
}
- if (Info->hasWorkItemIDY()) {
- unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y);
- MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
- CCInfo.AllocateReg(Reg);
+ // Create stack objects that are used for emitting the debugger prologue
+ // if the "amdgpu-debugger-emit-prologue" attribute was specified.
+ if (ST.debuggerEmitPrologue())
+ createDebuggerPrologueStackObjects(MF);
+
+ SmallVector<ISD::InputArg, 16> Splits;
+ SmallVector<CCValAssign, 16> ArgLocs;
+ BitVector Skipped(Ins.size());
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+
+ bool IsShader = AMDGPU::isShader(CallConv);
+ bool IsKernel = AMDGPU::isKernel(CallConv);
+ bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CallConv);
+
+ if (IsShader) {
+ processShaderInputArgs(Splits, CallConv, Ins, Skipped, FType, Info);
+
+ // At least one interpolation mode must be enabled or else the GPU will
+ // hang.
+ //
+ // Check PSInputAddr instead of PSInputEnable. The idea is that if the user
+ // set PSInputAddr, the user wants to enable some bits after compilation
+ // based on run-time states. Since we can't know what the final PSInputEna
+ // will look like, we shouldn't do anything here; the user should take
+ // responsibility for the correct programming.
+ //
+ // Otherwise, the following restrictions apply:
+ // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
+ // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
+ // enabled too.
+ if (CallConv == CallingConv::AMDGPU_PS &&
+ ((Info->getPSInputAddr() & 0x7F) == 0 ||
+ ((Info->getPSInputAddr() & 0xF) == 0 &&
+ Info->isPSInputAllocated(11)))) {
+ CCInfo.AllocateReg(AMDGPU::VGPR0);
+ CCInfo.AllocateReg(AMDGPU::VGPR1);
+ Info->markPSInputAllocated(0);
+ Info->markPSInputEnabled(0);
+ }
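
For clarity, here is a minimal standalone restatement of the PSInputAddr check above; the helper name and C++ framing are illustrative, not part of the patch.

#include <cstdint>

// Hypothetical helper mirroring the condition above: a dummy interpolant
// (VGPR0/VGPR1) must be allocated when no PERSP_* (bits 0-3) or LINEAR_*
// (bits 4-6) mode is enabled, or when POS_W_FLOAT (bit 11) is allocated
// without any PERSP_* mode.
static bool needsDummyInterpolant(uint32_t PSInputAddr, bool PosWAllocated) {
  bool NoPerspOrLinear = (PSInputAddr & 0x7F) == 0;
  bool PosWWithoutPersp = (PSInputAddr & 0xF) == 0 && PosWAllocated;
  return NoPerspOrLinear || PosWWithoutPersp;
}
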
+
+ assert(!Info->hasDispatchPtr() &&
+ !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
+ !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
+ !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
+ !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
+ !Info->hasWorkItemIDZ());
+ } else {
+ assert(!IsKernel || (Info->hasWorkGroupIDX() && Info->hasWorkItemIDX()));
}
- if (Info->hasWorkItemIDZ()) {
- unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z);
- MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
- CCInfo.AllocateReg(Reg);
+ if (IsEntryFunc) {
+ allocateSpecialInputVGPRs(CCInfo, MF, *TRI, *Info);
+ allocateHSAUserSGPRs(CCInfo, MF, *TRI, *Info);
+ }
+
+ if (IsKernel) {
+ analyzeFormalArgumentsCompute(CCInfo, Ins);
+ } else {
+ CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, isVarArg);
+ CCInfo.AnalyzeFormalArguments(Splits, AssignFn);
+ }
+
+ SmallVector<SDValue, 16> Chains;
+
+ for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
+ const ISD::InputArg &Arg = Ins[i];
+ if (Skipped[i]) {
+ InVals.push_back(DAG.getUNDEF(Arg.VT));
+ continue;
+ }
+
+ CCValAssign &VA = ArgLocs[ArgIdx++];
+ MVT VT = VA.getLocVT();
+
+ if (IsEntryFunc && VA.isMemLoc()) {
+ VT = Ins[i].VT;
+ EVT MemVT = VA.getLocVT();
+
+ const uint64_t Offset = Subtarget->getExplicitKernelArgOffset(MF) +
+ VA.getLocMemOffset();
+ Info->setABIArgOffset(Offset + MemVT.getStoreSize());
+
+ // The first 36 bytes of the input buffer contain information about
+ // thread group and global sizes.
+ SDValue Arg = lowerKernargMemParameter(
+ DAG, VT, MemVT, DL, Chain, Offset, Ins[i].Flags.isSExt(), &Ins[i]);
+ Chains.push_back(Arg.getValue(1));
+
+ auto *ParamTy =
+ dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
+ if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS &&
+ ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ // On SI local pointers are just offsets into LDS, so they are always
+ // less than 16 bits. On CI and newer they could potentially be
+ // real pointers, so we can't guarantee their size.
+ Arg = DAG.getNode(ISD::AssertZext, DL, Arg.getValueType(), Arg,
+ DAG.getValueType(MVT::i16));
+ }
+
+ InVals.push_back(Arg);
+ continue;
+ }
+
+ if (VA.isMemLoc())
+ report_fatal_error("memloc not supported with calling convention");
+
+ assert(VA.isRegLoc() && "Parameter must be in a register!");
+
+ unsigned Reg = VA.getLocReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
+
+ Reg = MF.addLiveIn(Reg, RC);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+
+ if (Arg.VT.isVector()) {
+ // Build a vector from the registers
+ Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
+ unsigned NumElements = ParamType->getVectorNumElements();
+
+ SmallVector<SDValue, 4> Regs;
+ Regs.push_back(Val);
+ for (unsigned j = 1; j != NumElements; ++j) {
+ Reg = ArgLocs[ArgIdx++].getLocReg();
+ Reg = MF.addLiveIn(Reg, RC);
+
+ SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
+ Regs.push_back(Copy);
+ }
+
+ // Fill up the missing vector elements
+ NumElements = Arg.VT.getVectorNumElements() - NumElements;
+ Regs.append(NumElements, DAG.getUNDEF(VT));
+
+ InVals.push_back(DAG.getBuildVector(Arg.VT, DL, Regs));
+ continue;
+ }
+
+ InVals.push_back(Val);
}
- if (Chains.empty())
- return Chain;
+ // Start adding system SGPRs.
+ if (IsEntryFunc)
+ allocateSystemSGPRs(CCInfo, MF, *Info, IsShader);
+
+ reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ return Chains.empty() ? Chain :
+ DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
SDValue
@@ -1197,7 +1398,7 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (Flag.getNode())
RetOps.push_back(Flag);
- unsigned Opc = Info->returnsVoid() ? AMDGPUISD::ENDPGM : AMDGPUISD::RETURN;
+ unsigned Opc = Info->returnsVoid() ? AMDGPUISD::ENDPGM : AMDGPUISD::RETURN_TO_EPILOG;
return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}
@@ -1470,16 +1671,16 @@ static bool setM0ToIndexFromSGPR(const SIInstrInfo *TII,
VGPRIndexMode::SRC0_ENABLE : VGPRIndexMode::DST_ENABLE;
if (Offset == 0) {
MachineInstr *SetOn =
- BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
- .addOperand(*Idx)
- .addImm(IdxMode);
+ BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
+ .add(*Idx)
+ .addImm(IdxMode);
SetOn->getOperand(3).setIsUndef();
} else {
unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), Tmp)
- .addOperand(*Idx)
- .addImm(Offset);
+ .add(*Idx)
+ .addImm(Offset);
MachineInstr *SetOn =
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
.addReg(Tmp, RegState::Kill)
@@ -1493,10 +1694,10 @@ static bool setM0ToIndexFromSGPR(const SIInstrInfo *TII,
if (Offset == 0) {
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addOperand(*Idx);
+ .add(*Idx);
} else {
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
- .addOperand(*Idx)
+ .add(*Idx)
.addImm(Offset);
}
@@ -1522,7 +1723,7 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
std::tie(SubReg, Offset)
= computeIndirectRegAndOffset(TRI, VecRC, SrcReg, Offset);
- bool UseGPRIdxMode = ST.hasVGPRIndexMode() && EnableVGPRIndexMode;
+ bool UseGPRIdxMode = ST.useVGPRIndexMode(EnableVGPRIndexMode);
if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset, UseGPRIdxMode, true)) {
MachineBasicBlock::iterator I(&MI);
@@ -1548,7 +1749,6 @@ static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
return &MBB;
}
-
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
@@ -1625,7 +1825,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
std::tie(SubReg, Offset) = computeIndirectRegAndOffset(TRI, VecRC,
SrcVec->getReg(),
Offset);
- bool UseGPRIdxMode = ST.hasVGPRIndexMode() && EnableVGPRIndexMode;
+ bool UseGPRIdxMode = ST.useVGPRIndexMode(EnableVGPRIndexMode);
if (Idx->getReg() == AMDGPU::NoRegister) {
MachineBasicBlock::iterator I(&MI);
@@ -1634,9 +1834,9 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
assert(Offset == 0);
BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dst)
- .addOperand(*SrcVec)
- .addOperand(*Val)
- .addImm(SubReg);
+ .add(*SrcVec)
+ .add(*Val)
+ .addImm(SubReg);
MI.eraseFromParent();
return &MBB;
@@ -1648,11 +1848,11 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
if (UseGPRIdxMode) {
BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_indirect))
- .addReg(SrcVec->getReg(), RegState::Undef, SubReg) // vdst
- .addOperand(*Val)
- .addReg(Dst, RegState::ImplicitDefine)
- .addReg(SrcVec->getReg(), RegState::Implicit)
- .addReg(AMDGPU::M0, RegState::Implicit);
+ .addReg(SrcVec->getReg(), RegState::Undef, SubReg) // vdst
+ .add(*Val)
+ .addReg(Dst, RegState::ImplicitDefine)
+ .addReg(SrcVec->getReg(), RegState::Implicit)
+ .addReg(AMDGPU::M0, RegState::Implicit);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
} else {
@@ -1661,7 +1861,7 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
BuildMI(MBB, I, DL, MovRelDesc)
.addReg(Dst, RegState::Define)
.addReg(SrcVec->getReg())
- .addOperand(*Val)
+ .add(*Val)
.addImm(SubReg - AMDGPU::sub0);
}
@@ -1694,18 +1894,18 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
if (UseGPRIdxMode) {
BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOV_B32_indirect))
- .addReg(PhiReg, RegState::Undef, SubReg) // vdst
- .addOperand(*Val) // src0
- .addReg(Dst, RegState::ImplicitDefine)
- .addReg(PhiReg, RegState::Implicit)
- .addReg(AMDGPU::M0, RegState::Implicit);
+ .addReg(PhiReg, RegState::Undef, SubReg) // vdst
+ .add(*Val) // src0
+ .addReg(Dst, RegState::ImplicitDefine)
+ .addReg(PhiReg, RegState::Implicit)
+ .addReg(AMDGPU::M0, RegState::Implicit);
} else {
const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC));
BuildMI(*LoopBB, InsPt, DL, MovRelDesc)
.addReg(Dst, RegState::Define)
.addReg(PhiReg)
- .addOperand(*Val)
+ .add(*Val)
.addImm(SubReg - AMDGPU::sub0);
}
@@ -1741,18 +1941,62 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
}
switch (MI.getOpcode()) {
- case AMDGPU::SI_INIT_M0: {
+ case AMDGPU::S_TRAP_PSEUDO: {
+ const DebugLoc &DL = MI.getDebugLoc();
+ const int TrapType = MI.getOperand(0).getImm();
+
+ if (Subtarget->getTrapHandlerAbi() == SISubtarget::TrapHandlerAbiHsa &&
+ Subtarget->isTrapHandlerEnabled()) {
+
+ MachineFunction *MF = BB->getParent();
+ SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
+ unsigned UserSGPR = Info->getQueuePtrUserSGPR();
+ assert(UserSGPR != AMDGPU::NoRegister);
+
+ if (!BB->isLiveIn(UserSGPR))
+ BB->addLiveIn(UserSGPR);
+
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::SGPR0_SGPR1)
+ .addReg(UserSGPR);
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_TRAP))
+ .addImm(TrapType)
+ .addReg(AMDGPU::SGPR0_SGPR1, RegState::Implicit);
+ } else {
+ switch (TrapType) {
+ case SISubtarget::TrapIDLLVMTrap:
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_ENDPGM));
+ break;
+ case SISubtarget::TrapIDLLVMDebugTrap: {
+ DiagnosticInfoUnsupported NoTrap(*MF->getFunction(),
+ "debugtrap handler not supported",
+ DL,
+ DS_Warning);
+ LLVMContext &C = MF->getFunction()->getContext();
+ C.diagnose(NoTrap);
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_NOP))
+ .addImm(0);
+ break;
+ }
+ default:
+ llvm_unreachable("unsupported trap handler type!");
+ }
+ }
+
+ MI.eraseFromParent();
+ return BB;
+ }
+ case AMDGPU::SI_INIT_M0:
BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addOperand(MI.getOperand(0));
+ .add(MI.getOperand(0));
MI.eraseFromParent();
return BB;
- }
+
case AMDGPU::GET_GROUPSTATICSIZE: {
DebugLoc DL = MI.getDebugLoc();
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
- .addOperand(MI.getOperand(0))
- .addImm(MFI->getLDSSize());
+ .add(MI.getOperand(0))
+ .addImm(MFI->getLDSSize());
MI.eraseFromParent();
return BB;
}
@@ -1803,7 +2047,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
const DebugLoc &DL = MI.getDebugLoc();
MachineInstr *Br = BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC1))
- .addOperand(MI.getOperand(0));
+ .add(MI.getOperand(0));
Br->getOperand(1).setIsUndef(true); // read undef SCC
MI.eraseFromParent();
return BB;
@@ -1856,9 +2100,6 @@ MVT SITargetLowering::getScalarShiftAmountTy(const DataLayout &, EVT VT) const {
bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
VT = VT.getScalarType();
- if (!VT.isSimple())
- return false;
-
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32:
// This is as fast on some subtargets. However, we always have full rate f32
@@ -1909,13 +2150,52 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
case ISD::ADDRSPACECAST: return lowerADDRSPACECAST(Op, DAG);
- case ISD::TRAP: return lowerTRAP(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return lowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::FP_ROUND:
return lowerFP_ROUND(Op, DAG);
}
return SDValue();
}
+void SITargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ case ISD::INSERT_VECTOR_ELT: {
+ if (SDValue Res = lowerINSERT_VECTOR_ELT(SDValue(N, 0), DAG))
+ Results.push_back(Res);
+ return;
+ }
+ case ISD::EXTRACT_VECTOR_ELT: {
+ if (SDValue Res = lowerEXTRACT_VECTOR_ELT(SDValue(N, 0), DAG))
+ Results.push_back(Res);
+ return;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IID) {
+ case Intrinsic::amdgcn_cvt_pkrtz: {
+ SDValue Src0 = N->getOperand(1);
+ SDValue Src1 = N->getOperand(2);
+ SDLoc SL(N);
+ SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, SL, MVT::i32,
+ Src0, Src1);
+
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Cvt));
+ return;
+ }
+ default:
+ break;
+ }
+ }
+ default:
+ break;
+ }
+}
+
/// \brief Helper function for LowerBRCOND
static SDNode *findUser(SDValue Value, unsigned Opcode) {
@@ -1932,31 +2212,25 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) {
return nullptr;
}
-bool SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
+unsigned SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
if (Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
switch (cast<ConstantSDNode>(Intr->getOperand(1))->getZExtValue()) {
- case AMDGPUIntrinsic::amdgcn_if:
- case AMDGPUIntrinsic::amdgcn_else:
- case AMDGPUIntrinsic::amdgcn_end_cf:
- case AMDGPUIntrinsic::amdgcn_loop:
- return true;
+ case Intrinsic::amdgcn_if:
+ return AMDGPUISD::IF;
+ case Intrinsic::amdgcn_else:
+ return AMDGPUISD::ELSE;
+ case Intrinsic::amdgcn_loop:
+ return AMDGPUISD::LOOP;
+ case Intrinsic::amdgcn_end_cf:
+ llvm_unreachable("should not occur");
default:
- return false;
+ return 0;
}
}
- if (Intr->getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
- switch (cast<ConstantSDNode>(Intr->getOperand(0))->getZExtValue()) {
- case AMDGPUIntrinsic::amdgcn_break:
- case AMDGPUIntrinsic::amdgcn_if_break:
- case AMDGPUIntrinsic::amdgcn_else_break:
- return true;
- default:
- return false;
- }
- }
-
- return false;
+ // break, if_break, else_break are all only used as inputs to loop, not
+ // directly as branch conditions.
+ return 0;
}
void SITargetLowering::createDebuggerPrologueStackObjects(
@@ -1987,13 +2261,13 @@ void SITargetLowering::createDebuggerPrologueStackObjects(
bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
const Triple &TT = getTargetMachine().getTargetTriple();
- return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ return GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
AMDGPU::shouldEmitConstantsToTextSection(TT);
}
bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
- return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
- GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) &&
+ return (GV->getType()->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) &&
!shouldEmitFixup(GV) &&
!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
}
@@ -2006,7 +2280,6 @@ bool SITargetLowering::shouldEmitPCReloc(const GlobalValue *GV) const {
/// last parameter, also switches branch target with BR if the need arises
SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
SelectionDAG &DAG) const {
-
SDLoc DL(BRCOND);
SDNode *Intr = BRCOND.getOperand(1).getNode();
@@ -2032,7 +2305,8 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
// eg: i1,ch = llvm.amdgcn.loop t0, TargetConstant:i32<6271>, t3
// => t9: ch = llvm.amdgcn.loop t0, TargetConstant:i32<6271>, t3, BasicBlock:ch<bb1 0x7fee5286d088>
- if (!isCFIntrinsic(Intr)) {
+ unsigned CFNode = isCFIntrinsic(Intr);
+ if (CFNode == 0) {
// This is a uniform branch so we don't need to legalize.
return BRCOND;
}
@@ -2050,15 +2324,13 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
if (HaveChain)
Ops.push_back(BRCOND.getOperand(0));
- Ops.append(Intr->op_begin() + (HaveChain ? 1 : 0), Intr->op_end());
+ Ops.append(Intr->op_begin() + (HaveChain ? 2 : 1), Intr->op_end());
Ops.push_back(Target);
ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end());
// build the new intrinsic call
- SDNode *Result = DAG.getNode(
- Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
- DAG.getVTList(Res), Ops).getNode();
+ SDNode *Result = DAG.getNode(CFNode, DL, DAG.getVTList(Res), Ops).getNode();
if (!HaveChain) {
SDValue Ops[] = {
@@ -2130,9 +2402,28 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);
}
-SDValue SITargetLowering::getSegmentAperture(unsigned AS,
+SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const {
- SDLoc SL;
+ // FIXME: Use inline constants (src_{shared, private}_base) instead.
+ if (Subtarget->hasApertureRegs()) {
+ unsigned Offset = AS == AMDGPUASI.LOCAL_ADDRESS ?
+ AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
+ AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
+ unsigned WidthM1 = AS == AMDGPUASI.LOCAL_ADDRESS ?
+ AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
+ AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
+ unsigned Encoding =
+ AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
+ Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
+ WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
+
+ SDValue EncodingImm = DAG.getTargetConstant(Encoding, DL, MVT::i16);
+ SDValue ApertureReg = SDValue(
+ DAG.getMachineNode(AMDGPU::S_GETREG_B32, DL, MVT::i32, EncodingImm), 0);
+ SDValue ShiftAmount = DAG.getTargetConstant(WidthM1 + 1, DL, MVT::i32);
+ return DAG.getNode(ISD::SHL, DL, MVT::i32, ApertureReg, ShiftAmount);
+ }
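
As a sketch of how the s_getreg_b32 immediate above is assembled; the shift amounts 0/6/11 are assumptions for illustration, and the authoritative constants live in AMDGPU::Hwreg.

#include <cstdint>

// Pack hwreg(id, offset, width-1) into the 16-bit s_getreg_b32 immediate.
constexpr uint32_t packHwreg(uint32_t Id, uint32_t Offset, uint32_t WidthM1) {
  return (Id << 0) | (Offset << 6) | (WidthM1 << 11); // assumed field layout
}
// With a 16-bit field (WidthM1 == 15), the code above then shifts the value
// read left by WidthM1 + 1 == 16, placing the aperture base in the high half.
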
+
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
unsigned UserSGPR = Info->getQueuePtrUserSGPR();
@@ -2143,19 +2434,19 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS,
// Offset into amd_queue_t for group_segment_aperture_base_hi /
// private_segment_aperture_base_hi.
- uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
+ uint32_t StructOffset = (AS == AMDGPUASI.LOCAL_ADDRESS) ? 0x40 : 0x44;
- SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, QueuePtr,
- DAG.getConstant(StructOffset, SL, MVT::i64));
+ SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, QueuePtr,
+ DAG.getConstant(StructOffset, DL, MVT::i64));
// TODO: Use custom target PseudoSourceValue.
// TODO: We should use the value from the IR intrinsic call, but it might not
// be available, and how would we get it?
Value *V = UndefValue::get(PointerType::get(Type::getInt8Ty(*DAG.getContext()),
- AMDGPUAS::CONSTANT_ADDRESS));
+ AMDGPUASI.CONSTANT_ADDRESS));
MachinePointerInfo PtrInfo(V, StructOffset);
- return DAG.getLoad(MVT::i32, SL, QueuePtr.getValue(1), Ptr, PtrInfo,
+ return DAG.getLoad(MVT::i32, DL, QueuePtr.getValue(1), Ptr, PtrInfo,
MinAlign(64, StructOffset),
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
@@ -2167,15 +2458,19 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
SDValue Src = ASC->getOperand(0);
-
- // FIXME: Really support non-0 null pointers.
- SDValue SegmentNullPtr = DAG.getConstant(-1, SL, MVT::i32);
SDValue FlatNullPtr = DAG.getConstant(0, SL, MVT::i64);
+ const AMDGPUTargetMachine &TM =
+ static_cast<const AMDGPUTargetMachine &>(getTargetMachine());
+
// flat -> local/private
- if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
- if (ASC->getDestAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
- ASC->getDestAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
+ if (ASC->getSrcAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
+ unsigned DestAS = ASC->getDestAddressSpace();
+
+ if (DestAS == AMDGPUASI.LOCAL_ADDRESS ||
+ DestAS == AMDGPUASI.PRIVATE_ADDRESS) {
+ unsigned NullVal = TM.getNullPointerValue(DestAS);
+ SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE);
SDValue Ptr = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src);
@@ -2185,13 +2480,18 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
}
// local/private -> flat
- if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
- if (ASC->getSrcAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
- ASC->getSrcAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
+ if (ASC->getDestAddressSpace() == AMDGPUASI.FLAT_ADDRESS) {
+ unsigned SrcAS = ASC->getSrcAddressSpace();
+
+ if (SrcAS == AMDGPUASI.LOCAL_ADDRESS ||
+ SrcAS == AMDGPUASI.PRIVATE_ADDRESS) {
+ unsigned NullVal = TM.getNullPointerValue(SrcAS);
+ SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
+
SDValue NonNull
= DAG.getSetCC(SL, MVT::i1, Src, SegmentNullPtr, ISD::SETNE);
- SDValue Aperture = getSegmentAperture(ASC->getSrcAddressSpace(), DAG);
+ SDValue Aperture = getSegmentAperture(ASC->getSrcAddressSpace(), SL, DAG);
SDValue CvtPtr
= DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Aperture);
@@ -2211,17 +2511,88 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
return DAG.getUNDEF(ASC->getValueType(0));
}
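
A scalar model of the flat -> segment branch above, assuming 64-bit flat and 32-bit segment pointers; the helper is illustrative, and the per-address-space null comes from TM.getNullPointerValue in the patch.

#include <cstdint>

static uint32_t castFlatToSegment(uint64_t Flat, uint32_t SegmentNull) {
  // Non-null flat pointers truncate to the segment offset; flat null (0)
  // maps to the segment's own null value.
  return Flat != 0 ? uint32_t(Flat) : SegmentNull;
}
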
+SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Idx = Op.getOperand(2);
+ if (isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ // Avoid stack access for dynamic indexing.
+ SDLoc SL(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue Val = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Op.getOperand(1));
+
+ // v_bfi_b32 (v_bfm_b32 16, (shl idx, 16)), val, vec
+ SDValue ExtVal = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Val);
+
+ // Convert vector index to bit-index.
+ SDValue ScaledIdx = DAG.getNode(ISD::SHL, SL, MVT::i32, Idx,
+ DAG.getConstant(16, SL, MVT::i32));
+
+ SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec);
+
+ SDValue BFM = DAG.getNode(ISD::SHL, SL, MVT::i32,
+ DAG.getConstant(0xffff, SL, MVT::i32),
+ ScaledIdx);
+
+ SDValue LHS = DAG.getNode(ISD::AND, SL, MVT::i32, BFM, ExtVal);
+ SDValue RHS = DAG.getNode(ISD::AND, SL, MVT::i32,
+ DAG.getNOT(SL, BFM, MVT::i32), BCVec);
+
+ SDValue BFI = DAG.getNode(ISD::OR, SL, MVT::i32, LHS, RHS);
+ return DAG.getNode(ISD::BITCAST, SL, Op.getValueType(), BFI);
+}
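
A scalar model of the v_bfm/v_bfi selection the DAG nodes above are intended to produce (illustrative only; Idx is 0 or 1 for a packed v2i16/v2f16):

#include <cstdint>

static uint32_t insertHalf(uint32_t Vec, uint16_t Val, uint32_t Idx) {
  uint32_t BitIdx = Idx * 16;             // vector index -> bit index
  uint32_t Mask = 0xffffu << BitIdx;      // the v_bfm_b32 result
  uint32_t Ext = uint32_t(Val) << BitIdx; // element moved to its slot
  return (Mask & Ext) | (~Mask & Vec);    // the v_bfi_b32 blend
}
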
+
+SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+
+ EVT ResultVT = Op.getValueType();
+ SDValue Vec = Op.getOperand(0);
+ SDValue Idx = Op.getOperand(1);
+
+ if (const ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec);
+
+ if (CIdx->getZExtValue() == 1) {
+ Result = DAG.getNode(ISD::SRL, SL, MVT::i32, Result,
+ DAG.getConstant(16, SL, MVT::i32));
+ } else {
+ assert(CIdx->getZExtValue() == 0);
+ }
+
+ if (ResultVT.bitsLT(MVT::i32))
+ Result = DAG.getNode(ISD::TRUNCATE, SL, MVT::i16, Result);
+ return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result);
+ }
+
+ SDValue Sixteen = DAG.getConstant(16, SL, MVT::i32);
+
+ // Convert vector index to bit-index.
+ SDValue ScaledIdx = DAG.getNode(ISD::SHL, SL, MVT::i32, Idx, Sixteen);
+
+ SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec);
+ SDValue Elt = DAG.getNode(ISD::SRL, SL, MVT::i32, BC, ScaledIdx);
+
+ SDValue Result = Elt;
+ if (ResultVT.bitsLT(MVT::i32))
+ Result = DAG.getNode(ISD::TRUNCATE, SL, MVT::i16, Result);
+
+ return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result);
+}
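
And the matching scalar model for the dynamic extract above:

#include <cstdint>

static uint16_t extractHalf(uint32_t Vec, uint32_t Idx) {
  return uint16_t(Vec >> (Idx * 16)); // shift the element down, then truncate
}
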
+
bool
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// We can fold offsets for anything that doesn't require a GOT relocation.
- return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
- GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) &&
+ return (GA->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS ||
+ GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) &&
!shouldEmitGOTReloc(GA->getGlobal());
}
-static SDValue buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
- SDLoc DL, unsigned Offset, EVT PtrVT,
- unsigned GAFlags = SIInstrInfo::MO_NONE) {
+static SDValue
+buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
+ const SDLoc &DL, unsigned Offset, EVT PtrVT,
+ unsigned GAFlags = SIInstrInfo::MO_NONE) {
// In order to support pc-relative addressing, the PC_ADD_REL_OFFSET SDNode is
// lowered to the following code sequence:
//
@@ -2265,8 +2636,8 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
- if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
- GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
+ if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS &&
+ GSD->getAddressSpace() != AMDGPUASI.GLOBAL_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
SDLoc DL(GSD);
@@ -2283,7 +2654,7 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SIInstrInfo::MO_GOTPCREL32);
Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
- PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
const DataLayout &DataLayout = DAG.getDataLayout();
unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
// FIXME: Use a PseudoSourceValue once those can be assigned an address space.
@@ -2294,23 +2665,6 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
MachineMemOperand::MOInvariant);
}
-SDValue SITargetLowering::lowerTRAP(SDValue Op,
- SelectionDAG &DAG) const {
- const MachineFunction &MF = DAG.getMachineFunction();
- DiagnosticInfoUnsupported NoTrap(*MF.getFunction(),
- "trap handler not supported",
- Op.getDebugLoc(),
- DS_Warning);
- DAG.getContext()->diagnose(NoTrap);
-
- // Emit s_endpgm.
-
- // FIXME: This should really be selected to s_trap, but that requires
- // setting up the trap handler for it o do anything.
- return DAG.getNode(AMDGPUISD::ENDPGM, SDLoc(Op), MVT::Other,
- Op.getOperand(0));
-}
-
SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain,
const SDLoc &DL, SDValue V) const {
// We can't use S_MOV_B32 directly, because there is no way to specify m0 as
@@ -2332,14 +2686,15 @@ SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG,
MVT VT,
unsigned Offset) const {
SDLoc SL(Op);
- SDValue Param = LowerParameter(DAG, MVT::i32, MVT::i32, SL,
- DAG.getEntryNode(), Offset, false);
+ SDValue Param = lowerKernargMemParameter(DAG, MVT::i32, MVT::i32, SL,
+ DAG.getEntryNode(), Offset, false);
// The local size values will have the hi 16-bits as zero.
return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Param,
DAG.getValueType(VT));
}
-static SDValue emitNonHSAIntrinsicError(SelectionDAG& DAG, SDLoc DL, EVT VT) {
+static SDValue emitNonHSAIntrinsicError(SelectionDAG &DAG, const SDLoc &DL,
+ EVT VT) {
DiagnosticInfoUnsupported BadIntrin(*DAG.getMachineFunction().getFunction(),
"non-hsa intrinsic with hsa target",
DL.getDebugLoc());
@@ -2347,7 +2702,8 @@ static SDValue emitNonHSAIntrinsicError(SelectionDAG& DAG, SDLoc DL, EVT VT) {
return DAG.getUNDEF(VT);
}
-static SDValue emitRemovedIntrinsicError(SelectionDAG& DAG, SDLoc DL, EVT VT) {
+static SDValue emitRemovedIntrinsicError(SelectionDAG &DAG, const SDLoc &DL,
+ EVT VT) {
DiagnosticInfoUnsupported BadIntrin(*DAG.getMachineFunction().getFunction(),
"intrinsic not supported on subtarget",
DL.getDebugLoc());
@@ -2389,7 +2745,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::amdgcn_implicitarg_ptr: {
unsigned offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
- return LowerParameterPtr(DAG, DL, DAG.getEntryNode(), offset);
+ return lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), offset);
}
case Intrinsic::amdgcn_kernarg_segment_ptr: {
unsigned Reg
@@ -2403,19 +2759,16 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_rcp:
return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_rsq:
- case AMDGPUIntrinsic::AMDGPU_rsq: // Legacy name
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
- case Intrinsic::amdgcn_rsq_legacy: {
+ case Intrinsic::amdgcn_rsq_legacy:
if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
- }
- case Intrinsic::amdgcn_rcp_legacy: {
+ case Intrinsic::amdgcn_rcp_legacy:
if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
return DAG.getNode(AMDGPUISD::RCP_LEGACY, DL, VT, Op.getOperand(1));
- }
case Intrinsic::amdgcn_rsq_clamp: {
if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
@@ -2434,38 +2787,38 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_X, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_X, false);
case Intrinsic::r600_read_ngroups_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_Y, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_Y, false);
case Intrinsic::r600_read_ngroups_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_Z, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::NGROUPS_Z, false);
case Intrinsic::r600_read_global_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
case Intrinsic::r600_read_global_size_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
case Intrinsic::r600_read_global_size_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
+ return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
case Intrinsic::r600_read_local_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
@@ -2522,43 +2875,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
Op->getVTList(), Ops, VT, MMO);
}
- case AMDGPUIntrinsic::amdgcn_fdiv_fast: {
+ case Intrinsic::amdgcn_fdiv_fast:
return lowerFDIV_FAST(Op, DAG);
- }
- case AMDGPUIntrinsic::SI_vs_load_input:
- return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
- Op.getOperand(1),
- Op.getOperand(2),
- Op.getOperand(3));
-
- case AMDGPUIntrinsic::SI_fs_constant: {
- SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
- SDValue Glue = M0.getValue(1);
- return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
- DAG.getConstant(2, DL, MVT::i32), // P0
- Op.getOperand(1), Op.getOperand(2), Glue);
- }
- case AMDGPUIntrinsic::SI_packf16:
- if (Op.getOperand(1).isUndef() && Op.getOperand(2).isUndef())
- return DAG.getUNDEF(MVT::i32);
- return Op;
- case AMDGPUIntrinsic::SI_fs_interp: {
- SDValue IJ = Op.getOperand(4);
- SDValue I = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
- DAG.getConstant(0, DL, MVT::i32));
- SDValue J = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
- DAG.getConstant(1, DL, MVT::i32));
- I = DAG.getNode(ISD::BITCAST, DL, MVT::f32, I);
- J = DAG.getNode(ISD::BITCAST, DL, MVT::f32, J);
- SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
- SDValue Glue = M0.getValue(1);
- SDValue P1 = DAG.getNode(AMDGPUISD::INTERP_P1, DL,
- DAG.getVTList(MVT::f32, MVT::Glue),
- I, Op.getOperand(1), Op.getOperand(2), Glue);
- Glue = SDValue(P1.getNode(), 1);
- return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J,
- Op.getOperand(1), Op.getOperand(2), Glue);
- }
case Intrinsic::amdgcn_interp_mov: {
SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
SDValue Glue = M0.getValue(1);
@@ -2639,10 +2957,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::amdgcn_icmp: {
const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- int CondCode = CD->getSExtValue();
+ if (!CD)
+ return DAG.getUNDEF(VT);
+ int CondCode = CD->getSExtValue();
if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
- CondCode >= ICmpInst::Predicate::BAD_ICMP_PREDICATE)
+ CondCode > ICmpInst::Predicate::LAST_ICMP_PREDICATE)
return DAG.getUNDEF(VT);
ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
@@ -2652,10 +2972,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
case Intrinsic::amdgcn_fcmp: {
const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- int CondCode = CD->getSExtValue();
+ if (!CD)
+ return DAG.getUNDEF(VT);
- if (CondCode <= FCmpInst::Predicate::FCMP_FALSE ||
- CondCode >= FCmpInst::Predicate::FCMP_TRUE)
+ int CondCode = CD->getSExtValue();
+ if (CondCode < FCmpInst::Predicate::FIRST_FCMP_PREDICATE ||
+ CondCode > FCmpInst::Predicate::LAST_FCMP_PREDICATE)
return DAG.getUNDEF(VT);
FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode);
@@ -2663,14 +2985,29 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
Op.getOperand(2), DAG.getCondCode(CCOpcode));
}
+ case Intrinsic::amdgcn_fmed3:
+ return DAG.getNode(AMDGPUISD::FMED3, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_fmul_legacy:
return DAG.getNode(AMDGPUISD::FMUL_LEGACY, DL, VT,
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::amdgcn_sffbh:
- case AMDGPUIntrinsic::AMDGPU_flbit_i32: // Legacy name.
return DAG.getNode(AMDGPUISD::FFBH_I32, DL, VT, Op.getOperand(1));
+ case Intrinsic::amdgcn_sbfe:
+ return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::amdgcn_ubfe:
+ return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::amdgcn_cvt_pkrtz: {
+ // FIXME: Stop adding cast if v2f16 legal.
+ EVT VT = Op.getValueType();
+ SDValue Node = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, DL, MVT::i32,
+ Op.getOperand(1), Op.getOperand(2));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Node);
+ }
default:
- return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ return Op;
}
}
@@ -2718,6 +3055,64 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO);
}
+ // Basic sample.
+ case Intrinsic::amdgcn_image_sample:
+ case Intrinsic::amdgcn_image_sample_cl:
+ case Intrinsic::amdgcn_image_sample_d:
+ case Intrinsic::amdgcn_image_sample_d_cl:
+ case Intrinsic::amdgcn_image_sample_l:
+ case Intrinsic::amdgcn_image_sample_b:
+ case Intrinsic::amdgcn_image_sample_b_cl:
+ case Intrinsic::amdgcn_image_sample_lz:
+ case Intrinsic::amdgcn_image_sample_cd:
+ case Intrinsic::amdgcn_image_sample_cd_cl:
+
+ // Sample with comparison.
+ case Intrinsic::amdgcn_image_sample_c:
+ case Intrinsic::amdgcn_image_sample_c_cl:
+ case Intrinsic::amdgcn_image_sample_c_d:
+ case Intrinsic::amdgcn_image_sample_c_d_cl:
+ case Intrinsic::amdgcn_image_sample_c_l:
+ case Intrinsic::amdgcn_image_sample_c_b:
+ case Intrinsic::amdgcn_image_sample_c_b_cl:
+ case Intrinsic::amdgcn_image_sample_c_lz:
+ case Intrinsic::amdgcn_image_sample_c_cd:
+ case Intrinsic::amdgcn_image_sample_c_cd_cl:
+
+ // Sample with offsets.
+ case Intrinsic::amdgcn_image_sample_o:
+ case Intrinsic::amdgcn_image_sample_cl_o:
+ case Intrinsic::amdgcn_image_sample_d_o:
+ case Intrinsic::amdgcn_image_sample_d_cl_o:
+ case Intrinsic::amdgcn_image_sample_l_o:
+ case Intrinsic::amdgcn_image_sample_b_o:
+ case Intrinsic::amdgcn_image_sample_b_cl_o:
+ case Intrinsic::amdgcn_image_sample_lz_o:
+ case Intrinsic::amdgcn_image_sample_cd_o:
+ case Intrinsic::amdgcn_image_sample_cd_cl_o:
+
+ // Sample with comparison and offsets.
+ case Intrinsic::amdgcn_image_sample_c_o:
+ case Intrinsic::amdgcn_image_sample_c_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_d_o:
+ case Intrinsic::amdgcn_image_sample_c_d_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_l_o:
+ case Intrinsic::amdgcn_image_sample_c_b_o:
+ case Intrinsic::amdgcn_image_sample_c_b_cl_o:
+ case Intrinsic::amdgcn_image_sample_c_lz_o:
+ case Intrinsic::amdgcn_image_sample_c_cd_o:
+ case Intrinsic::amdgcn_image_sample_c_cd_cl_o:
+
+ case Intrinsic::amdgcn_image_getlod: {
+ // Replace a dmask that has everything disabled with undef.
+ const ConstantSDNode *DMask = dyn_cast<ConstantSDNode>(Op.getOperand(5));
+ if (!DMask || DMask->isNullValue()) {
+ SDValue Undef = DAG.getUNDEF(Op.getValueType());
+ return DAG.getMergeValues({ Undef, Op.getOperand(0) }, SDLoc(Op));
+ }
+
+ return SDValue();
+ }
default:
return SDValue();
}
@@ -2731,17 +3126,60 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntrinsicID) {
- case AMDGPUIntrinsic::SI_sendmsg:
- case Intrinsic::amdgcn_s_sendmsg: {
- Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
- SDValue Glue = Chain.getValue(1);
- return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
- Op.getOperand(2), Glue);
+ case Intrinsic::amdgcn_exp: {
+ const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(2));
+ const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(3));
+ const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(8));
+ const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(9));
+
+ const SDValue Ops[] = {
+ Chain,
+ DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt
+ DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en
+ Op.getOperand(4), // src0
+ Op.getOperand(5), // src1
+ Op.getOperand(6), // src2
+ Op.getOperand(7), // src3
+ DAG.getTargetConstant(0, DL, MVT::i1), // compr
+ DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1)
+ };
+
+ unsigned Opc = Done->isNullValue() ?
+ AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
+ return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
+ }
+ case Intrinsic::amdgcn_exp_compr: {
+ const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(2));
+ const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(3));
+ SDValue Src0 = Op.getOperand(4);
+ SDValue Src1 = Op.getOperand(5);
+ const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(6));
+ const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(7));
+
+ SDValue Undef = DAG.getUNDEF(MVT::f32);
+ const SDValue Ops[] = {
+ Chain,
+ DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt
+ DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en
+ DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0),
+ DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1),
+ Undef, // src2
+ Undef, // src3
+ DAG.getTargetConstant(1, DL, MVT::i1), // compr
+ DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1)
+ };
+
+ unsigned Opc = Done->isNullValue() ?
+ AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
+ return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
}
+ case Intrinsic::amdgcn_s_sendmsg:
case Intrinsic::amdgcn_s_sendmsghalt: {
+ unsigned NodeOp = (IntrinsicID == Intrinsic::amdgcn_s_sendmsg) ?
+ AMDGPUISD::SENDMSG : AMDGPUISD::SENDMSGHALT;
Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
SDValue Glue = Chain.getValue(1);
- return DAG.getNode(AMDGPUISD::SENDMSGHALT, DL, MVT::Other, Chain,
+ return DAG.getNode(NodeOp, DL, MVT::Other, Chain,
Op.getOperand(2), Glue);
}
case AMDGPUIntrinsic::SI_tbuffer_store: {
@@ -2784,31 +3222,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
}
- case AMDGPUIntrinsic::SI_export: {
- const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(2));
- const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(3));
- const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(4));
- const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(5));
- const ConstantSDNode *Compr = cast<ConstantSDNode>(Op.getOperand(6));
-
- const SDValue Ops[] = {
- Chain,
- DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8),
- DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1),
- DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8),
- DAG.getTargetConstant(Compr->getZExtValue(), DL, MVT::i1),
- Op.getOperand(7), // src0
- Op.getOperand(8), // src1
- Op.getOperand(9), // src2
- Op.getOperand(10) // src3
- };
-
- unsigned Opc = Done->isNullValue() ?
- AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
- return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
- }
- default:
+ case Intrinsic::amdgcn_s_barrier: {
+ if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ unsigned WGSize = ST.getFlatWorkGroupSizes(*MF.getFunction()).second;
+ if (WGSize <= ST.getWavefrontSize())
+ return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other,
+ Op.getOperand(0)), 0);
+ }
return SDValue();
+ }
+ default:
+ return Op;
}
}
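
The s_barrier case above rests on a simple size argument; a hypothetical predicate stating it:

// If the whole workgroup fits in one wavefront, its lanes already execute in
// lockstep, so s_barrier can degrade to a scheduling-only WAVE_BARRIER.
static bool barrierNeededForCorrectness(unsigned WorkGroupSize,
                                        unsigned WavefrontSize) {
  return WorkGroupSize > WavefrontSize;
}
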
@@ -2857,21 +3283,20 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibility that flat instructions access scratch memory,
// then we need to use the same legalization rules we use for private.
- if (AS == AMDGPUAS::FLAT_ADDRESS)
+ if (AS == AMDGPUASI.FLAT_ADDRESS)
AS = MFI->hasFlatScratchInit() ?
- AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
+ AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
unsigned NumElements = MemVT.getVectorNumElements();
- switch (AS) {
- case AMDGPUAS::CONSTANT_ADDRESS:
+ if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
if (isMemOpUniform(Load))
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
// loads.
//
- LLVM_FALLTHROUGH;
- case AMDGPUAS::GLOBAL_ADDRESS: {
+ }
+ if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
isMemOpHasNoClobberedMemOperand(Load))
return SDValue();
@@ -2880,13 +3305,14 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
// loads.
//
}
- LLVM_FALLTHROUGH;
- case AMDGPUAS::FLAT_ADDRESS:
+ if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS ||
+ AS == AMDGPUASI.FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);
// v4 loads are supported for private and global memory.
return SDValue();
- case AMDGPUAS::PRIVATE_ADDRESS: {
+ }
+ if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
// Depending on the setting of the private_element_size field in the
// resource descriptor, we can only make private accesses up to a certain
// size.
@@ -2905,8 +3331,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
default:
llvm_unreachable("unsupported private_element_size");
}
- }
- case AMDGPUAS::LOCAL_ADDRESS: {
+ } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
if (NumElements > 2)
return SplitVectorLoad(Op, DAG);
@@ -2916,9 +3341,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
// If properly aligned, if we split we might be able to use ds_read_b64.
return SplitVectorLoad(Op, DAG);
}
- default:
- return SDValue();
- }
+ return SDValue();
}
SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
@@ -3287,18 +3710,17 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibility that flat instructions access scratch memory,
// then we need to use the same legalization rules we use for private.
- if (AS == AMDGPUAS::FLAT_ADDRESS)
+ if (AS == AMDGPUASI.FLAT_ADDRESS)
AS = MFI->hasFlatScratchInit() ?
- AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
+ AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS;
unsigned NumElements = VT.getVectorNumElements();
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS:
- case AMDGPUAS::FLAT_ADDRESS:
+ if (AS == AMDGPUASI.GLOBAL_ADDRESS ||
+ AS == AMDGPUASI.FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorStore(Op, DAG);
return SDValue();
- case AMDGPUAS::PRIVATE_ADDRESS: {
+ } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) {
switch (Subtarget->getMaxPrivateElementSize()) {
case 4:
return scalarizeVectorStore(Store, DAG);
@@ -3313,8 +3735,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
default:
llvm_unreachable("unsupported private_element_size");
}
- }
- case AMDGPUAS::LOCAL_ADDRESS: {
+ } else if (AS == AMDGPUASI.LOCAL_ADDRESS) {
if (NumElements > 2)
return SplitVectorStore(Op, DAG);
@@ -3323,8 +3744,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// If properly aligned, if we split we might be able to use ds_write_b64.
return SplitVectorStore(Op, DAG);
- }
- default:
+ } else {
llvm_unreachable("unhandled address space");
}
}
@@ -3355,7 +3775,7 @@ SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) co
unsigned AS = AtomicNode->getAddressSpace();
// No custom lowering required for local address space
- if (!isFlatGlobalAddrSpace(AS))
+ if (!isFlatGlobalAddrSpace(AS, AMDGPUASI))
return Op;
// Non-local address space requires custom lowering for atomic compare
@@ -3412,12 +3832,12 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
/// the immediate offsets of a memory instruction for the given address space.
static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
const SISubtarget &STI) {
- switch (AS) {
- case AMDGPUAS::GLOBAL_ADDRESS: {
+ auto AMDGPUASI = STI.getAMDGPUAS();
+ if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
// MUBUF instructions have a 12-bit offset in bytes.
return isUInt<12>(OffsetSize);
}
- case AMDGPUAS::CONSTANT_ADDRESS: {
+ if (AS == AMDGPUASI.CONSTANT_ADDRESS) {
// SMRD instructions have an 8-bit offset in dwords on SI and
// a 20-bit offset in bytes on VI.
if (STI.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
@@ -3425,16 +3845,13 @@ static bool canFoldOffset(unsigned OffsetSize, unsigned AS,
else
return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4);
}
- case AMDGPUAS::LOCAL_ADDRESS:
- case AMDGPUAS::REGION_ADDRESS: {
+ if (AS == AMDGPUASI.LOCAL_ADDRESS ||
+ AS == AMDGPUASI.REGION_ADDRESS) {
// The single offset versions have a 16-bit offset in bytes.
return isUInt<16>(OffsetSize);
}
- case AMDGPUAS::PRIVATE_ADDRESS:
// Indirect register addressing does not use any offsets.
- default:
- return 0;
- }
+ return false;
}
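The three encodings checked by canFoldOffset are easy to eyeball in isolation. A standalone restatement (helper names are mine) with the boundary values spelled out:

#include <cassert>
#include <cstdint>

// True iff X fits in N unsigned bits, mirroring llvm::isUInt<N>.
template <unsigned N> bool fitsUnsigned(uint64_t X) { return X < (1ull << N); }

bool mubufFoldable(uint64_t Off) { return fitsUnsigned<12>(Off); }  // bytes
bool smrdSIFoldable(uint64_t Off) {                                 // dwords on SI
  return (Off % 4 == 0) && fitsUnsigned<8>(Off / 4);
}
bool smrdVIFoldable(uint64_t Off) { return fitsUnsigned<20>(Off); } // bytes on VI
bool dsFoldable(uint64_t Off) { return fitsUnsigned<16>(Off); }     // bytes

int main() {
  assert(mubufFoldable(4095) && !mubufFoldable(4096));
  assert(smrdSIFoldable(1020) && !smrdSIFoldable(1021)); // 1021: not dword-aligned
  assert(dsFoldable(65535) && !dsFoldable(65536));
}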
// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
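The rewrite stated in this comment is plain distributivity in modular arithmetic, so it preserves the value for any fixed-width integers; what the combine actually has to check is profitability and legality, e.g. whether the shifted constant still folds into the addressing mode (compare canFoldOffset above). A quick standalone check of the identity:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x1234, C1 = 40, C2 = 3;
  // (shl (add x, c1), c2) == add (shl x, c2), (shl c1, c2), mod 2^32.
  assert(((X + C1) << C2) == ((X << C2) + (C1 << C2)));
}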
@@ -3492,7 +3909,7 @@ SDValue SITargetLowering::performMemSDNodeCombine(MemSDNode *N,
// TODO: We could also do this for multiplies.
unsigned AS = N->getAddressSpace();
- if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
+ if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUASI.PRIVATE_ADDRESS) {
SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
if (NewPtr) {
SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end());
@@ -3692,6 +4109,88 @@ SDValue SITargetLowering::performXorCombine(SDNode *N,
return SDValue();
}
+// Instructions that will be lowered with a final instruction that zeros the
+// high result bits.
+// XXX - probably only need to list legal operations.
+static bool fp16SrcZerosHighBits(unsigned Opc) {
+ switch (Opc) {
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FMA:
+ case ISD::FMAD:
+ case ISD::FCANONICALIZE:
+ case ISD::FP_ROUND:
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP:
+ case ISD::FABS:
+ // Fabs is lowered to a bit operation, but it's an and which will clear the
+ // high bits anyway.
+ case ISD::FSQRT:
+ case ISD::FSIN:
+ case ISD::FCOS:
+ case ISD::FPOWI:
+ case ISD::FPOW:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FROUND:
+ case ISD::FFLOOR:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ case AMDGPUISD::FRACT:
+ case AMDGPUISD::CLAMP:
+ case AMDGPUISD::COS_HW:
+ case AMDGPUISD::SIN_HW:
+ case AMDGPUISD::FMIN3:
+ case AMDGPUISD::FMAX3:
+ case AMDGPUISD::FMED3:
+ case AMDGPUISD::FMAD_FTZ:
+ case AMDGPUISD::RCP:
+ case AMDGPUISD::RSQ:
+ case AMDGPUISD::LDEXP:
+ return true;
+ default:
+ // fcopysign, select and others may be lowered to 32-bit bit operations
+ // which don't zero the high bits.
+ return false;
+ }
+}
+
+SDValue SITargetLowering::performZeroExtendCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ if (!Subtarget->has16BitInsts() ||
+ DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+
+ SDValue Src = N->getOperand(0);
+ if (Src.getValueType() != MVT::i16)
+ return SDValue();
+
+ // (i32 zext (i16 (bitcast f16:$src))) -> fp16_zext $src
+ // FIXME: It is not universally true that the high bits are zeroed on gfx9.
+ if (Src.getOpcode() == ISD::BITCAST) {
+ SDValue BCSrc = Src.getOperand(0);
+ if (BCSrc.getValueType() == MVT::f16 &&
+ fp16SrcZerosHighBits(BCSrc.getOpcode()))
+ return DCI.DAG.getNode(AMDGPUISD::FP16_ZEXT, SDLoc(N), VT, BCSrc);
+ }
+
+ return SDValue();
+}
+
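At the bit level, the claim behind FP16_ZEXT is simply that widening a 16-bit value whose producer already cleared the upper half costs nothing. A standalone illustration (the constant is just a sample half-precision pattern):

#include <cassert>
#include <cstdint>

int main() {
  uint16_t HalfBits = 0x3C00;  // IEEE-754 half-precision 1.0
  uint32_t Widened = HalfBits; // implicit zero-extension
  // The high 16 bits are zero, which is what FP16_ZEXT models the
  // instructions listed in fp16SrcZerosHighBits as already guaranteeing.
  assert(Widened == 0x00003C00u);
}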
SDValue SITargetLowering::performClassCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -3713,7 +4212,7 @@ SDValue SITargetLowering::performClassCombine(SDNode *N,
SDValue SITargetLowering::performFCanonicalizeCombine(
SDNode *N,
DAGCombinerInfo &DCI) const {
- ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
+ ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0));
if (!CFP)
return SDValue();
@@ -3723,13 +4222,14 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
// Flush denormals to 0 if not enabled.
if (C.isDenormal()) {
EVT VT = N->getValueType(0);
- if (VT == MVT::f32 && !Subtarget->hasFP32Denormals())
+ EVT SVT = VT.getScalarType();
+ if (SVT == MVT::f32 && !Subtarget->hasFP32Denormals())
return DAG.getConstantFP(0.0, SDLoc(N), VT);
- if (VT == MVT::f64 && !Subtarget->hasFP64Denormals())
+ if (SVT == MVT::f64 && !Subtarget->hasFP64Denormals())
return DAG.getConstantFP(0.0, SDLoc(N), VT);
- if (VT == MVT::f16 && !Subtarget->hasFP16Denormals())
+ if (SVT == MVT::f16 && !Subtarget->hasFP16Denormals())
return DAG.getConstantFP(0.0, SDLoc(N), VT);
}
@@ -3749,7 +4249,7 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
return DAG.getConstantFP(CanonicalQNaN, SDLoc(N), VT);
}
- return SDValue(CFP, 0);
+ return N->getOperand(0);
}
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
@@ -3771,8 +4271,9 @@ static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
}
}
-static SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
- SDValue Op0, SDValue Op1, bool Signed) {
+SDValue SITargetLowering::performIntMed3ImmCombine(
+ SelectionDAG &DAG, const SDLoc &SL,
+ SDValue Op0, SDValue Op1, bool Signed) const {
ConstantSDNode *K1 = dyn_cast<ConstantSDNode>(Op1);
if (!K1)
return SDValue();
@@ -3790,23 +4291,22 @@ static SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
}
EVT VT = K0->getValueType(0);
+ unsigned Med3Opc = Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3;
+ if (VT == MVT::i32 || (VT == MVT::i16 && Subtarget->hasMed3_16())) {
+ return DAG.getNode(Med3Opc, SL, VT,
+ Op0.getOperand(0), SDValue(K0, 0), SDValue(K1, 0));
+ }
+ // If there isn't a 16-bit med3 operation, convert to 32-bit.
MVT NVT = MVT::i32;
unsigned ExtOp = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- SDValue Tmp1, Tmp2, Tmp3;
- Tmp1 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(0));
- Tmp2 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(1));
- Tmp3 = DAG.getNode(ExtOp, SL, NVT, Op1);
-
- if (VT == MVT::i16) {
- Tmp1 = DAG.getNode(Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3, SL, NVT,
- Tmp1, Tmp2, Tmp3);
+ SDValue Tmp1 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(0));
+ SDValue Tmp2 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(1));
+ SDValue Tmp3 = DAG.getNode(ExtOp, SL, NVT, Op1);
- return DAG.getNode(ISD::TRUNCATE, SL, VT, Tmp1);
- } else
- return DAG.getNode(Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3, SL, VT,
- Op0.getOperand(0), SDValue(K0, 0), SDValue(K1, 0));
+ SDValue Med3 = DAG.getNode(Med3Opc, SL, NVT, Tmp1, Tmp2, Tmp3);
+ return DAG.getNode(ISD::TRUNCATE, SL, VT, Med3);
}
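The fold relies on the median of three values computing a clamp when two of them are the ordered bounds. A standalone check, with med3 composed from min/max in the usual way (my formulation, not the hardware's):

#include <algorithm>
#include <cassert>

int med3(int A, int B, int C) { // median of three values
  return std::max(std::min(A, B), std::min(std::max(A, B), C));
}

int main() {
  const int K0 = 0, K1 = 15; // clamp bounds, with K0 <= K1
  for (int X = -5; X <= 20; ++X)
    assert(med3(X, K0, K1) == std::min(std::max(X, K0), K1));
}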
static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
@@ -3816,8 +4316,10 @@ static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
return DAG.isKnownNeverNaN(Op);
}
-static SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
- SDValue Op0, SDValue Op1) {
+SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
+ const SDLoc &SL,
+ SDValue Op0,
+ SDValue Op1) const {
ConstantFPSDNode *K1 = dyn_cast<ConstantFPSDNode>(Op1);
if (!K1)
return SDValue();
@@ -3831,6 +4333,20 @@ static SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
if (Cmp == APFloat::cmpGreaterThan)
return SDValue();
+ // TODO: Check IEEE bit enabled?
+ EVT VT = K0->getValueType(0);
+ if (Subtarget->enableDX10Clamp()) {
+ // If dx10_clamp is enabled, NaNs clamp to 0.0. This is the same as the
+ // hardware fmed3 behavior converting to a min.
+ // FIXME: Should this be allowing -0.0?
+ if (K1->isExactlyValue(1.0) && K0->isExactlyValue(0.0))
+ return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Op0.getOperand(0));
+ }
+
+ // med3 for f16 is only available on gfx9+.
+ if (VT == MVT::f64 || (VT == MVT::f16 && !Subtarget->hasMed3_16()))
+ return SDValue();
+
// This isn't safe with signaling NaNs because in IEEE mode, min/max on a
// signaling NaN gives a quiet NaN. The quiet NaN input to the min would then
// give the other result, which is different from med3 with a NaN input.
@@ -3846,6 +4362,7 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
unsigned Opc = N->getOpcode();
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
@@ -3853,7 +4370,9 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
// Only do this if the inner op has one use since this will just increase
// register pressure for no benefit.
- if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY) {
+
+ if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY &&
+ VT != MVT::f64) {
// max(max(a, b), c) -> max3(a, b, c)
// min(min(a, b), c) -> min3(a, b, c)
if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
@@ -3895,7 +4414,9 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
if (((Opc == ISD::FMINNUM && Op0.getOpcode() == ISD::FMAXNUM) ||
(Opc == AMDGPUISD::FMIN_LEGACY &&
Op0.getOpcode() == AMDGPUISD::FMAX_LEGACY)) &&
- N->getValueType(0) == MVT::f32 && Op0.hasOneUse()) {
+ (VT == MVT::f32 || VT == MVT::f64 ||
+ (VT == MVT::f16 && Subtarget->has16BitInsts())) &&
+ Op0.hasOneUse()) {
if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1))
return Res;
}
@@ -3903,6 +4424,69 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
return SDValue();
}
+static bool isClampZeroToOne(SDValue A, SDValue B) {
+ if (ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A)) {
+ if (ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B)) {
+ // FIXME: Should this be allowing -0.0?
+ return (CA->isExactlyValue(0.0) && CB->isExactlyValue(1.0)) ||
+ (CA->isExactlyValue(1.0) && CB->isExactlyValue(0.0));
+ }
+ }
+
+ return false;
+}
+
+// FIXME: Should only worry about snans for version with chain.
+SDValue SITargetLowering::performFMed3Combine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ EVT VT = N->getValueType(0);
+ // v_med3_f32 and v_max_f32 behave identically wrt denorms, exceptions and
+ // NaNs. With a NaN input, the order of the operands may change the result.
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+
+ SDValue Src0 = N->getOperand(0);
+ SDValue Src1 = N->getOperand(1);
+ SDValue Src2 = N->getOperand(2);
+
+ if (isClampZeroToOne(Src0, Src1)) {
+ // const_a, const_b, x -> clamp is safe in all cases including signaling
+ // NaNs.
+ // FIXME: Should this be allowing -0.0?
+ return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Src2);
+ }
+
+ // FIXME: dx10_clamp behavior assumed in instcombine. Should we really bother
+ // handling the case where dx10-clamp is disabled?
+ if (Subtarget->enableDX10Clamp()) {
+ // If NaNs are clamped to 0, we are free to reorder the inputs.
+
+ if (isa<ConstantFPSDNode>(Src0) && !isa<ConstantFPSDNode>(Src1))
+ std::swap(Src0, Src1);
+
+ if (isa<ConstantFPSDNode>(Src1) && !isa<ConstantFPSDNode>(Src2))
+ std::swap(Src1, Src2);
+
+ if (isa<ConstantFPSDNode>(Src0) && !isa<ConstantFPSDNode>(Src1))
+ std::swap(Src0, Src1);
+
+ if (isClampZeroToOne(Src1, Src2))
+ return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Src0);
+ }
+
+ return SDValue();
+}
+
+SDValue SITargetLowering::performCvtPkRTZCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDValue Src0 = N->getOperand(0);
+ SDValue Src1 = N->getOperand(1);
+ if (Src0.isUndef() && Src1.isUndef())
+ return DCI.DAG.getUNDEF(N->getValueType(0));
+ return SDValue();
+}
+
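The three conditional swaps in performFMed3Combine above are a single bubble pass: they push constant operands toward the back so the one isClampZeroToOne(Src1, Src2) check fires no matter where the two constants started (legal only under dx10-clamp, since reordering changes NaN behavior otherwise). A standalone sketch of the same network:

#include <cassert>
#include <utility>

struct Opnd { bool IsConst; int Tag; };

int main() {
  // Two constants in any placement among three operands...
  Opnd Src0{true, 0}, Src1{false, 1}, Src2{true, 2};

  // ...end up in the last two slots after the same three swaps.
  if (Src0.IsConst && !Src1.IsConst) std::swap(Src0, Src1);
  if (Src1.IsConst && !Src2.IsConst) std::swap(Src1, Src2);
  if (Src0.IsConst && !Src1.IsConst) std::swap(Src0, Src1);

  assert(!Src0.IsConst && Src1.IsConst && Src2.IsConst);
}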
unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
const SDNode *N0,
const SDNode *N1) const {
@@ -3933,7 +4517,6 @@ SDValue SITargetLowering::performFAddCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
- assert(!VT.isVector());
SDLoc SL(N);
SDValue LHS = N->getOperand(0);
@@ -4112,7 +4695,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::FMIN_LEGACY:
case AMDGPUISD::FMAX_LEGACY: {
if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
- N->getValueType(0) != MVT::f64 &&
getTargetMachine().getOptLevel() > CodeGenOpt::None)
return performMinMaxCombine(N, DCI);
break;
@@ -4135,17 +4717,18 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ATOMIC_LOAD_UMIN:
case ISD::ATOMIC_LOAD_UMAX:
case AMDGPUISD::ATOMIC_INC:
- case AMDGPUISD::ATOMIC_DEC: { // TODO: Target mem intrinsics.
+ case AMDGPUISD::ATOMIC_DEC: // TODO: Target mem intrinsics.
if (DCI.isBeforeLegalize())
break;
return performMemSDNodeCombine(cast<MemSDNode>(N), DCI);
- }
case ISD::AND:
return performAndCombine(N, DCI);
case ISD::OR:
return performOrCombine(N, DCI);
case ISD::XOR:
return performXorCombine(N, DCI);
+ case ISD::ZERO_EXTEND:
+ return performZeroExtendCombine(N, DCI);
case AMDGPUISD::FP_CLASS:
return performClassCombine(N, DCI);
case ISD::FCANONICALIZE:
@@ -4170,6 +4753,28 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::CVT_F32_UBYTE2:
case AMDGPUISD::CVT_F32_UBYTE3:
return performCvtF32UByteNCombine(N, DCI);
+ case AMDGPUISD::FMED3:
+ return performFMed3Combine(N, DCI);
+ case AMDGPUISD::CVT_PKRTZ_F16_F32:
+ return performCvtPkRTZCombine(N, DCI);
+ case ISD::SCALAR_TO_VECTOR: {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+
+ // v2i16 (scalar_to_vector i16:x) -> v2i16 (bitcast (any_extend i16:x))
+ if (VT == MVT::v2i16 || VT == MVT::v2f16) {
+ SDLoc SL(N);
+ SDValue Src = N->getOperand(0);
+ EVT EltVT = Src.getValueType();
+ if (EltVT == MVT::f16)
+ Src = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Src);
+
+ SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Src);
+ return DAG.getNode(ISD::BITCAST, SL, VT, Ext);
+ }
+
+ break;
+ }
}
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
@@ -4198,6 +4803,10 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
I != E; ++I) {
+ // Don't look at users of the chain.
+ if (I.getUse().getResNo() != 0)
+ continue;
+
// Abort if we can't understand the usage
if (!I->isMachineOpcode() ||
I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
@@ -4250,7 +4859,6 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
// Update the users of the node with the new indices
for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
-
SDNode *User = Users[i];
if (!User)
continue;
@@ -4277,8 +4885,33 @@ static bool isFrameIndexOp(SDValue Op) {
/// \brief Legalize target independent instructions (e.g. INSERT_SUBREG)
/// with frame index operands.
/// LLVM assumes that inputs to these instructions are registers.
-void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
- SelectionDAG &DAG) const {
+SDNode *SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
+ SelectionDAG &DAG) const {
+ if (Node->getOpcode() == ISD::CopyToReg) {
+ RegisterSDNode *DestReg = cast<RegisterSDNode>(Node->getOperand(1));
+ SDValue SrcVal = Node->getOperand(2);
+
+ // Insert a copy to a VReg_1 virtual register so LowerI1Copies doesn't have
+ // to try understanding copies to physical registers.
+ if (SrcVal.getValueType() == MVT::i1 &&
+ TargetRegisterInfo::isPhysicalRegister(DestReg->getReg())) {
+ SDLoc SL(Node);
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ SDValue VReg = DAG.getRegister(
+ MRI.createVirtualRegister(&AMDGPU::VReg_1RegClass), MVT::i1);
+
+ SDNode *Glued = Node->getGluedNode();
+ SDValue ToVReg
+ = DAG.getCopyToReg(Node->getOperand(0), SL, VReg, SrcVal,
+ SDValue(Glued, Glued ? Glued->getNumValues() - 1 : 0));
+ SDValue ToResultReg
+ = DAG.getCopyToReg(ToVReg, SL, SDValue(DestReg, 0),
+ VReg, ToVReg.getValue(1));
+ DAG.ReplaceAllUsesWith(Node, ToResultReg.getNode());
+ DAG.RemoveDeadNode(Node);
+ return ToResultReg.getNode();
+ }
+ }
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
@@ -4294,6 +4927,7 @@ void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
}
DAG.UpdateNodeOperands(Node, Ops);
+ return Node;
}
/// \brief Fold the instructions after selecting them.
@@ -4496,6 +5130,8 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &AMDGPU::SReg_128RegClass);
case 256:
return std::make_pair(0U, &AMDGPU::SReg_256RegClass);
+ case 512:
+ return std::make_pair(0U, &AMDGPU::SReg_512RegClass);
}
case 'v':
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index 6c04e4f30977..d177777ad5ee 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -21,11 +21,13 @@
namespace llvm {
class SITargetLowering final : public AMDGPUTargetLowering {
- SDValue LowerParameterPtr(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain,
- unsigned Offset) const;
- SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL,
- SDValue Chain, unsigned Offset, bool Signed,
- const ISD::InputArg *Arg = nullptr) const;
+ SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
+ SDValue Chain, uint64_t Offset) const;
+ SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
+ const SDLoc &SL, SDValue Chain,
+ uint64_t Offset, bool Signed,
+ const ISD::InputArg *Arg = nullptr) const;
+
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const override;
SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
@@ -55,11 +57,19 @@ class SITargetLowering final : public AMDGPUTargetLowering {
const SDLoc &DL,
EVT VT) const;
+ SDValue convertArgType(
+ SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Val,
+ bool Signed, const ISD::InputArg *Arg = nullptr) const;
+
/// \brief Custom lowering for ISD::FP_ROUND for MVT::f16.
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
- SDValue getSegmentAperture(unsigned AS, SelectionDAG &DAG) const;
+ SDValue getSegmentAperture(unsigned AS, const SDLoc &DL,
+ SelectionDAG &DAG) const;
+
SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
@@ -79,10 +89,17 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performXorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performZeroExtendCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
+ SDValue Op0, SDValue Op1) const;
+ SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
+ SDValue Op0, SDValue Op1, bool Signed) const;
SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performFMed3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performCvtPkRTZCombine(SDNode *N, DAGCombinerInfo &DCI) const;
unsigned getFusedOpcode(const SelectionDAG &DAG,
const SDNode *N0, const SDNode *N1) const;
@@ -94,7 +111,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
bool isLegalFlatAddressingMode(const AddrMode &AM) const;
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
- bool isCFIntrinsic(const SDNode *Intr) const;
+ unsigned isCFIntrinsic(const SDNode *Intr) const;
void createDebuggerPrologueStackObjects(MachineFunction &MF) const;
@@ -115,11 +132,15 @@ public:
const SISubtarget *getSubtarget() const;
+ bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
+ EVT /*VT*/) const override;
+
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
unsigned IntrinsicID) const override;
- bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
- EVT /*VT*/) const override;
+ bool getAddrModeArguments(IntrinsicInst * /*I*/,
+ SmallVectorImpl<Value*> &/*Ops*/,
+ Type *&/*AccessTy*/) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
@@ -175,6 +196,9 @@ public:
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
+
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
void AdjustInstrPostInstrSelection(MachineInstr &MI,
@@ -182,7 +206,7 @@ public:
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const override;
- void legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const;
+ SDNode *legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const;
MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL,
SDValue Ptr) const;
diff --git a/lib/Target/AMDGPU/SIInsertSkips.cpp b/lib/Target/AMDGPU/SIInsertSkips.cpp
index 91e4bf755c53..ba346d2fad02 100644
--- a/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -1,4 +1,4 @@
-//===-- SIInsertSkips.cpp - Use predicates for control flow ----------===//
+//===-- SIInsertSkips.cpp - Use predicates for control flow ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,33 +12,46 @@
/// branches when it's expected that jumping over the untaken control flow will
/// be cheaper than having every workitem no-op through it.
//
+//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
using namespace llvm;
#define DEBUG_TYPE "si-insert-skips"
-namespace {
-
static cl::opt<unsigned> SkipThresholdFlag(
"amdgpu-skip-threshold",
cl::desc("Number of instructions before jumping over divergent control flow"),
cl::init(12), cl::Hidden);
+namespace {
+
class SIInsertSkips : public MachineFunctionPass {
private:
- const SIRegisterInfo *TRI;
- const SIInstrInfo *TII;
- unsigned SkipThreshold;
+ const SIRegisterInfo *TRI = nullptr;
+ const SIInstrInfo *TII = nullptr;
+ unsigned SkipThreshold = 0;
bool shouldSkip(const MachineBasicBlock &From,
const MachineBasicBlock &To) const;
@@ -55,8 +68,7 @@ private:
public:
static char ID;
- SIInsertSkips() :
- MachineFunctionPass(ID), TRI(nullptr), TII(nullptr), SkipThreshold(0) { }
+ SIInsertSkips() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -69,7 +81,7 @@ public:
}
};
-} // End anonymous namespace
+} // end anonymous namespace
char SIInsertSkips::ID = 0;
@@ -195,8 +207,8 @@ void SIInsertSkips::kill(MachineInstr &MI) {
}
} else {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32))
- .addImm(0)
- .addOperand(Op);
+ .addImm(0)
+ .add(Op);
}
}
@@ -251,6 +263,7 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
BI != BE; BI = NextBB) {
NextBB = std::next(BI);
MachineBasicBlock &MBB = *BI;
+ bool HaveSkipBlock = false;
if (!ExecBranchStack.empty() && ExecBranchStack.back() == &MBB) {
// Reached convergence point for last divergent branch.
@@ -270,27 +283,33 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
- case AMDGPU::SI_MASK_BRANCH: {
+ case AMDGPU::SI_MASK_BRANCH:
ExecBranchStack.push_back(MI.getOperand(0).getMBB());
MadeChange |= skipMaskBranch(MI, MBB);
break;
- }
- case AMDGPU::S_BRANCH: {
+
+ case AMDGPU::S_BRANCH:
// Optimize out branches to the next block.
// FIXME: Shouldn't this be handled by BranchFolding?
- if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB()))
+ if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
+ MI.eraseFromParent();
+ } else if (HaveSkipBlock) {
+ // Remove the given unconditional branch when a skip block has been
+ // inserted after the current one, and let it skip the two instructions
+ // performing the kill if the exec mask is non-zero.
MI.eraseFromParent();
+ }
break;
- }
- case AMDGPU::SI_KILL_TERMINATOR: {
+
+ case AMDGPU::SI_KILL_TERMINATOR:
MadeChange = true;
kill(MI);
if (ExecBranchStack.empty()) {
if (skipIfDead(MI, *NextBB)) {
+ HaveSkipBlock = true;
NextBB = std::next(BI);
BE = MF.end();
- Next = MBB.end();
}
} else {
HaveKill = true;
@@ -298,15 +317,15 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
MI.eraseFromParent();
break;
- }
- case AMDGPU::SI_RETURN: {
+
+ case AMDGPU::SI_RETURN_TO_EPILOG:
// FIXME: Should move somewhere else
assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
// Graphics shaders returning non-void shouldn't contain S_ENDPGM,
// because external bytecode will be appended at the end.
if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
- // SI_RETURN is not the last instruction. Add an empty block at
+ // SI_RETURN_TO_EPILOG is not the last instruction. Add an empty block at
// the end and jump there.
if (!EmptyMBBAtEnd) {
EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
@@ -318,7 +337,8 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
.addMBB(EmptyMBBAtEnd);
I->eraseFromParent();
}
- }
+ break;
+
default:
break;
}
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
new file mode 100644
index 000000000000..c2a3e62aa827
--- /dev/null
+++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -0,0 +1,1863 @@
+//===-- SIInsertWaitcnts.cpp - Insert Wait Instructions -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Insert wait instructions for memory reads and writes.
+///
+/// Memory reads and writes are issued asynchronously, so we need to insert
+/// S_WAITCNT instructions when we want to access any of their results or
+/// overwrite any register that's used asynchronously.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define DEBUG_TYPE "si-insert-waitcnts"
+
+using namespace llvm;
+
+namespace {
+
+// Class of object that encapsulates the latest instruction counter score
+// associated with the operand. Used for determining whether an
+// s_waitcnt instruction needs to be emitted.
+
+#define CNT_MASK(t) (1u << (t))
+
+enum InstCounterType { VM_CNT = 0, LGKM_CNT, EXP_CNT, NUM_INST_CNTS };
+
+typedef std::pair<signed, signed> RegInterval;
+
+struct {
+ int32_t VmcntMax;
+ int32_t ExpcntMax;
+ int32_t LgkmcntMax;
+ int32_t NumVGPRsMax;
+ int32_t NumSGPRsMax;
+} HardwareLimits;
+
+struct {
+ unsigned VGPR0;
+ unsigned VGPRL;
+ unsigned SGPR0;
+ unsigned SGPRL;
+} RegisterEncoding;
+
+enum WaitEventType {
+ VMEM_ACCESS, // vector-memory read & write
+ LDS_ACCESS, // lds read & write
+ GDS_ACCESS, // gds read & write
+ SQ_MESSAGE, // send message
+ SMEM_ACCESS, // scalar-memory read & write
+ EXP_GPR_LOCK, // export holding on its data src
+ GDS_GPR_LOCK, // GDS holding on its data and addr src
+ EXP_POS_ACCESS, // write to export position
+ EXP_PARAM_ACCESS, // write to export parameter
+ VMW_GPR_LOCK, // vector-memory write holding on its data src
+ NUM_WAIT_EVENTS,
+};
+
+// The mapping is:
+// 0 .. SQ_MAX_PGM_VGPRS-1 real VGPRs
+// SQ_MAX_PGM_VGPRS .. NUM_ALL_VGPRS-1 extra VGPR-like slots
+// NUM_ALL_VGPRS .. NUM_ALL_VGPRS+SQ_MAX_PGM_SGPRS-1 real SGPRs
+// We reserve a fixed number of VGPR slots in the scoring tables for
+// special tokens like SCMEM_LDS (needed for buffer load to LDS).
+enum RegisterMapping {
+ SQ_MAX_PGM_VGPRS = 256, // Maximum programmable VGPRs across all targets.
+ SQ_MAX_PGM_SGPRS = 256, // Maximum programmable SGPRs across all targets.
+ NUM_EXTRA_VGPRS = 1, // A reserved slot for DS.
+ EXTRA_VGPR_LDS = 0, // This is a placeholder the Shader algorithm uses.
+ NUM_ALL_VGPRS = SQ_MAX_PGM_VGPRS + NUM_EXTRA_VGPRS, // Where SGPR starts.
+};
+
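The RegisterMapping constants above define a single flat score table: VGPR n lives at index n, the extra LDS token sits right after the VGPRs, and SGPR n is offset by NUM_ALL_VGPRS. Restated standalone:

#include <cassert>

enum : int {
  SQ_MAX_PGM_VGPRS = 256,
  NUM_EXTRA_VGPRS = 1,
  NUM_ALL_VGPRS = SQ_MAX_PGM_VGPRS + NUM_EXTRA_VGPRS,
};

int vgprSlot(int N) { return N; }                 // 0 .. 255
int ldsSlot() { return SQ_MAX_PGM_VGPRS; }        // the one extra VGPR-like slot
int sgprSlot(int N) { return NUM_ALL_VGPRS + N; } // 257 onward

int main() {
  assert(ldsSlot() == 256);
  assert(sgprSlot(0) == 257); // first SGPR comes after all VGPR-like slots
}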
+#define ForAllWaitEventType(w) \
+ for (enum WaitEventType w = (enum WaitEventType)0; \
+ (w) < (enum WaitEventType)NUM_WAIT_EVENTS; \
+ (w) = (enum WaitEventType)((w) + 1))
+
+// This is a per-basic-block object that maintains current score brackets
+// of each wait-counter, and a per-register scoreboard for each wait-counter.
+// We also maintain the latest score for every event type that can change the
+// waitcnt in order to know if there are multiple types of events within
+// the brackets. When multiple types of events happen in the bracket,
+// the wait-count may get decreased out of order, therefore we need to put in
+// "s_waitcnt 0" before use.
+class BlockWaitcntBrackets {
+public:
+ static int32_t getWaitCountMax(InstCounterType T) {
+ switch (T) {
+ case VM_CNT:
+ return HardwareLimits.VmcntMax;
+ case LGKM_CNT:
+ return HardwareLimits.LgkmcntMax;
+ case EXP_CNT:
+ return HardwareLimits.ExpcntMax;
+ default:
+ break;
+ }
+ return 0;
+ };
+
+ void setScoreLB(InstCounterType T, int32_t Val) {
+ assert(T < NUM_INST_CNTS);
+ if (T >= NUM_INST_CNTS)
+ return;
+ ScoreLBs[T] = Val;
+ };
+
+ void setScoreUB(InstCounterType T, int32_t Val) {
+ assert(T < NUM_INST_CNTS);
+ if (T >= NUM_INST_CNTS)
+ return;
+ ScoreUBs[T] = Val;
+ if (T == EXP_CNT) {
+ int32_t UB = (int)(ScoreUBs[T] - getWaitCountMax(EXP_CNT));
+ if (ScoreLBs[T] < UB)
+ ScoreLBs[T] = UB;
+ }
+ };
+
+ int32_t getScoreLB(InstCounterType T) {
+ assert(T < NUM_INST_CNTS);
+ if (T >= NUM_INST_CNTS)
+ return 0;
+ return ScoreLBs[T];
+ };
+
+ int32_t getScoreUB(InstCounterType T) {
+ assert(T < NUM_INST_CNTS);
+ if (T >= NUM_INST_CNTS)
+ return 0;
+ return ScoreUBs[T];
+ };
+
+ // Mapping from event to counter.
+ InstCounterType eventCounter(WaitEventType E) {
+ switch (E) {
+ case VMEM_ACCESS:
+ return VM_CNT;
+ case LDS_ACCESS:
+ case GDS_ACCESS:
+ case SQ_MESSAGE:
+ case SMEM_ACCESS:
+ return LGKM_CNT;
+ case EXP_GPR_LOCK:
+ case GDS_GPR_LOCK:
+ case VMW_GPR_LOCK:
+ case EXP_POS_ACCESS:
+ case EXP_PARAM_ACCESS:
+ return EXP_CNT;
+ default:
+ llvm_unreachable("unhandled event type");
+ }
+ return NUM_INST_CNTS;
+ }
+
+ void setRegScore(int GprNo, InstCounterType T, int32_t Val) {
+ if (GprNo < NUM_ALL_VGPRS) {
+ if (GprNo > VgprUB) {
+ VgprUB = GprNo;
+ }
+ VgprScores[T][GprNo] = Val;
+ } else {
+ assert(T == LGKM_CNT);
+ if (GprNo - NUM_ALL_VGPRS > SgprUB) {
+ SgprUB = GprNo - NUM_ALL_VGPRS;
+ }
+ SgprScores[GprNo - NUM_ALL_VGPRS] = Val;
+ }
+ }
+
+ int32_t getRegScore(int GprNo, InstCounterType T) {
+ if (GprNo < NUM_ALL_VGPRS) {
+ return VgprScores[T][GprNo];
+ }
+ return SgprScores[GprNo - NUM_ALL_VGPRS];
+ }
+
+ void clear() {
+ memset(ScoreLBs, 0, sizeof(ScoreLBs));
+ memset(ScoreUBs, 0, sizeof(ScoreUBs));
+ memset(EventUBs, 0, sizeof(EventUBs));
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ memset(VgprScores[T], 0, sizeof(VgprScores[T]));
+ }
+ memset(SgprScores, 0, sizeof(SgprScores));
+ }
+
+ RegInterval getRegInterval(const MachineInstr *MI, const SIInstrInfo *TII,
+ const MachineRegisterInfo *MRI,
+ const SIRegisterInfo *TRI, unsigned OpNo,
+ bool Def) const;
+
+ void setExpScore(const MachineInstr *MI, const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI,
+ unsigned OpNo, int32_t Val);
+
+ void setWaitAtBeginning() { WaitAtBeginning = true; }
+ void clearWaitAtBeginning() { WaitAtBeginning = false; }
+ bool getWaitAtBeginning() const { return WaitAtBeginning; }
+ void setEventUB(enum WaitEventType W, int32_t Val) { EventUBs[W] = Val; }
+ int32_t getMaxVGPR() const { return VgprUB; }
+ int32_t getMaxSGPR() const { return SgprUB; }
+ int32_t getEventUB(enum WaitEventType W) const {
+ assert(W < NUM_WAIT_EVENTS);
+ return EventUBs[W];
+ }
+ bool counterOutOfOrder(InstCounterType T);
+ unsigned int updateByWait(InstCounterType T, int ScoreToWait);
+ void updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI, WaitEventType E,
+ MachineInstr &MI);
+
+ BlockWaitcntBrackets()
+ : WaitAtBeginning(false), ValidLoop(false), MixedExpTypes(false),
+ LoopRegion(NULL), PostOrder(0), Waitcnt(NULL), VgprUB(0), SgprUB(0) {
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ memset(VgprScores[T], 0, sizeof(VgprScores[T]));
+ }
+ }
+ ~BlockWaitcntBrackets(){};
+
+ bool hasPendingSMEM() const {
+ return (EventUBs[SMEM_ACCESS] > ScoreLBs[LGKM_CNT] &&
+ EventUBs[SMEM_ACCESS] <= ScoreUBs[LGKM_CNT]);
+ }
+
+ bool hasPendingFlat() const {
+ return ((LastFlat[LGKM_CNT] > ScoreLBs[LGKM_CNT] &&
+ LastFlat[LGKM_CNT] <= ScoreUBs[LGKM_CNT]) ||
+ (LastFlat[VM_CNT] > ScoreLBs[VM_CNT] &&
+ LastFlat[VM_CNT] <= ScoreUBs[VM_CNT]));
+ }
+
+ void setPendingFlat() {
+ LastFlat[VM_CNT] = ScoreUBs[VM_CNT];
+ LastFlat[LGKM_CNT] = ScoreUBs[LGKM_CNT];
+ }
+
+ int pendingFlat(InstCounterType Ct) const { return LastFlat[Ct]; }
+
+ void setLastFlat(InstCounterType Ct, int Val) { LastFlat[Ct] = Val; }
+
+ bool getRevisitLoop() const { return RevisitLoop; }
+ void setRevisitLoop(bool RevisitLoopIn) { RevisitLoop = RevisitLoopIn; }
+
+ void setPostOrder(int32_t PostOrderIn) { PostOrder = PostOrderIn; }
+ int32_t getPostOrder() const { return PostOrder; }
+
+ void setWaitcnt(MachineInstr *WaitcntIn) { Waitcnt = WaitcntIn; }
+ void clearWaitcnt() { Waitcnt = NULL; }
+ MachineInstr *getWaitcnt() const { return Waitcnt; }
+
+ bool mixedExpTypes() const { return MixedExpTypes; }
+ void setMixedExpTypes(bool MixedExpTypesIn) {
+ MixedExpTypes = MixedExpTypesIn;
+ }
+
+ void print(raw_ostream &);
+ void dump() { print(dbgs()); }
+
+private:
+ bool WaitAtBeginning;
+ bool RevisitLoop;
+ bool ValidLoop;
+ bool MixedExpTypes;
+ MachineLoop *LoopRegion;
+ int32_t PostOrder;
+ MachineInstr *Waitcnt;
+ int32_t ScoreLBs[NUM_INST_CNTS] = {0};
+ int32_t ScoreUBs[NUM_INST_CNTS] = {0};
+ int32_t EventUBs[NUM_WAIT_EVENTS] = {0};
+ // Remember the last flat memory operation.
+ int32_t LastFlat[NUM_INST_CNTS] = {0};
+ // wait_cnt scores for every vgpr.
+ // Keep track of the VgprUB and SgprUB to make merge at join efficient.
+ int32_t VgprUB;
+ int32_t SgprUB;
+ int32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS];
+ // Wait cnt scores for every sgpr, only lgkmcnt is relevant.
+ int32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0};
+};
+
+// This is a per-loop-region object that records waitcnt status at the end of
+// loop footer from the previous iteration. We also maintain an iteration
+// count to track the number of times the loop has been visited. When it
+// doesn't converge naturally, we force convergence by inserting s_waitcnt 0
+// at the end of the loop footer.
+class LoopWaitcntData {
+public:
+ void incIterCnt() { IterCnt++; }
+ void resetIterCnt() { IterCnt = 0; }
+ int32_t getIterCnt() { return IterCnt; }
+
+ LoopWaitcntData() : LfWaitcnt(NULL), IterCnt(0) {}
+ ~LoopWaitcntData(){};
+
+ void setWaitcnt(MachineInstr *WaitcntIn) { LfWaitcnt = WaitcntIn; }
+ MachineInstr *getWaitcnt() const { return LfWaitcnt; }
+
+ void print() {
+ DEBUG(dbgs() << " iteration " << IterCnt << '\n';);
+ return;
+ }
+
+private:
+ // s_waitcnt added at the end of loop footer to stabilize wait scores
+ // at the end of the loop footer.
+ MachineInstr *LfWaitcnt;
+ // Number of times the loop has been visited, not including the initial
+ // walk over.
+ int32_t IterCnt;
+};
+
+class SIInsertWaitcnts : public MachineFunctionPass {
+
+private:
+ const SISubtarget *ST;
+ const SIInstrInfo *TII;
+ const SIRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ const MachineLoopInfo *MLI;
+ AMDGPU::IsaInfo::IsaVersion IV;
+ AMDGPUAS AMDGPUASI;
+
+ DenseSet<MachineBasicBlock *> BlockVisitedSet;
+ DenseSet<MachineInstr *> CompilerGeneratedWaitcntSet;
+ DenseSet<MachineInstr *> VCCZBugHandledSet;
+
+ DenseMap<MachineBasicBlock *, std::unique_ptr<BlockWaitcntBrackets>>
+ BlockWaitcntBracketsMap;
+
+ DenseSet<MachineBasicBlock *> BlockWaitcntProcessedSet;
+
+ DenseMap<MachineLoop *, std::unique_ptr<LoopWaitcntData>> LoopWaitcntDataMap;
+
+ std::vector<std::unique_ptr<BlockWaitcntBrackets>> KillWaitBrackets;
+
+public:
+ static char ID;
+
+ SIInsertWaitcnts()
+ : MachineFunctionPass(ID), ST(nullptr), TII(nullptr), TRI(nullptr),
+ MRI(nullptr), MLI(nullptr) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "SI insert wait instructions";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ void addKillWaitBracket(BlockWaitcntBrackets *Bracket) {
+ // The waitcnt information is copied because it changes as the block is
+ // traversed.
+ KillWaitBrackets.push_back(make_unique<BlockWaitcntBrackets>(*Bracket));
+ }
+
+ MachineInstr *generateSWaitCntInstBefore(MachineInstr &MI,
+ BlockWaitcntBrackets *ScoreBrackets);
+ void updateEventWaitCntAfter(MachineInstr &Inst,
+ BlockWaitcntBrackets *ScoreBrackets);
+ void mergeInputScoreBrackets(MachineBasicBlock &Block);
+ MachineBasicBlock *loopBottom(const MachineLoop *Loop);
+ void insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block);
+ void insertWaitcntBeforeCF(MachineBasicBlock &Block, MachineInstr *Inst);
+};
+
+} // End anonymous namespace.
+
+RegInterval BlockWaitcntBrackets::getRegInterval(const MachineInstr *MI,
+ const SIInstrInfo *TII,
+ const MachineRegisterInfo *MRI,
+ const SIRegisterInfo *TRI,
+ unsigned OpNo,
+ bool Def) const {
+ const MachineOperand &Op = MI->getOperand(OpNo);
+ if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg()) ||
+ (Def && !Op.isDef()))
+ return {-1, -1};
+
+ // A use via a PW (partial write) operand does not need a waitcnt.
+ // A partial write is not a WAW.
+ assert(!Op.getSubReg() || !Op.isUndef());
+
+ RegInterval Result;
+ const MachineRegisterInfo &MRIA = *MRI;
+
+ unsigned Reg = TRI->getEncodingValue(Op.getReg());
+
+ if (TRI->isVGPR(MRIA, Op.getReg())) {
+ assert(Reg >= RegisterEncoding.VGPR0 && Reg <= RegisterEncoding.VGPRL);
+ Result.first = Reg - RegisterEncoding.VGPR0;
+ assert(Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS);
+ } else if (TRI->isSGPRReg(MRIA, Op.getReg())) {
+ assert(Reg >= RegisterEncoding.SGPR0 && Reg < SQ_MAX_PGM_SGPRS);
+ Result.first = Reg - RegisterEncoding.SGPR0 + NUM_ALL_VGPRS;
+ assert(Result.first >= NUM_ALL_VGPRS &&
+ Result.first < SQ_MAX_PGM_SGPRS + NUM_ALL_VGPRS);
+ }
+ // TODO: Handle TTMP
+ // else if (TRI->isTTMP(MRIA, Reg.getReg())) ...
+ else
+ return {-1, -1};
+
+ const MachineInstr &MIA = *MI;
+ const TargetRegisterClass *RC = TII->getOpRegClass(MIA, OpNo);
+ unsigned Size = RC->getSize();
+ Result.second = Result.first + (Size / 4);
+
+ return Result;
+}
+
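getRegInterval hands back a half-open slot range: one 4-byte slot per 32-bit lane of the register, so wider register tuples cover proportionally more slots. A standalone restatement of that final arithmetic step:

#include <cassert>
#include <utility>

// Half-open [first, first + size/4), mirroring Result.second above.
std::pair<int, int> regInterval(int FirstSlot, unsigned SizeInBytes) {
  return {FirstSlot, FirstSlot + static_cast<int>(SizeInBytes / 4)};
}

int main() {
  assert(regInterval(8, 4).second == 9);   // 32-bit register: one slot
  assert(regInterval(8, 8).second == 10);  // 64-bit pair: two slots
  assert(regInterval(8, 16).second == 12); // 128-bit quad: four slots
}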
+void BlockWaitcntBrackets::setExpScore(const MachineInstr *MI,
+ const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI,
+ unsigned OpNo, int32_t Val) {
+ RegInterval Interval = getRegInterval(MI, TII, MRI, TRI, OpNo, false);
+ DEBUG({
+ const MachineOperand &Opnd = MI->getOperand(OpNo);
+ assert(TRI->isVGPR(*MRI, Opnd.getReg()));
+ });
+ for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ setRegScore(RegNo, EXP_CNT, Val);
+ }
+}
+
+void BlockWaitcntBrackets::updateByEvent(const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI,
+ const MachineRegisterInfo *MRI,
+ WaitEventType E, MachineInstr &Inst) {
+ const MachineRegisterInfo &MRIA = *MRI;
+ InstCounterType T = eventCounter(E);
+ int32_t CurrScore = getScoreUB(T) + 1;
+ // EventUB and ScoreUB need to be updated regardless of whether this event
+ // changes the score of a register or not.
+ // Examples include vm_cnt for buffer-store and lgkm_cnt for send-message.
+ EventUBs[E] = CurrScore;
+ setScoreUB(T, CurrScore);
+
+ if (T == EXP_CNT) {
+ // Check for mixed export types. If they are mixed, then a waitcnt exp(0)
+ // is required.
+ if (!MixedExpTypes) {
+ MixedExpTypes = counterOutOfOrder(EXP_CNT);
+ }
+
+ // Put score on the source vgprs. If this is a store, just use those
+ // specific register(s).
+ if (TII->isDS(Inst) && (Inst.mayStore() || Inst.mayLoad())) {
+ // All GDS operations must protect their address register (same as
+ // export.)
+ if (Inst.getOpcode() != AMDGPU::DS_APPEND &&
+ Inst.getOpcode() != AMDGPU::DS_CONSUME) {
+ setExpScore(
+ &Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr),
+ CurrScore);
+ }
+ if (Inst.mayStore()) {
+ setExpScore(
+ &Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
+ CurrScore);
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+ AMDGPU::OpName::data1) != -1) {
+ setExpScore(&Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+ AMDGPU::OpName::data1),
+ CurrScore);
+ }
+ } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1 &&
+ Inst.getOpcode() != AMDGPU::DS_GWS_INIT &&
+ Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_V &&
+ Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_BR &&
+ Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_P &&
+ Inst.getOpcode() != AMDGPU::DS_GWS_BARRIER &&
+ Inst.getOpcode() != AMDGPU::DS_APPEND &&
+ Inst.getOpcode() != AMDGPU::DS_CONSUME &&
+ Inst.getOpcode() != AMDGPU::DS_ORDERED_COUNT) {
+ for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
+ const MachineOperand &Op = Inst.getOperand(I);
+ if (Op.isReg() && !Op.isDef() && TRI->isVGPR(MRIA, Op.getReg())) {
+ setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
+ }
+ }
+ }
+ } else if (TII->isFLAT(Inst)) {
+ if (Inst.mayStore()) {
+ setExpScore(
+ &Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
+ CurrScore);
+ } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
+ setExpScore(
+ &Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
+ CurrScore);
+ }
+ } else if (TII->isMIMG(Inst)) {
+ if (Inst.mayStore()) {
+ setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
+ } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
+ setExpScore(
+ &Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
+ CurrScore);
+ }
+ } else if (TII->isMTBUF(Inst)) {
+ if (Inst.mayStore()) {
+ setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
+ }
+ } else if (TII->isMUBUF(Inst)) {
+ if (Inst.mayStore()) {
+ setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore);
+ } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) {
+ setExpScore(
+ &Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data),
+ CurrScore);
+ }
+ } else {
+ if (TII->isEXP(Inst)) {
+ // For export the destination registers are really temps that
+ // can be used as the actual source after export patching, so
+ // we need to treat them like sources and set the EXP_CNT
+ // score.
+ for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
+ MachineOperand &DefMO = Inst.getOperand(I);
+ if (DefMO.isReg() && DefMO.isDef() &&
+ TRI->isVGPR(MRIA, DefMO.getReg())) {
+ setRegScore(TRI->getEncodingValue(DefMO.getReg()), EXP_CNT,
+ CurrScore);
+ }
+ }
+ }
+ for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
+ MachineOperand &MO = Inst.getOperand(I);
+ if (MO.isReg() && !MO.isDef() && TRI->isVGPR(MRIA, MO.getReg())) {
+ setExpScore(&Inst, TII, TRI, MRI, I, CurrScore);
+ }
+ }
+ }
+#if 0 // TODO: check if this is handled by MUBUF code above.
+ } else if (Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORD ||
+ Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORDX2 ||
+ Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORDX4) {
+ MachineOperand *MO = TII->getNamedOperand(Inst, AMDGPU::OpName::data);
+ unsigned OpNo;//TODO: find the OpNo for this operand;
+ RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, OpNo, false);
+ for (signed RegNo = Interval.first; RegNo < Interval.second;
+ ++RegNo) {
+ setRegScore(RegNo + NUM_ALL_VGPRS, t, CurrScore);
+ }
+#endif
+ } else {
+ // Match the score to the destination registers.
+ for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
+ RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, I, true);
+ if (T == VM_CNT && Interval.first >= NUM_ALL_VGPRS)
+ continue;
+ for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ setRegScore(RegNo, T, CurrScore);
+ }
+ }
+ if (TII->isDS(Inst) && Inst.mayStore()) {
+ setRegScore(SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS, T, CurrScore);
+ }
+ }
+}
+
+void BlockWaitcntBrackets::print(raw_ostream &OS) {
+ OS << '\n';
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ int LB = getScoreLB(T);
+ int UB = getScoreUB(T);
+
+ switch (T) {
+ case VM_CNT:
+ OS << " VM_CNT(" << UB - LB << "): ";
+ break;
+ case LGKM_CNT:
+ OS << " LGKM_CNT(" << UB - LB << "): ";
+ break;
+ case EXP_CNT:
+ OS << " EXP_CNT(" << UB - LB << "): ";
+ break;
+ default:
+ OS << " UNKNOWN(" << UB - LB << "): ";
+ break;
+ }
+
+ if (LB < UB) {
+ // Print vgpr scores.
+ for (int J = 0; J <= getMaxVGPR(); J++) {
+ int RegScore = getRegScore(J, T);
+ if (RegScore <= LB)
+ continue;
+ int RelScore = RegScore - LB - 1;
+ if (J < SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS) {
+ OS << RelScore << ":v" << J << " ";
+ } else {
+ OS << RelScore << ":ds ";
+ }
+ }
+ // Also need to print sgpr scores for lgkm_cnt.
+ if (T == LGKM_CNT) {
+ for (int J = 0; J <= getMaxSGPR(); J++) {
+ int RegScore = getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
+ if (RegScore <= LB)
+ continue;
+ int RelScore = RegScore - LB - 1;
+ OS << RelScore << ":s" << J << " ";
+ }
+ }
+ }
+ OS << '\n';
+ }
+ OS << '\n';
+ return;
+}
+
+unsigned int BlockWaitcntBrackets::updateByWait(InstCounterType T,
+ int ScoreToWait) {
+ unsigned int NeedWait = 0;
+ if (ScoreToWait == -1) {
+ // The score to wait is unknown. This implies that it was not encountered
+ // during the path of the CFG walk done during the current traversal but
+ // may be seen on a different path. Emit an s_waitcnt with a
+ // conservative value of 0 for the counter.
+ NeedWait = CNT_MASK(T);
+ setScoreLB(T, getScoreUB(T));
+ return NeedWait;
+ }
+
+ // If the score of src_operand falls within the bracket, we need an
+ // s_waitcnt instruction.
+ const int32_t LB = getScoreLB(T);
+ const int32_t UB = getScoreUB(T);
+ if ((UB >= ScoreToWait) && (ScoreToWait > LB)) {
+ if (T == VM_CNT && hasPendingFlat()) {
+ // If there is a pending FLAT operation, and this is a VM waitcnt,
+ // then we need to force a waitcnt 0 for VM.
+ NeedWait = CNT_MASK(T);
+ setScoreLB(T, getScoreUB(T));
+ } else if (counterOutOfOrder(T)) {
+ // The counter can get decremented out-of-order when there
+ // are multiple event types in the bracket. Also emit an s_waitcnt
+ // with a conservative value of 0 for the counter.
+ NeedWait = CNT_MASK(T);
+ setScoreLB(T, getScoreUB(T));
+ } else {
+ NeedWait = CNT_MASK(T);
+ setScoreLB(T, ScoreToWait);
+ }
+ }
+
+ return NeedWait;
+}
+
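Stripped of the flat and out-of-order special cases (which conservatively wait all the way to the upper bound), the core test in updateByWait is an inside-the-bracket check:

#include <cassert>

// A wait is needed iff the score lies inside the bracket (LB, UB].
bool needsWait(int LB, int UB, int ScoreToWait) {
  return UB >= ScoreToWait && ScoreToWait > LB;
}

int main() {
  assert(!needsWait(3, 7, 3)); // at or below LB: already waited for
  assert(needsWait(3, 7, 5));  // inside the bracket: emit an s_waitcnt
  assert(!needsWait(3, 7, 8)); // above UB: nothing pending at that score
}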
+// Where there are multiple types of events in the bracket of a counter,
+// the decrement may go out of order.
+bool BlockWaitcntBrackets::counterOutOfOrder(InstCounterType T) {
+ switch (T) {
+ case VM_CNT:
+ return false;
+ case LGKM_CNT: {
+ if (EventUBs[SMEM_ACCESS] > ScoreLBs[LGKM_CNT] &&
+ EventUBs[SMEM_ACCESS] <= ScoreUBs[LGKM_CNT]) {
+ // Scalar memory reads can always go out of order.
+ return true;
+ }
+ int NumEventTypes = 0;
+ if (EventUBs[LDS_ACCESS] > ScoreLBs[LGKM_CNT] &&
+ EventUBs[LDS_ACCESS] <= ScoreUBs[LGKM_CNT]) {
+ NumEventTypes++;
+ }
+ if (EventUBs[GDS_ACCESS] > ScoreLBs[LGKM_CNT] &&
+ EventUBs[GDS_ACCESS] <= ScoreUBs[LGKM_CNT]) {
+ NumEventTypes++;
+ }
+ if (EventUBs[SQ_MESSAGE] > ScoreLBs[LGKM_CNT] &&
+ EventUBs[SQ_MESSAGE] <= ScoreUBs[LGKM_CNT]) {
+ NumEventTypes++;
+ }
+ if (NumEventTypes <= 1) {
+ return false;
+ }
+ break;
+ }
+ case EXP_CNT: {
+ // If there has been a mixture of export types, then a waitcnt exp(0) is
+ // required.
+ if (MixedExpTypes)
+ return true;
+ int NumEventTypes = 0;
+ if (EventUBs[EXP_GPR_LOCK] > ScoreLBs[EXP_CNT] &&
+ EventUBs[EXP_GPR_LOCK] <= ScoreUBs[EXP_CNT]) {
+ NumEventTypes++;
+ }
+ if (EventUBs[GDS_GPR_LOCK] > ScoreLBs[EXP_CNT] &&
+ EventUBs[GDS_GPR_LOCK] <= ScoreUBs[EXP_CNT]) {
+ NumEventTypes++;
+ }
+ if (EventUBs[VMW_GPR_LOCK] > ScoreLBs[EXP_CNT] &&
+ EventUBs[VMW_GPR_LOCK] <= ScoreUBs[EXP_CNT]) {
+ NumEventTypes++;
+ }
+ if (EventUBs[EXP_PARAM_ACCESS] > ScoreLBs[EXP_CNT] &&
+ EventUBs[EXP_PARAM_ACCESS] <= ScoreUBs[EXP_CNT]) {
+ NumEventTypes++;
+ }
+
+ if (EventUBs[EXP_POS_ACCESS] > ScoreLBs[EXP_CNT] &&
+ EventUBs[EXP_POS_ACCESS] <= ScoreUBs[EXP_CNT]) {
+ NumEventTypes++;
+ }
+
+ if (NumEventTypes <= 1) {
+ return false;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ return true;
+}
+
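counterOutOfOrder's LGKM and EXP cases both reduce to counting how many event kinds are still in flight inside the bracket; more than one means the decrements can interleave. A hypothetical reduction of that counting idiom:

#include <cassert>

// An event kind is "in flight" when its upper bound sits inside (LB, UB].
bool inFlight(int EventUB, int LB, int UB) {
  return EventUB > LB && EventUB <= UB;
}

int main() {
  int LB = 0, UB = 10;
  int LdsUB = 4, GdsUB = 0, MsgUB = 9; // per-event upper bounds (sample values)
  int Kinds = inFlight(LdsUB, LB, UB) + inFlight(GdsUB, LB, UB) +
              inFlight(MsgUB, LB, UB);
  assert(Kinds == 2); // two kinds in flight -> decrements may interleave
}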
+INITIALIZE_PASS_BEGIN(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false,
+ false)
+INITIALIZE_PASS_END(SIInsertWaitcnts, DEBUG_TYPE, "SI Insert Waitcnts", false,
+ false)
+
+char SIInsertWaitcnts::ID = 0;
+
+char &llvm::SIInsertWaitcntsID = SIInsertWaitcnts::ID;
+
+FunctionPass *llvm::createSIInsertWaitcntsPass() {
+ return new SIInsertWaitcnts();
+}
+
+static bool readsVCCZ(const MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ return (Opc == AMDGPU::S_CBRANCH_VCCNZ || Opc == AMDGPU::S_CBRANCH_VCCZ) &&
+ !MI.getOperand(1).isUndef();
+}
+
+/// \brief Generate s_waitcnt instruction to be placed before cur_Inst.
+/// Instructions of a given type are returned in order,
+/// but instructions of different types can complete out of order.
+/// We rely on this in-order completion
+/// and simply assign a score to the memory access instructions.
+/// We keep track of the active "score bracket" to determine
+/// if an access of a memory read requires an s_waitcnt
+/// and if so what the value of each counter is.
+/// The "score bracket" is bound by the lower bound and upper bound
+/// scores (*_score_LB and *_score_ub respectively).
+MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore(
+ MachineInstr &MI, BlockWaitcntBrackets *ScoreBrackets) {
+ // To emit, or not to emit - that's the question!
+ // Start with an assumption that there is no need to emit.
+ unsigned int EmitSwaitcnt = 0;
+ // s_waitcnt instruction to return; default is NULL.
+ MachineInstr *SWaitInst = nullptr;
+ // No need to wait before a phi. If a phi-move exists, then the wait should
+ // have been inserted before the move. If a phi-move does not exist, then
+ // the wait should be inserted before the real use. The same is true for
+ // sc-merge. It is not a coincidence that all these cases correspond to the
+ // instructions that are skipped in the assembling loop.
+ bool NeedLineMapping = false; // TODO: Check on this.
+ if (MI.isDebugValue() &&
+ // TODO: any other opcode?
+ !NeedLineMapping) {
+ return SWaitInst;
+ }
+
+ // See if an s_waitcnt is forced at block entry, or is needed at
+ // program end.
+ if (ScoreBrackets->getWaitAtBeginning()) {
+ // Note that we have already cleared the state, so we don't need to update
+ // it.
+ ScoreBrackets->clearWaitAtBeginning();
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ EmitSwaitcnt |= CNT_MASK(T);
+ ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
+ }
+ }
+
+ // See if this instruction has a forced S_WAITCNT VM.
+ // TODO: Handle other cases of NeedsWaitcntVmBefore()
+ else if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 ||
+ MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC ||
+ MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL) {
+ EmitSwaitcnt |=
+ ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
+ }
+
+ // All waits must be resolved at call return.
+ // NOTE: this could be improved with knowledge of all call sites or
+ // with knowledge of the called routines.
+ if (MI.getOpcode() == AMDGPU::RETURN ||
+ MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ if (ScoreBrackets->getScoreUB(T) > ScoreBrackets->getScoreLB(T)) {
+ ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
+ EmitSwaitcnt |= CNT_MASK(T);
+ }
+ }
+ }
+ // Resolve vm waits before gs-done.
+ else if ((MI.getOpcode() == AMDGPU::S_SENDMSG ||
+ MI.getOpcode() == AMDGPU::S_SENDMSGHALT) &&
+ ((MI.getOperand(0).getImm() & AMDGPU::SendMsg::ID_MASK_) ==
+ AMDGPU::SendMsg::ID_GS_DONE)) {
+ if (ScoreBrackets->getScoreUB(VM_CNT) > ScoreBrackets->getScoreLB(VM_CNT)) {
+ ScoreBrackets->setScoreLB(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
+ EmitSwaitcnt |= CNT_MASK(VM_CNT);
+ }
+ }
+#if 0 // TODO: the following blocks of logic when we have fence.
+ else if (MI.getOpcode() == SC_FENCE) {
+ const unsigned int group_size =
+ context->shader_info->GetMaxThreadGroupSize();
+ // group_size == 0 means thread group size is unknown at compile time
+ const bool group_is_multi_wave =
+ (group_size == 0 || group_size > target_info->GetWaveFrontSize());
+ const bool fence_is_global = !((SCInstInternalMisc*)Inst)->IsGroupFence();
+
+ for (unsigned int i = 0; i < Inst->NumSrcOperands(); i++) {
+ SCRegType src_type = Inst->GetSrcType(i);
+ switch (src_type) {
+ case SCMEM_LDS:
+ if (group_is_multi_wave ||
+ context->OptFlagIsOn(OPT_R1100_LDSMEM_FENCE_CHICKEN_BIT)) {
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(LGKM_CNT,
+ ScoreBrackets->getScoreUB(LGKM_CNT));
+ // LDS may have to wait for VM_CNT after buffer load to LDS
+ if (target_info->HasBufferLoadToLDS()) {
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(VM_CNT,
+ ScoreBrackets->getScoreUB(VM_CNT));
+ }
+ }
+ break;
+
+ case SCMEM_GDS:
+ if (group_is_multi_wave || fence_is_global) {
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(EXP_CNT,
+ ScoreBrackets->getScoreUB(EXP_CNT));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(LGKM_CNT,
+ ScoreBrackets->getScoreUB(LGKM_CNT));
+ }
+ break;
+
+ case SCMEM_UAV:
+ case SCMEM_TFBUF:
+ case SCMEM_RING:
+ case SCMEM_SCATTER:
+ if (group_is_multi_wave || fence_is_global) {
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(EXP_CNT,
+ ScoreBrackets->getScoreUB(EXP_CNT));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(VM_CNT,
+ ScoreBrackets->getScoreUB(VM_CNT));
+ }
+ break;
+
+ case SCMEM_SCRATCH:
+ default:
+ break;
+ }
+ }
+ }
+#endif
+
+ // Export & GDS instructions do not read the EXEC mask until after the export
+ // is granted (which can occur well after the instruction is issued).
+ // The shader program must flush all EXP operations on the export-count
+ // before overwriting the EXEC mask.
+ else {
+ if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
+ // Export and GDS are tracked individually, either may trigger a waitcnt
+ // for EXEC.
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ EXP_CNT, ScoreBrackets->getEventUB(EXP_GPR_LOCK));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ EXP_CNT, ScoreBrackets->getEventUB(EXP_PARAM_ACCESS));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ EXP_CNT, ScoreBrackets->getEventUB(EXP_POS_ACCESS));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ EXP_CNT, ScoreBrackets->getEventUB(GDS_GPR_LOCK));
+ }
+
+#if 0 // TODO: the following code to handle CALL.
+ // The argument passing for CALLs should suffice for VM_CNT and LGKM_CNT.
+ // However, there is a problem with EXP_CNT, because the call cannot
+ // easily tell if a register is used in the function, and if it did, then
+ // the referring instruction would have to have an S_WAITCNT, which is
+ // dependent on all call sites. So Instead, force S_WAITCNT for EXP_CNTs
+ // before the call.
+ if (MI.getOpcode() == SC_CALL) {
+ if (ScoreBrackets->getScoreUB(EXP_CNT) >
+ ScoreBrackets->getScoreLB(EXP_CNT)) {
+ ScoreBrackets->setScoreLB(EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT));
+ EmitSwaitcnt |= CNT_MASK(EXP_CNT);
+ }
+ }
+#endif
+
+ // Look at the source operands of every instruction to see if
+ // any of them results from a previous memory operation that affects
+ // its current usage. If so, an s_waitcnt instruction needs to be
+ // emitted.
+ // If the source operand was defined by a load, add the s_waitcnt
+ // instruction.
+ for (const MachineMemOperand *Memop : MI.memoperands()) {
+ unsigned AS = Memop->getAddrSpace();
+ if (AS != AMDGPUASI.LOCAL_ADDRESS)
+ continue;
+ unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
+ // VM_CNT is only relevant to vgpr or LDS.
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
+ }
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &Op = MI.getOperand(I);
+ const MachineRegisterInfo &MRIA = *MRI;
+ RegInterval Interval =
+ ScoreBrackets->getRegInterval(&MI, TII, MRI, TRI, I, false);
+ for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ if (TRI->isVGPR(MRIA, Op.getReg())) {
+ // VM_CNT is only relevant to vgpr or LDS.
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
+ }
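+        // LGKM_CNT events may also write sgprs (e.g. smem loads), so check
+        // LGKM_CNT for every register in the interval, not just vgprs.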
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ LGKM_CNT, ScoreBrackets->getRegScore(RegNo, LGKM_CNT));
+ }
+ }
+ // End of for loop that looks at all source operands to decide vm_wait_cnt
+ // and lgk_wait_cnt.
+
+    // Two cases are handled for destination operands:
+    // 1) If the destination operand was defined by a load, add the s_waitcnt
+    // instruction to guarantee the right WAW order.
+    // 2) If a destination operand was used by a recent export/store
+    // instruction, add an s_waitcnt on exp_cnt to guarantee the WAR order.
+ if (MI.mayStore()) {
+ for (const MachineMemOperand *Memop : MI.memoperands()) {
+ unsigned AS = Memop->getAddrSpace();
+ if (AS != AMDGPUASI.LOCAL_ADDRESS)
+ continue;
+ unsigned RegNo = SQ_MAX_PGM_VGPRS + EXTRA_VGPR_LDS;
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ EXP_CNT, ScoreBrackets->getRegScore(RegNo, EXP_CNT));
+ }
+ }
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ MachineOperand &Def = MI.getOperand(I);
+ const MachineRegisterInfo &MRIA = *MRI;
+ RegInterval Interval =
+ ScoreBrackets->getRegInterval(&MI, TII, MRI, TRI, I, true);
+ for (signed RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
+ if (TRI->isVGPR(MRIA, Def.getReg())) {
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ EXP_CNT, ScoreBrackets->getRegScore(RegNo, EXP_CNT));
+ }
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ LGKM_CNT, ScoreBrackets->getRegScore(RegNo, LGKM_CNT));
+ }
+ } // End of for loop that looks at all dest operands.
+ }
+
+ // TODO: Tie force zero to a compiler triage option.
+ bool ForceZero = false;
+
+  // Check to see if this is an S_BARRIER on a subtarget that requires an
+  // implicit S_WAITCNT 0 before it. Handling that wait here prevents any
+  // additional S_WAITCNTs from being emitted for an instruction that was
+  // already marked as requiring a WAITCNT beforehand.
+ if (MI.getOpcode() == AMDGPU::S_BARRIER && ST->needWaitcntBeforeBarrier()) {
+ EmitSwaitcnt |=
+ ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT));
+ EmitSwaitcnt |= ScoreBrackets->updateByWait(
+ LGKM_CNT, ScoreBrackets->getScoreUB(LGKM_CNT));
+ }
+
+ // TODO: Remove this work-around, enable the assert for Bug 457939
+ // after fixing the scheduler. Also, the Shader Compiler code is
+ // independent of target.
+ if (readsVCCZ(MI) && ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
+ if (ScoreBrackets->getScoreLB(LGKM_CNT) <
+ ScoreBrackets->getScoreUB(LGKM_CNT) &&
+ ScoreBrackets->hasPendingSMEM()) {
+ // Wait on everything, not just LGKM. vccz reads usually come from
+ // terminators, and we always wait on everything at the end of the
+ // block, so if we only wait on LGKM here, we might end up with
+ // another s_waitcnt inserted right after this if there are non-LGKM
+ // instructions still outstanding.
+ ForceZero = true;
+ EmitSwaitcnt = true;
+ }
+ }
+
+ // Does this operand processing indicate s_wait counter update?
+ if (EmitSwaitcnt) {
+ int CntVal[NUM_INST_CNTS];
+
+ bool UseDefaultWaitcntStrategy = true;
+ if (ForceZero) {
+ // Force all waitcnts to 0.
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
+ }
+ CntVal[VM_CNT] = 0;
+ CntVal[EXP_CNT] = 0;
+ CntVal[LGKM_CNT] = 0;
+ UseDefaultWaitcntStrategy = false;
+ }
+
+ if (UseDefaultWaitcntStrategy) {
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ if (EmitSwaitcnt & CNT_MASK(T)) {
+ int Delta =
+ ScoreBrackets->getScoreUB(T) - ScoreBrackets->getScoreLB(T);
+ int MaxDelta = ScoreBrackets->getWaitCountMax(T);
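+          // If the span is at least the hardware maximum, a wait for a
+          // count this large is trivially satisfied, so the counter is
+          // dropped from the wait (encoded as -1, "don't care").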
+ if (Delta >= MaxDelta) {
+ Delta = -1;
+ if (T != EXP_CNT) {
+ ScoreBrackets->setScoreLB(
+ T, ScoreBrackets->getScoreUB(T) - MaxDelta);
+ }
+ EmitSwaitcnt &= ~CNT_MASK(T);
+ }
+ CntVal[T] = Delta;
+ } else {
+          // If we are not waiting for a particular counter then encode it
+          // as -1, which means "don't care."
+ CntVal[T] = -1;
+ }
+ }
+ }
+
+    // If we are not waiting on any counter, we can skip the wait altogether.
+ if (EmitSwaitcnt != 0) {
+ MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt();
+ int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm();
+ if (!OldWaitcnt || (AMDGPU::decodeVmcnt(IV, Imm) !=
+ (CntVal[VM_CNT] & AMDGPU::getVmcntBitMask(IV))) ||
+ (AMDGPU::decodeExpcnt(IV, Imm) !=
+ (CntVal[EXP_CNT] & AMDGPU::getExpcntBitMask(IV))) ||
+ (AMDGPU::decodeLgkmcnt(IV, Imm) !=
+ (CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) {
+ MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent());
+ if (ContainingLoop) {
+ MachineBasicBlock *TBB = ContainingLoop->getTopBlock();
+ BlockWaitcntBrackets *ScoreBracket =
+ BlockWaitcntBracketsMap[TBB].get();
+ if (!ScoreBracket) {
+ assert(BlockVisitedSet.find(TBB) == BlockVisitedSet.end());
+ BlockWaitcntBracketsMap[TBB] = make_unique<BlockWaitcntBrackets>();
+ ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
+ }
+ ScoreBracket->setRevisitLoop(true);
+ DEBUG(dbgs() << "set-revisit: block"
+ << ContainingLoop->getTopBlock()->getNumber() << '\n';);
+ }
+ }
+
+      // Update an existing waitcnt, or make a new one.
+ MachineFunction &MF = *MI.getParent()->getParent();
+ if (OldWaitcnt && OldWaitcnt->getOpcode() != AMDGPU::S_WAITCNT) {
+ SWaitInst = OldWaitcnt;
+ } else {
+ SWaitInst = MF.CreateMachineInstr(TII->get(AMDGPU::S_WAITCNT),
+ MI.getDebugLoc());
+ CompilerGeneratedWaitcntSet.insert(SWaitInst);
+ }
+
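+      // Any CntVal of -1 is masked down to the counter's all-ones pattern
+      // by encodeWaitcnt; the hardware treats the maximum encodable count
+      // as "no wait" for that counter.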
+ const MachineOperand &Op =
+ MachineOperand::CreateImm(AMDGPU::encodeWaitcnt(
+ IV, CntVal[VM_CNT], CntVal[EXP_CNT], CntVal[LGKM_CNT]));
+ SWaitInst->addOperand(MF, Op);
+
+ if (CntVal[EXP_CNT] == 0) {
+ ScoreBrackets->setMixedExpTypes(false);
+ }
+ }
+ }
+
+ return SWaitInst;
+}
+
+void SIInsertWaitcnts::insertWaitcntBeforeCF(MachineBasicBlock &MBB,
+ MachineInstr *Waitcnt) {
+ if (MBB.empty()) {
+ MBB.push_back(Waitcnt);
+ return;
+ }
+
+ MachineBasicBlock::iterator It = MBB.end();
+ MachineInstr *MI = &*(--It);
+ if (MI->isBranch()) {
+ MBB.insert(It, Waitcnt);
+ } else {
+ MBB.push_back(Waitcnt);
+ }
+
+ return;
+}
+
+void SIInsertWaitcnts::updateEventWaitCntAfter(
+ MachineInstr &Inst, BlockWaitcntBrackets *ScoreBrackets) {
+ // Now look at the instruction opcode. If it is a memory access
+ // instruction, update the upper-bound of the appropriate counter's
+ // bracket and the destination operand scores.
+ // TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere.
+ if (TII->isDS(Inst) && (Inst.mayLoad() || Inst.mayStore())) {
+ if (TII->getNamedOperand(Inst, AMDGPU::OpName::gds)->getImm() != 0) {
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst);
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst);
+ } else {
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst);
+ }
+ } else if (TII->isFLAT(Inst)) {
+ assert(Inst.mayLoad() || Inst.mayStore());
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst);
+
+    // This is a flat memory operation. Check to see if it has memory
+    // tokens for both LDS and Memory, and if so mark it as a flat access.
+ bool FoundLDSMem = false;
+ for (const MachineMemOperand *Memop : Inst.memoperands()) {
+ unsigned AS = Memop->getAddrSpace();
+ if (AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS)
+ FoundLDSMem = true;
+ }
+
+    // This is a flat memory operation, so note it; it will require both
+    // the VM and LGKM counters to be flushed to zero if it is pending when
+    // a VM or LGKM dependency occurs.
+ if (FoundLDSMem) {
+ ScoreBrackets->setPendingFlat();
+ }
+ } else if (SIInstrInfo::isVMEM(Inst) &&
+ // TODO: get a better carve out.
+ Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1 &&
+ Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_SC &&
+ Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_VOL) {
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
+ if ( // TODO: assumed yes -- target_info->MemWriteNeedsExpWait() &&
+ (Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()))) {
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst);
+ }
+ } else if (TII->isSMRD(Inst)) {
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, SMEM_ACCESS, Inst);
+ } else {
+ switch (Inst.getOpcode()) {
+ case AMDGPU::S_SENDMSG:
+ case AMDGPU::S_SENDMSGHALT:
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, SQ_MESSAGE, Inst);
+ break;
+ case AMDGPU::EXP:
+ case AMDGPU::EXP_DONE: {
+ int Imm = TII->getNamedOperand(Inst, AMDGPU::OpName::tgt)->getImm();
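+      // Export targets 32..63 are parameters and targets 12..15 are
+      // positions; any other target takes the generic export GPR lock.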
+ if (Imm >= 32 && Imm <= 63)
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_PARAM_ACCESS, Inst);
+ else if (Imm >= 12 && Imm <= 15)
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_POS_ACCESS, Inst);
+ else
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, EXP_GPR_LOCK, Inst);
+ break;
+ }
+ case AMDGPU::S_MEMTIME:
+ case AMDGPU::S_MEMREALTIME:
+ ScoreBrackets->updateByEvent(TII, TRI, MRI, SMEM_ACCESS, Inst);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) {
+ BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
+ int32_t MaxPending[NUM_INST_CNTS] = {0};
+ int32_t MaxFlat[NUM_INST_CNTS] = {0};
+ bool MixedExpTypes = false;
+
+ // Clear the score bracket state.
+ ScoreBrackets->clear();
+
+ // Compute the number of pending elements on block entry.
+
+ // IMPORTANT NOTE: If iterative handling of loops is added, the code will
+ // need to handle single BBs with backedges to themselves. This means that
+ // they will need to retain and not clear their initial state.
+
+ // See if there are any uninitialized predecessors. If so, emit an
+ // s_waitcnt 0 at the beginning of the block.
+ for (MachineBasicBlock *pred : Block.predecessors()) {
+ BlockWaitcntBrackets *PredScoreBrackets =
+ BlockWaitcntBracketsMap[pred].get();
+ bool Visited = BlockVisitedSet.find(pred) != BlockVisitedSet.end();
+ if (!Visited || PredScoreBrackets->getWaitAtBeginning()) {
+ break;
+ }
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ int span =
+ PredScoreBrackets->getScoreUB(T) - PredScoreBrackets->getScoreLB(T);
+ MaxPending[T] = std::max(MaxPending[T], span);
+ span =
+ PredScoreBrackets->pendingFlat(T) - PredScoreBrackets->getScoreLB(T);
+ MaxFlat[T] = std::max(MaxFlat[T], span);
+ }
+
+ MixedExpTypes |= PredScoreBrackets->mixedExpTypes();
+ }
+
+ // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
+ // Also handle kills for exit block.
+ if (Block.succ_empty() && !KillWaitBrackets.empty()) {
+ for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ int Span = KillWaitBrackets[I]->getScoreUB(T) -
+ KillWaitBrackets[I]->getScoreLB(T);
+ MaxPending[T] = std::max(MaxPending[T], Span);
+ Span = KillWaitBrackets[I]->pendingFlat(T) -
+ KillWaitBrackets[I]->getScoreLB(T);
+ MaxFlat[T] = std::max(MaxFlat[T], Span);
+ }
+
+ MixedExpTypes |= KillWaitBrackets[I]->mixedExpTypes();
+ }
+ }
+
+ // Special handling for GDS_GPR_LOCK and EXP_GPR_LOCK.
+ for (MachineBasicBlock *Pred : Block.predecessors()) {
+ BlockWaitcntBrackets *PredScoreBrackets =
+ BlockWaitcntBracketsMap[Pred].get();
+ bool Visited = BlockVisitedSet.find(Pred) != BlockVisitedSet.end();
+ if (!Visited || PredScoreBrackets->getWaitAtBeginning()) {
+ break;
+ }
+
+ int GDSSpan = PredScoreBrackets->getEventUB(GDS_GPR_LOCK) -
+ PredScoreBrackets->getScoreLB(EXP_CNT);
+ MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], GDSSpan);
+ int EXPSpan = PredScoreBrackets->getEventUB(EXP_GPR_LOCK) -
+ PredScoreBrackets->getScoreLB(EXP_CNT);
+ MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan);
+ }
+
+ // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
+ if (Block.succ_empty() && !KillWaitBrackets.empty()) {
+ for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
+ int GDSSpan = KillWaitBrackets[I]->getEventUB(GDS_GPR_LOCK) -
+ KillWaitBrackets[I]->getScoreLB(EXP_CNT);
+ MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], GDSSpan);
+ int EXPSpan = KillWaitBrackets[I]->getEventUB(EXP_GPR_LOCK) -
+ KillWaitBrackets[I]->getScoreLB(EXP_CNT);
+ MaxPending[EXP_CNT] = std::max(MaxPending[EXP_CNT], EXPSpan);
+ }
+ }
+
+#if 0
+  // Unlike SC, LC does not add a waitcnt at the beginning of a block.
+  // Leaving this here as a marker.
+ // TODO: how does LC distinguish between function entry and main entry?
+ // If this is the entry to a function, force a wait.
+ MachineBasicBlock &Entry = Block.getParent()->front();
+ if (Entry.getNumber() == Block.getNumber()) {
+ ScoreBrackets->setWaitAtBeginning();
+ return;
+ }
+#endif
+
+ // Now set the current Block's brackets to the largest ending bracket.
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ ScoreBrackets->setScoreUB(T, MaxPending[T]);
+ ScoreBrackets->setScoreLB(T, 0);
+ ScoreBrackets->setLastFlat(T, MaxFlat[T]);
+ }
+
+ ScoreBrackets->setMixedExpTypes(MixedExpTypes);
+
+ // Set the register scoreboard.
+ for (MachineBasicBlock *Pred : Block.predecessors()) {
+ if (BlockVisitedSet.find(Pred) == BlockVisitedSet.end()) {
+ break;
+ }
+
+ BlockWaitcntBrackets *PredScoreBrackets =
+ BlockWaitcntBracketsMap[Pred].get();
+
+ // Now merge the gpr_reg_score information
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ int PredLB = PredScoreBrackets->getScoreLB(T);
+ int PredUB = PredScoreBrackets->getScoreUB(T);
+ if (PredLB < PredUB) {
+ int PredScale = MaxPending[T] - PredUB;
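+        // Rebase the predecessor's scores so that its upper bound lines up
+        // with the merged MaxPending value; e.g. a predecessor ending at
+        // UB = 5 merging into MaxPending[T] = 8 has every live score
+        // shifted up by 3.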
+ // Merge vgpr scores.
+ for (int J = 0; J <= PredScoreBrackets->getMaxVGPR(); J++) {
+ int PredRegScore = PredScoreBrackets->getRegScore(J, T);
+ if (PredRegScore <= PredLB)
+ continue;
+ int NewRegScore = PredScale + PredRegScore;
+ ScoreBrackets->setRegScore(
+ J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore));
+ }
+ // Also need to merge sgpr scores for lgkm_cnt.
+ if (T == LGKM_CNT) {
+ for (int J = 0; J <= PredScoreBrackets->getMaxSGPR(); J++) {
+ int PredRegScore =
+ PredScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
+ if (PredRegScore <= PredLB)
+ continue;
+ int NewRegScore = PredScale + PredRegScore;
+ ScoreBrackets->setRegScore(
+ J + NUM_ALL_VGPRS, LGKM_CNT,
+ std::max(
+ ScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT),
+ NewRegScore));
+ }
+ }
+ }
+ }
+
+ // Also merge the WaitEvent information.
+ ForAllWaitEventType(W) {
+ enum InstCounterType T = PredScoreBrackets->eventCounter(W);
+ int PredEventUB = PredScoreBrackets->getEventUB(W);
+ if (PredEventUB > PredScoreBrackets->getScoreLB(T)) {
+ int NewEventUB =
+ MaxPending[T] + PredEventUB - PredScoreBrackets->getScoreUB(T);
+ if (NewEventUB > 0) {
+ ScoreBrackets->setEventUB(
+ W, std::max(ScoreBrackets->getEventUB(W), NewEventUB));
+ }
+ }
+ }
+ }
+
+ // TODO: Is SC Block->IsMainExit() same as Block.succ_empty()?
+ // Set the register scoreboard.
+ if (Block.succ_empty() && !KillWaitBrackets.empty()) {
+ for (unsigned int I = 0; I < KillWaitBrackets.size(); I++) {
+ // Now merge the gpr_reg_score information.
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ int PredLB = KillWaitBrackets[I]->getScoreLB(T);
+ int PredUB = KillWaitBrackets[I]->getScoreUB(T);
+ if (PredLB < PredUB) {
+ int PredScale = MaxPending[T] - PredUB;
+ // Merge vgpr scores.
+ for (int J = 0; J <= KillWaitBrackets[I]->getMaxVGPR(); J++) {
+ int PredRegScore = KillWaitBrackets[I]->getRegScore(J, T);
+ if (PredRegScore <= PredLB)
+ continue;
+ int NewRegScore = PredScale + PredRegScore;
+ ScoreBrackets->setRegScore(
+ J, T, std::max(ScoreBrackets->getRegScore(J, T), NewRegScore));
+ }
+ // Also need to merge sgpr scores for lgkm_cnt.
+ if (T == LGKM_CNT) {
+ for (int J = 0; J <= KillWaitBrackets[I]->getMaxSGPR(); J++) {
+ int PredRegScore =
+ KillWaitBrackets[I]->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT);
+ if (PredRegScore <= PredLB)
+ continue;
+ int NewRegScore = PredScale + PredRegScore;
+ ScoreBrackets->setRegScore(
+ J + NUM_ALL_VGPRS, LGKM_CNT,
+ std::max(
+ ScoreBrackets->getRegScore(J + NUM_ALL_VGPRS, LGKM_CNT),
+ NewRegScore));
+ }
+ }
+ }
+ }
+
+ // Also merge the WaitEvent information.
+ ForAllWaitEventType(W) {
+ enum InstCounterType T = KillWaitBrackets[I]->eventCounter(W);
+ int PredEventUB = KillWaitBrackets[I]->getEventUB(W);
+ if (PredEventUB > KillWaitBrackets[I]->getScoreLB(T)) {
+ int NewEventUB =
+ MaxPending[T] + PredEventUB - KillWaitBrackets[I]->getScoreUB(T);
+ if (NewEventUB > 0) {
+ ScoreBrackets->setEventUB(
+ W, std::max(ScoreBrackets->getEventUB(W), NewEventUB));
+ }
+ }
+ }
+ }
+ }
+
+ // Special case handling of GDS_GPR_LOCK and EXP_GPR_LOCK. Merge this for the
+ // sequencing predecessors, because changes to EXEC require waitcnts due to
+ // the delayed nature of these operations.
+ for (MachineBasicBlock *Pred : Block.predecessors()) {
+ if (BlockVisitedSet.find(Pred) == BlockVisitedSet.end()) {
+ break;
+ }
+
+ BlockWaitcntBrackets *PredScoreBrackets =
+ BlockWaitcntBracketsMap[Pred].get();
+
+ int pred_gds_ub = PredScoreBrackets->getEventUB(GDS_GPR_LOCK);
+ if (pred_gds_ub > PredScoreBrackets->getScoreLB(EXP_CNT)) {
+ int new_gds_ub = MaxPending[EXP_CNT] + pred_gds_ub -
+ PredScoreBrackets->getScoreUB(EXP_CNT);
+ if (new_gds_ub > 0) {
+ ScoreBrackets->setEventUB(
+ GDS_GPR_LOCK,
+ std::max(ScoreBrackets->getEventUB(GDS_GPR_LOCK), new_gds_ub));
+ }
+ }
+ int pred_exp_ub = PredScoreBrackets->getEventUB(EXP_GPR_LOCK);
+ if (pred_exp_ub > PredScoreBrackets->getScoreLB(EXP_CNT)) {
+ int new_exp_ub = MaxPending[EXP_CNT] + pred_exp_ub -
+ PredScoreBrackets->getScoreUB(EXP_CNT);
+ if (new_exp_ub > 0) {
+ ScoreBrackets->setEventUB(
+ EXP_GPR_LOCK,
+ std::max(ScoreBrackets->getEventUB(EXP_GPR_LOCK), new_exp_ub));
+ }
+ }
+ }
+}
+
+/// Return the "bottom" block of a loop. This differs from
+/// MachineLoop::getBottomBlock in that it works even if the loop is
+/// discontiguous.
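+/// (After layout a loop's blocks need not be contiguous, so the member
+/// block with the highest block number is taken as the bottom.)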
+MachineBasicBlock *SIInsertWaitcnts::loopBottom(const MachineLoop *Loop) {
+ MachineBasicBlock *Bottom = Loop->getHeader();
+ for (MachineBasicBlock *MBB : Loop->blocks())
+ if (MBB->getNumber() > Bottom->getNumber())
+ Bottom = MBB;
+ return Bottom;
+}
+
+// Generate s_waitcnt instructions where needed.
+void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
+ MachineBasicBlock &Block) {
+ // Initialize the state information.
+ mergeInputScoreBrackets(Block);
+
+ BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&Block].get();
+
+ DEBUG({
+ dbgs() << "Block" << Block.getNumber();
+ ScoreBrackets->dump();
+ });
+
+ bool InsertNOP = false;
+
+ // Walk over the instructions.
+ for (MachineBasicBlock::iterator Iter = Block.begin(), E = Block.end();
+ Iter != E;) {
+ MachineInstr &Inst = *Iter;
+ // Remove any previously existing waitcnts.
+ if (Inst.getOpcode() == AMDGPU::S_WAITCNT) {
+ // TODO: Register the old waitcnt and optimize the following waitcnts.
+ // Leaving the previously existing waitcnts is conservatively correct.
+ if (CompilerGeneratedWaitcntSet.find(&Inst) ==
+ CompilerGeneratedWaitcntSet.end())
+ ++Iter;
+ else {
+ ScoreBrackets->setWaitcnt(&Inst);
+ ++Iter;
+ Inst.removeFromParent();
+ }
+ continue;
+ }
+
+ // Kill instructions generate a conditional branch to the endmain block.
+ // Merge the current waitcnt state into the endmain block information.
+ // TODO: Are there other flavors of KILL instruction?
+ if (Inst.getOpcode() == AMDGPU::KILL) {
+ addKillWaitBracket(ScoreBrackets);
+ }
+
+ bool VCCZBugWorkAround = false;
+ if (readsVCCZ(Inst) &&
+ (VCCZBugHandledSet.find(&Inst) == VCCZBugHandledSet.end())) {
+ if (ScoreBrackets->getScoreLB(LGKM_CNT) <
+ ScoreBrackets->getScoreUB(LGKM_CNT) &&
+ ScoreBrackets->hasPendingSMEM()) {
+ if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS)
+ VCCZBugWorkAround = true;
+ }
+ }
+
+    // Generate an s_waitcnt instruction to be placed before the current
+    // instruction, if needed.
+ MachineInstr *SWaitInst = generateSWaitCntInstBefore(Inst, ScoreBrackets);
+
+ if (SWaitInst) {
+ Block.insert(Inst, SWaitInst);
+ if (ScoreBrackets->getWaitcnt() != SWaitInst) {
+ DEBUG(dbgs() << "insertWaitcntInBlock\n"
+ << "Old Instr: " << Inst << '\n'
+ << "New Instr: " << *SWaitInst << '\n';);
+ }
+ }
+
+ updateEventWaitCntAfter(Inst, ScoreBrackets);
+
+#if 0 // TODO: implement resource type check controlled by options with ub = LB.
+ // If this instruction generates a S_SETVSKIP because it is an
+ // indexed resource, and we are on Tahiti, then it will also force
+ // an S_WAITCNT vmcnt(0)
+ if (RequireCheckResourceType(Inst, context)) {
+ // Force the score to as if an S_WAITCNT vmcnt(0) is emitted.
+ ScoreBrackets->setScoreLB(VM_CNT,
+ ScoreBrackets->getScoreUB(VM_CNT));
+ }
+#endif
+
+ ScoreBrackets->clearWaitcnt();
+
+ if (SWaitInst) {
+ DEBUG({ SWaitInst->print(dbgs() << '\n'); });
+ }
+ DEBUG({
+ Inst.print(dbgs());
+ ScoreBrackets->dump();
+ });
+
+ // Check to see if this is a GWS instruction. If so, and if this is CI or
+ // VI, then the generated code sequence will include an S_WAITCNT 0.
+ // TODO: Are these the only GWS instructions?
+ if (Inst.getOpcode() == AMDGPU::DS_GWS_INIT ||
+ Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_V ||
+ Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
+ Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_P ||
+ Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
+ // TODO: && context->target_info->GwsRequiresMemViolTest() ) {
+ ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
+ ScoreBrackets->updateByWait(EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT));
+ ScoreBrackets->updateByWait(LGKM_CNT,
+ ScoreBrackets->getScoreUB(LGKM_CNT));
+ }
+
+ // TODO: Remove this work-around after fixing the scheduler and enable the
+ // assert above.
+ if (VCCZBugWorkAround) {
+      // Restore the vccz bit. Any time a value is written to vcc, the vccz
+      // bit is updated, so we can restore the bit by reading the value of
+ // vcc and then writing it back to the register.
+ BuildMI(Block, Inst, Inst.getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
+ AMDGPU::VCC)
+ .addReg(AMDGPU::VCC);
+ VCCZBugHandledSet.insert(&Inst);
+ }
+
+ if (ST->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
+
+      // This avoids emitting an s_nop when a waitcnt has just been inserted.
+ if (!SWaitInst && InsertNOP) {
+ BuildMI(Block, Inst, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
+ }
+ InsertNOP = false;
+
+ // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM
+ // or SMEM clause, respectively.
+ //
+ // The temporary workaround is to break the clauses with S_NOP.
+ //
+ // The proper solution would be to allocate registers such that all source
+ // and destination registers don't overlap, e.g. this is illegal:
+ // r0 = load r2
+ // r2 = load r0
+ bool IsSMEM = false;
+ bool IsVMEM = false;
+ if (TII->isSMRD(Inst))
+ IsSMEM = true;
+ else if (TII->usesVM_CNT(Inst))
+ IsVMEM = true;
+
+ ++Iter;
+ if (Iter == E)
+ break;
+
+ MachineInstr &Next = *Iter;
+
+ // TODO: How about consecutive SMEM instructions?
+      // The comment above says to break the clause, but the code does not.
+ // if ((TII->isSMRD(next) && isSMEM) ||
+ if (!IsSMEM && TII->usesVM_CNT(Next) && IsVMEM &&
+ // TODO: Enable this check when hasSoftClause is upstreamed.
+ // ST->hasSoftClauses() &&
+ ST->isXNACKEnabled()) {
+ // Insert a NOP to break the clause.
+ InsertNOP = true;
+ continue;
+ }
+
+ // There must be "S_NOP 0" between an instruction writing M0 and
+ // S_SENDMSG.
+ if ((Next.getOpcode() == AMDGPU::S_SENDMSG ||
+ Next.getOpcode() == AMDGPU::S_SENDMSGHALT) &&
+ Inst.definesRegister(AMDGPU::M0))
+ InsertNOP = true;
+
+ continue;
+ }
+
+ ++Iter;
+ }
+
+ // Check if we need to force convergence at loop footer.
+ MachineLoop *ContainingLoop = MLI->getLoopFor(&Block);
+ if (ContainingLoop && loopBottom(ContainingLoop) == &Block) {
+ LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
+ WaitcntData->print();
+ DEBUG(dbgs() << '\n';);
+
+    // The iterative waitcnt insertion algorithm aims for optimal waitcnt
+    // placement and doesn't always guarantee convergence for a loop. Each
+    // loop should take at most 2 iterations to converge naturally. When
+    // this maximum is reached and the result has not converged, we force
+    // convergence by inserting an s_waitcnt at the end of the loop footer.
+ if (WaitcntData->getIterCnt() > 2) {
+      // To ensure convergence, the wait events at the loop footer must be no
+      // more than those from the previous iteration.
+      // As a simplification, instead of tracking individual scores and
+      // generating the precise wait count, just wait on 0.
+ bool HasPending = false;
+ MachineInstr *SWaitInst = WaitcntData->getWaitcnt();
+ for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
+ T = (enum InstCounterType)(T + 1)) {
+ if (ScoreBrackets->getScoreUB(T) > ScoreBrackets->getScoreLB(T)) {
+ ScoreBrackets->setScoreLB(T, ScoreBrackets->getScoreUB(T));
+ HasPending = true;
+ }
+ }
+
+ if (HasPending) {
+ if (!SWaitInst) {
+ SWaitInst = Block.getParent()->CreateMachineInstr(
+ TII->get(AMDGPU::S_WAITCNT), DebugLoc());
+ CompilerGeneratedWaitcntSet.insert(SWaitInst);
+ const MachineOperand &Op = MachineOperand::CreateImm(0);
+ SWaitInst->addOperand(MF, Op);
+#if 0 // TODO: Format the debug output
+ OutputTransformBanner("insertWaitcntInBlock",0,"Create:",context);
+ OutputTransformAdd(SWaitInst, context);
+#endif
+ }
+#if 0 // TODO: ??
+ _DEV( REPORTED_STATS->force_waitcnt_converge = 1; )
+#endif
+ }
+
+ if (SWaitInst) {
+ DEBUG({
+ SWaitInst->print(dbgs());
+ dbgs() << "\nAdjusted score board:";
+ ScoreBrackets->dump();
+ });
+
+        // Add this waitcnt to the block. It is either newly created or was
+        // created in a previous iteration and is added back, since block
+        // traversal always removes waitcnts.
+ insertWaitcntBeforeCF(Block, SWaitInst);
+ WaitcntData->setWaitcnt(SWaitInst);
+ }
+ }
+ }
+}
+
+bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
+ ST = &MF.getSubtarget<SISubtarget>();
+ TII = ST->getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
+ AMDGPUASI = ST->getAMDGPUAS();
+
+ HardwareLimits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
+ HardwareLimits.ExpcntMax = AMDGPU::getExpcntBitMask(IV);
+ HardwareLimits.LgkmcntMax = AMDGPU::getLgkmcntBitMask(IV);
+
+ HardwareLimits.NumVGPRsMax = ST->getAddressableNumVGPRs();
+ HardwareLimits.NumSGPRsMax = ST->getAddressableNumSGPRs();
+ assert(HardwareLimits.NumVGPRsMax <= SQ_MAX_PGM_VGPRS);
+ assert(HardwareLimits.NumSGPRsMax <= SQ_MAX_PGM_SGPRS);
+
+ RegisterEncoding.VGPR0 = TRI->getEncodingValue(AMDGPU::VGPR0);
+ RegisterEncoding.VGPRL =
+ RegisterEncoding.VGPR0 + HardwareLimits.NumVGPRsMax - 1;
+ RegisterEncoding.SGPR0 = TRI->getEncodingValue(AMDGPU::SGPR0);
+ RegisterEncoding.SGPRL =
+ RegisterEncoding.SGPR0 + HardwareLimits.NumSGPRsMax - 1;
+
+  // Walk over the blocks in reverse post order, inserting s_waitcnt where
+  // needed.
+ ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ bool Modified = false;
+ for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator
+ I = RPOT.begin(),
+ E = RPOT.end(), J = RPOT.begin();
+ I != E;) {
+ MachineBasicBlock &MBB = **I;
+
+ BlockVisitedSet.insert(&MBB);
+
+ BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
+ if (!ScoreBrackets) {
+ BlockWaitcntBracketsMap[&MBB] = make_unique<BlockWaitcntBrackets>();
+ ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get();
+ }
+ ScoreBrackets->setPostOrder(MBB.getNumber());
+ MachineLoop *ContainingLoop = MLI->getLoopFor(&MBB);
+ if (ContainingLoop && LoopWaitcntDataMap[ContainingLoop] == nullptr)
+ LoopWaitcntDataMap[ContainingLoop] = make_unique<LoopWaitcntData>();
+
+ // If we are walking into the block from before the loop, then guarantee
+ // at least 1 re-walk over the loop to propagate the information, even if
+ // no S_WAITCNT instructions were generated.
+ if (ContainingLoop && ContainingLoop->getTopBlock() == &MBB && J < I &&
+ (BlockWaitcntProcessedSet.find(&MBB) ==
+ BlockWaitcntProcessedSet.end())) {
+ BlockWaitcntBracketsMap[&MBB]->setRevisitLoop(true);
+ DEBUG(dbgs() << "set-revisit: block"
+ << ContainingLoop->getTopBlock()->getNumber() << '\n';);
+ }
+
+ // Walk over the instructions.
+ insertWaitcntInBlock(MF, MBB);
+
+ // Flag that waitcnts have been processed at least once.
+ BlockWaitcntProcessedSet.insert(&MBB);
+
+ // See if we want to revisit the loop.
+ if (ContainingLoop && loopBottom(ContainingLoop) == &MBB) {
+ MachineBasicBlock *EntryBB = ContainingLoop->getTopBlock();
+ BlockWaitcntBrackets *EntrySB = BlockWaitcntBracketsMap[EntryBB].get();
+ if (EntrySB && EntrySB->getRevisitLoop()) {
+ EntrySB->setRevisitLoop(false);
+ J = I;
+ int32_t PostOrder = EntrySB->getPostOrder();
+ // TODO: Avoid this loop. Find another way to set I.
+ for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator
+ X = RPOT.begin(),
+ Y = RPOT.end();
+ X != Y; ++X) {
+ MachineBasicBlock &MBBX = **X;
+ if (MBBX.getNumber() == PostOrder) {
+ I = X;
+ break;
+ }
+ }
+ LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
+ WaitcntData->incIterCnt();
+ DEBUG(dbgs() << "revisit: block" << EntryBB->getNumber() << '\n';);
+ continue;
+ } else {
+ LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
+        // Loop converged, reset iteration count. If this loop gets revisited,
+        // it must be from an outer loop; the counter will restart, which
+        // ensures we don't force convergence on such revisits.
+ WaitcntData->resetIterCnt();
+ }
+ }
+
+ J = I;
+ ++I;
+ }
+
+ SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
+
+ bool HaveScalarStores = false;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
+ ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;
+ ++I) {
+
+ if (!HaveScalarStores && TII->isScalarStore(*I))
+ HaveScalarStores = true;
+
+ if (I->getOpcode() == AMDGPU::S_ENDPGM ||
+ I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
+ EndPgmBlocks.push_back(&MBB);
+ }
+ }
+
+ if (HaveScalarStores) {
+ // If scalar writes are used, the cache must be flushed or else the next
+ // wave to reuse the same scratch memory can be clobbered.
+ //
+ // Insert s_dcache_wb at wave termination points if there were any scalar
+ // stores, and only if the cache hasn't already been flushed. This could be
+ // improved by looking across blocks for flushes in postdominating blocks
+ // from the stores but an explicitly requested flush is probably very rare.
+ for (MachineBasicBlock *MBB : EndPgmBlocks) {
+ bool SeenDCacheWB = false;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+
+ if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
+ SeenDCacheWB = true;
+ else if (TII->isScalarStore(*I))
+ SeenDCacheWB = false;
+
+ // FIXME: It would be better to insert this before a waitcnt if any.
+ if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
+ I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) &&
+ !SeenDCacheWB) {
+ Modified = true;
+ BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
+ }
+ }
+ }
+ }
+
+ return Modified;
+}
diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp
index fceabd7a8fdd..47257ce16ceb 100644
--- a/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -21,16 +21,32 @@
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <new>
+#include <utility>
#define DEBUG_TYPE "si-insert-waits"
using namespace llvm;
-using namespace llvm::AMDGPU;
namespace {
@@ -42,7 +58,6 @@ typedef union {
unsigned LGKM;
} Named;
unsigned Array[3];
-
} Counters;
typedef enum {
@@ -55,13 +70,12 @@ typedef Counters RegCounters[512];
typedef std::pair<unsigned, unsigned> RegInterval;
class SIInsertWaits : public MachineFunctionPass {
-
private:
- const SISubtarget *ST;
- const SIInstrInfo *TII;
- const SIRegisterInfo *TRI;
+ const SISubtarget *ST = nullptr;
+ const SIInstrInfo *TII = nullptr;
+ const SIRegisterInfo *TRI = nullptr;
const MachineRegisterInfo *MRI;
- IsaVersion IV;
+ AMDGPU::IsaInfo::IsaVersion ISA;
/// \brief Constant zero value
static const Counters ZeroCounts;
@@ -86,7 +100,7 @@ private:
RegCounters DefinedRegs;
/// \brief Different export instruction types seen since last wait.
- unsigned ExpInstrTypesSeen;
+ unsigned ExpInstrTypesSeen = 0;
/// \brief Type of the last opcode.
InstType LastOpcodeType;
@@ -100,7 +114,7 @@ private:
bool ReturnsVoid;
/// Whether the VCCZ bit is possibly corrupt
- bool VCCZCorrupt;
+ bool VCCZCorrupt = false;
/// \brief Get increment/decrement amount for this instruction.
Counters getHwCounts(MachineInstr &MI);
@@ -141,13 +155,7 @@ private:
public:
static char ID;
- SIInsertWaits() :
- MachineFunctionPass(ID),
- ST(nullptr),
- TII(nullptr),
- TRI(nullptr),
- ExpInstrTypesSeen(0),
- VCCZCorrupt(false) { }
+ SIInsertWaits() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -161,7 +169,7 @@ public:
}
};
-} // End anonymous namespace
+} // end anonymous namespace
INITIALIZE_PASS_BEGIN(SIInsertWaits, DEBUG_TYPE,
"SI Insert Waits", false, false)
@@ -294,7 +302,6 @@ RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC,
void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const Counters &Increment) {
-
// Get the hardware counter increments and sum them up
Counters Limit = ZeroCounts;
unsigned Sum = 0;
@@ -366,7 +373,6 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const Counters &Required) {
-
// End of program? No need to wait on anything
// A function not returning void needs to wait, because other bytecode will
// be appended after it and we don't know what it will be.
@@ -393,7 +399,6 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
bool NeedWait = false;
for (unsigned i = 0; i < 3; ++i) {
-
if (Required.Array[i] <= WaitedOn.Array[i])
continue;
@@ -421,10 +426,10 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
// Build the wait instruction
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
- .addImm(encodeWaitcnt(IV,
- Counts.Named.VM,
- Counts.Named.EXP,
- Counts.Named.LGKM));
+ .addImm(AMDGPU::encodeWaitcnt(ISA,
+ Counts.Named.VM,
+ Counts.Named.EXP,
+ Counts.Named.LGKM));
LastOpcodeType = OTHER;
LastInstWritesM0 = false;
@@ -434,7 +439,6 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
/// \brief helper function for handleOperands
static void increaseCounters(Counters &Dst, const Counters &Src) {
-
for (unsigned i = 0; i < 3; ++i)
Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]);
}
@@ -453,9 +457,9 @@ void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) {
unsigned Imm = I->getOperand(0).getImm();
Counters Counts, WaitOn;
- Counts.Named.VM = decodeVmcnt(IV, Imm);
- Counts.Named.EXP = decodeExpcnt(IV, Imm);
- Counts.Named.LGKM = decodeLgkmcnt(IV, Imm);
+ Counts.Named.VM = AMDGPU::decodeVmcnt(ISA, Imm);
+ Counts.Named.EXP = AMDGPU::decodeExpcnt(ISA, Imm);
+ Counts.Named.LGKM = AMDGPU::decodeLgkmcnt(ISA, Imm);
for (unsigned i = 0; i < 3; ++i) {
if (Counts.Array[i] <= LastIssued.Array[i])
@@ -468,7 +472,6 @@ void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) {
}
Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
-
Counters Result = ZeroCounts;
// For each register affected by this instruction increase the result
@@ -484,7 +487,6 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
const TargetRegisterClass *RC = TII->getOpRegClass(MI, i);
RegInterval Interval = getRegInterval(RC, Op);
for (unsigned j = Interval.first; j < Interval.second; ++j) {
-
if (Op.isDef()) {
increaseCounters(Result, UsedRegs[j]);
increaseCounters(Result, DefinedRegs[j]);
@@ -522,6 +524,16 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
}
}
+/// Return true if \p MBB has exactly one successor that immediately follows
+/// it in layout, and \p MBB is that successor's only predecessor.
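+/// Such a block's end-of-block wait can be deferred into the successor,
+/// since no other control flow path can reach the uses there.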
+static bool hasTrivialSuccessor(const MachineBasicBlock &MBB) {
+ if (MBB.succ_size() != 1)
+ return false;
+
+ const MachineBasicBlock *Succ = *MBB.succ_begin();
+ return (Succ->pred_size() == 1) && MBB.isLayoutSuccessor(Succ);
+}
+
// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
// around other non-memory instructions.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
@@ -531,12 +543,12 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
- IV = getIsaVersion(ST->getFeatureBits());
+ ISA = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- HardwareLimits.Named.VM = getVmcntBitMask(IV);
- HardwareLimits.Named.EXP = getExpcntBitMask(IV);
- HardwareLimits.Named.LGKM = getLgkmcntBitMask(IV);
+ HardwareLimits.Named.VM = AMDGPU::getVmcntBitMask(ISA);
+ HardwareLimits.Named.EXP = AMDGPU::getExpcntBitMask(ISA);
+ HardwareLimits.Named.LGKM = AMDGPU::getLgkmcntBitMask(ISA);
WaitedOn = ZeroCounts;
DelayedWaitOn = ZeroCounts;
@@ -636,12 +648,14 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
handleSendMsg(MBB, I);
if (I->getOpcode() == AMDGPU::S_ENDPGM ||
- I->getOpcode() == AMDGPU::SI_RETURN)
+ I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
EndPgmBlocks.push_back(&MBB);
}
- // Wait for everything at the end of the MBB
- Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
+    // Wait for everything at the end of the MBB. If there is only one
+    // successor, we can defer the wait to the uses in that successor.
+ if (!hasTrivialSuccessor(MBB))
+ Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
}
if (HaveScalarStores) {
@@ -665,7 +679,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
// FIXME: It would be better to insert this before a waitcnt if any.
if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
- I->getOpcode() == AMDGPU::SI_RETURN) && !SeenDCacheWB) {
+ I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) && !SeenDCacheWB) {
Changes = true;
BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
}
@@ -676,5 +690,19 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
for (MachineInstr *I : RemoveMI)
I->eraseFromParent();
+ if (!MFI->isEntryFunction()) {
+    // Wait for any outstanding memory operations that the input registers may
+    // depend on. We can't track them, and it's better to do the wait after the
+    // costly call sequence.
+
+ // TODO: Could insert earlier and schedule more liberally with operations
+ // that only use caller preserved registers.
+ MachineBasicBlock &EntryBB = MF.front();
+ BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+ .addImm(0);
+
+ Changes = true;
+ }
+
return Changes;
}
diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td
index 5523ec142ba7..b83a1fe187eb 100644
--- a/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/lib/Target/AMDGPU/SIInstrFormats.td
@@ -31,6 +31,7 @@ class InstSI <dag outs, dag ins, string asm = "",
field bit VOP2 = 0;
field bit VOPC = 0;
field bit VOP3 = 0;
+ field bit VOP3P = 0;
field bit VINTRP = 0;
field bit SDWA = 0;
field bit DPP = 0;
@@ -78,6 +79,10 @@ class InstSI <dag outs, dag ins, string asm = "",
// is unable to infer the encoding from the operands.
field bit VOPAsmPrefer32Bit = 0;
+  // This bit indicates that the instruction has a floating point result
+  // type, so the clamp modifier has floating point semantics.
+ field bit FPClamp = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@@ -92,6 +97,7 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{8} = VOP2;
let TSFlags{9} = VOPC;
let TSFlags{10} = VOP3;
+ let TSFlags{12} = VOP3P;
let TSFlags{13} = VINTRP;
let TSFlags{14} = SDWA;
@@ -120,6 +126,7 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{39} = ScalarStore;
let TSFlags{40} = FixedSize;
let TSFlags{41} = VOPAsmPrefer32Bit;
+ let TSFlags{42} = FPClamp;
let SchedRW = [Write32Bit];
@@ -131,19 +138,19 @@ class InstSI <dag outs, dag ins, string asm = "",
let AsmVariantName = AMDGPUAsmVariants.Default;
}
-class PseudoInstSI<dag outs, dag ins, list<dag> pattern = []>
- : InstSI<outs, ins, "", pattern> {
+class PseudoInstSI<dag outs, dag ins, list<dag> pattern = [], string asm = "">
+ : InstSI<outs, ins, asm, pattern> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
-class SPseudoInstSI<dag outs, dag ins, list<dag> pattern = []>
- : PseudoInstSI<outs, ins, pattern> {
+class SPseudoInstSI<dag outs, dag ins, list<dag> pattern = [], string asm = "">
+ : PseudoInstSI<outs, ins, pattern, asm> {
let SALU = 1;
}
-class VPseudoInstSI<dag outs, dag ins, list<dag> pattern = []>
- : PseudoInstSI<outs, ins, pattern> {
+class VPseudoInstSI<dag outs, dag ins, list<dag> pattern = [], string asm = "">
+ : PseudoInstSI<outs, ins, pattern, asm> {
let VALU = 1;
let Uses = [EXEC];
}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index 26a8d22062a9..05ac67d26620 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -36,7 +37,7 @@ BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
cl::desc("Restrict range of branch instructions (DEBUG)"));
SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
- : AMDGPUInstrInfo(ST), RI(), ST(ST) {}
+ : AMDGPUInstrInfo(ST), RI(ST), ST(ST) {}
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
@@ -315,7 +316,8 @@ bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
const MachineOperand *SecondDst = nullptr;
if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) ||
- (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt))) {
+ (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) ||
+ (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) {
FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata);
SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata);
} else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
@@ -346,6 +348,21 @@ bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
return (NumLoads * DstRC->getSize()) <= LoadClusterThreshold;
}
+static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const DebugLoc &DL, unsigned DestReg,
+ unsigned SrcReg, bool KillSrc) {
+ MachineFunction *MF = MBB.getParent();
+ DiagnosticInfoUnsupported IllegalCopy(*MF->getFunction(),
+ "illegal SGPR to VGPR copy",
+ DL, DS_Error);
+ LLVMContext &C = MF->getFunction()->getContext();
+ C.diagnose(IllegalCopy);
+
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_ILLEGAL_COPY), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg,
@@ -369,7 +386,11 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
+ if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
+ reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
+ return;
+ }
+
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
@@ -391,7 +412,11 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- assert(AMDGPU::SReg_64RegClass.contains(SrcReg));
+ if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {
+ reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
+ return;
+ }
+
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
@@ -415,8 +440,14 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opcode = AMDGPU::S_MOV_B32;
EltSize = 4;
}
+
+ if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
+ reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
+ return;
+ }
}
+
ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
@@ -870,9 +901,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MachineInstr *MovRel =
BuildMI(MBB, MI, DL, MovRelDesc)
.addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
- .addOperand(MI.getOperand(2))
+ .add(MI.getOperand(2))
.addReg(VecReg, RegState::ImplicitDefine)
- .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0));
+ .addReg(VecReg,
+ RegState::Implicit | (IsUndef ? RegState::Undef : 0));
const int ImpDefIdx =
MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
@@ -897,14 +929,14 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// constant data.
Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
.addReg(RegLo)
- .addOperand(MI.getOperand(1)));
+ .add(MI.getOperand(1)));
MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
.addReg(RegHi);
if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE)
MIB.addImm(0);
else
- MIB.addOperand(MI.getOperand(2));
+ MIB.add(MI.getOperand(2));
Bundler.append(MIB);
llvm::finalizeBundle(MBB, Bundler.begin());
@@ -1290,6 +1322,13 @@ unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
return Count;
}
+// Copy the flags onto the implicit condition register operand.
+static void preserveCondRegFlags(MachineOperand &CondReg,
+ const MachineOperand &OrigCond) {
+ CondReg.setIsUndef(OrigCond.isUndef());
+ CondReg.setIsKill(OrigCond.isKill());
+}
+
unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
@@ -1317,9 +1356,7 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
.addMBB(TBB);
// Copy the flags onto the implicit condition register operand.
- MachineOperand &CondReg = CondBr->getOperand(1);
- CondReg.setIsUndef(Cond[1].isUndef());
- CondReg.setIsKill(Cond[1].isKill());
+ preserveCondRegFlags(CondBr->getOperand(1), Cond[1]);
if (BytesAdded)
*BytesAdded = 4;
@@ -1351,6 +1388,157 @@ bool SIInstrInfo::reverseBranchCondition(
return false;
}
+bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+ ArrayRef<MachineOperand> Cond,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles,
+ int &TrueCycles, int &FalseCycles) const {
+ switch (Cond[0].getImm()) {
+ case VCCNZ:
+ case VCCZ: {
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
+ assert(MRI.getRegClass(FalseReg) == RC);
+
+ int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
+ CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
+
+ // Limit to equal cost for branch vs. N v_cndmask_b32s.
+ return !RI.isSGPRClass(RC) && NumInsts <= 6;
+ }
+ case SCC_TRUE:
+ case SCC_FALSE: {
+ // FIXME: We could insert for VGPRs if we could replace the original compare
+ // with a vector one.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(TrueReg);
+ assert(MRI.getRegClass(FalseReg) == RC);
+
+ int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
+
+    // Multiples of 8 bytes can use s_cselect_b64.
+ if (NumInsts % 2 == 0)
+ NumInsts /= 2;
+
+ CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
+ return RI.isSGPRClass(RC);
+ }
+ default:
+ return false;
+ }
+}
+
+void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ unsigned DstReg, ArrayRef<MachineOperand> Cond,
+ unsigned TrueReg, unsigned FalseReg) const {
+ BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
+ if (Pred == VCCZ || Pred == SCC_FALSE) {
+ Pred = static_cast<BranchPredicate>(-Pred);
+ std::swap(TrueReg, FalseReg);
+ }
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
+ unsigned DstSize = DstRC->getSize();
+
+ if (DstSize == 4) {
+ unsigned SelOp = Pred == SCC_TRUE ?
+ AMDGPU::S_CSELECT_B32 : AMDGPU::V_CNDMASK_B32_e32;
+
+ // Instruction's operands are backwards from what is expected.
+ MachineInstr *Select =
+ BuildMI(MBB, I, DL, get(SelOp), DstReg)
+ .addReg(FalseReg)
+ .addReg(TrueReg);
+
+ preserveCondRegFlags(Select->getOperand(3), Cond[1]);
+ return;
+ }
+
+ if (DstSize == 8 && Pred == SCC_TRUE) {
+ MachineInstr *Select =
+ BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), DstReg)
+ .addReg(FalseReg)
+ .addReg(TrueReg);
+
+ preserveCondRegFlags(Select->getOperand(3), Cond[1]);
+ return;
+ }
+
+ static const int16_t Sub0_15[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
+ };
+
+ static const int16_t Sub0_15_64[] = {
+ AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
+ AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
+ AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
+ AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
+ };
+
+ unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
+ const TargetRegisterClass *EltRC = &AMDGPU::VGPR_32RegClass;
+ const int16_t *SubIndices = Sub0_15;
+ int NElts = DstSize / 4;
+
+  // 64-bit select is only available for SALU.
+ if (Pred == SCC_TRUE) {
+ SelOp = AMDGPU::S_CSELECT_B64;
+ EltRC = &AMDGPU::SGPR_64RegClass;
+ SubIndices = Sub0_15_64;
+
+ assert(NElts % 2 == 0);
+ NElts /= 2;
+ }
+
+ MachineInstrBuilder MIB = BuildMI(
+ MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);
+
+ I = MIB->getIterator();
+
+ SmallVector<unsigned, 8> Regs;
+ for (int Idx = 0; Idx != NElts; ++Idx) {
+ unsigned DstElt = MRI.createVirtualRegister(EltRC);
+ Regs.push_back(DstElt);
+
+ unsigned SubIdx = SubIndices[Idx];
+
+ MachineInstr *Select =
+ BuildMI(MBB, I, DL, get(SelOp), DstElt)
+ .addReg(FalseReg, 0, SubIdx)
+ .addReg(TrueReg, 0, SubIdx);
+ preserveCondRegFlags(Select->getOperand(3), Cond[1]);
+
+ MIB.addReg(DstElt)
+ .addImm(SubIdx);
+ }
+}
+
+bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::V_MOV_B32_e64:
+ case AMDGPU::V_MOV_B64_PSEUDO: {
+    // If there are additional implicit register operands, this may be used for
+    // register indexing, so the source register operand isn't simply copied.
+ unsigned NumOps = MI.getDesc().getNumOperands() +
+ MI.getDesc().getNumImplicitUses();
+
+ return MI.getNumOperands() == NumOps;
+ }
+ case AMDGPU::S_MOV_B32:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::COPY:
+ return true;
+ default:
+ return false;
+ }
+}
+
static void removeModOperands(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc,
@@ -1400,15 +1588,10 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) {
- bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
-
- // Don't fold if we are using source modifiers. The new VOP2 instructions
- // don't have them.
- if (hasModifiersSet(UseMI, AMDGPU::OpName::src0_modifiers) ||
- hasModifiersSet(UseMI, AMDGPU::OpName::src1_modifiers) ||
- hasModifiersSet(UseMI, AMDGPU::OpName::src2_modifiers)) {
+ // Don't fold if we are using source or output modifiers. The new VOP2
+ // instructions don't have them.
+ if (hasAnyModifiersSet(UseMI))
return false;
- }
const MachineOperand &ImmOp = DefMI.getOperand(1);
@@ -1421,6 +1604,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (isInlineConstant(UseMI, *Src0, ImmOp))
return false;
+ bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
@@ -1633,20 +1817,26 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
+ const MachineOperand *Src0Mods =
+ getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
+ const MachineOperand *Src1Mods =
+ getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
+ const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
+ const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
return BuildMI(*MBB, MI, MI.getDebugLoc(),
get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))
- .addOperand(*Dst)
- .addImm(0) // Src0 mods
- .addOperand(*Src0)
- .addImm(0) // Src1 mods
- .addOperand(*Src1)
+ .add(*Dst)
+ .addImm(Src0Mods ? Src0Mods->getImm() : 0)
+ .add(*Src0)
+ .addImm(Src1Mods ? Src1Mods->getImm() : 0)
+ .add(*Src1)
.addImm(0) // Src mods
- .addOperand(*Src2)
- .addImm(0) // clamp
- .addImm(0); // omod
+ .add(*Src2)
+ .addImm(Clamp ? Clamp->getImm() : 0)
+ .addImm(Omod ? Omod->getImm() : 0);
}
// It's not generally safe to move VALU instructions across these since it will
@@ -1687,7 +1877,8 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(),
ST.hasInv2PiInlineImm());
case 16:
- return AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
+ return ST.has16BitInsts() &&
+ AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
ST.hasInv2PiInlineImm());
default:
llvm_unreachable("invalid bitwidth");
@@ -1705,24 +1896,43 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
// would be for any 32-bit integer operand, but would not be for a 64-bit one.
int64_t Imm = MO.getImm();
- switch (operandBitWidth(OperandType)) {
- case 32: {
+ switch (OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32: {
int32_t Trunc = static_cast<int32_t>(Imm);
return Trunc == Imm &&
AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
}
- case 64: {
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64: {
return AMDGPU::isInlinableLiteral64(MO.getImm(),
ST.hasInv2PiInlineImm());
}
- case 16: {
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
if (isInt<16>(Imm) || isUInt<16>(Imm)) {
+ // A few special case instructions have 16-bit operands on subtargets
+ // where 16-bit instructions are not legal.
+ // TODO: Do the 32-bit immediates work? We shouldn't really need to handle
+ // constants in these cases
int16_t Trunc = static_cast<int16_t>(Imm);
- return AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
+ return ST.has16BitInsts() &&
+ AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
}
return false;
}
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
+ uint32_t Trunc = static_cast<uint32_t>(Imm);
+ return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
+ }
default:
llvm_unreachable("invalid bitwidth");
}
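
For reference, the 32-bit case above truncates and round-trips the immediate before asking AMDGPU::isInlinableLiteral32. A minimal sketch of what that predicate accepts, assuming the usual GCN inline-constant set (small integers plus a fixed set of FP bit patterns; the names below are illustrative, not in-tree code):

```cpp
#include <cstdint>

// Sketch: inline constants cover the integers -16..64 plus a handful of
// FP bit patterns; 1/(2*pi) is only inlinable with hasInv2PiInlineImm().
constexpr bool isInlinableLiteral32Sketch(int32_t v, bool hasInv2Pi) {
  if (v >= -16 && v <= 64)
    return true;
  switch (static_cast<uint32_t>(v)) {
  case 0x3f000000u: case 0xbf000000u: // +/-0.5
  case 0x3f800000u: case 0xbf800000u: // +/-1.0
  case 0x40000000u: case 0xc0000000u: // +/-2.0
  case 0x40800000u: case 0xc0800000u: // +/-4.0
    return true;
  case 0x3e22f983u:                   // 1/(2*pi)
    return hasInv2Pi;
  default:
    return false;
  }
}
static_assert(isInlinableLiteral32Sketch(64, false), "64 is inline");
static_assert(!isInlinableLiteral32Sketch(65, false), "65 needs a literal");
```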
@@ -1801,6 +2011,14 @@ bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
return Mods && Mods->getImm();
}
+bool SIInstrInfo::hasAnyModifiersSet(const MachineInstr &MI) const {
+ return hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
+ hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
+ hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers) ||
+ hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
+ hasModifiersSet(MI, AMDGPU::OpName::omod);
+}
+
bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
const MachineOperand &MO,
const MCOperandInfo &OpInfo) const {
@@ -2238,7 +2456,7 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
unsigned Reg = MRI.createVirtualRegister(VRC);
DebugLoc DL = MBB->findDebugLoc(I);
- BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).addOperand(MO);
+ BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO);
MO.ChangeToRegister(Reg, false);
}
@@ -2564,8 +2782,8 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
return;
unsigned DstReg = MRI.createVirtualRegister(DstRC);
- MachineInstr *Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg)
- .addOperand(Op);
+ MachineInstr *Copy =
+ BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
Op.setReg(DstReg);
Op.setSubReg(0);
@@ -2810,13 +3028,13 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
// Regular buffer load / store.
MachineInstrBuilder MIB =
BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
- .addOperand(*VData)
+ .add(*VData)
.addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
// This will be replaced later
// with the new value of vaddr.
- .addOperand(*SRsrc)
- .addOperand(*SOffset)
- .addOperand(*Offset);
+ .add(*SRsrc)
+ .add(*SOffset)
+ .add(*Offset);
// Atomics do not have this operand.
if (const MachineOperand *GLC =
@@ -2836,14 +3054,14 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
} else {
// Atomics with return.
Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode))
- .addOperand(*VData)
- .addOperand(*VDataIn)
+ .add(*VData)
+ .add(*VDataIn)
.addReg(AMDGPU::NoRegister) // Dummy value for vaddr.
// This will be replaced later
// with the new value of vaddr.
- .addOperand(*SRsrc)
- .addOperand(*SOffset)
- .addOperand(*Offset)
+ .add(*SRsrc)
+ .add(*SOffset)
+ .add(*Offset)
.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc))
.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
}
@@ -2970,6 +3188,14 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
case AMDGPU::S_BFE_U64:
case AMDGPU::S_BFM_B64:
llvm_unreachable("Moving this op to VALU not implemented");
+
+ case AMDGPU::S_PACK_LL_B32_B16:
+ case AMDGPU::S_PACK_LH_B32_B16:
+ case AMDGPU::S_PACK_HH_B32_B16: {
+ movePackToVALU(Worklist, MRI, Inst);
+ Inst.eraseFromParent();
+ continue;
+ }
}
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
@@ -3027,12 +3253,15 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef();
unsigned NewDstReg = AMDGPU::NoRegister;
if (HasDst) {
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ continue;
+
// Update the destination register class.
const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
if (!NewDstRC)
continue;
- unsigned DstReg = Inst.getOperand(0).getReg();
if (Inst.isCopy() &&
TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
@@ -3112,15 +3341,13 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
- BuildMI(MBB, MII, DL, InstDesc, DestSub0)
- .addOperand(SrcReg0Sub0);
+ BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
AMDGPU::sub1, Src0SubRC);
unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
- BuildMI(MBB, MII, DL, InstDesc, DestSub1)
- .addOperand(SrcReg0Sub1);
+ BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
@@ -3174,8 +3401,8 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
- .addOperand(SrcReg0Sub0)
- .addOperand(SrcReg1Sub0);
+ .add(SrcReg0Sub0)
+ .add(SrcReg1Sub0);
MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
AMDGPU::sub1, Src0SubRC);
@@ -3184,8 +3411,8 @@ void SIInstrInfo::splitScalar64BitBinaryOp(
unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
- .addOperand(SrcReg0Sub1)
- .addOperand(SrcReg1Sub1);
+ .add(SrcReg0Sub1)
+ .add(SrcReg1Sub1);
unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC);
BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
@@ -3231,13 +3458,9 @@ void SIInstrInfo::splitScalar64BitBCNT(
MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
AMDGPU::sub1, SrcSubRC);
- BuildMI(MBB, MII, DL, InstDesc, MidReg)
- .addOperand(SrcRegSub0)
- .addImm(0);
+ BuildMI(MBB, MII, DL, InstDesc, MidReg).add(SrcRegSub0).addImm(0);
- BuildMI(MBB, MII, DL, InstDesc, ResultReg)
- .addOperand(SrcRegSub1)
- .addReg(MidReg);
+ BuildMI(MBB, MII, DL, InstDesc, ResultReg).add(SrcRegSub1).addReg(MidReg);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
@@ -3326,6 +3549,68 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
}
}
+void SIInstrInfo::movePackToVALU(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineRegisterInfo &MRI,
+ MachineInstr &Inst) const {
+ unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ MachineBasicBlock *MBB = Inst.getParent();
+ MachineOperand &Src0 = Inst.getOperand(1);
+ MachineOperand &Src1 = Inst.getOperand(2);
+ const DebugLoc &DL = Inst.getDebugLoc();
+
+ switch (Inst.getOpcode()) {
+ case AMDGPU::S_PACK_LL_B32_B16: {
+ unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ // FIXME: Can do a lot better if we know the high bits of src0 or src1 are
+ // 0.
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
+ .addImm(0xffff);
+
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
+ .addReg(ImmReg, RegState::Kill)
+ .add(Src0);
+
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg)
+ .add(Src1)
+ .addImm(16)
+ .addReg(TmpReg, RegState::Kill);
+ break;
+ }
+ case AMDGPU::S_PACK_LH_B32_B16: {
+ unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
+ .addImm(0xffff);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_BFI_B32), ResultReg)
+ .addReg(ImmReg, RegState::Kill)
+ .add(Src0)
+ .add(Src1);
+ break;
+ }
+ case AMDGPU::S_PACK_HH_B32_B16: {
+ unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
+ .addImm(16)
+ .add(Src0);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
+      .addImm(0xffff0000);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_OR_B32), ResultReg)
+ .add(Src1)
+ .addReg(ImmReg, RegState::Kill)
+ .addReg(TmpReg, RegState::Kill);
+ break;
+ }
+ default:
+ llvm_unreachable("unhandled s_pack_* instruction");
+ }
+
+ MachineOperand &Dest = Inst.getOperand(0);
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
+}
+
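The three expansions are easiest to verify against a plain reference model: v_lshl_or_b32 computes (s0 << s1) | s2, v_bfi_b32 computes (s0 & s1) | (~s0 & s2), and v_and_or_b32 computes (s0 & s1) | s2. A sketch with illustrative names:

```cpp
#include <cstdint>

// Reference semantics for the VALU expansions of the s_pack_* ops.
constexpr uint32_t packLL(uint32_t s0, uint32_t s1) {
  return (s1 << 16) | (s0 & 0xffffu);       // v_lshl_or_b32
}
constexpr uint32_t packLH(uint32_t s0, uint32_t s1) {
  return (0xffffu & s0) | (~0xffffu & s1);  // v_bfi_b32, mask 0xffff
}
constexpr uint32_t packHH(uint32_t s0, uint32_t s1) {
  return (s1 & 0xffff0000u) | (s0 >> 16);   // v_and_or_b32, mask 0xffff0000
}
static_assert(packLL(0xaaaa1111u, 0xbbbb2222u) == 0x22221111u, "lo|lo");
static_assert(packLH(0xaaaa1111u, 0xbbbb2222u) == 0xbbbb1111u, "lo|hi");
static_assert(packHH(0xaaaa1111u, 0xbbbb2222u) == 0xbbbbaaaau, "hi|hi");
```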
void SIInstrInfo::addSCCDefUsersToVALUWorklist(
MachineInstr &SCCDefInst, SmallVectorImpl<MachineInstr *> &Worklist) const {
// This assumes that all the users of SCC are in the same block
@@ -3448,10 +3733,13 @@ MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
if (ST.isAmdHsaOS()) {
- RsrcDataFormat |= (1ULL << 56);
+ // Set ATC = 1. GFX9 doesn't have this bit.
+ if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS)
+ RsrcDataFormat |= (1ULL << 56);
- if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
- // Set MTYPE = 2
+ // Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this.
+    // Note that it also disables the TC L2 cache and therefore decreases performance.
+ if (ST.getGeneration() == SISubtarget::VOLCANIC_ISLANDS)
RsrcDataFormat |= (2ULL << 59);
}
@@ -3463,11 +3751,14 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size;
- uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
+ // GFX9 doesn't have ELEMENT_SIZE.
+ if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) {
+ uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
+ Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
+ }
- Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT) |
- // IndexStride = 64
- (UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT);
+ // IndexStride = 64.
+ Rsrc23 |= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;
// If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
// Clear them unless we want a huge stride.
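
The ELEMENT_SIZE field encodes log2(bytes) - 1, so the supported 4/8/16-byte private element sizes map to 1/2/3. A sketch, with the shift left as a parameter since the exact RSRC_ELEMENT_SIZE_SHIFT value is defined elsewhere:

```cpp
#include <cstdint>

// Sketch of the ELEMENT_SIZE computation: log2(bytes) - 1, shifted into
// place. GFX9 drops this field entirely, as the change above notes.
constexpr uint32_t log2u(uint32_t v) {        // Log2_32 equivalent
  return v <= 1 ? 0 : 1 + log2u(v >> 1);
}
constexpr uint64_t eltSizeField(uint32_t maxPrivateElementSize,
                                uint32_t shift /* hypothetical */) {
  return static_cast<uint64_t>(log2u(maxPrivateElementSize) - 1) << shift;
}
static_assert(eltSizeField(4, 0) == 1, "4-byte elements encode as 1");
static_assert(eltSizeField(16, 0) == 3, "16-byte elements encode as 3");
```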
@@ -3496,7 +3787,7 @@ unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
return AMDGPU::NoRegister;
assert(!MI.memoperands_empty() &&
- (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);
+ (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS);
FrameIndex = Addr->getIndex();
return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
@@ -3552,16 +3843,11 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (DescSize != 0 && DescSize != 4)
return DescSize;
- if (Opc == AMDGPU::WAVE_BARRIER)
- return 0;
-
// 4-byte instructions may have a 32-bit literal encoded after them. Check
// operands that could ever be literals.
if (isVALU(MI) || isSALU(MI)) {
- if (isFixedSize(MI)) {
- assert(DescSize == 4);
+ if (isFixedSize(MI))
return DescSize;
- }
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
if (Src0Idx == -1)
@@ -3584,7 +3870,6 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return 4;
switch (Opc) {
- case AMDGPU::SI_MASK_BRANCH:
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
case TargetOpcode::DBG_VALUE:
@@ -3609,7 +3894,7 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
return true;
for (const MachineMemOperand *MMO : MI.memoperands()) {
- if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
+ if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS)
return true;
}
return false;
@@ -3640,3 +3925,21 @@ ScheduleHazardRecognizer *
SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
return new GCNHazardRecognizer(MF);
}
+
+bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
+ return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
+ MI.modifiesRegister(AMDGPU::EXEC, &RI);
+}
+
+MachineInstrBuilder
+SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL,
+ unsigned DestReg) const {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+
+ return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
+ .addReg(UnusedCarry, RegState::Define | RegState::Dead);
+}
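V_ADD_I32_e64 defines both a 32-bit sum and a carry-out; the builder above pre-declares the carry as dead, so the caller only appends the two source operands. A rough model of the value semantics, purely for illustration:

```cpp
#include <cstdint>

// Model of the no-carry add: the hardware op still produces a carry-out,
// but the builder marks that def Define | Dead so nothing consumes it.
constexpr uint32_t addNoCarry(uint32_t a, uint32_t b) {
  // Wrapping 32-bit add; the carry-out (bit 32) is discarded.
  return static_cast<uint32_t>((static_cast<uint64_t>(a) + b) & 0xffffffffu);
}
static_assert(addNoCarry(0xffffffffu, 1u) == 0u, "carry is dropped");
```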
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index e68f6f92ba96..659473ca6a47 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -69,6 +69,9 @@ private:
MachineInstr &Inst) const;
void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist,
MachineInstr &Inst) const;
+ void movePackToVALU(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineRegisterInfo &MRI,
+ MachineInstr &Inst) const;
void addUsersToMoveToVALUWorklist(
unsigned Reg, MachineRegisterInfo &MRI,
@@ -203,10 +206,24 @@ public:
bool reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const override;
+
+ bool canInsertSelect(const MachineBasicBlock &MBB,
+ ArrayRef<MachineOperand> Cond,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles,
+ int &TrueCycles, int &FalseCycles) const override;
+
+ void insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ unsigned DstReg, ArrayRef<MachineOperand> Cond,
+ unsigned TrueReg, unsigned FalseReg) const override;
+
bool
areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
AliasAnalysis *AA = nullptr) const override;
+ bool isFoldableCopy(const MachineInstr &MI) const;
+
bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
MachineRegisterInfo *MRI) const final;
@@ -308,6 +325,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::VOP3;
}
+ static bool isSDWA(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
+ }
+
+ bool isSDWA(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SDWA;
+ }
+
static bool isVOPC(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
}
@@ -420,6 +445,22 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::DPP;
}
+ static bool isVOP3P(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
+ }
+
+ bool isVOP3P(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
+ }
+
+ static bool isVINTRP(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
+ }
+
+ bool isVINTRP(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
+ }
+
static bool isScalarUnit(const MachineInstr &MI) {
return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
}
@@ -454,6 +495,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
}
+ static bool hasFPClamp(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::HasFPClamp;
+ }
+
+ bool hasFPClamp(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::HasFPClamp;
+ }
+
bool isVGPRCopy(const MachineInstr &MI) const {
assert(MI.isCopy());
unsigned Dest = MI.getOperand(0).getReg();
@@ -462,28 +511,6 @@ public:
return !RI.isSGPRReg(MRI, Dest);
}
- static int operandBitWidth(uint8_t OperandType) {
- switch (OperandType) {
- case AMDGPU::OPERAND_REG_IMM_INT32:
- case AMDGPU::OPERAND_REG_IMM_FP32:
- case AMDGPU::OPERAND_REG_INLINE_C_INT32:
- case AMDGPU::OPERAND_REG_INLINE_C_FP32:
- return 32;
- case AMDGPU::OPERAND_REG_IMM_INT64:
- case AMDGPU::OPERAND_REG_IMM_FP64:
- case AMDGPU::OPERAND_REG_INLINE_C_INT64:
- case AMDGPU::OPERAND_REG_INLINE_C_FP64:
- return 64;
- case AMDGPU::OPERAND_REG_INLINE_C_INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_FP16:
- case AMDGPU::OPERAND_REG_IMM_INT16:
- case AMDGPU::OPERAND_REG_IMM_FP16:
- return 16;
- default:
- llvm_unreachable("unexpected operand type");
- }
- }
-
bool isInlineConstant(const APInt &Imm) const;
bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
@@ -571,6 +598,7 @@ public:
bool hasModifiersSet(const MachineInstr &MI,
unsigned OpName) const;
+ bool hasAnyModifiersSet(const MachineInstr &MI) const;
bool verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const override;
@@ -731,6 +759,17 @@ public:
ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
+
+ bool isBasicBlockPrologue(const MachineInstr &MI) const override;
+
+ /// \brief Return a partially built integer add instruction without carry.
+ /// Caller must add source operands.
+  /// For pre-GFX9 it will generate an unused carry destination operand.
+ /// TODO: After GFX9 it should return a no-carry operation.
+ MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL,
+ unsigned DestReg) const;
};
namespace AMDGPU {
@@ -741,6 +780,9 @@ namespace AMDGPU {
int getVOPe32(uint16_t Opcode);
LLVM_READONLY
+ int getSDWAOp(uint16_t Opcode);
+
+ LLVM_READONLY
int getCommuteRev(uint16_t Opcode);
LLVM_READONLY
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index ebaefae3bfef..c6daf743f3ac 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -71,11 +71,6 @@ def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
[SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
-def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
- SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i16>,
- SDTCisVT<3, i32>]>
->;
-
class SDSample<string opcode> : SDNode <opcode,
SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v8i32>,
SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
@@ -107,7 +102,7 @@ def SIld_local : SDNode <"ISD::LOAD", SDTLoad,
>;
def si_ld_local : PatFrag <(ops node:$ptr), (SIld_local node:$ptr), [{
- return cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ return cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
def si_load_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{
@@ -144,7 +139,7 @@ def SIst_local : SDNode <"ISD::STORE", SDTStore,
def si_st_local : PatFrag <
(ops node:$val, node:$ptr), (SIst_local node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+ return cast<StoreSDNode>(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
}]>;
def si_store_local : PatFrag <
@@ -196,6 +191,21 @@ def si_uniform_br_scc : PatFrag <
return isCBranchSCC(N);
}]>;
+def lshr_rev : PatFrag <
+ (ops node:$src1, node:$src0),
+ (srl $src0, $src1)
+>;
+
+def ashr_rev : PatFrag <
+ (ops node:$src1, node:$src0),
+ (sra $src0, $src1)
+>;
+
+def lshl_rev : PatFrag <
+ (ops node:$src1, node:$src0),
+ (shl $src0, $src1)
+>;
+
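These fragments exist because the rev-form VALU shifts take the shift amount in src0 and the value in src1, the reverse of the generic ISD shift nodes. A reference model (illustrative only):

```cpp
#include <cstdint>

// v_lshlrev/v_lshrrev/v_ashrrev take (amount, value) rather than
// (value, amount); the swapped PatFrag operands express exactly that.
constexpr uint32_t lshlrev(uint32_t amt, uint32_t val) { return val << amt; }
constexpr uint32_t lshrrev(uint32_t amt, uint32_t val) { return val >> amt; }
constexpr int32_t  ashrrev(uint32_t amt, int32_t val)  { return val >> amt; }
static_assert(lshlrev(4, 1) == 16, "the shift amount comes first");
```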
multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0> {
def _glue : SDNode <
@@ -266,10 +276,6 @@ def SIMM16bit : PatLeaf <(imm),
[{return isInt<16>(N->getSExtValue());}]
>;
-def IMM20bit : PatLeaf <(imm),
- [{return isUInt<20>(N->getZExtValue());}]
->;
-
class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
return isInlineImmediate(N);
}]>;
@@ -299,6 +305,19 @@ class VGPRImm <dag frag> : PatLeaf<frag, [{
return Limit < 10;
}]>;
+def NegateImm : SDNodeXForm<imm, [{
+ return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
+// TODO: Extend this to FP once inline FP immediate values work.
+def NegSubInlineConst32 : ImmLeaf<i32, [{
+ return Imm < -16 && Imm >= -64;
+}], NegateImm>;
+
+def NegSubInlineConst16 : ImmLeaf<i16, [{
+ return Imm < -16 && Imm >= -64;
+}], NegateImm>;
+
//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//
@@ -449,6 +468,12 @@ class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
let ParserMatchClass = MatchClass;
}
+class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
+ OperandWithDefaultOps<i32, (ops (i32 0))> {
+ let PrintMethod = "print"#Name;
+ let ParserMatchClass = MatchClass;
+}
+
let OperandType = "OPERAND_IMMEDIATE" in {
def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
@@ -486,6 +511,11 @@ def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;
+def op_sel : NamedOperandU32Default0<"OpSel", NamedMatchClass<"OpSel">>;
+def op_sel_hi : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
+def neg_lo : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
+def neg_hi : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;
+
def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
@@ -525,6 +555,7 @@ class FPInputModsMatchClass <int opSize> : AsmOperandClass {
let ParserMethod = "parseRegOrImmWithFPInputMods";
let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
}
+
def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
@@ -577,6 +608,33 @@ def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
let PrintMethod = "printOperandAndIntInputMods";
}
+class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
+ let Name = "PackedFP"#opSize#"InputMods";
+ let ParserMethod = "parseRegOrImm";
+ let PredicateMethod = "isRegOrImm";
+// let PredicateMethod = "isPackedFP"#opSize#"InputMods";
+}
+
+class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
+ let Name = "PackedInt"#opSize#"InputMods";
+ let ParserMethod = "parseRegOrImm";
+ let PredicateMethod = "isRegOrImm";
+// let PredicateMethod = "isPackedInt"#opSize#"InputMods";
+}
+
+def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
+def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
+
+class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
+// let PrintMethod = "printPackedFPInputMods";
+}
+
+class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
+  // let PrintMethod = "printPackedIntInputMods";
+}
+
+def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
+def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
//===----------------------------------------------------------------------===//
// Complex patterns
@@ -593,6 +651,14 @@ def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">;
+// VOP3Mods, but the input source is known to never be NaN.
+def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;
+
+def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
+
+def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
+def VOP3PMods0 : ComplexPattern<untyped, 3, "SelectVOP3PMods0">;
+
//===----------------------------------------------------------------------===//
// SI assembler operands
@@ -604,19 +670,32 @@ def SIOperand {
int FLAT_SCR = 0x68;
}
+// This should be kept in sync with SISrcMods enum
def SRCMODS {
int NONE = 0;
int NEG = 1;
+ int ABS = 2;
+ int NEG_ABS = 3;
+
+ int NEG_HI = ABS;
+ int OP_SEL_0 = 4;
+ int OP_SEL_1 = 8;
}
def DSTCLAMP {
int NONE = 0;
+ int ENABLE = 1;
}
def DSTOMOD {
int NONE = 0;
}
+def TRAPID {
+ int LLVM_TRAP = 2;
+ int LLVM_DEBUG_TRAP = 3;
+}
+
//===----------------------------------------------------------------------===//
//
// SI Instruction multiclass helpers.
@@ -648,8 +727,9 @@ class EXP_Helper<bit done, SDPatternOperator node = null_frag> : EXPCommon<
ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
exp_vm:$vm, exp_compr:$compr, i8imm:$en),
"exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm",
- [(node (i8 timm:$en), (i1 timm:$vm), (i8 timm:$tgt), (i1 timm:$compr),
- f32:$src0, f32:$src1, f32:$src2, f32:$src3)]> {
+ [(node (i8 timm:$tgt), (i8 timm:$en),
+ f32:$src0, f32:$src1, f32:$src2, f32:$src3,
+ (i1 timm:$compr), (i1 timm:$vm))]> {
let AsmMatchConverter = "cvtExp";
}
@@ -666,6 +746,7 @@ multiclass EXP_m<bit done, SDPatternOperator node> {
def _si : EXP_Helper<done>,
SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>,
EXPe {
+ let AssemblerPredicates = [isSICI];
let DecoderNamespace = "SICI";
let DisableDecoder = DisableSIDecoder;
}
@@ -673,6 +754,7 @@ multiclass EXP_m<bit done, SDPatternOperator node> {
def _vi : EXP_Helper<done>,
SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.VI>,
EXPe_vi {
+ let AssemblerPredicates = [isVI];
let DecoderNamespace = "VI";
let DisableDecoder = DisableVIDecoder;
}
@@ -706,12 +788,34 @@ class getVALUDstForVT<ValueType VT> {
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
+ !if(!eq(VT.Value, v2f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
- 0)));
- RegisterOperand ret = !if(isFP,
- !if(!eq(VT.Size, 64), VSrc_f64, !if(!eq(VT.Size, 16), VSrc_f16, VSrc_f32)),
- !if(!eq(VT.Size, 64), VSrc_b64, !if(!eq(VT.Size, 16), VSrc_b16, VSrc_b32)));
+ 0))));
+
+ RegisterOperand ret =
+ !if(isFP,
+ !if(!eq(VT.Size, 64),
+ VSrc_f64,
+ !if(!eq(VT.Value, f16.Value),
+ VSrc_f16,
+ !if(!eq(VT.Value, v2f16.Value),
+ VCSrc_v2f16,
+ VSrc_f32
+ )
+ )
+ ),
+ !if(!eq(VT.Size, 64),
+ VSrc_b64,
+ !if(!eq(VT.Value, i16.Value),
+ VSrc_b16,
+ !if(!eq(VT.Value, v2i16.Value),
+ VCSrc_v2b16,
+ VSrc_b32
+ )
+ )
+ )
+ );
}
// Returns the vreg register class to use for source operand given VT
@@ -725,25 +829,38 @@ class getVregSrcForVT<ValueType VT> {
// given VT.
class getVOP3SrcForVT<ValueType VT> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
+ !if(!eq(VT.Value, v2f16.Value), 1,
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
- 0)));
+ 0))));
RegisterOperand ret =
!if(!eq(VT.Size, 128),
- VSrc_128,
- !if(!eq(VT.Size, 64),
+ VSrc_128,
+ !if(!eq(VT.Size, 64),
!if(isFP,
- VCSrc_f64,
- VCSrc_b64),
+ VCSrc_f64,
+ VCSrc_b64),
!if(!eq(VT.Value, i1.Value),
- SCSrc_b64,
- !if(isFP,
- !if(!eq(VT.Size, 16), VCSrc_f16, VCSrc_f32),
- !if(!eq(VT.Size, 16), VCSrc_b16, VCSrc_b32)
- )
- )
- )
- );
+ SCSrc_b64,
+ !if(isFP,
+ !if(!eq(VT.Value, f16.Value),
+ VCSrc_f16,
+ !if(!eq(VT.Value, v2f16.Value),
+ VCSrc_v2f16,
+ VCSrc_f32
+ )
+ ),
+ !if(!eq(VT.Value, i16.Value),
+ VCSrc_b16,
+ !if(!eq(VT.Value, v2i16.Value),
+ VCSrc_v2b16,
+ VCSrc_b32
+ )
+ )
+ )
+ )
+ )
+ );
}
// Returns 1 if the source arguments have modifiers, 0 if they do not.
@@ -753,7 +870,8 @@ class isFloatType<ValueType SrcVT> {
!if(!eq(SrcVT.Value, f16.Value), 1,
!if(!eq(SrcVT.Value, f32.Value), 1,
!if(!eq(SrcVT.Value, f64.Value), 1,
- 0)));
+ !if(!eq(SrcVT.Value, v2f16.Value), 1,
+ 0))));
}
class isIntType<ValueType SrcVT> {
@@ -764,6 +882,23 @@ class isIntType<ValueType SrcVT> {
0)));
}
+class isPackedType<ValueType SrcVT> {
+ bit ret =
+ !if(!eq(SrcVT.Value, v2i16.Value), 1,
+ !if(!eq(SrcVT.Value, v2f16.Value), 1, 0)
+ );
+}
+
+// Float or packed int
+class isModifierType<ValueType SrcVT> {
+ bit ret =
+ !if(!eq(SrcVT.Value, f16.Value), 1,
+ !if(!eq(SrcVT.Value, f32.Value), 1,
+ !if(!eq(SrcVT.Value, f64.Value), 1,
+ !if(!eq(SrcVT.Value, v2f16.Value), 1,
+ !if(!eq(SrcVT.Value, v2i16.Value), 1,
+ 0)))));
+}
// Return type of input modifiers operand for specified input operand
class getSrcMod <ValueType VT> {
@@ -771,6 +906,7 @@ class getSrcMod <ValueType VT> {
!if(!eq(VT.Value, f32.Value), 1,
!if(!eq(VT.Value, f64.Value), 1,
0)));
+ bit isPacked = isPackedType<VT>.ret;
Operand ret = !if(!eq(VT.Size, 64),
!if(isFP, FP64InputMods, Int64InputMods),
!if(isFP,
@@ -801,8 +937,8 @@ class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
// Returns the input arguments for VOP3 instructions for the given SrcVT.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
- bit HasModifiers, Operand Src0Mod, Operand Src1Mod,
- Operand Src2Mod> {
+ bit HasModifiers, bit HasOMod,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
dag ret =
!if (!eq(NumSrcArgs, 0),
@@ -821,9 +957,13 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
!if (!eq(NumSrcArgs, 2),
!if (!eq(HasModifiers, 1),
// VOP 2 with modifiers
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- clampmod:$clamp, omod:$omod)
+ !if( !eq(HasOMod, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ clampmod:$clamp, omod:$omod),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ clampmod:$clamp))
/* else */,
// VOP2 without modifiers
(ins Src0RC:$src0, Src1RC:$src1)
@@ -831,16 +971,57 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
/* NumSrcArgs == 3 */,
!if (!eq(HasModifiers, 1),
// VOP3 with modifiers
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- Src2Mod:$src2_modifiers, Src2RC:$src2,
- clampmod:$clamp, omod:$omod)
+ !if (!eq(HasOMod, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ clampmod:$clamp, omod:$omod),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ clampmod:$clamp))
/* else */,
// VOP3 without modifiers
(ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)
/* endif */ ))));
}
+/// XXX - src1 may only allow VGPRs?
+
+// The modifiers (except clamp) are dummy operands for the benefit of
+// printing and parsing. Their printed values are taken from the
+// srcN_modifiers operands instead.
+class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
+ RegisterOperand Src2RC, int NumSrcArgs,
+ bit HasClamp,
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+ dag ret = !if (!eq(NumSrcArgs, 2),
+ !if (HasClamp,
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ clampmod:$clamp,
+ op_sel:$op_sel, op_sel_hi:$op_sel_hi,
+ neg_lo:$neg_lo, neg_hi:$neg_hi),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ op_sel:$op_sel, op_sel_hi:$op_sel_hi,
+ neg_lo:$neg_lo, neg_hi:$neg_hi)),
+ // else NumSrcArgs == 3
+ !if (HasClamp,
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ clampmod:$clamp,
+ op_sel:$op_sel, op_sel_hi:$op_sel_hi,
+ neg_lo:$neg_lo, neg_hi:$neg_hi),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ op_sel:$op_sel, op_sel_hi:$op_sel_hi,
+ neg_lo:$neg_lo, neg_hi:$neg_hi))
+ );
+}
+
class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
bit HasModifiers, Operand Src0Mod, Operand Src1Mod> {
@@ -924,7 +1105,8 @@ class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> {
// Returns the assembly string for the inputs and outputs of a VOP3
// instruction.
-class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
+class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers,
+ bit HasOMod, ValueType DstVT = i32> {
string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC
string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,");
string src1 = !if(!eq(NumSrcArgs, 1), "",
@@ -934,7 +1116,26 @@ class getAsm64 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT =
string ret =
!if(!eq(HasModifiers, 0),
getAsm32<HasDst, NumSrcArgs, DstVT>.ret,
- dst#", "#src0#src1#src2#"$clamp"#"$omod");
+ dst#", "#src0#src1#src2#"$clamp"#!if(HasOMod, "$omod", ""));
+}
+
+// Returns the assembly string for the inputs and outputs of a VOP3P
+// instruction.
+class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasModifiers,
+ bit HasClamp, ValueType DstVT = i32> {
+ string dst = " $vdst";
+ string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
+ string src1 = !if(!eq(NumSrcArgs, 1), "",
+ !if(!eq(NumSrcArgs, 2), " $src1",
+ " $src1,"));
+ string src2 = !if(!eq(NumSrcArgs, 3), " $src2", "");
+
+ string mods = !if(HasModifiers, "$neg_lo$neg_hi", "");
+ string clamp = !if(HasClamp, "$clamp", "");
+
+ // Each modifier is printed as an array of bits for each operand, so
+ // all operands are printed as part of src0_modifiers.
+ string ret = dst#", "#src0#src1#src2#"$op_sel$op_sel_hi"#mods#clamp;
}
class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
@@ -1035,7 +1236,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret;
field Operand Src0ModSDWA = getSrcModExt<Src0VT>.ret;
field Operand Src1ModSDWA = getSrcModExt<Src1VT>.ret;
-
+
field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
field bit HasDst32 = HasDst;
@@ -1046,7 +1247,7 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
// TODO: Modifiers logic is somewhat ad hoc here; to be refined later.
- field bit HasModifiers = isFloatType<Src0VT>.ret;
+ field bit HasModifiers = isModifierType<Src0VT>.ret;
field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
@@ -1060,12 +1261,20 @@ class VOPProfile <list<ValueType> _ArgVT> {
field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
- field bit HasOMod = HasModifiers;
field bit HasClamp = HasModifiers;
field bit HasSDWAClamp = HasSrc0;
+ field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
+
+ field bit IsPacked = isPackedType<Src0VT>.ret;
+ field bit HasOpSel = IsPacked;
+ field bit HasOMod = !if(HasOpSel, 0, HasModifiers);
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
+ field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
+ field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
+ field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);
+
field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs));
// VOP3b instructions are a special case with a second explicit
@@ -1077,7 +1286,12 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
- HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
+ HasModifiers, HasOMod, Src0Mod, Src1Mod,
+ Src2Mod>.ret;
+ field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
+ NumSrcArgs, HasClamp,
+ Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
+
field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs,
HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
@@ -1085,7 +1299,8 @@ class VOPProfile <list<ValueType> _ArgVT> {
DstVT>.ret;
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
- field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
+ field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, HasOMod, DstVT>.ret;
+ field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
field string AsmDPP = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
}
@@ -1101,11 +1316,18 @@ def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
-def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>;
+def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>;
-def VOP_I16_I16_I16_I16 : VOPProfile <[i32, i32, i32, i32, untyped]>;
+def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>;
def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>;
+def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>;
+def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>;
+def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
+
+def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
+def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
+
def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
@@ -1117,6 +1339,8 @@ def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>;
def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
+def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>;
+def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>;
def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
@@ -1126,6 +1350,7 @@ def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>;
def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
+def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
@@ -1213,6 +1438,15 @@ def getVOPe32 : InstrMapping {
let ValueCols = [["4", "0"]];
}
+// Maps ordinary instructions to their SDWA counterparts
+def getSDWAOp : InstrMapping {
+ let FilterClass = "VOP";
+ let RowFields = ["OpName"];
+ let ColFields = ["AsmVariantName"];
+ let KeyCol = ["Default"];
+ let ValueCols = [["SDWA"]];
+}
+
def getMaskedMIMGOp : InstrMapping {
let FilterClass = "MIMG_Mask";
let RowFields = ["Op"];
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 38e31e75ee67..2f89503e129a 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -111,6 +111,12 @@ def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
(ins VSrc_b64:$src0)>;
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
+def S_TRAP_PSEUDO : SPseudoInstSI <(outs), (ins i16imm:$simm16)> {
+ let hasSideEffects = 1;
+ let SALU = 1;
+ let usesCustomInserter = 1;
+}
+
let usesCustomInserter = 1, SALU = 1 in {
def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins),
[(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;
@@ -146,6 +152,8 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
let mayStore = 1;
let isBarrier = 1;
let isConvergent = 1;
+ let FixedSize = 1;
+ let Size = 0;
}
// SI pseudo instructions. These are used by the CFG structurizer pass
@@ -153,48 +161,44 @@ def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
// Dummy terminator instruction to use after control flow instructions
// replaced with exec mask operations.
-def SI_MASK_BRANCH : PseudoInstSI <
+def SI_MASK_BRANCH : VPseudoInstSI <
(outs), (ins brtarget:$target)> {
let isBranch = 0;
let isTerminator = 1;
let isBarrier = 0;
- let Uses = [EXEC];
let SchedRW = [];
let hasNoSchedulingInfo = 1;
+ let FixedSize = 1;
+ let Size = 0;
}
let isTerminator = 1 in {
def SI_IF: CFPseudoInstSI <
(outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target),
- [(set i64:$dst, (int_amdgcn_if i1:$vcc, bb:$target))], 1, 1> {
+ [(set i64:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
let Constraints = "";
let Size = 12;
- let mayLoad = 1;
- let mayStore = 1;
let hasSideEffects = 1;
}
def SI_ELSE : CFPseudoInstSI <
- (outs SReg_64:$dst), (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
let Constraints = "$src = $dst";
let Size = 12;
- let mayStore = 1;
- let mayLoad = 1;
let hasSideEffects = 1;
}
def SI_LOOP : CFPseudoInstSI <
(outs), (ins SReg_64:$saved, brtarget:$target),
- [(int_amdgcn_loop i64:$saved, bb:$target)], 1, 1> {
+ [(AMDGPUloop i64:$saved, bb:$target)], 1, 1> {
let Size = 8;
- let isBranch = 1;
+ let isBranch = 0;
let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
}
-} // End isBranch = 1, isTerminator = 1
+} // End isTerminator = 1
def SI_END_CF : CFPseudoInstSI <
(outs), (ins SReg_64:$saved),
@@ -202,9 +206,9 @@ def SI_END_CF : CFPseudoInstSI <
let Size = 4;
let isAsCheapAsAMove = 1;
let isReMaterializable = 1;
- let mayLoad = 1;
- let mayStore = 1;
let hasSideEffects = 1;
+ let mayLoad = 1; // FIXME: Should not need memory flags
+ let mayStore = 1;
}
def SI_BREAK : CFPseudoInstSI <
@@ -244,6 +248,10 @@ def SI_KILL_TERMINATOR : SPseudoInstSI <
let isTerminator = 1;
}
+def SI_ILLEGAL_COPY : SPseudoInstSI <
+ (outs unknown:$dst), (ins unknown:$src),
+ [], " ; illegal copy $src to $dst">;
+
} // End Uses = [EXEC], Defs = [EXEC,VCC]
// Branch on undef scc. Used to avoid intermediate copy from
@@ -259,6 +267,14 @@ def SI_PS_LIVE : PseudoInstSI <
let SALU = 1;
}
+def SI_MASKED_UNREACHABLE : SPseudoInstSI <(outs), (ins),
+ [(int_amdgcn_unreachable)],
+ "; divergent unreachable"> {
+ let Size = 0;
+ let hasNoSchedulingInfo = 1;
+ let FixedSize = 1;
+}
+
// Used as an isel pseudo to directly emit initialization with an
// s_mov_b32 rather than a copy of another initialized
// register. MachineCSE skips copies, and we don't want to have to
@@ -270,12 +286,12 @@ def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_b32:$src)> {
let isReMaterializable = 1;
}
-def SI_RETURN : SPseudoInstSI <
- (outs), (ins variable_ops), [(AMDGPUreturn)]> {
+// Return for returning shaders to a shader variant epilog.
+def SI_RETURN_TO_EPILOG : SPseudoInstSI <
+ (outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> {
let isTerminator = 1;
let isBarrier = 1;
let isReturn = 1;
- let hasSideEffects = 1;
let hasNoSchedulingInfo = 1;
let DisableWQM = 1;
}
@@ -383,9 +399,18 @@ def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
} // End SubtargetPredicate = isGCN
let Predicates = [isGCN] in {
+def : Pat<
+ (trap),
+ (S_TRAP_PSEUDO TRAPID.LLVM_TRAP)
+>;
def : Pat<
- (int_amdgcn_else i64:$src, bb:$target),
+ (debugtrap),
+ (S_TRAP_PSEUDO TRAPID.LLVM_DEBUG_TRAP)
+>;
+
+def : Pat<
+ (AMDGPUelse i64:$src, bb:$target),
(SI_ELSE $src, $target, 0)
>;
@@ -423,24 +448,37 @@ def : Pat <
} // End Predicates = [UnsafeFPMath]
+
+// f16_to_fp patterns
def : Pat <
- (f32 (fpextend f16:$src)),
- (V_CVT_F32_F16_e32 $src)
+ (f32 (f16_to_fp i32:$src0)),
+ (V_CVT_F32_F16_e64 SRCMODS.NONE, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
def : Pat <
- (f64 (fpextend f16:$src)),
- (V_CVT_F64_F32_e32 (V_CVT_F32_F16_e32 $src))
+ (f32 (f16_to_fp (and_oneuse i32:$src0, 0x7fff))),
+ (V_CVT_F32_F16_e64 SRCMODS.ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+def : Pat <
+ (f32 (f16_to_fp (or_oneuse i32:$src0, 0x8000))),
+ (V_CVT_F32_F16_e64 SRCMODS.NEG_ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+def : Pat <
+ (f32 (f16_to_fp (xor_oneuse i32:$src0, 0x8000))),
+ (V_CVT_F32_F16_e64 SRCMODS.NEG, $src0, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
def : Pat <
- (f16 (fpround f32:$src)),
- (V_CVT_F16_F32_e32 $src)
+ (f64 (fpextend f16:$src)),
+ (V_CVT_F64_F32_e32 (V_CVT_F32_F16_e32 $src))
>;
+// fp_to_fp16 patterns
def : Pat <
- (f16 (fpround f64:$src)),
- (V_CVT_F16_F32_e32 (V_CVT_F32_F64_e32 $src))
+ (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)))),
+ (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, $clamp, $omod)
>;
def : Pat <
@@ -480,6 +518,16 @@ multiclass FMADPat <ValueType vt, Instruction inst> {
defm : FMADPat <f16, V_MAC_F16_e64>;
defm : FMADPat <f32, V_MAC_F32_e64>;
+class FMADModsPat<Instruction inst, SDPatternOperator mad_opr> : Pat<
+ (f32 (mad_opr (VOP3Mods f32:$src0, i32:$src0_mod),
+ (VOP3Mods f32:$src1, i32:$src1_mod),
+ (VOP3Mods f32:$src2, i32:$src2_mod))),
+ (inst $src0_mod, $src0, $src1_mod, $src1,
+ $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+def : FMADModsPat<V_MAD_F32, AMDGPUfmad_ftz>;
+
multiclass SelectPat <ValueType vt, Instruction inst> {
def : Pat <
(vt (select i1:$src0, vt:$src1, vt:$src2)),
@@ -578,6 +626,16 @@ def : BitConvert <i32, f32, VGPR_32>;
def : BitConvert <f32, i32, VGPR_32>;
def : BitConvert <i32, f32, SReg_32>;
def : BitConvert <f32, i32, SReg_32>;
+def : BitConvert <v2i16, i32, SReg_32>;
+def : BitConvert <i32, v2i16, SReg_32>;
+def : BitConvert <v2f16, i32, SReg_32>;
+def : BitConvert <i32, v2f16, SReg_32>;
+def : BitConvert <v2i16, v2f16, SReg_32>;
+def : BitConvert <v2f16, v2i16, SReg_32>;
+def : BitConvert <v2f16, f32, SReg_32>;
+def : BitConvert <f32, v2f16, SReg_32>;
+def : BitConvert <v2i16, f32, SReg_32>;
+def : BitConvert <f32, v2i16, SReg_32>;
// 64-bit bitcast
def : BitConvert <i64, f64, VReg_64>;
@@ -619,12 +677,20 @@ def : BitConvert <v16f32, v16i32, VReg_512>;
/********** Src & Dst modifiers **********/
/********** =================== **********/
-def : Pat <
- (AMDGPUclamp (VOP3Mods0Clamp f32:$src0, i32:$src0_modifiers, i32:$omod),
- (f32 FP_ZERO), (f32 FP_ONE)),
- (V_ADD_F32_e64 $src0_modifiers, $src0, 0, (i32 0), 1, $omod)
+
+// If denormals are not enabled, denormal flushing only affects the
+// comparison of the inputs. The output result is not flushed.
+class ClampPat<Instruction inst, ValueType vt> : Pat <
+ (vt (AMDGPUclamp
+ (VOP3Mods0Clamp vt:$src0, i32:$src0_modifiers, i32:$omod))),
+ (inst i32:$src0_modifiers, vt:$src0,
+ i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, $omod)
>;
+def : ClampPat<V_MAX_F32_e64, f32>;
+def : ClampPat<V_MAX_F64, f64>;
+def : ClampPat<V_MAX_F16_e64, f16>;
+
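+// The clamp bit on an FP VALU result saturates it to [0, 1], and
+// v_max x, x is otherwise an identity, so the pattern is effectively a
+// clamped move. A sketch of the result semantics, ignoring NaN handling:
+//
+//   constexpr float clamp01(float x) {
+//     return x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x);
+//   }
+//   static_assert(clamp01(1.5f) == 1.0f, "saturated to 1");
+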
/********** ================================ **********/
/********** Floating point absolute/negative **********/
/********** ================================ **********/
@@ -678,6 +744,37 @@ def : Pat <
>;
def : Pat <
+ (fcopysign f16:$src0, f16:$src1),
+ (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
+>;
+
+def : Pat <
+ (fcopysign f32:$src0, f16:$src1),
+ (V_BFI_B32 (S_MOV_B32 (i32 0x7fffffff)), $src0,
+ (V_LSHLREV_B32_e64 (i32 16), $src1))
+>;
+
+def : Pat <
+ (fcopysign f64:$src0, f16:$src1),
+ (REG_SEQUENCE SReg_64,
+ (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
+ (V_BFI_B32 (S_MOV_B32 (i32 0x7fffffff)), (i32 (EXTRACT_SUBREG $src0, sub1)),
+ (V_LSHLREV_B32_e64 (i32 16), $src1)), sub1)
+>;
+
+def : Pat <
+ (fcopysign f16:$src0, f32:$src1),
+ (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0,
+ (V_LSHRREV_B32_e64 (i32 16), $src1))
+>;
+
+def : Pat <
+ (fcopysign f16:$src0, f64:$src1),
+ (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0,
+ (V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1)))
+>;
+
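+// All of the above lower to v_bfi_b32, a bitfield insert: bits selected
+// by the mask come from the second operand, the rest from the third.
+// With mask 0x7fff the magnitude comes from $src0 and bit 15 (the f16
+// sign) from $src1; the shifts only move differently positioned sign
+// bits into place. A sketch (illustrative names):
+//
+//   constexpr uint32_t bfi(uint32_t m, uint32_t x, uint32_t y) {
+//     return (m & x) | (~m & y);
+//   }
+//   constexpr uint32_t copysignF16(uint32_t mag, uint32_t sgn) {
+//     return bfi(0x7fffu, mag, sgn);
+//   }
+//   static_assert(copysignF16(0x3c00u, 0x8000u) == 0xbc00u,
+//                 "copysign(1.0h, -0.0h) == -1.0h");
+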
+def : Pat <
(fneg f16:$src),
(V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x00008000)))
>;
@@ -692,6 +789,25 @@ def : Pat <
(S_OR_B32 $src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit
>;
+def : Pat <
+ (fneg v2f16:$src),
+ (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80008000)), $src)
+>;
+
+def : Pat <
+ (fabs v2f16:$src),
+ (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fff7fff)), $src)
+>;
+
+// This is really (fneg (fabs v2f16:$src))
+//
+// fabs is not reported as free because there is a modifier for it in
+// VOP3P instructions, so it is turned into the bit op.
+def : Pat <
+ (fneg (v2f16 (bitconvert (and_oneuse i32:$src, 0x7fff7fff)))),
+ (S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit
+>;
+
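+// Because each v2f16 lane keeps its sign at bit 15 of its own half, a
+// single 32-bit bit operation handles both lanes at once. A reference
+// model (illustrative only):
+//
+//   constexpr uint32_t v2f16Neg(uint32_t x)    { return x ^ 0x80008000u; }
+//   constexpr uint32_t v2f16Abs(uint32_t x)    { return x & 0x7fff7fffu; }
+//   constexpr uint32_t v2f16NegAbs(uint32_t x) { return x | 0x80008000u; }
+//   static_assert(v2f16Neg(0x3c00bc00u) == 0xbc003c00u,
+//                 "negates both halves");
+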
/********** ================== **********/
/********** Immediate Patterns **********/
/********** ================== **********/
@@ -759,27 +875,6 @@ def : Pat <
def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
def : Pat <
- (int_AMDGPU_cube v4f32:$src),
- (REG_SEQUENCE VReg_128,
- (V_CUBETC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
- 0 /* src1_modifiers */, (f32 (EXTRACT_SUBREG $src, sub1)),
- 0 /* src2_modifiers */, (f32 (EXTRACT_SUBREG $src, sub2)),
- 0 /* clamp */, 0 /* omod */), sub0,
- (V_CUBESC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
- 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
- 0 /* src2_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
- 0 /* clamp */, 0 /* omod */), sub1,
- (V_CUBEMA_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
- 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
- 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
- 0 /* clamp */, 0 /* omod */), sub2,
- (V_CUBEID_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
- 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
- 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
- 0 /* clamp */, 0 /* omod */), sub3)
->;
-
-def : Pat <
(i32 (sext i1:$src0)),
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
>;
@@ -985,6 +1080,11 @@ def : Pat <
//===----------------------------------------------------------------------===//
// Miscellaneous Patterns
//===----------------------------------------------------------------------===//
+def : Pat <
+ (i32 (AMDGPUfp16_zext f16:$src)),
+ (COPY $src)
+>;
+
def : Pat <
(i32 (trunc i64:$a)),
@@ -1028,24 +1128,72 @@ multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
+defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>;
-def : BFEPattern <V_BFE_U32, S_MOV_B32>;
+def : Pat<
+ (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
+ (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src, 0, 0)
+>;
def : Pat<
- (fcanonicalize f16:$src),
- (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), 0, $src, 0, 0)
+ (fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))),
+ (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src, 0, 0)
>;
def : Pat<
- (fcanonicalize f32:$src),
- (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), 0, $src, 0, 0)
+ (fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))),
+ (V_MUL_F64 0, CONST.FP64_ONE, $src_mods, $src, 0, 0)
>;
def : Pat<
- (fcanonicalize f64:$src),
- (V_MUL_F64 0, CONST.FP64_ONE, 0, $src, 0, 0)
+ (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))),
+ (V_PK_MUL_F16 SRCMODS.OP_SEL_1, (i32 CONST.V2FP16_ONE), $src_mods, $src, DSTCLAMP.NONE)
+>;
+
+
+// Allow integer inputs
+class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : Pat<
+ (node (i8 timm:$tgt), (i8 timm:$en), vt:$src0, vt:$src1, vt:$src2, vt:$src3, (i1 timm:$compr), (i1 timm:$vm)),
+ (Inst i8:$tgt, vt:$src0, vt:$src1, vt:$src2, vt:$src3, i1:$vm, i1:$compr, i8:$en)
+>;
+
+def : ExpPattern<AMDGPUexport, i32, EXP>;
+def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
+
+def : Pat <
+ (v2i16 (build_vector i16:$src0, i16:$src1)),
+ (v2i16 (S_PACK_LL_B32_B16 $src0, $src1))
+>;
+
+// With multiple uses of the shift, this will duplicate the shift and
+// increase register pressure.
+def : Pat <
+ (v2i16 (build_vector i16:$src0, (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))),
+ (v2i16 (S_PACK_LH_B32_B16 i16:$src0, i32:$src1))
>;
+def : Pat <
+ (v2i16 (build_vector (i16 (trunc (srl_oneuse i32:$src0, (i32 16)))),
+ (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))),
+ (v2i16 (S_PACK_HH_B32_B16 $src0, $src1))
+>;
+
+// TODO: Should source modifiers be matched to v_pack_b32_f16?
+def : Pat <
+ (v2f16 (build_vector f16:$src0, f16:$src1)),
+ (v2f16 (S_PACK_LL_B32_B16 $src0, $src1))
+>;
+
+// def : Pat <
+// (v2f16 (scalar_to_vector f16:$src0)),
+// (COPY $src0)
+// >;
+
+// def : Pat <
+// (v2i16 (scalar_to_vector i16:$src0)),
+// (COPY $src0)
+// >;
+
//===----------------------------------------------------------------------===//
// Fract Patterns
//===----------------------------------------------------------------------===//
@@ -1083,11 +1231,39 @@ def : Pat <
// Miscellaneous Optimization Patterns
//============================================================================//
+// Undo sub x, c -> add x, -c canonicalization since c is more likely
+// an inline immediate than -c.
+// TODO: Also do for 64-bit.
+def : Pat<
+ (add i32:$src0, (i32 NegSubInlineConst32:$src1)),
+ (S_SUB_I32 $src0, NegSubInlineConst32:$src1)
+>;
+
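+// Concretely: inline immediates cover the integers -16..64, so for
+// add x, c with c in [-64, -17] the negated constant is inline while c
+// itself needs a 32-bit literal. A sketch of the predicate the ImmLeaf
+// implements (illustrative name):
+//
+//   constexpr bool isNegSubInlineConst32(int32_t c) {
+//     return c < -16 && c >= -64;
+//   }
+//   static_assert(isNegSubInlineConst32(-20),
+//                 "add x, -20 -> s_sub_i32 x, 20");
+//   static_assert(!isNegSubInlineConst32(-16), "-16 is already inline");
+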
def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
def : IntMed3Pat<V_MED3_I32, smax, smax_oneuse, smin_oneuse>;
def : IntMed3Pat<V_MED3_U32, umax, umax_oneuse, umin_oneuse>;
+// This matches 16 permutations of
+// max(min(x, y), min(max(x, y), z))
+class FPMed3Pat<ValueType vt,
+ Instruction med3Inst> : Pat<
+ (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+ (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods),
+ (VOP3Mods_nnan vt:$src1, i32:$src1_mods)),
+ (vt (VOP3Mods_nnan vt:$src2, i32:$src2_mods)))),
+ (med3Inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
+
+def : FPMed3Pat<f32, V_MED3_F32>;
+
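+// For NaN-free inputs, max(min(x, y), min(max(x, y), z)) is the median
+// of the three values, which is why the sources must be nnan (NaNs break
+// the min/max identities). A reference model, for illustration:
+//
+//   constexpr float med3(float x, float y, float z) {
+//     float lo = x < y ? x : y;
+//     float hi = x < y ? y : x;
+//     float m  = hi < z ? hi : z;  // min(max(x, y), z)
+//     return lo < m ? m : lo;      // max(min(x, y), ...)
+//   }
+//   static_assert(med3(3.0f, 1.0f, 2.0f) == 2.0f, "median of three");
+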
+let Predicates = [isGFX9] in {
+def : FPMed3Pat<f16, V_MED3_F16>;
+def : IntMed3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>;
+def : IntMed3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
+} // End Predicates = [isGFX9]
+
//============================================================================//
// Assembler aliases
//============================================================================//
diff --git a/lib/Target/AMDGPU/SIIntrinsics.td b/lib/Target/AMDGPU/SIIntrinsics.td
index 5da375468713..7b7cf1635050 100644
--- a/lib/Target/AMDGPU/SIIntrinsics.td
+++ b/lib/Target/AMDGPU/SIIntrinsics.td
@@ -14,23 +14,7 @@
let TargetPrefix = "SI", isTarget = 1 in {
- def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-
- def int_SI_export : Intrinsic <[],
- [llvm_i32_ty, // en
- llvm_i32_ty, // vm (FIXME: should be i1)
- llvm_i32_ty, // done (FIXME: should be i1)
- llvm_i32_ty, // tgt
- llvm_i32_ty, // compr (FIXME: should be i1)
- llvm_float_ty, // src0
- llvm_float_ty, // src1
- llvm_float_ty, // src2
- llvm_float_ty], // src3
- []
- >;
-
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
// Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed
def int_SI_tbuffer_store : Intrinsic <
@@ -64,146 +48,4 @@ let TargetPrefix = "SI", isTarget = 1 in {
llvm_i32_ty], // tfe(imm)
[IntrReadMem, IntrArgMemOnly]>;
- def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
-
- // Fully-flexible SAMPLE instruction.
- class SampleRaw : Intrinsic <
- [llvm_v4f32_ty], // vdata(VGPR)
- [llvm_anyint_ty, // vaddr(VGPR)
- llvm_v8i32_ty, // rsrc(SGPR)
- llvm_v4i32_ty, // sampler(SGPR)
- llvm_i32_ty, // dmask(imm)
- llvm_i32_ty, // unorm(imm)
- llvm_i32_ty, // r128(imm)
- llvm_i32_ty, // da(imm)
- llvm_i32_ty, // glc(imm)
- llvm_i32_ty, // slc(imm)
- llvm_i32_ty, // tfe(imm)
- llvm_i32_ty], // lwe(imm)
- [IntrNoMem]>;
-
- // Image instruction without a sampler.
- class Image : Intrinsic <
- [llvm_v4f32_ty], // vdata(VGPR)
- [llvm_anyint_ty, // vaddr(VGPR)
- llvm_v8i32_ty, // rsrc(SGPR)
- llvm_i32_ty, // dmask(imm)
- llvm_i32_ty, // unorm(imm)
- llvm_i32_ty, // r128(imm)
- llvm_i32_ty, // da(imm)
- llvm_i32_ty, // glc(imm)
- llvm_i32_ty, // slc(imm)
- llvm_i32_ty, // tfe(imm)
- llvm_i32_ty], // lwe(imm)
- [IntrNoMem]>;
-
- // Basic sample
- def int_SI_image_sample : SampleRaw;
- def int_SI_image_sample_cl : SampleRaw;
- def int_SI_image_sample_d : SampleRaw;
- def int_SI_image_sample_d_cl : SampleRaw;
- def int_SI_image_sample_l : SampleRaw;
- def int_SI_image_sample_b : SampleRaw;
- def int_SI_image_sample_b_cl : SampleRaw;
- def int_SI_image_sample_lz : SampleRaw;
- def int_SI_image_sample_cd : SampleRaw;
- def int_SI_image_sample_cd_cl : SampleRaw;
-
- // Sample with comparison
- def int_SI_image_sample_c : SampleRaw;
- def int_SI_image_sample_c_cl : SampleRaw;
- def int_SI_image_sample_c_d : SampleRaw;
- def int_SI_image_sample_c_d_cl : SampleRaw;
- def int_SI_image_sample_c_l : SampleRaw;
- def int_SI_image_sample_c_b : SampleRaw;
- def int_SI_image_sample_c_b_cl : SampleRaw;
- def int_SI_image_sample_c_lz : SampleRaw;
- def int_SI_image_sample_c_cd : SampleRaw;
- def int_SI_image_sample_c_cd_cl : SampleRaw;
-
- // Sample with offsets
- def int_SI_image_sample_o : SampleRaw;
- def int_SI_image_sample_cl_o : SampleRaw;
- def int_SI_image_sample_d_o : SampleRaw;
- def int_SI_image_sample_d_cl_o : SampleRaw;
- def int_SI_image_sample_l_o : SampleRaw;
- def int_SI_image_sample_b_o : SampleRaw;
- def int_SI_image_sample_b_cl_o : SampleRaw;
- def int_SI_image_sample_lz_o : SampleRaw;
- def int_SI_image_sample_cd_o : SampleRaw;
- def int_SI_image_sample_cd_cl_o : SampleRaw;
-
- // Sample with comparison and offsets
- def int_SI_image_sample_c_o : SampleRaw;
- def int_SI_image_sample_c_cl_o : SampleRaw;
- def int_SI_image_sample_c_d_o : SampleRaw;
- def int_SI_image_sample_c_d_cl_o : SampleRaw;
- def int_SI_image_sample_c_l_o : SampleRaw;
- def int_SI_image_sample_c_b_o : SampleRaw;
- def int_SI_image_sample_c_b_cl_o : SampleRaw;
- def int_SI_image_sample_c_lz_o : SampleRaw;
- def int_SI_image_sample_c_cd_o : SampleRaw;
- def int_SI_image_sample_c_cd_cl_o : SampleRaw;
-
- // Basic gather4
- def int_SI_gather4 : SampleRaw;
- def int_SI_gather4_cl : SampleRaw;
- def int_SI_gather4_l : SampleRaw;
- def int_SI_gather4_b : SampleRaw;
- def int_SI_gather4_b_cl : SampleRaw;
- def int_SI_gather4_lz : SampleRaw;
-
- // Gather4 with comparison
- def int_SI_gather4_c : SampleRaw;
- def int_SI_gather4_c_cl : SampleRaw;
- def int_SI_gather4_c_l : SampleRaw;
- def int_SI_gather4_c_b : SampleRaw;
- def int_SI_gather4_c_b_cl : SampleRaw;
- def int_SI_gather4_c_lz : SampleRaw;
-
- // Gather4 with offsets
- def int_SI_gather4_o : SampleRaw;
- def int_SI_gather4_cl_o : SampleRaw;
- def int_SI_gather4_l_o : SampleRaw;
- def int_SI_gather4_b_o : SampleRaw;
- def int_SI_gather4_b_cl_o : SampleRaw;
- def int_SI_gather4_lz_o : SampleRaw;
-
- // Gather4 with comparison and offsets
- def int_SI_gather4_c_o : SampleRaw;
- def int_SI_gather4_c_cl_o : SampleRaw;
- def int_SI_gather4_c_l_o : SampleRaw;
- def int_SI_gather4_c_b_o : SampleRaw;
- def int_SI_gather4_c_b_cl_o : SampleRaw;
- def int_SI_gather4_c_lz_o : SampleRaw;
-
- def int_SI_getlod : SampleRaw;
-
- // Image instrinsics.
- def int_SI_image_load : Image;
- def int_SI_image_load_mip : Image;
- def int_SI_getresinfo : Image;
-
- /* Interpolation Intrinsics */
-
- def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrNoMem]>;
} // End TargetPrefix = "SI", isTarget = 1
-
-let TargetPrefix = "amdgcn", isTarget = 1 in {
- // Emit 2.5 ulp, no denormal division. Should only be inserted by
- // pass based on !fpmath metadata.
- def int_amdgcn_fdiv_fast : Intrinsic<
- [llvm_float_ty], [llvm_float_ty], [IntrNoMem]
- >;
-
- /* Control flow Intrinsics */
-
- def int_amdgcn_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], [IntrConvergent]>;
- def int_amdgcn_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], [IntrConvergent]>;
- def int_amdgcn_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem, IntrConvergent]>;
- def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]>;
- def int_amdgcn_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]>;
- def int_amdgcn_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], [IntrConvergent]>;
- def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>;
-}
diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 99fe96c0be22..933a16646746 100644
--- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -39,15 +39,27 @@
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveVariables.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
using namespace llvm;
@@ -56,39 +68,36 @@ using namespace llvm;
namespace {
class SILoadStoreOptimizer : public MachineFunctionPass {
+
+  struct CombineInfo {
+ MachineBasicBlock::iterator I;
+ MachineBasicBlock::iterator Paired;
+ unsigned EltSize;
+ unsigned Offset0;
+ unsigned Offset1;
+ unsigned BaseOff;
+ bool UseST64;
+ SmallVector<MachineInstr*, 8> InstsToMove;
+  };
+
private:
- const SIInstrInfo *TII;
- const SIRegisterInfo *TRI;
- MachineRegisterInfo *MRI;
- AliasAnalysis *AA;
-
- static bool offsetsCanBeCombined(unsigned Offset0,
- unsigned Offset1,
- unsigned EltSize);
-
- MachineBasicBlock::iterator findMatchingDSInst(
- MachineBasicBlock::iterator I,
- unsigned EltSize,
- SmallVectorImpl<MachineInstr*> &InstsToMove);
-
- MachineBasicBlock::iterator mergeRead2Pair(
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired,
- unsigned EltSize,
- ArrayRef<MachineInstr*> InstsToMove);
-
- MachineBasicBlock::iterator mergeWrite2Pair(
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired,
- unsigned EltSize,
- ArrayRef<MachineInstr*> InstsToMove);
+ const SIInstrInfo *TII = nullptr;
+ const SIRegisterInfo *TRI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ AliasAnalysis *AA = nullptr;
+
+ static bool offsetsCanBeCombined(CombineInfo &CI);
+
+ bool findMatchingDSInst(CombineInfo &CI);
+
+ MachineBasicBlock::iterator mergeRead2Pair(CombineInfo &CI);
+
+ MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI);
public:
static char ID;
- SILoadStoreOptimizer()
- : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), MRI(nullptr),
- AA(nullptr) {}
+ SILoadStoreOptimizer() : MachineFunctionPass(ID) {}
SILoadStoreOptimizer(const TargetMachine &TM_) : MachineFunctionPass(ID) {
initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
@@ -108,7 +117,7 @@ public:
}
};
-} // End anonymous namespace.
+} // end anonymous namespace
INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
"SI Load / Store Optimizer", false, false)
@@ -141,11 +150,10 @@ static void addDefsToList(const MachineInstr &MI,
}
}
-static bool memAccessesCanBeReordered(
- MachineBasicBlock::iterator A,
- MachineBasicBlock::iterator B,
- const SIInstrInfo *TII,
- llvm::AliasAnalysis * AA) {
+static bool memAccessesCanBeReordered(MachineBasicBlock::iterator A,
+ MachineBasicBlock::iterator B,
+ const SIInstrInfo *TII,
+                                      AliasAnalysis *AA) {
return (TII->areMemAccessesTriviallyDisjoint(*A, *B, AA) ||
// RAW or WAR - cannot reorder
// WAW - cannot reorder
@@ -179,7 +187,6 @@ canMoveInstsAcrossMemOp(MachineInstr &MemOp,
ArrayRef<MachineInstr*> InstsToMove,
const SIInstrInfo *TII,
AliasAnalysis *AA) {
-
assert(MemOp.mayLoadOrStore());
for (MachineInstr *InstToMove : InstsToMove) {
@@ -191,47 +198,68 @@ canMoveInstsAcrossMemOp(MachineInstr &MemOp,
return true;
}
-bool SILoadStoreOptimizer::offsetsCanBeCombined(unsigned Offset0,
- unsigned Offset1,
- unsigned Size) {
+bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) {
// XXX - Would the same offset be OK? Is there any reason this would happen or
// be useful?
- if (Offset0 == Offset1)
+ if (CI.Offset0 == CI.Offset1)
return false;
// This won't be valid if the offset isn't aligned.
- if ((Offset0 % Size != 0) || (Offset1 % Size != 0))
+ if ((CI.Offset0 % CI.EltSize != 0) || (CI.Offset1 % CI.EltSize != 0))
return false;
- unsigned EltOffset0 = Offset0 / Size;
- unsigned EltOffset1 = Offset1 / Size;
+ unsigned EltOffset0 = CI.Offset0 / CI.EltSize;
+ unsigned EltOffset1 = CI.Offset1 / CI.EltSize;
+ CI.UseST64 = false;
+ CI.BaseOff = 0;
+
+  // Prefer the st64 form when both element offsets are multiples of 64 and
+  // fit in 8 bits after scaling, even if they would also fit unscaled.
+ if ((EltOffset0 % 64 == 0) && (EltOffset1 % 64) == 0 &&
+ isUInt<8>(EltOffset0 / 64) && isUInt<8>(EltOffset1 / 64)) {
+ CI.Offset0 = EltOffset0 / 64;
+ CI.Offset1 = EltOffset1 / 64;
+ CI.UseST64 = true;
+ return true;
+ }
// Check if the new offsets fit in the reduced 8-bit range.
- if (isUInt<8>(EltOffset0) && isUInt<8>(EltOffset1))
+ if (isUInt<8>(EltOffset0) && isUInt<8>(EltOffset1)) {
+ CI.Offset0 = EltOffset0;
+ CI.Offset1 = EltOffset1;
return true;
+ }
- // If the offset in elements doesn't fit in 8-bits, we might be able to use
- // the stride 64 versions.
- if ((EltOffset0 % 64 != 0) || (EltOffset1 % 64) != 0)
- return false;
+ // Try to shift base address to decrease offsets.
+ unsigned OffsetDiff = std::abs((int)EltOffset1 - (int)EltOffset0);
+ CI.BaseOff = std::min(CI.Offset0, CI.Offset1);
+
+ if ((OffsetDiff % 64 == 0) && isUInt<8>(OffsetDiff / 64)) {
+ CI.Offset0 = (EltOffset0 - CI.BaseOff / CI.EltSize) / 64;
+ CI.Offset1 = (EltOffset1 - CI.BaseOff / CI.EltSize) / 64;
+ CI.UseST64 = true;
+ return true;
+ }
+
+ if (isUInt<8>(OffsetDiff)) {
+ CI.Offset0 = EltOffset0 - CI.BaseOff / CI.EltSize;
+ CI.Offset1 = EltOffset1 - CI.BaseOff / CI.EltSize;
+ return true;
+ }
- return isUInt<8>(EltOffset0 / 64) && isUInt<8>(EltOffset1 / 64);
+ return false;
}
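
For reference, a minimal standalone sketch of the offset arithmetic above (plain C++ with hypothetical byte offsets, not the LLVM API): DS_READ2/DS_WRITE2 encode two 8-bit element offsets, and the st64 variants scale both by a further factor of 64.

    #include <cstdio>

    static bool fitsU8(unsigned V) { return V < 256; }

    int main() {
      // Two hypothetical DS_READ_B32 accesses at byte offsets 1024 and 1280.
      unsigned EltSize = 4;
      unsigned Elt0 = 1024 / EltSize; // 256 elements: too big for 8 bits
      unsigned Elt1 = 1280 / EltSize; // 320 elements: too big for 8 bits
      if (Elt0 % 64 == 0 && Elt1 % 64 == 0 &&
          fitsU8(Elt0 / 64) && fitsU8(Elt1 / 64))
        printf("st64 form: offset0=%u offset1=%u\n", Elt0 / 64, Elt1 / 64);
      // Prints: st64 form: offset0=4 offset1=5
      return 0;
    }
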
-MachineBasicBlock::iterator
-SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
- unsigned EltSize,
- SmallVectorImpl<MachineInstr*> &InstsToMove) {
- MachineBasicBlock::iterator E = I->getParent()->end();
- MachineBasicBlock::iterator MBBI = I;
+bool SILoadStoreOptimizer::findMatchingDSInst(CombineInfo &CI) {
+ MachineBasicBlock::iterator E = CI.I->getParent()->end();
+ MachineBasicBlock::iterator MBBI = CI.I;
++MBBI;
SmallVector<const MachineOperand *, 8> DefsToMove;
- addDefsToList(*I, DefsToMove);
+ addDefsToList(*CI.I, DefsToMove);
for ( ; MBBI != E; ++MBBI) {
-
- if (MBBI->getOpcode() != I->getOpcode()) {
+ if (MBBI->getOpcode() != CI.I->getOpcode()) {
// This is not a matching DS instruction, but we can keep looking as
// long as one of these conditions are met:
@@ -242,14 +270,14 @@ SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
if (MBBI->hasUnmodeledSideEffects())
// We can't re-order this instruction with respect to other memory
// opeations, so we fail both conditions mentioned above.
- return E;
+ return false;
if (MBBI->mayLoadOrStore() &&
- !memAccessesCanBeReordered(*I, *MBBI, TII, AA)) {
+ !memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA)) {
// We fail condition #1, but we may still be able to satisfy condition
// #2. Add this instruction to the move list and then we will check
// if condition #2 holds once we have selected the matching instruction.
- InstsToMove.push_back(&*MBBI);
+ CI.InstsToMove.push_back(&*MBBI);
addDefsToList(*MBBI, DefsToMove);
continue;
}
@@ -257,13 +285,13 @@ SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
// When we match I with another DS instruction we will be moving I down
// to the location of the matched instruction any uses of I will need to
// be moved down as well.
- addToListsIfDependent(*MBBI, DefsToMove, InstsToMove);
+ addToListsIfDependent(*MBBI, DefsToMove, CI.InstsToMove);
continue;
}
// Don't merge volatiles.
if (MBBI->hasOrderedMemoryRef())
- return E;
+ return false;
// Handle a case like
// DS_WRITE_B32 addr, v, idx0
@@ -271,77 +299,67 @@ SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
// DS_WRITE_B32 addr, f(w), idx1
// where the DS_READ_B32 ends up in InstsToMove and therefore prevents
// merging of the two writes.
- if (addToListsIfDependent(*MBBI, DefsToMove, InstsToMove))
+ if (addToListsIfDependent(*MBBI, DefsToMove, CI.InstsToMove))
continue;
- int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::addr);
- const MachineOperand &AddrReg0 = I->getOperand(AddrIdx);
+ int AddrIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
+ AMDGPU::OpName::addr);
+ const MachineOperand &AddrReg0 = CI.I->getOperand(AddrIdx);
const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);
// Check same base pointer. Be careful of subregisters, which can occur with
// vectors of pointers.
if (AddrReg0.getReg() == AddrReg1.getReg() &&
AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
- int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
+ int OffsetIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(),
AMDGPU::OpName::offset);
- unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff;
- unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;
+ CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm() & 0xffff;
+ CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;
+ CI.Paired = MBBI;
// Check both offsets fit in the reduced range.
// We also need to go through the list of instructions that we plan to
// move and make sure they are all safe to move down past the merged
// instruction.
- if (offsetsCanBeCombined(Offset0, Offset1, EltSize) &&
- canMoveInstsAcrossMemOp(*MBBI, InstsToMove, TII, AA))
- return MBBI;
+ if (offsetsCanBeCombined(CI))
+ if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
+ return true;
}
// We've found a load/store that we couldn't merge for some reason.
// We could potentially keep looking, but we'd need to make sure that
// it was safe to move I and also all the instruction in InstsToMove
// down past this instruction.
- if (!memAccessesCanBeReordered(*I, *MBBI, TII, AA) || // check if we can move I across MBBI
- !canMoveInstsAcrossMemOp(*MBBI, InstsToMove, TII, AA) // check if we can move all I's users
- )
+    // Check if we can move I across MBBI and if we can move all I's users.
+ if (!memAccessesCanBeReordered(*CI.I, *MBBI, TII, AA) ||
+ !canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, TII, AA))
break;
}
- return E;
+ return false;
}
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired,
- unsigned EltSize,
- ArrayRef<MachineInstr*> InstsToMove) {
- MachineBasicBlock *MBB = I->getParent();
+ CombineInfo &CI) {
+ MachineBasicBlock *MBB = CI.I->getParent();
// Be careful, since the addresses could be subregisters themselves in weird
// cases, like vectors of pointers.
- const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
-
- const MachineOperand *Dest0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst);
- const MachineOperand *Dest1 = TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst);
-
- unsigned Offset0
- = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
- unsigned Offset1
- = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;
-
- unsigned NewOffset0 = Offset0 / EltSize;
- unsigned NewOffset1 = Offset1 / EltSize;
- unsigned Opc = (EltSize == 4) ? AMDGPU::DS_READ2_B32 : AMDGPU::DS_READ2_B64;
-
- // Prefer the st64 form if we can use it, even if we can fit the offset in the
- // non st64 version. I'm not sure if there's any real reason to do this.
- bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
- if (UseST64) {
- NewOffset0 /= 64;
- NewOffset1 /= 64;
- Opc = (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
- }
+ const auto *AddrReg = TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr);
+
+ const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdst);
+ const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdst);
+
+ unsigned NewOffset0 = CI.Offset0;
+ unsigned NewOffset1 = CI.Offset1;
+ unsigned Opc = (CI.EltSize == 4) ? AMDGPU::DS_READ2_B32
+ : AMDGPU::DS_READ2_B64;
+
+ if (CI.UseST64)
+ Opc = (CI.EltSize == 4) ? AMDGPU::DS_READ2ST64_B32
+ : AMDGPU::DS_READ2ST64_B64;
- unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
- unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
+ unsigned SubRegIdx0 = (CI.EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
+ unsigned SubRegIdx1 = (CI.EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
if (NewOffset0 > NewOffset1) {
// Canonicalize the merged instruction so the smaller offset comes first.
@@ -356,72 +374,70 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
const MCInstrDesc &Read2Desc = TII->get(Opc);
const TargetRegisterClass *SuperRC
- = (EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
+ = (CI.EltSize == 4) ? &AMDGPU::VReg_64RegClass : &AMDGPU::VReg_128RegClass;
unsigned DestReg = MRI->createVirtualRegister(SuperRC);
- DebugLoc DL = I->getDebugLoc();
- MachineInstrBuilder Read2
- = BuildMI(*MBB, Paired, DL, Read2Desc, DestReg)
- .addOperand(*AddrReg) // addr
- .addImm(NewOffset0) // offset0
- .addImm(NewOffset1) // offset1
- .addImm(0) // gds
- .addMemOperand(*I->memoperands_begin())
- .addMemOperand(*Paired->memoperands_begin());
+ DebugLoc DL = CI.I->getDebugLoc();
+
+ unsigned BaseReg = AddrReg->getReg();
+ unsigned BaseRegFlags = 0;
+ if (CI.BaseOff) {
+ BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BaseRegFlags = RegState::Kill;
+ BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::V_ADD_I32_e32), BaseReg)
+ .addImm(CI.BaseOff)
+ .addReg(AddrReg->getReg());
+ }
+
+ MachineInstrBuilder Read2 =
+ BuildMI(*MBB, CI.Paired, DL, Read2Desc, DestReg)
+ .addReg(BaseReg, BaseRegFlags) // addr
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addImm(0) // gds
+ .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
+
(void)Read2;
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
// Copy to the old destination registers.
- BuildMI(*MBB, Paired, DL, CopyDesc)
- .addOperand(*Dest0) // Copy to same destination including flags and sub reg.
- .addReg(DestReg, 0, SubRegIdx0);
- MachineInstr *Copy1 = BuildMI(*MBB, Paired, DL, CopyDesc)
- .addOperand(*Dest1)
- .addReg(DestReg, RegState::Kill, SubRegIdx1);
+ BuildMI(*MBB, CI.Paired, DL, CopyDesc)
+ .add(*Dest0) // Copy to same destination including flags and sub reg.
+ .addReg(DestReg, 0, SubRegIdx0);
+ MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc)
+ .add(*Dest1)
+ .addReg(DestReg, RegState::Kill, SubRegIdx1);
- moveInstsAfter(Copy1, InstsToMove);
+ moveInstsAfter(Copy1, CI.InstsToMove);
- MachineBasicBlock::iterator Next = std::next(I);
- I->eraseFromParent();
- Paired->eraseFromParent();
+ MachineBasicBlock::iterator Next = std::next(CI.I);
+ CI.I->eraseFromParent();
+ CI.Paired->eraseFromParent();
DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
return Next;
}
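
When offsetsCanBeCombined settles on a nonzero BaseOff, the merge above first materializes addr + BaseOff with V_ADD_I32_e32 and encodes only the small per-element deltas. A standalone sketch of that arithmetic with hypothetical offsets (plain C++, not the emitted MIR):

    #include <cstdio>

    int main() {
      // Hypothetical DS_READ_B32 pair at byte offsets 4096 and 4100.
      unsigned EltSize = 4;
      unsigned Elt0 = 4096 / EltSize, Elt1 = 4100 / EltSize; // 1024, 1025
      unsigned BaseOff = 4096; // the smaller of the two byte offsets
      // Conceptually emitted:
      //   V_ADD_I32_e32 newbase, 4096, addr
      //   DS_READ2_B32  dst, newbase, offset0=0, offset1=1
      printf("offset0=%u offset1=%u\n",
             Elt0 - BaseOff / EltSize, Elt1 - BaseOff / EltSize);
      return 0;
    }
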
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired,
- unsigned EltSize,
- ArrayRef<MachineInstr*> InstsToMove) {
- MachineBasicBlock *MBB = I->getParent();
+ CombineInfo &CI) {
+ MachineBasicBlock *MBB = CI.I->getParent();
// Be sure to use .addOperand(), and not .addReg() with these. We want to be
// sure we preserve the subregister index and any register flags set on them.
- const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
- const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0);
+ const MachineOperand *Addr = TII->getNamedOperand(*CI.I, AMDGPU::OpName::addr);
+ const MachineOperand *Data0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::data0);
const MachineOperand *Data1
- = TII->getNamedOperand(*Paired, AMDGPU::OpName::data0);
+ = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::data0);
+ unsigned NewOffset0 = CI.Offset0;
+ unsigned NewOffset1 = CI.Offset1;
+ unsigned Opc = (CI.EltSize == 4) ? AMDGPU::DS_WRITE2_B32
+ : AMDGPU::DS_WRITE2_B64;
- unsigned Offset0
- = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff;
- unsigned Offset1
- = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff;
-
- unsigned NewOffset0 = Offset0 / EltSize;
- unsigned NewOffset1 = Offset1 / EltSize;
- unsigned Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2_B32 : AMDGPU::DS_WRITE2_B64;
-
- // Prefer the st64 form if we can use it, even if we can fit the offset in the
- // non st64 version. I'm not sure if there's any real reason to do this.
- bool UseST64 = (NewOffset0 % 64 == 0) && (NewOffset1 % 64 == 0);
- if (UseST64) {
- NewOffset0 /= 64;
- NewOffset1 /= 64;
- Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;
- }
+ if (CI.UseST64)
+ Opc = (CI.EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32
+ : AMDGPU::DS_WRITE2ST64_B64;
if (NewOffset0 > NewOffset1) {
// Canonicalize the merged instruction so the smaller offset comes first.
@@ -434,24 +450,33 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
"Computed offset doesn't fit");
const MCInstrDesc &Write2Desc = TII->get(Opc);
- DebugLoc DL = I->getDebugLoc();
+ DebugLoc DL = CI.I->getDebugLoc();
+
+ unsigned BaseReg = Addr->getReg();
+ unsigned BaseRegFlags = 0;
+ if (CI.BaseOff) {
+ BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BaseRegFlags = RegState::Kill;
+ BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::V_ADD_I32_e32), BaseReg)
+ .addImm(CI.BaseOff)
+ .addReg(Addr->getReg());
+ }
- MachineInstrBuilder Write2
- = BuildMI(*MBB, Paired, DL, Write2Desc)
- .addOperand(*Addr) // addr
- .addOperand(*Data0) // data0
- .addOperand(*Data1) // data1
- .addImm(NewOffset0) // offset0
- .addImm(NewOffset1) // offset1
- .addImm(0) // gds
- .addMemOperand(*I->memoperands_begin())
- .addMemOperand(*Paired->memoperands_begin());
+ MachineInstrBuilder Write2 =
+ BuildMI(*MBB, CI.Paired, DL, Write2Desc)
+ .addReg(BaseReg, BaseRegFlags) // addr
+ .add(*Data0) // data0
+ .add(*Data1) // data1
+ .addImm(NewOffset0) // offset0
+ .addImm(NewOffset1) // offset1
+ .addImm(0) // gds
+ .setMemRefs(CI.I->mergeMemRefsWith(*CI.Paired));
- moveInstsAfter(Write2, InstsToMove);
+ moveInstsAfter(Write2, CI.InstsToMove);
- MachineBasicBlock::iterator Next = std::next(I);
- I->eraseFromParent();
- Paired->eraseFromParent();
+ MachineBasicBlock::iterator Next = std::next(CI.I);
+ CI.I->eraseFromParent();
+ CI.Paired->eraseFromParent();
DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
return Next;
@@ -472,27 +497,24 @@ bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
continue;
}
- SmallVector<MachineInstr*, 8> InstsToMove;
+ CombineInfo CI;
+ CI.I = I;
unsigned Opc = MI.getOpcode();
if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64) {
- unsigned Size = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4;
- MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size,
- InstsToMove);
- if (Match != E) {
+ CI.EltSize = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4;
+ if (findMatchingDSInst(CI)) {
Modified = true;
- I = mergeRead2Pair(I, Match, Size, InstsToMove);
+ I = mergeRead2Pair(CI);
} else {
++I;
}
continue;
} else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64) {
- unsigned Size = (Opc == AMDGPU::DS_WRITE_B64) ? 8 : 4;
- MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size,
- InstsToMove);
- if (Match != E) {
+ CI.EltSize = (Opc == AMDGPU::DS_WRITE_B64) ? 8 : 4;
+ if (findMatchingDSInst(CI)) {
Modified = true;
- I = mergeWrite2Pair(I, Match, Size, InstsToMove);
+ I = mergeWrite2Pair(CI);
} else {
++I;
}
diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 7ed18f27e591..35d3a93d8710 100644
--- a/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -51,13 +51,23 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/LivePhysRegs.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <iterator>
using namespace llvm;
@@ -67,10 +77,10 @@ namespace {
class SILowerControlFlow : public MachineFunctionPass {
private:
- const SIRegisterInfo *TRI;
- const SIInstrInfo *TII;
- LiveIntervals *LIS;
- MachineRegisterInfo *MRI;
+ const SIRegisterInfo *TRI = nullptr;
+ const SIInstrInfo *TII = nullptr;
+ LiveIntervals *LIS = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
void emitIf(MachineInstr &MI);
void emitElse(MachineInstr &MI);
@@ -88,12 +98,7 @@ private:
public:
static char ID;
- SILowerControlFlow() :
- MachineFunctionPass(ID),
- TRI(nullptr),
- TII(nullptr),
- LIS(nullptr),
- MRI(nullptr) {}
+ SILowerControlFlow() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -113,7 +118,7 @@ public:
}
};
-} // End anonymous namespace
+} // end anonymous namespace
char SILowerControlFlow::ID = 0;
@@ -175,9 +180,8 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
// Insert a pseudo terminator to help keep the verifier happy. This will also
// be used later when inserting skips.
- MachineInstr *NewBr =
- BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
- .addOperand(MI.getOperand(2));
+ MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
+ .add(MI.getOperand(2));
if (!LIS) {
MI.eraseFromParent();
@@ -220,8 +224,9 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
// tied. In order to correctly tie the registers, split this into a copy of
// the src like it does.
unsigned CopyReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
- BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), CopyReg)
- .addOperand(MI.getOperand(1)); // Saved EXEC
+ MachineInstr *CopyExec =
+ BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), CopyReg)
+ .add(MI.getOperand(1)); // Saved EXEC
// This must be inserted before phis and any spill code inserted before the
// else.
@@ -262,6 +267,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
LIS->RemoveMachineInstrFromMaps(MI);
MI.eraseFromParent();
+ LIS->InsertMachineInstrInMaps(*CopyExec);
LIS->InsertMachineInstrInMaps(*OrSaveExec);
LIS->InsertMachineInstrInMaps(*Xor);
@@ -283,10 +289,9 @@ void SILowerControlFlow::emitBreak(MachineInstr &MI) {
const DebugLoc &DL = MI.getDebugLoc();
unsigned Dst = MI.getOperand(0).getReg();
- MachineInstr *Or =
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
- .addReg(AMDGPU::EXEC)
- .addOperand(MI.getOperand(1));
+ MachineInstr *Or = BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
+ .addReg(AMDGPU::EXEC)
+ .add(MI.getOperand(1));
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *Or);
@@ -306,13 +311,13 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
const DebugLoc &DL = MI.getDebugLoc();
MachineInstr *AndN2 =
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64_term), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .addOperand(MI.getOperand(0));
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64_term), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .add(MI.getOperand(0));
MachineInstr *Branch =
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addOperand(MI.getOperand(1));
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .add(MI.getOperand(1));
if (LIS) {
LIS->ReplaceMachineInstrInMaps(MI, *AndN2);
@@ -328,9 +333,9 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
MachineBasicBlock::iterator InsPt = MBB.begin();
MachineInstr *NewMI =
- BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .addOperand(MI.getOperand(0));
+ BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .add(MI.getOperand(0));
if (LIS)
LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp
index be2e14fd4623..3680e02da576 100644
--- a/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -114,18 +114,18 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
assert(Val == 0 || Val == -1);
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_MOV_B32_e32))
- .addOperand(Dst)
- .addImm(Val);
+ .add(Dst)
+ .addImm(Val);
MI.eraseFromParent();
continue;
}
}
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64))
- .addOperand(Dst)
- .addImm(0)
- .addImm(-1)
- .addOperand(Src);
+ .add(Dst)
+ .addImm(0)
+ .addImm(-1)
+ .add(Src);
MI.eraseFromParent();
} else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
SrcRC == &AMDGPU::VReg_1RegClass) {
@@ -140,14 +140,14 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
MRI.getRegClass(DefInst->getOperand(3).getReg()),
&AMDGPU::SGPR_64RegClass)) {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64))
- .addOperand(Dst)
- .addReg(AMDGPU::EXEC)
- .addOperand(DefInst->getOperand(3));
+ .add(Dst)
+ .addReg(AMDGPU::EXEC)
+ .add(DefInst->getOperand(3));
} else {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64))
- .addOperand(Dst)
- .addOperand(Src)
- .addImm(0);
+ .add(Dst)
+ .add(Src)
+ .addImm(0);
}
MI.eraseFromParent();
}
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index ecd46b95ca6f..8e612d2ddfda 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -20,12 +20,6 @@
using namespace llvm;
-static cl::opt<bool> EnableSpillSGPRToVGPR(
- "amdgpu-spill-sgpr-to-vgpr",
- cl::desc("Enable spilling VGPRs to SGPRs"),
- cl::ReallyHidden,
- cl::init(true));
-
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
TIDReg(AMDGPU::NoRegister),
@@ -47,13 +41,13 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
PSInputAddr(0),
+ PSInputEnable(0),
ReturnsVoid(true),
FlatWorkGroupSizes(0, 0),
WavesPerEU(0, 0),
DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
LDSWaveSpillSize(0),
- PSInputEna(0),
NumUserSGPRs(0),
NumSystemSGPRs(0),
HasSpilledSGPRs(false),
@@ -81,34 +75,48 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
PrivateMemoryInputPtr(false) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const Function *F = MF.getFunction();
+ FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
+ WavesPerEU = ST.getWavesPerEU(*F);
- PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
+ // Non-entry functions have no special inputs for now.
+ // TODO: Return early for non-entry CCs.
- const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ CallingConv::ID CC = F->getCallingConv();
+ if (CC == CallingConv::AMDGPU_PS)
+ PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
- if (!AMDGPU::isShader(F->getCallingConv())) {
+ if (AMDGPU::isKernel(CC)) {
KernargSegmentPtr = true;
WorkGroupIDX = true;
WorkItemIDX = true;
}
- if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
+ if (ST.debuggerEmitPrologue()) {
+ // Enable everything.
WorkGroupIDY = true;
-
- if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
WorkGroupIDZ = true;
-
- if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
WorkItemIDY = true;
-
- if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
WorkItemIDZ = true;
+ } else {
+ if (F->hasFnAttribute("amdgpu-work-group-id-y"))
+ WorkGroupIDY = true;
+
+ if (F->hasFnAttribute("amdgpu-work-group-id-z"))
+ WorkGroupIDZ = true;
+
+ if (F->hasFnAttribute("amdgpu-work-item-id-y"))
+ WorkItemIDY = true;
+
+ if (F->hasFnAttribute("amdgpu-work-item-id-z"))
+ WorkItemIDZ = true;
+ }
// X, XY, and XYZ are the only supported combinations, so make sure Y is
// enabled if Z is.
if (WorkItemIDZ)
WorkItemIDY = true;
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
bool MaySpill = ST.isVGPRSpillingEnabled(*F);
bool HasStackObjects = FrameInfo.hasStackObjects();
@@ -135,12 +143,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
// We don't need to worry about accessing spills with flat instructions.
// TODO: On VI where we must use flat for global, we should be able to omit
// this if it is never used for generic access.
- if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
- ST.isAmdHsaOS())
+ if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS())
FlatScratchInit = true;
-
- FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
- WavesPerEU = ST.getWavesPerEU(*F);
}
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
@@ -193,45 +197,60 @@ unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
return PrivateMemoryPtrUserSGPR;
}
-SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
- MachineFunction *MF,
- unsigned FrameIndex,
- unsigned SubIdx) {
- if (!EnableSpillSGPRToVGPR)
- return SpilledReg();
-
- const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
-
- MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- int64_t Offset = FrameInfo.getObjectOffset(FrameIndex);
- Offset += SubIdx * 4;
-
- unsigned LaneVGPRIdx = Offset / (64 * 4);
- unsigned Lane = (Offset / 4) % 64;
-
- struct SpilledReg Spill;
- Spill.Lane = Lane;
-
- if (!LaneVGPRs.count(LaneVGPRIdx)) {
- unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass,
- *MF);
+/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
+bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
+ int FI) {
+ std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
- if (LaneVGPR == AMDGPU::NoRegister)
- // We have no VGPRs left for spilling SGPRs.
- return Spill;
+ // This has already been allocated.
+ if (!SpillLanes.empty())
+ return true;
- LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
-
- // Add this register as live-in to all blocks to avoid machine verifer
- // complaining about use of an undefined physical register.
- for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
- BI != BE; ++BI) {
- BI->addLiveIn(LaneVGPR);
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned WaveSize = ST.getWavefrontSize();
+
+ unsigned Size = FrameInfo.getObjectSize(FI);
+ assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
+ assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
+
+ int NumLanes = Size / 4;
+
+ // Make sure to handle the case where a wide SGPR spill may span between two
+ // VGPRs.
+ for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
+ unsigned LaneVGPR;
+ unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
+
+ if (VGPRIndex == 0) {
+ LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
+ if (LaneVGPR == AMDGPU::NoRegister) {
+ // We have no VGPRs left for spilling SGPRs. Reset because we won't
+ // partially spill the SGPR to VGPRs.
+ SGPRToVGPRSpills.erase(FI);
+ NumVGPRSpillLanes -= I;
+ return false;
+ }
+
+ SpillVGPRs.push_back(LaneVGPR);
+
+      // Add this register as live-in to all blocks to avoid machine verifier
+ // complaining about use of an undefined physical register.
+ for (MachineBasicBlock &BB : MF)
+ BB.addLiveIn(LaneVGPR);
+ } else {
+ LaneVGPR = SpillVGPRs.back();
}
+
+ SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
}
- Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
- return Spill;
+ return true;
+}
+
+void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
+ for (auto &R : SGPRToVGPRSpills)
+ MFI.RemoveStackObject(R.first);
}
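
The lane bookkeeping in allocateSGPRSpillToVGPR packs one 32-bit SGPR subregister per VGPR lane and allocates a fresh VGPR whenever the running lane counter wraps at the wavefront size, which is how a wide spill ends up spanning two VGPRs. A standalone toy with hypothetical numbers:

    #include <cstdio>

    int main() {
      const unsigned WaveSize = 64;    // lanes per VGPR
      unsigned NumVGPRSpillLanes = 62; // lanes already in use (hypothetical)
      unsigned Size = 16;              // an SGPR_128 spill: four 32-bit parts
      for (unsigned I = 0, E = Size / 4; I < E; ++I, ++NumVGPRSpillLanes) {
        unsigned VGPRIndex = NumVGPRSpillLanes % WaveSize;
        if (VGPRIndex == 0)
          printf("(allocate a fresh VGPR)\n");
        printf("part %u -> lane %u\n", I, VGPRIndex);
      }
      // Parts 0-1 land in lanes 62-63 of the current VGPR; parts 2-3 wrap
      // into lanes 0-1 of a newly allocated one.
      return 0;
    }
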
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 6fc8d18bceba..a84f3e274f82 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -16,13 +16,17 @@
#include "AMDGPUMachineFunction.h"
#include "SIRegisterInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include <array>
+#include <cassert>
#include <map>
+#include <utility>
namespace llvm {
-class MachineRegisterInfo;
-
class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
public:
explicit AMDGPUImagePseudoSourceValue() :
@@ -109,6 +113,8 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// Graphics info.
unsigned PSInputAddr;
+ unsigned PSInputEnable;
+
bool ReturnsVoid;
// A pair of default/requested minimum/maximum flat work group sizes.
@@ -130,8 +136,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
public:
// FIXME: Make private
unsigned LDSWaveSpillSize;
- unsigned PSInputEna;
- std::map<unsigned, unsigned> LaneVGPRs;
unsigned ScratchOffsetReg;
unsigned NumUserSGPRs;
unsigned NumSystemSGPRs;
@@ -182,19 +186,39 @@ private:
public:
struct SpilledReg {
- unsigned VGPR;
- int Lane;
+ unsigned VGPR = AMDGPU::NoRegister;
+ int Lane = -1;
+
+ SpilledReg() = default;
SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
- SpilledReg() : VGPR(AMDGPU::NoRegister), Lane(-1) { }
+
bool hasLane() { return Lane != -1;}
bool hasReg() { return VGPR != AMDGPU::NoRegister;}
};
- // SIMachineFunctionInfo definition
+private:
+ // SGPR->VGPR spilling support.
+ typedef std::pair<unsigned, unsigned> SpillRegMask;
+
+  // Track the VGPR and lane index for each subregister of an SGPR spilled
+  // to the frame index key.
+ DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
+ unsigned NumVGPRSpillLanes = 0;
+ SmallVector<unsigned, 2> SpillVGPRs;
+
+public:
SIMachineFunctionInfo(const MachineFunction &MF);
- SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex,
- unsigned SubIdx);
+
+ ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
+ auto I = SGPRToVGPRSpills.find(FrameIndex);
+ return (I == SGPRToVGPRSpills.end()) ?
+ ArrayRef<SpilledReg>() : makeArrayRef(I->second);
+ }
+
+ bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
+ void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
+
bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
unsigned getTIDReg() const { return TIDReg; };
void setTIDReg(unsigned Reg) { TIDReg = Reg; }
@@ -399,6 +423,10 @@ public:
return PSInputAddr;
}
+ unsigned getPSInputEnable() const {
+ return PSInputEnable;
+ }
+
bool isPSInputAllocated(unsigned Index) const {
return PSInputAddr & (1 << Index);
}
@@ -407,6 +435,10 @@ public:
PSInputAddr |= 1 << Index;
}
+ void markPSInputEnabled(unsigned Index) {
+ PSInputEnable |= 1 << Index;
+ }
+
bool returnsVoid() const {
return ReturnsVoid;
}
@@ -512,6 +544,6 @@ public:
}
};
-} // End namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp
index da86bbf9dd2a..9d4e677400e6 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -539,21 +539,30 @@ void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
Preds.push_back(Pred);
assert(none_of(Succs,
- [=](SIScheduleBlock *S) { return PredID == S->getID(); }) &&
+ [=](std::pair<SIScheduleBlock*,
+ SIScheduleBlockLinkKind> S) {
+ return PredID == S.first->getID();
+ }) &&
"Loop in the Block Graph!");
}
-void SIScheduleBlock::addSucc(SIScheduleBlock *Succ) {
+void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
+ SIScheduleBlockLinkKind Kind) {
unsigned SuccID = Succ->getID();
// Check if not already predecessor.
- for (SIScheduleBlock* S : Succs) {
- if (SuccID == S->getID())
+ for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
+ if (SuccID == S.first->getID()) {
+ if (S.second == SIScheduleBlockLinkKind::NoData &&
+ Kind == SIScheduleBlockLinkKind::Data)
+ S.second = Kind;
return;
+ }
}
if (Succ->isHighLatencyBlock())
++NumHighLatencySuccessors;
- Succs.push_back(Succ);
+ Succs.push_back(std::make_pair(Succ, Kind));
+
assert(none_of(Preds,
[=](SIScheduleBlock *P) { return SuccID == P->getID(); }) &&
"Loop in the Block Graph!");
@@ -573,8 +582,10 @@ void SIScheduleBlock::printDebug(bool full) {
}
dbgs() << "\nSuccessors:\n";
- for (SIScheduleBlock* S : Succs) {
- S->printDebug(false);
+ for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
+ if (S.second == SIScheduleBlockLinkKind::Data)
+ dbgs() << "(Data Dep) ";
+ S.first->printDebug(false);
}
if (Scheduled) {
@@ -651,11 +662,21 @@ void SIScheduleBlockCreator::colorHighLatenciesAlone() {
}
}
+static bool
+hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) {
+ for (const auto &PredDep : SU.Preds) {
+ if (PredDep.getSUnit() == &FromSU &&
+ PredDep.getKind() == llvm::SDep::Data)
+ return true;
+ }
+ return false;
+}
+
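
The helper above distinguishes data edges from order-only edges when deciding whether two high latency instructions may share a block. A self-contained toy of the same predicate, using stand-in types instead of llvm::SDep:

    #include <cstdio>
    #include <vector>

    enum DepKind { Order, Data };
    struct Edge { int FromSU; DepKind Kind; }; // stand-in for llvm::SDep

    static bool hasDataDepPred(const std::vector<Edge> &Preds, int FromSU) {
      for (const Edge &E : Preds)
        if (E.FromSU == FromSU && E.Kind == Data)
          return true;
      return false;
    }

    int main() {
      // An SU ordered after SU 3 that only consumes data produced by SU 5.
      std::vector<Edge> Preds = {{3, Order}, {5, Data}};
      printf("%d %d\n", hasDataDepPred(Preds, 3), hasDataDepPred(Preds, 5));
      // Prints: 0 1 -- grouping with SU 3 is allowed, with SU 5 it is not.
      return 0;
    }
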
void SIScheduleBlockCreator::colorHighLatenciesGroups() {
unsigned DAGSize = DAG->SUnits.size();
unsigned NumHighLatencies = 0;
unsigned GroupSize;
- unsigned Color = NextReservedID;
+ int Color = NextReservedID;
unsigned Count = 0;
std::set<unsigned> FormingGroup;
@@ -675,35 +696,102 @@ void SIScheduleBlockCreator::colorHighLatenciesGroups() {
else
GroupSize = 4;
- for (unsigned i = 0, e = DAGSize; i != e; ++i) {
- SUnit *SU = &DAG->SUnits[i];
- if (DAG->IsHighLatencySU[SU->NodeNum]) {
+ for (unsigned SUNum : DAG->TopDownIndex2SU) {
+ const SUnit &SU = DAG->SUnits[SUNum];
+ if (DAG->IsHighLatencySU[SU.NodeNum]) {
unsigned CompatibleGroup = true;
- unsigned ProposedColor = Color;
+ int ProposedColor = Color;
+ std::vector<int> AdditionalElements;
+
+      // We don't want to put two high latency instructions that depend
+      // on each other in the same block. One way would be to check
+      // canAddEdge in both directions, but that currently is not enough
+      // because the graph enforces the high latency order (via links).
+      // Instead, look at the dependencies between the high latency
+      // instructions and deduce whether there is a data dependency.
for (unsigned j : FormingGroup) {
- // TODO: Currently CompatibleGroup will always be false,
- // because the graph enforces the load order. This
- // can be fixed, but as keeping the load order is often
- // good for performance that causes a performance hit (both
- // the default scheduler and this scheduler).
- // When this scheduler determines a good load order,
- // this can be fixed.
- if (!DAG->canAddEdge(SU, &DAG->SUnits[j]) ||
- !DAG->canAddEdge(&DAG->SUnits[j], SU))
+ bool HasSubGraph;
+ std::vector<int> SubGraph;
+          // By construction (topological order), if SU and
+          // DAG->SUnits[j] are linked, DAG->SUnits[j] is necessarily
+          // in the parent graph of SU.
+#ifndef NDEBUG
+ SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
+ HasSubGraph);
+ assert(!HasSubGraph);
+#endif
+ SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
+ HasSubGraph);
+ if (!HasSubGraph)
+ continue; // No dependencies between each other
+ else if (SubGraph.size() > 5) {
+ // Too many elements would be required to be added to the block.
CompatibleGroup = false;
+ break;
+          } else {
+ // Check the type of dependency
+ for (unsigned k : SubGraph) {
+ // If in the path to join the two instructions,
+ // there is another high latency instruction,
+ // or instructions colored for another block
+ // abort the merge.
+ if (DAG->IsHighLatencySU[k] ||
+ (CurrentColoring[k] != ProposedColor &&
+ CurrentColoring[k] != 0)) {
+ CompatibleGroup = false;
+ break;
+ }
+          // If one of the SUs in the subgraph depends on the result of SU j,
+          // there will be a data dependency.
+ if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) {
+ CompatibleGroup = false;
+ break;
+ }
+ }
+ if (!CompatibleGroup)
+ break;
+ // Same check for the SU
+ if (hasDataDependencyPred(SU, DAG->SUnits[j])) {
+ CompatibleGroup = false;
+ break;
+ }
+          // Add all the required instructions to the block. These cannot
+          // live in another block, because they depend (order dependency)
+          // on one of the instructions in the block, and they are required
+          // for the high latency instruction we add.
+ AdditionalElements.insert(AdditionalElements.end(),
+ SubGraph.begin(), SubGraph.end());
+ }
}
- if (!CompatibleGroup || ++Count == GroupSize) {
+ if (CompatibleGroup) {
+ FormingGroup.insert(SU.NodeNum);
+ for (unsigned j : AdditionalElements)
+ CurrentColoring[j] = ProposedColor;
+ CurrentColoring[SU.NodeNum] = ProposedColor;
+ ++Count;
+ }
+      // If we found an incompatible instruction, or if the group is
+      // full, start a new one.
+ if (!CompatibleGroup) {
FormingGroup.clear();
Color = ++NextReservedID;
- if (!CompatibleGroup) {
- ProposedColor = Color;
- FormingGroup.insert(SU->NodeNum);
- }
+ ProposedColor = Color;
+ FormingGroup.insert(SU.NodeNum);
+ CurrentColoring[SU.NodeNum] = ProposedColor;
+ Count = 0;
+ } else if (Count == GroupSize) {
+ FormingGroup.clear();
+ Color = ++NextReservedID;
+ ProposedColor = Color;
Count = 0;
- } else {
- FormingGroup.insert(SU->NodeNum);
}
- CurrentColoring[SU->NodeNum] = ProposedColor;
}
}
}
@@ -835,6 +923,17 @@ void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
unsigned DAGSize = DAG->SUnits.size();
std::vector<int> PendingColoring = CurrentColoring;
+ assert(DAGSize >= 1 &&
+ CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
+ CurrentTopDownReservedDependencyColoring.size() == DAGSize);
+ // If there is no reserved block at all, do nothing. We don't want
+ // everything in one block.
+ if (*std::max_element(CurrentBottomUpReservedDependencyColoring.begin(),
+ CurrentBottomUpReservedDependencyColoring.end()) == 0 &&
+ *std::max_element(CurrentTopDownReservedDependencyColoring.begin(),
+ CurrentTopDownReservedDependencyColoring.end()) == 0)
+ return;
+
for (unsigned SUNum : DAG->BottomUpIndex2SU) {
SUnit *SU = &DAG->SUnits[SUNum];
std::set<unsigned> SUColors;
@@ -856,6 +955,9 @@ void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
SUColors.insert(CurrentColoring[Succ->NodeNum]);
SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
}
+      // If there is only one child/parent block, and that block is not
+      // among the ones we are removing in this path, then merge the
+      // instruction into that block.
if (SUColors.size() == 1 && SUColorsPending.size() == 1)
PendingColoring[SU->NodeNum] = *SUColors.begin();
else // TODO: Attribute new colors depending on color
@@ -974,12 +1076,7 @@ void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
for (unsigned SUNum : DAG->BottomUpIndex2SU) {
SUnit *SU = &DAG->SUnits[SUNum];
unsigned color = CurrentColoring[SU->NodeNum];
- std::map<unsigned, unsigned>::iterator Pos = ColorCount.find(color);
- if (Pos != ColorCount.end()) {
- ++ColorCount[color];
- } else {
- ColorCount[color] = 1;
- }
+ ++ColorCount[color];
}
for (unsigned SUNum : DAG->BottomUpIndex2SU) {
@@ -1087,7 +1184,8 @@ void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVaria
if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
continue;
if (Node2CurrentBlock[Succ->NodeNum] != SUID)
- CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]]);
+ CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
+ SuccDep.isCtrl() ? NoData : Data);
}
for (SDep& PredDep : SU->Preds) {
SUnit *Pred = PredDep.getSUnit();
@@ -1281,10 +1379,8 @@ void SIScheduleBlockCreator::fillStats() {
Block->Height = 0;
else {
unsigned Height = 0;
- for (SIScheduleBlock *Succ : Block->getSuccs()) {
- if (Height < Succ->Height + 1)
- Height = Succ->Height + 1;
- }
+ for (const auto &Succ : Block->getSuccs())
+        Height = std::max(Height, Succ.first->Height + 1);
Block->Height = Height;
}
}
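
The Height recurrence above takes a maximum over successors: a block with no successors has Height 0, and every other block is one taller than its tallest successor. A standalone toy over a hypothetical three-block chain, processed in reverse topological order so every successor already has its final Height:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      // Blocks 0 -> 1 -> 2.
      std::vector<std::vector<int>> Succs = {{1}, {2}, {}};
      std::vector<unsigned> Height(3, 0);
      for (int B = 2; B >= 0; --B) {
        unsigned H = 0;
        for (int S : Succs[B])
          H = std::max(H, Height[S] + 1);
        Height[B] = H;
      }
      printf("heights: %u %u %u\n", Height[0], Height[1], Height[2]);
      // Prints: heights: 2 1 0
      return 0;
    }
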
@@ -1331,13 +1427,7 @@ SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
continue;
int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
- std::map<unsigned, unsigned>::iterator RegPos =
- LiveOutRegsNumUsages[PredID].find(Reg);
- if (RegPos != LiveOutRegsNumUsages[PredID].end()) {
- ++LiveOutRegsNumUsages[PredID][Reg];
- } else {
- LiveOutRegsNumUsages[PredID][Reg] = 1;
- }
+ ++LiveOutRegsNumUsages[PredID][Reg];
}
}
@@ -1361,6 +1451,24 @@ SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
std::set<unsigned> InRegs = DAG->getInRegs();
addLiveRegs(InRegs);
+ // Increase LiveOutRegsNumUsages for blocks
+ // producing registers consumed in another
+ // scheduling region.
+ for (unsigned Reg : DAG->getOutRegs()) {
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ // Do reverse traversal
+ int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i];
+ SIScheduleBlock *Block = Blocks[ID];
+ const std::set<unsigned> &OutRegs = Block->getOutRegs();
+
+ if (OutRegs.find(Reg) == OutRegs.end())
+ continue;
+
+ ++LiveOutRegsNumUsages[ID][Reg];
+ break;
+ }
+ }
+
// Fill LiveRegsConsumers for regs that were already
// defined before scheduling.
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
@@ -1377,12 +1485,8 @@ SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
}
}
- if (!Found) {
- if (LiveRegsConsumers.find(Reg) == LiveRegsConsumers.end())
- LiveRegsConsumers[Reg] = 1;
- else
- ++LiveRegsConsumers[Reg];
- }
+ if (!Found)
+ ++LiveRegsConsumers[Reg];
}
}
@@ -1403,6 +1507,7 @@ SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
for (SIScheduleBlock* Block : BlocksScheduled) {
dbgs() << ' ' << Block->getID();
}
+ dbgs() << '\n';
);
}
@@ -1464,8 +1569,8 @@ SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
VregCurrentUsage, SregCurrentUsage);
if (VregCurrentUsage > maxVregUsage)
maxVregUsage = VregCurrentUsage;
- if (VregCurrentUsage > maxSregUsage)
- maxSregUsage = VregCurrentUsage;
+ if (SregCurrentUsage > maxSregUsage)
+ maxSregUsage = SregCurrentUsage;
DEBUG(
dbgs() << "Picking New Blocks\n";
dbgs() << "Available: ";
@@ -1556,17 +1661,13 @@ void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
}
void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
- for (SIScheduleBlock* Block : Parent->getSuccs()) {
- --BlockNumPredsLeft[Block->getID()];
- if (BlockNumPredsLeft[Block->getID()] == 0) {
- ReadyBlocks.push_back(Block);
- }
- // TODO: Improve check. When the dependency between the high latency
- // instructions and the instructions of the other blocks are WAR or WAW
- // there will be no wait triggered. We would like these cases to not
- // update LastPosHighLatencyParentScheduled.
- if (Parent->isHighLatencyBlock())
- LastPosHighLatencyParentScheduled[Block->getID()] = NumBlockScheduled;
+ for (const auto &Block : Parent->getSuccs()) {
+ if (--BlockNumPredsLeft[Block.first->getID()] == 0)
+ ReadyBlocks.push_back(Block.first);
+
+ if (Parent->isHighLatencyBlock() &&
+ Block.second == SIScheduleBlockLinkKind::Data)
+ LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
}
}
@@ -1578,12 +1679,10 @@ void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
LiveOutRegsNumUsages[Block->getID()].begin(),
E = LiveOutRegsNumUsages[Block->getID()].end(); RegI != E; ++RegI) {
std::pair<unsigned, unsigned> RegP = *RegI;
- if (LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end())
- LiveRegsConsumers[RegP.first] = RegP.second;
- else {
- assert(LiveRegsConsumers[RegP.first] == 0);
- LiveRegsConsumers[RegP.first] += RegP.second;
- }
+ // We produce this register, thus it must not be previously alive.
+ assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
+ LiveRegsConsumers[RegP.first] == 0);
+ LiveRegsConsumers[RegP.first] += RegP.second;
}
if (LastPosHighLatencyParentScheduled[Block->getID()] >
(unsigned)LastPosWaitedHighLatency)
@@ -1825,7 +1924,9 @@ void SIScheduleDAGMI::schedule()
// if VGPR usage is extremely high, try other good performing variants
// which could lead to lower VGPR usage
if (Best.MaxVGPRUsage > 180) {
- std::vector<std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant>> Variants = {
+ static const std::pair<SISchedulerBlockCreatorVariant,
+ SISchedulerBlockSchedulerVariant>
+ Variants[] = {
{ LatenciesAlone, BlockRegUsageLatency },
// { LatenciesAlone, BlockRegUsage },
{ LatenciesGrouped, BlockLatencyRegUsage },
@@ -1844,7 +1945,9 @@ void SIScheduleDAGMI::schedule()
// if VGPR usage is still extremely high, we may spill. Try other variants
// which are less performing, but that could lead to lower VGPR usage.
if (Best.MaxVGPRUsage > 200) {
- std::vector<std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant>> Variants = {
+ static const std::pair<SISchedulerBlockCreatorVariant,
+ SISchedulerBlockSchedulerVariant>
+ Variants[] = {
// { LatenciesAlone, BlockRegUsageLatency },
{ LatenciesAlone, BlockRegUsage },
// { LatenciesGrouped, BlockLatencyRegUsage },
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.h b/lib/Target/AMDGPU/SIMachineScheduler.h
index 77c07350d325..122d0f67ca8c 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.h
+++ b/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -40,13 +40,12 @@ enum SIScheduleCandReason {
struct SISchedulerCandidate {
// The reason for this candidate.
- SIScheduleCandReason Reason;
+ SIScheduleCandReason Reason = NoCand;
// Set of reasons that apply to multiple candidates.
- uint32_t RepeatReasonSet;
+ uint32_t RepeatReasonSet = 0;
- SISchedulerCandidate()
- : Reason(NoCand), RepeatReasonSet(0) {}
+ SISchedulerCandidate() = default;
bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
@@ -55,6 +54,11 @@ struct SISchedulerCandidate {
class SIScheduleDAGMI;
class SIScheduleBlockCreator;
+enum SIScheduleBlockLinkKind {
+ NoData,
+ Data
+};
+
class SIScheduleBlock {
SIScheduleDAGMI *DAG;
SIScheduleBlockCreator *BC;
@@ -84,8 +88,8 @@ class SIScheduleBlock {
std::set<unsigned> LiveInRegs;
std::set<unsigned> LiveOutRegs;
- bool Scheduled;
- bool HighLatencyBlock;
+ bool Scheduled = false;
+ bool HighLatencyBlock = false;
std::vector<unsigned> HasLowLatencyNonWaitedParent;
@@ -93,14 +97,14 @@ class SIScheduleBlock {
unsigned ID;
std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
- std::vector<SIScheduleBlock*> Succs; // All blocks successors.
- unsigned NumHighLatencySuccessors;
+ // All blocks successors, and the kind of link
+ std::vector<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>> Succs;
+ unsigned NumHighLatencySuccessors = 0;
public:
SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
unsigned ID):
- DAG(DAG), BC(BC), TopRPTracker(TopPressure), Scheduled(false),
- HighLatencyBlock(false), ID(ID), NumHighLatencySuccessors(0) {}
+ DAG(DAG), BC(BC), TopRPTracker(TopPressure), ID(ID) {}
~SIScheduleBlock() = default;
@@ -114,10 +118,11 @@ public:
// Add block pred, which has instruction predecessor of SU.
void addPred(SIScheduleBlock *Pred);
- void addSucc(SIScheduleBlock *Succ);
+ void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind);
const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
- const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; }
+ ArrayRef<std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>>
+ getSuccs() const { return Succs; }
unsigned Height; // Maximum topdown path length to block without outputs
unsigned Depth; // Maximum bottomup path length to block without inputs
@@ -213,9 +218,9 @@ struct SIScheduleBlocks {
};
enum SISchedulerBlockCreatorVariant {
- LatenciesAlone,
- LatenciesGrouped,
- LatenciesAlonePlusConsecutive
+ LatenciesAlone,
+ LatenciesGrouped,
+ LatenciesAlonePlusConsecutive
};
class SIScheduleBlockCreator {
@@ -451,6 +456,7 @@ public:
LiveIntervals *getLIS() { return LIS; }
MachineRegisterInfo *getMRI() { return &MRI; }
const TargetRegisterInfo *getTRI() { return TRI; }
+ ScheduleDAGTopologicalSort *GetTopo() { return &Topo; }
SUnit& getEntrySU() { return EntrySU; }
SUnit& getExitSU() { return ExitSU; }
@@ -469,6 +475,14 @@ public:
return InRegs;
}
+ std::set<unsigned> getOutRegs() {
+ std::set<unsigned> OutRegs;
+ for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
+ OutRegs.insert(RegMaskPair.RegUnit);
+ }
+ return OutRegs;
+  }
+
unsigned getVGPRSetID() const { return VGPRSetID; }
unsigned getSGPRSetID() const { return SGPRSetID; }
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
new file mode 100644
index 000000000000..e02c2e3240e8
--- /dev/null
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -0,0 +1,713 @@
+//===-- SIPeepholeSDWA.cpp - Peephole optimization for SDWA instructions --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass tries to apply several peephole SDWA patterns.
+///
+/// E.g. original:
+/// V_LSHRREV_B32_e32 %vreg0, 16, %vreg1
+/// V_ADD_I32_e32 %vreg2, %vreg0, %vreg3
+/// V_LSHLREV_B32_e32 %vreg4, 16, %vreg2
+///
+/// Replaced with:
+/// V_ADD_I32_sdwa %vreg4, %vreg1, %vreg3
+/// dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+///
+//===----------------------------------------------------------------------===//
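The header example folds the shift-add-shift sequence into a single SDWA add: src0_sel:WORD_1 reads the high word of %vreg1, src1_sel:DWORD reads all of %vreg3, and dst_sel:WORD_1 with dst_unused:UNUSED_PAD writes the low word of the sum into the high word of the result, zeroing the rest. A minimal standalone sketch of that equivalence with illustrative values (not part of the pass itself):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t V1 = 0xDEADBEEF, V3 = 0x00001234;

      // Original: v0 = v1 >> 16; v2 = v0 + v3; v4 = v2 << 16
      uint32_t V0 = V1 >> 16;
      uint32_t V2 = V0 + V3;
      uint32_t V4Orig = V2 << 16;

      // SDWA form: add the high word of v1 to all of v3, then place the low
      // word of the sum into the high word of the destination, padding with 0.
      uint32_t Sum = (V1 >> 16) + V3;
      uint32_t V4Sdwa = (Sum & 0xFFFF) << 16;

      assert(V4Orig == V4Sdwa);
      return 0;
    }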
+
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
+#include "SIInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include <unordered_map>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-peephole-sdwa"
+
+STATISTIC(NumSDWAPatternsFound, "Number of SDWA patterns found.");
+STATISTIC(NumSDWAInstructionsPeepholed,
+ "Number of instruction converted to SDWA.");
+
+namespace {
+
+class SDWAOperand;
+
+class SIPeepholeSDWA : public MachineFunctionPass {
+private:
+ MachineRegisterInfo *MRI;
+ const SIRegisterInfo *TRI;
+ const SIInstrInfo *TII;
+
+ std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;
+
+ Optional<int64_t> foldToImm(const MachineOperand &Op) const;
+
+public:
+ static char ID;
+
+ typedef SmallVector<std::unique_ptr<SDWAOperand>, 4> SDWAOperandsVector;
+
+ SIPeepholeSDWA() : MachineFunctionPass(ID) {
+ initializeSIPeepholeSDWAPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void matchSDWAOperands(MachineFunction &MF);
+ bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
+
+ StringRef getPassName() const override { return "SI Peephole SDWA"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+class SDWAOperand {
+private:
+ MachineOperand *Target; // Operand that would be used in the converted instruction
+ MachineOperand *Replaced; // Operand that would be replaced by Target
+
+public:
+ SDWAOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp)
+ : Target(TargetOp), Replaced(ReplacedOp) {
+ assert(Target->isReg());
+ assert(Replaced->isReg());
+ }
+
+ virtual ~SDWAOperand() {}
+
+ virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0;
+ virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0;
+
+ MachineOperand *getTargetOperand() const { return Target; }
+ MachineOperand *getReplacedOperand() const { return Replaced; }
+ MachineInstr *getParentInst() const { return Target->getParent(); }
+ MachineRegisterInfo *getMRI() const {
+ return &getParentInst()->getParent()->getParent()->getRegInfo();
+ }
+};
+
+using namespace AMDGPU::SDWA;
+
+class SDWASrcOperand : public SDWAOperand {
+private:
+ SdwaSel SrcSel;
+ bool Abs;
+ bool Neg;
+ bool Sext;
+
+public:
+ SDWASrcOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
+ SdwaSel SrcSel_ = DWORD, bool Abs_ = false, bool Neg_ = false,
+ bool Sext_ = false)
+ : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_),
+ Neg(Neg_), Sext(Sext_) {}
+
+ virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
+ virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
+
+ SdwaSel getSrcSel() const { return SrcSel; }
+ bool getAbs() const { return Abs; }
+ bool getNeg() const { return Neg; }
+ bool getSext() const { return Sext; }
+
+ uint64_t getSrcMods() const;
+};
+
+class SDWADstOperand : public SDWAOperand {
+private:
+ SdwaSel DstSel;
+ DstUnused DstUn;
+
+public:
+ SDWADstOperand(MachineOperand *TargetOp, MachineOperand *ReplacedOp,
+ SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD)
+ : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {}
+
+ virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) override;
+ virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override;
+
+ SdwaSel getDstSel() const { return DstSel; }
+ DstUnused getDstUnused() const { return DstUn; }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS(SIPeepholeSDWA, DEBUG_TYPE, "SI Peephole SDWA", false, false)
+
+char SIPeepholeSDWA::ID = 0;
+
+char &llvm::SIPeepholeSDWAID = SIPeepholeSDWA::ID;
+
+FunctionPass *llvm::createSIPeepholeSDWAPass() {
+ return new SIPeepholeSDWA();
+}
+
+#ifndef NDEBUG
+
+static raw_ostream& operator<<(raw_ostream &OS, const SdwaSel &Sel) {
+ switch(Sel) {
+ case BYTE_0: OS << "BYTE_0"; break;
+ case BYTE_1: OS << "BYTE_1"; break;
+ case BYTE_2: OS << "BYTE_2"; break;
+ case BYTE_3: OS << "BYTE_3"; break;
+ case WORD_0: OS << "WORD_0"; break;
+ case WORD_1: OS << "WORD_1"; break;
+ case DWORD: OS << "DWORD"; break;
+ }
+ return OS;
+}
+
+static raw_ostream& operator<<(raw_ostream &OS, const DstUnused &Un) {
+ switch(Un) {
+ case UNUSED_PAD: OS << "UNUSED_PAD"; break;
+ case UNUSED_SEXT: OS << "UNUSED_SEXT"; break;
+ case UNUSED_PRESERVE: OS << "UNUSED_PRESERVE"; break;
+ }
+ return OS;
+}
+
+static raw_ostream& operator<<(raw_ostream &OS, const SDWASrcOperand &Src) {
+ OS << "SDWA src: " << *Src.getTargetOperand()
+ << " src_sel:" << Src.getSrcSel()
+ << " abs:" << Src.getAbs() << " neg:" << Src.getNeg()
+ << " sext:" << Src.getSext() << '\n';
+ return OS;
+}
+
+static raw_ostream& operator<<(raw_ostream &OS, const SDWADstOperand &Dst) {
+ OS << "SDWA dst: " << *Dst.getTargetOperand()
+ << " dst_sel:" << Dst.getDstSel()
+ << " dst_unused:" << Dst.getDstUnused() << '\n';
+ return OS;
+}
+
+#endif
+
+static void copyRegOperand(MachineOperand &To, const MachineOperand &From) {
+ assert(To.isReg() && From.isReg());
+ To.setReg(From.getReg());
+ To.setSubReg(From.getSubReg());
+ To.setIsUndef(From.isUndef());
+ if (To.isUse()) {
+ To.setIsKill(From.isKill());
+ } else {
+ To.setIsDead(From.isDead());
+ }
+}
+
+static bool isSameReg(const MachineOperand &LHS, const MachineOperand &RHS) {
+ return LHS.isReg() &&
+ RHS.isReg() &&
+ LHS.getReg() == RHS.getReg() &&
+ LHS.getSubReg() == RHS.getSubReg();
+}
+
+static bool isSubregOf(const MachineOperand &SubReg,
+ const MachineOperand &SuperReg,
+ const TargetRegisterInfo *TRI) {
+
+ if (!SuperReg.isReg() || !SubReg.isReg())
+ return false;
+
+ if (isSameReg(SuperReg, SubReg))
+ return true;
+
+ if (SuperReg.getReg() != SubReg.getReg())
+ return false;
+
+ LaneBitmask SuperMask = TRI->getSubRegIndexLaneMask(SuperReg.getSubReg());
+ LaneBitmask SubMask = TRI->getSubRegIndexLaneMask(SubReg.getSubReg());
+ SuperMask |= ~SubMask;
+ return SuperMask.all();
+}
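The OR-with-complement trick above is a lane-mask subset test: SubReg lies within SuperReg exactly when every lane of the sub-register's mask is also set in the super-register's mask. A standalone sketch with plain integer masks (lane values are illustrative, assuming the usual LaneBitmask semantics):

    #include <cassert>
    #include <cstdint>

    // (SuperMask | ~SubMask) being all-ones is the subset test
    // (SubMask & ~SuperMask) == 0 in disguise.
    static bool isLaneSubset(uint64_t SubMask, uint64_t SuperMask) {
      return (SuperMask | ~SubMask) == ~0ull;
    }

    int main() {
      // Illustrative masks: sub0 covers lane 0, sub0_sub1 covers lanes 0-1.
      assert(isLaneSubset(0b01, 0b11));  // sub0 lies inside sub0_sub1
      assert(!isLaneSubset(0b11, 0b01)); // the pair does not fit in sub0
      return 0;
    }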
+
+uint64_t SDWASrcOperand::getSrcMods() const {
+ uint64_t Mods = 0;
+ if (Abs || Neg) {
+ assert(!Sext &&
+ "Float and integer src modifiers can't be set simulteniously");
+ Mods |= Abs ? SISrcMods::ABS : 0;
+ Mods |= Neg ? SISrcMods::NEG : 0;
+ } else if (Sext) {
+ Mods |= SISrcMods::SEXT;
+ }
+
+ return Mods;
+}
+
+MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) {
+ // For an SDWA src operand, the potential instruction is the one that uses
+ // the register defined by the parent instruction
+ MachineRegisterInfo *MRI = getMRI();
+ MachineOperand *Replaced = getReplacedOperand();
+ assert(Replaced->isReg());
+
+ MachineInstr *PotentialMI = nullptr;
+ for (MachineOperand &PotentialMO : MRI->use_operands(Replaced->getReg())) {
+ // If this is a use of another subreg of the dst reg then do nothing
+ if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo()))
+ continue;
+
+ // If there exists a use of a superreg of dst then we should not combine
+ // this operand
+ if (!isSameReg(PotentialMO, *Replaced))
+ return nullptr;
+
+ // Check that PotentialMI is the only instruction that uses the dst reg
+ if (PotentialMI == nullptr) {
+ PotentialMI = PotentialMO.getParent();
+ } else if (PotentialMI != PotentialMO.getParent()) {
+ return nullptr;
+ }
+ }
+
+ return PotentialMI;
+}
+
+bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
+ // Find the operand in the instruction that matches the source operand and
+ // replace it with the target operand. Set the corresponding src_sel
+
+ MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
+ MachineOperand *SrcMods =
+ TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
+ assert(Src && Src->isReg());
+ if (!isSameReg(*Src, *getReplacedOperand())) {
+ // If this is not src0 then it should be src1
+ Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src1_sel);
+ SrcMods = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
+
+ assert(Src && Src->isReg());
+
+ if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
+ MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
+ !isSameReg(*Src, *getReplacedOperand())) {
+ // In case of v_mac_f16/32_sdwa this pass can try to apply src operand to
+ // src2. This is not allowed.
+ return false;
+ }
+
+ assert(isSameReg(*Src, *getReplacedOperand()) && SrcSel && SrcMods);
+ }
+ copyRegOperand(*Src, *getTargetOperand());
+ SrcSel->setImm(getSrcSel());
+ SrcMods->setImm(getSrcMods());
+ getTargetOperand()->setIsKill(false);
+ return true;
+}
+
+MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) {
+ // For an SDWA dst operand, the potential instruction is the one that defines
+ // the register that this operand uses
+ MachineRegisterInfo *MRI = getMRI();
+ MachineInstr *ParentMI = getParentInst();
+ MachineOperand *Replaced = getReplacedOperand();
+ assert(Replaced->isReg());
+
+ for (MachineOperand &PotentialMO : MRI->def_operands(Replaced->getReg())) {
+ if (!isSubregOf(*Replaced, PotentialMO, MRI->getTargetRegisterInfo()))
+ continue;
+
+ if (!isSameReg(*Replaced, PotentialMO))
+ return nullptr;
+
+ // Check that ParentMI is the only instruction that uses the replaced register
+ for (MachineOperand &UseMO : MRI->use_operands(PotentialMO.getReg())) {
+ if (isSubregOf(UseMO, PotentialMO, MRI->getTargetRegisterInfo()) &&
+ UseMO.getParent() != ParentMI) {
+ return nullptr;
+ }
+ }
+
+ // Due to SSA this should be the only def of the replaced register, so
+ // return it
+ return PotentialMO.getParent();
+ }
+
+ return nullptr;
+}
+
+bool SDWADstOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
+ // Replace vdst operand in MI with target operand. Set dst_sel and dst_unused
+
+ if ((MI.getOpcode() == AMDGPU::V_MAC_F16_sdwa ||
+ MI.getOpcode() == AMDGPU::V_MAC_F32_sdwa) &&
+ getDstSel() != AMDGPU::SDWA::DWORD) {
+ // v_mac_f16/32_sdwa allows dst_sel to be DWORD only
+ return false;
+ }
+
+ MachineOperand *Operand = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+ assert(Operand &&
+ Operand->isReg() &&
+ isSameReg(*Operand, *getReplacedOperand()));
+ copyRegOperand(*Operand, *getTargetOperand());
+ MachineOperand *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel);
+ assert(DstSel);
+ DstSel->setImm(getDstSel());
+ MachineOperand *DstUnused = TII->getNamedOperand(MI, AMDGPU::OpName::dst_unused);
+ assert(DstUnused);
+ DstUnused->setImm(getDstUnused());
+
+ // Remove the original instruction because its register definition would
+ // conflict with the new instruction
+ getParentInst()->eraseFromParent();
+ return true;
+}
+
+Optional<int64_t> SIPeepholeSDWA::foldToImm(const MachineOperand &Op) const {
+ if (Op.isImm()) {
+ return Op.getImm();
+ }
+
+ // If this is not an immediate then it can be a copy of an immediate
+ // value, e.g.:
+ // %vreg1<def> = S_MOV_B32 255;
+ if (Op.isReg()) {
+ for (const MachineOperand &Def : MRI->def_operands(Op.getReg())) {
+ if (!isSameReg(Op, Def))
+ continue;
+
+ const MachineInstr *DefInst = Def.getParent();
+ if (!TII->isFoldableCopy(*DefInst))
+ return None;
+
+ const MachineOperand &Copied = DefInst->getOperand(1);
+ if (!Copied.isImm())
+ return None;
+
+ return Copied.getImm();
+ }
+ }
+
+ return None;
+}
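foldToImm accepts two shapes: a literal immediate, and a register whose single SSA def is a foldable copy of an immediate (e.g. S_MOV_B32 255). A toy model of that logic with simplified stand-ins for MachineOperand/MachineInstr (all names illustrative):

    #include <cassert>
    #include <cstdint>
    #include <optional>

    struct ToyInst;

    struct ToyOperand {
      bool IsImm = false;
      int64_t Imm = 0;
      const ToyInst *Def = nullptr; // defining instruction if a register
    };

    struct ToyInst {
      bool IsFoldableCopy = false;
      ToyOperand Src; // operand 1 of the copy
    };

    static std::optional<int64_t> foldToImm(const ToyOperand &Op) {
      if (Op.IsImm)
        return Op.Imm;                              // shape 1: literal
      if (Op.Def && Op.Def->IsFoldableCopy && Op.Def->Src.IsImm)
        return Op.Def->Src.Imm;                     // shape 2: copy of literal
      return std::nullopt;
    }

    int main() {
      ToyInst Mov{/*IsFoldableCopy=*/true, ToyOperand{true, 255, nullptr}};
      ToyOperand Reg{false, 0, &Mov};               // %vreg1 = S_MOV_B32 255
      assert(*foldToImm(Reg) == 255);
      return 0;
    }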
+
+void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ case AMDGPU::V_LSHRREV_B32_e32:
+ case AMDGPU::V_ASHRREV_I32_e32:
+ case AMDGPU::V_LSHLREV_B32_e32: {
+ // from: v_lshrrev_b32_e32 v1, 16/24, v0
+ // to SDWA src:v0 src_sel:WORD_1/BYTE_3
+
+ // from: v_ashrrev_i32_e32 v1, 16/24, v0
+ // to SDWA src:v0 src_sel:WORD_1/BYTE_3 sext:1
+
+ // from: v_lshlrev_b32_e32 v1, 16/24, v0
+ // to SDWA dst:v1 dst_sel:WORD_1/BYTE_3 dst_unused:UNUSED_PAD
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ auto Imm = foldToImm(*Src0);
+ if (!Imm)
+ break;
+
+ if (*Imm != 16 && *Imm != 24)
+ break;
+
+ MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+ if (TRI->isPhysicalRegister(Src1->getReg()) ||
+ TRI->isPhysicalRegister(Dst->getReg()))
+ break;
+
+ if (Opcode == AMDGPU::V_LSHLREV_B32_e32) {
+ auto SDWADst = make_unique<SDWADstOperand>(
+ Dst, Src1, *Imm == 16 ? WORD_1 : BYTE_3, UNUSED_PAD);
+ DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
+ SDWAOperands[&MI] = std::move(SDWADst);
+ ++NumSDWAPatternsFound;
+ } else {
+ auto SDWASrc = make_unique<SDWASrcOperand>(
+ Src1, Dst, *Imm == 16 ? WORD_1 : BYTE_3, false, false,
+ Opcode != AMDGPU::V_LSHRREV_B32_e32);
+ DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
+ SDWAOperands[&MI] = std::move(SDWASrc);
+ ++NumSDWAPatternsFound;
+ }
+ break;
+ }
+
+ case AMDGPU::V_LSHRREV_B16_e32:
+ case AMDGPU::V_ASHRREV_I16_e32:
+ case AMDGPU::V_LSHLREV_B16_e32: {
+ // from: v_lshrrev_b16_e32 v1, 8, v0
+ // to SDWA src:v0 src_sel:BYTE_1
+
+ // from: v_ashrrev_i16_e32 v1, 8, v0
+ // to SDWA src:v0 src_sel:BYTE_1 sext:1
+
+ // from: v_lshlrev_b16_e32 v1, 8, v0
+ // to SDWA dst:v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ auto Imm = foldToImm(*Src0);
+ if (!Imm || *Imm != 8)
+ break;
+
+ MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+ if (TRI->isPhysicalRegister(Src1->getReg()) ||
+ TRI->isPhysicalRegister(Dst->getReg()))
+ break;
+
+ if (Opcode == AMDGPU::V_LSHLREV_B16_e32) {
+ auto SDWADst =
+ make_unique<SDWADstOperand>(Dst, Src1, BYTE_1, UNUSED_PAD);
+ DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWADst << '\n');
+ SDWAOperands[&MI] = std::move(SDWADst);
+ ++NumSDWAPatternsFound;
+ } else {
+ auto SDWASrc = make_unique<SDWASrcOperand>(
+ Src1, Dst, BYTE_1, false, false,
+ Opcode != AMDGPU::V_LSHRREV_B16_e32);
+ DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
+ SDWAOperands[&MI] = std::move(SDWASrc);
+ ++NumSDWAPatternsFound;
+ }
+ break;
+ }
+
+ case AMDGPU::V_BFE_I32:
+ case AMDGPU::V_BFE_U32: {
+ // e.g.:
+ // from: v_bfe_u32 v1, v0, 8, 8
+ // to SDWA src:v0 src_sel:BYTE_1
+
+ // offset | width | src_sel
+ // ------------------------
+ // 0 | 8 | BYTE_0
+ // 0 | 16 | WORD_0
+ // 0 | 32 | DWORD ?
+ // 8 | 8 | BYTE_1
+ // 16 | 8 | BYTE_2
+ // 16 | 16 | WORD_1
+ // 24 | 8 | BYTE_3
+
+ MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ auto Offset = foldToImm(*Src1);
+ if (!Offset)
+ break;
+
+ MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ auto Width = foldToImm(*Src2);
+ if (!Width)
+ break;
+
+ SdwaSel SrcSel = DWORD;
+
+ if (*Offset == 0 && *Width == 8)
+ SrcSel = BYTE_0;
+ else if (*Offset == 0 && *Width == 16)
+ SrcSel = WORD_0;
+ else if (*Offset == 0 && *Width == 32)
+ SrcSel = DWORD;
+ else if (*Offset == 8 && *Width == 8)
+ SrcSel = BYTE_1;
+ else if (*Offset == 16 && *Width == 8)
+ SrcSel = BYTE_2;
+ else if (*Offset == 16 && *Width == 16)
+ SrcSel = WORD_1;
+ else if (*Offset == 24 && *Width == 8)
+ SrcSel = BYTE_3;
+ else
+ break;
+
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+ if (TRI->isPhysicalRegister(Src0->getReg()) ||
+ TRI->isPhysicalRegister(Dst->getReg()))
+ break;
+
+ auto SDWASrc = make_unique<SDWASrcOperand>(
+ Src0, Dst, SrcSel, false, false,
+ Opcode != AMDGPU::V_BFE_U32);
+ DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
+ SDWAOperands[&MI] = std::move(SDWASrc);
+ ++NumSDWAPatternsFound;
+ break;
+ }
+ case AMDGPU::V_AND_B32_e32: {
+ // e.g.:
+ // from: v_and_b32_e32 v1, 0x0000ffff/0x000000ff, v0
+ // to SDWA src:v0 src_sel:WORD_0/BYTE_0
+
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ auto Imm = foldToImm(*Src0);
+ if (!Imm)
+ break;
+
+ if (*Imm != 0x0000ffff && *Imm != 0x000000ff)
+ break;
+
+ MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+
+ if (TRI->isPhysicalRegister(Src1->getReg()) ||
+ TRI->isPhysicalRegister(Dst->getReg()))
+ break;
+
+ auto SDWASrc = make_unique<SDWASrcOperand>(
+ Src1, Dst, *Imm == 0x0000ffff ? WORD_0 : BYTE_0);
+ DEBUG(dbgs() << "Match: " << MI << "To: " << *SDWASrc << '\n');
+ SDWAOperands[&MI] = std::move(SDWASrc);
+ ++NumSDWAPatternsFound;
+ break;
+ }
+ }
+ }
+ }
+}
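The BFE table above is the entire selector space: only byte-aligned 8-bit, word-aligned 16-bit, and the full 32-bit extracts have an SDWA encoding; everything else is left alone. A small standalone helper restating the mapping (a sketch, not the pass's code):

    #include <cassert>
    #include <string>

    // Returns the SDWA selector name for an (offset, width) pair, or "" when
    // no byte/word selector matches and the BFE cannot be converted.
    static std::string bfeToSel(int Offset, int Width) {
      if (Width == 8) {
        switch (Offset) {
        case 0:  return "BYTE_0";
        case 8:  return "BYTE_1";
        case 16: return "BYTE_2";
        case 24: return "BYTE_3";
        }
      } else if (Width == 16 && (Offset == 0 || Offset == 16)) {
        return Offset == 0 ? "WORD_0" : "WORD_1";
      } else if (Width == 32 && Offset == 0) {
        return "DWORD";
      }
      return "";
    }

    int main() {
      assert(bfeToSel(8, 8) == "BYTE_1");   // v_bfe_u32 v1, v0, 8, 8
      assert(bfeToSel(16, 16) == "WORD_1");
      assert(bfeToSel(4, 8).empty());       // unaligned extract: no selector
      return 0;
    }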
+
+bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
+ const SDWAOperandsVector &SDWAOperands) {
+ // Check if this instruction can be converted to SDWA:
+ // 1. Does this opcode support SDWA?
+ if (AMDGPU::getSDWAOp(MI.getOpcode()) == -1)
+ return false;
+
+ // 2. Are all operands VGPRs?
+ for (const MachineOperand &Operand : MI.explicit_operands()) {
+ if (!Operand.isReg() || !TRI->isVGPR(*MRI, Operand.getReg()))
+ return false;
+ }
+
+ // Convert to sdwa
+ int SDWAOpcode = AMDGPU::getSDWAOp(MI.getOpcode());
+ assert(SDWAOpcode != -1);
+
+ const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
+
+ // Create SDWA version of instruction MI and initialize its operands
+ MachineInstrBuilder SDWAInst =
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
+
+ // Copy dst; if it is present in the original, it should also be present in
+ // the SDWA instruction
+ MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
+ if (Dst) {
+ assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
+ SDWAInst.add(*Dst);
+ } else {
+ assert(TII->isVOPC(MI));
+ }
+
+ // Copy src0 and initialize src0_modifiers. All SDWA instructions have src0
+ // and src0_modifiers (except for v_nop_sdwa, which can't get here)
+ MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+ assert(
+ Src0 &&
+ AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0) != -1 &&
+ AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_modifiers) != -1);
+ SDWAInst.addImm(0);
+ SDWAInst.add(*Src0);
+
+ // Copy src1 if present, initialize src1_modifiers.
+ MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
+ if (Src1) {
+ assert(
+ AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1) != -1 &&
+ AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_modifiers) != -1);
+ SDWAInst.addImm(0);
+ SDWAInst.add(*Src1);
+ } else {
+ assert(TII->isVOP1(MI));
+ }
+
+ if (SDWAOpcode == AMDGPU::V_MAC_F16_sdwa ||
+ SDWAOpcode == AMDGPU::V_MAC_F32_sdwa) {
+ // v_mac_f16/32 has an additional src2 operand tied to vdst
+ MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+ assert(Src2);
+ SDWAInst.add(*Src2);
+ }
+
+ // Initialize clamp.
+ assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::clamp) != -1);
+ SDWAInst.addImm(0);
+
+ // Initialize dst_sel and dst_unused if present
+ if (Dst) {
+ assert(
+ AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1 &&
+ AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1);
+ SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
+ SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
+ }
+
+ // Initialize src0_sel
+ assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src0_sel) != -1);
+ SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
+
+ // Initialize src1_sel if present
+ if (Src1) {
+ assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::src1_sel) != -1);
+ SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
+ }
+
+ // Apply all SDWA operand patterns
+ bool Converted = false;
+ for (auto &Operand : SDWAOperands) {
+ Converted |= Operand->convertToSDWA(*SDWAInst, TII);
+ }
+ if (!Converted) {
+ SDWAInst->eraseFromParent();
+ return false;
+ }
+
+ DEBUG(dbgs() << "Convert instruction:" << MI
+ << "Into:" << *SDWAInst << '\n');
+ ++NumSDWAInstructionsPeepholed;
+
+ MI.eraseFromParent();
+ return true;
+}
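Taken together, the asserts above pin down the operand order of the freshly built SDWA instruction. A summary of the sequence as a reading aid (which entries appear depends on the opcode's MCInstrDesc; this is not generated code):

    // vdst                   - only if the original defines one (absent for VOPC)
    // src0_modifiers, src0   - always present (v_nop_sdwa never reaches here)
    // src1_modifiers, src1   - only for two-source opcodes (absent for VOP1)
    // src2                   - only for v_mac_f16/f32, tied to vdst
    // clamp                  - always present, initialized to 0
    // dst_sel, dst_unused    - only if vdst is present
    // src0_sel               - always present
    // src1_sel               - only if src1 is present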
+
+bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+
+ if (!ST.hasSDWA() ||
+ !AMDGPU::isVI(ST)) { // TODO: Add support for SDWA on gfx9
+ return false;
+ }
+
+ MRI = &MF.getRegInfo();
+ TRI = ST.getRegisterInfo();
+ TII = ST.getInstrInfo();
+
+ std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches;
+
+ matchSDWAOperands(MF);
+
+ for (auto &OperandPair : SDWAOperands) {
+ auto &Operand = OperandPair.second;
+ MachineInstr *PotentialMI = Operand->potentialToConvert(TII);
+ if (PotentialMI) {
+ PotentialMatches[PotentialMI].push_back(std::move(Operand));
+ }
+ }
+
+ for (auto &PotentialPair : PotentialMatches) {
+ MachineInstr &PotentialMI = *PotentialPair.first;
+ convertToSDWA(PotentialMI, PotentialPair.second);
+ }
+
+ SDWAOperands.clear();
+ return false;
+}
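The driver runs in two phases: matchSDWAOperands records one SDWA operand per pattern instruction, then the loop above re-keys those operands by the instruction they can convert, so several operands targeting the same instruction are applied as one batch. A standalone sketch of the grouping idiom (toy types, assuming single ownership via unique_ptr):

    #include <memory>
    #include <unordered_map>
    #include <vector>

    struct Inst;
    struct Operand { Inst *ConvertTarget; };

    int main() {
      std::unordered_map<Inst *, std::unique_ptr<Operand>> Matched; // per pattern
      std::unordered_map<Inst *, std::vector<std::unique_ptr<Operand>>> Groups;

      // Re-key each matched operand by the instruction it can convert
      // (the potentialToConvert analogue), moving ownership into the group.
      for (auto &P : Matched)
        if (Inst *MI = P.second->ConvertTarget)
          Groups[MI].push_back(std::move(P.second));

      // Each group is then handed to a convertToSDWA-style routine as one batch.
      return 0;
    }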
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index a1ed5e8441df..36d4df52ff0e 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -24,12 +24,6 @@
using namespace llvm;
-static cl::opt<bool> EnableSpillSGPRToSMEM(
- "amdgpu-spill-sgpr-to-smem",
- cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
- cl::init(false));
-
-
static bool hasPressureSet(const int *PSets, unsigned PSetID) {
for (unsigned i = 0; PSets[i] != -1; ++i) {
if (PSets[i] == (int)PSetID)
@@ -49,9 +43,28 @@ void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
}
}
-SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo(),
- SGPRPressureSets(getNumRegPressureSets()),
- VGPRPressureSets(getNumRegPressureSets()) {
+static cl::opt<bool> EnableSpillSGPRToSMEM(
+ "amdgpu-spill-sgpr-to-smem",
+ cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
+ cl::init(false));
+
+static cl::opt<bool> EnableSpillSGPRToVGPR(
+ "amdgpu-spill-sgpr-to-vgpr",
+ cl::desc("Enable spilling VGPRs to SGPRs"),
+ cl::ReallyHidden,
+ cl::init(true));
+
+SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) :
+ AMDGPURegisterInfo(),
+ SGPRPressureSets(getNumRegPressureSets()),
+ VGPRPressureSets(getNumRegPressureSets()),
+ SpillSGPRToVGPR(false),
+ SpillSGPRToSMEM(false) {
+ if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
+ SpillSGPRToSMEM = true;
+ else if (EnableSpillSGPRToVGPR)
+ SpillSGPRToVGPR = true;
+
unsigned NumRegPressureSets = getNumRegPressureSets();
SGPRSetID = NumRegPressureSets;
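The constructor now resolves the two spill flags into a fixed priority: SMEM spilling when requested and the subtarget has scalar stores, otherwise VGPR spilling when enabled, otherwise plain memory spills through the scratch buffer. A minimal restatement of that decision (a sketch, not the constructor itself):

    #include <cassert>

    struct SpillChoice { bool ToSMEM, ToVGPR; };

    static SpillChoice pickSpillTarget(bool WantSMEM, bool HasScalarStores,
                                       bool WantVGPR) {
      if (WantSMEM && HasScalarStores)
        return {true, false};     // scalar stores available and requested
      if (WantVGPR)
        return {false, true};     // default: lanes of a VGPR
      return {false, false};      // fall back to memory
    }

    int main() {
      assert(pickSpillTarget(true, true, true).ToSMEM);
      assert(pickSpillTarget(true, false, true).ToVGPR); // SMEM unsupported
      return 0;
    }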
@@ -97,14 +110,18 @@ void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) co
unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
const MachineFunction &MF) const {
- unsigned BaseIdx = alignDown(getMaxNumSGPRs(MF), 4) - 4;
+
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}
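The index arithmetic picks the last 4-aligned quadruple of SGPRs below the addressable limit. Assuming the VI figure of 102 addressable SGPRs (from the table removed later in this patch), the reserved scratch buffer descriptor lands at the 128-bit tuple SGPR96-SGPR99:

    #include <cassert>

    static unsigned alignDown(unsigned V, unsigned A) { return V - V % A; }

    int main() {
      // alignDown(102, 4) = 100; 100 - 4 = 96, i.e. the SGPR96_99 tuple.
      unsigned BaseIdx = alignDown(102, 4) - 4;
      assert(BaseIdx == 96);
      return 0;
    }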
unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
const MachineFunction &MF) const {
- unsigned RegCount = getMaxNumSGPRs(MF);
+
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ unsigned RegCount = ST.getMaxNumSGPRs(MF);
unsigned Reg;
// Try to place it in a hole after PrivateSegmentbufferReg.
@@ -129,6 +146,12 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, AMDGPU::EXEC);
reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
+ // Reserve the memory aperture registers.
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
+
// Reserve Trap Handler registers - support is not implemented in Codegen.
reserveRegisterTuples(Reserved, AMDGPU::TBA);
reserveRegisterTuples(Reserved, AMDGPU::TMA);
@@ -139,14 +162,16 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
- unsigned MaxNumSGPRs = getMaxNumSGPRs(MF);
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+
+ unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
}
- unsigned MaxNumVGPRs = getMaxNumVGPRs(MF);
+ unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
@@ -253,7 +278,6 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
}
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -263,8 +287,7 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
.addFrameIndex(FrameIdx);
- BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg)
- .addReg(UnusedCarry, RegState::Define | RegState::Dead)
+ TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
.addReg(OffsetReg, RegState::Kill)
.addReg(FIReg);
}
@@ -415,14 +438,14 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
unsigned Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg();
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
- .addReg(Reg, getDefRegState(!IsStore))
- .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
- .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
- .addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // tfe
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addReg(Reg, getDefRegState(!IsStore))
+ .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
+ .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
return true;
}
@@ -545,11 +568,20 @@ static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
}
-void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
+bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
int Index,
- RegScavenger *RS) const {
+ RegScavenger *RS,
+ bool OnlyToVGPR) const {
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MBB->getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+
+ ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
+ = MFI->getSGPRToVGPRSpills(Index);
+ bool SpillToVGPR = !VGPRSpills.empty();
+ if (OnlyToVGPR && !SpillToVGPR)
+ return false;
+
MachineRegisterInfo &MRI = MF->getRegInfo();
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -558,10 +590,11 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
bool IsKill = MI->getOperand(0).isKill();
const DebugLoc &DL = MI->getDebugLoc();
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
+ bool SpillToSMEM = spillSGPRToSMEM();
+ if (SpillToSMEM && OnlyToVGPR)
+ return false;
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
@@ -634,9 +667,9 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
continue;
}
- struct SIMachineFunctionInfo::SpilledReg Spill =
- MFI->getSpilledReg(MF, Index, i);
- if (Spill.hasReg()) {
+ if (SpillToVGPR) {
+ SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
+
BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
Spill.VGPR)
@@ -647,6 +680,10 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
// frame index, we should delete the frame index when all references to
// it are fixed.
} else {
+ // XXX - Can the spill to VGPR fail for some subregisters but not others?
+ if (OnlyToVGPR)
+ return false;
+
// Spill SGPR to a frame index.
// TODO: Should VI try to spill to VGPR and then spill to SMEM?
unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -690,22 +727,33 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
MI->eraseFromParent();
MFI->addToSpilledSGPRs(NumSubRegs);
+ return true;
}
-void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
+bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
int Index,
- RegScavenger *RS) const {
+ RegScavenger *RS,
+ bool OnlyToVGPR) const {
MachineFunction *MF = MI->getParent()->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock *MBB = MI->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+
+ ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
+ = MFI->getSGPRToVGPRSpills(Index);
+ bool SpillToVGPR = !VGPRSpills.empty();
+ if (OnlyToVGPR && !SpillToVGPR)
+ return false;
+
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const DebugLoc &DL = MI->getDebugLoc();
unsigned SuperReg = MI->getOperand(0).getReg();
- bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
+ bool SpillToSMEM = spillSGPRToSMEM();
+ if (SpillToSMEM && OnlyToVGPR)
+ return false;
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
@@ -773,10 +821,8 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
continue;
}
- SIMachineFunctionInfo::SpilledReg Spill
- = MFI->getSpilledReg(MF, Index, i);
-
- if (Spill.hasReg()) {
+ if (SpillToVGPR) {
+ SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
auto MIB =
BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
SubReg)
@@ -786,6 +832,9 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
if (NumSubRegs > 1)
MIB.addReg(SuperReg, RegState::ImplicitDefine);
} else {
+ if (OnlyToVGPR)
+ return false;
+
// Restore SGPR from a stack slot.
// FIXME: We should use S_LOAD_DWORD here for VI.
unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -820,6 +869,32 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
}
MI->eraseFromParent();
+ return true;
+}
+
+/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
+/// a VGPR and the stack slot can be safely eliminated when all other users are
+/// handled.
+bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
+ MachineBasicBlock::iterator MI,
+ int FI,
+ RegScavenger *RS) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::SI_SPILL_S512_SAVE:
+ case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S128_SAVE:
+ case AMDGPU::SI_SPILL_S64_SAVE:
+ case AMDGPU::SI_SPILL_S32_SAVE:
+ return spillSGPR(MI, FI, RS, true);
+ case AMDGPU::SI_SPILL_S512_RESTORE:
+ case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_S128_RESTORE:
+ case AMDGPU::SI_SPILL_S64_RESTORE:
+ case AMDGPU::SI_SPILL_S32_RESTORE:
+ return restoreSGPR(MI, FI, RS, true);
+ default:
+ llvm_unreachable("not an SGPR spill instruction");
+ }
}
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
@@ -1156,210 +1231,6 @@ SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
return AMDGPU::NoRegister;
}
-unsigned SIRegisterInfo::getTotalNumSGPRs(const SISubtarget &ST) const {
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return 800;
- return 512;
-}
-
-unsigned SIRegisterInfo::getNumAddressableSGPRs(const SISubtarget &ST) const {
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return 102;
- return 104;
-}
-
-unsigned SIRegisterInfo::getNumReservedSGPRs(const SISubtarget &ST,
- const SIMachineFunctionInfo &MFI) const {
- if (MFI.hasFlatScratchInit()) {
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return 6; // FLAT_SCRATCH, XNACK, VCC (in that order)
-
- if (ST.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
- return 4; // FLAT_SCRATCH, VCC (in that order)
- }
-
- if (ST.isXNACKEnabled())
- return 4; // XNACK, VCC (in that order)
-
- return 2; // VCC.
-}
-
-unsigned SIRegisterInfo::getMinNumSGPRs(const SISubtarget &ST,
- unsigned WavesPerEU) const {
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- switch (WavesPerEU) {
- case 0: return 0;
- case 10: return 0;
- case 9: return 0;
- case 8: return 81;
- default: return 97;
- }
- } else {
- switch (WavesPerEU) {
- case 0: return 0;
- case 10: return 0;
- case 9: return 49;
- case 8: return 57;
- case 7: return 65;
- case 6: return 73;
- case 5: return 81;
- default: return 97;
- }
- }
-}
-
-unsigned SIRegisterInfo::getMaxNumSGPRs(const SISubtarget &ST,
- unsigned WavesPerEU,
- bool Addressable) const {
- if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- switch (WavesPerEU) {
- case 0: return 80;
- case 10: return 80;
- case 9: return 80;
- case 8: return 96;
- default: return Addressable ? getNumAddressableSGPRs(ST) : 112;
- }
- } else {
- switch (WavesPerEU) {
- case 0: return 48;
- case 10: return 48;
- case 9: return 56;
- case 8: return 64;
- case 7: return 72;
- case 6: return 80;
- case 5: return 96;
- default: return getNumAddressableSGPRs(ST);
- }
- }
-}
-
-unsigned SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &MF) const {
- const Function &F = *MF.getFunction();
-
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
-
- // Compute maximum number of SGPRs function can use using default/requested
- // minimum number of waves per execution unit.
- std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
- unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, false);
- unsigned MaxNumAddressableSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, true);
-
- // Check if maximum number of SGPRs was explicitly requested using
- // "amdgpu-num-sgpr" attribute.
- if (F.hasFnAttribute("amdgpu-num-sgpr")) {
- unsigned Requested = AMDGPU::getIntegerAttribute(
- F, "amdgpu-num-sgpr", MaxNumSGPRs);
-
- // Make sure requested value does not violate subtarget's specifications.
- if (Requested && (Requested <= getNumReservedSGPRs(ST, MFI)))
- Requested = 0;
-
- // If more SGPRs are required to support the input user/system SGPRs,
- // increase to accommodate them.
- //
- // FIXME: This really ends up using the requested number of SGPRs + number
- // of reserved special registers in total. Theoretically you could re-use
- // the last input registers for these special registers, but this would
- // require a lot of complexity to deal with the weird aliasing.
- unsigned NumInputSGPRs = MFI.getNumPreloadedSGPRs();
- if (Requested && Requested < NumInputSGPRs)
- Requested = NumInputSGPRs;
-
- // Make sure requested value is compatible with values implied by
- // default/requested minimum/maximum number of waves per execution unit.
- if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first, false))
- Requested = 0;
- if (WavesPerEU.second &&
- Requested && Requested < getMinNumSGPRs(ST, WavesPerEU.second))
- Requested = 0;
-
- if (Requested)
- MaxNumSGPRs = Requested;
- }
-
- if (ST.hasSGPRInitBug())
- MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
-
- return std::min(MaxNumSGPRs - getNumReservedSGPRs(ST, MFI),
- MaxNumAddressableSGPRs);
-}
-
-unsigned SIRegisterInfo::getNumDebuggerReservedVGPRs(
- const SISubtarget &ST) const {
- if (ST.debuggerReserveRegs())
- return 4;
- return 0;
-}
-
-unsigned SIRegisterInfo::getMinNumVGPRs(unsigned WavesPerEU) const {
- switch (WavesPerEU) {
- case 0: return 0;
- case 10: return 0;
- case 9: return 25;
- case 8: return 29;
- case 7: return 33;
- case 6: return 37;
- case 5: return 41;
- case 4: return 49;
- case 3: return 65;
- case 2: return 85;
- default: return 129;
- }
-}
-
-unsigned SIRegisterInfo::getMaxNumVGPRs(unsigned WavesPerEU) const {
- switch (WavesPerEU) {
- case 0: return 24;
- case 10: return 24;
- case 9: return 28;
- case 8: return 32;
- case 7: return 36;
- case 6: return 40;
- case 5: return 48;
- case 4: return 64;
- case 3: return 84;
- case 2: return 128;
- default: return getTotalNumVGPRs();
- }
-}
-
-unsigned SIRegisterInfo::getMaxNumVGPRs(const MachineFunction &MF) const {
- const Function &F = *MF.getFunction();
-
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
-
- // Compute maximum number of VGPRs function can use using default/requested
- // minimum number of waves per execution unit.
- std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
- unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
-
- // Check if maximum number of VGPRs was explicitly requested using
- // "amdgpu-num-vgpr" attribute.
- if (F.hasFnAttribute("amdgpu-num-vgpr")) {
- unsigned Requested = AMDGPU::getIntegerAttribute(
- F, "amdgpu-num-vgpr", MaxNumVGPRs);
-
- // Make sure requested value does not violate subtarget's specifications.
- if (Requested && Requested <= getNumDebuggerReservedVGPRs(ST))
- Requested = 0;
-
- // Make sure requested value is compatible with values implied by
- // default/requested minimum/maximum number of waves per execution unit.
- if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
- Requested = 0;
- if (WavesPerEU.second &&
- Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
- Requested = 0;
-
- if (Requested)
- MaxNumVGPRs = Requested;
- }
-
- return MaxNumVGPRs - getNumDebuggerReservedVGPRs(ST);
-}
-
ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
unsigned EltSize) const {
if (EltSize == 4) {
@@ -1476,3 +1347,62 @@ bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
unsigned Reg) const {
return hasVGPRs(getRegClassForReg(MRI, Reg));
}
+
+bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
+ const TargetRegisterClass *SrcRC,
+ unsigned SubReg,
+ const TargetRegisterClass *DstRC,
+ unsigned DstSubReg,
+ const TargetRegisterClass *NewRC) const {
+ unsigned SrcSize = SrcRC->getSize();
+ unsigned DstSize = DstRC->getSize();
+ unsigned NewSize = NewRC->getSize();
+
+ // Do not increase the size of registers beyond a dword; we would need to
+ // allocate adjacent registers and constrain regalloc more than needed.
+
+ // Always allow dword coalescing.
+ if (SrcSize <= 4 || DstSize <= 4)
+ return true;
+
+ return NewSize <= DstSize || NewSize <= SrcSize;
+}
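The policy reads: dword-sized copies always coalesce, and wider tuples coalesce only when the merged class is no larger than one of the inputs. A quick standalone check with illustrative byte sizes:

    #include <cassert>

    static bool shouldCoalesce(unsigned SrcSize, unsigned DstSize,
                               unsigned NewSize) {
      if (SrcSize <= 4 || DstSize <= 4)
        return true;                              // dword: always allowed
      return NewSize <= DstSize || NewSize <= SrcSize;
    }

    int main() {
      assert(shouldCoalesce(4, 8, 8));    // dword source: always allowed
      assert(shouldCoalesce(8, 8, 8));    // no growth in register size
      assert(!shouldCoalesce(8, 8, 16));  // would force a wider tuple
      return 0;
    }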
+
+unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
+ *MF.getFunction());
+ switch (RC->getID()) {
+ default:
+ return AMDGPURegisterInfo::getRegPressureLimit(RC, MF);
+ case AMDGPU::VGPR_32RegClassID:
+ return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
+ case AMDGPU::SGPR_32RegClassID:
+ return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
+ }
+}
+
+unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const {
+ if (Idx == getVGPRPressureSet())
+ return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
+ const_cast<MachineFunction &>(MF));
+
+ if (Idx == getSGPRPressureSet())
+ return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
+ const_cast<MachineFunction &>(MF));
+
+ return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
+}
+
+const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
+ static const int Empty[] = { -1 };
+
+ if (hasRegUnit(AMDGPU::M0, RegUnit))
+ return Empty;
+ return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
+}
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index 0bcae7d9840c..679ed229758a 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -21,8 +21,8 @@
namespace llvm {
-class SISubtarget;
class MachineRegisterInfo;
+class SISubtarget;
class SIMachineFunctionInfo;
class SIRegisterInfo final : public AMDGPURegisterInfo {
@@ -31,13 +31,22 @@ private:
unsigned VGPRSetID;
BitVector SGPRPressureSets;
BitVector VGPRPressureSets;
+ bool SpillSGPRToVGPR;
+ bool SpillSGPRToSMEM;
void reserveRegisterTuples(BitVector &, unsigned Reg) const;
void classifyPressureSet(unsigned PSetID, unsigned Reg,
BitVector &PressureSets) const;
-
public:
- SIRegisterInfo();
+ SIRegisterInfo(const SISubtarget &ST);
+
+ bool spillSGPRToVGPR() const {
+ return SpillSGPRToVGPR;
+ }
+
+ bool spillSGPRToSMEM() const {
+ return SpillSGPRToSMEM;
+ }
/// Return the end register initially reserved for the scratch buffer in case
/// spilling is needed.
@@ -78,16 +87,22 @@ public:
const TargetRegisterClass *getPointerRegClass(
const MachineFunction &MF, unsigned Kind = 0) const override;
- void spillSGPR(MachineBasicBlock::iterator MI,
- int FI, RegScavenger *RS) const;
+ /// If \p OnlyToVGPR is true, this will only succeed if the spill can be
+ /// performed entirely to a VGPR, and returns false otherwise.
+ bool spillSGPR(MachineBasicBlock::iterator MI,
+ int FI, RegScavenger *RS,
+ bool OnlyToVGPR = false) const;
- void restoreSGPR(MachineBasicBlock::iterator MI,
- int FI, RegScavenger *RS) const;
+ bool restoreSGPR(MachineBasicBlock::iterator MI,
+ int FI, RegScavenger *RS,
+ bool OnlyToVGPR = false) const;
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS) const override;
+ bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI,
+ int FI, RegScavenger *RS) const;
+
unsigned getHWRegIndex(unsigned Reg) const {
return getEncodingValue(Reg) & 0xff;
}
@@ -195,74 +210,23 @@ public:
return VGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID);
}
- /// \returns SGPR allocation granularity supported by the subtarget.
- unsigned getSGPRAllocGranule() const {
- return 8;
- }
-
- /// \returns Total number of SGPRs supported by the subtarget.
- unsigned getTotalNumSGPRs(const SISubtarget &ST) const;
-
- /// \returns Number of addressable SGPRs supported by the subtarget.
- unsigned getNumAddressableSGPRs(const SISubtarget &ST) const;
-
- /// \returns Number of reserved SGPRs supported by the subtarget.
- unsigned getNumReservedSGPRs(const SISubtarget &ST,
- const SIMachineFunctionInfo &MFI) const;
-
- /// \returns Minimum number of SGPRs that meets given number of waves per
- /// execution unit requirement for given subtarget.
- unsigned getMinNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU) const;
-
- /// \returns Maximum number of SGPRs that meets given number of waves per
- /// execution unit requirement for given subtarget.
- unsigned getMaxNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU,
- bool Addressable) const;
-
- /// \returns Maximum number of SGPRs that meets number of waves per execution
- /// unit requirement for function \p MF, or number of SGPRs explicitly
- /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
- ///
- /// \returns Value that meets number of waves per execution unit requirement
- /// if explicitly requested value cannot be converted to integer, violates
- /// subtarget's specifications, or does not meet number of waves per execution
- /// unit requirement.
- unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
-
- /// \returns VGPR allocation granularity supported by the subtarget.
- unsigned getVGPRAllocGranule() const {
- return 4;
- }
-
- /// \returns Total number of VGPRs supported by the subtarget.
- unsigned getTotalNumVGPRs() const {
- return 256;
- }
-
- /// \returns Number of reserved VGPRs for debugger use supported by the
- /// subtarget.
- unsigned getNumDebuggerReservedVGPRs(const SISubtarget &ST) const;
+ ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
+ unsigned EltSize) const;
- /// \returns Minimum number of SGPRs that meets given number of waves per
- /// execution unit requirement.
- unsigned getMinNumVGPRs(unsigned WavesPerEU) const;
+ bool shouldCoalesce(MachineInstr *MI,
+ const TargetRegisterClass *SrcRC,
+ unsigned SubReg,
+ const TargetRegisterClass *DstRC,
+ unsigned DstSubReg,
+ const TargetRegisterClass *NewRC) const override;
- /// \returns Maximum number of VGPRs that meets given number of waves per
- /// execution unit requirement.
- unsigned getMaxNumVGPRs(unsigned WavesPerEU) const;
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const override;
- /// \returns Maximum number of VGPRs that meets number of waves per execution
- /// unit requirement for function \p MF, or number of VGPRs explicitly
- /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
- ///
- /// \returns Value that meets number of waves per execution unit requirement
- /// if explicitly requested value cannot be converted to integer, violates
- /// subtarget's specifications, or does not meet number of waves per execution
- /// unit requirement.
- unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
+ unsigned getRegPressureSetLimit(const MachineFunction &MF,
+ unsigned Idx) const override;
- ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
- unsigned EltSize) const;
+ const int *getRegUnitPressureSets(unsigned RegUnit) const override;
private:
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td
index 31e714b9f6b9..fc808011cd88 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -44,6 +44,11 @@ def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
def SCC : SIReg<"scc", 253>;
def M0 : SIReg <"m0", 124>;
+def SRC_SHARED_BASE : SIReg<"src_shared_base", 235>;
+def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>;
+def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>;
+def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>;
+
// Trap handler registers
def TBA_LO : SIReg<"tba_lo", 108>;
def TBA_HI : SIReg<"tba_hi", 109>;
@@ -128,7 +133,7 @@ def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
// TODO: Do we need to set DwarfRegAlias on register tuples?
// SGPR 32-bit registers
-def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "SGPR%u", 0, 103))> {
// Give all SGPR classes higher priority than VGPR classes, because
// we want to spill SGPRs to VGPRs.
@@ -179,7 +184,7 @@ def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
(add (decimate (shl SGPR_32, 15), 4))]>;
// Trap handler TMP 32-bit registers
-def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
+def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
(add (sequence "TTMP%u", 0, 11))> {
let isAllocatable = 0;
}
@@ -197,7 +202,8 @@ def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
(add (decimate (shl TTMP_32, 3), 4))]>;
// VGPR 32-bit registers
-def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+// i16/f16 only on VI+
+def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "VGPR%u", 0, 255))> {
let AllocationPriority = 1;
let Size = 32;
@@ -258,19 +264,20 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
-def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
- TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)> {
+ TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
+ SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
let AllocationPriority = 7;
}
-def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
let AllocationPriority = 7;
}
// Register class for all scalar registers (SGPRs + Special Registers)
-def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI)> {
let AllocationPriority = 7;
}
@@ -319,7 +326,7 @@ def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
let AllocationPriority = 11;
}
-def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 32, (add SGPR_512)> {
+def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512)> {
// Requires 8 s_mov_b64 to copy
let CopyCost = 8;
let AllocationPriority = 12;
@@ -366,7 +373,7 @@ def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
let Size = 32;
}
-def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add VGPR_32, SReg_32)> {
let isAllocatable = 0;
}
@@ -417,6 +424,18 @@ multiclass SIRegOperand <string rc, string MatchName, string opType> {
let OperandType = opType#"_FP64";
let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
}
+
+ def _v2b16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ let OperandType = opType#"_V2INT16";
+ let ParserMatchClass = RegImmMatcher<MatchName#"V2B16">;
+ let DecoderMethod = "decodeOperand_VSrcV216";
+ }
+
+ def _v2f16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ let OperandType = opType#"_V2FP16";
+ let ParserMatchClass = RegImmMatcher<MatchName#"V2F16">;
+ let DecoderMethod = "decodeOperand_VSrcV216";
+ }
}
}
diff --git a/lib/Target/AMDGPU/SISchedule.td b/lib/Target/AMDGPU/SISchedule.td
index be27966fd5f1..0f02f5825cb0 100644
--- a/lib/Target/AMDGPU/SISchedule.td
+++ b/lib/Target/AMDGPU/SISchedule.td
@@ -53,6 +53,11 @@ class SISchedMachineModel : SchedMachineModel {
let MicroOpBufferSize = 1;
let IssueWidth = 1;
let PostRAScheduler = 1;
+
+ // FIXME: Approximate 2 * branch cost. Try to hack around bad
+ // early-ifcvt heuristics. These need improvement to avoid the
+ // out-of-order execution heuristics.
+ int MispredictPenalty = 20;
}
def SIFullSpeedModel : SISchedMachineModel;
diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index dd31dc690840..c5f121757e62 100644
--- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -497,24 +497,24 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
if (Op32DstIdx != -1) {
// dst
- Inst32.addOperand(MI.getOperand(0));
+ Inst32.add(MI.getOperand(0));
} else {
assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
"Unexpected case");
}
- Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
+ Inst32.add(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
const MachineOperand *Src1 =
TII->getNamedOperand(MI, AMDGPU::OpName::src1);
if (Src1)
- Inst32.addOperand(*Src1);
+ Inst32.add(*Src1);
if (Src2) {
int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
if (Op32Src2Idx != -1) {
- Inst32.addOperand(*Src2);
+ Inst32.add(*Src2);
} else {
// In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
// replaced with an implicit read of vcc. This was already added
diff --git a/lib/Target/AMDGPU/SMInstructions.td b/lib/Target/AMDGPU/SMInstructions.td
index 02656483cd74..5b840a14dbc3 100644
--- a/lib/Target/AMDGPU/SMInstructions.td
+++ b/lib/Target/AMDGPU/SMInstructions.td
@@ -226,9 +226,9 @@ def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>
def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
auto Ld = cast<LoadSDNode>(N);
return Ld->getAlignment() >= 4 &&
- ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) ||
- (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+ (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
}]>;
@@ -293,12 +293,6 @@ def : Pat <
let Predicates = [isVI] in {
-// 1. Offset as 20bit DWORD immediate
-def : Pat <
- (SIload_constant v4i32:$sbase, IMM20bit:$offset),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset), 0)
->;
-
def : Pat <
(i64 (readcyclecounter)),
(S_MEMREALTIME)
diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td
index 73cd5774128e..b4adbdd1df07 100644
--- a/lib/Target/AMDGPU/SOPInstructions.td
+++ b/lib/Target/AMDGPU/SOPInstructions.td
@@ -82,6 +82,12 @@ class SOP1_0_32 <string opName, list<dag> pattern = []> : SOP1_Pseudo <
let has_sdst = 0;
}
+class SOP1_0_32R <string opName, list<dag> pattern = []> : SOP1_Pseudo <
+ opName, (outs), (ins SReg_32:$src0),
+ "$src0", pattern> {
+ let has_sdst = 0;
+}
+
class SOP1_64 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
opName, (outs SReg_64:$sdst), (ins SSrc_b64:$src0),
"$sdst, $src0", pattern
@@ -210,7 +216,7 @@ def S_MOVRELD_B32 : SOP1_32 <"s_movreld_b32">;
def S_MOVRELD_B64 : SOP1_64 <"s_movreld_b64">;
} // End Uses = [M0]
-def S_CBRANCH_JOIN : SOP1_1 <"s_cbranch_join">;
+def S_CBRANCH_JOIN : SOP1_0_32R <"s_cbranch_join">;
def S_MOV_REGRD_B32 : SOP1_32 <"s_mov_regrd_b32">;
let Defs = [SCC] in {
def S_ABS_I32 : SOP1_32 <"s_abs_i32">;
@@ -428,7 +434,7 @@ def S_BFE_I64 : SOP2_64_32 <"s_bfe_i64">;
def S_CBRANCH_G_FORK : SOP2_Pseudo <
"s_cbranch_g_fork", (outs),
- (ins SReg_64:$src0, SReg_64:$src1),
+ (ins SCSrc_b64:$src0, SCSrc_b64:$src1),
"$src0, $src1"
> {
let has_sdst = 0;
@@ -438,6 +444,22 @@ let Defs = [SCC] in {
def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32">;
} // End Defs = [SCC]
+let SubtargetPredicate = isVI in {
+ def S_RFE_RESTORE_B64 : SOP2_Pseudo <
+ "s_rfe_restore_b64", (outs),
+ (ins SSrc_b64:$src0, SSrc_b32:$src1),
+ "$src0, $src1"
+ > {
+ let hasSideEffects = 1;
+ let has_sdst = 0;
+ }
+}
+
+let SubtargetPredicate = isGFX9 in {
+ def S_PACK_LL_B32_B16 : SOP2_32<"s_pack_ll_b32_b16">;
+ def S_PACK_LH_B32_B16 : SOP2_32<"s_pack_lh_b32_b16">;
+ def S_PACK_HH_B32_B16 : SOP2_32<"s_pack_hh_b32_b16">;
+}
//===----------------------------------------------------------------------===//
// SOPK Instructions
@@ -751,6 +773,14 @@ def S_ENDPGM : SOPP <0x00000001, (ins), "s_endpgm",
let isReturn = 1;
}
+let SubtargetPredicate = isVI in {
+def S_ENDPGM_SAVED : SOPP <0x0000001B, (ins), "s_endpgm_saved"> {
+ let simm16 = 0;
+ let isBarrier = 1;
+ let isReturn = 1;
+}
+}
+
let isBranch = 1, SchedRW = [WriteBranch] in {
def S_BRANCH : SOPP <
0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16",
@@ -792,6 +822,25 @@ def S_CBRANCH_EXECNZ : SOPP <
>;
} // End Uses = [EXEC]
+def S_CBRANCH_CDBGSYS : SOPP <
+ 0x00000017, (ins sopp_brtarget:$simm16),
+ "s_cbranch_cdbgsys $simm16"
+>;
+
+def S_CBRANCH_CDBGSYS_AND_USER : SOPP <
+ 0x0000001A, (ins sopp_brtarget:$simm16),
+ "s_cbranch_cdbgsys_and_user $simm16"
+>;
+
+def S_CBRANCH_CDBGSYS_OR_USER : SOPP <
+ 0x00000019, (ins sopp_brtarget:$simm16),
+ "s_cbranch_cdbgsys_or_user $simm16"
+>;
+
+def S_CBRANCH_CDBGUSER : SOPP <
+ 0x00000018, (ins sopp_brtarget:$simm16),
+ "s_cbranch_cdbguser $simm16"
+>;
} // End isBranch = 1
} // End isTerminator = 1
@@ -806,9 +855,18 @@ def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
let isConvergent = 1;
}
+let SubtargetPredicate = isVI in {
+def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
+ let simm16 = 0;
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+}
+
let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">;
def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
+def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
// On SI the documentation says sleep for approximately 64 * low 2
// bits, consistent with the reported maximum of 448. On VI the
@@ -1207,6 +1265,10 @@ def S_BFE_U64_vi : SOP2_Real_vi <0x27, S_BFE_U64>;
def S_BFE_I64_vi : SOP2_Real_vi <0x28, S_BFE_I64>;
def S_CBRANCH_G_FORK_vi : SOP2_Real_vi <0x29, S_CBRANCH_G_FORK>;
def S_ABSDIFF_I32_vi : SOP2_Real_vi <0x2a, S_ABSDIFF_I32>;
+def S_PACK_LL_B32_B16_vi : SOP2_Real_vi <0x32, S_PACK_LL_B32_B16>;
+def S_PACK_LH_B32_B16_vi : SOP2_Real_vi <0x33, S_PACK_LH_B32_B16>;
+def S_PACK_HH_B32_B16_vi : SOP2_Real_vi <0x34, S_PACK_HH_B32_B16>;
+def S_RFE_RESTORE_B64_vi : SOP2_Real_vi <0x2b, S_RFE_RESTORE_B64>;
def S_MOVK_I32_vi : SOPK_Real_vi <0x00, S_MOVK_I32>;
def S_CMOVK_I32_vi : SOPK_Real_vi <0x01, S_CMOVK_I32>;
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5f651d4da5d2..86095a8e1142 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information--------------===//
+//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,32 +6,42 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUBaseInfo.h"
+
#include "AMDGPU.h"
+#include "AMDGPUBaseInfo.h"
#include "SIDefines.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <utility>
-#define GET_SUBTARGETINFO_ENUM
-#include "AMDGPUGenSubtargetInfo.inc"
-#undef GET_SUBTARGETINFO_ENUM
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#define GET_REGINFO_ENUM
-#include "AMDGPUGenRegisterInfo.inc"
-#undef GET_REGINFO_ENUM
#define GET_INSTRINFO_NAMED_OPS
-#define GET_INSTRINFO_ENUM
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS
-#undef GET_INSTRINFO_ENUM
namespace {
@@ -56,11 +66,11 @@ unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
return (Src & getBitMask(Shift, Width)) >> Shift;
}
-/// \returns Vmcnt bit shift.
-unsigned getVmcntBitShift() { return 0; }
+/// \returns Vmcnt bit shift (lower bits).
+unsigned getVmcntBitShiftLo() { return 0; }
-/// \returns Vmcnt bit width.
-unsigned getVmcntBitWidth() { return 4; }
+/// \returns Vmcnt bit width (lower bits).
+unsigned getVmcntBitWidthLo() { return 4; }
/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }
@@ -74,52 +84,224 @@ unsigned getLgkmcntBitShift() { return 8; }
/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }
-} // anonymous namespace
+/// \returns Vmcnt bit shift (higher bits).
+unsigned getVmcntBitShiftHi() { return 14; }
+
+/// \returns Vmcnt bit width (higher bits).
+unsigned getVmcntBitWidthHi() { return 2; }
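+
+// A minimal sketch, assuming only the shift/width helpers above;
+// sketchPackVmcnt9 is a hypothetical illustration, not part of this API.
+// On gfx9, Waitcnt[3:0] holds vmcnt[3:0] and Waitcnt[15:14] holds
+// vmcnt[5:4].
+unsigned sketchPackVmcnt9(unsigned Vmcnt) {
+  unsigned Lo = Vmcnt & 0xf;                // -> Waitcnt[3:0]
+  unsigned Hi = ((Vmcnt >> 4) & 0x3) << 14; // -> Waitcnt[15:14]
+  return Lo | Hi;
+}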
+
+} // end anonymous namespace
namespace llvm {
namespace AMDGPU {
-IsaVersion getIsaVersion(const FeatureBitset &Features) {
+namespace IsaInfo {
+IsaVersion getIsaVersion(const FeatureBitset &Features) {
+ // CI.
if (Features.test(FeatureISAVersion7_0_0))
return {7, 0, 0};
-
if (Features.test(FeatureISAVersion7_0_1))
return {7, 0, 1};
-
if (Features.test(FeatureISAVersion7_0_2))
return {7, 0, 2};
+ // VI.
if (Features.test(FeatureISAVersion8_0_0))
return {8, 0, 0};
-
if (Features.test(FeatureISAVersion8_0_1))
return {8, 0, 1};
-
if (Features.test(FeatureISAVersion8_0_2))
return {8, 0, 2};
-
if (Features.test(FeatureISAVersion8_0_3))
return {8, 0, 3};
-
if (Features.test(FeatureISAVersion8_0_4))
return {8, 0, 4};
-
if (Features.test(FeatureISAVersion8_1_0))
return {8, 1, 0};
- return {0, 0, 0};
+ // GFX9.
+ if (Features.test(FeatureISAVersion9_0_0))
+ return {9, 0, 0};
+ if (Features.test(FeatureISAVersion9_0_1))
+ return {9, 0, 1};
+
+ if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
+ return {0, 0, 0};
+ return {7, 0, 0};
+}
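+
+// For example: a subtarget with FeatureISAVersion8_0_1 set yields {8, 0, 1};
+// a GCN subtarget with none of the ISA-version features set (and not
+// Southern Islands) falls back to {7, 0, 0}; anything else yields {0, 0, 0}.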
+
+unsigned getWavefrontSize(const FeatureBitset &Features) {
+ if (Features.test(FeatureWavefrontSize16))
+ return 16;
+ if (Features.test(FeatureWavefrontSize32))
+ return 32;
+
+ return 64;
+}
+
+unsigned getLocalMemorySize(const FeatureBitset &Features) {
+ if (Features.test(FeatureLocalMemorySize32768))
+ return 32768;
+ if (Features.test(FeatureLocalMemorySize65536))
+ return 65536;
+
+ return 0;
+}
+
+unsigned getEUsPerCU(const FeatureBitset &Features) {
+ return 4;
+}
+
+unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize) {
+ if (!Features.test(FeatureGCN))
+ return 8;
+ unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+ if (N == 1)
+ return 40;
+ N = 40 / N;
+ return std::min(N, 16u);
+}
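+
+// Worked example, assuming the default 64-wide wavefront and
+// getWavesPerWorkGroup below: FlatWorkGroupSize = 256 needs
+// N = alignTo(256, 64) / 64 = 4 waves per work group, so a GCN CU fits
+// min(40 / 4, 16) = 10 work groups.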
+
+unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
+ return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
+}
+
+unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize) {
+ return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+}
+
+unsigned getMinWavesPerEU(const FeatureBitset &Features) {
+ return 1;
+}
+
+unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
+ if (!Features.test(FeatureGCN))
+ return 8;
+ // FIXME: Need to take scratch memory into account.
+ return 10;
+}
+
+unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize) {
+ return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
+ getEUsPerCU(Features)) / getEUsPerCU(Features);
+}
+
+unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
+ return 1;
+}
+
+unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
+ return 2048;
+}
+
+unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize) {
+ return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
+ getWavefrontSize(Features);
+}
+
+unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
+ IsaVersion Version = getIsaVersion(Features);
+ if (Version.Major >= 8)
+ return 16;
+ return 8;
+}
+
+unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
+ return 8;
+}
+
+unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
+ IsaVersion Version = getIsaVersion(Features);
+ if (Version.Major >= 8)
+ return 800;
+ return 512;
+}
+
+unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
+ if (Features.test(FeatureSGPRInitBug))
+ return FIXED_NUM_SGPRS_FOR_INIT_BUG;
+
+ IsaVersion Version = getIsaVersion(Features);
+ if (Version.Major >= 8)
+ return 102;
+ return 104;
+}
+
+unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+ assert(WavesPerEU != 0);
+
+ if (WavesPerEU >= getMaxWavesPerEU(Features))
+ return 0;
+ unsigned MinNumSGPRs =
+ alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
+ getSGPRAllocGranule(Features)) + 1;
+ return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
+}
+
+unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+ bool Addressable) {
+ assert(WavesPerEU != 0);
+
+ IsaVersion Version = getIsaVersion(Features);
+ unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
+ getSGPRAllocGranule(Features));
+ unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
+ if (Version.Major >= 8 && !Addressable)
+ AddressableNumSGPRs = 112;
+ return std::min(MaxNumSGPRs, AddressableNumSGPRs);
+}
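+
+// Worked example for a VI-class target (800 total SGPRs, granule 16,
+// 102 addressable): at WavesPerEU = 8, getMaxNumSGPRs =
+// min(alignDown(800 / 8, 16), 102) = 96 and getMinNumSGPRs =
+// alignDown(800 / 9, 16) + 1 = 81, i.e. allocations of 81..96 SGPRs
+// sustain exactly 8 waves per EU.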
+
+unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
+ return 4;
+}
+
+unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
+ return getVGPRAllocGranule(Features);
+}
+
+unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
+ return 256;
}
+unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
+ return getTotalNumVGPRs(Features);
+}
+
+unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+ assert(WavesPerEU != 0);
+
+ if (WavesPerEU >= getMaxWavesPerEU(Features))
+ return 0;
+ unsigned MinNumVGPRs =
+ alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
+ getVGPRAllocGranule(Features)) + 1;
+ return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
+}
+
+unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+ assert(WavesPerEU != 0);
+
+ unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
+ getVGPRAllocGranule(Features));
+ unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
+ return std::min(MaxNumVGPRs, AddressableNumVGPRs);
+}
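+
+// Similarly for VGPRs (256 total, granule 4): WavesPerEU = 4 gives
+// getMaxNumVGPRs = alignDown(256 / 4, 4) = 64 and getMinNumVGPRs =
+// alignDown(256 / 5, 4) + 1 = 49, so 49..64 VGPRs sustain exactly 4 waves.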
+
+} // end namespace IsaInfo
+
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features) {
-
- IsaVersion ISA = getIsaVersion(Features);
+ IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
memset(&Header, 0, sizeof(Header));
Header.amd_kernel_code_version_major = 1;
- Header.amd_kernel_code_version_minor = 0;
+ Header.amd_kernel_code_version_minor = 1;
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
Header.amd_machine_version_major = ISA.Major;
Header.amd_machine_version_minor = ISA.Minor;
@@ -127,6 +309,11 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
Header.kernel_code_entry_byte_offset = sizeof(Header);
// wavefront_size is specified as a power of 2: 2^6 = 64 threads.
Header.wavefront_size = 6;
+
+ // If the code object does not support indirect functions, then the value must
+ // be 0xffffffff.
+ Header.call_convention = -1;
+
// These alignment values are specified in powers of two, so alignment =
// 2^n. The minimum alignment is 2^4 = 16.
Header.kernarg_segment_alignment = 4;
@@ -161,16 +348,16 @@ MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
ELF::SHF_AMDGPU_HSA_AGENT);
}
-bool isGroupSegment(const GlobalValue *GV) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
+ return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
}
-bool isGlobalSegment(const GlobalValue *GV) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
+ return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
}
-bool isReadOnlySegment(const GlobalValue *GV) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
+bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
+ return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
}
bool shouldEmitConstantsToTextSection(const Triple &TT) {
@@ -208,7 +395,7 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
return Default;
}
if (Strs.second.trim().getAsInteger(0, Ints.second)) {
- if (!OnlyFirstRequired || Strs.second.trim().size()) {
+ if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
Ctx.emitError("can't parse second integer attribute " + Name);
return Default;
}
@@ -217,57 +404,84 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
return Ints;
}
-unsigned getWaitcntBitMask(IsaVersion Version) {
- unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
- unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
- unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
- return Vmcnt | Expcnt | Lgkmcnt;
-}
+unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
+ unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
+ if (Version.Major < 9)
+ return VmcntLo;
-unsigned getVmcntBitMask(IsaVersion Version) {
- return (1 << getVmcntBitWidth()) - 1;
+ unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
+ return VmcntLo | VmcntHi;
}
-unsigned getExpcntBitMask(IsaVersion Version) {
+unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
return (1 << getExpcntBitWidth()) - 1;
}
-unsigned getLgkmcntBitMask(IsaVersion Version) {
+unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
return (1 << getLgkmcntBitWidth()) - 1;
}
-unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) {
- return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
+unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
+ unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
+ unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
+ unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
+ unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
+ if (Version.Major < 9)
+ return Waitcnt;
+
+ unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
+ return Waitcnt | VmcntHi;
+}
+
+unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+ unsigned VmcntLo =
+ unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
+ if (Version.Major < 9)
+ return VmcntLo;
+
+ unsigned VmcntHi =
+ unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
+ VmcntHi <<= getVmcntBitWidthLo();
+ return VmcntLo | VmcntHi;
}
-unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) {
+unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) {
+unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
Vmcnt = decodeVmcnt(Version, Waitcnt);
Expcnt = decodeExpcnt(Version, Waitcnt);
Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}
-unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) {
- return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
+unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Vmcnt) {
+ Waitcnt =
+ packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
+ if (Version.Major < 9)
+ return Waitcnt;
+
+ Vmcnt >>= getVmcntBitWidthLo();
+ return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}
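+
+// A round-trip sketch with illustrative values; sketchWaitcntRoundTrip is a
+// hypothetical check, not part of the API. Vmcnt = 37 (0b100101) lands as
+// Waitcnt[3:0] = 0b0101 and Waitcnt[15:14] = 0b10 on gfx9.
+void sketchWaitcntRoundTrip() {
+  IsaInfo::IsaVersion GFX9 = {9, 0, 0};
+  unsigned W = encodeVmcnt(GFX9, getWaitcntBitMask(GFX9), 37);
+  assert(decodeVmcnt(GFX9, W) == 37);
+}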
-unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) {
+unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Expcnt) {
return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
+unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Lgkmcnt) {
return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-unsigned encodeWaitcnt(IsaVersion Version,
+unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
unsigned Waitcnt = getWaitcntBitMask(Version);
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
@@ -296,6 +510,10 @@ bool isCompute(CallingConv::ID cc) {
return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}
+bool isEntryFunctionCC(CallingConv::ID CC) {
+ return true;
+}
+
bool isSI(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}
@@ -327,13 +545,34 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
return Reg;
}
+unsigned mc2PseudoReg(unsigned Reg) {
+ switch (Reg) {
+ case AMDGPU::FLAT_SCR_ci:
+ case AMDGPU::FLAT_SCR_vi:
+ return FLAT_SCR;
+
+ case AMDGPU::FLAT_SCR_LO_ci:
+ case AMDGPU::FLAT_SCR_LO_vi:
+ return AMDGPU::FLAT_SCR_LO;
+
+ case AMDGPU::FLAT_SCR_HI_ci:
+ case AMDGPU::FLAT_SCR_HI_vi:
+ return AMDGPU::FLAT_SCR_HI;
+
+ default:
+ return Reg;
+ }
+}
+
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+ assert(OpNo < Desc.NumOperands);
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
OpType <= AMDGPU::OPERAND_SRC_LAST;
}
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+ assert(OpNo < Desc.NumOperands);
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
switch (OpType) {
case AMDGPU::OPERAND_REG_IMM_FP32:
@@ -342,6 +581,7 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
return true;
default:
return false;
@@ -349,6 +589,7 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
}
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+ assert(OpNo < Desc.NumOperands);
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
@@ -392,6 +633,7 @@ unsigned getRegBitWidth(const MCRegisterClass &RC) {
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
unsigned OpNo) {
+ assert(OpNo < Desc.NumOperands);
unsigned RCID = Desc.OpInfo[OpNo].RegClass;
return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}
@@ -440,7 +682,8 @@ bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
}
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
- assert(HasInv2Pi);
+ if (!HasInv2Pi)
+ return false;
if (Literal >= -16 && Literal <= 64)
return true;
@@ -457,5 +700,92 @@ bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
Val == 0x3118; // 1/2pi
}
-} // End namespace AMDGPU
-} // End namespace llvm
+bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
+ assert(HasInv2Pi);
+
+ int16_t Lo16 = static_cast<int16_t>(Literal);
+ int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
+ return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
+}
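+
+// For example: 0x3C003C00 packs half 1.0 in both 16-bit lanes, so
+// Lo16 == Hi16 == 0x3C00 and the v2f16 literal is inlinable; 0x3C004000
+// differs between lanes and is not.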
+
+bool isUniformMMO(const MachineMemOperand *MMO) {
+ const Value *Ptr = MMO->getValue();
+ // UndefValue means this is a load of a kernel input. These are uniform.
+ // Sometimes LDS instructions have constant pointers.
+  // If Ptr is null, this mem operand contains a PseudoSourceValue such as
+  // the GOT.
+ if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
+ isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
+ return true;
+
+ const Instruction *I = dyn_cast<Instruction>(Ptr);
+ return I && I->getMetadata("amdgpu.uniform");
+}
+
+int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
+ if (isSI(ST) || isCI(ST))
+ return ByteOffset >> 2;
+
+ return ByteOffset;
+}
+
+bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
+ int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
+ return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
+ isUInt<20>(EncodedOffset);
+}
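+
+// Worked example: on SI/CI the SMRD offset field holds a dword count, so a
+// byte offset of 1020 encodes as 1020 >> 2 = 255, the largest value that
+// still passes isUInt<8>; on VI the byte offset itself must pass isUInt<20>.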
+} // end namespace AMDGPU
+
+} // end namespace llvm
+
+const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
+const unsigned AMDGPUAS::GLOBAL_ADDRESS;
+const unsigned AMDGPUAS::LOCAL_ADDRESS;
+const unsigned AMDGPUAS::PARAM_D_ADDRESS;
+const unsigned AMDGPUAS::PARAM_I_ADDRESS;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
+const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
+const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
+
+namespace llvm {
+namespace AMDGPU {
+
+AMDGPUAS getAMDGPUAS(Triple T) {
+ auto Env = T.getEnvironmentName();
+ AMDGPUAS AS;
+ if (Env == "amdgiz" || Env == "amdgizcl") {
+ AS.FLAT_ADDRESS = 0;
+ AS.PRIVATE_ADDRESS = 5;
+ AS.REGION_ADDRESS = 4;
+  } else {
+ AS.FLAT_ADDRESS = 4;
+ AS.PRIVATE_ADDRESS = 0;
+ AS.REGION_ADDRESS = 5;
+ }
+ return AS;
+}
+
+AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
+ return getAMDGPUAS(M.getTargetTriple());
+}
+
+AMDGPUAS getAMDGPUAS(const Module &M) {
+ return getAMDGPUAS(Triple(M.getTargetTriple()));
+}
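+
+// Usage sketch (the triple string is an illustrative assumption): an
+// "amdgiz" environment, e.g. Triple("amdgcn--amdhsa-amdgiz"), yields
+// FLAT_ADDRESS == 0 and PRIVATE_ADDRESS == 5; any other environment yields
+// the legacy mapping with FLAT_ADDRESS == 4 and PRIVATE_ADDRESS == 0.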
+} // namespace AMDGPU
+} // namespace llvm
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index ea5fc366d205..d6c836eb748b 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1,4 +1,4 @@
-//===-- AMDGPUBaseInfo.h - Top level definitions for AMDGPU -----*- C++ -*-===//
+//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,39 +10,143 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
+#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
-#include "llvm/IR/CallingConv.h"
-
#include "SIDefines.h"
-
-#define GET_INSTRINFO_OPERAND_ENUM
-#include "AMDGPUGenInstrInfo.inc"
-#undef GET_INSTRINFO_OPERAND_ENUM
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstdint>
+#include <utility>
namespace llvm {
class FeatureBitset;
class Function;
class GlobalValue;
+class MachineMemOperand;
class MCContext;
-class MCInstrDesc;
class MCRegisterClass;
class MCRegisterInfo;
class MCSection;
class MCSubtargetInfo;
+class Triple;
namespace AMDGPU {
+namespace IsaInfo {
-LLVM_READONLY
-int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+enum {
+ // The closed Vulkan driver sets 96, which limits the wave count to 8 but
+ // doesn't spill SGPRs as much as when 80 is set.
+ FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
+};
+/// \brief Instruction set architecture version.
struct IsaVersion {
unsigned Major;
unsigned Minor;
unsigned Stepping;
};
+/// \returns Isa version for given subtarget \p Features.
IsaVersion getIsaVersion(const FeatureBitset &Features);
+
+/// \returns Wavefront size for given subtarget \p Features.
+unsigned getWavefrontSize(const FeatureBitset &Features);
+
+/// \returns Local memory size in bytes for given subtarget \p Features.
+unsigned getLocalMemorySize(const FeatureBitset &Features);
+
+/// \returns Number of execution units per compute unit for given subtarget \p
+/// Features.
+unsigned getEUsPerCU(const FeatureBitset &Features);
+
+/// \returns Maximum number of work groups per compute unit for given subtarget
+/// \p Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize);
+
+/// \returns Maximum number of waves per compute unit for given subtarget \p
+/// Features without any kind of limitation.
+unsigned getMaxWavesPerCU(const FeatureBitset &Features);
+
+/// \returns Maximum number of waves per compute unit for given subtarget \p
+/// Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize);
+
+/// \returns Minimum number of waves per execution unit for given subtarget \p
+/// Features.
+unsigned getMinWavesPerEU(const FeatureBitset &Features);
+
+/// \returns Maximum number of waves per execution unit for given subtarget \p
+/// Features without any kind of limitation.
+unsigned getMaxWavesPerEU(const FeatureBitset &Features);
+
+/// \returns Maximum number of waves per execution unit for given subtarget \p
+/// Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize);
+
+/// \returns Minimum flat work group size for given subtarget \p Features.
+unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
+
+/// \returns Maximum flat work group size for given subtarget \p Features.
+unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
+
+/// \returns Number of waves per work group for given subtarget \p Features and
+/// limited by given \p FlatWorkGroupSize.
+unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize);
+
+/// \returns SGPR allocation granularity for given subtarget \p Features.
+unsigned getSGPRAllocGranule(const FeatureBitset &Features);
+
+/// \returns SGPR encoding granularity for given subtarget \p Features.
+unsigned getSGPREncodingGranule(const FeatureBitset &Features);
+
+/// \returns Total number of SGPRs for given subtarget \p Features.
+unsigned getTotalNumSGPRs(const FeatureBitset &Features);
+
+/// \returns Addressable number of SGPRs for given subtarget \p Features.
+unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
+
+/// \returns Minimum number of SGPRs that meets the given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+
+/// \returns Maximum number of SGPRs that meets the given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+ bool Addressable);
+
+/// \returns VGPR allocation granularity for given subtarget \p Features.
+unsigned getVGPRAllocGranule(const FeatureBitset &Features);
+
+/// \returns VGPR encoding granularity for given subtarget \p Features.
+unsigned getVGPREncodingGranule(const FeatureBitset &Features);
+
+/// \returns Total number of VGPRs for given subtarget \p Features.
+unsigned getTotalNumVGPRs(const FeatureBitset &Features);
+
+/// \returns Addressable number of VGPRs for given subtarget \p Features.
+unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
+
+/// \returns Minimum number of VGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+
+/// \returns Maximum number of VGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+
+} // end namespace IsaInfo
+
+LLVM_READONLY
+int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features);
MCSection *getHSATextSection(MCContext &Ctx);
@@ -53,9 +157,9 @@ MCSection *getHSADataGlobalProgramSection(MCContext &Ctx);
MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx);
-bool isGroupSegment(const GlobalValue *GV);
-bool isGlobalSegment(const GlobalValue *GV);
-bool isReadOnlySegment(const GlobalValue *GV);
+bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS);
+bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS);
+bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS);
/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
@@ -83,64 +187,89 @@ std::pair<int, int> getIntegerPairAttribute(const Function &F,
std::pair<int, int> Default,
bool OnlyFirstRequired = false);
-/// \returns Waitcnt bit mask for given isa \p Version.
-unsigned getWaitcntBitMask(IsaVersion Version);
-
/// \returns Vmcnt bit mask for given isa \p Version.
-unsigned getVmcntBitMask(IsaVersion Version);
+unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
/// \returns Expcnt bit mask for given isa \p Version.
-unsigned getExpcntBitMask(IsaVersion Version);
+unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
/// \returns Lgkmcnt bit mask for given isa \p Version.
-unsigned getLgkmcntBitMask(IsaVersion Version);
+unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
+
+/// \returns Waitcnt bit mask for given isa \p Version.
+unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt);
+unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt);
+unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt);
+unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
-/// \p Vmcnt = \p Waitcnt[3:0]
+/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
+/// \p Vmcnt = \p Waitcnt[3:0] | (\p Waitcnt[15:14] << 4)  (gfx9+ only)
/// \p Expcnt = \p Waitcnt[6:4]
/// \p Lgkmcnt = \p Waitcnt[11:8]
-void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
-unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt);
+unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Vmcnt);
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
-unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt);
+unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Expcnt);
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
-unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt);
+unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Lgkmcnt);
/// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
-/// Waitcnt[3:0] = \p Vmcnt
-/// Waitcnt[6:4] = \p Expcnt
-/// Waitcnt[11:8] = \p Lgkmcnt
+/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
+/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
+/// Waitcnt[6:4] = \p Expcnt
+/// Waitcnt[11:8] = \p Lgkmcnt
+/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
-unsigned encodeWaitcnt(IsaVersion Version,
+unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
unsigned getInitialPSInputAddr(const Function &F);
-bool isShader(CallingConv::ID cc);
-bool isCompute(CallingConv::ID cc);
+LLVM_READNONE
+bool isShader(CallingConv::ID CC);
+
+LLVM_READNONE
+bool isCompute(CallingConv::ID CC);
+
+LLVM_READNONE
+bool isEntryFunctionCC(CallingConv::ID CC);
+
+// FIXME: Remove this when the calling conventions are cleaned up.
+LLVM_READNONE
+inline bool isKernel(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return true;
+ default:
+ return false;
+ }
+}
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
@@ -150,6 +279,10 @@ bool isVI(const MCSubtargetInfo &STI);
/// \p STI otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
+/// \brief Convert hardware register \p Reg to a pseudo register
+LLVM_READNONE
+unsigned mc2PseudoReg(unsigned Reg);
+
/// \brief Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
@@ -188,6 +321,8 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_IMM_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
return 2;
default:
@@ -210,7 +345,21 @@ bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
+LLVM_READNONE
+bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
+
+bool isUniformMMO(const MachineMemOperand *MMO);
+
+/// \returns The encoding that will be used for \p ByteOffset in the SMRD
+/// offset field.
+int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+
+/// \returns true if this offset is small enough to fit in the SMRD
+/// offset field. \p ByteOffset should be the offset in bytes and
+/// not the encoded offset.
+bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
+
} // end namespace AMDGPU
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
diff --git a/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
index c55eaab077d1..991408c81c92 100644
--- a/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -87,7 +87,7 @@ COMPPGM1(enable_ieee_mode, compute_pgm_rsrc1_ieee_mode, IEEE
// TODO: cdbg_user
COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN),
COMPPGM2(user_sgpr_count, compute_pgm_rsrc2_user_sgpr, USER_SGPR),
-// TODO: enable_trap_handler
+COMPPGM2(enable_trap_handler, compute_pgm_rsrc2_trap_handler, TRAP_HANDLER),
COMPPGM2(enable_sgpr_workgroup_id_x, compute_pgm_rsrc2_tgid_x_en, TGID_X_EN),
COMPPGM2(enable_sgpr_workgroup_id_y, compute_pgm_rsrc2_tgid_y_en, TGID_Y_EN),
COMPPGM2(enable_sgpr_workgroup_id_z, compute_pgm_rsrc2_tgid_z_en, TGID_Z_EN),
diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td
index 8cae83cd9d1a..1febc6bf8ec2 100644
--- a/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/lib/Target/AMDGPU/VOP1Instructions.td
@@ -23,18 +23,18 @@ class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
bits<8> vdst;
-
+
let Inst{8-0} = 0xf9; // sdwa
let Inst{16-9} = op;
let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
let Inst{31-25} = 0x3f; // encoding
}
-class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
+class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1Only = 0> :
InstSI <P.Outs32, P.Ins32, "", pattern>,
VOP <opName>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.NONE>,
- MnemonicAlias<opName#"_e32", opName> {
+ SIMCInstr <!if(VOP1Only, opName, opName#"_e32"), SIEncodingFamily.NONE>,
+ MnemonicAlias<!if(VOP1Only, opName, opName#"_e32"), opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -75,6 +75,8 @@ class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
let TSFlags = ps.TSFlags;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let Uses = ps.Uses;
}
class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
@@ -83,10 +85,17 @@ class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
}
class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
- list<dag> ret = !if(P.HasModifiers,
- [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
- i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]);
+ list<dag> ret =
+ !if(P.HasModifiers,
+ [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
+ i32:$src0_modifiers,
+ i1:$clamp, i32:$omod))))],
+ !if(P.HasOMod,
+ [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
+ i1:$clamp, i32:$omod))))],
+ [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
+ )
+ );
}
multiclass VOP1Inst <string opName, VOPProfile P,
@@ -96,6 +105,23 @@ multiclass VOP1Inst <string opName, VOPProfile P,
def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
}
+// Special profile for instructions which have clamp
+// and output modifiers (but have no input modifiers)
+class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
+ VOPProfile<[dstVt, srcVt, untyped, untyped]> {
+
+ let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
+ let Asm64 = "$vdst, $src0$clamp$omod";
+
+ let HasModifiers = 0;
+ let HasClamp = 1;
+ let HasOMod = 1;
+}
+
+def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
+def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
+def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;
+
//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//
@@ -142,24 +168,24 @@ def V_READFIRSTLANE_B32 :
let SchedRW = [WriteQuarterRate32] in {
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
-defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP_F64_I32, sint_to_fp>;
-defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>;
-defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>;
+defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
+defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
+defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
-defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_I32_F32, fp_to_f16>;
-defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_I32, f16_to_fp>;
+defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
+defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
-defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>;
+defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
-defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP_F32_I32, AMDGPUcvt_f32_ubyte0>;
-defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP_F32_I32, AMDGPUcvt_f32_ubyte1>;
-defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP_F32_I32, AMDGPUcvt_f32_ubyte2>;
-defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP_F32_I32, AMDGPUcvt_f32_ubyte3>;
+defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
+defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
+defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
+defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
-defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP_F64_I32, uint_to_fp>;
+defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
} // End SchedRW = [WriteQuarterRate32]
defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
@@ -237,7 +263,7 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
src0_sel:$src0_sel);
let Asm32 = getAsm32<1, 1>.ret;
- let Asm64 = getAsm64<1, 1, 0>.ret;
+ let Asm64 = getAsm64<1, 1, 0, 1>.ret;
let AsmDPP = getAsmDPP<1, 1, 0>.ret;
let AsmSDWA = getAsmSDWA<1, 1, 0>.ret;
@@ -258,11 +284,14 @@ defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>;
defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
} // End Uses = [M0, EXEC]
+let SchedRW = [WriteQuarterRate32] in {
+defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
+}
+
// These instruction only exist on SI and CI
let SubtargetPredicate = isSICI in {
let SchedRW = [WriteQuarterRate32] in {
-defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
defm V_LOG_CLAMP_F32 : VOP1Inst <"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
defm V_RCP_CLAMP_F32 : VOP1Inst <"v_rcp_clamp_f32", VOP_F32_F32>;
defm V_RCP_LEGACY_F32 : VOP1Inst <"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
@@ -297,8 +326,8 @@ defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;
let SubtargetPredicate = isVI in {
-defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP_F16_I16, uint_to_fp>;
-defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP_F16_I16, sint_to_fp>;
+defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
+defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
@@ -326,12 +355,31 @@ def : Pat<
>;
def : Pat<
- (i16 (fp_to_f16 f32:$src)),
+ (i16 (AMDGPUfp_to_f16 f32:$src)),
(V_CVT_F16_F32_e32 $src)
>;
}
+def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> {
+ let Outs32 = (outs VGPR_32:$vdst, VGPR_32:$vdst1);
+ let Ins32 = (ins VGPR_32:$src0, VGPR_32:$src1);
+ let Outs64 = Outs32;
+ let Asm32 = " $vdst, $src0";
+ let Asm64 = "";
+ let Ins64 = (ins);
+}
+
+let SubtargetPredicate = isGFX9 in {
+ let Constraints = "$vdst = $src1, $vdst1 = $src0",
+ DisableEncoding="$vdst1,$src1",
+ SchedRW = [Write64Bit, Write64Bit] in {
+// Never VOP3. Takes as long as 2 v_mov_b32s
+def V_SWAP_B32 : VOP1_Pseudo <"v_swap_b32", VOP_SWAP_I32, [], 1>;
+}
+
+} // End SubtargetPredicate = isGFX9
+
//===----------------------------------------------------------------------===//
// Target
//===----------------------------------------------------------------------===//
@@ -453,6 +501,14 @@ class VOP1_DPP <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
let Inst{31-25} = 0x3f; //encoding
}
+multiclass VOP1Only_Real_vi <bits<10> op> {
+ let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+ def _vi :
+ VOP1_Real<!cast<VOP1_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME).Pfl>;
+ }
+}
+
multiclass VOP1_Real_vi <bits<10> op> {
let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
def _e32_vi :
@@ -480,6 +536,7 @@ defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>;
defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>;
defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>;
defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>;
+defm V_MOV_FED_B32 : VOP1_Real_vi <0x9>;
defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>;
defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>;
defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>;
@@ -547,7 +604,7 @@ defm V_RNDNE_F16 : VOP1_Real_vi <0x47>;
defm V_FRACT_F16 : VOP1_Real_vi <0x48>;
defm V_SIN_F16 : VOP1_Real_vi <0x49>;
defm V_COS_F16 : VOP1_Real_vi <0x4a>;
-
+defm V_SWAP_B32 : VOP1Only_Real_vi <0x51>;
// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
// indexing mode. vdst can't be treated as a def for codegen purposes,
diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td
index 00e5ab3db0b7..2281f338ab45 100644
--- a/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/lib/Target/AMDGPU/VOP2Instructions.td
@@ -40,7 +40,7 @@ class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
bits<8> vdst;
bits<8> src1;
-
+
let Inst{8-0} = 0xf9; // sdwa
let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
@@ -93,6 +93,8 @@ class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
let TSFlags = ps.TSFlags;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let Uses = ps.Uses;
}
class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
@@ -119,8 +121,7 @@ multiclass VOP2Inst <string opName,
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
- def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
- Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>;
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
}
// TODO: add SDWA pseudo instructions for VOP2bInst and VOP2eInst
@@ -134,10 +135,10 @@ multiclass VOP2bInst <string opName,
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
def _e32 : VOP2_Pseudo <opName, P>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
-
- def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
- Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>;
+
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
}
+
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
}
@@ -154,6 +155,7 @@ multiclass VOP2eInst <string opName,
def _e32 : VOP2_Pseudo <opName, P>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
}
+
def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
}
@@ -179,10 +181,12 @@ class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
def VOP_MADMK_F16 : VOP_MADMK <f16>;
def VOP_MADMK_F32 : VOP_MADMK <f32>;
+// FIXME: Remove src2_modifiers. It isn't used, so it wastes memory and
+// processing time, but it makes it easier to convert to mad.
class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
- HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
+ HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
VGPR_32:$src2, // stub argument
@@ -194,6 +198,7 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel);
let Asm32 = getAsm32<1, 2, vt>.ret;
+ let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, vt>.ret;
let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret;
let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, vt>.ret;
let HasSrc2 = 0;
@@ -204,13 +209,13 @@ class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
def VOP_MAC_F16 : VOP_MAC <f16> {
// FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
// 'not a string initializer' error.
- let Asm64 = getAsm64<1, 2, HasModifiers, f16>.ret;
+ let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, f16>.ret;
}
def VOP_MAC_F32 : VOP_MAC <f32> {
// FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
// 'not a string initializer' error.
- let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
+ let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, f32>.ret;
}
// Write out to vcc or arbitrary SGPR.
@@ -280,7 +285,7 @@ def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> {
let Outs32 = (outs VGPR_32:$vdst);
let Outs64 = Outs32;
- let Ins32 = (ins SReg_32:$src0, SCSrc_b32:$src1);
+ let Ins32 = (ins SCSrc_b32:$src0, SCSrc_b32:$src1);
let Ins64 = Ins32;
let Asm32 = " $vdst, $src0, $src1";
let Asm64 = Asm32;
@@ -354,7 +359,7 @@ defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>;
defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_I32_F32_I32>; // TODO: set "Uses = dst"
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_I32_F32_F32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_I32_F32_F32>;
-defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_I32_F32_F32, int_SI_packf16>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_I32_F32_F32, AMDGPUpkrtz_f16_f32>;
defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_I32_I32_I32>;
defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_I32_I32_I32>;
@@ -494,6 +499,14 @@ def : Pat <
(V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)
>;
+// Undo sub x, c -> add x, -c canonicalization since c is more likely
+// an inline immediate than -c.
+// TODO: Also do for 64-bit.
+def : Pat<
+ (add i16:$src0, (i16 NegSubInlineConst16:$src1)),
+ (V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1)
+>;
+
} // End Predicates = [isVI]
//===----------------------------------------------------------------------===//
@@ -566,7 +579,10 @@ defm V_SUBB_U32 : VOP2be_Real_e32e64_si <0x29>;
defm V_SUBBREV_U32 : VOP2be_Real_e32e64_si <0x2a>;
defm V_READLANE_B32 : VOP2_Real_si <0x01>;
+
+let InOperandList = (ins SSrc_b32:$src0, SCSrc_b32:$src1) in {
defm V_WRITELANE_B32 : VOP2_Real_si <0x02>;
+}
defm V_MAC_LEGACY_F32 : VOP2_Real_e32e64_si <0x6>;
defm V_MIN_LEGACY_F32 : VOP2_Real_e32e64_si <0xd>;
@@ -646,7 +662,7 @@ multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;
} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
-
+
multiclass VOP2_SDWA_Real <bits<6> op> {
def _sdwa_vi :
VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index c2a4d4ba99b1..217a07488853 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -29,6 +29,26 @@ class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
ret1));
}
+class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
+ list<dag> ret3 = [(set P.DstVT:$vdst,
+ (node (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+ (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
+ (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))];
+
+ list<dag> ret2 = [(set P.DstVT:$vdst,
+ (node !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+ (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
+ (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))];
+
+ list<dag> ret1 = [(set P.DstVT:$vdst,
+ (node (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+
+ list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+ !if(!eq(P.NumSrcArgs, 2), ret2,
+ ret1));
+}
+
class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))];
@@ -86,6 +106,14 @@ def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64> {
let DstRC = RegisterOperand<VReg_64>;
}
+def VOP3b_I64_I1_I32_I32_I64 : VOPProfile<[i64, i32, i32, i64]> {
+ // FIXME: Hack to stop printing _e64
+ let DstRC = RegisterOperand<VReg_64>;
+
+ let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+ let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
+}
+
//===----------------------------------------------------------------------===//
// VOP3 Instructions
//===----------------------------------------------------------------------===//
@@ -209,10 +237,8 @@ def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I3
def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32>, int_amdgcn_mqsad_u32_u8>;
let isCommutable = 1 in {
-def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3_Profile<VOP_I64_I32_I32_I64>>;
-
-// XXX - Does this set VCC?
-def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3_Profile<VOP_I64_I32_I32_I64>>;
+def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>;
+def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
} // End isCommutable = 1
} // End SubtargetPredicate = isCIVI
@@ -234,12 +260,14 @@ def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16>>;
} // End isCommutable = 1
+def V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+
} // End SubtargetPredicate = isVI
let Predicates = [isVI] in {
-multiclass Tenary_i16_Pats <SDPatternOperator op1, SDPatternOperator op2,
- Instruction inst, SDPatternOperator op3> {
+multiclass Ternary_i16_Pats <SDPatternOperator op1, SDPatternOperator op2,
+ Instruction inst, SDPatternOperator op3> {
def : Pat<
(op2 (op1 i16:$src0, i16:$src1), i16:$src2),
(inst i16:$src0, i16:$src1, i16:$src2)
@@ -258,11 +286,26 @@ def : Pat<
>;
}
-defm: Tenary_i16_Pats<mul, add, V_MAD_U16, zext>;
-defm: Tenary_i16_Pats<mul, add, V_MAD_I16, sext>;
+defm: Ternary_i16_Pats<mul, add, V_MAD_U16, zext>;
+defm: Ternary_i16_Pats<mul, add, V_MAD_I16, sext>;
} // End Predicates = [isVI]
+let SubtargetPredicate = isGFX9 in {
+def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16>>;
+def V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+
+def V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUfmed3>;
+def V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUsmed3>;
+def V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile<VOP_I16_I16_I16_I16>, AMDGPUumed3>;
+}
+
//===----------------------------------------------------------------------===//
// Target
@@ -351,11 +394,19 @@ multiclass VOP3_Real_ci<bits<9> op> {
}
}
+multiclass VOP3be_Real_ci<bits<9> op> {
+ def _ci : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP3be_si <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
+ let AssemblerPredicates = [isCIOnly];
+ let DecoderNamespace = "CI";
+ }
+}
+
defm V_MQSAD_U16_U8 : VOP3_Real_ci <0x172>;
defm V_QSAD_PK_U16_U8 : VOP3_Real_ci <0x172>;
-defm V_MQSAD_U32_U8 : VOP3_Real_ci <0x174>;
-defm V_MAD_U64_U32 : VOP3_Real_ci <0x176>;
-defm V_MAD_I64_I32 : VOP3_Real_ci <0x177>;
+defm V_MQSAD_U32_U8 : VOP3_Real_ci <0x175>;
+defm V_MAD_U64_U32 : VOP3be_Real_ci <0x176>;
+defm V_MAD_I64_I32 : VOP3be_Real_ci <0x177>;
//===----------------------------------------------------------------------===//
// VI
@@ -376,8 +427,8 @@ multiclass VOP3be_Real_vi<bits<10> op> {
} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
defm V_MQSAD_U16_U8 : VOP3_Real_vi <0x172>;
-defm V_MAD_U64_U32 : VOP3_Real_vi <0x176>;
-defm V_MAD_I64_I32 : VOP3_Real_vi <0x177>;
+defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
+defm V_MAD_I64_I32 : VOP3be_Real_vi <0x1E9>;
defm V_MAD_LEGACY_F32 : VOP3_Real_vi <0x1c0>;
defm V_MAD_F32 : VOP3_Real_vi <0x1c1>;
@@ -424,6 +475,8 @@ defm V_MAD_F16 : VOP3_Real_vi <0x1ea>;
defm V_MAD_U16 : VOP3_Real_vi <0x1eb>;
defm V_MAD_I16 : VOP3_Real_vi <0x1ec>;
+defm V_PERM_B32 : VOP3_Real_vi <0x1ed>;
+
defm V_FMA_F16 : VOP3_Real_vi <0x1ee>;
defm V_DIV_FIXUP_F16 : VOP3_Real_vi <0x1ef>;
@@ -449,3 +502,16 @@ defm V_LSHLREV_B64 : VOP3_Real_vi <0x28f>;
defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>;
defm V_ASHRREV_I64 : VOP3_Real_vi <0x291>;
defm V_TRIG_PREOP_F64 : VOP3_Real_vi <0x292>;
+
+defm V_LSHL_ADD_U32 : VOP3_Real_vi <0x1fd>;
+defm V_ADD_LSHL_U32 : VOP3_Real_vi <0x1fe>;
+defm V_ADD3_U32 : VOP3_Real_vi <0x1ff>;
+defm V_LSHL_OR_B32 : VOP3_Real_vi <0x200>;
+defm V_AND_OR_B32 : VOP3_Real_vi <0x201>;
+defm V_OR3_B32 : VOP3_Real_vi <0x202>;
+defm V_PACK_B32_F16 : VOP3_Real_vi <0x2a0>;
+
+defm V_XAD_U32 : VOP3_Real_vi <0x1f3>;
+defm V_MED3_F16 : VOP3_Real_vi <0x1fa>;
+defm V_MED3_I16 : VOP3_Real_vi <0x1fb>;
+defm V_MED3_U16 : VOP3_Real_vi <0x1fc>;
diff --git a/lib/Target/AMDGPU/VOP3PInstructions.td b/lib/Target/AMDGPU/VOP3PInstructions.td
new file mode 100644
index 000000000000..96d343099132
--- /dev/null
+++ b/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -0,0 +1,82 @@
+//===-- VOP3PInstructions.td - Vector Instruction Definitions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VOP3P Classes
+//===----------------------------------------------------------------------===//
+
+class VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
+ VOP3P_Pseudo<OpName, P,
+ !if(P.HasModifiers, getVOP3PModPat<P, node>.ret, getVOP3Pat<P, node>.ret)
+>;
+
+// Non-packed instructions that use the VOP3P encoding, i.e. where
+// omod/abs are used.
+class VOP3_VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
+ VOP3P_Pseudo<OpName, P,
+ !if(P.HasModifiers, getVOP3ModPat<P, node>.ret, getVOP3Pat<P, node>.ret)
+>;
+
+let isCommutable = 1 in {
+def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, fma>;
+def V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fadd>;
+def V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmul>;
+def V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum>;
+def V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16>, fminnum>;
+
+def V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, add>;
+def V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
+def V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, sub>;
+def V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, mul>;
+
+def V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, smin>;
+def V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, umin>;
+def V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, smax>;
+def V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, umax>;
+}
+
+def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshl_rev>;
+def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, ashr_rev>;
+def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>;
+
+// XXX - Commutable?
+def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
+def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
+def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
+
+
+multiclass VOP3P_Real_vi<bits<10> op> {
+ def _vi : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3Pe <op, !cast<VOP3P_Pseudo>(NAME).Pfl> {
+ let AssemblerPredicates = [HasVOP3PInsts];
+ let DecoderNamespace = "VI";
+ }
+}
+
+defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x381>;
+defm V_PK_ADD_I16 : VOP3P_Real_vi <0x382>;
+defm V_PK_SUB_I16 : VOP3P_Real_vi <0x383>;
+defm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x384>;
+defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x385>;
+defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x386>;
+defm V_PK_MAX_I16 : VOP3P_Real_vi <0x387>;
+defm V_PK_MIN_I16 : VOP3P_Real_vi <0x388>;
+
+defm V_PK_ADD_U16 : VOP3P_Real_vi <0x38a>;
+defm V_PK_MAX_U16 : VOP3P_Real_vi <0x38c>;
+defm V_PK_MIN_U16 : VOP3P_Real_vi <0x38d>;
+defm V_PK_FMA_F16 : VOP3P_Real_vi <0x38e>;
+defm V_PK_ADD_F16 : VOP3P_Real_vi <0x38f>;
+defm V_PK_MUL_F16 : VOP3P_Real_vi <0x390>;
+defm V_PK_MIN_F16 : VOP3P_Real_vi <0x391>;
+defm V_PK_MAX_F16 : VOP3P_Real_vi <0x392>;
+
+defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x3a0>;
+defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x3a1>;
+defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x3a2>;
diff --git a/lib/Target/AMDGPU/VOPCInstructions.td b/lib/Target/AMDGPU/VOPCInstructions.td
index 16a456da3c67..a3550a63677b 100644
--- a/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/lib/Target/AMDGPU/VOPCInstructions.td
@@ -93,6 +93,8 @@ class VOPC_Real <VOPC_Pseudo ps, int EncodingFamily> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
let TSFlags = ps.TSFlags;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let Uses = ps.Uses;
}
class VOPC_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
@@ -165,13 +167,11 @@ multiclass VOPC_Pseudos <string opName,
let isCommutable = 1;
}
- def _sdwa : VOPC_SDWA_Pseudo <opName, P>,
- Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)> {
+ def _sdwa : VOPC_SDWA_Pseudo <opName, P> {
let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
let SchedRW = P.Schedule;
let isConvergent = DefExec;
let isCompare = 1;
- let isCommutable = 1;
}
}
@@ -563,7 +563,7 @@ multiclass VOPC_CLASS_F16 <string opName> :
VOPC_Class_Pseudos <opName, VOPC_I1_F16_I32, 0>;
multiclass VOPCX_CLASS_F16 <string opName> :
- VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 1>;
+ VOPC_Class_Pseudos <opName, VOPC_I1_F16_I32, 1>;
multiclass VOPC_CLASS_F32 <string opName> :
VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 0>;
diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td
index 5f72f97d9e28..69906c419db3 100644
--- a/lib/Target/AMDGPU/VOPInstructions.td
+++ b/lib/Target/AMDGPU/VOPInstructions.td
@@ -68,8 +68,9 @@ class VOP3Common <dag outs, dag ins, string asm = "",
let hasPostISelHook = 1;
}
-class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3Only = 0> :
- InstSI <P.Outs64, P.Ins64, "", pattern>,
+class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
+ bit VOP3Only = 0, bit isVOP3P = 0> :
+ InstSI <P.Outs64, !if(!and(isVOP3P, P.IsPacked), P.InsVOP3P, P.Ins64), "", pattern>,
VOP <opName>,
SIMCInstr<opName#"_e64", SIEncodingFamily.NONE>,
MnemonicAlias<opName#"_e64", opName> {
@@ -79,7 +80,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3On
let UseNamedOperandTable = 1;
string Mnemonic = opName;
- string AsmOperands = P.Asm64;
+ string AsmOperands = !if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64);
let Size = 8;
let mayLoad = 0;
@@ -100,23 +101,34 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3On
let VOP3 = 1;
let VALU = 1;
+ let FPClamp = P.HasFPClamp;
let Uses = [EXEC];
let AsmVariantName = AMDGPUAsmVariants.VOP3;
let AsmMatchConverter =
!if(!eq(VOP3Only,1),
- "cvtVOP3",
- !if(!eq(P.HasModifiers, 1), "cvtVOP3_2_mod", ""));
+ !if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"),
+ !if(!eq(P.HasModifiers, 1),
+ "cvtVOP3_2_mod",
+ !if(!eq(P.HasOMod, 1), "cvtVOP3OMod", "")
+ )
+ );
VOPProfile Pfl = P;
}
+class VOP3P_Pseudo <string opName, VOPProfile P, list<dag> pattern = []> :
+ VOP3_Pseudo<opName, P, pattern, 1, 1> {
+ let VOP3P = 1;
+}
+
class VOP3_Real <VOP3_Pseudo ps, int EncodingFamily> :
InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
SIMCInstr <ps.PseudoInstr, EncodingFamily> {
let isPseudo = 0;
let isCodeGenOnly = 0;
+ let UseNamedOperandTable = 1;
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
@@ -128,8 +140,15 @@ class VOP3_Real <VOP3_Pseudo ps, int EncodingFamily> :
let Constraints = ps.Constraints;
let DisableEncoding = ps.DisableEncoding;
let TSFlags = ps.TSFlags;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let Uses = ps.Uses;
}
+// XXX - Is there any reason to distinguish this from regular VOP3
+// here?
+class VOP3P_Real<VOP3P_Pseudo ps, int EncodingFamily> :
+ VOP3_Real<ps, EncodingFamily>;
+
class VOP3a<VOPProfile P> : Enc64 {
bits<2> src0_modifiers;
bits<9> src0;
@@ -197,6 +216,42 @@ class VOP3be <VOPProfile P> : Enc64 {
let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
}
+class VOP3Pe <bits<10> op, VOPProfile P> : Enc64 {
+ bits<8> vdst;
+ // neg, neg_hi, op_sel put in srcN_modifiers
+ bits<4> src0_modifiers;
+ bits<9> src0;
+ bits<4> src1_modifiers;
+ bits<9> src1;
+ bits<4> src2_modifiers;
+ bits<9> src2;
+ bits<1> clamp;
+
+ let Inst{7-0} = vdst;
+ let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // neg_hi src0
+ let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // neg_hi src1
+ let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0); // neg_hi src2
+
+ let Inst{11} = !if(P.HasOpSel, src0_modifiers{2}, 0); // op_sel(0)
+ let Inst{12} = !if(P.HasOpSel, src1_modifiers{2}, 0); // op_sel(1)
+ let Inst{13} = !if(P.HasOpSel, src2_modifiers{2}, 0); // op_sel(2)
+
+ let Inst{14} = !if(P.HasOpSel, src2_modifiers{3}, 0); // op_sel_hi(2)
+
+ let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
+
+ let Inst{25-16} = op;
+  let Inst{31-26} = 0x34; // encoding
+ let Inst{40-32} = !if(P.HasSrc0, src0, 0);
+ let Inst{49-41} = !if(P.HasSrc1, src1, 0);
+ let Inst{58-50} = !if(P.HasSrc2, src2, 0);
+ let Inst{59} = !if(P.HasOpSel, src0_modifiers{3}, 0); // op_sel_hi(0)
+ let Inst{60} = !if(P.HasOpSel, src1_modifiers{3}, 0); // op_sel_hi(1)
+ let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // neg (lo)
+ let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // neg (lo)
+ let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo)
+}
+
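For reference, the 64-bit word VOP3Pe describes can be packed as below; a minimal C++ sketch derived directly from the Inst{} assignments above. encodeVOP3P is a made-up helper, not an LLVM API, and the modifier bits (neg, neg_hi, op_sel, op_sel_hi) are left at zero for brevity.

    #include <cstdint>
    #include <cstdio>

    // Packs a VOP3P instruction word per the VOP3Pe field layout above.
    uint64_t encodeVOP3P(uint16_t op, uint8_t vdst, uint16_t src0,
                         uint16_t src1, uint16_t src2, bool clamp) {
      uint64_t Inst = 0;
      Inst |= uint64_t(vdst);                  // Inst{7-0}   vdst
      Inst |= uint64_t(clamp) << 15;           // Inst{15}    clamp
      Inst |= uint64_t(op & 0x3ff) << 16;      // Inst{25-16} opcode
      Inst |= uint64_t(0x34) << 26;            // Inst{31-26} VOP3P encoding
      Inst |= uint64_t(src0 & 0x1ff) << 32;    // Inst{40-32} src0
      Inst |= uint64_t(src1 & 0x1ff) << 41;    // Inst{49-41} src1
      Inst |= uint64_t(src2 & 0x1ff) << 50;    // Inst{58-50} src2
      return Inst;
    }

    int main() {
      // Arbitrary operand fields; opcode 0x38f is v_pk_add_f16 in the
      // VOP3P_Real_vi list earlier in VOP3PInstructions.td.
      std::printf("%016llx\n",
          (unsigned long long)encodeVOP3P(0x38f, 1, 0x100, 0x101, 0, false));
    }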
class VOP3be_si <bits<9> op, VOPProfile P> : VOP3be<P> {
let Inst{25-17} = op;
}
@@ -250,7 +305,7 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
VOP <opName>,
SIMCInstr <opName#"_sdwa", SIEncodingFamily.NONE>,
MnemonicAlias <opName#"_sdwa", opName> {
-
+
let isPseudo = 1;
let isCodeGenOnly = 1;
let UseNamedOperandTable = 1;
@@ -261,14 +316,14 @@ class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
let Size = 8;
let mayLoad = 0;
let mayStore = 0;
- let hasSideEffects = 0;
+ let hasSideEffects = 0;
let VALU = 1;
let SDWA = 1;
let Uses = [EXEC];
-
- let SubtargetPredicate = isVI;
- let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst);
+
+ let SubtargetPredicate = !if(P.HasExt, HasSDWA, DisableInst);
+ let AssemblerPredicate = !if(P.HasExt, HasSDWA, DisableInst);
let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.SDWA,
AMDGPUAsmVariants.Disable);
let DecoderNamespace = "SDWA";
@@ -337,8 +392,8 @@ class VOP_DPP <string OpName, VOPProfile P> :
let Size = 8;
let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
- let SubtargetPredicate = isVI;
- let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst);
+ let SubtargetPredicate = HasDPP;
+ let AssemblerPredicate = !if(P.HasExt, HasDPP, DisableInst);
let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
AMDGPUAsmVariants.Disable);
let DecoderNamespace = "DPP";
@@ -348,3 +403,4 @@ include "VOPCInstructions.td"
include "VOP1Instructions.td"
include "VOP2Instructions.td"
include "VOP3Instructions.td"
+include "VOP3PInstructions.td"
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index 89859ba063d9..8640c873f441 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -427,13 +427,11 @@ unsigned A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
unsigned Lane, bool QPR) {
unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
&ARM::DPRRegClass);
- AddDefaultPred(BuildMI(MBB,
- InsertBefore,
- DL,
- TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d),
- Out)
- .addReg(Reg)
- .addImm(Lane));
+ BuildMI(MBB, InsertBefore, DL,
+ TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), Out)
+ .addReg(Reg)
+ .addImm(Lane)
+ .add(predOps(ARMCC::AL));
return Out;
}
@@ -476,13 +474,11 @@ unsigned A15SDOptimizer::createVExt(MachineBasicBlock &MBB,
const DebugLoc &DL, unsigned Ssub0,
unsigned Ssub1) {
unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
- AddDefaultPred(BuildMI(MBB,
- InsertBefore,
- DL,
- TII->get(ARM::VEXTd32), Out)
- .addReg(Ssub0)
- .addReg(Ssub1)
- .addImm(1));
+ BuildMI(MBB, InsertBefore, DL, TII->get(ARM::VEXTd32), Out)
+ .addReg(Ssub0)
+ .addReg(Ssub1)
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
return Out;
}
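A recurring change in the ARM parts of this patch: chained AddDefaultPred(...) / AddDefaultCC(...) wrappers become explicit .add(predOps(...)) and .add(condCodeOp()) calls on the instruction builder. A rough, self-contained illustration of the idiom follows, with stand-in types rather than the real LLVM MachineOperand/MachineInstrBuilder classes; the shape (predOps appending a condition-code immediate plus a predicate register, condCodeOp appending the optional flag-setting register) mirrors how the patch uses the helpers.

    #include <array>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Operand { long Imm; unsigned Reg; bool IsImm; };

    // predOps(Pred): condition-code immediate + predicate register (0 = none).
    std::array<Operand, 2> predOps(long Pred, unsigned PredReg = 0) {
      return {{{Pred, 0, true}, {0, PredReg, false}}};
    }
    // condCodeOp(): the optional CPSR-def operand (%noreg here).
    Operand condCodeOp() { return {0, 0, false}; }

    struct Builder {
      std::vector<Operand> Ops;
      Builder &add(const Operand &Op) { Ops.push_back(Op); return *this; }
      template <std::size_t N> Builder &add(const std::array<Operand, N> &A) {
        for (const Operand &Op : A) Ops.push_back(Op);
        return *this;
      }
    };

    int main() {
      Builder B;
      // Replaces the old .addImm(Pred).addReg(0).addReg(0) tail:
      B.add(predOps(/*ARMCC::AL*/ 14)).add(condCodeOp());
      std::printf("%zu trailing operands appended\n", B.Ops.size()); // prints 3
    }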
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index be3048252bbc..39f7988200ea 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -16,21 +16,21 @@
#define LLVM_LIB_TARGET_ARM_ARM_H
#include "llvm/Support/CodeGen.h"
-#include "ARMBasicBlockInfo.h"
#include <functional>
+#include <vector>
namespace llvm {
class ARMAsmPrinter;
class ARMBaseTargetMachine;
+struct BasicBlockInfo;
class Function;
class FunctionPass;
-class ImmutablePass;
+class MachineBasicBlock;
+class MachineFunction;
class MachineInstr;
class MCInst;
class PassRegistry;
-class TargetLowering;
-class TargetMachine;
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -53,7 +53,8 @@ std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF);
void initializeARMLoadStoreOptPass(PassRegistry &);
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
+void initializeARMConstantIslandsPass(PassRegistry &);
-} // end namespace llvm;
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARM_H
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 2a090faeee6a..57f9d1c6b610 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -72,8 +72,6 @@ def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm",
"HasHardwareDivideInARM", "true",
"Enable divide instructions in ARM mode">;
-def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
- "Enable Thumb2 extract and pack instructions">;
def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
"Has data barrier (dmb / dsb) instructions">;
def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true",
@@ -263,6 +261,12 @@ def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true",
"Don't use movt/movw pairs for 32-bit "
"imms">;
+def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
+ "NegativeImmediates", "false",
+ "Convert immediates and instructions "
+ "to their negated or complemented "
+ "equivalent when the immediate does "
+ "not fit in the encoding.">;
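The rewrite this feature description refers to, in miniature (illustrative C++ only; encodable() with a 0..4095 range is a stand-in check, not ARM's actual immediate encoder, and lower() is a hypothetical helper):

    #include <cstdint>
    #include <cstdio>
    #include <string>

    bool encodable(int64_t Imm) { return Imm >= 0 && Imm < 4096; } // assumed range

    // Pick ADD, or the negated SUB form when ADD's immediate does not fit --
    // the kind of conversion that FeatureNoNegativeImmediates disables.
    std::string lower(int64_t Imm) {
      if (encodable(Imm))  return "add r0, r1, #" + std::to_string(Imm);
      if (encodable(-Imm)) return "sub r0, r1, #" + std::to_string(-Imm);
      return "<materialize the constant in a register>";
    }

    int main() { std::puts(lower(-1).c_str()); } // prints: sub r0, r1, #1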
//===----------------------------------------------------------------------===//
// ARM ISAs.
@@ -297,8 +301,7 @@ def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
FeatureV7Clrex]>;
def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
- [HasV7Ops, FeatureAcquireRelease,
- FeatureT2XtPk]>;
+ [HasV7Ops, FeatureAcquireRelease]>;
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
"Support ARM v8.1a instructions",
[HasV8Ops]>;
@@ -342,7 +345,9 @@ def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
"Cortex-A73 ARM processors", []>;
def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
- "Qualcomm ARM processors", []>;
+ "Qualcomm Krait processors", []>;
+def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
+ "Qualcomm Kryo processors", []>;
def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
"Swift ARM processors", []>;
@@ -393,8 +398,7 @@ def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>;
def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops]>;
def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops,
- FeatureDSP,
- FeatureT2XtPk]>;
+ FeatureDSP]>;
def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>;
@@ -415,15 +419,22 @@ def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops,
FeatureNEON,
FeatureDB,
FeatureDSP,
- FeatureAClass,
- FeatureT2XtPk]>;
+ FeatureAClass]>;
+
+def ARMv7ve : Architecture<"armv7ve", "ARMv7ve", [HasV7Ops,
+ FeatureNEON,
+ FeatureDB,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureAClass]>;
def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops,
FeatureDB,
FeatureDSP,
FeatureHWDiv,
- FeatureRClass,
- FeatureT2XtPk]>;
+ FeatureRClass]>;
def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops,
FeatureThumb2,
@@ -438,8 +449,7 @@ def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops,
FeatureDB,
FeatureHWDiv,
FeatureMClass,
- FeatureDSP,
- FeatureT2XtPk]>;
+ FeatureDSP]>;
def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops,
FeatureAClass,
@@ -481,9 +491,6 @@ def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps,
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
FeatureRClass,
FeatureDB,
- FeatureHWDiv,
- FeatureHWDivARM,
- FeatureT2XtPk,
FeatureDSP,
FeatureCRC,
FeatureMP,
@@ -603,8 +610,6 @@ def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7,
FeatureVMLxForwarding,
FeatureMP,
FeatureVFP4,
- FeatureHWDiv,
- FeatureHWDivARM,
FeatureVirtualization]>;
def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8,
@@ -636,8 +641,6 @@ def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12,
FeatureTrustZone,
FeatureVMLxForwarding,
FeatureVFP4,
- FeatureHWDiv,
- FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureVirtualization,
FeatureMP]>;
@@ -651,8 +654,6 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
FeatureVFP4,
FeatureMP,
FeatureCheckVLDnAlign,
- FeatureHWDiv,
- FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureVirtualization]>;
@@ -663,8 +664,6 @@ def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17,
FeatureMP,
FeatureVMLxForwarding,
FeatureVFP4,
- FeatureHWDiv,
- FeatureHWDivARM,
FeatureAvoidPartialCPSR,
FeatureVirtualization]>;
@@ -759,6 +758,15 @@ def : ProcNoItin<"cortex-m7", [ARMv7em,
FeatureFPARMv8,
FeatureD16]>;
+def : ProcNoItin<"cortex-m23", [ARMv8mBaseline,
+ FeatureNoMovt]>;
+
+def : ProcNoItin<"cortex-m33", [ARMv8mMainline,
+ FeatureDSP,
+ FeatureFPARMv8,
+ FeatureD16,
+ FeatureVFPOnlySP]>;
+
def : ProcNoItin<"cortex-a32", [ARMv8a,
FeatureHWDiv,
FeatureHWDivARM,
@@ -829,6 +837,12 @@ def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1,
FeatureCrypto,
FeatureCRC]>;
+def : ProcNoItin<"kryo", [ARMv8a, ProcKryo,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC]>;
+
def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52,
FeatureFPAO]>;
@@ -838,6 +852,8 @@ def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52,
include "ARMRegisterInfo.td"
+include "ARMRegisterBanks.td"
+
include "ARMCallingConv.td"
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 95db35ce8ffb..eb0d410b596b 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -844,7 +844,7 @@ void ARMAsmPrinter::emitAttributes() {
ARMBuildAttrs::Allowed);
else
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model,
- ARMBuildAttrs::AllowIEE754);
+ ARMBuildAttrs::AllowIEEE754);
if (STI.allowsUnalignedMem())
ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access,
@@ -1142,6 +1142,11 @@ void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) {
const MachineOperand &MO1 = MI->getOperand(1);
unsigned JTI = MO1.getIndex();
+ // Make sure the Thumb jump table is 4-byte aligned. This will be a nop for
+ // ARM mode tables.
+ EmitAlignment(2);
+
+ // Emit a label for the jump table.
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
OutStreamer->EmitLabel(JTISymbol);
@@ -1255,7 +1260,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
switch (Opc) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
case ARM::tPUSH:
// Special case here: no src & dst reg, but two extra imp ops.
@@ -1291,7 +1296,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
int64_t Offset = 0;
switch (Opc) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
case ARM::MOVr:
case ARM::tMOVr:
@@ -1346,11 +1351,11 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
}
}
} else if (DstReg == ARM::SP) {
- MI->dump();
+ MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
}
else {
- MI->dump();
+ MI->print(errs());
llvm_unreachable("Unsupported opcode for unwinding information");
}
}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 70a3246e34f1..4f5711ca9a79 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -11,34 +11,58 @@
//
//===----------------------------------------------------------------------===//
-#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <new>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -168,9 +192,8 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
.addReg(BaseReg)
.addImm(Amt)
- .addImm(Pred)
- .addReg(0)
- .addReg(0);
+ .add(predOps(Pred))
+ .add(condCodeOp());
} else if (Amt != 0) {
ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
@@ -180,17 +203,15 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
.addReg(OffReg)
.addReg(0)
.addImm(SOOpc)
- .addImm(Pred)
- .addReg(0)
- .addReg(0);
+ .add(predOps(Pred))
+ .add(condCodeOp());
} else
UpdateMI = BuildMI(MF, MI.getDebugLoc(),
get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
.addReg(BaseReg)
.addReg(OffReg)
- .addImm(Pred)
- .addReg(0)
- .addReg(0);
+ .add(predOps(Pred))
+ .add(condCodeOp());
break;
}
case ARMII::AddrMode3 : {
@@ -202,17 +223,15 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
.addReg(BaseReg)
.addImm(Amt)
- .addImm(Pred)
- .addReg(0)
- .addReg(0);
+ .add(predOps(Pred))
+ .add(condCodeOp());
else
UpdateMI = BuildMI(MF, MI.getDebugLoc(),
get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
.addReg(BaseReg)
.addReg(OffReg)
- .addImm(Pred)
- .addReg(0)
- .addReg(0);
+ .add(predOps(Pred))
+ .add(condCodeOp());
break;
}
}
@@ -306,7 +325,6 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// Walk backwards from the end of the basic block until the branch is
// analyzed or we give up.
while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
-
// Flag to be raised on unanalyzeable instructions. This is useful in cases
// where we want to clean up on the end of the basic block before we bail
// out.
@@ -381,7 +399,6 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return false;
}
-
unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
assert(!BytesRemoved && "code size not handled");
@@ -433,20 +450,24 @@ unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
if (!FBB) {
if (Cond.empty()) { // Unconditional branch?
if (isThumb)
- BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
else
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
} else
- BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
- .addImm(Cond[0].getImm()).addOperand(Cond[1]);
+ BuildMI(&MBB, DL, get(BccOpc))
+ .addMBB(TBB)
+ .addImm(Cond[0].getImm())
+ .add(Cond[1]);
return 1;
}
// Two-way conditional branch.
- BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
- .addImm(Cond[0].getImm()).addOperand(Cond[1]);
+ BuildMI(&MBB, DL, get(BccOpc))
+ .addMBB(TBB)
+ .addImm(Cond[0].getImm())
+ .add(Cond[1]);
if (isThumb)
- BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
else
BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
return 2;
@@ -576,7 +597,7 @@ static bool isEligibleForITBlock(const MachineInstr *MI) {
/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
-bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const {
+bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
if (!MI.isPredicable())
return false;
@@ -586,7 +607,7 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const {
if (!isEligibleForITBlock(&MI))
return false;
- ARMFunctionInfo *AFI =
+ const ARMFunctionInfo *AFI =
MI.getParent()->getParent()->getInfo<ARMFunctionInfo>();
if (AFI->isThumb2Function()) {
@@ -601,7 +622,8 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const {
}
namespace llvm {
-template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
+
+template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || MO.isUndef() || MO.isUse())
@@ -614,7 +636,8 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
// all definitions of CPSR are dead
return true;
}
-}
+
+} // end namespace llvm
/// GetInstSize - Return the size of the specified MachineInstr.
///
@@ -698,9 +721,8 @@ void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
if (Subtarget.isMClass())
MIB.addImm(0x800);
- AddDefaultPred(MIB);
-
- MIB.addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
+ MIB.add(predOps(ARMCC::AL))
+ .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
}
void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
@@ -718,11 +740,9 @@ void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
else
MIB.addImm(8);
- MIB.addReg(SrcReg, getKillRegState(KillSrc));
-
- AddDefaultPred(MIB);
-
- MIB.addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
+ MIB.addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL))
+ .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
}
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -733,8 +753,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
if (GPRDest && GPRSrc) {
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc))));
+ BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
return;
}
@@ -758,7 +780,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MIB.addReg(SrcReg, getKillRegState(KillSrc));
if (Opc == ARM::VORRq)
MIB.addReg(SrcReg, getKillRegState(KillSrc));
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
return;
}
@@ -845,10 +867,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// VORR takes two source operands.
if (Opc == ARM::VORRq)
Mov.addReg(Src);
- Mov = AddDefaultPred(Mov);
+ Mov = Mov.add(predOps(ARMCC::AL));
// MOVr can set CC.
if (Opc == ARM::MOVr)
- Mov = AddDefaultCC(Mov);
+ Mov = Mov.add(condCodeOp());
}
// Add implicit super-register defs and kills to the last instruction.
Mov->addRegisterDefined(DestReg, TRI);
@@ -886,35 +908,44 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
switch (RC->getSize()) {
case 4:
if (ARM::GPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else
llvm_unreachable("Unknown reg class!");
break;
case 8:
if (ARM::DPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VSTRD))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
if (Subtarget.hasV5TEOps()) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
- MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
-
- AddDefaultPred(MIB);
+ MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
// Fallback to STM instruction, which has existed since the dawn of
// time.
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
- .addFrameIndex(FI).addMemOperand(MMO));
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STMIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
}
@@ -925,15 +956,18 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (ARM::DPairRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
- .addFrameIndex(FI).addImm(16)
- .addReg(SrcReg, getKillRegState(isKill))
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VST1q64))
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI)
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
}
} else
llvm_unreachable("Unknown reg class!");
@@ -942,15 +976,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
// Use aligned spills if the stack can be realigned.
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
- .addFrameIndex(FI).addImm(16)
- .addReg(SrcReg, getKillRegState(isKill))
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
@@ -963,15 +999,17 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
- .addFrameIndex(FI).addImm(16)
- .addReg(SrcReg, getKillRegState(isKill))
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
@@ -982,10 +1020,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 64:
if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
@@ -1068,19 +1106,28 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
switch (RC->getSize()) {
case 4:
if (ARM::GPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else
llvm_unreachable("Unknown reg class!");
break;
case 8:
if (ARM::DPRRegClass.hasSubClassEq(RC)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
MachineInstrBuilder MIB;
@@ -1088,14 +1135,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
- MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
-
- AddDefaultPred(MIB);
+ MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
// Fallback to LDM instruction, which has existed since the dawn of
// time.
- MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
- .addFrameIndex(FI).addMemOperand(MMO));
+ MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
}
@@ -1108,13 +1156,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case 16:
if (ARM::DPairRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
- .addFrameIndex(FI).addImm(16)
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
- .addFrameIndex(FI)
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
+ .addFrameIndex(FI)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
}
} else
llvm_unreachable("Unknown reg class!");
@@ -1122,14 +1173,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case 24:
if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
- .addFrameIndex(FI).addImm(16)
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
- .addFrameIndex(FI)
- .addMemOperand(MMO));
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
@@ -1142,14 +1195,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
case 32:
if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
- .addFrameIndex(FI).addImm(16)
- .addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
} else {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
@@ -1162,10 +1217,10 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
break;
case 64:
if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
@@ -1248,7 +1303,7 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
: isThumb1 ? ARM::tLDMIA_UPD
: ARM::LDMIA_UPD))
- .addOperand(MI->getOperand(1));
+ .add(MI->getOperand(1));
} else {
LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
}
@@ -1257,17 +1312,17 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
: isThumb1 ? ARM::tSTMIA_UPD
: ARM::STMIA_UPD))
- .addOperand(MI->getOperand(0));
+ .add(MI->getOperand(0));
} else {
STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
}
- AddDefaultPred(LDM.addOperand(MI->getOperand(3)));
- AddDefaultPred(STM.addOperand(MI->getOperand(2)));
+ LDM.add(MI->getOperand(3)).add(predOps(ARMCC::AL));
+ STM.add(MI->getOperand(2)).add(predOps(ARMCC::AL));
// Sort the scratch registers into ascending order.
const TargetRegisterInfo &TRI = getRegisterInfo();
- llvm::SmallVector<unsigned, 6> ScratchRegs;
+ SmallVector<unsigned, 6> ScratchRegs;
for(unsigned I = 5; I < MI->getNumOperands(); ++I)
ScratchRegs.push_back(MI->getOperand(I).getReg());
std::sort(ScratchRegs.begin(), ScratchRegs.end(),
@@ -1285,7 +1340,6 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
BB->erase(MI);
}
-
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
@@ -1346,7 +1400,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(ARM::VMOVD));
MI.getOperand(0).setReg(DstRegD);
MI.getOperand(1).setReg(SrcRegD);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
// We are now reading SrcRegD instead of SrcRegS. This may upset the
// register scavenger and machine verifier, so we need to indicate that we
@@ -1735,25 +1789,17 @@ isProfitableToIfCvt(MachineBasicBlock &MBB,
}
}
}
-
- // Attempt to estimate the relative costs of predication versus branching.
- // Here we scale up each component of UnpredCost to avoid precision issue when
- // scaling NumCycles by Probability.
- const unsigned ScalingUpFactor = 1024;
- unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor);
- UnpredCost += ScalingUpFactor; // The branch itself
- UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
-
- return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost;
+ return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
+ MBB, 0, 0, Probability);
}
bool ARMBaseInstrInfo::
-isProfitableToIfCvt(MachineBasicBlock &TMBB,
+isProfitableToIfCvt(MachineBasicBlock &,
unsigned TCycles, unsigned TExtra,
- MachineBasicBlock &FMBB,
+ MachineBasicBlock &,
unsigned FCycles, unsigned FExtra,
BranchProbability Probability) const {
- if (!TCycles || !FCycles)
+ if (!TCycles)
return false;
// Attempt to estimate the relative costs of predication versus branching.
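For intuition, a worked run of the single-block check removed above, with made-up numbers: the misprediction penalty and the 0.5 probability are assumptions, and a plain double stands in for BranchProbability.

    #include <cstdio>

    int main() {
      const unsigned ScalingUpFactor = 1024;
      const unsigned NumCycles = 2, ExtraPredCycles = 1;
      const unsigned MispredictPenalty = 10; // assumed subtarget value
      const double TakenProb = 0.5;          // stand-in for BranchProbability

      // Scale everything up so scaling an integer cycle count by a
      // probability does not lose precision.
      unsigned UnpredCost =
          static_cast<unsigned>(TakenProb * NumCycles * ScalingUpFactor);
      UnpredCost += ScalingUpFactor;                          // the branch itself
      UnpredCost += MispredictPenalty * ScalingUpFactor / 10; // mispredict cost

      const bool Profitable =
          (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost;
      std::printf("UnpredCost=%u profitable=%d\n", UnpredCost, Profitable);
      // prints: UnpredCost=3072 profitable=1
    }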
@@ -1793,7 +1839,6 @@ ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
}
-
unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
if (Opc == ARM::B)
return ARM::Bcc;
@@ -1920,25 +1965,25 @@ ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
const MCInstrDesc &DefDesc = DefMI->getDesc();
for (unsigned i = 1, e = DefDesc.getNumOperands();
i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
- NewMI.addOperand(DefMI->getOperand(i));
+ NewMI.add(DefMI->getOperand(i));
unsigned CondCode = MI.getOperand(3).getImm();
if (Invert)
NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
else
NewMI.addImm(CondCode);
- NewMI.addOperand(MI.getOperand(4));
+ NewMI.add(MI.getOperand(4));
// DefMI is not the -S version that sets CPSR, so add an optional %noreg.
if (NewMI->hasOptionalDef())
- AddDefaultCC(NewMI);
+ NewMI.add(condCodeOp());
// The output register value when the predicate is false is an implicit
// register operand tied to the first def.
// The tie makes the register allocator ensure the FalseReg is allocated the
// same register as operand 0.
FalseReg.setImplicit();
- NewMI.addOperand(FalseReg);
+ NewMI.add(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
// Update SeenMIs set: register newly created MI and erase removed DefMI.
@@ -1983,6 +2028,16 @@ static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::RSBSrsi, ARM::RSBrsi},
{ARM::RSBSrsr, ARM::RSBrsr},
+ {ARM::tADDSi3, ARM::tADDi3},
+ {ARM::tADDSi8, ARM::tADDi8},
+ {ARM::tADDSrr, ARM::tADDrr},
+ {ARM::tADCS, ARM::tADC},
+
+ {ARM::tSUBSi3, ARM::tSUBi3},
+ {ARM::tSUBSi8, ARM::tSUBi8},
+ {ARM::tSUBSrr, ARM::tSUBrr},
+ {ARM::tSBCS, ARM::tSBC},
+
{ARM::t2ADDSri, ARM::t2ADDri},
{ARM::t2ADDSrr, ARM::t2ADDrr},
{ARM::t2ADDSrs, ARM::t2ADDrs},
@@ -2011,9 +2066,10 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
unsigned MIFlags) {
if (NumBytes == 0 && DestReg != BaseReg) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
- .addReg(BaseReg, RegState::Kill)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
- .setMIFlags(MIFlags);
+ .addReg(BaseReg, RegState::Kill)
+ .add(predOps(Pred, PredReg))
+ .add(condCodeOp())
+ .setMIFlags(MIFlags);
return;
}
@@ -2033,9 +2089,11 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
// Build the new ADD / SUB.
unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
- .setMIFlags(MIFlags);
+ .addReg(BaseReg, RegState::Kill)
+ .addImm(ThisVal)
+ .add(predOps(Pred, PredReg))
+ .add(condCodeOp())
+ .setMIFlags(MIFlags);
BaseReg = DestReg;
}
}
@@ -2154,7 +2212,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
// Add the complete list back in.
MachineInstrBuilder MIB(MF, &*MI);
for (int i = RegList.size() - 1; i >= 0; --i)
- MIB.addOperand(RegList[i]);
+ MIB.add(RegList[i]);
return true;
}
@@ -2213,33 +2271,30 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned NumBits = 0;
unsigned Scale = 1;
switch (AddrMode) {
- case ARMII::AddrMode_i12: {
+ case ARMII::AddrMode_i12:
ImmIdx = FrameRegIdx + 1;
InstrOffs = MI.getOperand(ImmIdx).getImm();
NumBits = 12;
break;
- }
- case ARMII::AddrMode2: {
+ case ARMII::AddrMode2:
ImmIdx = FrameRegIdx+2;
InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
InstrOffs *= -1;
NumBits = 12;
break;
- }
- case ARMII::AddrMode3: {
+ case ARMII::AddrMode3:
ImmIdx = FrameRegIdx+2;
InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
InstrOffs *= -1;
NumBits = 8;
break;
- }
case ARMII::AddrMode4:
case ARMII::AddrMode6:
// Can't fold any offset even if it's zero.
return false;
- case ARMII::AddrMode5: {
+ case ARMII::AddrMode5:
ImmIdx = FrameRegIdx+1;
InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
@@ -2247,7 +2302,6 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
NumBits = 8;
Scale = 4;
break;
- }
default:
llvm_unreachable("Unsupported addressing mode!");
}
@@ -2401,6 +2455,63 @@ inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
return false;
}
+static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
+ switch (MI->getOpcode()) {
+ default: return false;
+ case ARM::tLSLri:
+ case ARM::tLSRri:
+ case ARM::tLSLrr:
+ case ARM::tLSRrr:
+ case ARM::tSUBrr:
+ case ARM::tADDrr:
+ case ARM::tADDi3:
+ case ARM::tADDi8:
+ case ARM::tSUBi3:
+ case ARM::tSUBi8:
+ case ARM::tMUL:
+ IsThumb1 = true;
+ LLVM_FALLTHROUGH;
+ case ARM::RSBrr:
+ case ARM::RSBri:
+ case ARM::RSCrr:
+ case ARM::RSCri:
+ case ARM::ADDrr:
+ case ARM::ADDri:
+ case ARM::ADCrr:
+ case ARM::ADCri:
+ case ARM::SUBrr:
+ case ARM::SUBri:
+ case ARM::SBCrr:
+ case ARM::SBCri:
+ case ARM::t2RSBri:
+ case ARM::t2ADDrr:
+ case ARM::t2ADDri:
+ case ARM::t2ADCrr:
+ case ARM::t2ADCri:
+ case ARM::t2SUBrr:
+ case ARM::t2SUBri:
+ case ARM::t2SBCrr:
+ case ARM::t2SBCri:
+ case ARM::ANDrr:
+ case ARM::ANDri:
+ case ARM::t2ANDrr:
+ case ARM::t2ANDri:
+ case ARM::ORRrr:
+ case ARM::ORRri:
+ case ARM::t2ORRrr:
+ case ARM::t2ORRri:
+ case ARM::EORrr:
+ case ARM::EORri:
+ case ARM::t2EORrr:
+ case ARM::t2EORri:
+ case ARM::t2LSRri:
+ case ARM::t2LSRrr:
+ case ARM::t2LSLri:
+ case ARM::t2LSLrr:
+ return true;
+ }
+}
+
/// optimizeCompareInstr - Convert the instruction supplying the argument to the
/// comparison into one that sets the zero bit in the flags register;
/// Remove a redundant Compare instruction if an earlier instruction can set the
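At the source level, the peephole targets patterns like the following; illustrative only, and the actual selection depends on the subtarget.

    #include <cstdio>

    int f(int a, int b) {
      // `a - b` can be emitted as flag-setting SUBS, which already computes
      // the Z bit that the `== 0` test needs...
      int d = a - b;
      // ...so the separate CMP d, #0 against zero becomes redundant.
      return d == 0;
    }

    int main() { std::printf("%d\n", f(3, 3)); } // prints 1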
@@ -2462,6 +2573,41 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
return false;
}
+ bool IsThumb1 = false;
+ if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
+ return false;
+
+ // We also want to do this peephole for cases like this: if (a*b == 0),
+ // and optimise away the CMP instruction from the generated code sequence:
+ // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
+ // resulting from the select instruction, but these MOVS instructions for
+ // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
+ // However, if we only have MOVS instructions in between the CMP and the
+ // other instruction (the MULS in this example), then the CPSR is dead so we
+ // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
+ // reordering and then continue the analysis hoping we can eliminate the
+ // CMP. This peephole works on the vregs, so is still in SSA form. As a
+ // consequence, the movs won't redefine/kill the MUL operands which would
+ // make this reordering illegal.
+ if (MI && IsThumb1) {
+ --I;
+ bool CanReorder = true;
+ const bool HasStmts = I != E;
+ for (; I != E; --I) {
+ if (I->getOpcode() != ARM::tMOVi8) {
+ CanReorder = false;
+ break;
+ }
+ }
+ if (HasStmts && CanReorder) {
+ MI = MI->removeFromParent();
+ E = CmpInstr;
+ CmpInstr.getParent()->insert(E, MI);
+ }
+ I = CmpInstr;
+ E = MI;
+ }
+
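A toy model of the reordering described in the comment above, with strings standing in for MachineInstrs (not MIR): if every instruction between the flag-setting candidate and the compare is a tMOVi8-style move, hoist the candidate so it sits right before the compare and its flags survive.

    #include <algorithm>
    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> BB = {"muls", "movs", "movs", "cmp"};
      auto Cmp = BB.begin() + 3;
      const bool AllMovs = std::all_of(
          BB.begin() + 1, Cmp,
          [](const std::string &I) { return I == "movs"; });
      if (AllMovs)
        std::rotate(BB.begin(), BB.begin() + 1, Cmp); // muls now precedes cmp
      for (const auto &I : BB) std::printf("%s ", I.c_str());
      std::printf("\n"); // prints: movs movs muls cmp
    }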
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change. At the same time, search for Sub.
const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -2497,183 +2643,128 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
if (isPredicated(*MI))
return false;
- bool IsThumb1 = false;
- switch (MI->getOpcode()) {
- default: break;
- case ARM::tLSLri:
- case ARM::tLSRri:
- case ARM::tLSLrr:
- case ARM::tLSRrr:
- case ARM::tSUBrr:
- case ARM::tADDrr:
- case ARM::tADDi3:
- case ARM::tADDi8:
- case ARM::tSUBi3:
- case ARM::tSUBi8:
- IsThumb1 = true;
- LLVM_FALLTHROUGH;
- case ARM::RSBrr:
- case ARM::RSBri:
- case ARM::RSCrr:
- case ARM::RSCri:
- case ARM::ADDrr:
- case ARM::ADDri:
- case ARM::ADCrr:
- case ARM::ADCri:
- case ARM::SUBrr:
- case ARM::SUBri:
- case ARM::SBCrr:
- case ARM::SBCri:
- case ARM::t2RSBri:
- case ARM::t2ADDrr:
- case ARM::t2ADDri:
- case ARM::t2ADCrr:
- case ARM::t2ADCri:
- case ARM::t2SUBrr:
- case ARM::t2SUBri:
- case ARM::t2SBCrr:
- case ARM::t2SBCri:
- case ARM::ANDrr:
- case ARM::ANDri:
- case ARM::t2ANDrr:
- case ARM::t2ANDri:
- case ARM::ORRrr:
- case ARM::ORRri:
- case ARM::t2ORRrr:
- case ARM::t2ORRri:
- case ARM::EORrr:
- case ARM::EORri:
- case ARM::t2EORrr:
- case ARM::t2EORri:
- case ARM::t2LSRri:
- case ARM::t2LSRrr:
- case ARM::t2LSLri:
- case ARM::t2LSLrr: {
- // Scan forward for the use of CPSR
- // When checking against MI: if it's a conditional code that requires
- // checking of the V bit or C bit, then this is not safe to do.
- // It is safe to remove CmpInstr if CPSR is redefined or killed.
- // If we are done with the basic block, we need to check whether CPSR is
- // live-out.
- SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
- OperandsToUpdate;
- bool isSafe = false;
- I = CmpInstr;
- E = CmpInstr.getParent()->end();
- while (!isSafe && ++I != E) {
- const MachineInstr &Instr = *I;
- for (unsigned IO = 0, EO = Instr.getNumOperands();
- !isSafe && IO != EO; ++IO) {
- const MachineOperand &MO = Instr.getOperand(IO);
- if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
- isSafe = true;
- break;
- }
- if (!MO.isReg() || MO.getReg() != ARM::CPSR)
- continue;
- if (MO.isDef()) {
- isSafe = true;
- break;
- }
- // Condition code is after the operand before CPSR except for VSELs.
- ARMCC::CondCodes CC;
- bool IsInstrVSel = true;
- switch (Instr.getOpcode()) {
- default:
- IsInstrVSel = false;
- CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
- break;
- case ARM::VSELEQD:
- case ARM::VSELEQS:
- CC = ARMCC::EQ;
- break;
- case ARM::VSELGTD:
- case ARM::VSELGTS:
- CC = ARMCC::GT;
- break;
- case ARM::VSELGED:
- case ARM::VSELGES:
- CC = ARMCC::GE;
- break;
- case ARM::VSELVSS:
- case ARM::VSELVSD:
- CC = ARMCC::VS;
- break;
- }
+ // Scan forward for the use of CPSR
+ // When checking against MI: if it's a conditional code that requires
+ // checking of the V bit or C bit, then this is not safe to do.
+ // It is safe to remove CmpInstr if CPSR is redefined or killed.
+ // If we are done with the basic block, we need to check whether CPSR is
+ // live-out.
+ SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
+ OperandsToUpdate;
+ bool isSafe = false;
+ I = CmpInstr;
+ E = CmpInstr.getParent()->end();
+ while (!isSafe && ++I != E) {
+ const MachineInstr &Instr = *I;
+ for (unsigned IO = 0, EO = Instr.getNumOperands();
+ !isSafe && IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
+ isSafe = true;
+ break;
+ }
+ if (!MO.isReg() || MO.getReg() != ARM::CPSR)
+ continue;
+ if (MO.isDef()) {
+ isSafe = true;
+ break;
+ }
+ // Condition code is after the operand before CPSR except for VSELs.
+ ARMCC::CondCodes CC;
+ bool IsInstrVSel = true;
+ switch (Instr.getOpcode()) {
+ default:
+ IsInstrVSel = false;
+ CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();
+ break;
+ case ARM::VSELEQD:
+ case ARM::VSELEQS:
+ CC = ARMCC::EQ;
+ break;
+ case ARM::VSELGTD:
+ case ARM::VSELGTS:
+ CC = ARMCC::GT;
+ break;
+ case ARM::VSELGED:
+ case ARM::VSELGES:
+ CC = ARMCC::GE;
+ break;
+ case ARM::VSELVSS:
+ case ARM::VSELVSD:
+ CC = ARMCC::VS;
+ break;
+ }
- if (Sub) {
- ARMCC::CondCodes NewCC = getSwappedCondition(CC);
- if (NewCC == ARMCC::AL)
- return false;
- // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
- // on CMP needs to be updated to be based on SUB.
- // Push the condition code operands to OperandsToUpdate.
- // If it is safe to remove CmpInstr, the condition code of these
- // operands will be modified.
- if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
- Sub->getOperand(2).getReg() == SrcReg) {
- // VSel doesn't support condition code update.
- if (IsInstrVSel)
- return false;
- OperandsToUpdate.push_back(
- std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
- }
- } else {
- // No Sub, so this is x = <op> y, z; cmp x, 0.
- switch (CC) {
- case ARMCC::EQ: // Z
- case ARMCC::NE: // Z
- case ARMCC::MI: // N
- case ARMCC::PL: // N
- case ARMCC::AL: // none
- // CPSR can be used multiple times, we should continue.
- break;
- case ARMCC::HS: // C
- case ARMCC::LO: // C
- case ARMCC::VS: // V
- case ARMCC::VC: // V
- case ARMCC::HI: // C Z
- case ARMCC::LS: // C Z
- case ARMCC::GE: // N V
- case ARMCC::LT: // N V
- case ARMCC::GT: // Z N V
- case ARMCC::LE: // Z N V
- // The instruction uses the V bit or C bit which is not safe.
+ if (Sub) {
+ ARMCC::CondCodes NewCC = getSwappedCondition(CC);
+ if (NewCC == ARMCC::AL)
+ return false;
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
+ // on CMP needs to be updated to be based on SUB.
+ // Push the condition code operands to OperandsToUpdate.
+ // If it is safe to remove CmpInstr, the condition code of these
+ // operands will be modified.
+ if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg) {
+ // VSel doesn't support condition code update.
+ if (IsInstrVSel)
return false;
- }
+ OperandsToUpdate.push_back(
+ std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
}
- }
- }
-
- // If CPSR is not killed nor re-defined, we should check whether it is
- // live-out. If it is live-out, do not optimize.
- if (!isSafe) {
- MachineBasicBlock *MBB = CmpInstr.getParent();
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI)
- if ((*SI)->isLiveIn(ARM::CPSR))
+ } else {
+ // No Sub, so this is x = <op> y, z; cmp x, 0.
+ switch (CC) {
+ case ARMCC::EQ: // Z
+ case ARMCC::NE: // Z
+ case ARMCC::MI: // N
+ case ARMCC::PL: // N
+ case ARMCC::AL: // none
+ // CPSR can be used multiple times, we should continue.
+ break;
+ case ARMCC::HS: // C
+ case ARMCC::LO: // C
+ case ARMCC::VS: // V
+ case ARMCC::VC: // V
+ case ARMCC::HI: // C Z
+ case ARMCC::LS: // C Z
+ case ARMCC::GE: // N V
+ case ARMCC::LT: // N V
+ case ARMCC::GT: // Z N V
+ case ARMCC::LE: // Z N V
+ // The instruction uses the V bit or C bit which is not safe.
return false;
+ }
+ }
}
+ }
- // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
- // set CPSR so this is represented as an explicit output)
- if (!IsThumb1) {
- MI->getOperand(5).setReg(ARM::CPSR);
- MI->getOperand(5).setIsDef(true);
- }
- assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
- CmpInstr.eraseFromParent();
-
- // Modify the condition code of operands in OperandsToUpdate.
- // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
- // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
- for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
- OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
- return true;
+  // If CPSR is neither killed nor re-defined, we should check whether it is
+ // live-out. If it is live-out, do not optimize.
+ if (!isSafe) {
+ MachineBasicBlock *MBB = CmpInstr.getParent();
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if ((*SI)->isLiveIn(ARM::CPSR))
+ return false;
}
+
+ // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
+ // set CPSR so this is represented as an explicit output)
+ if (!IsThumb1) {
+ MI->getOperand(5).setReg(ARM::CPSR);
+ MI->getOperand(5).setIsDef(true);
}
-
- return false;
+ assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
+ CmpInstr.eraseFromParent();
+
+ // Modify the condition code of operands in OperandsToUpdate.
+ // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
+ // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+ for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
+ OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
+
+ return true;
}
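
A worked example of the swap handled above, with made-up registers (this sketch is not part of the patch): subs r0, r1, r2 already sets the flags for r1 - r2, so a following cmp r2, r1 is redundant once every consumer of the CMP's flags is rewritten with the swapped condition; a movgt keyed to the CMP becomes a movlt keyed to the SUB. The mapping itself, as a standalone sketch (illustrative enum, not the in-tree ARMCC values; AL doubles as the "no swap exists" answer, which is why the code above bails out when it sees AL):

enum Cond { EQ, NE, GT, GE, LT, LE, HI, HS, LO, LS, AL };

Cond swappedCond(Cond CC) {
  switch (CC) {
  case EQ:
  case NE: return CC; // equality is symmetric under operand swap
  case GT: return LT; // "r2 > r1" becomes "r1 < r2"
  case GE: return LE;
  case LT: return GT;
  case LE: return GE;
  case HI: return LO; // unsigned comparisons swap the same way
  case HS: return LS;
  case LO: return HI;
  case LS: return HS;
  default: return AL; // no usable swapped condition
  }
}
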
bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
@@ -2728,7 +2819,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
switch (UseOpc) {
default: break;
case ARM::ADDrr:
- case ARM::SUBrr: {
+ case ARM::SUBrr:
if (UseOpc == ARM::SUBrr && Commute)
return false;
@@ -2744,9 +2835,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
break;
- }
case ARM::ORRrr:
- case ARM::EORrr: {
+ case ARM::EORrr:
if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
return false;
SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
@@ -2757,9 +2847,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
case ARM::EORrr: NewUseOpc = ARM::EORri; break;
}
break;
- }
case ARM::t2ADDrr:
- case ARM::t2SUBrr: {
+ case ARM::t2SUBrr:
if (UseOpc == ARM::t2SUBrr && Commute)
return false;
@@ -2775,9 +2864,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
break;
- }
case ARM::t2ORRrr:
- case ARM::t2EORrr: {
+ case ARM::t2EORrr:
if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
return false;
SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
@@ -2789,7 +2877,6 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
}
break;
}
- }
}
}
@@ -2797,11 +2884,12 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned Reg1 = UseMI.getOperand(OpIdx).getReg();
bool isKill = UseMI.getOperand(OpIdx).isKill();
unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
- AddDefaultCC(
- AddDefaultPred(BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(),
- get(NewUseOpc), NewReg)
- .addReg(Reg1, getKillRegState(isKill))
- .addImm(SOImmValV1)));
+ BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
+ NewReg)
+ .addReg(Reg1, getKillRegState(isKill))
+ .addImm(SOImmValV1)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
UseMI.setDesc(get(NewUseOpc));
UseMI.getOperand(1).setReg(NewReg);
UseMI.getOperand(1).setIsKill();
@@ -3413,7 +3501,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::t2LDMDB:
case ARM::t2LDMIA_UPD:
case ARM::t2LDMDB_UPD:
- LdmBypass = 1;
+ LdmBypass = true;
DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
break;
}
@@ -3888,12 +3976,11 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::t2LDRs:
case ARM::t2LDRBs:
case ARM::t2LDRHs:
- case ARM::t2LDRSHs: {
+ case ARM::t2LDRSHs:
// Thumb2 mode: lsl 0-3 only.
Latency -= 2;
break;
}
- }
}
if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())
@@ -4180,14 +4267,14 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
- MIB.addMemOperand(MMO);
- AddDefaultPred(MIB);
+ MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
}
MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
- MIB.addReg(Reg, RegState::Kill).addImm(0);
- MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
- AddDefaultPred(MIB);
+ MIB.addReg(Reg, RegState::Kill)
+ .addImm(0)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
+ .add(predOps(ARMCC::AL));
}
bool
@@ -4222,6 +4309,7 @@ enum ARMExeDomain {
ExeVFP = 1,
ExeNEON = 2
};
+
//
// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
//
@@ -4345,8 +4433,10 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
// Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
MI.setDesc(get(ARM::VORRd));
- AddDefaultPred(
- MIB.addReg(DstReg, RegState::Define).addReg(SrcReg).addReg(SrcReg));
+ MIB.addReg(DstReg, RegState::Define)
+ .addReg(SrcReg)
+ .addReg(SrcReg)
+ .add(predOps(ARMCC::AL));
break;
case ARM::VMOVRS:
if (Domain != ExeNEON)
@@ -4366,9 +4456,10 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
// Note that DSrc has been widened and the other lane may be undef, which
// contaminates the entire register.
MI.setDesc(get(ARM::VGETLNi32));
- AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
- .addReg(DReg, RegState::Undef)
- .addImm(Lane));
+ MIB.addReg(DstReg, RegState::Define)
+ .addReg(DReg, RegState::Undef)
+ .addImm(Lane)
+ .add(predOps(ARMCC::AL));
  // The old source should be an implicit use; otherwise we might think it
// was dead before here.
@@ -4398,8 +4489,8 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
MIB.addReg(DReg, RegState::Define)
.addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))
.addReg(SrcReg)
- .addImm(Lane);
- AddDefaultPred(MIB);
+ .addImm(Lane)
+ .add(predOps(ARMCC::AL));
// The narrower destination must be marked as set to keep previous chains
// in place.
@@ -4433,8 +4524,8 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
MI.setDesc(get(ARM::VDUPLN32d));
MIB.addReg(DDst, RegState::Define)
.addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
- .addImm(SrcLane);
- AddDefaultPred(MIB);
+ .addImm(SrcLane)
+ .add(predOps(ARMCC::AL));
  // Neither the source nor the destination is naturally represented any
// more, so add them in manually.
@@ -4470,10 +4561,9 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
CurUndef = !MI.readsRegister(CurReg, TRI);
- NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
-
- NewMIB.addImm(1);
- AddDefaultPred(NewMIB);
+ NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
if (SrcLane == DstLane)
NewMIB.addReg(SrcReg, RegState::Implicit);
@@ -4489,10 +4579,9 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
- MIB.addReg(CurReg, getUndefRegState(CurUndef));
-
- MIB.addImm(1);
- AddDefaultPred(MIB);
+ MIB.addReg(CurReg, getUndefRegState(CurUndef))
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
if (SrcLane != DstLane)
MIB.addReg(SrcReg, RegState::Implicit);
@@ -4505,7 +4594,6 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,
break;
}
}
-
}
//===----------------------------------------------------------------------===//
@@ -4613,9 +4701,9 @@ void ARMBaseInstrInfo::breakPartialRegDependency(
// Insert the dependency-breaking FCONSTD before MI.
// 96 is the encoding of 0.5, but the actual value doesn't matter here.
- AddDefaultPred(
- BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
- .addImm(96));
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
+ .addImm(96)
+ .add(predOps(ARMCC::AL));
MI.addRegisterKilled(DReg, TRI, true);
}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index b01d5c8ec85f..23777b821f9f 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -17,16 +17,21 @@
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Support/CodeGen.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include <array>
+#include <cstdint>
#define GET_INSTRINFO_HEADER
#include "ARMGenInstrInfo.inc"
namespace llvm {
- class ARMSubtarget;
- class ARMBaseRegisterInfo;
+
+class ARMBaseRegisterInfo;
+class ARMSubtarget;
class ARMBaseInstrInfo : public ARMGenInstrInfo {
const ARMSubtarget &Subtarget;
@@ -106,7 +111,7 @@ public:
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is no such opcode.
- virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
+ virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0;
MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstr &MI,
@@ -156,7 +161,7 @@ public:
bool DefinesPredicate(MachineInstr &MI,
std::vector<MachineOperand> &Pred) const override;
- bool isPredicable(MachineInstr &MI) const override;
+ bool isPredicable(const MachineInstr &MI) const override;
/// GetInstSize - Returns the size of the specified MachineInstr.
///
@@ -401,25 +406,28 @@ public:
bool isSwiftFastImmShift(const MachineInstr *MI) const;
};
-static inline
-const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
- return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
+/// Get the operands corresponding to the given \p Pred value. By default, the
+/// predicate register is assumed to be 0 (no register), but you can pass in a
+/// \p PredReg if that is not the case.
+static inline std::array<MachineOperand, 2> predOps(ARMCC::CondCodes Pred,
+ unsigned PredReg = 0) {
+ return {{MachineOperand::CreateImm(static_cast<int64_t>(Pred)),
+ MachineOperand::CreateReg(PredReg, false)}};
}
-static inline
-const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
- return MIB.addReg(0);
+/// Get the operand corresponding to the conditional code result. By default,
+/// this is 0 (no register).
+static inline MachineOperand condCodeOp(unsigned CCReg = 0) {
+ return MachineOperand::CreateReg(CCReg, false);
}
-static inline
-const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB,
- bool isDead = false) {
- return MIB.addReg(ARM::CPSR, getDefRegState(true) | getDeadRegState(isDead));
-}
-
-static inline
-const MachineInstrBuilder &AddNoT1CC(const MachineInstrBuilder &MIB) {
- return MIB.addReg(0);
+/// Get the operand corresponding to the conditional code result for Thumb1.
+/// This operand will always refer to CPSR and it will have the Define flag set.
+/// You can optionally set the Dead flag by means of \p isDead.
+static inline MachineOperand t1CondCodeOp(bool isDead = false) {
+ return MachineOperand::CreateReg(ARM::CPSR,
+ /*Define*/ true, /*Implicit*/ false,
+ /*Kill*/ false, isDead);
}
static inline
@@ -517,6 +525,6 @@ bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
const ARMBaseInstrInfo &TII);
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
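
For readers tracking the API migration in this patch: the predOps/condCodeOp helpers introduced above compose with MachineInstrBuilder::add, which accepts either a single MachineOperand or an array of them. A minimal before/after sketch (opcode and register names are placeholders, not taken from the patch):

// Old style: wrap the whole builder chain.
//   AddDefaultCC(AddDefaultPred(
//       BuildMI(MBB, MI, DL, TII.get(ARM::ADDri), Dst)
//           .addReg(Src)
//           .addImm(Imm)));
// New style: append the operands where they belong in the chain.
BuildMI(MBB, MI, DL, TII.get(ARM::ADDri), Dst)
    .addReg(Src)
    .addImm(Imm)
    .add(predOps(ARMCC::AL)) // {AL, no predicate register}
    .add(condCodeOp());      // empty optional CPSR def: flags unused
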
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index d995c631dd1c..70a44eaaceb8 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -11,32 +11,42 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMBaseRegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMFrameLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <utility>
#define DEBUG_TYPE "arm-register-info"
@@ -46,7 +56,7 @@
using namespace llvm;
ARMBaseRegisterInfo::ARMBaseRegisterInfo()
- : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {}
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC) {}
static unsigned getFramePointerReg(const ARMSubtarget &STI) {
return STI.useR7AsFramePointer() ? ARM::R7 : ARM::R11;
@@ -140,7 +150,6 @@ ARMBaseRegisterInfo::getSjLjDispatchPreservedMask(const MachineFunction &MF) con
return CSR_FPRegs_RegMask;
}
-
const uint32_t *
ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
@@ -425,10 +434,11 @@ void ARMBaseRegisterInfo::emitLoadConstPool(
unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
- .addReg(DestReg, getDefRegState(true), SubIdx)
- .addConstantPoolIndex(Idx)
- .addImm(0).addImm(Pred).addReg(PredReg)
- .setMIFlags(MIFlags);
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx)
+ .addImm(0)
+ .add(predOps(Pred, PredReg))
+ .setMIFlags(MIFlags);
}
bool ARMBaseRegisterInfo::
@@ -474,26 +484,23 @@ getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
Scale = 4;
break;
}
- case ARMII::AddrMode2: {
+ case ARMII::AddrMode2:
ImmIdx = Idx+2;
InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm());
if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
InstrOffs = -InstrOffs;
break;
- }
- case ARMII::AddrMode3: {
+ case ARMII::AddrMode3:
ImmIdx = Idx+2;
InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm());
if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
InstrOffs = -InstrOffs;
break;
- }
- case ARMII::AddrModeT1_s: {
+ case ARMII::AddrModeT1_s:
ImmIdx = Idx+1;
InstrOffs = MI->getOperand(ImmIdx).getImm();
Scale = 4;
break;
- }
default:
llvm_unreachable("Unsupported addressing mode!");
}
@@ -609,7 +616,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
.addFrameIndex(FrameIdx).addImm(Offset);
if (!AFI->isThumb1OnlyFunction())
- AddDefaultCC(AddDefaultPred(MIB));
+ MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
}
void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
@@ -636,7 +643,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
assert(AFI->isThumb2Function());
Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
}
- assert (Done && "Unable to resolve frame index!");
+ assert(Done && "Unable to resolve frame index!");
(void)Done;
}
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 330e1535e863..2e91d9d4be24 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -15,24 +15,33 @@
#define LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H
#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <cstdint>
#define GET_REGINFO_HEADER
#include "ARMGenRegisterInfo.inc"
namespace llvm {
+
/// Register allocation hints.
namespace ARMRI {
+
enum {
RegPairOdd = 1,
RegPairEven = 2
};
-}
+
+} // end namespace ARMRI
/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
/// or a stack/pc register that we should push/pop.
static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
using namespace ARM;
+
switch (Reg) {
case R0: case R1: case R2: case R3:
case R4: case R5: case R6: case R7:
@@ -48,6 +57,7 @@ static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
static inline bool isARMArea2Register(unsigned Reg, bool isIOS) {
using namespace ARM;
+
switch (Reg) {
case R8: case R9: case R10: case R11: case R12:
// iOS has this second area.
@@ -59,6 +69,7 @@ static inline bool isARMArea2Register(unsigned Reg, bool isIOS) {
static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
using namespace ARM;
+
switch (Reg) {
case D15: case D14: case D13: case D12:
case D11: case D10: case D9: case D8:
@@ -87,7 +98,7 @@ protected:
/// BasePtr - ARM physical register used as a base ptr in complex stack
/// frames. I.e., when we need a 3rd base, not just SP and FP, due to
/// variable size stack objects.
- unsigned BasePtr;
+ unsigned BasePtr = ARM::R6;
// Can be only subclassed.
explicit ARMBaseRegisterInfo();
@@ -198,4 +209,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H
diff --git a/lib/Target/ARM/ARMBasicBlockInfo.h b/lib/Target/ARM/ARMBasicBlockInfo.h
index 780544f865df..e0cb0aa676a6 100644
--- a/lib/Target/ARM/ARMBasicBlockInfo.h
+++ b/lib/Target/ARM/ARMBasicBlockInfo.h
@@ -14,9 +14,9 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
#define LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
-#include "ARM.h"
-#include "ARMMachineFunctionInfo.h"
-using namespace llvm;
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cstdint>
namespace llvm {
@@ -44,31 +44,30 @@ struct BasicBlockInfo {
///
/// Because worst case padding is used, the computed offset of an aligned
/// block may not actually be aligned.
- unsigned Offset;
+ unsigned Offset = 0;
/// Size - Size of the basic block in bytes. If the block contains
/// inline assembly, this is a worst case estimate.
///
/// The size does not include any alignment padding whether from the
/// beginning of the block, or from an aligned jump table at the end.
- unsigned Size;
+ unsigned Size = 0;
/// KnownBits - The number of low bits in Offset that are known to be
/// exact. The remaining bits of Offset are an upper bound.
- uint8_t KnownBits;
+ uint8_t KnownBits = 0;
/// Unalign - When non-zero, the block contains instructions (inline asm)
/// of unknown size. The real size may be smaller than Size bytes by a
/// multiple of 1 << Unalign.
- uint8_t Unalign;
+ uint8_t Unalign = 0;
/// PostAlign - When non-zero, the block terminator contains a .align
/// directive, so the end of the block is aligned to 1 << PostAlign
/// bytes.
- uint8_t PostAlign;
+ uint8_t PostAlign = 0;
- BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
- PostAlign(0) {}
+ BasicBlockInfo() = default;
/// Compute the number of known offset bits internally to this block.
/// This number should be used to predict worst case padding when
@@ -107,4 +106,4 @@ struct BasicBlockInfo {
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
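
The BasicBlockInfo hunk above is a mechanical C++11 modernization: in-class default member initializers replace the hand-written member-initializer list, so the constructor can be defaulted. The pattern in isolation (a generic sketch, not ARM-specific):

#include <cstdint>

struct Example {
  unsigned Offset = 0;   // defaults now live next to the declarations...
  uint8_t KnownBits = 0;
  Example() = default;   // ...so the constructor no longer repeats them
};
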
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index 52c95b6244ac..94b317a8f986 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -17,7 +17,9 @@
#include "ARMBaseInstrInfo.h"
#include "ARMISelLowering.h"
+#include "ARMSubtarget.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -30,25 +32,47 @@ using namespace llvm;
ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
: CallLowering(&TLI) {}
-static bool isSupportedType(const DataLayout DL, const ARMTargetLowering &TLI,
+static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI,
Type *T) {
- EVT VT = TLI.getValueType(DL, T);
- if (!VT.isSimple() || !VT.isInteger() || VT.isVector())
+ EVT VT = TLI.getValueType(DL, T, true);
+ if (!VT.isSimple() || VT.isVector())
return false;
unsigned VTSize = VT.getSimpleVT().getSizeInBits();
- return VTSize == 8 || VTSize == 16 || VTSize == 32;
+
+ if (VTSize == 64)
+ // FIXME: Support i64 too
+ return VT.isFloatingPoint();
+
+ return VTSize == 1 || VTSize == 8 || VTSize == 16 || VTSize == 32;
}
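
Put differently, the widened check admits i1/i8/i16/i32 scalars plus 64-bit floating point, while i64 and all vector types still fall back to SelectionDAG. Expected outcomes, spelled out as a summary (inferred from the logic above, not a test from the patch):

// isSupportedType(DL, TLI, Type::getInt32Ty(Ctx))  -> true
// isSupportedType(DL, TLI, Type::getInt1Ty(Ctx))   -> true
// isSupportedType(DL, TLI, Type::getDoubleTy(Ctx)) -> true  (f64: VTSize == 64)
// isSupportedType(DL, TLI, Type::getInt64Ty(Ctx))  -> false (see FIXME above)
// isSupportedType(DL, TLI, any vector type)        -> false (VT.isVector())
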
namespace {
-struct FuncReturnHandler : public CallLowering::ValueHandler {
- FuncReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
- MachineInstrBuilder &MIB)
- : ValueHandler(MIRBuilder, MRI), MIB(MIB) {}
+/// Helper class for values going out through an ABI boundary (used for handling
+/// function return values and call parameters).
+struct OutgoingValueHandler : public CallLowering::ValueHandler {
+ OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder &MIB, CCAssignFn *AssignFn)
+ : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), StackSize(0) {}
unsigned getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
- llvm_unreachable("Don't know how to get a stack address yet");
+ assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
+ "Unsupported size");
+
+ LLT p0 = LLT::pointer(0, 32);
+ LLT s32 = LLT::scalar(32);
+ unsigned SPReg = MRI.createGenericVirtualRegister(p0);
+ MIRBuilder.buildCopy(SPReg, ARM::SP);
+
+ unsigned OffsetReg = MRI.createGenericVirtualRegister(s32);
+ MIRBuilder.buildConstant(OffsetReg, Offset);
+
+ unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
+ MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
+
+ MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
+ return AddrReg;
}
void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
@@ -56,26 +80,92 @@ struct FuncReturnHandler : public CallLowering::ValueHandler {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
- assert(VA.getValVT().getSizeInBits() <= 32 && "Unsupported value size");
- assert(VA.getLocVT().getSizeInBits() == 32 && "Unsupported location size");
-
- assert(VA.getLocInfo() != CCValAssign::SExt &&
- VA.getLocInfo() != CCValAssign::ZExt &&
- "ABI extensions not supported yet");
+ assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size");
+ assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size");
- MIRBuilder.buildCopy(PhysReg, ValVReg);
+ unsigned ExtReg = extendRegister(ValVReg, VA);
+ MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
}
void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
- llvm_unreachable("Don't know how to assign a value to an address yet");
+ assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
+ "Unsupported size");
+
+ unsigned ExtReg = extendRegister(ValVReg, VA);
+ auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+ MPO, MachineMemOperand::MOStore, VA.getLocVT().getStoreSize(),
+ /* Alignment */ 0);
+ MIRBuilder.buildStore(ExtReg, Addr, *MMO);
+ }
+
+ unsigned assignCustomValue(const CallLowering::ArgInfo &Arg,
+ ArrayRef<CCValAssign> VAs) override {
+ CCValAssign VA = VAs[0];
+ assert(VA.needsCustom() && "Value doesn't need custom handling");
+ assert(VA.getValVT() == MVT::f64 && "Unsupported type");
+
+ CCValAssign NextVA = VAs[1];
+ assert(NextVA.needsCustom() && "Value doesn't need custom handling");
+ assert(NextVA.getValVT() == MVT::f64 && "Unsupported type");
+
+ assert(VA.getValNo() == NextVA.getValNo() &&
+ "Values belong to different arguments");
+
+ assert(VA.isRegLoc() && "Value should be in reg");
+ assert(NextVA.isRegLoc() && "Value should be in reg");
+
+ unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
+ MRI.createGenericVirtualRegister(LLT::scalar(32))};
+ MIRBuilder.buildExtract(NewRegs[0], Arg.Reg, 0);
+ MIRBuilder.buildExtract(NewRegs[1], Arg.Reg, 32);
+
+ bool IsLittle = MIRBuilder.getMF().getSubtarget<ARMSubtarget>().isLittle();
+ if (!IsLittle)
+ std::swap(NewRegs[0], NewRegs[1]);
+
+ assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
+ assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA);
+
+ return 1;
+ }
+
+ bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ const CallLowering::ArgInfo &Info, CCState &State) override {
+ if (AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State))
+ return true;
+
+ StackSize =
+ std::max(StackSize, static_cast<uint64_t>(State.getNextStackOffset()));
+ return false;
}
MachineInstrBuilder &MIB;
+ uint64_t StackSize;
};
} // End anonymous namespace.
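
The assignCustomValue override above implements the soft-float calling-convention treatment of f64: the value is split into two 32-bit halves, each assigned to its own location (a pair of GPRs), with the halves swapped on big-endian subtargets. Condensed, the flow is (a recap of the handler above, not new behavior):

// f64 held in Arg.Reg:
//   buildExtract(NewRegs[0], Arg.Reg, 0);   // low 32 bits
//   buildExtract(NewRegs[1], Arg.Reg, 32);  // high 32 bits
//   if (!IsLittle) std::swap(NewRegs[0], NewRegs[1]);
//   assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
//   assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA);
// VA and NextVA are consumed together as one logical argument.
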
+void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL,
+ MachineRegisterInfo &MRI) const {
+ const ARMTargetLowering &TLI = *getTLI<ARMTargetLowering>();
+ LLVMContext &Ctx = OrigArg.Ty->getContext();
+
+ SmallVector<EVT, 4> SplitVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
+
+ assert(SplitVTs.size() == 1 && "Unsupported type");
+
+ // Even if there is no splitting to do, we still want to replace the original
+ // type (e.g. pointer type -> integer).
+ SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx),
+ OrigArg.Flags, OrigArg.IsFixed);
+}
+
/// Lower the return value for the already existing \p Ret. This assumes that
/// \p MIRBuilder's insertion point is correct.
bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
@@ -93,21 +183,23 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
if (!isSupportedType(DL, TLI, Val->getType()))
return false;
+ SmallVector<ArgInfo, 4> SplitVTs;
+ ArgInfo RetInfo(VReg, Val->getType());
+ setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F);
+ splitToValueTypes(RetInfo, SplitVTs, DL, MF.getRegInfo());
+
CCAssignFn *AssignFn =
TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
- ArgInfo RetInfo(VReg, Val->getType());
- setArgFlags(RetInfo, AttributeSet::ReturnIndex, DL, F);
-
- FuncReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret);
- return handleAssignments(MIRBuilder, AssignFn, RetInfo, RetHandler);
+ OutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret, AssignFn);
+ return handleAssignments(MIRBuilder, SplitVTs, RetHandler);
}
bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val, unsigned VReg) const {
assert(!Val == !VReg && "Return value without a vreg");
- auto Ret = AddDefaultPred(MIRBuilder.buildInstrNoInsert(ARM::BX_RET));
+ auto Ret = MIRBuilder.buildInstrNoInsert(ARM::BX_RET).add(predOps(ARMCC::AL));
if (!lowerReturnVal(MIRBuilder, Val, VReg, Ret))
return false;
@@ -117,13 +209,17 @@ bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
}
namespace {
-struct FormalArgHandler : public CallLowering::ValueHandler {
- FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
- : ValueHandler(MIRBuilder, MRI) {}
+/// Helper class for values coming in through an ABI boundary (used for handling
+/// formal arguments and call return values).
+struct IncomingValueHandler : public CallLowering::ValueHandler {
+ IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn AssignFn)
+ : ValueHandler(MIRBuilder, MRI, AssignFn) {}
unsigned getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
- assert(Size == 4 && "Unsupported size");
+ assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
+ "Unsupported size");
auto &MFI = MIRBuilder.getMF().getFrameInfo();
@@ -139,7 +235,17 @@ struct FormalArgHandler : public CallLowering::ValueHandler {
void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
MachinePointerInfo &MPO, CCValAssign &VA) override {
- assert(Size == 4 && "Unsupported size");
+ assert((Size == 1 || Size == 2 || Size == 4 || Size == 8) &&
+ "Unsupported size");
+
+ if (VA.getLocInfo() == CCValAssign::SExt ||
+ VA.getLocInfo() == CCValAssign::ZExt) {
+ // If the value is zero- or sign-extended, its size becomes 4 bytes, so
+ // that's what we should load.
+ Size = 4;
+ assert(MRI.getType(ValVReg).isScalar() && "Only scalars supported atm");
+ MRI.setType(ValVReg, LLT::scalar(32));
+ }
auto MMO = MIRBuilder.getMF().getMachineMemOperand(
MPO, MachineMemOperand::MOLoad, Size, /* Alignment */ 0);
@@ -151,12 +257,60 @@ struct FormalArgHandler : public CallLowering::ValueHandler {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
- assert(VA.getValVT().getSizeInBits() <= 32 && "Unsupported value size");
- assert(VA.getLocVT().getSizeInBits() == 32 && "Unsupported location size");
+ assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size");
+ assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size");
- MIRBuilder.getMBB().addLiveIn(PhysReg);
+    // The necessary extensions are handled on the other side of the ABI
+ // boundary.
+ markPhysRegUsed(PhysReg);
MIRBuilder.buildCopy(ValVReg, PhysReg);
}
+
+ unsigned assignCustomValue(const ARMCallLowering::ArgInfo &Arg,
+ ArrayRef<CCValAssign> VAs) override {
+ CCValAssign VA = VAs[0];
+ assert(VA.needsCustom() && "Value doesn't need custom handling");
+ assert(VA.getValVT() == MVT::f64 && "Unsupported type");
+
+ CCValAssign NextVA = VAs[1];
+ assert(NextVA.needsCustom() && "Value doesn't need custom handling");
+ assert(NextVA.getValVT() == MVT::f64 && "Unsupported type");
+
+ assert(VA.getValNo() == NextVA.getValNo() &&
+ "Values belong to different arguments");
+
+ assert(VA.isRegLoc() && "Value should be in reg");
+ assert(NextVA.isRegLoc() && "Value should be in reg");
+
+ unsigned NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
+ MRI.createGenericVirtualRegister(LLT::scalar(32))};
+
+ assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
+ assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA);
+
+ bool IsLittle = MIRBuilder.getMF().getSubtarget<ARMSubtarget>().isLittle();
+ if (!IsLittle)
+ std::swap(NewRegs[0], NewRegs[1]);
+
+ MIRBuilder.buildSequence(Arg.Reg, NewRegs, {0, 32});
+
+ return 1;
+ }
+
+ /// Marking a physical register as used is different between formal
+ /// parameters, where it's a basic block live-in, and call returns, where it's
+ /// an implicit-def of the call instruction.
+ virtual void markPhysRegUsed(unsigned PhysReg) = 0;
+};
+
+struct FormalArgHandler : public IncomingValueHandler {
+ FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn AssignFn)
+ : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
+
+ void markPhysRegUsed(unsigned PhysReg) override {
+ MIRBuilder.getMBB().addLiveIn(PhysReg);
+ }
};
} // End anonymous namespace
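
markPhysRegUsed is the one point where the two incoming-value cases differ, and the contrast is worth spelling out (both overrides appear in this patch; nothing new here):

// FormalArgHandler: the register is live into the function's entry block.
//   void markPhysRegUsed(unsigned PhysReg) override {
//     MIRBuilder.getMBB().addLiveIn(PhysReg);
//   }
// CallReturnHandler, further down: the register is defined by the call.
//   void markPhysRegUsed(unsigned PhysReg) override {
//     MIB.addDef(PhysReg, RegState::Implicit);
//   }
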
@@ -170,34 +324,111 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
if (F.isVarArg())
return false;
- auto DL = MIRBuilder.getMF().getDataLayout();
+ auto &MF = MIRBuilder.getMF();
+ auto DL = MF.getDataLayout();
auto &TLI = *getTLI<ARMTargetLowering>();
- auto &Args = F.getArgumentList();
- unsigned ArgIdx = 0;
- for (auto &Arg : Args) {
- ArgIdx++;
- if (!isSupportedType(DL, TLI, Arg.getType()))
- return false;
+ auto Subtarget = TLI.getSubtarget();
- // FIXME: This check as well as ArgIdx are going away as soon as we support
- // loading values < 32 bits.
- if (ArgIdx > 4 && Arg.getType()->getIntegerBitWidth() != 32)
+ if (Subtarget->isThumb())
+ return false;
+
+ for (auto &Arg : F.args())
+ if (!isSupportedType(DL, TLI, Arg.getType()))
return false;
- }
CCAssignFn *AssignFn =
TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg());
SmallVector<ArgInfo, 8> ArgInfos;
unsigned Idx = 0;
- for (auto &Arg : Args) {
+ for (auto &Arg : F.args()) {
ArgInfo AInfo(VRegs[Idx], Arg.getType());
setArgFlags(AInfo, Idx + 1, DL, F);
- ArgInfos.push_back(AInfo);
+ splitToValueTypes(AInfo, ArgInfos, DL, MF.getRegInfo());
Idx++;
}
- FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo());
- return handleAssignments(MIRBuilder, AssignFn, ArgInfos, ArgHandler);
+ FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(),
+ AssignFn);
+ return handleAssignments(MIRBuilder, ArgInfos, ArgHandler);
+}
+
+namespace {
+struct CallReturnHandler : public IncomingValueHandler {
+ CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder MIB, CCAssignFn *AssignFn)
+ : IncomingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+
+ void markPhysRegUsed(unsigned PhysReg) override {
+ MIB.addDef(PhysReg, RegState::Implicit);
+ }
+
+ MachineInstrBuilder MIB;
+};
+} // End anonymous namespace.
+
+bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
+ CallingConv::ID CallConv,
+ const MachineOperand &Callee,
+ const ArgInfo &OrigRet,
+ ArrayRef<ArgInfo> OrigArgs) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const auto &TLI = *getTLI<ARMTargetLowering>();
+ const auto &DL = MF.getDataLayout();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ if (MF.getSubtarget<ARMSubtarget>().genLongCalls())
+ return false;
+
+ auto CallSeqStart = MIRBuilder.buildInstr(ARM::ADJCALLSTACKDOWN);
+
+ // Create the call instruction so we can add the implicit uses of arg
+ // registers, but don't insert it yet.
+ auto MIB = MIRBuilder.buildInstrNoInsert(ARM::BLX).add(Callee).addRegMask(
+ TRI->getCallPreservedMask(MF, CallConv));
+
+ SmallVector<ArgInfo, 8> ArgInfos;
+ for (auto Arg : OrigArgs) {
+ if (!isSupportedType(DL, TLI, Arg.Ty))
+ return false;
+
+ if (!Arg.IsFixed)
+ return false;
+
+ splitToValueTypes(Arg, ArgInfos, DL, MRI);
+ }
+
+ auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
+ OutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn);
+ if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler))
+ return false;
+
+ // Now we can add the actual call instruction to the correct basic block.
+ MIRBuilder.insertInstr(MIB);
+
+ if (!OrigRet.Ty->isVoidTy()) {
+ if (!isSupportedType(DL, TLI, OrigRet.Ty))
+ return false;
+
+ ArgInfos.clear();
+ splitToValueTypes(OrigRet, ArgInfos, DL, MRI);
+
+ auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, /*IsVarArg=*/false);
+ CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn);
+ if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler))
+ return false;
+ }
+
+ // We now know the size of the stack - update the ADJCALLSTACKDOWN
+ // accordingly.
+ CallSeqStart.addImm(ArgHandler.StackSize).add(predOps(ARMCC::AL));
+
+ MIRBuilder.buildInstr(ARM::ADJCALLSTACKUP)
+ .addImm(ArgHandler.StackSize)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+
+ return true;
}
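
Note the ordering constraint lowerCall works around: the outgoing stack size is only known after the argument handler has run, so ADJCALLSTACKDOWN is built first and its immediate appended afterwards, bracketing the call roughly as follows (simplified pseudo-MIR, a sketch rather than exact output):

// ADJCALLSTACKDOWN StackSize, pred:AL    ; reserve the outgoing-arg area
//   ...argument copies/stores from OutgoingValueHandler...
// BLX callee, regmask, implicit defs     ; the call itself
// ADJCALLSTACKUP StackSize, 0, pred:AL   ; release the area
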
diff --git a/lib/Target/ARM/ARMCallLowering.h b/lib/Target/ARM/ARMCallLowering.h
index 6a1b886b501f..6404c7a2689e 100644
--- a/lib/Target/ARM/ARMCallLowering.h
+++ b/lib/Target/ARM/ARMCallLowering.h
@@ -34,9 +34,19 @@ public:
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
+ bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
+ const MachineOperand &Callee, const ArgInfo &OrigRet,
+ ArrayRef<ArgInfo> OrigArgs) const override;
+
private:
bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
unsigned VReg, MachineInstrBuilder &Ret) const;
+
+ /// Split an argument into one or more arguments that the CC lowering can cope
+ /// with (e.g. replace pointers with integers).
+ void splitToValueTypes(const ArgInfo &OrigArg,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL, MachineRegisterInfo &MRI) const;
};
} // End of namespace llvm
#endif
diff --git a/lib/Target/ARM/ARMComputeBlockSize.cpp b/lib/Target/ARM/ARMComputeBlockSize.cpp
index 64f187d17e64..e145d0a49ae6 100644
--- a/lib/Target/ARM/ARMComputeBlockSize.cpp
+++ b/lib/Target/ARM/ARMComputeBlockSize.cpp
@@ -8,7 +8,15 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMBasicBlockInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <vector>
+
using namespace llvm;
namespace llvm {
@@ -69,4 +77,4 @@ std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF) {
return BBInfo;
}
-} // end namespace
+} // end namespace llvm
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index be1a37e3e362..23722f1b7f3f 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -14,30 +14,50 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMBasicBlockInfo.h"
#include "ARMMachineFunctionInfo.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <new>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "arm-cp-islands"
+#define ARM_CP_ISLANDS_OPT_NAME \
+ "ARM constant island placement and branch shortening pass"
STATISTIC(NumCPEs, "Number of constpool entries");
STATISTIC(NumSplit, "Number of uncond branches inserted");
STATISTIC(NumCBrFixed, "Number of cond branches fixed");
@@ -49,7 +69,6 @@ STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
-
static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
cl::desc("Adjust basic block layout to better use TB[BH]"));
@@ -64,6 +83,7 @@ static cl::opt<bool> SynthesizeThumb1TBB(
"equivalent to the TBB/TBH instructions"));
namespace {
+
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
/// requires constant pool entries to be scattered among the instructions
/// inside a function. To do this, it completely ignores the normal LLVM
@@ -76,7 +96,6 @@ namespace {
/// CPE - A constant pool entry that has been placed somewhere, which
/// tracks a list of users.
class ARMConstantIslands : public MachineFunctionPass {
-
std::vector<BasicBlockInfo> BBInfo;
/// WaterList - A sorted list of basic blocks where islands could be placed
@@ -110,12 +129,14 @@ namespace {
bool NegOk;
bool IsSoImm;
bool KnownAlignment;
+
CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
bool neg, bool soimm)
: MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm),
KnownAlignment(false) {
HighWaterMark = CPEMI->getParent();
}
+
/// getMaxDisp - Returns the maximum displacement supported by MI.
/// Correct for unknown alignment.
/// Conservatively subtract 2 bytes to handle weird alignment effects.
@@ -135,6 +156,7 @@ namespace {
MachineInstr *CPEMI;
unsigned CPI;
unsigned RefCount;
+
CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
: CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
};
@@ -148,7 +170,7 @@ namespace {
/// The first half of CPEntries contains generic constants, the second half
/// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up
/// which vector it will be in here.
- std::vector<std::vector<CPEntry> > CPEntries;
+ std::vector<std::vector<CPEntry>> CPEntries;
/// Maps a JT index to the offset in CPEntries containing copies of that
/// table. The equivalent map for a CONSTPOOL_ENTRY is the identity.
@@ -167,6 +189,7 @@ namespace {
unsigned MaxDisp : 31;
bool isCond : 1;
unsigned UncondBr;
+
ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, unsigned ubr)
: MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
};
@@ -195,8 +218,10 @@ namespace {
bool isThumb1;
bool isThumb2;
bool isPositionIndependentOrROPI;
+
public:
static char ID;
+
ARMConstantIslands() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -207,7 +232,7 @@ namespace {
}
StringRef getPassName() const override {
- return "ARM constant island placement and branch shortening pass";
+ return ARM_CP_ISLANDS_OPT_NAME;
}
private:
@@ -264,8 +289,10 @@ namespace {
U.getMaxDisp(), U.NegOk, U.IsSoImm);
}
};
+
char ARMConstantIslands::ID = 0;
-}
+
+} // end anonymous namespace
/// verify - check BBOffsets, BBSizes, alignment of islands
void ARMConstantIslands::verify() {
@@ -295,8 +322,9 @@ void ARMConstantIslands::verify() {
#endif
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// print block size and offset information - debugging
-void ARMConstantIslands::dumpBBs() {
+LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
DEBUG({
for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
const BasicBlockInfo &BBI = BBInfo[J];
@@ -308,12 +336,7 @@ void ARMConstantIslands::dumpBBs() {
}
});
}
-
-/// createARMConstantIslandPass - returns an instance of the constpool
-/// island pass.
-FunctionPass *llvm::createARMConstantIslandPass() {
- return new ARMConstantIslands();
-}
+#endif
bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
@@ -782,6 +805,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
case ARM::LDRcp:
case ARM::t2LDRpci:
case ARM::t2LDRHpci:
+ case ARM::t2LDRBpci:
Bits = 12; // +-offset_12
NegOk = true;
break;
@@ -873,7 +897,6 @@ void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
WaterList.insert(IP, NewBB);
}
-
/// Split the basic block containing MI into two blocks, which are joined by
/// an unconditional branch. Update data structures and renumber blocks to
/// account for this change and returns the newly created block.
@@ -897,8 +920,9 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
if (!isThumb)
BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
else
- BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB)
- .addImm(ARMCC::AL).addReg(0);
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc))
+ .addMBB(NewBB)
+ .add(predOps(ARMCC::AL));
++NumSplit;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
@@ -1296,8 +1320,9 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
if (!isThumb)
BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
else
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
- .addImm(ARMCC::AL).addReg(0);
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr))
+ .addMBB(NewMBB)
+ .add(predOps(ARMCC::AL));
unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
ImmBranches.push_back(ImmBranch(&UserMBB->back(),
MaxDisp, false, UncondBr));
@@ -1477,7 +1502,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
// add it to the island.
U.HighWaterMark = NewIsland;
U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc())
- .addImm(ID).addOperand(CPEMI->getOperand(1)).addImm(Size);
+ .addImm(ID)
+ .add(CPEMI->getOperand(1))
+ .addImm(Size);
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
++NumCPEs;
@@ -1681,8 +1708,9 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
Br.MI = &MBB->back();
BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
if (isThumb)
- BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
- .addImm(ARMCC::AL).addReg(0);
+ BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr))
+ .addMBB(DestBB)
+ .add(predOps(ARMCC::AL));
else
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
@@ -1709,8 +1737,15 @@ bool ARMConstantIslands::undoLRSpillRestore() {
MI->getNumExplicitOperands() == 3) {
// Create the new insn and copy the predicate from the old.
BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1));
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1));
+ MI->eraseFromParent();
+ MadeChange = true;
+ }
+ if (MI->getOpcode() == ARM::tPUSH &&
+ MI->getOperand(2).getReg() == ARM::LR &&
+ MI->getNumExplicitOperands() == 3) {
+ // Just remove the push.
MI->eraseFromParent();
MadeChange = true;
}
@@ -1792,13 +1827,12 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
Bits = 11;
Scale = 2;
break;
- case ARM::t2Bcc: {
+ case ARM::t2Bcc:
NewOpc = ARM::tBcc;
Bits = 8;
Scale = 2;
break;
}
- }
if (NewOpc) {
unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
@@ -1983,6 +2017,54 @@ static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
&*MBB->begin() == CPEMI;
}
+static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI,
+ MachineInstr *JumpMI,
+ unsigned &DeadSize) {
+ // Remove a dead add between the LEA and JT, which used to compute EntryReg,
+ // but the JT now uses PC. Finds the last ADD (if any) that def's EntryReg
+ // and is not clobbered / used.
+ MachineInstr *RemovableAdd = nullptr;
+ unsigned EntryReg = JumpMI->getOperand(0).getReg();
+
+ // Find the last ADD to set EntryReg
+ MachineBasicBlock::iterator I(LEAMI);
+ for (++I; &*I != JumpMI; ++I) {
+ if (I->getOpcode() == ARM::t2ADDrs && I->getOperand(0).getReg() == EntryReg)
+ RemovableAdd = &*I;
+ }
+
+ if (!RemovableAdd)
+ return;
+
+ // Ensure EntryReg is not clobbered or used.
+ MachineBasicBlock::iterator J(RemovableAdd);
+ for (++J; &*J != JumpMI; ++J) {
+ for (unsigned K = 0, E = J->getNumOperands(); K != E; ++K) {
+ const MachineOperand &MO = J->getOperand(K);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == EntryReg)
+ return;
+ if (MO.isUse() && MO.getReg() == EntryReg)
+ return;
+ }
+ }
+
+ DEBUG(dbgs() << "Removing Dead Add: " << *RemovableAdd);
+ RemovableAdd->eraseFromParent();
+ DeadSize += 4;
+}
+
+static bool registerDefinedBetween(unsigned Reg,
+ MachineBasicBlock::iterator From,
+ MachineBasicBlock::iterator To,
+ const TargetRegisterInfo *TRI) {
+ for (auto I = From; I != To; ++I)
+ if (I->modifiesRegister(Reg, TRI))
+ return true;
+ return false;
+}
+
/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
bool ARMConstantIslands::optimizeThumb2JumpTables() {
@@ -2060,6 +2142,12 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
IdxReg = Shift->getOperand(2).getReg();
unsigned ShiftedIdxReg = Shift->getOperand(0).getReg();
+ // It's important that IdxReg is live until the actual TBB/TBH. Most of
+ // the range is checked later, but the LEA might still clobber it and not
+ // actually get removed.
+ if (BaseReg == IdxReg && !jumpTableFollowsTB(MI, User.CPEMI))
+ continue;
+
MachineInstr *Load = User.MI->getNextNode();
if (Load->getOpcode() != ARM::tLDRr)
continue;
@@ -2069,6 +2157,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
continue;
// If we're in PIC mode, there should be another ADD following.
+ auto *TRI = STI->getRegisterInfo();
if (isPositionIndependentOrROPI) {
MachineInstr *Add = Load->getNextNode();
if (Add->getOpcode() != ARM::tADDrr ||
@@ -2078,22 +2167,26 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
continue;
if (Add->getOperand(0).getReg() != MI->getOperand(0).getReg())
continue;
-
+ if (registerDefinedBetween(IdxReg, Add->getNextNode(), MI, TRI))
+ // IdxReg gets redefined in the middle of the sequence.
+ continue;
Add->eraseFromParent();
DeadSize += 2;
} else {
if (Load->getOperand(0).getReg() != MI->getOperand(0).getReg())
continue;
+ if (registerDefinedBetween(IdxReg, Load->getNextNode(), MI, TRI))
+ // IdxReg gets redefined in the middle of the sequence.
+ continue;
}
-
-
+
// Now safe to delete the load and lsl. The LEA will be removed later.
CanDeleteLEA = true;
Shift->eraseFromParent();
Load->eraseFromParent();
DeadSize += 4;
}
-
+
DEBUG(dbgs() << "Shrink JT: " << *MI);
MachineInstr *CPEMI = User.CPEMI;
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
@@ -2117,7 +2210,10 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
NewJTMI->getOperand(0).setReg(ARM::PC);
NewJTMI->getOperand(0).setIsKill(false);
- if (CanDeleteLEA) {
+ if (CanDeleteLEA) {
+ if (isThumb2)
+ RemoveDeadAddBetweenLEAAndJT(User.MI, MI, DeadSize);
+
User.MI->eraseFromParent();
DeadSize += isThumb2 ? 4 : 2;
@@ -2238,13 +2334,11 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
if (isThumb2)
BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B))
.addMBB(BB)
- .addImm(ARMCC::AL)
- .addReg(0);
+ .add(predOps(ARMCC::AL));
else
BuildMI(NewBB, DebugLoc(), TII->get(ARM::tB))
.addMBB(BB)
- .addImm(ARMCC::AL)
- .addReg(0);
+ .add(predOps(ARMCC::AL));
// Update internal data structures to account for the newly inserted MBB.
MF->RenumberBlocks(NewBB);
@@ -2256,3 +2350,12 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
++NumJTInserted;
return NewBB;
}
+
+/// createARMConstantIslandPass - returns an instance of the constpool
+/// island pass.
+FunctionPass *llvm::createARMConstantIslandPass() {
+ return new ARMConstantIslands();
+}
+
+INITIALIZE_PASS(ARMConstantIslands, "arm-cp-islands", ARM_CP_ISLANDS_OPT_NAME,
+ false, false)
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 2d1602873ce0..9705c8b718b7 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -13,13 +13,17 @@
#include "ARMConstantPoolValue.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include <cstdlib>
+
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -44,7 +48,7 @@ ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, unsigned id,
LabelId(id), Kind(kind), PCAdjust(PCAdj), Modifier(modifier),
AddCurrentAddress(addCurrentAddress) {}
-ARMConstantPoolValue::~ARMConstantPoolValue() {}
+ARMConstantPoolValue::~ARMConstantPoolValue() = default;
StringRef ARMConstantPoolValue::getModifierText() const {
switch (Modifier) {
@@ -94,9 +98,11 @@ ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
return false;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ARMConstantPoolValue::dump() const {
errs() << " " << *this;
}
+#endif
void ARMConstantPoolValue::print(raw_ostream &O) const {
if (Modifier) O << "(" << getModifierText() << ")";
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 5f61832aa740..61c521581f79 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -14,10 +14,11 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H
#define LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cstddef>
+#include <string>
+#include <vector>
namespace llvm {
@@ -29,6 +30,7 @@ class LLVMContext;
class MachineBasicBlock;
namespace ARMCP {
+
enum ARMCPKind {
CPValue,
CPExtSymbol,
@@ -47,7 +49,8 @@ namespace ARMCP {
SECREL, /// Section Relative (Windows TLS)
SBREL, /// Static Base Relative (RWPI)
};
-}
+
+} // end namespace ARMCP
/// ARMConstantPoolValue - ARM specific constantpool value. This is used to
/// represent PC-relative displacement between the address of the load
@@ -169,9 +172,11 @@ public:
const GlobalValue *getGV() const;
const BlockAddress *getBlockAddress() const;
+
const GlobalVariable *getPromotedGlobal() const {
return dyn_cast_or_null<GlobalVariable>(GVar);
}
+
const Constant *getPromotedGlobalInit() const {
return CVal;
}
@@ -186,6 +191,7 @@ public:
void addSelectionDAGCSEId(FoldingSetNodeID &ID) override;
void print(raw_ostream &O) const override;
+
static bool classof(const ARMConstantPoolValue *APV) {
return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA() ||
APV->isPromotedGlobal();
@@ -267,6 +273,6 @@ public:
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index baa4e0330cf4..e0aecff2633b 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -19,6 +19,7 @@
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -30,6 +31,7 @@
#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
+
using namespace llvm;
#define DEBUG_TYPE "arm-pseudo"
@@ -97,9 +99,9 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
const MachineOperand &MO = OldMI.getOperand(i);
assert(MO.isReg() && MO.getReg());
if (MO.isUse())
- UseMI.addOperand(MO);
+ UseMI.add(MO);
else
- DefMI.addOperand(MO);
+ DefMI.add(MO);
}
}
@@ -415,14 +417,14 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
if (TableEntry->isUpdating)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
if (TableEntry->hasWritebackOperand)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// For an instruction writing double-spaced subregs, the pseudo instruction
// has an extra operand that is a use of the super-register. Record the
@@ -432,15 +434,15 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
SrcOpIdx = OpIdx++;
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the super-register source operand used for double-spaced subregs over
// to the new instruction as an implicit operand.
if (SrcOpIdx != 0) {
MachineOperand MO = MI.getOperand(SrcOpIdx);
MO.setImplicit(true);
- MIB.addOperand(MO);
+ MIB.add(MO);
}
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
@@ -467,14 +469,14 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
TII->get(TableEntry->RealOpc));
unsigned OpIdx = 0;
if (TableEntry->isUpdating)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
if (TableEntry->hasWritebackOperand)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
@@ -490,8 +492,8 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MIB.addReg(D3, getUndefRegState(SrcIsUndef));
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
MIB->addRegisterKilled(SrcReg, TRI, true);
@@ -549,14 +551,14 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
}
if (TableEntry->isUpdating)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
if (TableEntry->hasWritebackOperand)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Grab the super-register source.
MachineOperand MO = MI.getOperand(OpIdx++);
@@ -579,12 +581,12 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
OpIdx += 1;
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the super-register source to be an implicit source.
MO.setImplicit(true);
- MIB.addOperand(MO);
+ MIB.add(MO);
if (TableEntry->IsLoad)
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
@@ -605,9 +607,9 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned OpIdx = 0;
// Transfer the destination register operand.
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
if (IsExt)
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
@@ -616,11 +618,11 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
MIB.addReg(D0);
// Copy the other source register operand.
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Add an implicit kill and use for the super-reg.
MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
@@ -659,6 +661,7 @@ static bool IsAnAddressOperand(const MachineOperand &MO) {
return false;
case MachineOperand::MO_IntrinsicID:
case MachineOperand::MO_Predicate:
+ case MachineOperand::MO_Placeholder:
llvm_unreachable("should not exist post-isel");
}
llvm_unreachable("unhandled machine operand type");
@@ -696,8 +699,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
HI16 = HI16.addImm(SOImmValV2);
LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- LO16.addImm(Pred).addReg(PredReg).addReg(0);
- HI16.addImm(Pred).addReg(PredReg).addReg(0);
+ LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
+ HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
return;
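
The predOps() and condCodeOp() helpers used above package the operand pairs that the old AddDefaultPred/AddDefaultCC wrappers appended. A sketch of the correspondence, assuming the helpers declared in ARMBaseInstrInfo.h at this revision:

    // AddDefaultPred(MIB)               ==> MIB.add(predOps(ARMCC::AL))
    // .addImm(ARMCC::AL).addReg(0)      ==> .add(predOps(ARMCC::AL))
    // AddDefaultCC(MIB), trailing .addReg(0) for the 's' bit
    //                                   ==> .add(condCodeOp())
    BuildMI(MBB, MBBI, DL, TII->get(ARM::MOVr), DestReg)
        .addReg(SrcReg)
        .add(predOps(ARMCC::AL)) // always-execute predicate, no pred reg
        .add(condCodeOp());      // optional CPSR def left empty ('s' clear)
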
@@ -797,7 +800,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
.addReg(Desired.getReg(), RegState::Kill);
if (!IsThumb)
MIB.addImm(0);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
}
// .Lloadcmp:
@@ -814,12 +817,13 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
MIB.addReg(Addr.getReg());
if (LdrexOp == ARM::t2LDREX)
MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset.
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
- AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
- .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
- .addOperand(Desired));
+ BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
+ .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
+ .add(Desired)
+ .add(predOps(ARMCC::AL));
unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
BuildMI(LoadCmpBB, DL, TII->get(Bcc))
.addMBB(DoneBB)
@@ -838,16 +842,17 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg);
- MIB.addOperand(New);
- MIB.addOperand(Addr);
+ MIB.add(New);
+ MIB.add(Addr);
if (StrexOp == ARM::t2STREX)
MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
- AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri))
- .addReg(StatusReg, RegState::Kill)
- .addImm(0));
+ BuildMI(StoreBB, DL, TII->get(CMPri))
+ .addReg(StatusReg, RegState::Kill)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
BuildMI(StoreBB, DL, TII->get(Bcc))
.addMBB(LoadCmpBB)
.addImm(ARMCC::NE)
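
For orientation, the blocks ExpandCMP_SWAP assembles above implement the usual load-exclusive/store-exclusive retry loop. Roughly, in portable terms (an illustrative sketch only, not the emitted code):

    #include <atomic>
    #include <cstdint>
    // .Lloadcmp: ldrex Dest, [Addr]; cmp Dest, Desired; bne .Ldone
    // .Lstore:   strex Status, New, [Addr]; cmp Status, #0; bne .Lloadcmp
    bool cmpSwap(std::atomic<uint32_t> &Addr, uint32_t &Expected,
                 uint32_t New) {
      return Addr.compare_exchange_strong(Expected, New);
    }
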
@@ -927,13 +932,13 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
MachineInstrBuilder MIB;
MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
- MIB.addReg(Addr.getReg());
- AddDefaultPred(MIB);
+ MIB.addReg(Addr.getReg()).add(predOps(ARMCC::AL));
unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
- AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
- .addReg(DestLo, getKillRegState(Dest.isDead()))
- .addReg(DesiredLo, getKillRegState(Desired.isDead())));
+ BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
+ .addReg(DestLo, getKillRegState(Dest.isDead()))
+ .addReg(DesiredLo, getKillRegState(Desired.isDead()))
+ .add(predOps(ARMCC::AL));
BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
.addReg(DestHi, getKillRegState(Dest.isDead()))
@@ -959,13 +964,13 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg);
addExclusiveRegPair(MIB, New, 0, IsThumb, TRI);
- MIB.addOperand(Addr);
- AddDefaultPred(MIB);
+ MIB.add(Addr).add(predOps(ARMCC::AL));
unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
- AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri))
- .addReg(StatusReg, RegState::Kill)
- .addImm(0));
+ BuildMI(StoreBB, DL, TII->get(CMPri))
+ .addReg(StatusReg, RegState::Kill)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
BuildMI(StoreBB, DL, TII->get(Bcc))
.addMBB(LoadCmpBB)
.addImm(ARMCC::NE)
@@ -1026,7 +1031,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Add the default predicate in Thumb mode.
if (STI->isThumb())
- MIB.addImm(ARMCC::AL).addReg(0);
+ MIB.add(predOps(ARMCC::AL));
} else if (RetOpcode == ARM::TCRETURNri) {
BuildMI(MBB, MBBI, dl,
TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr))
@@ -1047,9 +1052,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
MI.getOperand(1).getReg())
- .addOperand(MI.getOperand(2))
- .addImm(MI.getOperand(3).getImm()) // 'pred'
- .addOperand(MI.getOperand(4));
+ .add(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .add(MI.getOperand(4));
MI.eraseFromParent();
return true;
@@ -1059,10 +1064,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
- .addOperand(MI.getOperand(2))
- .addImm(MI.getOperand(3).getImm()) // 'pred'
- .addOperand(MI.getOperand(4))
- .addReg(0); // 's' bit
+ .add(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .add(MI.getOperand(4))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
@@ -1070,11 +1075,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVCCsi: {
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
(MI.getOperand(1).getReg()))
- .addOperand(MI.getOperand(2))
- .addImm(MI.getOperand(3).getImm())
- .addImm(MI.getOperand(4).getImm()) // 'pred'
- .addOperand(MI.getOperand(5))
- .addReg(0); // 's' bit
+ .add(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm())
+ .addImm(MI.getOperand(4).getImm()) // 'pred'
+ .add(MI.getOperand(5))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
@@ -1082,12 +1087,12 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVCCsr: {
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
(MI.getOperand(1).getReg()))
- .addOperand(MI.getOperand(2))
- .addOperand(MI.getOperand(3))
- .addImm(MI.getOperand(4).getImm())
- .addImm(MI.getOperand(5).getImm()) // 'pred'
- .addOperand(MI.getOperand(6))
- .addReg(0); // 's' bit
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .addImm(MI.getOperand(4).getImm())
+ .addImm(MI.getOperand(5).getImm()) // 'pred'
+ .add(MI.getOperand(6))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
@@ -1097,9 +1102,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
MI.getOperand(1).getReg())
- .addImm(MI.getOperand(2).getImm())
- .addImm(MI.getOperand(3).getImm()) // 'pred'
- .addOperand(MI.getOperand(4));
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .add(MI.getOperand(4));
MI.eraseFromParent();
return true;
}
@@ -1108,10 +1113,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
- .addImm(MI.getOperand(2).getImm())
- .addImm(MI.getOperand(3).getImm()) // 'pred'
- .addOperand(MI.getOperand(4))
- .addReg(0); // 's' bit
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .add(MI.getOperand(4))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
@@ -1121,10 +1126,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
- .addImm(MI.getOperand(2).getImm())
- .addImm(MI.getOperand(3).getImm()) // 'pred'
- .addOperand(MI.getOperand(4))
- .addReg(0); // 's' bit
+ .addImm(MI.getOperand(2).getImm())
+ .addImm(MI.getOperand(3).getImm()) // 'pred'
+ .add(MI.getOperand(4))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
@@ -1143,11 +1148,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
}
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
MI.getOperand(1).getReg())
- .addOperand(MI.getOperand(2))
- .addImm(MI.getOperand(3).getImm())
- .addImm(MI.getOperand(4).getImm()) // 'pred'
- .addOperand(MI.getOperand(5))
- .addReg(0); // 's' bit
+ .add(MI.getOperand(2))
+ .addImm(MI.getOperand(3).getImm())
+ .addImm(MI.getOperand(4).getImm()) // 'pred'
+ .add(MI.getOperand(5))
+ .add(condCodeOp()); // 's' bit
MI.eraseFromParent();
return true;
}
@@ -1187,10 +1192,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
"bits set.");
unsigned bicOpc = AFI->isThumbFunction() ?
ARM::t2BICri : ARM::BICri;
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(bicOpc), ARM::R6)
- .addReg(ARM::R6, RegState::Kill)
- .addImm(MaxAlign-1)));
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6)
+ .addReg(ARM::R6, RegState::Kill)
+ .addImm(MaxAlign - 1)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
}
@@ -1201,24 +1207,25 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::MOVsrl_flag:
case ARM::MOVsra_flag: {
// These are just fancy MOVs instructions.
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
- MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1))
- .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ?
- ARM_AM::lsr : ARM_AM::asr),
- 1)))
- .addReg(ARM::CPSR, RegState::Define);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc(
+ (Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr : ARM_AM::asr), 1))
+ .add(predOps(ARMCC::AL))
+ .addReg(ARM::CPSR, RegState::Define);
MI.eraseFromParent();
return true;
}
case ARM::RRX: {
// This encodes as "MOVs Rd, Rm, rrx".
MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),TII->get(ARM::MOVsi),
- MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1))
- .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
- .addReg(0);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
@@ -1241,18 +1248,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
if (!Thumb)
MIB.addImm(0);
- MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+ MIB.add(predOps(ARMCC::AL));
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(Thumb ? ARM::tBLXr : ARM::BLX));
if (Thumb)
- MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+ MIB.add(predOps(ARMCC::AL));
MIB.addReg(Reg, RegState::Kill);
} else {
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(Thumb ? ARM::tBL : ARM::BL));
if (Thumb)
- MIB.addImm(static_cast<unsigned>(ARMCC::AL)).addReg(0);
+ MIB.add(predOps(ARMCC::AL));
MIB.addExternalSymbol("__aeabi_read_tp", 0);
}
@@ -1268,15 +1275,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
MachineInstrBuilder MIB1 =
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(NewLdOpc), DstReg)
- .addOperand(MI.getOperand(1)));
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg)
+ .add(MI.getOperand(1))
+ .add(predOps(ARMCC::AL));
MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(ARM::tPICADD))
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg)
- .addOperand(MI.getOperand(2));
+ MachineInstrBuilder MIB2 =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .add(MI.getOperand(2));
TransferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
return true;
@@ -1319,7 +1326,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, 4));
if (IsARM)
MIB.addImm(0);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
if (IsPIC) {
MachineInstrBuilder MIB =
@@ -1329,7 +1336,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.addImm(ARMPCLabelIndex);
if (IsARM)
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
}
MI.eraseFromParent();
@@ -1368,7 +1375,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg).addImm(LabelId);
if (isARM) {
- AddDefaultPred(MIB3);
+ MIB3.add(predOps(ARMCC::AL));
if (Opcode == ARM::MOV_ga_pcrel_ldr)
MIB3->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
}
@@ -1388,9 +1395,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
.addReg(ARM::LR)
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
- .addOperand(MI.getOperand(2))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
.addReg(ARM::CPSR, RegState::Undef);
TransferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
@@ -1407,11 +1414,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
// Copy the source register.
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Add the destination operands (D subregs).
unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
@@ -1438,11 +1445,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
// Copy the destination register.
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Copy the predicate operands.
- MIB.addOperand(MI.getOperand(OpIdx++));
- MIB.addOperand(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
+ MIB.add(MI.getOperand(OpIdx++));
// Add the source operands (D subregs).
unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index df4dcb375750..01e062bd185c 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
@@ -21,30 +22,61 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include <cassert>
+#include <cstdint>
+#include <utility>
+
using namespace llvm;
namespace {
@@ -54,24 +86,22 @@ namespace {
enum {
RegBase,
FrameIndexBase
- } BaseType;
+ } BaseType = RegBase;
union {
unsigned Reg;
int FI;
} Base;
- int Offset;
+ int Offset = 0;
// Innocuous defaults for our address.
- Address()
- : BaseType(RegBase), Offset(0) {
- Base.Reg = 0;
- }
+ Address() {
+ Base.Reg = 0;
+ }
} Address;
class ARMFastISel final : public FastISel {
-
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
const ARMSubtarget *Subtarget;
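
The Address struct in the hunk above now relies on C++11 default member initializers instead of a hand-written constructor init list. A minimal standalone illustration (names hypothetical):

    struct Addr {
      enum { RegBase, FrameIndexBase } BaseType = RegBase; // was ctor-set
      union {
        unsigned Reg;
        int FI;
      } Base;
      int Offset = 0;
      // The union member keeps its constructor initialization, matching
      // the patch; a default member initializer could cover only one
      // union alternative anyway.
      Addr() { Base.Reg = 0; }
    };
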
@@ -99,8 +129,9 @@ class ARMFastISel final : public FastISel {
Context = &funcInfo.Fn->getContext();
}
- // Code from FastISel.cpp.
private:
+ // Code from FastISel.cpp.
+
unsigned fastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill);
@@ -117,18 +148,18 @@ class ARMFastISel final : public FastISel {
uint64_t Imm);
// Backend specific FastISel code.
- private:
+
bool fastSelectInstruction(const Instruction *I) override;
unsigned fastMaterializeConstant(const Constant *C) override;
unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) override;
bool fastLowerArguments() override;
- private:
+
#include "ARMGenFastISel.inc"
// Instruction selection routines.
- private:
+
bool SelectLoad(const Instruction *I);
bool SelectStore(const Instruction *I);
bool SelectBranch(const Instruction *I);
@@ -151,12 +182,12 @@ class ARMFastISel final : public FastISel {
bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
// Utility routines.
- private:
+
bool isPositionIndependent() const;
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
- bool isZExt);
+ bool isZExt, bool isEquality);
bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment = 0, bool isZExt = true,
bool allocReg = true);
@@ -179,7 +210,7 @@ class ARMFastISel final : public FastISel {
const TargetLowering *getTargetLowering() { return &TLI; }
// Call handling routines.
- private:
+
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
bool Return,
bool isVarArg);
@@ -198,7 +229,7 @@ class ARMFastISel final : public FastISel {
bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
// OptionalDef handling routines.
- private:
+
bool isARMNEONPred(const MachineInstr *MI);
bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
@@ -256,17 +287,13 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
// Are we NEON in ARM mode and have a predicate operand? If so, we know
// we're not predicable but add it anyway.
if (isARMNEONPred(MI))
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
// Do we optionally set a predicate? Preds is size > 0 iff the predicate
// defines CPSR. All other OptionalDefines in ARM are the CCR register.
bool CPSR = false;
- if (DefinesOptionalPredicate(MI, &CPSR)) {
- if (CPSR)
- AddDefaultT1CC(MIB);
- else
- AddDefaultCC(MIB);
- }
+ if (DefinesOptionalPredicate(MI, &CPSR))
+ MIB.add(CPSR ? t1CondCodeOp() : condCodeOp());
return MIB;
}
@@ -434,7 +461,6 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
}
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
-
if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
return 0;
@@ -739,7 +765,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
TmpOffset += SL->getElementOffset(Idx);
} else {
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
- for (;;) {
+ while (true) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
@@ -971,7 +997,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
// Create the base instruction, then add the operands.
if (allocReg)
ResultReg = createResultReg(RC);
- assert (ResultReg > 255 && "Expected an allocated virtual register.");
+ assert(ResultReg > 255 && "Expected an allocated virtual register.");
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg);
AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
@@ -1216,7 +1242,6 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// behavior.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
-
// Get the compare predicate.
// Try to take advantage of fallthrough opportunities.
CmpInst::Predicate Predicate = CI->getPredicate();
@@ -1231,7 +1256,8 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
if (ARMPred == ARMCC::AL) return false;
// Emit the compare.
- if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+ CI->isEquality()))
return false;
unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
@@ -1318,14 +1344,16 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
}
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
- bool isZExt) {
+ bool isZExt, bool isEquality) {
Type *Ty = Src1Value->getType();
EVT SrcEVT = TLI.getValueType(DL, Ty, true);
if (!SrcEVT.isSimple()) return false;
MVT SrcVT = SrcEVT.getSimpleVT();
- bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
- if (isFloat && !Subtarget->hasVFP2())
+ if (Ty->isFloatTy() && !Subtarget->hasVFP2())
+ return false;
+
+ if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()))
return false;
// Check to see if the 2nd operand is a constant that we can encode directly
@@ -1364,10 +1392,18 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
// TODO: Verify compares.
case MVT::f32:
isICmp = false;
- CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
+      // Equality comparisons shouldn't raise Invalid on unordered inputs.
+ if (isEquality)
+ CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
+ else
+ CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
break;
case MVT::f64:
isICmp = false;
+      // Equality comparisons shouldn't raise Invalid on unordered inputs.
+ if (isEquality)
+ CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
+ else
CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
break;
case MVT::i1:
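
The VCMP-versus-VCMPE split above matters because IEEE 754 equality is a quiet predicate: comparing a quiet NaN with == must not raise Invalid, while ordered relational compares may; VCMPE implements the signaling variant. A small demonstration, assuming the compiler emits a quiet compare for == as the standard requires (the usual FENV_ACCESS caveats apply):

    #include <cfenv>
    #include <cmath>
    #include <cstdio>
    int main() {
      double x = std::nan(""), y = 1.0;
      std::feclearexcept(FE_INVALID);
      bool eq = (x == y); // quiet compare: false, and no Invalid raised
      std::printf("eq=%d invalid=%d\n", eq,
                  std::fetestexcept(FE_INVALID) != 0); // eq=0 invalid=0
      return 0;
    }
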
@@ -1444,7 +1480,8 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
if (ARMPred == ARMCC::AL) return false;
// Emit the compare.
- if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+ CI->isEquality()))
return false;
// Now set a register based on the comparison. Explicitly set the predicates
@@ -1466,7 +1503,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) {
bool ARMFastISel::SelectFPExt(const Instruction *I) {
// Make sure we have VFP and that we're extending float to double.
- if (!Subtarget->hasVFP2()) return false;
+ if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false;
Value *V = I->getOperand(0);
if (!I->getType()->isDoubleTy() ||
@@ -1485,7 +1522,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) {
bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
// Make sure we have VFP and that we're truncating double to float.
- if (!Subtarget->hasVFP2()) return false;
+ if (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()) return false;
Value *V = I->getOperand(0);
if (!(I->getType()->isFloatTy() &&
@@ -1536,7 +1573,8 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
unsigned Opc;
if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
- else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
+ else if (Ty->isDoubleTy() && !Subtarget->isFPOnlySP())
+ Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
else return false;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
@@ -1561,7 +1599,8 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
unsigned Opc;
Type *OpTy = I->getOperand(0)->getType();
if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
- else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
+ else if (OpTy->isDoubleTy() && !Subtarget->isFPOnlySP())
+ Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
else return false;
// f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
@@ -1596,7 +1635,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
bool UseImm = false;
bool isNegativeImm = false;
if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
- assert (VT == MVT::i32 && "Expecting an i32.");
+ assert(VT == MVT::i32 && "Expecting an i32.");
Imm = (int)ConstInt->getValue().getZExtValue();
if (Imm < 0) {
isNegativeImm = true;
@@ -1765,8 +1804,9 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
// if we have them.
// FIXME: It'd be nice to use NEON instructions.
Type *Ty = I->getType();
- bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
- if (isFloat && !Subtarget->hasVFP2())
+ if (Ty->isFloatTy() && !Subtarget->hasVFP2())
+ return false;
+ if (Ty->isDoubleTy() && (!Subtarget->hasVFP2() || Subtarget->isFPOnlySP()))
return false;
unsigned Opc;
@@ -1926,7 +1966,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
case CCValAssign::SExt: {
MVT DestVT = VA.getLocVT();
Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
- assert (Arg != 0 && "Failed to emit a sext");
+ assert(Arg != 0 && "Failed to emit a sext");
ArgVT = DestVT;
break;
}
@@ -1935,7 +1975,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
case CCValAssign::ZExt: {
MVT DestVT = VA.getLocVT();
Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
- assert (Arg != 0 && "Failed to emit a zext");
+ assert(Arg != 0 && "Failed to emit a zext");
ArgVT = DestVT;
break;
}
@@ -2230,7 +2270,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
DbgLoc, TII.get(CallOpc));
// BL / BLX don't take a predicate, but tBL / tBLX do.
if (isThumb2)
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
if (Subtarget->genLongCalls())
MIB.addReg(CalleeReg);
else
@@ -2311,19 +2351,19 @@ bool ARMFastISel::SelectCall(const Instruction *I,
break;
ISD::ArgFlagsTy Flags;
- unsigned AttrInd = i - CS.arg_begin() + 1;
- if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ unsigned ArgIdx = i - CS.arg_begin();
+ if (CS.paramHasAttr(ArgIdx, Attribute::SExt))
Flags.setSExt();
- if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ if (CS.paramHasAttr(ArgIdx, Attribute::ZExt))
Flags.setZExt();
// FIXME: Only handle *easy* calls for now.
- if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
- CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
- CS.paramHasAttr(AttrInd, Attribute::SwiftSelf) ||
- CS.paramHasAttr(AttrInd, Attribute::SwiftError) ||
- CS.paramHasAttr(AttrInd, Attribute::Nest) ||
- CS.paramHasAttr(AttrInd, Attribute::ByVal))
+ if (CS.paramHasAttr(ArgIdx, Attribute::InReg) ||
+ CS.paramHasAttr(ArgIdx, Attribute::StructRet) ||
+ CS.paramHasAttr(ArgIdx, Attribute::SwiftSelf) ||
+ CS.paramHasAttr(ArgIdx, Attribute::SwiftError) ||
+ CS.paramHasAttr(ArgIdx, Attribute::Nest) ||
+ CS.paramHasAttr(ArgIdx, Attribute::ByVal))
return false;
Type *ArgTy = (*i)->getType();
@@ -2373,7 +2413,7 @@ bool ARMFastISel::SelectCall(const Instruction *I,
// ARM calls don't take a predicate, but tBL / tBLX do.
if (isThumb2)
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
if (UseReg)
MIB.addReg(CalleeReg);
else if (!IntrMemName)
@@ -2418,7 +2458,7 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
else if (Len >= 2)
VT = MVT::i16;
else {
- assert (Len == 1 && "Expected a length of 1!");
+ assert(Len == 1 && "Expected a length of 1!");
VT = MVT::i8;
}
} else {
@@ -2433,9 +2473,9 @@ bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
bool RV;
unsigned ResultReg;
RV = ARMEmitLoad(VT, ResultReg, Src);
- assert (RV == true && "Should be able to handle this load.");
+ assert(RV && "Should be able to handle this load.");
RV = ARMEmitStore(VT, ResultReg, Dest);
- assert (RV == true && "Should be able to handle this store.");
+ assert(RV && "Should be able to handle this store.");
(void)RV;
unsigned Size = VT.getSizeInBits()/8;
@@ -2687,9 +2727,11 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
if (setsCPSR)
MIB.addReg(ARM::CPSR, RegState::Define);
SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
- AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc));
+ MIB.addReg(SrcReg, isKill * RegState::Kill)
+ .addImm(ImmEnc)
+ .add(predOps(ARMCC::AL));
if (hasS)
- AddDefaultCC(MIB);
+ MIB.add(condCodeOp());
// Second instruction consumes the first's result.
SrcReg = ResultReg;
}
@@ -2779,7 +2821,6 @@ bool ARMFastISel::SelectShift(const Instruction *I,
// TODO: SoftFP support.
bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
-
switch (I->getOpcode()) {
case Instruction::Load:
return SelectLoad(I);
@@ -2849,6 +2890,7 @@ bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
}
namespace {
+
// This table describes sign- and zero-extend instructions which can be
// folded into a preceding load. All of these extends have an immediate
// (sometimes a mask and sometimes a shift) that's applied after
@@ -2865,7 +2907,8 @@ const struct FoldableLoadExtendsStruct {
{ { ARM::SXTB, ARM::t2SXTB }, 0, 0, MVT::i8 },
{ { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 }
};
-}
+
+} // end anonymous namespace
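
The FoldableLoadExtendsStruct table above drives tryToFoldLoadIntoMI (documented next). A standalone sketch of the shape of the lookup, not the LLVM code itself:

    #include <cstdint>
    // A zero/sign-extend whose only input is a freshly loaded vreg can
    // fold into the load when the extend's immediate matches the entry:
    //   ldrb r0, [..]; sxtb r1, r0  ==>  ldrsb r1, [..]
    struct FoldableLoadExtend {
      unsigned Opc[2];     // {ARM, Thumb2} extend opcode
      uint8_t ExpectedImm; // mask or shift the extend must carry
      uint8_t isZExt;
      int LoadType;        // stand-in for MVT::i8 / MVT::i16
    };
    bool matches(const FoldableLoadExtend &E, bool isThumb2, unsigned Opc,
                 uint64_t Imm, int LoadVT) {
      return E.Opc[isThumb2] == Opc && E.ExpectedImm == Imm &&
             E.LoadType == LoadVT;
    }
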
/// \brief The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
@@ -2933,7 +2976,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
.addConstantPoolIndex(Idx);
if (Opc == ARM::LDRcp)
MIB.addImm(0);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
// Fix the address by adding pc.
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
@@ -2944,7 +2987,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
.addReg(TempReg)
.addImm(ARMPCLabelIndex);
if (!Subtarget->isThumb())
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
if (UseGOT_PREL && Subtarget->isThumb()) {
unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
@@ -3010,7 +3053,6 @@ bool ARMFastISel::fastLowerArguments() {
}
}
-
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
@@ -3035,6 +3077,7 @@ bool ARMFastISel::fastLowerArguments() {
}
namespace llvm {
+
FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())
@@ -3042,4 +3085,5 @@ namespace llvm {
return nullptr;
}
-}
+
+} // end namespace llvm
diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h
index 0c910ab6130f..8c0df4c2cbf9 100644
--- a/lib/Target/ARM/ARMFeatures.h
+++ b/lib/Target/ARM/ARMFeatures.h
@@ -19,10 +19,10 @@
namespace llvm {
template<typename InstrType> // could be MachineInstr or MCInst
-bool IsCPSRDead(InstrType *Instr);
+bool IsCPSRDead(const InstrType *Instr);
template<typename InstrType> // could be MachineInstr or MCInst
-inline bool isV8EligibleForIT(InstrType *Instr) {
+inline bool isV8EligibleForIT(const InstrType *Instr) {
switch (Instr->getOpcode()) {
default:
return false;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index c72db8aca108..37be22bed540 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -16,19 +16,49 @@
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+#include <vector>
#define DEBUG_TYPE "arm-frame-lowering"
@@ -180,6 +210,7 @@ static bool WindowsRequiresStackProbe(const MachineFunction &MF,
}
namespace {
+
struct StackAdjustingInsts {
struct InstInfo {
MachineBasicBlock::iterator I;
@@ -196,7 +227,8 @@ struct StackAdjustingInsts {
}
void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
- auto Info = find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
+ auto Info =
+ llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
assert(Info != Insts.end() && "invalid sp adjusting instruction");
Info->SPAdjust += ExtraBytes;
}
@@ -219,7 +251,8 @@ struct StackAdjustingInsts {
}
}
};
-}
+
+} // end anonymous namespace
/// Emit an instruction sequence that will align the address in
/// register Reg by zero-ing out the lower bits. For versions of the
@@ -252,35 +285,40 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
// lsr Reg, Reg, log2(Alignment)
// lsl Reg, Reg, log2(Alignment)
if (CanUseBFC) {
- AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(~AlignMask));
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(~AlignMask)
+ .add(predOps(ARMCC::AL));
} else if (AlignMask <= 255) {
- AddDefaultCC(
- AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(AlignMask)));
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(AlignMask)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
} else {
assert(!MustBeSingleInstruction &&
"Shouldn't call emitAligningInstructions demanding a single "
"instruction to be emitted for large stack alignment for a target "
"without BFC.");
- AddDefaultCC(AddDefaultPred(
- BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))));
- AddDefaultCC(AddDefaultPred(
- BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))));
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
} else {
// Since this is only reached for Thumb-2 targets, the BFC instruction
// should always be available.
assert(CanUseBFC);
- AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(~AlignMask));
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(~AlignMask)
+ .add(predOps(ARMCC::AL));
}
}
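
All three sequences emitted by emitAligningInstructions above compute the same thing: clear the low log2(Alignment) bits of the register. In plain C++:

    #include <cassert>
    // BFC clears the bit-field directly; BIC works when the mask encodes
    // as an ARM modified immediate (<= 255 here); lsr/lsl shifts the low
    // bits out and back in. All are equivalent to Reg & ~AlignMask.
    unsigned alignDown(unsigned Reg, unsigned Alignment) {
      assert(Alignment && (Alignment & (Alignment - 1)) == 0 &&
             "Alignment must be a power of two");
      unsigned AlignMask = Alignment - 1;
      return Reg & ~AlignMask;
    }
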
@@ -448,9 +486,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
uint32_t NumWords = NumBytes >> 2;
if (NumWords < 65536)
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
- .addImm(NumWords)
- .setMIFlags(MachineInstr::FrameSetup));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
else
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
.addImm(NumWords)
@@ -462,10 +501,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case CodeModel::Default:
case CodeModel::Kernel:
BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addExternalSymbol("__chkstk")
- .addReg(ARM::R4, RegState::Implicit)
- .setMIFlags(MachineInstr::FrameSetup);
+ .add(predOps(ARMCC::AL))
+ .addExternalSymbol("__chkstk")
+ .addReg(ARM::R4, RegState::Implicit)
+ .setMIFlags(MachineInstr::FrameSetup);
break;
case CodeModel::Large:
case CodeModel::JITDefault:
@@ -474,18 +513,19 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlags(MachineInstr::FrameSetup);
BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addReg(ARM::R12, RegState::Kill)
- .addReg(ARM::R4, RegState::Implicit)
- .setMIFlags(MachineInstr::FrameSetup);
+ .add(predOps(ARMCC::AL))
+ .addReg(ARM::R12, RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit)
+ .setMIFlags(MachineInstr::FrameSetup);
break;
}
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr),
- ARM::SP)
- .addReg(ARM::SP, RegState::Kill)
- .addReg(ARM::R4, RegState::Kill)
- .setMIFlags(MachineInstr::FrameSetup)));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
+ .addReg(ARM::SP, RegState::Kill)
+ .addReg(ARM::R4, RegState::Kill)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
NumBytes = 0;
}
@@ -657,12 +697,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// -- out lower bits in r4
// mov sp, r4
// FIXME: It would be better just to find a spare register here.
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
- .addReg(ARM::SP, RegState::Kill));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
+ .addReg(ARM::SP, RegState::Kill)
+ .add(predOps(ARMCC::AL));
emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
false);
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
- .addReg(ARM::R4, RegState::Kill));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(ARM::R4, RegState::Kill)
+ .add(predOps(ARMCC::AL));
}
AFI->setShouldRestoreSPFromFP(true);
@@ -675,14 +717,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// FIXME: Clarify FrameSetup flags here.
if (RegInfo->hasBasePointer(MF)) {
if (isARM)
- BuildMI(MBB, MBBI, dl,
- TII.get(ARM::MOVr), RegInfo->getBaseRegister())
- .addReg(ARM::SP)
- .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
else
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
- RegInfo->getBaseRegister())
- .addReg(ARM::SP));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
}
// If the frame has variable sized objects then the epilogue must restore
@@ -757,19 +799,21 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
"No scratch register to restore SP from FP!");
emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
ARMCC::AL, 0, TII);
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
- ARM::SP)
- .addReg(ARM::R4));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(ARM::R4)
+ .add(predOps(ARMCC::AL));
}
} else {
// Thumb2 or ARM.
if (isARM)
BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
- .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ .addReg(FramePtr)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
else
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
- ARM::SP)
- .addReg(FramePtr));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(FramePtr)
+ .add(predOps(ARMCC::AL));
}
} else if (NumBytes &&
!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
@@ -829,7 +873,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
// When dynamically realigning the stack, use the frame pointer for
// parameters, and the stack/base pointer for locals.
if (RegInfo->needsStackRealignment(MF)) {
- assert (hasFP(MF) && "dynamic stack realignment without a FP!");
+ assert(hasFP(MF) && "dynamic stack realignment without a FP!");
if (isFixed) {
FrameReg = RegInfo->getFrameRegister(MF);
Offset = FPOffset;
@@ -936,18 +980,19 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
});
if (Regs.size() > 1 || StrOpc== 0) {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
- .addReg(ARM::SP).setMIFlags(MIFlags));
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
+ .addReg(ARM::SP)
+ .setMIFlags(MIFlags)
+ .add(predOps(ARMCC::AL));
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
} else if (Regs.size() == 1) {
- MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc),
- ARM::SP)
- .addReg(Regs[0].first, getKillRegState(Regs[0].second))
- .addReg(ARM::SP).setMIFlags(MIFlags)
- .addImm(-4);
- AddDefaultPred(MIB);
+ BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
+ .addReg(Regs[0].first, getKillRegState(Regs[0].second))
+ .addReg(ARM::SP)
+ .setMIFlags(MIFlags)
+ .addImm(-4)
+ .add(predOps(ARMCC::AL));
}
Regs.clear();
@@ -1027,9 +1072,9 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
});
if (Regs.size() > 1 || LdrOpc == 0) {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
- .addReg(ARM::SP));
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
MIB.addReg(Regs[i], getDefRegState(true));
if (DeleteRet && MI != MBB.end()) {
@@ -1053,7 +1098,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
} else
MIB.addImm(4);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
}
Regs.clear();
@@ -1114,9 +1159,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// sub r4, sp, #numregs * 8
// The immediate is <= 64, so it doesn't need any special encoding.
unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
- .addReg(ARM::SP)
- .addImm(8 * NumAlignedDPRCS2Regs)));
+ BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addReg(ARM::SP)
+ .addImm(8 * NumAlignedDPRCS2Regs)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment();
// We must set parameter MustBeSingleInstruction to true, since
@@ -1132,10 +1179,10 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// Leave r4 live, it is used below.
Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
- .addReg(ARM::R4);
- MIB = AddDefaultPred(MIB);
+ .addReg(ARM::R4)
+ .add(predOps(ARMCC::AL));
if (!isThumb)
- AddDefaultCC(MIB);
+ MIB.add(condCodeOp());
// Now spill NumAlignedDPRCS2Regs registers starting from d8.
// r4 holds the stack slot address.
@@ -1147,11 +1194,12 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QQPRRegClass);
MBB.addLiveIn(SupReg);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
- ARM::R4)
- .addReg(ARM::R4, RegState::Kill).addImm(16)
- .addReg(NextReg)
- .addReg(SupReg, RegState::ImplicitKill));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(16)
+ .addReg(NextReg)
+ .addReg(SupReg, RegState::ImplicitKill)
+ .add(predOps(ARMCC::AL));
NextReg += 4;
NumAlignedDPRCS2Regs -= 4;
}
@@ -1165,9 +1213,12 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QQPRRegClass);
MBB.addLiveIn(SupReg);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
- .addReg(ARM::R4).addImm(16).addReg(NextReg)
- .addReg(SupReg, RegState::ImplicitKill));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
+ .addReg(ARM::R4)
+ .addImm(16)
+ .addReg(NextReg)
+ .addReg(SupReg, RegState::ImplicitKill)
+ .add(predOps(ARMCC::AL));
NextReg += 4;
NumAlignedDPRCS2Regs -= 4;
}
@@ -1177,8 +1228,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QPRRegClass);
MBB.addLiveIn(SupReg);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
- .addReg(ARM::R4).addImm(16).addReg(SupReg));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
+ .addReg(ARM::R4)
+ .addImm(16)
+ .addReg(SupReg)
+ .add(predOps(ARMCC::AL));
NextReg += 2;
NumAlignedDPRCS2Regs -= 2;
}
@@ -1187,9 +1241,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
if (NumAlignedDPRCS2Regs) {
MBB.addLiveIn(NextReg);
// vstr.64 uses addrmode5 which has an offset scale of 4.
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
- .addReg(NextReg)
- .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
+ .addReg(NextReg)
+ .addReg(ARM::R4)
+ .addImm((NextReg - R4BaseReg) * 2)
+ .add(predOps(ARMCC::AL));
}
// The last spill instruction inserted should kill the scratch register r4.
@@ -1254,8 +1310,11 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
- .addFrameIndex(D8SpillFI).addImm(0)));
+ BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addFrameIndex(D8SpillFI)
+ .addImm(0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
// Now restore NumAlignedDPRCS2Regs registers starting from d8.
unsigned NextReg = ARM::D8;
@@ -1264,10 +1323,12 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
if (NumAlignedDPRCS2Regs >= 6) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QQPRRegClass);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
- .addReg(ARM::R4, RegState::Define)
- .addReg(ARM::R4, RegState::Kill).addImm(16)
- .addReg(SupReg, RegState::ImplicitDefine));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
+ .addReg(ARM::R4, RegState::Define)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine)
+ .add(predOps(ARMCC::AL));
NextReg += 4;
NumAlignedDPRCS2Regs -= 4;
}
@@ -1280,9 +1341,11 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
if (NumAlignedDPRCS2Regs >= 4) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QQPRRegClass);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
- .addReg(ARM::R4).addImm(16)
- .addReg(SupReg, RegState::ImplicitDefine));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
+ .addReg(ARM::R4)
+ .addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine)
+ .add(predOps(ARMCC::AL));
NextReg += 4;
NumAlignedDPRCS2Regs -= 4;
}
@@ -1291,16 +1354,20 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
if (NumAlignedDPRCS2Regs >= 2) {
unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
&ARM::QPRRegClass);
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
- .addReg(ARM::R4).addImm(16));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
+ .addReg(ARM::R4)
+ .addImm(16)
+ .add(predOps(ARMCC::AL));
NextReg += 2;
NumAlignedDPRCS2Regs -= 2;
}
// Finally, use a vanilla vldr.64 for the remaining odd register.
if (NumAlignedDPRCS2Regs)
- AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
- .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));
+ BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
+ .addReg(ARM::R4)
+ .addImm(2 * (NextReg - R4BaseReg))
+ .add(predOps(ARMCC::AL));
// Last store kills r4.
std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
@@ -1633,13 +1700,14 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// worth the effort and added fragility?
unsigned EstimatedStackSize =
MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
- if (hasFP(MF)) {
+ bool HasFP = hasFP(MF);
+ if (HasFP) {
if (AFI->hasStackFrame())
EstimatedStackSize += 4;
} else {
// If FP is not used, SP will be used to access arguments, so count the
// size of arguments into the estimation.
- EstimatedStackSize += MF.getInfo<ARMFunctionInfo>()->getArgumentStackSize();
+ EstimatedStackSize += AFI->getArgumentStackSize();
}
EstimatedStackSize += 16; // For possible paddings.
@@ -1650,7 +1718,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
AFI->setHasStackFrame(true);
- if (hasFP(MF)) {
+ if (HasFP) {
SavedRegs.set(FramePtr);
// If the frame pointer is required by the ABI, also spill LR so that we
// emit a complete frame record.
@@ -1658,11 +1726,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(ARM::LR);
LRSpilled = true;
NumGPRSpills++;
- auto LRPos = find(UnspilledCS1GPRs, ARM::LR);
+ auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
if (LRPos != UnspilledCS1GPRs.end())
UnspilledCS1GPRs.erase(LRPos);
}
- auto FPPos = find(UnspilledCS1GPRs, FramePtr);
+ auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
if (FPPos != UnspilledCS1GPRs.end())
UnspilledCS1GPRs.erase(FPPos);
NumGPRSpills++;
@@ -1721,7 +1789,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
// r7 can be used if it is not being used as the frame pointer.
- if (!hasFP(MF)) {
+ if (!HasFP) {
if (SavedRegs.test(ARM::R7)) {
--RegDeficit;
DEBUG(dbgs() << "%R7 is saved low register, RegDeficit = "
@@ -1773,7 +1841,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
NumGPRSpills++;
CS1Spilled = true;
ExtraCSSpill = true;
- UnspilledCS1GPRs.erase(find(UnspilledCS1GPRs, Reg));
+ UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
if (Reg == ARM::LR)
LRSpilled = true;
}
@@ -1786,7 +1854,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(ARM::LR);
NumGPRSpills++;
SmallVectorImpl<unsigned>::iterator LRPos;
- LRPos = find(UnspilledCS1GPRs, (unsigned)ARM::LR);
+ LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
if (LRPos != UnspilledCS1GPRs.end())
UnspilledCS1GPRs.erase(LRPos);
@@ -2081,12 +2149,17 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// SR1: Scratch Register #1
// push {SR0, SR1}
if (Thumb) {
- AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH)))
- .addReg(ScratchReg0).addReg(ScratchReg1);
+ BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
} else {
- AddDefaultPred(BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
- .addReg(ARM::SP, RegState::Define).addReg(ARM::SP))
- .addReg(ScratchReg0).addReg(ScratchReg1);
+ BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
}
// Emit the relevant DWARF information about the change in stack pointer as
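
The segmented-stack prologue assembled across these hunks reduces to a simple limit check. Rough semantics, with a hypothetical helper standing in for the emitted PrevStackMBB/McrMBB/GetMBB instructions:

    #include <cstddef>
    #include <cstdint>
    // SR1 = sp - AlignedStackSize; SR0 = *stack_limit; cmp SR0, SR1
    bool needsMoreStack(uintptr_t SP, uintptr_t StackLimit, size_t Size) {
      return SP - Size < StackLimit; // if so, AllocMBB calls __morestack
    }
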
@@ -2106,21 +2179,29 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// mov SR1, sp
if (Thumb) {
- AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
- .addReg(ARM::SP));
+ BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
} else if (CompareStackPointer) {
- AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
- .addReg(ARM::SP)).addReg(0);
+ BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
// sub SR1, sp, #StackSize
if (!CompareStackPointer && Thumb) {
- AddDefaultPred(
- AddDefaultCC(BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1))
- .addReg(ScratchReg1).addImm(AlignedStackSize));
+ BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
+ .add(condCodeOp())
+ .addReg(ScratchReg1)
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL));
} else if (!CompareStackPointer) {
- AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
- .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0);
+ BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
+ .addReg(ARM::SP)
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
if (Thumb && ST->isThumb1Only()) {
@@ -2131,21 +2212,25 @@ void ARMFrameLowering::adjustForSegmentedStacks(
unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
// ldr SR0, [pc, offset(STACK_LIMIT)]
- AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
- .addConstantPoolIndex(CPI));
+ BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
+ .addConstantPoolIndex(CPI)
+ .add(predOps(ARMCC::AL));
// ldr SR0, [SR0]
- AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
- .addReg(ScratchReg0).addImm(0));
+ BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
+ .addReg(ScratchReg0)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
} else {
// Get TLS base address from the coprocessor
// mrc p15, #0, SR0, c13, c0, #3
- AddDefaultPred(BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
- .addImm(15)
- .addImm(0)
- .addImm(13)
- .addImm(0)
- .addImm(3));
+ BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
+ .addImm(15)
+ .addImm(0)
+ .addImm(13)
+ .addImm(0)
+ .addImm(3)
+ .add(predOps(ARMCC::AL));
// Use the last tls slot on android and a private field of the TCB on linux.
assert(ST->isTargetAndroid() || ST->isTargetLinux());
@@ -2153,16 +2238,19 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Get the stack limit from the right offset
// ldr SR0, [sr0, #4 * TlsOffset]
- AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
- .addReg(ScratchReg0).addImm(4 * TlsOffset));
+ BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
+ .addReg(ScratchReg0)
+ .addImm(4 * TlsOffset)
+ .add(predOps(ARMCC::AL));
}
// Compare stack limit with stack size requested.
// cmp SR0, SR1
Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
- AddDefaultPred(BuildMI(GetMBB, DL, TII.get(Opcode))
- .addReg(ScratchReg0)
- .addReg(ScratchReg1));
+ BuildMI(GetMBB, DL, TII.get(Opcode))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1)
+ .add(predOps(ARMCC::AL));
// This jump is taken if StackLimit < SP - stack required.
Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
@@ -2178,32 +2266,40 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Pass the first argument to __morestack in Scratch Register #0:
// the amount of stack required.
if (Thumb) {
- AddDefaultPred(AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8),
- ScratchReg0)).addImm(AlignedStackSize));
+ BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
+ .add(condCodeOp())
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL));
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
- .addImm(AlignedStackSize)).addReg(0);
+ BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
+ .addImm(AlignedStackSize)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
// Pass the second argument to __morestack in Scratch Register #1:
// the amount of stack consumed to save the function arguments.
if (Thumb) {
- AddDefaultPred(
- AddDefaultCC(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1))
- .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())));
+ BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
+ .add(condCodeOp())
+ .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
+ .add(predOps(ARMCC::AL));
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
- .addImm(alignToARMConstant(ARMFI->getArgumentStackSize())))
- .addReg(0);
+ BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
+ .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
}
// push {lr} - Save return address of this function.
if (Thumb) {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH)))
+ BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
+ .add(predOps(ARMCC::AL))
.addReg(ARM::LR);
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP))
+ BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
.addReg(ARM::LR);
}
@@ -2220,7 +2316,8 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Call __morestack().
if (Thumb) {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tBL)))
+ BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
+ .add(predOps(ARMCC::AL))
.addExternalSymbol("__morestack");
} else {
BuildMI(AllocMBB, DL, TII.get(ARM::BL))
@@ -2230,22 +2327,26 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// pop {lr} - Restore return address of the original function.
if (Thumb) {
if (ST->isThumb1Only()) {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
- .addReg(ScratchReg0);
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
- .addReg(ScratchReg0));
+ BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0);
+ BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
+ .addReg(ScratchReg0)
+ .add(predOps(ARMCC::AL));
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
- .addReg(ARM::LR, RegState::Define)
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP)
- .addImm(4));
+ BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .addImm(4)
+ .add(predOps(ARMCC::AL));
}
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP))
- .addReg(ARM::LR);
+ BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .addReg(ARM::LR);
}
// Restore SR0 and SR1 in case __morestack() was called.
@@ -2253,15 +2354,17 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// scratch registers from here.
// pop {SR0, SR1}
if (Thumb) {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::tPOP)))
- .addReg(ScratchReg0)
- .addReg(ScratchReg1);
+ BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
} else {
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP))
- .addReg(ScratchReg0)
- .addReg(ScratchReg1);
+ BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
}
// Update the CFA offset now that we've popped
@@ -2271,20 +2374,22 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// bx lr - Return from this function.
Opcode = Thumb ? ARM::tBX_RET : ARM::BX_RET;
- AddDefaultPred(BuildMI(AllocMBB, DL, TII.get(Opcode)));
+ BuildMI(AllocMBB, DL, TII.get(Opcode)).add(predOps(ARMCC::AL));
// Restore SR0 and SR1 in case __morestack() was not called.
// pop {SR0, SR1}
if (Thumb) {
- AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP)))
- .addReg(ScratchReg0)
- .addReg(ScratchReg1);
+ BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
} else {
- AddDefaultPred(BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
- .addReg(ARM::SP, RegState::Define)
- .addReg(ARM::SP))
- .addReg(ScratchReg0)
- .addReg(ScratchReg1);
+ BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL))
+ .addReg(ScratchReg0)
+ .addReg(ScratchReg1);
}
// Update the CFA offset now that we've popped
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c3e9591d5c70..b07b4e1f5cfb 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -244,11 +244,8 @@ private:
bool tryInlineAsm(SDNode *N);
- void SelectConcatVector(SDNode *N);
void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
- bool trySMLAWSMULW(SDNode *N);
-
void SelectCMP_SWAP(SDNode *N);
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
@@ -2559,141 +2556,6 @@ bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
return false;
}
-static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
- bool Accumulate) {
- // For SM*WB, we need to some form of sext.
- // For SM*WT, we need to search for (sra X, 16)
- // Src1 then gets set to X.
- if ((SignExt.getOpcode() == ISD::SIGN_EXTEND ||
- SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG ||
- SignExt.getOpcode() == ISD::AssertSext) &&
- SignExt.getValueType() == MVT::i32) {
-
- *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
- Src1 = SignExt.getOperand(0);
- return true;
- }
-
- if (SignExt.getOpcode() != ISD::SRA)
- return false;
-
- ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
- if (!SRASrc1 || SRASrc1->getZExtValue() != 16)
- return false;
-
- SDValue Op0 = SignExt.getOperand(0);
-
- // The sign extend operand for SM*WB could be generated by a shl and ashr.
- if (Op0.getOpcode() == ISD::SHL) {
- SDValue SHL = Op0;
- ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
- if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16)
- return false;
-
- *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
- Src1 = Op0.getOperand(0);
- return true;
- }
- *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
- Src1 = SignExt.getOperand(0);
- return true;
-}
-
-static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
- SDValue &Src1, bool Accumulate) {
- // First we look for:
- // (add (or (srl ?, 16), (shl ?, 16)))
- if (OR.getOpcode() != ISD::OR)
- return false;
-
- SDValue SRL = OR.getOperand(0);
- SDValue SHL = OR.getOperand(1);
-
- if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
- SRL = OR.getOperand(1);
- SHL = OR.getOperand(0);
- if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL)
- return false;
- }
-
- ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
- ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
- if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 ||
- SHLSrc1->getZExtValue() != 16)
- return false;
-
- // The first operands to the shifts need to be the two results from the
- // same smul_lohi node.
- if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
- SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
- return false;
-
- SDNode *SMULLOHI = SRL.getOperand(0).getNode();
- if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
- SHL.getOperand(0) != SDValue(SMULLOHI, 1))
- return false;
-
- // Now we have:
- // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
- // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
- // For SMLAWB the 16-bit value will signed extended somehow.
- // For SMLAWT only the SRA is required.
-
- // Check both sides of SMUL_LOHI
- if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) {
- Src0 = SMULLOHI->getOperand(1);
- } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1,
- Accumulate)) {
- Src0 = SMULLOHI->getOperand(0);
- } else {
- return false;
- }
- return true;
-}
-
-bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) {
- if (!Subtarget->hasV6Ops() ||
- (Subtarget->isThumb() && !Subtarget->hasThumb2()))
- return false;
-
- SDLoc dl(N);
- SDValue Src0 = N->getOperand(0);
- SDValue Src1 = N->getOperand(1);
- SDValue A, B;
- unsigned Opc = 0;
-
- if (N->getOpcode() == ISD::ADD) {
- if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR)
- return false;
-
- SDValue Acc;
- if (SearchSignedMulLong(Src0, &Opc, A, B, true)) {
- Acc = Src1;
- } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) {
- Acc = Src0;
- } else {
- return false;
- }
- if (Opc == 0)
- return false;
-
- SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl),
- CurDAG->getRegister(0, MVT::i32) };
- CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops);
- return true;
- } else if (N->getOpcode() == ISD::OR &&
- SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) {
- if (Opc == 0)
- return false;
-
- SDValue Ops[] = { A, B, getAL(CurDAG, dl),
- CurDAG->getRegister(0, MVT::i32)};
- CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
- return true;
- }
- return false;
-}
-
/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
unsigned Opcode;
@@ -2722,15 +2584,6 @@ void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
CurDAG->RemoveDeadNode(N);
}
-void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
- // The only time a CONCAT_VECTORS operation can have legal types is when
- // two 64-bit vectors are concatenated to a 128-bit vector.
- EVT VT = N->getValueType(0);
- if (!VT.is128BitVector() || N->getNumOperands() != 2)
- llvm_unreachable("unexpected CONCAT_VECTORS");
- ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)));
-}
-
static Optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
@@ -2822,11 +2675,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
- case ISD::ADD:
- case ISD::OR:
- if (trySMLAWSMULW(N))
- return;
- break;
case ISD::WRITE_REGISTER:
if (tryWriteRegister(N))
return;
@@ -3042,49 +2890,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
break;
}
- case ARMISD::VMOVRRD:
- ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
- N->getOperand(0), getAL(CurDAG, dl),
- CurDAG->getRegister(0, MVT::i32)));
- return;
- case ISD::UMUL_LOHI: {
- if (Subtarget->isThumb1Only())
- break;
- if (Subtarget->isThumb()) {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
- ReplaceNode(
- N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops));
- return;
- } else {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
- CurDAG->getRegister(0, MVT::i32) };
- ReplaceNode(N, CurDAG->getMachineNode(
- Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, dl,
- MVT::i32, MVT::i32, Ops));
- return;
- }
- }
- case ISD::SMUL_LOHI: {
- if (Subtarget->isThumb1Only())
- break;
- if (Subtarget->isThumb()) {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
- ReplaceNode(
- N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops));
- return;
- } else {
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
- CurDAG->getRegister(0, MVT::i32) };
- ReplaceNode(N, CurDAG->getMachineNode(
- Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl,
- MVT::i32, MVT::i32, Ops));
- return;
- }
- }
case ARMISD::UMAAL: {
unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
@@ -3095,38 +2900,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
case ARMISD::UMLAL:{
- // UMAAL is similar to UMLAL but it adds two 32-bit values to the
- // 64-bit multiplication result.
- if (Subtarget->hasV6Ops() && Subtarget->hasDSP() &&
- N->getOperand(2).getOpcode() == ARMISD::ADDC &&
- N->getOperand(3).getOpcode() == ARMISD::ADDE) {
-
- SDValue Addc = N->getOperand(2);
- SDValue Adde = N->getOperand(3);
-
- if (Adde.getOperand(2).getNode() == Addc.getNode()) {
-
- ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0));
- ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1));
-
- if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0)
- {
- // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm
- // RdLo = one operand to be added, lower 32-bits of res
- // RdHi = other operand to be added, upper 32-bits of res
- // Rn = first multiply operand
- // Rm = second multiply operand
- SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- Addc.getOperand(0), Addc.getOperand(1),
- getAL(CurDAG, dl),
- CurDAG->getRegister(0, MVT::i32) };
- unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
- CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops);
- return;
- }
- }
- }
-
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG, dl),
@@ -3277,26 +3050,23 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
int64_t Addend = -C->getSExtValue();
SDNode *Add = nullptr;
- // In T2 mode, ADDS can be better than CMN if the immediate fits in a
+ // ADDS can be better than CMN if the immediate fits in a
// 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
// Outside that range we can just use a CMN which is 32-bit but has a
// 12-bit immediate range.
- if (Subtarget->isThumb2() && Addend < 1<<8) {
- SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
- CurDAG->getRegister(0, MVT::i32) };
- Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
- } else if (!Subtarget->isThumb2() && Addend < 1<<8) {
- // FIXME: Add T1 tADDi8 code.
- SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
- CurDAG->getTargetConstant(Addend, dl, MVT::i32),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
- Add = CurDAG->getMachineNode(ARM::tADDi8, dl, MVT::i32, Ops);
- } else if (!Subtarget->isThumb2() && Addend < 1<<3) {
- SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
- CurDAG->getTargetConstant(Addend, dl, MVT::i32),
- getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
- Add = CurDAG->getMachineNode(ARM::tADDi3, dl, MVT::i32, Ops);
+ if (Addend < 1<<8) {
+ if (Subtarget->isThumb2()) {
+ SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
+ } else {
+ unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
+ SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
+ CurDAG->getTargetConstant(Addend, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
+ Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
+ }
}
if (Add) {
SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
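
(A concrete instance of the restructured fold, assuming the usual Thumb immediate ranges, 3 bits for tADDi3 and 8 bits for tADDi8: a compare against a negative constant has no small Thumb encoding, so it is rewritten as a flag-setting add of the negated constant, whose result is then compared against zero:)

    //   cmp  r0, #-5         ; no Thumb1 encoding for a negative immediate
    // with Addend = 5 < 1<<3 becomes the tADDi3 form:
    //   adds r1, r0, #5      ; produces the same NZCV flags as the cmp
    // and with, say, Addend = 100 (under 1<<8 but not 1<<3), the tADDi8 form:
    //   adds r0, #100
    // Outside [0,256) the selection falls back to CMN, whose 12-bit
    // immediate range the comment above mentions.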
@@ -4013,10 +3783,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
- case ISD::CONCAT_VECTORS:
- SelectConcatVector(N);
- return;
-
case ISD::ATOMIC_CMP_SWAP:
SelectCMP_SWAP(N);
return;
@@ -4123,11 +3889,10 @@ static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
-static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
- if (Flags.empty())
- return 0x2 | (int)hasDSP;
-
+static inline int getMClassFlagsMask(StringRef Flags) {
return StringSwitch<int>(Flags)
+ .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
+ // correct when flags are not permitted
.Case("g", 0x1)
.Case("nzcvq", 0x2)
.Case("nzcvqg", 0x3)
@@ -4170,7 +3935,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
}
// We know we are now handling a write, so we need to get the mask for the flags.
- int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
+ int Mask = getMClassFlagsMask(Flags);
// Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
// shouldn't have flags present.
@@ -4185,10 +3950,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
// The register was valid so need to put the mask in the correct place
// (the flags need to be in bits 11-10) and combine with the SYSmvalue to
// construct the operand for the instruction node.
- if (SYSmvalue < 0x4)
- return SYSmvalue | Mask << 10;
-
- return SYSmvalue;
+ return SYSmvalue | Mask << 10;
}
static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
@@ -4201,7 +3963,7 @@ static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
// The flags permitted for apsr are the same flags that are allowed in
// M class registers. We get the flag value and then shift the flags into
// the correct place to combine with the mask.
- Mask = getMClassFlagsMask(Flags, true);
+ Mask = getMClassFlagsMask(Flags);
if (Mask == -1)
return -1;
return Mask << 2;
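
(Both mask computations above lean on the MSR operand layouts: for M-profile special registers the write mask sits in bits <11:10> of the combined SYSm operand, while the A-profile apsr path shifts the same two flag bits into its mask field via Mask << 2. Schematically, as a summary rather than a quote from the ARM ARM:)

    //   flags      mask   M-profile operand = SYSmvalue | (mask << 10)
    //   "g"        0b01   write the GE bits only
    //   "nzcvq"    0b10   write N, Z, C, V, Q only
    //   "nzcvqg"   0b11   write both fields
    // The "" case added above also yields 0b10, which the patch treats as
    // correct both as the nzcvq default for psr registers and as a harmless
    // value where flags are not permitted.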
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 0f84a2359160..e697c8ca5339 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -12,47 +12,101 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMISelLowering.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
+#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
-#include "ARMTargetMachine.h"
-#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <limits>
+#include <tuple>
+#include <string>
#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "arm-isel"
@@ -82,21 +136,6 @@ static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
cl::desc("Maximum size of ALL constants to promote into a constant pool"),
cl::init(128));
-namespace {
- class ARMCCState : public CCState {
- public:
- ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
- SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
- ParmContext PC)
- : CCState(CC, isVarArg, MF, locs, C) {
- assert(((PC == Call) || (PC == Prologue)) &&
- "ARMCCState users must specify whether their context is call"
- "or prologue generation.");
- CallOrPrologue = PC;
- }
- };
-}
-
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
@@ -685,10 +724,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
}
}
- // ARM and Thumb2 support UMLAL/SMLAL.
- if (!Subtarget->isThumb1Only())
- setTargetDAGCombine(ISD::ADDC);
-
if (Subtarget->isFPOnlySP()) {
// When targeting a floating-point unit with only single-precision
// operations, f64 is legal for the few double-precision instructions which
@@ -787,13 +822,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
- if (!Subtarget->isThumb1Only()) {
- // FIXME: We should do this for Thumb1 as well.
- setOperationAction(ISD::ADDC, MVT::i32, Custom);
- setOperationAction(ISD::ADDE, MVT::i32, Custom);
- setOperationAction(ISD::SUBC, MVT::i32, Custom);
- setOperationAction(ISD::SUBE, MVT::i32, Custom);
- }
+ setOperationAction(ISD::ADDC, MVT::i32, Custom);
+ setOperationAction(ISD::ADDE, MVT::i32, Custom);
+ setOperationAction(ISD::SUBC, MVT::i32, Custom);
+ setOperationAction(ISD::SUBE, MVT::i32, Custom);
if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
@@ -1305,6 +1337,12 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::UMAAL: return "ARMISD::UMAAL";
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
+ case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
+ case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
+ case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
+ case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
+ case ARMISD::SMULWB: return "ARMISD::SMULWB";
+ case ARMISD::SMULWT: return "ARMISD::SMULWT";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
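
(For reference, the rough semantics of the DSP instructions these new nodes select to, where B and T pick the bottom and top halfword of a 32-bit operand; a paraphrase, not the ARM ARM wording:)

    //   SMLALBB: acc64 += sext(a[15:0])  * sext(b[15:0])
    //   SMLALBT: acc64 += sext(a[15:0])  * sext(b[31:16])
    //   SMLALTB: acc64 += sext(a[31:16]) * sext(b[15:0])
    //   SMLALTT: acc64 += sext(a[31:16]) * sext(b[31:16])
    //   SMULWB:  res32 = (sext(a) * sext(b[15:0]))  >> 16  ; top 32 bits of
    //   SMULWT:  res32 = (sext(a) * sext(b[31:16])) >> 16  ; a 48-bit product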
@@ -1414,6 +1452,40 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
// Lowering Code
//===----------------------------------------------------------------------===//
+static bool isSRL16(const SDValue &Op) {
+ if (Op.getOpcode() != ISD::SRL)
+ return false;
+ if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return Const->getZExtValue() == 16;
+ return false;
+}
+
+static bool isSRA16(const SDValue &Op) {
+ if (Op.getOpcode() != ISD::SRA)
+ return false;
+ if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return Const->getZExtValue() == 16;
+ return false;
+}
+
+static bool isSHL16(const SDValue &Op) {
+ if (Op.getOpcode() != ISD::SHL)
+ return false;
+ if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+ return Const->getZExtValue() == 16;
+ return false;
+}
+
+// Check for a signed 16-bit value. We special-case SRA because it makes
+// things simpler when we are also looking for SRAs that aren't
+// sign-extending a smaller value. Without the check, we'd need to take
+// extra care with the checking order for some operations.
+static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
+ if (isSRA16(Op))
+ return isSHL16(Op.getOperand(0));
+ return DAG.ComputeNumSignBits(Op) == 17;
+}
+
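
(Why exactly 17 sign bits marks a 16-bit value: a value sign-extended from 16 bits into an i32 carries the sign bit in position 15 plus 16 copies of it above, and ComputeNumSignBits reports how many leading bits are known to equal the sign bit. A self-contained illustration of the counting, with numSignBits as a hypothetical stand-in for the DAG query:)

    #include <cstdint>

    static unsigned numSignBits(uint32_t V) {
      unsigned Sign = V >> 31, N = 1;   // the sign bit itself always counts
      while (N < 32 && ((V >> (31 - N)) & 1) == Sign)
        ++N;
      return N;
    }
    // numSignBits(0xFFFF8000) == 17   // -32768: sign-extends from 16 bits
    // numSignBits(0x00008000) == 16   // +32768: needs the full 32 bits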
/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
switch (CC) {
@@ -1433,22 +1505,34 @@ static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
- ARMCC::CondCodes &CondCode2) {
+ ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
CondCode2 = ARMCC::AL;
+ InvalidOnQNaN = true;
switch (CC) {
default: llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
- case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
+ case ISD::SETOEQ:
+ CondCode = ARMCC::EQ;
+ InvalidOnQNaN = false;
+ break;
case ISD::SETGT:
case ISD::SETOGT: CondCode = ARMCC::GT; break;
case ISD::SETGE:
case ISD::SETOGE: CondCode = ARMCC::GE; break;
case ISD::SETOLT: CondCode = ARMCC::MI; break;
case ISD::SETOLE: CondCode = ARMCC::LS; break;
- case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
+ case ISD::SETONE:
+ CondCode = ARMCC::MI;
+ CondCode2 = ARMCC::GT;
+ InvalidOnQNaN = false;
+ break;
case ISD::SETO: CondCode = ARMCC::VC; break;
case ISD::SETUO: CondCode = ARMCC::VS; break;
- case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
+ case ISD::SETUEQ:
+ CondCode = ARMCC::EQ;
+ CondCode2 = ARMCC::VS;
+ InvalidOnQNaN = false;
+ break;
case ISD::SETUGT: CondCode = ARMCC::HI; break;
case ISD::SETUGE: CondCode = ARMCC::PL; break;
case ISD::SETLT:
@@ -1456,7 +1540,10 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
case ISD::SETLE:
case ISD::SETULE: CondCode = ARMCC::LE; break;
case ISD::SETNE:
- case ISD::SETUNE: CondCode = ARMCC::NE; break;
+ case ISD::SETUNE:
+ CondCode = ARMCC::NE;
+ InvalidOnQNaN = false;
+ break;
}
}
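
(The new InvalidOnQNaN flag records whether the comparison is an IEEE-754 signaling predicate: ordering comparisons raise Invalid Operation on any NaN, while equality-style ones must stay quiet on a quiet NaN. Presumably the instruction patterns use it to pick VCMPE versus VCMP. A host-side sketch of the distinction, assuming the compiler honors FENV semantics at the chosen optimization level:)

    #include <cfenv>
    #include <cmath>
    #include <cstdio>

    int main() {
      volatile double QNaN = std::nan(""), One = 1.0; // volatile blocks folding
      volatile bool R;
      std::feclearexcept(FE_INVALID);
      R = (QNaN == One);  // equality: a quiet predicate, must not raise Invalid
      std::printf("== raises Invalid: %d\n", std::fetestexcept(FE_INVALID) != 0);
      std::feclearexcept(FE_INVALID);
      R = (QNaN < One);   // ordering: signals Invalid on any NaN
      std::printf("<  raises Invalid: %d\n", std::fetestexcept(FE_INVALID) != 0);
      return 0;
    }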
@@ -1549,8 +1636,8 @@ SDValue ARMTargetLowering::LowerCallResult(
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
- *DAG.getContext(), Call);
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
// Copy all of the result registers out of their specified physreg.
@@ -1710,8 +1797,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext(), Call);
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
// Get a count of how many bytes are to be pushed on the stack.
@@ -2088,10 +2175,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// this.
void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
unsigned Align) const {
- assert((State->getCallOrPrologue() == Prologue ||
- State->getCallOrPrologue() == Call) &&
- "unhandled ParmContext");
-
// Byval (as with any stack) slots are always at least 4 byte aligned.
Align = std::max(Align, 4U);
@@ -2148,7 +2231,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
const TargetInstrInfo *TII) {
unsigned Bytes = Arg.getValueSizeInBits() / 8;
- int FI = INT_MAX;
+ int FI = std::numeric_limits<int>::max();
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!TargetRegisterInfo::isVirtualRegister(VR))
@@ -2178,7 +2261,7 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
} else
return false;
- assert(FI != INT_MAX);
+ assert(FI != std::numeric_limits<int>::max());
if (!MFI.isFixedObjectIndex(FI))
return false;
return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
@@ -2260,7 +2343,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Check if stack adjustment is needed. For now, do not do this if any
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C, Call);
+ CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
if (CCInfo.getNextStackOffset()) {
// Check if the arguments are already laid out in the right way as
@@ -2362,8 +2445,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slots.
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
- *DAG.getContext(), Call);
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
// Analyze outgoing return values.
CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
@@ -2790,9 +2873,9 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
// FIXME: is there useful debug info available here?
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
+ CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
+ CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
@@ -2935,7 +3018,7 @@ static bool isSimpleType(Type *T) {
}
static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
- EVT PtrVT, SDLoc dl) {
+ EVT PtrVT, const SDLoc &dl) {
// If we're creating a pool entry for a constant global with unnamed address,
// and the global is small enough, we can emit it inline into the constant pool
// to save ourselves an indirection.
@@ -2980,7 +3063,8 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
unsigned RequiredPadding = 4 - (Size % 4);
bool PaddingPossible =
RequiredPadding == 4 || (CDAInit && CDAInit->isString());
- if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize)
+ if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
+ Size == 0)
return SDValue();
unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
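
(The new Size == 0 bail-out presumably covers zero-sized globals, for which the padding arithmetic above degenerates:)

    //   Size = 0  =>  RequiredPadding = 4 - (0 % 4) = 4
    //             =>  PaddingPossible = true, PaddedSize = 0
    // i.e. a zero-byte constant-pool entry, which is now rejected up front.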
@@ -3080,15 +3164,22 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
return Result;
} else if (Subtarget->isRWPI() && !IsRO) {
// SB-relative.
- ARMConstantPoolValue *CPV =
- ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
- SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
- CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
- SDValue G = DAG.getLoad(
- PtrVT, dl, DAG.getEntryNode(), CPAddr,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+ SDValue RelAddr;
+ if (Subtarget->useMovt(DAG.getMachineFunction())) {
+ ++NumMovwMovt;
+ SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
+ RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
+ } else { // use literal pool for address constant
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ RelAddr = DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+ }
SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
- SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, G);
+ SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
return Result;
}
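
(Background for the RWPI branch: under read-write position independence, mutable globals are addressed relative to the static base register SB, which is R9 here. When MOVT is available, the offset can be materialized with a movw/movt pair carrying SB-relative relocations instead of being loaded from a literal pool; schematically, with the relocation spelling approximate:)

    // Before (literal pool):          After (movw/movt, MO_SBREL):
    //   ldr  r0, .LCPI0_0               movw r0, :lower16:sym(sbrel)
    //   add  r0, r9, r0                 movt r0, :upper16:sym(sbrel)
    //                                   add  r0, r9, r0
    // This trades a constant-pool data load for one extra move-immediate and
    // keeps the SB-relative offset visible to the linker.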
@@ -3462,8 +3553,8 @@ SDValue ARMTargetLowering::LowerFormalArguments(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext(), Prologue);
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
SmallVector<SDValue, 16> ArgValues;
@@ -3595,7 +3686,6 @@ SDValue ARMTargetLowering::LowerFormalArguments(
InVals.push_back(ArgValue);
} else { // VA.isRegLoc()
-
// sanity check
assert(VA.isMemLoc());
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
@@ -3734,13 +3824,15 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
- SelectionDAG &DAG, const SDLoc &dl) const {
+ SelectionDAG &DAG, const SDLoc &dl,
+ bool InvalidOnQNaN) const {
assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
SDValue Cmp;
+ SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
if (!isFloatingPointZero(RHS))
- Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
+ Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
else
- Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
+ Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
}
@@ -3757,10 +3849,12 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
Cmp = Cmp.getOperand(0);
Opc = Cmp.getOpcode();
if (Opc == ARMISD::CMPFP)
- Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
+ Cmp.getOperand(1), Cmp.getOperand(2));
else {
assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
- Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
+ Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
+ Cmp.getOperand(1));
}
return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}
@@ -3808,7 +3902,6 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
return std::make_pair(Value, OverflowCmp);
}
-
SDValue
ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
// Let legalize expand this if it isn't a legal type yet.
@@ -3832,7 +3925,6 @@ ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
-
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond = Op.getOperand(0);
SDValue SelectTrue = Op.getOperand(1);
@@ -4025,7 +4117,6 @@ static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
// Additionally, the variable is returned in parameter V and the constant in K.
static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
uint64_t &K) {
-
SDValue LHS1 = Op.getOperand(0);
SDValue RHS1 = Op.getOperand(1);
SDValue TrueVal1 = Op.getOperand(2);
@@ -4046,10 +4137,10 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
// in each conditional
SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
? &RHS1
- : NULL;
+ : nullptr;
SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
? &RHS2
- : NULL;
+ : nullptr;
SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
@@ -4073,13 +4164,15 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
const SDValue *LowerCheckOp =
isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
? &Op
- : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2
- : NULL;
+ : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
+ ? &Op2
+ : nullptr;
const SDValue *UpperCheckOp =
isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
? &Op
- : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2) ? &Op2
- : NULL;
+ : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
+ ? &Op2
+ : nullptr;
if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
return false;
@@ -4104,7 +4197,6 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
}
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
-
EVT VT = Op.getValueType();
SDLoc dl(Op);
@@ -4162,7 +4254,8 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
ARMCC::CondCodes CondCode, CondCode2;
- FPCCToARMCC(CC, CondCode, CondCode2);
+ bool InvalidOnQNaN;
+ FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
// Try to generate VMAXNM/VMINNM on ARMv8.
if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
@@ -4181,13 +4274,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
- SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
}
return Result;
@@ -4348,10 +4441,11 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
}
ARMCC::CondCodes CondCode, CondCode2;
- FPCCToARMCC(CC, CondCode, CondCode2);
+ bool InvalidOnQNaN;
+ FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
@@ -4853,9 +4947,10 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3),
// so that the shift and the AND get folded into a bitfield extract.
SDLoc dl(Op);
- SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
- DAG.getConstant(Intrinsic::arm_get_fpscr, dl,
- MVT::i32));
+ SDValue Ops[] = { DAG.getEntryNode(),
+ DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
+
+ SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
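
(A worked check of that formula: ARM keeps RMode in FPSCR bits [23:22], while FLT_ROUNDS uses the C convention, and adding 1 << 22 simply increments the two-bit field modulo 4:)

    //   FPSCR[23:22]  ARM rounding mode    +1 mod 4   FLT_ROUNDS result
    //   0b00          to nearest           0b01       1 (to nearest)
    //   0b01          toward +infinity     0b10       2 (toward +infinity)
    //   0b10          toward -infinity     0b11       3 (toward -infinity)
    //   0b11          toward zero          0b00       0 (toward zero)
    // In the 0b11 case the carry escapes into bit 24 and is discarded by the
    // final "& 3".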
@@ -5584,7 +5679,6 @@ static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
return true;
}
-
static bool isVEXTMask(ArrayRef<int> M, EVT VT,
bool &ReverseVEXT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
@@ -6027,10 +6121,10 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
if (ValueCounts.size() != 1)
usesOnlyOneValue = false;
- if (!Value.getNode() && ValueCounts.size() > 0)
+ if (!Value.getNode() && !ValueCounts.empty())
Value = ValueCounts.begin()->first;
- if (ValueCounts.size() == 0)
+ if (ValueCounts.empty())
return DAG.getUNDEF(VT);
// Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
@@ -6182,8 +6276,8 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
struct ShuffleSourceInfo {
SDValue Vec;
- unsigned MinElt;
- unsigned MaxElt;
+ unsigned MinElt = std::numeric_limits<unsigned>::max();
+ unsigned MaxElt = 0;
// We may insert some combination of BITCASTs and VEXT nodes to force Vec to
// be compatible with the shuffle we intend to construct. As a result
@@ -6192,13 +6286,12 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// Code should guarantee that element i in Vec starts at element
// "WindowBase + i * WindowScale" in ShuffleVec.
- int WindowBase;
- int WindowScale;
+ int WindowBase = 0;
+ int WindowScale = 1;
+
+ ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
- ShuffleSourceInfo(SDValue Vec)
- : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0),
- WindowScale(1) {}
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
@@ -6220,7 +6313,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
- auto Source = find(Sources, SourceVec);
+ auto Source = llvm::find(Sources, SourceVec);
if (Source == Sources.end())
Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
@@ -6336,7 +6429,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
if (Entry.isUndef())
continue;
- auto Src = find(Sources, Entry.getOperand(0));
+ auto Src = llvm::find(Sources, Entry.getOperand(0));
int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
// EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
@@ -6633,7 +6726,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
EVT SubVT = SubV1.getValueType();
// We expect these to have been canonicalized to -1.
- assert(all_of(ShuffleMask, [&](int i) {
+ assert(llvm::all_of(ShuffleMask, [&](int i) {
return i < (int)VT.getVectorNumElements();
}) && "Unexpected shuffle index into UNDEF operand!");
@@ -6896,8 +6989,19 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
N->getValueType(0),
N->getOpcode());
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
- return SkipLoadExtensionForVMULL(LD, DAG);
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
+ "Expected extending load");
+
+ SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
+ unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue extLoad =
+ DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
+
+ return newLoad;
+ }
// Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
// have been legalized as a BITCAST from v4i32.
@@ -7258,9 +7362,9 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
ArgListEntry Entry;
Entry.Node = SRet;
Entry.Ty = RetTy->getPointerTo();
- Entry.isSExt = false;
- Entry.isZExt = false;
- Entry.isSRet = true;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Entry.IsSRet = true;
Args.push_back(Entry);
RetTy = Type::getVoidTy(*DAG.getContext());
}
@@ -7268,8 +7372,8 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
const char *LibcallName =
@@ -7480,12 +7584,12 @@ static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
Entry.Node = Val;
Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isZExt = true;
+ Entry.IsZExt = true;
Args.push_back(Entry);
Entry.Node = Exponent;
Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isZExt = true;
+ Entry.IsZExt = true;
Args.push_back(Entry);
Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
@@ -7702,24 +7806,27 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
// add r5, pc
// str r5, [$jbuf, #+4] ; &jbuf[1]
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
- .addConstantPoolIndex(CPI)
- .addMemOperand(CPMMO));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(CPMMO)
+ .add(predOps(ARMCC::AL));
// Set the low bit because of thumb mode.
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultCC(
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
- .addReg(NewVReg1, RegState::Kill)
- .addImm(0x01)));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(0x01)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
.addReg(NewVReg2, RegState::Kill)
.addImm(PCLabelId);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
- .addReg(NewVReg3, RegState::Kill)
- .addFrameIndex(FI)
- .addImm(36) // &jbuf[1] :: pc
- .addMemOperand(FIMMOSt));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
+ .addReg(NewVReg3, RegState::Kill)
+ .addFrameIndex(FI)
+ .addImm(36) // &jbuf[1] :: pc
+ .addMemOperand(FIMMOSt)
+ .add(predOps(ARMCC::AL));
} else if (isThumb) {
// Incoming value: jbuf
// ldr.n r1, LCPI1_4
@@ -7729,51 +7836,58 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
// add r2, $jbuf, #+4 ; &jbuf[1]
// str r1, [r2]
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
- .addConstantPoolIndex(CPI)
- .addMemOperand(CPMMO));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addMemOperand(CPMMO)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
.addReg(NewVReg1, RegState::Kill)
.addImm(PCLabelId);
// Set the low bit because of thumb mode.
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
- .addReg(ARM::CPSR, RegState::Define)
- .addImm(1));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
- .addReg(ARM::CPSR, RegState::Define)
- .addReg(NewVReg2, RegState::Kill)
- .addReg(NewVReg3, RegState::Kill));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3, RegState::Kill)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
.addFrameIndex(FI)
.addImm(36); // &jbuf[1] :: pc
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
- .addReg(NewVReg4, RegState::Kill)
- .addReg(NewVReg5, RegState::Kill)
- .addImm(0)
- .addMemOperand(FIMMOSt));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
+ .addReg(NewVReg4, RegState::Kill)
+ .addReg(NewVReg5, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(FIMMOSt)
+ .add(predOps(ARMCC::AL));
} else {
// Incoming value: jbuf
// ldr r1, LCPI1_1
// add r1, pc, r1
// str r1, [$jbuf, #+4] ; &jbuf[1]
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
- .addConstantPoolIndex(CPI)
- .addImm(0)
- .addMemOperand(CPMMO));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
+ .addConstantPoolIndex(CPI)
+ .addImm(0)
+ .addMemOperand(CPMMO)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
- .addReg(NewVReg1, RegState::Kill)
- .addImm(PCLabelId));
- AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
- .addReg(NewVReg2, RegState::Kill)
- .addFrameIndex(FI)
- .addImm(36) // &jbuf[1] :: pc
- .addMemOperand(FIMMOSt));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
+ .addReg(NewVReg1, RegState::Kill)
+ .addImm(PCLabelId)
+ .add(predOps(ARMCC::AL));
+ BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
+ .addReg(NewVReg2, RegState::Kill)
+ .addFrameIndex(FI)
+ .addImm(36) // &jbuf[1] :: pc
+ .addMemOperand(FIMMOSt)
+ .add(predOps(ARMCC::AL));
}
}
@@ -7791,7 +7905,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
- DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
+ DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
unsigned MaxCSNum = 0;
for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
++BB) {
@@ -7886,31 +8000,36 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
unsigned NumLPads = LPadList.size();
if (Subtarget->isThumb2()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
- .addFrameIndex(FI)
- .addImm(4)
- .addMemOperand(FIMMOLd));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(FIMMOLd)
+ .add(predOps(ARMCC::AL));
if (NumLPads < 256) {
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
- .addReg(NewVReg1)
- .addImm(LPadList.size()));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size())
+ .add(predOps(ARMCC::AL));
} else {
unsigned VReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
- .addImm(NumLPads & 0xFFFF));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
+ .addImm(NumLPads & 0xFFFF)
+ .add(predOps(ARMCC::AL));
unsigned VReg2 = VReg1;
if ((NumLPads & 0xFFFF0000) != 0) {
VReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
- .addReg(VReg1)
- .addImm(NumLPads >> 16));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
+ .addReg(VReg1)
+ .addImm(NumLPads >> 16)
+ .add(predOps(ARMCC::AL));
}
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
- .addReg(NewVReg1)
- .addReg(VReg2));
+ BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg2)
+ .add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
@@ -7919,16 +8038,17 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addReg(ARM::CPSR);
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
- .addJumpTableIndex(MJTI));
+ BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
- AddDefaultCC(
- AddDefaultPred(
- BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
+ BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
.addReg(NewVReg3, RegState::Kill)
.addReg(NewVReg1)
- .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
.addReg(NewVReg4, RegState::Kill)
@@ -7936,15 +8056,17 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addJumpTableIndex(MJTI);
} else if (Subtarget->isThumb()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
- .addFrameIndex(FI)
- .addImm(1)
- .addMemOperand(FIMMOLd));
+ BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(1)
+ .addMemOperand(FIMMOLd)
+ .add(predOps(ARMCC::AL));
if (NumLPads < 256) {
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
- .addReg(NewVReg1)
- .addImm(NumLPads));
+ BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
+ .addReg(NewVReg1)
+ .addImm(NumLPads)
+ .add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
@@ -7957,12 +8079,14 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
- .addReg(VReg1, RegState::Define)
- .addConstantPoolIndex(Idx));
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
- .addReg(NewVReg1)
- .addReg(VReg1));
+ BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
+ .addReg(VReg1, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .add(predOps(ARMCC::AL));
+ BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
+ .addReg(NewVReg1)
+ .addReg(VReg1)
+ .add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
@@ -7971,37 +8095,42 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addReg(ARM::CPSR);
unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
- .addReg(ARM::CPSR, RegState::Define)
- .addReg(NewVReg1)
- .addImm(2));
+ BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg1)
+ .addImm(2)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
- .addJumpTableIndex(MJTI));
+ BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
+ .addJumpTableIndex(MJTI)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
- .addReg(ARM::CPSR, RegState::Define)
- .addReg(NewVReg2, RegState::Kill)
- .addReg(NewVReg3));
+ BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg2, RegState::Kill)
+ .addReg(NewVReg3)
+ .add(predOps(ARMCC::AL));
MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
- .addReg(NewVReg4, RegState::Kill)
- .addImm(0)
- .addMemOperand(JTMMOLd));
+ BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
+ .addReg(NewVReg4, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(JTMMOLd)
+ .add(predOps(ARMCC::AL));
unsigned NewVReg6 = NewVReg5;
if (IsPositionIndependent) {
NewVReg6 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
- .addReg(ARM::CPSR, RegState::Define)
- .addReg(NewVReg5, RegState::Kill)
- .addReg(NewVReg3));
+ BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg3)
+ .add(predOps(ARMCC::AL));
}
BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
@@ -8009,31 +8138,36 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addJumpTableIndex(MJTI);
} else {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
- .addFrameIndex(FI)
- .addImm(4)
- .addMemOperand(FIMMOLd));
+ BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
+ .addFrameIndex(FI)
+ .addImm(4)
+ .addMemOperand(FIMMOLd)
+ .add(predOps(ARMCC::AL));
if (NumLPads < 256) {
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
- .addReg(NewVReg1)
- .addImm(NumLPads));
+ BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
+ .addReg(NewVReg1)
+ .addImm(NumLPads)
+ .add(predOps(ARMCC::AL));
} else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
unsigned VReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
- .addImm(NumLPads & 0xFFFF));
+ BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
+ .addImm(NumLPads & 0xFFFF)
+ .add(predOps(ARMCC::AL));
unsigned VReg2 = VReg1;
if ((NumLPads & 0xFFFF0000) != 0) {
VReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
- .addReg(VReg1)
- .addImm(NumLPads >> 16));
+ BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
+ .addReg(VReg1)
+ .addImm(NumLPads >> 16)
+ .add(predOps(ARMCC::AL));
}
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
- .addReg(NewVReg1)
- .addReg(VReg2));
+ BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg2)
+ .add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
@@ -8046,13 +8180,15 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
unsigned VReg1 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
- .addReg(VReg1, RegState::Define)
- .addConstantPoolIndex(Idx)
- .addImm(0));
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
- .addReg(NewVReg1)
- .addReg(VReg1, RegState::Kill));
+ BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
+ .addReg(VReg1, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+ BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg1, RegState::Kill)
+ .add(predOps(ARMCC::AL));
}
BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
@@ -8061,23 +8197,25 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
.addReg(ARM::CPSR);
unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
- AddDefaultCC(
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
- .addReg(NewVReg1)
- .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+ BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
+ .addReg(NewVReg1)
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
- .addJumpTableIndex(MJTI));
+ BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
+ .addJumpTableIndex(MJTI)
+ .add(predOps(ARMCC::AL));
MachineMemOperand *JTMMOLd = MF->getMachineMemOperand(
MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4);
unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(
- BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
- .addReg(NewVReg3, RegState::Kill)
- .addReg(NewVReg4)
- .addImm(0)
- .addMemOperand(JTMMOLd));
+ BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
+ .addReg(NewVReg3, RegState::Kill)
+ .addReg(NewVReg4)
+ .addImm(0)
+ .addMemOperand(JTMMOLd)
+ .add(predOps(ARMCC::AL));
if (IsPositionIndependent) {
BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
@@ -8222,26 +8360,35 @@ static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
assert(LdOpc != 0 && "Should have a load opcode");
if (LdSize >= 8) {
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
- .addReg(AddrOut, RegState::Define).addReg(AddrIn)
- .addImm(0));
+ BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define)
+ .addReg(AddrIn)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
} else if (IsThumb1) {
// load + update AddrIn
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
- .addReg(AddrIn).addImm(0));
- MachineInstrBuilder MIB =
- BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
- MIB = AddDefaultT1CC(MIB);
- MIB.addReg(AddrIn).addImm(LdSize);
- AddDefaultPred(MIB);
+ BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrIn)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
+ .add(t1CondCodeOp())
+ .addReg(AddrIn)
+ .addImm(LdSize)
+ .add(predOps(ARMCC::AL));
} else if (IsThumb2) {
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
- .addReg(AddrOut, RegState::Define).addReg(AddrIn)
- .addImm(LdSize));
+ BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define)
+ .addReg(AddrIn)
+ .addImm(LdSize)
+ .add(predOps(ARMCC::AL));
} else { // arm
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
- .addReg(AddrOut, RegState::Define).addReg(AddrIn)
- .addReg(0).addImm(LdSize));
+ BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
+ .addReg(AddrOut, RegState::Define)
+ .addReg(AddrIn)
+ .addReg(0)
+ .addImm(LdSize)
+ .add(predOps(ARMCC::AL));
}
}
@@ -8254,24 +8401,36 @@ static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
assert(StOpc != 0 && "Should have a store opcode");
if (StSize >= 8) {
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
- .addReg(AddrIn).addImm(0).addReg(Data));
+ BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(AddrIn)
+ .addImm(0)
+ .addReg(Data)
+ .add(predOps(ARMCC::AL));
} else if (IsThumb1) {
// store + update AddrIn
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data)
- .addReg(AddrIn).addImm(0));
- MachineInstrBuilder MIB =
- BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut);
- MIB = AddDefaultT1CC(MIB);
- MIB.addReg(AddrIn).addImm(StSize);
- AddDefaultPred(MIB);
+ BuildMI(*BB, Pos, dl, TII->get(StOpc))
+ .addReg(Data)
+ .addReg(AddrIn)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
+ BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
+ .add(t1CondCodeOp())
+ .addReg(AddrIn)
+ .addImm(StSize)
+ .add(predOps(ARMCC::AL));
} else if (IsThumb2) {
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
- .addReg(Data).addReg(AddrIn).addImm(StSize));
+ BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data)
+ .addReg(AddrIn)
+ .addImm(StSize)
+ .add(predOps(ARMCC::AL));
} else { // arm
- AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
- .addReg(Data).addReg(AddrIn).addReg(0)
- .addImm(StSize));
+ BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
+ .addReg(Data)
+ .addReg(AddrIn)
+ .addReg(0)
+ .addImm(StSize)
+ .add(predOps(ARMCC::AL));
}
}
@@ -8402,16 +8561,15 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
unsigned Vtmp = varEnd;
if ((LoopSize & 0xFFFF0000) != 0)
Vtmp = MRI.createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(BB, dl,
- TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16),
- Vtmp).addImm(LoopSize & 0xFFFF));
+ BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp)
+ .addImm(LoopSize & 0xFFFF)
+ .add(predOps(ARMCC::AL));
if ((LoopSize & 0xFFFF0000) != 0)
- AddDefaultPred(BuildMI(BB, dl,
- TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16),
- varEnd)
- .addReg(Vtmp)
- .addImm(LoopSize >> 16));
+ BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd)
+ .addReg(Vtmp)
+ .addImm(LoopSize >> 16)
+ .add(predOps(ARMCC::AL));
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
@@ -8424,11 +8582,16 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
if (IsThumb)
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
- varEnd, RegState::Define).addConstantPoolIndex(Idx));
+ BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
+ .addReg(varEnd, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .add(predOps(ARMCC::AL));
else
- AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg(
- varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0));
+ BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
+ .addReg(varEnd, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
}
BB->addSuccessor(loopMBB);
@@ -8465,16 +8628,19 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
// Decrement loop variable by UnitSize.
if (IsThumb1) {
- MachineInstrBuilder MIB =
- BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop);
- MIB = AddDefaultT1CC(MIB);
- MIB.addReg(varPhi).addImm(UnitSize);
- AddDefaultPred(MIB);
+ BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
+ .add(t1CondCodeOp())
+ .addReg(varPhi)
+ .addImm(UnitSize)
+ .add(predOps(ARMCC::AL));
} else {
MachineInstrBuilder MIB =
BuildMI(*BB, BB->end(), dl,
TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
- AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
+ MIB.addReg(varPhi)
+ .addImm(UnitSize)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
MIB->getOperand(5).setReg(ARM::CPSR);
MIB->getOperand(5).setIsDef(true);
}
@@ -8545,11 +8711,12 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
case CodeModel::Default:
case CodeModel::Kernel:
BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addExternalSymbol("__chkstk")
- .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
- .addReg(ARM::R4, RegState::Implicit | RegState::Define)
- .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ .add(predOps(ARMCC::AL))
+ .addExternalSymbol("__chkstk")
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12,
+ RegState::Implicit | RegState::Define | RegState::Dead);
break;
case CodeModel::Large:
case CodeModel::JITDefault: {
@@ -8559,20 +8726,22 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
.addExternalSymbol("__chkstk");
BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
- .addImm((unsigned)ARMCC::AL).addReg(0)
- .addReg(Reg, RegState::Kill)
- .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
- .addReg(ARM::R4, RegState::Implicit | RegState::Define)
- .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ .add(predOps(ARMCC::AL))
+ .addReg(Reg, RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12,
+ RegState::Implicit | RegState::Define | RegState::Dead);
break;
}
}
- AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
- ARM::SP)
- .addReg(ARM::SP, RegState::Kill)
- .addReg(ARM::R4, RegState::Kill)
- .setMIFlags(MachineInstr::FrameSetup)));
+ BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
+ .addReg(ARM::SP, RegState::Kill)
+ .addReg(ARM::R4, RegState::Kill)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
MI.eraseFromParent();
return MBB;
@@ -8597,9 +8766,10 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
MF->push_back(TrapBB);
MBB->addSuccessor(TrapBB);
- AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
- .addReg(MI.getOperand(0).getReg())
- .addImm(0));
+ BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
+ .addReg(MI.getOperand(0).getReg())
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
.addMBB(TrapBB)
.addImm(ARMCC::EQ)
@@ -8617,18 +8787,18 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
bool isThumb2 = Subtarget->isThumb2();
switch (MI.getOpcode()) {
default: {
- MI.dump();
+ MI.print(errs());
llvm_unreachable("Unexpected instr type to insert");
}
// Thumb1 post-indexed loads are really just single-register LDMs.
case ARM::tLDR_postidx: {
BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
- .addOperand(MI.getOperand(1)) // Rn_wb
- .addOperand(MI.getOperand(2)) // Rn
- .addOperand(MI.getOperand(3)) // PredImm
- .addOperand(MI.getOperand(4)) // PredReg
- .addOperand(MI.getOperand(0)); // Rt
+ .add(MI.getOperand(1)) // Rn_wb
+ .add(MI.getOperand(2)) // Rn
+ .add(MI.getOperand(3)) // PredImm
+ .add(MI.getOperand(4)) // PredReg
+ .add(MI.getOperand(0)); // Rt
MI.eraseFromParent();
return BB;
}
@@ -8659,12 +8829,12 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineMemOperand *MMO = *MI.memoperands_begin();
BuildMI(*BB, MI, dl, TII->get(NewOpc))
- .addOperand(MI.getOperand(0)) // Rn_wb
- .addOperand(MI.getOperand(1)) // Rt
- .addOperand(MI.getOperand(2)) // Rn
- .addImm(Offset) // offset (skip GPR==zero_reg)
- .addOperand(MI.getOperand(5)) // pred
- .addOperand(MI.getOperand(6))
+ .add(MI.getOperand(0)) // Rn_wb
+ .add(MI.getOperand(1)) // Rt
+ .add(MI.getOperand(2)) // Rn
+ .addImm(Offset) // offset (skip GPR==zero_reg)
+ .add(MI.getOperand(5)) // pred
+ .add(MI.getOperand(6))
.addMemOperand(MMO);
MI.eraseFromParent();
return BB;
@@ -8681,7 +8851,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
for (unsigned i = 0; i < MI.getNumOperands(); ++i)
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
MI.eraseFromParent();
return BB;
}
@@ -8754,18 +8924,20 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
unsigned LHS1 = MI.getOperand(1).getReg();
unsigned LHS2 = MI.getOperand(2).getReg();
if (RHSisZero) {
- AddDefaultPred(BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(LHS1).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS1)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(LHS2).addImm(0)
.addImm(ARMCC::EQ).addReg(ARM::CPSR);
} else {
unsigned RHS1 = MI.getOperand(3).getReg();
unsigned RHS2 = MI.getOperand(4).getReg();
- AddDefaultPred(BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
- .addReg(LHS1).addReg(RHS1));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS1)
+ .addReg(RHS1)
+ .add(predOps(ARMCC::AL));
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
.addReg(LHS2).addReg(RHS2)
.addImm(ARMCC::EQ).addReg(ARM::CPSR);
@@ -8779,7 +8951,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
.addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
if (isThumb2)
- AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
+ BuildMI(BB, dl, TII->get(ARM::t2B))
+ .addMBB(exitMBB)
+ .add(predOps(ARMCC::AL));
else
BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
@@ -8842,9 +9016,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
RSBBB->addSuccessor(SinkBB);
// insert a cmp at the end of BB
- AddDefaultPred(BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(ABSSrcReg).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(ABSSrcReg)
+ .addImm(0)
+ .add(predOps(ARMCC::AL));
// insert a bcc with opposite CC to ARMCC::MI at the end of BB
BuildMI(BB, dl,
@@ -8855,9 +9030,11 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Note: BCC and rsbri will be converted into predicated rsbmi
// by if-conversion pass
BuildMI(*RSBBB, RSBBB->begin(), dl,
- TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
- .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
- .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
+ .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
+ .addImm(0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
// insert PHI in SinkBB,
// reuse ABSDstReg to not change uses of ABS instruction
@@ -8927,19 +9104,45 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
// Rename pseudo opcodes.
unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
+ unsigned ccOutIdx;
if (NewOpc) {
const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
MCID = &TII->get(NewOpc);
- assert(MCID->getNumOperands() == MI.getDesc().getNumOperands() + 1 &&
- "converted opcode should be the same except for cc_out");
+    assert(MCID->getNumOperands() ==
+           MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize() &&
+           "converted opcode should be the same except for cc_out"
+           " (and, on Thumb1, pred)");
MI.setDesc(*MCID);
// Add the optional cc_out operand
MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
- }
- unsigned ccOutIdx = MCID->getNumOperands() - 1;
+
+ // On Thumb1, move all input operands to the end, then add the predicate
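+    // For example (a sketch for a generic three-address instruction): after
+    // the append above the operands are (Rd, Rn, Rm, cc_out); the rotation
+    // below yields (Rd, cc_out, Rn, Rm), and the predicate operands are then
+    // appended, giving (Rd, cc_out, Rn, Rm, pred:imm, pred:reg).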
+ if (Subtarget->isThumb1Only()) {
+ for (unsigned c = MCID->getNumOperands() - 4; c--;) {
+ MI.addOperand(MI.getOperand(1));
+ MI.RemoveOperand(1);
+ }
+
+ // Restore the ties
+ for (unsigned i = MI.getNumOperands(); i--;) {
+ const MachineOperand& op = MI.getOperand(i);
+ if (op.isReg() && op.isUse()) {
+ int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
+ if (DefIdx != -1)
+ MI.tieOperands(DefIdx, i);
+ }
+ }
+
+ MI.addOperand(MachineOperand::CreateImm(ARMCC::AL));
+ MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
+ ccOutIdx = 1;
+ } else
+ ccOutIdx = MCID->getNumOperands() - 1;
+ } else
+ ccOutIdx = MCID->getNumOperands() - 1;
// Any ARM instruction that sets the 's' bit should specify an optional
// "cc_out" operand in the last operand position.
@@ -8970,7 +9173,9 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
if (deadCPSR) {
assert(!MI.getOperand(ccOutIdx).getReg() &&
"expect uninitialized optional cc_out operand");
- return;
+ // Thumb1 instructions must have the S bit even if the CPSR is dead.
+ if (!Subtarget->isThumb1Only())
+ return;
}
// If this instruction was defined with an optional CPSR def and its dag node
@@ -9032,7 +9237,7 @@ static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
SDLoc dl(N);
EVT VT = N->getValueType(0);
CC = N->getOperand(0);
- if (CC.getValueType() != MVT::i1)
+ if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
return false;
Invert = !AllOnes;
if (AllOnes)
@@ -9308,10 +9513,90 @@ static SDValue findMUL_LOHI(SDValue V) {
return SDValue();
}
-static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
+static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ if (Subtarget->isThumb()) {
+ if (!Subtarget->hasDSP())
+ return SDValue();
+ } else if (!Subtarget->hasV5TEOps())
+ return SDValue();
+
+  // SMLALBB, SMLALBT, SMLALTB and SMLALTT multiply two 16-bit values and
+  // accumulate the product into a 64-bit value. The 16-bit values will
+  // have been sign-extended or SRA'd into 32-bit values:
+ // (addc (adde (mul 16bit, 16bit), lo), hi)
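+  // The exact shape matched below is (a sketch):
+  //   addc = ARMISD::ADDC (mul a, b), lo
+  //   adde = ARMISD::ADDE (sra (mul a, b), 31), hi, addc:1
+  // i.e. hi:lo plus the sign-extended 32-bit product of two 16-bit values.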
+ SDValue Mul = AddcNode->getOperand(0);
+ SDValue Lo = AddcNode->getOperand(1);
+ if (Mul.getOpcode() != ISD::MUL) {
+ Lo = AddcNode->getOperand(0);
+ Mul = AddcNode->getOperand(1);
+ if (Mul.getOpcode() != ISD::MUL)
+ return SDValue();
+ }
+
+ SDValue SRA = AddeNode->getOperand(0);
+ SDValue Hi = AddeNode->getOperand(1);
+ if (SRA.getOpcode() != ISD::SRA) {
+ SRA = AddeNode->getOperand(1);
+ Hi = AddeNode->getOperand(0);
+ if (SRA.getOpcode() != ISD::SRA)
+ return SDValue();
+ }
+ if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
+ if (Const->getZExtValue() != 31)
+ return SDValue();
+ } else
+ return SDValue();
+
+ if (SRA.getOperand(0) != Mul)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(AddcNode);
+ unsigned Opcode = 0;
+ SDValue Op0;
+ SDValue Op1;
+
+ if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
+ Opcode = ARMISD::SMLALBB;
+ Op0 = Mul.getOperand(0);
+ Op1 = Mul.getOperand(1);
+ } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
+ Opcode = ARMISD::SMLALBT;
+ Op0 = Mul.getOperand(0);
+ Op1 = Mul.getOperand(1).getOperand(0);
+ } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
+ Opcode = ARMISD::SMLALTB;
+ Op0 = Mul.getOperand(0).getOperand(0);
+ Op1 = Mul.getOperand(1);
+ } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
+ Opcode = ARMISD::SMLALTT;
+ Op0 = Mul->getOperand(0).getOperand(0);
+ Op1 = Mul->getOperand(1).getOperand(0);
+ }
+
+ if (!Op0 || !Op1)
+ return SDValue();
+
+ SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ Op0, Op1, Lo, Hi);
+  // Replace the ADD nodes' uses with the SMLAL node's values.
+ SDValue HiMLALResult(SMLAL.getNode(), 1);
+ SDValue LoMLALResult(SMLAL.getNode(), 0);
+
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
+
+ // Return original node to notify the driver to stop replacing.
+ SDValue resNode(AddcNode, 0);
+ return resNode;
+}
+
+static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
-
// Look for multiply add opportunities.
// The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
// each add nodes consumes a value from ISD::UMUL_LOHI and there is
@@ -9326,7 +9611,17 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
// \ /
// ADDC <- hiAdd
//
- assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
+ assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE");
+
+ assert(AddeNode->getNumOperands() == 3 &&
+ AddeNode->getOperand(2).getValueType() == MVT::i32 &&
+ "ADDE node has the wrong inputs");
+
+  // Check that the ADDE's carry-in comes from an ARMISD::ADDC node.
+ SDNode* AddcNode = AddeNode->getOperand(2).getNode();
+ if (AddcNode->getOpcode() != ARMISD::ADDC)
+ return SDValue();
+
SDValue AddcOp0 = AddcNode->getOperand(0);
SDValue AddcOp1 = AddcNode->getOperand(1);
@@ -9338,29 +9633,13 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
AddcNode->getValueType(0) == MVT::i32 &&
"Expect ADDC with two result values. First: i32");
- // Check that we have a glued ADDC node.
- if (AddcNode->getValueType(1) != MVT::Glue)
- return SDValue();
-
- // Check that the ADDC adds the low result of the S/UMUL_LOHI.
+  // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
+  // may be an SMLAL which multiplies two 16-bit values.
if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
AddcOp1->getOpcode() != ISD::SMUL_LOHI)
- return SDValue();
-
- // Look for the glued ADDE.
- SDNode* AddeNode = AddcNode->getGluedUser();
- if (!AddeNode)
- return SDValue();
-
- // Make sure it is really an ADDE.
- if (AddeNode->getOpcode() != ISD::ADDE)
- return SDValue();
-
- assert(AddeNode->getNumOperands() == 3 &&
- AddeNode->getOperand(2).getValueType() == MVT::Glue &&
- "ADDE node has the wrong inputs");
+ return AddCombineTo64BitSMLAL16(AddcNode, AddeNode, DCI, Subtarget);
// Check for the triangle shape.
SDValue AddeOp0 = AddeNode->getOperand(0);
@@ -9435,38 +9714,25 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
// Return original node to notify the driver to stop replacing.
- SDValue resNode(AddcNode, 0);
- return resNode;
+ return SDValue(AddeNode, 0);
}
-static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
+static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
// UMAAL is similar to UMLAL except that it adds two unsigned values.
// While trying to combine for the other MLAL nodes, first search for the
- // chance to use UMAAL. Check if Addc uses another addc node which can first
- // be combined into a UMLAL. The other pattern is AddcNode being combined
- // into an UMLAL and then using another addc is handled in ISelDAGToDAG.
-
- if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() ||
- (Subtarget->isThumb() && !Subtarget->hasThumb2()))
- return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
-
- SDNode *PrevAddc = nullptr;
- if (AddcNode->getOperand(0).getOpcode() == ISD::ADDC)
- PrevAddc = AddcNode->getOperand(0).getNode();
- else if (AddcNode->getOperand(1).getOpcode() == ISD::ADDC)
- PrevAddc = AddcNode->getOperand(1).getNode();
-
- // If there's no addc chains, just return a search for any MLAL.
- if (PrevAddc == nullptr)
- return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
-
- // Try to convert the addc operand to an MLAL and if that fails try to
- // combine AddcNode.
- SDValue MLAL = AddCombineTo64bitMLAL(PrevAddc, DCI, Subtarget);
- if (MLAL != SDValue(PrevAddc, 0))
- return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
+ // chance to use UMAAL. Check if Addc uses a node which has already
+ // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
+ // as the addend, and it's handled in PerformUMLALCombine.
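+  //
+  // Shape being matched here (a sketch; the UMLAL's hi accumulator input
+  // must be zero):
+  //   umlal = ARMISD::UMLAL a, b, lo, 0        // 64-bit a*b + lo
+  //   addc  = ARMISD::ADDC  umlal:0, addhi
+  //   adde  = ARMISD::ADDE  umlal:1, 0, addc:1
+  // which folds to ARMISD::UMAAL a, b, lo, addhi (= a*b + lo + addhi).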
+
+ if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
+ return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
+
+  // Check that the ADDE's carry-in comes from an ARMISD::ADDC node.
+ SDNode* AddcNode = AddeNode->getOperand(2).getNode();
+ if (AddcNode->getOpcode() != ARMISD::ADDC)
+ return SDValue();
// Find the converted UMAAL or quit if it doesn't exist.
SDNode *UmlalNode = nullptr;
@@ -9478,29 +9744,18 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
UmlalNode = AddcNode->getOperand(1).getNode();
AddHi = AddcNode->getOperand(0);
} else {
- return SDValue();
+ return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
}
// The ADDC should be glued to an ADDE node, which uses the same UMLAL as
// the ADDC as well as Zero.
- auto *Zero = dyn_cast<ConstantSDNode>(UmlalNode->getOperand(3));
-
- if (!Zero || Zero->getZExtValue() != 0)
+ if (!isNullConstant(UmlalNode->getOperand(3)))
return SDValue();
- // Check that we have a glued ADDC node.
- if (AddcNode->getValueType(1) != MVT::Glue)
- return SDValue();
-
- // Look for the glued ADDE.
- SDNode* AddeNode = AddcNode->getGluedUser();
- if (!AddeNode)
- return SDValue();
-
- if ((AddeNode->getOperand(0).getNode() == Zero &&
+ if ((isNullConstant(AddeNode->getOperand(0)) &&
AddeNode->getOperand(1).getNode() == UmlalNode) ||
(AddeNode->getOperand(0).getNode() == UmlalNode &&
- AddeNode->getOperand(1).getNode() == Zero)) {
+ isNullConstant(AddeNode->getOperand(1)))) {
SelectionDAG &DAG = DCI.DAG;
SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
@@ -9513,19 +9768,84 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
// Return original node to notify the driver to stop replacing.
- return SDValue(AddcNode, 0);
+ return SDValue(AddeNode, 0);
}
return SDValue();
}
-/// PerformADDCCombine - Target-specific dag combine transform from
-/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL or
-/// ISD::ADDC, ISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
-static SDValue PerformADDCCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const ARMSubtarget *Subtarget) {
+static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
+ return SDValue();
+
+ // Check that we have a pair of ADDC and ADDE as operands.
+ // Both addends of the ADDE must be zero.
+ SDNode* AddcNode = N->getOperand(2).getNode();
+ SDNode* AddeNode = N->getOperand(3).getNode();
+ if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
+ (AddeNode->getOpcode() == ARMISD::ADDE) &&
+ isNullConstant(AddeNode->getOperand(0)) &&
+ isNullConstant(AddeNode->getOperand(1)) &&
+ (AddeNode->getOperand(2).getNode() == AddcNode))
+ return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
+ DAG.getVTList(MVT::i32, MVT::i32),
+ {N->getOperand(0), N->getOperand(1),
+ AddcNode->getOperand(0), AddcNode->getOperand(1)});
+ else
+ return SDValue();
+}
+
+static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if (Subtarget->isThumb1Only()) {
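+    // Thumb1 add/sub immediates are unsigned, so an addition of a negative
+    // constant is only selectable as a subtraction of its magnitude, e.g.
+    // (a sketch): (ARMISD::ADDC x, -5) --> (ARMISD::SUBC x, 5).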
+ SDValue RHS = N->getOperand(1);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
+ int32_t imm = C->getSExtValue();
+ if (imm < 0 && imm > INT_MIN) {
+ SDLoc DL(N);
+ RHS = DAG.getConstant(-imm, DL, MVT::i32);
+ unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
+ : ARMISD::ADDC;
+ return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
+ }
+ }
+ }
+ return SDValue();
+}
- if (Subtarget->isThumb1Only()) return SDValue();
+static SDValue PerformAddeSubeCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if (Subtarget->isThumb1Only()) {
+ SDValue RHS = N->getOperand(1);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
+ int64_t imm = C->getSExtValue();
+ if (imm < 0) {
+ SDLoc DL(N);
+
+ // The with-carry-in form matches bitwise not instead of the negation.
+ // Effectively, the inverse interpretation of the carry flag already
+ // accounts for part of the negation.
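+      //
+      // Worked sketch with imm = -5: adde(x, -5, c) = x - 5 + c, while
+      // sube(x, ~(-5), c) = x - 4 - (1 - c) = x - 5 + c, so replacing ADDE
+      // with SUBE on the complemented constant preserves the value.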
+ RHS = DAG.getConstant(~imm, DL, MVT::i32);
+
+ unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
+ : ARMISD::ADDE;
+ return DAG.getNode(Opcode, DL, N->getVTList(),
+ N->getOperand(0), RHS, N->getOperand(2));
+ }
+ }
+ }
+ return SDValue();
+}
+
+/// PerformADDECombine - Target-specific dag combine transform from
+/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
+/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
+static SDValue PerformADDECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Only ARM and Thumb2 support UMLAL/SMLAL.
+ if (Subtarget->isThumb1Only())
+ return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
// Only perform the checks after legalize when the pattern is available.
if (DCI.isBeforeLegalize()) return SDValue();
@@ -9722,7 +10042,6 @@ static SDValue PerformMULCombine(SDNode *N,
static SDValue PerformANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
-
// Attempt to use immediate-form VBIC
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
SDLoc dl(N);
@@ -9761,6 +10080,67 @@ static SDValue PerformANDCombine(SDNode *N,
return SDValue();
}
+// Try combining OR nodes to SMULWB, SMULWT.
+static SDValue PerformORCombineToSMULWBT(SDNode *OR,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasV6Ops() ||
+ (Subtarget->isThumb() &&
+ (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
+ return SDValue();
+
+ SDValue SRL = OR->getOperand(0);
+ SDValue SHL = OR->getOperand(1);
+
+ if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
+ SRL = OR->getOperand(1);
+ SHL = OR->getOperand(0);
+ }
+ if (!isSRL16(SRL) || !isSHL16(SHL))
+ return SDValue();
+
+ // The first operands to the shifts need to be the two results from the
+ // same smul_lohi node.
+ if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
+ SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
+ return SDValue();
+
+ SDNode *SMULLOHI = SRL.getOperand(0).getNode();
+ if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
+ SHL.getOperand(0) != SDValue(SMULLOHI, 1))
+ return SDValue();
+
+ // Now we have:
+  // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))
+  // For SMULW[B|T], smul_lohi takes a 32-bit and a 16-bit argument.
+  // For SMULWB the 16-bit value must be sign-extended.
+ // For SMULWT only the SRA is required.
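+  //
+  // E.g. (a sketch): srl exposes bits [31:16] of the low product word and
+  // shl positions bits [47:32] above them, so the OR yields bits [47:16]
+  // of the full product, i.e. (a * sext(b)) >> 16 == SMULWB a, b.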
+ // Check both sides of SMUL_LOHI
+ SDValue OpS16 = SMULLOHI->getOperand(0);
+ SDValue OpS32 = SMULLOHI->getOperand(1);
+
+ SelectionDAG &DAG = DCI.DAG;
+ if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
+ OpS16 = OpS32;
+ OpS32 = SMULLOHI->getOperand(0);
+ }
+
+ SDLoc dl(OR);
+ unsigned Opcode = 0;
+ if (isS16(OpS16, DAG))
+ Opcode = ARMISD::SMULWB;
+ else if (isSRA16(OpS16)) {
+ Opcode = ARMISD::SMULWT;
+ OpS16 = OpS16->getOperand(0);
+  } else
+    return SDValue();
+
+ SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
+ return SDValue(OR, 0);
+}
+
/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
static SDValue PerformORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
@@ -9798,6 +10178,8 @@ static SDValue PerformORCombine(SDNode *N,
// fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;
+ if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
+ return Result;
}
// The code below optimizes (or (and X, Y), Z).
@@ -9906,7 +10288,7 @@ static SDValue PerformORCombine(SDNode *N,
(Mask == ~Mask2)) {
// The pack halfword instruction works better for masks that fit it,
// so use that when it's available.
- if (Subtarget->hasT2ExtractPack() &&
+ if (Subtarget->hasDSP() &&
(Mask == 0xffff || Mask == 0xffff0000))
return SDValue();
// 2a
@@ -9922,7 +10304,7 @@ static SDValue PerformORCombine(SDNode *N,
(~Mask == Mask2)) {
// The pack halfword instruction works better for masks that fit it,
// so use that when it's available.
- if (Subtarget->hasT2ExtractPack() &&
+ if (Subtarget->hasDSP() &&
(Mask2 == 0xffff || Mask2 == 0xffff0000))
return SDValue();
// 2b
@@ -11324,8 +11706,8 @@ static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero,
if (Op.getOpcode() == ARMISD::CMOV) {
APInt KZ2(KnownZero.getBitWidth(), 0);
APInt KO2(KnownOne.getBitWidth(), 0);
- computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne);
- computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2);
+ computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne);
+ computeKnownBits(DAG, Op.getOperand(1), KZ2, KO2);
KnownZero &= KZ2;
KnownOne &= KO2;
@@ -11555,13 +11937,17 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
- case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget);
+ case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
+ case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
+ case ARMISD::ADDC:
+ case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI.DAG, Subtarget);
+ case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
case ARMISD::BFI: return PerformBFICombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
@@ -11593,6 +11979,56 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return PerformVLDCombine(N, DCI);
case ARMISD::BUILD_VECTOR:
return PerformARMBUILD_VECTORCombine(N, DCI);
+ case ARMISD::SMULWB: {
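+    // SMULWB only reads the bottom 16 bits of its second operand, so the
+    // upper bits are not demanded there; e.g. an (and x, 0xFFFF) feeding
+    // that operand can be dropped (an illustrative note).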
+ unsigned BitWidth = N->getValueType(0).getSizeInBits();
+ APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
+ if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
+ return SDValue();
+ break;
+ }
+ case ARMISD::SMULWT: {
+ unsigned BitWidth = N->getValueType(0).getSizeInBits();
+ APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
+ if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
+ return SDValue();
+ break;
+ }
+ case ARMISD::SMLALBB: {
+ unsigned BitWidth = N->getValueType(0).getSizeInBits();
+ APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
+ if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
+ (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
+ return SDValue();
+ break;
+ }
+ case ARMISD::SMLALBT: {
+ unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
+ APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
+ unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
+ APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
+ if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
+ (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
+ return SDValue();
+ break;
+ }
+ case ARMISD::SMLALTB: {
+ unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
+ APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
+ unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
+ APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
+ if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
+ (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
+ return SDValue();
+ break;
+ }
+ case ARMISD::SMLALTT: {
+ unsigned BitWidth = N->getValueType(0).getSizeInBits();
+ APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
+ if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
+ (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
+ return SDValue();
+ break;
+ }
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
@@ -12180,6 +12616,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
unsigned BitWidth = KnownOne.getBitWidth();
@@ -12588,8 +13025,8 @@ static TargetLowering::ArgListTy getDivRemArgList(
Type *ArgTy = ArgVT.getTypeForEVT(*Context);
Entry.Node = N->getOperand(i);
Entry.Ty = ArgTy;
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = isSigned;
+ Entry.IsZExt = !isSigned;
Args.push_back(Entry);
}
if (Subtarget->isTargetWindows() && Args.size() >= 2)
@@ -12861,7 +13298,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
case Intrinsic::arm_stlexd:
- case Intrinsic::arm_strexd: {
+ case Intrinsic::arm_strexd:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;
Info.ptrVal = I.getArgOperand(2);
@@ -12871,9 +13308,9 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.readMem = false;
Info.writeMem = true;
return true;
- }
+
case Intrinsic::arm_ldaexd:
- case Intrinsic::arm_ldrexd: {
+ case Intrinsic::arm_ldrexd:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;
Info.ptrVal = I.getArgOperand(0);
@@ -12883,7 +13320,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.readMem = true;
Info.writeMem = false;
return true;
- }
+
default:
break;
}
@@ -12921,7 +13358,7 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
- Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
+ Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
Builder.getInt32(0), Builder.getInt32(7),
Builder.getInt32(10), Builder.getInt32(5)};
@@ -12932,7 +13369,7 @@ Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
llvm_unreachable("makeDMB on a target so old that it has no barriers");
}
} else {
- Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
+ Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
// Only a full system barrier exists in the M-class architectures.
Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
Constant *CDomain = Builder.getInt32(Domain);
@@ -13089,7 +13526,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
if (ValTy->getPrimitiveSizeInBits() == 64) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
- Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
+ Function *Ldrex = Intrinsic::getDeclaration(M, Int);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
@@ -13106,7 +13543,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
- Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+ Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
return Builder.CreateTruncOrBitCast(
Builder.CreateCall(Ldrex, Addr),
@@ -13118,7 +13555,7 @@ void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
if (!Subtarget->hasV7Ops())
return;
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- Builder.CreateCall(llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
+ Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
}
Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
@@ -13154,6 +13591,39 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
Addr});
}
+/// A helper function for determining the number of interleaved accesses we
+/// will generate when lowering accesses of the given type.
+unsigned
+ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
+ const DataLayout &DL) const {
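+  // Round up to the number of 128-bit chunks the vector occupies; e.g. a
+  // 256-bit <32 x i8> gives (256 + 127) / 128 = 2 accesses, and a 64-bit
+  // <8 x i8> gives 1 (a worked example of the formula below).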
+ return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
+}
+
+bool ARMTargetLowering::isLegalInterleavedAccessType(
+ VectorType *VecTy, const DataLayout &DL) const {
+
+ unsigned VecSize = DL.getTypeSizeInBits(VecTy);
+ unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
+
+ // Ensure the vector doesn't have f16 elements. Even though we could do an
+ // i16 vldN, we can't hold the f16 vectors and will end up converting via
+ // f32.
+ if (VecTy->getElementType()->isHalfTy())
+ return false;
+
+ // Ensure the number of vector elements is greater than 1.
+ if (VecTy->getNumElements() < 2)
+ return false;
+
+ // Ensure the element type is legal.
+ if (ElSize != 8 && ElSize != 16 && ElSize != 32)
+ return false;
+
+ // Ensure the total vector size is 64 or a multiple of 128. Types larger than
+ // 128 will be split into multiple interleaved accesses.
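+  // E.g. <8 x i8> (64 bits) and <32 x i8> (256 bits) pass this check,
+  // while <12 x i8> (96 bits) does not (an illustrative sketch).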
+ return VecSize == 64 || VecSize % 128 == 0;
+}
+
/// \brief Lower an interleaved load into a vldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
@@ -13178,64 +13648,97 @@ bool ARMTargetLowering::lowerInterleavedLoad(
Type *EltTy = VecTy->getVectorElementType();
const DataLayout &DL = LI->getModule()->getDataLayout();
- unsigned VecSize = DL.getTypeSizeInBits(VecTy);
- bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
- // Skip if we do not have NEON and skip illegal vector types and vector types
- // with i64/f64 elements (vldN doesn't support i64/f64 elements).
- if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits)
+  // Skip if we do not have NEON or if the vector type is illegal. We can
+  // "legalize" wide vector types into multiple interleaved accesses as long
+  // as the vector type size is divisible by 128 bits.
+ if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
return false;
+ unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
+
// A pointer vector can not be the return type of the ldN intrinsics. Need to
// load integer vectors first and then convert to pointer vectors.
if (EltTy->isPointerTy())
VecTy =
VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
+ IRBuilder<> Builder(LI);
+
+ // The base address of the load.
+ Value *BaseAddr = LI->getPointerOperand();
+
+ if (NumLoads > 1) {
+ // If we're going to generate more than one load, reset the sub-vector type
+ // to something legal.
+ VecTy = VectorType::get(VecTy->getVectorElementType(),
+ VecTy->getVectorNumElements() / NumLoads);
+
+ // We will compute the pointer operand of each load from the original base
+ // address using GEPs. Cast the base address to a pointer to the scalar
+ // element type.
+ BaseAddr = Builder.CreateBitCast(
+ BaseAddr, VecTy->getVectorElementType()->getPointerTo(
+ LI->getPointerAddressSpace()));
+ }
+
+ assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
+
+ Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
+ Type *Tys[] = {VecTy, Int8Ptr};
static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
Intrinsic::arm_neon_vld3,
Intrinsic::arm_neon_vld4};
+ Function *VldnFunc =
+ Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
- IRBuilder<> Builder(LI);
- SmallVector<Value *, 2> Ops;
+ // Holds sub-vectors extracted from the load intrinsic return values. The
+ // sub-vectors are associated with the shufflevector instructions they will
+ // replace.
+ DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
- Type *Int8Ptr = Builder.getInt8PtrTy(LI->getPointerAddressSpace());
- Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr));
- Ops.push_back(Builder.getInt32(LI->getAlignment()));
+ for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
- Type *Tys[] = { VecTy, Int8Ptr };
- Function *VldnFunc =
- Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
- CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
+ // If we're generating more than one load, compute the base address of
+ // subsequent loads as an offset from the previous.
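+    // E.g. when a <32 x i8>, factor-2 load is split in two, each vld2
+    // covers 16 * 2 = 32 i8 elements, so the second load begins 32 elements
+    // past the base (a worked example; VecTy is the legalized sub-vector
+    // type here).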
+ if (LoadCount > 0)
+ BaseAddr = Builder.CreateConstGEP1_32(
+ BaseAddr, VecTy->getVectorNumElements() * Factor);
- // Replace uses of each shufflevector with the corresponding vector loaded
- // by ldN.
- for (unsigned i = 0; i < Shuffles.size(); i++) {
- ShuffleVectorInst *SV = Shuffles[i];
- unsigned Index = Indices[i];
+ SmallVector<Value *, 2> Ops;
+ Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
+ Ops.push_back(Builder.getInt32(LI->getAlignment()));
- Value *SubVec = Builder.CreateExtractValue(VldN, Index);
+ CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN");
- // Convert the integer vector to pointer vector if the element is pointer.
- if (EltTy->isPointerTy())
- SubVec = Builder.CreateIntToPtr(SubVec, SV->getType());
+ // Replace uses of each shufflevector with the corresponding vector loaded
+ // by ldN.
+ for (unsigned i = 0; i < Shuffles.size(); i++) {
+ ShuffleVectorInst *SV = Shuffles[i];
+ unsigned Index = Indices[i];
- SV->replaceAllUsesWith(SubVec);
- }
+ Value *SubVec = Builder.CreateExtractValue(VldN, Index);
- return true;
-}
+ // Convert the integer vector to pointer vector if the element is pointer.
+ if (EltTy->isPointerTy())
+ SubVec = Builder.CreateIntToPtr(SubVec, SV->getType());
-/// \brief Get a mask consisting of sequential integers starting from \p Start.
-///
-/// I.e. <Start, Start + 1, ..., Start + NumElts - 1>
-static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
- unsigned NumElts) {
- SmallVector<Constant *, 16> Mask;
- for (unsigned i = 0; i < NumElts; i++)
- Mask.push_back(Builder.getInt32(Start + i));
+ SubVecs[SV].push_back(SubVec);
+ }
+ }
+
+ // Replace uses of the shufflevector instructions with the sub-vectors
+ // returned by the load intrinsic. If a shufflevector instruction is
+ // associated with more than one sub-vector, those sub-vectors will be
+ // concatenated into a single wide vector.
+ for (ShuffleVectorInst *SVI : Shuffles) {
+ auto &SubVec = SubVecs[SVI];
+ auto *WideVec =
+ SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
+ SVI->replaceAllUsesWith(WideVec);
+ }
- return ConstantVector::get(Mask);
+ return true;
}
/// \brief Lower an interleaved store into a vstN intrinsic.
@@ -13279,15 +13782,15 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
- unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
- bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64;
- // Skip if we do not have NEON and skip illegal vector types and vector types
- // with i64/f64 elements (vstN doesn't support i64/f64 elements).
- if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128) ||
- EltIs64Bits)
+  // Skip if we do not have NEON or if the vector type is illegal. We can
+  // "legalize" wide vector types into multiple interleaved accesses as long
+  // as the vector type size is divisible by 128 bits.
+ if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
return false;
+ unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
+
Value *Op0 = SVI->getOperand(0);
Value *Op1 = SVI->getOperand(1);
IRBuilder<> Builder(SI);
@@ -13306,44 +13809,75 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
SubVecTy = VectorType::get(IntTy, LaneLen);
}
+ // The base address of the store.
+ Value *BaseAddr = SI->getPointerOperand();
+
+ if (NumStores > 1) {
+ // If we're going to generate more than one store, reset the lane length
+ // and sub-vector type to something legal.
+ LaneLen /= NumStores;
+ SubVecTy = VectorType::get(SubVecTy->getVectorElementType(), LaneLen);
+
+ // We will compute the pointer operand of each store from the original base
+ // address using GEPs. Cast the base address to a pointer to the scalar
+ // element type.
+ BaseAddr = Builder.CreateBitCast(
+ BaseAddr, SubVecTy->getVectorElementType()->getPointerTo(
+ SI->getPointerAddressSpace()));
+ }
+
+ assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
+
+ auto Mask = SVI->getShuffleMask();
+
+ Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
+ Type *Tys[] = {Int8Ptr, SubVecTy};
static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
Intrinsic::arm_neon_vst3,
Intrinsic::arm_neon_vst4};
- SmallVector<Value *, 6> Ops;
- Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace());
- Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr));
+ for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
- Type *Tys[] = { Int8Ptr, SubVecTy };
- Function *VstNFunc = Intrinsic::getDeclaration(
- SI->getModule(), StoreInts[Factor - 2], Tys);
+    // If we're generating more than one store, compute the base address of
+ // subsequent stores as an offset from the previous.
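+    // E.g. a factor-3 store split in two: each vst3 covers LaneLen * 3
+    // elements, so the second store begins LaneLen * 3 elements past the
+    // base (mirroring the load case above).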
+ if (StoreCount > 0)
+ BaseAddr = Builder.CreateConstGEP1_32(BaseAddr, LaneLen * Factor);
- // Split the shufflevector operands into sub vectors for the new vstN call.
- auto Mask = SVI->getShuffleMask();
- for (unsigned i = 0; i < Factor; i++) {
- if (Mask[i] >= 0) {
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
- } else {
- unsigned StartMask = 0;
- for (unsigned j = 1; j < LaneLen; j++) {
- if (Mask[j*Factor + i] >= 0) {
- StartMask = Mask[j*Factor + i] - j;
- break;
+ SmallVector<Value *, 6> Ops;
+ Ops.push_back(Builder.CreateBitCast(BaseAddr, Int8Ptr));
+
+ Function *VstNFunc =
+ Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
+
+ // Split the shufflevector operands into sub vectors for the new vstN call.
+ for (unsigned i = 0; i < Factor; i++) {
+ unsigned IdxI = StoreCount * LaneLen * Factor + i;
+ if (Mask[IdxI] >= 0) {
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0)));
+ } else {
+ unsigned StartMask = 0;
+ for (unsigned j = 1; j < LaneLen; j++) {
+ unsigned IdxJ = StoreCount * LaneLen * Factor + j;
+ if (Mask[IdxJ * Factor + IdxI] >= 0) {
+ StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
+ break;
+ }
}
+        // Note: if all elements in a chunk are undef, StartMask = 0.
+        // Filling undef gaps with arbitrary elements is fine, since those
+        // elements were going to be written anyway (as undefs); in the
+        // all-undef case we default to elements starting at 0.
+        // StartMask cannot be negative; that is checked in
+        // isReInterleaveMask.
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0)));
}
- // Note: If all elements in a chunk are undefs, StartMask=0!
- // Note: Filling undef gaps with random elements is ok, since
- // those elements were being written anyway (with undefs).
- // In the case of all undefs we're defaulting to using elems from 0
- // Note: StartMask cannot be negative, it's checked in isReInterleaveMask
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen)));
}
- }
- Ops.push_back(Builder.getInt32(SI->getAlignment()));
- Builder.CreateCall(VstNFunc, Ops);
+ Ops.push_back(Builder.getInt32(SI->getAlignment()));
+ Builder.CreateCall(VstNFunc, Ops);
+ }
return true;
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 84c6eb845bb8..70a0b1380ec9 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -175,9 +175,15 @@ class InstrItineraryData;
VMULLs, // ...signed
VMULLu, // ...unsigned
+ SMULWB, // Signed multiply word by half word, bottom
+ SMULWT, // Signed multiply word by half word, top
UMLAL, // 64bit Unsigned Accumulate Multiply
SMLAL, // 64bit Signed Accumulate Multiply
UMAAL, // 64-bit Unsigned Accumulate Accumulate Multiply
+ SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16
+ SMLALBT, // 64-bit signed accumulate multiply bottom, top 16
+ SMLALTB, // 64-bit signed accumulate multiply top, bottom 16
+ SMLALTT, // 64-bit signed accumulate multiply top, top 16
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
@@ -346,6 +352,7 @@ class InstrItineraryData;
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const override;
@@ -500,9 +507,18 @@ class InstrItineraryData;
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
unsigned &Cost) const override;
+ bool canMergeStoresTo(EVT MemVT) const override {
+ // Do not merge to larger than i32.
+ return (MemVT.getSizeInBits() <= 32);
+ }
+
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+ bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
+ return VT.isScalarInteger();
+ }
+
bool supportSwiftError() const override {
return true;
}
@@ -514,6 +530,17 @@ class InstrItineraryData;
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;
+ /// Returns true if \p VecTy is a legal interleaved access type. This
+ /// function checks the vector element type and the overall width of the
+ /// vector.
+ bool isLegalInterleavedAccessType(VectorType *VecTy,
+ const DataLayout &DL) const;
+
+ /// Returns the number of interleaved accesses that will be generated when
+ /// lowering accesses of the given type.
+ unsigned getNumInterleavedAccesses(VectorType *VecTy,
+ const DataLayout &DL) const;
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -698,7 +725,7 @@ class InstrItineraryData;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const;
SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
- const SDLoc &dl) const;
+ const SDLoc &dl, bool InvalidOnQNaN) const;
SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
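A plausible sketch of what these two interleaved-access hooks check, given
that NEON vldN/vstN operate on 64-bit and 128-bit registers (the committed
bodies live in ARMISelLowering.cpp; treat the exact conditions below as
assumptions, not a quote from the patch):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"
    using namespace llvm;

    // Legal element sizes, and a total width of 64 bits or a multiple of 128.
    static bool isLegalInterleavedAccessTypeSketch(VectorType *VecTy,
                                                   const DataLayout &DL) {
      unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
      unsigned VecSize = DL.getTypeSizeInBits(VecTy);
      if (ElSize != 8 && ElSize != 16 && ElSize != 32)
        return false;
      return VecSize == 64 || VecSize % 128 == 0;
    }

    // One NEON access per 128-bit chunk, rounding up.
    static unsigned getNumInterleavedAccessesSketch(VectorType *VecTy,
                                                    const DataLayout &DL) {
      return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
    }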
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 488439fc24e0..1bbe7f0d275e 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -184,7 +184,7 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
// ARM special operands for disassembly only.
//
-def SetEndAsmOperand : ImmAsmOperand {
+def SetEndAsmOperand : ImmAsmOperand<0,1> {
let Name = "SetEndImm";
let ParserMethod = "parseSetEndImm";
}
@@ -221,25 +221,25 @@ def banked_reg : Operand<i32> {
// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
// 64 64 - <imm> is encoded in imm6<5:0>
-def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; }
+def shr_imm8_asm_operand : ImmAsmOperand<1,8> { let Name = "ShrImm8"; }
def shr_imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 8; }]> {
let EncoderMethod = "getShiftRight8Imm";
let DecoderMethod = "DecodeShiftRight8Imm";
let ParserMatchClass = shr_imm8_asm_operand;
}
-def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; }
+def shr_imm16_asm_operand : ImmAsmOperand<1,16> { let Name = "ShrImm16"; }
def shr_imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 16; }]> {
let EncoderMethod = "getShiftRight16Imm";
let DecoderMethod = "DecodeShiftRight16Imm";
let ParserMatchClass = shr_imm16_asm_operand;
}
-def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; }
+def shr_imm32_asm_operand : ImmAsmOperand<1,32> { let Name = "ShrImm32"; }
def shr_imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]> {
let EncoderMethod = "getShiftRight32Imm";
let DecoderMethod = "DecodeShiftRight32Imm";
let ParserMatchClass = shr_imm32_asm_operand;
}
-def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; }
+def shr_imm64_asm_operand : ImmAsmOperand<1,64> { let Name = "ShrImm64"; }
def shr_imm64 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 64; }]> {
let EncoderMethod = "getShiftRight64Imm";
let DecoderMethod = "DecodeShiftRight64Imm";
@@ -261,10 +261,19 @@ def const_pool_asm_imm : Operand<i32> {
// Note: When EmitPriority == 1, the alias will be used for printing
class ARMInstAlias<string Asm, dag Result, bit EmitPriority = 0>
: InstAlias<Asm, Result, EmitPriority>, Requires<[IsARM]>;
+class ARMInstSubst<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>,
+ Requires<[IsARM,UseNegativeImmediates]>;
class tInstAlias<string Asm, dag Result, bit EmitPriority = 0>
: InstAlias<Asm, Result, EmitPriority>, Requires<[IsThumb]>;
+class tInstSubst<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>,
+ Requires<[IsThumb,UseNegativeImmediates]>;
class t2InstAlias<string Asm, dag Result, bit EmitPriority = 0>
: InstAlias<Asm, Result, EmitPriority>, Requires<[IsThumb2]>;
+class t2InstSubst<string Asm, dag Result, bit EmitPriority = 0>
+ : InstAlias<Asm, Result, EmitPriority>,
+ Requires<[IsThumb2,UseNegativeImmediates]>;
class VFP2InstAlias<string Asm, dag Result, bit EmitPriority = 0>
: InstAlias<Asm, Result, EmitPriority>, Requires<[HasVFP2]>;
class VFP2DPInstAlias<string Asm, dag Result, bit EmitPriority = 0>
@@ -948,7 +957,7 @@ class ADivA1I<bits<3> opcod, dag oops, dag iops,
}
// PKH instructions
-def PKHLSLAsmOperand : ImmAsmOperand {
+def PKHLSLAsmOperand : ImmAsmOperand<0,31> {
let Name = "PKHLSLImm";
let ParserMethod = "parsePKHLSLImm";
}
@@ -1013,9 +1022,6 @@ class Thumb2DSPPat<dag pattern, dag result> : Pat<pattern, result> {
class Thumb2DSPMulPat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsThumb2, UseMulOps, HasDSP];
}
-class Thumb2ExtractPat<dag pattern, dag result> : Pat<pattern, result> {
- list<Predicate> Predicates = [IsThumb2, HasT2ExtractPack];
-}
//===----------------------------------------------------------------------===//
// Thumb Instruction Format Definitions.
//
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 27b64322dfa9..3b3606ef462a 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -129,8 +129,9 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
MIB.addMemOperand(MMO);
- MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg);
- MIB.addReg(Reg, RegState::Kill).addImm(0);
- MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
- AddDefaultPred(MIB);
+ BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(0)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end())
+ .add(predOps(ARMCC::AL));
}
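The trailing .add(predOps(ARMCC::AL)) attaches the two predicate operands
that every predicable ARM instruction carries. As a fragment (assuming the
usual MachineInstrBuilder API), its long-hand equivalent is:

    // "Always" condition code plus an unused (zero) predicate register.
    static void addAlwaysPred(MachineInstrBuilder &MIB) {
      MIB.addImm(ARMCC::AL).addReg(0);
    }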
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c47393990e97..cc0e7d4d9c35 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -51,6 +51,8 @@ def SDT_ARMAnd : SDTypeProfile<1, 2,
SDTCisVT<2, i32>]>;
def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_ARMFCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
SDTCisPtrTy<1>, SDTCisVT<2, i32>]>;
@@ -90,6 +92,13 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
SDTCisVT<1, i32>,
SDTCisVT<4, i32>]>;
+def SDT_LongMac : SDTypeProfile<2, 4, [SDTCisVT<0, i32>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisSameAs<0, 4>,
+ SDTCisSameAs<0, 5>]>;
+
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
@@ -181,6 +190,13 @@ def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue,
SDNPMayStore, SDNPMayLoad]>;
+def ARMsmulwb : SDNode<"ARMISD::SMULWB", SDTIntBinOp, []>;
+def ARMsmulwt : SDNode<"ARMISD::SMULWT", SDTIntBinOp, []>;
+def ARMsmlalbb : SDNode<"ARMISD::SMLALBB", SDT_LongMac, []>;
+def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>;
+def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>;
+def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>;
+
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
@@ -247,9 +263,6 @@ def HasDivide : Predicate<"Subtarget->hasDivide()">,
AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">;
def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">,
AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">;
-def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
- AssemblerPredicate<"FeatureT2XtPk",
- "pack/extract">;
def HasDSP : Predicate<"Subtarget->hasDSP()">,
AssemblerPredicate<"FeatureDSP", "dsp">;
def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
@@ -298,6 +311,11 @@ def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">,
AssemblerPredicate<"FeatureNaClTrap", "NaCl">;
def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">;
+def UseNegativeImmediates :
+ Predicate<"false">,
+ AssemblerPredicate<"!FeatureNoNegativeImmediates",
+ "NegativeImmediates">;
+
// FIXME: Eventually this will be just "hasV6T2Ops".
def UseMovt : Predicate<"Subtarget->useMovt(*MF)">;
def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">;
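The *InstSubst aliases guarded by this predicate (added to ARMInstrFormats.td
above) let the assembler flip mov<->mvn, add<->sub, and<->bic, cmp<->cmn and
so on whenever the complemented or negated immediate is encodable. A
self-contained model of that encodability test; isModImm stands in for
ARM_AM::getSOImmVal(V) != -1 and is not the real helper:

    #include <cstdint>

    // ARM-mode modified immediate: an 8-bit value rotated right by an even
    // amount.
    static bool isModImm(uint32_t V) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        uint32_t K = (V << Rot) | (V >> ((32 - Rot) & 31)); // V rol Rot
        if (K < 256)
          return true; // V == K ror Rot
      }
      return false;
    }
    static bool mvnCanReplaceMov(uint32_t Imm) { return isModImm(~Imm); }
    static bool subCanReplaceAdd(uint32_t Imm) { return isModImm(0u - Imm); }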
@@ -423,7 +441,16 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
//
// Immediate operands with a shared generic asm render method.
-class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; }
+class ImmAsmOperand<int Low, int High> : AsmOperandClass {
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isImmediate<" # Low # "," # High # ">";
+ let DiagnosticType = "ImmRange" # Low # "_" # High;
+}
+
+class ImmAsmOperandMinusOne<int Low, int High> : AsmOperandClass {
+ let PredicateMethod = "isImmediate<" # Low # "," # High # ">";
+ let DiagnosticType = "ImmRange" # Low # "_" # High;
+}
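The # concatenation builds a method name such as isImmediate<0,7>, which the
generated matcher calls on the parsed operand. On the parser side that
plausibly names a template along these lines (a sketch assuming the usual MC
operand API, not a quote from the patch):

    // Member of the parser's operand class.
    template <int Low, int High> bool isImmediate() const {
      if (!isImm())
        return false;
      const auto *CE = dyn_cast<MCConstantExpr>(getImm());
      if (!CE)
        return false;
      int64_t Value = CE->getValue();
      return Value >= Low && Value <= High;
    }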
// Operands that are part of a memory addressing mode.
class MemOperand : Operand<i32> { let OperandType = "OPERAND_MEMORY"; }
@@ -645,35 +672,45 @@ def arm_i32imm : PatLeaf<(imm), [{
}]>;
/// imm0_1 predicate - Immediate in the range [0,1].
-def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; }
+def Imm0_1AsmOperand: ImmAsmOperand<0,1> { let Name = "Imm0_1"; }
def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
/// imm0_3 predicate - Immediate in the range [0,3].
-def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
+def Imm0_3AsmOperand: ImmAsmOperand<0,3> { let Name = "Imm0_3"; }
def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
/// imm0_7 predicate - Immediate in the range [0,7].
-def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
+def Imm0_7AsmOperand: ImmAsmOperand<0,7> {
+ let Name = "Imm0_7";
+}
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 8;
}]> {
let ParserMatchClass = Imm0_7AsmOperand;
}
+/// imm8_255 predicate - Immediate in the range [8,255].
+def Imm8_255AsmOperand: ImmAsmOperand<8,255> { let Name = "Imm8_255"; }
+def imm8_255 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 8 && Imm < 256;
+}]> {
+ let ParserMatchClass = Imm8_255AsmOperand;
+}
+
/// imm8 predicate - Immediate is exactly 8.
-def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; }
+def Imm8AsmOperand: ImmAsmOperand<8,8> { let Name = "Imm8"; }
def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> {
let ParserMatchClass = Imm8AsmOperand;
}
/// imm16 predicate - Immediate is exactly 16.
-def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; }
+def Imm16AsmOperand: ImmAsmOperand<16,16> { let Name = "Imm16"; }
def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> {
let ParserMatchClass = Imm16AsmOperand;
}
/// imm32 predicate - Immediate is exactly 32.
-def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; }
+def Imm32AsmOperand: ImmAsmOperand<32,32> { let Name = "Imm32"; }
def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
let ParserMatchClass = Imm32AsmOperand;
}
@@ -681,25 +718,25 @@ def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
def imm8_or_16 : ImmLeaf<i32, [{ return Imm == 8 || Imm == 16;}]>;
/// imm1_7 predicate - Immediate in the range [1,7].
-def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; }
+def Imm1_7AsmOperand: ImmAsmOperand<1,7> { let Name = "Imm1_7"; }
def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> {
let ParserMatchClass = Imm1_7AsmOperand;
}
/// imm1_15 predicate - Immediate in the range [1,15].
-def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; }
+def Imm1_15AsmOperand: ImmAsmOperand<1,15> { let Name = "Imm1_15"; }
def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> {
let ParserMatchClass = Imm1_15AsmOperand;
}
/// imm1_31 predicate - Immediate in the range [1,31].
-def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; }
+def Imm1_31AsmOperand: ImmAsmOperand<1,31> { let Name = "Imm1_31"; }
def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> {
let ParserMatchClass = Imm1_31AsmOperand;
}
/// imm0_15 predicate - Immediate in the range [0,15].
-def Imm0_15AsmOperand: ImmAsmOperand {
+def Imm0_15AsmOperand: ImmAsmOperand<0,15> {
let Name = "Imm0_15";
let DiagnosticType = "ImmRange0_15";
}
@@ -710,7 +747,7 @@ def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
}
/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
-def Imm0_31AsmOperand: ImmAsmOperand { let Name = "Imm0_31"; }
+def Imm0_31AsmOperand: ImmAsmOperand<0,31> { let Name = "Imm0_31"; }
def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 32;
}]> {
@@ -718,15 +755,15 @@ def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
}
/// imm0_32 predicate - True if the 32-bit immediate is in the range [0,32].
-def Imm0_32AsmOperand: ImmAsmOperand { let Name = "Imm0_32"; }
+def Imm0_32AsmOperand: ImmAsmOperand<0,32> { let Name = "Imm0_32"; }
def imm0_32 : Operand<i32>, ImmLeaf<i32, [{
- return Imm >= 0 && Imm < 32;
+ return Imm >= 0 && Imm < 33;
}]> {
let ParserMatchClass = Imm0_32AsmOperand;
}
/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63].
-def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; }
+def Imm0_63AsmOperand: ImmAsmOperand<0,63> { let Name = "Imm0_63"; }
def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 64;
}]> {
@@ -734,7 +771,7 @@ def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
}
/// imm0_239 predicate - Immediate in the range [0,239].
-def Imm0_239AsmOperand : ImmAsmOperand {
+def Imm0_239AsmOperand : ImmAsmOperand<0,239> {
let Name = "Imm0_239";
let DiagnosticType = "ImmRange0_239";
}
@@ -743,13 +780,13 @@ def imm0_239 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 240; }]> {
}
/// imm0_255 predicate - Immediate in the range [0,255].
-def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; }
+def Imm0_255AsmOperand : ImmAsmOperand<0,255> { let Name = "Imm0_255"; }
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
let ParserMatchClass = Imm0_255AsmOperand;
}
-/// imm0_65535 - An immediate is in the range [0.65535].
-def Imm0_65535AsmOperand: ImmAsmOperand { let Name = "Imm0_65535"; }
+/// imm0_65535 - An immediate is in the range [0,65535].
+def Imm0_65535AsmOperand: ImmAsmOperand<0,65535> { let Name = "Imm0_65535"; }
def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 65536;
}]> {
@@ -767,19 +804,23 @@ def imm0_65535_neg : Operand<i32>, ImmLeaf<i32, [{
// FIXME: This really needs a Thumb version separate from the ARM version.
// While the range is the same, and can thus use the same match class,
// the encoding is different so it should have a different encoder method.
-def Imm0_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm0_65535Expr"; }
+def Imm0_65535ExprAsmOperand: AsmOperandClass {
+ let Name = "Imm0_65535Expr";
+ let RenderMethod = "addImmOperands";
+}
+
def imm0_65535_expr : Operand<i32> {
let EncoderMethod = "getHiLo16ImmOpValue";
let ParserMatchClass = Imm0_65535ExprAsmOperand;
}
-def Imm256_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm256_65535Expr"; }
+def Imm256_65535ExprAsmOperand: ImmAsmOperand<256,65535> { let Name = "Imm256_65535Expr"; }
def imm256_65535_expr : Operand<i32> {
let ParserMatchClass = Imm256_65535ExprAsmOperand;
}
/// imm24b - True if the 32-bit immediate is encodable in 24 bits.
-def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; }
+def Imm24bitAsmOperand: ImmAsmOperand<0,0xffffff> { let Name = "Imm24bit"; }
def imm24b : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm <= 0xffffff;
}]> {
@@ -808,7 +849,9 @@ def imm1_32_XFORM: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, SDLoc(N),
MVT::i32);
}]>;
-def Imm1_32AsmOperand: AsmOperandClass { let Name = "Imm1_32"; }
+def Imm1_32AsmOperand: ImmAsmOperandMinusOne<1,32> {
+ let Name = "Imm1_32";
+}
def imm1_32 : Operand<i32>, PatLeaf<(imm), [{
uint64_t Imm = N->getZExtValue();
return Imm > 0 && Imm <= 32;
@@ -822,7 +865,7 @@ def imm1_16_XFORM: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((int)N->getZExtValue() - 1, SDLoc(N),
MVT::i32);
}]>;
-def Imm1_16AsmOperand: AsmOperandClass { let Name = "Imm1_16"; }
+def Imm1_16AsmOperand: ImmAsmOperandMinusOne<1,16> { let Name = "Imm1_16"; }
def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
imm1_16_XFORM> {
let PrintMethod = "printImmPlusOneOperand";
@@ -3850,6 +3893,7 @@ def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm,
let Inst{11-0} = imm;
}
+let AddedComplexity = 1 in
def : ARMPat<(and GPR:$src, mod_imm_not:$imm),
(BICri GPR:$src, mod_imm_not:$imm)>;
@@ -3899,7 +3943,8 @@ def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm),
IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
[(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
let Inst{15-12} = 0b0000;
let Unpredictable{15-12} = 0b1111;
}
@@ -3910,14 +3955,16 @@ def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm,
4, IIC_iMUL32,
[(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
(MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6, UseMulOps]>;
+ Requires<[IsARM, NoV6, UseMulOps]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
}
def MLA : AsMul1I32<0b0000001, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra),
IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))]>,
- Requires<[IsARM, HasV6, UseMulOps]> {
+ Requires<[IsARM, HasV6, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
bits<4> Ra;
let Inst{15-12} = Ra;
}
@@ -3928,12 +3975,14 @@ def MLAv5: ARMPseudoExpand<(outs GPRnopc:$Rd),
pred:$p, cc_out:$s), 4, IIC_iMAC32,
[(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))],
(MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
- Requires<[IsARM, HasV6T2, UseMulOps]> {
+ Requires<[IsARM, HasV6T2, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
bits<4> Rd;
bits<4> Rm;
bits<4> Rn;
@@ -3949,26 +3998,38 @@ let hasSideEffects = 0 in {
let isCommutable = 1 in {
def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
- "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]>;
+ "smull", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [(set GPR:$RdLo, GPR:$RdHi,
+ (smullohi GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
- "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]>;
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm",
+ [(set GPR:$RdLo, GPR:$RdHi,
+ (umullohi GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL]>;
let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
- 4, IIC_iMUL64, [],
+ 4, IIC_iMUL64,
+ [(set GPR:$RdLo, GPR:$RdHi,
+ (smullohi GPR:$Rn, GPR:$Rm))],
(SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6]>,
+ Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
- 4, IIC_iMUL64, [],
+ 4, IIC_iMUL64,
+ [(set GPR:$RdLo, GPR:$RdHi,
+ (umullohi GPR:$Rn, GPR:$Rm))],
(UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6]>,
+ Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
}
}
@@ -3976,17 +4037,20 @@ def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64,
"smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64,
"umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
IIC_iMAC64,
"umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]> {
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rm;
@@ -4004,13 +4068,15 @@ def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
4, IIC_iMAC64, [],
(SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi,
pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s),
4, IIC_iMAC64, [],
(UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi,
pred:$p, cc_out:$s)>,
- Requires<[IsARM, NoV6]>;
+ Requires<[IsARM, NoV6]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
}
} // hasSideEffects
@@ -4019,13 +4085,15 @@ def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
let Inst{15-12} = 0b1111;
}
def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsARM, HasV6]> {
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
let Inst{15-12} = 0b1111;
}
@@ -4033,57 +4101,67 @@ def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
- Requires<[IsARM, HasV6, UseMulOps]>;
+ Requires<[IsARM, HasV6, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsARM, HasV6, UseMulOps]>;
+ Requires<[IsARM, HasV6, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
(ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
multiclass AI_smul<string opc> {
def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16),
(sext_inreg GPR:$Rm, i16)))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16),
(sra GPR:$Rm, (i32 16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
(sext_inreg GPR:$Rm, i16)))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
(sra GPR:$Rm, (i32 16))))]>,
- Requires<[IsARM, HasV5TE]>;
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
- []>,
- Requires<[IsARM, HasV5TE]>;
+ [(set GPR:$Rd, (ARMsmulwb GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
- []>,
- Requires<[IsARM, HasV5TE]>;
+ [(set GPR:$Rd, (ARMsmulwt GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
}
@@ -4095,7 +4173,8 @@ multiclass AI_smla<string opc> {
[(set GPRnopc:$Rd, (add GPR:$Ra,
(mul (sext_inreg GPRnopc:$Rn, i16),
(sext_inreg GPRnopc:$Rm, i16))))]>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -4103,7 +4182,8 @@ multiclass AI_smla<string opc> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16),
(sra GPRnopc:$Rm, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -4111,7 +4191,8 @@ multiclass AI_smla<string opc> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)),
(sext_inreg GPRnopc:$Rm, i16))))]>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -4119,19 +4200,24 @@ multiclass AI_smla<string opc> {
[(set GPRnopc:$Rd,
(add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)),
(sra GPRnopc:$Rm, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
- []>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (ARMsmulwb GPRnopc:$Rn, GPRnopc:$Rm)))]>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
- []>,
- Requires<[IsARM, HasV5TE, UseMulOps]>;
+ [(set GPRnopc:$Rd,
+ (add GPR:$Ra, (ARMsmulwt GPRnopc:$Rn, GPRnopc:$Rm)))]>,
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
}
}
@@ -4139,25 +4225,28 @@ defm SMUL : AI_smul<"smul">;
defm SMLA : AI_smla<"smla">;
// Halfword multiply accumulate long: SMLAL<x><y>.
-def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
- (ins GPRnopc:$Rn, GPRnopc:$Rm),
- IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV5TE]>;
-
-def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
- (ins GPRnopc:$Rn, GPRnopc:$Rm),
- IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV5TE]>;
-
-def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
- (ins GPRnopc:$Rn, GPRnopc:$Rm),
- IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV5TE]>;
-
-def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
- (ins GPRnopc:$Rn, GPRnopc:$Rm),
- IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- Requires<[IsARM, HasV5TE]>;
+class SMLAL<bits<2> opc1, string asm>
+ : AMulxyI64<0b0001010, opc1,
+ (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
+ IIC_iMAC64, asm, "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
+ Requires<[IsARM, HasV5TE]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
+
+def SMLALBB : SMLAL<0b00, "smlalbb">;
+def SMLALBT : SMLAL<0b10, "smlalbt">;
+def SMLALTB : SMLAL<0b01, "smlaltb">;
+def SMLALTT : SMLAL<0b11, "smlaltt">;
+
+def : ARMV5TEPat<(ARMsmlalbb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (SMLALBB $Rn, $Rm, $RLo, $RHi)>;
+def : ARMV5TEPat<(ARMsmlalbt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (SMLALBT $Rn, $Rm, $RLo, $RHi)>;
+def : ARMV5TEPat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (SMLALTB $Rn, $Rm, $RLo, $RHi)>;
+def : ARMV5TEPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (SMLALTT $Rn, $Rm, $RLo, $RHi)>;
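As a reference for what SDT_LongMac and these patterns describe, the smlalbt
case computes the following (model code only, not the backend): accumulate
the signed product of Rn's bottom halfword and Rm's top halfword into the
64-bit value held in RdHi:RdLo.

    #include <cstdint>

    uint64_t smlalbt(uint64_t Acc, int32_t Rn, int32_t Rm) {
      int32_t Prod = (int16_t)Rn * (int16_t)(Rm >> 16); // 16x16 -> 32 signed
      return Acc + (int64_t)Prod;                       // 64-bit accumulate
    }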
// Helper class for AI_smld.
class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
@@ -4203,19 +4292,23 @@ multiclass AI_smld<bit sub, string opc> {
def D : AMulDualIa<0, sub, 0, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
- NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def DX: AMulDualIa<0, sub, 1, (outs GPRnopc:$Rd),
(ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
- NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
(ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
- !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
+ !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
(ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
- !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
+ !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">,
+ Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
}
@@ -4225,9 +4318,11 @@ defm SMLS : AI_smld<1, "smls">;
multiclass AI_sdml<bit sub, string opc> {
def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm),
- NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def DX:AMulDualI<0, sub, 1, (outs GPRnopc:$Rd),(ins GPRnopc:$Rn, GPRnopc:$Rm),
- NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
}
defm SMUA : AI_sdml<0, "smua">;
@@ -4239,12 +4334,14 @@ defm SMUS : AI_sdml<1, "smus">;
def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
"sdiv", "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>,
- Requires<[IsARM, HasDivideInARM]>;
+ Requires<[IsARM, HasDivideInARM]>,
+ Sched<[WriteDIV]>;
def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
"udiv", "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>,
- Requires<[IsARM, HasDivideInARM]>;
+ Requires<[IsARM, HasDivideInARM]>,
+ Sched<[WriteDIV]>;
//===----------------------------------------------------------------------===//
// Misc. Arithmetic Instructions.
@@ -4831,14 +4928,15 @@ let AddedComplexity = 8 in {
def : ARMPat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (STL GPR:$val, addr_offset_none:$addr)>;
}
-// SWP/SWPB are deprecated in V6/V7.
+// SWP/SWPB are deprecated in V6/V7 and optional in v7VE.
+// FIXME Use InstAlias to generate LDREX/STREX pairs instead.
let mayLoad = 1, mayStore = 1 in {
def SWP : AIswp<0, (outs GPRnopc:$Rt),
(ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>,
- Requires<[PreV8]>;
+ Requires<[IsARM,PreV8]>;
def SWPB: AIswp<1, (outs GPRnopc:$Rt),
(ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>,
- Requires<[PreV8]>;
+ Requires<[IsARM,PreV8]>;
}
//===----------------------------------------------------------------------===//
@@ -4850,7 +4948,7 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
imm:$CRm, imm:$opc2)]>,
- Requires<[PreV8]> {
+ Requires<[IsARM,PreV8]> {
bits<4> opc1;
bits<4> CRn;
bits<4> CRd;
@@ -4872,7 +4970,7 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
imm:$CRm, imm:$opc2)]>,
- Requires<[PreV8]> {
+ Requires<[IsARM,PreV8]> {
let Inst{31-28} = 0b1111;
bits<4> opc1;
bits<4> CRn;
@@ -5048,13 +5146,13 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm, list<dag> pattern> {
defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>;
-defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>;
+defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
+defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
defm STC : LdStCop <0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
-defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>;
-defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8]>;
+defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
+defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
//===----------------------------------------------------------------------===//
// Move between coprocessor and ARM core register.
@@ -5132,7 +5230,7 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>,
- Requires<[PreV8]>;
+ Requires<[IsARM,PreV8]>;
def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
(MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0)>;
@@ -5140,7 +5238,7 @@ def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
(outs GPRwithAPSR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>,
- Requires<[PreV8]>;
+ Requires<[IsARM,PreV8]>;
def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
(MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0)>;
@@ -5183,7 +5281,7 @@ class MovRRCopro2<string opc, bit direction, dag oops, dag iops,
list<dag> pattern = []>
: ABXI<0b1100, oops, iops, NoItinerary,
!strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern>,
- Requires<[PreV8]> {
+ Requires<[IsARM,PreV8]> {
let Inst{31-28} = 0b1111;
let Inst{23-21} = 0b010;
let Inst{20} = direction;
@@ -5525,20 +5623,26 @@ def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
// smul* and smla*
def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
- (SMULBB GPR:$a, GPR:$b)>;
+ (SMULBB GPR:$a, GPR:$b)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
- (SMULBT GPR:$a, GPR:$b)>;
+ (SMULBT GPR:$a, GPR:$b)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
- (SMULTB GPR:$a, GPR:$b)>;
+ (SMULTB GPR:$a, GPR:$b)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, sext_16_node:$b)),
- (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
- (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
- (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
// Pre-v7 uses MCR for synchronization barriers.
def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
@@ -5717,33 +5821,49 @@ def : MnemonicAlias<"usubaddx", "usax">;
// "mov Rd, mod_imm_not" can be handled via "mvn" in assembly, just like
// for isel.
-def : ARMInstAlias<"mov${s}${p} $Rd, $imm",
+def : ARMInstSubst<"mov${s}${p} $Rd, $imm",
(MVNi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
+def : ARMInstSubst<"mvn${s}${p} $Rd, $imm",
(MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
// Same for AND <--> BIC
-def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+def : ARMInstSubst<"bic${s}${p} $Rd, $Rn, $imm",
(ANDri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
+def : ARMInstSubst<"bic${s}${p} $Rdn, $imm",
(ANDri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
+def : ARMInstSubst<"and${s}${p} $Rd, $Rn, $imm",
(BICri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
+def : ARMInstSubst<"and${s}${p} $Rdn, $imm",
(BICri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
pred:$p, cc_out:$s)>;
// Likewise, "add Rd, mod_imm_neg" -> sub
-def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm",
+def : ARMInstSubst<"add${s}${p} $Rd, $Rn, $imm",
(SUBri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
-def : ARMInstAlias<"add${s}${p} $Rd, $imm",
+def : ARMInstSubst<"add${s}${p} $Rd, $imm",
(SUBri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
+// Likewise, "sub Rd, mod_imm_neg" -> add
+def : ARMInstSubst<"sub${s}${p} $Rd, $Rn, $imm",
+ (ADDri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstSubst<"sub${s}${p} $Rd, $imm",
+ (ADDri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>;
+
+def : ARMInstSubst<"adc${s}${p} $Rd, $Rn, $imm",
+ (SBCri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstSubst<"adc${s}${p} $Rdn, $imm",
+ (SBCri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstSubst<"sbc${s}${p} $Rd, $Rn, $imm",
+ (ADCri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstSubst<"sbc${s}${p} $Rdn, $imm",
+ (ADCri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
+
// Same for CMP <--> CMN via mod_imm_neg
-def : ARMInstAlias<"cmp${p} $Rd, $imm",
+def : ARMInstSubst<"cmp${p} $Rd, $imm",
(CMNri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>;
-def : ARMInstAlias<"cmn${p} $Rd, $imm",
+def : ARMInstSubst<"cmn${p} $Rd, $imm",
(CMPri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>;
// The shifter forms of the MOV instruction are aliased to the ASR, LSL,
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index b5fa8e999e2a..681e235d78f0 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -7139,6 +7139,17 @@ let Predicates = [IsBE] in {
(f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
}
+def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+
//===----------------------------------------------------------------------===//
// Assembler aliases
//
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index a681f64b05e6..f2f426e86701 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -19,7 +19,7 @@ def imm_sr_XFORM: SDNodeXForm<imm, [{
unsigned Imm = N->getZExtValue();
return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), SDLoc(N), MVT::i32);
}]>;
-def ThumbSRImmAsmOperand: AsmOperandClass { let Name = "ImmThumbSR"; }
+def ThumbSRImmAsmOperand: ImmAsmOperand<1,32> { let Name = "ImmThumbSR"; }
def imm_sr : Operand<i32>, PatLeaf<(imm), [{
uint64_t Imm = N->getZExtValue();
return Imm > 0 && Imm <= 32;
@@ -28,22 +28,31 @@ def imm_sr : Operand<i32>, PatLeaf<(imm), [{
let ParserMatchClass = ThumbSRImmAsmOperand;
}
-def imm_comp_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), SDLoc(N),
- MVT::i32);
-}]>;
-
def imm0_7_neg : PatLeaf<(i32 imm), [{
return (uint32_t)-N->getZExtValue() < 8;
}], imm_neg_XFORM>;
+def ThumbModImmNeg1_7AsmOperand : AsmOperandClass { let Name = "ThumbModImmNeg1_7"; }
+def mod_imm1_7_neg : Operand<i32>, PatLeaf<(imm), [{
+ unsigned Value = -(unsigned)N->getZExtValue();
+ return 0 < Value && Value < 8;
+ }], imm_neg_XFORM> {
+ let ParserMatchClass = ThumbModImmNeg1_7AsmOperand;
+}
+
+def ThumbModImmNeg8_255AsmOperand : AsmOperandClass { let Name = "ThumbModImmNeg8_255"; }
+def mod_imm8_255_neg : Operand<i32>, PatLeaf<(imm), [{
+ unsigned Value = -(unsigned)N->getZExtValue();
+ return 7 < Value && Value < 256;
+ }], imm_neg_XFORM> {
+ let ParserMatchClass = ThumbModImmNeg8_255AsmOperand;
+}
+
def imm0_255_comp : PatLeaf<(i32 imm), [{
return ~((uint32_t)N->getZExtValue()) < 256;
}]>;
-def imm8_255 : ImmLeaf<i32, [{
- return Imm >= 8 && Imm < 256;
-}]>;
def imm8_255_neg : PatLeaf<(i32 imm), [{
unsigned Val = -N->getZExtValue();
return Val >= 8 && Val < 256;
@@ -407,9 +416,9 @@ def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
let DecoderMethod = "DecodeThumbAddSPImm";
}
-def : tInstAlias<"add${p} sp, $imm",
+def : tInstSubst<"add${p} sp, $imm",
(tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
-def : tInstAlias<"add${p} sp, sp, $imm",
+def : tInstSubst<"add${p} sp, sp, $imm",
(tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
// Can optionally specify SP as a three operand instruction.
@@ -910,7 +919,7 @@ let isAdd = 1 in {
def tADC : // A8.6.2
T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
"adc", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+ []>, Sched<[WriteALU]>;
// Add immediate
def tADDi3 : // A8.6.4 T1
@@ -938,6 +947,43 @@ let isAdd = 1 in {
"add", "\t$Rd, $Rn, $Rm",
[(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+ /// Similar to the above except these set the 's' bit so the
+ /// instruction modifies the CPSR register.
+ ///
+ /// These opcodes will be converted to the real non-S opcodes by
+ /// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+ let hasPostISelHook = 1, Defs = [CPSR] in {
+ let isCommutable = 1 in
+ def tADCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ 2, IIC_iALUr,
+ [(set tGPR:$Rdn, CPSR, (ARMadde tGPR:$Rn, tGPR:$Rm,
+ CPSR))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ def tADDSi3 : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
+ 2, IIC_iALUi,
+ [(set tGPR:$Rd, CPSR, (ARMaddc tGPR:$Rm,
+ imm0_7:$imm3))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ def tADDSi8 : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8),
+ 2, IIC_iALUi,
+ [(set tGPR:$Rdn, CPSR, (ARMaddc tGPR:$Rn,
+ imm8_255:$imm8))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ let isCommutable = 1 in
+ def tADDSrr : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ 2, IIC_iALUr,
+ [(set tGPR:$Rd, CPSR, (ARMaddc tGPR:$Rn,
+ tGPR:$Rm))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+ }
+
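The ARMaddc/ARMadde chain these pseudos select implements ordinary carry
propagation. As plain arithmetic (a model of the node semantics, not the
ISel code):

    #include <cstdint>

    struct AddResult { uint32_t Value; bool Carry; };

    // tADDSrr: Rd = Rn + Rm, defining the carry flag (ARMaddc).
    AddResult adds(uint32_t A, uint32_t B) {
      uint32_t Sum = A + B;
      return {Sum, Sum < A}; // unsigned overflow sets C
    }
    // tADCS: Rd = Rn + Rm + C, reading and defining the flag (ARMadde).
    AddResult adcs(uint32_t A, uint32_t B, bool C) {
      uint64_t Wide = (uint64_t)A + B + C;
      return {(uint32_t)Wide, (Wide >> 32) != 0};
    }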
let hasSideEffects = 0 in
def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
"add", "\t$Rdn, $Rm", []>,
@@ -951,6 +997,12 @@ let isAdd = 1 in {
}
}
+def : tInstSubst<"sub${s}${p} $rd, $rn, $imm",
+ (tADDi3 tGPR:$rd, s_cc_out:$s, tGPR:$rn, mod_imm1_7_neg:$imm, pred:$p)>;
+def : tInstSubst<"sub${s}${p} $rdn, $imm",
+ (tADDi8 tGPR:$rdn, s_cc_out:$s, mod_imm8_255_neg:$imm, pred:$p)>;
+
// AND register
let isCommutable = 1 in
def tAND : // A8.6.12
@@ -1197,7 +1249,7 @@ def tSBC : // A8.6.151
T1sItDPEncode<0b0110, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iALUr,
"sbc", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (sube tGPR:$Rn, tGPR:$Rm))]>,
+ []>,
Sched<[WriteALU]>;
// Subtract immediate
@@ -1218,6 +1270,14 @@ def tSUBi8 : // A8.6.210 T2
[(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255_neg:$imm8))]>,
Sched<[WriteALU]>;
+def : tInstSubst<"add${s}${p} $rd, $rn, $imm",
+ (tSUBi3 tGPR:$rd, s_cc_out:$s, tGPR:$rn, mod_imm1_7_neg:$imm, pred:$p)>;
+
+def : tInstSubst<"add${s}${p} $rdn, $imm",
+ (tSUBi8 tGPR:$rdn, s_cc_out:$s, mod_imm8_255_neg:$imm, pred:$p)>;
+
// Subtract register
def tSUBrr : // A8.6.212
T1sIGenEncode<0b01101, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
@@ -1226,6 +1286,41 @@ def tSUBrr : // A8.6.212
[(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>,
Sched<[WriteALU]>;
+/// Similar to the above except these set the 's' bit so the
+/// instruction modifies the CPSR register.
+///
+/// These opcodes will be converted to the real non-S opcodes by
+/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+ def tSBCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
+ 2, IIC_iALUr,
+ [(set tGPR:$Rdn, CPSR, (ARMsube tGPR:$Rn, tGPR:$Rm,
+ CPSR))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ def tSUBSi3 : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
+ 2, IIC_iALUi,
+ [(set tGPR:$Rd, CPSR, (ARMsubc tGPR:$Rm,
+ imm0_7:$imm3))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ def tSUBSi8 : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8),
+ 2, IIC_iALUi,
+ [(set tGPR:$Rdn, CPSR, (ARMsubc tGPR:$Rn,
+ imm8_255:$imm8))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+
+ def tSUBSrr : tPseudoInst<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ 2, IIC_iALUr,
+ [(set tGPR:$Rd, CPSR, (ARMsubc tGPR:$Rn,
+ tGPR:$Rm))]>,
+ Requires<[IsThumb1Only]>,
+ Sched<[WriteALU]>;
+}
+
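The subtracting pseudos follow the same shape, with the ARM convention that
C means "no borrow" (again a model of the node semantics only):

    #include <cstdint>

    struct SubResult { uint32_t Value; bool Carry; };

    // tSUBSrr: Rd = Rn - Rm; C is set when Rn >= Rm (ARMsubc).
    SubResult subs(uint32_t A, uint32_t B) { return {A - B, A >= B}; }
    // tSBCS: Rd = Rn - Rm - (1 - C) (ARMsube).
    SubResult sbcs(uint32_t A, uint32_t B, bool C) {
      uint64_t Wide = (uint64_t)A - B - (C ? 0 : 1);
      return {(uint32_t)Wide, (Wide >> 32) == 0}; // borrow clears C
    }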
// Sign-extend byte
def tSXTB : // A8.6.222
T1pIMiscEncode<{0,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
@@ -1386,22 +1481,6 @@ def : T1Pat<(ARMcmpZ tGPR:$Rn, imm0_255:$imm8),
def : T1Pat<(ARMcmpZ tGPR:$Rn, tGPR:$Rm),
(tCMPr tGPR:$Rn, tGPR:$Rm)>;
-// Add with carry
-def : T1Pat<(addc tGPR:$lhs, imm0_7:$rhs),
- (tADDi3 tGPR:$lhs, imm0_7:$rhs)>;
-def : T1Pat<(addc tGPR:$lhs, imm8_255:$rhs),
- (tADDi8 tGPR:$lhs, imm8_255:$rhs)>;
-def : T1Pat<(addc tGPR:$lhs, tGPR:$rhs),
- (tADDrr tGPR:$lhs, tGPR:$rhs)>;
-
-// Subtract with carry
-def : T1Pat<(addc tGPR:$lhs, imm0_7_neg:$rhs),
- (tSUBi3 tGPR:$lhs, imm0_7_neg:$rhs)>;
-def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs),
- (tSUBi8 tGPR:$lhs, imm8_255_neg:$rhs)>;
-def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
- (tSUBrr tGPR:$lhs, tGPR:$rhs)>;
-
// Bswap 16 with load/store
def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),
(tREV16 (tLDRHi t_addrmode_is2:$addr))>;
@@ -1477,7 +1556,7 @@ def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>;
// post-inc LDR -> LDM r0!, {r1}. The way operands are laid out in LDMs is
// different to how ISel expects them for a post-inc load, so use a pseudo
// and expand it just after ISel.
-let usesCustomInserter = 1,
+let usesCustomInserter = 1, mayLoad = 1,
Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in
def tLDR_postidx: tPseudoInst<(outs rGPR:$Rt, rGPR:$Rn_wb),
(ins rGPR:$Rn, pred:$p),
@@ -1547,7 +1626,7 @@ def : T1Pat<(i32 thumb_immshifted:$src),
(thumb_immshifted_shamt imm:$src))>;
def : T1Pat<(i32 imm0_255_comp:$src),
- (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
+ (tMVN (tMOVi8 (imm_not_XFORM imm:$src)))>;
def : T1Pat<(i32 imm256_510:$src),
(tADDi8 (tMOVi8 255),
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 603d66403e65..f5b673b78ad7 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -76,7 +76,11 @@ def t2_so_imm_notSext16_XFORM : SDNodeXForm<imm, [{
// t2_so_imm - Match a 32-bit immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
// immediate splatted into multiple bytes of the word.
-def t2_so_imm_asmoperand : ImmAsmOperand { let Name = "T2SOImm"; }
+def t2_so_imm_asmoperand : AsmOperandClass {
+ let Name = "T2SOImm";
+ let RenderMethod = "addImmOperands";
+}
def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
return ARM_AM::getT2SOImmVal(Imm) != -1;
}]> {
@@ -110,15 +114,14 @@ def t2_so_imm_notSext : Operand<i32>, PatLeaf<(imm), [{
// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; }
-def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
- int64_t Value = -(int)N->getZExtValue();
- return Value && ARM_AM::getT2SOImmVal(Value) != -1;
+def t2_so_imm_neg : Operand<i32>, ImmLeaf<i32, [{
+ return Imm && ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
}], t2_so_imm_neg_XFORM> {
let ParserMatchClass = t2_so_imm_neg_asmoperand;
}
-/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
-def imm0_4095_asmoperand: ImmAsmOperand { let Name = "Imm0_4095"; }
+/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0,4095].
+def imm0_4095_asmoperand: ImmAsmOperand<0,4095> { let Name = "Imm0_4095"; }
def imm0_4095 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 4096;
}]> {
@@ -139,7 +142,7 @@ def imm1_255_neg : PatLeaf<(i32 imm), [{
def imm0_255_not : PatLeaf<(i32 imm), [{
return (uint32_t)(~N->getZExtValue()) < 255;
-}], imm_comp_XFORM>;
+}], imm_not_XFORM>;
def lo5AllOne : PatLeaf<(i32 imm), [{
// Returns true if all low 5-bits are 1.
@@ -538,7 +541,8 @@ class T2FourReg<dag oops, dag iops, InstrItinClass itin,
class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
string opc, list<dag> pattern>
: T2I<(outs rGPR:$RdLo, rGPR:$RdHi), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
- opc, "\t$RdLo, $RdHi, $Rn, $Rm", pattern> {
+ opc, "\t$RdLo, $RdHi, $Rn, $Rm", pattern>,
+ Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rn;
@@ -556,7 +560,8 @@ class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4, string opc>
: T2I<(outs rGPR:$RdLo, rGPR:$RdHi),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
opc, "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi"> {
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rn;
@@ -977,7 +982,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
PatFrag opnode> {
def i12 : T2Ii12<(outs target:$Rt), (ins t2addrmode_imm12:$addr), iii,
opc, ".w\t$Rt, $addr",
- [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]> {
+ [(set target:$Rt, (opnode t2addrmode_imm12:$addr))]>,
+ Sched<[WriteLd]> {
bits<4> Rt;
bits<17> addr;
let Inst{31-25} = 0b1111100;
@@ -993,7 +999,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
}
def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii,
opc, "\t$Rt, $addr",
- [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]> {
+ [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]>,
+ Sched<[WriteLd]> {
bits<4> Rt;
bits<13> addr;
let Inst{31-27} = 0b11111;
@@ -1015,7 +1022,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
}
def s : T2Iso <(outs target:$Rt), (ins t2addrmode_so_reg:$addr), iis,
opc, ".w\t$Rt, $addr",
- [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]> {
+ [(set target:$Rt, (opnode t2addrmode_so_reg:$addr))]>,
+ Sched<[WriteLd]> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = signed;
@@ -1039,7 +1047,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
// from the PC.
def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii,
opc, ".w\t$Rt, $addr",
- [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
+ [(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]>,
+ Sched<[WriteLd]> {
let isReMaterializable = 1;
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
@@ -1065,7 +1074,8 @@ multiclass T2I_st<bits<2> opcod, string opc,
PatFrag opnode> {
def i12 : T2Ii12<(outs), (ins target:$Rt, t2addrmode_imm12:$addr), iii,
opc, ".w\t$Rt, $addr",
- [(opnode target:$Rt, t2addrmode_imm12:$addr)]> {
+ [(opnode target:$Rt, t2addrmode_imm12:$addr)]>,
+ Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0001;
let Inst{22-21} = opcod;
@@ -1082,7 +1092,8 @@ multiclass T2I_st<bits<2> opcod, string opc,
}
def i8 : T2Ii8 <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii,
opc, "\t$Rt, $addr",
- [(opnode target:$Rt, t2addrmode_negimm8:$addr)]> {
+ [(opnode target:$Rt, t2addrmode_negimm8:$addr)]>,
+ Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0000;
let Inst{22-21} = opcod;
@@ -1102,7 +1113,8 @@ multiclass T2I_st<bits<2> opcod, string opc,
}
def s : T2Iso <(outs), (ins target:$Rt, t2addrmode_so_reg:$addr), iis,
opc, ".w\t$Rt, $addr",
- [(opnode target:$Rt, t2addrmode_so_reg:$addr)]> {
+ [(opnode target:$Rt, t2addrmode_so_reg:$addr)]>,
+ Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0000;
let Inst{22-21} = opcod;
@@ -1121,28 +1133,10 @@ multiclass T2I_st<bits<2> opcod, string opc,
/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-class T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode>
- : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
- opc, ".w\t$Rd, $Rm$rot",
- [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
- Requires<[IsThumb2]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
-
- bits<2> rot;
- let Inst{5-4} = rot{1-0}; // rotate
-}
-
-// UXTB16 - Requres T2ExtractPack, does not need the .w qualifier.
-class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode>
- : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot),
- IIC_iEXTr, opc, "\t$Rd, $Rm$rot",
- [(set rGPR:$Rd, (opnode (rotr rGPR:$Rm, rot_imm:$rot)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
+class T2I_ext_rrot_base<bits<3> opcod, dag iops, dag oops,
+ string opc, string oprs,
+ list<dag> pattern>
+ : T2TwoReg<iops, oops, IIC_iEXTr, opc, oprs, pattern> {
bits<2> rot;
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -1150,46 +1144,34 @@ class T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode>
let Inst{19-16} = 0b1111; // Rn
let Inst{15-12} = 0b1111;
let Inst{7} = 1;
- let Inst{5-4} = rot;
-}
-
-// SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern
-// supported yet.
-class T2I_ext_rrot_sxtb16<bits<3> opcod, string opc>
- : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, rot_imm:$rot), IIC_iEXTr,
- opc, "\t$Rd, $Rm$rot", []>,
- Requires<[IsThumb2, HasT2ExtractPack]> {
- bits<2> rot;
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
- let Inst{5-4} = rot;
-}
+ let Inst{5-4} = rot; // rotate
+}
+
+class T2I_ext_rrot<bits<3> opcod, string opc>
+ : T2I_ext_rrot_base<opcod,
+ (outs rGPR:$Rd),
+ (ins rGPR:$Rm, rot_imm:$rot),
+ opc, ".w\t$Rd, $Rm$rot", []>,
+ Requires<[IsThumb2]>,
+ Sched<[WriteALU, ReadALU]>;
+
+// UXTB16, SXTB16 - Requires HasDSP, does not need the .w qualifier.
+class T2I_ext_rrot_xtb16<bits<3> opcod, string opc>
+ : T2I_ext_rrot_base<opcod,
+ (outs rGPR:$Rd),
+ (ins rGPR:$Rm, rot_imm:$rot),
+ opc, "\t$Rd, $Rm$rot", []>,
+ Requires<[HasDSP, IsThumb2]>,
+ Sched<[WriteALU, ReadALU]>;
/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-class T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode>
+class T2I_exta_rrot<bits<3> opcod, string opc>
: T2ThreeReg<(outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rot_imm:$rot),
- IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot",
- [(set rGPR:$Rd, (opnode rGPR:$Rn, (rotr rGPR:$Rm,rot_imm:$rot)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
- bits<2> rot;
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0100;
- let Inst{22-20} = opcod;
- let Inst{15-12} = 0b1111;
- let Inst{7} = 1;
- let Inst{5-4} = rot;
-}
-
-class T2I_exta_rrot_np<bits<3> opcod, string opc>
- : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm,rot_imm:$rot),
IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm$rot", []>,
- Requires<[HasT2ExtractPack, IsThumb2]> {
+ Requires<[HasDSP, IsThumb2]>,
+ Sched<[WriteALU, ReadALU]> {
bits<2> rot;
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -1279,7 +1261,8 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2),
(ins t2addrmode_imm8s4:$addr),
- IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>;
+ IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>,
+ Sched<[WriteLd]>;
} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
// zextload i1 -> zextload i8
@@ -1333,17 +1316,20 @@ let mayLoad = 1, hasSideEffects = 0 in {
def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
- "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+ "ldr", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDR_POST : T2Ipostldst<0, 0b10, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
- "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+ "ldr", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDRB_PRE : T2Ipreldst<0, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
- "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+ "ldrb", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
@@ -1353,41 +1339,45 @@ def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
- "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>;
+ "ldrh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDRH_POST : T2Ipostldst<0, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+ "ldrh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDRSB_PRE : T2Ipreldst<1, 0b00, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
"ldrsb", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
- []>;
+ []>, Sched<[WriteLd]>;
def t2LDRSB_POST : T2Ipostldst<1, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+ "ldrsb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+ Sched<[WriteLd]>;
def t2LDRSH_PRE : T2Ipreldst<1, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
(ins t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
"ldrsh", "\t$Rt, $addr!", "$addr.base = $Rn_wb",
- []>;
+ []>, Sched<[WriteLd]>;
def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$Rn, t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
- "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>;
+ "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>,
+ Sched<[WriteLd]>;
} // mayLoad = 1, hasSideEffects = 0
// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110).
// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
: T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc,
- "\t$Rt, $addr", []> {
+ "\t$Rt, $addr", []>, Sched<[WriteLd]> {
bits<4> Rt;
bits<13> addr;
let Inst{31-27} = 0b11111;
@@ -1431,11 +1421,14 @@ class T2Ildacq<bits<4> bits23_20, bits<2> bit54, dag oops, dag iops,
}
def t2LDA : T2Ildacq<0b1101, 0b10, (outs rGPR:$Rt),
- (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>;
+ (ins addr_offset_none:$addr), "lda", "\t$Rt, $addr", []>,
+ Sched<[WriteLd]>;
def t2LDAB : T2Ildacq<0b1101, 0b00, (outs rGPR:$Rt),
- (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>;
+ (ins addr_offset_none:$addr), "ldab", "\t$Rt, $addr", []>,
+ Sched<[WriteLd]>;
def t2LDAH : T2Ildacq<0b1101, 0b01, (outs rGPR:$Rt),
- (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>;
+ (ins addr_offset_none:$addr), "ldah", "\t$Rt, $addr", []>,
+ Sched<[WriteLd]>;
// Store
defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_si, GPR, store>;
@@ -1448,7 +1441,8 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
- IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>;
+ IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>,
+ Sched<[WriteST]>;
// Indexed stores
@@ -1457,19 +1451,22 @@ def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
(ins GPRnopc:$Rt, t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"str", "\t$Rt, $addr!",
- "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>,
+ Sched<[WriteST]>;
def t2STRH_PRE : T2Ipreldst<0, 0b01, 0, 1, (outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"strh", "\t$Rt, $addr!",
- "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>,
+ Sched<[WriteST]>;
def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, t2addrmode_imm8_pre:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
"strb", "\t$Rt, $addr!",
- "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>;
+ "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>,
+ Sched<[WriteST]>;
} // mayStore = 1, hasSideEffects = 0
def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
@@ -1480,7 +1477,8 @@ def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
"$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPRnopc:$Rn_wb,
(post_store GPRnopc:$Rt, addr_offset_none:$Rn,
- t2am_imm8_offset:$offset))]>;
+ t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, addr_offset_none:$Rn,
@@ -1490,7 +1488,8 @@ def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
"$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPRnopc:$Rn_wb,
(post_truncsti16 rGPR:$Rt, addr_offset_none:$Rn,
- t2am_imm8_offset:$offset))]>;
+ t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, addr_offset_none:$Rn,
@@ -1500,7 +1499,8 @@ def t2STRB_POST : T2Ipostldst<0, 0b00, 0, 0, (outs GPRnopc:$Rn_wb),
"$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPRnopc:$Rn_wb,
(post_truncsti8 rGPR:$Rt, addr_offset_none:$Rn,
- t2am_imm8_offset:$offset))]>;
+ t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
// Pseudo-instructions for pattern matching the pre-indexed stores. We can't
// put the patterns on the instruction definitions directly as ISel wants
@@ -1513,17 +1513,20 @@ def t2STR_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
4, IIC_iStore_ru,
[(set GPRnopc:$Rn_wb,
- (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+ (pre_store rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
def t2STRB_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
4, IIC_iStore_ru,
[(set GPRnopc:$Rn_wb,
- (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+ (pre_truncsti8 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
(ins rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset, pred:$p),
4, IIC_iStore_ru,
[(set GPRnopc:$Rn_wb,
- (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
+ (pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>,
+ Sched<[WriteST]>;
}
// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
@@ -1531,7 +1534,7 @@ def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
class T2IstT<bits<2> type, string opc, InstrItinClass ii>
: T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc,
- "\t$Rt, $addr", []> {
+ "\t$Rt, $addr", []>, Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = 0; // not signed
@@ -1557,7 +1560,8 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
let mayLoad = 1 in
def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
(ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru,
- "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> {
+ "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []>,
+ Sched<[WriteLd]> {
let DecoderMethod = "DecodeT2LDRDPreInstruction";
}
@@ -1565,13 +1569,13 @@ let mayLoad = 1 in
def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb),
(ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm),
IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm",
- "$addr.base = $wb", []>;
+ "$addr.base = $wb", []>, Sched<[WriteLd]>;
let mayStore = 1 in
def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr),
IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!",
- "$addr.base = $wb", []> {
+ "$addr.base = $wb", []>, Sched<[WriteST]> {
let DecoderMethod = "DecodeT2STRDPreInstruction";
}
@@ -1580,12 +1584,13 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
(ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr,
t2am_imm8s4_offset:$imm),
IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr$imm",
- "$addr.base = $wb", []>;
+ "$addr.base = $wb", []>, Sched<[WriteST]>;
class T2Istrrel<bits<2> bit54, dag oops, dag iops,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeNone, 4, NoItinerary, opc,
- asm, "", pattern>, Requires<[IsThumb, HasAcquireRelease]> {
+ asm, "", pattern>, Requires<[IsThumb, HasAcquireRelease]>,
+ Sched<[WriteST]> {
bits<4> Rt;
bits<4> addr;
@@ -1861,7 +1866,7 @@ defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>;
//
let hasSideEffects = 0 in
-def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
+def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rm), IIC_iMOVr,
"mov", ".w\t$Rd, $Rm", []>, Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -1870,11 +1875,11 @@ def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
let Inst{14-12} = 0b000;
let Inst{7-4} = 0b0000;
}
-def : t2InstAlias<"mov${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+def : t2InstAlias<"mov${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm,
pred:$p, zero_reg)>;
-def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm,
pred:$p, CPSR)>;
-def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm,
pred:$p, CPSR)>;
// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
@@ -1926,10 +1931,11 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
def : InstAlias<"mov${p} $Rd, $imm",
(t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p), 0>,
- Requires<[IsThumb, HasV8MBaseline]>;
+ Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteALU]>;
def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
- (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>;
+ (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>,
+ Sched<[WriteALU]>;
let Constraints = "$src = $Rd" in {
def t2MOVTi16 : T2I<(outs rGPR:$Rd),
@@ -1969,31 +1975,39 @@ def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
// Sign extenders
-def t2SXTB : T2I_ext_rrot<0b100, "sxtb",
- UnOpFrag<(sext_inreg node:$Src, i8)>>;
-def t2SXTH : T2I_ext_rrot<0b000, "sxth",
- UnOpFrag<(sext_inreg node:$Src, i16)>>;
-def t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">;
+def t2SXTB : T2I_ext_rrot<0b100, "sxtb">;
+def t2SXTH : T2I_ext_rrot<0b000, "sxth">;
+def t2SXTB16 : T2I_ext_rrot_xtb16<0b010, "sxtb16">;
+
+def t2SXTAB : T2I_exta_rrot<0b100, "sxtab">;
+def t2SXTAH : T2I_exta_rrot<0b000, "sxtah">;
+def t2SXTAB16 : T2I_exta_rrot<0b010, "sxtab16">;
+
+def : T2Pat<(sext_inreg (rotr rGPR:$Rn, rot_imm:$rot), i8),
+ (t2SXTB rGPR:$Rn, rot_imm:$rot)>;
+def : T2Pat<(sext_inreg (rotr rGPR:$Rn, rot_imm:$rot), i16),
+ (t2SXTH rGPR:$Rn, rot_imm:$rot)>;
+def : Thumb2DSPPat<(add rGPR:$Rn,
+ (sext_inreg (rotr rGPR:$Rm, rot_imm:$rot), i8)),
+ (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(add rGPR:$Rn,
+ (sext_inreg (rotr rGPR:$Rm, rot_imm:$rot), i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
-def t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
- BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
-def t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
- BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
-def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">;
// A simple right-shift can also be used in most cases (the exception is the
// SXTH operations with a rotate of 24: there the non-contiguous bits are
// relevant).
-def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg
(srl rGPR:$Rm, rot_imm:$rot), i8)),
(t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
-def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg
(srl rGPR:$Rm, imm8_or_16:$rot), i16)),
(t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
-def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg
(rotr rGPR:$Rm, (i32 24)), i16)),
(t2SXTAH rGPR:$Rn, rGPR:$Rm, (i32 3))>;
-def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+def : Thumb2DSPPat<(add rGPR:$Rn, (sext_inreg
(or (srl rGPR:$Rm, (i32 24)),
(shl rGPR:$Rm, (i32 8))), i16)),
(t2SXTAH rGPR:$Rn, rGPR:$Rm, (i32 3))>;
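The distinction drawn in the comment above is easiest to see with a small standalone sketch (illustrative only; ror32/sext16 are invented helper names, and this code is not part of the patch):

  #include <cstdint>
  #include <cstdio>

  // Illustrative sketch only, not patch code.
  static uint32_t ror32(uint32_t x, unsigned n) { // n in 1..31
    return (x >> n) | (x << (32 - n));
  }
  static int32_t sext16(uint32_t x) { return (int32_t)(int16_t)x; }

  int main() {
    uint32_t x = 0x12345678;
    // rot = 8 or 16: the low halfword of (x ror rot) equals that of
    // (x srl rot), so either shifter feeds SXTAH the same value.
    printf("%x %x\n", (unsigned)sext16(ror32(x, 16)), (unsigned)sext16(x >> 16));
    // rot = 24: the low halfword of (x ror 24) is x[7:0]:x[31:24], which is
    // non-contiguous in x, while (x srl 24) zero-fills bits 15:8.
    printf("%x %x\n", (unsigned)sext16(ror32(x, 24)), (unsigned)sext16(x >> 24));
  }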
@@ -2001,12 +2015,16 @@ def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
// Zero extenders
let AddedComplexity = 16 in {
-def t2UXTB : T2I_ext_rrot<0b101, "uxtb",
- UnOpFrag<(and node:$Src, 0x000000FF)>>;
-def t2UXTH : T2I_ext_rrot<0b001, "uxth",
- UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
- UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+def t2UXTB : T2I_ext_rrot<0b101, "uxtb">;
+def t2UXTH : T2I_ext_rrot<0b001, "uxth">;
+def t2UXTB16 : T2I_ext_rrot_xtb16<0b011, "uxtb16">;
+
+def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x000000FF),
+ (t2UXTB rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x0000FFFF),
+ (t2UXTH rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x00FF00FF),
+ (t2UXTB16 rGPR:$Rm, rot_imm:$rot)>;
// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
// The transformation should probably be done as a combiner action
@@ -2014,21 +2032,25 @@ def t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
// eight bits of the source into the lower eight bits of the result.
//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
// (t2UXTB16 rGPR:$Src, 3)>,
-// Requires<[HasT2ExtractPack, IsThumb2]>;
+// Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
(t2UXTB16 rGPR:$Src, 1)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
-def t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
- BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
-def t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
- BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
-def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">;
+def t2UXTAB : T2I_exta_rrot<0b101, "uxtab">;
+def t2UXTAH : T2I_exta_rrot<0b001, "uxtah">;
+def t2UXTAB16 : T2I_exta_rrot<0b011, "uxtab16">;
-def : Thumb2ExtractPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot),
+def : Thumb2DSPPat<(add rGPR:$Rn, (and (rotr rGPR:$Rm, rot_imm:$rot),
+ 0x00FF)),
+ (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(add rGPR:$Rn, (and (rotr rGPR:$Rm, rot_imm:$rot),
+ 0xFFFF)),
+ (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot),
0xFF)),
(t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
-def : Thumb2ExtractPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot),
+def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot),
0xFFFF)),
(t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
}
@@ -2060,6 +2082,19 @@ defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", ARMadde, 1>;
defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", ARMsube>;
}
+def : t2InstSubst<"adc${s}${p} $rd, $rn, $imm",
+ (t2SBCri rGPR:$rd, rGPR:$rn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>;
+def : t2InstSubst<"sbc${s}${p} $rd, $rn, $imm",
+ (t2ADCri rGPR:$rd, rGPR:$rn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>;
+
+def : t2InstSubst<"add${s}${p}.w $rd, $rn, $imm",
+ (t2SUBri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>;
+def : t2InstSubst<"addw${p} $rd, $rn, $imm",
+ (t2SUBri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>;
+def : t2InstSubst<"sub${s}${p}.w $rd, $rn, $imm",
+ (t2ADDri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>;
+def : t2InstSubst<"subw${p} $rd, $rn, $imm",
+ (t2ADDri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>;
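These t2InstSubst records are assembler rewrites rather than printable aliases: they accept an immediate whose complement or negation is encodable. Illustratively (these examples are not from the patch), "adc r0, r1, #0xfffffffe" should assemble as "sbc r0, r1, #1", and "add.w r0, r1, #-4" as "sub.w r0, r1, #4".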
// RSB
defm t2RSB : T2I_rbin_irs <0b1110, "rsb", sub>;
@@ -2230,70 +2265,52 @@ def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
Requires<[IsThumb2, HasDSP]>;
// Signed/Unsigned saturate.
-class T2SatI<dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, itin, opc, asm, pattern> {
+class T2SatI<dag iops, string opc, string asm>
+ : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, []> {
bits<4> Rd;
bits<4> Rn;
bits<5> sat_imm;
- bits<7> sh;
+ bits<6> sh;
- let Inst{11-8} = Rd;
+ let Inst{31-24} = 0b11110011;
+ let Inst{21} = sh{5};
+ let Inst{20} = 0;
let Inst{19-16} = Rn;
- let Inst{4-0} = sat_imm;
- let Inst{21} = sh{5};
+ let Inst{15} = 0;
let Inst{14-12} = sh{4-2};
- let Inst{7-6} = sh{1-0};
+ let Inst{11-8} = Rd;
+ let Inst{7-6} = sh{1-0};
+ let Inst{5} = 0;
+ let Inst{4-0} = sat_imm;
}
-def t2SSAT: T2SatI<
- (outs rGPR:$Rd),
- (ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
- NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>,
- Requires<[IsThumb2]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1100;
- let Inst{20} = 0;
- let Inst{15} = 0;
+def t2SSAT: T2SatI<(ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
+ "ssat", "\t$Rd, $sat_imm, $Rn$sh">,
+ Requires<[IsThumb2]> {
+ let Inst{23-22} = 0b00;
let Inst{5} = 0;
}
-def t2SSAT16: T2SatI<
- (outs rGPR:$Rd), (ins imm1_16:$sat_imm, rGPR:$Rn), NoItinerary,
- "ssat16", "\t$Rd, $sat_imm, $Rn", []>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1100;
- let Inst{20} = 0;
- let Inst{15} = 0;
- let Inst{21} = 1; // sh = '1'
- let Inst{14-12} = 0b000; // imm3 = '000'
- let Inst{7-6} = 0b00; // imm2 = '00'
- let Inst{5-4} = 0b00;
+def t2SSAT16: T2SatI<(ins imm1_16:$sat_imm, rGPR:$Rn),
+ "ssat16", "\t$Rd, $sat_imm, $Rn">,
+ Requires<[IsThumb2, HasDSP]> {
+ let Inst{23-22} = 0b00;
+ let sh = 0b100000;
+ let Inst{4} = 0;
}
-def t2USAT: T2SatI<
- (outs rGPR:$Rd),
- (ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
- NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh", []>,
- Requires<[IsThumb2]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1110;
- let Inst{20} = 0;
- let Inst{15} = 0;
+def t2USAT: T2SatI<(ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh),
+ "usat", "\t$Rd, $sat_imm, $Rn$sh">,
+ Requires<[IsThumb2]> {
+ let Inst{23-22} = 0b10;
}
-def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn),
- NoItinerary,
- "usat16", "\t$Rd, $sat_imm, $Rn", []>,
+def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn),
+ "usat16", "\t$Rd, $sat_imm, $Rn">,
Requires<[IsThumb2, HasDSP]> {
- let Inst{31-22} = 0b1111001110;
- let Inst{20} = 0;
- let Inst{15} = 0;
- let Inst{21} = 1; // sh = '1'
- let Inst{14-12} = 0b000; // imm3 = '000'
- let Inst{7-6} = 0b00; // imm2 = '00'
- let Inst{5-4} = 0b00;
+ let Inst{23-22} = 0b10;
+ let sh = 0b100000;
+ let Inst{4} = 0;
}
def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>;
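For reference, a sketch of the saturation semantics these instructions implement under the usual architectural definition (illustrative C++ only; ssat/usat are made-up helper names, not patch code):

  #include <algorithm>
  #include <cstdint>

  // Illustrative only: clamp to a signed bits-wide range (SSAT, bits in
  // 1..32) or an unsigned one (USAT, bits in 0..31).
  int32_t ssat(int64_t x, unsigned bits) {
    int64_t hi = (INT64_C(1) << (bits - 1)) - 1;
    return (int32_t)std::min(std::max(x, -hi - 1), hi);
  }
  uint32_t usat(int64_t x, unsigned bits) {
    int64_t hi = (INT64_C(1) << bits) - 1;
    return (uint32_t)std::min(std::max(x, INT64_C(0)), hi);
  }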
@@ -2305,11 +2322,18 @@ def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm),
// Shift and rotate Instructions.
//
-defm t2LSL : T2I_sh_ir<0b00, "lsl", imm0_31, shl>;
+defm t2LSL : T2I_sh_ir<0b00, "lsl", imm1_31, shl>;
defm t2LSR : T2I_sh_ir<0b01, "lsr", imm_sr, srl>;
defm t2ASR : T2I_sh_ir<0b10, "asr", imm_sr, sra>;
defm t2ROR : T2I_sh_ir<0b11, "ror", imm0_31, rotr>;
+// LSL #0 is actually MOV, and permits a slightly different set of registers
+// than LSL with a non-zero shift.
+def : t2InstAlias<"lsl${s}${p} $Rd, $Rm, #0",
+ (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"lsl${s}${p}.w $Rd, $Rm, #0",
+ (t2MOVr GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+
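Concretely (illustrative): "lsls r0, r1, #0" is accepted and encoded as "movs r0, r1"; routing the #0 case through t2MOVr is what permits SP, which the rGPR class used by the real shift encodings excludes.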
// (rotr x, (and y, 0x...1f)) ==> (ROR x, y)
def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
(t2RORrr rGPR:$lhs, rGPR:$rhs)>;
@@ -2547,7 +2571,8 @@ def : T2Pat<(t2_so_imm_not:$src),
let isCommutable = 1 in
def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
"mul", "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]> {
+ [(set rGPR:$Rd, (mul rGPR:$Rn, rGPR:$Rm))]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -2558,7 +2583,8 @@ def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
class T2FourRegMLA<bits<4> op7_4, string opc, list<dag> pattern>
: T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>,
- Requires<[IsThumb2, UseMulOps]> {
+ Requires<[IsThumb2, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -2575,8 +2601,12 @@ def t2MLS: T2FourRegMLA<0b0001, "mls",
// Extra precision multiplies with low / high results
let hasSideEffects = 0 in {
let isCommutable = 1 in {
-def t2SMULL : T2MulLong<0b000, 0b0000, "smull", []>;
-def t2UMULL : T2MulLong<0b010, 0b0000, "umull", []>;
+def t2SMULL : T2MulLong<0b000, 0b0000, "smull",
+ [(set rGPR:$RdLo, rGPR:$RdHi,
+ (smullohi rGPR:$Rn, rGPR:$Rm))]>;
+def t2UMULL : T2MulLong<0b010, 0b0000, "umull",
+ [(set rGPR:$RdLo, rGPR:$RdHi,
+ (umullohi rGPR:$Rn, rGPR:$Rm))]>;
} // isCommutable
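The new patterns use a multi-result set with the smullohi/umullohi nodes; what they compute is simply a full 64-bit product split across RdLo/RdHi, roughly (illustrative C++ sketch, not patch code):

  #include <cstdint>

  // Illustrative only: SMULL/UMULL write the low and high halves of the
  // 64-bit product into RdLo and RdHi.
  void smull(int32_t n, int32_t m, uint32_t &rdlo, uint32_t &rdhi) {
    uint64_t p = (uint64_t)((int64_t)n * (int64_t)m);
    rdlo = (uint32_t)p;
    rdhi = (uint32_t)(p >> 32);
  }
  void umull(uint32_t n, uint32_t m, uint32_t &rdlo, uint32_t &rdhi) {
    uint64_t p = (uint64_t)n * m;
    rdlo = (uint32_t)p;
    rdhi = (uint32_t)(p >> 32);
  }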
// Multiply + accumulate
@@ -2592,7 +2622,8 @@ class T2SMMUL<bits<4> op7_4, string opc, list<dag> pattern>
: T2ThreeReg<(outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
opc, "\t$Rd, $Rn, $Rm", pattern>,
- Requires<[IsThumb2, HasDSP]> {
+ Requires<[IsThumb2, HasDSP]>,
+ Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2607,7 +2638,8 @@ class T2FourRegSMMLA<bits<3> op22_20, bits<4> op7_4, string opc,
list<dag> pattern>
: T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = op22_20;
@@ -2624,7 +2656,8 @@ class T2ThreeRegSMUL<bits<3> op22_20, bits<2> op5_4, string opc,
list<dag> pattern>
: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, opc,
"\t$Rd, $Rn, $Rm", pattern>,
- Requires<[IsThumb2, HasDSP]> {
+ Requires<[IsThumb2, HasDSP]>,
+ Sched<[WriteMUL16, ReadMUL, ReadMUL]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = op22_20;
@@ -2645,8 +2678,10 @@ def t2SMULTB : T2ThreeRegSMUL<0b001, 0b10, "smultb",
def t2SMULTT : T2ThreeRegSMUL<0b001, 0b11, "smultt",
[(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)),
(sra rGPR:$Rm, (i32 16))))]>;
-def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb", []>;
-def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt", []>;
+def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb",
+ [(set rGPR:$Rd, (ARMsmulwb rGPR:$Rn, rGPR:$Rm))]>;
+def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt",
+ [(set rGPR:$Rd, (ARMsmulwt rGPR:$Rn, rGPR:$Rm))]>;
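ARMsmulwb/ARMsmulwt give these word-by-halfword multiplies a selectable node; architecturally they keep bits [47:16] of the 48-bit product, roughly (illustrative sketch with invented helper names):

  #include <cstdint>

  // Illustrative only: multiply Rn by the sign-extended bottom (WB) or top
  // (WT) halfword of Rm and keep the top 32 bits of the 48-bit product.
  int32_t smulwb(int32_t rn, uint32_t rm) {
    return (int32_t)(((int64_t)rn * (int16_t)(rm & 0xFFFF)) >> 16);
  }
  int32_t smulwt(int32_t rn, uint32_t rm) {
    return (int32_t)(((int64_t)rn * (int16_t)(rm >> 16)) >> 16);
  }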
def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn),
(t2SMULBB rGPR:$Rm, rGPR:$Rn)>;
@@ -2659,7 +2694,8 @@ class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc,
list<dag> pattern>
: T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMUL16,
opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
+ Requires<[IsThumb2, HasDSP, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = op22_20;
@@ -2680,8 +2716,10 @@ def t2SMLATB : T2FourRegSMLA<0b001, 0b10, "smlatb",
def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt",
[(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)),
(sra rGPR:$Rm, (i32 16)))))]>;
-def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb", []>;
-def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", []>;
+def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwb rGPR:$Rn, rGPR:$Rm)))]>;
+def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (ARMsmulwt rGPR:$Rn, rGPR:$Rm)))]>;
def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)),
(t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
@@ -2692,25 +2730,32 @@ def : Thumb2DSPMulPat<(add rGPR:$Ra,
(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)),
(t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
-class T2SMLAL<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pattern>
- : T2FourReg_mac<1, op22_20, op7_4,
- (outs rGPR:$Ra, rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]>;
-
// Halfword multiple accumulate long: SMLAL<x><y>
-def t2SMLALBB : T2SMLAL<0b100, 0b1000, "smlalbb", []>;
-def t2SMLALBT : T2SMLAL<0b100, 0b1001, "smlalbt", []>;
-def t2SMLALTB : T2SMLAL<0b100, 0b1010, "smlaltb", []>;
-def t2SMLALTT : T2SMLAL<0b100, 0b1011, "smlaltt", []>;
+def t2SMLALBB : T2MlaLong<0b100, 0b1000, "smlalbb">,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLALBT : T2MlaLong<0b100, 0b1001, "smlalbt">,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLALTB : T2MlaLong<0b100, 0b1010, "smlaltb">,
+ Requires<[IsThumb2, HasDSP]>;
+def t2SMLALTT : T2MlaLong<0b100, 0b1011, "smlaltt">,
+ Requires<[IsThumb2, HasDSP]>;
+
+def : Thumb2DSPPat<(ARMsmlalbb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (t2SMLALBB $Rn, $Rm, $RLo, $RHi)>;
+def : Thumb2DSPPat<(ARMsmlalbt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (t2SMLALBT $Rn, $Rm, $RLo, $RHi)>;
+def : Thumb2DSPPat<(ARMsmlaltb GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (t2SMLALTB $Rn, $Rm, $RLo, $RHi)>;
+def : Thumb2DSPPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
+ (t2SMLALTT $Rn, $Rm, $RLo, $RHi)>;
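The ARMsmlalbb family (B/T pick the bottom/top halfword of each operand) accumulates a 16x16 product into the 64-bit value held in RLo:RHi, roughly (illustrative sketch, not patch code):

  #include <cstdint>

  // Illustrative only: SMLALBB adds the product of the two bottom
  // halfwords into the 64-bit accumulator in RdLo:RdHi.
  void smlalbb(uint32_t rn, uint32_t rm, uint32_t &rlo, uint32_t &rhi) {
    int64_t acc = (int64_t)(((uint64_t)rhi << 32) | rlo);
    acc += (int64_t)(int16_t)rn * (int16_t)rm;
    rlo = (uint32_t)acc;
    rhi = (uint32_t)((uint64_t)acc >> 32);
  }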
class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc>
: T2ThreeReg_mac<0, op22_20, op7_4,
(outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]> {
+ Requires<[IsThumb2, HasDSP]>,
+ Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
let Inst{15-12} = 0b1111;
}
@@ -2737,7 +2782,8 @@ class T2DualHalfMulAddLong<bits<3> op22_20, bits<4> op7_4, string opc>
(outs rGPR:$Ra, rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm),
IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]>;
+ Requires<[IsThumb2, HasDSP]>,
+ Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
def t2SMLALD : T2DualHalfMulAddLong<0b100, 0b1100, "smlald">;
def t2SMLALDX : T2DualHalfMulAddLong<0b100, 0b1101, "smlaldx">;
@@ -2751,7 +2797,8 @@ def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">;
def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"sdiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb, HasV8MBaseline]> {
+ Requires<[HasDivide, IsThumb, HasV8MBaseline]>,
+ Sched<[WriteDIV]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011100;
let Inst{20} = 0b1;
@@ -2762,7 +2809,8 @@ def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iDIV,
"udiv", "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb, HasV8MBaseline]> {
+ Requires<[HasDivide, IsThumb, HasV8MBaseline]>,
+ Sched<[WriteDIV]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011101;
let Inst{20} = 0b1;
@@ -2819,7 +2867,7 @@ def t2PKHBT : T2ThreeReg<
[(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF),
(and (shl rGPR:$Rm, pkh_lsl_amt:$sh),
0xFFFF0000)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]>,
+ Requires<[HasDSP, IsThumb2]>,
Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2835,10 +2883,10 @@ def t2PKHBT : T2ThreeReg<
// Alternate cases for PKHBT where identities eliminate some nodes.
def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
(t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
(t2PKHBT rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
// will match the pattern below.
@@ -2848,7 +2896,7 @@ def t2PKHTB : T2ThreeReg<
[(set rGPR:$Rd, (or (and rGPR:$Rn, 0xFFFF0000),
(and (sra rGPR:$Rm, pkh_asr_amt:$sh),
0xFFFF)))]>,
- Requires<[HasT2ExtractPack, IsThumb2]>,
+ Requires<[HasDSP, IsThumb2]>,
Sched<[WriteALUsi, ReadALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2867,14 +2915,14 @@ def t2PKHTB : T2ThreeReg<
// pkhtb src1, src2, asr (17..31).
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16:$sh)),
(t2PKHTB rGPR:$src1, rGPR:$src2, imm16:$sh)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (sra rGPR:$src2, imm16_31:$sh)),
(t2PKHTB rGPR:$src1, rGPR:$src2, imm16_31:$sh)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
(and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
(t2PKHTB rGPR:$src1, rGPR:$src2, imm1_15:$sh)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
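All of the PKHTB patterns above describe the same packing operation; as a standalone sketch (illustrative only; sh is kept in 1..31 here to sidestep the architectural asr #32 edge case):

  #include <cstdint>

  // Illustrative only: keep the top halfword of Rn and pack in the bottom
  // halfword of Rm shifted right arithmetically by sh (1..31 here).
  uint32_t pkhtb(uint32_t rn, uint32_t rm, unsigned sh) {
    uint32_t bottom = (uint32_t)((int32_t)rm >> sh) & 0xFFFFu;
    return (rn & 0xFFFF0000u) | bottom;
  }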
//===----------------------------------------------------------------------===//
// CRC32 Instructions
@@ -4216,13 +4264,13 @@ def : T2Pat<(and rGPR:$Rm, 0x000000FF), (t2UXTB rGPR:$Rm, 0)>,
def : T2Pat<(and rGPR:$Rm, 0x0000FFFF), (t2UXTH rGPR:$Rm, 0)>,
Requires<[IsThumb2]>;
def : T2Pat<(and rGPR:$Rm, 0x00FF00FF), (t2UXTB16 rGPR:$Rm, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0x00FF)),
(t2UXTAB rGPR:$Rn, rGPR:$Rm, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(add rGPR:$Rn, (and rGPR:$Rm, 0xFFFF)),
(t2UXTAH rGPR:$Rn, rGPR:$Rm, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
}
def : T2Pat<(sext_inreg rGPR:$Src, i8), (t2SXTB rGPR:$Src, 0)>,
@@ -4231,10 +4279,10 @@ def : T2Pat<(sext_inreg rGPR:$Src, i16), (t2SXTH rGPR:$Src, 0)>,
Requires<[IsThumb2]>;
def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i8)),
(t2SXTAB rGPR:$Rn, rGPR:$Rm, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : T2Pat<(add rGPR:$Rn, (sext_inreg rGPR:$Rm, i16)),
(t2SXTAH rGPR:$Rn, rGPR:$Rm, 0)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
// Atomic load/store patterns
def : T2Pat<(atomic_load_8 t2addrmode_imm12:$addr),
@@ -4325,26 +4373,26 @@ def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm",
pred:$p, cc_out:$s)>;
// add w/ negative immediates is just a sub.
-def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm",
(t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
cc_out:$s)>;
-def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"add${p} $Rd, $Rn, $imm",
(t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
-def : t2InstAlias<"add${s}${p} $Rdn, $imm",
+def : t2InstSubst<"add${s}${p} $Rdn, $imm",
(t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
cc_out:$s)>;
-def : t2InstAlias<"add${p} $Rdn, $imm",
+def : t2InstSubst<"add${p} $Rdn, $imm",
(t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
-def : t2InstAlias<"add${s}${p}.w $Rd, $Rn, $imm",
+def : t2InstSubst<"add${s}${p}.w $Rd, $Rn, $imm",
(t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
cc_out:$s)>;
-def : t2InstAlias<"addw${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"addw${p} $Rd, $Rn, $imm",
(t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
-def : t2InstAlias<"add${s}${p}.w $Rdn, $imm",
+def : t2InstSubst<"add${s}${p}.w $Rdn, $imm",
(t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
cc_out:$s)>;
-def : t2InstAlias<"addw${p} $Rdn, $imm",
+def : t2InstSubst<"addw${p} $Rdn, $imm",
(t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
@@ -4431,10 +4479,10 @@ def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm",
// input operands swapped when the shift amount is zero (i.e., unspecified).
def : InstAlias<"pkhbt${p} $Rd, $Rn, $Rm",
(t2PKHBT rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"pkhtb${p} $Rd, $Rn, $Rm",
(t2PKHBT rGPR:$Rd, rGPR:$Rm, rGPR:$Rn, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
// PUSH/POP aliases for STM/LDM
def : t2InstAlias<"push${p}.w $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
@@ -4513,16 +4561,16 @@ def : t2InstAlias<"strh${p} $Rt, $addr",
// Extend instruction optional rotate operand.
def : InstAlias<"sxtab${p} $Rd, $Rn, $Rm",
(t2SXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"sxtah${p} $Rd, $Rn, $Rm",
(t2SXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"sxtab16${p} $Rd, $Rn, $Rm",
(t2SXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"sxtb16${p} $Rd, $Rm",
(t2SXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : t2InstAlias<"sxtb${p} $Rd, $Rm",
(t2SXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
@@ -4535,16 +4583,16 @@ def : t2InstAlias<"sxth${p}.w $Rd, $Rm",
def : InstAlias<"uxtab${p} $Rd, $Rn, $Rm",
(t2UXTAB rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"uxtah${p} $Rd, $Rn, $Rm",
(t2UXTAH rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"uxtab16${p} $Rd, $Rn, $Rm",
(t2UXTAB16 rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : InstAlias<"uxtb16${p} $Rd, $Rm",
(t2UXTB16 rGPR:$Rd, rGPR:$Rm, 0, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : t2InstAlias<"uxtb${p} $Rd, $Rm",
(t2UXTB rGPR:$Rd, rGPR:$Rm, 0, pred:$p)>;
@@ -4560,7 +4608,7 @@ def : t2InstAlias<"uxtb${p} $Rd, $Rm$rot",
(t2UXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
def : InstAlias<"uxtb16${p} $Rd, $Rm$rot",
(t2UXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : t2InstAlias<"uxth${p} $Rd, $Rm$rot",
(t2UXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
@@ -4568,41 +4616,41 @@ def : t2InstAlias<"sxtb${p} $Rd, $Rm$rot",
(t2SXTB rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
def : InstAlias<"sxtb16${p} $Rd, $Rm$rot",
(t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p), 0>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+ Requires<[HasDSP, IsThumb2]>;
def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
(t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
// "mov Rd, t2_so_imm_not" can be handled via "mvn" in assembly, just like
// for isel.
-def : t2InstAlias<"mov${p} $Rd, $imm",
+def : t2InstSubst<"mov${p} $Rd, $imm",
(t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
-def : t2InstAlias<"mvn${p} $Rd, $imm",
- (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+def : t2InstSubst<"mvn${s}${p} $Rd, $imm",
+ (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>;
// Same for AND <--> BIC
-def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"bic${s}${p} $Rd, $Rn, $imm",
(t2ANDri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : t2InstAlias<"bic${s}${p} $Rdn, $imm",
+def : t2InstSubst<"bic${s}${p} $Rdn, $imm",
(t2ANDri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"and${s}${p} $Rd, $Rn, $imm",
(t2BICri rGPR:$Rd, rGPR:$Rn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
-def : t2InstAlias<"and${s}${p} $Rdn, $imm",
+def : t2InstSubst<"and${s}${p} $Rdn, $imm",
(t2BICri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_not:$imm,
pred:$p, cc_out:$s)>;
// Likewise, "add Rd, t2_so_imm_neg" -> sub
-def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm",
(t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm,
pred:$p, cc_out:$s)>;
-def : t2InstAlias<"add${s}${p} $Rd, $imm",
+def : t2InstSubst<"add${s}${p} $Rd, $imm",
(t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm,
pred:$p, cc_out:$s)>;
// Same for CMP <--> CMN via t2_so_imm_neg
-def : t2InstAlias<"cmp${p} $Rd, $imm",
+def : t2InstSubst<"cmp${p} $Rd, $imm",
(t2CMNri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
-def : t2InstAlias<"cmn${p} $Rd, $imm",
+def : t2InstSubst<"cmn${p} $Rd, $imm",
(t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
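So, illustratively, "cmp r0, #-2" should now assemble as "cmn r0, #2", and "cmn r0, #-2" as "cmp r0, #2".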
@@ -4616,6 +4664,8 @@ def : t2InstAlias<"neg${s}${p} $Rd, $Rm",
// MOV so_reg assembler pseudos. InstAlias isn't expressive enough for
// these, unfortunately.
+// FIXME: LSL #0 in the shift should allow SP to be used as either the
+// source or destination (but not both).
def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift",
(ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift",
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e99048645685..0f225156d4ca 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -11,14 +11,17 @@
//
//===----------------------------------------------------------------------===//
-def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_CMPFP0 : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisVT<1, i32>]>;
def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
+def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, f64>]>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInGlue, SDNPOutGlue]>;
-def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMFCmp, [SDNPOutGlue]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>;
def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
+def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>;
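Judging from the patterns updated below, the new i32 operand on CMPFP/CMPFPw0 selects between the exception-raising compare (vcmpe, i32 1) and the quiet compare (vcmp, i32 0), so both encodings can be selected from the same node.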
//===----------------------------------------------------------------------===//
// Operand Definitions.
@@ -336,13 +339,15 @@ let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VADDD : ADbI<0b11100, 0b11, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPALU64]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VADDS : ASbIn<0b11100, 0b11, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
+ [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]>,
+ Sched<[WriteFPALU32]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -352,19 +357,22 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VADDH : AHbI<0b11100, 0b11, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPALU32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VSUBD : ADbI<0b11100, 0b11, 1, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPALU64]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
+ [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]>,
+ Sched<[WriteFPALU32]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -374,37 +382,43 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VSUBH : AHbI<0b11100, 0b11, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPALU32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VDIVD : ADbI<0b11101, 0b00, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPDIV64]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VDIVS : ASbI<0b11101, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
+ [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>,
+ Sched<[WriteFPDIV32]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VDIVH : AHbI<0b11101, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPDIV32]>;
let TwoOperandAliasConstraint = "$Dn = $Dd" in
def VMULD : ADbI<0b11100, 0b10, 0, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VMULS : ASbIn<0b11100, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
+ [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]>,
+ Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -414,17 +428,20 @@ let TwoOperandAliasConstraint = "$Sn = $Sd" in
def VMULH : AHbI<0b11100, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
def VNMULD : ADbI<0b11100, 0b10, 1, 0,
(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
- [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>;
+ [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>,
+ Sched<[WriteFPMUL64, ReadFPMUL, ReadFPMUL]>;
def VNMULS : ASbI<0b11100, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
- [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
+ [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]>,
+ Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -433,7 +450,8 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
def VNMULH : AHbI<0b11100, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm",
- []>;
+ []>,
+ Sched<[WriteFPMUL32, ReadFPMUL, ReadFPMUL]>;
multiclass vsel_inst<string op, bits<2> opc, int CC> {
let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
@@ -501,12 +519,12 @@ let Defs = [FPSCR_NZCV] in {
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
- [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 1))]>;
def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
- [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 1))]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -517,17 +535,15 @@ def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0,
IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm",
[]>;
-
-// FIXME: Verify encoding after integrated assembler is working.
def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
- [/* For disassembly only; pattern left blank */]>;
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm), (i32 0))]>;
def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
- [/* For disassembly only; pattern left blank */]> {
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm, (i32 0))]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -566,7 +582,7 @@ let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
- [(arm_cmpfp0 (f64 DPR:$Dd))]> {
+ [(arm_cmpfp0 (f64 DPR:$Dd), (i32 1))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -574,7 +590,7 @@ def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
- [(arm_cmpfp0 SPR:$Sd)]> {
+ [(arm_cmpfp0 SPR:$Sd, (i32 1))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -591,11 +607,10 @@ def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0,
let Inst{5} = 0;
}
-// FIXME: Verify encoding after integrated assembler is working.
def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
- [/* For disassembly only; pattern left blank */]> {
+ [(arm_cmpfp0 (f64 DPR:$Dd), (i32 0))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
}
@@ -603,7 +618,7 @@ def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
(outs), (ins SPR:$Sd),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
- [/* For disassembly only; pattern left blank */]> {
+ [(arm_cmpfp0 SPR:$Sd, (i32 0))]> {
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
@@ -624,7 +639,8 @@ def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
- [(set DPR:$Dd, (fpextend SPR:$Sm))]> {
+ [(set DPR:$Dd, (fpextend SPR:$Sm))]>,
+ Sched<[WriteFPCVT]> {
// Instruction operands.
bits<5> Dd;
bits<5> Sm;
@@ -641,7 +657,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
// Special case encoding: bits 11-8 is 0b1011.
def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (fpround DPR:$Dm))]> {
+ [(set SPR:$Sd, (fpround DPR:$Dm))]>,
+ Sched<[WriteFPCVT]> {
// Instruction operands.
bits<5> Sd;
bits<5> Dm;
@@ -663,31 +680,35 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
// Between half, single and double-precision. For disassembly only.
-// FIXME: Verify encoding after integrated assembler is working.
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[HasFP16]>;
+ Requires<[HasFP16]>,
+ Sched<[WriteFPCVT]>;
def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[HasFP16]>;
+ Requires<[HasFP16]>,
+ Sched<[WriteFPCVT]>;
def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[HasFP16]>;
+ Requires<[HasFP16]>,
+ Sched<[WriteFPCVT]>;
def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[HasFP16]>;
+ Requires<[HasFP16]>,
+ Sched<[WriteFPCVT]>;
def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
- []>, Requires<[HasFPARMv8, HasDPVFP]> {
+ []>, Requires<[HasFPARMv8, HasDPVFP]>,
+ Sched<[WriteFPCVT]> {
// Instruction operands.
bits<5> Sm;
@@ -946,12 +967,14 @@ defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs DPR:$Dd), (ins DPR:$Dm),
IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
- [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>;
+ [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPSQRT64]>;
def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
- [(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
+ [(set SPR:$Sd, (fsqrt SPR:$Sm))]>,
+ Sched<[WriteFPSQRT32]>;
def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
@@ -987,7 +1010,8 @@ def VINSH : ASuInp<0b11101, 0b11, 0b0000, 0b11, 0,
def VMOVRS : AVConv2I<0b11100001, 0b1010,
(outs GPR:$Rt), (ins SPR:$Sn),
IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
- [(set GPR:$Rt, (bitconvert SPR:$Sn))]> {
+ [(set GPR:$Rt, (bitconvert SPR:$Sn))]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<4> Rt;
bits<5> Sn;
@@ -1010,7 +1034,8 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010,
(outs SPR:$Sn), (ins GPR:$Rt),
IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
[(set SPR:$Sn, (bitconvert GPR:$Rt))]>,
- Requires<[HasVFP2, UseVMOVSR]> {
+ Requires<[HasVFP2, UseVMOVSR]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Sn;
bits<4> Rt;
@@ -1032,7 +1057,8 @@ let hasSideEffects = 0 in {
def VMOVRRD : AVConv3I<0b11000101, 0b1011,
(outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
- [/* FIXME: Can't write pattern for multiple result instr*/]> {
+ [(set GPR:$Rt, GPR:$Rt2, (arm_fmrrd DPR:$Dm))]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Dm;
bits<4> Rt;
@@ -1059,7 +1085,8 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011,
def VMOVRRS : AVConv3I<0b11000101, 0b1010,
(outs GPR:$Rt, GPR:$Rt2), (ins SPR:$src1, SPR:$src2),
IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $src1, $src2",
- [/* For disassembly only; pattern left blank */]> {
+ [/* For disassembly only; pattern left blank */]>,
+ Sched<[WriteFPMOV]> {
bits<5> src1;
bits<4> Rt;
bits<4> Rt2;
@@ -1085,7 +1112,8 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010,
def VMOVDRR : AVConv5I<0b11000100, 0b1011,
(outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
- [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> {
+ [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Dm;
bits<4> Rt;
@@ -1128,7 +1156,8 @@ let hasSideEffects = 0 in
def VMOVSRR : AVConv5I<0b11000100, 0b1010,
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
- [/* For disassembly only; pattern left blank */]> {
+ [/* For disassembly only; pattern left blank */]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> dst1;
bits<4> src1;
@@ -1154,7 +1183,8 @@ def VMOVRH : AVConv2I<0b11100001, 0b1001,
(outs GPR:$Rt), (ins SPR:$Sn),
IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn",
[]>,
- Requires<[HasFullFP16]> {
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<4> Rt;
bits<5> Sn;
@@ -1173,7 +1203,8 @@ def VMOVHR : AVConv4I<0b11100000, 0b1001,
(outs SPR:$Sn), (ins GPR:$Rt),
IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt",
[]>,
- Requires<[HasFullFP16]> {
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPMOV]> {
// Instruction operands.
bits<5> Sn;
bits<4> Rt;
@@ -1254,7 +1285,8 @@ class AVConv1IHs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // s32
}
@@ -1269,7 +1301,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
(outs SPR:$Sd),(ins SPR:$Sm),
IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // s32
// Some single precision VFP instructions may be executed on both NEON and
@@ -1286,14 +1319,16 @@ def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTIH, "vcvt", ".f16.s32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // s32
}
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // u32
}
@@ -1308,7 +1343,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // u32
// Some single precision VFP instructions may be executed on both NEON and
@@ -1325,7 +1361,8 @@ def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTIH, "vcvt", ".f16.u32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // u32
}
@@ -1390,7 +1427,8 @@ class AVConv1IsH_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
}
@@ -1405,7 +1443,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
// Some single precision VFP instructions may be executed on both NEON and
@@ -1423,14 +1462,16 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
}
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
}
@@ -1445,7 +1486,8 @@ let Predicates=[HasVFP2, HasDPVFP] in {
def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
// Some single precision VFP instructions may be executed on both NEON and
@@ -1463,52 +1505,58 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 1; // Z bit
}
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
let Uses = [FPSCR] in {
-// FIXME: Verify encoding after integrated assembler is working.
def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>{
+ [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
- [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> {
+ [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOSIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTHI, "vcvtr", ".s32.f16\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (int_arm_vcvtru(f64 DPR:$Dm)))]>{
+ [(set SPR:$Sd, (int_arm_vcvtru (f64 DPR:$Dm)))]>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
- [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> {
+ [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpCVTHI, "vcvtr", ".u32.f16\t$Sd, $Sm",
- []> {
+ []>,
+ Sched<[WriteFPCVT]> {
let Inst{7} = 0; // Z bit
}
}
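// A minimal sketch (hypothetical resource name and latency, not part of this
// patch) of how a subtarget schedule model would bind the WriteFPCVT class
// used above to concrete hardware:
//
//   def HypoFPConvUnit : ProcResource<1>;
//   def : WriteRes<WriteFPCVT, [HypoFPConvUnit]> { let Latency = 5; }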
@@ -1528,8 +1576,7 @@ let Constraints = "$a = $dst" in {
class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
bit op5, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>,
- Sched<[WriteCvtFP]> {
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
bits<5> dst;
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
let Inst{22} = dst{0};
@@ -1540,8 +1587,7 @@ class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
bit op5, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern>,
- Sched<[WriteCvtFP]> {
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
bits<5> dst;
// if dp_operation then UInt(D:Vd) else UInt(Vd:D);
let Inst{22} = dst{4};
@@ -1553,26 +1599,31 @@ class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4,
def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VTOUHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTHI, "vcvt", ".u16.f16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VTOSLH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTHI, "vcvt", ".s32.f16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VTOULH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTHI, "vcvt", ".u32.f16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
- IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1604,45 +1655,54 @@ def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1,
def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
- IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>;
+ IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
- IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>;
+ IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
- IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>;
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
- IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>;
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
// Fixed-Point to FP:
def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VUHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.u16\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VSLTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.s32\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VULTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
IIC_fpCVTIH, "vcvt", ".f16.u32\t$dst, $a, $fbits", []>,
- Requires<[HasFullFP16]>;
+ Requires<[HasFullFP16]>,
+ Sched<[WriteFPCVT]>;
def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> {
+ IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1650,7 +1710,8 @@ def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> {
+ IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1658,7 +1719,8 @@ def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1666,7 +1728,8 @@ def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> {
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1674,19 +1737,23 @@ def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>;
+ IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0,
(outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>;
+ IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>;
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>;
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>,
+ Sched<[WriteFPCVT]>;
} // End of 'let Constraints = "$a = $dst" in'
@@ -1700,7 +1767,8 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1708,7 +1776,8 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1734,7 +1803,8 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1742,7 +1812,8 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1768,7 +1839,8 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1776,7 +1848,8 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1802,14 +1875,16 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1838,7 +1913,8 @@ def VFMAD : ADbI<0b11101, 0b10, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>;
+ Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -1846,7 +1922,8 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines.
}
@@ -1856,7 +1933,8 @@ def VFMAH : AHbI<0b11101, 0b10, 0, 0,
IIC_fpFMAC16, "vfma", ".f16\t$Sd, $Sn, $Sm",
[]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasFullFP16,UseFusedMAC]>;
+ Requires<[HasFullFP16,UseFusedMAC]>,
+ Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]>;
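// A minimal sketch (hypothetical unit and latencies, not part of this patch)
// of how a processor model can use the separate MAC read classes to model
// accumulator forwarding:
//
//   def : WriteRes<WriteFPMAC32, [HypoFPMACUnit]> { let Latency = 8; }
//   def : ReadAdvance<ReadFPMAC, 4>; // accumulator is read 4 cycles late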
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp
index 2bdbe4fca3de..8d224d6a70fa 100644
--- a/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -54,9 +54,21 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
DstSize <= SrcSize)) &&
"Copy with different width?!");
- assert(RegBank->getID() == ARM::GPRRegBankID && "Unsupported reg bank");
+ assert((RegBank->getID() == ARM::GPRRegBankID ||
+ RegBank->getID() == ARM::FPRRegBankID) &&
+ "Unsupported reg bank");
+
const TargetRegisterClass *RC = &ARM::GPRRegClass;
+ if (RegBank->getID() == ARM::FPRRegBankID) {
+ if (DstSize == 32)
+ RC = &ARM::SPRRegClass;
+ else if (DstSize == 64)
+ RC = &ARM::DPRRegClass;
+ else
+ llvm_unreachable("Unsupported destination size");
+ }
+
// No need to constrain SrcReg. It will get constrained when
// we hit another of its uses or its defs.
// Copies do not have constraints.
@@ -68,6 +80,146 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
return true;
}
+static bool selectFAdd(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI) {
+ assert(TII.getSubtarget().hasVFP2() && "Can't select fp add without vfp");
+
+ LLT Ty = MRI.getType(MIB->getOperand(0).getReg());
+ unsigned ValSize = Ty.getSizeInBits();
+
+ if (ValSize == 32) {
+ if (TII.getSubtarget().useNEONForSinglePrecisionFP())
+ return false;
+ MIB->setDesc(TII.get(ARM::VADDS));
+ } else {
+ assert(ValSize == 64 && "Unsupported size for floating point value");
+ if (TII.getSubtarget().isFPOnlySP())
+ return false;
+ MIB->setDesc(TII.get(ARM::VADDD));
+ }
+ MIB.add(predOps(ARMCC::AL));
+
+ return true;
+}
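// For illustration only (a sketch, not part of this patch): on a 32-bit
// value, selectFAdd rewrites the generic instruction in place, roughly
//   %res(s32) = G_FADD %a, %b  -->  %res = VADDS %a, %b, <AL pred ops>
// with the always-execute predicate appended by predOps(ARMCC::AL).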
+
+static bool selectSequence(MachineInstrBuilder &MIB,
+ const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+ assert(TII.getSubtarget().hasVFP2() && "Can't select sequence without VFP");
+
+ // We only support G_SEQUENCE as a way to stick together two scalar GPRs
+ // into one DPR.
+ unsigned VReg0 = MIB->getOperand(0).getReg();
+ (void)VReg0;
+ assert(MRI.getType(VReg0).getSizeInBits() == 64 &&
+ RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::FPRRegBankID &&
+ "Unsupported operand for G_SEQUENCE");
+ unsigned VReg1 = MIB->getOperand(1).getReg();
+ (void)VReg1;
+ assert(MRI.getType(VReg1).getSizeInBits() == 32 &&
+ RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Unsupported operand for G_SEQUENCE");
+ unsigned VReg2 = MIB->getOperand(3).getReg();
+ (void)VReg2;
+ assert(MRI.getType(VReg2).getSizeInBits() == 32 &&
+ RBI.getRegBank(VReg2, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Unsupported operand for G_SEQUENCE");
+
+ // Remove the operands corresponding to the offsets.
+ MIB->RemoveOperand(4);
+ MIB->RemoveOperand(2);
+
+ MIB->setDesc(TII.get(ARM::VMOVDRR));
+ MIB.add(predOps(ARMCC::AL));
+
+ return true;
+}
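// For illustration only (a sketch, not part of this patch): after the
// operand surgery above, a merge such as
//   %d(s64) = G_SEQUENCE %lo(s32), 0, %hi(s32), 32
// becomes the VFP move that packs two core registers into one D register:
//   %d = VMOVDRR %lo, %hi, <AL pred ops>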
+
+static bool selectExtract(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII,
+ MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+ assert(TII.getSubtarget().hasVFP2() && "Can't select extract without VFP");
+
+ // We only support G_EXTRACT as a way to break up one DPR into two GPRs.
+ unsigned VReg0 = MIB->getOperand(0).getReg();
+ (void)VReg0;
+ assert(MRI.getType(VReg0).getSizeInBits() == 32 &&
+ RBI.getRegBank(VReg0, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Unsupported operand for G_EXTRACT");
+ unsigned VReg1 = MIB->getOperand(1).getReg();
+ (void)VReg1;
+ assert(MRI.getType(VReg1).getSizeInBits() == 64 &&
+ RBI.getRegBank(VReg1, MRI, TRI)->getID() == ARM::FPRRegBankID &&
+ "Unsupported operand for G_EXTRACT");
+ assert(MIB->getOperand(2).getImm() % 32 == 0 &&
+ "Unsupported operand for G_EXTRACT");
+
+ // Rescale the offset operand: turn the bit offset into a lane index.
+ MIB->getOperand(2).setImm(MIB->getOperand(2).getImm() / 32);
+
+ MIB->setDesc(TII.get(ARM::VGETLNi32));
+ MIB.add(predOps(ARMCC::AL));
+
+ return true;
+}
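// For illustration only (a sketch, not part of this patch): dividing the
// bit offset by 32 turns it into a lane index, so
//   %hi(s32) = G_EXTRACT %d(s64), 32
// becomes
//   %hi = VGETLNi32 %d, 1, <AL pred ops>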
+
+/// Select the opcode for simple extensions (that translate to a single SXT/UXT
+/// instruction). Extension operations more complicated than that should not
+/// invoke this. Returns the original opcode if it doesn't know how to select a
+/// better one.
+static unsigned selectSimpleExtOpc(unsigned Opc, unsigned Size) {
+ using namespace TargetOpcode;
+
+ if (Size != 8 && Size != 16)
+ return Opc;
+
+ if (Opc == G_SEXT)
+ return Size == 8 ? ARM::SXTB : ARM::SXTH;
+
+ if (Opc == G_ZEXT)
+ return Size == 8 ? ARM::UXTB : ARM::UXTH;
+
+ return Opc;
+}
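// For illustration (not part of the patch), the mapping this implements:
//   selectSimpleExtOpc(G_SEXT, 8)  -> ARM::SXTB
//   selectSimpleExtOpc(G_ZEXT, 16) -> ARM::UXTH
//   selectSimpleExtOpc(G_ZEXT, 32) -> G_ZEXT (unchanged; callers bail out)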
+
+/// Select the opcode for simple loads and stores. For types smaller than 32
+/// bits, the value will be zero extended. Returns the original opcode if it
+/// doesn't know how to select a better one.
+static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank,
+ unsigned Size) {
+ bool isStore = Opc == TargetOpcode::G_STORE;
+
+ if (RegBank == ARM::GPRRegBankID) {
+ switch (Size) {
+ case 1:
+ case 8:
+ return isStore ? ARM::STRBi12 : ARM::LDRBi12;
+ case 16:
+ return isStore ? ARM::STRH : ARM::LDRH;
+ case 32:
+ return isStore ? ARM::STRi12 : ARM::LDRi12;
+ default:
+ return Opc;
+ }
+ }
+
+ if (RegBank == ARM::FPRRegBankID) {
+ switch (Size) {
+ case 32:
+ return isStore ? ARM::VSTRS : ARM::VLDRS;
+ case 64:
+ return isStore ? ARM::VSTRD : ARM::VLDRD;
+ default:
+ return Opc;
+ }
+ }
+
+ return Opc;
+}
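// For illustration (not part of the patch): a 32-bit store on the FPR bank
// selects ARM::VSTRS, while an unhandled size echoes the generic opcode
// back, which callers treat as a selection failure:
//   selectLoadStoreOpCode(G_STORE, ARM::FPRRegBankID, 32) -> ARM::VSTRS
//   selectLoadStoreOpCode(G_LOAD, ARM::GPRRegBankID, 64)  -> G_LOAD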
+
bool ARMInstructionSelector::select(MachineInstr &I) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -84,23 +236,129 @@ bool ARMInstructionSelector::select(MachineInstr &I) const {
}
MachineInstrBuilder MIB{MF, I};
+ bool isSExt = false;
using namespace TargetOpcode;
switch (I.getOpcode()) {
+ case G_SEXT:
+ isSExt = true;
+ LLVM_FALLTHROUGH;
+ case G_ZEXT: {
+ LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ // FIXME: Smaller destination sizes coming soon!
+ if (DstTy.getSizeInBits() != 32) {
+ DEBUG(dbgs() << "Unsupported destination size for extension");
+ return false;
+ }
+
+ LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
+ unsigned SrcSize = SrcTy.getSizeInBits();
+ switch (SrcSize) {
+ case 1: {
+ // ZExt boils down to & 0x1; for SExt we also subtract that from 0
+ I.setDesc(TII.get(ARM::ANDri));
+ MIB.addImm(1).add(predOps(ARMCC::AL)).add(condCodeOp());
+
+ if (isSExt) {
+ unsigned SExtResult = I.getOperand(0).getReg();
+
+ // Use a new virtual register for the result of the AND
+ unsigned AndResult = MRI.createVirtualRegister(&ARM::GPRRegClass);
+ I.getOperand(0).setReg(AndResult);
+
+ auto InsertBefore = std::next(I.getIterator());
+ auto SubI =
+ BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::RSBri))
+ .addDef(SExtResult)
+ .addUse(AndResult)
+ .addImm(0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ if (!constrainSelectedInstRegOperands(*SubI, TII, TRI, RBI))
+ return false;
+ }
+ break;
+ }
+ case 8:
+ case 16: {
+ unsigned NewOpc = selectSimpleExtOpc(I.getOpcode(), SrcSize);
+ if (NewOpc == I.getOpcode())
+ return false;
+ I.setDesc(TII.get(NewOpc));
+ MIB.addImm(0).add(predOps(ARMCC::AL));
+ break;
+ }
+ default:
+ DEBUG(dbgs() << "Unsupported source size for extension");
+ return false;
+ }
+ break;
+ }
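// For illustration only (a sketch, not part of this patch): the s1 path
// above emits two instructions for G_SEXT, roughly
//   %and = ANDri %src, 1, <AL pred ops>, <no CPSR def>  ; keep only bit 0
//   %res = RSBri %and, 0, <AL pred ops>, <no CPSR def>  ; 0 - %and = 0 or -1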
case G_ADD:
+ case G_GEP:
I.setDesc(TII.get(ARM::ADDrr));
- AddDefaultCC(AddDefaultPred(MIB));
+ MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
+ break;
+ case G_FADD:
+ if (!selectFAdd(MIB, TII, MRI))
+ return false;
break;
case G_FRAME_INDEX:
// Add 0 to the given frame index and hope it will eventually be folded into
// the user(s).
I.setDesc(TII.get(ARM::ADDri));
- AddDefaultCC(AddDefaultPred(MIB.addImm(0)));
+ MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp());
break;
- case G_LOAD:
- I.setDesc(TII.get(ARM::LDRi12));
- AddDefaultPred(MIB.addImm(0));
+ case G_CONSTANT: {
+ unsigned Reg = I.getOperand(0).getReg();
+ if (MRI.getType(Reg).getSizeInBits() != 32)
+ return false;
+
+ assert(RBI.getRegBank(Reg, MRI, TRI)->getID() == ARM::GPRRegBankID &&
+ "Expected constant to live in a GPR");
+ I.setDesc(TII.get(ARM::MOVi));
+ MIB.add(predOps(ARMCC::AL)).add(condCodeOp());
+ break;
+ }
+ case G_STORE:
+ case G_LOAD: {
+ const auto &MemOp = **I.memoperands_begin();
+ if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
+ DEBUG(dbgs() << "Atomic load/store not supported yet\n");
+ return false;
+ }
+
+ unsigned Reg = I.getOperand(0).getReg();
+ unsigned RegBank = RBI.getRegBank(Reg, MRI, TRI)->getID();
+
+ LLT ValTy = MRI.getType(Reg);
+ const auto ValSize = ValTy.getSizeInBits();
+
+ assert((ValSize != 64 || TII.getSubtarget().hasVFP2()) &&
+ "Don't know how to load/store 64-bit value without VFP");
+
+ const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, ValSize);
+ if (NewOpc == G_LOAD || NewOpc == G_STORE)
+ return false;
+
+ I.setDesc(TII.get(NewOpc));
+
+ if (NewOpc == ARM::LDRH || NewOpc == ARM::STRH)
+ // LDRH has a funny addressing mode (there's already a FIXME for it).
+ MIB.addReg(0);
+ MIB.addImm(0).add(predOps(ARMCC::AL));
+ break;
+ }
+ case G_SEQUENCE: {
+ if (!selectSequence(MIB, TII, MRI, TRI, RBI))
+ return false;
break;
+ }
+ case G_EXTRACT: {
+ if (!selectExtract(MIB, TII, MRI, TRI, RBI))
+ return false;
+ break;
+ }
default:
return false;
}
diff --git a/lib/Target/ARM/ARMInstructionSelector.h b/lib/Target/ARM/ARMInstructionSelector.h
index 5072cdd60ce4..530141d92c2c 100644
--- a/lib/Target/ARM/ARMInstructionSelector.h
+++ b/lib/Target/ARM/ARMInstructionSelector.h
@@ -1,4 +1,4 @@
-//===- ARMInstructionSelector ------------------------------------*- C++ -*-==//
+//===- ARMInstructionSelector -----------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,8 +6,10 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//
/// \file
/// This file declares the targeting of the InstructionSelector class for ARM.
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H
@@ -16,9 +18,9 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
namespace llvm {
+
class ARMBaseInstrInfo;
class ARMBaseRegisterInfo;
-class ARMBaseTargetMachine;
class ARMRegisterBankInfo;
class ARMSubtarget;
@@ -27,7 +29,7 @@ public:
ARMInstructionSelector(const ARMSubtarget &STI,
const ARMRegisterBankInfo &RBI);
- virtual bool select(MachineInstr &I) const override;
+ bool select(MachineInstr &I) const override;
private:
const ARMBaseInstrInfo &TII;
@@ -35,5 +37,6 @@ private:
const ARMRegisterBankInfo &RBI;
};
-} // End llvm namespace.
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H
diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp
index 255ea4bc7198..994bbd673dd8 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "ARMLegalizerInfo.h"
+#include "ARMSubtarget.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
@@ -23,22 +24,53 @@ using namespace llvm;
#error "You shouldn't build this"
#endif
-ARMLegalizerInfo::ARMLegalizerInfo() {
+ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
using namespace TargetOpcode;
const LLT p0 = LLT::pointer(0, 32);
+ const LLT s1 = LLT::scalar(1);
const LLT s8 = LLT::scalar(8);
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
setAction({G_FRAME_INDEX, p0}, Legal);
- setAction({G_LOAD, s32}, Legal);
- setAction({G_LOAD, 1, p0}, Legal);
+ for (unsigned Op : {G_LOAD, G_STORE}) {
+ for (auto Ty : {s1, s8, s16, s32, p0})
+ setAction({Op, Ty}, Legal);
+ setAction({Op, 1, p0}, Legal);
+ }
- for (auto Ty : {s8, s16, s32})
+ for (auto Ty : {s1, s8, s16, s32})
setAction({G_ADD, Ty}, Legal);
+ for (unsigned Op : {G_SEXT, G_ZEXT}) {
+ setAction({Op, s32}, Legal);
+ for (auto Ty : {s1, s8, s16})
+ setAction({Op, 1, Ty}, Legal);
+ }
+
+ setAction({G_GEP, p0}, Legal);
+ setAction({G_GEP, 1, s32}, Legal);
+
+ setAction({G_CONSTANT, s32}, Legal);
+
+ if (!ST.useSoftFloat() && ST.hasVFP2()) {
+ setAction({G_FADD, s32}, Legal);
+ setAction({G_FADD, s64}, Legal);
+
+ setAction({G_LOAD, s64}, Legal);
+ setAction({G_STORE, s64}, Legal);
+ } else {
+ for (auto Ty : {s32, s64})
+ setAction({G_FADD, Ty}, Libcall);
+ }
+
+ for (unsigned Op : {G_FREM, G_FPOW})
+ for (auto Ty : {s32, s64})
+ setAction({Op, Ty}, Libcall);
+
computeTables();
}
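// For illustration (not part of the patch): with the table above, a 64-bit
// G_FADD on a soft-float target is marked Libcall, so the legalizer will
// rewrite it into a call to a runtime routine (e.g. __adddf3 or the AEABI
// equivalent); on a hard-float VFP2 target the same operation stays Legal
// and flows through to the instruction selector.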
diff --git a/lib/Target/ARM/ARMLegalizerInfo.h b/lib/Target/ARM/ARMLegalizerInfo.h
index ca3eea81271b..0b8a608a6bde 100644
--- a/lib/Target/ARM/ARMLegalizerInfo.h
+++ b/lib/Target/ARM/ARMLegalizerInfo.h
@@ -18,12 +18,12 @@
namespace llvm {
-class LLVMContext;
+class ARMSubtarget;
/// This class provides the information for the target register banks.
class ARMLegalizerInfo : public LegalizerInfo {
public:
- ARMLegalizerInfo();
+ ARMLegalizerInfo(const ARMSubtarget &ST);
};
} // End llvm namespace.
#endif
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 48ab491b5be9..72fcf7cd6a4f 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -517,8 +517,12 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
if (InsertSub) {
// An instruction above couldn't be updated, so insert a sub.
- AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
- .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
+ .add(t1CondCodeOp(true))
+ .addReg(Base)
+ .addImm(WordOffset * 4)
+ .addImm(Pred)
+ .addReg(PredReg);
return;
}
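// For illustration (hypothetical operands, not part of this patch): the new
// fluent style spells out each operand in emission order, the Thumb1 CPSR
// def first and the predicate last:
//   BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), DestReg)
//       .add(t1CondCodeOp(/*isDead=*/true))
//       .addReg(SrcReg)
//       .addImm(Imm)
//       .add(predOps(Pred, PredReg));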
@@ -534,9 +538,12 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
// information and *always* have to reset at the end of a block.
// See PR21029.
if (MBBI != MBB.end()) --MBBI;
- AddDefaultT1CC(
- BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
- .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
+ .add(t1CondCodeOp(true))
+ .addReg(Base)
+ .addImm(WordOffset * 4)
+ .addImm(Pred)
+ .addReg(PredReg);
}
}
@@ -602,13 +609,12 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
// Exception: If the base register is in the input reglist, Thumb1 LDM is
// non-writeback.
// It's also not possible to merge an STR of the base register in Thumb1.
- if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
+ if (isThumb1 && ContainsReg(Regs, Base)) {
assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
- if (Opcode == ARM::tLDRi) {
+ if (Opcode == ARM::tLDRi)
Writeback = false;
- } else if (Opcode == ARM::tSTRi) {
+ else if (Opcode == ARM::tSTRi)
return nullptr;
- }
}
ARM_AM::AMSubMode Mode = ARM_AM::ia;
@@ -700,8 +706,8 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
.addReg(Base, getKillRegState(KillOldBase));
} else
BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
- .addReg(Base, getKillRegState(KillOldBase))
- .addImm(Pred).addReg(PredReg);
+ .addReg(Base, getKillRegState(KillOldBase))
+ .add(predOps(Pred, PredReg));
// The following ADDS/SUBS becomes an update.
Base = NewBase;
@@ -710,17 +716,21 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
if (BaseOpc == ARM::tADDrSPi) {
assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
- .addImm(Pred).addReg(PredReg);
+ .addReg(Base, getKillRegState(KillOldBase))
+ .addImm(Offset / 4)
+ .add(predOps(Pred, PredReg));
} else
- AddDefaultT1CC(
- BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
- .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
- .addImm(Pred).addReg(PredReg);
+ BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
+ .add(t1CondCodeOp(true))
+ .addReg(Base, getKillRegState(KillOldBase))
+ .addImm(Offset)
+ .add(predOps(Pred, PredReg));
} else {
BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
- .addImm(Pred).addReg(PredReg).addReg(0);
+ .addReg(Base, getKillRegState(KillOldBase))
+ .addImm(Offset)
+ .add(predOps(Pred, PredReg))
+ .add(condCodeOp());
}
Base = NewBase;
BaseKill = true; // New base is always killed straight away.
@@ -1259,7 +1269,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// Transfer the rest of operands.
for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
- MIB.addOperand(MI->getOperand(OpNum));
+ MIB.add(MI->getOperand(OpNum));
// Transfer memoperands.
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
@@ -1392,14 +1402,19 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
} else {
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
- .addReg(Base, RegState::Define)
- .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
+ .addReg(Base, RegState::Define)
+ .addReg(Base)
+ .addReg(0)
+ .addImm(Imm)
+ .add(predOps(Pred, PredReg));
}
} else {
// t2LDR_PRE, t2LDR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
- .addReg(Base, RegState::Define)
- .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(Base, RegState::Define)
+ .addReg(Base)
+ .addImm(Offset)
+ .add(predOps(Pred, PredReg));
}
} else {
MachineOperand &MO = MI->getOperand(0);
@@ -1410,13 +1425,18 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
// STR_PRE, STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
- .addReg(MO.getReg(), getKillRegState(MO.isKill()))
- .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base)
+ .addReg(0)
+ .addImm(Imm)
+ .add(predOps(Pred, PredReg));
} else {
// t2STR_PRE, t2STR_POST
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
- .addReg(MO.getReg(), getKillRegState(MO.isKill()))
- .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ .addReg(MO.getReg(), getKillRegState(MO.isKill()))
+ .addReg(Base)
+ .addImm(Offset)
+ .add(predOps(Pred, PredReg));
}
}
MBB.erase(MBBI);
@@ -1462,12 +1482,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
DebugLoc DL = MI.getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
- MIB.addOperand(Reg0Op).addOperand(Reg1Op)
- .addReg(BaseOp.getReg(), RegState::Define);
+ MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
} else {
assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
- MIB.addReg(BaseOp.getReg(), RegState::Define)
- .addOperand(Reg0Op).addOperand(Reg1Op);
+ MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
}
MIB.addReg(BaseOp.getReg(), RegState::Kill)
.addImm(Offset).addImm(Pred).addReg(PredReg);
@@ -1477,7 +1495,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
// Transfer implicit operands.
for (const MachineOperand &MO : MI.implicit_operands())
- MIB.addOperand(MO);
+ MIB.add(MO);
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MBB.erase(MBBI);
@@ -1891,8 +1909,9 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
for (auto Use : Prev->uses())
if (Use.isKill()) {
- AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
- .addReg(Use.getReg(), RegState::Kill))
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
+ .addReg(Use.getReg(), RegState::Kill)
+ .add(predOps(ARMCC::AL))
.copyImplicitOps(*MBBI);
MBB.erase(MBBI);
MBB.erase(Prev);
@@ -1942,6 +1961,7 @@ namespace {
static char ID;
ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
+ AliasAnalysis *AA;
const DataLayout *TD;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -1955,6 +1975,11 @@ namespace {
return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
}
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
private:
bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
unsigned &NewOpc, unsigned &EvenReg,
@@ -1984,6 +2009,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
TRI = STI->getRegisterInfo();
MRI = &Fn.getRegInfo();
MF = &Fn;
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool Modified = false;
for (MachineBasicBlock &MFI : Fn)
@@ -1997,28 +2023,19 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
MachineBasicBlock::iterator E,
SmallPtrSetImpl<MachineInstr*> &MemOps,
SmallSet<unsigned, 4> &MemRegs,
- const TargetRegisterInfo *TRI) {
+ const TargetRegisterInfo *TRI,
+ AliasAnalysis *AA) {
// Are there stores / loads / calls between them?
- // FIXME: This is overly conservative. We should make use of alias information
- // some day.
SmallSet<unsigned, 4> AddedRegPressure;
while (++I != E) {
if (I->isDebugValue() || MemOps.count(&*I))
continue;
if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
return false;
- if (isLd && I->mayStore())
- return false;
- if (!isLd) {
- if (I->mayLoad())
- return false;
- // It's not safe to move the first 'str' down.
- // str r1, [r0]
- // strh r5, [r0]
- // str r4, [r0, #+4]
- if (I->mayStore())
- return false;
- }
+ if (I->mayStore() || (!isLd && I->mayLoad()))
+ for (MachineInstr *MemOp : MemOps)
+ if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
+ return false;
for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
MachineOperand &MO = I->getOperand(j);
if (!MO.isReg())
@@ -2142,33 +2159,40 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
unsigned LastBytes = 0;
unsigned NumMove = 0;
for (int i = Ops.size() - 1; i >= 0; --i) {
+ // Make sure all the operations are of the same kind.
MachineInstr *Op = Ops[i];
- unsigned Loc = MI2LocMap[Op];
- if (Loc <= FirstLoc) {
- FirstLoc = Loc;
- FirstOp = Op;
- }
- if (Loc >= LastLoc) {
- LastLoc = Loc;
- LastOp = Op;
- }
-
unsigned LSMOpcode
= getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
if (LastOpcode && LSMOpcode != LastOpcode)
break;
+ // Check that the offsets form a contiguous sequence.
int Offset = getMemoryOpOffset(*Op);
unsigned Bytes = getLSMultipleTransferSize(Op);
if (LastBytes) {
if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
break;
}
+
+ // Don't try to reschedule too many instructions.
+ if (NumMove == 8) // FIXME: Tune this limit.
+ break;
+
+ // Found a mergeable instruction; save information about it.
+ ++NumMove;
LastOffset = Offset;
LastBytes = Bytes;
LastOpcode = LSMOpcode;
- if (++NumMove == 8) // FIXME: Tune this limit.
- break;
+
+ unsigned Loc = MI2LocMap[Op];
+ if (Loc <= FirstLoc) {
+ FirstLoc = Loc;
+ FirstOp = Op;
+ }
+ if (Loc >= LastLoc) {
+ LastLoc = Loc;
+ LastOp = Op;
+ }
}
if (NumMove <= 1)
@@ -2176,7 +2200,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
else {
SmallPtrSet<MachineInstr*, 4> MemOps;
SmallSet<unsigned, 4> MemRegs;
- for (int i = NumMove-1; i >= 0; --i) {
+ for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
MemOps.insert(Ops[i]);
MemRegs.insert(Ops[i]->getOperand(0).getReg());
}
@@ -2186,7 +2210,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
if (DoMove)
DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
- MemOps, MemRegs, TRI);
+ MemOps, MemRegs, TRI, AA);
if (!DoMove) {
for (unsigned i = 0; i != NumMove; ++i)
Ops.pop_back();
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 07044b9697b6..0fd98268723a 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -14,23 +14,36 @@
#include "ARM.h"
#include "ARMAsmPrinter.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCExpr.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
-using namespace llvm;
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
+using namespace llvm;
MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
const MCSymbol *Symbol) {
+ MCSymbolRefExpr::VariantKind SymbolVariant = MCSymbolRefExpr::VK_None;
+ if (MO.getTargetFlags() & ARMII::MO_SBREL)
+ SymbolVariant = MCSymbolRefExpr::VK_ARM_SBREL;
+
const MCExpr *Expr =
- MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext);
switch (MO.getTargetFlags() & ARMII::MO_OPTION_MASK) {
default:
llvm_unreachable("Unknown target flag on symbol operand");
@@ -38,12 +51,12 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
break;
case ARMII::MO_LO16:
Expr =
- MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext);
Expr = ARMMCExpr::createLower16(Expr, OutContext);
break;
case ARMII::MO_HI16:
Expr =
- MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext);
Expr = ARMMCExpr::createUpper16(Expr, OutContext);
break;
}
@@ -75,11 +88,10 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
MO.getMBB()->getSymbol(), OutContext));
break;
- case MachineOperand::MO_GlobalAddress: {
+ case MachineOperand::MO_GlobalAddress:
MCOp = GetSymbolRef(MO,
GetARMGVSymbol(MO.getGlobal(), MO.getTargetFlags()));
break;
- }
case MachineOperand::MO_ExternalSymbol:
MCOp = GetSymbolRef(MO,
GetExternalSymbolSymbol(MO.getSymbolName()));
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index 50d8f0941460..e25d36b57616 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
using namespace llvm;
@@ -15,10 +16,4 @@ void ARMFunctionInfo::anchor() {}
ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
: isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()),
- hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()),
- StByValParamsPadding(0), ArgRegsSaveSize(0), ReturnRegsCount(0),
- HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false),
- FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
- GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false), ArgumentStackSize(0),
- IsSplitCSR(false), PromotedGlobalsIncrease(0) {}
+ hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()) {}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 8c485e89bf54..816116772995 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -14,11 +14,11 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
-#include "ARMSubtarget.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <utility>
namespace llvm {
@@ -29,42 +29,42 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// isThumb - True if this function is compiled under Thumb mode.
  /// Used to initialize Align, so it must precede it.
- bool isThumb;
+ bool isThumb = false;
  /// hasThumb2 - True if the target architecture supports Thumb2. Do not
  /// use this to determine whether the function is compiled under Thumb
  /// mode; for that, use 'isThumb'.
- bool hasThumb2;
+ bool hasThumb2 = false;
  /// StByValParamsPadding - For a parameter that is split between
  /// GPRs and memory: when recovering the GPR part, if StackAlignment > 4
  /// and the size of the GPR part mod StackAlignment != 0, we need to
  /// insert a gap before the parameter's start address. This allows the
  /// GPR part to be "attached" to the part that was passed via the stack.
- unsigned StByValParamsPadding;
+ unsigned StByValParamsPadding = 0;
  /// ArgRegsSaveSize - Size of the register save area for vararg functions.
///
- unsigned ArgRegsSaveSize;
+ unsigned ArgRegsSaveSize = 0;
/// ReturnRegsCount - Number of registers used up in the return.
- unsigned ReturnRegsCount;
+ unsigned ReturnRegsCount = 0;
/// HasStackFrame - True if this function has a stack frame. Set by
/// determineCalleeSaves().
- bool HasStackFrame;
+ bool HasStackFrame = false;
/// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
/// emitPrologue.
- bool RestoreSPFromFP;
+ bool RestoreSPFromFP = false;
  /// LRSpilledForFarJump - True if the LR register has been spilled to
  /// enable a far jump.
- bool LRSpilledForFarJump;
+ bool LRSpilledForFarJump = false;
/// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer
/// spill stack offset.
- unsigned FramePtrSpillOffset;
+ unsigned FramePtrSpillOffset = 0;
/// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved
/// register spills areas. For Mac OS X:
@@ -77,16 +77,16 @@ class ARMFunctionInfo : public MachineFunctionInfo {
///
/// Also see AlignedDPRCSRegs below. Not all D-regs need to go in area 3.
/// Some may be spilled after the stack has been realigned.
- unsigned GPRCS1Offset;
- unsigned GPRCS2Offset;
- unsigned DPRCSOffset;
+ unsigned GPRCS1Offset = 0;
+ unsigned GPRCS2Offset = 0;
+ unsigned DPRCSOffset = 0;
/// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
/// areas.
- unsigned GPRCS1Size;
- unsigned GPRCS2Size;
- unsigned DPRCSAlignGapSize;
- unsigned DPRCSSize;
+ unsigned GPRCS1Size = 0;
+ unsigned GPRCS2Size = 0;
+ unsigned DPRCSAlignGapSize = 0;
+ unsigned DPRCSSize = 0;
/// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
/// the aligned portion of the stack frame. This is always a contiguous
@@ -95,15 +95,15 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// We do not keep track of the frame indices used for these registers - they
/// behave like any other frame index in the aligned stack frame. These
/// registers also aren't included in DPRCSSize above.
- unsigned NumAlignedDPRCS2Regs;
+ unsigned NumAlignedDPRCS2Regs = 0;
- unsigned PICLabelUId;
+ unsigned PICLabelUId = 0;
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
- int VarArgsFrameIndex;
+ int VarArgsFrameIndex = 0;
/// HasITBlocks - True if IT blocks have been inserted.
- bool HasITBlocks;
+ bool HasITBlocks = false;
  /// CPEClones - Track constant pool entry clones created by the Constant
  /// Island pass.
@@ -111,7 +111,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
  /// ArgumentStackSize - the number of bytes on the stack consumed by the
  /// arguments being passed on the stack.
- unsigned ArgumentStackSize;
+ unsigned ArgumentStackSize = 0;
/// CoalescedWeights - mapping of basic blocks to the rolling counter of
/// coalesced weights.
@@ -119,26 +119,16 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// True if this function has a subset of CSRs that is handled explicitly via
/// copies.
- bool IsSplitCSR;
+ bool IsSplitCSR = false;
/// Globals that have had their storage promoted into the constant pool.
SmallPtrSet<const GlobalVariable*,2> PromotedGlobals;
  /// The amount by which the literal pool has been increased due to promoted globals.
- int PromotedGlobalsIncrease;
+ int PromotedGlobalsIncrease = 0;
public:
- ARMFunctionInfo() :
- isThumb(false),
- hasThumb2(false),
- ArgRegsSaveSize(0), ReturnRegsCount(0), HasStackFrame(false),
- RestoreSPFromFP(false),
- LRSpilledForFarJump(false),
- FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
- GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
- NumAlignedDPRCS2Regs(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false),
- PromotedGlobalsIncrease(0) {}
+ ARMFunctionInfo() = default;
explicit ARMFunctionInfo(MachineFunction &MF);
@@ -250,6 +240,7 @@ public:
PromotedGlobalsIncrease = Sz;
}
};
-} // End llvm namespace
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 324087d670b5..08f3da738868 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -13,11 +13,15 @@
#include "ARMRegisterBankInfo.h"
#include "ARMInstrInfo.h" // For the register classes
+#include "ARMSubtarget.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#define GET_TARGET_REGBANK_IMPL
+#include "ARMGenRegisterBank.inc"
+
using namespace llvm;
#ifndef LLVM_BUILD_GLOBAL_ISEL
@@ -29,44 +33,109 @@ using namespace llvm;
// into an ARMGenRegisterBankInfo.def (similar to AArch64).
namespace llvm {
namespace ARM {
-const uint32_t GPRCoverageData[] = {
- // Classes 0-31
- (1u << ARM::GPRRegClassID) | (1u << ARM::GPRwithAPSRRegClassID) |
- (1u << ARM::GPRnopcRegClassID) | (1u << ARM::rGPRRegClassID) |
- (1u << ARM::hGPRRegClassID) | (1u << ARM::tGPRRegClassID) |
- (1u << ARM::GPRnopc_and_hGPRRegClassID) |
- (1u << ARM::hGPR_and_rGPRRegClassID) | (1u << ARM::tcGPRRegClassID) |
- (1u << ARM::tGPR_and_tcGPRRegClassID) | (1u << ARM::GPRspRegClassID) |
- (1u << ARM::hGPR_and_tcGPRRegClassID),
- // Classes 32-63
- 0,
- // Classes 64-96
- 0,
- // FIXME: Some of the entries below this point can be safely removed once
- // this is tablegenerated. It's only needed because of the hardcoded
- // register class limit.
- // Classes 97-128
- 0,
- // Classes 129-160
- 0,
- // Classes 161-192
- 0,
- // Classes 193-224
- 0,
+enum PartialMappingIdx {
+ PMI_GPR,
+ PMI_SPR,
+ PMI_DPR,
+ PMI_Min = PMI_GPR,
+};
+
+RegisterBankInfo::PartialMapping PartMappings[]{
+ // GPR Partial Mapping
+ {0, 32, GPRRegBank},
+ // SPR Partial Mapping
+ {0, 32, FPRRegBank},
+ // DPR Partial Mapping
+ {0, 64, FPRRegBank},
};
-RegisterBank GPRRegBank(ARM::GPRRegBankID, "GPRB", 32, ARM::GPRCoverageData);
-RegisterBank *RegBanks[] = {&GPRRegBank};
+#ifndef NDEBUG
+static bool checkPartMapping(const RegisterBankInfo::PartialMapping &PM,
+ unsigned Start, unsigned Length,
+ unsigned RegBankID) {
+ return PM.StartIdx == Start && PM.Length == Length &&
+ PM.RegBank->getID() == RegBankID;
+}
+
+static void checkPartialMappings() {
+ assert(
+ checkPartMapping(PartMappings[PMI_GPR - PMI_Min], 0, 32, GPRRegBankID) &&
+ "Wrong mapping for GPR");
+ assert(
+ checkPartMapping(PartMappings[PMI_SPR - PMI_Min], 0, 32, FPRRegBankID) &&
+ "Wrong mapping for SPR");
+ assert(
+ checkPartMapping(PartMappings[PMI_DPR - PMI_Min], 0, 64, FPRRegBankID) &&
+ "Wrong mapping for DPR");
+}
+#endif
-RegisterBankInfo::PartialMapping GPRPartialMapping{0, 32, GPRRegBank};
+enum ValueMappingIdx {
+ InvalidIdx = 0,
+ GPR3OpsIdx = 1,
+ SPR3OpsIdx = 4,
+ DPR3OpsIdx = 7,
+};
RegisterBankInfo::ValueMapping ValueMappings[] = {
- {&GPRPartialMapping, 1}, {&GPRPartialMapping, 1}, {&GPRPartialMapping, 1}};
+ // invalid
+ {nullptr, 0},
+ // 3 ops in GPRs
+ {&PartMappings[PMI_GPR - PMI_Min], 1},
+ {&PartMappings[PMI_GPR - PMI_Min], 1},
+ {&PartMappings[PMI_GPR - PMI_Min], 1},
+ // 3 ops in SPRs
+ {&PartMappings[PMI_SPR - PMI_Min], 1},
+ {&PartMappings[PMI_SPR - PMI_Min], 1},
+ {&PartMappings[PMI_SPR - PMI_Min], 1},
+ // 3 ops in DPRs
+ {&PartMappings[PMI_DPR - PMI_Min], 1},
+ {&PartMappings[PMI_DPR - PMI_Min], 1},
+ {&PartMappings[PMI_DPR - PMI_Min], 1}};
+
+#ifndef NDEBUG
+static bool checkValueMapping(const RegisterBankInfo::ValueMapping &VM,
+ RegisterBankInfo::PartialMapping *BreakDown) {
+ return VM.NumBreakDowns == 1 && VM.BreakDown == BreakDown;
+}
+
+static void checkValueMappings() {
+ assert(checkValueMapping(ValueMappings[GPR3OpsIdx],
+ &PartMappings[PMI_GPR - PMI_Min]) &&
+ "Wrong value mapping for 3 GPR ops instruction");
+ assert(checkValueMapping(ValueMappings[GPR3OpsIdx + 1],
+ &PartMappings[PMI_GPR - PMI_Min]) &&
+ "Wrong value mapping for 3 GPR ops instruction");
+ assert(checkValueMapping(ValueMappings[GPR3OpsIdx + 2],
+ &PartMappings[PMI_GPR - PMI_Min]) &&
+ "Wrong value mapping for 3 GPR ops instruction");
+
+ assert(checkValueMapping(ValueMappings[SPR3OpsIdx],
+ &PartMappings[PMI_SPR - PMI_Min]) &&
+ "Wrong value mapping for 3 SPR ops instruction");
+ assert(checkValueMapping(ValueMappings[SPR3OpsIdx + 1],
+ &PartMappings[PMI_SPR - PMI_Min]) &&
+ "Wrong value mapping for 3 SPR ops instruction");
+ assert(checkValueMapping(ValueMappings[SPR3OpsIdx + 2],
+ &PartMappings[PMI_SPR - PMI_Min]) &&
+ "Wrong value mapping for 3 SPR ops instruction");
+
+ assert(checkValueMapping(ValueMappings[DPR3OpsIdx],
+ &PartMappings[PMI_DPR - PMI_Min]) &&
+ "Wrong value mapping for 3 DPR ops instruction");
+ assert(checkValueMapping(ValueMappings[DPR3OpsIdx + 1],
+ &PartMappings[PMI_DPR - PMI_Min]) &&
+ "Wrong value mapping for 3 DPR ops instruction");
+ assert(checkValueMapping(ValueMappings[DPR3OpsIdx + 2],
+ &PartMappings[PMI_DPR - PMI_Min]) &&
+ "Wrong value mapping for 3 DPR ops instruction");
+}
+#endif
} // end namespace ARM
} // end namespace llvm
ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
- : RegisterBankInfo(ARM::RegBanks, ARM::NumRegisterBanks) {
+ : ARMGenRegisterBankInfo() {
static bool AlreadyInit = false;
// We have only one set of register banks, whatever the subtarget
// is. Therefore, the initialization of the RegBanks table should be
@@ -97,6 +166,11 @@ ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPR_and_tcGPRRegClassID)) &&
"Subclass not added?");
assert(RBGPR.getSize() == 32 && "GPRs should hold up to 32-bit");
+
+#ifndef NDEBUG
+ ARM::checkPartialMappings();
+ ARM::checkValueMappings();
+#endif
}
const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass(
@@ -105,8 +179,16 @@ const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass(
switch (RC.getID()) {
case GPRRegClassID:
+ case GPRnopcRegClassID:
+ case GPRspRegClassID:
case tGPR_and_tcGPRRegClassID:
+ case tGPRRegClassID:
return getRegBank(ARM::GPRRegBankID);
+ case SPR_8RegClassID:
+ case SPRRegClassID:
+ case DPR_8RegClassID:
+ case DPRRegClassID:
+ return getRegBank(ARM::FPRRegBankID);
default:
llvm_unreachable("Unsupported register kind");
}
@@ -128,23 +210,83 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
using namespace TargetOpcode;
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
unsigned NumOperands = MI.getNumOperands();
- const ValueMapping *OperandsMapping = &ARM::ValueMappings[0];
+ const ValueMapping *OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];
switch (Opc) {
case G_ADD:
- case G_LOAD:
+ case G_SEXT:
+ case G_ZEXT:
+ case G_GEP:
// FIXME: We're abusing the fact that everything lives in a GPR for now; in
// the real world we would use different mappings.
- OperandsMapping = &ARM::ValueMappings[0];
+ OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx];
+ break;
+ case G_LOAD:
+ case G_STORE:
+ OperandsMapping =
+ Ty.getSizeInBits() == 64
+ ? getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx]})
+ : &ARM::ValueMappings[ARM::GPR3OpsIdx];
+ break;
+ case G_FADD:
+ assert((Ty.getSizeInBits() == 32 || Ty.getSizeInBits() == 64) &&
+ "Unsupported size for G_FADD");
+ OperandsMapping = Ty.getSizeInBits() == 64
+ ? &ARM::ValueMappings[ARM::DPR3OpsIdx]
+ : &ARM::ValueMappings[ARM::SPR3OpsIdx];
break;
+ case G_CONSTANT:
case G_FRAME_INDEX:
- OperandsMapping = getOperandsMapping({&ARM::ValueMappings[0], nullptr});
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
break;
+ case G_SEQUENCE: {
+ // We only support G_SEQUENCE for creating a double precision floating point
+ // value out of two GPRs.
+ LLT Ty1 = MRI.getType(MI.getOperand(1).getReg());
+ LLT Ty2 = MRI.getType(MI.getOperand(3).getReg());
+ if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 ||
+ Ty2.getSizeInBits() != 32)
+ return InstructionMapping{};
+ OperandsMapping =
+ getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx],
+ &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr,
+ &ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr});
+ break;
+ }
+ case G_EXTRACT: {
+ // We only support G_EXTRACT for splitting a double precision floating point
+ // value into two GPRs.
+ LLT Ty1 = MRI.getType(MI.getOperand(1).getReg());
+ if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 64 ||
+ MI.getOperand(2).getImm() % 32 != 0)
+ return InstructionMapping{};
+ OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx],
+ &ARM::ValueMappings[ARM::DPR3OpsIdx],
+ nullptr, nullptr});
+ break;
+ }
default:
return InstructionMapping{};
}
+#ifndef NDEBUG
+ for (unsigned i = 0; i < NumOperands; i++) {
+ for (const auto &Mapping : OperandsMapping[i]) {
+ assert(
+ (Mapping.RegBank->getID() != ARM::FPRRegBankID ||
+ MF.getSubtarget<ARMSubtarget>().hasVFP2()) &&
+ "Trying to use floating point register bank on target without vfp");
+ }
+ }
+#endif
+
return InstructionMapping{DefaultMappingID, /*Cost=*/1, OperandsMapping,
NumOperands};
}
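
For reference, the size-driven mapping choice in getInstrMapping above reduces to the following standalone sketch; the enum and helper names are hypothetical stand-ins for the ARM::*OpsIdx constants, not the RegisterBankInfo API:

    #include <cassert>

    // Hypothetical stand-ins for ARM::GPR3OpsIdx / ARM::SPR3OpsIdx / ARM::DPR3OpsIdx.
    enum MappingIdx { GPR3OpsIdx, SPR3OpsIdx, DPR3OpsIdx };

    // Mirrors the G_LOAD/G_STORE case: only 64-bit values take the FP (DPR) bank.
    MappingIdx pickLoadStoreMapping(unsigned SizeInBits) {
      return SizeInBits == 64 ? DPR3OpsIdx : GPR3OpsIdx;
    }

    // Mirrors the G_FADD case: 32-bit operands map to SPR, 64-bit to DPR.
    MappingIdx pickFAddMapping(unsigned SizeInBits) {
      assert((SizeInBits == 32 || SizeInBits == 64) && "Unsupported size for G_FADD");
      return SizeInBits == 64 ? DPR3OpsIdx : SPR3OpsIdx;
    }

The G_SEQUENCE/G_EXTRACT cases follow the same shape, additionally checking that the pieces are exactly two 32-bit GPR halves of a 64-bit DPR value.
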
diff --git a/lib/Target/ARM/ARMRegisterBankInfo.h b/lib/Target/ARM/ARMRegisterBankInfo.h
index 773920ee57a7..5222c1e6389f 100644
--- a/lib/Target/ARM/ARMRegisterBankInfo.h
+++ b/lib/Target/ARM/ARMRegisterBankInfo.h
@@ -16,19 +16,20 @@
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#define GET_REGBANK_DECLARATIONS
+#include "ARMGenRegisterBank.inc"
+
namespace llvm {
class TargetRegisterInfo;
-namespace ARM {
-enum {
- GPRRegBankID = 0, // General purpose registers
- NumRegisterBanks,
+class ARMGenRegisterBankInfo : public RegisterBankInfo {
+#define GET_TARGET_REGBANK_CLASS
+#include "ARMGenRegisterBank.inc"
};
-} // end namespace ARM
/// This class provides the information for the target register banks.
-class ARMRegisterBankInfo final : public RegisterBankInfo {
+class ARMRegisterBankInfo final : public ARMGenRegisterBankInfo {
public:
ARMRegisterBankInfo(const TargetRegisterInfo &TRI);
diff --git a/lib/Target/ARM/ARMRegisterBanks.td b/lib/Target/ARM/ARMRegisterBanks.td
new file mode 100644
index 000000000000..7cd2d60d36a4
--- /dev/null
+++ b/lib/Target/ARM/ARMRegisterBanks.td
@@ -0,0 +1,14 @@
+//=- ARMRegisterBanks.td - Describe the ARM Register Banks --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+def GPRRegBank : RegisterBank<"GPRB", [GPR, GPRwithAPSR]>;
+def FPRRegBank : RegisterBank<"FPRB", [SPR, DPR]>;
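
The GET_REGBANK_DECLARATIONS and GET_TARGET_REGBANK_CLASS stanzas pulled in from ARMGenRegisterBank.inc (see the ARMRegisterBankInfo.h hunk above) are generated from these two defs. A sketch of the shape of that generated code, under the assumption that TableGen emits one <Name>ID enumerator per RegisterBank def (illustrative only, not the verbatim output):

    // Rough shape of the generated declarations (illustrative only).
    namespace llvm {
    namespace ARM {
    enum {
      GPRRegBankID = 0, // from def GPRRegBank
      FPRRegBankID = 1, // from def FPRRegBank
      NumRegisterBanks,
    };
    } // end namespace ARM
    } // end namespace llvm
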
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index b7d2d34614df..87eb4c2b9074 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Instruction scheduling annotations for out-of-order CPUs.
+// Instruction scheduling annotations for in-order and out-of-order CPUs.
// These annotations are independent of the itinerary class defined below.
// Here we define the subtarget independent read/write per-operand resources.
// The subtarget schedule definitions will then map these to the subtarget's
@@ -54,6 +54,9 @@
// }
// def : ReadAdvance<ReadAdvanceALUsr, 3>;
+//===----------------------------------------------------------------------===//
+// Sched definitions for integer pipeline instructions
+//
// Basic ALU operation.
def WriteALU : SchedWrite;
def ReadALU : SchedRead;
@@ -69,24 +72,65 @@ def WriteCMP : SchedWrite;
def WriteCMPsi : SchedWrite;
def WriteCMPsr : SchedWrite;
-// Division.
-def WriteDiv : SchedWrite;
+// Multiplies.
+def WriteMUL16 : SchedWrite; // 16-bit multiply.
+def WriteMUL32 : SchedWrite; // 32-bit multiply.
+def WriteMUL64Lo : SchedWrite; // 64-bit result. Low reg.
+def WriteMUL64Hi : SchedWrite; // 64-bit result. High reg.
+def ReadMUL : SchedRead;
+
+// Multiply-accumulates.
+def WriteMAC16 : SchedWrite; // 16-bit mac.
+def WriteMAC32 : SchedWrite; // 32-bit mac.
+def WriteMAC64Lo : SchedWrite; // 64-bit mac. Low reg.
+def WriteMAC64Hi : SchedWrite; // 64-bit mac. High reg.
+def ReadMAC : SchedRead;
+
+// Divisions.
+def WriteDIV : SchedWrite;
-// Loads.
+// Loads/Stores.
def WriteLd : SchedWrite;
def WritePreLd : SchedWrite;
+def WriteST : SchedWrite;
// Branches.
def WriteBr : SchedWrite;
def WriteBrL : SchedWrite;
def WriteBrTbl : SchedWrite;
-// Fixpoint conversions.
-def WriteCvtFP : SchedWrite;
-
// Noop.
def WriteNoop : SchedWrite;
+//===----------------------------------------------------------------------===//
+// Sched definitions for floating-point and NEON instructions
+//
+// Floating point conversions
+def WriteFPCVT : SchedWrite;
+def WriteFPMOV : SchedWrite; // FP -> GPR and vice-versa
+
+// ALU operations (32/64-bit)
+def WriteFPALU32 : SchedWrite;
+def WriteFPALU64 : SchedWrite;
+
+// Multiplication
+def WriteFPMUL32 : SchedWrite;
+def WriteFPMUL64 : SchedWrite;
+def ReadFPMUL : SchedRead; // multiplier read
+def ReadFPMAC : SchedRead; // accumulator read
+
+// Multiply-accumulate
+def WriteFPMAC32 : SchedWrite;
+def WriteFPMAC64 : SchedWrite;
+
+// Division
+def WriteFPDIV32 : SchedWrite;
+def WriteFPDIV64 : SchedWrite;
+
+// Square-root
+def WriteFPSQRT32 : SchedWrite;
+def WriteFPSQRT64 : SchedWrite;
+
// Define TII for use in SchedVariant Predicates.
def : PredicateProlog<[{
const ARMBaseInstrInfo *TII =
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 519e595bd184..8fb8a2a3b6d2 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1944,6 +1944,16 @@ def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5;
def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; }
def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4;
let NumMicroOps = 0; }
+def : SchedAlias<WriteMUL16, A9WriteM16>;
+def : SchedAlias<WriteMUL32, A9WriteM>;
+def : SchedAlias<WriteMUL64Lo, A9WriteM>;
+def : SchedAlias<WriteMUL64Hi, A9WriteMHi>;
+def : SchedAlias<WriteMAC16, A9WriteM16>;
+def : SchedAlias<WriteMAC32, A9WriteM>;
+def : SchedAlias<WriteMAC64Lo, A9WriteM>;
+def : SchedAlias<WriteMAC64Hi, A9WriteMHi>;
+def : ReadAdvance<ReadMUL, 0>;
+def : ReadAdvance<ReadMAC, 0>;
// Floating-point
// Only one FP or AGU instruction may issue per cycle. We model this
@@ -1953,6 +1963,7 @@ def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; }
+
def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; }
def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; }
@@ -1992,6 +2003,7 @@ def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; }
// Load Integer.
def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; }
+def : SchedAlias<WriteLd, A9WriteL>;
// Load the upper 32-bits using the same micro-op.
def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3;
let NumMicroOps = 0; }
@@ -2471,6 +2483,34 @@ def : SchedAlias<WriteALUsr, A9WriteALUsr>;
def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
def : SchedAlias<ReadALU, A9ReadALU>;
def : SchedAlias<ReadALUsr, A9ReadALU>;
+def : SchedAlias<WriteST, A9WriteS>;
+
+//===---------------------------------------------------------------------===//
+// Floating-point. Map target-defined SchedReadWrites to processor-specific ones
+//
+def : WriteRes<WriteFPCVT, [A9UnitFP, A9UnitAGU]> { let Latency = 4; }
+def : SchedAlias<WriteFPMOV, A9WriteFMov>;
+
+def : SchedAlias<WriteFPALU32, A9WriteF>;
+def : SchedAlias<WriteFPALU64, A9WriteF>;
+
+def : SchedAlias<WriteFPMUL32, A9WriteFMulS>;
+def : SchedAlias<WriteFPMUL64, A9WriteFMulD>;
+
+def : SchedAlias<WriteFPMAC32, A9WriteFMAS>;
+def : SchedAlias<WriteFPMAC64, A9WriteFMAD>;
+
+def : SchedAlias<WriteFPDIV32, A9WriteFDivS>;
+def : SchedAlias<WriteFPDIV64, A9WriteFDivD>;
+def : SchedAlias<WriteFPSQRT32, A9WriteFSqrtS>;
+def : SchedAlias<WriteFPSQRT64, A9WriteFSqrtD>;
+
+def : ReadAdvance<ReadFPMUL, 0>;
+def : ReadAdvance<ReadFPMAC, 0>;
+
+//===---------------------------------------------------------------------===//
+// Subtarget-specific overrides. Map opcodes to lists of SchedReadWrite types.
+//
def : InstRW< [WriteALU],
(instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
"BICrr")>;
@@ -2518,12 +2558,11 @@ def : InstRW<[A9WriteLb],
"LDRH", "LDRSH", "LDRSB")>;
def : InstRW<[A9WriteLbsi], (instregex "LDRrs")>;
-def : WriteRes<WriteDiv, []> { let Latency = 0; }
+def : WriteRes<WriteDIV, []> { let Latency = 0; }
def : WriteRes<WriteBr, [A9UnitB]>;
def : WriteRes<WriteBrL, [A9UnitB]>;
def : WriteRes<WriteBrTbl, [A9UnitB]>;
def : WriteRes<WritePreLd, []>;
-def : SchedAlias<WriteCvtFP, A9WriteF>;
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
} // SchedModel = CortexA9Model
diff --git a/lib/Target/ARM/ARMScheduleR52.td b/lib/Target/ARM/ARMScheduleR52.td
index 1b40742a093b..537e5da9669f 100644
--- a/lib/Target/ARM/ARMScheduleR52.td
+++ b/lib/Target/ARM/ARMScheduleR52.td
@@ -70,15 +70,13 @@ def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; }
def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; }
def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
+// Multiplies - aliased to subtarget-specific SchedWrites later.
+
// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
-def : WriteRes<WriteDiv, [R52UnitDiv]> {
- let Latency = 8; let ResourceCycles = [8]; // not pipelined
+def : WriteRes<WriteDIV, [R52UnitDiv]> {
+ let Latency = 8; let ResourceCycles = [8]; // non-pipelined
}
-// Loads
-def : WriteRes<WriteLd, [R52UnitLd]> { let Latency = 4; }
-def : WriteRes<WritePreLd, [R52UnitLd]> { let Latency = 4; }
-
// Branches - LR written in Late EX2
def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; }
def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; }
@@ -86,11 +84,44 @@ def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; }
// Misc
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
-def : WriteRes<WriteCvtFP, [R52UnitALU]> { let Latency = 3; }
+// Integer pipeline by-passes
def : ReadAdvance<ReadALU, 1>; // Operand needed in EX1 stage
def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
+def : ReadAdvance<ReadMUL, 0>;
+def : ReadAdvance<ReadMAC, 0>;
+
+// Floating-point. Map target-defined SchedReadWrites to subtarget-specific ones.
+def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> { let Latency = 6; }
+
+def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> {
+ let Latency = 6;
+}
+
+def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> {
+ let Latency = 11; // internally two instructions (MUL then ADD)
+}
+def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
+ R52UnitFPALU, R52UnitFPALU]> {
+ let Latency = 11;
+}
+
+def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
+ let Latency = 7; // FP div takes fixed #cycles
+ let ResourceCycles = [7]; // is not pipelined
+}
+
+def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
+ let Latency = 17;
+ let ResourceCycles = [17];
+}
+
+def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
+def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; }
+
+def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
+def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
//===----------------------------------------------------------------------===//
// Subtarget-specific SchedReadWrites.
@@ -106,6 +137,9 @@ def : ReadAdvance<R52Read_F2, 2>;
// Cortex-R52 specific SchedWrites for use with InstRW
def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
+def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> {
+ let Latency = 4; let NumMicroOps = 0;
+}
def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
let Latency = 8; let ResourceCycles = [8]; // not pipelined
}
@@ -120,6 +154,19 @@ def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }
def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }
+// Alias generic SchedReadWrites to subtarget-specific ones.
+def : SchedAlias<WriteMUL16, R52WriteMAC>;
+def : SchedAlias<WriteMUL32, R52WriteMAC>;
+def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
+def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;
+def : SchedAlias<WriteMAC16, R52WriteMAC>;
+def : SchedAlias<WriteMAC32, R52WriteMAC>;
+def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
+def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;
+def : SchedAlias<WritePreLd, R52WriteLd>;
+def : SchedAlias<WriteLd, R52WriteLd>;
+def : SchedAlias<WriteST, R52WriteST>;
+
def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; }
def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
let Latency = 4;
@@ -147,19 +194,17 @@ def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
-def R52WriteFPDIV_SP : SchedWriteRes<[R52UnitFPDIV]> {
- let Latency = 7; // FP div takes fixed #cycles
- let ResourceCycles = [7]; // is not pipelined
- }
-def R52WriteFPDIV_DP : SchedWriteRes<[R52UnitFPDIV]> {
- let Latency = 17;
- let ResourceCycles = [17];
-}
-
-
//===----------------------------------------------------------------------===//
-// Subtarget-specific - map operands to SchedReadWrites
+// Floating-point. Map target-defined SchedReadWrites to processor-specific ones
+//
+def : SchedAlias<WriteFPCVT, R52WriteFPALU_F5>;
+def : SchedAlias<WriteFPMOV, R52WriteFPALU_F3>;
+def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
+def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
+//===----------------------------------------------------------------------===//
+// Subtarget-specific overrides. Map opcodes to lists of SchedReadWrite types.
+//
def : InstRW<[WriteALU], (instrs COPY)>;
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
@@ -235,7 +280,7 @@ def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
"t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
- (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
+ (instregex "t2SDIV", "t2UDIV")>;
// Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
// However, that's non-trivial to specify, so we keep it uniform
@@ -294,15 +339,6 @@ def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;
def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
-//def : InstRW<[R52WriteLd, R52Read_ISS], (instregex "^LDRB?(_PRE_IMM|_POST_IMM)", "LDRrs")>;
-//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_PRE_REG", "LDRB?rr")>;
-//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_POST_REG")>;
-
-//def : InstRW<[R52WriteST, R52Read_ISS], (instregex "STRi12", "PICSTR")>;
-//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_PRE_REG", "STRB?_PRE_REG")>;
-//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_POST_REG", "STRB?_POST_REG")>;
-
-
// Integer Load, Multiple.
foreach Lat = 3-25 in {
def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
@@ -492,12 +528,6 @@ def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VAC
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
-def : InstRW<[R52WriteFPDIV_SP, R52Read_F0, R52Read_F0], (instregex "VDIV(S|H)")>;
-def : InstRW<[R52WriteFPDIV_DP, R52Read_F0, R52Read_F0], (instregex "VDIVD")>;
-
-def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
- (instregex "(VFMA|VFMS|VFNMA|VFNMS)(D|H|S)")>;
-
def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
@@ -687,16 +717,19 @@ def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 6;
let NumMicroOps = 3;
let ResourceCycles = [2];
+ let SingleIssue = 1;
}
def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 7;
let NumMicroOps = 5;
let ResourceCycles = [3];
+ let SingleIssue = 1;
}
def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 8;
let NumMicroOps = 7;
let ResourceCycles = [4];
+ let SingleIssue = 1;
}
def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
let Latency = 5;
@@ -777,9 +810,8 @@ def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHS
def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
-def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VMOV", "VORR", "VORN", "VREV")>;
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VORR", "VORN", "VREV")>;
def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
-def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VMUL", "VNMUL", "VMLA")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
index ea2bf4b578f0..dc041c6c6006 100644
--- a/lib/Target/ARM/ARMScheduleSwift.td
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -133,6 +133,8 @@ let SchedModel = SwiftModel in {
def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
def : ReadAdvance<ReadALU, 0>;
def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
+ def : SchedAlias<WriteLd, SwiftWriteP2ThreeCycle>;
+ def : SchedAlias<WriteST, SwiftWriteP2>;
def SwiftChooseShiftKindP01OneOrTwoCycle : SchedWriteVariant<[
@@ -166,10 +168,10 @@ let SchedModel = SwiftModel in {
def : InstRW<[SwiftWriteP01OneCycle2x_load],
(instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
- def SwiftWriteP0TwoCyleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;
+ def SwiftWriteP0TwoCycleTwoUops : WriteSequence<[SwiftWriteP0OneCycle], 2>;
def SwiftPredP0OneOrTwoCycle : SchedWriteVariant<[
- SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCyleTwoUops ]>,
+ SchedVar<IsPredicatedPred, [ SwiftWriteP0TwoCycleTwoUops ]>,
SchedVar<NoSchedPred, [ SwiftWriteP0OneCycle ]>
]>;
@@ -282,6 +284,18 @@ let SchedModel = SwiftModel in {
let ResourceCycles = [2, 3];
}
+ // Alias generic SchedReadWrites to the subtarget-specific ones.
+ def : SchedAlias<WriteMUL16, SwiftWriteP0FourCycle>;
+ def : SchedAlias<WriteMUL32, SwiftWriteP0FourCycle>;
+ def : SchedAlias<WriteMUL64Lo, SwiftP0P0P01FiveCycle>;
+ def : SchedAlias<WriteMUL64Hi, SwiftWrite5Cycle>;
+ def : SchedAlias<WriteMAC16, SwiftPredP0P01FourFiveCycle>;
+ def : SchedAlias<WriteMAC32, SwiftPredP0P01FourFiveCycle>;
+ def : SchedAlias<WriteMAC64Lo, SwiftWrite5Cycle>;
+ def : SchedAlias<WriteMAC64Hi, Swift2P03P01FiveCycle>;
+ def : ReadAdvance<ReadMUL, 0>;
+ def : SchedAlias<ReadMAC, SwiftReadAdvanceFourCyclesPred>;
+
// 4.2.15 Integer Multiply Accumulate, Long
// 4.2.16 Integer Multiply Accumulate, Dual
// 4.2.17 Integer Multiply Accumulate Accumulate, Long
@@ -300,7 +314,7 @@ let SchedModel = SwiftModel in {
let ResourceCycles = [1, 14];
}
// 4.2.18 Integer Divide
- def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround.
+ def : WriteRes<WriteDIV, [SwiftUnitDiv]>; // Workaround.
def : InstRW <[SwiftDiv],
(instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
@@ -310,7 +324,7 @@ let SchedModel = SwiftModel in {
let Latency = 3;
let NumMicroOps = 2;
}
- def SwiftWriteP2P01FourCyle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
+ def SwiftWriteP2P01FourCycle : SchedWriteRes<[SwiftUnitP2, SwiftUnitP01]> {
let Latency = 4;
let NumMicroOps = 2;
}
@@ -343,7 +357,7 @@ let SchedModel = SwiftModel in {
"tLDR(r|i|spi|pci|pciASM)")>;
def : InstRW<[SwiftWriteP2ThreeCycle],
(instregex "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$")>;
- def : InstRW<[SwiftWriteP2P01FourCyle],
+ def : InstRW<[SwiftWriteP2P01FourCycle],
(instregex "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
"t2LDRpci_pic", "tLDRS(B|H)")>;
def : InstRW<[SwiftWriteP2P01ThreeCycle, SwiftWrBackOne],
@@ -597,8 +611,6 @@ let SchedModel = SwiftModel in {
def : InstRW<[SwiftWriteP1FourCycle],
(instregex "VMUL(S|v|p|f|s)", "VNMULS", "VQDMULH", "VQRDMULH",
"VMULL", "VQDMULL")>;
- def : InstRW<[SwiftWriteP1SixCycle],
- (instregex "VMULD", "VNMULD")>;
def : InstRW<[SwiftWriteP1FourCycle],
(instregex "VMLA", "VMLS", "VNMLA", "VNMLS", "VFMA(S|D)", "VFMS(S|D)",
"VFNMA", "VFNMS", "VMLAL", "VMLSL","VQDMLAL", "VQDMLSL")>;
@@ -607,8 +619,6 @@ let SchedModel = SwiftModel in {
// 4.2.36 Advanced SIMD and VFP, Convert
def : InstRW<[SwiftWriteP1FourCycle], (instregex "VCVT", "V(S|U)IT", "VTO(S|U)")>;
- // Fixpoint conversions.
- def : WriteRes<WriteCvtFP, [SwiftUnitP1]> { let Latency = 4; }
// 4.2.37 Advanced SIMD and VFP, Move
def : InstRW<[SwiftWriteP0TwoCycle],
@@ -1036,6 +1046,30 @@ let SchedModel = SwiftModel in {
def : InstRW<[SwiftDiv17], (instregex "VDIVS", "VSQRTS")>;
def : InstRW<[SwiftDiv32], (instregex "VDIVD", "VSQRTD")>;
+ //===--------------------------------------------------------------------===//
+ // Floating-point. Map target-defined SchedReadWrites to processor-specific ones
+ //
+ def : SchedAlias<WriteFPCVT, SwiftWriteP1FourCycle>;
+ def : SchedAlias<WriteFPMOV, SwiftWriteP2ThreeCycle>;
+
+ def : SchedAlias<WriteFPALU32, SwiftWriteP0FourCycle>;
+ def : SchedAlias<WriteFPALU64, SwiftWriteP0SixCycle>;
+
+ def : SchedAlias<WriteFPMUL32, SwiftWriteP1FourCycle>;
+ def : SchedAlias<WriteFPMUL64, SwiftWriteP1SixCycle>;
+
+ def : SchedAlias<WriteFPMAC32, SwiftWriteP1FourCycle>;
+ def : SchedAlias<WriteFPMAC64, SwiftWriteP1FourCycle>;
+
+ def : SchedAlias<WriteFPDIV32, SwiftDiv17>;
+ def : SchedAlias<WriteFPSQRT32, SwiftDiv17>;
+
+ def : SchedAlias<WriteFPDIV64, SwiftDiv32>;
+ def : SchedAlias<WriteFPSQRT64, SwiftDiv32>;
+
+ def : ReadAdvance<ReadFPMUL, 0>;
+ def : ReadAdvance<ReadFPMAC, 0>;
+
// Not specified.
def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>;
// Preload.
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 3b99762f7157..33dcf9b8fef0 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -95,7 +95,7 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
Entry.Node = Src;
Entry.Ty = Type::getInt32Ty(*DAG.getContext());
- Entry.isSExt = false;
+ Entry.IsSExt = false;
Args.push_back(Entry);
} else {
Entry.Node = Src;
@@ -114,11 +114,11 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setCallee(
- TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
- TLI->getPointerTy(DAG.getDataLayout())),
- std::move(Args))
+ .setLibCallee(
+ TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
+ TLI->getPointerTy(DAG.getDataLayout())),
+ std::move(Args))
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
@@ -198,17 +198,18 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
return Chain;
// Issue loads / stores for the trailing (1 - 3) bytes.
+ auto getRemainingValueType = [](unsigned BytesLeft) {
+ return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
+ };
+ auto getRemainingSize = [](unsigned BytesLeft) {
+ return (BytesLeft >= 2) ? 2 : 1;
+ };
+
unsigned BytesLeftSave = BytesLeft;
i = 0;
while (BytesLeft) {
- if (BytesLeft >= 2) {
- VT = MVT::i16;
- VTSize = 2;
- } else {
- VT = MVT::i8;
- VTSize = 1;
- }
-
+ VT = getRemainingValueType(BytesLeft);
+ VTSize = getRemainingSize(BytesLeft);
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
DAG.getConstant(SrcOff, dl, MVT::i32)),
@@ -224,14 +225,8 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
i = 0;
BytesLeft = BytesLeftSave;
while (BytesLeft) {
- if (BytesLeft >= 2) {
- VT = MVT::i16;
- VTSize = 2;
- } else {
- VT = MVT::i8;
- VTSize = 1;
- }
-
+ VT = getRemainingValueType(BytesLeft);
+ VTSize = getRemainingSize(BytesLeft);
TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
DAG.getConstant(DstOff, dl, MVT::i32)),
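
Both rewritten loops apply the same chunking rule, which the new lambdas make explicit: emit i16-sized pieces while at least two trailing bytes remain, then a final i8. A standalone sketch of that rule (hypothetical helper, not the SelectionDAG code itself):

    #include <vector>

    // Sizes of the loads/stores used for the trailing 1-3 bytes of the
    // memcpy, e.g. 3 trailing bytes -> {2, 1}.
    std::vector<unsigned> chunkTrailingBytes(unsigned BytesLeft) {
      std::vector<unsigned> Sizes;
      while (BytesLeft) {
        unsigned VTSize = BytesLeft >= 2 ? 2 : 1; // i16 while possible, else i8
        Sizes.push_back(VTSize);
        BytesLeft -= VTSize;
      }
      return Sizes;
    }
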
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index e2df0bddd0d1..b8a708a20a95 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -13,25 +13,27 @@
#include "ARMSubtarget.h"
#include "ARMFrameLowering.h"
-#include "ARMISelLowering.h"
#include "ARMInstrInfo.h"
-#include "ARMMachineFunctionInfo.h"
-#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "Thumb1FrameLowering.h"
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Attributes.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/TargetParser.h"
+#include <cassert>
+#include <string>
using namespace llvm;
@@ -104,7 +106,7 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
: !isThumb()
? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
: (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
- TLInfo(TM, *this), GISel() {}
+ TLInfo(TM, *this) {}
const CallLowering *ARMSubtarget::getCallLowering() const {
assert(GISel && "Access to GlobalISel APIs not set");
@@ -148,11 +150,11 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isTargetDarwin()) {
StringRef ArchName = TargetTriple.getArchName();
- unsigned ArchKind = llvm::ARM::parseArch(ArchName);
- if (ArchKind == llvm::ARM::AK_ARMV7S)
+ unsigned ArchKind = ARM::parseArch(ArchName);
+ if (ArchKind == ARM::AK_ARMV7S)
// Default to the Swift CPU when targeting armv7s/thumbv7s.
CPUString = "swift";
- else if (ArchKind == llvm::ARM::AK_ARMV7K)
+ else if (ArchKind == ARM::AK_ARMV7K)
// Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k.
// ARMv7k does not use SjLj exception handling.
CPUString = "cortex-a7";
@@ -200,12 +202,12 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// support in the assembler and linker to be used. This would need to be
// fixed to fully support tail calls in Thumb1.
//
- // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
- // LR. This means if we need to reload LR, it takes an extra instructions,
- // which outweighs the value of the tail call; but here we don't know yet
- // whether LR is going to be used. Probably the right approach is to
- // generate the tail call here and turn it back into CALL/RET in
- // emitEpilogue if LR is used.
+ // For ARMv8-M, we /do/ implement tail calls. Doing this is tricky for v8-M
+ // baseline, since the LDM/POP instruction on Thumb doesn't take LR. This
+ // means if we need to reload LR, it takes extra instructions, which outweighs
+ // the value of the tail call; but here we don't know yet whether LR is going
+ // to be used. We generate the tail call here and turn it back into CALL/RET
+ // in emitEpilogue if LR is used.
// Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
// but we need to make sure there are enough registers; the only valid
@@ -274,6 +276,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexM3:
case ExynosM1:
case CortexR52:
+ case Kryo:
break;
case Krait:
PreISelOperandLatencyAdjustment = 1;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 8c8218d0f432..40993fc0aa8a 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -14,47 +14,93 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
#define LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
-
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMFrameLowering.h"
#include "ARMISelLowering.h"
-#include "ARMInstrInfo.h"
#include "ARMSelectionDAGInfo.h"
-#include "ARMSubtarget.h"
-#include "MCTargetDesc/ARMMCTargetDesc.h"
-#include "Thumb1FrameLowering.h"
-#include "Thumb1InstrInfo.h"
-#include "Thumb2InstrInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <memory>
#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "ARMGenSubtargetInfo.inc"
namespace llvm {
+
+class ARMBaseTargetMachine;
class GlobalValue;
class StringRef;
-class TargetOptions;
-class ARMBaseTargetMachine;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
- Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
- CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexR52, CortexM3,
- CortexA32, CortexA35, CortexA53, CortexA57, CortexA72, CortexA73,
- Krait, Swift, ExynosM1
+ Others,
+
+ CortexA12,
+ CortexA15,
+ CortexA17,
+ CortexA32,
+ CortexA35,
+ CortexA5,
+ CortexA53,
+ CortexA57,
+ CortexA7,
+ CortexA72,
+ CortexA73,
+ CortexA8,
+ CortexA9,
+ CortexM3,
+ CortexR4,
+ CortexR4F,
+ CortexR5,
+ CortexR52,
+ CortexR7,
+ ExynosM1,
+ Krait,
+ Kryo,
+ Swift
};
enum ARMProcClassEnum {
- None, AClass, RClass, MClass
+ None,
+
+ AClass,
+ MClass,
+ RClass
};
enum ARMArchEnum {
- ARMv2, ARMv2a, ARMv3, ARMv3m, ARMv4, ARMv4t, ARMv5, ARMv5t, ARMv5te,
- ARMv5tej, ARMv6, ARMv6k, ARMv6kz, ARMv6t2, ARMv6m, ARMv6sm, ARMv7a, ARMv7r,
- ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a, ARMv8mMainline, ARMv8mBaseline,
+ ARMv2,
+ ARMv2a,
+ ARMv3,
+ ARMv3m,
+ ARMv4,
+ ARMv4t,
+ ARMv5,
+ ARMv5t,
+ ARMv5te,
+ ARMv5tej,
+ ARMv6,
+ ARMv6k,
+ ARMv6kz,
+ ARMv6m,
+ ARMv6sm,
+ ARMv6t2,
+ ARMv7a,
+ ARMv7em,
+ ARMv7m,
+ ARMv7r,
+ ARMv7ve,
+ ARMv81a,
+ ARMv82a,
+ ARMv8a,
+ ARMv8mBaseline,
+ ARMv8mMainline,
ARMv8r
};
@@ -168,10 +214,6 @@ protected:
/// HasHardwareDivideInARM - True if subtarget supports [su]div in ARM mode
bool HasHardwareDivideInARM = false;
- /// HasT2ExtractPack - True if subtarget supports thumb2 extract/pack
- /// instructions.
- bool HasT2ExtractPack = false;
-
/// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
/// instructions.
bool HasDataBarrier = false;
@@ -310,6 +352,10 @@ protected:
/// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS).
bool UseSjLjEH = false;
+ /// Implicitly convert an instruction to a different one if its immediates
+ /// cannot be encoded. For example, ADD r0, r1, #0xFFFFFFFF -> SUB r0, r1, #1.
+ bool NegativeImmediates = true;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment = 4;
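
A minimal sketch of the conversion that NegativeImmediates enables, with a hypothetical isEncodableImm standing in for the target's real modified-immediate check:

    #include <cstdint>

    // Stand-in for the target's real immediate-encoding predicate.
    static bool isEncodableImm(uint32_t Imm) { return Imm <= 255; }

    enum Opc { ADDri, SUBri };

    // If Imm has no valid encoding but -Imm does, flip ADD<->SUB and negate:
    // ADD r0, r1, #0xFFFFFFFF becomes SUB r0, r1, #1.
    static bool tryNegatedImm(Opc &Op, uint32_t &Imm) {
      if (isEncodableImm(Imm) || !isEncodableImm(-Imm))
        return false;
      Op = (Op == ADDri) ? SUBri : ADDri;
      Imm = -Imm;
      return true;
    }
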
@@ -362,6 +408,7 @@ public:
unsigned getMaxInlineSizeThreshold() const {
return 64;
}
+
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
@@ -373,15 +420,19 @@ public:
const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
+
const ARMBaseInstrInfo *getInstrInfo() const override {
return InstrInfo.get();
}
+
const ARMTargetLowering *getTargetLowering() const override {
return &TLInfo;
}
+
const ARMFrameLowering *getFrameLowering() const override {
return FrameLowering.get();
}
+
const ARMBaseRegisterInfo *getRegisterInfo() const override {
return &InstrInfo->getRegisterInfo();
}
@@ -451,19 +502,21 @@ public:
bool hasCRC() const { return HasCRC; }
bool hasRAS() const { return HasRAS; }
bool hasVirtualization() const { return HasVirtualization; }
+
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP;
}
bool hasDivide() const { return HasHardwareDivide; }
bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
- bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool hasDataBarrier() const { return HasDataBarrier; }
bool hasV7Clrex() const { return HasV7Clrex; }
bool hasAcquireRelease() const { return HasAcquireRelease; }
+
bool hasAnyDataBarrier() const {
return HasDataBarrier || (hasV6Ops() && !isThumb());
}
+
bool useMulOps() const { return UseMulOps; }
bool useFPVMLx() const { return !SlowFPVMLx; }
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
@@ -561,9 +614,10 @@ public:
TargetTriple.getEnvironment() == Triple::EABIHF ||
isTargetWindows() || isAAPCS16_ABI();
}
+
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
- virtual bool isXRaySupported() const override;
+ bool isXRaySupported() const override;
bool isAPCS_ABI() const;
bool isAAPCS_ABI() const;
@@ -588,6 +642,7 @@ public:
bool useR7AsFramePointer() const {
return isTargetDarwin() || (!isTargetWindows() && isThumb());
}
+
/// Returns true if the frame setup is split into two separate pushes (first
/// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent
/// to lr. This is always required on Thumb1-only targets, as the push and
@@ -656,6 +711,7 @@ public:
/// True if fast-isel is used.
bool useFastISel() const;
};
-} // End llvm namespace
-#endif // ARMSUBTARGET_H
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 70c9567d99f8..b8dadb331ecf 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -10,30 +10,50 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMTargetMachine.h"
#include "ARM.h"
#include "ARMCallLowering.h"
-#include "ARMFrameLowering.h"
#include "ARMInstructionSelector.h"
#include "ARMLegalizerInfo.h"
#include "ARMRegisterBankInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "ARMTargetTransformInfo.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/ExecutionDepsFix.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
+#include <cassert>
+#include <memory>
+#include <string>
+
using namespace llvm;
static cl::opt<bool>
@@ -57,6 +77,10 @@ static cl::opt<cl::boolOrDefault>
EnableGlobalMerge("arm-global-merge", cl::Hidden,
cl::desc("Enable the global merge pass"));
+namespace llvm {
+ void initializeARMExecutionDepsFixPass(PassRegistry&);
+}
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMLETargetMachine> X(getTheARMLETarget());
@@ -68,14 +92,16 @@ extern "C" void LLVMInitializeARMTarget() {
initializeGlobalISel(Registry);
initializeARMLoadStoreOptPass(Registry);
initializeARMPreAllocLoadStoreOptPass(Registry);
+ initializeARMConstantIslandsPass(Registry);
+ initializeARMExecutionDepsFixPass(Registry);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
if (TT.isOSBinFormatMachO())
- return make_unique<TargetLoweringObjectFileMachO>();
+ return llvm::make_unique<TargetLoweringObjectFileMachO>();
if (TT.isOSWindows())
- return make_unique<TargetLoweringObjectFileCOFF>();
- return make_unique<ARMElfTargetObjectFile>();
+ return llvm::make_unique<TargetLoweringObjectFileCOFF>();
+ return llvm::make_unique<ARMElfTargetObjectFile>();
}
static ARMBaseTargetMachine::ARMABI
@@ -94,13 +120,13 @@ computeTargetABI(const Triple &TT, StringRef CPU,
ARMBaseTargetMachine::ARMABI TargetABI =
ARMBaseTargetMachine::ARM_ABI_UNKNOWN;
- unsigned ArchKind = llvm::ARM::parseCPUArch(CPU);
- StringRef ArchName = llvm::ARM::getArchName(ArchKind);
+ unsigned ArchKind = ARM::parseCPUArch(CPU);
+ StringRef ArchName = ARM::getArchName(ArchKind);
// FIXME: This is duplicated code from the front end and should be unified.
if (TT.isOSBinFormatMachO()) {
- if (TT.getEnvironment() == llvm::Triple::EABI ||
- (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
- llvm::ARM::parseArchProfile(ArchName) == llvm::ARM::PK_M) {
+ if (TT.getEnvironment() == Triple::EABI ||
+ (TT.getOS() == Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
+ ARM::parseArchProfile(ArchName) == ARM::PK_M) {
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
} else if (TT.isWatchABI()) {
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
@@ -113,16 +139,16 @@ computeTargetABI(const Triple &TT, StringRef CPU,
} else {
// Select the default based on the platform.
switch (TT.getEnvironment()) {
- case llvm::Triple::Android:
- case llvm::Triple::GNUEABI:
- case llvm::Triple::GNUEABIHF:
- case llvm::Triple::MuslEABI:
- case llvm::Triple::MuslEABIHF:
- case llvm::Triple::EABIHF:
- case llvm::Triple::EABI:
+ case Triple::Android:
+ case Triple::GNUEABI:
+ case Triple::GNUEABIHF:
+ case Triple::MuslEABI:
+ case Triple::MuslEABIHF:
+ case Triple::EABIHF:
+ case Triple::EABI:
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
break;
- case llvm::Triple::GNU:
+ case Triple::GNU:
TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
break;
default:
@@ -141,7 +167,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
const TargetOptions &Options,
bool isLittle) {
auto ABI = computeTargetABI(TT, CPU, Options);
- std::string Ret = "";
+ std::string Ret;
if (isLittle)
// Little endian.
@@ -238,29 +264,35 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
}
}
-ARMBaseTargetMachine::~ARMBaseTargetMachine() {}
+ARMBaseTargetMachine::~ARMBaseTargetMachine() = default;
#ifdef LLVM_BUILD_GLOBAL_ISEL
namespace {
+
struct ARMGISelActualAccessor : public GISelAccessor {
std::unique_ptr<CallLowering> CallLoweringInfo;
std::unique_ptr<InstructionSelector> InstSelector;
std::unique_ptr<LegalizerInfo> Legalizer;
std::unique_ptr<RegisterBankInfo> RegBankInfo;
+
const CallLowering *getCallLowering() const override {
return CallLoweringInfo.get();
}
+
const InstructionSelector *getInstructionSelector() const override {
return InstSelector.get();
}
+
const LegalizerInfo *getLegalizerInfo() const override {
return Legalizer.get();
}
+
const RegisterBankInfo *getRegBankInfo() const override {
return RegBankInfo.get();
}
};
-} // End anonymous namespace.
+
+} // end anonymous namespace
#endif
const ARMSubtarget *
@@ -300,7 +332,7 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
#else
ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor();
GISel->CallLoweringInfo.reset(new ARMCallLowering(*I->getTargetLowering()));
- GISel->Legalizer.reset(new ARMLegalizerInfo());
+ GISel->Legalizer.reset(new ARMLegalizerInfo(*I));
auto *RBI = new ARMRegisterBankInfo(*I->getRegisterInfo());
@@ -390,6 +422,7 @@ ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT,
: ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
namespace {
+
/// ARM Code Generator Pass Configuration Options.
class ARMPassConfig : public TargetPassConfig {
public:
@@ -413,7 +446,21 @@ public:
void addPreSched2() override;
void addPreEmitPass() override;
};
-} // namespace
+
+class ARMExecutionDepsFix : public ExecutionDepsFix {
+public:
+ static char ID;
+ ARMExecutionDepsFix() : ExecutionDepsFix(ID, ARM::DPRRegClass) {}
+ StringRef getPassName() const override {
+ return "ARM Execution Dependency Fix";
+ }
+};
+char ARMExecutionDepsFix::ID;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(ARMExecutionDepsFix, "arm-execution-deps-fix",
+ "ARM Execution Dependency Fix", false, false)
TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
return new ARMPassConfig(this, PM);
@@ -508,7 +555,7 @@ void ARMPassConfig::addPreSched2() {
if (EnableARMLoadStoreOpt)
addPass(createARMLoadStoreOptimizationPass());
- addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
+ addPass(new ARMExecutionDepsFix());
}
// Expand some pseudo instructions into multiple instructions to allow
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index c6b70b953162..f0ca9427d9fb 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -14,10 +14,14 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H
-#include "ARMInstrInfo.h"
#include "ARMSubtarget.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
+#include <memory>
namespace llvm {
@@ -32,7 +36,7 @@ public:
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- ARMSubtarget Subtarget;
+ ARMSubtarget Subtarget;
bool isLittle;
mutable StringMap<std::unique_ptr<ARMSubtarget>> SubtargetMap;
@@ -62,7 +66,8 @@ public:
///
class ARMTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
- public:
+
+public:
ARMTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Optional<Reloc::Model> RM, CodeModel::Model CM,
@@ -73,6 +78,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
///
class ARMLETargetMachine : public ARMTargetMachine {
void anchor() override;
+
public:
ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -84,6 +90,7 @@ public:
///
class ARMBETargetMachine : public ARMTargetMachine {
void anchor() override;
+
public:
ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -97,6 +104,7 @@ public:
///
class ThumbTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
+
public:
ThumbTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -108,6 +116,7 @@ public:
///
class ThumbLETargetMachine : public ThumbTargetMachine {
void anchor() override;
+
public:
ThumbLETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -119,6 +128,7 @@ public:
///
class ThumbBETargetMachine : public ThumbTargetMachine {
void anchor() override;
+
public:
ThumbBETargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -128,4 +138,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 625c4280e1a6..94f9e8dfebbf 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -7,17 +7,20 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMTargetObjectFile.h"
+#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/IR/Mangler.h"
+#include "ARMTargetObjectFile.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/SectionKind.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
using namespace llvm;
using namespace dwarf;
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index 24e755ddac27..dbb8128269dc 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -11,19 +11,19 @@
#define LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCExpr.h"
namespace llvm {
-class MCContext;
-class TargetMachine;
-
class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
mutable bool genExecuteOnly = false;
+
protected:
- const MCSection *AttributesSection;
+ const MCSection *AttributesSection = nullptr;
+
public:
ARMElfTargetObjectFile()
- : TargetLoweringObjectFileELF(), AttributesSection(nullptr) {
+ : TargetLoweringObjectFileELF() {
PLTRelativeVariantKind = MCSymbolRefExpr::VK_ARM_PREL31;
}
@@ -47,4 +47,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 2b6b36bc3e68..8eb9dbf5f9de 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -92,7 +92,8 @@ int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
}
-int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -310,7 +311,8 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
-int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
+int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// On NEON, a vector select gets lowered to vbsl.
@@ -335,7 +337,7 @@ int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
return LT.first;
}
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
@@ -504,7 +506,7 @@ int ARMTTIImpl::getArithmeticInstrCost(
}
int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace, const Instruction *I) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
if (Src->isVectorTy() && Alignment != 16 &&
@@ -529,12 +531,14 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
unsigned NumElts = VecTy->getVectorNumElements();
- Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
- unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
+ auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
// vldN/vstN only support legal vector types of size 64 or 128 in bits.
- if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
- return Factor;
+ // Accesses having vector types that are a multiple of 128 bits can be
+ // matched to more than one vldN/vstN instruction.
+ if (NumElts % Factor == 0 &&
+ TLI->isLegalInterleavedAccessType(SubVecTy, DL))
+ return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
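
A worked example of the new interleaved-access costing, with a hypothetical helper mirroring what getNumInterleavedAccesses is assumed to compute: sub-vector types that are a multiple of 128 bits are now costed as several vldN/vstN operations instead of falling back to the generic (scalarized) cost.

    // Hypothetical mirror of the lowering rule: legal vldN/vstN types are
    // 64 or 128 bits wide; larger multiples of 128 bits split into several
    // accesses.
    unsigned numInterleavedAccesses(unsigned SubVecBits) {
      return SubVecBits <= 128 ? 1 : SubVecBits / 128;
    }
    // e.g. Factor = 2 on a <32 x i8> access: 128-bit sub-vectors, cost 2 * 1.
    //      Factor = 2 on a <64 x i8> access: 256-bit sub-vectors, cost 2 * 2.
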
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h
index 3c83cd92a61a..7de0543dfa5e 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -33,10 +33,6 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
const ARMSubtarget *ST;
const ARMTargetLowering *TLI;
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
-
const ARMSubtarget *getST() const { return ST; }
const ARMTargetLowering *getTLI() const { return TLI; }
@@ -98,9 +94,11 @@ public:
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr);
- int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
@@ -118,7 +116,7 @@ public:
ArrayRef<const Value *> Args = ArrayRef<const Value *>());
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index c243a2d35979..f421d3ac1693 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -915,40 +915,37 @@ public:
int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue()));
return Val != -1;
}
- bool isFBits16() const {
+
+ template<int64_t N, int64_t M>
+ bool isImmediate() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
- return Value >= 0 && Value <= 16;
+ return Value >= N && Value <= M;
}
- bool isFBits32() const {
+ template<int64_t N, int64_t M>
+ bool isImmediateS4() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
- return Value >= 1 && Value <= 32;
+ return ((Value & 3) == 0) && Value >= N && Value <= M;
+ }
+ bool isFBits16() const {
+ return isImmediate<0, 16>();
+ }
+ bool isFBits32() const {
+ return isImmediate<1, 32>();
}
bool isImm8s4() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
+ return isImmediateS4<-1020, 1020>();
}
bool isImm0_1020s4() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return ((Value & 3) == 0) && Value >= 0 && Value <= 1020;
+ return isImmediateS4<0, 1020>();
}
bool isImm0_508s4() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return ((Value & 3) == 0) && Value >= 0 && Value <= 508;
+ return isImmediateS4<0, 508>();
}
bool isImm0_508s4Neg() const {
if (!isImm()) return false;
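
Since many of the ad-hoc range checks above collapse into these two templates, it is worth being explicit that both template bounds are inclusive; a self-contained sketch with plain integers in place of the MCExpr operand:

    #include <cstdint>

    template <int64_t N, int64_t M>
    constexpr bool isImmediate(int64_t Value) {
      return Value >= N && Value <= M; // both bounds inclusive
    }

    template <int64_t N, int64_t M>
    constexpr bool isImmediateS4(int64_t Value) {
      return (Value & 3) == 0 && Value >= N && Value <= M; // also a multiple of 4
    }

    static_assert(isImmediate<0, 16>(16), "upper bound is inclusive");
    static_assert(!isImmediate<0, 16>(17), "<0, 16> matches the old '<= 16'");
    static_assert(isImmediateS4<0, 508>(508) && !isImmediateS4<0, 508>(2),
                  "in range and a multiple of 4");

This is why the surrounding call sites are written with inclusive upper bounds (e.g. isFBits16 uses isImmediate<0, 16>).
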
@@ -958,27 +955,6 @@ public:
// explicitly exclude zero. we want that to use the normal 0_508 version.
return ((Value & 3) == 0) && Value > 0 && Value <= 508;
}
- bool isImm0_239() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 240;
- }
- bool isImm0_255() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 256;
- }
- bool isImm0_4095() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 4096;
- }
bool isImm0_4095Neg() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -986,145 +962,17 @@ public:
int64_t Value = -CE->getValue();
return Value > 0 && Value < 4096;
}
- bool isImm0_1() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 2;
- }
- bool isImm0_3() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 4;
- }
bool isImm0_7() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 8;
- }
- bool isImm0_15() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 16;
- }
- bool isImm0_31() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 32;
- }
- bool isImm0_63() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 64;
- }
- bool isImm8() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value == 8;
- }
- bool isImm16() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value == 16;
- }
- bool isImm32() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value == 32;
- }
- bool isShrImm8() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value <= 8;
- }
- bool isShrImm16() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value <= 16;
- }
- bool isShrImm32() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value <= 32;
- }
- bool isShrImm64() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value <= 64;
- }
- bool isImm1_7() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 8;
- }
- bool isImm1_15() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 16;
- }
- bool isImm1_31() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 32;
+ return isImmediate<0, 7>();
}
bool isImm1_16() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 17;
+ return isImmediate<1, 16>();
}
bool isImm1_32() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 33;
- }
- bool isImm0_32() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 33;
+ return isImmediate<1, 32>();
}
- bool isImm0_65535() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 65536;
+ bool isImm8_255() const {
+ return isImmediate<8, 255>();
}
bool isImm256_65535Expr() const {
if (!isImm()) return false;
@@ -1145,32 +993,16 @@ public:
return Value >= 0 && Value < 65536;
}
bool isImm24bit() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value <= 0xffffff;
+ return isImmediate<0, 0xffffff + 1>();
}
bool isImmThumbSR() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value < 33;
+ return isImmediate<1, 33>();
}
bool isPKHLSLImm() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < 32;
+ return isImmediate<0, 32>();
}
bool isPKHASRImm() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value <= 32;
+ return isImmediate<0, 33>();
}
bool isAdrLabel() const {
// If we have an immediate that's not a constant, treat it as a label
@@ -1245,6 +1077,20 @@ public:
return ARM_AM::getSOImmVal(Value) == -1 &&
ARM_AM::getSOImmVal(-Value) != -1;
}
+ bool isThumbModImmNeg1_7() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int32_t Value = -(int32_t)CE->getValue();
+ return 0 < Value && Value < 8;
+ }
+ bool isThumbModImmNeg8_255() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int32_t Value = -(int32_t)CE->getValue();
+ return 7 < Value && Value < 256;
+ }
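+
+  // For illustration: both predicates accept an immediate whose *negation*
+  // lands in the named range, so an operand such as "#-3" matches
+  // isThumbModImmNeg1_7 and is later emitted as the positive value 3 by the
+  // addThumbModImmNeg*Operands methods further down, e.g.:
+  //
+  //   int32_t Negated = -(int32_t)Imm;               // Imm = -3  -> 3
+  //   bool Neg1_7   = 0 < Negated && Negated < 8;    // matches -1..-7
+  //   bool Neg8_255 = 7 < Negated && Negated < 256;  // matches -8..-255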
bool isConstantPoolImm() const { return Kind == k_ConstantPoolImmediate; }
bool isBitfield() const { return Kind == k_BitfieldDescriptor; }
bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; }
@@ -2035,6 +1881,20 @@ public:
Inst.addOperand(MCOperand::createImm(Enc));
}
+ void addThumbModImmNeg8_255Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint32_t Val = -CE->getValue();
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
+ void addThumbModImmNeg1_7Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint32_t Val = -CE->getValue();
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+
void addBitfieldOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// Munge the lsb/width into a bitfield mask.
@@ -2141,7 +2001,7 @@ public:
// The operand is actually a t2_so_imm, but we have its bitwise
// negation in the assembly source, so twiddle it here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::createImm(~CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(~(uint32_t)CE->getValue()));
}
void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const {
@@ -2149,7 +2009,7 @@ public:
// The operand is actually a t2_so_imm, but we have its
// negation in the assembly source, so twiddle it here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::createImm(-CE->getValue()));
+ Inst.addOperand(MCOperand::createImm(-(uint32_t)CE->getValue()));
}
void addImm0_4095NegOperands(MCInst &Inst, unsigned N) const {
@@ -4330,7 +4190,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
// If some specific flag is already set, it means that some letter is
  // present more than once; this is not acceptable.
- if (FlagsVal == ~0U || (FlagsVal & Flag))
+ if (Flag == ~0U || (FlagsVal & Flag))
return MatchOperand_NoMatch;
FlagsVal |= Flag;
}
@@ -5484,7 +5344,8 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
enum {
COFF = (1 << MCObjectFileInfo::IsCOFF),
ELF = (1 << MCObjectFileInfo::IsELF),
- MACHO = (1 << MCObjectFileInfo::IsMachO)
+ MACHO = (1 << MCObjectFileInfo::IsMachO),
+ WASM = (1 << MCObjectFileInfo::IsWasm),
};
static const struct PrefixEntry {
const char *Spelling;
@@ -5518,6 +5379,9 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
case MCObjectFileInfo::IsCOFF:
CurrentFormat = COFF;
break;
+ case MCObjectFileInfo::IsWasm:
+ CurrentFormat = WASM;
+ break;
}
if (~Prefix->SupportedFormats & CurrentFormat) {
@@ -6301,10 +6165,6 @@ bool ARMAsmParser::validatetLDMRegList(const MCInst &Inst,
else if (ListContainsPC && ListContainsLR)
return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
"PC and LR may not be in the register list simultaneously");
- else if (inITBlock() && !lastInITBlock() && ListContainsPC)
- return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(),
- "instruction must be outside of IT block or the last "
- "instruction in an IT block");
return false;
}
@@ -6366,6 +6226,12 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
return Warning(Loc, "predicated instructions should be in IT block");
}
+ // PC-setting instructions in an IT block, but not the last instruction of
+ // the block, are UNPREDICTABLE.
+ if (inExplicitITBlock() && !lastInITBlock() && isITBlockTerminator(Inst)) {
+    return Error(Loc, "instruction must be outside of IT block or the last "
+                      "instruction in an IT block");
+ }
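+  // For example, a sequence like the following is now rejected at validation
+  // time, since the PC-writing load multiple is not the final instruction of
+  // the IT block (spelling illustrative):
+  //
+  //   itt eq
+  //   ldmeq.w sp!, {r4, pc}   @ error: instruction must be outside of IT
+  //                           @ block or the last instruction in an IT block
+  //   addeq r0, r0, #1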
+
const unsigned Opcode = Inst.getOpcode();
switch (Opcode) {
case ARM::LDRD:
@@ -6676,6 +6542,7 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
break;
}
case ARM::MOVi16:
+ case ARM::MOVTi16:
case ARM::t2MOVi16:
case ARM::t2MOVTi16:
{
@@ -8232,7 +8099,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
case ARM::t2LSRri:
case ARM::t2ASRri: {
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
- Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
!(static_cast<ARMOperand &>(*Operands[3]).isToken() &&
static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) {
@@ -8307,23 +8174,38 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
isNarrow = true;
MCInst TmpInst;
unsigned newOpc;
- switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) {
- default: llvm_unreachable("unexpected opcode!");
- case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break;
- case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break;
- case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break;
- case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
- case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break;
- }
+ unsigned Shift = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm());
unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
+ bool isMov = false;
+ // MOV rd, rm, LSL #0 is actually a MOV instruction
+ if (Shift == ARM_AM::lsl && Amount == 0) {
+ isMov = true;
+      // The 16-bit encoding of MOV rd, rm, LSL #N is explicitly the T2
+      // encoding of MOV (register) in the ARMv8-A and ARMv8-M manuals, and
+      // an immediate of 0 is UNPREDICTABLE in an IT block, so the 32-bit
+      // encoding T3 has to be used instead.
+ if (inITBlock()) {
+ isNarrow = false;
+ }
+ newOpc = isNarrow ? ARM::tMOVSr : ARM::t2MOVr;
+ } else {
+ switch(Shift) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break;
+ case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
+ case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break;
+ }
+ }
if (Amount == 32) Amount = 0;
TmpInst.setOpcode(newOpc);
TmpInst.addOperand(Inst.getOperand(0)); // Rd
- if (isNarrow)
+ if (isNarrow && !isMov)
TmpInst.addOperand(MCOperand::createReg(
Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
TmpInst.addOperand(Inst.getOperand(1)); // Rn
- if (newOpc != ARM::t2RRX)
+ if (newOpc != ARM::t2RRX && !isMov)
TmpInst.addOperand(MCOperand::createImm(Amount));
TmpInst.addOperand(Inst.getOperand(3)); // CondCode
TmpInst.addOperand(Inst.getOperand(4));
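
// Illustrative outcomes of the LSL #0 special case above (mnemonic selection
// follows the patch's own comments):
//   movs r0, r1, lsl #0  -> tMOVSr  (16-bit MOV (register), encoding T2)
//   same, in an IT block -> t2MOVr  (32-bit encoding T3, since immediate 0
//                           is UNPREDICTABLE in the 16-bit form there)
//   mov r0, r1, lsl #3   -> still takes the shift path (tLSLri/t2LSLri)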
@@ -8918,6 +8800,9 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR &&
inITBlock())
return Match_RequiresNotITBlock;
+ // LSL with zero immediate is not allowed in an IT block
+ if (Opc == ARM::tLSLri && Inst.getOperand(3).getImm() == 0 && inITBlock())
+ return Match_RequiresNotITBlock;
} else if (isThumbOne()) {
// Some high-register supporting Thumb1 encodings only allow both registers
// to be from r0-r7 when in Thumb2.
@@ -8932,6 +8817,22 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
return Match_RequiresV6;
}
+ // Before ARMv8 the rules for when SP is allowed in t2MOVr are more complex
+ // than the loop below can handle, so it uses the GPRnopc register class and
+ // we do SP handling here.
+  if (Opc == ARM::t2MOVr && !hasV8Ops()) {
+ // SP as both source and destination is not allowed
+ if (Inst.getOperand(0).getReg() == ARM::SP &&
+ Inst.getOperand(1).getReg() == ARM::SP)
+ return Match_RequiresV8;
+    // When flag-setting, SP as either source or destination is not allowed
+ if (Inst.getOperand(4).getReg() == ARM::CPSR &&
+ (Inst.getOperand(0).getReg() == ARM::SP ||
+ Inst.getOperand(1).getReg() == ARM::SP))
+ return Match_RequiresV8;
+ }
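+  // Illustrative rejections before ARMv8, via the Match_RequiresV8
+  // diagnostic below: "mov sp, sp" and "movs r0, sp" both fail with
+  // "instruction variant requires ARMv8 or later".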
+
for (unsigned I = 0; I < MCID.NumOperands; ++I)
if (MCID.OpInfo[I].RegClass == ARM::rGPRRegClassID) {
// rGPRRegClass excludes PC, and also excluded SP before ARMv8
@@ -8945,7 +8846,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
}
namespace llvm {
-template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) {
+template <> inline bool IsCPSRDead<MCInst>(const MCInst *Instr) {
return true; // In an assembly source, no need to second-guess
}
}
@@ -8975,6 +8876,7 @@ bool ARMAsmParser::isITBlockTerminator(MCInst &Inst) const {
// operands. We only care about Thumb instructions here, as ARM instructions
// obviously can't be in an IT block.
switch (Inst.getOpcode()) {
+ case ARM::tLDMIA:
case ARM::t2LDMIA:
case ARM::t2LDMIA_UPD:
case ARM::t2LDMDB:
@@ -9088,6 +8990,13 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MatchResult = MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
PendConditionalInstruction, Out);
+ SMLoc ErrorLoc;
+ if (ErrorInfo < Operands.size()) {
+ ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ }
+
switch (MatchResult) {
case Match_Success:
// Context sensitive operand constraints aren't handled by the matcher,
@@ -9177,16 +9086,52 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "instruction variant requires ARMv8 or later");
case Match_RequiresFlagSetting:
return Error(IDLoc, "no flag-preserving variant of this instruction available");
- case Match_ImmRange0_15: {
- SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
- if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ case Match_ImmRange0_1:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,1]");
+ case Match_ImmRange0_3:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,3]");
+ case Match_ImmRange0_7:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,7]");
+ case Match_ImmRange0_15:
return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
- }
- case Match_ImmRange0_239: {
- SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
- if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ case Match_ImmRange0_31:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,31]");
+ case Match_ImmRange0_32:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,32]");
+ case Match_ImmRange0_63:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,63]");
+ case Match_ImmRange0_239:
return Error(ErrorLoc, "immediate operand must be in the range [0,239]");
- }
+ case Match_ImmRange0_255:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,255]");
+ case Match_ImmRange0_4095:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,4095]");
+ case Match_ImmRange0_65535:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,65535]");
+ case Match_ImmRange1_7:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,7]");
+ case Match_ImmRange1_8:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,8]");
+ case Match_ImmRange1_15:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,15]");
+ case Match_ImmRange1_16:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,16]");
+ case Match_ImmRange1_31:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,31]");
+ case Match_ImmRange1_32:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,32]");
+ case Match_ImmRange1_64:
+ return Error(ErrorLoc, "immediate operand must be in the range [1,64]");
+  case Match_ImmRange8_8:
+    return Error(ErrorLoc, "immediate operand must be 8");
+  case Match_ImmRange16_16:
+    return Error(ErrorLoc, "immediate operand must be 16");
+  case Match_ImmRange32_32:
+    return Error(ErrorLoc, "immediate operand must be 32");
+ case Match_ImmRange256_65535:
+ return Error(ErrorLoc, "immediate operand must be in the range [255,65535]");
+ case Match_ImmRange0_16777215:
+ return Error(ErrorLoc, "immediate operand must be in the range [0,0xffffff]");
case Match_AlignedMemoryRequiresNone:
case Match_DupAlignedMemoryRequiresNone:
case Match_AlignedMemoryRequires16:
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 0c57a3e3166b..1062c7943201 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -1,5 +1,6 @@
set(LLVM_TARGET_DEFINITIONS ARM.td)
+tablegen(LLVM ARMGenRegisterBank.inc -gen-register-bank)
tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter)
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index ac3d8c780af2..e812d32cc76f 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -7,21 +7,24 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
-#include "MCTargetDesc/ARMMCExpr.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LEB128.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
#include <vector>
using namespace llvm;
@@ -31,6 +34,7 @@ using namespace llvm;
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
+
// Handles the condition code status of instructions in IT blocks
class ITStatus
{
@@ -81,9 +85,7 @@ namespace {
private:
std::vector<unsigned char> ITStates;
};
-}
-namespace {
/// ARM disassembler for all ARM platforms.
class ARMDisassembler : public MCDisassembler {
public:
@@ -91,7 +93,7 @@ public:
MCDisassembler(STI, Ctx) {
}
- ~ARMDisassembler() override {}
+ ~ARMDisassembler() override = default;
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -106,7 +108,7 @@ public:
MCDisassembler(STI, Ctx) {
}
- ~ThumbDisassembler() override {}
+ ~ThumbDisassembler() override = default;
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -118,7 +120,8 @@ private:
DecodeStatus AddThumbPredicate(MCInst&) const;
void UpdateThumbVFPPredicate(MCInst&) const;
};
-}
+
+} // end anonymous namespace
static bool Check(DecodeStatus &Out, DecodeStatus In) {
switch (In) {
@@ -135,7 +138,6 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
llvm_unreachable("Invalid DecodeStatus!");
}
-
// Forward declare these because the autogenerated code will reference them.
// Definitions are further down.
static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
@@ -319,7 +321,6 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-
static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
@@ -395,8 +396,9 @@ static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
+
#include "ARMGenDisassemblerTables.inc"
static MCDisassembler *createARMDisassembler(const Target &T,
@@ -416,8 +418,7 @@ static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size,
uint64_t Address, raw_ostream &OS,
raw_ostream &CS,
uint32_t Insn,
- DecodeStatus Result)
-{
+ DecodeStatus Result) {
switch (MI.getOpcode()) {
case ARM::HVC: {
    // HVC is undefined if condition = 0xf, otherwise unpredictable
@@ -461,65 +462,28 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, Result);
}
- // VFP and NEON instructions, similarly, are shared between ARM
- // and Thumb modes.
- Result = decodeInstruction(DecoderTableVFP32, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
- }
-
- Result = decodeInstruction(DecoderTableVFPV832, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
- }
-
- Result =
- decodeInstruction(DecoderTableNEONData32, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- // Add a fake predicate operand, because we share these instruction
- // definitions with Thumb2 where these instructions are predicable.
- if (!DecodePredicateOperand(MI, 0xE, Address, this))
- return MCDisassembler::Fail;
- return Result;
- }
-
- Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, Insn, Address,
- this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- // Add a fake predicate operand, because we share these instruction
- // definitions with Thumb2 where these instructions are predicable.
- if (!DecodePredicateOperand(MI, 0xE, Address, this))
- return MCDisassembler::Fail;
- return Result;
- }
-
- Result =
- decodeInstruction(DecoderTableNEONDup32, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- // Add a fake predicate operand, because we share these instruction
- // definitions with Thumb2 where these instructions are predicable.
- if (!DecodePredicateOperand(MI, 0xE, Address, this))
- return MCDisassembler::Fail;
- return Result;
- }
+ struct DecodeTable {
+ const uint8_t *P;
+ bool DecodePred;
+ };
- Result =
- decodeInstruction(DecoderTablev8NEON32, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
- }
+ const DecodeTable Tables[] = {
+ {DecoderTableVFP32, false}, {DecoderTableVFPV832, false},
+ {DecoderTableNEONData32, true}, {DecoderTableNEONLoadStore32, true},
+ {DecoderTableNEONDup32, true}, {DecoderTablev8NEON32, false},
+ {DecoderTablev8Crypto32, false},
+ };
- Result =
- decodeInstruction(DecoderTablev8Crypto32, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
+ for (auto Table : Tables) {
+ Result = decodeInstruction(Table.P, MI, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ // Add a fake predicate operand, because we share these instruction
+ // definitions with Thumb2 where these instructions are predicable.
+ if (Table.DecodePred && !DecodePredicateOperand(MI, 0xE, Address, this))
+ return MCDisassembler::Fail;
+ return Result;
+ }
}
Size = 0;
@@ -527,8 +491,10 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
namespace llvm {
+
extern const MCInstrDesc ARMInsts[];
-}
+
+} // end namespace llvm
/// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the
/// immediate Value in the MCInst. The immediate Value has had any PC
@@ -859,7 +825,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return MCDisassembler::Fail;
}
-
extern "C" void LLVMInitializeARMDisassembler() {
TargetRegistry::RegisterMCDisassembler(getTheARMLETarget(),
createARMDisassembler);
@@ -1056,7 +1021,6 @@ static const uint16_t QPRDecoderTable[] = {
ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15
};
-
static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 31 || (RegNo & 1) != 0)
@@ -1676,7 +1640,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
case ARM::LDRD:
case ARM::LDRD_PRE:
case ARM::LDRD_POST:
- if (type && Rn == 15){
+ if (type && Rn == 15) {
if (Rt2 == 15)
S = MCDisassembler::SoftFail;
break;
@@ -1693,7 +1657,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
case ARM::LDRH:
case ARM::LDRH_PRE:
case ARM::LDRH_POST:
- if (type && Rn == 15){
+ if (type && Rn == 15) {
if (Rt == 15)
S = MCDisassembler::SoftFail;
break;
@@ -1711,7 +1675,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
case ARM::LDRSB:
case ARM::LDRSB_PRE:
case ARM::LDRSB_POST:
- if (type && Rn == 15){
+ if (type && Rn == 15) {
if (Rt == 15)
S = MCDisassembler::SoftFail;
break;
@@ -2309,7 +2273,6 @@ DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3748,7 +3711,6 @@ static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-
static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4073,7 +4035,7 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
static DecodeStatus
DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder){
+ uint64_t Address, const void *Decoder) {
if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<9>(Val<<1) + 4,
true, 2, Inst, Decoder))
Inst.addOperand(MCOperand::createImm(SignExtend32<9>(Val << 1)));
@@ -4081,7 +4043,8 @@ DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
}
static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder){
+ uint64_t Address,
+ const void *Decoder) {
// Val is passed in as S:J1:J2:imm10:imm11
// Note no trailing zero after imm11. Also the J1 and J2 values are from
// the encoded instruction. So here change to I1 and I2 values via:
@@ -4247,7 +4210,8 @@ static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
}
static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder){
+ uint64_t Address,
+ const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rd = fieldFromInstruction(Insn, 12, 4);
@@ -4323,7 +4287,6 @@ static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4506,7 +4469,6 @@ static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4637,7 +4599,6 @@ static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4771,7 +4732,6 @@ static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
return S;
}
-
static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -5266,9 +5226,8 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
-
DecodeStatus S = MCDisassembler::Success;
unsigned CRm = fieldFromInstruction(Val, 0, 4);
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 3667952d44c0..57b91366a085 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -20,7 +20,15 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -73,7 +81,6 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
unsigned Opcode = MI->getOpcode();
switch (Opcode) {
-
// Check for MOVs and print canonical forms, instead.
case ARM::MOVsr: {
// FIXME: Thumb variants?
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 9d80eed84dc2..86873a3a6ccb 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -235,4 +235,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index a58d5b34131b..40bf545e8322 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -357,13 +357,14 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf,
}
unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- bool IsPCRel, MCContext *Ctx,
+ bool IsPCRel, MCContext &Ctx,
bool IsLittleEndian,
bool IsResolved) const {
unsigned Kind = Fixup.getKind();
switch (Kind) {
default:
- llvm_unreachable("Unknown fixup kind!");
+ Ctx.reportError(Fixup.getLoc(), "bad relocation fixup type");
+ return 0;
case FK_Data_1:
case FK_Data_2:
case FK_Data_4:
@@ -412,8 +413,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = -Value;
isAdd = false;
}
- if (Ctx && Value >= 4096) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Value >= 4096) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
Value |= isAdd << 23;
@@ -433,8 +434,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
Value = -Value;
opc = 2; // 0b0010
}
- if (Ctx && ARM_AM::getSOImmVal(Value) == -1) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (ARM_AM::getSOImmVal(Value) == -1) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
// Encode the immediate and shift the opcode into place.
@@ -541,8 +542,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
//
// Note that the halfwords are stored high first, low second; so we need
// to transpose the fixup value here to map properly.
- if (Ctx && Value % 4 != 0) {
- Ctx->reportError(Fixup.getLoc(), "misaligned ARM call destination");
+ if (Value % 4 != 0) {
+ Ctx.reportError(Fixup.getLoc(), "misaligned ARM call destination");
return 0;
}
@@ -568,10 +569,10 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_thumb_cp:
// On CPUs supporting Thumb2, this will be relaxed to an ldr.w, otherwise we
// could have an error on our hands.
- if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
+ if (!STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
- Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
return 0;
}
}
@@ -581,8 +582,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// CB instructions can only branch to offsets in [4, 126] in multiples of 2
// so ensure that the raw value LSB is zero and it lies in [2, 130].
// An offset of 2 will be relaxed to a NOP.
- if (Ctx && ((int64_t)Value < 2 || Value > 0x82 || Value & 1)) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if ((int64_t)Value < 2 || Value > 0x82 || Value & 1) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
// Offset by 4 and don't encode the lower bit, which is always 0.
@@ -592,21 +593,21 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
case ARM::fixup_arm_thumb_br:
// Offset by 4 and don't encode the lower bit, which is always 0.
- if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] &&
- !STI->getFeatureBits()[ARM::HasV8MBaselineOps]) {
+ if (!STI->getFeatureBits()[ARM::FeatureThumb2] &&
+ !STI->getFeatureBits()[ARM::HasV8MBaselineOps]) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
- Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
return 0;
}
}
return ((Value - 4) >> 1) & 0x7ff;
case ARM::fixup_arm_thumb_bcc:
// Offset by 4 and don't encode the lower bit, which is always 0.
- if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) {
+ if (!STI->getFeatureBits()[ARM::FeatureThumb2]) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
- Ctx->reportError(Fixup.getLoc(), FixupDiagnostic);
+ Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
return 0;
}
}
@@ -620,8 +621,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
isAdd = false;
}
// The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8].
- if (Ctx && Value >= 256) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Value >= 256) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
Value = (Value & 0xf) | ((Value & 0xf0) << 4);
@@ -641,8 +642,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
// These values don't encode the low two bits since they're always zero.
Value >>= 2;
- if (Ctx && Value >= 256) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Value >= 256) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
Value |= isAdd << 23;
@@ -667,13 +668,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
isAdd = false;
}
// These values don't encode the low bit since it's always zero.
- if (Ctx && (Value & 1)) {
- Ctx->reportError(Fixup.getLoc(), "invalid value for this fixup");
+ if (Value & 1) {
+ Ctx.reportError(Fixup.getLoc(), "invalid value for this fixup");
return 0;
}
Value >>= 1;
- if (Ctx && Value >= 256) {
- Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ if (Value >= 256) {
+ Ctx.reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
return 0;
}
Value |= isAdd << 23;
@@ -687,8 +688,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
}
case ARM::fixup_arm_mod_imm:
Value = ARM_AM::getSOImmVal(Value);
- if (Ctx && Value >> 12) {
- Ctx->reportError(Fixup.getLoc(), "out of range immediate fixup value");
+ if (Value >> 12) {
+ Ctx.reportError(Fixup.getLoc(), "out of range immediate fixup value");
return 0;
}
return Value;
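
// Worked example for the modified-immediate check above: 0x3FC00 is 0xFF
// rotated right by 22 bits, so getSOImmVal(0x3FC00) yields (11 << 8) | 0xFF
// = 0xBFF, which fits in 12 bits; an unencodable value yields -1, whose set
// high bits make "Value >> 12" nonzero and trigger the diagnostic.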
@@ -737,12 +738,6 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
(unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl ||
(unsigned)Fixup.getKind() == ARM::fixup_arm_condbl))
IsResolved = false;
-
- // Try to get the encoded value for the fixup as-if we're mapping it into
- // the instruction. This allows adjustFixupValue() to issue a diagnostic
-  // if the value is invalid.
- (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext(),
- IsLittleEndian, IsResolved);
}
/// getFixupKindNumBytes - The number of bytes the fixup may change.
@@ -846,11 +841,10 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
}
void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ unsigned DataSize, uint64_t Value, bool IsPCRel,
+ MCContext &Ctx) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- Value =
- adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian, true);
+ Value = adjustFixupValue(Fixup, Value, IsPCRel, Ctx, IsLittleEndian, true);
if (!Value)
return; // Doesn't change encoding.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 84caaacc47d3..2ddedb5d6105 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -46,11 +46,11 @@ public:
bool &IsResolved) override;
unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool IsPCRel,
- MCContext *Ctx, bool IsLittleEndian,
+ MCContext &Ctx, bool IsLittleEndian,
bool IsResolved) const;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
unsigned getRelaxedOpcode(unsigned Op) const;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 088b4205ed62..92e553f21f14 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -291,7 +291,11 @@ namespace ARMII {
/// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects
/// just that part of the flag set.
- MO_OPTION_MASK = 0x1f,
+ MO_OPTION_MASK = 0x0f,
+
+ /// MO_SBREL - On a symbol operand, this represents a static base relative
+ /// relocation. Used in movw and movt instructions.
+ MO_SBREL = 0x10,
/// MO_DLLIMPORT - On a symbol operand, this represents that the reference
/// to the symbol is for an import stub. This is used for DLL import
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 6f19754b899e..e1fa24571820 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -7,32 +7,32 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMFixupKinds.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
using namespace llvm;
namespace {
+
class ARMELFObjectWriter : public MCELFObjectTargetWriter {
enum { DefaultEABIVersion = 0x05000000U };
- unsigned GetRelocTypeInner(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const;
+ unsigned GetRelocTypeInner(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, MCContext &Ctx) const;
public:
ARMELFObjectWriter(uint8_t OSABI);
- ~ARMELFObjectWriter() override;
+ ~ARMELFObjectWriter() override = default;
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
@@ -40,15 +40,14 @@ namespace {
bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const override;
};
-}
+
+} // end anonymous namespace
ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI,
ELF::EM_ARM,
/*HasRelocationAddend*/ false) {}
-ARMELFObjectWriter::~ARMELFObjectWriter() {}
-
bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const {
// FIXME: This is extremely conservative. This really needs to use a
@@ -70,19 +69,20 @@ bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned ARMELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
- return GetRelocTypeInner(Target, Fixup, IsPCRel);
+ return GetRelocTypeInner(Target, Fixup, IsPCRel, Ctx);
}
unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
const MCFixup &Fixup,
- bool IsPCRel) const {
+ bool IsPCRel,
+ MCContext &Ctx) const {
MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
unsigned Type = 0;
if (IsPCRel) {
switch ((unsigned)Fixup.getKind()) {
default:
- report_fatal_error("unsupported relocation on symbol");
+ Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol");
return ELF::R_ARM_NONE;
case FK_Data_4:
switch (Modifier) {
@@ -161,7 +161,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
} else {
switch ((unsigned)Fixup.getKind()) {
default:
- report_fatal_error("unsupported relocation on symbol");
+ Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol");
return ELF::R_ARM_NONE;
case FK_Data_1:
switch (Modifier) {
@@ -270,10 +270,26 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
}
break;
case ARM::fixup_t2_movt_hi16:
- Type = ELF::R_ARM_THM_MOVT_ABS;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_THM_MOVT_ABS;
+ break;
+ case MCSymbolRefExpr::VK_ARM_SBREL:
+        Type = ELF::R_ARM_THM_MOVT_BREL;
+ break;
+ }
break;
case ARM::fixup_t2_movw_lo16:
- Type = ELF::R_ARM_THM_MOVW_ABS_NC;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_THM_MOVW_ABS_NC;
+ break;
+ case MCSymbolRefExpr::VK_ARM_SBREL:
+        Type = ELF::R_ARM_THM_MOVW_BREL_NC;
+ break;
+ }
break;
}
}
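
// Illustrative use of the new SBREL cases (directive spelling assumed):
//   movw r0, :lower16:sym(sbrel)   @ -> R_ARM_THM_MOVW_BREL_NC
//   movt r0, :upper16:sym(sbrel)   @ -> R_ARM_THM_MOVT_BREL
// Without the modifier, the _ABS relocation types are selected as before.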
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index f6bb35d2326b..6fa890ba1cd5 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -15,7 +15,11 @@
#include "ARMRegisterInfo.h"
#include "ARMUnwindOpAsm.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -24,25 +28,33 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SectionKind.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/ARMEHABI.h"
-#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetParser.h"
#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <string>
using namespace llvm;
@@ -101,16 +113,21 @@ ARMTargetAsmStreamer::ARMTargetAsmStreamer(MCStreamer &S,
bool VerboseAsm)
: ARMTargetStreamer(S), OS(OS), InstPrinter(InstPrinter),
IsVerboseAsm(VerboseAsm) {}
+
void ARMTargetAsmStreamer::emitFnStart() { OS << "\t.fnstart\n"; }
void ARMTargetAsmStreamer::emitFnEnd() { OS << "\t.fnend\n"; }
void ARMTargetAsmStreamer::emitCantUnwind() { OS << "\t.cantunwind\n"; }
+
void ARMTargetAsmStreamer::emitPersonality(const MCSymbol *Personality) {
OS << "\t.personality " << Personality->getName() << '\n';
}
+
void ARMTargetAsmStreamer::emitPersonalityIndex(unsigned Index) {
OS << "\t.personalityindex " << Index << '\n';
}
+
void ARMTargetAsmStreamer::emitHandlerData() { OS << "\t.handlerdata\n"; }
+
void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
int64_t Offset) {
OS << "\t.setfp\t";
@@ -121,6 +138,7 @@ void ARMTargetAsmStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
OS << ", #" << Offset;
OS << '\n';
}
+
void ARMTargetAsmStreamer::emitMovSP(unsigned Reg, int64_t Offset) {
assert((Reg != ARM::SP && Reg != ARM::PC) &&
"the operand of .movsp cannot be either sp or pc");
@@ -131,9 +149,11 @@ void ARMTargetAsmStreamer::emitMovSP(unsigned Reg, int64_t Offset) {
OS << ", #" << Offset;
OS << '\n';
}
+
void ARMTargetAsmStreamer::emitPad(int64_t Offset) {
OS << "\t.pad\t#" << Offset << '\n';
}
+
void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool isVector) {
assert(RegList.size() && "RegList should not be empty");
@@ -151,8 +171,9 @@ void ARMTargetAsmStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
OS << "}\n";
}
-void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) {
-}
+
+void ARMTargetAsmStreamer::switchVendor(StringRef Vendor) {}
+
void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
OS << "\t.eabi_attribute\t" << Attribute << ", " << Twine(Value);
if (IsVerboseAsm) {
@@ -162,6 +183,7 @@ void ARMTargetAsmStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
}
OS << "\n";
}
+
void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute,
StringRef String) {
switch (Attribute) {
@@ -179,6 +201,7 @@ void ARMTargetAsmStreamer::emitTextAttribute(unsigned Attribute,
}
OS << "\n";
}
+
void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute,
unsigned IntValue,
StringRef StringValue) {
@@ -194,20 +217,25 @@ void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute,
}
OS << "\n";
}
+
void ARMTargetAsmStreamer::emitArch(unsigned Arch) {
OS << "\t.arch\t" << ARM::getArchName(Arch) << "\n";
}
+
void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) {
OS << "\t.arch_extension\t" << ARM::getArchExtName(ArchExt) << "\n";
}
+
void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) {
OS << "\t.object_arch\t" << ARM::getArchName(Arch) << '\n';
}
+
void ARMTargetAsmStreamer::emitFPU(unsigned FPU) {
OS << "\t.fpu\t" << ARM::getFPUName(FPU) << "\n";
}
-void ARMTargetAsmStreamer::finishAttributeSection() {
-}
+
+void ARMTargetAsmStreamer::finishAttributeSection() {}
+
void
ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) {
OS << "\t.tlsdescseq\t" << S->getSymbol().getName();
@@ -274,12 +302,12 @@ private:
};
StringRef CurrentVendor;
- unsigned FPU;
- unsigned Arch;
- unsigned EmittedArch;
+ unsigned FPU = ARM::FK_INVALID;
+ unsigned Arch = ARM::AK_INVALID;
+ unsigned EmittedArch = ARM::AK_INVALID;
SmallVector<AttributeItem, 64> Contents;
- MCSection *AttributeSection;
+ MCSection *AttributeSection = nullptr;
AttributeItem *getAttributeItem(unsigned Attribute) {
for (size_t i = 0; i < Contents.size(); ++i)
@@ -393,9 +421,7 @@ private:
public:
ARMTargetELFStreamer(MCStreamer &S)
- : ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::FK_INVALID),
- Arch(ARM::AK_INVALID), EmittedArch(ARM::AK_INVALID),
- AttributeSection(nullptr) {}
+ : ARMTargetStreamer(S), CurrentVendor("aeabi") {}
};
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
@@ -416,12 +442,11 @@ public:
ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool IsThumb)
- : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb),
- MappingSymbolCounter(0), LastEMS(EMS_None) {
+ : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb) {
EHReset();
}
- ~ARMELFStreamer() {}
+ ~ARMELFStreamer() override = default;
void FinishImpl() override;
@@ -439,20 +464,21 @@ public:
void emitUnwindRaw(int64_t Offset, const SmallVectorImpl<uint8_t> &Opcodes);
void ChangeSection(MCSection *Section, const MCExpr *Subsection) override {
- // We have to keep track of the mapping symbol state of any sections we
- // use. Each one should start off as EMS_None, which is provided as the
- // default constructor by DenseMap::lookup.
- LastMappingSymbols[getPreviousSection().first] = LastEMS;
- LastEMS = LastMappingSymbols.lookup(Section);
-
+ LastMappingSymbols[getPreviousSection().first] = std::move(LastEMSInfo);
MCELFStreamer::ChangeSection(Section, Subsection);
+ auto LastMappingSymbol = LastMappingSymbols.find(Section);
+ if (LastMappingSymbol != LastMappingSymbols.end()) {
+ LastEMSInfo = std::move(LastMappingSymbol->second);
+ return;
+ }
+ LastEMSInfo.reset(new ElfMappingSymbolInfo(SMLoc(), nullptr, 0));
}
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
- void EmitInstruction(const MCInst& Inst,
- const MCSubtargetInfo &STI) override {
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) override {
if (IsThumb)
EmitThumbMappingSymbol();
else
@@ -507,15 +533,25 @@ public:
MCELFStreamer::EmitBytes(Data);
}
+ void FlushPendingMappingSymbol() {
+ if (!LastEMSInfo->hasInfo())
+ return;
+ ElfMappingSymbolInfo *EMS = LastEMSInfo.get();
+ EmitMappingSymbol("$d", EMS->Loc, EMS->F, EMS->Offset);
+ EMS->resetInfo();
+ }
+
/// This is one of the functions used to emit data into an ELF section, so the
/// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
/// necessary.
void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override {
- if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value))
+ if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value)) {
if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4)) {
getContext().reportError(Loc, "relocated expression must be 32-bit");
return;
}
+ getOrCreateDataFragment();
+ }
EmitDataMappingSymbol();
MCELFStreamer::EmitValueImpl(Value, Size, Loc);
@@ -548,22 +584,54 @@ private:
EMS_Data
};
+ struct ElfMappingSymbolInfo {
+ explicit ElfMappingSymbolInfo(SMLoc Loc, MCFragment *F, uint64_t O)
+ : Loc(Loc), F(F), Offset(O), State(EMS_None) {}
+ void resetInfo() {
+ F = nullptr;
+ Offset = 0;
+ }
+ bool hasInfo() { return F != nullptr; }
+ SMLoc Loc;
+ MCFragment *F;
+ uint64_t Offset;
+ ElfMappingSymbol State;
+ };
+
void EmitDataMappingSymbol() {
- if (LastEMS == EMS_Data) return;
+ if (LastEMSInfo->State == EMS_Data)
+ return;
+ else if (LastEMSInfo->State == EMS_None) {
+ // This is a tentative symbol, it won't really be emitted until it's
+ // actually needed.
+ ElfMappingSymbolInfo *EMS = LastEMSInfo.get();
+ auto *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+ if (!DF)
+ return;
+ EMS->Loc = SMLoc();
+ EMS->F = getCurrentFragment();
+ EMS->Offset = DF->getContents().size();
+ LastEMSInfo->State = EMS_Data;
+ return;
+ }
EmitMappingSymbol("$d");
- LastEMS = EMS_Data;
+ LastEMSInfo->State = EMS_Data;
}
void EmitThumbMappingSymbol() {
- if (LastEMS == EMS_Thumb) return;
+ if (LastEMSInfo->State == EMS_Thumb)
+ return;
+ FlushPendingMappingSymbol();
EmitMappingSymbol("$t");
- LastEMS = EMS_Thumb;
+ LastEMSInfo->State = EMS_Thumb;
}
void EmitARMMappingSymbol() {
- if (LastEMS == EMS_ARM) return;
+ if (LastEMSInfo->State == EMS_ARM)
+ return;
+ FlushPendingMappingSymbol();
EmitMappingSymbol("$a");
- LastEMS = EMS_ARM;
+ LastEMSInfo->State = EMS_ARM;
}
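
  // For reference: "$a", "$t" and "$d" are the ELF mapping symbols the ARM
  // ABI uses to mark ARM code, Thumb code and inline data. With the tentative
  // state above, a "$d" for data at the start of a fragment, e.g.
  //   .word 42
  // is only materialized by FlushPendingMappingSymbol once a code mapping
  // symbol is needed in the same section, rather than being emitted eagerly.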
void EmitMappingSymbol(StringRef Name) {
@@ -576,6 +644,17 @@ private:
Symbol->setExternal(false);
}
+ void EmitMappingSymbol(StringRef Name, SMLoc Loc, MCFragment *F,
+ uint64_t Offset) {
+ auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
+ Name + "." + Twine(MappingSymbolCounter++)));
+ EmitLabel(Symbol, Loc, F);
+ Symbol->setType(ELF::STT_NOTYPE);
+ Symbol->setBinding(ELF::STB_LOCAL);
+ Symbol->setExternal(false);
+ Symbol->setOffset(Offset);
+ }
+
void EmitThumbFunc(MCSymbol *Func) override {
getAssembler().setIsThumbFunc(Func);
EmitSymbolAttribute(Func, MCSA_ELF_TypeFunction);
@@ -599,10 +678,12 @@ private:
void EmitFixup(const MCExpr *Expr, MCFixupKind Kind);
bool IsThumb;
- int64_t MappingSymbolCounter;
+ int64_t MappingSymbolCounter = 0;
+
+ DenseMap<const MCSection *, std::unique_ptr<ElfMappingSymbolInfo>>
+ LastMappingSymbols;
- DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
- ElfMappingSymbol LastEMS;
+ std::unique_ptr<ElfMappingSymbolInfo> LastEMSInfo;
// ARM Exception Handling Frame Information
MCSymbol *ExTab;
@@ -618,6 +699,7 @@ private:
SmallVector<uint8_t, 64> Opcodes;
UnwindOpcodeAssembler UnwindOpAsm;
};
+
} // end anonymous namespace
ARMELFStreamer &ARMTargetELFStreamer::getStreamer() {
@@ -627,33 +709,42 @@ ARMELFStreamer &ARMTargetELFStreamer::getStreamer() {
void ARMTargetELFStreamer::emitFnStart() { getStreamer().emitFnStart(); }
void ARMTargetELFStreamer::emitFnEnd() { getStreamer().emitFnEnd(); }
void ARMTargetELFStreamer::emitCantUnwind() { getStreamer().emitCantUnwind(); }
+
void ARMTargetELFStreamer::emitPersonality(const MCSymbol *Personality) {
getStreamer().emitPersonality(Personality);
}
+
void ARMTargetELFStreamer::emitPersonalityIndex(unsigned Index) {
getStreamer().emitPersonalityIndex(Index);
}
+
void ARMTargetELFStreamer::emitHandlerData() {
getStreamer().emitHandlerData();
}
+
void ARMTargetELFStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
int64_t Offset) {
getStreamer().emitSetFP(FpReg, SpReg, Offset);
}
+
void ARMTargetELFStreamer::emitMovSP(unsigned Reg, int64_t Offset) {
getStreamer().emitMovSP(Reg, Offset);
}
+
void ARMTargetELFStreamer::emitPad(int64_t Offset) {
getStreamer().emitPad(Offset);
}
+
void ARMTargetELFStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool isVector) {
getStreamer().emitRegSave(RegList, isVector);
}
+
void ARMTargetELFStreamer::emitUnwindRaw(int64_t Offset,
const SmallVectorImpl<uint8_t> &Opcodes) {
getStreamer().emitUnwindRaw(Offset, Opcodes);
}
+
void ARMTargetELFStreamer::switchVendor(StringRef Vendor) {
assert(!Vendor.empty() && "Vendor cannot be empty.");
@@ -668,25 +759,31 @@ void ARMTargetELFStreamer::switchVendor(StringRef Vendor) {
CurrentVendor = Vendor;
}
+
void ARMTargetELFStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true);
}
+
void ARMTargetELFStreamer::emitTextAttribute(unsigned Attribute,
StringRef Value) {
setAttributeItem(Attribute, Value, /* OverwriteExisting= */ true);
}
+
void ARMTargetELFStreamer::emitIntTextAttribute(unsigned Attribute,
unsigned IntValue,
StringRef StringValue) {
setAttributeItems(Attribute, IntValue, StringValue,
/* OverwriteExisting= */ true);
}
+
void ARMTargetELFStreamer::emitArch(unsigned Value) {
Arch = Value;
}
+
void ARMTargetELFStreamer::emitObjectArch(unsigned Value) {
EmittedArch = Value;
}
+
void ARMTargetELFStreamer::emitArchDefaultAttributes() {
using namespace ARMBuildAttrs;
@@ -786,9 +883,11 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
break;
}
}
+
void ARMTargetELFStreamer::emitFPU(unsigned Value) {
FPU = Value;
}
+
void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
switch (FPU) {
case ARM::FK_VFP:
@@ -920,6 +1019,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
break;
}
}
+
size_t ARMTargetELFStreamer::calculateContentSize() const {
size_t Result = 0;
for (size_t i = 0; i < Contents.size(); ++i) {
@@ -944,6 +1044,7 @@ size_t ARMTargetELFStreamer::calculateContentSize() const {
}
return Result;
}
+
void ARMTargetELFStreamer::finishAttributeSection() {
// <format-version>
// [ <section-length> "vendor-name"
@@ -1093,9 +1194,9 @@ inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix,
const MCSymbolELF *Group = FnSection.getGroup();
if (Group)
Flags |= ELF::SHF_GROUP;
- MCSectionELF *EHSection =
- getContext().getELFSection(EHSecName, Type, Flags, 0, Group,
- FnSection.getUniqueID(), nullptr, &FnSection);
+ MCSectionELF *EHSection = getContext().getELFSection(
+ EHSecName, Type, Flags, 0, Group, FnSection.getUniqueID(),
+ static_cast<const MCSymbolELF *>(&Fn));
assert(EHSection && "Failed to get the required EH section");
@@ -1114,6 +1215,7 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER,
SectionKind::getData(), FnStart);
}
+
void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) {
MCDataFragment *Frag = getOrCreateDataFragment();
Frag->getFixups().push_back(MCFixup::create(Frag->getContents().size(), Expr,
@@ -1396,8 +1498,6 @@ MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
return S;
- }
-
}
-
+} // end namespace llvm
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 559a4f8de75f..d9df2c6da7ec 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -11,22 +11,33 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMMCExpr.h"
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
using namespace llvm;
@@ -36,9 +47,8 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
namespace {
+
class ARMMCCodeEmitter : public MCCodeEmitter {
- ARMMCCodeEmitter(const ARMMCCodeEmitter &) = delete;
- void operator=(const ARMMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
const MCContext &CTX;
bool IsLittleEndian;
@@ -47,15 +57,18 @@ public:
ARMMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool IsLittle)
: MCII(mcii), CTX(ctx), IsLittleEndian(IsLittle) {
}
-
- ~ARMMCCodeEmitter() override {}
+ ARMMCCodeEmitter(const ARMMCCodeEmitter &) = delete;
+ ARMMCCodeEmitter &operator=(const ARMMCCodeEmitter &) = delete;
+ ~ARMMCCodeEmitter() override = default;
bool isThumb(const MCSubtargetInfo &STI) const {
return STI.getFeatureBits()[ARM::ModeThumb];
}
+
bool isThumb2(const MCSubtargetInfo &STI) const {
return isThumb(STI) && STI.getFeatureBits()[ARM::FeatureThumb2];
}
+
bool isTargetMachO(const MCSubtargetInfo &STI) const {
const Triple &TT = STI.getTargetTriple();
return TT.isOSBinFormatMachO();
@@ -200,6 +213,7 @@ public:
case ARM_AM::ib: return 3;
}
}
+
/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
///
unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const {
@@ -273,7 +287,6 @@ public:
unsigned getSOImmOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(Op);
// We expect MO to be an immediate or an expression,
@@ -432,18 +445,6 @@ public:
} // end anonymous namespace
-MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new ARMMCCodeEmitter(MCII, Ctx, true);
-}
-
-MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new ARMMCCodeEmitter(MCII, Ctx, false);
-}
-
/// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing
/// instructions, and rewrite them to their Thumb2 form if we are currently in
/// Thumb2 mode.
@@ -550,7 +551,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
bool ARMMCCodeEmitter::
EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx, unsigned &Reg,
unsigned &Imm, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
@@ -1515,7 +1516,7 @@ getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
uint32_t v = ~MO.getImm();
uint32_t lsb = countTrailingZeros(v);
uint32_t msb = (32 - countLeadingZeros (v)) - 1;
- assert (v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
+ assert(v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
return lsb | (msb << 5);
}
@@ -1700,3 +1701,15 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
}
#include "ARMGenMCCodeEmitter.inc"
+
+MCCodeEmitter *llvm::createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new ARMMCCodeEmitter(MCII, Ctx, true);
+}
+
+MCCodeEmitter *llvm::createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new ARMMCCodeEmitter(MCII, Ctx, false);
+}
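
The special-member changes in this file follow a common C++11 cleanup: the deleted copy operations move into the public section next to the constructor, and the empty destructor body becomes "= default". A minimal standalone sketch of the idiom; the class name is illustrative, not the real emitter type:

// Sketch only: "Emitter" stands in for a non-copyable MC class.
class Emitter {
public:
  Emitter() = default;
  // Publicly deleted copy operations document intent up front and give
  // clearer compiler diagnostics than declarations buried in a private
  // section.
  Emitter(const Emitter &) = delete;
  Emitter &operator=(const Emitter &) = delete;
  // "= default" replaces an empty "~Emitter() override {}" body.
  virtual ~Emitter() = default;
};

int main() { Emitter E; (void)E; return 0; }
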
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 9e4d202321e6..477755157040 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -260,18 +260,37 @@ public:
return false;
int64_t Imm = Inst.getOperand(0).getImm();
- // FIXME: This is not right for thumb.
Target = Addr+Imm+8; // In ARM mode the PC is always off by 8 bytes.
return true;
}
};
+class ThumbMCInstrAnalysis : public ARMMCInstrAnalysis {
+public:
+ ThumbMCInstrAnalysis(const MCInstrInfo *Info) : ARMMCInstrAnalysis(Info) {}
+
+ bool evaluateBranch(const MCInst &Inst, uint64_t Addr,
+ uint64_t Size, uint64_t &Target) const override {
+ // We only handle PCRel branches for now.
+ if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType!=MCOI::OPERAND_PCREL)
+ return false;
+
+ int64_t Imm = Inst.getOperand(0).getImm();
+ Target = Addr+Imm+4; // In Thumb mode the PC is always off by 4 bytes.
+ return true;
+ }
+};
+
} // end anonymous namespace
static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
return new ARMMCInstrAnalysis(Info);
}
+static MCInstrAnalysis *createThumbMCInstrAnalysis(const MCInstrInfo *Info) {
+ return new ThumbMCInstrAnalysis(Info);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeARMTargetMC() {
for (Target *T : {&getTheARMLETarget(), &getTheARMBETarget(),
@@ -289,9 +308,6 @@ extern "C" void LLVMInitializeARMTargetMC() {
TargetRegistry::RegisterMCSubtargetInfo(*T,
ARM_MC::createARMMCSubtargetInfo);
- // Register the MC instruction analyzer.
- TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis);
-
TargetRegistry::RegisterELFStreamer(*T, createELFStreamer);
TargetRegistry::RegisterCOFFStreamer(*T, createARMWinCOFFStreamer);
TargetRegistry::RegisterMachOStreamer(*T, createARMMachOStreamer);
@@ -313,6 +329,12 @@ extern "C" void LLVMInitializeARMTargetMC() {
TargetRegistry::RegisterMCRelocationInfo(*T, createARMMCRelocationInfo);
}
+ // Register the MC instruction analyzer.
+ for (Target *T : {&getTheARMLETarget(), &getTheARMBETarget()})
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createARMMCInstrAnalysis);
+ for (Target *T : {&getTheThumbLETarget(), &getTheThumbBETarget()})
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createThumbMCInstrAnalysis);
+
// Register the MC Code Emitter
for (Target *T : {&getTheARMLETarget(), &getTheThumbLETarget()})
TargetRegistry::RegisterMCCodeEmitter(*T, createARMLEMCCodeEmitter);
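
The split between ARMMCInstrAnalysis and ThumbMCInstrAnalysis above exists because the architectural PC reads ahead of the branch: 8 bytes in ARM state, 4 in Thumb state. A standalone sketch of the target arithmetic; the helper name is hypothetical, not LLVM API:

#include <cassert>
#include <cstdint>

// Mirrors evaluateBranch: the encoded immediate is relative to the
// pipeline-visible PC, not to the branch instruction's own address.
static uint64_t branchTarget(uint64_t Addr, int64_t Imm, bool IsThumb) {
  return Addr + Imm + (IsThumb ? 4 : 8);
}

int main() {
  // A PC-relative branch at 0x1000 with encoded immediate -16:
  assert(branchTarget(0x1000, -16, /*IsThumb=*/false) == 0xFF8); // ARM
  assert(branchTarget(0x1000, -16, /*IsThumb=*/true) == 0xFF4);  // Thumb
  return 0;
}
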
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
index 482bcf902518..34c770440e1b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
@@ -1,4 +1,4 @@
-//===-- ARMMachORelocationInfo.cpp ----------------------------------------===//
+//===- ARMMachORelocationInfo.cpp -----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,17 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "ARMMCExpr.h"
-#include "llvm-c/Disassembler.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm-c/Disassembler.h"
using namespace llvm;
-using namespace object;
namespace {
+
class ARMMachORelocationInfo : public MCRelocationInfo {
public:
ARMMachORelocationInfo(MCContext &Ctx) : MCRelocationInfo(Ctx) {}
@@ -35,7 +35,8 @@ public:
}
}
};
-} // End unnamed namespace
+
+} // end anonymous namespace
/// createARMMachORelocationInfo - Construct an ARM Mach-O RelocationInfo.
MCRelocationInfo *llvm::createARMMachORelocationInfo(MCContext &Ctx) {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index c0d10c896354..73e563890dd9 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -10,20 +10,21 @@
// This file implements the ARMTargetStreamer class.
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/MapVector.h"
+
#include "llvm/MC/ConstantPools.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;
+
//
// ARMTargetStreamer Implementation
//
+
ARMTargetStreamer::ARMTargetStreamer(MCStreamer &S)
: MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {}
-ARMTargetStreamer::~ARMTargetStreamer() {}
+ARMTargetStreamer::~ARMTargetStreamer() = default;
// The constant pool handling is shared by all ARMTargetStreamer
// implementations.
@@ -73,5 +74,4 @@ void ARMTargetStreamer::finishAttributeSection() {}
void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {}
void
ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {}
-
void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 173cc93d44fb..d3ab83bbccbc 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -14,12 +14,14 @@
#include "ARMUnwindOpAsm.h"
#include "llvm/Support/ARMEHABI.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
using namespace llvm;
namespace {
+
/// UnwindOpcodeStreamer - The simple wrapper over SmallVector to emit bytes
/// with MSB to LSB per uint32_t ordering. For example, the first byte will
/// be placed in Vec[3], and the following bytes will be placed in 2, 1, 0,
@@ -27,20 +29,19 @@ namespace {
class UnwindOpcodeStreamer {
private:
SmallVectorImpl<uint8_t> &Vec;
- size_t Pos;
+ size_t Pos = 3;
public:
- UnwindOpcodeStreamer(SmallVectorImpl<uint8_t> &V) : Vec(V), Pos(3) {
- }
+ UnwindOpcodeStreamer(SmallVectorImpl<uint8_t> &V) : Vec(V) {}
/// Emit the byte in MSB to LSB per uint32_t order.
- inline void EmitByte(uint8_t elem) {
+ void EmitByte(uint8_t elem) {
Vec[Pos] = elem;
Pos = (((Pos ^ 0x3u) + 1) ^ 0x3u);
}
/// Emit the size prefix.
- inline void EmitSize(size_t Size) {
+ void EmitSize(size_t Size) {
size_t SizeInWords = (Size + 3) / 4;
assert(SizeInWords <= 0x100u &&
"Only 256 additional words are allowed for unwind opcodes");
@@ -48,19 +49,20 @@ namespace {
}
/// Emit the personality index prefix.
- inline void EmitPersonalityIndex(unsigned PI) {
+ void EmitPersonalityIndex(unsigned PI) {
assert(PI < ARM::EHABI::NUM_PERSONALITY_INDEX &&
"Invalid personality prefix");
EmitByte(ARM::EHABI::EHT_COMPACT | PI);
}
/// Fill the rest of the bytes with the FINISH opcode.
- inline void FillFinishOpcode() {
+ void FillFinishOpcode() {
while (Pos < Vec.size())
EmitByte(ARM::EHABI::UNWIND_OPCODE_FINISH);
}
};
-}
+
+} // end anonymous namespace
void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
if (RegSave == 0u)
@@ -153,7 +155,6 @@ void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) {
void UnwindOpcodeAssembler::Finalize(unsigned &PersonalityIndex,
SmallVectorImpl<uint8_t> &Result) {
-
UnwindOpcodeStreamer OpStreamer(Result);
if (HasPersonality) {
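
The Pos update in UnwindOpcodeStreamer is worth unpacking: starting from 3, the expression ((Pos ^ 0x3) + 1) ^ 0x3 visits 3,2,1,0 then 7,6,5,4 and so on, which is exactly the MSB-to-LSB-within-each-word order the class comment promises. A standalone demonstration:

#include <cstddef>
#include <cstdio>

int main() {
  std::size_t Pos = 3;
  for (int i = 0; i < 8; ++i) {
    std::printf("%zu ", Pos); // prints: 3 2 1 0 7 6 5 4
    // XOR with 3 converts the within-word index to ascending order,
    // +1 steps to the next byte, and the second XOR converts back.
    Pos = ((Pos ^ 0x3u) + 1) ^ 0x3u;
  }
  std::printf("\n");
  return 0;
}
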
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
index e0c113ecfaa3..a7bfbdf4938e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -16,8 +16,8 @@
#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ARMEHABI.h"
-#include "llvm/Support/DataTypes.h"
+#include <cstddef>
+#include <cstdint>
namespace llvm {
@@ -25,13 +25,12 @@ class MCSymbol;
class UnwindOpcodeAssembler {
private:
- llvm::SmallVector<uint8_t, 32> Ops;
- llvm::SmallVector<unsigned, 8> OpBegins;
- bool HasPersonality;
+ SmallVector<uint8_t, 32> Ops;
+ SmallVector<unsigned, 8> OpBegins;
+ bool HasPersonality = false;
public:
- UnwindOpcodeAssembler()
- : HasPersonality(0) {
+ UnwindOpcodeAssembler() {
OpBegins.push_back(0);
}
@@ -40,12 +39,12 @@ public:
Ops.clear();
OpBegins.clear();
OpBegins.push_back(0);
- HasPersonality = 0;
+ HasPersonality = false;
}
/// Set the personality
void setPersonality(const MCSymbol *Per) {
- HasPersonality = 1;
+ HasPersonality = true;
}
/// Emit unwind opcodes for .save directives
@@ -88,6 +87,6 @@ private:
}
};
-} // namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 166c04b41a77..7ae2f864d79d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -10,23 +10,28 @@
#include "MCTargetDesc/ARMFixupKinds.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/Support/COFF.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
using namespace llvm;
namespace {
+
class ARMWinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
public:
ARMWinCOFFObjectWriter(bool Is64Bit)
: MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) {
assert(!Is64Bit && "AArch64 support not yet implemented");
}
- ~ARMWinCOFFObjectWriter() override {}
+
+ ~ARMWinCOFFObjectWriter() override = default;
unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsCrossSection,
@@ -35,6 +40,8 @@ public:
bool recordRelocation(const MCFixup &) const override;
};
+} // end anonymous namespace
+
unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
const MCFixup &Fixup,
bool IsCrossSection,
@@ -79,13 +86,13 @@ unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
return static_cast<unsigned>(Fixup.getKind()) != ARM::fixup_t2_movt_hi16;
}
-}
namespace llvm {
+
MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit) {
MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit);
return createWinCOFFObjectWriter(MOTW, OS);
}
-}
+} // end namespace llvm
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 9953c61cd89c..fc083b98395b 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -11,14 +11,36 @@
//
//===----------------------------------------------------------------------===//
-#include "Thumb1FrameLowering.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "Thumb1FrameLowering.h"
+#include "Thumb1InstrInfo.h"
+#include "ThumbRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <iterator>
+#include <vector>
using namespace llvm;
@@ -238,9 +260,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
if (HasFP) {
FramePtrOffsetInBlock +=
MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
- .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4)
- .setMIFlags(MachineInstr::FrameSetup));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+ .addReg(ARM::SP)
+ .addImm(FramePtrOffsetInBlock / 4)
+ .setMIFlags(MachineInstr::FrameSetup)
+ .add(predOps(ARMCC::AL));
if(FramePtrOffsetInBlock) {
CFAOffset += FramePtrOffsetInBlock;
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
@@ -336,14 +360,19 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
// will be allocated after this, so we can still use the base pointer
// to reference locals.
if (RegInfo->hasBasePointer(MF))
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
- .addReg(ARM::SP));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
// If the frame has variable sized objects then the epilogue must restore
// the sp from fp. We can assume there's an FP here since hasFP already
// checks for hasVarSizedObjects.
if (MFI.hasVarSizedObjects())
AFI->setShouldRestoreSPFromFP(true);
+
+ // In some cases, virtual registers have been introduced, e.g. by uses of
+ // emitThumbRegPlusImmInReg.
+ MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs);
}
static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) {
@@ -408,13 +437,13 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
"No scratch register to restore SP from FP!");
emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
TII, *RegInfo);
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
- ARM::SP)
- .addReg(ARM::R4));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(ARM::R4)
+ .add(predOps(ARMCC::AL));
} else
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
- ARM::SP)
- .addReg(FramePtr));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(FramePtr)
+ .add(predOps(ARMCC::AL));
} else {
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
&MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
@@ -493,12 +522,12 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET)
return true;
MachineInstrBuilder MIB =
- AddDefaultPred(
- BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)));
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))
+ .add(predOps(ARMCC::AL));
// Copy implicit ops and popped registers, if any.
for (auto MO: MBBI->operands())
if (MO.isReg() && (MO.isImplicit() || MO.isDef()))
- MIB.addOperand(MO);
+ MIB.add(MO);
MIB.addReg(ARM::PC, RegState::Define);
// Erase the old instruction (tBX_RET or tPOP).
MBB.erase(MBBI);
@@ -566,22 +595,23 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
if (TemporaryReg) {
assert(!PopReg && "Unnecessary MOV is about to be inserted");
PopReg = PopFriendly.find_first();
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(TemporaryReg, RegState::Define)
- .addReg(PopReg, RegState::Kill));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(TemporaryReg, RegState::Define)
+ .addReg(PopReg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
}
if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) {
// We couldn't use the direct restoration above, so
// perform the opposite conversion: tPOP_RET to tPOP.
MachineInstrBuilder MIB =
- AddDefaultPred(
- BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)));
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL));
bool Popped = false;
for (auto MO: MBBI->operands())
if (MO.isReg() && (MO.isImplicit() || MO.isDef()) &&
MO.getReg() != ARM::PC) {
- MIB.addOperand(MO);
+ MIB.add(MO);
if (!MO.isImplicit())
Popped = true;
}
@@ -590,23 +620,27 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
MBB.erase(MIB.getInstr());
// Erase the old instruction.
MBB.erase(MBBI);
- MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)));
+ MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))
+ .add(predOps(ARMCC::AL));
}
assert(PopReg && "Do not know how to get LR");
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
.addReg(PopReg, RegState::Define);
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(ARM::LR, RegState::Define)
- .addReg(PopReg, RegState::Kill));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(ARM::LR, RegState::Define)
+ .addReg(PopReg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
if (TemporaryReg)
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
- .addReg(PopReg, RegState::Define)
- .addReg(TemporaryReg, RegState::Kill));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr))
+ .addReg(PopReg, RegState::Define)
+ .addReg(TemporaryReg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
return true;
}
@@ -667,8 +701,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Push the low registers and lr
if (!LoRegsToSave.empty()) {
- MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
- AddDefaultPred(MIB);
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
if (LoRegsToSave.count(Reg)) {
bool isKill = !MF.getRegInfo().isLiveIn(Reg);
@@ -708,8 +742,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
// Create the PUSH, but don't insert it yet (the MOVs need to come first).
- MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH));
- AddDefaultPred(PushMIB);
+ MachineInstrBuilder PushMIB =
+ BuildMI(MF, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
SmallVector<unsigned, 4> RegsToPush;
while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
@@ -719,11 +753,10 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MBB.addLiveIn(*HiRegToSave);
// Emit a MOV from the high reg to the low reg.
- MachineInstrBuilder MIB =
- BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr));
- MIB.addReg(*CopyReg, RegState::Define);
- MIB.addReg(*HiRegToSave, getKillRegState(isKill));
- AddDefaultPred(MIB);
+ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
+ .addReg(*CopyReg, RegState::Define)
+ .addReg(*HiRegToSave, getKillRegState(isKill))
+ .add(predOps(ARMCC::AL));
// Record the register that must be added to the PUSH.
RegsToPush.push_back(*CopyReg);
@@ -735,7 +768,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
}
// Add the low registers to the PUSH, in ascending order.
- for (unsigned Reg : reverse(RegsToPush))
+ for (unsigned Reg : llvm::reverse(RegsToPush))
PushMIB.addReg(Reg, RegState::Kill);
// Insert the PUSH instruction after the MOVs.
@@ -817,19 +850,18 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
// Create the POP instruction.
- MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP));
- AddDefaultPred(PopMIB);
+ MachineInstrBuilder PopMIB =
+ BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
// Add the low register to the POP.
PopMIB.addReg(*CopyReg, RegState::Define);
// Create the MOV from low to high register.
- MachineInstrBuilder MIB =
- BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr));
- MIB.addReg(*HiRegToRestore, RegState::Define);
- MIB.addReg(*CopyReg, RegState::Kill);
- AddDefaultPred(MIB);
+ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr))
+ .addReg(*HiRegToRestore, RegState::Define)
+ .addReg(*CopyReg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
HiRegToRestore =
@@ -837,11 +869,8 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
}
}
-
-
-
- MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
- AddDefaultPred(MIB);
+ MachineInstrBuilder MIB =
+ BuildMI(MF, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
bool NeedsPop = false;
for (unsigned i = CSI.size(); i != 0; --i) {
@@ -859,6 +888,16 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// ARMv4T requires BX, see emitEpilogue
if (!STI.hasV5TOps())
continue;
+ // Tailcall optimization failed; change TCRETURN to a tBL
+ if (MI->getOpcode() == ARM::TCRETURNdi ||
+ MI->getOpcode() == ARM::TCRETURNri) {
+ unsigned Opcode = MI->getOpcode() == ARM::TCRETURNdi
+ ? ARM::tBL : ARM::tBLXr;
+ MachineInstrBuilder BL = BuildMI(MF, DL, TII.get(Opcode));
+ BL.add(predOps(ARMCC::AL));
+ BL.add(MI->getOperand(0));
+ MBB.insert(MI, &*BL);
+ }
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
if (MI != MBB.end())
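
The dominant change in this file is mechanical: AddDefaultPred(MIB) appended the two predicate operands (the ARMCC::AL condition immediate and a null predicate register) by wrapping the builder expression, while the new predOps(ARMCC::AL) returns those operands so they read in source order at the end of the .add* chain. A reduced sketch under assumed shapes; Operand, Builder, and both helpers below are illustrative stand-ins, not the LLVM types:

#include <array>
#include <cstdint>
#include <vector>

struct Operand { bool IsReg; int64_t Val; };

struct Builder {
  std::vector<Operand> Ops;
  Builder &addImm(int64_t V) { Ops.push_back({false, V}); return *this; }
  Builder &addReg(unsigned R) { Ops.push_back({true, R}); return *this; }
  Builder &add(const std::array<Operand, 2> &A) {
    for (const Operand &O : A) Ops.push_back(O);
    return *this;
  }
};

constexpr unsigned CondAL = 14; // ARMCC::AL encoding

// Old style: the helper wraps the whole builder expression.
Builder &AddDefaultPred(Builder &B) { return B.addImm(CondAL).addReg(0); }

// New style: the helper returns operands; the builder stays on the left.
std::array<Operand, 2> predOps(unsigned Cond, unsigned PredReg = 0) {
  return {{{false, Cond}, {true, PredReg}}};
}

int main() {
  Builder Old, New;
  AddDefaultPred(Old.addReg(13));      // reads inside-out
  New.addReg(13).add(predOps(CondAL)); // reads left-to-right
  return Old.Ops.size() == New.Ops.size() ? 0 : 1;
}
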
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 4b4fbaab28d9..27bff4d75acf 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -50,20 +50,29 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg)
|| !ARM::tGPRRegClass.contains(DestReg))
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc)));
+ BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL));
else {
- // FIXME: The performance consequences of this are going to be atrocious.
- // Some things to try that should be better:
- // * 'mov hi, $src; mov $dst, hi', with hi as either r10 or r11
- // * 'movs $dst, $src' if cpsr isn't live
- // See: http://lists.llvm.org/pipermail/llvm-dev/2014-August/075998.html
+ // FIXME: Can also use 'mov hi, $src; mov $dst, hi',
+ // with hi as either r10 or r11.
+
+ const TargetRegisterInfo *RegInfo = st.getRegisterInfo();
+ if (MBB.computeRegisterLiveness(RegInfo, ARM::CPSR, I)
+ == MachineBasicBlock::LQR_Dead) {
+ BuildMI(MBB, I, DL, get(ARM::tMOVSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ ->addRegisterDead(ARM::CPSR, RegInfo);
+ return;
+ }
// 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPUSH)))
- .addReg(SrcReg, getKillRegState(KillSrc));
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPOP)))
- .addReg(DestReg, getDefRegState(true));
+ BuildMI(MBB, I, DL, get(ARM::tPUSH))
+ .add(predOps(ARMCC::AL))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, I, DL, get(ARM::tPOP))
+ .add(predOps(ARMCC::AL))
+ .addReg(DestReg, getDefRegState(true));
}
}
@@ -87,9 +96,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::tSTRspi))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
}
}
@@ -113,8 +125,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
}
}
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index d01fc8c40ddf..04bdd91b53e6 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -9,13 +9,26 @@
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include <cassert>
+#include <new>
+
using namespace llvm;
#define DEBUG_TYPE "thumb2-it"
@@ -24,16 +37,18 @@ STATISTIC(NumITs, "Number of IT blocks inserted");
STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
namespace {
+
class Thumb2ITBlockPass : public MachineFunctionPass {
public:
static char ID;
- Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
bool restrictIT;
const Thumb2InstrInfo *TII;
const TargetRegisterInfo *TRI;
ARMFunctionInfo *AFI;
+ Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
+
bool runOnMachineFunction(MachineFunction &Fn) override;
MachineFunctionProperties getRequiredProperties() const override {
@@ -52,8 +67,10 @@ namespace {
SmallSet<unsigned, 4> &Uses);
bool InsertITInstructions(MachineBasicBlock &MBB);
};
+
char Thumb2ITBlockPass::ID = 0;
-}
+
+} // end anonymous namespace
/// TrackDefUses - Tracking what registers are being defined and used by
/// instructions in the IT block. This also tracks "dependencies", i.e. uses
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 1c731d669eda..818ba85c7d40 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -117,8 +117,9 @@ void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc)));
+ BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .add(predOps(ARMCC::AL));
}
void Thumb2InstrInfo::
@@ -138,9 +139,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
RC == &ARM::GPRnopcRegClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::t2STRi12))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
return;
}
@@ -156,8 +160,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
- MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
- AddDefaultPred(MIB);
+ MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL));
return;
}
@@ -180,8 +183,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
RC == &ARM::GPRnopcRegClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
return;
}
@@ -198,8 +204,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
- MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
- AddDefaultPred(MIB);
+ MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO).add(predOps(ARMCC::AL));
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
@@ -259,10 +264,11 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
if (Fits) {
if (isSub) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg)
- .addReg(BaseReg)
- .addReg(DestReg, RegState::Kill)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
- .setMIFlags(MIFlags);
+ .addReg(BaseReg)
+ .addReg(DestReg, RegState::Kill)
+ .add(predOps(Pred, PredReg))
+ .add(condCodeOp())
+ .setMIFlags(MIFlags);
} else {
// Here we know that DestReg is not SP but we do not
// know anything about BaseReg. t2ADDrr is an invalid
@@ -270,10 +276,11 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
// is fine if SP is the first argument. To be sure we
// do not generate invalid encoding, put BaseReg first.
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg)
- .addReg(BaseReg)
- .addReg(DestReg, RegState::Kill)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
- .setMIFlags(MIFlags);
+ .addReg(BaseReg)
+ .addReg(DestReg, RegState::Kill)
+ .add(predOps(Pred, PredReg))
+ .add(condCodeOp())
+ .setMIFlags(MIFlags);
}
return;
}
@@ -284,8 +291,10 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
unsigned Opc = 0;
if (DestReg == ARM::SP && BaseReg != ARM::SP) {
// mov sp, rn. Note t2MOVr cannot be used.
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),DestReg)
- .addReg(BaseReg).setMIFlags(MIFlags));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(BaseReg)
+ .setMIFlags(MIFlags)
+ .add(predOps(ARMCC::AL));
BaseReg = ARM::SP;
continue;
}
@@ -296,8 +305,11 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) {
assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?");
Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags));
+ BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg)
+ .addImm(ThisVal / 4)
+ .setMIFlags(MIFlags)
+ .add(predOps(ARMCC::AL));
NumBytes = 0;
continue;
}
@@ -334,12 +346,13 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
}
// Build the new ADD / SUB.
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
- .addReg(BaseReg, RegState::Kill)
- .addImm(ThisVal)).setMIFlags(MIFlags);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+ .addReg(BaseReg, RegState::Kill)
+ .addImm(ThisVal)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MIFlags);
if (HasCCOut)
- AddDefaultCC(MIB);
+ MIB.add(condCodeOp());
BaseReg = DestReg;
}
@@ -474,7 +487,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
do MI.RemoveOperand(FrameRegIdx+1);
while (MI.getNumOperands() > FrameRegIdx+1);
MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
return true;
}
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 8208e7e24770..c90475c28db7 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -10,20 +10,38 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Function.h" // To access Function attributes
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <iterator>
#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "t2-reduce-size"
@@ -40,6 +58,7 @@ static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
cl::init(-1), cl::Hidden);
namespace {
+
/// ReduceTable - A static table with information on mapping from wide
/// opcodes to narrow
struct ReduceEntry {
@@ -139,11 +158,12 @@ namespace {
class Thumb2SizeReduce : public MachineFunctionPass {
public:
static char ID;
- Thumb2SizeReduce(std::function<bool(const Function &)> Ftor);
const Thumb2InstrInfo *TII;
const ARMSubtarget *STI;
+ Thumb2SizeReduce(std::function<bool(const Function &)> Ftor);
+
bool runOnMachineFunction(MachineFunction &MF) override;
MachineFunctionProperties getRequiredProperties() const override {
@@ -201,19 +221,21 @@ namespace {
struct MBBInfo {
// The flags leaving this block have high latency.
- bool HighLatencyCPSR;
+ bool HighLatencyCPSR = false;
// Has this block been visited yet?
- bool Visited;
+ bool Visited = false;
- MBBInfo() : HighLatencyCPSR(false), Visited(false) {}
+ MBBInfo() = default;
};
SmallVector<MBBInfo, 8> BlockInfo;
std::function<bool(const Function &)> PredicateFtor;
};
+
char Thumb2SizeReduce::ID = 0;
-}
+
+} // end anonymous namespace
Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
: MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
@@ -490,14 +512,13 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
isLdStMul = true;
break;
}
- case ARM::t2STMIA: {
+ case ARM::t2STMIA:
// If the base register is killed, we don't care what its value is after the
// instruction, so we can use an updating STMIA.
if (!MI->getOperand(0).isKill())
return false;
break;
- }
case ARM::t2LDMIA_RET: {
unsigned BaseReg = MI->getOperand(1).getReg();
if (BaseReg != ARM::SP)
@@ -562,8 +583,8 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);
if (!isLdStMul) {
- MIB.addOperand(MI->getOperand(0));
- MIB.addOperand(MI->getOperand(1));
+ MIB.add(MI->getOperand(0));
+ MIB.add(MI->getOperand(1));
if (HasImmOffset)
MIB.addImm(OffsetImm / Scale);
@@ -577,7 +598,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// Transfer the rest of operands.
for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
- MIB.addOperand(MI->getOperand(OpNum));
+ MIB.add(MI->getOperand(OpNum));
// Transfer memoperands.
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
@@ -621,12 +642,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
return false;
- MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
- TII->get(ARM::tADDrSPi))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- .addImm(Imm / 4); // The tADDrSPi has an implied scale by four.
- AddDefaultPred(MIB);
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(ARM::tADDrSPi))
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
+ .add(predOps(ARMCC::AL));
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
@@ -652,11 +674,10 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
switch (Opc) {
default: break;
- case ARM::t2ADDSri: {
+ case ARM::t2ADDSri:
if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
return true;
LLVM_FALLTHROUGH;
- }
case ARM::t2ADDSrr:
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
@@ -698,7 +719,6 @@ bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
bool LiveCPSR, bool IsSelfLoop) {
-
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
@@ -785,13 +805,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
- MIB.addOperand(MI->getOperand(0));
- if (NewMCID.hasOptionalDef()) {
- if (HasCC)
- AddDefaultT1CC(MIB, CCDead);
- else
- AddNoT1CC(MIB);
- }
+ MIB.add(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef())
+ MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
// Transfer the rest of operands.
unsigned NumOps = MCID.getNumOperands();
@@ -800,7 +816,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
continue;
if (SkipPred && MCID.OpInfo[i].isPredicate())
continue;
- MIB.addOperand(MI->getOperand(i));
+ MIB.add(MI->getOperand(i));
}
// Transfer MI flags.
@@ -880,13 +896,9 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
- MIB.addOperand(MI->getOperand(0));
- if (NewMCID.hasOptionalDef()) {
- if (HasCC)
- AddDefaultT1CC(MIB, CCDead);
- else
- AddNoT1CC(MIB);
- }
+ MIB.add(MI->getOperand(0));
+ if (NewMCID.hasOptionalDef())
+ MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
// Transfer the rest of operands.
unsigned NumOps = MCID.getNumOperands();
@@ -909,10 +921,10 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Skip implicit def of CPSR. Either it's modeled as an optional
// def now or it's already an implicit def on the new instruction.
continue;
- MIB.addOperand(MO);
+ MIB.add(MO);
}
if (!MCID.isPredicable() && NewMCID.isPredicable())
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
// Transfer MI flags.
MIB.setMIFlags(MI->getFlags());
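
Two hunks above collapse the old AddDefaultT1CC/AddNoT1CC branch into a single conditional operand: when the narrow opcode has an optional CPSR definition, the builder receives either a (possibly dead) CPSR def from t1CondCodeOp or a no-register placeholder from condCodeOp. A sketch under assumed operand semantics; the struct and both helpers are illustrative, not the LLVM definitions:

#include <cassert>

struct Operand {
  unsigned Reg;  // 0 stands for "no register"
  bool IsDead;
};

// Assumed shapes of the helpers used in the hunks above.
Operand t1CondCodeOp(bool IsDead) { return {/*CPSR*/ 3, IsDead}; }
Operand condCodeOp() { return {/*NoRegister*/ 0, false}; }

int main() {
  bool HasCC = true, CCDead = true;
  // One expression replaces the old four-line if/else around the builder.
  Operand CC = HasCC ? t1CondCodeOp(CCDead) : condCodeOp();
  assert(CC.Reg != 0 && CC.IsDead);
  return 0;
}
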
diff --git a/lib/Target/ARM/ThumbRegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp
index 2efd63b84a2c..15a567523336 100644
--- a/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -93,9 +93,10 @@ static void emitThumb2LoadConstPool(MachineBasicBlock &MBB,
unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
- .addReg(DestReg, getDefRegState(true), SubIdx)
- .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
- .setMIFlags(MIFlags);
+ .addReg(DestReg, getDefRegState(true), SubIdx)
+ .addConstantPoolIndex(Idx)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MIFlags);
}
/// emitLoadConstPool - Emits a load from constpool to materialize the
@@ -145,14 +146,17 @@ static void emitThumbRegPlusImmInReg(
LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass);
if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) {
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)
+ .add(t1CondCodeOp())
.addImm(NumBytes)
.setMIFlags(MIFlags);
} else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) {
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)
+ .add(t1CondCodeOp())
.addImm(NumBytes)
.setMIFlags(MIFlags);
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg)
+ .add(t1CondCodeOp())
.addReg(LdReg, RegState::Kill)
.setMIFlags(MIFlags);
} else if (ST.genExecuteOnly()) {
@@ -167,12 +171,12 @@ static void emitThumbRegPlusImmInReg(
: ((isHigh || !CanChangeCC) ? ARM::tADDhirr : ARM::tADDrr);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg);
if (Opc != ARM::tADDhirr)
- MIB = AddDefaultT1CC(MIB);
+ MIB = MIB.add(t1CondCodeOp());
if (DestReg == ARM::SP || isSub)
MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
else
MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
}
/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
@@ -307,12 +311,12 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(CopyOpc), DestReg);
if (CopyNeedsCC)
- MIB = AddDefaultT1CC(MIB);
+ MIB = MIB.add(t1CondCodeOp());
MIB.addReg(BaseReg, RegState::Kill);
if (CopyOpc != ARM::tMOVr) {
MIB.addImm(CopyImm);
}
- AddDefaultPred(MIB.setMIFlags(MIFlags));
+ MIB.setMIFlags(MIFlags).add(predOps(ARMCC::AL));
BaseReg = DestReg;
}
@@ -324,10 +328,11 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg);
if (ExtraNeedsCC)
- MIB = AddDefaultT1CC(MIB);
- MIB.addReg(BaseReg).addImm(ExtraImm);
- MIB = AddDefaultPred(MIB);
- MIB.setMIFlags(MIFlags);
+ MIB = MIB.add(t1CondCodeOp());
+ MIB.addReg(BaseReg)
+ .addImm(ExtraImm)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MIFlags);
}
}
@@ -460,9 +465,10 @@ bool ThumbRegisterInfo::saveScavengerRegister(
// a call clobbered register that we know won't be used in Thumb1 mode.
const TargetInstrInfo &TII = *STI.getInstrInfo();
DebugLoc DL;
- AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
- .addReg(ARM::R12, RegState::Define)
- .addReg(Reg, RegState::Kill));
+ BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
+ .addReg(ARM::R12, RegState::Define)
+ .addReg(Reg, RegState::Kill)
+ .add(predOps(ARMCC::AL));
// The UseMI is where we would like to restore the register. If there's
// interference with R12 before then, however, we'll need to restore it
@@ -490,8 +496,10 @@ bool ThumbRegisterInfo::saveScavengerRegister(
}
}
// Restore the register from R12
- AddDefaultPred(BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr)).
- addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill));
+ BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr))
+ .addReg(Reg, RegState::Define)
+ .addReg(ARM::R12, RegState::Kill)
+ .add(predOps(ARMCC::AL));
return true;
}
@@ -621,5 +629,5 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Add predicate back if it's needed.
if (MI.isPredicable())
- AddDefaultPred(MIB);
+ MIB.add(predOps(ARMCC::AL));
}
diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp
index 4afdd3a0ec08..50bb50b44f27 100644
--- a/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -130,7 +130,8 @@ bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
}
}
- printOperand(MI, OpNum, O);
+ if (Error)
+ printOperand(MI, OpNum, O);
return false;
}
diff --git a/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index 1b2f2cec0bca..13080a5d72f0 100644
--- a/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -509,8 +509,8 @@ bool AVRExpandPseudo::expand<AVR::LDIWRdK>(Block &MBB, BlockIt MBBI) {
const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
unsigned TF = MI.getOperand(1).getTargetFlags();
- MIBLO.addOperand(MachineOperand::CreateBA(BA, TF | AVRII::MO_LO));
- MIBHI.addOperand(MachineOperand::CreateBA(BA, TF | AVRII::MO_HI));
+ MIBLO.add(MachineOperand::CreateBA(BA, TF | AVRII::MO_LO));
+ MIBHI.add(MachineOperand::CreateBA(BA, TF | AVRII::MO_HI));
break;
}
case MachineOperand::MO_Immediate: {
@@ -785,9 +785,8 @@ bool AVRExpandPseudo::expandAtomicBinaryOp(unsigned Opcode,
auto Op1 = MI.getOperand(0);
auto Op2 = MI.getOperand(1);
- MachineInstr &NewInst = *buildMI(MBB, MBBI, Opcode)
- .addOperand(Op1).addOperand(Op2)
- .getInstr();
+ MachineInstr &NewInst =
+ *buildMI(MBB, MBBI, Opcode).add(Op1).add(Op2).getInstr();
f(NewInst);
});
}
@@ -810,15 +809,13 @@ bool AVRExpandPseudo::expandAtomicArithmeticOp(unsigned Width,
unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;
// Create the load
- buildMI(MBB, MBBI, LoadOpcode).addOperand(Op1).addOperand(Op2);
+ buildMI(MBB, MBBI, LoadOpcode).add(Op1).add(Op2);
// Create the arithmetic op
- buildMI(MBB, MBBI, ArithOpcode)
- .addOperand(Op1).addOperand(Op1)
- .addOperand(Op2);
+ buildMI(MBB, MBBI, ArithOpcode).add(Op1).add(Op1).add(Op2);
// Create the store
- buildMI(MBB, MBBI, StoreOpcode).addOperand(Op2).addOperand(Op1);
+ buildMI(MBB, MBBI, StoreOpcode).add(Op2).add(Op1);
});
}
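
The expansion above reduces an AVR atomic read-modify-write to a plain load/op/store triple; what makes that safe is the enclosing expandAtomic wrapper (not shown in this hunk), which is assumed to save SREG and disable interrupts around the callback. The shape of the emitted sequence, written out as illustrative AVR assembly with assumed register choices:

#include <cstdio>

int main() {
  const char *AtomicAdd8 =
      "in   r0, 0x3f   ; save SREG (assumed wrapper, not in this hunk)\n"
      "cli             ; interrupts off for the critical section\n"
      "ld   r24, X     ; load            (buildMI(LoadOpcode))\n"
      "add  r24, r22   ; arithmetic op   (buildMI(ArithOpcode))\n"
      "st   X, r24     ; store           (buildMI(StoreOpcode))\n"
      "out  0x3f, r0   ; restore SREG\n";
  std::puts(AtomicAdd8);
  return 0;
}
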
diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp
index 07fc3f6890b8..0b95d3819399 100644
--- a/lib/Target/AVR/AVRISelLowering.cpp
+++ b/lib/Target/AVR/AVRISelLowering.cpp
@@ -48,6 +48,8 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
setOperationAction(ISD::BlockAddress, MVT::i16, Custom);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);
@@ -311,7 +313,7 @@ SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
unsigned Opcode = Op->getOpcode();
assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
"Invalid opcode for Div/Rem lowering");
- bool isSigned = (Opcode == ISD::SDIVREM);
+ bool IsSigned = (Opcode == ISD::SDIVREM);
EVT VT = Op->getValueType(0);
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
@@ -320,16 +322,16 @@ SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
default:
llvm_unreachable("Unexpected request for libcall!");
case MVT::i8:
- LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
+ LC = IsSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
break;
case MVT::i16:
- LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
+ LC = IsSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
break;
case MVT::i32:
- LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
+ LC = IsSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
break;
case MVT::i64:
- LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64;
+ LC = IsSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64;
break;
}
@@ -340,8 +342,8 @@ SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
for (SDValue const &Value : Op->op_values()) {
Entry.Node = Value;
Entry.Ty = Value.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.isSExt = isSigned;
- Entry.isZExt = !isSigned;
+ Entry.IsSExt = IsSigned;
+ Entry.IsZExt = !IsSigned;
Args.push_back(Entry);
}
@@ -354,10 +356,10 @@ SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(InChain)
- .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
+ .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setInRegister()
- .setSExtResult(isSigned)
- .setZExtResult(!isSigned);
+ .setSExtResult(IsSigned)
+ .setZExtResult(!IsSigned);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
return CallInfo.first;
@@ -932,6 +934,12 @@ static void analyzeStandardArguments(TargetLowering::CallLoweringInfo *CLI,
bool UsesStack = false;
for (unsigned i = 0, pos = 0, e = Args.size(); i != e; ++i) {
unsigned Size = Args[i];
+
+ // If we have a zero-sized argument, don't attempt to lower it.
+ // AVR-GCC does not support zero-sized arguments and so we need not
+ // worry about ABI compatibility.
+ if (Size == 0) continue;
+
MVT LocVT = (IsCall) ? (*Outs)[pos].VT : (*Ins)[pos].VT;
// If we have plenty of regs to pass the whole argument do it.
@@ -1373,7 +1381,7 @@ AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Don't emit the ret/reti instruction when the naked attribute is present in
// the function being compiled.
if (MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::Naked)) {
+ AttributeList::FunctionIndex, Attribute::Naked)) {
return Chain;
}
@@ -1975,4 +1983,3 @@ unsigned AVRTargetLowering::getRegisterByName(const char *RegName,
}
} // end of namespace llvm
-
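
The zero-size guard added to analyzeStandardArguments simply drops such arguments before any register or stack slot is assigned. A minimal sketch of the scan; the function name is hypothetical:

#include <cassert>
#include <vector>

static int countLoweredArgs(const std::vector<unsigned> &Sizes) {
  int Lowered = 0;
  for (unsigned Size : Sizes) {
    if (Size == 0)
      continue; // zero-sized argument: nothing to pass, no ABI slot needed
    ++Lowered;
  }
  return Lowered;
}

int main() {
  assert(countLoweredArgs({2, 0, 1}) == 2); // the empty argument is skipped
  return 0;
}
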
diff --git a/lib/Target/AVR/AVRInstrInfo.td b/lib/Target/AVR/AVRInstrInfo.td
index bc66379ab708..693d80a1c06f 100644
--- a/lib/Target/AVR/AVRInstrInfo.td
+++ b/lib/Target/AVR/AVRInstrInfo.td
@@ -694,7 +694,7 @@ Defs = [SREG] in
}
//===----------------------------------------------------------------------===//
-// One's/Two's Compliment
+// One's/Two's Complement
//===----------------------------------------------------------------------===//
let Constraints = "$src = $rd",
Defs = [SREG] in
@@ -1718,7 +1718,7 @@ Defs = [SREG] in
(implicit SREG)]>;
// CBR Rd, K
- // Alias for `ANDI Rd, COM(K)` where COM(K) is the compliment of K.
+ // Alias for `ANDI Rd, COM(K)` where COM(K) is the complement of K.
// FIXME: This uses the 'complement' encoder. We need it to also use the
// imm_ldi8 encoder. This will cause no fixups to be created on this instruction.
def CBRRdK : FRdK<0b0111,
diff --git a/lib/Target/AVR/AVRInstrumentFunctions.cpp b/lib/Target/AVR/AVRInstrumentFunctions.cpp
index 5553dc2da31b..e7fca74e1701 100644
--- a/lib/Target/AVR/AVRInstrumentFunctions.cpp
+++ b/lib/Target/AVR/AVRInstrumentFunctions.cpp
@@ -96,7 +96,7 @@ static void BuildSignatureCall(StringRef SymName, BasicBlock &BB, Function &F) {
Value *FunctionName = CreateStringPtr(BB, F.getName());
Value *Args[] = {FunctionName,
- ConstantInt::get(I16, F.getArgumentList().size())};
+ ConstantInt::get(I16, F.arg_size())};
CallInst::Create(Fn, Args, "", &BB);
}
diff --git a/lib/Target/AVR/AVRMCInstLower.cpp b/lib/Target/AVR/AVRMCInstLower.cpp
index 342fe558813a..475dda420e89 100644
--- a/lib/Target/AVR/AVRMCInstLower.cpp
+++ b/lib/Target/AVR/AVRMCInstLower.cpp
@@ -56,7 +56,7 @@ void AVRMCInstLower::lowerInstruction(const MachineInstr &MI, MCInst &OutMI) con
switch (MO.getType()) {
default:
- MI.dump();
+ MI.print(errs());
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index 081d8b5740ef..5c3b45ac2328 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -335,7 +335,7 @@ MCObjectWriter *AVRAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
void AVRAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ bool IsPCRel, MCContext &Ctx) const {
if (Value == 0)
return; // Doesn't change encoding.
diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
index 7ff4b8f350f6..f2be2494684a 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
@@ -41,7 +41,7 @@ public:
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
index 481de320b22f..713754821005 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
@@ -1,5 +1,7 @@
#include "AVRELFStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/FormattedStream.h"
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
index cca3bcc4968a..9f2ee8cf8035 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
@@ -23,6 +23,7 @@ AVRMCAsmInfo::AVRMCAsmInfo(const Triple &TT) {
CommentString = ";";
PrivateGlobalPrefix = ".L";
UsesELFSectionDirectiveForBSS = true;
+ UseIntegratedAssembler = true;
}
} // end of namespace llvm
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
index e6dc8868c705..c3d43ebb407e 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
@@ -25,6 +25,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "mccodeemitter"
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
index 5fa425c296a5..4cee8d904c9d 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
@@ -63,7 +63,7 @@ private:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- /// Takes the compliment of a number (~0 - val).
+ /// Takes the complement of a number (~0 - val).
unsigned encodeComplement(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 12091449cc11..279cdb1a89b4 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -71,7 +71,7 @@ bool BPFDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
// Addresses of the form Addr+const or Addr|const
if (CurDAG->isBaseWithConstantOffset(Addr)) {
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isInt<32>(CN->getSExtValue())) {
+ if (isInt<16>(CN->getSExtValue())) {
// If the first operand is a FI, get the TargetFI Node
if (FrameIndexSDNode *FIN =
@@ -99,7 +99,7 @@ bool BPFDAGToDAGISel::SelectFIAddr(SDValue Addr, SDValue &Base, SDValue &Offset)
// Addresses of the form Addr+const or Addr|const
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isInt<32>(CN->getSExtValue())) {
+ if (isInt<16>(CN->getSExtValue())) {
// If the first operand is a FI, get the TargetFI Node
if (FrameIndexSDNode *FIN =
@@ -138,7 +138,7 @@ void BPFDAGToDAGISel::Select(SDNode *Node) {
else
errs() << "Error: ";
errs() << "Unsupport signed division for DAG: ";
- Node->dump(CurDAG);
+ Node->print(errs(), CurDAG);
errs() << "Please convert to unsigned div/mod.\n";
break;
}
diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp
index cca3492a1992..b9b3dff95c0a 100644
--- a/lib/Target/BPF/BPFISelLowering.cpp
+++ b/lib/Target/BPF/BPFISelLowering.cpp
@@ -33,7 +33,7 @@ using namespace llvm;
#define DEBUG_TYPE "bpf-lower"
-static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
+static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg) {
MachineFunction &MF = DAG.getMachineFunction();
DAG.getContext()->diagnose(
DiagnosticInfoUnsupported(*MF.getFunction(), Msg, DL.getDebugLoc()));
@@ -306,11 +306,23 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ auto GV = G->getGlobal();
+ fail(CLI.DL, DAG,
+ "A call to global function '" + StringRef(GV->getName())
+ + "' is not supported. "
+ + (GV->isDeclaration() ?
+ "Only calls to predefined BPF helpers are allowed." :
+ "Please use __attribute__((always_inline) to make sure"
+ " this function is inlined."));
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT,
G->getOffset(), 0);
- else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
+ } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
+ fail(CLI.DL, DAG, Twine("A call to built-in function '"
+ + StringRef(E->getSymbol())
+ + "' is not supported."));
+ }
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
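
The two diagnostics above fire during call lowering. A minimal sketch, not part of this patch, of the source pattern they target; 'helper', 'fixed', and 'entry' are hypothetical names:

    // Compiled for the BPF target, the non-inlined call below now draws the
    // "is not supported" diagnostic; the always_inline version does not,
    // because no BPFcall node survives inlining.
    __attribute__((noinline)) static int helper(int x) {
      return x * 2;  // survives as a real call: rejected by the backend
    }
    __attribute__((always_inline)) static inline int fixed(int x) {
      return x * 2;  // inlined at every call site: no call is emitted
    }
    int entry(int v) { return helper(v) + fixed(v); }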
diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td
index a7910dea98de..93ee24371c4d 100644
--- a/lib/Target/BPF/BPFInstrInfo.td
+++ b/lib/Target/BPF/BPFInstrInfo.td
@@ -470,6 +470,7 @@ def : Pat<(i64 (and (i64 GPR:$src), 0xffffFFFF)),
// Calls
def : Pat<(BPFcall tglobaladdr:$dst), (JAL tglobaladdr:$dst)>;
+def : Pat<(BPFcall texternalsym:$dst), (JAL texternalsym:$dst)>;
def : Pat<(BPFcall imm:$dst), (JAL imm:$dst)>;
// Loads
diff --git a/lib/Target/BPF/BPFMCInstLower.cpp b/lib/Target/BPF/BPFMCInstLower.cpp
index f64defecf3cc..c8528e867310 100644
--- a/lib/Target/BPF/BPFMCInstLower.cpp
+++ b/lib/Target/BPF/BPFMCInstLower.cpp
@@ -29,6 +29,11 @@ BPFMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
return Printer.getSymbol(MO.getGlobal());
}
+MCSymbol *
+BPFMCInstLower::GetExternalSymbolSymbol(const MachineOperand &MO) const {
+ return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
MCOperand BPFMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
MCSymbol *Sym) const {
@@ -49,7 +54,7 @@ void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOperand MCOp;
switch (MO.getType()) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
@@ -66,6 +71,9 @@ void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
break;
case MachineOperand::MO_RegisterMask:
continue;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ break;
case MachineOperand::MO_GlobalAddress:
MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
break;
diff --git a/lib/Target/BPF/BPFMCInstLower.h b/lib/Target/BPF/BPFMCInstLower.h
index 054e89407db2..eac811f4cf88 100644
--- a/lib/Target/BPF/BPFMCInstLower.h
+++ b/lib/Target/BPF/BPFMCInstLower.h
@@ -37,6 +37,7 @@ public:
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
};
}
diff --git a/lib/Target/BPF/BPFRegisterInfo.cpp b/lib/Target/BPF/BPFRegisterInfo.cpp
index 71846e3e92c9..7925bee9c587 100644
--- a/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
#define GET_REGINFO_TARGET_DESC
#include "BPFGenRegisterInfo.inc"
@@ -41,6 +42,18 @@ BitVector BPFRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+static void WarnSize(int Offset, MachineFunction &MF, DebugLoc &DL) {
+ if (Offset <= -512) {
+ auto F = MF.getFunction();
+ DiagnosticInfoUnsupported DiagStackSize(*F,
+ "Looks like the BPF stack limit of 512 bytes is exceeded. "
+ "Please move large on stack variables into BPF per-cpu array map.\n",
+ DL);
+ F->getContext().diagnose(DiagStackSize);
+ }
+}
+
void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
@@ -48,9 +61,18 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned i = 0;
MachineInstr &MI = *II;
- MachineFunction &MF = *MI.getParent()->getParent();
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
DebugLoc DL = MI.getDebugLoc();
+ if (!DL)
+ // Try harder to find some debug location.
+ for (auto &I : MBB)
+ if (I.getDebugLoc()) {
+ DL = I.getDebugLoc();
+ break;
+ }
+
while (!MI.getOperand(i).isFI()) {
++i;
assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
@@ -59,11 +81,11 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned FrameReg = getFrameRegister(MF);
int FrameIndex = MI.getOperand(i).getIndex();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- MachineBasicBlock &MBB = *MI.getParent();
if (MI.getOpcode() == BPF::MOV_rr) {
int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex);
+ WarnSize(Offset, MF, DL);
MI.getOperand(i).ChangeToRegister(FrameReg, false);
unsigned reg = MI.getOperand(i - 1).getReg();
BuildMI(MBB, ++II, DL, TII.get(BPF::ADD_ri), reg)
@@ -78,6 +100,8 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (!isInt<32>(Offset))
llvm_unreachable("bug in frame offset");
+ WarnSize(Offset, MF, DL);
+
if (MI.getOpcode() == BPF::FI_ri) {
// architecture does not really support FI_ri, replace it with
// MOV_rr <target_reg>, frame_reg
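
For reference, a hedged sketch (names hypothetical, not from this patch) of a frame that would now trip WarnSize: the buffer forces a frame object at or below offset -512, past the 512-byte BPF stack budget.

    extern int consume(const char *p, int n);

    int entry() {
      char buf[600];                  // larger than the whole BPF stack
      for (int i = 0; i < 600; ++i)
        buf[i] = static_cast<char>(i);
      return consume(buf, 600);       // keeps buf live on the stack
    }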
diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index afc321ea2c34..1f355171ebd3 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -28,7 +28,7 @@ public:
~BPFAsmBackend() override = default;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
@@ -62,8 +62,8 @@ bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
}
void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ unsigned DataSize, uint64_t Value, bool IsPCRel,
+ MCContext &Ctx) const {
if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) {
assert(Value == 0);
} else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) {
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 044db10fb3fa..1e6abfacb792 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -17,3 +17,9 @@ foreach(t ${LLVM_TARGETS_TO_BUILD})
message(STATUS "Targeting ${t}")
add_subdirectory(${t})
endforeach()
+
+# Currently we do not allow libraries from lib to reference targets directly.
+# This property is used to enforce that convention. It is important because
+# the logic in llvm_map_components_to_libnames depends on the order in which
+# the target libraries are created.
+set_property(GLOBAL PROPERTY LLVM_TARGETS_CONFIGURED On)
diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index becc086c81b0..4bbc36a86e5b 100644
--- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -63,21 +63,25 @@ using namespace llvm;
static cl::opt<bool> EnableFutureRegs("mfuture-regs",
cl::desc("Enable future registers"));
-static cl::opt<bool> WarnMissingParenthesis("mwarn-missing-parenthesis",
-cl::desc("Warn for missing parenthesis around predicate registers"),
-cl::init(true));
-static cl::opt<bool> ErrorMissingParenthesis("merror-missing-parenthesis",
-cl::desc("Error for missing parenthesis around predicate registers"),
-cl::init(false));
-static cl::opt<bool> WarnSignedMismatch("mwarn-sign-mismatch",
-cl::desc("Warn for mismatching a signed and unsigned value"),
-cl::init(true));
-static cl::opt<bool> WarnNoncontigiousRegister("mwarn-noncontigious-register",
-cl::desc("Warn for register names that arent contigious"),
-cl::init(true));
-static cl::opt<bool> ErrorNoncontigiousRegister("merror-noncontigious-register",
-cl::desc("Error for register names that aren't contigious"),
-cl::init(false));
+static cl::opt<bool> WarnMissingParenthesis(
+ "mwarn-missing-parenthesis",
+ cl::desc("Warn for missing parenthesis around predicate registers"),
+ cl::init(true));
+static cl::opt<bool> ErrorMissingParenthesis(
+ "merror-missing-parenthesis",
+ cl::desc("Error for missing parenthesis around predicate registers"),
+ cl::init(false));
+static cl::opt<bool> WarnSignedMismatch(
+ "mwarn-sign-mismatch",
+ cl::desc("Warn for mismatching a signed and unsigned value"),
+ cl::init(true));
+static cl::opt<bool> WarnNoncontigiousRegister(
+ "mwarn-noncontigious-register",
+ cl::desc("Warn for register names that aren't contiguous"), cl::init(true));
+static cl::opt<bool> ErrorNoncontigiousRegister(
+ "merror-noncontigious-register",
+ cl::desc("Error for register names that aren't contiguous"),
+ cl::init(false));
namespace {
@@ -123,9 +127,11 @@ class HexagonAsmParser : public MCTargetAsmParser {
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
- uint64_t &ErrorInfo, bool MatchingInlineAsm) override;
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
- unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override;
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
+ unsigned Kind) override;
bool OutOfRange(SMLoc IDLoc, long long Val, long long Max);
int processInstruction(MCInst &Inst, OperandVector const &Operands,
SMLoc IDLoc);
@@ -168,11 +174,10 @@ public:
bool parseInstruction(OperandVector &Operands);
bool implicitExpressionLocation(OperandVector &Operands);
bool parseExpressionOrOperand(OperandVector &Operands);
- bool parseExpression(MCExpr const *& Expr);
+ bool parseExpression(MCExpr const *&Expr);
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc, OperandVector &Operands) override
- {
+ SMLoc NameLoc, OperandVector &Operands) override {
llvm_unreachable("Unimplemented");
}
@@ -289,45 +294,63 @@ public:
return false;
}
- bool isf32Ext() const { return false; }
- bool iss32_0Imm() const { return CheckImmRange(32, 0, true, true, false); }
+ bool isa30_2Imm() const { return CheckImmRange(30, 2, true, true, true); }
+ bool isb30_2Imm() const { return CheckImmRange(30, 2, true, true, true); }
+ bool isb15_2Imm() const { return CheckImmRange(15, 2, true, true, false); }
+ bool isb13_2Imm() const { return CheckImmRange(13, 2, true, true, false); }
+
+ bool ism32_0Imm() const { return true; }
+
+ bool isf32Imm() const { return false; }
+ bool isf64Imm() const { return false; }
+ bool iss32_0Imm() const { return true; }
+ bool iss31_1Imm() const { return true; }
+ bool iss30_2Imm() const { return true; }
+ bool iss29_3Imm() const { return true; }
bool iss23_2Imm() const { return CheckImmRange(23, 2, true, true, false); }
+ bool iss10_0Imm() const { return CheckImmRange(10, 0, true, false, false); }
+ bool iss10_6Imm() const { return CheckImmRange(10, 6, true, false, false); }
+ bool iss9_0Imm() const { return CheckImmRange(9, 0, true, false, false); }
bool iss8_0Imm() const { return CheckImmRange(8, 0, true, false, false); }
bool iss8_0Imm64() const { return CheckImmRange(8, 0, true, true, false); }
bool iss7_0Imm() const { return CheckImmRange(7, 0, true, false, false); }
bool iss6_0Imm() const { return CheckImmRange(6, 0, true, false, false); }
+ bool iss6_3Imm() const { return CheckImmRange(6, 3, true, false, false); }
bool iss4_0Imm() const { return CheckImmRange(4, 0, true, false, false); }
bool iss4_1Imm() const { return CheckImmRange(4, 1, true, false, false); }
bool iss4_2Imm() const { return CheckImmRange(4, 2, true, false, false); }
bool iss4_3Imm() const { return CheckImmRange(4, 3, true, false, false); }
- bool iss4_6Imm() const { return CheckImmRange(4, 0, true, false, false); }
- bool iss3_6Imm() const { return CheckImmRange(3, 0, true, false, false); }
bool iss3_0Imm() const { return CheckImmRange(3, 0, true, false, false); }
bool isu64_0Imm() const { return CheckImmRange(64, 0, false, true, true); }
- bool isu32_0Imm() const { return CheckImmRange(32, 0, false, true, false); }
+ bool isu32_0Imm() const { return true; }
+ bool isu31_1Imm() const { return true; }
+ bool isu30_2Imm() const { return true; }
+ bool isu29_3Imm() const { return true; }
bool isu26_6Imm() const { return CheckImmRange(26, 6, false, true, false); }
bool isu16_0Imm() const { return CheckImmRange(16, 0, false, true, false); }
bool isu16_1Imm() const { return CheckImmRange(16, 1, false, true, false); }
bool isu16_2Imm() const { return CheckImmRange(16, 2, false, true, false); }
bool isu16_3Imm() const { return CheckImmRange(16, 3, false, true, false); }
bool isu11_3Imm() const { return CheckImmRange(11, 3, false, false, false); }
- bool isu6_1Imm() const { return CheckImmRange(6, 1, false, false, false); }
- bool isu6_2Imm() const { return CheckImmRange(6, 2, false, false, false); }
- bool isu6_3Imm() const { return CheckImmRange(6, 3, false, false, false); }
bool isu10_0Imm() const { return CheckImmRange(10, 0, false, false, false); }
bool isu9_0Imm() const { return CheckImmRange(9, 0, false, false, false); }
bool isu8_0Imm() const { return CheckImmRange(8, 0, false, false, false); }
bool isu7_0Imm() const { return CheckImmRange(7, 0, false, false, false); }
bool isu6_0Imm() const { return CheckImmRange(6, 0, false, false, false); }
+ bool isu6_1Imm() const { return CheckImmRange(6, 1, false, false, false); }
+ bool isu6_2Imm() const { return CheckImmRange(6, 2, false, false, false); }
+ bool isu6_3Imm() const { return CheckImmRange(6, 3, false, false, false); }
bool isu5_0Imm() const { return CheckImmRange(5, 0, false, false, false); }
+ bool isu5_2Imm() const { return CheckImmRange(5, 2, false, false, false); }
+ bool isu5_3Imm() const { return CheckImmRange(5, 3, false, false, false); }
bool isu4_0Imm() const { return CheckImmRange(4, 0, false, false, false); }
+ bool isu4_2Imm() const { return CheckImmRange(4, 2, false, false, false); }
bool isu3_0Imm() const { return CheckImmRange(3, 0, false, false, false); }
+ bool isu3_1Imm() const { return CheckImmRange(3, 1, false, false, false); }
bool isu2_0Imm() const { return CheckImmRange(2, 0, false, false, false); }
bool isu1_0Imm() const { return CheckImmRange(1, 0, false, false, false); }
- bool ism6_0Imm() const { return CheckImmRange(6, 0, false, false, false); }
- bool isn8_0Imm() const { return CheckImmRange(8, 0, false, false, false); }
bool isn1Const() const {
if (!isImm())
return false;
@@ -336,35 +359,18 @@ public:
return false;
return Value == -1;
}
-
- bool iss16_0Ext() const { return CheckImmRange(16 + 26, 0, true, true, true); }
- bool iss12_0Ext() const { return CheckImmRange(12 + 26, 0, true, true, true); }
- bool iss10_0Ext() const { return CheckImmRange(10 + 26, 0, true, true, true); }
- bool iss9_0Ext() const { return CheckImmRange(9 + 26, 0, true, true, true); }
- bool iss8_0Ext() const { return CheckImmRange(8 + 26, 0, true, true, true); }
- bool iss7_0Ext() const { return CheckImmRange(7 + 26, 0, true, true, true); }
- bool iss6_0Ext() const { return CheckImmRange(6 + 26, 0, true, true, true); }
- bool iss11_0Ext() const {
+ bool iss11_0Imm() const {
return CheckImmRange(11 + 26, 0, true, true, true);
}
- bool iss11_1Ext() const {
+ bool iss11_1Imm() const {
return CheckImmRange(11 + 26, 1, true, true, true);
}
- bool iss11_2Ext() const {
+ bool iss11_2Imm() const {
return CheckImmRange(11 + 26, 2, true, true, true);
}
- bool iss11_3Ext() const {
+ bool iss11_3Imm() const {
return CheckImmRange(11 + 26, 3, true, true, true);
}
-
- bool isu7_0Ext() const { return CheckImmRange(7 + 26, 0, false, true, true); }
- bool isu8_0Ext() const { return CheckImmRange(8 + 26, 0, false, true, true); }
- bool isu9_0Ext() const { return CheckImmRange(9 + 26, 0, false, true, true); }
- bool isu10_0Ext() const { return CheckImmRange(10 + 26, 0, false, true, true); }
- bool isu6_0Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); }
- bool isu6_1Ext() const { return CheckImmRange(6 + 26, 1, false, true, true); }
- bool isu6_2Ext() const { return CheckImmRange(6 + 26, 2, false, true, true); }
- bool isu6_3Ext() const { return CheckImmRange(6 + 26, 3, false, true, true); }
bool isu32_0MustExt() const { return isImm(); }
void addRegOperands(MCInst &Inst, unsigned N) const {
@@ -392,188 +398,10 @@ public:
Inst.addOperand(MCOperand::createExpr(Expr));
}
- void addf32ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
-
- void adds32_0ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds23_2ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds8_0ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds8_0Imm64Operands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds6_0ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds4_0ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds4_1ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds4_2ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds4_3ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds3_0ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
-
- void addu64_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu32_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu26_6ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu16_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu16_1ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu16_2ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu16_3ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu11_3ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu10_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu9_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu8_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu7_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6_1ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6_2ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6_3ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu5_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu4_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu3_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu2_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu1_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
-
- void addm6_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addn8_0ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
-
- void adds16_0ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds12_0ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds10_0ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds9_0ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds8_0ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds6_0ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds11_0ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds11_1ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds11_2ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds11_3ExtOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
void addn1ConstOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu7_0ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu8_0ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu9_0ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu10_0ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6_0ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6_1ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6_2ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6_3ExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu32_0MustExtOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
-
- void adds4_6ImmOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE =
- dyn_cast<MCConstantExpr>(&HexagonMCInstrInfo::getExpr(*getImm()));
- Inst.addOperand(MCOperand::createImm(CE->getValue() * 64));
- }
-
- void adds3_6ImmOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE =
- dyn_cast<MCConstantExpr>(&HexagonMCInstrInfo::getExpr(*getImm()));
- Inst.addOperand(MCOperand::createImm(CE->getValue() * 64));
- }
-
StringRef getToken() const {
assert(Kind == Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
@@ -749,10 +577,6 @@ bool HexagonAsmParser::matchBundleOptions() {
HexagonMCInstrInfo::setInnerLoop(MCB);
else if (Option.compare_lower("endloop1") == 0)
HexagonMCInstrInfo::setOuterLoop(MCB);
- else if (Option.compare_lower("mem_noshuf") == 0)
- HexagonMCInstrInfo::setMemReorderDisabled(MCB);
- else if (Option.compare_lower("mem_shuf") == 0)
- HexagonMCInstrInfo::setMemStoreReorderEnabled(MCB);
else
return true;
Lex();
@@ -770,8 +594,7 @@ void HexagonAsmParser::canonicalizeImmediates(MCInst &MCI) {
int64_t Value (I.getImm());
NewInst.addOperand(MCOperand::createExpr(HexagonMCExpr::create(
MCConstantExpr::create(Value, getContext()), getContext())));
- }
- else {
+ } else {
if (I.isExpr() && cast<HexagonMCExpr>(I.getExpr())->signMismatch() &&
WarnSignedMismatch)
Warning (MCI.getLoc(), "Signed/Unsigned mismatch");
@@ -1066,6 +889,9 @@ bool HexagonAsmParser::ParseDirectiveComm(bool IsLocal, SMLoc Loc) {
// validate register against architecture
bool HexagonAsmParser::RegisterMatchesArch(unsigned MatchNum) const {
+ if (HexagonMCRegisterClasses[Hexagon::V62RegsRegClassID].contains(MatchNum))
+ if (!getSTI().getFeatureBits()[Hexagon::ArchV62])
+ return false;
return true;
}
@@ -1171,11 +997,15 @@ bool HexagonAsmParser::parseOperand(OperandVector &Operands) {
bool HexagonAsmParser::isLabel(AsmToken &Token) {
MCAsmLexer &Lexer = getLexer();
AsmToken const &Second = Lexer.getTok();
- AsmToken Third = Lexer.peekTok();
+ AsmToken Third = Lexer.peekTok();
StringRef String = Token.getString();
if (Token.is(AsmToken::TokenKind::LCurly) ||
Token.is(AsmToken::TokenKind::RCurly))
return false;
+ // Special case for parsing vwhist256:sat.
+ if (String.lower() == "vwhist256" && Second.is(AsmToken::Colon) &&
+ Third.getString().lower() == "sat")
+ return false;
if (!Token.is(AsmToken::TokenKind::Identifier))
return true;
if (!matchRegister(String.lower()))
@@ -1756,8 +1586,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
TmpInst.setOpcode(Hexagon::L2_loadrdgp);
TmpInst.addOperand(MO_0);
- TmpInst.addOperand(
- MCOperand::createExpr(MCSymbolRefExpr::create(Sym, getContext())));
+ TmpInst.addOperand(MCOperand::createExpr(HexagonMCExpr::create(
+ MCSymbolRefExpr::create(Sym, getContext()), getContext())));
Inst = TmpInst;
}
}
@@ -2142,6 +1972,67 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
Inst = TmpInst;
break;
}
+ case Hexagon::PS_loadrubabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(1).getExpr()))
+ Inst.setOpcode(Hexagon::L2_loadrubgp);
+ break;
+ case Hexagon::PS_loadrbabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(1).getExpr()))
+ Inst.setOpcode(Hexagon::L2_loadrbgp);
+ break;
+ case Hexagon::PS_loadruhabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(1).getExpr()))
+ Inst.setOpcode(Hexagon::L2_loadruhgp);
+ break;
+ case Hexagon::PS_loadrhabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(1).getExpr()))
+ Inst.setOpcode(Hexagon::L2_loadrhgp);
+ break;
+ case Hexagon::PS_loadriabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(1).getExpr()))
+ Inst.setOpcode(Hexagon::L2_loadrigp);
+ break;
+ case Hexagon::PS_loadrdabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(1).getExpr()))
+ Inst.setOpcode(Hexagon::L2_loadrdgp);
+ break;
+ case Hexagon::PS_storerbabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr()))
+ Inst.setOpcode(Hexagon::S2_storerbgp);
+ break;
+ case Hexagon::PS_storerhabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr()))
+ Inst.setOpcode(Hexagon::S2_storerhgp);
+ break;
+ case Hexagon::PS_storerfabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr()))
+ Inst.setOpcode(Hexagon::S2_storerfgp);
+ break;
+ case Hexagon::PS_storeriabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr()))
+ Inst.setOpcode(Hexagon::S2_storerigp);
+ break;
+ case Hexagon::PS_storerdabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr()))
+ Inst.setOpcode(Hexagon::S2_storerdgp);
+ break;
+ case Hexagon::PS_storerbnewabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr()))
+ Inst.setOpcode(Hexagon::S2_storerbnewgp);
+ break;
+ case Hexagon::PS_storerhnewabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr()))
+ Inst.setOpcode(Hexagon::S2_storerhnewgp);
+ break;
+ case Hexagon::PS_storerinewabs:
+ if (!HexagonMCInstrInfo::mustExtend(*Inst.getOperand(0).getExpr()))
+ Inst.setOpcode(Hexagon::S2_storerinewgp);
+ break;
+ case Hexagon::A2_zxtb: {
+ Inst.setOpcode(Hexagon::A2_andir);
+ Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(255, Context)));
+ break;
+ }
} // switch
return Match_Success;
diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp
index 963fb99ce09b..61d3630ac095 100644
--- a/lib/Target/Hexagon/BitTracker.cpp
+++ b/lib/Target/Hexagon/BitTracker.cpp
@@ -317,6 +317,15 @@ bool BT::RegisterCell::operator== (const RegisterCell &RC) const {
return true;
}
+BT::RegisterCell &BT::RegisterCell::regify(unsigned R) {
+ for (unsigned i = 0, n = width(); i < n; ++i) {
+ const BitValue &V = Bits[i];
+ if (V.Type == BitValue::Ref && V.RefI.Reg == 0)
+ Bits[i].RefI = BitRef(R, i);
+ }
+ return *this;
+}
+
uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
// The general problem is with finding a register class that corresponds
// to a given reference reg:sub. There can be several such classes, and
@@ -378,12 +387,7 @@ void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC,
return;
assert(RR.Sub == 0 && "Unexpected sub-register in definition");
// Eliminate all ref-to-reg-0 bit values: replace them with "self".
- for (unsigned i = 0, n = RC.width(); i < n; ++i) {
- const BitValue &V = RC[i];
- if (V.Type == BitValue::Ref && V.RefI.Reg == 0)
- RC[i].RefI = BitRef(RR.Reg, i);
- }
- M[RR.Reg] = RC;
+ M[RR.Reg] = RC.regify(RR.Reg);
}
// Check if the cell represents a compile-time integer value.
diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h
index 48c5f2266acf..a547b34e852f 100644
--- a/lib/Target/Hexagon/BitTracker.h
+++ b/lib/Target/Hexagon/BitTracker.h
@@ -283,6 +283,9 @@ struct BitTracker::RegisterCell {
return !operator==(RC);
}
+ // Replace the ref-to-reg-0 bit values with the given register.
+ RegisterCell &regify(unsigned R);
+
// Generate a "ref" cell for the corresponding register. In the resulting
// cell each bit will be described as being the same as the corresponding
// bit in register Reg (i.e. the cell is "defined" by register Reg).
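
A self-contained model of what regify does, using simplified stand-ins for BitTracker's BitValue and BitRef types (an assumption, not the real declarations): every bit recorded as a reference to register 0 becomes a reference to bit i of register R, i.e. to itself once the cell is installed as R's value.

    #include <vector>

    struct BitRef { unsigned Reg, Pos; };
    struct BitValue {
      enum Kind { Zero, One, Ref } Type;
      BitRef RefI;
    };

    void regify(std::vector<BitValue> &Bits, unsigned R) {
      for (unsigned i = 0, n = Bits.size(); i < n; ++i)
        if (Bits[i].Type == BitValue::Ref && Bits[i].RefI.Reg == 0) {
          Bits[i].RefI.Reg = R;  // rewrite ref-to-reg-0 as a self-reference
          Bits[i].RefI.Pos = i;  // ...to bit i of register R
        }
    }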
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index 2c527d598628..2f3dd3326fcc 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -35,6 +35,7 @@ add_llvm_target(HexagonCodeGen
HexagonInstrInfo.cpp
HexagonISelDAGToDAG.cpp
HexagonISelLowering.cpp
+ HexagonLoopIdiomRecognition.cpp
HexagonMachineFunctionInfo.cpp
HexagonMachineScheduler.cpp
HexagonMCInstLower.cpp
@@ -58,10 +59,10 @@ add_llvm_target(HexagonCodeGen
RDFDeadCode.cpp
RDFGraph.cpp
RDFLiveness.cpp
- )
+ RDFRegisters.cpp
+)
add_subdirectory(AsmParser)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
add_subdirectory(Disassembler)
-
diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index c05fbc1d7756..ae15ed0e9240 100644
--- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -57,11 +57,38 @@ public:
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const override;
-
- void adjustExtendedInstructions(MCInst &MCI, MCInst const &MCB) const;
void addSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) const;
};
+namespace {
+ uint32_t fullValue(MCInstrInfo const &MCII, MCInst &MCB, MCInst &MI,
+ int64_t Value) {
+ MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex(
+ MCB, HexagonMCInstrInfo::bundleSize(MCB));
+ if (!Extender || MI.size() != HexagonMCInstrInfo::getExtendableOp(MCII, MI))
+ return Value;
+ unsigned Alignment = HexagonMCInstrInfo::getExtentAlignment(MCII, MI);
+ uint32_t Lower6 = static_cast<uint32_t>(Value >> Alignment) & 0x3f;
+ int64_t Bits;
+ bool Success = Extender->getOperand(0).getExpr()->evaluateAsAbsolute(Bits);
+ assert(Success); (void)Success;
+ uint32_t Upper26 = static_cast<uint32_t>(Bits);
+ uint32_t Operand = Upper26 | Lower6;
+ return Operand;
+ }
+ HexagonDisassembler const &disassembler(void const *Decoder) {
+ return *static_cast<HexagonDisassembler const *>(Decoder);
+ }
+ template <size_t T>
+ void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) {
+ HexagonDisassembler const &Disassembler = disassembler(Decoder);
+ int64_t FullValue =
+ fullValue(*Disassembler.MCII, **Disassembler.CurrentBundle, MI,
+ SignExtend64<T>(tmp));
+ int64_t Extended = SignExtend64<32>(FullValue);
+ HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext());
+ }
+}
} // end anonymous namespace
// Forward declare these because the auto-generated code will reference them.
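
A standalone model of the reassembly fullValue performs, assuming (as the encoding guarantees) that the extender's evaluated value already has its low six bits clear: the constant extender supplies the upper 26 bits, and the extendable operand supplies its low six bits taken above the alignment.

    #include <cassert>
    #include <cstdint>

    uint32_t reassemble(uint32_t upper26, int64_t value, unsigned alignment) {
      uint32_t lower6 = static_cast<uint32_t>(value >> alignment) & 0x3f;
      return upper26 | lower6;  // extender bits | operand's low six bits
    }

    int main() {
      // ##0x123456: the extender carries 0x123440, the insn field carries 0x16.
      assert(reassemble(0x123440u, 0x16, /*alignment=*/0) == 0x123456u);
      return 0;
    }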
@@ -70,6 +97,10 @@ public:
static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeGeneralSubRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
@@ -79,6 +110,9 @@ static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus
+DecodeGeneralDoubleLow8RegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
@@ -98,31 +132,10 @@ static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn);
-static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn,
- void const *Decoder);
-
-static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op,
- raw_ostream &os);
-
-static unsigned getRegFromSubinstEncoding(unsigned encoded_reg);
-
static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
uint64_t Address, const void *Decoder);
-static DecodeStatus s16_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
-static DecodeStatus s12_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
-static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
-static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
-static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
-static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
-static DecodeStatus s10_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
+static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t /*Address*/, const void *Decoder);
static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
@@ -135,13 +148,12 @@ static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
-static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
- const void *Decoder);
-static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
+#include "HexagonDepDecoders.h"
#include "HexagonGenDisassemblerTables.inc"
static MCDisassembler *createHexagonDisassembler(const Target &T,
@@ -175,20 +187,31 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Size += HEXAGON_INSTR_SIZE;
Bytes = Bytes.slice(HEXAGON_INSTR_SIZE);
}
- if(Result == MCDisassembler::Fail)
+ if (Result == MCDisassembler::Fail)
return Result;
- HexagonMCChecker Checker (*MCII, STI, MI, MI, *getContext().getRegisterInfo());
- if(!Checker.check())
+ if (Size > HEXAGON_MAX_PACKET_SIZE)
+ return MCDisassembler::Fail;
+ HexagonMCChecker Checker(*MCII, STI, MI, MI, *getContext().getRegisterInfo());
+ if (!Checker.check())
return MCDisassembler::Fail;
return MCDisassembler::Success;
}
-static HexagonDisassembler const &disassembler(void const *Decoder) {
- return *static_cast<HexagonDisassembler const *>(Decoder);
+namespace {
+void adjustDuplex(MCInst &MI, MCContext &Context) {
+ switch (MI.getOpcode()) {
+ case Hexagon::SA1_setin1:
+ MI.insert(MI.begin() + 1,
+ MCOperand::createExpr(MCConstantExpr::create(-1, Context)));
+ break;
+ case Hexagon::SA1_dec:
+ MI.insert(MI.begin() + 2,
+ MCOperand::createExpr(MCConstantExpr::create(-1, Context)));
+ break;
+ default:
+ break;
+ }
}
-
-static MCContext &contextFromDecoder(void const *Decoder) {
- return disassembler(Decoder).getContext();
}
DecodeStatus HexagonDisassembler::getSingleInstruction(
@@ -196,8 +219,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
raw_ostream &os, raw_ostream &cs, bool &Complete) const {
assert(Bytes.size() >= HEXAGON_INSTR_SIZE);
- uint32_t Instruction =
- (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0);
+ uint32_t Instruction = support::endian::read32le(Bytes.data());
auto BundleSize = HexagonMCInstrInfo::bundleSize(MCB);
if ((Instruction & HexagonII::INST_PARSE_MASK) ==
@@ -210,103 +232,92 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
return DecodeStatus::Fail;
}
- DecodeStatus Result = DecodeStatus::Success;
+ MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex(
+ MCB, HexagonMCInstrInfo::bundleSize(MCB));
+
+ DecodeStatus Result = DecodeStatus::Fail;
if ((Instruction & HexagonII::INST_PARSE_MASK) ==
HexagonII::INST_PARSE_DUPLEX) {
- // Determine the instruction class of each instruction in the duplex.
- unsigned duplexIClass, IClassLow, IClassHigh;
-
+ unsigned duplexIClass;
+ uint8_t const *DecodeLow, *DecodeHigh;
duplexIClass = ((Instruction >> 28) & 0xe) | ((Instruction >> 13) & 0x1);
switch (duplexIClass) {
default:
return MCDisassembler::Fail;
case 0:
- IClassLow = HexagonII::HSIG_L1;
- IClassHigh = HexagonII::HSIG_L1;
+ DecodeLow = DecoderTableSUBINSN_L132;
+ DecodeHigh = DecoderTableSUBINSN_L132;
break;
case 1:
- IClassLow = HexagonII::HSIG_L2;
- IClassHigh = HexagonII::HSIG_L1;
+ DecodeLow = DecoderTableSUBINSN_L232;
+ DecodeHigh = DecoderTableSUBINSN_L132;
break;
case 2:
- IClassLow = HexagonII::HSIG_L2;
- IClassHigh = HexagonII::HSIG_L2;
+ DecodeLow = DecoderTableSUBINSN_L232;
+ DecodeHigh = DecoderTableSUBINSN_L232;
break;
case 3:
- IClassLow = HexagonII::HSIG_A;
- IClassHigh = HexagonII::HSIG_A;
+ DecodeLow = DecoderTableSUBINSN_A32;
+ DecodeHigh = DecoderTableSUBINSN_A32;
break;
case 4:
- IClassLow = HexagonII::HSIG_L1;
- IClassHigh = HexagonII::HSIG_A;
+ DecodeLow = DecoderTableSUBINSN_L132;
+ DecodeHigh = DecoderTableSUBINSN_A32;
break;
case 5:
- IClassLow = HexagonII::HSIG_L2;
- IClassHigh = HexagonII::HSIG_A;
+ DecodeLow = DecoderTableSUBINSN_L232;
+ DecodeHigh = DecoderTableSUBINSN_A32;
break;
case 6:
- IClassLow = HexagonII::HSIG_S1;
- IClassHigh = HexagonII::HSIG_A;
+ DecodeLow = DecoderTableSUBINSN_S132;
+ DecodeHigh = DecoderTableSUBINSN_A32;
break;
case 7:
- IClassLow = HexagonII::HSIG_S2;
- IClassHigh = HexagonII::HSIG_A;
+ DecodeLow = DecoderTableSUBINSN_S232;
+ DecodeHigh = DecoderTableSUBINSN_A32;
break;
case 8:
- IClassLow = HexagonII::HSIG_S1;
- IClassHigh = HexagonII::HSIG_L1;
+ DecodeLow = DecoderTableSUBINSN_S132;
+ DecodeHigh = DecoderTableSUBINSN_L132;
break;
case 9:
- IClassLow = HexagonII::HSIG_S1;
- IClassHigh = HexagonII::HSIG_L2;
+ DecodeLow = DecoderTableSUBINSN_S132;
+ DecodeHigh = DecoderTableSUBINSN_L232;
break;
case 10:
- IClassLow = HexagonII::HSIG_S1;
- IClassHigh = HexagonII::HSIG_S1;
+ DecodeLow = DecoderTableSUBINSN_S132;
+ DecodeHigh = DecoderTableSUBINSN_S132;
break;
case 11:
- IClassLow = HexagonII::HSIG_S2;
- IClassHigh = HexagonII::HSIG_S1;
+ DecodeLow = DecoderTableSUBINSN_S232;
+ DecodeHigh = DecoderTableSUBINSN_S132;
break;
case 12:
- IClassLow = HexagonII::HSIG_S2;
- IClassHigh = HexagonII::HSIG_L1;
+ DecodeLow = DecoderTableSUBINSN_S232;
+ DecodeHigh = DecoderTableSUBINSN_L132;
break;
case 13:
- IClassLow = HexagonII::HSIG_S2;
- IClassHigh = HexagonII::HSIG_L2;
+ DecodeLow = DecoderTableSUBINSN_S232;
+ DecodeHigh = DecoderTableSUBINSN_L232;
break;
case 14:
- IClassLow = HexagonII::HSIG_S2;
- IClassHigh = HexagonII::HSIG_S2;
+ DecodeLow = DecoderTableSUBINSN_S232;
+ DecodeHigh = DecoderTableSUBINSN_S232;
break;
}
-
- // Set the MCInst to be a duplex instruction. Which one doesn't matter.
- MI.setOpcode(Hexagon::DuplexIClass0);
-
- // Decode each instruction in the duplex.
- // Create an MCInst for each instruction.
- unsigned instLow = Instruction & 0x1fff;
- unsigned instHigh = (Instruction >> 16) & 0x1fff;
- unsigned opLow;
- if (GetSubinstOpcode(IClassLow, instLow, opLow, os) !=
- MCDisassembler::Success)
- return MCDisassembler::Fail;
- unsigned opHigh;
- if (GetSubinstOpcode(IClassHigh, instHigh, opHigh, os) !=
- MCDisassembler::Success)
- return MCDisassembler::Fail;
+ MI.setOpcode(Hexagon::DuplexIClass0 + duplexIClass);
MCInst *MILow = new (getContext()) MCInst;
- MILow->setOpcode(opLow);
MCInst *MIHigh = new (getContext()) MCInst;
- MIHigh->setOpcode(opHigh);
- addSubinstOperands(MILow, opLow, instLow);
- addSubinstOperands(MIHigh, opHigh, instHigh);
- // see ConvertToSubInst() in
- // lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
-
- // Add the duplex instruction MCInsts as operands to the passed in MCInst.
+ Result = decodeInstruction(DecodeLow, *MILow, Instruction & 0x1fff, Address,
+ this, STI);
+ if (Result != DecodeStatus::Success)
+ return DecodeStatus::Fail;
+ adjustDuplex(*MILow, getContext());
+ Result = decodeInstruction(
+ DecodeHigh, *MIHigh, (Instruction >> 16) & 0x1fff, Address, this, STI);
+ if (Result != DecodeStatus::Success)
+ return DecodeStatus::Fail;
+ adjustDuplex(*MIHigh, getContext());
MCOperand OPLow = MCOperand::createInst(MILow);
MCOperand OPHigh = MCOperand::createInst(MIHigh);
MI.addOperand(OPLow);
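
A minimal sketch of the iclass extraction driving the table selection above: the four-bit duplex class is split across the word, three bits from [31:29] and one from bit 13; classes 0 through 14 select the (low, high) sub-instruction decoder-table pair, and 15 falls to the default case and fails.

    #include <cassert>
    #include <cstdint>

    unsigned duplexIClass(uint32_t insn) {
      // Bits [31:29] land in positions [3:1]; bit 13 supplies position 0.
      return ((insn >> 28) & 0xe) | ((insn >> 13) & 0x1);
    }

    int main() {
      assert(duplexIClass(0x00000000u) == 0);   // class 0: L1/L1 pair
      assert(duplexIClass(0xe0002000u) == 15);  // reserved: decode fails
      return 0;
    }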
@@ -316,34 +327,23 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
if ((Instruction & HexagonII::INST_PARSE_MASK) ==
HexagonII::INST_PARSE_PACKET_END)
Complete = true;
- // Calling the auto-generated decoder function.
- Result =
- decodeInstruction(DecoderTable32, MI, Instruction, Address, this, STI);
- // If a, "standard" insn isn't found check special cases.
- if (MCDisassembler::Success != Result ||
- MI.getOpcode() == Hexagon::A4_ext) {
- Result = decodeImmext(MI, Instruction, this);
- if (MCDisassembler::Success != Result) {
- Result = decodeSpecial(MI, Instruction);
- }
- } else {
- // If the instruction is a compound instruction, register values will
- // follow the duplex model, so the register values in the MCInst are
- // incorrect. If the instruction is a compound, loop through the
- // operands and change registers appropriately.
- if (HexagonMCInstrInfo::getType(*MCII, MI) == HexagonII::TypeCOMPOUND) {
- for (MCInst::iterator i = MI.begin(), last = MI.end(); i < last; ++i) {
- if (i->isReg()) {
- unsigned reg = i->getReg() - Hexagon::R0;
- i->setReg(getRegFromSubinstEncoding(reg));
- }
- }
- }
- }
+ if (Extender != nullptr)
+ Result = decodeInstruction(DecoderTableMustExtend32, MI, Instruction,
+ Address, this, STI);
+
+ if (Result != MCDisassembler::Success)
+ Result = decodeInstruction(DecoderTable32, MI, Instruction, Address, this,
+ STI);
+
+ if (Result != MCDisassembler::Success &&
+ STI.getFeatureBits()[Hexagon::ExtensionHVX])
+ Result = decodeInstruction(DecoderTableEXT_mmvec32, MI, Instruction,
+ Address, this, STI);
+
}
- switch(MI.getOpcode()) {
+ switch (MI.getOpcode()) {
case Hexagon::J4_cmpeqn1_f_jumpnv_nt:
case Hexagon::J4_cmpeqn1_f_jumpnv_t:
case Hexagon::J4_cmpeqn1_fp0_jump_nt:
@@ -368,7 +368,8 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
case Hexagon::J4_cmpgtn1_tp0_jump_t:
case Hexagon::J4_cmpgtn1_tp1_jump_nt:
case Hexagon::J4_cmpgtn1_tp1_jump_t:
- MI.insert(MI.begin() + 1, MCOperand::createExpr(MCConstantExpr::create(-1, getContext())));
+ MI.insert(MI.begin() + 1,
+ MCOperand::createExpr(MCConstantExpr::create(-1, getContext())));
break;
default:
break;
@@ -423,13 +424,10 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
return MCDisassembler::Fail;
}
- adjustExtendedInstructions(MI, MCB);
- MCInst const *Extender =
- HexagonMCInstrInfo::extenderForIndex(MCB,
- HexagonMCInstrInfo::bundleSize(MCB));
- if(Extender != nullptr) {
- MCInst const & Inst = HexagonMCInstrInfo::isDuplex(*MCII, MI) ?
- *MI.getOperand(1).getInst() : MI;
+ if (Extender != nullptr) {
+ MCInst const &Inst = HexagonMCInstrInfo::isDuplex(*MCII, MI)
+ ? *MI.getOperand(1).getInst()
+ : MI;
if (!HexagonMCInstrInfo::isExtendable(*MCII, Inst) &&
!HexagonMCInstrInfo::isExtended(*MCII, Inst))
return MCDisassembler::Fail;
@@ -437,68 +435,6 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
return Result;
}
-void HexagonDisassembler::adjustExtendedInstructions(MCInst &MCI,
- MCInst const &MCB) const {
- if (!HexagonMCInstrInfo::hasExtenderForIndex(
- MCB, HexagonMCInstrInfo::bundleSize(MCB))) {
- unsigned opcode;
- // This code is used by the disassembler to disambiguate between GP
- // relative and absolute addressing instructions since they both have
- // same encoding bits. However, an absolute addressing instruction must
- // follow an immediate extender. Disassembler alwaus select absolute
- // addressing instructions first and uses this code to change them into
- // GP relative instruction in the absence of the corresponding immediate
- // extender.
- switch (MCI.getOpcode()) {
- case Hexagon::PS_storerbabs:
- opcode = Hexagon::S2_storerbgp;
- break;
- case Hexagon::PS_storerhabs:
- opcode = Hexagon::S2_storerhgp;
- break;
- case Hexagon::PS_storerfabs:
- opcode = Hexagon::S2_storerfgp;
- break;
- case Hexagon::PS_storeriabs:
- opcode = Hexagon::S2_storerigp;
- break;
- case Hexagon::PS_storerbnewabs:
- opcode = Hexagon::S2_storerbnewgp;
- break;
- case Hexagon::PS_storerhnewabs:
- opcode = Hexagon::S2_storerhnewgp;
- break;
- case Hexagon::PS_storerinewabs:
- opcode = Hexagon::S2_storerinewgp;
- break;
- case Hexagon::PS_storerdabs:
- opcode = Hexagon::S2_storerdgp;
- break;
- case Hexagon::PS_loadrbabs:
- opcode = Hexagon::L2_loadrbgp;
- break;
- case Hexagon::PS_loadrubabs:
- opcode = Hexagon::L2_loadrubgp;
- break;
- case Hexagon::PS_loadrhabs:
- opcode = Hexagon::L2_loadrhgp;
- break;
- case Hexagon::PS_loadruhabs:
- opcode = Hexagon::L2_loadruhgp;
- break;
- case Hexagon::PS_loadriabs:
- opcode = Hexagon::L2_loadrigp;
- break;
- case Hexagon::PS_loadrdabs:
- opcode = Hexagon::L2_loadrdgp;
- break;
- default:
- opcode = MCI.getOpcode();
- }
- MCI.setOpcode(opcode);
- }
-}
-
static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
ArrayRef<MCPhysReg> Table) {
if (RegNo < Table.size()) {
@@ -530,6 +466,20 @@ static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodeRegisterClass(Inst, RegNo, IntRegDecoderTable);
}
+static DecodeStatus DecodeGeneralSubRegsRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ static const MCPhysReg GeneralSubRegDecoderTable[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3,
+ Hexagon::R4, Hexagon::R5, Hexagon::R6, Hexagon::R7,
+ Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
+ Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23,
+ };
+
+ return DecodeRegisterClass(Inst, RegNo, GeneralSubRegDecoderTable);
+}
+
static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
const void *Decoder) {
@@ -557,6 +507,15 @@ static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable);
}
+static DecodeStatus DecodeGeneralDoubleLow8RegsRegisterClass(
+ MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) {
+ static const MCPhysReg GeneralDoubleLow8RegDecoderTable[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3,
+ Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11};
+
+ return DecodeRegisterClass(Inst, RegNo, GeneralDoubleLow8RegDecoderTable);
+}
+
static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
const void *Decoder) {
@@ -590,17 +549,23 @@ static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
const void *Decoder) {
+ using namespace Hexagon;
static const MCPhysReg CtrlRegDecoderTable[] = {
- Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1,
- Hexagon::P3_0, Hexagon::C5, Hexagon::C6, Hexagon::C7,
- Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP,
- Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPC
+ /* 0 */ SA0, LC0, SA1, LC1,
+ /* 4 */ P3_0, C5, C6, C7,
+ /* 8 */ USR, PC, UGP, GP,
+ /* 12 */ CS0, CS1, UPCYCLELO, UPCYCLEHI,
+ /* 16 */ FRAMELIMIT, FRAMEKEY, PKTCOUNTLO, PKTCOUNTHI,
+ /* 20 */ 0, 0, 0, 0,
+ /* 24 */ 0, 0, 0, 0,
+ /* 28 */ 0, 0, UTIMERLO, UTIMERHI
};
if (RegNo >= array_lengthof(CtrlRegDecoderTable))
return MCDisassembler::Fail;
- if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister)
+ static_assert(NoRegister == 0, "Expecting NoRegister to be 0");
+ if (CtrlRegDecoderTable[RegNo] == NoRegister)
return MCDisassembler::Fail;
unsigned Register = CtrlRegDecoderTable[RegNo];
@@ -611,20 +576,23 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
const void *Decoder) {
+ using namespace Hexagon;
static const MCPhysReg CtrlReg64DecoderTable[] = {
- Hexagon::C1_0, Hexagon::NoRegister,
- Hexagon::C3_2, Hexagon::NoRegister,
- Hexagon::C7_6, Hexagon::NoRegister,
- Hexagon::C9_8, Hexagon::NoRegister,
- Hexagon::C11_10, Hexagon::NoRegister,
- Hexagon::CS, Hexagon::NoRegister,
- Hexagon::UPC, Hexagon::NoRegister
+ /* 0 */ C1_0, 0, C3_2, 0,
+ /* 4 */ C5_4, 0, C7_6, 0,
+ /* 8 */ C9_8, 0, C11_10, 0,
+ /* 12 */ CS, 0, UPCYCLE, 0,
+ /* 16 */ C17_16, 0, PKTCOUNT, 0,
+ /* 20 */ 0, 0, 0, 0,
+ /* 24 */ 0, 0, 0, 0,
+ /* 28 */ 0, 0, UTIMER, 0
};
if (RegNo >= array_lengthof(CtrlReg64DecoderTable))
return MCDisassembler::Fail;
- if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister)
+ static_assert(NoRegister == 0, "Expecting NoRegister to be 0");
+ if (CtrlReg64DecoderTable[RegNo] == NoRegister)
return MCDisassembler::Fail;
unsigned Register = CtrlReg64DecoderTable[RegNo];
@@ -650,132 +618,23 @@ static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static uint32_t fullValue(MCInstrInfo const &MCII, MCInst &MCB, MCInst &MI,
- int64_t Value) {
- MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex(
- MCB, HexagonMCInstrInfo::bundleSize(MCB));
- if(!Extender || MI.size() != HexagonMCInstrInfo::getExtendableOp(MCII, MI))
- return Value;
- unsigned Alignment = HexagonMCInstrInfo::getExtentAlignment(MCII, MI);
- uint32_t Lower6 = static_cast<uint32_t>(Value >> Alignment) & 0x3f;
- int64_t Bits;
- bool Success = Extender->getOperand(0).getExpr()->evaluateAsAbsolute(Bits);
- assert(Success);(void)Success;
- uint32_t Upper26 = static_cast<uint32_t>(Bits);
- uint32_t Operand = Upper26 | Lower6;
- return Operand;
-}
-
-template <size_t T>
-static void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) {
- HexagonDisassembler const &Disassembler = disassembler(Decoder);
- int64_t FullValue = fullValue(*Disassembler.MCII,
- **Disassembler.CurrentBundle,
- MI, SignExtend64<T>(tmp));
- int64_t Extended = SignExtend64<32>(FullValue);
- HexagonMCInstrInfo::addConstant(MI, Extended,
- Disassembler.getContext());
-}
-
static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/,
const void *Decoder) {
HexagonDisassembler const &Disassembler = disassembler(Decoder);
- int64_t FullValue = fullValue(*Disassembler.MCII,
- **Disassembler.CurrentBundle,
- MI, tmp);
+ int64_t FullValue =
+ fullValue(*Disassembler.MCII, **Disassembler.CurrentBundle, MI, tmp);
assert(FullValue >= 0 && "Negative in unsigned decoder");
HexagonMCInstrInfo::addConstant(MI, FullValue, Disassembler.getContext());
return MCDisassembler::Success;
}
-static DecodeStatus s16_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<16>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s12_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<12>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<11>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- HexagonMCInstrInfo::addConstant(MI, SignExtend64<12>(tmp), contextFromDecoder(Decoder));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp,
+static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<13>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<14>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s10_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<10>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/,
- const void *Decoder) {
- signedDecoder<8>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<6>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<4>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<5>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<6>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<7>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<10>(MI, tmp, Decoder);
- return MCDisassembler::Success;
-}
-
-static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp,
- uint64_t /*Address*/, const void *Decoder) {
- signedDecoder<19>(MI, tmp, Decoder);
+ HexagonDisassembler const &Disassembler = disassembler(Decoder);
+ unsigned Bits = HexagonMCInstrInfo::getExtentBits(*Disassembler.MCII, MI);
+ tmp = SignExtend64(tmp, Bits);
+ signedDecoder<32>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
@@ -787,838 +646,13 @@ static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
// r13_2 is not extendable, so if there are no extent bits, it's r13_2
if (Bits == 0)
Bits = 15;
- uint32_t FullValue = fullValue(*Disassembler.MCII,
- **Disassembler.CurrentBundle,
- MI, SignExtend64(tmp, Bits));
+ uint32_t FullValue =
+ fullValue(*Disassembler.MCII, **Disassembler.CurrentBundle, MI,
+ SignExtend64(tmp, Bits));
int64_t Extended = SignExtend64<32>(FullValue) + Address;
- if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true,
- 0, 4))
+ if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 4))
HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext());
return MCDisassembler::Success;
}
-// Addressing-mode-dependent load/store opcode map.
-//   - If an insn is preceded by an extender, the address is absolute.
-//      - memw(##symbol) = r0
-//   - If an insn is not preceded by an extender, the address is GP-relative.
-//      - memw(gp + #symbol) = r0
-// Note that the entries must be ordered in descending order of their
-// opcodes; the first-match rule this ordering serves is sketched after the
-// tables below.
-// HexagonII::INST_ICLASS_ST
-static const unsigned int StoreConditionalOpcodeData[][2] = {
- {S4_pstorerdfnew_abs, 0xafc02084},
- {S4_pstorerdtnew_abs, 0xafc02080},
- {S4_pstorerdf_abs, 0xafc00084},
- {S4_pstorerdt_abs, 0xafc00080},
- {S4_pstorerinewfnew_abs, 0xafa03084},
- {S4_pstorerinewtnew_abs, 0xafa03080},
- {S4_pstorerhnewfnew_abs, 0xafa02884},
- {S4_pstorerhnewtnew_abs, 0xafa02880},
- {S4_pstorerbnewfnew_abs, 0xafa02084},
- {S4_pstorerbnewtnew_abs, 0xafa02080},
- {S4_pstorerinewf_abs, 0xafa01084},
- {S4_pstorerinewt_abs, 0xafa01080},
- {S4_pstorerhnewf_abs, 0xafa00884},
- {S4_pstorerhnewt_abs, 0xafa00880},
- {S4_pstorerbnewf_abs, 0xafa00084},
- {S4_pstorerbnewt_abs, 0xafa00080},
- {S4_pstorerifnew_abs, 0xaf802084},
- {S4_pstoreritnew_abs, 0xaf802080},
- {S4_pstorerif_abs, 0xaf800084},
- {S4_pstorerit_abs, 0xaf800080},
- {S4_pstorerhfnew_abs, 0xaf402084},
- {S4_pstorerhtnew_abs, 0xaf402080},
- {S4_pstorerhf_abs, 0xaf400084},
- {S4_pstorerht_abs, 0xaf400080},
- {S4_pstorerbfnew_abs, 0xaf002084},
- {S4_pstorerbtnew_abs, 0xaf002080},
- {S4_pstorerbf_abs, 0xaf000084},
- {S4_pstorerbt_abs, 0xaf000080}};
-// HexagonII::INST_ICLASS_LD
-
-// HexagonII::INST_ICLASS_LD_ST_2
-static unsigned int LoadStoreOpcodeData[][2] = {{PS_loadrdabs, 0x49c00000},
- {PS_loadriabs, 0x49800000},
- {PS_loadruhabs, 0x49600000},
- {PS_loadrhabs, 0x49400000},
- {PS_loadrubabs, 0x49200000},
- {PS_loadrbabs, 0x49000000},
- {PS_storerdabs, 0x48c00000},
- {PS_storerinewabs, 0x48a01000},
- {PS_storerhnewabs, 0x48a00800},
- {PS_storerbnewabs, 0x48a00000},
- {PS_storeriabs, 0x48800000},
- {PS_storerfabs, 0x48600000},
- {PS_storerhabs, 0x48400000},
- {PS_storerbabs, 0x48000000}};
-static const size_t NumCondS = array_lengthof(StoreConditionalOpcodeData);
-static const size_t NumLS = array_lengthof(LoadStoreOpcodeData);
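
Since each table entry above is matched with (insn & pattern) == pattern,
i.e. every bit set in the pattern must also be set in the instruction word,
a single word can satisfy more than one entry; scanning in descending opcode
order makes the most specific pattern win. A small illustration of that
first-match rule (the pattern values are arbitrary stand-ins):

    #include <cstdint>
    #include <cstdio>

    // Illustrative (opcode, pattern) pairs; the second entry's bits are a
    // strict subset of the first entry's, as in the tables above.
    static const uint32_t Patterns[][2] = {
        {2, 0xafc02084}, // more specific: tested first
        {1, 0xafc00080}, // subset pattern: tested last
    };

    static unsigned matchFirst(uint32_t Insn) {
      for (const auto &P : Patterns)
        if ((Insn & P[1]) == P[1])
          return P[0];
      return 0; // no match
    }

    int main() {
      printf("%u\n", matchFirst(0xafc02084)); // satisfies both; prints 2
      printf("%u\n", matchFirst(0xafc000c0)); // only the subset; prints 1
      return 0;
    }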
-
-static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) {
- unsigned MachineOpcode = 0;
- unsigned LLVMOpcode = 0;
-
- if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_ST) {
- for (size_t i = 0; i < NumCondS; ++i) {
- if ((insn & StoreConditionalOpcodeData[i][1]) ==
- StoreConditionalOpcodeData[i][1]) {
- MachineOpcode = StoreConditionalOpcodeData[i][1];
- LLVMOpcode = StoreConditionalOpcodeData[i][0];
- break;
- }
- }
- }
- if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_LD_ST_2) {
- for (size_t i = 0; i < NumLS; ++i) {
- if ((insn & LoadStoreOpcodeData[i][1]) == LoadStoreOpcodeData[i][1]) {
- MachineOpcode = LoadStoreOpcodeData[i][1];
- LLVMOpcode = LoadStoreOpcodeData[i][0];
- break;
- }
- }
- }
-
- if (MachineOpcode) {
- unsigned Value = 0;
- unsigned shift = 0;
- MI.setOpcode(LLVMOpcode);
- // Remove the parse bits from the insn.
- insn &= ~HexagonII::INST_PARSE_MASK;
-
- switch (LLVMOpcode) {
- default:
-      return MCDisassembler::Fail;
-
- case Hexagon::S4_pstorerdf_abs:
- case Hexagon::S4_pstorerdt_abs:
- case Hexagon::S4_pstorerdfnew_abs:
- case Hexagon::S4_pstorerdtnew_abs:
- // op: Pv
- Value = insn & UINT64_C(3);
- DecodePredRegsRegisterClass(MI, Value, 0, nullptr);
- // op: u6
- Value = (insn >> 12) & UINT64_C(48);
- Value |= (insn >> 3) & UINT64_C(15);
- MI.addOperand(MCOperand::createImm(Value));
- // op: Rtt
- Value = (insn >> 8) & UINT64_C(31);
- DecodeDoubleRegsRegisterClass(MI, Value, 0, nullptr);
- break;
-
- case Hexagon::S4_pstorerbnewf_abs:
- case Hexagon::S4_pstorerbnewt_abs:
- case Hexagon::S4_pstorerbnewfnew_abs:
- case Hexagon::S4_pstorerbnewtnew_abs:
- case Hexagon::S4_pstorerhnewf_abs:
- case Hexagon::S4_pstorerhnewt_abs:
- case Hexagon::S4_pstorerhnewfnew_abs:
- case Hexagon::S4_pstorerhnewtnew_abs:
- case Hexagon::S4_pstorerinewf_abs:
- case Hexagon::S4_pstorerinewt_abs:
- case Hexagon::S4_pstorerinewfnew_abs:
- case Hexagon::S4_pstorerinewtnew_abs:
- // op: Pv
- Value = insn & UINT64_C(3);
- DecodePredRegsRegisterClass(MI, Value, 0, nullptr);
- // op: u6
- Value = (insn >> 12) & UINT64_C(48);
- Value |= (insn >> 3) & UINT64_C(15);
- MI.addOperand(MCOperand::createImm(Value));
- // op: Nt
- Value = (insn >> 8) & UINT64_C(7);
- DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
- break;
-
- case Hexagon::S4_pstorerbf_abs:
- case Hexagon::S4_pstorerbt_abs:
- case Hexagon::S4_pstorerbfnew_abs:
- case Hexagon::S4_pstorerbtnew_abs:
- case Hexagon::S4_pstorerhf_abs:
- case Hexagon::S4_pstorerht_abs:
- case Hexagon::S4_pstorerhfnew_abs:
- case Hexagon::S4_pstorerhtnew_abs:
- case Hexagon::S4_pstorerif_abs:
- case Hexagon::S4_pstorerit_abs:
- case Hexagon::S4_pstorerifnew_abs:
- case Hexagon::S4_pstoreritnew_abs:
- // op: Pv
- Value = insn & UINT64_C(3);
- DecodePredRegsRegisterClass(MI, Value, 0, nullptr);
- // op: u6
- Value = (insn >> 12) & UINT64_C(48);
- Value |= (insn >> 3) & UINT64_C(15);
- MI.addOperand(MCOperand::createImm(Value));
- // op: Rt
- Value = (insn >> 8) & UINT64_C(31);
- DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
- break;
-
- case Hexagon::L4_ploadrdf_abs:
- case Hexagon::L4_ploadrdt_abs:
- case Hexagon::L4_ploadrdfnew_abs:
- case Hexagon::L4_ploadrdtnew_abs:
- // op: Rdd
- Value = insn & UINT64_C(31);
- DecodeDoubleRegsRegisterClass(MI, Value, 0, nullptr);
- // op: Pt
- Value = ((insn >> 9) & UINT64_C(3));
- DecodePredRegsRegisterClass(MI, Value, 0, nullptr);
- // op: u6
- Value = ((insn >> 15) & UINT64_C(62));
- Value |= ((insn >> 8) & UINT64_C(1));
- MI.addOperand(MCOperand::createImm(Value));
- break;
-
- case Hexagon::L4_ploadrbf_abs:
- case Hexagon::L4_ploadrbt_abs:
- case Hexagon::L4_ploadrbfnew_abs:
- case Hexagon::L4_ploadrbtnew_abs:
- case Hexagon::L4_ploadrhf_abs:
- case Hexagon::L4_ploadrht_abs:
- case Hexagon::L4_ploadrhfnew_abs:
- case Hexagon::L4_ploadrhtnew_abs:
- case Hexagon::L4_ploadrubf_abs:
- case Hexagon::L4_ploadrubt_abs:
- case Hexagon::L4_ploadrubfnew_abs:
- case Hexagon::L4_ploadrubtnew_abs:
- case Hexagon::L4_ploadruhf_abs:
- case Hexagon::L4_ploadruht_abs:
- case Hexagon::L4_ploadruhfnew_abs:
- case Hexagon::L4_ploadruhtnew_abs:
- case Hexagon::L4_ploadrif_abs:
- case Hexagon::L4_ploadrit_abs:
- case Hexagon::L4_ploadrifnew_abs:
- case Hexagon::L4_ploadritnew_abs:
- // op: Rd
- Value = insn & UINT64_C(31);
- DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
- // op: Pt
- Value = (insn >> 9) & UINT64_C(3);
- DecodePredRegsRegisterClass(MI, Value, 0, nullptr);
- // op: u6
- Value = (insn >> 15) & UINT64_C(62);
- Value |= (insn >> 8) & UINT64_C(1);
- MI.addOperand(MCOperand::createImm(Value));
- break;
-
- // op: g16_2
-    case Hexagon::PS_loadriabs:
- ++shift;
- // op: g16_1
- case Hexagon::PS_loadrhabs:
- case Hexagon::PS_loadruhabs:
- ++shift;
- // op: g16_0
- case Hexagon::PS_loadrbabs:
- case Hexagon::PS_loadrubabs:
- // op: Rd
- Value |= insn & UINT64_C(31);
- DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
- Value = (insn >> 11) & UINT64_C(49152);
- Value |= (insn >> 7) & UINT64_C(15872);
- Value |= (insn >> 5) & UINT64_C(511);
- MI.addOperand(MCOperand::createImm(Value << shift));
- break;
-
- case Hexagon::PS_loadrdabs:
- Value = insn & UINT64_C(31);
- DecodeDoubleRegsRegisterClass(MI, Value, 0, nullptr);
- Value = (insn >> 11) & UINT64_C(49152);
- Value |= (insn >> 7) & UINT64_C(15872);
- Value |= (insn >> 5) & UINT64_C(511);
- MI.addOperand(MCOperand::createImm(Value << 3));
- break;
-
- case Hexagon::PS_storerdabs:
- // op: g16_3
- Value = (insn >> 11) & UINT64_C(49152);
- Value |= (insn >> 7) & UINT64_C(15872);
- Value |= (insn >> 5) & UINT64_C(256);
- Value |= insn & UINT64_C(255);
- MI.addOperand(MCOperand::createImm(Value << 3));
- // op: Rtt
- Value = (insn >> 8) & UINT64_C(31);
- DecodeDoubleRegsRegisterClass(MI, Value, 0, nullptr);
- break;
-
- // op: g16_2
- case Hexagon::PS_storerinewabs:
- ++shift;
- // op: g16_1
- case Hexagon::PS_storerhnewabs:
- ++shift;
- // op: g16_0
- case Hexagon::PS_storerbnewabs:
- Value = (insn >> 11) & UINT64_C(49152);
- Value |= (insn >> 7) & UINT64_C(15872);
- Value |= (insn >> 5) & UINT64_C(256);
- Value |= insn & UINT64_C(255);
- MI.addOperand(MCOperand::createImm(Value << shift));
- // op: Nt
- Value = (insn >> 8) & UINT64_C(7);
- DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
- break;
-
- // op: g16_2
- case Hexagon::PS_storeriabs:
- ++shift;
- // op: g16_1
- case Hexagon::PS_storerhabs:
- case Hexagon::PS_storerfabs:
- ++shift;
- // op: g16_0
- case Hexagon::PS_storerbabs:
- Value = (insn >> 11) & UINT64_C(49152);
- Value |= (insn >> 7) & UINT64_C(15872);
- Value |= (insn >> 5) & UINT64_C(256);
- Value |= insn & UINT64_C(255);
- MI.addOperand(MCOperand::createImm(Value << shift));
- // op: Rt
- Value = (insn >> 8) & UINT64_C(31);
- DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
- break;
- }
- return MCDisassembler::Success;
- }
- return MCDisassembler::Fail;
-}
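
The g16_0/g16_1/g16_2 cases above use deliberate case fall-through: each
more coarsely scaled variant increments 'shift' before reaching the shared
extraction code, so the assembled immediate is Value << shift. A compact
sketch of the idiom on made-up access kinds:

    #include <cassert>
    #include <cstdint>

    enum Kind { Byte, Half, Word };

    // Scale a raw immediate by the access size: <<0, <<1 or <<2.
    static uint64_t scaleImm(Kind K, uint64_t Raw) {
      unsigned Shift = 0;
      switch (K) {
      case Word:   // 4-byte access
        ++Shift;   // fall through
      case Half:   // 2-byte access
        ++Shift;   // fall through
      case Byte:   // 1-byte access
        return Raw << Shift;
      }
      return Raw;
    }

    int main() {
      assert(scaleImm(Byte, 5) == 5);
      assert(scaleImm(Half, 5) == 10);
      assert(scaleImm(Word, 5) == 20);
      return 0;
    }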
-
-static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn,
- void const *Decoder) {
-  // Instruction Class for a constant extender: bits 31:28 = 0b0000
- if ((~insn & 0xf0000000) == 0xf0000000) {
- unsigned Value;
- // 27:16 High 12 bits of 26-bit extender.
- Value = (insn & 0x0fff0000) << 4;
- // 13:0 Low 14 bits of 26-bit extender.
- Value |= ((insn & 0x3fff) << 6);
- MI.setOpcode(Hexagon::A4_ext);
- HexagonMCInstrInfo::addConstant(MI, Value, contextFromDecoder(Decoder));
- return MCDisassembler::Success;
- }
- return MCDisassembler::Fail;
-}
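
decodeImmext stitches the 26-bit extender payload out of two instruction
fields and leaves the low six bits zero, since the extender supplies bits
31:6 of the final constant. A worked example of that reassembly (the input
word is fabricated):

    #include <cassert>
    #include <cstdint>

    // Rebuild the extender constant exactly as decodeImmext does:
    // insn[27:16] lands at bits 31:20, insn[13:0] at bits 19:6.
    static uint32_t extenderValue(uint32_t Insn) {
      uint32_t Value = (Insn & 0x0fff0000) << 4; // high 12 of the 26 bits
      Value |= (Insn & 0x3fff) << 6;             // low 14 of the 26 bits
      return Value;
    }

    int main() {
      uint32_t Insn = (0xabcu << 16) | 0x1234u;
      uint32_t V = extenderValue(Insn);
      assert(V == ((0xabcu << 20) | (0x1234u << 6)));
      assert((V & 0x3f) == 0); // low 6 bits come from the extended insn
      return 0;
    }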
-
-// These values are from HexagonGenMCCodeEmitter.inc and HexagonIsetDx.td
-enum subInstBinaryValues {
- SA1_addi_BITS = 0x0000,
- SA1_addi_MASK = 0x1800,
- SA1_addrx_BITS = 0x1800,
- SA1_addrx_MASK = 0x1f00,
- SA1_addsp_BITS = 0x0c00,
- SA1_addsp_MASK = 0x1c00,
- SA1_and1_BITS = 0x1200,
- SA1_and1_MASK = 0x1f00,
- SA1_clrf_BITS = 0x1a70,
- SA1_clrf_MASK = 0x1e70,
- SA1_clrfnew_BITS = 0x1a50,
- SA1_clrfnew_MASK = 0x1e70,
- SA1_clrt_BITS = 0x1a60,
- SA1_clrt_MASK = 0x1e70,
- SA1_clrtnew_BITS = 0x1a40,
- SA1_clrtnew_MASK = 0x1e70,
- SA1_cmpeqi_BITS = 0x1900,
- SA1_cmpeqi_MASK = 0x1f00,
- SA1_combine0i_BITS = 0x1c00,
- SA1_combine0i_MASK = 0x1d18,
- SA1_combine1i_BITS = 0x1c08,
- SA1_combine1i_MASK = 0x1d18,
- SA1_combine2i_BITS = 0x1c10,
- SA1_combine2i_MASK = 0x1d18,
- SA1_combine3i_BITS = 0x1c18,
- SA1_combine3i_MASK = 0x1d18,
- SA1_combinerz_BITS = 0x1d08,
- SA1_combinerz_MASK = 0x1d08,
- SA1_combinezr_BITS = 0x1d00,
- SA1_combinezr_MASK = 0x1d08,
- SA1_dec_BITS = 0x1300,
- SA1_dec_MASK = 0x1f00,
- SA1_inc_BITS = 0x1100,
- SA1_inc_MASK = 0x1f00,
- SA1_seti_BITS = 0x0800,
- SA1_seti_MASK = 0x1c00,
- SA1_setin1_BITS = 0x1a00,
- SA1_setin1_MASK = 0x1e40,
- SA1_sxtb_BITS = 0x1500,
- SA1_sxtb_MASK = 0x1f00,
- SA1_sxth_BITS = 0x1400,
- SA1_sxth_MASK = 0x1f00,
- SA1_tfr_BITS = 0x1000,
- SA1_tfr_MASK = 0x1f00,
- SA1_zxtb_BITS = 0x1700,
- SA1_zxtb_MASK = 0x1f00,
- SA1_zxth_BITS = 0x1600,
- SA1_zxth_MASK = 0x1f00,
- SL1_loadri_io_BITS = 0x0000,
- SL1_loadri_io_MASK = 0x1000,
- SL1_loadrub_io_BITS = 0x1000,
- SL1_loadrub_io_MASK = 0x1000,
- SL2_deallocframe_BITS = 0x1f00,
- SL2_deallocframe_MASK = 0x1fc0,
- SL2_jumpr31_BITS = 0x1fc0,
- SL2_jumpr31_MASK = 0x1fc4,
- SL2_jumpr31_f_BITS = 0x1fc5,
- SL2_jumpr31_f_MASK = 0x1fc7,
- SL2_jumpr31_fnew_BITS = 0x1fc7,
- SL2_jumpr31_fnew_MASK = 0x1fc7,
- SL2_jumpr31_t_BITS = 0x1fc4,
- SL2_jumpr31_t_MASK = 0x1fc7,
- SL2_jumpr31_tnew_BITS = 0x1fc6,
- SL2_jumpr31_tnew_MASK = 0x1fc7,
- SL2_loadrb_io_BITS = 0x1000,
- SL2_loadrb_io_MASK = 0x1800,
- SL2_loadrd_sp_BITS = 0x1e00,
- SL2_loadrd_sp_MASK = 0x1f00,
- SL2_loadrh_io_BITS = 0x0000,
- SL2_loadrh_io_MASK = 0x1800,
- SL2_loadri_sp_BITS = 0x1c00,
- SL2_loadri_sp_MASK = 0x1e00,
- SL2_loadruh_io_BITS = 0x0800,
- SL2_loadruh_io_MASK = 0x1800,
- SL2_return_BITS = 0x1f40,
- SL2_return_MASK = 0x1fc4,
- SL2_return_f_BITS = 0x1f45,
- SL2_return_f_MASK = 0x1fc7,
- SL2_return_fnew_BITS = 0x1f47,
- SL2_return_fnew_MASK = 0x1fc7,
- SL2_return_t_BITS = 0x1f44,
- SL2_return_t_MASK = 0x1fc7,
- SL2_return_tnew_BITS = 0x1f46,
- SL2_return_tnew_MASK = 0x1fc7,
- SS1_storeb_io_BITS = 0x1000,
- SS1_storeb_io_MASK = 0x1000,
- SS1_storew_io_BITS = 0x0000,
- SS1_storew_io_MASK = 0x1000,
- SS2_allocframe_BITS = 0x1c00,
- SS2_allocframe_MASK = 0x1e00,
- SS2_storebi0_BITS = 0x1200,
- SS2_storebi0_MASK = 0x1f00,
- SS2_storebi1_BITS = 0x1300,
- SS2_storebi1_MASK = 0x1f00,
- SS2_stored_sp_BITS = 0x0a00,
- SS2_stored_sp_MASK = 0x1e00,
- SS2_storeh_io_BITS = 0x0000,
- SS2_storeh_io_MASK = 0x1800,
- SS2_storew_sp_BITS = 0x0800,
- SS2_storew_sp_MASK = 0x1e00,
- SS2_storewi0_BITS = 0x1000,
- SS2_storewi0_MASK = 0x1f00,
- SS2_storewi1_BITS = 0x1100,
- SS2_storewi1_MASK = 0x1f00
-};
-static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op,
- raw_ostream &os) {
- switch (IClass) {
- case HexagonII::HSIG_L1:
- if ((inst & SL1_loadri_io_MASK) == SL1_loadri_io_BITS)
- op = Hexagon::SL1_loadri_io;
- else if ((inst & SL1_loadrub_io_MASK) == SL1_loadrub_io_BITS)
- op = Hexagon::SL1_loadrub_io;
- else {
- os << "<unknown subinstruction>";
- return MCDisassembler::Fail;
- }
- break;
- case HexagonII::HSIG_L2:
- if ((inst & SL2_deallocframe_MASK) == SL2_deallocframe_BITS)
- op = Hexagon::SL2_deallocframe;
- else if ((inst & SL2_jumpr31_MASK) == SL2_jumpr31_BITS)
- op = Hexagon::SL2_jumpr31;
- else if ((inst & SL2_jumpr31_f_MASK) == SL2_jumpr31_f_BITS)
- op = Hexagon::SL2_jumpr31_f;
- else if ((inst & SL2_jumpr31_fnew_MASK) == SL2_jumpr31_fnew_BITS)
- op = Hexagon::SL2_jumpr31_fnew;
- else if ((inst & SL2_jumpr31_t_MASK) == SL2_jumpr31_t_BITS)
- op = Hexagon::SL2_jumpr31_t;
- else if ((inst & SL2_jumpr31_tnew_MASK) == SL2_jumpr31_tnew_BITS)
- op = Hexagon::SL2_jumpr31_tnew;
- else if ((inst & SL2_loadrb_io_MASK) == SL2_loadrb_io_BITS)
- op = Hexagon::SL2_loadrb_io;
- else if ((inst & SL2_loadrd_sp_MASK) == SL2_loadrd_sp_BITS)
- op = Hexagon::SL2_loadrd_sp;
- else if ((inst & SL2_loadrh_io_MASK) == SL2_loadrh_io_BITS)
- op = Hexagon::SL2_loadrh_io;
- else if ((inst & SL2_loadri_sp_MASK) == SL2_loadri_sp_BITS)
- op = Hexagon::SL2_loadri_sp;
- else if ((inst & SL2_loadruh_io_MASK) == SL2_loadruh_io_BITS)
- op = Hexagon::SL2_loadruh_io;
- else if ((inst & SL2_return_MASK) == SL2_return_BITS)
- op = Hexagon::SL2_return;
- else if ((inst & SL2_return_f_MASK) == SL2_return_f_BITS)
- op = Hexagon::SL2_return_f;
- else if ((inst & SL2_return_fnew_MASK) == SL2_return_fnew_BITS)
- op = Hexagon::SL2_return_fnew;
- else if ((inst & SL2_return_t_MASK) == SL2_return_t_BITS)
- op = Hexagon::SL2_return_t;
- else if ((inst & SL2_return_tnew_MASK) == SL2_return_tnew_BITS)
- op = Hexagon::SL2_return_tnew;
- else {
- os << "<unknown subinstruction>";
- return MCDisassembler::Fail;
- }
- break;
- case HexagonII::HSIG_A:
- if ((inst & SA1_addi_MASK) == SA1_addi_BITS)
- op = Hexagon::SA1_addi;
- else if ((inst & SA1_addrx_MASK) == SA1_addrx_BITS)
- op = Hexagon::SA1_addrx;
- else if ((inst & SA1_addsp_MASK) == SA1_addsp_BITS)
- op = Hexagon::SA1_addsp;
- else if ((inst & SA1_and1_MASK) == SA1_and1_BITS)
- op = Hexagon::SA1_and1;
- else if ((inst & SA1_clrf_MASK) == SA1_clrf_BITS)
- op = Hexagon::SA1_clrf;
- else if ((inst & SA1_clrfnew_MASK) == SA1_clrfnew_BITS)
- op = Hexagon::SA1_clrfnew;
- else if ((inst & SA1_clrt_MASK) == SA1_clrt_BITS)
- op = Hexagon::SA1_clrt;
- else if ((inst & SA1_clrtnew_MASK) == SA1_clrtnew_BITS)
- op = Hexagon::SA1_clrtnew;
- else if ((inst & SA1_cmpeqi_MASK) == SA1_cmpeqi_BITS)
- op = Hexagon::SA1_cmpeqi;
- else if ((inst & SA1_combine0i_MASK) == SA1_combine0i_BITS)
- op = Hexagon::SA1_combine0i;
- else if ((inst & SA1_combine1i_MASK) == SA1_combine1i_BITS)
- op = Hexagon::SA1_combine1i;
- else if ((inst & SA1_combine2i_MASK) == SA1_combine2i_BITS)
- op = Hexagon::SA1_combine2i;
- else if ((inst & SA1_combine3i_MASK) == SA1_combine3i_BITS)
- op = Hexagon::SA1_combine3i;
- else if ((inst & SA1_combinerz_MASK) == SA1_combinerz_BITS)
- op = Hexagon::SA1_combinerz;
- else if ((inst & SA1_combinezr_MASK) == SA1_combinezr_BITS)
- op = Hexagon::SA1_combinezr;
- else if ((inst & SA1_dec_MASK) == SA1_dec_BITS)
- op = Hexagon::SA1_dec;
- else if ((inst & SA1_inc_MASK) == SA1_inc_BITS)
- op = Hexagon::SA1_inc;
- else if ((inst & SA1_seti_MASK) == SA1_seti_BITS)
- op = Hexagon::SA1_seti;
- else if ((inst & SA1_setin1_MASK) == SA1_setin1_BITS)
- op = Hexagon::SA1_setin1;
- else if ((inst & SA1_sxtb_MASK) == SA1_sxtb_BITS)
- op = Hexagon::SA1_sxtb;
- else if ((inst & SA1_sxth_MASK) == SA1_sxth_BITS)
- op = Hexagon::SA1_sxth;
- else if ((inst & SA1_tfr_MASK) == SA1_tfr_BITS)
- op = Hexagon::SA1_tfr;
- else if ((inst & SA1_zxtb_MASK) == SA1_zxtb_BITS)
- op = Hexagon::SA1_zxtb;
- else if ((inst & SA1_zxth_MASK) == SA1_zxth_BITS)
- op = Hexagon::SA1_zxth;
- else {
- os << "<unknown subinstruction>";
- return MCDisassembler::Fail;
- }
- break;
- case HexagonII::HSIG_S1:
- if ((inst & SS1_storeb_io_MASK) == SS1_storeb_io_BITS)
- op = Hexagon::SS1_storeb_io;
- else if ((inst & SS1_storew_io_MASK) == SS1_storew_io_BITS)
- op = Hexagon::SS1_storew_io;
- else {
- os << "<unknown subinstruction>";
- return MCDisassembler::Fail;
- }
- break;
- case HexagonII::HSIG_S2:
- if ((inst & SS2_allocframe_MASK) == SS2_allocframe_BITS)
- op = Hexagon::SS2_allocframe;
- else if ((inst & SS2_storebi0_MASK) == SS2_storebi0_BITS)
- op = Hexagon::SS2_storebi0;
- else if ((inst & SS2_storebi1_MASK) == SS2_storebi1_BITS)
- op = Hexagon::SS2_storebi1;
- else if ((inst & SS2_stored_sp_MASK) == SS2_stored_sp_BITS)
- op = Hexagon::SS2_stored_sp;
- else if ((inst & SS2_storeh_io_MASK) == SS2_storeh_io_BITS)
- op = Hexagon::SS2_storeh_io;
- else if ((inst & SS2_storew_sp_MASK) == SS2_storew_sp_BITS)
- op = Hexagon::SS2_storew_sp;
- else if ((inst & SS2_storewi0_MASK) == SS2_storewi0_BITS)
- op = Hexagon::SS2_storewi0;
- else if ((inst & SS2_storewi1_MASK) == SS2_storewi1_BITS)
- op = Hexagon::SS2_storewi1;
- else {
- os << "<unknown subinstruction>";
- return MCDisassembler::Fail;
- }
- break;
- default:
- os << "<unknown>";
- return MCDisassembler::Fail;
- }
- return MCDisassembler::Success;
-}
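
Every class in GetSubinstOpcode is recognized with the same idiom built
from the enum above: (inst & X_MASK) == X_BITS, where the mask selects the
fixed encoding bits and BITS gives their required values; whatever the mask
leaves out is operand payload. A minimal check for one entry:

    #include <cassert>
    #include <cstdint>

    // Values as listed in subInstBinaryValues for SA1_seti.
    static const uint16_t SA1_seti_BITS = 0x0800;
    static const uint16_t SA1_seti_MASK = 0x1c00;

    static bool isSA1Seti(uint16_t Inst) {
      // Only bits selected by the mask take part in the comparison.
      return (Inst & SA1_seti_MASK) == SA1_seti_BITS;
    }

    int main() {
      assert(isSA1Seti(0x0800));          // payload bits all zero
      assert(isSA1Seti(0x0800 | 0x03ff)); // payload bits are ignored
      assert(!isSA1Seti(0x0c00));         // wrong fixed bits
      return 0;
    }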
-
-static unsigned getRegFromSubinstEncoding(unsigned encoded_reg) {
- if (encoded_reg < 8)
- return Hexagon::R0 + encoded_reg;
- else if (encoded_reg < 16)
- return Hexagon::R0 + encoded_reg + 8;
-
- // patently false value
- return Hexagon::NoRegister;
-}
-
-static unsigned getDRegFromSubinstEncoding(unsigned encoded_dreg) {
- if (encoded_dreg < 4)
- return Hexagon::D0 + encoded_dreg;
- else if (encoded_dreg < 8)
- return Hexagon::D0 + encoded_dreg + 4;
-
- // patently false value
- return Hexagon::NoRegister;
-}
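
The two helpers above encode the duplex register file split: a 4-bit field
reaches R0-R7 directly and R16-R23 via the +8 adjustment, and a 3-bit
double-register field reaches D0-D3 and D8-D11 the same way. A standalone
sketch over plain register numbers (no LLVM types):

    #include <cassert>

    // Map a 4-bit duplex field to a GPR number, mirroring
    // getRegFromSubinstEncoding: 0-7 -> R0-R7, 8-15 -> R16-R23.
    static int subinstGPR(unsigned Enc) {
      if (Enc < 8)
        return Enc;     // R0..R7
      if (Enc < 16)
        return Enc + 8; // R16..R23
      return -1;        // invalid encoding
    }

    int main() {
      assert(subinstGPR(0) == 0);   // R0
      assert(subinstGPR(7) == 7);   // R7
      assert(subinstGPR(8) == 16);  // R16
      assert(subinstGPR(15) == 23); // R23
      assert(subinstGPR(16) == -1);
      return 0;
    }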
-
-void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
- unsigned inst) const {
- int64_t operand;
- MCOperand Op;
- switch (opcode) {
- case Hexagon::SL2_deallocframe:
- case Hexagon::SL2_jumpr31:
- case Hexagon::SL2_jumpr31_f:
- case Hexagon::SL2_jumpr31_fnew:
- case Hexagon::SL2_jumpr31_t:
- case Hexagon::SL2_jumpr31_tnew:
- case Hexagon::SL2_return:
- case Hexagon::SL2_return_f:
- case Hexagon::SL2_return_fnew:
- case Hexagon::SL2_return_t:
- case Hexagon::SL2_return_tnew:
- // no operands for these instructions
- break;
- case Hexagon::SS2_allocframe:
- // u 8-4{5_3}
- operand = ((inst & 0x1f0) >> 4) << 3;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SL1_loadri_io:
- // Rd 3-0, Rs 7-4, u 11-8{4_2}
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = (inst & 0xf00) >> 6;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SL1_loadrub_io:
- // Rd 3-0, Rs 7-4, u 11-8
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = (inst & 0xf00) >> 8;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SL2_loadrb_io:
- // Rd 3-0, Rs 7-4, u 10-8
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = (inst & 0x700) >> 8;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SL2_loadrh_io:
- case Hexagon::SL2_loadruh_io:
- // Rd 3-0, Rs 7-4, u 10-8{3_1}
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = ((inst & 0x700) >> 8) << 1;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SL2_loadrd_sp:
- // Rdd 2-0, u 7-3{5_3}
- operand = getDRegFromSubinstEncoding(inst & 0x7);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = ((inst & 0x0f8) >> 3) << 3;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SL2_loadri_sp:
- // Rd 3-0, u 8-4{5_2}
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = ((inst & 0x1f0) >> 4) << 2;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SA1_addi:
- // Rx 3-0 (x2), s7 10-4
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- MI->addOperand(Op);
- operand = SignExtend64<7>((inst & 0x7f0) >> 4);
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SA1_addrx:
- // Rx 3-0 (x2), Rs 7-4
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- MI->addOperand(Op);
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- break;
- case Hexagon::SA1_and1:
- case Hexagon::SA1_dec:
- case Hexagon::SA1_inc:
- case Hexagon::SA1_sxtb:
- case Hexagon::SA1_sxth:
- case Hexagon::SA1_tfr:
- case Hexagon::SA1_zxtb:
- case Hexagon::SA1_zxth:
- // Rd 3-0, Rs 7-4
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- break;
- case Hexagon::SA1_addsp:
- // Rd 3-0, u 9-4{6_2}
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = ((inst & 0x3f0) >> 4) << 2;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SA1_seti:
- // Rd 3-0, u 9-4
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = (inst & 0x3f0) >> 4;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SA1_clrf:
- case Hexagon::SA1_clrfnew:
- case Hexagon::SA1_clrt:
- case Hexagon::SA1_clrtnew:
- case Hexagon::SA1_setin1:
- // Rd 3-0
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- if (opcode == Hexagon::SA1_setin1)
- break;
- MI->addOperand(MCOperand::createReg(Hexagon::P0));
- break;
- case Hexagon::SA1_cmpeqi:
- // Rs 7-4, u 1-0
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = inst & 0x3;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SA1_combine0i:
- case Hexagon::SA1_combine1i:
- case Hexagon::SA1_combine2i:
- case Hexagon::SA1_combine3i:
- // Rdd 2-0, u 6-5
- operand = getDRegFromSubinstEncoding(inst & 0x7);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = (inst & 0x060) >> 5;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SA1_combinerz:
- case Hexagon::SA1_combinezr:
- // Rdd 2-0, Rs 7-4
- operand = getDRegFromSubinstEncoding(inst & 0x7);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- break;
- case Hexagon::SS1_storeb_io:
- // Rs 7-4, u 11-8, Rt 3-0
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = (inst & 0xf00) >> 8;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- break;
- case Hexagon::SS1_storew_io:
- // Rs 7-4, u 11-8{4_2}, Rt 3-0
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = ((inst & 0xf00) >> 8) << 2;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- break;
- case Hexagon::SS2_storebi0:
- case Hexagon::SS2_storebi1:
- // Rs 7-4, u 3-0
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = inst & 0xf;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SS2_storewi0:
- case Hexagon::SS2_storewi1:
- // Rs 7-4, u 3-0{4_2}
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = (inst & 0xf) << 2;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- break;
- case Hexagon::SS2_stored_sp:
- // s 8-3{6_3}, Rtt 2-0
- operand = SignExtend64<9>(((inst & 0x1f8) >> 3) << 3);
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- operand = getDRegFromSubinstEncoding(inst & 0x7);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- break;
- case Hexagon::SS2_storeh_io:
- // Rs 7-4, u 10-8{3_1}, Rt 3-0
- operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- operand = ((inst & 0x700) >> 8) << 1;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- break;
- case Hexagon::SS2_storew_sp:
- // u 8-4{5_2}, Rd 3-0
- operand = ((inst & 0x1f0) >> 4) << 2;
- HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
- operand = getRegFromSubinstEncoding(inst & 0xf);
- Op = MCOperand::createReg(operand);
- MI->addOperand(Op);
- break;
- default:
- // don't crash with an invalid subinstruction
- // llvm_unreachable("Invalid subinstruction in duplex instruction");
- break;
- }
-}
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index 0b2b46387b6a..4767165141a3 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -22,14 +22,12 @@ include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// Hexagon Architectures
-def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Hexagon V4">;
-def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">;
-def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "V55", "Hexagon V55">;
-def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "V60", "Hexagon V60">;
+include "HexagonDepArch.td"
-def FeatureHVX: SubtargetFeature<"hvx", "UseHVXOps", "true",
+// Hexagon ISA Extensions
+def ExtensionHVX: SubtargetFeature<"hvx", "UseHVXOps", "true",
"Hexagon HVX instructions">;
-def FeatureHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps", "true",
+def ExtensionHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps", "true",
"Hexagon HVX Double instructions">;
def FeatureLongCalls: SubtargetFeature<"long-calls", "UseLongCalls", "true",
"Use constant-extended calls">;
@@ -37,19 +35,14 @@ def FeatureLongCalls: SubtargetFeature<"long-calls", "UseLongCalls", "true",
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
-def HasV5T : Predicate<"HST->hasV5TOps()">;
-def NoV5T : Predicate<"!HST->hasV5TOps()">;
-def HasV55T : Predicate<"HST->hasV55TOps()">,
- AssemblerPredicate<"ArchV55">;
-def HasV60T : Predicate<"HST->hasV60TOps()">,
- AssemblerPredicate<"ArchV60">;
+
def UseMEMOP : Predicate<"HST->useMemOps()">;
def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">;
def UseHVXDbl : Predicate<"HST->useHVXDblOps()">,
- AssemblerPredicate<"FeatureHVXDbl">;
+ AssemblerPredicate<"ExtensionHVXDbl">;
def UseHVXSgl : Predicate<"HST->useHVXSglOps()">;
def UseHVX : Predicate<"HST->useHVXSglOps() || HST->useHVXDblOps()">,
- AssemblerPredicate<"FeatureHVX">;
+ AssemblerPredicate<"ExtensionHVX">;
//===----------------------------------------------------------------------===//
// Classes used for relation maps.
@@ -81,7 +74,7 @@ class IntrinsicsRel;
def getPredOpcode : InstrMapping {
let FilterClass = "PredRel";
// Instructions with the same BaseOpcode and isNVStore values form a row.
- let RowFields = ["BaseOpcode", "isNVStore", "PNewValue", "isNT"];
+ let RowFields = ["BaseOpcode", "isNVStore", "PNewValue", "isBrTaken", "isNT"];
// Instructions with the same predicate sense form a column.
let ColFields = ["PredSense"];
// The key column is the unpredicated instructions.
@@ -132,7 +125,7 @@ def getPredNewOpcode : InstrMapping {
//
def getPredOldOpcode : InstrMapping {
let FilterClass = "PredNewRel";
- let RowFields = ["BaseOpcode", "PredSense", "isNVStore"];
+ let RowFields = ["BaseOpcode", "PredSense", "isNVStore", "isBrTaken"];
let ColFields = ["PNewValue"];
let KeyCol = ["new"];
let ValueCols = [[""]];
@@ -248,11 +241,18 @@ def getRealHWInstr : InstrMapping {
//===----------------------------------------------------------------------===//
include "HexagonSchedule.td"
include "HexagonRegisterInfo.td"
-include "HexagonCallingConv.td"
-include "HexagonInstrInfo.td"
+include "HexagonOperands.td"
+include "HexagonDepOperands.td"
+include "HexagonDepITypes.td"
+include "HexagonInstrFormats.td"
+include "HexagonDepInstrFormats.td"
+include "HexagonDepInstrInfo.td"
+include "HexagonPseudo.td"
include "HexagonPatterns.td"
+include "HexagonDepMappings.td"
include "HexagonIntrinsics.td"
include "HexagonIntrinsicsDerived.td"
+include "HexagonMapAsm2IntrinV62.gen.td"
def HexagonInstrInfo : InstrInfo;
@@ -271,7 +271,9 @@ def : Proc<"hexagonv5", HexagonModelV4,
def : Proc<"hexagonv55", HexagonModelV55,
[ArchV4, ArchV5, ArchV55]>;
def : Proc<"hexagonv60", HexagonModelV60,
- [ArchV4, ArchV5, ArchV55, ArchV60, FeatureHVX]>;
+ [ArchV4, ArchV5, ArchV55, ArchV60, ExtensionHVX]>;
+def : Proc<"hexagonv62", HexagonModelV62,
+ [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62, ExtensionHVX]>;
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 54db5ad4374b..fda23f8f6b05 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -261,10 +261,34 @@ static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
return Sym;
}
+static MCInst ScaleVectorOffset(MCInst &Inst, unsigned OpNo,
+ unsigned VectorSize, MCContext &Ctx) {
+ MCInst T;
+ T.setOpcode(Inst.getOpcode());
+ for (unsigned i = 0, n = Inst.getNumOperands(); i != n; ++i) {
+ if (i != OpNo) {
+ T.addOperand(Inst.getOperand(i));
+ continue;
+ }
+ MCOperand &ImmOp = Inst.getOperand(i);
+ const auto *HE = static_cast<const HexagonMCExpr*>(ImmOp.getExpr());
+ int32_t V = cast<MCConstantExpr>(HE->getExpr())->getValue();
+ auto *NewCE = MCConstantExpr::create(V / int32_t(VectorSize), Ctx);
+ auto *NewHE = HexagonMCExpr::create(NewCE, Ctx);
+ T.addOperand(MCOperand::createExpr(NewHE));
+ }
+ return T;
+}
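
ScaleVectorOffset rebuilds the instruction with one immediate divided by
the HVX vector size, so the printed offset is in vector units rather than
bytes. The arithmetic it applies, in isolation (a sketch; the vmem offset
convention is assumed from the Hexagon assembly syntax):

    #include <cassert>
    #include <cstdint>

    // vmem(r0 + #n) addresses r0 + n * VectorSize bytes, so the byte
    // offset carried in the MCInst must be divided before printing.
    static int32_t bytesToVectorUnits(int32_t ByteOff, unsigned VectorSize) {
      return ByteOff / int32_t(VectorSize);
    }

    int main() {
      assert(bytesToVectorUnits(256, 64) == 4);  // 64-byte single mode
      assert(bytesToVectorUnits(256, 128) == 2); // 128-byte double mode
      assert(bytesToVectorUnits(-64, 64) == -1); // negative offsets scale too
      return 0;
    }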
+
void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
const MachineInstr &MI) {
MCInst &MappedInst = static_cast <MCInst &>(Inst);
const MCRegisterInfo *RI = OutStreamer->getContext().getRegisterInfo();
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ unsigned VectorSize = HST.useHVXSglOps()
+ ? Hexagon::VectorRegsRegClass.getSize()
+ : Hexagon::VectorRegs128BRegClass.getSize();
switch (Inst.getOpcode()) {
default: return;
@@ -282,6 +306,36 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
break;
}
+ case Hexagon::A2_tfrf: {
+ Inst.setOpcode(Hexagon::A2_paddif);
+ Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext)));
+ break;
+ }
+
+ case Hexagon::A2_tfrt: {
+ Inst.setOpcode(Hexagon::A2_paddit);
+ Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext)));
+ break;
+ }
+
+ case Hexagon::A2_tfrfnew: {
+ Inst.setOpcode(Hexagon::A2_paddifnew);
+ Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext)));
+ break;
+ }
+
+ case Hexagon::A2_tfrtnew: {
+ Inst.setOpcode(Hexagon::A2_padditnew);
+ Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext)));
+ break;
+ }
+
+ case Hexagon::A2_zxtb: {
+ Inst.setOpcode(Hexagon::A2_andir);
+ Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(255, OutContext)));
+ break;
+ }
+
// "$dst = CONST64(#$src1)",
case Hexagon::CONST64:
if (!OutStreamer->hasRawTextSupport()) {
@@ -376,6 +430,9 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
Rs.setReg(getHexagonRegisterPair(Rs.getReg(), RI));
return;
}
+ case Hexagon::PS_call_nr:
+ Inst.setOpcode(Hexagon::J2_call);
+ break;
case Hexagon::S5_asrhub_rnd_sat_goodsyntax: {
MCOperand &MO = MappedInst.getOperand(2);
int64_t Imm;
@@ -564,6 +621,181 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
return;
}
+ case Hexagon::V6_vL32Ub_pi:
+ case Hexagon::V6_vL32b_cur_pi:
+ case Hexagon::V6_vL32b_nt_cur_pi:
+ case Hexagon::V6_vL32b_pi:
+ case Hexagon::V6_vL32b_nt_pi:
+ case Hexagon::V6_vL32b_nt_tmp_pi:
+ case Hexagon::V6_vL32b_tmp_pi:
+ case Hexagon::V6_vL32Ub_pi_128B:
+ case Hexagon::V6_vL32b_cur_pi_128B:
+ case Hexagon::V6_vL32b_nt_cur_pi_128B:
+ case Hexagon::V6_vL32b_pi_128B:
+ case Hexagon::V6_vL32b_nt_pi_128B:
+ case Hexagon::V6_vL32b_nt_tmp_pi_128B:
+ case Hexagon::V6_vL32b_tmp_pi_128B:
+ MappedInst = ScaleVectorOffset(Inst, 3, VectorSize, OutContext);
+ return;
+
+ case Hexagon::V6_vL32Ub_ai:
+ case Hexagon::V6_vL32b_ai:
+ case Hexagon::V6_vL32b_cur_ai:
+ case Hexagon::V6_vL32b_nt_ai:
+ case Hexagon::V6_vL32b_nt_cur_ai:
+ case Hexagon::V6_vL32b_nt_tmp_ai:
+ case Hexagon::V6_vL32b_tmp_ai:
+ case Hexagon::V6_vL32Ub_ai_128B:
+ case Hexagon::V6_vL32b_ai_128B:
+ case Hexagon::V6_vL32b_cur_ai_128B:
+ case Hexagon::V6_vL32b_nt_ai_128B:
+ case Hexagon::V6_vL32b_nt_cur_ai_128B:
+ case Hexagon::V6_vL32b_nt_tmp_ai_128B:
+ case Hexagon::V6_vL32b_tmp_ai_128B:
+ MappedInst = ScaleVectorOffset(Inst, 2, VectorSize, OutContext);
+ return;
+
+ case Hexagon::V6_vS32Ub_pi:
+ case Hexagon::V6_vS32b_new_pi:
+ case Hexagon::V6_vS32b_nt_new_pi:
+ case Hexagon::V6_vS32b_nt_pi:
+ case Hexagon::V6_vS32b_pi:
+ case Hexagon::V6_vS32Ub_pi_128B:
+ case Hexagon::V6_vS32b_new_pi_128B:
+ case Hexagon::V6_vS32b_nt_new_pi_128B:
+ case Hexagon::V6_vS32b_nt_pi_128B:
+ case Hexagon::V6_vS32b_pi_128B:
+ MappedInst = ScaleVectorOffset(Inst, 2, VectorSize, OutContext);
+ return;
+
+ case Hexagon::V6_vS32Ub_ai:
+ case Hexagon::V6_vS32b_ai:
+ case Hexagon::V6_vS32b_new_ai:
+ case Hexagon::V6_vS32b_nt_ai:
+ case Hexagon::V6_vS32b_nt_new_ai:
+ case Hexagon::V6_vS32Ub_ai_128B:
+ case Hexagon::V6_vS32b_ai_128B:
+ case Hexagon::V6_vS32b_new_ai_128B:
+ case Hexagon::V6_vS32b_nt_ai_128B:
+ case Hexagon::V6_vS32b_nt_new_ai_128B:
+ MappedInst = ScaleVectorOffset(Inst, 1, VectorSize, OutContext);
+ return;
+
+ case Hexagon::V6_vL32b_cur_npred_pi:
+ case Hexagon::V6_vL32b_cur_pred_pi:
+ case Hexagon::V6_vL32b_npred_pi:
+ case Hexagon::V6_vL32b_nt_cur_npred_pi:
+ case Hexagon::V6_vL32b_nt_cur_pred_pi:
+ case Hexagon::V6_vL32b_nt_npred_pi:
+ case Hexagon::V6_vL32b_nt_pred_pi:
+ case Hexagon::V6_vL32b_nt_tmp_npred_pi:
+ case Hexagon::V6_vL32b_nt_tmp_pred_pi:
+ case Hexagon::V6_vL32b_pred_pi:
+ case Hexagon::V6_vL32b_tmp_npred_pi:
+ case Hexagon::V6_vL32b_tmp_pred_pi:
+ case Hexagon::V6_vL32b_cur_npred_pi_128B:
+ case Hexagon::V6_vL32b_cur_pred_pi_128B:
+ case Hexagon::V6_vL32b_npred_pi_128B:
+ case Hexagon::V6_vL32b_nt_cur_npred_pi_128B:
+ case Hexagon::V6_vL32b_nt_cur_pred_pi_128B:
+ case Hexagon::V6_vL32b_nt_npred_pi_128B:
+ case Hexagon::V6_vL32b_nt_pred_pi_128B:
+ case Hexagon::V6_vL32b_nt_tmp_npred_pi_128B:
+ case Hexagon::V6_vL32b_nt_tmp_pred_pi_128B:
+ case Hexagon::V6_vL32b_pred_pi_128B:
+ case Hexagon::V6_vL32b_tmp_npred_pi_128B:
+ case Hexagon::V6_vL32b_tmp_pred_pi_128B:
+ MappedInst = ScaleVectorOffset(Inst, 4, VectorSize, OutContext);
+ return;
+
+ case Hexagon::V6_vL32b_cur_npred_ai:
+ case Hexagon::V6_vL32b_cur_pred_ai:
+ case Hexagon::V6_vL32b_npred_ai:
+ case Hexagon::V6_vL32b_nt_cur_npred_ai:
+ case Hexagon::V6_vL32b_nt_cur_pred_ai:
+ case Hexagon::V6_vL32b_nt_npred_ai:
+ case Hexagon::V6_vL32b_nt_pred_ai:
+ case Hexagon::V6_vL32b_nt_tmp_npred_ai:
+ case Hexagon::V6_vL32b_nt_tmp_pred_ai:
+ case Hexagon::V6_vL32b_pred_ai:
+ case Hexagon::V6_vL32b_tmp_npred_ai:
+ case Hexagon::V6_vL32b_tmp_pred_ai:
+ case Hexagon::V6_vL32b_cur_npred_ai_128B:
+ case Hexagon::V6_vL32b_cur_pred_ai_128B:
+ case Hexagon::V6_vL32b_npred_ai_128B:
+ case Hexagon::V6_vL32b_nt_cur_npred_ai_128B:
+ case Hexagon::V6_vL32b_nt_cur_pred_ai_128B:
+ case Hexagon::V6_vL32b_nt_npred_ai_128B:
+ case Hexagon::V6_vL32b_nt_pred_ai_128B:
+ case Hexagon::V6_vL32b_nt_tmp_npred_ai_128B:
+ case Hexagon::V6_vL32b_nt_tmp_pred_ai_128B:
+ case Hexagon::V6_vL32b_pred_ai_128B:
+ case Hexagon::V6_vL32b_tmp_npred_ai_128B:
+ case Hexagon::V6_vL32b_tmp_pred_ai_128B:
+ MappedInst = ScaleVectorOffset(Inst, 3, VectorSize, OutContext);
+ return;
+
+ case Hexagon::V6_vS32Ub_npred_pi:
+ case Hexagon::V6_vS32Ub_pred_pi:
+ case Hexagon::V6_vS32b_new_npred_pi:
+ case Hexagon::V6_vS32b_new_pred_pi:
+ case Hexagon::V6_vS32b_npred_pi:
+ case Hexagon::V6_vS32b_nqpred_pi:
+ case Hexagon::V6_vS32b_nt_new_npred_pi:
+ case Hexagon::V6_vS32b_nt_new_pred_pi:
+ case Hexagon::V6_vS32b_nt_npred_pi:
+ case Hexagon::V6_vS32b_nt_nqpred_pi:
+ case Hexagon::V6_vS32b_nt_pred_pi:
+ case Hexagon::V6_vS32b_nt_qpred_pi:
+ case Hexagon::V6_vS32b_pred_pi:
+ case Hexagon::V6_vS32b_qpred_pi:
+ case Hexagon::V6_vS32Ub_npred_pi_128B:
+ case Hexagon::V6_vS32Ub_pred_pi_128B:
+ case Hexagon::V6_vS32b_new_npred_pi_128B:
+ case Hexagon::V6_vS32b_new_pred_pi_128B:
+ case Hexagon::V6_vS32b_npred_pi_128B:
+ case Hexagon::V6_vS32b_nqpred_pi_128B:
+ case Hexagon::V6_vS32b_nt_new_npred_pi_128B:
+ case Hexagon::V6_vS32b_nt_new_pred_pi_128B:
+ case Hexagon::V6_vS32b_nt_npred_pi_128B:
+ case Hexagon::V6_vS32b_nt_nqpred_pi_128B:
+ case Hexagon::V6_vS32b_nt_pred_pi_128B:
+ case Hexagon::V6_vS32b_nt_qpred_pi_128B:
+ case Hexagon::V6_vS32b_pred_pi_128B:
+ case Hexagon::V6_vS32b_qpred_pi_128B:
+ MappedInst = ScaleVectorOffset(Inst, 3, VectorSize, OutContext);
+ return;
+
+ case Hexagon::V6_vS32Ub_npred_ai:
+ case Hexagon::V6_vS32Ub_pred_ai:
+ case Hexagon::V6_vS32b_new_npred_ai:
+ case Hexagon::V6_vS32b_new_pred_ai:
+ case Hexagon::V6_vS32b_npred_ai:
+ case Hexagon::V6_vS32b_nqpred_ai:
+ case Hexagon::V6_vS32b_nt_new_npred_ai:
+ case Hexagon::V6_vS32b_nt_new_pred_ai:
+ case Hexagon::V6_vS32b_nt_npred_ai:
+ case Hexagon::V6_vS32b_nt_nqpred_ai:
+ case Hexagon::V6_vS32b_nt_pred_ai:
+ case Hexagon::V6_vS32b_nt_qpred_ai:
+ case Hexagon::V6_vS32b_pred_ai:
+ case Hexagon::V6_vS32b_qpred_ai:
+ case Hexagon::V6_vS32Ub_npred_ai_128B:
+ case Hexagon::V6_vS32Ub_pred_ai_128B:
+ case Hexagon::V6_vS32b_new_npred_ai_128B:
+ case Hexagon::V6_vS32b_new_pred_ai_128B:
+ case Hexagon::V6_vS32b_npred_ai_128B:
+ case Hexagon::V6_vS32b_nqpred_ai_128B:
+ case Hexagon::V6_vS32b_nt_new_npred_ai_128B:
+ case Hexagon::V6_vS32b_nt_new_pred_ai_128B:
+ case Hexagon::V6_vS32b_nt_npred_ai_128B:
+ case Hexagon::V6_vS32b_nt_nqpred_ai_128B:
+ case Hexagon::V6_vS32b_nt_pred_ai_128B:
+ case Hexagon::V6_vS32b_nt_qpred_ai_128B:
+ case Hexagon::V6_vS32b_pred_ai_128B:
+ case Hexagon::V6_vS32b_qpred_ai_128B:
+ MappedInst = ScaleVectorOffset(Inst, 2, VectorSize, OutContext);
+ return;
}
}
@@ -578,13 +810,9 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MI->isBundle()) {
const MachineBasicBlock* MBB = MI->getParent();
MachineBasicBlock::const_instr_iterator MII = MI->getIterator();
- unsigned IgnoreCount = 0;
for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
- if (MII->getOpcode() == TargetOpcode::DBG_VALUE ||
- MII->getOpcode() == TargetOpcode::IMPLICIT_DEF)
- ++IgnoreCount;
- else
+ if (!MII->isDebugValue() && !MII->isImplicitDef())
HexagonLowerToMC(MCII, &*MII, MCB, *this);
}
else
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
index fe7278fde1b1..61f290ca98d7 100644
--- a/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -46,6 +46,17 @@ using namespace llvm;
static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,
cl::init(true), cl::desc("Preserve subregisters in tied operands"));
+static cl::opt<bool> GenExtract("hexbit-extract", cl::Hidden,
+ cl::init(true), cl::desc("Generate extract instructions"));
+static cl::opt<bool> GenBitSplit("hexbit-bitsplit", cl::Hidden,
+ cl::init(true), cl::desc("Generate bitsplit instructions"));
+
+static cl::opt<unsigned> MaxExtract("hexbit-max-extract", cl::Hidden,
+ cl::init(UINT_MAX));
+static unsigned CountExtract = 0;
+static cl::opt<unsigned> MaxBitSplit("hexbit-max-bitsplit", cl::Hidden,
+ cl::init(UINT_MAX));
+static unsigned CountBitSplit = 0;
namespace llvm {
@@ -249,8 +260,6 @@ INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit",
bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
RegisterSet &AVs) {
- MachineDomTreeNode *N = MDT->getNode(&B);
- typedef GraphTraits<MachineDomTreeNode*> GTN;
bool Changed = false;
if (T.TopDown)
@@ -262,10 +271,9 @@ bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
RegisterSet NewAVs = AVs;
NewAVs.insert(Defs);
- for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) {
- MachineBasicBlock *SB = (*I)->getBlock();
- Changed |= visitBlock(*SB, T, NewAVs);
- }
+ for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(&B)))
+ Changed |= visitBlock(*(DTN->getBlock()), T, NewAVs);
+
if (!T.TopDown)
Changed |= T.processBlock(B, AVs);
@@ -896,6 +904,7 @@ const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
*MRI.getTargetRegisterInfo());
auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void {
+ (void)HRI;
assert(Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo) ||
Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi));
};
@@ -983,9 +992,9 @@ bool DeadCodeElimination::isDead(unsigned R) const {
bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
bool Changed = false;
- typedef GraphTraits<MachineDomTreeNode*> GTN;
- for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
- Changed |= runOnNode(*I);
+
+ for (auto *DTN : children<MachineDomTreeNode*>(N))
+ Changed |= runOnNode(DTN);
MachineBasicBlock *B = N->getBlock();
std::vector<MachineInstr*> Instrs;
@@ -1735,10 +1744,11 @@ namespace {
// This is by no means complete
class BitSimplification : public Transformation {
public:
- BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii,
- const HexagonRegisterInfo &hri, MachineRegisterInfo &mri,
- MachineFunction &mf)
- : Transformation(true), HII(hii), HRI(hri), MRI(mri), MF(mf), BT(bt) {}
+ BitSimplification(BitTracker &bt, const MachineDominatorTree &mdt,
+ const HexagonInstrInfo &hii, const HexagonRegisterInfo &hri,
+ MachineRegisterInfo &mri, MachineFunction &mf)
+ : Transformation(true), MDT(mdt), HII(hii), HRI(hri), MRI(mri),
+ MF(mf), BT(bt) {}
bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
@@ -1765,9 +1775,18 @@ namespace {
const BitTracker::RegisterCell &RC);
bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
const BitTracker::RegisterCell &RC);
+ bool genBitSplit(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
const BitTracker::RegisterCell &RC);
+ bool simplifyExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
+ const BitTracker::RegisterCell &RC, const RegisterSet &AVs);
+
+ // Cache of created instructions to avoid creating duplicates.
+ // XXX Currently only used by genBitSplit.
+ std::vector<MachineInstr*> NewMIs;
+ const MachineDominatorTree &MDT;
const HexagonInstrInfo &HII;
const HexagonRegisterInfo &HRI;
MachineRegisterInfo &MRI;
@@ -2149,6 +2168,146 @@ bool BitSimplification::genExtractLow(MachineInstr *MI,
return false;
}
+bool BitSimplification::genBitSplit(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
+ const RegisterSet &AVs) {
+ if (!GenBitSplit)
+ return false;
+ if (CountBitSplit >= MaxBitSplit)
+ return false;
+
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::A4_bitsplit:
+ case Hexagon::A4_bitspliti:
+ return false;
+ }
+
+ unsigned W = RC.width();
+ if (W != 32)
+ return false;
+
+ auto ctlz = [] (const BitTracker::RegisterCell &C) -> unsigned {
+ unsigned Z = C.width();
+ while (Z > 0 && C[Z-1].is(0))
+ --Z;
+ return C.width() - Z;
+ };
+
+ // Count the number of leading zeros in the target RC.
+ unsigned Z = ctlz(RC);
+ if (Z == 0 || Z == W)
+ return false;
+
+ // A simplistic analysis: assume the source register (the one being split)
+ // is fully unknown, and that all its bits are self-references.
+ const BitTracker::BitValue &B0 = RC[0];
+ if (B0.Type != BitTracker::BitValue::Ref)
+ return false;
+
+ unsigned SrcR = B0.RefI.Reg;
+ unsigned SrcSR = 0;
+ unsigned Pos = B0.RefI.Pos;
+
+ // All the non-zero bits should be consecutive bits from the same register.
+ for (unsigned i = 1; i < W-Z; ++i) {
+ const BitTracker::BitValue &V = RC[i];
+ if (V.Type != BitTracker::BitValue::Ref)
+ return false;
+ if (V.RefI.Reg != SrcR || V.RefI.Pos != Pos+i)
+ return false;
+ }
+
+ // Now, find the other bitfield among AVs.
+ for (unsigned S = AVs.find_first(); S; S = AVs.find_next(S)) {
+ // The number of leading zeros here should be the number of trailing
+ // non-zeros in RC.
+ if (!BT.has(S))
+ continue;
+ const BitTracker::RegisterCell &SC = BT.lookup(S);
+ if (SC.width() != W || ctlz(SC) != W-Z)
+ continue;
+ // The Z lower bits should now match SrcR.
+ const BitTracker::BitValue &S0 = SC[0];
+ if (S0.Type != BitTracker::BitValue::Ref || S0.RefI.Reg != SrcR)
+ continue;
+ unsigned P = S0.RefI.Pos;
+
+ if (Pos <= P && (Pos + W-Z) != P)
+ continue;
+ if (P < Pos && (P + Z) != Pos)
+ continue;
+ // The starting bitfield position must be at a subregister boundary.
+ if (std::min(P, Pos) != 0 && std::min(P, Pos) != 32)
+ continue;
+
+ unsigned I;
+ for (I = 1; I < Z; ++I) {
+ const BitTracker::BitValue &V = SC[I];
+ if (V.Type != BitTracker::BitValue::Ref)
+ break;
+ if (V.RefI.Reg != SrcR || V.RefI.Pos != P+I)
+ break;
+ }
+ if (I != Z)
+ continue;
+
+ // Generate bitsplit where S is defined.
+ CountBitSplit++;
+ MachineInstr *DefS = MRI.getVRegDef(S);
+ assert(DefS != nullptr);
+ DebugLoc DL = DefS->getDebugLoc();
+ MachineBasicBlock &B = *DefS->getParent();
+ auto At = DefS->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(DefS);
+ if (MRI.getRegClass(SrcR)->getID() == Hexagon::DoubleRegsRegClassID)
+ SrcSR = (std::min(Pos, P) == 32) ? Hexagon::isub_hi : Hexagon::isub_lo;
+ if (!validateReg({SrcR,SrcSR}, Hexagon::A4_bitspliti, 1))
+ continue;
+ unsigned ImmOp = Pos <= P ? W-Z : Z;
+
+ // Find an existing bitsplit instruction if one already exists.
+ unsigned NewR = 0;
+ for (MachineInstr *In : NewMIs) {
+ if (In->getOpcode() != Hexagon::A4_bitspliti)
+ continue;
+ MachineOperand &Op1 = In->getOperand(1);
+ if (Op1.getReg() != SrcR || Op1.getSubReg() != SrcSR)
+ continue;
+ if (In->getOperand(2).getImm() != ImmOp)
+ continue;
+ // Check if the target register is available here.
+ MachineOperand &Op0 = In->getOperand(0);
+ MachineInstr *DefI = MRI.getVRegDef(Op0.getReg());
+ assert(DefI != nullptr);
+ if (!MDT.dominates(DefI, &*At))
+ continue;
+
+ // Found one that can be reused.
+ assert(Op0.getSubReg() == 0);
+ NewR = Op0.getReg();
+ break;
+ }
+ if (!NewR) {
+ NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+ auto NewBS = BuildMI(B, At, DL, HII.get(Hexagon::A4_bitspliti), NewR)
+ .addReg(SrcR, 0, SrcSR)
+ .addImm(ImmOp);
+ NewMIs.push_back(NewBS);
+ }
+ if (Pos <= P) {
+ HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_lo, MRI);
+ HBS::replaceRegWithSub(S, NewR, Hexagon::isub_hi, MRI);
+ } else {
+ HBS::replaceRegWithSub(S, NewR, Hexagon::isub_lo, MRI);
+ HBS::replaceRegWithSub(RD.Reg, NewR, Hexagon::isub_hi, MRI);
+ }
+ return true;
+ }
+
+ return false;
+}
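
The transformation above rests on the bitsplit semantics: rdd =
bitsplit(rs, #n) places the n low bits of rs in the low word of the pair
and the remaining bits in the high word, so both matched bitfields become
subregister reads of one result. A scalar model of that behavior (inferred
from the replacement logic here, not quoted from a reference manual):

    #include <cassert>
    #include <cstdint>

    // Model of rdd = bitsplit(rs, #n) as genBitSplit uses it:
    // low word = the n low bits, high word = the remaining bits.
    static uint64_t bitsplit(uint32_t Rs, unsigned N) {
      uint32_t Lo = Rs & ((1u << N) - 1);
      uint32_t Hi = Rs >> N;
      return (uint64_t(Hi) << 32) | Lo;
    }

    int main() {
      uint64_t P = bitsplit(0x12345678, 8);
      assert(uint32_t(P) == 0x78);           // isub_lo
      assert(uint32_t(P >> 32) == 0x123456); // isub_hi
      return 0;
    }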
+
// Check for tstbit simplification opportunity, where the bit being checked
// can be tracked back to another register. For example:
// vreg2 = S2_lsr_i_r vreg1, 5
@@ -2210,6 +2369,201 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI,
return false;
}
+// Detect whether RD is a bitfield extract (sign- or zero-extended) of
+// some register from the AVs set. Create a new corresponding instruction
+// at the location of MI. The intent is to recognize situations where
+// a sequence of instructions performs an operation that is equivalent to
+// an extract operation, such as a shift left followed by a shift right.
+bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
+ BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC,
+ const RegisterSet &AVs) {
+ if (!GenExtract)
+ return false;
+ if (CountExtract >= MaxExtract)
+ return false;
+ CountExtract++;
+
+ unsigned W = RC.width();
+ unsigned RW = W;
+ unsigned Len;
+ bool Signed;
+
+  // The code is mostly class-independent, except for the part that
+  // generates the extract instruction and establishes the source register
+  // (in case it needs to use a subregister).
+ const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
+ if (FRC != &Hexagon::IntRegsRegClass && FRC != &Hexagon::DoubleRegsRegClass)
+ return false;
+ assert(RD.Sub == 0);
+
+ // Observation:
+ // If the cell has a form of 00..0xx..x with k zeros and n remaining
+ // bits, this could be an extractu of the n bits, but it could also be
+ // an extractu of a longer field which happens to have 0s in the top
+ // bit positions.
+ // The same logic applies to sign-extended fields.
+ //
+ // Do not check for the extended extracts, since it would expand the
+ // search space quite a bit. The search may be expensive as it is.
+
+ const BitTracker::BitValue &TopV = RC[W-1];
+
+ // Eliminate candidates that have self-referential bits, since they
+ // cannot be extracts from other registers. Also, skip registers that
+ // have compile-time constant values.
+ bool IsConst = true;
+ for (unsigned I = 0; I != W; ++I) {
+ const BitTracker::BitValue &V = RC[I];
+ if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == RD.Reg)
+ return false;
+ IsConst = IsConst && (V.is(0) || V.is(1));
+ }
+ if (IsConst)
+ return false;
+
+ if (TopV.is(0) || TopV.is(1)) {
+ bool S = TopV.is(1);
+ for (--W; W > 0 && RC[W-1].is(S); --W)
+ ;
+ Len = W;
+ Signed = S;
+ // The sign bit must be a part of the field being extended.
+ if (Signed)
+ ++Len;
+ } else {
+ // This could still be a sign-extended extract.
+ assert(TopV.Type == BitTracker::BitValue::Ref);
+ if (TopV.RefI.Reg == RD.Reg || TopV.RefI.Pos == W-1)
+ return false;
+ for (--W; W > 0 && RC[W-1] == TopV; --W)
+ ;
+ // The top bits of RC are copies of TopV. One occurrence of TopV will
+ // be a part of the field.
+ Len = W + 1;
+ Signed = true;
+ }
+
+ // This would be just a copy. It should be handled elsewhere.
+ if (Len == RW)
+ return false;
+
+ DEBUG({
+ dbgs() << __func__ << " on reg: " << PrintReg(RD.Reg, &HRI, RD.Sub)
+ << ", MI: " << *MI;
+ dbgs() << "Cell: " << RC << '\n';
+ dbgs() << "Expected bitfield size: " << Len << " bits, "
+ << (Signed ? "sign" : "zero") << "-extended\n";
+ });
+
+ bool Changed = false;
+
+ for (unsigned R = AVs.find_first(); R != 0; R = AVs.find_next(R)) {
+ if (!BT.has(R))
+ continue;
+ const BitTracker::RegisterCell &SC = BT.lookup(R);
+ unsigned SW = SC.width();
+
+ // The source can be longer than the destination, as long as its size is
+ // a multiple of the size of the destination. Also, we would need to be
+ // able to refer to the subregister in the source that would be of the
+ // same size as the destination, but only check the sizes here.
+ if (SW < RW || (SW % RW) != 0)
+ continue;
+
+ // The field can start at any offset in SC as long as it contains Len
+ // bits and does not cross subregister boundary (if the source register
+ // is longer than the destination).
+ unsigned Off = 0;
+ while (Off <= SW-Len) {
+ unsigned OE = (Off+Len)/RW;
+ if (OE != Off/RW) {
+        // The assumption here is that if the source (R) is longer than the
+        // destination, then the source is a sequence of words of size RW,
+        // and each such word in R can be accessed via a subregister.
+ //
+ // If the beginning and the end of the field cross the subregister
+ // boundary, advance to the next subregister.
+ Off = OE*RW;
+ continue;
+ }
+ if (HBS::isEqual(RC, 0, SC, Off, Len))
+ break;
+ ++Off;
+ }
+
+ if (Off > SW-Len)
+ continue;
+
+ // Found match.
+ unsigned ExtOpc = 0;
+ if (Off == 0) {
+ if (Len == 8)
+ ExtOpc = Signed ? Hexagon::A2_sxtb : Hexagon::A2_zxtb;
+ else if (Len == 16)
+ ExtOpc = Signed ? Hexagon::A2_sxth : Hexagon::A2_zxth;
+ else if (Len < 10 && !Signed)
+ ExtOpc = Hexagon::A2_andir;
+ }
+ if (ExtOpc == 0) {
+ ExtOpc =
+ Signed ? (RW == 32 ? Hexagon::S4_extract : Hexagon::S4_extractp)
+ : (RW == 32 ? Hexagon::S2_extractu : Hexagon::S2_extractup);
+ }
+ unsigned SR = 0;
+ // This only recognizes isub_lo and isub_hi.
+ if (RW != SW && RW*2 != SW)
+ continue;
+ if (RW != SW)
+ SR = (Off/RW == 0) ? Hexagon::isub_lo : Hexagon::isub_hi;
+ Off = Off % RW;
+
+ if (!validateReg({R,SR}, ExtOpc, 1))
+ continue;
+
+ // Don't generate the same instruction as the one being optimized.
+ if (MI->getOpcode() == ExtOpc) {
+ // All possible ExtOpc's have the source in operand(1).
+ const MachineOperand &SrcOp = MI->getOperand(1);
+ if (SrcOp.getReg() == R)
+ continue;
+ }
+
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock &B = *MI->getParent();
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
+ auto MIB = BuildMI(B, At, DL, HII.get(ExtOpc), NewR)
+ .addReg(R, 0, SR);
+ switch (ExtOpc) {
+ case Hexagon::A2_sxtb:
+ case Hexagon::A2_zxtb:
+ case Hexagon::A2_sxth:
+ case Hexagon::A2_zxth:
+ break;
+ case Hexagon::A2_andir:
+ MIB.addImm((1u << Len) - 1);
+ break;
+ case Hexagon::S4_extract:
+ case Hexagon::S2_extractu:
+ case Hexagon::S4_extractp:
+ case Hexagon::S2_extractup:
+ MIB.addImm(Len)
+ .addImm(Off);
+ break;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+
+ HBS::replaceReg(RD.Reg, NewR, MRI);
+ BT.put(BitTracker::RegisterRef(NewR), RC);
+ Changed = true;
+ break;
+ }
+
+ return Changed;
+}
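
The classic input pattern here is a shift pair: for 32-bit values,
(x << (32-len-off)) >> (32-len) with an arithmetic right shift is the
sign-extended extract of len bits at offset off, and the logical-shift form
is the zero-extended one. A scalar check of that equivalence (pure
illustration of what S2_extractu and S4_extract compute):

    #include <cassert>
    #include <cstdint>

    // Zero-extended bitfield, as S2_extractu computes.
    static uint32_t extractU(uint32_t X, unsigned Len, unsigned Off) {
      return (X << (32 - Len - Off)) >> (32 - Len); // logical shifts
    }

    // Sign-extended bitfield, as S4_extract computes.
    static int32_t extractS(uint32_t X, unsigned Len, unsigned Off) {
      return int32_t(X << (32 - Len - Off)) >> (32 - Len); // arithmetic >>
    }

    int main() {
      uint32_t X = 0x0000ab00; // the byte 0xab stored at offset 8
      assert(extractU(X, 8, 8) == 0xabu);
      assert(extractS(X, 8, 8) == -85); // 0xab sign-extends to -85
      return 0;
    }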
+
bool BitSimplification::processBlock(MachineBasicBlock &B,
const RegisterSet &AVs) {
if (!BT.reached(&B))
@@ -2247,12 +2601,15 @@ bool BitSimplification::processBlock(MachineBasicBlock &B,
if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
bool T = genPackhl(MI, RD, RC);
+ T = T || simplifyExtractLow(MI, RD, RC, AVB);
Changed |= T;
continue;
}
if (FRC->getID() == Hexagon::IntRegsRegClassID) {
- bool T = genExtractHalf(MI, RD, RC);
+ bool T = genBitSplit(MI, RD, RC, AVB);
+ T = T || simplifyExtractLow(MI, RD, RC, AVB);
+ T = T || genExtractHalf(MI, RD, RC);
T = T || genCombineHalf(MI, RD, RC);
T = T || genExtractLow(MI, RD, RC);
Changed |= T;
@@ -2313,7 +2670,7 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
BT.run();
RegisterSet ABS; // Available registers for BS.
- BitSimplification BitS(BT, HII, HRI, MRI, MF);
+ BitSimplification BitS(BT, *MDT, HII, HRI, MRI, MF);
Changed |= visitBlock(Entry, BitS, ABS);
Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
@@ -2599,7 +2956,7 @@ void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) {
const MachineOperand &Op = SI->getOperand(j);
if (!Op.isReg()) {
- MIB.addOperand(Op);
+ MIB.add(Op);
continue;
}
if (!Op.isUse())
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
index 436f88dcd450..90ccecb6629a 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -74,7 +74,7 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri,
// Module::AnyPointerSize.
if (Width == 0 || Width > 64)
break;
- AttributeSet Attrs = F.getAttributes();
+ AttributeList Attrs = F.getAttributes();
if (Attrs.hasAttribute(AttrIdx, Attribute::ByVal))
continue;
InPhysReg = getNextPhysReg(InPhysReg, Width);
@@ -272,6 +272,9 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
// cases below.
uint16_t W0 = (Reg[0].Reg != 0) ? getRegBitWidth(Reg[0]) : 0;
+ // Register id of the 0th operand. It can be 0 (no register).
+ unsigned Reg0 = Reg[0].Reg;
+
switch (Opc) {
// Transfer immediate:
@@ -792,6 +795,17 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
case A2_zxth:
return rr0(eZXT(rc(1), 16), Outputs);
+ // Saturations
+
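+ // A saturated value always fits in the low 8/16 bits, so the remaining
+ // bits are known: copies of the sign bit for signed saturation, zeros
+ // for unsigned. This is modeled below as a sign/zero-extension of the
+ // result's own low bits.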
+ case A2_satb:
+ return rr0(eSXT(RegisterCell::self(0, W0).regify(Reg0), 8), Outputs);
+ case A2_sath:
+ return rr0(eSXT(RegisterCell::self(0, W0).regify(Reg0), 16), Outputs);
+ case A2_satub:
+ return rr0(eZXT(RegisterCell::self(0, W0).regify(Reg0), 8), Outputs);
+ case A2_satuh:
+ return rr0(eZXT(RegisterCell::self(0, W0).regify(Reg0), 16), Outputs);
+
// Bit count:
case S2_cl0:
diff --git a/lib/Target/Hexagon/HexagonBlockRanges.cpp b/lib/Target/Hexagon/HexagonBlockRanges.cpp
index adc213c3d438..1640b40c164f 100644
--- a/lib/Target/Hexagon/HexagonBlockRanges.cpp
+++ b/lib/Target/Hexagon/HexagonBlockRanges.cpp
@@ -219,8 +219,7 @@ HexagonBlockRanges::HexagonBlockRanges(MachineFunction &mf)
TII(*HST.getInstrInfo()), TRI(*HST.getRegisterInfo()),
Reserved(TRI.getReservedRegs(mf)) {
// Consider all non-allocatable registers as reserved.
- for (auto I = TRI.regclass_begin(), E = TRI.regclass_end(); I != E; ++I) {
- auto *RC = *I;
+ for (const TargetRegisterClass *RC : TRI.regclasses()) {
if (RC->isAllocatable())
continue;
for (unsigned R : *RC)
@@ -233,14 +232,16 @@ HexagonBlockRanges::RegisterSet HexagonBlockRanges::getLiveIns(
const TargetRegisterInfo &TRI) {
RegisterSet LiveIns;
RegisterSet Tmp;
+
for (auto I : B.liveins()) {
- if (I.LaneMask.all()) {
- Tmp.insert({I.PhysReg,0});
+ MCSubRegIndexIterator S(I.PhysReg, &TRI);
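+ // Treat a live-in as fully live if all lanes are live, or if some lane
+ // is live but the register has no subregisters to attribute lanes to.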
+ if (I.LaneMask.all() || (I.LaneMask.any() && !S.isValid())) {
+ Tmp.insert({I.PhysReg, 0});
continue;
}
- for (MCSubRegIndexIterator S(I.PhysReg, &TRI); S.isValid(); ++S) {
- LaneBitmask M = TRI.getSubRegIndexLaneMask(S.getSubRegIndex());
- if ((M & I.LaneMask).any())
+ for (; S.isValid(); ++S) {
+ unsigned SI = S.getSubRegIndex();
+ if ((I.LaneMask & TRI.getSubRegIndexLaneMask(SI)).any())
Tmp.insert({S.getSubReg(), 0});
}
}
@@ -307,6 +308,8 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
LastUse[R] = LastDef[R] = IndexType::None;
};
+ RegisterSet Defs, Clobbers;
+
for (auto &In : B) {
if (In.isDebugValue())
continue;
@@ -325,19 +328,67 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
closeRange(S);
}
}
- // Process defs.
+ // Process defs and clobbers.
+ Defs.clear();
+ Clobbers.clear();
for (auto &Op : In.operands()) {
if (!Op.isReg() || !Op.isDef() || Op.isUndef())
continue;
RegisterRef R = { Op.getReg(), Op.getSubReg() };
- if (TargetRegisterInfo::isPhysicalRegister(R.Reg) && Reserved[R.Reg])
- continue;
for (auto S : expandToSubRegs(R, MRI, TRI)) {
- if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None)
- closeRange(S);
- LastDef[S] = Index;
+ if (TargetRegisterInfo::isPhysicalRegister(S.Reg) && Reserved[S.Reg])
+ continue;
+ if (Op.isDead())
+ Clobbers.insert(S);
+ else
+ Defs.insert(S);
+ }
+ }
+
+ for (auto &Op : In.operands()) {
+ if (!Op.isRegMask())
+ continue;
+ const uint32_t *BM = Op.getRegMask();
+ for (unsigned PR = 1, N = TRI.getNumRegs(); PR != N; ++PR) {
+ // Skip registers that have subregisters. A register is preserved
+ // iff its bit is set in the regmask, so if the pair R1:0 were
+ // preserved, the bits for both R1 and R0 would be set as well.
+ if (MCSubRegIterator(PR, &TRI, false).isValid())
+ continue;
+ if (Reserved[PR])
+ continue;
+ if (BM[PR/32] & (1u << (PR%32)))
+ continue;
+ RegisterRef R = { PR, 0 };
+ if (!Defs.count(R))
+ Clobbers.insert(R);
}
}
+ // Defs and clobbers can overlap, e.g.
+ // %D0<def,dead> = COPY %vreg5, %R0<imp-def>, %R1<imp-def>
+ for (RegisterRef R : Defs)
+ Clobbers.erase(R);
+
+ // Update maps for defs.
+ for (RegisterRef S : Defs) {
+ // Defs should already be expanded into subregs.
+ assert(!TargetRegisterInfo::isPhysicalRegister(S.Reg) ||
+ !MCSubRegIterator(S.Reg, &TRI, false).isValid());
+ if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None)
+ closeRange(S);
+ LastDef[S] = Index;
+ }
+ // Update maps for clobbers.
+ for (RegisterRef S : Clobbers) {
+ // Clobbers should already be expanded into subregs.
+ assert(!TargetRegisterInfo::isPhysicalRegister(S.Reg) ||
+ !MCSubRegIterator(S.Reg, &TRI, false).isValid());
+ if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None)
+ closeRange(S);
+ // Create a single-instruction range.
+ LastDef[S] = LastUse[S] = Index;
+ closeRange(S);
+ }
}
// Collect live-on-exit.
diff --git a/lib/Target/Hexagon/HexagonCallingConv.td b/lib/Target/Hexagon/HexagonCallingConv.td
deleted file mode 100644
index e61b2a7a58ac..000000000000
--- a/lib/Target/Hexagon/HexagonCallingConv.td
+++ /dev/null
@@ -1,35 +0,0 @@
-//===- HexagonCallingConv.td - Calling Conventions Hexagon -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This describes the calling conventions for the Hexagon architectures.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Return Value Calling Conventions
-//===----------------------------------------------------------------------===//
-
-// Hexagon 32-bit C return-value convention.
-def RetCC_Hexagon32 : CallingConv<[
- CCIfType<[i32, f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
- CCIfType<[i64, f64], CCAssignToReg<[D0, D1, D2]>>,
-
- // Alternatively, they are assigned to the stack in 4-byte aligned units.
- CCAssignToStack<4, 4>
-]>;
-
-// Hexagon 32-bit C Calling convention.
-def CC_Hexagon32 : CallingConv<[
- // All arguments get passed in integer registers if there is space.
- CCIfType<[f32, i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
- CCIfType<[f64, i64], CCAssignToReg<[D0, D1, D2]>>,
-
- // Alternatively, they are assigned to the stack in 4-byte aligned units.
- CCAssignToStack<4, 4>
-]>;
diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 489da6be923d..a07ba77e6f3e 100644
--- a/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -315,11 +315,8 @@ void HexagonCommonGEP::getBlockTraversalOrder(BasicBlock *Root,
// visited".
Order.push_back(Root);
- DomTreeNode *DTN = DT->getNode(Root);
- typedef GraphTraits<DomTreeNode*> GTN;
- typedef GTN::ChildIteratorType Iter;
- for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I)
- getBlockTraversalOrder((*I)->getBlock(), Order);
+ for (auto *DTN : children<DomTreeNode*>(DT->getNode(Root)))
+ getBlockTraversalOrder(DTN->getBlock(), Order);
}
bool HexagonCommonGEP::isHandledGepForm(GetElementPtrInst *GepI) {
@@ -1235,11 +1232,8 @@ void HexagonCommonGEP::removeDeadCode() {
for (unsigned i = 0; i < BO.size(); ++i) {
BasicBlock *B = cast<BasicBlock>(BO[i]);
- DomTreeNode *N = DT->getNode(B);
- typedef GraphTraits<DomTreeNode*> GTN;
- typedef GTN::ChildIteratorType Iter;
- for (Iter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
- BO.push_back((*I)->getBlock());
+ for (auto DTN : children<DomTreeNode*>(DT->getNode(B)))
+ BO.push_back(DTN->getBlock());
}
for (unsigned i = BO.size(); i > 0; --i) {
diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 36080997ec6b..8118c8eb149d 100644
--- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -440,22 +440,28 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) {
// Put instructions that last defined integer or double registers into the
// map.
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- MachineOperand &Op = MI.getOperand(I);
- if (!Op.isReg() || !Op.isDef() || !Op.getReg())
- continue;
- unsigned Reg = Op.getReg();
- if (Hexagon::DoubleRegsRegClass.contains(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- LastDef[*SubRegs] = &MI;
- }
- } else if (Hexagon::IntRegsRegClass.contains(Reg))
- LastDef[Reg] = &MI;
+ for (MachineOperand &Op : MI.operands()) {
+ if (Op.isReg()) {
+ if (!Op.isDef() || !Op.getReg())
+ continue;
+ unsigned Reg = Op.getReg();
+ if (Hexagon::DoubleRegsRegClass.contains(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+ LastDef[*SubRegs] = &MI;
+ } else if (Hexagon::IntRegsRegClass.contains(Reg))
+ LastDef[Reg] = &MI;
+ } else if (Op.isRegMask()) {
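+ // A regmask operand (e.g. on a call) clobbers registers in bulk;
+ // record this instruction as the last def of every integer register
+ // it clobbers.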
+ for (unsigned Reg : Hexagon::IntRegsRegClass)
+ if (Op.clobbersPhysReg(Reg))
+ LastDef[Reg] = &MI;
+ }
}
}
}
bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
if (IsCombinesDisabled) return false;
diff --git a/lib/Target/Hexagon/HexagonDepArch.h b/lib/Target/Hexagon/HexagonDepArch.h
new file mode 100644
index 000000000000..1009aa39cefb
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepArch.h
@@ -0,0 +1,10 @@
+//===--- HexagonDepArch.h -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+enum HexagonArchEnum { V4, V5, V55, V60, V62 };
diff --git a/lib/Target/Hexagon/HexagonDepArch.td b/lib/Target/Hexagon/HexagonDepArch.td
new file mode 100644
index 000000000000..5b1d02c136f0
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepArch.td
@@ -0,0 +1,19 @@
+//===--- HexagonDepArch.td ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
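+// One SubtargetFeature and matching assembler predicate per Hexagon
+// architecture version.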
+def ArchV62: SubtargetFeature<"v62", "HexagonArchVersion", "V62", "Enable Hexagon V62 architecture">;
+def HasV62T : Predicate<"HST->hasV62TOps()">, AssemblerPredicate<"ArchV62">;
+def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "V60", "Enable Hexagon V60 architecture">;
+def HasV60T : Predicate<"HST->hasV60TOps()">, AssemblerPredicate<"ArchV60">;
+def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "V55", "Enable Hexagon V55 architecture">;
+def HasV55T : Predicate<"HST->hasV55TOps()">, AssemblerPredicate<"ArchV55">;
+def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Enable Hexagon V4 architecture">;
+def HasV4T : Predicate<"HST->hasV4TOps()">, AssemblerPredicate<"ArchV4">;
+def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Enable Hexagon V5 architecture">;
+def HasV5T : Predicate<"HST->hasV5TOps()">, AssemblerPredicate<"ArchV5">;
diff --git a/lib/Target/Hexagon/HexagonDepDecoders.h b/lib/Target/Hexagon/HexagonDepDecoders.h
new file mode 100644
index 000000000000..aa9787ecf0c8
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepDecoders.h
@@ -0,0 +1,64 @@
+//===--- HexagonDepDecoders.h ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
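+// Immediate-operand decoder stubs: each forwards the extracted field to
+// signedDecoder<N>, treating it as an N-bit signed immediate.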
+static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<4>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<14>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<8>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<7>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<12>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<3>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<13>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<6>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<9>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<5>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
+static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp,
+ uint64_t, const void *Decoder) {
+ signedDecoder<6>(MI, tmp, Decoder);
+ return MCDisassembler::Success;
+}
diff --git a/lib/Target/Hexagon/HexagonDepITypes.h b/lib/Target/Hexagon/HexagonDepITypes.h
new file mode 100644
index 000000000000..f8ae39a37994
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepITypes.h
@@ -0,0 +1,53 @@
+//===--- HexagonDepITypes.h -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+namespace HexagonII {
+enum Type {
+ TypeALU32_2op = 0,
+ TypeALU32_3op = 1,
+ TypeALU32_ADDI = 2,
+ TypeALU64 = 3,
+ TypeCJ = 4,
+ TypeCOPROC_VMEM = 5,
+ TypeCR = 7,
+ TypeCVI_HIST = 10,
+ TypeCVI_VA = 16,
+ TypeCVI_VA_DV = 17,
+ TypeCVI_VINLANESAT = 18,
+ TypeCVI_VM_CUR_LD = 19,
+ TypeCVI_VM_LD = 20,
+ TypeCVI_VM_NEW_ST = 21,
+ TypeCVI_VM_ST = 22,
+ TypeCVI_VM_STU = 23,
+ TypeCVI_VM_TMP_LD = 24,
+ TypeCVI_VM_VP_LDU = 25,
+ TypeCVI_VP = 26,
+ TypeCVI_VP_VS = 27,
+ TypeCVI_VS = 28,
+ TypeCVI_VX = 30,
+ TypeCVI_VX_DV = 31,
+ TypeDUPLEX = 32,
+ TypeENDLOOP = 33,
+ TypeEXTENDER = 34,
+ TypeJ = 35,
+ TypeLD = 36,
+ TypeM = 37,
+ TypeMAPPING = 38,
+ TypeNCJ = 39,
+ TypePSEUDO = 40,
+ TypeST = 41,
+ TypeSUBINSN = 42,
+ TypeS_2op = 43,
+ TypeS_3op = 44,
+ TypeV2LDST = 47,
+ TypeV4LDST = 48
+};
+}
+}
diff --git a/lib/Target/Hexagon/HexagonDepITypes.td b/lib/Target/Hexagon/HexagonDepITypes.td
new file mode 100644
index 000000000000..f1d689ce12f4
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepITypes.td
@@ -0,0 +1,48 @@
+//===--- HexagonDepITypes.td ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
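+// Instruction type tags; the numeric values match the HexagonII::Type
+// enum in HexagonDepITypes.h.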
+class IType<bits<6> t> { bits<6> Value = t; }
+def TypeALU32_2op : IType<0>;
+def TypeALU32_3op : IType<1>;
+def TypeALU32_ADDI : IType<2>;
+def TypeALU64 : IType<3>;
+def TypeCJ : IType<4>;
+def TypeCOPROC_VMEM : IType<5>;
+def TypeCR : IType<7>;
+def TypeCVI_HIST : IType<10>;
+def TypeCVI_VA : IType<16>;
+def TypeCVI_VA_DV : IType<17>;
+def TypeCVI_VINLANESAT : IType<18>;
+def TypeCVI_VM_CUR_LD : IType<19>;
+def TypeCVI_VM_LD : IType<20>;
+def TypeCVI_VM_NEW_ST : IType<21>;
+def TypeCVI_VM_ST : IType<22>;
+def TypeCVI_VM_STU : IType<23>;
+def TypeCVI_VM_TMP_LD : IType<24>;
+def TypeCVI_VM_VP_LDU : IType<25>;
+def TypeCVI_VP : IType<26>;
+def TypeCVI_VP_VS : IType<27>;
+def TypeCVI_VS : IType<28>;
+def TypeCVI_VX : IType<30>;
+def TypeCVI_VX_DV : IType<31>;
+def TypeDUPLEX : IType<32>;
+def TypeENDLOOP : IType<33>;
+def TypeEXTENDER : IType<34>;
+def TypeJ : IType<35>;
+def TypeLD : IType<36>;
+def TypeM : IType<37>;
+def TypeMAPPING : IType<38>;
+def TypeNCJ : IType<39>;
+def TypePSEUDO : IType<40>;
+def TypeST : IType<41>;
+def TypeSUBINSN : IType<42>;
+def TypeS_2op : IType<43>;
+def TypeS_3op : IType<44>;
+def TypeV2LDST : IType<47>;
+def TypeV4LDST : IType<48>;
diff --git a/lib/Target/Hexagon/HexagonDepInstrFormats.td b/lib/Target/Hexagon/HexagonDepInstrFormats.td
new file mode 100644
index 000000000000..d7a99f48803b
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepInstrFormats.td
@@ -0,0 +1,4182 @@
+//===--- HexagonDepInstrFormats.td ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
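+// Each Enc_* class describes an instruction encoding: the operand fields
+// the instruction carries and the Inst{...} bit ranges each field occupies.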
+class Enc_12122225 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+ bits <3> Qd8;
+ let Inst{2-0} = Qd8{2-0};
+}
+class Enc_16626097 : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{6-5} = Qs4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vw32;
+ let Inst{4-0} = Vw32{4-0};
+}
+class Enc_13397056 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Qv4;
+ let Inst{12-11} = Qv4{1-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_7315939 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <6> n1;
+ let Inst{28-28} = n1{5-5};
+ let Inst{24-22} = n1{4-2};
+ let Inst{13-13} = n1{1-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_15275738 : OpcodeHexagon {
+ bits <12> Ii;
+ let Inst{26-25} = Ii{11-10};
+ let Inst{13-5} = Ii{9-1};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_12822813 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+ bits <2> Pe4;
+ let Inst{6-5} = Pe4{1-0};
+}
+class Enc_10282127 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{12-7} = Ii{6-1};
+ bits <8> II;
+ let Inst{13-13} = II{7-7};
+ let Inst{6-0} = II{6-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_14264243 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> Rt16;
+ let Inst{11-8} = Rt16{3-0};
+}
+class Enc_6778937 : OpcodeHexagon {
+ bits <5> Rxx32;
+ let Inst{20-16} = Rxx32{4-0};
+ bits <0> sgp10;
+}
+class Enc_5480539 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vxx32;
+ let Inst{7-3} = Vxx32{4-0};
+}
+class Enc_11422009 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vy32;
+ let Inst{12-8} = Vy32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_16357011 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{8-4} = Vv32{4-0};
+ bits <5> Vt32;
+ let Inst{13-9} = Vt32{4-0};
+ bits <4> Vdd16;
+ let Inst{3-0} = Vdd16{3-0};
+}
+class Enc_4975051 : OpcodeHexagon {
+ bits <19> Ii;
+ let Inst{26-25} = Ii{18-17};
+ let Inst{20-16} = Ii{16-12};
+ let Inst{13-5} = Ii{11-3};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_14786238 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+}
+class Enc_15472748 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_6773159 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{12-7} = Ii{5-0};
+ bits <5> II;
+ let Inst{4-0} = II{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_12535811 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_14007201 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <8> II;
+ let Inst{22-16} = II{7-1};
+ let Inst{13-13} = II{0-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_2577026 : OpcodeHexagon {
+ bits <3> Qt8;
+ let Inst{2-0} = Qt8{2-0};
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_7305764 : OpcodeHexagon {
+ bits <5> II;
+ let Inst{12-8} = II{4-0};
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+}
+class Enc_11682941 : OpcodeHexagon {
+ bits <19> Ii;
+ let Inst{26-25} = Ii{18-17};
+ let Inst{20-16} = Ii{16-12};
+ let Inst{13-13} = Ii{11-11};
+ let Inst{7-0} = Ii{10-3};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
+class Enc_16376009 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{8-5} = Ii{5-2};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_13249928 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_1971351 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{8-5} = Ii{4-1};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_13715847 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{17-16} = Ii{5-4};
+ let Inst{6-3} = Ii{3-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
+class Enc_13303422 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{8-5} = Ii{4-1};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_14574598 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_13094118 : OpcodeHexagon {
+ bits <5> Css32;
+ let Inst{20-16} = Css32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_4231995 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_844699 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> n1;
+ let Inst{28-28} = n1{3-3};
+ let Inst{24-22} = n1{2-0};
+}
+class Enc_8752140 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{8-5} = Ii{5-2};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_7978128 : OpcodeHexagon {
+ bits <1> Ii;
+ let Inst{8-8} = Ii{0-0};
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+}
+class Enc_10492541 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_0 : OpcodeHexagon {
+}
+class Enc_15733946 : OpcodeHexagon {
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_738356 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_14400220 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{9-5} = Ii{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_15194851 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_14172170 : OpcodeHexagon {
+ bits <1> Ii;
+ let Inst{5-5} = Ii{0-0};
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_10065510 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_14998517 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <3> n1;
+ let Inst{29-29} = n1{2-2};
+ let Inst{26-25} = n1{1-0};
+}
+class Enc_16657398 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{17-16} = Ii{5-4};
+ let Inst{6-3} = Ii{3-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_14620934 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_10075393 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+}
+class Enc_8638014 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Vss32;
+ let Inst{7-3} = Vss32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_13261538 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_8990840 : OpcodeHexagon {
+ bits <13> Ii;
+ let Inst{26-25} = Ii{12-11};
+ let Inst{13-5} = Ii{10-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_5974204 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_4711514 : OpcodeHexagon {
+ bits <2> Qu4;
+ let Inst{9-8} = Qu4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_11492529 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_9277990 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_6690615 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{8-4} = Ii{6-2};
+ bits <4> Rt16;
+ let Inst{3-0} = Rt16{3-0};
+}
+class Enc_1220199 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_7785569 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <6> n1;
+ let Inst{28-28} = n1{5-5};
+ let Inst{25-22} = n1{4-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_2880796 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> II;
+ let Inst{22-21} = II{4-3};
+ let Inst{7-5} = II{2-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_6858527 : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{6-5} = Qs4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vv32;
+ let Inst{4-0} = Vv32{4-0};
+}
+class Enc_11863656 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_151014 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <2> Px4;
+ let Inst{6-5} = Px4{1-0};
+}
+class Enc_10333841 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_14044877 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-13} = Ii{5-5};
+ let Inst{7-3} = Ii{4-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_13691337 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <2> Qx4;
+ let Inst{6-5} = Qx4{1-0};
+}
+class Enc_3817033 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <3> Qt8;
+ let Inst{10-8} = Qt8{2-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_3540372 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_5200852 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_15949334 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_3831744 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_8280533 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_10969213 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <5> Vw32;
+ let Inst{4-0} = Vw32{4-0};
+}
+class Enc_3974695 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{10-4} = Ii{6-0};
+ bits <4> Rx16;
+ let Inst{3-0} = Rx16{3-0};
+}
+class Enc_7255914 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_7212930 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{8-5} = Ii{4-1};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_12781442 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <2> Qd4;
+ let Inst{1-0} = Qd4{1-0};
+}
+class Enc_799555 : OpcodeHexagon {
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_11083408 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{23-19} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_900013 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_9487067 : OpcodeHexagon {
+ bits <12> Ii;
+ let Inst{19-16} = Ii{11-8};
+ let Inst{12-5} = Ii{7-0};
+ bits <2> Pu4;
+ let Inst{22-21} = Pu4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_16014536 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_12419313 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> n1;
+ let Inst{28-28} = n1{3-3};
+ let Inst{24-23} = n1{2-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_5503430 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_14767681 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{23-19} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_9093094 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <8> II;
+ let Inst{22-16} = II{7-1};
+ let Inst{13-13} = II{0-0};
+ bits <2> Pu4;
+ let Inst{24-23} = Pu4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_11542684 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{27-21} = Ii{15-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_8877260 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{23-19} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_1737833 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-13} = Ii{5-5};
+ let Inst{7-3} = Ii{4-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_255516 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_10721363 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_7076358 : OpcodeHexagon {
+ bits <5> Zdd8;
+ let Inst{4-0} = Zdd8{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_11930928 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> II;
+ let Inst{22-21} = II{4-3};
+ let Inst{7-5} = II{2-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_2410156 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_6735062 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_7965855 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_5202340 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vyy32;
+ let Inst{4-0} = Vyy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_10568534 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <2> Pu4;
+ let Inst{22-21} = Pu4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_16730127 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_11224149 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{13-13} = Ii{7-7};
+ let Inst{7-3} = Ii{6-2};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_9772987 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
+ bits <5> Rtt32;
+ let Inst{4-0} = Rtt32{4-0};
+}
+class Enc_9238139 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Zdd8;
+ let Inst{4-0} = Zdd8{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_2082775 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{11-8} = Ii{3-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_5790679 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{12-8} = Ii{8-4};
+ let Inst{4-3} = Ii{3-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_9305257 : OpcodeHexagon {
+ bits <5> Zu8;
+ let Inst{12-8} = Zu8{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_3735566 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_12654528 : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{6-5} = Qs4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vvv32;
+ let Inst{4-0} = Vvv32{4-0};
+}
+class Enc_15290236 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_11139981 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_15546666 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{10-8} = Ii{8-6};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_486163 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_2079016 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{1-0} = Ii{1-0};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+}
+class Enc_10095813 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_13133322 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+}
+class Enc_9422954 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{9-8} = Pu4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_10642833 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vs32;
+ let Inst{7-3} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_14989332 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vv32;
+ let Inst{4-0} = Vv32{4-0};
+}
+class Enc_10263630 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+}
+class Enc_13937564 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+}
+class Enc_7171569 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_2702036 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_1928953 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{9-8} = Pu4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_5853469 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <2> Pe4;
+ let Inst{6-5} = Pe4{1-0};
+}
+class Enc_7692963 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_15140689 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_748676 : OpcodeHexagon {
+ bits <12> Ii;
+ let Inst{26-25} = Ii{11-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-0} = Ii{8-1};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_3372766 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{8-5} = Ii{4-1};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_7900405 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_11930027 : OpcodeHexagon {
+ bits <12> Ii;
+ let Inst{26-25} = Ii{11-10};
+ let Inst{13-5} = Ii{9-1};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+}
+class Enc_971574 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{22-21} = Ii{5-4};
+ let Inst{13-13} = Ii{3-3};
+ let Inst{7-5} = Ii{2-0};
+ bits <6> II;
+ let Inst{23-23} = II{5-5};
+ let Inst{4-0} = II{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{12-8} = Rd32{4-0};
+}
+class Enc_13453446 : OpcodeHexagon {
+ bits <24> Ii;
+ let Inst{24-16} = Ii{23-15};
+ let Inst{13-1} = Ii{14-2};
+}
+class Enc_6356866 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_16246706 : OpcodeHexagon {
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_5326450 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{6-3} = Ii{3-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_11687333 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_2771456 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_11282123 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{12-7} = Ii{5-0};
+ bits <8> II;
+ let Inst{13-13} = II{7-7};
+ let Inst{6-0} = II{6-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_518319 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{20-16} = Ii{5-1};
+ let Inst{5-5} = Ii{0-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_16104442 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_7912540 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+}
+class Enc_15560488 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_7581852 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_10030031 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_3915770 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{6-3} = Ii{3-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_4075554 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_11326438 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_4050532 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{26-25} = Ii{15-14};
+ let Inst{20-16} = Ii{13-9};
+ let Inst{13-13} = Ii{8-8};
+ let Inst{7-0} = Ii{7-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_14461004 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{26-25} = Ii{10-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_13344657 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{20-16} = Ii{5-1};
+ let Inst{8-8} = Ii{0-0};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_13114546 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{5-5} = Ii{0-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+}
+class Enc_14530015 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <6> n1;
+ let Inst{28-28} = n1{5-5};
+ let Inst{25-23} = n1{4-2};
+ let Inst{13-13} = n1{1-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_5967898 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{12-7} = Ii{5-0};
+ bits <6> II;
+ let Inst{13-13} = II{5-5};
+ let Inst{4-0} = II{4-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_15450971 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <6> n1;
+ let Inst{28-28} = n1{5-5};
+ let Inst{25-22} = n1{4-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_15536400 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{3-0} = Ii{5-2};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+}
+class Enc_1291652 : OpcodeHexagon {
+ bits <1> Ii;
+ let Inst{8-8} = Ii{0-0};
+}
+class Enc_5636753 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+}
+class Enc_5757366 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+}
+class Enc_9752128 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{8-5} = Ii{6-3};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_13618890 : OpcodeHexagon {
+ bits <17> Ii;
+ let Inst{26-25} = Ii{16-15};
+ let Inst{20-16} = Ii{14-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-0} = Ii{8-1};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_5890213 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_5582416 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_13536408 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{3-0} = Ii{3-0};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+}
+class Enc_9773189 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
+ bits <5> Rxx32;
+ let Inst{12-8} = Rxx32{4-0};
+}
+class Enc_2152247 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+}
+class Enc_12848507 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{6-6} = Ii{0-0};
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <5> Ru32;
+ let Inst{20-16} = Ru32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
+class Enc_16279406 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Qv4;
+ let Inst{12-11} = Qv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+}
+class Enc_1734121 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{10-8} = Ii{3-1};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rt16;
+ let Inst{3-0} = Rt16{3-0};
+}
+class Enc_766909 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <2> Pe4;
+ let Inst{6-5} = Pe4{1-0};
+}
+class Enc_4527648 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_8849208 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{12-7} = Ii{6-1};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{4-0} = Rt32{4-0};
+}
+class Enc_9894557 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <6> II;
+ let Inst{23-21} = II{5-3};
+ let Inst{7-5} = II{2-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_4109168 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+}
+class Enc_14560494 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_9773167 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{12-7} = Ii{6-1};
+ bits <5> II;
+ let Inst{4-0} = II{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_1898420 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+}
+class Enc_11498120 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <2> Qd4;
+ let Inst{1-0} = Qd4{1-0};
+}
+class Enc_15459921 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_10058269 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_10197700 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_12608570 : OpcodeHexagon {
+ bits <17> Ii;
+ let Inst{26-25} = Ii{16-15};
+ let Inst{20-16} = Ii{14-10};
+ let Inst{13-5} = Ii{9-1};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_4804090 : OpcodeHexagon {
+ bits <6> Ss64;
+ let Inst{21-16} = Ss64{5-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_14973146 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <3> Qd8;
+ let Inst{5-3} = Qd8{2-0};
+}
+class Enc_5718302 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <2> Pe4;
+ let Inst{6-5} = Pe4{1-0};
+}
+class Enc_2103742 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_7564330 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_2176383 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{9-4} = Ii{5-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_7736768 : OpcodeHexagon {
+ bits <12> Ii;
+ let Inst{26-25} = Ii{11-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-0} = Ii{8-1};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_13189194 : OpcodeHexagon {
+ bits <1> Ii;
+ let Inst{5-5} = Ii{0-0};
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_5154851 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_1329520 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Cdd32;
+ let Inst{4-0} = Cdd32{4-0};
+}
+class Enc_14057553 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_9223889 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_10979813 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{13-13} = Ii{6-6};
+ let Inst{7-3} = Ii{5-1};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_13490067 : OpcodeHexagon {
+ bits <3> Qt8;
+ let Inst{2-0} = Qt8{2-0};
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_10076500 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{6-6} = Ii{0-0};
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <5> Ru32;
+ let Inst{20-16} = Ru32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_163381 : OpcodeHexagon {
+ bits <14> Ii;
+ let Inst{26-25} = Ii{13-12};
+ let Inst{13-5} = Ii{11-3};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_10328975 : OpcodeHexagon {
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_14939491 : OpcodeHexagon {
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_8891794 : OpcodeHexagon {
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_7723767 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_2639299 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{11-8} = Rd16{3-0};
+}
+class Enc_11552785 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_11849200 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{12-7} = Ii{5-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{4-0} = Rt32{4-0};
+}
+class Enc_14868535 : OpcodeHexagon {
+ bits <17> Ii;
+ let Inst{23-22} = Ii{16-15};
+ let Inst{20-16} = Ii{14-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <2> Pu4;
+ let Inst{9-8} = Pu4{1-0};
+}
+class Enc_48594 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6608821 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+}
+class Enc_11049656 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{13-13} = Ii{8-8};
+ let Inst{7-3} = Ii{7-3};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
+class Enc_117962 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{23-21} = Ii{7-5};
+ let Inst{13-13} = Ii{4-4};
+ let Inst{7-5} = Ii{3-1};
+ let Inst{3-3} = Ii{0-0};
+ bits <5> II;
+ let Inst{12-8} = II{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_5900401 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{6-3} = Ii{3-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_36641 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_9626139 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_11971407 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_9852473 : OpcodeHexagon {
+ bits <13> Ii;
+ let Inst{26-25} = Ii{12-11};
+ let Inst{13-5} = Ii{10-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_6495334 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{22-21} = Ii{5-4};
+ let Inst{13-13} = Ii{3-3};
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
+ bits <5> Rd32;
+ let Inst{12-8} = Rd32{4-0};
+}
+class Enc_1186018 : OpcodeHexagon {
+ bits <17> Ii;
+ let Inst{26-25} = Ii{16-15};
+ let Inst{20-16} = Ii{14-10};
+ let Inst{13-13} = Ii{9-9};
+ let Inst{7-0} = Ii{8-1};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_15999208 : OpcodeHexagon {
+ bits <18> Ii;
+ let Inst{26-25} = Ii{17-16};
+ let Inst{20-16} = Ii{15-11};
+ let Inst{13-13} = Ii{10-10};
+ let Inst{7-0} = Ii{9-2};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_11477246 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
+}
+class Enc_7971062 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{23-22} = Ii{15-14};
+ let Inst{20-16} = Ii{13-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_4327792 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_10326434 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_1572239 : OpcodeHexagon {
+ bits <2> Qt4;
+ let Inst{6-5} = Qt4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_6372758 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{8-5} = Ii{3-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_15793331 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+}
+class Enc_11424254 : OpcodeHexagon {
+ bits <2> Qt4;
+ let Inst{6-5} = Qt4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_4983213 : OpcodeHexagon {
+ bits <14> Ii;
+ let Inst{10-0} = Ii{13-3};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_16035138 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+}
+class Enc_8225953 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{13-13} = Ii{7-7};
+ let Inst{7-3} = Ii{6-2};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_4397470 : OpcodeHexagon {
+ bits <5> II;
+ let Inst{12-8} = II{4-0};
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+}
+class Enc_1004392 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vxx32;
+ let Inst{7-3} = Vxx32{4-0};
+}
+class Enc_16319737 : OpcodeHexagon {
+ bits <14> Ii;
+ let Inst{26-25} = Ii{13-12};
+ let Inst{13-13} = Ii{11-11};
+ let Inst{7-0} = Ii{10-3};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
+class Enc_2296022 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_9664427 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <3> Qss8;
+ let Inst{2-0} = Qss8{2-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_877823 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
+}
+class Enc_1589406 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6900405 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_14150875 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{25-22} = n1{3-0};
+}
+class Enc_15707793 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Gd32;
+ let Inst{4-0} = Gd32{4-0};
+}
+class Enc_14689096 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{6-6} = Ii{0-0};
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <5> Ru32;
+ let Inst{20-16} = Ru32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_9915754 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_7470998 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <2> Qx4;
+ let Inst{1-0} = Qx4{1-0};
+}
+class Enc_11471622 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_14363183 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{23-22} = Qv4{1-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_15816255 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_5321335 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <4> Vdd16;
+ let Inst{7-4} = Vdd16{3-0};
+}
+class Enc_12702821 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+}
+class Enc_449439 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{26-25} = Ii{10-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+}
+class Enc_2054304 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <6> Sd64;
+ let Inst{5-0} = Sd64{5-0};
+}
+class Enc_236434 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{22-21} = Ii{5-4};
+ let Inst{13-13} = Ii{3-3};
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{12-8} = Rd32{4-0};
+}
+class Enc_5598813 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{8-5} = Ii{3-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_8409782 : OpcodeHexagon {
+ bits <13> Ii;
+ let Inst{26-25} = Ii{12-11};
+ let Inst{13-13} = Ii{10-10};
+ let Inst{7-0} = Ii{9-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_15182416 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{20-16} = Ii{5-1};
+ let Inst{8-8} = Ii{0-0};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_4501395 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{6-3} = Ii{6-3};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6039436 : OpcodeHexagon {
+ bits <3> Qtt8;
+ let Inst{2-0} = Qtt8{2-0};
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_476163 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+ bits <5> Vy32;
+ let Inst{12-8} = Vy32{4-0};
+}
+class Enc_11281763 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_9929262 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{7-3} = Vs32{4-0};
+}
+class Enc_13174858 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Vs32;
+ let Inst{7-3} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_8437395 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_16578332 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{10-8} = Ii{8-6};
+ bits <5> Zdd8;
+ let Inst{4-0} = Zdd8{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_12829314 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+}
+class Enc_9744403 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{13-9} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{8-4} = Vv32{4-0};
+ bits <4> Vdd16;
+ let Inst{3-0} = Vdd16{3-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_10968391 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <7> n1;
+ let Inst{28-28} = n1{6-6};
+ let Inst{25-22} = n1{5-2};
+ let Inst{13-13} = n1{1-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_64199 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{8-4} = Ii{6-2};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_11039423 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6730375 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+}
+class Enc_16213761 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{23-19} = Vv32{4-0};
+ bits <3> Rt8;
+ let Inst{18-16} = Rt8{2-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_13204995 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{11-8} = Ii{3-0};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rt16;
+ let Inst{3-0} = Rt16{3-0};
+}
+class Enc_13338314 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_9920336 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
+ bits <5> Rtt32;
+ let Inst{4-0} = Rtt32{4-0};
+}
+class Enc_15380240 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+ bits <5> Vy32;
+ let Inst{12-8} = Vy32{4-0};
+}
+class Enc_3296020 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_2428539 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> n1;
+ let Inst{28-28} = n1{3-3};
+ let Inst{24-23} = n1{2-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_10039393 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_9372046 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+}
+class Enc_2901241 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_16145290 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{6-5} = Ps4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_13783220 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_12261611 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6135183 : OpcodeHexagon {
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rx16;
+ let Inst{3-0} = Rx16{3-0};
+}
+class Enc_5523416 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_13472494 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_16303398 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{8-5} = Ii{3-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_3494181 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_13983714 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <2> Qd4;
+ let Inst{1-0} = Qd4{1-0};
+}
+class Enc_931653 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{8-5} = Ii{6-3};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_7622936 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vxx32;
+ let Inst{7-3} = Vxx32{4-0};
+ bits <5> Vy32;
+ let Inst{12-8} = Vy32{4-0};
+}
+class Enc_8773155 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-7} = Ii{7-2};
+ bits <5> II;
+ let Inst{4-0} = II{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_5401217 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <3> n1;
+ let Inst{28-28} = n1{2-2};
+ let Inst{24-23} = n1{1-0};
+}
+class Enc_6736678 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_3457570 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_3813442 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_3135259 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_5486172 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
+ bits <3> Nt8;
+ let Inst{2-0} = Nt8{2-0};
+}
+class Enc_11081334 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vss32;
+ let Inst{7-3} = Vss32{4-0};
+}
+class Enc_9470751 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+}
+class Enc_2683366 : OpcodeHexagon {
+ bits <3> Quu8;
+ let Inst{10-8} = Quu8{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <3> Qdd8;
+ let Inst{5-3} = Qdd8{2-0};
+}
+class Enc_15830826 : OpcodeHexagon {
+ bits <14> Ii;
+ let Inst{10-0} = Ii{13-3};
+}
+class Enc_4967902 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{12-7} = Ii{6-1};
+ bits <6> II;
+ let Inst{13-13} = II{5-5};
+ let Inst{4-0} = II{4-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_14287645 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_8324216 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_913538 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <3> Qd8;
+ let Inst{5-3} = Qd8{2-0};
+}
+class Enc_16311032 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_9864697 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <6> II;
+ let Inst{20-16} = II{5-1};
+ let Inst{13-13} = II{0-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_11205051 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{11-8} = Ii{5-2};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rt16;
+ let Inst{3-0} = Rt16{3-0};
+}
+class Enc_5611087 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{8-5} = Ii{6-3};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_10915758 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_8943121 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+}
+class Enc_1539665 : OpcodeHexagon {
+ bits <5> Cs32;
+ let Inst{20-16} = Cs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_8479583 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <5> n1;
+ let Inst{29-29} = n1{4-4};
+ let Inst{26-25} = n1{3-2};
+ let Inst{23-23} = n1{1-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_313333 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_11544269 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <4> n1;
+ let Inst{29-29} = n1{3-3};
+ let Inst{26-25} = n1{2-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_9018141 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Cd32;
+ let Inst{4-0} = Cd32{4-0};
+}
+class Enc_6152036 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Gdd32;
+ let Inst{4-0} = Gdd32{4-0};
+}
+class Enc_1954437 : OpcodeHexagon {
+ bits <6> Sss64;
+ let Inst{21-16} = Sss64{5-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_3742184 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_1835415 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{10-5} = Ii{6-1};
+ bits <2> Pt4;
+ let Inst{12-11} = Pt4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_1085466 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_13150110 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{26-25} = Ii{10-9};
+ let Inst{13-13} = Ii{8-8};
+ let Inst{7-0} = Ii{7-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_6772177 : OpcodeHexagon {
+ bits <5> Zu8;
+ let Inst{12-8} = Zu8{4-0};
+ bits <5> Zd8;
+ let Inst{4-0} = Zd8{4-0};
+}
+class Enc_6616512 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_1886960 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{26-25} = Ii{15-14};
+ let Inst{20-16} = Ii{13-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_2835415 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{10-5} = Ii{7-2};
+ bits <2> Pt4;
+ let Inst{12-11} = Pt4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_14024197 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_12297800 : OpcodeHexagon {
+ bits <18> Ii;
+ let Inst{26-25} = Ii{17-16};
+ let Inst{20-16} = Ii{15-11};
+ let Inst{13-13} = Ii{10-10};
+ let Inst{7-0} = Ii{9-2};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_7254313 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_677558 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{10-5} = Ii{8-3};
+ bits <2> Pt4;
+ let Inst{12-11} = Pt4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_6223403 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_674613 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_16479122 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{7-3} = Ii{7-3};
+ bits <3> Rdd8;
+ let Inst{2-0} = Rdd8{2-0};
+}
+class Enc_11704059 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_9165078 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{8-3} = Ii{8-3};
+ bits <3> Rtt8;
+ let Inst{2-0} = Rtt8{2-0};
+}
+class Enc_15376009 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{8-5} = Ii{4-1};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_8838398 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{21-21} = Ii{3-3};
+ let Inst{7-5} = Ii{2-0};
+ bits <6> II;
+ let Inst{13-8} = II{5-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_2328527 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_1451363 : OpcodeHexagon {
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_4030179 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_13770697 : OpcodeHexagon {
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ry32;
+ let Inst{12-8} = Ry32{4-0};
+}
+class Enc_12212978 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{8-5} = Ii{3-0};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_12665927 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_2082956 : OpcodeHexagon {
+ bits <32> Ii;
+ let Inst{27-16} = Ii{31-20};
+ let Inst{13-0} = Ii{19-6};
+}
+class Enc_220949 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{25-23} = n1{3-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_9939385 : OpcodeHexagon {
+ bits <9> Ii;
+ let Inst{12-8} = Ii{8-4};
+ let Inst{4-3} = Ii{3-2};
+ bits <10> II;
+ let Inst{20-16} = II{9-5};
+ let Inst{7-5} = II{4-2};
+ let Inst{1-0} = II{1-0};
+}
+class Enc_2117024 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-8} = Ii{7-3};
+ let Inst{4-2} = Ii{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_8390029 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_10989558 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_5972412 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{20-16} = Vv32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_12851489 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vss32;
+ let Inst{7-3} = Vss32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_9554661 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{12-7} = Ii{5-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_4202401 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6091631 : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{9-8} = Qs4{1-0};
+ bits <2> Qt4;
+ let Inst{23-22} = Qt4{1-0};
+ bits <2> Qd4;
+ let Inst{1-0} = Qd4{1-0};
+}
+class Enc_10157519 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_4835423 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{10-5} = Ii{5-0};
+ bits <2> Pt4;
+ let Inst{12-11} = Pt4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_14046916 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
+ bits <5> Rt32;
+ let Inst{4-0} = Rt32{4-0};
+}
+class Enc_2921694 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_8732960 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-8} = Ii{7-3};
+ let Inst{4-2} = Ii{2-0};
+}
+class Enc_5338033 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{24-22} = n1{3-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_6956613 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_2153798 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_16210172 : OpcodeHexagon {
+ bits <3> Qt8;
+ let Inst{10-8} = Qt8{2-0};
+ bits <3> Qd8;
+ let Inst{5-3} = Qd8{2-0};
+}
+class Enc_5023792 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_1244745 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_10002182 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{26-25} = Ii{10-9};
+ let Inst{13-13} = Ii{8-8};
+ let Inst{7-0} = Ii{7-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_12492533 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{6-3} = Ii{3-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_1774350 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{17-16} = Ii{5-4};
+ let Inst{6-3} = Ii{3-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_2703240 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Qv4;
+ let Inst{12-11} = Qv4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+}
+class Enc_6975103 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_9789480 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_12244921 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_8674673 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <5> n1;
+ let Inst{29-29} = n1{4-4};
+ let Inst{26-25} = n1{3-2};
+ let Inst{23-22} = n1{1-0};
+}
+class Enc_8514936 : OpcodeHexagon {
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_13455308 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_10188026 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_3158657 : OpcodeHexagon {
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_10597934 : OpcodeHexagon {
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+ bits <2> n1;
+ let Inst{9-8} = n1{1-0};
+}
+class Enc_10612292 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <2> Qx4;
+ let Inst{1-0} = Qx4{1-0};
+}
+class Enc_5178985 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_3967902 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-7} = Ii{7-2};
+ bits <6> II;
+ let Inst{13-13} = II{5-5};
+ let Inst{4-0} = II{4-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_2462143 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_9849208 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-7} = Ii{7-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{4-0} = Rt32{4-0};
+}
+class Enc_12618352 : OpcodeHexagon {
+ bits <5> Rtt32;
+ let Inst{20-16} = Rtt32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+}
+class Enc_7303598 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+}
+class Enc_13823098 : OpcodeHexagon {
+ bits <5> Gss32;
+ let Inst{20-16} = Gss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_16388420 : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{6-5} = Qs4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <5> Vw32;
+ let Inst{4-0} = Vw32{4-0};
+}
+class Enc_8328140 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{21-21} = Ii{15-15};
+ let Inst{13-8} = Ii{14-9};
+ let Inst{2-0} = Ii{8-6};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_1793896 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_4944558 : OpcodeHexagon {
+ bits <2> Qu4;
+ let Inst{9-8} = Qu4{1-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{4-0} = Vx32{4-0};
+}
+class Enc_13211717 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{20-16} = Vvv32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_8170340 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+ bits <3> Qdd8;
+ let Inst{2-0} = Qdd8{2-0};
+}
+class Enc_14071773 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_8605375 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_12711252 : OpcodeHexagon {
+ bits <2> Pv4;
+ let Inst{9-8} = Pv4{1-0};
+}
+class Enc_8202458 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_8577055 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{25-23} = n1{3-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_1409050 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+}
+class Enc_7466005 : OpcodeHexagon {
+ bits <5> Gs32;
+ let Inst{20-16} = Gs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_2380082 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_10067774 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_11000933 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
+ bits <3> Nt8;
+ let Inst{2-0} = Nt8{2-0};
+}
+class Enc_13201267 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_1989309 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vvv32;
+ let Inst{4-0} = Vvv32{4-0};
+}
+class Enc_9082775 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_8065534 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{6-3} = Ii{3-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_4631106 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{17-16} = Ps4{1-0};
+ bits <2> Pt4;
+ let Inst{9-8} = Pt4{1-0};
+ bits <2> Pu4;
+ let Inst{7-6} = Pu4{1-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_11065510 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{6-3} = Ii{4-1};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6673186 : OpcodeHexagon {
+ bits <13> Ii;
+ let Inst{26-25} = Ii{12-11};
+ let Inst{13-13} = Ii{10-10};
+ let Inst{7-0} = Ii{9-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_8498433 : OpcodeHexagon {
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_4395009 : OpcodeHexagon {
+ bits <7> Ii;
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_10926598 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{12-8} = Vuu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vxx32;
+ let Inst{7-3} = Vxx32{4-0};
+}
+class Enc_7606379 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_8131399 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
+}
+class Enc_11522288 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rx32;
+ let Inst{4-0} = Rx32{4-0};
+}
+class Enc_114098 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{5-5} = Ii{0-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_5654851 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_12023037 : OpcodeHexagon {
+ bits <2> Ps4;
+ let Inst{6-5} = Ps4{1-0};
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_176263 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{9-4} = Ii{7-2};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_6130414 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{23-22} = Ii{15-14};
+ let Inst{13-0} = Ii{13-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_631197 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <6> II;
+ let Inst{23-21} = II{5-3};
+ let Inst{7-5} = II{2-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+}
+class Enc_16214129 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_8333157 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_4834775 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{13-8} = II{5-0};
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rd16;
+ let Inst{19-16} = Rd16{3-0};
+}
+class Enc_16601956 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_15946706 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{6-5} = Ii{1-0};
+ bits <3> Rdd8;
+ let Inst{2-0} = Rdd8{2-0};
+}
+class Enc_6923828 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{13-13} = Ii{3-3};
+ let Inst{10-8} = Ii{2-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+}
+class Enc_1332717 : OpcodeHexagon {
+ bits <2> Pu4;
+ let Inst{6-5} = Pu4{1-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_1786883 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <6> Sdd64;
+ let Inst{5-0} = Sdd64{5-0};
+}
+class Enc_14303394 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{8-5} = Ii{5-2};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_9282127 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-7} = Ii{7-2};
+ bits <8> II;
+ let Inst{13-13} = II{7-7};
+ let Inst{6-0} = II{6-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_2813446 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{6-3} = Ii{3-0};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_364753 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <4> n1;
+ let Inst{29-29} = n1{3-3};
+ let Inst{26-25} = n1{2-1};
+ let Inst{23-23} = n1{0-0};
+}
+class Enc_12477789 : OpcodeHexagon {
+ bits <15> Ii;
+ let Inst{21-21} = Ii{14-14};
+ let Inst{13-13} = Ii{13-13};
+ let Inst{11-1} = Ii{12-2};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+}
+class Enc_44555 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_8497723 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{13-8} = Ii{5-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rxx32;
+ let Inst{4-0} = Rxx32{4-0};
+}
+class Enc_4359901 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <4> n1;
+ let Inst{29-29} = n1{3-3};
+ let Inst{26-25} = n1{2-1};
+ let Inst{22-22} = n1{0-0};
+}
+class Enc_11271630 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{6-3} = Ii{6-3};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_10501894 : OpcodeHexagon {
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <3> Rdd8;
+ let Inst{2-0} = Rdd8{2-0};
+}
+class Enc_9768377 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{4-0} = Vd32{4-0};
+}
+class Enc_16268019 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vvv32;
+ let Inst{12-8} = Vvv32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_8814718 : OpcodeHexagon {
+ bits <18> Ii;
+ let Inst{26-25} = Ii{17-16};
+ let Inst{20-16} = Ii{15-11};
+ let Inst{13-5} = Ii{10-2};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_6212930 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{8-5} = Ii{5-2};
+ bits <2> Pt4;
+ let Inst{10-9} = Pt4{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_5462762 : OpcodeHexagon {
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vv32;
+ let Inst{12-8} = Vv32{4-0};
+ bits <5> Vw32;
+ let Inst{4-0} = Vw32{4-0};
+}
+class Enc_6154421 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{13-13} = Ii{6-6};
+ let Inst{7-3} = Ii{5-1};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+}
+class Enc_8940892 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_3531000 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{11-5} = Ii{6-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_14311138 : OpcodeHexagon {
+ bits <5> Vuu32;
+ let Inst{20-16} = Vuu32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+}
+class Enc_2216485 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{22-21} = Ii{5-4};
+ let Inst{13-13} = Ii{3-3};
+ let Inst{7-5} = Ii{2-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_12395768 : OpcodeHexagon {
+ bits <16> Ii;
+ let Inst{26-25} = Ii{15-14};
+ let Inst{20-16} = Ii{13-9};
+ let Inst{13-13} = Ii{8-8};
+ let Inst{7-0} = Ii{7-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+}
+class Enc_11047413 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
+}
+class Enc_1256611 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_7884306 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{8-4} = Ii{7-3};
+}
+class Enc_11244923 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_8612939 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <5> n1;
+ let Inst{29-29} = n1{4-4};
+ let Inst{26-25} = n1{3-2};
+ let Inst{22-22} = n1{1-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_16355964 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{12-5} = Ii{7-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_12616482 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{11-8} = II{5-2};
+ let Inst{6-5} = II{1-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
+}
+class Enc_5915771 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{24-22} = n1{3-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_14459927 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_7504828 : OpcodeHexagon {
+ bits <10> Ii;
+ let Inst{21-21} = Ii{9-9};
+ let Inst{13-5} = Ii{8-0};
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_14209223 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_3931661 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{8-5} = Ii{5-2};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_13606251 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{11-8} = Ii{5-2};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_11475992 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vdd32;
+ let Inst{7-3} = Vdd32{4-0};
+}
+class Enc_13133231 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_9959498 : OpcodeHexagon {
+ bits <8> Ii;
+ let Inst{22-21} = Ii{7-6};
+ let Inst{13-13} = Ii{5-5};
+ let Inst{7-5} = Ii{4-2};
+ bits <5> Ru32;
+ let Inst{4-0} = Ru32{4-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rd32;
+ let Inst{12-8} = Rd32{4-0};
+}
+class Enc_8919369 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <5> n1;
+ let Inst{28-28} = n1{4-4};
+ let Inst{24-23} = n1{3-2};
+ let Inst{13-13} = n1{1-1};
+ let Inst{8-8} = n1{0-0};
+}
+class Enc_2968094 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{11-5} = Ii{6-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_4813442 : OpcodeHexagon {
+ bits <6> Ii;
+ let Inst{6-3} = Ii{5-2};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_4684887 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <4> Rs16;
+ let Inst{19-16} = Rs16{3-0};
+ bits <4> n1;
+ let Inst{28-28} = n1{3-3};
+ let Inst{25-23} = n1{2-0};
+}
+class Enc_15606259 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{11-8} = Ii{3-0};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_2268028 : OpcodeHexagon {
+ bits <3> Qtt8;
+ let Inst{10-8} = Qtt8{2-0};
+ bits <3> Qdd8;
+ let Inst{5-3} = Qdd8{2-0};
+}
+class Enc_13430430 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Rt32;
+ let Inst{20-16} = Rt32{4-0};
+ bits <5> Vd32;
+ let Inst{7-3} = Vd32{4-0};
+ bits <3> Qxx8;
+ let Inst{2-0} = Qxx8{2-0};
+}
+class Enc_13336212 : OpcodeHexagon {
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+ bits <1> n1;
+ let Inst{9-9} = n1{0-0};
+}
+class Enc_15008287 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{20-16} = Vu32{4-0};
+ bits <3> Rt8;
+ let Inst{2-0} = Rt8{2-0};
+ bits <5> Vx32;
+ let Inst{7-3} = Vx32{4-0};
+ bits <5> Vy32;
+ let Inst{12-8} = Vy32{4-0};
+}
+class Enc_4897205 : OpcodeHexagon {
+ bits <2> Qs4;
+ let Inst{9-8} = Qs4{1-0};
+ bits <2> Qd4;
+ let Inst{1-0} = Qd4{1-0};
+}
+class Enc_8038806 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{11-8} = Ii{3-0};
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_12669374 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vxx32;
+ let Inst{4-0} = Vxx32{4-0};
+}
+class Enc_971347 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{8-5} = Ii{3-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Ryy32;
+ let Inst{4-0} = Ryy32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_1997594 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
+class Enc_11940513 : OpcodeHexagon {
+ bits <2> Ii;
+ let Inst{13-13} = Ii{1-1};
+ let Inst{7-7} = Ii{0-0};
+ bits <2> Pv4;
+ let Inst{6-5} = Pv4{1-0};
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <5> Ru32;
+ let Inst{12-8} = Ru32{4-0};
+ bits <5> Rt32;
+ let Inst{4-0} = Rt32{4-0};
+}
+class Enc_2735552 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Pv4;
+ let Inst{12-11} = Pv4{1-0};
+ bits <3> Os8;
+ let Inst{2-0} = Os8{2-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_16410950 : OpcodeHexagon {
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <5> Vs32;
+ let Inst{7-3} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_6226085 : OpcodeHexagon {
+ bits <5> Ii;
+ let Inst{12-8} = Ii{4-0};
+ bits <5> II;
+ let Inst{22-21} = II{4-3};
+ let Inst{7-5} = II{2-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
+class Enc_14193700 : OpcodeHexagon {
+ bits <6> II;
+ let Inst{5-0} = II{5-0};
+ bits <3> Nt8;
+ let Inst{10-8} = Nt8{2-0};
+ bits <5> Re32;
+ let Inst{20-16} = Re32{4-0};
+}
+class Enc_15763937 : OpcodeHexagon {
+ bits <11> Ii;
+ let Inst{21-20} = Ii{10-9};
+ let Inst{7-1} = Ii{8-2};
+ bits <3> Ns8;
+ let Inst{18-16} = Ns8{2-0};
+ bits <6> n1;
+ let Inst{29-29} = n1{5-5};
+ let Inst{26-25} = n1{4-3};
+ let Inst{23-22} = n1{2-1};
+ let Inst{13-13} = n1{0-0};
+}
+class Enc_2492727 : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <5> Rt32;
+ let Inst{12-8} = Rt32{4-0};
+ bits <2> Pd4;
+ let Inst{1-0} = Pd4{1-0};
+}
+class Enc_13425035 : OpcodeHexagon {
+ bits <2> Qv4;
+ let Inst{12-11} = Qv4{1-0};
+ bits <1> Mu2;
+ let Inst{13-13} = Mu2{0-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_4135257 : OpcodeHexagon {
+ bits <4> Ii;
+ let Inst{10-8} = Ii{3-1};
+ bits <4> Rs16;
+ let Inst{7-4} = Rs16{3-0};
+ bits <4> Rd16;
+ let Inst{3-0} = Rd16{3-0};
+}
+class Enc_14631806 : OpcodeHexagon {
+ bits <5> Vu32;
+ let Inst{12-8} = Vu32{4-0};
+ bits <5> Vdd32;
+ let Inst{4-0} = Vdd32{4-0};
+}
+class Enc_12397062 : OpcodeHexagon {
+ bits <3> Ii;
+ let Inst{10-8} = Ii{2-0};
+ bits <2> Qv4;
+ let Inst{12-11} = Qv4{1-0};
+ bits <5> Vs32;
+ let Inst{4-0} = Vs32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
+class Enc_11959851 : OpcodeHexagon {
+ bits <7> Ii;
+ let Inst{6-3} = Ii{6-3};
+ bits <2> Pv4;
+ let Inst{1-0} = Pv4{1-0};
+ bits <5> Rtt32;
+ let Inst{12-8} = Rtt32{4-0};
+ bits <5> Rx32;
+ let Inst{20-16} = Rx32{4-0};
+}
diff --git a/lib/Target/Hexagon/HexagonDepInstrInfo.td b/lib/Target/Hexagon/HexagonDepInstrInfo.td
new file mode 100644
index 000000000000..2bfde9acaea9
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -0,0 +1,45573 @@
+//===--- HexagonDepInstrInfo.td -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
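+// Every record in this file follows the same shape:
+//   def NAME : HInst<(outs ...), (ins ...), "assembly syntax",
+//                    <itinerary>, <instruction type>>, Enc_* {
+//     let Inst{...} = 0b...;   // fixed opcode bits
+//     let <flag> = ...;        // predication, new-value, extender info
+//   }
+// The Enc_* mixin contributes the operand bit positions; the `let Inst`
+// assignments in the body contribute the fixed opcode bits.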
+def A2_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = abs($Rs32)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_absp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = abs($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10000000100;
+}
+def A2_abssat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = abs($Rs32):sat",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
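+// Saturating instructions such as A2_abssat carry `Defs = [USR_OVF]`:
+// when the result saturates they set the sticky overflow bit in the user
+// status register, so that bit is modeled as an implicit def.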
+def A2_add : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = add($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, PredNewRel, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_add";
+let InputType = "reg";
+let BaseOpcode = "A2_add";
+let isCommutable = 1;
+let isPredicable = 1;
+}
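+// A2_add also participates in two TableGen relation maps: ImmRegRel links
+// it to the immediate form A2_addi through the shared CextOpcode and
+// InputType fields, and PredNewRel links the base instruction to its
+// predicated and predicate-new variants through BaseOpcode.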
+def A2_addh_h16_hh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.h,$Rs32.h):<<16",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_addh_h16_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.h,$Rs32.l):<<16",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_addh_h16_lh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.l,$Rs32.h):<<16",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_addh_h16_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.l,$Rs32.l):<<16",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_addh_h16_sat_hh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.h,$Rs32.h):sat:<<16",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_addh_h16_sat_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.h,$Rs32.l):sat:<<16",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_addh_h16_sat_lh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.l,$Rs32.h):sat:<<16",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_addh_h16_sat_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.l,$Rs32.l):sat:<<16",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_addh_l16_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.l,$Rs32.h)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_addh_l16_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.l,$Rs32.l)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_addh_l16_sat_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.l,$Rs32.h):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_addh_l16_sat_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = add($Rt32.l,$Rs32.l):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_addi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = add($Rs32,#$Ii)",
+ALU32_ADDI_tc_1_SLOT0123, TypeALU32_ADDI>, Enc_11542684, PredNewRel, ImmRegRel {
+let Inst{31-28} = 0b1011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_add";
+let InputType = "imm";
+let BaseOpcode = "A2_addi";
+let isPredicable = 1;
+let isAdd = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
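+// The isExtendable/opExtendable/opExtentBits fields describe Hexagon's
+// constant-extender mechanism: operand 2 of A2_addi (the immediate)
+// encodes 16 signed bits natively and can be widened to a full 32-bit
+// value by pairing the instruction with an extender word in the same
+// packet.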
+def A2_addp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = add($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011000;
+let isCommutable = 1;
+let isAdd = 1;
+}
+def A2_addpsat : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = add($Rss32,$Rtt32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011011;
+let Defs = [USR_OVF];
+let isCommutable = 1;
+}
+def A2_addsat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = add($Rs32,$Rt32):sat",
+ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+let InputType = "reg";
+let isCommutable = 1;
+}
+def A2_addsp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"$Rdd32 = add($Rs32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64> {
+let isPseudo = 1;
+}
+def A2_addsph : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = add($Rss32,$Rtt32):raw:hi",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011011;
+}
+def A2_addspl : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = add($Rss32,$Rtt32):raw:lo",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011011;
+}
+def A2_and : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = and($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, PredNewRel, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_and";
+let InputType = "reg";
+let BaseOpcode = "A2_and";
+let isCommutable = 1;
+let isPredicable = 1;
+}
+def A2_andir : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = and($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_13472494, ImmRegRel {
+let Inst{31-22} = 0b0111011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_and";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+}
+def A2_andp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = and($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011111;
+let isCommutable = 1;
+}
+def A2_aslh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = aslh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01110000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_aslh";
+let isPredicable = 1;
+}
+def A2_asrh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = asrh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01110000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_asrh";
+let isPredicable = 1;
+}
+def A2_combine_hh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = combine($Rt32.h,$Rs32.h)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def A2_combine_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = combine($Rt32.h,$Rs32.l)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110011101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def A2_combine_lh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = combine($Rt32.l,$Rs32.h)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def A2_combine_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = combine($Rt32.l,$Rs32.l)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110011111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def A2_combineii : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins s32_0Imm:$Ii, s8_0Imm:$II),
+"$Rdd32 = combine(#$Ii,#$II)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_14007201 {
+let Inst{31-23} = 0b011111000;
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+let isMoveImm = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A2_combinew : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = combine($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1997594, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110101000;
+let InputType = "reg";
+let BaseOpcode = "A2_combinew";
+let isPredicable = 1;
+}
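+// Worked example: A2_combinew uses Enc_1997594 above (Rs32 -> Inst{20-16},
+// Rt32 -> Inst{12-8}, Rdd32 -> Inst{4-0}), so "r1:0 = combine(r2,r3)"
+// assembles as
+//   31-21: 11110101000   20-16: 00010 (r2)    13: 0
+//   12-8:  00011 (r3)    7-5:   000           4-0: 00000 (r1:0)
+// (register pairs are encoded by their even member; bits 15-14 are the
+// packet parse bits and are filled in at packet-assembly time).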
+def A2_max : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = max($Rs32,$Rt32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101110;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_maxp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = max($Rss32,$Rtt32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011110;
+}
+def A2_maxu : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = maxu($Rs32,$Rt32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101110;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_maxup : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = maxu($Rss32,$Rtt32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011110;
+}
+def A2_min : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = min($Rt32,$Rs32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_minp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = min($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011101;
+}
+def A2_minu : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = minu($Rt32,$Rs32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_minup : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = minu($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011101;
+}
+def A2_neg : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = neg($Rs32)",
+PSEUDO, TypeALU32_2op> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
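+// A2_neg has no Enc_* mixin and no fixed Inst bits: isPseudo and
+// isCodeGenOnly mark it as a codegen-only alias that is expanded before
+// emission (the architecture manual defines neg(Rs) as sub(#0,Rs)).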
+def A2_negp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = neg($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10000000100;
+}
+def A2_negsat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = neg($Rs32):sat",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_nop : HInst<
+(outs),
+(ins),
+"nop",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_0 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-16} = 0b0111111100000000;
+}
+def A2_not : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = not($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_notp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = not($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10000000100;
+}
+def A2_or : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = or($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, PredNewRel, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_or";
+let InputType = "reg";
+let BaseOpcode = "A2_or";
+let isCommutable = 1;
+let isPredicable = 1;
+}
+def A2_orir : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = or($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_13472494, ImmRegRel {
+let Inst{31-22} = 0b0111011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_or";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+}
+def A2_orp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = or($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011111;
+let isCommutable = 1;
+}
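+// The A2_p*t/A2_p*f records below are the predicated forms of the plain
+// ALU32 instructions above.  The suffix gives the predicate sense and
+// timing: t executes when $Pu4 is true (Inst{7-7} = 0 in the register
+// forms), f when it is false (Inst{7-7} = 1), and the *new variants
+// (Inst{13-13} = 1) test a predicate produced earlier in the same packet
+// ($Pu4.new).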
+def A2_paddf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4) $Rd32 = add($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel, ImmRegRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_add";
+let InputType = "reg";
+let BaseOpcode = "A2_add";
+}
+def A2_paddfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4.new) $Rd32 = add($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel, ImmRegRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let CextOpcode = "A2_add";
+let InputType = "reg";
+let BaseOpcode = "A2_add";
+}
+def A2_paddif : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
+"if (!$Pu4) $Rd32 = add($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534, PredNewRel, ImmRegRel {
+let Inst{13-13} = 0b0;
+let Inst{31-23} = 0b011101001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_add";
+let InputType = "imm";
+let BaseOpcode = "A2_addi";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A2_paddifnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
+"if (!$Pu4.new) $Rd32 = add($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534, PredNewRel, ImmRegRel {
+let Inst{13-13} = 0b1;
+let Inst{31-23} = 0b011101001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let CextOpcode = "A2_add";
+let InputType = "imm";
+let BaseOpcode = "A2_addi";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A2_paddit : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
+"if ($Pu4) $Rd32 = add($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534, PredNewRel, ImmRegRel {
+let Inst{13-13} = 0b0;
+let Inst{31-23} = 0b011101000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_add";
+let InputType = "imm";
+let BaseOpcode = "A2_addi";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A2_padditnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
+"if ($Pu4.new) $Rd32 = add($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534, PredNewRel, ImmRegRel {
+let Inst{13-13} = 0b1;
+let Inst{31-23} = 0b011101000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let CextOpcode = "A2_add";
+let InputType = "imm";
+let BaseOpcode = "A2_addi";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A2_paddt : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4) $Rd32 = add($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel, ImmRegRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111011000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_add";
+let InputType = "reg";
+let BaseOpcode = "A2_add";
+}
+def A2_paddtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4.new) $Rd32 = add($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel, ImmRegRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111011000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let CextOpcode = "A2_add";
+let InputType = "reg";
+let BaseOpcode = "A2_add";
+}
+def A2_pandf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4) $Rd32 = and($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111001000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_and";
+}
+def A2_pandfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4.new) $Rd32 = and($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111001000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_and";
+}
+def A2_pandt : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4) $Rd32 = and($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111001000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_and";
+}
+def A2_pandtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4.new) $Rd32 = and($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111001000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_and";
+}
+def A2_porf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4) $Rd32 = or($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111001001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_or";
+}
+def A2_porfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4.new) $Rd32 = or($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111001001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_or";
+}
+def A2_port : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4) $Rd32 = or($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111001001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_or";
+}
+def A2_portnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4.new) $Rd32 = or($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111001001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_or";
+}
+def A2_psubf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
+"if (!$Pu4) $Rd32 = sub($Rt32,$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1332717, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111011001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_sub";
+}
+def A2_psubfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
+"if (!$Pu4.new) $Rd32 = sub($Rt32,$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1332717, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111011001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_sub";
+}
+def A2_psubt : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
+"if ($Pu4) $Rd32 = sub($Rt32,$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1332717, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111011001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_sub";
+}
+def A2_psubtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rt32, IntRegs:$Rs32),
+"if ($Pu4.new) $Rd32 = sub($Rt32,$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1332717, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111011001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_sub";
+}
+def A2_pxorf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4) $Rd32 = xor($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111001011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_xor";
+}
+def A2_pxorfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4.new) $Rd32 = xor($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111001011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_xor";
+}
+def A2_pxort : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4) $Rd32 = xor($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111001011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_xor";
+}
+def A2_pxortnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4.new) $Rd32 = xor($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111001011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_xor";
+}
+def A2_roundsat : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = round($Rss32):sat",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = sat($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_satb : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = satb($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000111;
+let Inst{31-21} = 0b10001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_sath : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = sath($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_satub : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = satub($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_satuh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = satuh($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_sub : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32,$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375, PredNewRel, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110011001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_sub";
+let InputType = "reg";
+let BaseOpcode = "A2_sub";
+let isPredicable = 1;
+}
+def A2_subh_h16_hh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.h,$Rs32.h):<<16",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_subh_h16_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.h,$Rs32.l):<<16",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_subh_h16_lh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.l,$Rs32.h):<<16",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_subh_h16_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.l,$Rs32.l):<<16",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_subh_h16_sat_hh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.h,$Rs32.h):sat:<<16",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_subh_h16_sat_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.h,$Rs32.l):sat:<<16",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_subh_h16_sat_lh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.l,$Rs32.h):sat:<<16",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_subh_h16_sat_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.l,$Rs32.l):sat:<<16",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_subh_l16_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.l,$Rs32.h)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_subh_l16_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.l,$Rs32.l)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_subh_l16_sat_hl : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.l,$Rs32.h):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_subh_l16_sat_ll : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32.l,$Rs32.l):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def A2_subp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = sub($Rtt32,$Rss32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011001;
+}
+def A2_subri : HInst<
+(outs IntRegs:$Rd32),
+(ins s32_0Imm:$Ii, IntRegs:$Rs32),
+"$Rd32 = sub(#$Ii,$Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_13472494, PredNewRel, ImmRegRel {
+let Inst{31-22} = 0b0111011001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_sub";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+}
+def A2_subsat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32,$Rs32):sat",
+ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+let InputType = "reg";
+}
+def A2_svaddh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = vaddh($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+let isCommutable = 1;
+}
+def A2_svaddhs : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = vaddh($Rs32,$Rt32):sat",
+ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+let InputType = "reg";
+let isCommutable = 1;
+}
+def A2_svadduhs : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = vadduh($Rs32,$Rt32):sat",
+ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+let InputType = "reg";
+let isCommutable = 1;
+}
+def A2_svavgh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = vavgh($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+let isCommutable = 1;
+}
+def A2_svavghs : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = vavgh($Rs32,$Rt32):rnd",
+ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+let isCommutable = 1;
+}
+def A2_svnavgh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = vnavgh($Rt32,$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def A2_svsubh : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = vsubh($Rt32,$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def A2_svsubhs : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = vsubh($Rt32,$Rs32):sat",
+ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+let InputType = "reg";
+}
+def A2_svsubuhs : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = vsubuh($Rt32,$Rs32):sat",
+ALU32_3op_tc_2_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110110111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+let InputType = "reg";
+}
+def A2_swiz : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = swiz($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000111;
+let Inst{31-21} = 0b10001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_sxtb : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = sxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01110000101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_sxtb";
+let isPredicable = 1;
+}
+def A2_sxth : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = sxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01110000111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_sxth";
+let isPredicable = 1;
+}
+def A2_sxtw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = sxtw($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10000100010;
+}
+def A2_tfr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = $Rs32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01110000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+let BaseOpcode = "A2_tfr";
+let isPredicable = 1;
+}
+def A2_tfrcrr : HInst<
+(outs IntRegs:$Rd32),
+(ins CtrRegs:$Cs32),
+"$Rd32 = $Cs32",
+CR_tc_3x_SLOT3, TypeCR>, Enc_1539665 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01101010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_tfrf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) $Rd32 = $Rs32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel, ImmRegRel {
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_tfr";
+let InputType = "reg";
+let BaseOpcode = "A2_tfr";
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_tfrfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) $Rd32 = $Rs32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel, ImmRegRel {
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let CextOpcode = "A2_tfr";
+let InputType = "reg";
+let BaseOpcode = "A2_tfr";
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_tfrih : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, u16_0Imm:$Ii),
+"$Rx32.h = #$Ii",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_6130414 {
+let Inst{21-21} = 0b1;
+let Inst{31-24} = 0b01110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
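+// `Constraints = "$Rx32 = $Rx32in"` ties the output to the input operand:
+// A2_tfrih writes only the high halfword of $Rx32, so the register
+// allocator must place both operands in the same register.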
+def A2_tfril : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, u16_0Imm:$Ii),
+"$Rx32.l = #$Ii",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_6130414 {
+let Inst{21-21} = 0b1;
+let Inst{31-24} = 0b01110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def A2_tfrp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = $Rss32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel {
+let BaseOpcode = "A2_tfrp";
+let isPredicable = 1;
+let isPseudo = 1;
+}
+def A2_tfrpf : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
+"if (!$Pu4) $Rdd32 = $Rss32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel {
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let BaseOpcode = "A2_tfrp";
+let isPseudo = 1;
+}
+def A2_tfrpfnew : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
+"if (!$Pu4.new) $Rdd32 = $Rss32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel {
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_tfrp";
+let isPseudo = 1;
+}
+def A2_tfrpi : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins s8_0Imm:$Ii),
+"$Rdd32 = #$Ii",
+ALU64_tc_1_SLOT23, TypeALU64> {
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+let isMoveImm = 1;
+let isPseudo = 1;
+}
+def A2_tfrpt : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
+"if ($Pu4) $Rdd32 = $Rss32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel {
+let isPredicated = 1;
+let BaseOpcode = "A2_tfrp";
+let isPseudo = 1;
+}
+def A2_tfrptnew : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, DoubleRegs:$Rss32),
+"if ($Pu4.new) $Rdd32 = $Rss32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel {
+let isPredicated = 1;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_tfrp";
+let isPseudo = 1;
+}
+def A2_tfrrcr : HInst<
+(outs CtrRegs:$Cd32),
+(ins IntRegs:$Rs32),
+"$Cd32 = $Rs32",
+CR_tc_3x_SLOT3, TypeCR>, Enc_9018141 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01100010001;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def A2_tfrsi : HInst<
+(outs IntRegs:$Rd32),
+(ins s32_0Imm:$Ii),
+"$Rd32 = #$Ii",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_7971062, PredNewRel, ImmRegRel {
+let Inst{21-21} = 0b0;
+let Inst{31-24} = 0b01111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_tfr";
+let InputType = "imm";
+let BaseOpcode = "A2_tfrsi";
+let isPredicable = 1;
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+let isMoveImm = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def A2_tfrt : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) $Rd32 = $Rs32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel, ImmRegRel {
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_tfr";
+let InputType = "reg";
+let BaseOpcode = "A2_tfr";
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_tfrtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) $Rd32 = $Rs32",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel, ImmRegRel {
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let CextOpcode = "A2_tfr";
+let InputType = "reg";
+let BaseOpcode = "A2_tfr";
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_vabsh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vabsh($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10000000010;
+}
+def A2_vabshsat : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vabsh($Rss32):sat",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10000000010;
+let Defs = [USR_OVF];
+}
+def A2_vabsw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vabsw($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10000000010;
+}
+def A2_vabswsat : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vabsw($Rss32):sat",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000111;
+let Inst{31-21} = 0b10000000010;
+let Defs = [USR_OVF];
+}
+def A2_vaddb_map : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vaddb($Rss32,$Rtt32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_vaddh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vaddh($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011000;
+}
+def A2_vaddhs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vaddh($Rss32,$Rtt32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011000;
+let Defs = [USR_OVF];
+}
+def A2_vaddub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vaddub($Rss32,$Rtt32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011000;
+}
+def A2_vaddubs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vaddub($Rss32,$Rtt32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011000;
+let Defs = [USR_OVF];
+}
+def A2_vadduhs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vadduh($Rss32,$Rtt32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011000;
+let Defs = [USR_OVF];
+}
+def A2_vaddw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vaddw($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011000;
+}
+def A2_vaddws : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vaddw($Rss32,$Rtt32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011000;
+let Defs = [USR_OVF];
+}
+def A2_vavgh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavgh($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011010;
+}
+def A2_vavghcr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavgh($Rss32,$Rtt32):crnd",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011010;
+let prefersSlot3 = 1;
+}
+def A2_vavghr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavgh($Rss32,$Rtt32):rnd",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011010;
+}
+def A2_vavgub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavgub($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011010;
+}
+def A2_vavgubr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavgub($Rss32,$Rtt32):rnd",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011010;
+}
+def A2_vavguh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavguh($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011010;
+}
+def A2_vavguhr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavguh($Rss32,$Rtt32):rnd",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011010;
+}
+def A2_vavguw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavguw($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011011;
+}
+def A2_vavguwr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavguw($Rss32,$Rtt32):rnd",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011011;
+}
+def A2_vavgw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavgw($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011011;
+}
+def A2_vavgwcr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavgw($Rss32,$Rtt32):crnd",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011011;
+let prefersSlot3 = 1;
+}
+def A2_vavgwr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vavgw($Rss32,$Rtt32):rnd",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011011;
+}
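+// The A2_vcmp* records compare the elements of two 64-bit register pairs
+// (bytes, halfwords, or words, per the .b/.h/.w mnemonic suffix) and
+// write the per-element results into predicate register $Pd4.  Inst{7-2}
+// selects the element size and comparison; Inst{31-21} is shared across
+// the family.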
+def A2_vcmpbeq : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmpb.eq($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b110000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010000;
+}
+def A2_vcmpbgtu : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmpb.gtu($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b111000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010000;
+}
+def A2_vcmpheq : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmph.eq($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b011000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010000;
+}
+def A2_vcmphgt : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmph.gt($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010000;
+}
+def A2_vcmphgtu : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmph.gtu($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b101000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010000;
+}
+def A2_vcmpweq : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmpw.eq($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010000;
+}
+def A2_vcmpwgt : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmpw.gt($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b001000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010000;
+}
+def A2_vcmpwgtu : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmpw.gtu($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b010000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010000;
+}
+def A2_vconj : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vconj($Rss32):sat",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000111;
+let Inst{31-21} = 0b10000000100;
+let Defs = [USR_OVF];
+}
+def A2_vmaxb : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vmaxb($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011110;
+}
+def A2_vmaxh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vmaxh($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011110;
+}
+def A2_vmaxub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vmaxub($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011110;
+}
+def A2_vmaxuh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vmaxuh($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011110;
+}
+def A2_vmaxuw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vmaxuw($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011101;
+}
+def A2_vmaxw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vmaxw($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011110;
+}
+def A2_vminb : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vminb($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011110;
+}
+def A2_vminh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vminh($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011101;
+}
+def A2_vminub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vminub($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011101;
+}
+def A2_vminuh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vminuh($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011101;
+}
+def A2_vminuw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vminuw($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011101;
+}
+def A2_vminw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vminw($Rtt32,$Rss32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011101;
+}
+def A2_vnavgh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vnavgh($Rtt32,$Rss32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011100;
+}
+def A2_vnavghcr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vnavgh($Rtt32,$Rss32):crnd:sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def A2_vnavghr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vnavgh($Rtt32,$Rss32):rnd:sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def A2_vnavgw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vnavgw($Rtt32,$Rss32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011100;
+}
+def A2_vnavgwcr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vnavgw($Rtt32,$Rss32):crnd:sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def A2_vnavgwr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vnavgw($Rtt32,$Rss32):rnd:sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def A2_vraddub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vraddub($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000010;
+let prefersSlot3 = 1;
+}
+def A2_vraddub_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vraddub($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A2_vrsadub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrsadub($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000010;
+let prefersSlot3 = 1;
+}
+def A2_vrsadub_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrsadub($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
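+// A2_vsub*: element-wise subtract on bytes/halfwords/words. A2_vsubb_map
+// is a codegen-only mapping pseudo; the :sat forms saturate and define
+// USR_OVF.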
+def A2_vsubb_map : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vsubb($Rss32,$Rtt32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_vsubh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vsubh($Rtt32,$Rss32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011001;
+}
+def A2_vsubhs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vsubh($Rtt32,$Rss32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011001;
+let Defs = [USR_OVF];
+}
+def A2_vsubub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vsubub($Rtt32,$Rss32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011001;
+}
+def A2_vsububs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vsubub($Rtt32,$Rss32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011001;
+let Defs = [USR_OVF];
+}
+def A2_vsubuhs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vsubuh($Rtt32,$Rss32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011001;
+let Defs = [USR_OVF];
+}
+def A2_vsubw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vsubw($Rtt32,$Rss32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011001;
+}
+def A2_vsubws : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vsubw($Rtt32,$Rss32):sat",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011001;
+let Defs = [USR_OVF];
+}
+def A2_xor : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = xor($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+let BaseOpcode = "A2_xor";
+let isCommutable = 1;
+let isPredicable = 1;
+}
+def A2_xorp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = xor($Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011111;
+let isCommutable = 1;
+}
+def A2_zxtb : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = zxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, PredNewRel {
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_zxtb";
+let isPredicable = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def A2_zxth : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = zxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_4075554, PredNewRel {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01110000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_zxth";
+let isPredicable = 1;
+}
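+// A4_addp_c (and A4_subp_c below): 64-bit add/subtract with carry chained
+// through predicate $Px4. The predicate is written late (isPredicateLate)
+// and tied to its input.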
+def A4_addp_c : HInst<
+(outs DoubleRegs:$Rdd32, PredRegs:$Px4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Px4in),
+"$Rdd32 = add($Rss32,$Rtt32,$Px4):carry",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_151014 {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000010110;
+let isPredicateLate = 1;
+let Constraints = "$Px4 = $Px4in";
+}
+def A4_andn : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = and($Rt32,~$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def A4_andnp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = and($Rtt32,~$Rss32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011111;
+}
+def A4_bitsplit : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = bitsplit($Rs32,$Rt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010100001;
+}
+def A4_bitspliti : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rdd32 = bitsplit($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_5654851 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001000110;
+}
+def A4_boundscheck : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"$Pd4 = boundscheck($Rs32,$Rtt32)",
+M_tc_3x_SLOT23, TypeALU64> {
+let isPseudo = 1;
+}
+def A4_boundscheck_hi : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = boundscheck($Rss32,$Rtt32):raw:hi",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b101000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11010010000;
+}
+def A4_boundscheck_lo : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = boundscheck($Rss32,$Rtt32):raw:lo",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11010010000;
+}
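+// A4_cmp{b,h}*: compares on the low byte/halfword of the operands. The
+// immediate forms carry constant-extender bookkeeping (isExtendable,
+// opExtendable, opExtentBits) and pair with their register forms via
+// ImmRegRel/CextOpcode.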
+def A4_cmpbeq : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmpb.eq($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b110000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111110;
+let CextOpcode = "A4_cmpbeq";
+let InputType = "reg";
+let isCommutable = 1;
+let isCompare = 1;
+}
+def A4_cmpbeqi : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u8_0Imm:$Ii),
+"$Pd4 = cmpb.eq($Rs32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_6736678, ImmRegRel {
+let Inst{4-2} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011101000;
+let CextOpcode = "A4_cmpbeq";
+let InputType = "imm";
+let isCommutable = 1;
+let isCompare = 1;
+}
+def A4_cmpbgt : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmpb.gt($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b010000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111110;
+let CextOpcode = "A4_cmpbgt";
+let InputType = "reg";
+let isCompare = 1;
+}
+def A4_cmpbgti : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s8_0Imm:$Ii),
+"$Pd4 = cmpb.gt($Rs32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_6736678, ImmRegRel {
+let Inst{4-2} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011101001;
+let CextOpcode = "A4_cmpbgt";
+let InputType = "imm";
+let isCompare = 1;
+}
+def A4_cmpbgtu : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmpb.gtu($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b111000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111110;
+let CextOpcode = "A4_cmpbgtu";
+let InputType = "reg";
+let isCompare = 1;
+}
+def A4_cmpbgtui : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii),
+"$Pd4 = cmpb.gtu($Rs32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3531000, ImmRegRel {
+let Inst{4-2} = 0b000;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b11011101010;
+let CextOpcode = "A4_cmpbgtu";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 0;
+}
+def A4_cmpheq : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmph.eq($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b011000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111110;
+let CextOpcode = "A4_cmpheq";
+let InputType = "reg";
+let isCommutable = 1;
+let isCompare = 1;
+}
+def A4_cmpheqi : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Pd4 = cmph.eq($Rs32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_6736678, ImmRegRel {
+let Inst{4-2} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011101000;
+let CextOpcode = "A4_cmpheq";
+let InputType = "imm";
+let isCommutable = 1;
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A4_cmphgt : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmph.gt($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111110;
+let CextOpcode = "A4_cmphgt";
+let InputType = "reg";
+let isCompare = 1;
+}
+def A4_cmphgti : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Pd4 = cmph.gt($Rs32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_6736678, ImmRegRel {
+let Inst{4-2} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011101001;
+let CextOpcode = "A4_cmphgt";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A4_cmphgtu : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmph.gtu($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b101000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111110;
+let CextOpcode = "A4_cmphgtu";
+let InputType = "reg";
+let isCompare = 1;
+}
+def A4_cmphgtui : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii),
+"$Pd4 = cmph.gtu($Rs32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3531000, ImmRegRel {
+let Inst{4-2} = 0b010;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b11011101010;
+let CextOpcode = "A4_cmphgtu";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 0;
+}
+def A4_combineii : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins s8_0Imm:$Ii, u32_0Imm:$II),
+"$Rdd32 = combine(#$Ii,#$II)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9864697 {
+let Inst{31-21} = 0b01111100100;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def A4_combineir : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins s32_0Imm:$Ii, IntRegs:$Rs32),
+"$Rdd32 = combine(#$Ii,$Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_2462143 {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b01110011001;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A4_combineri : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rdd32 = combine($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_2462143 {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b01110011000;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A4_cround_ri : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = cround($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def A4_cround_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = cround($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
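+// A4_ext is the immext constant-extender word: its u26_6Imm operand
+// provides the upper 26 bits of a 32-bit extended immediate for the next
+// extendable instruction in the packet.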
+def A4_ext : HInst<
+(outs),
+(ins u26_6Imm:$Ii),
+"immext(#$Ii)",
+EXTENDER_tc_1_SLOT0123, TypeEXTENDER>, Enc_2082956 {
+let Inst{31-28} = 0b0000;
+}
+def A4_modwrapu : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = modwrap($Rs32,$Rt32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_14071773 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def A4_orn : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = or($Rt32,~$Rs32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8605375 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def A4_ornp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = or($Rtt32,~$Rss32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_11687333 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010011111;
+}
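+// A4_p<op>{t,f}[new]: predicated forms of the ALU32 2-op transforms
+// (aslh/asrh/sxtb/sxth/zxtb/zxth). t/f is the predicate sense; the "new"
+// forms test a .new predicate produced in the same packet. PredNewRel and
+// BaseOpcode relate each form to its unpredicated base instruction.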
+def A4_paslhf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) $Rd32 = aslh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1010;
+let Inst{31-21} = 0b01110000000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_aslh";
+}
+def A4_paslhfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) $Rd32 = aslh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1011;
+let Inst{31-21} = 0b01110000000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_aslh";
+}
+def A4_paslht : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) $Rd32 = aslh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1000;
+let Inst{31-21} = 0b01110000000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_aslh";
+}
+def A4_paslhtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) $Rd32 = aslh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1001;
+let Inst{31-21} = 0b01110000000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_aslh";
+}
+def A4_pasrhf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) $Rd32 = asrh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1010;
+let Inst{31-21} = 0b01110000001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_asrh";
+}
+def A4_pasrhfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) $Rd32 = asrh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1011;
+let Inst{31-21} = 0b01110000001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_asrh";
+}
+def A4_pasrht : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) $Rd32 = asrh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1000;
+let Inst{31-21} = 0b01110000001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_asrh";
+}
+def A4_pasrhtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) $Rd32 = asrh($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1001;
+let Inst{31-21} = 0b01110000001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_asrh";
+}
+def A4_psxtbf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) $Rd32 = sxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1010;
+let Inst{31-21} = 0b01110000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_sxtb";
+}
+def A4_psxtbfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) $Rd32 = sxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1011;
+let Inst{31-21} = 0b01110000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_sxtb";
+}
+def A4_psxtbt : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) $Rd32 = sxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1000;
+let Inst{31-21} = 0b01110000101;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_sxtb";
+}
+def A4_psxtbtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) $Rd32 = sxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1001;
+let Inst{31-21} = 0b01110000101;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_sxtb";
+}
+def A4_psxthf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) $Rd32 = sxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1010;
+let Inst{31-21} = 0b01110000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_sxth";
+}
+def A4_psxthfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) $Rd32 = sxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1011;
+let Inst{31-21} = 0b01110000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_sxth";
+}
+def A4_psxtht : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) $Rd32 = sxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1000;
+let Inst{31-21} = 0b01110000111;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_sxth";
+}
+def A4_psxthtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) $Rd32 = sxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1001;
+let Inst{31-21} = 0b01110000111;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_sxth";
+}
+def A4_pzxtbf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) $Rd32 = zxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1010;
+let Inst{31-21} = 0b01110000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_zxtb";
+}
+def A4_pzxtbfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) $Rd32 = zxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1011;
+let Inst{31-21} = 0b01110000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_zxtb";
+}
+def A4_pzxtbt : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) $Rd32 = zxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1000;
+let Inst{31-21} = 0b01110000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_zxtb";
+}
+def A4_pzxtbtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) $Rd32 = zxtb($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1001;
+let Inst{31-21} = 0b01110000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_zxtb";
+}
+def A4_pzxthf : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) $Rd32 = zxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1010;
+let Inst{31-21} = 0b01110000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_zxth";
+}
+def A4_pzxthfnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) $Rd32 = zxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1011;
+let Inst{31-21} = 0b01110000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_zxth";
+}
+def A4_pzxtht : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) $Rd32 = zxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1000;
+let Inst{31-21} = 0b01110000110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let BaseOpcode = "A2_zxth";
+}
+def A4_pzxthtnew : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) $Rd32 = zxth($Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9422954, PredNewRel {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1001;
+let Inst{31-21} = 0b01110000110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_zxth";
+}
+def A4_rcmpeq : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = cmp.eq($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A4_rcmpeq";
+let InputType = "reg";
+let isCommutable = 1;
+}
+def A4_rcmpeqi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = cmp.eq($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_16355964, ImmRegRel {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b01110011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A4_rcmpeqi";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A4_rcmpneq : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = !cmp.eq($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_14071773, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A4_rcmpneq";
+let InputType = "reg";
+let isCommutable = 1;
+}
+def A4_rcmpneqi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = !cmp.eq($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_16355964, ImmRegRel {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b01110011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A4_rcmpeqi";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def A4_round_ri : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = round($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def A4_round_ri_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = round($Rs32,#$Ii):sat",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def A4_round_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = round($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def A4_round_rr_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = round($Rs32,$Rt32):sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def A4_subp_c : HInst<
+(outs DoubleRegs:$Rdd32, PredRegs:$Px4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Px4in),
+"$Rdd32 = sub($Rss32,$Rtt32,$Px4):carry",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_151014 {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000010111;
+let isPredicateLate = 1;
+let Constraints = "$Px4 = $Px4in";
+}
+def A4_tfrcpp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins CtrRegs64:$Css32),
+"$Rdd32 = $Css32",
+CR_tc_3x_SLOT3, TypeCR>, Enc_13094118 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01101000000;
+}
+def A4_tfrpcp : HInst<
+(outs CtrRegs64:$Cdd32),
+(ins DoubleRegs:$Rss32),
+"$Cdd32 = $Rss32",
+CR_tc_3x_SLOT3, TypeCR>, Enc_1329520 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01100011001;
+}
+def A4_tlbmatch : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Pd4 = tlbmatch($Rss32,$Rt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_2492727 {
+let Inst{7-2} = 0b011000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11010010000;
+let isPredicateLate = 1;
+}
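+// A4_vcmp*i: element-wise compares of a register pair against an
+// immediate; A4_vcmpbeq_any reduces the byte compare with any8().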
+def A4_vcmpbeq_any : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = any8(vcmpb.eq($Rss32,$Rtt32))",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11010010000;
+}
+def A4_vcmpbeqi : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, u8_0Imm:$Ii),
+"$Pd4 = vcmpb.eq($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 {
+let Inst{4-2} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011100000;
+}
+def A4_vcmpbgt : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = vcmpb.gt($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b010000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11010010000;
+}
+def A4_vcmpbgti : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
+"$Pd4 = vcmpb.gt($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 {
+let Inst{4-2} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011100001;
+}
+def A4_vcmpbgtui : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
+"$Pd4 = vcmpb.gtu($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_2968094 {
+let Inst{4-2} = 0b000;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b11011100010;
+}
+def A4_vcmpheqi : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
+"$Pd4 = vcmph.eq($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 {
+let Inst{4-2} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011100000;
+}
+def A4_vcmphgti : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
+"$Pd4 = vcmph.gt($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 {
+let Inst{4-2} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011100001;
+}
+def A4_vcmphgtui : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
+"$Pd4 = vcmph.gtu($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_2968094 {
+let Inst{4-2} = 0b010;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b11011100010;
+}
+def A4_vcmpweqi : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
+"$Pd4 = vcmpw.eq($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 {
+let Inst{4-2} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011100000;
+}
+def A4_vcmpwgti : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, s8_0Imm:$Ii),
+"$Pd4 = vcmpw.gt($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_13455308 {
+let Inst{4-2} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11011100001;
+}
+def A4_vcmpwgtui : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, u7_0Imm:$Ii),
+"$Pd4 = vcmpw.gtu($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_2968094 {
+let Inst{4-2} = 0b100;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b11011100010;
+}
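+// A4_vrmax*/A4_vrmin*: vector reduce max/min into the accumulator pair
+// $Rxx32 (tied to $Rxx32in); Inst{13} selects the unsigned (u) variants.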
+def A4_vrmaxh : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
+"$Rxx32 = vrmaxh($Rss32,$Ru32)",
+S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A4_vrmaxuh : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
+"$Rxx32 = vrmaxuh($Rss32,$Ru32)",
+S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A4_vrmaxuw : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
+"$Rxx32 = vrmaxuw($Rss32,$Ru32)",
+S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A4_vrmaxw : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
+"$Rxx32 = vrmaxw($Rss32,$Ru32)",
+S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A4_vrminh : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
+"$Rxx32 = vrminh($Rss32,$Ru32)",
+S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A4_vrminuh : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
+"$Rxx32 = vrminuh($Rss32,$Ru32)",
+S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A4_vrminuw : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
+"$Rxx32 = vrminuw($Rss32,$Ru32)",
+S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A4_vrminw : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Ru32),
+"$Rxx32 = vrminw($Rss32,$Ru32)",
+S_3op_tc_3_SLOT23, TypeS_3op>, Enc_9773189 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
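+// A5_ACS: vacsh, a Viterbi-style add-compare-select (V55+). Writes both a
+// register pair and a late predicate, and saturates (defines USR_OVF).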
+def A5_ACS : HInst<
+(outs DoubleRegs:$Rxx32, PredRegs:$Pe4),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32,$Pe4 = vacsh($Rss32,$Rtt32)",
+M_tc_3stall_SLOT23, TypeM>, Enc_12822813, Requires<[HasV55T]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010101;
+let isPredicateLate = 1;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def A5_vaddhubs : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = vaddhub($Rss32,$Rtt32):sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9277990, Requires<[HasV5T]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def A6_vminub_RdP : HInst<
+(outs DoubleRegs:$Rdd32, PredRegs:$Pe4),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32,$Pe4 = vminub($Rtt32,$Rss32)",
+M_tc_2_SLOT23, TypeM>, Enc_766909, Requires<[HasV62T]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010111;
+let isPredicateLate = 1;
+let prefersSlot3 = 1;
+}
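+// C2_*: predicate-register operations: all8/any8 reductions, boolean
+// and/or/xor/not, bit tests (bitsclr/bitsset), and transfers between
+// predicate and general registers.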
+def C2_all8 : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4),
+"$Pd4 = all8($Ps4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_6975103 {
+let Inst{13-2} = 0b000000000000;
+let Inst{31-18} = 0b01101011101000;
+}
+def C2_and : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Pt4, PredRegs:$Ps4),
+"$Pd4 = and($Pt4,$Ps4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_8891794 {
+let Inst{7-2} = 0b000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011000000;
+}
+def C2_andn : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Pt4, PredRegs:$Ps4),
+"$Pd4 = and($Pt4,!$Ps4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_8891794 {
+let Inst{7-2} = 0b000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011011000;
+}
+def C2_any8 : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4),
+"$Pd4 = any8($Ps4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_6975103 {
+let Inst{13-2} = 0b000000000000;
+let Inst{31-18} = 0b01101011100000;
+}
+def C2_bitsclr : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = bitsclr($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111100;
+}
+def C2_bitsclri : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u6_0Imm:$Ii),
+"$Pd4 = bitsclr($Rs32,#$Ii)",
+S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_14574598 {
+let Inst{7-2} = 0b000000;
+let Inst{31-21} = 0b10000101100;
+}
+def C2_bitsset : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = bitsset($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111010;
+}
+def C2_ccombinewf : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4) $Rdd32 = combine($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8202458, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let BaseOpcode = "A2_combinew";
+}
+def C2_ccombinewnewf : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pu4.new) $Rdd32 = combine($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8202458, PredNewRel {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_combinew";
+}
+def C2_ccombinewnewt : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4.new) $Rdd32 = combine($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8202458, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11111101000;
+let isPredicated = 1;
+let isPredicatedNew = 1;
+let BaseOpcode = "A2_combinew";
+}
+def C2_ccombinewt : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pu4) $Rdd32 = combine($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_8202458, PredNewRel {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11111101000;
+let isPredicated = 1;
+let BaseOpcode = "A2_combinew";
+}
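+// C2_cmove{it,if} / C2_cmovenew{it,if}: conditional immediate transfers in
+// true/false and .new-predicate flavors; the 12-bit payload is extendable
+// to a full 32-bit immediate.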
+def C2_cmoveif : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, s32_0Imm:$Ii),
+"if (!$Pu4) $Rd32 = #$Ii",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9487067, PredNewRel, ImmRegRel {
+let Inst{13-13} = 0b0;
+let Inst{20-20} = 0b0;
+let Inst{31-23} = 0b011111101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_tfr";
+let InputType = "imm";
+let BaseOpcode = "A2_tfrsi";
+let isMoveImm = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 0;
+}
+def C2_cmoveit : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, s32_0Imm:$Ii),
+"if ($Pu4) $Rd32 = #$Ii",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9487067, PredNewRel, ImmRegRel {
+let Inst{13-13} = 0b0;
+let Inst{20-20} = 0b0;
+let Inst{31-23} = 0b011111100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "A2_tfr";
+let InputType = "imm";
+let BaseOpcode = "A2_tfrsi";
+let isMoveImm = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 0;
+}
+def C2_cmovenewif : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, s32_0Imm:$Ii),
+"if (!$Pu4.new) $Rd32 = #$Ii",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9487067, PredNewRel, ImmRegRel {
+let Inst{13-13} = 0b1;
+let Inst{20-20} = 0b0;
+let Inst{31-23} = 0b011111101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let CextOpcode = "A2_tfr";
+let InputType = "imm";
+let BaseOpcode = "A2_tfrsi";
+let isMoveImm = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 0;
+}
+def C2_cmovenewit : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, s32_0Imm:$Ii),
+"if ($Pu4.new) $Rd32 = #$Ii",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9487067, PredNewRel, ImmRegRel {
+let Inst{13-13} = 0b1;
+let Inst{20-20} = 0b0;
+let Inst{31-23} = 0b011111100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPredicatedNew = 1;
+let CextOpcode = "A2_tfr";
+let InputType = "imm";
+let BaseOpcode = "A2_tfrsi";
+let isMoveImm = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 0;
+}
+def C2_cmpeq : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmp.eq($Rs32,$Rt32)",
+ALU32_3op_tc_2early_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110010000;
+let CextOpcode = "C2_cmpeq";
+let InputType = "reg";
+let isCommutable = 1;
+let isCompare = 1;
+}
+def C2_cmpeqi : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Pd4 = cmp.eq($Rs32,#$Ii)",
+ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_16014536, ImmRegRel {
+let Inst{4-2} = 0b000;
+let Inst{31-22} = 0b0111010100;
+let CextOpcode = "C2_cmpeq";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+}
+def C2_cmpeqp : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = cmp.eq($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010100;
+let isCommutable = 1;
+let isCompare = 1;
+}
+def C2_cmpgei : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s8_0Imm:$Ii),
+"$Pd4 = cmp.ge($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op> {
+let isCompare = 1;
+let isPseudo = 1;
+}
+def C2_cmpgeui : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u8_0Imm:$Ii),
+"$Pd4 = cmp.geu($Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op> {
+let isCompare = 1;
+let isPseudo = 1;
+}
+def C2_cmpgt : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmp.gt($Rs32,$Rt32)",
+ALU32_3op_tc_2early_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110010010;
+let CextOpcode = "C2_cmpgt";
+let InputType = "reg";
+let isCompare = 1;
+}
+def C2_cmpgti : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Pd4 = cmp.gt($Rs32,#$Ii)",
+ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_16014536, ImmRegRel {
+let Inst{4-2} = 0b000;
+let Inst{31-22} = 0b0111010101;
+let CextOpcode = "C2_cmpgt";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+}
+def C2_cmpgtp : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = cmp.gt($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b010000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010100;
+let isCompare = 1;
+}
+def C2_cmpgtu : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmp.gtu($Rs32,$Rt32)",
+ALU32_3op_tc_2early_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110010011;
+let CextOpcode = "C2_cmpgtu";
+let InputType = "reg";
+let isCompare = 1;
+}
+def C2_cmpgtui : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii),
+"$Pd4 = cmp.gtu($Rs32,#$Ii)",
+ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_13249928, ImmRegRel {
+let Inst{4-2} = 0b000;
+let Inst{31-21} = 0b01110101100;
+let CextOpcode = "C2_cmpgtu";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 0;
+}
+def C2_cmpgtup : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = cmp.gtu($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744 {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010100;
+let isCompare = 1;
+}
+def C2_cmplt : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmp.lt($Rs32,$Rt32)",
+PSEUDO, TypeALU32_3op> {
+let isCompare = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def C2_cmpltu : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = cmp.ltu($Rs32,$Rt32)",
+PSEUDO, TypeALU32_3op> {
+let isCompare = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def C2_mask : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4),
+"$Rdd32 = mask($Pt4)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_10328975 {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b0000;
+let Inst{31-16} = 0b1000011000000000;
+}
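+// C2_mux*: predicate-controlled selects over register/immediate operands;
+// C2_vmux selects byte-wise under the eight predicate bits.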
+def C2_mux : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mux($Pu4,$Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_9626139 {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "reg";
+}
+def C2_muxii : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, s32_0Imm:$Ii, s8_0Imm:$II),
+"$Rd32 = mux($Pu4,#$Ii,#$II)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_9093094 {
+let Inst{31-25} = 0b0111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def C2_muxir : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = mux($Pu4,$Rs32,#$Ii)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534 {
+let Inst{13-13} = 0b0;
+let Inst{31-23} = 0b011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def C2_muxri : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pu4, s32_0Imm:$Ii, IntRegs:$Rs32),
+"$Rd32 = mux($Pu4,#$Ii,$Rs32)",
+ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, Enc_10568534 {
+let Inst{13-13} = 0b0;
+let Inst{31-23} = 0b011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def C2_not : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4),
+"$Pd4 = not($Ps4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_6975103 {
+let Inst{13-2} = 0b000000000000;
+let Inst{31-18} = 0b01101011110000;
+}
+def C2_or : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Pt4, PredRegs:$Ps4),
+"$Pd4 = or($Pt4,$Ps4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_8891794 {
+let Inst{7-2} = 0b000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011001000;
+}
+def C2_orn : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Pt4, PredRegs:$Ps4),
+"$Pd4 = or($Pt4,!$Ps4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_8891794 {
+let Inst{7-2} = 0b000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011111000;
+}
+def C2_pxfer_map : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4),
+"$Pd4 = $Ps4",
+S_2op_tc_1_SLOT23, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def C2_tfrpr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Ps4),
+"$Rd32 = $Ps4",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_11139981 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-18} = 0b10001001010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def C2_tfrrp : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32),
+"$Pd4 = $Rs32",
+S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_4527648 {
+let Inst{13-2} = 0b000000000000;
+let Inst{31-21} = 0b10000101010;
+}
+def C2_vitpack : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Ps4, PredRegs:$Pt4),
+"$Rd32 = vitpack($Ps4,$Pt4)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_6735062 {
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b10001001000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def C2_vmux : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pu4, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmux($Pu4,$Rss32,$Rtt32)",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_7606379 {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010001000;
+}
+def C2_xor : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4),
+"$Pd4 = xor($Ps4,$Pt4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_8324216 {
+let Inst{7-2} = 0b000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011010000;
+}
+def C4_addipc : HInst<
+(outs IntRegs:$Rd32),
+(ins u32_0Imm:$Ii),
+"$Rd32 = add(pc,#$Ii)",
+CR_tc_2_SLOT3, TypeCR>, Enc_9554661 {
+let Inst{6-5} = 0b00;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0110101001001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
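+// C4_{and,or}_{and,or,andn,orn}: fused two-level boolean functions of
+// three predicate registers in a single CR-class instruction.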
+def C4_and_and : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
+"$Pd4 = and($Ps4,and($Pt4,$Pu4))",
+CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 {
+let Inst{5-2} = 0b0000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011000100;
+}
+def C4_and_andn : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
+"$Pd4 = and($Ps4,and($Pt4,!$Pu4))",
+CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 {
+let Inst{5-2} = 0b0000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011100100;
+}
+def C4_and_or : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
+"$Pd4 = and($Ps4,or($Pt4,$Pu4))",
+CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 {
+let Inst{5-2} = 0b0000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011001100;
+}
+def C4_and_orn : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
+"$Pd4 = and($Ps4,or($Pt4,!$Pu4))",
+CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 {
+let Inst{5-2} = 0b0000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011101100;
+}
+def C4_cmplte : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = !cmp.gt($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b000100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110010010;
+let CextOpcode = "C4_cmplte";
+let InputType = "reg";
+let isCompare = 1;
+}
+def C4_cmpltei : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Pd4 = !cmp.gt($Rs32,#$Ii)",
+ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_16014536, ImmRegRel {
+let Inst{4-2} = 0b100;
+let Inst{31-22} = 0b0111010101;
+let CextOpcode = "C4_cmplte";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+}
+def C4_cmplteu : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = !cmp.gtu($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b000100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110010011;
+let CextOpcode = "C4_cmplteu";
+let InputType = "reg";
+let isCompare = 1;
+}
+def C4_cmplteui : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii),
+"$Pd4 = !cmp.gtu($Rs32,#$Ii)",
+ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_13249928, ImmRegRel {
+let Inst{4-2} = 0b100;
+let Inst{31-21} = 0b01110101100;
+let CextOpcode = "C4_cmplteu";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 0;
+}
+def C4_cmpneq : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = !cmp.eq($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_10157519, ImmRegRel {
+let Inst{7-2} = 0b000100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110010000;
+let CextOpcode = "C4_cmpneq";
+let InputType = "reg";
+let isCommutable = 1;
+let isCompare = 1;
+}
+def C4_cmpneqi : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Pd4 = !cmp.eq($Rs32,#$Ii)",
+ALU32_2op_tc_2early_SLOT0123, TypeALU32_2op>, Enc_16014536, ImmRegRel {
+let Inst{4-2} = 0b100;
+let Inst{31-22} = 0b0111010100;
+let CextOpcode = "C4_cmpneq";
+let InputType = "imm";
+let isCompare = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+}
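+// C4_fastcorner9[_not]: predicate helper for the FAST corner-detection
+// kernel, combining the two predicate registers circularly.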
+def C4_fastcorner9 : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4),
+"$Pd4 = fastcorner9($Ps4,$Pt4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_8324216 {
+let Inst{7-2} = 0b100100;
+let Inst{13-10} = 0b1000;
+let Inst{31-18} = 0b01101011000000;
+}
+def C4_fastcorner9_not : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4),
+"$Pd4 = !fastcorner9($Ps4,$Pt4)",
+CR_tc_2early_SLOT23, TypeCR>, Enc_8324216 {
+let Inst{7-2} = 0b100100;
+let Inst{13-10} = 0b1000;
+let Inst{31-18} = 0b01101011000100;
+}
+def C4_nbitsclr : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = !bitsclr($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111101;
+}
+def C4_nbitsclri : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u6_0Imm:$Ii),
+"$Pd4 = !bitsclr($Rs32,#$Ii)",
+S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_14574598 {
+let Inst{7-2} = 0b000000;
+let Inst{31-21} = 0b10000101101;
+}
+def C4_nbitsset : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = !bitsset($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111011;
+}
+def C4_or_and : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
+"$Pd4 = or($Ps4,and($Pt4,$Pu4))",
+CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 {
+let Inst{5-2} = 0b0000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011010100;
+}
+def C4_or_andn : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
+"$Pd4 = or($Ps4,and($Pt4,!$Pu4))",
+CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 {
+let Inst{5-2} = 0b0000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011110100;
+}
+def C4_or_or : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
+"$Pd4 = or($Ps4,or($Pt4,$Pu4))",
+CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 {
+let Inst{5-2} = 0b0000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011011100;
+}
+def C4_or_orn : HInst<
+(outs PredRegs:$Pd4),
+(ins PredRegs:$Ps4, PredRegs:$Pt4, PredRegs:$Pu4),
+"$Pd4 = or($Ps4,or($Pt4,!$Pu4))",
+CR_tc_2early_SLOT23, TypeCR>, Enc_4631106 {
+let Inst{5-2} = 0b0000;
+let Inst{13-10} = 0b0000;
+let Inst{31-18} = 0b01101011111100;
+}
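+// F2_conv_*: scalar FP conversions (Requires<[HasV5T]>). All read USR for
+// the IEEE rounding mode; the :chop forms truncate toward zero instead.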
+def F2_conv_d2df : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = convert_d2df($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000011;
+let Inst{31-21} = 0b10000000111;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_d2sf : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = convert_d2sf($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2d : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = convert_df2d($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10000000111;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2d_chop : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = convert_df2d($Rss32):chop",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10000000111;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2sf : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = convert_df2sf($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2ud : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = convert_df2ud($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10000000111;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2ud_chop : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = convert_df2ud($Rss32):chop",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000111;
+let Inst{31-21} = 0b10000000111;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2uw : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = convert_df2uw($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2uw_chop : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = convert_df2uw($Rss32):chop",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001000101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2w : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = convert_df2w($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_df2w_chop : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = convert_df2w($Rss32):chop",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001000111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2d : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = convert_sf2d($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10000100100;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2d_chop : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = convert_sf2d($Rs32):chop",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10000100100;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2df : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = convert_sf2df($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10000100100;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2ud : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = convert_sf2ud($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000011;
+let Inst{31-21} = 0b10000100100;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2ud_chop : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = convert_sf2ud($Rs32):chop",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10000100100;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2uw : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = convert_sf2uw($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2uw_chop : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = convert_sf2uw($Rs32):chop",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2w : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = convert_sf2w($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_sf2w_chop : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = convert_sf2w($Rs32):chop",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_ud2df : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = convert_ud2df($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_13133231, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10000000111;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_ud2sf : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = convert_ud2sf($Rss32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10001000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_uw2df : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = convert_uw2df($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b10000100100;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_uw2sf : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = convert_uw2sf($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001011001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_w2df : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = convert_w2df($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10000100100;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_conv_w2sf : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = convert_w2sf($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
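+// F2 floating-point classify (dfclass/sfclass), compares, dfmake/sfmake immediates,
+// and scalar arithmetic: add/sub, min/max, mpy, and fused multiply-accumulate (sffma/sffms).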
+def F2_dfclass : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
+"$Pd4 = dfclass($Rss32,#$Ii)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_14400220, Requires<[HasV5T]> {
+let Inst{4-2} = 0b100;
+let Inst{13-10} = 0b0000;
+let Inst{31-21} = 0b11011100100;
+let isFP = 1;
+let Uses = [USR];
+}
+def F2_dfcmpeq : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = dfcmp.eq($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744, Requires<[HasV5T]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010111;
+let isFP = 1;
+let Uses = [USR];
+let isCompare = 1;
+}
+def F2_dfcmpge : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = dfcmp.ge($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744, Requires<[HasV5T]> {
+let Inst{7-2} = 0b010000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010111;
+let isFP = 1;
+let Uses = [USR];
+let isCompare = 1;
+}
+def F2_dfcmpgt : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = dfcmp.gt($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744, Requires<[HasV5T]> {
+let Inst{7-2} = 0b001000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010111;
+let isFP = 1;
+let Uses = [USR];
+let isCompare = 1;
+}
+def F2_dfcmpuo : HInst<
+(outs PredRegs:$Pd4),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Pd4 = dfcmp.uo($Rss32,$Rtt32)",
+ALU64_tc_2early_SLOT23, TypeALU64>, Enc_3831744, Requires<[HasV5T]> {
+let Inst{7-2} = 0b011000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010010111;
+let isFP = 1;
+let Uses = [USR];
+let isCompare = 1;
+}
+def F2_dfimm_n : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins u10_0Imm:$Ii),
+"$Rdd32 = dfmake(#$Ii):neg",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_2702036, Requires<[HasV5T]> {
+let Inst{20-16} = 0b00000;
+let Inst{31-22} = 0b1101100101;
+let prefersSlot3 = 1;
+}
+def F2_dfimm_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins u10_0Imm:$Ii),
+"$Rdd32 = dfmake(#$Ii):pos",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_2702036, Requires<[HasV5T]> {
+let Inst{20-16} = 0b00000;
+let Inst{31-22} = 0b1101100100;
+let prefersSlot3 = 1;
+}
+def F2_sfadd : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = sfadd($Rs32,$Rt32)",
+M_tc_3or4x_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+let isCommutable = 1;
+}
+def F2_sfclass : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Pd4 = sfclass($Rs32,#$Ii)",
+S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_2103742, Requires<[HasV5T]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10000101111;
+let isFP = 1;
+let Uses = [USR];
+}
+def F2_sfcmpeq : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = sfcmp.eq($Rs32,$Rt32)",
+ALU64_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, Requires<[HasV5T]> {
+let Inst{7-2} = 0b011000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111111;
+let isFP = 1;
+let Uses = [USR];
+let isCompare = 1;
+}
+def F2_sfcmpge : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = sfcmp.ge($Rs32,$Rt32)",
+ALU64_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, Requires<[HasV5T]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111111;
+let isFP = 1;
+let Uses = [USR];
+let isCompare = 1;
+}
+def F2_sfcmpgt : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = sfcmp.gt($Rs32,$Rt32)",
+ALU64_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, Requires<[HasV5T]> {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111111;
+let isFP = 1;
+let Uses = [USR];
+let isCompare = 1;
+}
+def F2_sfcmpuo : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = sfcmp.uo($Rs32,$Rt32)",
+ALU64_tc_2early_SLOT23, TypeS_3op>, Enc_10157519, Requires<[HasV5T]> {
+let Inst{7-2} = 0b001000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111111;
+let isFP = 1;
+let Uses = [USR];
+let isCompare = 1;
+}
+def F2_sffixupd : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = sffixupd($Rs32,$Rt32)",
+M_tc_3or4x_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+}
+def F2_sffixupn : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = sffixupn($Rs32,$Rt32)",
+M_tc_3or4x_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+}
+def F2_sffixupr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = sffixupr($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_4075554, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001011101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+}
+def F2_sffma : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += sfmpy($Rs32,$Rt32)",
+M_tc_3or4x_acc_SLOT23, TypeM>, Enc_9223889, Requires<[HasV5T]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def F2_sffma_lib : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += sfmpy($Rs32,$Rt32):lib",
+M_tc_3or4x_acc_SLOT23, TypeM>, Enc_9223889, Requires<[HasV5T]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def F2_sffma_sc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32, PredRegs:$Pu4),
+"$Rx32 += sfmpy($Rs32,$Rt32,$Pu4):scale",
+M_tc_3or4x_acc_SLOT23, TypeM>, Enc_15194851, Requires<[HasV5T]> {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def F2_sffms : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= sfmpy($Rs32,$Rt32)",
+M_tc_3or4x_acc_SLOT23, TypeM>, Enc_9223889, Requires<[HasV5T]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def F2_sffms_lib : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= sfmpy($Rs32,$Rt32):lib",
+M_tc_3or4x_acc_SLOT23, TypeM>, Enc_9223889, Requires<[HasV5T]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def F2_sfimm_n : HInst<
+(outs IntRegs:$Rd32),
+(ins u10_0Imm:$Ii),
+"$Rd32 = sfmake(#$Ii):neg",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_9082775, Requires<[HasV5T]> {
+let Inst{20-16} = 0b00000;
+let Inst{31-22} = 0b1101011001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def F2_sfimm_p : HInst<
+(outs IntRegs:$Rd32),
+(ins u10_0Imm:$Ii),
+"$Rd32 = sfmake(#$Ii):pos",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_9082775, Requires<[HasV5T]> {
+let Inst{20-16} = 0b00000;
+let Inst{31-22} = 0b1101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def F2_sfinvsqrta : HInst<
+(outs IntRegs:$Rd32, PredRegs:$Pe4),
+(ins IntRegs:$Rs32),
+"$Rd32,$Pe4 = sfinvsqrta($Rs32)",
+S_2op_tc_3or4x_SLOT23, TypeS_2op>, Enc_5718302, Requires<[HasV5T]> {
+let Inst{13-7} = 0b0000000;
+let Inst{31-21} = 0b10001011111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let isPredicateLate = 1;
+let prefersSlot3 = 1;
+}
+def F2_sfmax : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = sfmax($Rs32,$Rt32)",
+M_tc_2_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_sfmin : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = sfmin($Rs32,$Rt32)",
+M_tc_2_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
+def F2_sfmpy : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = sfmpy($Rs32,$Rt32)",
+M_tc_3or4x_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+let isCommutable = 1;
+}
+def F2_sfrecipa : HInst<
+(outs IntRegs:$Rd32, PredRegs:$Pe4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32,$Pe4 = sfrecipa($Rs32,$Rt32)",
+M_tc_3or4x_SLOT23, TypeM>, Enc_5853469, Requires<[HasV5T]> {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101011111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let isPredicateLate = 1;
+let prefersSlot3 = 1;
+}
+def F2_sfsub : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = sfsub($Rs32,$Rt32)",
+M_tc_3or4x_SLOT23, TypeM>, Enc_14071773, Requires<[HasV5T]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isFP = 1;
+let prefersSlot3 = 1;
+let Uses = [USR];
+}
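+// J2 calls and jumps: direct (call/jump $Ii) and register-indirect (callr/jumpr $Rs32),
+// unconditional or predicated, with .new predicate and :t/:nt taken-hint variants.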
+def J2_call : HInst<
+(outs),
+(ins a30_2Imm:$Ii),
+"call $Ii",
+J_tc_2early_SLOT23, TypeJ>, Enc_13453446, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{31-25} = 0b0101101;
+let isCall = 1;
+let prefersSlot3 = 1;
+let Uses = [R29];
+let Defs = [PC, R31];
+let BaseOpcode = "J2_call";
+let isPredicable = 1;
+let hasSideEffects = 1;
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 24;
+let opExtentAlign = 2;
+}
+def J2_callf : HInst<
+(outs),
+(ins PredRegs:$Pu4, a30_2Imm:$Ii),
+"if (!$Pu4) call $Ii",
+J_tc_2early_SLOT23, TypeJ>, Enc_14868535, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b000;
+let Inst{21-21} = 0b1;
+let Inst{31-24} = 0b01011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isCall = 1;
+let prefersSlot3 = 1;
+let Uses = [R29];
+let Defs = [PC, R31];
+let BaseOpcode = "J2_call";
+let hasSideEffects = 1;
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_callr : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"callr $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_11704059 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01010000101;
+let cofMax1 = 1;
+let isCall = 1;
+let prefersSlot3 = 1;
+let Uses = [R29];
+let Defs = [PC, R31];
+let hasSideEffects = 1;
+}
+def J2_callrf : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) callr $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953 {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-21} = 0b01010001001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let cofMax1 = 1;
+let isCall = 1;
+let prefersSlot3 = 1;
+let Uses = [R29];
+let Defs = [PC, R31];
+let hasSideEffects = 1;
+let isTaken = Inst{12};
+}
+def J2_callrt : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) callr $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953 {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-21} = 0b01010001000;
+let isPredicated = 1;
+let cofMax1 = 1;
+let isCall = 1;
+let prefersSlot3 = 1;
+let Uses = [R29];
+let Defs = [PC, R31];
+let hasSideEffects = 1;
+let isTaken = Inst{12};
+}
+def J2_callt : HInst<
+(outs),
+(ins PredRegs:$Pu4, a30_2Imm:$Ii),
+"if ($Pu4) call $Ii",
+J_tc_2early_SLOT23, TypeJ>, Enc_14868535, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b000;
+let Inst{21-21} = 0b0;
+let Inst{31-24} = 0b01011101;
+let isPredicated = 1;
+let isCall = 1;
+let prefersSlot3 = 1;
+let Uses = [R29];
+let Defs = [PC, R31];
+let BaseOpcode = "J2_call";
+let hasSideEffects = 1;
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_endloop0 : HInst<
+(outs),
+(ins),
+"endloop0",
+PSEUDO, TypeJ> {
+let Uses = [LC0, SA0];
+let Defs = [LC0, P3, PC, USR];
+let isPseudo = 1;
+}
+def J2_endloop01 : HInst<
+(outs),
+(ins),
+"endloop01",
+PSEUDO, TypeJ> {
+let Uses = [LC0, LC1, SA0, SA1];
+let Defs = [LC0, LC1, P3, PC, USR];
+let isPseudo = 1;
+}
+def J2_endloop1 : HInst<
+(outs),
+(ins),
+"endloop1",
+PSEUDO, TypeJ> {
+let Uses = [LC1, SA1];
+let Defs = [LC1, PC];
+let isPseudo = 1;
+}
+def J2_jump : HInst<
+(outs),
+(ins b30_2Imm:$Ii),
+"jump $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_13453446, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{31-25} = 0b0101100;
+let isTerminator = 1;
+let isBranch = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isBarrier = 1;
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 24;
+let opExtentAlign = 2;
+}
+def J2_jumpf : HInst<
+(outs),
+(ins PredRegs:$Pu4, b30_2Imm:$Ii),
+"if (!$Pu4) jump:nt $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b000;
+let Inst{21-21} = 0b1;
+let Inst{31-24} = 0b01011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_jumpf_nopred_map : HInst<
+(outs),
+(ins PredRegs:$Pu4, b15_2Imm:$Ii),
+"if (!$Pu4) jump $Ii",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def J2_jumpfnew : HInst<
+(outs),
+(ins PredRegs:$Pu4, b30_2Imm:$Ii),
+"if (!$Pu4.new) jump:nt $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b010;
+let Inst{21-21} = 0b1;
+let Inst{31-24} = 0b01011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_jumpfnewpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, b30_2Imm:$Ii),
+"if (!$Pu4.new) jump:t $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b110;
+let Inst{21-21} = 0b1;
+let Inst{31-24} = 0b01011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_jumpfpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, b30_2Imm:$Ii),
+"if (!$Pu4) jump:t $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, Requires<[HasV60T]>, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b100;
+let Inst{21-21} = 0b1;
+let Inst{31-24} = 0b01011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_jumpr : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"jumpr $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_11704059, PredNewRel {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01010010100;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isBarrier = 1;
+let isPredicable = 1;
+}
+def J2_jumprf : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) jumpr:nt $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-21} = 0b01010011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isTaken = Inst{12};
+}
+def J2_jumprf_nopred_map : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) jumpr $Rs32",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def J2_jumprfnew : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) jumpr:nt $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0010;
+let Inst{31-21} = 0b01010011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isTaken = Inst{12};
+}
+def J2_jumprfnewpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4.new) jumpr:t $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0110;
+let Inst{31-21} = 0b01010011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isTaken = Inst{12};
+}
+def J2_jumprfpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if (!$Pu4) jumpr:t $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953, Requires<[HasV60T]>, PredNewRel {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0100;
+let Inst{31-21} = 0b01010011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isTaken = Inst{12};
+}
+def J2_jumprgtez : HInst<
+(outs),
+(ins IntRegs:$Rs32, b13_2Imm:$Ii),
+"if ($Rs32>=#0) jump:nt $Ii",
+CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 {
+let Inst{0-0} = 0b0;
+let Inst{12-12} = 0b0;
+let Inst{31-22} = 0b0110000101;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let isTaken = Inst{12};
+}
+def J2_jumprgtezpt : HInst<
+(outs),
+(ins IntRegs:$Rs32, b13_2Imm:$Ii),
+"if ($Rs32>=#0) jump:t $Ii",
+CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 {
+let Inst{0-0} = 0b0;
+let Inst{12-12} = 0b1;
+let Inst{31-22} = 0b0110000101;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let isTaken = Inst{12};
+}
+def J2_jumprltez : HInst<
+(outs),
+(ins IntRegs:$Rs32, b13_2Imm:$Ii),
+"if ($Rs32<=#0) jump:nt $Ii",
+CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 {
+let Inst{0-0} = 0b0;
+let Inst{12-12} = 0b0;
+let Inst{31-22} = 0b0110000111;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let isTaken = Inst{12};
+}
+def J2_jumprltezpt : HInst<
+(outs),
+(ins IntRegs:$Rs32, b13_2Imm:$Ii),
+"if ($Rs32<=#0) jump:t $Ii",
+CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 {
+let Inst{0-0} = 0b0;
+let Inst{12-12} = 0b1;
+let Inst{31-22} = 0b0110000111;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let isTaken = Inst{12};
+}
+def J2_jumprnz : HInst<
+(outs),
+(ins IntRegs:$Rs32, b13_2Imm:$Ii),
+"if ($Rs32==#0) jump:nt $Ii",
+CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 {
+let Inst{0-0} = 0b0;
+let Inst{12-12} = 0b0;
+let Inst{31-22} = 0b0110000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let isTaken = Inst{12};
+}
+def J2_jumprnzpt : HInst<
+(outs),
+(ins IntRegs:$Rs32, b13_2Imm:$Ii),
+"if ($Rs32==#0) jump:t $Ii",
+CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 {
+let Inst{0-0} = 0b0;
+let Inst{12-12} = 0b1;
+let Inst{31-22} = 0b0110000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let isTaken = Inst{12};
+}
+def J2_jumprt : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) jumpr:nt $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0000;
+let Inst{31-21} = 0b01010011010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isTaken = Inst{12};
+}
+def J2_jumprt_nopred_map : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) jumpr $Rs32",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def J2_jumprtnew : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) jumpr:nt $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0010;
+let Inst{31-21} = 0b01010011010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isTaken = Inst{12};
+}
+def J2_jumprtnewpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4.new) jumpr:t $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953, PredNewRel {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0110;
+let Inst{31-21} = 0b01010011010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isTaken = Inst{12};
+}
+def J2_jumprtpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, IntRegs:$Rs32),
+"if ($Pu4) jumpr:t $Rs32",
+J_tc_2early_SLOT2, TypeJ>, Enc_1928953, Requires<[HasV60T]>, PredNewRel {
+let Inst{7-0} = 0b00000000;
+let Inst{13-10} = 0b0100;
+let Inst{31-21} = 0b01010011010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let Defs = [PC];
+let InputType = "reg";
+let BaseOpcode = "J2_jumpr";
+let isTaken = Inst{12};
+}
+def J2_jumprz : HInst<
+(outs),
+(ins IntRegs:$Rs32, b13_2Imm:$Ii),
+"if ($Rs32!=#0) jump:nt $Ii",
+CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 {
+let Inst{0-0} = 0b0;
+let Inst{12-12} = 0b0;
+let Inst{31-22} = 0b0110000100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let isTaken = Inst{12};
+}
+def J2_jumprzpt : HInst<
+(outs),
+(ins IntRegs:$Rs32, b13_2Imm:$Ii),
+"if ($Rs32!=#0) jump:t $Ii",
+CR_tc_2early_SLOT3, TypeCR>, Enc_12477789 {
+let Inst{0-0} = 0b0;
+let Inst{12-12} = 0b1;
+let Inst{31-22} = 0b0110000100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let isTaken = Inst{12};
+}
+def J2_jumpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, b30_2Imm:$Ii),
+"if ($Pu4) jump:nt $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b000;
+let Inst{21-21} = 0b0;
+let Inst{31-24} = 0b01011100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_jumpt_nopred_map : HInst<
+(outs),
+(ins PredRegs:$Pu4, b15_2Imm:$Ii),
+"if ($Pu4) jump $Ii",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def J2_jumptnew : HInst<
+(outs),
+(ins PredRegs:$Pu4, b30_2Imm:$Ii),
+"if ($Pu4.new) jump:nt $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b010;
+let Inst{21-21} = 0b0;
+let Inst{31-24} = 0b01011100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_jumptnewpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, b30_2Imm:$Ii),
+"if ($Pu4.new) jump:t $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b110;
+let Inst{21-21} = 0b0;
+let Inst{31-24} = 0b01011100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
+def J2_jumptpt : HInst<
+(outs),
+(ins PredRegs:$Pu4, b30_2Imm:$Ii),
+"if ($Pu4) jump:t $Ii",
+J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT, TypeJ>, Enc_14868535, Requires<[HasV60T]>, PredNewRel {
+let Inst{0-0} = 0b0;
+let Inst{12-10} = 0b100;
+let Inst{21-21} = 0b0;
+let Inst{31-24} = 0b01011100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let Defs = [PC];
+let InputType = "imm";
+let BaseOpcode = "J2_jump";
+let isTaken = Inst{12};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 17;
+let opExtentAlign = 2;
+}
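+// J2 hardware loops: loop0/loop1 set SA/LC; the spNloop0 forms also produce p3 late
+// (isPredicateLate) for software-pipelined loops. pause and trap0 are solo instructions.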
+def J2_loop0i : HInst<
+(outs),
+(ins b30_2Imm:$Ii, u10_0Imm:$II),
+"loop0($Ii,#$II)",
+CR_tc_3x_SLOT3, TypeCR>, Enc_9939385 {
+let Inst{2-2} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01101001000;
+let Defs = [LC0, SA0, USR];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_loop0r : HInst<
+(outs),
+(ins b30_2Imm:$Ii, IntRegs:$Rs32),
+"loop0($Ii,$Rs32)",
+CR_tc_3x_SLOT3, TypeCR>, Enc_5790679 {
+let Inst{2-0} = 0b000;
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01100000000;
+let Defs = [LC0, SA0, USR];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_loop1i : HInst<
+(outs),
+(ins b30_2Imm:$Ii, u10_0Imm:$II),
+"loop1($Ii,#$II)",
+CR_tc_3x_SLOT3, TypeCR>, Enc_9939385 {
+let Inst{2-2} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01101001001;
+let Defs = [LC1, SA1];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_loop1r : HInst<
+(outs),
+(ins b30_2Imm:$Ii, IntRegs:$Rs32),
+"loop1($Ii,$Rs32)",
+CR_tc_3x_SLOT3, TypeCR>, Enc_5790679 {
+let Inst{2-0} = 0b000;
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01100000001;
+let Defs = [LC1, SA1];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_pause : HInst<
+(outs),
+(ins u8_0Imm:$Ii),
+"pause(#$Ii)",
+J_tc_2early_SLOT2, TypeJ>, Enc_8732960 {
+let Inst{1-0} = 0b00;
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0101010001000000;
+let isSolo = 1;
+}
+def J2_ploop1si : HInst<
+(outs),
+(ins b30_2Imm:$Ii, u10_0Imm:$II),
+"p3 = sp1loop0($Ii,#$II)",
+CR_tc_2early_SLOT3, TypeCR>, Enc_9939385 {
+let Inst{2-2} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01101001101;
+let isPredicateLate = 1;
+let Defs = [LC0, P3, SA0, USR];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_ploop1sr : HInst<
+(outs),
+(ins b30_2Imm:$Ii, IntRegs:$Rs32),
+"p3 = sp1loop0($Ii,$Rs32)",
+CR_tc_2early_SLOT3, TypeCR>, Enc_5790679 {
+let Inst{2-0} = 0b000;
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01100000101;
+let isPredicateLate = 1;
+let Defs = [LC0, P3, SA0, USR];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_ploop2si : HInst<
+(outs),
+(ins b30_2Imm:$Ii, u10_0Imm:$II),
+"p3 = sp2loop0($Ii,#$II)",
+CR_tc_2early_SLOT3, TypeCR>, Enc_9939385 {
+let Inst{2-2} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01101001110;
+let isPredicateLate = 1;
+let Defs = [LC0, P3, SA0, USR];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_ploop2sr : HInst<
+(outs),
+(ins b30_2Imm:$Ii, IntRegs:$Rs32),
+"p3 = sp2loop0($Ii,$Rs32)",
+CR_tc_2early_SLOT3, TypeCR>, Enc_5790679 {
+let Inst{2-0} = 0b000;
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01100000110;
+let isPredicateLate = 1;
+let Defs = [LC0, P3, SA0, USR];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_ploop3si : HInst<
+(outs),
+(ins b30_2Imm:$Ii, u10_0Imm:$II),
+"p3 = sp3loop0($Ii,#$II)",
+CR_tc_2early_SLOT3, TypeCR>, Enc_9939385 {
+let Inst{2-2} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01101001111;
+let isPredicateLate = 1;
+let Defs = [LC0, P3, SA0, USR];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_ploop3sr : HInst<
+(outs),
+(ins b30_2Imm:$Ii, IntRegs:$Rs32),
+"p3 = sp3loop0($Ii,$Rs32)",
+CR_tc_2early_SLOT3, TypeCR>, Enc_5790679 {
+let Inst{2-0} = 0b000;
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01100000111;
+let isPredicateLate = 1;
+let Defs = [LC0, P3, SA0, USR];
+let isExtendable = 1;
+let opExtendable = 0;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 2;
+}
+def J2_trap0 : HInst<
+(outs),
+(ins u8_0Imm:$Ii),
+"trap0(#$Ii)",
+J_tc_2early_SLOT2, TypeJ>, Enc_8732960 {
+let Inst{1-0} = 0b00;
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0101010000000000;
+let isSolo = 1;
+}
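+// J4 compound compare-and-jump: "jumpnv" forms test a new-value register ($Ns8.new);
+// the p0/p1 forms pair a dot-new predicate set with the jump. Each has :nt/:t variants.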
+def J4_cmpeq_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (!cmp.eq($Ns8.new,$Rt32)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeq_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (!cmp.eq($Ns8.new,$Rt32)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeq_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b00;
+let Inst{31-22} = 0b0001010001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqp0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeq_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b10;
+let Inst{31-22} = 0b0001010001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqp0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeq_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-22} = 0b0001010001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqp1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeq_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b11;
+let Inst{31-22} = 0b0001010001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqp1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeq_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (cmp.eq($Ns8.new,$Rt32)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeq_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (cmp.eq($Ns8.new,$Rt32)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeq_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b00;
+let Inst{31-22} = 0b0001010000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqp0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeq_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,$Rt16); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b10;
+let Inst{31-22} = 0b0001010000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqp0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeq_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-22} = 0b0001010000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqp1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeq_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,$Rt16); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b11;
+let Inst{31-22} = 0b0001010000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqp1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqi_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (!cmp.eq($Ns8.new,#$II)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqi";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeqi_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (!cmp.eq($Ns8.new,#$II)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqi";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeqi_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$II); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001000001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqi_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$II); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001000001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqi_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,#$II); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001001001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqi_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,#$II); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001001001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqi_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (cmp.eq($Ns8.new,#$II)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqi";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeqi_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (cmp.eq($Ns8.new,#$II)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqi";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeqi_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$II); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001000000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqi_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$II); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001000000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqi_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,#$II); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001001000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqi_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,#$II); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001001000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqn1_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
+"if (!cmp.eq($Ns8.new,#$n1)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4359901, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010011001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqn1r";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeqn1_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
+"if (!cmp.eq($Ns8.new,#$n1)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_8612939, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010011001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqn1r";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeqn1_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$n1); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_844699, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{31-22} = 0b0001000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqn1p0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqn1_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$n1); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_5338033, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{31-22} = 0b0001000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqn1p0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqn1_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,#$n1); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14150875, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{31-22} = 0b0001001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqn1p1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqn1_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,#$n1); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_15450971, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{31-22} = 0b0001001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqn1p1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqn1_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
+"if (cmp.eq($Ns8.new,#$n1)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_14998517, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010011000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqn1r";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeqn1_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
+"if (cmp.eq($Ns8.new,#$n1)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_11544269, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010011000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpeqn1r";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpeqn1_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$n1); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_5401217, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{31-22} = 0b0001000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqn1p0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqn1_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$n1); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12419313, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{31-22} = 0b0001000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpeqn1p0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqn1_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,#$n1); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_4684887, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{31-22} = 0b0001001110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqn1p1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpeqn1_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p1 = cmp.eq($Rs16,#$n1); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_220949, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{31-22} = 0b0001001110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpeqn1p1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgt_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (!cmp.gt($Ns8.new,$Rt32)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgt_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (!cmp.gt($Ns8.new,$Rt32)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgt_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b00;
+let Inst{31-22} = 0b0001010011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtp0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgt_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b10;
+let Inst{31-22} = 0b0001010011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtp0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgt_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-22} = 0b0001010011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtp1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgt_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b11;
+let Inst{31-22} = 0b0001010011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtp1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgt_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (cmp.gt($Ns8.new,$Rt32)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgt_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (cmp.gt($Ns8.new,$Rt32)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgt_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b00;
+let Inst{31-22} = 0b0001010010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtp0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgt_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,$Rt16); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b10;
+let Inst{31-22} = 0b0001010010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtp0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgt_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-22} = 0b0001010010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtp1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgt_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,$Rt16); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b11;
+let Inst{31-22} = 0b0001010010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtp1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgti_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (!cmp.gt($Ns8.new,#$II)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtir";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgti_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (!cmp.gt($Ns8.new,#$II)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtir";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgti_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,#$II); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001000011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgti_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,#$II); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001000011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgti_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,#$II); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001001011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgti_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,#$II); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001001011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgti_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (cmp.gt($Ns8.new,#$II)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtir";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgti_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (cmp.gt($Ns8.new,#$II)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtir";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgti_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,#$II); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001000010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgti_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,#$II); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001000010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgti_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,#$II); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001001010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgti_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,#$II); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001001010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtn1_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
+"if (!cmp.gt($Ns8.new,#$n1)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_8674673, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtn1r";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtn1_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
+"if (!cmp.gt($Ns8.new,#$n1)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15763937, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtn1r";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtn1_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,#$n1); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_5915771, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000001;
+let Inst{31-22} = 0b0001000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtn1p0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtn1_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,#$n1); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7315939, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100001;
+let Inst{31-22} = 0b0001000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtn1p0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtn1_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,#$n1); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7785569, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000001;
+let Inst{31-22} = 0b0001001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtn1p1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtn1_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,#$n1); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_10968391, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100001;
+let Inst{31-22} = 0b0001001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtn1p1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtn1_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
+"if (cmp.gt($Ns8.new,#$n1)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_364753, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010011010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtn1r";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtn1_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, n1Const:$n1, b30_2Imm:$Ii),
+"if (cmp.gt($Ns8.new,#$n1)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_8479583, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010011010;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtn1r";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtn1_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,#$n1); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_2428539, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000001;
+let Inst{31-22} = 0b0001000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtn1p0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtn1_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p0 = cmp.gt($Rs16,#$n1); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_8919369, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100001;
+let Inst{31-22} = 0b0001000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtn1p0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtn1_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,#$n1); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_8577055, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000001;
+let Inst{31-22} = 0b0001001110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtn1p1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtn1_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1, b30_2Imm:$Ii),
+"p1 = cmp.gt($Rs16,#$n1); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14530015, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100001;
+let Inst{31-22} = 0b0001001110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtn1p1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtu_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (!cmp.gtu($Ns8.new,$Rt32)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtur";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtu_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (!cmp.gtu($Ns8.new,$Rt32)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtur";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtu_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.gtu($Rs16,$Rt16); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b00;
+let Inst{31-22} = 0b0001010101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtup0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtu_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.gtu($Rs16,$Rt16); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b10;
+let Inst{31-22} = 0b0001010101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtup0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtu_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.gtu($Rs16,$Rt16); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-22} = 0b0001010101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtup1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtu_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.gtu($Rs16,$Rt16); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b11;
+let Inst{31-22} = 0b0001010101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtup1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtu_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (cmp.gtu($Ns8.new,$Rt32)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtur";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtu_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
+"if (cmp.gtu($Ns8.new,$Rt32)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_15140689, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtur";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtu_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.gtu($Rs16,$Rt16); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b00;
+let Inst{31-22} = 0b0001010100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtup0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtu_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p0 = cmp.gtu($Rs16,$Rt16); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b10;
+let Inst{31-22} = 0b0001010100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtup0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtu_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.gtu($Rs16,$Rt16); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-22} = 0b0001010100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtup1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtu_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, GeneralSubRegs:$Rt16, b30_2Imm:$Ii),
+"p1 = cmp.gtu($Rs16,$Rt16); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_14264243, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b11;
+let Inst{31-22} = 0b0001010100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtup1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtui_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (!cmp.gtu($Ns8.new,#$II)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtuir";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtui_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (!cmp.gtu($Ns8.new,#$II)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtuir";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtui_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.gtu($Rs16,#$II); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtuip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtui_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.gtu($Rs16,#$II); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtuip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtui_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.gtu($Rs16,#$II); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtuip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtui_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.gtu($Rs16,#$II); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtuip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtui_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (cmp.gtu($Ns8.new,#$II)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtuir";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtui_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, u5_0Imm:$II, b30_2Imm:$Ii),
+"if (cmp.gtu($Ns8.new,#$II)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_4397470, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpgtuir";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_cmpgtui_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.gtu($Rs16,#$II); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001000100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtuip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtui_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p0 = cmp.gtu($Rs16,#$II); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001000100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let BaseOpcode = "J4_cmpgtuip0";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtui_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.gtu($Rs16,#$II); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-22} = 0b0001001100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtuip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmpgtui_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u5_0Imm:$II, b30_2Imm:$Ii),
+"p1 = cmp.gtu($Rs16,#$II); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_7305764, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-22} = 0b0001001100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let BaseOpcode = "J4_cmpgtuip1";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_cmplt_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (!cmp.gt($Rt32,$Ns8.new)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpltr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def J4_cmplt_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (!cmp.gt($Rt32,$Ns8.new)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpltr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def J4_cmplt_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (cmp.gt($Rt32,$Ns8.new)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpltr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def J4_cmplt_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (cmp.gt($Rt32,$Ns8.new)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpltr";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def J4_cmpltu_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (!cmp.gtu($Rt32,$Ns8.new)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010001001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpltur";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def J4_cmpltu_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (!cmp.gtu($Rt32,$Ns8.new)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010001001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpltur";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def J4_cmpltu_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (cmp.gtu($Rt32,$Ns8.new)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010001000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpltur";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def J4_cmpltu_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Rt32, IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (cmp.gtu($Rt32,$Ns8.new)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_6730375, PredRel {
+let Inst{0-0} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010001000;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let BaseOpcode = "J4_cmpltur";
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
+def J4_hintjumpr : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"hintjr($Rs32)",
+J_tc_2early_SLOT2, TypeJ>, Enc_11704059 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01010010101;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+}
+def J4_jumpseti : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins u6_0Imm:$II, b30_2Imm:$Ii),
+"$Rd16 = #$II ; jump $Ii",
+COMPOUND, TypeCJ>, Enc_4834775 {
+let Inst{0-0} = 0b0;
+let Inst{31-22} = 0b0001011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isTerminator = 1;
+let isBranch = 1;
+let Defs = [PC];
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_jumpsetr : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"$Rd16 = $Rs16 ; jump $Ii",
+COMPOUND, TypeCJ>, Enc_2639299 {
+let Inst{0-0} = 0b0;
+let Inst{13-12} = 0b00;
+let Inst{31-22} = 0b0001011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isTerminator = 1;
+let isBranch = 1;
+let Defs = [PC];
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_tstbit0_f_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (!tstbit($Ns8.new,#0)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_1898420 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_tstbit0_f_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (!tstbit($Ns8.new,#0)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_1898420 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_tstbit0_fp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"p0 = tstbit($Rs16,#0); if (!p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000011;
+let Inst{31-22} = 0b0001000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_tstbit0_fp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"p0 = tstbit($Rs16,#0); if (!p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100011;
+let Inst{31-22} = 0b0001000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_tstbit0_fp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"p1 = tstbit($Rs16,#0); if (!p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000011;
+let Inst{31-22} = 0b0001001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_tstbit0_fp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"p1 = tstbit($Rs16,#0); if (!p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100011;
+let Inst{31-22} = 0b0001001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_tstbit0_t_jumpnv_nt : HInst<
+(outs),
+(ins IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (tstbit($Ns8.new,#0)) jump:nt $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_1898420 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_tstbit0_t_jumpnv_t : HInst<
+(outs),
+(ins IntRegs:$Ns8, b30_2Imm:$Ii),
+"if (tstbit($Ns8.new,#0)) jump:t $Ii",
+NCJ_tc_3or4stall_SLOT0, TypeNCJ>, Enc_1898420 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100000;
+let Inst{19-19} = 0b0;
+let Inst{31-22} = 0b0010010110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let isNewValue = 1;
+let Defs = [PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+let opNewValue = 0;
+}
+def J4_tstbit0_tp0_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"p0 = tstbit($Rs16,#0); if (p0.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000011;
+let Inst{31-22} = 0b0001000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_tstbit0_tp0_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"p0 = tstbit($Rs16,#0); if (p0.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100011;
+let Inst{31-22} = 0b0001000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P0];
+let Defs = [P0, PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_tstbit0_tp1_jump_nt : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"p1 = tstbit($Rs16,#0); if (p1.new) jump:nt $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b000011;
+let Inst{31-22} = 0b0001001110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def J4_tstbit0_tp1_jump_t : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, b30_2Imm:$Ii),
+"p1 = tstbit($Rs16,#0); if (p1.new) jump:t $Ii",
+COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>, Enc_12829314 {
+let Inst{0-0} = 0b0;
+let Inst{13-8} = 0b100011;
+let Inst{31-22} = 0b0001001110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isBranch = 1;
+let isPredicatedNew = 1;
+let Uses = [P1];
+let Defs = [P1, PC];
+let isTaken = Inst{13};
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 2;
+}
+def L2_deallocframe : HInst<
+(outs),
+(ins),
+"deallocframe",
+LD_tc_ld_SLOT01, TypeLD>, Enc_0 {
+let Inst{4-0} = 0b11110;
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10010000000;
+let Inst{20-16} = 0b11110;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [R29, R30, R31];
+}
+def L2_loadalignb_io : HInst<
+(outs DoubleRegs:$Ryy32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Ryy32 = memb_fifo($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_449439 {
+let Inst{24-21} = 0b0100;
+let Inst{31-27} = 0b10010;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 0;
+let Constraints = "$Ryy32 = $Ryy32in";
+}
+def L2_loadalignb_pbr : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Ryy32 = memb_fifo($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011110100;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignb_pci : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
+"$Ryy32 = memb_fifo($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_971347 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011000100;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignb_pcr : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Ryy32 = memb_fifo($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011000100;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignb_pi : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"$Ryy32 = memb_fifo($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_6372758 {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011010100;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignb_pr : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Ryy32 = memb_fifo($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011100100;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignb_zomap : HInst<
+(outs DoubleRegs:$Ryy32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32),
+"$Ryy32 = memb_fifo($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let Constraints = "$Ryy32 = $Ryy32in";
+}
+def L2_loadalignh_io : HInst<
+(outs DoubleRegs:$Ryy32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32, s31_1Imm:$Ii),
+"$Ryy32 = memh_fifo($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_11930027 {
+let Inst{24-21} = 0b0010;
+let Inst{31-27} = 0b10010;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+let Constraints = "$Ryy32 = $Ryy32in";
+}
+def L2_loadalignh_pbr : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Ryy32 = memh_fifo($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011110010;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignh_pci : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
+"$Ryy32 = memh_fifo($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_1971351 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011000010;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignh_pcr : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Ryy32 = memh_fifo($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011000010;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignh_pi : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"$Ryy32 = memh_fifo($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_3372766 {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011010010;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignh_pr : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Rx32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Ryy32 = memh_fifo($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12261611 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011100010;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Ryy32 = $Ryy32in, $Rx32 = $Rx32in";
+}
+def L2_loadalignh_zomap : HInst<
+(outs DoubleRegs:$Ryy32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rs32),
+"$Ryy32 = memh_fifo($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let Constraints = "$Ryy32 = $Ryy32in";
+}
+def L2_loadbsw2_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii),
+"$Rd32 = membh($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_15275738 {
+let Inst{24-21} = 0b0001;
+let Inst{31-27} = 0b10010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+}
+def L2_loadbsw2_pbr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = membh($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw2_pci : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
+"$Rd32 = membh($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13303422 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw2_pcr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = membh($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw2_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
+"$Rd32 = membh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_15376009 {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011010001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw2_pr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = membh($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw2_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = membh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadbsw4_io : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, s30_2Imm:$Ii),
+"$Rdd32 = membh($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_9852473 {
+let Inst{24-21} = 0b0111;
+let Inst{31-27} = 0b10010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 13;
+let opExtentAlign = 2;
+}
+def L2_loadbsw4_pbr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = membh($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011110111;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw4_pci : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
+"$Rdd32 = membh($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_3931661 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011000111;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw4_pcr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = membh($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011000111;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw4_pi : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
+"$Rdd32 = membh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_8752140 {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011010111;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw4_pr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = membh($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011100111;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbsw4_zomap : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = membh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadbzw2_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii),
+"$Rd32 = memubh($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_15275738 {
+let Inst{24-21} = 0b0011;
+let Inst{31-27} = 0b10010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+}
+def L2_loadbzw2_pbr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memubh($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw2_pci : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
+"$Rd32 = memubh($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13303422 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw2_pcr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memubh($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw2_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
+"$Rd32 = memubh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_15376009 {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011010011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw2_pr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memubh($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw2_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = memubh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadbzw4_io : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, s30_2Imm:$Ii),
+"$Rdd32 = memubh($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_9852473 {
+let Inst{24-21} = 0b0101;
+let Inst{31-27} = 0b10010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 13;
+let opExtentAlign = 2;
+}
+def L2_loadbzw4_pbr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = memubh($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011110101;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw4_pci : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
+"$Rdd32 = memubh($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_3931661 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011000101;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw4_pcr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = memubh($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011000101;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw4_pi : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
+"$Rdd32 = memubh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_8752140 {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011010101;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw4_pr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = memubh($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011100101;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadbzw4_zomap : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = memubh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadrb_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = memb($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_14461004, AddrModeRel {
+let Inst{24-21} = 0b1000;
+let Inst{31-27} = 0b10010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L2_loadrb_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 0;
+}
+def L2_loadrb_pbr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memb($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrb_pci : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
+"$Rd32 = memb($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_16303398 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrb_pcr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memb($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrb_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_0Imm:$Ii),
+"$Rd32 = memb($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5598813, PredNewRel {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrb_pi";
+let isPredicable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrb_pr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memb($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrb_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = memb($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadrbgp : HInst<
+(outs IntRegs:$Rd32),
+(ins u32_0Imm:$Ii),
+"$Rd32 = memb(gp+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1886960, AddrModeRel {
+let Inst{24-21} = 0b1000;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Uses = [GP];
+let BaseOpcode = "L4_loadrb_abs";
+let isPredicable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def L2_loadrd_io : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, s29_3Imm:$Ii),
+"$Rdd32 = memd($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_163381, AddrModeRel {
+let Inst{24-21} = 0b1110;
+let Inst{31-27} = 0b10010;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L2_loadrd_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 14;
+let opExtentAlign = 3;
+}
+def L2_loadrd_pbr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = memd($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011111110;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrd_pci : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_3Imm:$Ii, ModRegs:$Mu2),
+"$Rdd32 = memd($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_931653 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011001110;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrd_pcr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = memd($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011001110;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrd_pi : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_3Imm:$Ii),
+"$Rdd32 = memd($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_9752128, PredNewRel {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011011110;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrd_pi";
+let isPredicable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrd_pr : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rdd32 = memd($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_2901241 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011101110;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrd_zomap : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = memd($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadrdgp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins u29_3Imm:$Ii),
+"$Rdd32 = memd(gp+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4975051, AddrModeRel {
+let Inst{24-21} = 0b1110;
+let Inst{31-27} = 0b01001;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let Uses = [GP];
+let BaseOpcode = "L4_loadrd_abs";
+let isPredicable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 19;
+let opExtentAlign = 3;
+}
+def L2_loadrh_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii),
+"$Rd32 = memh($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_15275738, AddrModeRel {
+let Inst{24-21} = 0b1010;
+let Inst{31-27} = 0b10010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L2_loadrh_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+}
+def L2_loadrh_pbr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memh($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrh_pci : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
+"$Rd32 = memh($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13303422 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrh_pcr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memh($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrh_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
+"$Rd32 = memh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_15376009, PredNewRel {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrh_pi";
+let isPredicable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrh_pr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memh($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrh_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = memh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadrhgp : HInst<
+(outs IntRegs:$Rd32),
+(ins u31_1Imm:$Ii),
+"$Rd32 = memh(gp+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_12608570, AddrModeRel {
+let Inst{24-21} = 0b1010;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [GP];
+let BaseOpcode = "L4_loadrh_abs";
+let isPredicable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+}
+def L2_loadri_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s30_2Imm:$Ii),
+"$Rd32 = memw($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_8990840, AddrModeRel {
+let Inst{24-21} = 0b1100;
+let Inst{31-27} = 0b10010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L2_loadri_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 13;
+let opExtentAlign = 2;
+}
+def L2_loadri_pbr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memw($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadri_pci : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2),
+"$Rd32 = memw($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_14303394 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadri_pcr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memw($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadri_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii),
+"$Rd32 = memw($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_16376009, PredNewRel {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadri_pi";
+let isPredicable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadri_pr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memw($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011101100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadri_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = memw($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadrigp : HInst<
+(outs IntRegs:$Rd32),
+(ins u30_2Imm:$Ii),
+"$Rd32 = memw(gp+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_8814718, AddrModeRel {
+let Inst{24-21} = 0b1100;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let Uses = [GP];
+let BaseOpcode = "L4_loadri_abs";
+let isPredicable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 18;
+let opExtentAlign = 2;
+}
+def L2_loadrub_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rd32 = memub($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_14461004, AddrModeRel {
+let Inst{24-21} = 0b1001;
+let Inst{31-27} = 0b10010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L2_loadrub_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 0;
+}
+def L2_loadrub_pbr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memub($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrub_pci : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2),
+"$Rd32 = memub($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_16303398 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrub_pcr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memub($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrub_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_0Imm:$Ii),
+"$Rd32 = memub($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5598813, PredNewRel {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011011001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrub_pi";
+let isPredicable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrub_pr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memub($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadrub_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = memub($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadrubgp : HInst<
+(outs IntRegs:$Rd32),
+(ins u32_0Imm:$Ii),
+"$Rd32 = memub(gp+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1886960, AddrModeRel {
+let Inst{24-21} = 0b1001;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let Uses = [GP];
+let BaseOpcode = "L4_loadrub_abs";
+let isPredicable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def L2_loadruh_io : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii),
+"$Rd32 = memuh($Rs32+#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_15275738, AddrModeRel {
+let Inst{24-21} = 0b1011;
+let Inst{31-27} = 0b10010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L2_loadruh_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+}
+def L2_loadruh_pbr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memuh($Rx32++$Mu2:brev)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadruh_pci : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2),
+"$Rd32 = memuh($Rx32++#$Ii:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13303422 {
+let Inst{12-9} = 0b0000;
+let Inst{31-21} = 0b10011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadruh_pcr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memuh($Rx32++I:circ($Mu2))",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00010000;
+let Inst{31-21} = 0b10011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadruh_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii),
+"$Rd32 = memuh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_15376009, PredNewRel {
+let Inst{13-9} = 0b00000;
+let Inst{31-21} = 0b10011011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadruh_pi";
+let isPredicable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadruh_pr : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Rd32 = memuh($Rx32++$Mu2)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_48594 {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b10011101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_loadruh_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = memuh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_loadruhgp : HInst<
+(outs IntRegs:$Rd32),
+(ins u31_1Imm:$Ii),
+"$Rd32 = memuh(gp+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_12608570, AddrModeRel {
+let Inst{24-21} = 0b1011;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let Uses = [GP];
+let BaseOpcode = "L4_loadruh_abs";
+let isPredicable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+}
+def L2_loadw_locked : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = memw_locked($Rs32)",
+LD_tc_ld_SLOT0, TypeLD>, Enc_4075554 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10010010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let accessSize = WordAccess;
+let isSoloAX = 1;
+let mayLoad = 1;
+}
+def L2_ploadrbf_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memb($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L2_loadrb_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L2_ploadrbf_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memb($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel {
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrb_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrbf_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4) $Rd32 = memb($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrbfnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memb($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000111000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L2_loadrb_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L2_ploadrbfnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memb($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel {
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrb_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrbfnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4.new) $Rd32 = memb($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrbt_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memb($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000001000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L2_loadrb_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L2_ploadrbt_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memb($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel {
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011011000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrb_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrbt_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4) $Rd32 = memb($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrbtnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memb($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000011000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L2_loadrb_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L2_ploadrbtnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memb($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel {
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011011000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrb_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrbtnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4.new) $Rd32 = memb($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrdf_io : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
+"if (!$Pt4) $Rdd32 = memd($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_677558, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000101110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L2_loadrd_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 3;
+}
+def L2_ploadrdf_pi : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
+"if (!$Pt4) $Rdd32 = memd($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5611087, PredNewRel {
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrd_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrdf_zomap : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4) $Rdd32 = memd($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrdfnew_io : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
+"if (!$Pt4.new) $Rdd32 = memd($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_677558, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000111110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L2_loadrd_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 3;
+}
+def L2_ploadrdfnew_pi : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
+"if (!$Pt4.new) $Rdd32 = memd($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5611087, PredNewRel {
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrd_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrdfnew_zomap : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4.new) $Rdd32 = memd($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrdt_io : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
+"if ($Pt4) $Rdd32 = memd($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_677558, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000001110;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L2_loadrd_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 3;
+}
+def L2_ploadrdt_pi : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
+"if ($Pt4) $Rdd32 = memd($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5611087, PredNewRel {
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011011110;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrd_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrdt_zomap : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4) $Rdd32 = memd($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrdtnew_io : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u29_3Imm:$Ii),
+"if ($Pt4.new) $Rdd32 = memd($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_677558, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000011110;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L2_loadrd_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 3;
+}
+def L2_ploadrdtnew_pi : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_3Imm:$Ii),
+"if ($Pt4.new) $Rdd32 = memd($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_5611087, PredNewRel {
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011011110;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrd_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrdtnew_zomap : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4.new) $Rdd32 = memd($Rs32)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrhf_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
+"if (!$Pt4) $Rd32 = memh($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000101010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L2_loadrh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L2_ploadrhf_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"if (!$Pt4) $Rd32 = memh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel {
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011011010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrh_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrhf_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4) $Rd32 = memh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrhfnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memh($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000111010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L2_loadrh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L2_ploadrhfnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel {
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011011010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrh_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrhfnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4.new) $Rd32 = memh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrht_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
+"if ($Pt4) $Rd32 = memh($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000001010;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L2_loadrh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L2_ploadrht_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"if ($Pt4) $Rd32 = memh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel {
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011011010;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrh_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrht_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4) $Rd32 = memh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrhtnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memh($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000011010;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L2_loadrh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L2_ploadrhtnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel {
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011011010;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrh_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrhtnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4.new) $Rd32 = memh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrif_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
+"if (!$Pt4) $Rd32 = memw($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_2835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000101100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L2_loadri_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L2_ploadrif_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
+"if (!$Pt4) $Rd32 = memw($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_6212930, PredNewRel {
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadri_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrif_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4) $Rd32 = memw($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrifnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memw($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_2835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000111100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L2_loadri_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L2_ploadrifnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memw($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_6212930, PredNewRel {
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadri_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrifnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4.new) $Rd32 = memw($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrit_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
+"if ($Pt4) $Rd32 = memw($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_2835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000001100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L2_loadri_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L2_ploadrit_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
+"if ($Pt4) $Rd32 = memw($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_6212930, PredNewRel {
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadri_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrit_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4) $Rd32 = memw($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadritnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u30_2Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memw($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_2835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L2_loadri_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L2_ploadritnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_2Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memw($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_6212930, PredNewRel {
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadri_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadritnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4.new) $Rd32 = memw($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrubf_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memub($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000101001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L2_loadrub_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L2_ploadrubf_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memub($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel {
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011011001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrub_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrubf_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4) $Rd32 = memub($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrubfnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memub($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000111001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L2_loadrub_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L2_ploadrubfnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memub($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel {
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011011001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrub_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrubfnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4.new) $Rd32 = memub($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrubt_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memub($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000001001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L2_loadrub_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L2_ploadrubt_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memub($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel {
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011011001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrub_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrubt_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4) $Rd32 = memub($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadrubtnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u32_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memub($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4835423, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000011001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L2_loadrub_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L2_ploadrubtnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memub($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_12212978, PredNewRel {
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011011001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadrub_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadrubtnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4.new) $Rd32 = memub($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadruhf_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
+"if (!$Pt4) $Rd32 = memuh($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000101011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L2_loadruh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L2_ploadruhf_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"if (!$Pt4) $Rd32 = memuh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel {
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadruh_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadruhf_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4) $Rd32 = memuh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadruhfnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memuh($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000111011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L2_loadruh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L2_ploadruhfnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memuh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel {
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadruh_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadruhfnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if (!$Pt4.new) $Rd32 = memuh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadruht_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
+"if ($Pt4) $Rd32 = memuh($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000001011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L2_loadruh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L2_ploadruht_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"if ($Pt4) $Rd32 = memuh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel {
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011011011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadruh_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadruht_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4) $Rd32 = memuh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L2_ploadruhtnew_io : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32, u31_1Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memuh($Rs32+#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1835415, AddrModeRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01000011011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L2_loadruh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L2_ploadruhtnew_pi : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Rx32),
+(ins PredRegs:$Pt4, IntRegs:$Rx32in, s4_1Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memuh($Rx32++#$Ii)",
+LD_tc_ld_pi_SLOT01, TypeLD>, Enc_7212930, PredNewRel {
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011011011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let BaseOpcode = "L2_loadruh_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def L2_ploadruhtnew_zomap : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, IntRegs:$Rs32),
+"if ($Pt4.new) $Rd32 = memuh($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_add_memopb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
+"memb($Rs32+#$Ii) += $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_11849200 {
+let Inst{6-5} = 0b00;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_add_memopb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memb($Rs32) += $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_add_memoph_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+#$Ii) += $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8849208 {
+let Inst{6-5} = 0b00;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L4_add_memoph_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memh($Rs32) += $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_add_memopw_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
+"memw($Rs32+#$Ii) += $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9849208 {
+let Inst{6-5} = 0b00;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L4_add_memopw_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memw($Rs32) += $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_and_memopb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
+"memb($Rs32+#$Ii) &= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_11849200 {
+let Inst{6-5} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_and_memopb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memb($Rs32) &= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_and_memoph_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+#$Ii) &= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8849208 {
+let Inst{6-5} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L4_and_memoph_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memh($Rs32) &= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_and_memopw_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
+"memw($Rs32+#$Ii) &= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9849208 {
+let Inst{6-5} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L4_and_memopw_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memw($Rs32) &= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_iadd_memopb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
+"memb($Rs32+#$Ii) += #$II",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_6773159 {
+let Inst{6-5} = 0b00;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_iadd_memopb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memb($Rs32) += #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_iadd_memoph_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
+"memh($Rs32+#$Ii) += #$II",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9773167 {
+let Inst{6-5} = 0b00;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L4_iadd_memoph_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memh($Rs32) += #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_iadd_memopw_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
+"memw($Rs32+#$Ii) += #$II",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8773155 {
+let Inst{6-5} = 0b00;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L4_iadd_memopw_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memw($Rs32) += #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_iand_memopb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
+"memb($Rs32+#$Ii) = clrbit(#$II)",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_6773159 {
+let Inst{6-5} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_iand_memopb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memb($Rs32) = clrbit(#$II)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_iand_memoph_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
+"memh($Rs32+#$Ii) = clrbit(#$II)",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9773167 {
+let Inst{6-5} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L4_iand_memoph_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memh($Rs32) = clrbit(#$II)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_iand_memopw_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
+"memw($Rs32+#$Ii) = clrbit(#$II)",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8773155 {
+let Inst{6-5} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L4_iand_memopw_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memw($Rs32) = clrbit(#$II)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_ior_memopb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
+"memb($Rs32+#$Ii) = setbit(#$II)",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_6773159 {
+let Inst{6-5} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ior_memopb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memb($Rs32) = setbit(#$II)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_ior_memoph_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
+"memh($Rs32+#$Ii) = setbit(#$II)",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9773167 {
+let Inst{6-5} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L4_ior_memoph_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memh($Rs32) = setbit(#$II)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_ior_memopw_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
+"memw($Rs32+#$Ii) = setbit(#$II)",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8773155 {
+let Inst{6-5} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L4_ior_memopw_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memw($Rs32) = setbit(#$II)",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_isub_memopb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii, u5_0Imm:$II),
+"memb($Rs32+#$Ii) -= #$II",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_6773159 {
+let Inst{6-5} = 0b01;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_isub_memopb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memb($Rs32) -= #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_isub_memoph_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u31_1Imm:$Ii, u5_0Imm:$II),
+"memh($Rs32+#$Ii) -= #$II",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9773167 {
+let Inst{6-5} = 0b01;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L4_isub_memoph_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memh($Rs32) -= #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_isub_memopw_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u30_2Imm:$Ii, u5_0Imm:$II),
+"memw($Rs32+#$Ii) -= #$II",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8773155 {
+let Inst{6-5} = 0b01;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111111010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L4_isub_memopw_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, u5_0Imm:$II),
+"memw($Rs32) -= #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_loadalignb_ap : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Re32),
+(ins DoubleRegs:$Ryy32in, u32_0Imm:$II),
+"$Ryy32 = memb_fifo($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_11047413 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011010100;
+let hasNewValue = 1;
+let opNewValue = 1;
+let addrMode = AbsoluteSet;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let Constraints = "$Ryy32 = $Ryy32in";
+}
+def L4_loadalignb_ur : HInst<
+(outs DoubleRegs:$Ryy32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Ryy32 = memb_fifo($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_7303598 {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011100100;
+let addrMode = BaseLongOffset;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 4;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let Constraints = "$Ryy32 = $Ryy32in";
+}
+def L4_loadalignh_ap : HInst<
+(outs DoubleRegs:$Ryy32, IntRegs:$Re32),
+(ins DoubleRegs:$Ryy32in, u32_0Imm:$II),
+"$Ryy32 = memh_fifo($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_11047413 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011010010;
+let hasNewValue = 1;
+let opNewValue = 1;
+let addrMode = AbsoluteSet;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let Constraints = "$Ryy32 = $Ryy32in";
+}
+def L4_loadalignh_ur : HInst<
+(outs DoubleRegs:$Ryy32),
+(ins DoubleRegs:$Ryy32in, IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Ryy32 = memh_fifo($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_7303598 {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011100010;
+let addrMode = BaseLongOffset;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 4;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let Constraints = "$Ryy32 = $Ryy32in";
+}
+def L4_loadbsw2_ap : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rd32 = membh($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011010001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let addrMode = AbsoluteSet;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadbsw2_ur : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rd32 = membh($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_486163 {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseLongOffset;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadbsw4_ap : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rdd32 = membh($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_877823 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011010111;
+let hasNewValue = 1;
+let opNewValue = 1;
+let addrMode = AbsoluteSet;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadbsw4_ur : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rdd32 = membh($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_5582416 {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011100111;
+let addrMode = BaseLongOffset;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadbzw2_ap : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rd32 = memubh($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011010011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let addrMode = AbsoluteSet;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadbzw2_ur : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rd32 = memubh($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_486163 {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseLongOffset;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadbzw4_ap : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rdd32 = memubh($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_877823 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011010101;
+let hasNewValue = 1;
+let opNewValue = 1;
+let addrMode = AbsoluteSet;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadbzw4_ur : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rdd32 = memubh($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_5582416 {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011100101;
+let addrMode = BaseLongOffset;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadd_locked : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = memd_locked($Rs32)",
+LD_tc_ld_SLOT0, TypeLD>, Enc_4030179 {
+let Inst{13-5} = 0b010000000;
+let Inst{31-21} = 0b10010010000;
+let accessSize = DoubleWordAccess;
+let isSoloAX = 1;
+let mayLoad = 1;
+}
+def L4_loadrb_ap : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rd32 = memb($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let addrMode = AbsoluteSet;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadrb_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"$Rd32 = memb($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_10721363, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrb_rr";
+let isPredicable = 1;
+}
+def L4_loadrb_ur : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rd32 = memb($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_486163, AddrModeRel, ImmRegShl {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseLongOffset;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadrd_ap : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rdd32 = memd($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_877823 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011011110;
+let hasNewValue = 1;
+let opNewValue = 1;
+let addrMode = AbsoluteSet;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadrd_rr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"$Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_7581852, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111010110;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrd_rr";
+let isPredicable = 1;
+}
+def L4_loadrd_ur : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rdd32 = memd($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_5582416, AddrModeRel, ImmRegShl {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011101110;
+let addrMode = BaseLongOffset;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadrh_ap : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rd32 = memh($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let addrMode = AbsoluteSet;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadrh_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"$Rd32 = memh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_10721363, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111010010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrh_rr";
+let isPredicable = 1;
+}
+def L4_loadrh_ur : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rd32 = memh($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_486163, AddrModeRel, ImmRegShl {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseLongOffset;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadri_ap : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rd32 = memw($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let addrMode = AbsoluteSet;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadri_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"$Rd32 = memw($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_10721363, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111010100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let InputType = "reg";
+let BaseOpcode = "L4_loadri_rr";
+let isPredicable = 1;
+}
+def L4_loadri_ur : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rd32 = memw($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_486163, AddrModeRel, ImmRegShl {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011101100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseLongOffset;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadrub_ap : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rd32 = memub($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011011001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let addrMode = AbsoluteSet;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadrub_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"$Rd32 = memub($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_10721363, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111010001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrub_rr";
+let isPredicable = 1;
+}
+def L4_loadrub_ur : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rd32 = memub($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_486163, AddrModeRel, ImmRegShl {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseLongOffset;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadruh_ap : HInst<
+(outs IntRegs:$Rd32, IntRegs:$Re32),
+(ins u32_0Imm:$II),
+"$Rd32 = memuh($Re32=#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_12616482 {
+let Inst{7-7} = 0b0;
+let Inst{13-12} = 0b01;
+let Inst{31-21} = 0b10011011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let addrMode = AbsoluteSet;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_loadruh_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"$Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_10721363, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111010011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadruh_rr";
+let isPredicable = 1;
+}
+def L4_loadruh_ur : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, u2_0Imm:$Ii, u32_0Imm:$II),
+"$Rd32 = memuh($Rt32<<#$Ii+#$II)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_486163, AddrModeRel, ImmRegShl {
+let Inst{12-12} = 0b1;
+let Inst{31-21} = 0b10011101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseLongOffset;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let InputType = "imm";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_or_memopb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
+"memb($Rs32+#$Ii) |= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_11849200 {
+let Inst{6-5} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_or_memopb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memb($Rs32) |= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_or_memoph_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+#$Ii) |= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8849208 {
+let Inst{6-5} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L4_or_memoph_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memh($Rs32) |= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_or_memopw_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
+"memw($Rs32+#$Ii) |= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9849208 {
+let Inst{6-5} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L4_or_memopw_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memw($Rs32) |= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_ploadrbf_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memb(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011111000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L4_loadrb_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrbf_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110001000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrb_rr";
+}
+def L4_ploadrbfnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memb(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011111000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L4_loadrb_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrbfnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4.new) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrb_rr";
+}
+def L4_ploadrbt_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memb(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011111000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L4_loadrb_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrbt_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110000000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrb_rr";
+}
+def L4_ploadrbtnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memb(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011111000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L4_loadrb_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrbtnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4.new) $Rd32 = memb($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110010000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrb_rr";
+}
+def L4_ploadrdf_abs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4) $Rdd32 = memd(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_15182416, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011111110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L4_loadrd_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrdf_rr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_7254313, AddrModeRel {
+let Inst{31-21} = 0b00110001110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrd_rr";
+}
+def L4_ploadrdfnew_abs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4.new) $Rdd32 = memd(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_15182416, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011111110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L4_loadrd_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrdfnew_rr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4.new) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_7254313, AddrModeRel {
+let Inst{31-21} = 0b00110011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrd_rr";
+}
+def L4_ploadrdt_abs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4) $Rdd32 = memd(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_15182416, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011111110;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L4_loadrd_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrdt_rr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_7254313, AddrModeRel {
+let Inst{31-21} = 0b00110000110;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrd_rr";
+}
+def L4_ploadrdtnew_abs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4.new) $Rdd32 = memd(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_15182416, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011111110;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L4_loadrd_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrdtnew_rr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4.new) $Rdd32 = memd($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_7254313, AddrModeRel {
+let Inst{31-21} = 0b00110010110;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrd_rr";
+}
+def L4_ploadrhf_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memh(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011111010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L4_loadrh_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrhf_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110001010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrh_rr";
+}
+def L4_ploadrhfnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memh(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011111010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L4_loadrh_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrhfnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4.new) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110011010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrh_rr";
+}
+def L4_ploadrht_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memh(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011111010;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L4_loadrh_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrht_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110000010;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrh_rr";
+}
+def L4_ploadrhtnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memh(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011111010;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L4_loadrh_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrhtnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4.new) $Rd32 = memh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110010010;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrh_rr";
+}
+def L4_ploadrif_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memw(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011111100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L4_loadri_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrif_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110001100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let InputType = "reg";
+let BaseOpcode = "L4_loadri_rr";
+}
+def L4_ploadrifnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memw(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011111100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L4_loadri_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrifnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4.new) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let InputType = "reg";
+let BaseOpcode = "L4_loadri_rr";
+}
+def L4_ploadrit_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memw(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011111100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L4_loadri_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrit_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let InputType = "reg";
+let BaseOpcode = "L4_loadri_rr";
+}
+def L4_ploadritnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memw(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011111100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L4_loadri_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadritnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4.new) $Rd32 = memw($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110010100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let InputType = "reg";
+let BaseOpcode = "L4_loadri_rr";
+}
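+// The same eight predicated variants repeat below for unsigned byte loads (memub).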
+def L4_ploadrubf_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memub(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011111001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L4_loadrub_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrubf_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110001001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrub_rr";
+}
+def L4_ploadrubfnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memub(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011111001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L4_loadrub_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrubfnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4.new) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110011001;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrub_rr";
+}
+def L4_ploadrubt_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memub(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011111001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L4_loadrub_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrubt_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110000001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrub_rr";
+}
+def L4_ploadrubtnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memub(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011111001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L4_loadrub_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadrubtnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4.new) $Rd32 = memub($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110010001;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let InputType = "reg";
+let BaseOpcode = "L4_loadrub_rr";
+}
+def L4_ploadruhf_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4) $Rd32 = memuh(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10011111011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L4_loadruh_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadruhf_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110001011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadruh_rr";
+}
+def L4_ploadruhfnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if (!$Pt4.new) $Rd32 = memuh(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b111;
+let Inst{31-21} = 0b10011111011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L4_loadruh_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadruhfnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if (!$Pv4.new) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadruh_rr";
+}
+def L4_ploadruht_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4) $Rd32 = memuh(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10011111011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L4_loadruh_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadruht_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110000011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadruh_rr";
+}
+def L4_ploadruhtnew_abs : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pt4, u32_0Imm:$Ii),
+"if ($Pt4.new) $Rd32 = memuh(#$Ii)",
+LD_tc_ld_SLOT01, TypeLD>, Enc_13344657, AddrModeRel {
+let Inst{7-5} = 0b100;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10011111011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L4_loadruh_abs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_ploadruhtnew_rr : HInst<
+(outs IntRegs:$Rd32),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"if ($Pv4.new) $Rd32 = memuh($Rs32+$Rt32<<#$Ii)",
+V4LDST_tc_ld_SLOT01, TypeLD>, Enc_1793896, AddrModeRel {
+let Inst{31-21} = 0b00110010011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let InputType = "reg";
+let BaseOpcode = "L4_loadruh_rr";
+}
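+// dealloc_return family: loads the saved frame pair (Uses R30, DoubleWordAccess),
+// restores R29/R30/R31, and returns through PC; the :t/:nt assembly suffixes are
+// the taken/not-taken hints reflected in isTaken.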
+def L4_return : HInst<
+(outs),
+(ins),
+"dealloc_return",
+LD_tc_3or4stall_SLOT0, TypeLD>, Enc_0, PredNewRel {
+let Inst{4-0} = 0b11110;
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10010110000;
+let Inst{20-16} = 0b11110;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let isReturn = 1;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [PC, R29, R30, R31];
+let BaseOpcode = "L4_return";
+let isBarrier = 1;
+let isPredicable = 1;
+let isTaken = 1;
+}
+def L4_return_f : HInst<
+(outs),
+(ins PredRegs:$Pv4),
+"if (!$Pv4) dealloc_return",
+LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel {
+let Inst{4-0} = 0b11110;
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1100;
+let Inst{31-21} = 0b10010110000;
+let Inst{20-16} = 0b11110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let isReturn = 1;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [PC, R29, R30, R31];
+let BaseOpcode = "L4_return";
+let isTaken = Inst{12};
+}
+def L4_return_fnew_pnt : HInst<
+(outs),
+(ins PredRegs:$Pv4),
+"if (!$Pv4.new) dealloc_return:nt",
+LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel {
+let Inst{4-0} = 0b11110;
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1010;
+let Inst{31-21} = 0b10010110000;
+let Inst{20-16} = 0b11110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let isReturn = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [PC, R29, R30, R31];
+let BaseOpcode = "L4_return";
+let isTaken = Inst{12};
+}
+def L4_return_fnew_pt : HInst<
+(outs),
+(ins PredRegs:$Pv4),
+"if (!$Pv4.new) dealloc_return:t",
+LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel {
+let Inst{4-0} = 0b11110;
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b1110;
+let Inst{31-21} = 0b10010110000;
+let Inst{20-16} = 0b11110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let isReturn = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [PC, R29, R30, R31];
+let BaseOpcode = "L4_return";
+let isTaken = Inst{12};
+}
+def L4_return_t : HInst<
+(outs),
+(ins PredRegs:$Pv4),
+"if ($Pv4) dealloc_return",
+LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel {
+let Inst{4-0} = 0b11110;
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b0100;
+let Inst{31-21} = 0b10010110000;
+let Inst{20-16} = 0b11110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let isReturn = 1;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [PC, R29, R30, R31];
+let BaseOpcode = "L4_return";
+let isTaken = Inst{12};
+}
+def L4_return_tnew_pnt : HInst<
+(outs),
+(ins PredRegs:$Pv4),
+"if ($Pv4.new) dealloc_return:nt",
+LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel {
+let Inst{4-0} = 0b11110;
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b0010;
+let Inst{31-21} = 0b10010110000;
+let Inst{20-16} = 0b11110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let isReturn = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [PC, R29, R30, R31];
+let BaseOpcode = "L4_return";
+let isTaken = Inst{12};
+}
+def L4_return_tnew_pt : HInst<
+(outs),
+(ins PredRegs:$Pv4),
+"if ($Pv4.new) dealloc_return:t",
+LD_tc_3or4stall_SLOT0, TypeLD>, Enc_12711252, PredNewRel {
+let Inst{4-0} = 0b11110;
+let Inst{7-5} = 0b000;
+let Inst{13-10} = 0b0110;
+let Inst{31-21} = 0b10010110000;
+let Inst{20-16} = 0b11110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let isReturn = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [PC, R29, R30, R31];
+let BaseOpcode = "L4_return";
+let isTaken = Inst{12};
+}
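+// Memop subtracts: read-modify-write memory ops, so both mayLoad and mayStore are
+// set; the _zomap pseudos (TypeMAPPING, isCodeGenOnly) carry the zero-offset
+// syntax that maps onto the immediate-offset encodings.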
+def L4_sub_memopb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
+"memb($Rs32+#$Ii) -= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_11849200 {
+let Inst{6-5} = 0b01;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def L4_sub_memopb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memb($Rs32) -= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_sub_memoph_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+#$Ii) -= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_8849208 {
+let Inst{6-5} = 0b01;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def L4_sub_memoph_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memh($Rs32) -= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def L4_sub_memopw_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
+"memw($Rs32+#$Ii) -= $Rt32",
+V4LDST_tc_st_SLOT0, TypeV4LDST>, Enc_9849208 {
+let Inst{6-5} = 0b01;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00111110010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let mayLoad = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def L4_sub_memopw_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memw($Rs32) -= $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
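+// Accumulating adds: Rx += add(Rs,Rt) / add(Rs,#s); the register and immediate
+// forms are paired through CextOpcode "M2_acci" and ImmRegRel, and both tie the
+// accumulator via "$Rx32 = $Rx32in".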
+def M2_acci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += add($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889, ImmRegRel {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M2_acci";
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_accii : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rx32 += add($Rs32,#$Ii)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_11522288, ImmRegRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M2_acci";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
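+// Complex-multiply family: cmpy produces the full product (the i/r suffixes
+// selecting its imaginary/real component), "*" marks a conjugated Rt operand,
+// cmac/cnac accumulate into or subtract from Rxx, and the :sat/_s1 variants
+// saturate (defining USR_OVF) and pre-shift by :<<1.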
+def M2_cmaci_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += cmpyi($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cmacr_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += cmpyr($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cmacs_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += cmpy($Rs32,$Rt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cmacs_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += cmpy($Rs32,$Rt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cmacsc_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += cmpy($Rs32,$Rt32*):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cmacsc_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += cmpy($Rs32,$Rt32*):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cmpyi_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = cmpyi($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101000;
+let prefersSlot3 = 1;
+}
+def M2_cmpyr_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = cmpyr($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101000;
+let prefersSlot3 = 1;
+}
+def M2_cmpyrs_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = cmpy($Rs32,$Rt32):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_cmpyrs_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = cmpy($Rs32,$Rt32):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_cmpyrsc_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = cmpy($Rs32,$Rt32*):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_cmpyrsc_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = cmpy($Rs32,$Rt32*):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_cmpys_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = cmpy($Rs32,$Rt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_cmpys_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = cmpy($Rs32,$Rt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_cmpysc_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = cmpy($Rs32,$Rt32*):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_cmpysc_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = cmpy($Rs32,$Rt32*):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_cnacs_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= cmpy($Rs32,$Rt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cnacs_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= cmpy($Rs32,$Rt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cnacsc_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= cmpy($Rs32,$Rt32*):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_cnacsc_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= cmpy($Rs32,$Rt32*):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
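+// Full 32x32->64-bit multiplies: dpmpyss (signed) and dpmpyuu (unsigned), each in
+// plain, accumulate (+=) and negate-accumulate (-=) forms; dpmpyss_rnd_s0 adds
+// :rnd rounding into a 32-bit destination.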
+def M2_dpmpyss_acc_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_dpmpyss_nac_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_dpmpyss_rnd_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32,$Rt32):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_dpmpyss_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101000;
+let prefersSlot3 = 1;
+}
+def M2_dpmpyuu_acc_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_dpmpyuu_nac_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_dpmpyuu_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101010;
+let prefersSlot3 = 1;
+}
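+// hmmpy: 32 x 16 multiplies against the high or low halfword of Rt, always
+// :<<1:sat, with :rnd added in the _rs1 variants; all define USR_OVF.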
+def M2_hmmpyh_rs1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32,$Rt32.h):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_hmmpyh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32,$Rt32.h):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_hmmpyl_rs1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32,$Rt32.l):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_hmmpyl_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32,$Rt32.l):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
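+// mpyi accumulation: maci (+= Rt), macsip (+= #u), macsin (-= #u); the immediate
+// forms are constant-extendable, and macsip pairs with maci via CextOpcode.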
+def M2_maci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyi($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M2_maci";
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_macsin : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u32_0Imm:$Ii),
+"$Rx32 -= mpyi($Rs32,#$Ii)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_11522288 {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_macsip : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u32_0Imm:$Ii),
+"$Rx32 += mpyi($Rs32,#$Ii)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_11522288, ImmRegRel {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M2_maci";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
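+// Vector word-by-halfword multiply-accumulates: vmpyweh/vmpywoh take the even/odd
+// halfwords, the *uh forms their unsigned counterparts; rs* variants round (:rnd),
+// *1 variants pre-shift (:<<1), and all saturate, defining USR_OVF.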
+def M2_mmachs_rs0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpywoh($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010001;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmachs_rs1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpywoh($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010101;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmachs_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpywoh($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmachs_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpywoh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmacls_rs0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyweh($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010001;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmacls_rs1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyweh($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010101;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmacls_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyweh($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmacls_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyweh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmacuhs_rs0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpywouh($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010011;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmacuhs_rs1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpywouh($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010111;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmacuhs_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpywouh($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmacuhs_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpywouh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmaculs_rs0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyweuh($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010011;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmaculs_rs1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyweuh($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010111;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmaculs_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyweuh($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mmaculs_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyweuh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
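+// Non-accumulating versions of the vector word-by-halfword multiplies above.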
+def M2_mmpyh_rs0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpywoh($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000001;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyh_rs1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpywoh($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000101;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpywoh($Rss32,$Rtt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpywoh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyl_rs0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyweh($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000001;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyl_rs1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyweh($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000101;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyl_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyweh($Rss32,$Rtt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyl_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyweh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyuh_rs0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpywouh($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000011;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyuh_rs1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpywouh($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000111;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyuh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpywouh($Rss32,$Rtt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyuh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpywouh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyul_rs0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyweuh($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000011;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyul_rs1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyweuh($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000111;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyul_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyweuh($Rss32,$Rtt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mmpyul_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyweuh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
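+// 16x16 multiply-accumulate matrix: {hh,hl,lh,ll} select the source halfwords,
+// _s1 adds :<<1, and the sat_ rows saturate (defining USR_OVF).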
+def M2_mpy_acc_hh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.h,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_hh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_hl_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.h,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_hl_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_lh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.l,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_lh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_ll_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.l,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_ll_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_sat_hh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.h,$Rt32.h):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_sat_hh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.h,$Rt32.h):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_sat_hl_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.h,$Rt32.l):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_sat_hl_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.h,$Rt32.l):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_sat_lh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.l,$Rt32.h):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_sat_lh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.l,$Rt32.h):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_sat_ll_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.l,$Rt32.l):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_acc_sat_ll_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32.l,$Rt32.l):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
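+// Plain 16x16 multiplies over the same halfword/shift matrix.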
+def M2_mpy_hh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.h)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_hh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_hl_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.l)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_hl_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_lh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.h)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_lh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_ll_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.l)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_ll_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
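+// nac rows: the same 16x16 products subtracted from the accumulator (Rx -= mpy(...)).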
+def M2_mpy_nac_hh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.h,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_hh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_hl_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.h,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_hl_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_lh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.l,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_lh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_ll_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.l,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_ll_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_sat_hh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.h,$Rt32.h):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_sat_hh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.h,$Rt32.h):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_sat_hl_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.h,$Rt32.l):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_sat_hl_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.h,$Rt32.l):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_sat_lh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.l,$Rt32.h):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_sat_lh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.l,$Rt32.h):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_sat_ll_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.l,$Rt32.l):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpy_nac_sat_ll_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32.l,$Rt32.l):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rx32 = $Rx32in";
+}
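+// rnd rows: rounded 16x16 products.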
+def M2_mpy_rnd_hh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.h):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_rnd_hh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_rnd_hl_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.l):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_rnd_hl_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_rnd_lh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.h):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_rnd_lh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_rnd_ll_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.l):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_rnd_ll_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_sat_hh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.h):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_hh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_hl_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.l):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_hl_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_lh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.h):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_lh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_ll_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.l):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_ll_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_rnd_hh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.h):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_rnd_hh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_rnd_hl_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.l):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_rnd_hl_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_rnd_lh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.h):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_rnd_lh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_rnd_ll_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.l):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_sat_rnd_ll_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_mpy_up : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_up_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32,$Rt32):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpy_up_s1_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpy($Rs32,$Rt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
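+// The mpy_up forms have no halfword suffix: they return the upper 32 bits of
+// the full 64-bit signed product (optionally shifted left by one and
+// saturated), which is why a 32x32 multiply can still fit a single IntRegs
+// destination.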
+def M2_mpyd_acc_hh_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32.h,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_acc_hh_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_acc_hl_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32.h,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_acc_hl_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_acc_lh_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32.l,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_acc_lh_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_acc_ll_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32.l,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_acc_ll_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpy($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_hh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.h,$Rt32.h)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100000;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_hh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100100;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_hl_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.h,$Rt32.l)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100000;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_hl_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100100;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_lh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.l,$Rt32.h)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100000;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_lh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100100;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_ll_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.l,$Rt32.l)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100000;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_ll_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100100;
+let prefersSlot3 = 1;
+}
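+// The mpyd forms keep the whole 64-bit product in a DoubleRegs destination;
+// accordingly they drop hasNewValue/opNewValue, since only a 32-bit GPR
+// result can feed a .new operand.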
+def M2_mpyd_nac_hh_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32.h,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_nac_hh_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110101;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_nac_hl_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32.h,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_nac_hl_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110101;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_nac_lh_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32.l,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_nac_lh_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110101;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_nac_ll_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32.l,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_nac_ll_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpy($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110101;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyd_rnd_hh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.h,$Rt32.h):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100001;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_rnd_hh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.h,$Rt32.h):<<1:rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100101;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_rnd_hl_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.h,$Rt32.l):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100001;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_rnd_hl_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.h,$Rt32.l):<<1:rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100101;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_rnd_lh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.l,$Rt32.h):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100001;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_rnd_lh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.l,$Rt32.h):<<1:rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100101;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_rnd_ll_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.l,$Rt32.l):rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100001;
+let prefersSlot3 = 1;
+}
+def M2_mpyd_rnd_ll_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpy($Rs32.l,$Rt32.l):<<1:rnd",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100101;
+let prefersSlot3 = 1;
+}
+def M2_mpyi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyi($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M2_mpyi";
+let InputType = "reg";
+}
+def M2_mpysin : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u8_0Imm:$Ii),
+"$Rd32 = -mpyi($Rs32,#$Ii)",
+M_tc_3x_SLOT23, TypeM>, Enc_16355964 {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpysip : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u32_0Imm:$Ii),
+"$Rd32 = +mpyi($Rs32,#$Ii)",
+M_tc_3x_SLOT23, TypeM>, Enc_16355964 {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
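+// The isExtendable/opExtendable/opExtentBits fields describe the constant
+// extender: operand 2 (#$Ii) natively encodes 8 unsigned bits but can be
+// widened to a full 32-bit immediate by an immext word in the same packet.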
+def M2_mpysmi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, m32_0Imm:$Ii),
+"$Rd32 = mpyi($Rs32,#$Ii)",
+M_tc_3x_SLOT23, TypeM>, ImmRegRel {
+let hasNewValue = 1;
+let opNewValue = 0;
+let CextOpcode = "M2_mpyi";
+let InputType = "imm";
+let isPseudo = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 9;
+let opExtentAlign = 0;
+}
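+// M2_mpysmi is a pseudo (isPseudo = 1, no Enc_* class): its m32_0Imm operand
+// is signed, and the instruction is presumably resolved to M2_mpysip or
+// M2_mpysin according to the sign of #$Ii.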
+def M2_mpysu_up : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpysu($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_acc_hh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyu($Rs32.h,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_acc_hh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyu($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_acc_hl_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyu($Rs32.h,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_acc_hl_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyu($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_acc_lh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyu($Rs32.l,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_acc_lh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyu($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_acc_ll_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyu($Rs32.l,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_acc_ll_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpyu($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_hh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32.h,$Rt32.h)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_hh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_hl_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32.h,$Rt32.l)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_hl_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_lh_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32.l,$Rt32.h)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_lh_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_ll_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32.l,$Rt32.l)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_ll_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyu_nac_hh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyu($Rs32.h,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_nac_hh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyu($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_nac_hl_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyu($Rs32.h,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_nac_hl_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyu($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_nac_lh_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyu($Rs32.l,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_nac_lh_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyu($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_nac_ll_s0 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyu($Rs32.l,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_mpyu_nac_ll_s1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpyu($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101110111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
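+// No :rnd or :sat variants appear for the unsigned mpyu multiplies, so none
+// of them define USR_OVF.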
+def M2_mpyu_up : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyu($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_acc_hh_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32.h,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_acc_hh_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_acc_hl_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32.h,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_acc_hl_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_acc_lh_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32.l,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_acc_lh_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_acc_ll_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32.l,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_acc_ll_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += mpyu($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_hh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32.h,$Rt32.h)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100010;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_hh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100110;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_hl_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32.h,$Rt32.l)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100010;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_hl_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100110;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_lh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32.l,$Rt32.h)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100010;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_lh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100110;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_ll_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32.l,$Rt32.l)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100010;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_ll_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = mpyu($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100100110;
+let prefersSlot3 = 1;
+}
+def M2_mpyud_nac_hh_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32.h,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_nac_hh_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32.h,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110111;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_nac_hl_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32.h,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_nac_hl_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32.h,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110111;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_nac_lh_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32.l,$Rt32.h)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_nac_lh_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32.l,$Rt32.h):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110111;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_nac_ll_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32.l,$Rt32.l)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyud_nac_ll_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 -= mpyu($Rs32.l,$Rt32.l):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100110111;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_mpyui : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = mpyui($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def M2_nacci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= add($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_naccii : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rx32 -= add($Rs32,#$Ii)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_11522288 {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100010100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_subacc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rx32 += sub($Rt32,$Rs32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_7692963 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M2_vabsdiffh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vabsdiffh($Rtt32,$Rss32)",
+M_tc_2_SLOT23, TypeM>, Enc_11687333 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000011;
+let prefersSlot3 = 1;
+}
+def M2_vabsdiffw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vabsdiffw($Rtt32,$Rss32)",
+M_tc_2_SLOT23, TypeM>, Enc_11687333 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000001;
+let prefersSlot3 = 1;
+}
+def M2_vcmac_s0_sat_i : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vcmpyi($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vcmac_s0_sat_r : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vcmpyr($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010001;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vcmpy_s0_sat_i : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vcmpyi($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000010;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vcmpy_s0_sat_r : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vcmpyr($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000001;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vcmpy_s1_sat_i : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vcmpyi($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vcmpy_s1_sat_r : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vcmpyr($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000101;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vdmacs_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vdmpy($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vdmacs_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vdmpy($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vdmpyrs_s0 : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = vdmpy($Rss32,$Rtt32):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_9277990 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vdmpyrs_s1 : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = vdmpy($Rss32,$Rtt32):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_9277990 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vdmpys_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vdmpy($Rss32,$Rtt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vdmpys_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vdmpy($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
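+// vdmpy comes in two result widths: the :rnd:sat forms (M2_vdmpyrs_*) pack
+// the rounded result into a single Rd32, while the plain :sat forms
+// (M2_vdmpys_*) return the full 64-bit pair.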
+def M2_vmac2 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += vmpyh($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vmac2es : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyeh($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vmac2es_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyeh($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vmac2es_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vmpyeh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vmac2s_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += vmpyh($Rs32,$Rt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vmac2s_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += vmpyh($Rs32,$Rt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vmac2su_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += vmpyhsu($Rs32,$Rt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111011;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vmac2su_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += vmpyhsu($Rs32,$Rt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111111;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vmpy2es_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyeh($Rss32,$Rtt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vmpy2es_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vmpyeh($Rss32,$Rtt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vmpy2s_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = vmpyh($Rs32,$Rt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vmpy2s_s0pack : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = vmpyh($Rs32,$Rt32):rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vmpy2s_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = vmpyh($Rs32,$Rt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vmpy2s_s1pack : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = vmpyh($Rs32,$Rt32):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_14071773 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101101101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vmpy2su_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = vmpyhsu($Rs32,$Rt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101000;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vmpy2su_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = vmpyhsu($Rs32,$Rt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101100;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vraddh : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = vraddh($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_9277990 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_vradduh : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = vradduh($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_9277990 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M2_vrcmaci_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrcmpyi($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vrcmaci_s0c : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrcmpyi($Rss32,$Rtt32*)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vrcmacr_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrcmpyr($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vrcmacr_s0c : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrcmpyr($Rss32,$Rtt32*)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vrcmpyi_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrcmpyi($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let prefersSlot3 = 1;
+}
+def M2_vrcmpyi_s0c : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrcmpyi($Rss32,$Rtt32*)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000010;
+let prefersSlot3 = 1;
+}
+def M2_vrcmpyr_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrcmpyr($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let prefersSlot3 = 1;
+}
+def M2_vrcmpyr_s0c : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrcmpyr($Rss32,$Rtt32*)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000011;
+let prefersSlot3 = 1;
+}
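+// In the vector complex-multiply mnemonics a trailing '*' on $Rtt32 marks the
+// conjugate (_s0c) forms, which negate the imaginary halfwords of the second
+// operand before the reduction.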
+def M2_vrcmpys_acc_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 += vrcmpys($Rss32,$Rt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM> {
+let isPseudo = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vrcmpys_acc_s1_h : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrcmpys($Rss32,$Rtt32):<<1:sat:raw:hi",
+M_tc_3x_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010101;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vrcmpys_acc_s1_l : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrcmpys($Rss32,$Rtt32):<<1:sat:raw:lo",
+M_tc_3x_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010111;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vrcmpys_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vrcmpys($Rss32,$Rt32):<<1:sat",
+M_tc_3x_SLOT23, TypeM> {
+let isPseudo = 1;
+}
+def M2_vrcmpys_s1_h : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrcmpys($Rss32,$Rtt32):<<1:sat:raw:hi",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000101;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vrcmpys_s1_l : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrcmpys($Rss32,$Rtt32):<<1:sat:raw:lo",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000111;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vrcmpys_s1rp : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rd32 = vrcmpys($Rss32,$Rt32):<<1:rnd:sat",
+M_tc_3x_SLOT23, TypeM> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+}
+def M2_vrcmpys_s1rp_h : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = vrcmpys($Rss32,$Rtt32):<<1:rnd:sat:raw:hi",
+M_tc_3x_SLOT23, TypeM>, Enc_9277990 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M2_vrcmpys_s1rp_l : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = vrcmpys($Rss32,$Rtt32):<<1:rnd:sat:raw:lo",
+M_tc_3x_SLOT23, TypeM>, Enc_9277990 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
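+// The vrcmpys pseudos above (isPseudo = 1, no Enc_* class) take a single
+// IntRegs operand; they are likely resolved to the corresponding :raw:hi or
+// :raw:lo encoding according to whether $Rt32 names the high or low half of
+// a register pair.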
+def M2_vrmac_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrmpyh($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M2_vrmpy_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrmpyh($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000000;
+let prefersSlot3 = 1;
+}
+def M2_xor_xacc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 ^= xor($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_and_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 &= and($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_and_andn : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 &= and($Rs32,~$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_and_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 &= or($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_and_xor : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 &= xor($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
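+// Complex word-by-halfword multiplies; the *_whc forms conjugate Rt
+// (the "$Rt32*" in the syntax) and require V5.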
+def M4_cmpyi_wh : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rd32 = cmpyiwh($Rss32,$Rt32):<<1:rnd:sat",
+S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_14287645 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M4_cmpyi_whc : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rd32 = cmpyiwh($Rss32,$Rt32*):<<1:rnd:sat",
+S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_14287645, Requires<[HasV5T]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M4_cmpyr_wh : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rd32 = cmpyrwh($Rss32,$Rt32):<<1:rnd:sat",
+S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_14287645 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M4_cmpyr_whc : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rd32 = cmpyrwh($Rss32,$Rt32*):<<1:rnd:sat",
+S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_14287645, Requires<[HasV5T]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M4_mac_up_s1_sat : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += mpy($Rs32,$Rt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
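+// Multiply-add with immediates. ImmRegRel ties each immediate form to its
+// register counterpart via CextOpcode; the #Ii operand is constant-extendable
+// (isExtendable/opExtentBits).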
+def M4_mpyri_addi : HInst<
+(outs IntRegs:$Rd32),
+(ins u32_0Imm:$Ii, IntRegs:$Rs32, u6_0Imm:$II),
+"$Rd32 = add(#$Ii,mpyi($Rs32,#$II))",
+ALU64_tc_3x_SLOT23, TypeALU64>, Enc_971574, ImmRegRel {
+let Inst{31-24} = 0b11011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M4_mpyri_addr";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def M4_mpyri_addr : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Ru32, IntRegs:$Rs32, u32_0Imm:$Ii),
+"$Rd32 = add($Ru32,mpyi($Rs32,#$Ii))",
+ALU64_tc_3x_SLOT23, TypeALU64>, Enc_236434, ImmRegRel {
+let Inst{31-23} = 0b110111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M4_mpyri_addr";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def M4_mpyri_addr_u2 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Ru32, u6_2Imm:$Ii, IntRegs:$Rs32),
+"$Rd32 = add($Ru32,mpyi(#$Ii,$Rs32))",
+ALU64_tc_3x_SLOT23, TypeALU64>, Enc_9959498 {
+let Inst{31-23} = 0b110111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def M4_mpyrr_addi : HInst<
+(outs IntRegs:$Rd32),
+(ins u32_0Imm:$Ii, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = add(#$Ii,mpyi($Rs32,$Rt32))",
+ALU64_tc_3x_SLOT23, TypeALU64>, Enc_2216485, ImmRegRel {
+let Inst{31-23} = 0b110101110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M4_mpyrr_addr";
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def M4_mpyrr_addr : HInst<
+(outs IntRegs:$Ry32),
+(ins IntRegs:$Ru32, IntRegs:$Ry32in, IntRegs:$Rs32),
+"$Ry32 = add($Ru32,mpyi($Ry32in,$Rs32))",
+M_tc_3x_SLOT23, TypeM>, Enc_13770697, ImmRegRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let CextOpcode = "M4_mpyrr_addr";
+let InputType = "reg";
+let Constraints = "$Ry32 = $Ry32in";
+}
+def M4_nac_up_s1_sat : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= mpy($Rs32,$Rt32):<<1:sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_or_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 |= and($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_or_andn : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 |= and($Rs32,~$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_or_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 |= or($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_or_xor : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 |= xor($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
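+// Polynomial (carry-less) multiplies: pmpyw on words, vpmpyh on halfword
+// vectors; the _acc forms XOR the product into Rxx.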
+def M4_pmpyw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = pmpyw($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101010;
+let prefersSlot3 = 1;
+}
+def M4_pmpyw_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 ^= pmpyw($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M4_vpmpyh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = vpmpyh($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101110;
+let prefersSlot3 = 1;
+}
+def M4_vpmpyh_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 ^= vpmpyh($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111101;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M4_vrmpyeh_acc_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrmpyweh($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M4_vrmpyeh_acc_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrmpyweh($Rss32,$Rtt32):<<1",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010101;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M4_vrmpyeh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrmpyweh($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000010;
+let prefersSlot3 = 1;
+}
+def M4_vrmpyeh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrmpyweh($Rss32,$Rtt32):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000110;
+let prefersSlot3 = 1;
+}
+def M4_vrmpyoh_acc_s0 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrmpywoh($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M4_vrmpyoh_acc_s1 : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrmpywoh($Rss32,$Rtt32):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010111;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M4_vrmpyoh_s0 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrmpywoh($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000001;
+let prefersSlot3 = 1;
+}
+def M4_vrmpyoh_s1 : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrmpywoh($Rss32,$Rtt32):<<1",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000101;
+let prefersSlot3 = 1;
+}
+def M4_xor_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 ^= and($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_xor_andn : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 ^= and($Rs32,~$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_xor_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 ^= or($Rs32,$Rt32)",
+M_tc_2_acc_SLOT23, TypeM>, Enc_9223889 {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "reg";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def M4_xor_xacc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 ^= xor($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_12702821 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001010100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
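+// M5: byte-vector multiplies, signed-by-unsigned (bsu) and
+// unsigned-by-unsigned (buu), in plain, accumulating and reducing forms.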
+def M5_vdmacbsu : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vdmpybsu($Rss32,$Rtt32):sat",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821, Requires<[HasV5T]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010001;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M5_vdmpybsu : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vdmpybsu($Rss32,$Rtt32):sat",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157, Requires<[HasV5T]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000101;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def M5_vmacbsu : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += vmpybsu($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M5_vmacbuu : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rxx32 += vmpybu($Rs32,$Rt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_1409050 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100111100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M5_vmpybsu : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = vmpybsu($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101010;
+let prefersSlot3 = 1;
+}
+def M5_vmpybuu : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = vmpybu($Rs32,$Rt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_1997594 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11100101100;
+let prefersSlot3 = 1;
+}
+def M5_vrmacbsu : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrmpybsu($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M5_vrmacbuu : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 += vrmpybu($Rss32,$Rtt32)",
+M_tc_3x_acc_SLOT23, TypeM>, Enc_12702821 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101010100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def M5_vrmpybsu : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrmpybsu($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000110;
+let prefersSlot3 = 1;
+}
+def M5_vrmpybuu : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vrmpybu($Rss32,$Rtt32)",
+M_tc_3x_SLOT23, TypeM>, Enc_8333157 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000100;
+let prefersSlot3 = 1;
+}
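+// M6 (V62 only): vector absolute difference on bytes.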
+def M6_vabsdiffb : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vabsdiffb($Rtt32,$Rss32)",
+M_tc_2_SLOT23, TypeM>, Enc_11687333, Requires<[HasV62T]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000111;
+let prefersSlot3 = 1;
+}
+def M6_vabsdiffub : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = vabsdiffub($Rtt32,$Rss32)",
+M_tc_2_SLOT23, TypeM>, Enc_11687333, Requires<[HasV62T]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11101000101;
+let prefersSlot3 = 1;
+}
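+// PS_*abs: absolute-addressed load pseudos tied to the L2_/L4_ opcodes via
+// CextOpcode/BaseOpcode. The #Ii operand must be constant-extended
+// (DecoderNamespace "MustExtend"); opExtentBits/opExtentAlign match the
+// access size.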
+def PS_loadrbabs : HInst<
+(outs IntRegs:$Rd32),
+(ins u32_0Imm:$Ii),
+"$Rd32 = memb(#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1886960, AddrModeRel {
+let Inst{24-21} = 0b1000;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrb";
+let BaseOpcode = "L4_loadrb_abs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def PS_loadrdabs : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins u29_3Imm:$Ii),
+"$Rdd32 = memd(#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_4975051, AddrModeRel {
+let Inst{24-21} = 0b1110;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrd";
+let BaseOpcode = "L4_loadrd_abs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 19;
+let opExtentAlign = 3;
+}
+def PS_loadrhabs : HInst<
+(outs IntRegs:$Rd32),
+(ins u31_1Imm:$Ii),
+"$Rd32 = memh(#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_12608570, AddrModeRel {
+let Inst{24-21} = 0b1010;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrh";
+let BaseOpcode = "L4_loadrh_abs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+}
+def PS_loadriabs : HInst<
+(outs IntRegs:$Rd32),
+(ins u30_2Imm:$Ii),
+"$Rd32 = memw(#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_8814718, AddrModeRel {
+let Inst{24-21} = 0b1100;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadri";
+let BaseOpcode = "L4_loadri_abs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 18;
+let opExtentAlign = 2;
+}
+def PS_loadrubabs : HInst<
+(outs IntRegs:$Rd32),
+(ins u32_0Imm:$Ii),
+"$Rd32 = memub(#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_1886960, AddrModeRel {
+let Inst{24-21} = 0b1001;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadrub";
+let BaseOpcode = "L4_loadrub_abs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def PS_loadruhabs : HInst<
+(outs IntRegs:$Rd32),
+(ins u31_1Imm:$Ii),
+"$Rd32 = memuh(#$Ii)",
+V2LDST_tc_ld_SLOT01, TypeV2LDST>, Enc_12608570, AddrModeRel {
+let Inst{24-21} = 0b1011;
+let Inst{31-27} = 0b01001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayLoad = 1;
+let CextOpcode = "L2_loadruh";
+let BaseOpcode = "L4_loadruh_abs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+}
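+// Absolute-addressed store pseudos, including .new (new-value) forms that
+// are restricted to SLOT0 and marked isNVStore/isNewValue.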
+def PS_storerbabs : HInst<
+(outs),
+(ins u32_0Imm:$Ii, IntRegs:$Rt32),
+"memb(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeV2LDST>, Enc_12395768, AddrModeRel {
+let Inst{24-21} = 0b0000;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let isPredicable = 1;
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def PS_storerbnewabs : HInst<
+(outs),
+(ins u32_0Imm:$Ii, IntRegs:$Nt8),
+"memb(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeV2LDST>, Enc_4050532, AddrModeRel {
+let Inst{12-11} = 0b00;
+let Inst{24-21} = 0b0101;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+let opNewValue = 1;
+}
+def PS_storerdabs : HInst<
+(outs),
+(ins u29_3Imm:$Ii, DoubleRegs:$Rtt32),
+"memd(#$Ii) = $Rtt32",
+ST_tc_st_SLOT01, TypeV2LDST>, Enc_11682941, AddrModeRel {
+let Inst{24-21} = 0b0110;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerdabs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 19;
+let opExtentAlign = 3;
+}
+def PS_storerfabs : HInst<
+(outs),
+(ins u31_1Imm:$Ii, IntRegs:$Rt32),
+"memh(#$Ii) = $Rt32.h",
+ST_tc_st_SLOT01, TypeV2LDST>, Enc_1186018, AddrModeRel {
+let Inst{24-21} = 0b0011;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerfabs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+}
+def PS_storerhabs : HInst<
+(outs),
+(ins u31_1Imm:$Ii, IntRegs:$Rt32),
+"memh(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeV2LDST>, Enc_1186018, AddrModeRel {
+let Inst{24-21} = 0b0010;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let isPredicable = 1;
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+}
+def PS_storerhnewabs : HInst<
+(outs),
+(ins u31_1Imm:$Ii, IntRegs:$Nt8),
+"memh(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeV2LDST>, Enc_13618890, AddrModeRel {
+let Inst{12-11} = 0b01;
+let Inst{24-21} = 0b0101;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+let opNewValue = 1;
+}
+def PS_storeriabs : HInst<
+(outs),
+(ins u30_2Imm:$Ii, IntRegs:$Rt32),
+"memw(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeV2LDST>, Enc_15999208, AddrModeRel {
+let Inst{24-21} = 0b0100;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let isPredicable = 1;
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 18;
+let opExtentAlign = 2;
+}
+def PS_storerinewabs : HInst<
+(outs),
+(ins u30_2Imm:$Ii, IntRegs:$Nt8),
+"memw(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeV2LDST>, Enc_12297800, AddrModeRel {
+let Inst{12-11} = 0b10;
+let Inst{24-21} = 0b0101;
+let Inst{31-27} = 0b01001;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let isPredicable = 1;
+let DecoderNamespace = "MustExtend";
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 18;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
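+// S2 shift, bit-field and bit-manipulation instructions.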
+def S2_addasl_rrri : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32, u3_0Imm:$Ii),
+"$Rd32 = addasl($Rt32,$Rs32,#$Ii)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_3494181 {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S2_allocframe : HInst<
+(outs),
+(ins u11_3Imm:$Ii),
+"allocframe(#$Ii)",
+ST_tc_ld_SLOT0, TypeST>, Enc_15830826 {
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b10100000100;
+let Inst{20-16} = 0b11101;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let Uses = [R29, R30, R31];
+let Defs = [R29, R30];
+}
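+// Naming: asl/asr = arithmetic shift, lsl/lsr = logical; _i_/_r_ = immediate
+// or register shift amount; _r/_p = 32-bit register or 64-bit pair; trailing
+// _acc/_nac/_and/_or/_xacc/_xor fold +=, -=, &=, |=, ^= into the destination.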
+def S2_asl_i_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rdd32 = asl($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4231995 {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b10000000000;
+}
+def S2_asl_i_p_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 += asl($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b110;
+let Inst{31-21} = 0b10000010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_i_p_and : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 &= asl($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b10000010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_i_p_nac : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 -= asl($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b10000010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_i_p_or : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 |= asl($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b110;
+let Inst{31-21} = 0b10000010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_i_p_xacc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 ^= asl($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b10000010100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_i_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = asl($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_asl_i_r_acc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 += asl($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_i_r_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 &= asl($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_i_r_nac : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 -= asl($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_i_r_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 |= asl($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_i_r_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = asl($Rs32,#$Ii):sat",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_asl_i_r_xacc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 ^= asl($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_i_vh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
+"$Rdd32 = vaslh($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2082775 {
+let Inst{7-5} = 0b010;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b10000000100;
+}
+def S2_asl_i_vw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
+"$Rdd32 = vaslw($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13201267 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10000000010;
+}
+def S2_asl_r_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = asl($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011100;
+}
+def S2_asl_r_p_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 += asl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_r_p_and : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 &= asl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_r_p_nac : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 -= asl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_r_p_or : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 |= asl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_r_p_xor : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 ^= asl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asl_r_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = asl($Rs32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_asl_r_r_acc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += asl($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_r_r_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 &= asl($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_r_r_nac : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= asl($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_r_r_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 |= asl($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asl_r_r_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = asl($Rs32,$Rt32):sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_asl_r_vh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vaslh($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011010;
+}
+def S2_asl_r_vw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vaslw($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011000;
+}
+def S2_asr_i_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rdd32 = asr($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4231995 {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b10000000000;
+}
+def S2_asr_i_p_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 += asr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b100;
+let Inst{31-21} = 0b10000010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_i_p_and : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 &= asr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b10000010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_i_p_nac : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 -= asr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b10000010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_i_p_or : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 |= asr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b100;
+let Inst{31-21} = 0b10000010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_i_p_rnd : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rdd32 = asr($Rss32,#$Ii):rnd",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4231995, Requires<[HasV5T]> {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b10000000110;
+let prefersSlot3 = 1;
+}
+def S2_asr_i_p_rnd_goodsyntax : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rdd32 = asrrnd($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Requires<[HasV5T]> {
+let isPseudo = 1;
+}
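+// The *_goodsyntax defs are assembler-level pseudos (isPseudo = 1) providing
+// the asrrnd(...) spelling; they presumably lower to the :rnd forms above.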
+def S2_asr_i_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = asr($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_asr_i_r_acc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 += asr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asr_i_r_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 &= asr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asr_i_r_nac : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 -= asr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asr_i_r_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 |= asr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asr_i_r_rnd : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = asr($Rs32,#$Ii):rnd",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S2_asr_i_r_rnd_goodsyntax : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = asrrnd($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+}
+def S2_asr_i_svw_trun : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
+"$Rd32 = vasrw($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2380082 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001000110;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_asr_i_vh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
+"$Rdd32 = vasrh($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2082775 {
+let Inst{7-5} = 0b000;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b10000000100;
+}
+def S2_asr_i_vw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
+"$Rdd32 = vasrw($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13201267 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10000000010;
+}
+def S2_asr_r_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = asr($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011100;
+}
+def S2_asr_r_p_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 += asr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_r_p_and : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 &= asr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_r_p_nac : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 -= asr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_r_p_or : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 |= asr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_r_p_xor : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 ^= asr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_asr_r_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = asr($Rs32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_asr_r_r_acc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += asr($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asr_r_r_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 &= asr($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asr_r_r_nac : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= asr($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asr_r_r_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 |= asr($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_asr_r_r_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = asr($Rs32,$Rt32):sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_asr_r_svw_trun : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rd32 = vasrw($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14287645 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_asr_r_vh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vasrh($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011010;
+}
+def S2_asr_r_vw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vasrw($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011000;
+}
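+// Unary bit ops: brev (bit reverse), cl0/cl1/clb (count leading
+// zeros/ones/bits), ct0/ct1 (count trailing zeros/ones); the ...p variants
+// operate on 64-bit pairs.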
+def S2_brev : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = brev($Rs32)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_brevp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = brev($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10000000110;
+}
+def S2_cabacdecbin : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = decbin($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001110;
+let isPredicateLate = 1;
+let prefersSlot3 = 1;
+let Defs = [P0];
+}
+def S2_cl0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = cl0($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_cl0p : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = cl0($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10001000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_cl1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = cl1($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_cl1p : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = cl1($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10001000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_clb : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = clb($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_clbnorm : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = normamt($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000111;
+let Inst{31-21} = 0b10001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_clbp : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = clb($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_clrbit_i : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = clrbit($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_clrbit_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = clrbit($Rs32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_ct0 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = ct0($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_ct0p : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = ct0($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10001000111;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_ct1 : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = ct1($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_ct1p : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = ct1($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10001000111;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_deinterleave : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = deinterleave($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10000000110;
+}
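+// Bit-field extract/insert: the immediate forms encode the field with two
+// immediates (#$Ii,#$II); the _rp forms take it from a register pair Rtt.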
+def S2_extractu : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
+"$Rd32 = extractu($Rs32,#$Ii,#$II)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_11930928 {
+let Inst{13-13} = 0b0;
+let Inst{31-23} = 0b100011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S2_extractu_rp : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"$Rd32 = extractu($Rs32,$Rtt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_15472748 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S2_extractup : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
+"$Rdd32 = extractu($Rss32,#$Ii,#$II)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_9894557 {
+let Inst{31-24} = 0b10000001;
+let prefersSlot3 = 1;
+}
+def S2_extractup_rp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = extractu($Rss32,$Rtt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001000;
+let prefersSlot3 = 1;
+}
+def S2_insert : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
+"$Rx32 = insert($Rs32,#$Ii,#$II)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2880796 {
+let Inst{13-13} = 0b0;
+let Inst{31-23} = 0b100011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_insert_rp : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"$Rx32 = insert($Rs32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_16311032 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_insertp : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
+"$Rxx32 = insert($Rss32,#$Ii,#$II)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_631197 {
+let Inst{31-24} = 0b10000011;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_insertp_rp : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rxx32 = insert($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_12702821 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001010000;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_interleave : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = interleave($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10000000110;
+}
+def S2_lfsp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = lfs($Rss32,$Rtt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001100;
+let prefersSlot3 = 1;
+}
+def S2_lsl_r_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = lsl($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011100;
+}
+def S2_lsl_r_p_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 += lsl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsl_r_p_and : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 &= lsl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsl_r_p_nac : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 -= lsl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsl_r_p_or : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 |= lsl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsl_r_p_xor : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 ^= lsl($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsl_r_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = lsl($Rs32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_lsl_r_r_acc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += lsl($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsl_r_r_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 &= lsl($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsl_r_r_nac : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= lsl($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsl_r_r_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 |= lsl($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsl_r_vh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vlslh($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011010;
+}
+def S2_lsl_r_vw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vlslw($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011000;
+}
+def S2_lsr_i_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rdd32 = lsr($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4231995 {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b10000000000;
+}
+def S2_lsr_i_p_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 += lsr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b101;
+let Inst{31-21} = 0b10000010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_i_p_and : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 &= lsr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b10000010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_i_p_nac : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 -= lsr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b10000010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_i_p_or : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 |= lsr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b101;
+let Inst{31-21} = 0b10000010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_i_p_xacc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 ^= lsr($Rss32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8497723 {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b10000010100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_i_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = lsr($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_lsr_i_r_acc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 += lsr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_i_r_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 &= lsr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_i_r_nac : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 -= lsr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_i_r_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 |= lsr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_i_r_xacc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 ^= lsr($Rs32,#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_2410156 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_i_vh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
+"$Rdd32 = vlsrh($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2082775 {
+let Inst{7-5} = 0b001;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b10000000100;
+}
+def S2_lsr_i_vw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
+"$Rdd32 = vlsrw($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13201267 {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10000000010;
+}
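+// S2_lsr_r_*: the same lsr family with the shift amount taken from register
+// $Rt32 rather than an immediate, again with accumulating and logical
+// read-modify-write variants tied back by "$Rx32 = $Rx32in".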
+def S2_lsr_r_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = lsr($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011100;
+}
+def S2_lsr_r_p_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 += lsr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011110;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_r_p_and : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 &= lsr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_r_p_nac : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 -= lsr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_r_p_or : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 |= lsr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_r_p_xor : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 ^= lsr($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001011011;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_lsr_r_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = lsr($Rs32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_lsr_r_r_acc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 += lsr($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_r_r_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 &= lsr($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_r_r_nac : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 -= lsr($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_r_r_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rx32 |= lsr($Rs32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_9223889 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_lsr_r_vh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vlsrh($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011010;
+}
+def S2_lsr_r_vw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vlsrw($Rss32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011000;
+}
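+// S2_packhl packs halfwords of $Rs32 and $Rt32 into the 64-bit pair $Rdd32;
+// S2_parityp reduces its two 64-bit sources to a parity bit in $Rd32.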
+def S2_packhl : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = packhl($Rs32,$Rt32)",
+ALU32_3op_tc_1_SLOT0123, TypeALU32_3op>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11110101100;
+let InputType = "reg";
+}
+def S2_parityp : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rd32 = parity($Rss32,$Rtt32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_9277990 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
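+// Predicated stores. The names encode the element size (b/d/h/i = byte,
+// doubleword, halfword, word; f = the high halfword, $Rt32.h), the predicate
+// sense (t/f on $Pv4), and the addressing mode (_io = base+offset, _pi =
+// post-increment). A "new" after the size letter marks a new-value store of
+// $Nt8.new; a "new" before the mode suffix marks a .new predicate. The
+// _zomap defs are isPseudo/isCodeGenOnly mappings for the zero-offset form.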
+def S2_pstorerbf_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memb($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_14044877, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000100000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S2_pstorerbf_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memb($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_8065534, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storerb_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerbf_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pv4) memb($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerbfnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memb($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_8065534, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let BaseOpcode = "S2_storerb_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerbnewf_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memb($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_1737833, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b01000100101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 3;
+}
+def S2_pstorerbnewf_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memb($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_2813446, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerb_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerbnewf_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if (!$Pv4) memb($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S2_pstorerbnewfnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memb($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_2813446, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerb_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerbnewt_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memb($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_1737833, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b01000000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 3;
+}
+def S2_pstorerbnewt_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memb($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_2813446, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerb_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerbnewt_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if ($Pv4) memb($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S2_pstorerbnewtnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memb($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_2813446, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b100;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerb_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerbt_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memb($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_14044877, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000000000;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S2_pstorerbt_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memb($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_8065534, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011000;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storerb_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerbt_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pv4) memb($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerbtnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memb($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_8065534, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011000;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let BaseOpcode = "S2_storerb_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerdf_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
+"if (!$Pv4) memd($Rs32+#$Ii) = $Rtt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_11049656, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000100110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "imm";
+let BaseOpcode = "S2_storerd_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 3;
+}
+def S2_pstorerdf_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
+"if (!$Pv4) memd($Rx32++#$Ii) = $Rtt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11959851, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerd_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerdf_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"if (!$Pv4) memd($Rs32) = $Rtt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerdfnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
+"if (!$Pv4.new) memd($Rx32++#$Ii) = $Rtt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11959851, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerd_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerdt_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
+"if ($Pv4) memd($Rs32+#$Ii) = $Rtt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_11049656, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000000110;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "imm";
+let BaseOpcode = "S2_storerd_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 3;
+}
+def S2_pstorerdt_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
+"if ($Pv4) memd($Rx32++#$Ii) = $Rtt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11959851, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011110;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerd_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerdt_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"if ($Pv4) memd($Rs32) = $Rtt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerdtnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
+"if ($Pv4.new) memd($Rx32++#$Ii) = $Rtt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11959851, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011110;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerd_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerff_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memh($Rs32+#$Ii) = $Rt32.h",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000100011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "imm";
+let BaseOpcode = "S2_storerf_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def S2_pstorerff_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memh($Rx32++#$Ii) = $Rt32.h",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerf_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerff_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pv4) memh($Rs32) = $Rt32.h",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerffnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memh($Rx32++#$Ii) = $Rt32.h",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerf_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerft_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memh($Rs32+#$Ii) = $Rt32.h",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000000011;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "imm";
+let BaseOpcode = "S2_storerf_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def S2_pstorerft_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memh($Rx32++#$Ii) = $Rt32.h",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011011;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerf_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerft_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pv4) memh($Rs32) = $Rt32.h",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerftnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memh($Rx32++#$Ii) = $Rt32.h",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011011;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerf_pi";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerhf_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memh($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000100010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def S2_pstorerhf_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memh($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storerh_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerhf_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pv4) memh($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerhfnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memh($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let BaseOpcode = "S2_storerh_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerhnewf_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memh($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_6154421, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b01000100101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+let opNewValue = 3;
+}
+def S2_pstorerhnewf_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memh($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_3813442, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerh_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerhnewf_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if (!$Pv4) memh($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S2_pstorerhnewfnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memh($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_3813442, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerh_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerhnewt_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memh($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_6154421, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b01000000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+let opNewValue = 3;
+}
+def S2_pstorerhnewt_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memh($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_3813442, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerh_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerhnewt_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if ($Pv4) memh($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S2_pstorerhnewtnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memh($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_3813442, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b101;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerh_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerht_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memh($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000000010;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def S2_pstorerht_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memh($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011010;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storerh_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerht_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pv4) memh($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerhtnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memh($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11065510, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011010;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let BaseOpcode = "S2_storerh_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerif_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memw($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_8225953, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000100100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def S2_pstorerif_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memw($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_10065510, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storeri_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerif_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pv4) memw($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstorerifnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memw($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_10065510, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeri_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerinewf_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memw($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_11224149, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b01000100101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+let opNewValue = 3;
+}
+def S2_pstorerinewf_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memw($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_4813442, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeri_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerinewf_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if (!$Pv4) memw($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S2_pstorerinewfnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memw($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_4813442, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeri_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerinewt_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memw($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_11224149, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b01000000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+let opNewValue = 3;
+}
+def S2_pstorerinewt_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memw($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_4813442, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeri_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerinewt_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if ($Pv4) memw($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S2_pstorerinewtnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memw($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_4813442, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b110;
+let Inst{31-21} = 0b10101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeri_pi";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerit_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memw($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_8225953, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000000100;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def S2_pstorerit_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memw($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_10065510, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011100;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storeri_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_pstorerit_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pv4) memw($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_pstoreritnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memw($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_10065510, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b10101011100;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let BaseOpcode = "S2_storeri_pi";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
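+// S2_setbit_{i,r} set a single bit of $Rs32 selected by an immediate or a
+// register; the S2_shuff{e,o}{b,h} defs interleave byte or halfword lanes
+// of the two 64-bit source pairs (even and odd lanes, per the e/o infix).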
+def S2_setbit_i : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = setbit($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_setbit_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = setbit($Rs32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_shuffeb : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = shuffeb($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001000;
+}
+def S2_shuffeh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = shuffeh($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001000;
+}
+def S2_shuffob : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = shuffob($Rtt32,$Rss32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_11687333 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001000;
+}
+def S2_shuffoh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
+"$Rdd32 = shuffoh($Rtt32,$Rss32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_11687333 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001100;
+}
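+// Unpredicated stores, one cluster per element size. Mode suffixes: _io
+// base+offset, _pi post-increment by immediate, _pr post-increment by the
+// modifier register $Mu2, _pbr bit-reversed, _pci/_pcr circular addressing
+// (by immediate / by I) which use CS, *gp gp-relative using GP, and _zomap
+// the zero-offset pseudo. The *new_* defs are new-value stores pinned to
+// slot 0 by their itinerary classes.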
+def S2_storerb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Rt32),
+"memb($Rs32+#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_13150110, AddrModeRel {
+let Inst{24-21} = 0b1000;
+let Inst{31-27} = 0b10100;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isPredicable = 1;
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 0;
+}
+def S2_storerb_pbr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memb($Rx32++$Mu2:brev) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914, AddrModeRel {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101111000;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storerb_pbr";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerb_pci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
+"memb($Rx32++#$Ii:circ($Mu2)) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_3915770 {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{31-21} = 0b10101001000;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let Uses = [CS];
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerb_pcr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memb($Rx32++I:circ($Mu2)) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000010;
+let Inst{31-21} = 0b10101001000;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let Uses = [CS];
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerb_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Rt32),
+"memb($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_12492533, AddrModeRel {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011000;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storerb_pi";
+let isPredicable = 1;
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerb_pr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memb($Rx32++$Mu2) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101101000;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memb($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_storerbgp : HInst<
+(outs),
+(ins u32_0Imm:$Ii, IntRegs:$Rt32),
+"memb(gp+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_12395768, AddrModeRel {
+let Inst{24-21} = 0b0000;
+let Inst{31-27} = 0b01001;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let Uses = [GP];
+let BaseOpcode = "S2_storerbabs";
+let isPredicable = 1;
+let isNVStorable = 1;
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+}
+def S2_storerbnew_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Nt8),
+"memb($Rs32+#$Ii) = $Nt8.new",
+ST_tc_st_SLOT0, TypeST>, Enc_10002182, AddrModeRel {
+let Inst{12-11} = 0b00;
+let Inst{24-21} = 0b1101;
+let Inst{31-27} = 0b10100;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 11;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S2_storerbnew_pbr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memb($Rx32++$Mu2:brev) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774, AddrModeRel {
+let Inst{7-0} = 0b00000000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b10101111101;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storerb_pbr";
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerbnew_pci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
+"memb($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_5326450 {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b10101001101;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [CS];
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerbnew_pcr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memb($Rx32++I:circ($Mu2)) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774 {
+let Inst{7-0} = 0b00000010;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b10101001101;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [CS];
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerbnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_0Imm:$Ii, IntRegs:$Nt8),
+"memb($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_5900401, AddrModeRel {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b10101011101;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storerb_pi";
+let isPredicable = 1;
+let isNVStorable = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerbnew_pr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memb($Rx32++$Mu2) = $Nt8.new",
+ST_tc_st_SLOT0, TypeST>, Enc_10067774 {
+let Inst{7-0} = 0b00000000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b10101101101;
+let addrMode = PostInc;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerbnew_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Nt8),
+"memb($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 1;
+}
+def S2_storerbnewgp : HInst<
+(outs),
+(ins u32_0Imm:$Ii, IntRegs:$Nt8),
+"memb(gp+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_4050532, AddrModeRel {
+let Inst{12-11} = 0b00;
+let Inst{24-21} = 0b0101;
+let Inst{31-27} = 0b01001;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [GP];
+let BaseOpcode = "S2_storerbabs";
+let isPredicable = 1;
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 16;
+let opExtentAlign = 0;
+let opNewValue = 1;
+}
+def S2_storerd_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s29_3Imm:$Ii, DoubleRegs:$Rtt32),
+"memd($Rs32+#$Ii) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16319737, AddrModeRel {
+let Inst{24-21} = 0b1110;
+let Inst{31-27} = 0b10100;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "imm";
+let BaseOpcode = "S2_storerd_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 14;
+let opExtentAlign = 3;
+}
+def S2_storerd_pbr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
+"memd($Rx32++$Mu2:brev) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_15816255 {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101111110;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerd_pci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_3Imm:$Ii, ModRegs:$Mu2, DoubleRegs:$Rtt32),
+"memd($Rx32++#$Ii:circ($Mu2)) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_4501395 {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{31-21} = 0b10101001110;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerd_pcr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
+"memd($Rx32++I:circ($Mu2)) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_15816255 {
+let Inst{7-0} = 0b00000010;
+let Inst{31-21} = 0b10101001110;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerd_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_3Imm:$Ii, DoubleRegs:$Rtt32),
+"memd($Rx32++#$Ii) = $Rtt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11271630, AddrModeRel {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011110;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerd_pi";
+let isPredicable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerd_pr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, DoubleRegs:$Rtt32),
+"memd($Rx32++$Mu2) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_15816255 {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101101110;
+let addrMode = PostInc;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerd_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"memd($Rs32) = $Rtt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_storerdgp : HInst<
+(outs),
+(ins u29_3Imm:$Ii, DoubleRegs:$Rtt32),
+"memd(gp+#$Ii) = $Rtt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_11682941, AddrModeRel {
+let Inst{24-21} = 0b0110;
+let Inst{31-27} = 0b01001;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let Uses = [GP];
+let BaseOpcode = "S2_storerdabs";
+let isPredicable = 1;
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 19;
+let opExtentAlign = 3;
+}
+def S2_storerf_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+#$Ii) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_7736768, AddrModeRel {
+let Inst{24-21} = 0b1011;
+let Inst{31-27} = 0b10100;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "imm";
+let BaseOpcode = "S2_storerf_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+}
+def S2_storerf_pbr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memh($Rx32++$Mu2:brev) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101111011;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerf_pci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
+"memh($Rx32++#$Ii:circ($Mu2)) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_10915758 {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{31-21} = 0b10101001011;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerf_pcr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memh($Rx32++I:circ($Mu2)) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000010;
+let Inst{31-21} = 0b10101001011;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let Uses = [CS];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerf_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rx32++#$Ii) = $Rt32.h",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11492529, AddrModeRel {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011011;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerf_pi";
+let isPredicable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerf_pr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memh($Rx32++$Mu2) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101101011;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerf_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memh($Rs32) = $Rt32.h",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_storerfgp : HInst<
+(outs),
+(ins u31_1Imm:$Ii, IntRegs:$Rt32),
+"memh(gp+#$Ii) = $Rt32.h",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_1186018, AddrModeRel {
+let Inst{24-21} = 0b0011;
+let Inst{31-27} = 0b01001;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let Uses = [GP];
+let BaseOpcode = "S2_storerfabs";
+let isPredicable = 1;
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+}
+def S2_storerh_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7736768, AddrModeRel {
+let Inst{24-21} = 0b1010;
+let Inst{31-27} = 0b10100;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isPredicable = 1;
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+}
+def S2_storerh_pbr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memh($Rx32++$Mu2:brev) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914, AddrModeRel {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101111010;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storerh_pbr";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerh_pci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
+"memh($Rx32++#$Ii:circ($Mu2)) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_10915758 {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{31-21} = 0b10101001010;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let Uses = [CS];
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerh_pcr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memh($Rx32++I:circ($Mu2)) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000010;
+let Inst{31-21} = 0b10101001010;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let Uses = [CS];
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerh_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Rt32),
+"memh($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_11492529, AddrModeRel {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011010;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storerh_pi";
+let isPredicable = 1;
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerh_pr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memh($Rx32++$Mu2) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101101010;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerh_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memh($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_storerhgp : HInst<
+(outs),
+(ins u31_1Imm:$Ii, IntRegs:$Rt32),
+"memh(gp+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_1186018, AddrModeRel {
+let Inst{24-21} = 0b0010;
+let Inst{31-27} = 0b01001;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let Uses = [GP];
+let BaseOpcode = "S2_storerhabs";
+let isPredicable = 1;
+let isNVStorable = 1;
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+}
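+// S2_storerhnew_*: new-value halfword stores. $Nt8.new forwards a value
+// produced by another instruction in the same packet; opNewValue records
+// which operand carries it. New-value stores issue only in slot 0, hence
+// the ..._SLOT0 itineraries.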
+def S2_storerhnew_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Nt8),
+"memh($Rs32+#$Ii) = $Nt8.new",
+ST_tc_st_SLOT0, TypeST>, Enc_748676, AddrModeRel {
+let Inst{12-11} = 0b01;
+let Inst{24-21} = 0b1101;
+let Inst{31-27} = 0b10100;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 12;
+let opExtentAlign = 1;
+let opNewValue = 2;
+}
+def S2_storerhnew_pbr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memh($Rx32++$Mu2:brev) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774, AddrModeRel {
+let Inst{7-0} = 0b00000000;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b10101111101;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storerh_pbr";
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerhnew_pci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
+"memh($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10326434 {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b10101001101;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [CS];
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerhnew_pcr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memh($Rx32++I:circ($Mu2)) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774 {
+let Inst{7-0} = 0b00000010;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b10101001101;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [CS];
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerhnew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_1Imm:$Ii, IntRegs:$Nt8),
+"memh($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_6900405, AddrModeRel {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b001;
+let Inst{31-21} = 0b10101011101;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storerh_pi";
+let isNVStorable = 1;
+let isPredicable = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerhnew_pr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memh($Rx32++$Mu2) = $Nt8.new",
+ST_tc_st_SLOT0, TypeST>, Enc_10067774 {
+let Inst{7-0} = 0b00000000;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b10101101101;
+let addrMode = PostInc;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerhnew_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Nt8),
+"memh($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 1;
+}
+def S2_storerhnewgp : HInst<
+(outs),
+(ins u31_1Imm:$Ii, IntRegs:$Nt8),
+"memh(gp+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_13618890, AddrModeRel {
+let Inst{12-11} = 0b01;
+let Inst{24-21} = 0b0101;
+let Inst{31-27} = 0b01001;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [GP];
+let BaseOpcode = "S2_storerhabs";
+let isPredicable = 1;
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 17;
+let opExtentAlign = 1;
+let opNewValue = 1;
+}
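+// S2_storeri_*: word stores, mirroring the halfword family above with
+// WordAccess and 4-byte-scaled offsets (s30_2Imm, s4_2Imm, u30_2Imm).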
+def S2_storeri_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Rt32),
+"memw($Rs32+#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_6673186, AddrModeRel {
+let Inst{24-21} = 0b1100;
+let Inst{31-27} = 0b10100;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isPredicable = 1;
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 13;
+let opExtentAlign = 2;
+}
+def S2_storeri_pbr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memw($Rx32++$Mu2:brev) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914, AddrModeRel {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101111100;
+let accessSize = WordAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storeri_pbr";
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storeri_pci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2, IntRegs:$Rt32),
+"memw($Rx32++#$Ii:circ($Mu2)) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_9915754 {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{31-21} = 0b10101001100;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayStore = 1;
+let Uses = [CS];
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storeri_pcr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memw($Rx32++I:circ($Mu2)) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000010;
+let Inst{31-21} = 0b10101001100;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayStore = 1;
+let Uses = [CS];
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storeri_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Rt32),
+"memw($Rx32++#$Ii) = $Rt32",
+ST_tc_st_pi_SLOT01, TypeST>, Enc_10492541, AddrModeRel {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011100;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayStore = 1;
+let BaseOpcode = "S2_storeri_pi";
+let isPredicable = 1;
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storeri_pr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Rt32),
+"memw($Rx32++$Mu2) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_7255914 {
+let Inst{7-0} = 0b00000000;
+let Inst{31-21} = 0b10101101100;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let mayStore = 1;
+let isNVStorable = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storeri_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memw($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S2_storerigp : HInst<
+(outs),
+(ins u30_2Imm:$Ii, IntRegs:$Rt32),
+"memw(gp+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_15999208, AddrModeRel {
+let Inst{24-21} = 0b0100;
+let Inst{31-27} = 0b01001;
+let accessSize = WordAccess;
+let mayStore = 1;
+let Uses = [GP];
+let BaseOpcode = "S2_storeriabs";
+let isPredicable = 1;
+let isNVStorable = 1;
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 18;
+let opExtentAlign = 2;
+}
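+// S2_storerinew_*: new-value word stores; slot-0 only, like the halfword
+// new-value forms.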
+def S2_storerinew_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Nt8),
+"memw($Rs32+#$Ii) = $Nt8.new",
+ST_tc_st_SLOT0, TypeST>, Enc_8409782, AddrModeRel {
+let Inst{12-11} = 0b10;
+let Inst{24-21} = 0b1101;
+let Inst{31-27} = 0b10100;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 1;
+let opExtentBits = 13;
+let opExtentAlign = 2;
+let opNewValue = 2;
+}
+def S2_storerinew_pbr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memw($Rx32++$Mu2:brev) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774, AddrModeRel {
+let Inst{7-0} = 0b00000000;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b10101111101;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storeri_pbr";
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerinew_pci : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii, ModRegs:$Mu2, IntRegs:$Nt8),
+"memw($Rx32++#$Ii:circ($Mu2)) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_11326438 {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b10101001101;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [CS];
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerinew_pcr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memw($Rx32++I:circ($Mu2)) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10067774 {
+let Inst{7-0} = 0b00000010;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b10101001101;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [CS];
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerinew_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s4_2Imm:$Ii, IntRegs:$Nt8),
+"memw($Rx32++#$Ii) = $Nt8.new",
+ST_tc_st_pi_SLOT0, TypeST>, Enc_7900405, AddrModeRel {
+let Inst{2-0} = 0b000;
+let Inst{7-7} = 0b0;
+let Inst{13-11} = 0b010;
+let Inst{31-21} = 0b10101011101;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storeri_pi";
+let isPredicable = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerinew_pr : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Nt8),
+"memw($Rx32++$Mu2) = $Nt8.new",
+ST_tc_st_SLOT0, TypeST>, Enc_10067774 {
+let Inst{7-0} = 0b00000000;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b10101101101;
+let addrMode = PostInc;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_storerinew_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Nt8),
+"memw($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 1;
+}
+def S2_storerinewgp : HInst<
+(outs),
+(ins u30_2Imm:$Ii, IntRegs:$Nt8),
+"memw(gp+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_12297800, AddrModeRel {
+let Inst{12-11} = 0b10;
+let Inst{24-21} = 0b0101;
+let Inst{31-27} = 0b01001;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let Uses = [GP];
+let BaseOpcode = "S2_storeriabs";
+let isPredicable = 1;
+let opExtendable = 0;
+let isExtentSigned = 0;
+let opExtentBits = 18;
+let opExtentAlign = 2;
+let opNewValue = 1;
+}
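+// S2_storew_locked: store-conditional word (the store half of Hexagon's
+// LL/SC pair). $Pd4 reports success and is produced late in the cycle,
+// hence isPredicateLate.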
+def S2_storew_locked : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"memw_locked($Rs32,$Pd4) = $Rt32",
+ST_tc_ld_SLOT0, TypeST>, Enc_10157519 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10100000101;
+let accessSize = WordAccess;
+let isSoloAX = 1;
+let mayStore = 1;
+let isPredicateLate = 1;
+}
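+// Saturating pack/extend group: these set the sticky overflow bit on
+// saturation, hence Defs = [USR_OVF]. The _nopack forms saturate each
+// lane but keep the full 64-bit layout instead of narrowing.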
+def S2_svsathb : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = vsathb($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_svsathub : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = vsathub($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10001100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
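+// S2_tableidx*: table-index bit-field insert. The :raw forms encode the
+// signed offset field directly; the _goodsyntax pseudos take the
+// user-facing immediate and are converted to the raw form later.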
+def S2_tableidxb : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
+"$Rx32 = tableidxb($Rs32,#$Ii,#$II):raw",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8838398 {
+let Inst{31-22} = 0b1000011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_tableidxb_goodsyntax : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
+"$Rx32 = tableidxb($Rs32,#$Ii,#$II)",
+S_2op_tc_1_SLOT23, TypeS_2op> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_tableidxd : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
+"$Rx32 = tableidxd($Rs32,#$Ii,#$II):raw",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8838398 {
+let Inst{31-22} = 0b1000011111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_tableidxd_goodsyntax : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
+"$Rx32 = tableidxd($Rs32,#$Ii,#$II)",
+S_2op_tc_1_SLOT23, TypeS_2op> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_tableidxh : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
+"$Rx32 = tableidxh($Rs32,#$Ii,#$II):raw",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8838398 {
+let Inst{31-22} = 0b1000011101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_tableidxh_goodsyntax : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
+"$Rx32 = tableidxh($Rs32,#$Ii,#$II)",
+S_2op_tc_1_SLOT23, TypeS_2op> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_tableidxw : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, s6_0Imm:$II),
+"$Rx32 = tableidxw($Rs32,#$Ii,#$II):raw",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8838398 {
+let Inst{31-22} = 0b1000011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_tableidxw_goodsyntax : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II),
+"$Rx32 = tableidxw($Rs32,#$Ii,#$II)",
+S_2op_tc_1_SLOT23, TypeS_2op> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S2_togglebit_i : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = togglebit($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_togglebit_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = togglebit($Rs32,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_14071773 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_tstbit_i : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Pd4 = tstbit($Rs32,#$Ii)",
+S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_2103742 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10000101000;
+}
+def S2_tstbit_r : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = tstbit($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111000;
+}
+def S2_valignib : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32, u3_0Imm:$Ii),
+"$Rdd32 = valignb($Rtt32,$Rss32,#$Ii)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_11971407 {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000000000;
+}
+def S2_valignrb : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32, PredRegs:$Pu4),
+"$Rdd32 = valignb($Rtt32,$Rss32,$Pu4)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_11552785 {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000010000;
+}
+def S2_vcnegh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vcnegh($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def S2_vcrotate : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rdd32 = vcrotate($Rss32,$Rt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8940892 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000011110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def S2_vrcnegh : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32),
+"$Rxx32 += vrcnegh($Rss32,$Rt32)",
+S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_7912540 {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b11001011001;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S2_vrndpackwh : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = vrndwh($Rss32)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10001000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_vrndpackwhs : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = vrndwh($Rss32):sat",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10001000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_vsathb : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = vsathb($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10001000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_vsathb_nopack : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vsathb($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000111;
+let Inst{31-21} = 0b10000000000;
+let Defs = [USR_OVF];
+}
+def S2_vsathub : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = vsathub($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_vsathub_nopack : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vsathub($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10000000000;
+let Defs = [USR_OVF];
+}
+def S2_vsatwh : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = vsatwh($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10001000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_vsatwh_nopack : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vsatwh($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10000000000;
+let Defs = [USR_OVF];
+}
+def S2_vsatwuh : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = vsatwuh($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10001000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def S2_vsatwuh_nopack : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32),
+"$Rdd32 = vsatwuh($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_13133231 {
+let Inst{13-5} = 0b000000101;
+let Inst{31-21} = 0b10000000000;
+let Defs = [USR_OVF];
+}
+def S2_vsplatrb : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32),
+"$Rd32 = vsplatb($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4075554 {
+let Inst{13-5} = 0b000000111;
+let Inst{31-21} = 0b10001100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+}
+def S2_vsplatrh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = vsplath($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10000100010;
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+}
+def S2_vspliceib : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, u3_0Imm:$Ii),
+"$Rdd32 = vspliceb($Rss32,$Rtt32,#$Ii)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_16730127 {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000000100;
+}
+def S2_vsplicerb : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32, PredRegs:$Pu4),
+"$Rdd32 = vspliceb($Rss32,$Rtt32,$Pu4)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_5178985 {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000010100;
+}
+def S2_vsxtbh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = vsxtbh($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10000100000;
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+}
+def S2_vsxthw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = vsxthw($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10000100000;
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+}
+def S2_vtrunehb : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = vtrunehb($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10001000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_vtrunewh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vtrunewh($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001100;
+}
+def S2_vtrunohb : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = vtrunohb($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001000100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S2_vtrunowh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vtrunowh($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001100;
+}
+def S2_vzxtbh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = vzxtbh($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b10000100000;
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+}
+def S2_vzxthw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = vzxthw($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179 {
+let Inst{13-5} = 0b000000110;
+let Inst{31-21} = 0b10000100000;
+let isReMaterializable = 1;
+let isAsCheapAsAMove = 1;
+}
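+// S4_* compound-ALU forms: operands flagged isExtendable may exceed
+// opExtentBits bits by consuming a constant-extender word in the packet;
+// opExtendable is the index of the extendable operand.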
+def S4_addaddi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, s32_0Imm:$Ii),
+"$Rd32 = add($Rs32,add($Ru32,#$Ii))",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_6495334 {
+let Inst{31-23} = 0b110110110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_addi_asl_ri : HInst<
+(outs IntRegs:$Rx32),
+(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
+"$Rx32 = add(#$Ii,asl($Rx32in,#$II))",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_117962 {
+let Inst{2-0} = 0b100;
+let Inst{4-4} = 0b0;
+let Inst{31-24} = 0b11011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_addi_lsr_ri : HInst<
+(outs IntRegs:$Rx32),
+(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
+"$Rx32 = add(#$Ii,lsr($Rx32in,#$II))",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_117962 {
+let Inst{2-0} = 0b100;
+let Inst{4-4} = 0b1;
+let Inst{31-24} = 0b11011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_andi_asl_ri : HInst<
+(outs IntRegs:$Rx32),
+(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
+"$Rx32 = and(#$Ii,asl($Rx32in,#$II))",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_117962 {
+let Inst{2-0} = 0b000;
+let Inst{4-4} = 0b0;
+let Inst{31-24} = 0b11011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_andi_lsr_ri : HInst<
+(outs IntRegs:$Rx32),
+(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
+"$Rx32 = and(#$Ii,lsr($Rx32in,#$II))",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_117962 {
+let Inst{2-0} = 0b000;
+let Inst{4-4} = 0b1;
+let Inst{31-24} = 0b11011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_clbaddi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s6_0Imm:$Ii),
+"$Rd32 = add(clb($Rs32),#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_5523416 {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b10001100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S4_clbpaddi : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, s6_0Imm:$Ii),
+"$Rd32 = add(clb($Rss32),#$Ii)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_10188026 {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b10001000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S4_clbpnorm : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = normamt($Rss32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_3742184 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b10001000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S4_extract : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii, u5_0Imm:$II),
+"$Rd32 = extract($Rs32,#$Ii,#$II)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_11930928 {
+let Inst{13-13} = 0b0;
+let Inst{31-23} = 0b100011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S4_extract_rp : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"$Rd32 = extract($Rs32,$Rtt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_15472748 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11001001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S4_extractp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii, u6_0Imm:$II),
+"$Rdd32 = extract($Rss32,#$Ii,#$II)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_9894557 {
+let Inst{31-24} = 0b10001010;
+let prefersSlot3 = 1;
+}
+def S4_extractp_rp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = extract($Rss32,$Rtt32)",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001110;
+let prefersSlot3 = 1;
+}
+def S4_lsli : HInst<
+(outs IntRegs:$Rd32),
+(ins s6_0Imm:$Ii, IntRegs:$Rt32),
+"$Rd32 = lsl(#$Ii,$Rt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_518319 {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S4_ntstbit_i : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Pd4 = !tstbit($Rs32,#$Ii)",
+S_2op_tc_2early_SLOT23, TypeS_2op>, Enc_2103742 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10000101001;
+}
+def S4_ntstbit_r : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Pd4 = !tstbit($Rs32,$Rt32)",
+S_3op_tc_2early_SLOT23, TypeS_3op>, Enc_10157519 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000111001;
+}
+def S4_or_andi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rx32 |= and($Rs32,#$Ii)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_6356866 {
+let Inst{31-22} = 0b1101101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_or_andix : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Ru32, IntRegs:$Rx32in, s32_0Imm:$Ii),
+"$Rx32 = or($Ru32,and($Rx32in,#$Ii))",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_7504828 {
+let Inst{31-22} = 0b1101101001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_or_ori : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, s32_0Imm:$Ii),
+"$Rx32 |= or($Rs32,#$Ii)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_6356866 {
+let Inst{31-22} = 0b1101101010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let InputType = "imm";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 10;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_ori_asl_ri : HInst<
+(outs IntRegs:$Rx32),
+(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
+"$Rx32 = or(#$Ii,asl($Rx32in,#$II))",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_117962 {
+let Inst{2-0} = 0b010;
+let Inst{4-4} = 0b0;
+let Inst{31-24} = 0b11011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_ori_lsr_ri : HInst<
+(outs IntRegs:$Rx32),
+(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
+"$Rx32 = or(#$Ii,lsr($Rx32in,#$II))",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_117962 {
+let Inst{2-0} = 0b010;
+let Inst{4-4} = 0b1;
+let Inst{31-24} = 0b11011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_parity : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = parity($Rs32,$Rt32)",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
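+// S4_pstorer*: predicated stores, named S4_pstorer<sz>[new]<t|f>[new]_<am>:
+// "new" before t/f means new-value data ($Nt8.new); t/f is the predicate
+// sense; a trailing "new" means a .new predicate (isPredicatedNew).
+// Address modes: _abs (absolute, must-extend), _io (base+imm),
+// _rr (base+reg<<#imm), _zomap (zero-offset pseudo).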
+def S4_pstorerbf_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memb(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111000000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerbf_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerbfnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memb(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111000000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerbfnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memb($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_14044877, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000110000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerbfnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110111000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerbfnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pv4.new) memb($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerbnewf_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memb(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b000;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerbnewf_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b00;
+let Inst{31-21} = 0b00110101101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let opNewValue = 4;
+}
+def S4_pstorerbnewfnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memb(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b100;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerbnewfnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memb($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_1737833, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b01000110101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 3;
+}
+def S4_pstorerbnewfnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b00;
+let Inst{31-21} = 0b00110111101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let opNewValue = 4;
+}
+def S4_pstorerbnewfnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if (!$Pv4.new) memb($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S4_pstorerbnewt_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memb(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b000;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerbnewt_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b00;
+let Inst{31-21} = 0b00110100101;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let opNewValue = 4;
+}
+def S4_pstorerbnewtnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memb(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b100;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerbnewtnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memb($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_1737833, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b01000010101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 3;
+}
+def S4_pstorerbnewtnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b00;
+let Inst{31-21} = 0b00110110101;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let opNewValue = 4;
+}
+def S4_pstorerbnewtnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if ($Pv4.new) memb($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S4_pstorerbt_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memb(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111000000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerbt_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110100000;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerbtnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memb(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111000000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S2_storerbabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerbtnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memb($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_14044877, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000010000;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S2_storerb_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerbtnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memb($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110110000;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerbtnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pv4.new) memb($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerdf_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
+"if (!$Pv4) memd(#$Ii) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_13715847, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111110000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerdabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerdf_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
+"if (!$Pv4) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_9920336, AddrModeRel {
+let Inst{31-21} = 0b00110101110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "reg";
+let BaseOpcode = "S2_storerd_rr";
+}
+def S4_pstorerdfnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
+"if (!$Pv4.new) memd(#$Ii) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_13715847, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111110000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerdabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerdfnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
+"if (!$Pv4.new) memd($Rs32+#$Ii) = $Rtt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_11049656, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000110110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "imm";
+let BaseOpcode = "S2_storerd_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 3;
+}
+def S4_pstorerdfnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
+"if (!$Pv4.new) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_9920336, AddrModeRel {
+let Inst{31-21} = 0b00110111110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "reg";
+let BaseOpcode = "S2_storerd_rr";
+}
+def S4_pstorerdfnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"if (!$Pv4.new) memd($Rs32) = $Rtt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerdt_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
+"if ($Pv4) memd(#$Ii) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_13715847, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111110000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerdabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerdt_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
+"if ($Pv4) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_9920336, AddrModeRel {
+let Inst{31-21} = 0b00110100110;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "reg";
+let BaseOpcode = "S2_storerd_rr";
+}
+def S4_pstorerdtnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, DoubleRegs:$Rtt32),
+"if ($Pv4.new) memd(#$Ii) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_13715847, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111110000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let BaseOpcode = "S2_storerdabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerdtnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u29_3Imm:$Ii, DoubleRegs:$Rtt32),
+"if ($Pv4.new) memd($Rs32+#$Ii) = $Rtt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_11049656, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000010110;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "imm";
+let BaseOpcode = "S2_storerd_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 9;
+let opExtentAlign = 3;
+}
+def S4_pstorerdtnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
+"if ($Pv4.new) memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_9920336, AddrModeRel {
+let Inst{31-21} = 0b00110110110;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "reg";
+let BaseOpcode = "S2_storerd_rr";
+}
+def S4_pstorerdtnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"if ($Pv4.new) memd($Rs32) = $Rtt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerff_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memh(#$Ii) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerfabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerff_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110101011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "reg";
+let BaseOpcode = "S4_storerf_rr";
+}
+def S4_pstorerffnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memh(#$Ii) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111011000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerfabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerffnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memh($Rs32+#$Ii) = $Rt32.h",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000110011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "imm";
+let BaseOpcode = "S2_storerf_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def S4_pstorerffnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110111011;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "reg";
+let BaseOpcode = "S4_storerf_rr";
+}
+def S4_pstorerffnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pv4.new) memh($Rs32) = $Rt32.h",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerft_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memh(#$Ii) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111011000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerfabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerft_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110100011;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "reg";
+let BaseOpcode = "S4_storerf_rr";
+}
+def S4_pstorerftnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memh(#$Ii) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111011000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let BaseOpcode = "S2_storerfabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerftnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memh($Rs32+#$Ii) = $Rt32.h",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000010011;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "imm";
+let BaseOpcode = "S2_storerf_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def S4_pstorerftnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110110011;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "reg";
+let BaseOpcode = "S4_storerf_rr";
+}
+def S4_pstorerftnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pv4.new) memh($Rs32) = $Rt32.h",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerhf_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memh(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111010000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerhf_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110101010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerhfnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memh(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111010000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerhfnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memh($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000110010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def S4_pstorerhfnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110111010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerhfnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pv4.new) memh($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
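+// A "new" directly after the mnemonic core (e.g. storerhnew) denotes a
+// new-value store: the $Nt8 operand is a register produced by another
+// instruction in the same packet (isNVStore/isNewValue, with opNewValue
+// pointing at that operand), and the store is restricted to slot 0.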
+def S4_pstorerhnewf_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memh(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b001;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerhnewf_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b01;
+let Inst{31-21} = 0b00110101101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let opNewValue = 4;
+}
+def S4_pstorerhnewfnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memh(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b101;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerhnewfnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memh($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_6154421, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b01000110101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+let opNewValue = 3;
+}
+def S4_pstorerhnewfnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b01;
+let Inst{31-21} = 0b00110111101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let opNewValue = 4;
+}
+def S4_pstorerhnewfnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if (!$Pv4.new) memh($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S4_pstorerhnewt_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memh(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b001;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerhnewt_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b01;
+let Inst{31-21} = 0b00110100101;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let opNewValue = 4;
+}
+def S4_pstorerhnewtnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memh(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b101;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerhnewtnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memh($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_6154421, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b01000010101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+let opNewValue = 3;
+}
+def S4_pstorerhnewtnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b01;
+let Inst{31-21} = 0b00110110101;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let opNewValue = 4;
+}
+def S4_pstorerhnewtnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if ($Pv4.new) memh($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S4_pstorerht_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memh(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111010000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerht_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110100010;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerhtnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memh(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111010000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerhabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerhtnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u31_1Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memh($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_10979813, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000010010;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 7;
+let opExtentAlign = 1;
+}
+def S4_pstorerhtnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memh($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110110010;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerhtnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pv4.new) memh($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerif_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memw(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111100000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerif_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110101100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerifnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memw(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111100000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerifnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memw($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_8225953, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000110100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def S4_pstorerifnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if (!$Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110111100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let isNVStorable = 1;
+}
+def S4_pstorerifnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if (!$Pv4.new) memw($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_pstorerinewf_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memw(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b010;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerinewf_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b10;
+let Inst{31-21} = 0b00110101101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let opNewValue = 4;
+}
+def S4_pstorerinewfnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memw(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b1;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b110;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerinewfnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memw($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_11224149, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b01000110101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+let opNewValue = 3;
+}
+def S4_pstorerinewfnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if (!$Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b10;
+let Inst{31-21} = 0b00110111101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let opNewValue = 4;
+}
+def S4_pstorerinewfnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if (!$Pv4.new) memw($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S4_pstorerinewt_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memw(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b010;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerinewt_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b10;
+let Inst{31-21} = 0b00110100101;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let opNewValue = 4;
+}
+def S4_pstorerinewtnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memw(#$Ii) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_1774350, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-11} = 0b110;
+let Inst{31-18} = 0b10101111101000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_pstorerinewtnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memw($Rs32+#$Ii) = $Nt8.new",
+V2LDST_tc_st_SLOT0, TypeV2LDST>, Enc_11224149, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b01000010101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+let opNewValue = 3;
+}
+def S4_pstorerinewtnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"if ($Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_11000933, AddrModeRel {
+let Inst{4-3} = 0b10;
+let Inst{31-21} = 0b00110110101;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let opNewValue = 4;
+}
+def S4_pstorerinewtnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Nt8),
+"if ($Pv4.new) memw($Rs32) = $Nt8.new",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def S4_pstorerit_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memw(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b0;
+let Inst{31-18} = 0b10101111100000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstorerit_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110100100;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let isNVStorable = 1;
+}
+def S4_pstoreritnew_abs : HInst<
+(outs),
+(ins PredRegs:$Pv4, u32_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memw(#$Ii) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_16657398, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-18} = 0b10101111100000;
+let isPredicated = 1;
+let addrMode = Absolute;
+let accessSize = WordAccess;
+let isExtended = 1;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeriabs";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_pstoreritnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u30_2Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memw($Rs32+#$Ii) = $Rt32",
+V2LDST_tc_st_SLOT01, TypeV2LDST>, Enc_8225953, AddrModeRel {
+let Inst{2-2} = 0b0;
+let Inst{31-21} = 0b01000010100;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_io";
+let isNVStorable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 2;
+}
+def S4_pstoreritnew_rr : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"if ($Pv4.new) memw($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11940513, AddrModeRel {
+let Inst{31-21} = 0b00110110100;
+let isPredicated = 1;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let isNVStorable = 1;
+}
+def S4_pstoreritnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, IntRegs:$Rt32),
+"if ($Pv4.new) memw($Rs32) = $Rt32",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
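+// Store-conditional: pairs with the memd_locked load to form an LL/SC
+// sequence. $Pd4 is set when the store succeeds (the reservation is still
+// held); isPredicateLate marks the predicate result as available late.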
+def S4_stored_locked : HInst<
+(outs PredRegs:$Pd4),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"memd_locked($Rs32,$Pd4) = $Rtt32",
+ST_tc_ld_SLOT0, TypeST>, Enc_2921694 {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10100000111;
+let accessSize = DoubleWordAccess;
+let isSoloAX = 1;
+let mayStore = 1;
+let isPredicateLate = 1;
+}
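+// Store-immediate forms: memb/memh/memw($Rs32+#u6) = #imm. The
+// unpredicated base forms take a signed 8-bit immediate (extendable to
+// #s32); the predicated t/f and dot-new variants narrow the extent to
+// 6 bits.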
+def S4_storeirb_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
+"memb($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_11282123, PredNewRel {
+let Inst{31-21} = 0b00111100000;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirb_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def S4_storeirb_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, s8_0Imm:$II),
+"memb($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirbf_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
+"if (!$Pv4) memb($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_5967898, PredNewRel {
+let Inst{31-21} = 0b00111000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirb_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirbf_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if (!$Pv4) memb($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirbfnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
+"if (!$Pv4.new) memb($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_5967898, PredNewRel {
+let Inst{31-21} = 0b00111001100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirb_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirbfnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if (!$Pv4.new) memb($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirbt_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
+"if ($Pv4) memb($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_5967898, PredNewRel {
+let Inst{31-21} = 0b00111000000;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirb_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirbt_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if ($Pv4) memb($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirbtnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II),
+"if ($Pv4.new) memb($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_5967898, PredNewRel {
+let Inst{31-21} = 0b00111001000;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirb_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirbtnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if ($Pv4.new) memb($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirh_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
+"memh($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_10282127, PredNewRel {
+let Inst{31-21} = 0b00111100001;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirh_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def S4_storeirh_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, s8_0Imm:$II),
+"memh($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirhf_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
+"if (!$Pv4) memh($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_4967902, PredNewRel {
+let Inst{31-21} = 0b00111000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirhf_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if (!$Pv4) memh($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirhfnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
+"if (!$Pv4.new) memh($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_4967902, PredNewRel {
+let Inst{31-21} = 0b00111001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirhfnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if (!$Pv4.new) memh($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirht_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
+"if ($Pv4) memh($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_4967902, PredNewRel {
+let Inst{31-21} = 0b00111000001;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirht_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if ($Pv4) memh($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirhtnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_1Imm:$Ii, s32_0Imm:$II),
+"if ($Pv4.new) memh($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_4967902, PredNewRel {
+let Inst{31-21} = 0b00111001001;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S4_storeirh_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirhtnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if ($Pv4.new) memh($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeiri_io : HInst<
+(outs),
+(ins IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
+"memw($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_9282127, PredNewRel {
+let Inst{31-21} = 0b00111100010;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S4_storeiri_io";
+let isPredicable = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+}
+def S4_storeiri_zomap : HInst<
+(outs),
+(ins IntRegs:$Rs32, s8_0Imm:$II),
+"memw($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirif_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
+"if (!$Pv4) memw($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_3967902, PredNewRel {
+let Inst{31-21} = 0b00111000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S4_storeiri_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirif_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if (!$Pv4) memw($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirifnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
+"if (!$Pv4.new) memw($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_3967902, PredNewRel {
+let Inst{31-21} = 0b00111001110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S4_storeiri_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirifnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if (!$Pv4.new) memw($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeirit_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
+"if ($Pv4) memw($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_3967902, PredNewRel {
+let Inst{31-21} = 0b00111000010;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S4_storeiri_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeirit_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if ($Pv4) memw($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def S4_storeiritnew_io : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II),
+"if ($Pv4.new) memw($Rs32+#$Ii) = #$II",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_3967902, PredNewRel {
+let Inst{31-21} = 0b00111001010;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let isPredicatedNew = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S4_storeiri_io";
+let isExtendable = 1;
+let opExtendable = 3;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeiritnew_zomap : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rs32, s6_0Imm:$II),
+"if ($Pv4.new) memw($Rs32) = #$II",
+PSEUDO, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
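+// Unpredicated store variants by addressing mode: _ap is absolute-set
+// (memX($Re32=#$II) = ..., which also writes the effective address back
+// into $Re32), _rr is base + $Ru32<<#u2, and _ur is $Ru32<<#u2 plus a
+// constant-extended global offset (#$II).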
+def S4_storerb_ap : HInst<
+(outs IntRegs:$Re32),
+(ins u32_0Imm:$II, IntRegs:$Rt32),
+"memb($Re32=#$II) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_11477246, AddrModeRel {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = AbsoluteSet;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayStore = 1;
+let BaseOpcode = "S2_storerb_ap";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerb_rr : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"memb($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_14046916, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111011000;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let isNVStorable = 1;
+let isPredicable = 1;
+}
+def S4_storerb_ur : HInst<
+(outs),
+(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
+"memb($Ru32<<#$Ii+#$II) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_14689096, AddrModeRel, ImmRegShl {
+let Inst{7-7} = 0b1;
+let Inst{31-21} = 0b10101101000;
+let addrMode = BaseLongOffset;
+let accessSize = ByteAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "imm";
+let BaseOpcode = "S4_storerb_ur";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerbnew_ap : HInst<
+(outs IntRegs:$Re32),
+(ins u32_0Imm:$II, IntRegs:$Nt8),
+"memb($Re32=#$II) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_14193700, AddrModeRel {
+let Inst{7-6} = 0b10;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b10101011101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = AbsoluteSet;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storerb_ap";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_storerbnew_rr : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"memb($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_5486172, AddrModeRel {
+let Inst{6-3} = 0b0000;
+let Inst{31-21} = 0b00111011101;
+let addrMode = BaseRegOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let InputType = "reg";
+let BaseOpcode = "S4_storerb_rr";
+let isPredicable = 1;
+let opNewValue = 3;
+}
+def S4_storerbnew_ur : HInst<
+(outs),
+(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
+"memb($Ru32<<#$Ii+#$II) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10076500, AddrModeRel {
+let Inst{7-7} = 0b1;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b10101101101;
+let addrMode = BaseLongOffset;
+let accessSize = ByteAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerb";
+let BaseOpcode = "S4_storerb_ur";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 3;
+}
+def S4_storerd_ap : HInst<
+(outs IntRegs:$Re32),
+(ins u32_0Imm:$II, DoubleRegs:$Rtt32),
+"memd($Re32=#$II) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_8131399 {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = AbsoluteSet;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let BaseOpcode = "S4_storerd_ap";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerd_rr : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, DoubleRegs:$Rtt32),
+"memd($Rs32+$Ru32<<#$Ii) = $Rtt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_9772987, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111011110;
+let addrMode = BaseRegOffset;
+let accessSize = DoubleWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "reg";
+let BaseOpcode = "S2_storerd_rr";
+let isPredicable = 1;
+}
+def S4_storerd_ur : HInst<
+(outs),
+(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, DoubleRegs:$Rtt32),
+"memd($Ru32<<#$Ii+#$II) = $Rtt32",
+ST_tc_st_SLOT01, TypeST>, Enc_12848507, AddrModeRel, ImmRegShl {
+let Inst{7-7} = 0b1;
+let Inst{31-21} = 0b10101101110;
+let addrMode = BaseLongOffset;
+let accessSize = DoubleWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerd";
+let InputType = "imm";
+let BaseOpcode = "S2_storerd_ur";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerf_ap : HInst<
+(outs IntRegs:$Re32),
+(ins u32_0Imm:$II, IntRegs:$Rt32),
+"memh($Re32=#$II) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_11477246 {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = AbsoluteSet;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let BaseOpcode = "S4_storerf_ap";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerf_rr : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+$Ru32<<#$Ii) = $Rt32.h",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_14046916, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111011011;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "reg";
+let BaseOpcode = "S4_storerf_rr";
+let isPredicable = 1;
+}
+def S4_storerf_ur : HInst<
+(outs),
+(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
+"memh($Ru32<<#$Ii+#$II) = $Rt32.h",
+ST_tc_st_SLOT01, TypeST>, Enc_14689096, AddrModeRel, ImmRegShl {
+let Inst{7-7} = 0b1;
+let Inst{31-21} = 0b10101101011;
+let addrMode = BaseLongOffset;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerf";
+let InputType = "imm";
+let BaseOpcode = "S4_storerf_rr";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerh_ap : HInst<
+(outs IntRegs:$Re32),
+(ins u32_0Imm:$II, IntRegs:$Rt32),
+"memh($Re32=#$II) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_11477246, AddrModeRel {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = AbsoluteSet;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let BaseOpcode = "S2_storerh_ap";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerh_rr : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"memh($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_14046916, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111011010;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let isNVStorable = 1;
+let isPredicable = 1;
+}
+def S4_storerh_ur : HInst<
+(outs),
+(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
+"memh($Ru32<<#$Ii+#$II) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_14689096, AddrModeRel, ImmRegShl {
+let Inst{7-7} = 0b1;
+let Inst{31-21} = 0b10101101010;
+let addrMode = BaseLongOffset;
+let accessSize = HalfWordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "imm";
+let BaseOpcode = "S2_storerh_ur";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerhnew_ap : HInst<
+(outs IntRegs:$Re32),
+(ins u32_0Imm:$II, IntRegs:$Nt8),
+"memh($Re32=#$II) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_14193700, AddrModeRel {
+let Inst{7-6} = 0b10;
+let Inst{13-11} = 0b001;
+let Inst{31-21} = 0b10101011101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = AbsoluteSet;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storerh_ap";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_storerhnew_rr : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"memh($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_5486172, AddrModeRel {
+let Inst{6-3} = 0b0001;
+let Inst{31-21} = 0b00111011101;
+let addrMode = BaseRegOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let InputType = "reg";
+let BaseOpcode = "S2_storerh_rr";
+let isPredicable = 1;
+let opNewValue = 3;
+}
+def S4_storerhnew_ur : HInst<
+(outs),
+(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
+"memh($Ru32<<#$Ii+#$II) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10076500, AddrModeRel {
+let Inst{7-7} = 0b1;
+let Inst{12-11} = 0b01;
+let Inst{31-21} = 0b10101101101;
+let addrMode = BaseLongOffset;
+let accessSize = HalfWordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storerh";
+let BaseOpcode = "S2_storerh_ur";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 3;
+}
+def S4_storeri_ap : HInst<
+(outs IntRegs:$Re32),
+(ins u32_0Imm:$II, IntRegs:$Rt32),
+"memw($Re32=#$II) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_11477246, AddrModeRel {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10101011100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = AbsoluteSet;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let BaseOpcode = "S2_storeri_ap";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storeri_rr : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Rt32),
+"memw($Rs32+$Ru32<<#$Ii) = $Rt32",
+V4LDST_tc_st_SLOT01, TypeST>, Enc_14046916, AddrModeRel, ImmRegShl {
+let Inst{6-5} = 0b00;
+let Inst{31-21} = 0b00111011100;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let isNVStorable = 1;
+let isPredicable = 1;
+}
+def S4_storeri_ur : HInst<
+(outs),
+(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Rt32),
+"memw($Ru32<<#$Ii+#$II) = $Rt32",
+ST_tc_st_SLOT01, TypeST>, Enc_14689096, AddrModeRel, ImmRegShl {
+let Inst{7-7} = 0b1;
+let Inst{31-21} = 0b10101101100;
+let addrMode = BaseLongOffset;
+let accessSize = WordAccess;
+let isExtended = 1;
+let mayStore = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "imm";
+let BaseOpcode = "S2_storeri_ur";
+let isNVStorable = 1;
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_storerinew_ap : HInst<
+(outs IntRegs:$Re32),
+(ins u32_0Imm:$II, IntRegs:$Nt8),
+"memw($Re32=#$II) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_14193700, AddrModeRel {
+let Inst{7-6} = 0b10;
+let Inst{13-11} = 0b010;
+let Inst{31-21} = 0b10101011101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = AbsoluteSet;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "S2_storeri_ap";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 2;
+}
+def S4_storerinew_rr : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Ru32, u2_0Imm:$Ii, IntRegs:$Nt8),
+"memw($Rs32+$Ru32<<#$Ii) = $Nt8.new",
+V4LDST_tc_st_SLOT0, TypeST>, Enc_5486172, AddrModeRel {
+let Inst{6-3} = 0b0010;
+let Inst{31-21} = 0b00111011101;
+let addrMode = BaseRegOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let InputType = "reg";
+let BaseOpcode = "S2_storeri_rr";
+let isPredicable = 1;
+let opNewValue = 3;
+}
+def S4_storerinew_ur : HInst<
+(outs),
+(ins IntRegs:$Ru32, u2_0Imm:$Ii, u32_0Imm:$II, IntRegs:$Nt8),
+"memw($Ru32<<#$Ii+#$II) = $Nt8.new",
+NCJ_tc_3or4stall_SLOT0, TypeST>, Enc_10076500, AddrModeRel {
+let Inst{7-7} = 0b1;
+let Inst{12-11} = 0b10;
+let Inst{31-21} = 0b10101101101;
+let addrMode = BaseLongOffset;
+let accessSize = WordAccess;
+let isNVStore = 1;
+let isExtended = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let CextOpcode = "S2_storeri";
+let BaseOpcode = "S2_storeri_ur";
+let DecoderNamespace = "MustExtend";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+let opNewValue = 3;
+}
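+// Mixed add/sub ALU64 forms: S4_subaddi computes $Rs32 + #$Ii - $Ru32;
+// the subi_asl/lsr forms fold a shift of the accumulator into the
+// subtraction, with "$Rx32 = $Rx32in" tying the input and output register.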
+def S4_subaddi : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Ru32),
+"$Rd32 = add($Rs32,sub(#$Ii,$Ru32))",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_6495334 {
+let Inst{31-23} = 0b110110111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def S4_subi_asl_ri : HInst<
+(outs IntRegs:$Rx32),
+(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
+"$Rx32 = sub(#$Ii,asl($Rx32in,#$II))",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_117962 {
+let Inst{2-0} = 0b110;
+let Inst{4-4} = 0b0;
+let Inst{31-24} = 0b11011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S4_subi_lsr_ri : HInst<
+(outs IntRegs:$Rx32),
+(ins u32_0Imm:$Ii, IntRegs:$Rx32in, u5_0Imm:$II),
+"$Rx32 = sub(#$Ii,lsr($Rx32in,#$II))",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_117962 {
+let Inst{2-0} = 0b110;
+let Inst{4-4} = 0b1;
+let Inst{31-24} = 0b11011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 8;
+let opExtentAlign = 0;
+let Constraints = "$Rx32 = $Rx32in";
+}
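+// Complex/vector arithmetic: vrcrotate is the vector reduce complex
+// rotate; the vxaddsub/vxsubadd pairs do cross-wise add/subtract on
+// half/word lanes. The :sat forms may set the USR overflow bit
+// (Defs = [USR_OVF]), and :rnd:>>1 variants round and halve the result.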
+def S4_vrcrotate : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"$Rdd32 = vrcrotate($Rss32,$Rt32,#$Ii)",
+S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_114098 {
+let Inst{7-6} = 0b11;
+let Inst{31-21} = 0b11000011110;
+let prefersSlot3 = 1;
+}
+def S4_vrcrotate_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, IntRegs:$Rt32, u2_0Imm:$Ii),
+"$Rxx32 += vrcrotate($Rss32,$Rt32,#$Ii)",
+S_3op_tc_3x_SLOT23, TypeS_3op>, Enc_13114546 {
+let Inst{7-6} = 0b00;
+let Inst{31-21} = 0b11001011101;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S4_vxaddsubh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vxaddsubh($Rss32,$Rtt32):sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001010;
+let Defs = [USR_OVF];
+}
+def S4_vxaddsubhr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vxaddsubh($Rss32,$Rtt32):rnd:>>1:sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def S4_vxaddsubw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vxaddsubw($Rss32,$Rtt32):sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001010;
+let Defs = [USR_OVF];
+}
+def S4_vxsubaddh : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vxsubaddh($Rss32,$Rtt32):sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001010;
+let Defs = [USR_OVF];
+}
+def S4_vxsubaddhr : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vxsubaddh($Rss32,$Rtt32):rnd:>>1:sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001110;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def S4_vxsubaddw : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vxsubaddw($Rss32,$Rtt32):sat",
+S_3op_tc_2_SLOT23, TypeS_3op>, Enc_8333157 {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001010;
+let Defs = [USR_OVF];
+}
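+// S5_*: instructions that require the V5 architecture (Requires<[HasV5T]>).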
+def S5_asrhub_rnd_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
+"$Rd32 = vasrhub($Rss32,#$Ii):raw",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8038806, Requires<[HasV5T]> {
+let Inst{7-5} = 0b100;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b10001000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def S5_asrhub_rnd_sat_goodsyntax : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
+"$Rd32 = vasrhub($Rss32,#$Ii):rnd:sat",
+S_2op_tc_2_SLOT23, TypeS_2op>, Requires<[HasV5T]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+}
+def S5_asrhub_sat : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
+"$Rd32 = vasrhub($Rss32,#$Ii):sat",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_8038806, Requires<[HasV5T]> {
+let Inst{7-5} = 0b101;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b10001000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Defs = [USR_OVF];
+}
+def S5_popcountp : HInst<
+(outs IntRegs:$Rd32),
+(ins DoubleRegs:$Rss32),
+"$Rd32 = popcount($Rss32)",
+S_2op_tc_2_SLOT23, TypeS_2op>, Enc_3742184, Requires<[HasV5T]> {
+let Inst{13-5} = 0b000000011;
+let Inst{31-21} = 0b10001000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+}
+def S5_vasrhrnd : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
+"$Rdd32 = vasrh($Rss32,#$Ii):raw",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2082775, Requires<[HasV5T]> {
+let Inst{7-5} = 0b000;
+let Inst{13-12} = 0b00;
+let Inst{31-21} = 0b10000000001;
+let prefersSlot3 = 1;
+}
+def S5_vasrhrnd_goodsyntax : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
+"$Rdd32 = vasrh($Rss32,#$Ii):rnd",
+S_2op_tc_1_SLOT23, TypeS_2op>, Requires<[HasV5T]> {
+let isPseudo = 1;
+}
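+// S6_rol_i_*: rotate-left-by-immediate and its accumulating forms, V60 onward (Requires<[HasV60T]>).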
+def S6_rol_i_p : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rdd32 = rol($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4231995, Requires<[HasV60T]> {
+let Inst{7-5} = 0b011;
+let Inst{31-21} = 0b10000000000;
+}
+def S6_rol_i_p_acc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 += rol($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8497723, Requires<[HasV60T]> {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b10000010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S6_rol_i_p_and : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 &= rol($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8497723, Requires<[HasV60T]> {
+let Inst{7-5} = 0b011;
+let Inst{31-21} = 0b10000010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S6_rol_i_p_nac : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 -= rol($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8497723, Requires<[HasV60T]> {
+let Inst{7-5} = 0b011;
+let Inst{31-21} = 0b10000010000;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S6_rol_i_p_or : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 |= rol($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8497723, Requires<[HasV60T]> {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b10000010010;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S6_rol_i_p_xacc : HInst<
+(outs DoubleRegs:$Rxx32),
+(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
+"$Rxx32 ^= rol($Rss32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_8497723, Requires<[HasV60T]> {
+let Inst{7-5} = 0b011;
+let Inst{31-21} = 0b10000010100;
+let prefersSlot3 = 1;
+let Constraints = "$Rxx32 = $Rxx32in";
+}
+def S6_rol_i_r : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rd32 = rol($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2771456, Requires<[HasV60T]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def S6_rol_i_r_acc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 += rol($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2410156, Requires<[HasV60T]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S6_rol_i_r_and : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 &= rol($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2410156, Requires<[HasV60T]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S6_rol_i_r_nac : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 -= rol($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2410156, Requires<[HasV60T]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S6_rol_i_r_or : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 |= rol($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2410156, Requires<[HasV60T]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S6_rol_i_r_xacc : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
+"$Rx32 ^= rol($Rs32,#$Ii)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_2410156, Requires<[HasV60T]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10001110100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let prefersSlot3 = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def S6_vsplatrbp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32),
+"$Rdd32 = vsplatb($Rs32)",
+S_2op_tc_1_SLOT23, TypeS_2op>, Enc_4030179, Requires<[HasV62T]> {
+let Inst{13-5} = 0b000000100;
+let Inst{31-21} = 0b10000100010;
+}
+def S6_vtrunehb_ppp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vtrunehb($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157, Requires<[HasV62T]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001100;
+}
+def S6_vtrunohb_ppp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
+"$Rdd32 = vtrunohb($Rss32,$Rtt32)",
+S_3op_tc_1_SLOT23, TypeS_3op>, Enc_8333157, Requires<[HasV62T]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11000001100;
+}
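+// SA1_*: 16-bit arithmetic sub-instructions (TypeSUBINSN, DecoderNamespace "SUBINSN_A"); sub-instructions are packed in pairs to form 32-bit duplexes.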
+def SA1_addi : HInst<
+(outs GeneralSubRegs:$Rx16),
+(ins IntRegs:$Rx16in, s32_0Imm:$Ii),
+"$Rx16 = add($Rx16in,#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_3974695 {
+let Inst{12-11} = 0b00;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+let isExtendable = 1;
+let opExtendable = 2;
+let isExtentSigned = 1;
+let opExtentBits = 7;
+let opExtentAlign = 0;
+let Constraints = "$Rx16 = $Rx16in";
+}
+def SA1_addrx : HInst<
+(outs GeneralSubRegs:$Rx16),
+(ins IntRegs:$Rx16in, GeneralSubRegs:$Rs16),
+"$Rx16 = add($Rx16in,$Rs16)",
+PSEUDO, TypeSUBINSN>, Enc_6135183 {
+let Inst{12-8} = 0b11000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+let Constraints = "$Rx16 = $Rx16in";
+}
+def SA1_addsp : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins u6_2Imm:$Ii),
+"$Rd16 = add(r29,#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_176263 {
+let Inst{12-10} = 0b011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let Uses = [R29];
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_and1 : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16),
+"$Rd16 = and($Rs16,#1)",
+PSEUDO, TypeSUBINSN>, Enc_14939491 {
+let Inst{12-8} = 0b10010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_clrf : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins),
+"if (!p0) $Rd16 = #0",
+PSEUDO, TypeSUBINSN>, Enc_1451363 {
+let Inst{12-4} = 0b110100111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let Uses = [P0];
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_clrfnew : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins),
+"if (!p0.new) $Rd16 = #0",
+PSEUDO, TypeSUBINSN>, Enc_1451363 {
+let Inst{12-4} = 0b110100101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPredicatedNew = 1;
+let Uses = [P0];
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_clrt : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins),
+"if (p0) $Rd16 = #0",
+PSEUDO, TypeSUBINSN>, Enc_1451363 {
+let Inst{12-4} = 0b110100110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let Uses = [P0];
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_clrtnew : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins),
+"if (p0.new) $Rd16 = #0",
+PSEUDO, TypeSUBINSN>, Enc_1451363 {
+let Inst{12-4} = 0b110100100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let isPredicatedNew = 1;
+let Uses = [P0];
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_cmpeqi : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u2_0Imm:$Ii),
+"p0 = cmp.eq($Rs16,#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_2079016 {
+let Inst{3-2} = 0b00;
+let Inst{12-8} = 0b11001;
+let AsmVariantName = "NonParsable";
+let Defs = [P0];
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_combine0i : HInst<
+(outs GeneralDoubleLow8Regs:$Rdd8),
+(ins u2_0Imm:$Ii),
+"$Rdd8 = combine(#0,#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_15946706 {
+let Inst{4-3} = 0b00;
+let Inst{12-7} = 0b111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_combine1i : HInst<
+(outs GeneralDoubleLow8Regs:$Rdd8),
+(ins u2_0Imm:$Ii),
+"$Rdd8 = combine(#1,#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_15946706 {
+let Inst{4-3} = 0b01;
+let Inst{12-7} = 0b111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_combine2i : HInst<
+(outs GeneralDoubleLow8Regs:$Rdd8),
+(ins u2_0Imm:$Ii),
+"$Rdd8 = combine(#2,#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_15946706 {
+let Inst{4-3} = 0b10;
+let Inst{12-7} = 0b111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_combine3i : HInst<
+(outs GeneralDoubleLow8Regs:$Rdd8),
+(ins u2_0Imm:$Ii),
+"$Rdd8 = combine(#3,#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_15946706 {
+let Inst{4-3} = 0b11;
+let Inst{12-7} = 0b111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_combinerz : HInst<
+(outs GeneralDoubleLow8Regs:$Rdd8),
+(ins GeneralSubRegs:$Rs16),
+"$Rdd8 = combine($Rs16,#0)",
+PSEUDO, TypeSUBINSN>, Enc_10501894 {
+let Inst{3-3} = 0b1;
+let Inst{12-8} = 0b11101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_combinezr : HInst<
+(outs GeneralDoubleLow8Regs:$Rdd8),
+(ins GeneralSubRegs:$Rs16),
+"$Rdd8 = combine(#0,$Rs16)",
+PSEUDO, TypeSUBINSN>, Enc_10501894 {
+let Inst{3-3} = 0b0;
+let Inst{12-8} = 0b11101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_dec : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16, n1Const:$n1),
+"$Rd16 = add($Rs16,#$n1)",
+PSEUDO, TypeSUBINSN>, Enc_10597934 {
+let Inst{12-8} = 0b10011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_inc : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16),
+"$Rd16 = add($Rs16,#1)",
+PSEUDO, TypeSUBINSN>, Enc_14939491 {
+let Inst{12-8} = 0b10001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_seti : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins u32_0Imm:$Ii),
+"$Rd16 = #$Ii",
+PSEUDO, TypeSUBINSN>, Enc_2176383 {
+let Inst{12-10} = 0b010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+let isExtendable = 1;
+let opExtendable = 1;
+let isExtentSigned = 0;
+let opExtentBits = 6;
+let opExtentAlign = 0;
+}
+def SA1_setin1 : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins n1Const:$n1),
+"$Rd16 = #$n1",
+PSEUDO, TypeSUBINSN>, Enc_13336212 {
+let Inst{12-4} = 0b110100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_sxtb : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16),
+"$Rd16 = sxtb($Rs16)",
+PSEUDO, TypeSUBINSN>, Enc_14939491 {
+let Inst{12-8} = 0b10101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_sxth : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16),
+"$Rd16 = sxth($Rs16)",
+PSEUDO, TypeSUBINSN>, Enc_14939491 {
+let Inst{12-8} = 0b10100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_tfr : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16),
+"$Rd16 = $Rs16",
+PSEUDO, TypeSUBINSN>, Enc_14939491 {
+let Inst{12-8} = 0b10000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_zxtb : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16),
+"$Rd16 = and($Rs16,#255)",
+PSEUDO, TypeSUBINSN>, Enc_14939491 {
+let Inst{12-8} = 0b10111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
+def SA1_zxth : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16),
+"$Rd16 = zxth($Rs16)",
+PSEUDO, TypeSUBINSN>, Enc_14939491 {
+let Inst{12-8} = 0b10110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let AsmVariantName = "NonParsable";
+let DecoderNamespace = "SUBINSN_A";
+}
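+// SL1_*/SL2_*: load-class sub-instructions, including deallocframe and dealloc_return.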
+def SL1_loadri_io : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
+"$Rd16 = memw($Rs16+#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_13606251 {
+let Inst{12-12} = 0b0;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let DecoderNamespace = "SUBINSN_L1";
+}
+def SL1_loadrub_io : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
+"$Rd16 = memub($Rs16+#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_15606259 {
+let Inst{12-12} = 0b1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let DecoderNamespace = "SUBINSN_L1";
+}
+def SL2_deallocframe : HInst<
+(outs),
+(ins),
+"deallocframe",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111100000000;
+let accessSize = DoubleWordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [R30, R29, R31];
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_jumpr31 : HInst<
+(outs),
+(ins),
+"jumpr r31",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111111000000;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let Uses = [R31];
+let Defs = [PC];
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_jumpr31_f : HInst<
+(outs),
+(ins),
+"if (!p0) jumpr r31",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111111000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let Uses = [P0, R31];
+let Defs = [PC];
+let isTaken = Inst{4};
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_jumpr31_fnew : HInst<
+(outs),
+(ins),
+"if (!p0.new) jumpr:nt r31",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111111000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let isPredicatedNew = 1;
+let Uses = [P0, R31];
+let Defs = [PC];
+let isTaken = Inst{4};
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_jumpr31_t : HInst<
+(outs),
+(ins),
+"if (p0) jumpr r31",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111111000100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let Uses = [P0, R31];
+let Defs = [PC];
+let isTaken = Inst{4};
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_jumpr31_tnew : HInst<
+(outs),
+(ins),
+"if (p0.new) jumpr:nt r31",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111111000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let isPredicatedNew = 1;
+let Uses = [P0, R31];
+let Defs = [PC];
+let isTaken = Inst{4};
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_loadrb_io : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16, u3_0Imm:$Ii),
+"$Rd16 = memb($Rs16+#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_3135259 {
+let Inst{12-11} = 0b10;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_loadrd_sp : HInst<
+(outs GeneralDoubleLow8Regs:$Rdd8),
+(ins u5_3Imm:$Ii),
+"$Rdd8 = memd(r29+#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_16479122 {
+let Inst{12-8} = 0b11110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let Uses = [R29];
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_loadrh_io : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii),
+"$Rd16 = memh($Rs16+#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_4135257 {
+let Inst{12-11} = 0b00;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_loadri_sp : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins u5_2Imm:$Ii),
+"$Rd16 = memw(r29+#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_64199 {
+let Inst{12-9} = 0b1110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let Uses = [R29];
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_loadruh_io : HInst<
+(outs GeneralSubRegs:$Rd16),
+(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii),
+"$Rd16 = memuh($Rs16+#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_4135257 {
+let Inst{12-11} = 0b01;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let AsmVariantName = "NonParsable";
+let mayLoad = 1;
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_return : HInst<
+(outs),
+(ins),
+"dealloc_return",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111101000000;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let mayLoad = 1;
+let Uses = [R30];
+let Defs = [PC, R30, R29, R31];
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_return_f : HInst<
+(outs),
+(ins),
+"if (!p0) dealloc_return",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111101000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let mayLoad = 1;
+let Uses = [P0, R30];
+let Defs = [PC, R30, R29, R31];
+let isTaken = Inst{4};
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_return_fnew : HInst<
+(outs),
+(ins),
+"if (!p0.new) dealloc_return:nt",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111101000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let Uses = [P0, R30];
+let Defs = [PC, R30, R29, R31];
+let isTaken = Inst{4};
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_return_t : HInst<
+(outs),
+(ins),
+"if (p0) dealloc_return",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111101000100;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let mayLoad = 1;
+let Uses = [P0, R30];
+let Defs = [PC, R30, R29, R31];
+let isTaken = Inst{4};
+let DecoderNamespace = "SUBINSN_L2";
+}
+def SL2_return_tnew : HInst<
+(outs),
+(ins),
+"if (p0.new) dealloc_return:nt",
+PSEUDO, TypeSUBINSN>, Enc_0 {
+let Inst{12-0} = 0b1111101000110;
+let isPredicated = 1;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let accessSize = DoubleWordAccess;
+let cofMax1 = 1;
+let AsmVariantName = "NonParsable";
+let isReturn = 1;
+let isPredicatedNew = 1;
+let mayLoad = 1;
+let Uses = [P0, R30];
+let Defs = [PC, R30, R29, R31];
+let isTaken = Inst{4};
+let DecoderNamespace = "SUBINSN_L2";
+}
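+// SS1_*/SS2_*: store-class sub-instructions, including allocframe and the store-immediate forms.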
+def SS1_storeb_io : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii, GeneralSubRegs:$Rt16),
+"memb($Rs16+#$Ii) = $Rt16",
+PSEUDO, TypeSUBINSN>, Enc_13204995 {
+let Inst{12-12} = 0b1;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let DecoderNamespace = "SUBINSN_S1";
+}
+def SS1_storew_io : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii, GeneralSubRegs:$Rt16),
+"memw($Rs16+#$Ii) = $Rt16",
+PSEUDO, TypeSUBINSN>, Enc_11205051 {
+let Inst{12-12} = 0b0;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let DecoderNamespace = "SUBINSN_S1";
+}
+def SS2_allocframe : HInst<
+(outs),
+(ins u5_3Imm:$Ii),
+"allocframe(#$Ii)",
+PSEUDO, TypeSUBINSN>, Enc_7884306 {
+let Inst{3-0} = 0b0000;
+let Inst{12-9} = 0b1110;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let Uses = [R30, R29, R31];
+let Defs = [R30, R29];
+let DecoderNamespace = "SUBINSN_S2";
+}
+def SS2_storebi0 : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
+"memb($Rs16+#$Ii) = #0",
+PSEUDO, TypeSUBINSN>, Enc_13536408 {
+let Inst{12-8} = 0b10010;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let DecoderNamespace = "SUBINSN_S2";
+}
+def SS2_storebi1 : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u4_0Imm:$Ii),
+"memb($Rs16+#$Ii) = #1",
+PSEUDO, TypeSUBINSN>, Enc_13536408 {
+let Inst{12-8} = 0b10011;
+let addrMode = BaseImmOffset;
+let accessSize = ByteAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let DecoderNamespace = "SUBINSN_S2";
+}
+def SS2_stored_sp : HInst<
+(outs),
+(ins s6_3Imm:$Ii, GeneralDoubleLow8Regs:$Rtt8),
+"memd(r29+#$Ii) = $Rtt8",
+PSEUDO, TypeSUBINSN>, Enc_9165078 {
+let Inst{12-9} = 0b0101;
+let addrMode = BaseImmOffset;
+let accessSize = DoubleWordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let Uses = [R29];
+let DecoderNamespace = "SUBINSN_S2";
+}
+def SS2_storeh_io : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u3_1Imm:$Ii, GeneralSubRegs:$Rt16),
+"memh($Rs16+#$Ii) = $Rt16",
+PSEUDO, TypeSUBINSN>, Enc_1734121 {
+let Inst{12-11} = 0b00;
+let addrMode = BaseImmOffset;
+let accessSize = HalfWordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let DecoderNamespace = "SUBINSN_S2";
+}
+def SS2_storew_sp : HInst<
+(outs),
+(ins u5_2Imm:$Ii, GeneralSubRegs:$Rt16),
+"memw(r29+#$Ii) = $Rt16",
+PSEUDO, TypeSUBINSN>, Enc_6690615 {
+let Inst{12-9} = 0b0100;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let Uses = [R29];
+let DecoderNamespace = "SUBINSN_S2";
+}
+def SS2_storewi0 : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
+"memw($Rs16+#$Ii) = #0",
+PSEUDO, TypeSUBINSN>, Enc_15536400 {
+let Inst{12-8} = 0b10000;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let DecoderNamespace = "SUBINSN_S2";
+}
+def SS2_storewi1 : HInst<
+(outs),
+(ins GeneralSubRegs:$Rs16, u4_2Imm:$Ii),
+"memw($Rs16+#$Ii) = #1",
+PSEUDO, TypeSUBINSN>, Enc_15536400 {
+let Inst{12-8} = 0b10001;
+let addrMode = BaseImmOffset;
+let accessSize = WordAccess;
+let AsmVariantName = "NonParsable";
+let mayStore = 1;
+let DecoderNamespace = "SUBINSN_S2";
+}
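+// V6_MAP_*: codegen-only HVX pseudos (TypeMAPPING) for the unsigned-element vcmp.eq forms; each has a _128B variant for the 128-byte vector register classes.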
+def V6_MAP_equb : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.ub,$Vv32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_MAP_equb_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.ub,$Vv32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_MAP_equb_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.ub,$Vv32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equb_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.ub,$Vv32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equb_ior : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.ub,$Vv32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equb_ior_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.ub,$Vv32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equb_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.ub,$Vv32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equb_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.ub,$Vv32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equh : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.uh,$Vv32.uh)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_MAP_equh_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.uh,$Vv32.uh)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_MAP_equh_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.uh,$Vv32.uh)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equh_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.uh,$Vv32.uh)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equh_ior : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.uh,$Vv32.uh)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equh_ior_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.uh,$Vv32.uh)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equh_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.uh,$Vv32.uh)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equh_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.uh,$Vv32.uh)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equw : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.uw,$Vv32.uw)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_MAP_equw_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.uw,$Vv32.uw)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_MAP_equw_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.uw,$Vv32.uw)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equw_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.uw,$Vv32.uw)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equw_ior : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.uw,$Vv32.uw)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equw_ior_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.uw,$Vv32.uw)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equw_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.uw,$Vv32.uw)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_MAP_equw_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.uw,$Vv32.uw)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_extractw : HInst<
+(outs IntRegs:$Rd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rs32),
+"$Rd32 = vextract($Vu32,$Rs32)",
+LD_tc_ld_SLOT0, TypeLD>, Enc_16601956, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10010010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSolo = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_extractw_128B : HInst<
+(outs IntRegs:$Rd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rs32),
+"$Rd32 = vextract($Vu32,$Rs32)",
+LD_tc_ld_SLOT0, TypeLD>, Enc_16601956, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10010010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSolo = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_extractw_alt : HInst<
+(outs IntRegs:$Rd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rs32),
+"$Rd32.w = vextract($Vu32,$Rs32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_extractw_alt_128B : HInst<
+(outs IntRegs:$Rd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rs32),
+"$Rd32.w = vextract($Vu32,$Rs32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
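+// V6_hi/V6_lo: pseudos that select the high or low vector register of a vector pair.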
+def V6_hi : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecDblRegs:$Vss32),
+"$Vd32 = hi($Vss32)",
+CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_hi_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecDblRegs128B:$Vss32),
+"$Vd32 = hi($Vss32)",
+CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
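+// V6_ld0/V6_ldnt0/V6_ldu0: codegen-only pseudo vmem/vmemu loads addressed by a single register, with no offset.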
+def V6_ld0 : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = vmem($Rt32)",
+PSEUDO, TypeCVI_VM_LD>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_ld0_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = vmem($Rt32)",
+PSEUDO, TypeCVI_VM_LD>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_ldnt0 : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = vmem($Rt32):nt",
+PSEUDO, TypeCVI_VM_LD>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_ldnt0_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = vmem($Rt32):nt",
+PSEUDO, TypeCVI_VM_LD>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_ldu0 : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = vmemu($Rt32)",
+PSEUDO, TypeCVI_VM_LD>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_ldu0_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = vmemu($Rt32)",
+PSEUDO, TypeCVI_VM_LD>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_lo : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecDblRegs:$Vss32),
+"$Vd32 = lo($Vss32)",
+CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_lo_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecDblRegs128B:$Vss32),
+"$Vd32 = lo($Vss32)",
+CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
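+// V6_lvsplat*: broadcast a scalar register value into every lane of an HVX vector.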
+def V6_lvsplatb : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32.b = vsplat($Rt32)",
+CVI_VX, TypeCVI_VX>, Enc_9768377, Requires<[HasV62T,UseHVX]> {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b00011001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_lvsplatb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32.b = vsplat($Rt32)",
+CVI_VX, TypeCVI_VX>, Enc_9768377, Requires<[HasV62T,UseHVX]> {
+let Inst{13-5} = 0b000000010;
+let Inst{31-21} = 0b00011001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_lvsplath : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32.h = vsplat($Rt32)",
+CVI_VX, TypeCVI_VX>, Enc_9768377, Requires<[HasV62T,UseHVX]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b00011001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_lvsplath_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32.h = vsplat($Rt32)",
+CVI_VX, TypeCVI_VX>, Enc_9768377, Requires<[HasV62T,UseHVX]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b00011001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_lvsplatw : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = vsplat($Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_9768377, Requires<[HasV60T,UseHVX]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_lvsplatw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32),
+"$Vd32 = vsplat($Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_9768377, Requires<[HasV60T,UseHVX]> {
+let Inst{13-5} = 0b000000001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
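+// V6_pred_*: logical operations on HVX predicate (Q) registers.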
+def V6_pred_and : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VecPredRegs:$Qs4, VecPredRegs:$Qt4),
+"$Qd4 = and($Qs4,$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_pred_and_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4),
+"$Qd4 = and($Qs4,$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_pred_and_n : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VecPredRegs:$Qs4, VecPredRegs:$Qt4),
+"$Qd4 = and($Qs4,!$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000101;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_pred_and_n_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4),
+"$Qd4 = and($Qs4,!$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000101;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_pred_not : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VecPredRegs:$Qs4),
+"$Qd4 = not($Qs4)",
+CVI_VA, TypeCVI_VA>, Enc_4897205, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000010;
+let Inst{13-10} = 0b0000;
+let Inst{31-16} = 0b0001111000000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_pred_not_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VecPredRegs128B:$Qs4),
+"$Qd4 = not($Qs4)",
+CVI_VA, TypeCVI_VA>, Enc_4897205, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000010;
+let Inst{13-10} = 0b0000;
+let Inst{31-16} = 0b0001111000000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_pred_or : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VecPredRegs:$Qs4, VecPredRegs:$Qt4),
+"$Qd4 = or($Qs4,$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000001;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_pred_or_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4),
+"$Qd4 = or($Qs4,$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000001;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_pred_or_n : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VecPredRegs:$Qs4, VecPredRegs:$Qt4),
+"$Qd4 = or($Qs4,!$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000100;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_pred_or_n_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4),
+"$Qd4 = or($Qs4,!$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000100;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_pred_scalar2 : HInst<
+(outs VecPredRegs:$Qd4),
+(ins IntRegs:$Rt32),
+"$Qd4 = vsetq($Rt32)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_12781442, Requires<[HasV60T,UseHVX]> {
+let Inst{13-2} = 0b000000010001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_pred_scalar2_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins IntRegs:$Rt32),
+"$Qd4 = vsetq($Rt32)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_12781442, Requires<[HasV60T,UseHVX]> {
+let Inst{13-2} = 0b000000010001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_pred_scalar2v2 : HInst<
+(outs VecPredRegs:$Qd4),
+(ins IntRegs:$Rt32),
+"$Qd4 = vsetq2($Rt32)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_12781442, Requires<[HasV62T,UseHVX]> {
+let Inst{13-2} = 0b000000010011;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_pred_scalar2v2_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins IntRegs:$Rt32),
+"$Qd4 = vsetq2($Rt32)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_12781442, Requires<[HasV62T,UseHVX]> {
+let Inst{13-2} = 0b000000010011;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_pred_xor : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VecPredRegs:$Qs4, VecPredRegs:$Qt4),
+"$Qd4 = xor($Qs4,$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000011;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_pred_xor_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4),
+"$Qd4 = xor($Qs4,$Qt4)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000011;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_shuffeqh : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VecPredRegs:$Qs4, VecPredRegs:$Qt4),
+"$Qd4.b = vshuffe($Qs4.h,$Qt4.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV62T,UseHVX]> {
+let Inst{7-2} = 0b000110;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_shuffeqh_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4),
+"$Qd4.b = vshuffe($Qs4.h,$Qt4.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV62T,UseHVX]> {
+let Inst{7-2} = 0b000110;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_shuffeqw : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VecPredRegs:$Qs4, VecPredRegs:$Qt4),
+"$Qd4.h = vshuffe($Qs4.w,$Qt4.w)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV62T,UseHVX]> {
+let Inst{7-2} = 0b000111;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_shuffeqw_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VecPredRegs128B:$Qs4, VecPredRegs128B:$Qt4),
+"$Qd4.h = vshuffe($Qs4.w,$Qt4.w)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_6091631, Requires<[HasV62T,UseHVX]> {
+let Inst{7-2} = 0b000111;
+let Inst{13-10} = 0b0000;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
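+// V6_st*: codegen-only pseudo vector stores: new-value (.new), predicated ($Pv4/$Qv4), non-temporal (:nt), and unaligned (vmemu) forms.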
+def V6_st0 : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs:$Vs32),
+"vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_st0_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stn0 : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs:$Os8),
+"vmem($Rt32) = $Os8.new",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 1;
+}
+def V6_stn0_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs128B:$Os8),
+"vmem($Rt32) = $Os8.new",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 1;
+}
+def V6_stnnt0 : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs:$Os8),
+"vmem($Rt32):nt = $Os8.new",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 1;
+}
+def V6_stnnt0_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs128B:$Os8),
+"vmem($Rt32):nt = $Os8.new",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 1;
+}
+def V6_stnp0 : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if (!$Pv4) vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnp0_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnpnt0 : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if (!$Pv4) vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnpnt0_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnq0 : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if (!$Qv4) vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnq0_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if (!$Qv4) vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnqnt0 : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if (!$Qv4) vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnqnt0_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if (!$Qv4) vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnt0 : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs:$Vs32),
+"vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stnt0_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stp0 : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if ($Pv4) vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stp0_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if ($Pv4) vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stpnt0 : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if ($Pv4) vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stpnt0_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if ($Pv4) vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stq0 : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if ($Qv4) vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stq0_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if ($Qv4) vmem($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stqnt0 : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if ($Qv4) vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stqnt0_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if ($Qv4) vmem($Rt32):nt = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stu0 : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs:$Vs32),
+"vmemu($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stu0_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"vmemu($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stunp0 : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if (!$Pv4) vmemu($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stunp0_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmemu($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stup0 : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs:$Vs32),
+"if ($Pv4) vmemu($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_stup0_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, VectorRegs128B:$Vs32),
+"if ($Pv4) vmemu($Rt32) = $Vs32",
+PSEUDO, TypeCVI_VM_ST>, Requires<[HasV60T,UseHVX]> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32Ub_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32 = vmemu($Rt32+#$Ii)",
+CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_1244745, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32Ub_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32 = vmemu($Rt32+#$Ii)",
+CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_8437395, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32Ub_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32 = vmemu($Rx32++#$Ii)",
+CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_10039393, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32Ub_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32 = vmemu($Rx32++#$Ii)",
+CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_11039423, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32Ub_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32 = vmemu($Rx32++$Mu2)",
+CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000111;
+let Inst{31-21} = 0b00101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32Ub_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32 = vmemu($Rx32++$Mu2)",
+CVI_VM_VP_LDU, TypeCVI_VM_VP_LDU>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000111;
+let Inst{31-21} = 0b00101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32 = vmem($Rt32+#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32 = vmem($Rt32+#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_cur_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32.cur = vmem($Rt32+#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_cur_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32.cur = vmem($Rt32+#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_cur_npred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_cur_npred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_cur_npred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_npred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_npred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000101;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_npred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000101;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32.cur = vmem($Rx32++#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32.cur = vmem($Rx32++#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32.cur = vmem($Rx32++$Mu2)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000001;
+let Inst{31-21} = 0b00101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32.cur = vmem($Rx32++$Mu2)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000001;
+let Inst{31-21} = 0b00101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_pred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_cur_pred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_cur_pred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_pred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_pred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000100;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_cur_pred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2)",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000100;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_npred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_npred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_npred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_npred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_npred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000011;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_npred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000011;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32 = vmem($Rt32+#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32 = vmem($Rt32+#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_cur_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32.cur = vmem($Rt32+#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_cur_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32.cur = vmem($Rt32+#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_cur_npred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_cur_npred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_cur_npred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_npred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_npred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000101;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_npred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000101;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32.cur = vmem($Rx32++#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32.cur = vmem($Rx32++#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32.cur = vmem($Rx32++$Mu2):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000001;
+let Inst{31-21} = 0b00101011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32.cur = vmem($Rx32++$Mu2):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000001;
+let Inst{31-21} = 0b00101011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_pred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_cur_pred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_cur_pred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_pred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_pred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000100;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_cur_pred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt",
+CVI_VM_CUR_LD, TypeCVI_VM_CUR_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000100;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_npred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_npred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_npred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_npred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_npred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000011;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_npred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000011;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32 = vmem($Rx32++#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32 = vmem($Rx32++#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32 = vmem($Rx32++$Mu2):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b00101011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32 = vmem($Rx32++$Mu2):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b00101011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_pred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32 = vmem($Rt32+#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_pred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32 = vmem($Rt32+#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_pred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32 = vmem($Rx32++#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_pred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32 = vmem($Rx32++#$Ii):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_pred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32 = vmem($Rx32++$Mu2):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000010;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_pred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32 = vmem($Rx32++$Mu2):nt",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000010;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32.tmp = vmem($Rt32+#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_tmp_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32.tmp = vmem($Rt32+#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_tmp_npred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_tmp_npred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_tmp_npred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_npred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_npred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000111;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_npred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000111;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32.tmp = vmem($Rx32++#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32.tmp = vmem($Rx32++#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32.tmp = vmem($Rx32++$Mu2):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000010;
+let Inst{31-21} = 0b00101011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32.tmp = vmem($Rx32++$Mu2):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000010;
+let Inst{31-21} = 0b00101011010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_pred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_nt_tmp_pred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{31-21} = 0b00101000110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_nt_tmp_pred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_pred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_pred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000110;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_nt_tmp_pred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000110;
+let Inst{31-21} = 0b00101011110;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let isNonTemporal = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32 = vmem($Rx32++#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32 = vmem($Rx32++#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32 = vmem($Rx32++$Mu2)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b00101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32 = vmem($Rx32++$Mu2)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b00101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let isCVLoadable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_pred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32 = vmem($Rt32+#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_pred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32 = vmem($Rt32+#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_pred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32 = vmem($Rx32++#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_pred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32 = vmem($Rx32++#$Ii)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_pred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32 = vmem($Rx32++$Mu2)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000010;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_pred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32 = vmem($Rx32++$Mu2)",
+CVI_VM_LD, TypeCVI_VM_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000010;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32.tmp = vmem($Rt32+#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_1244745, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_tmp_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii),
+"$Vd32.tmp = vmem($Rt32+#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_8437395, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_tmp_npred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_tmp_npred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_tmp_npred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_npred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_npred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000111;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_npred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000111;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32.tmp = vmem($Rx32++#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_10039393, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii),
+"$Vd32.tmp = vmem($Rx32++#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_11039423, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32.tmp = vmem($Rx32++$Mu2)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000010;
+let Inst{31-21} = 0b00101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2),
+"$Vd32.tmp = vmem($Rx32++$Mu2)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15949334, Requires<[HasV60T,UseHVX]> {
+let Inst{12-5} = 0b00000010;
+let Inst{31-21} = 0b00101011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_pred_ai : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_13338314, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vL32b_tmp_pred_ai_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii),
+"if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_738356, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{31-21} = 0b00101000100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vL32b_tmp_pred_pi : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_14560494, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_pred_pi_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii),
+"if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_15560488, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_pred_ppu : HInst<
+(outs VectorRegs:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000110;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vL32b_tmp_pred_ppu_128B : HInst<
+(outs VectorRegs128B:$Vd32, IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2),
+"if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)",
+CVI_VM_TMP_LD, TypeCVI_VM_TMP_LD>, Enc_3158657, Requires<[HasV62T,UseHVX]> {
+let Inst{10-5} = 0b000110;
+let Inst{31-21} = 0b00101011100;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isCVLoad = 1;
+let mayLoad = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_ai : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"vmemu($Rt32+#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_6923828, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b111;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000001;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ai";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32Ub_ai_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"vmemu($Rt32+#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_5757366, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b111;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000001;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ai_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32Ub_npred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Pv4) vmemu($Rt32+#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ai";
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32Ub_npred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmemu($Rt32+#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b111;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ai_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32Ub_npred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Pv4) vmemu($Rx32++#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_15459921, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_pi";
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_npred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmemu($Rx32++#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_14459927, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_pi_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_npred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if (!$Pv4) vmemu($Rx32++$Mu2) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000111;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ppu";
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_npred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmemu($Rx32++$Mu2) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000111;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ppu_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"vmemu($Rx32++#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_3296020, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b111;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001001;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_pi";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"vmemu($Rx32++#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_2296022, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b111;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001001;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_pi_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"vmemu($Rx32++$Mu2) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-5} = 0b00000111;
+let Inst{31-21} = 0b00101011001;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ppu";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"vmemu($Rx32++$Mu2) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-5} = 0b00000111;
+let Inst{31-21} = 0b00101011001;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ppu_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_pred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Pv4) vmemu($Rt32+#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b110;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ai";
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32Ub_pred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Pv4) vmemu($Rt32+#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b110;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ai_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32Ub_pred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Pv4) vmemu($Rx32++#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_15459921, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_pi";
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_pred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Pv4) vmemu($Rx32++#$Ii) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_14459927, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_pi_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_pred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if ($Pv4) vmemu($Rx32++$Mu2) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000110;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ppu";
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32Ub_pred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if ($Pv4) vmemu($Rx32++$Mu2) = $Vs32",
+CVI_VM_STU, TypeCVI_VM_STU>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000110;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32Ub_ppu_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_ai : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_6923828, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000001;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_ai_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_5757366, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000001;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_new_ai : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8),
+"vmem($Rt32+#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_6608821, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b00100;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000001;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 2;
+}
+def V6_vS32b_new_ai_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8),
+"vmem($Rt32+#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2152247, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b00100;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000001;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def V6_vS32b_new_npred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8),
+"if (!$Pv4) vmem($Rt32+#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_9372046, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01101;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 3;
+}
+def V6_vS32b_new_npred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8),
+"if (!$Pv4) vmem($Rt32+#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_13937564, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01101;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 3;
+}
+def V6_vS32b_new_npred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8),
+"if (!$Pv4) vmem($Rx32++#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_3735566, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_npred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8),
+"if (!$Pv4) vmem($Rx32++#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2735552, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_npred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8),
+"if (!$Pv4) vmem($Rx32++$Mu2) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-3} = 0b00001101;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_npred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8),
+"if (!$Pv4) vmem($Rx32++$Mu2) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-3} = 0b00001101;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8),
+"vmem($Rx32++#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_12244921, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b00100;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001001;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8),
+"vmem($Rx32++#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_11244923, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b00100;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001001;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8),
+"vmem($Rx32++$Mu2) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_1589406, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-3} = 0b0000000100;
+let Inst{31-21} = 0b00101011001;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8),
+"vmem($Rx32++$Mu2) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_1589406, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-3} = 0b0000000100;
+let Inst{31-21} = 0b00101011001;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_pred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8),
+"if ($Pv4) vmem($Rt32+#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_9372046, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01000;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 3;
+}
+def V6_vS32b_new_pred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8),
+"if ($Pv4) vmem($Rt32+#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_13937564, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01000;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 3;
+}
+def V6_vS32b_new_pred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8),
+"if ($Pv4) vmem($Rx32++#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_3735566, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_pred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8),
+"if ($Pv4) vmem($Rx32++#$Ii) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2735552, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_pred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8),
+"if ($Pv4) vmem($Rx32++$Mu2) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-3} = 0b00001000;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_new_pred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8),
+"if ($Pv4) vmem($Rx32++$Mu2) = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-3} = 0b00001000;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_npred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Pv4) vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_npred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_npred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Pv4) vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15459921, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_npred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_14459927, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_npred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if (!$Pv4) vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000001;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_npred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000001;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nqpred_ai : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Qv4) vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_16279406, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b00101000100;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_nqpred_ai_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Qv4) vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_2703240, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b00101000100;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_nqpred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Qv4) vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_12397062, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nqpred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Qv4) vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13397056, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nqpred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if (!$Qv4) vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000001;
+let Inst{31-21} = 0b00101011100;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nqpred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if (!$Qv4) vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000001;
+let Inst{31-21} = 0b00101011100;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_ai : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_6923828, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000011;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_nt_ai_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_5757366, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000011;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_nt_new_ai : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8),
+"vmem($Rt32+#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_6608821, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b00100;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000011;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 2;
+}
+def V6_vS32b_nt_new_ai_128B : HInst<
+(outs),
+(ins IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8),
+"vmem($Rt32+#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2152247, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b00100;
+let Inst{12-11} = 0b00;
+let Inst{31-21} = 0b00101000011;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 2;
+}
+def V6_vS32b_nt_new_npred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8),
+"if (!$Pv4) vmem($Rt32+#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_9372046, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01111;
+let Inst{31-21} = 0b00101000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 3;
+}
+def V6_vS32b_nt_new_npred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8),
+"if (!$Pv4) vmem($Rt32+#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_13937564, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01111;
+let Inst{31-21} = 0b00101000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 3;
+}
+def V6_vS32b_nt_new_npred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8),
+"if (!$Pv4) vmem($Rx32++#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_3735566, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_npred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8),
+"if (!$Pv4) vmem($Rx32++#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2735552, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_npred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8),
+"if (!$Pv4) vmem($Rx32++$Mu2):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-3} = 0b00001111;
+let Inst{31-21} = 0b00101011111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_npred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8),
+"if (!$Pv4) vmem($Rx32++$Mu2):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-3} = 0b00001111;
+let Inst{31-21} = 0b00101011111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8),
+"vmem($Rx32++#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_12244921, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b00100;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001011;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8),
+"vmem($Rx32++#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_11244923, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b00100;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001011;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8),
+"vmem($Rx32++$Mu2):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_1589406, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-3} = 0b0000000100;
+let Inst{31-21} = 0b00101011011;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8),
+"vmem($Rx32++$Mu2):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_1589406, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-3} = 0b0000000100;
+let Inst{31-21} = 0b00101011011;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 3;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_pred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Os8),
+"if ($Pv4) vmem($Rt32+#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_9372046, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01010;
+let Inst{31-21} = 0b00101000111;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 3;
+}
+def V6_vS32b_nt_new_pred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Os8),
+"if ($Pv4) vmem($Rt32+#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_13937564, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01010;
+let Inst{31-21} = 0b00101000111;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 3;
+}
+def V6_vS32b_nt_new_pred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Os8),
+"if ($Pv4) vmem($Rx32++#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_3735566, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001111;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_pred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Os8),
+"if ($Pv4) vmem($Rx32++#$Ii):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_2735552, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-3} = 0b01010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001111;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_pred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Os8),
+"if ($Pv4) vmem($Rx32++$Mu2):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-3} = 0b00001010;
+let Inst{31-21} = 0b00101011111;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let DecoderNamespace = "EXT_mmvec";
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_new_pred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Os8),
+"if ($Pv4) vmem($Rx32++$Mu2):nt = $Os8.new",
+CVI_VM_NEW_ST, TypeCVI_VM_NEW_ST>, Enc_8498433, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-3} = 0b00001010;
+let Inst{31-21} = 0b00101011111;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let isNVStore = 1;
+let mayStore = 1;
+let isNonTemporal = 1;
+let isNewValue = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let opNewValue = 4;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_npred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Pv4) vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b00101000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_nt_npred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b00101000111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_nt_npred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Pv4) vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15459921, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_npred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_14459927, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_npred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if (!$Pv4) vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000001;
+let Inst{31-21} = 0b00101011111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_npred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if (!$Pv4) vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000001;
+let Inst{31-21} = 0b00101011111;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_nqpred_ai : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Qv4) vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_16279406, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b00101000110;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_nt_nqpred_ai_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Qv4) vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_2703240, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{31-21} = 0b00101000110;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_nt_nqpred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if (!$Qv4) vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_12397062, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_nqpred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if (!$Qv4) vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13397056, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_nqpred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if (!$Qv4) vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000001;
+let Inst{31-21} = 0b00101011110;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_nqpred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if (!$Qv4) vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000001;
+let Inst{31-21} = 0b00101011110;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_3296020, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001011;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_2296022, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001011;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b00101011011;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b00101011011;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_pred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Pv4) vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b00101000111;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_nt_pred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Pv4) vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b00101000111;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_nt_pred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Pv4) vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15459921, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001111;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_pred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Pv4) vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_14459927, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001111;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_pred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if ($Pv4) vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000000;
+let Inst{31-21} = 0b00101011111;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ppu";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_pred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if ($Pv4) vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{10-5} = 0b000000;
+let Inst{31-21} = 0b00101011111;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let BaseOpcode = "V6_vS32b_ppu_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_qpred_ai : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Qv4) vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_16279406, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b00101000110;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_nt_qpred_ai_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Qv4) vmem($Rt32+#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_2703240, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b00101000110;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_nt_qpred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Qv4) vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_12397062, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_qpred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Qv4) vmem($Rx32++#$Ii):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13397056, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001110;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_qpred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if ($Qv4) vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000000;
+let Inst{31-21} = 0b00101011110;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_nt_qpred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if ($Qv4) vmem($Rx32++$Mu2):nt = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000000;
+let Inst{31-21} = 0b00101011110;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNonTemporal = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_3296020, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001001;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_2296022, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b00101001001;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b00101011001;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_11281763, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{12-5} = 0b00000000;
+let Inst{31-21} = 0b00101011001;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNVStorable = 1;
+let isPredicable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_pred_ai : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Pv4) vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_10075393, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ai";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_pred_ai_128B : HInst<
+(outs),
+(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Pv4) vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_9470751, Requires<[HasV60T,UseHVX]>, NewValueRel {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b00101000101;
+let isPredicated = 1;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_ai_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_pred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Pv4) vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15459921, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_pi";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_pred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Pv4) vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_14459927, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let BaseOpcode = "V6_vS32b_pi_128B";
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_pred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if ($Pv4) vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000000;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_pred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if ($Pv4) vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_15733946, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000000;
+let Inst{31-21} = 0b00101011101;
+let isPredicated = 1;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let isNVStorable = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_qpred_ai : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Qv4) vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_16279406, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b00101000100;
+let addrMode = BaseImmOffset;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vS32b_qpred_ai_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rt32, s4_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Qv4) vmem($Rt32+#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_2703240, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{31-21} = 0b00101000100;
+let addrMode = BaseImmOffset;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vS32b_qpred_pi : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs:$Vs32),
+"if ($Qv4) vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_12397062, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_qpred_pi_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, s3_0Imm:$Ii, VectorRegs128B:$Vs32),
+"if ($Qv4) vmem($Rx32++#$Ii) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13397056, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00101001100;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_qpred_ppu : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs:$Vs32),
+"if ($Qv4) vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000000;
+let Inst{31-21} = 0b00101011100;
+let addrMode = PostInc;
+let accessSize = Vector64Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Rx32 = $Rx32in";
+}
+def V6_vS32b_qpred_ppu_128B : HInst<
+(outs IntRegs:$Rx32),
+(ins VecPredRegs128B:$Qv4, IntRegs:$Rx32in, ModRegs:$Mu2, VectorRegs128B:$Vs32),
+"if ($Qv4) vmem($Rx32++$Mu2) = $Vs32",
+CVI_VM_ST, TypeCVI_VM_ST>, Enc_13425035, Requires<[HasV60T,UseHVX]> {
+let Inst{10-5} = 0b000000;
+let Inst{31-21} = 0b00101011100;
+let addrMode = PostInc;
+let accessSize = Vector128Access;
+let mayStore = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
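+// HVX vector-ALU instructions. Each typed instruction (e.g. "$Vd32.uh =
+// vabsdiff($Vu32.h,$Vv32.h)") is paired with an "_alt" record, a
+// code-gen-only mapping pseudo (isPseudo = 1, TypeMAPPING) carrying the
+// untyped assembler syntax.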
+def V6_vabsdiffh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vabsdiff($Vu32.h,$Vv32.h)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vabsdiff($Vu32.h,$Vv32.h)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vabsdiffh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vabsdiffh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vabsdiffh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vabsdiff($Vu32.ub,$Vv32.ub)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vabsdiff($Vu32.ub,$Vv32.ub)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vabsdiffub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vabsdiffub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vabsdiffub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffuh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vabsdiff($Vu32.uh,$Vv32.uh)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffuh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vabsdiff($Vu32.uh,$Vv32.uh)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vabsdiffuh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vabsdiffuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffuh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vabsdiffuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uw = vabsdiff($Vu32.w,$Vv32.w)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uw = vabsdiff($Vu32.w,$Vv32.w)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vabsdiffw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vabsdiffw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsdiffw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vabsdiffw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.h = vabs($Vu32.h)",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.h = vabs($Vu32.h)",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vabsh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vabsh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vabsh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsh_sat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.h = vabs($Vu32.h):sat",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsh_sat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.h = vabs($Vu32.h):sat",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vabsh_sat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vabsh($Vu32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsh_sat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vabsh($Vu32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.w = vabs($Vu32.w)",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.w = vabs($Vu32.w)",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vabsw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vabsw($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vabsw($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsw_sat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.w = vabs($Vu32.w):sat",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsw_sat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.w = vabs($Vu32.w):sat",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vabsw_sat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vabsw($Vu32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vabsw_sat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vabsw($Vu32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vadd($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vadd($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaddb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaddb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddb_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.b = vadd($Vuu32.b,$Vvv32.b)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddb_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.b = vadd($Vuu32.b,$Vvv32.b)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddb_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vaddb($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddb_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vaddb($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddbnq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4) $Vx32.b += $Vu32.b",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddbnq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4) $Vx32.b += $Vu32.b",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddbnq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4.b) $Vx32.b += $Vu32.b",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddbnq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4.b) $Vx32.b += $Vu32.b",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddbq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4) $Vx32.b += $Vu32.b",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddbq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4) $Vx32.b += $Vu32.b",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddbq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4.b) $Vx32.b += $Vu32.b",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddbq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4.b) $Vx32.b += $Vu32.b",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddbsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vadd($Vu32.b,$Vv32.b):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddbsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vadd($Vu32.b,$Vv32.b):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddbsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaddb($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddbsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaddb($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddbsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.b = vadd($Vuu32.b,$Vvv32.b):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddbsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.b = vadd($Vuu32.b,$Vvv32.b):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddbsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vaddb($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddbsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vaddb($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddcarry : HInst<
+(outs VectorRegs:$Vd32, VecPredRegs:$Qx4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, VecPredRegs:$Qx4in),
+"$Vd32.w = vadd($Vu32.w,$Vv32.w,$Qx4):carry",
+CVI_VA, TypeCVI_VA>, Enc_13691337, Requires<[HasV62T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vaddcarry_128B : HInst<
+(outs VectorRegs128B:$Vd32, VecPredRegs128B:$Qx4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, VecPredRegs128B:$Qx4in),
+"$Vd32.w = vadd($Vu32.w,$Vv32.w,$Qx4):carry",
+CVI_VA, TypeCVI_VA>, Enc_13691337, Requires<[HasV62T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vaddclbh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vadd(vclb($Vu32.h),$Vv32.h)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddclbh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vadd(vclb($Vu32.h),$Vv32.h)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddclbw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vadd(vclb($Vu32.w),$Vv32.w)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddclbw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vadd(vclb($Vu32.w),$Vv32.w)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vadd($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vadd($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaddh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaddh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddh_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.h = vadd($Vuu32.h,$Vvv32.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddh_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.h = vadd($Vuu32.h,$Vvv32.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddh_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vaddh($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddh_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vaddh($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhnq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4) $Vx32.h += $Vu32.h",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddhnq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4) $Vx32.h += $Vu32.h",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddhnq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4.h) $Vx32.h += $Vu32.h",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddhnq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4.h) $Vx32.h += $Vu32.h",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddhq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4) $Vx32.h += $Vu32.h",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddhq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4) $Vx32.h += $Vu32.h",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddhq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4.h) $Vx32.h += $Vu32.h",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddhq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4.h) $Vx32.h += $Vu32.h",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddhsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vadd($Vu32.h,$Vv32.h):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vadd($Vu32.h,$Vv32.h):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddhsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaddh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaddh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.h = vadd($Vuu32.h,$Vvv32.h):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.h = vadd($Vuu32.h,$Vvv32.h):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddhsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vaddh($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vaddh($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhw : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.w = vadd($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhw_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.w = vadd($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddhw_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.w += vadd($Vu32.h,$Vv32.h)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vaddhw_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.w += vadd($Vu32.h,$Vv32.h)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vaddhw_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vaddh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vaddhw_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vaddh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vaddhw_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vaddh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddhw_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vaddh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.h = vadd($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.h = vadd($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddubh_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.h += vadd($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vaddubh_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.h += vadd($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vaddubh_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vaddub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vaddubh_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vaddub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vaddubh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vaddub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vaddub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vadd($Vu32.ub,$Vv32.ub):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vadd($Vu32.ub,$Vv32.ub):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddubsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaddub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaddub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.ub = vadd($Vuu32.ub,$Vvv32.ub):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.ub = vadd($Vuu32.ub,$Vvv32.ub):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddubsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vaddub($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddubsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vaddub($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddububb_sat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vadd($Vu32.ub,$Vv32.b):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddububb_sat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vadd($Vu32.ub,$Vv32.b):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vadduhsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vadd($Vu32.uh,$Vv32.uh):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduhsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vadd($Vu32.uh,$Vv32.uh):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vadduhsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vadduh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduhsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vadduh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduhsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.uh = vadd($Vuu32.uh,$Vvv32.uh):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduhsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.uh = vadd($Vuu32.uh,$Vvv32.uh):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vadduhsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vadduh($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduhsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vadduh($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduhw : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.w = vadd($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduhw_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.w = vadd($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vadduhw_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.w += vadd($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vadduhw_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.w += vadd($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vadduhw_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vadduh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vadduhw_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vadduh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vadduhw_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vadduh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduhw_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vadduh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduwsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uw = vadd($Vu32.uw,$Vv32.uw):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduwsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uw = vadd($Vu32.uw,$Vv32.uw):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vadduwsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vadduw($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduwsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vadduw($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduwsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.uw = vadd($Vuu32.uw,$Vvv32.uw):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduwsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.uw = vadd($Vuu32.uw,$Vvv32.uw):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vadduwsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vadduw($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vadduwsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vadduw($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vadd($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vadd($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaddw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaddw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddw_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.w = vadd($Vuu32.w,$Vvv32.w)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddw_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.w = vadd($Vuu32.w,$Vvv32.w)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddw_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vaddw($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddw_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vaddw($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddwnq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4) $Vx32.w += $Vu32.w",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddwnq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4) $Vx32.w += $Vu32.w",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddwnq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4.w) $Vx32.w += $Vu32.w",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddwnq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4.w) $Vx32.w += $Vu32.w",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddwq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4) $Vx32.w += $Vu32.w",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddwq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4) $Vx32.w += $Vu32.w",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddwq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4.w) $Vx32.w += $Vu32.w",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddwq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4.w) $Vx32.w += $Vu32.w",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaddwsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vadd($Vu32.w,$Vv32.w):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddwsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vadd($Vu32.w,$Vv32.w):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddwsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaddw($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddwsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaddw($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddwsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.w = vadd($Vuu32.w,$Vvv32.w):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddwsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.w = vadd($Vuu32.w,$Vvv32.w):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaddwsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vaddw($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaddwsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vaddw($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_valignb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = valign($Vu32,$Vv32,$Rt8)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_valignb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = valign($Vu32,$Vv32,$Rt8)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_valignbi : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii),
+"$Vd32 = valign($Vu32,$Vv32,#$Ii)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV60T,UseHVX]> {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_valignbi_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii),
+"$Vd32 = valign($Vu32,$Vv32,#$Ii)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV60T,UseHVX]> {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vand : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vand($Vu32,$Vv32)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vand_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vand($Vu32,$Vv32)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vandnqrt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecPredRegs:$Qu4, IntRegs:$Rt32),
+"$Vd32 = vand(!$Qu4,$Rt32)",
+CVI_VX, TypeCVI_VX>, Enc_4711514, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-10} = 0b0001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandnqrt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecPredRegs128B:$Qu4, IntRegs:$Rt32),
+"$Vd32 = vand(!$Qu4,$Rt32)",
+CVI_VX, TypeCVI_VX>, Enc_4711514, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-10} = 0b0001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vandnqrt_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VecPredRegs:$Qu4, IntRegs:$Rt32),
+"$Vx32 |= vand(!$Qu4,$Rt32)",
+CVI_VX, TypeCVI_VX>, Enc_4944558, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-10} = 0b1001;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vandnqrt_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VecPredRegs128B:$Qu4, IntRegs:$Rt32),
+"$Vx32 |= vand(!$Qu4,$Rt32)",
+CVI_VX, TypeCVI_VX>, Enc_4944558, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-10} = 0b1001;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vandnqrt_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VecPredRegs:$Qu4, IntRegs:$Rt32),
+"$Vx32.ub |= vand(!$Qu4.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vandnqrt_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VecPredRegs128B:$Qu4, IntRegs:$Rt32),
+"$Vx32.ub |= vand(!$Qu4.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vandnqrt_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecPredRegs:$Qu4, IntRegs:$Rt32),
+"$Vd32.ub = vand(!$Qu4.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandnqrt_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecPredRegs128B:$Qu4, IntRegs:$Rt32),
+"$Vd32.ub = vand(!$Qu4.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandqrt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecPredRegs:$Qu4, IntRegs:$Rt32),
+"$Vd32 = vand($Qu4,$Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_4711514, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-10} = 0b0000;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandqrt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecPredRegs128B:$Qu4, IntRegs:$Rt32),
+"$Vd32 = vand($Qu4,$Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_4711514, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-10} = 0b0000;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vandqrt_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VecPredRegs:$Qu4, IntRegs:$Rt32),
+"$Vx32 |= vand($Qu4,$Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_4944558, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-10} = 0b1000;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vandqrt_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VecPredRegs128B:$Qu4, IntRegs:$Rt32),
+"$Vx32 |= vand($Qu4,$Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_4944558, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-10} = 0b1000;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vandqrt_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VecPredRegs:$Qu4, IntRegs:$Rt32),
+"$Vx32.ub |= vand($Qu4.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vandqrt_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VecPredRegs128B:$Qu4, IntRegs:$Rt32),
+"$Vx32.ub |= vand($Qu4.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vandqrt_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecPredRegs:$Qu4, IntRegs:$Rt32),
+"$Vd32.ub = vand($Qu4.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandqrt_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecPredRegs128B:$Qu4, IntRegs:$Rt32),
+"$Vd32.ub = vand($Qu4.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandvnqv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vu32),
+"$Vd32 = vand(!$Qv4,$Vu32)",
+CVI_VA, TypeCVI_VA>, Enc_1220199, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandvnqv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vu32),
+"$Vd32 = vand(!$Qv4,$Vu32)",
+CVI_VA, TypeCVI_VA>, Enc_1220199, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vandvqv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vu32),
+"$Vd32 = vand($Qv4,$Vu32)",
+CVI_VA, TypeCVI_VA>, Enc_1220199, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandvqv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vu32),
+"$Vd32 = vand($Qv4,$Vu32)",
+CVI_VA, TypeCVI_VA>, Enc_1220199, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000011;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vandvrt : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Qd4 = vand($Vu32,$Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_11498120, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandvrt_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Qd4 = vand($Vu32,$Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_11498120, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vandvrt_acc : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Qx4 |= vand($Vu32,$Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_10612292, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vandvrt_acc_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Qx4 |= vand($Vu32,$Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_10612292, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vandvrt_acc_alt : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Qx4.ub |= vand($Vu32.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vandvrt_acc_alt_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Qx4.ub |= vand($Vu32.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vandvrt_alt : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Qd4.ub = vand($Vu32.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vandvrt_alt_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Qd4.ub = vand($Vu32.ub,$Rt32.ub)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vasl($Vu32.h,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vasl($Vu32.h,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaslh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vaslh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vaslh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslhv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vasl($Vu32.h,$Vv32.h)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslhv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vasl($Vu32.h,$Vv32.h)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaslhv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaslh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslhv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaslh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vasl($Vu32.w,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vasl($Vu32.w,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaslw_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vasl($Vu32.w,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaslw_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vasl($Vu32.w,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaslw_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vaslw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaslw_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vaslw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vaslw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vaslw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vaslw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslwv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vasl($Vu32.w,$Vv32.w)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslwv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vasl($Vu32.w,$Vv32.w)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vaslwv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vaslw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vaslwv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vaslw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vasr($Vu32.h,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vasr($Vu32.h,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vasrh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vasrh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrhbrndsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrhbrndsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrhbrndsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vasrhb($Vu32,$Vv32,$Rt8):rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def V6_vasrhbsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrhbsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.b = vasr($Vu32.h,$Vv32.h,$Rt8):sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrhubrndsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrhubrndsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrhubrndsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def V6_vasrhubsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrhubsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.ub = vasr($Vu32.h,$Vv32.h,$Rt8):sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrhubsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def V6_vasrhv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vasr($Vu32.h,$Vv32.h)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrhv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vasr($Vu32.h,$Vv32.h)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrhv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vasrh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrhv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vasrh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasruwuhrndsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.uh = vasr($Vu32.uw,$Vv32.uw,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasruwuhrndsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.uh = vasr($Vu32.uw,$Vv32.uw,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vasr($Vu32.w,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vasr($Vu32.w,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrw_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vasr($Vu32.w,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vasrw_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vasr($Vu32.w,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vasrw_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vasrw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vasrw_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vasrw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vasrw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vasrw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vasrw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrwh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8)",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrwh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8)",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrwh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def V6_vasrwhrndsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrwhrndsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrwhrndsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def V6_vasrwhsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrwhsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.h = vasr($Vu32.w,$Vv32.w,$Rt8):sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrwhsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def V6_vasrwuhrndsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrwuhrndsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):rnd:sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrwuhsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrwuhsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.uh = vasr($Vu32.w,$Vv32.w,$Rt8):sat",
+CVI_VS, TypeCVI_VS>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrwuhsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vasrwuh($Vu32,$Vv32,$Rt8):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def V6_vasrwv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vasr($Vu32.w,$Vv32.w)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrwv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vasr($Vu32.w,$Vv32.w)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vasrwv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vasrw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vasrwv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vasrw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vassign : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = $Vu32",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vassign_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = $Vu32",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vassignp : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32),
+"$Vdd32 = $Vuu32",
+CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vassignp_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32),
+"$Vdd32 = $Vuu32",
+CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavgh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vavg($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vavg($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavgh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vavgh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vavgh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavghrnd : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vavg($Vu32.h,$Vv32.h):rnd",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavghrnd_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vavg($Vu32.h,$Vv32.h):rnd",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavghrnd_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vavgh($Vu32,$Vv32):rnd",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavghrnd_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vavgh($Vu32,$Vv32):rnd",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vavg($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vavg($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavgub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vavgub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vavgub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgubrnd : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vavg($Vu32.ub,$Vv32.ub):rnd",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgubrnd_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vavg($Vu32.ub,$Vv32.ub):rnd",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavgubrnd_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vavgub($Vu32,$Vv32):rnd",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgubrnd_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vavgub($Vu32,$Vv32):rnd",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavguh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vavg($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavguh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vavg($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavguh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vavguh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavguh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vavguh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavguhrnd : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vavg($Vu32.uh,$Vv32.uh):rnd",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavguhrnd_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vavg($Vu32.uh,$Vv32.uh):rnd",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavguhrnd_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vavguh($Vu32,$Vv32):rnd",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavguhrnd_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vavguh($Vu32,$Vv32):rnd",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vavg($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vavg($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavgw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vavgw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vavgw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgwrnd : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vavg($Vu32.w,$Vv32.w):rnd",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgwrnd_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vavg($Vu32.w,$Vv32.w):rnd",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vavgwrnd_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vavgw($Vu32,$Vv32):rnd",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vavgwrnd_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vavgw($Vu32,$Vv32):rnd",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vccombine : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins PredRegs:$Ps4, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"if ($Ps4) $Vdd32 = vcombine($Vu32,$Vv32)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_16145290, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011010011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vccombine_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins PredRegs:$Ps4, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"if ($Ps4) $Vdd32 = vcombine($Vu32,$Vv32)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_16145290, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011010011;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vcl0h : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.uh = vcl0($Vu32.uh)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcl0h_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.uh = vcl0($Vu32.uh)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vcl0h_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vcl0h($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcl0h_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vcl0h($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcl0w : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.uw = vcl0($Vu32.uw)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcl0w_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.uw = vcl0($Vu32.uw)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vcl0w_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vcl0w($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcl0w_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vcl0w($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcmov : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Ps4, VectorRegs:$Vu32),
+"if ($Ps4) $Vd32 = $Vu32",
+CVI_VA, TypeCVI_VA>, Enc_12023037, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001101000000000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcmov_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Ps4, VectorRegs128B:$Vu32),
+"if ($Ps4) $Vd32 = $Vu32",
+CVI_VA, TypeCVI_VA>, Enc_12023037, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001101000000000;
+let isPredicated = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vcombine : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vcombine($Vu32,$Vv32)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isRegSequence = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vcombine_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vcombine($Vu32,$Vv32)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isRegSequence = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vd0 : HInst<
+(outs VectorRegs:$Vd32),
+(ins),
+"$Vd32 = #0",
+CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vd0_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins),
+"$Vd32 = #0",
+CVI_VA, TypeCVI_VA>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdeal : HInst<
+(outs VectorRegs:$Vy32, VectorRegs:$Vx32),
+(ins VectorRegs:$Vy32in, VectorRegs:$Vx32in, IntRegs:$Rt32),
+"vdeal($Vy32,$Vx32,$Rt32)",
+CVI_VP_VS_LONG_EARLY, TypeCVI_VP_VS>, Enc_11422009, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vy32 = $Vy32in, $Vx32 = $Vx32in";
+}
+def V6_vdeal_128B : HInst<
+(outs VectorRegs128B:$Vy32, VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vy32in, VectorRegs128B:$Vx32in, IntRegs:$Rt32),
+"vdeal($Vy32,$Vx32,$Rt32)",
+CVI_VP_VS_LONG_EARLY, TypeCVI_VP_VS>, Enc_11422009, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vy32 = $Vy32in, $Vx32 = $Vx32in";
+}
+def V6_vdealb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.b = vdeal($Vu32.b)",
+CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealb4w : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vdeale($Vu32.b,$Vv32.b)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealb4w_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vdeale($Vu32.b,$Vv32.b)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdealb4w_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vdealb4w($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealb4w_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vdealb4w($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.b = vdeal($Vu32.b)",
+CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdealb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vdealb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vdealb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.h = vdeal($Vu32.h)",
+CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.h = vdeal($Vu32.h)",
+CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdealh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vdealh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vdealh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealvdd : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vdd32 = vdeal($Vu32,$Vv32,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdealvdd_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vdd32 = vdeal($Vu32,$Vv32,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdelta : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vdelta($Vu32,$Vv32)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdelta_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vdelta($Vu32,$Vv32)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpybus : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vdmpy($Vu32.ub,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpybus_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vdmpy($Vu32.ub,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpybus_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.h += vdmpy($Vu32.ub,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpybus_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.h += vdmpy($Vu32.ub,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpybus_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vdmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpybus_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vdmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpybus_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vdmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpybus_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vdmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpybus_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.h = vdmpy($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpybus_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.h = vdmpy($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpybus_dv_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.h += vdmpy($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdmpybus_dv_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.h += vdmpy($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdmpybus_dv_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vdmpybus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdmpybus_dv_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vdmpybus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdmpybus_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vdmpybus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpybus_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vdmpybus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vu32.h,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vu32.h,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpyhb_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vu32.h,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhb_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vu32.h,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhb_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyhb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhb_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyhb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyhb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyhb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhb_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.w = vdmpy($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhb_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.w = vdmpy($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpyhb_dv_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.w += vdmpy($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdmpyhb_dv_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.w += vdmpy($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdmpyhb_dv_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vdmpyhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdmpyhb_dv_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vdmpyhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdmpyhb_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vdmpyhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhb_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vdmpyhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhisat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vuu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_36641, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhisat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vuu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_36641, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpyhisat_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vuu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5890213, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhisat_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vuu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5890213, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhisat_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyh($Vuu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhisat_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyh($Vuu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhisat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyh($Vuu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhisat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyh($Vuu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpyhsat_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsat_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsat_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyh($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsat_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyh($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyh($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyh($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsuisat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vuu32.h,$Rt32.uh,#1):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_36641, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsuisat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vuu32.h,$Rt32.uh,#1):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_36641, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpyhsuisat_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vuu32.h,$Rt32.uh,#1):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5890213, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsuisat_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vuu32.h,$Rt32.uh,#1):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5890213, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsuisat_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyhsu($Vuu32,$Rt32,#1):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsuisat_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyhsu($Vuu32,$Rt32,#1):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsuisat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyhsu($Vuu32,$Rt32,#1):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsuisat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyhsu($Vuu32,$Rt32,#1):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsusat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vu32.h,$Rt32.uh):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsusat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vdmpy($Vu32.h,$Rt32.uh):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpyhsusat_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vu32.h,$Rt32.uh):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsusat_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vdmpy($Vu32.h,$Rt32.uh):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsusat_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyhsu($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsusat_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vdmpyhsu($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhsusat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyhsu($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhsusat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vdmpyhsu($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhvsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vdmpy($Vu32.h,$Vv32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhvsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vdmpy($Vu32.h,$Vv32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdmpyhvsat_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.w += vdmpy($Vu32.h,$Vv32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhvsat_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.w += vdmpy($Vu32.h,$Vv32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhvsat_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vdmpyh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhvsat_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vdmpyh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vdmpyhvsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vdmpyh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdmpyhvsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vdmpyh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdsaduh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.uw = vdsad($Vuu32.uh,$Rt32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdsaduh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.uw = vdsad($Vuu32.uh,$Rt32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vdsaduh_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.uw += vdsad($Vuu32.uh,$Rt32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdsaduh_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.uw += vdsad($Vuu32.uh,$Rt32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdsaduh_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vdsaduh($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdsaduh_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vdsaduh($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vdsaduh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vdsaduh($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vdsaduh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vdsaduh($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
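+// HVX vector-predicate compares: vcmp.eq and vcmp.gt over b/h/w and ub/uh/uw,
+// each with and/or/xor forms accumulating into $Qx4.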
+def V6_veqb : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_veqb_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_veqb_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqb_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqb_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqb_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqb_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqb_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqh : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_veqh_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_veqh_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqh_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqh_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqh_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqh_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqh_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqw : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_veqw_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.eq($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_veqw_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqw_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.eq($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqw_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqw_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.eq($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqw_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_veqw_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.eq($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtb : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vgtb_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vgtb_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtb_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtb_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtb_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtb_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtb_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgth : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vgth_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vgth_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgth_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgth_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgth_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgth_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgth_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtub : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vgtub_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vgtub_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtub_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtub_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b011000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtub_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b011000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtub_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b101000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtub_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b101000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuh : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vgtuh_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vgtuh_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuh_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuh_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b011001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuh_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b011001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuh_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b101001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuh_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b101001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuw : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vgtuw_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vgtuw_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuw_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b001010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuw_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b011010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuw_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b011010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuw_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b101010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtuw_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b101010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtw : HInst<
+(outs VecPredRegs:$Qd4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vgtw_128B : HInst<
+(outs VecPredRegs128B:$Qd4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qd4 = vcmp.gt($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_13983714, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vgtw_and : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtw_and_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 &= vcmp.gt($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b000110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtw_or : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtw_or_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 |= vcmp.gt($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b010110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtw_xor : HInst<
+(outs VecPredRegs:$Qx4),
+(ins VecPredRegs:$Qx4in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vgtw_xor_128B : HInst<
+(outs VecPredRegs128B:$Qx4),
+(ins VecPredRegs128B:$Qx4in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Qx4 ^= vcmp.gt($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_7470998, Requires<[HasV60T,UseHVX]> {
+let Inst{7-2} = 0b100110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
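+// Vector histogram instructions; vhistq is the vector-predicate-qualified form.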
+def V6_vhist : HInst<
+(outs),
+(ins),
+"vhist",
+CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV60T,UseHVX]> {
+let Inst{13-0} = 0b10000010000000;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vhist_128B : HInst<
+(outs),
+(ins),
+"vhist",
+CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV60T,UseHVX]> {
+let Inst{13-0} = 0b10000010000000;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vhistq : HInst<
+(outs),
+(ins VecPredRegs:$Qv4),
+"vhist($Qv4)",
+CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV60T,UseHVX]> {
+let Inst{13-0} = 0b10000010000000;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vhistq_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4),
+"vhist($Qv4)",
+CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV60T,UseHVX]> {
+let Inst{13-0} = 0b10000010000000;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
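+// Word insert (vinsert), lane alignment (vlalign), and logical shift right (vlsr).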
+def V6_vinsertwr : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, IntRegs:$Rt32),
+"$Vx32.w = vinsert($Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_313333, Requires<[HasV60T,UseHVX]> {
+let Inst{13-5} = 0b100000001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vinsertwr_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, IntRegs:$Rt32),
+"$Vx32.w = vinsert($Rt32)",
+CVI_VX_LATE, TypeCVI_VX>, Enc_313333, Requires<[HasV60T,UseHVX]> {
+let Inst{13-5} = 0b100000001;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vlalignb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vlalign($Vu32,$Vv32,$Rt8)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlalignb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32 = vlalign($Vu32,$Vv32,$Rt8)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlalignbi : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii),
+"$Vd32 = vlalign($Vu32,$Vv32,#$Ii)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV60T,UseHVX]> {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlalignbi_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii),
+"$Vd32 = vlalign($Vu32,$Vv32,#$Ii)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV60T,UseHVX]> {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlsrb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.ub = vlsr($Vu32.ub,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.ub = vlsr($Vu32.ub,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlsrh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.uh = vlsr($Vu32.uh,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.uh = vlsr($Vu32.uh,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlsrh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vlsrh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vlsrh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrhv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vlsr($Vu32.h,$Vv32.h)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrhv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vlsr($Vu32.h,$Vv32.h)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlsrhv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vlsrh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrhv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vlsrh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.uw = vlsr($Vu32.uw,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.uw = vlsr($Vu32.uw,$Rt32)",
+CVI_VS, TypeCVI_VS>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlsrw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vlsrw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vlsrw($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrwv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vlsr($Vu32.w,$Vv32.w)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrwv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vlsr($Vu32.w,$Vv32.w)",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlsrwv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vlsrw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlsrwv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vlsrw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
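+// Vector lookup-table instructions: vlut32 (byte) and vlut16 (byte index into a
+// halfword table), with or-accumulating (oracc, oracci) and :nomatch variants.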
+def V6_vlutvvb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlutvvb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlutvvb_nm : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8):nomatch",
+CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlutvvb_nm_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vd32.b = vlut32($Vu32.b,$Vv32.b,$Rt8):nomatch",
+CVI_VP_LONG, TypeCVI_VP>, Enc_11083408, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlutvvb_oracc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vx32.b |= vlut32($Vu32.b,$Vv32.b,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_8877260, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vlutvvb_oracc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vx32.b |= vlut32($Vu32.b,$Vv32.b,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_8877260, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vlutvvb_oracci : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii),
+"$Vx32.b |= vlut32($Vu32.b,$Vv32.b,#$Ii)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_8280533, Requires<[HasV62T,UseHVX]> {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vlutvvb_oracci_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii),
+"$Vx32.b |= vlut32($Vu32.b,$Vv32.b,#$Ii)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_8280533, Requires<[HasV62T,UseHVX]> {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vlutvvbi : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii),
+"$Vd32.b = vlut32($Vu32.b,$Vv32.b,#$Ii)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV62T,UseHVX]> {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlutvvbi_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii),
+"$Vd32.b = vlut32($Vu32.b,$Vv32.b,#$Ii)",
+CVI_VP_LONG, TypeCVI_VP>, Enc_7171569, Requires<[HasV62T,UseHVX]> {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlutvwh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlutvwh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlutvwh_nm : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8):nomatch",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlutvwh_nm_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,$Rt8):nomatch",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-24} = 0b00011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vlutvwh_oracc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_16213761, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vlutvwh_oracc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_16213761, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vlutvwh_oracci : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii),
+"$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,#$Ii)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_3457570, Requires<[HasV62T,UseHVX]> {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vlutvwh_oracci_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii),
+"$Vxx32.h |= vlut16($Vu32.b,$Vv32.h,#$Ii)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_3457570, Requires<[HasV62T,UseHVX]> {
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vlutvwhi : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, u3_0Imm:$Ii),
+"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,#$Ii)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_13261538, Requires<[HasV62T,UseHVX]> {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vlutvwhi_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, u3_0Imm:$Ii),
+"$Vdd32.h = vlut16($Vu32.b,$Vv32.h,#$Ii)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_13261538, Requires<[HasV62T,UseHVX]> {
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
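+// Element-wise vector max/min instructions.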
+def V6_vmaxb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vmax($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vmax($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmaxb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmaxb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmaxb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vmax($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vmax($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmaxh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmaxh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmaxh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vmax($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vmax($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmaxub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmaxub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmaxub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxuh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vmax($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxuh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vmax($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmaxuh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmaxuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxuh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmaxuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vmax($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vmax($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmaxw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmaxw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmaxw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmaxw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vmin($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vmin($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vminb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vminb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vminb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vmin($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vmin($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vminh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vminh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vminh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vmin($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vmin($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vminub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vminub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vminub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminuh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vmin($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminuh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vmin($Vu32.uh,$Vv32.uh)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vminuh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vminuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminuh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vminuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vmin($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vmin($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vminw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vminw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vminw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vminw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
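+// The vmpa* defs below operate on register pairs (VecDblRegs); their "_acc"
+// forms ("+=", isAccumulator = 1) read-modify-write the destination pair,
+// hence the tied-operand constraint "$Vxx32 = $Vxx32in".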
+def V6_vmpabus : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.h = vmpa($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpabus_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.h = vmpa($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpabus_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.h += vmpa($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpabus_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.h += vmpa($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpabus_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vmpabus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpabus_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vmpabus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpabus_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vmpabus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpabus_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vmpabus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpabusv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.b)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpabusv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.b)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpabusv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vmpabus($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpabusv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vmpabus($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpabuuv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.ub)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpabuuv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.h = vmpa($Vuu32.ub,$Vvv32.ub)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpabuuv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vmpabuu($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpabuuv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vmpabuu($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpahb : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.w = vmpa($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpahb_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.w = vmpa($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpahb_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.w += vmpa($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpahb_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.w += vmpa($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpahb_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vmpahb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpahb_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vmpahb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpahb_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vmpahb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpahb_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vmpahb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpauhb : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.w = vmpa($Vuu32.uh,$Rt32.b)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpauhb_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.w = vmpa($Vuu32.uh,$Rt32.b)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpauhb_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.w += vmpa($Vuu32.uh,$Rt32.b)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpauhb_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.w += vmpa($Vuu32.uh,$Rt32.b)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpauhb_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vmpauhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpauhb_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vmpauhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpauhb_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vmpauhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpauhb_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vmpauhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
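+// The vmpybus*/vmpybv* defs below appear to be widening multiplies, judging
+// from the operand types: single-vector inputs, a VecDblRegs pair out, with
+// elements widening (e.g. .ub x .b -> .h).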
+def V6_vmpybus : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vdd32.h = vmpy($Vu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpybus_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vdd32.h = vmpy($Vu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpybus_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vxx32.h += vmpy($Vu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybus_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vxx32.h += vmpy($Vu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybus_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vxx32 += vmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybus_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vxx32 += vmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybus_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vdd32 = vmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpybus_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vdd32 = vmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpybusv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.h = vmpy($Vu32.ub,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpybusv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.h = vmpy($Vu32.ub,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpybusv_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.h += vmpy($Vu32.ub,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybusv_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.h += vmpy($Vu32.ub,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybusv_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vmpybus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybusv_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vmpybus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybusv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vmpybus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpybusv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vmpybus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpybv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.h = vmpy($Vu32.b,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpybv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.h = vmpy($Vu32.b,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpybv_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.h += vmpy($Vu32.b,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybv_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.h += vmpy($Vu32.b,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybv_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vmpyb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybv_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vmpyb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpybv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vmpyb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpybv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vmpyb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
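+// vmpyewuh keeps a single-vector .w result; its "_64" form (V62 only, note
+// HasV62T) writes a full register pair instead.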
+def V6_vmpyewuh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vmpye($Vu32.w,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyewuh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vmpye($Vu32.w,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyewuh_64 : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vmpye($Vu32.w,$Vv32.uh)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyewuh_64_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vmpye($Vu32.w,$Vv32.uh)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyewuh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmpyewuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyewuh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmpyewuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
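+// vmpyh widens h x h -> w into a pair; vmpyhss and vmpyhsrs instead keep a
+// single-vector .h result with ":<<1:sat" and ":<<1:rnd:sat" result
+// processing respectively, per their assembly strings.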
+def V6_vmpyh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vdd32.w = vmpy($Vu32.h,$Rt32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vdd32.w = vmpy($Vu32.h,$Rt32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vdd32 = vmpyh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vdd32 = vmpyh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhsat_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vxx32.w += vmpy($Vu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhsat_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vxx32.w += vmpy($Vu32.h,$Rt32.h):sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhsat_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vxx32 += vmpyh($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhsat_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vxx32 += vmpyh($Vu32,$Rt32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhsrs : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:rnd:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhsrs_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:rnd:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyhsrs_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyh($Vu32,$Rt32):<<1:rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhsrs_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyh($Vu32,$Rt32):<<1:rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhss : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhss_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyhss_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyh($Vu32,$Rt32):<<1:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhss_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyh($Vu32,$Rt32):<<1:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhus : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.w = vmpy($Vu32.h,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhus_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.w = vmpy($Vu32.h,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyhus_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.w += vmpy($Vu32.h,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhus_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.w += vmpy($Vu32.h,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhus_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vmpyhus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhus_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vmpyhus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhus_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vmpyhus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhus_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vmpyhus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.w = vmpy($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.w = vmpy($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyhv_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.w += vmpy($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhv_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.w += vmpy($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhv_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vmpyh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhv_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vmpyh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyhv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vmpyh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vmpyh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhvsrs : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vmpy($Vu32.h,$Vv32.h):<<1:rnd:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhvsrs_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vmpy($Vu32.h,$Vv32.h):<<1:rnd:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyhvsrs_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmpyh($Vu32,$Vv32):<<1:rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyhvsrs_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmpyh($Vu32,$Vv32):<<1:rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
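+// The vmpyi*/vmpyie* defs below produce single-vector results (VectorRegs in
+// and out) rather than register pairs, unlike the widening vmpy* forms above.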
+def V6_vmpyieoh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vmpyieo($Vu32.h,$Vv32.h)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyieoh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vmpyieo($Vu32.h,$Vv32.h)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyiewh_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.w += vmpyie($Vu32.w,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiewh_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.w += vmpyie($Vu32.w,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiewh_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vmpyiewh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiewh_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vmpyiewh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiewuh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vmpyie($Vu32.w,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiewuh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vmpyie($Vu32.w,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyiewuh_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.w += vmpyie($Vu32.w,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiewuh_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.w += vmpyie($Vu32.w,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiewuh_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vmpyiewuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiewuh_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vmpyiewuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiewuh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmpyiewuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiewuh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmpyiewuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyih : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vmpyi($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyih_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vmpyi($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyih_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.h += vmpyi($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyih_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.h += vmpyi($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyih_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vmpyih($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyih_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vmpyih($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyih_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmpyih($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyih_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmpyih($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
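+// V6_vmpyihb: halfword lanes of $Vu32 multiplied by a scalar byte taken
+// from $Rt32, in plain and accumulating forms.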
+def V6_vmpyihb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vmpyi($Vu32.h,$Rt32.b)",
+CVI_VX_LONG, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyihb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.h = vmpyi($Vu32.h,$Rt32.b)",
+CVI_VX_LONG, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyihb_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.h += vmpyi($Vu32.h,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyihb_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.h += vmpyi($Vu32.h,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyihb_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vmpyihb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyihb_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vmpyihb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyihb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyihb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyihb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyihb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
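+// V6_vmpyiowh: word lanes of $Vu32 multiplied by the odd halfwords of the
+// corresponding $Vv32 words (the even-halfword counterpart, vmpyiewuh,
+// appears above).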
+def V6_vmpyiowh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vmpyio($Vu32.w,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiowh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vmpyio($Vu32.w,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyiowh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmpyiowh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiowh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmpyiowh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
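+// V6_vmpyiwb / V6_vmpyiwh: word lanes multiplied by a scalar byte or
+// halfword from $Rt32.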
+def V6_vmpyiwb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vmpyi($Vu32.w,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiwb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vmpyi($Vu32.w,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyiwb_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vmpyi($Vu32.w,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwb_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vmpyi($Vu32.w,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwb_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vmpyiwb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwb_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vmpyiwb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyiwb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiwb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyiwb($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiwh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vmpyi($Vu32.w,$Rt32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiwh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vmpyi($Vu32.w,$Rt32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyiwh_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vmpyi($Vu32.w,$Rt32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwh_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vmpyi($Vu32.w,$Rt32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwh_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vmpyiwh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwh_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vmpyiwh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyiwh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiwh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyiwh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
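+// V6_vmpyiwub: word lanes multiplied by a scalar unsigned byte. Unlike the
+// neighbouring records, this group requires HasV62T.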
+def V6_vmpyiwub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vmpyi($Vu32.w,$Rt32.ub)",
+CVI_VX_LONG, TypeCVI_VX>, Enc_16214129, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiwub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vmpyi($Vu32.w,$Rt32.ub)",
+CVI_VX_LONG, TypeCVI_VX>, Enc_16214129, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyiwub_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vmpyi($Vu32.w,$Rt32.ub)",
+CVI_VX_LONG, TypeCVI_VX>, Enc_10058269, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwub_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vmpyi($Vu32.w,$Rt32.ub)",
+CVI_VX_LONG, TypeCVI_VX>, Enc_10058269, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwub_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vmpyiwub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwub_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vmpyiwub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyiwub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyiwub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyiwub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vmpyiwub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
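+// V6_vmpyowh and variants: word-by-odd-halfword multiply with <<1 scaling
+// and saturation; _rnd adds rounding, _sacc accumulates with a :shift
+// modifier, and _64_acc accumulates into a double vector register pair.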
+def V6_vmpyowh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyowh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyowh_64_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vmpyo($Vu32.w,$Vv32.h)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyowh_64_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vmpyo($Vu32.w,$Vv32.h)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyowh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmpyowh($Vu32,$Vv32):<<1:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyowh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmpyowh($Vu32,$Vv32):<<1:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyowh_rnd : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyowh_rnd_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyowh_rnd_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmpyowh($Vu32,$Vv32):<<1:rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyowh_rnd_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmpyowh($Vu32,$Vv32):<<1:rnd:sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyowh_rnd_sacc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat:shift",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyowh_rnd_sacc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:rnd:sat:shift",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyowh_rnd_sacc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vmpyowh($Vu32,$Vv32):<<1:rnd:sat:shift",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyowh_rnd_sacc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vmpyowh($Vu32,$Vv32):<<1:rnd:sat:shift",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyowh_sacc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:sat:shift",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyowh_sacc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.w += vmpyo($Vu32.w,$Vv32.h):<<1:sat:shift",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyowh_sacc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vmpyowh($Vu32,$Vv32):<<1:sat:shift",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vmpyowh_sacc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vmpyowh($Vu32,$Vv32):<<1:sat:shift",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
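+// Widening unsigned multiplies: vmpyub/vmpyuh (vector by scalar) and
+// vmpyubv/vmpyuhv (vector by vector) double the lane width, so the
+// destination is a vector register pair (VecDblRegs).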
+def V6_vmpyub : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vdd32.uh = vmpy($Vu32.ub,$Rt32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyub_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vdd32.uh = vmpy($Vu32.ub,$Rt32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyub_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vxx32.uh += vmpy($Vu32.ub,$Rt32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyub_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vxx32.uh += vmpy($Vu32.ub,$Rt32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyub_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vxx32 += vmpyub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyub_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vxx32 += vmpyub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyub_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vdd32 = vmpyub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyub_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vdd32 = vmpyub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyubv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.uh = vmpy($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyubv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.uh = vmpy($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyubv_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.uh += vmpy($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyubv_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.uh += vmpy($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyubv_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vmpyub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyubv_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vmpyub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyubv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vmpyub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyubv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vmpyub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyuh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vdd32.uw = vmpy($Vu32.uh,$Rt32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyuh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vdd32.uw = vmpy($Vu32.uh,$Rt32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_11471622, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyuh_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vxx32.uw += vmpy($Vu32.uh,$Rt32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyuh_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vxx32.uw += vmpy($Vu32.uh,$Rt32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2153798, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyuh_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vxx32 += vmpyuh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyuh_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vxx32 += vmpyuh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyuh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vdd32 = vmpyuh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyuh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vdd32 = vmpyuh($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyuhv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.uw = vmpy($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyuhv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.uw = vmpy($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vmpyuhv_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32.uw += vmpy($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyuhv_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32.uw += vmpy($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5972412, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyuhv_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vxx32 += vmpyuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyuhv_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vxx32 += vmpyuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vmpyuhv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vmpyuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmpyuhv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vmpyuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
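+// V6_vmux: per-byte select between $Vu32 and $Vv32 under the vector
+// predicate $Qt4.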
+def V6_vmux : HInst<
+(outs VectorRegs:$Vd32),
+(ins VecPredRegs:$Qt4, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vmux($Qt4,$Vu32,$Vv32)",
+CVI_VA, TypeCVI_VA>, Enc_1572239, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011110111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vmux_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VecPredRegs128B:$Qt4, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vmux($Qt4,$Vu32,$Vv32)",
+CVI_VA, TypeCVI_VA>, Enc_1572239, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011110111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
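+// V6_vnavg*: per-lane halved difference ("negative average") of the two
+// source vectors.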
+def V6_vnavgh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vnavg($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnavgh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vnavg($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vnavgh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vnavgh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnavgh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vnavgh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnavgub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vnavg($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnavgub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vnavg($Vu32.ub,$Vv32.ub)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vnavgub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vnavgub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnavgub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vnavgub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnavgw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vnavg($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnavgw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vnavg($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vnavgw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vnavgw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnavgw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vnavgw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
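+// V6_vnccombine / V6_vncmov: vector combine and move executed only when the
+// scalar predicate $Ps4 is false (isPredicated with isPredicatedFalse).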
+def V6_vnccombine : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins PredRegs:$Ps4, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"if (!$Ps4) $Vdd32 = vcombine($Vu32,$Vv32)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_16145290, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011010010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnccombine_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins PredRegs:$Ps4, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"if (!$Ps4) $Vdd32 = vcombine($Vu32,$Vv32)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_16145290, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011010010;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vncmov : HInst<
+(outs VectorRegs:$Vd32),
+(ins PredRegs:$Ps4, VectorRegs:$Vu32),
+"if (!$Ps4) $Vd32 = $Vu32",
+CVI_VA, TypeCVI_VA>, Enc_12023037, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001101000100000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vncmov_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins PredRegs:$Ps4, VectorRegs128B:$Vu32),
+"if (!$Ps4) $Vd32 = $Vu32",
+CVI_VA, TypeCVI_VA>, Enc_12023037, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001101000100000;
+let isPredicated = 1;
+let isPredicatedFalse = 1;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
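+// V6_vnormamt*: per-lane normalization amount, i.e. the shift count needed
+// to normalize each element.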
+def V6_vnormamth : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.h = vnormamt($Vu32.h)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnormamth_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.h = vnormamt($Vu32.h)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vnormamth_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vnormamth($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnormamth_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vnormamth($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnormamtw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.w = vnormamt($Vu32.w)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnormamtw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.w = vnormamt($Vu32.w)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vnormamtw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vnormamtw($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnormamtw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vnormamtw($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnot : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vnot($Vu32)",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vnot_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vnot($Vu32)",
+CVI_VA, TypeCVI_VA>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vor : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vor($Vu32,$Vv32)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vor_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vor($Vu32,$Vv32)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
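+// Pack family: vpacke keeps the even sub-elements of the two sources,
+// vpacko the odd ones, and the vpack...:sat records narrow with saturation.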
+def V6_vpackeb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vpacke($Vu32.h,$Vv32.h)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackeb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vpacke($Vu32.h,$Vv32.h)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackeb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vpackeb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackeb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vpackeb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackeh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vpacke($Vu32.w,$Vv32.w)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackeh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vpacke($Vu32.w,$Vv32.w)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackeh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vpackeh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackeh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vpackeh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackhb_sat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vpack($Vu32.h,$Vv32.h):sat",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackhb_sat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vpack($Vu32.h,$Vv32.h):sat",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackhb_sat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vpackhb($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackhb_sat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vpackhb($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackhub_sat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vpack($Vu32.h,$Vv32.h):sat",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackhub_sat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vpack($Vu32.h,$Vv32.h):sat",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackhub_sat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vpackhub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackhub_sat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vpackhub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackob : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vpacko($Vu32.h,$Vv32.h)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackob_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vpacko($Vu32.h,$Vv32.h)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackob_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vpackob($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackob_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vpackob($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackoh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vpacko($Vu32.w,$Vv32.w)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackoh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vpacko($Vu32.w,$Vv32.w)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackoh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vpackoh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackoh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vpackoh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackwh_sat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vpack($Vu32.w,$Vv32.w):sat",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackwh_sat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vpack($Vu32.w,$Vv32.w):sat",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackwh_sat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vpackwh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackwh_sat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vpackwh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackwuh_sat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vpack($Vu32.w,$Vv32.w):sat",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackwuh_sat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vpack($Vu32.w,$Vv32.w):sat",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpackwuh_sat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vpackwuh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpackwuh_sat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vpackwuh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpopcounth : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.h = vpopcount($Vu32.h)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpopcounth_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.h = vpopcount($Vu32.h)",
+CVI_VS, TypeCVI_VS>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vpopcounth_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vpopcounth($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vpopcounth_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vpopcounth($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrdelta : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vrdelta($Vu32,$Vv32)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrdelta_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vrdelta($Vu32,$Vv32)",
+CVI_VP, TypeCVI_VP>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
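+// vrmpy* are reduce-multiply ops. The "_acc" variants accumulate into the
+// destination: note isAccumulator, the "+=" syntax, the extra $Vx32in
+// input, and Constraints = "$Vx32 = $Vx32in" tying it to the output
+// register. In the encodings below, Inst{13} appears to select plain (0)
+// versus accumulating (1) forms within the same Inst{31-21} opcode class.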
+def V6_vrmpybus : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vrmpy($Vu32.ub,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybus_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.w = vrmpy($Vu32.ub,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrmpybus_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vrmpy($Vu32.ub,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybus_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.w += vrmpy($Vu32.ub,$Rt32.b)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybus_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vrmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybus_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vrmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybus_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vrmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybus_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vrmpybus($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybusi : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32.w = vrmpy($Vuu32.ub,$Rt32.b,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybusi_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32.w = vrmpy($Vuu32.ub,$Rt32.b,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrmpybusi_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32.w += vrmpy($Vuu32.ub,$Rt32.b,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrmpybusi_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32.w += vrmpy($Vuu32.ub,$Rt32.b,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b10;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrmpybusi_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32 += vrmpybus($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrmpybusi_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32 += vrmpybus($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrmpybusi_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32 = vrmpybus($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybusi_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32 = vrmpybus($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybusv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vrmpy($Vu32.ub,$Vv32.b)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybusv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vrmpy($Vu32.ub,$Vv32.b)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrmpybusv_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.w += vrmpy($Vu32.ub,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybusv_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.w += vrmpy($Vu32.ub,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybusv_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vrmpybus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybusv_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vrmpybus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybusv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vrmpybus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybusv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vrmpybus($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vrmpy($Vu32.b,$Vv32.b)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vrmpy($Vu32.b,$Vv32.b)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrmpybv_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.w += vrmpy($Vu32.b,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybv_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.w += vrmpy($Vu32.b,$Vv32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybv_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vrmpyb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybv_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vrmpyb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpybv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vrmpyb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpybv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vrmpyb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32.uw = vrmpy($Vu32.ub,$Rt32.ub)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32.uw = vrmpy($Vu32.ub,$Rt32.ub)",
+CVI_VX, TypeCVI_VX>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrmpyub_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32.uw += vrmpy($Vu32.ub,$Rt32.ub)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpyub_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32.uw += vrmpy($Vu32.ub,$Rt32.ub)",
+CVI_VX, TypeCVI_VX>, Enc_10058269, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpyub_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vrmpyub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpyub_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vx32 += vrmpyub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpyub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vrmpyub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vrmpyub($Vu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyubi : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32.uw = vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyubi_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32.uw = vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrmpyubi_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32.uw += vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrmpyubi_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32.uw += vrmpy($Vuu32.ub,$Rt32.ub,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrmpyubi_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32 += vrmpyub($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrmpyubi_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32 += vrmpyub($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrmpyubi_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32 = vrmpyub($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyubi_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32 = vrmpyub($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyubv : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uw = vrmpy($Vu32.ub,$Vv32.ub)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyubv_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uw = vrmpy($Vu32.ub,$Vv32.ub)",
+CVI_VX, TypeCVI_VX>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrmpyubv_acc : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32.uw += vrmpy($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpyubv_acc_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32.uw += vrmpy($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_2328527, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpyubv_acc_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VectorRegs:$Vx32in, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vx32 += vrmpyub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpyubv_acc_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vx32 += vrmpyub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vrmpyubv_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vrmpyub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrmpyubv_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vrmpyub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vror : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vror($Vu32,$Rt32)",
+CVI_VP, TypeCVI_VP>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vror_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, IntRegs:$Rt32),
+"$Vd32 = vror($Vu32,$Rt32)",
+CVI_VP, TypeCVI_VP>, Enc_16214129, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
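+// vround* round-and-pack with saturation (":sat"). The unsigned-input
+// forms (vrounduhub, vrounduwuh) require HasV62T rather than HasV60T;
+// within the shared Inst{31-21} class, Inst{7-5} distinguishes the
+// variants.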
+def V6_vroundhb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vround($Vu32.h,$Vv32.h):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundhb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vround($Vu32.h,$Vv32.h):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vroundhb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vroundhb($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundhb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vroundhb($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundhub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vround($Vu32.h,$Vv32.h):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundhub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vround($Vu32.h,$Vv32.h):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vroundhub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vroundhub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundhub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vroundhub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrounduhub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vround($Vu32.uh,$Vv32.uh):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrounduhub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vround($Vu32.uh,$Vv32.uh):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrounduhub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vrounduhub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrounduhub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vrounduhub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrounduwuh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vround($Vu32.uw,$Vv32.uw):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrounduwuh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vround($Vu32.uw,$Vv32.uw):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrounduwuh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vrounduwuh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrounduwuh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vrounduwuh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundwh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vround($Vu32.w,$Vv32.w):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundwh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vround($Vu32.w,$Vv32.w):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vroundwh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vroundwh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundwh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vroundwh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundwuh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vround($Vu32.w,$Vv32.w):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundwuh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vround($Vu32.w,$Vv32.w):sat",
+CVI_VS, TypeCVI_VS>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vroundwuh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vroundwuh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vroundwuh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vroundwuh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrsadubi : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32.uw = vrsad($Vuu32.ub,$Rt32.ub,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrsadubi_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32.uw = vrsad($Vuu32.ub,$Rt32.ub,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_14172170, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vrsadubi_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32.uw += vrsad($Vuu32.ub,$Rt32.ub,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrsadubi_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32.uw += vrsad($Vuu32.ub,$Rt32.ub,#$Ii)",
+CVI_VX_DV_LONG, TypeCVI_VX_DV>, Enc_13189194, Requires<[HasV60T,UseHVX]> {
+let Inst{7-6} = 0b11;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrsadubi_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32 += vrsadub($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrsadubi_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vxx32 += vrsadub($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vrsadubi_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32 = vrsadub($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vrsadubi_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii),
+"$Vdd32 = vrsadub($Vuu32,$Rt32,#$Ii)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
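+// vsat* pack with per-lane saturation; the V60 forms use the
+// CVI_VINLANESAT itinerary, while the V62-only vsatuwuh is plain CVI_VA.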
+def V6_vsathub : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vsat($Vu32.h,$Vv32.h)",
+CVI_VINLANESAT, TypeCVI_VINLANESAT>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsathub_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vsat($Vu32.h,$Vv32.h)",
+CVI_VINLANESAT, TypeCVI_VINLANESAT>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsathub_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsathub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsathub_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsathub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsatuwuh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vsat($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsatuwuh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vsat($Vu32.uw,$Vv32.uw)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsatuwuh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsatuwuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsatuwuh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsatuwuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsatwh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vsat($Vu32.w,$Vv32.w)",
+CVI_VINLANESAT, TypeCVI_VINLANESAT>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsatwh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vsat($Vu32.w,$Vv32.w)",
+CVI_VINLANESAT, TypeCVI_VINLANESAT>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsatwh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsatwh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsatwh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsatwh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsb : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32.h = vsxt($Vu32.b)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsb_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32.h = vsxt($Vu32.b)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsb_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32 = vsxtb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsb_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32 = vsxtb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32.w = vsxt($Vu32.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32.w = vsxt($Vu32.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32 = vsxth($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32 = vsxth($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufeh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vshuffe($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufeh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vshuffe($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshufeh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vshuffeh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufeh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vshuffeh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuff : HInst<
+(outs VectorRegs:$Vy32, VectorRegs:$Vx32),
+(ins VectorRegs:$Vy32in, VectorRegs:$Vx32in, IntRegs:$Rt32),
+"vshuff($Vy32,$Vx32,$Rt32)",
+CVI_VP_VS_LONG_EARLY, TypeCVI_VP_VS>, Enc_11422009, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vy32 = $Vy32in, $Vx32 = $Vx32in";
+}
+def V6_vshuff_128B : HInst<
+(outs VectorRegs128B:$Vy32, VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vy32in, VectorRegs128B:$Vx32in, IntRegs:$Rt32),
+"vshuff($Vy32,$Vx32,$Rt32)",
+CVI_VP_VS_LONG_EARLY, TypeCVI_VP_VS>, Enc_11422009, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vy32 = $Vy32in, $Vx32 = $Vx32in";
+}
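+// V6_vshuff is an in-place two-register shuffle: both $Vy32 and $Vx32 are
+// read and written (note the double Constraints tie and the second
+// hasNewValue2/opNewValue2 pair marking both results).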
+def V6_vshuffb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.b = vshuff($Vu32.b)",
+CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.b = vshuff($Vu32.b)",
+CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshuffb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vshuffb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vshuffb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffeb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vshuffe($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffeb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vshuffe($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshuffeb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vshuffeb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffeb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vshuffeb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32.h = vshuff($Vu32.h)",
+CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32.h = vshuff($Vu32.h)",
+CVI_VP, TypeCVI_VP>, Enc_900013, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshuffh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32),
+"$Vd32 = vshuffh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32),
+"$Vd32 = vshuffh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffob : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vshuffo($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffob_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vshuffo($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshuffob_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vshuffob($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffob_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vshuffob($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffvdd : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8),
+"$Vdd32 = vshuff($Vu32,$Vv32,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshuffvdd_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, IntRegsLow8:$Rt8),
+"$Vdd32 = vshuff($Vu32,$Vv32,$Rt8)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_14767681, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{31-24} = 0b00011011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshufoeb : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.b = vshuffoe($Vu32.b,$Vv32.b)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufoeb_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.b = vshuffoe($Vu32.b,$Vv32.b)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshufoeb_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vshuffoeb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufoeb_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vshuffoeb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufoeh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.h = vshuffoe($Vu32.h,$Vv32.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufoeh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.h = vshuffoe($Vu32.h,$Vv32.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshufoeh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vshuffoeh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufoeh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vshuffoeh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufoh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vshuffo($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufoh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vshuffo($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vshufoh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vshuffoh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vshufoh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vshuffoh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubb : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vsub($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubb_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vsub($Vu32.b,$Vv32.b)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubb_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubb_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubb($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubb_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.b = vsub($Vuu32.b,$Vvv32.b)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubb_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.b = vsub($Vuu32.b,$Vvv32.b)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubb_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubb($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubb_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubb($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubbnq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4) $Vx32.b -= $Vu32.b",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubbnq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4) $Vx32.b -= $Vu32.b",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubbnq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4.b) $Vx32.b -= $Vu32.b",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubbnq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4.b) $Vx32.b -= $Vu32.b",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubbq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4) $Vx32.b -= $Vu32.b",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubbq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4) $Vx32.b -= $Vu32.b",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubbq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4.b) $Vx32.b -= $Vu32.b",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubbq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4.b) $Vx32.b -= $Vu32.b",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubbsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.b = vsub($Vu32.b,$Vv32.b):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubbsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.b = vsub($Vu32.b,$Vv32.b):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubbsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubb($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubbsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubb($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubbsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.b = vsub($Vuu32.b,$Vvv32.b):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubbsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.b = vsub($Vuu32.b,$Vvv32.b):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubbsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubb($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubbsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubb($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubcarry : HInst<
+(outs VectorRegs:$Vd32, VecPredRegs:$Qx4),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32, VecPredRegs:$Qx4in),
+"$Vd32.w = vsub($Vu32.w,$Vv32.w,$Qx4):carry",
+CVI_VA, TypeCVI_VA>, Enc_13691337, Requires<[HasV62T,UseHVX]> {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vsubcarry_128B : HInst<
+(outs VectorRegs128B:$Vd32, VecPredRegs128B:$Qx4),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32, VecPredRegs128B:$Qx4in),
+"$Vd32.w = vsub($Vu32.w,$Vv32.w,$Qx4):carry",
+CVI_VA, TypeCVI_VA>, Enc_13691337, Requires<[HasV62T,UseHVX]> {
+let Inst{7-7} = 0b1;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Qx4 = $Qx4in";
+}
+def V6_vsubh : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vsub($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubh_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vsub($Vu32.h,$Vv32.h)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubh_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubh_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubh_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.h = vsub($Vuu32.h,$Vvv32.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubh_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.h = vsub($Vuu32.h,$Vvv32.h)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubh_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubh($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubh_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubh($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhnq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4) $Vx32.h -= $Vu32.h",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubhnq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4) $Vx32.h -= $Vu32.h",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubhnq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4.h) $Vx32.h -= $Vu32.h",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubhnq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4.h) $Vx32.h -= $Vu32.h",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubhq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4) $Vx32.h -= $Vu32.h",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubhq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4) $Vx32.h -= $Vu32.h",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000001;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubhq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4.h) $Vx32.h -= $Vu32.h",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubhq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4.h) $Vx32.h -= $Vu32.h",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubhsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.h = vsub($Vu32.h,$Vv32.h):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.h = vsub($Vu32.h,$Vv32.h):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubhsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.h = vsub($Vuu32.h,$Vvv32.h):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.h = vsub($Vuu32.h,$Vvv32.h):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubhsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubh($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubh($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhw : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.w = vsub($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhw_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.w = vsub($Vu32.h,$Vv32.h)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubhw_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vsubh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubhw_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vsubh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.h = vsub($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.h = vsub($Vu32.ub,$Vv32.ub)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsububh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vsubub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vsubub($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vsub($Vu32.ub,$Vv32.ub):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vsub($Vu32.ub,$Vv32.ub):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsububsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubub($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.ub = vsub($Vuu32.ub,$Vvv32.ub):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.ub = vsub($Vuu32.ub,$Vvv32.ub):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsububsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubub($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsububsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubub($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubububb_sat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.ub = vsub($Vu32.ub,$Vv32.b):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubububb_sat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.ub = vsub($Vu32.ub,$Vv32.b):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubuhsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uh = vsub($Vu32.uh,$Vv32.uh):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuhsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uh = vsub($Vu32.uh,$Vv32.uh):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubuhsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubuh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuhsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubuh($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuhsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.uh = vsub($Vuu32.uh,$Vvv32.uh):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuhsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.uh = vsub($Vuu32.uh,$Vvv32.uh):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubuhsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubuh($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuhsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubuh($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuhw : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32.w = vsub($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuhw_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32.w = vsub($Vu32.uh,$Vv32.uh)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_15290236, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b110;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubuhw_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vsubuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuhw_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vsubuh($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuwsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.uw = vsub($Vu32.uw,$Vv32.uw):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuwsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.uw = vsub($Vu32.uw,$Vv32.uw):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011111110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubuwsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubuw($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuwsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubuw($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuwsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.uw = vsub($Vuu32.uw,$Vvv32.uw):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuwsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.uw = vsub($Vuu32.uw,$Vvv32.uw):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV62T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubuwsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubuw($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubuwsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubuw($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV62T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubw : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vsub($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubw_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vsub($Vu32.w,$Vv32.w)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubw_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubw_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubw($Vu32,$Vv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubw_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.w = vsub($Vuu32.w,$Vvv32.w)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubw_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.w = vsub($Vuu32.w,$Vvv32.w)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b101;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubw_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubw($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubw_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubw($Vuu32,$Vvv32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubwnq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4) $Vx32.w -= $Vu32.w",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubwnq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4) $Vx32.w -= $Vu32.w",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubwnq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if (!$Qv4.w) $Vx32.w -= $Vu32.w",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubwnq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if (!$Qv4.w) $Vx32.w -= $Vu32.w",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubwq : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4) $Vx32.w -= $Vu32.w",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubwq_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4) $Vx32.w -= $Vu32.w",
+CVI_VA, TypeCVI_VA>, Enc_12535811, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubwq_alt : HInst<
+(outs VectorRegs:$Vx32),
+(ins VecPredRegs:$Qv4, VectorRegs:$Vx32in, VectorRegs:$Vu32),
+"if ($Qv4.w) $Vx32.w -= $Vu32.w",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubwq_alt_128B : HInst<
+(outs VectorRegs128B:$Vx32),
+(ins VecPredRegs128B:$Qv4, VectorRegs128B:$Vx32in, VectorRegs128B:$Vu32),
+"if ($Qv4.w) $Vx32.w -= $Vu32.w",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vx32 = $Vx32in";
+}
+def V6_vsubwsat : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32.w = vsub($Vu32.w,$Vv32.w):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubwsat_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32.w = vsub($Vu32.w,$Vv32.w):sat",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100011;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubwsat_alt : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vsubw($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubwsat_alt_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vsubw($Vu32,$Vv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubwsat_dv : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32.w = vsub($Vuu32.w,$Vvv32.w):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubwsat_dv_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32.w = vsub($Vuu32.w,$Vvv32.w):sat",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_13211717, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vsubwsat_dv_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, VecDblRegs:$Vvv32),
+"$Vdd32 = vsubw($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vsubwsat_dv_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, VecDblRegs128B:$Vvv32),
+"$Vdd32 = vsubw($Vuu32,$Vvv32):sat",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vswap : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecPredRegs:$Qt4, VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vdd32 = vswap($Qt4,$Vu32,$Vv32)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_11424254, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vswap_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecPredRegs128B:$Qt4, VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vdd32 = vswap($Qt4,$Vu32,$Vv32)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_11424254, Requires<[HasV60T,UseHVX]> {
+let Inst{7-7} = 0b0;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011110101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vtmpyb : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.h = vtmpy($Vuu32.b,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtmpyb_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.h = vtmpy($Vuu32.b,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vtmpyb_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.h += vtmpy($Vuu32.b,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpyb_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.h += vtmpy($Vuu32.b,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpyb_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vtmpyb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpyb_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vtmpyb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpyb_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vtmpyb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtmpyb_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vtmpyb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtmpybus : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.h = vtmpy($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtmpybus_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.h = vtmpy($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vtmpybus_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.h += vtmpy($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpybus_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.h += vtmpy($Vuu32.ub,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpybus_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vtmpybus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpybus_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vtmpybus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpybus_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vtmpybus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtmpybus_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vtmpybus($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtmpyhb : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.w = vtmpy($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtmpyhb_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32.w = vtmpy($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_5023792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011001101;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vtmpyhb_acc : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.w += vtmpy($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpyhb_acc_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32.w += vtmpy($Vuu32.h,$Rt32.b)",
+CVI_VX_DV, TypeCVI_VX_DV>, Enc_4327792, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b1;
+let Inst{31-21} = 0b00011001000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpyhb_acc_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vtmpyhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpyhb_acc_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vxx32 += vtmpyhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vtmpyhb_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VecDblRegs:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vtmpyhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtmpyhb_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VecDblRegs128B:$Vuu32, IntRegs:$Rt32),
+"$Vdd32 = vtmpyhb($Vuu32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vtran2x2_map : HInst<
+(outs VectorRegs:$Vy32, VectorRegs:$Vx32),
+(ins VectorRegs:$Vy32in, VectorRegs:$Vx32in, IntRegs:$Rt32),
+"vtrans2x2($Vy32,$Vx32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vy32 = $Vy32in, $Vx32 = $Vx32in";
+}
+def V6_vtran2x2_map_128B : HInst<
+(outs VectorRegs128B:$Vy32, VectorRegs128B:$Vx32),
+(ins VectorRegs128B:$Vy32in, VectorRegs128B:$Vx32in, IntRegs:$Rt32),
+"vtrans2x2($Vy32,$Vx32,$Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let hasNewValue2 = 1;
+let opNewValue2 = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vy32 = $Vy32in, $Vx32 = $Vx32in";
+}
+def V6_vunpackb : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32.h = vunpack($Vu32.b)",
+CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackb_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32.h = vunpack($Vu32.b)",
+CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vunpackb_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32 = vunpackb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackb_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32 = vunpackb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32.w = vunpack($Vu32.h)",
+CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32.w = vunpack($Vu32.h)",
+CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b011;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vunpackh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32 = vunpackh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32 = vunpackh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackob : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32),
+"$Vxx32.h |= vunpacko($Vu32.b)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_12669374, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vunpackob_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32),
+"$Vxx32.h |= vunpacko($Vu32.b)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_12669374, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vunpackob_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32),
+"$Vxx32 |= vunpackob($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vunpackob_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32),
+"$Vxx32 |= vunpackob($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vunpackoh : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32),
+"$Vxx32.w |= vunpacko($Vu32.h)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_12669374, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vunpackoh_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32),
+"$Vxx32.w |= vunpacko($Vu32.h)",
+CVI_VP_VS_LONG, TypeCVI_VP_VS>, Enc_12669374, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b1;
+let Inst{31-16} = 0b0001111000000000;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vunpackoh_alt : HInst<
+(outs VecDblRegs:$Vxx32),
+(ins VecDblRegs:$Vxx32in, VectorRegs:$Vu32),
+"$Vxx32 |= vunpackoh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vunpackoh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vxx32),
+(ins VecDblRegs128B:$Vxx32in, VectorRegs128B:$Vu32),
+"$Vxx32 |= vunpackoh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isAccumulator = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+let Constraints = "$Vxx32 = $Vxx32in";
+}
+def V6_vunpackub : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32.uh = vunpack($Vu32.ub)",
+CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackub_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32.uh = vunpack($Vu32.ub)",
+CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vunpackub_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32 = vunpackub($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackub_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32 = vunpackub($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackuh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32.uw = vunpack($Vu32.uh)",
+CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackuh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32.uw = vunpack($Vu32.uh)",
+CVI_VP_VS, TypeCVI_VP_VS>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vunpackuh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32 = vunpackuh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vunpackuh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32 = vunpackuh($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist128 : HInst<
+(outs),
+(ins),
+"vwhist128",
+CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10010010000000;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist128_128B : HInst<
+(outs),
+(ins),
+"vwhist128",
+CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10010010000000;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vwhist128m : HInst<
+(outs),
+(ins u1_0Imm:$Ii),
+"vwhist128(#$Ii)",
+CVI_HIST, TypeCVI_HIST>, Enc_1291652, Requires<[HasV62T,UseHVX]> {
+let Inst{7-0} = 0b10000000;
+let Inst{13-9} = 0b10011;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist128m_128B : HInst<
+(outs),
+(ins u1_0Imm:$Ii),
+"vwhist128(#$Ii)",
+CVI_HIST, TypeCVI_HIST>, Enc_1291652, Requires<[HasV62T,UseHVX]> {
+let Inst{7-0} = 0b10000000;
+let Inst{13-9} = 0b10011;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vwhist128q : HInst<
+(outs),
+(ins VecPredRegs:$Qv4),
+"vwhist128($Qv4)",
+CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10010010000000;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist128q_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4),
+"vwhist128($Qv4)",
+CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10010010000000;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vwhist128qm : HInst<
+(outs),
+(ins VecPredRegs:$Qv4, u1_0Imm:$Ii),
+"vwhist128($Qv4,#$Ii)",
+CVI_HIST, TypeCVI_HIST>, Enc_7978128, Requires<[HasV62T,UseHVX]> {
+let Inst{7-0} = 0b10000000;
+let Inst{13-9} = 0b10011;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist128qm_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4, u1_0Imm:$Ii),
+"vwhist128($Qv4,#$Ii)",
+CVI_HIST, TypeCVI_HIST>, Enc_7978128, Requires<[HasV62T,UseHVX]> {
+let Inst{7-0} = 0b10000000;
+let Inst{13-9} = 0b10011;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vwhist256 : HInst<
+(outs),
+(ins),
+"vwhist256",
+CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10001010000000;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist256_128B : HInst<
+(outs),
+(ins),
+"vwhist256",
+CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10001010000000;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vwhist256_sat : HInst<
+(outs),
+(ins),
+"vwhist256:sat",
+CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10001110000000;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist256_sat_128B : HInst<
+(outs),
+(ins),
+"vwhist256:sat",
+CVI_HIST, TypeCVI_HIST>, Enc_0, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10001110000000;
+let Inst{31-16} = 0b0001111000000000;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vwhist256q : HInst<
+(outs),
+(ins VecPredRegs:$Qv4),
+"vwhist256($Qv4)",
+CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10001010000000;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist256q_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4),
+"vwhist256($Qv4)",
+CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10001010000000;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vwhist256q_sat : HInst<
+(outs),
+(ins VecPredRegs:$Qv4),
+"vwhist256($Qv4):sat",
+CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10001110000000;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vwhist256q_sat_128B : HInst<
+(outs),
+(ins VecPredRegs128B:$Qv4),
+"vwhist256($Qv4):sat",
+CVI_HIST, TypeCVI_HIST>, Enc_4109168, Requires<[HasV62T,UseHVX]> {
+let Inst{13-0} = 0b10001110000000;
+let Inst{21-16} = 0b000010;
+let Inst{31-24} = 0b00011110;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vxor : HInst<
+(outs VectorRegs:$Vd32),
+(ins VectorRegs:$Vu32, VectorRegs:$Vv32),
+"$Vd32 = vxor($Vu32,$Vv32)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vxor_128B : HInst<
+(outs VectorRegs128B:$Vd32),
+(ins VectorRegs128B:$Vu32, VectorRegs128B:$Vv32),
+"$Vd32 = vxor($Vu32,$Vv32)",
+CVI_VA, TypeCVI_VA>, Enc_6223403, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b111;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b00011100001;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vzb : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32.uh = vzxt($Vu32.ub)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vzb_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32.uh = vzxt($Vu32.ub)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vzb_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32 = vzxtb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vzb_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32 = vzxtb($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vzh : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32.uw = vzxt($Vu32.uh)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vzh_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32.uw = vzxt($Vu32.uh)",
+CVI_VA_DV, TypeCVI_VA_DV>, Enc_14631806, Requires<[HasV60T,UseHVX]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-16} = 0b0001111000000010;
+let hasNewValue = 1;
+let opNewValue = 0;
+let DecoderNamespace = "EXT_mmvec";
+let isCodeGenOnly = 1;
+}
+def V6_vzh_alt : HInst<
+(outs VecDblRegs:$Vdd32),
+(ins VectorRegs:$Vu32),
+"$Vdd32 = vzxth($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_vzh_alt_128B : HInst<
+(outs VecDblRegs128B:$Vdd32),
+(ins VectorRegs128B:$Vu32),
+"$Vdd32 = vzxth($Vu32)",
+PSEUDO, TypeMAPPING>, Requires<[HasV60T,UseHVX]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
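+// System, cache-maintenance, and prefetch instructions. The isSolo,
+// isSoloAX, and isSoloAin1 flags restrict how these instructions may be
+// packetized with others.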
+def Y2_barrier : HInst<
+(outs),
+(ins),
+"barrier",
+ST_tc_3stall_SLOT0, TypeST>, Enc_0 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-16} = 0b1010100000000000;
+let isSoloAX = 1;
+let hasSideEffects = 1;
+}
+def Y2_break : HInst<
+(outs),
+(ins),
+"brkpt",
+CR_tc_3x_SLOT3, TypeCR>, Enc_0 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-16} = 0b0110110000100000;
+let isSolo = 1;
+}
+def Y2_dccleana : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"dccleana($Rs32)",
+ST_tc_ld_SLOT0, TypeST>, Enc_11704059 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b10100000000;
+let isSoloAin1 = 1;
+}
+def Y2_dccleaninva : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"dccleaninva($Rs32)",
+ST_tc_ld_SLOT0, TypeST>, Enc_11704059 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b10100000010;
+let isSoloAin1 = 1;
+}
+def Y2_dcfetch : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"dcfetch($Rs32)",
+PSEUDO, TypeMAPPING> {
+let hasSideEffects = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
+def Y2_dcfetchbo : HInst<
+(outs),
+(ins IntRegs:$Rs32, u11_3Imm:$Ii),
+"dcfetch($Rs32+#$Ii)",
+LD_tc_ld_SLOT0, TypeLD>, Enc_4983213 {
+let Inst{13-11} = 0b000;
+let Inst{31-21} = 0b10010100000;
+let addrMode = BaseImmOffset;
+let hasSideEffects = 1;
+}
+def Y2_dcinva : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"dcinva($Rs32)",
+ST_tc_ld_SLOT0, TypeST>, Enc_11704059 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b10100000001;
+let isSoloAin1 = 1;
+}
+def Y2_dczeroa : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"dczeroa($Rs32)",
+ST_tc_ld_SLOT0, TypeST>, Enc_11704059 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b10100000110;
+let mayStore = 1;
+let isSoloAin1 = 1;
+}
+def Y2_icinva : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"icinva($Rs32)",
+J_tc_2early_SLOT2, TypeJ>, Enc_11704059 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01010110110;
+let isSolo = 1;
+}
+def Y2_isync : HInst<
+(outs),
+(ins),
+"isync",
+J_tc_2early_SLOT2, TypeJ>, Enc_0 {
+let Inst{13-0} = 0b00000000000010;
+let Inst{31-16} = 0b0101011111000000;
+let isSolo = 1;
+}
+def Y2_syncht : HInst<
+(outs),
+(ins),
+"syncht",
+ST_tc_ld_SLOT0, TypeST>, Enc_0 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-16} = 0b1010100001000000;
+let isSolo = 1;
+}
+def Y4_l2fetch : HInst<
+(outs),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"l2fetch($Rs32,$Rt32)",
+ST_tc_3stall_SLOT0, TypeST>, Enc_14620934 {
+let Inst{7-0} = 0b00000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10100110000;
+let isSoloAX = 1;
+let mayStore = 1;
+let hasSideEffects = 1;
+}
+def Y4_trace : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"trace($Rs32)",
+CR_tc_2early_SLOT3, TypeCR>, Enc_11704059 {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01100010010;
+let isSoloAX = 1;
+}
+def Y5_l2fetch : HInst<
+(outs),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"l2fetch($Rs32,$Rtt32)",
+ST_tc_3stall_SLOT0, TypeST>, Enc_8943121, Requires<[HasV5T]> {
+let Inst{7-0} = 0b00000000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10100110100;
+let isSoloAX = 1;
+let mayStore = 1;
+let hasSideEffects = 1;
+}
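+// Deprecated forms, retained so the assembler still accepts the
+// ":deprecated" syntax; the saturating variants define the USR_OVF
+// overflow bit.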
+def dep_A2_addsat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rd32 = add($Rs32,$Rt32):sat:deprecated",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_14071773 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def dep_A2_subsat : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rt32, IntRegs:$Rs32),
+"$Rd32 = sub($Rt32,$Rs32):sat:deprecated",
+ALU64_tc_2_SLOT23, TypeALU64>, Enc_8605375 {
+let Inst{7-5} = 0b100;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010101100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let Defs = [USR_OVF];
+}
+def dep_S2_packhl : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, IntRegs:$Rt32),
+"$Rdd32 = packhl($Rs32,$Rt32):deprecated",
+ALU64_tc_1_SLOT23, TypeALU64>, Enc_1997594 {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b11010100000;
+}
diff --git a/lib/Target/Hexagon/HexagonDepMappings.td b/lib/Target/Hexagon/HexagonDepMappings.td
new file mode 100644
index 000000000000..77a56a9adf10
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepMappings.td
@@ -0,0 +1,659 @@
+//===--- HexagonDepMappings.td --------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
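+// These InstAlias records map alternate or shorthand assembler syntax onto
+// canonical instruction definitions; for example, the "_zomap" aliases
+// expand zero-offset memory forms such as "memb($Rs32)" into the
+// corresponding base+immediate "_io" instructions with an offset of 0.
+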
+def A2_negAlias : InstAlias<"$Rd32=neg($Rs32)", (A2_subri IntRegs:$Rd32, 0, IntRegs:$Rs32)>;
+def A2_notAlias : InstAlias<"$Rd32=not($Rs32)", (A2_subri IntRegs:$Rd32, -1, IntRegs:$Rs32)>;
+def A2_tfrfAlias : InstAlias<"if (!$Pu4) $Rd32=$Rs32", (A2_paddif IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>;
+def A2_tfrfnewAlias : InstAlias<"if (!$Pu4.new) $Rd32=$Rs32", (A2_paddifnew IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>;
+def A2_tfrtAlias : InstAlias<"if ($Pu4) $Rd32=$Rs32", (A2_paddit IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>;
+def A2_tfrtnewAlias : InstAlias<"if ($Pu4.new) $Rd32=$Rs32", (A2_padditnew IntRegs:$Rd32, PredRegs:$Pu4, IntRegs:$Rs32, 0)>;
+def A2_vaddb_mapAlias : InstAlias<"$Rdd32=vaddb($Rss32,$Rtt32)", (A2_vaddub DoubleRegs:$Rdd32, DoubleRegs:$Rss32, DoubleRegs:$Rtt32)>;
+def A2_vsubb_mapAlias : InstAlias<"$Rdd32=vsubb($Rss32,$Rtt32)", (A2_vsubub DoubleRegs:$Rdd32, DoubleRegs:$Rss32, DoubleRegs:$Rtt32)>;
+def A2_zxtbAlias : InstAlias<"$Rd32=zxtb($Rs32)", (A2_andir IntRegs:$Rd32, IntRegs:$Rs32, 255)>;
+def C2_cmpltAlias : InstAlias<"$Pd4=cmp.lt($Rs32,$Rt32)", (C2_cmpgt PredRegs:$Pd4, IntRegs:$Rt32, IntRegs:$Rs32)>;
+def C2_cmpltuAlias : InstAlias<"$Pd4=cmp.ltu($Rs32,$Rt32)", (C2_cmpgtu PredRegs:$Pd4, IntRegs:$Rt32, IntRegs:$Rs32)>;
+def C2_pxfer_mapAlias : InstAlias<"$Pd4=$Ps4", (C2_or PredRegs:$Pd4, PredRegs:$Ps4, PredRegs:$Ps4)>;
+def J2_jumpf_nopred_mapAlias : InstAlias<"if (!$Pu4) jump $Ii", (J2_jumpf PredRegs:$Pu4, b30_2Imm:$Ii)>;
+def J2_jumprf_nopred_mapAlias : InstAlias<"if (!$Pu4) jumpr $Rs32", (J2_jumprf PredRegs:$Pu4, IntRegs:$Rs32)>;
+def J2_jumprt_nopred_mapAlias : InstAlias<"if ($Pu4) jumpr $Rs32", (J2_jumprt PredRegs:$Pu4, IntRegs:$Rs32)>;
+def J2_jumpt_nopred_mapAlias : InstAlias<"if ($Pu4) jump $Ii", (J2_jumpt PredRegs:$Pu4, b30_2Imm:$Ii)>;
+def L2_loadalignb_zomapAlias : InstAlias<"$Ryy32=memb_fifo($Rs32)", (L2_loadalignb_io DoubleRegs:$Ryy32, IntRegs:$Rs32, 0)>;
+def L2_loadalignh_zomapAlias : InstAlias<"$Ryy32=memh_fifo($Rs32)", (L2_loadalignh_io DoubleRegs:$Ryy32, IntRegs:$Rs32, 0)>;
+def L2_loadbsw2_zomapAlias : InstAlias<"$Rd32=membh($Rs32)", (L2_loadbsw2_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>;
+def L2_loadbsw4_zomapAlias : InstAlias<"$Rdd32=membh($Rs32)", (L2_loadbsw4_io DoubleRegs:$Rdd32, IntRegs:$Rs32, 0)>;
+def L2_loadbzw2_zomapAlias : InstAlias<"$Rd32=memubh($Rs32)", (L2_loadbzw2_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>;
+def L2_loadbzw4_zomapAlias : InstAlias<"$Rdd32=memubh($Rs32)", (L2_loadbzw4_io DoubleRegs:$Rdd32, IntRegs:$Rs32, 0)>;
+def L2_loadrb_zomapAlias : InstAlias<"$Rd32=memb($Rs32)", (L2_loadrb_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>;
+def L2_loadrd_zomapAlias : InstAlias<"$Rdd32=memd($Rs32)", (L2_loadrd_io DoubleRegs:$Rdd32, IntRegs:$Rs32, 0)>;
+def L2_loadrh_zomapAlias : InstAlias<"$Rd32=memh($Rs32)", (L2_loadrh_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>;
+def L2_loadri_zomapAlias : InstAlias<"$Rd32=memw($Rs32)", (L2_loadri_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>;
+def L2_loadrub_zomapAlias : InstAlias<"$Rd32=memub($Rs32)", (L2_loadrub_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>;
+def L2_loadruh_zomapAlias : InstAlias<"$Rd32=memuh($Rs32)", (L2_loadruh_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>;
+def L2_ploadrbf_zomapAlias : InstAlias<"if (!$Pt4) $Rd32=memb($Rs32)", (L2_ploadrbf_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrbfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32=memb($Rs32)", (L2_ploadrbfnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrbt_zomapAlias : InstAlias<"if ($Pt4) $Rd32=memb($Rs32)", (L2_ploadrbt_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrbtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32=memb($Rs32)", (L2_ploadrbtnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrdf_zomapAlias : InstAlias<"if (!$Pt4) $Rdd32=memd($Rs32)", (L2_ploadrdf_io DoubleRegs:$Rdd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrdfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rdd32=memd($Rs32)", (L2_ploadrdfnew_io DoubleRegs:$Rdd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrdt_zomapAlias : InstAlias<"if ($Pt4) $Rdd32=memd($Rs32)", (L2_ploadrdt_io DoubleRegs:$Rdd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrdtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rdd32=memd($Rs32)", (L2_ploadrdtnew_io DoubleRegs:$Rdd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrhf_zomapAlias : InstAlias<"if (!$Pt4) $Rd32=memh($Rs32)", (L2_ploadrhf_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrhfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32=memh($Rs32)", (L2_ploadrhfnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrht_zomapAlias : InstAlias<"if ($Pt4) $Rd32=memh($Rs32)", (L2_ploadrht_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrhtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32=memh($Rs32)", (L2_ploadrhtnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrif_zomapAlias : InstAlias<"if (!$Pt4) $Rd32=memw($Rs32)", (L2_ploadrif_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrifnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32=memw($Rs32)", (L2_ploadrifnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrit_zomapAlias : InstAlias<"if ($Pt4) $Rd32=memw($Rs32)", (L2_ploadrit_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadritnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32=memw($Rs32)", (L2_ploadritnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrubf_zomapAlias : InstAlias<"if (!$Pt4) $Rd32=memub($Rs32)", (L2_ploadrubf_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrubfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32=memub($Rs32)", (L2_ploadrubfnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrubt_zomapAlias : InstAlias<"if ($Pt4) $Rd32=memub($Rs32)", (L2_ploadrubt_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadrubtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32=memub($Rs32)", (L2_ploadrubtnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadruhf_zomapAlias : InstAlias<"if (!$Pt4) $Rd32=memuh($Rs32)", (L2_ploadruhf_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadruhfnew_zomapAlias : InstAlias<"if (!$Pt4.new) $Rd32=memuh($Rs32)", (L2_ploadruhfnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadruht_zomapAlias : InstAlias<"if ($Pt4) $Rd32=memuh($Rs32)", (L2_ploadruht_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L2_ploadruhtnew_zomapAlias : InstAlias<"if ($Pt4.new) $Rd32=memuh($Rs32)", (L2_ploadruhtnew_io IntRegs:$Rd32, PredRegs:$Pt4, IntRegs:$Rs32, 0)>;
+def L4_add_memopb_zomapAlias : InstAlias<"memb($Rs32)+=$Rt32", (L4_add_memopb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_add_memoph_zomapAlias : InstAlias<"memh($Rs32)+=$Rt32", (L4_add_memoph_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_add_memopw_zomapAlias : InstAlias<"memw($Rs32)+=$Rt32", (L4_add_memopw_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_and_memopb_zomapAlias : InstAlias<"memb($Rs32)&=$Rt32", (L4_and_memopb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_and_memoph_zomapAlias : InstAlias<"memh($Rs32)&=$Rt32", (L4_and_memoph_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_and_memopw_zomapAlias : InstAlias<"memw($Rs32)&=$Rt32", (L4_and_memopw_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_iadd_memopb_zomapAlias : InstAlias<"memb($Rs32)+=#$II", (L4_iadd_memopb_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_iadd_memoph_zomapAlias : InstAlias<"memh($Rs32)+=#$II", (L4_iadd_memoph_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_iadd_memopw_zomapAlias : InstAlias<"memw($Rs32)+=#$II", (L4_iadd_memopw_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_iand_memopb_zomapAlias : InstAlias<"memb($Rs32)=clrbit(#$II)", (L4_iand_memopb_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_iand_memoph_zomapAlias : InstAlias<"memh($Rs32)=clrbit(#$II)", (L4_iand_memoph_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_iand_memopw_zomapAlias : InstAlias<"memw($Rs32)=clrbit(#$II)", (L4_iand_memopw_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_ior_memopb_zomapAlias : InstAlias<"memb($Rs32)=setbit(#$II)", (L4_ior_memopb_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_ior_memoph_zomapAlias : InstAlias<"memh($Rs32)=setbit(#$II)", (L4_ior_memoph_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_ior_memopw_zomapAlias : InstAlias<"memw($Rs32)=setbit(#$II)", (L4_ior_memopw_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_isub_memopb_zomapAlias : InstAlias<"memb($Rs32)-=#$II", (L4_isub_memopb_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_isub_memoph_zomapAlias : InstAlias<"memh($Rs32)-=#$II", (L4_isub_memoph_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_isub_memopw_zomapAlias : InstAlias<"memw($Rs32)-=#$II", (L4_isub_memopw_io IntRegs:$Rs32, 0, u5_0Imm:$II)>;
+def L4_or_memopb_zomapAlias : InstAlias<"memb($Rs32)|=$Rt32", (L4_or_memopb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_or_memoph_zomapAlias : InstAlias<"memh($Rs32)|=$Rt32", (L4_or_memoph_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_or_memopw_zomapAlias : InstAlias<"memw($Rs32)|=$Rt32", (L4_or_memopw_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_sub_memopb_zomapAlias : InstAlias<"memb($Rs32)-=$Rt32", (L4_sub_memopb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_sub_memoph_zomapAlias : InstAlias<"memh($Rs32)-=$Rt32", (L4_sub_memoph_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def L4_sub_memopw_zomapAlias : InstAlias<"memw($Rs32)-=$Rt32", (L4_sub_memopw_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def M2_mpyuiAlias : InstAlias<"$Rd32=mpyui($Rs32,$Rt32)", (M2_mpyi IntRegs:$Rd32, IntRegs:$Rs32, IntRegs:$Rt32)>;
+def S2_pstorerbf_zomapAlias : InstAlias<"if (!$Pv4) memb($Rs32)=$Rt32", (S2_pstorerbf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_pstorerbnewf_zomapAlias : InstAlias<"if (!$Pv4) memb($Rs32)=$Nt8.new", (S2_pstorerbnewf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_pstorerbnewt_zomapAlias : InstAlias<"if ($Pv4) memb($Rs32)=$Nt8.new", (S2_pstorerbnewt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_pstorerbt_zomapAlias : InstAlias<"if ($Pv4) memb($Rs32)=$Rt32", (S2_pstorerbt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_pstorerdf_zomapAlias : InstAlias<"if (!$Pv4) memd($Rs32)=$Rtt32", (S2_pstorerdf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>;
+def S2_pstorerdt_zomapAlias : InstAlias<"if ($Pv4) memd($Rs32)=$Rtt32", (S2_pstorerdt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>;
+def S2_pstorerff_zomapAlias : InstAlias<"if (!$Pv4) memh($Rs32)=$Rt32.h", (S2_pstorerff_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_pstorerft_zomapAlias : InstAlias<"if ($Pv4) memh($Rs32)=$Rt32.h", (S2_pstorerft_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_pstorerhf_zomapAlias : InstAlias<"if (!$Pv4) memh($Rs32)=$Rt32", (S2_pstorerhf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_pstorerhnewf_zomapAlias : InstAlias<"if (!$Pv4) memh($Rs32)=$Nt8.new", (S2_pstorerhnewf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_pstorerhnewt_zomapAlias : InstAlias<"if ($Pv4) memh($Rs32)=$Nt8.new", (S2_pstorerhnewt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_pstorerht_zomapAlias : InstAlias<"if ($Pv4) memh($Rs32)=$Rt32", (S2_pstorerht_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_pstorerif_zomapAlias : InstAlias<"if (!$Pv4) memw($Rs32)=$Rt32", (S2_pstorerif_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_pstorerinewf_zomapAlias : InstAlias<"if (!$Pv4) memw($Rs32)=$Nt8.new", (S2_pstorerinewf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_pstorerinewt_zomapAlias : InstAlias<"if ($Pv4) memw($Rs32)=$Nt8.new", (S2_pstorerinewt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_pstorerit_zomapAlias : InstAlias<"if ($Pv4) memw($Rs32)=$Rt32", (S2_pstorerit_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_storerb_zomapAlias : InstAlias<"memb($Rs32)=$Rt32", (S2_storerb_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_storerbnew_zomapAlias : InstAlias<"memb($Rs32)=$Nt8.new", (S2_storerbnew_io IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_storerd_zomapAlias : InstAlias<"memd($Rs32)=$Rtt32", (S2_storerd_io IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>;
+def S2_storerf_zomapAlias : InstAlias<"memh($Rs32)=$Rt32.h", (S2_storerf_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_storerh_zomapAlias : InstAlias<"memh($Rs32)=$Rt32", (S2_storerh_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_storerhnew_zomapAlias : InstAlias<"memh($Rs32)=$Nt8.new", (S2_storerhnew_io IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_storeri_zomapAlias : InstAlias<"memw($Rs32)=$Rt32", (S2_storeri_io IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S2_storerinew_zomapAlias : InstAlias<"memw($Rs32)=$Nt8.new", (S2_storerinew_io IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S2_tableidxb_goodsyntaxAlias : InstAlias<"$Rx32=tableidxb($Rs32,#$Ii,#$II)", (S2_tableidxb IntRegs:$Rx32, IntRegs:$Rs32, u4_0Imm:$Ii, u5_0Imm:$II)>;
+def S4_pstorerbfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memb($Rs32)=$Rt32", (S4_pstorerbfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S4_pstorerbnewfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memb($Rs32)=$Nt8.new", (S4_pstorerbnewfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S4_pstorerbnewtnew_zomapAlias : InstAlias<"if ($Pv4.new) memb($Rs32)=$Nt8.new", (S4_pstorerbnewtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S4_pstorerbtnew_zomapAlias : InstAlias<"if ($Pv4.new) memb($Rs32)=$Rt32", (S4_pstorerbtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S4_pstorerdfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memd($Rs32)=$Rtt32", (S4_pstorerdfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>;
+def S4_pstorerdtnew_zomapAlias : InstAlias<"if ($Pv4.new) memd($Rs32)=$Rtt32", (S4_pstorerdtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, DoubleRegs:$Rtt32)>;
+def S4_pstorerffnew_zomapAlias : InstAlias<"if (!$Pv4.new) memh($Rs32)=$Rt32.h", (S4_pstorerffnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S4_pstorerftnew_zomapAlias : InstAlias<"if ($Pv4.new) memh($Rs32)=$Rt32.h", (S4_pstorerftnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S4_pstorerhfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memh($Rs32)=$Rt32", (S4_pstorerhfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S4_pstorerhnewfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memh($Rs32)=$Nt8.new", (S4_pstorerhnewfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S4_pstorerhnewtnew_zomapAlias : InstAlias<"if ($Pv4.new) memh($Rs32)=$Nt8.new", (S4_pstorerhnewtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S4_pstorerhtnew_zomapAlias : InstAlias<"if ($Pv4.new) memh($Rs32)=$Rt32", (S4_pstorerhtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S4_pstorerifnew_zomapAlias : InstAlias<"if (!$Pv4.new) memw($Rs32)=$Rt32", (S4_pstorerifnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S4_pstorerinewfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memw($Rs32)=$Nt8.new", (S4_pstorerinewfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S4_pstorerinewtnew_zomapAlias : InstAlias<"if ($Pv4.new) memw($Rs32)=$Nt8.new", (S4_pstorerinewtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Nt8)>;
+def S4_pstoreritnew_zomapAlias : InstAlias<"if ($Pv4.new) memw($Rs32)=$Rt32", (S4_pstoreritnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, IntRegs:$Rt32)>;
+def S4_storeirb_zomapAlias : InstAlias<"memb($Rs32)=#$II", (S4_storeirb_io IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirbf_zomapAlias : InstAlias<"if (!$Pv4) memb($Rs32)=#$II", (S4_storeirbf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirbfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memb($Rs32)=#$II", (S4_storeirbfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirbt_zomapAlias : InstAlias<"if ($Pv4) memb($Rs32)=#$II", (S4_storeirbt_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirbtnew_zomapAlias : InstAlias<"if ($Pv4.new) memb($Rs32)=#$II", (S4_storeirbtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirh_zomapAlias : InstAlias<"memh($Rs32)=#$II", (S4_storeirh_io IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirhf_zomapAlias : InstAlias<"if (!$Pv4) memh($Rs32)=#$II", (S4_storeirhf_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirhfnew_zomapAlias : InstAlias<"if (!$Pv4.new) memh($Rs32)=#$II", (S4_storeirhfnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirht_zomapAlias : InstAlias<"if ($Pv4) memh($Rs32)=#$II", (S4_storeirht_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirhtnew_zomapAlias : InstAlias<"if ($Pv4.new) memh($Rs32)=#$II", (S4_storeirhtnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeiri_zomapAlias : InstAlias<"memw($Rs32)=#$II", (S4_storeiri_io IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirif_zomapAlias : InstAlias<"if (!$Pv4) memw($Rs32)=#$II", (S4_storeirif_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirifnew_zomapAlias : InstAlias<"if (!$Pv4.new) memw($Rs32)=#$II", (S4_storeirifnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeirit_zomapAlias : InstAlias<"if ($Pv4) memw($Rs32)=#$II", (S4_storeirit_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def S4_storeiritnew_zomapAlias : InstAlias<"if ($Pv4.new) memw($Rs32)=#$II", (S4_storeiritnew_io PredRegs:$Pv4, IntRegs:$Rs32, 0, s32_0Imm:$II)>;
+def V6_MAP_equbAlias : InstAlias<"$Qd4=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb VecPredRegs:$Qd4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equb_128BAlias : InstAlias<"$Qd4=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb VecPredRegs:$Qd4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equb_andAlias : InstAlias<"$Qx4&=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_and VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equb_and_128BAlias : InstAlias<"$Qx4&=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_and VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equb_iorAlias : InstAlias<"$Qx4|=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_or VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equb_ior_128BAlias : InstAlias<"$Qx4|=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_or VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equb_xorAlias : InstAlias<"$Qx4^=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_xor VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equb_xor_128BAlias : InstAlias<"$Qx4^=vcmp.eq($Vu32.ub,$Vv32.ub)", (V6_veqb_xor VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equhAlias : InstAlias<"$Qd4=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh VecPredRegs:$Qd4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equh_128BAlias : InstAlias<"$Qd4=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh VecPredRegs:$Qd4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equh_andAlias : InstAlias<"$Qx4&=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_and VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equh_and_128BAlias : InstAlias<"$Qx4&=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_and VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equh_iorAlias : InstAlias<"$Qx4|=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_or VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equh_ior_128BAlias : InstAlias<"$Qx4|=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_or VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equh_xorAlias : InstAlias<"$Qx4^=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_xor VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equh_xor_128BAlias : InstAlias<"$Qx4^=vcmp.eq($Vu32.uh,$Vv32.uh)", (V6_veqh_xor VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equwAlias : InstAlias<"$Qd4=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw VecPredRegs:$Qd4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equw_128BAlias : InstAlias<"$Qd4=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw VecPredRegs:$Qd4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equw_andAlias : InstAlias<"$Qx4&=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_and VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equw_and_128BAlias : InstAlias<"$Qx4&=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_and VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equw_iorAlias : InstAlias<"$Qx4|=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_or VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equw_ior_128BAlias : InstAlias<"$Qx4|=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_or VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equw_xorAlias : InstAlias<"$Qx4^=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_xor VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_MAP_equw_xor_128BAlias : InstAlias<"$Qx4^=vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_xor VecPredRegs:$Qx4, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_extractw_altAlias : InstAlias<"$Rd32.w=vextract($Vu32,$Rs32)", (V6_extractw IntRegs:$Rd32, VectorRegs:$Vu32, IntRegs:$Rs32)>, Requires<[UseHVX]>;
+def V6_extractw_alt_128BAlias : InstAlias<"$Rd32.w=vextract($Vu32,$Rs32)", (V6_extractw IntRegs:$Rd32, VectorRegs:$Vu32, IntRegs:$Rs32)>, Requires<[UseHVX]>;
+def V6_ld0Alias : InstAlias<"$Vd32=vmem($Rt32)", (V6_vL32b_ai VectorRegs:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
+def V6_ld0_128BAlias : InstAlias<"$Vd32=vmem($Rt32)", (V6_vL32b_ai VectorRegs:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
+def V6_ldnt0Alias : InstAlias<"$Vd32=vmem($Rt32):nt", (V6_vL32b_nt_ai VectorRegs:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
+def V6_ldnt0_128BAlias : InstAlias<"$Vd32=vmem($Rt32):nt", (V6_vL32b_nt_ai VectorRegs:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
+def V6_ldu0Alias : InstAlias<"$Vd32=vmemu($Rt32)", (V6_vL32Ub_ai VectorRegs:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
+def V6_ldu0_128BAlias : InstAlias<"$Vd32=vmemu($Rt32)", (V6_vL32Ub_ai VectorRegs:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>;
+def V6_st0Alias : InstAlias<"vmem($Rt32)=$Vs32", (V6_vS32b_ai IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_st0_128BAlias : InstAlias<"vmem($Rt32)=$Vs32", (V6_vS32b_ai IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stn0Alias : InstAlias<"vmem($Rt32)=$Os8.new", (V6_vS32b_new_ai IntRegs:$Rt32, 0, VectorRegs:$Os8)>, Requires<[UseHVX]>;
+def V6_stn0_128BAlias : InstAlias<"vmem($Rt32)=$Os8.new", (V6_vS32b_new_ai IntRegs:$Rt32, 0, VectorRegs:$Os8)>, Requires<[UseHVX]>;
+def V6_stnnt0Alias : InstAlias<"vmem($Rt32):nt=$Os8.new", (V6_vS32b_nt_new_ai IntRegs:$Rt32, 0, VectorRegs:$Os8)>, Requires<[UseHVX]>;
+def V6_stnnt0_128BAlias : InstAlias<"vmem($Rt32):nt=$Os8.new", (V6_vS32b_nt_new_ai IntRegs:$Rt32, 0, VectorRegs:$Os8)>, Requires<[UseHVX]>;
+def V6_stnp0Alias : InstAlias<"if (!$Pv4) vmem($Rt32)=$Vs32", (V6_vS32b_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnp0_128BAlias : InstAlias<"if (!$Pv4) vmem($Rt32)=$Vs32", (V6_vS32b_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnpnt0Alias : InstAlias<"if (!$Pv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnpnt0_128BAlias : InstAlias<"if (!$Pv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnq0Alias : InstAlias<"if (!$Qv4) vmem($Rt32)=$Vs32", (V6_vS32b_nqpred_ai VecPredRegs:$Qv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnq0_128BAlias : InstAlias<"if (!$Qv4) vmem($Rt32)=$Vs32", (V6_vS32b_nqpred_ai VecPredRegs:$Qv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnqnt0Alias : InstAlias<"if (!$Qv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_nqpred_ai VecPredRegs:$Qv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnqnt0_128BAlias : InstAlias<"if (!$Qv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_nqpred_ai VecPredRegs:$Qv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnt0Alias : InstAlias<"vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_ai IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stnt0_128BAlias : InstAlias<"vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_ai IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stp0Alias : InstAlias<"if ($Pv4) vmem($Rt32)=$Vs32", (V6_vS32b_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stp0_128BAlias : InstAlias<"if ($Pv4) vmem($Rt32)=$Vs32", (V6_vS32b_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stpnt0Alias : InstAlias<"if ($Pv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stpnt0_128BAlias : InstAlias<"if ($Pv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stq0Alias : InstAlias<"if ($Qv4) vmem($Rt32)=$Vs32", (V6_vS32b_qpred_ai VecPredRegs:$Qv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stq0_128BAlias : InstAlias<"if ($Qv4) vmem($Rt32)=$Vs32", (V6_vS32b_qpred_ai VecPredRegs:$Qv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stqnt0Alias : InstAlias<"if ($Qv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_qpred_ai VecPredRegs:$Qv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stqnt0_128BAlias : InstAlias<"if ($Qv4) vmem($Rt32):nt=$Vs32", (V6_vS32b_nt_qpred_ai VecPredRegs:$Qv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stu0Alias : InstAlias<"vmemu($Rt32)=$Vs32", (V6_vS32Ub_ai IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stu0_128BAlias : InstAlias<"vmemu($Rt32)=$Vs32", (V6_vS32Ub_ai IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stunp0Alias : InstAlias<"if (!$Pv4) vmemu($Rt32)=$Vs32", (V6_vS32Ub_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stunp0_128BAlias : InstAlias<"if (!$Pv4) vmemu($Rt32)=$Vs32", (V6_vS32Ub_npred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stup0Alias : InstAlias<"if ($Pv4) vmemu($Rt32)=$Vs32", (V6_vS32Ub_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_stup0_128BAlias : InstAlias<"if ($Pv4) vmemu($Rt32)=$Vs32", (V6_vS32Ub_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0, VectorRegs:$Vs32)>, Requires<[UseHVX]>;
+def V6_vabsdiffh_altAlias : InstAlias<"$Vd32=vabsdiffh($Vu32,$Vv32)", (V6_vabsdiffh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vabsdiffh_alt_128BAlias : InstAlias<"$Vd32=vabsdiffh($Vu32,$Vv32)", (V6_vabsdiffh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vabsdiffub_altAlias : InstAlias<"$Vd32=vabsdiffub($Vu32,$Vv32)", (V6_vabsdiffub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vabsdiffub_alt_128BAlias : InstAlias<"$Vd32=vabsdiffub($Vu32,$Vv32)", (V6_vabsdiffub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vabsdiffuh_altAlias : InstAlias<"$Vd32=vabsdiffuh($Vu32,$Vv32)", (V6_vabsdiffuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vabsdiffuh_alt_128BAlias : InstAlias<"$Vd32=vabsdiffuh($Vu32,$Vv32)", (V6_vabsdiffuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vabsdiffw_altAlias : InstAlias<"$Vd32=vabsdiffw($Vu32,$Vv32)", (V6_vabsdiffw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vabsdiffw_alt_128BAlias : InstAlias<"$Vd32=vabsdiffw($Vu32,$Vv32)", (V6_vabsdiffw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vabsh_altAlias : InstAlias<"$Vd32=vabsh($Vu32)", (V6_vabsh VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsh_alt_128BAlias : InstAlias<"$Vd32=vabsh($Vu32)", (V6_vabsh VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsh_sat_altAlias : InstAlias<"$Vd32=vabsh($Vu32):sat", (V6_vabsh_sat VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsh_sat_alt_128BAlias : InstAlias<"$Vd32=vabsh($Vu32):sat", (V6_vabsh_sat VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsuh_altAlias : InstAlias<"$Vd32.uh=vabs($Vu32.h)", (V6_vabsh VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsuh_alt_128BAlias : InstAlias<"$Vd32.uh=vabs($Vu32.h)", (V6_vabsh VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsuw_altAlias : InstAlias<"$Vd32.uw=vabs($Vu32.w)", (V6_vabsw VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsuw_alt_128BAlias : InstAlias<"$Vd32.uw=vabs($Vu32.w)", (V6_vabsw VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsw_altAlias : InstAlias<"$Vd32=vabsw($Vu32)", (V6_vabsw VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsw_alt_128BAlias : InstAlias<"$Vd32=vabsw($Vu32)", (V6_vabsw VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsw_sat_altAlias : InstAlias<"$Vd32=vabsw($Vu32):sat", (V6_vabsw_sat VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vabsw_sat_alt_128BAlias : InstAlias<"$Vd32=vabsw($Vu32):sat", (V6_vabsw_sat VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddb_altAlias : InstAlias<"$Vd32=vaddb($Vu32,$Vv32)", (V6_vaddb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddb_alt_128BAlias : InstAlias<"$Vd32=vaddb($Vu32,$Vv32)", (V6_vaddb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddb_dv_altAlias : InstAlias<"$Vdd32=vaddb($Vuu32,$Vvv32)", (V6_vaddb_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddb_dv_alt_128BAlias : InstAlias<"$Vdd32=vaddb($Vuu32,$Vvv32)", (V6_vaddb_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddbnq_altAlias : InstAlias<"if (!$Qv4.b) $Vx32.b+=$Vu32.b", (V6_vaddbnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddbnq_alt_128BAlias : InstAlias<"if (!$Qv4.b) $Vx32.b+=$Vu32.b", (V6_vaddbnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddbq_altAlias : InstAlias<"if ($Qv4.b) $Vx32.b+=$Vu32.b", (V6_vaddbq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddbq_alt_128BAlias : InstAlias<"if ($Qv4.b) $Vx32.b+=$Vu32.b", (V6_vaddbq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddh_altAlias : InstAlias<"$Vd32=vaddh($Vu32,$Vv32)", (V6_vaddh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddh_alt_128BAlias : InstAlias<"$Vd32=vaddh($Vu32,$Vv32)", (V6_vaddh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddh_dv_altAlias : InstAlias<"$Vdd32=vaddh($Vuu32,$Vvv32)", (V6_vaddh_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddh_dv_alt_128BAlias : InstAlias<"$Vdd32=vaddh($Vuu32,$Vvv32)", (V6_vaddh_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddhnq_altAlias : InstAlias<"if (!$Qv4.h) $Vx32.h+=$Vu32.h", (V6_vaddhnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddhnq_alt_128BAlias : InstAlias<"if (!$Qv4.h) $Vx32.h+=$Vu32.h", (V6_vaddhnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddhq_altAlias : InstAlias<"if ($Qv4.h) $Vx32.h+=$Vu32.h", (V6_vaddhq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddhq_alt_128BAlias : InstAlias<"if ($Qv4.h) $Vx32.h+=$Vu32.h", (V6_vaddhq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddhsat_altAlias : InstAlias<"$Vd32=vaddh($Vu32,$Vv32):sat", (V6_vaddhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddhsat_alt_128BAlias : InstAlias<"$Vd32=vaddh($Vu32,$Vv32):sat", (V6_vaddhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddhsat_dv_altAlias : InstAlias<"$Vdd32=vaddh($Vuu32,$Vvv32):sat", (V6_vaddhsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddhsat_dv_alt_128BAlias : InstAlias<"$Vdd32=vaddh($Vuu32,$Vvv32):sat", (V6_vaddhsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddhw_altAlias : InstAlias<"$Vdd32=vaddh($Vu32,$Vv32)", (V6_vaddhw VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddhw_alt_128BAlias : InstAlias<"$Vdd32=vaddh($Vu32,$Vv32)", (V6_vaddhw VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddubh_altAlias : InstAlias<"$Vdd32=vaddub($Vu32,$Vv32)", (V6_vaddubh VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddubh_alt_128BAlias : InstAlias<"$Vdd32=vaddub($Vu32,$Vv32)", (V6_vaddubh VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddubsat_altAlias : InstAlias<"$Vd32=vaddub($Vu32,$Vv32):sat", (V6_vaddubsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddubsat_alt_128BAlias : InstAlias<"$Vd32=vaddub($Vu32,$Vv32):sat", (V6_vaddubsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddubsat_dv_altAlias : InstAlias<"$Vdd32=vaddub($Vuu32,$Vvv32):sat", (V6_vaddubsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddubsat_dv_alt_128BAlias : InstAlias<"$Vdd32=vaddub($Vuu32,$Vvv32):sat", (V6_vaddubsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vadduhsat_altAlias : InstAlias<"$Vd32=vadduh($Vu32,$Vv32):sat", (V6_vadduhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vadduhsat_alt_128BAlias : InstAlias<"$Vd32=vadduh($Vu32,$Vv32):sat", (V6_vadduhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vadduhsat_dv_altAlias : InstAlias<"$Vdd32=vadduh($Vuu32,$Vvv32):sat", (V6_vadduhsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vadduhsat_dv_alt_128BAlias : InstAlias<"$Vdd32=vadduh($Vuu32,$Vvv32):sat", (V6_vadduhsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vadduhw_altAlias : InstAlias<"$Vdd32=vadduh($Vu32,$Vv32)", (V6_vadduhw VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vadduhw_alt_128BAlias : InstAlias<"$Vdd32=vadduh($Vu32,$Vv32)", (V6_vadduhw VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddw_altAlias : InstAlias<"$Vd32=vaddw($Vu32,$Vv32)", (V6_vaddw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddw_alt_128BAlias : InstAlias<"$Vd32=vaddw($Vu32,$Vv32)", (V6_vaddw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddw_dv_altAlias : InstAlias<"$Vdd32=vaddw($Vuu32,$Vvv32)", (V6_vaddw_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddw_dv_alt_128BAlias : InstAlias<"$Vdd32=vaddw($Vuu32,$Vvv32)", (V6_vaddw_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddwnq_altAlias : InstAlias<"if (!$Qv4.w) $Vx32.w+=$Vu32.w", (V6_vaddwnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddwnq_alt_128BAlias : InstAlias<"if (!$Qv4.w) $Vx32.w+=$Vu32.w", (V6_vaddwnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddwq_altAlias : InstAlias<"if ($Qv4.w) $Vx32.w+=$Vu32.w", (V6_vaddwq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddwq_alt_128BAlias : InstAlias<"if ($Qv4.w) $Vx32.w+=$Vu32.w", (V6_vaddwq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vaddwsat_altAlias : InstAlias<"$Vd32=vaddw($Vu32,$Vv32):sat", (V6_vaddwsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddwsat_alt_128BAlias : InstAlias<"$Vd32=vaddw($Vu32,$Vv32):sat", (V6_vaddwsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaddwsat_dv_altAlias : InstAlias<"$Vdd32=vaddw($Vuu32,$Vvv32):sat", (V6_vaddwsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vaddwsat_dv_alt_128BAlias : InstAlias<"$Vdd32=vaddw($Vuu32,$Vvv32):sat", (V6_vaddwsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vandqrt_acc_altAlias : InstAlias<"$Vx32.ub|=vand($Qu4.ub,$Rt32.ub)", (V6_vandqrt_acc VectorRegs:$Vx32, VecPredRegs:$Qu4, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vandqrt_acc_alt_128BAlias : InstAlias<"$Vx32.ub|=vand($Qu4.ub,$Rt32.ub)", (V6_vandqrt_acc VectorRegs:$Vx32, VecPredRegs:$Qu4, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vandqrt_altAlias : InstAlias<"$Vd32.ub=vand($Qu4.ub,$Rt32.ub)", (V6_vandqrt VectorRegs:$Vd32, VecPredRegs:$Qu4, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vandqrt_alt_128BAlias : InstAlias<"$Vd32.ub=vand($Qu4.ub,$Rt32.ub)", (V6_vandqrt VectorRegs:$Vd32, VecPredRegs:$Qu4, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vandvrt_acc_altAlias : InstAlias<"$Qx4.ub|=vand($Vu32.ub,$Rt32.ub)", (V6_vandvrt_acc VecPredRegs:$Qx4, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vandvrt_acc_alt_128BAlias : InstAlias<"$Qx4.ub|=vand($Vu32.ub,$Rt32.ub)", (V6_vandvrt_acc VecPredRegs:$Qx4, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vandvrt_altAlias : InstAlias<"$Qd4.ub=vand($Vu32.ub,$Rt32.ub)", (V6_vandvrt VecPredRegs:$Qd4, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vandvrt_alt_128BAlias : InstAlias<"$Qd4.ub=vand($Vu32.ub,$Rt32.ub)", (V6_vandvrt VecPredRegs:$Qd4, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vaslh_altAlias : InstAlias<"$Vd32=vaslh($Vu32,$Rt32)", (V6_vaslh VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vaslh_alt_128BAlias : InstAlias<"$Vd32=vaslh($Vu32,$Rt32)", (V6_vaslh VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vaslhv_altAlias : InstAlias<"$Vd32=vaslh($Vu32,$Vv32)", (V6_vaslhv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaslhv_alt_128BAlias : InstAlias<"$Vd32=vaslh($Vu32,$Vv32)", (V6_vaslhv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaslw_acc_altAlias : InstAlias<"$Vx32+=vaslw($Vu32,$Rt32)", (V6_vaslw_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vaslw_acc_alt_128BAlias : InstAlias<"$Vx32+=vaslw($Vu32,$Rt32)", (V6_vaslw_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vaslw_altAlias : InstAlias<"$Vd32=vaslw($Vu32,$Rt32)", (V6_vaslw VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vaslw_alt_128BAlias : InstAlias<"$Vd32=vaslw($Vu32,$Rt32)", (V6_vaslw VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vaslwv_altAlias : InstAlias<"$Vd32=vaslw($Vu32,$Vv32)", (V6_vaslwv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vaslwv_alt_128BAlias : InstAlias<"$Vd32=vaslw($Vu32,$Vv32)", (V6_vaslwv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vasrh_altAlias : InstAlias<"$Vd32=vasrh($Vu32,$Rt32)", (V6_vasrh VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vasrh_alt_128BAlias : InstAlias<"$Vd32=vasrh($Vu32,$Rt32)", (V6_vasrh VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vasrhbrndsat_altAlias : InstAlias<"$Vd32=vasrhb($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrhbrndsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8)>;
+def V6_vasrhubrndsat_altAlias : InstAlias<"$Vd32=vasrhub($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrhubrndsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8)>;
+def V6_vasrhubsat_altAlias : InstAlias<"$Vd32=vasrhub($Vu32,$Vv32,$Rt8):sat", (V6_vasrhubsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8)>;
+def V6_vasrhv_altAlias : InstAlias<"$Vd32=vasrh($Vu32,$Vv32)", (V6_vasrhv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vasrhv_alt_128BAlias : InstAlias<"$Vd32=vasrh($Vu32,$Vv32)", (V6_vasrhv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vasrw_acc_altAlias : InstAlias<"$Vx32+=vasrw($Vu32,$Rt32)", (V6_vasrw_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vasrw_acc_alt_128BAlias : InstAlias<"$Vx32+=vasrw($Vu32,$Rt32)", (V6_vasrw_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vasrw_altAlias : InstAlias<"$Vd32=vasrw($Vu32,$Rt32)", (V6_vasrw VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vasrw_alt_128BAlias : InstAlias<"$Vd32=vasrw($Vu32,$Rt32)", (V6_vasrw VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vasrwh_altAlias : InstAlias<"$Vd32=vasrwh($Vu32,$Vv32,$Rt8)", (V6_vasrwhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8)>;
+def V6_vasrwhrndsat_altAlias : InstAlias<"$Vd32=vasrwh($Vu32,$Vv32,$Rt8):rnd:sat", (V6_vasrwhrndsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8)>;
+def V6_vasrwhsat_altAlias : InstAlias<"$Vd32=vasrwh($Vu32,$Vv32,$Rt8):sat", (V6_vasrwhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8)>;
+def V6_vasrwuhsat_altAlias : InstAlias<"$Vd32=vasrwuh($Vu32,$Vv32,$Rt8):sat", (V6_vasrwuhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32, IntRegsLow8:$Rt8)>;
+def V6_vasrwv_altAlias : InstAlias<"$Vd32=vasrw($Vu32,$Vv32)", (V6_vasrwv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vasrwv_alt_128BAlias : InstAlias<"$Vd32=vasrw($Vu32,$Vv32)", (V6_vasrwv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgh_altAlias : InstAlias<"$Vd32=vavgh($Vu32,$Vv32)", (V6_vavgh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgh_alt_128BAlias : InstAlias<"$Vd32=vavgh($Vu32,$Vv32)", (V6_vavgh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavghrnd_altAlias : InstAlias<"$Vd32=vavgh($Vu32,$Vv32):rnd", (V6_vavghrnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavghrnd_alt_128BAlias : InstAlias<"$Vd32=vavgh($Vu32,$Vv32):rnd", (V6_vavghrnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgub_altAlias : InstAlias<"$Vd32=vavgub($Vu32,$Vv32)", (V6_vavgub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgub_alt_128BAlias : InstAlias<"$Vd32=vavgub($Vu32,$Vv32)", (V6_vavgub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgubrnd_altAlias : InstAlias<"$Vd32=vavgub($Vu32,$Vv32):rnd", (V6_vavgubrnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgubrnd_alt_128BAlias : InstAlias<"$Vd32=vavgub($Vu32,$Vv32):rnd", (V6_vavgubrnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavguh_altAlias : InstAlias<"$Vd32=vavguh($Vu32,$Vv32)", (V6_vavguh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavguh_alt_128BAlias : InstAlias<"$Vd32=vavguh($Vu32,$Vv32)", (V6_vavguh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavguhrnd_altAlias : InstAlias<"$Vd32=vavguh($Vu32,$Vv32):rnd", (V6_vavguhrnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavguhrnd_alt_128BAlias : InstAlias<"$Vd32=vavguh($Vu32,$Vv32):rnd", (V6_vavguhrnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgw_altAlias : InstAlias<"$Vd32=vavgw($Vu32,$Vv32)", (V6_vavgw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgw_alt_128BAlias : InstAlias<"$Vd32=vavgw($Vu32,$Vv32)", (V6_vavgw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgwrnd_altAlias : InstAlias<"$Vd32=vavgw($Vu32,$Vv32):rnd", (V6_vavgwrnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vavgwrnd_alt_128BAlias : InstAlias<"$Vd32=vavgw($Vu32,$Vv32):rnd", (V6_vavgwrnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vcl0h_altAlias : InstAlias<"$Vd32=vcl0h($Vu32)", (V6_vcl0h VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vcl0h_alt_128BAlias : InstAlias<"$Vd32=vcl0h($Vu32)", (V6_vcl0h VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vcl0w_altAlias : InstAlias<"$Vd32=vcl0w($Vu32)", (V6_vcl0w VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vcl0w_alt_128BAlias : InstAlias<"$Vd32=vcl0w($Vu32)", (V6_vcl0w VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vd0Alias : InstAlias<"$Vd32=#0", (V6_vxor VectorRegs:$Vd32, VectorRegs:$Vd32, VectorRegs:$Vd32)>, Requires<[UseHVX]>;
+def V6_vd0_128BAlias : InstAlias<"$Vd32=#0", (V6_vxor VectorRegs:$Vd32, VectorRegs:$Vd32, VectorRegs:$Vd32)>, Requires<[UseHVX]>;
+def V6_vdd0Alias : InstAlias<"$Vdd32=#0", (V6_vsubw_dv VecDblRegs:$Vdd32, W15, W15)>, Requires<[UseHVX]>;
+def V6_vdd0_128BAlias : InstAlias<"$Vdd32=#0", (V6_vsubw_dv VecDblRegs:$Vdd32, W15, W15)>, Requires<[UseHVX]>;
+def V6_vdealb4w_altAlias : InstAlias<"$Vd32=vdealb4w($Vu32,$Vv32)", (V6_vdealb4w VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vdealb4w_alt_128BAlias : InstAlias<"$Vd32=vdealb4w($Vu32,$Vv32)", (V6_vdealb4w VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vdealb_altAlias : InstAlias<"$Vd32=vdealb($Vu32)", (V6_vdealb VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vdealb_alt_128BAlias : InstAlias<"$Vd32=vdealb($Vu32)", (V6_vdealb VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vdealh_altAlias : InstAlias<"$Vd32=vdealh($Vu32)", (V6_vdealh VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vdealh_alt_128BAlias : InstAlias<"$Vd32=vdealh($Vu32)", (V6_vdealh VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vdmpybus_acc_altAlias : InstAlias<"$Vx32+=vdmpybus($Vu32,$Rt32)", (V6_vdmpybus_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpybus_acc_alt_128BAlias : InstAlias<"$Vx32+=vdmpybus($Vu32,$Rt32)", (V6_vdmpybus_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpybus_altAlias : InstAlias<"$Vd32=vdmpybus($Vu32,$Rt32)", (V6_vdmpybus VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpybus_alt_128BAlias : InstAlias<"$Vd32=vdmpybus($Vu32,$Rt32)", (V6_vdmpybus VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpybus_dv_acc_altAlias : InstAlias<"$Vxx32+=vdmpybus($Vuu32,$Rt32)", (V6_vdmpybus_dv_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpybus_dv_acc_alt_128BAlias : InstAlias<"$Vxx32+=vdmpybus($Vuu32,$Rt32)", (V6_vdmpybus_dv_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpybus_dv_altAlias : InstAlias<"$Vdd32=vdmpybus($Vuu32,$Rt32)", (V6_vdmpybus_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpybus_dv_alt_128BAlias : InstAlias<"$Vdd32=vdmpybus($Vuu32,$Rt32)", (V6_vdmpybus_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhb_acc_altAlias : InstAlias<"$Vx32+=vdmpyhb($Vu32,$Rt32)", (V6_vdmpyhb_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhb_acc_alt_128BAlias : InstAlias<"$Vx32+=vdmpyhb($Vu32,$Rt32)", (V6_vdmpyhb_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhb_altAlias : InstAlias<"$Vd32=vdmpyhb($Vu32,$Rt32)", (V6_vdmpyhb VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhb_alt_128BAlias : InstAlias<"$Vd32=vdmpyhb($Vu32,$Rt32)", (V6_vdmpyhb VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhb_dv_acc_altAlias : InstAlias<"$Vxx32+=vdmpyhb($Vuu32,$Rt32)", (V6_vdmpyhb_dv_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhb_dv_acc_alt_128BAlias : InstAlias<"$Vxx32+=vdmpyhb($Vuu32,$Rt32)", (V6_vdmpyhb_dv_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhb_dv_altAlias : InstAlias<"$Vdd32=vdmpyhb($Vuu32,$Rt32)", (V6_vdmpyhb_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhb_dv_alt_128BAlias : InstAlias<"$Vdd32=vdmpyhb($Vuu32,$Rt32)", (V6_vdmpyhb_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhisat_acc_altAlias : InstAlias<"$Vx32+=vdmpyh($Vuu32,$Rt32):sat", (V6_vdmpyhisat_acc VectorRegs:$Vx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhisat_acc_alt_128BAlias : InstAlias<"$Vx32+=vdmpyh($Vuu32,$Rt32):sat", (V6_vdmpyhisat_acc VectorRegs:$Vx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhisat_altAlias : InstAlias<"$Vd32=vdmpyh($Vuu32,$Rt32):sat", (V6_vdmpyhisat VectorRegs:$Vd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhisat_alt_128BAlias : InstAlias<"$Vd32=vdmpyh($Vuu32,$Rt32):sat", (V6_vdmpyhisat VectorRegs:$Vd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsat_acc_altAlias : InstAlias<"$Vx32+=vdmpyh($Vu32,$Rt32):sat", (V6_vdmpyhsat_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsat_acc_alt_128BAlias : InstAlias<"$Vx32+=vdmpyh($Vu32,$Rt32):sat", (V6_vdmpyhsat_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsat_altAlias : InstAlias<"$Vd32=vdmpyh($Vu32,$Rt32):sat", (V6_vdmpyhsat VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsat_alt_128BAlias : InstAlias<"$Vd32=vdmpyh($Vu32,$Rt32):sat", (V6_vdmpyhsat VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsuisat_acc_altAlias : InstAlias<"$Vx32+=vdmpyhsu($Vuu32,$Rt32,#1):sat", (V6_vdmpyhsuisat_acc VectorRegs:$Vx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsuisat_acc_alt_128BAlias : InstAlias<"$Vx32+=vdmpyhsu($Vuu32,$Rt32,#1):sat", (V6_vdmpyhsuisat_acc VectorRegs:$Vx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsuisat_altAlias : InstAlias<"$Vd32=vdmpyhsu($Vuu32,$Rt32,#1):sat", (V6_vdmpyhsuisat VectorRegs:$Vd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsuisat_alt_128BAlias : InstAlias<"$Vd32=vdmpyhsu($Vuu32,$Rt32,#1):sat", (V6_vdmpyhsuisat VectorRegs:$Vd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsusat_acc_altAlias : InstAlias<"$Vx32+=vdmpyhsu($Vu32,$Rt32):sat", (V6_vdmpyhsusat_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsusat_acc_alt_128BAlias : InstAlias<"$Vx32+=vdmpyhsu($Vu32,$Rt32):sat", (V6_vdmpyhsusat_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsusat_altAlias : InstAlias<"$Vd32=vdmpyhsu($Vu32,$Rt32):sat", (V6_vdmpyhsusat VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhsusat_alt_128BAlias : InstAlias<"$Vd32=vdmpyhsu($Vu32,$Rt32):sat", (V6_vdmpyhsusat VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdmpyhvsat_acc_altAlias : InstAlias<"$Vx32+=vdmpyh($Vu32,$Vv32):sat", (V6_vdmpyhvsat_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vdmpyhvsat_acc_alt_128BAlias : InstAlias<"$Vx32+=vdmpyh($Vu32,$Vv32):sat", (V6_vdmpyhvsat_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vdmpyhvsat_altAlias : InstAlias<"$Vd32=vdmpyh($Vu32,$Vv32):sat", (V6_vdmpyhvsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vdmpyhvsat_alt_128BAlias : InstAlias<"$Vd32=vdmpyh($Vu32,$Vv32):sat", (V6_vdmpyhvsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vdsaduh_acc_altAlias : InstAlias<"$Vxx32+=vdsaduh($Vuu32,$Rt32)", (V6_vdsaduh_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdsaduh_acc_alt_128BAlias : InstAlias<"$Vxx32+=vdsaduh($Vuu32,$Rt32)", (V6_vdsaduh_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdsaduh_altAlias : InstAlias<"$Vdd32=vdsaduh($Vuu32,$Rt32)", (V6_vdsaduh VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vdsaduh_alt_128BAlias : InstAlias<"$Vdd32=vdsaduh($Vuu32,$Rt32)", (V6_vdsaduh VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vlsrh_altAlias : InstAlias<"$Vd32=vlsrh($Vu32,$Rt32)", (V6_vlsrh VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vlsrh_alt_128BAlias : InstAlias<"$Vd32=vlsrh($Vu32,$Rt32)", (V6_vlsrh VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vlsrhv_altAlias : InstAlias<"$Vd32=vlsrh($Vu32,$Vv32)", (V6_vlsrhv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vlsrhv_alt_128BAlias : InstAlias<"$Vd32=vlsrh($Vu32,$Vv32)", (V6_vlsrhv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vlsrw_altAlias : InstAlias<"$Vd32=vlsrw($Vu32,$Rt32)", (V6_vlsrw VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vlsrw_alt_128BAlias : InstAlias<"$Vd32=vlsrw($Vu32,$Rt32)", (V6_vlsrw VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vlsrwv_altAlias : InstAlias<"$Vd32=vlsrw($Vu32,$Vv32)", (V6_vlsrwv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vlsrwv_alt_128BAlias : InstAlias<"$Vd32=vlsrw($Vu32,$Vv32)", (V6_vlsrwv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmaxh_altAlias : InstAlias<"$Vd32=vmaxh($Vu32,$Vv32)", (V6_vmaxh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmaxh_alt_128BAlias : InstAlias<"$Vd32=vmaxh($Vu32,$Vv32)", (V6_vmaxh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmaxub_altAlias : InstAlias<"$Vd32=vmaxub($Vu32,$Vv32)", (V6_vmaxub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmaxub_alt_128BAlias : InstAlias<"$Vd32=vmaxub($Vu32,$Vv32)", (V6_vmaxub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmaxuh_altAlias : InstAlias<"$Vd32=vmaxuh($Vu32,$Vv32)", (V6_vmaxuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmaxuh_alt_128BAlias : InstAlias<"$Vd32=vmaxuh($Vu32,$Vv32)", (V6_vmaxuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmaxw_altAlias : InstAlias<"$Vd32=vmaxw($Vu32,$Vv32)", (V6_vmaxw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmaxw_alt_128BAlias : InstAlias<"$Vd32=vmaxw($Vu32,$Vv32)", (V6_vmaxw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vminh_altAlias : InstAlias<"$Vd32=vminh($Vu32,$Vv32)", (V6_vminh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vminh_alt_128BAlias : InstAlias<"$Vd32=vminh($Vu32,$Vv32)", (V6_vminh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vminub_altAlias : InstAlias<"$Vd32=vminub($Vu32,$Vv32)", (V6_vminub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vminub_alt_128BAlias : InstAlias<"$Vd32=vminub($Vu32,$Vv32)", (V6_vminub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vminuh_altAlias : InstAlias<"$Vd32=vminuh($Vu32,$Vv32)", (V6_vminuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vminuh_alt_128BAlias : InstAlias<"$Vd32=vminuh($Vu32,$Vv32)", (V6_vminuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vminw_altAlias : InstAlias<"$Vd32=vminw($Vu32,$Vv32)", (V6_vminw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vminw_alt_128BAlias : InstAlias<"$Vd32=vminw($Vu32,$Vv32)", (V6_vminw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpabus_acc_altAlias : InstAlias<"$Vxx32+=vmpabus($Vuu32,$Rt32)", (V6_vmpabus_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpabus_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpabus($Vuu32,$Rt32)", (V6_vmpabus_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpabus_altAlias : InstAlias<"$Vdd32=vmpabus($Vuu32,$Rt32)", (V6_vmpabus VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpabus_alt_128BAlias : InstAlias<"$Vdd32=vmpabus($Vuu32,$Rt32)", (V6_vmpabus VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpabusv_altAlias : InstAlias<"$Vdd32=vmpabus($Vuu32,$Vvv32)", (V6_vmpabusv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vmpabusv_alt_128BAlias : InstAlias<"$Vdd32=vmpabus($Vuu32,$Vvv32)", (V6_vmpabusv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vmpabuuv_altAlias : InstAlias<"$Vdd32=vmpabuu($Vuu32,$Vvv32)", (V6_vmpabuuv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vmpabuuv_alt_128BAlias : InstAlias<"$Vdd32=vmpabuu($Vuu32,$Vvv32)", (V6_vmpabuuv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vmpahb_acc_altAlias : InstAlias<"$Vxx32+=vmpahb($Vuu32,$Rt32)", (V6_vmpahb_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpahb_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpahb($Vuu32,$Rt32)", (V6_vmpahb_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpahb_altAlias : InstAlias<"$Vdd32=vmpahb($Vuu32,$Rt32)", (V6_vmpahb VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpahb_alt_128BAlias : InstAlias<"$Vdd32=vmpahb($Vuu32,$Rt32)", (V6_vmpahb VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpybus_acc_altAlias : InstAlias<"$Vxx32+=vmpybus($Vu32,$Rt32)", (V6_vmpybus_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpybus_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpybus($Vu32,$Rt32)", (V6_vmpybus_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpybus_altAlias : InstAlias<"$Vdd32=vmpybus($Vu32,$Rt32)", (V6_vmpybus VecDblRegs:$Vdd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpybus_alt_128BAlias : InstAlias<"$Vdd32=vmpybus($Vu32,$Rt32)", (V6_vmpybus VecDblRegs:$Vdd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpybusv_acc_altAlias : InstAlias<"$Vxx32+=vmpybus($Vu32,$Vv32)", (V6_vmpybusv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpybusv_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpybus($Vu32,$Vv32)", (V6_vmpybusv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpybusv_altAlias : InstAlias<"$Vdd32=vmpybus($Vu32,$Vv32)", (V6_vmpybusv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpybusv_alt_128BAlias : InstAlias<"$Vdd32=vmpybus($Vu32,$Vv32)", (V6_vmpybusv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpybv_acc_altAlias : InstAlias<"$Vxx32+=vmpyb($Vu32,$Vv32)", (V6_vmpybv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpybv_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpyb($Vu32,$Vv32)", (V6_vmpybv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpybv_altAlias : InstAlias<"$Vdd32=vmpyb($Vu32,$Vv32)", (V6_vmpybv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpybv_alt_128BAlias : InstAlias<"$Vdd32=vmpyb($Vu32,$Vv32)", (V6_vmpybv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyewuh_altAlias : InstAlias<"$Vd32=vmpyewuh($Vu32,$Vv32)", (V6_vmpyewuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyewuh_alt_128BAlias : InstAlias<"$Vd32=vmpyewuh($Vu32,$Vv32)", (V6_vmpyewuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyh_altAlias : InstAlias<"$Vdd32=vmpyh($Vu32,$Rt32)", (V6_vmpyh VecDblRegs:$Vdd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyh_alt_128BAlias : InstAlias<"$Vdd32=vmpyh($Vu32,$Rt32)", (V6_vmpyh VecDblRegs:$Vdd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyhsat_acc_altAlias : InstAlias<"$Vxx32+=vmpyh($Vu32,$Rt32):sat", (V6_vmpyhsat_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyhsat_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpyh($Vu32,$Rt32):sat", (V6_vmpyhsat_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyhsrs_altAlias : InstAlias<"$Vd32=vmpyh($Vu32,$Rt32):<<1:rnd:sat", (V6_vmpyhsrs VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyhsrs_alt_128BAlias : InstAlias<"$Vd32=vmpyh($Vu32,$Rt32):<<1:rnd:sat", (V6_vmpyhsrs VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyhss_altAlias : InstAlias<"$Vd32=vmpyh($Vu32,$Rt32):<<1:sat", (V6_vmpyhss VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyhss_alt_128BAlias : InstAlias<"$Vd32=vmpyh($Vu32,$Rt32):<<1:sat", (V6_vmpyhss VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyhus_acc_altAlias : InstAlias<"$Vxx32+=vmpyhus($Vu32,$Vv32)", (V6_vmpyhus_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhus_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpyhus($Vu32,$Vv32)", (V6_vmpyhus_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhus_altAlias : InstAlias<"$Vdd32=vmpyhus($Vu32,$Vv32)", (V6_vmpyhus VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhus_alt_128BAlias : InstAlias<"$Vdd32=vmpyhus($Vu32,$Vv32)", (V6_vmpyhus VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhv_acc_altAlias : InstAlias<"$Vxx32+=vmpyh($Vu32,$Vv32)", (V6_vmpyhv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhv_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpyh($Vu32,$Vv32)", (V6_vmpyhv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhv_altAlias : InstAlias<"$Vdd32=vmpyh($Vu32,$Vv32)", (V6_vmpyhv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhv_alt_128BAlias : InstAlias<"$Vdd32=vmpyh($Vu32,$Vv32)", (V6_vmpyhv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhvsrs_altAlias : InstAlias<"$Vd32=vmpyh($Vu32,$Vv32):<<1:rnd:sat", (V6_vmpyhvsrs VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyhvsrs_alt_128BAlias : InstAlias<"$Vd32=vmpyh($Vu32,$Vv32):<<1:rnd:sat", (V6_vmpyhvsrs VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyiewh_acc_altAlias : InstAlias<"$Vx32+=vmpyiewh($Vu32,$Vv32)", (V6_vmpyiewh_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyiewh_acc_alt_128BAlias : InstAlias<"$Vx32+=vmpyiewh($Vu32,$Vv32)", (V6_vmpyiewh_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyiewuh_acc_altAlias : InstAlias<"$Vx32+=vmpyiewuh($Vu32,$Vv32)", (V6_vmpyiewuh_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyiewuh_acc_alt_128BAlias : InstAlias<"$Vx32+=vmpyiewuh($Vu32,$Vv32)", (V6_vmpyiewuh_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyiewuh_altAlias : InstAlias<"$Vd32=vmpyiewuh($Vu32,$Vv32)", (V6_vmpyiewuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyiewuh_alt_128BAlias : InstAlias<"$Vd32=vmpyiewuh($Vu32,$Vv32)", (V6_vmpyiewuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyih_acc_altAlias : InstAlias<"$Vx32+=vmpyih($Vu32,$Vv32)", (V6_vmpyih_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyih_acc_alt_128BAlias : InstAlias<"$Vx32+=vmpyih($Vu32,$Vv32)", (V6_vmpyih_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyih_altAlias : InstAlias<"$Vd32=vmpyih($Vu32,$Vv32)", (V6_vmpyih VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyih_alt_128BAlias : InstAlias<"$Vd32=vmpyih($Vu32,$Vv32)", (V6_vmpyih VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyihb_acc_altAlias : InstAlias<"$Vx32+=vmpyihb($Vu32,$Rt32)", (V6_vmpyihb_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyihb_acc_alt_128BAlias : InstAlias<"$Vx32+=vmpyihb($Vu32,$Rt32)", (V6_vmpyihb_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyihb_altAlias : InstAlias<"$Vd32=vmpyihb($Vu32,$Rt32)", (V6_vmpyihb VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyihb_alt_128BAlias : InstAlias<"$Vd32=vmpyihb($Vu32,$Rt32)", (V6_vmpyihb VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyiowh_altAlias : InstAlias<"$Vd32=vmpyiowh($Vu32,$Vv32)", (V6_vmpyiowh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyiowh_alt_128BAlias : InstAlias<"$Vd32=vmpyiowh($Vu32,$Vv32)", (V6_vmpyiowh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyiwb_acc_altAlias : InstAlias<"$Vx32+=vmpyiwb($Vu32,$Rt32)", (V6_vmpyiwb_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyiwb_acc_alt_128BAlias : InstAlias<"$Vx32+=vmpyiwb($Vu32,$Rt32)", (V6_vmpyiwb_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyiwb_altAlias : InstAlias<"$Vd32=vmpyiwb($Vu32,$Rt32)", (V6_vmpyiwb VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyiwb_alt_128BAlias : InstAlias<"$Vd32=vmpyiwb($Vu32,$Rt32)", (V6_vmpyiwb VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyiwh_acc_altAlias : InstAlias<"$Vx32+=vmpyiwh($Vu32,$Rt32)", (V6_vmpyiwh_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyiwh_acc_alt_128BAlias : InstAlias<"$Vx32+=vmpyiwh($Vu32,$Rt32)", (V6_vmpyiwh_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyiwh_altAlias : InstAlias<"$Vd32=vmpyiwh($Vu32,$Rt32)", (V6_vmpyiwh VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyiwh_alt_128BAlias : InstAlias<"$Vd32=vmpyiwh($Vu32,$Rt32)", (V6_vmpyiwh VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyowh_altAlias : InstAlias<"$Vd32=vmpyowh($Vu32,$Vv32):<<1:sat", (V6_vmpyowh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyowh_alt_128BAlias : InstAlias<"$Vd32=vmpyowh($Vu32,$Vv32):<<1:sat", (V6_vmpyowh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyowh_rnd_altAlias : InstAlias<"$Vd32=vmpyowh($Vu32,$Vv32):<<1:rnd:sat", (V6_vmpyowh_rnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyowh_rnd_alt_128BAlias : InstAlias<"$Vd32=vmpyowh($Vu32,$Vv32):<<1:rnd:sat", (V6_vmpyowh_rnd VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyub_acc_altAlias : InstAlias<"$Vxx32+=vmpyub($Vu32,$Rt32)", (V6_vmpyub_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyub_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpyub($Vu32,$Rt32)", (V6_vmpyub_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyub_altAlias : InstAlias<"$Vdd32=vmpyub($Vu32,$Rt32)", (V6_vmpyub VecDblRegs:$Vdd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyub_alt_128BAlias : InstAlias<"$Vdd32=vmpyub($Vu32,$Rt32)", (V6_vmpyub VecDblRegs:$Vdd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyubv_acc_altAlias : InstAlias<"$Vxx32+=vmpyub($Vu32,$Vv32)", (V6_vmpyubv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyubv_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpyub($Vu32,$Vv32)", (V6_vmpyubv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyubv_altAlias : InstAlias<"$Vdd32=vmpyub($Vu32,$Vv32)", (V6_vmpyubv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyubv_alt_128BAlias : InstAlias<"$Vdd32=vmpyub($Vu32,$Vv32)", (V6_vmpyubv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyuh_acc_altAlias : InstAlias<"$Vxx32+=vmpyuh($Vu32,$Rt32)", (V6_vmpyuh_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyuh_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpyuh($Vu32,$Rt32)", (V6_vmpyuh_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyuh_altAlias : InstAlias<"$Vdd32=vmpyuh($Vu32,$Rt32)", (V6_vmpyuh VecDblRegs:$Vdd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyuh_alt_128BAlias : InstAlias<"$Vdd32=vmpyuh($Vu32,$Rt32)", (V6_vmpyuh VecDblRegs:$Vdd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vmpyuhv_acc_altAlias : InstAlias<"$Vxx32+=vmpyuh($Vu32,$Vv32)", (V6_vmpyuhv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyuhv_acc_alt_128BAlias : InstAlias<"$Vxx32+=vmpyuh($Vu32,$Vv32)", (V6_vmpyuhv_acc VecDblRegs:$Vxx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyuhv_altAlias : InstAlias<"$Vdd32=vmpyuh($Vu32,$Vv32)", (V6_vmpyuhv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vmpyuhv_alt_128BAlias : InstAlias<"$Vdd32=vmpyuh($Vu32,$Vv32)", (V6_vmpyuhv VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vnavgh_altAlias : InstAlias<"$Vd32=vnavgh($Vu32,$Vv32)", (V6_vnavgh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vnavgh_alt_128BAlias : InstAlias<"$Vd32=vnavgh($Vu32,$Vv32)", (V6_vnavgh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vnavgub_altAlias : InstAlias<"$Vd32=vnavgub($Vu32,$Vv32)", (V6_vnavgub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vnavgub_alt_128BAlias : InstAlias<"$Vd32=vnavgub($Vu32,$Vv32)", (V6_vnavgub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vnavgw_altAlias : InstAlias<"$Vd32=vnavgw($Vu32,$Vv32)", (V6_vnavgw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vnavgw_alt_128BAlias : InstAlias<"$Vd32=vnavgw($Vu32,$Vv32)", (V6_vnavgw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vnormamth_altAlias : InstAlias<"$Vd32=vnormamth($Vu32)", (V6_vnormamth VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vnormamth_alt_128BAlias : InstAlias<"$Vd32=vnormamth($Vu32)", (V6_vnormamth VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vnormamtw_altAlias : InstAlias<"$Vd32=vnormamtw($Vu32)", (V6_vnormamtw VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vnormamtw_alt_128BAlias : InstAlias<"$Vd32=vnormamtw($Vu32)", (V6_vnormamtw VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vpackeb_altAlias : InstAlias<"$Vd32=vpackeb($Vu32,$Vv32)", (V6_vpackeb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackeb_alt_128BAlias : InstAlias<"$Vd32=vpackeb($Vu32,$Vv32)", (V6_vpackeb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackeh_altAlias : InstAlias<"$Vd32=vpackeh($Vu32,$Vv32)", (V6_vpackeh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackeh_alt_128BAlias : InstAlias<"$Vd32=vpackeh($Vu32,$Vv32)", (V6_vpackeh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackhb_sat_altAlias : InstAlias<"$Vd32=vpackhb($Vu32,$Vv32):sat", (V6_vpackhb_sat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackhb_sat_alt_128BAlias : InstAlias<"$Vd32=vpackhb($Vu32,$Vv32):sat", (V6_vpackhb_sat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackhub_sat_altAlias : InstAlias<"$Vd32=vpackhub($Vu32,$Vv32):sat", (V6_vpackhub_sat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackhub_sat_alt_128BAlias : InstAlias<"$Vd32=vpackhub($Vu32,$Vv32):sat", (V6_vpackhub_sat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackob_altAlias : InstAlias<"$Vd32=vpackob($Vu32,$Vv32)", (V6_vpackob VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackob_alt_128BAlias : InstAlias<"$Vd32=vpackob($Vu32,$Vv32)", (V6_vpackob VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackoh_altAlias : InstAlias<"$Vd32=vpackoh($Vu32,$Vv32)", (V6_vpackoh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackoh_alt_128BAlias : InstAlias<"$Vd32=vpackoh($Vu32,$Vv32)", (V6_vpackoh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackwh_sat_altAlias : InstAlias<"$Vd32=vpackwh($Vu32,$Vv32):sat", (V6_vpackwh_sat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackwh_sat_alt_128BAlias : InstAlias<"$Vd32=vpackwh($Vu32,$Vv32):sat", (V6_vpackwh_sat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackwuh_sat_altAlias : InstAlias<"$Vd32=vpackwuh($Vu32,$Vv32):sat", (V6_vpackwuh_sat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpackwuh_sat_alt_128BAlias : InstAlias<"$Vd32=vpackwuh($Vu32,$Vv32):sat", (V6_vpackwuh_sat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vpopcounth_altAlias : InstAlias<"$Vd32=vpopcounth($Vu32)", (V6_vpopcounth VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vpopcounth_alt_128BAlias : InstAlias<"$Vd32=vpopcounth($Vu32)", (V6_vpopcounth VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vrmpybus_acc_altAlias : InstAlias<"$Vx32+=vrmpybus($Vu32,$Rt32)", (V6_vrmpybus_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vrmpybus_acc_alt_128BAlias : InstAlias<"$Vx32+=vrmpybus($Vu32,$Rt32)", (V6_vrmpybus_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vrmpybus_altAlias : InstAlias<"$Vd32=vrmpybus($Vu32,$Rt32)", (V6_vrmpybus VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vrmpybus_alt_128BAlias : InstAlias<"$Vd32=vrmpybus($Vu32,$Rt32)", (V6_vrmpybus VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vrmpybusi_acc_altAlias : InstAlias<"$Vxx32+=vrmpybus($Vuu32,$Rt32,#$Ii)", (V6_vrmpybusi_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrmpybusi_acc_alt_128BAlias : InstAlias<"$Vxx32+=vrmpybus($Vuu32,$Rt32,#$Ii)", (V6_vrmpybusi_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrmpybusi_altAlias : InstAlias<"$Vdd32=vrmpybus($Vuu32,$Rt32,#$Ii)", (V6_vrmpybusi VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrmpybusi_alt_128BAlias : InstAlias<"$Vdd32=vrmpybus($Vuu32,$Rt32,#$Ii)", (V6_vrmpybusi VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrmpybusv_acc_altAlias : InstAlias<"$Vx32+=vrmpybus($Vu32,$Vv32)", (V6_vrmpybusv_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpybusv_acc_alt_128BAlias : InstAlias<"$Vx32+=vrmpybus($Vu32,$Vv32)", (V6_vrmpybusv_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpybusv_altAlias : InstAlias<"$Vd32=vrmpybus($Vu32,$Vv32)", (V6_vrmpybusv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpybusv_alt_128BAlias : InstAlias<"$Vd32=vrmpybus($Vu32,$Vv32)", (V6_vrmpybusv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpybv_acc_altAlias : InstAlias<"$Vx32+=vrmpyb($Vu32,$Vv32)", (V6_vrmpybv_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpybv_acc_alt_128BAlias : InstAlias<"$Vx32+=vrmpyb($Vu32,$Vv32)", (V6_vrmpybv_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpybv_altAlias : InstAlias<"$Vd32=vrmpyb($Vu32,$Vv32)", (V6_vrmpybv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpybv_alt_128BAlias : InstAlias<"$Vd32=vrmpyb($Vu32,$Vv32)", (V6_vrmpybv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpyub_acc_altAlias : InstAlias<"$Vx32+=vrmpyub($Vu32,$Rt32)", (V6_vrmpyub_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vrmpyub_acc_alt_128BAlias : InstAlias<"$Vx32+=vrmpyub($Vu32,$Rt32)", (V6_vrmpyub_acc VectorRegs:$Vx32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vrmpyub_altAlias : InstAlias<"$Vd32=vrmpyub($Vu32,$Rt32)", (V6_vrmpyub VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vrmpyub_alt_128BAlias : InstAlias<"$Vd32=vrmpyub($Vu32,$Rt32)", (V6_vrmpyub VectorRegs:$Vd32, VectorRegs:$Vu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vrmpyubi_acc_altAlias : InstAlias<"$Vxx32+=vrmpyub($Vuu32,$Rt32,#$Ii)", (V6_vrmpyubi_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrmpyubi_acc_alt_128BAlias : InstAlias<"$Vxx32+=vrmpyub($Vuu32,$Rt32,#$Ii)", (V6_vrmpyubi_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrmpyubi_altAlias : InstAlias<"$Vdd32=vrmpyub($Vuu32,$Rt32,#$Ii)", (V6_vrmpyubi VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrmpyubi_alt_128BAlias : InstAlias<"$Vdd32=vrmpyub($Vuu32,$Rt32,#$Ii)", (V6_vrmpyubi VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrmpyubv_acc_altAlias : InstAlias<"$Vx32+=vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpyubv_acc_alt_128BAlias : InstAlias<"$Vx32+=vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv_acc VectorRegs:$Vx32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpyubv_altAlias : InstAlias<"$Vd32=vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrmpyubv_alt_128BAlias : InstAlias<"$Vd32=vrmpyub($Vu32,$Vv32)", (V6_vrmpyubv VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vroundhb_altAlias : InstAlias<"$Vd32=vroundhb($Vu32,$Vv32):sat", (V6_vroundhb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vroundhb_alt_128BAlias : InstAlias<"$Vd32=vroundhb($Vu32,$Vv32):sat", (V6_vroundhb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vroundhub_altAlias : InstAlias<"$Vd32=vroundhub($Vu32,$Vv32):sat", (V6_vroundhub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vroundhub_alt_128BAlias : InstAlias<"$Vd32=vroundhub($Vu32,$Vv32):sat", (V6_vroundhub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vroundwh_altAlias : InstAlias<"$Vd32=vroundwh($Vu32,$Vv32):sat", (V6_vroundwh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vroundwh_alt_128BAlias : InstAlias<"$Vd32=vroundwh($Vu32,$Vv32):sat", (V6_vroundwh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vroundwuh_altAlias : InstAlias<"$Vd32=vroundwuh($Vu32,$Vv32):sat", (V6_vroundwuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vroundwuh_alt_128BAlias : InstAlias<"$Vd32=vroundwuh($Vu32,$Vv32):sat", (V6_vroundwuh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vrsadubi_acc_altAlias : InstAlias<"$Vxx32+=vrsadub($Vuu32,$Rt32,#$Ii)", (V6_vrsadubi_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrsadubi_acc_alt_128BAlias : InstAlias<"$Vxx32+=vrsadub($Vuu32,$Rt32,#$Ii)", (V6_vrsadubi_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrsadubi_altAlias : InstAlias<"$Vdd32=vrsadub($Vuu32,$Rt32,#$Ii)", (V6_vrsadubi VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vrsadubi_alt_128BAlias : InstAlias<"$Vdd32=vrsadub($Vuu32,$Rt32,#$Ii)", (V6_vrsadubi VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32, u1_0Imm:$Ii)>, Requires<[UseHVX]>;
+def V6_vsathub_altAlias : InstAlias<"$Vd32=vsathub($Vu32,$Vv32)", (V6_vsathub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsathub_alt_128BAlias : InstAlias<"$Vd32=vsathub($Vu32,$Vv32)", (V6_vsathub VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsatwh_altAlias : InstAlias<"$Vd32=vsatwh($Vu32,$Vv32)", (V6_vsatwh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsatwh_alt_128BAlias : InstAlias<"$Vd32=vsatwh($Vu32,$Vv32)", (V6_vsatwh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsb_altAlias : InstAlias<"$Vdd32=vsxtb($Vu32)", (V6_vsb VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsb_alt_128BAlias : InstAlias<"$Vdd32=vsxtb($Vu32)", (V6_vsb VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsh_altAlias : InstAlias<"$Vdd32=vsxth($Vu32)", (V6_vsh VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsh_alt_128BAlias : InstAlias<"$Vdd32=vsxth($Vu32)", (V6_vsh VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vshufeh_altAlias : InstAlias<"$Vd32=vshuffeh($Vu32,$Vv32)", (V6_vshufeh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshufeh_alt_128BAlias : InstAlias<"$Vd32=vshuffeh($Vu32,$Vv32)", (V6_vshufeh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshuffb_altAlias : InstAlias<"$Vd32=vshuffb($Vu32)", (V6_vshuffb VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vshuffb_alt_128BAlias : InstAlias<"$Vd32=vshuffb($Vu32)", (V6_vshuffb VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vshuffeb_altAlias : InstAlias<"$Vd32=vshuffeb($Vu32,$Vv32)", (V6_vshuffeb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshuffeb_alt_128BAlias : InstAlias<"$Vd32=vshuffeb($Vu32,$Vv32)", (V6_vshuffeb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshuffh_altAlias : InstAlias<"$Vd32=vshuffh($Vu32)", (V6_vshuffh VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vshuffh_alt_128BAlias : InstAlias<"$Vd32=vshuffh($Vu32)", (V6_vshuffh VectorRegs:$Vd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vshuffob_altAlias : InstAlias<"$Vd32=vshuffob($Vu32,$Vv32)", (V6_vshuffob VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshuffob_alt_128BAlias : InstAlias<"$Vd32=vshuffob($Vu32,$Vv32)", (V6_vshuffob VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshufoeb_altAlias : InstAlias<"$Vdd32=vshuffoeb($Vu32,$Vv32)", (V6_vshufoeb VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshufoeb_alt_128BAlias : InstAlias<"$Vdd32=vshuffoeb($Vu32,$Vv32)", (V6_vshufoeb VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshufoeh_altAlias : InstAlias<"$Vdd32=vshuffoeh($Vu32,$Vv32)", (V6_vshufoeh VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshufoeh_alt_128BAlias : InstAlias<"$Vdd32=vshuffoeh($Vu32,$Vv32)", (V6_vshufoeh VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshufoh_altAlias : InstAlias<"$Vd32=vshuffoh($Vu32,$Vv32)", (V6_vshufoh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vshufoh_alt_128BAlias : InstAlias<"$Vd32=vshuffoh($Vu32,$Vv32)", (V6_vshufoh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubb_altAlias : InstAlias<"$Vd32=vsubb($Vu32,$Vv32)", (V6_vsubb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubb_alt_128BAlias : InstAlias<"$Vd32=vsubb($Vu32,$Vv32)", (V6_vsubb VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubb_dv_altAlias : InstAlias<"$Vdd32=vsubb($Vuu32,$Vvv32)", (V6_vsubb_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubb_dv_alt_128BAlias : InstAlias<"$Vdd32=vsubb($Vuu32,$Vvv32)", (V6_vsubb_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubbnq_altAlias : InstAlias<"if (!$Qv4.b) $Vx32.b-=$Vu32.b", (V6_vsubbnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubbnq_alt_128BAlias : InstAlias<"if (!$Qv4.b) $Vx32.b-=$Vu32.b", (V6_vsubbnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubbq_altAlias : InstAlias<"if ($Qv4.b) $Vx32.b-=$Vu32.b", (V6_vsubbq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubbq_alt_128BAlias : InstAlias<"if ($Qv4.b) $Vx32.b-=$Vu32.b", (V6_vsubbq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubh_altAlias : InstAlias<"$Vd32=vsubh($Vu32,$Vv32)", (V6_vsubh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubh_alt_128BAlias : InstAlias<"$Vd32=vsubh($Vu32,$Vv32)", (V6_vsubh VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubh_dv_altAlias : InstAlias<"$Vdd32=vsubh($Vuu32,$Vvv32)", (V6_vsubh_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubh_dv_alt_128BAlias : InstAlias<"$Vdd32=vsubh($Vuu32,$Vvv32)", (V6_vsubh_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubhnq_altAlias : InstAlias<"if (!$Qv4.h) $Vx32.h-=$Vu32.h", (V6_vsubhnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubhnq_alt_128BAlias : InstAlias<"if (!$Qv4.h) $Vx32.h-=$Vu32.h", (V6_vsubhnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubhq_altAlias : InstAlias<"if ($Qv4.h) $Vx32.h-=$Vu32.h", (V6_vsubhq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubhq_alt_128BAlias : InstAlias<"if ($Qv4.h) $Vx32.h-=$Vu32.h", (V6_vsubhq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubhsat_altAlias : InstAlias<"$Vd32=vsubh($Vu32,$Vv32):sat", (V6_vsubhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubhsat_alt_128BAlias : InstAlias<"$Vd32=vsubh($Vu32,$Vv32):sat", (V6_vsubhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubhsat_dv_altAlias : InstAlias<"$Vdd32=vsubh($Vuu32,$Vvv32):sat", (V6_vsubhsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubhsat_dv_alt_128BAlias : InstAlias<"$Vdd32=vsubh($Vuu32,$Vvv32):sat", (V6_vsubhsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubhw_altAlias : InstAlias<"$Vdd32=vsubh($Vu32,$Vv32)", (V6_vsubhw VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubhw_alt_128BAlias : InstAlias<"$Vdd32=vsubh($Vu32,$Vv32)", (V6_vsubhw VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsububh_altAlias : InstAlias<"$Vdd32=vsubub($Vu32,$Vv32)", (V6_vsububh VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsububh_alt_128BAlias : InstAlias<"$Vdd32=vsubub($Vu32,$Vv32)", (V6_vsububh VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsububsat_altAlias : InstAlias<"$Vd32=vsubub($Vu32,$Vv32):sat", (V6_vsububsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsububsat_alt_128BAlias : InstAlias<"$Vd32=vsubub($Vu32,$Vv32):sat", (V6_vsububsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsububsat_dv_altAlias : InstAlias<"$Vdd32=vsubub($Vuu32,$Vvv32):sat", (V6_vsububsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsububsat_dv_alt_128BAlias : InstAlias<"$Vdd32=vsubub($Vuu32,$Vvv32):sat", (V6_vsububsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubuhsat_altAlias : InstAlias<"$Vd32=vsubuh($Vu32,$Vv32):sat", (V6_vsubuhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubuhsat_alt_128BAlias : InstAlias<"$Vd32=vsubuh($Vu32,$Vv32):sat", (V6_vsubuhsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubuhsat_dv_altAlias : InstAlias<"$Vdd32=vsubuh($Vuu32,$Vvv32):sat", (V6_vsubuhsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubuhsat_dv_alt_128BAlias : InstAlias<"$Vdd32=vsubuh($Vuu32,$Vvv32):sat", (V6_vsubuhsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubuhw_altAlias : InstAlias<"$Vdd32=vsubuh($Vu32,$Vv32)", (V6_vsubuhw VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubuhw_alt_128BAlias : InstAlias<"$Vdd32=vsubuh($Vu32,$Vv32)", (V6_vsubuhw VecDblRegs:$Vdd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubw_altAlias : InstAlias<"$Vd32=vsubw($Vu32,$Vv32)", (V6_vsubw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubw_alt_128BAlias : InstAlias<"$Vd32=vsubw($Vu32,$Vv32)", (V6_vsubw VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubw_dv_altAlias : InstAlias<"$Vdd32=vsubw($Vuu32,$Vvv32)", (V6_vsubw_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubw_dv_alt_128BAlias : InstAlias<"$Vdd32=vsubw($Vuu32,$Vvv32)", (V6_vsubw_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubwnq_altAlias : InstAlias<"if (!$Qv4.w) $Vx32.w-=$Vu32.w", (V6_vsubwnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubwnq_alt_128BAlias : InstAlias<"if (!$Qv4.w) $Vx32.w-=$Vu32.w", (V6_vsubwnq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubwq_altAlias : InstAlias<"if ($Qv4.w) $Vx32.w-=$Vu32.w", (V6_vsubwq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubwq_alt_128BAlias : InstAlias<"if ($Qv4.w) $Vx32.w-=$Vu32.w", (V6_vsubwq VectorRegs:$Vx32, VecPredRegs:$Qv4, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vsubwsat_altAlias : InstAlias<"$Vd32=vsubw($Vu32,$Vv32):sat", (V6_vsubwsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubwsat_alt_128BAlias : InstAlias<"$Vd32=vsubw($Vu32,$Vv32):sat", (V6_vsubwsat VectorRegs:$Vd32, VectorRegs:$Vu32, VectorRegs:$Vv32)>, Requires<[UseHVX]>;
+def V6_vsubwsat_dv_altAlias : InstAlias<"$Vdd32=vsubw($Vuu32,$Vvv32):sat", (V6_vsubwsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vsubwsat_dv_alt_128BAlias : InstAlias<"$Vdd32=vsubw($Vuu32,$Vvv32):sat", (V6_vsubwsat_dv VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, VecDblRegs:$Vvv32)>, Requires<[UseHVX]>;
+def V6_vtmpyb_acc_altAlias : InstAlias<"$Vxx32+=vtmpyb($Vuu32,$Rt32)", (V6_vtmpyb_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpyb_acc_alt_128BAlias : InstAlias<"$Vxx32+=vtmpyb($Vuu32,$Rt32)", (V6_vtmpyb_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpyb_altAlias : InstAlias<"$Vdd32=vtmpyb($Vuu32,$Rt32)", (V6_vtmpyb VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpyb_alt_128BAlias : InstAlias<"$Vdd32=vtmpyb($Vuu32,$Rt32)", (V6_vtmpyb VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpybus_acc_altAlias : InstAlias<"$Vxx32+=vtmpybus($Vuu32,$Rt32)", (V6_vtmpybus_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpybus_acc_alt_128BAlias : InstAlias<"$Vxx32+=vtmpybus($Vuu32,$Rt32)", (V6_vtmpybus_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpybus_altAlias : InstAlias<"$Vdd32=vtmpybus($Vuu32,$Rt32)", (V6_vtmpybus VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpybus_alt_128BAlias : InstAlias<"$Vdd32=vtmpybus($Vuu32,$Rt32)", (V6_vtmpybus VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpyhb_acc_altAlias : InstAlias<"$Vxx32+=vtmpyhb($Vuu32,$Rt32)", (V6_vtmpyhb_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpyhb_acc_alt_128BAlias : InstAlias<"$Vxx32+=vtmpyhb($Vuu32,$Rt32)", (V6_vtmpyhb_acc VecDblRegs:$Vxx32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpyhb_altAlias : InstAlias<"$Vdd32=vtmpyhb($Vuu32,$Rt32)", (V6_vtmpyhb VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtmpyhb_alt_128BAlias : InstAlias<"$Vdd32=vtmpyhb($Vuu32,$Rt32)", (V6_vtmpyhb VecDblRegs:$Vdd32, VecDblRegs:$Vuu32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtran2x2_mapAlias : InstAlias<"vtrans2x2($Vy32,$Vx32,$Rt32)", (V6_vshuff VectorRegs:$Vy32, VectorRegs:$Vx32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vtran2x2_map_128BAlias : InstAlias<"vtrans2x2($Vy32,$Vx32,$Rt32)", (V6_vshuff VectorRegs:$Vy32, VectorRegs:$Vx32, IntRegs:$Rt32)>, Requires<[UseHVX]>;
+def V6_vunpackb_altAlias : InstAlias<"$Vdd32=vunpackb($Vu32)", (V6_vunpackb VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackb_alt_128BAlias : InstAlias<"$Vdd32=vunpackb($Vu32)", (V6_vunpackb VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackh_altAlias : InstAlias<"$Vdd32=vunpackh($Vu32)", (V6_vunpackh VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackh_alt_128BAlias : InstAlias<"$Vdd32=vunpackh($Vu32)", (V6_vunpackh VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackoh_altAlias : InstAlias<"$Vxx32|=vunpackoh($Vu32)", (V6_vunpackoh VecDblRegs:$Vxx32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackoh_alt_128BAlias : InstAlias<"$Vxx32|=vunpackoh($Vu32)", (V6_vunpackoh VecDblRegs:$Vxx32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackub_altAlias : InstAlias<"$Vdd32=vunpackub($Vu32)", (V6_vunpackub VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackub_alt_128BAlias : InstAlias<"$Vdd32=vunpackub($Vu32)", (V6_vunpackub VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackuh_altAlias : InstAlias<"$Vdd32=vunpackuh($Vu32)", (V6_vunpackuh VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vunpackuh_alt_128BAlias : InstAlias<"$Vdd32=vunpackuh($Vu32)", (V6_vunpackuh VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vzb_altAlias : InstAlias<"$Vdd32=vzxtb($Vu32)", (V6_vzb VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vzb_alt_128BAlias : InstAlias<"$Vdd32=vzxtb($Vu32)", (V6_vzb VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vzh_altAlias : InstAlias<"$Vdd32=vzxth($Vu32)", (V6_vzh VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def V6_vzh_alt_128BAlias : InstAlias<"$Vdd32=vzxth($Vu32)", (V6_vzh VecDblRegs:$Vdd32, VectorRegs:$Vu32)>, Requires<[UseHVX]>;
+def Y2_dcfetchAlias : InstAlias<"dcfetch($Rs32)", (Y2_dcfetchbo IntRegs:$Rs32, 0)>;
diff --git a/lib/Target/Hexagon/HexagonDepOperands.td b/lib/Target/Hexagon/HexagonDepOperands.td
new file mode 100644
index 000000000000..0e83b2678732
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonDepOperands.td
@@ -0,0 +1,132 @@
+//===--- HexagonDepOperands.td --------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def s3_0ImmOperand : AsmOperandClass { let Name = "s3_0Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s3_0Imm : Operand<i32> { let ParserMatchClass = s3_0ImmOperand; let DecoderMethod = "s3_0ImmDecoder"; }
+def s3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<3, 0>(N->getSExtValue());}]>;
+def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; }
+def s4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
+def s29_3ImmOperand : AsmOperandClass { let Name = "s29_3Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s29_3Imm : Operand<i32> { let ParserMatchClass = s29_3ImmOperand; let DecoderMethod = "s29_3ImmDecoder"; }
+def s29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
+def s10_6ImmOperand : AsmOperandClass { let Name = "s10_6Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s10_6Imm : Operand<i32> { let ParserMatchClass = s10_6ImmOperand; let DecoderMethod = "s10_6ImmDecoder"; }
+def s10_6ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<10, 6>(N->getSExtValue());}]>;
+def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; let RenderMethod = "addImmOperands"; }
+def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
+def a30_2ImmOperand : AsmOperandClass { let Name = "a30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def a30_2Imm : Operand<i32> { let ParserMatchClass = a30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
+def a30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
+def u29_3ImmOperand : AsmOperandClass { let Name = "u29_3Imm"; let RenderMethod = "addImmOperands"; }
+def u29_3Imm : Operand<i32> { let ParserMatchClass = u29_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>;
+def s8_0ImmOperand : AsmOperandClass { let Name = "s8_0Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s8_0Imm : Operand<i32> { let ParserMatchClass = s8_0ImmOperand; let DecoderMethod = "s8_0ImmDecoder"; }
+def s8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<8, 0>(N->getSExtValue());}]>;
+def u32_0ImmOperand : AsmOperandClass { let Name = "u32_0Imm"; let RenderMethod = "addImmOperands"; }
+def u32_0Imm : Operand<i32> { let ParserMatchClass = u32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>;
+def u4_2ImmOperand : AsmOperandClass { let Name = "u4_2Imm"; let RenderMethod = "addImmOperands"; }
+def u4_2Imm : Operand<i32> { let ParserMatchClass = u4_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>;
+def u3_0ImmOperand : AsmOperandClass { let Name = "u3_0Imm"; let RenderMethod = "addImmOperands"; }
+def u3_0Imm : Operand<i32> { let ParserMatchClass = u3_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>;
+def b15_2ImmOperand : AsmOperandClass { let Name = "b15_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def b15_2Imm : Operand<OtherVT> { let ParserMatchClass = b15_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
+def b15_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<15, 2>(N->getSExtValue());}]>;
+def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; let RenderMethod = "addImmOperands"; }
+def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u11_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>;
+def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand; let DecoderMethod = "s4_3ImmDecoder"; }
+def s4_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 3>(N->getSExtValue());}]>;
+def m32_0ImmOperand : AsmOperandClass { let Name = "m32_0Imm"; let RenderMethod = "addImmOperands"; }
+def m32_0Imm : Operand<i32> { let ParserMatchClass = m32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def m32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
+def u3_1ImmOperand : AsmOperandClass { let Name = "u3_1Imm"; let RenderMethod = "addImmOperands"; }
+def u3_1Imm : Operand<i32> { let ParserMatchClass = u3_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u3_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>;
+def u1_0ImmOperand : AsmOperandClass { let Name = "u1_0Imm"; let RenderMethod = "addImmOperands"; }
+def u1_0Imm : Operand<i32> { let ParserMatchClass = u1_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u1_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>;
+def s31_1ImmOperand : AsmOperandClass { let Name = "s31_1Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s31_1Imm : Operand<i32> { let ParserMatchClass = s31_1ImmOperand; let DecoderMethod = "s31_1ImmDecoder"; }
+def s31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 1>(N->getSExtValue());}]>;
+def s30_2ImmOperand : AsmOperandClass { let Name = "s30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s30_2Imm : Operand<i32> { let ParserMatchClass = s30_2ImmOperand; let DecoderMethod = "s30_2ImmDecoder"; }
+def s30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
+def u4_0ImmOperand : AsmOperandClass { let Name = "u4_0Imm"; let RenderMethod = "addImmOperands"; }
+def u4_0Imm : Operand<i32> { let ParserMatchClass = u4_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>;
+def s6_0ImmOperand : AsmOperandClass { let Name = "s6_0Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s6_0Imm : Operand<i32> { let ParserMatchClass = s6_0ImmOperand; let DecoderMethod = "s6_0ImmDecoder"; }
+def s6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 0>(N->getSExtValue());}]>;
+def u5_3ImmOperand : AsmOperandClass { let Name = "u5_3Imm"; let RenderMethod = "addImmOperands"; }
+def u5_3Imm : Operand<i32> { let ParserMatchClass = u5_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u5_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>;
+def s32_0ImmOperand : AsmOperandClass { let Name = "s32_0Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s32_0Imm : Operand<i32> { let ParserMatchClass = s32_0ImmOperand; let DecoderMethod = "s32_0ImmDecoder"; }
+def s32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
+def s6_3ImmOperand : AsmOperandClass { let Name = "s6_3Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s6_3Imm : Operand<i32> { let ParserMatchClass = s6_3ImmOperand; let DecoderMethod = "s6_3ImmDecoder"; }
+def s6_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 3>(N->getSExtValue());}]>;
+def u10_0ImmOperand : AsmOperandClass { let Name = "u10_0Imm"; let RenderMethod = "addImmOperands"; }
+def u10_0Imm : Operand<i32> { let ParserMatchClass = u10_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u10_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>;
+def u31_1ImmOperand : AsmOperandClass { let Name = "u31_1Imm"; let RenderMethod = "addImmOperands"; }
+def u31_1Imm : Operand<i32> { let ParserMatchClass = u31_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>;
+def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand; let DecoderMethod = "s4_1ImmDecoder"; }
+def s4_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 1>(N->getSExtValue());}]>;
+def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; let RenderMethod = "addImmOperands"; }
+def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u16_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>;
+def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; let RenderMethod = "addImmOperands"; }
+def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u6_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>;
+def u5_2ImmOperand : AsmOperandClass { let Name = "u5_2Imm"; let RenderMethod = "addImmOperands"; }
+def u5_2Imm : Operand<i32> { let ParserMatchClass = u5_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u5_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>;
+def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; let RenderMethod = "addImmOperands"; }
+def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u26_6ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>;
+def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; let RenderMethod = "addImmOperands"; }
+def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u6_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>;
+def u7_0ImmOperand : AsmOperandClass { let Name = "u7_0Imm"; let RenderMethod = "addImmOperands"; }
+def u7_0Imm : Operand<i32> { let ParserMatchClass = u7_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u7_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>;
+def b13_2ImmOperand : AsmOperandClass { let Name = "b13_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def b13_2Imm : Operand<OtherVT> { let ParserMatchClass = b13_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
+def b13_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<13, 2>(N->getSExtValue());}]>;
+def u5_0ImmOperand : AsmOperandClass { let Name = "u5_0Imm"; let RenderMethod = "addImmOperands"; }
+def u5_0Imm : Operand<i32> { let ParserMatchClass = u5_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u5_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>;
+def u2_0ImmOperand : AsmOperandClass { let Name = "u2_0Imm"; let RenderMethod = "addImmOperands"; }
+def u2_0Imm : Operand<i32> { let ParserMatchClass = u2_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u2_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>;
+def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand; let DecoderMethod = "s4_2ImmDecoder"; }
+def s4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 2>(N->getSExtValue());}]>;
+def b30_2ImmOperand : AsmOperandClass { let Name = "b30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def b30_2Imm : Operand<OtherVT> { let ParserMatchClass = b30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
+def b30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
+def u8_0ImmOperand : AsmOperandClass { let Name = "u8_0Imm"; let RenderMethod = "addImmOperands"; }
+def u8_0Imm : Operand<i32> { let ParserMatchClass = u8_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>;
+def u30_2ImmOperand : AsmOperandClass { let Name = "u30_2Imm"; let RenderMethod = "addImmOperands"; }
+def u30_2Imm : Operand<i32> { let ParserMatchClass = u30_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
+def u30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>;
+def s10_0ImmOperand : AsmOperandClass { let Name = "s10_0Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s10_0Imm : Operand<i32> { let ParserMatchClass = s10_0ImmOperand; let DecoderMethod = "s10_0ImmDecoder"; }
+def s10_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<10, 0>(N->getSExtValue());}]>;
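A note on the naming convention above, for readers of this generated file: an
sN_SImm operand accepts signed values that are multiples of 2^S whose quotient
fits in N bits, and uN_S is the unsigned analogue; this is what the
isShiftedInt/isShiftedUInt predicates check (a few extendable operands, e.g.
s29_3Imm, deliberately use the wider <32, S> form). A minimal sketch of that
check, re-implemented purely for illustration; the real helpers live in
llvm/Support/MathExtras.h:

    #include <cassert>
    #include <cstdint>

    // Illustrative stand-in for llvm::isShiftedInt<N, S>: true iff x is a
    // multiple of 2^S whose payload (x >> S) fits in an N-bit signed field.
    template <unsigned N, unsigned S>
    bool isShiftedIntSketch(int64_t x) {
      if (x & ((int64_t(1) << S) - 1))  // the low S bits must be zero
        return false;
      int64_t Q = x >> S;
      return Q >= -(int64_t(1) << (N - 1)) && Q < (int64_t(1) << (N - 1));
    }

    int main() {
      assert(isShiftedIntSketch<4, 2>(28));   // s4_2Imm: 28 == 7 << 2
      assert(!isShiftedIntSketch<4, 2>(30));  // not a multiple of 4
      assert(!isShiftedIntSketch<4, 2>(32));  // 8 << 2, but 8 exceeds s4
      assert(isShiftedIntSketch<4, 2>(-32));  // -8 << 2, -8 fits in s4
    }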
diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index a5351cd08da5..67af947e089d 100644
--- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -105,6 +105,8 @@ namespace {
cl::init(false), cl::desc("Enable branch probability info"));
cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden,
cl::desc("Size limit in Hexagon early if-conversion"));
+ cl::opt<bool> SkipExitBranches("eif-no-loop-exit", cl::init(false),
+ cl::Hidden, cl::desc("Do not convert branches that may exit the loop"));
struct PrintMB {
PrintMB(const MachineBasicBlock *B) : MB(B) {}
@@ -142,8 +144,8 @@ namespace {
raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) {
OS << "{ SplitB:" << PrintMB(P.FP.SplitB)
<< ", PredR:" << PrintReg(P.FP.PredR, &P.TRI)
- << ", TrueB:" << PrintMB(P.FP.TrueB) << ", FalseB:"
- << PrintMB(P.FP.FalseB)
+ << ", TrueB:" << PrintMB(P.FP.TrueB)
+ << ", FalseB:" << PrintMB(P.FP.FalseB)
<< ", JoinB:" << PrintMB(P.FP.JoinB) << " }";
return OS;
}
@@ -187,7 +189,8 @@ namespace {
bool usesUndefVReg(const MachineInstr *MI) const;
bool isValid(const FlowPattern &FP) const;
unsigned countPredicateDefs(const MachineBasicBlock *B) const;
- unsigned computePhiCost(MachineBasicBlock *B) const;
+ unsigned computePhiCost(const MachineBasicBlock *B,
+ const FlowPattern &FP) const;
bool isProfitable(const FlowPattern &FP) const;
bool isPredicableStore(const MachineInstr *MI) const;
bool isSafeToSpeculate(const MachineInstr *MI) const;
@@ -199,6 +202,9 @@ namespace {
MachineBasicBlock::iterator At, MachineBasicBlock *FromB,
unsigned PredR, bool IfTrue);
+ unsigned buildMux(MachineBasicBlock *B, MachineBasicBlock::iterator At,
+ const TargetRegisterClass *DRC, unsigned PredR, unsigned TR,
+ unsigned TSR, unsigned FR, unsigned FSR);
void updatePhiNodes(MachineBasicBlock *WhereB, const FlowPattern &FP);
void convert(const FlowPattern &FP);
@@ -230,7 +236,7 @@ bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const {
return false;
MachineBasicBlock *SB = *B->succ_begin();
MachineLoop *L = MLI->getLoopFor(SB);
- return L && SB == L->getHeader();
+ return L && SB == L->getHeader() && MDT->dominates(B, SB);
}
bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
@@ -264,9 +270,6 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
// mark as diamond with both sides equal?
return false;
}
- // Loop could be null for both.
- if (MLI->getLoopFor(T1B) != L || MLI->getLoopFor(T2B) != L)
- return false;
// Record the true/false blocks in such a way that "true" means "if (PredR)",
// and "false" means "if (!PredR)".
@@ -289,8 +292,14 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
// it has a single successor. In fact, the block has to end either with
// an unconditional branch (which can be predicated), or with a fall-
// through.
- bool TOk = (TNP == 1) && (TNS == 1);
- bool FOk = (FNP == 1) && (FNS == 1);
+ // Also, skip blocks that do not belong to the same loop.
+ bool TOk = (TNP == 1 && TNS == 1 && MLI->getLoopFor(TB) == L);
+ bool FOk = (FNP == 1 && FNS == 1 && MLI->getLoopFor(FB) == L);
+
+ // If requested (via an option), do not consider branches where the
+ // true and false targets do not belong to the same loop.
+ if (SkipExitBranches && MLI->getLoopFor(TB) != MLI->getLoopFor(FB))
+ return false;
// If neither is predicable, there is nothing interesting.
if (!TOk && !FOk)
@@ -307,17 +316,15 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
// Diamond: "if (P) then TB; else FB;".
} else {
// TOk && !FOk
- if (TSB == FB) {
+ if (TSB == FB)
JB = FB;
- FB = nullptr;
- }
+ FB = nullptr;
}
} else {
// !TOk && FOk (at least one must be true by now).
- if (FSB == TB) {
+ if (FSB == TB)
JB = TB;
- TB = nullptr;
- }
+ TB = nullptr;
}
// Don't try to predicate loop preheaders.
if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) {
@@ -383,8 +390,14 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
unsigned R = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(R))
continue;
- if (MRI->getRegClass(R) != &Hexagon::PredRegsRegClass)
- continue;
+ switch (MRI->getRegClass(R)->getID()) {
+ case Hexagon::PredRegsRegClassID:
+ case Hexagon::VecPredRegsRegClassID:
+ case Hexagon::VecPredRegs128BRegClassID:
+ break;
+ default:
+ continue;
+ }
for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U)
if (U->getParent()->isPHI())
return false;
@@ -442,24 +455,39 @@ bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const {
return true;
}
-unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const {
- assert(B->pred_size() <= 2);
+unsigned HexagonEarlyIfConversion::computePhiCost(const MachineBasicBlock *B,
+ const FlowPattern &FP) const {
if (B->pred_size() < 2)
return 0;
unsigned Cost = 0;
- MachineBasicBlock::const_iterator I, E = B->getFirstNonPHI();
- for (I = B->begin(); I != E; ++I) {
- const MachineOperand &RO1 = I->getOperand(1);
- const MachineOperand &RO3 = I->getOperand(3);
- assert(RO1.isReg() && RO3.isReg());
+ for (const MachineInstr &MI : *B) {
+ if (!MI.isPHI())
+ break;
+ // If both incoming blocks are one of the TrueB/FalseB/SplitB, then
+ // a MUX may be needed. Otherwise the PHI will need to be updated at
+ // no extra cost.
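+    // For example, a PHI whose two incoming values come from TrueB and
+    // FalseB will need a mux, while a PHI with at most one incoming value
+    // from the pattern's blocks is merely rewired and costs nothing.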
+ // Find the interesting PHI operands for further checks.
+ SmallVector<unsigned,2> Inc;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+ const MachineBasicBlock *BB = MI.getOperand(i+1).getMBB();
+ if (BB == FP.SplitB || BB == FP.TrueB || BB == FP.FalseB)
+ Inc.push_back(i);
+ }
+ assert(Inc.size() <= 2);
+ if (Inc.size() < 2)
+ continue;
+
+ const MachineOperand &RA = MI.getOperand(1);
+ const MachineOperand &RB = MI.getOperand(3);
+ assert(RA.isReg() && RB.isReg());
// Must have a MUX if the phi uses a subregister.
- if (RO1.getSubReg() != 0 || RO3.getSubReg() != 0) {
+ if (RA.getSubReg() != 0 || RB.getSubReg() != 0) {
Cost++;
continue;
}
- MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg());
- MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg());
+ const MachineInstr *Def1 = MRI->getVRegDef(RA.getReg());
+ const MachineInstr *Def3 = MRI->getVRegDef(RB.getReg());
if (!HII->isPredicable(*Def1) || !HII->isPredicable(*Def3))
Cost++;
}
@@ -485,7 +513,6 @@ unsigned HexagonEarlyIfConversion::countPredicateDefs(
bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
if (FP.TrueB && FP.FalseB) {
-
  // Do not if-convert if the branch is one sided.
if (MBPI) {
BranchProbability Prob(9, 10);
@@ -510,18 +537,16 @@ bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
// the code size. If the predicated blocks are smaller than a packet size,
// approximate the spare room in the packet that could be filled with the
// predicated/speculated instructions.
- unsigned TS = 0, FS = 0, Spare = 0;
- if (FP.TrueB) {
- TS = std::distance(FP.TrueB->begin(), FP.TrueB->getFirstTerminator());
- if (TS < HEXAGON_PACKET_SIZE)
- Spare += HEXAGON_PACKET_SIZE-TS;
- }
- if (FP.FalseB) {
- FS = std::distance(FP.FalseB->begin(), FP.FalseB->getFirstTerminator());
- if (FS < HEXAGON_PACKET_SIZE)
- Spare += HEXAGON_PACKET_SIZE-TS;
- }
- unsigned TotalIn = TS+FS;
+ auto TotalCount = [] (const MachineBasicBlock *B, unsigned &Spare) {
+ if (!B)
+ return 0u;
+ unsigned T = std::distance(B->begin(), B->getFirstTerminator());
+ if (T < HEXAGON_PACKET_SIZE)
+ Spare += HEXAGON_PACKET_SIZE-T;
+ return T;
+ };
+ unsigned Spare = 0;
+ unsigned TotalIn = TotalCount(FP.TrueB, Spare) + TotalCount(FP.FalseB, Spare);
DEBUG(dbgs() << "Total number of instructions to be predicated/speculated: "
<< TotalIn << ", spare room: " << Spare << "\n");
if (TotalIn >= SizeLimit+Spare)
@@ -536,17 +561,17 @@ bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
unsigned TotalPh = 0;
unsigned PredDefs = countPredicateDefs(FP.SplitB);
if (FP.JoinB) {
- TotalPh = computePhiCost(FP.JoinB);
+ TotalPh = computePhiCost(FP.JoinB, FP);
PredDefs += countPredicateDefs(FP.JoinB);
} else {
if (FP.TrueB && FP.TrueB->succ_size() > 0) {
MachineBasicBlock *SB = *FP.TrueB->succ_begin();
- TotalPh += computePhiCost(SB);
+ TotalPh += computePhiCost(SB, FP);
PredDefs += countPredicateDefs(SB);
}
if (FP.FalseB && FP.FalseB->succ_size() > 0) {
MachineBasicBlock *SB = *FP.FalseB->succ_begin();
- TotalPh += computePhiCost(SB);
+ TotalPh += computePhiCost(SB, FP);
PredDefs += countPredicateDefs(SB);
}
}
@@ -680,12 +705,12 @@ void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB,
MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, HII->get(COpc));
MachineInstr::mop_iterator MOI = MI->operands_begin();
if (HII->isPostIncrement(*MI)) {
- MIB.addOperand(*MOI);
+ MIB.add(*MOI);
++MOI;
}
MIB.addReg(PredR);
for (const MachineOperand &MO : make_range(MOI, MI->operands_end()))
- MIB.addOperand(MO);
+ MIB.add(MO);
// Set memory references.
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
@@ -733,6 +758,43 @@ void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB,
}
}
+unsigned HexagonEarlyIfConversion::buildMux(MachineBasicBlock *B,
+ MachineBasicBlock::iterator At, const TargetRegisterClass *DRC,
+ unsigned PredR, unsigned TR, unsigned TSR, unsigned FR, unsigned FSR) {
+ unsigned Opc = 0;
+ switch (DRC->getID()) {
+ case Hexagon::IntRegsRegClassID:
+ Opc = Hexagon::C2_mux;
+ break;
+ case Hexagon::DoubleRegsRegClassID:
+ Opc = Hexagon::PS_pselect;
+ break;
+ case Hexagon::VectorRegsRegClassID:
+ Opc = Hexagon::PS_vselect;
+ break;
+ case Hexagon::VecDblRegsRegClassID:
+ Opc = Hexagon::PS_wselect;
+ break;
+ case Hexagon::VectorRegs128BRegClassID:
+ Opc = Hexagon::PS_vselect_128B;
+ break;
+ case Hexagon::VecDblRegs128BRegClassID:
+ Opc = Hexagon::PS_wselect_128B;
+ break;
+ default:
+ llvm_unreachable("unexpected register type");
+ }
+ const MCInstrDesc &D = HII->get(Opc);
+
+ DebugLoc DL = B->findBranchDebugLoc();
+ unsigned MuxR = MRI->createVirtualRegister(DRC);
+ BuildMI(*B, At, DL, D, MuxR)
+ .addReg(PredR)
+ .addReg(TR, 0, TSR)
+ .addReg(FR, 0, FSR);
+ return MuxR;
+}
+
void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB,
const FlowPattern &FP) {
// Visit all PHI nodes in the WhereB block and generate MUX instructions
@@ -759,40 +821,25 @@ void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB,
TR = SR, TSR = SSR;
else if (FR == 0)
FR = SR, FSR = SSR;
- assert(TR && FR);
-
- using namespace Hexagon;
-
- unsigned DR = PN->getOperand(0).getReg();
- const TargetRegisterClass *RC = MRI->getRegClass(DR);
- unsigned Opc = 0;
- if (RC == &IntRegsRegClass)
- Opc = C2_mux;
- else if (RC == &DoubleRegsRegClass)
- Opc = PS_pselect;
- else if (RC == &VectorRegsRegClass)
- Opc = PS_vselect;
- else if (RC == &VecDblRegsRegClass)
- Opc = PS_wselect;
- else if (RC == &VectorRegs128BRegClass)
- Opc = PS_vselect_128B;
- else if (RC == &VecDblRegs128BRegClass)
- Opc = PS_wselect_128B;
- else
- llvm_unreachable("unexpected register type");
- const MCInstrDesc &D = HII->get(Opc);
-
- MachineBasicBlock::iterator MuxAt = FP.SplitB->getFirstTerminator();
- DebugLoc DL;
- if (MuxAt != FP.SplitB->end())
- DL = MuxAt->getDebugLoc();
- unsigned MuxR = MRI->createVirtualRegister(RC);
- BuildMI(*FP.SplitB, MuxAt, DL, D, MuxR)
- .addReg(FP.PredR)
- .addReg(TR, 0, TSR)
- .addReg(FR, 0, FSR);
-
- PN->addOperand(MachineOperand::CreateReg(MuxR, false));
+
+ assert(TR || FR);
+ unsigned MuxR = 0, MuxSR = 0;
+
+ if (TR && FR) {
+ unsigned DR = PN->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DR);
+ MuxR = buildMux(FP.SplitB, FP.SplitB->getFirstTerminator(), RC,
+ FP.PredR, TR, TSR, FR, FSR);
+ } else if (TR) {
+ MuxR = TR;
+ MuxSR = TSR;
+ } else {
+ MuxR = FR;
+ MuxSR = FSR;
+ }
+
+ PN->addOperand(MachineOperand::CreateReg(MuxR, false, false, false, false,
+ false, false, MuxSR));
PN->addOperand(MachineOperand::CreateMBB(FP.SplitB));
}
}
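To make the flow-pattern terminology above concrete, here is a source-level
sketch (not part of the patch, names hypothetical) of what the pass does to a
diamond; the join-block PHI becomes the mux emitted by buildMux (C2_mux for
32-bit scalars, PS_pselect for 64-bit pairs, PS_vselect/PS_wselect for HVX):

    #include <cstdint>

    // Before conversion: a diamond with a PHI in the join block.
    uint32_t diamond(bool P, uint32_t A, uint32_t B) {
      uint32_t V;
      if (P)
        V = A + 1;  // TrueB
      else
        V = B - 1;  // FalseB
      return V;     // JoinB: V = PHI([A+1, TrueB], [B-1, FalseB])
    }

    // After conversion: both sides are speculated into SplitB and the PHI
    // is replaced by a mux on the branch predicate.
    uint32_t diamondConverted(bool P, uint32_t A, uint32_t B) {
      uint32_t T = A + 1;  // speculated TrueB
      uint32_t F = B - 1;  // speculated FalseB
      return P ? T : F;    // mux(P, T, F)
    }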
diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index 8f070d842b8c..d8ba5dcd35ad 100644
--- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -362,14 +362,16 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
if (Range.empty())
return;
- auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> bool {
+  // Return two booleans: { def-modifies-reg, def-covers-reg }.
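+  // (The second flag is true when every lane written by the def lies within
+  // the queried lane mask LM.)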
+ auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> std::pair<bool,bool> {
if (!Op.isReg() || !Op.isDef())
- return false;
+ return { false, false };
unsigned DR = Op.getReg(), DSR = Op.getSubReg();
if (!TargetRegisterInfo::isVirtualRegister(DR) || DR != Reg)
- return false;
+ return { false, false };
LaneBitmask SLM = getLaneMask(DR, DSR);
- return (SLM & LM).any();
+ LaneBitmask A = SLM & LM;
+ return { A.any(), A == SLM };
};
// The splitting step will create pairs of predicated definitions without
@@ -453,20 +455,27 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
// Remove <dead> flags from all defs that are not dead after live range
// extension, and collect all def operands. They will be used to generate
// the necessary implicit uses.
+ // At the same time, add <dead> flag to all defs that are actually dead.
+ // This can happen, for example, when a mux with identical inputs is
+ // replaced with a COPY: the use of the predicate register disappears and
+  // its def can become dead.
std::set<RegisterRef> DefRegs;
for (auto &Seg : Range) {
if (!Seg.start.isRegister())
continue;
MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
for (auto &Op : DefI->operands()) {
- if (Seg.start.isDead() || !IsRegDef(Op))
- continue;
- DefRegs.insert(Op);
- Op.setIsDead(false);
+ auto P = IsRegDef(Op);
+ if (P.second && Seg.end.isDead()) {
+ Op.setIsDead(true);
+ } else if (P.first) {
+ DefRegs.insert(Op);
+ Op.setIsDead(false);
+ }
}
}
- // Finally, add implicit uses to each predicated def that is reached
+ // Now, add implicit uses to each predicated def that is reached
// by other defs.
for (auto &Seg : Range) {
if (!Seg.start.isRegister() || !Range.liveAt(Seg.start.getPrevSlot()))
@@ -486,6 +495,7 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
for (RegisterRef R : ImpUses)
MachineInstrBuilder(MF, DefI).addReg(R.Reg, RegState::Implicit, R.Sub);
}
}
void HexagonExpandCondsets::updateDeadFlags(unsigned Reg) {
@@ -595,9 +605,9 @@ MachineInstr *HexagonExpandCondsets::genCondTfrFor(MachineOperand &SrcOp,
.addReg(SrcOp.getReg(), SrcState, SrcOp.getSubReg());
} else {
MIB = BuildMI(B, At, DL, HII->get(Opc))
- .addReg(DstR, DstState, DstSR)
- .addReg(PredOp.getReg(), PredState, PredOp.getSubReg())
- .addOperand(SrcOp);
+ .addReg(DstR, DstState, DstSR)
+ .addReg(PredOp.getReg(), PredState, PredOp.getSubReg())
+ .add(SrcOp);
}
DEBUG(dbgs() << "created an initial copy: " << *MIB);
@@ -622,6 +632,12 @@ bool HexagonExpandCondsets::split(MachineInstr &MI,
bool ReadUndef = MD.isUndef();
MachineBasicBlock::iterator At = MI;
+ auto updateRegs = [&UpdRegs] (const MachineInstr &MI) -> void {
+ for (auto &Op : MI.operands())
+ if (Op.isReg())
+ UpdRegs.insert(Op.getReg());
+ };
+
// If this is a mux of the same register, just replace it with COPY.
// Ideally, this would happen earlier, so that register coalescing would
// see it.
@@ -630,6 +646,8 @@ bool HexagonExpandCondsets::split(MachineInstr &MI,
if (ST.isReg() && SF.isReg()) {
RegisterRef RT(ST);
if (RT == RegisterRef(SF)) {
+ // Copy regs to update first.
+ updateRegs(MI);
MI.setDesc(HII->get(TargetOpcode::COPY));
unsigned S = getRegState(ST);
while (MI.getNumOperands() > 1)
@@ -651,9 +669,7 @@ bool HexagonExpandCondsets::split(MachineInstr &MI,
LIS->InsertMachineInstrInMaps(*TfrF);
// Will need to recalculate live intervals for all registers in MI.
- for (auto &Op : MI.operands())
- if (Op.isReg())
- UpdRegs.insert(Op.getReg());
+ updateRegs(MI);
removeInstr(MI);
return true;
@@ -828,7 +844,7 @@ void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp,
while (Ox < NP) {
MachineOperand &MO = MI.getOperand(Ox);
if (!MO.isReg() || !MO.isImplicit())
- MB.addOperand(MO);
+ MB.add(MO);
Ox++;
}
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index dfd1f1d4f886..015d3b840e6f 100644
--- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -190,5 +190,5 @@ void HexagonFixupHwLoops::useExtLoopInstr(MachineFunction &MF,
MIB = BuildMI(*MBB, MII, DL, TII->get(newOp));
for (unsigned i = 0; i < MII->getNumOperands(); ++i)
- MIB.addOperand(MII->getOperand(i));
+ MIB.add(MII->getOperand(i));
}
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index a3f6273f9f67..0e2380f4316a 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -301,16 +301,30 @@ static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
// the frame creation/destruction instructions.
if (MO.isFI())
return true;
- if (!MO.isReg())
- continue;
- unsigned R = MO.getReg();
- // Virtual registers will need scavenging, which then may require
- // a stack slot.
- if (TargetRegisterInfo::isVirtualRegister(R))
- return true;
- for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S)
- if (CSR[*S])
+ if (MO.isReg()) {
+ unsigned R = MO.getReg();
+ // Virtual registers will need scavenging, which then may require
+ // a stack slot.
+ if (TargetRegisterInfo::isVirtualRegister(R))
return true;
+ for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S)
+ if (CSR[*S])
+ return true;
+ continue;
+ }
+ if (MO.isRegMask()) {
+ // A regmask would normally have all callee-saved registers marked
+      // as preserved, so this check should be unnecessary; but in case
+      // other regmasks (for other calling conventions) ever appear, make
+      // sure they are processed correctly.
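+      // In a regmask, a set bit means "preserved across the call": register
+      // R is preserved iff bit R%32 of word R/32 is set.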
+ const uint32_t *BM = MO.getRegMask();
+ for (int x = CSR.find_first(); x >= 0; x = CSR.find_next(x)) {
+ unsigned R = x;
+ // If this regmask does not preserve a CSR, a frame will be needed.
+ if (!(BM[R/32] & (1u << (R%32))))
+ return true;
+ }
+ }
}
}
return false;
@@ -1473,8 +1487,7 @@ bool HexagonFrameLowering::expandCopy(MachineBasicBlock &B,
return false;
unsigned TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
- BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR)
- .addOperand(MI->getOperand(1));
+ BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR).add(MI->getOperand(1));
BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), DstR)
.addReg(TmpR, RegState::Kill);
@@ -1646,8 +1659,15 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
LivePhysRegs LPR(&HRI);
LPR.addLiveIns(B);
SmallVector<std::pair<unsigned, const MachineOperand*>,2> Clobbers;
- for (auto R = B.begin(); R != It; ++R)
+ for (auto R = B.begin(); R != It; ++R) {
+ Clobbers.clear();
LPR.stepForward(*R, Clobbers);
+ // Dead defs are recorded in Clobbers, but are not automatically removed
+ // from the live set.
+ for (auto &C : Clobbers)
+ if (C.second->isReg() && C.second->isDead())
+ LPR.removeReg(C.first);
+ }
DebugLoc DL = MI->getDebugLoc();
unsigned SrcR = MI->getOperand(2).getReg();
@@ -1985,9 +2005,9 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
// class HaveRC and a new class NewRC. Return nullptr if a common class
// cannot be found, otherwise return the resulting class. If HaveRC is
// nullptr, assume that it is still unset.
- auto getCommonRC = [&HRI] (const TargetRegisterClass *HaveRC,
- const TargetRegisterClass *NewRC)
- -> const TargetRegisterClass* {
+ auto getCommonRC =
+ [](const TargetRegisterClass *HaveRC,
+ const TargetRegisterClass *NewRC) -> const TargetRegisterClass * {
if (HaveRC == nullptr || HaveRC == NewRC)
return NewRC;
// Different classes, both non-null. Pick the more general one.
@@ -2221,7 +2241,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
if (SrcRR.Reg != FoundR || SrcRR.Sub != 0) {
const DebugLoc &DL = SI.getDebugLoc();
CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR)
- .addOperand(SrcOp);
+ .add(SrcOp);
}
++StartIt;
diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp
index bb5e379ce014..c99ad5130aef 100644
--- a/lib/Target/Hexagon/HexagonGenExtract.cpp
+++ b/lib/Target/Hexagon/HexagonGenExtract.cpp
@@ -197,13 +197,13 @@ bool HexagonGenExtract::convert(Instruction *In) {
// It is still ok to generate extract, but only if the mask eliminates
// those bits (i.e. M does not have any bits set beyond U).
APInt C = APInt::getHighBitsSet(BW, BW-U);
- if (M.intersects(C) || !APIntOps::isMask(W, M))
+ if (M.intersects(C) || !M.isMask(W))
return false;
} else {
// Check if M starts with a contiguous sequence of W times 1 bits. Get
// the low U bits of M (which eliminates the 0 bits shifted in on the
// left), and check if the result is APInt's "mask":
- if (!APIntOps::isMask(W, M.getLoBits(U)))
+ if (!M.getLoBits(U).isMask(W))
return false;
}
@@ -221,11 +221,8 @@ bool HexagonGenExtract::convert(Instruction *In) {
bool HexagonGenExtract::visitBlock(BasicBlock *B) {
// Depth-first, bottom-up traversal.
- DomTreeNode *DTN = DT->getNode(B);
- typedef GraphTraits<DomTreeNode*> GTN;
- typedef GTN::ChildIteratorType Iter;
- for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I)
- visitBlock((*I)->getBlock());
+ for (auto *DTN : children<DomTreeNode*>(DT->getNode(B)))
+ visitBlock(DTN->getBlock());
// Allow limiting the number of generated extracts for debugging purposes.
bool HasCutoff = ExtractCutoff.getPosition();
diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp
index 5a8e392d1275..54d99d399f88 100644
--- a/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -947,11 +947,8 @@ void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
BlockDefs.insert(InsDefs);
}
- MachineDomTreeNode *N = MDT->getNode(B);
- typedef GraphTraits<MachineDomTreeNode*> GTN;
- typedef GTN::ChildIteratorType ChildIter;
- for (ChildIter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) {
- MachineBasicBlock *SB = (*I)->getBlock();
+ for (auto *DTN : children<MachineDomTreeNode*>(MDT->getNode(B))) {
+ MachineBasicBlock *SB = DTN->getBlock();
collectInBlock(SB, AVs);
}
@@ -1422,9 +1419,9 @@ bool HexagonGenInsert::generateInserts() {
bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
bool Changed = false;
- typedef GraphTraits<MachineDomTreeNode*> GTN;
- for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
- Changed |= removeDeadCode(*I);
+
+ for (auto *DTN : children<MachineDomTreeNode*>(N))
+ Changed |= removeDeadCode(DTN);
MachineBasicBlock *B = N->getBlock();
std::vector<MachineInstr*> Instrs;
diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp
index a718df9c70ab..85222944c77c 100644
--- a/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -324,9 +324,9 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
if (!MxOpc)
continue;
BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR)
- .addReg(MX.PredR)
- .addOperand(*MX.SrcT)
- .addOperand(*MX.SrcF);
+ .addReg(MX.PredR)
+ .add(*MX.SrcT)
+ .add(*MX.SrcF);
B.erase(MX.Def1);
B.erase(MX.Def2);
Changed = true;
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index e477dcc0f64a..86a8089401c2 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -100,6 +100,7 @@ namespace {
MachineRegisterInfo *MRI;
MachineDominatorTree *MDT;
const HexagonInstrInfo *TII;
+ const HexagonRegisterInfo *TRI;
#ifndef NDEBUG
static int Counter;
#endif
@@ -381,7 +382,9 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
MLI = &getAnalysis<MachineLoopInfo>();
MRI = &MF.getRegInfo();
MDT = &getAnalysis<MachineDominatorTree>();
- TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();
+ TII = HST.getInstrInfo();
+ TRI = HST.getRegisterInfo();
for (auto &L : *MLI)
if (!L->getParentLoop()) {
@@ -960,24 +963,21 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
/// \brief Return true if the operation is invalid within hardware loop.
bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
bool IsInnerHWLoop) const {
-
// Call is not allowed because the callee may use a hardware loop except for
// the case when the call never returns.
if (MI->getDesc().isCall())
return !TII->doesNotReturn(*MI);
// Check if the instruction defines a hardware loop register.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef())
- continue;
- unsigned R = MO.getReg();
- if (IsInnerHWLoop && (R == Hexagon::LC0 || R == Hexagon::SA0 ||
- R == Hexagon::LC1 || R == Hexagon::SA1))
- return true;
- if (!IsInnerHWLoop && (R == Hexagon::LC1 || R == Hexagon::SA1))
+ using namespace Hexagon;
+ static const unsigned Regs01[] = { LC0, SA0, LC1, SA1 };
+ static const unsigned Regs1[] = { LC1, SA1 };
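+  // LC0/SA0 are the loop-count and start-address registers of the inner
+  // hardware loop; LC1/SA1 belong to the outer one.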
+ auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, array_lengthof(Regs01))
+ : makeArrayRef(Regs1, array_lengthof(Regs1));
+ for (unsigned R : CheckRegs)
+ if (MI->modifiesRegister(R, TRI))
return true;
- }
+
return false;
}
@@ -1511,7 +1511,7 @@ bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
int64_t V1, V2;
if (!checkForImmediate(S1, V1) || !checkForImmediate(S2, V2))
return false;
- TV = V2 | (V1 << 32);
+ TV = V2 | (static_cast<uint64_t>(V1) << 32);
break;
}
case TargetOpcode::REG_SEQUENCE: {
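An aside on the checkForImmediate change above: the added static_cast matters
because left-shifting a negative signed value is undefined behavior before
C++20. A small sketch with hypothetical halves of a 64-bit immediate:

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t V1 = -1;  // high half, sign-extended
      int64_t V2 = 5;   // low half
      // UB before C++20: int64_t Bad = V2 | (V1 << 32);
      // Shifting an unsigned copy of V1 is well defined:
      int64_t TV = V2 | (static_cast<uint64_t>(V1) << 32);
      assert(static_cast<uint64_t>(TV) == 0xffffffff00000005ull);
    }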
diff --git a/lib/Target/Hexagon/HexagonIICHVX.td b/lib/Target/Hexagon/HexagonIICHVX.td
new file mode 100644
index 000000000000..4081a225832b
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIICHVX.td
@@ -0,0 +1,102 @@
+//===--- HexagonIICHVX.td -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Though all these itinerary classes exist for V60 onwards, they are being
+// listed here as 'HVXV62Itin' because the itinerary class descriptions prior
+// to V62 do not include operand cycle info. In the future, they are expected
+// to be merged into a single 'HVXItin'.
+//
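+// Each InstrItinData entry below pairs an itinerary class with the pipeline
+// stages it occupies (issue slots and, for most classes, a CVI resource
+// unit) and a per-operand cycle list, with the result operand's latency
+// listed first.
+//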
+class HVXV62Itin {
+ list<InstrItinData> HVXV62Itin_list = [
+ InstrItinData<COPROC_VMEM_vtc_long_SLOT01,
+ [InstrStage<1, [SLOT0, SLOT1]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<COPROC_VX_vtc_long_SLOT23,
+ [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<COPROC_VX_vtc_SLOT23,
+ [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<CVI_VA, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE,CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VA_DV, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF, CVI_MPY01]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VX_LONG, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VX_LATE, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VX, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY0, CVI_MPY1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VX_DV_LONG, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VX_DV, [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [CVI_MPY01]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VX_DV_SLOT2, [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VX_DV_SLOT2_LONG_EARLY,
+ [InstrStage<1, [SLOT2], 0>,
+ InstrStage<1, [CVI_MPY01]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VP, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VP_LONG, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VP_VS_EARLY, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VP_VS_LONG, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VP_VS, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VP_VS_LONG_EARLY,
+ [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VP_DV, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLSHF]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VS, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_SHIFT]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VINLANESAT, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VM_LD, [InstrStage<1, [SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VM_TMP_LD, [InstrStage<1,[SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD]>],[1, 1, 1, 1, 10]>,
+ InstrItinData<CVI_VM_CUR_LD, [InstrStage<1,[SLOT0, SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VM_VP_LDU, [InstrStage<1,[SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_LD], 0>,
+ InstrStage<1, [CVI_XLANE]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VM_ST, [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE, CVI_SHIFT,
+ CVI_MPY0, CVI_MPY1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CVI_VM_NEW_ST, [InstrStage<1,[SLOT0], 0>,
+ InstrStage<1, [CVI_ST]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_VM_STU, [InstrStage<1, [SLOT0], 0>,
+ InstrStage<1, [SLOT1], 0>,
+ InstrStage<1, [CVI_ST], 0>,
+ InstrStage<1, [CVI_XLANE]>], [1, 1, 1, 1]>,
+ InstrItinData<CVI_HIST, [InstrStage<1, [SLOT0,SLOT1,SLOT2,SLOT3], 0>,
+ InstrStage<1, [CVI_ALL]>], [1, 1, 1, 1]>];
+}
diff --git a/lib/Target/Hexagon/HexagonIICScalar.td b/lib/Target/Hexagon/HexagonIICScalar.td
new file mode 100644
index 000000000000..e69cfbdad688
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIICScalar.td
@@ -0,0 +1,164 @@
+//===--- HexagonIICScalar.td ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// These itinerary class descriptions are based on the instruction timing
+// classes as per V62. Currently, they are just extracted from
+// HexagonScheduleV62.td but will soon be auto-generated by HexagonGen.py.
+
+class ScalarItin {
+ list<InstrItinData> ScalarItin_list = [
+ InstrItinData<ALU32_2op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 1]>,
+ InstrItinData<ALU32_2op_tc_2early_SLOT0123,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 1, 1]>,
+ InstrItinData<ALU32_3op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 1]>,
+ InstrItinData<ALU32_3op_tc_2_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 1, 1]>,
+ InstrItinData<ALU32_3op_tc_2early_SLOT0123,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 1, 1]>,
+ InstrItinData<ALU32_ADDI_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 1]>,
+
+ // ALU64
+ InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [1, 1, 1]>,
+ InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
+ InstrItinData<ALU64_tc_2early_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
+ InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1]>,
+
+ // CR -> System
+ InstrItinData<CR_tc_2_SLOT3 , [InstrStage<1, [SLOT3]>], [2, 1, 1]>,
+ InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<1, [SLOT3]>], [2, 1, 1]>,
+ InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<1, [SLOT3]>], [3, 1, 1]>,
+
+ // Jump (conditional/unconditional/return etc)
+ InstrItinData<CR_tc_2early_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<CJ_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<CJ_tc_2early_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<J_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 1, 1, 1]>,
+
+ // JR
+ InstrItinData<J_tc_2early_SLOT2 , [InstrStage<1, [SLOT2]>], [2, 1, 1]>,
+ InstrItinData<J_tc_3stall_SLOT2 , [InstrStage<1, [SLOT2]>], [3, 1, 1]>,
+
+ // Extender
+ InstrItinData<EXTENDER_tc_1_SLOT0123, [InstrStage<1,
+ [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 1, 1, 1]>,
+
+ // Load
+ InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
+ [3, 1]>,
+ InstrItinData<LD_tc_ld_pi_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
+ [3, 1]>,
+ InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<1, [SLOT0]>], [4, 1]>,
+ InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>], [3, 1]>,
+
+ // M
+ InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [1, 1, 1]>,
+ InstrItinData<M_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
+ InstrItinData<M_tc_2_acc_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
+ InstrItinData<M_tc_3_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1]>,
+ InstrItinData<M_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1]>,
+ InstrItinData<M_tc_3x_acc_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [4, 1, 1]>,
+ InstrItinData<M_tc_3or4x_acc_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [4, 1, 1]>,
+ InstrItinData<M_tc_3stall_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1]>,
+
+ // Store
+ InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
+ [1, 1, 1]>,
+ InstrItinData<ST_tc_st_pi_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>],
+ [1, 1, 1]>,
+ InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<1, [SLOT0]>], [3, 1, 1]>,
+ InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>], [3, 1, 1]>,
+ InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>], [1, 1, 1]>,
+ InstrItinData<ST_tc_st_pi_SLOT0 , [InstrStage<1, [SLOT0]>], [1, 1, 1]>,
+
+ // S
+ InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [1, 1, 1]>,
+ InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
+ InstrItinData<S_2op_tc_2early_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
+ // The S_2op_tc_3x_SLOT23 slots are 4 cycles on v60.
+ InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [4, 1, 1]>,
+ InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [1, 1, 1]>,
+ InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
+ InstrItinData<S_3op_tc_2early_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
+ InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1]>,
+ InstrItinData<S_3op_tc_3stall_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1]>,
+ InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1]>,
+
+ // New Value Compare Jump
+ InstrItinData<NCJ_tc_3or4stall_SLOT0, [InstrStage<1, [SLOT0]>],
+ [3, 1, 1, 1]>,
+
+ // Mem ops
+ InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>],
+ [1, 1, 1, 1]>,
+ InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
+ [3, 1, 1, 1]>,
+ InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
+ [1, 1, 1, 1]>,
+
+ // Endloop
+ InstrItinData<J_tc_2early_SLOT0123, [InstrStage<1, [SLOT_ENDLOOP]>],
+ [2]>,
+ InstrItinData<MAPPING_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>],
+ [1, 1, 1, 1]>,
+
+ // Duplex and Compound
+ InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>], [1, 1, 1]>,
+ InstrItinData<COMPOUND_CJ_ARCHDEPSLOT,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 1]>,
+ InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 1]>,
+ // Misc
+ InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>],
+ [1, 1, 1]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>],
+ [1, 1, 1]>,
+ InstrItinData<PSEUDOM , [InstrStage<1, [SLOT2, SLOT3], 0>,
+ InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 1]>];
+}
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index f6012d29d422..8e10c521a77d 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -123,6 +123,12 @@ private:
bool isAlignedMemNode(const MemSDNode *N) const;
bool isPositiveHalfWord(const SDNode *N) const;
+ // DAG preprocessing functions.
+ void ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes);
+ void ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes);
+ void ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes);
+ void ppHoistZextI1(std::vector<SDNode*> &&Nodes);
+
SmallDenseMap<SDNode *,int> RootWeights;
SmallDenseMap<SDNode *,int> RootHeights;
SmallDenseMap<const Value *,int> GAUsesInFunction;
@@ -932,55 +938,21 @@ void HexagonDAGToDAGISel::SelectBitcast(SDNode *N) {
void HexagonDAGToDAGISel::Select(SDNode *N) {
- if (N->isMachineOpcode()) {
- N->setNodeId(-1);
- return; // Already selected.
- }
+ if (N->isMachineOpcode())
+ return N->setNodeId(-1); // Already selected.
switch (N->getOpcode()) {
- case ISD::Constant:
- SelectConstant(N);
- return;
-
- case ISD::ConstantFP:
- SelectConstantFP(N);
- return;
-
- case ISD::FrameIndex:
- SelectFrameIndex(N);
- return;
-
- case ISD::BITCAST:
- SelectBitcast(N);
- return;
-
- case ISD::SHL:
- SelectSHL(N);
- return;
-
- case ISD::LOAD:
- SelectLoad(N);
- return;
-
- case ISD::STORE:
- SelectStore(N);
- return;
-
- case ISD::MUL:
- SelectMul(N);
- return;
-
- case ISD::ZERO_EXTEND:
- SelectZeroExtend(N);
- return;
-
- case ISD::INTRINSIC_W_CHAIN:
- SelectIntrinsicWChain(N);
- return;
-
- case ISD::INTRINSIC_WO_CHAIN:
- SelectIntrinsicWOChain(N);
- return;
+ case ISD::Constant: return SelectConstant(N);
+ case ISD::ConstantFP: return SelectConstantFP(N);
+ case ISD::FrameIndex: return SelectFrameIndex(N);
+ case ISD::BITCAST: return SelectBitcast(N);
+ case ISD::SHL: return SelectSHL(N);
+ case ISD::LOAD: return SelectLoad(N);
+ case ISD::STORE: return SelectStore(N);
+ case ISD::MUL: return SelectMul(N);
+ case ISD::ZERO_EXTEND: return SelectZeroExtend(N);
+ case ISD::INTRINSIC_W_CHAIN: return SelectIntrinsicWChain(N);
+ case ISD::INTRINSIC_WO_CHAIN: return SelectIntrinsicWOChain(N);
}
SelectCode(N);
@@ -1010,15 +982,52 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
}
-void HexagonDAGToDAGISel::PreprocessISelDAG() {
+static bool isMemOPCandidate(SDNode *I, SDNode *U) {
+ // I is an operand of U. Check if U is an arithmetic (binary) operation
+ // usable in a memop, where the other operand is a loaded value, and the
+ // result of U is stored in the same location.
+
+ if (!U->hasOneUse())
+ return false;
+ unsigned Opc = U->getOpcode();
+ switch (Opc) {
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::AND:
+ case ISD::OR:
+ break;
+ default:
+ return false;
+ }
+
+ SDValue S0 = U->getOperand(0);
+ SDValue S1 = U->getOperand(1);
+ SDValue SY = (S0.getNode() == I) ? S1 : S0;
+
+ SDNode *UUse = *U->use_begin();
+ if (UUse->getNumValues() != 1)
+ return false;
+
+ // Check if one of the inputs to U is a load instruction and the output
+ // is used by a store instruction. If so and they also have the same
+ // base pointer, then don't preprocess this node sequence as it
+ // can be matched to a memop.
+ SDNode *SYNode = SY.getNode();
+ if (UUse->getOpcode() == ISD::STORE && SYNode->getOpcode() == ISD::LOAD) {
+ SDValue LDBasePtr = cast<MemSDNode>(SYNode)->getBasePtr();
+ SDValue STBasePtr = cast<MemSDNode>(UUse)->getBasePtr();
+ if (LDBasePtr == STBasePtr)
+ return true;
+ }
+ return false;
+}
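For context, isMemOPCandidate is screening for the read-modify-write shape below, which Hexagon can encode as a single memop; this C++ sketch of the pattern is illustrative only and not part of the patch.

// Load, binary op, store back through the same base pointer. On Hexagon
// this can become one instruction (e.g. "memw(r0) += r1"), so the
// preprocessing functions deliberately leave this shape alone.
void rmw_add(int *p, int v) {
  *p += v;   // (store (add (load p) v) p) -- same base pointer
}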
+
+
+// Transform: (or (select c x 0) z) -> (select c (or x z) z)
+// (or (select c 0 y) z) -> (select c z (or y z))
+void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
SelectionDAG &DAG = *CurDAG;
- std::vector<SDNode*> Nodes;
- for (SDNode &Node : DAG.allnodes())
- Nodes.push_back(&Node);
- // Simplify: (or (select c x 0) z) -> (select c (or x z) z)
- // (or (select c 0 y) z) -> (select c z (or y z))
- // This may not be the right thing for all targets, so do it here.
for (auto I : Nodes) {
if (I->getOpcode() != ISD::OR)
continue;
@@ -1056,18 +1065,22 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() {
}
}
}
+}
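A minimal standalone check of the algebraic identity this rewrite relies on (the value sets are arbitrary):

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  // (c ? x : 0) | z == c ? (x | z) : z, and symmetrically for the
  // (c ? 0 : y) form, for any bit patterns x and z.
  for (int c = 0; c <= 1; ++c)
    for (uint32_t x : {0u, 5u, 0xFFFFFFFFu})
      for (uint32_t z : {0u, 9u, 0xF0F0F0F0u}) {
        assert(((c ? x : 0u) | z) == (c ? (x | z) : z));
        assert(((c ? 0u : x) | z) == (c ? z : (x | z)));
      }
  return 0;
}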
+
+// Transform: (store ch val (add x (add (shl y c) e)))
+// to: (store ch val (add x (shl (add y d) c))),
+// where e = (shl d c) for some integer d.
+// The purpose of this is to enable generation of loads/stores with
+// shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
+// value c must be 0, 1 or 2.
+void HexagonDAGToDAGISel::ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes) {
+ SelectionDAG &DAG = *CurDAG;
- // Transform: (store ch addr (add x (add (shl y c) e)))
- // to: (store ch addr (add x (shl (add y d) c))),
- // where e = (shl d c) for some integer d.
- // The purpose of this is to enable generation of loads/stores with
- // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
- // value c must be 0, 1 or 2.
for (auto I : Nodes) {
if (I->getOpcode() != ISD::STORE)
continue;
- // I matched: (store ch addr Off)
+ // I matched: (store ch val Off)
SDValue Off = I->getOperand(2);
// Off needs to match: (add x (add (shl y c) (shl d c))))
if (Off.getOpcode() != ISD::ADD)
@@ -1109,15 +1122,192 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() {
SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
ReplaceNode(T0.getNode(), NewShl.getNode());
}
+}
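The reassociation is justified by shifts distributing over addition in two's-complement arithmetic; a quick standalone C++ check:

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  // (y << c) + (d << c) == (y + d) << c, so the address
  // (add x (add (shl y c) e)) with e = (shl d c) can be rewritten as
  // (add x (shl (add y d) c)) and matched as mem(x+y<<#c).
  for (uint32_t c : {0u, 1u, 2u})
    for (uint32_t y : {0u, 7u, 0x12345678u})
      for (uint32_t d : {1u, 3u, 0x80u})
        assert(0x1000 + ((y << c) + (d << c)) == 0x1000 + ((y + d) << c));
  return 0;
}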
+
+// Transform: (load ch (add x (and (srl y c) Mask)))
+// to: (load ch (add x (shl (srl y d) d-c)))
+// where
+// Mask = 00..0 111..1 0.0
+// | | +-- d-c 0s, and d-c is 0, 1 or 2.
+// | +-------- 1s
+// +-------------- at most c 0s
+// Motivating example:
+// DAG combiner optimizes (add x (shl (srl y 5) 2))
+// to (add x (and (srl y 3) 1FFFFFFC))
+// which results in a constant-extended and(##...,lsr). This transformation
+// undoes this simplification for cases where the shl can be folded into
+// an addressing mode.
+void HexagonDAGToDAGISel::ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes) {
+ SelectionDAG &DAG = *CurDAG;
+
+ for (SDNode *N : Nodes) {
+ unsigned Opc = N->getOpcode();
+ if (Opc != ISD::LOAD && Opc != ISD::STORE)
+ continue;
+ SDValue Addr = Opc == ISD::LOAD ? N->getOperand(1) : N->getOperand(2);
+ // Addr must match: (add x T0)
+ if (Addr.getOpcode() != ISD::ADD)
+ continue;
+ SDValue T0 = Addr.getOperand(1);
+ // T0 must match: (and T1 Mask)
+ if (T0.getOpcode() != ISD::AND)
+ continue;
+
+ // We have an AND.
+ //
+ // Check the first operand. It must be: (srl y c).
+ SDValue S = T0.getOperand(0);
+ if (S.getOpcode() != ISD::SRL)
+ continue;
+ ConstantSDNode *SN = dyn_cast<ConstantSDNode>(S.getOperand(1).getNode());
+ if (SN == nullptr)
+ continue;
+ if (SN->getAPIntValue().getBitWidth() != 32)
+ continue;
+ uint32_t CV = SN->getZExtValue();
+
+ // Check the second operand: the supposed mask.
+ ConstantSDNode *MN = dyn_cast<ConstantSDNode>(T0.getOperand(1).getNode());
+ if (MN == nullptr)
+ continue;
+ if (MN->getAPIntValue().getBitWidth() != 32)
+ continue;
+ uint32_t Mask = MN->getZExtValue();
+ // Examine the mask.
+ uint32_t TZ = countTrailingZeros(Mask);
+ uint32_t M1 = countTrailingOnes(Mask >> TZ);
+ uint32_t LZ = countLeadingZeros(Mask);
+ // Trailing zeros + middle ones + leading zeros must equal the width.
+ if (TZ + M1 + LZ != 32)
+ continue;
+ // The number of trailing zeros will be encoded in the addressing mode.
+ if (TZ > 2)
+ continue;
+ // The number of leading zeros must be at most c.
+ if (LZ > CV)
+ continue;
+
+ // All looks good.
+ SDValue Y = S.getOperand(0);
+ EVT VT = Addr.getValueType();
+ SDLoc dl(S);
+ // TZ = D-C, so D = TZ+C.
+ SDValue D = DAG.getConstant(TZ+CV, dl, VT);
+ SDValue DC = DAG.getConstant(TZ, dl, VT);
+ SDValue NewSrl = DAG.getNode(ISD::SRL, dl, VT, Y, D);
+ SDValue NewShl = DAG.getNode(ISD::SHL, dl, VT, NewSrl, DC);
+ ReplaceNode(T0.getNode(), NewShl.getNode());
+ }
+}
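The motivating example from the comment can be verified directly; here c = 3, d = 5, so the mask has d-c = 2 trailing zeros:

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  // DAG combiner turns (add x (shl (srl y 5) 2)) into
  // (add x (and (srl y 3) 0x1FFFFFFC)); this pass undoes that so the
  // shl can be folded back into the addressing mode.
  for (uint32_t y : {0u, 0x20u, 0xDEADBEEFu, 0xFFFFFFFFu})
    assert(((y >> 3) & 0x1FFFFFFCu) == ((y >> 5) << 2));
  return 0;
}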
+
+// Transform: (op ... (zext i1 c) ...) -> (select c (op ... 1 ...)
+//                                                  (op ... 0 ...))
+void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) {
+ SelectionDAG &DAG = *CurDAG;
+
+ for (SDNode *N : Nodes) {
+ unsigned Opc = N->getOpcode();
+ if (Opc != ISD::ZERO_EXTEND)
+ continue;
+ SDValue OpI1 = N->getOperand(0);
+ EVT OpVT = OpI1.getValueType();
+ if (!OpVT.isSimple() || OpVT.getSimpleVT() != MVT::i1)
+ continue;
+ for (auto I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+ SDNode *U = *I;
+ if (U->getNumValues() != 1)
+ continue;
+ EVT UVT = U->getValueType(0);
+ if (!UVT.isSimple() || !UVT.isInteger() || UVT.getSimpleVT() == MVT::i1)
+ continue;
+ if (isMemOPCandidate(N, U))
+ continue;
+
+ // Potentially simplifiable operation.
+ unsigned I1N = I.getOperandNo();
+ SmallVector<SDValue,2> Ops(U->getNumOperands());
+ for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i)
+ Ops[i] = U->getOperand(i);
+ EVT BVT = Ops[I1N].getValueType();
+
+ SDLoc dl(U);
+ SDValue C0 = DAG.getConstant(0, dl, BVT);
+ SDValue C1 = DAG.getConstant(1, dl, BVT);
+ SDValue If0, If1;
+
+ if (isa<MachineSDNode>(U)) {
+ unsigned UseOpc = U->getMachineOpcode();
+ Ops[I1N] = C0;
+ If0 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
+ Ops[I1N] = C1;
+ If1 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
+ } else {
+ unsigned UseOpc = U->getOpcode();
+ Ops[I1N] = C0;
+ If0 = DAG.getNode(UseOpc, dl, UVT, Ops);
+ Ops[I1N] = C1;
+ If1 = DAG.getNode(UseOpc, dl, UVT, Ops);
+ }
+ SDValue Sel = DAG.getNode(ISD::SELECT, dl, UVT, OpI1, If1, If0);
+ DAG.ReplaceAllUsesWith(U, Sel.getNode());
+ }
+ }
+}
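The hoist is sound because a zero-extended i1 contributes exactly 0 or 1 to the consuming operation; a tiny standalone demonstration with add standing in for the generic op:

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  // (add a (zext i1 c)) == (select c (add a 1) (add a 0))
  for (bool c : {false, true})
    for (uint32_t a : {0u, 41u, 0xFFFFFFFFu})
      assert(a + (c ? 1u : 0u) == (c ? a + 1u : a + 0u));
  return 0;
}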
+
+void HexagonDAGToDAGISel::PreprocessISelDAG() {
+ // Repack all nodes before calling each preprocessing function,
+ // because each of them can modify the set of nodes.
+ auto getNodes = [this] () -> std::vector<SDNode*> {
+ std::vector<SDNode*> T;
+ T.reserve(CurDAG->allnodes_size());
+ for (SDNode &N : CurDAG->allnodes())
+ T.push_back(&N);
+ return T;
+ };
+
+ // Transform: (or (select c x 0) z) -> (select c (or x z) z)
+ // (or (select c 0 y) z) -> (select c z (or y z))
+ ppSimplifyOrSelect0(getNodes());
+
+ // Transform: (store ch val (add x (add (shl y c) e)))
+ // to: (store ch val (add x (shl (add y d) c))),
+ // where e = (shl d c) for some integer d.
+ // The purpose of this is to enable generation of loads/stores with
+ // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
+ // value c must be 0, 1 or 2.
+ ppAddrReorderAddShl(getNodes());
+
+ // Transform: (load ch (add x (and (srl y c) Mask)))
+ // to: (load ch (add x (shl (srl y d) d-c)))
+ // where
+ // Mask = 00..0 111..1 00..0
+ // | | +-- d-c 0s, and d-c is 0, 1 or 2.
+ // | +-------- 1s
+ // +-------------- at most c 0s
+ // Motivating example:
+ // DAG combiner optimizes (add x (shl (srl y 5) 2))
+ // to (add x (and (srl y 3) 1FFFFFFC))
+ // which results in a constant-extended and(##...,lsr). This transformation
+ // undoes this simplification for cases where the shl can be folded into
+ // an addressing mode.
+ ppAddrRewriteAndSrl(getNodes());
+
+  // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 1 ...)
+  //                                                  (op ... 0 ...))
+ ppHoistZextI1(getNodes());
+
+ DEBUG_WITH_TYPE("isel", {
+ dbgs() << "Preprocessed (Hexagon) selection DAG:";
+ CurDAG->dump();
+ });
if (EnableAddressRebalancing) {
rebalanceAddressTrees();
- DEBUG(
- dbgs() << "************* SelectionDAG after preprocessing: ***********\n";
+ DEBUG_WITH_TYPE("isel", {
+ dbgs() << "Address tree balanced selection DAG:";
CurDAG->dump();
- dbgs() << "************* End SelectionDAG after preprocessing ********\n";
- );
+ });
}
}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index e87e1e6a7e0f..418dd71aeb4b 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -256,7 +256,9 @@ static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT,
return false;
}
- if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
+ if (LocVT == MVT::i1) {
+ LocVT = MVT::i32;
+ } else if (LocVT == MVT::i8 || LocVT == MVT::i16) {
LocVT = MVT::i32;
ValVT = MVT::i32;
if (ArgFlags.isSExt())
@@ -483,9 +485,7 @@ static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
}
}
- unsigned Offset = State.AllocateStack(4, 4);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
+ return true;
}
static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
@@ -498,9 +498,7 @@ static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
}
}
- unsigned Offset = State.AllocateStack(8, 8);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
+ return true;
}
static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
@@ -511,7 +509,6 @@ static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
bool UseHVX = HST.useHVXOps();
bool UseHVXDbl = HST.useHVXDblOps();
- unsigned OffSiz = 64;
if (LocVT == MVT::v16i32) {
if (unsigned Reg = State.AllocateReg(Hexagon::V0)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
@@ -523,18 +520,14 @@ static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
- OffSiz = 128;
} else if (LocVT == MVT::v64i32) {
if (unsigned Reg = State.AllocateReg(Hexagon::W0)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
- OffSiz = 256;
}
- unsigned Offset = State.AllocateStack(OffSiz, OffSiz);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
+ return true;
}
void HexagonTargetLowering::promoteLdStType(MVT VT, MVT PromotedLdStVT) {
@@ -590,6 +583,16 @@ static bool isHvxVectorType(MVT Ty) {
}
}
+bool
+HexagonTargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
+}
+
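With RetCC_Hexagon32/64 now reporting failure instead of silently spilling to a stack slot, this override lets the target-independent code demote oversized return values to a hidden result pointer, matching the LowerReturn comment below. A hedged illustration of the kind of return that exercises this path (type and function names invented; whether a given aggregate reaches this point also depends on the front end's ABI lowering):

// A by-value return larger than 8 bytes cannot travel in R1:0, so the
// prototype is effectively rewritten to take a hidden result pointer.
struct BigRet { int a[8]; };
struct BigRet make_big(void) {
  struct BigRet r = {{0, 1, 2, 3, 4, 5, 6, 7}};
  return r;
}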
// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
// passed by value, the function prototype is modified to return void and
// the value is stored in memory pointed to by a pointer passed by the caller.
@@ -644,11 +647,11 @@ bool HexagonTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
-/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+/// Chain/Glue are the input chain/glue to use, and that TheCall is the call
/// being lowered. Returns a SDNode with the same number of values as the
/// ISD::CALL.
SDValue HexagonTargetLowering::LowerCallResult(
- SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
+ SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
@@ -671,21 +674,24 @@ SDValue HexagonTargetLowering::LowerCallResult(
// predicate register as the call result.
auto &MRI = DAG.getMachineFunction().getRegInfo();
SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
- MVT::i32, InFlag);
+ MVT::i32, Glue);
// FR0 = (Value, Chain, Glue)
unsigned PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
FR0.getValue(0), FR0.getValue(2));
// TPR = (Chain, Glue)
- RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1,
- TPR.getValue(1));
+ // Don't glue this CopyFromReg, because it copies from a virtual
+ // register. If it is glued to the call, InstrEmitter will add it
+ // as an implicit def to the call (EmitMachineNode).
+ RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1);
+ Glue = TPR.getValue(1);
} else {
RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
- RVLocs[i].getValVT(), InFlag);
+ RVLocs[i].getValVT(), Glue);
+ Glue = RetVal.getValue(2);
}
InVals.push_back(RetVal.getValue(0));
Chain = RetVal.getValue(1);
- InFlag = RetVal.getValue(2);
}
return Chain;
@@ -840,16 +846,17 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+ SDValue Glue;
if (!IsTailCall) {
SDValue C = DAG.getConstant(NumBytes, dl, PtrVT, true);
Chain = DAG.getCALLSEQ_START(Chain, C, dl);
+ Glue = Chain.getValue(1);
}
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
// The Glue is necessary since all emitted instructions must be
// stuck together.
- SDValue Glue;
if (!IsTailCall) {
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
@@ -902,6 +909,10 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
}
+ const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
if (Glue.getNode())
Ops.push_back(Glue);
@@ -1054,6 +1065,18 @@ SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
}
+// Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
+// is marked as having side-effects, while the register read on Hexagon does
+// not have any. TableGen refuses to accept the direct pattern from that node
+// to the A4_tfrcpp.
+SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDLoc dl(Op);
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
+ return DAG.getNode(HexagonISD::READCYCLE, dl, VTs, Chain);
+}
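On the source side, ISD::READCYCLECOUNTER is normally reached through Clang's __builtin_readcyclecounter(); a minimal trigger, assuming a Hexagon-targeted Clang:

#include <stdint.h>

// Reads the 64-bit cycle counter. With this patch the node lowers to
// HexagonISD::READCYCLE and from there to a plain register-pair read,
// instead of the pattern being rejected by TableGen.
uint64_t cycles(void) {
  return __builtin_readcyclecounter();
}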
+
SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
@@ -1140,10 +1163,25 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
EVT RegVT = VA.getLocVT();
if (RegVT == MVT::i8 || RegVT == MVT::i16 ||
RegVT == MVT::i32 || RegVT == MVT::f32) {
- unsigned VReg =
+ unsigned VReg =
RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
- InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
+ // Treat values of type MVT::i1 specially: they are passed in
+ // registers of type i32, but they need to remain as values of
+ // type i1 for consistency of the argument lowering.
+ if (VA.getValVT() == MVT::i1) {
+ // Generate a copy into a predicate register and use the value
+ // of the register as the "InVal".
+ unsigned PReg =
+ RegInfo.createVirtualRegister(&Hexagon::PredRegsRegClass);
+ SDNode *T = DAG.getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1,
+ Copy.getValue(0));
+ Copy = DAG.getCopyToReg(Copy.getValue(1), dl, PReg, SDValue(T, 0));
+ Copy = DAG.getCopyFromReg(Copy, dl, PReg, MVT::i1);
+ }
+ InVals.push_back(Copy);
+ Chain = Copy.getValue(1);
} else if (RegVT == MVT::i64 || RegVT == MVT::f64) {
unsigned VReg =
RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
@@ -1217,7 +1255,7 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
InVals.push_back(FIN);
} else {
InVals.push_back(
- DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, MachinePointerInfo()));
+ DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
}
}
}
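The i1 special case above keeps an incoming bool usable as a predicate: it arrives in a 32-bit register per the calling convention, is transferred to a predicate register (C2_tfrrp), and only then flows into the function body. Illustrative source that produces such an argument (names invented):

// 'c' is passed in r0 but consumed as an i1 predicate, hence the extra
// predicate-register copy on entry.
int pick(bool c, int a, int b) {
  return c ? a : b;
}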
@@ -1272,17 +1310,6 @@ static bool isSExtFree(SDValue N) {
return false;
}
-SDValue HexagonTargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
- SDLoc dl(Op);
- SDValue InpVal = Op.getOperand(0);
- if (isa<ConstantSDNode>(InpVal)) {
- uint64_t V = cast<ConstantSDNode>(InpVal)->getZExtValue();
- return DAG.getTargetConstant(countPopulation(V), dl, MVT::i64);
- }
- SDValue PopOut = DAG.getNode(HexagonISD::POPCOUNT, dl, MVT::i32, InpVal);
- return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, PopOut);
-}
-
SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -1571,9 +1598,10 @@ HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
SDValue
HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
- GlobalAddressSDNode *GA, SDValue *InFlag, EVT PtrVT, unsigned ReturnReg,
+ GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags) const {
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDLoc dl(GA);
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
@@ -1585,23 +1613,21 @@ HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
// 2. Callee which in this case is the Global address value.
// 3. Registers live into the call. In this case it's R0, as we
// have just one argument to be passed.
- // 4. InFlag if there is any.
+ // 4. Glue.
// Note: The order is important.
- if (InFlag) {
- SDValue Ops[] = { Chain, TGA,
- DAG.getRegister(Hexagon::R0, PtrVT), *InFlag };
- Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
- } else {
- SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT)};
- Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
- }
+ const auto &HRI = *Subtarget.getRegisterInfo();
+ const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT),
+ DAG.getRegisterMask(Mask), Glue };
+ Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
// Inform MFI that function has calls.
MFI.setAdjustsStack(true);
- SDValue Flag = Chain.getValue(1);
- return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
+ Glue = Chain.getValue(1);
+ return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
}
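For reference, this general-dynamic path is what a TLS access in position-independent code compiles through; a minimal C trigger (variable name invented):

// General-dynamic TLS: the access becomes a call to the resolver with
// the GOT address in r0; the call node now also carries the
// call-preserved register mask like any other call.
__thread int tls_counter;

int bump(void) {
  return ++tls_counter;
}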
//
@@ -1694,7 +1720,7 @@ HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag);
InFlag = Chain.getValue(1);
- return GetDynamicTLSAddr(DAG, Chain, GA, &InFlag, PtrVT,
+ return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT,
Hexagon::R0, HexagonII::MO_GDPLT);
}
@@ -1821,6 +1847,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
@@ -1891,7 +1918,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTPOP, MVT::i8, Promote);
setOperationAction(ISD::CTPOP, MVT::i16, Promote);
setOperationAction(ISD::CTPOP, MVT::i32, Promote);
- setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+ setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
+ setOperationAction(ISD::BSWAP, MVT::i32, Legal);
+ setOperationAction(ISD::BSWAP, MVT::i64, Legal);
// We custom lower i64 to i64 mul, so that it is not considered as a legal
// operation. There is a pattern that will match i64 mul and transform it
@@ -1901,7 +1933,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
for (unsigned IntExpOp :
{ ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
- ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
+ ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
ISD::SMUL_LOHI, ISD::UMUL_LOHI }) {
setOperationAction(IntExpOp, MVT::i32, Expand);
setOperationAction(IntExpOp, MVT::i64, Expand);
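With CTPOP i64 now Legal (and BITREVERSE/BSWAP marked Legal above), the corresponding builtins should select directly to single instructions rather than going through custom lowering or expansion; a quick C probe (function names invented):

#include <stdint.h>

// Each of these should now map onto one Hexagon instruction instead of
// a libcall or a custom-lowered sequence.
int pop64(uint64_t x)       { return __builtin_popcountll(x); }
uint32_t swap32(uint32_t x) { return __builtin_bswap32(x); }
uint64_t swap64(uint64_t x) { return __builtin_bswap64(x); }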
@@ -2268,7 +2300,6 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP";
case HexagonISD::JT: return "HexagonISD::JT";
case HexagonISD::PACKHL: return "HexagonISD::PACKHL";
- case HexagonISD::POPCOUNT: return "HexagonISD::POPCOUNT";
case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB";
case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH";
@@ -2296,6 +2327,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::VSRLW: return "HexagonISD::VSRLW";
case HexagonISD::VSXTBH: return "HexagonISD::VSXTBH";
case HexagonISD::VSXTBW: return "HexagonISD::VSXTBW";
+ case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
case HexagonISD::OP_END: break;
}
return nullptr;
@@ -2968,11 +3000,11 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VSELECT: return LowerVSELECT(Op, DAG);
- case ISD::CTPOP: return LowerCTPOP(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
case ISD::PREFETCH: return LowerPREFETCH(Op, DAG);
+ case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
}
}
@@ -3026,37 +3058,25 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
return std::make_pair(0U, &Hexagon::DoubleRegsRegClass);
}
case 'q': // q0-q3
- switch (VT.SimpleTy) {
+ switch (VT.getSizeInBits()) {
default:
- llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
- case MVT::v1024i1:
- case MVT::v512i1:
- case MVT::v32i16:
- case MVT::v16i32:
- case MVT::v64i8:
- case MVT::v8i64:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled vector size");
+ case 512:
return std::make_pair(0U, &Hexagon::VecPredRegsRegClass);
+ case 1024:
+ return std::make_pair(0U, &Hexagon::VecPredRegs128BRegClass);
}
case 'v': // V0-V31
- switch (VT.SimpleTy) {
+ switch (VT.getSizeInBits()) {
default:
- llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
- case MVT::v16i32:
- case MVT::v32i16:
- case MVT::v64i8:
- case MVT::v8i64:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled vector size");
+ case 512:
return std::make_pair(0U, &Hexagon::VectorRegsRegClass);
- case MVT::v32i32:
- case MVT::v64i16:
- case MVT::v16i64:
- case MVT::v128i8:
+ case 1024:
if (Subtarget.hasV60TOps() && UseHVX && UseHVXDbl)
return std::make_pair(0U, &Hexagon::VectorRegs128BRegClass);
return std::make_pair(0U, &Hexagon::VecDblRegsRegClass);
- case MVT::v256i8:
- case MVT::v128i16:
- case MVT::v64i32:
- case MVT::v32i64:
+ case 2048:
return std::make_pair(0U, &Hexagon::VecDblRegs128BRegClass);
}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index a8ed29e585d4..fb8f0ba6b057 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -50,7 +50,6 @@ namespace HexagonISD {
JT, // Jump table.
CP, // Constant pool.
- POPCOUNT,
COMBINE,
PACKHL,
VSPLATB,
@@ -86,6 +85,7 @@ namespace HexagonISD {
TC_RETURN,
EH_RETURN,
DCFETCH,
+ READCYCLE,
OP_END
};
@@ -146,6 +146,7 @@ namespace HexagonISD {
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
SDValue
@@ -163,7 +164,7 @@ namespace HexagonISD {
SDValue LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const;
SDValue GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
- GlobalAddressSDNode *GA, SDValue *InFlag, EVT PtrVT,
+ GlobalAddressSDNode *GA, SDValue InFlag, EVT PtrVT,
unsigned ReturnReg, unsigned char OperandFlags) const;
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
@@ -179,12 +180,16 @@ namespace HexagonISD {
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ bool CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
diff --git a/lib/Target/Hexagon/HexagonInstrAlias.td b/lib/Target/Hexagon/HexagonInstrAlias.td
deleted file mode 100644
index 7283d94ee759..000000000000
--- a/lib/Target/Hexagon/HexagonInstrAlias.td
+++ /dev/null
@@ -1,652 +0,0 @@
-//==- HexagonInstrAlias.td - Hexagon Instruction Aliases ---*- tablegen -*--==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Hexagon Instruction Mappings
-//===----------------------------------------------------------------------===//
-
-
-def : InstAlias<"memb({GP}+#$addr) = $Nt.new",
- (S2_storerbnewgp u16_0Imm:$addr, IntRegs:$Nt)>;
-def : InstAlias<"memh({GP}+#$addr) = $Nt.new",
- (S2_storerhnewgp u16_1Imm:$addr, IntRegs:$Nt)>;
-def : InstAlias<"memw({GP}+#$addr) = $Nt.new",
- (S2_storerinewgp u16_2Imm:$addr, IntRegs:$Nt)>;
-def : InstAlias<"memb({GP}+#$addr) = $Nt",
- (S2_storerbgp u16_0Imm:$addr, IntRegs:$Nt)>;
-def : InstAlias<"memh({GP}+#$addr) = $Nt",
- (S2_storerhgp u16_1Imm:$addr, IntRegs:$Nt)>;
-def : InstAlias<"memh({GP}+#$addr) = $Nt.h",
- (S2_storerfgp u16_1Imm:$addr, IntRegs:$Nt)>;
-def : InstAlias<"memw({GP}+#$addr) = $Nt",
- (S2_storerigp u16_2Imm:$addr, IntRegs:$Nt)>;
-def : InstAlias<"memd({GP}+#$addr) = $Nt",
- (S2_storerdgp u16_3Imm:$addr, DoubleRegs:$Nt)>;
-
-def : InstAlias<"$Nt = memb({GP}+#$addr)",
- (L2_loadrbgp IntRegs:$Nt, u16_0Imm:$addr)>;
-def : InstAlias<"$Nt = memub({GP}+#$addr)",
- (L2_loadrubgp IntRegs:$Nt, u16_0Imm:$addr)>;
-def : InstAlias<"$Nt = memh({GP}+#$addr)",
- (L2_loadrhgp IntRegs:$Nt, u16_1Imm:$addr)>;
-def : InstAlias<"$Nt = memuh({GP}+#$addr)",
- (L2_loadruhgp IntRegs:$Nt, u16_1Imm:$addr)>;
-def : InstAlias<"$Nt = memw({GP}+#$addr)",
- (L2_loadrigp IntRegs:$Nt, u16_2Imm:$addr)>;
-def : InstAlias<"$Nt = memd({GP}+#$addr)",
- (L2_loadrdgp DoubleRegs:$Nt, u16_3Imm:$addr)>;
-
-// Alias of: memXX($Rs+#XX) = $Rt to memXX($Rs) = $Rt
-def : InstAlias<"memb($Rs) = $Rt",
- (S2_storerb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"memh($Rs) = $Rt",
- (S2_storerh_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"memh($Rs) = $Rt.h",
- (S2_storerf_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"memw($Rs) = $Rt",
- (S2_storeri_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"memb($Rs) = $Rt.new",
- (S2_storerbnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"memh($Rs) = $Rt.new",
- (S2_storerhnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"memw($Rs) = $Rt.new",
- (S2_storerinew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"memb($Rs) = #$S8",
- (S4_storeirb_io IntRegs:$Rs, 0, s8_0Ext:$S8), 0>;
-
-def : InstAlias<"memh($Rs) = #$S8",
- (S4_storeirh_io IntRegs:$Rs, 0, s8_0Ext:$S8), 0>;
-
-def : InstAlias<"memw($Rs) = #$S8",
- (S4_storeiri_io IntRegs:$Rs, 0, s8_0Ext:$S8), 0>;
-
-def : InstAlias<"memd($Rs) = $Rtt",
- (S2_storerd_io IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
-
-def : InstAlias<"memb($Rs) = setbit(#$U5)",
- (L4_ior_memopb_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
-
-def : InstAlias<"memh($Rs) = setbit(#$U5)",
- (L4_ior_memoph_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
-
-def : InstAlias<"memw($Rs) = setbit(#$U5)",
- (L4_ior_memopw_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
-
-def : InstAlias<"memb($Rs) = clrbit(#$U5)",
- (L4_iand_memopb_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
-
-def : InstAlias<"memh($Rs) = clrbit(#$U5)",
- (L4_iand_memoph_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
-
-def : InstAlias<"memw($Rs) = clrbit(#$U5)",
- (L4_iand_memopw_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
-
-// Alias of: $Rd = memXX($Rs+#XX) to $Rd = memXX($Rs)
-def : InstAlias<"$Rd = memb($Rs)",
- (L2_loadrb_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rd = memub($Rs)",
- (L2_loadrub_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rd = memh($Rs)",
- (L2_loadrh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rd = memuh($Rs)",
- (L2_loadruh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rd = memw($Rs)",
- (L2_loadri_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rdd = memd($Rs)",
- (L2_loadrd_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rd = memubh($Rs)",
- (L2_loadbzw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rdd = memubh($Rs)",
- (L2_loadbzw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rd = membh($Rs)",
- (L2_loadbsw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rdd = membh($Rs)",
- (L2_loadbsw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rdd = memb_fifo($Rs)",
- (L2_loadalignb_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"$Rdd = memh_fifo($Rs)",
- (L2_loadalignh_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>;
-
-// Alias of: if ($Pt) $Rd = memXX($Rs + #$u6_X)
-// to: if ($Pt) $Rd = memXX($Rs)
-def : InstAlias<"if ($Pt) $Rd = memb($Rs)",
- (L2_ploadrbt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt) $Rd = memub($Rs)",
- (L2_ploadrubt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt) $Rd = memh($Rs)",
- (L2_ploadrht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt) $Rd = memuh($Rs)",
- (L2_ploadruht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt) $Rd = memw($Rs)",
- (L2_ploadrit_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt) $Rdd = memd($Rs)",
- (L2_ploadrdt_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-// Alias of: if ($Pt) memXX($Rs + #$u6_X) = $Rt
-// to: if ($Pt) memXX($Rs) = $Rt
-def : InstAlias<"if ($Pt) memb($Rs) = $Rt",
- (S2_pstorerbt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt) memh($Rs) = $Rt",
- (S2_pstorerht_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt) memh($Rs) = $Rt.h",
- (S2_pstorerft_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt) memw($Rs) = $Rt",
- (S2_pstorerit_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt) memd($Rs) = $Rtt",
- (S2_pstorerdt_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
-
-def : InstAlias<"if ($Pt) memb($Rs) = $Rt.new",
- (S2_pstorerbnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt) memh($Rs) = $Rt.new",
- (S2_pstorerhnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt) memw($Rs) = $Rt.new",
- (S2_pstorerinewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt.new) memb($Rs) = $Rt.new",
- (S4_pstorerbnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt.new) memh($Rs) = $Rt.new",
- (S4_pstorerhnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt.new) memw($Rs) = $Rt.new",
- (S4_pstorerinewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-
-// Alias of: if (!$Pt) $Rd = memXX($Rs + #$u6_X)
-// to: if (!$Pt) $Rd = memXX($Rs)
-def : InstAlias<"if (!$Pt) $Rd = memb($Rs)",
- (L2_ploadrbf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt) $Rd = memub($Rs)",
- (L2_ploadrubf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt) $Rd = memh($Rs)",
- (L2_ploadrhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt) $Rd = memuh($Rs)",
- (L2_ploadruhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt) $Rd = memw($Rs)",
- (L2_ploadrif_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt) $Rdd = memd($Rs)",
- (L2_ploadrdf_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-// Alias of: if (!$Pt) memXX($Rs + #$u6_X) = $Rt
-// to: if (!$Pt) memXX($Rs) = $Rt
-def : InstAlias<"if (!$Pt) memb($Rs) = $Rt",
- (S2_pstorerbf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt) memh($Rs) = $Rt",
- (S2_pstorerhf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.h",
- (S2_pstorerff_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt) memw($Rs) = $Rt",
- (S2_pstorerif_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt) memd($Rs) = $Rtt",
- (S2_pstorerdf_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
-
-def : InstAlias<"if (!$Pt) memb($Rs) = $Rt.new",
- (S2_pstorerbnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.new",
- (S2_pstorerhnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt) memw($Rs) = $Rt.new",
- (S2_pstorerinewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt.new) memb($Rs) = $Rt.new",
- (S4_pstorerbnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt.new) memh($Rs) = $Rt.new",
- (S4_pstorerhnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pt.new) memw($Rs) = $Rt.new",
- (S4_pstorerinewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pt) memb($Rs) = #$S6",
- (S4_storeirbt_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if ($Pt) memh($Rs) = #$S6",
- (S4_storeirht_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if ($Pt) memw($Rs) = #$S6",
- (S4_storeirit_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if ($Pt.new) memb($Rs) = #$S6",
- (S4_storeirbtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if ($Pt.new) memh($Rs) = #$S6",
- (S4_storeirhtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if ($Pt.new) memw($Rs) = #$S6",
- (S4_storeiritnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if (!$Pt) memb($Rs) = #$S6",
- (S4_storeirbf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if (!$Pt) memh($Rs) = #$S6",
- (S4_storeirhf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if (!$Pt) memw($Rs) = #$S6",
- (S4_storeirif_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if (!$Pt.new) memb($Rs) = #$S6",
- (S4_storeirbfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if (!$Pt.new) memh($Rs) = #$S6",
- (S4_storeirhfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-def : InstAlias<"if (!$Pt.new) memw($Rs) = #$S6",
- (S4_storeirifnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
-
-// Alias of: memXX($Rs + $u6_X) |= $Rt, also &=, +=, -=
-// to: memXX($Rs) |= $Rt
-def : InstAlias<"memb($Rs) &= $Rt",
- (L4_and_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memb($Rs) |= $Rt",
- (L4_or_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memb($Rs) += $Rt",
- (L4_add_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memb($Rs) -= $Rt",
- (L4_sub_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memb($Rs) += #$U5",
- (L4_iadd_memopb_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memb($Rs) -= #$U5",
- (L4_isub_memopb_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memh($Rs) &= $Rt",
- (L4_and_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memh($Rs) |= $Rt",
- (L4_or_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memh($Rs) += $Rt",
- (L4_add_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memh($Rs) -= $Rt",
- (L4_sub_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memh($Rs) += #$U5",
- (L4_iadd_memoph_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memh($Rs) -= #$U5",
- (L4_isub_memoph_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memw($Rs) &= $Rt",
- (L4_and_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memw($Rs) |= $Rt",
- (L4_or_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memw($Rs) += $Rt",
- (L4_add_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memw($Rs) -= $Rt",
- (L4_sub_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memw($Rs) += #$U5",
- (L4_iadd_memopw_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
- Requires<[UseMEMOP]>;
-
-def : InstAlias<"memw($Rs) -= #$U5",
- (L4_isub_memopw_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
- Requires<[UseMEMOP]>;
-
-//
-// Alias of: if ($Pv.new) memX($Rs) = $Rt
-// to: if (p3.new) memX(r17 + #0) = $Rt
-def : InstAlias<"if ($Pv.new) memb($Rs) = $Rt",
- (S4_pstorerbtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt",
- (S4_pstorerhtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt.h",
- (S4_pstorerftnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pv.new) memw($Rs) = $Rt",
- (S4_pstoreritnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if ($Pv.new) memd($Rs) = $Rtt",
- (S4_pstorerdtnew_io
- PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
-
-def : InstAlias<"if (!$Pv.new) memb($Rs) = $Rt",
- (S4_pstorerbfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt",
- (S4_pstorerhfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt.h",
- (S4_pstorerffnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pv.new) memw($Rs) = $Rt",
- (S4_pstorerifnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
-
-def : InstAlias<"if (!$Pv.new) memd($Rs) = $Rtt",
- (S4_pstorerdfnew_io
- PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
-
-//
-// Alias of: if ($Pt.new) $Rd = memub($Rs) -- And if (!$Pt.new) ...
-// to: if ($Pt.new) $Rd = memub($Rs + #$u6_0)
-def : InstAlias<"if ($Pt.new) $Rd = memub($Rs)",
- (L2_ploadrubtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt.new) $Rd = memb($Rs)",
- (L2_ploadrbtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt.new) $Rd = memh($Rs)",
- (L2_ploadrhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt.new) $Rd = memuh($Rs)",
- (L2_ploadruhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt.new) $Rd = memw($Rs)",
- (L2_ploadritnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if ($Pt.new) $Rdd = memd($Rs)",
- (L2_ploadrdtnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt.new) $Rd = memub($Rs)",
- (L2_ploadrubfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt.new) $Rd = memb($Rs)",
- (L2_ploadrbfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt.new) $Rd = memh($Rs)",
- (L2_ploadrhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt.new) $Rd = memuh($Rs)",
- (L2_ploadruhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt.new) $Rd = memw($Rs)",
- (L2_ploadrifnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"if (!$Pt.new) $Rdd = memd($Rs)",
- (L2_ploadrdfnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>;
-
-def : InstAlias<"dcfetch($Rs)",
- (Y2_dcfetchbo IntRegs:$Rs, 0), 0>;
-
-// Alias of some insn mappings, others must be handled by the parser
-def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)",
- (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
-def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)",
- (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
-
-// Rd=neg(Rs) is aliased to Rd=sub(#0,Rs)
-def : InstAlias<"$Rd = neg($Rs)",
- (A2_subri IntRegs:$Rd, 0, IntRegs:$Rs), 0>;
-
-def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>;
-def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>;
-def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>;
-def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>;
-
-def : InstAlias<"$Pd = $Ps",
- (C2_or PredRegs:$Pd, PredRegs:$Ps, PredRegs:$Ps), 0>;
-
-def : InstAlias<"$Rdd = vaddb($Rss, $Rtt)",
- (A2_vaddub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 1>;
-
-def : InstAlias<"$Rdd = vsubb($Rss,$Rtt)",
- (A2_vsubub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 0>;
-
-def : InstAlias<"$Rd = mpyui($Rs,$Rt)",
- (M2_mpyi IntRegs:$Rd, IntRegs:$Rs, IntRegs:$Rt), 0>;
-
-// Assembler mapped insns: cmp.lt(a,b) -> cmp.gt(b,a)
-def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)",
- (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
-def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)",
- (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>;
-
-// maps if (!Pu) jumpr Rs -> if (!Pu) jumpr:nt Rs
-def : InstAlias<"if (!$Pu) jumpr $Rs",
- (J2_jumprf PredRegs:$Pu, IntRegs:$Rs)>,
- Requires<[HasV60T]>;
-
-// maps if (Pu) jumpr Rs -> if (Pu) jumpr:nt Rs
-def : InstAlias<"if ($Pu) jumpr $Rs",
- (J2_jumprt PredRegs:$Pu, IntRegs:$Rs)>,
- Requires<[HasV60T]>;
-
-// maps if (!Pu) jump $r15_2 -> if (!Pu) jump:nt $r15_2
-def : InstAlias<"if (!$Pu) jump $r15_2",
- (J2_jumpf PredRegs:$Pu, brtarget:$r15_2)>,
- Requires<[HasV60T]>;
-
-// maps if (Pu) jump $r15_2 -> if (Pu) jump:nt $r15_2
-def : InstAlias<"if ($Pu) jump $r15_2",
- (J2_jumpt PredRegs:$Pu, brtarget:$r15_2)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if ($src) jump $r15_2",
- (J2_jumpt PredRegs:$src, brtarget:$r15_2), 0>;
-
-def : InstAlias<"if (!$src) jump $r15_2",
- (J2_jumpf PredRegs:$src, brtarget:$r15_2), 0>;
-
-def : InstAlias<"if ($src1) jumpr $src2",
- (J2_jumprt PredRegs:$src1, IntRegs:$src2), 0>;
-
-def : InstAlias<"if (!$src1) jumpr $src2",
- (J2_jumprf PredRegs:$src1, IntRegs:$src2), 0>;
-
-// maps Vdd = Vss to Vdd = V6_vassignp(Vss)
-def : InstAlias<"$Vdd = $Vss",
- (V6_vassignp VecDblRegs:$Vdd, VecDblRegs:$Vss)>,
- Requires<[HasV60T]>;
-
-// maps Vd = #0 to Vd = vxor(Vd, Vd)
-def : InstAlias<"$Vd = #0",
- (V6_vxor VectorRegs:$Vd, VectorRegs:$Vd, VectorRegs:$Vd)>,
- Requires<[HasV60T]>;
-
-// maps Vdd = #0 to Vdd = vsub(Vdd, Vdd)
-def : InstAlias<"$Vdd = #0",
- (V6_vsubw_dv VecDblRegs:$Vdd, VecDblRegs:$Vdd, VecDblRegs:$Vdd)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd = vcmp.eq($Vu.uh, $Vv.uh)" -> "$Qd = vcmp.eq($Vu.h, $Vv.h)"
-def : InstAlias<"$Qd = vcmp.eq($Vu.uh, $Vv.uh)",
- (V6_veqh VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd &= vcmp.eq($Vu.uh, $Vv.uh)" -> "$Qd &= vcmp.eq($Vu.h, $Vv.h)"
-def : InstAlias<"$Qd &= vcmp.eq($Vu.uh, $Vv.uh)",
- (V6_veqh_and VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd |= vcmp.eq($Vu.uh, $Vv.uh)" -> "$Qd |= vcmp.eq($Vu.h, $Vv.h)"
-def : InstAlias<"$Qd |= vcmp.eq($Vu.uh, $Vv.uh)",
- (V6_veqh_or VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd ^= vcmp.eq($Vu.uh, $Vv.uh)" -> "$Qd ^= vcmp.eq($Vu.h, $Vv.h)"
-def : InstAlias<"$Qd ^= vcmp.eq($Vu.uh, $Vv.uh)",
- (V6_veqh_xor VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd = vcmp.eq($Vu.uw, $Vv.uw)" -> "$Qd = vcmp.eq($Vu.w, $Vv.w)"
-def : InstAlias<"$Qd = vcmp.eq($Vu.uw, $Vv.uw)",
- (V6_veqw VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd &= vcmp.eq($Vu.uw, $Vv.uw)" -> "$Qd &= vcmp.eq($Vu.w, $Vv.w)"
-def : InstAlias<"$Qd &= vcmp.eq($Vu.uw, $Vv.uw)",
- (V6_veqw_and VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd |= vcmp.eq($Vu.uw, $Vv.uw)" -> "$Qd |= vcmp.eq($Vu.w, $Vv.w)"
-def : InstAlias<"$Qd |= vcmp.eq($Vu.uw, $Vv.uw)",
- (V6_veqh_or VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd ^= vcmp.eq($Vu.uw, $Vv.uw)" -> "$Qd ^= vcmp.eq($Vu.w, $Vv.w)"
-def : InstAlias<"$Qd ^= vcmp.eq($Vu.uw, $Vv.uw)",
- (V6_veqw_xor VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd = vcmp.eq($Vu.ub, $Vv.ub)" -> "$Qd = vcmp.eq($Vu.b, $Vv.b)"
-def : InstAlias<"$Qd = vcmp.eq($Vu.ub, $Vv.ub)",
- (V6_veqb VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd &= vcmp.eq($Vu.ub, $Vv.ub)" -> "$Qd &= vcmp.eq($Vu.b, $Vv.b)"
-def : InstAlias<"$Qd &= vcmp.eq($Vu.ub, $Vv.ub)",
- (V6_veqb_and VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd |= vcmp.eq($Vu.ub, $Vv.ub)" -> "$Qd |= vcmp.eq($Vu.b, $Vv.b)"
-def : InstAlias<"$Qd |= vcmp.eq($Vu.ub, $Vv.ub)",
- (V6_veqb_or VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Qd ^= vcmp.eq($Vu.ub, $Vv.ub)" -> "$Qd ^= vcmp.eq($Vu.b, $Vv.b)"
-def : InstAlias<"$Qd ^= vcmp.eq($Vu.ub, $Vv.ub)",
- (V6_veqb_xor VecPredRegs:$Qd, VectorRegs:$Vu, VectorRegs:$Vv)>,
- Requires<[HasV60T]>;
-
-// maps "$Rd.w = vextract($Vu, $Rs)" -> "$Rd = vextract($Vu, $Rs)"
-def : InstAlias<"$Rd.w = vextract($Vu, $Rs)",
- (V6_extractw IntRegs:$Rd, VectorRegs:$Vu, IntRegs:$Rs)>,
- Requires<[HasV60T]>;
-
-// Mapping from vtrans2x2(Vy32,Vx32,Rt32) to vshuff(Vy32,Vx32,Rt32)
-def : InstAlias<"vtrans2x2($Vy, $Vx, $Rt)",
- (V6_vshuff VectorRegs:$Vy, VectorRegs:$Vx, IntRegs:$Rt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"$Vt=vmem($Rs)",
- (V6_vL32b_ai VectorRegs:$Vt, IntRegs:$Rs, 0)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"$Vt=vmem($Rs):nt",
- (V6_vL32b_nt_ai VectorRegs:$Vt, IntRegs:$Rs, 0)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"vmem($Rs)=$Vt",
- (V6_vS32b_ai IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"vmem($Rs):nt=$Vt",
- (V6_vS32b_nt_ai IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"vmem($Rs)=$Vt.new",
- (V6_vS32b_new_ai IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"vmem($Rs):nt=$Vt.new",
- (V6_vS32b_nt_new_ai IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if ($Qv) vmem($Rs)=$Vt",
- (V6_vS32b_qpred_ai VecPredRegs:$Qv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if (!$Qv) vmem($Rs)=$Vt",
- (V6_vS32b_nqpred_ai VecPredRegs:$Qv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if ($Qv) vmem($Rs):nt=$Vt",
- (V6_vS32b_nt_qpred_ai VecPredRegs:$Qv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if (!$Qv) vmem($Rs):nt=$Vt",
- (V6_vS32b_nt_nqpred_ai VecPredRegs:$Qv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if ($Pv) vmem($Rs)=$Vt",
- (V6_vS32b_pred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if (!$Pv) vmem($Rs)=$Vt",
- (V6_vS32b_npred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if ($Pv) vmem($Rs):nt=$Vt",
- (V6_vS32b_nt_pred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if (!$Pv) vmem($Rs):nt=$Vt",
- (V6_vS32b_nt_npred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"$Vt=vmemu($Rs)",
- (V6_vL32Ub_ai VectorRegs:$Vt, IntRegs:$Rs, 0)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"vmemu($Rs)=$Vt",
- (V6_vS32Ub_ai IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if ($Pv) vmemu($Rs)=$Vt",
- (V6_vS32Ub_pred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-def : InstAlias<"if (!$Pv) vmemu($Rs)=$Vt",
- (V6_vS32Ub_npred_ai PredRegs:$Pv, IntRegs:$Rs, 0, VectorRegs:$Vt)>,
- Requires<[HasV60T]>;
-
-
diff --git a/lib/Target/Hexagon/HexagonInstrEnc.td b/lib/Target/Hexagon/HexagonInstrEnc.td
deleted file mode 100644
index 280832fd167f..000000000000
--- a/lib/Target/Hexagon/HexagonInstrEnc.td
+++ /dev/null
@@ -1,1019 +0,0 @@
-class Enc_COPROC_VX_3op_v<bits<15> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
-
- let Inst{31-16} = { opc{14-4}, src2};
- let Inst{13-0} = { opc{3}, src1, opc{2-0}, dst};
-}
-
-class V6_vtmpyb_enc : Enc_COPROC_VX_3op_v<0b000110010000000>;
-class V6_vtmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000001>;
-class V6_vdmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110010000010>;
-class V6_vrmpyub_enc : Enc_COPROC_VX_3op_v<0b000110010000011>;
-class V6_vrmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000100>;
-class V6_vdsaduh_enc : Enc_COPROC_VX_3op_v<0b000110010000101>;
-class V6_vdmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000110>;
-class V6_vdmpybus_dv_enc : Enc_COPROC_VX_3op_v<0b000110010000111>;
-class V6_vtmpyb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001000>;
-class V6_vtmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001001>;
-class V6_vtmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001010>;
-class V6_vdmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001011>;
-class V6_vrmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001100>;
-class V6_vrmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001101>;
-class V6_vdmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001110>;
-class V6_vdmpybus_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001111>;
-class V6_vdmpyhsusat_enc : Enc_COPROC_VX_3op_v<0b000110010010000>;
-class V6_vdmpyhsuisat_enc : Enc_COPROC_VX_3op_v<0b000110010010001>;
-class V6_vdmpyhsat_enc : Enc_COPROC_VX_3op_v<0b000110010010010>;
-class V6_vdmpyhisat_enc : Enc_COPROC_VX_3op_v<0b000110010010011>;
-class V6_vdmpyhb_dv_enc : Enc_COPROC_VX_3op_v<0b000110010010100>;
-class V6_vmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010010101>;
-class V6_vmpabus_enc : Enc_COPROC_VX_3op_v<0b000110010010110>;
-class V6_vmpahb_enc : Enc_COPROC_VX_3op_v<0b000110010010111>;
-class V6_vdmpyhsusat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011000>;
-class V6_vdmpyhsuisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011001>;
-class V6_vdmpyhisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011010>;
-class V6_vdmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011011>;
-class V6_vdmpyhb_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011100>;
-class V6_vmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011101>;
-class V6_vmpabus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011110>;
-class V6_vmpahb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011111>;
-class V6_vmpyh_enc : Enc_COPROC_VX_3op_v<0b000110010100000>;
-class V6_vmpyhss_enc : Enc_COPROC_VX_3op_v<0b000110010100001>;
-class V6_vmpyhsrs_enc : Enc_COPROC_VX_3op_v<0b000110010100010>;
-class V6_vmpyuh_enc : Enc_COPROC_VX_3op_v<0b000110010100011>;
-class V6_vmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101000>;
-class V6_vmpyuh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101001>;
-class V6_vmpyiwb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101010>;
-class V6_vmpyiwh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101011>;
-class V6_vmpyihb_enc : Enc_COPROC_VX_3op_v<0b000110010110000>;
-class V6_vror_enc : Enc_COPROC_VX_3op_v<0b000110010110001>;
-class V6_vasrw_enc : Enc_COPROC_VX_3op_v<0b000110010110101>;
-class V6_vasrh_enc : Enc_COPROC_VX_3op_v<0b000110010110110>;
-class V6_vaslw_enc : Enc_COPROC_VX_3op_v<0b000110010110111>;
-class V6_vdsaduh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111000>;
-class V6_vmpyihb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111001>;
-class V6_vaslw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111010>;
-class V6_vasrw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111101>;
-class V6_vaslh_enc : Enc_COPROC_VX_3op_v<0b000110011000000>;
-class V6_vlsrw_enc : Enc_COPROC_VX_3op_v<0b000110011000001>;
-class V6_vlsrh_enc : Enc_COPROC_VX_3op_v<0b000110011000010>;
-class V6_vmpyiwh_enc : Enc_COPROC_VX_3op_v<0b000110011000111>;
-class V6_vmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110011001000>;
-class V6_vmpyiwb_enc : Enc_COPROC_VX_3op_v<0b000110011010000>;
-class V6_vtmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110011010100>;
-class V6_vmpyub_enc : Enc_COPROC_VX_3op_v<0b000110011100000>;
-class V6_vrmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000000>;
-class V6_vrmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000001>;
-class V6_vrmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000010>;
-class V6_vdmpyhvsat_enc : Enc_COPROC_VX_3op_v<0b000111000000011>;
-class V6_vmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000100>;
-class V6_vmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000101>;
-class V6_vmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000110>;
-class V6_vmpyhv_enc : Enc_COPROC_VX_3op_v<0b000111000000111>;
-class V6_vrmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001000>;
-class V6_vrmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001001>;
-class V6_vrmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001010>;
-class V6_vdmpyhvsat_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001011>;
-class V6_vmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001100>;
-class V6_vmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001101>;
-class V6_vmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001110>;
-class V6_vmpyhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001111>;
-class V6_vmpyuhv_enc : Enc_COPROC_VX_3op_v<0b000111000010000>;
-class V6_vmpyhvsrs_enc : Enc_COPROC_VX_3op_v<0b000111000010001>;
-class V6_vmpyhus_enc : Enc_COPROC_VX_3op_v<0b000111000010010>;
-class V6_vmpabusv_enc : Enc_COPROC_VX_3op_v<0b000111000010011>;
-class V6_vmpyih_enc : Enc_COPROC_VX_3op_v<0b000111000010100>;
-class V6_vand_enc : Enc_COPROC_VX_3op_v<0b000111000010101>;
-class V6_vor_enc : Enc_COPROC_VX_3op_v<0b000111000010110>;
-class V6_vxor_enc : Enc_COPROC_VX_3op_v<0b000111000010111>;
-class V6_vmpyuhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011000>;
-class V6_vmpyhus_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011001>;
-class V6_vmpyih_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011100>;
-class V6_vmpyiewuh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011101>;
-class V6_vmpyowh_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011110>;
-class V6_vmpyowh_rnd_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011111>;
-class V6_vaddw_enc : Enc_COPROC_VX_3op_v<0b000111000100000>;
-class V6_vaddubsat_enc : Enc_COPROC_VX_3op_v<0b000111000100001>;
-class V6_vadduhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100010>;
-class V6_vaddhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100011>;
-class V6_vaddwsat_enc : Enc_COPROC_VX_3op_v<0b000111000100100>;
-class V6_vsubb_enc : Enc_COPROC_VX_3op_v<0b000111000100101>;
-class V6_vsubh_enc : Enc_COPROC_VX_3op_v<0b000111000100110>;
-class V6_vsubw_enc : Enc_COPROC_VX_3op_v<0b000111000100111>;
-class V6_vmpyiewh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000101000>;
-class V6_vsububsat_enc : Enc_COPROC_VX_3op_v<0b000111000110000>;
-class V6_vsubuhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110001>;
-class V6_vsubhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110010>;
-class V6_vsubwsat_enc : Enc_COPROC_VX_3op_v<0b000111000110011>;
-class V6_vaddb_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110100>;
-class V6_vaddh_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110101>;
-class V6_vaddw_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110110>;
-class V6_vaddubsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110111>;
-class V6_vadduhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000000>;
-class V6_vaddhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000001>;
-class V6_vaddwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000010>;
-class V6_vsubb_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000011>;
-class V6_vsubh_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000100>;
-class V6_vsubw_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000101>;
-class V6_vsububsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000110>;
-class V6_vsubuhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000111>;
-class V6_vsubhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010000>;
-class V6_vsubwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010001>;
-class V6_vaddubh_enc : Enc_COPROC_VX_3op_v<0b000111001010010>;
-class V6_vadduhw_enc : Enc_COPROC_VX_3op_v<0b000111001010011>;
-class V6_vaddhw_enc : Enc_COPROC_VX_3op_v<0b000111001010100>;
-class V6_vsububh_enc : Enc_COPROC_VX_3op_v<0b000111001010101>;
-class V6_vsubuhw_enc : Enc_COPROC_VX_3op_v<0b000111001010110>;
-class V6_vsubhw_enc : Enc_COPROC_VX_3op_v<0b000111001010111>;
-class V6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b000111001100000>;
-class V6_vabsdiffh_enc : Enc_COPROC_VX_3op_v<0b000111001100001>;
-class V6_vabsdiffuh_enc : Enc_COPROC_VX_3op_v<0b000111001100010>;
-class V6_vabsdiffw_enc : Enc_COPROC_VX_3op_v<0b000111001100011>;
-class V6_vavgub_enc : Enc_COPROC_VX_3op_v<0b000111001100100>;
-class V6_vavguh_enc : Enc_COPROC_VX_3op_v<0b000111001100101>;
-class V6_vavgh_enc : Enc_COPROC_VX_3op_v<0b000111001100110>;
-class V6_vavgw_enc : Enc_COPROC_VX_3op_v<0b000111001100111>;
-class V6_vnavgub_enc : Enc_COPROC_VX_3op_v<0b000111001110000>;
-class V6_vnavgh_enc : Enc_COPROC_VX_3op_v<0b000111001110001>;
-class V6_vnavgw_enc : Enc_COPROC_VX_3op_v<0b000111001110010>;
-class V6_vavgubrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110011>;
-class V6_vavguhrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110100>;
-class V6_vavghrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110101>;
-class V6_vavgwrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110110>;
-class V6_vmpabuuv_enc : Enc_COPROC_VX_3op_v<0b000111001110111>;
-class V6_vminub_enc : Enc_COPROC_VX_3op_v<0b000111110000001>;
-class V6_vminuh_enc : Enc_COPROC_VX_3op_v<0b000111110000010>;
-class V6_vminh_enc : Enc_COPROC_VX_3op_v<0b000111110000011>;
-class V6_vminw_enc : Enc_COPROC_VX_3op_v<0b000111110000100>;
-class V6_vmaxub_enc : Enc_COPROC_VX_3op_v<0b000111110000101>;
-class V6_vmaxuh_enc : Enc_COPROC_VX_3op_v<0b000111110000110>;
-class V6_vmaxh_enc : Enc_COPROC_VX_3op_v<0b000111110000111>;
-class V6_vmaxw_enc : Enc_COPROC_VX_3op_v<0b000111110010000>;
-class V6_vdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010001>;
-class V6_vrdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010011>;
-class V6_vdealb4w_enc : Enc_COPROC_VX_3op_v<0b000111110010111>;
-class V6_vmpyowh_rnd_enc : Enc_COPROC_VX_3op_v<0b000111110100000>;
-class V6_vshuffeb_enc : Enc_COPROC_VX_3op_v<0b000111110100001>;
-class V6_vshuffob_enc : Enc_COPROC_VX_3op_v<0b000111110100010>;
-class V6_vshufeh_enc : Enc_COPROC_VX_3op_v<0b000111110100011>;
-class V6_vshufoh_enc : Enc_COPROC_VX_3op_v<0b000111110100100>;
-class V6_vshufoeh_enc : Enc_COPROC_VX_3op_v<0b000111110100101>;
-class V6_vshufoeb_enc : Enc_COPROC_VX_3op_v<0b000111110100110>;
-class V6_vcombine_enc : Enc_COPROC_VX_3op_v<0b000111110100111>;
-class V6_vmpyieoh_enc : Enc_COPROC_VX_3op_v<0b000111110110000>;
-class V6_vsathub_enc : Enc_COPROC_VX_3op_v<0b000111110110010>;
-class V6_vsatwh_enc : Enc_COPROC_VX_3op_v<0b000111110110011>;
-class V6_vroundwh_enc : Enc_COPROC_VX_3op_v<0b000111110110100>;
-class V6_vroundwuh_enc : Enc_COPROC_VX_3op_v<0b000111110110101>;
-class V6_vroundhb_enc : Enc_COPROC_VX_3op_v<0b000111110110110>;
-class V6_vroundhub_enc : Enc_COPROC_VX_3op_v<0b000111110110111>;
-class V6_vasrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010000>;
-class V6_vlsrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010001>;
-class V6_vlsrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010010>;
-class V6_vasrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010011>;
-class V6_vaslwv_enc : Enc_COPROC_VX_3op_v<0b000111111010100>;
-class V6_vaslhv_enc : Enc_COPROC_VX_3op_v<0b000111111010101>;
-class V6_vaddb_enc : Enc_COPROC_VX_3op_v<0b000111111010110>;
-class V6_vaddh_enc : Enc_COPROC_VX_3op_v<0b000111111010111>;
-class V6_vmpyiewuh_enc : Enc_COPROC_VX_3op_v<0b000111111100000>;
-class V6_vmpyiowh_enc : Enc_COPROC_VX_3op_v<0b000111111100001>;
-class V6_vpackeb_enc : Enc_COPROC_VX_3op_v<0b000111111100010>;
-class V6_vpackeh_enc : Enc_COPROC_VX_3op_v<0b000111111100011>;
-class V6_vpackhub_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100101>;
-class V6_vpackhb_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100110>;
-class V6_vpackwuh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100111>;
-class V6_vpackwh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111110000>;
-class V6_vpackob_enc : Enc_COPROC_VX_3op_v<0b000111111110001>;
-class V6_vpackoh_enc : Enc_COPROC_VX_3op_v<0b000111111110010>;
-class V6_vmpyewuh_enc : Enc_COPROC_VX_3op_v<0b000111111110101>;
-class V6_vmpyowh_enc : Enc_COPROC_VX_3op_v<0b000111111110111>;
-class V6_extractw_enc : Enc_COPROC_VX_3op_v<0b100100100000001>;
-class M6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b111010001010000>;
-class M6_vabsdiffb_enc : Enc_COPROC_VX_3op_v<0b111010001110000>;
-
-class Enc_COPROC_VX_cmp<bits<13> opc> : OpcodeHexagon {
- bits<2> dst;
- bits<5> src1;
- bits<5> src2;
-
- let Inst{31-16} = { 0b00011, opc{12-7}, src2{4-0} };
- let Inst{13-0} = { opc{6}, src1{4-0}, opc{5-0}, dst{1-0} };
-}
-
-class V6_vandvrt_acc_enc : Enc_COPROC_VX_cmp<0b0010111100000>;
-class V6_vandvrt_enc : Enc_COPROC_VX_cmp<0b0011010010010>;
-class V6_veqb_and_enc : Enc_COPROC_VX_cmp<0b1001001000000>;
-class V6_veqh_and_enc : Enc_COPROC_VX_cmp<0b1001001000001>;
-class V6_veqw_and_enc : Enc_COPROC_VX_cmp<0b1001001000010>;
-class V6_vgtb_and_enc : Enc_COPROC_VX_cmp<0b1001001000100>;
-class V6_vgth_and_enc : Enc_COPROC_VX_cmp<0b1001001000101>;
-class V6_vgtw_and_enc : Enc_COPROC_VX_cmp<0b1001001000110>;
-class V6_vgtub_and_enc : Enc_COPROC_VX_cmp<0b1001001001000>;
-class V6_vgtuh_and_enc : Enc_COPROC_VX_cmp<0b1001001001001>;
-class V6_vgtuw_and_enc : Enc_COPROC_VX_cmp<0b1001001001010>;
-class V6_veqb_or_enc : Enc_COPROC_VX_cmp<0b1001001010000>;
-class V6_veqh_or_enc : Enc_COPROC_VX_cmp<0b1001001010001>;
-class V6_veqw_or_enc : Enc_COPROC_VX_cmp<0b1001001010010>;
-class V6_vgtb_or_enc : Enc_COPROC_VX_cmp<0b1001001010100>;
-class V6_vgth_or_enc : Enc_COPROC_VX_cmp<0b1001001010101>;
-class V6_vgtw_or_enc : Enc_COPROC_VX_cmp<0b1001001010110>;
-class V6_vgtub_or_enc : Enc_COPROC_VX_cmp<0b1001001011000>;
-class V6_vgtuh_or_enc : Enc_COPROC_VX_cmp<0b1001001011001>;
-class V6_vgtuw_or_enc : Enc_COPROC_VX_cmp<0b1001001011010>;
-class V6_veqb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100000>;
-class V6_veqh_xor_enc : Enc_COPROC_VX_cmp<0b1001001100001>;
-class V6_veqw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100010>;
-class V6_vgtb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100100>;
-class V6_vgth_xor_enc : Enc_COPROC_VX_cmp<0b1001001100101>;
-class V6_vgtw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100110>;
-class V6_vgtub_xor_enc : Enc_COPROC_VX_cmp<0b1001001101000>;
-class V6_vgtuh_xor_enc : Enc_COPROC_VX_cmp<0b1001001101001>;
-class V6_vgtuw_xor_enc : Enc_COPROC_VX_cmp<0b1001001101010>;
-class V6_veqb_enc : Enc_COPROC_VX_cmp<0b1111000000000>;
-class V6_veqh_enc : Enc_COPROC_VX_cmp<0b1111000000001>;
-class V6_veqw_enc : Enc_COPROC_VX_cmp<0b1111000000010>;
-class V6_vgtb_enc : Enc_COPROC_VX_cmp<0b1111000000100>;
-class V6_vgth_enc : Enc_COPROC_VX_cmp<0b1111000000101>;
-class V6_vgtw_enc : Enc_COPROC_VX_cmp<0b1111000000110>;
-class V6_vgtub_enc : Enc_COPROC_VX_cmp<0b1111000001000>;
-class V6_vgtuh_enc : Enc_COPROC_VX_cmp<0b1111000001001>;
-class V6_vgtuw_enc : Enc_COPROC_VX_cmp<0b1111000001010>;
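The Enc_COPROC_VX_cmp class above splices a 13-bit opcode and three operand fields into the two instruction half-words. A hypothetical C++ sketch of that packing (the helper name and signature are illustrative only, not part of this patch or of LLVM's MC emitter):

```cpp
#include <cstdint>

// Hypothetical helper mirroring Enc_COPROC_VX_cmp's bit layout: 13 opcode
// bits plus dst/src1/src2 packed into the 32-bit instruction word.
uint32_t encodeVXCmp(uint16_t Opc, uint8_t Dst, uint8_t Src1, uint8_t Src2) {
  uint32_t Hi = (0b00011u << 11)            // Inst{31-27}
              | (((Opc >> 7) & 0x3Fu) << 5) // Inst{26-21} = opc{12-7}
              | (Src2 & 0x1Fu);             // Inst{20-16} = src2{4-0}
  uint32_t Lo = (((Opc >> 6) & 0x1u) << 13) // Inst{13}    = opc{6}
              | ((Src1 & 0x1Fu) << 8)       // Inst{12-8}  = src1{4-0}
              | ((Opc & 0x3Fu) << 2)        // Inst{7-2}   = opc{5-0}
              | (Dst & 0x3u);               // Inst{1-0}   = dst{1-0}
  return (Hi << 16) | Lo; // Inst{15-14} are left for the packet/parse bits
}
```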
-
-class Enc_COPROC_VX_p2op<bits<5> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> dst;
- bits<5> src2;
-
- let Inst{31-16} = { 0b00011110, src1{1-0}, 0b0000, opc{4-3} };
- let Inst{13-0} = { 1, src2{4-0}, opc{2-0}, dst{4-0} };
-}
-
-class V6_vaddbq_enc : Enc_COPROC_VX_p2op<0b01000>;
-class V6_vaddhq_enc : Enc_COPROC_VX_p2op<0b01001>;
-class V6_vaddwq_enc : Enc_COPROC_VX_p2op<0b01010>;
-class V6_vaddbnq_enc : Enc_COPROC_VX_p2op<0b01011>;
-class V6_vaddhnq_enc : Enc_COPROC_VX_p2op<0b01100>;
-class V6_vaddwnq_enc : Enc_COPROC_VX_p2op<0b01101>;
-class V6_vsubbq_enc : Enc_COPROC_VX_p2op<0b01110>;
-class V6_vsubhq_enc : Enc_COPROC_VX_p2op<0b01111>;
-class V6_vsubwq_enc : Enc_COPROC_VX_p2op<0b10000>;
-class V6_vsubbnq_enc : Enc_COPROC_VX_p2op<0b10001>;
-class V6_vsubhnq_enc : Enc_COPROC_VX_p2op<0b10010>;
-class V6_vsubwnq_enc : Enc_COPROC_VX_p2op<0b10011>;
-
-class Enc_COPROC_VX_2op<bits<6> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
-
- let Inst{31-16} = { 0b00011110000000, opc{5-4} };
- let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} };
-}
-
-class V6_vabsh_enc : Enc_COPROC_VX_2op<0b000000>;
-class V6_vabsh_sat_enc : Enc_COPROC_VX_2op<0b000001>;
-class V6_vabsw_enc : Enc_COPROC_VX_2op<0b000010>;
-class V6_vabsw_sat_enc : Enc_COPROC_VX_2op<0b000011>;
-class V6_vnot_enc : Enc_COPROC_VX_2op<0b000100>;
-class V6_vdealh_enc : Enc_COPROC_VX_2op<0b000110>;
-class V6_vdealb_enc : Enc_COPROC_VX_2op<0b000111>;
-class V6_vunpackob_enc : Enc_COPROC_VX_2op<0b001000>;
-class V6_vunpackoh_enc : Enc_COPROC_VX_2op<0b001001>;
-class V6_vunpackub_enc : Enc_COPROC_VX_2op<0b010000>;
-class V6_vunpackuh_enc : Enc_COPROC_VX_2op<0b010001>;
-class V6_vunpackb_enc : Enc_COPROC_VX_2op<0b010010>;
-class V6_vunpackh_enc : Enc_COPROC_VX_2op<0b010011>;
-class V6_vshuffh_enc : Enc_COPROC_VX_2op<0b010111>;
-class V6_vshuffb_enc : Enc_COPROC_VX_2op<0b100000>;
-class V6_vzb_enc : Enc_COPROC_VX_2op<0b100001>;
-class V6_vzh_enc : Enc_COPROC_VX_2op<0b100010>;
-class V6_vsb_enc : Enc_COPROC_VX_2op<0b100011>;
-class V6_vsh_enc : Enc_COPROC_VX_2op<0b100100>;
-class V6_vcl0w_enc : Enc_COPROC_VX_2op<0b100101>;
-class V6_vpopcounth_enc : Enc_COPROC_VX_2op<0b100110>;
-class V6_vcl0h_enc : Enc_COPROC_VX_2op<0b100111>;
-class V6_vnormamtw_enc : Enc_COPROC_VX_2op<0b110100>;
-class V6_vnormamth_enc : Enc_COPROC_VX_2op<0b110101>;
-class V6_vassign_enc : Enc_COPROC_VX_2op<0b111111>;
-
-class Enc_COPROC_VMEM_vL32_b_ai<bits<4> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<10> src2;
- bits<4> src2_vector;
-
- let src2_vector = src2{9-6};
- let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} };
- let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} };
-}
-
-class V6_vL32b_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0000>;
-class V6_vL32b_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0001>;
-class V6_vL32b_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0010>;
-class V6_vL32Ub_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0111>;
-class V6_vL32b_nt_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1000>;
-class V6_vL32b_nt_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1001>;
-class V6_vL32b_nt_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1010>;
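In the vL32b encodings above, only src2{9-6} of the 10-bit byte offset is emitted: 64-byte vector accesses are vector-aligned, so the low six bits are implied zero and the 4-bit src2_vector field holds the offset in vector units (the 128B variants below use src2{10-7} for the same reason). A small sketch of that scaling, assuming the field is a signed 4-bit vector count (the helper is hypothetical):

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical helper: convert a byte offset into the 4-bit src2_vector
// field used by the 64-byte vL32b addressing mode above.
uint8_t encodeVecOffset64B(int32_t ByteOff) {
  assert(ByteOff % 64 == 0 && "offset must be a multiple of the vector size");
  int32_t VecOff = ByteOff / 64;
  assert(VecOff >= -8 && VecOff <= 7 && "offset out of assumed s4 range");
  return static_cast<uint8_t>(VecOff) & 0xFu; // two's-complement 4-bit field
}
```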
-
-class Enc_COPROC_VMEM_vL32_b_ai_128B<bits<4> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<11> src2;
- bits<4> src2_vector;
-
- let src2_vector = src2{10-7};
- let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} };
- let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} };
-}
-
-class V6_vL32b_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0000>;
-class V6_vL32b_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0001>;
-class V6_vL32b_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0010>;
-class V6_vL32Ub_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0111>;
-class V6_vL32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1000>;
-class V6_vL32b_nt_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1001>;
-class V6_vL32b_nt_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1010>;
-
-class Enc_COPROC_VMEM_vS32_b_ai_64B<bits<4> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<10> src2;
- bits<4> src2_vector;
- bits<5> src3;
-
- let src2_vector = src2{9-6};
- let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} };
- let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} };
-}
-
-class Enc_COPROC_VMEM_vS32_b_ai_128B<bits<4> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<11> src2;
- bits<4> src2_vector;
- bits<5> src3;
-
- let src2_vector = src2{10-7};
- let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} };
- let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} };
-}
-
-class V6_vS32b_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0000>;
-class V6_vS32Ub_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0111>;
-class V6_vS32b_nt_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b1000>;
-
-class V6_vS32b_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0000>;
-class V6_vS32Ub_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0111>;
-class V6_vS32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b1000>;
-
-class Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<bits<1> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<10> src2;
- bits<4> src2_vector;
- bits<3> src3;
-
- let src2_vector = src2{9-6};
- let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} };
- let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} };
-}
-
-class V6_vS32b_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<0>;
-class V6_vS32b_nt_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<1>;
-
-class Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<bits<1> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<11> src2;
- bits<4> src2_vector;
- bits<3> src3;
-
- let src2_vector = src2{10-7};
- let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} };
- let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} };
-}
-
-class V6_vS32b_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<0>;
-class V6_vS32b_nt_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<1>;
-
-class Enc_COPROC_VMEM_vS32_b_pred_ai<bits<5> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<10> src3;
- bits<4> src3_vector;
- bits<5> src4;
-
- let src3_vector = src3{9-6};
- let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} };
- let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
-}
-
-class Enc_COPROC_VMEM_vS32_b_pred_ai_128B<bits<5> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<11> src3;
- bits<4> src3_vector;
- bits<5> src4;
-
- let src3_vector = src3{10-7};
- let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} };
- let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
-}
-
-class V6_vS32b_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00000>;
-class V6_vS32b_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00001>;
-class V6_vS32b_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01000>;
-class V6_vS32b_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01001>;
-class V6_vS32Ub_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01110>;
-class V6_vS32Ub_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01111>;
-class V6_vS32b_nt_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10000>;
-class V6_vS32b_nt_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10001>;
-class V6_vS32b_nt_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11000>;
-class V6_vS32b_nt_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11001>;
-
-class V6_vS32b_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00000>;
-class V6_vS32b_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00001>;
-class V6_vS32b_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01000>;
-class V6_vS32b_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01001>;
-class V6_vS32Ub_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01110>;
-class V6_vS32Ub_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01111>;
-class V6_vS32b_nt_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10000>;
-class V6_vS32b_nt_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10001>;
-class V6_vS32b_nt_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11000>;
-class V6_vS32b_nt_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11001>;
-
-class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<bits<4> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<10> src3;
- bits<4> src3_vector;
- bits<3> src4;
-
- let src3_vector = src3{9-6};
- let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} };
- let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
-}
-
-class V6_vS32b_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0000>;
-class V6_vS32b_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0101>;
-class V6_vS32b_nt_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1010>;
-class V6_vS32b_nt_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1111>;
-
-class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<bits<4> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<11> src3;
- bits<4> src3_vector;
- bits<3> src4;
-
- let src3_vector = src3{10-7};
- let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} };
- let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
-}
-
-class V6_vS32b_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0000>;
-class V6_vS32b_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0101>;
-class V6_vS32b_nt_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1010>;
-class V6_vS32b_nt_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1111>;
-
-// TODO: Change script to generate dst, src1, src2 instead of
-// dst, dst2, src1.
-class Enc_COPROC_VMEM_vL32_b_pi<bits<4> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<9> src2;
- bits<3> src2_vector;
-
- let src2_vector = src2{8-6};
- let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} };
- let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} };
-}
-
-class V6_vL32b_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0000>;
-class V6_vL32b_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0001>;
-class V6_vL32b_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0010>;
-class V6_vL32Ub_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0111>;
-class V6_vL32b_nt_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1000>;
-class V6_vL32b_nt_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1001>;
-class V6_vL32b_nt_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1010>;
-
-class Enc_COPROC_VMEM_vL32_b_pi_128B<bits<4> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<10> src2;
- bits<3> src2_vector;
-
- let src2_vector = src2{9-7};
- let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} };
- let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} };
-}
-
-class V6_vL32b_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0000>;
-class V6_vL32b_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0001>;
-class V6_vL32b_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0010>;
-class V6_vL32Ub_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0111>;
-class V6_vL32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1000>;
-class V6_vL32b_nt_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1001>;
-class V6_vL32b_nt_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1010>;
-
-
-// TODO: Change script to generate src1, src2 and src3 instead of
-// dst, src1, src2.
-class Enc_COPROC_VMEM_vS32_b_pi<bits<4> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<9> src2;
- bits<3> src2_vector;
- bits<5> src3;
-
- let src2_vector = src2{8-6};
- let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} };
- let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} };
-}
-
-class V6_vS32b_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0000>;
-class V6_vS32Ub_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0111>;
-class V6_vS32b_nt_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b1000>;
-
-class Enc_COPROC_VMEM_vS32_b_pi_128B<bits<4> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<10> src2;
- bits<3> src2_vector;
- bits<5> src3;
-
- let src2_vector = src2{9-7};
- let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} };
- let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} };
-}
-
-class V6_vS32b_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0000>;
-class V6_vS32Ub_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0111>;
-class V6_vS32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b1000>;
-
-// TODO: Change script to generate src1, src2 and src3 instead of
-// dst, src1, src2.
-class Enc_COPROC_VMEM_vS32b_n_ew_pi<bits<1> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<9> src2;
- bits<3> src2_vector;
- bits<3> src3;
-
- let src2_vector = src2{8-6};
- let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} };
- let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} };
-}
-
-class V6_vS32b_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<0>;
-class V6_vS32b_nt_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<1>;
-
-class Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<bits<1> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<10> src2;
- bits<3> src2_vector;
- bits<3> src3;
-
- let src2_vector = src2{9-7};
- let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} };
- let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} };
-}
-
-class V6_vS32b_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<0>;
-class V6_vS32b_nt_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<1>;
-
-// TODO: Change script to generate src1, src2,src3 and src4 instead of
-// dst, src1, src2, src3.
-class Enc_COPROC_VMEM_vS32_b_pred_pi<bits<5> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<9> src3;
- bits<3> src3_vector;
- bits<5> src4;
-
- let src3_vector = src3{8-6};
- let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} };
- let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
-}
-
-class V6_vS32b_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00000>;
-class V6_vS32b_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00001>;
-class V6_vS32b_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01000>;
-class V6_vS32b_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01001>;
-class V6_vS32Ub_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01110>;
-class V6_vS32Ub_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01111>;
-class V6_vS32b_nt_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10000>;
-class V6_vS32b_nt_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10001>;
-class V6_vS32b_nt_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11000>;
-class V6_vS32b_nt_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11001>;
-
-// TODO: Change script to generate src1, src2,src3 and src4 instead of
-// dst, src1, src2, src3.
-class Enc_COPROC_VMEM_vS32_b_pred_pi_128B<bits<5> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<10> src3;
- bits<3> src3_vector;
- bits<5> src4;
-
- let src3_vector = src3{9-7};
- let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} };
- let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} };
-}
-
-class V6_vS32b_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00000>;
-class V6_vS32b_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00001>;
-class V6_vS32b_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01000>;
-class V6_vS32b_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01001>;
-class V6_vS32Ub_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01110>;
-class V6_vS32Ub_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01111>;
-class V6_vS32b_nt_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10000>;
-class V6_vS32b_nt_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10001>;
-class V6_vS32b_nt_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11000>;
-class V6_vS32b_nt_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11001>;
-
-class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<bits<4> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<9> src3;
- bits<3> src3_vector;
- bits<3> src4;
-
- let src3_vector = src3{8-6};
- let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} };
- let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
-}
-
-class V6_vS32b_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0000>;
-class V6_vS32b_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0101>;
-class V6_vS32b_nt_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1010>;
-class V6_vS32b_nt_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1111>;
-
-class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<bits<4> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<10> src3;
- bits<3> src3_vector;
- bits<3> src4;
-
- let src3_vector = src3{9-7};
- let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} };
- let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} };
-}
-
-class V6_vS32b_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0000>;
-class V6_vS32b_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0101>;
-class V6_vS32b_nt_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1010>;
-class V6_vS32b_nt_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1111>;
-
-class Enc_LD_load_m<bits<13> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<1> src2;
-
- let Inst{31-16} = { opc{12}, 0, opc{11-10}, 1, opc{9-4}, src1{4-0} };
- let Inst{13-0} = { src2{0}, 0b000, opc{3}, 0, opc{2-0}, dst{4-0} };
-}
-
-class V6_vL32b_ppu_enc : Enc_LD_load_m<0b0100110000000>;
-class V6_vL32b_cur_ppu_enc : Enc_LD_load_m<0b0100110000001>;
-class V6_vL32b_tmp_ppu_enc : Enc_LD_load_m<0b0100110000010>;
-class V6_vL32Ub_ppu_enc : Enc_LD_load_m<0b0100110000111>;
-class V6_vL32b_nt_ppu_enc : Enc_LD_load_m<0b0100110100000>;
-class V6_vL32b_nt_cur_ppu_enc : Enc_LD_load_m<0b0100110100001>;
-class V6_vL32b_nt_tmp_ppu_enc : Enc_LD_load_m<0b0100110100010>;
-
-class Enc_COPROC_VMEM_vS32_b_ppu<bits<4> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<1> src2;
- bits<5> src3;
-
- let Inst{31-16} = { 0b001010110, opc{3}, 1, src1{4-0} };
- let Inst{13-0} = { src2{0}, 0b00000, opc{2-0}, src3{4-0} };
-}
-
-class V6_vS32b_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0000>;
-class V6_vS32Ub_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0111>;
-class V6_vS32b_nt_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b1000>;
-
-class Enc_COPROC_VMEM_vS32b_new_ppu<bits<1> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<1> src2;
- bits<3> src3;
-
- let Inst{31-16} = { 0b001010110, opc{0}, 1, src1{4-0} };
- let Inst{13-0} = { src2{0}, 0b0000000100, src3{2-0} };
-}
-
-class V6_vS32b_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<0>;
-class V6_vS32b_nt_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<1>;
-
-class Enc_COPROC_VMEM_vS32_b_pred_ppu<bits<5> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<1> src3;
- bits<5> src4;
-
- let Inst{31-16} = { 0b001010111, opc{4-3}, src2{4-0} };
- let Inst{13-0} = { src3{0}, src1{1-0}, 0b000, opc{2-0}, src4{4-0} };
-}
-
-class V6_vS32b_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00000>;
-class V6_vS32b_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00001>;
-class V6_vS32b_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01000>;
-class V6_vS32b_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01001>;
-class V6_vS32Ub_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01110>;
-class V6_vS32Ub_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01111>;
-class V6_vS32b_nt_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10000>;
-class V6_vS32b_nt_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10001>;
-class V6_vS32b_nt_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11000>;
-class V6_vS32b_nt_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11001>;
-
-class Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<bits<4> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> src2;
- bits<1> src3;
- bits<3> src4;
-
- let Inst{31-16} = { 0b001010111, opc{3}, 1, src2{4-0} };
- let Inst{13-0} = { src3{0}, src1{1-0}, 0b00001, opc{2-0}, src4{2-0} };
-}
-
-class V6_vS32b_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0000>;
-class V6_vS32b_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0101>;
-class V6_vS32b_nt_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1010>;
-class V6_vS32b_nt_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1111>;
-
-
-class Enc_COPROC_VX_4op_i<bits<5> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
- bits<1> src3;
-
- let Inst{31-16} = { 0b00011001, opc{4-2}, src2{4-0} };
- let Inst{13-0} = { opc{1}, src1{4-0}, 1, opc{0}, src3{0}, dst{4-0} };
-}
-
-class V6_vrmpybusi_enc : Enc_COPROC_VX_4op_i<0b01000>;
-class V6_vrsadubi_enc : Enc_COPROC_VX_4op_i<0b01001>;
-class V6_vrmpybusi_acc_enc : Enc_COPROC_VX_4op_i<0b01010>;
-class V6_vrsadubi_acc_enc : Enc_COPROC_VX_4op_i<0b01011>;
-class V6_vrmpyubi_acc_enc : Enc_COPROC_VX_4op_i<0b01111>;
-class V6_vrmpyubi_enc : Enc_COPROC_VX_4op_i<0b10101>;
-
-class Enc_COPROC_VX_vandqrt<bits<5> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<2> src1;
- bits<5> src2;
-
- let Inst{31-16} = { 0b00011001, opc{4-3}, 1, src2{4-0} };
- let Inst{13-0} = { opc{2}, 0b000, src1{1-0}, opc{1-0}, 1, dst{4-0} };
-}
-
-class V6_vandqrt_acc_enc : Enc_COPROC_VX_vandqrt<0b01101>;
-class V6_vandqrt_enc : Enc_COPROC_VX_vandqrt<0b10010>;
-
-class Enc_COPROC_VX_cards<bits<2> opc> : OpcodeHexagon {
- bits<5> src1;
- bits<5> src2;
- bits<5> src3;
-
- let Inst{31-16} = { 0b00011001111, src3{4-0} };
- let Inst{13-0} = { 1, src1{4-0}, 0, opc{1-0}, src2{4-0} };
-}
-
-class V6_vshuff_enc : Enc_COPROC_VX_cards<0b01>;
-class V6_vdeal_enc : Enc_COPROC_VX_cards<0b10>;
-
-
-class Enc_COPROC_VX_v_cmov<bits<1> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> dst;
- bits<5> src2;
-
- let Inst{31-16} = { 0b0001101000, opc{0}, 0b00000 };
- let Inst{13-0} = { 0, src2{4-0}, 0, src1{1-0}, dst{4-0} };
-}
-
-class V6_vcmov_enc : Enc_COPROC_VX_v_cmov<0>;
-class V6_vncmov_enc : Enc_COPROC_VX_v_cmov<1>;
-
-class Enc_X_p3op<bits<8> opc> : OpcodeHexagon {
- bits<2> src1;
- bits<5> dst;
- bits<5> src2;
- bits<5> src3;
-
- let Inst{31-16} = { opc{7-5}, 0b1101, opc{4}, 0, opc{3-2}, src3{4-0} };
- let Inst{13-0} = { opc{1}, src2{4-0}, opc{0}, src1{1-0}, dst{4-0} };
-}
-
-class V6_vnccombine_enc : Enc_X_p3op<0b00001000>;
-class V6_vccombine_enc : Enc_X_p3op<0b00001100>;
-
-class Enc_COPROC_VX_4op_r<bits<4> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
- bits<3> src3;
-
- let Inst{31-16} = { 0b00011011, src2{4-0}, src3{2-0} };
- let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} };
-}
-
-class V6_valignb_enc : Enc_COPROC_VX_4op_r<0b0000>;
-class V6_vlalignb_enc : Enc_COPROC_VX_4op_r<0b0001>;
-class V6_vasrwh_enc : Enc_COPROC_VX_4op_r<0b0010>;
-class V6_vasrwhsat_enc : Enc_COPROC_VX_4op_r<0b0011>;
-class V6_vasrwhrndsat_enc : Enc_COPROC_VX_4op_r<0b0100>;
-class V6_vasrwuhsat_enc : Enc_COPROC_VX_4op_r<0b0101>;
-class V6_vasrhubsat_enc : Enc_COPROC_VX_4op_r<0b0110>;
-class V6_vasrhubrndsat_enc : Enc_COPROC_VX_4op_r<0b0111>;
-class V6_vasrhbrndsat_enc : Enc_COPROC_VX_4op_r<0b1000>;
-class V6_vlutvvb_enc : Enc_COPROC_VX_4op_r<0b1001>;
-class V6_vshuffvdd_enc : Enc_COPROC_VX_4op_r<0b1011>;
-class V6_vdealvdd_enc : Enc_COPROC_VX_4op_r<0b1100>;
-class V6_vlutvvb_oracc_enc : Enc_COPROC_VX_4op_r<0b1101>;
-class V6_vlutvwh_enc : Enc_COPROC_VX_4op_r<0b1110>;
-class V6_vlutvwh_oracc_enc : Enc_COPROC_VX_4op_r<0b1111>;
-
-class Enc_S_3op_valign_i<bits<9> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
- bits<3> src3;
-
- let Inst{31-16} = { opc{8-7}, 0, opc{6-3}, 0b00, opc{2-1}, src2{4-0} };
- let Inst{13-0} = { opc{0}, src1{4-0}, src3{2-0}, dst{4-0} };
-}
-
-class V6_vlutb_enc : Enc_S_3op_valign_i<0b001100000>;
-class V6_vlutb_dv_enc : Enc_S_3op_valign_i<0b001100010>;
-class V6_vlutb_acc_enc : Enc_S_3op_valign_i<0b001100100>;
-class V6_vlutb_dv_acc_enc : Enc_S_3op_valign_i<0b001100110>;
-class V6_valignbi_enc : Enc_S_3op_valign_i<0b001111011>;
-class V6_vlalignbi_enc : Enc_S_3op_valign_i<0b001111111>;
-class S2_valignib_enc : Enc_S_3op_valign_i<0b110000000>;
-class S2_addasl_rrri_enc : Enc_S_3op_valign_i<0b110010000>;
-
-class Enc_COPROC_VX_3op_q<bits<3> opc> : OpcodeHexagon {
- bits<2> dst;
- bits<2> src1;
- bits<2> src2;
-
- let Inst{31-16} = { 0b00011110, src2{1-0}, 0b000011 };
- let Inst{13-0} = { 0b0000, src1{1-0}, 0b000, opc{2-0}, dst{1-0} };
-}
-
-class V6_pred_and_enc : Enc_COPROC_VX_3op_q<0b000>;
-class V6_pred_or_enc : Enc_COPROC_VX_3op_q<0b001>;
-class V6_pred_xor_enc : Enc_COPROC_VX_3op_q<0b011>;
-class V6_pred_or_n_enc : Enc_COPROC_VX_3op_q<0b100>;
-class V6_pred_and_n_enc : Enc_COPROC_VX_3op_q<0b101>;
-
-class V6_pred_not_enc : OpcodeHexagon {
- bits<2> dst;
- bits<2> src1;
-
- let Inst{31-16} = { 0b0001111000000011 };
- let Inst{13-0} = { 0b0000, src1{1-0}, 0b000010, dst{1-0} };
-}
-
-class Enc_COPROC_VX_4op_q<bits<1> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<2> src1;
- bits<5> src2;
- bits<5> src3;
-
- let Inst{31-16} = { 0b000111101, opc{0}, 1, src3{4-0} };
- let Inst{13-0} = { 1, src2{4-0}, 0, src1{1-0}, dst{4-0} };
-}
-
-class V6_vswap_enc : Enc_COPROC_VX_4op_q<0>;
-class V6_vmux_enc : Enc_COPROC_VX_4op_q<1>;
-
-class Enc_X_2op<bits<16> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
-
- let Inst{31-16} = { opc{15-5}, src1{4-0} };
- let Inst{13-0} = { opc{4-3}, 0b0000, opc{2-0}, dst{4-0} };
-}
-
-class V6_lvsplatw_enc : Enc_X_2op<0b0001100110100001>;
-class V6_vinsertwr_enc : Enc_X_2op<0b0001100110110001>;
-class S6_vsplatrbp_enc : Enc_X_2op<0b1000010001000100>;
-
-
-class Enc_CR_2op_r<bits<12> opc> : OpcodeHexagon {
- bits<2> dst;
- bits<5> src1;
-
- let Inst{31-16} = { opc{11}, 0, opc{10-7}, 0, opc{6-3}, src1{4-0} };
- let Inst{13-0} = { opc{2}, 0b000000, opc{1}, 0b000, opc{0}, dst{1-0} };
-}
-
-class V6_pred_scalar2_enc : Enc_CR_2op_r<0b001101101011>;
-class Y5_l2locka_enc : Enc_CR_2op_r<0b110000111100>;
-
-class Enc_S_3op_i6<bits<9> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<6> src2;
-
- let Inst{31-16} = { 0b1000, opc{8-6}, 0, opc{5-3}, src1{4-0} };
- let Inst{13-0} = { src2{5-0}, opc{2-0}, dst{4-0} };
-}
-
-class S6_rol_i_p_enc : Enc_S_3op_i6<0b000000011>;
-class S6_rol_i_p_nac_enc : Enc_S_3op_i6<0b001000011>;
-class S6_rol_i_p_acc_enc : Enc_S_3op_i6<0b001000111>;
-class S6_rol_i_p_and_enc : Enc_S_3op_i6<0b001010011>;
-class S6_rol_i_p_or_enc : Enc_S_3op_i6<0b001010111>;
-class S6_rol_i_p_xacc_enc : Enc_S_3op_i6<0b001100011>;
-
-class Enc_X_3op_r<bits<15> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
-
- let Inst{31-16} = { opc{14-4}, src1{4-0} };
- let Inst{13-0} = { opc{3}, src2{4-0}, opc{2-0}, dst{4-0} };
-}
-
-class S6_rol_i_r_enc : Enc_X_3op_r<0b100011000000011>;
-class S6_rol_i_r_nac_enc : Enc_X_3op_r<0b100011100000011>;
-class S6_rol_i_r_acc_enc : Enc_X_3op_r<0b100011100000111>;
-class S6_rol_i_r_and_enc : Enc_X_3op_r<0b100011100100011>;
-class S6_rol_i_r_or_enc : Enc_X_3op_r<0b100011100100111>;
-class S6_rol_i_r_xacc_enc : Enc_X_3op_r<0b100011101000011>;
-class S6_vtrunehb_ppp_enc : Enc_X_3op_r<0b110000011000011>;
-class S6_vtrunohb_ppp_enc : Enc_X_3op_r<0b110000011000101>;
-
-class Enc_no_operands<bits<25> opc> : OpcodeHexagon {
-
- let Inst{31-16} = { opc{24-10}, 0 };
- let Inst{13-0} = { opc{9-7}, 0b000, opc{6-0}, 0 };
-}
-
-class Y5_l2gunlock_enc : Enc_no_operands<0b1010100000100000010000000>;
-class Y5_l2gclean_enc : Enc_no_operands<0b1010100000100000100000000>;
-class Y5_l2gcleaninv_enc : Enc_no_operands<0b1010100000100000110000000>;
-class V6_vhist_enc : Enc_no_operands<0b0001111000000001001000000>;
-
-class Enc_J_jumpr<bits<13> opc> : OpcodeHexagon {
- bits<5> src1;
-
- let Inst{31-16} = { opc{12-6}, 0, opc{5-3}, src1{4-0} };
- let Inst{13-0} = { 0b00, opc{2}, 0b0000, opc{1-0}, 0b00000 };
-}
-
-class Y5_l2unlocka_enc : Enc_J_jumpr<0b1010011011000>;
-class Y2_l2cleaninvidx_enc : Enc_J_jumpr<0b1010100011000>;
-
-class Enc_ST_l2gclean_pa<bits<2> opc> : OpcodeHexagon {
- bits<5> src1;
-
- let Inst{31-16} = { 0b101001101, opc{1-0}, 0b00000 };
- let Inst{13-0} = { 0, src1{4-0}, 0b00000000 };
-}
-
-class Y6_l2gcleanpa_enc : Enc_ST_l2gclean_pa<0b01>;
-class Y6_l2gcleaninvpa_enc : Enc_ST_l2gclean_pa<0b10>;
-
-class A5_ACS_enc : OpcodeHexagon {
- bits<5> dst1;
- bits<2> dst2;
- bits<5> src1;
- bits<5> src2;
-
- let Inst{31-16} = { 0b11101010101, src1{4-0} };
- let Inst{13-0} = { 0, src2{4-0}, 0, dst2{1-0}, dst1{4-0} };
-}
-
-class Enc_X_4op_r<bits<8> opc> : OpcodeHexagon {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
- bits<2> src3;
-
- let Inst{31-16} = { 0b11, opc{7}, 0, opc{6-5}, 1, opc{4-1}, src1{4-0} };
- let Inst{13-0} = { 0, src2{4-0}, opc{0}, src3{1-0}, dst{4-0} };
-}
-
-class S2_vsplicerb_enc : Enc_X_4op_r<0b00001000>;
-class S2_cabacencbin_enc : Enc_X_4op_r<0b00001010>;
-class F2_sffma_sc_enc : Enc_X_4op_r<0b11110111>;
-
-class V6_vhistq_enc : OpcodeHexagon {
- bits<2> src1;
-
- let Inst{31-16} = { 0b00011110, src1{1-0}, 0b000010 };
- let Inst{13-0} = { 0b10000010000000 };
-}
-
-// TODO: Change script to generate dst1 instead of dst.
-class A6_vminub_RdP_enc : OpcodeHexagon {
- bits<5> dst1;
- bits<2> dst2;
- bits<5> src1;
- bits<5> src2;
-
- let Inst{31-16} = { 0b11101010111, src2{4-0} };
- let Inst{13-0} = { 0, src1{4-0}, 0, dst2{1-0}, dst1{4-0} };
-}
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index fa3cccbd0879..39c2a6e4f5a5 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -7,26 +7,6 @@
//
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Hexagon Instruction Flags +
-//
-// *** Must match HexagonBaseInfo.h ***
-//===----------------------------------------------------------------------===//
-
-class IType<bits<5> t> {
- bits<5> Value = t;
-}
-def TypePSEUDO : IType<0>;
-def TypeALU32 : IType<1>;
-def TypeCR : IType<2>;
-def TypeJR : IType<3>;
-def TypeJ : IType<4>;
-def TypeLD : IType<5>;
-def TypeST : IType<6>;
-def TypeSYSTEM : IType<7>;
-def TypeXTYPE : IType<8>;
-def TypeENDLOOP: IType<31>;
-
// Maintain list of valid subtargets for each instruction.
class SubTarget<bits<6> value> {
bits<6> Value = value;
@@ -54,6 +34,7 @@ class MemAccessSize<bits<4> value> {
bits<4> Value = value;
}
+// MemAccessSize is represented as 1+log2(N) where N is the access size in bytes.
def NoMemAccess : MemAccessSize<0>;// Not a memory access instruction.
def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb).
def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh).
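Since the stored value is 1+log2(N), it is cheap to invert. A sketch of the reverse mapping (assumed helper, not the actual HexagonBaseInfo.h interface):

```cpp
#include <cassert>

// Assumed helper: invert the 1+log2(N-bytes) encoding used by MemAccessSize.
// Value 1 (ByteAccess) -> 1 byte, 3 (WordAccess) -> 4 bytes,
// 8 (Vector128Access) -> 128 bytes; 0 means "not a memory access".
unsigned memAccessSizeInBytes(unsigned Value) {
  assert(Value != 0 && "not a memory access instruction");
  return 1u << (Value - 1);
}
```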
@@ -70,10 +51,9 @@ def Vector128Access : MemAccessSize<8>;// Vector access instruction (memv)
class OpcodeHexagon {
field bits<32> Inst = ?; // Default to an invalid insn.
bits<4> IClass = 0; // ICLASS
+ bits<1> zero = 0;
let Inst{31-28} = IClass;
-
- bits<1> zero = 0;
}
class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
@@ -99,85 +79,88 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
// Instruction type according to the ISA.
IType Type = type;
- let TSFlags{4-0} = Type.Value;
+ let TSFlags{5-0} = Type.Value;
// Solo instructions, i.e., those that cannot be in a packet with others.
bits<1> isSolo = 0;
- let TSFlags{5} = isSolo;
+ let TSFlags{6} = isSolo;
// Packed only with A or X-type instructions.
bits<1> isSoloAX = 0;
- let TSFlags{6} = isSoloAX;
+ let TSFlags{7} = isSoloAX;
// Only A-type instruction in first slot or nothing.
bits<1> isSoloAin1 = 0;
- let TSFlags{7} = isSoloAin1;
+ let TSFlags{8} = isSoloAin1;
// Predicated instructions.
bits<1> isPredicated = 0;
- let TSFlags{8} = isPredicated;
+ let TSFlags{9} = isPredicated;
bits<1> isPredicatedFalse = 0;
- let TSFlags{9} = isPredicatedFalse;
+ let TSFlags{10} = isPredicatedFalse;
bits<1> isPredicatedNew = 0;
- let TSFlags{10} = isPredicatedNew;
+ let TSFlags{11} = isPredicatedNew;
bits<1> isPredicateLate = 0;
- let TSFlags{11} = isPredicateLate; // Late predicate producer insn.
+ let TSFlags{12} = isPredicateLate; // Late predicate producer insn.
// New-value insn helper fields.
bits<1> isNewValue = 0;
- let TSFlags{12} = isNewValue; // New-value consumer insn.
+ let TSFlags{13} = isNewValue; // New-value consumer insn.
bits<1> hasNewValue = 0;
- let TSFlags{13} = hasNewValue; // New-value producer insn.
+ let TSFlags{14} = hasNewValue; // New-value producer insn.
bits<3> opNewValue = 0;
- let TSFlags{16-14} = opNewValue; // New-value produced operand.
+ let TSFlags{17-15} = opNewValue; // New-value produced operand.
bits<1> isNVStorable = 0;
- let TSFlags{17} = isNVStorable; // Store that can become new-value store.
+ let TSFlags{18} = isNVStorable; // Store that can become new-value store.
bits<1> isNVStore = 0;
- let TSFlags{18} = isNVStore; // New-value store insn.
+ let TSFlags{19} = isNVStore; // New-value store insn.
bits<1> isCVLoadable = 0;
- let TSFlags{19} = isCVLoadable; // Load that can become cur-value load.
+ let TSFlags{20} = isCVLoadable; // Load that can become cur-value load.
bits<1> isCVLoad = 0;
- let TSFlags{20} = isCVLoad; // Cur-value load insn.
+ let TSFlags{21} = isCVLoad; // Cur-value load insn.
// Immediate extender helper fields.
bits<1> isExtendable = 0;
- let TSFlags{21} = isExtendable; // Insn may be extended.
+ let TSFlags{22} = isExtendable; // Insn may be extended.
bits<1> isExtended = 0;
- let TSFlags{22} = isExtended; // Insn must be extended.
+ let TSFlags{23} = isExtended; // Insn must be extended.
bits<3> opExtendable = 0;
- let TSFlags{25-23} = opExtendable; // Which operand may be extended.
+ let TSFlags{26-24} = opExtendable; // Which operand may be extended.
bits<1> isExtentSigned = 0;
- let TSFlags{26} = isExtentSigned; // Signed or unsigned range.
+ let TSFlags{27} = isExtentSigned; // Signed or unsigned range.
bits<5> opExtentBits = 0;
- let TSFlags{31-27} = opExtentBits; //Number of bits of range before extending.
+ let TSFlags{32-28} = opExtentBits; //Number of bits of range before extending.
bits<2> opExtentAlign = 0;
- let TSFlags{33-32} = opExtentAlign; // Alignment exponent before extending.
+ let TSFlags{34-33} = opExtentAlign; // Alignment exponent before extending.
// If an instruction is valid on a subtarget, set the corresponding
// bit from validSubTargets.
// By default, instruction is valid on all subtargets.
SubTarget validSubTargets = HasAnySubT;
- let TSFlags{39-34} = validSubTargets.Value;
+ let TSFlags{40-35} = validSubTargets.Value;
// Addressing mode for load/store instructions.
AddrModeType addrMode = NoAddrMode;
- let TSFlags{42-40} = addrMode.Value;
+ let TSFlags{43-41} = addrMode.Value;
// Memory access size for mem access instructions (load/store)
MemAccessSize accessSize = NoMemAccess;
- let TSFlags{46-43} = accessSize.Value;
+ let TSFlags{47-44} = accessSize.Value;
bits<1> isTaken = 0;
- let TSFlags {47} = isTaken; // Branch prediction.
+ let TSFlags {48} = isTaken; // Branch prediction.
bits<1> isFP = 0;
- let TSFlags {48} = isFP; // Floating-point.
+ let TSFlags {49} = isFP; // Floating-point.
bits<1> hasNewValue2 = 0;
- let TSFlags{50} = hasNewValue2; // Second New-value producer insn.
+ let TSFlags{51} = hasNewValue2; // Second New-value producer insn.
bits<3> opNewValue2 = 0;
- let TSFlags{53-51} = opNewValue2; // Second New-value produced operand.
+ let TSFlags{54-52} = opNewValue2; // Second New-value produced operand.
bits<1> isAccumulator = 0;
- let TSFlags{54} = isAccumulator;
+ let TSFlags{55} = isAccumulator;
+
+ bits<1> prefersSlot3 = 0;
+ let TSFlags{56} = prefersSlot3; // Complex XU
bit cofMax1 = 0;
let TSFlags{60} = cofMax1;
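The one-bit shift throughout this hunk follows from widening Type to TSFlags{5-0}: the old five-bit IType enumeration was already full (TypeENDLOOP sat at 31), and the new per-class types need a sixth bit, so every later field moves up. Any decoder must use the matching positions; hypothetical accessors for the new layout (the real masks belong in HexagonBaseInfo.h, hence the "must match" comment below):

```cpp
#include <cstdint>

// Hypothetical accessors matching the renumbered TSFlags layout above.
unsigned getType(uint64_t TSFlags)      { return TSFlags & 0x3F; }         // {5-0}
bool     isSolo(uint64_t TSFlags)       { return (TSFlags >> 6) & 0x1; }   // {6}
unsigned getOpExtBits(uint64_t TSFlags) { return (TSFlags >> 28) & 0x1F; } // {32-28}
```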
@@ -200,9 +183,13 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
let NValueST = !if(isNVStore, "true", "false");
let isNT = !if(isNonTemporal, "true", "false");
+ let hasSideEffects = 0;
// *** Must match MCTargetDesc/HexagonBaseInfo.h ***
}
+class HInst<dag outs, dag ins, string asmstr, InstrItinClass itin, IType type> :
+ InstHexagon<outs, ins, asmstr, [], "", itin, type>;
+
//===----------------------------------------------------------------------===//
// Instruction Classes Definitions +
//===----------------------------------------------------------------------===//
@@ -214,14 +201,13 @@ class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
-let mayLoad = 1 in
-class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : LDInst<outs, ins, asmstr, pattern, cstr>;
+class PseudoLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
class CONSTLDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
- : LDInst<outs, ins, asmstr, pattern, cstr>;
+ : PseudoLDInst<outs, ins, asmstr, pattern, cstr>;
// LD Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
@@ -247,6 +233,11 @@ class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon;
+let mayStore = 1 in
+class STInst_NoOpcode<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
+
class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
: STInst<outs, ins, asmstr, pattern, cstr>;
@@ -269,28 +260,24 @@ class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
: STInst<outs, ins, asmstr, pattern, cstr, itin>;
-// SYSTEM Instruction Class in V4 can take SLOT0 only
-// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1.
-class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = ST_tc_3stall_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>,
- OpcodeHexagon;
-
-// ALU32 Instruction Class in V2/V3/V4.
-// Definition of the instruction class NOT CHANGED.
-class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>, OpcodeHexagon;
-
// ALU64 Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU64>,
OpcodeHexagon;
+// ALU64 Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
+class ALU64Inst_NoOpcode<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU64>;
+
+
class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
: ALU64Inst<outs, ins, asmstr, pattern, cstr, itin>;
@@ -302,13 +289,13 @@ class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeM>,
OpcodeHexagon;
// Same as above but doesn't derive from OpcodeHexagon
class MInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeM>;
// M Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
@@ -324,12 +311,16 @@ class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeS_2op>,
OpcodeHexagon;
+class SInst_NoOpcode<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeS_2op>;
+
class SInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeS_2op>;
// S Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
@@ -337,7 +328,9 @@ class SInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = S_3op_tc_1_SLOT23>
- : SInst<outs, ins, asmstr, pattern, cstr, itin>;
+ : SInst<outs, ins, asmstr, pattern, cstr, itin> {
+ let Type = TypeS_3op;
+}
// J Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
@@ -349,12 +342,6 @@ class JInst_CJUMP_UCJUMP<dag outs, dag ins, string asmstr, list<dag> pattern = [
string cstr = "", InstrItinClass itin = J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>, OpcodeHexagon;
-// JR Instruction Class in V2/V3/V4.
-// Definition of the instruction class NOT CHANGED.
-class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = J_tc_2early_SLOT2>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>, OpcodeHexagon;
-
// CR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
@@ -383,26 +370,6 @@ class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Instruction Classes Definitions -
//===----------------------------------------------------------------------===//
-
-//
-// ALU32 patterns
-//.
-class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
- : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
-
-class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
- : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
-
-class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
- : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
-
-class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
- : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
-
//
// ALU64 patterns.
//
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index 493d04703da9..1fdf930c62fd 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -11,18 +11,6 @@
//
//===----------------------------------------------------------------------===//
-//----------------------------------------------------------------------------//
-// Hexagon Instruction Flags
-//
-// *** Must match BaseInfo.h ***
-//----------------------------------------------------------------------------//
-
-def TypeV4LDST : IType<9>;
-def TypeNV : IType<10>;
-def TypeDUPLEX : IType<11>;
-def TypeCOMPOUND : IType<12>;
-def TypePREFIX : IType<30>;
-
// Duplex Instruction Class Declaration
//===----------------------------------------------------------------------===//
@@ -61,7 +49,7 @@ class InstDuplex<bits<4> iClass, list<dag> pattern = [],
// *** Must match MCTargetDesc/HexagonBaseInfo.h ***
- let TSFlags{4-0} = Type.Value;
+ let TSFlags{5-0} = Type.Value;
// Predicated instructions.
bits<1> isPredicated = 0;
@@ -107,7 +95,7 @@ class InstDuplex<bits<4> iClass, list<dag> pattern = [],
//
class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>, OpcodeHexagon;
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNCJ>, OpcodeHexagon;
class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
@@ -141,7 +129,7 @@ class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []>
: InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123,
- TypePREFIX>, OpcodeHexagon;
+ TypeEXTENDER>, OpcodeHexagon;
class SUBInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
@@ -150,11 +138,11 @@ class SUBInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
class CJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>,
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND_CJ_ARCHDEPSLOT, TypeCJ>,
OpcodeHexagon;
class CJInst_JMPSET<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCOMPOUND>,
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, COMPOUND, TypeCJ>,
OpcodeHexagon;
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
index b9f4373a0b79..c8a7faea5ed5 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV60.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
@@ -12,28 +12,6 @@
//===----------------------------------------------------------------------===//
//----------------------------------------------------------------------------//
-// Hexagon Instruction Flags +
-//
-// *** Must match BaseInfo.h ***
-//----------------------------------------------------------------------------//
-
-def TypeCVI_VA : IType<13>;
-def TypeCVI_VA_DV : IType<14>;
-def TypeCVI_VX : IType<15>;
-def TypeCVI_VX_DV : IType<16>;
-def TypeCVI_VP : IType<17>;
-def TypeCVI_VP_VS : IType<18>;
-def TypeCVI_VS : IType<19>;
-def TypeCVI_VINLANESAT : IType<20>;
-def TypeCVI_VM_LD : IType<21>;
-def TypeCVI_VM_TMP_LD : IType<22>;
-def TypeCVI_VM_CUR_LD : IType<23>;
-def TypeCVI_VM_VP_LDU : IType<24>;
-def TypeCVI_VM_ST : IType<25>;
-def TypeCVI_VM_NEW_ST : IType<26>;
-def TypeCVI_VM_STU : IType<27>;
-def TypeCVI_HIST : IType<28>;
-//----------------------------------------------------------------------------//
// Instruction Classes Definitions +
//----------------------------------------------------------------------------//
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 0a7dc6b49d00..b265a883da5c 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -152,10 +152,11 @@ static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB,
/// On Hexagon, we have two instructions used to set up the hardware loop
/// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
/// to indicate the end of a loop.
-static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
+static MachineInstr *findLoopInstr(MachineBasicBlock *BB, unsigned EndLoopOp,
+ MachineBasicBlock *TargetBB,
SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
- int LOOPi;
- int LOOPr;
+ unsigned LOOPi;
+ unsigned LOOPr;
if (EndLoopOp == Hexagon::ENDLOOP0) {
LOOPi = Hexagon::J2_loop0i;
LOOPr = Hexagon::J2_loop0r;
@@ -165,26 +166,24 @@ static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
}
// The loop set-up instruction will be in a predecessor block
- for (MachineBasicBlock::pred_iterator PB = BB->pred_begin(),
- PE = BB->pred_end(); PB != PE; ++PB) {
+ for (MachineBasicBlock *PB : BB->predecessors()) {
// If this has already been visited, skip it.
- if (!Visited.insert(*PB).second)
+ if (!Visited.insert(PB).second)
continue;
- if (*PB == BB)
+ if (PB == BB)
continue;
- for (MachineBasicBlock::reverse_instr_iterator I = (*PB)->instr_rbegin(),
- E = (*PB)->instr_rend(); I != E; ++I) {
- int Opc = I->getOpcode();
+ for (auto I = PB->instr_rbegin(), E = PB->instr_rend(); I != E; ++I) {
+ unsigned Opc = I->getOpcode();
if (Opc == LOOPi || Opc == LOOPr)
return &*I;
- // We've reached a different loop, which means the loop0 has been removed.
- if (Opc == EndLoopOp)
+ // We've reached a different loop, which means the loop0/1 has been
+ // removed.
+ if (Opc == EndLoopOp && I->getOperand(0).getMBB() != TargetBB)
return nullptr;
}
// Check the predecessors for the LOOP instruction.
- MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited);
- if (loop)
- return loop;
+ if (MachineInstr *Loop = findLoopInstr(PB, EndLoopOp, TargetBB, Visited))
+ return Loop;
}
return nullptr;
}
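
For readers following the traversal: the Visited set is what lets this recursive predecessor walk terminate on cyclic CFGs. A minimal self-contained sketch of the same pattern, with a hypothetical Node type standing in for MachineBasicBlock:

#include <set>
#include <vector>

struct Node {
  std::vector<Node *> Preds;
  bool HasLoopSetup = false; // stand-in for "contains J2_loop0i/J2_loop0r"
};

// Walk predecessors depth-first; the insert().second test skips any
// block we have already visited, so cycles cannot recurse forever.
static Node *findSetup(Node *BB, std::set<Node *> &Visited) {
  for (Node *PB : BB->Preds) {
    if (!Visited.insert(PB).second)
      continue;
    if (PB->HasLoopSetup)
      return PB;
    if (Node *N = findSetup(PB, Visited))
      return N;
  }
  return nullptr;
}
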
@@ -597,7 +596,8 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB,
// Since we're adding an ENDLOOP, there better be a LOOP instruction.
// Check for it, and change the BB target if needed.
SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
- MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+ MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, Cond[1].getMBB(),
+ VisitedBBs);
assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
Loop->getOperand(0).setMBB(TBB);
// Add the ENDLOOP after finding the LOOP0.
@@ -637,7 +637,8 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB,
// Since we're adding an ENDLOOP, there better be a LOOP instruction.
// Check for it, and change the BB target if needed.
SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
- MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+ MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, Cond[1].getMBB(),
+ VisitedBBs);
assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
Loop->getOperand(0).setMBB(TBB);
// Add the ENDLOOP after finding the LOOP0.
@@ -687,7 +688,8 @@ unsigned HexagonInstrInfo::reduceLoopCount(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
DebugLoc DL = Cmp.getDebugLoc();
SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
- MachineInstr *Loop = findLoopInstr(&MBB, Cmp.getOpcode(), VisitedBBs);
+ MachineInstr *Loop = findLoopInstr(&MBB, Cmp.getOpcode(),
+ Cmp.getOperand(0).getMBB(), VisitedBBs);
if (!Loop)
return 0;
// If the loop trip count is a compile-time value, then just change the
@@ -1074,13 +1076,13 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
MachineInstr *MI1New =
BuildMI(MBB, MI, DL, get(NewOpc))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
.addImm(MI.getOperand(1).getImm())
.addReg(SrcSubLo)
.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI1New->getOperand(0).setIsKill(false);
BuildMI(MBB, MI, DL, get(NewOpc))
- .addOperand(MI.getOperand(0))
+ .add(MI.getOperand(0))
// The Vectors are indexed in multiples of vector size.
.addImm(MI.getOperand(1).getImm() + Offset)
.addReg(SrcSubHi)
@@ -1106,15 +1108,13 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
- MachineInstr *MI1New =
- BuildMI(MBB, MI, DL, get(NewOpc),
- HRI.getSubReg(DstReg, Hexagon::vsub_lo))
- .addOperand(MI.getOperand(1))
- .addImm(MI.getOperand(2).getImm());
+ MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpc),
+ HRI.getSubReg(DstReg, Hexagon::vsub_lo))
+ .add(MI.getOperand(1))
+ .addImm(MI.getOperand(2).getImm());
MI1New->getOperand(1).setIsKill(false);
- BuildMI(MBB, MI, DL, get(NewOpc),
- HRI.getSubReg(DstReg, Hexagon::vsub_hi))
- .addOperand(MI.getOperand(1))
+ BuildMI(MBB, MI, DL, get(NewOpc), HRI.getSubReg(DstReg, Hexagon::vsub_hi))
+ .add(MI.getOperand(1))
// The Vectors are indexed in multiples of vector size.
.addImm(MI.getOperand(2).getImm() + Offset)
.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
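
The many .addOperand(...) to .add(...) rewrites in this file track the MachineInstrBuilder rename, where add() copies an existing MachineOperand, flags included, onto the instruction being built. A rough usage sketch, assuming MBB, MI, DL, TII and NewOpc are in scope:

// Clone every operand of an existing instruction onto a replacement;
// add() preserves kill/dead/implicit flags on the copied operand.
llvm::MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII->get(NewOpc));
for (const llvm::MachineOperand &MO : MI.operands())
  MIB.add(MO);
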
@@ -1227,18 +1227,18 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg());
if (Op0.getReg() != Op2.getReg()) {
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vcmov))
- .addOperand(Op0)
- .addOperand(Op1)
- .addOperand(Op2);
+ .add(Op0)
+ .add(Op1)
+ .add(Op2);
if (IsDestLive)
T.addReg(Op0.getReg(), RegState::Implicit);
IsDestLive = true;
}
if (Op0.getReg() != Op3.getReg()) {
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vncmov))
- .addOperand(Op0)
- .addOperand(Op1)
- .addOperand(Op3);
+ .add(Op0)
+ .add(Op1)
+ .add(Op3);
if (IsDestLive)
T.addReg(Op0.getReg(), RegState::Implicit);
}
@@ -1259,10 +1259,10 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_lo);
unsigned SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_hi);
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine))
- .addOperand(Op0)
- .addOperand(Op1)
- .addReg(SrcHi)
- .addReg(SrcLo);
+ .add(Op0)
+ .add(Op1)
+ .addReg(SrcHi)
+ .addReg(SrcLo);
if (IsDestLive)
T.addReg(Op0.getReg(), RegState::Implicit);
IsDestLive = true;
@@ -1271,10 +1271,10 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
unsigned SrcLo = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_lo);
unsigned SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_hi);
auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vnccombine))
- .addOperand(Op0)
- .addOperand(Op1)
- .addReg(SrcHi)
- .addReg(SrcLo);
+ .add(Op0)
+ .add(Op1)
+ .addReg(SrcHi)
+ .addReg(SrcLo);
if (IsDestLive)
T.addReg(Op0.getReg(), RegState::Implicit);
}
@@ -1376,7 +1376,7 @@ bool HexagonInstrInfo::PredicateInstruction(
MachineOperand &Op = MI.getOperand(NOp);
if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
break;
- T.addOperand(Op);
+ T.add(Op);
NOp++;
}
@@ -1386,7 +1386,7 @@ bool HexagonInstrInfo::PredicateInstruction(
assert(GotPredReg);
T.addReg(PredReg, PredRegFlags);
while (NOp < NumOps)
- T.addOperand(MI.getOperand(NOp++));
+ T.add(MI.getOperand(NOp++));
MI.setDesc(get(PredOpc));
while (unsigned n = MI.getNumOperands())
@@ -1413,18 +1413,28 @@ bool HexagonInstrInfo::DefinesPredicate(
auto &HRI = getRegisterInfo();
for (unsigned oper = 0; oper < MI.getNumOperands(); ++oper) {
MachineOperand MO = MI.getOperand(oper);
- if (MO.isReg() && MO.isDef()) {
+ if (MO.isReg()) {
+ if (!MO.isDef())
+ continue;
const TargetRegisterClass* RC = HRI.getMinimalPhysRegClass(MO.getReg());
if (RC == &Hexagon::PredRegsRegClass) {
Pred.push_back(MO);
return true;
}
+ continue;
+ } else if (MO.isRegMask()) {
+ for (unsigned PR : Hexagon::PredRegsRegClass) {
+ if (!MI.modifiesRegister(PR, &HRI))
+ continue;
+ Pred.push_back(MO);
+ return true;
+ }
}
}
return false;
}
-bool HexagonInstrInfo::isPredicable(MachineInstr &MI) const {
+bool HexagonInstrInfo::isPredicable(const MachineInstr &MI) const {
return MI.getDesc().isPredicable();
}
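
Context for the new regmask branch in DefinesPredicate above: a call instruction carries a regmask operand listing the registers it clobbers, and MachineInstr::modifiesRegister() takes such masks into account. A minimal sketch of the same query (the helper name and Regs contents are illustrative):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetRegisterInfo.h"

// True if MI, e.g. via a call's regmask, clobbers any of the given
// physical registers; modifiesRegister covers defs and regmask hits.
static bool clobbersAny(const llvm::MachineInstr &MI,
                        const llvm::TargetRegisterInfo &TRI,
                        llvm::ArrayRef<unsigned> Regs) {
  for (unsigned R : Regs)
    if (MI.modifiesRegister(R, &TRI))
      return true;
  return false;
}
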
@@ -1715,7 +1725,7 @@ bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const {
// Return true if the instruction is a compound branch instruction.
bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr &MI) const {
- return (getType(MI) == HexagonII::TypeCOMPOUND && MI.isBranch());
+ return getType(MI) == HexagonII::TypeCJ && MI.isBranch();
}
bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const {
@@ -3009,10 +3019,12 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &MI,
bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI,
unsigned PredReg) const {
- for (unsigned opNum = 0; opNum < MI.getNumOperands(); opNum++) {
- const MachineOperand &MO = MI.getOperand(opNum);
+ for (const MachineOperand &MO : MI.operands()) {
+ // Predicate register must be explicitly defined.
+ if (MO.isRegMask() && MO.clobbersPhysReg(PredReg))
+ return false;
if (MO.isReg() && MO.isDef() && MO.isImplicit() && (MO.getReg() == PredReg))
- return false; // Predicate register must be explicitly defined.
+ return false;
}
// Hexagon Programmer's Reference says that decbin, memw_locked, and
@@ -3415,7 +3427,9 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const {
return NVOpcode;
switch (MI.getOpcode()) {
- default: llvm_unreachable("Unknown .new type");
+ default:
+ llvm::report_fatal_error(std::string("Unknown .new type: ") +
+ std::to_string(MI.getOpcode()).c_str());
case Hexagon::S4_storerb_ur:
return Hexagon::S4_storerbnew_ur;
@@ -3456,20 +3470,75 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const {
int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI,
const MachineBranchProbabilityInfo *MBPI) const {
// We assume that a block can have at most two successors.
- bool taken = false;
const MachineBasicBlock *Src = MI.getParent();
const MachineOperand &BrTarget = MI.getOperand(1);
- const MachineBasicBlock *Dst = BrTarget.getMBB();
+ bool Taken = false;
+ const BranchProbability OneHalf(1, 2);
- const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst);
- if (Prediction >= BranchProbability(1,2))
- taken = true;
+ if (BrTarget.isMBB()) {
+ const MachineBasicBlock *Dst = BrTarget.getMBB();
+ Taken = MBPI->getEdgeProbability(Src, Dst) >= OneHalf;
+ } else {
+ // The branch target is not a basic block (most likely a function).
+ // Since BPI only gives probabilities for targets that are basic blocks,
+ // try to identify another target of this branch (potentially a
+ // fall-through) and check the probability of that target.
+ //
+ // The only handled branch combinations are:
+ // - one conditional branch,
+ // - one conditional branch followed by one unconditional branch.
+ // Otherwise, assume not-taken.
+ assert(MI.isConditionalBranch());
+ const MachineBasicBlock &B = *MI.getParent();
+ bool SawCond = false, Bad = false;
+ for (const MachineInstr &I : B) {
+ if (!I.isBranch())
+ continue;
+ if (I.isConditionalBranch()) {
+ SawCond = true;
+ if (&I != &MI) {
+ Bad = true;
+ break;
+ }
+ }
+ if (I.isUnconditionalBranch() && !SawCond) {
+ Bad = true;
+ break;
+ }
+ }
+ if (!Bad) {
+ MachineBasicBlock::const_instr_iterator It(MI);
+ MachineBasicBlock::const_instr_iterator NextIt = std::next(It);
+ if (NextIt == B.instr_end()) {
+ // If this branch is the last, look for the fall-through block.
+ for (const MachineBasicBlock *SB : B.successors()) {
+ if (!B.isLayoutSuccessor(SB))
+ continue;
+ Taken = MBPI->getEdgeProbability(Src, SB) < OneHalf;
+ break;
+ }
+ } else {
+ assert(NextIt->isUnconditionalBranch());
+ // Find the first MBB operand and assume it's the target.
+ const MachineBasicBlock *BT = nullptr;
+ for (const MachineOperand &Op : NextIt->operands()) {
+ if (!Op.isMBB())
+ continue;
+ BT = Op.getMBB();
+ break;
+ }
+ Taken = BT && MBPI->getEdgeProbability(Src, BT) < OneHalf;
+ }
+ } // if (!Bad)
+ }
+
+ // The Taken flag should be set to something reasonable by this point.
switch (MI.getOpcode()) {
case Hexagon::J2_jumpt:
- return taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew;
+ return Taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew;
case Hexagon::J2_jumpf:
- return taken ? Hexagon::J2_jumpfnewpt : Hexagon::J2_jumpfnew;
+ return Taken ? Hexagon::J2_jumpfnewpt : Hexagon::J2_jumpfnew;
default:
llvm_unreachable("Unexpected jump instruction.");
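
Once the alternative target is identified, the heuristic above reduces to a single probability comparison. A condensed sketch as a standalone helper (an assumption for illustration, not part of the patch):

#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/Support/BranchProbability.h"

// Taken-ness for a conditional branch whose own target is not an MBB:
// if the other target (fall-through or trailing unconditional jump) is
// the likely successor, predict the conditional branch as not-taken.
static bool isProbablyTaken(const llvm::MachineBasicBlock *Src,
                            const llvm::MachineBasicBlock *AltTarget,
                            const llvm::MachineBranchProbabilityInfo &MBPI) {
  const llvm::BranchProbability OneHalf(1, 2);
  return AltTarget && MBPI.getEdgeProbability(Src, AltTarget) < OneHalf;
}
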
@@ -3479,26 +3548,46 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI,
// Return .new predicate version for an instruction.
int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI,
const MachineBranchProbabilityInfo *MBPI) const {
- int NewOpcode = Hexagon::getPredNewOpcode(MI.getOpcode());
- if (NewOpcode >= 0) // Valid predicate new instruction
- return NewOpcode;
-
switch (MI.getOpcode()) {
// Conditional Jumps
case Hexagon::J2_jumpt:
case Hexagon::J2_jumpf:
return getDotNewPredJumpOp(MI, MBPI);
-
- default:
- assert(0 && "Unknown .new type");
}
- return 0;
+
+ int NewOpcode = Hexagon::getPredNewOpcode(MI.getOpcode());
+ if (NewOpcode >= 0)
+ return NewOpcode;
+
+ dbgs() << "Cannot convert to .new: " << getName(MI.getOpcode()) << '\n';
+ llvm_unreachable(nullptr);
}
-int HexagonInstrInfo::getDotOldOp(const int opc) const {
- int NewOp = opc;
+int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const {
+ int NewOp = MI.getOpcode();
if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form
NewOp = Hexagon::getPredOldOpcode(NewOp);
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();
+ // All Hexagon architectures have prediction bits on dot-new branches,
+ // but only Hexagon V60+ has prediction bits on dot-old ones. Make sure
+ // to pick the right opcode when converting back to dot-old.
+ if (!HST.getFeatureBits()[Hexagon::ArchV60]) {
+ switch (NewOp) {
+ case Hexagon::J2_jumptpt:
+ NewOp = Hexagon::J2_jumpt;
+ break;
+ case Hexagon::J2_jumpfpt:
+ NewOp = Hexagon::J2_jumpf;
+ break;
+ case Hexagon::J2_jumprtpt:
+ NewOp = Hexagon::J2_jumprt;
+ break;
+ case Hexagon::J2_jumprfpt:
+ NewOp = Hexagon::J2_jumprf;
+ break;
+ }
+ }
assert(NewOp >= 0 &&
"Couldn't change predicate new instruction to its old form.");
}
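
A worked example of the downgrade path, assuming getPredOldOpcode maps hinted dot-new jumps to their hinted dot-old forms (that mapping is an assumption; only the switch above is from the patch):

// Assumed: J2_jumptnewpt --getPredOldOpcode--> J2_jumptpt. On a
// pre-V60 subtarget the switch above then strips the taken hint,
// so the final dot-old opcode is plain J2_jumpt.
int Old = Hexagon::getPredOldOpcode(Hexagon::J2_jumptnewpt);
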
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 2358d4b7e4c0..b268c7a28171 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -235,7 +235,7 @@ public:
/// Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
- bool isPredicable(MachineInstr &MI) const override;
+ bool isPredicable(const MachineInstr &MI) const override;
/// Test if the given instruction should be considered a scheduling boundary.
/// This primarily includes labels and terminators.
@@ -404,7 +404,7 @@ public:
const MachineBranchProbabilityInfo *MBPI) const;
int getDotNewPredOp(const MachineInstr &MI,
const MachineBranchProbabilityInfo *MBPI) const;
- int getDotOldOp(const int opc) const;
+ int getDotOldOp(const MachineInstr &MI) const;
HexagonII::SubInstructionGroup getDuplexCandidateGroup(const MachineInstr &MI)
const;
short getEquivalentHWInstr(const MachineInstr &MI) const;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
deleted file mode 100644
index c5719ad5b6d8..000000000000
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ /dev/null
@@ -1,4799 +0,0 @@
-//==- HexagonInstrInfo.td - Target Description for Hexagon -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-include "HexagonInstrFormats.td"
-include "HexagonOperands.td"
-include "HexagonInstrEnc.td"
-
-//===----------------------------------------------------------------------===//
-// Compare
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isCompare = 1, InputType = "imm", isExtendable = 1,
- opExtendable = 2 in
-class T_CMP <string mnemonic, bits<2> MajOp, bit isNot, Operand ImmOp>
- : ALU32Inst <(outs PredRegs:$dst),
- (ins IntRegs:$src1, ImmOp:$src2),
- "$dst = "#!if(isNot, "!","")#mnemonic#"($src1, #$src2)",
- [], "",ALU32_2op_tc_2early_SLOT0123 >, ImmRegRel {
- bits<2> dst;
- bits<5> src1;
- bits<10> src2;
- let CextOpcode = mnemonic;
- let opExtentBits = !if(!eq(mnemonic, "cmp.gtu"), 9, 10);
- let isExtentSigned = !if(!eq(mnemonic, "cmp.gtu"), 0, 1);
-
- let IClass = 0b0111;
-
- let Inst{27-24} = 0b0101;
- let Inst{23-22} = MajOp;
- let Inst{21} = !if(!eq(mnemonic, "cmp.gtu"), 0, src2{9});
- let Inst{20-16} = src1;
- let Inst{13-5} = src2{8-0};
- let Inst{4} = isNot;
- let Inst{3-2} = 0b00;
- let Inst{1-0} = dst;
- }
-
-def C2_cmpeqi : T_CMP <"cmp.eq", 0b00, 0, s10_0Ext>;
-def C2_cmpgti : T_CMP <"cmp.gt", 0b01, 0, s10_0Ext>;
-def C2_cmpgtui : T_CMP <"cmp.gtu", 0b10, 0, u9_0Ext>;
-
-//===----------------------------------------------------------------------===//
-// ALU32/ALU +
-//===----------------------------------------------------------------------===//
-// Add.
-
-let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in
-class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev,
- bit IsComm>
- : ALU32_rr<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = "#mnemonic#"($Rs, $Rt)",
- [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel, PredRel {
- let isCommutable = IsComm;
- let BaseOpcode = mnemonic#_rr;
- let CextOpcode = mnemonic;
-
- bits<5> Rs;
- bits<5> Rt;
- bits<5> Rd;
-
- let IClass = 0b1111;
- let Inst{27} = 0b0;
- let Inst{26-24} = MajOp;
- let Inst{23-21} = MinOp;
- let Inst{20-16} = !if(OpsRev,Rt,Rs);
- let Inst{12-8} = !if(OpsRev,Rs,Rt);
- let Inst{4-0} = Rd;
-}
-
-let hasSideEffects = 0, hasNewValue = 1 in
-class T_ALU32_3op_pred<string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit OpsRev, bit PredNot, bit PredNew>
- : ALU32_rr<(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt),
- "if ("#!if(PredNot,"!","")#"$Pu"#!if(PredNew,".new","")#") "#
- "$Rd = "#mnemonic#"($Rs, $Rt)",
- [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel, PredNewRel {
- let isPredicated = 1;
- let isPredicatedFalse = PredNot;
- let isPredicatedNew = PredNew;
- let BaseOpcode = mnemonic#_rr;
- let CextOpcode = mnemonic;
-
- bits<2> Pu;
- bits<5> Rs;
- bits<5> Rt;
- bits<5> Rd;
-
- let IClass = 0b1111;
- let Inst{27} = 0b1;
- let Inst{26-24} = MajOp;
- let Inst{23-21} = MinOp;
- let Inst{20-16} = !if(OpsRev,Rt,Rs);
- let Inst{13} = PredNew;
- let Inst{12-8} = !if(OpsRev,Rs,Rt);
- let Inst{7} = PredNot;
- let Inst{6-5} = Pu;
- let Inst{4-0} = Rd;
-}
-
-class T_ALU32_combineh<string Op1, string Op2, bits<3> MajOp, bits<3> MinOp,
- bit OpsRev>
- : T_ALU32_3op<"", MajOp, MinOp, OpsRev, 0> {
- let AsmString = "$Rd = combine($Rs"#Op1#", $Rt"#Op2#")";
-}
-
-def A2_combine_hh : T_ALU32_combineh<".h", ".h", 0b011, 0b100, 1>;
-def A2_combine_hl : T_ALU32_combineh<".h", ".l", 0b011, 0b101, 1>;
-def A2_combine_lh : T_ALU32_combineh<".l", ".h", 0b011, 0b110, 1>;
-def A2_combine_ll : T_ALU32_combineh<".l", ".l", 0b011, 0b111, 1>;
-
-class T_ALU32_3op_sfx<string mnemonic, string suffix, bits<3> MajOp,
- bits<3> MinOp, bit OpsRev, bit IsComm>
- : T_ALU32_3op<"", MajOp, MinOp, OpsRev, IsComm> {
- let AsmString = "$Rd = "#mnemonic#"($Rs, $Rt)"#suffix;
-}
-
-def A2_svaddh : T_ALU32_3op<"vaddh", 0b110, 0b000, 0, 1>;
-def A2_svsubh : T_ALU32_3op<"vsubh", 0b110, 0b100, 1, 0>;
-
-let Defs = [USR_OVF], Itinerary = ALU32_3op_tc_2_SLOT0123 in {
- def A2_svaddhs : T_ALU32_3op_sfx<"vaddh", ":sat", 0b110, 0b001, 0, 1>;
- def A2_addsat : T_ALU32_3op_sfx<"add", ":sat", 0b110, 0b010, 0, 1>;
- def A2_svadduhs : T_ALU32_3op_sfx<"vadduh", ":sat", 0b110, 0b011, 0, 1>;
- def A2_svsubhs : T_ALU32_3op_sfx<"vsubh", ":sat", 0b110, 0b101, 1, 0>;
- def A2_subsat : T_ALU32_3op_sfx<"sub", ":sat", 0b110, 0b110, 1, 0>;
- def A2_svsubuhs : T_ALU32_3op_sfx<"vsubuh", ":sat", 0b110, 0b111, 1, 0>;
-}
-
-let Itinerary = ALU32_3op_tc_2_SLOT0123 in
-def A2_svavghs : T_ALU32_3op_sfx<"vavgh", ":rnd", 0b111, 0b001, 0, 1>;
-
-def A2_svavgh : T_ALU32_3op<"vavgh", 0b111, 0b000, 0, 1>;
-def A2_svnavgh : T_ALU32_3op<"vnavgh", 0b111, 0b011, 1, 0>;
-
-multiclass T_ALU32_3op_p<string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit OpsRev> {
- def t : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 0>;
- def f : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 1, 0>;
- def tnew : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 0, 1>;
- def fnew : T_ALU32_3op_pred<mnemonic, MajOp, MinOp, OpsRev, 1, 1>;
-}
-
-multiclass T_ALU32_3op_A2<string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit OpsRev, bit IsComm> {
- let isPredicable = 1 in
- def A2_#NAME : T_ALU32_3op <mnemonic, MajOp, MinOp, OpsRev, IsComm>;
- defm A2_p#NAME : T_ALU32_3op_p<mnemonic, MajOp, MinOp, OpsRev>;
-}
-
-defm add : T_ALU32_3op_A2<"add", 0b011, 0b000, 0, 1>;
-defm and : T_ALU32_3op_A2<"and", 0b001, 0b000, 0, 1>;
-defm or : T_ALU32_3op_A2<"or", 0b001, 0b001, 0, 1>;
-defm sub : T_ALU32_3op_A2<"sub", 0b011, 0b001, 1, 0>;
-defm xor : T_ALU32_3op_A2<"xor", 0b001, 0b011, 0, 1>;
-
-// A few special cases producing register pairs:
-let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in {
- def S2_packhl : T_ALU32_3op <"packhl", 0b101, 0b100, 0, 0>;
-
- let isPredicable = 1 in
- def A2_combinew : T_ALU32_3op <"combine", 0b101, 0b000, 0, 0>;
-
- // Conditional combinew uses "newt/f" instead of "t/fnew".
- def C2_ccombinewt : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 0, 0>;
- def C2_ccombinewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 0>;
- def C2_ccombinewnewt : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 0, 1>;
- def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>;
-}
-
-let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in
-class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm>
- : ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Pd = "#mnemonic#"($Rs, $Rt)",
- [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel {
- let CextOpcode = mnemonic;
- let isCommutable = IsComm;
- bits<5> Rs;
- bits<5> Rt;
- bits<2> Pd;
-
- let IClass = 0b1111;
- let Inst{27-24} = 0b0010;
- let Inst{22-21} = MinOp;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{4} = IsNeg;
- let Inst{3-2} = 0b00;
- let Inst{1-0} = Pd;
-}
-
-let Itinerary = ALU32_3op_tc_2early_SLOT0123 in {
- def C2_cmpeq : T_ALU32_3op_cmp< "cmp.eq", 0b00, 0, 1>;
- def C2_cmpgt : T_ALU32_3op_cmp< "cmp.gt", 0b10, 0, 0>;
- def C2_cmpgtu : T_ALU32_3op_cmp< "cmp.gtu", 0b11, 0, 0>;
-}
-
-let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1 in
-def C2_mux: ALU32_rr<(outs IntRegs:$Rd),
- (ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = mux($Pu, $Rs, $Rt)", [], "", ALU32_3op_tc_1_SLOT0123>, ImmRegRel {
- bits<5> Rd;
- bits<2> Pu;
- bits<5> Rs;
- bits<5> Rt;
-
- let CextOpcode = "mux";
- let InputType = "reg";
- let hasSideEffects = 0;
- let IClass = 0b1111;
-
- let Inst{27-24} = 0b0100;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{6-5} = Pu;
- let Inst{4-0} = Rd;
-}
-
-// Combines the two immediates into a double register.
-// Increase complexity to make it greater than any complexity of a combine
-// that involves a register.
-
-let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
- isExtentSigned = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 1,
- AddedComplexity = 75 in
-def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8_0Ext:$s8, s8_0Imm:$S8),
- "$Rdd = combine(#$s8, #$S8)",
- []> {
- bits<5> Rdd;
- bits<8> s8;
- bits<8> S8;
-
- let IClass = 0b0111;
- let Inst{27-23} = 0b11000;
- let Inst{22-16} = S8{7-1};
- let Inst{13} = S8{0};
- let Inst{12-5} = s8;
- let Inst{4-0} = Rdd;
- }
-
-//===----------------------------------------------------------------------===//
-// Template class for predicated ADD of a reg and an Immediate value.
-//===----------------------------------------------------------------------===//
-let hasNewValue = 1, hasSideEffects = 0 in
-class T_Addri_Pred <bit PredNot, bit PredNew>
- : ALU32_ri <(outs IntRegs:$Rd),
- (ins PredRegs:$Pu, IntRegs:$Rs, s8_0Ext:$s8),
- !if(PredNot, "if (!$Pu", "if ($Pu")#!if(PredNew,".new) $Rd = ",
- ") $Rd = ")#"add($Rs, #$s8)"> {
- bits<5> Rd;
- bits<2> Pu;
- bits<5> Rs;
- bits<8> s8;
-
- let isPredicatedNew = PredNew;
- let IClass = 0b0111;
-
- let Inst{27-24} = 0b0100;
- let Inst{23} = PredNot;
- let Inst{22-21} = Pu;
- let Inst{20-16} = Rs;
- let Inst{13} = PredNew;
- let Inst{12-5} = s8;
- let Inst{4-0} = Rd;
- }
-
-//===----------------------------------------------------------------------===//
-// A2_addi: Add a signed immediate to a register.
-//===----------------------------------------------------------------------===//
-let hasNewValue = 1, hasSideEffects = 0 in
-class T_Addri <Operand immOp>
- : ALU32_ri <(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, immOp:$s16),
- "$Rd = add($Rs, #$s16)", [], "", ALU32_ADDI_tc_1_SLOT0123> {
- bits<5> Rd;
- bits<5> Rs;
- bits<16> s16;
-
- let IClass = 0b1011;
-
- let Inst{27-21} = s16{15-9};
- let Inst{20-16} = Rs;
- let Inst{13-5} = s16{8-0};
- let Inst{4-0} = Rd;
- }
-
-//===----------------------------------------------------------------------===//
-// Multiclass for ADD of a register and an immediate value.
-//===----------------------------------------------------------------------===//
-multiclass Addri_Pred<string mnemonic, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- def NAME : T_Addri_Pred<PredNot, 0>;
- // Predicate new
- def NAME#new : T_Addri_Pred<PredNot, 1>;
- }
-}
-
-let isExtendable = 1, isExtentSigned = 1, InputType = "imm" in
-multiclass Addri_base<string mnemonic, SDNode OpNode> {
- let CextOpcode = mnemonic, BaseOpcode = mnemonic#_ri in {
- let opExtendable = 2, opExtentBits = 16, isPredicable = 1, isAdd = 1 in
- def A2_#NAME : T_Addri<s16_0Ext>;
-
- let opExtendable = 3, opExtentBits = 8, isPredicated = 1 in {
- defm A2_p#NAME#t : Addri_Pred<mnemonic, 0>;
- defm A2_p#NAME#f : Addri_Pred<mnemonic, 1>;
- }
- }
-}
-
-defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel;
-
-let hasNewValue = 1, hasSideEffects = 0, isPseudo = 1 in
-def A2_iconst
- : ALU32_ri <(outs IntRegs:$Rd),
- (ins s23_2Imm:$s23_2),
- "$Rd = iconst(#$s23_2)"> {}
-
-//===----------------------------------------------------------------------===//
-// Template class used for the following ALU32 instructions.
-// Rd=and(Rs,#s10)
-// Rd=or(Rs,#s10)
-//===----------------------------------------------------------------------===//
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10,
-InputType = "imm", hasNewValue = 1 in
-class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp>
- : ALU32_ri <(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, s10_0Ext:$s10),
- "$Rd = "#mnemonic#"($Rs, #$s10)" ,
- []> {
- bits<5> Rd;
- bits<5> Rs;
- bits<10> s10;
- let CextOpcode = mnemonic;
-
- let IClass = 0b0111;
-
- let Inst{27-24} = 0b0110;
- let Inst{23-22} = MinOp;
- let Inst{21} = s10{9};
- let Inst{20-16} = Rs;
- let Inst{13-5} = s10{8-0};
- let Inst{4-0} = Rd;
- }
-
-def A2_orir : T_ALU32ri_logical<"or", or, 0b10>, ImmRegRel;
-def A2_andir : T_ALU32ri_logical<"and", and, 0b00>, ImmRegRel;
-
-// Subtract register from immediate
-// Rd32=sub(#s10,Rs32)
-let isExtendable = 1, CextOpcode = "sub", opExtendable = 1, isExtentSigned = 1,
- opExtentBits = 10, InputType = "imm", hasNewValue = 1, hasSideEffects = 0 in
-def A2_subri: ALU32_ri <(outs IntRegs:$Rd), (ins s10_0Ext:$s10, IntRegs:$Rs),
- "$Rd = sub(#$s10, $Rs)", []>, ImmRegRel {
- bits<5> Rd;
- bits<10> s10;
- bits<5> Rs;
-
- let IClass = 0b0111;
-
- let Inst{27-22} = 0b011001;
- let Inst{21} = s10{9};
- let Inst{20-16} = Rs;
- let Inst{13-5} = s10{8-0};
- let Inst{4-0} = Rd;
- }
-
-// Nop.
-let hasSideEffects = 0 in
-def A2_nop: ALU32Inst <(outs), (ins), "nop" > {
- let IClass = 0b0111;
- let Inst{27-24} = 0b1111;
-}
-
-let hasSideEffects = 0, hasNewValue = 1 in
-class T_tfr16<bit isHi>
- : ALU32Inst <(outs IntRegs:$Rx), (ins IntRegs:$src1, u16_0Imm:$u16),
- "$Rx"#!if(isHi, ".h", ".l")#" = #$u16",
- [], "$src1 = $Rx" > {
- bits<5> Rx;
- bits<16> u16;
-
- let IClass = 0b0111;
- let Inst{27-26} = 0b00;
- let Inst{25-24} = !if(isHi, 0b10, 0b01);
- let Inst{23-22} = u16{15-14};
- let Inst{21} = 0b1;
- let Inst{20-16} = Rx;
- let Inst{13-0} = u16{13-0};
- }
-
-def A2_tfril: T_tfr16<0>;
-def A2_tfrih: T_tfr16<1>;
-
-// Conditional transfer is an alias to conditional "Rd = add(Rs, #0)".
-let isPredicated = 1, hasNewValue = 1, opNewValue = 0 in
-class T_tfr_pred<bit isPredNot, bit isPredNew>
- : ALU32Inst<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2),
- "if ("#!if(isPredNot, "!", "")#
- "$src1"#!if(isPredNew, ".new", "")#
- ") $dst = $src2"> {
- bits<5> dst;
- bits<2> src1;
- bits<5> src2;
-
- let isPredicatedFalse = isPredNot;
- let isPredicatedNew = isPredNew;
- let IClass = 0b0111;
-
- let Inst{27-24} = 0b0100;
- let Inst{23} = isPredNot;
- let Inst{13} = isPredNew;
- let Inst{12-5} = 0;
- let Inst{4-0} = dst;
- let Inst{22-21} = src1;
- let Inst{20-16} = src2;
- }
-
-let isPredicable = 1 in
-class T_tfr : ALU32Inst<(outs IntRegs:$dst), (ins IntRegs:$src),
- "$dst = $src"> {
- bits<5> dst;
- bits<5> src;
-
- let IClass = 0b0111;
-
- let Inst{27-21} = 0b0000011;
- let Inst{20-16} = src;
- let Inst{13} = 0b0;
- let Inst{4-0} = dst;
- }
-
-let InputType = "reg", hasNewValue = 1, hasSideEffects = 0 in
-multiclass tfr_base<string CextOp> {
- let CextOpcode = CextOp, BaseOpcode = CextOp in {
- def NAME : T_tfr;
-
- // Predicate
- def t : T_tfr_pred<0, 0>;
- def f : T_tfr_pred<1, 0>;
- // Predicate new
- def tnew : T_tfr_pred<0, 1>;
- def fnew : T_tfr_pred<1, 1>;
- }
-}
-
-// Assembler mapped to C2_ccombinew[t|f|newt|newf].
-// Please don't add bits to this instruction as it'll be converted into
-// 'combine' before object code emission.
-let isPredicated = 1 in
-class T_tfrp_pred<bit PredNot, bit PredNew>
- : ALU32_rr <(outs DoubleRegs:$dst),
- (ins PredRegs:$src1, DoubleRegs:$src2),
- "if ("#!if(PredNot, "!", "")#"$src1"
- #!if(PredNew, ".new", "")#") $dst = $src2" > {
- let isPredicatedFalse = PredNot;
- let isPredicatedNew = PredNew;
- }
-
-// Assembler mapped to A2_combinew.
-// Please don't add bits to this instruction as it'll be converted into
-// 'combine' before object code emission.
-class T_tfrp : ALU32Inst <(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src),
- "$dst = $src">;
-
-let hasSideEffects = 0 in
-multiclass TFR64_base<string BaseName> {
- let BaseOpcode = BaseName in {
- let isPredicable = 1 in
- def NAME : T_tfrp;
- // Predicate
- def t : T_tfrp_pred <0, 0>;
- def f : T_tfrp_pred <1, 0>;
- // Predicate new
- def tnew : T_tfrp_pred <0, 1>;
- def fnew : T_tfrp_pred <1, 1>;
- }
-}
-
-let InputType = "imm", isExtendable = 1, isExtentSigned = 1, opExtentBits = 12,
- isMoveImm = 1, opExtendable = 2, BaseOpcode = "TFRI", CextOpcode = "TFR",
- hasSideEffects = 0, isPredicated = 1, hasNewValue = 1 in
-class T_TFRI_Pred<bit PredNot, bit PredNew>
- : ALU32_ri<(outs IntRegs:$Rd), (ins PredRegs:$Pu, s12_0Ext:$s12),
- "if ("#!if(PredNot,"!","")#"$Pu"#!if(PredNew,".new","")#") $Rd = #$s12",
- [], "", ALU32_2op_tc_1_SLOT0123>, ImmRegRel, PredNewRel {
- let isPredicatedFalse = PredNot;
- let isPredicatedNew = PredNew;
-
- bits<5> Rd;
- bits<2> Pu;
- bits<12> s12;
-
- let IClass = 0b0111;
- let Inst{27-24} = 0b1110;
- let Inst{23} = PredNot;
- let Inst{22-21} = Pu;
- let Inst{20} = 0b0;
- let Inst{19-16,12-5} = s12;
- let Inst{13} = PredNew;
- let Inst{4-0} = Rd;
-}
-
-def C2_cmoveit : T_TFRI_Pred<0, 0>;
-def C2_cmoveif : T_TFRI_Pred<1, 0>;
-def C2_cmovenewit : T_TFRI_Pred<0, 1>;
-def C2_cmovenewif : T_TFRI_Pred<1, 1>;
-
-let InputType = "imm", isExtendable = 1, isExtentSigned = 1,
- CextOpcode = "TFR", BaseOpcode = "TFRI", hasNewValue = 1, opNewValue = 0,
- isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1,
- isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in
-def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16_0Ext:$s16), "$Rd = #$s16",
- [], "", ALU32_2op_tc_1_SLOT0123>,
- ImmRegRel, PredRel {
- bits<5> Rd;
- bits<16> s16;
-
- let IClass = 0b0111;
- let Inst{27-24} = 0b1000;
- let Inst{23-22,20-16,13-5} = s16;
- let Inst{4-0} = Rd;
-}
-
-defm A2_tfr : tfr_base<"TFR">, ImmRegRel, PredNewRel;
-let isAsmParserOnly = 1 in
-defm A2_tfrp : TFR64_base<"TFR64">, PredNewRel;
-
-// Assembler mapped
-let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
- isAsmParserOnly = 1 in
-def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8_0Imm64:$src1),
- "$dst = #$src1",
- []>;
-
-// TODO: see if this instruction can be deleted.
-let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
- isAsmParserOnly = 1 in {
-def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64_0Imm:$src1),
- "$dst = #$src1">;
-def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst),
- (ins s8_0Ext:$src1, s8_0Imm:$src2),
- "$dst = combine(##$src1, #$src2)">;
-}
-
-//===----------------------------------------------------------------------===//
-// ALU32/ALU -
-//===----------------------------------------------------------------------===//
-
-
-//===----------------------------------------------------------------------===//
-// ALU32/PERM +
-//===----------------------------------------------------------------------===//
-// Scalar mux register immediate.
-let hasSideEffects = 0, isExtentSigned = 1, CextOpcode = "MUX",
- InputType = "imm", hasNewValue = 1, isExtendable = 1, opExtentBits = 8 in
-class T_MUX1 <bit MajOp, dag ins, string AsmStr>
- : ALU32Inst <(outs IntRegs:$Rd), ins, AsmStr>, ImmRegRel {
- bits<5> Rd;
- bits<2> Pu;
- bits<8> s8;
- bits<5> Rs;
-
- let IClass = 0b0111;
- let Inst{27-24} = 0b0011;
- let Inst{23} = MajOp;
- let Inst{22-21} = Pu;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b0;
- let Inst{12-5} = s8;
- let Inst{4-0} = Rd;
-}
-
-let opExtendable = 2 in
-def C2_muxri : T_MUX1<0b1, (ins PredRegs:$Pu, s8_0Ext:$s8, IntRegs:$Rs),
- "$Rd = mux($Pu, #$s8, $Rs)">;
-
-let opExtendable = 3 in
-def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8_0Ext:$s8),
- "$Rd = mux($Pu, $Rs, #$s8)">;
-
-// C2_muxii: Scalar mux immediates.
-let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1,
- opExtentBits = 8, opExtendable = 2 in
-def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
- (ins PredRegs:$Pu, s8_0Ext:$s8, s8_0Imm:$S8),
- "$Rd = mux($Pu, #$s8, #$S8)" ,
- []> {
- bits<5> Rd;
- bits<2> Pu;
- bits<8> s8;
- bits<8> S8;
-
- let IClass = 0b0111;
-
- let Inst{27-25} = 0b101;
- let Inst{24-23} = Pu;
- let Inst{22-16} = S8{7-1};
- let Inst{13} = S8{0};
- let Inst{12-5} = s8;
- let Inst{4-0} = Rd;
- }
-
-let isCodeGenOnly = 1, isPseudo = 1 in
-def PS_pselect : ALU64_rr<(outs DoubleRegs:$Rd),
- (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
- ".error \"should not emit\" ", []>;
-
-
-//===----------------------------------------------------------------------===//
-// template class for non-predicated alu32_2op instructions
-// - aslh, asrh, sxtb, sxth, zxth
-//===----------------------------------------------------------------------===//
-let hasNewValue = 1, opNewValue = 0 in
-class T_ALU32_2op <string mnemonic, bits<3> minOp> :
- ALU32Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs),
- "$Rd = "#mnemonic#"($Rs)", [] > {
- bits<5> Rd;
- bits<5> Rs;
-
- let IClass = 0b0111;
-
- let Inst{27-24} = 0b0000;
- let Inst{23-21} = minOp;
- let Inst{13} = 0b0;
- let Inst{4-0} = Rd;
- let Inst{20-16} = Rs;
-}
-
-//===----------------------------------------------------------------------===//
-// template class for predicated alu32_2op instructions
-// - aslh, asrh, sxtb, sxth, zxtb, zxth
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-class T_ALU32_2op_Pred <string mnemonic, bits<3> minOp, bit isPredNot,
- bit isPredNew > :
- ALU32Inst <(outs IntRegs:$Rd), (ins PredRegs:$Pu, IntRegs:$Rs),
- !if(isPredNot, "if (!$Pu", "if ($Pu")
- #!if(isPredNew, ".new) ",") ")#"$Rd = "#mnemonic#"($Rs)"> {
- bits<5> Rd;
- bits<2> Pu;
- bits<5> Rs;
-
- let IClass = 0b0111;
-
- let Inst{27-24} = 0b0000;
- let Inst{23-21} = minOp;
- let Inst{13} = 0b1;
- let Inst{11} = isPredNot;
- let Inst{10} = isPredNew;
- let Inst{4-0} = Rd;
- let Inst{9-8} = Pu;
- let Inst{20-16} = Rs;
-}
-
-multiclass ALU32_2op_Pred<string mnemonic, bits<3> minOp, bit PredNot> {
- let isPredicatedFalse = PredNot in {
- def NAME : T_ALU32_2op_Pred<mnemonic, minOp, PredNot, 0>;
-
- // Predicate new
- let isPredicatedNew = 1 in
- def NAME#new : T_ALU32_2op_Pred<mnemonic, minOp, PredNot, 1>;
- }
-}
-
-multiclass ALU32_2op_base<string mnemonic, bits<3> minOp> {
- let BaseOpcode = mnemonic in {
- let isPredicable = 1, hasSideEffects = 0 in
- def A2_#NAME : T_ALU32_2op<mnemonic, minOp>;
-
- let isPredicated = 1, hasSideEffects = 0 in {
- defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
- defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
- }
- }
-}
-
-defm aslh : ALU32_2op_base<"aslh", 0b000>, PredNewRel;
-defm asrh : ALU32_2op_base<"asrh", 0b001>, PredNewRel;
-defm sxtb : ALU32_2op_base<"sxtb", 0b101>, PredNewRel;
-defm sxth : ALU32_2op_base<"sxth", 0b111>, PredNewRel;
-defm zxth : ALU32_2op_base<"zxth", 0b110>, PredNewRel;
-
-// Rd=zxtb(Rs): assembler mapped to Rd=and(Rs,#255).
-// The compiler would want to generate 'zxtb' instead of 'and' because 'zxtb'
-// has predicated forms while 'and' doesn't. Since the integrated assembler
-// can't handle 'mapped' instructions, we need to encode 'zxtb' the same as
-// 'and' with the immediate operand set to 255.
-
-let hasNewValue = 1, opNewValue = 0 in
-class T_ZXTB: ALU32Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rs),
- "$Rd = zxtb($Rs)", [] > { // Rd = and(Rs,255)
- bits<5> Rd;
- bits<5> Rs;
- bits<10> s10 = 255;
-
- let IClass = 0b0111;
-
- let Inst{27-22} = 0b011000;
- let Inst{4-0} = Rd;
- let Inst{20-16} = Rs;
- let Inst{21} = s10{9};
- let Inst{13-5} = s10{8-0};
-}
-
-// Rd=zxtb(Rs): assembler mapped to "Rd=and(Rs,#255)".
-multiclass ZXTB_base <string mnemonic, bits<3> minOp> {
- let BaseOpcode = mnemonic in {
- let isPredicable = 1, hasSideEffects = 0 in
- def A2_#NAME : T_ZXTB;
-
- let isPredicated = 1, hasSideEffects = 0 in {
- defm A4_p#NAME#t : ALU32_2op_Pred<mnemonic, minOp, 0>;
- defm A4_p#NAME#f : ALU32_2op_Pred<mnemonic, minOp, 1>;
- }
- }
-}
-
-defm zxtb : ZXTB_base<"zxtb",0b100>, PredNewRel;
-
-//===----------------------------------------------------------------------===//
-// Template class for vector add and avg
-//===----------------------------------------------------------------------===//
-
-class T_VectALU_64 <string opc, bits<3> majOp, bits<3> minOp,
- bit isSat, bit isRnd, bit isCrnd, bit SwapOps >
- : ALU64_rr < (outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rdd = "#opc#"($Rss, $Rtt)"#!if(isRnd, ":rnd", "")
- #!if(isCrnd,":crnd","")
- #!if(isSat, ":sat", ""),
- [], "", ALU64_tc_2_SLOT23 > {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1101;
-
- let Inst{27-24} = 0b0011;
- let Inst{23-21} = majOp;
- let Inst{20-16} = !if (SwapOps, Rtt, Rss);
- let Inst{12-8} = !if (SwapOps, Rss, Rtt);
- let Inst{7-5} = minOp;
- let Inst{4-0} = Rdd;
- }
-
-// ALU64 - Vector add
-// Rdd=vadd[u][bhw](Rss,Rtt)
-let Itinerary = ALU64_tc_1_SLOT23 in {
- def A2_vaddub : T_VectALU_64 < "vaddub", 0b000, 0b000, 0, 0, 0, 0>;
- def A2_vaddh : T_VectALU_64 < "vaddh", 0b000, 0b010, 0, 0, 0, 0>;
- def A2_vaddw : T_VectALU_64 < "vaddw", 0b000, 0b101, 0, 0, 0, 0>;
-}
-
-// Rdd=vadd[u][bhw](Rss,Rtt):sat
-let Defs = [USR_OVF] in {
- def A2_vaddubs : T_VectALU_64 < "vaddub", 0b000, 0b001, 1, 0, 0, 0>;
- def A2_vaddhs : T_VectALU_64 < "vaddh", 0b000, 0b011, 1, 0, 0, 0>;
- def A2_vadduhs : T_VectALU_64 < "vadduh", 0b000, 0b100, 1, 0, 0, 0>;
- def A2_vaddws : T_VectALU_64 < "vaddw", 0b000, 0b110, 1, 0, 0, 0>;
-}
-
-// ALU64 - Vector average
-// Rdd=vavg[u][bhw](Rss,Rtt)
-let Itinerary = ALU64_tc_1_SLOT23 in {
- def A2_vavgub : T_VectALU_64 < "vavgub", 0b010, 0b000, 0, 0, 0, 0>;
- def A2_vavgh : T_VectALU_64 < "vavgh", 0b010, 0b010, 0, 0, 0, 0>;
- def A2_vavguh : T_VectALU_64 < "vavguh", 0b010, 0b101, 0, 0, 0, 0>;
- def A2_vavgw : T_VectALU_64 < "vavgw", 0b011, 0b000, 0, 0, 0, 0>;
- def A2_vavguw : T_VectALU_64 < "vavguw", 0b011, 0b011, 0, 0, 0, 0>;
-}
-
-// Rdd=vavg[u][bhw](Rss,Rtt)[:rnd|:crnd]
-def A2_vavgubr : T_VectALU_64 < "vavgub", 0b010, 0b001, 0, 1, 0, 0>;
-def A2_vavghr : T_VectALU_64 < "vavgh", 0b010, 0b011, 0, 1, 0, 0>;
-def A2_vavghcr : T_VectALU_64 < "vavgh", 0b010, 0b100, 0, 0, 1, 0>;
-def A2_vavguhr : T_VectALU_64 < "vavguh", 0b010, 0b110, 0, 1, 0, 0>;
-
-def A2_vavgwr : T_VectALU_64 < "vavgw", 0b011, 0b001, 0, 1, 0, 0>;
-def A2_vavgwcr : T_VectALU_64 < "vavgw", 0b011, 0b010, 0, 0, 1, 0>;
-def A2_vavguwr : T_VectALU_64 < "vavguw", 0b011, 0b100, 0, 1, 0, 0>;
-
-// Rdd=vnavg[bh](Rss,Rtt)
-let Itinerary = ALU64_tc_1_SLOT23 in {
- def A2_vnavgh : T_VectALU_64 < "vnavgh", 0b100, 0b000, 0, 0, 0, 1>;
- def A2_vnavgw : T_VectALU_64 < "vnavgw", 0b100, 0b011, 0, 0, 0, 1>;
-}
-
-// Rdd=vnavg[bh](Rss,Rtt)[:rnd|:crnd]:sat
-let Defs = [USR_OVF] in {
- def A2_vnavghr : T_VectALU_64 < "vnavgh", 0b100, 0b001, 1, 1, 0, 1>;
- def A2_vnavghcr : T_VectALU_64 < "vnavgh", 0b100, 0b010, 1, 0, 1, 1>;
- def A2_vnavgwr : T_VectALU_64 < "vnavgw", 0b100, 0b100, 1, 1, 0, 1>;
- def A2_vnavgwcr : T_VectALU_64 < "vnavgw", 0b100, 0b110, 1, 0, 1, 1>;
-}
-
-// Rdd=vsub[u][bh](Rss,Rtt)
-let Itinerary = ALU64_tc_1_SLOT23 in {
- def A2_vsubub : T_VectALU_64 < "vsubub", 0b001, 0b000, 0, 0, 0, 1>;
- def A2_vsubh : T_VectALU_64 < "vsubh", 0b001, 0b010, 0, 0, 0, 1>;
- def A2_vsubw : T_VectALU_64 < "vsubw", 0b001, 0b101, 0, 0, 0, 1>;
-}
-
-// Rdd=vsub[u][bh](Rss,Rtt):sat
-let Defs = [USR_OVF] in {
- def A2_vsububs : T_VectALU_64 < "vsubub", 0b001, 0b001, 1, 0, 0, 1>;
- def A2_vsubhs : T_VectALU_64 < "vsubh", 0b001, 0b011, 1, 0, 0, 1>;
- def A2_vsubuhs : T_VectALU_64 < "vsubuh", 0b001, 0b100, 1, 0, 0, 1>;
- def A2_vsubws : T_VectALU_64 < "vsubw", 0b001, 0b110, 1, 0, 0, 1>;
-}
-
-// Rdd=vmax[u][bhw](Rss,Rtt)
-def A2_vmaxb : T_VectALU_64 < "vmaxb", 0b110, 0b110, 0, 0, 0, 1>;
-def A2_vmaxub : T_VectALU_64 < "vmaxub", 0b110, 0b000, 0, 0, 0, 1>;
-def A2_vmaxh : T_VectALU_64 < "vmaxh", 0b110, 0b001, 0, 0, 0, 1>;
-def A2_vmaxuh : T_VectALU_64 < "vmaxuh", 0b110, 0b010, 0, 0, 0, 1>;
-def A2_vmaxw : T_VectALU_64 < "vmaxw", 0b110, 0b011, 0, 0, 0, 1>;
-def A2_vmaxuw : T_VectALU_64 < "vmaxuw", 0b101, 0b101, 0, 0, 0, 1>;
-
-// Rdd=vmin[u][bhw](Rss,Rtt)
-def A2_vminb : T_VectALU_64 < "vminb", 0b110, 0b111, 0, 0, 0, 1>;
-def A2_vminub : T_VectALU_64 < "vminub", 0b101, 0b000, 0, 0, 0, 1>;
-def A2_vminh : T_VectALU_64 < "vminh", 0b101, 0b001, 0, 0, 0, 1>;
-def A2_vminuh : T_VectALU_64 < "vminuh", 0b101, 0b010, 0, 0, 0, 1>;
-def A2_vminw : T_VectALU_64 < "vminw", 0b101, 0b011, 0, 0, 0, 1>;
-def A2_vminuw : T_VectALU_64 < "vminuw", 0b101, 0b100, 0, 0, 0, 1>;
-
-//===----------------------------------------------------------------------===//
-// Template class for vector compare
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in
-class T_vcmp <string Str, bits<4> minOp>
- : ALU64_rr <(outs PredRegs:$Pd),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Pd = "#Str#"($Rss, $Rtt)", [],
- "", ALU64_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b00100;
- let Inst{13} = minOp{3};
- let Inst{7-5} = minOp{2-0};
- let Inst{1-0} = Pd;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-// Vector compare bytes
-def A2_vcmpbeq : T_vcmp <"vcmpb.eq", 0b0110>;
-def A2_vcmpbgtu : T_vcmp <"vcmpb.gtu", 0b0111>;
-
-// Vector compare halfwords
-def A2_vcmpheq : T_vcmp <"vcmph.eq", 0b0011>;
-def A2_vcmphgt : T_vcmp <"vcmph.gt", 0b0100>;
-def A2_vcmphgtu : T_vcmp <"vcmph.gtu", 0b0101>;
-
-// Vector compare words
-def A2_vcmpweq : T_vcmp <"vcmpw.eq", 0b0000>;
-def A2_vcmpwgt : T_vcmp <"vcmpw.gt", 0b0001>;
-def A2_vcmpwgtu : T_vcmp <"vcmpw.gtu", 0b0010>;
-
-//===----------------------------------------------------------------------===//
-// ALU32/PERM -
-//===----------------------------------------------------------------------===//
-
-
-//===----------------------------------------------------------------------===//
-// ALU32/PRED +
-//===----------------------------------------------------------------------===//
-// No bits needed. If cmp.ge is found the assembler parser will
-// transform it to cmp.gt subtracting 1 from the immediate.
-let isPseudo = 1 in {
-def C2_cmpgei: ALU32Inst <
- (outs PredRegs:$Pd), (ins IntRegs:$Rs, s8_0Ext:$s8),
- "$Pd = cmp.ge($Rs, #$s8)">;
-def C2_cmpgeui: ALU32Inst <
- (outs PredRegs:$Pd), (ins IntRegs:$Rs, u8_0Ext:$s8),
- "$Pd = cmp.geu($Rs, #$s8)">;
-}
-
-
-//===----------------------------------------------------------------------===//
-// ALU32/PRED -
-//===----------------------------------------------------------------------===//
-
-
-//===----------------------------------------------------------------------===//
-// ALU64/ALU +
-//===----------------------------------------------------------------------===//
-// Add.
-//===----------------------------------------------------------------------===//
-// Template Class
-// Add/Subtract halfword
-// Rd=add(Rt.L,Rs.[HL])[:sat]
-// Rd=sub(Rt.L,Rs.[HL])[:sat]
-// Rd=add(Rt.[LH],Rs.[HL])[:sat][:<16]
-// Rd=sub(Rt.[LH],Rs.[HL])[:sat][:<16]
-//===----------------------------------------------------------------------===//
-
-let hasNewValue = 1, opNewValue = 0 in
-class T_XTYPE_ADD_SUB <bits<2> LHbits, bit isSat, bit hasShift, bit isSub>
- : ALU64Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rt, IntRegs:$Rs),
- "$Rd = "#!if(isSub,"sub","add")#"($Rt."
- #!if(hasShift, !if(LHbits{1},"h","l"),"l") #", $Rs."
- #!if(hasShift, !if(LHbits{0},"h)","l)"), !if(LHbits{1},"h)","l)"))
- #!if(isSat,":sat","")
- #!if(hasShift,":<<16",""), [], "", ALU64_tc_1_SLOT23> {
- bits<5> Rd;
- bits<5> Rt;
- bits<5> Rs;
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b01010;
- let Inst{22} = hasShift;
- let Inst{21} = isSub;
- let Inst{7} = isSat;
- let Inst{6-5} = LHbits;
- let Inst{4-0} = Rd;
- let Inst{12-8} = Rt;
- let Inst{20-16} = Rs;
- }
-
-//Rd=sub(Rt.L,Rs.[LH])
-def A2_subh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 1>;
-def A2_subh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 1>;
-
-//Rd=add(Rt.L,Rs.[LH])
-def A2_addh_l16_ll : T_XTYPE_ADD_SUB <0b00, 0, 0, 0>;
-def A2_addh_l16_hl : T_XTYPE_ADD_SUB <0b10, 0, 0, 0>;
-
-let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in {
- //Rd=sub(Rt.L,Rs.[LH]):sat
- def A2_subh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 1>;
- def A2_subh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 1>;
-
- //Rd=add(Rt.L,Rs.[LH]):sat
- def A2_addh_l16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 0, 0>;
- def A2_addh_l16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 0, 0>;
-}
-
-//Rd=sub(Rt.[LH],Rs.[LH]):<<16
-def A2_subh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 1>;
-def A2_subh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 1>;
-def A2_subh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 1>;
-def A2_subh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 1>;
-
-//Rd=add(Rt.[LH],Rs.[LH]):<<16
-def A2_addh_h16_ll : T_XTYPE_ADD_SUB <0b00, 0, 1, 0>;
-def A2_addh_h16_lh : T_XTYPE_ADD_SUB <0b01, 0, 1, 0>;
-def A2_addh_h16_hl : T_XTYPE_ADD_SUB <0b10, 0, 1, 0>;
-def A2_addh_h16_hh : T_XTYPE_ADD_SUB <0b11, 0, 1, 0>;
-
-let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in {
- //Rd=sub(Rt.[LH],Rs.[LH]):sat:<<16
- def A2_subh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 1>;
- def A2_subh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 1>;
- def A2_subh_h16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 1, 1>;
- def A2_subh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 1>;
-
- //Rd=add(Rt.[LH],Rs.[LH]):sat:<<16
- def A2_addh_h16_sat_ll : T_XTYPE_ADD_SUB <0b00, 1, 1, 0>;
- def A2_addh_h16_sat_lh : T_XTYPE_ADD_SUB <0b01, 1, 1, 0>;
- def A2_addh_h16_sat_hl : T_XTYPE_ADD_SUB <0b10, 1, 1, 0>;
- def A2_addh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 0>;
-}
-
-let hasSideEffects = 0, hasNewValue = 1 in
-def S2_parityp: ALU64Inst<(outs IntRegs:$Rd),
- (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
- "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-24} = 0b0000;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{4-0} = Rd;
-}
-
-let hasNewValue = 1, opNewValue = 0, hasSideEffects = 0 in
-class T_XTYPE_MIN_MAX < bit isMax, bit isUnsigned >
- : ALU64Inst < (outs IntRegs:$Rd), (ins IntRegs:$Rt, IntRegs:$Rs),
- "$Rd = "#!if(isMax,"max","min")#!if(isUnsigned,"u","")
- #"($Rt, $Rs)", [], "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rt;
- bits<5> Rs;
-
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b01011;
- let Inst{22-21} = !if(isMax, 0b10, 0b01);
- let Inst{7} = isUnsigned;
- let Inst{4-0} = Rd;
- let Inst{12-8} = !if(isMax, Rs, Rt);
- let Inst{20-16} = !if(isMax, Rt, Rs);
- }
-
-def A2_min : T_XTYPE_MIN_MAX < 0, 0 >;
-def A2_minu : T_XTYPE_MIN_MAX < 0, 1 >;
-def A2_max : T_XTYPE_MIN_MAX < 1, 0 >;
-def A2_maxu : T_XTYPE_MIN_MAX < 1, 1 >;
-
-class T_cmp64_rr<string mnemonic, bits<3> MinOp, bit IsComm>
- : ALU64_rr<(outs PredRegs:$Pd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
- "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", ALU64_tc_2early_SLOT23> {
- let isCompare = 1;
- let isCommutable = IsComm;
- let hasSideEffects = 0;
-
- bits<2> Pd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-21} = 0b0010100;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{7-5} = MinOp;
- let Inst{1-0} = Pd;
-}
-
-def C2_cmpeqp : T_cmp64_rr<"cmp.eq", 0b000, 1>;
-def C2_cmpgtp : T_cmp64_rr<"cmp.gt", 0b010, 0>;
-def C2_cmpgtup : T_cmp64_rr<"cmp.gtu", 0b100, 0>;
-
-def C2_vmux : ALU64_rr<(outs DoubleRegs:$Rd),
- (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
- "$Rd = vmux($Pu, $Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> {
- let hasSideEffects = 0;
-
- bits<5> Rd;
- bits<2> Pu;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-24} = 0b0001;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{6-5} = Pu;
- let Inst{4-0} = Rd;
-}
-
-class T_ALU64_rr<string mnemonic, string suffix, bits<4> RegType,
- bits<3> MajOp, bits<3> MinOp, bit OpsRev, bit IsComm,
- string Op2Pfx>
- : ALU64_rr<(outs DoubleRegs:$Rd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
- "$Rd = " #mnemonic# "($Rs, " #Op2Pfx# "$Rt)" #suffix, [],
- "", ALU64_tc_1_SLOT23> {
- let hasSideEffects = 0;
- let isCommutable = IsComm;
-
- bits<5> Rs;
- bits<5> Rt;
- bits<5> Rd;
-
- let IClass = 0b1101;
- let Inst{27-24} = RegType;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = !if (OpsRev,Rt,Rs);
- let Inst{12-8} = !if (OpsRev,Rs,Rt);
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rd;
-}
-
-class T_ALU64_arith<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit IsSat,
- bit OpsRev, bit IsComm>
- : T_ALU64_rr<mnemonic, !if(IsSat,":sat",""), 0b0011, MajOp, MinOp, OpsRev,
- IsComm, "">;
-
-let isAdd = 1 in
-def A2_addp : T_ALU64_arith<"add", 0b000, 0b111, 0, 0, 1>;
-def A2_subp : T_ALU64_arith<"sub", 0b001, 0b111, 0, 1, 0>;
-
-class T_ALU64_logical<string mnemonic, bits<3> MinOp, bit OpsRev, bit IsComm,
- bit IsNeg>
- : T_ALU64_rr<mnemonic, "", 0b0011, 0b111, MinOp, OpsRev, IsComm,
- !if(IsNeg,"~","")>;
-
-def A2_andp : T_ALU64_logical<"and", 0b000, 0, 1, 0>;
-def A2_orp : T_ALU64_logical<"or", 0b010, 0, 1, 0>;
-def A2_xorp : T_ALU64_logical<"xor", 0b100, 0, 1, 0>;
-
-//===----------------------------------------------------------------------===//
-// ALU64/ALU -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ALU64/BIT +
-//===----------------------------------------------------------------------===//
-//
-//===----------------------------------------------------------------------===//
-// ALU64/BIT -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ALU64/PERM +
-//===----------------------------------------------------------------------===//
-//
-//===----------------------------------------------------------------------===//
-// ALU64/PERM -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// CR +
-//===----------------------------------------------------------------------===//
-// Logical reductions on predicates.
-
-// Looping instructions.
-
-// Pipelined looping instructions.
-
-// Logical operations on predicates.
-let hasSideEffects = 0 in
-class T_LOGICAL_1OP<string MnOp, bits<2> OpBits>
- : CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps),
- "$Pd = " # MnOp # "($Ps)", [], "", CR_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<2> Ps;
-
- let IClass = 0b0110;
- let Inst{27-23} = 0b10111;
- let Inst{22-21} = OpBits;
- let Inst{20} = 0b0;
- let Inst{17-16} = Ps;
- let Inst{13} = 0b0;
- let Inst{1-0} = Pd;
-}
-
-def C2_any8 : T_LOGICAL_1OP<"any8", 0b00>;
-def C2_all8 : T_LOGICAL_1OP<"all8", 0b01>;
-def C2_not : T_LOGICAL_1OP<"not", 0b10>;
-
-let hasSideEffects = 0 in
-class T_LOGICAL_2OP<string MnOp, bits<3> OpBits, bit IsNeg, bit Rev>
- : CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps, PredRegs:$Pt),
- "$Pd = " # MnOp # "($Ps, " # !if (IsNeg,"!","") # "$Pt)",
- [], "", CR_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<2> Ps;
- bits<2> Pt;
-
- let IClass = 0b0110;
- let Inst{27-24} = 0b1011;
- let Inst{23-21} = OpBits;
- let Inst{20} = 0b0;
- let Inst{17-16} = !if(Rev,Pt,Ps); // Rs and Rt are reversed for some
- let Inst{13} = 0b0; // instructions.
- let Inst{9-8} = !if(Rev,Ps,Pt);
- let Inst{1-0} = Pd;
-}
-
-def C2_and : T_LOGICAL_2OP<"and", 0b000, 0, 1>;
-def C2_or : T_LOGICAL_2OP<"or", 0b001, 0, 1>;
-def C2_xor : T_LOGICAL_2OP<"xor", 0b010, 0, 0>;
-def C2_andn : T_LOGICAL_2OP<"and", 0b011, 1, 1>;
-def C2_orn : T_LOGICAL_2OP<"or", 0b111, 1, 1>;
-
-let hasSideEffects = 0, hasNewValue = 1 in
-def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt),
- "$Rd = vitpack($Ps, $Pt)", [], "", S_2op_tc_1_SLOT23> {
- bits<5> Rd;
- bits<2> Ps;
- bits<2> Pt;
-
- let IClass = 0b1000;
- let Inst{27-24} = 0b1001;
- let Inst{22-21} = 0b00;
- let Inst{17-16} = Ps;
- let Inst{9-8} = Pt;
- let Inst{4-0} = Rd;
-}
-
-let hasSideEffects = 0 in
-def C2_mask : SInst<(outs DoubleRegs:$Rd), (ins PredRegs:$Pt),
- "$Rd = mask($Pt)", [], "", S_2op_tc_1_SLOT23> {
- bits<5> Rd;
- bits<2> Pt;
-
- let IClass = 0b1000;
- let Inst{27-24} = 0b0110;
- let Inst{9-8} = Pt;
- let Inst{4-0} = Rd;
-}
-
-// User control register transfer.
-//===----------------------------------------------------------------------===//
-// CR -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// JR +
-//===----------------------------------------------------------------------===//
-
-class CondStr<string CReg, bit True, bit New> {
- string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") ";
-}
-class JumpOpcStr<string Mnemonic, bit New, bit Taken> {
- string S = Mnemonic # !if(Taken, ":t", ":nt");
-}
-
-let isBranch = 1, isBarrier = 1, Defs = [PC], hasSideEffects = 0,
- isPredicable = 1,
- isExtendable = 1, opExtendable = 0, isExtentSigned = 1,
- opExtentBits = 24, opExtentAlign = 2, InputType = "imm" in
-class T_JMP<string ExtStr>
- : JInst_CJUMP_UCJUMP<(outs), (ins brtarget:$dst),
- "jump " # ExtStr # "$dst",
- [], "", J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT> {
- bits<24> dst;
- let IClass = 0b0101;
-
- let Inst{27-25} = 0b100;
- let Inst{24-16} = dst{23-15};
- let Inst{13-1} = dst{14-2};
-}
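-
-// Note that only dst{23-2} is encoded: dst{1-0} are implied zeros, since
-// jump targets are 4-byte aligned (opExtentAlign = 2 above).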
-
-let isBranch = 1, Defs = [PC], hasSideEffects = 0, isPredicated = 1,
- isExtendable = 1, opExtendable = 1, isExtentSigned = 1,
- opExtentBits = 17, opExtentAlign = 2, InputType = "imm" in
-class T_JMP_c<bit PredNot, bit isPredNew, bit isTak, string ExtStr>
- : JInst_CJUMP_UCJUMP<(outs), (ins PredRegs:$src, brtarget:$dst),
- CondStr<"$src", !if(PredNot,0,1), isPredNew>.S #
- JumpOpcStr<"jump", isPredNew, isTak>.S # " " #
- ExtStr # "$dst",
- [], "", J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT>, ImmRegRel {
- let isTaken = isTak;
- let isPredicatedFalse = PredNot;
- let isPredicatedNew = isPredNew;
- bits<2> src;
- bits<17> dst;
-
- let IClass = 0b0101;
-
- let Inst{27-24} = 0b1100;
- let Inst{21} = PredNot;
- let Inst{12} = isTak;
- let Inst{11} = isPredNew;
- let Inst{9-8} = src;
- let Inst{23-22} = dst{16-15};
- let Inst{20-16} = dst{14-10};
- let Inst{13} = dst{9};
- let Inst{7-1} = dst{8-2};
- }
-
-multiclass JMP_Pred<bit PredNot, string ExtStr> {
- def NAME : T_JMP_c<PredNot, 0, 0, ExtStr>; // not taken
- // Predicate new
- def NAME#newpt : T_JMP_c<PredNot, 1, 1, ExtStr>; // taken
- def NAME#new : T_JMP_c<PredNot, 1, 0, ExtStr>; // not taken
-}
-
-multiclass JMP_base<string BaseOp, string ExtStr> {
- let BaseOpcode = BaseOp in {
- def NAME : T_JMP<ExtStr>;
- defm t : JMP_Pred<0, ExtStr>;
- defm f : JMP_Pred<1, ExtStr>;
- }
-}
-
-// Jumps to address stored in a register, JUMPR_MISC
-// if ([[!]P[.new]]) jumpr[:t/nt] Rs
-let isBranch = 1, isIndirectBranch = 1, isBarrier = 1, Defs = [PC],
- isPredicable = 1, hasSideEffects = 0, InputType = "reg" in
-class T_JMPr
- : JRInst<(outs), (ins IntRegs:$dst),
- "jumpr $dst", [], "", J_tc_2early_SLOT2> {
- bits<5> dst;
-
- let IClass = 0b0101;
- let Inst{27-21} = 0b0010100;
- let Inst{20-16} = dst;
-}
-
-let isBranch = 1, isIndirectBranch = 1, Defs = [PC], isPredicated = 1,
- hasSideEffects = 0, InputType = "reg" in
-class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak>
- : JRInst <(outs), (ins PredRegs:$src, IntRegs:$dst),
- CondStr<"$src", !if(PredNot,0,1), isPredNew>.S #
- JumpOpcStr<"jumpr", isPredNew, isTak>.S # " $dst", [],
- "", J_tc_2early_SLOT2> {
-
- let isTaken = isTak;
- let isPredicatedFalse = PredNot;
- let isPredicatedNew = isPredNew;
- bits<2> src;
- bits<5> dst;
-
- let IClass = 0b0101;
-
- let Inst{27-22} = 0b001101;
- let Inst{21} = PredNot;
- let Inst{20-16} = dst;
- let Inst{12} = isTak;
- let Inst{11} = isPredNew;
- let Inst{9-8} = src;
-}
-
-multiclass JMPR_Pred<bit PredNot> {
- def NAME : T_JMPr_c<PredNot, 0, 0>; // not taken
- // Predicate new
- def NAME#newpt : T_JMPr_c<PredNot, 1, 1>; // taken
- def NAME#new : T_JMPr_c<PredNot, 1, 0>; // not taken
-}
-
-multiclass JMPR_base<string BaseOp> {
- let BaseOpcode = BaseOp in {
- def NAME : T_JMPr;
- defm t : JMPR_Pred<0>;
- defm f : JMPR_Pred<1>;
- }
-}
-
-let isCall = 1, hasSideEffects = 1 in
-class JUMPR_MISC_CALLR<bit isPred, bit isPredNot,
- dag InputDag = (ins IntRegs:$Rs)>
- : JRInst<(outs), InputDag,
- !if(isPred, !if(isPredNot, "if (!$Pu) callr $Rs",
- "if ($Pu) callr $Rs"),
- "callr $Rs"),
- [], "", J_tc_2early_SLOT2> {
- bits<5> Rs;
- bits<2> Pu;
- let isPredicated = isPred;
- let isPredicatedFalse = isPredNot;
-
- let IClass = 0b0101;
- let Inst{27-25} = 0b000;
- let Inst{24-23} = !if (isPred, 0b10, 0b01);
- let Inst{22} = 0;
- let Inst{21} = isPredNot;
- let Inst{9-8} = !if (isPred, Pu, 0b00);
- let Inst{20-16} = Rs;
-
- }
-
-let Defs = VolatileV3.Regs in {
- def J2_callrt : JUMPR_MISC_CALLR<1, 0, (ins PredRegs:$Pu, IntRegs:$Rs)>;
- def J2_callrf : JUMPR_MISC_CALLR<1, 1, (ins PredRegs:$Pu, IntRegs:$Rs)>;
-}
-
-let isTerminator = 1, hasSideEffects = 0 in {
- defm J2_jump : JMP_base<"JMP", "">, PredNewRel;
-
- defm J2_jumpr : JMPR_base<"JMPr">, PredNewRel;
-
- let isReturn = 1, isPseudo = 1, isCodeGenOnly = 1 in
- defm PS_jmpret : JMPR_base<"JMPret">, PredNewRel;
-}
-
-let validSubTargets = HasV60SubT in
-multiclass JMPpt_base<string BaseOp> {
- let BaseOpcode = BaseOp in {
- def tpt : T_JMP_c <0, 0, 1, "">; // Predicate true - taken
- def fpt : T_JMP_c <1, 0, 1, "">; // Predicate false - taken
- }
-}
-
-let validSubTargets = HasV60SubT in
-multiclass JMPRpt_base<string BaseOp> {
- let BaseOpcode = BaseOp in {
- def tpt : T_JMPr_c<0, 0, 1>; // predicate true - taken
- def fpt : T_JMPr_c<1, 0, 1>; // predicate false - taken
- }
-}
-
-defm J2_jumpr : JMPRpt_base<"JMPr">;
-defm J2_jump : JMPpt_base<"JMP">;
-
-// A return through builtin_eh_return.
-let isReturn = 1, isTerminator = 1, isBarrier = 1, hasSideEffects = 0,
- isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in
-def EH_RETURN_JMPR : T_JMPr;
-
-//===----------------------------------------------------------------------===//
-// JR -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// LD +
-//===----------------------------------------------------------------------===//
-
-// Load - Base with Immediate offset addressing mode
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, AddedComplexity = 20 in
-class T_load_io <string mnemonic, RegisterClass RC, bits<4> MajOp,
- Operand ImmOp>
- : LDInst<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset),
- "$dst = "#mnemonic#"($src1 + #$offset)", []>, AddrModeRel {
- bits<4> name;
- bits<5> dst;
- bits<5> src1;
- bits<14> offset;
- bits<11> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "s11_3Ext"), offset{13-3},
- !if (!eq(ImmOpStr, "s11_2Ext"), offset{12-2},
- !if (!eq(ImmOpStr, "s11_1Ext"), offset{11-1},
- /* s11_0Ext */ offset{10-0})));
- let opExtentBits = !if (!eq(ImmOpStr, "s11_3Ext"), 14,
- !if (!eq(ImmOpStr, "s11_2Ext"), 13,
- !if (!eq(ImmOpStr, "s11_1Ext"), 12,
- /* s11_0Ext */ 11)));
- let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
-
- let IClass = 0b1001;
-
- let Inst{27} = 0b0;
- let Inst{26-25} = offsetBits{10-9};
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13-5} = offsetBits{8-0};
- let Inst{4-0} = dst;
- }
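-
-// For example, with ImmOp = s11_2Ext (memw), a byte offset of #40 is encoded
-// as offsetBits = offset{12-2} = 10; the two low bits are dropped because
-// word accesses are 4-byte aligned.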
-
-let opExtendable = 3, isExtentSigned = 0, isPredicated = 1 in
-class T_pload_io <string mnemonic, RegisterClass RC, bits<4>MajOp,
- Operand ImmOp, bit isNot, bit isPredNew>
- : LDInst<(outs RC:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
- "if ("#!if(isNot, "!$src1", "$src1")
- #!if(isPredNew, ".new", "")
- #") $dst = "#mnemonic#"($src2 + #$offset)",
- [],"", V2LDST_tc_ld_SLOT01> , AddrModeRel {
- bits<5> dst;
- bits<2> src1;
- bits<5> src2;
- bits<9> offset;
- bits<6> offsetBits;
- string ImmOpStr = !cast<string>(ImmOp);
-
- let offsetBits = !if (!eq(ImmOpStr, "u6_3Ext"), offset{8-3},
- !if (!eq(ImmOpStr, "u6_2Ext"), offset{7-2},
- !if (!eq(ImmOpStr, "u6_1Ext"), offset{6-1},
- /* u6_0Ext */ offset{5-0})));
- let opExtentBits = !if (!eq(ImmOpStr, "u6_3Ext"), 9,
- !if (!eq(ImmOpStr, "u6_2Ext"), 8,
- !if (!eq(ImmOpStr, "u6_1Ext"), 7,
- /* u6_0Ext */ 6)));
- let hasNewValue = !if (!eq(ImmOpStr, "u6_3Ext"), 0, 1);
- let isPredicatedNew = isPredNew;
- let isPredicatedFalse = isNot;
-
- let IClass = 0b0100;
-
- let Inst{27} = 0b0;
- let Inst{26} = isNot;
- let Inst{25} = isPredNew;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src2;
- let Inst{13} = 0b0;
- let Inst{12-11} = src1;
- let Inst{10-5} = offsetBits;
- let Inst{4-0} = dst;
- }
-
-let isExtendable = 1, hasSideEffects = 0, addrMode = BaseImmOffset in
-multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC,
- Operand ImmOp, Operand predImmOp, bits<4>MajOp> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
- let isPredicable = 1 in
- def L2_#NAME#_io : T_load_io <mnemonic, RC, MajOp, ImmOp>;
-
- // Predicated
- def L2_p#NAME#t_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 0, 0>;
- def L2_p#NAME#f_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 1, 0>;
-
- // Predicated new
- def L2_p#NAME#tnew_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 0, 1>;
- def L2_p#NAME#fnew_io : T_pload_io <mnemonic, RC, MajOp, predImmOp, 1, 1>;
- }
-}
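-
-// For example, "defm loadrb : LD_Idxd<...>" below expands to L2_loadrb_io
-// plus the predicated forms L2_ploadrbt_io, L2_ploadrbf_io,
-// L2_ploadrbtnew_io and L2_ploadrbfnew_io.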
-
-let accessSize = ByteAccess in {
- defm loadrb: LD_Idxd <"memb", "LDrib", IntRegs, s11_0Ext, u6_0Ext, 0b1000>;
- defm loadrub: LD_Idxd <"memub", "LDriub", IntRegs, s11_0Ext, u6_0Ext, 0b1001>;
-}
-
-let accessSize = HalfWordAccess, opExtentAlign = 1 in {
- defm loadrh: LD_Idxd <"memh", "LDrih", IntRegs, s11_1Ext, u6_1Ext, 0b1010>;
- defm loadruh: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext, 0b1011>;
-}
-
-let accessSize = WordAccess, opExtentAlign = 2 in
-defm loadri: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext, 0b1100>;
-
-let accessSize = DoubleWordAccess, opExtentAlign = 3 in
-defm loadrd: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext, 0b1110>;
-
-let accessSize = HalfWordAccess, opExtentAlign = 1 in {
- def L2_loadbsw2_io: T_load_io<"membh", IntRegs, 0b0001, s11_1Ext>;
- def L2_loadbzw2_io: T_load_io<"memubh", IntRegs, 0b0011, s11_1Ext>;
-}
-
-let accessSize = WordAccess, opExtentAlign = 2 in {
- def L2_loadbzw4_io: T_load_io<"memubh", DoubleRegs, 0b0101, s11_2Ext>;
- def L2_loadbsw4_io: T_load_io<"membh", DoubleRegs, 0b0111, s11_2Ext>;
-}
-
-let addrMode = BaseImmOffset, isExtendable = 1, hasSideEffects = 0,
- opExtendable = 3, isExtentSigned = 1 in
-class T_loadalign_io <string str, bits<4> MajOp, Operand ImmOp>
- : LDInst<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset),
- "$dst = "#str#"($src2 + #$offset)", [],
- "$src1 = $dst">, AddrModeRel {
- bits<4> name;
- bits<5> dst;
- bits<5> src2;
- bits<12> offset;
- bits<11> offsetBits;
-
- let offsetBits = !if (!eq(!cast<string>(ImmOp), "s11_1Ext"), offset{11-1},
- /* s11_0Ext */ offset{10-0});
- let IClass = 0b1001;
-
- let Inst{27} = 0b0;
- let Inst{26-25} = offsetBits{10-9};
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src2;
- let Inst{13-5} = offsetBits{8-0};
- let Inst{4-0} = dst;
- }
-
-let accessSize = HalfWordAccess, opExtentBits = 12, opExtentAlign = 1 in
-def L2_loadalignh_io: T_loadalign_io <"memh_fifo", 0b0010, s11_1Ext>;
-
-let accessSize = ByteAccess, opExtentBits = 11 in
-def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>;
-
-//===----------------------------------------------------------------------===//
-// Post increment load
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Template class for non-predicated post increment loads with immediate offset.
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, addrMode = PostInc in
-class T_load_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<4> MajOp >
- : LDInstPI <(outs RC:$dst, IntRegs:$dst2),
- (ins IntRegs:$src1, ImmOp:$offset),
- "$dst = "#mnemonic#"($src1++#$offset)" ,
- [],
- "$src1 = $dst2" > ,
- PredNewRel {
- bits<5> dst;
- bits<5> src1;
- bits<7> offset;
- bits<4> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
- !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
- !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
- /* s4_0Imm */ offset{3-0})));
- let hasNewValue = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1);
-
- let IClass = 0b1001;
-
- let Inst{27-25} = 0b101;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13-12} = 0b00;
- let Inst{8-5} = offsetBits;
- let Inst{4-0} = dst;
- }
-
-//===----------------------------------------------------------------------===//
-// Template class for predicated post increment loads with immediate offset.
-//===----------------------------------------------------------------------===//
-let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in
-class T_pload_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<4> MajOp, bit isPredNot, bit isPredNew >
- : LDInst <(outs RC:$dst, IntRegs:$dst2),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset),
- !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#"$dst = "#mnemonic#"($src2++#$offset)",
- [] ,
- "$src2 = $dst2" > ,
- PredNewRel {
- bits<5> dst;
- bits<2> src1;
- bits<5> src2;
- bits<7> offset;
- bits<4> offsetBits;
-
- let isPredicatedNew = isPredNew;
- let isPredicatedFalse = isPredNot;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
- !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
- !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
- /* s4_0Imm */ offset{3-0})));
- let hasNewValue = !if (!eq(ImmOpStr, "s4_3Imm"), 0, 1);
-
- let IClass = 0b1001;
-
- let Inst{27-25} = 0b101;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src2;
- let Inst{13} = 0b1;
- let Inst{12} = isPredNew;
- let Inst{11} = isPredNot;
- let Inst{10-9} = src1;
- let Inst{8-5} = offsetBits;
- let Inst{4-0} = dst;
- }
-
-//===----------------------------------------------------------------------===//
-// Multiclass for post increment loads with immediate offset.
-//===----------------------------------------------------------------------===//
-
-multiclass LD_PostInc <string mnemonic, string BaseOp, RegisterClass RC,
- Operand ImmOp, bits<4> MajOp> {
- let BaseOpcode = "POST_"#BaseOp in {
- let isPredicable = 1 in
- def L2_#NAME#_pi : T_load_pi < mnemonic, RC, ImmOp, MajOp>;
-
- // Predicated
- def L2_p#NAME#t_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 0, 0>;
- def L2_p#NAME#f_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 1, 0>;
-
- // Predicated new
- def L2_p#NAME#tnew_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 0, 1>;
- def L2_p#NAME#fnew_pi : T_pload_pi < mnemonic, RC, ImmOp, MajOp, 1, 1>;
- }
-}
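-
-// For example, "defm loadrb : LD_PostInc<...>" below yields L2_loadrb_pi,
-// "$dst = memb($src1++#$offset)", where the incremented base is written
-// back via the "$src1 = $dst2" constraint.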
-
-// post increment byte loads with immediate offset
-let accessSize = ByteAccess in {
- defm loadrb : LD_PostInc <"memb", "LDrib", IntRegs, s4_0Imm, 0b1000>;
- defm loadrub : LD_PostInc <"memub", "LDriub", IntRegs, s4_0Imm, 0b1001>;
-}
-
-// post increment halfword loads with immediate offset
-let accessSize = HalfWordAccess, opExtentAlign = 1 in {
- defm loadrh : LD_PostInc <"memh", "LDrih", IntRegs, s4_1Imm, 0b1010>;
- defm loadruh : LD_PostInc <"memuh", "LDriuh", IntRegs, s4_1Imm, 0b1011>;
-}
-
-// post increment word loads with immediate offset
-let accessSize = WordAccess, opExtentAlign = 2 in
-defm loadri : LD_PostInc <"memw", "LDriw", IntRegs, s4_2Imm, 0b1100>;
-
-// post increment doubleword loads with immediate offset
-let accessSize = DoubleWordAccess, opExtentAlign = 3 in
-defm loadrd : LD_PostInc <"memd", "LDrid", DoubleRegs, s4_3Imm, 0b1110>;
-
-// Rd=memb[u]h(Rx++#s4:1)
-// Rdd=memb[u]h(Rx++#s4:2)
-let accessSize = HalfWordAccess, opExtentAlign = 1 in {
- def L2_loadbsw2_pi : T_load_pi <"membh", IntRegs, s4_1Imm, 0b0001>;
- def L2_loadbzw2_pi : T_load_pi <"memubh", IntRegs, s4_1Imm, 0b0011>;
-}
-let accessSize = WordAccess, opExtentAlign = 2, hasNewValue = 0 in {
- def L2_loadbsw4_pi : T_load_pi <"membh", DoubleRegs, s4_2Imm, 0b0111>;
- def L2_loadbzw4_pi : T_load_pi <"memubh", DoubleRegs, s4_2Imm, 0b0101>;
-}
-
-//===----------------------------------------------------------------------===//
-// Template class for post increment fifo loads with immediate offset.
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, addrMode = PostInc in
-class T_loadalign_pi <string mnemonic, Operand ImmOp, bits<4> MajOp >
- : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$dst2),
- (ins DoubleRegs:$src1, IntRegs:$src2, ImmOp:$offset),
- "$dst = "#mnemonic#"($src2++#$offset)" ,
- [], "$src2 = $dst2, $src1 = $dst" > ,
- PredNewRel {
- bits<5> dst;
- bits<5> src2;
- bits<5> offset;
- bits<4> offsetBits;
-
- let offsetBits = !if (!eq(!cast<string>(ImmOp), "s4_1Imm"), offset{4-1},
- /* s4_0Imm */ offset{3-0});
- let IClass = 0b1001;
-
- let Inst{27-25} = 0b101;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src2;
- let Inst{13-12} = 0b00;
- let Inst{8-5} = offsetBits;
- let Inst{4-0} = dst;
- }
-
-// Ryy=memh_fifo(Rx++#s4:1)
-// Ryy=memb_fifo(Rx++#s4:0)
-let accessSize = ByteAccess in
-def L2_loadalignb_pi : T_loadalign_pi <"memb_fifo", s4_0Imm, 0b0100>;
-
-let accessSize = HalfWordAccess, opExtentAlign = 1 in
-def L2_loadalignh_pi : T_loadalign_pi <"memh_fifo", s4_1Imm, 0b0010>;
-
-//===----------------------------------------------------------------------===//
-// Template class for post increment loads with register offset.
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, addrMode = PostInc in
-class T_load_pr <string mnemonic, RegisterClass RC, bits<4> MajOp,
- MemAccessSize AccessSz>
- : LDInstPI <(outs RC:$dst, IntRegs:$_dst_),
- (ins IntRegs:$src1, ModRegs:$src2),
- "$dst = "#mnemonic#"($src1++$src2)" ,
- [], "$src1 = $_dst_" > {
- bits<5> dst;
- bits<5> src1;
- bits<1> src2;
-
- let accessSize = AccessSz;
- let IClass = 0b1001;
-
- let Inst{27-25} = 0b110;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13} = src2;
- let Inst{12} = 0b0;
- let Inst{7} = 0b0;
- let Inst{4-0} = dst;
- }
-
-let hasNewValue = 1 in {
- def L2_loadrb_pr : T_load_pr <"memb", IntRegs, 0b1000, ByteAccess>;
- def L2_loadrub_pr : T_load_pr <"memub", IntRegs, 0b1001, ByteAccess>;
- def L2_loadrh_pr : T_load_pr <"memh", IntRegs, 0b1010, HalfWordAccess>;
- def L2_loadruh_pr : T_load_pr <"memuh", IntRegs, 0b1011, HalfWordAccess>;
- def L2_loadri_pr : T_load_pr <"memw", IntRegs, 0b1100, WordAccess>;
-
- def L2_loadbzw2_pr : T_load_pr <"memubh", IntRegs, 0b0011, HalfWordAccess>;
-}
-
-def L2_loadrd_pr : T_load_pr <"memd", DoubleRegs, 0b1110, DoubleWordAccess>;
-def L2_loadbzw4_pr : T_load_pr <"memubh", DoubleRegs, 0b0101, WordAccess>;
-
-// Load predicate.
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
- isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
-def LDriw_pred : LDInst<(outs PredRegs:$dst),
- (ins IntRegs:$addr, s11_2Ext:$off),
- ".error \"should not emit\"", []>;
-// Load modifier.
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
- isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
-def LDriw_mod : LDInst<(outs ModRegs:$dst),
- (ins IntRegs:$addr, s11_2Ext:$off),
- ".error \"should not emit\"", []>;
-
-let Defs = [R29, R30, R31], Uses = [R30], hasSideEffects = 0 in
- def L2_deallocframe : LDInst<(outs), (ins),
- "deallocframe",
- []> {
- let IClass = 0b1001;
-
- let Inst{27-16} = 0b000000011110;
- let Inst{13} = 0b0;
- let Inst{4-0} = 0b11110;
-}
-
-// Load / Post increment circular addressing mode.
-let Uses = [CS], hasSideEffects = 0, addrMode = PostInc in
-class T_load_pcr<string mnemonic, RegisterClass RC, bits<4> MajOp>
- : LDInst <(outs RC:$dst, IntRegs:$_dst_),
- (ins IntRegs:$Rz, ModRegs:$Mu),
- "$dst = "#mnemonic#"($Rz ++ I:circ($Mu))", [],
- "$Rz = $_dst_" > {
- bits<5> dst;
- bits<5> Rz;
- bit Mu;
-
- let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
- let IClass = 0b1001;
-
- let Inst{27-25} = 0b100;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12} = 0b0;
- let Inst{9} = 0b1;
- let Inst{7} = 0b0;
- let Inst{4-0} = dst;
- }
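-
-// For example, L2_loadrb_pcr below assembles as
-// "$dst = memb($Rz ++ I:circ($Mu))", with the post-incremented base written
-// back through the "$Rz = $_dst_" constraint.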
-
-let accessSize = ByteAccess in {
- def L2_loadrb_pcr : T_load_pcr <"memb", IntRegs, 0b1000>;
- def L2_loadrub_pcr : T_load_pcr <"memub", IntRegs, 0b1001>;
-}
-
-let accessSize = HalfWordAccess in {
- def L2_loadrh_pcr : T_load_pcr <"memh", IntRegs, 0b1010>;
- def L2_loadruh_pcr : T_load_pcr <"memuh", IntRegs, 0b1011>;
- def L2_loadbsw2_pcr : T_load_pcr <"membh", IntRegs, 0b0001>;
- def L2_loadbzw2_pcr : T_load_pcr <"memubh", IntRegs, 0b0011>;
-}
-
-let accessSize = WordAccess in {
- def L2_loadri_pcr : T_load_pcr <"memw", IntRegs, 0b1100>;
- let hasNewValue = 0 in {
- def L2_loadbzw4_pcr : T_load_pcr <"memubh", DoubleRegs, 0b0101>;
- def L2_loadbsw4_pcr : T_load_pcr <"membh", DoubleRegs, 0b0111>;
- }
-}
-
-let accessSize = DoubleWordAccess in
-def L2_loadrd_pcr : T_load_pcr <"memd", DoubleRegs, 0b1110>;
-
-// Load-align / Post increment circular addressing mode.
-let Uses = [CS], hasSideEffects = 0, addrMode = PostInc in
-class T_loadalign_pcr<string mnemonic, bits<4> MajOp, MemAccessSize AccessSz >
- : LDInst <(outs DoubleRegs:$dst, IntRegs:$_dst_),
- (ins DoubleRegs:$_src_, IntRegs:$Rz, ModRegs:$Mu),
- "$dst = "#mnemonic#"($Rz ++ I:circ($Mu))", [],
- "$Rz = $_dst_, $dst = $_src_" > {
- bits<5> dst;
- bits<5> Rz;
- bit Mu;
-
- let accessSize = AccessSz;
- let IClass = 0b1001;
-
- let Inst{27-25} = 0b100;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12} = 0b0;
- let Inst{9} = 0b1;
- let Inst{7} = 0b0;
- let Inst{4-0} = dst;
- }
-
-def L2_loadalignb_pcr : T_loadalign_pcr <"memb_fifo", 0b0100, ByteAccess>;
-def L2_loadalignh_pcr : T_loadalign_pcr <"memh_fifo", 0b0010, HalfWordAccess>;
-
-//===----------------------------------------------------------------------===//
-// Circular loads with immediate offset.
-//===----------------------------------------------------------------------===//
-let Uses = [CS], mayLoad = 1, hasSideEffects = 0, addrMode = PostInc in
-class T_load_pci <string mnemonic, RegisterClass RC,
- Operand ImmOp, bits<4> MajOp>
- : LDInstPI<(outs RC:$dst, IntRegs:$_dst_),
- (ins IntRegs:$Rz, ImmOp:$offset, ModRegs:$Mu),
- "$dst = "#mnemonic#"($Rz ++ #$offset:circ($Mu))", [],
- "$Rz = $_dst_"> {
- bits<5> dst;
- bits<5> Rz;
- bits<1> Mu;
- bits<7> offset;
- bits<4> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
- let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
- !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
- !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
- /* s4_0Imm */ offset{3-0})));
- let IClass = 0b1001;
- let Inst{27-25} = 0b100;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12} = 0b0;
- let Inst{9} = 0b0;
- let Inst{8-5} = offsetBits;
- let Inst{4-0} = dst;
- }
-
-// Byte variants of circ load
-let accessSize = ByteAccess in {
- def L2_loadrb_pci : T_load_pci <"memb", IntRegs, s4_0Imm, 0b1000>;
- def L2_loadrub_pci : T_load_pci <"memub", IntRegs, s4_0Imm, 0b1001>;
-}
-
-// Half word variants of circ load
-let accessSize = HalfWordAccess in {
- def L2_loadrh_pci : T_load_pci <"memh", IntRegs, s4_1Imm, 0b1010>;
- def L2_loadruh_pci : T_load_pci <"memuh", IntRegs, s4_1Imm, 0b1011>;
- def L2_loadbzw2_pci : T_load_pci <"memubh", IntRegs, s4_1Imm, 0b0011>;
- def L2_loadbsw2_pci : T_load_pci <"membh", IntRegs, s4_1Imm, 0b0001>;
-}
-
-// Word variants of circ load
-let accessSize = WordAccess in
-def L2_loadri_pci : T_load_pci <"memw", IntRegs, s4_2Imm, 0b1100>;
-
-let accessSize = WordAccess, hasNewValue = 0 in {
- def L2_loadbzw4_pci : T_load_pci <"memubh", DoubleRegs, s4_2Imm, 0b0101>;
- def L2_loadbsw4_pci : T_load_pci <"membh", DoubleRegs, s4_2Imm, 0b0111>;
-}
-
-let accessSize = DoubleWordAccess, hasNewValue = 0 in
-def L2_loadrd_pci : T_load_pci <"memd", DoubleRegs, s4_3Imm, 0b1110>;
-
-
-// TODO: memb_fifo and memh_fifo must take destination register as input.
-// One-off circ loads - not enough in common to break into a class.
-let accessSize = ByteAccess in
-def L2_loadalignb_pci : T_load_pci <"memb_fifo", DoubleRegs, s4_0Imm, 0b0100>;
-
-let accessSize = HalfWordAccess, opExtentAlign = 1 in
-def L2_loadalignh_pci : T_load_pci <"memh_fifo", DoubleRegs, s4_1Imm, 0b0010>;
-
-// L[24]_load[wd]_locked: Load word/double with lock.
-let isSoloAX = 1 in
-class T_load_locked <string mnemonic, RegisterClass RC>
- : LD0Inst <(outs RC:$dst),
- (ins IntRegs:$src),
- "$dst = "#mnemonic#"($src)"> {
- bits<5> dst;
- bits<5> src;
- let IClass = 0b1001;
- let Inst{27-21} = 0b0010000;
- let Inst{20-16} = src;
- let Inst{13-12} = !if (!eq(mnemonic, "memd_locked"), 0b01, 0b00);
- let Inst{5} = 0;
- let Inst{4-0} = dst;
-}
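-
-// For example, L4_loadd_locked below assembles as "$dst = memd_locked($src)"
-// and sets Inst{13-12} = 0b01 via the mnemonic test above.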
-let hasNewValue = 1, accessSize = WordAccess, opNewValue = 0 in
- def L2_loadw_locked : T_load_locked <"memw_locked", IntRegs>;
-let accessSize = DoubleWordAccess in
- def L4_loadd_locked : T_load_locked <"memd_locked", DoubleRegs>;
-
-// S[24]_store[wd]_locked: Store word/double conditionally.
-let isSoloAX = 1, isPredicateLate = 1 in
-class T_store_locked <string mnemonic, RegisterClass RC>
- : ST0Inst <(outs PredRegs:$Pd), (ins IntRegs:$Rs, RC:$Rt),
- mnemonic#"($Rs, $Pd) = $Rt"> {
- bits<2> Pd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1010;
- let Inst{27-23} = 0b00001;
- let Inst{22} = !if (!eq(mnemonic, "memw_locked"), 0b0, 0b1);
- let Inst{21} = 0b1;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{1-0} = Pd;
-}
-
-let accessSize = WordAccess in
-def S2_storew_locked : T_store_locked <"memw_locked", IntRegs>;
-
-let accessSize = DoubleWordAccess in
-def S4_stored_locked : T_store_locked <"memd_locked", DoubleRegs>;
-
-//===----------------------------------------------------------------------===//
-// Bit-reversed loads with auto-increment register
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, addrMode = PostInc in
-class T_load_pbr<string mnemonic, RegisterClass RC,
- MemAccessSize addrSize, bits<4> majOp>
- : LDInst
- <(outs RC:$dst, IntRegs:$_dst_),
- (ins IntRegs:$Rz, ModRegs:$Mu),
- "$dst = "#mnemonic#"($Rz ++ $Mu:brev)" ,
- [] , "$Rz = $_dst_" > {
-
- let accessSize = addrSize;
-
- bits<5> dst;
- bits<5> Rz;
- bits<1> Mu;
-
- let IClass = 0b1001;
-
- let Inst{27-25} = 0b111;
- let Inst{24-21} = majOp;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12} = 0b0;
- let Inst{7} = 0b0;
- let Inst{4-0} = dst;
- }
-
-let hasNewValue = 1, opNewValue = 0 in {
- def L2_loadrb_pbr : T_load_pbr <"memb", IntRegs, ByteAccess, 0b1000>;
- def L2_loadrub_pbr : T_load_pbr <"memub", IntRegs, ByteAccess, 0b1001>;
- def L2_loadrh_pbr : T_load_pbr <"memh", IntRegs, HalfWordAccess, 0b1010>;
- def L2_loadruh_pbr : T_load_pbr <"memuh", IntRegs, HalfWordAccess, 0b1011>;
- def L2_loadbsw2_pbr : T_load_pbr <"membh", IntRegs, HalfWordAccess, 0b0001>;
- def L2_loadbzw2_pbr : T_load_pbr <"memubh", IntRegs, HalfWordAccess, 0b0011>;
- def L2_loadri_pbr : T_load_pbr <"memw", IntRegs, WordAccess, 0b1100>;
-}
-
-def L2_loadbzw4_pbr : T_load_pbr <"memubh", DoubleRegs, WordAccess, 0b0101>;
-def L2_loadbsw4_pbr : T_load_pbr <"membh", DoubleRegs, WordAccess, 0b0111>;
-def L2_loadrd_pbr : T_load_pbr <"memd", DoubleRegs, DoubleWordAccess, 0b1110>;
-
-def L2_loadalignb_pbr :T_load_pbr <"memb_fifo", DoubleRegs, ByteAccess, 0b0100>;
-def L2_loadalignh_pbr :T_load_pbr <"memh_fifo", DoubleRegs,
- HalfWordAccess, 0b0010>;
-
-//===----------------------------------------------------------------------===//
-// LD -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MTYPE/ALU +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// MTYPE/ALU -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MTYPE/COMPLEX +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// MTYPE/COMPLEX -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MTYPE/MPYH +
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Template Class
-// MPYS / Multiply signed/unsigned halfwords
-//Rd=mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
-//===----------------------------------------------------------------------===//
-
-let hasNewValue = 1, opNewValue = 0 in
-class T_M2_mpy < bits<2> LHbits, bit isSat, bit isRnd,
- bit hasShift, bit isUnsigned>
- : MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = "#!if(isUnsigned,"mpyu","mpy")#"($Rs."#!if(LHbits{1},"h","l")
- #", $Rt."#!if(LHbits{0},"h)","l)")
- #!if(hasShift,":<<1","")
- #!if(isRnd,":rnd","")
- #!if(isSat,":sat",""),
- [], "", M_tc_3x_SLOT23 > {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1100;
- let Inst{23} = hasShift;
- let Inst{22} = isUnsigned;
- let Inst{21} = isRnd;
- let Inst{7} = isSat;
- let Inst{6-5} = LHbits;
- let Inst{4-0} = Rd;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- }
-
-//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]
-def M2_mpy_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 0>;
-def M2_mpy_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 0>;
-def M2_mpy_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 0>;
-def M2_mpy_lh_s0: T_M2_mpy<0b01, 0, 0, 0, 0>;
-def M2_mpy_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 0>;
-def M2_mpy_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 0>;
-def M2_mpy_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 0>;
-def M2_mpy_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 0>;
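-
-// For example, M2_mpy_lh_s1 above assembles as "$Rd = mpy($Rs.l, $Rt.h):<<1":
-// LHbits = 0b01 selects Rs.l and Rt.h, and hasShift adds the :<<1 suffix.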
-
-//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
-def M2_mpyu_ll_s1: T_M2_mpy<0b00, 0, 0, 1, 1>;
-def M2_mpyu_ll_s0: T_M2_mpy<0b00, 0, 0, 0, 1>;
-def M2_mpyu_lh_s1: T_M2_mpy<0b01, 0, 0, 1, 1>;
-def M2_mpyu_lh_s0: T_M2_mpy<0b01, 0, 0, 0, 1>;
-def M2_mpyu_hl_s1: T_M2_mpy<0b10, 0, 0, 1, 1>;
-def M2_mpyu_hl_s0: T_M2_mpy<0b10, 0, 0, 0, 1>;
-def M2_mpyu_hh_s1: T_M2_mpy<0b11, 0, 0, 1, 1>;
-def M2_mpyu_hh_s0: T_M2_mpy<0b11, 0, 0, 0, 1>;
-
-//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1]:rnd
-def M2_mpy_rnd_ll_s1: T_M2_mpy <0b00, 0, 1, 1, 0>;
-def M2_mpy_rnd_ll_s0: T_M2_mpy <0b00, 0, 1, 0, 0>;
-def M2_mpy_rnd_lh_s1: T_M2_mpy <0b01, 0, 1, 1, 0>;
-def M2_mpy_rnd_lh_s0: T_M2_mpy <0b01, 0, 1, 0, 0>;
-def M2_mpy_rnd_hl_s1: T_M2_mpy <0b10, 0, 1, 1, 0>;
-def M2_mpy_rnd_hl_s0: T_M2_mpy <0b10, 0, 1, 0, 0>;
-def M2_mpy_rnd_hh_s1: T_M2_mpy <0b11, 0, 1, 1, 0>;
-def M2_mpy_rnd_hh_s0: T_M2_mpy <0b11, 0, 1, 0, 0>;
-
-//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:sat]
-//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<1][:rnd][:sat]
-let Defs = [USR_OVF] in {
- def M2_mpy_sat_ll_s1: T_M2_mpy <0b00, 1, 0, 1, 0>;
- def M2_mpy_sat_ll_s0: T_M2_mpy <0b00, 1, 0, 0, 0>;
- def M2_mpy_sat_lh_s1: T_M2_mpy <0b01, 1, 0, 1, 0>;
- def M2_mpy_sat_lh_s0: T_M2_mpy <0b01, 1, 0, 0, 0>;
- def M2_mpy_sat_hl_s1: T_M2_mpy <0b10, 1, 0, 1, 0>;
- def M2_mpy_sat_hl_s0: T_M2_mpy <0b10, 1, 0, 0, 0>;
- def M2_mpy_sat_hh_s1: T_M2_mpy <0b11, 1, 0, 1, 0>;
- def M2_mpy_sat_hh_s0: T_M2_mpy <0b11, 1, 0, 0, 0>;
-
- def M2_mpy_sat_rnd_ll_s1: T_M2_mpy <0b00, 1, 1, 1, 0>;
- def M2_mpy_sat_rnd_ll_s0: T_M2_mpy <0b00, 1, 1, 0, 0>;
- def M2_mpy_sat_rnd_lh_s1: T_M2_mpy <0b01, 1, 1, 1, 0>;
- def M2_mpy_sat_rnd_lh_s0: T_M2_mpy <0b01, 1, 1, 0, 0>;
- def M2_mpy_sat_rnd_hl_s1: T_M2_mpy <0b10, 1, 1, 1, 0>;
- def M2_mpy_sat_rnd_hl_s0: T_M2_mpy <0b10, 1, 1, 0, 0>;
- def M2_mpy_sat_rnd_hh_s1: T_M2_mpy <0b11, 1, 1, 1, 0>;
- def M2_mpy_sat_rnd_hh_s0: T_M2_mpy <0b11, 1, 1, 0, 0>;
-}
-
-//===----------------------------------------------------------------------===//
-// Template Class
-// MPYS / Multiply signed/unsigned halfwords and add the result to, or
-// subtract it from, the accumulator.
-//Rx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
-//===----------------------------------------------------------------------===//
-
-let hasNewValue = 1, opNewValue = 0 in
-class T_M2_mpy_acc < bits<2> LHbits, bit isSat, bit isNac,
- bit hasShift, bit isUnsigned >
- : MInst_acc<(outs IntRegs:$Rx), (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
- "$Rx "#!if(isNac,"-= ","+= ")#!if(isUnsigned,"mpyu","mpy")
- #"($Rs."#!if(LHbits{1},"h","l")
- #", $Rt."#!if(LHbits{0},"h)","l)")
- #!if(hasShift,":<<1","")
- #!if(isSat,":sat",""),
- [], "$dst2 = $Rx", M_tc_3x_SLOT23 > {
- bits<5> Rx;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
- let Inst{27-24} = 0b1110;
- let Inst{23} = hasShift;
- let Inst{22} = isUnsigned;
- let Inst{21} = isNac;
- let Inst{7} = isSat;
- let Inst{6-5} = LHbits;
- let Inst{4-0} = Rx;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- }
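-
-// For example, M2_mpy_nac_hl_s0 below assembles as
-// "$Rx -= mpy($Rs.h, $Rt.l)": isNac selects "-=" and LHbits = 0b10 selects
-// Rs.h and Rt.l.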
-
-//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]
-def M2_mpy_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 0>;
-def M2_mpy_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 0>;
-def M2_mpy_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 0>;
-def M2_mpy_acc_lh_s0: T_M2_mpy_acc <0b01, 0, 0, 0, 0>;
-def M2_mpy_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 0>;
-def M2_mpy_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 0>;
-def M2_mpy_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 0>;
-def M2_mpy_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 0>;
-
-//Rx += mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
-def M2_mpyu_acc_ll_s1: T_M2_mpy_acc <0b00, 0, 0, 1, 1>;
-def M2_mpyu_acc_ll_s0: T_M2_mpy_acc <0b00, 0, 0, 0, 1>;
-def M2_mpyu_acc_lh_s1: T_M2_mpy_acc <0b01, 0, 0, 1, 1>;
-def M2_mpyu_acc_lh_s0: T_M2_mpy_acc <0b01, 0, 0, 0, 1>;
-def M2_mpyu_acc_hl_s1: T_M2_mpy_acc <0b10, 0, 0, 1, 1>;
-def M2_mpyu_acc_hl_s0: T_M2_mpy_acc <0b10, 0, 0, 0, 1>;
-def M2_mpyu_acc_hh_s1: T_M2_mpy_acc <0b11, 0, 0, 1, 1>;
-def M2_mpyu_acc_hh_s0: T_M2_mpy_acc <0b11, 0, 0, 0, 1>;
-
-//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]
-def M2_mpy_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 0>;
-def M2_mpy_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 0>;
-def M2_mpy_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 0>;
-def M2_mpy_nac_lh_s0: T_M2_mpy_acc <0b01, 0, 1, 0, 0>;
-def M2_mpy_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 0>;
-def M2_mpy_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 0>;
-def M2_mpy_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 0>;
-def M2_mpy_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 0>;
-
-//Rx -= mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
-def M2_mpyu_nac_ll_s1: T_M2_mpy_acc <0b00, 0, 1, 1, 1>;
-def M2_mpyu_nac_ll_s0: T_M2_mpy_acc <0b00, 0, 1, 0, 1>;
-def M2_mpyu_nac_lh_s1: T_M2_mpy_acc <0b01, 0, 1, 1, 1>;
-def M2_mpyu_nac_lh_s0: T_M2_mpy_acc <0b01, 0, 1, 0, 1>;
-def M2_mpyu_nac_hl_s1: T_M2_mpy_acc <0b10, 0, 1, 1, 1>;
-def M2_mpyu_nac_hl_s0: T_M2_mpy_acc <0b10, 0, 1, 0, 1>;
-def M2_mpyu_nac_hh_s1: T_M2_mpy_acc <0b11, 0, 1, 1, 1>;
-def M2_mpyu_nac_hh_s0: T_M2_mpy_acc <0b11, 0, 1, 0, 1>;
-
-//Rx += mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat
-def M2_mpy_acc_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 0, 1, 0>;
-def M2_mpy_acc_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 0, 0, 0>;
-def M2_mpy_acc_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 0, 1, 0>;
-def M2_mpy_acc_sat_lh_s0: T_M2_mpy_acc <0b01, 1, 0, 0, 0>;
-def M2_mpy_acc_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 0, 1, 0>;
-def M2_mpy_acc_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 0, 0, 0>;
-def M2_mpy_acc_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 0, 1, 0>;
-def M2_mpy_acc_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 0, 0, 0>;
-
-//Rx -= mpy(Rs.[H|L],Rt.[H|L])[:<<1]:sat
-def M2_mpy_nac_sat_ll_s1: T_M2_mpy_acc <0b00, 1, 1, 1, 0>;
-def M2_mpy_nac_sat_ll_s0: T_M2_mpy_acc <0b00, 1, 1, 0, 0>;
-def M2_mpy_nac_sat_lh_s1: T_M2_mpy_acc <0b01, 1, 1, 1, 0>;
-def M2_mpy_nac_sat_lh_s0: T_M2_mpy_acc <0b01, 1, 1, 0, 0>;
-def M2_mpy_nac_sat_hl_s1: T_M2_mpy_acc <0b10, 1, 1, 1, 0>;
-def M2_mpy_nac_sat_hl_s0: T_M2_mpy_acc <0b10, 1, 1, 0, 0>;
-def M2_mpy_nac_sat_hh_s1: T_M2_mpy_acc <0b11, 1, 1, 1, 0>;
-def M2_mpy_nac_sat_hh_s0: T_M2_mpy_acc <0b11, 1, 1, 0, 0>;
-
-//===----------------------------------------------------------------------===//
-// Template Class
-// MPYS / Multiply signed/unsigned halfwords and add the result to, or
-// subtract it from, the 64-bit destination register.
-//Rxx [-+]= mpy[u](Rs.[H|L],Rt.[H|L])[:<<1][:sat]
-//===----------------------------------------------------------------------===//
-
-class T_M2_mpyd_acc < bits<2> LHbits, bit isNac, bit hasShift, bit isUnsigned>
- : MInst_acc<(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
- "$Rxx "#!if(isNac,"-= ","+= ")#!if(isUnsigned,"mpyu","mpy")
- #"($Rs."#!if(LHbits{1},"h","l")
- #", $Rt."#!if(LHbits{0},"h)","l)")
- #!if(hasShift,":<<1",""),
- [], "$dst2 = $Rxx", M_tc_3x_SLOT23 > {
- bits<5> Rxx;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b0110;
- let Inst{23} = hasShift;
- let Inst{22} = isUnsigned;
- let Inst{21} = isNac;
- let Inst{7} = 0;
- let Inst{6-5} = LHbits;
- let Inst{4-0} = Rxx;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- }
-
-def M2_mpyd_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 0>;
-def M2_mpyd_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 0>;
-def M2_mpyd_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 0>;
-def M2_mpyd_acc_ll_s0: T_M2_mpyd_acc <0b00, 0, 0, 0>;
-
-def M2_mpyd_acc_hh_s1: T_M2_mpyd_acc <0b11, 0, 1, 0>;
-def M2_mpyd_acc_hl_s1: T_M2_mpyd_acc <0b10, 0, 1, 0>;
-def M2_mpyd_acc_lh_s1: T_M2_mpyd_acc <0b01, 0, 1, 0>;
-def M2_mpyd_acc_ll_s1: T_M2_mpyd_acc <0b00, 0, 1, 0>;
-
-def M2_mpyd_nac_hh_s0: T_M2_mpyd_acc <0b11, 1, 0, 0>;
-def M2_mpyd_nac_hl_s0: T_M2_mpyd_acc <0b10, 1, 0, 0>;
-def M2_mpyd_nac_lh_s0: T_M2_mpyd_acc <0b01, 1, 0, 0>;
-def M2_mpyd_nac_ll_s0: T_M2_mpyd_acc <0b00, 1, 0, 0>;
-
-def M2_mpyd_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 0>;
-def M2_mpyd_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 0>;
-def M2_mpyd_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 0>;
-def M2_mpyd_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 0>;
-
-def M2_mpyud_acc_hh_s0: T_M2_mpyd_acc <0b11, 0, 0, 1>;
-def M2_mpyud_acc_hl_s0: T_M2_mpyd_acc <0b10, 0, 0, 1>;
-def M2_mpyud_acc_lh_s0: T_M2_mpyd_acc <0b01, 0, 0, 1>;
-def M2_mpyud_acc_ll_s0: T_M2_mpyd_acc <0b00, 0, 0, 1>;
-
-def M2_mpyud_acc_hh_s1: T_M2_mpyd_acc <0b11, 0, 1, 1>;
-def M2_mpyud_acc_hl_s1: T_M2_mpyd_acc <0b10, 0, 1, 1>;
-def M2_mpyud_acc_lh_s1: T_M2_mpyd_acc <0b01, 0, 1, 1>;
-def M2_mpyud_acc_ll_s1: T_M2_mpyd_acc <0b00, 0, 1, 1>;
-
-def M2_mpyud_nac_hh_s0: T_M2_mpyd_acc <0b11, 1, 0, 1>;
-def M2_mpyud_nac_hl_s0: T_M2_mpyd_acc <0b10, 1, 0, 1>;
-def M2_mpyud_nac_lh_s0: T_M2_mpyd_acc <0b01, 1, 0, 1>;
-def M2_mpyud_nac_ll_s0: T_M2_mpyd_acc <0b00, 1, 0, 1>;
-
-def M2_mpyud_nac_hh_s1: T_M2_mpyd_acc <0b11, 1, 1, 1>;
-def M2_mpyud_nac_hl_s1: T_M2_mpyd_acc <0b10, 1, 1, 1>;
-def M2_mpyud_nac_lh_s1: T_M2_mpyd_acc <0b01, 1, 1, 1>;
-def M2_mpyud_nac_ll_s1: T_M2_mpyd_acc <0b00, 1, 1, 1>;
-
-//===----------------------------------------------------------------------===//
-// Template Class -- Vector Multiply
-// Used for complex multiply (real or imaginary), dual multiply and even
-// halfwords.
-//===----------------------------------------------------------------------===//
-class T_M2_vmpy < string opc, bits<3> MajOp, bits<3> MinOp, bit hasShift,
- bit isRnd, bit isSat >
- : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rdd = "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
- #!if(isRnd,":rnd","")
- #!if(isSat,":sat",""),
- [] > {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1000;
- let Inst{23-21} = MajOp;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rdd;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-// Vector complex multiply imaginary: Rdd=vcmpyi(Rss,Rtt)[:<<1]:sat
-let Defs = [USR_OVF] in {
-def M2_vcmpy_s1_sat_i: T_M2_vmpy <"vcmpyi", 0b110, 0b110, 1, 0, 1>;
-def M2_vcmpy_s0_sat_i: T_M2_vmpy <"vcmpyi", 0b010, 0b110, 0, 0, 1>;
-
-// Vector complex multiply real: Rdd=vcmpyr(Rss,Rtt)[:<<1]:sat
-def M2_vcmpy_s1_sat_r: T_M2_vmpy <"vcmpyr", 0b101, 0b110, 1, 0, 1>;
-def M2_vcmpy_s0_sat_r: T_M2_vmpy <"vcmpyr", 0b001, 0b110, 0, 0, 1>;
-
-// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat
-def M2_vdmpys_s1: T_M2_vmpy <"vdmpy", 0b100, 0b100, 1, 0, 1>;
-def M2_vdmpys_s0: T_M2_vmpy <"vdmpy", 0b000, 0b100, 0, 0, 1>;
-
-// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat
-def M2_vmpy2es_s1: T_M2_vmpy <"vmpyeh", 0b100, 0b110, 1, 0, 1>;
-def M2_vmpy2es_s0: T_M2_vmpy <"vmpyeh", 0b000, 0b110, 0, 0, 1>;
-
-//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat
-def M2_mmpyh_s0: T_M2_vmpy <"vmpywoh", 0b000, 0b111, 0, 0, 1>;
-def M2_mmpyh_s1: T_M2_vmpy <"vmpywoh", 0b100, 0b111, 1, 0, 1>;
-def M2_mmpyh_rs0: T_M2_vmpy <"vmpywoh", 0b001, 0b111, 0, 1, 1>;
-def M2_mmpyh_rs1: T_M2_vmpy <"vmpywoh", 0b101, 0b111, 1, 1, 1>;
-
-//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat
-def M2_mmpyl_s0: T_M2_vmpy <"vmpyweh", 0b000, 0b101, 0, 0, 1>;
-def M2_mmpyl_s1: T_M2_vmpy <"vmpyweh", 0b100, 0b101, 1, 0, 1>;
-def M2_mmpyl_rs0: T_M2_vmpy <"vmpyweh", 0b001, 0b101, 0, 1, 1>;
-def M2_mmpyl_rs1: T_M2_vmpy <"vmpyweh", 0b101, 0b101, 1, 1, 1>;
-
-//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat
-def M2_mmpyuh_s0: T_M2_vmpy <"vmpywouh", 0b010, 0b111, 0, 0, 1>;
-def M2_mmpyuh_s1: T_M2_vmpy <"vmpywouh", 0b110, 0b111, 1, 0, 1>;
-def M2_mmpyuh_rs0: T_M2_vmpy <"vmpywouh", 0b011, 0b111, 0, 1, 1>;
-def M2_mmpyuh_rs1: T_M2_vmpy <"vmpywouh", 0b111, 0b111, 1, 1, 1>;
-
-//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat
-def M2_mmpyul_s0: T_M2_vmpy <"vmpyweuh", 0b010, 0b101, 0, 0, 1>;
-def M2_mmpyul_s1: T_M2_vmpy <"vmpyweuh", 0b110, 0b101, 1, 0, 1>;
-def M2_mmpyul_rs0: T_M2_vmpy <"vmpyweuh", 0b011, 0b101, 0, 1, 1>;
-def M2_mmpyul_rs1: T_M2_vmpy <"vmpyweuh", 0b111, 0b101, 1, 1, 1>;
-}
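-
-// For example, M2_vdmpys_s1 above assembles as
-// "$Rdd = vdmpy($Rss, $Rtt):<<1:sat".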
-
-let hasNewValue = 1, opNewValue = 0 in
-class T_MType_mpy <string mnemonic, bits<4> RegTyBits, RegisterClass RC,
- bits<3> MajOp, bits<3> MinOp, bit isSat = 0, bit isRnd = 0,
- string op2Suffix = "", bit isRaw = 0, bit isHi = 0 >
- : MInst <(outs IntRegs:$dst), (ins RC:$src1, RC:$src2),
- "$dst = "#mnemonic
- #"($src1, $src2"#op2Suffix#")"
- #!if(MajOp{2}, ":<<1", "")
- #!if(isRnd, ":rnd", "")
- #!if(isSat, ":sat", "")
- #!if(isRaw, !if(isHi, ":raw:hi", ":raw:lo"), ""), [] > {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = RegTyBits;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13} = 0b0;
- let Inst{12-8} = src2;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = dst;
- }
-
-class T_MType_vrcmpy <string mnemonic, bits<3> MajOp, bits<3> MinOp, bit isHi>
- : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, 1, 1, "", 1, isHi>;
-
-class T_MType_dd <string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit isSat = 0, bit isRnd = 0 >
- : T_MType_mpy <mnemonic, 0b1001, DoubleRegs, MajOp, MinOp, isSat, isRnd>;
-
-class T_MType_rr1 <string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit isSat = 0, bit isRnd = 0 >
- : T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd>;
-
-class T_MType_rr2 <string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit isSat = 0, bit isRnd = 0, string op2str = "" >
- : T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd, op2str>;
-
-def M2_vradduh : T_MType_dd <"vradduh", 0b000, 0b001, 0, 0>;
-def M2_vdmpyrs_s0 : T_MType_dd <"vdmpy", 0b000, 0b000, 1, 1>;
-def M2_vdmpyrs_s1 : T_MType_dd <"vdmpy", 0b100, 0b000, 1, 1>;
-
-let CextOpcode = "mpyi", InputType = "reg" in
-def M2_mpyi : T_MType_rr1 <"mpyi", 0b000, 0b000>, ImmRegRel;
-
-def M2_mpy_up : T_MType_rr1 <"mpy", 0b000, 0b001>;
-def M2_mpyu_up : T_MType_rr1 <"mpyu", 0b010, 0b001>;
-
-def M2_dpmpyss_rnd_s0 : T_MType_rr1 <"mpy", 0b001, 0b001, 0, 1>;
-
-def M2_vmpy2s_s0pack : T_MType_rr1 <"vmpyh", 0b001, 0b111, 1, 1>;
-def M2_vmpy2s_s1pack : T_MType_rr1 <"vmpyh", 0b101, 0b111, 1, 1>;
-
-def M2_hmmpyh_rs1 : T_MType_rr2 <"mpy", 0b101, 0b100, 1, 1, ".h">;
-def M2_hmmpyl_rs1 : T_MType_rr2 <"mpy", 0b111, 0b100, 1, 1, ".l">;
-
-def M2_cmpyrs_s0 : T_MType_rr2 <"cmpy", 0b001, 0b110, 1, 1>;
-def M2_cmpyrs_s1 : T_MType_rr2 <"cmpy", 0b101, 0b110, 1, 1>;
-def M2_cmpyrsc_s0 : T_MType_rr2 <"cmpy", 0b011, 0b110, 1, 1, "*">;
-def M2_cmpyrsc_s1 : T_MType_rr2 <"cmpy", 0b111, 0b110, 1, 1, "*">;
-
-// V4 Instructions
-def M2_vraddh : T_MType_dd <"vraddh", 0b001, 0b111, 0>;
-def M2_mpysu_up : T_MType_rr1 <"mpysu", 0b011, 0b001, 0>;
-def M2_mpy_up_s1 : T_MType_rr1 <"mpy", 0b101, 0b010, 0>;
-def M2_mpy_up_s1_sat : T_MType_rr1 <"mpy", 0b111, 0b000, 1>;
-
-def M2_hmmpyh_s1 : T_MType_rr2 <"mpy", 0b101, 0b000, 1, 0, ".h">;
-def M2_hmmpyl_s1 : T_MType_rr2 <"mpy", 0b101, 0b001, 1, 0, ".l">;
-
-let hasNewValue = 1, opNewValue = 0 in
-class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern>
- : MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, ImmOp:$u8),
- "$Rd ="#!if(isNeg, "- ", "+ ")#"mpyi($Rs, #$u8)" ,
- pattern, "", M_tc_3x_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<8> u8;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b0000;
- let Inst{23} = isNeg;
- let Inst{13} = 0b0;
- let Inst{4-0} = Rd;
- let Inst{20-16} = Rs;
- let Inst{12-5} = u8;
- }
-
-let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in
-def M2_mpysip : T_MType_mpy_ri <0, u8_0Ext, []>;
-
-def M2_mpysin : T_MType_mpy_ri <1, u8_0Imm, []>;
-
-// Assembler mapped to M2_mpyi
-let isAsmParserOnly = 1 in
-def M2_mpyui : MInst<(outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2),
- "$dst = mpyui($src1, $src2)">;
-
-// Rd=mpyi(Rs,#m9)
-// s9 is NOT the same as m9, but it works so far.
-// Assembler maps to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8)
-// depending on the value of m9. See Arch Spec.
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9,
- CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1,
- isAsmParserOnly = 1 in
-def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9_0Ext:$src2),
- "$dst = mpyi($src1, #$src2)", []>, ImmRegRel;
-
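-// For example, a negative m9 such as #-5 maps to the M2_mpysin form above
-// ("$Rd =- mpyi($Rs, #5)"); a non-negative m9 maps to M2_mpysip.
-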
-let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3,
- InputType = "imm" in
-class T_MType_acc_ri <string mnemonic, bits<3> MajOp, Operand ImmOp,
- list<dag> pattern = []>
- : MInst < (outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, ImmOp:$src3),
- "$dst "#mnemonic#"($src2, #$src3)",
- pattern, "$src1 = $dst", M_tc_2_SLOT23> {
- bits<5> dst;
- bits<5> src2;
- bits<8> src3;
-
- let IClass = 0b1110;
-
- let Inst{27-26} = 0b00;
- let Inst{25-23} = MajOp;
- let Inst{20-16} = src2;
- let Inst{13} = 0b0;
- let Inst{12-5} = src3;
- let Inst{4-0} = dst;
- }
-
-let InputType = "reg", hasNewValue = 1 in
-class T_MType_acc_rr <string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit isSwap = 0, list<dag> pattern = [], bit hasNot = 0,
- bit isSat = 0, bit isShift = 0>
- : MInst < (outs IntRegs:$dst),
- (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
- "$dst "#mnemonic#"($src2, "#!if(hasNot, "~$src3)","$src3)")
- #!if(isShift, ":<<1", "")
- #!if(isSat, ":sat", ""),
- pattern, "$src1 = $dst", M_tc_2_SLOT23 > {
- bits<5> dst;
- bits<5> src2;
- bits<5> src3;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1111;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = !if(isSwap, src3, src2);
- let Inst{13} = 0b0;
- let Inst{12-8} = !if(isSwap, src2, src3);
- let Inst{7-5} = MinOp;
- let Inst{4-0} = dst;
- }
-
-let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in {
- def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8_0Ext, []>, ImmRegRel;
-
- def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0, []>, ImmRegRel;
-}
-
-let CextOpcode = "ADD_acc" in {
- let isExtentSigned = 1 in
- def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8_0Ext, []>, ImmRegRel;
-
- def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, []>, ImmRegRel;
-}
-
-let CextOpcode = "SUB_acc" in {
- let isExtentSigned = 1 in
- def M2_naccii : T_MType_acc_ri <"-= add", 0b101, s8_0Ext>, ImmRegRel;
-
- def M2_nacci : T_MType_acc_rr <"-= add", 0b100, 0b001, 0>, ImmRegRel;
-}
-
-let Itinerary = M_tc_3x_SLOT23 in
-def M2_macsin : T_MType_acc_ri <"-= mpyi", 0b011, u8_0Ext>;
-
-def M2_xor_xacc : T_MType_acc_rr < "^= xor", 0b100, 0b011, 0>;
-def M2_subacc : T_MType_acc_rr <"+= sub", 0b000, 0b011, 1>;
-
-//===----------------------------------------------------------------------===//
-// Template Class -- XType Vector Instructions
-//===----------------------------------------------------------------------===//
-class T_XTYPE_Vect < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj >
- : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rdd = "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"),
- [] > {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1000;
- let Inst{23-21} = MajOp;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rdd;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-class T_XTYPE_Vect_acc < string opc, bits<3> MajOp, bits<3> MinOp, bit isConj >
- : MInst <(outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rdd += "#opc#"($Rss, $Rtt"#!if(isConj,"*)",")"),
- [], "$dst2 = $Rdd",M_tc_3x_SLOT23 > {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1010;
- let Inst{23-21} = MajOp;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rdd;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-class T_XTYPE_Vect_diff < bits<3> MajOp, string opc >
- : MInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rtt, DoubleRegs:$Rss),
- "$Rdd = "#opc#"($Rtt, $Rss)",
- [], "",M_tc_2_SLOT23 > {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1000;
- let Inst{23-21} = MajOp;
- let Inst{7-5} = 0b000;
- let Inst{4-0} = Rdd;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-// Vector reduce add unsigned bytes: Rdd[+]=vraddub(Rss,Rtt)
-def A2_vraddub: T_XTYPE_Vect <"vraddub", 0b010, 0b001, 0>;
-def A2_vraddub_acc: T_XTYPE_Vect_acc <"vraddub", 0b010, 0b001, 0>;
-
-// Vector sum of absolute differences unsigned bytes: Rdd[+]=vrsadub(Rss,Rtt)
-def A2_vrsadub: T_XTYPE_Vect <"vrsadub", 0b010, 0b010, 0>;
-def A2_vrsadub_acc: T_XTYPE_Vect_acc <"vrsadub", 0b010, 0b010, 0>;
-
-// Vector absolute difference: Rdd=vabsdiffh(Rtt,Rss)
-def M2_vabsdiffh: T_XTYPE_Vect_diff<0b011, "vabsdiffh">;
-
-// Vector absolute difference words: Rdd=vabsdiffw(Rtt,Rss)
-def M2_vabsdiffw: T_XTYPE_Vect_diff<0b001, "vabsdiffw">;
-
-// Vector reduce complex multiply real or imaginary:
-// Rdd[+]=vrcmpy[ir](Rss,Rtt[*])
-def M2_vrcmpyi_s0: T_XTYPE_Vect <"vrcmpyi", 0b000, 0b000, 0>;
-def M2_vrcmpyi_s0c: T_XTYPE_Vect <"vrcmpyi", 0b010, 0b000, 1>;
-def M2_vrcmaci_s0: T_XTYPE_Vect_acc <"vrcmpyi", 0b000, 0b000, 0>;
-def M2_vrcmaci_s0c: T_XTYPE_Vect_acc <"vrcmpyi", 0b010, 0b000, 1>;
-
-def M2_vrcmpyr_s0: T_XTYPE_Vect <"vrcmpyr", 0b000, 0b001, 0>;
-def M2_vrcmpyr_s0c: T_XTYPE_Vect <"vrcmpyr", 0b011, 0b001, 1>;
-def M2_vrcmacr_s0: T_XTYPE_Vect_acc <"vrcmpyr", 0b000, 0b001, 0>;
-def M2_vrcmacr_s0c: T_XTYPE_Vect_acc <"vrcmpyr", 0b011, 0b001, 1>;
-
-// Vector reduce halfwords:
-// Rdd[+]=vrmpyh(Rss,Rtt)
-def M2_vrmpy_s0: T_XTYPE_Vect <"vrmpyh", 0b000, 0b010, 0>;
-def M2_vrmac_s0: T_XTYPE_Vect_acc <"vrmpyh", 0b000, 0b010, 0>;
-
-//===----------------------------------------------------------------------===//
-// Template Class -- Vector Multiply with accumulation.
-// Used for complex multiply (real or imaginary), dual multiply and even
-// halfwords.
-//===----------------------------------------------------------------------===//
-let Defs = [USR_OVF] in
-class T_M2_vmpy_acc_sat < string opc, bits<3> MajOp, bits<3> MinOp,
- bit hasShift, bit isRnd >
- : MInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
- #!if(isRnd,":rnd","")#":sat",
- [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > {
- bits<5> Rxx;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1010;
- let Inst{23-21} = MajOp;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rxx;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-class T_M2_vmpy_acc < string opc, bits<3> MajOp, bits<3> MinOp,
- bit hasShift, bit isRnd >
- : MInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
- #!if(isRnd,":rnd",""),
- [], "$dst2 = $Rxx",M_tc_3x_SLOT23 > {
- bits<5> Rxx;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1010;
- let Inst{23-21} = MajOp;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rxx;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-// Vector multiply word by signed half with accumulation
-// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat
-def M2_mmacls_s1: T_M2_vmpy_acc_sat <"vmpyweh", 0b100, 0b101, 1, 0>;
-def M2_mmacls_s0: T_M2_vmpy_acc_sat <"vmpyweh", 0b000, 0b101, 0, 0>;
-def M2_mmacls_rs1: T_M2_vmpy_acc_sat <"vmpyweh", 0b101, 0b101, 1, 1>;
-def M2_mmacls_rs0: T_M2_vmpy_acc_sat <"vmpyweh", 0b001, 0b101, 0, 1>;
-
-def M2_mmachs_s1: T_M2_vmpy_acc_sat <"vmpywoh", 0b100, 0b111, 1, 0>;
-def M2_mmachs_s0: T_M2_vmpy_acc_sat <"vmpywoh", 0b000, 0b111, 0, 0>;
-def M2_mmachs_rs1: T_M2_vmpy_acc_sat <"vmpywoh", 0b101, 0b111, 1, 1>;
-def M2_mmachs_rs0: T_M2_vmpy_acc_sat <"vmpywoh", 0b001, 0b111, 0, 1>;
-
-// Vector multiply word by unsigned half with accumulation
-// Rxx+=vmpyw[eo]uh(Rss,Rtt)[:<<1][:rnd]:sat
-def M2_mmaculs_s1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b110, 0b101, 1, 0>;
-def M2_mmaculs_s0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b010, 0b101, 0, 0>;
-def M2_mmaculs_rs1: T_M2_vmpy_acc_sat <"vmpyweuh", 0b111, 0b101, 1, 1>;
-def M2_mmaculs_rs0: T_M2_vmpy_acc_sat <"vmpyweuh", 0b011, 0b101, 0, 1>;
-
-def M2_mmacuhs_s1: T_M2_vmpy_acc_sat <"vmpywouh", 0b110, 0b111, 1, 0>;
-def M2_mmacuhs_s0: T_M2_vmpy_acc_sat <"vmpywouh", 0b010, 0b111, 0, 0>;
-def M2_mmacuhs_rs1: T_M2_vmpy_acc_sat <"vmpywouh", 0b111, 0b111, 1, 1>;
-def M2_mmacuhs_rs0: T_M2_vmpy_acc_sat <"vmpywouh", 0b011, 0b111, 0, 1>;
-
-// Vector multiply even halfwords with accumulation
-// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat]
-def M2_vmac2es: T_M2_vmpy_acc <"vmpyeh", 0b001, 0b010, 0, 0>;
-def M2_vmac2es_s1: T_M2_vmpy_acc_sat <"vmpyeh", 0b100, 0b110, 1, 0>;
-def M2_vmac2es_s0: T_M2_vmpy_acc_sat <"vmpyeh", 0b000, 0b110, 0, 0>;
-
-// Vector dual multiply with accumulation
-// Rxx+=vdmpy(Rss,Rtt)[:sat]
-def M2_vdmacs_s1: T_M2_vmpy_acc_sat <"vdmpy", 0b100, 0b100, 1, 0>;
-def M2_vdmacs_s0: T_M2_vmpy_acc_sat <"vdmpy", 0b000, 0b100, 0, 0>;
-
-// Vector complex multiply real or imaginary with accumulation
-// Rxx+=vcmpy[ir](Rss,Rtt):sat
-def M2_vcmac_s0_sat_r: T_M2_vmpy_acc_sat <"vcmpyr", 0b001, 0b100, 0, 0>;
-def M2_vcmac_s0_sat_i: T_M2_vmpy_acc_sat <"vcmpyi", 0b010, 0b100, 0, 0>;
-
-//===----------------------------------------------------------------------===//
-// Template Class -- Multiply signed/unsigned halfwords into a 64-bit
-// result, with and without rounding
-//===----------------------------------------------------------------------===//
-class T_M2_mpyd < bits<2> LHbits, bit isRnd, bit hasShift, bit isUnsigned >
- : MInst < (outs DoubleRegs:$Rdd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rdd = "#!if(isUnsigned,"mpyu","mpy")#"($Rs."#!if(LHbits{1},"h","l")
- #", $Rt."#!if(LHbits{0},"h)","l)")
- #!if(hasShift,":<<1","")
- #!if(isRnd,":rnd",""),
- [] > {
- bits<5> Rdd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b0100;
- let Inst{23} = hasShift;
- let Inst{22} = isUnsigned;
- let Inst{21} = isRnd;
- let Inst{6-5} = LHbits;
- let Inst{4-0} = Rdd;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
-}
-
-def M2_mpyd_hh_s0: T_M2_mpyd<0b11, 0, 0, 0>;
-def M2_mpyd_hl_s0: T_M2_mpyd<0b10, 0, 0, 0>;
-def M2_mpyd_lh_s0: T_M2_mpyd<0b01, 0, 0, 0>;
-def M2_mpyd_ll_s0: T_M2_mpyd<0b00, 0, 0, 0>;
-
-def M2_mpyd_hh_s1: T_M2_mpyd<0b11, 0, 1, 0>;
-def M2_mpyd_hl_s1: T_M2_mpyd<0b10, 0, 1, 0>;
-def M2_mpyd_lh_s1: T_M2_mpyd<0b01, 0, 1, 0>;
-def M2_mpyd_ll_s1: T_M2_mpyd<0b00, 0, 1, 0>;
-
-def M2_mpyd_rnd_hh_s0: T_M2_mpyd<0b11, 1, 0, 0>;
-def M2_mpyd_rnd_hl_s0: T_M2_mpyd<0b10, 1, 0, 0>;
-def M2_mpyd_rnd_lh_s0: T_M2_mpyd<0b01, 1, 0, 0>;
-def M2_mpyd_rnd_ll_s0: T_M2_mpyd<0b00, 1, 0, 0>;
-
-def M2_mpyd_rnd_hh_s1: T_M2_mpyd<0b11, 1, 1, 0>;
-def M2_mpyd_rnd_hl_s1: T_M2_mpyd<0b10, 1, 1, 0>;
-def M2_mpyd_rnd_lh_s1: T_M2_mpyd<0b01, 1, 1, 0>;
-def M2_mpyd_rnd_ll_s1: T_M2_mpyd<0b00, 1, 1, 0>;
-
-//Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<1]
-def M2_mpyud_hh_s0: T_M2_mpyd<0b11, 0, 0, 1>;
-def M2_mpyud_hl_s0: T_M2_mpyd<0b10, 0, 0, 1>;
-def M2_mpyud_lh_s0: T_M2_mpyd<0b01, 0, 0, 1>;
-def M2_mpyud_ll_s0: T_M2_mpyd<0b00, 0, 0, 1>;
-
-def M2_mpyud_hh_s1: T_M2_mpyd<0b11, 0, 1, 1>;
-def M2_mpyud_hl_s1: T_M2_mpyd<0b10, 0, 1, 1>;
-def M2_mpyud_lh_s1: T_M2_mpyd<0b01, 0, 1, 1>;
-def M2_mpyud_ll_s1: T_M2_mpyd<0b00, 0, 1, 1>;
-
-//===----------------------------------------------------------------------===//
-// Template Class for xtype mpy:
-// Vector multiply
-// Complex multiply
-// Multiply 32x32 and use full result
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in
-class T_XTYPE_mpy64 <string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit isSat, bit hasShift, bit isConj>
- : MInst <(outs DoubleRegs:$Rdd),
- (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rdd = "#mnemonic#"($Rs, $Rt"#!if(isConj,"*)",")")
- #!if(hasShift,":<<1","")
- #!if(isSat,":sat",""),
- [] > {
- bits<5> Rdd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b0101;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rdd;
- }
-
-//===----------------------------------------------------------------------===//
-// Template Class for xtype mpy with accumulation into 64-bit:
-// Vector multiply
-// Complex multiply
-// Multiply 32x32 and use full result
-//===----------------------------------------------------------------------===//
-class T_XTYPE_mpy64_acc <string op1, string op2, bits<3> MajOp, bits<3> MinOp,
- bit isSat, bit hasShift, bit isConj>
- : MInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
- "$Rxx "#op2#"= "#op1#"($Rs, $Rt"#!if(isConj,"*)",")")
- #!if(hasShift,":<<1","")
- #!if(isSat,":sat",""),
- [], "$dst2 = $Rxx" > {
- bits<5> Rxx;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b0111;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rxx;
- }
-
-// MPY - Multiply and use full result
-// Rdd = mpy[u](Rs,Rt)
-def M2_dpmpyss_s0 : T_XTYPE_mpy64 < "mpy", 0b000, 0b000, 0, 0, 0>;
-def M2_dpmpyuu_s0 : T_XTYPE_mpy64 < "mpyu", 0b010, 0b000, 0, 0, 0>;
-
-// Rxx[+-]= mpy[u](Rs,Rt)
-def M2_dpmpyss_acc_s0 : T_XTYPE_mpy64_acc < "mpy", "+", 0b000, 0b000, 0, 0, 0>;
-def M2_dpmpyss_nac_s0 : T_XTYPE_mpy64_acc < "mpy", "-", 0b001, 0b000, 0, 0, 0>;
-def M2_dpmpyuu_acc_s0 : T_XTYPE_mpy64_acc < "mpyu", "+", 0b010, 0b000, 0, 0, 0>;
-def M2_dpmpyuu_nac_s0 : T_XTYPE_mpy64_acc < "mpyu", "-", 0b011, 0b000, 0, 0, 0>;
-
-// Complex multiply real or imaginary
-// Rdd=cmpy[ir](Rs,Rt)
-def M2_cmpyi_s0 : T_XTYPE_mpy64 < "cmpyi", 0b000, 0b001, 0, 0, 0>;
-def M2_cmpyr_s0 : T_XTYPE_mpy64 < "cmpyr", 0b000, 0b010, 0, 0, 0>;
-
-// Rxx+=cmpy[ir](Rs,Rt)
-def M2_cmaci_s0 : T_XTYPE_mpy64_acc < "cmpyi", "+", 0b000, 0b001, 0, 0, 0>;
-def M2_cmacr_s0 : T_XTYPE_mpy64_acc < "cmpyr", "+", 0b000, 0b010, 0, 0, 0>;
-
-// Complex multiply
-// Rdd=cmpy(Rs,Rt)[:<<1]:sat
-def M2_cmpys_s0 : T_XTYPE_mpy64 < "cmpy", 0b000, 0b110, 1, 0, 0>;
-def M2_cmpys_s1 : T_XTYPE_mpy64 < "cmpy", 0b100, 0b110, 1, 1, 0>;
-
-// Rdd=cmpy(Rs,Rt*)[:<<]:sat
-def M2_cmpysc_s0 : T_XTYPE_mpy64 < "cmpy", 0b010, 0b110, 1, 0, 1>;
-def M2_cmpysc_s1 : T_XTYPE_mpy64 < "cmpy", 0b110, 0b110, 1, 1, 1>;
-
-// Rxx[-+]=cmpy(Rs,Rt)[:<<1]:sat
-def M2_cmacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b000, 0b110, 1, 0, 0>;
-def M2_cnacs_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b000, 0b111, 1, 0, 0>;
-def M2_cmacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b100, 0b110, 1, 1, 0>;
-def M2_cnacs_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b100, 0b111, 1, 1, 0>;
-
-// Rxx[-+]=cmpy(Rs,Rt*)[:<<1]:sat
-def M2_cmacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b010, 0b110, 1, 0, 1>;
-def M2_cnacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b010, 0b111, 1, 0, 1>;
-def M2_cmacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b110, 0b110, 1, 1, 1>;
-def M2_cnacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b110, 0b111, 1, 1, 1>;
-
-// Vector multiply halfwords
-// Rdd=vmpyh(Rs,Rt)[:<<1]:sat
-//let Defs = [USR_OVF] in {
- def M2_vmpy2s_s1 : T_XTYPE_mpy64 < "vmpyh", 0b100, 0b101, 1, 1, 0>;
- def M2_vmpy2s_s0 : T_XTYPE_mpy64 < "vmpyh", 0b000, 0b101, 1, 0, 0>;
-//}
-
-// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat]
-def M2_vmac2 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b001, 0b001, 0, 0, 0>;
-def M2_vmac2s_s1 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b100, 0b101, 1, 1, 0>;
-def M2_vmac2s_s0 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b000, 0b101, 1, 0, 0>;
-
-//===----------------------------------------------------------------------===//
-// MTYPE/MPYH -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MTYPE/MPYS +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// MTYPE/MPYS -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MTYPE/VB +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// MTYPE/VB -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MTYPE/VH +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// MTYPE/VH -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ST +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Template class for non-predicated post-increment stores with immediate offset
-//===----------------------------------------------------------------------===//
-let isPredicable = 1, hasSideEffects = 0, addrMode = PostInc in
-class T_store_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<4> MajOp, bit isHalf >
- : STInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$src1, ImmOp:$offset, RC:$src2),
- mnemonic#"($src1++#$offset) = $src2"#!if(isHalf, ".h", ""),
- [], "$src1 = $_dst_" >,
- AddrModeRel {
- bits<5> src1;
- bits<5> src2;
- bits<7> offset;
- bits<4> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
- !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
- !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
- /* s4_0Imm */ offset{3-0})));
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, !if(isHalf,0,1));
-
- let IClass = 0b1010;
-
- let Inst{27-25} = 0b101;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13} = 0b0;
- let Inst{12-8} = src2;
- let Inst{7} = 0b0;
- let Inst{6-3} = offsetBits;
- let Inst{1} = 0b0;
- }
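-
-// Worked example of the offsetBits scaling (register/offset values are
-// illustrative): for a halfword store the s4_1Imm offset must be even and
-// only offset{4-1} is encoded, so "memh(r0++#6) = r1" encodes
-// offsetBits = 6 >> 1 = 0b0011.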
-
-//===----------------------------------------------------------------------===//
-// Template class for predicated post-increment stores with immediate offset
-//===----------------------------------------------------------------------===//
-let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc in
-class T_pstore_pi <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<4> MajOp, bit isHalf, bit isPredNot, bit isPredNew>
- : STInst <(outs IntRegs:$_dst_),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3),
- !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($src2++#$offset) = $src3"#!if(isHalf, ".h", ""),
- [], "$src2 = $_dst_" >,
- AddrModeRel {
- bits<2> src1;
- bits<5> src2;
- bits<7> offset;
- bits<5> src3;
- bits<4> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "s4_3Imm"), offset{6-3},
- !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
- !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
- /* s4_0Imm */ offset{3-0})));
-
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(ImmOpStr, "s4_3Imm"), 0, !if(isHalf,0,1));
- let isPredicatedNew = isPredNew;
- let isPredicatedFalse = isPredNot;
-
- let IClass = 0b1010;
-
- let Inst{27-25} = 0b101;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src2;
- let Inst{13} = 0b1;
- let Inst{12-8} = src3;
- let Inst{7} = isPredNew;
- let Inst{6-3} = offsetBits;
- let Inst{2} = isPredNot;
- let Inst{1-0} = src1;
- }
-
-multiclass ST_PostInc<string mnemonic, string BaseOp, RegisterClass RC,
- Operand ImmOp, bits<4> MajOp, bit isHalf = 0 > {
-
- let BaseOpcode = "POST_"#BaseOp in {
- def S2_#NAME#_pi : T_store_pi <mnemonic, RC, ImmOp, MajOp, isHalf>;
-
- // Predicated
- def S2_p#NAME#t_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, isHalf, 0, 0>;
- def S2_p#NAME#f_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp, isHalf, 1, 0>;
-
- // Predicated new
- def S2_p#NAME#tnew_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp,
- isHalf, 0, 1>;
- def S2_p#NAME#fnew_pi : T_pstore_pi <mnemonic, RC, ImmOp, MajOp,
- isHalf, 1, 1>;
- }
-}
-
-let accessSize = ByteAccess in
-defm storerb: ST_PostInc <"memb", "STrib", IntRegs, s4_0Imm, 0b1000>;
-
-let accessSize = HalfWordAccess in
-defm storerh: ST_PostInc <"memh", "STrih", IntRegs, s4_1Imm, 0b1010>;
-
-let accessSize = WordAccess in
-defm storeri: ST_PostInc <"memw", "STriw", IntRegs, s4_2Imm, 0b1100>;
-
-let accessSize = DoubleWordAccess in
-defm storerd: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm, 0b1110>;
-
-let accessSize = HalfWordAccess, isNVStorable = 0 in
-defm storerf: ST_PostInc <"memh", "STrih_H", IntRegs, s4_1Imm, 0b1011, 1>;
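-
-// For reference, each defm above expands to five defs via the #NAME# pastes
-// in ST_PostInc; e.g. "storerb" yields S2_storerb_pi plus the predicated
-// forms S2_pstorerbt_pi, S2_pstorerbf_pi, S2_pstorerbtnew_pi and
-// S2_pstorerbfnew_pi.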
-
-//===----------------------------------------------------------------------===//
-// Template class for post-increment stores with register offset.
-//===----------------------------------------------------------------------===//
-class T_store_pr <string mnemonic, RegisterClass RC, bits<3> MajOp,
- MemAccessSize AccessSz, bit isHalf = 0>
- : STInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$src1, ModRegs:$src2, RC:$src3),
- mnemonic#"($src1++$src2) = $src3"#!if(isHalf, ".h", ""),
- [], "$src1 = $_dst_" > {
- bits<5> src1;
- bits<1> src2;
- bits<5> src3;
- let accessSize = AccessSz;
-
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if(!eq(mnemonic,"memd"), 0, !if(isHalf,0,1));
-
- let IClass = 0b1010;
-
- let Inst{27-24} = 0b1101;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13} = src2;
- let Inst{12-8} = src3;
- let Inst{7} = 0b0;
- }
-
-def S2_storerb_pr : T_store_pr<"memb", IntRegs, 0b000, ByteAccess>;
-def S2_storerh_pr : T_store_pr<"memh", IntRegs, 0b010, HalfWordAccess>;
-def S2_storeri_pr : T_store_pr<"memw", IntRegs, 0b100, WordAccess>;
-def S2_storerd_pr : T_store_pr<"memd", DoubleRegs, 0b110, DoubleWordAccess>;
-def S2_storerf_pr : T_store_pr<"memh", IntRegs, 0b011, HalfWordAccess, 1>;
-
-let opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in
-class T_store_io <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<3> MajOp, bit isH = 0>
- : STInst <(outs),
- (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
- mnemonic#"($src1+#$src2) = $src3"#!if(isH,".h","")>,
- AddrModeRel, ImmRegRel {
- bits<5> src1;
- bits<14> src2; // Actual address offset
- bits<5> src3;
- bits<11> offsetBits; // Represents offset encoding
-
- string ImmOpStr = !cast<string>(ImmOp);
-
- let opExtentBits = !if (!eq(ImmOpStr, "s11_3Ext"), 14,
- !if (!eq(ImmOpStr, "s11_2Ext"), 13,
- !if (!eq(ImmOpStr, "s11_1Ext"), 12,
- /* s11_0Ext */ 11)));
- let offsetBits = !if (!eq(ImmOpStr, "s11_3Ext"), src2{13-3},
- !if (!eq(ImmOpStr, "s11_2Ext"), src2{12-2},
- !if (!eq(ImmOpStr, "s11_1Ext"), src2{11-1},
- /* s11_0Ext */ src2{10-0})));
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
- let IClass = 0b1010;
-
- let Inst{27} = 0b0;
- let Inst{26-25} = offsetBits{10-9};
- let Inst{24} = 0b1;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13} = offsetBits{8};
- let Inst{12-8} = src3;
- let Inst{7-0} = offsetBits{7-0};
- }
-
-let opExtendable = 2, isPredicated = 1 in
-class T_pstore_io <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<3>MajOp, bit PredNot, bit isPredNew, bit isH = 0>
- : STInst <(outs),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
- !if(PredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($src2+#$src3) = $src4"#!if(isH,".h",""),
- [],"",V2LDST_tc_st_SLOT01 >,
- AddrModeRel, ImmRegRel {
- bits<2> src1;
- bits<5> src2;
- bits<9> src3; // Actual address offset
- bits<5> src4;
- bits<6> offsetBits; // Represents offset encoding
-
- let isPredicatedNew = isPredNew;
- let isPredicatedFalse = PredNot;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let opExtentBits = !if (!eq(ImmOpStr, "u6_3Ext"), 9,
- !if (!eq(ImmOpStr, "u6_2Ext"), 8,
- !if (!eq(ImmOpStr, "u6_1Ext"), 7,
- /* u6_0Ext */ 6)));
- let offsetBits = !if (!eq(ImmOpStr, "u6_3Ext"), src3{8-3},
- !if (!eq(ImmOpStr, "u6_2Ext"), src3{7-2},
- !if (!eq(ImmOpStr, "u6_1Ext"), src3{6-1},
- /* u6_0Ext */ src3{5-0})));
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
-
- let IClass = 0b0100;
-
- let Inst{27} = 0b0;
- let Inst{26} = PredNot;
- let Inst{25} = isPredNew;
- let Inst{24} = 0b0;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src2;
- let Inst{13} = offsetBits{5};
- let Inst{12-8} = src4;
- let Inst{7-3} = offsetBits{4-0};
- let Inst{1-0} = src1;
- }
-
-let isExtendable = 1, hasSideEffects = 0 in
-multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC,
- Operand ImmOp, Operand predImmOp, bits<3> MajOp, bit isH = 0> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
- def S2_#NAME#_io : T_store_io <mnemonic, RC, ImmOp, MajOp, isH>;
-
- // Predicated
- def S2_p#NAME#t_io : T_pstore_io<mnemonic, RC, predImmOp, MajOp, 0, 0, isH>;
- def S2_p#NAME#f_io : T_pstore_io<mnemonic, RC, predImmOp, MajOp, 1, 0, isH>;
-
- // Predicated new
- def S4_p#NAME#tnew_io : T_pstore_io <mnemonic, RC, predImmOp,
- MajOp, 0, 1, isH>;
- def S4_p#NAME#fnew_io : T_pstore_io <mnemonic, RC, predImmOp,
- MajOp, 1, 1, isH>;
- }
-}
-
-let addrMode = BaseImmOffset, InputType = "imm" in {
- let accessSize = ByteAccess in
- defm storerb: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext, u6_0Ext, 0b000>;
-
- let accessSize = HalfWordAccess, opExtentAlign = 1 in
- defm storerh: ST_Idxd < "memh", "STrih", IntRegs, s11_1Ext, u6_1Ext, 0b010>;
-
- let accessSize = WordAccess, opExtentAlign = 2 in
- defm storeri: ST_Idxd < "memw", "STriw", IntRegs, s11_2Ext, u6_2Ext, 0b100>;
-
- let accessSize = DoubleWordAccess, isNVStorable = 0, opExtentAlign = 3 in
- defm storerd: ST_Idxd < "memd", "STrid", DoubleRegs, s11_3Ext,
- u6_3Ext, 0b110>;
-
- let accessSize = HalfWordAccess, opExtentAlign = 1 in
- defm storerf: ST_Idxd < "memh", "STrif", IntRegs, s11_1Ext,
- u6_1Ext, 0b011, 1>;
-}
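-
-// Resulting syntax, as a sketch (register numbers illustrative):
-// S2_storerb_io prints "memb(r1+#4) = r2", and the predicated-new form
-// S4_pstorerbtnew_io prints "if (p0.new) memb(r1+#4) = r2".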
-
-// Store predicate.
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
- isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
-def STriw_pred : STInst<(outs),
- (ins IntRegs:$addr, s11_2Ext:$off, PredRegs:$src1),
- ".error \"should not emit\"", []>;
-// Store modifier.
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
- isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
-def STriw_mod : STInst<(outs),
- (ins IntRegs:$addr, s11_2Ext:$off, ModRegs:$src1),
- ".error \"should not emit\"", []>;
-
-// S2_allocframe: Allocate stack frame.
-let Defs = [R29, R30], Uses = [R29, R31, R30],
- hasSideEffects = 0, accessSize = DoubleWordAccess in
-def S2_allocframe: ST0Inst <
- (outs), (ins u11_3Imm:$u11_3),
- "allocframe(#$u11_3)" > {
- bits<14> u11_3;
-
- let IClass = 0b1010;
- let Inst{27-16} = 0b000010011101;
- let Inst{13-11} = 0b000;
- let Inst{10-0} = u11_3{13-3};
- }
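-
-// Worked encoding example (frame size illustrative): the size is a multiple
-// of 8 and only u11_3{13-3} is encoded, so "allocframe(#24)" stores
-// 24 >> 3 = 3 in Inst{10-0}.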
-
-//===----------------------------------------------------------------------===//
-// Circular stores with auto-increment immediate:
-// S2_storer[bhwdf]_pci, plus the .new forms S2_storer[bhi]new_pci below.
-//===----------------------------------------------------------------------===//
-let Uses = [CS], addrMode = PostInc in
-class T_store_pci <string mnemonic, RegisterClass RC,
- Operand Imm, bits<4>MajOp,
- MemAccessSize AlignSize, string RegSrc = "Rt">
- : STInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$Rz, Imm:$offset, ModRegs:$Mu, RC:$Rt),
- #mnemonic#"($Rz ++ #$offset:circ($Mu)) = $"#RegSrc#"",
- [],
- "$Rz = $_dst_" > {
- bits<5> Rz;
- bits<7> offset;
- bits<1> Mu;
- bits<5> Rt;
- let accessSize = AlignSize;
- let isNVStorable = !if(!eq(mnemonic,"memd"), 0,
- !if(!eq(RegSrc,"Rt.h"), 0, 1));
-
- let IClass = 0b1010;
- let Inst{27-25} = 0b100;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12-8} = Rt;
- let Inst{7} = 0b0;
- let Inst{6-3} =
- !if (!eq(!cast<string>(AlignSize), "DoubleWordAccess"), offset{6-3},
- !if (!eq(!cast<string>(AlignSize), "WordAccess"), offset{5-2},
- !if (!eq(!cast<string>(AlignSize), "HalfWordAccess"), offset{4-1},
- /* ByteAccess */ offset{3-0})));
- let Inst{1} = 0b0;
- }
-
-def S2_storerb_pci : T_store_pci<"memb", IntRegs, s4_0Imm, 0b1000,
- ByteAccess>;
-def S2_storerh_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1010,
- HalfWordAccess>;
-def S2_storerf_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1011,
- HalfWordAccess, "Rt.h">;
-def S2_storeri_pci : T_store_pci<"memw", IntRegs, s4_2Imm, 0b1100,
- WordAccess>;
-def S2_storerd_pci : T_store_pci<"memd", DoubleRegs, s4_3Imm, 0b1110,
- DoubleWordAccess>;
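-
-// Resulting syntax, for illustration (registers illustrative):
-// S2_storeri_pci prints "memw(r0 ++ #8:circ(m0)) = r1"; the offset scaling
-// in Inst{6-3} mirrors the post-increment stores above.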
-
-let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 4,
- addrMode = PostInc in
-class T_storenew_pci <string mnemonic, Operand Imm,
- bits<2>MajOp, MemAccessSize AlignSize>
- : NVInst < (outs IntRegs:$_dst_),
- (ins IntRegs:$Rz, Imm:$offset, ModRegs:$Mu, IntRegs:$Nt),
- #mnemonic#"($Rz ++ #$offset:circ($Mu)) = $Nt.new",
- [],
- "$Rz = $_dst_"> {
- bits<5> Rz;
- bits<6> offset;
- bits<1> Mu;
- bits<3> Nt;
-
- let accessSize = AlignSize;
-
- let IClass = 0b1010;
- let Inst{27-21} = 0b1001101;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12-11} = MajOp;
- let Inst{10-8} = Nt;
- let Inst{7} = 0b0;
- let Inst{6-3} =
- !if (!eq(!cast<string>(AlignSize), "WordAccess"), offset{5-2},
- !if (!eq(!cast<string>(AlignSize), "HalfWordAccess"), offset{4-1},
- /* ByteAccess */ offset{3-0}));
- let Inst{1} = 0b0;
- }
-
-def S2_storerbnew_pci : T_storenew_pci <"memb", s4_0Imm, 0b00, ByteAccess>;
-def S2_storerhnew_pci : T_storenew_pci <"memh", s4_1Imm, 0b01, HalfWordAccess>;
-def S2_storerinew_pci : T_storenew_pci <"memw", s4_2Imm, 0b10, WordAccess>;
-
-//===----------------------------------------------------------------------===//
-// Circular stores with auto-increment register
-//===----------------------------------------------------------------------===//
-let Uses = [CS], addrMode = PostInc in
-class T_store_pcr <string mnemonic, RegisterClass RC, bits<4>MajOp,
- MemAccessSize AlignSize, string RegSrc = "Rt">
- : STInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$Rz, ModRegs:$Mu, RC:$Rt),
- #mnemonic#"($Rz ++ I:circ($Mu)) = $"#RegSrc#"",
- [],
- "$Rz = $_dst_" > {
- bits<5> Rz;
- bits<1> Mu;
- bits<5> Rt;
-
- let accessSize = AlignSize;
- let isNVStorable = !if(!eq(mnemonic,"memd"), 0,
- !if(!eq(RegSrc,"Rt.h"), 0, 1));
-
- let IClass = 0b1010;
- let Inst{27-25} = 0b100;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12-8} = Rt;
- let Inst{7} = 0b0;
- let Inst{1} = 0b1;
- }
-
-def S2_storerb_pcr : T_store_pcr<"memb", IntRegs, 0b1000, ByteAccess>;
-def S2_storerh_pcr : T_store_pcr<"memh", IntRegs, 0b1010, HalfWordAccess>;
-def S2_storeri_pcr : T_store_pcr<"memw", IntRegs, 0b1100, WordAccess>;
-def S2_storerd_pcr : T_store_pcr<"memd", DoubleRegs, 0b1110, DoubleWordAccess>;
-def S2_storerf_pcr : T_store_pcr<"memh", IntRegs, 0b1011,
- HalfWordAccess, "Rt.h">;
-
-//===----------------------------------------------------------------------===//
-// Circular .new stores with auto-increment register
-//===----------------------------------------------------------------------===//
-let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3,
- addrMode = PostInc in
-class T_storenew_pcr <string mnemonic, bits<2>MajOp,
- MemAccessSize AlignSize>
- : NVInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt),
- #mnemonic#"($Rz ++ I:circ($Mu)) = $Nt.new",
- [],
- "$Rz = $_dst_"> {
- bits<5> Rz;
- bits<1> Mu;
- bits<3> Nt;
-
- let accessSize = AlignSize;
-
- let IClass = 0b1010;
- let Inst{27-21} = 0b1001101;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12-11} = MajOp;
- let Inst{10-8} = Nt;
- let Inst{7} = 0b0;
- let Inst{1} = 0b1;
- }
-
-def S2_storerbnew_pcr : T_storenew_pcr <"memb", 0b00, ByteAccess>;
-def S2_storerhnew_pcr : T_storenew_pcr <"memh", 0b01, HalfWordAccess>;
-def S2_storerinew_pcr : T_storenew_pcr <"memw", 0b10, WordAccess>;
-
-//===----------------------------------------------------------------------===//
-// Bit-reversed stores with auto-increment register
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, addrMode = PostInc in
-class T_store_pbr<string mnemonic, RegisterClass RC,
- MemAccessSize addrSize, bits<3> majOp,
- bit isHalf = 0>
- : STInst
- <(outs IntRegs:$_dst_),
- (ins IntRegs:$Rz, ModRegs:$Mu, RC:$src),
- #mnemonic#"($Rz ++ $Mu:brev) = $src"#!if (!eq(isHalf, 1), ".h", ""),
- [], "$Rz = $_dst_" > {
-
- let accessSize = addrSize;
-
- bits<5> Rz;
- bits<1> Mu;
- bits<5> src;
-
- let IClass = 0b1010;
-
- let Inst{27-24} = 0b1111;
- let Inst{23-21} = majOp;
- let Inst{7} = 0b0;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{12-8} = src;
- }
-
-let isNVStorable = 1 in {
- let BaseOpcode = "S2_storerb_pbr" in
- def S2_storerb_pbr : T_store_pbr<"memb", IntRegs, ByteAccess,
- 0b000>, NewValueRel;
- let BaseOpcode = "S2_storerh_pbr" in
- def S2_storerh_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess,
- 0b010>, NewValueRel;
- let BaseOpcode = "S2_storeri_pbr" in
- def S2_storeri_pbr : T_store_pbr<"memw", IntRegs, WordAccess,
- 0b100>, NewValueRel;
-}
-
-def S2_storerf_pbr : T_store_pbr<"memh", IntRegs, HalfWordAccess, 0b011, 1>;
-def S2_storerd_pbr : T_store_pbr<"memd", DoubleRegs, DoubleWordAccess, 0b110>;
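-
-// For illustration (registers illustrative): S2_storeri_pbr prints
-// "memw(r0 ++ m0:brev) = r1". Bit-reversed addressing is typically used for
-// FFT-style buffer indexing.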
-
-//===----------------------------------------------------------------------===//
-// Bit-reversed .new stores with auto-increment register
-//===----------------------------------------------------------------------===//
-let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3,
- hasSideEffects = 0, addrMode = PostInc in
-class T_storenew_pbr<string mnemonic, MemAccessSize addrSize, bits<2> majOp>
- : NVInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$Rz, ModRegs:$Mu, IntRegs:$Nt),
- #mnemonic#"($Rz ++ $Mu:brev) = $Nt.new", [],
- "$Rz = $_dst_">, NewValueRel {
- let accessSize = addrSize;
- bits<5> Rz;
- bits<1> Mu;
- bits<3> Nt;
-
- let IClass = 0b1010;
-
- let Inst{27-21} = 0b1111101;
- let Inst{12-11} = majOp;
- let Inst{7} = 0b0;
- let Inst{20-16} = Rz;
- let Inst{13} = Mu;
- let Inst{10-8} = Nt;
- }
-
-let BaseOpcode = "S2_storerb_pbr" in
-def S2_storerbnew_pbr : T_storenew_pbr<"memb", ByteAccess, 0b00>;
-
-let BaseOpcode = "S2_storerh_pbr" in
-def S2_storerhnew_pbr : T_storenew_pbr<"memh", HalfWordAccess, 0b01>;
-
-let BaseOpcode = "S2_storeri_pbr" in
-def S2_storerinew_pbr : T_storenew_pbr<"memw", WordAccess, 0b10>;
-
-//===----------------------------------------------------------------------===//
-// ST -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Template class for S_2op instructions.
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in
-class T_S2op_1 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut,
- RegisterClass RCIn, bits<2> MajOp, bits<3> MinOp, bit isSat>
- : SInst <(outs RCOut:$dst), (ins RCIn:$src),
- "$dst = "#mnemonic#"($src)"#!if(isSat, ":sat", ""),
- [], "", S_2op_tc_1_SLOT23 > {
- bits<5> dst;
- bits<5> src;
-
- let IClass = 0b1000;
-
- let Inst{27-24} = RegTyBits;
- let Inst{23-22} = MajOp;
- let Inst{21} = 0b0;
- let Inst{20-16} = src;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = dst;
- }
-
-class T_S2op_1_di <string mnemonic, bits<2> MajOp, bits<3> MinOp>
- : T_S2op_1 <mnemonic, 0b0100, DoubleRegs, IntRegs, MajOp, MinOp, 0>;
-
-let hasNewValue = 1 in
-class T_S2op_1_id <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0>
- : T_S2op_1 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, isSat>;
-
-let hasNewValue = 1 in
-class T_S2op_1_ii <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit isSat = 0>
- : T_S2op_1 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp, isSat>;
-
-// Vector sign/zero extend
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
- def S2_vsxtbh : T_S2op_1_di <"vsxtbh", 0b00, 0b000>;
- def S2_vsxthw : T_S2op_1_di <"vsxthw", 0b00, 0b100>;
- def S2_vzxtbh : T_S2op_1_di <"vzxtbh", 0b00, 0b010>;
- def S2_vzxthw : T_S2op_1_di <"vzxthw", 0b00, 0b110>;
-}
-
-// Vector splat bytes/halfwords
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
- def S2_vsplatrb : T_S2op_1_ii <"vsplatb", 0b01, 0b111>;
- def S2_vsplatrh : T_S2op_1_di <"vsplath", 0b01, 0b010>;
-}
-
-// Sign extend word to doubleword
-def A2_sxtw : T_S2op_1_di <"sxtw", 0b01, 0b000>;
-
-// Vector saturate and pack
-let Defs = [USR_OVF] in {
- def S2_svsathb : T_S2op_1_ii <"vsathb", 0b10, 0b000>;
- def S2_svsathub : T_S2op_1_ii <"vsathub", 0b10, 0b010>;
- def S2_vsathb : T_S2op_1_id <"vsathb", 0b00, 0b110>;
- def S2_vsathub : T_S2op_1_id <"vsathub", 0b00, 0b000>;
- def S2_vsatwh : T_S2op_1_id <"vsatwh", 0b00, 0b010>;
- def S2_vsatwuh : T_S2op_1_id <"vsatwuh", 0b00, 0b100>;
-}
-
-// Vector truncate
-def S2_vtrunohb : T_S2op_1_id <"vtrunohb", 0b10, 0b000>;
-def S2_vtrunehb : T_S2op_1_id <"vtrunehb", 0b10, 0b010>;
-
-// Swizzle the bytes of a word
-def A2_swiz : T_S2op_1_ii <"swiz", 0b10, 0b111>;
-
-// Saturate
-let Defs = [USR_OVF] in {
- def A2_sat : T_S2op_1_id <"sat", 0b11, 0b000>;
- def A2_satb : T_S2op_1_ii <"satb", 0b11, 0b111>;
- def A2_satub : T_S2op_1_ii <"satub", 0b11, 0b110>;
- def A2_sath : T_S2op_1_ii <"sath", 0b11, 0b100>;
- def A2_satuh : T_S2op_1_ii <"satuh", 0b11, 0b101>;
- def A2_roundsat : T_S2op_1_id <"round", 0b11, 0b001, 0b1>;
-}
-
-let Itinerary = S_2op_tc_2_SLOT23 in {
- // Vector round and pack
- def S2_vrndpackwh : T_S2op_1_id <"vrndwh", 0b10, 0b100>;
-
- let Defs = [USR_OVF] in
- def S2_vrndpackwhs : T_S2op_1_id <"vrndwh", 0b10, 0b110, 1>;
-
- // Bit reverse
- def S2_brev : T_S2op_1_ii <"brev", 0b01, 0b110>;
-
- // Absolute value word
- def A2_abs : T_S2op_1_ii <"abs", 0b10, 0b100>;
-
- let Defs = [USR_OVF] in
- def A2_abssat : T_S2op_1_ii <"abs", 0b10, 0b101, 1>;
-
- // Negate with saturation
- let Defs = [USR_OVF] in
- def A2_negsat : T_S2op_1_ii <"neg", 0b10, 0b110, 1>;
-}
-
-class T_S2op_2 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut,
- RegisterClass RCIn, bits<3> MajOp, bits<3> MinOp,
- bit isSat, bit isRnd, list<dag> pattern = []>
- : SInst <(outs RCOut:$dst),
- (ins RCIn:$src, u5_0Imm:$u5),
- "$dst = "#mnemonic#"($src, #$u5)"#!if(isSat, ":sat", "")
- #!if(isRnd, ":rnd", ""),
- pattern, "", S_2op_tc_2_SLOT23> {
- bits<5> dst;
- bits<5> src;
- bits<5> u5;
-
- let IClass = 0b1000;
-
- let Inst{27-24} = RegTyBits;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src;
- let Inst{13} = 0b0;
- let Inst{12-8} = u5;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = dst;
- }
-
-class T_S2op_2_di <string mnemonic, bits<3> MajOp, bits<3> MinOp>
- : T_S2op_2 <mnemonic, 0b1000, DoubleRegs, IntRegs, MajOp, MinOp, 0, 0>;
-
-let hasNewValue = 1 in
-class T_S2op_2_id <string mnemonic, bits<3> MajOp, bits<3> MinOp>
- : T_S2op_2 <mnemonic, 0b1000, IntRegs, DoubleRegs, MajOp, MinOp, 0, 0>;
-
-let hasNewValue = 1 in
-class T_S2op_2_ii <string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit isSat = 0, bit isRnd = 0, list<dag> pattern = []>
- : T_S2op_2 <mnemonic, 0b1100, IntRegs, IntRegs, MajOp, MinOp,
- isSat, isRnd, pattern>;
-
-class T_S2op_shift <string mnemonic, bits<3> MajOp, bits<3> MinOp, SDNode OpNd>
- : T_S2op_2_ii <mnemonic, MajOp, MinOp, 0, 0, []>;
-
-// Vector arithmetic shift right by immediate with truncate and pack
-def S2_asr_i_svw_trun : T_S2op_2_id <"vasrw", 0b110, 0b010>;
-
-// Arithmetic/logical shift right/left by immediate
-let Itinerary = S_2op_tc_1_SLOT23 in {
- def S2_asr_i_r : T_S2op_shift <"asr", 0b000, 0b000, sra>;
- def S2_lsr_i_r : T_S2op_shift <"lsr", 0b000, 0b001, srl>;
- def S2_asl_i_r : T_S2op_shift <"asl", 0b000, 0b010, shl>;
-}
-
-// Shift left by immediate with saturation
-let Defs = [USR_OVF] in
-def S2_asl_i_r_sat : T_S2op_2_ii <"asl", 0b010, 0b010, 1>;
-
-// Shift right with round
-def S2_asr_i_r_rnd : T_S2op_2_ii <"asr", 0b010, 0b000, 0, 1>;
-
-let isAsmParserOnly = 1 in
-def S2_asr_i_r_rnd_goodsyntax
- : SInst <(outs IntRegs:$dst), (ins IntRegs:$src, u5_0Imm:$u5),
- "$dst = asrrnd($src, #$u5)",
- [], "", S_2op_tc_1_SLOT23>;
-
-let isAsmParserOnly = 1 in
-def A2_not: ALU32_rr<(outs IntRegs:$dst),(ins IntRegs:$src),
- "$dst = not($src)">;
-
-class T_S2op_3<string opc, bits<2>MajOp, bits<3>minOp, bits<1> sat = 0>
- : SInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss),
- "$Rdd = "#opc#"($Rss)"#!if(!eq(sat, 1),":sat","")> {
- bits<5> Rss;
- bits<5> Rdd;
- let IClass = 0b1000;
- let Inst{27-24} = 0;
- let Inst{23-22} = MajOp;
- let Inst{20-16} = Rss;
- let Inst{7-5} = minOp;
- let Inst{4-0} = Rdd;
-}
-
-def A2_absp : T_S2op_3 <"abs", 0b10, 0b110>;
-def A2_negp : T_S2op_3 <"neg", 0b10, 0b101>;
-def A2_notp : T_S2op_3 <"not", 0b10, 0b100>;
-
-// Interleave/deinterleave
-def S2_interleave : T_S2op_3 <"interleave", 0b11, 0b101>;
-def S2_deinterleave : T_S2op_3 <"deinterleave", 0b11, 0b100>;
-
-// Vector complex conjugate
-def A2_vconj : T_S2op_3 <"vconj", 0b10, 0b111, 1>;
-
-// Vector saturate without pack
-def S2_vsathb_nopack : T_S2op_3 <"vsathb", 0b00, 0b111>;
-def S2_vsathub_nopack : T_S2op_3 <"vsathub", 0b00, 0b100>;
-def S2_vsatwh_nopack : T_S2op_3 <"vsatwh", 0b00, 0b110>;
-def S2_vsatwuh_nopack : T_S2op_3 <"vsatwuh", 0b00, 0b101>;
-
-// Vector absolute value halfwords with and without saturation
-// Rdd64=vabsh(Rss64)[:sat]
-def A2_vabsh : T_S2op_3 <"vabsh", 0b01, 0b100>;
-def A2_vabshsat : T_S2op_3 <"vabsh", 0b01, 0b101, 1>;
-
-// Vector absolute value words with and without saturation
-def A2_vabsw : T_S2op_3 <"vabsw", 0b01, 0b110>;
-def A2_vabswsat : T_S2op_3 <"vabsw", 0b01, 0b111, 1>;
-
-//===----------------------------------------------------------------------===//
-// STYPE/BIT +
-//===----------------------------------------------------------------------===//
-// Bit count
-
-let hasSideEffects = 0, hasNewValue = 1 in
-class T_COUNT_LEADING<string MnOp, bits<3> MajOp, bits<3> MinOp, bit Is32,
- dag Out, dag Inp>
- : SInst<Out, Inp, "$Rd = "#MnOp#"($Rs)", [], "", S_2op_tc_1_SLOT23> {
- bits<5> Rs;
- bits<5> Rd;
- let IClass = 0b1000;
- let Inst{27} = 0b1;
- let Inst{26} = Is32;
- let Inst{25-24} = 0b00;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rd;
-}
-
-class T_COUNT_LEADING_32<string MnOp, bits<3> MajOp, bits<3> MinOp>
- : T_COUNT_LEADING<MnOp, MajOp, MinOp, 0b1,
- (outs IntRegs:$Rd), (ins IntRegs:$Rs)>;
-
-class T_COUNT_LEADING_64<string MnOp, bits<3> MajOp, bits<3> MinOp>
- : T_COUNT_LEADING<MnOp, MajOp, MinOp, 0b0,
- (outs IntRegs:$Rd), (ins DoubleRegs:$Rs)>;
-
-def S2_cl0 : T_COUNT_LEADING_32<"cl0", 0b000, 0b101>;
-def S2_cl1 : T_COUNT_LEADING_32<"cl1", 0b000, 0b110>;
-def S2_ct0 : T_COUNT_LEADING_32<"ct0", 0b010, 0b100>;
-def S2_ct1 : T_COUNT_LEADING_32<"ct1", 0b010, 0b101>;
-def S2_cl0p : T_COUNT_LEADING_64<"cl0", 0b010, 0b010>;
-def S2_cl1p : T_COUNT_LEADING_64<"cl1", 0b010, 0b100>;
-def S2_clb : T_COUNT_LEADING_32<"clb", 0b000, 0b100>;
-def S2_clbp : T_COUNT_LEADING_64<"clb", 0b010, 0b000>;
-def S2_clbnorm : T_COUNT_LEADING_32<"normamt", 0b000, 0b111>;
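-
-// Semantics sketch (per the usual Hexagon definitions): cl0/cl1 count
-// leading zeros/ones, ct0/ct1 count trailing zeros/ones, clb counts leading
-// identical bits (max of cl0 and cl1), and normamt is roughly clb minus one
-// (the normalization shift amount), with 0 for a zero input.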
-
-// The 64-bit count-leading/count-trailing instructions are defined in
-// HexagonInstrInfoV4.td.
-
-// Bit set/clear/toggle
-
-let hasSideEffects = 0, hasNewValue = 1 in
-class T_SCT_BIT_IMM<string MnOp, bits<3> MinOp>
- : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, u5_0Imm:$u5),
- "$Rd = "#MnOp#"($Rs, #$u5)", [], "", S_2op_tc_1_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> u5;
- let IClass = 0b1000;
- let Inst{27-21} = 0b1100110;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b0;
- let Inst{12-8} = u5;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rd;
-}
-
-let hasSideEffects = 0, hasNewValue = 1 in
-class T_SCT_BIT_REG<string MnOp, bits<2> MinOp>
- : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = "#MnOp#"($Rs, $Rt)", [], "", S_3op_tc_1_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
- let IClass = 0b1100;
- let Inst{27-22} = 0b011010;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{7-6} = MinOp;
- let Inst{4-0} = Rd;
-}
-
-def S2_clrbit_i : T_SCT_BIT_IMM<"clrbit", 0b001>;
-def S2_setbit_i : T_SCT_BIT_IMM<"setbit", 0b000>;
-def S2_togglebit_i : T_SCT_BIT_IMM<"togglebit", 0b010>;
-def S2_clrbit_r : T_SCT_BIT_REG<"clrbit", 0b01>;
-def S2_setbit_r : T_SCT_BIT_REG<"setbit", 0b00>;
-def S2_togglebit_r : T_SCT_BIT_REG<"togglebit", 0b10>;
-
-// Bit test
-
-let hasSideEffects = 0 in
-class T_TEST_BIT_IMM<string MnOp, bits<3> MajOp>
- : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u5_0Imm:$u5),
- "$Pd = "#MnOp#"($Rs, #$u5)",
- [], "", S_2op_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<5> Rs;
- bits<5> u5;
- let IClass = 0b1000;
- let Inst{27-24} = 0b0101;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{13} = 0;
- let Inst{12-8} = u5;
- let Inst{1-0} = Pd;
-}
-
-let hasSideEffects = 0 in
-class T_TEST_BIT_REG<string MnOp, bit IsNeg>
- : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Pd = "#MnOp#"($Rs, $Rt)",
- [], "", S_3op_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<5> Rs;
- bits<5> Rt;
- let IClass = 0b1100;
- let Inst{27-22} = 0b011100;
- let Inst{21} = IsNeg;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{1-0} = Pd;
-}
-
-def S2_tstbit_i : T_TEST_BIT_IMM<"tstbit", 0b000>;
-def S2_tstbit_r : T_TEST_BIT_REG<"tstbit", 0>;
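-
-// Usage sketch (registers illustrative): "p0 = tstbit(r0, #5)" sets all bits
-// of p0 if bit 5 of r0 is set and clears them otherwise; the _r forms take
-// the bit position from a register.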
-
-let hasSideEffects = 0 in
-class T_TEST_BITS_IMM<string MnOp, bits<2> MajOp, bit IsNeg>
- : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u6_0Imm:$u6),
- "$Pd = "#MnOp#"($Rs, #$u6)",
- [], "", S_2op_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<5> Rs;
- bits<6> u6;
- let IClass = 0b1000;
- let Inst{27-24} = 0b0101;
- let Inst{23-22} = MajOp;
- let Inst{21} = IsNeg;
- let Inst{20-16} = Rs;
- let Inst{13-8} = u6;
- let Inst{1-0} = Pd;
-}
-
-let hasSideEffects = 0 in
-class T_TEST_BITS_REG<string MnOp, bits<2> MajOp, bit IsNeg>
- : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Pd = "#MnOp#"($Rs, $Rt)",
- [], "", S_3op_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<5> Rs;
- bits<5> Rt;
- let IClass = 0b1100;
- let Inst{27-24} = 0b0111;
- let Inst{23-22} = MajOp;
- let Inst{21} = IsNeg;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{1-0} = Pd;
-}
-
-def C2_bitsclri : T_TEST_BITS_IMM<"bitsclr", 0b10, 0>;
-def C2_bitsclr : T_TEST_BITS_REG<"bitsclr", 0b10, 0>;
-def C2_bitsset : T_TEST_BITS_REG<"bitsset", 0b01, 0>;
-
-//===----------------------------------------------------------------------===//
-// STYPE/BIT -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// STYPE/COMPLEX +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// STYPE/COMPLEX -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// XTYPE/PERM +
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// XTYPE/PERM -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// STYPE/PRED +
-//===----------------------------------------------------------------------===//
-
-// Predicate transfer.
-let hasSideEffects = 0, hasNewValue = 1 in
-def C2_tfrpr : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps),
- "$Rd = $Ps", [], "", S_2op_tc_1_SLOT23> {
- bits<5> Rd;
- bits<2> Ps;
-
- let IClass = 0b1000;
- let Inst{27-24} = 0b1001;
- let Inst{22} = 0b1;
- let Inst{17-16} = Ps;
- let Inst{4-0} = Rd;
-}
-
-// Transfer general register to predicate.
-let hasSideEffects = 0 in
-def C2_tfrrp: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs),
- "$Pd = $Rs", [], "", S_2op_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<5> Rs;
-
- let IClass = 0b1000;
- let Inst{27-21} = 0b0101010;
- let Inst{20-16} = Rs;
- let Inst{1-0} = Pd;
-}
-
-let hasSideEffects = 0, isCodeGenOnly = 1 in
-def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src),
- "$dst = $src">;
-
-//===----------------------------------------------------------------------===//
-// STYPE/PRED -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// STYPE/SHIFT +
-//===----------------------------------------------------------------------===//
-class S_2OpInstImm<string Mnemonic, bits<3>MajOp, bits<3>MinOp,
- Operand Imm, list<dag> pattern = [], bit isRnd = 0>
- : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, Imm:$src2),
- "$dst = "#Mnemonic#"($src1, #$src2)"#!if(isRnd, ":rnd", ""),
- pattern> {
- bits<5> src1;
- bits<5> dst;
- let IClass = 0b1000;
- let Inst{27-24} = 0;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = dst;
-}
-
-class S_2OpInstImmI6<string Mnemonic, SDNode OpNode, bits<3>MinOp>
- : S_2OpInstImm<Mnemonic, 0b000, MinOp, u6_0Imm, []> {
- bits<6> src2;
- let Inst{13-8} = src2;
-}
-
-// Shift by immediate.
-def S2_asr_i_p : S_2OpInstImmI6<"asr", sra, 0b000>;
-def S2_asl_i_p : S_2OpInstImmI6<"asl", shl, 0b010>;
-def S2_lsr_i_p : S_2OpInstImmI6<"lsr", srl, 0b001>;
-
-// Shift left by small amount and add.
-let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0 in
-def S2_addasl_rrri: SInst <(outs IntRegs:$Rd),
- (ins IntRegs:$Rt, IntRegs:$Rs, u3_0Imm:$u3),
- "$Rd = addasl($Rt, $Rs, #$u3)", [],
- "", S_3op_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rt;
- bits<5> Rs;
- bits<3> u3;
-
- let IClass = 0b1100;
-
- let Inst{27-21} = 0b0100000;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b0;
- let Inst{12-8} = Rt;
- let Inst{7-5} = u3;
- let Inst{4-0} = Rd;
- }
-
-//===----------------------------------------------------------------------===//
-// STYPE/SHIFT -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// STYPE/VH +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// STYPE/VH -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// STYPE/VW +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// STYPE/VW -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// SYSTEM/SUPER +
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// SYSTEM/USER +
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 1, isSoloAX = 1 in
-def Y2_barrier : SYSInst<(outs), (ins), "barrier", [],"",ST_tc_st_SLOT0> {
- let Inst{31-28} = 0b1010;
- let Inst{27-21} = 0b1000000;
-}
-
-//===----------------------------------------------------------------------===//
-// SYSTEM/USER -
-//===----------------------------------------------------------------------===//
-
-// Generate frame-index addresses. The main reason for the offset operand is
-// that every instruction that is allowed to have a frame index as an operand
-// will then have that operand followed by an immediate operand (the offset).
-// This simplifies the frame-index elimination code.
-//
-let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1,
- isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in {
- def PS_fi : ALU32_ri<(outs IntRegs:$Rd),
- (ins IntRegs:$fi, s32_0Imm:$off), "">;
- def PS_fia : ALU32_ri<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$fi, s32_0Imm:$off), "">;
-}
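-
-// A sketch of the intent (not the actual lowering code): frame-index
-// elimination rewrites these, roughly, to adds off the stack or frame
-// pointer, e.g. "r0 = add(r29, #n)" for the resolved offset n; keeping the
-// offset as a trailing immediate makes that rewrite a simple operand update.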
-
-//===----------------------------------------------------------------------===//
-// CRUSER - Type.
-//===----------------------------------------------------------------------===//
-// HW loop
-let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2,
- opExtendable = 0, hasSideEffects = 0 in
-class LOOP_iBase<string mnemonic, Operand brOp, bit mustExtend = 0>
- : CRInst<(outs), (ins brOp:$offset, u10_0Imm:$src2),
- #mnemonic#"($offset, #$src2)",
- [], "" , CR_tc_3x_SLOT3> {
- bits<9> offset;
- bits<10> src2;
-
- let IClass = 0b0110;
-
- let Inst{27-22} = 0b100100;
- let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1);
- let Inst{20-16} = src2{9-5};
- let Inst{12-8} = offset{8-4};
- let Inst{7-5} = src2{4-2};
- let Inst{4-3} = offset{3-2};
- let Inst{1-0} = src2{1-0};
-}
-
-let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2,
- opExtendable = 0, hasSideEffects = 0 in
-class LOOP_rBase<string mnemonic, Operand brOp, bit mustExtend = 0>
- : CRInst<(outs), (ins brOp:$offset, IntRegs:$src2),
- #mnemonic#"($offset, $src2)",
- [], "" ,CR_tc_3x_SLOT3> {
- bits<9> offset;
- bits<5> src2;
-
- let IClass = 0b0110;
-
- let Inst{27-22} = 0b000000;
- let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1);
- let Inst{20-16} = src2;
- let Inst{12-8} = offset{8-4};
- let Inst{4-3} = offset{3-2};
- }
-
-multiclass LOOP_ri<string mnemonic> {
- def i : LOOP_iBase<mnemonic, brtarget>;
- def r : LOOP_rBase<mnemonic, brtarget>;
-
- let isCodeGenOnly = 1, isExtended = 1, opExtendable = 0 in {
- def iext: LOOP_iBase<mnemonic, brtargetExt, 1>;
- def rext: LOOP_rBase<mnemonic, brtargetExt, 1>;
- }
-}
-
-let Defs = [SA0, LC0, USR] in
-defm J2_loop0 : LOOP_ri<"loop0">;
-
-// Interestingly, only loop0 appears to set usr.lpcfg.
-let Defs = [SA1, LC1] in
-defm J2_loop1 : LOOP_ri<"loop1">;
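-
-// Usage sketch (label and count illustrative): "loop0(start, #3)" writes the
-// loop-start address to SA0 and the trip count 3 to LC0; the matching
-// ":endloop0" packet then decrements LC0 and branches back to SA0 while LC0
-// is nonzero.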
-
-let isBranch = 1, isTerminator = 1, hasSideEffects = 0,
- Defs = [PC, LC0], Uses = [SA0, LC0] in {
-def ENDLOOP0 : Endloop<(outs), (ins brtarget:$offset),
- ":endloop0",
- []>;
-}
-
-let isBranch = 1, isTerminator = 1, hasSideEffects = 0,
- Defs = [PC, LC1], Uses = [SA1, LC1] in {
-def ENDLOOP1 : Endloop<(outs), (ins brtarget:$offset),
- ":endloop1",
- []>;
-}
-
-// Pipelined loop instructions, sp[123]loop0
-let Defs = [LC0, SA0, P3, USR], hasSideEffects = 0,
- isExtentSigned = 1, isExtendable = 1, opExtentBits = 9, opExtentAlign = 2,
- opExtendable = 0, isPredicateLate = 1 in
-class SPLOOP_iBase<string SP, bits<2> op>
- : CRInst <(outs), (ins brtarget:$r7_2, u10_0Imm:$U10),
- "p3 = sp"#SP#"loop0($r7_2, #$U10)" > {
- bits<9> r7_2;
- bits<10> U10;
-
- let IClass = 0b0110;
-
- let Inst{22-21} = op;
- let Inst{27-23} = 0b10011;
- let Inst{20-16} = U10{9-5};
- let Inst{12-8} = r7_2{8-4};
- let Inst{7-5} = U10{4-2};
- let Inst{4-3} = r7_2{3-2};
- let Inst{1-0} = U10{1-0};
- }
-
-let Defs = [LC0, SA0, P3, USR], hasSideEffects = 0,
- isExtentSigned = 1, isExtendable = 1, opExtentBits = 9, opExtentAlign = 2,
- opExtendable = 0, isPredicateLate = 1 in
-class SPLOOP_rBase<string SP, bits<2> op>
- : CRInst <(outs), (ins brtarget:$r7_2, IntRegs:$Rs),
- "p3 = sp"#SP#"loop0($r7_2, $Rs)" > {
- bits<9> r7_2;
- bits<5> Rs;
-
- let IClass = 0b0110;
-
- let Inst{22-21} = op;
- let Inst{27-23} = 0b00001;
- let Inst{20-16} = Rs;
- let Inst{12-8} = r7_2{8-4};
- let Inst{4-3} = r7_2{3-2};
- }
-
-multiclass SPLOOP_ri<string mnemonic, bits<2> op> {
- def i : SPLOOP_iBase<mnemonic, op>;
- def r : SPLOOP_rBase<mnemonic, op>;
-}
-
-defm J2_ploop1s : SPLOOP_ri<"1", 0b01>;
-defm J2_ploop2s : SPLOOP_ri<"2", 0b10>;
-defm J2_ploop3s : SPLOOP_ri<"3", 0b11>;
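-
-// Usage sketch (operands illustrative, per the usual spNloop0 semantics):
-// "p3 = sp1loop0(start, #8)" starts a pipelined loop in which p3 stays clear
-// for the first iteration and is set late thereafter (isPredicateLate), so
-// prolog/epilog stages can be predicated on p3.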
-
-// if (Rs{!=,>=,==,<=}#0) jump:[t/nt]
-let Defs = [PC], isPredicated = 1, isBranch = 1, hasSideEffects = 0 in
-class J2_jump_0_Base<string compare, bit isTak, bits<2> op>
- : CRInst <(outs), (ins IntRegs:$Rs, brtarget:$r13_2),
- "if ($Rs"#compare#"#0) jump"#!if(isTak, ":t", ":nt")#" $r13_2" > {
- bits<5> Rs;
- bits<15> r13_2;
-
- let IClass = 0b0110;
-
- let Inst{27-24} = 0b0001;
- let Inst{23-22} = op;
- let Inst{12} = isTak;
- let Inst{21} = r13_2{14};
- let Inst{20-16} = Rs;
- let Inst{11-1} = r13_2{12-2};
- let Inst{13} = r13_2{13};
- }
-
-multiclass J2_jump_compare_0<string compare, bits<2> op> {
- def NAME : J2_jump_0_Base<compare, 0, op>;
- def NAME#pt : J2_jump_0_Base<compare, 1, op>;
-}
-
-defm J2_jumprz : J2_jump_compare_0<"!=", 0b00>;
-defm J2_jumprgtez : J2_jump_compare_0<">=", 0b01>;
-defm J2_jumprnz : J2_jump_compare_0<"==", 0b10>;
-defm J2_jumprltez : J2_jump_compare_0<"<=", 0b11>;
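-
-// For illustration (register and target illustrative): J2_jumprz prints
-// "if (r0!=#0) jump:nt target", and the NAME#pt variants print ":t" instead
-// (isTak), hinting the branch as taken.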
-
-// Transfer between general-purpose registers and control registers.
-let hasSideEffects = 0 in
-class TFR_CR_RS_base<RegisterClass CTRC, RegisterClass RC, bit isDouble>
- : CRInst <(outs CTRC:$dst), (ins RC:$src),
- "$dst = $src", [], "", CR_tc_3x_SLOT3> {
- bits<5> dst;
- bits<5> src;
-
- let IClass = 0b0110;
-
- let Inst{27-25} = 0b001;
- let Inst{24} = isDouble;
- let Inst{23-21} = 0b001;
- let Inst{20-16} = src;
- let Inst{4-0} = dst;
- }
-
-def A2_tfrrcr : TFR_CR_RS_base<CtrRegs, IntRegs, 0b0>;
-def A4_tfrpcp : TFR_CR_RS_base<CtrRegs64, DoubleRegs, 0b1>;
-def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>;
-def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>;
-
-let hasSideEffects = 0 in
-class TFR_RD_CR_base<RegisterClass RC, RegisterClass CTRC, bit isSingle>
- : CRInst <(outs RC:$dst), (ins CTRC:$src),
- "$dst = $src", [], "", CR_tc_3x_SLOT3> {
- bits<5> dst;
- bits<5> src;
-
- let IClass = 0b0110;
-
- let Inst{27-26} = 0b10;
- let Inst{25} = isSingle;
- let Inst{24-21} = 0b0000;
- let Inst{20-16} = src;
- let Inst{4-0} = dst;
- }
-
-let hasNewValue = 1, opNewValue = 0 in
-def A2_tfrcrr : TFR_RD_CR_base<IntRegs, CtrRegs, 1>;
-def A4_tfrcpp : TFR_RD_CR_base<DoubleRegs, CtrRegs64, 0>;
-def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>;
-def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>;
-
-// Y4_trace: Send value to the ETM trace.
-let isSoloAX = 1, hasSideEffects = 0 in
-def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs),
- "trace($Rs)"> {
- bits<5> Rs;
-
- let IClass = 0b0110;
- let Inst{27-21} = 0b0010010;
- let Inst{20-16} = Rs;
- }
-
-// HI/LO Instructions
-let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
- hasNewValue = 1, opNewValue = 0 in
-class REG_IMMED<string RegHalf, bit Rs, bits<3> MajOp, bit MinOp>
- : ALU32_ri<(outs IntRegs:$dst),
- (ins u16_0Imm:$imm_value),
- "$dst"#RegHalf#" = $imm_value", []> {
- bits<5> dst;
- bits<32> imm_value;
- let IClass = 0b0111;
-
- let Inst{27} = Rs;
- let Inst{26-24} = MajOp;
- let Inst{21} = MinOp;
- let Inst{20-16} = dst;
- let Inst{23-22} = imm_value{15-14};
- let Inst{13-0} = imm_value{13-0};
-}
-
-let isAsmParserOnly = 1 in {
- def LO : REG_IMMED<".l", 0b0, 0b001, 0b1>;
- def HI : REG_IMMED<".h", 0b0, 0b010, 0b1>;
-}
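-
-// Usage sketch (value illustrative): "r0.l = #0x1234" writes only the low
-// halfword of r0, so a 32-bit constant can be materialized with a HI/LO
-// pair.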
-
-let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in {
- def CONST32 : CONSTLDInst<(outs IntRegs:$Rd), (ins i32imm:$v),
- "$Rd = CONST32(#$v)", []>;
- def CONST64 : CONSTLDInst<(outs DoubleRegs:$Rd), (ins i64imm:$v),
- "$Rd = CONST64(#$v)", []>;
-}
-
-let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
- isCodeGenOnly = 1 in
-def PS_true : SInst<(outs PredRegs:$dst), (ins), "", []>;
-
-let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
- isCodeGenOnly = 1 in
-def PS_false : SInst<(outs PredRegs:$dst), (ins), "", []>;
-
-let Defs = [R29, R30], Uses = [R31, R30, R29], isPseudo = 1 in
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
- ".error \"should not emit\" ", []>;
-
-let Defs = [R29, R30, R31], Uses = [R29], isPseudo = 1 in
-def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- ".error \"should not emit\" ", []>;
-
-// Call subroutine indirectly.
-let Defs = VolatileV3.Regs in
-def J2_callr : JUMPR_MISC_CALLR<0, 1>;
-
-// Indirect tail-call.
-let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
- isTerminator = 1, isCodeGenOnly = 1 in
-def PS_tailcall_r : T_JMPr;
-
-// Direct tail-calls.
-let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
- isTerminator = 1, isCodeGenOnly = 1 in
-def PS_tailcall_i : JInst<(outs), (ins calltarget:$dst), "", []>;
-
-// The reason for the custom inserter is to record all ALLOCA instructions
-// in MachineFunctionInfo.
-let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1 in
-def PS_alloca: ALU32Inst<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, u32_0Imm:$A), "", []>;
-
-let isCodeGenOnly = 1, isPseudo = 1, Uses = [R30], hasSideEffects = 0 in
-def PS_aligna : ALU32Inst<(outs IntRegs:$Rd), (ins u32_0Imm:$A), "", []>;
-
-// XTYPE/SHIFT
-//
-//===----------------------------------------------------------------------===//
-// Template Class
-// Shift by immediate/register and accumulate/logical
-//===----------------------------------------------------------------------===//
-
-// Rx[+-&|]=asr(Rs,#u5)
-// Rx[+-&|^]=lsr(Rs,#u5)
-// Rx[+-&|^]=asl(Rs,#u5)
-
-let hasNewValue = 1, opNewValue = 0 in
-class T_shift_imm_acc_r <string opc1, string opc2, SDNode OpNode1,
- SDNode OpNode2, bits<3> majOp, bits<2> minOp>
- : SInst_acc<(outs IntRegs:$Rx),
- (ins IntRegs:$src1, IntRegs:$Rs, u5_0Imm:$u5),
- "$Rx "#opc2#opc1#"($Rs, #$u5)", [],
- "$src1 = $Rx", S_2op_tc_2_SLOT23> {
- bits<5> Rx;
- bits<5> Rs;
- bits<5> u5;
-
- let IClass = 0b1000;
-
- let Inst{27-24} = 0b1110;
- let Inst{23-22} = majOp{2-1};
- let Inst{13} = 0b0;
- let Inst{7} = majOp{0};
- let Inst{6-5} = minOp;
- let Inst{4-0} = Rx;
- let Inst{20-16} = Rs;
- let Inst{12-8} = u5;
- }
-
-// Rx[+-&|]=asr(Rs,Rt)
-// Rx[+-&|^]=lsr(Rs,Rt)
-// Rx[+-&|^]=asl(Rs,Rt)
-
-let hasNewValue = 1, opNewValue = 0 in
-class T_shift_reg_acc_r <string opc1, string opc2, SDNode OpNode1,
- SDNode OpNode2, bits<2> majOp, bits<2> minOp>
- : SInst_acc<(outs IntRegs:$Rx),
- (ins IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt),
- "$Rx "#opc2#opc1#"($Rs, $Rt)", [],
- "$src1 = $Rx", S_3op_tc_2_SLOT23 > {
- bits<5> Rx;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1100;
-
- let Inst{27-24} = 0b1100;
- let Inst{23-22} = majOp;
- let Inst{7-6} = minOp;
- let Inst{4-0} = Rx;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- }
-
-// Rxx[+-&|]=asr(Rss,#u6)
-// Rxx[+-&|^]=lsr(Rss,#u6)
-// Rxx[+-&|^]=asl(Rss,#u6)
-
-class T_shift_imm_acc_p <string opc1, string opc2, SDNode OpNode1,
- SDNode OpNode2, bits<3> majOp, bits<2> minOp>
- : SInst_acc<(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$src1, DoubleRegs:$Rss, u6_0Imm:$u6),
- "$Rxx "#opc2#opc1#"($Rss, #$u6)", [],
- "$src1 = $Rxx", S_2op_tc_2_SLOT23> {
- bits<5> Rxx;
- bits<5> Rss;
- bits<6> u6;
-
- let IClass = 0b1000;
-
- let Inst{27-24} = 0b0010;
- let Inst{23-22} = majOp{2-1};
- let Inst{7} = majOp{0};
- let Inst{6-5} = minOp;
- let Inst{4-0} = Rxx;
- let Inst{20-16} = Rss;
- let Inst{13-8} = u6;
- }
-
-// Rxx[+-&|]=asr(Rss,Rt)
-// Rxx[+-&|^]=lsr(Rss,Rt)
-// Rxx[+-&|^]=asl(Rss,Rt)
-// Rxx[+-&|^]=lsl(Rss,Rt)
-
-class T_shift_reg_acc_p <string opc1, string opc2, SDNode OpNode1,
- SDNode OpNode2, bits<3> majOp, bits<2> minOp>
- : SInst_acc<(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$src1, DoubleRegs:$Rss, IntRegs:$Rt),
- "$Rxx "#opc2#opc1#"($Rss, $Rt)", [],
- "$src1 = $Rxx", S_3op_tc_2_SLOT23> {
- bits<5> Rxx;
- bits<5> Rss;
- bits<5> Rt;
-
- let IClass = 0b1100;
-
- let Inst{27-24} = 0b1011;
- let Inst{23-21} = majOp;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rt;
- let Inst{7-6} = minOp;
- let Inst{4-0} = Rxx;
- }
-
-//===----------------------------------------------------------------------===//
-// Multi-class for the shift instructions with logical/arithmetic operators.
-//===----------------------------------------------------------------------===//
-
-multiclass xtype_imm_base<string OpcStr1, string OpcStr2, SDNode OpNode1,
- SDNode OpNode2, bits<3> majOp, bits<2> minOp > {
- def _i_r#NAME : T_shift_imm_acc_r< OpcStr1, OpcStr2, OpNode1,
- OpNode2, majOp, minOp >;
- def _i_p#NAME : T_shift_imm_acc_p< OpcStr1, OpcStr2, OpNode1,
- OpNode2, majOp, minOp >;
-}
-
-multiclass xtype_imm_acc<string opc1, SDNode OpNode, bits<2>minOp> {
- let AddedComplexity = 100 in
- defm _acc : xtype_imm_base< opc1, "+= ", OpNode, add, 0b001, minOp>;
-
- defm _nac : xtype_imm_base< opc1, "-= ", OpNode, sub, 0b000, minOp>;
- defm _and : xtype_imm_base< opc1, "&= ", OpNode, and, 0b010, minOp>;
- defm _or : xtype_imm_base< opc1, "|= ", OpNode, or, 0b011, minOp>;
-}
-
-multiclass xtype_xor_imm_acc<string opc1, SDNode OpNode, bits<2>minOp> {
-let AddedComplexity = 100 in
- defm _xacc : xtype_imm_base< opc1, "^= ", OpNode, xor, 0b100, minOp>;
-}
-
-defm S2_asr : xtype_imm_acc<"asr", sra, 0b00>;
-
-defm S2_lsr : xtype_imm_acc<"lsr", srl, 0b01>,
- xtype_xor_imm_acc<"lsr", srl, 0b01>;
-
-defm S2_asl : xtype_imm_acc<"asl", shl, 0b10>,
- xtype_xor_imm_acc<"asl", shl, 0b10>;
-
-multiclass xtype_reg_acc_r<string opc1, SDNode OpNode, bits<2>minOp> {
- let AddedComplexity = 100 in
- def _acc : T_shift_reg_acc_r <opc1, "+= ", OpNode, add, 0b11, minOp>;
-
- def _nac : T_shift_reg_acc_r <opc1, "-= ", OpNode, sub, 0b10, minOp>;
- def _and : T_shift_reg_acc_r <opc1, "&= ", OpNode, and, 0b01, minOp>;
- def _or : T_shift_reg_acc_r <opc1, "|= ", OpNode, or, 0b00, minOp>;
-}
-
-multiclass xtype_reg_acc_p<string opc1, SDNode OpNode, bits<2>minOp> {
- let AddedComplexity = 100 in
- def _acc : T_shift_reg_acc_p <opc1, "+= ", OpNode, add, 0b110, minOp>;
-
- def _nac : T_shift_reg_acc_p <opc1, "-= ", OpNode, sub, 0b100, minOp>;
- def _and : T_shift_reg_acc_p <opc1, "&= ", OpNode, and, 0b010, minOp>;
- def _or : T_shift_reg_acc_p <opc1, "|= ", OpNode, or, 0b000, minOp>;
- def _xor : T_shift_reg_acc_p <opc1, "^= ", OpNode, xor, 0b011, minOp>;
-}
-
-multiclass xtype_reg_acc<string OpcStr, SDNode OpNode, bits<2> minOp > {
- defm _r_r : xtype_reg_acc_r <OpcStr, OpNode, minOp>;
- defm _r_p : xtype_reg_acc_p <OpcStr, OpNode, minOp>;
-}
-
-defm S2_asl : xtype_reg_acc<"asl", shl, 0b10>;
-defm S2_asr : xtype_reg_acc<"asr", sra, 0b00>;
-defm S2_lsr : xtype_reg_acc<"lsr", srl, 0b01>;
-defm S2_lsl : xtype_reg_acc<"lsl", shl, 0b11>;
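-
-// For reference, these multiclasses expand via the #NAME pastes to defs such
-// as S2_asr_i_r_acc ("$Rx += asr($Rs, #$u5)"), S2_lsr_i_p_xacc
-// ("$Rxx ^= lsr($Rss, #$u6)") and S2_lsl_r_r_nac ("$Rx -= lsl($Rs, $Rt)").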
-
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in
-class T_S3op_1 <string mnemonic, RegisterClass RC, bits<2> MajOp, bits<3> MinOp,
- bit SwapOps, bit isSat = 0, bit isRnd = 0, bit hasShift = 0>
- : SInst <(outs RC:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2),
- "$dst = "#mnemonic#"($src1, $src2)"#!if(isRnd, ":rnd", "")
- #!if(hasShift,":>>1","")
- #!if(isSat, ":sat", ""),
- [], "", S_3op_tc_2_SLOT23 > {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
-
- let IClass = 0b1100;
-
- let Inst{27-24} = 0b0001;
- let Inst{23-22} = MajOp;
- let Inst{20-16} = !if (SwapOps, src2, src1);
- let Inst{12-8} = !if (SwapOps, src1, src2);
- let Inst{7-5} = MinOp;
- let Inst{4-0} = dst;
- }
-
-class T_S3op_64 <string mnemonic, bits<2> MajOp, bits<3> MinOp, bit SwapOps,
- bit isSat = 0, bit isRnd = 0, bit hasShift = 0 >
- : T_S3op_1 <mnemonic, DoubleRegs, MajOp, MinOp, SwapOps,
- isSat, isRnd, hasShift>;
-
-let Itinerary = S_3op_tc_1_SLOT23 in {
- def S2_shuffeb : T_S3op_64 < "shuffeb", 0b00, 0b010, 0>;
- def S2_shuffeh : T_S3op_64 < "shuffeh", 0b00, 0b110, 0>;
- def S2_shuffob : T_S3op_64 < "shuffob", 0b00, 0b100, 1>;
- def S2_shuffoh : T_S3op_64 < "shuffoh", 0b10, 0b000, 1>;
-
- def S2_vtrunewh : T_S3op_64 < "vtrunewh", 0b10, 0b010, 0>;
- def S2_vtrunowh : T_S3op_64 < "vtrunowh", 0b10, 0b100, 0>;
-}
-
-def S2_lfsp : T_S3op_64 < "lfs", 0b10, 0b110, 0>;
-
-let hasSideEffects = 0 in
-class T_S3op_2 <string mnemonic, bits<3> MajOp, bit SwapOps>
- : SInst < (outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, PredRegs:$Pu),
- "$Rdd = "#mnemonic#"($Rss, $Rtt, $Pu)",
- [], "", S_3op_tc_1_SLOT23 > {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
- bits<2> Pu;
-
- let IClass = 0b1100;
-
- let Inst{27-24} = 0b0010;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = !if (SwapOps, Rtt, Rss);
- let Inst{12-8} = !if (SwapOps, Rss, Rtt);
- let Inst{6-5} = Pu;
- let Inst{4-0} = Rdd;
- }
-
-def S2_valignrb : T_S3op_2 < "valignb", 0b000, 1>;
-def S2_vsplicerb : T_S3op_2 < "vspliceb", 0b100, 0>;
-
-//===----------------------------------------------------------------------===//
-// Template class used by vector shift, vector rotate, vector neg,
-// 32-bit shift, 64-bit shifts, etc.
-//===----------------------------------------------------------------------===//
-
-let hasSideEffects = 0 in
-class T_S3op_3 <string mnemonic, RegisterClass RC, bits<2> MajOp,
- bits<2> MinOp, bit isSat = 0, list<dag> pattern = [] >
- : SInst <(outs RC:$dst),
- (ins RC:$src1, IntRegs:$src2),
- "$dst = "#mnemonic#"($src1, $src2)"#!if(isSat, ":sat", ""),
- pattern, "", S_3op_tc_1_SLOT23> {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
-
- let IClass = 0b1100;
-
- let Inst{27-24} = !if(!eq(!cast<string>(RC), "IntRegs"), 0b0110, 0b0011);
- let Inst{23-22} = MajOp;
- let Inst{20-16} = src1;
- let Inst{12-8} = src2;
- let Inst{7-6} = MinOp;
- let Inst{4-0} = dst;
- }
-
-let hasNewValue = 1 in
-class T_S3op_shift32 <string mnemonic, SDNode OpNode, bits<2> MinOp>
- : T_S3op_3 <mnemonic, IntRegs, 0b01, MinOp, 0, []>;
-
-let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in
-class T_S3op_shift32_Sat <string mnemonic, bits<2> MinOp>
- : T_S3op_3 <mnemonic, IntRegs, 0b00, MinOp, 1, []>;
-
-
-class T_S3op_shift64 <string mnemonic, SDNode OpNode, bits<2> MinOp>
- : T_S3op_3 <mnemonic, DoubleRegs, 0b10, MinOp, 0, []>;
-
-
-class T_S3op_shiftVect <string mnemonic, bits<2> MajOp, bits<2> MinOp>
- : T_S3op_3 <mnemonic, DoubleRegs, MajOp, MinOp, 0, []>;
-
-
-// Shift by register
-// Rdd=[asr|lsr|asl|lsl](Rss,Rt)
-
-def S2_asr_r_p : T_S3op_shift64 < "asr", sra, 0b00>;
-def S2_lsr_r_p : T_S3op_shift64 < "lsr", srl, 0b01>;
-def S2_asl_r_p : T_S3op_shift64 < "asl", shl, 0b10>;
-def S2_lsl_r_p : T_S3op_shift64 < "lsl", shl, 0b11>;
-
-// Rd=[asr|lsr|asl|lsl](Rs,Rt)
-
-def S2_asr_r_r : T_S3op_shift32<"asr", sra, 0b00>;
-def S2_lsr_r_r : T_S3op_shift32<"lsr", srl, 0b01>;
-def S2_asl_r_r : T_S3op_shift32<"asl", shl, 0b10>;
-def S2_lsl_r_r : T_S3op_shift32<"lsl", shl, 0b11>;
-
-// Shift by register with saturation
-// Rd=asr(Rs,Rt):sat
-// Rd=asl(Rs,Rt):sat
-
-let Defs = [USR_OVF] in {
- def S2_asr_r_r_sat : T_S3op_shift32_Sat<"asr", 0b00>;
- def S2_asl_r_r_sat : T_S3op_shift32_Sat<"asl", 0b10>;
-}
-
-let hasNewValue = 1, hasSideEffects = 0 in
-class T_S3op_8 <string opc, bits<3> MinOp, bit isSat, bit isRnd, bit hasShift, bit hasSplat = 0>
- : SInst < (outs IntRegs:$Rd),
- (ins DoubleRegs:$Rss, IntRegs:$Rt),
- "$Rd = "#opc#"($Rss, $Rt"#!if(hasSplat, "*", "")#")"
- #!if(hasShift, ":<<1", "")
- #!if(isRnd, ":rnd", "")
- #!if(isSat, ":sat", ""),
- [], "", S_3op_tc_1_SLOT23 > {
- bits<5> Rd;
- bits<5> Rss;
- bits<5> Rt;
-
- let IClass = 0b1100;
-
- let Inst{27-24} = 0b0101;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rt;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rd;
- }
-
-def S2_asr_r_svw_trun : T_S3op_8<"vasrw", 0b010, 0, 0, 0>;
-
-let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in
-def S2_vcrotate : T_S3op_shiftVect < "vcrotate", 0b11, 0b00>;
-
-let hasSideEffects = 0 in
-class T_S3op_7 <string mnemonic, bit MajOp >
- : SInst <(outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, u3_0Imm:$u3),
- "$Rdd = "#mnemonic#"($Rss, $Rtt, #$u3)" ,
- [], "", S_3op_tc_1_SLOT23 > {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
- bits<3> u3;
-
- let IClass = 0b1100;
-
- let Inst{27-24} = 0b0000;
- let Inst{23} = MajOp;
- let Inst{20-16} = !if(MajOp, Rss, Rtt);
- let Inst{12-8} = !if(MajOp, Rtt, Rss);
- let Inst{7-5} = u3;
- let Inst{4-0} = Rdd;
- }
-
-def S2_valignib : T_S3op_7 < "valignb", 0>;
-def S2_vspliceib : T_S3op_7 < "vspliceb", 1>;
-
-//===----------------------------------------------------------------------===//
-// Template class for 'insert bitfield' instructions
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in
-class T_S3op_insert <string mnemonic, RegisterClass RC>
- : SInst <(outs RC:$dst),
- (ins RC:$src1, RC:$src2, DoubleRegs:$src3),
- "$dst = "#mnemonic#"($src2, $src3)" ,
- [], "$src1 = $dst", S_3op_tc_1_SLOT23 > {
- bits<5> dst;
- bits<5> src2;
- bits<5> src3;
-
- let IClass = 0b1100;
-
- let Inst{27-26} = 0b10;
- let Inst{25-24} = !if(!eq(!cast<string>(RC), "IntRegs"), 0b00, 0b10);
- let Inst{23} = 0b0;
- let Inst{20-16} = src2;
- let Inst{12-8} = src3;
- let Inst{4-0} = dst;
- }
-
-let hasSideEffects = 0 in
-class T_S2op_insert <bits<4> RegTyBits, RegisterClass RC, Operand ImmOp>
- : SInst <(outs RC:$dst), (ins RC:$dst2, RC:$src1, ImmOp:$src2, ImmOp:$src3),
- "$dst = insert($src1, #$src2, #$src3)",
- [], "$dst2 = $dst", S_2op_tc_2_SLOT23> {
- bits<5> dst;
- bits<5> src1;
- bits<6> src2;
- bits<6> src3;
- bit bit23;
- bit bit13;
- string ImmOpStr = !cast<string>(ImmOp);
-
- let bit23 = !if (!eq(ImmOpStr, "u6_0Imm"), src3{5}, 0);
- let bit13 = !if (!eq(ImmOpStr, "u6_0Imm"), src2{5}, 0);
-
- let IClass = 0b1000;
-
- let Inst{27-24} = RegTyBits;
- let Inst{23} = bit23;
- let Inst{22-21} = src3{4-3};
- let Inst{20-16} = src1;
- let Inst{13} = bit13;
- let Inst{12-8} = src2{4-0};
- let Inst{7-5} = src3{2-0};
- let Inst{4-0} = dst;
- }
-
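-// Worked example (derived from the encoding above): with u5_0Imm operands
-// (S2_insert), bit23 and bit13 stay 0; #$src2 occupies Inst{12-8} and
-// #$src3 splits across Inst{22-21} and Inst{7-5}. The u6_0Imm variant
-// (S2_insertp) uses bit13 and bit23 to carry the sixth bit of each
-// immediate.
-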
-// Rx=insert(Rs,Rtt)
-// Rx=insert(Rs,#u5,#U5)
-let hasNewValue = 1 in {
- def S2_insert_rp : T_S3op_insert <"insert", IntRegs>;
- def S2_insert : T_S2op_insert <0b1111, IntRegs, u5_0Imm>;
-}
-
-// Rxx=insert(Rss,Rtt)
-// Rxx=insert(Rss,#u6,#U6)
-def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>;
-def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6_0Imm>;
-
-
-//===----------------------------------------------------------------------===//
-// Template class for 'extract bitfield' instructions
-//===----------------------------------------------------------------------===//
-let hasNewValue = 1, hasSideEffects = 0 in
-class T_S3op_extract <string mnemonic, bits<2> MinOp>
- : SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs, DoubleRegs:$Rtt),
- "$Rd = "#mnemonic#"($Rs, $Rtt)",
- [], "", S_3op_tc_2_SLOT23 > {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rtt;
-
- let IClass = 0b1100;
-
- let Inst{27-22} = 0b100100;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rtt;
- let Inst{7-6} = MinOp;
- let Inst{4-0} = Rd;
- }
-
-let hasSideEffects = 0 in
-class T_S2op_extract <string mnemonic, bits<4> RegTyBits,
- RegisterClass RC, Operand ImmOp>
- : SInst <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2, ImmOp:$src3),
- "$dst = "#mnemonic#"($src1, #$src2, #$src3)",
- [], "", S_2op_tc_2_SLOT23> {
- bits<5> dst;
- bits<5> src1;
- bits<6> src2;
- bits<6> src3;
- bit bit23;
- bit bit13;
- string ImmOpStr = !cast<string>(ImmOp);
-
- let bit23 = !if (!eq(ImmOpStr, "u6_0Imm"), src3{5},
- !if (!eq(mnemonic, "extractu"), 0, 1));
-
- let bit13 = !if (!eq(ImmOpStr, "u6_0Imm"), src2{5}, 0);
-
- let IClass = 0b1000;
-
- let Inst{27-24} = RegTyBits;
- let Inst{23} = bit23;
- let Inst{22-21} = src3{4-3};
- let Inst{20-16} = src1;
- let Inst{13} = bit13;
- let Inst{12-8} = src2{4-0};
- let Inst{7-5} = src3{2-0};
- let Inst{4-0} = dst;
- }
-
-// Extract bitfield
-
-// Rdd=extractu(Rss,Rtt)
-// Rdd=extractu(Rss,#u6,#U6)
-def S2_extractup_rp : T_S3op_64 < "extractu", 0b00, 0b000, 0>;
-def S2_extractup : T_S2op_extract <"extractu", 0b0001, DoubleRegs, u6_0Imm>;
-
-// Rd=extractu(Rs,Rtt)
-// Rd=extractu(Rs,#u5,#U5)
-let hasNewValue = 1 in {
- def S2_extractu_rp : T_S3op_extract<"extractu", 0b00>;
- def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5_0Imm>;
-}
-
-//===----------------------------------------------------------------------===//
-// :raw form of tableidx[bdhw] insns
-//===----------------------------------------------------------------------===//
-
-let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-class tableidxRaw<string OpStr, bits<2>MinOp>
- : SInst <(outs IntRegs:$Rx),
- (ins IntRegs:$_dst_, IntRegs:$Rs, u4_0Imm:$u4, s6_0Imm:$S6),
- "$Rx = "#OpStr#"($Rs, #$u4, #$S6):raw",
- [], "$Rx = $_dst_" > {
- bits<5> Rx;
- bits<5> Rs;
- bits<4> u4;
- bits<6> S6;
-
- let IClass = 0b1000;
-
- let Inst{27-24} = 0b0111;
- let Inst{23-22} = MinOp;
- let Inst{21} = u4{3};
- let Inst{20-16} = Rs;
- let Inst{13-8} = S6;
- let Inst{7-5} = u4{2-0};
- let Inst{4-0} = Rx;
- }
-
-def S2_tableidxb : tableidxRaw<"tableidxb", 0b00>;
-def S2_tableidxh : tableidxRaw<"tableidxh", 0b01>;
-def S2_tableidxw : tableidxRaw<"tableidxw", 0b10>;
-def S2_tableidxd : tableidxRaw<"tableidxd", 0b11>;
-
-//===----------------------------------------------------------------------===//
-// Template class for 'table index' instructions which are assembler mapped
-// to their :raw format.
-//===----------------------------------------------------------------------===//
-let isPseudo = 1 in
-class tableidx_goodsyntax <string mnemonic>
- : SInst <(outs IntRegs:$Rx),
- (ins IntRegs:$_dst_, IntRegs:$Rs, u4_0Imm:$u4, u5_0Imm:$u5),
- "$Rx = "#mnemonic#"($Rs, #$u4, #$u5)",
- [], "$Rx = $_dst_" >;
-
-def S2_tableidxb_goodsyntax : tableidx_goodsyntax<"tableidxb">;
-def S2_tableidxh_goodsyntax : tableidx_goodsyntax<"tableidxh">;
-def S2_tableidxw_goodsyntax : tableidx_goodsyntax<"tableidxw">;
-def S2_tableidxd_goodsyntax : tableidx_goodsyntax<"tableidxd">;
-
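-// Note (assumption about the assembler, not encoded here): these isPseudo
-// "good syntax" forms carry no encoding of their own and are rewritten by
-// the assembler into the corresponding tableidx*:raw instructions above,
-// with the second immediate adjusted for the element size.
-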
-//===----------------------------------------------------------------------===//
-// V3 Instructions +
-//===----------------------------------------------------------------------===//
-
-include "HexagonInstrInfoV3.td"
-
-//===----------------------------------------------------------------------===//
-// V3 Instructions -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// V4 Instructions +
-//===----------------------------------------------------------------------===//
-
-include "HexagonInstrInfoV4.td"
-
-//===----------------------------------------------------------------------===//
-// V4 Instructions -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// V5 Instructions +
-//===----------------------------------------------------------------------===//
-
-include "HexagonInstrInfoV5.td"
-
-//===----------------------------------------------------------------------===//
-// V5 Instructions -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// V60 Instructions +
-//===----------------------------------------------------------------------===//
-
-include "HexagonInstrInfoV60.td"
-
-//===----------------------------------------------------------------------===//
-// V60 Instructions -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ALU32/64/Vector +
-//===----------------------------------------------------------------------===//
-
-include "HexagonInstrInfoVector.td"
-
-include "HexagonInstrAlias.td"
-include "HexagonSystemInst.td"
-
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td
deleted file mode 100644
index 225f94405076..000000000000
--- a/lib/Target/Hexagon/HexagonInstrInfoV3.td
+++ /dev/null
@@ -1,215 +0,0 @@
-//=- HexagonInstrInfoV3.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon V3 instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// J +
-//===----------------------------------------------------------------------===//
-// Call subroutine.
-let isCall = 1, hasSideEffects = 1, isPredicable = 1,
- isExtended = 0, isExtendable = 1, opExtendable = 0,
- isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2 in
-class T_Call<bit CSR, string ExtStr>
- : JInst<(outs), (ins calltarget:$dst),
- "call " # ExtStr # "$dst", [], "", J_tc_2early_SLOT23> {
- let BaseOpcode = "call";
- bits<24> dst;
-
- let Defs = !if (CSR, VolatileV3.Regs, []);
- let IClass = 0b0101;
- let Inst{27-25} = 0b101;
- let Inst{24-16,13-1} = dst{23-2};
- let Inst{0} = 0b0;
-}
-
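-// Illustrative note (derived from the encoding above): the call target is
-// word-aligned, so dst{1-0} are implied zero and dst{23-2} is split across
-// Inst{24-16} and Inst{13-1}; with opExtentBits = 24 that gives roughly a
-// +/-8MB direct call range before a constant extender is needed.
-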
-let isCall = 1, hasSideEffects = 1, isPredicated = 1,
- isExtended = 0, isExtendable = 1, opExtendable = 1,
- isExtentSigned = 1, opExtentBits = 17, opExtentAlign = 2 in
-class T_CallPred<bit CSR, bit IfTrue, string ExtStr>
- : JInst<(outs), (ins PredRegs:$Pu, calltarget:$dst),
- CondStr<"$Pu", IfTrue, 0>.S # "call " # ExtStr # "$dst",
- [], "", J_tc_2early_SLOT23> {
- let BaseOpcode = "call";
- let isPredicatedFalse = !if(IfTrue,0,1);
- bits<2> Pu;
- bits<17> dst;
-
- let Defs = !if (CSR, VolatileV3.Regs, []);
- let IClass = 0b0101;
- let Inst{27-24} = 0b1101;
- let Inst{23-22,20-16,13,7-1} = dst{16-2};
- let Inst{21} = !if(IfTrue,0,1);
- let Inst{11} = 0b0;
- let Inst{9-8} = Pu;
-}
-
-multiclass T_Calls<bit CSR, string ExtStr> {
- def NAME : T_Call<CSR, ExtStr>;
- def t : T_CallPred<CSR, 1, ExtStr>;
- def f : T_CallPred<CSR, 0, ExtStr>;
-}
-
-defm J2_call: T_Calls<1, "">, PredRel;
-
-let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1,
- Defs = VolatileV3.Regs in
-def PS_call_nr : T_Call<1, "">, PredRel;
-
-let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1,
- Defs = [PC, R31, R6, R7, P0] in
-def PS_call_stk : T_Call<0, "">, PredRel;
-
-//===----------------------------------------------------------------------===//
-// J -
-//===----------------------------------------------------------------------===//
-
-
-//===----------------------------------------------------------------------===//
-// JR +
-//===----------------------------------------------------------------------===//
-// Call subroutine from register.
-
-let isCodeGenOnly = 1, Defs = VolatileV3.Regs in {
- def PS_callr_nr : JUMPR_MISC_CALLR<0, 1>; // Call, no return.
-}
-
-//===----------------------------------------------------------------------===//
-// JR -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ALU64/ALU +
-//===----------------------------------------------------------------------===//
-
-let Defs = [USR_OVF], Itinerary = ALU64_tc_2_SLOT23 in
-def A2_addpsat : T_ALU64_arith<"add", 0b011, 0b101, 1, 0, 1>;
-
-class T_ALU64_addsp_hl<string suffix, bits<3> MinOp>
- : T_ALU64_rr<"add", suffix, 0b0011, 0b011, MinOp, 0, 0, "">;
-
-def A2_addspl : T_ALU64_addsp_hl<":raw:lo", 0b110>;
-def A2_addsph : T_ALU64_addsp_hl<":raw:hi", 0b111>;
-
-let hasSideEffects = 0, isAsmParserOnly = 1 in
-def A2_addsp : ALU64_rr<(outs DoubleRegs:$Rd),
- (ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)", [],
- "", ALU64_tc_1_SLOT23>;
-
-
-let hasSideEffects = 0 in
-class T_XTYPE_MIN_MAX_P<bit isMax, bit isUnsigned>
- : ALU64Inst<(outs DoubleRegs:$Rd), (ins DoubleRegs:$Rt, DoubleRegs:$Rs),
- "$Rd = "#!if(isMax,"max","min")#!if(isUnsigned,"u","")
- #"($Rt, $Rs)", [], "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b00111;
- let Inst{22-21} = !if(isMax, 0b10, 0b01);
- let Inst{20-16} = !if(isMax, Rt, Rs);
- let Inst{12-8} = !if(isMax, Rs, Rt);
- let Inst{7} = 0b1;
- let Inst{6} = !if(isMax, 0b0, 0b1);
- let Inst{5} = isUnsigned;
- let Inst{4-0} = Rd;
-}
-
-def A2_minp : T_XTYPE_MIN_MAX_P<0, 0>;
-def A2_minup : T_XTYPE_MIN_MAX_P<0, 1>;
-def A2_maxp : T_XTYPE_MIN_MAX_P<1, 0>;
-def A2_maxup : T_XTYPE_MIN_MAX_P<1, 1>;
-
-//===----------------------------------------------------------------------===//
-// ALU64/ALU -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// :raw form of vrcmpys:hi/lo insns
-//===----------------------------------------------------------------------===//
-// Vector reduce complex multiply by scalar.
-let Defs = [USR_OVF], hasSideEffects = 0 in
-class T_vrcmpRaw<string HiLo, bits<3>MajOp>:
- MInst<(outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rdd = vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, []> {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1000;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- let Inst{7-5} = 0b100;
- let Inst{4-0} = Rdd;
-}
-
-def M2_vrcmpys_s1_h: T_vrcmpRaw<"hi", 0b101>;
-def M2_vrcmpys_s1_l: T_vrcmpRaw<"lo", 0b111>;
-
-// Assembler mapped to M2_vrcmpys_s1_h or M2_vrcmpys_s1_l
-let hasSideEffects = 0, isAsmParserOnly = 1 in
-def M2_vrcmpys_s1
- : MInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, IntRegs:$Rt),
- "$Rdd=vrcmpys($Rss,$Rt):<<1:sat">;
-
-// Vector reduce complex multiply by scalar with accumulation.
-let Defs = [USR_OVF], hasSideEffects = 0 in
-class T_vrcmpys_acc<string HiLo, bits<3>MajOp>:
- MInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$_src_, DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rxx += vrcmpys($Rss, $Rtt):<<1:sat:raw:"#HiLo, [],
- "$Rxx = $_src_"> {
- bits<5> Rxx;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1010;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- let Inst{7-5} = 0b100;
- let Inst{4-0} = Rxx;
- }
-
-def M2_vrcmpys_acc_s1_h: T_vrcmpys_acc<"hi", 0b101>;
-def M2_vrcmpys_acc_s1_l: T_vrcmpys_acc<"lo", 0b111>;
-
-// Assembler mapped to M2_vrcmpys_acc_s1_h or M2_vrcmpys_acc_s1_l
-
-let isAsmParserOnly = 1 in
-def M2_vrcmpys_acc_s1
- : MInst <(outs DoubleRegs:$dst),
- (ins DoubleRegs:$dst2, DoubleRegs:$src1, IntRegs:$src2),
- "$dst += vrcmpys($src1, $src2):<<1:sat", [],
- "$dst2 = $dst">;
-
-def M2_vrcmpys_s1rp_h : T_MType_vrcmpy <"vrcmpys", 0b101, 0b110, 1>;
-def M2_vrcmpys_s1rp_l : T_MType_vrcmpy <"vrcmpys", 0b101, 0b111, 0>;
-
-// Assembler mapped to M2_vrcmpys_s1rp_h or M2_vrcmpys_s1rp_l
-let isAsmParserOnly = 1 in
-def M2_vrcmpys_s1rp
- : MInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, IntRegs:$Rt),
- "$Rd=vrcmpys($Rss,$Rt):<<1:rnd:sat">;
-
-
-// S2_cabacdecbin: CABAC decode bin.
-let Defs = [P0], isPredicateLate = 1, Itinerary = S_3op_tc_1_SLOT23 in
-def S2_cabacdecbin : T_S3op_64 < "decbin", 0b11, 0b110, 0>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
deleted file mode 100644
index 18943a082d28..000000000000
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ /dev/null
@@ -1,3301 +0,0 @@
-//=- HexagonInstrInfoV4.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon V4 instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-def DuplexIClass0: InstDuplex < 0 >;
-def DuplexIClass1: InstDuplex < 1 >;
-def DuplexIClass2: InstDuplex < 2 >;
-let isExtendable = 1 in {
- def DuplexIClass3: InstDuplex < 3 >;
- def DuplexIClass4: InstDuplex < 4 >;
- def DuplexIClass5: InstDuplex < 5 >;
- def DuplexIClass6: InstDuplex < 6 >;
- def DuplexIClass7: InstDuplex < 7 >;
-}
-def DuplexIClass8: InstDuplex < 8 >;
-def DuplexIClass9: InstDuplex < 9 >;
-def DuplexIClassA: InstDuplex < 0xA >;
-def DuplexIClassB: InstDuplex < 0xB >;
-def DuplexIClassC: InstDuplex < 0xC >;
-def DuplexIClassD: InstDuplex < 0xD >;
-def DuplexIClassE: InstDuplex < 0xE >;
-def DuplexIClassF: InstDuplex < 0xF >;
-
-let hasSideEffects = 0 in
-class T_Immext<Operand ImmType>
- : EXTENDERInst<(outs), (ins ImmType:$imm),
- "immext(#$imm)", []> {
- bits<32> imm;
- let IClass = 0b0000;
-
- let Inst{27-16} = imm{31-20};
- let Inst{13-0} = imm{19-6};
- }
-
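-// Illustrative note (derived from the encoding above): the extender packs
-// imm{31-20} into Inst{27-16} and imm{19-6} into Inst{13-0}, i.e. it
-// supplies the upper 26 bits of a 32-bit immediate; the instruction it
-// extends provides the remaining low 6 bits.
-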
-def A4_ext : T_Immext<u26_6Imm>;
-let isCodeGenOnly = 1 in {
- let isBranch = 1 in
- def A4_ext_b : T_Immext<brtarget>;
- let isCall = 1 in
- def A4_ext_c : T_Immext<calltarget>;
- def A4_ext_g : T_Immext<globaladdress>;
-}
-
-// The Hexagon V4 architecture spec defines the following instruction
-// classes: LD, ST, ALU32, XTYPE, J, JR, MEMOP, NV, CR and SYSTEM
-// (SYSTEM is not implemented in the compiler).
-
-// LD Instructions:
-// ========================================
-// Loads (8/16/32/64 bit)
-// Deallocframe
-
-// ST Instructions:
-// ========================================
-// Stores (8/16/32/64 bit)
-// Allocframe
-
-// ALU32 Instructions:
-// ========================================
-// Arithmetic / Logical (32 bit)
-// Vector Halfword
-
-// XTYPE Instructions (32/64 bit):
-// ========================================
-// Arithmetic, Logical, Bit Manipulation
-// Multiply (Integer, Fractional, Complex)
-// Permute / Vector Permute Operations
-// Predicate Operations
-// Shift / Shift with Add/Sub/Logical
-// Vector Byte ALU
-// Vector Halfword (ALU, Shift, Multiply)
-// Vector Word (ALU, Shift)
-
-// J Instructions:
-// ========================================
-// Jump/Call PC-relative
-
-// JR Instructions:
-// ========================================
-// Jump/Call Register
-
-// MEMOP Instructions:
-// ========================================
-// Operation on memory (8/16/32 bit)
-
-// NV Instructions:
-// ========================================
-// New-value Jumps
-// New-value Stores
-
-// CR Instructions:
-// ========================================
-// Control-Register Transfers
-// Hardware Loop Setup
-// Predicate Logicals & Reductions
-
-// SYSTEM Instructions (not implemented in the compiler):
-// ========================================
-// Prefetch
-// Cache Maintenance
-// Bus Operations
-
-
-//===----------------------------------------------------------------------===//
-// ALU32 +
-//===----------------------------------------------------------------------===//
-
-class T_ALU32_3op_not<string mnemonic, bits<3> MajOp, bits<3> MinOp,
- bit OpsRev>
- : T_ALU32_3op<mnemonic, MajOp, MinOp, OpsRev, 0> {
- let AsmString = "$Rd = "#mnemonic#"($Rs, ~$Rt)";
-}
-
-let BaseOpcode = "andn_rr", CextOpcode = "andn" in
-def A4_andn : T_ALU32_3op_not<"and", 0b001, 0b100, 1>;
-let BaseOpcode = "orn_rr", CextOpcode = "orn" in
-def A4_orn : T_ALU32_3op_not<"or", 0b001, 0b101, 1>;
-
-let CextOpcode = "rcmp.eq" in
-def A4_rcmpeq : T_ALU32_3op<"cmp.eq", 0b011, 0b010, 0, 1>;
-let CextOpcode = "!rcmp.eq" in
-def A4_rcmpneq : T_ALU32_3op<"!cmp.eq", 0b011, 0b011, 0, 1>;
-
-def C4_cmpneq : T_ALU32_3op_cmp<"!cmp.eq", 0b00, 1, 1>;
-def C4_cmplte : T_ALU32_3op_cmp<"!cmp.gt", 0b10, 1, 0>;
-def C4_cmplteu : T_ALU32_3op_cmp<"!cmp.gtu", 0b11, 1, 0>;
-
-class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm>
- : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Pd = "#mnemonic#"($Rs, $Rt)", [], "", S_3op_tc_2early_SLOT23>,
- ImmRegRel {
- let InputType = "reg";
- let CextOpcode = mnemonic;
- let isCompare = 1;
- let isCommutable = IsComm;
- let hasSideEffects = 0;
-
- bits<2> Pd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1100;
- let Inst{27-21} = 0b0111110;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{7-5} = MinOp;
- let Inst{1-0} = Pd;
-}
-
-def A4_cmpbeq : T_CMP_rrbh<"cmpb.eq", 0b110, 1>;
-def A4_cmpbgt : T_CMP_rrbh<"cmpb.gt", 0b010, 0>;
-def A4_cmpbgtu : T_CMP_rrbh<"cmpb.gtu", 0b111, 0>;
-def A4_cmpheq : T_CMP_rrbh<"cmph.eq", 0b011, 1>;
-def A4_cmphgt : T_CMP_rrbh<"cmph.gt", 0b100, 0>;
-def A4_cmphgtu : T_CMP_rrbh<"cmph.gtu", 0b101, 0>;
-
-class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm,
- Operand ImmType, bit IsImmExt, bit IsImmSigned, int ImmBits>
- : ALU64Inst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, ImmType:$Imm),
- "$Pd = "#mnemonic#"($Rs, #$Imm)", [], "", ALU64_tc_2early_SLOT23>,
- ImmRegRel {
- let InputType = "imm";
- let CextOpcode = mnemonic;
- let isCompare = 1;
- let isCommutable = IsComm;
- let hasSideEffects = 0;
- let isExtendable = IsImmExt;
- let opExtendable = !if (IsImmExt, 2, 0);
- let isExtentSigned = IsImmSigned;
- let opExtentBits = ImmBits;
-
- bits<2> Pd;
- bits<5> Rs;
- bits<8> Imm;
-
- let IClass = 0b1101;
- let Inst{27-24} = 0b1101;
- let Inst{22-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{12-5} = Imm;
- let Inst{4} = 0b0;
- let Inst{3} = IsHalf;
- let Inst{1-0} = Pd;
-}
-
-def A4_cmpbeqi : T_CMP_ribh<"cmpb.eq", 0b00, 0, 1, u8_0Imm, 0, 0, 8>;
-def A4_cmpbgti : T_CMP_ribh<"cmpb.gt", 0b01, 0, 0, s8_0Imm, 0, 1, 8>;
-def A4_cmpbgtui : T_CMP_ribh<"cmpb.gtu", 0b10, 0, 0, u7_0Ext, 1, 0, 7>;
-def A4_cmpheqi : T_CMP_ribh<"cmph.eq", 0b00, 1, 1, s8_0Ext, 1, 1, 8>;
-def A4_cmphgti : T_CMP_ribh<"cmph.gt", 0b01, 1, 0, s8_0Ext, 1, 1, 8>;
-def A4_cmphgtui : T_CMP_ribh<"cmph.gtu", 0b10, 1, 0, u7_0Ext, 1, 0, 7>;
-
-class T_RCMP_EQ_ri<string mnemonic, bit IsNeg>
- : ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s8_0Ext:$s8),
- "$Rd = "#mnemonic#"($Rs, #$s8)", [], "", ALU32_2op_tc_1_SLOT0123>,
- ImmRegRel {
- let InputType = "imm";
- let CextOpcode = !if (IsNeg, "!rcmp.eq", "rcmp.eq");
- let isExtendable = 1;
- let opExtendable = 2;
- let isExtentSigned = 1;
- let opExtentBits = 8;
- let hasNewValue = 1;
-
- bits<5> Rd;
- bits<5> Rs;
- bits<8> s8;
-
- let IClass = 0b0111;
- let Inst{27-24} = 0b0011;
- let Inst{22} = 0b1;
- let Inst{21} = IsNeg;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b1;
- let Inst{12-5} = s8;
- let Inst{4-0} = Rd;
-}
-
-def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>;
-def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>;
-
-//===----------------------------------------------------------------------===//
-// ALU32 -
-//===----------------------------------------------------------------------===//
-
-
-//===----------------------------------------------------------------------===//
-// ALU32/PERM +
-//===----------------------------------------------------------------------===//
-
-// Combine a word and an immediate into a register pair.
-let hasSideEffects = 0, isExtentSigned = 1, isExtendable = 1,
- opExtentBits = 8 in
-class T_Combine1 <bits<2> MajOp, dag ins, string AsmStr>
- : ALU32Inst <(outs DoubleRegs:$Rdd), ins, AsmStr> {
- bits<5> Rdd;
- bits<5> Rs;
- bits<8> s8;
-
- let IClass = 0b0111;
- let Inst{27-24} = 0b0011;
- let Inst{22-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b1;
- let Inst{12-5} = s8;
- let Inst{4-0} = Rdd;
- }
-
-let opExtendable = 2 in
-def A4_combineri : T_Combine1<0b00, (ins IntRegs:$Rs, s8_0Ext:$s8),
- "$Rdd = combine($Rs, #$s8)">;
-
-let opExtendable = 1 in
-def A4_combineir : T_Combine1<0b01, (ins s8_0Ext:$s8, IntRegs:$Rs),
- "$Rdd = combine(#$s8, $Rs)">;
-
-// A4_combineii: Set two small immediates.
-let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in
-def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8_0Imm:$s8, u6_0Ext:$U6),
- "$Rdd = combine(#$s8, #$U6)"> {
- bits<5> Rdd;
- bits<8> s8;
- bits<6> U6;
-
- let IClass = 0b0111;
- let Inst{27-23} = 0b11001;
- let Inst{20-16} = U6{5-1};
- let Inst{13} = U6{0};
- let Inst{12-5} = s8;
- let Inst{4-0} = Rdd;
- }
-
-//===----------------------------------------------------------------------===//
-// ALU32/PERM -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// LD +
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Template class for load instructions with Absolute set addressing mode.
-//===----------------------------------------------------------------------===//
-let isExtended = 1, opExtendable = 2, opExtentBits = 6, addrMode = AbsoluteSet,
- hasSideEffects = 0 in
-class T_LD_abs_set<string mnemonic, RegisterClass RC, bits<4>MajOp>:
- LDInst<(outs RC:$dst1, IntRegs:$dst2),
- (ins u6_0Ext:$addr),
- "$dst1 = "#mnemonic#"($dst2 = #$addr)",
- []> {
- bits<5> dst1;
- bits<5> dst2;
- bits<6> addr;
-
- let IClass = 0b1001;
- let Inst{27-25} = 0b101;
- let Inst{24-21} = MajOp;
- let Inst{13-12} = 0b01;
- let Inst{4-0} = dst1;
- let Inst{20-16} = dst2;
- let Inst{11-8} = addr{5-2};
- let Inst{6-5} = addr{1-0};
-}
-
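-// Illustrative note (derived from the class above): "$dst1 = memX($dst2 =
-// #$addr)" both loads from the absolute address and writes that address
-// back into $dst2; the 6-bit #$addr field splits as addr{5-2} ->
-// Inst{11-8} and addr{1-0} -> Inst{6-5}, and is normally constant-extended
-// (isExtended = 1).
-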
-let accessSize = ByteAccess, hasNewValue = 1 in {
- def L4_loadrb_ap : T_LD_abs_set <"memb", IntRegs, 0b1000>;
- def L4_loadrub_ap : T_LD_abs_set <"memub", IntRegs, 0b1001>;
-}
-
-let accessSize = HalfWordAccess, hasNewValue = 1 in {
- def L4_loadrh_ap : T_LD_abs_set <"memh", IntRegs, 0b1010>;
- def L4_loadruh_ap : T_LD_abs_set <"memuh", IntRegs, 0b1011>;
- def L4_loadbsw2_ap : T_LD_abs_set <"membh", IntRegs, 0b0001>;
- def L4_loadbzw2_ap : T_LD_abs_set <"memubh", IntRegs, 0b0011>;
-}
-
-let accessSize = WordAccess, hasNewValue = 1 in
- def L4_loadri_ap : T_LD_abs_set <"memw", IntRegs, 0b1100>;
-
-let accessSize = WordAccess in {
- def L4_loadbzw4_ap : T_LD_abs_set <"memubh", DoubleRegs, 0b0101>;
- def L4_loadbsw4_ap : T_LD_abs_set <"membh", DoubleRegs, 0b0111>;
-}
-
-let accessSize = DoubleWordAccess in
-def L4_loadrd_ap : T_LD_abs_set <"memd", DoubleRegs, 0b1110>;
-
-let accessSize = ByteAccess in
- def L4_loadalignb_ap : T_LD_abs_set <"memb_fifo", DoubleRegs, 0b0100>;
-
-let accessSize = HalfWordAccess in
-def L4_loadalignh_ap : T_LD_abs_set <"memh_fifo", DoubleRegs, 0b0010>;
-
-// Load - Indirect with long offset
-let InputType = "imm", addrMode = BaseLongOffset, isExtended = 1,
-    opExtentBits = 6, opExtendable = 3 in
-class T_LoadAbsReg <string mnemonic, string CextOp, RegisterClass RC,
- bits<4> MajOp>
- : LDInst <(outs RC:$dst), (ins IntRegs:$src1, u2_0Imm:$src2, u6_0Ext:$src3),
- "$dst = "#mnemonic#"($src1<<#$src2 + #$src3)",
- [] >, ImmRegShl {
- bits<5> dst;
- bits<5> src1;
- bits<2> src2;
- bits<6> src3;
- let CextOpcode = CextOp;
- let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
-
- let IClass = 0b1001;
- let Inst{27-25} = 0b110;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13} = src2{1};
- let Inst{12} = 0b1;
- let Inst{11-8} = src3{5-2};
- let Inst{7} = src2{0};
- let Inst{6-5} = src3{1-0};
- let Inst{4-0} = dst;
- }
-
-let accessSize = ByteAccess in {
- def L4_loadrb_ur : T_LoadAbsReg<"memb", "LDrib", IntRegs, 0b1000>;
- def L4_loadrub_ur : T_LoadAbsReg<"memub", "LDriub", IntRegs, 0b1001>;
- def L4_loadalignb_ur : T_LoadAbsReg<"memb_fifo", "LDrib_fifo",
- DoubleRegs, 0b0100>;
-}
-
-let accessSize = HalfWordAccess in {
- def L4_loadrh_ur : T_LoadAbsReg<"memh", "LDrih", IntRegs, 0b1010>;
- def L4_loadruh_ur : T_LoadAbsReg<"memuh", "LDriuh", IntRegs, 0b1011>;
- def L4_loadbsw2_ur : T_LoadAbsReg<"membh", "LDribh2", IntRegs, 0b0001>;
- def L4_loadbzw2_ur : T_LoadAbsReg<"memubh", "LDriubh2", IntRegs, 0b0011>;
- def L4_loadalignh_ur : T_LoadAbsReg<"memh_fifo", "LDrih_fifo",
- DoubleRegs, 0b0010>;
-}
-
-let accessSize = WordAccess in {
- def L4_loadri_ur : T_LoadAbsReg<"memw", "LDriw", IntRegs, 0b1100>;
- def L4_loadbsw4_ur : T_LoadAbsReg<"membh", "LDribh4", DoubleRegs, 0b0111>;
- def L4_loadbzw4_ur : T_LoadAbsReg<"memubh", "LDriubh4", DoubleRegs, 0b0101>;
-}
-
-let accessSize = DoubleWordAccess in
-def L4_loadrd_ur : T_LoadAbsReg<"memd", "LDrid", DoubleRegs, 0b1110>;
-
-
-//===----------------------------------------------------------------------===//
-// Template classes for the non-predicated load instructions with
-// base + register offset addressing mode
-//===----------------------------------------------------------------------===//
-class T_load_rr <string mnemonic, RegisterClass RC, bits<3> MajOp>:
- LDInst<(outs RC:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2_0Imm:$u2),
- "$dst = "#mnemonic#"($src1 + $src2<<#$u2)",
- [], "", V4LDST_tc_ld_SLOT01>, ImmRegShl, AddrModeRel {
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
- bits<2> u2;
-
- let IClass = 0b0011;
-
- let Inst{27-24} = 0b1010;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{12-8} = src2;
- let Inst{13} = u2{1};
- let Inst{7} = u2{0};
- let Inst{4-0} = dst;
- }
-
-//===----------------------------------------------------------------------===//
-// Template classes for the predicated load instructions with
-// base + register offset addressing mode
-//===----------------------------------------------------------------------===//
-let isPredicated = 1 in
-class T_pload_rr <string mnemonic, RegisterClass RC, bits<3> MajOp,
- bit isNot, bit isPredNew>:
- LDInst <(outs RC:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2_0Imm:$u2),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#"$dst = "#mnemonic#"($src2+$src3<<#$u2)",
- [], "", V4LDST_tc_ld_SLOT01>, AddrModeRel {
- bits<5> dst;
- bits<2> src1;
- bits<5> src2;
- bits<5> src3;
- bits<2> u2;
-
- let isPredicatedFalse = isNot;
- let isPredicatedNew = isPredNew;
-
- let IClass = 0b0011;
-
- let Inst{27-26} = 0b00;
- let Inst{25} = isPredNew;
- let Inst{24} = isNot;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src2;
- let Inst{12-8} = src3;
- let Inst{13} = u2{1};
- let Inst{7} = u2{0};
- let Inst{6-5} = src1;
- let Inst{4-0} = dst;
- }
-
-//===----------------------------------------------------------------------===//
-// multiclass for load instructions with base + register offset
-// addressing mode
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, addrMode = BaseRegOffset in
-multiclass ld_idxd_shl <string mnemonic, string CextOp, RegisterClass RC,
- bits<3> MajOp > {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl,
- InputType = "reg" in {
- let isPredicable = 1 in
- def L4_#NAME#_rr : T_load_rr <mnemonic, RC, MajOp>;
-
- // Predicated
- def L4_p#NAME#t_rr : T_pload_rr <mnemonic, RC, MajOp, 0, 0>;
- def L4_p#NAME#f_rr : T_pload_rr <mnemonic, RC, MajOp, 1, 0>;
-
- // Predicated new
- def L4_p#NAME#tnew_rr : T_pload_rr <mnemonic, RC, MajOp, 0, 1>;
- def L4_p#NAME#fnew_rr : T_pload_rr <mnemonic, RC, MajOp, 1, 1>;
- }
-}
-
-let hasNewValue = 1, accessSize = ByteAccess in {
- defm loadrb : ld_idxd_shl<"memb", "LDrib", IntRegs, 0b000>;
- defm loadrub : ld_idxd_shl<"memub", "LDriub", IntRegs, 0b001>;
-}
-
-let hasNewValue = 1, accessSize = HalfWordAccess in {
- defm loadrh : ld_idxd_shl<"memh", "LDrih", IntRegs, 0b010>;
- defm loadruh : ld_idxd_shl<"memuh", "LDriuh", IntRegs, 0b011>;
-}
-
-let hasNewValue = 1, accessSize = WordAccess in
-defm loadri : ld_idxd_shl<"memw", "LDriw", IntRegs, 0b100>;
-
-let accessSize = DoubleWordAccess in
-defm loadrd : ld_idxd_shl<"memd", "LDrid", DoubleRegs, 0b110>;
-
-//===----------------------------------------------------------------------===//
-// LD -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ST +
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Template class for store instructions with Absolute set addressing mode.
-//===----------------------------------------------------------------------===//
-let isExtended = 1, opExtendable = 1, opExtentBits = 6,
- addrMode = AbsoluteSet in
-class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC,
- bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0>
- : STInst<(outs IntRegs:$dst),
- (ins u6_0Ext:$addr, RC:$src),
- mnemonic#"($dst = #$addr) = $src"#!if(isHalf, ".h","")>, NewValueRel {
- bits<5> dst;
- bits<6> addr;
- bits<5> src;
- let accessSize = AccessSz;
- let BaseOpcode = BaseOp#"_AbsSet";
-
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
-
- let IClass = 0b1010;
-
- let Inst{27-24} = 0b1011;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = dst;
- let Inst{13} = 0b0;
- let Inst{12-8} = src;
- let Inst{7} = 0b1;
- let Inst{5-0} = addr;
- }
-
-def S4_storerb_ap : T_ST_absset <"memb", "STrib", IntRegs, 0b000, ByteAccess>;
-def S4_storerh_ap : T_ST_absset <"memh", "STrih", IntRegs, 0b010,
- HalfWordAccess>;
-def S4_storeri_ap : T_ST_absset <"memw", "STriw", IntRegs, 0b100, WordAccess>;
-
-let isNVStorable = 0 in {
- def S4_storerf_ap : T_ST_absset <"memh", "STrif", IntRegs,
- 0b011, HalfWordAccess, 1>;
- def S4_storerd_ap : T_ST_absset <"memd", "STrid", DoubleRegs,
- 0b110, DoubleWordAccess>;
-}
-
-let opExtendable = 1, isNewValue = 1, isNVStore = 1, opNewValue = 2,
-    isExtended = 1, opExtentBits = 6 in
-class T_ST_absset_nv <string mnemonic, string BaseOp, bits<2> MajOp,
- MemAccessSize AccessSz >
- : NVInst <(outs IntRegs:$dst),
- (ins u6_0Ext:$addr, IntRegs:$src),
- mnemonic#"($dst = #$addr) = $src.new">, NewValueRel {
- bits<5> dst;
- bits<6> addr;
- bits<3> src;
- let accessSize = AccessSz;
- let BaseOpcode = BaseOp#"_AbsSet";
-
- let IClass = 0b1010;
-
- let Inst{27-21} = 0b1011101;
- let Inst{20-16} = dst;
-    let Inst{13} = 0b0;
-    let Inst{12-11} = MajOp;
- let Inst{10-8} = src;
- let Inst{7} = 0b1;
- let Inst{5-0} = addr;
- }
-
-let mayStore = 1, addrMode = AbsoluteSet in {
- def S4_storerbnew_ap : T_ST_absset_nv <"memb", "STrib", 0b00, ByteAccess>;
- def S4_storerhnew_ap : T_ST_absset_nv <"memh", "STrih", 0b01, HalfWordAccess>;
- def S4_storerinew_ap : T_ST_absset_nv <"memw", "STriw", 0b10, WordAccess>;
-}
-
-let isExtended = 1, opExtendable = 2, opExtentBits = 6, InputType = "imm",
- addrMode = BaseLongOffset, AddedComplexity = 40 in
-class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC,
- bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0>
- : STInst<(outs),
- (ins IntRegs:$src1, u2_0Imm:$src2, u6_0Ext:$src3, RC:$src4),
- mnemonic#"($src1<<#$src2 + #$src3) = $src4"#!if(isHalf, ".h",""),
- []>, ImmRegShl, NewValueRel {
-
- bits<5> src1;
- bits<2> src2;
- bits<6> src3;
- bits<5> src4;
-
- let accessSize = AccessSz;
- let CextOpcode = CextOp;
- let BaseOpcode = CextOp#"_shl";
-
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
-
- let IClass = 0b1010;
-
- let Inst{27-24} =0b1101;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = src1;
- let Inst{13} = src2{1};
- let Inst{12-8} = src4;
- let Inst{7} = 0b1;
- let Inst{6} = src2{0};
- let Inst{5-0} = src3;
-}
-
-def S4_storerb_ur : T_StoreAbsReg <"memb", "STrib", IntRegs, 0b000, ByteAccess>;
-def S4_storerh_ur : T_StoreAbsReg <"memh", "STrih", IntRegs, 0b010,
- HalfWordAccess>;
-def S4_storerf_ur : T_StoreAbsReg <"memh", "STrif", IntRegs, 0b011,
- HalfWordAccess, 1>;
-def S4_storeri_ur : T_StoreAbsReg <"memw", "STriw", IntRegs, 0b100, WordAccess>;
-def S4_storerd_ur : T_StoreAbsReg <"memd", "STrid", DoubleRegs, 0b110,
- DoubleWordAccess>;
-
-let mayStore = 1, isNVStore = 1, isExtended = 1, addrMode = BaseLongOffset,
- opExtentBits = 6, isNewValue = 1, opNewValue = 3, opExtendable = 2 in
-class T_StoreAbsRegNV <string mnemonic, string CextOp, bits<2> MajOp,
- MemAccessSize AccessSz>
- : NVInst <(outs ),
- (ins IntRegs:$src1, u2_0Imm:$src2, u6_0Ext:$src3, IntRegs:$src4),
- mnemonic#"($src1<<#$src2 + #$src3) = $src4.new">, NewValueRel {
- bits<5> src1;
- bits<2> src2;
- bits<6> src3;
- bits<3> src4;
-
- let CextOpcode = CextOp;
- let BaseOpcode = CextOp#"_shl";
- let IClass = 0b1010;
-
- let Inst{27-21} = 0b1101101;
- let Inst{7} = 0b1;
- let Inst{20-16} = src1;
- let Inst{13} = src2{1};
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src4;
- let Inst{6} = src2{0};
- let Inst{5-0} = src3;
- }
-
-def S4_storerbnew_ur : T_StoreAbsRegNV <"memb", "STrib", 0b00, ByteAccess>;
-def S4_storerhnew_ur : T_StoreAbsRegNV <"memh", "STrih", 0b01, HalfWordAccess>;
-def S4_storerinew_ur : T_StoreAbsRegNV <"memw", "STriw", 0b10, WordAccess>;
-
-//===----------------------------------------------------------------------===//
-// Template classes for the non-predicated store instructions with
-// base + register offset addressing mode
-//===----------------------------------------------------------------------===//
-let isPredicable = 1 in
-class T_store_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isH>
- : STInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2_0Imm:$u2, RC:$Rt),
- mnemonic#"($Rs + $Ru<<#$u2) = $Rt"#!if(isH, ".h",""),
- [],"",V4LDST_tc_st_SLOT01>, ImmRegShl, AddrModeRel {
-
- bits<5> Rs;
- bits<5> Ru;
- bits<2> u2;
- bits<5> Rt;
-
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
-
- let IClass = 0b0011;
-
- let Inst{27-24} = 0b1011;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Ru;
- let Inst{13} = u2{1};
- let Inst{7} = u2{0};
- let Inst{4-0} = Rt;
- }
-
-//===----------------------------------------------------------------------===//
-// Template classes for the predicated store instructions with
-// base + register offset addressing mode
-//===----------------------------------------------------------------------===//
-let isPredicated = 1 in
-class T_pstore_rr <string mnemonic, RegisterClass RC, bits<3> MajOp,
- bit isNot, bit isPredNew, bit isH>
- : STInst <(outs),
- (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2_0Imm:$u2, RC:$Rt),
-
- !if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Rt"#!if(isH, ".h",""),
- [], "", V4LDST_tc_st_SLOT01> , AddrModeRel{
- bits<2> Pv;
- bits<5> Rs;
- bits<5> Ru;
- bits<2> u2;
- bits<5> Rt;
-
- let isPredicatedFalse = isNot;
- let isPredicatedNew = isPredNew;
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1));
-
- let IClass = 0b0011;
-
- let Inst{27-26} = 0b01;
- let Inst{25} = isPredNew;
- let Inst{24} = isNot;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Ru;
- let Inst{13} = u2{1};
- let Inst{7} = u2{0};
- let Inst{6-5} = Pv;
- let Inst{4-0} = Rt;
- }
-
-//===----------------------------------------------------------------------===//
-// Template classes for the new-value store instructions with
-// base + register offset addressing mode
-//===----------------------------------------------------------------------===//
-let isPredicable = 1, isNewValue = 1, opNewValue = 3 in
-class T_store_new_rr <string mnemonic, bits<2> MajOp> :
- NVInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2_0Imm:$u2, IntRegs:$Nt),
- mnemonic#"($Rs + $Ru<<#$u2) = $Nt.new",
- [],"",V4LDST_tc_st_SLOT0>, ImmRegShl, AddrModeRel {
-
- bits<5> Rs;
- bits<5> Ru;
- bits<2> u2;
- bits<3> Nt;
-
- let IClass = 0b0011;
-
- let Inst{27-21} = 0b1011101;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Ru;
- let Inst{13} = u2{1};
- let Inst{7} = u2{0};
- let Inst{4-3} = MajOp;
- let Inst{2-0} = Nt;
- }
-
-//===----------------------------------------------------------------------===//
-// Template classes for the predicated new-value store instructions with
-// base + register offset addressing mode
-//===----------------------------------------------------------------------===//
-let isPredicated = 1, isNewValue = 1, opNewValue = 4 in
-class T_pstore_new_rr <string mnemonic, bits<2> MajOp, bit isNot, bit isPredNew>
- : NVInst<(outs),
- (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2_0Imm:$u2, IntRegs:$Nt),
- !if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Nt.new",
- [], "", V4LDST_tc_st_SLOT0>, AddrModeRel {
- bits<2> Pv;
- bits<5> Rs;
- bits<5> Ru;
- bits<2> u2;
- bits<3> Nt;
-
- let isPredicatedFalse = isNot;
- let isPredicatedNew = isPredNew;
-
- let IClass = 0b0011;
- let Inst{27-26} = 0b01;
- let Inst{25} = isPredNew;
- let Inst{24} = isNot;
- let Inst{23-21} = 0b101;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Ru;
- let Inst{13} = u2{1};
- let Inst{7} = u2{0};
- let Inst{6-5} = Pv;
- let Inst{4-3} = MajOp;
- let Inst{2-0} = Nt;
- }
-
-//===----------------------------------------------------------------------===//
-// multiclass for store instructions with base + register offset addressing
-// mode
-//===----------------------------------------------------------------------===//
-let isNVStorable = 1 in
-multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC,
- bits<3> MajOp, bit isH = 0> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
- def S4_#NAME#_rr : T_store_rr <mnemonic, RC, MajOp, isH>;
-
- // Predicated
- def S4_p#NAME#t_rr : T_pstore_rr <mnemonic, RC, MajOp, 0, 0, isH>;
- def S4_p#NAME#f_rr : T_pstore_rr <mnemonic, RC, MajOp, 1, 0, isH>;
-
- // Predicated new
- def S4_p#NAME#tnew_rr : T_pstore_rr <mnemonic, RC, MajOp, 0, 1, isH>;
- def S4_p#NAME#fnew_rr : T_pstore_rr <mnemonic, RC, MajOp, 1, 1, isH>;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// multiclass for new-value store instructions with base + register offset
-// addressing mode.
-//===----------------------------------------------------------------------===//
-let mayStore = 1, isNVStore = 1 in
-multiclass ST_Idxd_shl_nv <string mnemonic, string CextOp, RegisterClass RC,
- bits<2> MajOp> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
- def S4_#NAME#new_rr : T_store_new_rr<mnemonic, MajOp>;
-
- // Predicated
- def S4_p#NAME#newt_rr : T_pstore_new_rr <mnemonic, MajOp, 0, 0>;
- def S4_p#NAME#newf_rr : T_pstore_new_rr <mnemonic, MajOp, 1, 0>;
-
- // Predicated new
- def S4_p#NAME#newtnew_rr : T_pstore_new_rr <mnemonic, MajOp, 0, 1>;
- def S4_p#NAME#newfnew_rr : T_pstore_new_rr <mnemonic, MajOp, 1, 1>;
- }
-}
-
-let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0 in {
- let accessSize = ByteAccess in
- defm storerb: ST_Idxd_shl<"memb", "STrib", IntRegs, 0b000>,
- ST_Idxd_shl_nv<"memb", "STrib", IntRegs, 0b00>;
-
- let accessSize = HalfWordAccess in
- defm storerh: ST_Idxd_shl<"memh", "STrih", IntRegs, 0b010>,
- ST_Idxd_shl_nv<"memh", "STrih", IntRegs, 0b01>;
-
- let accessSize = WordAccess in
- defm storeri: ST_Idxd_shl<"memw", "STriw", IntRegs, 0b100>,
- ST_Idxd_shl_nv<"memw", "STriw", IntRegs, 0b10>;
-
- let isNVStorable = 0, accessSize = DoubleWordAccess in
- defm storerd: ST_Idxd_shl<"memd", "STrid", DoubleRegs, 0b110>;
-
- let isNVStorable = 0, accessSize = HalfWordAccess in
- defm storerf: ST_Idxd_shl<"memh", "STrif", IntRegs, 0b011, 1>;
-}
-
-//===----------------------------------------------------------------------===//
-// Template class
-//===----------------------------------------------------------------------===//
-let isPredicable = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 8,
- opExtendable = 2 in
-class T_StoreImm <string mnemonic, Operand OffsetOp, bits<2> MajOp >
- : STInst <(outs ), (ins IntRegs:$Rs, OffsetOp:$offset, s8_0Ext:$S8),
- mnemonic#"($Rs+#$offset)=#$S8",
- [], "", V4LDST_tc_st_SLOT01>,
- ImmRegRel, PredNewRel {
- bits<5> Rs;
- bits<8> S8;
- bits<8> offset;
- bits<6> offsetBits;
-
- string OffsetOpStr = !cast<string>(OffsetOp);
- let offsetBits = !if (!eq(OffsetOpStr, "u6_2Imm"), offset{7-2},
- !if (!eq(OffsetOpStr, "u6_1Imm"), offset{6-1},
- /* u6_0Imm */ offset{5-0}));
-
- let IClass = 0b0011;
-
- let Inst{27-25} = 0b110;
- let Inst{22-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{12-7} = offsetBits;
- let Inst{13} = S8{7};
- let Inst{6-0} = S8{6-0};
- }
-
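-// Worked example (derived from offsetBits above): the encoded offset is the
-// byte offset scaled by the access size, e.g. memh (u6_1Imm) encodes #10 as
-// offset{6-1} = 5 and memw (u6_2Imm) encodes #20 as offset{7-2} = 5, while
-// memb (u6_0Imm) stores the byte offset directly.
-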
-let isPredicated = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 6,
- opExtendable = 3 in
-class T_StoreImm_pred <string mnemonic, Operand OffsetOp, bits<2> MajOp,
- bit isPredNot, bit isPredNew >
- : STInst <(outs ),
- (ins PredRegs:$Pv, IntRegs:$Rs, OffsetOp:$offset, s6_0Ext:$S6),
- !if(isPredNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($Rs+#$offset)=#$S6",
- [], "", V4LDST_tc_st_SLOT01>,
- ImmRegRel, PredNewRel {
- bits<2> Pv;
- bits<5> Rs;
- bits<6> S6;
- bits<8> offset;
- bits<6> offsetBits;
-
- string OffsetOpStr = !cast<string>(OffsetOp);
- let offsetBits = !if (!eq(OffsetOpStr, "u6_2Imm"), offset{7-2},
- !if (!eq(OffsetOpStr, "u6_1Imm"), offset{6-1},
- /* u6_0Imm */ offset{5-0}));
- let isPredicatedNew = isPredNew;
- let isPredicatedFalse = isPredNot;
-
- let IClass = 0b0011;
-
- let Inst{27-25} = 0b100;
- let Inst{24} = isPredNew;
- let Inst{23} = isPredNot;
- let Inst{22-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{13} = S6{5};
- let Inst{12-7} = offsetBits;
- let Inst{6-5} = Pv;
- let Inst{4-0} = S6{4-0};
- }
-
-
-//===----------------------------------------------------------------------===//
-// multiclass for store instructions with base + immediate offset
-// addressing mode and immediate stored value.
-// mem[bhw](Rs+#u6:[012])=#S8
-// if ([!]Pv[.new]) mem[bhw](Rs+#u6:[012])=#S6
-//===----------------------------------------------------------------------===//
-
-multiclass ST_Imm_Pred <string mnemonic, Operand OffsetOp, bits<2> MajOp,
- bit PredNot> {
- def _io : T_StoreImm_pred <mnemonic, OffsetOp, MajOp, PredNot, 0>;
- // Predicate new
- def new_io : T_StoreImm_pred <mnemonic, OffsetOp, MajOp, PredNot, 1>;
-}
-
-multiclass ST_Imm <string mnemonic, string CextOp, Operand OffsetOp,
- bits<2> MajOp> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_imm in {
- def _io : T_StoreImm <mnemonic, OffsetOp, MajOp>;
-
- defm t : ST_Imm_Pred <mnemonic, OffsetOp, MajOp, 0>;
- defm f : ST_Imm_Pred <mnemonic, OffsetOp, MajOp, 1>;
- }
-}
-
-let hasSideEffects = 0, addrMode = BaseImmOffset,
- InputType = "imm" in {
- let accessSize = ByteAccess in
- defm S4_storeirb : ST_Imm<"memb", "STrib", u6_0Imm, 0b00>;
-
- let accessSize = HalfWordAccess in
- defm S4_storeirh : ST_Imm<"memh", "STrih", u6_1Imm, 0b01>;
-
- let accessSize = WordAccess in
- defm S4_storeiri : ST_Imm<"memw", "STriw", u6_2Imm, 0b10>;
-}
-
-//===----------------------------------------------------------------------===//
-// ST -
-//===----------------------------------------------------------------------===//
-
-
-//===----------------------------------------------------------------------===//
-// NV/ST +
-//===----------------------------------------------------------------------===//
-
-let opNewValue = 2, opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in
-class T_store_io_nv <string mnemonic, RegisterClass RC,
- Operand ImmOp, bits<2>MajOp>
- : NVInst_V4 <(outs),
- (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
- mnemonic#"($src1+#$src2) = $src3.new",
- [],"",ST_tc_st_SLOT0> {
- bits<5> src1;
- bits<13> src2; // Actual address offset
- bits<3> src3;
- bits<11> offsetBits; // Represents offset encoding
-
- let opExtentBits = !if (!eq(mnemonic, "memb"), 11,
- !if (!eq(mnemonic, "memh"), 12,
- !if (!eq(mnemonic, "memw"), 13, 0)));
-
- let opExtentAlign = !if (!eq(mnemonic, "memb"), 0,
- !if (!eq(mnemonic, "memh"), 1,
- !if (!eq(mnemonic, "memw"), 2, 0)));
-
- let offsetBits = !if (!eq(mnemonic, "memb"), src2{10-0},
- !if (!eq(mnemonic, "memh"), src2{11-1},
- !if (!eq(mnemonic, "memw"), src2{12-2}, 0)));
-
- let IClass = 0b1010;
-
- let Inst{27} = 0b0;
- let Inst{26-25} = offsetBits{10-9};
- let Inst{24-21} = 0b1101;
- let Inst{20-16} = src1;
- let Inst{13} = offsetBits{8};
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src3;
- let Inst{7-0} = offsetBits{7-0};
- }
-
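-// Illustrative note (derived from the !if chains above): the extendable
-// offset is 11, 12 or 13 bits for memb/memh/memw, but the encoding always
-// holds 11 bits (offsetBits) because the 0, 1 or 2 alignment bits are
-// dropped (src2{10-0}, src2{11-1}, src2{12-2}).
-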
-let opExtendable = 2, opNewValue = 3, isPredicated = 1 in
-class T_pstore_io_nv <string mnemonic, RegisterClass RC, Operand predImmOp,
- bits<2>MajOp, bit PredNot, bit isPredNew>
- : NVInst_V4 <(outs),
- (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC:$src4),
- !if(PredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($src2+#$src3) = $src4.new",
- [],"",V2LDST_tc_st_SLOT0> {
- bits<2> src1;
- bits<5> src2;
- bits<9> src3;
- bits<3> src4;
- bits<6> offsetBits; // Represents offset encoding
-
- let isPredicatedNew = isPredNew;
- let isPredicatedFalse = PredNot;
- let opExtentBits = !if (!eq(mnemonic, "memb"), 6,
- !if (!eq(mnemonic, "memh"), 7,
- !if (!eq(mnemonic, "memw"), 8, 0)));
-
- let opExtentAlign = !if (!eq(mnemonic, "memb"), 0,
- !if (!eq(mnemonic, "memh"), 1,
- !if (!eq(mnemonic, "memw"), 2, 0)));
-
- let offsetBits = !if (!eq(mnemonic, "memb"), src3{5-0},
- !if (!eq(mnemonic, "memh"), src3{6-1},
- !if (!eq(mnemonic, "memw"), src3{7-2}, 0)));
-
- let IClass = 0b0100;
-
- let Inst{27} = 0b0;
- let Inst{26} = PredNot;
- let Inst{25} = isPredNew;
- let Inst{24-21} = 0b0101;
- let Inst{20-16} = src2;
- let Inst{13} = offsetBits{5};
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src4;
- let Inst{7-3} = offsetBits{4-0};
- let Inst{2} = 0b0;
- let Inst{1-0} = src1;
- }
-
-// multiclass for new-value store instructions with base + immediate offset.
-//
-let mayStore = 1, isNVStore = 1, isNewValue = 1, hasSideEffects = 0,
- isExtendable = 1 in
-multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC,
- Operand ImmOp, Operand predImmOp, bits<2> MajOp> {
-
- let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
- def S2_#NAME#new_io : T_store_io_nv <mnemonic, RC, ImmOp, MajOp>;
- // Predicated
- def S2_p#NAME#newt_io :T_pstore_io_nv <mnemonic, RC, predImmOp, MajOp, 0, 0>;
- def S2_p#NAME#newf_io :T_pstore_io_nv <mnemonic, RC, predImmOp, MajOp, 1, 0>;
- // Predicated new
- def S4_p#NAME#newtnew_io :T_pstore_io_nv <mnemonic, RC, predImmOp,
- MajOp, 0, 1>;
- def S4_p#NAME#newfnew_io :T_pstore_io_nv <mnemonic, RC, predImmOp,
- MajOp, 1, 1>;
- }
-}
-
-let addrMode = BaseImmOffset, InputType = "imm" in {
- let accessSize = ByteAccess in
- defm storerb: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext,
- u6_0Ext, 0b00>, AddrModeRel;
-
- let accessSize = HalfWordAccess, opExtentAlign = 1 in
- defm storerh: ST_Idxd_nv<"memh", "STrih", IntRegs, s11_1Ext,
- u6_1Ext, 0b01>, AddrModeRel;
-
- let accessSize = WordAccess, opExtentAlign = 2 in
- defm storeri: ST_Idxd_nv<"memw", "STriw", IntRegs, s11_2Ext,
- u6_2Ext, 0b10>, AddrModeRel;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment loads with register offset.
-//===----------------------------------------------------------------------===//
-
-let hasNewValue = 1 in
-def L2_loadbsw2_pr : T_load_pr <"membh", IntRegs, 0b0001, HalfWordAccess>;
-
-def L2_loadbsw4_pr : T_load_pr <"membh", DoubleRegs, 0b0111, WordAccess>;
-
-let hasSideEffects = 0, addrMode = PostInc in
-class T_loadalign_pr <string mnemonic, bits<4> MajOp, MemAccessSize AccessSz>
- : LDInstPI <(outs DoubleRegs:$dst, IntRegs:$_dst_),
- (ins DoubleRegs:$src1, IntRegs:$src2, ModRegs:$src3),
- "$dst = "#mnemonic#"($src2++$src3)", [],
- "$src1 = $dst, $src2 = $_dst_"> {
- bits<5> dst;
- bits<5> src2;
- bits<1> src3;
-
- let accessSize = AccessSz;
- let IClass = 0b1001;
-
- let Inst{27-25} = 0b110;
- let Inst{24-21} = MajOp;
- let Inst{20-16} = src2;
- let Inst{13} = src3;
- let Inst{12} = 0b0;
- let Inst{7} = 0b0;
- let Inst{4-0} = dst;
- }
-
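-// Illustrative note: the "$src1 = $dst" tie makes this a read-modify-write
-// of the 64-bit destination (per the _fifo mnemonic, the fetched element is
-// shifted into the pair), and "$src2 = $_dst_" returns the post-incremented
-// base register.
-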
-def L2_loadalignb_pr : T_loadalign_pr <"memb_fifo", 0b0100, ByteAccess>;
-def L2_loadalignh_pr : T_loadalign_pr <"memh_fifo", 0b0010, HalfWordAccess>;
-
-//===----------------------------------------------------------------------===//
-// Template class for non-predicated post increment .new stores
-// mem[bhwd](Rx++#s4:[0123])=Nt.new
-//===----------------------------------------------------------------------===//
-let isPredicable = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1,
- isNewValue = 1, opNewValue = 3 in
-class T_StorePI_nv <string mnemonic, Operand ImmOp, bits<2> MajOp >
- : NVInstPI_V4 <(outs IntRegs:$_dst_),
- (ins IntRegs:$src1, ImmOp:$offset, IntRegs:$src2),
- mnemonic#"($src1++#$offset) = $src2.new",
- [], "$src1 = $_dst_">,
- AddrModeRel {
- bits<5> src1;
- bits<3> src2;
- bits<7> offset;
- bits<4> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
- !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
- /* s4_0Imm */ offset{3-0}));
- let IClass = 0b1010;
-
- let Inst{27-21} = 0b1011101;
- let Inst{20-16} = src1;
- let Inst{13} = 0b0;
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src2;
- let Inst{7} = 0b0;
- let Inst{6-3} = offsetBits;
- let Inst{1} = 0b0;
- }
-
-//===----------------------------------------------------------------------===//
-// Template class for predicated post increment .new stores
-// if([!]Pv[.new]) mem[bhwd](Rx++#s4:[0123])=Nt.new
-//===----------------------------------------------------------------------===//
-let isPredicated = 1, hasSideEffects = 0, addrMode = PostInc, isNVStore = 1,
- isNewValue = 1, opNewValue = 4 in
-class T_StorePI_nv_pred <string mnemonic, Operand ImmOp,
- bits<2> MajOp, bit isPredNot, bit isPredNew >
- : NVInstPI_V4 <(outs IntRegs:$_dst_),
- (ins PredRegs:$src1, IntRegs:$src2,
- ImmOp:$offset, IntRegs:$src3),
- !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#mnemonic#"($src2++#$offset) = $src3.new",
- [], "$src2 = $_dst_">,
- AddrModeRel {
- bits<2> src1;
- bits<5> src2;
- bits<3> src3;
- bits<7> offset;
- bits<4> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "s4_2Imm"), offset{5-2},
- !if (!eq(ImmOpStr, "s4_1Imm"), offset{4-1},
- /* s4_0Imm */ offset{3-0}));
- let isPredicatedNew = isPredNew;
- let isPredicatedFalse = isPredNot;
-
- let IClass = 0b1010;
-
- let Inst{27-21} = 0b1011101;
- let Inst{20-16} = src2;
- let Inst{13} = 0b1;
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src3;
- let Inst{7} = isPredNew;
- let Inst{6-3} = offsetBits;
- let Inst{2} = isPredNot;
- let Inst{1-0} = src1;
- }
-
-multiclass ST_PostInc_Pred_nv<string mnemonic, Operand ImmOp,
- bits<2> MajOp, bit PredNot> {
- def _pi : T_StorePI_nv_pred <mnemonic, ImmOp, MajOp, PredNot, 0>;
-
- // Predicate new
- def new_pi : T_StorePI_nv_pred <mnemonic, ImmOp, MajOp, PredNot, 1>;
-}
-
-multiclass ST_PostInc_nv<string mnemonic, string BaseOp, Operand ImmOp,
- bits<2> MajOp> {
- let BaseOpcode = "POST_"#BaseOp in {
- def S2_#NAME#_pi : T_StorePI_nv <mnemonic, ImmOp, MajOp>;
-
- // Predicated
- defm S2_p#NAME#t : ST_PostInc_Pred_nv <mnemonic, ImmOp, MajOp, 0>;
- defm S2_p#NAME#f : ST_PostInc_Pred_nv <mnemonic, ImmOp, MajOp, 1>;
- }
-}
-
-let accessSize = ByteAccess in
-defm storerbnew: ST_PostInc_nv <"memb", "STrib", s4_0Imm, 0b00>;
-
-let accessSize = HalfWordAccess in
-defm storerhnew: ST_PostInc_nv <"memh", "STrih", s4_1Imm, 0b01>;
-
-let accessSize = WordAccess in
-defm storerinew: ST_PostInc_nv <"memw", "STriw", s4_2Imm, 0b10>;
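-
-// For reference, a sketch of what the byte variant above expands to (the
-// names follow from the multiclass definitions; illustrative only):
-//   def  S2_storerbnew_pi : T_StorePI_nv      <"memb", s4_0Imm, 0b00>;
-//   defm S2_pstorerbnewt  : ST_PostInc_Pred_nv<"memb", s4_0Imm, 0b00, 0>;
-//   defm S2_pstorerbnewf  : ST_PostInc_Pred_nv<"memb", s4_0Imm, 0b00, 1>;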
-
-//===----------------------------------------------------------------------===//
-// Template class for post increment .new stores with register offset
-//===----------------------------------------------------------------------===//
-let isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 3 in
-class T_StorePI_RegNV <string mnemonic, bits<2> MajOp, MemAccessSize AccessSz>
- : NVInstPI_V4 <(outs IntRegs:$_dst_),
- (ins IntRegs:$src1, ModRegs:$src2, IntRegs:$src3),
-                 mnemonic#"($src1++$src2) = $src3.new",
- [], "$src1 = $_dst_"> {
- bits<5> src1;
- bits<1> src2;
- bits<3> src3;
- let accessSize = AccessSz;
-
- let IClass = 0b1010;
-
- let Inst{27-21} = 0b1101101;
- let Inst{20-16} = src1;
- let Inst{13} = src2;
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src3;
- let Inst{7} = 0b0;
- }
-
-def S2_storerbnew_pr : T_StorePI_RegNV<"memb", 0b00, ByteAccess>;
-def S2_storerhnew_pr : T_StorePI_RegNV<"memh", 0b01, HalfWordAccess>;
-def S2_storerinew_pr : T_StorePI_RegNV<"memw", 0b10, WordAccess>;
-
-// memb(Rx++#s4:0:circ(Mu))=Nt.new
-// memb(Rx++I:circ(Mu))=Nt.new
-// memb(Rx++Mu:brev)=Nt.new
-// memh(Rx++#s4:1:circ(Mu))=Nt.new
-// memh(Rx++I:circ(Mu))=Nt.new
-// memh(Rx++Mu)=Nt.new
-// memh(Rx++Mu:brev)=Nt.new
-
-// memw(Rx++#s4:2:circ(Mu))=Nt.new
-// memw(Rx++I:circ(Mu))=Nt.new
-// memw(Rx++Mu)=Nt.new
-// memw(Rx++Mu:brev)=Nt.new
-
-//===----------------------------------------------------------------------===//
-// NV/ST -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// NV/J +
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Template class and multiclasses for the new-value compare jumps with
-// register operands.
-//===----------------------------------------------------------------------===//
-
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11,
- opExtentAlign = 2 in
-class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum,
- bit isNegCond, bit isTak>
- : NVInst_V4<(outs),
- (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
- "if ("#!if(isNegCond, "!","")#mnemonic#
- "($src1"#!if(!eq(NvOpNum, 0),".new, ",", ")#
- "$src2"#!if(!eq(NvOpNum, 1),".new))","))")#" jump:"
- #!if(isTak, "t","nt")#" $offset", []> {
-
- bits<5> src1;
- bits<5> src2;
- bits<3> Ns; // New-Value Operand
- bits<5> RegOp; // Non-New-Value Operand
- bits<11> offset;
-
- let isTaken = isTak;
- let isPredicatedFalse = isNegCond;
- let opNewValue{0} = NvOpNum;
-
- let Ns = !if(!eq(NvOpNum, 0), src1{2-0}, src2{2-0});
- let RegOp = !if(!eq(NvOpNum, 0), src2, src1);
-
- let IClass = 0b0010;
- let Inst{27-26} = 0b00;
- let Inst{25-23} = majOp;
- let Inst{22} = isNegCond;
- let Inst{18-16} = Ns;
- let Inst{13} = isTak;
- let Inst{12-8} = RegOp;
- let Inst{21-20} = offset{10-9};
- let Inst{7-1} = offset{8-2};
-}
-
-
-multiclass NVJrr_cond<string mnemonic, bits<3> majOp, bit NvOpNum,
- bit isNegCond> {
- // Branch not taken:
- def _nt: NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 0>;
- // Branch taken:
- def _t : NVJrr_template<mnemonic, majOp, NvOpNum, isNegCond, 1>;
-}
-
-// NvOpNum = 0 -> First Operand is a new-value Register
-// NvOpNum = 1 -> Second Operand is a new-value Register
-
-multiclass NVJrr_base<string mnemonic, string BaseOp, bits<3> majOp,
- bit NvOpNum> {
- let BaseOpcode = BaseOp#_NVJ in {
- defm _t_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 0>; // True cond
- defm _f_jumpnv : NVJrr_cond<mnemonic, majOp, NvOpNum, 1>; // False cond
- }
-}
-
-// if ([!]cmp.eq(Ns.new,Rt)) jump:[n]t #r9:2
-// if ([!]cmp.gt(Ns.new,Rt)) jump:[n]t #r9:2
-// if ([!]cmp.gtu(Ns.new,Rt)) jump:[n]t #r9:2
-// if ([!]cmp.gt(Rt,Ns.new)) jump:[n]t #r9:2
-// if ([!]cmp.gtu(Rt,Ns.new)) jump:[n]t #r9:2
-
-let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
- Defs = [PC], hasSideEffects = 0 in {
- defm J4_cmpeq : NVJrr_base<"cmp.eq", "CMPEQ", 0b000, 0>, PredRel;
- defm J4_cmpgt : NVJrr_base<"cmp.gt", "CMPGT", 0b001, 0>, PredRel;
- defm J4_cmpgtu : NVJrr_base<"cmp.gtu", "CMPGTU", 0b010, 0>, PredRel;
- defm J4_cmplt : NVJrr_base<"cmp.gt", "CMPLT", 0b011, 1>, PredRel;
- defm J4_cmpltu : NVJrr_base<"cmp.gtu", "CMPLTU", 0b100, 1>, PredRel;
-}
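-
-// Note how the NvOpNum = 1 variants synthesize the missing lt/ltu compares.
-// A sketch of the resulting assembly, derived from the template string
-// (illustrative only):
-//   J4_cmplt_t_jumpnv_t: "if (cmp.gt($src1, $src2.new)) jump:t $offset"
-// i.e. Ns.new < Rt is emitted as cmp.gt with the operands swapped.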
-
-//===----------------------------------------------------------------------===//
-// Template class and multiclasses for the new-value compare jump
-// instructions with a register and an unsigned immediate (U5) operand.
-//===----------------------------------------------------------------------===//
-
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11,
- opExtentAlign = 2 in
-class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond,
- bit isTak>
- : NVInst_V4<(outs),
- (ins IntRegs:$src1, u5_0Imm:$src2, brtarget:$offset),
- "if ("#!if(isNegCond, "!","")#mnemonic#"($src1.new, #$src2)) jump:"
- #!if(isTak, "t","nt")#" $offset", []> {
-
-  let isTaken = isTak;
-  let isPredicatedFalse = isNegCond;
-
- bits<3> src1;
- bits<5> src2;
- bits<11> offset;
-
- let IClass = 0b0010;
- let Inst{26} = 0b1;
- let Inst{25-23} = majOp;
- let Inst{22} = isNegCond;
- let Inst{18-16} = src1;
- let Inst{13} = isTak;
- let Inst{12-8} = src2;
- let Inst{21-20} = offset{10-9};
- let Inst{7-1} = offset{8-2};
-}
-
-multiclass NVJri_cond<string mnemonic, bits<3> majOp, bit isNegCond> {
- // Branch not taken:
- def _nt: NVJri_template<mnemonic, majOp, isNegCond, 0>;
- // Branch taken:
- def _t : NVJri_template<mnemonic, majOp, isNegCond, 1>;
-}
-
-multiclass NVJri_base<string mnemonic, string BaseOp, bits<3> majOp> {
- let BaseOpcode = BaseOp#_NVJri in {
- defm _t_jumpnv : NVJri_cond<mnemonic, majOp, 0>; // True Cond
- defm _f_jumpnv : NVJri_cond<mnemonic, majOp, 1>; // False cond
- }
-}
-
-// if ([!]cmp.eq(Ns.new,#U5)) jump:[n]t #r9:2
-// if ([!]cmp.gt(Ns.new,#U5)) jump:[n]t #r9:2
-// if ([!]cmp.gtu(Ns.new,#U5)) jump:[n]t #r9:2
-
-let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
- Defs = [PC], hasSideEffects = 0 in {
- defm J4_cmpeqi : NVJri_base<"cmp.eq", "CMPEQ", 0b000>, PredRel;
- defm J4_cmpgti : NVJri_base<"cmp.gt", "CMPGT", 0b001>, PredRel;
- defm J4_cmpgtui : NVJri_base<"cmp.gtu", "CMPGTU", 0b010>, PredRel;
-}
-
-//===----------------------------------------------------------------------===//
-// Template class and multiclasses for the new-value compare jump
-// instructions with a register and a hardcoded 0/-1 immediate value.
-//===----------------------------------------------------------------------===//
-
-let isExtendable = 1, isExtentSigned = 1, opExtentBits = 11,
- opExtentAlign = 2 in
-class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal,
- bit isNegCond, bit isTak>
- : NVInst_V4<(outs),
- !if(!eq(ImmVal, "{-1}"),
- (ins IntRegs:$src1, n1Const:$n1, brtarget:$offset),
- (ins IntRegs:$src1, brtarget:$offset)),
- "if ("#!if(isNegCond, "!","")#mnemonic
- #"($src1.new, #" # !if(!eq(ImmVal, "{-1}"), "$n1", ImmVal) # ")) jump:"
- #!if(isTak, "t","nt")#" $offset", []> {
-
-  let isTaken = isTak;
-  let isPredicatedFalse = isNegCond;
- let opExtendable = !if(!eq(ImmVal, "{-1}"), 2, 1);
-
- bits<3> src1;
- bits<11> offset;
- let IClass = 0b0010;
- let Inst{26} = 0b1;
- let Inst{25-23} = majOp;
- let Inst{22} = isNegCond;
- let Inst{18-16} = src1;
- let Inst{13} = isTak;
- let Inst{21-20} = offset{10-9};
- let Inst{7-1} = offset{8-2};
-}
-
-multiclass NVJ_ConstImm_cond<string mnemonic, bits<3> majOp, string ImmVal,
- bit isNegCond> {
- // Branch not taken:
- def _nt: NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 0>;
- // Branch taken:
- def _t : NVJ_ConstImm_template<mnemonic, majOp, ImmVal, isNegCond, 1>;
-}
-
-multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp,
- string ImmVal> {
- let BaseOpcode = BaseOp#_NVJ_ConstImm in {
- defm _t_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 0>; // True
- defm _f_jumpnv : NVJ_ConstImm_cond<mnemonic, majOp, ImmVal, 1>; // False
- }
-}
-
-// if ([!]tstbit(Ns.new,#0)) jump:[n]t #r9:2
-// if ([!]cmp.eq(Ns.new,#-1)) jump:[n]t #r9:2
-// if ([!]cmp.gt(Ns.new,#-1)) jump:[n]t #r9:2
-
-let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator=1,
- Defs = [PC], hasSideEffects = 0 in {
- defm J4_tstbit0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel;
- defm J4_cmpeqn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "{-1}">, PredRel;
- defm J4_cmpgtn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "{-1}">, PredRel;
-}
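-
-// A sketch of the assembly these expand to, derived from the template
-// string (illustrative only):
-//   J4_tstbit0_t_jumpnv_nt: "if (tstbit($src1.new, #0)) jump:nt $offset"
-//   J4_cmpeqn1_t_jumpnv_t:  "if (cmp.eq($src1.new, #$n1)) jump:t $offset",
-//                           where $n1 is the hardwired -1 operand.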
-
-// J4_hintjumpr: Hint indirect conditional jump.
-let isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def J4_hintjumpr: JRInst <
- (outs),
- (ins IntRegs:$Rs),
- "hintjr($Rs)"> {
- bits<5> Rs;
- let IClass = 0b0101;
- let Inst{27-21} = 0b0010101;
- let Inst{20-16} = Rs;
- }
-
-//===----------------------------------------------------------------------===//
-// NV/J -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// CR +
-//===----------------------------------------------------------------------===//
-
-// PC-relative add
-let hasNewValue = 1, isExtendable = 1, opExtendable = 1,
- isExtentSigned = 0, opExtentBits = 6, hasSideEffects = 0, Uses = [PC] in
-def C4_addipc : CRInst <(outs IntRegs:$Rd), (ins u6_0Ext:$u6),
- "$Rd = add(pc, #$u6)", [], "", CR_tc_2_SLOT3 > {
- bits<5> Rd;
- bits<6> u6;
-
- let IClass = 0b0110;
- let Inst{27-16} = 0b101001001001;
- let Inst{12-7} = u6;
- let Inst{4-0} = Rd;
- }
-
-let hasSideEffects = 0 in
-class T_LOGICAL_3OP<string MnOp1, string MnOp2, bits<2> OpBits, bit IsNeg>
- : CRInst<(outs PredRegs:$Pd),
- (ins PredRegs:$Ps, PredRegs:$Pt, PredRegs:$Pu),
- "$Pd = " # MnOp1 # "($Ps, " # MnOp2 # "($Pt, " #
- !if (IsNeg,"!","") # "$Pu))",
- [], "", CR_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<2> Ps;
- bits<2> Pt;
- bits<2> Pu;
-
- let IClass = 0b0110;
- let Inst{27-24} = 0b1011;
- let Inst{23} = IsNeg;
- let Inst{22-21} = OpBits;
- let Inst{20} = 0b1;
- let Inst{17-16} = Ps;
- let Inst{13} = 0b0;
- let Inst{9-8} = Pt;
- let Inst{7-6} = Pu;
- let Inst{1-0} = Pd;
-}
-
-def C4_and_and : T_LOGICAL_3OP<"and", "and", 0b00, 0>;
-def C4_and_or : T_LOGICAL_3OP<"and", "or", 0b01, 0>;
-def C4_or_and : T_LOGICAL_3OP<"or", "and", 0b10, 0>;
-def C4_or_or : T_LOGICAL_3OP<"or", "or", 0b11, 0>;
-def C4_and_andn : T_LOGICAL_3OP<"and", "and", 0b00, 1>;
-def C4_and_orn : T_LOGICAL_3OP<"and", "or", 0b01, 1>;
-def C4_or_andn : T_LOGICAL_3OP<"or", "and", 0b10, 1>;
-def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>;
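-
-// A sketch of the assembly syntax produced by the template (illustrative
-// only): C4_and_orn yields "$Pd = and($Ps, or($Pt, !$Pu))".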
-
-//===----------------------------------------------------------------------===//
-// CR -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// XTYPE/ALU +
-//===----------------------------------------------------------------------===//
-
-// Logical with-not instructions.
-def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>;
-def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>;
-
-let hasNewValue = 1, hasSideEffects = 0 in
-def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-21} = 0b0101111;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{4-0} = Rd;
-}
-
-// Add and accumulate.
-// Rd=add(Rs,add(Ru,#s6))
-let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 6,
- opExtendable = 3 in
-def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$Ru, s6_0Ext:$s6),
- "$Rd = add($Rs, add($Ru, #$s6))" , [],
- "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Ru;
- bits<6> s6;
-
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b10110;
- let Inst{22-21} = s6{5-4};
- let Inst{20-16} = Rs;
- let Inst{13} = s6{3};
- let Inst{12-8} = Rd;
- let Inst{7-5} = s6{2-0};
- let Inst{4-0} = Ru;
- }
-
-let isExtentSigned = 1, hasSideEffects = 0, hasNewValue = 1, isExtendable = 1,
- opExtentBits = 6, opExtendable = 2 in
-def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, s6_0Ext:$s6, IntRegs:$Ru),
- "$Rd = add($Rs, sub(#$s6, $Ru))",
- [], "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<6> s6;
- bits<5> Ru;
-
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b10111;
- let Inst{22-21} = s6{5-4};
- let Inst{20-16} = Rs;
- let Inst{13} = s6{3};
- let Inst{12-8} = Rd;
- let Inst{7-5} = s6{2-0};
- let Inst{4-0} = Ru;
- }
-
-def S4_extractp_rp : T_S3op_64 < "extract", 0b11, 0b100, 0>;
-def S4_extractp : T_S2op_extract <"extract", 0b1010, DoubleRegs, u6_0Imm>;
-
-let hasNewValue = 1 in {
- def S4_extract_rp : T_S3op_extract<"extract", 0b01>;
- def S4_extract : T_S2op_extract <"extract", 0b1101, IntRegs, u5_0Imm>;
-}
-
-// Complex add/sub halfwords/words
-let Defs = [USR_OVF] in {
- def S4_vxaddsubh : T_S3op_64 < "vxaddsubh", 0b01, 0b100, 0, 1>;
- def S4_vxaddsubw : T_S3op_64 < "vxaddsubw", 0b01, 0b000, 0, 1>;
- def S4_vxsubaddh : T_S3op_64 < "vxsubaddh", 0b01, 0b110, 0, 1>;
- def S4_vxsubaddw : T_S3op_64 < "vxsubaddw", 0b01, 0b010, 0, 1>;
-}
-
-let Defs = [USR_OVF] in {
- def S4_vxaddsubhr : T_S3op_64 < "vxaddsubh", 0b11, 0b000, 0, 1, 1, 1>;
- def S4_vxsubaddhr : T_S3op_64 < "vxsubaddh", 0b11, 0b010, 0, 1, 1, 1>;
-}
-
-let Itinerary = M_tc_3x_SLOT23, Defs = [USR_OVF] in {
- def M4_mac_up_s1_sat: T_MType_acc_rr<"+= mpy", 0b011, 0b000, 0, [], 0, 1, 1>;
- def M4_nac_up_s1_sat: T_MType_acc_rr<"-= mpy", 0b011, 0b001, 0, [], 0, 1, 1>;
-}
-
-// Logical xor with xor accumulation.
-// Rxx^=xor(Rss,Rtt)
-let hasSideEffects = 0 in
-def M4_xor_xacc
- : SInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rxx ^= xor($Rss, $Rtt)", [],
- "$dst2 = $Rxx", S_3op_tc_1_SLOT23> {
- bits<5> Rxx;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1100;
-
- let Inst{27-22} = 0b101010;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- let Inst{7-5} = 0b000;
- let Inst{4-0} = Rxx;
- }
-
-// Rotate and reduce bytes
-// Rdd=vrcrotate(Rss,Rt,#u2)
-let hasSideEffects = 0 in
-def S4_vrcrotate
- : SInst <(outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$Rss, IntRegs:$Rt, u2_0Imm:$u2),
- "$Rdd = vrcrotate($Rss, $Rt, #$u2)",
- [], "", S_3op_tc_3x_SLOT23> {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rt;
- bits<2> u2;
-
- let IClass = 0b1100;
-
- let Inst{27-22} = 0b001111;
- let Inst{20-16} = Rss;
- let Inst{13} = u2{1};
- let Inst{12-8} = Rt;
- let Inst{7-6} = 0b11;
- let Inst{5} = u2{0};
- let Inst{4-0} = Rdd;
- }
-
-// Rotate and reduce bytes with accumulation
-// Rxx+=vrcrotate(Rss,Rt,#u2)
-let hasSideEffects = 0 in
-def S4_vrcrotate_acc
- : SInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt, u2_0Imm:$u2),
- "$Rxx += vrcrotate($Rss, $Rt, #$u2)", [],
- "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> {
- bits<5> Rxx;
- bits<5> Rss;
- bits<5> Rt;
- bits<2> u2;
-
- let IClass = 0b1100;
-
- let Inst{27-21} = 0b1011101;
- let Inst{20-16} = Rss;
- let Inst{13} = u2{1};
- let Inst{12-8} = Rt;
- let Inst{5} = u2{0};
- let Inst{4-0} = Rxx;
- }
-
-// Vector reduce conditional negate halfwords
-let hasSideEffects = 0 in
-def S2_vrcnegh
- : SInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt),
- "$Rxx += vrcnegh($Rss, $Rt)", [],
- "$dst2 = $Rxx", S_3op_tc_3x_SLOT23> {
- bits<5> Rxx;
- bits<5> Rss;
- bits<5> Rt;
-
- let IClass = 0b1100;
-
- let Inst{27-21} = 0b1011001;
- let Inst{20-16} = Rss;
- let Inst{13} = 0b1;
- let Inst{12-8} = Rt;
- let Inst{7-5} = 0b111;
- let Inst{4-0} = Rxx;
- }
-
-// Split bitfield
-def A4_bitspliti : T_S2op_2_di <"bitsplit", 0b110, 0b100>;
-
-// Arithmetic/Convergent round
-def A4_cround_ri : T_S2op_2_ii <"cround", 0b111, 0b000>;
-
-def A4_round_ri : T_S2op_2_ii <"round", 0b111, 0b100>;
-
-let Defs = [USR_OVF] in
-def A4_round_ri_sat : T_S2op_2_ii <"round", 0b111, 0b110, 1>;
-
-// Logical-logical words.
-// Compound or-and -- Rx=or(Ru,and(Rx,#s10))
-let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 10,
- opExtendable = 3 in
-def S4_or_andix:
- ALU64Inst<(outs IntRegs:$Rx),
- (ins IntRegs:$Ru, IntRegs:$_src_, s10_0Ext:$s10),
- "$Rx = or($Ru, and($_src_, #$s10))" , [] ,
- "$_src_ = $Rx", ALU64_tc_2_SLOT23> {
- bits<5> Rx;
- bits<5> Ru;
- bits<10> s10;
-
- let IClass = 0b1101;
-
- let Inst{27-22} = 0b101001;
- let Inst{20-16} = Rx;
- let Inst{21} = s10{9};
- let Inst{13-5} = s10{8-0};
- let Inst{4-0} = Ru;
- }
-
-// Miscellaneous ALU64 instructions.
-//
-let hasNewValue = 1, hasSideEffects = 0 in
-def A4_modwrapu: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = modwrap($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-21} = 0b0011111;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{7-5} = 0b111;
- let Inst{4-0} = Rd;
-}
-
-let hasSideEffects = 0 in
-def A4_bitsplit: ALU64Inst<(outs DoubleRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = bitsplit($Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-24} = 0b0100;
- let Inst{21} = 0b1;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{4-0} = Rd;
-}
-
-let hasSideEffects = 0 in
-def dep_S2_packhl: ALU64Inst<(outs DoubleRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = packhl($Rs, $Rt):deprecated", [], "", ALU64_tc_1_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-24} = 0b0100;
- let Inst{21} = 0b0;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{4-0} = Rd;
-}
-
-let hasNewValue = 1, hasSideEffects = 0 in
-def dep_A2_addsat: ALU64Inst<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = add($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-21} = 0b0101100;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{7} = 0b0;
- let Inst{4-0} = Rd;
-}
-
-let hasNewValue = 1, hasSideEffects = 0 in
-def dep_A2_subsat: ALU64Inst<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = sub($Rs, $Rt):sat:deprecated", [], "", ALU64_tc_2_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-21} = 0b0101100;
- let Inst{20-16} = Rt;
- let Inst{12-8} = Rs;
- let Inst{7} = 0b1;
- let Inst{4-0} = Rd;
-}
-
-// Rx[&|]=xor(Rs,Rt)
-def M4_or_xor : T_MType_acc_rr < "|= xor", 0b110, 0b001, 0>;
-def M4_and_xor : T_MType_acc_rr < "&= xor", 0b010, 0b010, 0>;
-
-// Rx[&|^]=or(Rs,Rt)
-def M4_xor_or : T_MType_acc_rr < "^= or", 0b110, 0b011, 0>;
-
-let CextOpcode = "ORr_ORr" in
-def M4_or_or : T_MType_acc_rr < "|= or", 0b110, 0b000, 0>;
-def M4_and_or : T_MType_acc_rr < "&= or", 0b010, 0b001, 0>;
-
-// Rx[&|^]=and(Rs,Rt)
-def M4_xor_and : T_MType_acc_rr < "^= and", 0b110, 0b010, 0>;
-
-let CextOpcode = "ORr_ANDr" in
-def M4_or_and : T_MType_acc_rr < "|= and", 0b010, 0b011, 0>;
-def M4_and_and : T_MType_acc_rr < "&= and", 0b010, 0b000, 0>;
-
-// Rx[&|^]=and(Rs,~Rt)
-def M4_xor_andn : T_MType_acc_rr < "^= and", 0b001, 0b010, 0, [], 1>;
-def M4_or_andn : T_MType_acc_rr < "|= and", 0b001, 0b000, 0, [], 1>;
-def M4_and_andn : T_MType_acc_rr < "&= and", 0b001, 0b001, 0, [], 1>;
-
-// Compound or-or and or-and
-let isExtentSigned = 1, InputType = "imm", hasNewValue = 1, isExtendable = 1,
- opExtentBits = 10, opExtendable = 3 in
-class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode>
- : MInst_acc <(outs IntRegs:$Rx),
- (ins IntRegs:$src1, IntRegs:$Rs, s10_0Ext:$s10),
- "$Rx |= "#mnemonic#"($Rs, #$s10)", [],
- "$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel {
- bits<5> Rx;
- bits<5> Rs;
- bits<10> s10;
-
- let IClass = 0b1101;
-
- let Inst{27-24} = 0b1010;
- let Inst{23-22} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{21} = s10{9};
- let Inst{13-5} = s10{8-0};
- let Inst{4-0} = Rx;
- }
-
-let CextOpcode = "ORr_ANDr" in
-def S4_or_andi : T_CompOR <"and", 0b00, and>;
-
-let CextOpcode = "ORr_ORr" in
-def S4_or_ori : T_CompOR <"or", 0b10, or>;
-
-// Modulo wrap
-// Rd=modwrap(Rs,Rt)
-// Round
-// Rd=cround(Rs,#u5)
-// Rd=cround(Rs,Rt)
-// Rd=round(Rs,#u5)[:sat]
-// Rd=round(Rs,Rt)[:sat]
-// Vector reduce add unsigned halfwords
-// Rd=vraddh(Rss,Rtt)
-// Vector add bytes
-// Rdd=vaddb(Rss,Rtt)
-// Vector conditional negate
-// Rdd=vcnegh(Rss,Rt)
-// Rxx+=vrcnegh(Rss,Rt)
-// Vector maximum bytes
-// Rdd=vmaxb(Rtt,Rss)
-// Vector reduce maximum halfwords
-// Rxx=vrmaxh(Rss,Ru)
-// Rxx=vrmaxuh(Rss,Ru)
-// Vector reduce maximum words
-// Rxx=vrmaxuw(Rss,Ru)
-// Rxx=vrmaxw(Rss,Ru)
-// Vector minimum bytes
-// Rdd=vminb(Rtt,Rss)
-// Vector reduce minimum halfwords
-// Rxx=vrminh(Rss,Ru)
-// Rxx=vrminuh(Rss,Ru)
-// Vector reduce minimum words
-// Rxx=vrminuw(Rss,Ru)
-// Rxx=vrminw(Rss,Ru)
-// Vector subtract bytes
-// Rdd=vsubb(Rss,Rtt)
-
-//===----------------------------------------------------------------------===//
-// XTYPE/ALU -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// XTYPE/BIT +
-//===----------------------------------------------------------------------===//
-
-// Bit reverse
-def S2_brevp : T_S2op_3 <"brev", 0b11, 0b110>;
-
-// Bit count
-def S2_ct0p : T_COUNT_LEADING_64<"ct0", 0b111, 0b010>;
-def S2_ct1p : T_COUNT_LEADING_64<"ct1", 0b111, 0b100>;
-def S4_clbpnorm : T_COUNT_LEADING_64<"normamt", 0b011, 0b000>;
-
-let hasSideEffects = 0, hasNewValue = 1 in
-def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6_0Imm:$s6),
- "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> {
- bits<5> Rs;
- bits<5> Rd;
- bits<6> s6;
- let IClass = 0b1000;
- let Inst{27-24} = 0b1100;
- let Inst{23-21} = 0b001;
- let Inst{20-16} = Rs;
- let Inst{13-8} = s6;
- let Inst{7-5} = 0b000;
- let Inst{4-0} = Rd;
-}
-
-let hasSideEffects = 0, hasNewValue = 1 in
-def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6_0Imm:$s6),
- "$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> {
- bits<5> Rs;
- bits<5> Rd;
- bits<6> s6;
- let IClass = 0b1000;
- let Inst{27-24} = 0b1000;
- let Inst{23-21} = 0b011;
- let Inst{20-16} = Rs;
- let Inst{13-8} = s6;
- let Inst{7-5} = 0b010;
- let Inst{4-0} = Rd;
-}
-
-
-// Bit test/set/clear
-def S4_ntstbit_i : T_TEST_BIT_IMM<"!tstbit", 0b001>;
-def S4_ntstbit_r : T_TEST_BIT_REG<"!tstbit", 1>;
-
-def C4_nbitsset : T_TEST_BITS_REG<"!bitsset", 0b01, 1>;
-def C4_nbitsclr : T_TEST_BITS_REG<"!bitsclr", 0b10, 1>;
-def C4_nbitsclri : T_TEST_BITS_IMM<"!bitsclr", 0b10, 1>;
-
-//===----------------------------------------------------------------------===//
-// XTYPE/BIT -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// XTYPE/MPY +
-//===----------------------------------------------------------------------===//
-
-// Rd=add(#u6,mpyi(Rs,#U6)) -- Multiply by immed and add immed.
-
-let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1 in
-def M4_mpyri_addi : MInst<(outs IntRegs:$Rd),
- (ins u6_0Ext:$u6, IntRegs:$Rs, u6_0Imm:$U6),
- "$Rd = add(#$u6, mpyi($Rs, #$U6))" , [],"",ALU64_tc_3x_SLOT23> {
- bits<5> Rd;
- bits<6> u6;
- bits<5> Rs;
- bits<6> U6;
-
- let IClass = 0b1101;
-
- let Inst{27-24} = 0b1000;
- let Inst{23} = U6{5};
- let Inst{22-21} = u6{5-4};
- let Inst{20-16} = Rs;
- let Inst{13} = u6{3};
- let Inst{12-8} = Rd;
- let Inst{7-5} = u6{2-0};
- let Inst{4-0} = U6{4-0};
- }
-
-// Rd=add(#u6,mpyi(Rs,Rt))
-let CextOpcode = "ADD_MPY", InputType = "imm", hasNewValue = 1,
- isExtendable = 1, opExtentBits = 6, opExtendable = 1 in
-def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd),
- (ins u6_0Ext:$u6, IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = add(#$u6, mpyi($Rs, $Rt))" , [], "", ALU64_tc_3x_SLOT23>, ImmRegRel {
- bits<5> Rd;
- bits<6> u6;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b01110;
- let Inst{22-21} = u6{5-4};
- let Inst{20-16} = Rs;
- let Inst{13} = u6{3};
- let Inst{12-8} = Rt;
- let Inst{7-5} = u6{2-0};
- let Inst{4-0} = Rd;
- }
-
-let hasNewValue = 1 in
-class T_AddMpy <bit MajOp, PatLeaf ImmPred, dag ins>
- : ALU64Inst <(outs IntRegs:$dst), ins,
- "$dst = add($src1, mpyi("#!if(MajOp,"$src3, #$src2))",
- "#$src2, $src3))"), [],
- "", ALU64_tc_3x_SLOT23> {
- bits<5> dst;
- bits<5> src1;
- bits<8> src2;
- bits<5> src3;
-
- let IClass = 0b1101;
-
- bits<6> ImmValue = !if(MajOp, src2{5-0}, src2{7-2});
-
- let Inst{27-24} = 0b1111;
- let Inst{23} = MajOp;
- let Inst{22-21} = ImmValue{5-4};
- let Inst{20-16} = src3;
- let Inst{13} = ImmValue{3};
- let Inst{12-8} = dst;
- let Inst{7-5} = ImmValue{2-0};
- let Inst{4-0} = src1;
- }
-
-def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred,
- (ins IntRegs:$src1, u6_2Imm:$src2, IntRegs:$src3)>;
-
-let isExtendable = 1, opExtentBits = 6, opExtendable = 3,
- CextOpcode = "ADD_MPY", InputType = "imm" in
-def M4_mpyri_addr : T_AddMpy<0b1, u32_0ImmPred,
- (ins IntRegs:$src1, IntRegs:$src3, u6_0Ext:$src2)>, ImmRegRel;
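-
-// A worked example of the ImmValue scaling above (illustrative only): for
-// M4_mpyri_addr_u2 the immediate operand is u6_2Imm, so MajOp = 0 selects
-// src2{7-2} and an assembly immediate of #24 is encoded as 24 >> 2 = 6.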
-
-// Rx=add(Ru,mpyi(Rx,Rs))
-let CextOpcode = "ADD_MPY", InputType = "reg", hasNewValue = 1 in
-def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx),
- (ins IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs),
- "$Rx = add($Ru, mpyi($_src_, $Rs))", [],
- "$_src_ = $Rx", M_tc_3x_SLOT23>, ImmRegRel {
- bits<5> Rx;
- bits<5> Ru;
- bits<5> Rs;
-
- let IClass = 0b1110;
-
- let Inst{27-21} = 0b0011000;
- let Inst{12-8} = Rx;
- let Inst{4-0} = Ru;
- let Inst{20-16} = Rs;
- }
-
-
-// Vector reduce multiply word by signed half (32x16)
-//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
-def M4_vrmpyeh_s0 : T_M2_vmpy<"vrmpyweh", 0b010, 0b100, 0, 0, 0>;
-def M4_vrmpyeh_s1 : T_M2_vmpy<"vrmpyweh", 0b110, 0b100, 1, 0, 0>;
-
-//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
-def M4_vrmpyoh_s0 : T_M2_vmpy<"vrmpywoh", 0b001, 0b010, 0, 0, 0>;
-def M4_vrmpyoh_s1 : T_M2_vmpy<"vrmpywoh", 0b101, 0b010, 1, 0, 0>;
-
-//Rxx+=vrmpyweh(Rss,Rtt)[:<<1]
-def M4_vrmpyeh_acc_s0: T_M2_vmpy_acc<"vrmpyweh", 0b001, 0b110, 0, 0>;
-def M4_vrmpyeh_acc_s1: T_M2_vmpy_acc<"vrmpyweh", 0b101, 0b110, 1, 0>;
-
-//Rxx+=vrmpywoh(Rss,Rtt)[:<<1]
-def M4_vrmpyoh_acc_s0: T_M2_vmpy_acc<"vrmpywoh", 0b011, 0b110, 0, 0>;
-def M4_vrmpyoh_acc_s1: T_M2_vmpy_acc<"vrmpywoh", 0b111, 0b110, 1, 0>;
-
-// Vector multiply halfwords, signed by unsigned
-// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
-def M2_vmpy2su_s0 : T_XTYPE_mpy64 < "vmpyhsu", 0b000, 0b111, 1, 0, 0>;
-def M2_vmpy2su_s1 : T_XTYPE_mpy64 < "vmpyhsu", 0b100, 0b111, 1, 1, 0>;
-
-// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
-def M2_vmac2su_s0 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b011, 0b101, 1, 0, 0>;
-def M2_vmac2su_s1 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b111, 0b101, 1, 1, 0>;
-
-// Vector polynomial multiply halfwords
-// Rdd=vpmpyh(Rs,Rt)
-def M4_vpmpyh : T_XTYPE_mpy64 < "vpmpyh", 0b110, 0b111, 0, 0, 0>;
-
-// Rxx^=vpmpyh(Rs,Rt)
-def M4_vpmpyh_acc : T_XTYPE_mpy64_acc < "vpmpyh", "^", 0b101, 0b111, 0, 0, 0>;
-
-// Polynomial multiply words
-// Rdd=pmpyw(Rs,Rt)
-def M4_pmpyw : T_XTYPE_mpy64 < "pmpyw", 0b010, 0b111, 0, 0, 0>;
-
-// Rxx^=pmpyw(Rs,Rt)
-def M4_pmpyw_acc : T_XTYPE_mpy64_acc < "pmpyw", "^", 0b001, 0b111, 0, 0, 0>;
-
-//===----------------------------------------------------------------------===//
-// XTYPE/MPY -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ALU64/Vector compare
-//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Template class for vector compare
-//===----------------------------------------------------------------------===//
-
-let hasSideEffects = 0 in
-class T_vcmpImm <string Str, bits<2> cmpOp, bits<2> minOp, Operand ImmOprnd>
- : ALU64_rr <(outs PredRegs:$Pd),
- (ins DoubleRegs:$Rss, ImmOprnd:$Imm),
- "$Pd = "#Str#"($Rss, #$Imm)",
- [], "", ALU64_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<5> Rss;
- bits<32> Imm;
- bits<8> ImmBits;
- let ImmBits{6-0} = Imm{6-0};
- let ImmBits{7} = !if (!eq(cmpOp,0b10), 0b0, Imm{7}); // 0 for vcmp[bhw].gtu
-
- let IClass = 0b1101;
-
- let Inst{27-24} = 0b1100;
- let Inst{22-21} = cmpOp;
- let Inst{20-16} = Rss;
- let Inst{12-5} = ImmBits;
- let Inst{4-3} = minOp;
- let Inst{1-0} = Pd;
- }
-
-// Vector compare bytes
-def A4_vcmpbgt : T_vcmp <"vcmpb.gt", 0b1010>;
-
-let AsmString = "$Pd = any8(vcmpb.eq($Rss, $Rtt))" in
-def A4_vcmpbeq_any : T_vcmp <"any8(vcmpb.eq", 0b1000>;
-
-def A4_vcmpbeqi : T_vcmpImm <"vcmpb.eq", 0b00, 0b00, u8_0Imm>;
-def A4_vcmpbgti : T_vcmpImm <"vcmpb.gt", 0b01, 0b00, s8_0Imm>;
-def A4_vcmpbgtui : T_vcmpImm <"vcmpb.gtu", 0b10, 0b00, u7_0Imm>;
-
-// Vector compare halfwords
-def A4_vcmpheqi : T_vcmpImm <"vcmph.eq", 0b00, 0b01, s8_0Imm>;
-def A4_vcmphgti : T_vcmpImm <"vcmph.gt", 0b01, 0b01, s8_0Imm>;
-def A4_vcmphgtui : T_vcmpImm <"vcmph.gtu", 0b10, 0b01, u7_0Imm>;
-
-// Vector compare words
-def A4_vcmpweqi : T_vcmpImm <"vcmpw.eq", 0b00, 0b10, s8_0Imm>;
-def A4_vcmpwgti : T_vcmpImm <"vcmpw.gt", 0b01, 0b10, s8_0Imm>;
-def A4_vcmpwgtui : T_vcmpImm <"vcmpw.gtu", 0b10, 0b10, u7_0Imm>;
-
-//===----------------------------------------------------------------------===//
-// XTYPE/SHIFT +
-//===----------------------------------------------------------------------===//
-// Shift by immediate and accumulate/logical.
-// Rx=add(#u8,asl(Rx,#U5)) Rx=add(#u8,lsr(Rx,#U5))
-// Rx=sub(#u8,asl(Rx,#U5)) Rx=sub(#u8,lsr(Rx,#U5))
-// Rx=and(#u8,asl(Rx,#U5)) Rx=and(#u8,lsr(Rx,#U5))
-// Rx=or(#u8,asl(Rx,#U5)) Rx=or(#u8,lsr(Rx,#U5))
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
- hasNewValue = 1, opNewValue = 0 in
-class T_S4_ShiftOperate<string MnOp, string MnSh, bit asl_lsr,
- bits<2> MajOp, InstrItinClass Itin>
- : MInst_acc<(outs IntRegs:$Rd), (ins u8_0Ext:$u8, IntRegs:$Rx, u5_0Imm:$U5),
- "$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))",
- [], "$Rd = $Rx", Itin> {
-
- bits<5> Rd;
- bits<8> u8;
- bits<5> Rx;
- bits<5> U5;
-
- let IClass = 0b1101;
- let Inst{27-24} = 0b1110;
- let Inst{23-21} = u8{7-5};
- let Inst{20-16} = Rd;
- let Inst{13} = u8{4};
- let Inst{12-8} = U5;
- let Inst{7-5} = u8{3-1};
- let Inst{4} = asl_lsr;
- let Inst{3} = u8{0};
- let Inst{2-1} = MajOp;
-}
-
-multiclass T_ShiftOperate<string mnemonic, bits<2> MajOp, InstrItinClass Itin> {
- def _asl_ri : T_S4_ShiftOperate<mnemonic, "asl", 0, MajOp, Itin>;
- def _lsr_ri : T_S4_ShiftOperate<mnemonic, "lsr", 1, MajOp, Itin>;
-}
-
-defm S4_addi : T_ShiftOperate<"add", 0b10, ALU64_tc_2_SLOT23>;
-defm S4_andi : T_ShiftOperate<"and", 0b00, ALU64_tc_2_SLOT23>;
-defm S4_ori : T_ShiftOperate<"or", 0b01, ALU64_tc_1_SLOT23>;
-defm S4_subi : T_ShiftOperate<"sub", 0b11, ALU64_tc_1_SLOT23>;
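-
-// For reference, each defm above produces an _asl_ri and an _lsr_ri def;
-// e.g. S4_addi expands to (illustrative sketch):
-//   S4_addi_asl_ri: "$Rd = add(#$u8, asl($Rx, #$U5))"
-//   S4_addi_lsr_ri: "$Rd = add(#$u8, lsr($Rx, #$U5))"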
-
-// Vector conditional negate
-// Rdd=vcnegh(Rss,Rt)
-let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in
-def S2_vcnegh : T_S3op_shiftVect < "vcnegh", 0b11, 0b01>;
-
-// Rd=[cround|round](Rs,Rt)
-let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in {
- def A4_cround_rr : T_S3op_3 < "cround", IntRegs, 0b11, 0b00>;
- def A4_round_rr : T_S3op_3 < "round", IntRegs, 0b11, 0b10>;
-}
-
-// Rd=round(Rs,Rt):sat
-let hasNewValue = 1, Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in
-def A4_round_rr_sat : T_S3op_3 < "round", IntRegs, 0b11, 0b11, 1>;
-
-// Rd=[cmpyiwh|cmpyrwh](Rss,Rt):<<1:rnd:sat
-let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in {
- def M4_cmpyi_wh : T_S3op_8<"cmpyiwh", 0b100, 1, 1, 1>;
- def M4_cmpyr_wh : T_S3op_8<"cmpyrwh", 0b110, 1, 1, 1>;
-}
-
-// Rdd=[add|sub](Rss,Rtt,Px):carry
-let isPredicateLate = 1, hasSideEffects = 0 in
-class T_S3op_carry <string mnemonic, bits<3> MajOp>
- : SInst < (outs DoubleRegs:$Rdd, PredRegs:$Px),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, PredRegs:$Pu),
- "$Rdd = "#mnemonic#"($Rss, $Rtt, $Pu):carry",
- [], "$Px = $Pu", S_3op_tc_1_SLOT23 > {
- bits<5> Rdd;
- bits<5> Rss;
- bits<5> Rtt;
- bits<2> Pu;
-
- let IClass = 0b1100;
-
- let Inst{27-24} = 0b0010;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- let Inst{6-5} = Pu;
- let Inst{4-0} = Rdd;
- }
-
-def A4_addp_c : T_S3op_carry < "add", 0b110 >;
-def A4_subp_c : T_S3op_carry < "sub", 0b111 >;
-
-let Itinerary = S_3op_tc_3_SLOT23, hasSideEffects = 0 in
-class T_S3op_6 <string mnemonic, bits<3> MinOp, bit isUnsigned>
- : SInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Ru),
- "$Rxx = "#mnemonic#"($Rss, $Ru)" ,
- [] , "$dst2 = $Rxx"> {
- bits<5> Rxx;
- bits<5> Rss;
- bits<5> Ru;
-
- let IClass = 0b1100;
-
- let Inst{27-21} = 0b1011001;
- let Inst{20-16} = Rss;
- let Inst{13} = isUnsigned;
- let Inst{12-8} = Rxx;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Ru;
- }
-
-// Vector reduce maximum halfwords
-// Rxx=vrmax[u]h(Rss,Ru)
-def A4_vrmaxh : T_S3op_6 < "vrmaxh", 0b001, 0>;
-def A4_vrmaxuh : T_S3op_6 < "vrmaxuh", 0b001, 1>;
-
-// Vector reduce maximum words
-// Rxx=vrmax[u]w(Rss,Ru)
-def A4_vrmaxw : T_S3op_6 < "vrmaxw", 0b010, 0>;
-def A4_vrmaxuw : T_S3op_6 < "vrmaxuw", 0b010, 1>;
-
-// Vector reduce minimum halfwords
-// Rxx=vrmin[u]h(Rss,Ru)
-def A4_vrminh : T_S3op_6 < "vrminh", 0b101, 0>;
-def A4_vrminuh : T_S3op_6 < "vrminuh", 0b101, 1>;
-
-// Vector reduce minimum words
-// Rxx=vrmin[u]w(Rss,Ru)
-def A4_vrminw : T_S3op_6 < "vrminw", 0b110, 0>;
-def A4_vrminuw : T_S3op_6 < "vrminuw", 0b110, 1>;
-
-// Shift an immediate left by register amount.
-let hasNewValue = 1, hasSideEffects = 0 in
-def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6_0Imm:$s6, IntRegs:$Rt),
- "$Rd = lsl(#$s6, $Rt)" , [], "", S_3op_tc_1_SLOT23> {
- bits<5> Rd;
- bits<6> s6;
- bits<5> Rt;
-
- let IClass = 0b1100;
-
- let Inst{27-22} = 0b011010;
- let Inst{20-16} = s6{5-1};
- let Inst{12-8} = Rt;
- let Inst{7-6} = 0b11;
- let Inst{4-0} = Rd;
- let Inst{5} = s6{0};
- }
-
-//===----------------------------------------------------------------------===//
-// XTYPE/SHIFT -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// MEMOP
-//===----------------------------------------------------------------------===//
-
-
-//===----------------------------------------------------------------------===//
-// Template class for MemOp instructions with the register value.
-//===----------------------------------------------------------------------===//
-class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp,
- string memOp, bits<2> memOpBits> :
- MEMInst_V4<(outs),
- (ins IntRegs:$base, ImmOp:$offset, IntRegs:$delta),
- opc#"($base+#$offset)"#memOp#"$delta",
- []>,
- Requires<[UseMEMOP]> {
-
- bits<5> base;
- bits<5> delta;
- bits<32> offset;
-  bits<6> offsetBits; // memb: u6:0, memh: u6:1, memw: u6:2
-
- let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
- !if (!eq(opcBits, 0b01), offset{6-1},
- !if (!eq(opcBits, 0b10), offset{7-2},0)));
-
- let opExtentAlign = opcBits;
- let IClass = 0b0011;
- let Inst{27-24} = 0b1110;
- let Inst{22-21} = opcBits;
- let Inst{20-16} = base;
- let Inst{13} = 0b0;
- let Inst{12-7} = offsetBits;
- let Inst{6-5} = memOpBits;
- let Inst{4-0} = delta;
-}
-
-//===----------------------------------------------------------------------===//
-// Template class for MemOp instructions with the immediate value.
-//===----------------------------------------------------------------------===//
-class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp,
- string memOp, bits<2> memOpBits> :
- MEMInst_V4 <(outs),
- (ins IntRegs:$base, ImmOp:$offset, u5_0Imm:$delta),
- opc#"($base+#$offset)"#memOp#"#$delta"
- #!if(memOpBits{1},")", ""), // clrbit, setbit - include ')'
- []>,
- Requires<[UseMEMOP]> {
-
- bits<5> base;
- bits<5> delta;
- bits<32> offset;
-  bits<6> offsetBits; // memb: u6:0, memh: u6:1, memw: u6:2
-
- let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0},
- !if (!eq(opcBits, 0b01), offset{6-1},
- !if (!eq(opcBits, 0b10), offset{7-2},0)));
-
- let opExtentAlign = opcBits;
- let IClass = 0b0011;
- let Inst{27-24} = 0b1111;
- let Inst{22-21} = opcBits;
- let Inst{20-16} = base;
- let Inst{13} = 0b0;
- let Inst{12-7} = offsetBits;
- let Inst{6-5} = memOpBits;
- let Inst{4-0} = delta;
-}
-
-// multiclass to define MemOp instructions with register operand.
-multiclass MemOp_rr<string opc, bits<2> opcBits, Operand ImmOp> {
- def L4_add#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add
- def L4_sub#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub
- def L4_and#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and
- def L4_or#NAME : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or
-}
-
-// multiclass to define MemOp instructions with immediate Operand.
-multiclass MemOp_ri<string opc, bits<2> opcBits, Operand ImmOp> {
- def L4_iadd#NAME : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >;
- def L4_isub#NAME : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >;
- def L4_iand#NAME : MemOp_ri_base<opc, opcBits, ImmOp, " = clrbit(", 0b10>;
- def L4_ior#NAME : MemOp_ri_base<opc, opcBits, ImmOp, " = setbit(", 0b11>;
-}
-
-multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> {
- defm _#NAME : MemOp_rr <opc, opcBits, ImmOp>;
- defm _#NAME : MemOp_ri <opc, opcBits, ImmOp>;
-}
-
-// Define MemOp instructions.
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in {
- let opExtentBits = 6, accessSize = ByteAccess in
- defm memopb_io : MemOp_base <"memb", 0b00, u6_0Ext>;
-
- let opExtentBits = 7, accessSize = HalfWordAccess in
- defm memoph_io : MemOp_base <"memh", 0b01, u6_1Ext>;
-
- let opExtentBits = 8, accessSize = WordAccess in
- defm memopw_io : MemOp_base <"memw", 0b10, u6_2Ext>;
-}
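-
-// A sketch of what the word variant expands to (the names follow from the
-// multiclasses above; illustrative only):
-//   L4_add_memopw_io:  "memw($base+#$offset) += $delta"
-//   L4_iand_memopw_io: "memw($base+#$offset) = clrbit(#$delta)"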
-
-
-//===----------------------------------------------------------------------===//
-// XTYPE/PRED +
-//===----------------------------------------------------------------------===//
-
-// Hexagon V4 only supports the EQ/GT/GTU flavors of the byte/half compare
-// instructions; GE/GEU/LT/LTU/LE/LEU have no hardware support. The compiler
-// can still synthesize the missing flavors by combining the supported
-// compares, e.g. by swapping the operands or negating the result.
-
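-// For example (illustrative only): cmp.lt(Rs, Rt) can be emitted as
-// cmp.gt(Rt, Rs) with the operands swapped, and cmp.le(Rs, Rt) as the
-// negated compare !cmp.gt(Rs, Rt), which is what C4_cmpltei below provides
-// for the immediate case.
-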
-// The following instruction is not extended, since extending it would
-// generate incorrect code for negative numbers.
-// Pd=cmpb.eq(Rs,#u8)
-
-// p=!cmp.eq(r1,#s10)
-def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10_0Ext>;
-def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10_0Ext>;
-def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9_0Ext>;
-
-//===----------------------------------------------------------------------===//
-// XTYPE/PRED -
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Multiclass for DeallocReturn
-//===----------------------------------------------------------------------===//
-class L4_RETURN<string mnemonic, bit isNot, bit isPredNew, bit isTak>
- : LD0Inst<(outs), (ins PredRegs:$src),
- !if(isNot, "if (!$src", "if ($src")#
- !if(isPredNew, ".new) ", ") ")#mnemonic#
-            !if(isPredNew, !if(isTak,":t", ":nt"),""),
- [], "", LD_tc_3or4stall_SLOT0> {
-
- bits<2> src;
- let BaseOpcode = "L4_RETURN";
- let isPredicatedFalse = isNot;
- let isPredicatedNew = isPredNew;
- let isTaken = isTak;
- let IClass = 0b1001;
-
- let Inst{27-16} = 0b011000011110;
-
- let Inst{13} = isNot;
- let Inst{12} = isTak;
- let Inst{11} = isPredNew;
- let Inst{10} = 0b0;
- let Inst{9-8} = src;
- let Inst{4-0} = 0b11110;
- }
-
-// Produce all predicated forms: p, !p, p.new, !p.new, :t, :nt
-multiclass L4_RETURN_PRED<string mnemonic, bit PredNot> {
- let isPredicated = 1 in {
- def _#NAME# : L4_RETURN <mnemonic, PredNot, 0, 1>;
- def _#NAME#new_pnt : L4_RETURN <mnemonic, PredNot, 1, 0>;
- def _#NAME#new_pt : L4_RETURN <mnemonic, PredNot, 1, 1>;
- }
-}
-
-multiclass LD_MISC_L4_RETURN<string mnemonic> {
- let isBarrier = 1, isPredicable = 1 in
- def NAME : LD0Inst <(outs), (ins), mnemonic, [], "",
- LD_tc_3or4stall_SLOT0> {
- let BaseOpcode = "L4_RETURN";
- let IClass = 0b1001;
- let Inst{27-16} = 0b011000011110;
- let Inst{13-10} = 0b0000;
- let Inst{4-0} = 0b11110;
- }
- defm t : L4_RETURN_PRED<mnemonic, 0 >;
- defm f : L4_RETURN_PRED<mnemonic, 1 >;
-}
-
-let isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], Uses = [R30], hasSideEffects = 0 in
-defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel;
-
-// Restore registers and dealloc return function call.
-let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
- Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in {
- def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">;
-
- let isExtended = 1, opExtendable = 0 in
- def RESTORE_DEALLOC_RET_JMP_V4_EXT : T_JMP<"">;
-
- let Defs = [R14, R15, R28, R29, R30, R31, PC] in {
- def RESTORE_DEALLOC_RET_JMP_V4_PIC : T_JMP<"">;
-
- let isExtended = 1, opExtendable = 0 in
- def RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC : T_JMP<"">;
- }
-}
-
-// Restore registers and dealloc frame before a tail call.
-let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in {
- def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<0, "">, PredRel;
-
- let isExtended = 1, opExtendable = 0 in
- def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT : T_Call<0, "">, PredRel;
-
- let Defs = [R14, R15, R28, R29, R30, R31, PC] in {
- def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC : T_Call<0, "">, PredRel;
-
- let isExtended = 1, opExtendable = 0 in
- def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC : T_Call<0, "">, PredRel;
- }
-}
-
-// Save registers function call.
-let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in {
- def SAVE_REGISTERS_CALL_V4 : T_Call<0, "">, PredRel;
-
- let isExtended = 1, opExtendable = 0 in
- def SAVE_REGISTERS_CALL_V4_EXT : T_Call<0, "">, PredRel;
-
- let Defs = [P0] in
- def SAVE_REGISTERS_CALL_V4STK : T_Call<0, "">, PredRel;
-
- let Defs = [P0], isExtended = 1, opExtendable = 0 in
- def SAVE_REGISTERS_CALL_V4STK_EXT : T_Call<0, "">, PredRel;
-
- let Defs = [R14, R15, R28] in
- def SAVE_REGISTERS_CALL_V4_PIC : T_Call<0, "">, PredRel;
-
- let Defs = [R14, R15, R28], isExtended = 1, opExtendable = 0 in
- def SAVE_REGISTERS_CALL_V4_EXT_PIC : T_Call<0, "">, PredRel;
-
- let Defs = [R14, R15, R28, P0] in
- def SAVE_REGISTERS_CALL_V4STK_PIC : T_Call<0, "">, PredRel;
-
- let Defs = [R14, R15, R28, P0], isExtended = 1, opExtendable = 0 in
- def SAVE_REGISTERS_CALL_V4STK_EXT_PIC : T_Call<0, "">, PredRel;
-}
-
-//===----------------------------------------------------------------------===//
-// Template class for non-predicated store instructions with
-// GP-Relative or absolute addressing.
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isPredicable = 1 in
-class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<2>MajOp, bit isAbs, bit isHalf>
- : STInst<(outs), (ins ImmOp:$addr, RC:$src),
- mnemonic # "(#$addr) = $src"#!if(isHalf, ".h",""),
- [], "", V2LDST_tc_st_SLOT01> {
- bits<19> addr;
- bits<5> src;
- bits<16> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3},
- !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
- !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
- /* u16_0Imm */ addr{15-0})));
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
- let Uses = !if (isAbs, [], [GP]);
-
- let IClass = 0b0100;
- let Inst{27} = 1;
- let Inst{26-25} = offsetBits{15-14};
- let Inst{24} = 0b0;
- let Inst{23-22} = MajOp;
- let Inst{21} = isHalf;
- let Inst{20-16} = offsetBits{13-9};
- let Inst{13} = offsetBits{8};
- let Inst{12-8} = src;
- let Inst{7-0} = offsetBits{7-0};
- }
-
-//===----------------------------------------------------------------------===//
-// Template class for predicated store instructions with
-// GP-Relative or absolute addressing.
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isPredicated = 1, opExtentBits = 6, opExtendable = 1 in
-class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
- bit isHalf, bit isNot, bit isNew>
- : STInst<(outs), (ins PredRegs:$src1, u32_0MustExt:$absaddr, RC: $src2),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ",
- ") ")#mnemonic#"(#$absaddr) = $src2"#!if(isHalf, ".h",""),
- [], "", ST_tc_st_SLOT01>, AddrModeRel {
- bits<2> src1;
- bits<6> absaddr;
- bits<5> src2;
-
- let isPredicatedNew = isNew;
- let isPredicatedFalse = isNot;
- // Store upper-half and store doubleword cannot be NV.
- let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
-
- let IClass = 0b1010;
-
- let Inst{27-24} = 0b1111;
- let Inst{23-22} = MajOp;
- let Inst{21} = isHalf;
- let Inst{17-16} = absaddr{5-4};
- let Inst{13} = isNew;
- let Inst{12-8} = src2;
- let Inst{7} = 0b1;
- let Inst{6-3} = absaddr{3-0};
- let Inst{2} = isNot;
- let Inst{1-0} = src1;
- }
-
-//===----------------------------------------------------------------------===//
-// Template class for predicated store instructions with absolute addressing.
-//===----------------------------------------------------------------------===//
-class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<2> MajOp, bit isHalf>
- : T_StoreAbsGP <mnemonic, RC, u32_0MustExt, MajOp, 1, isHalf>,
- AddrModeRel {
- string ImmOpStr = !cast<string>(ImmOp);
- let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
- !if (!eq(ImmOpStr, "u16_2Imm"), 18,
- !if (!eq(ImmOpStr, "u16_1Imm"), 17,
- /* u16_0Imm */ 16)));
-
- let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3,
- !if (!eq(ImmOpStr, "u16_2Imm"), 2,
- !if (!eq(ImmOpStr, "u16_1Imm"), 1,
- /* u16_0Imm */ 0)));
-}
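-
-// A worked example of the extent computation above (illustrative only):
-// the word store uses u16_2Imm, so a non-extended absolute address must be
-// 4-byte aligned and fit in 18 bits (opExtentBits = 18, opExtentAlign = 2);
-// the 16 encoded bits are addr{17-2}.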
-
-//===----------------------------------------------------------------------===//
-// Multiclass for store instructions with absolute addressing.
-//===----------------------------------------------------------------------===//
-let addrMode = Absolute, isExtended = 1 in
-multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC,
- Operand ImmOp, bits<2> MajOp, bit isHalf = 0> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
- let opExtendable = 0, isPredicable = 1 in
- def PS_#NAME#abs : T_StoreAbs <mnemonic, RC, ImmOp, MajOp, isHalf>;
-
- // Predicated
- def S4_p#NAME#t_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 0, 0>;
- def S4_p#NAME#f_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 1, 0>;
-
- // .new Predicated
- def S4_p#NAME#tnew_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 0, 1>;
- def S4_p#NAME#fnew_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 1, 1>;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Template class for non-predicated new-value store instructions with
-// GP-Relative or absolute addressing.
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isPredicable = 1, mayStore = 1, isNVStore = 1,
- isNewValue = 1, opNewValue = 1 in
-class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp>
- : NVInst_V4<(outs), (ins ImmOp:$addr, IntRegs:$src),
- mnemonic #"(#$addr) = $src.new",
- [], "", V2LDST_tc_st_SLOT0> {
- bits<19> addr;
- bits<3> src;
- bits<16> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3},
- !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
- !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
- /* u16_0Imm */ addr{15-0})));
- let IClass = 0b0100;
-
- let Inst{27} = 1;
- let Inst{26-25} = offsetBits{15-14};
- let Inst{24-21} = 0b0101;
- let Inst{20-16} = offsetBits{13-9};
- let Inst{13} = offsetBits{8};
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src;
- let Inst{7-0} = offsetBits{7-0};
- }
-
-//===----------------------------------------------------------------------===//
-// Template class for predicated new-value store instructions with
-// absolute addressing.
-//===----------------------------------------------------------------------===//
-let hasSideEffects = 0, isPredicated = 1, mayStore = 1, isNVStore = 1,
- isNewValue = 1, opNewValue = 2, opExtentBits = 6, opExtendable = 1 in
-class T_StoreAbs_NV_Pred <string mnemonic, bits<2> MajOp, bit isNot, bit isNew>
- : NVInst_V4<(outs), (ins PredRegs:$src1, u32_0MustExt:$absaddr, IntRegs:$src2),
- !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ",
- ") ")#mnemonic#"(#$absaddr) = $src2.new",
- [], "", ST_tc_st_SLOT0>, AddrModeRel {
- bits<2> src1;
- bits<6> absaddr;
- bits<3> src2;
-
- let isPredicatedNew = isNew;
- let isPredicatedFalse = isNot;
-
- let IClass = 0b1010;
-
- let Inst{27-24} = 0b1111;
- let Inst{23-21} = 0b101;
- let Inst{17-16} = absaddr{5-4};
- let Inst{13} = isNew;
- let Inst{12-11} = MajOp;
- let Inst{10-8} = src2;
- let Inst{7} = 0b1;
- let Inst{6-3} = absaddr{3-0};
- let Inst{2} = isNot;
- let Inst{1-0} = src1;
-}
-
-//===----------------------------------------------------------------------===//
-// Template class for non-predicated new-value store instructions with
-// absolute addressing.
-//===----------------------------------------------------------------------===//
-class T_StoreAbs_NV <string mnemonic, Operand ImmOp, bits<2> MajOp>
- : T_StoreAbsGP_NV <mnemonic, u32_0MustExt, MajOp>, AddrModeRel {
-
- string ImmOpStr = !cast<string>(ImmOp);
- let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
- !if (!eq(ImmOpStr, "u16_2Imm"), 18,
- !if (!eq(ImmOpStr, "u16_1Imm"), 17,
- /* u16_0Imm */ 16)));
-
- let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3,
- !if (!eq(ImmOpStr, "u16_2Imm"), 2,
- !if (!eq(ImmOpStr, "u16_1Imm"), 1,
- /* u16_0Imm */ 0)));
-}
-
-//===----------------------------------------------------------------------===//
-// Multiclass for new-value store instructions with absolute addressing.
-//===----------------------------------------------------------------------===//
-let addrMode = Absolute, isExtended = 1 in
-multiclass ST_Abs_NV <string mnemonic, string CextOp, Operand ImmOp,
- bits<2> MajOp> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
- let opExtendable = 0, isPredicable = 1 in
- def PS_#NAME#newabs : T_StoreAbs_NV <mnemonic, ImmOp, MajOp>;
-
- // Predicated
- def S4_p#NAME#newt_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 0, 0>;
- def S4_p#NAME#newf_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 1, 0>;
-
- // .new Predicated
- def S4_p#NAME#newtnew_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 0, 1>;
- def S4_p#NAME#newfnew_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 1, 1>;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Stores with absolute addressing
-//===----------------------------------------------------------------------===//
-let accessSize = ByteAccess in
-defm storerb : ST_Abs <"memb", "STrib", IntRegs, u16_0Imm, 0b00>,
- ST_Abs_NV <"memb", "STrib", u16_0Imm, 0b00>;
-
-let accessSize = HalfWordAccess in
-defm storerh : ST_Abs <"memh", "STrih", IntRegs, u16_1Imm, 0b01>,
- ST_Abs_NV <"memh", "STrih", u16_1Imm, 0b01>;
-
-let accessSize = WordAccess in
-defm storeri : ST_Abs <"memw", "STriw", IntRegs, u16_2Imm, 0b10>,
- ST_Abs_NV <"memw", "STriw", u16_2Imm, 0b10>;
-
-let isNVStorable = 0, accessSize = DoubleWordAccess in
-defm storerd : ST_Abs <"memd", "STrid", DoubleRegs, u16_3Imm, 0b11>;
-
-let isNVStorable = 0, accessSize = HalfWordAccess in
-defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>;
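-
-// A sketch of the defs produced for the byte store above (the names follow
-// from ST_Abs/ST_Abs_NV; illustrative only):
-//   PS_storerbabs, S4_pstorerbt_abs, S4_pstorerbf_abs,
-//   S4_pstorerbtnew_abs, S4_pstorerbfnew_abs, PS_storerbnewabs,
-//   S4_pstorerbnewt_abs, ...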
-
-//===----------------------------------------------------------------------===//
-// GP-relative stores.
-// mem[bhwd](#global)=Rt
-// Once predicated, these instructions map to absolute addressing mode.
-// if ([!]Pv[.new]) mem[bhwd](##global)=Rt
-//===----------------------------------------------------------------------===//
-
-let Uses = [GP], isAsmParserOnly = 1 in
-class T_StoreGP <string mnemonic, string BaseOp, RegisterClass RC,
- Operand ImmOp, bits<2> MajOp, bit isHalf = 0>
- : T_StoreAbsGP <mnemonic, RC, ImmOp, MajOp, 0, isHalf> {
-  // Set BaseOpcode to the same value as the absolute-addressing
-  // instructions so that the non-predicated GP-relative instructions can
-  // be related to their predicated absolute-addressing counterparts.
- let BaseOpcode = BaseOp#_abs;
- }
-
-let Uses = [GP], isAsmParserOnly = 1 in
-multiclass ST_GP <string mnemonic, string BaseOp, Operand ImmOp,
- bits<2> MajOp, bit isHalf = 0> {
-  // Set BaseOpcode to the same value as the absolute-addressing
-  // instructions so that the non-predicated GP-relative instructions can
-  // be related to their predicated absolute-addressing counterparts.
- let BaseOpcode = BaseOp#_abs in {
- def NAME#gp : T_StoreAbsGP <mnemonic, IntRegs, ImmOp, MajOp,
- 0, isHalf>;
- // New-value store
- def NAME#newgp : T_StoreAbsGP_NV <mnemonic, ImmOp, MajOp> ;
- }
-}
-
-let accessSize = ByteAccess in
-defm S2_storerb : ST_GP<"memb", "STrib", u16_0Imm, 0b00>, NewValueRel;
-
-let accessSize = HalfWordAccess in
-defm S2_storerh : ST_GP<"memh", "STrih", u16_1Imm, 0b01>, NewValueRel;
-
-let accessSize = WordAccess in
-defm S2_storeri : ST_GP<"memw", "STriw", u16_2Imm, 0b10>, NewValueRel;
-
-let isNVStorable = 0, accessSize = DoubleWordAccess in
-def S2_storerdgp : T_StoreGP <"memd", "STrid", DoubleRegs,
- u16_3Imm, 0b11>, PredNewRel;
-
-let isNVStorable = 0, accessSize = HalfWordAccess in
-def S2_storerfgp : T_StoreGP <"memh", "STrif", IntRegs,
- u16_1Imm, 0b01, 1>, PredNewRel;
-
-//===----------------------------------------------------------------------===//
-// Template class for non-predicated load instructions with
-// absolute addressing mode.
-//===----------------------------------------------------------------------===//
-let isPredicable = 1, hasSideEffects = 0 in
-class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<3> MajOp>
- : LDInst <(outs RC:$dst), (ins ImmOp:$addr),
- "$dst = "#mnemonic# "(#$addr)",
- [], "", V2LDST_tc_ld_SLOT01> {
- bits<5> dst;
- bits<19> addr;
- bits<16> offsetBits;
-
- string ImmOpStr = !cast<string>(ImmOp);
- let offsetBits = !if (!eq(ImmOpStr, "u16_3Imm"), addr{18-3},
- !if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
- !if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
- /* u16_0Imm */ addr{15-0})));
-
- let IClass = 0b0100;
-
- let Inst{27} = 0b1;
- let Inst{26-25} = offsetBits{15-14};
- let Inst{24} = 0b1;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = offsetBits{13-9};
- let Inst{13-5} = offsetBits{8-0};
- let Inst{4-0} = dst;
- }
-
-class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
- bits<3> MajOp>
- : T_LoadAbsGP <mnemonic, RC, u32_0MustExt, MajOp>, AddrModeRel {
-
- string ImmOpStr = !cast<string>(ImmOp);
- let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
- !if (!eq(ImmOpStr, "u16_2Imm"), 18,
- !if (!eq(ImmOpStr, "u16_1Imm"), 17,
- /* u16_0Imm */ 16)));
-
- let opExtentAlign = !if (!eq(ImmOpStr, "u16_3Imm"), 3,
- !if (!eq(ImmOpStr, "u16_2Imm"), 2,
- !if (!eq(ImmOpStr, "u16_1Imm"), 1,
- /* u16_0Imm */ 0)));
- }
-
-//===----------------------------------------------------------------------===//
-// Template class for predicated load instructions with
-// absolute addressing mode.
-//===----------------------------------------------------------------------===//
-let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opExtentBits = 6,
- opExtendable = 2 in
-class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp,
- bit isPredNot, bit isPredNew>
- : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u32_0MustExt:$absaddr),
- !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
- ") ")#"$dst = "#mnemonic#"(#$absaddr)">, AddrModeRel {
- bits<5> dst;
- bits<2> src1;
- bits<6> absaddr;
-
- let isPredicatedNew = isPredNew;
- let isPredicatedFalse = isPredNot;
- let hasNewValue = !if (!eq(!cast<string>(RC), "DoubleRegs"), 0, 1);
-
- let IClass = 0b1001;
-
- let Inst{27-24} = 0b1111;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = absaddr{5-1};
- let Inst{13} = 0b1;
- let Inst{12} = isPredNew;
- let Inst{11} = isPredNot;
- let Inst{10-9} = src1;
- let Inst{8} = absaddr{0};
- let Inst{7} = 0b1;
- let Inst{4-0} = dst;
- }
-
-//===----------------------------------------------------------------------===//
-// Multiclass for the load instructions with absolute addressing mode.
-//===----------------------------------------------------------------------===//
-multiclass LD_Abs_Pred<string mnemonic, RegisterClass RC, bits<3> MajOp,
- bit PredNot> {
- def _abs : T_LoadAbs_Pred <mnemonic, RC, MajOp, PredNot, 0>;
- // Predicate new
- def new_abs : T_LoadAbs_Pred <mnemonic, RC, MajOp, PredNot, 1>;
-}
-
-let addrMode = Absolute, isExtended = 1 in
-multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC,
- Operand ImmOp, bits<3> MajOp> {
- let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
- let opExtendable = 1, isPredicable = 1 in
- def PS_#NAME#abs: T_LoadAbs <mnemonic, RC, ImmOp, MajOp>;
-
- // Predicated
- defm L4_p#NAME#t : LD_Abs_Pred<mnemonic, RC, MajOp, 0>;
- defm L4_p#NAME#f : LD_Abs_Pred<mnemonic, RC, MajOp, 1>;
- }
-}
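-
-// Expansion sketch (illustrative): "defm loadrb : LD_Abs<...>" below produces
-// the predicable PS_loadrbabs plus the predicated variants L4_ploadrbt_abs,
-// L4_ploadrbtnew_abs, L4_ploadrbf_abs and L4_ploadrbfnew_abs.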
-
-let accessSize = ByteAccess, hasNewValue = 1 in {
- defm loadrb : LD_Abs<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>;
- defm loadrub : LD_Abs<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>;
-}
-
-let accessSize = HalfWordAccess, hasNewValue = 1 in {
- defm loadrh : LD_Abs<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>;
- defm loadruh : LD_Abs<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>;
-}
-
-let accessSize = WordAccess, hasNewValue = 1 in
-defm loadri : LD_Abs<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>;
-
-let accessSize = DoubleWordAccess in
-defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
-
-//===----------------------------------------------------------------------===//
-// Multiclass for load instructions with GP-relative addressing mode.
-//   Rx = mem[bhwd](##global)
-// When predicated, these instructions map to the absolute addressing mode:
-//   if ([!]Pv[.new]) Rx = mem[bhwd](##global)
-//===----------------------------------------------------------------------===//
-
-let isAsmParserOnly = 1, Uses = [GP] in
-class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp,
- bits<3> MajOp>
- : T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp>, PredNewRel {
- let BaseOpcode = BaseOp#_abs;
- }
-
-let accessSize = ByteAccess, hasNewValue = 1 in {
- def L2_loadrbgp : T_LoadGP<"memb", "LDrib", IntRegs, u16_0Imm, 0b000>;
- def L2_loadrubgp : T_LoadGP<"memub", "LDriub", IntRegs, u16_0Imm, 0b001>;
-}
-
-let accessSize = HalfWordAccess, hasNewValue = 1 in {
- def L2_loadrhgp : T_LoadGP<"memh", "LDrih", IntRegs, u16_1Imm, 0b010>;
- def L2_loadruhgp : T_LoadGP<"memuh", "LDriuh", IntRegs, u16_1Imm, 0b011>;
-}
-
-let accessSize = WordAccess, hasNewValue = 1 in
-def L2_loadrigp : T_LoadGP<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>;
-
-let accessSize = DoubleWordAccess in
-def L2_loadrdgp : T_LoadGP<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
-
-//===----------------------------------------------------------------------===//
-// :raw form of boundscheck:hi:lo insns
-//===----------------------------------------------------------------------===//
-
-// A4_boundscheck_lo: Check whether the low word of Rss lies within the bounds in Rtt.
-let hasSideEffects = 0 in
-def A4_boundscheck_lo: ALU64Inst <
- (outs PredRegs:$Pd),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Pd = boundscheck($Rss, $Rtt):raw:lo"> {
- bits<2> Pd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b00100;
- let Inst{13} = 0b1;
- let Inst{7-5} = 0b100;
- let Inst{1-0} = Pd;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-// A4_boundscheck_hi: Check whether the high word of Rss lies within the bounds in Rtt.
-let hasSideEffects = 0 in
-def A4_boundscheck_hi: ALU64Inst <
- (outs PredRegs:$Pd),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Pd = boundscheck($Rss, $Rtt):raw:hi"> {
- bits<2> Pd;
- bits<5> Rss;
- bits<5> Rtt;
-
- let IClass = 0b1101;
-
- let Inst{27-23} = 0b00100;
- let Inst{13} = 0b1;
- let Inst{7-5} = 0b101;
- let Inst{1-0} = Pd;
- let Inst{20-16} = Rss;
- let Inst{12-8} = Rtt;
- }
-
-let hasSideEffects = 0, isAsmParserOnly = 1 in
-def A4_boundscheck : MInst <
- (outs PredRegs:$Pd), (ins IntRegs:$Rs, DoubleRegs:$Rtt),
- "$Pd=boundscheck($Rs,$Rtt)">;
-
-// A4_tlbmatch: Detect if a VA/ASID matches a TLB entry.
-let isPredicateLate = 1, hasSideEffects = 0 in
-def A4_tlbmatch : ALU64Inst<(outs PredRegs:$Pd),
- (ins DoubleRegs:$Rs, IntRegs:$Rt),
- "$Pd = tlbmatch($Rs, $Rt)",
- [], "", ALU64_tc_2early_SLOT23> {
- bits<2> Pd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1101;
- let Inst{27-23} = 0b00100;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b1;
- let Inst{12-8} = Rt;
- let Inst{7-5} = 0b011;
- let Inst{1-0} = Pd;
- }
-
-// Use LD0Inst for dcfetch, but set "mayLoad" to 0 because this doesn't
-// really do a load.
-let hasSideEffects = 1, mayLoad = 0 in
-def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3),
- "dcfetch($Rs + #$u11_3)",
- [], "", LD_tc_ld_SLOT0> {
- bits<5> Rs;
- bits<14> u11_3;
-
- let IClass = 0b1001;
- let Inst{27-21} = 0b0100000;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b0;
- let Inst{10-0} = u11_3{13-3};
-}
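-
-// Note (illustrative): u11_3 is kept as bits<14> but only u11_3{13-3} is
-// encoded, so the prefetch offset is implicitly 8-byte aligned; the effective
-// address is Rs + #$u11_3.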
-
-
-//===----------------------------------------------------------------------===//
-// Compound instructions
-//===----------------------------------------------------------------------===//
-
-let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
- isPredicated = 1, isPredicatedNew = 1, isExtendable = 1,
- opExtentBits = 11, opExtentAlign = 2, opExtendable = 1,
- isTerminator = 1 in
-class CJInst_tstbit_R0<string px, bit np, string tnt>
- : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2),
- ""#px#" = tstbit($Rs, #0); if ("
- #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>, OpcodeHexagon {
- bits<4> Rs;
- bits<11> r9_2;
-
- // np: !p[01]
- let isPredicatedFalse = np;
- // tnt: Taken/Not Taken
- let isBrTaken = !if (!eq(tnt, "t"), "true", "false");
- let isTaken = !if (!eq(tnt, "t"), 1, 0);
-
- let IClass = 0b0001;
- let Inst{27-26} = 0b00;
- let Inst{25} = !if (!eq(px, "!p1"), 1,
- !if (!eq(px, "p1"), 1, 0));
- let Inst{24-23} = 0b11;
- let Inst{22} = np;
- let Inst{21-20} = r9_2{10-9};
- let Inst{19-16} = Rs;
- let Inst{13} = !if (!eq(tnt, "t"), 1, 0);
- let Inst{9-8} = 0b11;
- let Inst{7-1} = r9_2{8-2};
-}
-
-let Defs = [PC, P0], Uses = [P0] in {
- def J4_tstbit0_tp0_jump_nt : CJInst_tstbit_R0<"p0", 0, "nt">;
- def J4_tstbit0_tp0_jump_t : CJInst_tstbit_R0<"p0", 0, "t">;
- def J4_tstbit0_fp0_jump_nt : CJInst_tstbit_R0<"p0", 1, "nt">;
- def J4_tstbit0_fp0_jump_t : CJInst_tstbit_R0<"p0", 1, "t">;
-}
-
-let Defs = [PC, P1], Uses = [P1] in {
- def J4_tstbit0_tp1_jump_nt : CJInst_tstbit_R0<"p1", 0, "nt">;
- def J4_tstbit0_tp1_jump_t : CJInst_tstbit_R0<"p1", 0, "t">;
- def J4_tstbit0_fp1_jump_nt : CJInst_tstbit_R0<"p1", 1, "nt">;
- def J4_tstbit0_fp1_jump_t : CJInst_tstbit_R0<"p1", 1, "t">;
-}
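-
-// Illustrative syntax (hypothetical register): J4_tstbit0_fp0_jump_t prints
-// as "p0 = tstbit(R7, #0); if (!p0.new) jump:t <target>".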
-
-
-let isBranch = 1, hasSideEffects = 0,
- isExtentSigned = 1, isPredicated = 1, isPredicatedNew = 1,
- isExtendable = 1, opExtentBits = 11, opExtentAlign = 2,
- opExtendable = 2, isTerminator = 1 in
-class CJInst_RR<string px, string op, bit np, string tnt>
- : InstHexagon<(outs), (ins IntRegs:$Rs, IntRegs:$Rt, brtarget:$r9_2),
- ""#px#" = cmp."#op#"($Rs, $Rt); if ("
- #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>, OpcodeHexagon {
- bits<4> Rs;
- bits<4> Rt;
- bits<11> r9_2;
-
- // np: !p[01]
- let isPredicatedFalse = np;
- // tnt: Taken/Not Taken
- let isBrTaken = !if (!eq(tnt, "t"), "true", "false");
- let isTaken = !if (!eq(tnt, "t"), 1, 0);
-
- let IClass = 0b0001;
- let Inst{27-23} = !if (!eq(op, "eq"), 0b01000,
- !if (!eq(op, "gt"), 0b01001,
- !if (!eq(op, "gtu"), 0b01010, 0)));
- let Inst{22} = np;
- let Inst{21-20} = r9_2{10-9};
- let Inst{19-16} = Rs;
- let Inst{13} = !if (!eq(tnt, "t"), 1, 0);
- // px: Predicate reg 0/1
- let Inst{12} = !if (!eq(px, "!p1"), 1,
- !if (!eq(px, "p1"), 1, 0));
- let Inst{11-8} = Rt;
- let Inst{7-1} = r9_2{8-2};
-}
-
-// P[10] taken/not taken.
-multiclass T_tnt_CJInst_RR<string op, bit np> {
- let Defs = [PC, P0], Uses = [P0] in {
- def NAME#p0_jump_nt : CJInst_RR<"p0", op, np, "nt">;
- def NAME#p0_jump_t : CJInst_RR<"p0", op, np, "t">;
- }
- let Defs = [PC, P1], Uses = [P1] in {
- def NAME#p1_jump_nt : CJInst_RR<"p1", op, np, "nt">;
- def NAME#p1_jump_t : CJInst_RR<"p1", op, np, "t">;
- }
-}
-// Predicate / !Predicate
-multiclass T_pnp_CJInst_RR<string op>{
- defm J4_cmp#NAME#_t : T_tnt_CJInst_RR<op, 0>;
- defm J4_cmp#NAME#_f : T_tnt_CJInst_RR<op, 1>;
-}
-// TypeCJ instructions: compare register-register and jump.
-defm eq : T_pnp_CJInst_RR<"eq">;
-defm gt : T_pnp_CJInst_RR<"gt">;
-defm gtu : T_pnp_CJInst_RR<"gtu">;
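-
-// Expansion sketch (illustrative): "defm eq : T_pnp_CJInst_RR<"eq">" yields
-// J4_cmpeq_tp0_jump_nt, J4_cmpeq_tp0_jump_t, J4_cmpeq_tp1_jump_nt and
-// J4_cmpeq_tp1_jump_t, plus the matching _f (negated-predicate) forms.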
-
-let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
- isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, opExtentBits = 11,
- opExtentAlign = 2, opExtendable = 2, isTerminator = 1 in
-class CJInst_RU5<string px, string op, bit np, string tnt>
- : InstHexagon<(outs), (ins IntRegs:$Rs, u5_0Imm:$U5, brtarget:$r9_2),
- ""#px#" = cmp."#op#"($Rs, #$U5); if ("
- #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>, OpcodeHexagon {
- bits<4> Rs;
- bits<5> U5;
- bits<11> r9_2;
-
- // np: !p[01]
- let isPredicatedFalse = np;
- // tnt: Taken/Not Taken
- let isBrTaken = !if (!eq(tnt, "t"), "true", "false");
- let isTaken = !if (!eq(tnt, "t"), 1, 0);
-
- let IClass = 0b0001;
- let Inst{27-26} = 0b00;
- // px: Predicate reg 0/1
- let Inst{25} = !if (!eq(px, "!p1"), 1,
- !if (!eq(px, "p1"), 1, 0));
- let Inst{24-23} = !if (!eq(op, "eq"), 0b00,
- !if (!eq(op, "gt"), 0b01,
- !if (!eq(op, "gtu"), 0b10, 0)));
- let Inst{22} = np;
- let Inst{21-20} = r9_2{10-9};
- let Inst{19-16} = Rs;
- let Inst{13} = !if (!eq(tnt, "t"), 1, 0);
- let Inst{12-8} = U5;
- let Inst{7-1} = r9_2{8-2};
-}
-// P[10] taken/not taken.
-multiclass T_tnt_CJInst_RU5<string op, bit np> {
- let Defs = [PC, P0], Uses = [P0] in {
- def NAME#p0_jump_nt : CJInst_RU5<"p0", op, np, "nt">;
- def NAME#p0_jump_t : CJInst_RU5<"p0", op, np, "t">;
- }
- let Defs = [PC, P1], Uses = [P1] in {
- def NAME#p1_jump_nt : CJInst_RU5<"p1", op, np, "nt">;
- def NAME#p1_jump_t : CJInst_RU5<"p1", op, np, "t">;
- }
-}
-// Predicate / !Predicate
-multiclass T_pnp_CJInst_RU5<string op>{
- defm J4_cmp#NAME#i_t : T_tnt_CJInst_RU5<op, 0>;
- defm J4_cmp#NAME#i_f : T_tnt_CJInst_RU5<op, 1>;
-}
-// TypeCJ instructions: compare register-immediate and jump.
-defm eq : T_pnp_CJInst_RU5<"eq">;
-defm gt : T_pnp_CJInst_RU5<"gt">;
-defm gtu : T_pnp_CJInst_RU5<"gtu">;
-
-let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
- isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1,
- isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 2,
- isTerminator = 1 in
-class CJInst_Rn1<string px, string op, bit np, string tnt>
- : InstHexagon<(outs), (ins IntRegs:$Rs, n1Const:$n1, brtarget:$r9_2),
- ""#px#" = cmp."#op#"($Rs,#$n1); if ("
- #!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
- [], "", COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>, OpcodeHexagon {
- bits<4> Rs;
- bits<11> r9_2;
-
- // np: !p[01]
- let isPredicatedFalse = np;
- // tnt: Taken/Not Taken
- let isBrTaken = !if (!eq(tnt, "t"), "true", "false");
- let isTaken = !if (!eq(tnt, "t"), 1, 0);
-
- let IClass = 0b0001;
- let Inst{27-26} = 0b00;
- let Inst{25} = !if (!eq(px, "!p1"), 1,
- !if (!eq(px, "p1"), 1, 0));
-
- let Inst{24-23} = 0b11;
- let Inst{22} = np;
- let Inst{21-20} = r9_2{10-9};
- let Inst{19-16} = Rs;
- let Inst{13} = !if (!eq(tnt, "t"), 1, 0);
- let Inst{9-8} = !if (!eq(op, "eq"), 0b00,
- !if (!eq(op, "gt"), 0b01, 0));
- let Inst{7-1} = r9_2{8-2};
-}
-
-// P[10] taken/not taken.
-multiclass T_tnt_CJInst_Rn1<string op, bit np> {
- let Defs = [PC, P0], Uses = [P0] in {
- def NAME#p0_jump_nt : CJInst_Rn1<"p0", op, np, "nt">;
- def NAME#p0_jump_t : CJInst_Rn1<"p0", op, np, "t">;
- }
- let Defs = [PC, P1], Uses = [P1] in {
- def NAME#p1_jump_nt : CJInst_Rn1<"p1", op, np, "nt">;
- def NAME#p1_jump_t : CJInst_Rn1<"p1", op, np, "t">;
- }
-}
-// Predicate / !Predicate
-multiclass T_pnp_CJInst_Rn1<string op>{
- defm J4_cmp#NAME#n1_t : T_tnt_CJInst_Rn1<op, 0>;
- defm J4_cmp#NAME#n1_f : T_tnt_CJInst_Rn1<op, 1>;
-}
-// TypeCJ instructions: compare against -1 and jump.
-defm eq : T_pnp_CJInst_Rn1<"eq">;
-defm gt : T_pnp_CJInst_Rn1<"gt">;
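-
-// Illustrative syntax: these compare against the constant -1, e.g.
-// "p0 = cmp.eq(Rs,#-1); if (p0.new) jump:nt <target>".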
-
-// J4_jumpseti: Direct unconditional jump and set register to immediate.
-let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1,
- isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11,
- opExtentAlign = 2, opExtendable = 2 in
-def J4_jumpseti: CJInst_JMPSET <
- (outs IntRegs:$Rd),
- (ins u6_0Imm:$U6, brtarget:$r9_2),
- "$Rd = #$U6 ; jump $r9_2"> {
- bits<4> Rd;
- bits<6> U6;
- bits<11> r9_2;
-
- let IClass = 0b0001;
- let Inst{27-24} = 0b0110;
- let Inst{21-20} = r9_2{10-9};
- let Inst{19-16} = Rd;
- let Inst{13-8} = U6;
- let Inst{7-1} = r9_2{8-2};
- }
-
-// J4_jumpsetr: Direct unconditional jump and transfer register.
-let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1,
- isExtentSigned = 1, opNewValue = 0, isExtendable = 1, opExtentBits = 11,
- opExtentAlign = 2, opExtendable = 2 in
-def J4_jumpsetr: CJInst_JMPSET <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs, brtarget:$r9_2),
- "$Rd = $Rs ; jump $r9_2"> {
- bits<4> Rd;
- bits<4> Rs;
- bits<11> r9_2;
-
- let IClass = 0b0001;
- let Inst{27-24} = 0b0111;
- let Inst{21-20} = r9_2{10-9};
- let Inst{11-8} = Rd;
- let Inst{19-16} = Rs;
- let Inst{7-1} = r9_2{8-2};
- }
-
-//===----------------------------------------------------------------------===//
-// Duplex instructions
-//===----------------------------------------------------------------------===//
-include "HexagonIsetDx.td"
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV5.td b/lib/Target/Hexagon/HexagonInstrInfoV5.td
deleted file mode 100644
index cd19b6916f21..000000000000
--- a/lib/Target/Hexagon/HexagonInstrInfoV5.td
+++ /dev/null
@@ -1,497 +0,0 @@
-//=- HexagonInstrInfoV5.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon V5 instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// XTYPE/MPY
-//===----------------------------------------------------------------------===//
-
- // Rdd[+] = vrmpybsu(Rss, Rtt)
-let Predicates = [HasV5T] in {
- def M5_vrmpybsu: T_XTYPE_Vect<"vrmpybsu", 0b110, 0b001, 0>;
- def M5_vrmacbsu: T_XTYPE_Vect_acc<"vrmpybsu", 0b110, 0b001, 0>;
-
- // Rdd[+] = vrmpybu(Rss, Rtt)
- def M5_vrmpybuu: T_XTYPE_Vect<"vrmpybu", 0b100, 0b001, 0>;
- def M5_vrmacbuu: T_XTYPE_Vect_acc<"vrmpybu", 0b100, 0b001, 0>;
-
- def M5_vdmpybsu: T_M2_vmpy<"vdmpybsu", 0b101, 0b001, 0, 0, 1>;
- def M5_vdmacbsu: T_M2_vmpy_acc_sat <"vdmpybsu", 0b001, 0b001, 0, 0>;
-}
-
-// Vector multiply bytes
-// Rdd=vmpyb[s]u(Rs,Rt)
-let Predicates = [HasV5T] in {
- def M5_vmpybsu: T_XTYPE_mpy64 <"vmpybsu", 0b010, 0b001, 0, 0, 0>;
- def M5_vmpybuu: T_XTYPE_mpy64 <"vmpybu", 0b100, 0b001, 0, 0, 0>;
-
- // Rxx+=vmpyb[s]u(Rs,Rt)
- def M5_vmacbsu: T_XTYPE_mpy64_acc <"vmpybsu", "+", 0b110, 0b001, 0, 0, 0>;
- def M5_vmacbuu: T_XTYPE_mpy64_acc <"vmpybu", "+", 0b100, 0b001, 0, 0, 0>;
-
- // Rd=vaddhub(Rss,Rtt):sat
- let hasNewValue = 1, opNewValue = 0 in
- def A5_vaddhubs: T_S3op_1 <"vaddhub", IntRegs, 0b01, 0b001, 0, 1>;
-}
-
-def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6_0Imm, [], 1>,
- Requires<[HasV5T]> {
- bits<6> src2;
- let Inst{13-8} = src2;
-}
-
-let isAsmParserOnly = 1 in
-def S2_asr_i_p_rnd_goodsyntax
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6_0Imm:$src2),
- "$dst = asrrnd($src1, #$src2)">;
-
-def C4_fastcorner9 : T_LOGICAL_2OP<"fastcorner9", 0b000, 0, 0>,
- Requires<[HasV5T]> {
- let Inst{13,7,4} = 0b111;
-}
-
-def C4_fastcorner9_not : T_LOGICAL_2OP<"!fastcorner9", 0b000, 0, 0>,
- Requires<[HasV5T]> {
- let Inst{20,13,7,4} = 0b1111;
-}
-
-let hasNewValue = 1, validSubTargets = HasV5SubT in
-def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
- "$Rd = popcount($Rss)", [], "", S_2op_tc_2_SLOT23>,
- Requires<[HasV5T]> {
- bits<5> Rd;
- bits<5> Rss;
-
- let IClass = 0b1000;
-
- let Inst{27-21} = 0b1000011;
- let Inst{7-5} = 0b011;
- let Inst{4-0} = Rd;
- let Inst{20-16} = Rss;
- }
-
-let isFP = 1, hasNewValue = 1, opNewValue = 0 in
-class T_MInstFloat <string mnemonic, bits<3> MajOp, bits<3> MinOp>
- : MInst<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = "#mnemonic#"($Rs, $Rt)", [],
- "" , M_tc_3or4x_SLOT23 > ,
- Requires<[HasV5T]> {
- bits<5> Rd;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
-
- let Inst{27-24} = 0b1011;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b0;
- let Inst{12-8} = Rt;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rd;
- }
-
-let isCommutable = 1 in {
- def F2_sfadd : T_MInstFloat < "sfadd", 0b000, 0b000>;
- def F2_sfmpy : T_MInstFloat < "sfmpy", 0b010, 0b000>;
-}
-
-def F2_sfsub : T_MInstFloat < "sfsub", 0b000, 0b001>;
-
-let Itinerary = M_tc_3x_SLOT23 in {
- def F2_sfmax : T_MInstFloat < "sfmax", 0b100, 0b000>;
- def F2_sfmin : T_MInstFloat < "sfmin", 0b100, 0b001>;
-}
-
-let Itinerary = M_tc_3or4x_SLOT23 in {
-def F2_sffixupn : T_MInstFloat < "sffixupn", 0b110, 0b000>;
-def F2_sffixupd : T_MInstFloat < "sffixupd", 0b110, 0b001>;
-}
-
-// F2_sfrecipa: Reciprocal approximation for division.
-let Uses = [USR], isPredicateLate = 1, isFP = 1,
- hasSideEffects = 0, hasNewValue = 1, Itinerary = M_tc_3or4x_SLOT23 in
-def F2_sfrecipa: MInst <
- (outs IntRegs:$Rd, PredRegs:$Pe),
- (ins IntRegs:$Rs, IntRegs:$Rt),
- "$Rd, $Pe = sfrecipa($Rs, $Rt)">,
- Requires<[HasV5T]> {
- bits<5> Rd;
- bits<2> Pe;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
- let Inst{27-21} = 0b1011111;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b0;
- let Inst{12-8} = Rt;
- let Inst{7} = 0b1;
- let Inst{6-5} = Pe;
- let Inst{4-0} = Rd;
- }
-
-// Floating-point compare template, used by the F2_dfcmp*/F2_sfcmp* defs below.
-let Uses = [USR], isCompare = 1, isFP = 1 in
-class T_fcmp <string mnemonic, RegisterClass RC, bits<3> MinOp,
- list<dag> pattern = [] >
- : ALU64Inst <(outs PredRegs:$dst), (ins RC:$src1, RC:$src2),
- "$dst = "#mnemonic#"($src1, $src2)", pattern,
- "" , ALU64_tc_2early_SLOT23 > ,
- Requires<[HasV5T]> {
- bits<2> dst;
- bits<5> src1;
- bits<5> src2;
-
- let IClass = 0b1101;
-
- let Inst{27-21} = 0b0010111;
- let Inst{20-16} = src1;
- let Inst{12-8} = src2;
- let Inst{7-5} = MinOp;
- let Inst{1-0} = dst;
- }
-
-class T_fcmp64 <string mnemonic, PatFrag OpNode, bits<3> MinOp>
- : T_fcmp <mnemonic, DoubleRegs, MinOp, []> {
- let IClass = 0b1101;
- let Inst{27-21} = 0b0010111;
-}
-
-class T_fcmp32 <string mnemonic, PatFrag OpNode, bits<3> MinOp>
- : T_fcmp <mnemonic, IntRegs, MinOp, []> {
- let IClass = 0b1100;
- let Inst{27-21} = 0b0111111;
-}
-
-def F2_dfcmpeq : T_fcmp64<"dfcmp.eq", setoeq, 0b000>;
-def F2_dfcmpgt : T_fcmp64<"dfcmp.gt", setogt, 0b001>;
-def F2_dfcmpge : T_fcmp64<"dfcmp.ge", setoge, 0b010>;
-def F2_dfcmpuo : T_fcmp64<"dfcmp.uo", setuo, 0b011>;
-
-def F2_sfcmpge : T_fcmp32<"sfcmp.ge", setoge, 0b000>;
-def F2_sfcmpuo : T_fcmp32<"sfcmp.uo", setuo, 0b001>;
-def F2_sfcmpeq : T_fcmp32<"sfcmp.eq", setoeq, 0b011>;
-def F2_sfcmpgt : T_fcmp32<"sfcmp.gt", setogt, 0b100>;
-
-// F2 convert template classes:
-let Uses = [USR], isFP = 1 in
-class F2_RDD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
- string chop ="">
- : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss),
- "$Rdd = "#mnemonic#"($Rss)"#chop, [], "",
- S_2op_tc_3or4x_SLOT23> {
- bits<5> Rdd;
- bits<5> Rss;
-
- let IClass = 0b1000;
-
- let Inst{27-21} = 0b0000111;
- let Inst{20-16} = Rss;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rdd;
- }
-
-let Uses = [USR], isFP = 1 in
-class F2_RDD_RS_CONVERT<string mnemonic, bits<3> MinOp,
- string chop ="">
- : SInst <(outs DoubleRegs:$Rdd), (ins IntRegs:$Rs),
- "$Rdd = "#mnemonic#"($Rs)"#chop, [], "",
- S_2op_tc_3or4x_SLOT23> {
- bits<5> Rdd;
- bits<5> Rs;
-
- let IClass = 0b1000;
-
- let Inst{27-21} = 0b0100100;
- let Inst{20-16} = Rs;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rdd;
- }
-
-let Uses = [USR], isFP = 1, hasNewValue = 1 in
-class F2_RD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
- string chop ="">
- : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
- "$Rd = "#mnemonic#"($Rss)"#chop, [], "",
- S_2op_tc_3or4x_SLOT23> {
- bits<5> Rd;
- bits<5> Rss;
-
- let IClass = 0b1000;
-
- let Inst{27-24} = 0b1000;
- let Inst{23-21} = MinOp;
- let Inst{20-16} = Rss;
- let Inst{7-5} = 0b001;
- let Inst{4-0} = Rd;
- }
-
-let Uses = [USR], isFP = 1, hasNewValue = 1 in
-class F2_RD_RS_CONVERT<string mnemonic, bits<3> MajOp, bits<3> MinOp,
- string chop ="">
- : SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs),
- "$Rd = "#mnemonic#"($Rs)"#chop, [], "",
- S_2op_tc_3or4x_SLOT23> {
- bits<5> Rd;
- bits<5> Rs;
-
- let IClass = 0b1000;
-
- let Inst{27-24} = 0b1011;
- let Inst{23-21} = MajOp;
- let Inst{20-16} = Rs;
- let Inst{7-5} = MinOp;
- let Inst{4-0} = Rd;
- }
-
-// Convert single precision to double precision and vice-versa.
-def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000>;
-def F2_conv_df2sf : F2_RD_RSS_CONVERT <"convert_df2sf", 0b000>;
-
-// Convert Integer to Floating Point.
-def F2_conv_d2sf : F2_RD_RSS_CONVERT <"convert_d2sf", 0b010>;
-def F2_conv_ud2sf : F2_RD_RSS_CONVERT <"convert_ud2sf", 0b001>;
-def F2_conv_uw2sf : F2_RD_RS_CONVERT <"convert_uw2sf", 0b001, 0b000>;
-def F2_conv_w2sf : F2_RD_RS_CONVERT <"convert_w2sf", 0b010, 0b000>;
-def F2_conv_d2df : F2_RDD_RSS_CONVERT <"convert_d2df", 0b011>;
-def F2_conv_ud2df : F2_RDD_RSS_CONVERT <"convert_ud2df", 0b010>;
-def F2_conv_uw2df : F2_RDD_RS_CONVERT <"convert_uw2df", 0b001>;
-def F2_conv_w2df : F2_RDD_RS_CONVERT <"convert_w2df", 0b010>;
-
-// Convert Floating Point to Integer.
-def F2_conv_df2uw_chop : F2_RD_RSS_CONVERT <"convert_df2uw", 0b101, ":chop">;
-def F2_conv_df2w_chop : F2_RD_RSS_CONVERT <"convert_df2w", 0b111, ":chop">;
-def F2_conv_sf2uw_chop : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b001,
- ":chop">;
-def F2_conv_sf2w_chop : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b001,
- ":chop">;
-def F2_conv_df2d_chop : F2_RDD_RSS_CONVERT <"convert_df2d", 0b110, ":chop">;
-def F2_conv_df2ud_chop : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b111, ":chop">;
-def F2_conv_sf2d_chop : F2_RDD_RS_CONVERT <"convert_sf2d", 0b110, ":chop">;
-def F2_conv_sf2ud_chop : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b101, ":chop">;
-
-// Convert Floating Point to Integer: non-chopped.
-let AddedComplexity = 20, Predicates = [HasV5T] in {
- def F2_conv_df2d : F2_RDD_RSS_CONVERT <"convert_df2d", 0b000>;
- def F2_conv_df2ud : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b001>;
- def F2_conv_sf2ud : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b011>;
- def F2_conv_sf2d : F2_RDD_RS_CONVERT <"convert_sf2d", 0b100>;
- def F2_conv_df2uw : F2_RD_RSS_CONVERT <"convert_df2uw", 0b011>;
- def F2_conv_df2w : F2_RD_RSS_CONVERT <"convert_df2w", 0b100>;
- def F2_conv_sf2uw : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b000>;
- def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000>;
-}
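-
-// Note (assumption, following the usual Hexagon convention): the ":chop"
-// conversions truncate toward zero, while the unsuffixed forms round
-// according to the rounding mode held in USR.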
-
-// Fix up radicand.
-let Uses = [USR], isFP = 1, hasNewValue = 1 in
-def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs),
- "$Rd = sffixupr($Rs)",
- [], "" , S_2op_tc_3or4x_SLOT23>, Requires<[HasV5T]> {
- bits<5> Rd;
- bits<5> Rs;
-
- let IClass = 0b1000;
-
- let Inst{27-21} = 0b1011101;
- let Inst{20-16} = Rs;
- let Inst{7-5} = 0b000;
- let Inst{4-0} = Rd;
- }
-
-// Template for floating-point fused multiply add/subtract (F2_sffma etc.).
-let Uses = [USR], isFP = 1, hasNewValue = 1 in
-class T_sfmpy_acc <bit isSub, bit isLib>
- : MInst<(outs IntRegs:$Rx),
- (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
- "$Rx "#!if(isSub, "-=","+=")#" sfmpy($Rs, $Rt)"#!if(isLib, ":lib",""),
- [], "$dst2 = $Rx" , M_tc_3or4x_SLOT23 > ,
- Requires<[HasV5T]> {
- bits<5> Rx;
- bits<5> Rs;
- bits<5> Rt;
-
- let IClass = 0b1110;
-
- let Inst{27-21} = 0b1111000;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b0;
- let Inst{12-8} = Rt;
- let Inst{7} = 0b1;
- let Inst{6} = isLib;
- let Inst{5} = isSub;
- let Inst{4-0} = Rx;
- }
-
-def F2_sffma: T_sfmpy_acc <0, 0>;
-def F2_sffms: T_sfmpy_acc <1, 0>;
-def F2_sffma_lib: T_sfmpy_acc <0, 1>;
-def F2_sffms_lib: T_sfmpy_acc <1, 1>;
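-
-// Illustrative mapping: F2_sffms prints as "Rx -= sfmpy(Rs, Rt)" and
-// F2_sffma_lib as "Rx += sfmpy(Rs, Rt):lib", per the isSub/isLib bits above.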
-
-// Floating-point fused multiply add w/ additional scaling (2**pu).
-let Uses = [USR], isFP = 1, hasNewValue = 1 in
-def F2_sffma_sc: MInst <
- (outs IntRegs:$Rx),
- (ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt, PredRegs:$Pu),
- "$Rx += sfmpy($Rs, $Rt, $Pu):scale" ,
- [], "$dst2 = $Rx" , M_tc_3or4x_SLOT23 > ,
- Requires<[HasV5T]> {
- bits<5> Rx;
- bits<5> Rs;
- bits<5> Rt;
- bits<2> Pu;
-
- let IClass = 0b1110;
-
- let Inst{27-21} = 0b1111011;
- let Inst{20-16} = Rs;
- let Inst{13} = 0b0;
- let Inst{12-8} = Rt;
- let Inst{7} = 0b1;
- let Inst{6-5} = Pu;
- let Inst{4-0} = Rx;
- }
-
-//===----------------------------------------------------------------------===//
-// :raw forms of vasrh and vasrhub insns
-//===----------------------------------------------------------------------===//
-// S5_asrhub_rnd_sat: Vector arithmetic shift right by immediate with round,
-// saturate, and pack.
-let Defs = [USR_OVF], hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-class T_ASRHUB<bit isSat>
- : SInst <(outs IntRegs:$Rd),
- (ins DoubleRegs:$Rss, u4_0Imm:$u4),
- "$Rd = vasrhub($Rss, #$u4):"#!if(isSat, "sat", "raw"),
- [], "", S_2op_tc_2_SLOT23>,
- Requires<[HasV5T]> {
- bits<5> Rd;
- bits<5> Rss;
- bits<4> u4;
-
- let IClass = 0b1000;
-
- let Inst{27-21} = 0b1000011;
- let Inst{20-16} = Rss;
- let Inst{13-12} = 0b00;
- let Inst{11-8} = u4;
- let Inst{7-6} = 0b10;
- let Inst{5} = isSat;
- let Inst{4-0} = Rd;
- }
-
-def S5_asrhub_rnd_sat : T_ASRHUB <0>;
-def S5_asrhub_sat : T_ASRHUB <1>;
-
-let isAsmParserOnly = 1 in
-def S5_asrhub_rnd_sat_goodsyntax
- : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, u4_0Imm:$u4),
- "$Rd = vasrhub($Rss, #$u4):rnd:sat">, Requires<[HasV5T]>;
-
-// S5_vasrhrnd: Vector arithmetic shift right by immediate with round.
-let hasSideEffects = 0 in
-def S5_vasrhrnd : SInst <(outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$Rss, u4_0Imm:$u4),
- "$Rdd = vasrh($Rss, #$u4):raw">,
- Requires<[HasV5T]> {
- bits<5> Rdd;
- bits<5> Rss;
- bits<4> u4;
-
- let IClass = 0b1000;
-
- let Inst{27-21} = 0b0000001;
- let Inst{20-16} = Rss;
- let Inst{13-12} = 0b00;
- let Inst{11-8} = u4;
- let Inst{7-5} = 0b000;
- let Inst{4-0} = Rdd;
- }
-
-let isAsmParserOnly = 1 in
-def S5_vasrhrnd_goodsyntax
- : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, u4_0Imm:$u4),
- "$Rdd = vasrh($Rss,#$u4):rnd">, Requires<[HasV5T]>;
-
-// Floating point reciprocal square root approximation
-let Uses = [USR], isPredicateLate = 1, isFP = 1,
- hasSideEffects = 0, hasNewValue = 1, opNewValue = 0,
- validSubTargets = HasV5SubT in
-def F2_sfinvsqrta: SInst <
- (outs IntRegs:$Rd, PredRegs:$Pe),
- (ins IntRegs:$Rs),
- "$Rd, $Pe = sfinvsqrta($Rs)" > ,
- Requires<[HasV5T]> {
- bits<5> Rd;
- bits<2> Pe;
- bits<5> Rs;
-
- let IClass = 0b1000;
-
- let Inst{27-21} = 0b1011111;
- let Inst{20-16} = Rs;
- let Inst{7} = 0b0;
- let Inst{6-5} = Pe;
- let Inst{4-0} = Rd;
- }
-
-// Complex multiply 32x16
-let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in {
- def M4_cmpyi_whc : T_S3op_8<"cmpyiwh", 0b101, 1, 1, 1, 1>;
- def M4_cmpyr_whc : T_S3op_8<"cmpyrwh", 0b111, 1, 1, 1, 1>;
-}
-
-// Classify floating-point value
-let Uses = [USR], isFP = 1 in
-def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>, Requires<[HasV5T]>;
-
-let Uses = [USR], isFP = 1 in
-def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5_0Imm:$u5),
- "$Pd = dfclass($Rss, #$u5)",
- [], "" , ALU64_tc_2early_SLOT23 > , Requires<[HasV5T]> {
- bits<2> Pd;
- bits<5> Rss;
- bits<5> u5;
-
- let IClass = 0b1101;
- let Inst{27-21} = 0b1100100;
- let Inst{20-16} = Rss;
- let Inst{12-10} = 0b000;
- let Inst{9-5} = u5;
- let Inst{4-3} = 0b10;
- let Inst{1-0} = Pd;
- }
-
-// Instructions to create floating-point constants.
-class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg>
- : ALU64Inst<(outs RC:$dst), (ins u10_0Imm:$src),
- "$dst = "#mnemonic#"(#$src)"#!if(isNeg, ":neg", ":pos"),
- [], "", ALU64_tc_2_SLOT23>, Requires<[HasV5T]> {
- bits<5> dst;
- bits<10> src;
-
- let IClass = 0b1101;
- let Inst{27-24} = RegType;
- let Inst{23} = 0b0;
- let Inst{22} = isNeg;
- let Inst{21} = src{9};
- let Inst{13-5} = src{8-0};
- let Inst{4-0} = dst;
- }
-
-let hasNewValue = 1, opNewValue = 0 in {
- def F2_sfimm_p : T_fimm <"sfmake", IntRegs, 0b0110, 0>;
- def F2_sfimm_n : T_fimm <"sfmake", IntRegs, 0b0110, 1>;
-}
-
-def F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>;
-def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV60.td b/lib/Target/Hexagon/HexagonInstrInfoV60.td
deleted file mode 100644
index c50141b18ead..000000000000
--- a/lib/Target/Hexagon/HexagonInstrInfoV60.td
+++ /dev/null
@@ -1,2068 +0,0 @@
-//=- HexagonInstrInfoV60.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon V60 instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-// Vector load
-let Predicates = [HasV60T, UseHVX] in
-let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
- class V6_LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = CVI_VM_LD,
- IType type = TypeCVI_VM_LD>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
-
-// Vector store
-let Predicates = [HasV60T, UseHVX] in
-let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
-class V6_STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = CVI_VM_ST,
- IType type = TypeCVI_VM_ST>
-: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
-
-//===----------------------------------------------------------------------===//
-// Vector loads with base + immediate offset
-//===----------------------------------------------------------------------===//
-let addrMode = BaseImmOffset, accessSize = Vector64Access in
-class T_vload_ai<string asmStr>
- : V6_LDInst <(outs VectorRegs:$dst), (ins IntRegs:$src1, s4_6Imm:$src2),
- asmStr>;
-
-let isCodeGenOnly = 1, addrMode = BaseImmOffset, accessSize = Vector128Access in
-class T_vload_ai_128B<string asmStr>
- : V6_LDInst <(outs VectorRegs128B:$dst), (ins IntRegs:$src1, s4_7Imm:$src2),
- asmStr>;
-
-let isCVLoadable = 1, hasNewValue = 1 in {
- def V6_vL32b_ai : T_vload_ai <"$dst = vmem($src1+#$src2)">,
- V6_vL32b_ai_enc;
- def V6_vL32b_nt_ai : T_vload_ai <"$dst = vmem($src1+#$src2):nt">,
- V6_vL32b_nt_ai_enc;
- // 128B
- def V6_vL32b_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2)">,
- V6_vL32b_ai_128B_enc;
- def V6_vL32b_nt_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2):nt">,
- V6_vL32b_nt_ai_128B_enc;
-}
-
-let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU, hasNewValue = 1 in {
- def V6_vL32Ub_ai : T_vload_ai <"$dst = vmemu($src1+#$src2)">,
- V6_vL32Ub_ai_enc;
- def V6_vL32Ub_ai_128B : T_vload_ai_128B <"$dst = vmemu($src1+#$src2)">,
- V6_vL32Ub_ai_128B_enc;
-}
-
-let Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD, isCVLoad = 1,
- hasNewValue = 1 in {
- def V6_vL32b_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2)">,
- V6_vL32b_cur_ai_enc;
- def V6_vL32b_nt_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2):nt">,
- V6_vL32b_nt_cur_ai_enc;
- // 128B
- def V6_vL32b_cur_ai_128B : T_vload_ai_128B
- <"$dst.cur = vmem($src1+#$src2)">,
- V6_vL32b_cur_ai_128B_enc;
- def V6_vL32b_nt_cur_ai_128B : T_vload_ai_128B
- <"$dst.cur = vmem($src1+#$src2):nt">,
- V6_vL32b_nt_cur_ai_128B_enc;
-}
-
-
-let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD, hasNewValue = 1 in {
- def V6_vL32b_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2)">,
- V6_vL32b_tmp_ai_enc;
- def V6_vL32b_nt_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2):nt">,
- V6_vL32b_nt_tmp_ai_enc;
- // 128B
- def V6_vL32b_tmp_ai_128B : T_vload_ai_128B
- <"$dst.tmp = vmem($src1+#$src2)">,
- V6_vL32b_tmp_ai_128B_enc;
- def V6_vL32b_nt_tmp_ai_128B : T_vload_ai_128B
- <"$dst.tmp = vmem($src1+#$src2)">,
- V6_vL32b_nt_tmp_ai_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Vector stores with base + immediate offset - unconditional
-//===----------------------------------------------------------------------===//
-let addrMode = BaseImmOffset, accessSize = Vector64Access, isPredicable = 1 in
-class T_vstore_ai <string mnemonic, string baseOp, Operand ImmOp,
- RegisterClass RC, bit isNT>
- : V6_STInst <(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
- mnemonic#"($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3">, NewValueRel {
- let BaseOpcode = baseOp;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_ai_64B <string mnemonic, string baseOp, bit isNT = 0>
- : T_vstore_ai <mnemonic, baseOp, s4_6Imm, VectorRegs, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_ai_128B <string mnemonic, string baseOp, bit isNT = 0>
- : T_vstore_ai <mnemonic, baseOp#"128B", s4_7Imm, VectorRegs128B, isNT>;
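-
-// Note (illustrative): per the sN_MImm naming used throughout, the 64-byte
-// forms take s4_6Imm (4-bit signed offset in 64-byte units) and the 128B
-// forms take s4_7Imm, matching the doubled vector length; the 128B classes
-// are additionally isCodeGenOnly.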
-
-let isNVStorable = 1 in {
- def V6_vS32b_ai : T_vstore_ai_64B <"vmem", "vS32b_ai">,
- V6_vS32b_ai_enc;
- def V6_vS32b_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai">,
- V6_vS32b_ai_128B_enc;
-}
-
-let isNVStorable = 1, isNonTemporal = 1 in {
- def V6_vS32b_nt_ai : T_vstore_ai_64B <"vmem", "vS32b_ai", 1>,
- V6_vS32b_nt_ai_enc;
- def V6_vS32b_nt_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai", 1>,
- V6_vS32b_nt_ai_128B_enc;
-}
-
-let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
- def V6_vS32Ub_ai : T_vstore_ai_64B <"vmemu", "vS32Ub_ai">,
- V6_vS32Ub_ai_enc;
- def V6_vS32Ub_ai_128B : T_vstore_ai_128B <"vmemu", "vS32Ub_ai">,
- V6_vS32Ub_ai_128B_enc;
-}
-//===----------------------------------------------------------------------===//
-// Vector stores with base + immediate offset - unconditional new
-//===----------------------------------------------------------------------===//
-let addrMode = BaseImmOffset, isNewValue = 1, opNewValue = 2, isNVStore = 1,
- isPredicable = 1, Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST in
-class T_vstore_new_ai <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT>
- : V6_STInst <(outs ), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
- "vmem($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3.new">, NewValueRel {
- let BaseOpcode = baseOp;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_new_ai_64B <string baseOp, bit isNT = 0>
- : T_vstore_new_ai <baseOp, s4_6Imm, VectorRegs, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_new_ai_128B <string baseOp, bit isNT = 0>
- : T_vstore_new_ai <baseOp#"128B", s4_7Imm, VectorRegs128B, isNT>;
-
-def V6_vS32b_new_ai : T_vstore_new_ai_64B <"vS32b_ai">, V6_vS32b_new_ai_enc;
-def V6_vS32b_new_ai_128B : T_vstore_new_ai_128B <"vS32b_ai">,
- V6_vS32b_new_ai_128B_enc;
-
-let isNonTemporal = 1 in {
- def V6_vS32b_nt_new_ai : T_vstore_new_ai_64B<"vS32b_ai", 1>,
- V6_vS32b_nt_new_ai_enc;
- def V6_vS32b_nt_new_ai_128B : T_vstore_new_ai_128B<"vS32b_ai", 1>,
- V6_vS32b_nt_new_ai_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Vector stores with base + immediate offset - conditional
-//===----------------------------------------------------------------------===//
-let addrMode = BaseImmOffset, isPredicated = 1 in
-class T_vstore_pred_ai <string mnemonic, string baseOp, Operand ImmOp,
- RegisterClass RC, bit isPredNot = 0, bit isNT = 0>
- : V6_STInst <(outs),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
- "if ("#!if(isPredNot, "!", "")#"$src1) "
- #mnemonic#"($src2+#$src3)"#!if(isNT, ":nt", "")#" = $src4">, NewValueRel {
- let isPredicatedFalse = isPredNot;
- let BaseOpcode = baseOp;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_pred_ai_64B <string mnemonic, string baseOp,
- bit isPredNot = 0, bit isNT = 0>
- : T_vstore_pred_ai <mnemonic, baseOp, s4_6Imm, VectorRegs, isPredNot, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_pred_ai_128B <string mnemonic, string baseOp,
- bit isPredNot = 0, bit isNT = 0>
- : T_vstore_pred_ai <mnemonic, baseOp#"128B", s4_7Imm, VectorRegs128B,
- isPredNot, isNT>;
-
-let isNVStorable = 1 in {
- def V6_vS32b_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai">,
- V6_vS32b_pred_ai_enc;
- def V6_vS32b_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1>,
- V6_vS32b_npred_ai_enc;
- // 128B
- def V6_vS32b_pred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai">,
- V6_vS32b_pred_ai_128B_enc;
- def V6_vS32b_npred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai", 1>,
- V6_vS32b_npred_ai_128B_enc;
-}
-
-
-let isNVStorable = 1, isNonTemporal = 1 in {
- def V6_vS32b_nt_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 0, 1>,
- V6_vS32b_nt_pred_ai_enc;
- def V6_vS32b_nt_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1, 1>,
- V6_vS32b_nt_npred_ai_enc;
- // 128B
- def V6_vS32b_nt_pred_ai_128B : T_vstore_pred_ai_128B
- <"vmem", "vS32b_ai", 0, 1>,
- V6_vS32b_nt_pred_ai_128B_enc;
- def V6_vS32b_nt_npred_ai_128B : T_vstore_pred_ai_128B
- <"vmem", "vS32b_ai", 1, 1>,
- V6_vS32b_nt_npred_ai_128B_enc;
-}
-
-let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
- def V6_vS32Ub_pred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai">,
- V6_vS32Ub_pred_ai_enc;
- def V6_vS32Ub_npred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai", 1>,
- V6_vS32Ub_npred_ai_enc;
- // 128B
- def V6_vS32Ub_pred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai">,
- V6_vS32Ub_pred_ai_128B_enc;
- def V6_vS32Ub_npred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai", 1>,
- V6_vS32Ub_npred_ai_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Vector stores with base + immediate offset - byte-enabled aligned
-//===----------------------------------------------------------------------===//
-let addrMode = BaseImmOffset in
-class T_vstore_qpred_ai <Operand ImmOp, RegisterClass RC,
- bit isPredNot = 0, bit isNT = 0>
- : V6_STInst <(outs),
- (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
- "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)"
- #!if(isNT, ":nt", "")#" = $src4"> {
- let isPredicatedFalse = isPredNot;
-}
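-
-// Note (assumption): the "qpred" forms take a vector predicate (VecPredRegs)
-// instead of a scalar predicate; being byte-enabled, only the lanes whose
-// predicate bits are set are written to memory.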
-
-let accessSize = Vector64Access in
-class T_vstore_qpred_ai_64B <bit isPredNot = 0, bit isNT = 0>
- : T_vstore_qpred_ai <s4_6Imm, VectorRegs, isPredNot, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_qpred_ai_128B <bit isPredNot = 0, bit isNT = 0>
- : T_vstore_qpred_ai <s4_7Imm, VectorRegs128B, isPredNot, isNT>;
-
-def V6_vS32b_qpred_ai : T_vstore_qpred_ai_64B, V6_vS32b_qpred_ai_enc;
-def V6_vS32b_nqpred_ai : T_vstore_qpred_ai_64B <1>,
- V6_vS32b_nqpred_ai_enc;
-def V6_vS32b_nt_qpred_ai : T_vstore_qpred_ai_64B <0, 1>,
- V6_vS32b_nt_qpred_ai_enc;
-def V6_vS32b_nt_nqpred_ai : T_vstore_qpred_ai_64B <1, 1>,
- V6_vS32b_nt_nqpred_ai_enc;
-// 128B
-def V6_vS32b_qpred_ai_128B : T_vstore_qpred_ai_128B, V6_vS32b_qpred_ai_128B_enc;
-def V6_vS32b_nqpred_ai_128B : T_vstore_qpred_ai_128B<1>,
- V6_vS32b_nqpred_ai_128B_enc;
-def V6_vS32b_nt_qpred_ai_128B : T_vstore_qpred_ai_128B<0, 1>,
- V6_vS32b_nt_qpred_ai_128B_enc;
-def V6_vS32b_nt_nqpred_ai_128B : T_vstore_qpred_ai_128B<1, 1>,
- V6_vS32b_nt_nqpred_ai_128B_enc;
-
-
-//===----------------------------------------------------------------------===//
-// Vector stores with base + immediate offset - conditional new
-//===----------------------------------------------------------------------===//
-let addrMode = BaseImmOffset, isPredicated = 1, isNewValue = 1, opNewValue = 3,
- isNVStore = 1, Type = TypeCVI_VM_NEW_ST, Itinerary = CVI_VM_NEW_ST in
-class T_vstore_new_pred_ai <string baseOp, Operand ImmOp, RegisterClass RC,
- bit isPredNot, bit isNT>
- : V6_STInst <(outs),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
- "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)"
- #!if(isNT, ":nt", "")#" = $src4.new">, NewValueRel {
- let isPredicatedFalse = isPredNot;
- let BaseOpcode = baseOp;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_new_pred_ai_64B <string baseOp, bit isPredNot = 0, bit isNT = 0>
- : T_vstore_new_pred_ai <baseOp, s4_6Imm, VectorRegs, isPredNot, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_new_pred_ai_128B <string baseOp, bit isPredNot = 0, bit isNT = 0>
- : T_vstore_new_pred_ai <baseOp#"128B", s4_7Imm, VectorRegs128B,
- isPredNot, isNT>;
-
-
-def V6_vS32b_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai">,
- V6_vS32b_new_pred_ai_enc;
-def V6_vS32b_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1>,
- V6_vS32b_new_npred_ai_enc;
-// 128B
-def V6_vS32b_new_pred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai">,
- V6_vS32b_new_pred_ai_128B_enc;
-def V6_vS32b_new_npred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai", 1>,
- V6_vS32b_new_npred_ai_128B_enc;
-let isNonTemporal = 1 in {
- def V6_vS32b_nt_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 0, 1>,
- V6_vS32b_nt_new_pred_ai_enc;
- def V6_vS32b_nt_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1, 1>,
- V6_vS32b_nt_new_npred_ai_enc;
- // 128B
- def V6_vS32b_nt_new_pred_ai_128B : T_vstore_new_pred_ai_128B
- <"vS32b_ai", 0, 1>,
- V6_vS32b_nt_new_pred_ai_128B_enc;
- def V6_vS32b_nt_new_npred_ai_128B : T_vstore_new_pred_ai_128B
- <"vS32b_ai", 1, 1>,
- V6_vS32b_nt_new_npred_ai_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment vector loads with immediate offset.
-//===----------------------------------------------------------------------===//
-let addrMode = PostInc, hasNewValue = 1 in
-class T_vload_pi<string asmStr, Operand ImmOp, RegisterClass RC>
- : V6_LDInst <(outs RC:$dst, IntRegs:$_dst_),
- (ins IntRegs:$src1, ImmOp:$src2), asmStr, [],
- "$src1 = $_dst_">;
-
-let accessSize = Vector64Access in
-class T_vload_pi_64B <string asmStr>
- : T_vload_pi <asmStr, s3_6Imm, VectorRegs>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vload_pi_128B <string asmStr>
- : T_vload_pi <asmStr, s3_7Imm, VectorRegs128B>;
-
-let isCVLoadable = 1 in {
- def V6_vL32b_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2)">,
- V6_vL32b_pi_enc;
- def V6_vL32b_nt_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2):nt">,
- V6_vL32b_nt_pi_enc;
- // 128B
- def V6_vL32b_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2)">,
- V6_vL32b_pi_128B_enc;
- def V6_vL32b_nt_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2):nt">,
- V6_vL32b_nt_pi_128B_enc;
-}
-
-let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in {
- def V6_vL32Ub_pi : T_vload_pi_64B <"$dst = vmemu($src1++#$src2)">,
- V6_vL32Ub_pi_enc;
- // 128B
- def V6_vL32Ub_pi_128B : T_vload_pi_128B <"$dst = vmemu($src1++#$src2)">,
- V6_vL32Ub_pi_128B_enc;
-}
-
-let isCVLoad = 1, Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD in {
- def V6_vL32b_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2)">,
- V6_vL32b_cur_pi_enc;
- def V6_vL32b_nt_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2):nt">,
- V6_vL32b_nt_cur_pi_enc;
- // 128B
- def V6_vL32b_cur_pi_128B : T_vload_pi_128B
- <"$dst.cur = vmem($src1++#$src2)">,
- V6_vL32b_cur_pi_128B_enc;
- def V6_vL32b_nt_cur_pi_128B : T_vload_pi_128B
- <"$dst.cur = vmem($src1++#$src2):nt">,
- V6_vL32b_nt_cur_pi_128B_enc;
-}
-
-let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in {
- def V6_vL32b_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2)">,
- V6_vL32b_tmp_pi_enc;
- def V6_vL32b_nt_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2):nt">,
- V6_vL32b_nt_tmp_pi_enc;
- // 128B
- def V6_vL32b_tmp_pi_128B : T_vload_pi_128B
- <"$dst.tmp = vmem($src1++#$src2)">,
- V6_vL32b_tmp_pi_128B_enc;
- def V6_vL32b_nt_tmp_pi_128B : T_vload_pi_128B
- <"$dst.tmp = vmem($src1++#$src2):nt">,
- V6_vL32b_nt_tmp_pi_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment vector stores with immediate offset.
-//===----------------------------------------------------------------------===//
-let addrMode = PostInc, isPredicable = 1 in
-class T_vstore_pi <string mnemonic, string baseOp, Operand ImmOp,
- RegisterClass RC, bit isNT>
- : V6_STInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
- mnemonic#"($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3", [],
- "$src1 = $_dst_">, NewValueRel {
- let BaseOpcode = baseOp;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_pi_64B <string mnemonic, string baseOp, bit isNT = 0>
- : T_vstore_pi <mnemonic, baseOp, s3_6Imm, VectorRegs, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_pi_128B <string mnemonic, string baseOp, bit isNT = 0>
- : T_vstore_pi <mnemonic, baseOp#"128B", s3_7Imm, VectorRegs128B, isNT>;
-
-let isNVStorable = 1 in {
- def V6_vS32b_pi : T_vstore_pi_64B <"vmem", "vS32b_pi">, V6_vS32b_pi_enc;
- def V6_vS32b_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi">,
- V6_vS32b_pi_128B_enc;
-}
-
-let isNVStorable = 1 , isNonTemporal = 1 in {
- def V6_vS32b_nt_pi : T_vstore_pi_64B <"vmem", "vS32b_pi", 1>,
- V6_vS32b_nt_pi_enc;
- def V6_vS32b_nt_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi", 1>,
- V6_vS32b_nt_pi_128B_enc;
-}
-
-
-let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
- def V6_vS32Ub_pi : T_vstore_pi_64B <"vmemu", "vS32Ub_pi">,
- V6_vS32Ub_pi_enc;
- def V6_vS32Ub_pi_128B : T_vstore_pi_128B <"vmemu", "vS32Ub_pi">,
- V6_vS32Ub_pi_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment unconditional .new vector stores with immediate offset.
-//===----------------------------------------------------------------------===//
-let addrMode = PostInc, Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST,
- isNewValue = 1, isPredicable = 1, opNewValue = 3, isNVStore = 1 in
-class T_vstore_new_pi <string baseOp, Operand ImmOp, RegisterClass RC, bit isNT>
- : V6_STInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
- "vmem($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [],
- "$src1 = $_dst_">, NewValueRel {
- let BaseOpcode = baseOp;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_new_pi_64B <string baseOp, bit isNT = 0>
- : T_vstore_new_pi <baseOp, s3_6Imm, VectorRegs, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_new_pi_128B <string baseOp, bit isNT = 0>
- : T_vstore_new_pi <baseOp#"128B", s3_7Imm, VectorRegs128B, isNT>;
-
-
-def V6_vS32b_new_pi : T_vstore_new_pi_64B <"vS32b_pi">,
- V6_vS32b_new_pi_enc;
-def V6_vS32b_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi">,
- V6_vS32b_new_pi_128B_enc;
-
-let isNonTemporal = 1 in {
- def V6_vS32b_nt_new_pi : T_vstore_new_pi_64B <"vS32b_pi", 1>,
- V6_vS32b_nt_new_pi_enc;
- def V6_vS32b_nt_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi", 1>,
- V6_vS32b_nt_new_pi_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment conditional vector stores with immediate offset
-//===----------------------------------------------------------------------===//
-let isPredicated = 1, addrMode = PostInc in
-class T_vstore_pred_pi <string mnemonic, string baseOp, Operand ImmOp,
- RegisterClass RC, bit isPredNot, bit isNT>
- : V6_STInst<(outs IntRegs:$_dst_),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
- "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++#$src3)"
- #!if(isNT, ":nt", "")#" = $src4", [],
- "$src2 = $_dst_">, NewValueRel {
- let isPredicatedFalse = isPredNot;
- let BaseOpcode = baseOp;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_pred_pi_64B <string mnemonic, string baseOp,
- bit isPredNot = 0, bit isNT = 0>
- : T_vstore_pred_pi <mnemonic, baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_pred_pi_128B <string mnemonic, string baseOp,
- bit isPredNot = 0, bit isNT = 0>
- : T_vstore_pred_pi <mnemonic, baseOp#"128B", s3_7Imm, VectorRegs128B,
- isPredNot, isNT>;
-
-let isNVStorable = 1 in {
- def V6_vS32b_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi">,
- V6_vS32b_pred_pi_enc;
- def V6_vS32b_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1>,
- V6_vS32b_npred_pi_enc;
- // 128B
- def V6_vS32b_pred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi">,
- V6_vS32b_pred_pi_128B_enc;
- def V6_vS32b_npred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi", 1>,
- V6_vS32b_npred_pi_128B_enc;
-}
-let isNVStorable = 1, isNonTemporal = 1 in {
- def V6_vS32b_nt_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 0, 1>,
- V6_vS32b_nt_pred_pi_enc;
- def V6_vS32b_nt_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1, 1>,
- V6_vS32b_nt_npred_pi_enc;
- // 128B
- def V6_vS32b_nt_pred_pi_128B : T_vstore_pred_pi_128B
- <"vmem", "vS32b_pi", 0, 1>,
- V6_vS32b_nt_pred_pi_128B_enc;
- def V6_vS32b_nt_npred_pi_128B : T_vstore_pred_pi_128B
- <"vmem", "vS32b_pi", 1, 1>,
- V6_vS32b_nt_npred_pi_128B_enc;
-}
-
-let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in {
- def V6_vS32Ub_pred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi">,
- V6_vS32Ub_pred_pi_enc;
- def V6_vS32Ub_npred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi", 1>,
- V6_vS32Ub_npred_pi_enc;
- // 128B
- def V6_vS32Ub_pred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi">,
- V6_vS32Ub_pred_pi_128B_enc;
- def V6_vS32Ub_npred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi", 1>,
- V6_vS32Ub_npred_pi_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment vector stores with immediate offset - byte-enabled aligned
-//===----------------------------------------------------------------------===//
-let addrMode = PostInc in
-class T_vstore_qpred_pi <Operand ImmOp, RegisterClass RC, bit isPredNot = 0,
- bit isNT = 0>
- : V6_STInst <(outs IntRegs:$_dst_),
- (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
- "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)"
- #!if(isNT, ":nt", "")#" = $src4", [],
- "$src2 = $_dst_">;
-
-let accessSize = Vector64Access in
-class T_vstore_qpred_pi_64B <bit isPredNot = 0, bit isNT = 0>
- : T_vstore_qpred_pi <s3_6Imm, VectorRegs, isPredNot, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_qpred_pi_128B <bit isPredNot = 0, bit isNT = 0>
- : T_vstore_qpred_pi <s3_7Imm, VectorRegs128B, isPredNot, isNT>;
-
-def V6_vS32b_qpred_pi : T_vstore_qpred_pi_64B, V6_vS32b_qpred_pi_enc;
-def V6_vS32b_nqpred_pi : T_vstore_qpred_pi_64B <1>, V6_vS32b_nqpred_pi_enc;
-// 128B
-def V6_vS32b_qpred_pi_128B : T_vstore_qpred_pi_128B,
- V6_vS32b_qpred_pi_128B_enc;
-def V6_vS32b_nqpred_pi_128B : T_vstore_qpred_pi_128B<1>,
- V6_vS32b_nqpred_pi_128B_enc;
-
-let isNonTemporal = 1 in {
- def V6_vS32b_nt_qpred_pi : T_vstore_qpred_pi_64B <0, 1>,
- V6_vS32b_nt_qpred_pi_enc;
- def V6_vS32b_nt_nqpred_pi : T_vstore_qpred_pi_64B <1, 1>,
- V6_vS32b_nt_nqpred_pi_enc;
- // 128B
- def V6_vS32b_nt_qpred_pi_128B : T_vstore_qpred_pi_128B<0, 1>,
- V6_vS32b_nt_qpred_pi_128B_enc;
- def V6_vS32b_nt_nqpred_pi_128B : T_vstore_qpred_pi_128B<1, 1>,
- V6_vS32b_nt_nqpred_pi_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment conditional .new vector stores with immediate offset
-//===----------------------------------------------------------------------===//
-let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1,
- isNewValue = 1, opNewValue = 4, addrMode = PostInc, isNVStore = 1 in
-class T_vstore_new_pred_pi <string baseOp, Operand ImmOp, RegisterClass RC,
- bit isPredNot, bit isNT>
- : V6_STInst <(outs IntRegs:$_dst_),
- (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
- "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)"
- #!if(isNT, ":nt", "")#" = $src4.new", [],
- "$src2 = $_dst_"> , NewValueRel {
- let isPredicatedFalse = isPredNot;
- let BaseOpcode = baseOp;
-}
-
-let accessSize = Vector64Access in
-class T_vstore_new_pred_pi_64B <string baseOp, bit isPredNot = 0, bit isNT = 0>
- : T_vstore_new_pred_pi <baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>;
-
-let isCodeGenOnly = 1, accessSize = Vector128Access in
-class T_vstore_new_pred_pi_128B <string baseOp, bit isPredNot = 0, bit isNT = 0>
- : T_vstore_new_pred_pi <baseOp#"128B", s3_7Imm, VectorRegs128B,
- isPredNot, isNT>;
-
-def V6_vS32b_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi">,
- V6_vS32b_new_pred_pi_enc;
-def V6_vS32b_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1>,
- V6_vS32b_new_npred_pi_enc;
-// 128B
-def V6_vS32b_new_pred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi">,
- V6_vS32b_new_pred_pi_128B_enc;
-def V6_vS32b_new_npred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi", 1>,
- V6_vS32b_new_npred_pi_128B_enc;
-let isNonTemporal = 1 in {
- def V6_vS32b_nt_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 0, 1>,
- V6_vS32b_nt_new_pred_pi_enc;
- def V6_vS32b_nt_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1, 1>,
- V6_vS32b_nt_new_npred_pi_enc;
- // 128B
- def V6_vS32b_nt_new_pred_pi_128B : T_vstore_new_pred_pi_128B
- <"vS32b_pi", 0, 1>,
- V6_vS32b_nt_new_pred_pi_128B_enc;
- def V6_vS32b_nt_new_npred_pi_128B : T_vstore_new_pred_pi_128B
- <"vS32b_pi", 1, 1>,
- V6_vS32b_nt_new_npred_pi_128B_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment vector loads with register offset
-//===----------------------------------------------------------------------===//
-let hasNewValue = 1 in
-class T_vload_ppu<string asmStr>
- : V6_LDInst <(outs VectorRegs:$dst, IntRegs:$_dst_),
- (ins IntRegs:$src1, ModRegs:$src2), asmStr, [],
- "$src1 = $_dst_">, NewValueRel;
-
-let isCVLoadable = 1 in {
- def V6_vL32b_ppu : T_vload_ppu <"$dst = vmem($src1++$src2)">,
- V6_vL32b_ppu_enc;
- def V6_vL32b_nt_ppu : T_vload_ppu <"$dst = vmem($src1++$src2):nt">,
- V6_vL32b_nt_ppu_enc;
-}
-
-let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in
-def V6_vL32Ub_ppu : T_vload_ppu <"$dst = vmemu($src1++$src2)">,
- V6_vL32Ub_ppu_enc;
-
-let isCVLoad = 1, Itinerary = CVI_VM_CUR_LD, Type = TypeCVI_VM_CUR_LD in {
- def V6_vL32b_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2)">,
- V6_vL32b_cur_ppu_enc;
- def V6_vL32b_nt_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2):nt">,
- V6_vL32b_nt_cur_ppu_enc;
-}
-
-let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in {
- def V6_vL32b_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2)">,
- V6_vL32b_tmp_ppu_enc;
- def V6_vL32b_nt_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2):nt">,
- V6_vL32b_nt_tmp_ppu_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment vector stores with register offset
-//===----------------------------------------------------------------------===//
-let isPredicable = 1 in
-class T_vstore_ppu <string mnemonic, bit isNT = 0>
- : V6_STInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
- mnemonic#"($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3", [],
- "$src1 = $_dst_">, NewValueRel;
-
-let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
- def V6_vS32b_ppu : T_vstore_ppu <"vmem">,
- V6_vS32b_ppu_enc;
- let isNonTemporal = 1, BaseOpcode = "vS32b_ppu" in
- def V6_vS32b_nt_ppu : T_vstore_ppu <"vmem", 1>,
- V6_vS32b_nt_ppu_enc;
-}
-
-let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in
-def V6_vS32Ub_ppu : T_vstore_ppu <"vmemu">, V6_vS32Ub_ppu_enc;
-
-//===----------------------------------------------------------------------===//
-// Post increment .new vector stores with register offset
-//===----------------------------------------------------------------------===//
-let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1,
- isPredicable = 1, opNewValue = 3, isNVStore = 1 in
-class T_vstore_new_ppu <bit isNT = 0>
- : V6_STInst <(outs IntRegs:$_dst_),
- (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
- "vmem($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [],
- "$src1 = $_dst_">, NewValueRel;
-
-let BaseOpcode = "vS32b_ppu" in
-def V6_vS32b_new_ppu : T_vstore_new_ppu, V6_vS32b_new_ppu_enc;
-
-let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in
-def V6_vS32b_nt_new_ppu : T_vstore_new_ppu<1>, V6_vS32b_nt_new_ppu_enc;
-
-//===----------------------------------------------------------------------===//
-// Post increment conditional vector stores with register offset
-//===----------------------------------------------------------------------===//
-let isPredicated = 1 in
-class T_vstore_pred_ppu <string mnemonic, bit isPredNot = 0, bit isNT = 0>
- : V6_STInst<(outs IntRegs:$_dst_),
- (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
- "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++$src3)"
- #!if(isNT, ":nt", "")#" = $src4", [],
- "$src2 = $_dst_">, NewValueRel {
- let isPredicatedFalse = isPredNot;
-}
-
-let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
- def V6_vS32b_pred_ppu : T_vstore_pred_ppu<"vmem">, V6_vS32b_pred_ppu_enc;
- def V6_vS32b_npred_ppu: T_vstore_pred_ppu<"vmem", 1>, V6_vS32b_npred_ppu_enc;
-}
-
-let isNVStorable = 1, BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in {
- def V6_vS32b_nt_pred_ppu : T_vstore_pred_ppu <"vmem", 0, 1>,
- V6_vS32b_nt_pred_ppu_enc;
- def V6_vS32b_nt_npred_ppu : T_vstore_pred_ppu <"vmem", 1, 1>,
- V6_vS32b_nt_npred_ppu_enc;
-}
-
-let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU,
- Type = TypeCVI_VM_STU in {
- def V6_vS32Ub_pred_ppu : T_vstore_pred_ppu <"vmemu">,
- V6_vS32Ub_pred_ppu_enc;
- def V6_vS32Ub_npred_ppu : T_vstore_pred_ppu <"vmemu", 1>,
- V6_vS32Ub_npred_ppu_enc;
-}
-
-//===----------------------------------------------------------------------===//
-// Post increment vector stores with register offset - byte-enabled aligned
-//===----------------------------------------------------------------------===//
-class T_vstore_qpred_ppu <bit isPredNot = 0, bit isNT = 0>
- : V6_STInst <(outs IntRegs:$_dst_),
- (ins VecPredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
- "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)"
- #!if(isNT, ":nt", "")#" = $src4", [],
- "$src2 = $_dst_">, NewValueRel;
-
-def V6_vS32b_qpred_ppu : T_vstore_qpred_ppu, V6_vS32b_qpred_ppu_enc;
-def V6_vS32b_nqpred_ppu : T_vstore_qpred_ppu<1>, V6_vS32b_nqpred_ppu_enc;
-def V6_vS32b_nt_qpred_ppu : T_vstore_qpred_ppu<0, 1>,
- V6_vS32b_nt_qpred_ppu_enc;
-def V6_vS32b_nt_nqpred_ppu : T_vstore_qpred_ppu<1, 1>,
- V6_vS32b_nt_nqpred_ppu_enc;
-
-//===----------------------------------------------------------------------===//
-// Post increment conditional .new vector stores with register offset
-//===----------------------------------------------------------------------===//
-let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1,
- isNewValue = 1, opNewValue = 4, isNVStore = 1 in
-class T_vstore_new_pred_ppu <bit isPredNot = 0, bit isNT = 0>
- : V6_STInst <(outs IntRegs:$_dst_),
- (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4),
- "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)"
- #!if(isNT, ":nt", "")#" = $src4.new", [],
- "$src2 = $_dst_">, NewValueRel {
- let isPredicatedFalse = isPredNot;
-}
-
-let BaseOpcode = "vS32b_ppu" in {
- def V6_vS32b_new_pred_ppu : T_vstore_new_pred_ppu,
- V6_vS32b_new_pred_ppu_enc;
- def V6_vS32b_new_npred_ppu : T_vstore_new_pred_ppu<1>,
- V6_vS32b_new_npred_ppu_enc;
-}
-
-let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in {
-def V6_vS32b_nt_new_pred_ppu : T_vstore_new_pred_ppu<0, 1>,
- V6_vS32b_nt_new_pred_ppu_enc;
-def V6_vS32b_nt_new_npred_ppu : T_vstore_new_pred_ppu<1, 1>,
- V6_vS32b_nt_new_npred_ppu_enc;
-}
-
-
-// Vector load/store pseudos
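-// These pseudos cover loads/stores of vector register pairs; they carry no
-// asm syntax of their own and are presumably expanded into single-vector
-// accesses later (they never reach emission as-is).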
-
-let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in
-class STrivv_template<RegisterClass RC>
- : V6_STInst<(outs), (ins IntRegs:$addr, s32_0Imm:$off, RC:$src), "", []>;
-
-def PS_vstorerw_ai: STrivv_template<VecDblRegs>,
- Requires<[HasV60T,UseHVXSgl]>;
-def PS_vstorerwu_ai: STrivv_template<VecDblRegs>,
- Requires<[HasV60T,UseHVXSgl]>;
-def PS_vstorerw_ai_128B: STrivv_template<VecDblRegs128B>,
- Requires<[HasV60T,UseHVXDbl]>;
-def PS_vstorerwu_ai_128B: STrivv_template<VecDblRegs128B>,
- Requires<[HasV60T,UseHVXDbl]>;
-
-
-let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in
-class LDrivv_template<RegisterClass RC>
- : V6_LDInst<(outs RC:$dst), (ins IntRegs:$addr, s32_0Imm:$off), "", []>;
-
-def PS_vloadrw_ai: LDrivv_template<VecDblRegs>,
- Requires<[HasV60T,UseHVXSgl]>;
-def PS_vloadrwu_ai: LDrivv_template<VecDblRegs>,
- Requires<[HasV60T,UseHVXSgl]>;
-def PS_vloadrw_ai_128B: LDrivv_template<VecDblRegs128B>,
- Requires<[HasV60T,UseHVXDbl]>;
-def PS_vloadrwu_ai_128B: LDrivv_template<VecDblRegs128B>,
- Requires<[HasV60T,UseHVXDbl]>;
-
-// Store vector predicate pseudo.
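-// The ".error" asm string below makes the assembler fail loudly if one of
-// these pseudos ever survives to instruction emission.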
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
- isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
- def PS_vstorerq_ai : STInst<(outs),
- (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs:$src1),
- ".error \"should not emit\"", []>,
- Requires<[HasV60T,UseHVXSgl]>;
- def PS_vstorerq_ai_128B : STInst<(outs),
- (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs128B:$src1),
- ".error \"should not emit\"", []>,
- Requires<[HasV60T,UseHVXDbl]>;
-}
-
-// Load vector predicate pseudo.
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
- opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
- def PS_vloadrq_ai : LDInst<(outs VecPredRegs:$dst),
- (ins IntRegs:$base, s32_0Imm:$offset),
- ".error \"should not emit\"", []>,
- Requires<[HasV60T,UseHVXSgl]>;
- def PS_vloadrq_ai_128B : LDInst<(outs VecPredRegs128B:$dst),
- (ins IntRegs:$base, s32_0Imm:$offset),
- ".error \"should not emit\"", []>,
- Requires<[HasV60T,UseHVXDbl]>;
-}
-
-class VSELInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = CVI_VA_DV,
- IType type = TypeCVI_VA_DV>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
-
-let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
- def PS_vselect: VSELInst<(outs VectorRegs:$dst),
- (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), "", []>,
- Requires<[HasV60T,UseHVXSgl]>;
- def PS_vselect_128B: VSELInst<(outs VectorRegs128B:$dst),
- (ins PredRegs:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3),
- "", []>, Requires<[HasV60T,UseHVXDbl]>;
- def PS_wselect: VSELInst<(outs VecDblRegs:$dst),
- (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3), "", []>,
- Requires<[HasV60T,UseHVXSgl]>;
- def PS_wselect_128B: VSELInst<(outs VecDblRegs128B:$dst),
- (ins PredRegs:$src1, VecDblRegs128B:$src2, VecDblRegs128B:$src3),
- "", []>, Requires<[HasV60T,UseHVXDbl]>;
-}
-
-let hasNewValue = 1 in
-class T_vmpy <string asmString, RegisterClass RCout, RegisterClass RCin>
- : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2),
- asmString >;
-
-multiclass T_vmpy <string asmString, RegisterClass RCout,
- RegisterClass RCin> {
- def NAME : T_vmpy <asmString, RCout, RCin>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_vmpy <asmString, !cast<RegisterClass>(RCout#"128B"),
- !cast<RegisterClass>(RCin#"128B")>;
-}
-
-multiclass T_vmpy_VV <string asmString>:
- T_vmpy <asmString, VectorRegs, VectorRegs>;
-
-multiclass T_vmpy_WW <string asmString>:
- T_vmpy <asmString, VecDblRegs, VecDblRegs>;
-
-multiclass T_vmpy_VW <string asmString>:
- T_vmpy <asmString, VectorRegs, VecDblRegs>;
-
-multiclass T_vmpy_WV <string asmString>:
- T_vmpy <asmString, VecDblRegs, VectorRegs>;
-
-defm V6_vtmpyb :T_vmpy_WW<"$dst.h = vtmpy($src1.b,$src2.b)">, V6_vtmpyb_enc;
-defm V6_vtmpybus :T_vmpy_WW<"$dst.h = vtmpy($src1.ub,$src2.b)">, V6_vtmpybus_enc;
-defm V6_vdsaduh :T_vmpy_WW<"$dst.uw = vdsad($src1.uh,$src2.uh)">, V6_vdsaduh_enc;
-defm V6_vmpybus :T_vmpy_WV<"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybus_enc;
-defm V6_vmpabus :T_vmpy_WW<"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabus_enc;
-defm V6_vmpahb :T_vmpy_WW<"$dst.w = vmpa($src1.h,$src2.b)">, V6_vmpahb_enc;
-defm V6_vmpyh :T_vmpy_WV<"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyh_enc;
-defm V6_vmpyuh :T_vmpy_WV<"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuh_enc;
-defm V6_vmpyiwh :T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_enc;
-defm V6_vtmpyhb :T_vmpy_WW<"$dst.w = vtmpy($src1.h,$src2.b)">, V6_vtmpyhb_enc;
-defm V6_vmpyub :T_vmpy_WV<"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyub_enc;
-
-let Itinerary = CVI_VX_LONG, Type = TypeCVI_VX in
-defm V6_vmpyihb :T_vmpy_VV<"$dst.h = vmpyi($src1.h,$src2.b)">, V6_vmpyihb_enc;
-
-defm V6_vdmpybus_dv :
- T_vmpy_WW <"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_dv_enc;
-defm V6_vdmpyhsusat :
- T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.uh):sat">, V6_vdmpyhsusat_enc;
-defm V6_vdmpyhsuisat :
- T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.uh,#1):sat">, V6_vdmpyhsuisat_enc;
-defm V6_vdmpyhsat :
- T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhsat_enc;
-defm V6_vdmpyhisat :
- T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhisat_enc;
-defm V6_vdmpyhb_dv :
- T_vmpy_WW <"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_dv_enc;
-defm V6_vmpyhss :
- T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:sat">, V6_vmpyhss_enc;
-defm V6_vmpyhsrs :
- T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhsrs_enc;
-
-let Itinerary = CVI_VP, Type = TypeCVI_VP in
-defm V6_vror : T_vmpy_VV <"$dst = vror($src1,$src2)">, V6_vror_enc;
-
-let Itinerary = CVI_VX, Type = TypeCVI_VX in {
-defm V6_vdmpyhb : T_vmpy_VV<"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_enc;
-defm V6_vrmpybus : T_vmpy_VV<"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybus_enc;
-defm V6_vdmpybus : T_vmpy_VV<"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_enc;
-defm V6_vmpyiwb : T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.b)">, V6_vmpyiwb_enc;
-defm V6_vrmpyub : T_vmpy_VV<"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyub_enc;
-}
-
-let Itinerary = CVI_VS, Type = TypeCVI_VS in {
-defm V6_vasrw : T_vmpy_VV <"$dst.w = vasr($src1.w,$src2)">, V6_vasrw_enc;
-defm V6_vasrh : T_vmpy_VV <"$dst.h = vasr($src1.h,$src2)">, V6_vasrh_enc;
-defm V6_vaslw : T_vmpy_VV <"$dst.w = vasl($src1.w,$src2)">, V6_vaslw_enc;
-defm V6_vaslh : T_vmpy_VV <"$dst.h = vasl($src1.h,$src2)">, V6_vaslh_enc;
-defm V6_vlsrw : T_vmpy_VV <"$dst.uw = vlsr($src1.uw,$src2)">, V6_vlsrw_enc;
-defm V6_vlsrh : T_vmpy_VV <"$dst.uh = vlsr($src1.uh,$src2)">, V6_vlsrh_enc;
-}
-
-let hasNewValue = 1 in
-class T_HVX_alu <string asmString, InstrItinClass itin,
- RegisterClass RCout, RegisterClass RCin>
- : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2),
- asmString >{
- let Itinerary = itin;
- let Type = !cast<IType>("Type"#itin);
-}
-
-multiclass T_HVX_alu <string asmString, RegisterClass RCout,
- RegisterClass RCin, InstrItinClass itin> {
- def NAME : T_HVX_alu <asmString, itin, RCout, RCin>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_alu <asmString, itin,
- !cast<RegisterClass>(RCout#"128B"),
- !cast<RegisterClass>(RCin#"128B")>;
-}
-
-multiclass T_HVX_alu_VV <string asmString>:
- T_HVX_alu <asmString, VectorRegs, VectorRegs, CVI_VA>;
-
-multiclass T_HVX_alu_WW <string asmString>:
- T_HVX_alu <asmString, VecDblRegs, VecDblRegs, CVI_VA_DV>;
-
-multiclass T_HVX_alu_WV <string asmString>:
- T_HVX_alu <asmString, VecDblRegs, VectorRegs, CVI_VX_DV>;
-
-
-let Itinerary = CVI_VX, Type = TypeCVI_VX in {
-defm V6_vrmpyubv :
- T_HVX_alu_VV <"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyubv_enc;
-defm V6_vrmpybv :
- T_HVX_alu_VV <"$dst.w = vrmpy($src1.b,$src2.b)">, V6_vrmpybv_enc;
-defm V6_vrmpybusv :
- T_HVX_alu_VV <"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybusv_enc;
-defm V6_vabsdiffub :
- T_HVX_alu_VV <"$dst.ub = vabsdiff($src1.ub,$src2.ub)">, V6_vabsdiffub_enc;
-defm V6_vabsdiffh :
- T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.h,$src2.h)">, V6_vabsdiffh_enc;
-defm V6_vabsdiffuh :
- T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.uh,$src2.uh)">, V6_vabsdiffuh_enc;
-defm V6_vabsdiffw :
- T_HVX_alu_VV <"$dst.uw = vabsdiff($src1.w,$src2.w)">, V6_vabsdiffw_enc;
-}
-
-let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in {
-defm V6_vdmpyhvsat :
- T_HVX_alu_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhvsat_enc;
-defm V6_vmpyhvsrs :
- T_HVX_alu_VV<"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhvsrs_enc;
-defm V6_vmpyih :
- T_HVX_alu_VV <"$dst.h = vmpyi($src1.h,$src2.h)">, V6_vmpyih_enc;
-}
-
-defm V6_vand :
- T_HVX_alu_VV <"$dst = vand($src1,$src2)">, V6_vand_enc;
-defm V6_vor :
- T_HVX_alu_VV <"$dst = vor($src1,$src2)">, V6_vor_enc;
-defm V6_vxor :
- T_HVX_alu_VV <"$dst = vxor($src1,$src2)">, V6_vxor_enc;
-defm V6_vaddw :
- T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_enc;
-defm V6_vaddubsat :
- T_HVX_alu_VV <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_enc;
-defm V6_vadduhsat :
- T_HVX_alu_VV <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_enc;
-defm V6_vaddhsat :
- T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_enc;
-defm V6_vaddwsat :
- T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_enc;
-defm V6_vsubb :
- T_HVX_alu_VV <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_enc;
-defm V6_vsubh :
- T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_enc;
-defm V6_vsubw :
- T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_enc;
-defm V6_vsububsat :
- T_HVX_alu_VV <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_enc;
-defm V6_vsubuhsat :
- T_HVX_alu_VV <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_enc;
-defm V6_vsubhsat :
- T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_enc;
-defm V6_vsubwsat :
- T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_enc;
-defm V6_vavgub :
- T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub)">, V6_vavgub_enc;
-defm V6_vavguh :
- T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh)">, V6_vavguh_enc;
-defm V6_vavgh :
- T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h)">, V6_vavgh_enc;
-defm V6_vavgw :
- T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w)">, V6_vavgw_enc;
-defm V6_vnavgub :
- T_HVX_alu_VV <"$dst.b = vnavg($src1.ub,$src2.ub)">, V6_vnavgub_enc;
-defm V6_vnavgh :
- T_HVX_alu_VV <"$dst.h = vnavg($src1.h,$src2.h)">, V6_vnavgh_enc;
-defm V6_vnavgw :
- T_HVX_alu_VV <"$dst.w = vnavg($src1.w,$src2.w)">, V6_vnavgw_enc;
-defm V6_vavgubrnd :
- T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub):rnd">, V6_vavgubrnd_enc;
-defm V6_vavguhrnd :
- T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh):rnd">, V6_vavguhrnd_enc;
-defm V6_vavghrnd :
- T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h):rnd">, V6_vavghrnd_enc;
-defm V6_vavgwrnd :
- T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w):rnd">, V6_vavgwrnd_enc;
-
-defm V6_vmpybv :
- T_HVX_alu_WV <"$dst.h = vmpy($src1.b,$src2.b)">, V6_vmpybv_enc;
-defm V6_vmpyubv :
- T_HVX_alu_WV <"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyubv_enc;
-defm V6_vmpybusv :
- T_HVX_alu_WV <"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybusv_enc;
-defm V6_vmpyhv :
- T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyhv_enc;
-defm V6_vmpyuhv :
- T_HVX_alu_WV <"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuhv_enc;
-defm V6_vmpyhus :
- T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.uh)">, V6_vmpyhus_enc;
-defm V6_vaddubh :
- T_HVX_alu_WV <"$dst.h = vadd($src1.ub,$src2.ub)">, V6_vaddubh_enc;
-defm V6_vadduhw :
- T_HVX_alu_WV <"$dst.w = vadd($src1.uh,$src2.uh)">, V6_vadduhw_enc;
-defm V6_vaddhw :
- T_HVX_alu_WV <"$dst.w = vadd($src1.h,$src2.h)">, V6_vaddhw_enc;
-defm V6_vsububh :
- T_HVX_alu_WV <"$dst.h = vsub($src1.ub,$src2.ub)">, V6_vsububh_enc;
-defm V6_vsubuhw :
- T_HVX_alu_WV <"$dst.w = vsub($src1.uh,$src2.uh)">, V6_vsubuhw_enc;
-defm V6_vsubhw :
- T_HVX_alu_WV <"$dst.w = vsub($src1.h,$src2.h)">, V6_vsubhw_enc;
-
-defm V6_vaddb_dv :
- T_HVX_alu_WW <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_dv_enc;
-defm V6_vaddh_dv :
- T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_dv_enc;
-defm V6_vaddw_dv :
- T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_dv_enc;
-defm V6_vaddubsat_dv :
- T_HVX_alu_WW <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_dv_enc;
-defm V6_vadduhsat_dv :
- T_HVX_alu_WW <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_dv_enc;
-defm V6_vaddhsat_dv :
- T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_dv_enc;
-defm V6_vaddwsat_dv :
- T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_dv_enc;
-defm V6_vsubb_dv :
- T_HVX_alu_WW <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_dv_enc;
-defm V6_vsubh_dv :
- T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_dv_enc;
-defm V6_vsubw_dv :
- T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_dv_enc;
-defm V6_vsububsat_dv :
- T_HVX_alu_WW <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_dv_enc;
-defm V6_vsubuhsat_dv :
- T_HVX_alu_WW <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_dv_enc;
-defm V6_vsubhsat_dv :
- T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_dv_enc;
-defm V6_vsubwsat_dv :
- T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_dv_enc;
-
-let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV in {
-defm V6_vmpabusv :
- T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabusv_enc;
-defm V6_vmpabuuv :
- T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.ub)">, V6_vmpabuuv_enc;
-}
-
-let isAccumulator = 1, hasNewValue = 1 in
-class T_HVX_vmpyacc <string asmString, InstrItinClass itin, RegisterClass RCout,
- RegisterClass RCin1, RegisterClass RCin2>
- : CVI_VA_Resource1 <(outs RCout:$dst),
- (ins RCout:$_src_, RCin1:$src1, RCin2:$src2), asmString,
- [], "$dst = $_src_" > {
- let Itinerary = itin;
- let Type = !cast<IType>("Type"#itin);
-}
-
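-// When building the 128B variants below, only vector register classes get
-// the "128B" suffix; a scalar IntRegs operand has no 128B counterpart and
-// is left unchanged.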
-multiclass T_HVX_vmpyacc_both <string asmString, RegisterClass RCout,
- RegisterClass RCin1, RegisterClass RCin2, InstrItinClass itin > {
- def NAME : T_HVX_vmpyacc <asmString, itin, RCout, RCin1, RCin2>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_vmpyacc <asmString, itin,
- !cast<RegisterClass>(RCout#"128B"),
- !cast<RegisterClass>(RCin1#"128B"),
- !cast<RegisterClass>(RCin2#
- !if(!eq (!cast<string>(RCin2), "IntRegs"), "", "128B"))>;
-}
-
-multiclass T_HVX_vmpyacc_VVR <string asmString>:
- T_HVX_vmpyacc_both <asmString, VectorRegs, VectorRegs, IntRegs, CVI_VX>;
-
-multiclass T_HVX_vmpyacc_VWR <string asmString>:
- T_HVX_vmpyacc_both <asmString, VectorRegs, VecDblRegs, IntRegs, CVI_VX_DV>;
-
-multiclass T_HVX_vmpyacc_WVR <string asmString>:
- T_HVX_vmpyacc_both <asmString, VecDblRegs, VectorRegs, IntRegs, CVI_VX_DV>;
-
-multiclass T_HVX_vmpyacc_WWR <string asmString>:
- T_HVX_vmpyacc_both <asmString, VecDblRegs, VecDblRegs, IntRegs, CVI_VX_DV>;
-
-multiclass T_HVX_vmpyacc_VVV <string asmString>:
- T_HVX_vmpyacc_both <asmString, VectorRegs, VectorRegs, VectorRegs, CVI_VX_DV>;
-
-multiclass T_HVX_vmpyacc_WVV <string asmString>:
- T_HVX_vmpyacc_both <asmString, VecDblRegs, VectorRegs, VectorRegs, CVI_VX_DV>;
-
-
-defm V6_vtmpyb_acc :
- T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.b,$src2.b)">,
- V6_vtmpyb_acc_enc;
-defm V6_vtmpybus_acc :
- T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.ub,$src2.b)">,
- V6_vtmpybus_acc_enc;
-defm V6_vtmpyhb_acc :
- T_HVX_vmpyacc_WWR <"$dst.w += vtmpy($src1.h,$src2.b)">,
- V6_vtmpyhb_acc_enc;
-defm V6_vdmpyhb_acc :
- T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.b)">,
- V6_vdmpyhb_acc_enc;
-defm V6_vrmpyub_acc :
- T_HVX_vmpyacc_VVR <"$dst.uw += vrmpy($src1.ub,$src2.ub)">,
- V6_vrmpyub_acc_enc;
-defm V6_vrmpybus_acc :
- T_HVX_vmpyacc_VVR <"$dst.w += vrmpy($src1.ub,$src2.b)">,
- V6_vrmpybus_acc_enc;
-defm V6_vdmpybus_acc :
- T_HVX_vmpyacc_VVR <"$dst.h += vdmpy($src1.ub,$src2.b)">,
- V6_vdmpybus_acc_enc;
-defm V6_vdmpybus_dv_acc :
- T_HVX_vmpyacc_WWR <"$dst.h += vdmpy($src1.ub,$src2.b)">,
- V6_vdmpybus_dv_acc_enc;
-defm V6_vdmpyhsuisat_acc :
- T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.uh,#1):sat">,
- V6_vdmpyhsuisat_acc_enc;
-defm V6_vdmpyhisat_acc :
- T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.h):sat">,
- V6_vdmpyhisat_acc_enc;
-defm V6_vdmpyhb_dv_acc :
- T_HVX_vmpyacc_WWR <"$dst.w += vdmpy($src1.h,$src2.b)">,
- V6_vdmpyhb_dv_acc_enc;
-defm V6_vmpybus_acc :
- T_HVX_vmpyacc_WVR <"$dst.h += vmpy($src1.ub,$src2.b)">,
- V6_vmpybus_acc_enc;
-defm V6_vmpabus_acc :
- T_HVX_vmpyacc_WWR <"$dst.h += vmpa($src1.ub,$src2.b)">,
- V6_vmpabus_acc_enc;
-defm V6_vmpahb_acc :
- T_HVX_vmpyacc_WWR <"$dst.w += vmpa($src1.h,$src2.b)">,
- V6_vmpahb_acc_enc;
-defm V6_vmpyhsat_acc :
- T_HVX_vmpyacc_WVR <"$dst.w += vmpy($src1.h,$src2.h):sat">,
- V6_vmpyhsat_acc_enc;
-defm V6_vmpyuh_acc :
- T_HVX_vmpyacc_WVR <"$dst.uw += vmpy($src1.uh,$src2.uh)">,
- V6_vmpyuh_acc_enc;
-defm V6_vmpyiwb_acc :
- T_HVX_vmpyacc_VVR <"$dst.w += vmpyi($src1.w,$src2.b)">,
- V6_vmpyiwb_acc_enc;
-defm V6_vdsaduh_acc :
- T_HVX_vmpyacc_WWR <"$dst.uw += vdsad($src1.uh,$src2.uh)">,
- V6_vdsaduh_acc_enc;
-defm V6_vmpyihb_acc :
- T_HVX_vmpyacc_VVR <"$dst.h += vmpyi($src1.h,$src2.b)">,
- V6_vmpyihb_acc_enc;
-defm V6_vmpyub_acc :
- T_HVX_vmpyacc_WVR <"$dst.uh += vmpy($src1.ub,$src2.ub)">,
- V6_vmpyub_acc_enc;
-
-let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in {
-defm V6_vdmpyhsusat_acc :
- T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.uh):sat">,
- V6_vdmpyhsusat_acc_enc;
-defm V6_vdmpyhsat_acc :
- T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.h):sat">,
- V6_vdmpyhsat_acc_enc;
-defm V6_vmpyiwh_acc : T_HVX_vmpyacc_VVR
- <"$dst.w += vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_acc_enc;
-}
-
-let Itinerary = CVI_VS, Type = TypeCVI_VS in {
-defm V6_vaslw_acc :
- T_HVX_vmpyacc_VVR <"$dst.w += vasl($src1.w,$src2)">, V6_vaslw_acc_enc;
-defm V6_vasrw_acc :
- T_HVX_vmpyacc_VVR <"$dst.w += vasr($src1.w,$src2)">, V6_vasrw_acc_enc;
-}
-
-defm V6_vdmpyhvsat_acc :
- T_HVX_vmpyacc_VVV <"$dst.w += vdmpy($src1.h,$src2.h):sat">,
- V6_vdmpyhvsat_acc_enc;
-defm V6_vmpybusv_acc :
- T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.ub,$src2.b)">,
- V6_vmpybusv_acc_enc;
-defm V6_vmpybv_acc :
- T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.b,$src2.b)">, V6_vmpybv_acc_enc;
-defm V6_vmpyhus_acc :
- T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.uh)">, V6_vmpyhus_acc_enc;
-defm V6_vmpyhv_acc :
- T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.h)">, V6_vmpyhv_acc_enc;
-defm V6_vmpyiewh_acc :
- T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.h)">,
- V6_vmpyiewh_acc_enc;
-defm V6_vmpyiewuh_acc :
- T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.uh)">,
- V6_vmpyiewuh_acc_enc;
-defm V6_vmpyih_acc :
- T_HVX_vmpyacc_VVV <"$dst.h += vmpyi($src1.h,$src2.h)">, V6_vmpyih_acc_enc;
-defm V6_vmpyowh_rnd_sacc :
- T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:rnd:sat:shift">,
- V6_vmpyowh_rnd_sacc_enc;
-defm V6_vmpyowh_sacc :
- T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:sat:shift">,
- V6_vmpyowh_sacc_enc;
-defm V6_vmpyubv_acc :
- T_HVX_vmpyacc_WVV <"$dst.uh += vmpy($src1.ub,$src2.ub)">,
- V6_vmpyubv_acc_enc;
-defm V6_vmpyuhv_acc :
- T_HVX_vmpyacc_WVV <"$dst.uw += vmpy($src1.uh,$src2.uh)">,
- V6_vmpyuhv_acc_enc;
-defm V6_vrmpybusv_acc :
- T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.ub,$src2.b)">,
- V6_vrmpybusv_acc_enc;
-defm V6_vrmpybv_acc :
- T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.b,$src2.b)">, V6_vrmpybv_acc_enc;
-defm V6_vrmpyubv_acc :
- T_HVX_vmpyacc_VVV <"$dst.uw += vrmpy($src1.ub,$src2.ub)">,
- V6_vrmpyubv_acc_enc;
-
-
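-// Predicate compare-with-accumulate: the tied $_src_ operand carries the
-// previous predicate value, into which the compare result is and-ed,
-// or-ed, or xor-ed.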
-class T_HVX_vcmp <string asmString, RegisterClass RCout, RegisterClass RCin>
- : CVI_VA_Resource1 <(outs RCout:$dst),
- (ins RCout:$_src_, RCin:$src1, RCin:$src2), asmString,
- [], "$dst = $_src_" > {
- let Itinerary = CVI_VA;
- let Type = TypeCVI_VA;
-}
-
-multiclass T_HVX_vcmp <string asmString> {
- def NAME : T_HVX_vcmp <asmString, VecPredRegs, VectorRegs>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_vcmp <asmString, VecPredRegs128B, VectorRegs128B>;
-}
-
-defm V6_veqb_and :
- T_HVX_vcmp <"$dst &= vcmp.eq($src1.b,$src2.b)">, V6_veqb_and_enc;
-defm V6_veqh_and :
- T_HVX_vcmp <"$dst &= vcmp.eq($src1.h,$src2.h)">, V6_veqh_and_enc;
-defm V6_veqw_and :
- T_HVX_vcmp <"$dst &= vcmp.eq($src1.w,$src2.w)">, V6_veqw_and_enc;
-defm V6_vgtb_and :
- T_HVX_vcmp <"$dst &= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_and_enc;
-defm V6_vgth_and :
- T_HVX_vcmp <"$dst &= vcmp.gt($src1.h,$src2.h)">, V6_vgth_and_enc;
-defm V6_vgtw_and :
- T_HVX_vcmp <"$dst &= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_and_enc;
-defm V6_vgtub_and :
- T_HVX_vcmp <"$dst &= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_and_enc;
-defm V6_vgtuh_and :
- T_HVX_vcmp <"$dst &= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_and_enc;
-defm V6_vgtuw_and :
- T_HVX_vcmp <"$dst &= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_and_enc;
-defm V6_veqb_or :
- T_HVX_vcmp <"$dst |= vcmp.eq($src1.b,$src2.b)">, V6_veqb_or_enc;
-defm V6_veqh_or :
- T_HVX_vcmp <"$dst |= vcmp.eq($src1.h,$src2.h)">, V6_veqh_or_enc;
-defm V6_veqw_or :
- T_HVX_vcmp <"$dst |= vcmp.eq($src1.w,$src2.w)">, V6_veqw_or_enc;
-defm V6_vgtb_or :
- T_HVX_vcmp <"$dst |= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_or_enc;
-defm V6_vgth_or :
- T_HVX_vcmp <"$dst |= vcmp.gt($src1.h,$src2.h)">, V6_vgth_or_enc;
-defm V6_vgtw_or :
- T_HVX_vcmp <"$dst |= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_or_enc;
-defm V6_vgtub_or :
- T_HVX_vcmp <"$dst |= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_or_enc;
-defm V6_vgtuh_or :
- T_HVX_vcmp <"$dst |= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_or_enc;
-defm V6_vgtuw_or :
- T_HVX_vcmp <"$dst |= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_or_enc;
-defm V6_veqb_xor :
- T_HVX_vcmp <"$dst ^= vcmp.eq($src1.b,$src2.b)">, V6_veqb_xor_enc;
-defm V6_veqh_xor :
- T_HVX_vcmp <"$dst ^= vcmp.eq($src1.h,$src2.h)">, V6_veqh_xor_enc;
-defm V6_veqw_xor :
- T_HVX_vcmp <"$dst ^= vcmp.eq($src1.w,$src2.w)">, V6_veqw_xor_enc;
-defm V6_vgtb_xor :
- T_HVX_vcmp <"$dst ^= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_xor_enc;
-defm V6_vgth_xor :
- T_HVX_vcmp <"$dst ^= vcmp.gt($src1.h,$src2.h)">, V6_vgth_xor_enc;
-defm V6_vgtw_xor :
- T_HVX_vcmp <"$dst ^= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_xor_enc;
-defm V6_vgtub_xor :
- T_HVX_vcmp <"$dst ^= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_xor_enc;
-defm V6_vgtuh_xor :
- T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_xor_enc;
-defm V6_vgtuw_xor :
- T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_xor_enc;
-
-defm V6_vminub :
- T_HVX_alu_VV <"$dst.ub = vmin($src1.ub,$src2.ub)">, V6_vminub_enc;
-defm V6_vminuh :
- T_HVX_alu_VV <"$dst.uh = vmin($src1.uh,$src2.uh)">, V6_vminuh_enc;
-defm V6_vminh :
- T_HVX_alu_VV <"$dst.h = vmin($src1.h,$src2.h)">, V6_vminh_enc;
-defm V6_vminw :
- T_HVX_alu_VV <"$dst.w = vmin($src1.w,$src2.w)">, V6_vminw_enc;
-defm V6_vmaxub :
- T_HVX_alu_VV <"$dst.ub = vmax($src1.ub,$src2.ub)">, V6_vmaxub_enc;
-defm V6_vmaxuh :
- T_HVX_alu_VV <"$dst.uh = vmax($src1.uh,$src2.uh)">, V6_vmaxuh_enc;
-defm V6_vmaxh :
- T_HVX_alu_VV <"$dst.h = vmax($src1.h,$src2.h)">, V6_vmaxh_enc;
-defm V6_vmaxw :
- T_HVX_alu_VV <"$dst.w = vmax($src1.w,$src2.w)">, V6_vmaxw_enc;
-defm V6_vshuffeb :
- T_HVX_alu_VV <"$dst.b = vshuffe($src1.b,$src2.b)">, V6_vshuffeb_enc;
-defm V6_vshuffob :
- T_HVX_alu_VV <"$dst.b = vshuffo($src1.b,$src2.b)">, V6_vshuffob_enc;
-defm V6_vshufeh :
- T_HVX_alu_VV <"$dst.h = vshuffe($src1.h,$src2.h)">, V6_vshufeh_enc;
-defm V6_vshufoh :
- T_HVX_alu_VV <"$dst.h = vshuffo($src1.h,$src2.h)">, V6_vshufoh_enc;
-
-let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in {
-defm V6_vmpyowh_rnd :
- T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:rnd:sat">,
- V6_vmpyowh_rnd_enc;
-defm V6_vmpyiewuh :
- T_HVX_alu_VV <"$dst.w = vmpyie($src1.w,$src2.uh)">, V6_vmpyiewuh_enc;
-defm V6_vmpyewuh :
- T_HVX_alu_VV <"$dst.w = vmpye($src1.w,$src2.uh)">, V6_vmpyewuh_enc;
-defm V6_vmpyowh :
- T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:sat">, V6_vmpyowh_enc;
-defm V6_vmpyiowh :
- T_HVX_alu_VV <"$dst.w = vmpyio($src1.w,$src2.h)">, V6_vmpyiowh_enc;
-}
-let Itinerary = CVI_VX, Type = TypeCVI_VX in
-defm V6_vmpyieoh :
- T_HVX_alu_VV <"$dst.w = vmpyieo($src1.h,$src2.h)">, V6_vmpyieoh_enc;
-
-let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in {
-defm V6_vshufoeh :
- T_HVX_alu_WV <"$dst.h = vshuffoe($src1.h,$src2.h)">, V6_vshufoeh_enc;
-defm V6_vshufoeb :
- T_HVX_alu_WV <"$dst.b = vshuffoe($src1.b,$src2.b)">, V6_vshufoeb_enc;
-}
-
-let isRegSequence = 1, Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in
-defm V6_vcombine :
- T_HVX_alu_WV <"$dst = vcombine($src1,$src2)">, V6_vcombine_enc;
-
-let Itinerary = CVI_VINLANESAT, Type = TypeCVI_VINLANESAT in {
-defm V6_vsathub :
- T_HVX_alu_VV <"$dst.ub = vsat($src1.h,$src2.h)">, V6_vsathub_enc;
-defm V6_vsatwh :
- T_HVX_alu_VV <"$dst.h = vsat($src1.w,$src2.w)">, V6_vsatwh_enc;
-}
-
-let Itinerary = CVI_VS, Type = TypeCVI_VS in {
-defm V6_vroundwh :
- T_HVX_alu_VV <"$dst.h = vround($src1.w,$src2.w):sat">, V6_vroundwh_enc;
-defm V6_vroundwuh :
- T_HVX_alu_VV <"$dst.uh = vround($src1.w,$src2.w):sat">, V6_vroundwuh_enc;
-defm V6_vroundhb :
- T_HVX_alu_VV <"$dst.b = vround($src1.h,$src2.h):sat">, V6_vroundhb_enc;
-defm V6_vroundhub :
- T_HVX_alu_VV <"$dst.ub = vround($src1.h,$src2.h):sat">, V6_vroundhub_enc;
-defm V6_vasrwv :
- T_HVX_alu_VV <"$dst.w = vasr($src1.w,$src2.w)">, V6_vasrwv_enc;
-defm V6_vlsrwv :
- T_HVX_alu_VV <"$dst.w = vlsr($src1.w,$src2.w)">, V6_vlsrwv_enc;
-defm V6_vlsrhv :
- T_HVX_alu_VV <"$dst.h = vlsr($src1.h,$src2.h)">, V6_vlsrhv_enc;
-defm V6_vasrhv :
- T_HVX_alu_VV <"$dst.h = vasr($src1.h,$src2.h)">, V6_vasrhv_enc;
-defm V6_vaslwv :
- T_HVX_alu_VV <"$dst.w = vasl($src1.w,$src2.w)">, V6_vaslwv_enc;
-defm V6_vaslhv :
- T_HVX_alu_VV <"$dst.h = vasl($src1.h,$src2.h)">, V6_vaslhv_enc;
-}
-
-defm V6_vaddb :
- T_HVX_alu_VV <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_enc;
-defm V6_vaddh :
- T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_enc;
-
-let Itinerary = CVI_VP, Type = TypeCVI_VP in {
-defm V6_vdelta :
- T_HVX_alu_VV <"$dst = vdelta($src1,$src2)">, V6_vdelta_enc;
-defm V6_vrdelta :
- T_HVX_alu_VV <"$dst = vrdelta($src1,$src2)">, V6_vrdelta_enc;
-defm V6_vdealb4w :
- T_HVX_alu_VV <"$dst.b = vdeale($src1.b,$src2.b)">, V6_vdealb4w_enc;
-defm V6_vpackeb :
- T_HVX_alu_VV <"$dst.b = vpacke($src1.h,$src2.h)">, V6_vpackeb_enc;
-defm V6_vpackeh :
- T_HVX_alu_VV <"$dst.h = vpacke($src1.w,$src2.w)">, V6_vpackeh_enc;
-defm V6_vpackhub_sat :
- T_HVX_alu_VV <"$dst.ub = vpack($src1.h,$src2.h):sat">, V6_vpackhub_sat_enc;
-defm V6_vpackhb_sat :
- T_HVX_alu_VV <"$dst.b = vpack($src1.h,$src2.h):sat">, V6_vpackhb_sat_enc;
-defm V6_vpackwuh_sat :
- T_HVX_alu_VV <"$dst.uh = vpack($src1.w,$src2.w):sat">, V6_vpackwuh_sat_enc;
-defm V6_vpackwh_sat :
- T_HVX_alu_VV <"$dst.h = vpack($src1.w,$src2.w):sat">, V6_vpackwh_sat_enc;
-defm V6_vpackob :
- T_HVX_alu_VV <"$dst.b = vpacko($src1.h,$src2.h)">, V6_vpackob_enc;
-defm V6_vpackoh :
- T_HVX_alu_VV <"$dst.h = vpacko($src1.w,$src2.w)">, V6_vpackoh_enc;
-}
-
-let hasNewValue = 1, hasSideEffects = 0 in
-class T_HVX_condALU <string asmString, RegisterClass RC1, RegisterClass RC2>
- : CVI_VA_Resource1 <(outs RC2:$dst),
- (ins RC1:$src1, RC2:$_src_, RC2:$src2), asmString,
- [], "$dst = $_src_" > {
- let Itinerary = CVI_VA;
- let Type = TypeCVI_VA;
-}
-
-multiclass T_HVX_condALU <string asmString> {
- def NAME : T_HVX_condALU <asmString, VecPredRegs, VectorRegs>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_condALU <asmString, VecPredRegs128B, VectorRegs128B>;
-}
-
-defm V6_vaddbq : T_HVX_condALU <"if ($src1) $dst.b += $src2.b">,
- V6_vaddbq_enc;
-defm V6_vaddhq : T_HVX_condALU <"if ($src1) $dst.h += $src2.h">,
- V6_vaddhq_enc;
-defm V6_vaddwq : T_HVX_condALU <"if ($src1) $dst.w += $src2.w">,
- V6_vaddwq_enc;
-defm V6_vsubbq : T_HVX_condALU <"if ($src1) $dst.b -= $src2.b">,
- V6_vsubbq_enc;
-defm V6_vsubhq : T_HVX_condALU <"if ($src1) $dst.h -= $src2.h">,
- V6_vsubhq_enc;
-defm V6_vsubwq : T_HVX_condALU <"if ($src1) $dst.w -= $src2.w">,
- V6_vsubwq_enc;
-defm V6_vaddbnq : T_HVX_condALU <"if (!$src1) $dst.b += $src2.b">,
- V6_vaddbnq_enc;
-defm V6_vaddhnq : T_HVX_condALU <"if (!$src1) $dst.h += $src2.h">,
- V6_vaddhnq_enc;
-defm V6_vaddwnq : T_HVX_condALU <"if (!$src1) $dst.w += $src2.w">,
- V6_vaddwnq_enc;
-defm V6_vsubbnq : T_HVX_condALU <"if (!$src1) $dst.b -= $src2.b">,
- V6_vsubbnq_enc;
-defm V6_vsubhnq : T_HVX_condALU <"if (!$src1) $dst.h -= $src2.h">,
- V6_vsubhnq_enc;
-defm V6_vsubwnq : T_HVX_condALU <"if (!$src1) $dst.w -= $src2.w">,
- V6_vsubwnq_enc;
-
-let hasNewValue = 1 in
-class T_HVX_alu_2op <string asmString, InstrItinClass itin,
- RegisterClass RCout, RegisterClass RCin>
- : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1),
- asmString >{
- let Itinerary = itin;
- let Type = !cast<IType>("Type"#itin);
-}
-
-multiclass T_HVX_alu_2op <string asmString, RegisterClass RCout,
- RegisterClass RCin, InstrItinClass itin> {
- def NAME : T_HVX_alu_2op <asmString, itin, RCout, RCin>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_alu_2op <asmString, itin,
- !cast<RegisterClass>(RCout#"128B"),
- !cast<RegisterClass>(RCin#"128B")>;
-}
-
-let hasNewValue = 1 in
-multiclass T_HVX_alu_2op_VV <string asmString>:
- T_HVX_alu_2op <asmString, VectorRegs, VectorRegs, CVI_VA>;
-
-multiclass T_HVX_alu_2op_WV <string asmString>:
- T_HVX_alu_2op <asmString, VecDblRegs, VectorRegs, CVI_VA_DV>;
-
-
-defm V6_vabsh : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h)">,
- V6_vabsh_enc;
-defm V6_vabsw : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w)">,
- V6_vabsw_enc;
-defm V6_vabsh_sat : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h):sat">,
- V6_vabsh_sat_enc;
-defm V6_vabsw_sat : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w):sat">,
- V6_vabsw_sat_enc;
-defm V6_vnot : T_HVX_alu_2op_VV <"$dst = vnot($src1)">,
- V6_vnot_enc;
-defm V6_vassign : T_HVX_alu_2op_VV <"$dst = $src1">,
- V6_vassign_enc;
-
-defm V6_vzb : T_HVX_alu_2op_WV <"$dst.uh = vzxt($src1.ub)">,
- V6_vzb_enc;
-defm V6_vzh : T_HVX_alu_2op_WV <"$dst.uw = vzxt($src1.uh)">,
- V6_vzh_enc;
-defm V6_vsb : T_HVX_alu_2op_WV <"$dst.h = vsxt($src1.b)">,
- V6_vsb_enc;
-defm V6_vsh : T_HVX_alu_2op_WV <"$dst.w = vsxt($src1.h)">,
- V6_vsh_enc;
-
-let Itinerary = CVI_VP, Type = TypeCVI_VP in {
-defm V6_vdealh : T_HVX_alu_2op_VV <"$dst.h = vdeal($src1.h)">,
- V6_vdealh_enc;
-defm V6_vdealb : T_HVX_alu_2op_VV <"$dst.b = vdeal($src1.b)">,
- V6_vdealb_enc;
-defm V6_vshuffh : T_HVX_alu_2op_VV <"$dst.h = vshuff($src1.h)">,
- V6_vshuffh_enc;
-defm V6_vshuffb : T_HVX_alu_2op_VV <"$dst.b = vshuff($src1.b)">,
- V6_vshuffb_enc;
-}
-
-let Itinerary = CVI_VP_VS, Type = TypeCVI_VP_VS in {
-defm V6_vunpackub : T_HVX_alu_2op_WV <"$dst.uh = vunpack($src1.ub)">,
- V6_vunpackub_enc;
-defm V6_vunpackuh : T_HVX_alu_2op_WV <"$dst.uw = vunpack($src1.uh)">,
- V6_vunpackuh_enc;
-defm V6_vunpackb : T_HVX_alu_2op_WV <"$dst.h = vunpack($src1.b)">,
- V6_vunpackb_enc;
-defm V6_vunpackh : T_HVX_alu_2op_WV <"$dst.w = vunpack($src1.h)">,
- V6_vunpackh_enc;
-}
-
-let Itinerary = CVI_VS, Type = TypeCVI_VS in {
-defm V6_vcl0w : T_HVX_alu_2op_VV <"$dst.uw = vcl0($src1.uw)">,
- V6_vcl0w_enc;
-defm V6_vcl0h : T_HVX_alu_2op_VV <"$dst.uh = vcl0($src1.uh)">,
- V6_vcl0h_enc;
-defm V6_vnormamtw : T_HVX_alu_2op_VV <"$dst.w = vnormamt($src1.w)">,
- V6_vnormamtw_enc;
-defm V6_vnormamth : T_HVX_alu_2op_VV <"$dst.h = vnormamt($src1.h)">,
- V6_vnormamth_enc;
-defm V6_vpopcounth : T_HVX_alu_2op_VV <"$dst.h = vpopcount($src1.h)">,
- V6_vpopcounth_enc;
-}
-
-let isAccumulator = 1, hasNewValue = 1, Itinerary = CVI_VX_DV_LONG,
- Type = TypeCVI_VX_DV in
-class T_HVX_vmpyacc2 <string asmString, RegisterClass RC>
- : CVI_VA_Resource1 <(outs RC:$dst),
- (ins RC:$_src_, RC:$src1, IntRegs:$src2, u1_0Imm:$src3),
- asmString, [], "$dst = $_src_" > ;
-
-
-multiclass T_HVX_vmpyacc2 <string asmString> {
- def NAME : T_HVX_vmpyacc2 <asmString, VecDblRegs>;
-
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_vmpyacc2 <asmString, VecDblRegs128B>;
-}
-
-defm V6_vrmpybusi_acc :
- T_HVX_vmpyacc2<"$dst.w += vrmpy($src1.ub,$src2.b,#$src3)">,
- V6_vrmpybusi_acc_enc;
-defm V6_vrsadubi_acc :
- T_HVX_vmpyacc2<"$dst.uw += vrsad($src1.ub,$src2.ub,#$src3)">,
- V6_vrsadubi_acc_enc;
-defm V6_vrmpyubi_acc :
- T_HVX_vmpyacc2<"$dst.uw += vrmpy($src1.ub,$src2.ub,#$src3)">,
- V6_vrmpyubi_acc_enc;
-
-
-let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV, hasNewValue = 1 in
-class T_HVX_vmpy2 <string asmString, RegisterClass RC>
- : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, IntRegs:$src2, u1_0Imm:$src3),
- asmString>;
-
-
-multiclass T_HVX_vmpy2 <string asmString> {
- def NAME : T_HVX_vmpy2 <asmString, VecDblRegs>;
-
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_vmpy2 <asmString, VecDblRegs128B>;
-}
-
-defm V6_vrmpybusi :
- T_HVX_vmpy2 <"$dst.w = vrmpy($src1.ub,$src2.b,#$src3)">, V6_vrmpybusi_enc;
-defm V6_vrsadubi :
- T_HVX_vmpy2 <"$dst.uw = vrsad($src1.ub,$src2.ub,#$src3)">, V6_vrsadubi_enc;
-defm V6_vrmpyubi :
- T_HVX_vmpy2 <"$dst.uw = vrmpy($src1.ub,$src2.ub,#$src3)">, V6_vrmpyubi_enc;
-
-
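-// vshuff/vdeal permute a pair of vector registers in place, so both
-// outputs are tied to the corresponding vector inputs.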
-let Itinerary = CVI_VP_VS_LONG_EARLY, Type = TypeCVI_VP_VS,
- hasSideEffects = 0, hasNewValue2 = 1, opNewValue2 = 1 in
-class T_HVX_perm <string asmString, RegisterClass RC>
- : CVI_VA_Resource1 <(outs RC:$_dst1_, RC:$_dst2_),
- (ins RC:$src1, RC:$src2, IntRegs:$src3),
- asmString, [], "$_dst1_ = $src1, $_dst2_ = $src2" >;
-
-multiclass T_HVX_perm <string asmString> {
- def NAME : T_HVX_perm <asmString, VectorRegs>;
-
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_perm <asmString, VectorRegs128B>;
-}
-
-let hasNewValue = 1, opNewValue = 0, hasNewValue2 = 1, opNewValue2 = 1 in {
- defm V6_vshuff : T_HVX_perm <"vshuff($src1,$src2,$src3)">, V6_vshuff_enc;
- defm V6_vdeal : T_HVX_perm <"vdeal($src1,$src2,$src3)">, V6_vdeal_enc;
-}
-
-// Conditional vector move.
-let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-class T_HVX_cmov <bit isPredNot, RegisterClass RC>
- : CVI_VA_Resource1 <(outs RC:$dst), (ins PredRegs:$src1, RC:$src2),
- "if ("#!if(isPredNot, "!", "")#"$src1) $dst = $src2"> {
- let isPredicatedFalse = isPredNot;
-}
-
-multiclass T_HVX_cmov <bit isPredNot = 0> {
- def NAME : T_HVX_cmov <isPredNot, VectorRegs>;
-
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_cmov <isPredNot, VectorRegs128B>;
-}
-
-defm V6_vcmov : T_HVX_cmov, V6_vcmov_enc;
-defm V6_vncmov : T_HVX_cmov<1>, V6_vncmov_enc;
-
-// Conditional vector combine.
-let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, isPredicated = 1,
- hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-class T_HVX_ccombine <bit isPredNot, RegisterClass RCout, RegisterClass RCin>
- : CVI_VA_Resource1 < (outs RCout:$dst),
- (ins PredRegs:$src1, RCin:$src2, RCin:$src3),
- "if ("#!if(isPredNot, "!", "")#"$src1) $dst = vcombine($src2,$src3)"> {
- let isPredicatedFalse = isPredNot;
-}
-
-multiclass T_HVX_ccombine <bit isPredNot = 0> {
- def NAME : T_HVX_ccombine <isPredNot, VecDblRegs, VectorRegs>;
-
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_ccombine <isPredNot, VecDblRegs128B, VectorRegs128B>;
-}
-
-defm V6_vccombine : T_HVX_ccombine, V6_vccombine_enc;
-defm V6_vnccombine : T_HVX_ccombine<1>, V6_vnccombine_enc;
-
-let hasNewValue = 1 in
-class T_HVX_shift <string asmString, RegisterClass RCout, RegisterClass RCin>
- : CVI_VX_DV_Resource1<(outs RCout:$dst),
- (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3),
- asmString >;
-
-multiclass T_HVX_shift <string asmString, RegisterClass RCout,
- RegisterClass RCin> {
- def NAME : T_HVX_shift <asmString, RCout, RCin>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_shift <asmString, !cast<RegisterClass>(RCout#"128B"),
- !cast<RegisterClass>(RCin#"128B")>;
-}
-
-multiclass T_HVX_shift_VV <string asmString>:
- T_HVX_shift <asmString, VectorRegs, VectorRegs>;
-
-multiclass T_HVX_shift_WV <string asmString>:
- T_HVX_shift <asmString, VecDblRegs, VectorRegs>;
-
-let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in {
-defm V6_valignb :
- T_HVX_shift_VV <"$dst = valign($src1,$src2,$src3)">, V6_valignb_enc;
-defm V6_vlalignb :
- T_HVX_shift_VV <"$dst = vlalign($src1,$src2,$src3)">, V6_vlalignb_enc;
-}
-
-let Itinerary = CVI_VS, Type = TypeCVI_VS in {
-defm V6_vasrwh :
- T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3)">, V6_vasrwh_enc;
-defm V6_vasrwhsat :
- T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):sat">,
- V6_vasrwhsat_enc;
-defm V6_vasrwhrndsat :
- T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):rnd:sat">,
- V6_vasrwhrndsat_enc;
-defm V6_vasrwuhsat :
- T_HVX_shift_VV <"$dst.uh = vasr($src1.w,$src2.w,$src3):sat">,
- V6_vasrwuhsat_enc;
-defm V6_vasrhubsat :
- T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):sat">,
- V6_vasrhubsat_enc;
-defm V6_vasrhubrndsat :
- T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):rnd:sat">,
- V6_vasrhubrndsat_enc;
-defm V6_vasrhbrndsat :
- T_HVX_shift_VV <"$dst.b = vasr($src1.h,$src2.h,$src3):rnd:sat">,
- V6_vasrhbrndsat_enc;
-}
-
-// Assembler mapped; should this be an alias?
-//defm V6_vtran2x2vdd : T_HVX_shift_VV <"">, V6_vtran2x2vdd_enc;
-let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in {
-defm V6_vshuffvdd :
- T_HVX_shift_WV <"$dst = vshuff($src1,$src2,$src3)">, V6_vshuffvdd_enc;
-defm V6_vdealvdd :
- T_HVX_shift_WV <"$dst = vdeal($src1,$src2,$src3)">, V6_vdealvdd_enc;
-}
-
-let hasNewValue = 1, Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in
-class T_HVX_unpack <string asmString, RegisterClass RCout, RegisterClass RCin>
- : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCout:$_src_, RCin:$src1),
- asmString, [], "$dst = $_src_">;
-
-multiclass T_HVX_unpack <string asmString> {
- def NAME : T_HVX_unpack <asmString, VecDblRegs, VectorRegs>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_unpack <asmString, VecDblRegs128B, VectorRegs128B>;
-}
-
-defm V6_vunpackob : T_HVX_unpack <"$dst.h |= vunpacko($src1.b)">, V6_vunpackob_enc;
-defm V6_vunpackoh : T_HVX_unpack <"$dst.w |= vunpacko($src1.h)">, V6_vunpackoh_enc;
-
-let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1,
- hasSideEffects = 0 in
-class T_HVX_valign <string asmString, RegisterClass RC>
- : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2, u3_0Imm:$src3),
- asmString>;
-
-multiclass T_HVX_valign <string asmString> {
- def NAME : T_HVX_valign <asmString, VectorRegs>;
-
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_valign <asmString, VectorRegs128B>;
-}
-
-defm V6_valignbi :
- T_HVX_valign <"$dst = valign($src1,$src2,#$src3)">, V6_valignbi_enc;
-defm V6_vlalignbi :
- T_HVX_valign <"$dst = vlalign($src1,$src2,#$src3)">, V6_vlalignbi_enc;
-
-let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in
-class T_HVX_predAlu <string asmString, RegisterClass RC>
- : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2),
- asmString>;
-
-multiclass T_HVX_predAlu <string asmString> {
- def NAME : T_HVX_predAlu <asmString, VecPredRegs>;
-
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_predAlu <asmString, VecPredRegs128B>;
-}
-
-defm V6_pred_and : T_HVX_predAlu <"$dst = and($src1,$src2)">, V6_pred_and_enc;
-defm V6_pred_or : T_HVX_predAlu <"$dst = or($src1,$src2)">, V6_pred_or_enc;
-defm V6_pred_xor : T_HVX_predAlu <"$dst = xor($src1,$src2)">, V6_pred_xor_enc;
-defm V6_pred_or_n : T_HVX_predAlu <"$dst = or($src1,!$src2)">, V6_pred_or_n_enc;
-defm V6_pred_and_n :
- T_HVX_predAlu <"$dst = and($src1,!$src2)">, V6_pred_and_n_enc;
-
-let Itinerary = CVI_VA, Type = TypeCVI_VA in
-class T_HVX_prednot <RegisterClass RC>
- : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1),
- "$dst = not($src1)">, V6_pred_not_enc;
-
-def V6_pred_not : T_HVX_prednot <VecPredRegs>;
-let isCodeGenOnly = 1 in
-def V6_pred_not_128B : T_HVX_prednot <VecPredRegs128B>;
-
-let Itinerary = CVI_VA, Type = TypeCVI_VA in
-class T_HVX_vcmp2 <string asmString, RegisterClass RCout, RegisterClass RCin>
- : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2),
- asmString >;
-
-multiclass T_HVX_vcmp2 <string asmString> {
- def NAME : T_HVX_vcmp2 <asmString, VecPredRegs, VectorRegs>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_vcmp2 <asmString, VecPredRegs128B, VectorRegs128B>;
-}
-
-defm V6_veqb : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.b,$src2.b)">, V6_veqb_enc;
-defm V6_veqh : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.h,$src2.h)">, V6_veqh_enc;
-defm V6_veqw : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.w,$src2.w)">, V6_veqw_enc;
-defm V6_vgtb : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.b,$src2.b)">, V6_vgtb_enc;
-defm V6_vgth : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.h,$src2.h)">, V6_vgth_enc;
-defm V6_vgtw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.w,$src2.w)">, V6_vgtw_enc;
-defm V6_vgtub : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_enc;
-defm V6_vgtuh : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_enc;
-defm V6_vgtuw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_enc;
-
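-// The vand forms below move data between vector predicates and vectors:
-// the "qrt" classes take a Q register and a scalar and produce a vector,
-// while the "vrt" classes take a vector and a scalar and produce a vector
-// predicate.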
-let isAccumulator = 1, hasNewValue = 1, hasSideEffects = 0 in
-class T_V6_vandqrt_acc <RegisterClass RCout, RegisterClass RCin>
- : CVI_VX_Resource_late<(outs RCout:$dst),
- (ins RCout:$_src_, RCin:$src1, IntRegs:$src2),
- "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandqrt_acc_enc;
-
-def V6_vandqrt_acc : T_V6_vandqrt_acc <VectorRegs, VecPredRegs>;
-let isCodeGenOnly = 1 in
-def V6_vandqrt_acc_128B : T_V6_vandqrt_acc <VectorRegs128B, VecPredRegs128B>;
-
-let isAccumulator = 1 in
-class T_V6_vandvrt_acc <RegisterClass RCout, RegisterClass RCin>
- : CVI_VX_Resource_late<(outs RCout:$dst),
- (ins RCout:$_src_, RCin:$src1, IntRegs:$src2),
- "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandvrt_acc_enc;
-
-def V6_vandvrt_acc : T_V6_vandvrt_acc <VecPredRegs, VectorRegs>;
-let isCodeGenOnly = 1 in
-def V6_vandvrt_acc_128B : T_V6_vandvrt_acc <VecPredRegs128B, VectorRegs128B>;
-
-let hasNewValue = 1, hasSideEffects = 0 in
-class T_V6_vandqrt <RegisterClass RCout, RegisterClass RCin>
- : CVI_VX_Resource_late<(outs RCout:$dst),
- (ins RCin:$src1, IntRegs:$src2),
- "$dst = vand($src1,$src2)" >, V6_vandqrt_enc;
-
-def V6_vandqrt : T_V6_vandqrt <VectorRegs, VecPredRegs>;
-let isCodeGenOnly = 1 in
-def V6_vandqrt_128B : T_V6_vandqrt <VectorRegs128B, VecPredRegs128B>;
-
-let hasNewValue = 1, hasSideEffects = 0 in
-class T_V6_lvsplatw <RegisterClass RC>
- : CVI_VX_Resource_late<(outs RC:$dst), (ins IntRegs:$src1),
- "$dst = vsplat($src1)" >, V6_lvsplatw_enc;
-
-def V6_lvsplatw : T_V6_lvsplatw <VectorRegs>;
-let isCodeGenOnly = 1 in
-def V6_lvsplatw_128B : T_V6_lvsplatw <VectorRegs128B>;
-
-
-let hasNewValue = 1 in
-class T_V6_vinsertwr <RegisterClass RC>
- : CVI_VX_Resource_late<(outs RC:$dst), (ins RC:$_src_, IntRegs:$src1),
- "$dst.w = vinsert($src1)", [], "$dst = $_src_">,
- V6_vinsertwr_enc;
-
-def V6_vinsertwr : T_V6_vinsertwr <VectorRegs>;
-let isCodeGenOnly = 1 in
-def V6_vinsertwr_128B : T_V6_vinsertwr <VectorRegs128B>;
-
-
-let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in
-class T_V6_pred_scalar2 <RegisterClass RC>
- : CVI_VA_Resource1<(outs RC:$dst), (ins IntRegs:$src1),
- "$dst = vsetq($src1)">, V6_pred_scalar2_enc;
-
-def V6_pred_scalar2 : T_V6_pred_scalar2 <VecPredRegs>;
-let isCodeGenOnly = 1 in
-def V6_pred_scalar2_128B : T_V6_pred_scalar2 <VecPredRegs128B>;
-
-class T_V6_vandvrt <RegisterClass RCout, RegisterClass RCin>
- : CVI_VX_Resource_late<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2),
- "$dst = vand($src1,$src2)">, V6_vandvrt_enc;
-
-def V6_vandvrt : T_V6_vandvrt <VecPredRegs, VectorRegs>;
-let isCodeGenOnly = 1 in
-def V6_vandvrt_128B : T_V6_vandvrt <VecPredRegs128B, VectorRegs128B>;
-
-let validSubTargets = HasV60SubT in
-class T_HVX_rol <string asmString, RegisterClass RC, Operand ImmOp >
- : SInst2 <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2), asmString>;
-
-class T_HVX_rol_R <string asmString>
- : T_HVX_rol <asmString, IntRegs, u5_0Imm>;
-class T_HVX_rol_P <string asmString>
- : T_HVX_rol <asmString, DoubleRegs, u6_0Imm>;
-
-def S6_rol_i_p : T_HVX_rol_P <"$dst = rol($src1,#$src2)">, S6_rol_i_p_enc;
-let hasNewValue = 1, opNewValue = 0 in
-def S6_rol_i_r : T_HVX_rol_R <"$dst = rol($src1,#$src2)">, S6_rol_i_r_enc;
-
-let validSubTargets = HasV60SubT in
-class T_HVX_rol_acc <string asmString, RegisterClass RC, Operand ImmOp>
- : SInst2 <(outs RC:$dst), (ins RC:$_src_, RC:$src1, ImmOp:$src2),
- asmString, [], "$dst = $_src_" >;
-
-class T_HVX_rol_acc_P <string asmString>
- : T_HVX_rol_acc <asmString, DoubleRegs, u6_0Imm>;
-
-class T_HVX_rol_acc_R <string asmString>
- : T_HVX_rol_acc <asmString, IntRegs, u5_0Imm>;
-
-def S6_rol_i_p_nac :
- T_HVX_rol_acc_P <"$dst -= rol($src1,#$src2)">, S6_rol_i_p_nac_enc;
-def S6_rol_i_p_acc :
- T_HVX_rol_acc_P <"$dst += rol($src1,#$src2)">, S6_rol_i_p_acc_enc;
-def S6_rol_i_p_and :
- T_HVX_rol_acc_P <"$dst &= rol($src1,#$src2)">, S6_rol_i_p_and_enc;
-def S6_rol_i_p_or :
- T_HVX_rol_acc_P <"$dst |= rol($src1,#$src2)">, S6_rol_i_p_or_enc;
-def S6_rol_i_p_xacc :
- T_HVX_rol_acc_P<"$dst ^= rol($src1,#$src2)">, S6_rol_i_p_xacc_enc;
-
-let hasNewValue = 1, opNewValue = 0 in {
-def S6_rol_i_r_nac :
- T_HVX_rol_acc_R <"$dst -= rol($src1,#$src2)">, S6_rol_i_r_nac_enc;
-def S6_rol_i_r_acc :
- T_HVX_rol_acc_R <"$dst += rol($src1,#$src2)">, S6_rol_i_r_acc_enc;
-def S6_rol_i_r_and :
- T_HVX_rol_acc_R <"$dst &= rol($src1,#$src2)">, S6_rol_i_r_and_enc;
-def S6_rol_i_r_or :
- T_HVX_rol_acc_R <"$dst |= rol($src1,#$src2)">, S6_rol_i_r_or_enc;
-def S6_rol_i_r_xacc :
- T_HVX_rol_acc_R <"$dst ^= rol($src1,#$src2)">, S6_rol_i_r_xacc_enc;
-}
-
-let isSolo = 1, Itinerary = LD_tc_ld_SLOT0, Type = TypeLD in
-class T_V6_extractw <RegisterClass RC>
- : LD1Inst <(outs IntRegs:$dst), (ins RC:$src1, IntRegs:$src2),
- "$dst = vextract($src1,$src2)">, V6_extractw_enc;
-
-def V6_extractw : T_V6_extractw <VectorRegs>;
-let isCodeGenOnly = 1 in
-def V6_extractw_128B : T_V6_extractw <VectorRegs128B>;
-
-let Itinerary = ST_tc_st_SLOT0, validSubTargets = HasV55SubT in
-class T_sys0op <string asmString>
- : ST1Inst <(outs), (ins), asmString>;
-
-let isSolo = 1, validSubTargets = HasV55SubT in {
-def Y5_l2gunlock : T_sys0op <"l2gunlock">, Y5_l2gunlock_enc;
-def Y5_l2gclean : T_sys0op <"l2gclean">, Y5_l2gclean_enc;
-def Y5_l2gcleaninv : T_sys0op <"l2gcleaninv">, Y5_l2gcleaninv_enc;
-}
-
-class T_sys1op <string asmString, RegisterClass RC>
- : ST1Inst <(outs), (ins RC:$src1), asmString>;
-
-class T_sys1op_R <string asmString> : T_sys1op <asmString, IntRegs>;
-class T_sys1op_P <string asmString> : T_sys1op <asmString, DoubleRegs>;
-
-let isSoloAX = 1, validSubTargets = HasV55SubT in
-def Y5_l2unlocka : T_sys1op_R <"l2unlocka($src1)">, Y5_l2unlocka_enc;
-
-let isSolo = 1, validSubTargets = HasV60SubT in {
-def Y6_l2gcleanpa : T_sys1op_P <"l2gclean($src1)">, Y6_l2gcleanpa_enc;
-def Y6_l2gcleaninvpa : T_sys1op_P <"l2gcleaninv($src1)">, Y6_l2gcleaninvpa_enc;
-}
-
-let Itinerary = ST_tc_3stall_SLOT0, isPredicateLate = 1, isSoloAX = 1,
- validSubTargets = HasV55SubT in
-def Y5_l2locka : ST1Inst <(outs PredRegs:$dst), (ins IntRegs:$src1),
- "$dst = l2locka($src1)">, Y5_l2locka_enc;
-
-// Not defined on the etc side; why?
-// defm S2_cabacencbin : _VV <"Rdd=encbin(Rss,$src2,Pu)">, S2_cabacencbin_enc;
-
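-// A5_ACS: vacsh performs an add-compare-select over halfwords (the step
-// used in Viterbi decoding), producing both the selected sums and a
-// predicate recording which side was chosen.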
-let Defs = [USR_OVF], Itinerary = M_tc_3stall_SLOT23, isPredicateLate = 1,
-    hasSideEffects = 0, validSubTargets = HasV55SubT in
-def A5_ACS : MInst2 <(outs DoubleRegs:$dst1, PredRegs:$dst2),
- (ins DoubleRegs:$_src_, DoubleRegs:$src1, DoubleRegs:$src2),
- "$dst1,$dst2 = vacsh($src1,$src2)", [],
- "$dst1 = $_src_" >, Requires<[HasV55T]>, A5_ACS_enc;
-
-let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, hasNewValue = 1,
- hasSideEffects = 0 in
-class T_HVX_alu2 <string asmString, RegisterClass RCout, RegisterClass RCin1,
- RegisterClass RCin2>
- : CVI_VA_Resource1<(outs RCout:$dst),
- (ins RCin1:$src1, RCin2:$src2, RCin2:$src3), asmString>;
-
-multiclass T_HVX_alu2 <string asmString, RegisterClass RC > {
- def NAME : T_HVX_alu2 <asmString, RC, VecPredRegs, VectorRegs>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_alu2 <asmString, !cast<RegisterClass>(RC#"128B"),
- VecPredRegs128B, VectorRegs128B>;
-}
-
-multiclass T_HVX_alu2_V <string asmString> :
- T_HVX_alu2 <asmString, VectorRegs>;
-
-multiclass T_HVX_alu2_W <string asmString> :
- T_HVX_alu2 <asmString, VecDblRegs>;
-
-defm V6_vswap : T_HVX_alu2_W <"$dst = vswap($src1,$src2,$src3)">, V6_vswap_enc;
-
-let Itinerary = CVI_VA, Type = TypeCVI_VA, hasNewValue = 1,
- hasSideEffects = 0 in
-defm V6_vmux : T_HVX_alu2_V <"$dst = vmux($src1,$src2,$src3)">, V6_vmux_enc;
-
-class T_HVX_vlutb <string asmString, RegisterClass RCout, RegisterClass RCin>
- : CVI_VA_Resource1<(outs RCout:$dst),
- (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3), asmString>;
-
-multiclass T_HVX_vlutb <string asmString, RegisterClass RCout,
- RegisterClass RCin> {
- def NAME : T_HVX_vlutb <asmString, RCout, RCin>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_vlutb <asmString, !cast<RegisterClass>(RCout#"128B"),
- !cast<RegisterClass>(RCin#"128B")>;
-}
-
-multiclass T_HVX_vlutb_V <string asmString> :
- T_HVX_vlutb <asmString, VectorRegs, VectorRegs>;
-
-multiclass T_HVX_vlutb_W <string asmString> :
- T_HVX_vlutb <asmString, VecDblRegs, VectorRegs>;
-
-let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, isAccumulator = 1 in
-class T_HVX_vlutb_acc <string asmString, RegisterClass RCout,
- RegisterClass RCin>
- : CVI_VA_Resource1<(outs RCout:$dst),
- (ins RCout:$_src_, RCin:$src1, RCin:$src2, IntRegsLow8:$src3),
- asmString, [], "$dst = $_src_">;
-
-multiclass T_HVX_vlutb_acc <string asmString, RegisterClass RCout,
- RegisterClass RCin> {
- def NAME : T_HVX_vlutb_acc <asmString, RCout, RCin>;
- let isCodeGenOnly = 1 in
- def NAME#_128B : T_HVX_vlutb_acc<asmString,
- !cast<RegisterClass>(RCout#"128B"),
- !cast<RegisterClass>(RCin#"128B")>;
-}
-
-multiclass T_HVX_vlutb_acc_V <string asmString> :
- T_HVX_vlutb_acc <asmString, VectorRegs, VectorRegs>;
-
-multiclass T_HVX_vlutb_acc_W <string asmString> :
- T_HVX_vlutb_acc <asmString, VecDblRegs, VectorRegs>;
-
-
-let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1 in
-defm V6_vlutvvb:
- T_HVX_vlutb_V <"$dst.b = vlut32($src1.b,$src2.b,$src3)">, V6_vlutvvb_enc;
-
-let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, hasNewValue = 1 in
-defm V6_vlutvwh:
- T_HVX_vlutb_W <"$dst.h = vlut16($src1.b,$src2.h,$src3)">, V6_vlutvwh_enc;
-
-let hasNewValue = 1 in {
- defm V6_vlutvvb_oracc:
- T_HVX_vlutb_acc_V <"$dst.b |= vlut32($src1.b,$src2.b,$src3)">,
- V6_vlutvvb_oracc_enc;
- defm V6_vlutvwh_oracc:
- T_HVX_vlutb_acc_W <"$dst.h |= vlut16($src1.b,$src2.h,$src3)">,
- V6_vlutvwh_oracc_enc;
-}
-
-// It's a fake instruction; should it be defined at all?
-def S2_cabacencbin
- : SInst2<(outs DoubleRegs:$dst),
- (ins DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3),
- "$dst = encbin($src1,$src2,$src3)">, S2_cabacencbin_enc;
-
-// Vhist instructions
-def V6_vhistq
- : CVI_HIST_Resource1 <(outs), (ins VecPredRegs:$src1),
- "vhist($src1)">, V6_vhistq_enc;
-
-def V6_vhist
- : CVI_HIST_Resource1 <(outs), (ins),
- "vhist" >, V6_vhist_enc;
-
-
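-// Pseudos for materializing the zero vector (vd0), copying a vector pair
-// (vassignp), and extracting the low/high single vectors of a pair (lo/hi).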
-let isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in {
- def V6_vd0: CVI_VA_Resource<(outs VectorRegs:$dst), (ins), "$dst = #0", []>;
- def V6_vd0_128B: CVI_VA_Resource<(outs VectorRegs128B:$dst), (ins),
- "$dst = #0", []>;
-
- def V6_vassignp: CVI_VA_Resource<(outs VecDblRegs:$dst),
- (ins VecDblRegs:$src), "", []>;
- def V6_vassignp_128B : CVI_VA_Resource<(outs VecDblRegs128B:$dst),
- (ins VecDblRegs128B:$src), "", []>;
-
- def V6_lo: CVI_VA_Resource<(outs VectorRegs:$dst), (ins VecDblRegs:$src1),
- "", []>;
- def V6_lo_128B: CVI_VA_Resource<(outs VectorRegs128B:$dst),
- (ins VecDblRegs128B:$src1), "", []>;
-
- def V6_hi: CVI_VA_Resource<(outs VectorRegs:$dst), (ins VecDblRegs:$src1),
- "", []>;
- def V6_hi_128B: CVI_VA_Resource<(outs VectorRegs128B:$dst),
- (ins VecDblRegs128B:$src1), "", []>;
-}
diff --git a/lib/Target/Hexagon/HexagonInstrInfoVector.td b/lib/Target/Hexagon/HexagonInstrInfoVector.td
deleted file mode 100644
index e3520bd6e515..000000000000
--- a/lib/Target/Hexagon/HexagonInstrInfoVector.td
+++ /dev/null
@@ -1,69 +0,0 @@
-//===- HexagonInstrInfoVector.td - Hexagon Vector Patterns -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon Vector instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-// Vector shift support. Vector shifting in Hexagon differs from LLVM's
-// internal representation: LLVM assumes all vector shifts have the form
-//   <VT> = SHL/SRA/SRL <VT> by <VT>
-// while Hexagon uses
-//   <VT> = SHL/SRA/SRL <VT> by <IT/i32>
-// (a scalar shift amount). As a result, special care is needed to
-// guarantee correctness and performance.
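-// For illustration (a sketch, not a pattern defined here): a uniform word
-// shift that LLVM models as
-//   %r = shl <2 x i32> %v, <i32 3, i32 3>
-// has to be selected to the scalar-amount Hexagon form
-//   Rdd = vaslw(Rss,#3)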
-class vshift_v4i16<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
- : S_2OpInstImm<Str, MajOp, MinOp, u4_0Imm, []> {
- bits<4> src2;
- let Inst{11-8} = src2;
-}
-
-class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
- : S_2OpInstImm<Str, MajOp, MinOp, u5_0Imm, []> {
- bits<5> src2;
- let Inst{12-8} = src2;
-}
-
-def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>;
-def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>;
-def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>;
-
-def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>;
-def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>;
-def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>;
-
-// Vector shift words by register
-def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>;
-def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>;
-def S2_asl_r_vw : T_S3op_shiftVect < "vaslw", 0b00, 0b10>;
-def S2_lsl_r_vw : T_S3op_shiftVect < "vlslw", 0b00, 0b11>;
-
-// Vector shift halfwords by register
-def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>;
-def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>;
-def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>;
-def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>;
-
-
-// Hexagon doesn't have a vector multiply with C semantics.
-// Instead, generate a pseudo instruction that gets expanded into two
-// scalar MPYI instructions.
-// This is expanded by ExpandPostRAPseudos.
-let isPseudo = 1 in
-def PS_vmulw : PseudoM<(outs DoubleRegs:$Rd),
- (ins DoubleRegs:$Rs, DoubleRegs:$Rt), "", []>;
-
-let isPseudo = 1 in
-def PS_vmulw_acc : PseudoM<(outs DoubleRegs:$Rd),
- (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt), "", [],
- "$Rd = $Rx">;
-
-
-
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
index d4f303bf6ff0..c611857ec26a 100644
--- a/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -1347,6 +1347,25 @@ def: T_stc_pat<S2_storeri_pci, int_hexagon_circ_stw, s4_2ImmPred, I32>;
def: T_stc_pat<S2_storerd_pci, int_hexagon_circ_std, s4_3ImmPred, I64>;
def: T_stc_pat<S2_storerf_pci, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
+multiclass MaskedStore <InstHexagon MI, Intrinsic IntID> {
+ def : Pat<(IntID VecPredRegs:$src1, IntRegs:$src2, VectorRegs:$src3),
+ (MI VecPredRegs:$src1, IntRegs:$src2, #0, VectorRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+
+ def : Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1,
+ IntRegs:$src2,
+ VectorRegs128B:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1,
+ IntRegs:$src2, #0,
+ VectorRegs128B:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+defm : MaskedStore <V6_vS32b_qpred_ai, int_hexagon_V6_vmaskedstoreq>;
+defm : MaskedStore <V6_vS32b_nqpred_ai, int_hexagon_V6_vmaskedstorenq>;
+defm : MaskedStore <V6_vS32b_nt_qpred_ai, int_hexagon_V6_vmaskedstorentq>;
+defm : MaskedStore <V6_vS32b_nt_nqpred_ai, int_hexagon_V6_vmaskedstorentnq>;
+
include "HexagonIntrinsicsV3.td"
include "HexagonIntrinsicsV4.td"
include "HexagonIntrinsicsV5.td"
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/lib/Target/Hexagon/HexagonIntrinsicsV60.td
index a45e1c9d7be4..f438b3e0368f 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV60.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV60.td
@@ -790,7 +790,7 @@ def : T_RRI_pat <S6_rol_i_r_xacc, int_hexagon_S6_rol_i_r_xacc>;
defm : T_VR_pat <V6_extractw, int_hexagon_V6_extractw>;
defm : T_VR_pat <V6_vinsertwr, int_hexagon_V6_vinsertwr>;
-def : T_PPQ_pat <S2_cabacencbin, int_hexagon_S2_cabacencbin>;
+//def : T_PPQ_pat <S2_cabacencbin, int_hexagon_S2_cabacencbin>;
def: Pat<(v64i16 (trunc v64i32:$Vdd)),
(v64i16 (V6_vpackwh_sat_128B
diff --git a/lib/Target/Hexagon/HexagonIsetDx.td b/lib/Target/Hexagon/HexagonIsetDx.td
deleted file mode 100644
index ebedf2cbaf17..000000000000
--- a/lib/Target/Hexagon/HexagonIsetDx.td
+++ /dev/null
@@ -1,728 +0,0 @@
-//=- HexagonIsetDx.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon duplex instructions.
-//
-//===----------------------------------------------------------------------===//
-
-// SA1_combine1i: Combines.
-let isCodeGenOnly = 1, hasSideEffects = 0 in
-def SA1_combine1i: SUBInst <
- (outs DoubleRegs:$Rdd),
- (ins u2_0Imm:$u2),
- "$Rdd = combine(#1, #$u2)"> {
- bits<3> Rdd;
- bits<2> u2;
-
- let Inst{12-10} = 0b111;
- let Inst{8} = 0b0;
- let Inst{4-3} = 0b01;
- let Inst{2-0} = Rdd;
- let Inst{6-5} = u2;
- }
-
-// SL2_jumpr31_f: Indirect conditional jump if false.
-// SL2_jumpr31_f -> SL2_jumpr31_fnew
-let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def SL2_jumpr31_f: SUBInst <
- (outs ),
- (ins ),
- "if (!p0) jumpr r31"> {
- let Inst{12-6} = 0b1111111;
- let Inst{2-0} = 0b101;
- }
-
-// SL2_deallocframe: Deallocate stack frame.
-let Defs = [R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in
-def SL2_deallocframe: SUBInst <
- (outs ),
- (ins ),
- "deallocframe"> {
- let Inst{12-6} = 0b1111100;
- let Inst{2} = 0b0;
- }
-
-// SL2_return_f: Deallocate stack frame and return.
-// SL2_return_f -> SL2_return_fnew
-let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
-def SL2_return_f: SUBInst <
- (outs ),
- (ins ),
- "if (!p0) dealloc_return"> {
- let Inst{12-6} = 0b1111101;
- let Inst{2-0} = 0b101;
- }
-
-// SA1_combine3i: Combines.
-let isCodeGenOnly = 1, hasSideEffects = 0 in
-def SA1_combine3i: SUBInst <
- (outs DoubleRegs:$Rdd),
- (ins u2_0Imm:$u2),
- "$Rdd = combine(#3, #$u2)"> {
- bits<3> Rdd;
- bits<2> u2;
-
- let Inst{12-10} = 0b111;
- let Inst{8} = 0b0;
- let Inst{4-3} = 0b11;
- let Inst{2-0} = Rdd;
- let Inst{6-5} = u2;
- }
-
-// SS2_storebi0: Store byte.
-let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in
-def SS2_storebi0: SUBInst <
- (outs ),
- (ins IntRegs:$Rs, u4_0Imm:$u4_0),
- "memb($Rs + #$u4_0)=#0"> {
- bits<4> Rs;
- bits<4> u4_0;
-
- let Inst{12-8} = 0b10010;
- let Inst{7-4} = Rs;
- let Inst{3-0} = u4_0;
- }
-
-// SA1_clrtnew: Clear if true.
-let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_clrtnew: SUBInst <
- (outs IntRegs:$Rd),
- (ins PredRegs:$Pu),
- "if ($Pu.new) $Rd = #0"> {
- bits<4> Rd;
-
- let Inst{12-9} = 0b1101;
- let Inst{6-4} = 0b100;
- let Inst{3-0} = Rd;
- }
-
-// SL2_loadruh_io: Load half.
-let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in
-def SL2_loadruh_io: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs, u3_1Imm:$u3_1),
- "$Rd = memuh($Rs + #$u3_1)"> {
- bits<4> Rd;
- bits<4> Rs;
- bits<4> u3_1;
-
- let Inst{12-11} = 0b01;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- let Inst{10-8} = u3_1{3-1};
- }
-
-// SL2_jumpr31_tnew: Indirect conditional jump if true.
-let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def SL2_jumpr31_tnew: SUBInst <
- (outs ),
- (ins ),
- "if (p0.new) jumpr:nt r31"> {
- let Inst{12-6} = 0b1111111;
- let Inst{2-0} = 0b110;
- }
-
-// SA1_addi: Add.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 1, opExtentBits = 7, opExtendable = 2 in
-def SA1_addi: SUBInst <
- (outs IntRegs:$Rx),
- (ins IntRegs:$_src_, s7_0Ext:$s7),
- "$Rx = add($_src_, #$s7)" ,
- [] ,
- "$_src_ = $Rx"> {
- bits<4> Rx;
- bits<7> s7;
-
- let Inst{12-11} = 0b00;
- let Inst{3-0} = Rx;
- let Inst{10-4} = s7;
- }
-
-// SL1_loadrub_io: Load byte.
-let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in
-def SL1_loadrub_io: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs, u4_0Imm:$u4_0),
- "$Rd = memub($Rs + #$u4_0)"> {
- bits<4> Rd;
- bits<4> Rs;
- bits<4> u4_0;
-
- let Inst{12} = 0b1;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- let Inst{11-8} = u4_0;
- }
-
-// SL1_loadri_io: Load word.
-let isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in
-def SL1_loadri_io: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs, u4_2Imm:$u4_2),
- "$Rd = memw($Rs + #$u4_2)"> {
- bits<4> Rd;
- bits<4> Rs;
- bits<6> u4_2;
-
- let Inst{12} = 0b0;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- let Inst{11-8} = u4_2{5-2};
- }
-
-// SA1_cmpeqi: Compare immediate.
-let Defs = [P0], isCodeGenOnly = 1, hasSideEffects = 0 in
-def SA1_cmpeqi: SUBInst <
- (outs ),
- (ins IntRegs:$Rs, u2_0Imm:$u2),
- "p0 = cmp.eq($Rs, #$u2)"> {
- bits<4> Rs;
- bits<2> u2;
-
- let Inst{12-8} = 0b11001;
- let Inst{7-4} = Rs;
- let Inst{1-0} = u2;
- }
-
-// SA1_combinerz: Combines.
-let isCodeGenOnly = 1, hasSideEffects = 0 in
-def SA1_combinerz: SUBInst <
- (outs DoubleRegs:$Rdd),
- (ins IntRegs:$Rs),
- "$Rdd = combine($Rs, #0)"> {
- bits<3> Rdd;
- bits<4> Rs;
-
- let Inst{12-10} = 0b111;
- let Inst{8} = 0b1;
- let Inst{3} = 0b1;
- let Inst{2-0} = Rdd;
- let Inst{7-4} = Rs;
- }
-
-// SL2_return_t: Deallocate stack frame and return.
-// SL2_return_t -> SL2_return_tnew
-let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
-def SL2_return_t: SUBInst <
- (outs ),
- (ins ),
- "if (p0) dealloc_return"> {
- let Inst{12-6} = 0b1111101;
- let Inst{2-0} = 0b100;
- }
-
-// SS2_allocframe: Allocate stack frame.
-let Defs = [R29, R30], Uses = [R30, R31, R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in
-def SS2_allocframe: SUBInst <
- (outs ),
- (ins u5_3Imm:$u5_3),
- "allocframe(#$u5_3)"> {
- bits<8> u5_3;
-
- let Inst{12-9} = 0b1110;
- let Inst{8-4} = u5_3{7-3};
- }
-
-// SS2_storeh_io: Store half.
-let isCodeGenOnly = 1, mayStore = 1, accessSize = HalfWordAccess in
-def SS2_storeh_io: SUBInst <
- (outs ),
- (ins IntRegs:$Rs, u3_1Imm:$u3_1, IntRegs:$Rt),
- "memh($Rs + #$u3_1) = $Rt"> {
- bits<4> Rs;
- bits<4> u3_1;
- bits<4> Rt;
-
- let Inst{12-11} = 0b00;
- let Inst{7-4} = Rs;
- let Inst{10-8} = u3_1{3-1};
- let Inst{3-0} = Rt;
- }
-
-// SS2_storewi0: Store word.
-let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
-def SS2_storewi0: SUBInst <
- (outs ),
- (ins IntRegs:$Rs, u4_2Imm:$u4_2),
- "memw($Rs + #$u4_2)=#0"> {
- bits<4> Rs;
- bits<6> u4_2;
-
- let Inst{12-8} = 0b10000;
- let Inst{7-4} = Rs;
- let Inst{3-0} = u4_2{5-2};
- }
-
-// SS2_storewi1: Store word.
-let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
-def SS2_storewi1: SUBInst <
- (outs ),
- (ins IntRegs:$Rs, u4_2Imm:$u4_2),
- "memw($Rs + #$u4_2)=#1"> {
- bits<4> Rs;
- bits<6> u4_2;
-
- let Inst{12-8} = 0b10001;
- let Inst{7-4} = Rs;
- let Inst{3-0} = u4_2{5-2};
- }
-
-// SL2_jumpr31: Indirect unconditional jump.
-let Defs = [PC], Uses = [R31], isCodeGenOnly = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def SL2_jumpr31: SUBInst <
- (outs ),
- (ins ),
- "jumpr r31"> {
- let Inst{12-6} = 0b1111111;
- let Inst{2} = 0b0;
- }
-
-// SA1_combinezr: Combines.
-let isCodeGenOnly = 1, hasSideEffects = 0 in
-def SA1_combinezr: SUBInst <
- (outs DoubleRegs:$Rdd),
- (ins IntRegs:$Rs),
- "$Rdd = combine(#0, $Rs)"> {
- bits<3> Rdd;
- bits<4> Rs;
-
- let Inst{12-10} = 0b111;
- let Inst{8} = 0b1;
- let Inst{3} = 0b0;
- let Inst{2-0} = Rdd;
- let Inst{7-4} = Rs;
- }
-
-// SL2_loadrh_io: Load half.
-let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in
-def SL2_loadrh_io: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs, u3_1Imm:$u3_1),
- "$Rd = memh($Rs + #$u3_1)"> {
- bits<4> Rd;
- bits<4> Rs;
- bits<4> u3_1;
-
- let Inst{12-11} = 0b00;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- let Inst{10-8} = u3_1{3-1};
- }
-
-// SA1_addrx: Add.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_addrx: SUBInst <
- (outs IntRegs:$Rx),
- (ins IntRegs:$_src_, IntRegs:$Rs),
- "$Rx = add($_src_, $Rs)" ,
- [] ,
- "$_src_ = $Rx"> {
- bits<4> Rx;
- bits<4> Rs;
-
- let Inst{12-8} = 0b11000;
- let Inst{3-0} = Rx;
- let Inst{7-4} = Rs;
- }
-
-// SA1_setin1: Set to -1.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_setin1: SUBInst <
- (outs IntRegs:$Rd),
- (ins ),
- "$Rd = #{-1}"> {
- bits<4> Rd;
-
- let Inst{12-9} = 0b1101;
- let Inst{6} = 0b0;
- let Inst{3-0} = Rd;
- }
-
-// SA1_sxth: Sxth.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_sxth: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs),
- "$Rd = sxth($Rs)"> {
- bits<4> Rd;
- bits<4> Rs;
-
- let Inst{12-8} = 0b10100;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- }
-
-// SA1_combine0i: Combines.
-let isCodeGenOnly = 1, hasSideEffects = 0 in
-def SA1_combine0i: SUBInst <
- (outs DoubleRegs:$Rdd),
- (ins u2_0Imm:$u2),
- "$Rdd = combine(#0, #$u2)"> {
- bits<3> Rdd;
- bits<2> u2;
-
- let Inst{12-10} = 0b111;
- let Inst{8} = 0b0;
- let Inst{4-3} = 0b00;
- let Inst{2-0} = Rdd;
- let Inst{6-5} = u2;
- }
-
-// SA1_combine2i: Combines.
-let isCodeGenOnly = 1, hasSideEffects = 0 in
-def SA1_combine2i: SUBInst <
- (outs DoubleRegs:$Rdd),
- (ins u2_0Imm:$u2),
- "$Rdd = combine(#2, #$u2)"> {
- bits<3> Rdd;
- bits<2> u2;
-
- let Inst{12-10} = 0b111;
- let Inst{8} = 0b0;
- let Inst{4-3} = 0b10;
- let Inst{2-0} = Rdd;
- let Inst{6-5} = u2;
- }
-
-// SA1_sxtb: Sxtb.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_sxtb: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs),
- "$Rd = sxtb($Rs)"> {
- bits<4> Rd;
- bits<4> Rs;
-
- let Inst{12-8} = 0b10101;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- }
-
-// SA1_clrf: Clear if false.
-// SA1_clrf -> SA1_clrfnew
-let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_clrf: SUBInst <
- (outs IntRegs:$Rd),
- (ins PredRegs:$Pu),
- "if (!$Pu) $Rd = #0"> {
- bits<4> Rd;
-
- let Inst{12-9} = 0b1101;
- let Inst{6-4} = 0b111;
- let Inst{3-0} = Rd;
- }
-
-// SL2_loadrb_io: Load byte.
-let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in
-def SL2_loadrb_io: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs, u3_0Imm:$u3_0),
- "$Rd = memb($Rs + #$u3_0)"> {
- bits<4> Rd;
- bits<4> Rs;
- bits<3> u3_0;
-
- let Inst{12-11} = 0b10;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- let Inst{10-8} = u3_0;
- }
-
-// SA1_tfr: Tfr.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_tfr: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs),
- "$Rd = $Rs"> {
- bits<4> Rd;
- bits<4> Rs;
-
- let Inst{12-8} = 0b10000;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- }
-
-// SL2_loadrd_sp: Load dword.
-let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in
-def SL2_loadrd_sp: SUBInst <
- (outs DoubleRegs:$Rdd),
- (ins u5_3Imm:$u5_3),
- "$Rdd = memd(r29 + #$u5_3)"> {
- bits<3> Rdd;
- bits<8> u5_3;
-
- let Inst{12-8} = 0b11110;
- let Inst{2-0} = Rdd;
- let Inst{7-3} = u5_3{7-3};
- }
-
-// SA1_and1: And #1.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_and1: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs),
- "$Rd = and($Rs, #1)"> {
- bits<4> Rd;
- bits<4> Rs;
-
- let Inst{12-8} = 0b10010;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- }
-
-// SS2_storebi1: Store byte.
-let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in
-def SS2_storebi1: SUBInst <
- (outs ),
- (ins IntRegs:$Rs, u4_0Imm:$u4_0),
- "memb($Rs + #$u4_0)=#1"> {
- bits<4> Rs;
- bits<4> u4_0;
-
- let Inst{12-8} = 0b10011;
- let Inst{7-4} = Rs;
- let Inst{3-0} = u4_0;
- }
-
-// SA1_inc: Inc.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_inc: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs),
- "$Rd = add($Rs, #1)"> {
- bits<4> Rd;
- bits<4> Rs;
-
- let Inst{12-8} = 0b10001;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- }
-
-// SS2_stored_sp: Store dword.
-let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in
-def SS2_stored_sp: SUBInst <
- (outs ),
- (ins s6_3Imm:$s6_3, DoubleRegs:$Rtt),
- "memd(r29 + #$s6_3) = $Rtt"> {
- bits<9> s6_3;
- bits<3> Rtt;
-
- let Inst{12-9} = 0b0101;
- let Inst{8-3} = s6_3{8-3};
- let Inst{2-0} = Rtt;
- }
-
-// SS2_storew_sp: Store word.
-let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
-def SS2_storew_sp: SUBInst <
- (outs ),
- (ins u5_2Imm:$u5_2, IntRegs:$Rt),
- "memw(r29 + #$u5_2) = $Rt"> {
- bits<7> u5_2;
- bits<4> Rt;
-
- let Inst{12-9} = 0b0100;
- let Inst{8-4} = u5_2{6-2};
- let Inst{3-0} = Rt;
- }
-
-// SL2_jumpr31_fnew: Indirect conditional jump if false.
-let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def SL2_jumpr31_fnew: SUBInst <
- (outs ),
- (ins ),
- "if (!p0.new) jumpr:nt r31"> {
- let Inst{12-6} = 0b1111111;
- let Inst{2-0} = 0b111;
- }
-
-// SA1_clrt: Clear if true.
-// SA1_clrt -> SA1_clrtnew
-let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_clrt: SUBInst <
- (outs IntRegs:$Rd),
- (ins PredRegs:$Pu),
- "if ($Pu) $Rd = #0"> {
- bits<4> Rd;
-
- let Inst{12-9} = 0b1101;
- let Inst{6-4} = 0b110;
- let Inst{3-0} = Rd;
- }
-
-// SL2_return: Deallocate stack frame and return.
-let Defs = [PC, R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
-def SL2_return: SUBInst <
- (outs ),
- (ins ),
- "dealloc_return"> {
- let Inst{12-6} = 0b1111101;
- let Inst{2} = 0b0;
- }
-
-// SA1_dec: Dec.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_dec: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs),
- "$Rd = add($Rs,#{-1})"> {
- bits<4> Rd;
- bits<4> Rs;
-
- let Inst{12-8} = 0b10011;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- }
-
-// SA1_seti: Set immed.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 0, opExtentBits = 6, opExtendable = 1 in
-def SA1_seti: SUBInst <
- (outs IntRegs:$Rd),
- (ins u6_0Ext:$u6),
- "$Rd = #$u6"> {
- bits<4> Rd;
- bits<6> u6;
-
- let Inst{12-10} = 0b010;
- let Inst{3-0} = Rd;
- let Inst{9-4} = u6;
- }
-
-// SL2_jumpr31_t: Indirect conditional jump if true.
-// SL2_jumpr31_t -> SL2_jumpr31_tnew
-let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def SL2_jumpr31_t: SUBInst <
- (outs ),
- (ins ),
- "if (p0) jumpr r31"> {
- let Inst{12-6} = 0b1111111;
- let Inst{2-0} = 0b100;
- }
-
-// SA1_clrfnew: Clear if false.
-let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_clrfnew: SUBInst <
- (outs IntRegs:$Rd),
- (ins PredRegs:$Pu),
- "if (!$Pu.new) $Rd = #0"> {
- bits<4> Rd;
-
- let Inst{12-9} = 0b1101;
- let Inst{6-4} = 0b101;
- let Inst{3-0} = Rd;
- }
-
-// SS1_storew_io: Store word.
-let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
-def SS1_storew_io: SUBInst <
- (outs ),
- (ins IntRegs:$Rs, u4_2Imm:$u4_2, IntRegs:$Rt),
- "memw($Rs + #$u4_2) = $Rt"> {
- bits<4> Rs;
- bits<6> u4_2;
- bits<4> Rt;
-
- let Inst{12} = 0b0;
- let Inst{7-4} = Rs;
- let Inst{11-8} = u4_2{5-2};
- let Inst{3-0} = Rt;
- }
-
-// SA1_zxtb: Zxtb.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_zxtb: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs),
- "$Rd = and($Rs, #255)"> {
- bits<4> Rd;
- bits<4> Rs;
-
- let Inst{12-8} = 0b10111;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- }
-
-// SA1_addsp: Add.
-let Uses = [R29], isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_addsp: SUBInst <
- (outs IntRegs:$Rd),
- (ins u6_2Imm:$u6_2),
- "$Rd = add(r29, #$u6_2)"> {
- bits<4> Rd;
- bits<8> u6_2;
-
- let Inst{12-10} = 0b011;
- let Inst{3-0} = Rd;
- let Inst{9-4} = u6_2{7-2};
- }
-
-// SL2_loadri_sp: Load word.
-let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in
-def SL2_loadri_sp: SUBInst <
- (outs IntRegs:$Rd),
- (ins u5_2Imm:$u5_2),
- "$Rd = memw(r29 + #$u5_2)"> {
- bits<4> Rd;
- bits<7> u5_2;
-
- let Inst{12-9} = 0b1110;
- let Inst{3-0} = Rd;
- let Inst{8-4} = u5_2{6-2};
- }
-
-// SS1_storeb_io: Store byte.
-let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in
-def SS1_storeb_io: SUBInst <
- (outs ),
- (ins IntRegs:$Rs, u4_0Imm:$u4_0, IntRegs:$Rt),
- "memb($Rs + #$u4_0) = $Rt"> {
- bits<4> Rs;
- bits<4> u4_0;
- bits<4> Rt;
-
- let Inst{12} = 0b1;
- let Inst{7-4} = Rs;
- let Inst{11-8} = u4_0;
- let Inst{3-0} = Rt;
- }
-
-// SL2_return_tnew: Deallocate stack frame and return.
-let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
-def SL2_return_tnew: SUBInst <
- (outs ),
- (ins ),
- "if (p0.new) dealloc_return:nt"> {
- let Inst{12-6} = 0b1111101;
- let Inst{2-0} = 0b110;
- }
-
-// SL2_return_fnew: Deallocate stack frame and return.
-let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
-def SL2_return_fnew: SUBInst <
- (outs ),
- (ins ),
- "if (!p0.new) dealloc_return:nt"> {
- let Inst{12-6} = 0b1111101;
- let Inst{2-0} = 0b111;
- }
-
-// SA1_zxth: Zxth.
-let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def SA1_zxth: SUBInst <
- (outs IntRegs:$Rd),
- (ins IntRegs:$Rs),
- "$Rd = zxth($Rs)"> {
- bits<4> Rd;
- bits<4> Rs;
-
- let Inst{12-8} = 0b10110;
- let Inst{3-0} = Rd;
- let Inst{7-4} = Rs;
- }
-
diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
new file mode 100644
index 000000000000..b5948475e1f7
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -0,0 +1,2338 @@
+//===--- HexagonLoopIdiomRecognition.cpp ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-lir"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <array>
+#include <deque>
+#include <functional>
+#include <map>
+#include <set>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
+ cl::Hidden, cl::init(false),
+ cl::desc("Disable generation of memcpy in loop idiom recognition"));
+
+static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom",
+ cl::Hidden, cl::init(false),
+ cl::desc("Disable generation of memmove in loop idiom recognition"));
+
+static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold",
+ cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "
+ "check guarding the memmove."));
+
+static cl::opt<unsigned> CompileTimeMemSizeThreshold(
+ "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64),
+ cl::desc("Threshold (in bytes) to perform the transformation, if the "
+ "runtime loop count (mem transfer size) is known at compile-time."));
+
+static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
+ cl::Hidden, cl::init(true),
+ cl::desc("Only enable generating memmove in non-nested loops"));
+
+cl::opt<bool> HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy",
+ cl::Hidden, cl::init(false),
+ cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
+
+static const char *HexagonVolatileMemcpyName
+ = "hexagon_memcpy_forward_vp4cp4n2";
+
+
+namespace llvm {
+ void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
+ Pass *createHexagonLoopIdiomPass();
+}
+
+namespace {
+ class HexagonLoopIdiomRecognize : public LoopPass {
+ public:
+ static char ID;
+ explicit HexagonLoopIdiomRecognize() : LoopPass(ID) {
+ initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+ }
+ StringRef getPassName() const override {
+ return "Recognize Hexagon-specific loop idioms";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+ private:
+ unsigned getStoreSizeInBytes(StoreInst *SI);
+ int getSCEVStride(const SCEVAddRecExpr *StoreEv);
+ bool isLegalStore(Loop *CurLoop, StoreInst *SI);
+ void collectStores(Loop *CurLoop, BasicBlock *BB,
+ SmallVectorImpl<StoreInst*> &Stores);
+ bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
+ bool coverLoop(Loop *L, SmallVectorImpl<Instruction*> &Insts) const;
+ bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
+ SmallVectorImpl<BasicBlock*> &ExitBlocks);
+ bool runOnCountableLoop(Loop *L);
+
+ AliasAnalysis *AA;
+ const DataLayout *DL;
+ DominatorTree *DT;
+ LoopInfo *LF;
+ const TargetLibraryInfo *TLI;
+ ScalarEvolution *SE;
+ bool HasMemcpy, HasMemmove;
+ };
+}
+
+char HexagonLoopIdiomRecognize::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",
+ "Recognize Hexagon-specific loop idioms", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",
+ "Recognize Hexagon-specific loop idioms", false, false)
+
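The factory declared in the llvm namespace above is conventionally a one-liner; a minimal sketch, assuming the usual legacy-pass pattern (the real definition is expected to appear later in this file):

    // Hedged sketch of the conventional factory for the pass registered above.
    Pass *llvm::createHexagonLoopIdiomPass() {
      return new HexagonLoopIdiomRecognize();
    }
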
+
+namespace {
+ struct Simplifier {
+ typedef std::function<Value* (Instruction*, LLVMContext&)> Rule;
+
+ void addRule(const Rule &R) { Rules.push_back(R); }
+
+ private:
+ struct WorkListType {
+ WorkListType() = default;
+
+ void push_back(Value* V) {
+ // Do not push back duplicates.
+ if (!S.count(V)) { Q.push_back(V); S.insert(V); }
+ }
+ Value *pop_front_val() {
+ Value *V = Q.front(); Q.pop_front(); S.erase(V);
+ return V;
+ }
+ bool empty() const { return Q.empty(); }
+
+ private:
+ std::deque<Value*> Q;
+ std::set<Value*> S;
+ };
+
+ typedef std::set<Value*> ValueSetType;
+ std::vector<Rule> Rules;
+
+ public:
+ struct Context {
+ typedef DenseMap<Value*,Value*> ValueMapType;
+
+ Value *Root;
+ ValueSetType Used; // The set of all cloned values used by Root.
+ ValueSetType Clones; // The set of all cloned values.
+ LLVMContext &Ctx;
+
+ Context(Instruction *Exp)
+ : Ctx(Exp->getParent()->getParent()->getContext()) {
+ initialize(Exp);
+ }
+ ~Context() { cleanup(); }
+ void print(raw_ostream &OS, const Value *V) const;
+
+ Value *materialize(BasicBlock *B, BasicBlock::iterator At);
+
+ private:
+ void initialize(Instruction *Exp);
+ void cleanup();
+
+ template <typename FuncT> void traverse(Value *V, FuncT F);
+ void record(Value *V);
+ void use(Value *V);
+ void unuse(Value *V);
+
+ bool equal(const Instruction *I, const Instruction *J) const;
+ Value *find(Value *Tree, Value *Sub) const;
+ Value *subst(Value *Tree, Value *OldV, Value *NewV);
+ void replace(Value *OldV, Value *NewV);
+ void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
+
+ friend struct Simplifier;
+ };
+
+ Value *simplify(Context &C);
+ };
+
+ struct PE {
+ PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}
+ const Simplifier::Context &C;
+ const Value *V;
+ };
+
+ raw_ostream &operator<< (raw_ostream &OS, const PE &P) LLVM_ATTRIBUTE_USED;
+ raw_ostream &operator<< (raw_ostream &OS, const PE &P) {
+ P.C.print(OS, P.V ? P.V : P.C.Root);
+ return OS;
+ }
+}
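
A Rule is just a callable of type Value* (Instruction*, LLVMContext&) that returns a replacement value, or nullptr when it does not apply. A minimal sketch of registering one, relying on the includes already present in this file; the rule shown (folding `xor X, 0` to `X`) is illustrative only and is not one of the rules actually installed by setupSimplifier:

    // Illustrative only: fold `xor X, 0` (either operand order) to X.
    Simplifier S;
    S.addRule([](Instruction *I, LLVMContext &) -> Value* {
      using namespace llvm::PatternMatch;
      Value *X = nullptr;
      if (match(I, m_Xor(m_Value(X), m_Zero())) ||
          match(I, m_Xor(m_Zero(), m_Value(X))))
        return X;
      return nullptr;  // rule did not fire
    });
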
+
+
+template <typename FuncT>
+void Simplifier::Context::traverse(Value *V, FuncT F) {
+ WorkListType Q;
+ Q.push_back(V);
+
+ while (!Q.empty()) {
+ Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
+ if (!U || U->getParent())
+ continue;
+ if (!F(U))
+ continue;
+ for (Value *Op : U->operands())
+ Q.push_back(Op);
+ }
+}
+
+
+void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {
+ const auto *U = dyn_cast<const Instruction>(V);
+ if (!U) {
+ OS << V << '(' << *V << ')';
+ return;
+ }
+
+ if (U->getParent()) {
+ OS << U << '(';
+ U->printAsOperand(OS, true);
+ OS << ')';
+ return;
+ }
+
+ unsigned N = U->getNumOperands();
+ if (N != 0)
+ OS << U << '(';
+ OS << U->getOpcodeName();
+ for (const Value *Op : U->operands()) {
+ OS << ' ';
+ print(OS, Op);
+ }
+ if (N != 0)
+ OS << ')';
+}
+
+
+void Simplifier::Context::initialize(Instruction *Exp) {
+ // Perform a deep clone of the expression, set Root to the root
+ // of the clone, and build a map from the cloned values to the
+ // original ones.
+ ValueMapType M;
+ BasicBlock *Block = Exp->getParent();
+ WorkListType Q;
+ Q.push_back(Exp);
+
+ while (!Q.empty()) {
+ Value *V = Q.pop_front_val();
+ if (M.find(V) != M.end())
+ continue;
+ if (Instruction *U = dyn_cast<Instruction>(V)) {
+ if (isa<PHINode>(U) || U->getParent() != Block)
+ continue;
+ for (Value *Op : U->operands())
+ Q.push_back(Op);
+ M.insert({U, U->clone()});
+ }
+ }
+
+ for (std::pair<Value*,Value*> P : M) {
+ Instruction *U = cast<Instruction>(P.second);
+ for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
+ auto F = M.find(U->getOperand(i));
+ if (F != M.end())
+ U->setOperand(i, F->second);
+ }
+ }
+
+ auto R = M.find(Exp);
+ assert(R != M.end());
+ Root = R->second;
+
+ record(Root);
+ use(Root);
+}
+
+
+void Simplifier::Context::record(Value *V) {
+ auto Record = [this](Instruction *U) -> bool {
+ Clones.insert(U);
+ return true;
+ };
+ traverse(V, Record);
+}
+
+
+void Simplifier::Context::use(Value *V) {
+ auto Use = [this](Instruction *U) -> bool {
+ Used.insert(U);
+ return true;
+ };
+ traverse(V, Use);
+}
+
+
+void Simplifier::Context::unuse(Value *V) {
+ if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr)
+ return;
+
+ auto Unuse = [this](Instruction *U) -> bool {
+ if (!U->use_empty())
+ return false;
+ Used.erase(U);
+ return true;
+ };
+ traverse(V, Unuse);
+}
+
+
+Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) {
+ if (Tree == OldV)
+ return NewV;
+ if (OldV == NewV)
+ return Tree;
+
+ WorkListType Q;
+ Q.push_back(Tree);
+ while (!Q.empty()) {
+ Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
+ // If U is not an instruction, or it's not a clone, skip it.
+ if (!U || U->getParent())
+ continue;
+ for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
+ Value *Op = U->getOperand(i);
+ if (Op == OldV) {
+ U->setOperand(i, NewV);
+ unuse(OldV);
+ } else {
+ Q.push_back(Op);
+ }
+ }
+ }
+ return Tree;
+}
+
+
+void Simplifier::Context::replace(Value *OldV, Value *NewV) {
+ if (Root == OldV) {
+ Root = NewV;
+ use(Root);
+ return;
+ }
+
+ // NewV may be a complex tree that has just been created by one of the
+ // transformation rules. We need to make sure that it is commoned with
+ // the existing Root to the maximum extent possible.
+ // Identify all subtrees of NewV (including NewV itself) that have
+ // equivalent counterparts in Root, and replace those subtrees with
+ // these counterparts.
+ WorkListType Q;
+ Q.push_back(NewV);
+ while (!Q.empty()) {
+ Value *V = Q.pop_front_val();
+ Instruction *U = dyn_cast<Instruction>(V);
+ if (!U || U->getParent())
+ continue;
+ if (Value *DupV = find(Root, V)) {
+ if (DupV != V)
+ NewV = subst(NewV, V, DupV);
+ } else {
+ for (Value *Op : U->operands())
+ Q.push_back(Op);
+ }
+ }
+
+ // Now, simply replace OldV with NewV in Root.
+ Root = subst(Root, OldV, NewV);
+ use(Root);
+}
+
+
+void Simplifier::Context::cleanup() {
+ for (Value *V : Clones) {
+ Instruction *U = cast<Instruction>(V);
+ if (!U->getParent())
+ U->dropAllReferences();
+ }
+
+ for (Value *V : Clones) {
+ Instruction *U = cast<Instruction>(V);
+ if (!U->getParent())
+ delete U;
+ }
+}
+
+
+bool Simplifier::Context::equal(const Instruction *I,
+ const Instruction *J) const {
+ if (I == J)
+ return true;
+ if (!I->isSameOperationAs(J))
+ return false;
+ if (isa<PHINode>(I))
+ return I->isIdenticalTo(J);
+
+ for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
+ Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
+ if (OpI == OpJ)
+ continue;
+ auto *InI = dyn_cast<const Instruction>(OpI);
+ auto *InJ = dyn_cast<const Instruction>(OpJ);
+ if (InI && InJ) {
+ if (!equal(InI, InJ))
+ return false;
+ } else if (InI != InJ || !InI)
+ return false;
+ }
+ return true;
+}
+
+
+Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
+ Instruction *SubI = dyn_cast<Instruction>(Sub);
+ WorkListType Q;
+ Q.push_back(Tree);
+
+ while (!Q.empty()) {
+ Value *V = Q.pop_front_val();
+ if (V == Sub)
+ return V;
+ Instruction *U = dyn_cast<Instruction>(V);
+ if (!U || U->getParent())
+ continue;
+ if (SubI && equal(SubI, U))
+ return U;
+ assert(!isa<PHINode>(U));
+ for (Value *Op : U->operands())
+ Q.push_back(Op);
+ }
+ return nullptr;
+}
+
+
+void Simplifier::Context::link(Instruction *I, BasicBlock *B,
+ BasicBlock::iterator At) {
+ if (I->getParent())
+ return;
+
+ for (Value *Op : I->operands()) {
+ if (Instruction *OpI = dyn_cast<Instruction>(Op))
+ link(OpI, B, At);
+ }
+
+ B->getInstList().insert(At, I);
+}
+
+
+Value *Simplifier::Context::materialize(BasicBlock *B,
+ BasicBlock::iterator At) {
+ if (Instruction *RootI = dyn_cast<Instruction>(Root))
+ link(RootI, B, At);
+ return Root;
+}
+
+
+Value *Simplifier::simplify(Context &C) {
+ WorkListType Q;
+ Q.push_back(C.Root);
+ unsigned Count = 0;
+ const unsigned Limit = 100000;
+
+ while (!Q.empty()) {
+ if (Count++ >= Limit)
+ break;
+ Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
+ if (!U || U->getParent() || !C.Used.count(U))
+ continue;
+ bool Changed = false;
+ for (Rule &R : Rules) {
+ Value *W = R(U, C.Ctx);
+ if (!W)
+ continue;
+ Changed = true;
+ C.record(W);
+ C.replace(U, W);
+ Q.push_back(C.Root);
+ break;
+ }
+ if (!Changed) {
+ for (Value *Op : U->operands())
+ Q.push_back(Op);
+ }
+ }
+ assert(Count < Limit && "Infinite loop in HLIR/simplify?");
+ return C.Root;
+}
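
Putting the pieces together, the intended calling sequence is: clone the expression into a Context, run the rules to a fixed point, then splice the simplified clone into a block. A hedged sketch, with `S`, `RootInst`, `BB`, and `InsertPt` assumed to be in scope:

    Simplifier::Context C(RootInst);         // deep-clones the expression tree
    Value *NewRoot = S.simplify(C);          // rewrites clones until no rule fires
    Value *V = C.materialize(BB, InsertPt);  // links unparented clones into BB
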
+
+
+//===----------------------------------------------------------------------===//
+//
+// Implementation of PolynomialMultiplyRecognize
+//
+//===----------------------------------------------------------------------===//
+
+namespace {
+ class PolynomialMultiplyRecognize {
+ public:
+ explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,
+ const DominatorTree &dt, const TargetLibraryInfo &tli,
+ ScalarEvolution &se)
+ : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}
+
+ bool recognize();
+ private:
+ typedef SetVector<Value*> ValueSeq;
+
+ IntegerType *getPmpyType() const {
+ LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
+ return IntegerType::get(Ctx, 32);
+ }
+ bool isPromotableTo(Value *V, IntegerType *Ty);
+ void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
+ bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
+
+ Value *getCountIV(BasicBlock *BB);
+ bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
+ void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
+ ValueSeq &Late);
+ bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
+ bool commutesWithShift(Instruction *I);
+ bool highBitsAreZero(Value *V, unsigned IterCount);
+ bool keepsHighBitsZero(Value *V, unsigned IterCount);
+ bool isOperandShifted(Instruction *I, Value *Op);
+ bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
+ unsigned IterCount);
+ void cleanupLoopBody(BasicBlock *LoopB);
+
+ struct ParsedValues {
+ ParsedValues() : M(nullptr), P(nullptr), Q(nullptr), R(nullptr),
+ X(nullptr), Res(nullptr), IterCount(0), Left(false), Inv(false) {}
+ Value *M, *P, *Q, *R, *X;
+ Instruction *Res;
+ unsigned IterCount;
+ bool Left, Inv;
+ };
+
+ bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);
+ bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
+ bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
+ Value *CIV, ParsedValues &PV, bool PreScan);
+ unsigned getInverseMxN(unsigned QP);
+ Value *generate(BasicBlock::iterator At, ParsedValues &PV);
+
+ void setupSimplifier();
+
+ Simplifier Simp;
+ Loop *CurLoop;
+ const DataLayout &DL;
+ const DominatorTree &DT;
+ const TargetLibraryInfo &TLI;
+ ScalarEvolution &SE;
+ };
+}
+
+
+Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (std::distance(PI, PE) != 2)
+ return nullptr;
+ BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;
+
+ for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
+ auto *PN = cast<PHINode>(I);
+ Value *InitV = PN->getIncomingValueForBlock(PB);
+ if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
+ continue;
+ Value *IterV = PN->getIncomingValueForBlock(BB);
+ if (!isa<BinaryOperator>(IterV))
+ continue;
+ auto *BO = dyn_cast<BinaryOperator>(IterV);
+ if (BO->getOpcode() != Instruction::Add)
+ continue;
+ Value *IncV = nullptr;
+ if (BO->getOperand(0) == PN)
+ IncV = BO->getOperand(1);
+ else if (BO->getOperand(1) == PN)
+ IncV = BO->getOperand(0);
+ if (IncV == nullptr)
+ continue;
+
+ if (auto *T = dyn_cast<ConstantInt>(IncV))
+ if (T->getZExtValue() == 1)
+ return PN;
+ }
+ return nullptr;
+}
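
In source terms, the only loop-counter shape getCountIV accepts corresponds to the canonical counting loop; a hand-written illustration, not code from this patch:

    // The recognized IV: a PHI entering at 0 and stepping by 1 per iteration.
    void shape(unsigned N) {
      for (unsigned i = 0; i != N; ++i) {
        // loop body containing the select pattern matched below
      }
    }
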
+
+
+static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {
+ for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
+ Use &TheUse = UI.getUse();
+ ++UI;
+ if (auto *II = dyn_cast<Instruction>(TheUse.getUser()))
+ if (BB == II->getParent())
+ II->replaceUsesOfWith(I, J);
+ }
+}
+
+
+bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
+ Value *CIV, ParsedValues &PV) {
+ // Match the following:
+ // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R
+ // select (X & (1 << i)) == 0 ? R : R ^ (Q << i)
+  // The condition may also check for equality with the masked value, i.e.
+ // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R
+ // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);
+
+ Value *CondV = SelI->getCondition();
+ Value *TrueV = SelI->getTrueValue();
+ Value *FalseV = SelI->getFalseValue();
+
+ using namespace PatternMatch;
+
+ CmpInst::Predicate P;
+ Value *A = nullptr, *B = nullptr, *C = nullptr;
+
+ if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&
+ !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))
+ return false;
+ if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
+ return false;
+ // Matched: select (A & B) == C ? ... : ...
+ // select (A & B) != C ? ... : ...
+
+ Value *X = nullptr, *Sh1 = nullptr;
+ // Check (A & B) for (X & (1 << i)):
+ if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {
+ Sh1 = A;
+ X = B;
+ } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {
+ Sh1 = B;
+ X = A;
+ } else {
+    // TODO: Could also check for an induction variable containing a single
+    // bit shifted left by 1 in each iteration.
+ return false;
+ }
+
+ bool TrueIfZero;
+
+ // Check C against the possible values for comparison: 0 and (1 << i):
+ if (match(C, m_Zero()))
+ TrueIfZero = (P == CmpInst::ICMP_EQ);
+ else if (C == Sh1)
+ TrueIfZero = (P == CmpInst::ICMP_NE);
+ else
+ return false;
+
+ // So far, matched:
+ // select (X & (1 << i)) ? ... : ...
+ // including variations of the check against zero/non-zero value.
+
+ Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;
+ if (TrueIfZero) {
+ ShouldSameV = TrueV;
+ ShouldXoredV = FalseV;
+ } else {
+ ShouldSameV = FalseV;
+ ShouldXoredV = TrueV;
+ }
+
+ Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;
+ Value *T = nullptr;
+ if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {
+ // Matched: select +++ ? ... : Y ^ Z
+ // select +++ ? Y ^ Z : ...
+ // where +++ denotes previously checked matches.
+ if (ShouldSameV == Y)
+ T = Z;
+ else if (ShouldSameV == Z)
+ T = Y;
+ else
+ return false;
+ R = ShouldSameV;
+ // Matched: select +++ ? R : R ^ T
+ // select +++ ? R ^ T : R
+ // depending on TrueIfZero.
+
+ } else if (match(ShouldSameV, m_Zero())) {
+ // Matched: select +++ ? 0 : ...
+ // select +++ ? ... : 0
+ if (!SelI->hasOneUse())
+ return false;
+ T = ShouldXoredV;
+ // Matched: select +++ ? 0 : T
+ // select +++ ? T : 0
+
+ Value *U = *SelI->user_begin();
+ if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&
+ !match(U, m_Xor(m_Value(R), m_Specific(SelI))))
+ return false;
+ // Matched: xor (select +++ ? 0 : T), R
+ // xor (select +++ ? T : 0), R
+ } else
+ return false;
+
+ // The xor input value T is isolated into its own match so that it could
+ // be checked against an induction variable containing a shifted bit
+ // (todo).
+ // For now, check against (Q << i).
+ if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&
+ !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))
+ return false;
+ // Matched: select +++ ? R : R ^ (Q << i)
+ // select +++ ? R ^ (Q << i) : R
+
+ PV.X = X;
+ PV.Q = Q;
+ PV.R = R;
+ PV.Left = true;
+ return true;
+}
+
+
+bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
+ ParsedValues &PV) {
+ // Match the following:
+ // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)
+ // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q
+  // The condition may also check for equality with the masked value, i.e.
+ // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)
+ // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q
+
+ Value *CondV = SelI->getCondition();
+ Value *TrueV = SelI->getTrueValue();
+ Value *FalseV = SelI->getFalseValue();
+
+ using namespace PatternMatch;
+
+ Value *C = nullptr;
+ CmpInst::Predicate P;
+ bool TrueIfZero;
+
+ if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||
+ match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {
+ if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
+ return false;
+ // Matched: select C == 0 ? ... : ...
+ // select C != 0 ? ... : ...
+ TrueIfZero = (P == CmpInst::ICMP_EQ);
+ } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||
+ match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {
+ if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
+ return false;
+ // Matched: select C == 1 ? ... : ...
+ // select C != 1 ? ... : ...
+ TrueIfZero = (P == CmpInst::ICMP_NE);
+ } else
+ return false;
+
+ Value *X = nullptr;
+ if (!match(C, m_And(m_Value(X), m_One())) &&
+ !match(C, m_And(m_One(), m_Value(X))))
+ return false;
+ // Matched: select (X & 1) == +++ ? ... : ...
+ // select (X & 1) != +++ ? ... : ...
+
+ Value *R = nullptr, *Q = nullptr;
+ if (TrueIfZero) {
+ // The select's condition is true if the tested bit is 0.
+ // TrueV must be the shift, FalseV must be the xor.
+ if (!match(TrueV, m_LShr(m_Value(R), m_One())))
+ return false;
+ // Matched: select +++ ? (R >> 1) : ...
+ if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&
+ !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))
+ return false;
+ // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q
+ // with commuting ^.
+ } else {
+ // The select's condition is true if the tested bit is 1.
+ // TrueV must be the xor, FalseV must be the shift.
+ if (!match(FalseV, m_LShr(m_Value(R), m_One())))
+ return false;
+ // Matched: select +++ ? ... : (R >> 1)
+ if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&
+ !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))
+ return false;
+ // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)
+ // with commuting ^.
+ }
+
+ PV.X = X;
+ PV.Q = Q;
+ PV.R = R;
+ PV.Left = false;
+ return true;
+}
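
For reference, one iteration of the right-shifting shape matched above can be written directly; a hand-written model, not code from this patch:

    #include <cstdint>

    // One step of the right-shifting idiom: shift the accumulator right and
    // fold Q in when the tested low bit of X is set.
    static inline uint32_t pmpyRightStep(uint32_t R, uint32_t X, uint32_t Q) {
      uint32_t Rs = R >> 1;
      return (X & 1u) ? (Rs ^ Q) : Rs;
    }
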
+
+
+bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
+ BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
+ bool PreScan) {
+ using namespace PatternMatch;
+ // The basic pattern for R = P.Q is:
+ // for i = 0..31
+ // R = phi (0, R')
+ // if (P & (1 << i)) ; test-bit(P, i)
+ // R' = R ^ (Q << i)
+ //
+ // Similarly, the basic pattern for R = (P/Q).Q - P
+ // for i = 0..31
+ // R = phi(P, R')
+ // if (R & (1 << i))
+ // R' = R ^ (Q << i)
+
+  // There exist idioms where, instead of Q being shifted left, P is shifted
+ // right. This produces a result that is shifted right by 32 bits (the
+ // non-shifted result is 64-bit).
+ //
+ // For R = P.Q, this would be:
+ // for i = 0..31
+ // R = phi (0, R')
+ // if ((P >> i) & 1)
+ // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must
+ // else ; be shifted by 1, not i.
+ // R' = R >> 1
+ //
+ // And for the inverse:
+ // for i = 0..31
+ // R = phi (P, R')
+ // if (R & 1)
+ // R' = (R >> 1) ^ Q
+ // else
+ // R' = R >> 1
+
+ // The left-shifting idioms share the same pattern:
+ // select (X & (1 << i)) ? R ^ (Q << i) : R
+ // Similarly for right-shifting idioms:
+  //   select (X & 1) ? (R >> 1) ^ Q : (R >> 1)
+
+ if (matchLeftShift(SelI, CIV, PV)) {
+ // If this is a pre-scan, getting this far is sufficient.
+ if (PreScan)
+ return true;
+
+ // Need to make sure that the SelI goes back into R.
+ auto *RPhi = dyn_cast<PHINode>(PV.R);
+ if (!RPhi)
+ return false;
+ if (SelI != RPhi->getIncomingValueForBlock(LoopB))
+ return false;
+ PV.Res = SelI;
+
+ // If X is loop invariant, it must be the input polynomial, and the
+ // idiom is the basic polynomial multiply.
+ if (CurLoop->isLoopInvariant(PV.X)) {
+ PV.P = PV.X;
+ PV.Inv = false;
+ } else {
+ // X is not loop invariant. If X == R, this is the inverse pmpy.
+ // Otherwise, check for an xor with an invariant value. If the
+ // variable argument to the xor is R, then this is still a valid
+ // inverse pmpy.
+ PV.Inv = true;
+ if (PV.X != PV.R) {
+ Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;
+ if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))
+ return false;
+ auto *I1 = dyn_cast<Instruction>(X1);
+ auto *I2 = dyn_cast<Instruction>(X2);
+ if (!I1 || I1->getParent() != LoopB) {
+ Var = X2;
+ Inv = X1;
+ } else if (!I2 || I2->getParent() != LoopB) {
+ Var = X1;
+ Inv = X2;
+ } else
+ return false;
+ if (Var != PV.R)
+ return false;
+ PV.M = Inv;
+ }
+ // The input polynomial P still needs to be determined. It will be
+ // the entry value of R.
+ Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
+ PV.P = EntryP;
+ }
+
+ return true;
+ }
+
+ if (matchRightShift(SelI, PV)) {
+ // If this is an inverse pattern, the Q polynomial must be known at
+ // compile time.
+ if (PV.Inv && !isa<ConstantInt>(PV.Q))
+ return false;
+ if (PreScan)
+ return true;
+ // There is no exact matching of right-shift pmpy.
+ return false;
+ }
+
+ return false;
+}
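
The basic left-shifting pattern documented at the top of scanSelect is ordinary carry-less (GF(2)) polynomial multiplication. A hand-written reference model, included only to make the idiom concrete:

    #include <cstdint>

    // R = P.Q over GF(2): for each set bit i of P, xor (Q << i) into the
    // accumulator, exactly as in the loop comments above.
    static uint64_t pmpy32(uint32_t P, uint32_t Q) {
      uint64_t R = 0;
      for (unsigned i = 0; i < 32; ++i)
        if (P & (1u << i))
          R ^= (uint64_t)Q << i;
      return R;
    }
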
+
+
+bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
+ IntegerType *DestTy) {
+ IntegerType *T = dyn_cast<IntegerType>(Val->getType());
+ if (!T || T->getBitWidth() > DestTy->getBitWidth())
+ return false;
+ if (T->getBitWidth() == DestTy->getBitWidth())
+ return true;
+  // Non-instructions are promotable. The reason an instruction may not
+  // be promotable is that it may produce a different result if its operands
+  // and the result are promoted; for example, it may produce more non-zero
+ // bits. While it would still be possible to represent the proper result
+ // in a wider type, it may require adding additional instructions (which
+ // we don't want to do).
+ Instruction *In = dyn_cast<Instruction>(Val);
+ if (!In)
+ return true;
+ // The bitwidth of the source type is smaller than the destination.
+ // Check if the individual operation can be promoted.
+ switch (In->getOpcode()) {
+ case Instruction::PHI:
+ case Instruction::ZExt:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::LShr: // Shift right is ok.
+ case Instruction::Select:
+ return true;
+    case Instruction::ICmp: {
+      CmpInst *CI = cast<CmpInst>(In);
+      return CI->isEquality() || CI->isUnsigned();
+    }
+ case Instruction::Add:
+ return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
+ }
+ return false;
+}
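
The Add restriction is the subtle case: without nuw/nsw, widening can introduce non-zero bits above the original width. A tiny hedged illustration:

    #include <cstdint>

    // In i8 arithmetic 255 + 1 wraps to 0; after widening, the same add
    // yields 256, i.e. a new non-zero bit appears above the old width.
    static_assert(static_cast<uint8_t>(255 + 1) == 0, "i8 add wraps to 0");
    static_assert(255 + 1 == 256, "widened add keeps the carry bit");
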
+
+
+void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
+ IntegerType *DestTy, BasicBlock *LoopB) {
+ // Leave boolean values alone.
+ if (!In->getType()->isIntegerTy(1))
+ In->mutateType(DestTy);
+ unsigned DestBW = DestTy->getBitWidth();
+
+ // Handle PHIs.
+ if (PHINode *P = dyn_cast<PHINode>(In)) {
+ unsigned N = P->getNumIncomingValues();
+ for (unsigned i = 0; i != N; ++i) {
+ BasicBlock *InB = P->getIncomingBlock(i);
+ if (InB == LoopB)
+ continue;
+ Value *InV = P->getIncomingValue(i);
+ IntegerType *Ty = cast<IntegerType>(InV->getType());
+ // Do not promote values in PHI nodes of type i1.
+ if (Ty != P->getType()) {
+ // If the value type does not match the PHI type, the PHI type
+ // must have been promoted.
+ assert(Ty->getBitWidth() < DestBW);
+ InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy);
+ P->setIncomingValue(i, InV);
+ }
+ }
+ } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
+ Value *Op = Z->getOperand(0);
+ if (Op->getType() == Z->getType())
+ Z->replaceAllUsesWith(Op);
+ Z->eraseFromParent();
+ return;
+ }
+
+ // Promote immediates.
+ for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
+ if (CI->getType()->getBitWidth() < DestBW)
+ In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
+ }
+}
+
+
+bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
+ BasicBlock *ExitB) {
+ assert(LoopB);
+ // Skip loops where the exit block has more than one predecessor. The values
+ // coming from the loop block will be promoted to another type, and so the
+ // values coming into the exit block from other predecessors would also have
+ // to be promoted.
+ if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
+ return false;
+ IntegerType *DestTy = getPmpyType();
+ // Check if the exit values have types that are no wider than the type
+ // that we want to promote to.
+ unsigned DestBW = DestTy->getBitWidth();
+ for (Instruction &In : *ExitB) {
+ PHINode *P = dyn_cast<PHINode>(&In);
+ if (!P)
+ break;
+ if (P->getNumIncomingValues() != 1)
+ return false;
+ assert(P->getIncomingBlock(0) == LoopB);
+ IntegerType *T = dyn_cast<IntegerType>(P->getType());
+ if (!T || T->getBitWidth() > DestBW)
+ return false;
+ }
+
+ // Check all instructions in the loop.
+ for (Instruction &In : *LoopB)
+ if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
+ return false;
+
+ // Perform the promotion.
+ std::vector<Instruction*> LoopIns;
+ std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
+ [](Instruction &In) { return &In; });
+ for (Instruction *In : LoopIns)
+ promoteTo(In, DestTy, LoopB);
+
+ // Fix up the PHI nodes in the exit block.
+ Instruction *EndI = ExitB->getFirstNonPHI();
+ BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
+ for (auto I = ExitB->begin(); I != End; ++I) {
+ PHINode *P = dyn_cast<PHINode>(I);
+ if (!P)
+ break;
+ Type *Ty0 = P->getIncomingValue(0)->getType();
+ Type *PTy = P->getType();
+ if (PTy != Ty0) {
+ assert(Ty0 == DestTy);
+ // In order to create the trunc, P must have the promoted type.
+ P->mutateType(Ty0);
+ Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
+ // In order for the RAUW to work, the types of P and T must match.
+ P->mutateType(PTy);
+ P->replaceAllUsesWith(T);
+ // Final update of the P's type.
+ P->mutateType(Ty0);
+ cast<Instruction>(T)->setOperand(0, P);
+ }
+ }
+
+ return true;
+}
+
+
+bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
+ ValueSeq &Cycle) {
+ // Out = ..., In, ...
+ if (Out == In)
+ return true;
+
+ auto *BB = cast<Instruction>(Out)->getParent();
+ bool HadPhi = false;
+
+ for (auto U : Out->users()) {
+ auto *I = dyn_cast<Instruction>(&*U);
+ if (I == nullptr || I->getParent() != BB)
+ continue;
+ // Make sure that there are no multi-iteration cycles, e.g.
+ // p1 = phi(p2)
+ // p2 = phi(p1)
+ // The cycle p1->p2->p1 would span two loop iterations.
+ // Check that there is only one phi in the cycle.
+ bool IsPhi = isa<PHINode>(I);
+ if (IsPhi && HadPhi)
+ return false;
+ HadPhi |= IsPhi;
+ if (Cycle.count(I))
+ return false;
+ Cycle.insert(I);
+ if (findCycle(I, In, Cycle))
+ break;
+ Cycle.remove(I);
+ }
+ return !Cycle.empty();
+}
+
+
+void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
+ ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {
+ // All the values in the cycle that are between the phi node and the
+  // divider instruction will be classified as "early"; all other values
+  // will be "late".
+
+ bool IsE = true;
+ unsigned I, N = Cycle.size();
+ for (I = 0; I < N; ++I) {
+ Value *V = Cycle[I];
+ if (DivI == V)
+ IsE = false;
+ else if (!isa<PHINode>(V))
+ continue;
+ // Stop if found either.
+ break;
+ }
+ // "I" is the index of either DivI or the phi node, whichever was first.
+ // "E" is "false" or "true" respectively.
+ ValueSeq &First = !IsE ? Early : Late;
+ for (unsigned J = 0; J < I; ++J)
+ First.insert(Cycle[J]);
+
+ ValueSeq &Second = IsE ? Early : Late;
+ Second.insert(Cycle[I]);
+ for (++I; I < N; ++I) {
+ Value *V = Cycle[I];
+ if (DivI == V || isa<PHINode>(V))
+ break;
+ Second.insert(V);
+ }
+
+ for (; I < N; ++I)
+ First.insert(Cycle[I]);
+}
+
+
+bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
+ ValueSeq &Early, ValueSeq &Late) {
+ // Select is an exception, since the condition value does not have to be
+ // classified in the same way as the true/false values. The true/false
+ // values do have to be both early or both late.
+ if (UseI->getOpcode() == Instruction::Select) {
+ Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);
+ if (Early.count(TV) || Early.count(FV)) {
+ if (Late.count(TV) || Late.count(FV))
+ return false;
+ Early.insert(UseI);
+ } else if (Late.count(TV) || Late.count(FV)) {
+ if (Early.count(TV) || Early.count(FV))
+ return false;
+ Late.insert(UseI);
+ }
+ return true;
+ }
+
+  // It is not clear what an example of this would be, but the code below
+  // relies on each instruction having at least one operand.
+ if (UseI->getNumOperands() == 0)
+ return true;
+
+ bool AE = true, AL = true;
+ for (auto &I : UseI->operands()) {
+ if (Early.count(&*I))
+ AL = false;
+ else if (Late.count(&*I))
+ AE = false;
+ }
+ // If the operands appear "all early" and "all late" at the same time,
+ // then it means that none of them are actually classified as either.
+ // This is harmless.
+ if (AE && AL)
+ return true;
+ // Conversely, if they are neither "all early" nor "all late", then
+ // we have a mixture of early and late operands that is not a known
+ // exception.
+ if (!AE && !AL)
+ return false;
+
+ // Check that we have covered the two special cases.
+ assert(AE != AL);
+
+ if (AE)
+ Early.insert(UseI);
+ else
+ Late.insert(UseI);
+ return true;
+}
+
+
+bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::LShr:
+ case Instruction::Shl:
+ case Instruction::Select:
+ case Instruction::ICmp:
+ case Instruction::PHI:
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
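
The property required here is that the operation distributes over a uniform left shift of its shifted operands; for the bitwise cases this is an exact identity:

    // Bitwise ops commute with shifts: shifting the inputs left by one and
    // shifting the output left by one agree.
    static_assert(((0xA5u ^ 0x3Cu) << 1) == ((0xA5u << 1) ^ (0x3Cu << 1)),
                  "xor commutes with shl");
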
+
+
+bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
+ unsigned IterCount) {
+ auto *T = dyn_cast<IntegerType>(V->getType());
+ if (!T)
+ return false;
+
+ unsigned BW = T->getBitWidth();
+ APInt K0(BW, 0), K1(BW, 0);
+ computeKnownBits(V, K0, K1, DL);
+ return K0.countLeadingOnes() >= IterCount;
+}
+
+
+bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
+ unsigned IterCount) {
+ // Assume that all inputs to the value have the high bits zero.
+ // Check if the value itself preserves the zeros in the high bits.
+ if (auto *C = dyn_cast<ConstantInt>(V))
+ return C->getValue().countLeadingZeros() >= IterCount;
+
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ switch (I->getOpcode()) {
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::LShr:
+ case Instruction::Select:
+ case Instruction::ICmp:
+ case Instruction::PHI:
+ case Instruction::ZExt:
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {
+ unsigned Opc = I->getOpcode();
+ if (Opc == Instruction::Shl || Opc == Instruction::LShr)
+ return Op != I->getOperand(1);
+ return true;
+}
+
+
+bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
+ BasicBlock *ExitB, unsigned IterCount) {
+ Value *CIV = getCountIV(LoopB);
+ if (CIV == nullptr)
+ return false;
+ auto *CIVTy = dyn_cast<IntegerType>(CIV->getType());
+ if (CIVTy == nullptr)
+ return false;
+
+ ValueSeq RShifts;
+ ValueSeq Early, Late, Cycled;
+
+ // Find all value cycles that contain logical right shifts by 1.
+ for (Instruction &I : *LoopB) {
+ using namespace PatternMatch;
+ Value *V = nullptr;
+ if (!match(&I, m_LShr(m_Value(V), m_One())))
+ continue;
+ ValueSeq C;
+ if (!findCycle(&I, V, C))
+ continue;
+
+ // Found a cycle.
+ C.insert(&I);
+ classifyCycle(&I, C, Early, Late);
+ Cycled.insert(C.begin(), C.end());
+ RShifts.insert(&I);
+ }
+
+ // Find the set of all values affected by the shift cycles, i.e. all
+ // cycled values, and (recursively) all their users.
+ ValueSeq Users(Cycled.begin(), Cycled.end());
+ for (unsigned i = 0; i < Users.size(); ++i) {
+ Value *V = Users[i];
+ if (!isa<IntegerType>(V->getType()))
+ return false;
+ auto *R = cast<Instruction>(V);
+ // If the instruction does not commute with shifts, the loop cannot
+ // be unshifted.
+ if (!commutesWithShift(R))
+ return false;
+ for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) {
+ auto *T = cast<Instruction>(*I);
+ // Skip users from outside of the loop. They will be handled later.
+ // Also, skip the right-shifts and phi nodes, since they mix early
+ // and late values.
+ if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))
+ continue;
+
+ Users.insert(T);
+ if (!classifyInst(T, Early, Late))
+ return false;
+ }
+ }
+
+ if (Users.empty())
+ return false;
+
+ // Verify that high bits remain zero.
+ ValueSeq Internal(Users.begin(), Users.end());
+ ValueSeq Inputs;
+ for (unsigned i = 0; i < Internal.size(); ++i) {
+ auto *R = dyn_cast<Instruction>(Internal[i]);
+ if (!R)
+ continue;
+ for (Value *Op : R->operands()) {
+ auto *T = dyn_cast<Instruction>(Op);
+ if (T && T->getParent() != LoopB)
+ Inputs.insert(Op);
+ else
+ Internal.insert(Op);
+ }
+ }
+ for (Value *V : Inputs)
+ if (!highBitsAreZero(V, IterCount))
+ return false;
+ for (Value *V : Internal)
+ if (!keepsHighBitsZero(V, IterCount))
+ return false;
+
+ // Finally, the work can be done. Unshift each user.
+ IRBuilder<> IRB(LoopB);
+ std::map<Value*,Value*> ShiftMap;
+ typedef std::map<std::pair<Value*,Type*>,Value*> CastMapType;
+ CastMapType CastMap;
+
+ auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,
+ IntegerType *Ty) -> Value* {
+ auto H = CM.find(std::make_pair(V, Ty));
+ if (H != CM.end())
+ return H->second;
+ Value *CV = IRB.CreateIntCast(V, Ty, false);
+ CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
+ return CV;
+ };
+
+ for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {
+ if (isa<PHINode>(I) || !Users.count(&*I))
+ continue;
+ using namespace PatternMatch;
+ // Match lshr x, 1.
+ Value *V = nullptr;
+ if (match(&*I, m_LShr(m_Value(V), m_One()))) {
+ replaceAllUsesOfWithIn(&*I, V, LoopB);
+ continue;
+ }
+ // For each non-cycled operand, replace it with the corresponding
+ // value shifted left.
+ for (auto &J : I->operands()) {
+ Value *Op = J.get();
+ if (!isOperandShifted(&*I, Op))
+ continue;
+ if (Users.count(Op))
+ continue;
+ // Skip shifting zeros.
+ if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
+ continue;
+ // Check if we have already generated a shift for this value.
+ auto F = ShiftMap.find(Op);
+ Value *W = (F != ShiftMap.end()) ? F->second : nullptr;
+ if (W == nullptr) {
+ IRB.SetInsertPoint(&*I);
+ // First, the shift amount will be CIV or CIV+1, depending on
+ // whether the value is early or late. Instead of creating CIV+1,
+ // do a single shift of the value.
+ Value *ShAmt = CIV, *ShVal = Op;
+ auto *VTy = cast<IntegerType>(ShVal->getType());
+ auto *ATy = cast<IntegerType>(ShAmt->getType());
+ if (Late.count(&*I))
+ ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));
+ // Second, the types of the shifted value and the shift amount
+ // must match.
+ if (VTy != ATy) {
+ if (VTy->getBitWidth() < ATy->getBitWidth())
+ ShVal = upcast(CastMap, IRB, ShVal, ATy);
+ else
+ ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
+ }
+ // Ready to generate the shift and memoize it.
+ W = IRB.CreateShl(ShVal, ShAmt);
+ ShiftMap.insert(std::make_pair(Op, W));
+ }
+ I->replaceUsesOfWith(Op, W);
+ }
+ }
+
+ // Update the users outside of the loop to account for the left shifts:
+ // the values would normally be shifted right in the loop, so shift them
+ // right after the loop exit.
+ // Take advantage of the loop-closed SSA form, which has all the post-
+ // loop values in phi nodes.
+ IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());
+ for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {
+ if (!isa<PHINode>(P))
+ break;
+ auto *PN = cast<PHINode>(P);
+ Value *U = PN->getIncomingValueForBlock(LoopB);
+ if (!Users.count(U))
+ continue;
+ Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
+ PN->replaceAllUsesWith(S);
+ // The above RAUW will create
+ // S = lshr S, IterCount
+ // so we need to fix it back into
+ // S = lshr PN, IterCount
+ cast<User>(S)->replaceUsesOfWith(S, PN);
+ }
+
+ return true;
+}
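+
+// Editor's sketch (illustrative, not part of the original patch): the net
+// effect of convertShiftsToLeft on a right-shifting loop is:
+//  - in-loop uses of each "lshr x, 1" are replaced with x itself,
+//  - every non-cycled, shifted operand v of an affected instruction is
+//    replaced with "shl v, CIV" (with an extra "shl v, 1" first for late
+//    users, standing in for a shift by CIV+1), and
+//  - each loop-closed phi P in the exit block is compensated with
+//    "lshr P, IterCount" to recover the original right-shifted result.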
+
+
+void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
+ for (auto &I : *LoopB)
+ if (Value *SV = SimplifyInstruction(&I, DL, &TLI, &DT))
+ I.replaceAllUsesWith(SV);
+
+ for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) {
+ N = std::next(I);
+ RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI);
+ }
+}
+
+
+unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
+ // Arrays of coefficients of Q and the inverse, C.
+ // Q[i] = coefficient at x^i.
+ std::array<char,32> Q, C;
+
+ for (unsigned i = 0; i < 32; ++i) {
+ Q[i] = QP & 1;
+ QP >>= 1;
+ }
+ assert(Q[0] == 1);
+
+ // Find C, such that
+ // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1
+ //
+ // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the
+ // operations * and + are & and ^ respectively.
+ //
+ // Find C[i] recursively, by comparing i-th coefficient in the product
+ // with 0 (or 1 for i=0).
+ //
+ // C[0] = 1, since C[0] = Q[0], and Q[0] = 1.
+ C[0] = 1;
+ for (unsigned i = 1; i < 32; ++i) {
+ // Solve for C[i] in:
+ // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0
+ // This is equivalent to
+ // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0
+ // which is
+ // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]
+ unsigned T = 0;
+ for (unsigned j = 0; j < i; ++j)
+ T = T ^ (C[j] & Q[i-j]);
+ C[i] = T;
+ }
+
+ unsigned QV = 0;
+ for (unsigned i = 0; i < 32; ++i)
+ if (C[i])
+ QV |= (1 << i);
+
+ return QV;
+}
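+
+// Editor's worked example (not part of the original patch): for QP = 0b11,
+// i.e. Q(x) = x + 1, the recurrence gives C[0] = 1 and
+// C[i] = C[i-1] & Q[1] = 1 for all i, so QV = 0xFFFFFFFF. Indeed, over
+// Z2[x] modulo x^32,
+//   (x + 1) * (x^31 + x^30 + ... + x + 1) = x^32 + 1 = 1,
+// since all middle terms cancel in characteristic 2.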
+
+
+Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
+ ParsedValues &PV) {
+ IRBuilder<> B(&*At);
+ Module *M = At->getParent()->getParent()->getParent();
+ Value *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
+
+ Value *P = PV.P, *Q = PV.Q, *P0 = P;
+ unsigned IC = PV.IterCount;
+
+ if (PV.M != nullptr)
+ P0 = P = B.CreateXor(P, PV.M);
+
+ // Create a bit mask to clear the high bits beyond IterCount.
+ auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));
+
+ if (PV.IterCount != 32)
+ P = B.CreateAnd(P, BMI);
+
+ if (PV.Inv) {
+ auto *QI = dyn_cast<ConstantInt>(PV.Q);
+ assert(QI && QI->getBitWidth() <= 32);
+
+ // Again, clearing bits beyond IterCount.
+ unsigned M = (1 << PV.IterCount) - 1;
+ unsigned Tmp = (QI->getZExtValue() | 1) & M;
+ unsigned QV = getInverseMxN(Tmp) & M;
+ auto *QVI = ConstantInt::get(QI->getType(), QV);
+ P = B.CreateCall(PMF, {P, QVI});
+ P = B.CreateTrunc(P, QI->getType());
+ if (IC != 32)
+ P = B.CreateAnd(P, BMI);
+ }
+
+ Value *R = B.CreateCall(PMF, {P, Q});
+
+ if (PV.M != nullptr)
+ R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));
+
+ return R;
+}
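+
+// Editor's note (hedged summary): hexagon_M4_pmpyw is used here as a 32x32
+// carry-less (polynomial) multiply. The sequence above computes
+// R = pmpy(P, Q) for the plain idiom; for the inverse idiom it first
+// multiplies P by the Z2[x] inverse of Q (from getInverseMxN, masked to
+// IterCount bits), effectively dividing by Q, before the final multiply.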
+
+
+void PolynomialMultiplyRecognize::setupSimplifier() {
+ Simp.addRule(
+ // Sink zext past bitwise operations.
+ [](Instruction *I, LLVMContext &Ctx) -> Value* {
+ if (I->getOpcode() != Instruction::ZExt)
+ return nullptr;
+ Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
+ if (!T)
+ return nullptr;
+ switch (T->getOpcode()) {
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ break;
+ default:
+ return nullptr;
+ }
+ IRBuilder<> B(Ctx);
+ return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
+ B.CreateZExt(T->getOperand(0), I->getType()),
+ B.CreateZExt(T->getOperand(1), I->getType()));
+ });
+ Simp.addRule(
+ // (xor (and x a) (and y a)) -> (and (xor x y) a)
+ [](Instruction *I, LLVMContext &Ctx) -> Value* {
+ if (I->getOpcode() != Instruction::Xor)
+ return nullptr;
+ Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
+ Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
+ if (!And0 || !And1)
+ return nullptr;
+ if (And0->getOpcode() != Instruction::And ||
+ And1->getOpcode() != Instruction::And)
+ return nullptr;
+ if (And0->getOperand(1) != And1->getOperand(1))
+ return nullptr;
+ IRBuilder<> B(Ctx);
+ return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
+ And0->getOperand(1));
+ });
+ Simp.addRule(
+ // (Op (select c x y) z) -> (select c (Op x z) (Op y z))
+ // (Op x (select c y z)) -> (select c (Op x y) (Op x z))
+ [](Instruction *I, LLVMContext &Ctx) -> Value* {
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
+ if (!BO)
+ return nullptr;
+ Instruction::BinaryOps Op = BO->getOpcode();
+ if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
+ IRBuilder<> B(Ctx);
+ Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
+ Value *Z = BO->getOperand(1);
+ return B.CreateSelect(Sel->getCondition(),
+ B.CreateBinOp(Op, X, Z),
+ B.CreateBinOp(Op, Y, Z));
+ }
+ if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
+ IRBuilder<> B(Ctx);
+ Value *X = BO->getOperand(0);
+ Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
+ return B.CreateSelect(Sel->getCondition(),
+ B.CreateBinOp(Op, X, Y),
+ B.CreateBinOp(Op, X, Z));
+ }
+ return nullptr;
+ });
+ Simp.addRule(
+ // (select c (select c x y) z) -> (select c x z)
+ // (select c x (select c y z)) -> (select c x z)
+ [](Instruction *I, LLVMContext &Ctx) -> Value* {
+ SelectInst *Sel = dyn_cast<SelectInst>(I);
+ if (!Sel)
+ return nullptr;
+ IRBuilder<> B(Ctx);
+ Value *C = Sel->getCondition();
+ if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) {
+ if (Sel0->getCondition() == C)
+ return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());
+ }
+ if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) {
+ if (Sel1->getCondition() == C)
+ return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());
+ }
+ return nullptr;
+ });
+ Simp.addRule(
+ // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
+ [](Instruction *I, LLVMContext &Ctx) -> Value* {
+ if (I->getOpcode() != Instruction::Or)
+ return nullptr;
+ Instruction *LShr = dyn_cast<Instruction>(I->getOperand(0));
+ if (!LShr || LShr->getOpcode() != Instruction::LShr)
+ return nullptr;
+ ConstantInt *One = dyn_cast<ConstantInt>(LShr->getOperand(1));
+ if (!One || One->getZExtValue() != 1)
+ return nullptr;
+ ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
+ return nullptr;
+ return IRBuilder<>(Ctx).CreateXor(LShr, Msb);
+ });
+ Simp.addRule(
+ // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
+ [](Instruction *I, LLVMContext &Ctx) -> Value* {
+ if (I->getOpcode() != Instruction::LShr)
+ return nullptr;
+ BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0));
+ if (!BitOp)
+ return nullptr;
+ switch (BitOp->getOpcode()) {
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ break;
+ default:
+ return nullptr;
+ }
+ IRBuilder<> B(Ctx);
+ Value *S = I->getOperand(1);
+ return B.CreateBinOp(BitOp->getOpcode(),
+ B.CreateLShr(BitOp->getOperand(0), S),
+ B.CreateLShr(BitOp->getOperand(1), S));
+ });
+ Simp.addRule(
+ // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
+ [](Instruction *I, LLVMContext &Ctx) -> Value* {
+ auto IsBitOp = [](unsigned Op) -> bool {
+ switch (Op) {
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return true;
+ }
+ return false;
+ };
+ BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
+ if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
+ return nullptr;
+ BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
+ if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
+ return nullptr;
+ ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
+ ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
+ if (!CA || !CB)
+ return nullptr;
+ IRBuilder<> B(Ctx);
+ Value *X = BitOp2->getOperand(0);
+ return B.CreateBinOp(BitOp2->getOpcode(), X,
+ B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
+ });
+}
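+
+// Editor's note (illustrative): the or->xor rule above is sound because the
+// most significant bit of "lshr x, 1" is always zero; OR-ing in the sign bit
+// sets a bit that is known clear, which is exactly what XOR with the sign
+// bit does. For example, (or (lshr x, 1), 0x80000000) and
+// (xor (lshr x, 1), 0x80000000) agree for every 32-bit x, and the xor form
+// composes with the other bitwise rules.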
+
+
+bool PolynomialMultiplyRecognize::recognize() {
+ DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
+ << *CurLoop << '\n');
+ // Restrictions:
+ // - The loop must consist of a single block.
+ // - The iteration count must be known at compile-time.
+ // - The loop must have an induction variable starting from 0, and
+ // incremented in each iteration of the loop.
+ BasicBlock *LoopB = CurLoop->getHeader();
+ DEBUG(dbgs() << "Loop header:\n" << *LoopB);
+
+ if (LoopB != CurLoop->getLoopLatch())
+ return false;
+ BasicBlock *ExitB = CurLoop->getExitBlock();
+ if (ExitB == nullptr)
+ return false;
+ BasicBlock *EntryB = CurLoop->getLoopPreheader();
+ if (EntryB == nullptr)
+ return false;
+
+ unsigned IterCount = 0;
+ const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
+ if (isa<SCEVCouldNotCompute>(CT))
+ return false;
+ if (auto *CV = dyn_cast<SCEVConstant>(CT))
+ IterCount = CV->getValue()->getZExtValue() + 1;
+
+ Value *CIV = getCountIV(LoopB);
+ ParsedValues PV;
+ PV.IterCount = IterCount;
+ DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount << '\n');
+
+ setupSimplifier();
+
+ // Perform a preliminary scan of select instructions to see if any of them
+ // looks like a generator of the polynomial multiply steps. Assume that a
+ // loop can only contain a single transformable operation, so stop the
+ // traversal after the first reasonable candidate is found.
+ // XXX: Currently this approach can modify the loop before being 100% sure
+ // that the transformation can be carried out.
+ bool FoundPreScan = false;
+ for (Instruction &In : *LoopB) {
+ SelectInst *SI = dyn_cast<SelectInst>(&In);
+ if (!SI)
+ continue;
+
+ Simplifier::Context C(SI);
+ Value *T = Simp.simplify(C);
+ SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
+ DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n');
+ if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
+ FoundPreScan = true;
+ if (SelI != SI) {
+ Value *NewSel = C.materialize(LoopB, SI->getIterator());
+ SI->replaceAllUsesWith(NewSel);
+ RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
+ }
+ break;
+ }
+ }
+
+ if (!FoundPreScan) {
+ DEBUG(dbgs() << "Have not found candidates for pmpy\n");
+ return false;
+ }
+
+ if (!PV.Left) {
+ // The right shift version actually only returns the higher bits of
+ // the result (each iteration discards the LSB). If we want to convert it
+ // to a left-shifting loop, the working data type must be at least as
+ // wide as the target's pmpy instruction.
+ if (!promoteTypes(LoopB, ExitB))
+ return false;
+ convertShiftsToLeft(LoopB, ExitB, IterCount);
+ cleanupLoopBody(LoopB);
+ }
+
+ // Scan the loop again, find the generating select instruction.
+ bool FoundScan = false;
+ for (Instruction &In : *LoopB) {
+ SelectInst *SelI = dyn_cast<SelectInst>(&In);
+ if (!SelI)
+ continue;
+ DEBUG(dbgs() << "scanSelect: " << *SelI << '\n');
+ FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
+ if (FoundScan)
+ break;
+ }
+ assert(FoundScan);
+
+ DEBUG({
+ StringRef PP = (PV.M ? "(P+M)" : "P");
+ if (!PV.Inv)
+ dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";
+ else
+ dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "
+ << PP << "\n";
+ dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";
+ if (PV.M)
+ dbgs() << " M:" << *PV.M << "\n";
+ dbgs() << " Q:" << *PV.Q << "\n";
+ dbgs() << " Iteration count:" << PV.IterCount << "\n";
+ });
+
+ BasicBlock::iterator At(EntryB->getTerminator());
+ Value *PM = generate(At, PV);
+ if (PM == nullptr)
+ return false;
+
+ if (PM->getType() != PV.Res->getType())
+ PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false);
+
+ PV.Res->replaceAllUsesWith(PM);
+ PV.Res->eraseFromParent();
+ return true;
+}
+
+
+unsigned HexagonLoopIdiomRecognize::getStoreSizeInBytes(StoreInst *SI) {
+ uint64_t SizeInBits = DL->getTypeSizeInBits(SI->getValueOperand()->getType());
+ assert(((SizeInBits & 7) || (SizeInBits >> 32) == 0) &&
+ "Don't overflow unsigned.");
+ return (unsigned)SizeInBits >> 3;
+}
+
+
+int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
+ return SC->getAPInt().getSExtValue();
+ return 0;
+}
+
+
+bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
+ // Allow volatile stores if HexagonVolatileMemcpy is enabled.
+ if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple())
+ return false;
+
+ Value *StoredVal = SI->getValueOperand();
+ Value *StorePtr = SI->getPointerOperand();
+
+ // Reject stores that are so large that they overflow an unsigned.
+ uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
+ if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided store. If we have something else, it's a
+ // random store we can't handle.
+ auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
+ return false;
+
+ // Check to see if the stride matches the size of the store. If so, then we
+ // know that every byte is touched in the loop.
+ int Stride = getSCEVStride(StoreEv);
+ if (Stride == 0)
+ return false;
+ unsigned StoreSize = getStoreSizeInBytes(SI);
+ if (StoreSize != unsigned(std::abs(Stride)))
+ return false;
+
+ // The store must be feeding a non-volatile load.
+ LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
+ if (!LI || !LI->isSimple())
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided load. If we have something else, it's a
+ // random load we can't handle.
+ Value *LoadPtr = LI->getPointerOperand();
+ auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
+ if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
+ return false;
+
+ // The store and load must share the same stride.
+ if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
+ return false;
+
+ // Success. This store can be converted into a memcpy.
+ return true;
+}
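+
+// Editor's example (illustrative): the canonical loop accepted above is
+//   for (i = 0; i != n; ++i) p[i] = q[i];
+// where both p[i] and q[i] are affine AddRecs such as {base,+,4} on this
+// loop (for i32 elements), the stride (4) matches the store size, and the
+// stored value is a simple load; anything else is rejected.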
+
+
+/// mayLoopAccessLocation - Return true if the specified loop might access the
+/// specified pointer location, which is a loop-strided access. The 'Access'
+/// argument specifies what the verboten forms of access are (read or write).
+static bool
+mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
+ const SCEV *BECount, unsigned StoreSize,
+ AliasAnalysis &AA,
+ SmallPtrSetImpl<Instruction *> &Ignored) {
+ // Get the location that may be stored across the loop. Since the access
+ // is strided positively through memory, we say that the modified location
+ // starts at the pointer and has infinite size.
+ uint64_t AccessSize = MemoryLocation::UnknownSize;
+
+ // If the loop iterates a fixed number of times, we can refine the access
+ // size to be exactly the size of the memory operation, which is
+ // (BECount+1)*StoreSize.
+ if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
+
+ // TODO: For this to be really effective, we have to dive into the pointer
+ // operand in the store. A store to &A[i] of 100 will always return may-alias
+ // with a store to &A[100]; we need StoreLoc to be "A" with a size of 100,
+ // which will then no-alias a store to &A[100].
+ MemoryLocation StoreLoc(Ptr, AccessSize);
+
+ for (auto *B : L->blocks())
+ for (auto &I : *B)
+ if (Ignored.count(&I) == 0 && (AA.getModRefInfo(&I, StoreLoc) & Access))
+ return true;
+
+ return false;
+}
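+
+// Editor's example (illustrative): for a loop storing 4 bytes per iteration
+// with a constant backedge-taken count of 99, the refined access size is
+// (99 + 1) * 4 = 400 bytes, so only an access aliasing the first 400 bytes
+// from Ptr can make this function return true.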
+
+
+void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
+ SmallVectorImpl<StoreInst*> &Stores) {
+ Stores.clear();
+ for (Instruction &I : *BB)
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I))
+ if (isLegalStore(CurLoop, SI))
+ Stores.push_back(SI);
+}
+
+
+bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
+ StoreInst *SI, const SCEV *BECount) {
+ assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&
+ "Expected only non-volatile stores, or Hexagon-specific memcpy"
+ "to volatile destination.");
+
+ Value *StorePtr = SI->getPointerOperand();
+ auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+ unsigned Stride = getSCEVStride(StoreEv);
+ unsigned StoreSize = getStoreSizeInBytes(SI);
+ if (Stride != StoreSize)
+ return false;
+
+ // See if the pointer expression is an AddRec like {base,+,1} on the current
+ // loop, which indicates a strided load. If we have something else, it's a
+ // random load we can't handle.
+ LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
+ auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
+
+ // The trip count of the loop and the base pointer of the addrec SCEV are
+ // guaranteed to be loop invariant, which means they should dominate the
+ // header. This allows us to insert code for them in the preheader.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+ Instruction *ExpPt = Preheader->getTerminator();
+ IRBuilder<> Builder(ExpPt);
+ SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");
+
+ Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
+
+ // Okay, we have a strided store "p[i]" of a loaded value. We can turn
+ // this into a memcpy/memmove in the loop preheader now if we want. However,
+ // this would be unsafe to do if there is anything else in the loop that may
+ // read or write the memory region we're storing to. For memcpy, this
+ // includes the load that feeds the stores. Check for an alias by generating
+ // the base address and checking everything.
+ Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
+ Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
+ Value *LoadBasePtr = nullptr;
+
+ bool Overlap = false;
+ bool DestVolatile = SI->isVolatile();
+ Type *BECountTy = BECount->getType();
+
+ if (DestVolatile) {
+ // The trip count must fit in i32, since it is the type of the "num_words"
+ // argument to hexagon_memcpy_forward_vp4cp4n2.
+ if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {
+CleanupAndExit:
+ // If we generated new code for the base pointer, clean up.
+ Expander.clear();
+ if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
+ RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
+ StoreBasePtr = nullptr;
+ }
+ if (LoadBasePtr) {
+ RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
+ LoadBasePtr = nullptr;
+ }
+ return false;
+ }
+ }
+
+ SmallPtrSet<Instruction*, 2> Ignore1;
+ Ignore1.insert(SI);
+ if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
+ StoreSize, *AA, Ignore1)) {
+ // Check if the load is the offending instruction.
+ Ignore1.insert(LI);
+ if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
+ StoreSize, *AA, Ignore1)) {
+ // Still bad. Nothing we can do.
+ goto CleanupAndExit;
+ }
+ // It worked with the load ignored.
+ Overlap = true;
+ }
+
+ if (!Overlap) {
+ if (DisableMemcpyIdiom || !HasMemcpy)
+ goto CleanupAndExit;
+ } else {
+ // Don't generate memmove if this function will be inlined. This is
+ // because the caller will undergo this transformation after inlining.
+ Function *Func = CurLoop->getHeader()->getParent();
+ if (Func->hasFnAttribute(Attribute::AlwaysInline))
+ goto CleanupAndExit;
+
+ // In case of a memmove, the call to memmove will be executed instead
+ // of the loop, so we need to make sure that there is nothing else in
+ // the loop other than the load, the store, and the instructions that
+ // these two depend on.
+ SmallVector<Instruction*,2> Insts;
+ Insts.push_back(SI);
+ Insts.push_back(LI);
+ if (!coverLoop(CurLoop, Insts))
+ goto CleanupAndExit;
+
+ if (DisableMemmoveIdiom || !HasMemmove)
+ goto CleanupAndExit;
+ bool IsNested = CurLoop->getParentLoop() != nullptr;
+ if (IsNested && OnlyNonNestedMemmove)
+ goto CleanupAndExit;
+ }
+
+ // For a memcpy, we have to make sure that the input array is not being
+ // mutated by the loop.
+ LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
+ Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);
+
+ SmallPtrSet<Instruction*, 2> Ignore2;
+ Ignore2.insert(SI);
+ if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
+ *AA, Ignore2))
+ goto CleanupAndExit;
+
+ // Check the stride.
+ bool StridePos = getSCEVStride(LoadEv) >= 0;
+
+ // Currently, the volatile memcpy only emulates traversing memory forward.
+ if (!StridePos && DestVolatile)
+ goto CleanupAndExit;
+
+ bool RuntimeCheck = (Overlap || DestVolatile);
+
+ BasicBlock *ExitB;
+ if (RuntimeCheck) {
+ // The runtime check needs a single exit block.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() != 1)
+ goto CleanupAndExit;
+ ExitB = ExitBlocks[0];
+ }
+
+ // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+ // pointer size if it isn't already.
+ LLVMContext &Ctx = SI->getContext();
+ BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
+ unsigned Alignment = std::min(SI->getAlignment(), LI->getAlignment());
+ DebugLoc DLoc = SI->getDebugLoc();
+
+ const SCEV *NumBytesS =
+ SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
+ if (StoreSize != 1)
+ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
+ SCEV::FlagNUW);
+ Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
+ if (Instruction *In = dyn_cast<Instruction>(NumBytes))
+ if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT))
+ NumBytes = Simp;
+
+ CallInst *NewCall;
+
+ if (RuntimeCheck) {
+ unsigned Threshold = RuntimeMemSizeThreshold;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
+ uint64_t C = CI->getZExtValue();
+ if (Threshold != 0 && C < Threshold)
+ goto CleanupAndExit;
+ if (C < CompileTimeMemSizeThreshold)
+ goto CleanupAndExit;
+ }
+
+ BasicBlock *Header = CurLoop->getHeader();
+ Function *Func = Header->getParent();
+ Loop *ParentL = LF->getLoopFor(Preheader);
+ StringRef HeaderName = Header->getName();
+
+ // Create a new (empty) preheader, and update the PHI nodes in the
+ // header to use the new preheader.
+ BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph",
+ Func, Header);
+ if (ParentL)
+ ParentL->addBasicBlockToLoop(NewPreheader, *LF);
+ IRBuilder<>(NewPreheader).CreateBr(Header);
+ for (auto &In : *Header) {
+ PHINode *PN = dyn_cast<PHINode>(&In);
+ if (!PN)
+ break;
+ int bx = PN->getBasicBlockIndex(Preheader);
+ if (bx >= 0)
+ PN->setIncomingBlock(bx, NewPreheader);
+ }
+ DT->addNewBlock(NewPreheader, Preheader);
+ DT->changeImmediateDominator(Header, NewPreheader);
+
+ // Check for safe conditions to execute memmove.
+ // If stride is positive, copying things from higher to lower addresses
+ // is equivalent to memmove. For negative stride, it's the other way
+ // around. Copying forward in memory with positive stride may not be
+ // the same as memmove, since we may be copying values that we just
+ // stored in some previous iteration.
+ Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
+ Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
+ Value *LowA = StridePos ? SA : LA;
+ Value *HighA = StridePos ? LA : SA;
+ Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
+ Value *Cond = CmpA;
+
+ // Check for distance between pointers.
+ Value *Dist = Builder.CreateSub(HighA, LowA);
+ Value *CmpD = Builder.CreateICmpSLT(NumBytes, Dist);
+ Value *CmpEither = Builder.CreateOr(Cond, CmpD);
+ Cond = CmpEither;
+
+ if (Threshold != 0) {
+ Type *Ty = NumBytes->getType();
+ Value *Thr = ConstantInt::get(Ty, Threshold);
+ Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
+ Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);
+ Cond = CmpBoth;
+ }
+ BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli",
+ Func, NewPreheader);
+ if (ParentL)
+ ParentL->addBasicBlockToLoop(MemmoveB, *LF);
+ Instruction *OldT = Preheader->getTerminator();
+ Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);
+ OldT->eraseFromParent();
+ Preheader->setName(Preheader->getName()+".old");
+ DT->addNewBlock(MemmoveB, Preheader);
+ // Find the new immediate dominator of the exit block.
+ BasicBlock *ExitD = Preheader;
+ for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) {
+ BasicBlock *PB = *PI;
+ ExitD = DT->findNearestCommonDominator(ExitD, PB);
+ if (!ExitD)
+ break;
+ }
+ // If the prior immediate dominator of ExitB was dominated by the
+ // old preheader, then the old preheader becomes the new immediate
+ // dominator. Otherwise don't change anything (because the newly
+ // added blocks are dominated by the old preheader).
+ if (ExitD && DT->dominates(Preheader, ExitD)) {
+ DomTreeNode *BN = DT->getNode(ExitB);
+ DomTreeNode *DN = DT->getNode(ExitD);
+ BN->setIDom(DN);
+ }
+
+ // Add a call to memmove to the conditional block.
+ IRBuilder<> CondBuilder(MemmoveB);
+ CondBuilder.CreateBr(ExitB);
+ CondBuilder.SetInsertPoint(MemmoveB->getTerminator());
+
+ if (DestVolatile) {
+ Type *Int32Ty = Type::getInt32Ty(Ctx);
+ Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
+ Type *VoidTy = Type::getVoidTy(Ctx);
+ Module *M = Func->getParent();
+ Constant *CF = M->getOrInsertFunction(HexagonVolatileMemcpyName, VoidTy,
+ Int32PtrTy, Int32PtrTy, Int32Ty);
+ Function *Fn = cast<Function>(CF);
+ Fn->setLinkage(Function::ExternalLinkage);
+
+ const SCEV *OneS = SE->getConstant(Int32Ty, 1);
+ const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
+ const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);
+ Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
+ MemmoveB->getTerminator());
+ if (Instruction *In = dyn_cast<Instruction>(NumWords))
+ if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT))
+ NumWords = Simp;
+
+ Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
+ ? StoreBasePtr
+ : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy);
+ Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
+ ? LoadBasePtr
+ : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
+ NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
+ } else {
+ NewCall = CondBuilder.CreateMemMove(StoreBasePtr, LoadBasePtr,
+ NumBytes, Alignment);
+ }
+ } else {
+ NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr,
+ NumBytes, Alignment);
+ // Okay, the memcpy has been formed. Zap the original store and
+ // anything that feeds into it.
+ RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
+ }
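+
+ // Editor's summary (illustrative): three outcomes are possible above: a
+ // plain memcpy when the regions provably do not overlap, a runtime-guarded
+ // memmove when they might, and a call to the Hexagon-specific volatile
+ // copy routine (which takes an i32 word count) for volatile destinations.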
+
+ NewCall->setDebugLoc(DLoc);
+
+ DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")
+ << *NewCall << "\n"
+ << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
+ << " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
+
+ return true;
+}
+
+
+// \brief Check if the instructions in Insts, together with their dependencies,
+// cover the loop in the sense that the loop could be safely eliminated once
+// the instructions in Insts are removed.
+bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
+ SmallVectorImpl<Instruction*> &Insts) const {
+ SmallSet<BasicBlock*,8> LoopBlocks;
+ for (auto *B : L->blocks())
+ LoopBlocks.insert(B);
+
+ SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());
+
+ // Collect all instructions from the loop that the instructions in Insts
+ // depend on (plus their dependencies, etc.). These instructions will
+ // constitute the expression trees that feed those in Insts, but the trees
+ // will be limited only to instructions contained in the loop.
+ for (unsigned i = 0; i < Worklist.size(); ++i) {
+ Instruction *In = Worklist[i];
+ for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {
+ Instruction *OpI = dyn_cast<Instruction>(I);
+ if (!OpI)
+ continue;
+ BasicBlock *PB = OpI->getParent();
+ if (!LoopBlocks.count(PB))
+ continue;
+ Worklist.insert(OpI);
+ }
+ }
+
+ // Scan all instructions in the loop, if any of them have a user outside
+ // of the loop, or outside of the expressions collected above, then either
+ // the loop has a side-effect visible outside of it, or there are
+ // instructions in it that are not involved in the original set Insts.
+ for (auto *B : L->blocks()) {
+ for (auto &In : *B) {
+ if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
+ continue;
+ if (!Worklist.count(&In) && In.mayHaveSideEffects())
+ return false;
+ for (const auto &K : In.users()) {
+ Instruction *UseI = dyn_cast<Instruction>(K);
+ if (!UseI)
+ continue;
+ BasicBlock *UseB = UseI->getParent();
+ if (LF->getLoopFor(UseB) != L)
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
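+
+// Editor's example (illustrative): for a loop body
+//   a[i] = b[i]; ++count;
+// coverLoop(L, {store, load}) returns false, because the update of "count"
+// is an effect not covered by the copy; a loop containing only the copy and
+// its address arithmetic is fully covered and can be deleted once the copy
+// is replaced by a single call.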
+
+/// runOnLoopBlock - Process the specified block, which lives in a counted loop
+/// with the specified backedge count. This block is known to be in the current
+/// loop and not in any subloops.
+bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
+ const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
+ // We can only promote stores in this block if they are unconditionally
+ // executed in the loop. For a block to be unconditionally executed, it has
+ // to dominate all the exit blocks of the loop. Verify this now.
+ auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
+ return DT->dominates(BB, EB);
+ };
+ if (!std::all_of(ExitBlocks.begin(), ExitBlocks.end(), DominatedByBB))
+ return false;
+
+ bool MadeChange = false;
+ // Look for store instructions, which may be optimized to memset/memcpy.
+ SmallVector<StoreInst*,8> Stores;
+ collectStores(CurLoop, BB, Stores);
+
+ // Optimize the store into a memcpy, if it feeds a similarly strided load.
+ for (auto &SI : Stores)
+ MadeChange |= processCopyingStore(CurLoop, SI, BECount);
+
+ return MadeChange;
+}
+
+
+bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
+ PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);
+ if (PMR.recognize())
+ return true;
+
+ if (!HasMemcpy && !HasMemmove)
+ return false;
+
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ assert(!isa<SCEVCouldNotCompute>(BECount) &&
+ "runOnCountableLoop() called on a loop without a predictable"
+ "backedge-taken count");
+
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ bool Changed = false;
+
+ // Scan all the blocks in the loop that are not in subloops.
+ for (auto *BB : L->getBlocks()) {
+ // Ignore blocks in subloops.
+ if (LF->getLoopFor(BB) != L)
+ continue;
+ Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
+ }
+
+ return Changed;
+}
+
+
+bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+ const Module &M = *L->getHeader()->getParent()->getParent();
+ if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
+ return false;
+
+ if (skipLoop(L))
+ return false;
+
+ // If the loop could not be converted to canonical form, it must have an
+ // indirectbr in it; just give up.
+ if (!L->getLoopPreheader())
+ return false;
+
+ // Disable loop idiom recognition if the function's name is a common idiom.
+ StringRef Name = L->getHeader()->getParent()->getName();
+ if (Name == "memset" || Name == "memcpy" || Name == "memmove")
+ return false;
+
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ DL = &L->getHeader()->getModule()->getDataLayout();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+
+ HasMemcpy = TLI->has(LibFunc_memcpy);
+ HasMemmove = TLI->has(LibFunc_memmove);
+
+ if (SE->hasLoopInvariantBackedgeTakenCount(L))
+ return runOnCountableLoop(L);
+ return false;
+}
+
+
+Pass *llvm::createHexagonLoopIdiomPass() {
+ return new HexagonLoopIdiomRecognize();
+}
+
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index a5dc002642c8..7189b5a52c42 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -109,11 +109,14 @@ void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
switch (MO.getType()) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_RegisterMask:
+ continue;
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
- if (MO.isImplicit()) continue;
+ if (MO.isImplicit())
+ continue;
MCO = MCOperand::createReg(MO.getReg());
break;
case MachineOperand::MO_FPImmediate: {
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 9ff9d93ea0c3..20dc9b0da1db 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -74,7 +74,9 @@ bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII,
return false;
// These instruction types (formerly TypeXTYPE) are 64-bit operations.
- if (HII.getType(*Inst2.getInstr()) == HexagonII::TypeXTYPE)
+ unsigned Type = HII.getType(*Inst2.getInstr());
+ if (Type == HexagonII::TypeS_2op || Type == HexagonII::TypeS_3op ||
+ Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM)
return true;
return false;
}
diff --git a/lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td b/lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td
new file mode 100644
index 000000000000..0b4ac14c7a47
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMapAsm2IntrinV62.gen.td
@@ -0,0 +1,204 @@
+//===--- HexagonMapAsm2IntrinV62.gen.td -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+multiclass T_VR_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, IntRegs:$src2),
+ (MI VectorRegs:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, IntRegs:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
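+
+// Editor's note (illustrative): each multiclass in this file emits two
+// selection patterns per intrinsic: one for single (64-byte) HVX mode and
+// one for double (128-byte) mode, obtained by appending "_128B" to both the
+// intrinsic and the instruction name. E.g. T_VR_HVX_gen_pat<V6_vlsrb,
+// int_hexagon_V6_vlsrb> also covers int_hexagon_V6_vlsrb_128B via
+// V6_vlsrb_128B.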
+
+multiclass T_VVL_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, IntRegsLow8:$src3),
+ (MI VectorRegs:$src1, VectorRegs:$src2, IntRegsLow8:$src3)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2, IntRegsLow8:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2, IntRegsLow8:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VV_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2),
+ (MI VectorRegs:$src1, VectorRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WW_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2),
+ (MI VecDblRegs:$src1, VecDblRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, VecDblRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, VecDblRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WVV_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
+ (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WR_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2),
+ (MI VecDblRegs:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, IntRegs:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WWR_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3),
+ (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, VecDblRegs128B:$src2, IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, VecDblRegs128B:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVR_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3),
+ (MI VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2, IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_ZR_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, IntRegs:$src2),
+ (MI VecPredRegs:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1, IntRegs:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1, IntRegs:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VZR_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3),
+ (MI VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, VecPredRegs128B:$src2, IntRegs:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, VecPredRegs128B:$src2, IntRegs:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_ZV_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2),
+ (MI VecPredRegs:$src1, VectorRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1, VectorRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1, VectorRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_R_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID IntRegs:$src1),
+ (MI IntRegs:$src1)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") IntRegs:$src1),
+ (!cast<InstHexagon>(MI#"_128B") IntRegs:$src1)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_ZZ_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecPredRegs:$src1, VecPredRegs:$src2),
+ (MI VecPredRegs:$src1, VecPredRegs:$src2)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecPredRegs128B:$src1, VecPredRegs128B:$src2),
+ (!cast<InstHexagon>(MI#"_128B") VecPredRegs128B:$src1, VecPredRegs128B:$src2)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVI_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, imm:$src3),
+ (MI VectorRegs:$src1, VectorRegs:$src2, imm:$src3)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2, imm:$src3),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2, imm:$src3)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_VVVI_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, imm:$src4),
+ (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, imm:$src4)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3, imm:$src4),
+ (!cast<InstHexagon>(MI#"_128B") VectorRegs128B:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3, imm:$src4)>,
+ Requires<[UseHVXDbl]>;
+}
+
+multiclass T_WVVI_HVX_gen_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, imm:$src4),
+ (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, imm:$src4)>,
+ Requires<[UseHVXSgl]>;
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3, imm:$src4),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3, imm:$src4)>,
+ Requires<[UseHVXDbl]>;
+}
+
+def : T_R_pat <S6_vsplatrbp, int_hexagon_S6_vsplatrbp>;
+def : T_PP_pat <M6_vabsdiffb, int_hexagon_M6_vabsdiffb>;
+def : T_PP_pat <M6_vabsdiffub, int_hexagon_M6_vabsdiffub>;
+def : T_PP_pat <S6_vtrunehb_ppp, int_hexagon_S6_vtrunehb_ppp>;
+def : T_PP_pat <S6_vtrunohb_ppp, int_hexagon_S6_vtrunohb_ppp>;
+
+defm : T_VR_HVX_gen_pat <V6_vlsrb, int_hexagon_V6_vlsrb>;
+defm : T_VR_HVX_gen_pat <V6_vmpyiwub, int_hexagon_V6_vmpyiwub>;
+defm : T_VVL_HVX_gen_pat <V6_vasrwuhrndsat, int_hexagon_V6_vasrwuhrndsat>;
+defm : T_VVL_HVX_gen_pat <V6_vasruwuhrndsat, int_hexagon_V6_vasruwuhrndsat>;
+defm : T_VVL_HVX_gen_pat <V6_vasrhbsat, int_hexagon_V6_vasrhbsat>;
+defm : T_VVL_HVX_gen_pat <V6_vlutvvb_nm, int_hexagon_V6_vlutvvb_nm>;
+defm : T_VVL_HVX_gen_pat <V6_vlutvwh_nm, int_hexagon_V6_vlutvwh_nm>;
+defm : T_VV_HVX_gen_pat <V6_vrounduwuh, int_hexagon_V6_vrounduwuh>;
+defm : T_VV_HVX_gen_pat <V6_vrounduhub, int_hexagon_V6_vrounduhub>;
+defm : T_VV_HVX_gen_pat <V6_vadduwsat, int_hexagon_V6_vadduwsat>;
+defm : T_VV_HVX_gen_pat <V6_vsubuwsat, int_hexagon_V6_vsubuwsat>;
+defm : T_VV_HVX_gen_pat <V6_vaddbsat, int_hexagon_V6_vaddbsat>;
+defm : T_VV_HVX_gen_pat <V6_vsubbsat, int_hexagon_V6_vsubbsat>;
+defm : T_VV_HVX_gen_pat <V6_vaddububb_sat, int_hexagon_V6_vaddububb_sat>;
+defm : T_VV_HVX_gen_pat <V6_vsubububb_sat, int_hexagon_V6_vsubububb_sat>;
+defm : T_VV_HVX_gen_pat <V6_vmpyewuh_64, int_hexagon_V6_vmpyewuh_64>;
+defm : T_VV_HVX_gen_pat <V6_vmaxb, int_hexagon_V6_vmaxb>;
+defm : T_VV_HVX_gen_pat <V6_vminb, int_hexagon_V6_vminb>;
+defm : T_VV_HVX_gen_pat <V6_vsatuwuh, int_hexagon_V6_vsatuwuh>;
+defm : T_VV_HVX_gen_pat <V6_vaddclbw, int_hexagon_V6_vaddclbw>;
+defm : T_VV_HVX_gen_pat <V6_vaddclbh, int_hexagon_V6_vaddclbh>;
+defm : T_WW_HVX_gen_pat <V6_vadduwsat_dv, int_hexagon_V6_vadduwsat_dv>;
+defm : T_WW_HVX_gen_pat <V6_vsubuwsat_dv, int_hexagon_V6_vsubuwsat_dv>;
+defm : T_WW_HVX_gen_pat <V6_vaddbsat_dv, int_hexagon_V6_vaddbsat_dv>;
+defm : T_WW_HVX_gen_pat <V6_vsubbsat_dv, int_hexagon_V6_vsubbsat_dv>;
+defm : T_WVV_HVX_gen_pat <V6_vaddhw_acc, int_hexagon_V6_vaddhw_acc>;
+defm : T_WVV_HVX_gen_pat <V6_vadduhw_acc, int_hexagon_V6_vadduhw_acc>;
+defm : T_WVV_HVX_gen_pat <V6_vaddubh_acc, int_hexagon_V6_vaddubh_acc>;
+defm : T_WVV_HVX_gen_pat <V6_vmpyowh_64_acc, int_hexagon_V6_vmpyowh_64_acc>;
+defm : T_WR_HVX_gen_pat <V6_vmpauhb, int_hexagon_V6_vmpauhb>;
+defm : T_WWR_HVX_gen_pat <V6_vmpauhb_acc, int_hexagon_V6_vmpauhb_acc>;
+defm : T_VVR_HVX_gen_pat <V6_vmpyiwub_acc, int_hexagon_V6_vmpyiwub_acc>;
+defm : T_ZR_HVX_gen_pat <V6_vandnqrt, int_hexagon_V6_vandnqrt>;
+defm : T_VZR_HVX_gen_pat <V6_vandnqrt_acc, int_hexagon_V6_vandnqrt_acc>;
+defm : T_ZV_HVX_gen_pat <V6_vandvqv, int_hexagon_V6_vandvqv>;
+defm : T_ZV_HVX_gen_pat <V6_vandvnqv, int_hexagon_V6_vandvnqv>;
+defm : T_R_HVX_gen_pat <V6_pred_scalar2v2, int_hexagon_V6_pred_scalar2v2>;
+defm : T_R_HVX_gen_pat <V6_lvsplath, int_hexagon_V6_lvsplath>;
+defm : T_R_HVX_gen_pat <V6_lvsplatb, int_hexagon_V6_lvsplatb>;
+defm : T_ZZ_HVX_gen_pat <V6_shuffeqw, int_hexagon_V6_shuffeqw>;
+defm : T_ZZ_HVX_gen_pat <V6_shuffeqh, int_hexagon_V6_shuffeqh>;
+defm : T_VVI_HVX_gen_pat <V6_vlutvvbi, int_hexagon_V6_vlutvvbi>;
+defm : T_VVI_HVX_gen_pat <V6_vlutvwhi, int_hexagon_V6_vlutvwhi>;
+defm : T_VVVI_HVX_gen_pat <V6_vlutvvb_oracci, int_hexagon_V6_vlutvvb_oracci>;
+defm : T_WVVI_HVX_gen_pat <V6_vlutvwh_oracci, int_hexagon_V6_vlutvwh_oracci>;
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 72d8011277e6..d73fc7c73185 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -130,6 +130,8 @@ static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII,
if (II->getOpcode() == TargetOpcode::KILL)
return false;
+ if (II->isImplicitDef())
+ return false;
// Make sure there is no 'def' or 'use' of any of the uses of the
// feeder insn between its definition, this MI and the jump, jmpInst
diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td
index 983310571563..f87a1b8e424d 100644
--- a/lib/Target/Hexagon/HexagonOperands.td
+++ b/lib/Target/Hexagon/HexagonOperands.td
@@ -1,298 +1,33 @@
-//===- HexagonImmediates.td - Hexagon immediate processing -*- tablegen -*-===//
+//===--- HexagonOperands.td -----------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
-// This file is distributed under the University of Illnois Open Source
+// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-def s32_0ImmOperand : AsmOperandClass { let Name = "s32_0Imm"; }
-def s23_2ImmOperand : AsmOperandClass { let Name = "s23_2Imm"; }
-def s8_0ImmOperand : AsmOperandClass { let Name = "s8_0Imm"; }
-def s8_0Imm64Operand : AsmOperandClass { let Name = "s8_0Imm64"; }
-def s6_0ImmOperand : AsmOperandClass { let Name = "s6_0Imm"; }
-def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; }
-def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; }
-def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; }
-def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; }
-def s4_6ImmOperand : AsmOperandClass { let Name = "s4_6Imm"; }
-def s3_6ImmOperand : AsmOperandClass { let Name = "s3_6Imm"; }
-def u64_0ImmOperand : AsmOperandClass { let Name = "u64_0Imm"; }
-def u32_0ImmOperand : AsmOperandClass { let Name = "u32_0Imm"; }
-def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; }
-def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; }
-def u16_1ImmOperand : AsmOperandClass { let Name = "u16_1Imm"; }
-def u16_2ImmOperand : AsmOperandClass { let Name = "u16_2Imm"; }
-def u16_3ImmOperand : AsmOperandClass { let Name = "u16_3Imm"; }
-def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; }
-def u10_0ImmOperand : AsmOperandClass { let Name = "u10_0Imm"; }
-def u9_0ImmOperand : AsmOperandClass { let Name = "u9_0Imm"; }
-def u8_0ImmOperand : AsmOperandClass { let Name = "u8_0Imm"; }
-def u7_0ImmOperand : AsmOperandClass { let Name = "u7_0Imm"; }
-def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; }
-def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; }
-def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; }
-def u6_3ImmOperand : AsmOperandClass { let Name = "u6_3Imm"; }
-def u5_0ImmOperand : AsmOperandClass { let Name = "u5_0Imm"; }
-def u4_0ImmOperand : AsmOperandClass { let Name = "u4_0Imm"; }
-def u3_0ImmOperand : AsmOperandClass { let Name = "u3_0Imm"; }
-def u2_0ImmOperand : AsmOperandClass { let Name = "u2_0Imm"; }
-def u1_0ImmOperand : AsmOperandClass { let Name = "u1_0Imm"; }
-def n8_0ImmOperand : AsmOperandClass { let Name = "n8_0Imm"; }
-// Immediate operands.
-
-let OperandType = "OPERAND_IMMEDIATE",
- DecoderMethod = "unsignedImmDecoder" in {
- def s32_0Imm : Operand<i32> { let ParserMatchClass = s32_0ImmOperand;
- let DecoderMethod = "s32_0ImmDecoder"; }
- def s23_2Imm : Operand<i32> { let ParserMatchClass = s23_2ImmOperand; }
- def s8_0Imm : Operand<i32> { let ParserMatchClass = s8_0ImmOperand;
- let DecoderMethod = "s8_0ImmDecoder"; }
- def s8_0Imm64 : Operand<i64> { let ParserMatchClass = s8_0Imm64Operand;
- let DecoderMethod = "s8_0ImmDecoder"; }
- def s6_0Imm : Operand<i32> { let ParserMatchClass = s6_0ImmOperand;
- let DecoderMethod = "s6_0ImmDecoder"; }
- def s6_3Imm : Operand<i32>;
- def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand;
- let DecoderMethod = "s4_0ImmDecoder"; }
- def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand;
- let DecoderMethod = "s4_1ImmDecoder"; }
- def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand;
- let DecoderMethod = "s4_2ImmDecoder"; }
- def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand;
- let DecoderMethod = "s4_3ImmDecoder"; }
- def u64_0Imm : Operand<i64> { let ParserMatchClass = u64_0ImmOperand; }
- def u32_0Imm : Operand<i32> { let ParserMatchClass = u32_0ImmOperand; }
- def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; }
- def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; }
- def u16_1Imm : Operand<i32> { let ParserMatchClass = u16_1ImmOperand; }
- def u16_2Imm : Operand<i32> { let ParserMatchClass = u16_2ImmOperand; }
- def u16_3Imm : Operand<i32> { let ParserMatchClass = u16_3ImmOperand; }
- def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; }
- def u10_0Imm : Operand<i32> { let ParserMatchClass = u10_0ImmOperand; }
- def u9_0Imm : Operand<i32> { let ParserMatchClass = u9_0ImmOperand; }
- def u8_0Imm : Operand<i32> { let ParserMatchClass = u8_0ImmOperand; }
- def u7_0Imm : Operand<i32> { let ParserMatchClass = u7_0ImmOperand; }
- def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; }
- def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; }
- def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; }
- def u6_3Imm : Operand<i32> { let ParserMatchClass = u6_3ImmOperand; }
- def u5_0Imm : Operand<i32> { let ParserMatchClass = u5_0ImmOperand; }
- def u5_1Imm : Operand<i32>;
- def u5_2Imm : Operand<i32>;
- def u5_3Imm : Operand<i32>;
- def u4_0Imm : Operand<i32> { let ParserMatchClass = u4_0ImmOperand; }
- def u4_1Imm : Operand<i32>;
- def u4_2Imm : Operand<i32>;
- def u4_3Imm : Operand<i32>;
- def u3_0Imm : Operand<i32> { let ParserMatchClass = u3_0ImmOperand; }
- def u3_1Imm : Operand<i32>;
- def u3_2Imm : Operand<i32>;
- def u3_3Imm : Operand<i32>;
- def u2_0Imm : Operand<i32> { let ParserMatchClass = u2_0ImmOperand; }
- def u1_0Imm : Operand<i32> { let ParserMatchClass = u1_0ImmOperand; }
- def n8_0Imm : Operand<i32> { let ParserMatchClass = n8_0ImmOperand; }
-}
-
-let OperandType = "OPERAND_IMMEDIATE" in {
- def s4_6Imm : Operand<i32> { let ParserMatchClass = s4_6ImmOperand;
- let PrintMethod = "prints4_6ImmOperand";
- let DecoderMethod = "s4_6ImmDecoder";}
- def s4_7Imm : Operand<i32> { let PrintMethod = "prints4_7ImmOperand";
- let DecoderMethod = "s4_6ImmDecoder";}
- def s3_6Imm : Operand<i32> { let ParserMatchClass = s3_6ImmOperand;
- let PrintMethod = "prints3_6ImmOperand";
- let DecoderMethod = "s3_6ImmDecoder";}
- def s3_7Imm : Operand<i32> { let PrintMethod = "prints3_7ImmOperand";
- let DecoderMethod = "s3_6ImmDecoder";}
-}
-def n1ConstOperand : AsmOperandClass { let Name = "n1Const"; }
-def n1Const : Operand<i32> { let ParserMatchClass = n1ConstOperand; }
-
-//
-// Immediate predicates
-//
-def s32_0ImmPred : PatLeaf<(i32 imm), [{
+def f32ImmOperand : AsmOperandClass { let Name = "f32Imm"; }
+def f32Imm : Operand<f32> { let ParserMatchClass = f32ImmOperand; }
+def f64ImmOperand : AsmOperandClass { let Name = "f64Imm"; }
+def f64Imm : Operand<f64> { let ParserMatchClass = f64ImmOperand; }
+def s8_0Imm64Pred : PatLeaf<(i64 imm), [{ return isInt<8>(N->getSExtValue()); }]>;
+def s9_0ImmOperand : AsmOperandClass { let Name = "s9_0Imm"; }
+def s9_0Imm : Operand<i32> { let ParserMatchClass = s9_0ImmOperand; }
+def s23_2ImmOperand : AsmOperandClass { let Name = "s23_2Imm"; let RenderMethod = "addSignedImmOperands"; }
+def s23_2Imm : Operand<i32> { let ParserMatchClass = s23_2ImmOperand; }
+def r32_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isInt<32>(v);
}]>;
-
-def s31_1ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<31,1>(v);
-}]>;
-
-def s30_2ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<30,2>(v);
-}]>;
-
-def s29_3ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<29,3>(v);
-}]>;
-
-def s10_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<10>(v);
-}]>;
-
-def s8_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<8>(v);
-}]>;
-
-def s8_0Imm64Pred : PatLeaf<(i64 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<8>(v);
-}]>;
-
-def s6_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<6>(v);
-}]>;
-
-def s4_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<4>(v);
-}]>;
-
-def s4_1ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<4,1>(v);
-}]>;
-
-def s4_2ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<4,2>(v);
-}]>;
-
-def s4_3ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<4,3>(v);
-}]>;
-
-def u32_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<32>(v);
-}]>;
-
-def u16_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<16>(v);
-}]>;
-
-def u11_3ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<11,3>(v);
-}]>;
-
def u9_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isUInt<9>(v);
}]>;
-
-def u8_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<8>(v);
-}]>;
-
-def u6_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<6>(v);
-}]>;
-
-def u6_1ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,1>(v);
-}]>;
-
-def u6_2ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,2>(v);
-}]>;
-
-def u5_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<5>(v);
-}]>;
-
-def u4_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<4>(v);
-}]>;
-
-def u3_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<3>(v);
-}]>;
-
-def u2_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<2>(v);
-}]>;
-
-// Extendable immediate operands.
-def f32ExtOperand : AsmOperandClass { let Name = "f32Ext"; }
-def s16_0ExtOperand : AsmOperandClass { let Name = "s16_0Ext"; }
-def s12_0ExtOperand : AsmOperandClass { let Name = "s12_0Ext"; }
-def s10_0ExtOperand : AsmOperandClass { let Name = "s10_0Ext"; }
-def s9_0ExtOperand : AsmOperandClass { let Name = "s9_0Ext"; }
-def s8_0ExtOperand : AsmOperandClass { let Name = "s8_0Ext"; }
-def s7_0ExtOperand : AsmOperandClass { let Name = "s7_0Ext"; }
-def s6_0ExtOperand : AsmOperandClass { let Name = "s6_0Ext"; }
-def s11_0ExtOperand : AsmOperandClass { let Name = "s11_0Ext"; }
-def s11_1ExtOperand : AsmOperandClass { let Name = "s11_1Ext"; }
-def s11_2ExtOperand : AsmOperandClass { let Name = "s11_2Ext"; }
-def s11_3ExtOperand : AsmOperandClass { let Name = "s11_3Ext"; }
-def u6_0ExtOperand : AsmOperandClass { let Name = "u6_0Ext"; }
-def u7_0ExtOperand : AsmOperandClass { let Name = "u7_0Ext"; }
-def u8_0ExtOperand : AsmOperandClass { let Name = "u8_0Ext"; }
-def u9_0ExtOperand : AsmOperandClass { let Name = "u9_0Ext"; }
-def u10_0ExtOperand : AsmOperandClass { let Name = "u10_0Ext"; }
-def u6_1ExtOperand : AsmOperandClass { let Name = "u6_1Ext"; }
-def u6_2ExtOperand : AsmOperandClass { let Name = "u6_2Ext"; }
-def u6_3ExtOperand : AsmOperandClass { let Name = "u6_3Ext"; }
-def u32_0MustExtOperand : AsmOperandClass { let Name = "u32_0MustExt"; }
-
-
-
-let OperandType = "OPERAND_IMMEDIATE", PrintMethod = "printExtOperand",
- DecoderMethod = "unsignedImmDecoder" in {
- def f32Ext : Operand<f32> { let ParserMatchClass = f32ExtOperand; }
- def s16_0Ext : Operand<i32> { let ParserMatchClass = s16_0ExtOperand;
- let DecoderMethod = "s16_0ImmDecoder"; }
- def s12_0Ext : Operand<i32> { let ParserMatchClass = s12_0ExtOperand;
- let DecoderMethod = "s12_0ImmDecoder"; }
- def s11_0Ext : Operand<i32> { let ParserMatchClass = s11_0ExtOperand;
- let DecoderMethod = "s11_0ImmDecoder"; }
- def s11_1Ext : Operand<i32> { let ParserMatchClass = s11_1ExtOperand;
- let DecoderMethod = "s11_1ImmDecoder"; }
- def s11_2Ext : Operand<i32> { let ParserMatchClass = s11_2ExtOperand;
- let DecoderMethod = "s11_2ImmDecoder"; }
- def s11_3Ext : Operand<i32> { let ParserMatchClass = s11_3ExtOperand;
- let DecoderMethod = "s11_3ImmDecoder"; }
- def s10_0Ext : Operand<i32> { let ParserMatchClass = s10_0ExtOperand;
- let DecoderMethod = "s10_0ImmDecoder"; }
- def s9_0Ext : Operand<i32> { let ParserMatchClass = s9_0ExtOperand;
- let DecoderMethod = "s9_0ImmDecoder"; }
- def s8_0Ext : Operand<i32> { let ParserMatchClass = s8_0ExtOperand;
- let DecoderMethod = "s8_0ImmDecoder"; }
- def s7_0Ext : Operand<i32> { let ParserMatchClass = s7_0ExtOperand; }
- def s6_0Ext : Operand<i32> { let ParserMatchClass = s6_0ExtOperand;
- let DecoderMethod = "s6_0ImmDecoder"; }
- def u7_0Ext : Operand<i32> { let ParserMatchClass = u7_0ExtOperand; }
- def u8_0Ext : Operand<i32> { let ParserMatchClass = u8_0ExtOperand; }
- def u9_0Ext : Operand<i32> { let ParserMatchClass = u9_0ExtOperand; }
- def u10_0Ext : Operand<i32> { let ParserMatchClass = u10_0ExtOperand; }
- def u6_0Ext : Operand<i32> { let ParserMatchClass = u6_0ExtOperand; }
- def u6_1Ext : Operand<i32> { let ParserMatchClass = u6_1ExtOperand; }
- def u6_2Ext : Operand<i32> { let ParserMatchClass = u6_2ExtOperand; }
- def u6_3Ext : Operand<i32> { let ParserMatchClass = u6_3ExtOperand; }
- def u32_0MustExt : Operand<i32> { let ParserMatchClass = u32_0MustExtOperand; }
-}
-
+def u64_0ImmOperand : AsmOperandClass { let Name = "u64_0Imm";
+                                        let RenderMethod = "addImmOperands"; }
+def u64_0Imm : Operand<i64> { let ParserMatchClass = u64_0ImmOperand; }
+def n1ConstOperand : AsmOperandClass { let Name = "n1Const"; }
+def n1Const : Operand<i32> { let ParserMatchClass = n1ConstOperand; }
// This complex pattern exists only to create a machine instruction operand
// of type "frame index". There doesn't seem to be a way to do that directly
@@ -305,28 +40,6 @@ def AddrFI : ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>;
def AddrGA : ComplexPattern<i32, 1, "SelectAddrGA", [], []>;
def AddrGP : ComplexPattern<i32, 1, "SelectAddrGP", [], []>;
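// An illustrative use (a sketch, not part of this patch): a load from a
// frame slot can be matched through AddrFI, e.g.
//   def: Pat<(i32 (load AddrFI:$fi)), (L2_loadri_io AddrFI:$fi, 0)>;
// where SelectAddrFI supplies the frame-index machine operand.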
-// Address operands.
-
-let PrintMethod = "printGlobalOperand" in {
- def globaladdress : Operand<i32>;
- def globaladdressExt : Operand<i32>;
-}
-
-let PrintMethod = "printJumpTable" in
-def jumptablebase : Operand<i32>;
-
-def brtarget : Operand<OtherVT> {
- let DecoderMethod = "brtargetDecoder";
- let PrintMethod = "printBrtarget";
-}
-def brtargetExt : Operand<OtherVT> {
- let DecoderMethod = "brtargetDecoder";
- let PrintMethod = "printBrtarget";
-}
-def calltarget : Operand<i32> {
- let DecoderMethod = "brtargetDecoder";
- let PrintMethod = "printBrtarget";
-}
def bblabel : Operand<i32>;
def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">;
diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 89db46799cb3..b243de317dc5 100644
--- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -208,7 +208,16 @@ bool HexagonOptAddrMode::allValidCandidates(NodeAddr<StmtNode *> SA,
NodeAddr<UseNode *> UN = *I;
RegisterRef UR = UN.Addr->getRegRef(*DFG);
NodeSet Visited, Defs;
- const auto &ReachingDefs = LV->getAllReachingDefsRec(UR, UN, Visited, Defs);
+ const auto &P = LV->getAllReachingDefsRec(UR, UN, Visited, Defs);
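+ // The result is now a pair: the reaching-def set plus a flag that is false
+ // when the recursive walk gave up early (e.g. a complexity limit was hit).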
+ if (!P.second) {
+ DEBUG({
+ dbgs() << "*** Unable to collect all reaching defs for use ***\n"
+ << PrintNode<UseNode*>(UN, *DFG) << '\n'
+ << "The program's complexity may exceed the limits.\n";
+ });
+ return false;
+ }
+ const auto &ReachingDefs = P.first;
if (ReachingDefs.size() > 1) {
DEBUG({
dbgs() << "*** Multiple Reaching Defs found!!! ***\n";
@@ -230,7 +239,7 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA,
for (NodeAddr<DefNode *> DA : SA.Addr->members_if(DFG->IsDef, *DFG)) {
DEBUG(dbgs() << "\t\t[DefNode]: " << Print<NodeAddr<DefNode *>>(DA, *DFG)
<< "\n");
- RegisterRef DR = DFG->normalizeRef(DA.Addr->getRegRef(*DFG));
+ RegisterRef DR = DFG->getPRI().normalize(DA.Addr->getRegRef(*DFG));
auto UseSet = LV->getAllReachedUses(DR, DA);
@@ -250,7 +259,7 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA,
<< Print<Liveness::RefMap>(phiUse, *DFG) << "\n");
if (!phiUse.empty()) {
for (auto I : phiUse) {
- if (DR.Reg != I.first)
+ if (!DFG->getPRI().alias(RegisterRef(I.first), DR))
continue;
auto phiUseSet = I.second;
for (auto phiUI : phiUseSet) {
@@ -333,17 +342,17 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
short NewOpCode = HII->getBaseWithLongOffset(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
- MIB.addOperand(OldMI->getOperand(0));
- MIB.addOperand(OldMI->getOperand(2));
- MIB.addOperand(OldMI->getOperand(3));
- MIB.addOperand(ImmOp);
+ MIB.add(OldMI->getOperand(0));
+ MIB.add(OldMI->getOperand(2));
+ MIB.add(OldMI->getOperand(3));
+ MIB.add(ImmOp);
OpStart = 4;
Changed = true;
} else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset) {
short NewOpCode = HII->getAbsoluteForm(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode))
- .addOperand(OldMI->getOperand(0));
+ .add(OldMI->getOperand(0));
const GlobalValue *GV = ImmOp.getGlobal();
int64_t Offset = ImmOp.getOffset() + OldMI->getOperand(2).getImm();
@@ -359,9 +368,9 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
short NewOpCode = HII->xformRegToImmOffset(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
- MIB.addOperand(OldMI->getOperand(0));
- MIB.addOperand(OldMI->getOperand(1));
- MIB.addOperand(ImmOp);
+ MIB.add(OldMI->getOperand(0));
+ MIB.add(OldMI->getOperand(1));
+ MIB.add(ImmOp);
OpStart = 4;
Changed = true;
DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
@@ -370,7 +379,7 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
if (Changed)
for (unsigned i = OpStart; i < OpEnd; ++i)
- MIB.addOperand(OldMI->getOperand(i));
+ MIB.add(OldMI->getOperand(i));
return Changed;
}
@@ -390,10 +399,10 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
short NewOpCode = HII->getBaseWithLongOffset(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
- MIB.addOperand(OldMI->getOperand(1));
- MIB.addOperand(OldMI->getOperand(2));
- MIB.addOperand(ImmOp);
- MIB.addOperand(OldMI->getOperand(3));
+ MIB.add(OldMI->getOperand(1));
+ MIB.add(OldMI->getOperand(2));
+ MIB.add(ImmOp);
+ MIB.add(OldMI->getOperand(3));
OpStart = 4;
} else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset) {
short NewOpCode = HII->getAbsoluteForm(*OldMI);
@@ -402,7 +411,7 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
const GlobalValue *GV = ImmOp.getGlobal();
int64_t Offset = ImmOp.getOffset() + OldMI->getOperand(1).getImm();
MIB.addGlobalAddress(GV, Offset, ImmOp.getTargetFlags());
- MIB.addOperand(OldMI->getOperand(2));
+ MIB.add(OldMI->getOperand(2));
OpStart = 3;
}
Changed = true;
@@ -412,9 +421,9 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
short NewOpCode = HII->xformRegToImmOffset(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
- MIB.addOperand(OldMI->getOperand(0));
- MIB.addOperand(ImmOp);
- MIB.addOperand(OldMI->getOperand(1));
+ MIB.add(OldMI->getOperand(0));
+ MIB.add(ImmOp);
+ MIB.add(OldMI->getOperand(1));
OpStart = 2;
Changed = true;
DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
@@ -422,7 +431,7 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
}
if (Changed)
for (unsigned i = OpStart; i < OpEnd; ++i)
- MIB.addOperand(OldMI->getOperand(i));
+ MIB.add(OldMI->getOperand(i));
return Changed;
}
@@ -473,26 +482,26 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr<UseNode *> AddAslUN,
BuildMI(*BB, InsertPt, UseMI->getDebugLoc(), HII->get(NewOpCode));
// change mem(Rs + # ) -> mem(Rt << # + ##)
if (UseMID.mayLoad()) {
- MIB.addOperand(UseMI->getOperand(0));
- MIB.addOperand(AddAslMI->getOperand(2));
- MIB.addOperand(AddAslMI->getOperand(3));
+ MIB.add(UseMI->getOperand(0));
+ MIB.add(AddAslMI->getOperand(2));
+ MIB.add(AddAslMI->getOperand(3));
const GlobalValue *GV = ImmOp.getGlobal();
MIB.addGlobalAddress(GV, UseMI->getOperand(2).getImm(),
ImmOp.getTargetFlags());
OpStart = 3;
} else if (UseMID.mayStore()) {
- MIB.addOperand(AddAslMI->getOperand(2));
- MIB.addOperand(AddAslMI->getOperand(3));
+ MIB.add(AddAslMI->getOperand(2));
+ MIB.add(AddAslMI->getOperand(3));
const GlobalValue *GV = ImmOp.getGlobal();
MIB.addGlobalAddress(GV, UseMI->getOperand(1).getImm(),
ImmOp.getTargetFlags());
- MIB.addOperand(UseMI->getOperand(2));
+ MIB.add(UseMI->getOperand(2));
OpStart = 3;
} else
llvm_unreachable("Unhandled instruction");
for (unsigned i = OpStart; i < OpEnd; ++i)
- MIB.addOperand(UseMI->getOperand(i));
+ MIB.add(UseMI->getOperand(i));
Deleted.insert(UseMI);
}
@@ -617,7 +626,7 @@ bool HexagonOptAddrMode::constructDefMap(MachineBasicBlock *B) {
for (NodeAddr<InstrNode *> IA : BA.Addr->members(*DFG)) {
updateMap(IA);
- DFG->pushDefs(IA, DefM);
+ DFG->pushAllDefs(IA, DefM);
}
MachineDomTreeNode *N = MDT->getNode(B);
@@ -629,6 +638,9 @@ bool HexagonOptAddrMode::constructDefMap(MachineBasicBlock *B) {
}
bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
bool Changed = false;
auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &MRI = MF.getRegInfo();
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index ad81287007e6..b8c3bf0745ce 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -17,6 +17,16 @@ def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>;
def IsOrAdd: PatFrag<(ops node:$Addr, node:$off),
(or node:$Addr, node:$off), [{ return isOrEquivalentToAdd(N); }]>;
+def Iss4_6 : PatLeaf<(i32 imm), [{
+ int32_t V = N->getSExtValue();
+ return isShiftedInt<4,6>(V);
+}]>;
+
+def Iss4_7 : PatLeaf<(i32 imm), [{
+ int32_t V = N->getSExtValue();
+ return isShiftedInt<4,7>(V);
+}]>;
+
def IsPow2_32 : PatLeaf<(i32 imm), [{
uint32_t V = N->getZExtValue();
return isPowerOf2_32(V);
@@ -89,6 +99,11 @@ def LogN2_64 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(Log2_64(NV), SDLoc(N), MVT::i32);
}]>;
+def ToZext64: OutPatFrag<(ops node:$Rs),
+ (i64 (A4_combineir 0, (i32 $Rs)))>;
+def ToSext64: OutPatFrag<(ops node:$Rs),
+ (i64 (A2_sxtw (i32 $Rs)))>;
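+// ToZext64 widens a 32-bit value by combining it with a zero high word;
+// ToSext64 sign-extends it with A2_sxtw.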
+
class T_CMP_pat <InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred>
: Pat<(i1 (OpNode I32:$src1, ImmPred:$src2)),
@@ -153,8 +168,12 @@ def: Pat<(sub s32_0ImmPred:$s10, IntRegs:$Rs),
def: Pat<(not I32:$src1),
(A2_subri -1, IntRegs:$src1)>;
+def TruncI64ToI32: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
+}]>;
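+// TruncI64ToI32 reinterprets an i64 immediate as a 32-bit target constant,
+// so A2_tfrpi, whose immediate operand is 32-bit, can materialize it in the
+// pattern below.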
+
def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>;
-def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi imm:$s8)>;
+def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi (TruncI64ToI32 $s8))>;
def : Pat<(select I1:$Pu, s32_0ImmPred:$s8, I32:$Rs),
(C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>;
@@ -274,7 +293,7 @@ def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
-def: Pat<(br bb:$dst), (J2_jump brtarget:$dst)>;
+def: Pat<(br bb:$dst), (J2_jump b30_2Imm:$dst)>;
def: Pat<(brcond I1:$src1, bb:$block), (J2_jumpt PredRegs:$src1, bb:$block)>;
def: Pat<(brind I32:$dst), (J2_jumpr IntRegs:$dst)>;
@@ -695,8 +714,8 @@ def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
// Map TLS addresses to A2_tfrsi.
-def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16_0Ext:$addr)>;
-def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16_0Ext:$label)>;
+def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s32_0Imm:$addr)>;
+def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s32_0Imm:$label)>;
def: Pat<(i64 imm:$v), (CONST64 imm:$v)>;
def: Pat<(i1 0), (PS_false)>;
@@ -898,26 +917,35 @@ def: Pat<(i1 (setule I64:$src1, I64:$src2)),
(C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>;
// Sign extends.
-// i1 -> i32
-def: Pat<(i32 (sext I1:$src1)),
- (C2_muxii PredRegs:$src1, -1, 0)>;
+// sext i1->i32
+def: Pat<(i32 (sext I1:$Pu)),
+ (C2_muxii I1:$Pu, -1, 0)>;
-// i1 -> i64
-def: Pat<(i64 (sext I1:$src1)),
- (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>;
+// sext i1->i64
+def: Pat<(i64 (sext I1:$Pu)),
+ (A2_combinew (C2_muxii PredRegs:$Pu, -1, 0),
+ (C2_muxii PredRegs:$Pu, -1, 0))>;
// Zero extends.
-// i1 -> i32
-def: Pat<(i32 (zext I1:$src1)),
- (C2_muxii PredRegs:$src1, 1, 0)>;
+// zext i1->i32
+def: Pat<(i32 (zext I1:$Pu)),
+ (C2_muxii PredRegs:$Pu, 1, 0)>;
+
+// zext i1->i64
+def: Pat<(i64 (zext I1:$Pu)),
+ (ToZext64 (C2_muxii PredRegs:$Pu, 1, 0))>;
+
+// zext i32->i64
+def: Pat<(Zext64 I32:$Rs),
+ (ToZext64 IntRegs:$Rs)>;
// Map from Rs = Pd to Pd = mux(Pd, #1, #0)
-def: Pat<(i32 (anyext I1:$src1)),
- (C2_muxii PredRegs:$src1, 1, 0)>;
+def: Pat<(i32 (anyext I1:$Pu)),
+ (C2_muxii PredRegs:$Pu, 1, 0)>;
-// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0))
-def: Pat<(i64 (anyext I1:$src1)),
- (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>;
+// Map from Rss = Pd to Rdd = combine(#0, (mux(Pd, #1, #0)))
+def: Pat<(i64 (anyext I1:$Pu)),
+ (ToZext64 (C2_muxii PredRegs:$Pu, 1, 0))>;
// Clear the sign bit in a 64-bit register.
def ClearSign : OutPatFrag<(ops node:$Rss),
@@ -1244,11 +1272,6 @@ def: Pat<(HexagonCOMBINE s32_0ImmPred:$s8, s8_0ImmPred:$S8),
}
-def ToZext64: OutPatFrag<(ops node:$Rs),
- (i64 (A4_combineir 0, (i32 $Rs)))>;
-def ToSext64: OutPatFrag<(ops node:$Rs),
- (i64 (A2_sxtw (i32 $Rs)))>;
-
// Patterns to generate indexed loads with different forms of the address:
// - frameindex,
// - base + offset,
@@ -1349,14 +1372,6 @@ let AddedComplexity = 20 in {
def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>;
}
-// zext i1->i64
-def: Pat<(i64 (zext I1:$src1)),
- (ToZext64 (C2_muxii PredRegs:$src1, 1, 0))>;
-
-// zext i32->i64
-def: Pat<(Zext64 I32:$src1),
- (ToZext64 IntRegs:$src1)>;
-
let AddedComplexity = 40 in
multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT,
PatFrag stOp> {
@@ -1587,6 +1602,15 @@ def: Pat<(i64 (cttz I64:$Rss)), (ToZext64 (S2_ct0p I64:$Rss))>;
def: Pat<(i64 (ctlz (not I64:$Rss))), (ToZext64 (S2_cl1p I64:$Rss))>;
def: Pat<(i64 (cttz (not I64:$Rss))), (ToZext64 (S2_ct1p I64:$Rss))>;
+def: Pat<(i64 (ctpop I64:$Rss)), (ToZext64 (S5_popcountp I64:$Rss))>;
+def: Pat<(i32 (ctpop I32:$Rs)), (S5_popcountp (A4_combineir 0, I32:$Rs))>;
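+// The i32 popcount reuses the 64-bit S5_popcountp by pairing the input with
+// a zero high word, which contributes no extra set bits.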
+
+def: Pat<(bitreverse I32:$Rs), (S2_brev I32:$Rs)>;
+def: Pat<(bitreverse I64:$Rss), (S2_brevp I64:$Rss)>;
+
+def: Pat<(bswap I32:$Rs), (A2_swiz I32:$Rs)>;
+def: Pat<(bswap I64:$Rss), (A2_combinew (A2_swiz (LoReg $Rss)),
+ (A2_swiz (HiReg $Rss)))>;
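+
+// For i64, bswap also swaps the two words: A2_combinew places the swizzled
+// low word into the high half of the result.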
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
@@ -2235,12 +2259,6 @@ def ftoi : SDNodeXForm<fpimm, [{
def: Pat<(sra (i64 (add (sra I64:$src1, u6_0ImmPred:$src2), 1)), (i32 1)),
(S2_asr_i_p_rnd I64:$src1, imm:$src2)>;
-def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
- SDTCisVT<1, i64>]>;
-def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>;
-
-def: Pat<(HexagonPOPCOUNT I64:$Rss), (S5_popcountp I64:$Rss)>;
-
let AddedComplexity = 20 in {
defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>;
defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>;
@@ -2718,17 +2736,6 @@ def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [
}]>;
-def s4_6ImmPred: PatLeaf<(i32 imm), [{
- int64_t V = N->getSExtValue();
- return isShiftedInt<4,6>(V);
-}]>;
-
-def s4_7ImmPred: PatLeaf<(i32 imm), [{
- int64_t V = N->getSExtValue();
- return isShiftedInt<4,7>(V);
-}]>;
-
-
multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
// Aligned stores
def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
@@ -2749,25 +2756,25 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
// Fold Add R+OFF into vector store.
let AddedComplexity = 10 in {
def : Pat<(alignedstore (VTSgl VectorRegs:$src1),
- (add IntRegs:$src2, s4_6ImmPred:$offset)),
- (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset,
+ (add IntRegs:$src2, Iss4_6:$offset)),
+ (V6_vS32b_ai IntRegs:$src2, Iss4_6:$offset,
(VTSgl VectorRegs:$src1))>,
Requires<[UseHVXSgl]>;
def : Pat<(unalignedstore (VTSgl VectorRegs:$src1),
- (add IntRegs:$src2, s4_6ImmPred:$offset)),
- (V6_vS32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset,
+ (add IntRegs:$src2, Iss4_6:$offset)),
+ (V6_vS32Ub_ai IntRegs:$src2, Iss4_6:$offset,
(VTSgl VectorRegs:$src1))>,
Requires<[UseHVXSgl]>;
// Fold Add R+OFF into vector store 128B.
def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1),
- (add IntRegs:$src2, s4_7ImmPred:$offset)),
- (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
+ (add IntRegs:$src2, Iss4_7:$offset)),
+ (V6_vS32b_ai_128B IntRegs:$src2, Iss4_7:$offset,
(VTDbl VectorRegs128B:$src1))>,
Requires<[UseHVXDbl]>;
def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1),
- (add IntRegs:$src2, s4_7ImmPred:$offset)),
- (V6_vS32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
+ (add IntRegs:$src2, Iss4_7:$offset)),
+ (V6_vS32Ub_ai_128B IntRegs:$src2, Iss4_7:$offset,
(VTDbl VectorRegs128B:$src1))>,
Requires<[UseHVXDbl]>;
}
@@ -2798,18 +2805,18 @@ multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
// Fold Add R+OFF into vector load.
let AddedComplexity = 10 in {
- def : Pat<(VTDbl (alignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
- (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
+ def : Pat<(VTDbl (alignedload (add IntRegs:$src2, Iss4_7:$offset))),
+ (V6_vL32b_ai_128B IntRegs:$src2, Iss4_7:$offset)>,
Requires<[UseHVXDbl]>;
- def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
- (V6_vL32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
+ def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, Iss4_7:$offset))),
+ (V6_vL32Ub_ai_128B IntRegs:$src2, Iss4_7:$offset)>,
Requires<[UseHVXDbl]>;
- def : Pat<(VTSgl (alignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
- (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
+ def : Pat<(VTSgl (alignedload (add IntRegs:$src2, Iss4_6:$offset))),
+ (V6_vL32b_ai IntRegs:$src2, Iss4_6:$offset)>,
Requires<[UseHVXSgl]>;
- def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
- (V6_vL32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
+ def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, Iss4_6:$offset))),
+ (V6_vL32Ub_ai IntRegs:$src2, Iss4_6:$offset)>,
Requires<[UseHVXSgl]>;
}
}
@@ -3253,8 +3260,8 @@ def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
(M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;
def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
- (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
- (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
+ (LoReg (S2_vtrunewh (A2_combineii 0, 0),
+ (vmpyh V2I16:$Rs, V2I16:$Rt)))>;
// Multiplies two v4i16 vectors.
def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
@@ -3345,3 +3352,11 @@ def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
(S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;
+
+// Read cycle counter.
+//
+def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
+def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf,
+ [SDNPHasChain]>;
+
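+// UPCYCLE is the c15:14 (upcyclehi:upcyclelo) cycle-counter pair; A4_tfrcpp
+// copies a control-register pair into a general register pair.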
+def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>;
diff --git a/lib/Target/Hexagon/HexagonPseudo.td b/lib/Target/Hexagon/HexagonPseudo.td
new file mode 100644
index 000000000000..5a720e794562
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonPseudo.td
@@ -0,0 +1,537 @@
+//===--- HexagonPseudo.td -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+let PrintMethod = "printGlobalOperand" in {
+ def globaladdress : Operand<i32>;
+ def globaladdressExt : Operand<i32>;
+}
+
+let isPseudo = 1 in {
+let isCodeGenOnly = 0 in
+def A2_iconst : Pseudo<(outs IntRegs:$Rd32), (ins s23_2Imm:$Ii), "${Rd32}=iconst(#${Ii})">;
+def DUPLEX_Pseudo : InstHexagon<(outs), (ins s32_0Imm:$offset), "DUPLEX", [], "", DUPLEX, TypePSEUDO>;
+}
+
+let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
+ isAsmParserOnly = 1 in
+def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst),
+ (ins s32_0Imm:$src1, s8_0Imm:$src2),
+ "$dst=combine(#$src1,#$src2)">;
+
+// HI/LO Instructions
+let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
+ hasNewValue = 1, opNewValue = 0 in
+class REG_IMMED<string RegHalf, bit Rs, bits<3> MajOp, bit MinOp>
+ : InstHexagon<(outs IntRegs:$dst),
+ (ins u16_0Imm:$imm_value),
+ "$dst"#RegHalf#"=#$imm_value", [], "", ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>, OpcodeHexagon {
+ bits<5> dst;
+ bits<32> imm_value;
+
+ let Inst{27} = Rs;
+ let Inst{26-24} = MajOp;
+ let Inst{21} = MinOp;
+ let Inst{20-16} = dst;
+ let Inst{23-22} = imm_value{15-14};
+ let Inst{13-0} = imm_value{13-0};
+}
+
+let isAsmParserOnly = 1 in {
+ def LO : REG_IMMED<".l", 0b0, 0b001, 0b1>;
+ def HI : REG_IMMED<".h", 0b0, 0b010, 0b1>;
+}
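+// For example, "r0.l = #1" writes the low 16 bits of r0 and leaves the high
+// half unchanged; LO and HI above instantiate the ".l" and ".h" forms.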
+
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in {
+ def CONST32 : CONSTLDInst<(outs IntRegs:$Rd), (ins i32imm:$v),
+ "$Rd = CONST32(#$v)", []>;
+ def CONST64 : CONSTLDInst<(outs DoubleRegs:$Rd), (ins i64imm:$v),
+ "$Rd = CONST64(#$v)", []>;
+}
+
+let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
+ isCodeGenOnly = 1 in
+def PS_true : SInst<(outs PredRegs:$dst), (ins), "", []>;
+
+let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
+ isCodeGenOnly = 1 in
+def PS_false : SInst<(outs PredRegs:$dst), (ins), "", []>;
+
+let Defs = [R29, R30], Uses = [R31, R30, R29], isPseudo = 1 in
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ ".error \"should not emit\" ", []>;
+
+let Defs = [R29, R30, R31], Uses = [R29], isPseudo = 1 in
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ ".error \"should not emit\" ", []>;
+
+
+let isBranch = 1, isTerminator = 1, hasSideEffects = 0,
+ Defs = [PC, LC0], Uses = [SA0, LC0] in {
+def ENDLOOP0 : Endloop<(outs), (ins b30_2Imm:$offset),
+ ":endloop0",
+ []>;
+}
+
+let isBranch = 1, isTerminator = 1, hasSideEffects = 0,
+ Defs = [PC, LC1], Uses = [SA1, LC1] in {
+def ENDLOOP1 : Endloop<(outs), (ins b30_2Imm:$offset),
+ ":endloop1",
+ []>;
+}
+
+let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2,
+ opExtendable = 0, hasSideEffects = 0 in
+class LOOP_iBase<string mnemonic, Operand brOp, bit mustExtend = 0>
+ : CRInst<(outs), (ins brOp:$offset, u10_0Imm:$src2),
+ #mnemonic#"($offset,#$src2)",
+ [], "" , CR_tc_3x_SLOT3> {
+ bits<9> offset;
+ bits<10> src2;
+
+ let IClass = 0b0110;
+
+ let Inst{27-22} = 0b100100;
+ let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1);
+ let Inst{20-16} = src2{9-5};
+ let Inst{12-8} = offset{8-4};
+ let Inst{7-5} = src2{4-2};
+ let Inst{4-3} = offset{3-2};
+ let Inst{1-0} = src2{1-0};
+}
+
+let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2,
+ opExtendable = 0, hasSideEffects = 0 in
+class LOOP_rBase<string mnemonic, Operand brOp, bit mustExtend = 0>
+ : CRInst<(outs), (ins brOp:$offset, IntRegs:$src2),
+ #mnemonic#"($offset,$src2)",
+ [], "" ,CR_tc_3x_SLOT3> {
+ bits<9> offset;
+ bits<5> src2;
+
+ let IClass = 0b0110;
+
+ let Inst{27-22} = 0b000000;
+ let Inst{21} = !if (!eq(mnemonic, "loop0"), 0b0, 0b1);
+ let Inst{20-16} = src2;
+ let Inst{12-8} = offset{8-4};
+ let Inst{4-3} = offset{3-2};
+ }
+
+multiclass LOOP_ri<string mnemonic> {
+ let isCodeGenOnly = 1, isExtended = 1, opExtendable = 0 in {
+ def iext: LOOP_iBase<mnemonic, b30_2Imm, 1>;
+ def rext: LOOP_rBase<mnemonic, b30_2Imm, 1>;
+ }
+}
+
+
+let Defs = [SA0, LC0, USR] in
+defm J2_loop0 : LOOP_ri<"loop0">;
+
+// Interestingly, only loop0 instructions appear to set usr.lpcfg.
+let Defs = [SA1, LC1] in
+defm J2_loop1 : LOOP_ri<"loop1">;
+
+let isCall = 1, hasSideEffects = 1, isPredicable = 0,
+ isExtended = 0, isExtendable = 1, opExtendable = 0,
+ isExtentSigned = 1, opExtentBits = 24, opExtentAlign = 2 in
+class T_Call<string ExtStr>
+ : JInst<(outs), (ins a30_2Imm:$dst),
+ "call " # ExtStr # "$dst", [], "", J_tc_2early_SLOT23> {
+ let BaseOpcode = "call";
+ bits<24> dst;
+
+ let IClass = 0b0101;
+ let Inst{27-25} = 0b101;
+ let Inst{24-16,13-1} = dst{23-2};
+ let Inst{0} = 0b0;
+}
+
+let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = [R16],
+ isPredicable = 0 in
+def CALLProfile : T_Call<"">;
+
+let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1,
+ Defs = [PC, R31, R6, R7, P0] in
+def PS_call_stk : T_Call<"">;
+
+let isCall = 1, hasSideEffects = 1, cofMax1 = 1 in
+class JUMPR_MISC_CALLR<bit isPred, bit isPredNot,
+ dag InputDag = (ins IntRegs:$Rs)>
+ : JInst<(outs), InputDag,
+ !if(isPred, !if(isPredNot, "if (!$Pu) callr $Rs",
+ "if ($Pu) callr $Rs"),
+ "callr $Rs"),
+ [], "", J_tc_2early_SLOT2> {
+ bits<5> Rs;
+ bits<2> Pu;
+ let isPredicated = isPred;
+ let isPredicatedFalse = isPredNot;
+
+ let IClass = 0b0101;
+ let Inst{27-25} = 0b000;
+ let Inst{24-23} = !if (isPred, 0b10, 0b01);
+ let Inst{22} = 0;
+ let Inst{21} = isPredNot;
+ let Inst{9-8} = !if (isPred, Pu, 0b00);
+ let Inst{20-16} = Rs;
+
+ }
+
+let isCodeGenOnly = 1 in {
+ def PS_callr_nr : JUMPR_MISC_CALLR<0, 1>; // Call, no return.
+}
+
+let isCall = 1, hasSideEffects = 1,
+ isExtended = 0, isExtendable = 1, opExtendable = 0, isCodeGenOnly = 1,
+ BaseOpcode = "PS_call_nr", isExtentSigned = 1, opExtentAlign = 2,
+ Itinerary = J_tc_2early_SLOT23 in
+class Call_nr<bits<5> nbits, bit isPred, bit isFalse, dag iops>
+ : Pseudo<(outs), iops, "">, PredRel {
+ bits<2> Pu;
+ bits<17> dst;
+ let opExtentBits = nbits;
+ let isPredicable = 0; // !if(isPred, 0, 1);
+ let isPredicated = 0; // isPred;
+ let isPredicatedFalse = isFalse;
+}
+
+def PS_call_nr : Call_nr<24, 0, 0, (ins s32_0Imm:$Ii)>;
+//def PS_call_nrt: Call_nr<17, 1, 0, (ins PredRegs:$Pu, s32_0Imm:$dst)>;
+//def PS_call_nrf: Call_nr<17, 1, 1, (ins PredRegs:$Pu, s32_0Imm:$dst)>;
+
+let isBranch = 1, isIndirectBranch = 1, isBarrier = 1, Defs = [PC],
+ isPredicable = 1, hasSideEffects = 0, InputType = "reg",
+ cofMax1 = 1 in
+class T_JMPr
+ : InstHexagon<(outs), (ins IntRegs:$dst), "jumpr $dst", [],
+ "", J_tc_2early_SLOT2, TypeJ>, OpcodeHexagon {
+ bits<5> dst;
+
+ let IClass = 0b0101;
+ let Inst{27-21} = 0b0010100;
+ let Inst{20-16} = dst;
+}
+
+// A return through builtin_eh_return.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasSideEffects = 0,
+ isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in
+def EH_RETURN_JMPR : T_JMPr;
+
+// Indirect tail-call.
+let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
+ isTerminator = 1, isCodeGenOnly = 1 in
+def PS_tailcall_r : T_JMPr;
+
+//
+// Direct tail-calls.
+let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
+ isTerminator = 1, isCodeGenOnly = 1 in
+def PS_tailcall_i : Pseudo<(outs), (ins a30_2Imm:$dst), "", []>;
+
+let isCodeGenOnly = 1, isPseudo = 1, Uses = [R30], hasSideEffects = 0 in
+def PS_aligna : Pseudo<(outs IntRegs:$Rd), (ins u32_0Imm:$A), "", []>;
+
+// Generate frameindex addresses. The main reason for the offset operand is
+// that every instruction that is allowed to have a frame index as an operand
+// will then have that operand followed by an immediate operand (the offset).
+// This simplifies the frame-index elimination code.
+//
+let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1,
+ isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in {
+ def PS_fi : Pseudo<(outs IntRegs:$Rd),
+ (ins IntRegs:$fi, s32_0Imm:$off), "">;
+ def PS_fia : Pseudo<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$fi, s32_0Imm:$off), "">;
+}
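+
+// Sketch of the assumed expansion: after frame-index elimination,
+// "$Rd = PS_fi $fi, #off" becomes an A2_addi of the resolved base register
+// and the adjusted offset.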
+
+class CondStr<string CReg, bit True, bit New> {
+ string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") ";
+}
+class JumpOpcStr<string Mnemonic, bit New, bit Taken> {
+ string S = Mnemonic # !if(Taken, ":t", ":nt");
+}
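+// For example, T_JMPr_c<1, 1, 1> below formats as "if (!$src.new) jumpr:t $dst".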
+let isBranch = 1, isIndirectBranch = 1, Defs = [PC], isPredicated = 1,
+ hasSideEffects = 0, InputType = "reg", cofMax1 = 1 in
+class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak>
+ : InstHexagon<(outs), (ins PredRegs:$src, IntRegs:$dst),
+ CondStr<"$src", !if(PredNot,0,1), isPredNew>.S #
+ JumpOpcStr<"jumpr", isPredNew, isTak>.S # " $dst",
+ [], "", J_tc_2early_SLOT2, TypeJ>, OpcodeHexagon {
+
+ let isTaken = isTak;
+ let isPredicatedFalse = PredNot;
+ let isPredicatedNew = isPredNew;
+ bits<2> src;
+ bits<5> dst;
+
+ let IClass = 0b0101;
+
+ let Inst{27-22} = 0b001101;
+ let Inst{21} = PredNot;
+ let Inst{20-16} = dst;
+ let Inst{12} = isTak;
+ let Inst{11} = isPredNew;
+ let Inst{9-8} = src;
+}
+multiclass JMPR_Pred<bit PredNot> {
+ def NAME : T_JMPr_c<PredNot, 0, 0>; // not taken
+ // Predicate new
+ def NAME#newpt : T_JMPr_c<PredNot, 1, 1>; // taken
+ def NAME#new : T_JMPr_c<PredNot, 1, 0>; // not taken
+}
+multiclass JMPR_base<string BaseOp> {
+ let BaseOpcode = BaseOp in {
+ def NAME : T_JMPr;
+ defm t : JMPR_Pred<0>;
+ defm f : JMPR_Pred<1>;
+ }
+}
+let isTerminator = 1, hasSideEffects = 0, isReturn = 1, isCodeGenOnly = 1, isBarrier = 1 in
+defm PS_jmpret : JMPR_base<"JMPret">, PredNewRel;
+
+//defm V6_vtran2x2_map : HexagonMapping<(outs VectorRegs:$Vy32, VectorRegs:$Vx32), (ins VectorRegs:$Vx32in, IntRegs:$Rt32), "vtrans2x2(${Vy32},${Vx32},${Rt32})", (V6_vshuff VectorRegs:$Vy32, VectorRegs:$Vx32, VectorRegs:$Vx32in, IntRegs:$Rt32)>;
+
+// The reason for the custom inserter is to record all ALLOCA instructions
+// in MachineFunctionInfo.
+let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1 in
+def PS_alloca: InstHexagon<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, u32_0Imm:$A), "",
+ [], "", ALU32_2op_tc_1_SLOT0123, TypeALU32_2op>;
+
+// Load predicate.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
+def LDriw_pred : LDInst<(outs PredRegs:$dst),
+ (ins IntRegs:$addr, s32_0Imm:$off),
+ ".error \"should not emit\"", []>;
+
+// Load modifier.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
+def LDriw_mod : LDInst<(outs ModRegs:$dst),
+ (ins IntRegs:$addr, s32_0Imm:$off),
+ ".error \"should not emit\"", []>;
+
+// Vector load
+let Predicates = [HasV60T, UseHVX] in
+let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+ class V6_LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CVI_VM_LD,
+ IType type = TypeCVI_VM_LD>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+// Vector store
+let Predicates = [HasV60T, UseHVX] in
+let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
+class V6_STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CVI_VM_ST,
+ IType type = TypeCVI_VM_ST>
+: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+let isCodeGenOnly = 1, isPseudo = 1 in
+def PS_pselect : ALU64_rr<(outs DoubleRegs:$Rd),
+ (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"should not emit\" ", []>;
+
+let isBranch = 1, isBarrier = 1, Defs = [PC], hasSideEffects = 0,
+ isPredicable = 1,
+ isExtendable = 1, opExtendable = 0, isExtentSigned = 1,
+ opExtentBits = 24, opExtentAlign = 2, InputType = "imm" in
+class T_JMP<string ExtStr>
+ : JInst_CJUMP_UCJUMP<(outs), (ins b30_2Imm:$dst),
+ "jump " # ExtStr # "$dst",
+ [], "", J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT> {
+ bits<24> dst;
+ let IClass = 0b0101;
+
+ let Inst{27-25} = 0b100;
+ let Inst{24-16} = dst{23-15};
+ let Inst{13-1} = dst{14-2};
+}
+
+// Call that restores registers and deallocates the frame, then returns.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in {
+ def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">;
+
+ let isExtended = 1, opExtendable = 0 in
+ def RESTORE_DEALLOC_RET_JMP_V4_EXT : T_JMP<"">;
+
+ let Defs = [R14, R15, R28, R29, R30, R31, PC] in {
+ def RESTORE_DEALLOC_RET_JMP_V4_PIC : T_JMP<"">;
+
+ let isExtended = 1, opExtendable = 0 in
+ def RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC : T_JMP<"">;
+ }
+}
+
+// Restore registers and dealloc frame before a tail call.
+let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in {
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel;
+
+ let isExtended = 1, opExtendable = 0 in
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT : T_Call<"">, PredRel;
+
+ let Defs = [R14, R15, R28, R29, R30, R31, PC] in {
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC : T_Call<"">, PredRel;
+
+ let isExtended = 1, opExtendable = 0 in
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC : T_Call<"">, PredRel;
+ }
+}
+
+// Function call that saves registers.
+let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in {
+ def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel;
+
+ let isExtended = 1, opExtendable = 0 in
+ def SAVE_REGISTERS_CALL_V4_EXT : T_Call<"">, PredRel;
+
+ let Defs = [P0] in
+ def SAVE_REGISTERS_CALL_V4STK : T_Call<"">, PredRel;
+
+ let Defs = [P0], isExtended = 1, opExtendable = 0 in
+ def SAVE_REGISTERS_CALL_V4STK_EXT : T_Call<"">, PredRel;
+
+ let Defs = [R14, R15, R28] in
+ def SAVE_REGISTERS_CALL_V4_PIC : T_Call<"">, PredRel;
+
+ let Defs = [R14, R15, R28], isExtended = 1, opExtendable = 0 in
+ def SAVE_REGISTERS_CALL_V4_EXT_PIC : T_Call<"">, PredRel;
+
+ let Defs = [R14, R15, R28, P0] in
+ def SAVE_REGISTERS_CALL_V4STK_PIC : T_Call<"">, PredRel;
+
+ let Defs = [R14, R15, R28, P0], isExtended = 1, opExtendable = 0 in
+ def SAVE_REGISTERS_CALL_V4STK_EXT_PIC : T_Call<"">, PredRel;
+}
+
+// Vector load/store pseudos
+
+let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in
+class STrivv_template<RegisterClass RC>
+ : V6_STInst<(outs), (ins IntRegs:$addr, s32_0Imm:$off, RC:$src), "", []>;
+
+def PS_vstorerw_ai: STrivv_template<VecDblRegs>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vstorerwu_ai: STrivv_template<VecDblRegs>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vstorerw_ai_128B: STrivv_template<VecDblRegs128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+def PS_vstorerwu_ai_128B: STrivv_template<VecDblRegs128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
+
+let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in
+class LDrivv_template<RegisterClass RC>
+ : V6_LDInst<(outs RC:$dst), (ins IntRegs:$addr, s32_0Imm:$off), "", []>;
+
+def PS_vloadrw_ai: LDrivv_template<VecDblRegs>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vloadrwu_ai: LDrivv_template<VecDblRegs>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vloadrw_ai_128B: LDrivv_template<VecDblRegs128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+def PS_vloadrwu_ai_128B: LDrivv_template<VecDblRegs128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
+// Store vector predicate pseudo.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+ def PS_vstorerq_ai : STInst<(outs),
+ (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs:$src1),
+ ".error \"should not emit\" ", []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+
+ def PS_vstorerq_ai_128B : STInst<(outs),
+ (ins IntRegs:$base, s32_0Imm:$offset, VectorRegs:$src1),
+ ".error \"should not emit\" ", []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+
+ def PS_vloadrq_ai : STInst<(outs),
+ (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs128B:$src1),
+ ".error \"should not emit\" ", []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
+ def PS_vloadrq_ai_128B : STInst<(outs),
+ (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs128B:$src1),
+ ".error \"should not emit\" ", []>,
+ Requires<[HasV60T,UseHVXDbl]>;
+}
+
+class VSELInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = CVI_VA_DV,
+ IType type = TypeCVI_VA_DV>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+
+let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
+ def PS_vselect: VSELInst<(outs VectorRegs:$dst),
+ (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), "", []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+ def PS_vselect_128B: VSELInst<(outs VectorRegs128B:$dst),
+ (ins PredRegs:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3),
+ "", []>, Requires<[HasV60T,UseHVXDbl]>;
+ def PS_wselect: VSELInst<(outs VecDblRegs:$dst),
+ (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3), "", []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+ def PS_wselect_128B: VSELInst<(outs VecDblRegs128B:$dst),
+ (ins PredRegs:$src1, VecDblRegs128B:$src2, VecDblRegs128B:$src3),
+ "", []>, Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Store predicate.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
+def STriw_pred : STInst<(outs),
+ (ins IntRegs:$addr, s32_0Imm:$off, PredRegs:$src1),
+ ".error \"should not emit\"", []>;
+// Store modifier.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+ isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
+def STriw_mod : STInst<(outs),
+ (ins IntRegs:$addr, s32_0Imm:$off, ModRegs:$src1),
+ ".error \"should not emit\"", []>;
+
+let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
+ isAsmParserOnly = 1 in
+def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64_0Imm:$src1),
+ "$dst = #$src1">;
+
+// Hexagon doesn't have a vector multiply with C semantics.
+// Instead, generate a pseudo instruction that ExpandPostRAPseudos later
+// expands into two scalar MPYI instructions.
+let isPseudo = 1 in
+def PS_vmulw : PseudoM<(outs DoubleRegs:$Rd),
+ (ins DoubleRegs:$Rs, DoubleRegs:$Rt), "", []>;
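+// Illustrative expansion (register numbers assumed): "$d0 = PS_vmulw $d1,$d2"
+// becomes one M2_mpyi per 32-bit lane, operating on the lo/hi subregisters.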
+
+let isPseudo = 1 in
+def PS_vmulw_acc : PseudoM<(outs DoubleRegs:$Rd),
+ (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt), "", [],
+ "$Rd = $Rx">;
+
+def DuplexIClass0: InstDuplex < 0 >;
+def DuplexIClass1: InstDuplex < 1 >;
+def DuplexIClass2: InstDuplex < 2 >;
+let isExtendable = 1 in {
+ def DuplexIClass3: InstDuplex < 3 >;
+ def DuplexIClass4: InstDuplex < 4 >;
+ def DuplexIClass5: InstDuplex < 5 >;
+ def DuplexIClass6: InstDuplex < 6 >;
+ def DuplexIClass7: InstDuplex < 7 >;
+}
+def DuplexIClass8: InstDuplex < 8 >;
+def DuplexIClass9: InstDuplex < 9 >;
+def DuplexIClassA: InstDuplex < 0xA >;
+def DuplexIClassB: InstDuplex < 0xB >;
+def DuplexIClassC: InstDuplex < 0xC >;
+def DuplexIClassD: InstDuplex < 0xD >;
+def DuplexIClassE: InstDuplex < 0xE >;
+def DuplexIClassF: InstDuplex < 0xF >;
diff --git a/lib/Target/Hexagon/HexagonRDFOpt.cpp b/lib/Target/Hexagon/HexagonRDFOpt.cpp
index 30640e19ebac..b3aba50b5625 100644
--- a/lib/Target/Hexagon/HexagonRDFOpt.cpp
+++ b/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -94,7 +94,7 @@ struct HexagonDCE : public DeadCodeElimination {
bool HexagonCP::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) {
- auto mapRegs = [MI,&EM] (RegisterRef DstR, RegisterRef SrcR) -> void {
+ auto mapRegs = [&EM] (RegisterRef DstR, RegisterRef SrcR) -> void {
EM.insert(std::make_pair(DstR, SrcR));
};
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index d3f230d3f8a6..2a1bb63af789 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -36,6 +36,9 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#define GET_REGINFO_TARGET_DESC
+#include "HexagonGenRegisterInfo.inc"
+
using namespace llvm;
HexagonRegisterInfo::HexagonRegisterInfo()
@@ -125,6 +128,7 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
case HexagonSubtarget::V5:
case HexagonSubtarget::V55:
case HexagonSubtarget::V60:
+ case HexagonSubtarget::V62:
return HasEHReturn ? CalleeSavedRegsV3EHReturn : CalleeSavedRegsV3;
}
@@ -133,25 +137,47 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
+const uint32_t *HexagonRegisterInfo::getCallPreservedMask(
+ const MachineFunction &MF, CallingConv::ID) const {
+ return HexagonCSR_RegMask;
+}
+
+
BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
const {
BitVector Reserved(getNumRegs());
Reserved.set(Hexagon::R29);
Reserved.set(Hexagon::R30);
Reserved.set(Hexagon::R31);
- Reserved.set(Hexagon::PC);
- Reserved.set(Hexagon::D14);
- Reserved.set(Hexagon::D15);
- Reserved.set(Hexagon::LC0);
- Reserved.set(Hexagon::LC1);
- Reserved.set(Hexagon::SA0);
- Reserved.set(Hexagon::SA1);
- Reserved.set(Hexagon::UGP);
- Reserved.set(Hexagon::GP);
- Reserved.set(Hexagon::CS0);
- Reserved.set(Hexagon::CS1);
- Reserved.set(Hexagon::CS);
- Reserved.set(Hexagon::USR);
+ // Control registers.
+ Reserved.set(Hexagon::SA0); // C0
+ Reserved.set(Hexagon::LC0); // C1
+ Reserved.set(Hexagon::SA1); // C2
+ Reserved.set(Hexagon::LC1); // C3
+ Reserved.set(Hexagon::P3_0); // C4
+ Reserved.set(Hexagon::USR); // C8
+ Reserved.set(Hexagon::PC); // C9
+ Reserved.set(Hexagon::UGP); // C10
+ Reserved.set(Hexagon::GP); // C11
+ Reserved.set(Hexagon::CS0); // C12
+ Reserved.set(Hexagon::CS1); // C13
+ Reserved.set(Hexagon::UPCYCLELO); // C14
+ Reserved.set(Hexagon::UPCYCLEHI); // C15
+ Reserved.set(Hexagon::FRAMELIMIT); // C16
+ Reserved.set(Hexagon::FRAMEKEY); // C17
+ Reserved.set(Hexagon::PKTCOUNTLO); // C18
+ Reserved.set(Hexagon::PKTCOUNTHI); // C19
+ Reserved.set(Hexagon::UTIMERLO); // C30
+ Reserved.set(Hexagon::UTIMERHI); // C31
+ // Out of the control registers, only C8 is explicitly defined in
+ // HexagonRegisterInfo.td. If others are defined, make sure to add
+ // them here as well.
+ Reserved.set(Hexagon::C8);
+ Reserved.set(Hexagon::USR_OVF);
+
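+ // Reserving a register must also reserve every register that contains it
+ // as a subregister, so walk the set and mark all super-registers.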
+ for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x))
+ markSuperRegs(Reserved, x);
+
return Reserved;
}
@@ -267,6 +293,3 @@ unsigned HexagonRegisterInfo::getFirstCallerSavedNonParamReg() const {
return Hexagon::R6;
}
-
-#define GET_REGINFO_TARGET_DESC
-#include "HexagonGenRegisterInfo.inc"
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index 1fb295b5bd8c..8a3f175b8488 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -35,7 +35,8 @@ public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF)
const override;
-
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
index a75f3514dbd2..93ab2f731207 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -140,41 +140,54 @@ let Namespace = "Hexagon" in {
}
// Control registers.
- def SA0 : Rc<0, "sa0", ["c0"]>, DwarfRegNum<[67]>;
- def LC0 : Rc<1, "lc0", ["c1"]>, DwarfRegNum<[68]>;
- def SA1 : Rc<2, "sa1", ["c2"]>, DwarfRegNum<[69]>;
- def LC1 : Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>;
- def P3_0 : Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>,
- DwarfRegNum<[71]>;
- def C5 : Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>; // future use
- def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[73]>;
- def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[74]>;
+ def SA0: Rc<0, "sa0", ["c0"]>, DwarfRegNum<[67]>;
+ def LC0: Rc<1, "lc0", ["c1"]>, DwarfRegNum<[68]>;
+ def SA1: Rc<2, "sa1", ["c2"]>, DwarfRegNum<[69]>;
+ def LC1: Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>;
+ def P3_0: Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>,
+ DwarfRegNum<[71]>;
+ // When defining more Cn registers, make sure to explicitly mark them
+ // as reserved in HexagonRegisterInfo.cpp.
+ def C5: Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>;
+ def C6: Rc<6, "c6", [], [M0]>, DwarfRegNum<[73]>;
+ def C7: Rc<7, "c7", [], [M1]>, DwarfRegNum<[74]>;
// Define C8 separately and make it aliased with USR.
// The problem is that USR has subregisters (e.g. overflow). If USR was
// specified as a subregister of C9_8, it would imply that subreg_overflow
// and isub_lo can be composed, which leads to all kinds of issues
// with lane masks.
- def C8 : Rc<8, "c8", [], [USR]>, DwarfRegNum<[75]>;
- def PC : Rc<9, "pc">, DwarfRegNum<[76]>;
- def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>;
- def GP : Rc<11, "gp", ["c11"]>, DwarfRegNum<[78]>;
- def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>;
- def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>;
- def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>;
- def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[82]>;
+ def C8: Rc<8, "c8", [], [USR]>, DwarfRegNum<[75]>;
+ def PC: Rc<9, "pc">, DwarfRegNum<[76]>;
+ def UGP: Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>;
+ def GP: Rc<11, "gp", ["c11"]>, DwarfRegNum<[78]>;
+ def CS0: Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>;
+ def CS1: Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>;
+ def UPCYCLELO: Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>;
+ def UPCYCLEHI: Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[82]>;
+ def FRAMELIMIT: Rc<16, "framelimit", ["c16"]>, DwarfRegNum<[83]>;
+ def FRAMEKEY: Rc<17, "framekey", ["c17"]>, DwarfRegNum<[84]>;
+ def PKTCOUNTLO: Rc<18, "pktcountlo", ["c18"]>, DwarfRegNum<[85]>;
+ def PKTCOUNTHI: Rc<19, "pktcounthi", ["c19"]>, DwarfRegNum<[86]>;
+ def UTIMERLO: Rc<30, "utimerlo", ["c30"]>, DwarfRegNum<[97]>;
+ def UTIMERHI: Rc<31, "utimerhi", ["c31"]>, DwarfRegNum<[98]>;
}
// Control registers pairs.
let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in {
- def C1_0 : Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>;
- def C3_2 : Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>;
- def C5_4 : Rcc<4, "c5:4", [P3_0, C5]>, DwarfRegNum<[71]>;
- def C7_6 : Rcc<6, "c7:6", [C6, C7], ["m1:0"]>, DwarfRegNum<[72]>;
+ def C1_0: Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>;
+ def C3_2: Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>;
+ def C5_4: Rcc<4, "c5:4", [P3_0, C5]>, DwarfRegNum<[71]>;
+ def C7_6: Rcc<6, "c7:6", [C6, C7], ["m1:0"]>, DwarfRegNum<[72]>;
// Use C8 instead of USR as a subregister of C9_8.
- def C9_8 : Rcc<8, "c9:8", [C8, PC]>, DwarfRegNum<[74]>;
- def C11_10 : Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>;
- def CS : Rcc<12, "c13:12", [CS0, CS1], ["cs1:0"]>, DwarfRegNum<[78]>;
- def UPC : Rcc<14, "c15:14", [UPCL, UPCH]>, DwarfRegNum<[80]>;
+ def C9_8: Rcc<8, "c9:8", [C8, PC]>, DwarfRegNum<[74]>;
+ def C11_10: Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>;
+ def CS: Rcc<12, "c13:12", [CS0, CS1], ["cs1:0"]>, DwarfRegNum<[78]>;
+ def UPCYCLE: Rcc<14, "c15:14", [UPCYCLELO, UPCYCLEHI]>, DwarfRegNum<[80]>;
+ def C17_16: Rcc<16, "c17:16", [FRAMELIMIT, FRAMEKEY]>, DwarfRegNum<[83]>;
+ def PKTCOUNT: Rcc<18, "c19:18", [PKTCOUNTLO, PKTCOUNTHI], ["pktcount"]>,
+ DwarfRegNum<[85]>;
+ def UTIMER: Rcc<30, "c31:30", [UTIMERLO, UTIMERHI], ["utimer"]>,
+ DwarfRegNum<[97]>;
}
foreach i = 0-31 in {
@@ -219,6 +232,10 @@ def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
}
// Registers are listed in reverse order for allocation preference reasons.
+def GeneralSubRegs : RegisterClass<"Hexagon", [i32], 32,
+ (add R23, R22, R21, R20, R19, R18, R17,
+ R16, R7, R6, R5, R4, R3, R2, R1, R0)>;
+
def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32,
(add R7, R6, R5, R4, R3, R2, R1, R0)> ;
@@ -226,6 +243,10 @@ def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
(add (sequence "D%u", 0, 4),
(sequence "D%u", 6, 13), D5, D14, D15)>;
+def GeneralDoubleLow8Regs : RegisterClass<"Hexagon", [i64], 64,
+ (add D11, D10, D9, D8, D3, D2, D1,
+ D0)>;
+
def VectorRegs : RegisterClass<"Hexagon", [v64i8, v32i16, v16i32, v8i64], 512,
(add (sequence "V%u", 0, 31))>;
@@ -259,28 +280,28 @@ def ModRegs : RegisterClass<"Hexagon", [i32], 32, (add M0, M1)>;
let Size = 32, isAllocatable = 0 in
def CtrRegs : RegisterClass<"Hexagon", [i32], 32,
- (add LC0, SA0, LC1, SA1,
- P3_0, C5,
- M0, M1, C6, C7, C8, CS0, CS1, UPCL, UPCH,
- USR, UGP, GP, PC)>;
+ (add LC0, SA0, LC1, SA1, P3_0, C5, C6, C7,
+ C8, PC, UGP, GP, CS0, CS1, UPCYCLELO, UPCYCLEHI,
+ FRAMELIMIT, FRAMEKEY, PKTCOUNTLO, PKTCOUNTHI, UTIMERLO, UTIMERHI,
+ M0, M1, USR)>;
let isAllocatable = 0 in
def UsrBits : RegisterClass<"Hexagon", [i1], 0, (add USR_OVF)>;
let Size = 64, isAllocatable = 0 in
def CtrRegs64 : RegisterClass<"Hexagon", [i64], 64,
- (add C1_0, C3_2, C7_6, C9_8, C11_10, CS, UPC)>;
-
-def VolatileV3 {
- list<Register> Regs = [D0, D1, D2, D3, D4, D5, D6, D7,
- R28, R31,
- P0, P1, P2, P3,
- M0, M1,
- LC0, LC1, SA0, SA1, USR, USR_OVF, CS0, CS1,
- V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V11,
- V12, V13, V14, V15, V16, V17, V18, V19, V20, V21,
- V22, V23, V24, V25, V26, V27, V28, V29, V30, V31,
- W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11,
- W12, W13, W14, W15,
- Q0, Q1, Q2, Q3];
-}
+ (add C1_0, C3_2, C5_4, C7_6, C9_8, C11_10, CS, UPCYCLE, C17_16,
+ PKTCOUNT, UTIMER)>;
+
+// These registers are new for v62 and onward.
+// The function RegisterMatchesArch() uses this list for validation.
+let isAllocatable = 0 in
+def V62Regs : RegisterClass<"Hexagon", [i32], 32,
+ (add FRAMELIMIT, FRAMEKEY, C17_16,
+ PKTCOUNTLO, PKTCOUNTHI, PKTCOUNT,
+ UTIMERLO, UTIMERHI, UTIMER)>;
+
+
+def HexagonCSR
+ : CalleeSavedRegs<(add R16, R17, R18, R19, R20, R21, R22, R23,
+ R24, R25, R26, R27)>;
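+
+// TableGen emits HexagonCSR_RegMask (and a save list) from this definition;
+// it is the mask returned by getCallPreservedMask() in HexagonRegisterInfo.cpp.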
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
index 6e4987b7e4e3..9b5fbea04d18 100644
--- a/lib/Target/Hexagon/HexagonSchedule.td
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -21,4 +21,12 @@ include "HexagonScheduleV55.td"
//===----------------------------------------------------------------------===//
include "HexagonScheduleV60.td"
+include "HexagonIICScalar.td"
+include "HexagonIICHVX.td"
+
+//===----------------------------------------------------------------------===//
+// V62 Machine Info +
+//===----------------------------------------------------------------------===//
+
+include "HexagonScheduleV62.td"
diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td
index 7416baab392c..880cc0a02b6a 100644
--- a/lib/Target/Hexagon/HexagonScheduleV4.td
+++ b/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -61,15 +61,21 @@ def J_tc_2early_SLOT23 : InstrItinClass;
def J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT : InstrItinClass;
def J_tc_2early_SLOT2 : InstrItinClass;
def LD_tc_ld_SLOT01 : InstrItinClass;
+def LD_tc_ld_pi_SLOT01 : InstrItinClass;
def LD_tc_ld_SLOT0 : InstrItinClass;
def LD_tc_3or4stall_SLOT0 : InstrItinClass;
def M_tc_2_SLOT23 : InstrItinClass;
+def M_tc_2_acc_SLOT23 : InstrItinClass;
def M_tc_3_SLOT23 : InstrItinClass;
def M_tc_1_SLOT23 : InstrItinClass;
def M_tc_3x_SLOT23 : InstrItinClass;
+def M_tc_3x_acc_SLOT23 : InstrItinClass;
def M_tc_3or4x_SLOT23 : InstrItinClass;
+def M_tc_3or4x_acc_SLOT23 : InstrItinClass;
def ST_tc_st_SLOT01 : InstrItinClass;
+def ST_tc_st_pi_SLOT01 : InstrItinClass;
def ST_tc_st_SLOT0 : InstrItinClass;
+def ST_tc_st_pi_SLOT0 : InstrItinClass;
def ST_tc_ld_SLOT0 : InstrItinClass;
def ST_tc_3stall_SLOT0 : InstrItinClass;
def S_2op_tc_1_SLOT23 : InstrItinClass;
@@ -131,21 +137,27 @@ def HexagonItinerariesV4 :
//Load
InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<LD_tc_ld_pi_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>]>,
InstrItinData<LD_tc_3or4stall_SLOT0 , [InstrStage<1, [SLOT0]>]>,
// M
InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_2_acc_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_3_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3x_acc_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3or4x_acc_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
// Store
// ST
InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST_tc_st_pi_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
// ST0
InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<ST_tc_st_pi_SLOT0 , [InstrStage<1, [SLOT0]>]>,
InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>]>,
// S
diff --git a/lib/Target/Hexagon/HexagonScheduleV55.td b/lib/Target/Hexagon/HexagonScheduleV55.td
index b2a75f7200d7..06cbcb16abb7 100644
--- a/lib/Target/Hexagon/HexagonScheduleV55.td
+++ b/lib/Target/Hexagon/HexagonScheduleV55.td
@@ -88,6 +88,8 @@ def HexagonItinerariesV55 :
// Load
InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
[2, 1]>,
+ InstrItinData<LD_tc_ld_pi_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
+ [2, 1]>,
InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<1, [SLOT0]>], [2, 1]>,
InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>], [2, 1]>,
@@ -96,21 +98,30 @@ def HexagonItinerariesV55 :
[1, 1, 1]>,
InstrItinData<M_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
[2, 1, 1]>,
+ InstrItinData<M_tc_2_acc_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [2, 1, 1]>,
InstrItinData<M_tc_3_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
[1, 1, 1]>,
InstrItinData<M_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
[3, 1, 1]>,
+ InstrItinData<M_tc_3x_acc_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1, 1]>,
InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
[3, 1, 1]>,
+ InstrItinData<M_tc_3or4x_acc_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
+ [3, 1, 1]>,
InstrItinData<M_tc_3stall_SLOT23, [InstrStage<1, [SLOT2, SLOT3]>],
[3, 1, 1]>,
// Store
InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>],
[1, 1, 1]>,
+ InstrItinData<ST_tc_st_pi_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>],
+ [1, 1, 1]>,
InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<1, [SLOT0]>], [2, 1, 1]>,
InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>], [2, 1, 1]>,
InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>], [1, 1, 1]>,
+ InstrItinData<ST_tc_st_pi_SLOT0 , [InstrStage<1, [SLOT0]>], [1, 1, 1]>,
// S
InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>],
diff --git a/lib/Target/Hexagon/HexagonScheduleV60.td b/lib/Target/Hexagon/HexagonScheduleV60.td
index dc2ce43b0579..63784710f52b 100644
--- a/lib/Target/Hexagon/HexagonScheduleV60.td
+++ b/lib/Target/Hexagon/HexagonScheduleV60.td
@@ -19,6 +19,8 @@ def CVI_LD : FuncUnit;
def CVI_XLSHF : FuncUnit;
def CVI_MPY01 : FuncUnit;
def CVI_ALL : FuncUnit;
+def CVI_XLMPY0 : FuncUnit;
+def CVI_SHFMPY1: FuncUnit;
// Combined functional unit data.
def HexagonComboFuncsV60 :
@@ -26,7 +28,9 @@ def HexagonComboFuncsV60 :
ComboFuncData<CVI_XLSHF , [CVI_XLANE, CVI_SHIFT]>,
ComboFuncData<CVI_MPY01 , [CVI_MPY0, CVI_MPY1]>,
ComboFuncData<CVI_ALL , [CVI_ST, CVI_XLANE, CVI_SHIFT,
- CVI_MPY0, CVI_MPY1, CVI_LD]>
+ CVI_MPY0, CVI_MPY1, CVI_LD]>,
+ ComboFuncData<CVI_XLMPY0 , [CVI_XLANE, CVI_MPY0]>,
+ ComboFuncData<CVI_SHFMPY1 , [CVI_SHIFT, CVI_MPY1]>
]>;
// Note: When adding additional vector scheduling classes, add the
@@ -39,6 +43,7 @@ def CVI_VX : InstrItinClass;
def CVI_VX_DV_LONG : InstrItinClass;
def CVI_VX_DV : InstrItinClass;
def CVI_VX_DV_SLOT2 : InstrItinClass;
+def CVI_VX_DV_SLOT2_LONG_EARLY : InstrItinClass;
def CVI_VP : InstrItinClass;
def CVI_VP_LONG : InstrItinClass;
def CVI_VP_VS_EARLY : InstrItinClass;
@@ -150,22 +155,28 @@ def HexagonItinerariesV60 :
// Load
InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
+ InstrItinData<LD_tc_ld_pi_SLOT01 , [InstrStage<3, [SLOT0, SLOT1]>]>,
InstrItinData<LD_tc_3or4stall_SLOT0, [InstrStage<4, [SLOT0]>]>,
InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
// M
InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_2_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_2_acc_SLOT23 , [InstrStage<2, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_3_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_3x_SLOT23 , [InstrStage<3, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3x_acc_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<4, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3or4x_acc_SLOT23 , [InstrStage<4, [SLOT2, SLOT3]>]>,
InstrItinData<M_tc_3stall_SLOT23, [InstrStage<3, [SLOT2, SLOT3]>]>,
// Store
InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<ST_tc_st_pi_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData<ST_tc_3stall_SLOT0, [InstrStage<3, [SLOT0]>]>,
InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<3, [SLOT0]>]>,
InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<ST_tc_st_pi_SLOT0 , [InstrStage<1, [SLOT0]>]>,
// S
InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
diff --git a/lib/Target/Hexagon/HexagonScheduleV62.td b/lib/Target/Hexagon/HexagonScheduleV62.td
new file mode 100644
index 000000000000..0758788a600b
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonScheduleV62.td
@@ -0,0 +1,129 @@
+//=-HexagonScheduleV62.td - HexagonV62 Scheduling Definitions *- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// V62 follows the same schedule as V60, with the following exceptions:
+// The following instructions are permissible in any slot on V62:
+// V4_J4_cmpeq_fp0_jump_nt
+// V4_J4_cmpeq_fp0_jump_t
+// V4_J4_cmpeq_fp1_jump_nt
+// V4_J4_cmpeq_fp1_jump_t
+// V4_J4_cmpeq_tp0_jump_nt
+// V4_J4_cmpeq_tp0_jump_t
+// V4_J4_cmpeq_tp1_jump_nt
+// V4_J4_cmpeq_tp1_jump_t
+// V4_J4_cmpeqi_fp0_jump_nt
+// V4_J4_cmpeqi_fp0_jump_t
+// V4_J4_cmpeqi_fp1_jump_nt
+// V4_J4_cmpeqi_fp1_jump_t
+// V4_J4_cmpeqi_tp0_jump_nt
+// V4_J4_cmpeqi_tp0_jump_t
+// V4_J4_cmpeqi_tp1_jump_nt
+// V4_J4_cmpeqi_tp1_jump_t
+// V4_J4_cmpeqn1_fp0_jump_nt
+// V4_J4_cmpeqn1_fp0_jump_t
+// V4_J4_cmpeqn1_fp1_jump_nt
+// V4_J4_cmpeqn1_fp1_jump_t
+// V4_J4_cmpeqn1_tp0_jump_nt
+// V4_J4_cmpeqn1_tp0_jump_t
+// V4_J4_cmpeqn1_tp1_jump_nt
+// V4_J4_cmpeqn1_tp1_jump_t
+// V4_J4_cmpgt_fp0_jump_nt
+// V4_J4_cmpgt_fp0_jump_t
+// V4_J4_cmpgt_fp1_jump_nt
+// V4_J4_cmpgt_fp1_jump_t
+// V4_J4_cmpgt_tp0_jump_nt
+// V4_J4_cmpgt_tp0_jump_t
+// V4_J4_cmpgt_tp1_jump_nt
+// V4_J4_cmpgt_tp1_jump_t
+// V4_J4_cmpgti_fp0_jump_nt
+// V4_J4_cmpgti_fp0_jump_t
+// V4_J4_cmpgti_fp1_jump_nt
+// V4_J4_cmpgti_fp1_jump_t
+// V4_J4_cmpgti_tp0_jump_nt
+// V4_J4_cmpgti_tp0_jump_t
+// V4_J4_cmpgti_tp1_jump_nt
+// V4_J4_cmpgti_tp1_jump_t
+// V4_J4_cmpgtn1_fp0_jump_nt
+// V4_J4_cmpgtn1_fp0_jump_t
+// V4_J4_cmpgtn1_fp1_jump_nt
+// V4_J4_cmpgtn1_fp1_jump_t
+// V4_J4_cmpgtn1_tp0_jump_nt
+// V4_J4_cmpgtn1_tp0_jump_t
+// V4_J4_cmpgtn1_tp1_jump_nt
+// V4_J4_cmpgtn1_tp1_jump_t
+// V4_J4_cmpgtu_fp0_jump_nt
+// V4_J4_cmpgtu_fp0_jump_t
+// V4_J4_cmpgtu_fp1_jump_nt
+// V4_J4_cmpgtu_fp1_jump_t
+// V4_J4_cmpgtu_tp0_jump_nt
+// V4_J4_cmpgtu_tp0_jump_t
+// V4_J4_cmpgtu_tp1_jump_nt
+// V4_J4_cmpgtu_tp1_jump_t
+// V4_J4_cmpgtui_fp0_jump_nt
+// V4_J4_cmpgtui_fp0_jump_t
+// V4_J4_cmpgtui_fp1_jump_nt
+// V4_J4_cmpgtui_fp1_jump_t
+// V4_J4_cmpgtui_tp0_jump_nt
+// V4_J4_cmpgtui_tp0_jump_t
+// V4_J4_cmpgtui_tp1_jump_nt
+// V4_J4_cmpgtui_tp1_jump_t
+// V4_J4_tstbit0_fp0_jump_nt
+// V4_J4_tstbit0_fp0_jump_t
+// V4_J4_tstbit0_fp1_jump_nt
+// V4_J4_tstbit0_fp1_jump_t
+// V4_J4_tstbit0_tp0_jump_nt
+// V4_J4_tstbit0_tp0_jump_t
+// V4_J4_tstbit0_tp1_jump_nt
+// V4_J4_tstbit0_tp1_jump_t
+// JMP
+// JMPEXT
+// JMPEXT_f
+// JMPEXT_fnew_nt
+// JMPEXT_fnew_t
+// JMPEXT_t
+// JMPEXT_tnew_nt
+// JMPEXT_tnew_t
+// JMPNOTEXT
+// JMPNOTEXT_f
+// JMPNOTEXT_fnew_nt
+// JMPNOTEXT_fnew_t
+// JMPNOTEXT_t
+// JMPNOTEXT_tnew_nt
+// JMPNOTEXT_tnew_t
+// JMP_f
+// JMP_fnew_nt
+// JMP_fnew_t
+// JMP_t
+// JMP_tnew_nt
+// JMP_tnew_t
+// RESTORE_DEALLOC_RET_JMP_V4
+// RESTORE_DEALLOC_RET_JMP_V4_EXT
+
+def HexagonV62ItinList : ScalarItin, HVXV62Itin {
+ list<InstrItinData> ItinList =
+ !listconcat(ScalarItin_list, HVXV62Itin_list);
+}
+
+def HexagonItinerariesV62 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP,
+ CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1,
+ CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL],
+ [], HexagonV62ItinList.ItinList>;
+
+def HexagonModelV62 : SchedMachineModel {
+ // Max issue per cycle == bundle width.
+ let IssueWidth = 4;
+ let Itineraries = HexagonItinerariesV62;
+ let LoadLatency = 1;
+ let CompleteModel = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon V62 Resource Definitions -
+//===----------------------------------------------------------------------===//
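
One way to sanity-check the new model once the CPU name is wired through the subtarget table: resolve the v62 itineraries via the MC layer. A sketch assuming the Hexagon target has been registered; not part of this patch.

  std::string Err;
  const llvm::Target *T = llvm::TargetRegistry::lookupTarget("hexagon", Err);
  std::unique_ptr<llvm::MCSubtargetInfo> STI(
      T->createMCSubtargetInfo("hexagon", "hexagonv62", ""));
  llvm::InstrItineraryData IID = STI->getInstrItineraryForCPU("hexagonv62");
  assert(!IID.isEmpty() && "v62 itineraries should resolve");
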
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
index 10730536080e..002e87fb32ce 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -51,11 +51,12 @@ SDValue HexagonSelectionDAGInfo::EmitTargetCodeForMemcpy(
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getTargetExternalSymbol(SpecialMemcpyName,
- TLI.getPointerTy(DAG.getDataLayout()), Flags),
- std::move(Args))
+ .setLibCallee(
+ TLI.getLibcallCallingConv(RTLIB::MEMCPY),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getTargetExternalSymbol(
+ SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout()), Flags),
+ std::move(Args))
.setDiscardResult();
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp
index 2c937216d463..471e32221b29 100644
--- a/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -131,13 +131,15 @@ namespace {
INITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double",
"Hexagon Split Double Registers", false, false)
-void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os,
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os,
const USet &Part, const TargetRegisterInfo &TRI) {
dbgs() << '{';
for (auto I : Part)
dbgs() << ' ' << PrintReg(I, &TRI);
dbgs() << " }";
}
+#endif
bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const {
for (auto I : IRM) {
@@ -391,7 +393,7 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM)
const {
- unsigned FixedNum = 0, SplitNum = 0, LoopPhiNum = 0;
+ unsigned FixedNum = 0, LoopPhiNum = 0;
int32_t TotalP = 0;
for (unsigned DR : Part) {
@@ -428,7 +430,6 @@ bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM)
LoopPhiNum++;
}
// Splittable instruction.
- SplitNum++;
int32_t P = profit(UseI);
if (P == std::numeric_limits<int>::min())
return false;
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 8c23a2465dd6..033b93fc910a 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -88,6 +88,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
{ "hexagonv5", V5 },
{ "hexagonv55", V55 },
{ "hexagonv60", V60 },
+ { "hexagonv62", V62 },
};
auto foundIt = CpuTable.find(CPUString);
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index f2b9cdaad1ae..6a3e7f13be4c 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -38,9 +38,7 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
bool ModeIEEERndNear;
public:
- enum HexagonArchEnum {
- V4, V5, V55, V60
- };
+#include "HexagonDepArch.h"
HexagonArchEnum HexagonArchVersion;
/// True if the target should use Back-Skip-Back scheduling. This is the
@@ -98,6 +96,9 @@ public:
bool hasV55TOpsOnly() const { return getHexagonArchVersion() == V55; }
bool hasV60TOps() const { return getHexagonArchVersion() >= V60; }
bool hasV60TOpsOnly() const { return getHexagonArchVersion() == V60; }
+ bool hasV62TOps() const { return getHexagonArchVersion() >= V62; }
+ bool hasV62TOpsOnly() const { return getHexagonArchVersion() == V62; }
+
bool modeIEEERndNear() const { return ModeIEEERndNear; }
bool useHVXOps() const { return UseHVXOps; }
bool useHVXDblOps() const { return UseHVXOps && UseHVXDblOps; }
diff --git a/lib/Target/Hexagon/HexagonSystemInst.td b/lib/Target/Hexagon/HexagonSystemInst.td
deleted file mode 100644
index 629a98749ee9..000000000000
--- a/lib/Target/Hexagon/HexagonSystemInst.td
+++ /dev/null
@@ -1,134 +0,0 @@
-//==- HexagonSystemInst.td - System Instructions for Hexagon -*- tablegen -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Hexagon instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Cache manipulation instructions.
-//===----------------------------------------------------------------------===//
-let mayStore = 1 in
-class ST_MISC_CACHEOP<dag outs, dag ins,
- string asmstr, list<dag> pattern = [],
- bits<3> amode, bits<3> type, bits<1> un>
- : ST0Inst<outs, ins, asmstr, pattern, "", ST_tc_ld_SLOT0> {
-
- bits<5> Rs;
- bits<5> Rt;
- bits<5> Rd;
- let Inst{31-28} = 0b1010;
- let Inst{27-25} = amode;
- let Inst{24-22} = type;
- let Inst{21} = un;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{4-0} = Rd;
-}
-
-let mayStore = 1 in
-class ST_MISC_CACHEOP_SYS<dag outs, dag ins,
- string asmstr, list<dag> pattern = [],
- bits<3> amode, bits<3> type, bits<1> un>
- : SYSInst<outs, ins, asmstr, pattern, ""> {
-
- bits<5> Rs;
- bits<5> Rt;
- bits<5> Rd;
- let Inst{31-28} = 0b1010;
- let Inst{27-25} = amode;
- let Inst{24-22} = type;
- let Inst{21} = un;
- let Inst{20-16} = Rs;
- let Inst{12-8} = Rt;
- let Inst{4-0} = Rd;
-}
-
-
-let isSolo = 1, Rs = 0, Rt = 0, Rd = 0 in {
-def Y2_syncht: ST_MISC_CACHEOP <(outs), (ins),
- "syncht" , [], 0b100, 0b001, 0b0>;
-}
-
-let Rt = 0, Rd = 0 in {
-let isSoloAin1 = 1 in {
- def Y2_dccleana: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs),
- "dccleana($Rs)", [], 0b000, 0b000, 0b0>;
- def Y2_dcinva: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs),
- "dcinva($Rs)", [], 0b000, 0b000, 0b1>;
- def Y2_dccleaninva: ST_MISC_CACHEOP <(outs), (ins IntRegs:$Rs),
- "dccleaninva($Rs)", [], 0b000, 0b001, 0b0>;
- }
-}
-
-let isSoloAX = 1, hasSideEffects = 1, Rd = 0 in {
- def Y4_l2fetch: ST_MISC_CACHEOP_SYS<(outs), (ins IntRegs:$Rs, IntRegs:$Rt),
- "l2fetch($Rs, $Rt)", [], 0b011, 0b000, 0b0>;
- def Y5_l2fetch: ST_MISC_CACHEOP_SYS<(outs), (ins IntRegs:$Rs, DoubleRegs:$Rt),
- "l2fetch($Rs, $Rt)", [], 0b011, 0b010, 0b0>;
-}
-
-let hasSideEffects = 0, isSolo = 1 in
-class Y2_INVALIDATE_CACHE<string mnemonic, bit MajOp>
- : JRInst <
- (outs), (ins IntRegs:$Rs),
- #mnemonic#"($Rs)" > {
- bits<5> Rs;
-
- let IClass = 0b0101;
- let Inst{27-21} = 0b0110110;
- let Inst{20-16} = Rs;
- let Inst{13-12} = 0b00;
- let Inst{11} = MajOp;
- }
-// Instruction cache invalidate
-def Y2_icinva : Y2_INVALIDATE_CACHE<"icinva", 0b0>;
-
-// Zero an aligned 32-byte cacheline.
-let isSoloAin1 = 1 in
-def Y2_dczeroa: ST0Inst <(outs), (ins IntRegs:$Rs),
- "dczeroa($Rs)"> {
- bits<5> Rs;
- let IClass = 0b1010;
- let Inst{27-21} = 0b0000110;
- let Inst{13} = 0b0;
- let Inst{20-16} = Rs;
- }
-
-// Memory synchronization.
-let hasSideEffects = 0, isSolo = 1 in
-def Y2_isync: JRInst <(outs), (ins),
- "isync"> {
- let IClass = 0b0101;
- let Inst{27-16} = 0b011111000000;
- let Inst{13} = 0b0;
- let Inst{9-0} = 0b0000000010;
- }
-
-//===----------------------------------------------------------------------===//
-// System/User instructions.
-//===----------------------------------------------------------------------===//
-// traps and pause
-let hasSideEffects = 0, isSolo = 1 in
-class J2_MISC_TRAP_PAUSE<string mnemonic, bits<2> MajOp>
- : JRInst
- <(outs), (ins u8_0Imm:$u8),
- #mnemonic#"(#$u8)"> {
- bits<8> u8;
-
- let IClass = 0b0101;
- let Inst{27-24} = 0b0100;
- let Inst{23-22} = MajOp;
- let Inst{12-8} = u8{7-3};
- let Inst{4-2} = u8{2-0};
- }
-def J2_trap0 : J2_MISC_TRAP_PAUSE<"trap0", 0b00>;
-def J2_trap1 : J2_MISC_TRAP_PAUSE<"trap1", 0b10>;
-def J2_pause : J2_MISC_TRAP_PAUSE<"pause", 0b01>;
-
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 132d12a66d46..06fc9195fa67 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
using namespace llvm;
@@ -98,11 +99,6 @@ static cl::opt<bool> EnableVectorPrint("enable-hexagon-vector-print",
extern "C" int HexagonTargetMachineModule;
int HexagonTargetMachineModule = 0;
-extern "C" void LLVMInitializeHexagonTarget() {
- // Register the target.
- RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
-}
-
static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) {
return new VLIWMachineScheduler(C, make_unique<ConvergingVLIWScheduler>());
}
@@ -114,6 +110,8 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
namespace llvm {
extern char &HexagonExpandCondsetsID;
void initializeHexagonExpandCondsetsPass(PassRegistry&);
+ void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
+ Pass *createHexagonLoopIdiomPass();
FunctionPass *createHexagonBitSimplify();
FunctionPass *createHexagonBranchRelaxation();
@@ -150,6 +148,12 @@ static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
return *RM;
}
+extern "C" void LLVMInitializeHexagonTarget() {
+ // Register the target.
+ RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
+ initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+}
+
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
@@ -172,11 +176,11 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
const HexagonSubtarget *
HexagonTargetMachine::getSubtargetImpl(const Function &F) const {
- AttributeSet FnAttrs = F.getAttributes();
+ AttributeList FnAttrs = F.getAttributes();
Attribute CPUAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ FnAttrs.getAttribute(AttributeList::FunctionIndex, "target-cpu");
Attribute FSAttr =
- FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
+ FnAttrs.getAttribute(AttributeList::FunctionIndex, "target-features");
std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
? CPUAttr.getValueAsString().str()
@@ -196,6 +200,14 @@ HexagonTargetMachine::getSubtargetImpl(const Function &F) const {
return I.get();
}
+void HexagonTargetMachine::adjustPassManager(PassManagerBuilder &PMB) {
+ PMB.addExtension(
+ PassManagerBuilder::EP_LateLoopOptimizations,
+ [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ PM.add(createHexagonLoopIdiomPass());
+ });
+}
+
TargetIRAnalysis HexagonTargetMachine::getTargetIRAnalysis() {
return TargetIRAnalysis([this](const Function &F) {
return TargetTransformInfo(HexagonTTIImpl(this, F));
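
The extension registered in adjustPassManager() only fires when the front end routes pipeline construction through the builder; a minimal sketch of that flow, using standard legacy pass-manager calls (not part of this patch):

  llvm::legacy::PassManager MPM;
  llvm::PassManagerBuilder PMB;
  PMB.OptLevel = 2;
  TM->adjustPassManager(PMB);          // installs the EP_LateLoopOptimizations hook
  PMB.populateModulePassManager(MPM);
  MPM.run(*M);                         // Hexagon loop idiom pass now runs late in loop opts
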
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index 70835c0d4ac5..3d01929fbfb8 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -37,6 +37,7 @@ public:
static unsigned getModuleMatchQuality(const Module &M);
+ void adjustPassManager(PassManagerBuilder &PMB) override;
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
TargetIRAnalysis getTargetIRAnalysis() override;
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 7b1247d815a5..3a789a5f7e0b 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -440,7 +440,7 @@ bool HexagonPacketizerList::promoteToDotNew(MachineInstr &MI,
}
bool HexagonPacketizerList::demoteToDotOld(MachineInstr &MI) {
- int NewOpcode = HII->getDotOldOp(MI.getOpcode());
+ int NewOpcode = HII->getDotOldOp(MI);
MI.setDesc(HII->get(NewOpcode));
return true;
}
@@ -720,6 +720,8 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI,
// %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
// S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
for (auto &MO : PacketMI.operands()) {
+ if (MO.isRegMask() && MO.clobbersPhysReg(DepReg))
+ return false;
if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
continue;
unsigned R = MO.getReg();
@@ -759,9 +761,12 @@ bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr &MI,
}
static bool isImplicitDependency(const MachineInstr &I, unsigned DepReg) {
- for (auto &MO : I.operands())
+ for (auto &MO : I.operands()) {
+ if (MO.isRegMask() && MO.clobbersPhysReg(DepReg))
+ return true;
if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit())
return true;
+ }
return false;
}
@@ -1046,7 +1051,9 @@ static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ,
// XTYPE instructions. Since there is no convenient way of identifying fp
// XTYPE instructions, only allow grouping with ALU32 for now.
unsigned TJ = HII.getType(MJ);
- if (TJ != HexagonII::TypeALU32)
+ if (TJ != HexagonII::TypeALU32_2op &&
+ TJ != HexagonII::TypeALU32_3op &&
+ TJ != HexagonII::TypeALU32_ADDI)
return true;
break;
}
@@ -1171,6 +1178,36 @@ bool HexagonPacketizerList::hasControlDependence(const MachineInstr &I,
(J.isBranch() || J.isCall() || J.isBarrier());
}
+bool HexagonPacketizerList::hasRegMaskDependence(const MachineInstr &I,
+ const MachineInstr &J) {
+ // Adding I to a packet that has J.
+
+ // Regmasks are not reflected in the scheduling dependency graph, so
+ // we need to check them manually. This code assumes that regmasks only
+ // occur on calls, and the problematic case is when we add an instruction
+ // defining a register R to a packet that has a call that clobbers R via
+ // a regmask. Those cannot be packetized together, because the call will
+ // be executed last. That's also a reason why it is ok to add a call
+ // clobbering R to a packet that defines R.
+
+ // Look for regmasks in J.
+ for (const MachineOperand &OpJ : J.operands()) {
+ if (!OpJ.isRegMask())
+ continue;
+ assert((J.isCall() || HII->isTailCall(J)) && "Regmask on a non-call");
+ for (const MachineOperand &OpI : I.operands()) {
+ if (OpI.isReg()) {
+ if (OpJ.clobbersPhysReg(OpI.getReg()))
+ return true;
+ } else if (OpI.isRegMask()) {
+ // Both are regmasks. Assume that they intersect.
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr &I,
const MachineInstr &J) {
bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
@@ -1217,6 +1254,14 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
if (Dependence)
return false;
+ // Regmasks are not accounted for in the scheduling graph, so we need
+ // to explicitly check for dependencies caused by them. They should only
+ // appear on calls, so it's not too pessimistic to reject all regmask
+ // dependencies.
+ Dependence = hasRegMaskDependence(I, J);
+ if (Dependence)
+ return false;
+
// V4 allows dual stores. It does not allow second store, if the first
// store is not in SLOT0. New value store, new value jump, dealloc_return
// and memop always take SLOT0. Arch spec 3.4.4.2.
@@ -1465,13 +1510,19 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// R0 = ... ; SUI
// Those cannot be packetized together, since the call will observe
// the effect of the assignment to R0.
- if (DepType == SDep::Anti && J.isCall()) {
+ if ((DepType == SDep::Anti || DepType == SDep::Output) && J.isCall()) {
// Check if I defines any volatile register. We should also check
// registers that the call may read, but these happen to be a
// subset of the volatile register set.
- for (const MCPhysReg *P = J.getDesc().ImplicitDefs; P && *P; ++P) {
- if (!I.modifiesRegister(*P, HRI))
+ for (const MachineOperand &Op : I.operands()) {
+ if (Op.isReg() && Op.isDef()) {
+ unsigned R = Op.getReg();
+ if (!J.readsRegister(R, HRI) && !J.modifiesRegister(R, HRI))
+ continue;
+ } else if (!Op.isRegMask()) {
+ // If I has a regmask assume dependency.
continue;
+ }
FoundSequentialDependence = true;
break;
}
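
The regmask test used throughout these hunks reduces to a single bit check per operand; a standalone illustration follows (the helper name is assumed, not in the patch):

  // A call's regmask operand clobbers R exactly when R's bit is clear in
  // the mask; MachineOperand::clobbersPhysReg performs that bit test.
  static bool callClobbersViaRegMask(const llvm::MachineInstr &Call,
                                     unsigned R) {
    for (const llvm::MachineOperand &MO : Call.operands())
      if (MO.isRegMask() && MO.clobbersPhysReg(R))
        return true;
    return false;
  }
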
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
index b28b926ec300..3f28dc5b79ce 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.h
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -7,6 +7,9 @@
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
namespace llvm {
+class HexagonInstrInfo;
+class HexagonRegisterInfo;
+
class HexagonPacketizerList : public VLIWPacketizerList {
// Vector of instructions assigned to the packet that has just been created.
std::vector<MachineInstr*> OldPacketMIs;
@@ -109,6 +112,7 @@ protected:
void reserveResourcesForConstExt();
bool hasDeadDependence(const MachineInstr &I, const MachineInstr &J);
bool hasControlDependence(const MachineInstr &I, const MachineInstr &J);
+ bool hasRegMaskDependence(const MachineInstr &I, const MachineInstr &J);
bool hasV4SpecificDependence(const MachineInstr &I, const MachineInstr &J);
bool producesStall(const MachineInstr &MI);
};
diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt
index e0077acc7af0..7a27a8c5e10f 100644
--- a/lib/Target/Hexagon/LLVMBuild.txt
+++ b/lib/Target/Hexagon/LLVMBuild.txt
@@ -36,6 +36,7 @@ required_libraries =
HexagonAsmParser
HexagonDesc
HexagonInfo
+ IPO
MC
Scalar
SelectionDAG
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index c140bd1d7ee2..337af294eb86 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -9,10 +9,10 @@
#include "Hexagon.h"
#include "HexagonFixupKinds.h"
-#include "HexagonMCTargetDesc.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCChecker.h"
#include "MCTargetDesc/HexagonMCCodeEmitter.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCShuffler.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -59,9 +59,10 @@ class HexagonAsmBackend : public MCAsmBackend {
RF.getFixups() = Fixups;
}
public:
- HexagonAsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) :
- OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *),
- Extender(nullptr) {}
+ HexagonAsmBackend(const Target &T, const Triple &TT, uint8_t OSABI,
+ StringRef CPU) :
+ OSABI(OSABI), CPU(CPU), MCII(T.createMCInstrInfo()),
+ RelaxTarget(new MCInst *), Extender(nullptr) {}
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createHexagonELFObjectWriter(OS, OSABI, CPU);
@@ -88,101 +89,101 @@ public:
// This table *must* be in same the order of fixup_* kinds in
// HexagonFixupKinds.h.
//
- // namei offset bits flags
- { "fixup_Hexagon_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_B15_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_B7_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_LO16", 0, 32, 0 },
- { "fixup_Hexagon_HI16", 0, 32, 0 },
- { "fixup_Hexagon_32", 0, 32, 0 },
- { "fixup_Hexagon_16", 0, 32, 0 },
- { "fixup_Hexagon_8", 0, 32, 0 },
- { "fixup_Hexagon_GPREL16_0", 0, 32, 0 },
- { "fixup_Hexagon_GPREL16_1", 0, 32, 0 },
- { "fixup_Hexagon_GPREL16_2", 0, 32, 0 },
- { "fixup_Hexagon_GPREL16_3", 0, 32, 0 },
- { "fixup_Hexagon_HL16", 0, 32, 0 },
- { "fixup_Hexagon_B13_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_B9_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_B32_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_B22_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_B15_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_B13_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_B9_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_B7_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_16_X", 0, 32, 0 },
- { "fixup_Hexagon_12_X", 0, 32, 0 },
- { "fixup_Hexagon_11_X", 0, 32, 0 },
- { "fixup_Hexagon_10_X", 0, 32, 0 },
- { "fixup_Hexagon_9_X", 0, 32, 0 },
- { "fixup_Hexagon_8_X", 0, 32, 0 },
- { "fixup_Hexagon_7_X", 0, 32, 0 },
- { "fixup_Hexagon_6_X", 0, 32, 0 },
- { "fixup_Hexagon_32_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_COPY", 0, 32, 0 },
- { "fixup_Hexagon_GLOB_DAT", 0, 32, 0 },
- { "fixup_Hexagon_JMP_SLOT", 0, 32, 0 },
- { "fixup_Hexagon_RELATIVE", 0, 32, 0 },
- { "fixup_Hexagon_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_GOTREL_LO16", 0, 32, 0 },
- { "fixup_Hexagon_GOTREL_HI16", 0, 32, 0 },
- { "fixup_Hexagon_GOTREL_32", 0, 32, 0 },
- { "fixup_Hexagon_GOT_LO16", 0, 32, 0 },
- { "fixup_Hexagon_GOT_HI16", 0, 32, 0 },
- { "fixup_Hexagon_GOT_32", 0, 32, 0 },
- { "fixup_Hexagon_GOT_16", 0, 32, 0 },
- { "fixup_Hexagon_DTPMOD_32", 0, 32, 0 },
- { "fixup_Hexagon_DTPREL_LO16", 0, 32, 0 },
- { "fixup_Hexagon_DTPREL_HI16", 0, 32, 0 },
- { "fixup_Hexagon_DTPREL_32", 0, 32, 0 },
- { "fixup_Hexagon_DTPREL_16", 0, 32, 0 },
- { "fixup_Hexagon_GD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_LD_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_GD_GOT_LO16", 0, 32, 0 },
- { "fixup_Hexagon_GD_GOT_HI16", 0, 32, 0 },
- { "fixup_Hexagon_GD_GOT_32", 0, 32, 0 },
- { "fixup_Hexagon_GD_GOT_16", 0, 32, 0 },
- { "fixup_Hexagon_LD_GOT_LO16", 0, 32, 0 },
- { "fixup_Hexagon_LD_GOT_HI16", 0, 32, 0 },
- { "fixup_Hexagon_LD_GOT_32", 0, 32, 0 },
- { "fixup_Hexagon_LD_GOT_16", 0, 32, 0 },
- { "fixup_Hexagon_IE_LO16", 0, 32, 0 },
- { "fixup_Hexagon_IE_HI16", 0, 32, 0 },
- { "fixup_Hexagon_IE_32", 0, 32, 0 },
- { "fixup_Hexagon_IE_16", 0, 32, 0 },
- { "fixup_Hexagon_IE_GOT_LO16", 0, 32, 0 },
- { "fixup_Hexagon_IE_GOT_HI16", 0, 32, 0 },
- { "fixup_Hexagon_IE_GOT_32", 0, 32, 0 },
- { "fixup_Hexagon_IE_GOT_16", 0, 32, 0 },
- { "fixup_Hexagon_TPREL_LO16", 0, 32, 0 },
- { "fixup_Hexagon_TPREL_HI16", 0, 32, 0 },
- { "fixup_Hexagon_TPREL_32", 0, 32, 0 },
- { "fixup_Hexagon_TPREL_16", 0, 32, 0 },
- { "fixup_Hexagon_6_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_Hexagon_GOTREL_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_GOTREL_16_X", 0, 32, 0 },
- { "fixup_Hexagon_GOTREL_11_X", 0, 32, 0 },
- { "fixup_Hexagon_GOT_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_GOT_16_X", 0, 32, 0 },
- { "fixup_Hexagon_GOT_11_X", 0, 32, 0 },
- { "fixup_Hexagon_DTPREL_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_DTPREL_16_X", 0, 32, 0 },
- { "fixup_Hexagon_DTPREL_11_X", 0, 32, 0 },
- { "fixup_Hexagon_GD_GOT_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_GD_GOT_16_X", 0, 32, 0 },
- { "fixup_Hexagon_GD_GOT_11_X", 0, 32, 0 },
- { "fixup_Hexagon_LD_GOT_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_LD_GOT_16_X", 0, 32, 0 },
- { "fixup_Hexagon_LD_GOT_11_X", 0, 32, 0 },
- { "fixup_Hexagon_IE_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_IE_16_X", 0, 32, 0 },
- { "fixup_Hexagon_IE_GOT_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_IE_GOT_16_X", 0, 32, 0 },
- { "fixup_Hexagon_IE_GOT_11_X", 0, 32, 0 },
- { "fixup_Hexagon_TPREL_32_6_X", 0, 32, 0 },
- { "fixup_Hexagon_TPREL_16_X", 0, 32, 0 },
- { "fixup_Hexagon_TPREL_11_X", 0, 32, 0 }
+ // name offset bits flags
+ { "fixup_Hexagon_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_B15_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_B7_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_32", 0, 32, 0 },
+ { "fixup_Hexagon_16", 0, 32, 0 },
+ { "fixup_Hexagon_8", 0, 32, 0 },
+ { "fixup_Hexagon_GPREL16_0", 0, 32, 0 },
+ { "fixup_Hexagon_GPREL16_1", 0, 32, 0 },
+ { "fixup_Hexagon_GPREL16_2", 0, 32, 0 },
+ { "fixup_Hexagon_GPREL16_3", 0, 32, 0 },
+ { "fixup_Hexagon_HL16", 0, 32, 0 },
+ { "fixup_Hexagon_B13_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_B9_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_B32_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_B22_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_B15_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_B13_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_B9_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_B7_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_12_X", 0, 32, 0 },
+ { "fixup_Hexagon_11_X", 0, 32, 0 },
+ { "fixup_Hexagon_10_X", 0, 32, 0 },
+ { "fixup_Hexagon_9_X", 0, 32, 0 },
+ { "fixup_Hexagon_8_X", 0, 32, 0 },
+ { "fixup_Hexagon_7_X", 0, 32, 0 },
+ { "fixup_Hexagon_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_32_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_COPY", 0, 32, 0 },
+ { "fixup_Hexagon_GLOB_DAT", 0, 32, 0 },
+ { "fixup_Hexagon_JMP_SLOT", 0, 32, 0 },
+ { "fixup_Hexagon_RELATIVE", 0, 32, 0 },
+ { "fixup_Hexagon_PLT_B22_PCREL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_GOTREL_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_GOTREL_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_GOTREL_32", 0, 32, 0 },
+ { "fixup_Hexagon_GOT_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_GOT_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_GOT_32", 0, 32, 0 },
+ { "fixup_Hexagon_GOT_16", 0, 32, 0 },
+ { "fixup_Hexagon_DTPMOD_32", 0, 32, 0 },
+ { "fixup_Hexagon_DTPREL_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_DTPREL_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_DTPREL_32", 0, 32, 0 },
+ { "fixup_Hexagon_DTPREL_16", 0, 32, 0 },
+ { "fixup_Hexagon_GD_PLT_B22_PCREL",0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_LD_PLT_B22_PCREL",0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_GD_GOT_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_GD_GOT_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_GD_GOT_32", 0, 32, 0 },
+ { "fixup_Hexagon_GD_GOT_16", 0, 32, 0 },
+ { "fixup_Hexagon_LD_GOT_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_LD_GOT_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_LD_GOT_32", 0, 32, 0 },
+ { "fixup_Hexagon_LD_GOT_16", 0, 32, 0 },
+ { "fixup_Hexagon_IE_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_IE_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_IE_32", 0, 32, 0 },
+ { "fixup_Hexagon_IE_16", 0, 32, 0 },
+ { "fixup_Hexagon_IE_GOT_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_IE_GOT_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_IE_GOT_32", 0, 32, 0 },
+ { "fixup_Hexagon_IE_GOT_16", 0, 32, 0 },
+ { "fixup_Hexagon_TPREL_LO16", 0, 32, 0 },
+ { "fixup_Hexagon_TPREL_HI16", 0, 32, 0 },
+ { "fixup_Hexagon_TPREL_32", 0, 32, 0 },
+ { "fixup_Hexagon_TPREL_16", 0, 32, 0 },
+ { "fixup_Hexagon_6_PCREL_X", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Hexagon_GOTREL_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_GOTREL_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_GOTREL_11_X", 0, 32, 0 },
+ { "fixup_Hexagon_GOT_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_GOT_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_GOT_11_X", 0, 32, 0 },
+ { "fixup_Hexagon_DTPREL_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_DTPREL_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_DTPREL_11_X", 0, 32, 0 },
+ { "fixup_Hexagon_GD_GOT_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_GD_GOT_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_GD_GOT_11_X", 0, 32, 0 },
+ { "fixup_Hexagon_LD_GOT_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_LD_GOT_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_LD_GOT_11_X", 0, 32, 0 },
+ { "fixup_Hexagon_IE_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_IE_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_IE_GOT_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_IE_GOT_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_IE_GOT_11_X", 0, 32, 0 },
+ { "fixup_Hexagon_TPREL_32_6_X", 0, 32, 0 },
+ { "fixup_Hexagon_TPREL_16_X", 0, 32, 0 },
+ { "fixup_Hexagon_TPREL_11_X", 0, 32, 0 }
};
if (Kind < FirstTargetFixupKind)
@@ -401,7 +402,8 @@ public:
/// data fragment, at the offset specified by the fixup and following the
/// fixup kind as appropriate.
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t FixupValue, bool IsPCRel) const override {
+ uint64_t FixupValue, bool IsPCRel,
+ MCContext &Ctx) const override {
// When FixupValue is 0 the relocation is external and there
// is nothing for us to do.
@@ -524,10 +526,9 @@ public:
bool Relaxable = false;
// Branches and loop-setup insns are handled as necessary by relaxation.
if (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeJ ||
- (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) ==
- HexagonII::TypeCOMPOUND &&
+ (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeCJ &&
MCID.isBranch()) ||
- (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeNV &&
+ (llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeNCJ &&
MCID.isBranch()) ||
(llvm::HexagonMCInstrInfo::getType(*MCII, HMI) == HexagonII::TypeCR &&
HMI.getOpcode() != Hexagon::C4_addipc))
@@ -724,7 +725,8 @@ public:
Size = 0;
}
}
- bool Error = HexagonMCShuffle(*MCII, RF.getSubtargetInfo(), Inst);
+ bool Error = HexagonMCShuffle(true, *MCII, RF.getSubtargetInfo(),
+ Inst);
//assert(!Error);
(void)Error;
ReplaceInstruction(Asm.getEmitter(), RF, Inst);
@@ -739,15 +741,17 @@ public:
}
}
}
-};
-} // end anonymous namespace
+}; // class HexagonAsmBackend
-namespace llvm {
-MCAsmBackend *createHexagonAsmBackend(Target const &T,
+} // namespace
+
+// MCAsmBackend
+MCAsmBackend *llvm::createHexagonAsmBackend(Target const &T,
MCRegisterInfo const & /*MRI*/,
const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
- return new HexagonAsmBackend(T, OSABI, CPU);
-}
+
+ StringRef CPUString = Hexagon_MC::selectHexagonCPU(TT, CPU);
+ return new HexagonAsmBackend(T, TT, OSABI, CPUString);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 4292f6b3faa4..9c80312b790d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -17,6 +17,7 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONBASEINFO_H
#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONBASEINFO_H
+#include "HexagonDepITypes.h"
#include "HexagonMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include <stdint.h>
@@ -27,57 +28,14 @@ namespace llvm {
/// instruction info tracks.
///
namespace HexagonII {
- // *** The code below must match HexagonInstrFormat*.td *** //
-
- // Insn types.
- // *** Must match HexagonInstrFormat*.td ***
- enum Type {
- TypePSEUDO = 0,
- TypeALU32 = 1,
- TypeCR = 2,
- TypeJR = 3,
- TypeJ = 4,
- TypeLD = 5,
- TypeST = 6,
- TypeSYSTEM = 7,
- TypeXTYPE = 8,
- TypeV4LDST = 9,
- TypeNV = 10,
- TypeDUPLEX = 11,
- TypeCOMPOUND = 12,
- TypeCVI_FIRST = 13,
- TypeCVI_VA = TypeCVI_FIRST,
- TypeCVI_VA_DV = 14,
- TypeCVI_VX = 15,
- TypeCVI_VX_DV = 16,
- TypeCVI_VP = 17,
- TypeCVI_VP_VS = 18,
- TypeCVI_VS = 19,
- TypeCVI_VINLANESAT= 20,
- TypeCVI_VM_LD = 21,
- TypeCVI_VM_TMP_LD = 22,
- TypeCVI_VM_CUR_LD = 23,
- TypeCVI_VM_VP_LDU = 24,
- TypeCVI_VM_ST = 25,
- TypeCVI_VM_NEW_ST = 26,
- TypeCVI_VM_STU = 27,
- TypeCVI_HIST = 28,
- TypeCVI_LAST = TypeCVI_HIST,
- TypePREFIX = 30, // Such as extenders.
- TypeENDLOOP = 31 // Such as end of a HW loop.
- };
+ unsigned const TypeCVI_FIRST = TypeCVI_HIST;
+ unsigned const TypeCVI_LAST = TypeCVI_VX_DV;
enum SubTarget {
- HasV2SubT = 0xf,
- HasV2SubTOnly = 0x1,
- NoV2SubT = 0x0,
- HasV3SubT = 0xe,
- HasV3SubTOnly = 0x2,
- NoV3SubT = 0x1,
- HasV4SubT = 0xc,
- NoV4SubT = 0x3,
- HasV5SubT = 0x8,
- NoV5SubT = 0x7
+ HasV4SubT = 0x3f,
+ HasV5SubT = 0x3e,
+ HasV55SubT = 0x3c,
+ HasV60SubT = 0x38,
};
enum AddrMode {
@@ -107,102 +65,101 @@ namespace HexagonII {
enum {
// This 5-bit field describes the insn type.
TypePos = 0,
- TypeMask = 0x1f,
+ TypeMask = 0x3f,
// Solo instructions.
- SoloPos = 5,
+ SoloPos = 6,
SoloMask = 0x1,
// Packed only with A or X-type instructions.
- SoloAXPos = 6,
+ SoloAXPos = 7,
SoloAXMask = 0x1,
// Only A-type instruction in first slot or nothing.
- SoloAin1Pos = 7,
+ SoloAin1Pos = 8,
SoloAin1Mask = 0x1,
// Predicated instructions.
- PredicatedPos = 8,
+ PredicatedPos = 9,
PredicatedMask = 0x1,
- PredicatedFalsePos = 9,
+ PredicatedFalsePos = 10,
PredicatedFalseMask = 0x1,
- PredicatedNewPos = 10,
+ PredicatedNewPos = 11,
PredicatedNewMask = 0x1,
- PredicateLatePos = 11,
+ PredicateLatePos = 12,
PredicateLateMask = 0x1,
// New-Value consumer instructions.
- NewValuePos = 12,
+ NewValuePos = 13,
NewValueMask = 0x1,
// New-Value producer instructions.
- hasNewValuePos = 13,
+ hasNewValuePos = 14,
hasNewValueMask = 0x1,
// Which operand consumes or produces a new value.
- NewValueOpPos = 14,
+ NewValueOpPos = 15,
NewValueOpMask = 0x7,
// Stores that can become new-value stores.
- mayNVStorePos = 17,
+ mayNVStorePos = 18,
mayNVStoreMask = 0x1,
// New-value store instructions.
- NVStorePos = 18,
+ NVStorePos = 19,
NVStoreMask = 0x1,
// Loads that can become current-value loads.
- mayCVLoadPos = 19,
+ mayCVLoadPos = 20,
mayCVLoadMask = 0x1,
// Current-value load instructions.
- CVLoadPos = 20,
+ CVLoadPos = 21,
CVLoadMask = 0x1,
// Extendable insns.
- ExtendablePos = 21,
+ ExtendablePos = 22,
ExtendableMask = 0x1,
// Insns must be extended.
- ExtendedPos = 22,
+ ExtendedPos = 23,
ExtendedMask = 0x1,
// Which operand may be extended.
- ExtendableOpPos = 23,
+ ExtendableOpPos = 24,
ExtendableOpMask = 0x7,
// Signed or unsigned range.
- ExtentSignedPos = 26,
+ ExtentSignedPos = 27,
ExtentSignedMask = 0x1,
// Number of bits of range before extending operand.
- ExtentBitsPos = 27,
+ ExtentBitsPos = 28,
ExtentBitsMask = 0x1f,
// Alignment power-of-two before extending operand.
- ExtentAlignPos = 32,
+ ExtentAlignPos = 33,
ExtentAlignMask = 0x3,
// Valid subtargets
- validSubTargetPos = 34,
- validSubTargetMask = 0xf,
+ validSubTargetPos = 35,
+ validSubTargetMask = 0x3f,
// Addressing mode for load/store instructions.
- AddrModePos = 40,
+ AddrModePos = 41,
AddrModeMask = 0x7,
// Access size for load/store instructions.
- MemAccessSizePos = 43,
+ MemAccessSizePos = 44,
MemAccesSizeMask = 0xf,
// Branch predicted taken.
- TakenPos = 47,
+ TakenPos = 48,
TakenMask = 0x1,
// Floating-point instructions.
- FPPos = 48,
+ FPPos = 49,
FPMask = 0x1,
// New-Value producer-2 instructions.
- hasNewValuePos2 = 50,
+ hasNewValuePos2 = 51,
hasNewValueMask2 = 0x1,
-
// Which operand consumes or produces a new value.
- NewValueOpPos2 = 51,
+ NewValueOpPos2 = 52,
NewValueOpMask2 = 0x7,
// Accumulator instructions.
- AccumulatorPos = 54,
+ AccumulatorPos = 55,
AccumulatorMask = 0x1,
// Complex XU, prevent xu competition by preferring slot3
- PrefersSlot3Pos = 55,
+ PrefersSlot3Pos = 56,
PrefersSlot3Mask = 0x1,
CofMax1Pos = 60,
@@ -217,8 +174,6 @@ namespace HexagonII {
// Hexagon Specific MachineOperand flags.
MO_NO_FLAG,
- HMOTF_ConstExtended = 1,
-
/// MO_PCREL - On a symbol operand, indicates a PC-relative relocation
/// Used for computing a global address for PIC compilations
MO_PCREL,
@@ -250,7 +205,13 @@ namespace HexagonII {
// MO_TPREL - indicates relocation for TLS
// local Executable method
- MO_TPREL
+ MO_TPREL,
+
+ // HMOTF_ConstExtended
+ // Addendum to the above; indicates a const-extended operand.
+ // Can be used as a mask.
+ HMOTF_ConstExtended = 0x80
+
};
// Hexagon Sub-instruction classes.
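
Widening TypeMask to six bits is what pushes every later field up by one position. All of the Pos/Mask pairs above follow one extraction pattern; a sketch of the accessors that consume them (illustrative names; the real accessors live in HexagonMCInstrInfo):

  static inline unsigned getInsnType(uint64_t TSFlags) {
    return (TSFlags >> llvm::HexagonII::TypePos) & llvm::HexagonII::TypeMask;
  }
  static inline bool isSoloInsn(uint64_t TSFlags) {
    return (TSFlags >> llvm::HexagonII::SoloPos) & llvm::HexagonII::SoloMask;
  }
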
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 42fcc5a6aa89..dd790fd41257 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -125,46 +125,6 @@ void HexagonInstPrinter::printNOneImmOperand(MCInst const *MI, unsigned OpNo,
O << -1;
}
-void HexagonInstPrinter::prints3_6ImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- int64_t Imm;
- bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
- Imm = SignExtend64<9>(Imm);
- assert(Success); (void)Success;
- assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO.");
- O << formatImm(Imm/64);
-}
-
-void HexagonInstPrinter::prints3_7ImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- int64_t Imm;
- bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
- Imm = SignExtend64<10>(Imm);
- assert(Success); (void)Success;
- assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO.");
- O << formatImm(Imm/128);
-}
-
-void HexagonInstPrinter::prints4_6ImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- int64_t Imm;
- bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
- Imm = SignExtend64<10>(Imm);
- assert(Success); (void)Success;
- assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO.");
- O << formatImm(Imm/64);
-}
-
-void HexagonInstPrinter::prints4_7ImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const {
- int64_t Imm;
- bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm);
- Imm = SignExtend64<11>(Imm);
- assert(Success); (void)Success;
- assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO.");
- O << formatImm(Imm/128);
-}
-
void HexagonInstPrinter::printGlobalOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const {
printOperand(MI, OpNo, O);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index 5f421184b20a..ac8e391905e0 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -44,14 +44,6 @@ public:
raw_ostream &O) const;
void printNOneImmOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const;
- void prints3_6ImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void prints3_7ImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void prints4_6ImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
- void prints4_7ImmOperand(MCInst const *MI, unsigned OpNo,
- raw_ostream &O) const;
void printBranchOperand(MCInst const *MI, unsigned OpNo,
raw_ostream &O) const;
void printCallOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index c619c36164cf..446b3b2ce668 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -23,6 +23,7 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Triple &TT) {
Data32bitsDirective = "\t.word\t";
Data64bitsDirective = nullptr; // .xword is only supported by V9.
CommentString = "//";
+ SupportsDebugInformation = true;
LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
InlineAsmStart = "# InlineAsm Start";
@@ -30,8 +31,8 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Triple &TT) {
ZeroDirective = "\t.space\t";
AscizDirective = "\t.string\t";
- SupportsDebugInformation = true;
MinInstAlignment = 4;
UsesELFSectionDirectiveForBSS = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
+ UseLogicalShr = false;
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
index 07c9ad96a0d7..62b21c419f30 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -47,12 +47,40 @@ void HexagonMCChecker::init() {
if (HexagonMCInstrInfo::isBundle(MCB))
// Unfurl a bundle.
for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
- init(*I.getInst());
+ MCInst const &Inst = *I.getInst();
+ if (HexagonMCInstrInfo::isDuplex(MCII, Inst)) {
+ init(*Inst.getOperand(0).getInst());
+ init(*Inst.getOperand(1).getInst());
+ }
+ else
+ init(Inst);
}
else
init(MCB);
}
+void HexagonMCChecker::initReg(MCInst const &MCI, unsigned R, unsigned &PredReg,
+ bool &isTrue) {
+ if (HexagonMCInstrInfo::isPredicated(MCII, MCI) && isPredicateRegister(R)) {
+ // Note an used predicate register.
+ PredReg = R;
+ isTrue = HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI);
+
+ // Note use of new predicate register.
+ if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
+ NewPreds.insert(PredReg);
+ }
+ else
+ // Note register use. Super-registers are not tracked directly;
+ // only their component registers are.
+ for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
+ SRI.isValid();
+ ++SRI)
+ if (!MCSubRegIterator(*SRI, &RI).isValid())
+ // Skip super-registers used indirectly.
+ Uses.insert(*SRI);
+}
+
void HexagonMCChecker::init(MCInst const& MCI) {
const MCInstrDesc& MCID = HexagonMCInstrInfo::getDesc(MCII, MCI);
unsigned PredReg = Hexagon::NoRegister;
@@ -60,28 +88,10 @@ void HexagonMCChecker::init(MCInst const& MCI) {
// Get used registers.
for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i)
- if (MCI.getOperand(i).isReg()) {
- unsigned R = MCI.getOperand(i).getReg();
-
- if (HexagonMCInstrInfo::isPredicated(MCII, MCI) && isPredicateRegister(R)) {
- // Note an used predicate register.
- PredReg = R;
- isTrue = HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI);
-
- // Note use of new predicate register.
- if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
- NewPreds.insert(PredReg);
- }
- else
- // Note register use. Super-registers are not tracked directly,
- // but their components.
- for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
- SRI.isValid();
- ++SRI)
- if (!MCSubRegIterator(*SRI, &RI).isValid())
- // Skip super-registers used indirectly.
- Uses.insert(*SRI);
- }
+ if (MCI.getOperand(i).isReg())
+ initReg(MCI, MCI.getOperand(i).getReg(), PredReg, isTrue);
+ for (unsigned i = 0; i < MCID.getNumImplicitUses(); ++i)
+ initReg(MCI, MCID.getImplicitUses()[i], PredReg, isTrue);
// Get implicit register definitions.
if (const MCPhysReg *ImpDef = MCID.getImplicitDefs())
@@ -216,9 +226,11 @@ void HexagonMCChecker::init(MCInst const& MCI) {
if (!MCSubRegIterator(N, &RI).isValid()) {
// Super-registers cannot use new values.
if (MCID.isBranch())
- NewUses[N] = NewSense::Jmp(llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV);
+ NewUses[N] = NewSense::Jmp(
+ llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNCJ);
else
- NewUses[N] = NewSense::Use(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI));
+ NewUses[N] = NewSense::Use(
+ PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI));
}
}
}
@@ -230,14 +242,18 @@ HexagonMCChecker::HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo cons
init();
}
-bool HexagonMCChecker::check() {
+bool HexagonMCChecker::check(bool FullCheck) {
bool chkB = checkBranches();
bool chkP = checkPredicates();
bool chkNV = checkNewValues();
bool chkR = checkRegisters();
bool chkS = checkSolo();
- bool chkSh = checkShuffle();
- bool chkSl = checkSlots();
+ bool chkSh = true;
+ if (FullCheck)
+ chkSh = checkShuffle();
+ bool chkSl = true;
+ if (FullCheck)
+ chkSl = checkSlots();
bool chk = chkB && chkP && chkNV && chkR && chkS && chkSh && chkSl;
return chk;
@@ -271,8 +287,8 @@ bool HexagonMCChecker::checkBranches() {
HexagonMCErrInfo errInfo;
if (HexagonMCInstrInfo::isBundle(MCB)) {
bool hasConditional = false;
- unsigned Branches = 0, Returns = 0, NewIndirectBranches = 0,
- NewValueBranches = 0, Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE,
+ unsigned Branches = 0,
+ Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE,
Unconditional = HEXAGON_PRESHUFFLE_PACKET_SIZE;
for (unsigned i = HexagonMCInstrInfo::bundleInstructionsOffset;
@@ -284,12 +300,6 @@ bool HexagonMCChecker::checkBranches() {
if (HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch() ||
HexagonMCInstrInfo::getDesc(MCII, MCI).isCall()) {
++Branches;
- if (HexagonMCInstrInfo::getDesc(MCII, MCI).isIndirectBranch() &&
- HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
- ++NewIndirectBranches;
- if (HexagonMCInstrInfo::isNewValue(MCII, MCI))
- ++NewValueBranches;
-
if (HexagonMCInstrInfo::isPredicated(MCII, MCI) ||
HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) {
hasConditional = true;
@@ -298,9 +308,6 @@ bool HexagonMCChecker::checkBranches() {
Unconditional = i; // Record the position of the unconditional branch.
}
}
- if (HexagonMCInstrInfo::getDesc(MCII, MCI).isReturn() &&
- HexagonMCInstrInfo::getDesc(MCII, MCI).mayLoad())
- ++Returns;
}
if (Branches) // FIXME: should "Defs.count(Hexagon::PC)" be here too?
@@ -504,7 +511,7 @@ bool HexagonMCChecker::checkShuffle() {
HexagonMCErrInfo errInfo;
// Branch info is lost when duplexing. The unduplexed insns must be
// checked and only branch errors matter for this case.
- HexagonMCShuffler MCS(MCII, STI, MCB);
+ HexagonMCShuffler MCS(true, MCII, STI, MCB);
if (!MCS.check()) {
if (MCS.getError() == HexagonShuffler::SHUFFLE_ERROR_BRANCHES) {
errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE);
@@ -513,7 +520,7 @@ bool HexagonMCChecker::checkShuffle() {
return false;
}
}
- HexagonMCShuffler MCSDX(MCII, STI, MCBDX);
+ HexagonMCShuffler MCSDX(true, MCII, STI, MCBDX);
if (!MCSDX.check()) {
errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE);
errInfo.setShuffleError(MCSDX.getError());
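
The new FullCheck flag lets callers skip the comparatively expensive shuffle and slot checks; a hypothetical call site follows (variable names assumed, constructor as declared in the header below):

  HexagonMCChecker Checker(MCII, STI, Bundle, BundleDX, RI);
  bool Ok = Checker.check(/*FullCheck=*/false);  // semantic checks only
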
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
index 33e22798c954..c3b3d4c14c88 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -168,6 +168,7 @@ class HexagonMCChecker {
void init();
void init(MCInst const&);
+ void initReg(MCInst const &, unsigned, unsigned &PredReg, bool &isTrue);
// Checks performed.
bool checkBranches();
@@ -177,6 +178,7 @@ class HexagonMCChecker {
bool checkSolo();
bool checkShuffle();
bool checkSlots();
+ bool checkSize();
static void compoundRegisterMap(unsigned&);
@@ -196,7 +198,7 @@ class HexagonMCChecker {
explicit HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst& mcb, MCInst &mcbdx,
const MCRegisterInfo& ri);
- bool check();
+ bool check(bool FullCheck = true);
/// add a new error/warning
void addErrInfo(HexagonMCErrInfo &err) { ErrInfoQ.push(err.s); };
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 2645a17b9bd0..c0956520de73 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -35,38 +35,40 @@ STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII,
MCContext &aMCT)
: MCT(aMCT), MCII(aMII), Addend(new unsigned(0)),
- Extended(new bool(false)), CurrentBundle(new MCInst const *) {}
+ Extended(new bool(false)), CurrentBundle(new MCInst const *),
+ CurrentIndex(new size_t(0)) {}
-uint32_t HexagonMCCodeEmitter::parseBits(size_t Instruction, size_t Last,
+uint32_t HexagonMCCodeEmitter::parseBits(size_t Last,
MCInst const &MCB,
MCInst const &MCI) const {
bool Duplex = HexagonMCInstrInfo::isDuplex(MCII, MCI);
- if (Instruction == 0) {
+ if (*CurrentIndex == 0) {
if (HexagonMCInstrInfo::isInnerLoop(MCB)) {
assert(!Duplex);
- assert(Instruction != Last);
+ assert(*CurrentIndex != Last);
return HexagonII::INST_PARSE_LOOP_END;
}
}
- if (Instruction == 1) {
+ if (*CurrentIndex == 1) {
if (HexagonMCInstrInfo::isOuterLoop(MCB)) {
assert(!Duplex);
- assert(Instruction != Last);
+ assert(*CurrentIndex != Last);
return HexagonII::INST_PARSE_LOOP_END;
}
}
if (Duplex) {
- assert(Instruction == Last);
+ assert(*CurrentIndex == Last);
return HexagonII::INST_PARSE_DUPLEX;
}
- if(Instruction == Last)
+ if(*CurrentIndex == Last)
return HexagonII::INST_PARSE_PACKET_END;
return HexagonII::INST_PARSE_NOT_END;
}
-void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS,
+/// encodeInstruction - Emit the bundle.
+void HexagonMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
- MCSubtargetInfo const &STI) const {
+ const MCSubtargetInfo &STI) const {
MCInst &HMB = const_cast<MCInst &>(MI);
assert(HexagonMCInstrInfo::isBundle(HMB));
@@ -74,7 +76,7 @@ void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS,
*Addend = 0;
*Extended = false;
*CurrentBundle = &MI;
- size_t Instruction = 0;
+ *CurrentIndex = 0;
size_t Last = HexagonMCInstrInfo::bundleSize(HMB) - 1;
for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) {
MCInst &HMI = const_cast<MCInst &>(*I.getInst());
@@ -82,11 +84,10 @@ void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS,
computeAvailableFeatures(STI.getFeatureBits()));
EncodeSingleInstruction(HMI, OS, Fixups, STI,
- parseBits(Instruction, Last, HMB, HMI),
- Instruction);
+ parseBits(Last, HMB, HMI));
*Extended = HexagonMCInstrInfo::isImmext(HMI);
*Addend += HEXAGON_INSTR_SIZE;
- ++Instruction;
+ ++*CurrentIndex;
}
return;
}
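
Moving the per-bundle cursor into *CurrentIndex lets later callbacks (notably getMachineOpValue further down) know which word of the packet is being emitted. The parse bits returned by parseBits occupy bits 15:14 of each instruction word and mark packet and hardware-loop boundaries. A minimal sketch of the selection logic, using stand-in values for the HexagonII::INST_PARSE_* constants:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Stand-ins for HexagonII's parse-bit constants (bits 15:14 of a word).
    enum : uint32_t {
      INST_PARSE_DUPLEX     = 0x00000000,
      INST_PARSE_NOT_END    = 0x00004000,
      INST_PARSE_LOOP_END   = 0x00008000,
      INST_PARSE_PACKET_END = 0x0000c000,
    };

    // Index: position of this word in the packet; Last: final position.
    uint32_t parseBitsFor(std::size_t Index, std::size_t Last, bool InnerLoop,
                          bool OuterLoop, bool IsDuplex) {
      if (Index == 0 && InnerLoop)
        return INST_PARSE_LOOP_END;   // loop0 end is marked on word 0
      if (Index == 1 && OuterLoop)
        return INST_PARSE_LOOP_END;   // loop1 end is marked on word 1
      if (IsDuplex) {
        assert(Index == Last && "a duplex is always the last word");
        return INST_PARSE_DUPLEX;
      }
      return Index == Last ? INST_PARSE_PACKET_END : INST_PARSE_NOT_END;
    }
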
@@ -107,165 +108,44 @@ static bool RegisterMatches(unsigned Consumer, unsigned Producer,
/// EncodeSingleInstruction - Emit a single instruction.
void HexagonMCCodeEmitter::EncodeSingleInstruction(
const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI, uint32_t Parse, size_t Index) const {
- MCInst HMB = MI;
- assert(!HexagonMCInstrInfo::isBundle(HMB));
+ const MCSubtargetInfo &STI, uint32_t Parse) const {
+ assert(!HexagonMCInstrInfo::isBundle(MI));
uint64_t Binary;
- // Compound instructions are limited to using registers 0-7 and 16-23
- // and here we make a map 16-23 to 8-15 so they can be correctly encoded.
- static unsigned RegMap[8] = {Hexagon::R8, Hexagon::R9, Hexagon::R10,
- Hexagon::R11, Hexagon::R12, Hexagon::R13,
- Hexagon::R14, Hexagon::R15};
-
// Pseudo instructions don't get encoded and shouldn't be here
// in the first place!
- assert(!HexagonMCInstrInfo::getDesc(MCII, HMB).isPseudo() &&
+ assert(!HexagonMCInstrInfo::getDesc(MCII, MI).isPseudo() &&
"pseudo-instruction found");
DEBUG(dbgs() << "Encoding insn"
- " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'"
+ " `" << HexagonMCInstrInfo::getName(MCII, MI) << "'"
"\n");
- if (llvm::HexagonMCInstrInfo::getType(MCII, HMB) == HexagonII::TypeCOMPOUND) {
- for (unsigned i = 0; i < HMB.getNumOperands(); ++i)
- if (HMB.getOperand(i).isReg()) {
- unsigned Reg =
- MCT.getRegisterInfo()->getEncodingValue(HMB.getOperand(i).getReg());
- if ((Reg <= 23) && (Reg >= 16))
- HMB.getOperand(i).setReg(RegMap[Reg - 16]);
- }
- }
-
- if (HexagonMCInstrInfo::isNewValue(MCII, HMB)) {
- // Calculate the new value distance to the associated producer
- MCOperand &MCO =
- HMB.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, HMB));
- unsigned SOffset = 0;
- unsigned VOffset = 0;
- unsigned Register = MCO.getReg();
- unsigned Register1;
- unsigned Register2;
- auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
- auto i = Instructions.begin() + Index - 1;
- for (;; --i) {
- assert(i != Instructions.begin() - 1 && "Couldn't find producer");
- MCInst const &Inst = *i->getInst();
- if (HexagonMCInstrInfo::isImmext(Inst))
- continue;
- ++SOffset;
- if (HexagonMCInstrInfo::isVector(MCII, Inst))
- // Vector instructions don't count scalars
- ++VOffset;
- Register1 =
- HexagonMCInstrInfo::hasNewValue(MCII, Inst)
- ? HexagonMCInstrInfo::getNewValueOperand(MCII, Inst).getReg()
- : static_cast<unsigned>(Hexagon::NoRegister);
- Register2 =
- HexagonMCInstrInfo::hasNewValue2(MCII, Inst)
- ? HexagonMCInstrInfo::getNewValueOperand2(MCII, Inst).getReg()
- : static_cast<unsigned>(Hexagon::NoRegister);
- if (!RegisterMatches(Register, Register1, Register2))
- // This isn't the register we're looking for
- continue;
- if (!HexagonMCInstrInfo::isPredicated(MCII, Inst))
- // Producer is unpredicated
- break;
- assert(HexagonMCInstrInfo::isPredicated(MCII, HMB) &&
- "Unpredicated consumer depending on predicated producer");
- if (HexagonMCInstrInfo::isPredicatedTrue(MCII, Inst) ==
- HexagonMCInstrInfo::isPredicatedTrue(MCII, HMB))
- // Producer predicate sense matched ours
- break;
- }
- // Hexagon PRM 10.11 Construct Nt from distance
- unsigned Offset =
- HexagonMCInstrInfo::isVector(MCII, HMB) ? VOffset : SOffset;
- Offset <<= 1;
- Offset |=
- HexagonMCInstrInfo::SubregisterBit(Register, Register1, Register2);
- MCO.setReg(Offset + Hexagon::R0);
- }
-
- Binary = getBinaryCodeForInstr(HMB, Fixups, STI);
+ Binary = getBinaryCodeForInstr(MI, Fixups, STI);
// Check for unimplemented instructions. Immediate extenders
// are encoded as zero, so they need to be accounted for.
- if ((!Binary) &&
- ((HMB.getOpcode() != DuplexIClass0) && (HMB.getOpcode() != A4_ext) &&
- (HMB.getOpcode() != A4_ext_b) && (HMB.getOpcode() != A4_ext_c) &&
- (HMB.getOpcode() != A4_ext_g))) {
+ if (!Binary &&
+ MI.getOpcode() != DuplexIClass0 &&
+ MI.getOpcode() != A4_ext) {
DEBUG(dbgs() << "Unimplemented inst: "
- " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'"
+ " `" << HexagonMCInstrInfo::getName(MCII, MI) << "'"
"\n");
llvm_unreachable("Unimplemented Instruction");
}
Binary |= Parse;
// if we need to emit a duplexed instruction
- if (HMB.getOpcode() >= Hexagon::DuplexIClass0 &&
- HMB.getOpcode() <= Hexagon::DuplexIClassF) {
+ if (MI.getOpcode() >= Hexagon::DuplexIClass0 &&
+ MI.getOpcode() <= Hexagon::DuplexIClassF) {
assert(Parse == HexagonII::INST_PARSE_DUPLEX &&
"Emitting duplex without duplex parse bits");
- unsigned dupIClass;
- switch (HMB.getOpcode()) {
- case Hexagon::DuplexIClass0:
- dupIClass = 0;
- break;
- case Hexagon::DuplexIClass1:
- dupIClass = 1;
- break;
- case Hexagon::DuplexIClass2:
- dupIClass = 2;
- break;
- case Hexagon::DuplexIClass3:
- dupIClass = 3;
- break;
- case Hexagon::DuplexIClass4:
- dupIClass = 4;
- break;
- case Hexagon::DuplexIClass5:
- dupIClass = 5;
- break;
- case Hexagon::DuplexIClass6:
- dupIClass = 6;
- break;
- case Hexagon::DuplexIClass7:
- dupIClass = 7;
- break;
- case Hexagon::DuplexIClass8:
- dupIClass = 8;
- break;
- case Hexagon::DuplexIClass9:
- dupIClass = 9;
- break;
- case Hexagon::DuplexIClassA:
- dupIClass = 10;
- break;
- case Hexagon::DuplexIClassB:
- dupIClass = 11;
- break;
- case Hexagon::DuplexIClassC:
- dupIClass = 12;
- break;
- case Hexagon::DuplexIClassD:
- dupIClass = 13;
- break;
- case Hexagon::DuplexIClassE:
- dupIClass = 14;
- break;
- case Hexagon::DuplexIClassF:
- dupIClass = 15;
- break;
- default:
- llvm_unreachable("Unimplemented DuplexIClass");
- break;
- }
+ unsigned dupIClass = MI.getOpcode() - Hexagon::DuplexIClass0;
// 29 is the bit position.
// The 0b1110 (0xE) bits are masked off and shifted down by 1 bit.
// The last bit is moved to bit position 13.
Binary = ((dupIClass & 0xE) << (29 - 1)) | ((dupIClass & 0x1) << 13);
- const MCInst *subInst0 = HMB.getOperand(0).getInst();
- const MCInst *subInst1 = HMB.getOperand(1).getInst();
+ const MCInst *subInst0 = MI.getOperand(0).getInst();
+ const MCInst *subInst1 = MI.getOperand(1).getInst();
// get subinstruction slot 0
unsigned subInstSlot0Bits = getBinaryCodeForInstr(*subInst0, Fixups, STI);
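
The 16-way switch is folded into a single subtraction because the DuplexIClass0 through DuplexIClassF opcodes are declared contiguously. A worked sketch of the bit placement, assuming only the layout stated in the comments above (iclass bits 3:1 at word bits 31:29, iclass bit 0 at word bit 13):

    #include <cassert>
    #include <cstdint>

    // Split a 4-bit duplex instruction class across the word: the top
    // three bits land at 31:29, the low bit at 13 (15:14 hold parse bits).
    uint32_t encodeDuplexIClass(unsigned IClass) {
      assert(IClass <= 0xF && "iclass is a 4-bit value");
      return ((IClass & 0xE) << (29 - 1)) | ((IClass & 0x1) << 13);
    }
    // Example: IClass 0xA (0b1010) -> 0b101 at bits 31:29, 0 at bit 13,
    // i.e. 0xA0000000.
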
@@ -293,14 +173,13 @@ void raise_relocation_error(unsigned bits, unsigned kind) {
/// getFixupNoBits - Some insns are not extended and thus have no
/// bits. These cases require a more brute force method for determining
/// the correct relocation.
-namespace {
-Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
- const MCOperand &MO,
- const MCSymbolRefExpr::VariantKind kind) {
+Hexagon::Fixups HexagonMCCodeEmitter::getFixupNoBits(
+ MCInstrInfo const &MCII, const MCInst &MI, const MCOperand &MO,
+ const MCSymbolRefExpr::VariantKind kind) const {
const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI);
unsigned insnType = llvm::HexagonMCInstrInfo::getType(MCII, MI);
- if (insnType == HexagonII::TypePREFIX) {
+ if (insnType == HexagonII::TypeEXTENDER) {
switch (kind) {
case MCSymbolRefExpr::VK_GOTREL:
return Hexagon::fixup_Hexagon_GOTREL_32_6_X;
@@ -319,11 +198,21 @@ Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
case MCSymbolRefExpr::VK_Hexagon_IE_GOT:
return Hexagon::fixup_Hexagon_IE_GOT_32_6_X;
case MCSymbolRefExpr::VK_Hexagon_PCREL:
- case MCSymbolRefExpr::VK_None:
- if (MCID.isBranch())
- return Hexagon::fixup_Hexagon_B32_PCREL_X;
- else
- return Hexagon::fixup_Hexagon_32_6_X;
+ return Hexagon::fixup_Hexagon_B32_PCREL_X;
+ case MCSymbolRefExpr::VK_None: {
+ auto Insts = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
+ for (auto I = Insts.begin(), N = Insts.end(); I != N; ++I) {
+ if (I->getInst() == &MI) {
+ const MCInst &NextI = *(I+1)->getInst();
+ const MCInstrDesc &D = HexagonMCInstrInfo::getDesc(MCII, NextI);
+ if (D.isBranch() || D.isCall() ||
+ HexagonMCInstrInfo::getType(MCII, NextI) == HexagonII::TypeCR)
+ return Hexagon::fixup_Hexagon_B32_PCREL_X;
+ return Hexagon::fixup_Hexagon_32_6_X;
+ }
+ }
+ raise_relocation_error(0, kind);
+ }
default:
raise_relocation_error(0, kind);
}
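
With getFixupNoBits now a member, the VK_None case can consult *CurrentBundle: an extender that feeds a branch, call, or CR-type instruction is extending a PC-relative target and needs fixup_Hexagon_B32_PCREL_X; anything else gets fixup_Hexagon_32_6_X. A simplified sketch over a toy bundle model (the struct is illustrative, not an LLVM type):

    #include <cstddef>
    #include <vector>

    struct SimpleInsn { bool IsBranch, IsCall, IsCRType; };
    enum ExtenderFixup { B32_PCREL_X, FIXUP_32_6_X };

    // The extender's consumer is the word that follows it in the packet.
    ExtenderFixup fixupForExtender(const std::vector<SimpleInsn> &Bundle,
                                   std::size_t ExtenderIdx) {
      const SimpleInsn &Next = Bundle.at(ExtenderIdx + 1);
      if (Next.IsBranch || Next.IsCall || Next.IsCRType)
        return B32_PCREL_X;  // extending a PC-relative branch target
      return FIXUP_32_6_X;   // extending an ordinary immediate
    }
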
@@ -406,7 +295,6 @@ Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
}
llvm_unreachable("Relocation exit not taken");
}
-}
namespace llvm {
extern const MCInstrDesc HexagonInsts[];
@@ -450,7 +338,8 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
int64_t Value;
if (ME->evaluateAsAbsolute(Value))
return Value;
- assert(ME->getKind() == MCExpr::SymbolRef || ME->getKind() == MCExpr::Binary);
+ assert(ME->getKind() == MCExpr::SymbolRef ||
+ ME->getKind() == MCExpr::Binary);
if (ME->getKind() == MCExpr::Binary) {
MCBinaryExpr const *Binary = cast<MCBinaryExpr>(ME);
getExprOpValue(MI, MO, Binary->getLHS(), Fixups, STI);
@@ -581,7 +470,30 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
if (HexagonMCInstrInfo::s23_2_reloc(*MO.getExpr()))
FixupKind = Hexagon::fixup_Hexagon_23_REG;
else
- raise_relocation_error(bits, kind);
+ if (MCID.mayStore() || MCID.mayLoad()) {
+ for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses;
+ ++ImpUses) {
+ if (*ImpUses != Hexagon::GP)
+ continue;
+ switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) {
+ case HexagonII::MemAccessSize::ByteAccess:
+ FixupKind = fixup_Hexagon_GPREL16_0;
+ break;
+ case HexagonII::MemAccessSize::HalfWordAccess:
+ FixupKind = fixup_Hexagon_GPREL16_1;
+ break;
+ case HexagonII::MemAccessSize::WordAccess:
+ FixupKind = fixup_Hexagon_GPREL16_2;
+ break;
+ case HexagonII::MemAccessSize::DoubleWordAccess:
+ FixupKind = fixup_Hexagon_GPREL16_3;
+ break;
+ default:
+ raise_relocation_error(bits, kind);
+ }
+ }
+ } else
+ raise_relocation_error(bits, kind);
break;
}
case MCSymbolRefExpr::VK_DTPREL:
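
GP-relative accesses scale their 16-bit offset by the access size, so each width needs its own relocation: GPREL16_0 for bytes up through GPREL16_3 for doublewords, with GPREL16_n encoding offset >> n for a 2^n-byte access. A sketch of the mapping (enum names mirror the patch):

    #include <stdexcept>

    enum class AccessSize { Byte = 1, Half = 2, Word = 4, Double = 8 };
    enum GpFixup { GPREL16_0, GPREL16_1, GPREL16_2, GPREL16_3 };

    // GPREL16_n stores offset >> n, matching a 2^n-byte-wide access.
    GpFixup gpFixupFor(AccessSize A) {
      switch (A) {
      case AccessSize::Byte:   return GPREL16_0;
      case AccessSize::Half:   return GPREL16_1;
      case AccessSize::Word:   return GPREL16_2;
      case AccessSize::Double: return GPREL16_3;
      }
      throw std::invalid_argument("no GP-relative fixup for this size");
    }
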
@@ -795,10 +707,71 @@ unsigned
HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
SmallVectorImpl<MCFixup> &Fixups,
MCSubtargetInfo const &STI) const {
+#ifndef NDEBUG
+ size_t OperandNumber = ~0U;
+ for (unsigned i = 0, n = MI.getNumOperands(); i < n; ++i)
+ if (&MI.getOperand(i) == &MO) {
+ OperandNumber = i;
+ break;
+ }
+ assert((OperandNumber != ~0U) && "Operand not found");
+#endif
+
+ if (HexagonMCInstrInfo::isNewValue(MCII, MI) &&
+ &MO == &MI.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, MI))) {
+ // Calculate the new value distance to the associated producer
+ MCOperand const &MCO =
+ MI.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, MI));
+ unsigned SOffset = 0;
+ unsigned VOffset = 0;
+ unsigned Register = MCO.getReg();
+ unsigned Register1;
+ unsigned Register2;
+ auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
+ auto i = Instructions.begin() + *CurrentIndex - 1;
+ for (;; --i) {
+ assert(i != Instructions.begin() - 1 && "Couldn't find producer");
+ MCInst const &Inst = *i->getInst();
+ if (HexagonMCInstrInfo::isImmext(Inst))
+ continue;
+ ++SOffset;
+ if (HexagonMCInstrInfo::isVector(MCII, Inst))
+ // Vector instructions don't count scalars
+ ++VOffset;
+ Register1 =
+ HexagonMCInstrInfo::hasNewValue(MCII, Inst)
+ ? HexagonMCInstrInfo::getNewValueOperand(MCII, Inst).getReg()
+ : static_cast<unsigned>(Hexagon::NoRegister);
+ Register2 =
+ HexagonMCInstrInfo::hasNewValue2(MCII, Inst)
+ ? HexagonMCInstrInfo::getNewValueOperand2(MCII, Inst).getReg()
+ : static_cast<unsigned>(Hexagon::NoRegister);
+ if (!RegisterMatches(Register, Register1, Register2))
+ // This isn't the register we're looking for
+ continue;
+ if (!HexagonMCInstrInfo::isPredicated(MCII, Inst))
+ // Producer is unpredicated
+ break;
+ assert(HexagonMCInstrInfo::isPredicated(MCII, MI) &&
+ "Unpredicated consumer depending on predicated producer");
+ if (HexagonMCInstrInfo::isPredicatedTrue(MCII, Inst) ==
+ HexagonMCInstrInfo::isPredicatedTrue(MCII, MI))
+ // Producer predicate sense matched ours
+ break;
+ }
+ // Hexagon PRM 10.11 Construct Nt from distance
+ unsigned Offset =
+ HexagonMCInstrInfo::isVector(MCII, MI) ? VOffset : SOffset;
+ Offset <<= 1;
+ Offset |=
+ HexagonMCInstrInfo::SubregisterBit(Register, Register1, Register2);
+ return Offset;
+ }
assert(!MO.isImm());
if (MO.isReg()) {
unsigned Reg = MO.getReg();
- if (HexagonMCInstrInfo::isSubInstruction(MI))
+ if (HexagonMCInstrInfo::isSubInstruction(MI) ||
+ llvm::HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCJ)
return HexagonMCInstrInfo::getDuplexRegisterNumbering(Reg);
switch(MI.getOpcode()){
case Hexagon::A2_tfrrcr:
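
The new-value rewrite moves from EncodeSingleInstruction into getMachineOpValue, where the shared *CurrentIndex identifies the consumer's position in the bundle. Per the Hexagon PRM (10.11), the consumer's Nt field encodes the distance back to its producer — skipping extenders, and counting only vector producers when the consumer is a vector insn — shifted left by one, with the subregister-select bit in bit 0. A worked sketch, assuming the producer scan has already yielded the distance:

    #include <cstdint>

    // Nt = (distance-to-producer << 1) | subregister bit; the emitter
    // then materializes it as the register R0 + Nt.
    uint32_t encodeNt(unsigned DistanceToProducer, bool UsesHighSubreg) {
      return (DistanceToProducer << 1) | (UsesHighSubreg ? 1u : 0u);
    }
    // Example: producer one slot back, low subregister -> Nt = 0b10 = 2.
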
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
index 8e0667d9ac8e..c3a4beec313f 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -15,6 +15,7 @@
#ifndef HEXAGONMCCODEEMITTER_H
#define HEXAGONMCCODEEMITTER_H
+#include "MCTargetDesc/HexagonFixupKinds.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -31,18 +32,22 @@ class HexagonMCCodeEmitter : public MCCodeEmitter {
std::unique_ptr<unsigned> Addend;
std::unique_ptr<bool> Extended;
std::unique_ptr<MCInst const *> CurrentBundle;
+ std::unique_ptr<size_t> CurrentIndex;
// helper routine for getMachineOpValue()
unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO,
const MCExpr *ME, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
+ const MCOperand &MO,
+ const MCSymbolRefExpr::VariantKind kind) const;
+
public:
HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT);
// Return parse bits for instruction `MCI' inside bundle `MCB'
- uint32_t parseBits(size_t Instruction, size_t Last, MCInst const &MCB,
- MCInst const &MCI) const;
+ uint32_t parseBits(size_t Last, MCInst const &MCB, MCInst const &MCI) const;
void encodeInstruction(MCInst const &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
@@ -51,7 +56,7 @@ public:
void EncodeSingleInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI,
- uint32_t Parse, size_t Index) const;
+ uint32_t Parse) const;
// \brief TableGen'erated function for getting the
// binary encoding for an instruction.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 9a09a17767a6..ffa980ca6563 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -14,6 +14,7 @@
#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCShuffler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
@@ -396,7 +397,7 @@ static bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context,
/// is found, update the contents of the bundle with the compound insn.
/// If a compound instruction is found then the bundle will have one
/// additional slot.
-void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII,
+void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCContext &Context, MCInst &MCI) {
assert(HexagonMCInstrInfo::isBundle(MCI) &&
"Non-Bundle where Bundle expected");
@@ -405,8 +406,23 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII,
if (MCI.size() < 2)
return;
+ bool StartedValid = llvm::HexagonMCShuffle(false, MCII, STI, MCI);
+
+  // Make a working copy of the bundle, keeping the order of jump instructions.
+ MCInst CheckList(MCI);
+
// Look for compounds until none are found, only update the bundle when
// a compound is found.
- while (lookForCompound(MCII, Context, MCI))
- ;
+ while (lookForCompound(MCII, Context, CheckList)) {
+ // Keep the original bundle around in case the shuffle fails.
+ MCInst OriginalBundle(MCI);
+
+ // Need to update the bundle.
+ MCI = CheckList;
+
+ if (StartedValid && !llvm::HexagonMCShuffle(false, MCII, STI, MCI)) {
+ DEBUG(dbgs() << "Found ERROR\n");
+ MCI = OriginalBundle;
+ }
+ }
}
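
tryCompound is now shuffle-aware: each compound found on the working copy is committed to the bundle only provisionally, and rolled back if the shuffler no longer accepts the result (provided the bundle shuffled cleanly to begin with). A generic sketch of that try/validate/rollback loop (State and the callbacks are illustrative):

    #include <functional>

    // Apply transforms to a working copy; commit each one, then restore
    // the previous state if validation (the shuffler, in the patch) fails.
    template <typename State>
    void applyWhileValid(State &S,
                         const std::function<bool(State &)> &TryTransform,
                         const std::function<bool(const State &)> &IsValid) {
      State Work(S);
      while (TryTransform(Work)) {
        State Backup(S);
        S = Work;       // tentatively commit the transformed bundle
        if (!IsValid(S))
          S = Backup;   // shuffle failed: keep the original packet
      }
    }
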
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index 413f052aa4bd..e8f154a1fa53 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -15,6 +15,7 @@
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -262,6 +263,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
case Hexagon::EH_RETURN_JMPR:
case Hexagon::J2_jumpr:
+ case Hexagon::PS_jmpret:
// jumpr r31
// Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>.
DstReg = MCI.getOperand(0).getReg();
@@ -275,6 +277,12 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
case Hexagon::J2_jumprfnew:
case Hexagon::J2_jumprtnewpt:
case Hexagon::J2_jumprfnewpt:
+ case Hexagon::PS_jmprett:
+ case Hexagon::PS_jmpretf:
+ case Hexagon::PS_jmprettnew:
+ case Hexagon::PS_jmpretfnew:
+ case Hexagon::PS_jmprettnewpt:
+ case Hexagon::PS_jmpretfnewpt:
DstReg = MCI.getOperand(1).getReg();
SrcReg = MCI.getOperand(0).getReg();
// [if ([!]p0[.new])] jumpr r31
@@ -284,15 +292,10 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
}
break;
case Hexagon::L4_return_t:
-
case Hexagon::L4_return_f:
-
case Hexagon::L4_return_tnew_pnt:
-
case Hexagon::L4_return_fnew_pnt:
-
case Hexagon::L4_return_tnew_pt:
-
case Hexagon::L4_return_fnew_pt:
// [if ([!]p0[.new])] dealloc_return
SrcReg = MCI.getOperand(0).getReg();
@@ -565,7 +568,8 @@ bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) {
bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII,
MCInst const &MIa, bool ExtendedA,
MCInst const &MIb, bool ExtendedB,
- bool bisReversable) {
+ bool bisReversable,
+ MCSubtargetInfo const &STI) {
// Slot 1 cannot be extended in duplexes PRM 10.5
if (ExtendedA)
return false;
@@ -625,11 +629,16 @@ bool HexagonMCInstrInfo::isOrderedDuplexPair(MCInstrInfo const &MCII,
return false;
}
- // If a store appears, it must be in slot 0 (MIa) 1st, and then slot 1 (MIb);
- // therefore, not duplexable if slot 1 is a store, and slot 0 is not.
- if ((MIbG == HexagonII::HSIG_S1) || (MIbG == HexagonII::HSIG_S2)) {
- if ((MIaG != HexagonII::HSIG_S1) && (MIaG != HexagonII::HSIG_S2))
- return false;
+ if (STI.getCPU().equals_lower("hexagonv4") ||
+ STI.getCPU().equals_lower("hexagonv5") ||
+ STI.getCPU().equals_lower("hexagonv55") ||
+ STI.getCPU().equals_lower("hexagonv60")) {
+ // If a store appears, it must be in slot 0 (MIa) 1st, and then slot 1 (MIb);
+ // therefore, not duplexable if slot 1 is a store, and slot 0 is not.
+ if ((MIbG == HexagonII::HSIG_S1) || (MIbG == HexagonII::HSIG_S2)) {
+ if ((MIaG != HexagonII::HSIG_S1) && (MIaG != HexagonII::HSIG_S2))
+ return false;
+ }
}
return (isDuplexPairMatch(MIaG, MIbG));
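
The store-ordering restriction — a store sub-instruction may sit in slot 1 only if slot 0 holds one too — is now applied only for hexagonv4 through hexagonv60, so later cores can form duplexes the older rule would reject. The rule, condensed:

    // Slot 0 is MIa, slot 1 is MIb. On v60 and earlier, a lone store
    // must occupy slot 0; newer cores drop the restriction.
    bool storeOrderOk(bool SlotAIsStore, bool SlotBIsStore,
                      bool IsV60OrEarlier) {
      if (!IsV60OrEarlier)
        return true;
      return !SlotBIsStore || SlotAIsStore;
    }
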
@@ -703,6 +712,7 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
Result.setOpcode(Hexagon::SA1_dec);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
+ addOps(Result, Inst, 2);
break;
} // 1,2 SUBInst $Rd = add($Rs,#-1)
else if (Inst.getOperand(1).getReg() == Hexagon::R29) {
@@ -806,20 +816,27 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
break; // none SUBInst deallocframe
case Hexagon::EH_RETURN_JMPR:
case Hexagon::J2_jumpr:
+ case Hexagon::PS_jmpret:
Result.setOpcode(Hexagon::SL2_jumpr31);
break; // none SUBInst jumpr r31
case Hexagon::J2_jumprf:
+ case Hexagon::PS_jmpretf:
Result.setOpcode(Hexagon::SL2_jumpr31_f);
break; // none SUBInst if (!p0) jumpr r31
case Hexagon::J2_jumprfnew:
case Hexagon::J2_jumprfnewpt:
+ case Hexagon::PS_jmpretfnewpt:
+ case Hexagon::PS_jmpretfnew:
Result.setOpcode(Hexagon::SL2_jumpr31_fnew);
break; // none SUBInst if (!p0.new) jumpr:nt r31
case Hexagon::J2_jumprt:
+ case Hexagon::PS_jmprett:
Result.setOpcode(Hexagon::SL2_jumpr31_t);
break; // none SUBInst if (p0) jumpr r31
case Hexagon::J2_jumprtnew:
case Hexagon::J2_jumprtnewpt:
+ case Hexagon::PS_jmprettnewpt:
+ case Hexagon::PS_jmprettnew:
Result.setOpcode(Hexagon::SL2_jumpr31_tnew);
break; // none SUBInst if (p0.new) jumpr:nt r31
case Hexagon::L2_loadrb_io:
@@ -966,6 +983,7 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
if (Absolute && Value == -1) {
Result.setOpcode(Hexagon::SA1_setin1);
addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
break; // 2 1 SUBInst $Rd = #-1
} else {
Result.setOpcode(Hexagon::SA1_seti);
@@ -1005,6 +1023,7 @@ static bool isStoreInst(unsigned opCode) {
SmallVector<DuplexCandidate, 8>
HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI,
MCInst const &MCB) {
assert(isBundle(MCB));
SmallVector<DuplexCandidate, 8> duplexToTry;
@@ -1033,7 +1052,7 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII,
HexagonMCInstrInfo::hasExtenderForIndex(MCB, k - 1),
*MCB.getOperand(j).getInst(),
HexagonMCInstrInfo::hasExtenderForIndex(MCB, j - 1),
- bisReversable)) {
+ bisReversable, STI)) {
// Get iClass.
unsigned iClass = iClassOfDuplexPair(
getDuplexCandidateGroup(*MCB.getOperand(k).getInst()),
@@ -1058,7 +1077,7 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII,
HexagonMCInstrInfo::hasExtenderForIndex(MCB, j - 1),
*MCB.getOperand(k).getInst(),
HexagonMCInstrInfo::hasExtenderForIndex(MCB, k - 1),
- bisReversable)) {
+ bisReversable, STI)) {
// Get iClass.
unsigned iClass = iClassOfDuplexPair(
getDuplexCandidateGroup(*MCB.getOperand(j).getInst()),
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index 226470cfbced..9e1ff9ca35d7 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -37,30 +37,19 @@
using namespace llvm;
-static cl::opt<unsigned>
- GPSize("gpsize", cl::NotHidden,
- cl::desc("Global Pointer Addressing Size. The default size is 8."),
- cl::Prefix, cl::init(8));
-
-void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
- const MCSubtargetInfo &STI) {
- MCInst HMI = HexagonMCInstrInfo::createBundle();
- MCInst *MCB;
-
- if (MCK.getOpcode() != Hexagon::BUNDLE) {
- HMI.addOperand(MCOperand::createInst(&MCK));
- MCB = &HMI;
- } else
- MCB = const_cast<MCInst *>(&MCK);
-
- // Examines packet and pad the packet, if needed, when an
- // end-loop is in the bundle.
- HexagonMCInstrInfo::padEndloop(getContext(), *MCB);
- HexagonMCShuffle(*MCII, STI, *MCB);
-
- assert(HexagonMCInstrInfo::bundleSize(*MCB) <= HEXAGON_PACKET_SIZE);
+static cl::opt<unsigned> GPSize
+ ("gpsize", cl::NotHidden,
+ cl::desc("Global Pointer Addressing Size. The default size is 8."),
+ cl::Prefix,
+ cl::init(8));
+
+void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB,
+ const MCSubtargetInfo &STI, bool) {
+ assert(MCB.getOpcode() == Hexagon::BUNDLE);
+ assert(HexagonMCInstrInfo::bundleSize(MCB) <= HEXAGON_PACKET_SIZE);
+ assert(HexagonMCInstrInfo::bundleSize(MCB) > 0);
bool Extended = false;
- for (auto &I : HexagonMCInstrInfo::bundleInstructions(*MCB)) {
+ for (auto &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
MCInst *MCI = const_cast<MCInst *>(I.getInst());
if (Extended) {
if (HexagonMCInstrInfo::isDuplex(*MCII, *MCI)) {
@@ -77,11 +66,12 @@ void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
// At this point, MCB is a bundle
// Iterate through the bundle and assign addends for the instructions
- for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MCB)) {
+ for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
MCInst *MCI = const_cast<MCInst *>(I.getInst());
EmitSymbol(*MCI);
}
- MCObjectStreamer::EmitInstruction(*MCB, STI);
+
+ MCObjectStreamer::EmitInstruction(MCB, STI);
}
void HexagonMCELFStreamer::EmitSymbol(const MCInst &Inst) {
@@ -119,9 +109,11 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol,
MCSectionSubPair P = getCurrentSection();
SwitchSection(&Section);
- EmitValueToAlignment(ByteAlignment, 0, 1, 0);
- EmitLabel(Symbol);
- EmitZeros(Size);
+ if (ELFSymbol->isUndefined(false)) {
+ EmitValueToAlignment(ByteAlignment, 0, 1, 0);
+ EmitLabel(Symbol);
+ EmitZeros(Size);
+ }
// Update the maximum alignment of the section if necessary.
if (ByteAlignment > Section.getAlignment())
@@ -144,9 +136,10 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol,
ELFSymbol->setSize(MCConstantExpr::create(Size, getContext()));
}
-void HexagonMCELFStreamer::HexagonMCEmitLocalCommonSymbol(
- MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment,
- unsigned AccessSize) {
+void HexagonMCELFStreamer::HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol,
+ uint64_t Size,
+ unsigned ByteAlignment,
+ unsigned AccessSize) {
getAssembler().registerSymbol(*Symbol);
auto ELFSymbol = cast<MCSymbolELF>(Symbol);
ELFSymbol->setBinding(ELF::STB_LOCAL);
@@ -154,11 +147,12 @@ void HexagonMCELFStreamer::HexagonMCEmitLocalCommonSymbol(
HexagonMCEmitCommonSymbol(Symbol, Size, ByteAlignment, AccessSize);
}
-namespace llvm {
-MCStreamer *createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_pwrite_stream &OS, MCCodeEmitter *CE) {
- return new HexagonMCELFStreamer(Context, MAB, OS, CE);
-}
+namespace llvm {
+ MCStreamer *createHexagonELFStreamer(Triple const &TT, MCContext &Context,
+ MCAsmBackend &MAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE) {
+ return new HexagonMCELFStreamer(Context, MAB, OS, CE);
+ }
} // end namespace llvm
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
index 0ac1a68d4ef9..024dff1a2f97 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
@@ -27,7 +27,15 @@ public:
: MCELFStreamer(Context, TAB, OS, Emitter),
MCII(createHexagonMCInstrInfo()) {}
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+ HexagonMCELFStreamer(MCContext &Context,
+ MCAsmBackend &TAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *Emitter,
+ MCAssembler *Assembler) :
+ MCELFStreamer(Context, TAB, OS, Emitter),
+ MCII (createHexagonMCInstrInfo()) {}
+
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) override;
void EmitSymbol(const MCInst &Inst);
void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment,
@@ -36,8 +44,9 @@ public:
unsigned ByteAlignment, unsigned AccessSize);
};
-MCStreamer *createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_pwrite_stream &OS, MCCodeEmitter *CE);
+MCStreamer *createHexagonELFStreamer(Triple const &TT, MCContext &Context,
+ MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *CE);
} // end namespace llvm
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
index e93906a0a396..14300edc7e1b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
@@ -11,7 +11,9 @@
#include "HexagonMCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Object/ELF.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -36,7 +38,47 @@ MCFragment *llvm::HexagonMCExpr::findAssociatedFragment() const {
return Expr->findAssociatedFragment();
}
-void HexagonMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
+ switch (Expr->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Cannot handle nested target MCExpr");
+ break;
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *be = cast<MCBinaryExpr>(Expr);
+ fixELFSymbolsInTLSFixupsImpl(be->getLHS(), Asm);
+ fixELFSymbolsInTLSFixupsImpl(be->getRHS(), Asm);
+ break;
+ }
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr &symRef = *cast<MCSymbolRefExpr>(Expr);
+ switch (symRef.getKind()) {
+ default:
+ return;
+ case MCSymbolRefExpr::VK_Hexagon_GD_GOT:
+ case MCSymbolRefExpr::VK_Hexagon_LD_GOT:
+ case MCSymbolRefExpr::VK_Hexagon_GD_PLT:
+ case MCSymbolRefExpr::VK_Hexagon_LD_PLT:
+ case MCSymbolRefExpr::VK_Hexagon_IE:
+ case MCSymbolRefExpr::VK_Hexagon_IE_GOT:
+ case MCSymbolRefExpr::VK_TPREL:
+ break;
+ }
+ cast<MCSymbolELF>(symRef.getSymbol()).setType(ELF::STT_TLS);
+ break;
+ }
+ case MCExpr::Unary:
+ fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void HexagonMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
+ auto expr = getExpr();
+ fixELFSymbolsInTLSFixupsImpl(expr, Asm);
+}
MCExpr const *HexagonMCExpr::getExpr() const { return Expr; }
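
Rather than ignoring TLS fixups, the expression wrapper now walks its wrapped MCExpr recursively and forces STT_TLS on every symbol referenced through a TLS variant kind, so TLS references buried inside unary or binary expressions are typed correctly. A toy sketch of the same recursive visit (the types are illustrative, not LLVM's):

    #include <memory>
    #include <vector>

    // Minimal expression tree: SymbolRef leaves may carry a TLS variant
    // kind and a pointer standing in for MCSymbolELF::setType(STT_TLS).
    struct Expr {
      enum Kind { Constant, SymbolRef, Unary, Binary } K = Constant;
      bool TLSVariantKind = false;
      bool *SymbolIsTLS = nullptr;
      std::vector<std::unique_ptr<Expr>> Ops; // 1 op if Unary, 2 if Binary
    };

    void markTLS(const Expr &E) {
      if (E.K == Expr::SymbolRef) {
        if (E.TLSVariantKind && E.SymbolIsTLS)
          *E.SymbolIsTLS = true; // setType(ELF::STT_TLS)
        return;
      }
      for (const auto &Op : E.Ops)
        markTLS(*Op);
    }
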
@@ -75,4 +117,4 @@ void HexagonMCExpr::setSignMismatch(bool Val) {
bool HexagonMCExpr::signMismatch() const {
return SignMismatch;
-} \ No newline at end of file
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index e627f026c8ad..553ffba508a1 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -16,10 +16,9 @@
#include "Hexagon.h"
#include "HexagonBaseInfo.h"
#include "HexagonMCChecker.h"
-
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
@@ -59,31 +58,36 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII,
MCSubtargetInfo const &STI,
MCContext &Context, MCInst &MCB,
HexagonMCChecker *Check) {
- // Examine the packet and convert pairs of instructions to compound
- // instructions when possible.
- if (!HexagonDisableCompound)
- HexagonMCInstrInfo::tryCompound(MCII, Context, MCB);
// Check the bundle for errors.
- bool CheckOk = Check ? Check->check() : true;
+ bool CheckOk = Check ? Check->check(false) : true;
if (!CheckOk)
return false;
- HexagonMCShuffle(MCII, STI, MCB);
+ // Examine the packet and convert pairs of instructions to compound
+ // instructions when possible.
+ if (!HexagonDisableCompound)
+ HexagonMCInstrInfo::tryCompound(MCII, STI, Context, MCB);
+ HexagonMCShuffle(false, MCII, STI, MCB);
// Examine the packet and convert pairs of instructions to duplex
// instructions when possible.
MCInst InstBundlePreDuplex = MCInst(MCB);
if (!HexagonDisableDuplex) {
SmallVector<DuplexCandidate, 8> possibleDuplexes;
- possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(MCII, MCB);
+ possibleDuplexes =
+ HexagonMCInstrInfo::getDuplexPossibilties(MCII, STI, MCB);
HexagonMCShuffle(MCII, STI, Context, MCB, possibleDuplexes);
}
// Examines packet and pad the packet, if needed, when an
// end-loop is in the bundle.
- HexagonMCInstrInfo::padEndloop(Context, MCB);
+ HexagonMCInstrInfo::padEndloop(MCB, Context);
// If compounding and duplexing didn't reduce the size below
// 4 or less we have a packet that is too big.
if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE)
return false;
- HexagonMCShuffle(MCII, STI, MCB);
+ // Check the bundle for errors.
+ CheckOk = Check ? Check->check(true) : true;
+ if (!CheckOk)
+ return false;
+ HexagonMCShuffle(true, MCII, STI, MCB);
return true;
}
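
The canonicalization order changes: a partial checker pass (check(false)) runs before compounding, then shuffling, duplexing, and endloop padding, followed by a full pass (check(true), which includes the new checkSize) and a final shuffle. A hedged sketch of a call site — the wrapper and its name are assumptions, not part of the patch:

    #include "MCTargetDesc/HexagonMCChecker.h"
    #include "MCTargetDesc/HexagonMCInstrInfo.h"

    // canonicalizePacket drives both checker phases internally; false
    // means a check or shuffle rejected the bundle and the checker has
    // queued the corresponding errors.
    static bool finishBundle(llvm::MCInstrInfo const &MCII,
                             llvm::MCSubtargetInfo const &STI,
                             llvm::MCContext &Ctx, llvm::MCInst &Bundle,
                             llvm::HexagonMCChecker &Checker) {
      return llvm::HexagonMCInstrInfo::canonicalizePacket(MCII, STI, Ctx,
                                                          Bundle, &Checker);
    }
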
@@ -111,32 +115,14 @@ MCInst HexagonMCInstrInfo::createBundle() {
return Result;
}
-MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass,
- MCInst const &inst0,
- MCInst const &inst1) {
- assert((iClass <= 0xf) && "iClass must have range of 0 to 0xf");
- MCInst *duplexInst = new (Context) MCInst;
- duplexInst->setOpcode(Hexagon::DuplexIClass0 + iClass);
-
- MCInst *SubInst0 = new (Context) MCInst(deriveSubInst(inst0));
- MCInst *SubInst1 = new (Context) MCInst(deriveSubInst(inst1));
- duplexInst->addOperand(MCOperand::createInst(SubInst0));
- duplexInst->addOperand(MCOperand::createInst(SubInst1));
- return duplexInst;
-}
-
MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII,
MCInst const &Inst,
MCOperand const &MO) {
assert(HexagonMCInstrInfo::isExtendable(MCII, Inst) ||
HexagonMCInstrInfo::isExtended(MCII, Inst));
- MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, Inst);
MCInst XMI;
- XMI.setOpcode((Desc.isBranch() || Desc.isCall() ||
- HexagonMCInstrInfo::getType(MCII, Inst) == HexagonII::TypeCR)
- ? Hexagon::A4_ext_b
- : Hexagon::A4_ext);
+ XMI.setOpcode(Hexagon::A4_ext);
if (MO.isImm())
XMI.addOperand(MCOperand::createImm(MO.getImm() & (~0x3f)));
else if (MO.isExpr())
@@ -146,6 +132,20 @@ MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII,
return XMI;
}
+MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass,
+ MCInst const &inst0,
+ MCInst const &inst1) {
+ assert((iClass <= 0xf) && "iClass must have range of 0 to 0xf");
+ MCInst *duplexInst = new (Context) MCInst;
+ duplexInst->setOpcode(Hexagon::DuplexIClass0 + iClass);
+
+ MCInst *SubInst0 = new (Context) MCInst(deriveSubInst(inst0));
+ MCInst *SubInst1 = new (Context) MCInst(deriveSubInst(inst1));
+ duplexInst->addOperand(MCOperand::createInst(SubInst0));
+ duplexInst->addOperand(MCOperand::createInst(SubInst1));
+ return duplexInst;
+}
+
MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB,
size_t Index) {
assert(Index <= bundleSize(MCB));
@@ -173,22 +173,9 @@ HexagonMCInstrInfo::getAccessSize(MCInstrInfo const &MCII, MCInst const &MCI) {
HexagonII::MemAccesSizeMask));
}
-unsigned HexagonMCInstrInfo::getBitCount(MCInstrInfo const &MCII,
- MCInst const &MCI) {
- uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
-}
-
-// Return constant extended operand number.
-unsigned short HexagonMCInstrInfo::getCExtOpNum(MCInstrInfo const &MCII,
- MCInst const &MCI) {
- const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask);
-}
-
MCInstrDesc const &HexagonMCInstrInfo::getDesc(MCInstrInfo const &MCII,
MCInst const &MCI) {
- return (MCII.get(MCI.getOpcode()));
+ return MCII.get(MCI.getOpcode());
}
unsigned HexagonMCInstrInfo::getDuplexRegisterNumbering(unsigned Reg) {
@@ -276,34 +263,32 @@ unsigned HexagonMCInstrInfo::getExtentBits(MCInstrInfo const &MCII,
return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
}
-// Return the max value that a constant extendable operand can have
-// without being extended.
+/// Return the maximum value of an extendable operand.
int HexagonMCInstrInfo::getMaxValue(MCInstrInfo const &MCII,
MCInst const &MCI) {
- uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- unsigned isSigned =
- (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
- unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ bool S = (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
- if (isSigned) // if value is signed
- return ~(-1U << (bits - 1));
- else
- return ~(-1U << bits);
+ assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) ||
+ HexagonMCInstrInfo::isExtended(MCII, MCI));
+
+ if (S) // if value is signed
+ return (1 << (HexagonMCInstrInfo::getExtentBits(MCII, MCI) - 1)) - 1;
+ return (1 << HexagonMCInstrInfo::getExtentBits(MCII, MCI)) - 1;
}
-// Return the min value that a constant extendable operand can have
-// without being extended.
+/// Return the minimum value of an extendable operand.
int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII,
MCInst const &MCI) {
- uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- unsigned isSigned =
- (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
- unsigned bits = (F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask;
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ bool S = (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
- if (isSigned) // if value is signed
- return -1U << (bits - 1);
- else
- return 0;
+ assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) ||
+ HexagonMCInstrInfo::isExtended(MCII, MCI));
+
+ if (S) // if value is signed
+ return -(1 << (HexagonMCInstrInfo::getExtentBits(MCII, MCI) - 1));
+ return 0;
}
StringRef HexagonMCInstrInfo::getName(MCInstrInfo const &MCII,
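
The rewritten getMaxValue/getMinValue return the natural range of an N-bit extent: [-(2^(N-1)), 2^(N-1)-1] when signed, [0, 2^N-1] when not; a value outside this range forces a constant extender, which is what isConstExtended tests further below. A minimal sketch with a worked case:

    // Range of an N-bit extendable field (N < 31 for these int shifts).
    int maxValue(unsigned Bits, bool Signed) {
      return Signed ? (1 << (Bits - 1)) - 1 : (1 << Bits) - 1;
    }
    int minValue(unsigned Bits, bool Signed) {
      return Signed ? -(1 << (Bits - 1)) : 0;
    }
    // Example: an 8-bit signed extent spans [-128, 127]; an out-of-range
    // immediate must be carried by an A4_ext extender word instead.
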
@@ -319,9 +304,7 @@ unsigned short HexagonMCInstrInfo::getNewValueOp(MCInstrInfo const &MCII,
MCOperand const &HexagonMCInstrInfo::getNewValueOperand(MCInstrInfo const &MCII,
MCInst const &MCI) {
- uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- unsigned const O =
- (F >> HexagonII::NewValueOpPos) & HexagonII::NewValueOpMask;
+ unsigned O = HexagonMCInstrInfo::getNewValueOp(MCII, MCI);
MCOperand const &MCO = MCI.getOperand(O);
assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) ||
@@ -349,6 +332,13 @@ HexagonMCInstrInfo::getNewValueOperand2(MCInstrInfo const &MCII,
return (MCO);
}
+/// Return the Hexagon ISA class for the insn.
+unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = MCII.get(MCI.getOpcode()).TSFlags;
+ return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
+}
+
int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII,
MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -361,33 +351,55 @@ int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII,
return Hexagon::ArchV4;
case HexagonII::HasV5SubT:
return Hexagon::ArchV5;
+ case HexagonII::HasV55SubT:
+ return Hexagon::ArchV55;
+ case HexagonII::HasV60SubT:
+ return Hexagon::ArchV60;
}
}
-// Return the Hexagon ISA class for the insn.
-unsigned HexagonMCInstrInfo::getType(MCInstrInfo const &MCII,
- MCInst const &MCI) {
- const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
-
- return ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
-}
-
+/// Return the slots this instruction can execute out of.
unsigned HexagonMCInstrInfo::getUnits(MCInstrInfo const &MCII,
MCSubtargetInfo const &STI,
MCInst const &MCI) {
-
const InstrItinerary *II = STI.getSchedModel().InstrItineraries;
int SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass();
return ((II[SchedClass].FirstStage + HexagonStages)->getUnits());
}
-bool HexagonMCInstrInfo::hasImmExt(MCInst const &MCI) {
+/// Return the slots this instruction consumes in addition to
+/// the slot(s) it can execute out of.
+
+unsigned HexagonMCInstrInfo::getOtherReservedSlots(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI,
+ MCInst const &MCI) {
+ const InstrItinerary *II = STI.getSchedModel().InstrItineraries;
+ int SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass();
+ unsigned Slots = 0;
+
+ // FirstStage are slots that this instruction can execute in.
+ // FirstStage+1 are slots that are also consumed by this instruction.
+ // For example: vmemu can only execute in slot 0 but also consumes slot 1.
+ for (unsigned Stage = II[SchedClass].FirstStage + 1;
+ Stage < II[SchedClass].LastStage; ++Stage) {
+ unsigned Units = (Stage + HexagonStages)->getUnits();
+ if (Units > HexagonGetLastSlot())
+ break;
+ // fyi: getUnits() will return 0x1, 0x2, 0x4 or 0x8
+ Slots |= Units;
+ }
+
+ // if 0 is returned, then no additional slots are consumed by this inst.
+ return Slots;
+}
+
+bool HexagonMCInstrInfo::hasDuplex(MCInstrInfo const &MCII, MCInst const &MCI) {
if (!HexagonMCInstrInfo::isBundle(MCI))
return false;
for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) {
auto MI = I.getInst();
- if (isImmext(*MI))
+ if (HexagonMCInstrInfo::isDuplex(MCII, *MI))
return true;
}
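
Slot masks are one-hot: 0x1, 0x2, 0x4, 0x8 for slots 0 through 3. The first itinerary stage lists where an insn can execute; getOtherReservedSlots reads the following stages for slots the insn merely occupies — e.g. vmemu executes in slot 0 yet also consumes slot 1. A sketch over a plain vector of stage masks standing in for the itinerary:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    uint32_t otherReservedSlots(const std::vector<uint32_t> &StageUnits) {
      uint32_t Slots = 0;
      // Stage 0 holds the executable slots; later stages only reserve.
      for (std::size_t I = 1; I < StageUnits.size(); ++I) {
        if (StageUnits[I] > 0x8)
          break;                // past slot 3: a non-slot resource
        Slots |= StageUnits[I]; // one-hot slot bits: 0x1, 0x2, 0x4, 0x8
      }
      return Slots;
    }
    // Example: vmemu's stages {0x1, 0x2} -> executes in slot 0, and this
    // returns 0x2 for the additionally reserved slot 1.
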
@@ -398,7 +410,20 @@ bool HexagonMCInstrInfo::hasExtenderForIndex(MCInst const &MCB, size_t Index) {
return extenderForIndex(MCB, Index) != nullptr;
}
-// Return whether the instruction is a legal new-value producer.
+bool HexagonMCInstrInfo::hasImmExt(MCInst const &MCI) {
+ if (!HexagonMCInstrInfo::isBundle(MCI))
+ return false;
+
+ for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCI)) {
+ auto MI = I.getInst();
+ if (isImmext(*MI))
+ return true;
+ }
+
+ return false;
+}
+
+/// Return whether the insn produces a new value.
bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII,
MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -418,46 +443,19 @@ MCInst const &HexagonMCInstrInfo::instruction(MCInst const &MCB, size_t Index) {
return *MCB.getOperand(bundleInstructionsOffset + Index).getInst();
}
+/// Return whether the instruction is an accumulator.
+bool HexagonMCInstrInfo::isAccumulator(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask);
+}
+
bool HexagonMCInstrInfo::isBundle(MCInst const &MCI) {
auto Result = Hexagon::BUNDLE == MCI.getOpcode();
assert(!Result || (MCI.size() > 0 && MCI.getOperand(0).isImm()));
return Result;
}
-// Return whether the insn is an actual insn.
-bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) {
- return (!HexagonMCInstrInfo::getDesc(MCII, MCI).isPseudo() &&
- !HexagonMCInstrInfo::isPrefix(MCII, MCI) &&
- HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP);
-}
-
-bool HexagonMCInstrInfo::isCofMax1(MCInstrInfo const &MCII, MCInst const &MCI) {
- const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- return ((F >> HexagonII::CofMax1Pos) & HexagonII::CofMax1Mask);
-}
-
-bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII,
- MCInst const &MCI) {
- return (getType(MCII, MCI) == HexagonII::TypeCOMPOUND);
-}
-
-bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) {
- return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) ||
- (Reg >= Hexagon::D8 && Reg <= Hexagon::D11));
-}
-
-bool HexagonMCInstrInfo::isDuplex(MCInstrInfo const &MCII, MCInst const &MCI) {
- return HexagonII::TypeDUPLEX == HexagonMCInstrInfo::getType(MCII, MCI);
-}
-
-// Return whether the instruction needs to be constant extended.
-// 1) Always return true if the instruction has 'isExtended' flag set.
-//
-// isExtendable:
-// 2) For immediate extended operands, return true only if the value is
-// out-of-range.
-// 3) For global address, always return true.
-
bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
MCInst const &MCI) {
if (HexagonMCInstrInfo::isExtended(MCII, MCI))
@@ -470,9 +468,9 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
return true;
// Branch insns are handled as necessary by relaxation.
if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeJ) ||
- (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCOMPOUND &&
+ (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCJ &&
HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()) ||
- (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV &&
+ (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNCJ &&
HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()))
return false;
// Otherwise loop instructions and other CR insts are handled by relaxation
@@ -492,6 +490,30 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
return (MinValue > Value || Value > MaxValue);
}
+bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) {
+ return !HexagonMCInstrInfo::getDesc(MCII, MCI).isPseudo() &&
+ !HexagonMCInstrInfo::isPrefix(MCII, MCI);
+}
+
+bool HexagonMCInstrInfo::isCofMax1(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::CofMax1Pos) & HexagonII::CofMax1Mask);
+}
+
+bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ return (getType(MCII, MCI) == HexagonII::TypeCJ);
+}
+
+bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) {
+ return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) ||
+ (Reg >= Hexagon::D8 && Reg <= Hexagon::D11));
+}
+
+bool HexagonMCInstrInfo::isDuplex(MCInstrInfo const &MCII, MCInst const &MCI) {
+ return HexagonII::TypeDUPLEX == HexagonMCInstrInfo::getType(MCII, MCI);
+}
+
bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII,
MCInst const &MCI) {
uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -510,9 +532,7 @@ bool HexagonMCInstrInfo::isFloat(MCInstrInfo const &MCII, MCInst const &MCI) {
}
bool HexagonMCInstrInfo::isImmext(MCInst const &MCI) {
- auto Op = MCI.getOpcode();
- return (Op == Hexagon::A4_ext_b || Op == Hexagon::A4_ext_c ||
- Op == Hexagon::A4_ext_g || Op == Hexagon::A4_ext);
+ return MCI.getOpcode() == Hexagon::A4_ext;
}
bool HexagonMCInstrInfo::isInnerLoop(MCInst const &MCI) {
@@ -530,20 +550,17 @@ bool HexagonMCInstrInfo::isIntRegForSubInst(unsigned Reg) {
(Reg >= Hexagon::R16 && Reg <= Hexagon::R23));
}
-// Return whether the insn is a new-value consumer.
+/// Return whether the insn expects a newly produced value.
bool HexagonMCInstrInfo::isNewValue(MCInstrInfo const &MCII,
MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask);
}
-// Return whether the operand can be constant extended.
-bool HexagonMCInstrInfo::isOperandExtended(MCInstrInfo const &MCII,
- MCInst const &MCI,
- unsigned short OperandNum) {
- uint64_t const F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) ==
- OperandNum;
+/// Return whether the operand is extendable.
+bool HexagonMCInstrInfo::isOpExtendable(MCInstrInfo const &MCII,
+ MCInst const &MCI, unsigned short O) {
+ return (O == HexagonMCInstrInfo::getExtendableOp(MCII, MCI));
}
bool HexagonMCInstrInfo::isOuterLoop(MCInst const &MCI) {
@@ -558,6 +575,10 @@ bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII,
return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
}
+bool HexagonMCInstrInfo::isPrefix(MCInstrInfo const &MCII, MCInst const &MCI) {
+ return HexagonII::TypeEXTENDER == HexagonMCInstrInfo::getType(MCII, MCI);
+}
+
bool HexagonMCInstrInfo::isPredicateLate(MCInstrInfo const &MCII,
MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -582,12 +603,22 @@ bool HexagonMCInstrInfo::isPredReg(unsigned Reg) {
return (Reg >= Hexagon::P0 && Reg <= Hexagon::P3_0);
}
-bool HexagonMCInstrInfo::isPrefix(MCInstrInfo const &MCII, MCInst const &MCI) {
- return (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypePREFIX);
+/// Return whether the insn can be packaged only with A and X-type insns.
+bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask);
}
-bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) {
+/// Return whether the insn can be packaged only with an A-type insn in slot #1.
+bool HexagonMCInstrInfo::isSoloAin1(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::SoloAin1Pos) & HexagonII::SoloAin1Mask);
+}
+
+/// Return whether the insn is solo, i.e., cannot be in a packet.
+bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = MCII.get(MCI.getOpcode()).TSFlags;
return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
}
@@ -663,17 +694,6 @@ bool HexagonMCInstrInfo::isSubInstruction(MCInst const &MCI) {
}
}
-bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) {
- const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- return ((F >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask);
-}
-
-bool HexagonMCInstrInfo::isSoloAin1(MCInstrInfo const &MCII,
- MCInst const &MCI) {
- const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- return ((F >> HexagonII::SoloAin1Pos) & HexagonII::SoloAin1Mask);
-}
-
bool HexagonMCInstrInfo::isVector(MCInstrInfo const &MCII, MCInst const &MCI) {
if ((getType(MCII, MCI) <= HexagonII::TypeCVI_LAST) &&
(getType(MCII, MCI) >= HexagonII::TypeCVI_FIRST))
@@ -705,16 +725,26 @@ bool HexagonMCInstrInfo::mustExtend(MCExpr const &Expr) {
return HExpr.mustExtend();
}
void HexagonMCInstrInfo::setMustNotExtend(MCExpr const &Expr, bool Val) {
- HexagonMCExpr &HExpr =
- const_cast<HexagonMCExpr &>(cast<HexagonMCExpr>(Expr));
+ HexagonMCExpr &HExpr = const_cast<HexagonMCExpr &>(cast<HexagonMCExpr>(Expr));
HExpr.setMustNotExtend(Val);
}
bool HexagonMCInstrInfo::mustNotExtend(MCExpr const &Expr) {
HexagonMCExpr const &HExpr = cast<HexagonMCExpr>(Expr);
return HExpr.mustNotExtend();
}
+void HexagonMCInstrInfo::setS23_2_reloc(MCExpr const &Expr, bool Val) {
+ HexagonMCExpr &HExpr =
+ const_cast<HexagonMCExpr &>(*llvm::cast<HexagonMCExpr>(&Expr));
+ HExpr.setS23_2_reloc(Val);
+}
+bool HexagonMCInstrInfo::s23_2_reloc(MCExpr const &Expr) {
+ HexagonMCExpr const *HExpr = llvm::dyn_cast<HexagonMCExpr>(&Expr);
+ if (!HExpr)
+ return false;
+ return HExpr->s23_2_reloc();
+}
-void HexagonMCInstrInfo::padEndloop(MCContext &Context, MCInst &MCB) {
+void HexagonMCInstrInfo::padEndloop(MCInst &MCB, MCContext &Context) {
MCInst Nop;
Nop.setOpcode(Hexagon::A2_nop);
assert(isBundle(MCB));
@@ -727,22 +757,8 @@ void HexagonMCInstrInfo::padEndloop(MCContext &Context, MCInst &MCB) {
bool HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII,
MCInst const &MCI) {
- if (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR)
- return false;
-
- unsigned SchedClass = HexagonMCInstrInfo::getDesc(MCII, MCI).getSchedClass();
- switch (SchedClass) {
- case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123:
- case Hexagon::Sched::ALU64_tc_2_SLOT23:
- case Hexagon::Sched::ALU64_tc_3x_SLOT23:
- case Hexagon::Sched::M_tc_2_SLOT23:
- case Hexagon::Sched::M_tc_3x_SLOT23:
- case Hexagon::Sched::S_2op_tc_2_SLOT23:
- case Hexagon::Sched::S_3op_tc_2_SLOT23:
- case Hexagon::Sched::S_3op_tc_3x_SLOT23:
- return true;
- }
- return false;
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return (F >> HexagonII::PrefersSlot3Pos) & HexagonII::PrefersSlot3Mask;
}
void HexagonMCInstrInfo::replaceDuplex(MCContext &Context, MCInst &MCB,
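
prefersSlot3 now reads a dedicated TSFlags bit instead of enumerating scheduling classes, the same (F >> Pos) & Mask accessor pattern used throughout this file. A sketch of the pattern; the bit position below is an invented placeholder, not the real HexagonII value:

    #include <cstdint>

    // Each per-opcode property lives at a fixed position in the 64-bit
    // TSFlags word generated by TableGen.
    constexpr unsigned PrefersSlot3Pos = 57; // placeholder position
    constexpr uint64_t PrefersSlot3Mask = 1;

    bool prefersSlot3(uint64_t TSFlags) {
      return (TSFlags >> PrefersSlot3Pos) & PrefersSlot3Mask;
    }
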
@@ -778,15 +794,6 @@ void HexagonMCInstrInfo::setMemStoreReorderEnabled(MCInst &MCI) {
Operand.setImm(Operand.getImm() | memStoreReorderEnabledMask);
assert(isMemStoreReorderEnabled(MCI));
}
-void HexagonMCInstrInfo::setS23_2_reloc(MCExpr const &Expr, bool Val) {
- HexagonMCExpr &HExpr =
- const_cast<HexagonMCExpr &>(*llvm::cast<HexagonMCExpr>(&Expr));
- HExpr.setS23_2_reloc(Val);
-}
-bool HexagonMCInstrInfo::s23_2_reloc(MCExpr const &Expr) {
- HexagonMCExpr const &HExpr = *llvm::cast<HexagonMCExpr>(&Expr);
- return HExpr.s23_2_reloc();
-}
void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) {
assert(isBundle(MCI));
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index d701c3ade69e..2e989adb5ccb 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -19,11 +19,8 @@
namespace llvm {
class HexagonMCChecker;
-class MCContext;
class MCInstrDesc;
class MCInstrInfo;
-class MCInst;
-class MCOperand;
class MCSubtargetInfo;
namespace HexagonII {
enum class MemAccessSize;
@@ -67,16 +64,6 @@ bool canonicalizePacket(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCContext &Context, MCInst &MCB,
HexagonMCChecker *Checker);
-// Clamp off upper 26 bits of extendable operand for emission
-void clampExtended(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
-
-MCInst createBundle();
-
-// Return the extender for instruction at Index or nullptr if none
-MCInst const *extenderForIndex(MCInst const &MCB, size_t Index);
-void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
- MCInst const &MCI);
-
// Create a duplex instruction given the two subinsts
MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0,
MCInst const &inst1);
@@ -86,27 +73,28 @@ MCInst deriveExtender(MCInstrInfo const &MCII, MCInst const &Inst,
// Convert this instruction in to a duplex subinst
MCInst deriveSubInst(MCInst const &Inst);
+// Clamp off upper 26 bits of extendable operand for emission
+void clampExtended(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
+
+MCInst createBundle();
+
// Return the extender for instruction at Index or nullptr if none
MCInst const *extenderForIndex(MCInst const &MCB, size_t Index);
+void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
+ MCInst const &MCI);
// Return memory access size
HexagonII::MemAccessSize getAccessSize(MCInstrInfo const &MCII,
MCInst const &MCI);
-
-// Return number of bits in the constant extended operand.
-unsigned getBitCount(MCInstrInfo const &MCII, MCInst const &MCI);
-
-// Return constant extended operand number.
-unsigned short getCExtOpNum(MCInstrInfo const &MCII, MCInst const &MCI);
-
MCInstrDesc const &getDesc(MCInstrInfo const &MCII, MCInst const &MCI);
// Return which duplex group this instruction belongs to
unsigned getDuplexCandidateGroup(MCInst const &MI);
// Return a list of all possible instruction duplex combinations
-SmallVector<DuplexCandidate, 8> getDuplexPossibilties(MCInstrInfo const &MCII,
- MCInst const &MCB);
+SmallVector<DuplexCandidate, 8>
+getDuplexPossibilties(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCInst const &MCB);
unsigned getDuplexRegisterNumbering(unsigned Reg);
MCExpr const &getExpr(MCExpr const &Expr);
@@ -143,7 +131,6 @@ MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI);
unsigned short getNewValueOp2(MCInstrInfo const &MCII, MCInst const &MCI);
MCOperand const &getNewValueOperand2(MCInstrInfo const &MCII,
MCInst const &MCI);
-
int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI);
// Return the Hexagon ISA class for the insn.
@@ -152,6 +139,9 @@ unsigned getType(MCInstrInfo const &MCII, MCInst const &MCI);
/// Return the slots used by the insn.
unsigned getUnits(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCInst const &MCI);
+unsigned getOtherReservedSlots(MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI, MCInst const &MCI);
+bool hasDuplex(MCInstrInfo const &MCII, MCInst const &MCI);
// Does the packet have an extender for the instruction at Index
bool hasExtenderForIndex(MCInst const &MCB, size_t Index);
@@ -161,19 +151,6 @@ bool hasImmExt(MCInst const &MCI);
// Return whether the instruction is a legal new-value producer.
bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
bool hasNewValue2(MCInstrInfo const &MCII, MCInst const &MCI);
-
-// Return the instruction at Index
-MCInst const &instruction(MCInst const &MCB, size_t Index);
-
-// Returns whether this MCInst is a wellformed bundle
-bool isBundle(MCInst const &MCI);
-
-// Return whether the insn is an actual insn.
-bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);
-bool isCofMax1(MCInstrInfo const &MCII, MCInst const &MCI);
-bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI);
-
-// Return the duplex iclass given the two duplex classes
unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb);
int64_t minConstant(MCInst const &MCI, size_t Index);
@@ -189,6 +166,18 @@ template <unsigned N> bool inRange(MCInst const &MCI, size_t Index) {
return isUInt<N>(minConstant(MCI, Index));
}
+// Return the instruction at Index
+MCInst const &instruction(MCInst const &MCB, size_t Index);
+bool isAccumulator(MCInstrInfo const &MCII, MCInst const &MCI);
+
+// Returns whether this MCInst is a wellformed bundle
+bool isBundle(MCInst const &MCI);
+
+// Return whether the insn is an actual insn.
+bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isCofMax1(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI);
+
// Return whether the instruction needs to be constant extended.
bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -229,15 +218,12 @@ bool isMemStoreReorderEnabled(MCInst const &MCI);
// Return whether the insn is a new-value consumer.
bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
-
-// Return true if the operand can be constant extended.
-bool isOperandExtended(MCInstrInfo const &MCII, MCInst const &MCI,
- unsigned short OperandNum);
+bool isOpExtendable(MCInstrInfo const &MCII, MCInst const &MCI, unsigned short);
// Can these two instructions be duplexed
bool isOrderedDuplexPair(MCInstrInfo const &MCII, MCInst const &MIa,
bool ExtendedA, MCInst const &MIb, bool ExtendedB,
- bool bisReversable);
+ bool bisReversable, MCSubtargetInfo const &STI);
// Returns whether this bundle is an endloop1
bool isOuterLoop(MCInst const &MCI);
@@ -270,12 +256,11 @@ bool mustExtend(MCExpr const &Expr);
bool mustNotExtend(MCExpr const &Expr);
// Pad the bundle with nops to satisfy endloop requirements
-void padEndloop(MCContext &Context, MCInst &MCI);
-
+void padEndloop(MCInst &MCI, MCContext &Context);
bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI);
// Replace the instructions inside MCB, represented by Candidate
-void replaceDuplex(MCContext &Context, MCInst &MCB, DuplexCandidate Candidate);
+void replaceDuplex(MCContext &Context, MCInst &MCI, DuplexCandidate Candidate);
bool s23_2_reloc(MCExpr const &Expr);
// Marks a bundle as endloop0
@@ -295,7 +280,8 @@ unsigned SubregisterBit(unsigned Consumer, unsigned Producer,
unsigned Producer2);
// Attempt to find and replace compound pairs
-void tryCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
+void tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+ MCContext &Context, MCInst &MCI);
}
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
index 7f8e7a4edb0c..529a5fd5ed82 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
@@ -33,42 +33,39 @@ void HexagonMCShuffler::init(MCInst &MCB) {
MCInst const *Extender = nullptr;
// Copy the bundle for the shuffling.
for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
- assert(!HexagonMCInstrInfo::getDesc(MCII, *I.getInst()).isPseudo());
- MCInst *MI = const_cast<MCInst *>(I.getInst());
+ MCInst &MI = *const_cast<MCInst *>(I.getInst());
+ DEBUG(dbgs() << "Shuffling: " << MCII.getName(MI.getOpcode()) << '\n');
+ assert(!HexagonMCInstrInfo::getDesc(MCII, MI).isPseudo());
- if (!HexagonMCInstrInfo::isImmext(*MI)) {
- append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, *MI),
- false);
+ if (!HexagonMCInstrInfo::isImmext(MI)) {
+ append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, MI));
Extender = nullptr;
} else
- Extender = MI;
+ Extender = &MI;
}
}
BundleFlags = MCB.getOperand(0).getImm();
}
-void HexagonMCShuffler::init(MCInst &MCB, MCInst const *AddMI,
+void HexagonMCShuffler::init(MCInst &MCB, MCInst const &AddMI,
bool bInsertAtFront) {
if (HexagonMCInstrInfo::isBundle(MCB)) {
- if (bInsertAtFront && AddMI)
- append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, *AddMI),
- false);
+ if (bInsertAtFront)
+ append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, AddMI));
MCInst const *Extender = nullptr;
// Copy the bundle for the shuffling.
for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
assert(!HexagonMCInstrInfo::getDesc(MCII, *I.getInst()).isPseudo());
- MCInst *MI = const_cast<MCInst *>(I.getInst());
- if (!HexagonMCInstrInfo::isImmext(*MI)) {
- append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, *MI),
- false);
+ MCInst &MI = *const_cast<MCInst *>(I.getInst());
+ if (!HexagonMCInstrInfo::isImmext(MI)) {
+ append(MI, Extender, HexagonMCInstrInfo::getUnits(MCII, STI, MI));
Extender = nullptr;
} else
- Extender = MI;
+ Extender = &MI;
}
- if (!bInsertAtFront && AddMI)
- append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, *AddMI),
- false);
+ if (!bInsertAtFront)
+ append(AddMI, nullptr, HexagonMCInstrInfo::getUnits(MCII, STI, AddMI));
}
BundleFlags = MCB.getOperand(0).getImm();
@@ -80,11 +77,11 @@ void HexagonMCShuffler::copyTo(MCInst &MCB) {
// Copy the results into the bundle.
for (HexagonShuffler::iterator I = begin(); I != end(); ++I) {
- MCInst const *MI = I->getDesc();
+ MCInst const &MI = I->getDesc();
MCInst const *Extender = I->getExtender();
if (Extender)
MCB.addOperand(MCOperand::createInst(Extender));
- MCB.addOperand(MCOperand::createInst(MI));
+ MCB.addOperand(MCOperand::createInst(&MI));
}
}
@@ -98,9 +95,9 @@ bool HexagonMCShuffler::reshuffleTo(MCInst &MCB) {
return (!getError());
}
-bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
- MCInst &MCB) {
- HexagonMCShuffler MCS(MCII, STI, MCB);
+bool llvm::HexagonMCShuffle(bool Fatal, MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI, MCInst &MCB) {
+ HexagonMCShuffler MCS(true, MCII, STI, MCB);
if (DisableShuffle)
// Ignore if the user chose to disable shuffling.
@@ -124,6 +121,18 @@ bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
if (!MCS.reshuffleTo(MCB)) {
// Reshuffling failed; diagnose the error, which is unexpected at this point.
unsigned shuffleError = MCS.getError();
+
+ if (!Fatal && (shuffleError != HexagonShuffler::SHUFFLE_SUCCESS))
+ return false;
+ if (shuffleError != HexagonShuffler::SHUFFLE_SUCCESS) {
+ errs() << "\nFailing packet:\n";
+ for (const auto& I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+ MCInst *MI = const_cast<MCInst *>(I.getInst());
+ errs() << HexagonMCInstrInfo::getName(MCII, *MI) << ' '
+ << HexagonMCInstrInfo::getDesc(MCII, *MI).getOpcode() << '\n';
+ }
+ errs() << '\n';
+ }
+
switch (shuffleError) {
default:
llvm_unreachable("unknown error");
@@ -176,7 +185,7 @@ llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
DuplexCandidate duplexToTry = possibleDuplexes.pop_back_val();
MCInst Attempt(MCB);
HexagonMCInstrInfo::replaceDuplex(Context, Attempt, duplexToTry);
- HexagonMCShuffler MCS(MCII, STI, Attempt); // copy packet to the shuffler
+ HexagonMCShuffler MCS(true, MCII, STI, Attempt); // copy packet to the shuffler
if (MCS.size() == 1) { // case of one duplex
// copy the created duplex in the shuffler to the bundle
MCS.copyTo(MCB);
@@ -191,7 +200,7 @@ llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
}
if (doneShuffling == false) {
- HexagonMCShuffler MCS(MCII, STI, MCB);
+ HexagonMCShuffler MCS(true, MCII, STI, MCB);
doneShuffling = MCS.reshuffleTo(MCB); // shuffle
shuffleError = MCS.getError();
}
@@ -202,8 +211,8 @@ llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
}
bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
- MCInst &MCB, MCInst const *AddMI, int fixupCount) {
- if (!HexagonMCInstrInfo::isBundle(MCB) || !AddMI)
+ MCInst &MCB, MCInst const &AddMI, int fixupCount) {
+ if (!HexagonMCInstrInfo::isBundle(MCB))
return false;
// if fixups present, make sure we don't insert too many nops that would
@@ -211,8 +220,15 @@ bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
unsigned int bundleSize = HexagonMCInstrInfo::bundleSize(MCB);
if (bundleSize >= HEXAGON_PACKET_SIZE)
return false;
+ bool bhasDuplex = HexagonMCInstrInfo::hasDuplex(MCII, MCB);
if (fixupCount >= 2) {
- return false;
+ if (bhasDuplex) {
+ if (bundleSize >= HEXAGON_PACKET_SIZE - 1) {
+ return false;
+ }
+ } else {
+ return false;
+ }
} else {
if (bundleSize == HEXAGON_PACKET_SIZE - 1 && fixupCount)
return false;
@@ -221,7 +237,16 @@ bool llvm::HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
if (DisableShuffle)
return false;
- HexagonMCShuffler MCS(MCII, STI, MCB, AddMI);
+ // mgl: temporary workaround (the shuffler does not account for a duplex
+ // taking up two slots; for example, three nops could be put into a packet
+ // already containing a duplex, oversubscribing the slots by one).
+ unsigned maxBundleSize = (HexagonMCInstrInfo::hasImmExt(MCB))
+ ? HEXAGON_PACKET_SIZE
+ : HEXAGON_PACKET_SIZE - 1;
+ if (bhasDuplex && bundleSize >= maxBundleSize)
+ return false;
+
+ HexagonMCShuffler MCS(MCII, STI, MCB, AddMI, false);
if (!MCS.reshuffleTo(MCB)) {
unsigned shuffleError = MCS.getError();
switch (shuffleError) {
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
index a21cce1fc240..14bbfda4c914 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h
@@ -27,16 +27,16 @@ class HexagonMCShuffler : public HexagonShuffler {
bool duplex_present;
public:
- HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
- MCInst &MCB)
+ HexagonMCShuffler(bool Fatal, MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI, MCInst &MCB)
: HexagonShuffler(MCII, STI) {
init(MCB);
};
HexagonMCShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
- MCInst &MCB, const MCInst *AddMI,
- bool bInsertAtFront = false)
+ MCInst &MCB, MCInst const &AddMI,
+ bool InsertAtFront)
: HexagonShuffler(MCII, STI) {
- init(MCB, AddMI, bInsertAtFront);
+ init(MCB, AddMI, InsertAtFront);
};
// Copy reordered bundle to another.
@@ -49,14 +49,14 @@ public:
private:
void init(MCInst &MCB);
- void init(MCInst &MCB, const MCInst *AddMI, bool bInsertAtFront = false);
+ void init(MCInst &MCB, MCInst const &AddMI, bool InsertAtFront);
};
// Invocation of the shuffler.
+bool HexagonMCShuffle(bool Fatal, MCInstrInfo const &MCII,
+ MCSubtargetInfo const &STI, MCInst &);
bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
- MCInst &);
-bool HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
- MCInst &, const MCInst *, int);
+ MCInst &, MCInst const &, int);
unsigned HexagonMCShuffle(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
MCContext &Context, MCInst &,
SmallVector<DuplexCandidate, 8>);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 694cf582f8d9..bb98c2bbef6d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -66,6 +67,12 @@ static cl::opt<bool> HexagonV55ArchVariant("mv55", cl::Hidden, cl::init(false),
static cl::opt<bool> HexagonV60ArchVariant("mv60", cl::Hidden, cl::init(false),
cl::desc("Build for Hexagon V60"));
+static cl::opt<bool> HexagonV62ArchVariant("mv62", cl::Hidden, cl::init(false),
+ cl::desc("Build for Hexagon V62"));
+
+static cl::opt<bool> EnableHVX("mhvx", cl::Hidden, cl::init(false),
+ cl::desc("Enable Hexagon Vector Extension (HVX)"));
+
static StringRef DefaultArch = "hexagonv60";
static StringRef HexagonGetArchVariant() {
@@ -77,6 +84,8 @@ static StringRef HexagonGetArchVariant() {
return "hexagonv55";
if (HexagonV60ArchVariant)
return "hexagonv60";
+ if (HexagonV62ArchVariant)
+ return "hexagonv62";
return "";
}
@@ -95,31 +104,16 @@ StringRef Hexagon_MC::selectHexagonCPU(const Triple &TT, StringRef CPU) {
return ArchV;
}
-MCInstrInfo *llvm::createHexagonMCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitHexagonMCInstrInfo(X);
- return X;
-}
-
-static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitHexagonMCRegisterInfo(X, Hexagon::R31);
- return X;
-}
-
-static MCSubtargetInfo *
-createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- CPU = Hexagon_MC::selectHexagonCPU(TT, CPU);
- return createHexagonMCSubtargetInfoImpl(TT, CPU, FS);
-}
+unsigned llvm::HexagonGetLastSlot() { return HexagonItinerariesV4FU::SLOT3; }
namespace {
class HexagonTargetAsmStreamer : public HexagonTargetStreamer {
public:
HexagonTargetAsmStreamer(MCStreamer &S,
- formatted_raw_ostream &, bool,
- MCInstPrinter &)
+ formatted_raw_ostream &OS,
+ bool isVerboseAsm,
+ MCInstPrinter &IP)
: HexagonTargetStreamer(S) {}
void prettyPrintAsm(MCInstPrinter &InstPrinter, raw_ostream &OS,
@@ -156,24 +150,15 @@ public:
class HexagonTargetELFStreamer : public HexagonTargetStreamer {
public:
+ MCELFStreamer &getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+ }
HexagonTargetELFStreamer(MCStreamer &S, MCSubtargetInfo const &STI)
: HexagonTargetStreamer(S) {
- auto Bits = STI.getFeatureBits();
- unsigned Flags = 0;
- if (Bits[Hexagon::ArchV60])
- Flags = ELF::EF_HEXAGON_MACH_V60;
- else if (Bits[Hexagon::ArchV55])
- Flags = ELF::EF_HEXAGON_MACH_V55;
- else if (Bits[Hexagon::ArchV5])
- Flags = ELF::EF_HEXAGON_MACH_V5;
- else if (Bits[Hexagon::ArchV4])
- Flags = ELF::EF_HEXAGON_MACH_V4;
- getStreamer().getAssembler().setELFHeaderEFlags(Flags);
+ MCAssembler &MCA = getStreamer().getAssembler();
+ MCA.setELFHeaderEFlags(Hexagon_MC::GetELFFlags(STI));
}
- MCELFStreamer &getStreamer() {
- return static_cast<MCELFStreamer &>(Streamer);
- }
void EmitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment,
@@ -196,13 +181,26 @@ public:
} // end anonymous namespace
+llvm::MCInstrInfo *llvm::createHexagonMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitHexagonMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitHexagonMCRegisterInfo(X, Hexagon::R31);
+ return X;
+}
+
static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TT) {
MCAsmInfo *MAI = new HexagonMCAsmInfo(TT);
// VirtualFP = (R30 + #0).
MCCFIInstruction Inst =
- MCCFIInstruction::createDefCfa(nullptr, Hexagon::R30, 0);
+ MCCFIInstruction::createDefCfa(nullptr,
+ MRI.getDwarfRegNum(Hexagon::R30, true), 0);
MAI->addInitialFrameState(Inst);
return MAI;
@@ -212,31 +210,138 @@ static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
- return (new HexagonInstPrinter(MAI, MII, MRI));
+ return new HexagonInstPrinter(MAI, MII, MRI);
else
return nullptr;
}
-static MCTargetStreamer *createMCAsmTargetStreamer(MCStreamer &S,
- formatted_raw_ostream &OS,
- MCInstPrinter *InstPrint,
- bool IsVerboseAsm) {
- return new HexagonTargetAsmStreamer(S, OS, IsVerboseAsm, *InstPrint);
+static MCTargetStreamer *
+createMCAsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS,
+ MCInstPrinter *IP, bool IsVerboseAsm) {
+ return new HexagonTargetAsmStreamer(S, OS, IsVerboseAsm, *IP);
}
-static MCStreamer *createMCStreamer(Triple const &T, MCContext &Context,
- MCAsmBackend &MAB, raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter, bool RelaxAll) {
- return createHexagonELFStreamer(Context, MAB, OS, Emitter);
+static MCStreamer *createMCStreamer(Triple const &T,
+ MCContext &Context,
+ MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter,
+ bool RelaxAll) {
+ return createHexagonELFStreamer(T, Context, MAB, OS, Emitter);
}
static MCTargetStreamer *
-createHexagonObjectTargetStreamer(MCStreamer &S, MCSubtargetInfo const &STI) {
+createHexagonObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
return new HexagonTargetELFStreamer(S, STI);
}
+static void LLVM_ATTRIBUTE_UNUSED clearFeature(MCSubtargetInfo *STI,
+ uint64_t F) {
+ uint64_t FB = STI->getFeatureBits().to_ullong();
+ if (FB & (1ULL << F))
+ STI->ToggleFeature(F);
+}
+
+static bool LLVM_ATTRIBUTE_UNUSED checkFeature(MCSubtargetInfo *STI,
+ uint64_t F) {
+ uint64_t FB = STI->getFeatureBits().to_ullong();
+ return (FB & (1ULL << F)) != 0;
+}
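+// A usage sketch (an assumption, not code from this patch): the helpers above
+// flip a single subtarget feature bit, e.g. dropping double-vector HVX while
+// leaving the remaining features untouched:
+//   if (checkFeature(STI, Hexagon::ExtensionHVXDbl))
+//     clearFeature(STI, Hexagon::ExtensionHVXDbl);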
+
+StringRef Hexagon_MC::ParseHexagonTriple(const Triple &TT, StringRef CPU) {
+ StringRef CPUName = Hexagon_MC::selectHexagonCPU(TT, CPU);
+ StringRef FS = "";
+ if (EnableHVX) {
+ if (CPUName.equals_lower("hexagonv60") ||
+ CPUName.equals_lower("hexagonv62"))
+ FS = "+hvx";
+ }
+ return FS;
+}
+
+static bool isCPUValid(const std::string &CPU) {
+ std::vector<std::string> table {
+ "hexagonv4",
+ "hexagonv5",
+ "hexagonv55",
+ "hexagonv60",
+ "hexagonv62",
+ };
+
+ return std::find(table.begin(), table.end(), CPU) != table.end();
+}
+
+MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT,
+ StringRef CPU,
+ StringRef FS) {
+ StringRef ArchFS = (FS.size()) ? FS : Hexagon_MC::ParseHexagonTriple(TT, CPU);
+ StringRef CPUName = Hexagon_MC::selectHexagonCPU(TT, CPU);
+ if (!isCPUValid(CPUName.str())) {
+ errs() << "error: invalid CPU \"" << CPUName.str().c_str()
+ << "\" specified\n";
+ return nullptr;
+ }
+
+ MCSubtargetInfo *X = createHexagonMCSubtargetInfoImpl(TT, CPUName, ArchFS);
+ if (X->getFeatureBits()[Hexagon::ExtensionHVXDbl]) {
+ llvm::FeatureBitset Features = X->getFeatureBits();
+ X->setFeatureBits(Features.set(Hexagon::ExtensionHVX));
+ }
+ return X;
+}
+
+unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) {
+ static std::map<StringRef,unsigned> ElfFlags = {
+ {"hexagonv4", ELF::EF_HEXAGON_MACH_V4},
+ {"hexagonv5", ELF::EF_HEXAGON_MACH_V5},
+ {"hexagonv55", ELF::EF_HEXAGON_MACH_V55},
+ {"hexagonv60", ELF::EF_HEXAGON_MACH_V60},
+ {"hexagonv62", ELF::EF_HEXAGON_MACH_V62},
+ };
+
+ auto F = ElfFlags.find(STI.getCPU());
+ assert(F != ElfFlags.end() && "Unrecognized Architecture");
+ return F->second;
+}
+
+namespace {
+class HexagonMCInstrAnalysis : public MCInstrAnalysis {
+public:
+ HexagonMCInstrAnalysis(MCInstrInfo const *Info) : MCInstrAnalysis(Info) {}
+
+ bool isUnconditionalBranch(MCInst const &Inst) const override {
+ //assert(!HexagonMCInstrInfo::isBundle(Inst));
+ return MCInstrAnalysis::isUnconditionalBranch(Inst);
+ }
+
+ bool isConditionalBranch(MCInst const &Inst) const override {
+ //assert(!HexagonMCInstrInfo::isBundle(Inst));
+ return MCInstrAnalysis::isConditionalBranch(Inst);
+ }
+
+ bool evaluateBranch(MCInst const &Inst, uint64_t Addr,
+ uint64_t Size, uint64_t &Target) const override {
+ //assert(!HexagonMCInstrInfo::isBundle(Inst));
+ if (!HexagonMCInstrInfo::isExtendable(*Info, Inst))
+ return false;
+ auto const &Extended(HexagonMCInstrInfo::getExtendableOperand(*Info, Inst));
+ assert(Extended.isExpr());
+ int64_t Value;
+ if (!Extended.getExpr()->evaluateAsAbsolute(Value))
+ return false;
+ Target = Value;
+ return true;
+ }
+};
+}
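+// Note: evaluateBranch above only resolves targets whose extendable operand
+// folds to an absolute value; anything still needing a fixup returns false,
+// which callers must treat as "target unknown".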
+
+static MCInstrAnalysis *createHexagonMCInstrAnalysis(const MCInstrInfo *Info) {
+ return new HexagonMCInstrAnalysis(Info);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeHexagonTargetMC() {
// Register the MC asm info.
@@ -252,7 +357,7 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(getTheHexagonTarget(),
- createHexagonMCSubtargetInfo);
+ Hexagon_MC::createHexagonMCSubtargetInfo);
// Register the MC Code Emitter
TargetRegistry::RegisterMCCodeEmitter(getTheHexagonTarget(),
@@ -262,8 +367,18 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
TargetRegistry::RegisterMCAsmBackend(getTheHexagonTarget(),
createHexagonAsmBackend);
+
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(getTheHexagonTarget(),
+ createHexagonMCInstrAnalysis);
+
// Register the obj streamer
- TargetRegistry::RegisterELFStreamer(getTheHexagonTarget(), createMCStreamer);
+ TargetRegistry::RegisterELFStreamer(getTheHexagonTarget(),
+ createMCStreamer);
+
+ // Register the obj target streamer
+ TargetRegistry::RegisterObjectTargetStreamer(getTheHexagonTarget(),
+ createHexagonObjectTargetStreamer);
// Register the asm streamer
TargetRegistry::RegisterAsmTargetStreamer(getTheHexagonTarget(),
@@ -272,7 +387,4 @@ extern "C" void LLVMInitializeHexagonTargetMC() {
// Register the MC Inst Printer
TargetRegistry::RegisterMCInstPrinter(getTheHexagonTarget(),
createHexagonMCInstPrinter);
-
- TargetRegistry::RegisterObjectTargetStreamer(
- getTheHexagonTarget(), createHexagonObjectTargetStreamer);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 6e677e9d9f86..6bb69be6142e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -41,6 +41,18 @@ extern cl::opt<bool> HexagonDisableDuplex;
extern const InstrStage HexagonStages[];
MCInstrInfo *createHexagonMCInstrInfo();
+MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT);
+
+namespace Hexagon_MC {
+ StringRef ParseHexagonTriple(const Triple &TT, StringRef CPU);
+ StringRef selectHexagonCPU(const Triple &TT, StringRef CPU);
+
+ /// Create a Hexagon MCSubtargetInfo instance. This is exposed so Asm parser,
+ /// etc. do not need to go through TargetRegistry.
+ MCSubtargetInfo *createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU,
+ StringRef FS);
+ unsigned GetELFFlags(const MCSubtargetInfo &STI);
+}
MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
@@ -54,13 +66,9 @@ MCAsmBackend *createHexagonAsmBackend(const Target &T,
MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI, StringRef CPU);
-namespace Hexagon_MC {
-
- StringRef selectHexagonCPU(const Triple &TT, StringRef CPU);
-
-} // end namespace Hexagon_MC
+unsigned HexagonGetLastSlot();
-} // end namespace llvm
+} // End llvm namespace
// Define symbolic names for Hexagon registers. This defines a mapping from
// register name to register number.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 88f37d620dcf..853f76213d38 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -22,6 +22,7 @@
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "HexagonShuffler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -37,16 +38,16 @@ class HexagonBid {
unsigned Bid;
public:
- HexagonBid() : Bid(0){};
- HexagonBid(unsigned B) { Bid = B ? MAX / countPopulation(B) : 0; };
+ HexagonBid() : Bid(0){}
+ HexagonBid(unsigned B) { Bid = B ? MAX / countPopulation(B) : 0; }
// Check if the insn priority is overflowed.
- bool isSold() const { return (Bid >= MAX); };
+ bool isSold() const { return (Bid >= MAX); }
HexagonBid &operator+=(const HexagonBid &B) {
Bid += B.Bid;
return *this;
- };
+ }
};
// Slot shuffling allocation.
@@ -56,7 +57,7 @@ class HexagonUnitAuction {
unsigned isSold : HEXAGON_PACKET_SIZE;
public:
- HexagonUnitAuction() : isSold(0){};
+ HexagonUnitAuction(unsigned cs = 0) : isSold(cs){};
// Allocate slots.
bool bid(unsigned B) {
@@ -70,29 +71,29 @@ public:
isSold |= Scores[i].isSold() << i;
}
return true;
- ;
} else
// Error if the desired slots are already full.
return false;
- };
+ }
};
} // end anonymous namespace
unsigned HexagonResource::setWeight(unsigned s) {
const unsigned SlotWeight = 8;
const unsigned MaskWeight = SlotWeight - 1;
- bool Key = (1 << s) & getUnits();
-
- // TODO: Improve this API so that we can prevent misuse statically.
- assert(SlotWeight * s < 32 && "Argument to setWeight too large.");
+ unsigned Units = getUnits();
+ unsigned Key = ((1u << s) & Units) != 0;
// Calculate the relative weight of the insn for the given slot, weighing it
// heavier the more restrictive the insn is and the lower the slots in which
// it may execute.
- Weight =
- (Key << (SlotWeight * s)) * ((MaskWeight - countPopulation(getUnits()))
- << countTrailingZeros(getUnits()));
- return (Weight);
+ if (Key == 0 || Units == 0 || (SlotWeight*s >= 32))
+ return Weight = 0;
+
+ unsigned Ctpop = countPopulation(Units);
+ unsigned Cttz = countTrailingZeros(Units);
+ Weight = (1u << (SlotWeight * s)) * ((MaskWeight - Ctpop) << Cttz);
+ return Weight;
}
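// A worked example of the formula above: for an insn restricted to slots 2
// and 3 (Units = 0xC) weighed for s = 3, Ctpop = 2 and Cttz = 2, giving
// Weight = (1 << 24) * ((7 - 2) << 2); fewer candidate slots (smaller Ctpop)
// and higher minimum slots (larger Cttz) both increase the weight.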
void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) {
@@ -104,7 +105,10 @@ void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) {
(*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1);
(*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2);
(*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1);
- (*TUL)[HexagonII::TypeCVI_VINLANESAT] = UnitsAndLanes(CVI_SHIFT, 1);
+ (*TUL)[HexagonII::TypeCVI_VINLANESAT] =
+ (CPU == "hexagonv60" || CPU == "hexagonv61" || CPU == "hexagonv61v1") ?
+ UnitsAndLanes(CVI_SHIFT, 1) :
+ UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
(*TUL)[HexagonII::TypeCVI_VM_LD] =
UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
(*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0);
@@ -141,6 +145,40 @@ HexagonCVIResource::HexagonCVIResource(TypeUnitsAndLanes *TUL,
}
}
+struct CVIUnits {
+ unsigned Units;
+ unsigned Lanes;
+};
+typedef SmallVector<struct CVIUnits, 8> HVXInstsT;
+
+static unsigned makeAllBits(unsigned startBit, unsigned Lanes) {
+ for (unsigned i = 1 ; i < Lanes ; ++i)
+ startBit = (startBit << 1) | startBit;
+ return startBit;
+}
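+// For example, makeAllBits(0x2, 2) widens the single unit bit 0x2 into the
+// two-lane mask 0x6, and makeAllBits(0x1, 4) yields 0xF: each iteration ORs
+// the mask with a copy of itself shifted left by one, so Lanes consecutive
+// bits end up set starting at the original unit bit.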
+
+static bool checkHVXPipes(const HVXInstsT &hvxInsts, unsigned startIdx,
+ unsigned usedUnits) {
+ if (startIdx < hvxInsts.size()) {
+ if (!hvxInsts[startIdx].Units)
+ return checkHVXPipes(hvxInsts, startIdx + 1, usedUnits);
+ for (unsigned b = 0x1 ; b <= 0x8 ; b <<= 1) {
+ if ((hvxInsts[startIdx].Units & b) == 0)
+ continue;
+ unsigned allBits = makeAllBits(b, hvxInsts[startIdx].Lanes);
+ if ((allBits & usedUnits) == 0) {
+ if (checkHVXPipes(hvxInsts, startIdx + 1, usedUnits | allBits))
+ return true;
+ }
+ }
+ return false;
+ }
+ return true;
+}
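+// checkHVXPipes is a small backtracking search: for each HVX insn it tries
+// every allowed start unit among 0x1..0x8, widens it by the insn's lane
+// count, and recurses with those units marked used. For example, two 2-lane
+// insns that may each start at 0x1 or 0x4 pack as 0x3 and 0xC; a third such
+// insn finds no free pair of units and the search reports failure.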
+
HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII,
MCSubtargetInfo const &STI)
: MCII(MCII), STI(STI) {
@@ -154,21 +192,82 @@ void HexagonShuffler::reset() {
Error = SHUFFLE_SUCCESS;
}
-void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender,
- unsigned S, bool X) {
- HexagonInstr PI(&TUL, MCII, ID, Extender, S, X);
+void HexagonShuffler::append(MCInst const &ID, MCInst const *Extender,
+ unsigned S) {
+ HexagonInstr PI(&TUL, MCII, &ID, Extender, S);
Packet.push_back(PI);
}
+static struct {
+ unsigned first;
+ unsigned second;
+} jumpSlots[] = { {8, 4}, {8, 2}, {8, 1}, {4, 2}, {4, 1}, {2, 1} };
+#define MAX_JUMP_SLOTS (sizeof(jumpSlots)/sizeof(jumpSlots[0]))
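+// Each jumpSlots entry is a (first, second) pair of slot masks for a packet
+// with two branches, ordered so the earlier branch always gets the higher
+// slot: {8, 4} pins the first branch to slot 3 and the second to slot 2,
+// {8, 2} to slots 3 and 1, and so on, preserving the original branch order.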
+
+namespace {
+bool isDuplexAGroup(unsigned Opcode) {
+ switch (Opcode) {
+ case Hexagon::SA1_addi:
+ case Hexagon::SA1_addrx:
+ case Hexagon::SA1_addsp:
+ case Hexagon::SA1_and1:
+ case Hexagon::SA1_clrf:
+ case Hexagon::SA1_clrfnew:
+ case Hexagon::SA1_clrt:
+ case Hexagon::SA1_clrtnew:
+ case Hexagon::SA1_cmpeqi:
+ case Hexagon::SA1_combine0i:
+ case Hexagon::SA1_combine1i:
+ case Hexagon::SA1_combine2i:
+ case Hexagon::SA1_combine3i:
+ case Hexagon::SA1_combinerz:
+ case Hexagon::SA1_combinezr:
+ case Hexagon::SA1_dec:
+ case Hexagon::SA1_inc:
+ case Hexagon::SA1_seti:
+ case Hexagon::SA1_setin1:
+ case Hexagon::SA1_sxtb:
+ case Hexagon::SA1_sxth:
+ case Hexagon::SA1_tfr:
+ case Hexagon::SA1_zxtb:
+ case Hexagon::SA1_zxth:
+ return true;
+ default:
+ return false;
+ }
+}
+
+unsigned countNeitherAnorX(MCInstrInfo const &MCII, MCInst const &ID) {
+ unsigned Result = 0;
+ unsigned Type = HexagonMCInstrInfo::getType(MCII, ID);
+ if (Type == HexagonII::TypeDUPLEX) {
+ unsigned subInst0Opcode = ID.getOperand(0).getInst()->getOpcode();
+ unsigned subInst1Opcode = ID.getOperand(1).getInst()->getOpcode();
+ Result += !isDuplexAGroup(subInst0Opcode);
+ Result += !isDuplexAGroup(subInst1Opcode);
+ } else
+ Result += Type != HexagonII::TypeALU32_2op &&
+ Type != HexagonII::TypeALU32_3op &&
+ Type != HexagonII::TypeALU32_ADDI &&
+ Type != HexagonII::TypeS_2op &&
+ Type != HexagonII::TypeS_3op &&
+ Type != HexagonII::TypeALU64 &&
+ (Type != HexagonII::TypeM ||
+ HexagonMCInstrInfo::isFloat(MCII, ID));
+ return Result;
+}
+}
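+// countNeitherAnorX scores how many "neither A- nor X-type" insns ID
+// contributes: a duplex is scored per sub-insn (SA1 sub-insns count as
+// A-type), while any other insn counts once unless it is an ALU32, S-type,
+// ALU64, or non-float M-type insn.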
+
/// Check that the packet is legal and enforce relative insn order.
bool HexagonShuffler::check() {
// Descriptive slot masks.
const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotOne = 0x2,
- slotThree = 0x8, slotFirstJump = 0x8, slotLastJump = 0x4,
+ slotThree = 0x8, //slotFirstJump = 0x8,
slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1;
// Highest slots for branches and stores used to keep their original order.
- unsigned slotJump = slotFirstJump;
+ //unsigned slotJump = slotFirstJump;
unsigned slotLoadStore = slotFirstLoadStore;
// Number of branches, solo branches, indirect branches.
unsigned jumps = 0, jump1 = 0;
@@ -188,36 +287,41 @@ bool HexagonShuffler::check() {
unsigned onlyNo1 = 0;
unsigned xtypeFloat = 0;
unsigned pSlot3Cnt = 0;
+ unsigned memops = 0;
+ unsigned deallocs = 0;
iterator slot3ISJ = end();
+ std::vector<iterator> foundBranches;
+ unsigned reservedSlots = 0;
// Collect information from the insns in the packet.
for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
- MCInst const *ID = ISJ->getDesc();
-
- if (HexagonMCInstrInfo::isSolo(MCII, *ID))
- solo += !ISJ->isSoloException();
- else if (HexagonMCInstrInfo::isSoloAX(MCII, *ID))
- onlyAX += !ISJ->isSoloException();
- else if (HexagonMCInstrInfo::isSoloAin1(MCII, *ID))
- onlyAin1 += !ISJ->isSoloException();
- if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32 &&
- HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeXTYPE)
- ++neitherAnorX;
- if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID)) {
+ MCInst const &ID = ISJ->getDesc();
+
+ if (HexagonMCInstrInfo::isSolo(MCII, ID))
+ solo++;
+ else if (HexagonMCInstrInfo::isSoloAX(MCII, ID))
+ onlyAX++;
+ else if (HexagonMCInstrInfo::isSoloAin1(MCII, ID))
+ onlyAin1++;
+ neitherAnorX += countNeitherAnorX(MCII, ID);
+ if (HexagonMCInstrInfo::prefersSlot3(MCII, ID)) {
++pSlot3Cnt;
slot3ISJ = ISJ;
}
- if (HexagonMCInstrInfo::isCofMax1(MCII, *ID))
+ reservedSlots |= HexagonMCInstrInfo::getOtherReservedSlots(MCII, STI, ID);
+ if (HexagonMCInstrInfo::isCofMax1(MCII, ID))
++jump1;
- switch (HexagonMCInstrInfo::getType(MCII, *ID)) {
- case HexagonII::TypeXTYPE:
- if (HexagonMCInstrInfo::isFloat(MCII, *ID))
+ switch (HexagonMCInstrInfo::getType(MCII, ID)) {
+ case HexagonII::TypeS_2op:
+ case HexagonII::TypeS_3op:
+ case HexagonII::TypeALU64:
+ if (HexagonMCInstrInfo::isFloat(MCII, ID))
++xtypeFloat;
break;
- case HexagonII::TypeJR:
case HexagonII::TypeJ:
++jumps;
+ foundBranches.push_back(ISJ);
break;
case HexagonII::TypeCVI_VM_VP_LDU:
++onlyNo1;
@@ -228,10 +332,14 @@ bool HexagonShuffler::check() {
case HexagonII::TypeLD:
++loads;
++memory;
- if (ISJ->Core.getUnits() == slotSingleLoad)
+ if (ISJ->Core.getUnits() == slotSingleLoad ||
+ HexagonMCInstrInfo::getType(MCII, ID) ==
+ HexagonII::TypeCVI_VM_VP_LDU)
++load0;
- if (HexagonMCInstrInfo::getDesc(MCII, *ID).isReturn())
- ++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
+ if (HexagonMCInstrInfo::getDesc(MCII, ID).isReturn()) {
+ ++deallocs, ++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
+ foundBranches.push_back(ISJ);
+ }
break;
case HexagonII::TypeCVI_VM_STU:
++onlyNo1;
@@ -241,27 +349,66 @@ bool HexagonShuffler::check() {
case HexagonII::TypeST:
++stores;
++memory;
- if (ISJ->Core.getUnits() == slotSingleStore)
+ if (ISJ->Core.getUnits() == slotSingleStore ||
+ HexagonMCInstrInfo::getType(MCII, ID) == HexagonII::TypeCVI_VM_STU)
++store0;
break;
case HexagonII::TypeV4LDST:
++loads;
++stores;
++store1;
+ ++memops;
++memory;
break;
- case HexagonII::TypeNV:
+ case HexagonII::TypeNCJ:
++memory; // NV insns are memory-like.
- if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch())
+ if (HexagonMCInstrInfo::getDesc(MCII, ID).isBranch()) {
++jumps, ++jump1;
+ foundBranches.push_back(ISJ);
+ }
+ break;
+ case HexagonII::TypeV2LDST:
+ if (HexagonMCInstrInfo::getDesc(MCII, ID).mayLoad()) {
+ ++loads;
+ ++memory;
+ if (ISJ->Core.getUnits() == slotSingleLoad ||
+ HexagonMCInstrInfo::getType(MCII,ID) ==
+ HexagonII::TypeCVI_VM_VP_LDU)
+ ++load0;
+ } else {
+ assert(HexagonMCInstrInfo::getDesc(MCII, ID).mayStore());
+ ++memory;
+ ++stores;
+ }
break;
case HexagonII::TypeCR:
// Legacy conditional branch predicated on a register.
- case HexagonII::TypeSYSTEM:
- if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad())
- ++loads;
+ case HexagonII::TypeCJ:
+ if (HexagonMCInstrInfo::getDesc(MCII, ID).isBranch()) {
+ ++jumps;
+ foundBranches.push_back(ISJ);
+ }
+ break;
+ case HexagonII::TypeDUPLEX: {
+ ++duplex;
+ MCInst const &Inst0 = *ID.getOperand(0).getInst();
+ MCInst const &Inst1 = *ID.getOperand(1).getInst();
+ if (HexagonMCInstrInfo::isCofMax1(MCII, Inst0))
+ ++jump1;
+ if (HexagonMCInstrInfo::isCofMax1(MCII, Inst1))
+ ++jump1;
+ if (HexagonMCInstrInfo::getDesc(MCII, Inst0).isBranch()) {
+ ++jumps;
+ foundBranches.push_back(ISJ);
+ }
+ if (HexagonMCInstrInfo::getDesc(MCII, Inst1).isBranch()) {
+ ++jumps;
+ foundBranches.push_back(ISJ);
+ }
break;
}
+ }
}
// Check if the packet is legal.
@@ -277,12 +424,20 @@ bool HexagonShuffler::check() {
Error = SHUFFLE_ERROR_BRANCHES;
return false;
}
+ if (memops && stores > 1) {
+ Error = SHUFFLE_ERROR_STORE_LOAD_CONFLICT;
+ return false;
+ }
+ if (deallocs && stores) {
+ Error = SHUFFLE_ERROR_STORE_LOAD_CONFLICT;
+ return false;
+ }
// Modify packet accordingly.
// TODO: need to reserve slots #0 and #1 for duplex insns.
bool bOnlySlot3 = false;
for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
- MCInst const *ID = ISJ->getDesc();
+ MCInst const &ID = ISJ->getDesc();
if (!ISJ->Core.getUnits()) {
// Error if insn may not be executed in any slot.
@@ -291,40 +446,26 @@ bool HexagonShuffler::check() {
}
// Exclude from slot #1 any insn but A2_nop.
- if (HexagonMCInstrInfo::getDesc(MCII, *ID).getOpcode() != Hexagon::A2_nop)
+ if (HexagonMCInstrInfo::getDesc(MCII, ID).getOpcode() != Hexagon::A2_nop)
if (onlyNo1)
ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne);
// Exclude from slot #1 any insn but A-type.
- if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32)
+ if (HexagonMCInstrInfo::getType(MCII, ID) != HexagonII::TypeALU32_2op &&
+ HexagonMCInstrInfo::getType(MCII, ID) != HexagonII::TypeALU32_3op &&
+ HexagonMCInstrInfo::getType(MCII, ID) != HexagonII::TypeALU32_ADDI)
if (onlyAin1)
ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne);
- // Branches must keep the original order.
- if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch() ||
- HexagonMCInstrInfo::getDesc(MCII, *ID).isCall())
- if (jumps > 1) {
- if (slotJump < slotLastJump) {
- // Error if indirect branch with another branch or
- // no more slots available for branches.
- Error = SHUFFLE_ERROR_BRANCHES;
- return false;
- }
- // Pin the branch to the highest slot available to it.
- ISJ->Core.setUnits(ISJ->Core.getUnits() & slotJump);
- // Update next highest slot available to branches.
- slotJump >>= 1;
- }
-
// A single load must use slot #0.
- if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad()) {
- if (loads == 1 && loads == memory)
+ if (HexagonMCInstrInfo::getDesc(MCII, ID).mayLoad()) {
+ if (loads == 1 && loads == memory && memops == 0)
// Pin the load to slot #0.
ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleLoad);
}
// A single store must use slot #0.
- if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayStore()) {
+ if (HexagonMCInstrInfo::getDesc(MCII, ID).mayStore()) {
if (!store0) {
if (stores == 1)
ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleStore);
@@ -347,7 +488,7 @@ bool HexagonShuffler::check() {
}
}
- // flag if an instruction can only be executed in slot 3
+ // flag if an instruction must execute in slot 3
if (ISJ->Core.getUnits() == slotThree)
bOnlySlot3 = true;
@@ -358,14 +499,61 @@ bool HexagonShuffler::check() {
}
}
+ // preserve branch order
bool validateSlots = true;
- if (bOnlySlot3 == false && pSlot3Cnt == 1 && slot3ISJ != end()) {
+ if (jumps > 1) {
+ if (foundBranches.size() > 2) {
+ Error = SHUFFLE_ERROR_BRANCHES;
+ return false;
+ }
+
+ // try all possible choices
+ for (unsigned int i = 0 ; i < MAX_JUMP_SLOTS ; ++i) {
+ // validate first jump with this slot rule
+ if (!(jumpSlots[i].first & foundBranches[0]->Core.getUnits()))
+ continue;
+
+ // validate second jump with this slot rule
+ if (!(jumpSlots[i].second & foundBranches[1]->Core.getUnits()))
+ continue;
+
+ // both valid for this configuration, set new slot rules
+ PacketSave = Packet;
+ foundBranches[0]->Core.setUnits(jumpSlots[i].first);
+ foundBranches[1]->Core.setUnits(jumpSlots[i].second);
+
+ HexagonUnitAuction AuctionCore(reservedSlots);
+ std::sort(begin(), end(), HexagonInstr::lessCore);
+
+ // see if things are ok with the branches pinned to these slots
+ bool bFail = false;
+ for (iterator I = begin(); I != end() && !bFail; ++I)
+ if (!AuctionCore.bid(I->Core.getUnits()))
+ bFail = true;
+
+ // if so, keep the assignment; if not, restore the original slot masks
+ if (!bFail) {
+ validateSlots = false; // all good, no need to re-do auction
+ break;
+ } else
+ // restore original values
+ Packet = PacketSave;
+ }
+ if (validateSlots) {
+ Error = SHUFFLE_ERROR_NOSLOTS;
+ return false;
+ }
+ }
+
+ if (jumps <= 1 && !bOnlySlot3 && pSlot3Cnt == 1 && slot3ISJ != end()) {
+ validateSlots = true;
// save off slot mask of instruction marked with A_PREFER_SLOT3
// and then pin it to slot #3
unsigned saveUnits = slot3ISJ->Core.getUnits();
slot3ISJ->Core.setUnits(saveUnits & slotThree);
- HexagonUnitAuction AuctionCore;
+ HexagonUnitAuction AuctionCore(reservedSlots);
std::sort(begin(), end(), HexagonInstr::lessCore);
// see if things ok with that instruction being pinned to slot #3
@@ -379,16 +567,16 @@ bool HexagonShuffler::check() {
validateSlots = false; // all good, no need to re-do auction
else
for (iterator ISJ = begin(); ISJ != end(); ++ISJ) {
- MCInst const *ID = ISJ->getDesc();
- if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID))
+ MCInst const &ID = ISJ->getDesc();
+ if (HexagonMCInstrInfo::prefersSlot3(MCII, ID))
ISJ->Core.setUnits(saveUnits);
}
}
- // Check if any slot, core, is over-subscribed.
+ // Check if any slot, core or CVI, is over-subscribed.
// Verify the core slot subscriptions.
if (validateSlots) {
- HexagonUnitAuction AuctionCore;
+ HexagonUnitAuction AuctionCore(reservedSlots);
std::sort(begin(), end(), HexagonInstr::lessCore);
@@ -399,17 +587,27 @@ bool HexagonShuffler::check() {
}
}
// Verify the CVI slot subscriptions.
- {
- HexagonUnitAuction AuctionCVI;
-
- std::sort(begin(), end(), HexagonInstr::lessCVI);
-
- for (iterator I = begin(); I != end(); ++I)
- for (unsigned i = 0; i < I->CVI.getLanes(); ++i) // TODO: I->CVI.isValid?
- if (!AuctionCVI.bid(I->CVI.getUnits() << i)) {
- Error = SHUFFLE_ERROR_SLOTS;
- return false;
- }
+ std::sort(begin(), end(), HexagonInstr::lessCVI);
+ // create vector of hvx instructions to check
+ HVXInstsT hvxInsts;
+ for (iterator I = begin(); I != end(); ++I) {
+ struct CVIUnits inst;
+ inst.Units = I->CVI.getUnits();
+ inst.Lanes = I->CVI.getLanes();
+ if (inst.Units == 0)
+ continue; // not an hvx inst, or an hvx inst that doesn't use any pipes
+ hvxInsts.push_back(inst);
+ }
+ // if there are any hvx instructions in this packet, check pipe usage
+ if (!hvxInsts.empty()) {
+ unsigned startIdx = 0, usedUnits = 0;
+ if (!checkHVXPipes(hvxInsts, startIdx, usedUnits)) {
+ // too many pipes used to be valid
+ Error = SHUFFLE_ERROR_SLOTS;
+ return false;
+ }
}
Error = SHUFFLE_SUCCESS;
@@ -452,10 +650,12 @@ bool HexagonShuffler::shuffle() {
}
for (iterator ISJ = begin(); ISJ != end(); ++ISJ)
- DEBUG(dbgs().write_hex(ISJ->Core.getUnits());
- dbgs() << ':'
- << HexagonMCInstrInfo::getDesc(MCII, *ISJ->getDesc())
- .getOpcode();
+ DEBUG(dbgs().write_hex(ISJ->Core.getUnits()); if (ISJ->CVI.isValid()) {
+ dbgs() << '/';
+ dbgs().write_hex(ISJ->CVI.getUnits()) << '|';
+ dbgs() << ISJ->CVI.getLanes();
+ } dbgs() << ':'
+ << HexagonMCInstrInfo::getDesc(MCII, ISJ->getDesc()).getOpcode();
dbgs() << '\n');
DEBUG(dbgs() << '\n');
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index a093f8545132..36e8fa19d467 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -35,7 +35,8 @@ public:
HexagonResource(unsigned s) { setUnits(s); };
void setUnits(unsigned s) {
- Slots = s & ~(~0U << HEXAGON_PACKET_SIZE);
+ Slots = s & ((1u << HEXAGON_PACKET_SIZE) - 1);
+ setWeight(s);
};
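// With HEXAGON_PACKET_SIZE == 4 the mask above is 0xF, so only the four slot
// bits survive; recomputing the weight here keeps the cached value used by
// lessWeight consistent whenever a slot mask is narrowed.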
unsigned setWeight(unsigned s);
@@ -44,7 +45,8 @@ public:
// Check if the resources are in ascending slot order.
static bool lessUnits(const HexagonResource &A, const HexagonResource &B) {
- return (countPopulation(A.getUnits()) < countPopulation(B.getUnits()));
+ return (countPopulation(A.getUnits()) <
+ countPopulation(B.getUnits()));
};
// Check if the resources are in ascending weight order.
static bool lessWeight(const HexagonResource &A, const HexagonResource &B) {
@@ -86,10 +88,10 @@ public:
unsigned s, MCInst const *id);
static void SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU);
- bool isValid() const { return (Valid); };
- unsigned getLanes() const { return (Lanes); };
- bool mayLoad() const { return (Load); };
- bool mayStore() const { return (Store); };
+ bool isValid() const { return Valid; };
+ unsigned getLanes() const { return Lanes; };
+ bool mayLoad() const { return Load; };
+ bool mayStore() const { return Store; };
};
// Handle to an insn used by the shuffling algorithm.
@@ -100,21 +102,17 @@ class HexagonInstr {
MCInst const *Extender;
HexagonResource Core;
HexagonCVIResource CVI;
- bool SoloException;
public:
HexagonInstr(HexagonCVIResource::TypeUnitsAndLanes *T,
MCInstrInfo const &MCII, MCInst const *id,
- MCInst const *Extender, unsigned s, bool x = false)
- : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id),
- SoloException(x) {};
+ MCInst const *Extender, unsigned s)
+ : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id) {}
- MCInst const *getDesc() const { return (ID); };
+ MCInst const &getDesc() const { return *ID; };
MCInst const *getExtender() const { return Extender; }
- unsigned isSoloException() const { return (SoloException); };
-
// Check if the handles are in ascending order for shuffling purposes.
bool operator<(const HexagonInstr &B) const {
return (HexagonResource::lessWeight(B.Core, Core));
@@ -136,6 +134,7 @@ class HexagonShuffler {
// Insn handles in a bundle.
HexagonPacket Packet;
+ HexagonPacket PacketSave;
// Shuffling error code.
unsigned Error;
@@ -178,8 +177,7 @@ public:
iterator end() { return (Packet.end()); };
// Add insn handle to the bundle .
- void append(MCInst const *ID, MCInst const *Extender, unsigned S,
- bool X = false);
+ void append(MCInst const &ID, MCInst const *Extender, unsigned S);
// Return the error code for the last check or shuffling of the bundle.
void setError(unsigned Err) { Error = Err; };
diff --git a/lib/Target/Hexagon/RDFCopy.cpp b/lib/Target/Hexagon/RDFCopy.cpp
index 392871628d98..57ce9fabc5e3 100644
--- a/lib/Target/Hexagon/RDFCopy.cpp
+++ b/lib/Target/Hexagon/RDFCopy.cpp
@@ -11,6 +11,7 @@
#include "RDFCopy.h"
#include "RDFGraph.h"
+#include "RDFLiveness.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -53,47 +54,12 @@ bool CopyPropagation::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) {
void CopyPropagation::recordCopy(NodeAddr<StmtNode*> SA, EqualityMap &EM) {
CopyMap.insert(std::make_pair(SA.Id, EM));
Copies.push_back(SA.Id);
-
- for (auto I : EM) {
- auto FS = DefM.find(I.second.Reg);
- if (FS == DefM.end() || FS->second.empty())
- continue; // Undefined source
- RDefMap[I.second][SA.Id] = FS->second.top()->Id;
- // Insert DstR into the map.
- RDefMap[I.first];
- }
-}
-
-
-void CopyPropagation::updateMap(NodeAddr<InstrNode*> IA) {
- RegisterSet RRs;
- for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG))
- RRs.insert(RA.Addr->getRegRef(DFG));
- bool Common = false;
- for (auto &R : RDefMap) {
- if (!RRs.count(R.first))
- continue;
- Common = true;
- break;
- }
- if (!Common)
- return;
-
- for (auto &R : RDefMap) {
- if (!RRs.count(R.first))
- continue;
- auto F = DefM.find(R.first.Reg);
- if (F == DefM.end() || F->second.empty())
- continue;
- R.second[IA.Id] = F->second.top()->Id;
- }
}
bool CopyPropagation::scanBlock(MachineBasicBlock *B) {
bool Changed = false;
auto BA = DFG.getFunc().Addr->findBlock(B, DFG);
- DFG.markBlock(BA.Id, DefM);
for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) {
if (DFG.IsCode<NodeAttrs::Stmt>(IA)) {
@@ -102,20 +68,30 @@ bool CopyPropagation::scanBlock(MachineBasicBlock *B) {
if (interpretAsCopy(SA.Addr->getCode(), EM))
recordCopy(SA, EM);
}
-
- updateMap(IA);
- DFG.pushDefs(IA, DefM);
}
MachineDomTreeNode *N = MDT.getNode(B);
for (auto I : *N)
Changed |= scanBlock(I->getBlock());
- DFG.releaseBlock(BA.Id, DefM);
return Changed;
}
+NodeId CopyPropagation::getLocalReachingDef(RegisterRef RefRR,
+ NodeAddr<InstrNode*> IA) {
+ NodeAddr<RefNode*> RA = L.getNearestAliasedRef(RefRR, IA);
+ if (RA.Id != 0) {
+ if (RA.Addr->getKind() == NodeAttrs::Def)
+ return RA.Id;
+ assert(RA.Addr->getKind() == NodeAttrs::Use);
+ if (NodeId RD = RA.Addr->getReachingDef())
+ return RD;
+ }
+ return 0;
+}
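+// Sketch of the intended use (see run() below): the reaching def of the copy
+// source is computed once at the copy statement and again at each candidate
+// use; a use is rewritten only when both queries return the same def, i.e.
+// the copied value cannot have changed between the two points.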
+
+
bool CopyPropagation::run() {
scanBlock(&DFG.getMF().front());
@@ -129,14 +105,6 @@ bool CopyPropagation::run() {
<< Print<RegisterRef>(J.second, DFG);
dbgs() << " }\n";
}
- dbgs() << "\nRDef map:\n";
- for (auto R : RDefMap) {
- dbgs() << Print<RegisterRef>(R.first, DFG) << " -> {";
- for (auto &M : R.second)
- dbgs() << ' ' << Print<NodeId>(M.first, DFG) << ':'
- << Print<NodeId>(M.second, DFG);
- dbgs() << " }\n";
- }
}
bool Changed = false;
@@ -176,8 +144,7 @@ bool CopyPropagation::run() {
if (DR == SR)
continue;
- auto &RDefSR = RDefMap[SR];
- NodeId RDefSR_SA = RDefSR[SA.Id];
+ NodeId AtCopy = getLocalReachingDef(SR, SA);
for (NodeId N = DA.Addr->getReachedUse(), NextN; N; N = NextN) {
auto UA = DFG.addr<UseNode*>(N);
@@ -190,7 +157,8 @@ bool CopyPropagation::run() {
NodeAddr<InstrNode*> IA = UA.Addr->getOwner(DFG);
assert(DFG.IsCode<NodeAttrs::Stmt>(IA));
- if (RDefSR[IA.Id] != RDefSR_SA)
+ NodeId AtUse = getLocalReachingDef(SR, IA);
+ if (AtCopy != AtUse)
continue;
MachineOperand &Op = UA.Addr->getOp();
@@ -206,8 +174,8 @@ bool CopyPropagation::run() {
Op.setReg(NewReg);
Op.setSubReg(0);
DFG.unlinkUse(UA, false);
- if (RDefSR_SA != 0) {
- UA.Addr->linkToDef(UA.Id, DFG.addr<DefNode*>(RDefSR_SA));
+ if (AtCopy != 0) {
+ UA.Addr->linkToDef(UA.Id, DFG.addr<DefNode*>(AtCopy));
} else {
UA.Addr->setReachingDef(0);
UA.Addr->setSibling(0);
diff --git a/lib/Target/Hexagon/RDFCopy.h b/lib/Target/Hexagon/RDFCopy.h
index 5ece11bd5ce4..bbd625c5f5f6 100644
--- a/lib/Target/Hexagon/RDFCopy.h
+++ b/lib/Target/Hexagon/RDFCopy.h
@@ -11,6 +11,9 @@
#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
#include "RDFGraph.h"
+#include "RDFLiveness.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
#include <map>
#include <vector>
@@ -24,7 +27,7 @@ namespace rdf {
struct CopyPropagation {
CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg),
- Trace(false) {}
+ L(dfg.getMF().getRegInfo(), dfg), Trace(false) {}
virtual ~CopyPropagation() = default;
@@ -39,18 +42,16 @@ namespace rdf {
private:
const MachineDominatorTree &MDT;
DataFlowGraph &DFG;
- DataFlowGraph::DefStackMap DefM;
+ Liveness L;
bool Trace;
- // map: register -> (map: stmt -> reaching def)
- std::map<RegisterRef,std::map<NodeId,NodeId>> RDefMap;
// map: statement -> (map: dst reg -> src reg)
std::map<NodeId, EqualityMap> CopyMap;
std::vector<NodeId> Copies;
void recordCopy(NodeAddr<StmtNode*> SA, EqualityMap &EM);
- void updateMap(NodeAddr<InstrNode*> IA);
bool scanBlock(MachineBasicBlock *B);
+ NodeId getLocalReachingDef(RegisterRef RefRR, NodeAddr<InstrNode*> IA);
};
} // end namespace rdf
diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp
index 63177d51cada..9aa8ad68e07e 100644
--- a/lib/Target/Hexagon/RDFDeadCode.cpp
+++ b/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -62,9 +62,19 @@ bool DeadCodeElimination::isLiveInstr(const MachineInstr *MI) const {
return true;
if (MI->isPHI())
return false;
- for (auto &Op : MI->operands())
+ for (auto &Op : MI->operands()) {
if (Op.isReg() && MRI.isReserved(Op.getReg()))
return true;
+ if (Op.isRegMask()) {
+ const uint32_t *BM = Op.getRegMask();
+ for (unsigned R = 0, RN = DFG.getTRI().getNumRegs(); R != RN; ++R) {
+ if (BM[R/32] & (1u << (R%32)))
+ continue;
+ if (MRI.isReserved(R))
+ return true;
+ }
+ }
+ }
return false;
}
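// In a register-mask operand a set bit means the register is preserved, so
// the loop above skips preserved registers and declares the instruction live
// as soon as the mask clobbers any reserved register.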
diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp
index fa272ea1a76a..7a2895aa4e8c 100644
--- a/lib/Target/Hexagon/RDFGraph.cpp
+++ b/lib/Target/Hexagon/RDFGraph.cpp
@@ -276,7 +276,7 @@ raw_ostream &operator<< (raw_ostream &OS,
MachineBasicBlock *BB = P.Obj.Addr->getCode();
unsigned NP = BB->pred_size();
std::vector<int> Ns;
- auto PrintBBs = [&OS,&P] (std::vector<int> Ns) -> void {
+ auto PrintBBs = [&OS] (std::vector<int> Ns) -> void {
unsigned N = Ns.size();
for (int I : Ns) {
OS << "BB#" << I;
@@ -424,7 +424,7 @@ RegisterRef RefNode::getRegRef(const DataFlowGraph &G) const {
if (NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef)
return G.unpack(Ref.PR);
assert(Ref.Op != nullptr);
- return G.makeRegRef(Ref.Op->getReg(), Ref.Op->getSubReg());
+ return G.makeRegRef(*Ref.Op);
}
// Set the register reference in the reference node directly (for references
@@ -617,8 +617,12 @@ bool TargetOperandInfo::isPreserving(const MachineInstr &In, unsigned OpNum)
// Check if the definition of RR produces an unspecified value.
bool TargetOperandInfo::isClobbering(const MachineInstr &In, unsigned OpNum)
const {
+ const MachineOperand &Op = In.getOperand(OpNum);
+ if (Op.isRegMask())
+ return true;
+ assert(Op.isReg());
if (In.isCall())
- if (In.getOperand(OpNum).isImplicit())
+ if (Op.isDef() && Op.isDead())
return true;
return false;
}
@@ -654,109 +658,6 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
return false;
}
-RegisterRef RegisterAggr::normalize(RegisterRef RR) const {
- RegisterId SuperReg = RR.Reg;
- while (true) {
- MCSuperRegIterator SR(SuperReg, &TRI, false);
- if (!SR.isValid())
- break;
- SuperReg = *SR;
- }
-
- const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(RR.Reg);
- LaneBitmask Common = RR.Mask & RC.LaneMask;
- uint32_t Sub = TRI.getSubRegIndex(SuperReg, RR.Reg);
- LaneBitmask SuperMask = TRI.composeSubRegIndexLaneMask(Sub, Common);
- return RegisterRef(SuperReg, SuperMask);
-}
-
-bool RegisterAggr::hasAliasOf(RegisterRef RR) const {
- RegisterRef NR = normalize(RR);
- auto F = Masks.find(NR.Reg);
- if (F != Masks.end()) {
- if ((F->second & NR.Mask).any())
- return true;
- }
- if (CheckUnits) {
- for (MCRegUnitIterator U(RR.Reg, &TRI); U.isValid(); ++U)
- if (ExpAliasUnits.test(*U))
- return true;
- }
- return false;
-}
-
-bool RegisterAggr::hasCoverOf(RegisterRef RR) const {
- // Always have a cover for empty lane mask.
- RegisterRef NR = normalize(RR);
- if (NR.Mask.none())
- return true;
- auto F = Masks.find(NR.Reg);
- if (F == Masks.end())
- return false;
- return (NR.Mask & F->second) == NR.Mask;
-}
-
-RegisterAggr &RegisterAggr::insert(RegisterRef RR) {
- RegisterRef NR = normalize(RR);
- auto F = Masks.find(NR.Reg);
- if (F == Masks.end())
- Masks.insert({NR.Reg, NR.Mask});
- else
- F->second |= NR.Mask;
-
- // Visit all register units to see if there are any that were created
- // by explicit aliases. Add those that were to the bit vector.
- for (MCRegUnitIterator U(RR.Reg, &TRI); U.isValid(); ++U) {
- MCRegUnitRootIterator R(*U, &TRI);
- ++R;
- if (!R.isValid())
- continue;
- ExpAliasUnits.set(*U);
- CheckUnits = true;
- }
- return *this;
-}
-
-RegisterAggr &RegisterAggr::insert(const RegisterAggr &RG) {
- for (std::pair<RegisterId,LaneBitmask> P : RG.Masks)
- insert(RegisterRef(P.first, P.second));
- return *this;
-}
-
-RegisterAggr &RegisterAggr::clear(RegisterRef RR) {
- RegisterRef NR = normalize(RR);
- auto F = Masks.find(NR.Reg);
- if (F == Masks.end())
- return *this;
- LaneBitmask NewM = F->second & ~NR.Mask;
- if (NewM.none())
- Masks.erase(F);
- else
- F->second = NewM;
- return *this;
-}
-
-RegisterAggr &RegisterAggr::clear(const RegisterAggr &RG) {
- for (std::pair<RegisterId,LaneBitmask> P : RG.Masks)
- clear(RegisterRef(P.first, P.second));
- return *this;
-}
-
-RegisterRef RegisterAggr::clearIn(RegisterRef RR) const {
- RegisterAggr T(TRI);
- T.insert(RR).clear(*this);
- if (T.empty())
- return RegisterRef();
- return RegisterRef(T.begin()->first, T.begin()->second);
-}
-
-void RegisterAggr::print(raw_ostream &OS) const {
- OS << '{';
- for (auto I : Masks)
- OS << ' ' << PrintReg(I.first, &TRI) << PrintLaneMaskOpt(I.second);
- OS << " }";
-}
-
//
// The data flow graph construction.
//
@@ -764,7 +665,8 @@ void RegisterAggr::print(raw_ostream &OS) const {
DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi)
- : MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
+ : MF(mf), TII(tii), TRI(tri), PRI(tri, mf), MDT(mdt), MDF(mdf), TOI(toi),
+ LiveIns(PRI) {
}
// The implementation of the definition stack.
@@ -857,17 +759,6 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
// Register information.
-// Get the list of references aliased to RR. Lane masks are ignored.
-RegisterSet DataFlowGraph::getAliasSet(RegisterId Reg) const {
- // Do not include RR in the alias set.
- RegisterSet AS;
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
-
- for (MCRegAliasIterator AI(Reg, &TRI, false); AI.isValid(); ++AI)
- AS.insert(RegisterRef(*AI));
- return AS;
-}
-
RegisterSet DataFlowGraph::getLandingPadLiveIns() const {
RegisterSet LR;
const Function &F = *MF.getFunction();
@@ -1010,11 +901,22 @@ void DataFlowGraph::build(unsigned Options) {
BlockRefsMap RefM;
buildBlockRefs(EA, RefM);
- // Add function-entry phi nodes.
+ // Collect function live-ins and entry block live-ins.
MachineRegisterInfo &MRI = MF.getRegInfo();
- for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) {
+ MachineBasicBlock &EntryB = *EA.Addr->getCode();
+ assert(EntryB.pred_empty() && "Function entry block has predecessors");
+ for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I)
+ LiveIns.insert(RegisterRef(I->first));
+ if (MRI.tracksLiveness()) {
+ for (auto I : EntryB.liveins())
+ LiveIns.insert(RegisterRef(I.PhysReg, I.LaneMask));
+ }
+
+ // Add function-entry phi nodes for the live-in registers.
+ for (auto I = LiveIns.rr_begin(), E = LiveIns.rr_end(); I != E; ++I) {
+ RegisterRef RR = *I;
NodeAddr<PhiNode*> PA = newPhi(EA);
- RegisterRef RR = RegisterRef(I->first);
uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
PA.Addr->addMember(DA, *this);
@@ -1071,27 +973,19 @@ void DataFlowGraph::build(unsigned Options) {
}
RegisterRef DataFlowGraph::makeRegRef(unsigned Reg, unsigned Sub) const {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(PhysicalRegisterInfo::isRegMaskId(Reg) ||
+ TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(Reg != 0);
if (Sub != 0)
Reg = TRI.getSubReg(Reg, Sub);
return RegisterRef(Reg);
}
-RegisterRef DataFlowGraph::normalizeRef(RegisterRef RR) const {
- // FIXME copied from RegisterAggr
- RegisterId SuperReg = RR.Reg;
- while (true) {
- MCSuperRegIterator SR(SuperReg, &TRI, false);
- if (!SR.isValid())
- break;
- SuperReg = *SR;
- }
-
- uint32_t Sub = TRI.getSubRegIndex(SuperReg, RR.Reg);
- const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(RR.Reg);
- LaneBitmask SuperMask = RR.Mask &
- TRI.composeSubRegIndexLaneMask(Sub, RC.LaneMask);
- return RegisterRef(SuperReg, SuperMask);
+RegisterRef DataFlowGraph::makeRegRef(const MachineOperand &Op) const {
+ assert(Op.isReg() || Op.isRegMask());
+ if (Op.isReg())
+ return makeRegRef(Op.getReg(), Op.getSubReg());
+ return RegisterRef(PRI.getRegMaskId(Op.getRegMask()), LaneBitmask::getAll());
}
RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const {
@@ -1100,13 +994,13 @@ RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const {
return M.any() ? RegisterRef(AR.Reg, M) : RegisterRef();
}
#ifndef NDEBUG
- RegisterRef NAR = normalizeRef(AR);
- RegisterRef NBR = normalizeRef(BR);
- assert(NAR.Reg != NBR.Reg);
+// RegisterRef NAR = PRI.normalize(AR);
+// RegisterRef NBR = PRI.normalize(BR);
+// assert(NAR.Reg != NBR.Reg);
#endif
// This isn't strictly correct, because the overlap may happen in the
// part masked out.
- if (TRI.regsOverlap(AR.Reg, BR.Reg))
+ if (PRI.alias(AR, BR))
return AR;
return RegisterRef();
}
@@ -1137,11 +1031,61 @@ void DataFlowGraph::releaseBlock(NodeId B, DefStackMap &DefM) {
// Push all definitions from the instruction node IA to an appropriate
// stack in DefM.
+void DataFlowGraph::pushAllDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
+ pushClobbers(IA, DefM);
+ pushDefs(IA, DefM);
+}
+
+// Push all clobbering definitions from the instruction node IA to an
+// appropriate stack in DefM.
+void DataFlowGraph::pushClobbers(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
+ NodeSet Visited;
+ std::set<RegisterId> Defined;
+
+ // The important objectives of this function are:
+ // - to be able to handle instructions both while the graph is being
+ // constructed, and after the graph has been constructed, and
+ // - maintain proper ordering of definitions on the stack for each
+ // register reference:
+ // - if there are two or more related defs in IA (i.e. coming from
+ // the same machine operand), then only push one def on the stack,
+ // - if there are multiple unrelated defs of non-overlapping
+ // subregisters of S, then the stack for S will have both (in an
+ // unspecified order), but the order does not matter from the data-
+ // -flow perspective.
+
+ for (NodeAddr<DefNode*> DA : IA.Addr->members_if(IsDef, *this)) {
+ if (Visited.count(DA.Id))
+ continue;
+ if (!(DA.Addr->getFlags() & NodeAttrs::Clobbering))
+ continue;
+
+ NodeList Rel = getRelatedRefs(IA, DA);
+ NodeAddr<DefNode*> PDA = Rel.front();
+ RegisterRef RR = PDA.Addr->getRegRef(*this);
+
+ // Push the definition on the stack for the register and all aliases.
+    // The def stack traversal in linkRefUp will check the exact aliasing.
+ DefM[RR.Reg].push(DA);
+ Defined.insert(RR.Reg);
+ for (RegisterId A : PRI.getAliasSet(RR.Reg)) {
+ // Check that we don't push the same def twice.
+ assert(A != RR.Reg);
+ if (!Defined.count(A))
+ DefM[A].push(DA);
+ }
+ // Mark all the related defs as visited.
+ for (NodeAddr<NodeBase*> T : Rel)
+ Visited.insert(T.Id);
+ }
+}
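
The stack discipline described in the comment above can be hard to see through the RDF node plumbing. Below is a minimal sketch of the same bookkeeping with plain STL containers; RegId, DefId and pushOnce are toy stand-ins, not the DefStackMap or node types from this patch:

#include <set>
#include <unordered_map>
#include <vector>

using RegId = unsigned;
using DefId = unsigned;
using ToyDefStackMap = std::unordered_map<RegId, std::vector<DefId>>;

// Push one def for a register and for each of its aliases, guarding
// against pushing the same def twice onto any one stack.
void pushOnce(ToyDefStackMap &DefM, RegId Reg, DefId D,
              const std::set<RegId> &Aliases) {
  std::set<RegId> Done;
  DefM[Reg].push_back(D);
  Done.insert(Reg);
  for (RegId A : Aliases)
    if (Done.insert(A).second)   // skip registers already pushed to
      DefM[A].push_back(D);
}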
+
+// Push all non-clobbering definitions from the instruction node IA to an
+// appropriate stack in DefM.
void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
- NodeList Defs = IA.Addr->members_if(IsDef, *this);
NodeSet Visited;
#ifndef NDEBUG
- RegisterSet Defined;
+ std::set<RegisterId> Defined;
#endif
// The important objectives of this function are:
@@ -1156,9 +1100,11 @@ void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
// unspecified order), but the order does not matter from the data-
// -flow perspective.
- for (NodeAddr<DefNode*> DA : Defs) {
+ for (NodeAddr<DefNode*> DA : IA.Addr->members_if(IsDef, *this)) {
if (Visited.count(DA.Id))
continue;
+ if (DA.Addr->getFlags() & NodeAttrs::Clobbering)
+ continue;
NodeList Rel = getRelatedRefs(IA, DA);
NodeAddr<DefNode*> PDA = Rel.front();
@@ -1166,7 +1112,7 @@ void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
#ifndef NDEBUG
// Assert if the register is defined in two or more unrelated defs.
// This could happen if there are two or more def operands defining it.
- if (!Defined.insert(RR).second) {
+ if (!Defined.insert(RR.Reg).second) {
MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
dbgs() << "Multiple definitions of register: "
<< Print<RegisterRef>(RR, *this) << " in\n " << *MI
@@ -1177,10 +1123,10 @@ void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
// Push the definition on the stack for the register and all aliases.
    // The def stack traversal in linkRefUp will check the exact aliasing.
DefM[RR.Reg].push(DA);
- for (RegisterRef A : getAliasSet(RR.Reg /*FIXME? use RegisterRef*/)) {
+ for (RegisterId A : PRI.getAliasSet(RR.Reg)) {
// Check that we don't push the same def twice.
- assert(A != RR);
- DefM[A.Reg].push(DA);
+ assert(A != RR.Reg);
+ DefM[A].push(DA);
}
// Mark all the related defs as visited.
for (NodeAddr<NodeBase*> T : Rel)
@@ -1203,59 +1149,6 @@ NodeList DataFlowGraph::getRelatedRefs(NodeAddr<InstrNode*> IA,
return Refs;
}
-// Return true if RA and RB overlap, false otherwise.
-bool DataFlowGraph::alias(RegisterRef RA, RegisterRef RB) const {
- assert(TargetRegisterInfo::isPhysicalRegister(RA.Reg));
- assert(TargetRegisterInfo::isPhysicalRegister(RB.Reg));
-
- MCRegUnitMaskIterator UMA(RA.Reg, &TRI);
- MCRegUnitMaskIterator UMB(RB.Reg, &TRI);
- // Reg units are returned in the numerical order.
- while (UMA.isValid() && UMB.isValid()) {
- std::pair<uint32_t,LaneBitmask> PA = *UMA;
- std::pair<uint32_t,LaneBitmask> PB = *UMB;
- if (PA.first == PB.first) {
- // Lane mask of 0 (given by the iterator) should be treated as "full".
- // This can happen when the register has only one unit, or when the
- // unit corresponds to explicit aliasing. In such cases, the lane mask
- // from RegisterRef should be ignored.
- if (PA.second.none() || PB.second.none())
- return true;
-
- // At this point the common unit corresponds to a subregister. The lane
- // masks correspond to the lane mask of that unit within the original
- // register, for example assuming register quadruple q0 = r3:0, and
- // a register pair d1 = r3:2, the lane mask of r2 in q0 may be 0b0100,
- // while the lane mask of r2 in d1 may be 0b0001.
- LaneBitmask LA = PA.second & RA.Mask;
- LaneBitmask LB = PB.second & RB.Mask;
- if (LA.any() && LB.any()) {
- unsigned Root = *MCRegUnitRootIterator(PA.first, &TRI);
- // If register units were guaranteed to only have 1 bit in any lane
- // mask, the code below would not be necessary. This is because LA
- // and LB would have at most 1 bit set each, and that bit would be
- // guaranteed to correspond to the given register unit.
- uint32_t SubA = TRI.getSubRegIndex(RA.Reg, Root);
- uint32_t SubB = TRI.getSubRegIndex(RB.Reg, Root);
- const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(Root);
- LaneBitmask MaskA = TRI.reverseComposeSubRegIndexLaneMask(SubA, LA);
- LaneBitmask MaskB = TRI.reverseComposeSubRegIndexLaneMask(SubB, LB);
- if ((MaskA & MaskB & RC.LaneMask).any())
- return true;
- }
-
- ++UMA;
- ++UMB;
- continue;
- }
- if (PA.first < PB.first)
- ++UMA;
- else if (PB.first < PA.first)
- ++UMB;
- }
- return false;
-}
-
// Clear all information in the graph.
void DataFlowGraph::reset() {
Memory.clear();
@@ -1370,58 +1263,53 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
if (In.isCall())
return true;
// Is tail call?
- if (In.isBranch())
+ if (In.isBranch()) {
for (const MachineOperand &Op : In.operands())
if (Op.isGlobal() || Op.isSymbol())
return true;
+      // Treat indirect branches as calls. This is done to keep their
+      // implicit operands, and it is harmless for intra-function
+      // indirect branches.
+ if (In.isIndirectBranch())
+ return true;
+ }
return false;
};
auto isDefUndef = [this] (const MachineInstr &In, RegisterRef DR) -> bool {
// This instruction defines DR. Check if there is a use operand that
// would make DR live on entry to the instruction.
- for (const MachineOperand &UseOp : In.operands()) {
- if (!UseOp.isReg() || !UseOp.isUse() || UseOp.isUndef())
+ for (const MachineOperand &Op : In.operands()) {
+ if (!Op.isReg() || Op.getReg() == 0 || !Op.isUse() || Op.isUndef())
continue;
- RegisterRef UR = makeRegRef(UseOp.getReg(), UseOp.getSubReg());
- if (alias(DR, UR))
+ RegisterRef UR = makeRegRef(Op);
+ if (PRI.alias(DR, UR))
return false;
}
return true;
};
- // Collect a set of registers that this instruction implicitly uses
- // or defines. Implicit operands from an instruction will be ignored
- // unless they are listed here.
- RegisterSet ImpUses, ImpDefs;
- if (const uint16_t *ImpD = In.getDesc().getImplicitDefs())
- while (uint16_t R = *ImpD++)
- ImpDefs.insert(RegisterRef(R));
- if (const uint16_t *ImpU = In.getDesc().getImplicitUses())
- while (uint16_t R = *ImpU++)
- ImpUses.insert(RegisterRef(R));
-
bool IsCall = isCall(In);
- bool NeedsImplicit = IsCall || In.isInlineAsm() || In.isReturn();
- bool IsPredicated = TII.isPredicated(In);
unsigned NumOps = In.getNumOperands();
// Avoid duplicate implicit defs. This will not detect cases of implicit
// defs that define registers that overlap, but it is not clear how to
// interpret that in the absence of explicit defs. Overlapping explicit
// defs are likely illegal already.
- RegisterSet DoneDefs;
+ BitVector DoneDefs(TRI.getNumRegs());
// Process explicit defs first.
for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
MachineOperand &Op = In.getOperand(OpN);
if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
continue;
- RegisterRef RR = makeRegRef(Op.getReg(), Op.getSubReg());
+ unsigned R = Op.getReg();
+ if (!R || !TargetRegisterInfo::isPhysicalRegister(R))
+ continue;
uint16_t Flags = NodeAttrs::None;
if (TOI.isPreserving(In, OpN)) {
Flags |= NodeAttrs::Preserving;
// If the def is preserving, check if it is also undefined.
- if (isDefUndef(In, RR))
+ if (isDefUndef(In, makeRegRef(Op)))
Flags |= NodeAttrs::Undef;
}
if (TOI.isClobbering(In, OpN))
@@ -1432,7 +1320,25 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
Flags |= NodeAttrs::Dead;
NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
SA.Addr->addMember(DA, *this);
- DoneDefs.insert(RR);
+ assert(!DoneDefs.test(R));
+ DoneDefs.set(R);
+ }
+
+ // Process reg-masks (as clobbers).
+ BitVector DoneClobbers(TRI.getNumRegs());
+ for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
+ MachineOperand &Op = In.getOperand(OpN);
+ if (!Op.isRegMask())
+ continue;
+ uint16_t Flags = NodeAttrs::Clobbering | NodeAttrs::Fixed |
+ NodeAttrs::Dead;
+ NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
+ SA.Addr->addMember(DA, *this);
+    // Record all clobbered registers in DoneClobbers.
+ const uint32_t *RM = Op.getRegMask();
+ for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i)
+ if (!(RM[i/32] & (1u << (i%32))))
+ DoneClobbers.set(i);
}
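
For reference, the reg-mask convention relied on here (and in RDFRegisters.cpp below) is the usual LLVM one: a set bit means the register is preserved across the instruction, a clear bit means it is clobbered (the same test MachineOperand::clobbersPhysReg performs). A minimal standalone checker, as a sketch:

#include <cstdint>

// True if PhysReg is clobbered by the mask, i.e. its bit is clear.
bool isClobberedByRegMask(const uint32_t *RM, unsigned PhysReg) {
  return !(RM[PhysReg / 32] & (1u << (PhysReg % 32)));
}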
// Process implicit defs, skipping those that have already been added
@@ -1441,11 +1347,10 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
MachineOperand &Op = In.getOperand(OpN);
if (!Op.isReg() || !Op.isDef() || !Op.isImplicit())
continue;
- RegisterRef RR = makeRegRef(Op.getReg(), Op.getSubReg());
- if (!NeedsImplicit && !ImpDefs.count(RR))
- continue;
- if (DoneDefs.count(RR))
+ unsigned R = Op.getReg();
+ if (!R || !TargetRegisterInfo::isPhysicalRegister(R) || DoneDefs.test(R))
continue;
+ RegisterRef RR = makeRegRef(Op);
uint16_t Flags = NodeAttrs::None;
if (TOI.isPreserving(In, OpN)) {
Flags |= NodeAttrs::Preserving;
@@ -1457,24 +1362,22 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
Flags |= NodeAttrs::Clobbering;
if (TOI.isFixedReg(In, OpN))
Flags |= NodeAttrs::Fixed;
- if (IsCall && Op.isDead())
+ if (IsCall && Op.isDead()) {
+ if (DoneClobbers.test(R))
+ continue;
Flags |= NodeAttrs::Dead;
+ }
NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
SA.Addr->addMember(DA, *this);
- DoneDefs.insert(RR);
+ DoneDefs.set(R);
}
for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
MachineOperand &Op = In.getOperand(OpN);
if (!Op.isReg() || !Op.isUse())
continue;
- RegisterRef RR = makeRegRef(Op.getReg(), Op.getSubReg());
- // Add implicit uses on return and call instructions, and on predicated
- // instructions regardless of whether or not they appear in the instruction
- // descriptor's list.
- bool Implicit = Op.isImplicit();
- bool TakeImplicit = NeedsImplicit || IsPredicated;
- if (Implicit && !TakeImplicit && !ImpUses.count(RR))
+ unsigned R = Op.getReg();
+ if (!R || !TargetRegisterInfo::isPhysicalRegister(R))
continue;
uint16_t Flags = NodeAttrs::None;
if (Op.isUndef())
@@ -1570,7 +1473,7 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
auto MaxCoverIn = [this] (RegisterRef RR, RegisterSet &RRs) -> RegisterRef {
for (RegisterRef I : RRs)
- if (I != RR && RegisterAggr::isCoverOf(I, RR, TRI))
+ if (I != RR && RegisterAggr::isCoverOf(I, RR, PRI))
RR = I;
return RR;
};
@@ -1597,7 +1500,7 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
auto Aliased = [this,&MaxRefs](RegisterRef RR,
std::vector<unsigned> &Closure) -> bool {
for (unsigned I : Closure)
- if (alias(RR, MaxRefs[I]))
+ if (PRI.alias(RR, MaxRefs[I]))
return true;
return false;
};
@@ -1708,7 +1611,7 @@ void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
NodeAddr<T> TAP;
// References from the def stack that have been examined so far.
- RegisterAggr Defs(TRI);
+ RegisterAggr Defs(PRI);
for (auto I = DS.top(), E = DS.bottom(); I != E; I.down()) {
RegisterRef QR = I->Addr->getRegRef(*this);
@@ -1744,13 +1647,15 @@ void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
}
// Create data-flow links for all reference nodes in the statement node SA.
-void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA) {
+template <typename Predicate>
+void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA,
+ Predicate P) {
#ifndef NDEBUG
RegisterSet Defs;
#endif
// Link all nodes (upwards in the data-flow) with their reaching defs.
- for (NodeAddr<RefNode*> RA : SA.Addr->members(*this)) {
+ for (NodeAddr<RefNode*> RA : SA.Addr->members_if(P, *this)) {
uint16_t Kind = RA.Addr->getKind();
assert(Kind == NodeAttrs::Def || Kind == NodeAttrs::Use);
RegisterRef RR = RA.Addr->getRegRef(*this);
@@ -1779,6 +1684,13 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
// Push block delimiters.
markBlock(BA.Id, DefM);
+ auto IsClobber = [] (NodeAddr<RefNode*> RA) -> bool {
+ return IsDef(RA) && (RA.Addr->getFlags() & NodeAttrs::Clobbering);
+ };
+ auto IsNoClobber = [] (NodeAddr<RefNode*> RA) -> bool {
+ return IsDef(RA) && !(RA.Addr->getFlags() & NodeAttrs::Clobbering);
+ };
+
assert(BA.Addr && "block node address is needed to create a data-flow link");
// For each non-phi instruction in the block, link all the defs and uses
// to their reaching defs. For any member of the block (including phis),
@@ -1786,10 +1698,17 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this)) {
// Ignore phi nodes here. They will be linked part by part from the
// predecessors.
- if (IA.Addr->getKind() == NodeAttrs::Stmt)
- linkStmtRefs(DefM, IA);
+ if (IA.Addr->getKind() == NodeAttrs::Stmt) {
+ linkStmtRefs(DefM, IA, IsUse);
+ linkStmtRefs(DefM, IA, IsClobber);
+ }
// Push the definitions on the stack.
+ pushClobbers(IA, DefM);
+
+ if (IA.Addr->getKind() == NodeAttrs::Stmt)
+ linkStmtRefs(DefM, IA, IsNoClobber);
+
pushDefs(IA, DefM);
}
diff --git a/lib/Target/Hexagon/RDFGraph.h b/lib/Target/Hexagon/RDFGraph.h
index 49d78a8b22b5..d5faca4cd6f4 100644
--- a/lib/Target/Hexagon/RDFGraph.h
+++ b/lib/Target/Hexagon/RDFGraph.h
@@ -225,6 +225,7 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
+#include "RDFRegisters.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/LaneBitmask.h"
@@ -260,7 +261,6 @@ namespace llvm {
namespace rdf {
typedef uint32_t NodeId;
- typedef uint32_t RegisterId;
struct DataFlowGraph;
@@ -412,25 +412,6 @@ namespace rdf {
AllocatorTy MemPool;
};
- struct RegisterRef {
- RegisterId Reg;
- LaneBitmask Mask;
-
- RegisterRef() : RegisterRef(0) {}
- explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll())
- : Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {}
-
- operator bool() const { return Reg != 0 && Mask.any(); }
- bool operator== (const RegisterRef &RR) const {
- return Reg == RR.Reg && Mask == RR.Mask;
- }
- bool operator!= (const RegisterRef &RR) const {
- return !operator==(RR);
- }
- bool operator< (const RegisterRef &RR) const {
- return Reg < RR.Reg || (Reg == RR.Reg && Mask < RR.Mask);
- }
- };
typedef std::set<RegisterRef> RegisterSet;
struct TargetOperandInfo {
@@ -450,39 +431,6 @@ namespace rdf {
uint32_t MaskId;
};
- // Template class for a map translating uint32_t into arbitrary types.
- // The map will act like an indexed set: upon insertion of a new object,
- // it will automatically assign a new index to it. Index of 0 is treated
- // as invalid and is never allocated.
- template <typename T, unsigned N = 32>
- struct IndexedSet {
- IndexedSet() : Map() { Map.reserve(N); }
-
- T get(uint32_t Idx) const {
- // Index Idx corresponds to Map[Idx-1].
- assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size());
- return Map[Idx-1];
- }
-
- uint32_t insert(T Val) {
- // Linear search.
- auto F = llvm::find(Map, Val);
- if (F != Map.end())
- return F - Map.begin() + 1;
- Map.push_back(Val);
- return Map.size(); // Return actual_index + 1.
- }
-
- uint32_t find(T Val) const {
- auto F = llvm::find(Map, Val);
- assert(F != Map.end());
- return F - Map.begin();
- }
-
- private:
- std::vector<T> Map;
- };
-
struct LaneMaskIndex : private IndexedSet<LaneBitmask> {
LaneMaskIndex() = default;
@@ -497,55 +445,6 @@ namespace rdf {
assert(LM.any());
return LM.all() ? 0 : find(LM);
}
-
- PackedRegisterRef pack(RegisterRef RR) {
- return { RR.Reg, getIndexForLaneMask(RR.Mask) };
- }
- PackedRegisterRef pack(RegisterRef RR) const {
- return { RR.Reg, getIndexForLaneMask(RR.Mask) };
- }
-
- RegisterRef unpack(PackedRegisterRef PR) const {
- return RegisterRef(PR.Reg, getLaneMaskForIndex(PR.MaskId));
- }
- };
-
- struct RegisterAggr {
- RegisterAggr(const TargetRegisterInfo &tri)
- : ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false), TRI(tri) {}
- RegisterAggr(const RegisterAggr &RG) = default;
-
- bool empty() const { return Masks.empty(); }
- bool hasAliasOf(RegisterRef RR) const;
- bool hasCoverOf(RegisterRef RR) const;
- static bool isCoverOf(RegisterRef RA, RegisterRef RB,
- const TargetRegisterInfo &TRI) {
- return RegisterAggr(TRI).insert(RA).hasCoverOf(RB);
- }
-
- RegisterAggr &insert(RegisterRef RR);
- RegisterAggr &insert(const RegisterAggr &RG);
- RegisterAggr &clear(RegisterRef RR);
- RegisterAggr &clear(const RegisterAggr &RG);
-
- RegisterRef clearIn(RegisterRef RR) const;
-
- void print(raw_ostream &OS) const;
-
- private:
- typedef std::unordered_map<RegisterId, LaneBitmask> MapType;
-
- public:
- typedef MapType::const_iterator iterator;
- iterator begin() const { return Masks.begin(); }
- iterator end() const { return Masks.end(); }
- RegisterRef normalize(RegisterRef RR) const;
-
- private:
- MapType Masks;
- BitVector ExpAliasUnits; // Register units for explicit aliases.
- bool CheckUnits;
- const TargetRegisterInfo &TRI;
};
struct NodeBase {
@@ -761,8 +660,10 @@ namespace rdf {
MachineFunction &getMF() const { return MF; }
const TargetInstrInfo &getTII() const { return TII; }
const TargetRegisterInfo &getTRI() const { return TRI; }
+ const PhysicalRegisterInfo &getPRI() const { return PRI; }
const MachineDominatorTree &getDT() const { return MDT; }
const MachineDominanceFrontier &getDF() const { return MDF; }
+ const RegisterAggr &getLiveIns() const { return LiveIns; }
struct DefStack {
DefStack() = default;
@@ -828,15 +729,22 @@ namespace rdf {
typedef std::unordered_map<RegisterId,DefStack> DefStackMap;
void build(unsigned Options = BuildOptions::None);
- void pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
+ void pushAllDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
void markBlock(NodeId B, DefStackMap &DefM);
void releaseBlock(NodeId B, DefStackMap &DefM);
- PackedRegisterRef pack(RegisterRef RR) { return LMI.pack(RR); }
- PackedRegisterRef pack(RegisterRef RR) const { return LMI.pack(RR); }
- RegisterRef unpack(PackedRegisterRef PR) const { return LMI.unpack(PR); }
+ PackedRegisterRef pack(RegisterRef RR) {
+ return { RR.Reg, LMI.getIndexForLaneMask(RR.Mask) };
+ }
+ PackedRegisterRef pack(RegisterRef RR) const {
+ return { RR.Reg, LMI.getIndexForLaneMask(RR.Mask) };
+ }
+ RegisterRef unpack(PackedRegisterRef PR) const {
+ return RegisterRef(PR.Reg, LMI.getLaneMaskForIndex(PR.MaskId));
+ }
+
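
The inlined pack/unpack pair is expected to round-trip. A sketch of the invariant, assuming G is a built DataFlowGraph and R, M are any valid register id and lane mask:

PackedRegisterRef P = G.pack(RegisterRef(R, M));
assert(G.unpack(P) == RegisterRef(R, M));
// A MaskId of 0 encodes the all-lanes mask, so full references
// pack without growing the lane-mask index.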
RegisterRef makeRegRef(unsigned Reg, unsigned Sub) const;
- RegisterRef normalizeRef(RegisterRef RR) const;
+ RegisterRef makeRegRef(const MachineOperand &Op) const;
RegisterRef restrictRef(RegisterRef AR, RegisterRef BR) const;
NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA,
@@ -853,6 +761,10 @@ namespace rdf {
NodeList getRelatedRefs(NodeAddr<InstrNode*> IA,
NodeAddr<RefNode*> RA) const;
+ NodeAddr<BlockNode*> findBlock(MachineBasicBlock *BB) const {
+ return BlockNodes.at(BB);
+ }
+
void unlinkUse(NodeAddr<UseNode*> UA, bool RemoveFromOwner) {
unlinkUseDF(UA);
if (RemoveFromOwner)
@@ -898,13 +810,9 @@ namespace rdf {
return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef);
}
- // Register aliasing.
- bool alias(RegisterRef RA, RegisterRef RB) const;
-
private:
void reset();
- RegisterSet getAliasSet(RegisterId Reg) const;
RegisterSet getLandingPadLiveIns() const;
NodeAddr<NodeBase*> newNode(uint16_t Attrs);
@@ -940,9 +848,12 @@ namespace rdf {
NodeAddr<BlockNode*> BA);
void removeUnusedPhis();
+ void pushClobbers(NodeAddr<InstrNode*> IA, DefStackMap &DM);
+ void pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
template <typename T> void linkRefUp(NodeAddr<InstrNode*> IA,
NodeAddr<T> TA, DefStack &DS);
- void linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA);
+ template <typename Predicate> void linkStmtRefs(DefStackMap &DefM,
+ NodeAddr<StmtNode*> SA, Predicate P);
void linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA);
void unlinkUseDF(NodeAddr<UseNode*> UA);
@@ -953,23 +864,21 @@ namespace rdf {
IA.Addr->removeMember(RA, *this);
}
- NodeAddr<BlockNode*> findBlock(MachineBasicBlock *BB) {
- return BlockNodes[BB];
- }
+ MachineFunction &MF;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const PhysicalRegisterInfo PRI;
+ const MachineDominatorTree &MDT;
+ const MachineDominanceFrontier &MDF;
+ const TargetOperandInfo &TOI;
+ RegisterAggr LiveIns;
NodeAddr<FuncNode*> Func;
NodeAllocator Memory;
// Local map: MachineBasicBlock -> NodeAddr<BlockNode*>
std::map<MachineBasicBlock*,NodeAddr<BlockNode*>> BlockNodes;
// Lane mask map.
LaneMaskIndex LMI;
-
- MachineFunction &MF;
- const TargetInstrInfo &TII;
- const TargetRegisterInfo &TRI;
- const MachineDominatorTree &MDT;
- const MachineDominanceFrontier &MDF;
- const TargetOperandInfo &TOI;
}; // struct DataFlowGraph
template <typename Predicate>
@@ -1013,12 +922,6 @@ namespace rdf {
return MM;
}
- // Optionally print the lane mask, if it is not ~0.
- struct PrintLaneMaskOpt {
- PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
- LaneBitmask Mask;
- };
- raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P);
template <typename T> struct Print;
template <typename T>
diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp
index e74c4bfc1645..b0532f933b16 100644
--- a/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/lib/Target/Hexagon/RDFLiveness.cpp
@@ -31,11 +31,15 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
using namespace rdf;
+static cl::opt<unsigned> MaxRecNest("rdf-liveness-max-rec", cl::init(25),
+ cl::Hidden, cl::desc("Maximum recursion level"));
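
Being hidden, the new cl::opt is still settable from any tool that links in the Hexagon backend, e.g. "llc -march=hexagon -rdf-liveness-max-rec=50 foo.ll" (illustrative invocation only); the default of 25 bounds the phi-chasing recursion in getAllReachingDefsRecImpl below.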
+
namespace llvm {
namespace rdf {
template<>
@@ -85,7 +89,8 @@ namespace rdf {
// the data-flow.
NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
- NodeAddr<RefNode*> RefA, bool FullChain, const RegisterAggr &DefRRs) {
+ NodeAddr<RefNode*> RefA, bool TopShadows, bool FullChain,
+ const RegisterAggr &DefRRs) {
NodeList RDefs; // Return value.
SetVector<NodeId> DefQ;
SetVector<NodeId> Owners;
@@ -105,6 +110,11 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
auto SNA = DFG.addr<RefNode*>(Start);
if (NodeId RD = SNA.Addr->getReachingDef())
DefQ.insert(RD);
+ if (TopShadows) {
+ for (auto S : DFG.getRelatedRefs(RefA.Addr->getOwner(DFG), RefA))
+ if (NodeId RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
+ DefQ.insert(RD);
+ }
// Collect all the reaching defs, going up until a phi node is encountered,
// or there are no more reaching defs. From this set, the actual set of
@@ -119,7 +129,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
// Stop at the covering/overwriting def of the initial register reference.
RegisterRef RR = TA.Addr->getRegRef(DFG);
if (!DFG.IsPreservingDef(TA))
- if (RegisterAggr::isCoverOf(RR, RefRR, TRI))
+ if (RegisterAggr::isCoverOf(RR, RefRR, PRI))
continue;
// Get the next level of reaching defs. This will include multiple
// reaching defs for shadows.
@@ -134,7 +144,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
for (NodeId N : DefQ) {
auto TA = DFG.addr<DefNode*>(N);
bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
- if (!IsPhi && !DFG.alias(RefRR, TA.Addr->getRegRef(DFG)))
+ if (!IsPhi && !PRI.alias(RefRR, TA.Addr->getRegRef(DFG)))
continue;
Defs.insert(TA.Id);
Owners.insert(TA.Addr->getOwner(DFG).Id);
@@ -241,20 +251,30 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
}
-NodeSet Liveness::getAllReachingDefsRec(RegisterRef RefRR,
- NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs) {
+std::pair<NodeSet,bool>
+Liveness::getAllReachingDefsRec(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
+ NodeSet &Visited, const NodeSet &Defs) {
+ return getAllReachingDefsRecImpl(RefRR, RefA, Visited, Defs, 0, MaxRecNest);
+}
+
+
+std::pair<NodeSet,bool>
+Liveness::getAllReachingDefsRecImpl(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
+ NodeSet &Visited, const NodeSet &Defs, unsigned Nest, unsigned MaxNest) {
+ if (Nest > MaxNest)
+ return { NodeSet(), false };
// Collect all defined registers. Do not consider phis to be defining
// anything, only collect "real" definitions.
- RegisterAggr DefRRs(TRI);
+ RegisterAggr DefRRs(PRI);
for (NodeId D : Defs) {
const auto DA = DFG.addr<const DefNode*>(D);
if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef))
DefRRs.insert(DA.Addr->getRegRef(DFG));
}
- NodeList RDs = getAllReachingDefs(RefRR, RefA, true, DefRRs);
+ NodeList RDs = getAllReachingDefs(RefRR, RefA, false, true, DefRRs);
if (RDs.empty())
- return Defs;
+ return { Defs, true };
// Make a copy of the preexisting definitions and add the newly found ones.
NodeSet TmpDefs = Defs;
@@ -273,12 +293,74 @@ NodeSet Liveness::getAllReachingDefsRec(RegisterRef RefRR,
Visited.insert(PA.Id);
// Go over all phi uses and get the reaching defs for each use.
for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
- const auto &T = getAllReachingDefsRec(RefRR, U, Visited, TmpDefs);
- Result.insert(T.begin(), T.end());
+ const auto &T = getAllReachingDefsRecImpl(RefRR, U, Visited, TmpDefs,
+ Nest+1, MaxNest);
+ if (!T.second)
+ return { T.first, false };
+ Result.insert(T.first.begin(), T.first.end());
}
}
- return Result;
+ return { Result, true };
+}
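
A caller-side sketch of the (set, flag) protocol introduced here: the boolean distinguishes a complete result from one cut off by MaxRecNest, so an incomplete set must not be used as if it were exhaustive. LV, UR and UA below are hypothetical locals:

NodeSet Visited, Defs;
std::pair<NodeSet,bool> P =
    LV.getAllReachingDefsRec(UR, UA, Visited, Defs);
if (!P.second) {
  // Recursion limit hit: P.first is a partial set; treat the query
  // as unanswered and fall back to a conservative assumption.
}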
+
+/// Find the nearest ref node aliased to RefRR, going upwards in the data
+/// flow, starting from the instruction immediately preceding Inst.
+NodeAddr<RefNode*> Liveness::getNearestAliasedRef(RegisterRef RefRR,
+ NodeAddr<InstrNode*> IA) {
+ NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
+ NodeList Ins = BA.Addr->members(DFG);
+ NodeId FindId = IA.Id;
+ auto E = Ins.rend();
+ auto B = std::find_if(Ins.rbegin(), E,
+ [FindId] (const NodeAddr<InstrNode*> T) {
+ return T.Id == FindId;
+ });
+ // Do not scan IA (which is what B would point to).
+ if (B != E)
+ ++B;
+
+ do {
+ // Process the range of instructions from B to E.
+ for (NodeAddr<InstrNode*> I : make_range(B, E)) {
+ NodeList Refs = I.Addr->members(DFG);
+ NodeAddr<RefNode*> Clob, Use;
+ // Scan all the refs in I aliased to RefRR, and return the one that
+ // is the closest to the output of I, i.e. def > clobber > use.
+ for (NodeAddr<RefNode*> R : Refs) {
+ if (!PRI.alias(R.Addr->getRegRef(DFG), RefRR))
+ continue;
+ if (DFG.IsDef(R)) {
+ // If it's a non-clobbering def, just return it.
+ if (!(R.Addr->getFlags() & NodeAttrs::Clobbering))
+ return R;
+ Clob = R;
+ } else {
+ Use = R;
+ }
+ }
+ if (Clob.Id != 0)
+ return Clob;
+ if (Use.Id != 0)
+ return Use;
+ }
+
+ // Go up to the immediate dominator, if any.
+ MachineBasicBlock *BB = BA.Addr->getCode();
+ BA = NodeAddr<BlockNode*>();
+ if (MachineDomTreeNode *N = MDT.getNode(BB)) {
+ if ((N = N->getIDom()))
+ BA = DFG.findBlock(N->getBlock());
+ }
+ if (!BA.Id)
+ break;
+
+ Ins = BA.Addr->members(DFG);
+ B = Ins.rbegin();
+ E = Ins.rend();
+ } while (true);
+
+ return NodeAddr<RefNode*>();
}
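
The block-to-block stepping above is the standard immediate-dominator walk. Reduced to its core, assuming an up-to-date MachineDominatorTree MDT and a hypothetical starting block Start:

MachineBasicBlock *BB = Start;
while (MachineDomTreeNode *N = MDT.getNode(BB)) {
  if (!(N = N->getIDom()))
    break;               // reached the entry block
  BB = N->getBlock();
  // ... scan BB bottom-up for an aliased ref ...
}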
@@ -299,7 +381,7 @@ NodeSet Liveness::getAllReachedUses(RegisterRef RefRR,
auto UA = DFG.addr<UseNode*>(U);
if (!(UA.Addr->getFlags() & NodeAttrs::Undef)) {
RegisterRef UR = UA.Addr->getRegRef(DFG);
- if (DFG.alias(RefRR, UR) && !DefRRs.hasCoverOf(UR))
+ if (PRI.alias(RefRR, UR) && !DefRRs.hasCoverOf(UR))
Uses.insert(U);
}
U = UA.Addr->getSibling();
@@ -312,7 +394,7 @@ NodeSet Liveness::getAllReachedUses(RegisterRef RefRR,
RegisterRef DR = DA.Addr->getRegRef(DFG);
// If this def is already covered, it cannot reach anything new.
// Similarly, skip it if it is not aliased to the interesting register.
- if (DefRRs.hasCoverOf(DR) || !DFG.alias(RefRR, DR))
+ if (DefRRs.hasCoverOf(DR) || !PRI.alias(RefRR, DR))
continue;
NodeSet T;
if (DFG.IsPreservingDef(DA)) {
@@ -343,6 +425,7 @@ void Liveness::computePhiInfo() {
// phi use -> (map: reaching phi -> set of registers defined in between)
std::map<NodeId,std::map<NodeId,RegisterAggr>> PhiUp;
std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation.
+ std::map<NodeId,RegisterAggr> PhiDRs; // Phi -> registers defined by it.
// Go over all phis.
for (NodeAddr<PhiNode*> PhiA : Phis) {
@@ -355,12 +438,15 @@ void Liveness::computePhiInfo() {
// For each def, add to the queue all reached (non-phi) defs.
SetVector<NodeId> DefQ;
NodeSet PhiDefs;
+ RegisterAggr DRs(PRI);
for (NodeAddr<RefNode*> R : PhiRefs) {
if (!DFG.IsRef<NodeAttrs::Def>(R))
continue;
+ DRs.insert(R.Addr->getRegRef(DFG));
DefQ.insert(R.Id);
PhiDefs.insert(R.Id);
}
+ PhiDRs.insert(std::make_pair(PhiA.Id, DRs));
// Collect the super-set of all possible reached uses. This set will
// contain all uses reached from this phi, either directly from the
@@ -377,9 +463,9 @@ void Liveness::computePhiInfo() {
NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN);
uint16_t F = A.Addr->getFlags();
if ((F & (NodeAttrs::Undef | NodeAttrs::PhiRef)) == 0) {
- RegisterRef R = DFG.normalizeRef(getRestrictedRegRef(A));
+ RegisterRef R = PRI.normalize(A.Addr->getRegRef(DFG));
RealUses[R.Reg].insert({A.Id,R.Mask});
- }
+ }
UN = A.Addr->getSibling();
}
// Visit all reached defs, and add them to the queue. These defs may
@@ -424,17 +510,13 @@ void Liveness::computePhiInfo() {
auto UA = DFG.addr<UseNode*>(I->first);
// Undef flag is checked above.
assert((UA.Addr->getFlags() & NodeAttrs::Undef) == 0);
- RegisterRef R(UI->first, I->second);
+ RegisterRef R(UI->first, I->second);
NodeList RDs = getAllReachingDefs(R, UA);
- if (any_of(RDs, InPhiDefs))
- ++I;
- else
- I = Uses.erase(I);
+ // If none of the reaching defs of R are from this phi, remove this
+ // use of R.
+ I = any_of(RDs, InPhiDefs) ? std::next(I) : Uses.erase(I);
}
- if (Uses.empty())
- UI = RealUses.erase(UI);
- else
- ++UI;
+ UI = Uses.empty() ? RealUses.erase(UI) : std::next(UI);
}
// If this phi reaches some "real" uses, add it to the queue for upward
@@ -452,32 +534,29 @@ void Liveness::computePhiInfo() {
for (auto I : PhiRefs) {
if (!DFG.IsRef<NodeAttrs::Use>(I) || SeenUses.count(I.Id))
continue;
- NodeAddr<UseNode*> UA = I;
-
- // Given a phi use UA, traverse all related phi uses (including UA).
- // The related phi uses may reach different phi nodes or may reach the
- // same phi node. If multiple uses reach the same phi P, the intervening
- // defs must be accumulated for all such uses. To group all such uses
- // into one set, map their node ids to the first use id that reaches P.
- std::map<NodeId,NodeId> FirstUse; // Phi reached up -> first phi use.
-
- for (NodeAddr<UseNode*> VA : DFG.getRelatedRefs(PhiA, UA)) {
- SeenUses.insert(VA.Id);
- RegisterAggr DefRRs(TRI);
- for (NodeAddr<DefNode*> DA : getAllReachingDefs(VA)) {
- if (DA.Addr->getFlags() & NodeAttrs::PhiRef) {
- NodeId RP = DA.Addr->getOwner(DFG).Id;
- NodeId FU = FirstUse.insert({RP,VA.Id}).first->second;
- std::map<NodeId,RegisterAggr> &M = PhiUp[FU];
- auto F = M.find(RP);
- if (F == M.end())
- M.insert(std::make_pair(RP, DefRRs));
- else
- F->second.insert(DefRRs);
- }
- DefRRs.insert(DA.Addr->getRegRef(DFG));
+ NodeAddr<PhiUseNode*> PUA = I;
+ if (PUA.Addr->getReachingDef() == 0)
+ continue;
+
+ RegisterRef UR = PUA.Addr->getRegRef(DFG);
+ NodeList Ds = getAllReachingDefs(UR, PUA, true, false, NoRegs);
+ RegisterAggr DefRRs(PRI);
+
+ for (NodeAddr<DefNode*> D : Ds) {
+ if (D.Addr->getFlags() & NodeAttrs::PhiRef) {
+ NodeId RP = D.Addr->getOwner(DFG).Id;
+ std::map<NodeId,RegisterAggr> &M = PhiUp[PUA.Id];
+ auto F = M.find(RP);
+ if (F == M.end())
+ M.insert(std::make_pair(RP, DefRRs));
+ else
+ F->second.insert(DefRRs);
}
+ DefRRs.insert(D.Addr->getRegRef(DFG));
}
+
+ for (NodeAddr<PhiUseNode*> T : DFG.getRelatedRefs(PhiA, PUA))
+ SeenUses.insert(T.Id);
}
}
@@ -522,7 +601,7 @@ void Liveness::computePhiInfo() {
for (NodeAddr<UseNode*> UA : PUs) {
std::map<NodeId,RegisterAggr> &PUM = PhiUp[UA.Id];
- RegisterRef UR = DFG.normalizeRef(getRestrictedRegRef(UA));
+ RegisterRef UR = PRI.normalize(UA.Addr->getRegRef(DFG));
for (const std::pair<NodeId,RegisterAggr> &P : PUM) {
bool Changed = false;
const RegisterAggr &MidDefs = P.second;
@@ -540,14 +619,19 @@ void Liveness::computePhiInfo() {
// then add (R-MidDefs,U) to RealUseMap[P]
//
for (const std::pair<RegisterId,NodeRefSet> &T : RUM) {
- RegisterRef R = DFG.restrictRef(RegisterRef(T.first), UR);
- if (!R)
+ RegisterRef R(T.first);
+ // The current phi (PA) could be a phi for a regmask. It could
+ // reach a whole variety of uses that are not related to the
+ // specific upward phi (P.first).
+ const RegisterAggr &DRs = PhiDRs.at(P.first);
+ if (!DRs.hasAliasOf(R))
continue;
+ R = DRs.intersectWith(R);
for (std::pair<NodeId,LaneBitmask> V : T.second) {
- RegisterRef S = DFG.restrictRef(RegisterRef(R.Reg, V.second), R);
- if (!S)
+ LaneBitmask M = R.Mask & V.second;
+ if (M.none())
continue;
- if (RegisterRef SS = MidDefs.clearIn(S)) {
+ if (RegisterRef SS = MidDefs.clearIn(RegisterRef(R.Reg, M))) {
NodeRefSet &RS = RealUseMap[P.first][SS.Reg];
Changed |= RS.insert({V.first,SS.Mask}).second;
}
@@ -645,30 +729,43 @@ void Liveness::computeLiveIns() {
if (RUs.empty())
continue;
+ NodeSet SeenUses;
for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
+ if (!SeenUses.insert(U.Id).second)
+ continue;
NodeAddr<PhiUseNode*> PUA = U;
if (PUA.Addr->getReachingDef() == 0)
continue;
- // Mark all reached "real" uses of P as live on exit in the
- // predecessor.
- // Remap all the RUs so that they have a correct reaching def.
+ // Each phi has some set (possibly empty) of reached "real" uses,
+ // that is, uses that are part of the compiled program. Such a use
+ // may be located in some farther block, but following a chain of
+ // reaching defs will eventually lead to this phi.
+ // Any chain of reaching defs may fork at a phi node, but there
+ // will be a path upwards that will lead to this phi. Now, this
+ // chain will need to fork at this phi, since some of the reached
+ // uses may have definitions joining in from multiple predecessors.
+ // For each reached "real" use, identify the set of reaching defs
+ // coming from each predecessor P, and add them to PhiLOX[P].
+ //
auto PrA = DFG.addr<BlockNode*>(PUA.Addr->getPredecessor());
RefMap &LOX = PhiLOX[PrA.Addr->getCode()];
- RegisterRef UR = DFG.normalizeRef(getRestrictedRegRef(PUA));
- for (const std::pair<RegisterId,NodeRefSet> &T : RUs) {
- // Check if T.first aliases UR?
- LaneBitmask M;
- for (std::pair<NodeId,LaneBitmask> P : T.second)
- M |= P.second;
-
- RegisterRef S = DFG.restrictRef(RegisterRef(T.first, M), UR);
- if (!S)
- continue;
- for (NodeAddr<DefNode*> D : getAllReachingDefs(S, PUA))
- LOX[S.Reg].insert({D.Id, S.Mask});
+ for (const std::pair<RegisterId,NodeRefSet> &RS : RUs) {
+ // We need to visit each individual use.
+ for (std::pair<NodeId,LaneBitmask> P : RS.second) {
+ // Create a register ref corresponding to the use, and find
+ // all reaching defs starting from the phi use, and treating
+ // all related shadows as a single use cluster.
+ RegisterRef S(RS.first, P.second);
+ NodeList Ds = getAllReachingDefs(S, PUA, true, false, NoRegs);
+ for (NodeAddr<DefNode*> D : Ds)
+ LOX[S.Reg].insert({D.Id, S.Mask});
+ }
}
+
+ for (NodeAddr<PhiUseNode*> T : DFG.getRelatedRefs(PA, PUA))
+ SeenUses.insert(T.Id);
} // for U : phi uses
} // for P : Phis
} // for B : Blocks
@@ -684,9 +781,7 @@ void Liveness::computeLiveIns() {
traverse(&MF.front(), LiveIn);
// Add function live-ins to the live-in set of the function entry block.
- auto &EntryIn = LiveMap[&MF.front()];
- for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I)
- EntryIn.insert(RegisterRef(I->first));
+ LiveMap[&MF.front()].insert(DFG.getLiveIns());
if (Trace) {
// Dump the liveness map
@@ -702,19 +797,9 @@ void Liveness::computeLiveIns() {
//dbgs() << "\tcomp = " << Print<RegisterAggr>(LiveMap[&B], DFG) << '\n';
LV.clear();
- for (std::pair<RegisterId,LaneBitmask> P : LiveMap[&B]) {
- MCSubRegIndexIterator S(P.first, &TRI);
- if (!S.isValid()) {
- LV.push_back(RegisterRef(P.first));
- continue;
- }
- do {
- LaneBitmask M = TRI.getSubRegIndexLaneMask(S.getSubRegIndex());
- if ((M & P.second).any())
- LV.push_back(RegisterRef(S.getSubReg()));
- ++S;
- } while (S.isValid());
- }
+ const RegisterAggr &LG = LiveMap[&B];
+ for (auto I = LG.rr_begin(), E = LG.rr_end(); I != E; ++I)
+ LV.push_back(*I);
std::sort(LV.begin(), LV.end());
dbgs() << "\tcomp = {";
for (auto I : LV)
@@ -735,9 +820,10 @@ void Liveness::resetLiveIns() {
for (auto I : T)
B.removeLiveIn(I);
// Add the newly computed live-ins.
- auto &LiveIns = LiveMap[&B];
- for (auto I : LiveIns) {
- B.addLiveIn({MCPhysReg(I.first), I.second});
+ const RegisterAggr &LiveIns = LiveMap[&B];
+ for (auto I = LiveIns.rr_begin(), E = LiveIns.rr_end(); I != E; ++I) {
+ RegisterRef R = *I;
+ B.addLiveIn({MCPhysReg(R.Reg), R.Mask});
}
}
}
@@ -791,7 +877,7 @@ void Liveness::resetKills(MachineBasicBlock *B) {
Live.reset(*SR);
}
for (auto &Op : MI->operands()) {
- if (!Op.isReg() || !Op.isUse())
+ if (!Op.isReg() || !Op.isUse() || Op.isUndef())
continue;
unsigned R = Op.getReg();
if (!TargetRegisterInfo::isPhysicalRegister(R))
@@ -803,9 +889,8 @@ void Liveness::resetKills(MachineBasicBlock *B) {
IsLive = true;
break;
}
- if (IsLive)
- continue;
- Op.setIsKill(true);
+ if (!IsLive)
+ Op.setIsKill(true);
for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR)
Live.set(*SR);
}
@@ -813,17 +898,6 @@ void Liveness::resetKills(MachineBasicBlock *B) {
}
-RegisterRef Liveness::getRestrictedRegRef(NodeAddr<RefNode*> RA) const {
- assert(DFG.IsRef<NodeAttrs::Use>(RA));
- if (RA.Addr->getFlags() & NodeAttrs::Shadow) {
- NodeId RD = RA.Addr->getReachingDef();
- assert(RD);
- RA = DFG.addr<DefNode*>(RD);
- }
- return RA.Addr->getRegRef(DFG);
-}
-
-
// Helper function to obtain the basic block containing the reaching def
// of the given use.
MachineBasicBlock *Liveness::getBlockWithRef(NodeId RN) const {
@@ -921,7 +995,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
// propagated upwards. This only applies to non-preserving defs,
// and to the parts of the register actually covered by those defs.
// (Note that phi defs should always be preserving.)
- RegisterAggr RRs(TRI);
+ RegisterAggr RRs(PRI);
LRef.Mask = OR.second;
if (!DFG.IsPreservingDef(DA)) {
@@ -949,10 +1023,9 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
// registers are not covering LRef. The first def from the
// upward chain will be live.
// Subtract all accumulated defs (RRs) from LRef.
- RegisterAggr L(TRI);
- L.insert(LRef).clear(RRs);
- assert(!L.empty());
- NewDefs.insert({TA.Id,L.begin()->second});
+ RegisterRef T = RRs.clearIn(LRef);
+ assert(T);
+ NewDefs.insert({TA.Id,T.Mask});
break;
}
@@ -983,7 +1056,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
if (UA.Addr->getFlags() & NodeAttrs::Undef)
continue;
- RegisterRef RR = DFG.normalizeRef(UA.Addr->getRegRef(DFG));
+ RegisterRef RR = PRI.normalize(UA.Addr->getRegRef(DFG));
for (NodeAddr<DefNode*> D : getAllReachingDefs(UA))
if (getBlockWithRef(D.Id) != B)
LiveIn[RR.Reg].insert({D.Id,RR.Mask});
diff --git a/lib/Target/Hexagon/RDFLiveness.h b/lib/Target/Hexagon/RDFLiveness.h
index c88396f36bbb..6f2615b7c4f3 100644
--- a/lib/Target/Hexagon/RDFLiveness.h
+++ b/lib/Target/Hexagon/RDFLiveness.h
@@ -33,7 +33,7 @@ namespace rdf {
// This is really a std::map, except that it provides a non-trivial
// default constructor to the element accessed via [].
struct LiveMapType {
- LiveMapType(const TargetRegisterInfo &tri) : Empty(tri) {}
+ LiveMapType(const PhysicalRegisterInfo &pri) : Empty(pri) {}
RegisterAggr &operator[] (MachineBasicBlock *B) {
return Map.emplace(B, Empty).first->second;
@@ -49,26 +49,31 @@ namespace rdf {
typedef std::map<RegisterId,NodeRefSet> RefMap;
Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g)
- : DFG(g), TRI(g.getTRI()), MDT(g.getDT()), MDF(g.getDF()),
- MRI(mri), LiveMap(g.getTRI()), Empty(), NoRegs(g.getTRI()),
- Trace(false) {}
+ : DFG(g), TRI(g.getTRI()), PRI(g.getPRI()), MDT(g.getDT()),
+ MDF(g.getDF()), LiveMap(g.getPRI()), Empty(),
+ NoRegs(g.getPRI()), Trace(false) {}
NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
- bool FullChain, const RegisterAggr &DefRRs);
+ bool TopShadows, bool FullChain, const RegisterAggr &DefRRs);
NodeList getAllReachingDefs(NodeAddr<RefNode*> RefA) {
- return getAllReachingDefs(RefA.Addr->getRegRef(DFG), RefA, false, NoRegs);
+ return getAllReachingDefs(RefA.Addr->getRegRef(DFG), RefA, false,
+ false, NoRegs);
}
NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA) {
- return getAllReachingDefs(RefRR, RefA, false, NoRegs);
+ return getAllReachingDefs(RefRR, RefA, false, false, NoRegs);
}
- NodeSet getAllReachingDefsRec(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
- NodeSet &Visited, const NodeSet &Defs);
NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA,
const RegisterAggr &DefRRs);
NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA) {
return getAllReachedUses(RefRR, DefA, NoRegs);
}
+ std::pair<NodeSet,bool> getAllReachingDefsRec(RegisterRef RefRR,
+ NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs);
+
+ NodeAddr<RefNode*> getNearestAliasedRef(RegisterRef RefRR,
+ NodeAddr<InstrNode*> IA);
+
LiveMapType &getLiveMap() { return LiveMap; }
const LiveMapType &getLiveMap() const { return LiveMap; }
const RefMap &getRealUses(NodeId P) const {
@@ -87,9 +92,9 @@ namespace rdf {
private:
const DataFlowGraph &DFG;
const TargetRegisterInfo &TRI;
+ const PhysicalRegisterInfo &PRI;
const MachineDominatorTree &MDT;
const MachineDominanceFrontier &MDF;
- MachineRegisterInfo &MRI;
LiveMapType LiveMap;
const RefMap Empty;
const RegisterAggr NoRegs;
@@ -121,12 +126,13 @@ namespace rdf {
// the dominator tree), create a map: block -> set of uses live on exit.
std::map<MachineBasicBlock*,RefMap> PhiLOX;
- bool isRestrictedToRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
- RegisterRef RR) const;
- RegisterRef getRestrictedRegRef(NodeAddr<RefNode*> RA) const;
MachineBasicBlock *getBlockWithRef(NodeId RN) const;
void traverse(MachineBasicBlock *B, RefMap &LiveIn);
void emptify(RefMap &M);
+
+ std::pair<NodeSet,bool> getAllReachingDefsRecImpl(RegisterRef RefRR,
+ NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs,
+ unsigned Nest, unsigned MaxNest);
};
} // namespace rdf
} // namespace llvm
diff --git a/lib/Target/Hexagon/RDFRegisters.cpp b/lib/Target/Hexagon/RDFRegisters.cpp
new file mode 100644
index 000000000000..5c5496a548af
--- /dev/null
+++ b/lib/Target/Hexagon/RDFRegisters.cpp
@@ -0,0 +1,368 @@
+//===--- RDFRegisters.cpp ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RDFRegisters.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+using namespace llvm;
+using namespace rdf;
+
+PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
+ const MachineFunction &mf)
+ : TRI(tri) {
+ RegInfos.resize(TRI.getNumRegs());
+
+ BitVector BadRC(TRI.getNumRegs());
+ for (const TargetRegisterClass *RC : TRI.regclasses()) {
+ for (MCPhysReg R : *RC) {
+ RegInfo &RI = RegInfos[R];
+ if (RI.RegClass != nullptr && !BadRC[R]) {
+ if (RC->LaneMask != RI.RegClass->LaneMask) {
+ BadRC.set(R);
+ RI.RegClass = nullptr;
+ }
+ } else
+ RI.RegClass = RC;
+ }
+ }
+
+ UnitInfos.resize(TRI.getNumRegUnits());
+
+ for (uint32_t U = 0, NU = TRI.getNumRegUnits(); U != NU; ++U) {
+ if (UnitInfos[U].Reg != 0)
+ continue;
+ MCRegUnitRootIterator R(U, &TRI);
+ assert(R.isValid());
+ RegisterId F = *R;
+ ++R;
+ if (R.isValid()) {
+ UnitInfos[U].Mask = LaneBitmask::getAll();
+ UnitInfos[U].Reg = F;
+ } else {
+ for (MCRegUnitMaskIterator I(F, &TRI); I.isValid(); ++I) {
+ std::pair<uint32_t,LaneBitmask> P = *I;
+ UnitInfo &UI = UnitInfos[P.first];
+ UI.Reg = F;
+ if (P.second.any()) {
+ UI.Mask = P.second;
+ } else {
+ if (const TargetRegisterClass *RC = RegInfos[F].RegClass)
+ UI.Mask = RC->LaneMask;
+ else
+ UI.Mask = LaneBitmask::getAll();
+ }
+ }
+ }
+ }
+
+ for (const uint32_t *RM : TRI.getRegMasks())
+ RegMasks.insert(RM);
+ for (const MachineBasicBlock &B : mf)
+ for (const MachineInstr &In : B)
+ for (const MachineOperand &Op : In.operands())
+ if (Op.isRegMask())
+ RegMasks.insert(Op.getRegMask());
+}
+
+RegisterRef PhysicalRegisterInfo::normalize(RegisterRef RR) const {
+ return RR;
+}
+
+std::set<RegisterId> PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const {
+  // Do not include Reg itself in the alias set.
+ std::set<RegisterId> AS;
+ assert(isRegMaskId(Reg) || TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (isRegMaskId(Reg)) {
+ // XXX SLOW
+ const uint32_t *MB = getRegMaskBits(Reg);
+ for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) {
+ if (MB[i/32] & (1u << (i%32)))
+ continue;
+ AS.insert(i);
+ }
+ for (const uint32_t *RM : RegMasks) {
+ RegisterId MI = getRegMaskId(RM);
+ if (MI != Reg && aliasMM(RegisterRef(Reg), RegisterRef(MI)))
+ AS.insert(MI);
+ }
+ return AS;
+ }
+
+ for (MCRegAliasIterator AI(Reg, &TRI, false); AI.isValid(); ++AI)
+ AS.insert(*AI);
+ for (const uint32_t *RM : RegMasks) {
+ RegisterId MI = getRegMaskId(RM);
+ if (aliasRM(RegisterRef(Reg), RegisterRef(MI)))
+ AS.insert(MI);
+ }
+ return AS;
+}
+
+bool PhysicalRegisterInfo::aliasRR(RegisterRef RA, RegisterRef RB) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(RA.Reg));
+ assert(TargetRegisterInfo::isPhysicalRegister(RB.Reg));
+
+ MCRegUnitMaskIterator UMA(RA.Reg, &TRI);
+ MCRegUnitMaskIterator UMB(RB.Reg, &TRI);
+  // Reg units are returned in numerical order.
+ while (UMA.isValid() && UMB.isValid()) {
+ // Skip units that are masked off in RA.
+ std::pair<RegisterId,LaneBitmask> PA = *UMA;
+ if (PA.second.any() && (PA.second & RA.Mask).none()) {
+ ++UMA;
+ continue;
+ }
+ // Skip units that are masked off in RB.
+ std::pair<RegisterId,LaneBitmask> PB = *UMB;
+ if (PB.second.any() && (PB.second & RB.Mask).none()) {
+ ++UMB;
+ continue;
+ }
+
+ if (PA.first == PB.first)
+ return true;
+ if (PA.first < PB.first)
+ ++UMA;
+ else if (PB.first < PA.first)
+ ++UMB;
+ }
+ return false;
+}
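
Stripped of the lane-mask filtering, the loop above is a plain intersection test over two ascending sequences of register units. The generic shape, for reference (a self-contained sketch, not LLVM API):

#include <vector>

bool intersects(const std::vector<unsigned> &A,
                const std::vector<unsigned> &B) {
  size_t i = 0, j = 0;
  while (i < A.size() && j < B.size()) {
    if (A[i] == B[j]) return true;
    if (A[i] < B[j]) ++i; else ++j;   // advance the smaller element
  }
  return false;
}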
+
+bool PhysicalRegisterInfo::aliasRM(RegisterRef RR, RegisterRef RM) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg) && isRegMaskId(RM.Reg));
+ const uint32_t *MB = getRegMaskBits(RM.Reg);
+ bool Preserved = MB[RR.Reg/32] & (1u << (RR.Reg%32));
+ // If the lane mask information is "full", e.g. when the given lane mask
+ // is a superset of the lane mask from the register class, check the regmask
+ // bit directly.
+ if (RR.Mask == LaneBitmask::getAll())
+ return !Preserved;
+ const TargetRegisterClass *RC = RegInfos[RR.Reg].RegClass;
+ if (RC != nullptr && (RR.Mask & RC->LaneMask) == RC->LaneMask)
+ return !Preserved;
+
+ // Otherwise, check all subregisters whose lane mask overlaps the given
+ // mask. For each such register, if it is preserved by the regmask, then
+  // clear the corresponding bits in the given mask. If, at the end, all
+  // bits have been cleared, the register does not alias the regmask
+  // (i.e. it is preserved by it).
+ LaneBitmask M = RR.Mask;
+ for (MCSubRegIndexIterator SI(RR.Reg, &TRI); SI.isValid(); ++SI) {
+ LaneBitmask SM = TRI.getSubRegIndexLaneMask(SI.getSubRegIndex());
+ if ((SM & RR.Mask).none())
+ continue;
+ unsigned SR = SI.getSubReg();
+ if (!(MB[SR/32] & (1u << (SR%32))))
+ continue;
+ // The subregister SR is preserved.
+ M &= ~SM;
+ if (M.none())
+ return false;
+ }
+
+ return true;
+}
+
+bool PhysicalRegisterInfo::aliasMM(RegisterRef RM, RegisterRef RN) const {
+ assert(isRegMaskId(RM.Reg) && isRegMaskId(RN.Reg));
+ unsigned NumRegs = TRI.getNumRegs();
+ const uint32_t *BM = getRegMaskBits(RM.Reg);
+ const uint32_t *BN = getRegMaskBits(RN.Reg);
+
+ for (unsigned w = 0, nw = NumRegs/32; w != nw; ++w) {
+ // Intersect the negations of both words. Disregard reg=0,
+ // i.e. 0th bit in the 0th word.
+ uint32_t C = ~BM[w] & ~BN[w];
+ if (w == 0)
+ C &= ~1;
+ if (C)
+ return true;
+ }
+
+ // Check the remaining registers in the last word.
+ unsigned TailRegs = NumRegs % 32;
+ if (TailRegs == 0)
+ return false;
+ unsigned TW = NumRegs / 32;
+ uint32_t TailMask = (1u << TailRegs) - 1;
+ if (~BM[TW] & ~BN[TW] & TailMask)
+ return true;
+
+ return false;
+}
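
A worked instance of the word arithmetic above, assuming a target with NumRegs = 70:

#include <cstdint>

unsigned NumRegs = 70;
unsigned nw = NumRegs / 32;                 // 2 full words in the loop
unsigned TailRegs = NumRegs % 32;           // 6 registers left over
uint32_t TailMask = (1u << TailRegs) - 1;   // 0x3f, bits 0..5 of word 2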
+
+
+bool RegisterAggr::hasAliasOf(RegisterRef RR) const {
+ if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
+ // XXX SLOW
+ const uint32_t *MB = PRI.getRegMaskBits(RR.Reg);
+ for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) {
+ if (MB[i/32] & (1u << (i%32)))
+ continue;
+ if (hasAliasOf(RegisterRef(i, LaneBitmask::getAll())))
+ return true;
+ }
+ return false;
+ }
+
+ for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
+ std::pair<uint32_t,LaneBitmask> P = *U;
+ if (P.second.none() || (P.second & RR.Mask).any())
+ if (Units.test(P.first))
+ return true;
+ }
+ return false;
+}
+
+bool RegisterAggr::hasCoverOf(RegisterRef RR) const {
+ if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
+ // XXX SLOW
+ const uint32_t *MB = PRI.getRegMaskBits(RR.Reg);
+ for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) {
+ if (MB[i/32] & (1u << (i%32)))
+ continue;
+ if (!hasCoverOf(RegisterRef(i, LaneBitmask::getAll())))
+ return false;
+ }
+ return true;
+ }
+
+ for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
+ std::pair<uint32_t,LaneBitmask> P = *U;
+ if (P.second.none() || (P.second & RR.Mask).any())
+ if (!Units.test(P.first))
+ return false;
+ }
+ return true;
+}
+
+RegisterAggr &RegisterAggr::insert(RegisterRef RR) {
+ if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
+ BitVector PU(PRI.getTRI().getNumRegUnits()); // Preserved units.
+ const uint32_t *MB = PRI.getRegMaskBits(RR.Reg);
+ for (unsigned i = 1, e = PRI.getTRI().getNumRegs(); i != e; ++i) {
+ if (!(MB[i/32] & (1u << (i%32))))
+ continue;
+ for (MCRegUnitIterator U(i, &PRI.getTRI()); U.isValid(); ++U)
+ PU.set(*U);
+ }
+ Units |= PU.flip();
+ return *this;
+ }
+
+ for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
+ std::pair<uint32_t,LaneBitmask> P = *U;
+ if (P.second.none() || (P.second & RR.Mask).any())
+ Units.set(P.first);
+ }
+ return *this;
+}
+
+RegisterAggr &RegisterAggr::insert(const RegisterAggr &RG) {
+ Units |= RG.Units;
+ return *this;
+}
+
+RegisterAggr &RegisterAggr::intersect(RegisterRef RR) {
+ return intersect(RegisterAggr(PRI).insert(RR));
+}
+
+RegisterAggr &RegisterAggr::intersect(const RegisterAggr &RG) {
+ Units &= RG.Units;
+ return *this;
+}
+
+RegisterAggr &RegisterAggr::clear(RegisterRef RR) {
+ return clear(RegisterAggr(PRI).insert(RR));
+}
+
+RegisterAggr &RegisterAggr::clear(const RegisterAggr &RG) {
+ Units.reset(RG.Units);
+ return *this;
+}
+
+RegisterRef RegisterAggr::intersectWith(RegisterRef RR) const {
+ RegisterAggr T(PRI);
+ T.insert(RR).intersect(*this);
+ if (T.empty())
+ return RegisterRef();
+ RegisterRef NR = T.makeRegRef();
+ assert(NR);
+ return NR;
+}
+
+RegisterRef RegisterAggr::clearIn(RegisterRef RR) const {
+ return RegisterAggr(PRI).insert(RR).clear(*this).makeRegRef();
+}
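
A usage sketch for the unit-set algebra above; Whole and LoHalf are hypothetical register ids where LoHalf is a subregister of Whole:

RegisterAggr Defs(PRI);
Defs.insert(RegisterRef(LoHalf));             // only the low half defined
assert(Defs.hasAliasOf(RegisterRef(Whole)));  // overlap exists
assert(!Defs.hasCoverOf(RegisterRef(Whole))); // but Whole is not covered
RegisterRef Rest = Defs.clearIn(RegisterRef(Whole));
// Rest is the uncovered part of Whole (the high half); an empty
// result would convert to false.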
+
+RegisterRef RegisterAggr::makeRegRef() const {
+ int U = Units.find_first();
+ if (U < 0)
+ return RegisterRef();
+
+ auto AliasedRegs = [this] (uint32_t Unit, BitVector &Regs) {
+ for (MCRegUnitRootIterator R(Unit, &PRI.getTRI()); R.isValid(); ++R)
+ for (MCSuperRegIterator S(*R, &PRI.getTRI(), true); S.isValid(); ++S)
+ Regs.set(*S);
+ };
+
+ // Find the set of all registers that are aliased to all the units
+ // in this aggregate.
+
+ // Get all the registers aliased to the first unit in the bit vector.
+ BitVector Regs(PRI.getTRI().getNumRegs());
+ AliasedRegs(U, Regs);
+ U = Units.find_next(U);
+
+  // For each remaining unit, intersect the accumulated set with the set
+  // of all registers aliased to that unit.
+ while (U >= 0) {
+ BitVector AR(PRI.getTRI().getNumRegs());
+ AliasedRegs(U, AR);
+ Regs &= AR;
+ U = Units.find_next(U);
+ }
+
+ // If there is at least one register remaining, pick the first one,
+ // and consolidate the masks of all of its units contained in this
+ // aggregate.
+
+ int F = Regs.find_first();
+ if (F <= 0)
+ return RegisterRef();
+
+ LaneBitmask M;
+ for (MCRegUnitMaskIterator I(F, &PRI.getTRI()); I.isValid(); ++I) {
+ std::pair<uint32_t,LaneBitmask> P = *I;
+ if (Units.test(P.first))
+ M |= P.second.none() ? LaneBitmask::getAll() : P.second;
+ }
+ return RegisterRef(F, M);
+}
+
+void RegisterAggr::print(raw_ostream &OS) const {
+ OS << '{';
+ for (int U = Units.find_first(); U >= 0; U = Units.find_next(U))
+ OS << ' ' << PrintRegUnit(U, &PRI.getTRI());
+ OS << " }";
+}
+
+RegisterAggr::rr_iterator::rr_iterator(const RegisterAggr &RG,
+ bool End)
+ : Owner(&RG) {
+ for (int U = RG.Units.find_first(); U >= 0; U = RG.Units.find_next(U)) {
+ RegisterRef R = RG.PRI.getRefForUnit(U);
+ Masks[R.Reg] |= R.Mask;
+ }
+ Pos = End ? Masks.end() : Masks.begin();
+ Index = End ? Masks.size() : 0;
+}
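
The new rr_iterator consolidates units back into at most one RegisterRef per register, with the lane masks of all contained units OR-ed together. The intended iteration pattern, as used in RDFLiveness.cpp above (LG names an arbitrary RegisterAggr):

for (auto I = LG.rr_begin(), E = LG.rr_end(); I != E; ++I) {
  RegisterRef RR = *I;     // one consolidated (Reg, Mask) per register
  // ... use RR.Reg and RR.Mask ...
}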
+
diff --git a/lib/Target/Hexagon/RDFRegisters.h b/lib/Target/Hexagon/RDFRegisters.h
new file mode 100644
index 000000000000..4b35c85a6b62
--- /dev/null
+++ b/lib/Target/Hexagon/RDFRegisters.h
@@ -0,0 +1,209 @@
+//===--- RDFRegisters.h -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H
+#define LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <set>
+#include <unordered_map>
+#include <vector>
+
+namespace llvm {
+namespace rdf {
+
+ typedef uint32_t RegisterId;
+
+ // Template class for a map translating uint32_t into arbitrary types.
+ // The map will act like an indexed set: upon insertion of a new object,
+ // it will automatically assign a new index to it. Index of 0 is treated
+ // as invalid and is never allocated.
+ template <typename T, unsigned N = 32>
+ struct IndexedSet {
+ IndexedSet() : Map() { Map.reserve(N); }
+
+ T get(uint32_t Idx) const {
+ // Index Idx corresponds to Map[Idx-1].
+ assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size());
+ return Map[Idx-1];
+ }
+
+ uint32_t insert(T Val) {
+ // Linear search.
+ auto F = llvm::find(Map, Val);
+ if (F != Map.end())
+ return F - Map.begin() + 1;
+ Map.push_back(Val);
+ return Map.size(); // Return actual_index + 1.
+ }
+
+ uint32_t find(T Val) const {
+ auto F = llvm::find(Map, Val);
+ assert(F != Map.end());
+ return F - Map.begin() + 1;
+ }
+
+ typedef typename std::vector<T>::const_iterator const_iterator;
+ const_iterator begin() const { return Map.begin(); }
+ const_iterator end() const { return Map.end(); }
+
+ private:
+ std::vector<T> Map;
+ };
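
A quick sketch of the 1-based indexing contract; note that find() now also returns the 1-based index, fixing the off-by-one in the copy removed from RDFGraph.h above:

IndexedSet<int> S;
uint32_t A = S.insert(42);   // first element -> index 1
uint32_t B = S.insert(77);   // -> index 2
assert(S.insert(42) == A);   // duplicates map to the existing index
assert(S.find(77) == B);     // find() uses the same 1-based scheme
assert(S.get(B) == 77);      // get(0) would assert: 0 is invalid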
+
+ struct RegisterRef {
+ RegisterId Reg = 0;
+ LaneBitmask Mask = LaneBitmask::getNone();
+
+ RegisterRef() = default;
+ explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll())
+ : Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {}
+
+ operator bool() const {
+ return Reg != 0 && Mask.any();
+ }
+ bool operator== (const RegisterRef &RR) const {
+ return Reg == RR.Reg && Mask == RR.Mask;
+ }
+ bool operator!= (const RegisterRef &RR) const {
+ return !operator==(RR);
+ }
+ bool operator< (const RegisterRef &RR) const {
+ return Reg < RR.Reg || (Reg == RR.Reg && Mask < RR.Mask);
+ }
+ };
+
+
+ struct PhysicalRegisterInfo {
+ PhysicalRegisterInfo(const TargetRegisterInfo &tri,
+ const MachineFunction &mf);
+
+ static bool isRegMaskId(RegisterId R) {
+ return TargetRegisterInfo::isStackSlot(R);
+ }
+ RegisterId getRegMaskId(const uint32_t *RM) const {
+ return TargetRegisterInfo::index2StackSlot(RegMasks.find(RM));
+ }
+ const uint32_t *getRegMaskBits(RegisterId R) const {
+ return RegMasks.get(TargetRegisterInfo::stackSlot2Index(R));
+ }
+ RegisterRef normalize(RegisterRef RR) const;
+
+ bool alias(RegisterRef RA, RegisterRef RB) const {
+ if (!isRegMaskId(RA.Reg))
+ return !isRegMaskId(RB.Reg) ? aliasRR(RA, RB) : aliasRM(RA, RB);
+ return !isRegMaskId(RB.Reg) ? aliasRM(RB, RA) : aliasMM(RA, RB);
+ }
+ std::set<RegisterId> getAliasSet(RegisterId Reg) const;
+
+ RegisterRef getRefForUnit(uint32_t U) const {
+ return RegisterRef(UnitInfos[U].Reg, UnitInfos[U].Mask);
+ }
+
+ const TargetRegisterInfo &getTRI() const { return TRI; }
+
+ private:
+ struct RegInfo {
+ const TargetRegisterClass *RegClass = nullptr;
+ };
+ struct UnitInfo {
+ RegisterId Reg = 0;
+ LaneBitmask Mask;
+ };
+
+ const TargetRegisterInfo &TRI;
+ std::vector<RegInfo> RegInfos;
+ std::vector<UnitInfo> UnitInfos;
+ IndexedSet<const uint32_t*> RegMasks;
+
+ bool aliasRR(RegisterRef RA, RegisterRef RB) const;
+ bool aliasRM(RegisterRef RR, RegisterRef RM) const;
+ bool aliasMM(RegisterRef RM, RegisterRef RN) const;
+ };
+
+
+ struct RegisterAggr {
+ RegisterAggr(const PhysicalRegisterInfo &pri)
+ : Units(pri.getTRI().getNumRegUnits()), PRI(pri) {}
+ RegisterAggr(const RegisterAggr &RG) = default;
+
+ bool empty() const { return Units.empty(); }
+ bool hasAliasOf(RegisterRef RR) const;
+ bool hasCoverOf(RegisterRef RR) const;
+ static bool isCoverOf(RegisterRef RA, RegisterRef RB,
+ const PhysicalRegisterInfo &PRI) {
+ return RegisterAggr(PRI).insert(RA).hasCoverOf(RB);
+ }
+
+ RegisterAggr &insert(RegisterRef RR);
+ RegisterAggr &insert(const RegisterAggr &RG);
+ RegisterAggr &intersect(RegisterRef RR);
+ RegisterAggr &intersect(const RegisterAggr &RG);
+ RegisterAggr &clear(RegisterRef RR);
+ RegisterAggr &clear(const RegisterAggr &RG);
+
+ RegisterRef intersectWith(RegisterRef RR) const;
+ RegisterRef clearIn(RegisterRef RR) const;
+ RegisterRef makeRegRef() const;
+
+ void print(raw_ostream &OS) const;
+
+ struct rr_iterator {
+ typedef std::map<RegisterId,LaneBitmask> MapType;
+ private:
+ MapType Masks;
+ MapType::iterator Pos;
+ unsigned Index;
+ const RegisterAggr *Owner;
+ public:
+ rr_iterator(const RegisterAggr &RG, bool End);
+ RegisterRef operator*() const {
+ return RegisterRef(Pos->first, Pos->second);
+ }
+ rr_iterator &operator++() {
+ ++Pos;
+ ++Index;
+ return *this;
+ }
+ bool operator==(const rr_iterator &I) const {
+ assert(Owner == I.Owner);
+ return Index == I.Index;
+ }
+ bool operator!=(const rr_iterator &I) const {
+ return !(*this == I);
+ }
+ };
+
+ rr_iterator rr_begin() const {
+ return rr_iterator(*this, false);
+ }
+ rr_iterator rr_end() const {
+ return rr_iterator(*this, true);
+ }
+
+ private:
+ BitVector Units;
+ const PhysicalRegisterInfo &PRI;
+ };
+
+
+ // Optionally print the lane mask, if it is not ~0.
+ struct PrintLaneMaskOpt {
+ PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
+ LaneBitmask Mask;
+ };
+ raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P);
+
+} // namespace rdf
+} // namespace llvm
+
+#endif
+
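RDFRegisters.h above defines IndexedSet with a 1-based contract: insert() hands out stable indices starting at 1, re-inserting an existing value returns its old index, and index 0 is reserved as invalid (which is what lets a RegisterId of 0 mean "no register"). A small self-contained stand-in demonstrating just that contract (simplified; the real class lives in llvm::rdf and uses llvm::find):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <string>
    #include <vector>

    // Simplified stand-in for rdf::IndexedSet (illustration only).
    template <typename T> struct SimpleIndexedSet {
      uint32_t insert(const T &Val) {
        auto F = std::find(Map.begin(), Map.end(), Val);
        if (F != Map.end())
          return uint32_t(F - Map.begin()) + 1; // existing 1-based index
        Map.push_back(Val);
        return uint32_t(Map.size());            // new 1-based index
      }
      const T &get(uint32_t Idx) const { return Map[Idx - 1]; }
    private:
      std::vector<T> Map;
    };

    int main() {
      SimpleIndexedSet<std::string> S;
      uint32_t A = S.insert("mask0"); // 1: index 0 is never handed out
      uint32_t B = S.insert("mask1"); // 2
      assert(S.insert("mask0") == A); // duplicates return the old index
      assert(S.get(B) == "mask1");
    }
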
diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index 57ead973b56e..1d6c07974beb 100644
--- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -1096,7 +1096,7 @@ StringRef LanaiAsmParser::splitMnemonic(StringRef Name, SMLoc NameLoc,
return Mnemonic;
}
-bool IsMemoryAssignmentError(const OperandVector &Operands) {
+static bool IsMemoryAssignmentError(const OperandVector &Operands) {
// Detects if a memory operation has an erroneous base register modification.
// Memory operations are detected by matching the types of operands.
//
diff --git a/lib/Target/Lanai/InstPrinter/CMakeLists.txt b/lib/Target/Lanai/InstPrinter/CMakeLists.txt
index 6badb1c98a6d..7f76b895e6ec 100644
--- a/lib/Target/Lanai/InstPrinter/CMakeLists.txt
+++ b/lib/Target/Lanai/InstPrinter/CMakeLists.txt
@@ -1,3 +1,3 @@
-add_llvm_library(LLVMLanaiInstPrinter
+add_llvm_library(LLVMLanaiAsmPrinter
LanaiInstPrinter.cpp
)
diff --git a/lib/Target/Lanai/InstPrinter/LLVMBuild.txt b/lib/Target/Lanai/InstPrinter/LLVMBuild.txt
index eed9a587d14a..8d9768c6017b 100644
--- a/lib/Target/Lanai/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/Lanai/InstPrinter/LLVMBuild.txt
@@ -17,7 +17,7 @@
[component_0]
type = Library
-name = LanaiInstPrinter
+name = LanaiAsmPrinter
parent = Lanai
required_libraries = MC Support
add_to_library_groups = Lanai
diff --git a/lib/Target/Lanai/LLVMBuild.txt b/lib/Target/Lanai/LLVMBuild.txt
index 798fc351bd70..cb91ffb61a98 100644
--- a/lib/Target/Lanai/LLVMBuild.txt
+++ b/lib/Target/Lanai/LLVMBuild.txt
@@ -36,7 +36,7 @@ required_libraries =
LanaiAsmParser
LanaiDesc
LanaiInfo
- LanaiInstPrinter
+ LanaiAsmPrinter
MC
SelectionDAG
Support
diff --git a/lib/Target/Lanai/LanaiInstrInfo.cpp b/lib/Target/Lanai/LanaiInstrInfo.cpp
index fcd5da876b15..a7c9a7a7f280 100644
--- a/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -518,7 +518,7 @@ LanaiInstrInfo::optimizeSelect(MachineInstr &MI,
const MCInstrDesc &DefDesc = DefMI->getDesc();
for (unsigned i = 1, e = DefDesc.getNumOperands();
i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
- NewMI.addOperand(DefMI->getOperand(i));
+ NewMI.add(DefMI->getOperand(i));
unsigned CondCode = MI.getOperand(3).getImm();
if (Invert)
@@ -531,7 +531,7 @@ LanaiInstrInfo::optimizeSelect(MachineInstr &MI,
// register operand tied to the first def. The tie makes the register
// allocator ensure the FalseReg is allocated the same register as operand 0.
FalseReg.setImplicit();
- NewMI.addOperand(FalseReg);
+ NewMI.add(FalseReg);
NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
// Update SeenMIs set: register newly created MI and erase removed DefMI.
diff --git a/lib/Target/Lanai/LanaiMCInstLower.cpp b/lib/Target/Lanai/LanaiMCInstLower.cpp
index 39c633578d43..90ede6566acf 100644
--- a/lib/Target/Lanai/LanaiMCInstLower.cpp
+++ b/lib/Target/Lanai/LanaiMCInstLower.cpp
@@ -130,7 +130,7 @@ void LanaiMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO));
break;
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("unknown operand type");
}
diff --git a/lib/Target/Lanai/MCTargetDesc/LLVMBuild.txt b/lib/Target/Lanai/MCTargetDesc/LLVMBuild.txt
index 8070dbabb1a6..05e52ccc18d7 100644
--- a/lib/Target/Lanai/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/Lanai/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = LanaiDesc
parent = Lanai
-required_libraries = LanaiInfo LanaiInstPrinter MC MCDisassembler Support
+required_libraries = LanaiInfo LanaiAsmPrinter MC MCDisassembler Support
add_to_library_groups = Lanai
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
index a04fe8112fb9..0ef1401ef531 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
@@ -50,7 +50,7 @@ public:
: MCAsmBackend(), OSType(OST) {}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
@@ -90,7 +90,7 @@ bool LanaiAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
void LanaiAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned /*DataSize*/, uint64_t Value,
- bool /*IsPCRel*/) const {
+ bool /*IsPCRel*/, MCContext & /*Ctx*/) const {
MCFixupKind Kind = Fixup.getKind();
Value = adjustFixupValue(static_cast<unsigned>(Kind), Value);
diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
index f5b5335bb989..10254677a5ad 100644
--- a/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
+++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
@@ -89,7 +89,7 @@ public:
} // end anonymous namespace
-Lanai::Fixups FixupKind(const MCExpr *Expr) {
+static Lanai::Fixups FixupKind(const MCExpr *Expr) {
if (isa<MCSymbolRefExpr>(Expr))
return Lanai::FIXUP_LANAI_21;
if (const LanaiMCExpr *McExpr = dyn_cast<LanaiMCExpr>(Expr)) {
@@ -134,8 +134,8 @@ unsigned LanaiMCCodeEmitter::getMachineOpValue(
}
// Helper function to adjust P and Q bits on load and store instructions.
-unsigned adjustPqBits(const MCInst &Inst, unsigned Value, unsigned PBitShift,
- unsigned QBitShift) {
+static unsigned adjustPqBits(const MCInst &Inst, unsigned Value,
+ unsigned PBitShift, unsigned QBitShift) {
const MCOperand AluOp = Inst.getOperand(3);
unsigned AluCode = AluOp.getImm();
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index 5fd6b6305f68..424b5ae418f7 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -194,8 +194,8 @@ bool MSP430BSel::expandBranches(OffsetVector &BlockOffsets) {
// Jump over the long branch on the opposite condition
TII->reverseBranchCondition(Cond);
MI = BuildMI(*MBB, MI, dl, TII->get(MSP430::JCC))
- .addMBB(NextMBB)
- .addOperand(Cond[0]);
+ .addMBB(NextMBB)
+ .add(Cond[0]);
InstrSizeDiff += TII->getInstSizeInBytes(*MI);
++MI;
}
diff --git a/lib/Target/MSP430/MSP430CallingConv.td b/lib/Target/MSP430/MSP430CallingConv.td
index b38f5781c84a..0434f8abfbf4 100644
--- a/lib/Target/MSP430/MSP430CallingConv.td
+++ b/lib/Target/MSP430/MSP430CallingConv.td
@@ -13,11 +13,11 @@
// MSP430 Return Value Calling Convention
//===----------------------------------------------------------------------===//
def RetCC_MSP430 : CallingConv<[
- // i8 are returned in registers R15B, R14B, R13B, R12B
- CCIfType<[i8], CCAssignToReg<[R15B, R14B, R13B, R12B]>>,
+ // i8 are returned in registers R12B, R13B, R14B, R15B
+ CCIfType<[i8], CCAssignToReg<[R12B, R13B, R14B, R15B]>>,
- // i16 are returned in registers R15, R14, R13, R12
- CCIfType<[i16], CCAssignToReg<[R15, R14, R13, R12]>>
+ // i16 are returned in registers R12, R13, R14, R15
+ CCIfType<[i16], CCAssignToReg<[R12, R13, R14, R15]>>
]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 6e481b68e038..cd58eda5d924 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -61,7 +61,8 @@ namespace {
return GV != nullptr || CP != nullptr || ES != nullptr || JT != -1;
}
- void dump() {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() {
errs() << "MSP430ISelAddressMode " << this << '\n';
if (BaseType == RegBase && Base.Reg.getNode() != nullptr) {
errs() << "Base.Reg ";
@@ -83,6 +84,7 @@ namespace {
} else if (JT != -1)
errs() << " JT" << JT << " Align" << Align << '\n';
}
+#endif
};
}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 73346b9ce41d..40b1dd3cc2eb 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -245,13 +245,20 @@ MSP430TargetLowering::getRegForInlineAsmConstraint(
template<typename ArgT>
static void ParseFunctionArgs(const SmallVectorImpl<ArgT> &Args,
SmallVectorImpl<unsigned> &Out) {
- unsigned CurrentArgIndex = ~0U;
- for (unsigned i = 0, e = Args.size(); i != e; i++) {
- if (CurrentArgIndex == Args[i].OrigArgIndex) {
- Out.back()++;
+ unsigned CurrentArgIndex;
+
+ if (Args.empty())
+ return;
+
+ CurrentArgIndex = Args[0].OrigArgIndex;
+ Out.push_back(0);
+
+ for (auto &Arg : Args) {
+ if (CurrentArgIndex == Arg.OrigArgIndex) {
+ Out.back() += 1;
} else {
Out.push_back(1);
- CurrentArgIndex++;
+ CurrentArgIndex = Arg.OrigArgIndex;
}
}
}
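For illustration, the counting behaviour the rewritten ParseFunctionArgs() implements: consecutive lowered parts sharing an OrigArgIndex are grouped, and the current index is reseeded from the argument itself, so an empty list and non-consecutive original indices are both handled. A toy trace (the sample indices are invented):

    #include <cstdio>
    #include <vector>

    int main() {
      // One entry per lowered part; value = original IR argument index.
      std::vector<unsigned> OrigArgIndex = {0, 1, 1, 3};
      std::vector<unsigned> Out;
      if (!OrigArgIndex.empty()) {
        unsigned Cur = OrigArgIndex[0];
        Out.push_back(0);
        for (unsigned Idx : OrigArgIndex) {
          if (Idx == Cur) {
            Out.back() += 1;  // another part of the same argument
          } else {
            Out.push_back(1); // first part of the next argument
            Cur = Idx;        // reseed (the old code only did Cur++)
          }
        }
      }
      for (unsigned N : Out)
        std::printf("%u ", N); // prints: 1 2 1
    }
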
@@ -275,7 +282,7 @@ static void AnalyzeArguments(CCState &State,
SmallVectorImpl<CCValAssign> &ArgLocs,
const SmallVectorImpl<ArgT> &Args) {
static const MCPhysReg RegList[] = {
- MSP430::R15, MSP430::R14, MSP430::R13, MSP430::R12
+ MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15
};
static const unsigned NbRegs = array_lengthof(RegList);
@@ -288,7 +295,7 @@ static void AnalyzeArguments(CCState &State,
ParseFunctionArgs(Args, ArgsParts);
unsigned RegsLeft = NbRegs;
- bool UseStack = false;
+ bool UsedStack = false;
unsigned ValNo = 0;
for (unsigned i = 0, e = ArgsParts.size(); i != e; i++) {
@@ -316,20 +323,22 @@ static void AnalyzeArguments(CCState &State,
unsigned Parts = ArgsParts[i];
- if (!UseStack && Parts <= RegsLeft) {
- unsigned FirstVal = ValNo;
+ if (!UsedStack && Parts == 2 && RegsLeft == 1) {
+ // Special case for 32-bit register split, see EABI section 3.3.3
+ unsigned Reg = State.AllocateReg(RegList);
+ State.addLoc(CCValAssign::getReg(ValNo++, ArgVT, Reg, LocVT, LocInfo));
+ RegsLeft -= 1;
+
+ UsedStack = true;
+ CC_MSP430_AssignStack(ValNo++, ArgVT, LocVT, LocInfo, ArgFlags, State);
+ } else if (Parts <= RegsLeft) {
for (unsigned j = 0; j < Parts; j++) {
unsigned Reg = State.AllocateReg(RegList);
State.addLoc(CCValAssign::getReg(ValNo++, ArgVT, Reg, LocVT, LocInfo));
RegsLeft--;
}
-
- // Reverse the order of the pieces to agree with the "big endian" format
- // required in the calling convention ABI.
- SmallVectorImpl<CCValAssign>::iterator B = ArgLocs.begin() + FirstVal;
- std::reverse(B, B + Parts);
} else {
- UseStack = true;
+ UsedStack = true;
for (unsigned j = 0; j < Parts; j++)
CC_MSP430_AssignStack(ValNo++, ArgVT, LocVT, LocInfo, ArgFlags, State);
}
@@ -351,10 +360,6 @@ static void AnalyzeReturnValues(CCState &State,
SmallVectorImpl<CCValAssign> &RVLocs,
const SmallVectorImpl<ArgT> &Args) {
AnalyzeRetResult(State, Args);
-
- // Reverse splitted return values to get the "big endian" format required
- // to agree with the calling convention ABI.
- std::reverse(RVLocs.begin(), RVLocs.end());
}
SDValue MSP430TargetLowering::LowerFormalArguments(
@@ -496,9 +501,33 @@ SDValue MSP430TargetLowering::LowerCCCArguments(
}
}
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ if (Ins[i].Flags.isSRet()) {
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(
+ getRegClassFor(MVT::i16));
+ FuncInfo->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ }
+ }
+
return Chain;
}
+bool
+MSP430TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_MSP430);
+}
+
SDValue
MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
@@ -506,6 +535,8 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+
// CCValAssign - represent the assignment of the return value to a location
SmallVector<CCValAssign, 16> RVLocs;
@@ -537,6 +568,22 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ if (MF.getFunction()->hasStructRetAttr()) {
+ MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+
+ if (!Reg)
+ llvm_unreachable("sret virtual register not created in entry block");
+
+ SDValue Val =
+ DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy(DAG.getDataLayout()));
+ unsigned R12 = MSP430::R12;
+
+ Chain = DAG.getCopyToReg(Chain, dl, R12, Val, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(R12, getPointerTy(DAG.getDataLayout())));
+ }
+
unsigned Opc = (CallConv == CallingConv::MSP430_INTR ?
MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG);
@@ -1219,7 +1266,7 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr &MI,
BB->end());
RemBB->transferSuccessorsAndUpdatePHIs(BB);
- // Add adges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
+ // Add edges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
BB->addSuccessor(LoopBB);
BB->addSuccessor(RemBB);
LoopBB->addSuccessor(RemBB);
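The new AnalyzeArguments() logic above is small enough to model in isolation: registers fill in R12..R15 order, and the special case from MSP430 EABI section 3.3.3 splits a two-part (32-bit) argument between the last free register and the stack rather than pushing it whole. A toy simulation of the assignment rule (the argument shapes and output format are invented for the example):

    #include <cstdio>
    #include <vector>

    int main() {
      // Number of 16-bit parts per argument, e.g. (int, int, int, long).
      std::vector<unsigned> Parts = {1, 1, 1, 2};
      const char *Regs[] = {"R12", "R13", "R14", "R15"};
      unsigned RegsLeft = 4;
      bool UsedStack = false;
      for (unsigned P : Parts) {
        if (!UsedStack && P == 2 && RegsLeft == 1) {
          // EABI 3.3.3: split the 32-bit value across register and stack.
          std::printf("part -> %s, part -> stack (split)\n",
                      Regs[4 - RegsLeft]);
          RegsLeft = 0;
          UsedStack = true;
        } else if (P <= RegsLeft) {
          for (unsigned J = 0; J < P; ++J)
            std::printf("part -> %s\n", Regs[4 - RegsLeft--]);
        } else {
          UsedStack = true;
          for (unsigned J = 0; J < P; ++J)
            std::printf("part -> stack\n");
        }
      }
    }
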
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 8864807e999e..3a729623c99a 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -158,6 +158,12 @@ namespace llvm {
LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
+ bool CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
index 47b0e270c5b3..e7716382b222 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -119,7 +119,7 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOperand MCOp;
switch (MO.getType()) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index 2d937318c7e5..fcaa8a1d6c72 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -33,15 +33,23 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo {
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
int VarArgsFrameIndex;
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
public:
MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {}
explicit MSP430MachineFunctionInfo(MachineFunction &MF)
- : CalleeSavedFrameSize(0), ReturnAddrIndex(0) {}
+ : CalleeSavedFrameSize(0), ReturnAddrIndex(0), SRetReturnReg(0) {}
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
int getRAIndex() const { return ReturnAddrIndex; }
void setRAIndex(int Index) { ReturnAddrIndex = Index; }
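The SRetReturnReg field added above supports struct-return (sret) lowering: the caller passes a hidden pointer to the result buffer, and with this change the callee also hands that pointer back in R12 on return. Conceptually (a sketch in portable C++, not MSP430 code):

    #include <cstring>

    struct S { int Vals[4]; };

    // Roughly what "S makeS()" lowers to: the caller supplies the
    // buffer; the callee fills it and also returns its address, which
    // on MSP430 ends up in R12 per the change above.
    S *makeS_lowered(S *Sret) {
      static const S Init = {{1, 2, 3, 4}};
      std::memcpy(Sret, &Init, sizeof(S));
      return Sret; // address also flows back in the return register
    }
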
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index d054578deb67..d407774574be 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -7,47 +7,67 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsABIFlagsSection.h"
#include "MCTargetDesc/MipsABIInfo.h"
#include "MCTargetDesc/MipsMCExpr.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
-#include "MipsRegisterInfo.h"
-#include "MipsTargetObjectFile.h"
#include "MipsTargetStreamer.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
#include <memory>
+#include <string>
+#include <utility>
using namespace llvm;
#define DEBUG_TYPE "mips-asm-parser"
namespace llvm {
+
class MCInstrInfo;
-}
+
+} // end namespace llvm
namespace {
+
class MipsAssemblerOptions {
public:
- MipsAssemblerOptions(const FeatureBitset &Features_) :
- ATReg(1), Reorder(true), Macro(true), Features(Features_) {}
+ MipsAssemblerOptions(const FeatureBitset &Features_) : Features(Features_) {}
MipsAssemblerOptions(const MipsAssemblerOptions *Opts) {
ATReg = Opts->getATRegIndex();
@@ -84,12 +104,13 @@ public:
static const FeatureBitset AllArchRelatedMask;
private:
- unsigned ATReg;
- bool Reorder;
- bool Macro;
+ unsigned ATReg = 1;
+ bool Reorder = true;
+ bool Macro = true;
FeatureBitset Features;
};
-}
+
+} // end anonymous namespace
const FeatureBitset MipsAssemblerOptions::AllArchRelatedMask = {
Mips::FeatureMips1, Mips::FeatureMips2, Mips::FeatureMips3,
@@ -103,6 +124,7 @@ const FeatureBitset MipsAssemblerOptions::AllArchRelatedMask = {
};
namespace {
+
class MipsAsmParser : public MCTargetAsmParser {
MipsTargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
@@ -147,6 +169,8 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseBracketSuffix(StringRef Name, OperandVector &Operands);
+ bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID);
+
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -252,6 +276,18 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandAbs(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
+ bool expandMulImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
+ bool expandMulO(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
+ bool expandMulOU(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
+ bool expandDMULMacro(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
bool expandLoadStoreDMacro(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI, bool IsLoad);
@@ -339,6 +375,8 @@ class MipsAsmParser : public MCTargetAsmParser {
/// This should be used in pseudo-instruction expansions which need AT.
unsigned getATReg(SMLoc Loc);
+ bool canUseATReg();
+
bool processInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
@@ -466,9 +504,11 @@ public:
bool isGP64bit() const {
return getSTI().getFeatureBits()[Mips::FeatureGP64Bit];
}
+
bool isFP64bit() const {
return getSTI().getFeatureBits()[Mips::FeatureFP64Bit];
}
+
const MipsABIInfo &getABI() const { return ABI; }
bool isABI_N32() const { return ABI.IsN32(); }
bool isABI_N64() const { return ABI.IsN64(); }
@@ -484,48 +524,63 @@ public:
bool inMicroMipsMode() const {
return getSTI().getFeatureBits()[Mips::FeatureMicroMips];
}
+
bool hasMips1() const {
return getSTI().getFeatureBits()[Mips::FeatureMips1];
}
+
bool hasMips2() const {
return getSTI().getFeatureBits()[Mips::FeatureMips2];
}
+
bool hasMips3() const {
return getSTI().getFeatureBits()[Mips::FeatureMips3];
}
+
bool hasMips4() const {
return getSTI().getFeatureBits()[Mips::FeatureMips4];
}
+
bool hasMips5() const {
return getSTI().getFeatureBits()[Mips::FeatureMips5];
}
+
bool hasMips32() const {
return getSTI().getFeatureBits()[Mips::FeatureMips32];
}
+
bool hasMips64() const {
return getSTI().getFeatureBits()[Mips::FeatureMips64];
}
+
bool hasMips32r2() const {
return getSTI().getFeatureBits()[Mips::FeatureMips32r2];
}
+
bool hasMips64r2() const {
return getSTI().getFeatureBits()[Mips::FeatureMips64r2];
}
+
bool hasMips32r3() const {
return (getSTI().getFeatureBits()[Mips::FeatureMips32r3]);
}
+
bool hasMips64r3() const {
return (getSTI().getFeatureBits()[Mips::FeatureMips64r3]);
}
+
bool hasMips32r5() const {
return (getSTI().getFeatureBits()[Mips::FeatureMips32r5]);
}
+
bool hasMips64r5() const {
return (getSTI().getFeatureBits()[Mips::FeatureMips64r5]);
}
+
bool hasMips32r6() const {
return getSTI().getFeatureBits()[Mips::FeatureMips32r6];
}
+
bool hasMips64r6() const {
return getSTI().getFeatureBits()[Mips::FeatureMips64r6];
}
@@ -533,15 +588,19 @@ public:
bool hasDSP() const {
return getSTI().getFeatureBits()[Mips::FeatureDSP];
}
+
bool hasDSPR2() const {
return getSTI().getFeatureBits()[Mips::FeatureDSPR2];
}
+
bool hasDSPR3() const {
return getSTI().getFeatureBits()[Mips::FeatureDSPR3];
}
+
bool hasMSA() const {
return getSTI().getFeatureBits()[Mips::FeatureMSA];
}
+
bool hasCnMips() const {
return (getSTI().getFeatureBits()[Mips::FeatureCnMips]);
}
@@ -627,9 +686,6 @@ public:
}
}
};
-}
-
-namespace {
/// MipsOperand - Instances of this class represent a parsed Mips machine
/// instruction.
@@ -671,6 +727,22 @@ public:
MipsOperand(KindTy K, MipsAsmParser &Parser)
: MCParsedAsmOperand(), Kind(K), AsmParser(Parser) {}
+ ~MipsOperand() override {
+ switch (Kind) {
+ case k_Immediate:
+ break;
+ case k_Memory:
+ delete Mem.Base;
+ break;
+ case k_RegList:
+ delete RegList.List;
+ case k_RegisterIndex:
+ case k_Token:
+ case k_RegPair:
+ break;
+ }
+ }
+
private:
/// For diagnostics, and checking the assembler temporary
MipsAsmParser &AsmParser;
@@ -716,7 +788,7 @@ private:
const MCRegisterInfo *RegInfo,
SMLoc S, SMLoc E,
MipsAsmParser &Parser) {
- auto Op = make_unique<MipsOperand>(k_RegisterIndex, Parser);
+ auto Op = llvm::make_unique<MipsOperand>(k_RegisterIndex, Parser);
Op->RegIdx.Index = Index;
Op->RegIdx.RegInfo = RegInfo;
Op->RegIdx.Kind = RegKind;
@@ -896,6 +968,16 @@ public:
/// Render the operand to an MCInst as a GPR32
/// Asserts if the wrong number of operands are requested, or the operand
/// is not a k_RegisterIndex compatible with RegKind_GPR
+ void addGPR32ZeroAsmRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getGPR32Reg()));
+ }
+
+ void addGPR32NonZeroAsmRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(getGPR32Reg()));
+ }
+
void addGPR32AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(getGPR32Reg()));
@@ -1104,45 +1186,58 @@ public:
// $0/$zero here so that MCK_ZERO works correctly.
return isGPRAsmReg() && RegIdx.Index == 0;
}
+
bool isRegIdx() const { return Kind == k_RegisterIndex; }
bool isImm() const override { return Kind == k_Immediate; }
+
bool isConstantImm() const {
int64_t Res;
return isImm() && getImm()->evaluateAsAbsolute(Res);
}
+
bool isConstantImmz() const {
return isConstantImm() && getConstantImm() == 0;
}
+
template <unsigned Bits, int Offset = 0> bool isConstantUImm() const {
return isConstantImm() && isUInt<Bits>(getConstantImm() - Offset);
}
+
template <unsigned Bits> bool isSImm() const {
return isConstantImm() ? isInt<Bits>(getConstantImm()) : isImm();
}
+
template <unsigned Bits> bool isUImm() const {
return isConstantImm() ? isUInt<Bits>(getConstantImm()) : isImm();
}
+
template <unsigned Bits> bool isAnyImm() const {
return isConstantImm() ? (isInt<Bits>(getConstantImm()) ||
isUInt<Bits>(getConstantImm()))
: isImm();
}
+
template <unsigned Bits, int Offset = 0> bool isConstantSImm() const {
return isConstantImm() && isInt<Bits>(getConstantImm() - Offset);
}
+
template <unsigned Bottom, unsigned Top> bool isConstantUImmRange() const {
return isConstantImm() && getConstantImm() >= Bottom &&
getConstantImm() <= Top;
}
+
bool isToken() const override {
// Note: It's not possible to pretend that other operand kinds are tokens.
// The matcher emitter checks tokens first.
return Kind == k_Token;
}
+
bool isMem() const override { return Kind == k_Memory; }
+
bool isConstantMemOff() const {
return isMem() && isa<MCConstantExpr>(getMemOff());
}
+
// Allow relocation operators.
// FIXME: This predicate and others need to look through binary expressions
// and determine whether a Value is a constant or not.
@@ -1160,28 +1255,34 @@ public:
bool IsReloc = getMemOff()->evaluateAsRelocatable(Res, nullptr, nullptr);
return IsReloc && isShiftedInt<Bits, ShiftAmount>(Res.getConstant());
}
+
bool isMemWithGRPMM16Base() const {
return isMem() && getMemBase()->isMM16AsmReg();
}
+
template <unsigned Bits> bool isMemWithUimmOffsetSP() const {
return isMem() && isConstantMemOff() && isUInt<Bits>(getConstantMemOff())
&& getMemBase()->isRegIdx() && (getMemBase()->getGPR32Reg() == Mips::SP);
}
+
template <unsigned Bits> bool isMemWithUimmWordAlignedOffsetSP() const {
return isMem() && isConstantMemOff() && isUInt<Bits>(getConstantMemOff())
&& (getConstantMemOff() % 4 == 0) && getMemBase()->isRegIdx()
&& (getMemBase()->getGPR32Reg() == Mips::SP);
}
+
template <unsigned Bits> bool isMemWithSimmWordAlignedOffsetGP() const {
return isMem() && isConstantMemOff() && isInt<Bits>(getConstantMemOff())
&& (getConstantMemOff() % 4 == 0) && getMemBase()->isRegIdx()
&& (getMemBase()->getGPR32Reg() == Mips::GP);
}
+
template <unsigned Bits, unsigned ShiftLeftAmount>
bool isScaledUImm() const {
return isConstantImm() &&
isShiftedUInt<Bits, ShiftLeftAmount>(getConstantImm());
}
+
template <unsigned Bits, unsigned ShiftLeftAmount>
bool isScaledSImm() const {
if (isConstantImm() && isShiftedInt<Bits, ShiftLeftAmount>(getConstantImm()))
@@ -1193,6 +1294,7 @@ public:
bool Success = getImm()->evaluateAsRelocatable(Res, nullptr, nullptr);
return Success && isShiftedInt<Bits, ShiftLeftAmount>(Res.getConstant());
}
+
bool isRegList16() const {
if (!isRegList())
return false;
@@ -1217,14 +1319,18 @@ public:
return true;
}
+
bool isInvNum() const { return Kind == k_Immediate; }
+
bool isLSAImm() const {
if (!isConstantImm())
return false;
int64_t Val = getConstantImm();
return 1 <= Val && Val <= 4;
}
+
bool isRegList() const { return Kind == k_RegList; }
+
bool isMovePRegPair() const {
if (Kind != k_RegList || RegList.List->size() != 2)
return false;
@@ -1257,6 +1363,7 @@ public:
assert(Kind == k_Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
}
+
bool isRegPair() const {
return Kind == k_RegPair && RegIdx.Index <= 30;
}
@@ -1310,7 +1417,7 @@ public:
static std::unique_ptr<MipsOperand> CreateToken(StringRef Str, SMLoc S,
MipsAsmParser &Parser) {
- auto Op = make_unique<MipsOperand>(k_Token, Parser);
+ auto Op = llvm::make_unique<MipsOperand>(k_Token, Parser);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -1385,7 +1492,7 @@ public:
static std::unique_ptr<MipsOperand>
CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, MipsAsmParser &Parser) {
- auto Op = make_unique<MipsOperand>(k_Immediate, Parser);
+ auto Op = llvm::make_unique<MipsOperand>(k_Immediate, Parser);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -1395,7 +1502,7 @@ public:
static std::unique_ptr<MipsOperand>
CreateMem(std::unique_ptr<MipsOperand> Base, const MCExpr *Off, SMLoc S,
SMLoc E, MipsAsmParser &Parser) {
- auto Op = make_unique<MipsOperand>(k_Memory, Parser);
+ auto Op = llvm::make_unique<MipsOperand>(k_Memory, Parser);
Op->Mem.Base = Base.release();
Op->Mem.Off = Off;
Op->StartLoc = S;
@@ -1406,9 +1513,9 @@ public:
static std::unique_ptr<MipsOperand>
CreateRegList(SmallVectorImpl<unsigned> &Regs, SMLoc StartLoc, SMLoc EndLoc,
MipsAsmParser &Parser) {
- assert (Regs.size() > 0 && "Empty list not allowed");
+ assert(Regs.size() > 0 && "Empty list not allowed");
- auto Op = make_unique<MipsOperand>(k_RegList, Parser);
+ auto Op = llvm::make_unique<MipsOperand>(k_RegList, Parser);
Op->RegList.List = new SmallVector<unsigned, 10>(Regs.begin(), Regs.end());
Op->StartLoc = StartLoc;
Op->EndLoc = EndLoc;
@@ -1418,7 +1525,7 @@ public:
static std::unique_ptr<MipsOperand> CreateRegPair(const MipsOperand &MOP,
SMLoc S, SMLoc E,
MipsAsmParser &Parser) {
- auto Op = make_unique<MipsOperand>(k_RegPair, Parser);
+ auto Op = llvm::make_unique<MipsOperand>(k_RegPair, Parser);
Op->RegIdx.Index = MOP.RegIdx.Index;
Op->RegIdx.RegInfo = MOP.RegIdx.RegInfo;
Op->RegIdx.Kind = MOP.RegIdx.Kind;
@@ -1427,14 +1534,25 @@ public:
return Op;
}
+ bool isGPRZeroAsmReg() const {
+ return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index == 0;
+ }
+
+ bool isGPRNonZeroAsmReg() const {
+ return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index > 0 &&
+ RegIdx.Index <= 31;
+ }
+
bool isGPRAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_GPR && RegIdx.Index <= 31;
}
+
bool isMM16AsmReg() const {
if (!(isRegIdx() && RegIdx.Kind))
return false;
return ((RegIdx.Index >= 2 && RegIdx.Index <= 7)
|| RegIdx.Index == 16 || RegIdx.Index == 17);
+
}
bool isMM16AsmRegZero() const {
if (!(isRegIdx() && RegIdx.Kind))
@@ -1443,42 +1561,53 @@ public:
(RegIdx.Index >= 2 && RegIdx.Index <= 7) ||
RegIdx.Index == 17);
}
+
bool isMM16AsmRegMoveP() const {
if (!(isRegIdx() && RegIdx.Kind))
return false;
return (RegIdx.Index == 0 || (RegIdx.Index >= 2 && RegIdx.Index <= 3) ||
(RegIdx.Index >= 16 && RegIdx.Index <= 20));
}
+
bool isFGRAsmReg() const {
// AFGR64 is $0-$15 but we handle this in getAFGR64()
return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31;
}
+
bool isHWRegsAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_HWRegs && RegIdx.Index <= 31;
}
+
bool isCCRAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_CCR && RegIdx.Index <= 31;
}
+
bool isFCCAsmReg() const {
if (!(isRegIdx() && RegIdx.Kind & RegKind_FCC))
return false;
return RegIdx.Index <= 7;
}
+
bool isACCAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_ACC && RegIdx.Index <= 3;
}
+
bool isCOP0AsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_COP0 && RegIdx.Index <= 31;
}
+
bool isCOP2AsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_COP2 && RegIdx.Index <= 31;
}
+
bool isCOP3AsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_COP3 && RegIdx.Index <= 31;
}
+
bool isMSA128AsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_MSA128 && RegIdx.Index <= 31;
}
+
bool isMSACtrlAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_MSACtrl && RegIdx.Index <= 7;
}
@@ -1488,22 +1617,6 @@ public:
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const override { return EndLoc; }
- virtual ~MipsOperand() {
- switch (Kind) {
- case k_Immediate:
- break;
- case k_Memory:
- delete Mem.Base;
- break;
- case k_RegList:
- delete RegList.List;
- case k_RegisterIndex:
- case k_Token:
- case k_RegPair:
- break;
- }
- }
-
void print(raw_ostream &OS) const override {
switch (Kind) {
case k_Immediate:
@@ -1553,11 +1666,15 @@ public:
}
}
}; // class MipsOperand
-} // namespace
+
+} // end anonymous namespace
namespace llvm {
+
extern const MCInstrDesc MipsInsts[];
-}
+
+} // end namespace llvm
+
static const MCInstrDesc &getInstDesc(unsigned Opcode) {
return MipsInsts[Opcode];
}
@@ -1785,6 +1902,61 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
}
}
+ // Warn on division by zero. We do this check here because all
+ // instructions are processed here, not just the macros that need
+ // expansion.
+ //
+ // The MIPS backend models most of the division instructions and macros
+ // as three-operand instructions. The pre-R6 divide instructions, however,
+ // have two operands and explicitly define HI/LO as part of the
+ // instruction, not in the operands.
+ unsigned FirstOp = 1;
+ unsigned SecondOp = 2;
+ switch (Inst.getOpcode()) {
+ default:
+ break;
+ case Mips::SDivIMacro:
+ case Mips::UDivIMacro:
+ case Mips::DSDivIMacro:
+ case Mips::DUDivIMacro:
+ if (Inst.getOperand(2).getImm() == 0) {
+ if (Inst.getOperand(1).getReg() == Mips::ZERO ||
+ Inst.getOperand(1).getReg() == Mips::ZERO_64)
+ Warning(IDLoc, "dividing zero by zero");
+ else
+ Warning(IDLoc, "division by zero");
+ }
+ break;
+ case Mips::DSDIV:
+ case Mips::SDIV:
+ case Mips::UDIV:
+ case Mips::DUDIV:
+ case Mips::UDIV_MM:
+ case Mips::SDIV_MM:
+ FirstOp = 0;
+ SecondOp = 1;
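+ // Fall through: the zero-divisor checks below apply to these forms too.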
+ case Mips::SDivMacro:
+ case Mips::DSDivMacro:
+ case Mips::UDivMacro:
+ case Mips::DUDivMacro:
+ case Mips::DIV:
+ case Mips::DIVU:
+ case Mips::DDIV:
+ case Mips::DDIVU:
+ case Mips::DIVU_MMR6:
+ case Mips::DDIVU_MM64R6:
+ case Mips::DIV_MMR6:
+ case Mips::DDIV_MM64R6:
+ if (Inst.getOperand(SecondOp).getReg() == Mips::ZERO ||
+ Inst.getOperand(SecondOp).getReg() == Mips::ZERO_64) {
+ if (Inst.getOperand(FirstOp).getReg() == Mips::ZERO ||
+ Inst.getOperand(FirstOp).getReg() == Mips::ZERO_64)
+ Warning(IDLoc, "dividing zero by zero");
+ else
+ Warning(IDLoc, "division by zero");
+ }
+ break;
+ }
+
// For PIC code convert unconditional jump to unconditional branch.
if ((Inst.getOpcode() == Mips::J || Inst.getOpcode() == Mips::J_MM) &&
inPicMode()) {
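Reduced to its essentials, the warning logic above depends only on which operand holds the divisor (operand 2 for the three-operand forms, operand 1 for the pre-R6 two-operand forms) and on whether dividend and divisor are $zero. A compact model (illustrative only; the toy register set is an assumption):

    #include <cstdio>

    enum Reg { ZERO, T0, T1 }; // toy register set

    void warnOnDivByZero(Reg Dividend, Reg Divisor) {
      if (Divisor != ZERO)
        return;
      if (Dividend == ZERO)
        std::puts("warning: dividing zero by zero"); // div $d,$zero,$zero
      else
        std::puts("warning: division by zero");      // div $d,$t0,$zero
    }

    int main() {
      warnOnDivByZero(T0, ZERO);   // division by zero
      warnOnDivByZero(ZERO, ZERO); // dividing zero by zero
    }
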
@@ -2135,6 +2307,8 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return expandJalWithRegs(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::BneImm:
case Mips::BeqImm:
+ case Mips::BEQLImmMacro:
+ case Mips::BNELImmMacro:
return expandBranchImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::BLT:
case Mips::BLE:
@@ -2170,15 +2344,19 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
case Mips::BGTULImmMacro:
return expandCondBranches(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::SDivMacro:
+ case Mips::SDivIMacro:
return expandDiv(Inst, IDLoc, Out, STI, false, true) ? MER_Fail
: MER_Success;
case Mips::DSDivMacro:
+ case Mips::DSDivIMacro:
return expandDiv(Inst, IDLoc, Out, STI, true, true) ? MER_Fail
: MER_Success;
case Mips::UDivMacro:
+ case Mips::UDivIMacro:
return expandDiv(Inst, IDLoc, Out, STI, false, false) ? MER_Fail
: MER_Success;
case Mips::DUDivMacro:
+ case Mips::DUDivIMacro:
return expandDiv(Inst, IDLoc, Out, STI, true, false) ? MER_Fail
: MER_Success;
case Mips::PseudoTRUNC_W_S:
@@ -2200,11 +2378,24 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
case Mips::Usw:
return expandUxw(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::NORImm:
+ case Mips::NORImm64:
+ return expandAliasImmediate(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::SLTImm64:
+ if (isInt<16>(Inst.getOperand(2).getImm())) {
+ Inst.setOpcode(Mips::SLTi64);
+ return MER_NotAMacro;
+ }
+ return expandAliasImmediate(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::SLTUImm64:
+ if (isInt<16>(Inst.getOperand(2).getImm())) {
+ Inst.setOpcode(Mips::SLTiu64);
+ return MER_NotAMacro;
+ }
return expandAliasImmediate(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
- case Mips::ADDi:
- case Mips::ADDiu:
- case Mips::SLTi:
- case Mips::SLTiu:
+ case Mips::ADDi: case Mips::ADDi_MM:
+ case Mips::ADDiu: case Mips::ADDiu_MM:
+ case Mips::SLTi: case Mips::SLTi_MM:
+ case Mips::SLTiu: case Mips::SLTiu_MM:
if ((Inst.getNumOperands() == 3) && Inst.getOperand(0).isReg() &&
Inst.getOperand(1).isReg() && Inst.getOperand(2).isImm()) {
int64_t ImmValue = Inst.getOperand(2).getImm();
@@ -2214,9 +2405,9 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
: MER_Success;
}
return MER_NotAMacro;
- case Mips::ANDi:
- case Mips::ORi:
- case Mips::XORi:
+ case Mips::ANDi: case Mips::ANDi_MM: case Mips::ANDi64:
+ case Mips::ORi: case Mips::ORi_MM: case Mips::ORi64:
+ case Mips::XORi: case Mips::XORi_MM: case Mips::XORi64:
if ((Inst.getNumOperands() == 3) && Inst.getOperand(0).isReg() &&
Inst.getOperand(1).isReg() && Inst.getOperand(2).isImm()) {
int64_t ImmValue = Inst.getOperand(2).getImm();
@@ -2240,6 +2431,17 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return expandDRotationImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::ABSMacro:
return expandAbs(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::MULImmMacro:
+ case Mips::DMULImmMacro:
+ return expandMulImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::MULOMacro:
+ case Mips::DMULOMacro:
+ return expandMulO(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::MULOUMacro:
+ case Mips::DMULOUMacro:
+ return expandMulOU(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::DMULMacro:
+ return expandDMULMacro(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::LDMacro:
case Mips::SDMacro:
return expandLoadStoreDMacro(Inst, IDLoc, Out, STI,
@@ -2392,7 +2594,6 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff;
uint16_t Bits15To0 = ImmValue & 0xffff;
-
if (!Is32BitImm && !isInt<32>(ImmValue)) {
// Traditional behaviour seems to special case this particular value. It's
// not clear why other masks are handled differently.
@@ -2618,20 +2819,24 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
// This is the 64-bit symbol address expansion.
if (ABI.ArePtrs64bit() && isGP64bit()) {
- // We always need AT for the 64-bit expansion.
- // If it is not available we exit.
- unsigned ATReg = getATReg(IDLoc);
- if (!ATReg)
- return true;
+ // We need AT for the 64-bit expansion when the optional source register
+ // is the destination register, and for the superscalar-scheduled form.
+ //
+ // If AT is not available, we can only error out when the destination is
+ // the same as the source register; otherwise we expand serially below.
const MipsMCExpr *HighestExpr =
MipsMCExpr::create(MipsMCExpr::MEK_HIGHEST, SymExpr, getContext());
const MipsMCExpr *HigherExpr =
MipsMCExpr::create(MipsMCExpr::MEK_HIGHER, SymExpr, getContext());
- if (UseSrcReg &&
- getContext().getRegisterInfo()->isSuperOrSubRegisterEq(DstReg,
- SrcReg)) {
+ bool RdRegIsRsReg =
+ getContext().getRegisterInfo()->isSuperOrSubRegisterEq(DstReg, SrcReg);
+
+ if (canUseATReg() && UseSrcReg && RdRegIsRsReg) {
+ unsigned ATReg = getATReg(IDLoc);
+
// If $rs is the same as $rd:
// (d)la $rd, sym($rd) => lui $at, %highest(sym)
// daddiu $at, $at, %higher(sym)
@@ -2653,29 +2858,65 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
TOut.emitRRR(Mips::DADDu, DstReg, ATReg, SrcReg, IDLoc, STI);
return false;
- }
+ } else if (canUseATReg() && !RdRegIsRsReg) {
+ unsigned ATReg = getATReg(IDLoc);
- // Otherwise, if the $rs is different from $rd or if $rs isn't specified:
- // (d)la $rd, sym/sym($rs) => lui $rd, %highest(sym)
- // lui $at, %hi(sym)
- // daddiu $rd, $rd, %higher(sym)
- // daddiu $at, $at, %lo(sym)
- // dsll32 $rd, $rd, 0
- // daddu $rd, $rd, $at
- // (daddu $rd, $rd, $rs)
- TOut.emitRX(Mips::LUi, DstReg, MCOperand::createExpr(HighestExpr), IDLoc,
- STI);
- TOut.emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HiExpr), IDLoc, STI);
- TOut.emitRRX(Mips::DADDiu, DstReg, DstReg,
- MCOperand::createExpr(HigherExpr), IDLoc, STI);
- TOut.emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(LoExpr),
- IDLoc, STI);
- TOut.emitRRI(Mips::DSLL32, DstReg, DstReg, 0, IDLoc, STI);
- TOut.emitRRR(Mips::DADDu, DstReg, DstReg, ATReg, IDLoc, STI);
- if (UseSrcReg)
- TOut.emitRRR(Mips::DADDu, DstReg, DstReg, SrcReg, IDLoc, STI);
+ // If the $rs is different from $rd or if $rs isn't specified and we
+ // have $at available:
+ // (d)la $rd, sym/sym($rs) => lui $rd, %highest(sym)
+ // lui $at, %hi(sym)
+ // daddiu $rd, $rd, %higher(sym)
+ // daddiu $at, $at, %lo(sym)
+ // dsll32 $rd, $rd, 0
+ // daddu $rd, $rd, $at
+ // (daddu $rd, $rd, $rs)
+ //
+ // Which is preferred for superscalar issue.
+ TOut.emitRX(Mips::LUi, DstReg, MCOperand::createExpr(HighestExpr), IDLoc,
+ STI);
+ TOut.emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HiExpr), IDLoc, STI);
+ TOut.emitRRX(Mips::DADDiu, DstReg, DstReg,
+ MCOperand::createExpr(HigherExpr), IDLoc, STI);
+ TOut.emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(LoExpr),
+ IDLoc, STI);
+ TOut.emitRRI(Mips::DSLL32, DstReg, DstReg, 0, IDLoc, STI);
+ TOut.emitRRR(Mips::DADDu, DstReg, DstReg, ATReg, IDLoc, STI);
+ if (UseSrcReg)
+ TOut.emitRRR(Mips::DADDu, DstReg, DstReg, SrcReg, IDLoc, STI);
- return false;
+ return false;
+ } else if (!canUseATReg() && !RdRegIsRsReg) {
+ // Otherwise, synthesize the address in the destination register
+ // serially:
+ // (d)la $rd, sym/sym($rs) => lui $rd, %highest(sym)
+ // daddiu $rd, $rd, %higher(sym)
+ // dsll $rd, $rd, 16
+ // daddiu $rd, $rd, %hi(sym)
+ // dsll $rd, $rd, 16
+ // daddiu $rd, $rd, %lo(sym)
+ TOut.emitRX(Mips::LUi, DstReg, MCOperand::createExpr(HighestExpr), IDLoc,
+ STI);
+ TOut.emitRRX(Mips::DADDiu, DstReg, DstReg,
+ MCOperand::createExpr(HigherExpr), IDLoc, STI);
+ TOut.emitRRI(Mips::DSLL, DstReg, DstReg, 16, IDLoc, STI);
+ TOut.emitRRX(Mips::DADDiu, DstReg, DstReg,
+ MCOperand::createExpr(HiExpr), IDLoc, STI);
+ TOut.emitRRI(Mips::DSLL, DstReg, DstReg, 16, IDLoc, STI);
+ TOut.emitRRX(Mips::DADDiu, DstReg, DstReg,
+ MCOperand::createExpr(LoExpr), IDLoc, STI);
+ if (UseSrcReg)
+ TOut.emitRRR(Mips::DADDu, DstReg, DstReg, SrcReg, IDLoc, STI);
+
+ return false;
+ } else {
+ // We have a case where SrcReg == DstReg and we don't have $at
+ // available. We can't expand this case, so error out appropriately.
+ assert(SrcReg == DstReg && !canUseATReg() &&
+ "Could have expanded dla but didn't?");
+ reportParseError(IDLoc,
+ "pseudo-instruction requires $at, which is not available");
+ return true;
+ }
}
// And now, the 32-bit symbol address expansion:
@@ -2769,6 +3010,8 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
assert((MemOffsetOp.isImm() || MemOffsetOp.isExpr()) &&
"expected immediate or expression operand");
+ bool IsLikely = false;
+
unsigned OpCode = 0;
switch(Inst.getOpcode()) {
case Mips::BneImm:
@@ -2777,16 +3020,29 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
case Mips::BeqImm:
OpCode = Mips::BEQ;
break;
+ case Mips::BEQLImmMacro:
+ OpCode = Mips::BEQL;
+ IsLikely = true;
+ break;
+ case Mips::BNELImmMacro:
+ OpCode = Mips::BNEL;
+ IsLikely = true;
+ break;
default:
llvm_unreachable("Unknown immediate branch pseudo-instruction.");
break;
}
int64_t ImmValue = ImmOp.getImm();
- if (ImmValue == 0)
- TOut.emitRRX(OpCode, DstRegOp.getReg(), Mips::ZERO, MemOffsetOp, IDLoc,
- STI);
- else {
+ if (ImmValue == 0) {
+ if (IsLikely) {
+ TOut.emitRRX(OpCode, DstRegOp.getReg(), Mips::ZERO,
+ MCOperand::createExpr(MemOffsetOp.getExpr()), IDLoc, STI);
+ TOut.emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI);
+ } else
+ TOut.emitRRX(OpCode, DstRegOp.getReg(), Mips::ZERO, MemOffsetOp, IDLoc,
+ STI);
+ } else {
warnIfNoMacro(IDLoc);
unsigned ATReg = getATReg(IDLoc);
@@ -2797,7 +3053,12 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
IDLoc, Out, STI))
return true;
- TOut.emitRRX(OpCode, DstRegOp.getReg(), ATReg, MemOffsetOp, IDLoc, STI);
+ if (IsLikely) {
+ TOut.emitRRX(OpCode, DstRegOp.getReg(), ATReg,
+ MCOperand::createExpr(MemOffsetOp.getExpr()), IDLoc, STI);
+ TOut.emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, STI);
+ } else
+ TOut.emitRRX(OpCode, DstRegOp.getReg(), ATReg, MemOffsetOp, IDLoc, STI);
}
return false;
}
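The branch-likely immediate pseudos handled above differ from the plain BneImm/BeqImm case in two ways: the offset is re-wrapped as an expression operand, and an explicit delay-slot nop (sll $zero, $zero, 0) follows the branch, since a branch-likely annuls its delay slot on the not-taken path. A sketch of the assumed expansion, printed for a BNEL example (loadImmediate may really emit several instructions; it is shown as a single li for brevity):

    #include <cstdio>

    // Prints the assumed expansion of "bnel $rs, imm, label".
    void printBnelImmExpansion(const char *Rs, long Imm, const char *Label) {
      if (Imm == 0) {
        std::printf("bnel %s, $zero, %s\n", Rs, Label);
      } else {
        std::printf("li   $at, %ld\n", Imm);          // materialize in $at
        std::printf("bnel %s, $at, %s\n", Rs, Label);
      }
      std::printf("sll  $zero, $zero, 0\n");          // delay-slot nop
    }

    int main() { printBnelImmExpansion("$t0", 12345, "1f"); }
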
@@ -2904,9 +3165,9 @@ bool MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc,
unsigned Opcode = Inst.getOpcode();
unsigned NewOpcode = Opcode == Mips::SWM_MM ? Mips::SWM32_MM : Mips::LWM32_MM;
- assert (Inst.getOperand(OpNum - 1).isImm() &&
- Inst.getOperand(OpNum - 2).isReg() &&
- Inst.getOperand(OpNum - 3).isReg() && "Invalid instruction operand.");
+ assert(Inst.getOperand(OpNum - 1).isImm() &&
+ Inst.getOperand(OpNum - 2).isReg() &&
+ Inst.getOperand(OpNum - 3).isReg() && "Invalid instruction operand.");
if (OpNum < 8 && Inst.getOperand(OpNum - 1).getImm() <= 60 &&
Inst.getOperand(OpNum - 1).getImm() >= 0 &&
@@ -3185,6 +3446,14 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc,
return false;
}
+// Expand an integer division macro.
+//
+// Notably we don't have to emit a warning when encountering $rt as the $zero
+// register, or 0 as an immediate. processInstruction() has already done that.
+//
+// The destination register can only be $zero when expanding (S)DivIMacro or
+// D(S)DivMacro.
+
bool MipsAsmParser::expandDiv(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI, const bool IsMips64,
const bool Signed) {
@@ -3200,67 +3469,88 @@ bool MipsAsmParser::expandDiv(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
assert(RsRegOp.isReg() && "expected register operand kind");
unsigned RsReg = RsRegOp.getReg();
- const MCOperand &RtRegOp = Inst.getOperand(2);
- assert(RtRegOp.isReg() && "expected register operand kind");
- unsigned RtReg = RtRegOp.getReg();
+ unsigned RtReg;
+ int64_t ImmValue;
+
+ const MCOperand &RtOp = Inst.getOperand(2);
+ assert((RtOp.isReg() || RtOp.isImm()) &&
+ "expected register or immediate operand kind");
+ if (RtOp.isReg())
+ RtReg = RtOp.getReg();
+ else
+ ImmValue = RtOp.getImm();
+
unsigned DivOp;
unsigned ZeroReg;
+ unsigned SubOp;
if (IsMips64) {
DivOp = Signed ? Mips::DSDIV : Mips::DUDIV;
ZeroReg = Mips::ZERO_64;
+ SubOp = Mips::DSUB;
} else {
DivOp = Signed ? Mips::SDIV : Mips::UDIV;
ZeroReg = Mips::ZERO;
+ SubOp = Mips::SUB;
}
bool UseTraps = useTraps();
- if (RsReg == Mips::ZERO || RsReg == Mips::ZERO_64) {
- if (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64)
- Warning(IDLoc, "dividing zero by zero");
- if (IsMips64) {
- if (Signed && (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64)) {
- if (UseTraps) {
- TOut.emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, STI);
- return false;
- }
+ if (RtOp.isImm()) {
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+ if (ImmValue == 0) {
+ if (UseTraps)
+ TOut.emitRRI(Mips::TEQ, ZeroReg, ZeroReg, 0x7, IDLoc, STI);
+ else
TOut.emitII(Mips::BREAK, 0x7, 0, IDLoc, STI);
- return false;
- }
+ return false;
+ }
+
+ if (ImmValue == 1) {
+ TOut.emitRRR(Mips::OR, RdReg, RsReg, Mips::ZERO, IDLoc, STI);
+ return false;
+ } else if (Signed && ImmValue == -1) {
+ TOut.emitRRR(SubOp, RdReg, ZeroReg, RsReg, IDLoc, STI);
+ return false;
} else {
- TOut.emitRR(DivOp, RsReg, RtReg, IDLoc, STI);
+ if (loadImmediate(ImmValue, ATReg, Mips::NoRegister, isInt<32>(ImmValue),
+ false, Inst.getLoc(), Out, STI))
+ return true;
+ TOut.emitRR(DivOp, RsReg, ATReg, IDLoc, STI);
+ TOut.emitR(Mips::MFLO, RdReg, IDLoc, STI);
return false;
}
+ return true;
}
+ // If the macro expansion of (d)div(u) would always trap or break, insert
+ // the trap/break and exit. This emits a different sequence to GAS, which
+ // has an inconsistency/missed optimization in that not all cases are
+ // handled equivalently. As the observed behaviour is the same, we're ok.
if (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64) {
- Warning(IDLoc, "division by zero");
- if (Signed) {
- if (UseTraps) {
- TOut.emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, STI);
- return false;
- }
-
- TOut.emitII(Mips::BREAK, 0x7, 0, IDLoc, STI);
+ if (UseTraps) {
+ TOut.emitRRI(Mips::TEQ, ZeroReg, ZeroReg, 0x7, IDLoc, STI);
return false;
}
+ TOut.emitII(Mips::BREAK, 0x7, 0, IDLoc, STI);
+ return false;
}
- // FIXME: The values for these two BranchTarget variables may be different in
- // micromips. These magic numbers need to be removed.
- unsigned BranchTargetNoTraps;
- unsigned BranchTarget;
+ // Temporary label for the first branch target.
+ MCContext &Context = TOut.getStreamer().getContext();
+ MCSymbol *BrTarget;
+ MCOperand LabelOp;
if (UseTraps) {
- BranchTarget = IsMips64 ? 12 : 8;
TOut.emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, STI);
} else {
- BranchTarget = IsMips64 ? 20 : 16;
- BranchTargetNoTraps = 8;
// Branch to the li instruction.
- TOut.emitRRI(Mips::BNE, RtReg, ZeroReg, BranchTargetNoTraps, IDLoc, STI);
+ BrTarget = Context.createTempSymbol();
+ LabelOp = MCOperand::createExpr(MCSymbolRefExpr::create(BrTarget, Context));
+ TOut.emitRRX(Mips::BNE, RtReg, ZeroReg, LabelOp, IDLoc, STI);
}
TOut.emitRR(DivOp, RsReg, RtReg, IDLoc, STI);
@@ -3269,6 +3559,9 @@ bool MipsAsmParser::expandDiv(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
TOut.emitII(Mips::BREAK, 0x7, 0, IDLoc, STI);
if (!Signed) {
+ if (!UseTraps)
+ TOut.getStreamer().EmitLabel(BrTarget);
+
TOut.emitR(Mips::MFLO, RdReg, IDLoc, STI);
return false;
}
@@ -3277,15 +3570,23 @@ bool MipsAsmParser::expandDiv(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
if (!ATReg)
return true;
+ if (!UseTraps)
+ TOut.getStreamer().EmitLabel(BrTarget);
+
TOut.emitRRI(Mips::ADDiu, ATReg, ZeroReg, -1, IDLoc, STI);
+
+ // Temporary label for the second branch target.
+ MCSymbol *BrTargetEnd = Context.createTempSymbol();
+ MCOperand LabelOpEnd =
+ MCOperand::createExpr(MCSymbolRefExpr::create(BrTargetEnd, Context));
+
+ // Branch to the mflo instruction.
+ TOut.emitRRX(Mips::BNE, RtReg, ATReg, LabelOpEnd, IDLoc, STI);
+
if (IsMips64) {
- // Branch to the mflo instruction.
- TOut.emitRRI(Mips::BNE, RtReg, ATReg, BranchTarget, IDLoc, STI);
TOut.emitRRI(Mips::ADDiu, ATReg, ZeroReg, 1, IDLoc, STI);
TOut.emitRRI(Mips::DSLL32, ATReg, ATReg, 0x1f, IDLoc, STI);
} else {
- // Branch to the mflo instruction.
- TOut.emitRRI(Mips::BNE, RtReg, ATReg, BranchTarget, IDLoc, STI);
TOut.emitRI(Mips::LUi, ATReg, (uint16_t)0x8000, IDLoc, STI);
}
@@ -3293,10 +3594,12 @@ bool MipsAsmParser::expandDiv(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
TOut.emitRRI(Mips::TEQ, RsReg, ATReg, 0x6, IDLoc, STI);
else {
// Branch to the mflo instruction.
- TOut.emitRRI(Mips::BNE, RsReg, ATReg, BranchTargetNoTraps, IDLoc, STI);
+ TOut.emitRRX(Mips::BNE, RsReg, ATReg, LabelOpEnd, IDLoc, STI);
TOut.emitRRI(Mips::SLL, ZeroReg, ZeroReg, 0, IDLoc, STI);
TOut.emitII(Mips::BREAK, 0x6, 0, IDLoc, STI);
}
+
+ TOut.getStreamer().EmitLabel(BrTargetEnd);
TOut.emitR(Mips::MFLO, RdReg, IDLoc, STI);
return false;
}
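With the temporary labels in place of the old magic byte offsets, the signed 32-bit no-traps expansion of the div macro reads as below ($L1 and $L2 stand for BrTarget and BrTargetEnd; the exact label spellings are an assumption). The listing is kept as a string constant purely for illustration:

    // Assumed expansion of "div $rd, $rs, $rt" (signed, 32-bit, traps off):
    static const char *const ExpandedDivNoTraps =
        "      bne   $rt, $zero, $L1\n" // divisor != 0: skip the break
        "      div   $rs, $rt\n"        // executes in the delay slot
        "      break 7\n"               // divide-by-zero
        "$L1:  addiu $at, $zero, -1\n"
        "      bne   $rt, $at, $L2\n"   // divisor != -1: no overflow
        "      lui   $at, 0x8000\n"     // INT_MIN check (delay slot)
        "      bne   $rs, $at, $L2\n"
        "      sll   $zero, $zero, 0\n" // delay-slot nop
        "      break 6\n"               // INT_MIN / -1 overflows
        "$L2:  mflo  $rd\n";
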
@@ -3503,10 +3806,10 @@ bool MipsAsmParser::expandAliasImmediate(MCInst &Inst, SMLoc IDLoc,
const MCSubtargetInfo *STI) {
MipsTargetStreamer &TOut = getTargetStreamer();
- assert (Inst.getNumOperands() == 3 && "Invalid operand count");
- assert (Inst.getOperand(0).isReg() &&
- Inst.getOperand(1).isReg() &&
- Inst.getOperand(2).isImm() && "Invalid instruction operand.");
+ assert(Inst.getNumOperands() == 3 && "Invalid operand count");
+ assert(Inst.getOperand(0).isReg() &&
+ Inst.getOperand(1).isReg() &&
+ Inst.getOperand(2).isImm() && "Invalid instruction operand.");
unsigned ATReg = Mips::NoRegister;
unsigned FinalDstReg = Mips::NoRegister;
@@ -3514,7 +3817,7 @@ bool MipsAsmParser::expandAliasImmediate(MCInst &Inst, SMLoc IDLoc,
unsigned SrcReg = Inst.getOperand(1).getReg();
int64_t ImmValue = Inst.getOperand(2).getImm();
- bool Is32Bit = isInt<32>(ImmValue) || isUInt<32>(ImmValue);
+ bool Is32Bit = isInt<32>(ImmValue) || (!isGP64bit() && isUInt<32>(ImmValue));
unsigned FinalOpcode = Inst.getOpcode();
@@ -3530,30 +3833,69 @@ bool MipsAsmParser::expandAliasImmediate(MCInst &Inst, SMLoc IDLoc,
switch (FinalOpcode) {
default:
llvm_unreachable("unimplemented expansion");
- case (Mips::ADDi):
+ case Mips::ADDi:
FinalOpcode = Mips::ADD;
break;
- case (Mips::ADDiu):
+ case Mips::ADDiu:
FinalOpcode = Mips::ADDu;
break;
- case (Mips::ANDi):
+ case Mips::ANDi:
FinalOpcode = Mips::AND;
break;
- case (Mips::NORImm):
+ case Mips::NORImm:
FinalOpcode = Mips::NOR;
break;
- case (Mips::ORi):
+ case Mips::ORi:
FinalOpcode = Mips::OR;
break;
- case (Mips::SLTi):
+ case Mips::SLTi:
FinalOpcode = Mips::SLT;
break;
- case (Mips::SLTiu):
+ case Mips::SLTiu:
FinalOpcode = Mips::SLTu;
break;
- case (Mips::XORi):
+ case Mips::XORi:
FinalOpcode = Mips::XOR;
break;
+ case Mips::ADDi_MM:
+ FinalOpcode = Mips::ADD_MM;
+ break;
+ case Mips::ADDiu_MM:
+ FinalOpcode = Mips::ADDu_MM;
+ break;
+ case Mips::ANDi_MM:
+ FinalOpcode = Mips::AND_MM;
+ break;
+ case Mips::ORi_MM:
+ FinalOpcode = Mips::OR_MM;
+ break;
+ case Mips::SLTi_MM:
+ FinalOpcode = Mips::SLT_MM;
+ break;
+ case Mips::SLTiu_MM:
+ FinalOpcode = Mips::SLTu_MM;
+ break;
+ case Mips::XORi_MM:
+ FinalOpcode = Mips::XOR_MM;
+ break;
+ case Mips::ANDi64:
+ FinalOpcode = Mips::AND64;
+ break;
+ case Mips::NORImm64:
+ FinalOpcode = Mips::NOR64;
+ break;
+ case Mips::ORi64:
+ FinalOpcode = Mips::OR64;
+ break;
+ case Mips::SLTImm64:
+ FinalOpcode = Mips::SLT64;
+ break;
+ case Mips::SLTUImm64:
+ FinalOpcode = Mips::SLTu64;
+ break;
+ case Mips::XORi64:
+ FinalOpcode = Mips::XOR64;
+ break;
}
if (FinalDstReg == Mips::NoRegister)
@@ -3578,7 +3920,6 @@ bool MipsAsmParser::expandRotation(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
unsigned SecondShift = Mips::NOP;
if (hasMips32r2()) {
-
if (DReg == SReg) {
TmpReg = getATReg(Inst.getLoc());
if (!TmpReg)
@@ -3600,7 +3941,6 @@ bool MipsAsmParser::expandRotation(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
}
if (hasMips32()) {
-
switch (Inst.getOpcode()) {
default:
llvm_unreachable("unexpected instruction opcode");
@@ -3642,7 +3982,6 @@ bool MipsAsmParser::expandRotationImm(MCInst &Inst, SMLoc IDLoc,
unsigned SecondShift = Mips::NOP;
if (hasMips32r2()) {
-
if (Inst.getOpcode() == Mips::ROLImm) {
uint64_t MaxShift = 32;
uint64_t ShiftValue = ImmValue;
@@ -3661,7 +4000,6 @@ bool MipsAsmParser::expandRotationImm(MCInst &Inst, SMLoc IDLoc,
}
if (hasMips32()) {
-
if (ImmValue == 0) {
TOut.emitRRI(Mips::SRL, DReg, SReg, 0, Inst.getLoc(), STI);
return false;
@@ -3707,7 +4045,6 @@ bool MipsAsmParser::expandDRotation(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
unsigned SecondShift = Mips::NOP;
if (hasMips64r2()) {
-
if (TmpReg == SReg) {
TmpReg = getATReg(Inst.getLoc());
if (!TmpReg)
@@ -3729,7 +4066,6 @@ bool MipsAsmParser::expandDRotation(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
}
if (hasMips64()) {
-
switch (Inst.getOpcode()) {
default:
llvm_unreachable("unexpected instruction opcode");
@@ -3773,7 +4109,6 @@ bool MipsAsmParser::expandDRotationImm(MCInst &Inst, SMLoc IDLoc,
MCInst TmpInst;
if (hasMips64r2()) {
-
unsigned FinalOpcode = Mips::NOP;
if (ImmValue == 0)
FinalOpcode = Mips::DROTR;
@@ -3801,7 +4136,6 @@ bool MipsAsmParser::expandDRotationImm(MCInst &Inst, SMLoc IDLoc,
}
if (hasMips64()) {
-
if (ImmValue == 0) {
TOut.emitRRI(Mips::DSRL, DReg, SReg, 0, Inst.getLoc(), STI);
return false;
@@ -3871,6 +4205,119 @@ bool MipsAsmParser::expandAbs(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return false;
}
+bool MipsAsmParser::expandMulImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+ unsigned ATReg = Mips::NoRegister;
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ int32_t ImmValue = Inst.getOperand(2).getImm();
+
+ ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+
+ loadImmediate(ImmValue, ATReg, Mips::NoRegister, true, false, IDLoc, Out, STI);
+
+ TOut.emitRR(Inst.getOpcode() == Mips::MULImmMacro ? Mips::MULT : Mips::DMULT,
+ SrcReg, ATReg, IDLoc, STI);
+
+ TOut.emitR(Mips::MFLO, DstReg, IDLoc, STI);
+
+ return false;
+}
+
+bool MipsAsmParser::expandMulO(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+ unsigned ATReg = Mips::NoRegister;
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ unsigned TmpReg = Inst.getOperand(2).getReg();
+
+ ATReg = getATReg(Inst.getLoc());
+ if (!ATReg)
+ return true;
+
+ TOut.emitRR(Inst.getOpcode() == Mips::MULOMacro ? Mips::MULT : Mips::DMULT,
+ SrcReg, TmpReg, IDLoc, STI);
+
+ TOut.emitR(Mips::MFLO, DstReg, IDLoc, STI);
+
+ TOut.emitRRI(Inst.getOpcode() == Mips::MULOMacro ? Mips::SRA : Mips::DSRA32,
+ DstReg, DstReg, 0x1F, IDLoc, STI);
+
+ TOut.emitR(Mips::MFHI, ATReg, IDLoc, STI);
+
+ if (useTraps()) {
+ TOut.emitRRI(Mips::TNE, DstReg, ATReg, 6, IDLoc, STI);
+ } else {
+ MCContext & Context = TOut.getStreamer().getContext();
+ MCSymbol * BrTarget = Context.createTempSymbol();
+ MCOperand LabelOp =
+ MCOperand::createExpr(MCSymbolRefExpr::create(BrTarget, Context));
+
+ TOut.emitRRX(Mips::BEQ, DstReg, ATReg, LabelOp, IDLoc, STI);
+ if (AssemblerOptions.back()->isReorder())
+ TOut.emitNop(IDLoc, STI);
+ TOut.emitII(Mips::BREAK, 6, 0, IDLoc, STI);
+
+ TOut.getStreamer().EmitLabel(BrTarget);
+ }
+ TOut.emitR(Mips::MFLO, DstReg, IDLoc, STI);
+
+ return false;
+}
+
+bool MipsAsmParser::expandMulOU(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+ unsigned ATReg = Mips::NoRegister;
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ unsigned TmpReg = Inst.getOperand(2).getReg();
+
+ ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+
+ TOut.emitRR(Inst.getOpcode() == Mips::MULOUMacro ? Mips::MULTu : Mips::DMULTu,
+ SrcReg, TmpReg, IDLoc, STI);
+
+ TOut.emitR(Mips::MFHI, ATReg, IDLoc, STI);
+ TOut.emitR(Mips::MFLO, DstReg, IDLoc, STI);
+ if (useTraps()) {
+ TOut.emitRRI(Mips::TNE, ATReg, Mips::ZERO, 6, IDLoc, STI);
+ } else {
+ MCContext & Context = TOut.getStreamer().getContext();
+ MCSymbol * BrTarget = Context.createTempSymbol();
+ MCOperand LabelOp =
+ MCOperand::createExpr(MCSymbolRefExpr::create(BrTarget, Context));
+
+ TOut.emitRRX(Mips::BEQ, ATReg, Mips::ZERO, LabelOp, IDLoc, STI);
+ if (AssemblerOptions.back()->isReorder())
+ TOut.emitNop(IDLoc, STI);
+ TOut.emitII(Mips::BREAK, 6, 0, IDLoc, STI);
+
+ TOut.getStreamer().EmitLabel(BrTarget);
+ }
+
+ return false;
+}
+
+bool MipsAsmParser::expandDMULMacro(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+ MipsTargetStreamer &TOut = getTargetStreamer();
+ unsigned DstReg = Inst.getOperand(0).getReg();
+ unsigned SrcReg = Inst.getOperand(1).getReg();
+ unsigned TmpReg = Inst.getOperand(2).getReg();
+
+ TOut.emitRR(Mips::DMULTu, SrcReg, TmpReg, IDLoc, STI);
+ TOut.emitR(Mips::MFLO, DstReg, IDLoc, STI);
+
+ return false;
+}
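
For reference, the overflow tests that mulo and mulou expand to (SRA/DSRA32 of LO compared against HI, and HI compared against $zero, respectively) are equivalent to the following C++; this is my restatement, not code from the patch, and it assumes two's-complement narrowing and arithmetic right shift:

    #include <cstdint>

    // mulo: a signed 32x32 multiply overflows iff the high word of the 64-bit
    // product differs from the sign-extension of the low word (SRA low, 31).
    static bool signedMulOverflows(int32_t A, int32_t B) {
      int64_t P = int64_t(A) * int64_t(B);
      int32_t Lo = int32_t(uint64_t(P));
      int32_t Hi = int32_t(uint64_t(P) >> 32);
      return Hi != (Lo >> 31);
    }

    // mulou: an unsigned multiply overflows iff any product bit reaches HI.
    static bool unsignedMulOverflows(uint32_t A, uint32_t B) {
      return (uint64_t(A) * uint64_t(B)) >> 32 != 0;
    }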
+
static unsigned nextReg(unsigned Reg) {
switch (Reg) {
case Mips::ZERO: return Mips::AT;
@@ -3985,7 +4432,6 @@ bool MipsAsmParser::expandSeq(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
bool MipsAsmParser::expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI) {
-
warnIfNoMacro(IDLoc);
MipsTargetStreamer &TOut = getTargetStreamer();
@@ -4158,17 +4604,15 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
-
MCInst Inst;
unsigned MatchResult =
MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
switch (MatchResult) {
- case Match_Success: {
+ case Match_Success:
if (processInstruction(Inst, IDLoc, Out, STI))
return true;
return false;
- }
case Match_MissingFeature:
Error(IDLoc, "instruction requires a CPU feature not currently enabled");
return true;
@@ -4441,7 +4885,6 @@ int MipsAsmParser::matchHWRegsRegisterName(StringRef Name) {
}
int MipsAsmParser::matchFPURegisterName(StringRef Name) {
-
if (Name[0] == 'f') {
StringRef NumString = Name.substr(1);
unsigned IntVal;
@@ -4455,7 +4898,6 @@ int MipsAsmParser::matchFPURegisterName(StringRef Name) {
}
int MipsAsmParser::matchFCCRegisterName(StringRef Name) {
-
if (Name.startswith("fcc")) {
StringRef NumString = Name.substr(3);
unsigned IntVal;
@@ -4469,7 +4911,6 @@ int MipsAsmParser::matchFCCRegisterName(StringRef Name) {
}
int MipsAsmParser::matchACRegisterName(StringRef Name) {
-
if (Name.startswith("ac")) {
StringRef NumString = Name.substr(2);
unsigned IntVal;
@@ -4511,6 +4952,10 @@ int MipsAsmParser::matchMSA128CtrlRegisterName(StringRef Name) {
return CC;
}
+bool MipsAsmParser::canUseATReg() {
+ return AssemblerOptions.back()->getATRegIndex() != 0;
+}
+
unsigned MipsAsmParser::getATReg(SMLoc Loc) {
unsigned ATIndex = AssemblerOptions.back()->getATRegIndex();
if (ATIndex == 0) {
@@ -4589,7 +5034,6 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
}
bool MipsAsmParser::isEvaluated(const MCExpr *Expr) {
-
switch (Expr->getKind()) {
case MCExpr::Constant:
return true;
@@ -5522,7 +5966,7 @@ bool MipsAsmParser::parseSetPushDirective() {
// Create a copy of the current assembler options environment and push it.
AssemblerOptions.push_back(
- make_unique<MipsAssemblerOptions>(AssemblerOptions.back().get()));
+ llvm::make_unique<MipsAssemblerOptions>(AssemblerOptions.back().get()));
getTargetStreamer().emitDirectiveSetPush();
return false;
@@ -5914,6 +6358,14 @@ bool MipsAsmParser::parseDirectiveSet() {
return parseSetAtDirective();
} else if (Tok.getString() == "arch") {
return parseSetArchDirective();
+ } else if (Tok.getString() == "bopt") {
+ Warning(Tok.getLoc(), "'bopt' feature is unsupported");
+ getParser().Lex();
+ return false;
+ } else if (Tok.getString() == "nobopt") {
+ // We're already running in nobopt mode, so nothing to do.
+ getParser().Lex();
+ return false;
} else if (Tok.getString() == "fp") {
return parseSetFpDirective();
} else if (Tok.getString() == "oddspreg") {
@@ -6001,7 +6453,7 @@ bool MipsAsmParser::parseDirectiveSet() {
bool MipsAsmParser::parseDataDirective(unsigned Size, SMLoc L) {
MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for (;;) {
+ while (true) {
const MCExpr *Value;
if (getParser().parseExpression(Value))
return true;
@@ -6773,3 +7225,15 @@ extern "C" void LLVMInitializeMipsAsmParser() {
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#include "MipsGenAsmMatcher.inc"
+
+bool MipsAsmParser::mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) {
+ // Find the appropriate table for this asm variant.
+ const MatchEntry *Start, *End;
+ switch (VariantID) {
+ default: llvm_unreachable("invalid variant!");
+ case 0: Start = std::begin(MatchTable0); End = std::end(MatchTable0); break;
+ }
+ // Search the table.
+ auto MnemonicRange = std::equal_range(Start, End, Mnemonic, LessOpcode());
+ return MnemonicRange.first != MnemonicRange.second;
+}
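
mnemonicIsValid relies on the generated MatchTable0 being sorted by mnemonic, so std::equal_range with an asymmetric comparator amounts to a binary search. The same idiom in standalone form (Entry and LessName are invented names; the real MatchEntry/LessOpcode come from MipsGenAsmMatcher.inc):

    #include <algorithm>
    #include <cstring>

    struct Entry { const char *Mnemonic; /* opcode, operand classes, ... */ };

    // equal_range needs the comparator callable in both argument orders.
    struct LessName {
      bool operator()(const Entry &E, const char *S) const {
        return std::strcmp(E.Mnemonic, S) < 0;
      }
      bool operator()(const char *S, const Entry &E) const {
        return std::strcmp(S, E.Mnemonic) < 0;
      }
    };

    static bool tableHasMnemonic(const Entry *First, const Entry *Last,
                                 const char *Mnemonic) {
      auto Range = std::equal_range(First, Last, Mnemonic, LessName());
      return Range.first != Range.second;
    }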
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index f80efb18507b..ecdf6b0de6e7 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -1,4 +1,4 @@
-//===- MipsDisassembler.cpp - Disassembler for Mips -------------*- C++ -*-===//
+//===- MipsDisassembler.cpp - Disassembler for Mips -----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,15 +12,21 @@
//===----------------------------------------------------------------------===//
#include "Mips.h"
-#include "MipsRegisterInfo.h"
-#include "MipsSubtarget.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -33,6 +39,7 @@ namespace {
class MipsDisassembler : public MCDisassembler {
bool IsMicroMips;
bool IsBigEndian;
+
public:
MipsDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, bool IsBigEndian)
: MCDisassembler(STI, Ctx),
@@ -42,9 +49,11 @@ public:
bool hasMips2() const { return STI.getFeatureBits()[Mips::FeatureMips2]; }
bool hasMips3() const { return STI.getFeatureBits()[Mips::FeatureMips3]; }
bool hasMips32() const { return STI.getFeatureBits()[Mips::FeatureMips32]; }
+
bool hasMips32r6() const {
return STI.getFeatureBits()[Mips::FeatureMips32r6];
}
+
bool isFP64() const { return STI.getFeatureBits()[Mips::FeatureFP64Bit]; }
bool isGP64() const { return STI.getFeatureBits()[Mips::FeatureGP64Bit]; }
@@ -527,11 +536,13 @@ static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned Insn,
const void *Decoder);
namespace llvm {
+
Target &getTheMipselTarget();
Target &getTheMipsTarget();
Target &getTheMips64Target();
Target &getTheMips64elTarget();
-}
+
+} // end namespace llvm
static MCDisassembler *createMipsDisassembler(
const Target &T,
@@ -1106,6 +1117,7 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
raw_ostream &CStream) const {
uint32_t Insn;
DecodeStatus Result;
+ Size = 0;
if (IsMicroMips) {
Result = readInstruction16(Bytes, Address, Size, Insn, IsBigEndian);
@@ -1168,98 +1180,88 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
}
}
- // This is an invalid instruction. Let the disassembler move forward by the
- // minimum instruction size.
+ // This is an invalid instruction. Claim that the Size is 2 bytes. Since
+ // microMIPS instructions have a minimum alignment of 2, the next 2 bytes
+ // could form a valid instruction. The two bytes we rejected as an
+ // instruction could have actually been an inline constant pool that is

+ // unconditionally branched over.
Size = 2;
return MCDisassembler::Fail;
}
+ // Attempt to read the instruction so that we can attempt to decode it. If
+ // the buffer is not 4 bytes long, let the higher level logic figure out
+ // what to do with a size of zero and MCDisassembler::Fail.
Result = readInstruction32(Bytes, Address, Size, Insn, IsBigEndian, false);
- if (Result == MCDisassembler::Fail) {
- Size = 4;
+ if (Result == MCDisassembler::Fail)
return MCDisassembler::Fail;
- }
+
+ // The only instruction size for standard encoded MIPS.
+ Size = 4;
if (hasCOP3()) {
DEBUG(dbgs() << "Trying COP3_ table (32-bit opcodes):\n");
Result =
decodeInstruction(DecoderTableCOP3_32, Instr, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
+ if (Result != MCDisassembler::Fail)
return Result;
- }
}
if (hasMips32r6() && isGP64()) {
DEBUG(dbgs() << "Trying Mips32r6_64r6 (GPR64) table (32-bit opcodes):\n");
Result = decodeInstruction(DecoderTableMips32r6_64r6_GP6432, Instr, Insn,
Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
+ if (Result != MCDisassembler::Fail)
return Result;
- }
}
if (hasMips32r6() && isPTR64()) {
DEBUG(dbgs() << "Trying Mips32r6_64r6 (PTR64) table (32-bit opcodes):\n");
Result = decodeInstruction(DecoderTableMips32r6_64r6_PTR6432, Instr, Insn,
Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
+ if (Result != MCDisassembler::Fail)
return Result;
- }
}
if (hasMips32r6()) {
DEBUG(dbgs() << "Trying Mips32r6_64r6 table (32-bit opcodes):\n");
Result = decodeInstruction(DecoderTableMips32r6_64r632, Instr, Insn,
Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
+ if (Result != MCDisassembler::Fail)
return Result;
- }
}
if (hasMips2() && isPTR64()) {
DEBUG(dbgs() << "Trying Mips32r6_64r6 (PTR64) table (32-bit opcodes):\n");
Result = decodeInstruction(DecoderTableMips32_64_PTR6432, Instr, Insn,
Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
+ if (Result != MCDisassembler::Fail)
return Result;
- }
}
if (hasCnMips()) {
DEBUG(dbgs() << "Trying CnMips table (32-bit opcodes):\n");
Result = decodeInstruction(DecoderTableCnMips32, Instr, Insn,
Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
+ if (Result != MCDisassembler::Fail)
return Result;
- }
}
if (isGP64()) {
DEBUG(dbgs() << "Trying Mips64 (GPR64) table (32-bit opcodes):\n");
Result = decodeInstruction(DecoderTableMips6432, Instr, Insn,
Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
+ if (Result != MCDisassembler::Fail)
return Result;
- }
}
DEBUG(dbgs() << "Trying Mips table (32-bit opcodes):\n");
// Calling the auto-generated decoder function.
Result =
decodeInstruction(DecoderTableMips32, Instr, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
+ if (Result != MCDisassembler::Fail)
return Result;
- }
- Size = 4;
return MCDisassembler::Fail;
}
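
A caller's-eye sketch of the Size contract this rewrite establishes (assuming the MCDisassembler interface of this era, with the two raw_ostream parameters): Size is 0 only when the buffer was too short to read anything; on Fail it still reports 2 (microMIPS) or 4 bytes consumed, so a driver can step over undecodable words, such as inline constant pools, and resynchronize:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/MC/MCDisassembler/MCDisassembler.h"
    #include "llvm/MC/MCInst.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void disassembleBuffer(const MCDisassembler &DisAsm,
                                  ArrayRef<uint8_t> Bytes, uint64_t Address) {
      uint64_t Offset = 0;
      while (Offset < Bytes.size()) {
        MCInst Inst;
        uint64_t Size = 0;
        MCDisassembler::DecodeStatus S = DisAsm.getInstruction(
            Inst, Size, Bytes.slice(Offset), Address + Offset, nulls(), nulls());
        if (Size == 0)
          break;        // buffer exhausted mid-instruction
        Offset += Size; // advances past failed 2/4-byte units too
        (void)S;        // on Success, Inst would be printed or recorded here
      }
    }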
@@ -1267,16 +1269,13 @@ static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
const void *Decoder) {
-
return MCDisassembler::Fail;
-
}
static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
const void *Decoder) {
-
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -1620,7 +1619,7 @@ static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn,
switch(Inst.getOpcode())
{
default:
- assert (0 && "Unexpected instruction");
+ assert(false && "Unexpected instruction");
return MCDisassembler::Fail;
break;
case Mips::LD_B:
@@ -1980,7 +1979,6 @@ static DecodeStatus DecodeAFGR64RegisterClass(MCInst &Inst,
if (RegNo > 30 || RegNo %2)
return MCDisassembler::Fail;
- ;
unsigned Reg = getReg(Decoder, Mips::AFGR64RegClassID, RegNo /2);
Inst.addOperand(MCOperand::createReg(Reg));
return MCDisassembler::Success;
@@ -2128,7 +2126,6 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder) {
-
unsigned JumpOffset = fieldFromInstruction(Insn, 0, 26) << 2;
Inst.addOperand(MCOperand::createImm(JumpOffset));
return MCDisassembler::Success;
@@ -2267,7 +2264,14 @@ static DecodeStatus DecodeInsSize(MCInst &Inst,
const void *Decoder) {
// First we need to grab the pos(lsb) from MCInst.
int Pos = Inst.getOperand(2).getImm();
- int Size = (int) Insn - Pos + 1;
+ if (Inst.getOpcode() == Mips::DINSU)
+ Pos += 32;
+ int Size;
+ if (Inst.getOpcode() == Mips::DINSM ||
+ Inst.getOpcode() == Mips::DINSU)
+ Size = (int) Insn - Pos + 33;
+ else
+ Size = (int) Insn - Pos + 1;
Inst.addOperand(MCOperand::createImm(SignExtend32<16>(Size)));
return MCDisassembler::Success;
}
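
To make the +33 concrete (a worked example, not part of the patch): the raw field holds msb for DINS but msb-32 for DINSM and DINSU, and DINSU's position operand was likewise decoded as pos-32, hence the Pos += 32 above. The size is always msb - pos + 1:

    #include <cassert>

    // size = msb - pos + 1; DINSM/DINSU store msb-32 in the instruction field.
    static int insFieldSize(unsigned MsbField, int Pos, bool MsbBiasedBy32) {
      int Msb = int(MsbField) + (MsbBiasedBy32 ? 32 : 0);
      return Msb - Pos + 1;
    }

    int main() {
      assert(insFieldSize(10, 5, false) == 6);  // DINS:  msb 10, size 6
      assert(insFieldSize(10, 5, true) == 38);  // DINSM: msb 42, size 38
    }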
@@ -2363,7 +2367,6 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn,
static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
-
unsigned RegPair = fieldFromInstruction(Insn, 7, 3);
switch (RegPair) {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
index 932d38a0b9fe..4a2b75b9ae46 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
@@ -1,4 +1,4 @@
-//===-- MipsABIFlagsSection.cpp - Mips ELF ABI Flags Section ---*- C++ -*--===//
+//===- MipsABIFlagsSection.cpp - Mips ELF ABI Flags Section ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsABIFlagsSection.h"
+#include "MCTargetDesc/MipsABIFlagsSection.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MipsABIFlags.h"
using namespace llvm;
@@ -51,6 +55,7 @@ uint8_t MipsABIFlagsSection::getCPR1SizeValue() {
}
namespace llvm {
+
MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection) {
// Write out an Elf_Internal_ABIFlags_v0 struct
OS.EmitIntValue(ABIFlagsSection.getVersionValue(), 2); // version
@@ -66,4 +71,5 @@ MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection) {
OS.EmitIntValue(ABIFlagsSection.getFlags2Value(), 4); // flags2
return OS;
}
-}
+
+} // end namespace llvm
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
index 3966cae9fe33..f38541027023 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
@@ -1,4 +1,4 @@
-//===-- MipsABIFlagsSection.h - Mips ELF ABI Flags Section -----*- C++ -*--===//
+//===- MipsABIFlagsSection.h - Mips ELF ABI Flags Section -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,9 +10,10 @@
#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIFLAGSSECTION_H
#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIFLAGSSECTION_H
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MipsABIFlags.h"
+#include <cstdint>
namespace llvm {
@@ -23,36 +24,32 @@ struct MipsABIFlagsSection {
enum class FpABIKind { ANY, XX, S32, S64, SOFT };
// Version of flags structure.
- uint16_t Version;
+ uint16_t Version = 0;
// The level of the ISA: 1-5, 32, 64.
- uint8_t ISALevel;
+ uint8_t ISALevel = 0;
// The revision of ISA: 0 for MIPS V and below, 1-n otherwise.
- uint8_t ISARevision;
+ uint8_t ISARevision = 0;
// The size of general purpose registers.
- Mips::AFL_REG GPRSize;
+ Mips::AFL_REG GPRSize = Mips::AFL_REG_NONE;
// The size of co-processor 1 registers.
- Mips::AFL_REG CPR1Size;
+ Mips::AFL_REG CPR1Size = Mips::AFL_REG_NONE;
// The size of co-processor 2 registers.
- Mips::AFL_REG CPR2Size;
+ Mips::AFL_REG CPR2Size = Mips::AFL_REG_NONE;
// Processor-specific extension.
- Mips::AFL_EXT ISAExtension;
+ Mips::AFL_EXT ISAExtension = Mips::AFL_EXT_NONE;
// Mask of ASEs used.
- uint32_t ASESet;
+ uint32_t ASESet = 0;
- bool OddSPReg;
+ bool OddSPReg = false;
- bool Is32BitABI;
+ bool Is32BitABI = false;
protected:
// The floating-point ABI.
- FpABIKind FpABI;
+ FpABIKind FpABI = FpABIKind::ANY;
public:
- MipsABIFlagsSection()
- : Version(0), ISALevel(0), ISARevision(0), GPRSize(Mips::AFL_REG_NONE),
- CPR1Size(Mips::AFL_REG_NONE), CPR2Size(Mips::AFL_REG_NONE),
- ISAExtension(Mips::AFL_EXT_NONE), ASESet(0), OddSPReg(false),
- Is32BitABI(false), FpABI(FpABIKind::ANY) {}
+ MipsABIFlagsSection() = default;
uint16_t getVersionValue() { return (uint16_t)Version; }
uint8_t getISALevelValue() { return (uint8_t)ISALevel; }
@@ -80,6 +77,7 @@ public:
FpABI = Value;
Is32BitABI = IsABI32Bit;
}
+
StringRef getFpABIString(FpABIKind Value);
template <class PredicateLibrary>
@@ -195,6 +193,7 @@ public:
};
MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection);
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIFLAGSSECTION_H
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 38b11f78e36d..3304449efb91 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -34,7 +34,7 @@ using namespace llvm;
// Prepare value for the target space for it
static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- MCContext *Ctx = nullptr) {
+ MCContext &Ctx) {
unsigned Kind = Fixup.getKind();
@@ -74,8 +74,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// address range. Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 4;
// We now check if Value can be encoded as a 16-bit signed immediate.
- if (!isInt<16>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC16 fixup");
+ if (!isInt<16>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC16 fixup");
return 0;
}
break;
@@ -84,8 +84,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 4;
// We now check if Value can be encoded as a 19-bit signed immediate.
- if (!isInt<19>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC19 fixup");
+ if (!isInt<19>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC19 fixup");
return 0;
}
break;
@@ -121,8 +121,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 2;
// We now check if Value can be encoded as a 7-bit signed immediate.
- if (!isInt<7>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC7 fixup");
+ if (!isInt<7>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC7 fixup");
return 0;
}
break;
@@ -131,8 +131,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 2;
// We now check if Value can be encoded as a 10-bit signed immediate.
- if (!isInt<10>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC10 fixup");
+ if (!isInt<10>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC10 fixup");
return 0;
}
break;
@@ -141,8 +141,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 2;
// We now check if Value can be encoded as a 16-bit signed immediate.
- if (!isInt<16>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC16 fixup");
+ if (!isInt<16>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC16 fixup");
return 0;
}
break;
@@ -150,21 +150,21 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 8;
// We now check if Value can be encoded as an 18-bit signed immediate.
- if (!isInt<18>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC18 fixup");
+ if (!isInt<18>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC18 fixup");
return 0;
}
break;
case Mips::fixup_MICROMIPS_PC18_S3:
// Check alignment.
- if ((Value & 7) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC18 fixup");
+ if ((Value & 7)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC18 fixup");
}
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 8;
// We now check if Value can be encoded as an 18-bit signed immediate.
- if (!isInt<18>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC18 fixup");
+ if (!isInt<18>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC18 fixup");
return 0;
}
break;
@@ -172,8 +172,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 4;
// We now check if Value can be encoded as a 21-bit signed immediate.
- if (!isInt<21>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC21 fixup");
+ if (!isInt<21>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC21 fixup");
return 0;
}
break;
@@ -181,8 +181,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t) Value / 4;
// We now check if Value can be encoded as a 26-bit signed immediate.
- if (!isInt<26>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC26 fixup");
+ if (!isInt<26>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC26 fixup");
return 0;
}
break;
@@ -190,8 +190,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 2;
// We now check if Value can be encoded as a 26-bit signed immediate.
- if (!isInt<26>(Value) && Ctx) {
- Ctx->reportFatalError(Fixup.getLoc(), "out of range PC26 fixup");
+ if (!isInt<26>(Value)) {
+ Ctx.reportFatalError(Fixup.getLoc(), "out of range PC26 fixup");
return 0;
}
break;
@@ -199,8 +199,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Forcing a signed division because Value can be negative.
Value = (int64_t)Value / 2;
// We now check if Value can be encoded as a 21-bit signed immediate.
- if (!isInt<21>(Value) && Ctx) {
- Ctx->reportError(Fixup.getLoc(), "out of range PC21 fixup");
+ if (!isInt<21>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "out of range PC21 fixup");
return 0;
}
break;
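
Every PC-relative case above now follows the same unconditional shape: scale the value by the branch granularity, check it against the field's signed width, and report through Ctx on failure. Distilled into one illustrative template (assuming offsets already aligned to the granularity, so shifting matches the signed division):

    #include "llvm/Support/MathExtras.h"

    // Bits = signed field width; Shift = granularity (2 for /4, 1 for /2,
    // 3 for /8).
    template <unsigned Bits, unsigned Shift>
    static bool pcRelValueFits(int64_t Value) {
      return llvm::isInt<Bits>(Value >> Shift);
    }
    // pcRelValueFits<16, 2>(Off) mirrors the PC16 check; <21, 2> PC21;
    // <18, 3> PC18.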
@@ -236,10 +236,10 @@ static unsigned calculateMMLEIndex(unsigned i) {
/// data fragment, at the offset specified by the fixup and following the
/// fixup kind as appropriate.
void MipsAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ unsigned DataSize, uint64_t Value, bool IsPCRel,
+ MCContext &Ctx) const {
MCFixupKind Kind = Fixup.getKind();
- Value = adjustFixupValue(Fixup, Value);
+ Value = adjustFixupValue(Fixup, Value, Ctx);
if (!Value)
return; // Doesn't change encoding.
@@ -471,24 +471,6 @@ bool MipsAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
-/// processFixupValue - Target hook to process the literal value of a fixup
-/// if necessary.
-void MipsAsmBackend::processFixupValue(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFixup &Fixup,
- const MCFragment *DF,
- const MCValue &Target,
- uint64_t &Value,
- bool &IsResolved) {
- // At this point we'll ignore the value returned by adjustFixupValue as
- // we are only checking if the fixup can be applied correctly. We have
- // access to MCContext from here which allows us to report a fatal error
- // with *possibly* a source code location.
- // The caller will also ignore any changes we make to Value
- // (recordRelocation() overwrites it with it's own calculation).
- (void)adjustFixupValue(Fixup, Value, &Asm.getContext());
-}
-
// MCAsmBackend
MCAsmBackend *llvm::createMipsAsmBackendEL32(const Target &T,
const MCRegisterInfo &MRI,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index f260cfa566c9..4b3cc6e21f4c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -39,7 +39,7 @@ public:
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
@@ -82,11 +82,6 @@ public:
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override;
-
}; // class MipsAsmBackend
} // namespace
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index b2efd726da53..324fd3c6fe14 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -7,33 +7,38 @@
//
//===----------------------------------------------------------------------===//
-#include <algorithm>
-#include <list>
-#include "MCTargetDesc/MipsBaseInfo.h"
#include "MCTargetDesc/MipsFixupKinds.h"
-#include "MCTargetDesc/MipsMCExpr.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <list>
+#include <utility>
#define DEBUG_TYPE "mips-elf-object-writer"
using namespace llvm;
namespace {
+
/// Holds additional information needed by the relocation ordering algorithm.
struct MipsRelocationEntry {
const ELFRelocationEntry R; ///< The relocation.
- bool Matched; ///< Is this relocation part of a match.
+ bool Matched = false; ///< Is this relocation part of a match.
- MipsRelocationEntry(const ELFRelocationEntry &R) : R(R), Matched(false) {}
+ MipsRelocationEntry(const ELFRelocationEntry &R) : R(R) {}
void print(raw_ostream &Out) const {
R.print(Out);
@@ -53,23 +58,33 @@ public:
MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, bool _isN64,
bool IsLittleEndian);
- ~MipsELFObjectWriter() override;
+ ~MipsELFObjectWriter() override = default;
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
bool needsRelocateWithSymbol(const MCSymbol &Sym,
unsigned Type) const override;
- virtual void sortRelocs(const MCAssembler &Asm,
- std::vector<ELFRelocationEntry> &Relocs) override;
+ void sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) override;
+};
+
+/// The possible results of the Predicate function used by find_best.
+enum FindBestPredicateResult {
+ FindBest_NoMatch = 0, ///< The current element is not a match.
+ FindBest_Match, ///< The current element is a match but better ones are
+ /// possible.
+ FindBest_PerfectMatch, ///< The current element is an unbeatable match.
};
+} // end anonymous namespace
+
/// Copy elements in the range [First, Last) to d1 when the predicate is true or
/// d2 when the predicate is false. This is essentially both std::copy_if and
/// std::remove_copy_if combined into a single pass.
template <class InputIt, class OutputIt1, class OutputIt2, class UnaryPredicate>
-std::pair<OutputIt1, OutputIt2> copy_if_else(InputIt First, InputIt Last,
- OutputIt1 d1, OutputIt2 d2,
- UnaryPredicate Predicate) {
+static std::pair<OutputIt1, OutputIt2> copy_if_else(InputIt First, InputIt Last,
+ OutputIt1 d1, OutputIt2 d2,
+ UnaryPredicate Predicate) {
for (InputIt I = First; I != Last; ++I) {
if (Predicate(*I)) {
*d1 = *I;
@@ -83,14 +98,6 @@ std::pair<OutputIt1, OutputIt2> copy_if_else(InputIt First, InputIt Last,
return std::make_pair(d1, d2);
}
-/// The possible results of the Predicate function used by find_best.
-enum FindBestPredicateResult {
- FindBest_NoMatch = 0, ///< The current element is not a match.
- FindBest_Match, ///< The current element is a match but better ones are
- /// possible.
- FindBest_PerfectMatch, ///< The current element is an unbeatable match.
-};
-
/// Find the best match in the range [First, Last).
///
/// An element matches when Predicate(X) returns FindBest_Match or
@@ -101,8 +108,8 @@ enum FindBestPredicateResult {
/// This is similar to std::find_if but finds the best of multiple possible
/// matches.
template <class InputIt, class UnaryPredicate, class Comparator>
-InputIt find_best(InputIt First, InputIt Last, UnaryPredicate Predicate,
- Comparator BetterThan) {
+static InputIt find_best(InputIt First, InputIt Last, UnaryPredicate Predicate,
+ Comparator BetterThan) {
InputIt Best = Last;
for (InputIt I = First; I != Last; ++I) {
@@ -202,16 +209,12 @@ static void dumpRelocs(const char *Prefix, const Container &Relocs) {
}
#endif
-} // end anonymous namespace
-
MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
bool _isN64, bool IsLittleEndian)
: MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
/*HasRelocationAddend*/ _isN64,
/*IsN64*/ _isN64) {}
-MipsELFObjectWriter::~MipsELFObjectWriter() {}
-
unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
const MCFixup &Fixup,
@@ -419,7 +422,6 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
/// always match using the expressions from the source.
void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
std::vector<ELFRelocationEntry> &Relocs) {
-
// We do not need to sort the relocation table for RELA relocations which
// N32/N64 uses as the relocation addend contains the value we require,
// rather than it being split across a pair of relocations.
@@ -524,6 +526,8 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
case ELF::R_MIPS_GOT16:
case ELF::R_MIPS16_GOT16:
case ELF::R_MICROMIPS_GOT16:
+ case ELF::R_MIPS_HIGHER:
+ case ELF::R_MIPS_HIGHEST:
case ELF::R_MIPS_HI16:
case ELF::R_MIPS16_HI16:
case ELF::R_MICROMIPS_HI16:
@@ -567,8 +571,6 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
case ELF::R_MIPS_INSERT_A:
case ELF::R_MIPS_INSERT_B:
case ELF::R_MIPS_DELETE:
- case ELF::R_MIPS_HIGHER:
- case ELF::R_MIPS_HIGHEST:
case ELF::R_MIPS_CALL_HI16:
case ELF::R_MIPS_CALL_LO16:
case ELF::R_MIPS_SCN_DISP:
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index e7d687e89a8a..ae3278322311 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -8,15 +8,19 @@
//===----------------------------------------------------------------------===//
#include "MipsELFStreamer.h"
+#include "MipsOptionRecord.h"
#include "MipsTargetStreamer.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ELF.h"
using namespace llvm;
void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) {
+ const MCSubtargetInfo &STI, bool) {
MCELFStreamer::EmitInstruction(Inst, STI);
MCContext &Context = getContext();
@@ -51,7 +55,7 @@ void MipsELFStreamer::createPendingLabelRelocs() {
Labels.clear();
}
-void MipsELFStreamer::EmitLabel(MCSymbol *Symbol) {
+void MipsELFStreamer::EmitLabel(MCSymbol *Symbol, SMLoc Loc) {
MCELFStreamer::EmitLabel(Symbol);
Labels.push_back(Symbol);
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index a241cdebdcc8..f5eda112817e 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -1,4 +1,4 @@
-//===-------- MipsELFStreamer.h - ELF Object Output -----------------------===//
+//===- MipsELFStreamer.h - ELF Object Output --------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,6 +21,7 @@
#include <memory>
namespace llvm {
+
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
@@ -31,12 +32,10 @@ class MipsELFStreamer : public MCELFStreamer {
MipsRegInfoRecord *RegInfoRecord;
SmallVector<MCSymbol*, 4> Labels;
-
public:
MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter)
: MCELFStreamer(Context, MAB, OS, Emitter) {
-
RegInfoRecord = new MipsRegInfoRecord(this, Context);
MipsOptionRecords.push_back(
std::unique_ptr<MipsRegInfoRecord>(RegInfoRecord));
@@ -46,12 +45,13 @@ public:
/// \p Inst is actually emitted. For example, we can inspect the operands and
/// gather sufficient information that allows us to reason about the register
/// usage for the translation unit.
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool = false) override;
/// Overriding this function allows us to record all labels that should be
/// marked as microMIPS. Based on this data marking is done in
/// EmitInstruction.
- void EmitLabel(MCSymbol *Symbol) override;
+ void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
/// Overriding this function allows us to dismiss all labels that are
/// candidates for marking as microMIPS when .section directive is processed.
@@ -72,5 +72,6 @@ public:
MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool RelaxAll);
-} // namespace llvm.
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSELFSTREAMER_H
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index a44a35f49e5f..ebe3c5784888 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -19,9 +19,7 @@ using namespace llvm;
void MipsMCAsmInfo::anchor() { }
MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
- if ((TheTriple.getArch() == Triple::mips) ||
- (TheTriple.getArch() == Triple::mips64))
- IsLittleEndian = false;
+ IsLittleEndian = TheTriple.isLittleEndian();
if ((TheTriple.getArch() == Triple::mips64el) ||
(TheTriple.getArch() == Triple::mips64)) {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 0614316d5ac7..5685f0426e9b 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -10,22 +10,29 @@
// This file implements the MipsMCCodeEmitter class.
//
//===----------------------------------------------------------------------===//
-//
-#include "MipsMCCodeEmitter.h"
#include "MCTargetDesc/MipsFixupKinds.h"
#include "MCTargetDesc/MipsMCExpr.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "MipsMCCodeEmitter.h"
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
@@ -34,6 +41,7 @@
#undef GET_INSTRMAP_INFO
namespace llvm {
+
MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx) {
@@ -45,12 +53,12 @@ MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
MCContext &Ctx) {
return new MipsMCCodeEmitter(MCII, Ctx, true);
}
-} // End of namespace llvm.
+
+} // end namespace llvm
// If the D<shift> instruction has a shift amount that is greater
// than 31 (checked in calling routine), lower it to a D<shift>32 instruction
static void LowerLargeShift(MCInst& Inst) {
-
assert(Inst.getNumOperands() == 3 && "Invalid no. of operands for shift!");
assert(Inst.getOperand(2).isImm());
@@ -103,24 +111,25 @@ static void LowerDins(MCInst& InstIn) {
assert(InstIn.getOperand(3).isImm());
int64_t size = InstIn.getOperand(3).getImm();
- if (size <= 32) {
- if (pos < 32) // DINS, do nothing
- return;
+ assert((pos + size) <= 64 &&
+ "DINS cannot have position plus size over 64");
+ if (pos < 32) {
+ if ((pos + size) > 0 && (pos + size) <= 32)
+ return; // DINS, do nothing
+ else if ((pos + size) > 32) {
+ // DINSM
+ InstIn.getOperand(3).setImm(size - 32);
+ InstIn.setOpcode(Mips::DINSM);
+ }
+ } else if ((pos + size) > 32 && (pos + size) <= 64) {
// DINSU
InstIn.getOperand(2).setImm(pos - 32);
InstIn.setOpcode(Mips::DINSU);
- return;
}
- // DINSM
- assert(pos < 32 && "DINS cannot have both size and pos > 32");
- InstIn.getOperand(3).setImm(size - 32);
- InstIn.setOpcode(Mips::DINSM);
- return;
}
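
The three-way split LowerDins now performs, restated as a small decision function (names invented; the real code also rewrites the pos/size operands as shown above):

    #include <cassert>

    enum class DinsKind { DINS, DINSM, DINSU };

    // pos < 32, field ends in the low word       -> DINS
    // pos < 32, field crosses into the high word -> DINSM (encodes size-32)
    // pos >= 32, field entirely in the high word -> DINSU (encodes pos-32)
    static DinsKind classifyDins(int Pos, int Size) {
      assert(Pos >= 0 && Size > 0 && Pos + Size <= 64);
      if (Pos < 32)
        return (Pos + Size <= 32) ? DinsKind::DINS : DinsKind::DINSM;
      return DinsKind::DINSU;
    }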
// Fix a bad compact branch encoding for beqc/bnec.
void MipsMCCodeEmitter::LowerCompactBranch(MCInst& Inst) const {
-
// Encoding may be illegal !(rs < rt), but this situation is
// easily fixed.
unsigned RegOp0 = Inst.getOperand(0).getReg();
@@ -146,7 +155,6 @@ void MipsMCCodeEmitter::LowerCompactBranch(MCInst& Inst) const {
Inst.getOperand(0).setReg(RegOp1);
Inst.getOperand(1).setReg(RegOp0);
-
}
bool MipsMCCodeEmitter::isMicroMips(const MCSubtargetInfo &STI) const {
@@ -186,7 +194,6 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const
{
-
// Non-pseudo instructions that get changed for direct object
// only based on operand values.
// If this list of instructions get much longer we will move
@@ -272,7 +279,6 @@ unsigned MipsMCCodeEmitter::
getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 4.
@@ -295,7 +301,6 @@ unsigned MipsMCCodeEmitter::
getBranchTargetOpValue1SImm16(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 2.
@@ -318,7 +323,6 @@ unsigned MipsMCCodeEmitter::
getBranchTargetOpValueMMR6(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 2.
@@ -342,7 +346,6 @@ unsigned MipsMCCodeEmitter::
getBranchTargetOpValueLsl2MMR6(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 4.
@@ -366,7 +369,6 @@ unsigned MipsMCCodeEmitter::
getBranchTarget7OpValueMM(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 2.
@@ -388,7 +390,6 @@ unsigned MipsMCCodeEmitter::
getBranchTargetOpValueMMPC10(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 2.
@@ -410,7 +411,6 @@ unsigned MipsMCCodeEmitter::
getBranchTargetOpValueMM(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 2.
@@ -433,7 +433,6 @@ unsigned MipsMCCodeEmitter::
getBranchTarget21OpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 4.
@@ -456,7 +455,6 @@ unsigned MipsMCCodeEmitter::
getBranchTarget21OpValueMM(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 4.
@@ -479,7 +477,6 @@ unsigned MipsMCCodeEmitter::
getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 4.
@@ -501,7 +498,6 @@ getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo,
unsigned MipsMCCodeEmitter::getBranchTarget26OpValueMM(
const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 2.
@@ -525,7 +521,6 @@ unsigned MipsMCCodeEmitter::
getJumpOffset16OpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm()) return MO.getImm();
@@ -544,7 +539,6 @@ unsigned MipsMCCodeEmitter::
getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 4.
if (MO.isImm()) return MO.getImm()>>2;
@@ -562,7 +556,6 @@ unsigned MipsMCCodeEmitter::
getJumpTargetOpValueMM(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
// If the destination is an immediate, divide by 2.
if (MO.isImm()) return MO.getImm() >> 1;
@@ -580,7 +573,6 @@ unsigned MipsMCCodeEmitter::
getUImm5Lsl2Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm()) {
// The immediate is encoded as 'immediate << 2'.
@@ -599,7 +591,6 @@ unsigned MipsMCCodeEmitter::
getSImm3Lsa2Value(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm()) {
int Value = MO.getImm();
@@ -613,7 +604,6 @@ unsigned MipsMCCodeEmitter::
getUImm6Lsl2Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm()) {
unsigned Value = MO.getImm();
@@ -627,7 +617,6 @@ unsigned MipsMCCodeEmitter::
getSImm9AddiuspValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm()) {
unsigned Binary = (MO.getImm() >> 2) & 0x0000ffff;
@@ -711,7 +700,7 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
case MipsMCExpr::MEK_GPREL:
FixupKind = Mips::fixup_Mips_GPREL16;
break;
- case MipsMCExpr::MEK_LO: {
+ case MipsMCExpr::MEK_LO:
// Check for %lo(%neg(%gp_rel(X)))
if (MipsExpr->isGpOff()) {
FixupKind = Mips::fixup_Mips_GPOFF_LO;
@@ -720,7 +709,6 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
FixupKind = isMicroMips(STI) ? Mips::fixup_MICROMIPS_LO16
: Mips::fixup_Mips_LO16;
break;
- }
case MipsMCExpr::MEK_HIGHEST:
FixupKind = Mips::fixup_Mips_HIGHEST;
break;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index 2d041dcbf040..d12d3195521a 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -1,4 +1,4 @@
-//===-- MipsMCCodeEmitter.h - Convert Mips Code to Machine Code -----------===//
+//===- MipsMCCodeEmitter.h - Convert Mips Code to Machine Code --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,29 +10,25 @@
// This file defines the MipsMCCodeEmitter class.
//
//===----------------------------------------------------------------------===//
-//
#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCCODEEMITTER_H
#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCCODEEMITTER_H
#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/Support/DataTypes.h"
-
-using namespace llvm;
+#include <cstdint>
namespace llvm {
+
class MCContext;
class MCExpr;
+class MCFixup;
class MCInst;
class MCInstrInfo;
-class MCFixup;
class MCOperand;
class MCSubtargetInfo;
class raw_ostream;
class MipsMCCodeEmitter : public MCCodeEmitter {
- MipsMCCodeEmitter(const MipsMCCodeEmitter &) = delete;
- void operator=(const MipsMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
MCContext &Ctx;
bool IsLittleEndian;
@@ -43,8 +39,9 @@ class MipsMCCodeEmitter : public MCCodeEmitter {
public:
MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, bool IsLittle)
: MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {}
-
- ~MipsMCCodeEmitter() override {}
+ MipsMCCodeEmitter(const MipsMCCodeEmitter &) = delete;
+ MipsMCCodeEmitter &operator=(const MipsMCCodeEmitter &) = delete;
+ ~MipsMCCodeEmitter() override = default;
void EmitByte(unsigned char C, raw_ostream &OS) const;
@@ -270,9 +267,11 @@ public:
unsigned getRegisterListOpValue16(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- private:
+
+private:
void LowerCompactBranch(MCInst& Inst) const;
-}; // class MipsMCCodeEmitter
-} // namespace llvm.
+};
+
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCCODEEMITTER_H
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 082bb87fcb8a..be04480044d4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -11,9 +11,15 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
using namespace llvm;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
index d1a4334ec640..495d525ccff4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
@@ -1,4 +1,4 @@
-//===-- MipsMCExpr.h - Mips specific MC expression classes ------*- C++ -*-===//
+//===- MipsMCExpr.h - Mips specific MC expression classes -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -70,6 +70,7 @@ public:
bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
const MCFixup *Fixup) const override;
void visitUsedExpr(MCStreamer &Streamer) const override;
+
MCFragment *findAssociatedFragment() const override {
return getSubExpr()->findAssociatedFragment();
}
@@ -86,6 +87,7 @@ public:
return isGpOff(Kind);
}
};
+
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSMCEXPR_H
diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index aef9bd3a8e2a..9266f0e216d1 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -20,7 +20,11 @@
#include "Mips.h"
#include "MipsELFStreamer.h"
#include "MipsMCNaCl.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
using namespace llvm;
@@ -38,14 +42,14 @@ class MipsNaClELFStreamer : public MipsELFStreamer {
public:
MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_pwrite_stream &OS, MCCodeEmitter *Emitter)
- : MipsELFStreamer(Context, TAB, OS, Emitter), PendingCall(false) {}
+ : MipsELFStreamer(Context, TAB, OS, Emitter) {}
- ~MipsNaClELFStreamer() override {}
+ ~MipsNaClELFStreamer() override = default;
private:
// Whether we started the sandboxing sequence for calls. Calls are bundled
// with branch delays and aligned to the bundle end.
- bool PendingCall;
+ bool PendingCall = false;
bool isIndirectJump(const MCInst &MI) {
if (MI.getOpcode() == Mips::JALR) {
@@ -135,8 +139,8 @@ private:
public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to mask dangerous instructions.
- void EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) override {
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) override {
// Sandbox indirect jumps.
if (isIndirectJump(Inst)) {
if (PendingCall)
@@ -265,4 +269,4 @@ MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
return S;
}
-}
+} // end namespace llvm
diff --git a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
index 24b602810d6e..74d5e4cc9841 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
@@ -1,4 +1,4 @@
-//===-- MipsOptionRecord.cpp - Abstraction for storing information --------===//
+//===- MipsOptionRecord.cpp - Abstraction for storing information ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,10 +7,16 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsOptionRecord.h"
+#include "MipsABIInfo.h"
#include "MipsELFStreamer.h"
+#include "MipsOptionRecord.h"
#include "MipsTargetStreamer.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
+#include <cassert>
using namespace llvm;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 7f79eb400f59..2d4083b27ed1 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsTargetStreamer.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MipsELFStreamer.h"
@@ -685,6 +686,17 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
// issues as well.
unsigned EFlags = MCA.getELFHeaderEFlags();
+ // FIXME: Fix a dependency issue by instantiating the ABI object to some
+ // default based off the triple. The triple doesn't describe the target
+ // fully, but any external user of the API that uses the MCTargetStreamer
+ // would otherwise crash on assertion failure.
+
+ ABI = MipsABIInfo(
+ STI.getTargetTriple().getArch() == Triple::ArchType::mipsel ||
+ STI.getTargetTriple().getArch() == Triple::ArchType::mips
+ ? MipsABIInfo::O32()
+ : MipsABIInfo::N64());
+
// Architecture
if (Features[Mips::FeatureMips64r6])
EFlags |= ELF::EF_MIPS_ARCH_64R6;
@@ -721,23 +733,18 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
if (Features[Mips::FeatureNaN2008])
EFlags |= ELF::EF_MIPS_NAN2008;
- // -mabicalls and -mplt are not implemented but we should act as if they were
- // given.
- EFlags |= ELF::EF_MIPS_CPIC;
-
MCA.setELFHeaderEFlags(EFlags);
}
void MipsTargetELFStreamer::emitLabel(MCSymbol *S) {
auto *Symbol = cast<MCSymbolELF>(S);
- if (!isMicroMipsEnabled())
- return;
getStreamer().getAssembler().registerSymbol(*Symbol);
uint8_t Type = Symbol->getType();
if (Type != ELF::STT_FUNC)
return;
- Symbol->setOther(ELF::STO_MIPS_MICROMIPS);
+ if (isMicroMipsEnabled())
+ Symbol->setOther(ELF::STO_MIPS_MICROMIPS);
}
void MipsTargetELFStreamer::finish() {
@@ -795,10 +802,13 @@ void MipsTargetELFStreamer::finish() {
} else if (Features[Mips::FeatureMips64r2] || Features[Mips::FeatureMips64])
EFlags |= ELF::EF_MIPS_32BITMODE;
- // If we've set the cpic eflag and we're n64, go ahead and set the pic
- // one as well.
- if (EFlags & ELF::EF_MIPS_CPIC && getABI().IsN64())
- EFlags |= ELF::EF_MIPS_PIC;
+ // -mplt is not implemented but we should act as if it was
+ // given.
+ if (!Features[Mips::FeatureNoABICalls])
+ EFlags |= ELF::EF_MIPS_CPIC;
+
+ if (Pic)
+ EFlags |= ELF::EF_MIPS_PIC | ELF::EF_MIPS_CPIC;
MCA.setELFHeaderEFlags(EFlags);
@@ -904,10 +914,10 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
const MCExpr *Size = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(CurPCSym, MCSymbolRefExpr::VK_None, Context),
ExprRef, Context);
- int64_t AbsSize;
- if (!Size->evaluateAsAbsolute(AbsSize, MCA))
- llvm_unreachable("Function size must be evaluatable as absolute");
- Size = MCConstantExpr::create(AbsSize, Context);
+
+ // The ELFObjectWriter can determine the absolute size as it has access to
+ // the layout information of the assembly file, so a size expression rather
+ // than an absolute value is ok here.
static_cast<MCSymbolELF *>(Sym)->setSize(Size);
}
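Taken together, the two e_flags hunks above implement a small decision table; a sketch of that logic in isolation (helper name hypothetical, flag values from the MIPS ELF ABI):

#include <cstdint>

constexpr uint32_t EF_MIPS_PIC  = 0x00000002; // position-independent code
constexpr uint32_t EF_MIPS_CPIC = 0x00000004; // calls PIC-compatible code

// -mplt is not implemented, so CPIC is assumed unless abicalls are
// explicitly disabled; PIC additionally implies CPIC.
uint32_t applyPicFlags(uint32_t EFlags, bool NoABICalls, bool Pic) {
  if (!NoABICalls)
    EFlags |= EF_MIPS_CPIC;
  if (Pic)
    EFlags |= EF_MIPS_PIC | EF_MIPS_CPIC;
  return EFlags;
}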
diff --git a/lib/Target/Mips/MicroMips64r6InstrInfo.td b/lib/Target/Mips/MicroMips64r6InstrInfo.td
index 05aad515da46..6b7f39e9dd79 100644
--- a/lib/Target/Mips/MicroMips64r6InstrInfo.td
+++ b/lib/Target/Mips/MicroMips64r6InstrInfo.td
@@ -475,29 +475,11 @@ defm : MaterializeImms<i64, ZERO_64, DADDIU_MM64R6, LUi64, ORi64>;
//
//===----------------------------------------------------------------------===//
-def : MipsPat<(MipsLo tglobaladdr:$in),
- (DADDIU_MM64R6 ZERO_64, tglobaladdr:$in)>, ISA_MICROMIPS64R6;
-def : MipsPat<(MipsLo tblockaddress:$in),
- (DADDIU_MM64R6 ZERO_64, tblockaddress:$in)>, ISA_MICROMIPS64R6;
-def : MipsPat<(MipsLo tjumptable:$in),
- (DADDIU_MM64R6 ZERO_64, tjumptable:$in)>, ISA_MICROMIPS64R6;
-def : MipsPat<(MipsLo tconstpool:$in),
- (DADDIU_MM64R6 ZERO_64, tconstpool:$in)>, ISA_MICROMIPS64R6;
-def : MipsPat<(MipsLo tglobaltlsaddr:$in),
- (DADDIU_MM64R6 ZERO_64, tglobaltlsaddr:$in)>, ISA_MICROMIPS64R6;
-def : MipsPat<(MipsLo texternalsym:$in),
- (DADDIU_MM64R6 ZERO_64, texternalsym:$in)>, ISA_MICROMIPS64R6;
-
-def : MipsPat<(add GPR64:$hi, (MipsLo tglobaladdr:$lo)),
- (DADDIU_MM64R6 GPR64:$hi, tglobaladdr:$lo)>, ISA_MICROMIPS64R6;
-def : MipsPat<(add GPR64:$hi, (MipsLo tblockaddress:$lo)),
- (DADDIU_MM64R6 GPR64:$hi, tblockaddress:$lo)>, ISA_MICROMIPS64R6;
-def : MipsPat<(add GPR64:$hi, (MipsLo tjumptable:$lo)),
- (DADDIU_MM64R6 GPR64:$hi, tjumptable:$lo)>, ISA_MICROMIPS64R6;
-def : MipsPat<(add GPR64:$hi, (MipsLo tconstpool:$lo)),
- (DADDIU_MM64R6 GPR64:$hi, tconstpool:$lo)>, ISA_MICROMIPS64R6;
-def : MipsPat<(add GPR64:$hi, (MipsLo tglobaltlsaddr:$lo)),
- (DADDIU_MM64R6 GPR64:$hi, tglobaltlsaddr:$lo)>, ISA_MICROMIPS64R6;
+defm : MipsHiLoRelocs<LUi64, DADDIU_MM64R6, ZERO_64, GPR64Opnd>, SYM_32,
+ ISA_MICROMIPS64R6;
+
+defm : MipsHighestHigherHiLoRelocs<LUi64, DADDIU_MM64R6>, SYM_64,
+ ISA_MICROMIPS64R6;
def : MipsPat<(addc GPR64:$lhs, GPR64:$rhs),
(DADDU_MM64R6 GPR64:$lhs, GPR64:$rhs)>, ISA_MICROMIPS64R6;
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index c0de9e7390a4..ee554bc7f69a 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -1136,12 +1136,6 @@ let Predicates = [InMicroMips] in {
def : MipsInstAlias<
"sgtu $rs, $rt",
(SLTu_MM GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
- def : MipsInstAlias<"slt $rs, $rt, $imm",
- (SLTi_MM GPR32Opnd:$rs, GPR32Opnd:$rt,
- simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<"sltu $rs, $rt, $imm",
- (SLTiu_MM GPR32Opnd:$rs, GPR32Opnd:$rt,
- simm32_relaxed:$imm), 0>;
def : MipsInstAlias<"sll $rd, $rt, $rs",
(SLLV_MM GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
def : MipsInstAlias<"sra $rd, $rt, $rs",
@@ -1163,18 +1157,21 @@ let Predicates = [InMicroMips] in {
def : MipsInstAlias<"rotr $rt, $imm",
(ROTR_MM GPR32Opnd:$rt, GPR32Opnd:$rt, uimm5:$imm), 0>;
def : MipsInstAlias<"syscall", (SYSCALL_MM 0), 1>;
- def : MipsInstAlias<"and $rs, $rt, $imm",
- (ANDi_MM GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
- def : MipsInstAlias<"and $rs, $imm",
- (ANDi_MM GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), 0>;
- def : MipsInstAlias<"or $rs, $rt, $imm",
- (ORi_MM GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
- def : MipsInstAlias<"or $rs, $imm",
- (ORi_MM GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>;
- def : MipsInstAlias<"xor $rs, $rt, $imm",
- (XORi_MM GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
- def : MipsInstAlias<"xor $rs, $imm",
- (XORi_MM GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"add", ADDi_MM>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"addu", ADDiu_MM>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"and", ANDi_MM>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"or", ORi_MM>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"xor", XORi_MM>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"slt", SLTi_MM>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"sltu", SLTiu_MM>;
+
def : MipsInstAlias<"not $rt, $rs",
(NOR_MM GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>;
def : MipsInstAlias<"not $rt",
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 670272d47e95..9615bc38bfce 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -156,6 +156,8 @@ def FeatureMips64r6 : SubtargetFeature<"mips64r6", "MipsArchVersion",
"Mips64r6 ISA Support [experimental]",
[FeatureMips32r6, FeatureMips64r5,
FeatureNaN2008]>;
+def FeatureSym32 : SubtargetFeature<"sym32", "HasSym32", "true",
+ "Symbols are 32 bit on Mips64">;
def FeatureMips16 : SubtargetFeature<"mips16", "InMips16Mode", "true",
"Mips16 mode">;
diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index 191006d6463c..a71b161b24cc 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -405,7 +405,7 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
"__mips16_ret_dc"
};
const char *Name = Helper[RV];
- AttributeSet A;
+ AttributeList A;
Value *Params[] = {RVal};
Modified = true;
//
@@ -414,13 +414,13 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
// during call setup, the proper call lowering to the helper
// functions will take place.
//
- A = A.addAttribute(C, AttributeSet::FunctionIndex,
+ A = A.addAttribute(C, AttributeList::FunctionIndex,
"__Mips16RetHelper");
- A = A.addAttribute(C, AttributeSet::FunctionIndex,
+ A = A.addAttribute(C, AttributeList::FunctionIndex,
Attribute::ReadNone);
- A = A.addAttribute(C, AttributeSet::FunctionIndex,
+ A = A.addAttribute(C, AttributeList::FunctionIndex,
Attribute::NoInline);
- Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T, nullptr));
+ Value *F = (M->getOrInsertFunction(Name, A, MyVoid, T));
CallInst::Create(F, Params, "", &I);
} else if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
FunctionType *FT = CI->getFunctionType();
@@ -490,15 +490,15 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV,
// remove the use-soft-float attribute
//
static void removeUseSoftFloat(Function &F) {
- AttributeSet A;
+ AttributeList A;
DEBUG(errs() << "removing -use-soft-float\n");
- A = A.addAttribute(F.getContext(), AttributeSet::FunctionIndex,
+ A = A.addAttribute(F.getContext(), AttributeList::FunctionIndex,
"use-soft-float", "false");
- F.removeAttributes(AttributeSet::FunctionIndex, A);
+ F.removeAttributes(AttributeList::FunctionIndex, A);
if (F.hasFnAttribute("use-soft-float")) {
DEBUG(errs() << "still has -use-soft-float\n");
}
- F.addAttributes(AttributeSet::FunctionIndex, A);
+ F.addAttributes(AttributeList::FunctionIndex, A);
}
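The AttributeSet-to-AttributeList rename above is mechanical, but the resulting call shapes are easy to misread in diff form; a sketch using only the calls the hunks themselves exercise (the signatures are LLVM-version-dependent, so treat this as illustrative):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Hypothetical helper: builds the attributes the pass attaches to its
// float-return helper functions, post-rename. Note the index enum moved
// from AttributeSet::FunctionIndex to AttributeList::FunctionIndex.
static AttributeList buildRetHelperAttrs(LLVMContext &C) {
  AttributeList A;
  A = A.addAttribute(C, AttributeList::FunctionIndex, "__Mips16RetHelper");
  A = A.addAttribute(C, AttributeList::FunctionIndex, Attribute::ReadNone);
  A = A.addAttribute(C, AttributeList::FunctionIndex, Attribute::NoInline);
  return A;
}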
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index 021fb8678686..52bf690a8083 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -766,6 +766,7 @@ def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIM16Alu> {
let hasDelaySlot = 1;
let isTerminator=1;
let isBarrier=1;
+ let isReturn=1;
}
def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIM16Alu> {
@@ -773,6 +774,7 @@ def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIM16Alu> {
let isIndirectBranch = 1;
let isTerminator=1;
let isBarrier=1;
+ let isReturn=1;
}
def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIM16Alu> {
diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td
index 1b4d73b79895..3272319ad50f 100644
--- a/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -917,6 +917,12 @@ def : MipsInstAlias<"jrc $rs", (JIC GPR32Opnd:$rs, 0), 1>, ISA_MIPS32R6, GPR_32;
let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"jalrc $rs", (JIALC GPR32Opnd:$rs, 0), 1>, ISA_MIPS32R6, GPR_32;
}
+
+def : MipsInstAlias<"div $rs, $rt", (DIV GPR32Opnd:$rs, GPR32Opnd:$rs,
+ GPR32Opnd:$rt)>, ISA_MIPS32R6;
+def : MipsInstAlias<"divu $rs, $rt", (DIVU GPR32Opnd:$rs, GPR32Opnd:$rs,
+ GPR32Opnd:$rt)>, ISA_MIPS32R6;
+
//===----------------------------------------------------------------------===//
//
// Patterns and Pseudo Instructions
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 521e22fb7992..99025fe1341d 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -326,6 +326,14 @@ let AdditionalPredicates = [NotInMicroMips] in {
EXT_FM<5>, ISA_MIPS64R2;
}
+let isCodeGenOnly = 1, AdditionalPredicates = [NotInMicroMips] in {
+ def DEXT64_32 : InstSE<(outs GPR64Opnd:$rt),
+ (ins GPR32Opnd:$rs, uimm5_report_uimm6:$pos,
+ uimm5_plus1:$size),
+ "dext $rt, $rs, $pos, $size", [], II_EXT, FrmR, "dext">,
+ EXT_FM<3>, ISA_MIPS64R2;
+}
+
let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
def DSLL64_32 : FR<0x00, 0x3c, (outs GPR64:$rd), (ins GPR32:$rt),
"dsll\t$rd, $rt, 32", [], II_DSLL>;
@@ -356,11 +364,11 @@ class Count1s<string opstr, RegisterOperand RO>:
let TwoOperandAliasConstraint = "$rd = $rs";
}
-class ExtsCins<string opstr, InstrItinClass itin,
- SDPatternOperator Op = null_frag>:
- InstSE<(outs GPR64Opnd:$rt), (ins GPR64Opnd:$rs, uimm5:$pos, uimm5:$lenm1),
- !strconcat(opstr, " $rt, $rs, $pos, $lenm1"),
- [(set GPR64Opnd:$rt, (Op GPR64Opnd:$rs, imm:$pos, imm:$lenm1))],
+class ExtsCins<string opstr, InstrItinClass itin, RegisterOperand RO,
+ PatFrag PosImm, SDPatternOperator Op = null_frag>:
+ InstSE<(outs RO:$rt), (ins RO:$rs, uimm5:$pos, uimm5:$lenm1),
+ !strconcat(opstr, "\t$rt, $rs, $pos, $lenm1"),
+ [(set RO:$rt, (Op RO:$rs, PosImm:$pos, imm:$lenm1))],
itin, FrmR, opstr> {
let TwoOperandAliasConstraint = "$rt = $rs";
}
@@ -424,13 +432,28 @@ def DMUL : ArithLogicR<"dmul", GPR64Opnd, 1, II_DMUL, mul>,
let Defs = [HI0, LO0, P0, P1, P2];
}
-// Extract a signed bit field /+32
-def EXTS : ExtsCins<"exts", II_EXT>, EXTS_FM<0x3a>, ASE_CNMIPS;
-def EXTS32: ExtsCins<"exts32", II_EXT>, EXTS_FM<0x3b>, ASE_CNMIPS;
-
-// Clear and insert a bit field /+32
-def CINS : ExtsCins<"cins", II_INS>, EXTS_FM<0x32>, ASE_CNMIPS;
-def CINS32: ExtsCins<"cins32", II_INS>, EXTS_FM<0x33>, ASE_CNMIPS;
+let AdditionalPredicates = [NotInMicroMips] in {
+ // Extract a signed bit field /+32
+ def EXTS : ExtsCins<"exts", II_EXT, GPR64Opnd, immZExt5>, EXTS_FM<0x3a>,
+ ASE_MIPS64_CNMIPS;
+ def EXTS32: ExtsCins<"exts32", II_EXT, GPR64Opnd, immZExt5Plus32>,
+ EXTS_FM<0x3b>, ASE_MIPS64_CNMIPS;
+
+ // Clear and insert a bit field /+32
+ def CINS : ExtsCins<"cins", II_INS, GPR64Opnd, immZExt5, MipsCIns>,
+ EXTS_FM<0x32>, ASE_MIPS64_CNMIPS;
+ def CINS32: ExtsCins<"cins32", II_INS, GPR64Opnd, immZExt5Plus32, MipsCIns>,
+ EXTS_FM<0x33>, ASE_MIPS64_CNMIPS;
+ let isCodeGenOnly = 1 in {
+ def CINS_i32 : ExtsCins<"cins", II_INS, GPR32Opnd, immZExt5, MipsCIns>,
+ EXTS_FM<0x32>, ASE_MIPS64_CNMIPS;
+ def CINS64_32 :InstSE<(outs GPR64Opnd:$rt),
+ (ins GPR32Opnd:$rs, uimm5:$pos, uimm5:$lenm1),
+ "cins\t$rt, $rs, $pos, $lenm1", [], II_INS, FrmR,
+ "cins">,
+ EXTS_FM<0x32>, ASE_MIPS64_CNMIPS;
+ }
+}
// Move to multiplier/product register
def MTM0 : MoveToLOHI<"mtm0", GPR64Opnd, [MPL0, P0, P1, P2]>, MTMR_FM<0x08>,
@@ -513,41 +536,87 @@ def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64 addr:$src)>;
def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64 addr:$src)>;
// hi/lo relocs
-def : MipsPat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
-def : MipsPat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
-def : MipsPat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
-def : MipsPat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
-def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
-def : MipsPat<(MipsHi texternalsym:$in), (LUi64 texternalsym:$in)>;
+let AdditionalPredicates = [NotInMicroMips] in
+defm : MipsHiLoRelocs<LUi64, DADDiu, ZERO_64, GPR64Opnd>, SYM_32;
+
+def : MipsPat<(MipsGotHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
+def : MipsPat<(MipsGotHi texternalsym:$in), (LUi64 texternalsym:$in)>;
+
+multiclass MipsHighestHigherHiLoRelocs<Instruction Lui, Instruction Daddiu> {
+ def : MipsPat<(MipsJmpLink (i64 texternalsym:$dst)),
+ (JAL texternalsym:$dst)>;
+ def : MipsPat<(MipsHighest (i64 tglobaladdr:$in)),
+ (Lui tglobaladdr:$in)>;
+ def : MipsPat<(MipsHighest (i64 tblockaddress:$in)),
+ (Lui tblockaddress:$in)>;
+ def : MipsPat<(MipsHighest (i64 tjumptable:$in)),
+ (Lui tjumptable:$in)>;
+ def : MipsPat<(MipsHighest (i64 tconstpool:$in)),
+ (Lui tconstpool:$in)>;
+ def : MipsPat<(MipsHighest (i64 tglobaltlsaddr:$in)),
+ (Lui tglobaltlsaddr:$in)>;
+ def : MipsPat<(MipsHighest (i64 texternalsym:$in)),
+ (Lui texternalsym:$in)>;
+
+ def : MipsPat<(MipsHigher (i64 tglobaladdr:$in)),
+ (Daddiu ZERO_64, tglobaladdr:$in)>;
+ def : MipsPat<(MipsHigher (i64 tblockaddress:$in)),
+ (Daddiu ZERO_64, tblockaddress:$in)>;
+ def : MipsPat<(MipsHigher (i64 tjumptable:$in)),
+ (Daddiu ZERO_64, tjumptable:$in)>;
+ def : MipsPat<(MipsHigher (i64 tconstpool:$in)),
+ (Daddiu ZERO_64, tconstpool:$in)>;
+ def : MipsPat<(MipsHigher (i64 tglobaltlsaddr:$in)),
+ (Daddiu ZERO_64, tglobaltlsaddr:$in)>;
+ def : MipsPat<(MipsHigher (i64 texternalsym:$in)),
+ (Daddiu ZERO_64, texternalsym:$in)>;
+
+ def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tglobaladdr:$lo))),
+ (Daddiu GPR64:$hi, tglobaladdr:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tblockaddress:$lo))),
+ (Daddiu GPR64:$hi, tblockaddress:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tjumptable:$lo))),
+ (Daddiu GPR64:$hi, tjumptable:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tconstpool:$lo))),
+ (Daddiu GPR64:$hi, tconstpool:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tglobaltlsaddr:$lo))),
+ (Daddiu GPR64:$hi, tglobaltlsaddr:$lo)>;
+
+ def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaladdr:$lo))),
+ (Daddiu GPR64:$hi, tglobaladdr:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tblockaddress:$lo))),
+ (Daddiu GPR64:$hi, tblockaddress:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tjumptable:$lo))),
+ (Daddiu GPR64:$hi, tjumptable:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tconstpool:$lo))),
+ (Daddiu GPR64:$hi, tconstpool:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaltlsaddr:$lo))),
+ (Daddiu GPR64:$hi, tglobaltlsaddr:$lo)>;
+
+ def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaladdr:$lo))),
+ (Daddiu GPR64:$hi, tglobaladdr:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tblockaddress:$lo))),
+ (Daddiu GPR64:$hi, tblockaddress:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tjumptable:$lo))),
+ (Daddiu GPR64:$hi, tjumptable:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tconstpool:$lo))),
+ (Daddiu GPR64:$hi, tconstpool:$lo)>;
+ def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaltlsaddr:$lo))),
+ (Daddiu GPR64:$hi, tglobaltlsaddr:$lo)>;
+
+}
+
+// highest/higher/hi/lo relocs
+let AdditionalPredicates = [NotInMicroMips] in
+defm : MipsHighestHigherHiLoRelocs<LUi64, DADDiu>, SYM_64;
+
+def : WrapperPat<tglobaladdr, DADDiu, GPR64>;
+def : WrapperPat<tconstpool, DADDiu, GPR64>;
+def : WrapperPat<texternalsym, DADDiu, GPR64>;
+def : WrapperPat<tblockaddress, DADDiu, GPR64>;
+def : WrapperPat<tjumptable, DADDiu, GPR64>;
+def : WrapperPat<tglobaltlsaddr, DADDiu, GPR64>;
-let AdditionalPredicates = [NotInMicroMips] in {
- def : MipsPat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
- def : MipsPat<(MipsLo tblockaddress:$in),
- (DADDiu ZERO_64, tblockaddress:$in)>;
- def : MipsPat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
- def : MipsPat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
- def : MipsPat<(MipsLo tglobaltlsaddr:$in),
- (DADDiu ZERO_64, tglobaltlsaddr:$in)>;
- def : MipsPat<(MipsLo texternalsym:$in), (DADDiu ZERO_64, texternalsym:$in)>;
-
- def : MipsPat<(add GPR64:$hi, (MipsLo tglobaladdr:$lo)),
- (DADDiu GPR64:$hi, tglobaladdr:$lo)>;
- def : MipsPat<(add GPR64:$hi, (MipsLo tblockaddress:$lo)),
- (DADDiu GPR64:$hi, tblockaddress:$lo)>;
- def : MipsPat<(add GPR64:$hi, (MipsLo tjumptable:$lo)),
- (DADDiu GPR64:$hi, tjumptable:$lo)>;
- def : MipsPat<(add GPR64:$hi, (MipsLo tconstpool:$lo)),
- (DADDiu GPR64:$hi, tconstpool:$lo)>;
- def : MipsPat<(add GPR64:$hi, (MipsLo tglobaltlsaddr:$lo)),
- (DADDiu GPR64:$hi, tglobaltlsaddr:$lo)>;
-
- def : WrapperPat<tglobaladdr, DADDiu, GPR64>;
- def : WrapperPat<tconstpool, DADDiu, GPR64>;
- def : WrapperPat<texternalsym, DADDiu, GPR64>;
- def : WrapperPat<tblockaddress, DADDiu, GPR64>;
- def : WrapperPat<tjumptable, DADDiu, GPR64>;
- def : WrapperPat<tglobaltlsaddr, DADDiu, GPR64>;
-}
defm : BrcondPats<GPR64, BEQ64, BEQ, BNE64, SLT64, SLTu64, SLTi64, SLTiu64,
ZERO_64>;
@@ -600,6 +669,14 @@ def : MipsPat<(i64 (anyext GPR32:$src)),
def : MipsPat<(i64 (zext GPR32:$src)), (DSRL (DSLL64_32 GPR32:$src), 32)>;
def : MipsPat<(i64 (sext GPR32:$src)), (SLL64_32 GPR32:$src)>;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def : MipsPat<(i64 (zext GPR32:$src)), (DEXT64_32 GPR32:$src, 0, 32)>,
+ ISA_MIPS64R2;
+ def : MipsPat<(i64 (zext (i32 (shl GPR32:$rt, immZExt5:$imm)))),
+ (CINS64_32 GPR32:$rt, imm:$imm, (immZExt5To31 imm:$imm))>,
+ ASE_MIPS64_CNMIPS;
+}
+
// Sign extend in register
def : MipsPat<(i64 (sext_inreg GPR64:$src, i32)),
(SLL64_64 GPR64:$src)>;
@@ -661,6 +738,15 @@ let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"daddu $rs, $imm",
(DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm),
0>, ISA_MIPS3;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"and", ANDi64, GPR64Opnd, imm64>,
+ GPR_64;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"or", ORi64, GPR64Opnd, imm64>,
+ GPR_64;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"xor", XORi64, GPR64Opnd, imm64>,
+ GPR_64;
}
def : MipsInstAlias<"dsll $rd, $rt, $rs",
(DSLLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
@@ -741,21 +827,21 @@ def : MipsInstAlias<"bbit1 $rs, $p, $offset",
def : MipsInstAlias<"exts $rt, $rs, $pos, $lenm1",
(EXTS32 GPR64Opnd:$rt, GPR64Opnd:$rs,
uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>,
- ASE_CNMIPS;
+ ASE_MIPS64_CNMIPS;
def : MipsInstAlias<"exts $rt, $pos, $lenm1",
(EXTS32 GPR64Opnd:$rt, GPR64Opnd:$rt,
uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>,
- ASE_CNMIPS;
+ ASE_MIPS64_CNMIPS;
// cins with $pos 32-63 is converted to cins32 with $pos 0-31
def : MipsInstAlias<"cins $rt, $rs, $pos, $lenm1",
(CINS32 GPR64Opnd:$rt, GPR64Opnd:$rs,
uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>,
- ASE_CNMIPS;
+ ASE_MIPS64_CNMIPS;
def : MipsInstAlias<"cins $rt, $pos, $lenm1",
(CINS32 GPR64Opnd:$rt, GPR64Opnd:$rt,
uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>,
- ASE_CNMIPS;
+ ASE_MIPS64_CNMIPS;
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions
@@ -770,3 +856,81 @@ def LoadAddrReg64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rt), (ins mem:$addr),
"dla\t$rt, $addr">;
def LoadAddrImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rt), (ins imm64:$imm64),
"dla\t$rt, $imm64">;
+
+def DMULImmMacro : MipsAsmPseudoInst<(outs), (ins GPR64Opnd:$rs, GPR64Opnd:$rt,
+ simm32_relaxed:$imm),
+ "dmul\t$rs, $rt, $imm">,
+ ISA_MIPS3_NOT_32R6_64R6;
+def DMULOMacro : MipsAsmPseudoInst<(outs), (ins GPR64Opnd:$rs, GPR64Opnd:$rt,
+ GPR64Opnd:$rd),
+ "dmulo\t$rs, $rt, $rd">,
+ ISA_MIPS3_NOT_32R6_64R6;
+def DMULOUMacro : MipsAsmPseudoInst<(outs), (ins GPR64Opnd:$rs, GPR64Opnd:$rt,
+ GPR64Opnd:$rd),
+ "dmulou\t$rs, $rt, $rd">,
+ ISA_MIPS3_NOT_32R6_64R6;
+
+def DMULMacro : MipsAsmPseudoInst<(outs), (ins GPR64Opnd:$rs, GPR64Opnd:$rt,
+ GPR64Opnd:$rd),
+ "dmul\t$rs, $rt, $rd"> {
+ let InsnPredicates = [HasMips3, NotMips64r6, NotCnMips];
+}
+
+let AdditionalPredicates = [NotInMicroMips] in {
+ def DSDivMacro : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+ (ins GPR64Opnd:$rs, GPR64Opnd:$rt),
+ "ddiv\t$rd, $rs, $rt">,
+ ISA_MIPS3_NOT_32R6_64R6;
+ def DSDivIMacro : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+ (ins GPR64Opnd:$rs, imm64:$imm),
+ "ddiv\t$rd, $rs, $imm">,
+ ISA_MIPS3_NOT_32R6_64R6;
+ def DUDivMacro : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+ (ins GPR64Opnd:$rs, GPR64Opnd:$rt),
+ "ddivu\t$rd, $rs, $rt">,
+ ISA_MIPS3_NOT_32R6_64R6;
+ def DUDivIMacro : MipsAsmPseudoInst<(outs GPR64Opnd:$rd),
+ (ins GPR64Opnd:$rs, imm64:$imm),
+ "ddivu\t$rd, $rs, $imm">,
+ ISA_MIPS3_NOT_32R6_64R6;
+
+ // GAS expands 'div' and 'ddiv' differently when the destination
+ // register is $zero and the instruction is in the two operand
+ // form. 'ddiv' gets expanded, while 'div' is not expanded.
+
+ def : MipsInstAlias<"ddiv $rs, $rt", (DSDivMacro GPR64Opnd:$rs,
+ GPR64Opnd:$rs,
+ GPR64Opnd:$rt), 0>,
+ ISA_MIPS3_NOT_32R6_64R6;
+ def : MipsInstAlias<"ddiv $rd, $imm", (DSDivIMacro GPR64Opnd:$rd,
+ GPR64Opnd:$rd,
+ imm64:$imm), 0>,
+ ISA_MIPS3_NOT_32R6_64R6;
+
+ // GAS expands 'divu' and 'ddivu' differently when the destination
+ // register is $zero and the instruction is in the two operand
+ // form. 'ddivu' gets expanded, while 'divu' is not expanded.
+
+ def : MipsInstAlias<"ddivu $rt, $rs", (DUDivMacro GPR64Opnd:$rt,
+ GPR64Opnd:$rt,
+ GPR64Opnd:$rs), 0>,
+ ISA_MIPS3_NOT_32R6_64R6;
+ def : MipsInstAlias<"ddivu $rd, $imm", (DUDivIMacro GPR64Opnd:$rd,
+ GPR64Opnd:$rd,
+ imm64:$imm), 0>,
+ ISA_MIPS3_NOT_32R6_64R6;
+}
+
+def NORImm64 : NORIMM_DESC_BASE<GPR64Opnd, imm64>, GPR_64;
+def : MipsInstAlias<"nor\t$rs, $imm", (NORImm64 GPR64Opnd:$rs, GPR64Opnd:$rs,
+ imm64:$imm)>, GPR_64;
+def SLTImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rs),
+ (ins GPR64Opnd:$rt, imm64:$imm),
+ "slt\t$rs, $rt, $imm">, GPR_64;
+def : MipsInstAlias<"slt\t$rs, $imm", (SLTImm64 GPR64Opnd:$rs, GPR64Opnd:$rs,
+ imm64:$imm)>, GPR_64;
+def SLTUImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rs),
+ (ins GPR64Opnd:$rt, imm64:$imm),
+ "sltu\t$rs, $rt, $imm">, GPR_64;
+def : MipsInstAlias<"sltu\t$rs, $imm", (SLTUImm64 GPR64Opnd:$rs, GPR64Opnd:$rs,
+ imm64:$imm)>, GPR_64;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 04d6529a073d..2a9d96205eb9 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -39,6 +39,7 @@
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSymbolELF.h"
@@ -79,6 +80,9 @@ bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
NaClAlignIndirectJumpTargets(MF);
AsmPrinter::runOnMachineFunction(MF);
+
+ EmitXRayTable();
+
return true;
}
@@ -132,6 +136,7 @@ void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer,
void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MipsTargetStreamer &TS = getTargetStreamer();
+ unsigned Opc = MI->getOpcode();
TS.forbidModuleDirective();
if (MI->isDebugValue()) {
@@ -143,20 +148,20 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
// If we just ended a constant pool, mark it as such.
- if (InConstantPool && MI->getOpcode() != Mips::CONSTPOOL_ENTRY) {
+ if (InConstantPool && Opc != Mips::CONSTPOOL_ENTRY) {
OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
InConstantPool = false;
}
- if (MI->getOpcode() == Mips::CONSTPOOL_ENTRY) {
+ if (Opc == Mips::CONSTPOOL_ENTRY) {
// CONSTPOOL_ENTRY - This instruction represents a floating-point
- //constant pool in the function. The first operand is the ID#
+ // constant pool in the function. The first operand is the ID#
// for this instruction, the second is the index into the
// MachineConstantPool that this is, the third is the size in
// bytes of this constant pool entry.
// The required alignment is specified on the basic block holding this MI.
//
unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
- unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
+ unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
// If this is the first entry of the pool, mark it.
if (!InConstantPool) {
@@ -174,6 +179,17 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ switch (Opc) {
+ case Mips::PATCHABLE_FUNCTION_ENTER:
+ LowerPATCHABLE_FUNCTION_ENTER(*MI);
+ return;
+ case Mips::PATCHABLE_FUNCTION_EXIT:
+ LowerPATCHABLE_FUNCTION_EXIT(*MI);
+ return;
+ case Mips::PATCHABLE_TAIL_CALL:
+ LowerPATCHABLE_TAIL_CALL(*MI);
+ return;
+ }
MachineBasicBlock::const_instr_iterator I = MI->getIterator();
MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
@@ -574,6 +590,8 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
case MipsII::MO_GOT: O << "%got("; break;
case MipsII::MO_ABS_HI: O << "%hi("; break;
case MipsII::MO_ABS_LO: O << "%lo("; break;
+ case MipsII::MO_HIGHER: O << "%higher("; break;
+ case MipsII::MO_HIGHEST: O << "%highest("; break;
case MipsII::MO_TLSGD: O << "%tlsgd("; break;
case MipsII::MO_GOTTPREL: O << "%gottprel("; break;
case MipsII::MO_TPREL_HI: O << "%tprel_hi("; break;
@@ -698,7 +716,7 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
// Ideally it should test for properties of the ABI and not the ABI
// itself.
// For the moment, I'm only correcting enough to make MIPS-IV work.
- if (!isPositionIndependent() && !ABI.IsN64())
+ if (!isPositionIndependent() && STI.hasSym32())
TS.emitDirectiveOptionPic0();
}
@@ -1032,6 +1050,149 @@ void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer->SwitchSection(OutContext.getObjectFileInfo()->getTextSection());
}
+void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) {
+ const uint8_t NoopsInSledCount = Subtarget->isGP64bit() ? 15 : 11;
+ // For mips32 we want to emit the following pattern:
+ //
+ // .Lxray_sled_N:
+ // ALIGN
+ // B .tmpN
+ // 11 NOP instructions (44 bytes)
+ // ADDIU T9, T9, 52
+ // .tmpN
+ //
+ // We need the 44 bytes (11 instructions) because at runtime, we'd
+ // be patching over the full 48 bytes (12 instructions) with the following
+ // pattern:
+ //
+ // ADDIU SP, SP, -8
+ // NOP
+ // SW RA, 4(SP)
+ // SW T9, 0(SP)
+ // LUI T9, %hi(__xray_FunctionEntry/Exit)
+ // ORI T9, T9, %lo(__xray_FunctionEntry/Exit)
+ // LUI T0, %hi(function_id)
+ // JALR T9
+ // ORI T0, T0, %lo(function_id)
+ // LW T9, 0(SP)
+ // LW RA, 4(SP)
+ // ADDIU SP, SP, 8
+ //
+ // We add 52 bytes to t9 because we want to adjust the function pointer to
+ // the actual start of the function, i.e. the address just after the nop
+ // sled. We do this because the gp displacement relocation is emitted at the
+ // start of the function, i.e. after the nop sled, and to correctly
+ // calculate the global offset table address, t9 must hold the address of
+ // the instruction containing the gp displacement relocation.
+ // FIXME: Is this correct for the static relocation model?
+ //
+ // For mips64 we want to emit the following pattern:
+ //
+ // .Lxray_sled_N:
+ // ALIGN
+ // B .tmpN
+ // 15 NOP instructions (60 bytes)
+ // .tmpN
+ //
+ // We need the 60 bytes (15 instructions) because at runtime, we'd
+ // be patching over the full 64 bytes (16 instructions) with the following
+ // pattern:
+ //
+ // DADDIU SP, SP, -16
+ // NOP
+ // SD RA, 8(SP)
+ // SD T9, 0(SP)
+ // LUI T9, %highest(__xray_FunctionEntry/Exit)
+ // ORI T9, T9, %higher(__xray_FunctionEntry/Exit)
+ // DSLL T9, T9, 16
+ // ORI T9, T9, %hi(__xray_FunctionEntry/Exit)
+ // DSLL T9, T9, 16
+ // ORI T9, T9, %lo(__xray_FunctionEntry/Exit)
+ // LUI T0, %hi(function_id)
+ // JALR T9
+ // ADDIU T0, T0, %lo(function_id)
+ // LD T9, 0(SP)
+ // LD RA, 8(SP)
+ // DADDIU SP, SP, 16
+ //
+ OutStreamer->EmitCodeAlignment(4);
+ auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+ OutStreamer->EmitLabel(CurSled);
+ auto Target = OutContext.createTempSymbol();
+
+ // Emit "B .tmpN" instruction, which jumps over the nop sled to the actual
+ // start of function
+ const MCExpr *TargetExpr = MCSymbolRefExpr::create(
+ Target, MCSymbolRefExpr::VariantKind::VK_None, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(Mips::BEQ)
+ .addReg(Mips::ZERO)
+ .addReg(Mips::ZERO)
+ .addExpr(TargetExpr));
+
+ for (int8_t I = 0; I < NoopsInSledCount; I++)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(Mips::SLL)
+ .addReg(Mips::ZERO)
+ .addReg(Mips::ZERO)
+ .addImm(0));
+
+ OutStreamer->EmitLabel(Target);
+
+ if (!Subtarget->isGP64bit()) {
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(Mips::ADDiu)
+ .addReg(Mips::T9)
+ .addReg(Mips::T9)
+ .addImm(0x34));
+ }
+
+ recordSled(CurSled, MI, Kind);
+}
+
+void MipsAsmPrinter::EmitXRayTable() {
+ if (Sleds.empty())
+ return;
+ if (Subtarget->isTargetELF()) {
+ auto PrevSection = OutStreamer->getCurrentSectionOnly();
+ auto Fn = MF->getFunction();
+ MCSection *Section;
+
+ if (Fn->hasComdat())
+ Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
+ Fn->getComdat()->getName());
+ else
+ Section =
+ OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC, 0, CurrentFnSym->getName());
+
+ OutStreamer->SwitchSection(Section);
+ for (const auto &Sled : Sleds) {
+ OutStreamer->EmitSymbolValue(Sled.Sled, Subtarget->isGP64bit() ? 8 : 4);
+ OutStreamer->EmitSymbolValue(CurrentFnSym, Subtarget->isGP64bit() ? 8 : 4);
+ auto Kind = static_cast<uint8_t>(Sled.Kind);
+ OutStreamer->EmitBytes(
+ StringRef(reinterpret_cast<const char *>(&Kind), 1));
+ OutStreamer->EmitBytes(
+ StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
+ OutStreamer->EmitZeros(Subtarget->isGP64bit() ? 14 : 6);
+ }
+ OutStreamer->SwitchSection(PrevSection);
+ }
+ Sleds.clear();
+}
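EmitXRayTable above writes fixed-width records: 8 + 8 + 1 + 1 + 14 = 32 bytes per entry on 64-bit subtargets, and 4 + 4 + 1 + 1 + 6 = 16 bytes on 32-bit ones. A hypothetical mirror of the 64-bit layout, useful for sanity-checking that arithmetic:

#include <cstdint>

struct XRaySledEntry64 {
  uint64_t SledAddress;      // EmitSymbolValue(Sled.Sled, 8)
  uint64_t FunctionAddress;  // EmitSymbolValue(CurrentFnSym, 8)
  uint8_t Kind;              // one byte for the sled kind
  uint8_t AlwaysInstrument;  // one byte for the always-instrument flag
  uint8_t Padding[14];       // EmitZeros(14) pads the record to 32 bytes
};
static_assert(sizeof(XRaySledEntry64) == 32, "record must stay 32 bytes");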
+
+void MipsAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI) {
+ EmitSled(MI, SledKind::FUNCTION_ENTER);
+}
+
+void MipsAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI) {
+ EmitSled(MI, SledKind::FUNCTION_EXIT);
+}
+
+void MipsAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) {
+ EmitSled(MI, SledKind::TAIL_CALL);
+}
+
void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
raw_ostream &OS) {
// TODO: implement
@@ -1039,7 +1200,7 @@ void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
// Emit .dtprelword or .dtpreldword directive
// and value for debug thread local expression.
-void MipsAsmPrinter::EmitDebugValue(const MCExpr *Value,
+void MipsAsmPrinter::EmitDebugThreadLocal(const MCExpr *Value,
unsigned Size) const {
switch (Size) {
case 4:
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index c5cf5241c236..4699e1b0bd3b 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -35,7 +35,21 @@ class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
void EmitInstrWithMacroNoAT(const MachineInstr *MI);
+ //===------------------------------------------------------------------===//
+ // XRay implementation
+ //===------------------------------------------------------------------===//
+public:
+ // XRay-specific lowering for Mips.
+ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
+ void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
+ void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
+ // Helper function that emits the XRay sleds we've collected for a particular
+ // function.
+ void EmitXRayTable();
+
private:
+ void EmitSled(const MachineInstr &MI, SledKind Kind);
+
// tblgen'erated function.
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
const MachineInstr *MI);
@@ -140,7 +154,7 @@ public:
void EmitStartOfAsmFile(Module &M) override;
void EmitEndOfAsmFile(Module &M) override;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
- void EmitDebugValue(const MCExpr *Value, unsigned Size) const override;
+ void EmitDebugThreadLocal(const MCExpr *Value, unsigned Size) const override;
};
}
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index 08b8ed31ccbb..026f66a1c0e1 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
//
-//
// This pass is used to make PC-relative loads of constants.
// For now, only Mips16 will use this.
//
@@ -19,30 +18,43 @@
// This can be particularly helpful in static relocation mode for embedded
// non-linux targets.
//
-//
+//===----------------------------------------------------------------------===//
#include "Mips.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
#include "Mips16InstrInfo.h"
#include "MipsMachineFunction.h"
-#include "MipsTargetMachine.h"
+#include "MipsSubtarget.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <new>
+#include <vector>
using namespace llvm;
@@ -58,7 +70,6 @@ static cl::opt<bool>
AlignConstantIslands("mips-align-constant-islands", cl::Hidden, cl::init(true),
cl::desc("Align constant islands in code"));
-
// Rather than do make check tests with huge amounts of code, we force
// the test to use this amount.
//
@@ -178,7 +189,6 @@ static unsigned int branchMaxOffsets(unsigned int Opcode) {
namespace {
-
typedef MachineBasicBlock::iterator Iter;
typedef MachineBasicBlock::reverse_iterator ReverseIter;
@@ -195,7 +205,6 @@ namespace {
/// tracks a list of users.
class MipsConstantIslands : public MachineFunctionPass {
-
/// BasicBlockInfo - Information about the offset and size of a single
/// basic block.
struct BasicBlockInfo {
@@ -208,14 +217,16 @@ namespace {
///
/// Because worst case padding is used, the computed offset of an aligned
/// block may not actually be aligned.
- unsigned Offset;
+ unsigned Offset = 0;
/// Size - Size of the basic block in bytes. If the block contains
/// inline assembly, this is a worst case estimate.
///
/// The size does not include any alignment padding whether from the
/// beginning of the block, or from an aligned jump table at the end.
- unsigned Size;
+ unsigned Size = 0;
+
+ BasicBlockInfo() = default;
// FIXME: ignore LogAlign for this patch
//
@@ -223,9 +234,6 @@ namespace {
unsigned PO = Offset + Size;
return PO;
}
-
- BasicBlockInfo() : Offset(0), Size(0) {}
-
};
std::vector<BasicBlockInfo> BBInfo;
@@ -257,13 +265,16 @@ namespace {
MachineInstr *MI;
MachineInstr *CPEMI;
MachineBasicBlock *HighWaterMark;
+
private:
unsigned MaxDisp;
unsigned LongFormMaxDisp; // mips16 has 16/32 bit instructions
// with different displacements
unsigned LongFormOpcode;
+
public:
bool NegOk;
+
CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
bool neg,
unsigned longformmaxdisp, unsigned longformopcode)
@@ -272,18 +283,22 @@ namespace {
NegOk(neg){
HighWaterMark = CPEMI->getParent();
}
+
/// getMaxDisp - Returns the maximum displacement supported by MI.
unsigned getMaxDisp() const {
unsigned xMaxDisp = ConstantIslandsSmallOffset?
ConstantIslandsSmallOffset: MaxDisp;
return xMaxDisp;
}
+
void setMaxDisp(unsigned val) {
MaxDisp = val;
}
+
unsigned getLongFormMaxDisp() const {
return LongFormMaxDisp;
}
+
unsigned getLongFormOpcode() const {
return LongFormOpcode;
}
@@ -300,6 +315,7 @@ namespace {
MachineInstr *CPEMI;
unsigned CPI;
unsigned RefCount;
+
CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
: CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
};
@@ -309,7 +325,7 @@ namespace {
/// existed upon entry to this pass), it keeps a vector of entries.
/// Original elements are cloned as we go along; the clones are
/// put in the vector of the original element, but have distinct CPIs.
- std::vector<std::vector<CPEntry> > CPEntries;
+ std::vector<std::vector<CPEntry>> CPEntries;
/// ImmBranch - One per immediate branch, keeping the machine instruction
/// pointer, conditional or unconditional, the max displacement,
@@ -320,6 +336,7 @@ namespace {
unsigned MaxDisp : 31;
bool isCond : 1;
int UncondBr;
+
ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
: MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
};
@@ -332,29 +349,27 @@ namespace {
/// the branch fix up pass.
bool HasFarJump;
- const MipsSubtarget *STI;
+ const MipsSubtarget *STI = nullptr;
const Mips16InstrInfo *TII;
MipsFunctionInfo *MFI;
- MachineFunction *MF;
- MachineConstantPool *MCP;
+ MachineFunction *MF = nullptr;
+ MachineConstantPool *MCP = nullptr;
unsigned PICLabelUId;
- bool PrescannedForConstants;
+ bool PrescannedForConstants = false;
void initPICLabelUId(unsigned UId) {
PICLabelUId = UId;
}
-
unsigned createPICLabelUId() {
return PICLabelUId++;
}
public:
static char ID;
- MipsConstantIslands()
- : MachineFunctionPass(ID), STI(nullptr), MF(nullptr), MCP(nullptr),
- PrescannedForConstants(false) {}
+
+ MipsConstantIslands() : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "Mips Constant Islands"; }
@@ -403,13 +418,11 @@ namespace {
bool fixupUnconditionalBr(ImmBranch &Br);
void prescanForConstants();
-
- private:
-
};
char MipsConstantIslands::ID = 0;
-} // end of anonymous namespace
+
+} // end anonymous namespace
bool MipsConstantIslands::isOffsetInRange
(unsigned UserOffset, unsigned TrialOffset,
@@ -417,20 +430,17 @@ bool MipsConstantIslands::isOffsetInRange
return isOffsetInRange(UserOffset, TrialOffset,
U.getMaxDisp(), U.NegOk);
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// print block size and offset information - debugging
-void MipsConstantIslands::dumpBBs() {
- DEBUG({
- for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
- const BasicBlockInfo &BBI = BBInfo[J];
- dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
- << format(" size=%#x\n", BBInfo[J].Size);
- }
- });
-}
-/// Returns a pass that converts branches to long branches.
-FunctionPass *llvm::createMipsConstantIslandPass() {
- return new MipsConstantIslands();
+LLVM_DUMP_METHOD void MipsConstantIslands::dumpBBs() {
+ for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
}
+#endif
bool MipsConstantIslands::runOnMachineFunction(MachineFunction &mf) {
// The intention is for this to be a mips16 only pass for now
@@ -527,7 +537,6 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
MF->push_back(BB);
-
// MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
@@ -647,7 +656,6 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
computeBlockSize(&*I);
-
// Compute block offsets.
adjustBBOffsetsAfter(&MF->front());
@@ -737,7 +745,6 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
if (Opc == Mips::CONSTPOOL_ENTRY)
continue;
-
// Scan the instructions for constant pool operands.
for (unsigned op = 0, e = MI.getNumOperands(); op != e; ++op)
if (MI.getOperand(op).isCPI()) {
@@ -784,12 +791,9 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// Instructions can only use one CP entry, don't bother scanning the
// rest of the operands.
break;
-
}
-
}
}
-
}
/// computeBlockSize - Compute the size and some alignment information for MBB.
@@ -921,8 +925,6 @@ MipsConstantIslands::splitBlockBeforeInstr(MachineInstr &MI) {
return NewBB;
}
-
-
/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool
/// reference) is within MaxDisp of TrialOffset (a proposed location of a
/// constant pool entry).
@@ -1337,7 +1339,6 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
if (result==1) return false;
else if (result==2) return true;
-
// Look for water where we can place this CPE.
MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
MachineBasicBlock *NewMBB;
@@ -1371,7 +1372,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// it. Check for this so it will be removed from the WaterList.
// Also remove any entry from NewWaterList.
MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
- IP = find(WaterList, WaterBB);
+ IP = llvm::find(WaterList, WaterBB);
if (IP != WaterList.end())
NewWaterList.erase(WaterBB);
@@ -1473,9 +1474,7 @@ bool MipsConstantIslands::removeUnusedCPEntries() {
/// specific BB can fit in MI's displacement field.
bool MipsConstantIslands::isBBInRange
(MachineInstr *MI,MachineBasicBlock *DestBB, unsigned MaxDisp) {
-
-unsigned PCAdj = 4;
-
+ unsigned PCAdj = 4;
unsigned BrOffset = getOffsetOf(MI) + PCAdj;
unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
@@ -1553,7 +1552,6 @@ MipsConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
return true;
}
-
/// fixupConditionalBr - Fix up a conditional branch whose destination is too
/// far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
@@ -1614,7 +1612,6 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
}
}
-
if (NeedSplit) {
splitBlockBeforeInstr(*MI);
// No need for the branch to the next block. We're adding an unconditional
@@ -1654,7 +1651,6 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
return true;
}
-
void MipsConstantIslands::prescanForConstants() {
unsigned J = 0;
(void)J;
@@ -1667,11 +1663,11 @@ void MipsConstantIslands::prescanForConstants() {
PrescannedForConstants = true;
DEBUG(dbgs() << "constant island constant " << *I << "\n");
J = I->getNumOperands();
- DEBUG(dbgs() << "num operands " << J << "\n");
+ DEBUG(dbgs() << "num operands " << J << "\n");
MachineOperand& Literal = I->getOperand(1);
if (Literal.isImm()) {
int64_t V = Literal.getImm();
- DEBUG(dbgs() << "literal " << V << "\n");
+ DEBUG(dbgs() << "literal " << V << "\n");
Type *Int32Ty =
Type::getInt32Ty(MF->getFunction()->getContext());
const Constant *C = ConstantInt::get(Int32Ty, V);
@@ -1692,3 +1688,8 @@ void MipsConstantIslands::prescanForConstants() {
}
}
}
+
+/// Returns a pass that converts branches to long branches.
+FunctionPass *llvm::createMipsConstantIslandPass() {
+ return new MipsConstantIslands();
+}
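Guarding dumpBBs with LLVM_DUMP_METHOD and the NDEBUG/LLVM_ENABLE_DUMP check follows the tree-wide idiom; a minimal sketch of that idiom, detached from this pass:

#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"

struct Example {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // Only compiled into asserts or dump-enabled builds; LLVM_DUMP_METHOD
  // marks it used so the symbol survives for calls from a debugger.
  LLVM_DUMP_METHOD void dump() const { llvm::dbgs() << "Example\n"; }
#endif
};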
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index c821084f68cf..ae58c26e145a 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -14,21 +14,39 @@
#include "MCTargetDesc/MipsMCNaCl.h"
#include "Mips.h"
#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <memory>
+#include <utility>
using namespace llvm;
@@ -84,6 +102,7 @@ static cl::opt<CompactBranchPolicy> MipsCompactBranchPolicy(
);
namespace {
+
typedef MachineBasicBlock::iterator Iter;
typedef MachineBasicBlock::reverse_iterator ReverseIter;
typedef SmallDenseMap<MachineBasicBlock*, MachineInstr*, 2> BB2BrMap;
@@ -91,6 +110,7 @@ namespace {
class RegDefsUses {
public:
RegDefsUses(const TargetRegisterInfo &TRI);
+
void init(const MachineInstr &MI);
/// This function sets all caller-saved registers in Defs.
@@ -120,18 +140,18 @@ namespace {
/// Base class for inspecting loads and stores.
class InspectMemInstr {
public:
- InspectMemInstr(bool ForbidMemInstr_)
- : OrigSeenLoad(false), OrigSeenStore(false), SeenLoad(false),
- SeenStore(false), ForbidMemInstr(ForbidMemInstr_) {}
+ InspectMemInstr(bool ForbidMemInstr_) : ForbidMemInstr(ForbidMemInstr_) {}
+ virtual ~InspectMemInstr() = default;
/// Return true if MI cannot be moved to delay slot.
bool hasHazard(const MachineInstr &MI);
- virtual ~InspectMemInstr() {}
-
protected:
/// Flags indicating whether loads or stores have been seen.
- bool OrigSeenLoad, OrigSeenStore, SeenLoad, SeenStore;
+ bool OrigSeenLoad = false;
+ bool OrigSeenStore = false;
+ bool SeenLoad = false;
+ bool SeenStore = false;
/// Memory instructions are not allowed to move to delay slot if this flag
/// is true.
@@ -145,6 +165,7 @@ namespace {
class NoMemInstr : public InspectMemInstr {
public:
NoMemInstr() : InspectMemInstr(true) {}
+
private:
bool hasHazard_(const MachineInstr &MI) override { return true; }
};
@@ -153,6 +174,7 @@ namespace {
class LoadFromStackOrConst : public InspectMemInstr {
public:
LoadFromStackOrConst() : InspectMemInstr(false) {}
+
private:
bool hasHazard_(const MachineInstr &MI) override;
};
@@ -183,7 +205,8 @@ namespace {
/// Flags indicating whether loads or stores with no underlying objects have
/// been seen.
- bool SeenNoObjLoad, SeenNoObjStore;
+ bool SeenNoObjLoad = false;
+ bool SeenNoObjStore = false;
};
class Filler : public MachineFunctionPass {
@@ -271,8 +294,10 @@ namespace {
static char ID;
};
+
char Filler::ID = 0;
-} // end of anonymous namespace
+
+} // end anonymous namespace
static bool hasUnoccupiedSlot(const MachineInstr *MI) {
return MI->hasDelaySlot() && !MI->isBundledWithSucc();
@@ -458,8 +483,7 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
}
MemDefsUses::MemDefsUses(const DataLayout &DL, const MachineFrameInfo *MFI_)
- : InspectMemInstr(false), MFI(MFI_), DL(DL), SeenNoObjLoad(false),
- SeenNoObjStore(false) {}
+ : InspectMemInstr(false), MFI(MFI_), DL(DL) {}
bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
bool HasHazard = false;
@@ -646,12 +670,6 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
return Changed;
}
-/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
-/// slots in Mips MachineFunctions
-FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
- return new Filler(tm);
-}
-
template<typename IterTy>
bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
RegDefsUses &RegDU, InspectMemInstr& IM, Iter Slot,
@@ -889,3 +907,9 @@ bool Filler::terminateSearch(const MachineInstr &Candidate) const {
Candidate.isPosition() || Candidate.isInlineAsm() ||
Candidate.hasUnmodeledSideEffects());
}
+
+/// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in Mips MachineFunctions
+FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) {
+ return new Filler(tm);
+}
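Like MipsConstantIslandPass.cpp above, this file now defines its factory after the pass logic rather than in the middle of it; a sketch of the resulting file shape (pass name hypothetical):

#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;

namespace {
class ExamplePass : public MachineFunctionPass {
public:
  static char ID;
  ExamplePass() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &) override { return false; }
};

char ExamplePass::ID = 0;
} // end anonymous namespace

// The factory comes last, after the full pass definition.
FunctionPass *createExamplePass() { return new ExamplePass(); }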
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index a44192f57aa0..c060cf06099d 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -1,4 +1,4 @@
-//===-- MipsFastISel.cpp - Mips FastISel implementation --------------------===//
+//===-- MipsFastISel.cpp - Mips FastISel implementation -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,24 +14,62 @@
///
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/MipsABIInfo.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsCCState.h"
#include "MipsInstrInfo.h"
#include "MipsISelLowering.h"
#include "MipsMachineFunction.h"
-#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <new>
#define DEBUG_TYPE "mips-fastisel"
@@ -47,35 +85,40 @@ class MipsFastISel final : public FastISel {
typedef enum { RegBase, FrameIndexBase } BaseKind;
private:
- BaseKind Kind;
+ BaseKind Kind = RegBase;
union {
unsigned Reg;
int FI;
} Base;
- int64_t Offset;
+ int64_t Offset = 0;
- const GlobalValue *GV;
+ const GlobalValue *GV = nullptr;
public:
// Innocuous defaults for our address.
- Address() : Kind(RegBase), Offset(0), GV(0) { Base.Reg = 0; }
+ Address() { Base.Reg = 0; }
+
void setKind(BaseKind K) { Kind = K; }
BaseKind getKind() const { return Kind; }
bool isRegBase() const { return Kind == RegBase; }
bool isFIBase() const { return Kind == FrameIndexBase; }
+
void setReg(unsigned Reg) {
assert(isRegBase() && "Invalid base register access!");
Base.Reg = Reg;
}
+
unsigned getReg() const {
assert(isRegBase() && "Invalid base register access!");
return Base.Reg;
}
+
void setFI(unsigned FI) {
assert(isFIBase() && "Invalid base frame index access!");
Base.FI = FI;
}
+
unsigned getFI() const {
assert(isFIBase() && "Invalid base frame index access!");
return Base.FI;
@@ -165,14 +208,17 @@ private:
MachineInstrBuilder emitInst(unsigned Opc) {
return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
}
+
MachineInstrBuilder emitInst(unsigned Opc, unsigned DstReg) {
return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
DstReg);
}
+
MachineInstrBuilder emitInstStore(unsigned Opc, unsigned SrcReg,
unsigned MemReg, int64_t MemOffset) {
return emitInst(Opc).addReg(SrcReg).addReg(MemReg).addImm(MemOffset);
}
+
MachineInstrBuilder emitInstLoad(unsigned Opc, unsigned DstReg,
unsigned MemReg, int64_t MemOffset) {
return emitInst(Opc, DstReg).addReg(MemReg).addImm(MemOffset);
@@ -198,6 +244,7 @@ private:
bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
unsigned &NumBytes);
bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
+
const MipsABIInfo &getABI() const {
return static_cast<const MipsTargetMachine &>(TM).getABI();
}
@@ -220,7 +267,8 @@ public:
#include "MipsGenFastISel.inc"
};
-} // end anonymous namespace.
+
+} // end anonymous namespace
static bool CC_Mips(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
@@ -414,7 +462,6 @@ unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) {
}
bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
-
const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
@@ -432,10 +479,9 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
switch (Opcode) {
default:
break;
- case Instruction::BitCast: {
+ case Instruction::BitCast:
// Look through bitcasts.
return computeAddress(U->getOperand(0), Addr);
- }
case Instruction::GetElementPtr: {
Address SavedAddr = Addr;
int64_t TmpOffset = Addr.getOffset();
@@ -451,7 +497,7 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
TmpOffset += SL->getElementOffset(Idx);
} else {
uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
- for (;;) {
+ while (true) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
// Constant-offset addressing.
TmpOffset += CI->getSExtValue() * S;
@@ -613,14 +659,12 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
emitInst(Mips::SLTu, ResultReg).addReg(Mips::ZERO).addReg(TempReg);
break;
}
- case CmpInst::ICMP_UGT: {
+ case CmpInst::ICMP_UGT:
emitInst(Mips::SLTu, ResultReg).addReg(RightReg).addReg(LeftReg);
break;
- }
- case CmpInst::ICMP_ULT: {
+ case CmpInst::ICMP_ULT:
emitInst(Mips::SLTu, ResultReg).addReg(LeftReg).addReg(RightReg);
break;
- }
case CmpInst::ICMP_UGE: {
unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::SLTu, TempReg).addReg(LeftReg).addReg(RightReg);
@@ -633,14 +677,12 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
emitInst(Mips::XORi, ResultReg).addReg(TempReg).addImm(1);
break;
}
- case CmpInst::ICMP_SGT: {
+ case CmpInst::ICMP_SGT:
emitInst(Mips::SLT, ResultReg).addReg(RightReg).addReg(LeftReg);
break;
- }
- case CmpInst::ICMP_SLT: {
+ case CmpInst::ICMP_SLT:
emitInst(Mips::SLT, ResultReg).addReg(LeftReg).addReg(RightReg);
break;
- }
case CmpInst::ICMP_SGE: {
unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::SLT, TempReg).addReg(LeftReg).addReg(RightReg);
@@ -709,6 +751,7 @@ bool MipsFastISel::emitCmp(unsigned ResultReg, const CmpInst *CI) {
}
return true;
}
+
bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
unsigned Alignment) {
//
@@ -716,35 +759,30 @@ bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
//
unsigned Opc;
switch (VT.SimpleTy) {
- case MVT::i32: {
+ case MVT::i32:
ResultReg = createResultReg(&Mips::GPR32RegClass);
Opc = Mips::LW;
break;
- }
- case MVT::i16: {
+ case MVT::i16:
ResultReg = createResultReg(&Mips::GPR32RegClass);
Opc = Mips::LHu;
break;
- }
- case MVT::i8: {
+ case MVT::i8:
ResultReg = createResultReg(&Mips::GPR32RegClass);
Opc = Mips::LBu;
break;
- }
- case MVT::f32: {
+ case MVT::f32:
if (UnsupportedFPMode)
return false;
ResultReg = createResultReg(&Mips::FGR32RegClass);
Opc = Mips::LWC1;
break;
- }
- case MVT::f64: {
+ case MVT::f64:
if (UnsupportedFPMode)
return false;
ResultReg = createResultReg(&Mips::AFGR64RegClass);
Opc = Mips::LDC1;
break;
- }
default:
return false;
}
@@ -1730,6 +1768,7 @@ bool MipsFastISel::selectTrunc(const Instruction *I) {
updateValueMap(I, SrcReg);
return true;
}
+
bool MipsFastISel::selectIntExt(const Instruction *I) {
Type *DestTy = I->getType();
Value *Src = I->getOperand(0);
@@ -1757,6 +1796,7 @@ bool MipsFastISel::selectIntExt(const Instruction *I) {
updateValueMap(I, ResultReg);
return true;
}
+
bool MipsFastISel::emitIntSExt32r1(MVT SrcVT, unsigned SrcReg, MVT DestVT,
unsigned DestReg) {
unsigned ShiftAmt;
@@ -2074,8 +2114,10 @@ unsigned MipsFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
}
namespace llvm {
+
FastISel *Mips::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) {
return new MipsFastISel(funcInfo, libInfo);
}
-}
+
+} // end namespace llvm
diff --git a/lib/Target/Mips/MipsHazardSchedule.cpp b/lib/Target/Mips/MipsHazardSchedule.cpp
index 31b86124bc8d..f6fcf6ec9385 100644
--- a/lib/Target/Mips/MipsHazardSchedule.cpp
+++ b/lib/Target/Mips/MipsHazardSchedule.cpp
@@ -36,7 +36,7 @@
///
/// A) A previous pass has created a compact branch directly.
/// B) Transforming a delay slot branch into compact branch. This case can be
-/// difficult to process as lookahead for hazards is insufficent, as
+/// difficult to process as lookahead for hazards is insufficient, as
/// backwards delay slot filling can also produce hazards in previously
/// processed instructions.
///
@@ -103,23 +103,24 @@ static Iter getNextMachineInstrInBB(Iter Position) {
// Find the next real instruction from the current position, looking through
// basic block boundaries.
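+// Returns the instruction found together with a flag that is true when the
+// end of the function was reached.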
-static Iter getNextMachineInstr(Iter Position, MachineBasicBlock *Parent) {
+static std::pair<Iter, bool> getNextMachineInstr(Iter Position,
+                                                 MachineBasicBlock *Parent) {
if (Position == Parent->end()) {
- MachineBasicBlock *Succ = Parent->getNextNode();
- if (Succ != nullptr && Parent->isSuccessor(Succ)) {
- Position = Succ->begin();
- Parent = Succ;
- } else {
- llvm_unreachable(
- "Should have identified the end of the function earlier!");
- }
+ do {
+ MachineBasicBlock *Succ = Parent->getNextNode();
+ if (Succ != nullptr && Parent->isSuccessor(Succ)) {
+ Position = Succ->begin();
+ Parent = Succ;
+ } else {
+ return std::make_pair(Position, true);
+ }
+ } while (Parent->empty());
}
Iter Instr = getNextMachineInstrInBB(Position);
if (Instr == Parent->end()) {
return getNextMachineInstr(Instr, Parent);
}
- return Instr;
+ return std::make_pair(Instr, false);
}
bool MipsHazardSchedule::runOnMachineFunction(MachineFunction &MF) {
@@ -145,7 +146,9 @@ bool MipsHazardSchedule::runOnMachineFunction(MachineFunction &MF) {
bool LastInstInFunction =
std::next(I) == FI->end() && std::next(FI) == MF.end();
if (!LastInstInFunction) {
- Inst = getNextMachineInstr(std::next(I), &*FI);
+ std::pair<Iter, bool> Res = getNextMachineInstr(std::next(I), &*FI);
+ LastInstInFunction |= Res.second;
+ Inst = Res.first;
}
if (LastInstInFunction || !TII->SafeInForbiddenSlot(*Inst)) {
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 9c511bd77822..93c5f496ce97 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -112,8 +112,11 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::FIRST_NUMBER: break;
case MipsISD::JmpLink: return "MipsISD::JmpLink";
case MipsISD::TailCall: return "MipsISD::TailCall";
+ case MipsISD::Highest: return "MipsISD::Highest";
+ case MipsISD::Higher: return "MipsISD::Higher";
case MipsISD::Hi: return "MipsISD::Hi";
case MipsISD::Lo: return "MipsISD::Lo";
+ case MipsISD::GotHi: return "MipsISD::GotHi";
case MipsISD::GPRel: return "MipsISD::GPRel";
case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
case MipsISD::Ret: return "MipsISD::Ret";
@@ -144,6 +147,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::Sync: return "MipsISD::Sync";
case MipsISD::Ext: return "MipsISD::Ext";
case MipsISD::Ins: return "MipsISD::Ins";
+ case MipsISD::CIns: return "MipsISD::CIns";
case MipsISD::LWL: return "MipsISD::LWL";
case MipsISD::LWR: return "MipsISD::LWR";
case MipsISD::SWL: return "MipsISD::SWL";
@@ -425,6 +429,7 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::AssertZext);
+ setTargetDAGCombine(ISD::SHL);
if (ABI.IsO32()) {
// These libcalls are not available in 32-bit.
@@ -699,41 +704,81 @@ static SDValue performCMovFPCombine(SDNode *N, SelectionDAG &DAG,
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget &Subtarget) {
- // Pattern match EXT.
- // $dst = and ((sra or srl) $src , pos), (2**size - 1)
- // => ext $dst, $src, size, pos
if (DCI.isBeforeLegalizeOps() || !Subtarget.hasExtractInsert())
return SDValue();
- SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1);
- unsigned ShiftRightOpc = ShiftRight.getOpcode();
-
- // Op's first operand must be a shift right.
- if (ShiftRightOpc != ISD::SRA && ShiftRightOpc != ISD::SRL)
- return SDValue();
+ SDValue FirstOperand = N->getOperand(0);
+ unsigned FirstOperandOpc = FirstOperand.getOpcode();
+ SDValue Mask = N->getOperand(1);
+ EVT ValTy = N->getValueType(0);
+ SDLoc DL(N);
- // The second operand of the shift must be an immediate.
+ uint64_t Pos = 0, SMPos, SMSize;
ConstantSDNode *CN;
- if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1))))
- return SDValue();
-
- uint64_t Pos = CN->getZExtValue();
- uint64_t SMPos, SMSize;
+ SDValue NewOperand;
+ unsigned Opc;
// Op's second operand must be a shifted mask.
if (!(CN = dyn_cast<ConstantSDNode>(Mask)) ||
!isShiftedMask(CN->getZExtValue(), SMPos, SMSize))
return SDValue();
- // Return if the shifted mask does not start at bit 0 or the sum of its size
- // and Pos exceeds the word's size.
- EVT ValTy = N->getValueType(0);
- if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits())
- return SDValue();
+ if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
+ // Pattern match EXT.
+ // $dst = and ((sra or srl) $src , pos), (2**size - 1)
+ // => ext $dst, $src, pos, size
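+    //
+    // e.g. (and (srl $src, 8), 255) becomes ext $dst, $src, 8, 8
+    // (illustrative).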
+
+ // The second operand of the shift must be an immediate.
+ if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
+ return SDValue();
+
+ Pos = CN->getZExtValue();
+
+ // Return if the shifted mask does not start at bit 0 or the sum of its size
+ // and Pos exceeds the word's size.
+ if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits())
+ return SDValue();
+
+ Opc = MipsISD::Ext;
+ NewOperand = FirstOperand.getOperand(0);
+ } else if (FirstOperandOpc == ISD::SHL && Subtarget.hasCnMips()) {
+ // Pattern match CINS.
+ // $dst = and (shl $src , pos), mask
+ // => cins $dst, $src, pos, size
+ // mask is a shifted mask with consecutive 1's, pos = shift amount,
+ // size = population count.
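+    //
+    // e.g. (and (shl $src, 8), 0x00ffff00) has SMPos = 8 and SMSize = 16,
+    // giving cins $dst, $src, 8, 15 (illustrative).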
+
+ // The second operand of the shift must be an immediate.
+ if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
+ return SDValue();
+
+ Pos = CN->getZExtValue();
+
+ if (SMPos != Pos || Pos >= ValTy.getSizeInBits() || SMSize >= 32 ||
+ Pos + SMSize > ValTy.getSizeInBits())
+ return SDValue();
+
+ NewOperand = FirstOperand.getOperand(0);
+    // CIns encodes its size operand as size - 1, so adjust SMSize here.
+ SMSize--;
+ Opc = MipsISD::CIns;
+ } else {
+ // Pattern match EXT.
+    // $dst = and $src, (2**size - 1), if size > 16
+    // => ext $dst, $src, pos, size, with pos = 0
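+    //
+    // e.g. (and $src, 0x3ffff) becomes ext $dst, $src, 0, 18, while a mask of
+    // 0xffff or less is left for andi (illustrative).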
- SDLoc DL(N);
- return DAG.getNode(MipsISD::Ext, DL, ValTy,
- ShiftRight.getOperand(0),
+ // If the mask is <= 0xffff, andi can be used instead.
+ if (CN->getZExtValue() <= 0xffff)
+ return SDValue();
+
+ // Return if the mask doesn't start at position 0.
+ if (SMPos)
+ return SDValue();
+
+ Opc = MipsISD::Ext;
+ NewOperand = FirstOperand;
+ }
+ return DAG.getNode(Opc, DL, ValTy, NewOperand,
DAG.getConstant(Pos, DL, MVT::i32),
DAG.getConstant(SMSize, DL, MVT::i32));
}
@@ -852,6 +897,58 @@ static SDValue performAssertZextCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+
+static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget &Subtarget) {
+ // Pattern match CINS.
+ // $dst = shl (and $src , imm), pos
+ // => cins $dst, $src, pos, size
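+  //
+  // e.g. (shl (and $src, 0xffff), 8) has SMPos = 0 and SMSize = 16, giving
+  // cins $dst, $src, 8, 15 (illustrative).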
+
+ if (DCI.isBeforeLegalizeOps() || !Subtarget.hasCnMips())
+ return SDValue();
+
+ SDValue FirstOperand = N->getOperand(0);
+ unsigned FirstOperandOpc = FirstOperand.getOpcode();
+ SDValue SecondOperand = N->getOperand(1);
+ EVT ValTy = N->getValueType(0);
+ SDLoc DL(N);
+
+ uint64_t Pos = 0, SMPos, SMSize;
+ ConstantSDNode *CN;
+ SDValue NewOperand;
+
+ // The second operand of the shift must be an immediate.
+ if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)))
+ return SDValue();
+
+ Pos = CN->getZExtValue();
+
+ if (Pos >= ValTy.getSizeInBits())
+ return SDValue();
+
+ if (FirstOperandOpc != ISD::AND)
+ return SDValue();
+
+ // AND's second operand must be a shifted mask.
+ if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
+ !isShiftedMask(CN->getZExtValue(), SMPos, SMSize))
+ return SDValue();
+
+ // Return if the shifted mask does not start at bit 0 or the sum of its size
+ // and Pos exceeds the word's size.
+ if (SMPos != 0 || SMSize > 32 || Pos + SMSize > ValTy.getSizeInBits())
+ return SDValue();
+
+ NewOperand = FirstOperand.getOperand(0);
+  // CIns encodes its size operand as size - 1, so adjust SMSize here.
+ SMSize--;
+
+ return DAG.getNode(MipsISD::CIns, DL, ValTy, NewOperand,
+ DAG.getConstant(Pos, DL, MVT::i32),
+ DAG.getConstant(SMSize, DL, MVT::i32));
+}
+
SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
const {
SelectionDAG &DAG = DCI.DAG;
@@ -875,6 +972,8 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
return performADDCombine(N, DAG, DCI, Subtarget);
case ISD::AssertZext:
return performAssertZextCombine(N, DAG, DCI, Subtarget);
+ case ISD::SHL:
+ return performSHLCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
@@ -1733,7 +1832,7 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = N->getGlobal();
- if (!isPositionIndependent() && !ABI.IsN64()) {
+ if (!isPositionIndependent()) {
const MipsTargetObjectFile *TLOF =
static_cast<const MipsTargetObjectFile *>(
getTargetMachine().getObjFileLowering());
@@ -1742,8 +1841,10 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
// %gp_rel relocation
return getAddrGPRel(N, SDLoc(N), Ty, DAG);
- // %hi/%lo relocation
- return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
+ // %hi/%lo relocation
+ return Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG)
+ // %highest/%higher/%hi/%lo relocation
+ : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG);
}
// Every other architecture would use shouldAssumeDSOLocal in here, but
@@ -1777,8 +1878,9 @@ SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
EVT Ty = Op.getValueType();
- if (!isPositionIndependent() && !ABI.IsN64())
- return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
+ if (!isPositionIndependent())
+ return Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG)
+ : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG);
return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64());
}
@@ -1820,8 +1922,9 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(DL).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args));
+ CLI.setDebugLoc(DL)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue Ret = CallResult.first;
@@ -1870,8 +1973,9 @@ lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
EVT Ty = Op.getValueType();
- if (!isPositionIndependent() && !ABI.IsN64())
- return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
+ if (!isPositionIndependent())
+ return Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG)
+ : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG);
return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64());
}
@@ -1882,7 +1986,7 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
EVT Ty = Op.getValueType();
- if (!isPositionIndependent() && !ABI.IsN64()) {
+ if (!isPositionIndependent()) {
const MipsTargetObjectFile *TLOF =
static_cast<const MipsTargetObjectFile *>(
getTargetMachine().getObjFileLowering());
@@ -1892,10 +1996,11 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
// %gp_rel relocation
return getAddrGPRel(N, SDLoc(N), Ty, DAG);
- return getAddrNonPIC(N, SDLoc(N), Ty, DAG);
+ return Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG)
+ : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG);
}
- return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64());
+ return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64());
}
SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
@@ -2796,14 +2901,13 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
- bool IsPICCall = (ABI.IsN64() || IsPIC); // true if calls are translated to
- // jalr $25
+
SDValue CalleeLo;
EVT Ty = Callee.getValueType();
bool GlobalOrExternal = false, IsCallReloc = false;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- if (IsPICCall) {
+ if (IsPIC) {
const GlobalValue *Val = G->getGlobal();
InternalLinkage = Val->hasInternalLinkage();
@@ -2828,7 +2932,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
- if (!ABI.IsN64() && !IsPIC) // !N64 && static
+ if (!IsPIC) // static
Callee = DAG.getTargetExternalSymbol(
Sym, getPointerTy(DAG.getDataLayout()), MipsII::MO_NO_FLAG);
else if (LargeGOT) {
@@ -2836,7 +2940,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MipsII::MO_CALL_LO16, Chain,
FuncInfo->callPtrInfo(Sym));
IsCallReloc = true;
- } else { // N64 || PIC
+ } else { // PIC
Callee = getAddrGlobal(S, DL, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
FuncInfo->callPtrInfo(Sym));
IsCallReloc = true;
@@ -2848,7 +2952,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<SDValue, 8> Ops(1, Chain);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage,
+ getOpndList(Ops, RegsToPass, IsPIC, GlobalOrExternal, InternalLinkage,
IsCallReloc, CLI, Callee, Chain);
if (IsTailCall) {
@@ -3683,7 +3787,9 @@ bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
}
unsigned MipsTargetLowering::getJumpTableEncoding() const {
- if (ABI.IsN64())
+
+ // FIXME: For space reasons this should be: EK_GPRel32BlockAddress.
+ if (ABI.IsN64() && isPositionIndependent())
return MachineJumpTableInfo::EK_GPRel64BlockAddress;
return TargetLowering::getJumpTableEncoding();
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index cddf0903ca6a..2dcafd51061a 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -37,14 +37,23 @@ namespace llvm {
// Tail call
TailCall,
- // Get the Higher 16 bits from a 32-bit immediate
+ // Get the Highest (63-48) 16 bits from a 64-bit immediate
+ Highest,
+
+ // Get the Higher (47-32) 16 bits from a 64-bit immediate
+ Higher,
+
+ // Get the High 16 bits from a 32/64-bit immediate
// No relation with Mips Hi register
Hi,
- // Get the Lower 16 bits from a 32-bit immediate
+ // Get the Lower 16 bits from a 32/64-bit immediate
// No relation with Mips Lo register
Lo,
+ // Get the High 16 bits from a 32 bit immediate for accessing the GOT.
+ GotHi,
+
// Handle gp_rel (small data/bss sections) relocation.
GPRel,
@@ -107,6 +116,7 @@ namespace llvm {
Ext,
Ins,
+ CIns,
// EXTR.W instrinsic nodes.
EXTP,
@@ -297,7 +307,7 @@ namespace llvm {
}
bool isJumpTableRelative() const override {
- return getTargetMachine().isPositionIndependent() || ABI.IsN64();
+ return getTargetMachine().isPositionIndependent();
}
protected:
@@ -344,8 +354,8 @@ namespace llvm {
SelectionDAG &DAG, unsigned HiFlag,
unsigned LoFlag, SDValue Chain,
const MachinePointerInfo &PtrInfo) const {
- SDValue Hi =
- DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(N, Ty, DAG, HiFlag));
+ SDValue Hi = DAG.getNode(MipsISD::GotHi, DL, Ty,
+ getTargetNode(N, Ty, DAG, HiFlag));
Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty));
SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi,
getTargetNode(N, Ty, DAG, LoFlag));
@@ -356,6 +366,8 @@ namespace llvm {
// computing a symbol's address in non-PIC mode:
//
// (add %hi(sym), %lo(sym))
+ //
+ // This method covers O32, N32 and N64 in sym32 mode.
template <class NodeTy>
SDValue getAddrNonPIC(NodeTy *N, const SDLoc &DL, EVT Ty,
SelectionDAG &DAG) const {
@@ -364,7 +376,37 @@ namespace llvm {
return DAG.getNode(ISD::ADD, DL, Ty,
DAG.getNode(MipsISD::Hi, DL, Ty, Hi),
DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
- }
+ }
+
+ // This method creates the following nodes, which are necessary for
+ // computing a symbol's address in non-PIC mode for N64.
+ //
+  // (add (shl (add (shl (add %highest(sym), %higher(sym)), 16), %hi(sym)),
+  //  16), %lo(sym))
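+  //
+  // This typically materializes as the sequence (illustrative):
+  //
+  //   lui    $reg, %highest(sym)
+  //   daddiu $reg, $reg, %higher(sym)
+  //   dsll   $reg, $reg, 16
+  //   daddiu $reg, $reg, %hi(sym)
+  //   dsll   $reg, $reg, 16
+  //   daddiu $reg, $reg, %lo(sym)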
+ //
+  // FIXME: This method is not efficient for (micro)MIPS64R6.
+ template <class NodeTy>
+ SDValue getAddrNonPICSym64(NodeTy *N, const SDLoc &DL, EVT Ty,
+ SelectionDAG &DAG) const {
+ SDValue Hi = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_HI);
+ SDValue Lo = getTargetNode(N, Ty, DAG, MipsII::MO_ABS_LO);
+
+ SDValue Highest =
+ DAG.getNode(MipsISD::Highest, DL, Ty,
+ getTargetNode(N, Ty, DAG, MipsII::MO_HIGHEST));
+ SDValue Higher = getTargetNode(N, Ty, DAG, MipsII::MO_HIGHER);
+ SDValue HigherPart =
+ DAG.getNode(ISD::ADD, DL, Ty, Highest,
+ DAG.getNode(MipsISD::Higher, DL, Ty, Higher));
+ SDValue Cst = DAG.getConstant(16, DL, MVT::i32);
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, Ty, HigherPart, Cst);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, Ty, Shift,
+ DAG.getNode(MipsISD::Hi, DL, Ty, Hi));
+ SDValue Shift2 = DAG.getNode(ISD::SHL, DL, Ty, Add, Cst);
+
+ return DAG.getNode(ISD::ADD, DL, Ty, Shift2,
+ DAG.getNode(MipsISD::Lo, DL, Ty, Lo));
+ }
// This method creates the following nodes, which are necessary for
// computing a symbol's address using gp-relative addressing:
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 19af1914c819..df62c66b75a3 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -482,7 +482,7 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
MIB->RemoveOperand(0);
for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
- MIB.addOperand(I->getOperand(J));
+ MIB.add(I->getOperand(J));
}
MIB.addImm(0);
@@ -492,7 +492,7 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J)
continue;
- MIB.addOperand(I->getOperand(J));
+ MIB.add(I->getOperand(J));
}
}
@@ -501,3 +501,31 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
MIB.setMemRefs(I->memoperands_begin(), I->memoperands_end());
return MIB;
}
+
+bool MipsInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ assert(!MI.isBundle() &&
+ "TargetInstrInfo::findCommutedOpIndices() can't handle bundles");
+
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (!MCID.isCommutable())
+ return false;
+
+ switch (MI.getOpcode()) {
+ case Mips::DPADD_U_H:
+ case Mips::DPADD_U_W:
+ case Mips::DPADD_U_D:
+ case Mips::DPADD_S_H:
+ case Mips::DPADD_S_W:
+ case Mips::DPADD_S_D: {
+ // The first operand is both input and output, so it should not commute
+ if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3))
+ return false;
+
+ if (!MI.getOperand(SrcOpIdx1).isReg() || !MI.getOperand(SrcOpIdx2).isReg())
+ return false;
+ return true;
+ }
+ }
+ return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+}
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 347b9187d08c..45d700d8afd6 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -135,6 +135,9 @@ public:
MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc,
MachineBasicBlock::iterator I) const;
+ bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const override;
+
protected:
bool isZeroImm(const MachineOperand &op) const;
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 5bc48336121a..b90077d7807d 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -59,10 +59,20 @@ def MipsTailCall : SDNode<"MipsISD::TailCall", SDT_MipsJmpLink,
// Hi and Lo nodes are used to handle global addresses. Used on
// MipsISelLowering to lower stuff like GlobalAddress, ExternalSymbol
// static model. (nothing to do with Mips Registers Hi and Lo)
+
+// Hi is the odd one out: on MIPS64 it can expand to either daddiu (when using
+// static relocations with 64-bit symbols) or lui (when using 32-bit symbols).
+def MipsHigher : SDNode<"MipsISD::Higher", SDTIntUnaryOp>;
+def MipsHighest : SDNode<"MipsISD::Highest", SDTIntUnaryOp>;
def MipsHi : SDNode<"MipsISD::Hi", SDTIntUnaryOp>;
def MipsLo : SDNode<"MipsISD::Lo", SDTIntUnaryOp>;
+
def MipsGPRel : SDNode<"MipsISD::GPRel", SDTIntUnaryOp>;
+// Hi node for accessing the GOT.
+def MipsGotHi : SDNode<"MipsISD::GotHi", SDTIntUnaryOp>;
+
// TlsGd node is used to handle General Dynamic TLS
def MipsTlsGd : SDNode<"MipsISD::TlsGd", SDTIntUnaryOp>;
@@ -128,6 +138,7 @@ def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain,SDNPSideEffect]>;
def MipsExt : SDNode<"MipsISD::Ext", SDT_Ext>;
def MipsIns : SDNode<"MipsISD::Ins", SDT_Ins>;
+def MipsCIns : SDNode<"MipsISD::CIns", SDT_Ext>;
def MipsLWL : SDNode<"MipsISD::LWL", SDTMipsLoadLR,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -205,6 +216,10 @@ def HasCnMips : Predicate<"Subtarget->hasCnMips()">,
AssemblerPredicate<"FeatureCnMips">;
def NotCnMips : Predicate<"!Subtarget->hasCnMips()">,
AssemblerPredicate<"!FeatureCnMips">;
+def IsSym32 : Predicate<"Subtarget->hasSym32()">,
+              AssemblerPredicate<"FeatureSym32">;
+def IsSym64 : Predicate<"!Subtarget->hasSym32()">,
+              AssemblerPredicate<"!FeatureSym32">;
def RelocNotPIC : Predicate<"!TM.isPositionIndependent()">;
def RelocPIC : Predicate<"TM.isPositionIndependent()">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
@@ -237,6 +252,14 @@ class PTR_32 { list<Predicate> PTRPredicates = [IsPTR32bit]; }
class PTR_64 { list<Predicate> PTRPredicates = [IsPTR64bit]; }
//===----------------------------------------------------------------------===//
+// Mips symbol size adjectives.
+// They are mutually exclusive.
+//===----------------------------------------------------------------------===//
+
+class SYM_32 { list<Predicate> SYMPredicates = [IsSym32]; }
+class SYM_64 { list<Predicate> SYMPredicates = [IsSym64]; }
+
+//===----------------------------------------------------------------------===//
// Mips ISA/ASE membership and instruction group membership adjectives.
// They are mutually exclusive.
//===----------------------------------------------------------------------===//
@@ -519,7 +542,7 @@ def UImm32CoercedAsmOperandClass : UImmAnyAsmOperandClass<33, []> {
def SImm32RelaxedAsmOperandClass
: SImmAsmOperandClass<32, [UImm32CoercedAsmOperandClass]> {
let Name = "SImm32_Relaxed";
- let PredicateMethod = "isAnyImm<32>";
+ let PredicateMethod = "isAnyImm<33>";
let DiagnosticType = "SImm32_Relaxed";
}
def SImm32AsmOperandClass
@@ -1150,6 +1173,10 @@ def immZExt5Plus33 : PatLeaf<(imm), [{
return isUInt<5>(N->getZExtValue() - 33);
}]>;
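+// Transform an immediate imm into 31 - imm (e.g. 8 becomes 23).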
+def immZExt5To31 : SDNodeXForm<imm, [{
+ return getImm(N, 31 - N->getZExtValue());
+}]>;
+
// True if (N + 1) fits in 16-bit field.
def immSExt16Plus1 : PatLeaf<(imm), [{
return isInt<17>(N->getSExtValue()) && isInt<16>(N->getSExtValue() + 1);
@@ -2281,9 +2308,38 @@ def SEQIMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
def : MipsInstAlias<"seq $rd, $imm",
(SEQIMacro GPR32Opnd:$rd, GPR32Opnd:$rd, simm32:$imm), 0>,
NOT_ASE_CNMIPS;
+
+def MULImmMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rd, GPR32Opnd:$rs,
+ simm32_relaxed:$imm),
+ "mul\t$rd, $rs, $imm">,
+ ISA_MIPS1_NOT_32R6_64R6;
+def MULOMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rd, GPR32Opnd:$rs,
+ GPR32Opnd:$rt),
+ "mulo\t$rd, $rs, $rt">,
+ ISA_MIPS1_NOT_32R6_64R6;
+def MULOUMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rd, GPR32Opnd:$rs,
+ GPR32Opnd:$rt),
+ "mulou\t$rd, $rs, $rt">,
+ ISA_MIPS1_NOT_32R6_64R6;
+
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
+
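+// Expands both the three-operand "op $rs, $rt, $imm" form and the two-operand
+// "op $rs, $imm" shorthand of a macro immediate to the given instruction.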
+multiclass OneOrTwoOperandMacroImmediateAlias<string Mnemonic,
+                                              Instruction Opcode,
+                                              RegisterOperand RO = GPR32Opnd,
+                                              Operand Imm = simm32_relaxed> {
+  def : MipsInstAlias<!strconcat(Mnemonic, " $rs, $rt, $imm"),
+                      (Opcode RO:$rs,
+                              RO:$rt,
+                              Imm:$imm), 0>;
+  def : MipsInstAlias<!strconcat(Mnemonic, " $rs, $imm"),
+                      (Opcode RO:$rs,
+                              RO:$rs,
+                              Imm:$imm), 0>;
+}
+
def : MipsInstAlias<"move $dst, $src",
(OR GPR32Opnd:$dst, GPR32Opnd:$src, ZERO), 1>,
GPR_32 {
@@ -2296,26 +2352,7 @@ def : MipsInstAlias<"move $dst, $src",
}
def : MipsInstAlias<"bal $offset", (BGEZAL ZERO, brtarget:$offset), 0>,
ISA_MIPS1_NOT_32R6_64R6;
-def : MipsInstAlias<
- "addu $rs, $rt, $imm",
- (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rt, simm32_relaxed:$imm), 0>;
-def : MipsInstAlias<
- "addu $rs, $imm",
- (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rs, simm32_relaxed:$imm), 0>;
-def : MipsInstAlias<
- "add $rs, $rt, $imm",
- (ADDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm32_relaxed:$imm), 0>,
- ISA_MIPS1_NOT_32R6_64R6;
-def : MipsInstAlias<
- "add $rs, $imm",
- (ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm32_relaxed:$imm), 0>,
- ISA_MIPS1_NOT_32R6_64R6;
-def : MipsInstAlias<
- "and $rs, $rt, $imm",
- (ANDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm32_relaxed:$imm), 0>;
-def : MipsInstAlias<
- "and $rs, $imm",
- (ANDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm32_relaxed:$imm), 0>;
+
def : MipsInstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>;
let Predicates = [NotInMicroMips] in {
def : MipsInstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>;
@@ -2343,36 +2380,26 @@ let AdditionalPredicates = [NotInMicroMips] in {
"sgtu $$rs, $rt",
(SLTu GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
def : MipsInstAlias<
- "slt $rs, $rt, $imm",
- (SLTi GPR32Opnd:$rs, GPR32Opnd:$rt, simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<
- "sltu $rt, $rs, $imm",
- (SLTiu GPR32Opnd:$rt, GPR32Opnd:$rs, simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<
- "and $rs, $rt, $imm",
- (ANDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<
- "and $rs, $imm",
- (ANDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<
- "xor $rs, $rt, $imm",
- (XORi GPR32Opnd:$rs, GPR32Opnd:$rt, simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<
- "xor $rs, $imm",
- (XORi GPR32Opnd:$rs, GPR32Opnd:$rs, simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<
- "or $rs, $rt, $imm",
- (ORi GPR32Opnd:$rs, GPR32Opnd:$rt, simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<
- "or $rs, $imm",
- (ORi GPR32Opnd:$rs, GPR32Opnd:$rs, simm32_relaxed:$imm), 0>;
- def : MipsInstAlias<
"not $rt, $rs",
(NOR GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>;
def : MipsInstAlias<
"not $rt",
(NOR GPR32Opnd:$rt, GPR32Opnd:$rt, ZERO), 0>;
def : MipsInstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"add", ADDi>, ISA_MIPS1_NOT_32R6_64R6;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"addu", ADDiu>;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"and", ANDi>, GPR_32;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"or", ORi>, GPR_32;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"xor", XORi>, GPR_32;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"slt", SLTi>, GPR_32;
+
+ defm : OneOrTwoOperandMacroImmediateAlias<"sltu", SLTiu>, GPR_32;
}
def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, COP0Opnd:$rd, 0), 0>;
def : MipsInstAlias<"mtc0 $rt, $rd", (MTC0 COP0Opnd:$rd, GPR32Opnd:$rt, 0), 0>;
@@ -2445,6 +2472,14 @@ let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"sdbbp", (SDBBP 0)>, ISA_MIPS32_NOT_32R6_64R6;
def : MipsInstAlias<"sync",
(SYNC 0), 1>, ISA_MIPS2;
+
+def : MipsInstAlias<"mulo $rs, $rt",
+ (MULOMacro GPR32Opnd:$rs, GPR32Opnd:$rs, GPR32Opnd:$rt), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def : MipsInstAlias<"mulou $rs, $rt",
+ (MULOUMacro GPR32Opnd:$rs, GPR32Opnd:$rs, GPR32Opnd:$rt), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
+
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -2472,9 +2507,12 @@ def JalTwoReg : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rs),
def JalOneReg : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs),
"jal\t$rs"> ;
-def NORImm : MipsAsmPseudoInst<
- (outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm32:$imm),
- "nor\t$rs, $rt, $imm"> ;
+class NORIMM_DESC_BASE<RegisterOperand RO, DAGOperand Imm> :
+ MipsAsmPseudoInst<(outs RO:$rs), (ins RO:$rt, Imm:$imm),
+ "nor\t$rs, $rt, $imm">;
+def NORImm : NORIMM_DESC_BASE<GPR32Opnd, simm32_relaxed>, GPR_32;
+def : MipsInstAlias<"nor\t$rs, $imm", (NORImm GPR32Opnd:$rs, GPR32Opnd:$rs,
+ simm32_relaxed:$imm)>, GPR_32;
let hasDelaySlot = 1, isCTI = 1 in {
def BneImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rt),
@@ -2512,6 +2550,9 @@ class CondBranchImmPseudo<string instr_asm> :
MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, imm64:$imm, brtarget:$offset),
!strconcat(instr_asm, "\t$rs, $imm, $offset")>;
+def BEQLImmMacro : CondBranchImmPseudo<"beql">, ISA_MIPS2_NOT_32R6_64R6;
+def BNELImmMacro : CondBranchImmPseudo<"bnel">, ISA_MIPS2_NOT_32R6_64R6;
+
def BLTImmMacro : CondBranchImmPseudo<"blt">;
def BLEImmMacro : CondBranchImmPseudo<"ble">;
def BGEImmMacro : CondBranchImmPseudo<"bge">;
@@ -2535,34 +2576,46 @@ def BGTULImmMacro : CondBranchImmPseudo<"bgtul">, ISA_MIPS2_NOT_32R6_64R6;
// Once the tablegen-erated errors are made better, this needs to be fixed and
// predicates needs to be restored.
-def SDivMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+def SDivMacro : MipsAsmPseudoInst<(outs GPR32NonZeroOpnd:$rd),
(ins GPR32Opnd:$rs, GPR32Opnd:$rt),
"div\t$rd, $rs, $rt">,
ISA_MIPS1_NOT_32R6_64R6;
+def SDivIMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, simm32:$imm),
+ "div\t$rd, $rs, $imm">,
+ ISA_MIPS1_NOT_32R6_64R6;
def UDivMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
(ins GPR32Opnd:$rs, GPR32Opnd:$rt),
"divu\t$rd, $rs, $rt">,
ISA_MIPS1_NOT_32R6_64R6;
-def : MipsInstAlias<"div $rt, $rs", (SDivMacro GPR32Opnd:$rt, GPR32Opnd:$rt,
- GPR32Opnd:$rs), 0>,
+def UDivIMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, simm32:$imm),
+ "divu\t$rd, $rs, $imm">,
+ ISA_MIPS1_NOT_32R6_64R6;
+
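+// When the first operand is $zero the two-operand form selects the bare
+// divide instruction (results in HI/LO); otherwise it expands to the div
+// macro.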
+def : MipsInstAlias<"div $rs, $rt", (SDIV GPR32ZeroOpnd:$rs,
+ GPR32Opnd:$rt), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def : MipsInstAlias<"div $rs, $rt", (SDivMacro GPR32NonZeroOpnd:$rs,
+ GPR32NonZeroOpnd:$rs,
+ GPR32Opnd:$rt), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def : MipsInstAlias<"div $rd, $imm", (SDivIMacro GPR32Opnd:$rd, GPR32Opnd:$rd,
+ simm32:$imm), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
+
+def : MipsInstAlias<"divu $rt, $rs", (UDIV GPR32ZeroOpnd:$rt,
+ GPR32Opnd:$rs), 0>,
ISA_MIPS1_NOT_32R6_64R6;
-def : MipsInstAlias<"divu $rt, $rs", (UDivMacro GPR32Opnd:$rt, GPR32Opnd:$rt,
+def : MipsInstAlias<"divu $rt, $rs", (UDivMacro GPR32NonZeroOpnd:$rt,
+ GPR32NonZeroOpnd:$rt,
GPR32Opnd:$rs), 0>,
ISA_MIPS1_NOT_32R6_64R6;
-def DSDivMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
- (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
- "ddiv\t$rd, $rs, $rt">,
- ISA_MIPS64_NOT_64R6;
-def DUDivMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
- (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
- "ddivu\t$rd, $rs, $rt">,
- ISA_MIPS64_NOT_64R6;
-def : MipsInstAlias<"ddiv $rt, $rs", (DSDivMacro GPR32Opnd:$rt, GPR32Opnd:$rt,
- GPR32Opnd:$rs), 0>,
- ISA_MIPS64_NOT_64R6;
-def : MipsInstAlias<"ddivu $rt, $rs", (DUDivMacro GPR32Opnd:$rt, GPR32Opnd:$rt,
- GPR32Opnd:$rs), 0>,
- ISA_MIPS64_NOT_64R6;
+
+def : MipsInstAlias<"divu $rd, $imm", (UDivIMacro GPR32Opnd:$rd, GPR32Opnd:$rd,
+ simm32:$imm), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
def Ulh : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr),
"ulh\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6;
@@ -2647,30 +2700,40 @@ def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)),
(TAILCALL texternalsym:$dst)>;
// hi/lo relocs
-def : MipsPat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
-def : MipsPat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
-def : MipsPat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
-def : MipsPat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
-def : MipsPat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
-def : MipsPat<(MipsHi texternalsym:$in), (LUi texternalsym:$in)>;
-
-def : MipsPat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
-def : MipsPat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
-def : MipsPat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
-def : MipsPat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
-def : MipsPat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
-def : MipsPat<(MipsLo texternalsym:$in), (ADDiu ZERO, texternalsym:$in)>;
-
-def : MipsPat<(add GPR32:$hi, (MipsLo tglobaladdr:$lo)),
- (ADDiu GPR32:$hi, tglobaladdr:$lo)>;
-def : MipsPat<(add GPR32:$hi, (MipsLo tblockaddress:$lo)),
- (ADDiu GPR32:$hi, tblockaddress:$lo)>;
-def : MipsPat<(add GPR32:$hi, (MipsLo tjumptable:$lo)),
- (ADDiu GPR32:$hi, tjumptable:$lo)>;
-def : MipsPat<(add GPR32:$hi, (MipsLo tconstpool:$lo)),
- (ADDiu GPR32:$hi, tconstpool:$lo)>;
-def : MipsPat<(add GPR32:$hi, (MipsLo tglobaltlsaddr:$lo)),
- (ADDiu GPR32:$hi, tglobaltlsaddr:$lo)>;
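+// The %hi/%lo patterns below are parameterized over the lui/addiu opcodes and
+// the zero register so that 64-bit variants can reuse them.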
+multiclass MipsHiLoRelocs<Instruction Lui, Instruction Addiu,
+ Register ZeroReg, RegisterOperand GPROpnd> {
+ def : MipsPat<(MipsHi tglobaladdr:$in), (Lui tglobaladdr:$in)>;
+ def : MipsPat<(MipsHi tblockaddress:$in), (Lui tblockaddress:$in)>;
+ def : MipsPat<(MipsHi tjumptable:$in), (Lui tjumptable:$in)>;
+ def : MipsPat<(MipsHi tconstpool:$in), (Lui tconstpool:$in)>;
+ def : MipsPat<(MipsHi tglobaltlsaddr:$in), (Lui tglobaltlsaddr:$in)>;
+ def : MipsPat<(MipsHi texternalsym:$in), (Lui texternalsym:$in)>;
+
+ def : MipsPat<(MipsLo tglobaladdr:$in), (Addiu ZeroReg, tglobaladdr:$in)>;
+ def : MipsPat<(MipsLo tblockaddress:$in),
+ (Addiu ZeroReg, tblockaddress:$in)>;
+ def : MipsPat<(MipsLo tjumptable:$in), (Addiu ZeroReg, tjumptable:$in)>;
+ def : MipsPat<(MipsLo tconstpool:$in), (Addiu ZeroReg, tconstpool:$in)>;
+ def : MipsPat<(MipsLo tglobaltlsaddr:$in),
+ (Addiu ZeroReg, tglobaltlsaddr:$in)>;
+ def : MipsPat<(MipsLo texternalsym:$in), (Addiu ZeroReg, texternalsym:$in)>;
+
+ def : MipsPat<(add GPROpnd:$hi, (MipsLo tglobaladdr:$lo)),
+ (Addiu GPROpnd:$hi, tglobaladdr:$lo)>;
+ def : MipsPat<(add GPROpnd:$hi, (MipsLo tblockaddress:$lo)),
+ (Addiu GPROpnd:$hi, tblockaddress:$lo)>;
+ def : MipsPat<(add GPROpnd:$hi, (MipsLo tjumptable:$lo)),
+ (Addiu GPROpnd:$hi, tjumptable:$lo)>;
+ def : MipsPat<(add GPROpnd:$hi, (MipsLo tconstpool:$lo)),
+ (Addiu GPROpnd:$hi, tconstpool:$lo)>;
+ def : MipsPat<(add GPROpnd:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (Addiu GPROpnd:$hi, tglobaltlsaddr:$lo)>;
+}
+
+defm : MipsHiLoRelocs<LUi, ADDiu, ZERO, GPR32Opnd>;
+
+def : MipsPat<(MipsGotHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
+def : MipsPat<(MipsGotHi texternalsym:$in), (LUi texternalsym:$in)>;
// gp_rel relocs
def : MipsPat<(add GPR32:$gp, (MipsGPRel tglobaladdr:$in)),
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 1087d0e0140e..100503700a72 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -13,20 +13,31 @@
// FIXME: Fix pc-region jump instructions which cross 256MB segment boundaries.
//===----------------------------------------------------------------------===//
-#include "Mips.h"
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MCTargetDesc/MipsMCNaCl.h"
+#include "Mips.h"
+#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
+#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
using namespace llvm;
@@ -47,21 +58,23 @@ static cl::opt<bool> ForceLongBranch(
cl::Hidden);
namespace {
+
typedef MachineBasicBlock::iterator Iter;
typedef MachineBasicBlock::reverse_iterator ReverseIter;
struct MBBInfo {
- uint64_t Size, Address;
- bool HasLongBranch;
- MachineInstr *Br;
+ uint64_t Size = 0;
+ uint64_t Address;
+ bool HasLongBranch = false;
+ MachineInstr *Br = nullptr;
- MBBInfo() : Size(0), HasLongBranch(false), Br(nullptr) {}
+ MBBInfo() = default;
};
class MipsLongBranch : public MachineFunctionPass {
-
public:
static char ID;
+
MipsLongBranch(TargetMachine &tm)
: MachineFunctionPass(ID), TM(tm), IsPIC(TM.isPositionIndependent()),
ABI(static_cast<const MipsTargetMachine &>(TM).getABI()) {}
@@ -92,13 +105,8 @@ namespace {
};
char MipsLongBranch::ID = 0;
-} // end of anonymous namespace
-/// createMipsLongBranchPass - Returns a pass that converts branches to long
-/// branches.
-FunctionPass *llvm::createMipsLongBranchPass(MipsTargetMachine &tm) {
- return new MipsLongBranch(tm);
-}
+} // end anonymous namespace
/// Iterate over list of Br's operands and search for a MachineBasicBlock
/// operand.
@@ -530,3 +538,9 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
return true;
}
+
+/// createMipsLongBranchPass - Returns a pass that converts branches to long
+/// branches.
+FunctionPass *llvm::createMipsLongBranchPass(MipsTargetMachine &tm) {
+ return new MipsLongBranch(tm);
+}
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index d0609b15341d..5bf4c958c7b9 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -7,16 +7,15 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "MipsInstrInfo.h"
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsMachineFunction.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -24,7 +23,7 @@ static cl::opt<bool>
FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true),
cl::desc("Always use $gp as the global base register."));
-MipsFunctionInfo::~MipsFunctionInfo() {}
+MipsFunctionInfo::~MipsFunctionInfo() = default;
bool MipsFunctionInfo::globalBaseRegSet() const {
return GlobalBaseReg;
@@ -101,4 +100,4 @@ int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) {
return MoveF64ViaSpillFI;
}
-void MipsFunctionInfo::anchor() { }
+void MipsFunctionInfo::anchor() {}
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index c9e5fddc1932..553a66703b26 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -1,4 +1,4 @@
-//===-- MipsMachineFunctionInfo.h - Private data used for Mips ----*- C++ -*-=//
+//===- MipsMachineFunctionInfo.h - Private data used for Mips ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,12 +15,8 @@
#define LLVM_LIB_TARGET_MIPS_MIPSMACHINEFUNCTION_H
#include "Mips16HardFloatInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
#include <map>
namespace llvm {
@@ -29,12 +25,9 @@ namespace llvm {
/// Mips target-specific information for each MachineFunction.
class MipsFunctionInfo : public MachineFunctionInfo {
public:
- MipsFunctionInfo(MachineFunction &MF)
- : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0),
- CallsEhReturn(false), IsISR(false), SaveS2(false),
- MoveF64ViaSpillFI(-1) {}
+ MipsFunctionInfo(MachineFunction &MF) : MF(MF) {}
- ~MipsFunctionInfo();
+ ~MipsFunctionInfo() override;
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
@@ -81,25 +74,26 @@ public:
int getMoveF64ViaSpillFI(const TargetRegisterClass *RC);
- std::map<const char *, const llvm::Mips16HardFloatInfo::FuncSignature *>
+ std::map<const char *, const Mips16HardFloatInfo::FuncSignature *>
StubsNeeded;
private:
virtual void anchor();
MachineFunction& MF;
+
/// SRetReturnReg - Some subtargets require that sret lowering includes
/// returning the value of the returned struct in a register. This field
/// holds the virtual register into which the sret argument is passed.
- unsigned SRetReturnReg;
+ unsigned SRetReturnReg = 0;
/// GlobalBaseReg - keeps track of the virtual register initialized for
/// use as the global base register. This is used for PIC in some PIC
/// relocation models.
- unsigned GlobalBaseReg;
+ unsigned GlobalBaseReg = 0;
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
- int VarArgsFrameIndex;
+ int VarArgsFrameIndex = 0;
/// True if function has a byval argument.
bool HasByvalArg;
@@ -108,25 +102,25 @@ private:
unsigned IncomingArgSize;
/// CallsEhReturn - Whether the function calls llvm.eh.return.
- bool CallsEhReturn;
+ bool CallsEhReturn = false;
/// Frame objects for spilling eh data registers.
int EhDataRegFI[4];
/// ISR - Whether the function is an Interrupt Service Routine.
- bool IsISR;
+ bool IsISR = false;
/// Frame objects for spilling C0_STATUS, C0_EPC
int ISRDataRegFI[2];
// saveS2
- bool SaveS2;
+ bool SaveS2 = false;
/// FrameIndex for expanding BuildPairF64 nodes to spill and reload when the
/// O32 FPXX ABI is enabled. -1 is used to denote invalid index.
- int MoveF64ViaSpillFI;
+ int MoveF64ViaSpillFI = -1;
};
-} // end of namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_MIPS_MIPSMACHINEFUNCTION_H
diff --git a/lib/Target/Mips/MipsOptionRecord.h b/lib/Target/Mips/MipsOptionRecord.h
index 23f0b7070d62..4708784063d3 100644
--- a/lib/Target/Mips/MipsOptionRecord.h
+++ b/lib/Target/Mips/MipsOptionRecord.h
@@ -1,4 +1,4 @@
-//===-- MipsOptionRecord.h - Abstraction for storing information ----------===//
+//===- MipsOptionRecord.h - Abstraction for storing information -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,14 +23,16 @@
#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include <cstdint>
namespace llvm {
+
class MipsELFStreamer;
-class MCSubtargetInfo;
class MipsOptionRecord {
public:
- virtual ~MipsOptionRecord(){};
+ virtual ~MipsOptionRecord() = default;
+
virtual void EmitMipsOptionRecord() = 0;
};
@@ -53,7 +55,8 @@ public:
COP2RegClass = &(TRI->getRegClass(Mips::COP2RegClassID));
COP3RegClass = &(TRI->getRegClass(Mips::COP3RegClassID));
}
- ~MipsRegInfoRecord() override {}
+
+ ~MipsRegInfoRecord() override = default;
void EmitMipsOptionRecord() override;
void SetPhysRegUsed(unsigned Reg, const MCRegisterInfo *MCRegInfo);
@@ -74,5 +77,7 @@ private:
uint32_t ri_cprmask[4];
int64_t ri_gp_value;
};
-} // namespace llvm
-#endif
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_MIPS_MIPSOPTIONRECORD_H
diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp
index 51ac5620f585..670b6c96e78e 100644
--- a/lib/Target/Mips/MipsOs16.cpp
+++ b/lib/Target/Mips/MipsOs16.cpp
@@ -57,7 +57,7 @@ static bool needsFPFromSig(Function &F) {
;
}
if (F.arg_size() >= 1) {
- Argument &Arg = F.getArgumentList().front();
+ Argument &Arg = *F.arg_begin();
switch (Arg.getType()->getTypeID()) {
case Type::FloatTyID:
case Type::DoubleTyID:
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 8c82239ebbd3..ccfdcc89b078 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -290,6 +290,25 @@ class GPR32Class<list<ValueType> regTypes> :
K0, K1, GP, SP, FP, RA)>;
def GPR32 : GPR32Class<[i32]>;
+
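+// Register classes that single out $zero, so the assembler can distinguish
+// the bare divide instruction from the div macro form.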
+def GPR32ZERO : RegisterClass<"Mips", [i32], 32, (add
+ // Reserved
+ ZERO)>;
+
+def GPR32NONZERO : RegisterClass<"Mips", [i32], 32, (add
+ // Reserved
+ AT,
+ // Return Values and Arguments
+ V0, V1, A0, A1, A2, A3,
+ // Not preserved across procedure calls
+ T0, T1, T2, T3, T4, T5, T6, T7,
+ // Callee save
+ S0, S1, S2, S3, S4, S5, S6, S7,
+ // Not preserved across procedure calls
+ T8, T9,
+ // Reserved
+ K0, K1, GP, SP, FP, RA)>;
+
def DSPR : GPR32Class<[v4i8, v2i16]>;
def GPRMM16 : RegisterClass<"Mips", [i32], 32, (add
@@ -317,7 +336,7 @@ def GPRMM16MoveP : RegisterClass<"Mips", [i32], 32, (add
S0, S2, S3, S4)>;
def GPR64 : RegisterClass<"Mips", [i64], 64, (add
-// Reserved
+ // Reserved
ZERO_64, AT_64,
// Return Values and Arguments
V0_64, V1_64, A0_64, A1_64, A2_64, A3_64,
@@ -479,6 +498,16 @@ def GPR64AsmOperand : MipsAsmRegOperand {
let PredicateMethod = "isGPRAsmReg";
}
+def GPR32ZeroAsmOperand : MipsAsmRegOperand {
+ let Name = "GPR32ZeroAsmReg";
+ let PredicateMethod = "isGPRZeroAsmReg";
+}
+
+def GPR32NonZeroAsmOperand : MipsAsmRegOperand {
+ let Name = "GPR32NonZeroAsmReg";
+ let PredicateMethod = "isGPRNonZeroAsmReg";
+}
+
def GPR32AsmOperand : MipsAsmRegOperand {
let Name = "GPR32AsmReg";
let PredicateMethod = "isGPRAsmReg";
@@ -550,6 +579,14 @@ def MSACtrlAsmOperand : MipsAsmRegOperand {
let Name = "MSACtrlAsmReg";
}
+def GPR32ZeroOpnd : RegisterOperand<GPR32ZERO> {
+ let ParserMatchClass = GPR32ZeroAsmOperand;
+}
+
+def GPR32NonZeroOpnd : RegisterOperand<GPR32NONZERO> {
+ let ParserMatchClass = GPR32NonZeroAsmOperand;
+}
+
def GPR32Opnd : RegisterOperand<GPR32> {
let ParserMatchClass = GPR32AsmOperand;
}
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 4996d070eb29..ef8d18c6deb1 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -11,27 +11,42 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsSEFrameLowering.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsMachineFunction.h"
+#include "MipsRegisterInfo.h"
+#include "MipsSEFrameLowering.h"
#include "MipsSEInstrInfo.h"
#include "MipsSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <utility>
+#include <vector>
using namespace llvm;
-namespace {
-typedef MachineBasicBlock::iterator Iter;
-
static std::pair<unsigned, unsigned> getMFHiLoOpc(unsigned Src) {
if (Mips::ACC64RegClass.contains(Src))
return std::make_pair((unsigned)Mips::PseudoMFHI,
@@ -47,6 +62,8 @@ static std::pair<unsigned, unsigned> getMFHiLoOpc(unsigned Src) {
return std::make_pair(0, 0);
}
+namespace {
+
/// Helper class to expand pseudos.
class ExpandPseudo {
public:
@@ -54,6 +71,8 @@ public:
bool expand();
private:
+ typedef MachineBasicBlock::iterator Iter;
+
bool expandInstr(MachineBasicBlock &MBB, Iter I);
void expandLoadCCond(MachineBasicBlock &MBB, Iter I);
void expandStoreCCond(MachineBasicBlock &MBB, Iter I);
@@ -74,7 +93,8 @@ private:
const MipsSEInstrInfo &TII;
const MipsRegisterInfo &RegInfo;
};
-}
+
+} // end anonymous namespace
ExpandPseudo::ExpandPseudo(MachineFunction &MF_)
: MF(MF_), MRI(MF.getRegInfo()),
@@ -419,7 +439,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- if (CSI.size()) {
+ if (!CSI.empty()) {
// Find the instruction past the last instruction that saves a callee-saved
// register to the stack.
for (unsigned i = 0; i < CSI.size(); ++i)
@@ -471,7 +491,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
} else {
// Reg is either in GPR32 or FGR32.
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, 1), Offset));
+ nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
@@ -534,7 +554,6 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
void MipsSEFrameLowering::emitInterruptPrologueStub(
MachineFunction &MF, MachineBasicBlock &MBB) const {
-
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -722,7 +741,6 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
void MipsSEFrameLowering::emitInterruptEpilogueStub(
MachineFunction &MF, MachineBasicBlock &MBB) const {
-
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -820,7 +838,6 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool
MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
-
// Reserve call frame if the size of the maximum call frame fits into 16-bit
// immediate field and there are no variable sized objects on the stack.
// Make sure the second register scavenger spill slot can be accessed with one
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index 63cd3cebc56a..bf30deb1905e 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -1,4 +1,4 @@
-//===-- MipsSEFrameLowering.h - Mips32/64 frame lowering --------*- C++ -*-===//
+//===- MipsSEFrameLowering.h - Mips32/64 frame lowering ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,6 +15,8 @@
#define LLVM_LIB_TARGET_MIPS_MIPSSEFRAMELOWERING_H
#include "MipsFrameLowering.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include <vector>
namespace llvm {
@@ -47,6 +49,7 @@ private:
void emitInterruptPrologueStub(MachineFunction &MF,
MachineBasicBlock &MBB) const;
};
-} // End llvm namespace
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_MIPS_MIPSSEFRAMELOWERING_H
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 92d3c001df94..c9cf9363b8c9 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -97,11 +97,13 @@ bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI,
// Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
if ((MI.getOpcode() == Mips::ADDiu) &&
(MI.getOperand(1).getReg() == Mips::ZERO) &&
+ (MI.getOperand(2).isImm()) &&
(MI.getOperand(2).getImm() == 0)) {
DstReg = MI.getOperand(0).getReg();
ZeroReg = Mips::ZERO;
} else if ((MI.getOpcode() == Mips::DADDiu) &&
(MI.getOperand(1).getReg() == Mips::ZERO_64) &&
+ (MI.getOperand(2).isImm()) &&
(MI.getOperand(2).getImm() == 0)) {
DstReg = MI.getOperand(0).getReg();
ZeroReg = Mips::ZERO_64;
@@ -690,7 +692,7 @@ bool MipsSEDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const {
// as the original value.
if (ImmValue == ~(~ImmValue & ~(~ImmValue + 1))) {
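+    // The immediate operand encodes the number of mask bits minus one.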
- Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), SDLoc(N),
+ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N),
EltTy);
return true;
}
@@ -722,7 +724,7 @@ bool MipsSEDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const {
// Extract the run of set bits starting with bit zero, and test that the
// result is the same as the original value
if (ImmValue == (ImmValue & ~(ImmValue + 1))) {
- Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), SDLoc(N),
+ Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N),
EltTy);
return true;
}
@@ -932,6 +934,9 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
// same set of registers. Similarly, ldi.h isn't capable of producing {
// 0x00000000, 0x00000001, 0x00000000, 0x00000001 } but 'ldi.d wd, 1' can.
+ const MipsABIInfo &ABI =
+ static_cast<const MipsTargetMachine &>(TM).getABI();
+
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Node);
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
@@ -969,13 +974,233 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
break;
}
- if (!SplatValue.isSignedIntN(10))
- return false;
-
- SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL,
- ViaVecTy.getVectorElementType());
+ SDNode *Res;
- SDNode *Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm);
+ // If we have a signed 10 bit integer, we can splat it directly.
+ //
+ // If we have something bigger we can synthesize the value into a GPR and
+ // splat from there.
+ if (SplatValue.isSignedIntN(10)) {
+ SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL,
+ ViaVecTy.getVectorElementType());
+
+ Res = CurDAG->getMachineNode(LdiOp, DL, ViaVecTy, Imm);
+ } else if (SplatValue.isSignedIntN(16) &&
+ ((ABI.IsO32() && SplatBitSize < 64) ||
+ (ABI.IsN32() || ABI.IsN64()))) {
+ // Only handle signed 16 bit values when the element size is GPR width.
+ // MIPS64 can handle all the cases but MIPS32 would need to handle
+ // negative cases specifically here. Instead, handle those cases as
+ // 64bit values.
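+      //
+      // e.g. a v8i16 splat of 4000 is synthesized roughly as (illustrative):
+      //
+      //   addiu  $tmp, $zero, 4000
+      //   fill.h $wd, $tmp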
+
+ bool Is32BitSplat = ABI.IsO32() || SplatBitSize < 64;
+ const unsigned ADDiuOp = Is32BitSplat ? Mips::ADDiu : Mips::DADDiu;
+ const MVT SplatMVT = Is32BitSplat ? MVT::i32 : MVT::i64;
+ SDValue ZeroVal = CurDAG->getRegister(
+ Is32BitSplat ? Mips::ZERO : Mips::ZERO_64, SplatMVT);
+
+ const unsigned FILLOp =
+ SplatBitSize == 16
+ ? Mips::FILL_H
+ : (SplatBitSize == 32 ? Mips::FILL_W
+ : (SplatBitSize == 64 ? Mips::FILL_D : 0));
+
+ assert(FILLOp != 0 && "Unknown FILL Op for splat synthesis!");
+ assert((!ABI.IsO32() || (FILLOp != Mips::FILL_D)) &&
+ "Attempting to use fill.d on MIPS32!");
+
+ const unsigned Lo = SplatValue.getLoBits(16).getZExtValue();
+ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, SplatMVT);
+
+ Res = CurDAG->getMachineNode(ADDiuOp, DL, SplatMVT, ZeroVal, LoVal);
+ Res = CurDAG->getMachineNode(FILLOp, DL, ViaVecTy, SDValue(Res, 0));
+
+ } else if (SplatValue.isSignedIntN(32) && SplatBitSize == 32) {
+ // Only handle the cases where the splat size agrees with the size
+ // of the SplatValue here.
+ const unsigned Lo = SplatValue.getLoBits(16).getZExtValue();
+ const unsigned Hi = SplatValue.lshr(16).getLoBits(16).getZExtValue();
+ SDValue ZeroVal = CurDAG->getRegister(Mips::ZERO, MVT::i32);
+
+ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32);
+ SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32);
+
+ if (Hi)
+ Res = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HiVal);
+
+ if (Lo)
+ Res = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32,
+ Hi ? SDValue(Res, 0) : ZeroVal, LoVal);
+
+ assert((Hi || Lo) && "Zero case reached 32 bit case splat synthesis!");
+ Res = CurDAG->getMachineNode(Mips::FILL_W, DL, MVT::v4i32, SDValue(Res, 0));
+
+ } else if (SplatValue.isSignedIntN(32) && SplatBitSize == 64 &&
+ (ABI.IsN32() || ABI.IsN64())) {
+      // N32 and N64 can perform some tricks that O32 can't for signed 32-bit
+      // integers because they have 64-bit registers. lui performs the
+      // necessary zero/sign extension.
+ const unsigned Lo = SplatValue.getLoBits(16).getZExtValue();
+ const unsigned Hi = SplatValue.lshr(16).getLoBits(16).getZExtValue();
+ SDValue ZeroVal = CurDAG->getRegister(Mips::ZERO, MVT::i32);
+
+ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32);
+ SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32);
+
+ if (Hi)
+ Res = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HiVal);
+
+ if (Lo)
+ Res = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32,
+ Hi ? SDValue(Res, 0) : ZeroVal, LoVal);
+
+ Res = CurDAG->getMachineNode(
+ Mips::SUBREG_TO_REG, DL, MVT::i64,
+ CurDAG->getTargetConstant(((Hi >> 15) & 0x1), DL, MVT::i64),
+ SDValue(Res, 0),
+ CurDAG->getTargetConstant(Mips::sub_32, DL, MVT::i64));
+
+ Res =
+ CurDAG->getMachineNode(Mips::FILL_D, DL, MVT::v2i64, SDValue(Res, 0));
+
+ } else if (SplatValue.isSignedIntN(64)) {
+      // If we have a 64-bit splat value, we perform a similar sequence to the
+ // above:
+ //
+ // MIPS32: MIPS64:
+ // lui $res, %highest(val) lui $res, %highest(val)
+ // ori $res, $res, %higher(val) ori $res, $res, %higher(val)
+ // lui $res2, %hi(val) lui $res2, %hi(val)
+ // ori $res2, %res2, %lo(val) ori $res2, %res2, %lo(val)
+ // $res3 = fill $res2 dinsu $res, $res2, 0, 32
+ // $res4 = insert.w $res3[1], $res fill.d $res
+ // splat.d $res4, 0
+ //
+ // The ability to use dinsu is guaranteed as MSA requires MIPSR5. This saves
+ // having to materialize the value by shifts and ors.
+ //
+ // FIXME: Implement the preferred sequence for MIPS64R6:
+ //
+ // MIPS64R6:
+ // ori $res, $zero, %lo(val)
+ // daui $res, $res, %hi(val)
+ // dahi $res, $res, %higher(val)
+      //    dati  $res, $res, %highest(val)
+ // fill.d $res
+ //
+
+ const unsigned Lo = SplatValue.getLoBits(16).getZExtValue();
+ const unsigned Hi = SplatValue.lshr(16).getLoBits(16).getZExtValue();
+ const unsigned Higher = SplatValue.lshr(32).getLoBits(16).getZExtValue();
+ const unsigned Highest = SplatValue.lshr(48).getLoBits(16).getZExtValue();
+
+ SDValue LoVal = CurDAG->getTargetConstant(Lo, DL, MVT::i32);
+ SDValue HiVal = CurDAG->getTargetConstant(Hi, DL, MVT::i32);
+ SDValue HigherVal = CurDAG->getTargetConstant(Higher, DL, MVT::i32);
+ SDValue HighestVal = CurDAG->getTargetConstant(Highest, DL, MVT::i32);
+ SDValue ZeroVal = CurDAG->getRegister(Mips::ZERO, MVT::i32);
+
+ // Independent of whether we're targeting MIPS64 or not, the basic
+ // operations are the same. Also, directly use the $zero register if
+ // the 16 bit chunk is zero.
+ //
+ // For optimization purposes we always synthesize the splat value as
+      // an i32 value, then if we're targeting MIPS64, use SUBREG_TO_REG
+ // just before combining the values with dinsu to produce an i64. This
+ // enables SelectionDAG to aggressively share components of splat values
+ // where possible.
+ //
+      // FIXME: This is the general constant synthesis problem. This code
+      //        should be factored out into a class shared between all the
+      //        classes that need it. Specifically, for a 64-bit splat of a
+      //        negative number we can do better than LUi/ORi for the upper
+      //        32 bits.
+
+ if (Hi)
+ Res = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HiVal);
+
+ if (Lo)
+ Res = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32,
+ Hi ? SDValue(Res, 0) : ZeroVal, LoVal);
+
+ SDNode *HiRes;
+ if (Highest)
+ HiRes = CurDAG->getMachineNode(Mips::LUi, DL, MVT::i32, HighestVal);
+
+ if (Higher)
+ HiRes = CurDAG->getMachineNode(Mips::ORi, DL, MVT::i32,
+ Highest ? SDValue(HiRes, 0) : ZeroVal,
+ HigherVal);
+
+ if (ABI.IsO32()) {
+ Res = CurDAG->getMachineNode(Mips::FILL_W, DL, MVT::v4i32,
+ (Hi || Lo) ? SDValue(Res, 0) : ZeroVal);
+
+ Res = CurDAG->getMachineNode(
+ Mips::INSERT_W, DL, MVT::v4i32, SDValue(Res, 0),
+ (Highest || Higher) ? SDValue(HiRes, 0) : ZeroVal,
+ CurDAG->getTargetConstant(1, DL, MVT::i32));
+
+ const TargetLowering *TLI = getTargetLowering();
+ const TargetRegisterClass *RC =
+ TLI->getRegClassFor(ViaVecTy.getSimpleVT());
+
+ Res = CurDAG->getMachineNode(
+ Mips::COPY_TO_REGCLASS, DL, ViaVecTy, SDValue(Res, 0),
+ CurDAG->getTargetConstant(RC->getID(), DL, MVT::i32));
+
+ Res = CurDAG->getMachineNode(
+ Mips::SPLATI_D, DL, MVT::v2i64, SDValue(Res, 0),
+ CurDAG->getTargetConstant(0, DL, MVT::i32));
+ } else if (ABI.IsN64() || ABI.IsN32()) {
+
+ SDValue Zero64Val = CurDAG->getRegister(Mips::ZERO_64, MVT::i64);
+ const bool HiResNonZero = Highest || Higher;
+ const bool ResNonZero = Hi || Lo;
+
+ if (HiResNonZero)
+ HiRes = CurDAG->getMachineNode(
+ Mips::SUBREG_TO_REG, DL, MVT::i64,
+ CurDAG->getTargetConstant(((Highest >> 15) & 0x1), DL, MVT::i64),
+ SDValue(HiRes, 0),
+ CurDAG->getTargetConstant(Mips::sub_32, DL, MVT::i64));
+
+ if (ResNonZero)
+ Res = CurDAG->getMachineNode(
+ Mips::SUBREG_TO_REG, DL, MVT::i64,
+ CurDAG->getTargetConstant(((Hi >> 15) & 0x1), DL, MVT::i64),
+ SDValue(Res, 0),
+ CurDAG->getTargetConstant(Mips::sub_32, DL, MVT::i64));
+
+ // We have 3 cases:
+ // The HiRes is nonzero but Res is $zero => dsll32 HiRes, 0
+ // The Res is nonzero but HiRes is $zero => dinsu Res, $zero, 32, 32
+        // Both are nonzero => dinsu Res, HiRes, 32, 32
+ //
+ // The obvious "missing" case is when both are zero, but that case is
+ // handled by the ldi case.
+ if (ResNonZero) {
+ SDValue Ops[4] = {HiResNonZero ? SDValue(HiRes, 0) : Zero64Val,
+ CurDAG->getTargetConstant(64, DL, MVT::i32),
+ CurDAG->getTargetConstant(32, DL, MVT::i32),
+ SDValue(Res, 0)};
+
+ Res = CurDAG->getMachineNode(Mips::DINSU, DL, MVT::i64, Ops);
+ } else if (HiResNonZero) {
+ Res = CurDAG->getMachineNode(
+ Mips::DSLL32, DL, MVT::i64, SDValue(HiRes, 0),
+ CurDAG->getTargetConstant(0, DL, MVT::i32));
+ } else
+ llvm_unreachable(
+ "Zero splat value handled by non-zero 64bit splat synthesis!");
+
+ Res = CurDAG->getMachineNode(Mips::FILL_D, DL, MVT::v2i64, SDValue(Res, 0));
+ } else
+ llvm_unreachable("Unknown ABI in MipsISelDAGToDAG!");
+
+ } else
+ return false;
if (ResVecTy != ViaVecTy) {
// If LdiOp is writing to a different register class to ResVecTy, then
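
As a reference for the constant-synthesis branches above, the 16-bit chunking they materialize can be modelled with plain integer arithmetic. A standalone sketch (the chunk names mirror the %lo/%hi/%higher/%highest relocations in the comments; the value is an arbitrary example):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Decompose a 64-bit splat constant into the four 16-bit chunks that
      // the lui/ori sequences build up.
      const uint64_t Val = 0x123456789ABCDEF0ull;
      const uint32_t Lo      = Val & 0xFFFF;
      const uint32_t Hi      = (Val >> 16) & 0xFFFF;
      const uint32_t Higher  = (Val >> 32) & 0xFFFF;
      const uint32_t Highest = (Val >> 48) & 0xFFFF;

      // lui/ori build two 32-bit halves...
      const uint32_t LoHalf = (Hi << 16) | Lo;          // lui Hi; ori Lo
      const uint32_t HiHalf = (Highest << 16) | Higher; // lui Highest; ori Higher

      // ...and dinsu (MIPS64) or insert.w (MIPS32) then combines them into
      // the 64-bit value that fill.d / splat.d broadcasts.
      const uint64_t Rebuilt = (uint64_t(HiHalf) << 32) | LoHalf;
      assert(Rebuilt == Val);
      return 0;
    }
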
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index f28e8b36fdbc..e2da8477295b 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1123,7 +1123,8 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
case ISD::MUL:
return performMULCombine(N, DAG, DCI, this);
case ISD::SHL:
- return performSHLCombine(N, DAG, DCI, Subtarget);
+ Val = performSHLCombine(N, DAG, DCI, Subtarget);
+ break;
case ISD::SRA:
return performSRACombine(N, DAG, DCI, Subtarget);
case ISD::SRL:
@@ -1643,7 +1644,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
report_fatal_error("Immediate out of range");
APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(),
- Op->getConstantOperandVal(3));
+ Op->getConstantOperandVal(3) + 1);
return DAG.getNode(ISD::VSELECT, DL, VecTy,
DAG.getConstant(Mask, DL, VecTy, true),
Op->getOperand(2), Op->getOperand(1));
@@ -1658,7 +1659,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
report_fatal_error("Immediate out of range");
APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
- Op->getConstantOperandVal(3));
+ Op->getConstantOperandVal(3) + 1);
return DAG.getNode(ISD::VSELECT, DL, VecTy,
DAG.getConstant(Mask, DL, VecTy, true),
Op->getOperand(2), Op->getOperand(1));
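
Both `+ 1` fixes above reflect that the intrinsic's immediate selects one more bit than its raw value. A standalone sketch with plain 32-bit masks, assuming these hunks are the MSA bit-insert (binsli/binsri-style) lowering; lowBitsSet/highBitsSet are hand-rolled stand-ins for APInt::getLowBitsSet/getHighBitsSet:

    #include <cassert>
    #include <cstdint>

    // Build a mask of the N lowest / N highest bits of a 32-bit element,
    // avoiding undefined shifts at the boundaries.
    static uint32_t lowBitsSet(unsigned N)  { return N >= 32 ? ~0u : (1u << N) - 1; }
    static uint32_t highBitsSet(unsigned N) { return N == 0 ? 0u : ~0u << (32 - N); }

    int main() {
      unsigned M = 7; // immediate operand: selects bits [M:0], i.e. M + 1 bits
      assert(lowBitsSet(M + 1)  == 0x000000FFu);
      assert(highBitsSet(M + 1) == 0xFF000000u);
      // Using M instead of M + 1 -- the bug being fixed -- selects one bit
      // too few:
      assert(lowBitsSet(M) == 0x0000007Fu);
      return 0;
    }
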
@@ -2529,11 +2530,10 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
SplatBitSize != 64)
return SDValue();
- // If the value fits into a simm10 then we can use ldi.[bhwd]
- // However, if it isn't an integer type we will have to bitcast from an
- // integer type first. Also, if there are any undefs, we must lower them
- // to defined values first.
- if (ResTy.isInteger() && !HasAnyUndefs && SplatValue.isSignedIntN(10))
+ // If the value isn't an integer type we will have to bitcast
+ // from an integer type first. Also, if there are any undefs, we must
+ // lower them to defined values first.
+ if (ResTy.isInteger() && !HasAnyUndefs)
return Op;
EVT ViaVecTy;
@@ -3628,7 +3628,7 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
for (unsigned i = 1; i < MI.getNumOperands(); i++)
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index ea703d0edd96..91e712a7a54e 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -540,11 +540,20 @@ unsigned MipsSEInstrInfo::getAnalyzableBrOpc(unsigned Opc) const {
void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
+
+ MachineInstrBuilder MIB;
if (Subtarget.isGP64bit())
- BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn64))
- .addReg(Mips::RA_64);
+ MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn64))
+ .addReg(Mips::RA_64, RegState::Undef);
else
- BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn)).addReg(Mips::RA);
+ MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn))
+ .addReg(Mips::RA, RegState::Undef);
+
+ // Retain any imp-use flags.
+ for (auto & MO : I->operands()) {
+ if (MO.isImplicit())
+ MIB.add(MO);
+ }
}
void MipsSEInstrInfo::expandERet(MachineBasicBlock &MBB,
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 3e7570ff46ed..8f5ecadecdea 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -70,8 +70,8 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU,
HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16),
- Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasEVA(false), TM(TM),
- TargetTriple(TT), TSInfo(),
+ Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false),
+ HasEVA(false), TM(TM), TargetTriple(TT), TSInfo(),
InstrInfo(
MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))),
FrameLowering(MipsFrameLowering::create(*this)),
@@ -117,6 +117,9 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU,
if (NoABICalls && TM.isPositionIndependent())
report_fatal_error("position-independent code requires '-mabicalls'");
+ if (isABI_N64() && !TM.isPositionIndependent() && !hasSym32())
+ NoABICalls = true;
+
// Set UseSmallSection.
UseSmallSection = GPOpt;
if (!NoABICalls && GPOpt) {
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 38d3cee70477..cca2cb8a4660 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -142,6 +142,9 @@ class MipsSubtarget : public MipsGenSubtargetInfo {
// UseTCCInDIV -- Enables the use of trapping in the assembler.
bool UseTCCInDIV;
+ // Sym32 -- On Mips64 symbols are 32 bits.
+ bool HasSym32;
+
// HasEVA -- supports EVA ASE.
bool HasEVA;
@@ -229,7 +232,11 @@ public:
unsigned getGPRSizeInBytes() const { return isGP64bit() ? 8 : 4; }
bool isPTR64bit() const { return IsPTR64bit; }
bool isPTR32bit() const { return !IsPTR64bit; }
+ bool hasSym32() const {
+ return (HasSym32 && isABI_N64()) || isABI_N32() || isABI_O32();
+ }
bool isSingleFloat() const { return IsSingleFloat; }
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool hasVFPU() const { return HasVFPU; }
bool inMips16Mode() const { return InMips16Mode; }
bool inMips16ModeDefault() const {
@@ -271,6 +278,8 @@ public:
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
+ bool isXRaySupported() const override { return true; }
+
// for now constant islands are on for the whole compilation unit but we only
// really use them if in addition we are in mips16 mode
static bool useConstantIslands();
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index bb48188e3b87..a45a9c4b41c3 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -11,27 +11,30 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsTargetMachine.h"
+#include "MCTargetDesc/MipsABIInfo.h"
+#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "Mips.h"
-#include "Mips16FrameLowering.h"
#include "Mips16ISelDAGToDAG.h"
-#include "Mips16ISelLowering.h"
-#include "Mips16InstrInfo.h"
-#include "MipsFrameLowering.h"
-#include "MipsInstrInfo.h"
-#include "MipsSEFrameLowering.h"
#include "MipsSEISelDAGToDAG.h"
-#include "MipsSEISelLowering.h"
-#include "MipsSEInstrInfo.h"
+#include "MipsSubtarget.h"
#include "MipsTargetObjectFile.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Target/TargetOptions.h"
+#include <string>
using namespace llvm;
@@ -48,7 +51,7 @@ extern "C" void LLVMInitializeMipsTarget() {
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
const TargetOptions &Options,
bool isLittle) {
- std::string Ret = "";
+ std::string Ret;
MipsABIInfo ABI = MipsABIInfo::computeTargetABI(TT, CPU, Options.MCOptions);
// There are both little and big endian mips.
@@ -102,7 +105,7 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT,
CPU, FS, Options, getEffectiveRelocModel(CM, RM), CM,
OL),
- isLittle(isLittle), TLOF(make_unique<MipsTargetObjectFile>()),
+ isLittle(isLittle), TLOF(llvm::make_unique<MipsTargetObjectFile>()),
ABI(MipsABIInfo::computeTargetABI(TT, CPU, Options.MCOptions)),
Subtarget(nullptr), DefaultSubtarget(TT, CPU, FS, isLittle, *this),
NoMips16Subtarget(TT, CPU, FS.empty() ? "-mips16" : FS.str() + ",-mips16",
@@ -113,9 +116,9 @@ MipsTargetMachine::MipsTargetMachine(const Target &T, const Triple &TT,
initAsmInfo();
}
-MipsTargetMachine::~MipsTargetMachine() {}
+MipsTargetMachine::~MipsTargetMachine() = default;
-void MipsebTargetMachine::anchor() { }
+void MipsebTargetMachine::anchor() {}
MipsebTargetMachine::MipsebTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -125,7 +128,7 @@ MipsebTargetMachine::MipsebTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
-void MipselTargetMachine::anchor() { }
+void MipselTargetMachine::anchor() {}
MipselTargetMachine::MipselTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -182,10 +185,10 @@ void MipsTargetMachine::resetSubtarget(MachineFunction *MF) {
Subtarget = const_cast<MipsSubtarget *>(getSubtargetImpl(*MF->getFunction()));
MF->setSubtarget(Subtarget);
- return;
}
namespace {
+
/// Mips Code Generator Pass Configuration Options.
class MipsPassConfig : public TargetPassConfig {
public:
@@ -209,11 +212,10 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
void addPreEmitPass() override;
-
void addPreRegAlloc() override;
-
};
-} // namespace
+
+} // end anonymous namespace
TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
return new MipsPassConfig(this, PM);
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index e4cf17e2abd8..140d7133f879 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -1,4 +1,4 @@
-//===-- MipsTargetMachine.h - Define TargetMachine for Mips -----*- C++ -*-===//
+//===- MipsTargetMachine.h - Define TargetMachine for Mips ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,15 +16,14 @@
#include "MCTargetDesc/MipsABIInfo.h"
#include "MipsSubtarget.h"
-#include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
+#include <memory>
namespace llvm {
-class formatted_raw_ostream;
-class MipsRegisterInfo;
class MipsTargetMachine : public LLVMTargetMachine {
bool isLittle;
@@ -73,6 +72,7 @@ public:
///
class MipsebTargetMachine : public MipsTargetMachine {
virtual void anchor();
+
public:
MipsebTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -84,6 +84,7 @@ public:
///
class MipselTargetMachine : public MipsTargetMachine {
virtual void anchor();
+
public:
MipselTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -91,6 +92,6 @@ public:
CodeGenOpt::Level OL);
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_MIPS_MIPSTARGETMACHINE_H
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 399ff1fd96e0..a8eecfcc138c 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -17,7 +17,6 @@ set(NVPTXCodeGen_sources
NVPTXISelDAGToDAG.cpp
NVPTXISelLowering.cpp
NVPTXImageOptimizer.cpp
- NVPTXInferAddressSpaces.cpp
NVPTXInstrInfo.cpp
NVPTXLowerAggrCopies.cpp
NVPTXLowerArgs.cpp
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
index 4594c22b8701..b774fe169d71 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -61,6 +61,12 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
case 6:
OS << "%fd";
break;
+ case 7:
+ OS << "%h";
+ break;
+ case 8:
+ OS << "%hh";
+ break;
}
unsigned VReg = RegNo & 0x0FFFFFFF;
@@ -247,8 +253,12 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
O << "s";
else if (Imm == NVPTX::PTXLdStInstCode::Unsigned)
O << "u";
- else
+ else if (Imm == NVPTX::PTXLdStInstCode::Untyped)
+ O << "b";
+ else if (Imm == NVPTX::PTXLdStInstCode::Float)
O << "f";
+ else
+ llvm_unreachable("Unknown register type");
} else if (!strcmp(Modifier, "vec")) {
if (Imm == NVPTX::PTXLdStInstCode::V2)
O << ".v2";
diff --git a/lib/Target/NVPTX/LLVMBuild.txt b/lib/Target/NVPTX/LLVMBuild.txt
index 70a2de3441ce..ee8aaa998bb6 100644
--- a/lib/Target/NVPTX/LLVMBuild.txt
+++ b/lib/Target/NVPTX/LLVMBuild.txt
@@ -28,5 +28,5 @@ has_asmprinter = 1
type = Library
name = NVPTXCodeGen
parent = NVPTX
-required_libraries = Analysis AsmPrinter CodeGen Core MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo Scalar SelectionDAG Support Target TransformUtils Vectorize
+required_libraries = Analysis AsmPrinter CodeGen Core IPO MC NVPTXAsmPrinter NVPTXDesc NVPTXInfo Scalar SelectionDAG Support Target TransformUtils Vectorize
add_to_library_groups = NVPTX
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index c455a437d8d5..902d1b25e7dd 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -45,10 +45,8 @@ FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMPass();
-FunctionPass *createNVPTXInferAddressSpacesPass();
FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
FunctionPass *createNVVMReflectPass();
-FunctionPass *createNVVMReflectPass(const StringMap<int> &Mapping);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
@@ -108,7 +106,8 @@ enum AddressSpace {
enum FromType {
Unsigned = 0,
Signed,
- Float
+ Float,
+ Untyped
};
enum VecType {
Scalar = 1,
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 3c2594c77f45..21e25de80dc7 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -320,6 +320,10 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
switch (Cnt->getType()->getTypeID()) {
default: report_fatal_error("Unsupported FP type"); break;
+ case Type::HalfTyID:
+ MCOp = MCOperand::createExpr(
+ NVPTXFloatMCExpr::createConstantFPHalf(Val, OutContext));
+ break;
case Type::FloatTyID:
MCOp = MCOperand::createExpr(
NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext));
@@ -357,6 +361,10 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
Ret = (5 << 28);
} else if (RC == &NVPTX::Float64RegsRegClass) {
Ret = (6 << 28);
+ } else if (RC == &NVPTX::Float16RegsRegClass) {
+ Ret = (7 << 28);
+ } else if (RC == &NVPTX::Float16x2RegsRegClass) {
+ Ret = (8 << 28);
} else {
report_fatal_error("Bad register class");
}
@@ -396,12 +404,15 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
unsigned size = 0;
if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
size = ITy->getBitWidth();
- if (size < 32)
- size = 32;
} else {
assert(Ty->isFloatingPointTy() && "Floating point type expected here");
size = Ty->getPrimitiveSizeInBits();
}
+ // PTX ABI requires all scalar return values to be at least 32
+ // bits in size. fp16 normally uses .b16 as its storage type in
+ // PTX, so its size must be adjusted here, too.
+ if (size < 32)
+ size = 32;
O << ".param .b" << size << " func_retval0";
} else if (isa<PointerType>(Ty)) {
@@ -1221,7 +1232,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
else
O << " .align " << GVar->getAlignment();
- if (ETy->isFloatingPointTy() || ETy->isIntegerTy() || ETy->isPointerTy()) {
+ if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
+ (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
O << " .";
// Special case: ABI requires that we use .u8 for predicates
if (ETy->isIntegerTy(1))
@@ -1262,6 +1274,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
// targets that support these high level field accesses. Structs, arrays
// and vectors are lowered into arrays of bytes.
switch (ETy->getTypeID()) {
+ case Type::IntegerTyID: // Integers larger than 64 bits
case Type::StructTyID:
case Type::ArrayTyID:
case Type::VectorTyID:
@@ -1376,6 +1389,9 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
}
break;
}
+ case Type::HalfTyID:
+ // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
+ return "b16";
case Type::FloatTyID:
return "f32";
case Type::DoubleTyID:
@@ -1477,7 +1493,7 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
const DataLayout &DL = getDataLayout();
- const AttributeSet &PAL = F->getAttributes();
+ const AttributeList &PAL = F->getAttributes();
const TargetLowering *TLI = nvptxSubtarget->getTargetLowering();
Function::const_arg_iterator I, E;
unsigned paramIndex = 0;
@@ -1534,7 +1550,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
}
- if (!PAL.hasAttribute(paramIndex + 1, Attribute::ByVal)) {
+ if (!PAL.hasParamAttribute(paramIndex, Attribute::ByVal)) {
if (Ty->isAggregateType() || Ty->isVectorTy()) {
// Just print .param .align <a> .b8 .param[size];
// <a> = PAL.getparamalignment
@@ -1601,6 +1617,11 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
sz = 32;
} else if (isa<PointerType>(Ty))
sz = thePointerTy.getSizeInBits();
+ else if (Ty->isHalfTy())
+ // PTX ABI requires all scalar parameters to be at least 32
+ // bits in size. fp16 normally uses .b16 as its storage type
+ // in PTX, so its size must be adjusted here, too.
+ sz = 32;
else
sz = Ty->getPrimitiveSizeInBits();
if (isABI)
@@ -1977,6 +1998,17 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
const DataLayout &DL = getDataLayout();
int Bytes;
+ // Integers of arbitrary width
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
+ APInt Val = CI->getValue();
+ for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
+ uint8_t Byte = Val.getLoBits(8).getZExtValue();
+ aggBuffer->addBytes(&Byte, 1, 1);
+ Val = Val.lshr(8);
+ }
+ return;
+ }
+
// Old constants
if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
if (CPV->getNumOperands())
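
The new arbitrary-width path above peels one byte per iteration off the APInt. A standalone model with a plain uint64_t (Buffer stands in for aggBuffer; the value and size are arbitrary examples):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      uint64_t Val = 0x0102030405060708ull; // stand-in for CI->getValue()
      const unsigned AllocSize = 8;         // stand-in for DL.getTypeAllocSize()

      std::vector<uint8_t> Buffer;
      for (unsigned I = 0; I < AllocSize; ++I) {
        Buffer.push_back(uint8_t(Val & 0xFF)); // Val.getLoBits(8)
        Val >>= 8;                             // Val = Val.lshr(8)
      }
      // Bytes come out least-significant first, matching the loop above.
      assert(Buffer.front() == 0x08 && Buffer.back() == 0x01);
      return 0;
    }
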
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 43c478f4212f..274977254046 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -26,23 +26,6 @@ using namespace llvm;
#define DEBUG_TYPE "nvptx-isel"
-static cl::opt<int> UsePrecDivF32(
- "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
- cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
- " IEEE Compliant F32 div.rnd if available."),
- cl::init(2));
-
-static cl::opt<bool>
-UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
- cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
- cl::init(true));
-
-static cl::opt<bool>
-FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
- cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
- cl::init(false));
-
-
/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
@@ -57,45 +40,20 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
}
bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
- return SelectionDAGISel::runOnMachineFunction(MF);
+ Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
+ return SelectionDAGISel::runOnMachineFunction(MF);
}
int NVPTXDAGToDAGISel::getDivF32Level() const {
- if (UsePrecDivF32.getNumOccurrences() > 0) {
- // If nvptx-prec-div32=N is used on the command-line, always honor it
- return UsePrecDivF32;
- } else {
- // Otherwise, use div.approx if fast math is enabled
- if (TM.Options.UnsafeFPMath)
- return 0;
- else
- return 2;
- }
+ return Subtarget->getTargetLowering()->getDivF32Level();
}
bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
- if (UsePrecSqrtF32.getNumOccurrences() > 0) {
- // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
- return UsePrecSqrtF32;
- } else {
- // Otherwise, use sqrt.approx if fast math is enabled
- return !TM.Options.UnsafeFPMath;
- }
+ return Subtarget->getTargetLowering()->usePrecSqrtF32();
}
bool NVPTXDAGToDAGISel::useF32FTZ() const {
- if (FtzEnabled.getNumOccurrences() > 0) {
- // If nvptx-f32ftz is used on the command-line, always honor it
- return FtzEnabled;
- } else {
- const Function *F = MF->getFunction();
- // Otherwise, check for an nvptx-f32ftz attribute on the function
- if (F->hasFnAttribute("nvptx-f32ftz"))
- return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
- else
- return false;
- }
+ return Subtarget->getTargetLowering()->useF32FTZ(*MF);
}
bool NVPTXDAGToDAGISel::allowFMA() const {
@@ -103,6 +61,11 @@ bool NVPTXDAGToDAGISel::allowFMA() const {
return TL->allowFMA(*MF, OptLevel);
}
+bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
+ const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
+ return TL->allowUnsafeFPMath(*MF);
+}
+
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
void NVPTXDAGToDAGISel::Select(SDNode *N) {
@@ -121,6 +84,14 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
if (tryStore(N))
return;
break;
+ case ISD::EXTRACT_VECTOR_ELT:
+ if (tryEXTRACT_VECTOR_ELEMENT(N))
+ return;
+ break;
+ case NVPTXISD::SETP_F16X2:
+ SelectSETP_F16X2(N);
+ return;
+
case NVPTXISD::LoadV2:
case NVPTXISD::LoadV4:
if (tryLoadVector(N))
@@ -515,6 +486,10 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
case ISD::ADDRSPACECAST:
SelectAddrSpaceCast(N);
return;
+ case ISD::ConstantFP:
+ if (tryConstantFP16(N))
+ return;
+ break;
default:
break;
}
@@ -536,6 +511,140 @@ bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
}
}
+// There's no way to specify FP16 immediates in .f16 ops, so we have to
+// load them into an .f16 register first.
+bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
+ if (N->getValueType(0) != MVT::f16)
+ return false;
+ SDValue Val = CurDAG->getTargetConstantFP(
+ cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
+ SDNode *LoadConstF16 =
+ CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
+ ReplaceNode(N, LoadConstF16);
+ return true;
+}
+
+// Map ISD:CONDCODE value to appropriate CmpMode expected by
+// NVPTXInstPrinter::printCmpMode()
+static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
+ using NVPTX::PTXCmpMode::CmpMode;
+ unsigned PTXCmpMode = [](ISD::CondCode CC) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unexpected condition code.");
+ case ISD::SETOEQ:
+ return CmpMode::EQ;
+ case ISD::SETOGT:
+ return CmpMode::GT;
+ case ISD::SETOGE:
+ return CmpMode::GE;
+ case ISD::SETOLT:
+ return CmpMode::LT;
+ case ISD::SETOLE:
+ return CmpMode::LE;
+ case ISD::SETONE:
+ return CmpMode::NE;
+ case ISD::SETO:
+ return CmpMode::NUM;
+ case ISD::SETUO:
+ return CmpMode::NotANumber;
+ case ISD::SETUEQ:
+ return CmpMode::EQU;
+ case ISD::SETUGT:
+ return CmpMode::GTU;
+ case ISD::SETUGE:
+ return CmpMode::GEU;
+ case ISD::SETULT:
+ return CmpMode::LTU;
+ case ISD::SETULE:
+ return CmpMode::LEU;
+ case ISD::SETUNE:
+ return CmpMode::NEU;
+ case ISD::SETEQ:
+ return CmpMode::EQ;
+ case ISD::SETGT:
+ return CmpMode::GT;
+ case ISD::SETGE:
+ return CmpMode::GE;
+ case ISD::SETLT:
+ return CmpMode::LT;
+ case ISD::SETLE:
+ return CmpMode::LE;
+ case ISD::SETNE:
+ return CmpMode::NE;
+ }
+ }(CondCode.get());
+
+ if (FTZ)
+ PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
+
+ return PTXCmpMode;
+}
+
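
The immediately-invoked lambda keeps the condition-code switch exhaustive while the FTZ bit is OR'ed in afterwards as an orthogonal flag. A minimal standalone model of the idiom (all enum names here are illustrative, not the real NVPTX ones):

    #include <cassert>

    enum class Cond { EQ, GT, Unordered };
    namespace Mode {
    enum : unsigned { EQ = 0, GT = 1, NotANumber = 2, FTZ_FLAG = 0x100 };
    }

    static unsigned getCmpMode(Cond C, bool FTZ) {
      unsigned M = [](Cond C) -> unsigned {
        switch (C) {
        case Cond::EQ:        return Mode::EQ;
        case Cond::GT:        return Mode::GT;
        case Cond::Unordered: return Mode::NotANumber;
        }
        return Mode::EQ; // unreachable; keeps compilers happy
      }(C);
      if (FTZ)
        M |= Mode::FTZ_FLAG; // flush-to-zero is independent of the comparison
      return M;
    }

    int main() {
      assert(getCmpMode(Cond::GT, /*FTZ=*/true) == (Mode::GT | Mode::FTZ_FLAG));
      assert(getCmpMode(Cond::EQ, /*FTZ=*/false) == Mode::EQ);
      return 0;
    }
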
+bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
+ unsigned PTXCmpMode =
+ getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
+ SDLoc DL(N);
+ SDNode *SetP = CurDAG->getMachineNode(
+ NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
+ N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
+ ReplaceNode(N, SetP);
+ return true;
+}
+
+// Find all instances of extract_vector_elt that use this v2f16 vector
+// and coalesce them into a scattering move instruction.
+bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
+ SDValue Vector = N->getOperand(0);
+
+ // We only care about f16x2 as it's the only real vector type we
+ // need to deal with.
+ if (Vector.getSimpleValueType() != MVT::v2f16)
+ return false;
+
+ // Find and record all uses of this vector that extract element 0 or 1.
+ SmallVector<SDNode *, 4> E0, E1;
+ for (const auto &U : Vector.getNode()->uses()) {
+ if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ continue;
+ if (U->getOperand(0) != Vector)
+ continue;
+ if (const ConstantSDNode *IdxConst =
+ dyn_cast<ConstantSDNode>(U->getOperand(1))) {
+ if (IdxConst->getZExtValue() == 0)
+ E0.push_back(U);
+ else if (IdxConst->getZExtValue() == 1)
+ E1.push_back(U);
+ else
+ llvm_unreachable("Invalid vector index.");
+ }
+ }
+
+ // There's no point scattering f16x2 if we only ever access one
+ // element of it.
+ if (E0.empty() || E1.empty())
+ return false;
+
+ unsigned Op = NVPTX::SplitF16x2;
+  // If the vector has been BITCAST'ed from i32, we can use the original
+  // value directly and avoid a register-to-register move.
+ SDValue Source = Vector;
+ if (Vector->getOpcode() == ISD::BITCAST) {
+ Op = NVPTX::SplitI32toF16x2;
+ Source = Vector->getOperand(0);
+ }
+ // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
+ // into f16,f16 SplitF16x2(V)
+ SDNode *ScatterOp =
+ CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
+ for (auto *Node : E0)
+ ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
+ for (auto *Node : E1)
+ ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
+
+ return true;
+}
+
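
The use-scan above splits the vector only when both lanes are actually read. A standalone model of that decision (the index list stands in for the constant operands of the extract_vector_elt users):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    int main() {
      // Lane touched by each extract_vector_elt user of a two-lane vector.
      std::vector<unsigned> UseIndices = {0, 1, 0};
      std::vector<std::size_t> E0, E1; // users of lane 0 / lane 1
      for (std::size_t U = 0; U < UseIndices.size(); ++U)
        (UseIndices[U] == 0 ? E0 : E1).push_back(U);

      // One split instruction can feed every user, but only pays off when
      // both result lanes have consumers.
      bool ShouldSplit = !E0.empty() && !E1.empty();
      assert(ShouldSplit);
      return 0;
    }
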
static unsigned int getCodeAddrSpace(MemSDNode *N) {
const Value *Src = N->getMemOperand()->getValue();
@@ -681,6 +790,35 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
}
}
+// Helper function to reduce the amount of boilerplate code needed for
+// opcode selection.
+static Optional<unsigned> pickOpcodeForVT(
+ MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16,
+ unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16,
+ unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) {
+ switch (VT) {
+ case MVT::i1:
+ case MVT::i8:
+ return Opcode_i8;
+ case MVT::i16:
+ return Opcode_i16;
+ case MVT::i32:
+ return Opcode_i32;
+ case MVT::i64:
+ return Opcode_i64;
+ case MVT::f16:
+ return Opcode_f16;
+ case MVT::v2f16:
+ return Opcode_f16x2;
+ case MVT::f32:
+ return Opcode_f32;
+ case MVT::f64:
+ return Opcode_f64;
+ default:
+ return None;
+ }
+}
+
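
pickOpcodeForVT collapses the per-type switches that the rest of this patch deletes: callers test the returned optional once and bail out on unsupported types. A standalone sketch of the same pattern using std::optional (the patch itself uses llvm::Optional and None; the mechanics are identical):

    #include <cassert>
    #include <optional>

    enum class VT { i8, i16, f16, f64 };

    // Opcodes are plain unsigned stand-ins; a gap in the instruction table is
    // expressed by passing std::nullopt for that slot.
    static std::optional<unsigned> pickOpcode(VT T, unsigned OpI8, unsigned OpI16,
                                              std::optional<unsigned> OpF16) {
      switch (T) {
      case VT::i8:  return OpI8;
      case VT::i16: return OpI16;
      case VT::f16: return OpF16; // may itself be empty
      default:      return std::nullopt;
      }
    }

    int main() {
      assert(pickOpcode(VT::i16, 10, 11, std::nullopt).value() == 11);
      assert(!pickOpcode(VT::f16, 10, 11, std::nullopt)); // table gap
      assert(!pickOpcode(VT::f64, 10, 11, 12));           // unsupported type
      return 0;
    }
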
bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
SDLoc dl(N);
LoadSDNode *LD = cast<LoadSDNode>(N);
@@ -709,33 +847,32 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
isVolatile = false;
- // Vector Setting
- MVT SimpleVT = LoadedVT.getSimpleVT();
- unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
- if (SimpleVT.isVector()) {
- unsigned num = SimpleVT.getVectorNumElements();
- if (num == 2)
- vecType = NVPTX::PTXLdStInstCode::V2;
- else if (num == 4)
- vecType = NVPTX::PTXLdStInstCode::V4;
- else
- return false;
- }
-
// Type Setting: fromType + fromTypeWidth
//
// Sign : ISD::SEXTLOAD
// Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
// type is integer
// Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
+ MVT SimpleVT = LoadedVT.getSimpleVT();
MVT ScalarVT = SimpleVT.getScalarType();
// Read at least 8 bits (predicates are stored as 8-bit values)
unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
unsigned int fromType;
+
+ // Vector Setting
+ unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
+ if (SimpleVT.isVector()) {
+ assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
+ // v2f16 is loaded using ld.b32
+ fromTypeWidth = 32;
+ }
+
if ((LD->getExtensionType() == ISD::SEXTLOAD))
fromType = NVPTX::PTXLdStInstCode::Signed;
else if (ScalarVT.isFloatingPoint())
- fromType = NVPTX::PTXLdStInstCode::Float;
+ // f16 uses .b16 as its storage type.
+ fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
+ : NVPTX::PTXLdStInstCode::Float;
else
fromType = NVPTX::PTXLdStInstCode::Unsigned;
@@ -744,169 +881,72 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
SDValue N1 = N->getOperand(1);
SDValue Addr;
SDValue Offset, Base;
- unsigned Opcode;
+ Optional<unsigned> Opcode;
MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
if (SelectDirectAddr(N1, Addr)) {
- switch (TargetVT) {
- case MVT::i8:
- Opcode = NVPTX::LD_i8_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::LD_i16_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::LD_i32_avar;
- break;
- case MVT::i64:
- Opcode = NVPTX::LD_i64_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::LD_f32_avar;
- break;
- case MVT::f64:
- Opcode = NVPTX::LD_f64_avar;
- break;
- default:
+ Opcode = pickOpcodeForVT(
+ TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
+ NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
+ NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
+ if (!Opcode)
return false;
- }
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
getI32Imm(vecType, dl), getI32Imm(fromType, dl),
getI32Imm(fromTypeWidth, dl), Addr, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
+ NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
+ MVT::Other, Ops);
} else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
: SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
- switch (TargetVT) {
- case MVT::i8:
- Opcode = NVPTX::LD_i8_asi;
- break;
- case MVT::i16:
- Opcode = NVPTX::LD_i16_asi;
- break;
- case MVT::i32:
- Opcode = NVPTX::LD_i32_asi;
- break;
- case MVT::i64:
- Opcode = NVPTX::LD_i64_asi;
- break;
- case MVT::f32:
- Opcode = NVPTX::LD_f32_asi;
- break;
- case MVT::f64:
- Opcode = NVPTX::LD_f64_asi;
- break;
- default:
+ Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
+ NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
+ NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
+ NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
+ if (!Opcode)
return false;
- }
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
getI32Imm(vecType, dl), getI32Imm(fromType, dl),
getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
+ NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
+ MVT::Other, Ops);
} else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
: SelectADDRri(N1.getNode(), N1, Base, Offset)) {
- if (TM.is64Bit()) {
- switch (TargetVT) {
- case MVT::i8:
- Opcode = NVPTX::LD_i8_ari_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::LD_i16_ari_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::LD_i32_ari_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::LD_i64_ari_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::LD_f32_ari_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::LD_f64_ari_64;
- break;
- default:
- return false;
- }
- } else {
- switch (TargetVT) {
- case MVT::i8:
- Opcode = NVPTX::LD_i8_ari;
- break;
- case MVT::i16:
- Opcode = NVPTX::LD_i16_ari;
- break;
- case MVT::i32:
- Opcode = NVPTX::LD_i32_ari;
- break;
- case MVT::i64:
- Opcode = NVPTX::LD_i64_ari;
- break;
- case MVT::f32:
- Opcode = NVPTX::LD_f32_ari;
- break;
- case MVT::f64:
- Opcode = NVPTX::LD_f64_ari;
- break;
- default:
- return false;
- }
- }
+ if (TM.is64Bit())
+ Opcode = pickOpcodeForVT(
+ TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
+ NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
+ NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
+ else
+ Opcode = pickOpcodeForVT(
+ TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
+ NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
+ NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
+ if (!Opcode)
+ return false;
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
getI32Imm(vecType, dl), getI32Imm(fromType, dl),
getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
+ NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
+ MVT::Other, Ops);
} else {
- if (TM.is64Bit()) {
- switch (TargetVT) {
- case MVT::i8:
- Opcode = NVPTX::LD_i8_areg_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::LD_i16_areg_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::LD_i32_areg_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::LD_i64_areg_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::LD_f32_areg_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::LD_f64_areg_64;
- break;
- default:
- return false;
- }
- } else {
- switch (TargetVT) {
- case MVT::i8:
- Opcode = NVPTX::LD_i8_areg;
- break;
- case MVT::i16:
- Opcode = NVPTX::LD_i16_areg;
- break;
- case MVT::i32:
- Opcode = NVPTX::LD_i32_areg;
- break;
- case MVT::i64:
- Opcode = NVPTX::LD_i64_areg;
- break;
- case MVT::f32:
- Opcode = NVPTX::LD_f32_areg;
- break;
- case MVT::f64:
- Opcode = NVPTX::LD_f64_areg;
- break;
- default:
- return false;
- }
- }
+ if (TM.is64Bit())
+ Opcode = pickOpcodeForVT(
+ TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
+ NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
+ NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
+ NVPTX::LD_f64_areg_64);
+ else
+ Opcode = pickOpcodeForVT(
+ TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
+ NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
+ NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
+ if (!Opcode)
+ return false;
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
getI32Imm(vecType, dl), getI32Imm(fromType, dl),
getI32Imm(fromTypeWidth, dl), N1, Chain };
- NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
+ NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
+ MVT::Other, Ops);
}
if (!NVPTXLD)
@@ -925,7 +965,7 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDValue Addr, Offset, Base;
- unsigned Opcode;
+ Optional<unsigned> Opcode;
SDLoc DL(N);
SDNode *LD;
MemSDNode *MemSD = cast<MemSDNode>(N);
@@ -968,7 +1008,8 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
if (ExtensionType == ISD::SEXTLOAD)
FromType = NVPTX::PTXLdStInstCode::Signed;
else if (ScalarVT.isFloatingPoint())
- FromType = NVPTX::PTXLdStInstCode::Float;
+ FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
+ : NVPTX::PTXLdStInstCode::Float;
else
FromType = NVPTX::PTXLdStInstCode::Unsigned;
@@ -987,111 +1028,67 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
EVT EltVT = N->getValueType(0);
+  // v8f16 is a special case. PTX doesn't have an ld.v8.f16
+  // instruction. Instead, we split the vector into v2f16 chunks and
+  // load them with ld.v4.b32.
+ if (EltVT == MVT::v2f16) {
+ assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
+ EltVT = MVT::i32;
+ FromType = NVPTX::PTXLdStInstCode::Untyped;
+ FromTypeWidth = 32;
+ }
+
if (SelectDirectAddr(Op1, Addr)) {
switch (N->getOpcode()) {
default:
return false;
case NVPTXISD::LoadV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v2_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v2_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v2_avar;
- break;
- case MVT::i64:
- Opcode = NVPTX::LDV_i64_v2_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v2_avar;
- break;
- case MVT::f64:
- Opcode = NVPTX::LDV_f64_v2_avar;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
+ NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
+ NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
+ NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
break;
case NVPTXISD::LoadV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v4_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v4_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v4_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v4_avar;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
+ NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None,
+ NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
+ NVPTX::LDV_f32_v4_avar, None);
break;
}
-
+ if (!Opcode)
+ return false;
SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL), Addr, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
+ LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
} else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
switch (N->getOpcode()) {
default:
return false;
case NVPTXISD::LoadV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v2_asi;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v2_asi;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v2_asi;
- break;
- case MVT::i64:
- Opcode = NVPTX::LDV_i64_v2_asi;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v2_asi;
- break;
- case MVT::f64:
- Opcode = NVPTX::LDV_f64_v2_asi;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
+ NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
+ NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
+ NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
break;
case NVPTXISD::LoadV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v4_asi;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v4_asi;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v4_asi;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v4_asi;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
+ NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None,
+ NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
+ NVPTX::LDV_f32_v4_asi, None);
break;
}
-
+ if (!Opcode)
+ return false;
SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
+ LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
} else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
if (TM.is64Bit()) {
@@ -1099,46 +1096,19 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
default:
return false;
case NVPTXISD::LoadV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v2_ari_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v2_ari_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v2_ari_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::LDV_i64_v2_ari_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v2_ari_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::LDV_f64_v2_ari_64;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
+ NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
+ NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
+ NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
+ NVPTX::LDV_f64_v2_ari_64);
break;
case NVPTXISD::LoadV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v4_ari_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v4_ari_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v4_ari_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v4_ari_64;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
+ NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None,
+ NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
+ NVPTX::LDV_f32_v4_ari_64, None);
break;
}
} else {
@@ -1146,101 +1116,47 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
default:
return false;
case NVPTXISD::LoadV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v2_ari;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v2_ari;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v2_ari;
- break;
- case MVT::i64:
- Opcode = NVPTX::LDV_i64_v2_ari;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v2_ari;
- break;
- case MVT::f64:
- Opcode = NVPTX::LDV_f64_v2_ari;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
+ NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
+ NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
+ NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
break;
case NVPTXISD::LoadV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v4_ari;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v4_ari;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v4_ari;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v4_ari;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
+ NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None,
+ NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
+ NVPTX::LDV_f32_v4_ari, None);
break;
}
}
-
+ if (!Opcode)
+ return false;
SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
+ LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
} else {
if (TM.is64Bit()) {
switch (N->getOpcode()) {
default:
return false;
case NVPTXISD::LoadV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v2_areg_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v2_areg_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v2_areg_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::LDV_i64_v2_areg_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v2_areg_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::LDV_f64_v2_areg_64;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
+ NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
+ NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
+ NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
+ NVPTX::LDV_f64_v2_areg_64);
break;
case NVPTXISD::LoadV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v4_areg_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v4_areg_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v4_areg_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v4_areg_64;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
+ NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None,
+ NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
+ NVPTX::LDV_f32_v4_areg_64, None);
break;
}
} else {
@@ -1248,54 +1164,28 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
default:
return false;
case NVPTXISD::LoadV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v2_areg;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v2_areg;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v2_areg;
- break;
- case MVT::i64:
- Opcode = NVPTX::LDV_i64_v2_areg;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v2_areg;
- break;
- case MVT::f64:
- Opcode = NVPTX::LDV_f64_v2_areg;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
+ NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
+ NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
+ NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
+ NVPTX::LDV_f64_v2_areg);
break;
case NVPTXISD::LoadV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::LDV_i8_v4_areg;
- break;
- case MVT::i16:
- Opcode = NVPTX::LDV_i16_v4_areg;
- break;
- case MVT::i32:
- Opcode = NVPTX::LDV_i32_v4_areg;
- break;
- case MVT::f32:
- Opcode = NVPTX::LDV_f32_v4_areg;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
+ NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None,
+ NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
+ NVPTX::LDV_f32_v4_areg, None);
break;
}
}
-
+ if (!Opcode)
+ return false;
SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
getI32Imm(VecType, DL), getI32Imm(FromType, DL),
getI32Imm(FromTypeWidth, DL), Op1, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
+ LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
}
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
@@ -1338,7 +1228,7 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
Mem = cast<MemSDNode>(N);
}
- unsigned Opcode;
+ Optional<unsigned> Opcode;
SDLoc DL(N);
SDNode *LD;
SDValue Base, Offset, Addr;
@@ -1366,142 +1256,72 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
default:
return false;
case ISD::INTRINSIC_W_CHAIN:
- if (IsLDG) {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
- break;
- }
- } else {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
- break;
- }
- }
+ if (IsLDG)
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
+ else
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
break;
case NVPTXISD::LDGV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
break;
case NVPTXISD::LDUV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
break;
case NVPTXISD::LDGV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None,
+ NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None);
break;
case NVPTXISD::LDUV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None,
+ NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None);
break;
}
-
+ if (!Opcode)
+ return false;
SDValue Ops[] = { Addr, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
+ LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
} else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
if (TM.is64Bit()) {
@@ -1510,139 +1330,68 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
return false;
case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
- if (IsLDG) {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
- break;
- }
- } else {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
- break;
- }
- }
+ if (IsLDG)
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
+ NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
+ NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
+ NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
+ NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
+ NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
+ else
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
+ NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
+ NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
+ NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
+ NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
+ NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
break;
case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
break;
case NVPTXISD::LDUV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
break;
case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None,
+ NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
+ NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None);
break;
case NVPTXISD::LDUV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None,
+ NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
+ NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None);
break;
}
} else {
@@ -1651,146 +1400,75 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
return false;
case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
- if (IsLDG) {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
- break;
- }
- } else {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
- break;
- }
- }
+ if (IsLDG)
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
+ else
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
break;
case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
break;
case NVPTXISD::LDUV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
break;
case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None,
+ NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None);
break;
case NVPTXISD::LDUV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None,
+ NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None);
break;
}
}
-
- SDValue Ops[] = { Base, Offset, Chain };
-
- LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
+ if (!Opcode)
+ return false;
+ SDValue Ops[] = {Base, Offset, Chain};
+ LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
} else {
if (TM.is64Bit()) {
switch (N->getOpcode()) {
@@ -1798,139 +1476,68 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
return false;
case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
- if (IsLDG) {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
- break;
- }
- } else {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
- break;
- }
- }
+ if (IsLDG)
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
+ NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
+ NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
+ NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
+ NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
+ NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
+ else
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
+ NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
+ NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
+ NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
+ NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
+ NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
break;
case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
break;
case NVPTXISD::LDUV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
break;
case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None,
+ NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
+ NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None);
break;
case NVPTXISD::LDUV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None,
+ NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
+ NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None);
break;
}
} else {
@@ -1939,145 +1546,75 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
return false;
case ISD::LOAD:
case ISD::INTRINSIC_W_CHAIN:
- if (IsLDG) {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
- break;
- }
- } else {
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
- break;
- }
- }
+ if (IsLDG)
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
+ NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
+ NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
+ NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
+ NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
+ NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
+ NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
+ else
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
+ NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
+ NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
+ NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
+ NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
+ NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
+ NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
break;
case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
break;
case NVPTXISD::LDUV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
- break;
- case MVT::i64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
- break;
- case MVT::f64:
- Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
break;
case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None,
+ NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
+ NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None);
break;
case NVPTXISD::LDUV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
- break;
- case MVT::i16:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
- break;
- case MVT::i32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
- break;
- case MVT::f32:
- Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
+ NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
+ NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
break;
}
}
-
+ if (!Opcode)
+ return false;
SDValue Ops[] = { Op1, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
+ LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
}
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
@@ -2151,24 +1688,23 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
// Vector Setting
MVT SimpleVT = StoreVT.getSimpleVT();
unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
- if (SimpleVT.isVector()) {
- unsigned num = SimpleVT.getVectorNumElements();
- if (num == 2)
- vecType = NVPTX::PTXLdStInstCode::V2;
- else if (num == 4)
- vecType = NVPTX::PTXLdStInstCode::V4;
- else
- return false;
- }
// Type Setting: toType + toTypeWidth
// - for integer type, always use 'u'
//
MVT ScalarVT = SimpleVT.getScalarType();
unsigned toTypeWidth = ScalarVT.getSizeInBits();
+ if (SimpleVT.isVector()) {
+ assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
+ // v2f16 is stored using st.b32
+ toTypeWidth = 32;
+ }
+
unsigned int toType;
if (ScalarVT.isFloatingPoint())
- toType = NVPTX::PTXLdStInstCode::Float;
+ // f16 uses .b16 as its storage type.
+ toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
+ : NVPTX::PTXLdStInstCode::Float;
else
toType = NVPTX::PTXLdStInstCode::Unsigned;
@@ -2178,173 +1714,73 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
SDValue N2 = N->getOperand(2);
SDValue Addr;
SDValue Offset, Base;
- unsigned Opcode;
+ Optional<unsigned> Opcode;
MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
if (SelectDirectAddr(N2, Addr)) {
- switch (SourceVT) {
- case MVT::i8:
- Opcode = NVPTX::ST_i8_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::ST_i16_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::ST_i32_avar;
- break;
- case MVT::i64:
- Opcode = NVPTX::ST_i64_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::ST_f32_avar;
- break;
- case MVT::f64:
- Opcode = NVPTX::ST_f64_avar;
- break;
- default:
+ Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
+ NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
+ NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
+ NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
+ if (!Opcode)
return false;
- }
SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
+ NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
} else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
: SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
- switch (SourceVT) {
- case MVT::i8:
- Opcode = NVPTX::ST_i8_asi;
- break;
- case MVT::i16:
- Opcode = NVPTX::ST_i16_asi;
- break;
- case MVT::i32:
- Opcode = NVPTX::ST_i32_asi;
- break;
- case MVT::i64:
- Opcode = NVPTX::ST_i64_asi;
- break;
- case MVT::f32:
- Opcode = NVPTX::ST_f32_asi;
- break;
- case MVT::f64:
- Opcode = NVPTX::ST_f64_asi;
- break;
- default:
+ Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
+ NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
+ NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
+ NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
+ if (!Opcode)
return false;
- }
SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
Offset, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
+ NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
} else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
: SelectADDRri(N2.getNode(), N2, Base, Offset)) {
- if (TM.is64Bit()) {
- switch (SourceVT) {
- case MVT::i8:
- Opcode = NVPTX::ST_i8_ari_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::ST_i16_ari_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::ST_i32_ari_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::ST_i64_ari_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::ST_f32_ari_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::ST_f64_ari_64;
- break;
- default:
- return false;
- }
- } else {
- switch (SourceVT) {
- case MVT::i8:
- Opcode = NVPTX::ST_i8_ari;
- break;
- case MVT::i16:
- Opcode = NVPTX::ST_i16_ari;
- break;
- case MVT::i32:
- Opcode = NVPTX::ST_i32_ari;
- break;
- case MVT::i64:
- Opcode = NVPTX::ST_i64_ari;
- break;
- case MVT::f32:
- Opcode = NVPTX::ST_f32_ari;
- break;
- case MVT::f64:
- Opcode = NVPTX::ST_f64_ari;
- break;
- default:
- return false;
- }
- }
+ if (TM.is64Bit())
+ Opcode = pickOpcodeForVT(
+ SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
+ NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
+ NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
+ else
+ Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
+ NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
+ NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
+ NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
+ if (!Opcode)
+ return false;
+
SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
Offset, Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
+ NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
} else {
- if (TM.is64Bit()) {
- switch (SourceVT) {
- case MVT::i8:
- Opcode = NVPTX::ST_i8_areg_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::ST_i16_areg_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::ST_i32_areg_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::ST_i64_areg_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::ST_f32_areg_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::ST_f64_areg_64;
- break;
- default:
- return false;
- }
- } else {
- switch (SourceVT) {
- case MVT::i8:
- Opcode = NVPTX::ST_i8_areg;
- break;
- case MVT::i16:
- Opcode = NVPTX::ST_i16_areg;
- break;
- case MVT::i32:
- Opcode = NVPTX::ST_i32_areg;
- break;
- case MVT::i64:
- Opcode = NVPTX::ST_i64_areg;
- break;
- case MVT::f32:
- Opcode = NVPTX::ST_f32_areg;
- break;
- case MVT::f64:
- Opcode = NVPTX::ST_f64_areg;
- break;
- default:
- return false;
- }
- }
+ if (TM.is64Bit())
+ Opcode =
+ pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
+ NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
+ NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
+ NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
+ else
+ Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
+ NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
+ NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
+ NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
+ if (!Opcode)
+ return false;
SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
Chain };
- NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
+ NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
}
if (!NVPTXST)
@@ -2361,7 +1797,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
SDValue Addr, Offset, Base;
- unsigned Opcode;
+ Optional<unsigned> Opcode;
SDLoc DL(N);
SDNode *ST;
EVT EltVT = Op1.getValueType();
@@ -2391,7 +1827,8 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
unsigned ToTypeWidth = ScalarVT.getSizeInBits();
unsigned ToType;
if (ScalarVT.isFloatingPoint())
- ToType = NVPTX::PTXLdStInstCode::Float;
+ ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
+ : NVPTX::PTXLdStInstCode::Float;
else
ToType = NVPTX::PTXLdStInstCode::Unsigned;
@@ -2418,6 +1855,16 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
return false;
}
+ // v8f16 is a special case. PTX doesn't have an st.v8.f16
+ // instruction. Instead, we split the vector into v2f16 chunks and
+ // store them with st.v4.b32.
+ if (EltVT == MVT::v2f16) {
+ assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected store opcode.");
+ EltVT = MVT::i32;
+ ToType = NVPTX::PTXLdStInstCode::Untyped;
+ ToTypeWidth = 32;
+ }
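Concretely, after this rewrite a StoreV4 of v2f16 elements (i.e. an original v8f16 store) proceeds with EltVT = i32, ToType = Untyped and ToTypeWidth = 32, so the opcode selection below goes through the i32 column of pickOpcodeForVT and the result prints as an untyped st.v4.b32 of four 32-bit registers, each holding one bit-packed f16 pair.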
+
StOps.push_back(getI32Imm(IsVolatile, DL));
StOps.push_back(getI32Imm(CodeAddrSpace, DL));
StOps.push_back(getI32Imm(VecType, DL));
@@ -2429,46 +1876,18 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
default:
return false;
case NVPTXISD::StoreV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v2_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v2_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v2_avar;
- break;
- case MVT::i64:
- Opcode = NVPTX::STV_i64_v2_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v2_avar;
- break;
- case MVT::f64:
- Opcode = NVPTX::STV_f64_v2_avar;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
+ NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
+ NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
+ NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
break;
case NVPTXISD::StoreV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v4_avar;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v4_avar;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v4_avar;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v4_avar;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar,
+ NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None,
+ NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
+ NVPTX::STV_f32_v4_avar, None);
break;
}
StOps.push_back(Addr);
@@ -2478,46 +1897,18 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
default:
return false;
case NVPTXISD::StoreV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v2_asi;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v2_asi;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v2_asi;
- break;
- case MVT::i64:
- Opcode = NVPTX::STV_i64_v2_asi;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v2_asi;
- break;
- case MVT::f64:
- Opcode = NVPTX::STV_f64_v2_asi;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
+ NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
+ NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
+ NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
break;
case NVPTXISD::StoreV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v4_asi;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v4_asi;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v4_asi;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v4_asi;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
+ NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None,
+ NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
+ NVPTX::STV_f32_v4_asi, None);
break;
}
StOps.push_back(Base);
@@ -2529,46 +1920,19 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
default:
return false;
case NVPTXISD::StoreV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v2_ari_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v2_ari_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v2_ari_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::STV_i64_v2_ari_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v2_ari_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::STV_f64_v2_ari_64;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
+ NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
+ NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
+ NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
+ NVPTX::STV_f64_v2_ari_64);
break;
case NVPTXISD::StoreV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v4_ari_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v4_ari_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v4_ari_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v4_ari_64;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
+ NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None,
+ NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
+ NVPTX::STV_f32_v4_ari_64, None);
break;
}
} else {
@@ -2576,46 +1940,18 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
default:
return false;
case NVPTXISD::StoreV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v2_ari;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v2_ari;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v2_ari;
- break;
- case MVT::i64:
- Opcode = NVPTX::STV_i64_v2_ari;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v2_ari;
- break;
- case MVT::f64:
- Opcode = NVPTX::STV_f64_v2_ari;
- break;
- }
+ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
+ NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
+ NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
+ NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
break;
case NVPTXISD::StoreV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v4_ari;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v4_ari;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v4_ari;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v4_ari;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari,
+ NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None,
+ NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
+ NVPTX::STV_f32_v4_ari, None);
break;
}
}
@@ -2627,46 +1963,19 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
default:
return false;
case NVPTXISD::StoreV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v2_areg_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v2_areg_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v2_areg_64;
- break;
- case MVT::i64:
- Opcode = NVPTX::STV_i64_v2_areg_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v2_areg_64;
- break;
- case MVT::f64:
- Opcode = NVPTX::STV_f64_v2_areg_64;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
+ NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
+ NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
+ NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
+ NVPTX::STV_f64_v2_areg_64);
break;
case NVPTXISD::StoreV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v4_areg_64;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v4_areg_64;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v4_areg_64;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v4_areg_64;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
+ NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None,
+ NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
+ NVPTX::STV_f32_v4_areg_64, None);
break;
}
} else {
@@ -2674,55 +1983,31 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
default:
return false;
case NVPTXISD::StoreV2:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v2_areg;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v2_areg;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v2_areg;
- break;
- case MVT::i64:
- Opcode = NVPTX::STV_i64_v2_areg;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v2_areg;
- break;
- case MVT::f64:
- Opcode = NVPTX::STV_f64_v2_areg;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
+ NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
+ NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
+ NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
+ NVPTX::STV_f64_v2_areg);
break;
case NVPTXISD::StoreV4:
- switch (EltVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i8:
- Opcode = NVPTX::STV_i8_v4_areg;
- break;
- case MVT::i16:
- Opcode = NVPTX::STV_i16_v4_areg;
- break;
- case MVT::i32:
- Opcode = NVPTX::STV_i32_v4_areg;
- break;
- case MVT::f32:
- Opcode = NVPTX::STV_f32_v4_areg;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
+ NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None,
+ NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
+ NVPTX::STV_f32_v4_areg, None);
break;
}
}
StOps.push_back(N2);
}
+ if (!Opcode)
+ return false;
+
StOps.push_back(Chain);
- ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
+ ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
@@ -2757,87 +2042,36 @@ bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
EVT EltVT = Node->getValueType(0);
EVT MemVT = Mem->getMemoryVT();
- unsigned Opc = 0;
+ Optional<unsigned> Opcode;
switch (VecSize) {
default:
return false;
case 1:
- switch (MemVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opc = NVPTX::LoadParamMemI8;
- break;
- case MVT::i8:
- Opc = NVPTX::LoadParamMemI8;
- break;
- case MVT::i16:
- Opc = NVPTX::LoadParamMemI16;
- break;
- case MVT::i32:
- Opc = NVPTX::LoadParamMemI32;
- break;
- case MVT::i64:
- Opc = NVPTX::LoadParamMemI64;
- break;
- case MVT::f32:
- Opc = NVPTX::LoadParamMemF32;
- break;
- case MVT::f64:
- Opc = NVPTX::LoadParamMemF64;
- break;
- }
+ Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
+ NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
+ NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
+ NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
+ NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
break;
case 2:
- switch (MemVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opc = NVPTX::LoadParamMemV2I8;
- break;
- case MVT::i8:
- Opc = NVPTX::LoadParamMemV2I8;
- break;
- case MVT::i16:
- Opc = NVPTX::LoadParamMemV2I16;
- break;
- case MVT::i32:
- Opc = NVPTX::LoadParamMemV2I32;
- break;
- case MVT::i64:
- Opc = NVPTX::LoadParamMemV2I64;
- break;
- case MVT::f32:
- Opc = NVPTX::LoadParamMemV2F32;
- break;
- case MVT::f64:
- Opc = NVPTX::LoadParamMemV2F64;
- break;
- }
+ Opcode =
+ pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
+ NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
+ NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
+ NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
+ NVPTX::LoadParamMemV2F64);
break;
case 4:
- switch (MemVT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opc = NVPTX::LoadParamMemV4I8;
- break;
- case MVT::i8:
- Opc = NVPTX::LoadParamMemV4I8;
- break;
- case MVT::i16:
- Opc = NVPTX::LoadParamMemV4I16;
- break;
- case MVT::i32:
- Opc = NVPTX::LoadParamMemV4I32;
- break;
- case MVT::f32:
- Opc = NVPTX::LoadParamMemV4F32;
- break;
- }
+ Opcode = pickOpcodeForVT(
+ MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
+ NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None,
+ NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
+ NVPTX::LoadParamMemV4F32, None);
break;
}
+ if (!Opcode)
+ return false;
SDVTList VTs;
if (VecSize == 1) {
@@ -2856,7 +2090,7 @@ bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
Ops.push_back(Chain);
Ops.push_back(Flag);
- ReplaceNode(Node, CurDAG->getMachineNode(Opc, DL, VTs, Ops));
+ ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops));
return true;
}
@@ -2893,89 +2127,36 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
// Determine target opcode
// If we have an i1, use an 8-bit store. The lowering code in
// NVPTXISelLowering will have already emitted an upcast.
- unsigned Opcode = 0;
+ Optional<unsigned> Opcode;
switch (NumElts) {
default:
return false;
case 1:
- switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opcode = NVPTX::StoreRetvalI8;
- break;
- case MVT::i8:
- Opcode = NVPTX::StoreRetvalI8;
- break;
- case MVT::i16:
- Opcode = NVPTX::StoreRetvalI16;
- break;
- case MVT::i32:
- Opcode = NVPTX::StoreRetvalI32;
- break;
- case MVT::i64:
- Opcode = NVPTX::StoreRetvalI64;
- break;
- case MVT::f32:
- Opcode = NVPTX::StoreRetvalF32;
- break;
- case MVT::f64:
- Opcode = NVPTX::StoreRetvalF64;
- break;
- }
+ Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
+ NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
+ NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
+ NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
+ NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
break;
case 2:
- switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opcode = NVPTX::StoreRetvalV2I8;
- break;
- case MVT::i8:
- Opcode = NVPTX::StoreRetvalV2I8;
- break;
- case MVT::i16:
- Opcode = NVPTX::StoreRetvalV2I16;
- break;
- case MVT::i32:
- Opcode = NVPTX::StoreRetvalV2I32;
- break;
- case MVT::i64:
- Opcode = NVPTX::StoreRetvalV2I64;
- break;
- case MVT::f32:
- Opcode = NVPTX::StoreRetvalV2F32;
- break;
- case MVT::f64:
- Opcode = NVPTX::StoreRetvalV2F64;
- break;
- }
+ Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
+ NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
+ NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
+ NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
+ NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
break;
case 4:
- switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opcode = NVPTX::StoreRetvalV4I8;
- break;
- case MVT::i8:
- Opcode = NVPTX::StoreRetvalV4I8;
- break;
- case MVT::i16:
- Opcode = NVPTX::StoreRetvalV4I16;
- break;
- case MVT::i32:
- Opcode = NVPTX::StoreRetvalV4I32;
- break;
- case MVT::f32:
- Opcode = NVPTX::StoreRetvalV4F32;
- break;
- }
+ Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
+ NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
+ NVPTX::StoreRetvalV4I32, None,
+ NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
+ NVPTX::StoreRetvalV4F32, None);
break;
}
+ if (!Opcode)
+ return false;
- SDNode *Ret =
- CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
+ SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
@@ -3024,88 +2205,36 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
// Determine target opcode
// If we have an i1, use an 8-bit store. The lowering code in
// NVPTXISelLowering will have already emitted an upcast.
- unsigned Opcode = 0;
+ Optional<unsigned> Opcode;
switch (N->getOpcode()) {
default:
switch (NumElts) {
default:
return false;
case 1:
- switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opcode = NVPTX::StoreParamI8;
- break;
- case MVT::i8:
- Opcode = NVPTX::StoreParamI8;
- break;
- case MVT::i16:
- Opcode = NVPTX::StoreParamI16;
- break;
- case MVT::i32:
- Opcode = NVPTX::StoreParamI32;
- break;
- case MVT::i64:
- Opcode = NVPTX::StoreParamI64;
- break;
- case MVT::f32:
- Opcode = NVPTX::StoreParamF32;
- break;
- case MVT::f64:
- Opcode = NVPTX::StoreParamF64;
- break;
- }
+ Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
+ NVPTX::StoreParamI8, NVPTX::StoreParamI16,
+ NVPTX::StoreParamI32, NVPTX::StoreParamI64,
+ NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
+ NVPTX::StoreParamF32, NVPTX::StoreParamF64);
break;
case 2:
- switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opcode = NVPTX::StoreParamV2I8;
- break;
- case MVT::i8:
- Opcode = NVPTX::StoreParamV2I8;
- break;
- case MVT::i16:
- Opcode = NVPTX::StoreParamV2I16;
- break;
- case MVT::i32:
- Opcode = NVPTX::StoreParamV2I32;
- break;
- case MVT::i64:
- Opcode = NVPTX::StoreParamV2I64;
- break;
- case MVT::f32:
- Opcode = NVPTX::StoreParamV2F32;
- break;
- case MVT::f64:
- Opcode = NVPTX::StoreParamV2F64;
- break;
- }
+ Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
+ NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
+ NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
+ NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
+ NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
break;
case 4:
- switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::i1:
- Opcode = NVPTX::StoreParamV4I8;
- break;
- case MVT::i8:
- Opcode = NVPTX::StoreParamV4I8;
- break;
- case MVT::i16:
- Opcode = NVPTX::StoreParamV4I16;
- break;
- case MVT::i32:
- Opcode = NVPTX::StoreParamV4I32;
- break;
- case MVT::f32:
- Opcode = NVPTX::StoreParamV4F32;
- break;
- }
+ Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
+ NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
+ NVPTX::StoreParamV4I32, None,
+ NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
+ NVPTX::StoreParamV4F32, None);
break;
}
+ if (!Opcode)
+ return false;
break;
// Special case: if we have a sign-extend/zero-extend node, insert the
// conversion instruction first, and use that as the value operand to
@@ -3132,7 +2261,7 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
SDNode *Ret =
- CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
+ CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 0591035a6aa8..8fc38e7c4612 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -34,6 +34,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
bool usePrecSqrtF32() const;
bool useF32FTZ() const;
bool allowFMA() const;
+ bool allowUnsafeFPMath() const;
public:
explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
@@ -69,6 +70,9 @@ private:
bool tryTextureIntrinsic(SDNode *N);
bool trySurfaceIntrinsic(SDNode *N);
bool tryBFE(SDNode *N);
+ bool tryConstantFP16(SDNode *N);
+ bool SelectSETP_F16X2(SDNode *N);
+ bool tryEXTRACT_VECTOR_ELEMENT(SDNode *N);
inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 7a760fd38d0f..4d06912054a2 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -79,6 +79,60 @@ FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
" 1: do it 2: do it aggressively"),
cl::init(2));
+static cl::opt<int> UsePrecDivF32(
+ "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
+ cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
+ " IEEE Compliant F32 div.rnd if available."),
+ cl::init(2));
+
+static cl::opt<bool> UsePrecSqrtF32(
+ "nvptx-prec-sqrtf32", cl::Hidden,
+ cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
+ cl::init(true));
+
+static cl::opt<bool> FtzEnabled(
+ "nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
+ cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
+ cl::init(false));
+
+int NVPTXTargetLowering::getDivF32Level() const {
+ if (UsePrecDivF32.getNumOccurrences() > 0) {
+ // If nvptx-prec-divf32=N is used on the command-line, always honor it
+ return UsePrecDivF32;
+ } else {
+ // Otherwise, use div.approx if fast math is enabled
+ if (getTargetMachine().Options.UnsafeFPMath)
+ return 0;
+ else
+ return 2;
+ }
+}
+
+bool NVPTXTargetLowering::usePrecSqrtF32() const {
+ if (UsePrecSqrtF32.getNumOccurrences() > 0) {
+ // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
+ return UsePrecSqrtF32;
+ } else {
+ // Otherwise, use sqrt.approx if fast math is enabled
+ return !getTargetMachine().Options.UnsafeFPMath;
+ }
+}
+
+bool NVPTXTargetLowering::useF32FTZ(const MachineFunction &MF) const {
+ // TODO: Get rid of this flag; there can be only one way to do this.
+ if (FtzEnabled.getNumOccurrences() > 0) {
+ // If nvptx-f32ftz is used on the command-line, always honor it
+ return FtzEnabled;
+ } else {
+ const Function *F = MF.getFunction();
+ // Otherwise, check for an nvptx-f32ftz attribute on the function
+ if (F->hasFnAttribute("nvptx-f32ftz"))
+ return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
+ else
+ return false;
+ }
+}
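These three hooks give the command-line flags absolute precedence and fall back to compile-level state otherwise. For example, -nvptx-prec-divf32=0 forces div.approx.f32 even without -enable-unsafe-fp-math; with no flags at all, an unsafe-FP-math compile gets div.approx and sqrt.approx while a default compile gets the IEEE-compliant forms, and FTZ additionally consults the per-function "nvptx-f32ftz" string attribute.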
+
static bool IsPTXVectorType(MVT VT) {
switch (VT.SimpleTy) {
default:
@@ -92,6 +146,9 @@ static bool IsPTXVectorType(MVT VT) {
case MVT::v2i32:
case MVT::v4i32:
case MVT::v2i64:
+ case MVT::v2f16:
+ case MVT::v4f16:
+ case MVT::v8f16: // <4 x f16x2>
case MVT::v2f32:
case MVT::v4f32:
case MVT::v2f64:
@@ -116,13 +173,24 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) {
EVT VT = TempVTs[i];
uint64_t Off = TempOffsets[i];
- if (VT.isVector())
- for (unsigned j = 0, je = VT.getVectorNumElements(); j != je; ++j) {
- ValueVTs.push_back(VT.getVectorElementType());
+ // Split vectors into individual elements, except for v2f16, which
+ // we will pass as a single scalar.
+ if (VT.isVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ // Vectors with an even number of f16 elements will be passed to
+ // us as an array of v2f16 elements. We must match this so we
+ // stay in sync with Ins/Outs.
+ if (EltVT == MVT::f16 && NumElts % 2 == 0) {
+ EltVT = MVT::v2f16;
+ NumElts /= 2;
+ }
+ for (unsigned j = 0; j != NumElts; ++j) {
+ ValueVTs.push_back(EltVT);
if (Offsets)
- Offsets->push_back(Off+j*VT.getVectorElementType().getStoreSize());
+ Offsets->push_back(Off + j * EltVT.getStoreSize());
}
- else {
+ } else {
ValueVTs.push_back(VT);
if (Offsets)
Offsets->push_back(Off);
@@ -130,6 +198,125 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
}
}
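For example, an <8 x half> argument now flattens to four v2f16 pieces at offsets 0, 4, 8 and 12 (v2f16 has a 4-byte store size) instead of eight f16 scalars, while a <3 x half> argument, with its odd element count, still flattens to three f16 pieces.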
+// Check whether we can merge loads/stores of some of the pieces of a
+// flattened function parameter or return value into a single vector
+// load/store.
+//
+// The flattened parameter is represented as a list of EVTs and
+// offsets, and the whole structure is aligned to ParamAlignment. This
+// function determines whether we can load/store pieces of the
+// parameter starting at index Idx using a single vectorized op of
+// size AccessSize. If so, it returns the number of param pieces
+// covered by the vector op. Otherwise, it returns 1.
+static unsigned CanMergeParamLoadStoresStartingAt(
+ unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs,
+ const SmallVectorImpl<uint64_t> &Offsets, unsigned ParamAlignment) {
+ assert(isPowerOf2_32(AccessSize) && "must be a power of 2!");
+
+ // Can't vectorize if param alignment is not sufficient.
+ if (AccessSize > ParamAlignment)
+ return 1;
+ // Can't vectorize if offset is not aligned.
+ if (Offsets[Idx] & (AccessSize - 1))
+ return 1;
+
+ EVT EltVT = ValueVTs[Idx];
+ unsigned EltSize = EltVT.getStoreSize();
+
+ // Element is too large to vectorize.
+ if (EltSize >= AccessSize)
+ return 1;
+
+ unsigned NumElts = AccessSize / EltSize;
+ // Can't vectorize if AccessSize is not a multiple of EltSize.
+ if (AccessSize != EltSize * NumElts)
+ return 1;
+
+ // We don't have enough elements to vectorize.
+ if (Idx + NumElts > ValueVTs.size())
+ return 1;
+
+ // PTX ISA can only deal with 2- and 4-element vector ops.
+ if (NumElts != 4 && NumElts != 2)
+ return 1;
+
+ for (unsigned j = Idx + 1; j < Idx + NumElts; ++j) {
+ // Types do not match.
+ if (ValueVTs[j] != EltVT)
+ return 1;
+
+ // Elements are not contiguous.
+ if (Offsets[j] - Offsets[j - 1] != EltSize)
+ return 1;
+ }
+ // OK. We can vectorize ValueVTs[Idx..Idx+NumElts).
+ return NumElts;
+}
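Worked example: for two f16 pieces at offsets {0, 2} in a 4-byte-aligned parameter, a query with Idx = 0 and AccessSize = 4 passes every check (EltSize = 2, NumElts = 2, types match, offsets are contiguous) and returns 2, so the pair can be covered by one 32-bit access; the same query with ParamAlignment = 2 fails the alignment check and returns 1.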
+
+// Flags for tracking per-element vectorization state of loads/stores
+// of a flattened function parameter or return value.
+enum ParamVectorizationFlags {
+ PVF_INNER = 0x0, // Middle elements of a vector.
+ PVF_FIRST = 0x1, // First element of the vector.
+ PVF_LAST = 0x2, // Last element of the vector.
+ // Scalar is effectively a 1-element vector.
+ PVF_SCALAR = PVF_FIRST | PVF_LAST
+};
+
+// Computes whether and how we can vectorize the loads/stores of a
+// flattened function parameter or return value.
+//
+// The flattened parameter is represented as the list of ValueVTs and
+// Offsets, and is aligned to ParamAlignment bytes. We return a vector
+// of the same size as ValueVTs indicating how each piece should be
+// loaded/stored (i.e. as a scalar, or as part of a vector
+// load/store).
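+//
+// For example, four mergeable f32 pieces yield {PVF_FIRST, PVF_INNER,
+// PVF_INNER, PVF_LAST}, while an unmergeable piece stays PVF_SCALAR.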
+static SmallVector<ParamVectorizationFlags, 16>
+VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs,
+ const SmallVectorImpl<uint64_t> &Offsets,
+ unsigned ParamAlignment) {
+ // Set vector size to match ValueVTs and mark all elements as
+ // scalars by default.
+ SmallVector<ParamVectorizationFlags, 16> VectorInfo;
+ VectorInfo.assign(ValueVTs.size(), PVF_SCALAR);
+
+ // Check what we can vectorize using 128/64/32-bit accesses.
+ for (int I = 0, E = ValueVTs.size(); I != E; ++I) {
+ // Skip elements we've already processed.
+ assert(VectorInfo[I] == PVF_SCALAR && "Unexpected vector info state.");
+ for (unsigned AccessSize : {16, 8, 4, 2}) {
+ unsigned NumElts = CanMergeParamLoadStoresStartingAt(
+ I, AccessSize, ValueVTs, Offsets, ParamAlignment);
+ // Mark vectorized elements.
+ switch (NumElts) {
+ default:
+ llvm_unreachable("Unexpected return value");
+ case 1:
+ // Can't vectorize using this size, try next smaller size.
+ continue;
+ case 2:
+ assert(I + 1 < E && "Not enough elements.");
+ VectorInfo[I] = PVF_FIRST;
+ VectorInfo[I + 1] = PVF_LAST;
+ I += 1;
+ break;
+ case 4:
+ assert(I + 3 < E && "Not enough elements.");
+ VectorInfo[I] = PVF_FIRST;
+ VectorInfo[I + 1] = PVF_INNER;
+ VectorInfo[I + 2] = PVF_INNER;
+ VectorInfo[I + 3] = PVF_LAST;
+ I += 3;
+ break;
+ }
+ // Break out of the inner loop because we've already succeeded
+ // using largest possible AccessSize.
+ break;
+ }
+ }
+ return VectorInfo;
+}
+
// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
const NVPTXSubtarget &STI)
@@ -158,14 +345,32 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
else
setSchedulingPreference(Sched::Source);
+ auto setFP16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
+ LegalizeAction NoF16Action) {
+ setOperationAction(Op, VT, STI.allowFP16Math() ? Action : NoF16Action);
+ };
+
addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
+ addRegisterClass(MVT::f16, &NVPTX::Float16RegsRegClass);
+ addRegisterClass(MVT::v2f16, &NVPTX::Float16x2RegsRegClass);
+
+ // Conversion to/from FP16/FP16x2 is always legal.
+ setOperationAction(ISD::SINT_TO_FP, MVT::f16, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::f16, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom);
+
+ setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote);
+ setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand);
// Operations not directly supported by NVPTX.
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::v2f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
@@ -173,6 +378,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::v2f16, Expand);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
@@ -195,6 +402,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom);
+ setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+ setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
+
if (STI.hasROT64()) {
setOperationAction(ISD::ROTL, MVT::i64, Legal);
setOperationAction(ISD::ROTR, MVT::i64, Legal);
@@ -259,6 +469,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// This is legal in NVPTX
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
// TRAP can be lowered to PTX trap
setOperationAction(ISD::TRAP, MVT::Other, Legal);
@@ -278,15 +489,19 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// Custom handling for i8 intrinsics
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
- setOperationAction(ISD::CTLZ, MVT::i16, Legal);
- setOperationAction(ISD::CTLZ, MVT::i32, Legal);
- setOperationAction(ISD::CTLZ, MVT::i64, Legal);
+ for (const auto& Ty : {MVT::i16, MVT::i32, MVT::i64}) {
+ setOperationAction(ISD::SMIN, Ty, Legal);
+ setOperationAction(ISD::SMAX, Ty, Legal);
+ setOperationAction(ISD::UMIN, Ty, Legal);
+ setOperationAction(ISD::UMAX, Ty, Legal);
+
+ setOperationAction(ISD::CTPOP, Ty, Legal);
+ setOperationAction(ISD::CTLZ, Ty, Legal);
+ }
+
setOperationAction(ISD::CTTZ, MVT::i16, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i64, Expand);
- setOperationAction(ISD::CTPOP, MVT::i16, Legal);
- setOperationAction(ISD::CTPOP, MVT::i32, Legal);
- setOperationAction(ISD::CTPOP, MVT::i64, Legal);
// PTX does not directly support SELP of i1, so promote to i32 first
setOperationAction(ISD::SELECT, MVT::i1, Custom);
@@ -301,28 +516,60 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SHL);
- setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SREM);
setTargetDAGCombine(ISD::UREM);
- // Library functions. These default to Expand, but we have instructions
- // for them.
- setOperationAction(ISD::FCEIL, MVT::f32, Legal);
- setOperationAction(ISD::FCEIL, MVT::f64, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- setOperationAction(ISD::FROUND, MVT::f32, Legal);
- setOperationAction(ISD::FROUND, MVT::f64, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+  // setcc for f16x2 needs special handling to prevent the legalizer's
+  // attempt to scalarize it, since v2i1 is not legal.
+ if (STI.allowFP16Math())
+ setTargetDAGCombine(ISD::SETCC);
+
+  // Promote fp16 arithmetic if fp16 hardware isn't available or the
+  // user passed --nvptx-no-fp16-math. The flag is useful because,
+  // although sm_53+ GPUs have some FP16 support in hardware, only
+  // sm_53 and sm_60 have a full implementation. The others have only
+  // a token amount of FP16 hardware and are likely to run faster by
+  // using the fp32 units instead.
+ for (const auto &Op : {ISD::FADD, ISD::FMUL, ISD::FSUB, ISD::FMA}) {
+ setFP16OperationAction(Op, MVT::f16, Legal, Promote);
+ setFP16OperationAction(Op, MVT::v2f16, Legal, Expand);
+ }
+
+ // There's no neg.f16 instruction. Expand to (0-x).
+ setOperationAction(ISD::FNEG, MVT::f16, Expand);
+ setOperationAction(ISD::FNEG, MVT::v2f16, Expand);
+
+ // (would be) Library functions.
+
+ // These map to conversion instructions for scalar FP types.
+ for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
+ ISD::FROUND, ISD::FTRUNC}) {
+ setOperationAction(Op, MVT::f16, Legal);
+ setOperationAction(Op, MVT::f32, Legal);
+ setOperationAction(Op, MVT::f64, Legal);
+ setOperationAction(Op, MVT::v2f16, Expand);
+ }
+
+ // 'Expand' implements FCOPYSIGN without calling an external library.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+
+ // These map to corresponding instructions for f32/f64. f16 must be
+ // promoted to f32. v2f16 is expanded to f16, which is then promoted
+ // to f32.
+ for (const auto &Op : {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS,
+ ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM}) {
+ setOperationAction(Op, MVT::f16, Promote);
+ setOperationAction(Op, MVT::f32, Legal);
+ setOperationAction(Op, MVT::f64, Legal);
+ setOperationAction(Op, MVT::v2f16, Expand);
+ }
+ setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
// No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate.
// No FPOW or FREM in PTX.
@@ -434,6 +681,8 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::FUN_SHFR_CLAMP";
case NVPTXISD::IMAD:
return "NVPTXISD::IMAD";
+ case NVPTXISD::SETP_F16X2:
+ return "NVPTXISD::SETP_F16X2";
case NVPTXISD::Dummy:
return "NVPTXISD::Dummy";
case NVPTXISD::MUL_WIDE_SIGNED:
@@ -932,10 +1181,60 @@ TargetLoweringBase::LegalizeTypeAction
NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
return TypeSplitVector;
-
+ if (VT == MVT::v2f16)
+ return TypeLegal;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled, int &ExtraSteps,
+ bool &UseOneConst,
+ bool Reciprocal) const {
+ if (!(Enabled == ReciprocalEstimate::Enabled ||
+ (Enabled == ReciprocalEstimate::Unspecified && !usePrecSqrtF32())))
+ return SDValue();
+
+ if (ExtraSteps == ReciprocalEstimate::Unspecified)
+ ExtraSteps = 0;
+
+ SDLoc DL(Operand);
+ EVT VT = Operand.getValueType();
+ bool Ftz = useF32FTZ(DAG.getMachineFunction());
+
+ auto MakeIntrinsicCall = [&](Intrinsic::ID IID) {
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(IID, DL, MVT::i32), Operand);
+ };
+
+ // The sqrt and rsqrt refinement processes assume we always start out with an
+ // approximation of the rsqrt. Therefore, if we're going to do any refinement
+ // (i.e. ExtraSteps > 0), we must return an rsqrt. But if we're *not* doing
+ // any refinement, we must return a regular sqrt.
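+  // The Newton-Raphson refinement steps themselves are inserted by the
+  // generic DAG combiner around the estimate we return here.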
+ if (Reciprocal || ExtraSteps > 0) {
+ if (VT == MVT::f32)
+ return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_rsqrt_approx_ftz_f
+ : Intrinsic::nvvm_rsqrt_approx_f);
+ else if (VT == MVT::f64)
+ return MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d);
+ else
+ return SDValue();
+ } else {
+ if (VT == MVT::f32)
+ return MakeIntrinsicCall(Ftz ? Intrinsic::nvvm_sqrt_approx_ftz_f
+ : Intrinsic::nvvm_sqrt_approx_f);
+ else {
+ // There's no sqrt.approx.f64 instruction, so we emit
+ // reciprocal(rsqrt(x)). This is faster than
+ // select(x == 0, 0, x * rsqrt(x)). (In fact, it's faster than plain
+ // x * rsqrt(x).)
+ return DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::nvvm_rcp_approx_ftz_d, DL, MVT::i32),
+ MakeIntrinsicCall(Intrinsic::nvvm_rsqrt_approx_d));
+ }
+ }
+}
+
SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -967,19 +1266,21 @@ std::string NVPTXTargetLowering::getPrototype(
unsigned size = 0;
if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
size = ITy->getBitWidth();
- if (size < 32)
- size = 32;
} else {
assert(retTy->isFloatingPointTy() &&
"Floating point type expected here");
size = retTy->getPrimitiveSizeInBits();
}
+ // PTX ABI requires all scalar return values to be at least 32
+ // bits in size. fp16 normally uses .b16 as its storage type in
+ // PTX, so its size must be adjusted here, too.
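+      // E.g. an i8 or half return value is declared as ".param .b32 _".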
+ if (size < 32)
+ size = 32;
O << ".param .b" << size << " _";
} else if (isa<PointerType>(retTy)) {
O << ".param .b" << PtrVT.getSizeInBits() << " _";
- } else if ((retTy->getTypeID() == Type::StructTyID) ||
- isa<VectorType>(retTy)) {
+ } else if (retTy->isAggregateType() || retTy->isVectorTy()) {
auto &DL = CS->getCalledFunction()->getParent()->getDataLayout();
O << ".param .align " << retAlignment << " .b8 _["
<< DL.getTypeAllocSize(retTy) << "]";
@@ -1018,7 +1319,7 @@ std::string NVPTXTargetLowering::getPrototype(
OIdx += len - 1;
continue;
}
- // i8 types in IR will be i16 types in SDAG
+ // i8 types in IR will be i16 types in SDAG
assert((getValueType(DL, Ty) == Outs[OIdx].VT ||
(getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) &&
"type mismatch between callee prototype and arguments");
@@ -1028,8 +1329,13 @@ std::string NVPTXTargetLowering::getPrototype(
sz = cast<IntegerType>(Ty)->getBitWidth();
if (sz < 32)
sz = 32;
- } else if (isa<PointerType>(Ty))
+ } else if (isa<PointerType>(Ty)) {
sz = PtrVT.getSizeInBits();
+ } else if (Ty->isHalfTy())
+ // PTX ABI requires all scalar parameters to be at least 32
+ // bits in size. fp16 normally uses .b16 as its storage type
+ // in PTX, so its size must be adjusted here, too.
+ sz = 32;
else
sz = Ty->getPrimitiveSizeInBits();
O << ".param .b" << sz << " ";
@@ -1113,21 +1419,18 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
ArgListTy &Args = CLI.getArgs();
- Type *retTy = CLI.RetTy;
+ Type *RetTy = CLI.RetTy;
ImmutableCallSite *CS = CLI.CS;
+ const DataLayout &DL = DAG.getDataLayout();
bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return Chain;
- MachineFunction &MF = DAG.getMachineFunction();
- const Function *F = MF.getFunction();
- auto &DL = MF.getDataLayout();
SDValue tempChain = Chain;
- Chain = DAG.getCALLSEQ_START(Chain,
- DAG.getIntPtrConstant(uniqueCallSite, dl, true),
- dl);
+ Chain = DAG.getCALLSEQ_START(
+ Chain, DAG.getIntPtrConstant(uniqueCallSite, dl, true), dl);
SDValue InFlag = Chain.getValue(1);
unsigned paramCount = 0;
@@ -1148,240 +1451,124 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Type *Ty = Args[i].Ty;
if (!Outs[OIdx].Flags.isByVal()) {
- if (Ty->isAggregateType()) {
- // aggregate
- SmallVector<EVT, 16> vtparts;
- SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &Offsets,
- 0);
-
- unsigned align =
- getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL);
+ SmallVector<EVT, 16> VTs;
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets);
+ unsigned ArgAlign =
+ getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL);
+ unsigned AllocSize = DL.getTypeAllocSize(Ty);
+ SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ bool NeedAlign; // Does argument declaration specify alignment?
+ if (Ty->isAggregateType() || Ty->isVectorTy()) {
// declare .param .align <align> .b8 .param<n>[<size>];
- unsigned sz = DL.getTypeAllocSize(Ty);
- SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl,
- MVT::i32),
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(sz, dl, MVT::i32),
- InFlag };
+ SDValue DeclareParamOps[] = {
+ Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(AllocSize, dl, MVT::i32), InFlag};
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
- InFlag = Chain.getValue(1);
- for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
- EVT elemtype = vtparts[j];
- unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]);
- if (elemtype.isInteger() && (sz < 8))
- sz = 8;
- SDValue StVal = OutVals[OIdx];
- if (elemtype.getSizeInBits() < 16) {
- StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
- }
- SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(Offsets[j], dl, MVT::i32),
- StVal, InFlag };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
- CopyParamVTs, CopyParamOps,
- elemtype, MachinePointerInfo(),
- ArgAlign);
- InFlag = Chain.getValue(1);
- ++OIdx;
+ NeedAlign = true;
+ } else {
+ // declare .param .b<size> .param<n>;
+ if ((VT.isInteger() || VT.isFloatingPoint()) && AllocSize < 4) {
+ // PTX ABI requires integral types to be at least 32 bits in
+ // size. FP16 is loaded/stored using i16, so it's handled
+ // here as well.
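+          // E.g. an i8 or f16 argument is declared as ".param .b32".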
+ AllocSize = 4;
}
- if (vtparts.size() > 0)
- --OIdx;
- ++paramCount;
- continue;
+ SDValue DeclareScalarParamOps[] = {
+ Chain, DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(AllocSize * 8, dl, MVT::i32),
+ DAG.getConstant(0, dl, MVT::i32), InFlag};
+ Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
+ DeclareScalarParamOps);
+ NeedAlign = false;
}
- if (Ty->isVectorTy()) {
- EVT ObjectVT = getValueType(DL, Ty);
- unsigned align =
- getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL);
- // declare .param .align <align> .b8 .param<n>[<size>];
- unsigned sz = DL.getTypeAllocSize(Ty);
- SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareParamOps[] = { Chain,
- DAG.getConstant(align, dl, MVT::i32),
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(sz, dl, MVT::i32),
- InFlag };
- Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
- DeclareParamOps);
- InFlag = Chain.getValue(1);
- unsigned NumElts = ObjectVT.getVectorNumElements();
- EVT EltVT = ObjectVT.getVectorElementType();
- EVT MemVT = EltVT;
- bool NeedExtend = false;
- if (EltVT.getSizeInBits() < 16) {
- NeedExtend = true;
- EltVT = MVT::i16;
+ InFlag = Chain.getValue(1);
+
+ // PTX Interoperability Guide 3.3(A): [Integer] Values shorter
+ // than 32-bits are sign extended or zero extended, depending on
+ // whether they are signed or unsigned types. This case applies
+ // only to scalar parameters and not to aggregate values.
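+      // E.g. a scalar i8 argument is extended to i32 before it is stored.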
+ bool ExtendIntegerParam =
+ Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Ty) < 32;
+
+ auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign);
+ SmallVector<SDValue, 6> StoreOperands;
+ for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
+ // New store.
+ if (VectorInfo[j] & PVF_FIRST) {
+          assert(StoreOperands.empty() && "Unfinished preceding store.");
+ StoreOperands.push_back(Chain);
+ StoreOperands.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
+ StoreOperands.push_back(DAG.getConstant(Offsets[j], dl, MVT::i32));
}
- // V1 store
- if (NumElts == 1) {
- SDValue Elt = OutVals[OIdx++];
- if (NeedExtend)
- Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt);
-
- SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), Elt,
- InFlag };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
- CopyParamVTs, CopyParamOps,
- MemVT, MachinePointerInfo());
- InFlag = Chain.getValue(1);
- } else if (NumElts == 2) {
- SDValue Elt0 = OutVals[OIdx++];
- SDValue Elt1 = OutVals[OIdx++];
- if (NeedExtend) {
- Elt0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt0);
- Elt1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Elt1);
+ EVT EltVT = VTs[j];
+ SDValue StVal = OutVals[OIdx];
+ if (ExtendIntegerParam) {
+ assert(VTs.size() == 1 && "Scalar can't have multiple parts.");
+ // zext/sext to i32
+ StVal = DAG.getNode(Outs[OIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND,
+ dl, MVT::i32, StVal);
+ } else if (EltVT.getSizeInBits() < 16) {
+            // Use 16-bit registers for small stores, as that is the
+            // smallest general-purpose register size supported by NVPTX.
+ StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
+ }
+
+ // Record the value to store.
+ StoreOperands.push_back(StVal);
+
+ if (VectorInfo[j] & PVF_LAST) {
+ unsigned NumElts = StoreOperands.size() - 3;
+ NVPTXISD::NodeType Op;
+ switch (NumElts) {
+ case 1:
+ Op = NVPTXISD::StoreParam;
+ break;
+ case 2:
+ Op = NVPTXISD::StoreParamV2;
+ break;
+ case 4:
+ Op = NVPTXISD::StoreParamV4;
+ break;
+ default:
+ llvm_unreachable("Invalid vector info.");
}
- SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), Elt0,
- Elt1, InFlag };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
- CopyParamVTs, CopyParamOps,
- MemVT, MachinePointerInfo());
- InFlag = Chain.getValue(1);
- } else {
- unsigned curOffset = 0;
- // V4 stores
- // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
- // the
- // vector will be expanded to a power of 2 elements, so we know we can
- // always round up to the next multiple of 4 when creating the vector
- // stores.
- // e.g. 4 elem => 1 st.v4
- // 6 elem => 2 st.v4
- // 8 elem => 2 st.v4
- // 11 elem => 3 st.v4
- unsigned VecSize = 4;
- if (EltVT.getSizeInBits() == 64)
- VecSize = 2;
-
- // This is potentially only part of a vector, so assume all elements
- // are packed together.
- unsigned PerStoreOffset = MemVT.getStoreSizeInBits() / 8 * VecSize;
-
- for (unsigned i = 0; i < NumElts; i += VecSize) {
- // Get values
- SDValue StoreVal;
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(paramCount, dl, MVT::i32));
- Ops.push_back(DAG.getConstant(curOffset, dl, MVT::i32));
-
- unsigned Opc = NVPTXISD::StoreParamV2;
-
- StoreVal = OutVals[OIdx++];
- if (NeedExtend)
- StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
- Ops.push_back(StoreVal);
-
- if (i + 1 < NumElts) {
- StoreVal = OutVals[OIdx++];
- if (NeedExtend)
- StoreVal =
- DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
- } else {
- StoreVal = DAG.getUNDEF(EltVT);
- }
- Ops.push_back(StoreVal);
-
- if (VecSize == 4) {
- Opc = NVPTXISD::StoreParamV4;
- if (i + 2 < NumElts) {
- StoreVal = OutVals[OIdx++];
- if (NeedExtend)
- StoreVal =
- DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
- } else {
- StoreVal = DAG.getUNDEF(EltVT);
- }
- Ops.push_back(StoreVal);
-
- if (i + 3 < NumElts) {
- StoreVal = OutVals[OIdx++];
- if (NeedExtend)
- StoreVal =
- DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
- } else {
- StoreVal = DAG.getUNDEF(EltVT);
- }
- Ops.push_back(StoreVal);
- }
+ StoreOperands.push_back(InFlag);
- Ops.push_back(InFlag);
+          // Adjust type of the store op if we've extended the scalar
+          // parameter value.
+ EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : VTs[j];
+ unsigned EltAlign =
+ NeedAlign ? GreatestCommonDivisor64(ArgAlign, Offsets[j]) : 0;
- SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops,
- MemVT, MachinePointerInfo());
- InFlag = Chain.getValue(1);
- curOffset += PerStoreOffset;
- }
+ Chain = DAG.getMemIntrinsicNode(
+ Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
+ TheStoreType, MachinePointerInfo(), EltAlign);
+ InFlag = Chain.getValue(1);
+
+ // Cleanup.
+ StoreOperands.clear();
}
- ++paramCount;
- --OIdx;
- continue;
- }
- // Plain scalar
- // for ABI, declare .param .b<size> .param<n>;
- unsigned sz = VT.getSizeInBits();
- bool needExtend = false;
- if (VT.isInteger()) {
- if (sz < 16)
- needExtend = true;
- if (sz < 32)
- sz = 32;
+ ++OIdx;
}
- SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue DeclareParamOps[] = { Chain,
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(sz, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), InFlag };
- Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
- DeclareParamOps);
- InFlag = Chain.getValue(1);
- SDValue OutV = OutVals[OIdx];
- if (needExtend) {
- // zext/sext i1 to i16
- unsigned opc = ISD::ZERO_EXTEND;
- if (Outs[OIdx].Flags.isSExt())
- opc = ISD::SIGN_EXTEND;
- OutV = DAG.getNode(opc, dl, MVT::i16, OutV);
- }
- SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), OutV,
- InFlag };
-
- unsigned opcode = NVPTXISD::StoreParam;
- if (Outs[OIdx].Flags.isZExt() && VT.getSizeInBits() < 32)
- opcode = NVPTXISD::StoreParamU32;
- else if (Outs[OIdx].Flags.isSExt() && VT.getSizeInBits() < 32)
- opcode = NVPTXISD::StoreParamS32;
- Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps,
- VT, MachinePointerInfo());
-
- InFlag = Chain.getValue(1);
+ assert(StoreOperands.empty() && "Unfinished parameter store.");
+ if (VTs.size() > 0)
+ --OIdx;
++paramCount;
continue;
}
- // struct or vector
- SmallVector<EVT, 16> vtparts;
+
+ // ByVal arguments
+ SmallVector<EVT, 16> VTs;
SmallVector<uint64_t, 16> Offsets;
auto *PTy = dyn_cast<PointerType>(Args[i].Ty);
assert(PTy && "Type of a byval parameter should be pointer");
- ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(),
- vtparts, &Offsets, 0);
+ ComputePTXValueVTs(*this, DL, PTy->getElementType(), VTs, &Offsets, 0);
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = Outs[OIdx].Flags.getByValSize();
@@ -1402,11 +1589,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
- for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
- EVT elemtype = vtparts[j];
+ for (unsigned j = 0, je = VTs.size(); j != je; ++j) {
+ EVT elemtype = VTs[j];
int curOffset = Offsets[j];
unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
- auto PtrVT = getPointerTy(DAG.getDataLayout());
+ auto PtrVT = getPointerTy(DL);
SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx],
DAG.getConstant(curOffset, dl, PtrVT));
SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
@@ -1434,18 +1621,18 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Handle Result
if (Ins.size() > 0) {
SmallVector<EVT, 16> resvtparts;
- ComputeValueVTs(*this, DL, retTy, resvtparts);
+ ComputeValueVTs(*this, DL, RetTy, resvtparts);
// Declare
// .param .align 16 .b8 retval0[<size-in-bytes>], or
// .param .b<size-in-bits> retval0
- unsigned resultsz = DL.getTypeAllocSizeInBits(retTy);
+ unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy);
// Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
// these three types to match the logic in
// NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
// Plus, this behavior is consistent with nvcc's.
- if (retTy->isFloatingPointTy() || retTy->isIntegerTy() ||
- retTy->isPointerTy()) {
+ if (RetTy->isFloatingPointTy() || RetTy->isIntegerTy() ||
+ RetTy->isPointerTy()) {
// Scalar needs to be at least 32bit wide
if (resultsz < 32)
resultsz = 32;
@@ -1457,7 +1644,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DeclareRetOps);
InFlag = Chain.getValue(1);
} else {
- retAlignment = getArgumentAlignment(Callee, CS, retTy, 0, DL);
+ retAlignment = getArgumentAlignment(Callee, CS, RetTy, 0, DL);
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareRetOps[] = { Chain,
DAG.getConstant(retAlignment, dl, MVT::i32),
@@ -1478,8 +1665,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The prototype is embedded in a string and put as the operand for a
// CallPrototype SDNode which will print out to the value of the string.
SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- std::string Proto =
- getPrototype(DAG.getDataLayout(), retTy, Args, Outs, retAlignment, CS);
+ std::string Proto = getPrototype(DL, RetTy, Args, Outs, retAlignment, CS);
const char *ProtoStr =
nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str();
SDValue ProtoOps[] = {
@@ -1544,175 +1730,84 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Generate loads from param memory/moves from registers for result
if (Ins.size() > 0) {
- if (retTy && retTy->isVectorTy()) {
- EVT ObjectVT = getValueType(DL, retTy);
- unsigned NumElts = ObjectVT.getVectorNumElements();
- EVT EltVT = ObjectVT.getVectorElementType();
- assert(STI.getTargetLowering()->getNumRegisters(F->getContext(),
- ObjectVT) == NumElts &&
- "Vector was not scalarized");
- unsigned sz = EltVT.getSizeInBits();
- bool needTruncate = sz < 8;
-
- if (NumElts == 1) {
- // Just a simple load
- SmallVector<EVT, 4> LoadRetVTs;
- if (EltVT == MVT::i1 || EltVT == MVT::i8) {
- // If loading i1/i8 result, generate
- // load.b8 i16
- // if i1
- // trunc i16 to i1
- LoadRetVTs.push_back(MVT::i16);
- } else
- LoadRetVTs.push_back(EltVT);
- LoadRetVTs.push_back(MVT::Other);
- LoadRetVTs.push_back(MVT::Glue);
- SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), InFlag};
- SDValue retval = DAG.getMemIntrinsicNode(
- NVPTXISD::LoadParam, dl,
- DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
- Chain = retval.getValue(1);
- InFlag = retval.getValue(2);
- SDValue Ret0 = retval;
- if (needTruncate)
- Ret0 = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Ret0);
- InVals.push_back(Ret0);
- } else if (NumElts == 2) {
- // LoadV2
- SmallVector<EVT, 4> LoadRetVTs;
- if (EltVT == MVT::i1 || EltVT == MVT::i8) {
- // If loading i1/i8 result, generate
- // load.b8 i16
- // if i1
- // trunc i16 to i1
- LoadRetVTs.push_back(MVT::i16);
- LoadRetVTs.push_back(MVT::i16);
- } else {
- LoadRetVTs.push_back(EltVT);
- LoadRetVTs.push_back(EltVT);
- }
- LoadRetVTs.push_back(MVT::Other);
- LoadRetVTs.push_back(MVT::Glue);
- SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), InFlag};
- SDValue retval = DAG.getMemIntrinsicNode(
- NVPTXISD::LoadParamV2, dl,
- DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
- Chain = retval.getValue(2);
- InFlag = retval.getValue(3);
- SDValue Ret0 = retval.getValue(0);
- SDValue Ret1 = retval.getValue(1);
- if (needTruncate) {
- Ret0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret0);
- InVals.push_back(Ret0);
- Ret1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ret1);
- InVals.push_back(Ret1);
- } else {
- InVals.push_back(Ret0);
- InVals.push_back(Ret1);
- }
- } else {
- // Split into N LoadV4
- unsigned Ofst = 0;
- unsigned VecSize = 4;
- unsigned Opc = NVPTXISD::LoadParamV4;
- if (EltVT.getSizeInBits() == 64) {
- VecSize = 2;
- Opc = NVPTXISD::LoadParamV2;
- }
- EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
- for (unsigned i = 0; i < NumElts; i += VecSize) {
- SmallVector<EVT, 8> LoadRetVTs;
- if (EltVT == MVT::i1 || EltVT == MVT::i8) {
- // If loading i1/i8 result, generate
- // load.b8 i16
- // if i1
- // trunc i16 to i1
- for (unsigned j = 0; j < VecSize; ++j)
- LoadRetVTs.push_back(MVT::i16);
- } else {
- for (unsigned j = 0; j < VecSize; ++j)
- LoadRetVTs.push_back(EltVT);
- }
- LoadRetVTs.push_back(MVT::Other);
- LoadRetVTs.push_back(MVT::Glue);
- SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
- DAG.getConstant(Ofst, dl, MVT::i32), InFlag};
- SDValue retval = DAG.getMemIntrinsicNode(
- Opc, dl, DAG.getVTList(LoadRetVTs),
- LoadRetOps, EltVT, MachinePointerInfo());
- if (VecSize == 2) {
- Chain = retval.getValue(2);
- InFlag = retval.getValue(3);
- } else {
- Chain = retval.getValue(4);
- InFlag = retval.getValue(5);
- }
+ SmallVector<EVT, 16> VTs;
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0);
+ assert(VTs.size() == Ins.size() && "Bad value decomposition");
+
+ unsigned RetAlign = getArgumentAlignment(Callee, CS, RetTy, 0, DL);
+ auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign);
+
+ SmallVector<EVT, 6> LoadVTs;
+ int VecIdx = -1; // Index of the first element of the vector.
+
+ // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
+ // 32-bits are sign extended or zero extended, depending on whether
+ // they are signed or unsigned types.
+ bool ExtendIntegerRetVal =
+ RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
+
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ bool needTruncate = false;
+ EVT TheLoadType = VTs[i];
+ EVT EltType = Ins[i].VT;
+ unsigned EltAlign = GreatestCommonDivisor64(RetAlign, Offsets[i]);
+ if (ExtendIntegerRetVal) {
+ TheLoadType = MVT::i32;
+ EltType = MVT::i32;
+ needTruncate = true;
+ } else if (TheLoadType.getSizeInBits() < 16) {
+ if (VTs[i].isInteger())
+ needTruncate = true;
+ EltType = MVT::i16;
+ }
- for (unsigned j = 0; j < VecSize; ++j) {
- if (i + j >= NumElts)
- break;
- SDValue Elt = retval.getValue(j);
- if (needTruncate)
- Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
- InVals.push_back(Elt);
- }
- Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
- }
+ // Record index of the very first element of the vector.
+ if (VectorInfo[i] & PVF_FIRST) {
+ assert(VecIdx == -1 && LoadVTs.empty() && "Orphaned operand list.");
+ VecIdx = i;
}
- } else {
- SmallVector<EVT, 16> VTs;
- SmallVector<uint64_t, 16> Offsets;
- auto &DL = DAG.getDataLayout();
- ComputePTXValueVTs(*this, DL, retTy, VTs, &Offsets, 0);
- assert(VTs.size() == Ins.size() && "Bad value decomposition");
- unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0, DL);
- for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- unsigned sz = VTs[i].getSizeInBits();
- unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
- bool needTruncate = false;
- if (VTs[i].isInteger() && sz < 8) {
- sz = 8;
- needTruncate = true;
+
+ LoadVTs.push_back(EltType);
+
+ if (VectorInfo[i] & PVF_LAST) {
+ unsigned NumElts = LoadVTs.size();
+ LoadVTs.push_back(MVT::Other);
+ LoadVTs.push_back(MVT::Glue);
+ NVPTXISD::NodeType Op;
+ switch (NumElts) {
+ case 1:
+ Op = NVPTXISD::LoadParam;
+ break;
+ case 2:
+ Op = NVPTXISD::LoadParamV2;
+ break;
+ case 4:
+ Op = NVPTXISD::LoadParamV4;
+ break;
+ default:
+ llvm_unreachable("Invalid vector info.");
}
- SmallVector<EVT, 4> LoadRetVTs;
- EVT TheLoadType = VTs[i];
- if (retTy->isIntegerTy() && DL.getTypeAllocSizeInBits(retTy) < 32) {
- // This is for integer types only, and specifically not for
- // aggregates.
- LoadRetVTs.push_back(MVT::i32);
- TheLoadType = MVT::i32;
- needTruncate = true;
- } else if (sz < 16) {
- // If loading i1/i8 result, generate
- // load i8 (-> i16)
- // trunc i16 to i1/i8
-
- // FIXME: Do we need to set needTruncate to true here, too? We could
- // not figure out what this branch is for in D17872, so we left it
- // alone. The comment above about loading i1/i8 may be wrong, as the
- // branch above seems to cover integers of size < 32.
- LoadRetVTs.push_back(MVT::i16);
- } else
- LoadRetVTs.push_back(Ins[i].VT);
- LoadRetVTs.push_back(MVT::Other);
- LoadRetVTs.push_back(MVT::Glue);
-
- SDValue LoadRetOps[] = {Chain, DAG.getConstant(1, dl, MVT::i32),
- DAG.getConstant(Offsets[i], dl, MVT::i32),
- InFlag};
- SDValue retval = DAG.getMemIntrinsicNode(
- NVPTXISD::LoadParam, dl,
- DAG.getVTList(LoadRetVTs), LoadRetOps,
- TheLoadType, MachinePointerInfo(), AlignI);
- Chain = retval.getValue(1);
- InFlag = retval.getValue(2);
- SDValue Ret0 = retval.getValue(0);
- if (needTruncate)
- Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0);
- InVals.push_back(Ret0);
+ SDValue LoadOperands[] = {
+ Chain, DAG.getConstant(1, dl, MVT::i32),
+ DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InFlag};
+ SDValue RetVal = DAG.getMemIntrinsicNode(
+ Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType,
+ MachinePointerInfo(), EltAlign);
+
+ for (unsigned j = 0; j < NumElts; ++j) {
+ SDValue Ret = RetVal.getValue(j);
+ if (needTruncate)
+ Ret = DAG.getNode(ISD::TRUNCATE, dl, Ins[VecIdx + j].VT, Ret);
+ InVals.push_back(Ret);
+ }
+ Chain = RetVal.getValue(NumElts);
+ InFlag = RetVal.getValue(NumElts + 1);
+
+ // Cleanup
+ VecIdx = -1;
+ LoadVTs.clear();
}
}
}
@@ -1752,6 +1847,55 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
return DAG.getBuildVector(Node->getValueType(0), dl, Ops);
}
+// We can initialize a constant f16x2 with a single .b32 move. Normally
+// it would get lowered as two constant loads and a vector-packing move:
+// mov.b16 %h1, 0x4000;
+// mov.b16 %h2, 0x3C00;
+// mov.b32 %hh2, {%h2, %h1};
+// Instead we want just a constant move:
+// mov.b32 %hh2, 0x40003C00
+//
+// This results in better SASS code with CUDA 7.x. Ptxas in CUDA 8.0
+// generates good SASS in both cases.
+SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (!(Op->getValueType(0) == MVT::v2f16 &&
+ isa<ConstantFPSDNode>(Op->getOperand(0)) &&
+ isa<ConstantFPSDNode>(Op->getOperand(1))))
+ return Op;
+
+ APInt E0 =
+ cast<ConstantFPSDNode>(Op->getOperand(0))->getValueAPF().bitcastToAPInt();
+ APInt E1 =
+ cast<ConstantFPSDNode>(Op->getOperand(1))->getValueAPF().bitcastToAPInt();
+ SDValue Const =
+ DAG.getConstant(E1.zext(32).shl(16) | E0.zext(32), SDLoc(Op), MVT::i32);
+ return DAG.getNode(ISD::BITCAST, SDLoc(Op), MVT::v2f16, Const);
+}
+
+SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Index = Op->getOperand(1);
+ // Constant index will be matched by tablegen.
+ if (isa<ConstantSDNode>(Index.getNode()))
+ return Op;
+
+ // Extract individual elements and select one of them.
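+  // I.e. (extractelt v, i) becomes select(i == 0, v[0], v[1]).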
+ SDValue Vector = Op->getOperand(0);
+ EVT VectorVT = Vector.getValueType();
+ assert(VectorVT == MVT::v2f16 && "Unexpected vector type.");
+ EVT EltVT = VectorVT.getVectorElementType();
+
+ SDLoc dl(Op.getNode());
+ SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
+ DAG.getIntPtrConstant(0, dl));
+ SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vector,
+ DAG.getIntPtrConstant(1, dl));
+ return DAG.getSelectCC(dl, Index, DAG.getIntPtrConstant(0, dl), E0, E1,
+ ISD::CondCode::SETEQ);
+}
+
/// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift
/// amount, or
@@ -1885,8 +2029,11 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INTRINSIC_W_CHAIN:
return Op;
case ISD::BUILD_VECTOR:
+ return LowerBUILD_VECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return Op;
+ case ISD::EXTRACT_VECTOR_ELT:
+ return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::CONCAT_VECTORS:
return LowerCONCAT_VECTORS(Op, DAG);
case ISD::STORE:
@@ -1924,8 +2071,21 @@ SDValue NVPTXTargetLowering::LowerSelect(SDValue Op, SelectionDAG &DAG) const {
SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType() == MVT::i1)
return LowerLOADi1(Op, DAG);
- else
- return SDValue();
+
+  // v2f16 is legal, so we can't rely on the legalizer to handle
+  // unaligned loads; we have to handle them here.
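+  // E.g. a v2f16 load with alignment 2 is expanded into two scalar
+  // f16 loads here.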
+ if (Op.getValueType() == MVT::v2f16) {
+ LoadSDNode *Load = cast<LoadSDNode>(Op);
+ EVT MemVT = Load->getMemoryVT();
+ if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
+ Load->getAddressSpace(), Load->getAlignment())) {
+ SDValue Ops[2];
+ std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
+ return DAG.getMergeValues(Ops, SDLoc(Op));
+ }
+ }
+
+ return SDValue();
}
// v = ld i1* addr
@@ -1951,13 +2111,23 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
}
SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
- EVT ValVT = Op.getOperand(1).getValueType();
- if (ValVT == MVT::i1)
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ EVT VT = Store->getMemoryVT();
+
+ if (VT == MVT::i1)
return LowerSTOREi1(Op, DAG);
- else if (ValVT.isVector())
+
+  // v2f16 is legal, so we can't rely on the legalizer to handle
+  // unaligned stores; we have to handle them here.
+ if (VT == MVT::v2f16 &&
+ !allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ Store->getAddressSpace(), Store->getAlignment()))
+ return expandUnalignedStore(Store, DAG);
+
+ if (VT.isVector())
return LowerSTOREVector(Op, DAG);
- else
- return SDValue();
+
+ return SDValue();
}
SDValue
@@ -1980,12 +2150,15 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
case MVT::v2i16:
case MVT::v2i32:
case MVT::v2i64:
+ case MVT::v2f16:
case MVT::v2f32:
case MVT::v2f64:
case MVT::v4i8:
case MVT::v4i16:
case MVT::v4i32:
+ case MVT::v4f16:
case MVT::v4f32:
+ case MVT::v8f16: // <4 x f16x2>
// This is a "native" vector type
break;
}
@@ -2016,6 +2189,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
if (EltVT.getSizeInBits() < 16)
NeedExt = true;
+ bool StoreF16x2 = false;
switch (NumElts) {
default:
return SDValue();
@@ -2025,6 +2199,14 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
case 4:
Opcode = NVPTXISD::StoreV4;
break;
+ case 8:
+    // v8f16 is a special case. PTX doesn't have an st.v8.f16
+    // instruction. Instead, we split the vector into v2f16 chunks and
+ // store them with st.v4.b32.
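+    // E.g. st.v4.b32 {r0, r1, r2, r3}, where each rN holds an f16x2 pair.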
+ assert(EltVT == MVT::f16 && "Wrong type for the vector.");
+ Opcode = NVPTXISD::StoreV4;
+ StoreF16x2 = true;
+ break;
}
SmallVector<SDValue, 8> Ops;
@@ -2032,23 +2214,36 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
// First is the chain
Ops.push_back(N->getOperand(0));
- // Then the split values
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
- DAG.getIntPtrConstant(i, DL));
- if (NeedExt)
- ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
- Ops.push_back(ExtVal);
+ if (StoreF16x2) {
+ // Combine f16,f16 -> v2f16
+ NumElts /= 2;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val,
+ DAG.getIntPtrConstant(i * 2, DL));
+ SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val,
+ DAG.getIntPtrConstant(i * 2 + 1, DL));
+ SDValue V2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f16, E0, E1);
+ Ops.push_back(V2);
+ }
+ } else {
+ // Then the split values
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
+ DAG.getIntPtrConstant(i, DL));
+ if (NeedExt)
+ ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
+ Ops.push_back(ExtVal);
+ }
}
// Then any remaining arguments
Ops.append(N->op_begin() + 2, N->op_end());
- SDValue NewSt = DAG.getMemIntrinsicNode(
- Opcode, DL, DAG.getVTList(MVT::Other), Ops,
- MemSD->getMemoryVT(), MemSD->getMemOperand());
+ SDValue NewSt =
+ DAG.getMemIntrinsicNode(Opcode, DL, DAG.getVTList(MVT::Other), Ops,
+ MemSD->getMemoryVT(), MemSD->getMemOperand());
- //return DCI.CombineTo(N, NewSt, true);
+ // return DCI.CombineTo(N, NewSt, true);
return NewSt;
}
@@ -2120,7 +2315,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
auto PtrVT = getPointerTy(DAG.getDataLayout());
const Function *F = MF.getFunction();
- const AttributeSet &PAL = F->getAttributes();
+ const AttributeList &PAL = F->getAttributes();
const TargetLowering *TLI = STI.getTargetLowering();
SDValue Root = DAG.getRoot();
@@ -2200,177 +2395,80 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// to newly created nodes. The SDNodes for params have to
// appear in the same order as their order of appearance
// in the original function. "idx+1" holds that order.
- if (!PAL.hasAttribute(i + 1, Attribute::ByVal)) {
- if (Ty->isAggregateType()) {
- SmallVector<EVT, 16> vtparts;
- SmallVector<uint64_t, 16> offsets;
+ if (!PAL.hasParamAttribute(i, Attribute::ByVal)) {
+ bool aggregateIsPacked = false;
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ aggregateIsPacked = STy->isPacked();
- // NOTE: Here, we lose the ability to issue vector loads for vectors
- // that are a part of a struct. This should be investigated in the
- // future.
- ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &offsets,
- 0);
- assert(vtparts.size() > 0 && "empty aggregate type not expected");
- bool aggregateIsPacked = false;
- if (StructType *STy = dyn_cast<StructType>(Ty))
- aggregateIsPacked = STy->isPacked();
+ SmallVector<EVT, 16> VTs;
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0);
+ assert(VTs.size() > 0 && "Unexpected empty type.");
+ auto VectorInfo =
+ VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlignment(Ty));
- SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
- for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
- ++parti) {
- EVT partVT = vtparts[parti];
- Value *srcValue = Constant::getNullValue(
- PointerType::get(partVT.getTypeForEVT(F->getContext()),
- ADDRESS_SPACE_PARAM));
- SDValue srcAddr =
- DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
- DAG.getConstant(offsets[parti], dl, PtrVT));
- unsigned partAlign = aggregateIsPacked
- ? 1
- : DL.getABITypeAlignment(
- partVT.getTypeForEVT(F->getContext()));
- SDValue p;
- if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) {
- ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
- ISD::SEXTLOAD : ISD::ZEXTLOAD;
- p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, srcAddr,
- MachinePointerInfo(srcValue), partVT, partAlign);
- } else {
- p = DAG.getLoad(partVT, dl, Root, srcAddr,
- MachinePointerInfo(srcValue), partAlign);
- }
- if (p.getNode())
- p.getNode()->setIROrder(idx + 1);
- InVals.push_back(p);
- ++InsIdx;
+ SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
+ int VecIdx = -1; // Index of the first element of the current vector.
+ for (unsigned parti = 0, parte = VTs.size(); parti != parte; ++parti) {
+ if (VectorInfo[parti] & PVF_FIRST) {
+ assert(VecIdx == -1 && "Orphaned vector.");
+ VecIdx = parti;
}
- if (vtparts.size() > 0)
- --InsIdx;
- continue;
- }
- if (Ty->isVectorTy()) {
- EVT ObjectVT = getValueType(DL, Ty);
- SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
- unsigned NumElts = ObjectVT.getVectorNumElements();
- assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
- "Vector was not scalarized");
- EVT EltVT = ObjectVT.getVectorElementType();
-
- // V1 load
- // f32 = load ...
- if (NumElts == 1) {
- // We only have one element, so just directly load it
- Value *SrcValue = Constant::getNullValue(PointerType::get(
- EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
- SDValue P = DAG.getLoad(
- EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue),
- DL.getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())),
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant);
- if (P.getNode())
- P.getNode()->setIROrder(idx + 1);
- if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
- P = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, P);
- InVals.push_back(P);
- ++InsIdx;
- } else if (NumElts == 2) {
- // V2 load
- // f32,f32 = load ...
- EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
- Value *SrcValue = Constant::getNullValue(PointerType::get(
- VecVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
- SDValue P = DAG.getLoad(
- VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue),
- DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())),
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant);
+ // That's the last element of this store op.
+ if (VectorInfo[parti] & PVF_LAST) {
+ unsigned NumElts = parti - VecIdx + 1;
+ EVT EltVT = VTs[parti];
+ // i1 is loaded/stored as i8.
+ EVT LoadVT = EltVT;
+ if (EltVT == MVT::i1)
+ LoadVT = MVT::i8;
+ else if (EltVT == MVT::v2f16)
+ // getLoad needs a vector type, but it can't handle
+ // vectors which contain v2f16 elements. So we must load
+ // using i32 here and then bitcast back.
+ LoadVT = MVT::i32;
+
+ EVT VecVT = EVT::getVectorVT(F->getContext(), LoadVT, NumElts);
+ SDValue VecAddr =
+ DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
+ DAG.getConstant(Offsets[VecIdx], dl, PtrVT));
+ Value *srcValue = Constant::getNullValue(PointerType::get(
+ EltVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
+ SDValue P =
+ DAG.getLoad(VecVT, dl, Root, VecAddr,
+ MachinePointerInfo(srcValue), aggregateIsPacked,
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
if (P.getNode())
P.getNode()->setIROrder(idx + 1);
-
- SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
- DAG.getIntPtrConstant(0, dl));
- SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
- DAG.getIntPtrConstant(1, dl));
-
- if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits()) {
- Elt0 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt0);
- Elt1 = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt1);
- }
-
- InVals.push_back(Elt0);
- InVals.push_back(Elt1);
- InsIdx += 2;
- } else {
- // V4 loads
- // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
- // the vector will be expanded to a power of 2 elements, so we know we
- // can always round up to the next multiple of 4 when creating the
- // vector loads.
- // e.g. 4 elem => 1 ld.v4
- // 6 elem => 2 ld.v4
- // 8 elem => 2 ld.v4
- // 11 elem => 3 ld.v4
- unsigned VecSize = 4;
- if (EltVT.getSizeInBits() == 64) {
- VecSize = 2;
- }
- EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
- unsigned Ofst = 0;
- for (unsigned i = 0; i < NumElts; i += VecSize) {
- Value *SrcValue = Constant::getNullValue(
- PointerType::get(VecVT.getTypeForEVT(F->getContext()),
- ADDRESS_SPACE_PARAM));
- SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Arg,
- DAG.getConstant(Ofst, dl, PtrVT));
- SDValue P = DAG.getLoad(
- VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue),
- DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())),
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant);
- if (P.getNode())
- P.getNode()->setIROrder(idx + 1);
-
- for (unsigned j = 0; j < VecSize; ++j) {
- if (i + j >= NumElts)
- break;
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
- DAG.getIntPtrConstant(j, dl));
- if (Ins[InsIdx].VT.getSizeInBits() > EltVT.getSizeInBits())
- Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt);
- InVals.push_back(Elt);
+ for (unsigned j = 0; j < NumElts; ++j) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LoadVT, P,
+ DAG.getIntPtrConstant(j, dl));
+ // We've loaded i1 as an i8 and now must truncate it back to i1
+ if (EltVT == MVT::i1)
+ Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt);
+ // v2f16 was loaded as an i32. Now we must bitcast it back.
+ else if (EltVT == MVT::v2f16)
+ Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt);
+              // Extend the element if necessary (e.g. an i8 is loaded
+              // into an i16 register).
+ if (Ins[InsIdx].VT.isInteger() &&
+ Ins[InsIdx].VT.getSizeInBits() > LoadVT.getSizeInBits()) {
+ unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND;
+ Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt);
}
- Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
+ InVals.push_back(Elt);
}
- InsIdx += NumElts;
- }
- if (NumElts > 0)
- --InsIdx;
- continue;
- }
- // A plain scalar.
- EVT ObjectVT = getValueType(DL, Ty);
- // If ABI, load from the param symbol
- SDValue Arg = getParamSymbol(DAG, idx, PtrVT);
- Value *srcValue = Constant::getNullValue(PointerType::get(
- ObjectVT.getTypeForEVT(F->getContext()), ADDRESS_SPACE_PARAM));
- SDValue p;
- if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) {
- ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ?
- ISD::SEXTLOAD : ISD::ZEXTLOAD;
- p = DAG.getExtLoad(
- ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue),
- ObjectVT,
- DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
- } else {
- p = DAG.getLoad(
- Ins[InsIdx].VT, dl, Root, Arg, MachinePointerInfo(srcValue),
- DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
+ // Reset vector tracking state.
+ VecIdx = -1;
+ }
+ ++InsIdx;
}
- if (p.getNode())
- p.getNode()->setIROrder(idx + 1);
- InVals.push_back(p);
+ if (VTs.size() > 0)
+ --InsIdx;
continue;
}
@@ -2412,164 +2510,77 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- const Function *F = MF.getFunction();
- Type *RetTy = F->getReturnType();
- const DataLayout &TD = DAG.getDataLayout();
+ Type *RetTy = MF.getFunction()->getReturnType();
bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
return Chain;
- if (VectorType *VTy = dyn_cast<VectorType>(RetTy)) {
- // If we have a vector type, the OutVals array will be the scalarized
- // components and we have combine them into 1 or more vector stores.
- unsigned NumElts = VTy->getNumElements();
- assert(NumElts == Outs.size() && "Bad scalarization of return value");
+  const DataLayout &DL = DAG.getDataLayout();
+ SmallVector<EVT, 16> VTs;
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets);
+ assert(VTs.size() == OutVals.size() && "Bad return value decomposition");
+
+ auto VectorInfo = VectorizePTXValueVTs(
+ VTs, Offsets, RetTy->isSized() ? DL.getABITypeAlignment(RetTy) : 1);
+
+ // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than
+ // 32-bits are sign extended or zero extended, depending on whether
+ // they are signed or unsigned types.
+ bool ExtendIntegerRetVal =
+ RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32;
+
+ SmallVector<SDValue, 6> StoreOperands;
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ // New load/store. Record chain and offset operands.
+ if (VectorInfo[i] & PVF_FIRST) {
+ assert(StoreOperands.empty() && "Orphaned operand list.");
+ StoreOperands.push_back(Chain);
+ StoreOperands.push_back(DAG.getConstant(Offsets[i], dl, MVT::i32));
+ }
- // const_cast can be removed in later LLVM versions
- EVT EltVT = getValueType(TD, RetTy).getVectorElementType();
- bool NeedExtend = false;
- if (EltVT.getSizeInBits() < 16)
- NeedExtend = true;
-
- // V1 store
- if (NumElts == 1) {
- SDValue StoreVal = OutVals[0];
- // We only have one element, so just directly store it
- if (NeedExtend)
- StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
- SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
- DAG.getVTList(MVT::Other), Ops,
- EltVT, MachinePointerInfo());
- } else if (NumElts == 2) {
- // V2 store
- SDValue StoreVal0 = OutVals[0];
- SDValue StoreVal1 = OutVals[1];
-
- if (NeedExtend) {
- StoreVal0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal0);
- StoreVal1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal1);
- }
+ SDValue RetVal = OutVals[i];
+ if (ExtendIntegerRetVal) {
+ RetVal = DAG.getNode(Outs[i].Flags.isSExt() ? ISD::SIGN_EXTEND
+ : ISD::ZERO_EXTEND,
+ dl, MVT::i32, RetVal);
+ } else if (RetVal.getValueSizeInBits() < 16) {
+      // Use 16-bit registers for small loads/stores, as that is the
+      // smallest general-purpose register size supported by NVPTX.
+ RetVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, RetVal);
+ }
- SDValue Ops[] = { Chain, DAG.getConstant(0, dl, MVT::i32), StoreVal0,
- StoreVal1 };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl,
- DAG.getVTList(MVT::Other), Ops,
- EltVT, MachinePointerInfo());
- } else {
- // V4 stores
- // We have at least 4 elements (<3 x Ty> expands to 4 elements) and the
- // vector will be expanded to a power of 2 elements, so we know we can
- // always round up to the next multiple of 4 when creating the vector
- // stores.
- // e.g. 4 elem => 1 st.v4
- // 6 elem => 2 st.v4
- // 8 elem => 2 st.v4
- // 11 elem => 3 st.v4
-
- unsigned VecSize = 4;
- if (OutVals[0].getValueSizeInBits() == 64)
- VecSize = 2;
-
- unsigned Offset = 0;
-
- EVT VecVT =
- EVT::getVectorVT(F->getContext(), EltVT, VecSize);
- unsigned PerStoreOffset =
- TD.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
-
- for (unsigned i = 0; i < NumElts; i += VecSize) {
- // Get values
- SDValue StoreVal;
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(DAG.getConstant(Offset, dl, MVT::i32));
- unsigned Opc = NVPTXISD::StoreRetvalV2;
- EVT ExtendedVT = (NeedExtend) ? MVT::i16 : OutVals[0].getValueType();
-
- StoreVal = OutVals[i];
- if (NeedExtend)
- StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
- Ops.push_back(StoreVal);
-
- if (i + 1 < NumElts) {
- StoreVal = OutVals[i + 1];
- if (NeedExtend)
- StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
- } else {
- StoreVal = DAG.getUNDEF(ExtendedVT);
- }
- Ops.push_back(StoreVal);
-
- if (VecSize == 4) {
- Opc = NVPTXISD::StoreRetvalV4;
- if (i + 2 < NumElts) {
- StoreVal = OutVals[i + 2];
- if (NeedExtend)
- StoreVal =
- DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
- } else {
- StoreVal = DAG.getUNDEF(ExtendedVT);
- }
- Ops.push_back(StoreVal);
-
- if (i + 3 < NumElts) {
- StoreVal = OutVals[i + 3];
- if (NeedExtend)
- StoreVal =
- DAG.getNode(ISD::ZERO_EXTEND, dl, ExtendedVT, StoreVal);
- } else {
- StoreVal = DAG.getUNDEF(ExtendedVT);
- }
- Ops.push_back(StoreVal);
- }
+ // Record the value to return.
+ StoreOperands.push_back(RetVal);
- // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size());
- Chain =
- DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops,
- EltVT, MachinePointerInfo());
- Offset += PerStoreOffset;
- }
- }
- } else {
- SmallVector<EVT, 16> ValVTs;
- SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, DAG.getDataLayout(), RetTy, ValVTs, &Offsets, 0);
- assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");
-
- for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- SDValue theVal = OutVals[i];
- EVT TheValType = theVal.getValueType();
- unsigned numElems = 1;
- if (TheValType.isVector())
- numElems = TheValType.getVectorNumElements();
- for (unsigned j = 0, je = numElems; j != je; ++j) {
- SDValue TmpVal = theVal;
- if (TheValType.isVector())
- TmpVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- TheValType.getVectorElementType(), TmpVal,
- DAG.getIntPtrConstant(j, dl));
- EVT TheStoreType = ValVTs[i];
- if (RetTy->isIntegerTy() && TD.getTypeAllocSizeInBits(RetTy) < 32) {
- // The following zero-extension is for integer types only, and
- // specifically not for aggregates.
- TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal);
- TheStoreType = MVT::i32;
- }
- else if (TmpVal.getValueSizeInBits() < 16)
- TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal);
-
- SDValue Ops[] = {
- Chain,
- DAG.getConstant(Offsets[i], dl, MVT::i32),
- TmpVal };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
- DAG.getVTList(MVT::Other), Ops,
- TheStoreType,
- MachinePointerInfo());
+ // That's the last element of this store op.
+ if (VectorInfo[i] & PVF_LAST) {
+ NVPTXISD::NodeType Op;
+ unsigned NumElts = StoreOperands.size() - 2;
+ switch (NumElts) {
+ case 1:
+ Op = NVPTXISD::StoreRetval;
+ break;
+ case 2:
+ Op = NVPTXISD::StoreRetvalV2;
+ break;
+ case 4:
+ Op = NVPTXISD::StoreRetvalV4;
+ break;
+ default:
+ llvm_unreachable("Invalid vector info.");
}
+
+ // Adjust type of load/store op if we've extended the scalar
+ // return value.
+ EVT TheStoreType = ExtendIntegerRetVal ? MVT::i32 : VTs[i];
+ Chain = DAG.getMemIntrinsicNode(Op, dl, DAG.getVTList(MVT::Other),
+ StoreOperands, TheStoreType,
+ MachinePointerInfo(), 1);
+ // Clean up vector state.
+ StoreOperands.clear();
}
}
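
The store-emission loop above is driven entirely by the PVF_FIRST/PVF_LAST flags that VectorizePTXValueVTs attaches to each element. As a rough model of that contract — not the LLVM implementation; the plain arrays, the fixed chunk size, and the printf output are invented for illustration — the C++ sketch below tags elements the same way and shows how a consumer loop collects operands and emits one store per chunk:

    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Simplified element flags, mirroring PVF_FIRST/PVF_LAST in the patch.
    enum ElemFlags { PVF_INNER = 0, PVF_FIRST = 1, PVF_LAST = 2 };

    // Tag N same-type elements into chunks of ChunkSize (1, 2, or 4).
    static std::vector<unsigned> tagChunks(unsigned N, unsigned ChunkSize) {
      std::vector<unsigned> Flags(N, PVF_INNER);
      for (unsigned i = 0; i < N; ++i) {
        if (i % ChunkSize == 0)
          Flags[i] |= PVF_FIRST;
        if (i % ChunkSize == ChunkSize - 1 || i == N - 1)
          Flags[i] |= PVF_LAST;
      }
      return Flags;
    }

    int main() {
      // Consumer loop shaped like the StoreRetval loop in the diff: gather
      // operands, then "emit" one store per chunk when PVF_LAST is seen.
      std::vector<unsigned> Flags = tagChunks(8, 4);
      std::vector<int> Pending;
      for (unsigned i = 0; i < Flags.size(); ++i) {
        if (Flags[i] & PVF_FIRST)
          assert(Pending.empty() && "Orphaned operand list.");
        Pending.push_back((int)i); // stand-in for OutVals[i]
        if (Flags[i] & PVF_LAST) {
          std::printf("st.v%zu of elements", Pending.size());
          for (int E : Pending)
            std::printf(" %d", E);
          std::printf("\n");
          Pending.clear();
        }
      }
    }
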
@@ -3863,27 +3874,35 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
CodeGenOpt::Level OptLevel) const {
- const Function *F = MF.getFunction();
- const TargetOptions &TO = MF.getTarget().Options;
-
// Always honor command-line argument
- if (FMAContractLevelOpt.getNumOccurrences() > 0) {
+ if (FMAContractLevelOpt.getNumOccurrences() > 0)
return FMAContractLevelOpt > 0;
- } else if (OptLevel == 0) {
- // Do not contract if we're not optimizing the code
+
+ // Do not contract if we're not optimizing the code.
+ if (OptLevel == 0)
return false;
- } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) {
- // Honor TargetOptions flags that explicitly say fusion is okay
+
+ // Honor TargetOptions flags that explicitly say fusion is okay.
+ if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast)
return true;
- } else if (F->hasFnAttribute("unsafe-fp-math")) {
- // Check for unsafe-fp-math=true coming from Clang
+
+ return allowUnsafeFPMath(MF);
+}
+
+bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const {
+ // Honor TargetOptions flags that explicitly say unsafe math is okay.
+ if (MF.getTarget().Options.UnsafeFPMath)
+ return true;
+
+ // Allow unsafe math if unsafe-fp-math attribute explicitly says so.
+ const Function *F = MF.getFunction();
+ if (F->hasFnAttribute("unsafe-fp-math")) {
Attribute Attr = F->getFnAttribute("unsafe-fp-math");
StringRef Val = Attr.getValueAsString();
if (Val == "true")
return true;
}
- // We did not have a clear indication that fusion is allowed, so assume not
return false;
}
@@ -4088,67 +4107,6 @@ static SDValue PerformANDCombine(SDNode *N,
return SDValue();
}
-static SDValue PerformSELECTCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- // Currently this detects patterns for integer min and max and
- // lowers them to PTX-specific intrinsics that enable hardware
- // support.
-
- const SDValue Cond = N->getOperand(0);
- if (Cond.getOpcode() != ISD::SETCC) return SDValue();
-
- const SDValue LHS = Cond.getOperand(0);
- const SDValue RHS = Cond.getOperand(1);
- const SDValue True = N->getOperand(1);
- const SDValue False = N->getOperand(2);
- if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
- return SDValue();
-
- const EVT VT = N->getValueType(0);
- if (VT != MVT::i32 && VT != MVT::i64) return SDValue();
-
- const ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
- SDValue Larger; // The larger of LHS and RHS when condition is true.
- switch (CC) {
- case ISD::SETULT:
- case ISD::SETULE:
- case ISD::SETLT:
- case ISD::SETLE:
- Larger = RHS;
- break;
-
- case ISD::SETGT:
- case ISD::SETGE:
- case ISD::SETUGT:
- case ISD::SETUGE:
- Larger = LHS;
- break;
-
- default:
- return SDValue();
- }
- const bool IsMax = (Larger == True);
- const bool IsSigned = ISD::isSignedIntSetCC(CC);
-
- unsigned IntrinsicId;
- if (VT == MVT::i32) {
- if (IsSigned)
- IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i;
- else
- IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui;
- } else {
- assert(VT == MVT::i64);
- if (IsSigned)
- IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll;
- else
- IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull;
- }
-
- SDLoc DL(N);
- return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS);
-}
-
static SDValue PerformREMCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
CodeGenOpt::Level OptLevel) {
@@ -4344,6 +4302,27 @@ static SDValue PerformSHLCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformSETCCCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT CCType = N->getValueType(0);
+ SDValue A = N->getOperand(0);
+ SDValue B = N->getOperand(1);
+
+ if (CCType != MVT::v2i1 || A.getValueType() != MVT::v2f16)
+ return SDValue();
+
+ SDLoc DL(N);
+ // setp.f16x2 returns two scalar predicates, which we need to
+ // convert back to v2i1. The returned result will be scalarized by
+ // the legalizer, but the comparison will remain a single vector
+ // instruction.
+ SDValue CCNode = DCI.DAG.getNode(NVPTXISD::SETP_F16X2, DL,
+ DCI.DAG.getVTList(MVT::i1, MVT::i1),
+ {A, B, N->getOperand(2)});
+ return DCI.DAG.getNode(ISD::BUILD_VECTOR, DL, CCType, CCNode.getValue(0),
+ CCNode.getValue(1));
+}
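
As a model of the lanewise semantics the combine depends on: a single setp.f16x2 compares both lanes at once and produces two scalar predicates, which the BUILD_VECTOR then re-packs into the v2i1 the DAG expects. A plain C++ sketch of that behavior — float stands in for f16 (standard C++ has no half type), and the function name is invented:

    #include <array>
    #include <cstdio>

    // Emulates setp.lt.f16x2: one "instruction" comparing both lanes and
    // yielding two scalar predicates, packed back together here just as
    // the BUILD_VECTOR in the combine does.
    static std::array<bool, 2> setp_lt_f16x2(std::array<float, 2> A,
                                             std::array<float, 2> B) {
      return {A[0] < B[0], A[1] < B[1]};
    }

    int main() {
      std::array<float, 2> A = {1.0f, 4.0f};
      std::array<float, 2> B = {2.0f, 3.0f};
      std::array<bool, 2> P = setp_lt_f16x2(A, B); // the v2i1 result
      std::printf("p0=%d p1=%d\n", P[0], P[1]);    // prints p0=1 p1=0
    }
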
+
SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
CodeGenOpt::Level OptLevel = getTargetMachine().getOptLevel();
@@ -4358,11 +4337,11 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
return PerformSHLCombine(N, DCI, OptLevel);
case ISD::AND:
return PerformANDCombine(N, DCI);
- case ISD::SELECT:
- return PerformSELECTCombine(N, DCI);
case ISD::UREM:
case ISD::SREM:
return PerformREMCombine(N, DCI, OptLevel);
+ case ISD::SETCC:
+ return PerformSETCCCombine(N, DCI);
}
return SDValue();
}
@@ -4386,12 +4365,15 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
case MVT::v2i16:
case MVT::v2i32:
case MVT::v2i64:
+ case MVT::v2f16:
case MVT::v2f32:
case MVT::v2f64:
case MVT::v4i8:
case MVT::v4i16:
case MVT::v4i32:
+ case MVT::v4f16:
case MVT::v4f32:
+ case MVT::v8f16: // <4 x f16x2>
// This is a "native" vector type
break;
}
@@ -4425,6 +4407,7 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
unsigned Opcode = 0;
SDVTList LdResVTs;
+ bool LoadF16x2 = false;
switch (NumElts) {
default:
@@ -4439,6 +4422,18 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
LdResVTs = DAG.getVTList(ListVTs);
break;
}
+ case 8: {
+ // v8f16 is a special case. PTX doesn't have an ld.v8.f16
+ // instruction. Instead, we split the vector into v2f16 chunks and
+ // load them with ld.v4.b32.
+ assert(EltVT == MVT::f16 && "Unsupported v8 vector type.");
+ LoadF16x2 = true;
+ Opcode = NVPTXISD::LoadV4;
+ EVT ListVTs[] = {MVT::v2f16, MVT::v2f16, MVT::v2f16, MVT::v2f16,
+ MVT::Other};
+ LdResVTs = DAG.getVTList(ListVTs);
+ break;
+ }
}
// Copy regular operands
@@ -4452,13 +4447,26 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
LD->getMemoryVT(),
LD->getMemOperand());
- SmallVector<SDValue, 4> ScalarRes;
-
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue Res = NewLD.getValue(i);
- if (NeedTrunc)
- Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
- ScalarRes.push_back(Res);
+ SmallVector<SDValue, 8> ScalarRes;
+ if (LoadF16x2) {
+ // Split v2f16 subvectors back into individual elements.
+ NumElts /= 2;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue SubVector = NewLD.getValue(i);
+ SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SubVector,
+ DAG.getIntPtrConstant(1, DL));
+ ScalarRes.push_back(E0);
+ ScalarRes.push_back(E1);
+ }
+ } else {
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Res = NewLD.getValue(i);
+ if (NeedTrunc)
+ Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+ ScalarRes.push_back(Res);
+ }
}
SDValue LoadChain = NewLD.getValue(NumElts);
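
To make the v8f16 path concrete: eight f16 values are fetched as four 32-bit words (one ld.v4.b32), and each word is then split into its two 16-bit lanes, mirroring the EXTRACT_VECTOR_ELT pairs above. A sketch, assuming a little-endian host and using raw uint16_t bit patterns as stand-ins for f16 values:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      // Eight f16 values, stored as raw 16-bit patterns (1.0, 2.0, ...).
      uint16_t Halves[8] = {0x3C00, 0x4000, 0x4200, 0x4400,
                            0x4500, 0x4600, 0x4700, 0x4800};

      // One ld.v4.b32: view the same memory as four 32-bit words.
      uint32_t Words[4];
      std::memcpy(Words, Halves, sizeof(Words));

      // Split each v2f16 word back into its two scalar lanes, as the
      // LoadF16x2 path does with EXTRACT_VECTOR_ELT.
      for (int i = 0; i < 4; ++i) {
        uint16_t E0 = (uint16_t)(Words[i] & 0xFFFF); // lane 0: low half
        uint16_t E1 = (uint16_t)(Words[i] >> 16);    // lane 1: high half
        std::printf("word %d -> 0x%04X 0x%04X\n", i, E0, E1);
      }
    }
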
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index e433aed7781b..9d7b70d80c11 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -56,6 +56,7 @@ enum NodeType : unsigned {
MUL_WIDE_SIGNED,
MUL_WIDE_UNSIGNED,
IMAD,
+ SETP_F16X2,
Dummy,
LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
@@ -73,7 +74,7 @@ enum NodeType : unsigned {
StoreParamV2,
StoreParamV4,
StoreParamS32, // to sext and store a <32bit value, not used currently
- StoreParamU32, // to zext and store a <32bit value, not used currently
+ StoreParamU32, // to zext and store a <32bit value, not used currently
StoreRetval,
StoreRetvalV2,
StoreRetvalV4,
@@ -510,17 +511,48 @@ public:
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
+ // Get the degree of precision we want from 32-bit floating point division
+ // operations.
+ //
+ // 0 - Use ptx div.approx
+ // 1 - Use ptx div.full (approximate, but less so than div.approx)
+ // 2 - Use IEEE-compliant div instructions, if available.
+ int getDivF32Level() const;
+
+ // Get whether we should use a precise or approximate 32-bit floating point
+ // sqrt instruction.
+ bool usePrecSqrtF32() const;
+
+ // Get whether we should use instructions that flush floating-point denormals
+ // to sign-preserving zero.
+ bool useF32FTZ(const MachineFunction &MF) const;
+
+ SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &ExtraSteps, bool &UseOneConst,
+ bool Reciprocal) const override;
+
+ unsigned combineRepeatedFPDivisors() const override { return 2; }
+
bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
+ bool allowUnsafeFPMath(MachineFunction &MF) const;
bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
+ // The default is to transform llvm.ctlz(x, false) (where false indicates that
+ // x == 0 is not undefined behavior) into a branch that checks whether x is 0
+ // and avoids calling ctlz in that case. We have a dedicated ctlz
+ // instruction, so we say that ctlz is cheap to speculate.
+ bool isCheapToSpeculateCtlz() const override { return true; }
+
private:
const NVPTXSubtarget &STI; // cache the subtarget here
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index 8d00bbb5e9c2..f12ed81b6d9f 100644
--- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -96,9 +96,7 @@ bool NVPTXImageOptimizer::replaceIsTypePSampler(Instruction &I) {
// This is an OpenCL sampler, so it must be a samplerref
replaceWith(&I, ConstantInt::getTrue(I.getContext()));
return true;
- } else if (isImageWriteOnly(*TexHandle) ||
- isImageReadWrite(*TexHandle) ||
- isImageReadOnly(*TexHandle)) {
+ } else if (isImage(*TexHandle)) {
// This is an OpenCL image, so it cannot be a samplerref
replaceWith(&I, ConstantInt::getFalse(I.getContext()));
return true;
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 7f89742a3215..3026f0be242d 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -52,6 +52,11 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else if (DestRC == &NVPTX::Int64RegsRegClass) {
Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr
: NVPTX::BITCONVERT_64_F2I);
+ } else if (DestRC == &NVPTX::Float16RegsRegClass) {
+ Op = (SrcRC == &NVPTX::Float16RegsRegClass ? NVPTX::FMOV16rr
+ : NVPTX::BITCONVERT_16_I2F);
+ } else if (DestRC == &NVPTX::Float16x2RegsRegClass) {
+ Op = NVPTX::IMOV32rr;
} else if (DestRC == &NVPTX::Float32RegsRegClass) {
Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32rr
: NVPTX::BITCONVERT_32_I2F);
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 0fbb0448e4c4..2b847414b8a8 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -18,6 +18,10 @@ let hasSideEffects = 0 in {
def NOP : NVPTXInst<(outs), (ins), "", []>;
}
+let OperandType = "OPERAND_IMMEDIATE" in {
+ def f16imm : Operand<f16>;
+}
+
// List of vector specific properties
def isVecLD : VecInstTypeEnum<1>;
def isVecST : VecInstTypeEnum<2>;
@@ -98,6 +102,9 @@ def CmpNAN_FTZ : PatLeaf<(i32 0x111)>;
def CmpMode : Operand<i32> {
let PrintMethod = "printCmpMode";
}
+def VecElement : Operand<i32> {
+ let PrintMethod = "printVecElement";
+}
//===----------------------------------------------------------------------===//
// NVPTX Instruction Predicate Definitions
@@ -134,6 +141,7 @@ def doMulWide : Predicate<"doMulWide">;
def allowFMA : Predicate<"allowFMA()">;
def noFMA : Predicate<"!allowFMA()">;
+def allowUnsafeFPMath : Predicate<"allowUnsafeFPMath()">;
def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">;
def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">;
@@ -148,6 +156,7 @@ def true : Predicate<"true">;
def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
+def useFP16Math: Predicate<"Subtarget->allowFP16Math()">;
//===----------------------------------------------------------------------===//
// Some Common Instruction Class Templates
@@ -239,11 +248,11 @@ multiclass F3<string OpcStr, SDNode OpNode> {
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
}
-// Template for instructions which take three fp64 or fp32 args. The
+// Template for instructions which take three FP args. The
// instructions are named "<OpcStr>.f<Width>" (e.g. "add.f64").
//
// Also defines ftz (flush subnormal inputs and results to sign-preserving
-// zero) variants for fp32 functions.
+// zero) variants for fp32/fp16 functions.
//
// This multiclass should be used for nodes that can be folded to make fma ops.
// In this case, we use the ".rn" variant when FMA is disabled, as this behaves
@@ -286,6 +295,32 @@ multiclass F3_fma_component<string OpcStr, SDNode OpNode> {
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
Requires<[allowFMA]>;
+ def f16rr_ftz :
+ NVPTXInst<(outs Float16Regs:$dst),
+ (ins Float16Regs:$a, Float16Regs:$b),
+ !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"),
+ [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>,
+ Requires<[useFP16Math, allowFMA, doF32FTZ]>;
+ def f16rr :
+ NVPTXInst<(outs Float16Regs:$dst),
+ (ins Float16Regs:$a, Float16Regs:$b),
+ !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"),
+ [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>,
+ Requires<[useFP16Math, allowFMA]>;
+
+ def f16x2rr_ftz :
+ NVPTXInst<(outs Float16x2Regs:$dst),
+ (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"),
+ [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>,
+ Requires<[useFP16Math, allowFMA, doF32FTZ]>;
+ def f16x2rr :
+ NVPTXInst<(outs Float16x2Regs:$dst),
+ (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"),
+ [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>,
+ Requires<[useFP16Math, allowFMA]>;
+
// These have strange names so we don't perturb existing mir tests.
def _rnf64rr :
NVPTXInst<(outs Float64Regs:$dst),
@@ -323,6 +358,30 @@ multiclass F3_fma_component<string OpcStr, SDNode OpNode> {
!strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
Requires<[noFMA]>;
+ def _rnf16rr_ftz :
+ NVPTXInst<(outs Float16Regs:$dst),
+ (ins Float16Regs:$a, Float16Regs:$b),
+ !strconcat(OpcStr, ".rn.ftz.f16 \t$dst, $a, $b;"),
+ [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>,
+ Requires<[useFP16Math, noFMA, doF32FTZ]>;
+ def _rnf16rr :
+ NVPTXInst<(outs Float16Regs:$dst),
+ (ins Float16Regs:$a, Float16Regs:$b),
+ !strconcat(OpcStr, ".rn.f16 \t$dst, $a, $b;"),
+ [(set Float16Regs:$dst, (OpNode Float16Regs:$a, Float16Regs:$b))]>,
+ Requires<[useFP16Math, noFMA]>;
+ def _rnf16x2rr_ftz :
+ NVPTXInst<(outs Float16x2Regs:$dst),
+ (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ !strconcat(OpcStr, ".rn.ftz.f16x2 \t$dst, $a, $b;"),
+ [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>,
+ Requires<[useFP16Math, noFMA, doF32FTZ]>;
+ def _rnf16x2rr :
+ NVPTXInst<(outs Float16x2Regs:$dst),
+ (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ !strconcat(OpcStr, ".rn.f16x2 \t$dst, $a, $b;"),
+ [(set Float16x2Regs:$dst, (OpNode Float16x2Regs:$a, Float16x2Regs:$b))]>,
+ Requires<[useFP16Math, noFMA]>;
}
// Template for operations which take two f32 or f64 operands. Provides three
@@ -358,57 +417,57 @@ let hasSideEffects = 0 in {
NVPTXInst<(outs RC:$dst),
(ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".s8\t$dst, $src;"), []>;
+ FromName, ".s8 \t$dst, $src;"), []>;
def _u8 :
NVPTXInst<(outs RC:$dst),
(ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".u8\t$dst, $src;"), []>;
+ FromName, ".u8 \t$dst, $src;"), []>;
def _s16 :
NVPTXInst<(outs RC:$dst),
(ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".s16\t$dst, $src;"), []>;
+ FromName, ".s16 \t$dst, $src;"), []>;
def _u16 :
NVPTXInst<(outs RC:$dst),
(ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".u16\t$dst, $src;"), []>;
- def _f16 :
- NVPTXInst<(outs RC:$dst),
- (ins Int16Regs:$src, CvtMode:$mode),
- !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".f16\t$dst, $src;"), []>;
+ FromName, ".u16 \t$dst, $src;"), []>;
def _s32 :
NVPTXInst<(outs RC:$dst),
(ins Int32Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".s32\t$dst, $src;"), []>;
+ FromName, ".s32 \t$dst, $src;"), []>;
def _u32 :
NVPTXInst<(outs RC:$dst),
(ins Int32Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".u32\t$dst, $src;"), []>;
+ FromName, ".u32 \t$dst, $src;"), []>;
def _s64 :
NVPTXInst<(outs RC:$dst),
(ins Int64Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".s64\t$dst, $src;"), []>;
+ FromName, ".s64 \t$dst, $src;"), []>;
def _u64 :
NVPTXInst<(outs RC:$dst),
(ins Int64Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".u64\t$dst, $src;"), []>;
+ FromName, ".u64 \t$dst, $src;"), []>;
+ def _f16 :
+ NVPTXInst<(outs RC:$dst),
+ (ins Float16Regs:$src, CvtMode:$mode),
+ !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
+ FromName, ".f16 \t$dst, $src;"), []>;
def _f32 :
NVPTXInst<(outs RC:$dst),
(ins Float32Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".f32\t$dst, $src;"), []>;
+ FromName, ".f32 \t$dst, $src;"), []>;
def _f64 :
NVPTXInst<(outs RC:$dst),
(ins Float64Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".f64\t$dst, $src;"), []>;
+ FromName, ".f64 \t$dst, $src;"), []>;
}
// Generate cvts from all types to all types.
@@ -416,11 +475,11 @@ let hasSideEffects = 0 in {
defm CVT_u8 : CVT_FROM_ALL<"u8", Int16Regs>;
defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
- defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>;
defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
+ defm CVT_f16 : CVT_FROM_ALL<"f16", Float16Regs>;
defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;
@@ -458,7 +517,7 @@ multiclass ADD_SUB_i1<SDNode OpNode> {
defm ADD_i1 : ADD_SUB_i1<add>;
defm SUB_i1 : ADD_SUB_i1<sub>;
-// int16, int32, and int64 signed addition. Since nvptx is 2's compliment, we
+// int16, int32, and int64 signed addition. Since nvptx is 2's complement, we
// also use these for unsigned arithmetic.
defm ADD : I3<"add.s", add>;
defm SUB : I3<"sub.s", sub>;
@@ -485,6 +544,24 @@ defm UDIV : I3<"div.u", udiv>;
defm SREM : I3<"rem.s", srem>;
defm UREM : I3<"rem.u", urem>;
+// Integer absolute value. NumBits should be the bit width of RC minus one.
+// This idiom implements the algorithm at
+// http://graphics.stanford.edu/~seander/bithacks.html#IntegerAbs.
+multiclass ABS<RegisterClass RC, int NumBits, string SizeName> {
+ def : NVPTXInst<(outs RC:$dst), (ins RC:$a),
+ !strconcat("abs", SizeName, " \t$dst, $a;"),
+ [(set RC:$dst, (xor (add (sra RC:$a, (i32 NumBits)), RC:$a),
+ (sra RC:$a, (i32 NumBits))))]>;
+}
+defm ABS_16 : ABS<Int16Regs, 15, ".s16">;
+defm ABS_32 : ABS<Int32Regs, 31, ".s32">;
+defm ABS_64 : ABS<Int64Regs, 63, ".s64">;
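
The trick behind the ABS pattern is easy to verify in plain C++: with m = x >> (width - 1), m is 0 for non-negative x and all ones for negative x, so (x + m) ^ m leaves non-negative values unchanged and negates negative ones, per the linked bithacks page. A quick sanity check (assumes arithmetic right shift of signed values, which mainstream compilers provide):

    #include <cassert>
    #include <cstdint>

    // abs(x) via the (x + m) ^ m idiom, where m = x >> (bit width - 1).
    // This mirrors the (xor (add (sra a, NumBits), a), (sra a, NumBits))
    // pattern in the ABS multiclass. Note that INT32_MIN would wrap (the
    // add overflows), just as abs does in two's-complement hardware.
    static int32_t absBithack(int32_t X) {
      int32_t M = X >> 31; // 0 if X >= 0, all ones if X < 0
      return (X + M) ^ M;
    }

    int main() {
      assert(absBithack(5) == 5);
      assert(absBithack(-5) == 5);
      assert(absBithack(0) == 0);
      assert(absBithack(-123456) == 123456);
    }
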
+
+// Integer min/max.
+defm SMAX : I3<"max.s", smax>;
+defm UMAX : I3<"max.u", umax>;
+defm SMIN : I3<"min.s", smin>;
+defm UMIN : I3<"min.u", umin>;
//
// Wide multiplication
@@ -748,6 +825,15 @@ def DoubleConst1 : PatLeaf<(fpimm), [{
N->getValueAPF().convertToDouble() == 1.0;
}]>;
+// Loads an FP16 constant into a register.
+//
+// ptxas does not have hex representation for fp16, so we can't use
+// fp16 immediate values in .f16 instructions. Instead we have to load
+// the constant into a register using mov.b16.
+def LOAD_CONST_F16 :
+ NVPTXInst<(outs Float16Regs:$dst), (ins f16imm:$a),
+ "mov.b16 \t$dst, $a;", []>;
+
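Since the operand of LOAD_CONST_F16 is emitted as the raw half-precision bit pattern, it helps to see how a float constant maps onto those 16 bits. Below is a minimal, truncating float-to-f16 bit converter for normal, in-range values only — an illustration of the layout, not the rounding-aware conversion LLVM's APFloat performs:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Truncating float->f16 bit conversion for normal, in-range values.
    // Real converters also handle rounding, subnormals, infinities, and
    // NaNs; this sketch deliberately does not.
    static uint16_t f32ToF16Bits(float F) {
      uint32_t B;
      std::memcpy(&B, &F, sizeof(B));
      uint16_t Sign = (uint16_t)((B >> 16) & 0x8000);
      int32_t Exp = (int32_t)((B >> 23) & 0xFF) - 127 + 15; // rebias
      uint16_t Mant = (uint16_t)((B >> 13) & 0x3FF); // keep top 10 bits
      return (uint16_t)(Sign | ((uint16_t)Exp << 10) | Mant);
    }

    int main() {
      // 1.0f -> 0x3C00: the pattern a "mov.b16 %h, 0x3C00;" would load.
      std::printf("0x%04X\n", f32ToF16Bits(1.0f));
    }
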
defm FADD : F3_fma_component<"add", fadd>;
defm FSUB : F3_fma_component<"sub", fsub>;
defm FMUL : F3_fma_component<"mul", fmul>;
@@ -908,18 +994,9 @@ def FDIV32ri_prec :
Requires<[reqPTX20]>;
//
-// F32 rsqrt
+// FMA
//
-def RSQRTF32approx1r : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b),
- "rsqrt.approx.f32 \t$dst, $b;", []>;
-
-// Convert 1.0f/sqrt(x) to rsqrt.approx.f32. (There is an rsqrt.approx.f64, but
-// it's emulated in software.)
-def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)),
- (RSQRTF32approx1r Float32Regs:$b)>,
- Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>;
-
multiclass FMA<string OpcStr, RegisterClass RC, Operand ImmCls, Predicate Pred> {
def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
@@ -942,6 +1019,17 @@ multiclass FMA<string OpcStr, RegisterClass RC, Operand ImmCls, Predicate Pred>
Requires<[Pred]>;
}
+multiclass FMA_F16<string OpcStr, RegisterClass RC, Predicate Pred> {
+ def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>,
+ Requires<[useFP16Math, Pred]>;
+}
+
+defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", Float16Regs, doF32FTZ>;
+defm FMA16 : FMA_F16<"fma.rn.f16", Float16Regs, true>;
+defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", Float16x2Regs, doF32FTZ>;
+defm FMA16x2 : FMA_F16<"fma.rn.f16x2", Float16x2Regs, true>;
defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>;
defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, true>;
defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, true>;
@@ -949,10 +1037,12 @@ defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, true>;
// sin/cos
def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
"sin.approx.f32 \t$dst, $src;",
- [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
+ [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>,
+ Requires<[allowUnsafeFPMath]>;
def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
"cos.approx.f32 \t$dst, $src;",
- [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
+ [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>,
+ Requires<[allowUnsafeFPMath]>;
// Lower (frem x, y) into (sub x, (mul (floor (div x, y)) y)),
// i.e. "poor man's fmod()"
@@ -1087,6 +1177,16 @@ defm SHL : SHIFT<"shl.b", shl>;
defm SRA : SHIFT<"shr.s", sra>;
defm SRL : SHIFT<"shr.u", srl>;
+// Bit-reverse
+def BREV32 :
+ NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a),
+ "brev.b32 \t$dst, $a;",
+ [(set Int32Regs:$dst, (bitreverse Int32Regs:$a))]>;
+def BREV64 :
+ NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a),
+ "brev.b64 \t$dst, $a;",
+ [(set Int64Regs:$dst, (bitreverse Int64Regs:$a))]>;
+
//
// Rotate: Use ptx shf instruction if available.
//
@@ -1294,15 +1394,15 @@ let hasSideEffects = 0 in {
def rr :
NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp),
!strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
- "\t$dst, $a, $b;"), []>;
+ " \t$dst, $a, $b;"), []>;
def ri :
NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
!strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
- "\t$dst, $a, $b;"), []>;
+ " \t$dst, $a, $b;"), []>;
def ir :
NVPTXInst<(outs Int1Regs:$dst), (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
!strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
- "\t$dst, $a, $b;"), []>;
+ " \t$dst, $a, $b;"), []>;
}
}
@@ -1317,6 +1417,19 @@ defm SETP_s64 : SETP<"s64", Int64Regs, i64imm>;
defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>;
defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;
+def SETP_f16rr :
+ NVPTXInst<(outs Int1Regs:$dst),
+ (ins Float16Regs:$a, Float16Regs:$b, CmpMode:$cmp),
+ "setp${cmp:base}${cmp:ftz}.f16 \t$dst, $a, $b;",
+ []>, Requires<[useFP16Math]>;
+
+def SETP_f16x2rr :
+ NVPTXInst<(outs Int1Regs:$p, Int1Regs:$q),
+ (ins Float16x2Regs:$a, Float16x2Regs:$b, CmpMode:$cmp),
+ "setp${cmp:base}${cmp:ftz}.f16x2 \t$p|$q, $a, $b;",
+ []>,
+ Requires<[useFP16Math]>;
+
// FIXME: This doesn't appear to be correct. The "set" mnemonic has the form
// "set.CmpOp{.ftz}.dtype.stype", where dtype is the type of the destination
@@ -1326,13 +1439,13 @@ let hasSideEffects = 0 in {
multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr : NVPTXInst<(outs Int32Regs:$dst),
(ins RC:$a, RC:$b, CmpMode:$cmp),
- !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
+ !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
def ri : NVPTXInst<(outs Int32Regs:$dst),
(ins RC:$a, ImmCls:$b, CmpMode:$cmp),
- !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
+ !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
def ir : NVPTXInst<(outs Int32Regs:$dst),
(ins ImmCls:$a, RC:$b, CmpMode:$cmp),
- !strconcat("set$cmp.", TypeStr, "\t$dst, $a, $b;"), []>;
+ !strconcat("set$cmp.", TypeStr, " \t$dst, $a, $b;"), []>;
}
}
@@ -1345,6 +1458,7 @@ defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
+defm SET_f16 : SET<"f16", Float16Regs, f16imm>;
defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
@@ -1360,16 +1474,16 @@ let hasSideEffects = 0 in {
multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr : NVPTXInst<(outs RC:$dst),
(ins RC:$a, RC:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
+ !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
def ri : NVPTXInst<(outs RC:$dst),
(ins RC:$a, ImmCls:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
+ !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
def ir : NVPTXInst<(outs RC:$dst),
(ins ImmCls:$a, RC:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
+ !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
def ii : NVPTXInst<(outs RC:$dst),
(ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"), []>;
+ !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"), []>;
}
multiclass SELP_PATTERN<string TypeStr, RegisterClass RC, Operand ImmCls,
@@ -1377,22 +1491,22 @@ let hasSideEffects = 0 in {
def rr :
NVPTXInst<(outs RC:$dst),
(ins RC:$a, RC:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+ !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
[(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>;
def ri :
NVPTXInst<(outs RC:$dst),
(ins RC:$a, ImmCls:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+ !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
[(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>;
def ir :
NVPTXInst<(outs RC:$dst),
(ins ImmCls:$a, RC:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+ !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
[(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>;
def ii :
NVPTXInst<(outs RC:$dst),
(ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
- !strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
+ !strconcat("selp.", TypeStr, " \t$dst, $a, $b, $p;"),
[(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
}
}
@@ -1408,9 +1522,17 @@ defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>;
defm SELP_b64 : SELP_PATTERN<"b64", Int64Regs, i64imm, imm>;
defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>;
defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
+defm SELP_f16 : SELP_PATTERN<"b16", Float16Regs, f16imm, fpimm>;
defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;
+def SELP_f16x2rr :
+ NVPTXInst<(outs Float16x2Regs:$dst),
+ (ins Float16x2Regs:$a, Float16x2Regs:$b, Int1Regs:$p),
+ "selp.b32 \t$dst, $a, $b, $p;",
+ [(set Float16x2Regs:$dst,
+ (select Int1Regs:$p, Float16x2Regs:$a, Float16x2Regs:$b))]>;
+
//-----------------------------------
// Data Movement (Load / Store, Move)
//-----------------------------------
@@ -1472,6 +1594,9 @@ let IsSimpleMove=1, hasSideEffects=0 in {
def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
"mov.u64 \t$dst, $sss;", []>;
+ def FMOV16rr : NVPTXInst<(outs Float16Regs:$dst), (ins Float16Regs:$src),
+ // We have to use .b16 here as there's no mov.f16.
+ "mov.b16 \t$dst, $src;", []>;
def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
"mov.f32 \t$dst, $src;", []>;
def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
@@ -1633,6 +1758,26 @@ def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),
multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
+ // f16 -> pred
+ def : Pat<(i1 (OpNode Float16Regs:$a, Float16Regs:$b)),
+ (SETP_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>,
+ Requires<[useFP16Math,doF32FTZ]>;
+ def : Pat<(i1 (OpNode Float16Regs:$a, Float16Regs:$b)),
+ (SETP_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>,
+ Requires<[useFP16Math]>;
+ def : Pat<(i1 (OpNode Float16Regs:$a, fpimm:$b)),
+ (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
+ Requires<[useFP16Math,doF32FTZ]>;
+ def : Pat<(i1 (OpNode Float16Regs:$a, fpimm:$b)),
+ (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
+ Requires<[useFP16Math]>;
+ def : Pat<(i1 (OpNode fpimm:$a, Float16Regs:$b)),
+ (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>,
+ Requires<[useFP16Math,doF32FTZ]>;
+ def : Pat<(i1 (OpNode fpimm:$a, Float16Regs:$b)),
+ (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>,
+ Requires<[useFP16Math]>;
+
// f32 -> pred
def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
(SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
@@ -1658,6 +1803,26 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
def : Pat<(i1 (OpNode fpimm:$a, Float64Regs:$b)),
(SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
+ // f16 -> i32
+ def : Pat<(i32 (OpNode Float16Regs:$a, Float16Regs:$b)),
+ (SET_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>,
+ Requires<[useFP16Math, doF32FTZ]>;
+ def : Pat<(i32 (OpNode Float16Regs:$a, Float16Regs:$b)),
+ (SET_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>,
+ Requires<[useFP16Math]>;
+ def : Pat<(i32 (OpNode Float16Regs:$a, fpimm:$b)),
+ (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
+ Requires<[useFP16Math, doF32FTZ]>;
+ def : Pat<(i32 (OpNode Float16Regs:$a, fpimm:$b)),
+ (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
+ Requires<[useFP16Math]>;
+ def : Pat<(i32 (OpNode fpimm:$a, Float16Regs:$b)),
+ (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>,
+ Requires<[useFP16Math, doF32FTZ]>;
+ def : Pat<(i32 (OpNode fpimm:$a, Float16Regs:$b)),
+ (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>,
+ Requires<[useFP16Math]>;
+
// f32 -> i32
def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
(SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
@@ -1825,40 +1990,39 @@ def RETURNNode :
let mayLoad = 1 in {
class LoadParamMemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
- !strconcat(!strconcat("ld.param", opstr),
- "\t$dst, [retval0+$b];"),
+ !strconcat("ld.param", opstr, " \t$dst, [retval0+$b];"),
[]>;
class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
!strconcat("ld.param.v2", opstr,
- "\t{{$dst, $dst2}}, [retval0+$b];"), []>;
+ " \t{{$dst, $dst2}}, [retval0+$b];"), []>;
class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
(ins i32imm:$b),
!strconcat("ld.param.v4", opstr,
- "\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
+ " \t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
[]>;
}
class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
- !strconcat("mov", opstr, "\t$dst, retval$b;"),
+ !strconcat("mov", opstr, " \t$dst, retval$b;"),
[(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
let mayStore = 1 in {
class StoreParamInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
- !strconcat("st.param", opstr, "\t[param$a+$b], $val;"),
+ !strconcat("st.param", opstr, " \t[param$a+$b], $val;"),
[]>;
class StoreParamV2Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, regclass:$val2,
i32imm:$a, i32imm:$b),
!strconcat("st.param.v2", opstr,
- "\t[param$a+$b], {{$val, $val2}};"),
+ " \t[param$a+$b], {{$val, $val2}};"),
[]>;
class StoreParamV4Inst<NVPTXRegClass regclass, string opstr> :
@@ -1866,18 +2030,18 @@ let mayStore = 1 in {
regclass:$val4, i32imm:$a,
i32imm:$b),
!strconcat("st.param.v4", opstr,
- "\t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
+ " \t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
[]>;
class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
- !strconcat("st.param", opstr, "\t[func_retval0+$a], $val;"),
+ !strconcat("st.param", opstr, " \t[func_retval0+$a], $val;"),
[]>;
class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
!strconcat("st.param.v2", opstr,
- "\t[func_retval0+$a], {{$val, $val2}};"),
+ " \t[func_retval0+$a], {{$val, $val2}};"),
[]>;
class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
@@ -1885,7 +2049,7 @@ let mayStore = 1 in {
(ins regclass:$val, regclass:$val2, regclass:$val3,
regclass:$val4, i32imm:$a),
!strconcat("st.param.v4", opstr,
- "\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
+ " \t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
[]>;
}
@@ -1941,10 +2105,16 @@ def LoadParamMemV2I8 : LoadParamV2MemInst<Int16Regs, ".b8">;
def LoadParamMemV4I32 : LoadParamV4MemInst<Int32Regs, ".b32">;
def LoadParamMemV4I16 : LoadParamV4MemInst<Int16Regs, ".b16">;
def LoadParamMemV4I8 : LoadParamV4MemInst<Int16Regs, ".b8">;
+def LoadParamMemF16 : LoadParamMemInst<Float16Regs, ".b16">;
+def LoadParamMemF16x2 : LoadParamMemInst<Float16x2Regs, ".b32">;
def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">;
def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">;
+def LoadParamMemV2F16 : LoadParamV2MemInst<Float16Regs, ".b16">;
+def LoadParamMemV2F16x2: LoadParamV2MemInst<Float16x2Regs, ".b32">;
def LoadParamMemV2F32 : LoadParamV2MemInst<Float32Regs, ".f32">;
def LoadParamMemV2F64 : LoadParamV2MemInst<Float64Regs, ".f64">;
+def LoadParamMemV4F16 : LoadParamV4MemInst<Float16Regs, ".b16">;
+def LoadParamMemV4F16x2: LoadParamV4MemInst<Float16x2Regs, ".b32">;
def LoadParamMemV4F32 : LoadParamV4MemInst<Float32Regs, ".f32">;
def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">;
@@ -1961,10 +2131,16 @@ def StoreParamV4I32 : StoreParamV4Inst<Int32Regs, ".b32">;
def StoreParamV4I16 : StoreParamV4Inst<Int16Regs, ".b16">;
def StoreParamV4I8 : StoreParamV4Inst<Int16Regs, ".b8">;
+def StoreParamF16 : StoreParamInst<Float16Regs, ".b16">;
+def StoreParamF16x2 : StoreParamInst<Float16x2Regs, ".b32">;
def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">;
def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">;
+def StoreParamV2F16 : StoreParamV2Inst<Float16Regs, ".b16">;
+def StoreParamV2F16x2 : StoreParamV2Inst<Float16x2Regs, ".b32">;
def StoreParamV2F32 : StoreParamV2Inst<Float32Regs, ".f32">;
def StoreParamV2F64 : StoreParamV2Inst<Float64Regs, ".f64">;
+def StoreParamV4F16 : StoreParamV4Inst<Float16Regs, ".b16">;
+def StoreParamV4F16x2 : StoreParamV4Inst<Float16x2Regs, ".b32">;
def StoreParamV4F32 : StoreParamV4Inst<Float32Regs, ".f32">;
def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
@@ -1981,9 +2157,15 @@ def StoreRetvalV4I8 : StoreRetvalV4Inst<Int16Regs, ".b8">;
def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">;
def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">;
+def StoreRetvalF16 : StoreRetvalInst<Float16Regs, ".b16">;
+def StoreRetvalF16x2 : StoreRetvalInst<Float16x2Regs, ".b32">;
def StoreRetvalV2F64 : StoreRetvalV2Inst<Float64Regs, ".f64">;
def StoreRetvalV2F32 : StoreRetvalV2Inst<Float32Regs, ".f32">;
+def StoreRetvalV2F16 : StoreRetvalV2Inst<Float16Regs, ".b16">;
+def StoreRetvalV2F16x2: StoreRetvalV2Inst<Float16x2Regs, ".b32">;
def StoreRetvalV4F32 : StoreRetvalV4Inst<Float32Regs, ".f32">;
+def StoreRetvalV4F16 : StoreRetvalV4Inst<Float16Regs, ".b16">;
+def StoreRetvalV4F16x2: StoreRetvalV4Inst<Float16x2Regs, ".b32">;
def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
@@ -2057,17 +2239,18 @@ def DeclareScalarRegInst :
class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
- !strconcat("mov", asmstr, "\t$dst, $src;"),
+ !strconcat("mov", asmstr, " \t$dst, $src;"),
[(set regclass:$dst, (MoveParam regclass:$src))]>;
def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
def MoveParamI16 :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
- "cvt.u16.u32\t$dst, $src;",
+ "cvt.u16.u32 \t$dst, $src;",
[(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
+def MoveParamF16 : MoveParamInst<Float16Regs, ".f16">;
class PseudoUseParamInst<NVPTXRegClass regclass> :
NVPTXInst<(outs), (ins regclass:$src),
@@ -2128,6 +2311,8 @@ let mayLoad=1, hasSideEffects=0 in {
defm LD_i16 : LD<Int16Regs>;
defm LD_i32 : LD<Int32Regs>;
defm LD_i64 : LD<Int64Regs>;
+ defm LD_f16 : LD<Float16Regs>;
+ defm LD_f16x2 : LD<Float16x2Regs>;
defm LD_f32 : LD<Float32Regs>;
defm LD_f64 : LD<Float64Regs>;
}
@@ -2176,6 +2361,8 @@ let mayStore=1, hasSideEffects=0 in {
defm ST_i16 : ST<Int16Regs>;
defm ST_i32 : ST<Int32Regs>;
defm ST_i64 : ST<Int64Regs>;
+ defm ST_f16 : ST<Float16Regs>;
+ defm ST_f16x2 : ST<Float16x2Regs>;
defm ST_f32 : ST<Float32Regs>;
defm ST_f64 : ST<Float64Regs>;
}
@@ -2262,6 +2449,8 @@ let mayLoad=1, hasSideEffects=0 in {
defm LDV_i16 : LD_VEC<Int16Regs>;
defm LDV_i32 : LD_VEC<Int32Regs>;
defm LDV_i64 : LD_VEC<Int64Regs>;
+ defm LDV_f16 : LD_VEC<Float16Regs>;
+ defm LDV_f16x2 : LD_VEC<Float16x2Regs>;
defm LDV_f32 : LD_VEC<Float32Regs>;
defm LDV_f64 : LD_VEC<Float64Regs>;
}
@@ -2355,28 +2544,53 @@ let mayStore=1, hasSideEffects=0 in {
defm STV_i16 : ST_VEC<Int16Regs>;
defm STV_i32 : ST_VEC<Int32Regs>;
defm STV_i64 : ST_VEC<Int64Regs>;
+ defm STV_f16 : ST_VEC<Float16Regs>;
+ defm STV_f16x2 : ST_VEC<Float16x2Regs>;
defm STV_f32 : ST_VEC<Float32Regs>;
defm STV_f64 : ST_VEC<Float64Regs>;
}
-
//---- Conversion ----
class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
NVPTXRegClass regclassOut> :
NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
- !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")),
+ !strconcat("mov.b", !strconcat(SzStr, " \t$d, $a;")),
[(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
+def BITCONVERT_16_I2F : F_BITCONVERT<"16", Int16Regs, Float16Regs>;
+def BITCONVERT_16_F2I : F_BITCONVERT<"16", Float16Regs, Int16Regs>;
def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
+def BITCONVERT_32_I2F16x2 : F_BITCONVERT<"32", Int32Regs, Float16x2Regs>;
+def BITCONVERT_32_F16x22I : F_BITCONVERT<"32", Float16x2Regs, Int32Regs>;
// NOTE: pred->fp conversions are currently sub-optimal due to an issue in
// TableGen where we cannot specify floating-point literals in isel patterns.
// Therefore, we use an integer selp to select either 1 or 0 and then cvt to
// floating-point.
+// sint -> f16
+def : Pat<(f16 (sint_to_fp Int1Regs:$a)),
+ (CVT_f16_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
+def : Pat<(f16 (sint_to_fp Int16Regs:$a)),
+ (CVT_f16_s16 Int16Regs:$a, CvtRN)>;
+def : Pat<(f16 (sint_to_fp Int32Regs:$a)),
+ (CVT_f16_s32 Int32Regs:$a, CvtRN)>;
+def : Pat<(f16 (sint_to_fp Int64Regs:$a)),
+ (CVT_f16_s64 Int64Regs:$a, CvtRN)>;
+
+// uint -> f16
+def : Pat<(f16 (uint_to_fp Int1Regs:$a)),
+ (CVT_f16_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
+def : Pat<(f16 (uint_to_fp Int16Regs:$a)),
+ (CVT_f16_u16 Int16Regs:$a, CvtRN)>;
+def : Pat<(f16 (uint_to_fp Int32Regs:$a)),
+ (CVT_f16_u32 Int32Regs:$a, CvtRN)>;
+def : Pat<(f16 (uint_to_fp Int64Regs:$a)),
+ (CVT_f16_u64 Int64Regs:$a, CvtRN)>;
+
// sint -> f32
def : Pat<(f32 (sint_to_fp Int1Regs:$a)),
(CVT_f32_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
@@ -2418,6 +2632,38 @@ def : Pat<(f64 (uint_to_fp Int64Regs:$a)),
(CVT_f64_u64 Int64Regs:$a, CvtRN)>;
+// f16 -> sint
+def : Pat<(i1 (fp_to_sint Float16Regs:$a)),
+ (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>;
+def : Pat<(i16 (fp_to_sint Float16Regs:$a)),
+ (CVT_s16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(i16 (fp_to_sint Float16Regs:$a)),
+ (CVT_s16_f16 Float16Regs:$a, CvtRZI)>;
+def : Pat<(i32 (fp_to_sint Float16Regs:$a)),
+ (CVT_s32_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(i32 (fp_to_sint Float16Regs:$a)),
+ (CVT_s32_f16 Float16Regs:$a, CvtRZI)>;
+def : Pat<(i64 (fp_to_sint Float16Regs:$a)),
+ (CVT_s64_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(i64 (fp_to_sint Float16Regs:$a)),
+ (CVT_s64_f16 Float16Regs:$a, CvtRZI)>;
+
+// f16 -> uint
+def : Pat<(i1 (fp_to_uint Float16Regs:$a)),
+ (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>;
+def : Pat<(i16 (fp_to_uint Float16Regs:$a)),
+ (CVT_u16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(i16 (fp_to_uint Float16Regs:$a)),
+ (CVT_u16_f16 Float16Regs:$a, CvtRZI)>;
+def : Pat<(i32 (fp_to_uint Float16Regs:$a)),
+ (CVT_u32_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(i32 (fp_to_uint Float16Regs:$a)),
+ (CVT_u32_f16 Float16Regs:$a, CvtRZI)>;
+def : Pat<(i64 (fp_to_uint Float16Regs:$a)),
+ (CVT_u64_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(i64 (fp_to_uint Float16Regs:$a)),
+ (CVT_u64_f16 Float16Regs:$a, CvtRZI)>;
+
// f32 -> sint
def : Pat<(i1 (fp_to_sint Float32Regs:$a)),
(SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>;
@@ -2562,6 +2808,9 @@ def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
(SELP_b64rr Int64Regs:$a, Int64Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
+def : Pat<(select Int32Regs:$pred, Float16Regs:$a, Float16Regs:$b),
+ (SELP_f16rr Float16Regs:$a, Float16Regs:$b,
+ (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
(SELP_f32rr Float32Regs:$a, Float32Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
@@ -2575,77 +2824,150 @@ let hasSideEffects = 0 in {
def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
(ins Int16Regs:$s1, Int16Regs:$s2,
Int16Regs:$s3, Int16Regs:$s4),
- "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};", []>;
+ "mov.b64 \t$d, {{$s1, $s2, $s3, $s4}};", []>;
def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
(ins Int16Regs:$s1, Int16Regs:$s2),
- "mov.b32\t$d, {{$s1, $s2}};", []>;
+ "mov.b32 \t$d, {{$s1, $s2}};", []>;
def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
(ins Int32Regs:$s1, Int32Regs:$s2),
- "mov.b64\t$d, {{$s1, $s2}};", []>;
+ "mov.b64 \t$d, {{$s1, $s2}};", []>;
def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
(ins Float32Regs:$s1, Float32Regs:$s2),
- "mov.b64\t$d, {{$s1, $s2}};", []>;
+ "mov.b64 \t$d, {{$s1, $s2}};", []>;
// unpack a larger int register to a set of smaller int registers
def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
Int16Regs:$d3, Int16Regs:$d4),
(ins Int64Regs:$s),
- "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;", []>;
+ "mov.b64 \t{{$d1, $d2, $d3, $d4}}, $s;", []>;
def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
(ins Int32Regs:$s),
- "mov.b32\t{{$d1, $d2}}, $s;", []>;
+ "mov.b32 \t{{$d1, $d2}}, $s;", []>;
def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
(ins Int64Regs:$s),
- "mov.b64\t{{$d1, $d2}}, $s;", []>;
+ "mov.b64 \t{{$d1, $d2}}, $s;", []>;
def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
(ins Float64Regs:$s),
- "mov.b64\t{{$d1, $d2}}, $s;", []>;
+ "mov.b64 \t{{$d1, $d2}}, $s;", []>;
+
+}
+
+let hasSideEffects = 0 in {
+ // Extract an element of an f16x2 register. PTX does not provide any way
+ // to access the elements of an f16x2 vector directly, so we need to
+ // extract them using a temporary register.
+ def F16x2toF16_0 : NVPTXInst<(outs Float16Regs:$dst),
+ (ins Float16x2Regs:$src),
+ "{{ .reg .b16 \t%tmp_hi;\n\t"
+ " mov.b32 \t{$dst, %tmp_hi}, $src; }}",
+ [(set Float16Regs:$dst,
+ (extractelt (v2f16 Float16x2Regs:$src), 0))]>;
+ def F16x2toF16_1 : NVPTXInst<(outs Float16Regs:$dst),
+ (ins Float16x2Regs:$src),
+ "{{ .reg .b16 \t%tmp_lo;\n\t"
+ " mov.b32 \t{%tmp_lo, $dst}, $src; }}",
+ [(set Float16Regs:$dst,
+ (extractelt (v2f16 Float16x2Regs:$src), 1))]>;
+
+ // Coalesce two f16 registers into f16x2
+ def BuildF16x2 : NVPTXInst<(outs Float16x2Regs:$dst),
+ (ins Float16Regs:$a, Float16Regs:$b),
+ "mov.b32 \t$dst, {{$a, $b}};",
+ [(set Float16x2Regs:$dst,
+ (build_vector (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>;
+
+ // Directly initializing the underlying b32 register takes one less SASS
+ // instruction than a vector-packing move.
+ def BuildF16x2i : NVPTXInst<(outs Float16x2Regs:$dst), (ins i32imm:$src),
+ "mov.b32 \t$dst, $src;",
+ []>;
+
+ // Split f16x2 into two f16 registers.
+ def SplitF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi),
+ (ins Float16x2Regs:$src),
+ "mov.b32 \t{{$lo, $hi}}, $src;",
+ []>;
+ // Split an i32 into two f16 registers.
+ def SplitI32toF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi),
+ (ins Int32Regs:$src),
+ "mov.b32 \t{{$lo, $hi}}, $src;",
+ []>;
}
// Count leading zeros
let hasSideEffects = 0 in {
def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
- "clz.b32\t$d, $a;", []>;
+ "clz.b32 \t$d, $a;", []>;
def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
- "clz.b64\t$d, $a;", []>;
+ "clz.b64 \t$d, $a;", []>;
}
// 32-bit has a direct PTX instruction
def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>;
-// For 64-bit, the result in PTX is actually 32-bit so we zero-extend
-// to 64-bit to match the LLVM semantics
+// The return type of the ctlz ISD node is the same as its input, but the PTX
+// clz instruction always returns a 32-bit value. For ctlz.i64, convert the
+// PTX value to 64 bits to match the ISD node's semantics, unless we know we're
+// truncating back down to 32 bits.
def : Pat<(ctlz Int64Regs:$a), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
+def : Pat<(i32 (trunc (ctlz Int64Regs:$a))), (CLZr64 Int64Regs:$a)>;
-// For 16-bit, we zero-extend to 32-bit, then trunc the result back
-// to 16-bits (ctlz of a 16-bit value is guaranteed to require less
-// than 16 bits to store). We also need to subtract 16 because the
-// high-order 16 zeros were counted.
+// For 16-bit ctlz, we zero-extend to 32 bits, perform the count, then truncate
+// the result back to 16 bits if necessary. We also need to subtract 16 because
+// the high-order 16 zeros were counted.
+//
+// TODO: NVPTX has a mov.b32 b32reg, {imm, b16reg} instruction, which we could
+// use to save one SASS instruction (on sm_35 anyway):
+//
+// mov.b32 $tmp, {0xffff, $a}
+// ctlz.b32 $result, $tmp
+//
+// That is, instead of zero-extending the input to 32 bits, we'd "one-extend"
+// and then ctlz that value. This way we don't have to subtract 16 from the
+// result. Unfortunately today we don't have a way to generate
+// "mov b32reg, {b16imm, b16reg}", so we don't do this optimization.
def : Pat<(ctlz Int16Regs:$a),
- (SUBi16ri (CVT_u16_u32 (CLZr32
- (CVT_u32_u16 Int16Regs:$a, CvtNONE)),
- CvtNONE), 16)>;
+ (SUBi16ri (CVT_u16_u32
+ (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE), 16)>;
+def : Pat<(i32 (zext (ctlz Int16Regs:$a))),
+ (SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>;
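
The "one-extend" idea in the TODO checks out with ordinary integer arithmetic: zero-extending a 16-bit input overcounts its leading zeros by exactly 16, while presetting the other half of the 32-bit word to all ones makes the 32-bit count match the 16-bit count with no adjustment. A quick exhaustive check using GCC/Clang's __builtin_clz (undefined for a zero input, so zero is skipped):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 1; X <= 0xFFFF; ++X) {
        // Today's lowering: zero-extend, count, subtract the 16 high zeros.
        int ViaSub = __builtin_clz(X) - 16;
        // The TODO's alternative, mov.b32 $tmp, {0xffff, $a}: the input
        // lands in the high half and the low half is all ones, so no
        // subtraction is needed afterwards.
        int ViaOnes = __builtin_clz((X << 16) | 0xFFFFu);
        assert(ViaSub == ViaOnes);
      }
    }
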
// Population count
let hasSideEffects = 0 in {
def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
- "popc.b32\t$d, $a;", []>;
+ "popc.b32 \t$d, $a;", []>;
def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
- "popc.b64\t$d, $a;", []>;
+ "popc.b64 \t$d, $a;", []>;
}
// 32-bit has a direct PTX instruction
def : Pat<(ctpop Int32Regs:$a), (POPCr32 Int32Regs:$a)>;
-// For 64-bit, the result in PTX is actually 32-bit so we zero-extend
-// to 64-bit to match the LLVM semantics
+// For 64-bit, the result in PTX is actually 32-bit, so we zero-extend to 64-bit
+// to match the LLVM semantics. Just as with ctlz.i64, we provide a second
+// pattern that avoids the type conversion if we're truncating the result to
+// i32 anyway.
def : Pat<(ctpop Int64Regs:$a), (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
+def : Pat<(i32 (trunc (ctpop Int64Regs:$a))), (POPCr64 Int64Regs:$a)>;
-// For 16-bit, we zero-extend to 32-bit, then trunc the result back
-// to 16-bits (ctpop of a 16-bit value is guaranteed to require less
-// than 16 bits to store)
+// For 16-bit, we zero-extend to 32 bits, then truncate the result back to 16 bits.
+// If we know that we're storing into an i32, we can avoid the final trunc.
def : Pat<(ctpop Int16Regs:$a),
(CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>;
+def : Pat<(i32 (zext (ctpop Int16Regs:$a))),
+ (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE))>;
+
+// fpround f32 -> f16
+def : Pat<(f16 (fpround Float32Regs:$a)),
+ (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(f16 (fpround Float32Regs:$a)),
+ (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
+
+// fpround f64 -> f16
+def : Pat<(f16 (fpround Float64Regs:$a)),
+ (CVT_f16_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(f16 (fpround Float64Regs:$a)),
+ (CVT_f16_f64 Float64Regs:$a, CvtRN)>;
// fpround f64 -> f32
def : Pat<(f32 (fpround Float64Regs:$a)),
@@ -2653,6 +2975,18 @@ def : Pat<(f32 (fpround Float64Regs:$a)),
def : Pat<(f32 (fpround Float64Regs:$a)),
(CVT_f32_f64 Float64Regs:$a, CvtRN)>;
+// fpextend f16 -> f32
+def : Pat<(f32 (fpextend Float16Regs:$a)),
+ (CVT_f32_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(f32 (fpextend Float16Regs:$a)),
+ (CVT_f32_f16 Float16Regs:$a, CvtNONE)>;
+
+// fpextend f16 -> f64
+def : Pat<(f64 (fpextend Float16Regs:$a)),
+ (CVT_f64_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(f64 (fpextend Float16Regs:$a)),
+ (CVT_f64_f16 Float16Regs:$a, CvtNONE)>;
+
// fpextend f32 -> f64
def : Pat<(f64 (fpextend Float32Regs:$a)),
(CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
@@ -2664,6 +2998,10 @@ def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
// fceil, ffloor, fround, ftrunc.
+def : Pat<(fceil Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(fceil Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRPI)>, Requires<[doNoF32FTZ]>;
def : Pat<(fceil Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(fceil Float32Regs:$a),
@@ -2671,6 +3009,10 @@ def : Pat<(fceil Float32Regs:$a),
def : Pat<(fceil Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
+def : Pat<(ffloor Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(ffloor Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRMI)>, Requires<[doNoF32FTZ]>;
def : Pat<(ffloor Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(ffloor Float32Regs:$a),
@@ -2678,6 +3020,10 @@ def : Pat<(ffloor Float32Regs:$a),
def : Pat<(ffloor Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
+def : Pat<(fround Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(f16 (fround Float16Regs:$a)),
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
def : Pat<(fround Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f32 (fround Float32Regs:$a)),
@@ -2685,6 +3031,10 @@ def : Pat<(f32 (fround Float32Regs:$a)),
def : Pat<(f64 (fround Float64Regs:$a)),
(CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
+def : Pat<(ftrunc Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(ftrunc Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRZI)>, Requires<[doNoF32FTZ]>;
def : Pat<(ftrunc Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(ftrunc Float32Regs:$a),
@@ -2696,6 +3046,10 @@ def : Pat<(ftrunc Float64Regs:$a),
// strictly correct, because it causes us to ignore the rounding mode. But it
// matches what CUDA's "libm" does.
+def : Pat<(fnearbyint Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(fnearbyint Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
def : Pat<(fnearbyint Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(fnearbyint Float32Regs:$a),
@@ -2703,6 +3057,10 @@ def : Pat<(fnearbyint Float32Regs:$a),
def : Pat<(fnearbyint Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
+def : Pat<(frint Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(frint Float16Regs:$a),
+ (CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
def : Pat<(frint Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(frint Float32Regs:$a),
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index b0408f12f5b1..8d228a9eeb74 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -36,33 +36,39 @@ let isConvergent = 1 in {
def INT_BARRIER0 : NVPTXInst<(outs), (ins),
"bar.sync \t0;",
[(int_nvvm_barrier0)]>;
+def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
+ "bar.sync \t$src1;",
+ [(int_nvvm_barrier_n Int32Regs:$src1)]>;
+def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
+ "bar.sync \t$src1, $src2;",
+ [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
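[Editor's sketch, not part of the diff.] The new defs wire up the bar.sync forms that take a barrier id and an optional thread count in registers. Assuming the usual int_nvvm_* to llvm.nvvm.* name mapping, a hypothetical use would be:

; Assumed declarations, derived from the TableGen intrinsic names above.
define void @sync(i32 %id, i32 %cnt) {
  call void @llvm.nvvm.barrier.n(i32 %id)          ; bar.sync %r
  call void @llvm.nvvm.barrier(i32 %id, i32 %cnt)  ; bar.sync %r1, %r2
  ret void
}
declare void @llvm.nvvm.barrier.n(i32)
declare void @llvm.nvvm.barrier(i32, i32)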
def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
!strconcat("{{ \n\t",
- !strconcat(".reg .pred \t%p1; \n\t",
- !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
- !strconcat("bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
- !strconcat("}}", ""))))),
+ ".reg .pred \t%p1; \n\t",
+ "setp.ne.u32 \t%p1, $pred, 0; \n\t",
+ "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
+ "}}"),
[(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
!strconcat("{{ \n\t",
- !strconcat(".reg .pred \t%p1; \n\t",
- !strconcat(".reg .pred \t%p2; \n\t",
- !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
- !strconcat("bar.red.and.pred \t%p2, 0, %p1; \n\t",
- !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
- !strconcat("}}", ""))))))),
+ ".reg .pred \t%p1; \n\t",
+ ".reg .pred \t%p2; \n\t",
+ "setp.ne.u32 \t%p1, $pred, 0; \n\t",
+ "bar.red.and.pred \t%p2, 0, %p1; \n\t",
+ "selp.u32 \t$dst, 1, 0, %p2; \n\t",
+ "}}"),
[(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
!strconcat("{{ \n\t",
- !strconcat(".reg .pred \t%p1; \n\t",
- !strconcat(".reg .pred \t%p2; \n\t",
- !strconcat("setp.ne.u32 \t%p1, $pred, 0; \n\t",
- !strconcat("bar.red.or.pred \t%p2, 0, %p1; \n\t",
- !strconcat("selp.u32 \t$dst, 1, 0, %p2; \n\t",
- !strconcat("}}", ""))))))),
+ ".reg .pred \t%p1; \n\t",
+ ".reg .pred \t%p2; \n\t",
+ "setp.ne.u32 \t%p1, $pred, 0; \n\t",
+ "bar.red.or.pred \t%p2, 0, %p1; \n\t",
+ "selp.u32 \t$dst, 1, 0, %p2; \n\t",
+ "}}"),
[(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
-def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
+def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
[(int_nvvm_bar_sync imm:$i)]>;
// shfl.{up,down,bfly,idx}.b32
@@ -187,16 +193,6 @@ class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
// MISC
//
-def INT_NVVM_CLZ_I : F_MATH_1<"clz.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
- int_nvvm_clz_i>;
-def INT_NVVM_CLZ_LL : F_MATH_1<"clz.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
- int_nvvm_clz_ll>;
-
-def INT_NVVM_POPC_I : F_MATH_1<"popc.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
- int_nvvm_popc_i>;
-def INT_NVVM_POPC_LL : F_MATH_1<"popc.b64 \t$dst, $src0;", Int32Regs, Int64Regs,
- int_nvvm_popc_ll>;
-
def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
@@ -204,26 +200,6 @@ def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
// Min Max
//
-def INT_NVVM_MIN_I : F_MATH_2<"min.s32 \t$dst, $src0, $src1;", Int32Regs,
- Int32Regs, Int32Regs, int_nvvm_min_i>;
-def INT_NVVM_MIN_UI : F_MATH_2<"min.u32 \t$dst, $src0, $src1;", Int32Regs,
- Int32Regs, Int32Regs, int_nvvm_min_ui>;
-
-def INT_NVVM_MIN_LL : F_MATH_2<"min.s64 \t$dst, $src0, $src1;", Int64Regs,
- Int64Regs, Int64Regs, int_nvvm_min_ll>;
-def INT_NVVM_MIN_ULL : F_MATH_2<"min.u64 \t$dst, $src0, $src1;", Int64Regs,
- Int64Regs, Int64Regs, int_nvvm_min_ull>;
-
-def INT_NVVM_MAX_I : F_MATH_2<"max.s32 \t$dst, $src0, $src1;", Int32Regs,
- Int32Regs, Int32Regs, int_nvvm_max_i>;
-def INT_NVVM_MAX_UI : F_MATH_2<"max.u32 \t$dst, $src0, $src1;", Int32Regs,
- Int32Regs, Int32Regs, int_nvvm_max_ui>;
-
-def INT_NVVM_MAX_LL : F_MATH_2<"max.s64 \t$dst, $src0, $src1;", Int64Regs,
- Int64Regs, Int64Regs, int_nvvm_max_ll>;
-def INT_NVVM_MAX_ULL : F_MATH_2<"max.u64 \t$dst, $src0, $src1;", Int64Regs,
- Int64Regs, Int64Regs, int_nvvm_max_ull>;
-
def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
@@ -239,6 +215,7 @@ def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
Float64Regs, Float64Regs, int_nvvm_fmax_d>;
+
//
// Multiplication
//
@@ -321,15 +298,6 @@ def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
//
-// Brev
-//
-
-def INT_NVVM_BREV32 : F_MATH_1<"brev.b32 \t$dst, $src0;", Int32Regs, Int32Regs,
- int_nvvm_brev32>;
-def INT_NVVM_BREV64 : F_MATH_1<"brev.b64 \t$dst, $src0;", Int64Regs, Int64Regs,
- int_nvvm_brev64>;
-
-//
// Sad
//
@@ -360,11 +328,6 @@ def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
// Abs
//
-def INT_NVVM_ABS_I : F_MATH_1<"abs.s32 \t$dst, $src0;", Int32Regs, Int32Regs,
- int_nvvm_abs_i>;
-def INT_NVVM_ABS_LL : F_MATH_1<"abs.s64 \t$dst, $src0;", Int64Regs, Int64Regs,
- int_nvvm_abs_ll>;
-
def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
@@ -703,16 +666,18 @@ def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
-def INT_NVVM_D2I_LO : F_MATH_1<!strconcat("{{\n\t",
- !strconcat(".reg .b32 %temp; \n\t",
- !strconcat("mov.b64 \t{$dst, %temp}, $src0;\n\t",
- "}}"))),
- Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
-def INT_NVVM_D2I_HI : F_MATH_1<!strconcat("{{\n\t",
- !strconcat(".reg .b32 %temp; \n\t",
- !strconcat("mov.b64 \t{%temp, $dst}, $src0;\n\t",
- "}}"))),
- Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
+def INT_NVVM_D2I_LO : F_MATH_1<
+ !strconcat("{{\n\t",
+ ".reg .b32 %temp; \n\t",
+ "mov.b64 \t{$dst, %temp}, $src0;\n\t",
+ "}}"),
+ Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
+def INT_NVVM_D2I_HI : F_MATH_1<
+ !strconcat("{{\n\t",
+ ".reg .b32 %temp; \n\t",
+ "mov.b64 \t{%temp, $dst}, $src0;\n\t",
+ "}}"),
+ Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
(CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
@@ -803,49 +768,10 @@ def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
(CVT_f64_u64 Int64Regs:$a, CvtRP)>;
-// FIXME: Ideally, we could use these patterns instead of the scope-creating
-// patterns, but ptxas does not like these since .s16 is not compatible with
-// .f16. The solution is to use .bXX for all integer register types, but we
-// are not there yet.
-//def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
-// (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
-//def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
-// (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
-//
-//def : Pat<(int_nvvm_h2f Int16Regs:$a),
-// (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
-
-def INT_NVVM_F2H_RN_FTZ : F_MATH_1<!strconcat("{{\n\t",
- !strconcat(".reg .b16 %temp;\n\t",
- !strconcat("cvt.rn.ftz.f16.f32 \t%temp, $src0;\n\t",
- !strconcat("mov.b16 \t$dst, %temp;\n",
- "}}")))),
- Int16Regs, Float32Regs, int_nvvm_f2h_rn_ftz>;
-def INT_NVVM_F2H_RN : F_MATH_1<!strconcat("{{\n\t",
- !strconcat(".reg .b16 %temp;\n\t",
- !strconcat("cvt.rn.f16.f32 \t%temp, $src0;\n\t",
- !strconcat("mov.b16 \t$dst, %temp;\n",
- "}}")))),
- Int16Regs, Float32Regs, int_nvvm_f2h_rn>;
-
-def INT_NVVM_H2F : F_MATH_1<!strconcat("{{\n\t",
- !strconcat(".reg .b16 %temp;\n\t",
- !strconcat("mov.b16 \t%temp, $src0;\n\t",
- !strconcat("cvt.f32.f16 \t$dst, %temp;\n\t",
- "}}")))),
- Float32Regs, Int16Regs, int_nvvm_h2f>;
-
-def : Pat<(f32 (f16_to_fp Int16Regs:$a)),
- (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
-def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
- (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(i16 (fp_to_f16 Float32Regs:$a)),
- (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
-
-def : Pat<(f64 (f16_to_fp Int16Regs:$a)),
- (CVT_f64_f16 Int16Regs:$a, CvtNONE)>;
-def : Pat<(i16 (fp_to_f16 Float64Regs:$a)),
- (CVT_f16_f64 Float64Regs:$a, CvtRN)>;
+def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
+ (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
+def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
+ (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
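[Editor's sketch, not part of the diff.] With a real f16 register class, the f2h intrinsics no longer need the scoped mov.b16 sequence deleted above: the conversion lands in an f16 register and BITCONVERT_16_F2I moves the bits into the i16 result. A hedged example of an input that selects the new pattern:

; Assumed example; expected PTX: cvt.rn.f16.f32 into an f16 register,
; then mov.b16 into the i16 return register.
define i16 @f2h(float %x) {
  %h = call i16 @llvm.nvvm.f2h.rn(float %x)
  ret i16 %h
}
declare i16 @llvm.nvvm.f2h.rn(float)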
//
// Bitcast
@@ -882,20 +808,12 @@ multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
Operand IMMType, SDNode IMM, Predicate Pred> {
def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
- !strconcat("atom",
- !strconcat(SpaceStr,
- !strconcat(OpcStr,
- !strconcat(TypeStr,
- !strconcat(" \t$dst, [$addr], $b;", ""))))),
- [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
Requires<[Pred]>;
def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
- !strconcat("atom",
- !strconcat(SpaceStr,
- !strconcat(OpcStr,
- !strconcat(TypeStr,
- !strconcat(" \t$dst, [$addr], $b;", ""))))),
- [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
Requires<[Pred]>;
}
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
@@ -911,21 +829,13 @@ multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
Operand IMMType, Predicate Pred> {
def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
- !strconcat("{{ \n\t",
- !strconcat(".reg \t.s",
- !strconcat(TypeStr,
- !strconcat(" temp; \n\t",
- !strconcat("neg.s",
- !strconcat(TypeStr,
- !strconcat(" \ttemp, $b; \n\t",
- !strconcat("atom",
- !strconcat(SpaceStr,
- !strconcat(OpcStr,
- !strconcat(".u",
- !strconcat(TypeStr,
- !strconcat(" \t$dst, [$addr], temp; \n\t",
- !strconcat("}}", "")))))))))))))),
- [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
+ !strconcat(
+ "{{ \n\t",
+ ".reg \t.s", TypeStr, " temp; \n\t",
+ "neg.s", TypeStr, " \ttemp, $b; \n\t",
+ "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
+ "}}"),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
Requires<[Pred]>;
}
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
@@ -943,40 +853,26 @@ multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
Operand IMMType, Predicate Pred> {
def reg : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, regclass:$b, regclass:$c),
- !strconcat("atom",
- !strconcat(SpaceStr,
- !strconcat(OpcStr,
- !strconcat(TypeStr,
- !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
- [(set regclass:$dst,
- (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
- Requires<[Pred]>;
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
+ Requires<[Pred]>;
+
def imm1 : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, IMMType:$b, regclass:$c),
- !strconcat("atom",
- !strconcat(SpaceStr,
- !strconcat(OpcStr,
- !strconcat(TypeStr,
- !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
- [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
Requires<[Pred]>;
+
def imm2 : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, regclass:$b, IMMType:$c),
- !strconcat("atom",
- !strconcat(SpaceStr,
- !strconcat(OpcStr,
- !strconcat(TypeStr,
- !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
- [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
Requires<[Pred]>;
+
def imm3 : NVPTXInst<(outs regclass:$dst),
(ins ptrclass:$addr, IMMType:$b, IMMType:$c),
- !strconcat("atom",
- !strconcat(SpaceStr,
- !strconcat(OpcStr,
- !strconcat(TypeStr,
- !strconcat(" \t$dst, [$addr], $b, $c;", ""))))),
- [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
+ [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
Requires<[Pred]>;
}
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
@@ -1607,6 +1503,8 @@ defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
+defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
+defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
@@ -1657,6 +1555,10 @@ defm INT_PTX_LDU_G_v2i16_ELE
: VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
: VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
+defm INT_PTX_LDU_G_v2f16_ELE
+ : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
+defm INT_PTX_LDU_G_v2f16x2_ELE
+ : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
: VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
@@ -1671,6 +1573,12 @@ defm INT_PTX_LDU_G_v4i16_ELE
defm INT_PTX_LDU_G_v4i32_ELE
: VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
Int32Regs>;
+defm INT_PTX_LDU_G_v4f16_ELE
+ : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
+ Float16Regs>;
+defm INT_PTX_LDU_G_v4f16x2_ELE
+ : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
+ Float16x2Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
: VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
Float32Regs>;
@@ -1710,6 +1618,10 @@ defm INT_PTX_LDG_GLOBAL_i32
: LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64
: LDG_G<"u64 \t$result, [$src];", Int64Regs>;
+defm INT_PTX_LDG_GLOBAL_f16
+ : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
+defm INT_PTX_LDG_GLOBAL_f16x2
+ : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32
: LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64
@@ -1765,6 +1677,10 @@ defm INT_PTX_LDG_G_v2i16_ELE
: VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
: VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
+defm INT_PTX_LDG_G_v2f16_ELE
+ : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
+defm INT_PTX_LDG_G_v2f16x2_ELE
+ : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
: VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
@@ -1777,17 +1693,21 @@ defm INT_PTX_LDG_G_v4i16_ELE
: VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
: VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
+defm INT_PTX_LDG_G_v4f16_ELE
+ : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
+defm INT_PTX_LDG_G_v4f16x2_ELE
+ : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
: VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
- !strconcat("cvta.", !strconcat(Str, ".u32 \t$result, $src;")),
+ !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
[(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
Requires<[hasGenericLdSt]>;
def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
- !strconcat("cvta.", !strconcat(Str, ".u64 \t$result, $src;")),
+ !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
[(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
Requires<[hasGenericLdSt]>;
@@ -1821,11 +1741,11 @@ multiclass NG_TO_G<string Str, Intrinsic Intrin> {
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
- !strconcat("cvta.to.", !strconcat(Str, ".u32 \t$result, $src;")),
+ !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
[(set Int32Regs:$result, (Intrin Int32Regs:$src))]>,
Requires<[hasGenericLdSt]>;
def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
- !strconcat("cvta.to.", !strconcat(Str, ".u64 \t$result, $src;")),
+ !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
[(set Int64Regs:$result, (Intrin Int64Regs:$src))]>,
Requires<[hasGenericLdSt]>;
def _no : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
@@ -1983,7 +1903,7 @@ def ISSPACEP_SHARED_64
// Special register reads
def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
(ins SpecialRegs:$r),
- "mov.b32\t$d, $r;", []>;
+ "mov.b32 \t$d, $r;", []>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
@@ -2046,20 +1966,18 @@ def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
Requires<[noHWROT32]> ;
let hasSideEffects = 0 in {
- def GET_LO_INT64
- : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
- !strconcat("{{\n\t",
- !strconcat(".reg .b32 %dummy;\n\t",
- !strconcat("mov.b64 \t{$dst,%dummy}, $src;\n\t",
- !strconcat("}}", "")))),
+ def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
+ !strconcat("{{\n\t",
+ ".reg .b32 %dummy;\n\t",
+ "mov.b64 \t{$dst,%dummy}, $src;\n\t",
+ "}}"),
[]> ;
- def GET_HI_INT64
- : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
- !strconcat("{{\n\t",
- !strconcat(".reg .b32 %dummy;\n\t",
- !strconcat("mov.b64 \t{%dummy,$dst}, $src;\n\t",
- !strconcat("}}", "")))),
+ def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
+ !strconcat("{{\n\t",
+ ".reg .b32 %dummy;\n\t",
+ "mov.b64 \t{%dummy,$dst}, $src;\n\t",
+ "}}"),
[]> ;
}
@@ -2164,19 +2082,19 @@ def TEX_1D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
- "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
[]>;
def TEX_1D_F32_F32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
- "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
[]>;
def TEX_1D_F32_F32_LEVEL
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
- "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x\\}], $lod;",
[]>;
def TEX_1D_F32_F32_GRAD
@@ -2184,27 +2102,27 @@ def TEX_1D_F32_F32_GRAD
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
def TEX_1D_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
- "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
[]>;
def TEX_1D_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
- "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
[]>;
def TEX_1D_S32_F32_LEVEL
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x\\}], $lod;",
[]>;
def TEX_1D_S32_F32_GRAD
@@ -2212,27 +2130,27 @@ def TEX_1D_S32_F32_GRAD
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
def TEX_1D_U32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
- "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
[]>;
def TEX_1D_U32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
- "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
[]>;
def TEX_1D_U32_F32_LEVEL
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x\\}], $lod;",
[]>;
def TEX_1D_U32_F32_GRAD
@@ -2240,7 +2158,7 @@ def TEX_1D_U32_F32_GRAD
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
@@ -2248,14 +2166,14 @@ def TEX_1D_ARRAY_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}];",
[]>;
def TEX_1D_ARRAY_F32_F32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}];",
[]>;
def TEX_1D_ARRAY_F32_F32_LEVEL
@@ -2263,7 +2181,7 @@ def TEX_1D_ARRAY_F32_F32_LEVEL
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}], $lod;",
[]>;
def TEX_1D_ARRAY_F32_F32_GRAD
@@ -2271,21 +2189,21 @@ def TEX_1D_ARRAY_F32_F32_GRAD
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
def TEX_1D_ARRAY_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}];",
[]>;
def TEX_1D_ARRAY_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}];",
[]>;
def TEX_1D_ARRAY_S32_F32_LEVEL
@@ -2293,7 +2211,7 @@ def TEX_1D_ARRAY_S32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}], $lod;",
[]>;
def TEX_1D_ARRAY_S32_F32_GRAD
@@ -2301,21 +2219,21 @@ def TEX_1D_ARRAY_S32_F32_GRAD
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
def TEX_1D_ARRAY_U32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}];",
[]>;
def TEX_1D_ARRAY_U32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}];",
[]>;
def TEX_1D_ARRAY_U32_F32_LEVEL
@@ -2323,7 +2241,7 @@ def TEX_1D_ARRAY_U32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}], $lod;",
[]>;
def TEX_1D_ARRAY_U32_F32_GRAD
@@ -2331,7 +2249,7 @@ def TEX_1D_ARRAY_U32_F32_GRAD
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
@@ -2339,14 +2257,14 @@ def TEX_2D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TEX_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TEX_2D_F32_F32_LEVEL
@@ -2354,7 +2272,7 @@ def TEX_2D_F32_F32_LEVEL
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$lod),
- "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}], $lod;",
[]>;
def TEX_2D_F32_F32_GRAD
@@ -2363,7 +2281,7 @@ def TEX_2D_F32_F32_GRAD
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -2371,14 +2289,14 @@ def TEX_2D_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TEX_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TEX_2D_S32_F32_LEVEL
@@ -2386,7 +2304,7 @@ def TEX_2D_S32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$lod),
- "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}], $lod;",
[]>;
def TEX_2D_S32_F32_GRAD
@@ -2395,7 +2313,7 @@ def TEX_2D_S32_F32_GRAD
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -2403,14 +2321,14 @@ def TEX_2D_U32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TEX_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TEX_2D_U32_F32_LEVEL
@@ -2418,7 +2336,7 @@ def TEX_2D_U32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$lod),
- "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}], $lod;",
[]>;
def TEX_2D_U32_F32_GRAD
@@ -2427,7 +2345,7 @@ def TEX_2D_U32_F32_GRAD
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -2437,7 +2355,7 @@ def TEX_2D_ARRAY_F32_S32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$y),
- "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_2D_ARRAY_F32_F32
@@ -2445,7 +2363,7 @@ def TEX_2D_ARRAY_F32_F32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y),
- "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_2D_ARRAY_F32_F32_LEVEL
@@ -2453,7 +2371,7 @@ def TEX_2D_ARRAY_F32_F32_LEVEL
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
[]>;
def TEX_2D_ARRAY_F32_F32_GRAD
@@ -2462,7 +2380,7 @@ def TEX_2D_ARRAY_F32_F32_GRAD
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -2471,7 +2389,7 @@ def TEX_2D_ARRAY_S32_S32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$y),
- "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_2D_ARRAY_S32_F32
@@ -2479,7 +2397,7 @@ def TEX_2D_ARRAY_S32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y),
- "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_2D_ARRAY_S32_F32_LEVEL
@@ -2487,7 +2405,7 @@ def TEX_2D_ARRAY_S32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
[]>;
def TEX_2D_ARRAY_S32_F32_GRAD
@@ -2497,7 +2415,7 @@ def TEX_2D_ARRAY_S32_F32_GRAD
Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -2506,7 +2424,7 @@ def TEX_2D_ARRAY_U32_S32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$y),
- "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_2D_ARRAY_U32_F32
@@ -2514,7 +2432,7 @@ def TEX_2D_ARRAY_U32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y),
- "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_2D_ARRAY_U32_F32_LEVEL
@@ -2522,7 +2440,7 @@ def TEX_2D_ARRAY_U32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
[]>;
def TEX_2D_ARRAY_U32_F32_GRAD
@@ -2532,7 +2450,7 @@ def TEX_2D_ARRAY_U32_F32_GRAD
Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -2542,7 +2460,7 @@ def TEX_3D_F32_S32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$z),
- "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_3D_F32_F32
@@ -2550,7 +2468,7 @@ def TEX_3D_F32_F32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z),
- "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_3D_F32_F32_LEVEL
@@ -2558,7 +2476,7 @@ def TEX_3D_F32_F32_LEVEL
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_3D_F32_F32_GRAD
@@ -2569,7 +2487,7 @@ def TEX_3D_F32_F32_GRAD
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$gradx2, Float32Regs:$grady0,
Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], "
"\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
"\\{$grady0, $grady1, $grady2, $grady2\\};",
@@ -2579,7 +2497,7 @@ def TEX_3D_S32_S32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$z),
- "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_3D_S32_F32
@@ -2587,7 +2505,7 @@ def TEX_3D_S32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z),
- "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_3D_S32_F32_LEVEL
@@ -2595,7 +2513,7 @@ def TEX_3D_S32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_3D_S32_F32_GRAD
@@ -2606,7 +2524,7 @@ def TEX_3D_S32_F32_GRAD
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$gradx2, Float32Regs:$grady0,
Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], "
"\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
"\\{$grady0, $grady1, $grady2, $grady2\\};",
@@ -2616,7 +2534,7 @@ def TEX_3D_U32_S32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$z),
- "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_3D_U32_F32
@@ -2624,7 +2542,7 @@ def TEX_3D_U32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z),
- "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_3D_U32_F32_LEVEL
@@ -2632,7 +2550,7 @@ def TEX_3D_U32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_3D_U32_F32_GRAD
@@ -2643,7 +2561,7 @@ def TEX_3D_U32_F32_GRAD
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$gradx2, Float32Regs:$grady0,
Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], "
"\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
"\\{$grady0, $grady1, $grady2, $grady2\\};",
@@ -2654,7 +2572,7 @@ def TEX_CUBE_F32_F32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_CUBE_F32_F32_LEVEL
@@ -2663,7 +2581,7 @@ def TEX_CUBE_F32_F32_LEVEL
(ins Int64Regs:$t, Int64Regs:$s,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_CUBE_S32_F32
@@ -2671,7 +2589,7 @@ def TEX_CUBE_S32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_CUBE_S32_F32_LEVEL
@@ -2680,7 +2598,7 @@ def TEX_CUBE_S32_F32_LEVEL
(ins Int64Regs:$t, Int64Regs:$s,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_CUBE_U32_F32
@@ -2688,7 +2606,7 @@ def TEX_CUBE_U32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_CUBE_U32_F32_LEVEL
@@ -2697,7 +2615,7 @@ def TEX_CUBE_U32_F32_LEVEL
(ins Int64Regs:$t, Int64Regs:$s,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
@@ -2706,7 +2624,7 @@ def TEX_CUBE_ARRAY_F32_F32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $z\\}];",
[]>;
def TEX_CUBE_ARRAY_F32_F32_LEVEL
@@ -2715,7 +2633,7 @@ def TEX_CUBE_ARRAY_F32_F32_LEVEL
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
[]>;
def TEX_CUBE_ARRAY_S32_F32
@@ -2723,7 +2641,7 @@ def TEX_CUBE_ARRAY_S32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $z\\}];",
[]>;
def TEX_CUBE_ARRAY_S32_F32_LEVEL
@@ -2732,7 +2650,7 @@ def TEX_CUBE_ARRAY_S32_F32_LEVEL
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
[]>;
def TEX_CUBE_ARRAY_U32_F32
@@ -2740,7 +2658,7 @@ def TEX_CUBE_ARRAY_U32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $z\\}];",
[]>;
def TEX_CUBE_ARRAY_U32_F32_LEVEL
@@ -2749,7 +2667,7 @@ def TEX_CUBE_ARRAY_U32_F32_LEVEL
(ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
[]>;
@@ -2757,84 +2675,84 @@ def TLD4_R_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_G_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_B_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_A_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_R_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_G_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_B_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_A_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_R_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_G_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_B_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
def TLD4_A_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, $s, \\{$x, $y\\}];",
[]>;
}
@@ -2847,19 +2765,19 @@ def TEX_UNIFIED_1D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x),
- "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
[]>;
def TEX_UNIFIED_1D_F32_F32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x),
- "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
[]>;
def TEX_UNIFIED_1D_F32_F32_LEVEL
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
- "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x\\}], $lod;",
[]>;
def TEX_UNIFIED_1D_F32_F32_GRAD
@@ -2867,27 +2785,27 @@ def TEX_UNIFIED_1D_F32_F32_GRAD
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
def TEX_UNIFIED_1D_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x),
- "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
[]>;
def TEX_UNIFIED_1D_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x),
- "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
[]>;
def TEX_UNIFIED_1D_S32_F32_LEVEL
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x\\}], $lod;",
[]>;
def TEX_UNIFIED_1D_S32_F32_GRAD
@@ -2895,27 +2813,27 @@ def TEX_UNIFIED_1D_S32_F32_GRAD
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
def TEX_UNIFIED_1D_U32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x),
- "tex.1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
[]>;
def TEX_UNIFIED_1D_U32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x),
- "tex.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
[]>;
def TEX_UNIFIED_1D_U32_F32_LEVEL
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x\\}], $lod;",
[]>;
def TEX_UNIFIED_1D_U32_F32_GRAD
@@ -2923,7 +2841,7 @@ def TEX_UNIFIED_1D_U32_F32_GRAD
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
@@ -2931,14 +2849,14 @@ def TEX_UNIFIED_1D_ARRAY_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}];",
[]>;
def TEX_UNIFIED_1D_ARRAY_F32_F32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}];",
[]>;
def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
@@ -2946,7 +2864,7 @@ def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}], $lod;",
[]>;
def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
@@ -2954,21 +2872,21 @@ def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
def TEX_UNIFIED_1D_ARRAY_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}];",
[]>;
def TEX_UNIFIED_1D_ARRAY_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}];",
[]>;
def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
@@ -2976,7 +2894,7 @@ def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}], $lod;",
[]>;
def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
@@ -2984,21 +2902,21 @@ def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
def TEX_UNIFIED_1D_ARRAY_U32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}];",
[]>;
def TEX_UNIFIED_1D_ARRAY_U32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}];",
[]>;
def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
@@ -3006,7 +2924,7 @@ def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$lod),
- "tex.level.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}], $lod;",
[]>;
def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
@@ -3014,7 +2932,7 @@ def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
[]>;
@@ -3022,14 +2940,14 @@ def TEX_UNIFIED_2D_F32_S32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TEX_UNIFIED_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TEX_UNIFIED_2D_F32_F32_LEVEL
@@ -3037,7 +2955,7 @@ def TEX_UNIFIED_2D_F32_F32_LEVEL
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$lod),
- "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}], $lod;",
[]>;
def TEX_UNIFIED_2D_F32_F32_GRAD
@@ -3046,7 +2964,7 @@ def TEX_UNIFIED_2D_F32_F32_GRAD
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -3054,14 +2972,14 @@ def TEX_UNIFIED_2D_S32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TEX_UNIFIED_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TEX_UNIFIED_2D_S32_F32_LEVEL
@@ -3069,7 +2987,7 @@ def TEX_UNIFIED_2D_S32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$lod),
- "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}], $lod;",
[]>;
def TEX_UNIFIED_2D_S32_F32_GRAD
@@ -3078,7 +2996,7 @@ def TEX_UNIFIED_2D_S32_F32_GRAD
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -3086,14 +3004,14 @@ def TEX_UNIFIED_2D_U32_S32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TEX_UNIFIED_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TEX_UNIFIED_2D_U32_F32_LEVEL
@@ -3101,7 +3019,7 @@ def TEX_UNIFIED_2D_U32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$lod),
- "tex.level.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}], $lod;",
[]>;
def TEX_UNIFIED_2D_U32_F32_GRAD
@@ -3110,7 +3028,7 @@ def TEX_UNIFIED_2D_U32_F32_GRAD
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -3120,7 +3038,7 @@ def TEX_UNIFIED_2D_ARRAY_F32_S32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$y),
- "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_UNIFIED_2D_ARRAY_F32_F32
@@ -3128,7 +3046,7 @@ def TEX_UNIFIED_2D_ARRAY_F32_F32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y),
- "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
@@ -3136,7 +3054,7 @@ def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}], $lod;",
[]>;
def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
@@ -3145,7 +3063,7 @@ def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -3154,7 +3072,7 @@ def TEX_UNIFIED_2D_ARRAY_S32_S32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$y),
- "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_UNIFIED_2D_ARRAY_S32_F32
@@ -3162,7 +3080,7 @@ def TEX_UNIFIED_2D_ARRAY_S32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y),
- "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
@@ -3170,7 +3088,7 @@ def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}], $lod;",
[]>;
def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
@@ -3180,7 +3098,7 @@ def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -3189,7 +3107,7 @@ def TEX_UNIFIED_2D_ARRAY_U32_S32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$y),
- "tex.a2d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_UNIFIED_2D_ARRAY_U32_F32
@@ -3197,7 +3115,7 @@ def TEX_UNIFIED_2D_ARRAY_U32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y),
- "tex.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}];",
[]>;
def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
@@ -3205,7 +3123,7 @@ def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}], $lod;",
[]>;
def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
@@ -3215,7 +3133,7 @@ def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
Float32Regs:$y,
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
"\\{$grady0, $grady1\\};",
[]>;
@@ -3225,7 +3143,7 @@ def TEX_UNIFIED_3D_F32_S32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$z),
- "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_3D_F32_F32
@@ -3233,7 +3151,7 @@ def TEX_UNIFIED_3D_F32_F32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z),
- "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_3D_F32_F32_LEVEL
@@ -3241,7 +3159,7 @@ def TEX_UNIFIED_3D_F32_F32_LEVEL
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_UNIFIED_3D_F32_F32_GRAD
@@ -3252,7 +3170,7 @@ def TEX_UNIFIED_3D_F32_F32_GRAD
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$gradx2, Float32Regs:$grady0,
Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], "
"\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
"\\{$grady0, $grady1, $grady2, $grady2\\};",
@@ -3262,7 +3180,7 @@ def TEX_UNIFIED_3D_S32_S32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$z),
- "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_3D_S32_F32
@@ -3270,7 +3188,7 @@ def TEX_UNIFIED_3D_S32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z),
- "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_3D_S32_F32_LEVEL
@@ -3278,7 +3196,7 @@ def TEX_UNIFIED_3D_S32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_UNIFIED_3D_S32_F32_GRAD
@@ -3289,7 +3207,7 @@ def TEX_UNIFIED_3D_S32_F32_GRAD
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$gradx2, Float32Regs:$grady0,
Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], "
"\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
"\\{$grady0, $grady1, $grady2, $grady2\\};",
@@ -3299,7 +3217,7 @@ def TEX_UNIFIED_3D_U32_S32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$z),
- "tex.3d.v4.u32.s32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_3D_U32_F32
@@ -3307,7 +3225,7 @@ def TEX_UNIFIED_3D_U32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z),
- "tex.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_3D_U32_F32_LEVEL
@@ -3315,7 +3233,7 @@ def TEX_UNIFIED_3D_U32_F32_LEVEL
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_UNIFIED_3D_U32_F32_GRAD
@@ -3326,7 +3244,7 @@ def TEX_UNIFIED_3D_U32_F32_GRAD
Float32Regs:$gradx0, Float32Regs:$gradx1,
Float32Regs:$gradx2, Float32Regs:$grady0,
Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], "
"\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
"\\{$grady0, $grady1, $grady2, $grady2\\};",
@@ -3337,7 +3255,7 @@ def TEX_UNIFIED_CUBE_F32_F32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_CUBE_F32_F32_LEVEL
@@ -3346,7 +3264,7 @@ def TEX_UNIFIED_CUBE_F32_F32_LEVEL
(ins Int64Regs:$t,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.cube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_UNIFIED_CUBE_S32_F32
@@ -3354,7 +3272,7 @@ def TEX_UNIFIED_CUBE_S32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_CUBE_S32_F32_LEVEL
@@ -3363,7 +3281,7 @@ def TEX_UNIFIED_CUBE_S32_F32_LEVEL
(ins Int64Regs:$t,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.cube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
def TEX_UNIFIED_CUBE_U32_F32
@@ -3371,7 +3289,7 @@ def TEX_UNIFIED_CUBE_U32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}];",
[]>;
def TEX_UNIFIED_CUBE_U32_F32_LEVEL
@@ -3380,7 +3298,7 @@ def TEX_UNIFIED_CUBE_U32_F32_LEVEL
(ins Int64Regs:$t,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.cube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$x, $y, $z, $z\\}], $lod;",
[]>;
@@ -3389,7 +3307,7 @@ def TEX_UNIFIED_CUBE_ARRAY_F32_F32
Float32Regs:$b, Float32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $z\\}];",
[]>;
def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
@@ -3398,7 +3316,7 @@ def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
(ins Int64Regs:$t, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.acube.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $z\\}], $lod;",
[]>;
def TEX_UNIFIED_CUBE_ARRAY_S32_F32
@@ -3406,7 +3324,7 @@ def TEX_UNIFIED_CUBE_ARRAY_S32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $z\\}];",
[]>;
def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
@@ -3415,7 +3333,7 @@ def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
(ins Int64Regs:$t, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.acube.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $z\\}], $lod;",
[]>;
def TEX_UNIFIED_CUBE_ARRAY_U32_F32
@@ -3423,7 +3341,7 @@ def TEX_UNIFIED_CUBE_ARRAY_U32_F32
Int32Regs:$b, Int32Regs:$a),
(ins Int64Regs:$t, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $z\\}];",
[]>;
def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
@@ -3432,7 +3350,7 @@ def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
(ins Int64Regs:$t, Int32Regs:$l,
Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
Float32Regs:$lod),
- "tex.level.acube.v4.u32.f32\t\\{$r, $g, $b, $a\\}, "
+ "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
"[$t, \\{$l, $x, $y, $z\\}], $lod;",
[]>;
@@ -3440,84 +3358,84 @@ def TLD4_UNIFIED_R_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_G_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_B_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_A_2D_F32_F32
: NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
Float32Regs:$v2, Float32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.f32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_R_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_G_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_B_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_A_2D_S32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.s32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_R_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_G_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_B_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
def TLD4_UNIFIED_A_2D_U32_F32
: NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
Int32Regs:$v2, Int32Regs:$v3),
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.u32.f32\t\\{$v0, $v1, $v2, $v3\\}, "
+ "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
"[$t, \\{$x, $y\\}];",
[]>;
}
@@ -7172,12 +7090,12 @@ def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
class PTX_READ_SREG_R64<string regname, Intrinsic intop>
: NVPTXInst<(outs Int64Regs:$d), (ins),
- !strconcat(!strconcat("mov.u64\t$d, %", regname), ";"),
+ !strconcat("mov.u64 \t$d, %", regname, ";"),
[(set Int64Regs:$d, (intop))]>;
class PTX_READ_SREG_R32<string regname, Intrinsic intop>
: NVPTXInst<(outs Int32Regs:$d), (ins),
- !strconcat(!strconcat("mov.u32\t$d, %", regname), ";"),
+ !strconcat("mov.u32 \t$d, %", regname, ";"),
[(set Int32Regs:$d, (intop))]>;
// TODO: Add reads of the vector versions of the special registers.
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index b925b632ee4a..3be291b48b8f 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
#define DEBUG_TYPE "nvptx"
@@ -54,188 +55,6 @@ struct NVPTXLowerAggrCopies : public FunctionPass {
char NVPTXLowerAggrCopies::ID = 0;
-// Lower memcpy to loop.
-void convertMemCpyToLoop(Instruction *ConvertedInst, Value *SrcAddr,
- Value *DstAddr, Value *CopyLen, bool SrcIsVolatile,
- bool DstIsVolatile, LLVMContext &Context,
- Function &F) {
- Type *TypeOfCopyLen = CopyLen->getType();
-
- BasicBlock *OrigBB = ConvertedInst->getParent();
- BasicBlock *NewBB =
- ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split");
- BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB);
-
- OrigBB->getTerminator()->setSuccessor(0, LoopBB);
- IRBuilder<> Builder(OrigBB->getTerminator());
-
- // SrcAddr and DstAddr are expected to be pointer types,
- // so no check is made here.
- unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
- unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
-
- // Cast pointers to (char *)
- SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS));
- DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS));
-
- IRBuilder<> LoopBuilder(LoopBB);
- PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
- LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
-
- // load from SrcAddr+LoopIndex
- // TODO: we can leverage the align parameter of llvm.memcpy for more efficient
- // word-sized loads and stores.
- Value *Element =
- LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP(
- LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex),
- SrcIsVolatile);
- // store at DstAddr+LoopIndex
- LoopBuilder.CreateStore(Element,
- LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(),
- DstAddr, LoopIndex),
- DstIsVolatile);
-
- // The value for LoopIndex coming from backedge is (LoopIndex + 1)
- Value *NewIndex =
- LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
- LoopIndex->addIncoming(NewIndex, LoopBB);
-
- LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
- NewBB);
-}
-
-// Lower memmove to IR. memmove is required to correctly copy overlapping memory
-// regions; therefore, it has to check the relative positions of the source and
-// destination pointers and choose the copy direction accordingly.
-//
-// The code below is an IR rendition of this C function:
-//
-// void* memmove(void* dst, const void* src, size_t n) {
-// unsigned char* d = dst;
-// const unsigned char* s = src;
-// if (s < d) {
-// // copy backwards
-// while (n--) {
-// d[n] = s[n];
-// }
-// } else {
-// // copy forward
-// for (size_t i = 0; i < n; ++i) {
-// d[i] = s[i];
-// }
-// }
-// return dst;
-// }
-void convertMemMoveToLoop(Instruction *ConvertedInst, Value *SrcAddr,
- Value *DstAddr, Value *CopyLen, bool SrcIsVolatile,
- bool DstIsVolatile, LLVMContext &Context,
- Function &F) {
- Type *TypeOfCopyLen = CopyLen->getType();
- BasicBlock *OrigBB = ConvertedInst->getParent();
-
- // Create a comparison of src and dst, based on which we jump to either
- // the forward-copy part of the function (if src >= dst) or the backwards-copy
- // part (if src < dst).
- // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
- // structure. Its block terminators (unconditional branches) are replaced by
- // the appropriate conditional branches when the loop is built.
- ICmpInst *PtrCompare = new ICmpInst(ConvertedInst, ICmpInst::ICMP_ULT,
- SrcAddr, DstAddr, "compare_src_dst");
- TerminatorInst *ThenTerm, *ElseTerm;
- SplitBlockAndInsertIfThenElse(PtrCompare, ConvertedInst, &ThenTerm,
- &ElseTerm);
-
- // Each part of the function consists of two blocks:
- // copy_backwards: used to skip the loop when n == 0
- // copy_backwards_loop: the actual backwards loop BB
- // copy_forward: used to skip the loop when n == 0
- // copy_forward_loop: the actual forward loop BB
- BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
- CopyBackwardsBB->setName("copy_backwards");
- BasicBlock *CopyForwardBB = ElseTerm->getParent();
- CopyForwardBB->setName("copy_forward");
- BasicBlock *ExitBB = ConvertedInst->getParent();
- ExitBB->setName("memmove_done");
-
- // Initial comparison of n == 0 that lets us skip the loops altogether. Shared
- // between both backwards and forward copy clauses.
- ICmpInst *CompareN =
- new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
- ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");
-
- // Copying backwards.
- BasicBlock *LoopBB =
- BasicBlock::Create(Context, "copy_backwards_loop", &F, CopyForwardBB);
- IRBuilder<> LoopBuilder(LoopBB);
- PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
- Value *IndexPtr = LoopBuilder.CreateSub(
- LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
- Value *Element = LoopBuilder.CreateLoad(
- LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element");
- LoopBuilder.CreateStore(Element,
- LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr));
- LoopBuilder.CreateCondBr(
- LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
- ExitBB, LoopBB);
- LoopPhi->addIncoming(IndexPtr, LoopBB);
- LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
- BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
- ThenTerm->eraseFromParent();
-
- // Copying forward.
- BasicBlock *FwdLoopBB =
- BasicBlock::Create(Context, "copy_forward_loop", &F, ExitBB);
- IRBuilder<> FwdLoopBuilder(FwdLoopBB);
- PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
- Value *FwdElement = FwdLoopBuilder.CreateLoad(
- FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element");
- FwdLoopBuilder.CreateStore(
- FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi));
- Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
- FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
- FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
- ExitBB, FwdLoopBB);
- FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
- FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);
-
- BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
- ElseTerm->eraseFromParent();
-}
-
-// Lower memset to loop.
-void convertMemSetToLoop(Instruction *ConvertedInst, Value *DstAddr,
- Value *CopyLen, Value *SetValue, LLVMContext &Context,
- Function &F) {
- BasicBlock *OrigBB = ConvertedInst->getParent();
- BasicBlock *NewBB =
- ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split");
- BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB);
-
- OrigBB->getTerminator()->setSuccessor(0, LoopBB);
- IRBuilder<> Builder(OrigBB->getTerminator());
-
- // Cast pointer to the type of value getting stored
- unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
- DstAddr = Builder.CreateBitCast(DstAddr,
- PointerType::get(SetValue->getType(), dstAS));
-
- IRBuilder<> LoopBuilder(LoopBB);
- PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLen->getType(), 0);
- LoopIndex->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB);
-
- LoopBuilder.CreateStore(
- SetValue,
- LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
- false);
-
- Value *NewIndex =
- LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLen->getType(), 1));
- LoopIndex->addIncoming(NewIndex, LoopBB);
-
- LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
- NewBB);
-}
-
bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
SmallVector<LoadInst *, 4> AggrLoads;
SmallVector<MemIntrinsic *, 4> MemCalls;
@@ -287,13 +106,13 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
Value *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads);
- convertMemCpyToLoop(/* ConvertedInst */ SI,
- /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
- /* CopyLen */ CopyLen,
- /* SrcIsVolatile */ LI->isVolatile(),
- /* DstIsVolatile */ SI->isVolatile(),
- /* Context */ Context,
- /* Function F */ F);
+ createMemCpyLoop(/* ConvertedInst */ SI,
+ /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
+ /* CopyLen */ CopyLen,
+ /* SrcAlign */ LI->getAlignment(),
+ /* DestAlign */ SI->getAlignment(),
+ /* SrcIsVolatile */ LI->isVolatile(),
+ /* DstIsVolatile */ SI->isVolatile());
SI->eraseFromParent();
LI->eraseFromParent();
@@ -302,31 +121,11 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
// Transform mem* intrinsic calls.
for (MemIntrinsic *MemCall : MemCalls) {
if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
- convertMemCpyToLoop(/* ConvertedInst */ Memcpy,
- /* SrcAddr */ Memcpy->getRawSource(),
- /* DstAddr */ Memcpy->getRawDest(),
- /* CopyLen */ Memcpy->getLength(),
- /* SrcIsVolatile */ Memcpy->isVolatile(),
- /* DstIsVolatile */ Memcpy->isVolatile(),
- /* Context */ Context,
- /* Function F */ F);
+ expandMemCpyAsLoop(Memcpy);
} else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
- convertMemMoveToLoop(/* ConvertedInst */ Memmove,
- /* SrcAddr */ Memmove->getRawSource(),
- /* DstAddr */ Memmove->getRawDest(),
- /* CopyLen */ Memmove->getLength(),
- /* SrcIsVolatile */ Memmove->isVolatile(),
- /* DstIsVolatile */ Memmove->isVolatile(),
- /* Context */ Context,
- /* Function F */ F);
-
+ expandMemMoveAsLoop(Memmove);
} else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
- convertMemSetToLoop(/* ConvertedInst */ Memset,
- /* DstAddr */ Memset->getRawDest(),
- /* CopyLen */ Memset->getLength(),
- /* SetValue */ Memset->getValue(),
- /* Context */ Context,
- /* Function F */ F);
+ expandMemSetAsLoop(Memset);
}
MemCall->eraseFromParent();
}
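
For reference, the three expand* helpers called above come from the shared LowerMemIntrinsics utility that replaces the deleted target-local lowering code. A minimal sketch of the same pattern in isolation (assuming the single-argument overloads this patch calls; lowerAllMemIntrinsics is an illustrative name, not part of the patch):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
using namespace llvm;

static bool lowerAllMemIntrinsics(Function &F) {
  // Collect first: expanding splits basic blocks, which would invalidate
  // a plain instruction iterator.
  SmallVector<MemIntrinsic *, 4> MemCalls;
  for (Instruction &I : instructions(F))
    if (auto *MI = dyn_cast<MemIntrinsic>(&I))
      MemCalls.push_back(MI);

  for (MemIntrinsic *MI : MemCalls) {
    if (auto *Memcpy = dyn_cast<MemCpyInst>(MI))
      expandMemCpyAsLoop(Memcpy);
    else if (auto *Memmove = dyn_cast<MemMoveInst>(MI))
      expandMemMoveAsLoop(Memmove);
    else if (auto *Memset = dyn_cast<MemSetInst>(MI))
      expandMemSetAsLoop(Memset);
    // The helpers leave the original call in place; drop it ourselves,
    // as the pass above does.
    MI->eraseFromParent();
  }
  return !MemCalls.empty();
}
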
diff --git a/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 3f0c7be7863d..5b626cbcd5ba 100644
--- a/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -159,7 +159,8 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
assert(PType && "Expecting pointer type in handleByValParam");
Type *StructType = PType->getElementType();
- AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst);
+ unsigned AS = Func->getParent()->getDataLayout().getAllocaAddrSpace();
+ AllocaInst *AllocA = new AllocaInst(StructType, AS, Arg->getName(), FirstInst);
// Set the alignment to alignment of the byval parameter. This is because,
// later load/stores assume that alignment, and we are going to replace
// the use of the byval parameter with this alloca instruction.
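
The AllocaInst change reflects an IR API update: allocas now carry an explicit address space, taken from the DataLayout rather than implicitly defaulting to 0. A hedged sketch of the updated idiom (createLocalCopy and its arguments are illustrative):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static AllocaInst *createLocalCopy(Type *Ty, Function &F,
                                   Instruction *InsertBefore) {
  const DataLayout &DL = F.getParent()->getDataLayout();
  // Create the alloca in the address space the target declares for
  // allocas (0 for NVPTX; e.g. 5 on targets like AMDGPU).
  unsigned AS = DL.getAllocaAddrSpace();
  return new AllocaInst(Ty, AS, "local.copy", InsertBefore);
}
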
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.cpp b/lib/Target/NVPTX/NVPTXMCExpr.cpp
index eab5ee80561e..86a28f7d0700 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.cpp
+++ b/lib/Target/NVPTX/NVPTXMCExpr.cpp
@@ -27,6 +27,13 @@ void NVPTXFloatMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
switch (Kind) {
default: llvm_unreachable("Invalid kind!");
+ case VK_NVPTX_HALF_PREC_FLOAT:
+ // ptxas does not have a way to specify half-precision floats.
+ // Instead we have to print and load fp16 constants as .b16.
+ OS << "0x";
+ NumHex = 4;
+ APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
+ break;
case VK_NVPTX_SINGLE_PREC_FLOAT:
OS << "0f";
NumHex = 8;
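
As the new comment notes, ptxas has no fp16 literal syntax, so half-precision constants are emitted as raw 16-bit hex patterns. A standalone sketch of the same conversion (the function name and choice of formatting helper are illustrative, not the patch's code):

#include "llvm/ADT/APFloat.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void printAsB16(raw_ostream &OS, APFloat APF) {
  bool Ignored;
  // Round to IEEE half the same way printImpl above does.
  APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
  // The 16 payload bits, printed as exactly four hex digits.
  uint64_t Bits = APF.bitcastToAPInt().getZExtValue();
  OS << "0x" << format_hex_no_prefix(Bits, 4, /*Upper=*/true);
}
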
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h
index 7f833c42fa8f..95741d9b0451 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -22,8 +22,9 @@ class NVPTXFloatMCExpr : public MCTargetExpr {
public:
enum VariantKind {
VK_NVPTX_None,
- VK_NVPTX_SINGLE_PREC_FLOAT, // FP constant in single-precision
- VK_NVPTX_DOUBLE_PREC_FLOAT // FP constant in double-precision
+ VK_NVPTX_HALF_PREC_FLOAT, // FP constant in half-precision
+ VK_NVPTX_SINGLE_PREC_FLOAT, // FP constant in single-precision
+ VK_NVPTX_DOUBLE_PREC_FLOAT // FP constant in double-precision
};
private:
@@ -40,6 +41,11 @@ public:
static const NVPTXFloatMCExpr *create(VariantKind Kind, const APFloat &Flt,
MCContext &Ctx);
+ static const NVPTXFloatMCExpr *createConstantFPHalf(const APFloat &Flt,
+ MCContext &Ctx) {
+ return create(VK_NVPTX_HALF_PREC_FLOAT, Flt, Ctx);
+ }
+
static const NVPTXFloatMCExpr *createConstantFPSingle(const APFloat &Flt,
MCContext &Ctx) {
return create(VK_NVPTX_SINGLE_PREC_FLOAT, Flt, Ctx);
diff --git a/lib/Target/NVPTX/NVPTXPeephole.cpp b/lib/Target/NVPTX/NVPTXPeephole.cpp
index 49e639793efc..e10b046f7c97 100644
--- a/lib/Target/NVPTX/NVPTXPeephole.cpp
+++ b/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -113,7 +113,7 @@ static void CombineCVTAToLocal(MachineInstr &Root) {
BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
Root.getOperand(0).getReg())
.addReg(NVPTX::VRFrameLocal)
- .addOperand(Prev.getOperand(2));
+ .add(Prev.getOperand(2));
MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 6cbf0604d7ef..8d46694fbe50 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -27,12 +27,19 @@ using namespace llvm;
namespace llvm {
std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
- if (RC == &NVPTX::Float32RegsRegClass) {
+ if (RC == &NVPTX::Float32RegsRegClass)
return ".f32";
- }
- if (RC == &NVPTX::Float64RegsRegClass) {
+ if (RC == &NVPTX::Float16RegsRegClass)
+ // Ideally fp16 registers should be .f16, but this syntax is only
+ // supported on sm_53+. On the other hand, .b16 registers are
+ // accepted for all supported fp16 instructions on all GPU
+ // variants, so we can use them instead.
+ return ".b16";
+ if (RC == &NVPTX::Float16x2RegsRegClass)
+ return ".b32";
+ if (RC == &NVPTX::Float64RegsRegClass)
return ".f64";
- } else if (RC == &NVPTX::Int64RegsRegClass) {
+ if (RC == &NVPTX::Int64RegsRegClass)
// We use untyped (.b) integer registers here as NVCC does.
// Correctness of generated code does not depend on register type,
// but using .s/.u registers runs into a ptxas bug that prevents
@@ -52,40 +59,37 @@ std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
// add.f16v2 rb32,rb32,rb32; // OK
// add.f16v2 rs32,rs32,rs32; // OK
return ".b64";
- } else if (RC == &NVPTX::Int32RegsRegClass) {
+ if (RC == &NVPTX::Int32RegsRegClass)
return ".b32";
- } else if (RC == &NVPTX::Int16RegsRegClass) {
+ if (RC == &NVPTX::Int16RegsRegClass)
return ".b16";
- } else if (RC == &NVPTX::Int1RegsRegClass) {
+ if (RC == &NVPTX::Int1RegsRegClass)
return ".pred";
- } else if (RC == &NVPTX::SpecialRegsRegClass) {
+ if (RC == &NVPTX::SpecialRegsRegClass)
return "!Special!";
- } else {
- return "INTERNAL";
- }
- return "";
+ return "INTERNAL";
}
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
- if (RC == &NVPTX::Float32RegsRegClass) {
+ if (RC == &NVPTX::Float32RegsRegClass)
return "%f";
- }
- if (RC == &NVPTX::Float64RegsRegClass) {
+ if (RC == &NVPTX::Float16RegsRegClass)
+ return "%h";
+ if (RC == &NVPTX::Float16x2RegsRegClass)
+ return "%hh";
+ if (RC == &NVPTX::Float64RegsRegClass)
return "%fd";
- } else if (RC == &NVPTX::Int64RegsRegClass) {
+ if (RC == &NVPTX::Int64RegsRegClass)
return "%rd";
- } else if (RC == &NVPTX::Int32RegsRegClass) {
+ if (RC == &NVPTX::Int32RegsRegClass)
return "%r";
- } else if (RC == &NVPTX::Int16RegsRegClass) {
+ if (RC == &NVPTX::Int16RegsRegClass)
return "%rs";
- } else if (RC == &NVPTX::Int1RegsRegClass) {
+ if (RC == &NVPTX::Int1RegsRegClass)
return "%p";
- } else if (RC == &NVPTX::SpecialRegsRegClass) {
+ if (RC == &NVPTX::SpecialRegsRegClass)
return "!Special!";
- } else {
- return "INTERNAL";
- }
- return "";
+ return "INTERNAL";
}
}
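
Roughly, the two helpers pair up at the asm printer's register-declaration site; a hypothetical caller (not in this patch, and the header name is assumed) would produce declarations like the following:

#include "NVPTXRegisterInfo.h"           // assumed: declares the helpers
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void emitRegDecl(raw_ostream &O, const TargetRegisterClass *RC,
                        unsigned NumRegs) {
  // For Float16RegsRegClass this prints: .reg .b16 %h<5>;
  O << "\t.reg " << getNVPTXRegClassName(RC) << " "
    << getNVPTXRegClassStr(RC) << "<" << NumRegs << ">;\n";
}
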
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td
index ff6ccc457db7..f04764a9e9a3 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -36,6 +36,8 @@ foreach i = 0-4 in {
def RS#i : NVPTXReg<"%rs"#i>; // 16-bit
def R#i : NVPTXReg<"%r"#i>; // 32-bit
def RL#i : NVPTXReg<"%rd"#i>; // 64-bit
+ def H#i : NVPTXReg<"%h"#i>; // 16-bit float
+ def HH#i : NVPTXReg<"%hh"#i>; // 2x16-bit float
def F#i : NVPTXReg<"%f"#i>; // 32-bit float
def FL#i : NVPTXReg<"%fd"#i>; // 64-bit float
@@ -57,6 +59,8 @@ def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>;
def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 4))>;
def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 4))>;
def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4))>;
+def Float16Regs : NVPTXRegClass<[f16], 16, (add (sequence "H%u", 0, 4))>;
+def Float16x2Regs : NVPTXRegClass<[v2f16], 32, (add (sequence "HH%u", 0, 4))>;
def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>;
def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%u", 0, 4))>;
def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 4))>;
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index b0472de980fc..d736eaa41301 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -31,7 +31,7 @@ public:
/// Override this, as NVPTX has its own way of printing the switch
/// to a section.
- void PrintSwitchToSection(const MCAsmInfo &MAI,
+ void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,
const MCExpr *Subsection) const override {}
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 6e1f427ed021..acbee86ae386 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -23,6 +23,11 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "NVPTXGenSubtargetInfo.inc"
+static cl::opt<bool>
+ NoF16Math("nvptx-no-f16-math", cl::ZeroOrMore, cl::Hidden,
+ cl::desc("NVPTX Specific: Disable generation of f16 math ops."),
+ cl::init(false));
+
// Pin the vtable to this file.
void NVPTXSubtarget::anchor() {}
@@ -57,3 +62,7 @@ bool NVPTXSubtarget::hasImageHandles() const {
// Disabled, otherwise
return false;
}
+
+bool NVPTXSubtarget::allowFP16Math() const {
+ return hasFP16Math() && !NoF16Math;
+}
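
allowFP16Math() lets the hidden flag veto hardware support. Illustrative only, not code from this patch: how such a predicate typically feeds legalization decisions, in the spirit of choosing Legal vs. Promote for f16 operations (helper name and include are assumptions):

#include "NVPTXSubtarget.h"               // assumed include
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

// Hypothetical helper: keep f16 ops Legal only when f16 math is both
// supported (sm_53+) and not disabled via -nvptx-no-f16-math.
static TargetLoweringBase::LegalizeAction
actionForF16Op(const NVPTXSubtarget &STI) {
  return STI.allowFP16Math() ? TargetLoweringBase::Legal
                             : TargetLoweringBase::Promote;
}
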
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index da020a94bcdd..96618cf46373 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -101,6 +101,8 @@ public:
inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
inline bool hasROT64() const { return SmVersion >= 20; }
bool hasImageHandles() const;
+ bool hasFP16Math() const { return SmVersion >= 53; }
+ bool allowFP16Math() const;
unsigned int getSmVersion() const { return SmVersion; }
std::string getTargetName() const { return TargetName; }
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index eb357e0a4d50..ab5298d0dcfd 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Vectorize.h"
@@ -50,7 +51,6 @@ void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
-void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
void initializeNVPTXLowerArgsPass(PassRegistry &);
void initializeNVPTXLowerAllocaPass(PassRegistry &);
@@ -70,7 +70,6 @@ extern "C" void LLVMInitializeNVPTXTarget() {
initializeGenericToNVVMPass(PR);
initializeNVPTXAllocaHoistingPass(PR);
initializeNVPTXAssignValidGlobalNamesPass(PR);
- initializeNVPTXInferAddressSpacesPass(PR);
initializeNVPTXLowerArgsPass(PR);
initializeNVPTXLowerAllocaPass(PR);
initializeNVPTXLowerAggrCopiesPass(PR);
@@ -167,9 +166,13 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
return new NVPTXPassConfig(this, PM);
}
-void NVPTXTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) {
- PM.add(createNVVMReflectPass());
- PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
+void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
+ Builder.addExtension(
+ PassManagerBuilder::EP_EarlyAsPossible,
+ [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ PM.add(createNVVMReflectPass());
+ PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
+ });
}
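
The old hook pushed passes into an already-built pass manager; the new one registers extension points on the PassManagerBuilder before the pipeline is populated. A hedged sketch of the consumer side (driver code, not part of this patch; names are illustrative):

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
using namespace llvm;

static void runPipeline(TargetMachine &TM, Module &M) {
  PassManagerBuilder Builder;
  Builder.OptLevel = 2;
  // NVPTX's override registers NVVMReflect and NVVMIntrRange at
  // EP_EarlyAsPossible; they then run as part of the normal pipeline.
  TM.adjustPassManager(Builder);
  legacy::PassManager PM;
  Builder.populateModulePassManager(PM);
  PM.run(M);
}
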
TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
@@ -190,7 +193,7 @@ void NVPTXPassConfig::addAddressSpaceInferencePasses() {
// be eliminated by SROA.
addPass(createSROAPass());
addPass(createNVPTXLowerAllocaPass());
- addPass(createNVPTXInferAddressSpacesPass());
+ addPass(createInferAddressSpacesPass());
}
void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 78a053831772..1ed8e3b1e935 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -61,7 +61,8 @@ public:
return TLOF.get();
}
- void addEarlyAsPossiblePasses(PassManagerBase &PM) override;
+ void adjustPassManager(PassManagerBuilder &) override;
+
TargetIRAnalysis getTargetIRAnalysis() override;
}; // NVPTXTargetMachine.
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index b6c271ae4cbc..03075b550429 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -45,6 +45,10 @@ public:
bool isSourceOfDivergence(const Value *V);
+ unsigned getFlatAddressSpace() const {
+ return AddressSpace::ADDRESS_SPACE_GENERIC;
+ }
+
// Increase the inlining cost threshold by a factor of 5, reflecting that
// calls are particularly expensive in NVPTX.
unsigned getInliningThresholdMultiplier() { return 5; }
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index c639c4dc0683..152b665d0fdc 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -10,11 +10,10 @@
// This pass replaces occurrences of __nvvm_reflect("foo") and llvm.nvvm.reflect
// with an integer.
//
-// We choose the value we use by looking, in this order, at:
-//
-// * the -nvvm-reflect-list flag, which has the format "foo=1,bar=42",
-// * the StringMap passed to the pass's constructor, and
-// * metadata in the module itself.
+// We choose the value we use by looking at metadata in the module itself. Note
+// that we intentionally only have one way to choose these values, because other
+// parts of LLVM (particularly, InstCombineCall) rely on being able to predict
+// the values chosen by this pass.
//
// If we see an unknown string, we replace its call with 0.
//
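
Concretely, once the pass has settled on a ReflectVal for a call site, the rewrite itself is a constant fold. A simplified sketch of that final step (the real pass also extracts the string argument and handles both the named function and the intrinsic):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void replaceReflectCall(CallInst *Call, int ReflectVal) {
  // __nvvm_reflect returns an integer, so every use of the call can be
  // rewritten to the chosen constant and the call removed.
  Call->replaceAllUsesWith(ConstantInt::get(Call->getType(), ReflectVal));
  Call->eraseFromParent();
}
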
@@ -49,30 +48,17 @@ namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
namespace {
class NVVMReflect : public FunctionPass {
-private:
- StringMap<int> VarMap;
-
public:
static char ID;
- NVVMReflect() : NVVMReflect(StringMap<int>()) {}
-
- NVVMReflect(const StringMap<int> &Mapping)
- : FunctionPass(ID), VarMap(Mapping) {
+ NVVMReflect() : FunctionPass(ID) {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
- setVarMap();
}
bool runOnFunction(Function &) override;
-
-private:
- void setVarMap();
};
}
FunctionPass *llvm::createNVVMReflectPass() { return new NVVMReflect(); }
-FunctionPass *llvm::createNVVMReflectPass(const StringMap<int> &Mapping) {
- return new NVVMReflect(Mapping);
-}
static cl::opt<bool>
NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
@@ -83,35 +69,6 @@ INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
"Replace occurrences of __nvvm_reflect() calls with 0/1", false,
false)
-static cl::list<std::string>
-ReflectList("nvvm-reflect-list", cl::value_desc("name=<int>"), cl::Hidden,
- cl::desc("A list of string=num assignments"),
- cl::ValueRequired);
-
-/// The command line can look as follows:
-/// -nvvm-reflect-list a=1,b=2 -nvvm-reflect-list c=3,d=0 -R e=2
-/// The strings "a=1,b=2", "c=3,d=0", "e=2" are available in the
-/// ReflectList vector. First, each of ReflectList[i] is 'split'
-/// using "," as the delimiter. Then each part is split
-/// using "=" as the delimiter.
-void NVVMReflect::setVarMap() {
- for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) {
- DEBUG(dbgs() << "Option : " << ReflectList[i] << "\n");
- SmallVector<StringRef, 4> NameValList;
- StringRef(ReflectList[i]).split(NameValList, ',');
- for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) {
- SmallVector<StringRef, 2> NameValPair;
- NameValList[j].split(NameValPair, '=');
- assert(NameValPair.size() == 2 && "name=val expected");
- std::stringstream ValStream(NameValPair[1]);
- int Val;
- ValStream >> Val;
- assert((!(ValStream.fail())) && "integer value expected");
- VarMap[NameValPair[0]] = Val;
- }
- }
-}
-
bool NVVMReflect::runOnFunction(Function &F) {
if (!NVVMReflectEnabled)
return false;
@@ -199,11 +156,10 @@ bool NVVMReflect::runOnFunction(Function &F) {
DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");
int ReflectVal = 0; // The default value is 0
- auto Iter = VarMap.find(ReflectArg);
- if (Iter != VarMap.end())
- ReflectVal = Iter->second;
- else if (ReflectArg == "__CUDA_FTZ") {
- // Try to pull __CUDA_FTZ from the nvvm-reflect-ftz module flag.
+ if (ReflectArg == "__CUDA_FTZ") {
+ // Try to pull __CUDA_FTZ from the nvvm-reflect-ftz module flag. Our
+ // choice here must be kept in sync with AutoUpgrade, which uses the same
+ // technique to detect whether ftz is enabled.
if (auto *Flag = mdconst::extract_or_null<ConstantInt>(
F.getParent()->getModuleFlag("nvvm-reflect-ftz")))
ReflectVal = Flag->getSExtValue();
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 4842c3b7a656..7ca4c1999003 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -40,6 +40,7 @@ add_llvm_target(PowerPCCodeGen
PPCVSXCopy.cpp
PPCVSXFMAMutate.cpp
PPCVSXSwapRemoval.cpp
+ PPCExpandISEL.cpp
)
add_subdirectory(AsmParser)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 5847b3a52bfc..4863ac542736 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -114,7 +114,7 @@ public:
}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override {
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 017d21af08a8..a00b56af0490 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -11,22 +11,28 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCInstrInfo.h"
-#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "PPCInstrInfo.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOpcodes.h"
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
@@ -34,10 +40,8 @@ using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace {
-class PPCMCCodeEmitter : public MCCodeEmitter {
- PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
- void operator=(const PPCMCCodeEmitter &) = delete;
+class PPCMCCodeEmitter : public MCCodeEmitter {
const MCInstrInfo &MCII;
const MCContext &CTX;
bool IsLittleEndian;
@@ -46,8 +50,9 @@ public:
PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
: MCII(mcii), CTX(ctx),
IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
-
- ~PPCMCCodeEmitter() override {}
+ PPCMCCodeEmitter(const PPCMCCodeEmitter &) = delete;
+ void operator=(const PPCMCCodeEmitter &) = delete;
+ ~PPCMCCodeEmitter() override = default;
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
@@ -103,6 +108,7 @@ public:
uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override {
@@ -137,7 +143,7 @@ public:
}
break;
default:
- llvm_unreachable ("Invalid instruction size");
+ llvm_unreachable("Invalid instruction size");
}
++MCNumEmitted; // Keep track of the # of mi's emitted.
@@ -238,7 +244,6 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
return RegBits;
}
-
unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -286,7 +291,6 @@ unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
return reverseBits(Imm | RegBits) >> 22;
}
-
unsigned PPCMCCodeEmitter::getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI)
@@ -302,7 +306,6 @@ unsigned PPCMCCodeEmitter::getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
return reverseBits(Imm | RegBits) >> 22;
}
-
unsigned PPCMCCodeEmitter::getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI)
@@ -318,7 +321,6 @@ unsigned PPCMCCodeEmitter::getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
return reverseBits(Imm | RegBits) >> 22;
}
-
unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -383,7 +385,5 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
return MO.getImm();
}
-
-
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "PPCGenMCCodeEmitter.inc"
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index bbd10e5b260f..2d686f227919 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -11,22 +11,29 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCMCTargetDesc.h"
#include "InstPrinter/PPCInstPrinter.h"
-#include "PPCMCAsmInfo.h"
+#include "MCTargetDesc/PPCMCAsmInfo.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPCTargetStreamer.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -41,9 +48,10 @@ using namespace llvm;
#include "PPCGenRegisterInfo.inc"
// Pin the vtable to this file.
-PPCTargetStreamer::~PPCTargetStreamer() {}
PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+PPCTargetStreamer::~PPCTargetStreamer() = default;
+
static MCInstrInfo *createPPCMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitPPCMCInstrInfo(X);
@@ -96,12 +104,14 @@ static void adjustCodeGenOpts(const Triple &TT, Reloc::Model RM,
}
namespace {
+
class PPCTargetAsmStreamer : public PPCTargetStreamer {
formatted_raw_ostream &OS;
public:
PPCTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
: PPCTargetStreamer(S), OS(OS) {}
+
void emitTCEntry(const MCSymbol &S) override {
OS << "\t.tc ";
OS << S.getName();
@@ -109,12 +119,15 @@ public:
OS << S.getName();
OS << '\n';
}
+
void emitMachine(StringRef CPU) override {
OS << "\t.machine " << CPU << '\n';
}
+
void emitAbiVersion(int AbiVersion) override {
OS << "\t.abiversion " << AbiVersion << '\n';
}
+
void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
@@ -129,18 +142,22 @@ public:
class PPCTargetELFStreamer : public PPCTargetStreamer {
public:
PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+
MCELFStreamer &getStreamer() {
return static_cast<MCELFStreamer &>(Streamer);
}
+
void emitTCEntry(const MCSymbol &S) override {
// Creates a R_PPC64_TOC relocation
Streamer.EmitValueToAlignment(8);
Streamer.EmitSymbolValue(&S, 8);
}
+
void emitMachine(StringRef CPU) override {
// FIXME: Is there anything to do in here or does this directive only
// limit the parser?
}
+
void emitAbiVersion(int AbiVersion) override {
MCAssembler &MCA = getStreamer().getAssembler();
unsigned Flags = MCA.getELFHeaderEFlags();
@@ -148,6 +165,7 @@ public:
Flags |= (AbiVersion & ELF::EF_PPC64_ABI);
MCA.setELFHeaderEFlags(Flags);
}
+
void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
MCAssembler &MCA = getStreamer().getAssembler();
@@ -170,6 +188,7 @@ public:
if ((Flags & ELF::EF_PPC64_ABI) == 0)
MCA.setELFHeaderEFlags(Flags | 2);
}
+
void emitAssignment(MCSymbol *S, const MCExpr *Value) override {
auto *Symbol = cast<MCSymbolELF>(S);
// When encoding an assignment to set symbol A to symbol B, also copy
@@ -188,21 +207,26 @@ public:
class PPCTargetMachOStreamer : public PPCTargetStreamer {
public:
PPCTargetMachOStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+
void emitTCEntry(const MCSymbol &S) override {
llvm_unreachable("Unknown pseudo-op: .tc");
}
+
void emitMachine(StringRef CPU) override {
// FIXME: We should update the CPUType, CPUSubType in the Object file if
// the new values are different from the defaults.
}
+
void emitAbiVersion(int AbiVersion) override {
llvm_unreachable("Unknown pseudo-op: .abiversion");
}
+
void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) override {
llvm_unreachable("Unknown pseudo-op: .localentry");
}
};
-}
+
+} // end anonymous namespace
static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 0989e0c8e268..893233ee2300 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -17,23 +17,22 @@
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MathExtras.h"
+#include <cstdint>
namespace llvm {
+
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
-class MCSubtargetInfo;
class MCTargetOptions;
class Target;
class Triple;
class StringRef;
class raw_pwrite_stream;
-class raw_ostream;
Target &getThePPC32Target();
Target &getThePPC64Target();
@@ -83,7 +82,7 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
return false;
}
-} // End llvm namespace
+} // end namespace llvm
// Generated files will use "namespace PPC". To avoid symbol clash,
// undefine PPC here. PPC may be predefined on some hosts.
@@ -103,4 +102,4 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
#define GET_SUBTARGETINFO_ENUM
#include "PPCGenSubtargetInfo.inc"
-#endif
+#endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index e01f49dce81e..38ae62b26757 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -45,11 +45,13 @@ namespace llvm {
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCTLSDynamicCallPass();
FunctionPass *createPPCBoolRetToIntPass();
+ FunctionPass *createPPCExpandISELPass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
void initializePPCVSXFMAMutatePass(PassRegistry&);
void initializePPCBoolRetToIntPass(PassRegistry&);
+ void initializePPCExpandISELPass(PassRegistry &);
extern char &PPCVSXFMAMutateID;
namespace PPCII {
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index f0e0ebc4946c..1f181d007f63 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -112,7 +112,9 @@ public:
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
bool runOnMachineFunction(MachineFunction &MF) override {
Subtarget = &MF.getSubtarget<PPCSubtarget>();
- return AsmPrinter::runOnMachineFunction(MF);
+ bool Changed = AsmPrinter::runOnMachineFunction(MF);
+ emitXRayTable();
+ return Changed;
}
};
@@ -134,6 +136,7 @@ public:
void EmitFunctionBodyStart() override;
void EmitFunctionBodyEnd() override;
+ void EmitInstruction(const MachineInstr *MI) override;
};
/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
@@ -402,7 +405,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
.addImm(CallTarget & 0xFFFF));
// Save the current TOC pointer before the remote call.
- int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40;
+ int TOCSaveOffset = Subtarget->getFrameLowering()->getTOCSaveOffset();
EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::STD)
.addReg(PPC::X2)
.addImm(TOCSaveOffset)
@@ -1046,6 +1049,97 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
}
+void PPCLinuxAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ if (!Subtarget->isPPC64())
+ return PPCAsmPrinter::EmitInstruction(MI);
+
+ switch (MI->getOpcode()) {
+ default:
+ return PPCAsmPrinter::EmitInstruction(MI);
+ case TargetOpcode::PATCHABLE_FUNCTION_ENTER: {
+ // .begin:
+ // b .end # lis 0, FuncId[16..31]
+ // nop # li 0, FuncId[0..15]
+ // std 0, -8(1)
+ // mflr 0
+ // bl __xray_FunctionEntry
+ // mtlr 0
+ // .end:
+ //
+ // Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when the
+ // number of instructions changes.
+ MCSymbol *BeginOfSled = OutContext.createTempSymbol();
+ MCSymbol *EndOfSled = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(BeginOfSled);
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(PPC::B).addExpr(
+ MCSymbolRefExpr::create(EndOfSled, OutContext)));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(PPC::STD).addReg(PPC::X0).addImm(-8).addReg(PPC::X1));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR8).addReg(PPC::X0));
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(PPC::BL8_NOP)
+ .addExpr(MCSymbolRefExpr::create(
+ OutContext.getOrCreateSymbol("__xray_FunctionEntry"),
+ OutContext)));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR8).addReg(PPC::X0));
+ OutStreamer->EmitLabel(EndOfSled);
+ recordSled(BeginOfSled, *MI, SledKind::FUNCTION_ENTER);
+ break;
+ }
+ case TargetOpcode::PATCHABLE_FUNCTION_EXIT: {
+ // .p2align 3
+ // .begin:
+ // b(lr)? # lis 0, FuncId[16..31]
+ // nop # li 0, FuncId[0..15]
+ // std 0, -8(1)
+ // mflr 0
+ // bl __xray_FunctionExit
+ // mtlr 0
+ // .end:
+ // b(lr)?
+ //
+ // Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when the
+ // number of instructions changes.
+ const MachineInstr *Next = [&] {
+ MachineBasicBlock::const_iterator It(MI);
+ assert(It != MI->getParent()->end());
+ ++It;
+ assert(It->isReturn());
+ return &*It;
+ }();
+ OutStreamer->EmitCodeAlignment(8);
+ MCSymbol *BeginOfSled = OutContext.createTempSymbol();
+ OutStreamer->EmitLabel(BeginOfSled);
+ MCInst TmpInst;
+ LowerPPCMachineInstrToMCInst(Next, TmpInst, *this, false);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP));
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(PPC::STD).addReg(PPC::X0).addImm(-8).addReg(PPC::X1));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MFLR8).addReg(PPC::X0));
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(PPC::BL8_NOP)
+ .addExpr(MCSymbolRefExpr::create(
+ OutContext.getOrCreateSymbol("__xray_FunctionExit"),
+ OutContext)));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTLR8).addReg(PPC::X0));
+ recordSled(BeginOfSled, *MI, SledKind::FUNCTION_EXIT);
+ break;
+ }
+ case TargetOpcode::PATCHABLE_TAIL_CALL:
+ case TargetOpcode::PATCHABLE_RET:
+ // PPC's tail call instruction, e.g. PPC::TCRETURNdi8, doesn't really
+ // lower to a PPC::B instruction. The PPC::B instruction is generated
+ // before it, and handled by the normal case.
+ llvm_unreachable("Tail call is handled in the normal case. See comments"
+ "around this assert.");
+ }
+}
+
void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (static_cast<const PPCTargetMachine &>(TM).isELFv2ABI()) {
PPCTargetStreamer *TS =
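The entry/exit sleds emitted above reserve two patchable instructions (the leading branch and the nop) that the XRay runtime later rewrites to load the 32-bit function id into r0, per the lis/li comments in the sled. A hedged sketch of that id split, with illustrative names rather than the compiler-rt API:

#include <cstdint>

struct FuncIdHalves {
  uint16_t Hi; // replaces the "b .end" slot as "lis 0, Hi"
  uint16_t Lo; // replaces the "nop" slot as "li 0, Lo"
};

static FuncIdHalves splitXRayFuncId(uint32_t FuncId) {
  return {static_cast<uint16_t>(FuncId >> 16),
          static_cast<uint16_t>(FuncId & 0xFFFF)};
}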
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index ae76386fdfb6..b7d3154d0000 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -78,7 +78,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
BlockSizes.resize(Fn.getNumBlockIDs());
auto GetAlignmentAdjustment =
- [TII](MachineBasicBlock &MBB, unsigned Offset) -> unsigned {
+ [](MachineBasicBlock &MBB, unsigned Offset) -> unsigned {
unsigned Align = MBB.getAlignment();
if (!Align)
return 0;
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 2c62a0f1d909..70c4170653ae 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -298,15 +298,17 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
return true;
else
continue; // ISD::FCOPYSIGN is never a library call.
- case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
- case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
- case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
- case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
- case Intrinsic::rint: Opcode = ISD::FRINT; break;
- case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
- case Intrinsic::round: Opcode = ISD::FROUND; break;
- case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
- case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
+ case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
+ case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
+ case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
+ case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
}
}
@@ -315,7 +317,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
// (i.e. soft float or atomics). If adapting for targets that do,
// additional care is required here.
- LibFunc::Func Func;
+ LibFunc Func;
if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
LibInfo->getLibFunc(F->getName(), Func) &&
LibInfo->hasOptimizedCodeGen(Func)) {
@@ -329,50 +331,50 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
switch (Func) {
default: return true;
- case LibFunc::copysign:
- case LibFunc::copysignf:
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
continue; // ISD::FCOPYSIGN is never a library call.
- case LibFunc::copysignl:
+ case LibFunc_copysignl:
return true;
- case LibFunc::fabs:
- case LibFunc::fabsf:
- case LibFunc::fabsl:
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
continue; // ISD::FABS is never a library call.
- case LibFunc::sqrt:
- case LibFunc::sqrtf:
- case LibFunc::sqrtl:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
Opcode = ISD::FSQRT; break;
- case LibFunc::floor:
- case LibFunc::floorf:
- case LibFunc::floorl:
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
Opcode = ISD::FFLOOR; break;
- case LibFunc::nearbyint:
- case LibFunc::nearbyintf:
- case LibFunc::nearbyintl:
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
Opcode = ISD::FNEARBYINT; break;
- case LibFunc::ceil:
- case LibFunc::ceilf:
- case LibFunc::ceill:
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
Opcode = ISD::FCEIL; break;
- case LibFunc::rint:
- case LibFunc::rintf:
- case LibFunc::rintl:
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
Opcode = ISD::FRINT; break;
- case LibFunc::round:
- case LibFunc::roundf:
- case LibFunc::roundl:
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
Opcode = ISD::FROUND; break;
- case LibFunc::trunc:
- case LibFunc::truncf:
- case LibFunc::truncl:
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
Opcode = ISD::FTRUNC; break;
- case LibFunc::fmin:
- case LibFunc::fminf:
- case LibFunc::fminl:
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
Opcode = ISD::FMINNUM; break;
- case LibFunc::fmax:
- case LibFunc::fmaxf:
- case LibFunc::fmaxl:
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
Opcode = ISD::FMAXNUM; break;
}
}
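The mechanical change in this hunk is the rename of the scoped LibFunc::Func enum to a plain LibFunc enum (LibFunc_sqrt and friends). A small usage sketch of the query pattern the pass relies on, assuming the post-rename TargetLibraryInfo API:

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Returns true if F is a recognized sqrt-family libcall with optimized
// codegen, i.e. one the loop can treat as ISD::FSQRT rather than a call.
static bool lowersToFSQRT(const Function &F, const TargetLibraryInfo &TLI) {
  LibFunc Func;
  return F.hasName() && TLI.getLibFunc(F.getName(), Func) &&
         TLI.hasOptimizedCodeGen(Func) &&
         (Func == LibFunc_sqrt || Func == LibFunc_sqrtf ||
          Func == LibFunc_sqrtl);
}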
diff --git a/lib/Target/PowerPC/PPCExpandISEL.cpp b/lib/Target/PowerPC/PPCExpandISEL.cpp
new file mode 100644
index 000000000000..ebd414baf1d2
--- /dev/null
+++ b/lib/Target/PowerPC/PPCExpandISEL.cpp
@@ -0,0 +1,458 @@
+//===------------- PPCExpandISEL.cpp - Expand ISEL instruction ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass that expands the ISEL instruction into an if-then-else sequence.
+// This pass must be run post-RA since all operands must be physical registers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-expand-isel"
+
+STATISTIC(NumExpanded, "Number of ISEL instructions expanded");
+STATISTIC(NumRemoved, "Number of ISEL instructions removed");
+STATISTIC(NumFolded, "Number of ISEL instructions folded");
+
+// If -ppc-gen-isel=false is set, generating the ISEL instruction is
+// disabled on all PPC targets. Otherwise, if the user passed -misel or
+// the platform supports ISEL by default, the ISEL instruction is still
+// generated; otherwise it is expanded.
+static cl::opt<bool>
+ GenerateISEL("ppc-gen-isel",
+ cl::desc("Enable generating the ISEL instruction."),
+ cl::init(true), cl::Hidden);
+
+namespace {
+class PPCExpandISEL : public MachineFunctionPass {
+ DebugLoc dl;
+ MachineFunction *MF;
+ const TargetInstrInfo *TII;
+ bool IsTrueBlockRequired;
+ bool IsFalseBlockRequired;
+ MachineBasicBlock *TrueBlock;
+ MachineBasicBlock *FalseBlock;
+ MachineBasicBlock *NewSuccessor;
+ MachineBasicBlock::iterator TrueBlockI;
+ MachineBasicBlock::iterator FalseBlockI;
+
+ typedef SmallVector<MachineInstr *, 4> BlockISELList;
+ typedef SmallDenseMap<int, BlockISELList> ISELInstructionList;
+
+ // A map of MBB numbers to their lists of contained ISEL instructions.
+ ISELInstructionList ISELInstructions;
+
+ /// Initialize the object.
+ void initialize(MachineFunction &MFParam);
+
+ void handleSpecialCases(BlockISELList &BIL, MachineBasicBlock *MBB);
+ void reorganizeBlockLayout(BlockISELList &BIL, MachineBasicBlock *MBB);
+ void populateBlocks(BlockISELList &BIL);
+ void expandMergeableISELs(BlockISELList &BIL);
+ void expandAndMergeISELs();
+
+ bool canMerge(MachineInstr *PrevPushedMI, MachineInstr *MI);
+
+ /// Is this instruction an ISEL or ISEL8?
+ static bool isISEL(const MachineInstr &MI) {
+ return (MI.getOpcode() == PPC::ISEL || MI.getOpcode() == PPC::ISEL8);
+ }
+
+ /// Is this instruction an ISEL8?
+ static bool isISEL8(const MachineInstr &MI) {
+ return (MI.getOpcode() == PPC::ISEL8);
+ }
+
+ /// Are the two operands using the same register?
+ bool useSameRegister(const MachineOperand &Op1, const MachineOperand &Op2) {
+ return (Op1.getReg() == Op2.getReg());
+ }
+
+ ///
+ /// Collect all ISEL instructions from the current function.
+ ///
+ /// Walk the current function and collect all the ISEL instructions that are
+ /// found. The instructions are placed in the ISELInstructions vector.
+ ///
+ /// \return true if any ISEL instructions were found, false otherwise
+ ///
+ bool collectISELInstructions();
+
+public:
+ static char ID;
+ PPCExpandISEL() : MachineFunctionPass(ID) {
+ initializePPCExpandISELPass(*PassRegistry::getPassRegistry());
+ }
+
+ ///
+ /// Determine whether to generate the ISEL instruction or expand it.
+ ///
+ /// Expand the ISEL instruction into an if-then-else sequence when either
+ /// of the following two conditions holds:
+ /// (1) -ppc-gen-isel=false
+ /// (2) hasISEL() returns false
+ /// Otherwise, still generate the ISEL instruction.
+ /// The -ppc-gen-isel option is set to true by default, which means the ISEL
+ /// instruction is still generated by default on targets that support it.
+ ///
+ /// \return true if ISEL should be expanded into an if-then-else sequence;
+ /// false if the ISEL instruction should be generated, i.e. not expanded.
+ ///
+ static bool isExpandISELEnabled(const MachineFunction &MF);
+
+#ifndef NDEBUG
+ void DumpISELInstructions() const;
+#endif
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!isExpandISELEnabled(MF))
+ return false;
+
+ DEBUG(dbgs() << "Function: "; MF.dump(); dbgs() << "\n");
+ initialize(MF);
+
+ if (!collectISELInstructions()) {
+ DEBUG(dbgs() << "No ISEL instructions in this function\n");
+ return false;
+ }
+
+#ifndef NDEBUG
+ DumpISELInstructions();
+#endif
+
+ expandAndMergeISELs();
+
+ return true;
+ }
+};
+} // end anonymous namespace
+
+void PPCExpandISEL::initialize(MachineFunction &MFParam) {
+ MF = &MFParam;
+ TII = MF->getSubtarget().getInstrInfo();
+ ISELInstructions.clear();
+}
+
+bool PPCExpandISEL::isExpandISELEnabled(const MachineFunction &MF) {
+ return !GenerateISEL || !MF.getSubtarget<PPCSubtarget>().hasISEL();
+}
+
+bool PPCExpandISEL::collectISELInstructions() {
+ for (MachineBasicBlock &MBB : *MF) {
+ BlockISELList thisBlockISELs;
+ for (MachineInstr &MI : MBB)
+ if (isISEL(MI))
+ thisBlockISELs.push_back(&MI);
+ if (!thisBlockISELs.empty())
+ ISELInstructions.insert(std::make_pair(MBB.getNumber(), thisBlockISELs));
+ }
+ return !ISELInstructions.empty();
+}
+
+#ifndef NDEBUG
+void PPCExpandISEL::DumpISELInstructions() const {
+ for (const auto &I : ISELInstructions) {
+ DEBUG(dbgs() << "BB#" << I.first << ":\n");
+ for (const auto &VI : I.second)
+ DEBUG(dbgs() << " "; VI->print(dbgs()));
+ }
+}
+#endif
+
+/// Contiguous ISELs that have the same condition can be merged.
+bool PPCExpandISEL::canMerge(MachineInstr *PrevPushedMI, MachineInstr *MI) {
+ // Same Condition Register?
+ if (!useSameRegister(PrevPushedMI->getOperand(3), MI->getOperand(3)))
+ return false;
+
+ MachineBasicBlock::iterator PrevPushedMBBI = *PrevPushedMI;
+ MachineBasicBlock::iterator MBBI = *MI;
+ return (std::prev(MBBI) == PrevPushedMBBI); // Contiguous ISELs?
+}
+
+void PPCExpandISEL::expandAndMergeISELs() {
+ for (auto &BlockList : ISELInstructions) {
+ DEBUG(dbgs() << "Expanding ISEL instructions in BB#" << BlockList.first
+ << "\n");
+
+ BlockISELList &CurrentISELList = BlockList.second;
+ auto I = CurrentISELList.begin();
+ auto E = CurrentISELList.end();
+
+ while (I != E) {
+ BlockISELList SubISELList;
+
+ SubISELList.push_back(*I++);
+
+ // Collect the ISELs that can be merged together.
+ while (I != E && canMerge(SubISELList.back(), *I))
+ SubISELList.push_back(*I++);
+
+ expandMergeableISELs(SubISELList);
+ }
+ }
+}
+
+void PPCExpandISEL::handleSpecialCases(BlockISELList &BIL,
+ MachineBasicBlock *MBB) {
+ IsTrueBlockRequired = false;
+ IsFalseBlockRequired = false;
+
+ auto MI = BIL.begin();
+ while (MI != BIL.end()) {
+ assert(isISEL(**MI) && "Expecting an ISEL instruction");
+ DEBUG(dbgs() << "ISEL: " << **MI << "\n");
+
+ MachineOperand &Dest = (*MI)->getOperand(0);
+ MachineOperand &TrueValue = (*MI)->getOperand(1);
+ MachineOperand &FalseValue = (*MI)->getOperand(2);
+
+ // If at least one of the ISEL instructions satisfies the following
+ // condition, we need the True Block:
+ // The Dest Register and True Value Register are not the same.
+ // Similarly, if at least one of the ISEL instructions satisfies the
+ // following condition, we need the False Block:
+ // The Dest Register and False Value Register are not the same.
+
+ bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue);
+ bool IsORIInstRequired = !useSameRegister(Dest, FalseValue);
+
+ // Special case 1, all registers used by ISEL are the same one.
+ if (!IsADDIInstRequired && !IsORIInstRequired) {
+ DEBUG(dbgs() << "Remove redudant ISEL instruction.");
+ NumRemoved++;
+ (*MI)->eraseFromParent();
+ // Setting MI to the erase result keeps the iterator valid and advanced.
+ MI = BIL.erase(MI);
+ continue;
+ }
+
+ // Special case 2, the two input registers used by ISEL are the same.
+ // Note 1: We favor merging ISEL expansions over folding a single one. If
+ // the passed list has multiple mergeable ISELs, we won't fold any.
+ // Note 2: There is no need to test for PPC::R0/PPC::X0 because PPC::ZERO/
+ // PPC::ZERO8 will be used for the first operand if the value is meant to
+ // be zero. In this case, the useSameRegister method will return false,
+ // thereby preventing this ISEL from being folded.
+
+ if (useSameRegister(TrueValue, FalseValue) && (BIL.size() == 1)) {
+ DEBUG(dbgs() << "Fold the ISEL instruction to an unconditonal copy.");
+ NumFolded++;
+ BuildMI(*MBB, (*MI), dl, TII->get(isISEL8(**MI) ? PPC::ADDI8 : PPC::ADDI))
+ .add(Dest)
+ .add(TrueValue)
+ .add(MachineOperand::CreateImm(0));
+ (*MI)->eraseFromParent();
+ // Setting MI to the erase result keeps the iterator valid and advanced.
+ MI = BIL.erase(MI);
+ continue;
+ }
+
+ IsTrueBlockRequired |= IsADDIInstRequired;
+ IsFalseBlockRequired |= IsORIInstRequired;
+ MI++;
+ }
+}
+
+void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL,
+ MachineBasicBlock *MBB) {
+ if (BIL.empty())
+ return;
+
+ assert((IsTrueBlockRequired || IsFalseBlockRequired) &&
+ "Should have been handled by special cases earlier!");
+
+ MachineBasicBlock *Successor = nullptr;
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineBasicBlock::iterator MBBI = (*BIL.back());
+ NewSuccessor = (MBBI != MBB->getLastNonDebugInstr() || !MBB->canFallThrough())
+ // Another BB is needed to move the instructions that
+ // follow this ISEL. If the ISEL is the last instruction
+ // in a block that can't fall through, we also need a block
+ // to branch to.
+ ? MF->CreateMachineBasicBlock(LLVM_BB)
+ : nullptr;
+
+ MachineFunction::iterator It = MBB->getIterator();
+ ++It; // Point to the successor block of MBB.
+
+ // If NewSuccessor is NULL then the last ISEL in this group is the last
+ // non-debug instruction in this block. Find the fall-through successor
+ // of this block to use when updating the CFG below.
+ if (!NewSuccessor) {
+ for (auto &Succ : MBB->successors()) {
+ if (MBB->isLayoutSuccessor(Succ)) {
+ Successor = Succ;
+ break;
+ }
+ }
+ } else
+ Successor = NewSuccessor;
+
+ // The FalseBlock and TrueBlock are inserted after the MBB block but before
+ // its successor.
+ // Note this needs to be done *after* the Successor is set up above.
+ if (IsFalseBlockRequired) {
+ FalseBlock = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, FalseBlock);
+ }
+
+ if (IsTrueBlockRequired) {
+ TrueBlock = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, TrueBlock);
+ }
+
+ if (NewSuccessor) {
+ MF->insert(It, NewSuccessor);
+
+ // Transfer the rest of this block into the new successor block.
+ NewSuccessor->splice(NewSuccessor->end(), MBB,
+ std::next(MachineBasicBlock::iterator(BIL.back())),
+ MBB->end());
+ NewSuccessor->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Copy the original liveIns of MBB to NewSuccessor.
+ for (auto &LI : MBB->liveins())
+ NewSuccessor->addLiveIn(LI);
+
+ // After splitting the NewSuccessor block, Regs defined but not killed
+ // in MBB should be treated as liveins of NewSuccessor.
+ // Note: We cannot use stepBackward here since we are using the register
+ // liveness state at the end of MBB (the liveOut of MBB) as the liveIn for
+ // NewSuccessor; doing otherwise would create a cyclic dependence.
+ LivePhysRegs LPR(MF->getSubtarget<PPCSubtarget>().getRegisterInfo());
+ SmallVector<std::pair<unsigned, const MachineOperand *>, 2> Clobbers;
+ for (MachineInstr &MI : *MBB)
+ LPR.stepForward(MI, Clobbers);
+ for (auto &LI : LPR)
+ NewSuccessor->addLiveIn(LI);
+ } else {
+ // Remove successor from MBB.
+ MBB->removeSuccessor(Successor);
+ }
+
+ // Note that this needs to be done *after* transferring the successors from
+ // MBB to the NewSuccessor block, otherwise these blocks will also be
+ // transferred as successors!
+ MBB->addSuccessor(IsTrueBlockRequired ? TrueBlock : Successor);
+ MBB->addSuccessor(IsFalseBlockRequired ? FalseBlock : Successor);
+
+ if (IsTrueBlockRequired) {
+ TrueBlockI = TrueBlock->begin();
+ TrueBlock->addSuccessor(Successor);
+ }
+
+ if (IsFalseBlockRequired) {
+ FalseBlockI = FalseBlock->begin();
+ FalseBlock->addSuccessor(Successor);
+ }
+
+ // Conditional branch to the TrueBlock or Successor
+ BuildMI(*MBB, BIL.back(), dl, TII->get(PPC::BC))
+ .add(BIL.back()->getOperand(3))
+ .addMBB(IsTrueBlockRequired ? TrueBlock : Successor);
+
+ // Jump over the true block to the new successor if the condition is false.
+ BuildMI(*(IsFalseBlockRequired ? FalseBlock : MBB),
+ (IsFalseBlockRequired ? FalseBlockI : BIL.back()), dl,
+ TII->get(PPC::B))
+ .addMBB(Successor);
+
+ if (IsFalseBlockRequired)
+ FalseBlockI = FalseBlock->begin(); // get the position of PPC::B
+}
+
+void PPCExpandISEL::populateBlocks(BlockISELList &BIL) {
+ for (auto &MI : BIL) {
+ assert(isISEL(*MI) && "Expecting an ISEL instruction");
+
+ MachineOperand &Dest = MI->getOperand(0); // location to store to
+ MachineOperand &TrueValue = MI->getOperand(1); // Value to store if
+ // condition is true
+ MachineOperand &FalseValue = MI->getOperand(2); // Value to store if
+ // condition is false
+ MachineOperand &ConditionRegister = MI->getOperand(3); // Condition
+
+ DEBUG(dbgs() << "Dest: " << Dest << "\n");
+ DEBUG(dbgs() << "TrueValue: " << TrueValue << "\n");
+ DEBUG(dbgs() << "FalseValue: " << FalseValue << "\n");
+ DEBUG(dbgs() << "ConditionRegister: " << ConditionRegister << "\n");
+
+ // If the Dest Register and True Value Register are not the same one, we
+ // need the True Block.
+ bool IsADDIInstRequired = !useSameRegister(Dest, TrueValue);
+ bool IsORIInstRequired = !useSameRegister(Dest, FalseValue);
+
+ if (IsADDIInstRequired) {
+ // Copy the result into the destination if the condition is true.
+ BuildMI(*TrueBlock, TrueBlockI, dl,
+ TII->get(isISEL8(*MI) ? PPC::ADDI8 : PPC::ADDI))
+ .add(Dest)
+ .add(TrueValue)
+ .add(MachineOperand::CreateImm(0));
+
+ // Add the LiveIn registers required by true block.
+ TrueBlock->addLiveIn(TrueValue.getReg());
+ }
+
+ if (IsORIInstRequired) {
+ // Add the LiveIn registers required by false block.
+ FalseBlock->addLiveIn(FalseValue.getReg());
+ }
+
+ if (NewSuccessor) {
+ // Add the LiveIn registers required by NewSuccessor block.
+ NewSuccessor->addLiveIn(Dest.getReg());
+ NewSuccessor->addLiveIn(TrueValue.getReg());
+ NewSuccessor->addLiveIn(FalseValue.getReg());
+ NewSuccessor->addLiveIn(ConditionRegister.getReg());
+ }
+
+ // Copy the value into the destination if the condition is false.
+ if (IsORIInstRequired)
+ BuildMI(*FalseBlock, FalseBlockI, dl,
+ TII->get(isISEL8(*MI) ? PPC::ORI8 : PPC::ORI))
+ .add(Dest)
+ .add(FalseValue)
+ .add(MachineOperand::CreateImm(0));
+
+ MI->eraseFromParent(); // Remove the ISEL instruction.
+
+ NumExpanded++;
+ }
+}
+
+void PPCExpandISEL::expandMergeableISELs(BlockISELList &BIL) {
+ // At this stage all the ISELs of BIL are in the same MBB.
+ MachineBasicBlock *MBB = BIL.back()->getParent();
+
+ handleSpecialCases(BIL, MBB);
+ reorganizeBlockLayout(BIL, MBB);
+ populateBlocks(BIL);
+}
+
+INITIALIZE_PASS(PPCExpandISEL, DEBUG_TYPE, "PowerPC Expand ISEL Generation",
+ false, false)
+char PPCExpandISEL::ID = 0;
+
+FunctionPass *llvm::createPPCExpandISELPass() { return new PPCExpandISEL(); }
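For reference, the transformation the new pass performs, reduced to plain C++ semantics, is a register-level conditional move rewritten as explicit control flow. This is a sketch of the semantics only, not the MIR the pass actually builds:

// ISEL computes Dest = CondBit ? TrueVal : FalseVal in one instruction.
// On subtargets without ISEL, the pass emits the branchy equivalent,
// using addi/ori with a zero immediate as plain register copies.
static unsigned expandedISELSemantics(bool CondBit, unsigned TrueVal,
                                      unsigned FalseVal) {
  unsigned Dest;
  if (CondBit)        // bc cr-bit, TrueBlock
    Dest = TrueVal;   // TrueBlock:  addi Dest, TrueVal, 0
  else
    Dest = FalseVal;  // FalseBlock: ori  Dest, FalseVal, 0
  return Dest;        // fall through to Successor / NewSuccessor
}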
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index e786ef9aee0e..4c9430a2eca0 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -433,8 +433,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
// If we are a leaf function, and use up to 224 bytes of stack space,
// don't have a frame pointer, calls, or dynamic alloca then we do not need
@@ -519,8 +518,7 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg;
@@ -616,8 +614,7 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
return true;
// Get the list of callee-saved registers for the target.
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
// Get all the available registers in the block.
@@ -663,8 +660,7 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
// and the stack frame is large, we need two scratch registers.
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MachineFunction &MF = *(MBB->getParent());
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned FrameSize = determineFrameLayout(MF, false);
@@ -694,10 +690,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo &MFI = MF.getFrameInfo();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
@@ -1221,10 +1215,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
if (MBBI != MBB.end())
dl = MBBI->getDebugLoc();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
// Get alignment info so we know how to restore the SP.
const MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -1550,8 +1542,7 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
if (MBBI != MBB.end())
dl = MBBI->getDebugLoc();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
// Create branch instruction for pseudo tail call return instruction
unsigned RetOpcode = MBBI->getOpcode();
@@ -1589,8 +1580,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -1793,8 +1783,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
}
- const PPCRegisterInfo *RegInfo =
- static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
if (RegInfo->hasBasePointer(MF)) {
HasGPSaveArea = true;
@@ -1941,8 +1930,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
return false;
MachineFunction *MF = MBB.getParent();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc DL;
bool CRSpilled = false;
MachineInstrBuilder CRMIB;
@@ -2083,8 +2071,7 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return false;
MachineFunction *MF = MBB.getParent();
- const PPCInstrInfo &TII =
- *static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
bool CR2Spilled = false;
bool CR3Spilled = false;
bool CR4Spilled = false;
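The repeated static_cast removals in this file work because PPCSubtarget overrides the accessors with covariant return types, so callers already receive the derived pointer. A self-contained sketch of the idiom, with simplified names:

struct TargetRegisterInfoBase {
  virtual ~TargetRegisterInfoBase() = default;
};

struct PPCRegisterInfoSketch : TargetRegisterInfoBase {
  bool hasBasePointer() const { return false; }
};

struct SubtargetBase {
  virtual ~SubtargetBase() = default;
  virtual const TargetRegisterInfoBase *getRegisterInfo() const = 0;
};

struct PPCSubtargetSketch : SubtargetBase {
  PPCRegisterInfoSketch RI;
  // Covariant return type: overrides the base virtual but returns the
  // derived class, so no static_cast is needed at call sites.
  const PPCRegisterInfoSketch *getRegisterInfo() const override {
    return &RI;
  }
};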
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 1e51c1f651c9..9c72638023bb 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -12,30 +12,57 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
+#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <new>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "ppc-codegen"
@@ -60,6 +87,7 @@ static cl::opt<bool> EnableBranchHint(
cl::Hidden);
namespace {
+
//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
/// instructions for SelectionDAG operations.
@@ -69,6 +97,7 @@ namespace {
const PPCSubtarget *PPCSubTarget;
const PPCTargetLowering *PPCLowering;
unsigned GlobalBaseReg;
+
public:
explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
: SelectionDAGISel(tm), TM(tm) {}
@@ -184,7 +213,6 @@ namespace {
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
unsigned ConstraintID,
std::vector<SDValue> &OutOps) override {
-
switch(ConstraintID) {
default:
errs() << "ConstraintID: " << ConstraintID << "\n";
@@ -237,7 +265,8 @@ private:
void transferMemOperands(SDNode *N, SDNode *Result);
};
-}
+
+} // end anonymous namespace
/// InsertVRSaveCode - Once the entire function has been instruction selected,
/// all virtual registers are created and all machine instructions are built,
@@ -303,7 +332,6 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
}
}
-
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
@@ -368,7 +396,6 @@ static bool isIntS16Immediate(SDValue Op, short &Imm) {
return isIntS16Immediate(Op.getNode(), Imm);
}
-
/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
@@ -833,6 +860,7 @@ static SDNode *getInt64(SelectionDAG *CurDAG, SDNode *N) {
}
namespace {
+
class BitPermutationSelector {
struct ValueBit {
SDValue V;
@@ -898,14 +926,12 @@ class BitPermutationSelector {
// associated with each) used to choose the lowering method.
struct ValueRotInfo {
SDValue V;
- unsigned RLAmt;
- unsigned NumGroups;
- unsigned FirstGroupStartIdx;
- bool Repl32;
+ unsigned RLAmt = std::numeric_limits<unsigned>::max();
+ unsigned NumGroups = 0;
+ unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
+ bool Repl32 = false;
- ValueRotInfo()
- : RLAmt(UINT32_MAX), NumGroups(0), FirstGroupStartIdx(UINT32_MAX),
- Repl32(false) {}
+ ValueRotInfo() = default;
// For sorting (in reverse order) by NumGroups, and then by
// FirstGroupStartIdx.
@@ -1985,7 +2011,8 @@ public:
return RNLM;
}
};
-} // anonymous namespace
+
+} // end anonymous namespace
bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
if (N->getValueType(0) != MVT::i32 &&
@@ -2450,7 +2477,6 @@ void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
}
-
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
void PPCDAGToDAGISel::Select(SDNode *N) {
@@ -2474,19 +2500,18 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
- case ISD::Constant: {
+ case ISD::Constant:
if (N->getValueType(0) == MVT::i64) {
ReplaceNode(N, getInt64(CurDAG, N));
return;
}
break;
- }
- case ISD::SETCC: {
+ case ISD::SETCC:
if (trySETCC(N))
return;
break;
- }
+
case PPCISD::GlobalBaseReg:
ReplaceNode(N, getGlobalBaseReg());
return;
@@ -2502,11 +2527,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
- case PPCISD::READ_TIME_BASE: {
+ case PPCISD::READ_TIME_BASE:
ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
MVT::Other, N->getOperand(0)));
return;
- }
case PPCISD::SRA_ADDZE: {
SDValue N0 = N->getOperand(0);
@@ -2690,6 +2714,19 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
return;
}
+ // If this is a negated 64-bit zero-extension mask,
+ // i.e. the immediate is a sequence of ones from the most significant side
+ // and all zeros for the remainder, we should use rldicr.
+ if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
+ isMask_64(~Imm64)) {
+ SDValue Val = N->getOperand(0);
+ MB = 63 - countTrailingOnes(~Imm64);
+ SH = 0;
+ SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
+ CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
+ return;
+ }
+
// AND X, 0 -> 0, not "rlwinm 32".
if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
ReplaceUses(SDValue(N, 0), N->getOperand(1));
@@ -2911,8 +2948,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
return;
}
-
break;
+
case ISD::VECTOR_SHUFFLE:
if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)) {
@@ -2940,7 +2977,11 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
SDValue Chain = LD->getChain();
SDValue Ops[] = { Base, Offset, Chain };
- CurDAG->SelectNodeTo(N, PPC::LXVDSX, N->getValueType(0), Ops);
+ SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
+ N->getValueType(0), Ops);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1);
return;
}
}
@@ -3088,7 +3129,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SDValue(Tmp, 0), GA));
return;
}
- case PPCISD::PPC32_PICGOT: {
+ case PPCISD::PPC32_PICGOT:
// Generate a PIC-safe GOT reference.
assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
"PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
@@ -3096,7 +3137,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
PPCLowering->getPointerTy(CurDAG->getDataLayout()),
MVT::i32);
return;
- }
+
case PPCISD::VADD_SPLAT: {
// This expands into one of three sequences, depending on whether
// the first operand is odd or even, positive or negative.
@@ -3139,7 +3180,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
SDValue TmpVal = SDValue(Tmp, 0);
ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
return;
-
} else if (Elt > 0) {
// Elt is odd and positive, in the range [17,31].
//
@@ -3154,7 +3194,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
SDValue(Tmp2, 0)));
return;
-
} else {
// Elt is odd and negative, in the range [-31,-17].
//
@@ -3199,7 +3238,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue RHS, LHS;
- bool BytesFound[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+ bool BytesFound[8] = {false, false, false, false, false, false, false, false};
uint64_t Mask = 0, Alt = 0;
auto IsByteSelectCC = [this](SDValue O, unsigned &b,
@@ -3499,7 +3538,6 @@ void PPCDAGToDAGISel::PreprocessISelDAG() {
/// PostprocessISelDAG - Perform some late peephole optimizations
/// on the DAG representation.
void PPCDAGToDAGISel::PostprocessISelDAG() {
-
// Skip peepholes at -O0.
if (TM.getOptLevel() == CodeGenOpt::None)
return;
@@ -3515,10 +3553,6 @@ void PPCDAGToDAGISel::PostprocessISelDAG() {
// be folded with the isel so that we don't need to materialize a register
// containing zero.
bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
- // If we're not using isel, then this does not matter.
- if (!PPCSubTarget->hasISEL())
- return false;
-
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
UI != UE; ++UI) {
SDNode *User = *UI;
@@ -4520,7 +4554,6 @@ void PPCDAGToDAGISel::PeepholePPC64() {
}
}
-
/// createPPCISelDag - This pass converts a legalized DAG into a
/// PowerPC-specific DAG, ready for instruction scheduling.
///
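One functional change in this file is the new AND pattern above: a mask that is all ones from the most-significant bit downward (e.g. 0xFFFFFFFF00000000) can be matched to rldicr. A sketch of the mask test, assuming C++20 <bit> in place of LLVM's isMask_64/countTrailingOnes:

#include <bit>
#include <cstdint>

// Returns true if Imm is ones from the MSB down to some bit and zeros
// below it; ME receives the index of the last one bit in PPC numbering
// (bit 0 = MSB), which becomes rldicr's mask-end operand.
static bool isHighOnesMask(uint64_t Imm, unsigned &ME) {
  uint64_t Inv = ~Imm; // must be a nonzero low-order run of ones: 2^k - 1
  if (Imm == 0 || Inv == 0 || (Inv & (Inv + 1)) != 0)
    return false;
  ME = 63 - std::countr_one(Inv); // 63 - countTrailingOnes(~Imm)
  return true;
}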
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 2b9195b095e1..f7663d8e5185 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11,39 +11,88 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
#include "PPCCallingConv.h"
#include "PPCCCState.h"
+#include "PPCFrameLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
+#include "PPCRegisterInfo.h"
+#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
-#include "PPCTargetObjectFile.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
#include <list>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -1525,7 +1574,6 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
unsigned &InsertAtByte, bool &Swap, bool IsLE) {
-
// Check that the mask is shuffling words
for (unsigned i = 0; i < 4; ++i) {
unsigned B0 = N->getMaskElt(i*4);
@@ -1643,7 +1691,6 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// If the element isn't a constant, bail fully out.
if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
-
if (!UniquedVals[i&(Multiple-1)].getNode())
UniquedVals[i&(Multiple-1)] = N->getOperand(i);
else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
@@ -2026,7 +2073,6 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
}
if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
-
// Common code will reject creating a pre-inc form if the base pointer
// is a frame index, or if N is a store and the base pointer is either
// the same as or a predecessor of the value being stored. Check for
@@ -2277,7 +2323,6 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
-
// FIXME: TLS addresses currently use medium model code sequences,
// which is the most useful form. Eventually support for small and
// large models could be added if users need it, at the cost of
@@ -2602,10 +2647,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__trampoline_setup", PtrVT),
- std::move(Args));
+ CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
+ CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
@@ -2737,7 +2781,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-bool
+bool
llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
@@ -2752,7 +2796,7 @@ llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
unsigned RegNum = State.getFirstUnallocated(ArgRegs);
int RegsLeft = NumArgRegs - RegNum;
- // Skip if there is not enough registers left for long double type (4 gpr regs
+ // Skip if there are not enough registers left for long double type (4 gpr regs
// in soft float mode) and put long double argument on the stack.
if (RegNum != NumArgRegs && RegsLeft < 4) {
for (int i = 0; i < RegsLeft; i++) {
@@ -4066,7 +4110,7 @@ needStackSlotPassParameters(const PPCSubtarget &Subtarget,
static bool
hasSameArgumentList(const Function *CallerFn, ImmutableCallSite *CS) {
- if (CS->arg_size() != CallerFn->getArgumentList().size())
+ if (CS->arg_size() != CallerFn->arg_size())
return false;
ImmutableCallSite::arg_iterator CalleeArgIter = CS->arg_begin();
@@ -4222,11 +4266,12 @@ namespace {
struct TailCallArgumentInfo {
SDValue Arg;
SDValue FrameIdxOp;
- int FrameIdx;
+ int FrameIdx = 0;
- TailCallArgumentInfo() : FrameIdx(0) {}
+ TailCallArgumentInfo() = default;
};
-}
+
+} // end anonymous namespace
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void StoreTailCallArgumentsToStackSlot(
@@ -4406,7 +4451,6 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
-
bool isPPC64 = Subtarget.isPPC64();
bool isSVR4ABI = Subtarget.isSVR4ABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
@@ -4602,7 +4646,6 @@ SDValue PPCTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
-
SmallVector<CCValAssign, 16> RVLocs;
CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
@@ -4649,7 +4692,6 @@ SDValue PPCTargetLowering::FinishCall(
SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals, ImmutableCallSite *CS) const {
-
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
@@ -5059,7 +5101,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
ImmutableCallSite *CS) const {
-
bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
@@ -5105,10 +5146,30 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
};
const unsigned NumGPRs = array_lengthof(GPR);
- const unsigned NumFPRs = 13;
+ const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
const unsigned NumVRs = array_lengthof(VR);
const unsigned NumQFPRs = NumFPRs;
+ // On ELFv2, we can avoid allocating the parameter area if all the arguments
+ // can be passed to the callee in registers.
+ // For the fast calling convention, there is another check below.
+ // Note: Keep this consistent with LowerFormalArguments_64SVR4().
+ bool HasParameterArea =
+     !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
+ if (!HasParameterArea) {
+ unsigned ParamAreaSize = NumGPRs * PtrByteSize;
+ unsigned AvailableFPRs = NumFPRs;
+ unsigned AvailableVRs = NumVRs;
+ unsigned NumBytesTmp = NumBytes;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ if (Outs[i].Flags.isNest()) continue;
+ if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
+ PtrByteSize, LinkageSize, ParamAreaSize,
+ NumBytesTmp, AvailableFPRs, AvailableVRs,
+ Subtarget.hasQPX()))
+ HasParameterArea = true;
+ }
+ }
+
// When using the fast calling convention, we don't provide backing for
// arguments that will be in registers.
unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
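The HasParameterArea computation above boils down to a conservative default plus a per-argument scan. A hedged pseudo-version of the decision; the real scan goes through CalculateStackSlotUsed, whose exact signature is not reproduced here:

// ELFv1, varargs, and fastcc calls always reserve the parameter save
// area; on ELFv2 it is only needed if some argument spills to memory.
static bool needsParameterArea(bool IsELFv2, bool IsVarArg, bool IsFastCC,
                               bool AnyArgNeedsMemorySlot) {
  if (!IsELFv2 || IsVarArg || IsFastCC)
    return true;
  return AnyArgNeedsMemorySlot;
}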
@@ -5176,13 +5237,18 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
unsigned NumBytesActuallyUsed = NumBytes;
- // The prolog code of the callee may store up to 8 GPR argument registers to
+ // In the old ELFv1 ABI,
+ // the prolog code of the callee may store up to 8 GPR argument registers to
// the stack, allowing va_start to index over them in memory if it is varargs.
// Because we cannot tell if this is needed on the caller side, we have to
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
- // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
- NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+ // In the ELFv2 ABI, we allocate the parameter area iff a callee
+ // really requires memory operands, e.g. a vararg function.
+ if (HasParameterArea)
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+ else
+ NumBytes = LinkageSize;
// Tail call needs the stack to be aligned.
if (getTargetMachine().Options.GuaranteedTailCallOpt &&
@@ -5401,6 +5467,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (CallConv == CallingConv::Fast)
ComputePtrOff();
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
@@ -5486,6 +5554,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
}
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
@@ -5520,6 +5590,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// GPRs when within range. For now, we always put the value in both
// locations (or even all three).
if (isVarArg) {
+ assert(HasParameterArea &&
+ "Parameter area must exist if we have a varargs call.");
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
SDValue Store =
@@ -5552,6 +5624,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (CallConv == CallingConv::Fast)
ComputePtrOff();
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
@@ -5572,6 +5646,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
case MVT::v4i1: {
bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
if (isVarArg) {
+ assert(HasParameterArea &&
+ "Parameter area must exist if we have a varargs call.");
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
SDValue Store =
@@ -5604,6 +5680,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
if (CallConv == CallingConv::Fast)
ComputePtrOff();
+ assert(HasParameterArea &&
+ "Parameter area must exist to pass an argument in memory.");
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
@@ -5618,7 +5696,8 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
}
}
- assert(NumBytesActuallyUsed == ArgOffset);
+ assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
+ "mismatch in size of parameter area");
(void)NumBytesActuallyUsed;
if (!MemOpChains.empty())
@@ -5673,7 +5752,6 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
ImmutableCallSite *CS) const {
-
unsigned NumOps = Outs.size();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -6065,7 +6143,6 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
-
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
@@ -7612,7 +7689,6 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
}
-
}
if (Subtarget.hasQPX()) {
@@ -7792,24 +7868,39 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
bool &isDot, const PPCSubtarget &Subtarget) {
unsigned IntrinsicID =
- cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
+ cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
CompareOpc = -1;
isDot = false;
switch (IntrinsicID) {
- default: return false;
- // Comparison predicates.
- case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ default:
+ return false;
+ // Comparison predicates.
+ case Intrinsic::ppc_altivec_vcmpbfp_p:
+ CompareOpc = 966;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpeqfp_p:
+ CompareOpc = 198;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequb_p:
+ CompareOpc = 6;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequh_p:
+ CompareOpc = 70;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequw_p:
+ CompareOpc = 134;
+ isDot = true;
+ break;
case Intrinsic::ppc_altivec_vcmpequd_p:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 199;
- isDot = 1;
+ isDot = true;
} else
return false;
-
break;
case Intrinsic::ppc_altivec_vcmpneb_p:
case Intrinsic::ppc_altivec_vcmpneh_p:
@@ -7818,45 +7909,80 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpnezh_p:
case Intrinsic::ppc_altivec_vcmpnezw_p:
if (Subtarget.hasP9Altivec()) {
- switch(IntrinsicID) {
- default: llvm_unreachable("Unknown comparison intrinsic.");
- case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break;
- case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break;
- case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break;
- case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break;
- case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break;
- case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break;
+ switch (IntrinsicID) {
+ default:
+ llvm_unreachable("Unknown comparison intrinsic.");
+ case Intrinsic::ppc_altivec_vcmpneb_p:
+ CompareOpc = 7;
+ break;
+ case Intrinsic::ppc_altivec_vcmpneh_p:
+ CompareOpc = 71;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnew_p:
+ CompareOpc = 135;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezb_p:
+ CompareOpc = 263;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezh_p:
+ CompareOpc = 327;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezw_p:
+ CompareOpc = 391;
+ break;
}
- isDot = 1;
+ isDot = true;
} else
return false;
-
break;
- case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgefp_p:
+ CompareOpc = 454;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtfp_p:
+ CompareOpc = 710;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsb_p:
+ CompareOpc = 774;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsh_p:
+ CompareOpc = 838;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ CompareOpc = 902;
+ isDot = true;
+ break;
case Intrinsic::ppc_altivec_vcmpgtsd_p:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 967;
- isDot = 1;
+ isDot = true;
} else
return false;
-
break;
- case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
- case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtub_p:
+ CompareOpc = 518;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuh_p:
+ CompareOpc = 582;
+ isDot = true;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ CompareOpc = 646;
+ isDot = true;
+ break;
case Intrinsic::ppc_altivec_vcmpgtud_p:
if (Subtarget.hasP8Altivec()) {
CompareOpc = 711;
- isDot = 1;
+ isDot = true;
} else
return false;
-
break;
- // VSX predicate comparisons use the same infrastructure
+
+ // VSX predicate comparisons use the same infrastructure
case Intrinsic::ppc_vsx_xvcmpeqdp_p:
case Intrinsic::ppc_vsx_xvcmpgedp_p:
case Intrinsic::ppc_vsx_xvcmpgtdp_p:
@@ -7865,33 +7991,51 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_vsx_xvcmpgtsp_p:
if (Subtarget.hasVSX()) {
switch (IntrinsicID) {
- case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break;
- case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break;
- case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break;
- case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break;
- case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break;
- case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break;
+ case Intrinsic::ppc_vsx_xvcmpeqdp_p:
+ CompareOpc = 99;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpgedp_p:
+ CompareOpc = 115;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpgtdp_p:
+ CompareOpc = 107;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpeqsp_p:
+ CompareOpc = 67;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpgesp_p:
+ CompareOpc = 83;
+ break;
+ case Intrinsic::ppc_vsx_xvcmpgtsp_p:
+ CompareOpc = 75;
+ break;
}
- isDot = 1;
- }
- else
+ isDot = true;
+ } else
return false;
-
break;
- // Normal Comparisons.
- case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ // Normal Comparisons.
+ case Intrinsic::ppc_altivec_vcmpbfp:
+ CompareOpc = 966;
+ break;
+ case Intrinsic::ppc_altivec_vcmpeqfp:
+ CompareOpc = 198;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequb:
+ CompareOpc = 6;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequh:
+ CompareOpc = 70;
+ break;
+ case Intrinsic::ppc_altivec_vcmpequw:
+ CompareOpc = 134;
+ break;
case Intrinsic::ppc_altivec_vcmpequd:
- if (Subtarget.hasP8Altivec()) {
+ if (Subtarget.hasP8Altivec())
CompareOpc = 199;
- isDot = 0;
- } else
+ else
return false;
-
break;
case Intrinsic::ppc_altivec_vcmpneb:
case Intrinsic::ppc_altivec_vcmpneh:
@@ -7899,43 +8043,67 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
case Intrinsic::ppc_altivec_vcmpnezb:
case Intrinsic::ppc_altivec_vcmpnezh:
case Intrinsic::ppc_altivec_vcmpnezw:
- if (Subtarget.hasP9Altivec()) {
+ if (Subtarget.hasP9Altivec())
switch (IntrinsicID) {
- default: llvm_unreachable("Unknown comparison intrinsic.");
- case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break;
- case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break;
- case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break;
- case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break;
- case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break;
- case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break;
+ default:
+ llvm_unreachable("Unknown comparison intrinsic.");
+ case Intrinsic::ppc_altivec_vcmpneb:
+ CompareOpc = 7;
+ break;
+ case Intrinsic::ppc_altivec_vcmpneh:
+ CompareOpc = 71;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnew:
+ CompareOpc = 135;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezb:
+ CompareOpc = 263;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezh:
+ CompareOpc = 327;
+ break;
+ case Intrinsic::ppc_altivec_vcmpnezw:
+ CompareOpc = 391;
+ break;
}
- isDot = 0;
- } else
+ else
return false;
break;
- case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgefp:
+ CompareOpc = 454;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtfp:
+ CompareOpc = 710;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsb:
+ CompareOpc = 774;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsh:
+ CompareOpc = 838;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtsw:
+ CompareOpc = 902;
+ break;
case Intrinsic::ppc_altivec_vcmpgtsd:
- if (Subtarget.hasP8Altivec()) {
+ if (Subtarget.hasP8Altivec())
CompareOpc = 967;
- isDot = 0;
- } else
+ else
return false;
-
break;
- case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
- case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtub:
+ CompareOpc = 518;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuh:
+ CompareOpc = 582;
+ break;
+ case Intrinsic::ppc_altivec_vcmpgtuw:
+ CompareOpc = 646;
+ break;
case Intrinsic::ppc_altivec_vcmpgtud:
- if (Subtarget.hasP8Altivec()) {
+ if (Subtarget.hasP8Altivec())
CompareOpc = 711;
- isDot = 0;
- } else
+ else
return false;
-
break;
}
return true;
@@ -8044,7 +8212,7 @@ SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
}
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
SDLoc dl(Op);
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
@@ -9174,10 +9342,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineFunction *F = BB->getParent();
- if (Subtarget.hasISEL() &&
- (MI.getOpcode() == PPC::SELECT_CC_I4 ||
+ if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8 ||
- MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
+ MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) {
SmallVector<MachineOperand, 2> Cond;
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8)
@@ -9417,7 +9584,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BB = EmitAtomicBinary(MI, BB, 4, 0);
else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
BB = EmitAtomicBinary(MI, BB, 8, 0);
-
else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
(Subtarget.hasPartwordAtomics() &&
@@ -10028,14 +10194,12 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
return false;
}
-
/// This function is called when we have proved that a SETCC node can be replaced
/// by subtraction (and other supporting instructions) so that the result of
/// comparison is kept in a GPR instead of CR. This function is purely for
/// codegen purposes and has some flags to guide the codegen process.
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
bool Swap, SDLoc &DL, SelectionDAG &DAG) {
-
assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
// Zero extend the operands to the largest legal integer. Originally, they
@@ -10068,7 +10232,6 @@ static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
DAGCombinerInfo &DCI) const {
-
assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
SelectionDAG &DAG = DCI.DAG;
@@ -11227,9 +11390,20 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (BSwapOp.getValueType() == MVT::i16)
BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
+      // If the type of the BSWAP operand is wider than the stored memory
+      // width, it needs to be shifted right before the STBRX.
+ EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
+ if (Op1VT.bitsGT(mVT)) {
+ int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
+ BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
+ DAG.getConstant(Shift, dl, MVT::i32));
+ // Need to truncate if this is a bswap of i64 stored as i32/i16.
+ if (Op1VT == MVT::i64)
+ BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
+ }
+
SDValue Ops[] = {
- N->getOperand(0), BSwapOp, N->getOperand(2),
- DAG.getValueType(N->getOperand(1).getValueType())
+ N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
};
return
DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
@@ -11570,7 +11744,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
- case ISD::INTRINSIC_W_CHAIN: {
+ case ISD::INTRINSIC_W_CHAIN:
// For little endian, VSX loads require generating lxvd2x/xxswapd.
// Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
if (Subtarget.needsSwapsForVSXMemOps()) {
@@ -11583,8 +11757,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
break;
- }
- case ISD::INTRINSIC_VOID: {
+ case ISD::INTRINSIC_VOID:
// For little endian, VSX stores require generating xxswapd/stxvd2x.
// Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
if (Subtarget.needsSwapsForVSXMemOps()) {
@@ -11597,7 +11770,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
break;
- }
case ISD::BSWAP:
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
@@ -11635,9 +11807,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Return N so it doesn't get rechecked!
return SDValue(N, 0);
}
-
break;
- case PPCISD::VCMP: {
+ case PPCISD::VCMP:
// If a VCMPo node already exists with exactly the same operands as this
// node, use its result instead of this node (VCMPo computes both a CR6 and
// a normal output).
@@ -11687,7 +11858,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue(VCMPoNode, 0);
}
break;
- }
case ISD::BRCOND: {
SDValue Cond = N->getOperand(1);
SDValue Target = N->getOperand(2);
@@ -11847,6 +12017,7 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
@@ -12295,7 +12466,6 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
unsigned Intrinsic) const {
-
switch (Intrinsic) {
case Intrinsic::ppc_qpx_qvlfd:
case Intrinsic::ppc_qpx_qvlfs:
@@ -12753,7 +12923,6 @@ void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
}
bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
-
if (!VT.isSimple() || !Subtarget.hasVSX())
return false;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 05acd25ae5fc..6113eb58f421 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -17,13 +17,26 @@
#include "PPC.h"
#include "PPCInstrInfo.h"
-#include "PPCRegisterInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
#include "llvm/Target/TargetLowering.h"
+#include <utility>
namespace llvm {
+
namespace PPCISD {
+
enum NodeType : unsigned {
// Start the numbering where the builtin ops and target ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
@@ -398,10 +411,12 @@ namespace llvm {
/// the last operand.
TOC_ENTRY
};
- }
+
+ } // end namespace PPCISD
/// Define some predicates that are used for node matching.
namespace PPC {
+
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
@@ -465,7 +480,8 @@ namespace llvm {
/// If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int isQVALIGNIShuffleMask(SDNode *N);
- }
+
+ } // end namespace PPC
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &Subtarget;
@@ -492,6 +508,7 @@ namespace llvm {
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+
bool useSoftFloat() const override;
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
@@ -514,6 +531,10 @@ namespace llvm {
return true;
}
+ bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
+ return VT.isScalarInteger();
+ }
+
bool supportSplitCSR(MachineFunction *MF) const override {
return
MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
@@ -587,6 +608,7 @@ namespace llvm {
void computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
@@ -694,6 +716,10 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ bool convertSelectOfConstantsToMath() const override {
+ return true;
+ }
+
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info,
@@ -785,15 +811,13 @@ namespace llvm {
SDValue Chain;
SDValue ResChain;
MachinePointerInfo MPI;
- bool IsDereferenceable;
- bool IsInvariant;
- unsigned Alignment;
+ bool IsDereferenceable = false;
+ bool IsInvariant = false;
+ unsigned Alignment = 0;
AAMDNodes AAInfo;
- const MDNode *Ranges;
+ const MDNode *Ranges = nullptr;
- ReuseLoadInfo()
- : IsDereferenceable(false), IsInvariant(false), Alignment(0),
- Ranges(nullptr) {}
+ ReuseLoadInfo() = default;
MachineMemOperand::Flags MMOFlags() const {
MachineMemOperand::Flags F = MachineMemOperand::MONone;
@@ -906,15 +930,13 @@ namespace llvm {
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
- SDValue
- LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
- bool
- CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context) const override;
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -994,14 +1016,16 @@ namespace llvm {
CCAssignFn *useFastISelCCs(unsigned Flag) const;
SDValue
- combineElementTruncationToVectorTruncation(SDNode *N,
- DAGCombinerInfo &DCI) const;
+ combineElementTruncationToVectorTruncation(SDNode *N,
+ DAGCombinerInfo &DCI) const;
};
namespace PPC {
+
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo);
- }
+
+ } // end namespace PPC
bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
@@ -1026,6 +1050,7 @@ namespace llvm {
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
-}
-#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+} // end namespace llvm
+
+#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index fbec8787ef8d..997b96ca6ec8 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -253,11 +253,11 @@ def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$rD), (ins g8rc:$rA, u5imm:$FC),
Requires<[IsISA3_0]>;
}
-let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in
+let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
"stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isDOT;
-let mayStore = 1, hasSideEffects = 0 in
+let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC),
"stdat $rS, $rA, $FC", IIC_LdStStore>, isPPC64,
Requires<[IsISA3_0]>;
@@ -1082,7 +1082,7 @@ def STDBRX: XForm_8<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
}
// Stores with Update (pre-inc).
-let PPC970_Unit = 2, mayStore = 1 in {
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
"stbu $rS, $dst", IIC_LdStStoreUpd, []>,
@@ -1232,6 +1232,10 @@ def : Pat<(srl i64:$rS, i32:$rB),
def : Pat<(shl i64:$rS, i32:$rB),
(SLD $rS, $rB)>;
+// SUBFIC
+def : Pat<(sub imm64SExt16:$imm, i64:$in),
+ (SUBFIC8 $in, imm:$imm)>;
+
// SHL/SRL
def : Pat<(shl i64:$in, (i32 imm:$imm)),
(RLDICR $in, imm:$imm, (SHL64 imm:$imm))>;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 5c022749ad64..c380766e9f5c 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -407,7 +407,7 @@ def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
"mtvscr $vB", IIC_LdStLoad,
[(int_ppc_altivec_mtvscr v4i32:$vB)]>;
-let PPC970_Unit = 2 in { // Loads.
+let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads.
def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src),
"lvebx $vD, $src", IIC_LdStLoad,
[(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
@@ -434,7 +434,7 @@ def LVSR : XForm_1<31, 38, (outs vrrc:$vD), (ins memrr:$src),
[(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
PPC970_Unit_LSU;
-let PPC970_Unit = 2 in { // Stores.
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { // Stores.
def STVEBX: XForm_8<31, 135, (outs), (ins vrrc:$rS, memrr:$dst),
"stvebx $rS, $dst", IIC_LdStStore,
[(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>;
@@ -851,6 +851,10 @@ def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
// Additional Altivec Patterns
//
+// Extended mnemonics
+def : InstAlias<"vmr $vD, $vA", (VOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>;
+def : InstAlias<"vnot $vD, $vA", (VNOR vrrc:$vD, vrrc:$vA, vrrc:$vA)>;
+
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 2e0b9355f82b..8e159f47ea2e 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -65,7 +65,9 @@ UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
void PPCInstrInfo::anchor() {}
PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
- : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
+ /* CatchRetOpcode */ -1,
+ STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
Subtarget(STI), RI(STI.getTargetMachine()) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
@@ -662,12 +664,14 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
- BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
- BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
else // Conditional branch
BuildMI(&MBB, DL, get(PPC::BCC))
- .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB);
+ .addImm(Cond[0].getImm())
+ .add(Cond[1])
+ .addMBB(TBB);
return 1;
}
@@ -677,12 +681,14 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
- BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);
else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
- BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);
else
BuildMI(&MBB, DL, get(PPC::BCC))
- .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB);
+ .addImm(Cond[0].getImm())
+ .add(Cond[1])
+ .addMBB(TBB);
BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
return 2;
}
@@ -692,9 +698,6 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
- if (!Subtarget.hasISEL())
- return false;
-
if (Cond.size() != 2)
return false;
@@ -736,9 +739,6 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
assert(Cond.size() == 2 &&
"PPC branch conditions have two components!");
- assert(Subtarget.hasISEL() &&
- "Cannot insert select on target without ISEL support");
-
// Get the register classes.
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC =
@@ -1493,7 +1493,7 @@ bool PPCInstrInfo::DefinesPredicate(MachineInstr &MI,
return Found;
}
-bool PPCInstrInfo::isPredicable(MachineInstr &MI) const {
+bool PPCInstrInfo::isPredicable(const MachineInstr &MI) const {
unsigned OpC = MI.getOpcode();
switch (OpC) {
default:
@@ -1836,8 +1836,7 @@ unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
PatchPointOpers Opers(&MI);
return Opers.getNumPatchBytes();
} else {
- const MCInstrDesc &Desc = get(Opcode);
- return Desc.getSize();
+ return get(Opcode).getSize();
}
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 32b2f009a3f5..f11aed8fa268 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -253,7 +253,7 @@ public:
bool DefinesPredicate(MachineInstr &MI,
std::vector<MachineOperand> &Pred) const override;
- bool isPredicable(MachineInstr &MI) const override;
+ bool isPredicable(const MachineInstr &MI) const override;
// Comparison optimization.
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index f615cc7cc974..f004ce49cac0 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -114,9 +114,9 @@ def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>;
def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
[SDNPHasChain, SDNPMayStore]>;
def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx,
[SDNPHasChain, SDNPMayLoad]>;
def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix,
@@ -243,7 +243,7 @@ def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
[SDNPHasChain, SDNPOptInGlue]>;
def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
[SDNPHasChain, SDNPMayStore]>;
@@ -770,9 +770,10 @@ def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
}
// A single-register address. This is used with the SjLj
-// pseudo-instructions.
+// pseudo-instructions, which translate to LD/LWZ. These instructions require
+// G8RC_NOX0 registers.
def memr : Operand<iPTR> {
- let MIOperandInfo = (ops ptr_rc:$ptrreg);
+ let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg);
}
def PPCTLSRegOperand : AsmOperandClass {
let Name = "TLSReg"; let PredicateMethod = "isTLSReg";
@@ -1648,7 +1649,7 @@ let usesCustomInserter = 1 in {
}
// Instructions to support atomic operations
-let mayLoad = 1, hasSideEffects = 0 in {
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),
"lbarx $rD, $src", IIC_LdStLWARX, []>,
Requires<[HasPartwordAtomics]>;
@@ -1681,7 +1682,7 @@ def LWAT : X_RD5_RS5_IM5<31, 582, (outs gprc:$rD), (ins gprc:$rA, u5imm:$FC),
Requires<[IsISA3_0]>;
}
-let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in {
+let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
def STBCX : XForm_1<31, 694, (outs), (ins gprc:$rS, memrr:$dst),
"stbcx. $rS, $dst", IIC_LdStSTWCX, []>,
isDOT, Requires<[HasPartwordAtomics]>;
@@ -1694,7 +1695,7 @@ def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
"stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isDOT;
}
-let mayStore = 1, hasSideEffects = 0 in
+let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
def STWAT : X_RD5_RS5_IM5<31, 710, (outs), (ins gprc:$rS, gprc:$rA, u5imm:$FC),
"stwat $rS, $rA, $FC", IIC_LdStStore>,
Requires<[IsISA3_0]>;
@@ -1740,7 +1741,7 @@ def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src),
// Unindexed (r+i) Loads with Update (preinc).
-let mayLoad = 1, hasSideEffects = 0 in {
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
"lbzu $rD, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
@@ -1813,7 +1814,7 @@ def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
// Indexed (r+r) Loads.
//
-let PPC970_Unit = 2 in {
+let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in {
def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src),
"lbzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (zextloadi8 xaddr:$src))]>;
@@ -1827,8 +1828,6 @@ def LHZX : XForm_1<31, 279, (outs gprc:$rD), (ins memrr:$src),
def LWZX : XForm_1<31, 23, (outs gprc:$rD), (ins memrr:$src),
"lwzx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (load xaddr:$src))]>;
-
-
def LHBRX : XForm_1<31, 790, (outs gprc:$rD), (ins memrr:$src),
"lhbrx $rD, $src", IIC_LdStLoad,
[(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
@@ -1860,7 +1859,7 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
//
// Unindexed (r+i) Stores.
-let PPC970_Unit = 2 in {
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src),
"stb $rS, $src", IIC_LdStStore,
[(truncstorei8 i32:$rS, iaddr:$src)]>;
@@ -1879,7 +1878,7 @@ def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst),
}
// Unindexed (r+i) Stores with Update (preinc).
-let PPC970_Unit = 2, mayStore = 1 in {
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
"stbu $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
@@ -1948,7 +1947,7 @@ def STFDX : XForm_28<31, 727, (outs), (ins f8rc:$frS, memrr:$dst),
}
// Indexed (r+r) Stores with Update (preinc).
-let PPC970_Unit = 2, mayStore = 1 in {
+let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
"stbux $rS, $dst", IIC_LdStStoreUpd, []>,
RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 0d9e3459f47e..13603732397a 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -62,7 +62,7 @@ def SDTVecConv : SDTypeProfile<1, 2, [
]>;
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
- [SDNPHasChain, SDNPMayLoad]>;
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
[SDNPHasChain, SDNPMayStore]>;
def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
@@ -117,7 +117,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
let Uses = [RM] in {
// Load indexed instructions
- let mayLoad = 1 in {
+ let mayLoad = 1, mayStore = 0 in {
let CodeSize = 3 in
def LXSDX : XX1Form<31, 588,
(outs vsfrc:$XT), (ins memrr:$src),
@@ -142,7 +142,7 @@ let Uses = [RM] in {
} // mayLoad
// Store indexed instructions
- let mayStore = 1 in {
+ let mayStore = 1, mayLoad = 0 in {
let CodeSize = 3 in
def STXSDX : XX1Form<31, 716,
(outs), (ins vsfrc:$XT, memrr:$dst),
@@ -1197,7 +1197,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
[(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
// VSX scalar loads introduced in ISA 2.07
- let mayLoad = 1 in {
+ let mayLoad = 1, mayStore = 0 in {
let CodeSize = 3 in
def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
"lxsspx $XT, $src", IIC_LdStLFD,
@@ -1211,7 +1211,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
} // mayLoad
// VSX scalar stores introduced in ISA 2.07
- let mayStore = 1 in {
+ let mayStore = 1, mayLoad = 0 in {
let CodeSize = 3 in
def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
"stxsspx $XT, $dst", IIC_LdStSTFD,
@@ -1410,6 +1410,11 @@ let Predicates = [HasDirectMove] in {
"mfvsrd $rA, $XT", IIC_VecGeneral,
[(set i64:$rA, (PPCmfvsr f64:$XT))]>,
Requires<[In64BitMode]>;
+ let isCodeGenOnly = 1 in
+ def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vrrc:$XT),
+ "mfvsrd $rA, $XT", IIC_VecGeneral,
+ []>,
+ Requires<[In64BitMode]>;
def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
"mfvsrwz $rA, $XT", IIC_VecGeneral,
[(set i32:$rA, (PPCmfvsr f64:$XT))]>;
@@ -1440,6 +1445,13 @@ let Predicates = [IsISA3_0, HasDirectMove] in {
} // IsISA3_0, HasDirectMove
} // UseVSXReg = 1
+// We want to parse this from asm, but we don't want to emit this as it would
+// be emitted with a VSX reg. So leave Emit = 0 here.
+def : InstAlias<"mfvrd $rA, $XT",
+ (MFVRD g8rc:$rA, vrrc:$XT), 0>;
+def : InstAlias<"mffprd $rA, $src",
+ (MFVSRD g8rc:$rA, f8rc:$src)>;
+
/* Direct moves of various widths from GPR's into VSR's. Each move lines
the value up into element 0 (both BE and LE). Namely, entities smaller than
a doubleword are shifted left and moved for BE. For LE, they're moved, then
@@ -2186,7 +2198,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
} // UseVSXReg = 1
// Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
- // seperate pattern so that it can convert the input register class from
+ // separate pattern so that it can convert the input register class from
// VRRC(v8i16) to VSRC.
def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)),
(v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>;
@@ -2335,7 +2347,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
- let mayLoad = 1 in {
+ let mayLoad = 1, mayStore = 0 in {
// Load Vector
def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
"lxv $XT, $src", IIC_LdStLFD, []>, UseVSXReg;
@@ -2383,7 +2395,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
- let mayStore = 1 in {
+ let mayStore = 1, mayLoad = 0 in {
// Store Vector
def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
"stxv $XT, $dst", IIC_LdStSTFD, []>, UseVSXReg;
diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 2c3e75523e8f..a349fa1b4090 100644
--- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -72,9 +73,10 @@ namespace {
public:
static char ID; // Pass ID, replacement for typeid
- PPCLoopPreIncPrep() : FunctionPass(ID), TM(nullptr) {
+ PPCLoopPreIncPrep() : FunctionPass(ID) {
initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
}
+
PPCLoopPreIncPrep(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
}
@@ -93,7 +95,7 @@ namespace {
bool rotateLoop(Loop *L);
private:
- PPCTargetMachine *TM;
+ PPCTargetMachine *TM = nullptr;
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index e527b018d4fb..541b98e01b99 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -148,7 +148,7 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
MCOperand MCOp;
switch (MO.getType()) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
assert(!MO.getSubReg() && "Subregs should be eliminated!");
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
index 2413af3f7042..c6d2c3ebcc0f 100644
--- a/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -147,9 +147,9 @@ bool PPCMIPeephole::simplifyCode(void) {
<< "Optimizing load-and-splat/splat "
"to load-and-splat/copy: ");
DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(),
- TII->get(PPC::COPY), MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1));
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
ToErase = &MI;
Simplified = true;
}
@@ -169,9 +169,9 @@ bool PPCMIPeephole::simplifyCode(void) {
<< "Optimizing splat/swap or splat/splat "
"to splat/copy: ");
DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(),
- TII->get(PPC::COPY), MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(1));
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
ToErase = &MI;
Simplified = true;
}
@@ -194,9 +194,9 @@ bool PPCMIPeephole::simplifyCode(void) {
else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
DEBUG(MI.dump());
- BuildMI(MBB, &MI, MI.getDebugLoc(),
- TII->get(PPC::COPY), MI.getOperand(0).getReg())
- .addOperand(DefMI->getOperand(1));
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(DefMI->getOperand(1));
ToErase = &MI;
Simplified = true;
}
@@ -251,7 +251,7 @@ bool PPCMIPeephole::simplifyCode(void) {
DEBUG(MI.dump());
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
MI.getOperand(0).getReg())
- .addOperand(MI.getOperand(OpNo));
+ .add(MI.getOperand(OpNo));
ToErase = &MI;
Simplified = true;
}
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 9d91e31165de..bc2d9a08b5e8 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -8,14 +8,13 @@
//===----------------------------------------------------------------------===//
#include "PPCMachineFunctionInfo.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
-void PPCFunctionInfo::anchor() { }
+void PPCFunctionInfo::anchor() {}
MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
const DataLayout &DL = MF.getDataLayout();
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 4c29aa06f048..202e10058b73 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -26,17 +27,17 @@ class PPCFunctionInfo : public MachineFunctionInfo {
/// FramePointerSaveIndex - Frame index of where the old frame pointer is
/// stored. Also used as an anchor for instructions that need to be altered
/// when using frame pointers (dyna_add, dyna_sub.)
- int FramePointerSaveIndex;
+ int FramePointerSaveIndex = 0;
/// ReturnAddrSaveIndex - Frame index of where the return address is stored.
///
- int ReturnAddrSaveIndex;
+ int ReturnAddrSaveIndex = 0;
/// Frame index where the old base pointer is stored.
- int BasePointerSaveIndex;
+ int BasePointerSaveIndex = 0;
/// Frame index where the old PIC base pointer is stored.
- int PICBasePointerSaveIndex;
+ int PICBasePointerSaveIndex = 0;
/// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
/// function. This is only valid after the initial scan of the function by
@@ -44,54 +45,58 @@ class PPCFunctionInfo : public MachineFunctionInfo {
bool MustSaveLR;
/// Does this function have any stack spills.
- bool HasSpills;
+ bool HasSpills = false;
/// Does this function spill using instructions with only r+r (not r+i)
/// forms.
- bool HasNonRISpills;
+ bool HasNonRISpills = false;
/// SpillsCR - Indicates whether CR is spilled in the current function.
- bool SpillsCR;
+ bool SpillsCR = false;
/// Indicates whether VRSAVE is spilled in the current function.
- bool SpillsVRSAVE;
+ bool SpillsVRSAVE = false;
/// LRStoreRequired - The bool indicates whether there is some explicit use of
/// the LR/LR8 stack slot that is not obvious from scanning the code. This
/// requires that the code generator produce a store of LR to the stack on
/// entry, even though LR may otherwise apparently not be used.
- bool LRStoreRequired;
+ bool LRStoreRequired = false;
/// This function makes use of the PPC64 ELF TOC base pointer (register r2).
- bool UsesTOCBasePtr;
+ bool UsesTOCBasePtr = false;
/// MinReservedArea - This is the frame size that is at least reserved in a
/// potential caller (parameter+linkage area).
- unsigned MinReservedArea;
+ unsigned MinReservedArea = 0;
/// TailCallSPDelta - Stack pointer delta used when tail calling. Maximum
/// amount the stack pointer is adjusted to make the frame bigger for tail
/// calls. Used for creating an area before the register spill area.
- int TailCallSPDelta;
+ int TailCallSPDelta = 0;
/// HasFastCall - Does this function contain a fast call. Used to determine
/// how the caller's stack pointer should be calculated (epilog/dynamicalloc).
- bool HasFastCall;
+ bool HasFastCall = false;
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
- int VarArgsFrameIndex;
+ int VarArgsFrameIndex = 0;
+
/// VarArgsStackOffset - StackOffset for start of stack
/// arguments.
- int VarArgsStackOffset;
+
+ int VarArgsStackOffset = 0;
+
/// VarArgsNumGPR - Index of the first unused integer
/// register for parameter passing.
- unsigned VarArgsNumGPR;
+ unsigned VarArgsNumGPR = 0;
+
/// VarArgsNumFPR - Index of the first unused double
/// register for parameter passing.
- unsigned VarArgsNumFPR;
+ unsigned VarArgsNumFPR = 0;
/// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
- int CRSpillFrameIndex;
+ int CRSpillFrameIndex = 0;
/// If any of CR[2-4] need to be saved in the prologue and restored in the
/// epilogue then they are added to this array. This is used for the
@@ -102,35 +107,14 @@ class PPCFunctionInfo : public MachineFunctionInfo {
MachineFunction &MF;
/// Whether this uses the PIC Base register or not.
- bool UsesPICBase;
+ bool UsesPICBase = false;
/// True if this function has a subset of CSRs that is handled explicitly via
/// copies
- bool IsSplitCSR;
+ bool IsSplitCSR = false;
public:
- explicit PPCFunctionInfo(MachineFunction &MF)
- : FramePointerSaveIndex(0),
- ReturnAddrSaveIndex(0),
- BasePointerSaveIndex(0),
- PICBasePointerSaveIndex(0),
- HasSpills(false),
- HasNonRISpills(false),
- SpillsCR(false),
- SpillsVRSAVE(false),
- LRStoreRequired(false),
- UsesTOCBasePtr(false),
- MinReservedArea(0),
- TailCallSPDelta(0),
- HasFastCall(false),
- VarArgsFrameIndex(0),
- VarArgsStackOffset(0),
- VarArgsNumGPR(0),
- VarArgsNumFPR(0),
- CRSpillFrameIndex(0),
- MF(MF),
- UsesPICBase(0),
- IsSplitCSR(false) {}
+ explicit PPCFunctionInfo(MachineFunction &MF) : MF(MF) {}
int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
@@ -211,7 +195,6 @@ public:
MCSymbol *getTOCOffsetSymbol() const;
};
-} // end of namespace llvm
-
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_POWERPC_PPCMACHINEFUNCTIONINFO_H
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index e49201402861..aad913924692 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -209,86 +209,67 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// The ZERO register is not really a register, but the representation of r0
// when used in instructions that treat r0 as the constant 0.
- Reserved.set(PPC::ZERO);
- Reserved.set(PPC::ZERO8);
+ markSuperRegs(Reserved, PPC::ZERO);
// The FP register is also not really a register, but is the representation
// of the frame pointer register used by ISD::FRAMEADDR.
- Reserved.set(PPC::FP);
- Reserved.set(PPC::FP8);
+ markSuperRegs(Reserved, PPC::FP);
// The BP register is also not really a register, but is the representation
// of the base pointer register used by setjmp.
- Reserved.set(PPC::BP);
- Reserved.set(PPC::BP8);
+ markSuperRegs(Reserved, PPC::BP);
// The counter registers must be reserved so that counter-based loops can
// be correctly formed (and the mtctr instructions are not DCE'd).
- Reserved.set(PPC::CTR);
- Reserved.set(PPC::CTR8);
+ markSuperRegs(Reserved, PPC::CTR);
+ markSuperRegs(Reserved, PPC::CTR8);
- Reserved.set(PPC::R1);
- Reserved.set(PPC::LR);
- Reserved.set(PPC::LR8);
- Reserved.set(PPC::RM);
+ markSuperRegs(Reserved, PPC::R1);
+ markSuperRegs(Reserved, PPC::LR);
+ markSuperRegs(Reserved, PPC::LR8);
+ markSuperRegs(Reserved, PPC::RM);
if (!Subtarget.isDarwinABI() || !Subtarget.hasAltivec())
- Reserved.set(PPC::VRSAVE);
+ markSuperRegs(Reserved, PPC::VRSAVE);
// The SVR4 ABI reserves r2 and r13
if (Subtarget.isSVR4ABI()) {
- Reserved.set(PPC::R2); // System-reserved register
- Reserved.set(PPC::R13); // Small Data Area pointer register
+ // We only reserve r2 if we need to use the TOC pointer. If we have no
+ // explicit uses of the TOC pointer (meaning we're a leaf function with
+ // no constant-pool loads, etc.) and we have no potential uses inside an
+    // inline asm block, then we can treat r2 as an ordinary callee-saved
+ // register.
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ if (!TM.isPPC64() || FuncInfo->usesTOCBasePtr() || MF.hasInlineAsm())
+ markSuperRegs(Reserved, PPC::R2); // System-reserved register
+ markSuperRegs(Reserved, PPC::R13); // Small Data Area pointer register
}
// On PPC64, r13 is the thread pointer. Never allocate this register.
- if (TM.isPPC64()) {
- Reserved.set(PPC::R13);
-
- Reserved.set(PPC::X1);
- Reserved.set(PPC::X13);
-
- if (TFI->needsFP(MF))
- Reserved.set(PPC::X31);
-
- if (hasBasePointer(MF))
- Reserved.set(PPC::X30);
-
- // The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
- if (Subtarget.isSVR4ABI()) {
- // We only reserve r2 if we need to use the TOC pointer. If we have no
- // explicit uses of the TOC pointer (meaning we're a leaf function with
- // no constant-pool loads, etc.) and we have no potential uses inside an
- // inline asm block, then we can treat r2 has an ordinary callee-saved
- // register.
- const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- if (FuncInfo->usesTOCBasePtr() || MF.hasInlineAsm())
- Reserved.set(PPC::X2);
- else
- Reserved.reset(PPC::R2);
- }
- }
+ if (TM.isPPC64())
+ markSuperRegs(Reserved, PPC::R13);
if (TFI->needsFP(MF))
- Reserved.set(PPC::R31);
+ markSuperRegs(Reserved, PPC::R31);
bool IsPositionIndependent = TM.isPositionIndependent();
if (hasBasePointer(MF)) {
if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent)
- Reserved.set(PPC::R29);
+ markSuperRegs(Reserved, PPC::R29);
else
- Reserved.set(PPC::R30);
+ markSuperRegs(Reserved, PPC::R30);
}
if (Subtarget.isSVR4ABI() && !TM.isPPC64() && IsPositionIndependent)
- Reserved.set(PPC::R30);
+ markSuperRegs(Reserved, PPC::R30);
// Reserve Altivec registers when Altivec is unavailable.
if (!Subtarget.hasAltivec())
for (TargetRegisterClass::iterator I = PPC::VRRCRegClass.begin(),
IE = PPC::VRRCRegClass.end(); I != IE; ++I)
- Reserved.set(*I);
+ markSuperRegs(Reserved, *I);
+ assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
diff --git a/lib/Target/PowerPC/PPCScheduleP8.td b/lib/Target/PowerPC/PPCScheduleP8.td
index 8e52da583a0d..79963dd6a3e9 100644
--- a/lib/Target/PowerPC/PPCScheduleP8.td
+++ b/lib/Target/PowerPC/PPCScheduleP8.td
@@ -377,7 +377,7 @@ def P8Itineraries : ProcessorItineraries<
InstrStage<1, [P8_FPU1, P8_FPU2]>],
[7, 1, 1]>,
InstrItinData<IIC_VecPerm , [InstrStage<1, [P8_DU1, P8_DU2], 0>,
- InstrStage<1, [P8_FPU2, P8_FPU2]>],
+ InstrStage<1, [P8_FPU1, P8_FPU2]>],
[3, 1, 1]>
]>;
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index e8a87e7f4437..ccf0f80c336b 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -220,8 +220,8 @@ bool PPCSubtarget::enableSubRegLiveness() const {
return UseSubRegLiveness;
}
-unsigned char PPCSubtarget::classifyGlobalReference(
- const GlobalValue *GV) const {
+unsigned char
+PPCSubtarget::classifyGlobalReference(const GlobalValue *GV) const {
// Note that currently we don't generate non-pic references.
// If a caller wants that, this will have to be updated.
@@ -229,23 +229,9 @@ unsigned char PPCSubtarget::classifyGlobalReference(
if (TM.getCodeModel() == CodeModel::Large)
return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG;
- unsigned char flags = PPCII::MO_PIC_FLAG;
-
- // Only if the relocation mode is PIC do we have to worry about
- // interposition. In all other cases we can use a slightly looser standard to
- // decide how to access the symbol.
- if (TM.getRelocationModel() == Reloc::PIC_) {
- // If it's local, or it's non-default, it can't be interposed.
- if (!GV->hasLocalLinkage() &&
- GV->hasDefaultVisibility()) {
- flags |= PPCII::MO_NLP_FLAG;
- }
- return flags;
- }
-
- if (GV->isStrongDefinitionForLinker())
- return flags;
- return flags | PPCII::MO_NLP_FLAG;
+ if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ return PPCII::MO_PIC_FLAG;
+ return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG;
}
bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 7fd907990ceb..5a97f595ad8c 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -298,7 +298,9 @@ public:
bool isSVR4ABI() const { return !isDarwinABI(); }
bool isELFv2ABI() const;
- bool enableEarlyIfConversion() const override { return hasISEL(); }
+  /// Originally, this function returned hasISEL(). Now we always enable it,
+ /// but may expand the ISEL instruction later.
+ bool enableEarlyIfConversion() const override { return true; }
// Scheduling customization.
bool enableMachineScheduler() const override;
@@ -316,6 +318,8 @@ public:
/// classifyGlobalReference - Classify a global variable reference for the
/// current subtarget accourding to how we should reference it.
unsigned char classifyGlobalReference(const GlobalValue *GV) const;
+
+ bool isXRaySupported() const override { return IsPPC64 && IsLittleEndian; }
};
} // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 91b1d24b2e41..7806d45b5457 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -11,21 +11,33 @@
//
//===----------------------------------------------------------------------===//
-#include "PPCTargetMachine.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPC.h"
+#include "PPCSubtarget.h"
#include "PPCTargetObjectFile.h"
+#include "PPCTargetMachine.h"
#include "PPCTargetTransformInfo.h"
-#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
+#include <cassert>
+#include <memory>
+#include <string>
+
using namespace llvm;
static cl::
@@ -80,6 +92,7 @@ extern "C" void LLVMInitializePowerPCTarget() {
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializePPCBoolRetToIntPass(PR);
+ initializePPCExpandISELPass(PR);
}
/// Return the datalayout string of a subtarget.
@@ -149,9 +162,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
// If it isn't a Mach-O file then it's going to be a linux ELF
// object file.
if (TT.isOSDarwin())
- return make_unique<TargetLoweringObjectFileMachO>();
+ return llvm::make_unique<TargetLoweringObjectFileMachO>();
- return make_unique<PPC64LinuxTargetObjectFile>();
+ return llvm::make_unique<PPC64LinuxTargetObjectFile>();
}
static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
@@ -205,15 +218,13 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
computeFSAdditions(FS, OL, TT), Options,
getEffectiveRelocModel(TT, RM), CM, OL),
TLOF(createTLOF(getTargetTriple())),
- TargetABI(computeTargetABI(TT, Options)),
- Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
-
+ TargetABI(computeTargetABI(TT, Options)) {
initAsmInfo();
}
-PPCTargetMachine::~PPCTargetMachine() {}
+PPCTargetMachine::~PPCTargetMachine() = default;
-void PPC32TargetMachine::anchor() { }
+void PPC32TargetMachine::anchor() {}
PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -223,7 +234,7 @@ PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
-void PPC64TargetMachine::anchor() { }
+void PPC64TargetMachine::anchor() {}
PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -281,6 +292,7 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
//===----------------------------------------------------------------------===//
namespace {
+
/// PPC Code Generator Pass Configuration Options.
class PPCPassConfig : public TargetPassConfig {
public:
@@ -300,7 +312,8 @@ public:
void addPreSched2() override;
void addPreEmitPass() override;
};
-} // namespace
+
+} // end anonymous namespace
TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
return new PPCPassConfig(this, PM);
@@ -416,6 +429,8 @@ void PPCPassConfig::addPreSched2() {
}
void PPCPassConfig::addPreEmitPass() {
+ addPass(createPPCExpandISELPass());
+
if (getOptLevel() != CodeGenOpt::None)
addPass(createPPCEarlyReturnPass(), false);
// Must run branch selection immediately preceding the asm printer.
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 59b4f1e30c0e..f2838351cee5 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -29,7 +29,6 @@ public:
private:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
PPCABI TargetABI;
- PPCSubtarget Subtarget;
mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap;
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
index dbe7617d3542..310fea9ef09f 100644
--- a/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -1,4 +1,4 @@
-//===-- PPCTargetStreamer.h - PPC Target Streamer --s-----------*- C++ -*--===//
+//===- PPCTargetStreamer.h - PPC Target Streamer ----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,18 +10,26 @@
#ifndef LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
#define LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCStreamer.h"
namespace llvm {
+
+class MCExpr;
+class MCSymbol;
+class MCSymbolELF;
+
class PPCTargetStreamer : public MCTargetStreamer {
public:
PPCTargetStreamer(MCStreamer &S);
~PPCTargetStreamer() override;
+
virtual void emitTCEntry(const MCSymbol &S) = 0;
virtual void emitMachine(StringRef CPU) = 0;
virtual void emitAbiVersion(int AbiVersion) = 0;
virtual void emitLocalEntry(MCSymbolELF *S, const MCExpr *LocalOffset) = 0;
};
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_POWERPC_PPCTARGETSTREAMER_H
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index f94d1eab097d..7ee1317bf72f 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -302,14 +302,16 @@ int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first;
}
-int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) {
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
@@ -352,7 +354,7 @@ int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
}
int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace, const Instruction *I) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
@@ -401,6 +403,10 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
if (IsVSXType || (ST->hasVSX() && IsAltivecType))
return Cost;
+  // Newer PPC cores support unaligned memory accesses.
+ if (TLI->allowsMisalignedMemoryAccesses(LT.second, 0))
+ return Cost;
+
// PPC in general does not support unaligned loads and stores. They'll need
// to be decomposed based on the alignment factor.
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 30ee2814aba1..6ce70fbd8778 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -74,11 +74,13 @@ public:
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>());
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
- int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, const Instruction *I = nullptr);
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
unsigned Factor,
ArrayRef<unsigned> Indices,
diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp
index 3b5d8f094fd0..f3a0290da054 100644
--- a/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -112,7 +112,7 @@ protected:
TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
.addImm(1) // add 1, not 0, because there is no implicit clearing
// of the high bits.
- .addOperand(SrcMO)
+ .add(SrcMO)
.addImm(PPC::sub_64);
// The source of the original copy is now the new virtual register.
@@ -132,7 +132,7 @@ protected:
unsigned NewVReg = MRI.createVirtualRegister(DstRC);
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
NewVReg)
- .addOperand(SrcMO);
+ .add(SrcMO);
// Transform the original copy into a subregister extraction copy.
SrcMO.setReg(NewVReg);
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 8197285b7b1f..d3434b77be8a 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -522,7 +522,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
if (RelevantFunction) {
DEBUG(dbgs() << "Swap vector when first built\n\n");
- dumpSwapVector();
+ DEBUG(dumpSwapVector());
}
return RelevantFunction;
@@ -731,7 +731,7 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
}
DEBUG(dbgs() << "Swap vector after web analysis:\n\n");
- dumpSwapVector();
+ DEBUG(dumpSwapVector());
}
// Walk the swap vector entries looking for swaps fed by permuting loads
@@ -936,9 +936,9 @@ bool PPCVSXSwapRemoval::removeSwaps() {
Changed = true;
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
MachineBasicBlock *MBB = MI->getParent();
- BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), MI->getOperand(0).getReg())
- .addOperand(MI->getOperand(1));
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ MI->getOperand(0).getReg())
+ .add(MI->getOperand(1));
DEBUG(dbgs() << format("Replaced %d with copy: ",
SwapVector[EntryIdx].VSEId));
@@ -951,77 +951,78 @@ bool PPCVSXSwapRemoval::removeSwaps() {
return Changed;
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// For debug purposes, dump the contents of the swap vector.
-void PPCVSXSwapRemoval::dumpSwapVector() {
+LLVM_DUMP_METHOD void PPCVSXSwapRemoval::dumpSwapVector() {
for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) {
MachineInstr *MI = SwapVector[EntryIdx].VSEMI;
int ID = SwapVector[EntryIdx].VSEId;
- DEBUG(dbgs() << format("%6d", ID));
- DEBUG(dbgs() << format("%6d", EC->getLeaderValue(ID)));
- DEBUG(dbgs() << format(" BB#%3d", MI->getParent()->getNumber()));
- DEBUG(dbgs() << format(" %14s ",
- TII->getName(MI->getOpcode()).str().c_str()));
+ dbgs() << format("%6d", ID);
+ dbgs() << format("%6d", EC->getLeaderValue(ID));
+ dbgs() << format(" BB#%3d", MI->getParent()->getNumber());
+ dbgs() << format(" %14s ", TII->getName(MI->getOpcode()).str().c_str());
if (SwapVector[EntryIdx].IsLoad)
- DEBUG(dbgs() << "load ");
+ dbgs() << "load ";
if (SwapVector[EntryIdx].IsStore)
- DEBUG(dbgs() << "store ");
+ dbgs() << "store ";
if (SwapVector[EntryIdx].IsSwap)
- DEBUG(dbgs() << "swap ");
+ dbgs() << "swap ";
if (SwapVector[EntryIdx].MentionsPhysVR)
- DEBUG(dbgs() << "physreg ");
+ dbgs() << "physreg ";
if (SwapVector[EntryIdx].MentionsPartialVR)
- DEBUG(dbgs() << "partialreg ");
+ dbgs() << "partialreg ";
if (SwapVector[EntryIdx].IsSwappable) {
- DEBUG(dbgs() << "swappable ");
+ dbgs() << "swappable ";
switch(SwapVector[EntryIdx].SpecialHandling) {
default:
- DEBUG(dbgs() << "special:**unknown**");
+ dbgs() << "special:**unknown**";
break;
case SH_NONE:
break;
case SH_EXTRACT:
- DEBUG(dbgs() << "special:extract ");
+ dbgs() << "special:extract ";
break;
case SH_INSERT:
- DEBUG(dbgs() << "special:insert ");
+ dbgs() << "special:insert ";
break;
case SH_NOSWAP_LD:
- DEBUG(dbgs() << "special:load ");
+ dbgs() << "special:load ";
break;
case SH_NOSWAP_ST:
- DEBUG(dbgs() << "special:store ");
+ dbgs() << "special:store ";
break;
case SH_SPLAT:
- DEBUG(dbgs() << "special:splat ");
+ dbgs() << "special:splat ";
break;
case SH_XXPERMDI:
- DEBUG(dbgs() << "special:xxpermdi ");
+ dbgs() << "special:xxpermdi ";
break;
case SH_COPYWIDEN:
- DEBUG(dbgs() << "special:copywiden ");
+ dbgs() << "special:copywiden ";
break;
}
}
if (SwapVector[EntryIdx].WebRejected)
- DEBUG(dbgs() << "rejected ");
+ dbgs() << "rejected ";
if (SwapVector[EntryIdx].WillRemove)
- DEBUG(dbgs() << "remove ");
+ dbgs() << "remove ";
- DEBUG(dbgs() << "\n");
+ dbgs() << "\n";
// For no-asserts builds.
(void)MI;
(void)ID;
}
- DEBUG(dbgs() << "\n");
+ dbgs() << "\n";
}
+#endif
} // end anonymous namespace
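The rewrite above follows the standard LLVM dump idiom: the method body prints unconditionally, is marked LLVM_DUMP_METHOD, and is compiled only when asserts or LLVM_ENABLE_DUMP are enabled, while each call site is wrapped in DEBUG(...). A minimal sketch of the pattern with an invented class name:

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Body prints unconditionally; filtering happens at the call site.
LLVM_DUMP_METHOD void MyPass::dumpState() {
  dbgs() << "current state\n";
}
#endif

// Call site: stripped from release builds along with the definition.
DEBUG(dumpState());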
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index f8ef142255c8..d6f2672271e9 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -33,7 +33,7 @@ public:
~RISCVAsmBackend() override {}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
@@ -71,7 +71,7 @@ bool RISCVAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
void RISCVAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ bool IsPCRel, MCContext &Ctx) const {
return;
}
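applyFixup gains an MCContext & across all backends in this patch (see also the Sparc and SystemZ asm backends below), so out-of-range fixup values can be reported as proper diagnostics instead of assertions; the RISC-V stub simply ignores it for now. A hedged sketch of what a backend can do with it, using an invented backend name and a hypothetical 12-bit field:

// Hypothetical backend: diagnose a value that does not fit a 12-bit
// immediate field instead of asserting.
void MyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
                              unsigned DataSize, uint64_t Value,
                              bool IsPCRel, MCContext &Ctx) const {
  if (!isInt<12>(int64_t(Value))) {
    Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
    return;
  }
  // ... otherwise patch Data at Fixup.getOffset() ...
}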
diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 4fc69a7fcaba..41be0a2084b3 100644
--- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -44,13 +44,12 @@ static MCRegisterInfo *createRISCVMCRegisterInfo(const Triple &TT) {
static MCAsmInfo *createRISCVMCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TT) {
- MCAsmInfo *MAI = new RISCVMCAsmInfo(TT);
- return MAI;
+ return new RISCVMCAsmInfo(TT);
}
extern "C" void LLVMInitializeRISCVTargetMC() {
for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target()}) {
- RegisterMCAsmInfoFn X(*T, createRISCVMCAsmInfo);
+ TargetRegistry::RegisterMCAsmInfo(*T, createRISCVMCAsmInfo);
TargetRegistry::RegisterMCInstrInfo(*T, createRISCVMCInstrInfo);
TargetRegistry::RegisterMCRegInfo(*T, createRISCVMCRegisterInfo);
TargetRegistry::RegisterMCAsmBackend(*T, createRISCVAsmBackend);
diff --git a/lib/Target/RISCV/RISCVInstrFormats.td b/lib/Target/RISCV/RISCVInstrFormats.td
index 1e9bc3bf9bc5..3fab7122f6f1 100644
--- a/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/lib/Target/RISCV/RISCVInstrFormats.td
@@ -44,8 +44,9 @@ class RISCVInst<dag outs, dag ins, string asmstr, list<dag> pattern>
// Pseudo instructions
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : RISCVInst<outs, ins, asmstr, pattern> {
+ : RISCVInst<outs, ins, "", pattern> {
let isPseudo = 1;
+ let isCodeGenOnly = 1;
}
class FR<bits<7> funct7, bits<3> funct3, bits<7> opcode, dag outs, dag ins,
diff --git a/lib/Target/RISCV/RISCVTargetMachine.cpp b/lib/Target/RISCV/RISCVTargetMachine.cpp
index afbbe004186e..a20331cd0a3e 100644
--- a/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -32,7 +32,7 @@ static std::string computeDataLayout(const Triple &TT) {
return "e-m:e-i64:64-n32:64-S128";
} else {
assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported");
- return "e-m:e-i64:64-n32-S128";
+ return "e-m:e-p:32:32-i64:64-n32-S128";
}
}
@@ -51,7 +51,9 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, RM), CM, OL),
- TLOF(make_unique<TargetLoweringObjectFileELF>()) {}
+ TLOF(make_unique<TargetLoweringObjectFileELF>()) {
+ initAsmInfo();
+}
TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) {
return new TargetPassConfig(this, PM);
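The data layout change makes the 32-bit pointer width explicit for RV32 rather than relying on the default. Reading the string component by component:

// "e-m:e-p:32:32-i64:64-n32-S128", piece by piece:
//   e        little-endian
//   m:e      ELF-style symbol mangling
//   p:32:32  pointers are 32 bits wide with 32-bit ABI alignment
//   i64:64   i64 values are 64-bit aligned
//   n32      the native integer width is 32 bits
//   S128     the stack is 128-bit aligned
static const char RV32DataLayout[] = "e-m:e-p:32:32-i64:64-n32-S128";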
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index e775aa607b53..7e6dff6b7894 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -9,32 +9,49 @@
#include "MCTargetDesc/SparcMCExpr.h"
#include "MCTargetDesc/SparcMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
using namespace llvm;
// The generated AsmMatcher SparcGenAsmMatcher uses "Sparc" as the target
// namespace. But SPARC backend uses "SP" as its namespace.
namespace llvm {
- namespace Sparc {
+namespace Sparc {
+
using namespace SP;
- }
-}
+
+} // end namespace Sparc
+} // end namespace llvm
namespace {
+
class SparcOperand;
-class SparcAsmParser : public MCTargetAsmParser {
+class SparcAsmParser : public MCTargetAsmParser {
MCAsmParser &Parser;
/// @name Auto-generated Match Functions
@@ -95,9 +112,10 @@ public:
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
}
-
};
+} // end anonymous namespace
+
static const MCPhysReg IntRegs[32] = {
Sparc::G0, Sparc::G1, Sparc::G2, Sparc::G3,
Sparc::G4, Sparc::G5, Sparc::G6, Sparc::G7,
@@ -166,6 +184,8 @@ public:
Sparc::C16_C17, Sparc::C18_C19, Sparc::C20_C21, Sparc::C22_C23,
Sparc::C24_C25, Sparc::C26_C27, Sparc::C28_C29, Sparc::C30_C31};
+namespace {
+
/// SparcOperand - Instances of this class represent a parsed Sparc machine
/// instruction.
class SparcOperand : public MCParsedAsmOperand {
@@ -219,6 +239,7 @@ private:
struct ImmOp Imm;
struct MemOp Mem;
};
+
public:
SparcOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -464,7 +485,7 @@ public:
}
};
-} // end namespace
+} // end anonymous namespace
bool SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
@@ -591,9 +612,8 @@ bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
llvm_unreachable("Implement any new match types added!");
}
-bool SparcAsmParser::
-ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc)
-{
+bool SparcAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
const AsmToken &Tok = Parser.getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
@@ -695,7 +715,7 @@ ParseDirective(AsmToken DirectiveID)
bool SparcAsmParser:: parseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for (;;) {
+ while (true) {
const MCExpr *Value;
if (getParser().parseExpression(Value))
return true;
@@ -717,7 +737,6 @@ bool SparcAsmParser:: parseDirectiveWord(unsigned Size, SMLoc L) {
OperandMatchResultTy
SparcAsmParser::parseMEMOperand(OperandVector &Operands) {
-
SMLoc S, E;
unsigned BaseReg = 0;
@@ -824,7 +843,6 @@ SparcAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
OperandMatchResultTy
SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
bool isCall) {
-
SMLoc S = Parser.getTok().getLoc();
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
const MCExpr *EVal;
@@ -910,11 +928,9 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
OperandMatchResultTy
SparcAsmParser::parseBranchModifiers(OperandVector &Operands) {
-
// parse (,a|,pn|,pt)+
while (getLexer().is(AsmToken::Comma)) {
-
Parser.Lex(); // Eat the comma
if (!getLexer().is(AsmToken::Identifier))
@@ -929,10 +945,8 @@ SparcAsmParser::parseBranchModifiers(OperandVector &Operands) {
return MatchOperand_Success;
}
-bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
- unsigned &RegNo,
- unsigned &RegKind)
-{
+bool SparcAsmParser::matchRegisterName(const AsmToken &Tok, unsigned &RegNo,
+ unsigned &RegKind) {
int64_t intVal = 0;
RegNo = 0;
RegKind = SparcOperand::rk_None;
@@ -1211,8 +1225,7 @@ static bool hasGOTReference(const MCExpr *Expr) {
const SparcMCExpr *
SparcAsmParser::adjustPICRelocation(SparcMCExpr::VariantKind VK,
- const MCExpr *subExpr)
-{
+ const MCExpr *subExpr) {
// When in PIC mode, "%lo(...)" and "%hi(...)" behave differently.
// If the expression contains _GLOBAL_OFFSET_TABLE_, it is
// actually a %pc10 or %pc22 relocation. Otherwise, they are interpreted
@@ -1236,8 +1249,7 @@ SparcAsmParser::adjustPICRelocation(SparcMCExpr::VariantKind VK,
}
bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal,
- SMLoc &EndLoc)
-{
+ SMLoc &EndLoc) {
AsmToken Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
return false;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 6106a6c32dc8..cc07547ede2c 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -274,7 +274,8 @@ namespace {
SparcAsmBackend(T), OSType(OSType) { }
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override {
+ uint64_t Value, bool IsPCRel,
+ MCContext &Ctx) const override {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 280c6d7937b2..3ed09898fb78 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -1,4 +1,4 @@
-//===-- SparcMCAsmInfo.cpp - Sparc asm properties -------------------------===//
+//===- SparcMCAsmInfo.cpp - Sparc asm properties --------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,7 +14,10 @@
#include "SparcMCAsmInfo.h"
#include "SparcMCExpr.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/Dwarf.h"
using namespace llvm;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index ad441227600e..5e8d0cb50312 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -1,4 +1,4 @@
-//===-- SparcMCAsmInfo.h - Sparc asm properties ----------------*- C++ -*--===//
+//===- SparcMCAsmInfo.h - Sparc asm properties -----------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,6 +17,7 @@
#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
+
class Triple;
class SparcELFMCAsmInfo : public MCAsmInfoELF {
@@ -24,6 +25,7 @@ class SparcELFMCAsmInfo : public MCAsmInfoELF {
public:
explicit SparcELFMCAsmInfo(const Triple &TheTriple);
+
const MCExpr*
getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding,
MCStreamer &Streamer) const override;
@@ -33,6 +35,6 @@ public:
};
-} // namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_SPARC_MCTARGETDESC_SPARCMCASMINFO_H
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index 86341c61d1e2..684f66970dbe 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -11,20 +11,29 @@
//
//===----------------------------------------------------------------------===//
-#include "SparcMCExpr.h"
#include "MCTargetDesc/SparcFixupKinds.h"
+#include "SparcMCExpr.h"
#include "SparcMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -33,17 +42,17 @@ using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace {
+
class SparcMCCodeEmitter : public MCCodeEmitter {
- SparcMCCodeEmitter(const SparcMCCodeEmitter &) = delete;
- void operator=(const SparcMCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
MCContext &Ctx;
public:
SparcMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
: MCII(mcii), Ctx(ctx) {}
-
- ~SparcMCCodeEmitter() override {}
+ SparcMCCodeEmitter(const SparcMCCodeEmitter &) = delete;
+ SparcMCCodeEmitter &operator=(const SparcMCCodeEmitter &) = delete;
+ ~SparcMCCodeEmitter() override = default;
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
@@ -79,13 +88,8 @@ private:
void verifyInstructionPredicates(const MCInst &MI,
uint64_t AvailableFeatures) const;
};
-} // end anonymous namespace
-MCCodeEmitter *llvm::createSparcMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new SparcMCCodeEmitter(MCII, Ctx);
-}
+} // end anonymous namespace
void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
@@ -121,12 +125,10 @@ void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
++MCNumEmitted; // Keep track of the # of mi's emitted.
}
-
unsigned SparcMCCodeEmitter::
getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
-
if (MO.isReg())
return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
@@ -209,6 +211,7 @@ getBranchPredTargetOpValue(const MCInst &MI, unsigned OpNo,
(MCFixupKind)Sparc::fixup_sparc_br19));
return 0;
}
+
unsigned SparcMCCodeEmitter::
getBranchOnRegTargetOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
@@ -227,3 +230,9 @@ getBranchOnRegTargetOpValue(const MCInst &MI, unsigned OpNo,
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "SparcGenMCCodeEmitter.inc"
+
+MCCodeEmitter *llvm::createSparcMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new SparcMCCodeEmitter(MCII, Ctx);
+}
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index 122f830e0dc5..c07cc213c3ed 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -288,11 +288,11 @@ static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI)
{
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg)
- if (!MRI->reg_nodbg_empty(reg))
+ if (MRI->isPhysRegUsed(reg))
return false;
for (unsigned reg = SP::L0; reg <= SP::L7; ++reg)
- if (!MRI->reg_nodbg_empty(reg))
+ if (MRI->isPhysRegUsed(reg))
return false;
return true;
@@ -305,8 +305,8 @@ bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const
MachineFrameInfo &MFI = MF.getFrameInfo();
return !(MFI.hasCalls() // has calls
- || !MRI.reg_nodbg_empty(SP::L0) // Too many registers needed
- || !MRI.reg_nodbg_empty(SP::O6) // %SP is used
+ || MRI.isPhysRegUsed(SP::L0) // Too many registers needed
+ || MRI.isPhysRegUsed(SP::O6) // %SP is used
|| hasFP(MF)); // need %FP
}
@@ -314,11 +314,10 @@ void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
// Remap %i[0-7] to %o[0-7].
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
- if (MRI.reg_nodbg_empty(reg))
+ if (!MRI.isPhysRegUsed(reg))
continue;
unsigned mapped_reg = reg - SP::I0 + SP::O0;
- assert(MRI.reg_nodbg_empty(mapped_reg));
// Replace I register with O register.
MRI.replaceRegWith(reg, mapped_reg);
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 2ac9aae2471b..455d1ee1564a 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1877,6 +1877,7 @@ void SparcTargetLowering::computeKnownBitsForTargetNode
(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
APInt KnownZero2, KnownOne2;
@@ -2177,8 +2178,8 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
Entry.Node = RetPtr;
Entry.Ty = PointerType::getUnqual(RetTy);
if (!Subtarget->is64Bit())
- Entry.isSRet = true;
- Entry.isReturned = false;
+ Entry.IsSRet = true;
+ Entry.IsReturned = false;
Args.push_back(Entry);
RetTyABI = Type::getVoidTy(*DAG.getContext());
}
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index e0a421b83712..90d03984060c 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -68,6 +68,7 @@ namespace llvm {
void computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index a94717c93456..3f91ca9035a6 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -8,16 +8,31 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetRegistry.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
using namespace llvm;
@@ -31,6 +46,7 @@ static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) {
}
namespace {
+
enum RegisterKind {
GR32Reg,
GRH32Reg,
@@ -56,7 +72,6 @@ enum MemoryKind {
};
class SystemZOperand : public MCParsedAsmOperand {
-public:
private:
enum OperandKind {
KindInvalid,
@@ -140,12 +155,14 @@ public:
SMLoc EndLoc) {
return make_unique<SystemZOperand>(KindInvalid, StartLoc, EndLoc);
}
+
static std::unique_ptr<SystemZOperand> createToken(StringRef Str, SMLoc Loc) {
auto Op = make_unique<SystemZOperand>(KindToken, Loc, Loc);
Op->Token.Data = Str.data();
Op->Token.Length = Str.size();
return Op;
}
+
static std::unique_ptr<SystemZOperand>
createReg(RegisterKind Kind, unsigned Num, SMLoc StartLoc, SMLoc EndLoc) {
auto Op = make_unique<SystemZOperand>(KindReg, StartLoc, EndLoc);
@@ -153,12 +170,14 @@ public:
Op->Reg.Num = Num;
return Op;
}
+
static std::unique_ptr<SystemZOperand>
createImm(const MCExpr *Expr, SMLoc StartLoc, SMLoc EndLoc) {
auto Op = make_unique<SystemZOperand>(KindImm, StartLoc, EndLoc);
Op->Imm = Expr;
return Op;
}
+
static std::unique_ptr<SystemZOperand>
createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base,
const MCExpr *Disp, unsigned Index, const MCExpr *LengthImm,
@@ -175,6 +194,7 @@ public:
Op->Mem.Length.Reg = LengthReg;
return Op;
}
+
static std::unique_ptr<SystemZOperand>
createImmTLS(const MCExpr *Imm, const MCExpr *Sym,
SMLoc StartLoc, SMLoc EndLoc) {
@@ -503,6 +523,7 @@ public:
return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, true);
}
};
+
} // end anonymous namespace
#define GET_REGISTER_MATCHER
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 1806e015f61e..a281a0aa6bcc 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -7,12 +7,16 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "SystemZ.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -21,17 +25,19 @@ using namespace llvm;
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
+
class SystemZDisassembler : public MCDisassembler {
public:
SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- ~SystemZDisassembler() override {}
+ ~SystemZDisassembler() override = default;
DecodeStatus getInstruction(MCInst &instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &VStream,
raw_ostream &CStream) const override;
};
+
} // end anonymous namespace
static MCDisassembler *createSystemZDisassembler(const Target &T,
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index 1207c7b327e8..6cd12e13e220 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax ===//
+//===- SystemZInstPrinter.cpp - Convert SystemZ MCInst to assembly syntax -===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,10 +10,13 @@
#include "SystemZInstPrinter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index 6336f5ee0efa..d65c661545eb 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -15,8 +15,10 @@
#define LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
+#include <cstdint>
namespace llvm {
+
class MCOperand;
class SystemZInstPrinter : public MCInstPrinter {
@@ -70,6 +72,7 @@ private:
// This forms part of the instruction name rather than the operand list.
void printCond4Operand(const MCInst *MI, int OpNum, raw_ostream &O);
};
+
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 9192448afd04..23b7d5b5d501 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -51,7 +51,7 @@ public:
}
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
bool mayNeedRelaxation(const MCInst &Inst) const override {
return false;
}
@@ -91,7 +91,7 @@ SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ bool IsPCRel, MCContext &Ctx) const {
MCFixupKind Kind = Fixup.getKind();
unsigned Offset = Fixup.getOffset();
unsigned BitSize = getFixupKindInfo(Kind).TargetSize;
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index 7082abad716d..092eb4011adc 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -11,20 +11,28 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "MCTargetDesc/SystemZMCFixups.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
namespace {
+
class SystemZMCCodeEmitter : public MCCodeEmitter {
const MCInstrInfo &MCII;
MCContext &Ctx;
@@ -34,7 +42,7 @@ public:
: MCII(mcii), Ctx(ctx) {
}
- ~SystemZMCCodeEmitter() override {}
+ ~SystemZMCCodeEmitter() override = default;
// Override MCCodeEmitter.
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
@@ -137,13 +145,8 @@ private:
void verifyInstructionPredicates(const MCInst &MI,
uint64_t AvailableFeatures) const;
};
-} // end anonymous namespace
-MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new SystemZMCCodeEmitter(MCII, Ctx);
-}
+} // end anonymous namespace
void SystemZMCCodeEmitter::
encodeInstruction(const MCInst &MI, raw_ostream &OS,
@@ -282,3 +285,9 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "SystemZGenMCCodeEmitter.inc"
+
+MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new SystemZMCCodeEmitter(MCII, Ctx);
+}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 43a96e84289c..3de570bf30cc 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -7,35 +7,38 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "MCTargetDesc/SystemZMCFixups.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
namespace {
+
class SystemZObjectWriter : public MCELFObjectTargetWriter {
public:
SystemZObjectWriter(uint8_t OSABI);
-
- ~SystemZObjectWriter() override;
+ ~SystemZObjectWriter() override = default;
protected:
// Override MCELFObjectTargetWriter.
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
};
+
} // end anonymous namespace
SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(/*Is64Bit=*/true, OSABI, ELF::EM_S390,
/*HasRelocationAddend=*/ true) {}
-SystemZObjectWriter::~SystemZObjectWriter() {
-}
-
// Return the relocation type for an absolute value of MCFixupKind Kind.
static unsigned getAbsoluteReloc(unsigned Kind) {
switch (Kind) {
diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp
index b4c843f658aa..d70f9e90cd3e 100644
--- a/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -13,15 +13,23 @@
//
//===----------------------------------------------------------------------===//
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
#include "SystemZTargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -33,11 +41,11 @@ STATISTIC(EliminatedComparisons, "Number of eliminated comparisons");
STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions");
namespace {
+
// Represents the references to a particular register in one or more
// instructions.
struct Reference {
- Reference()
- : Def(false), Use(false) {}
+ Reference() = default;
Reference &operator|=(const Reference &Other) {
Def |= Other.Def;
@@ -49,15 +57,16 @@ struct Reference {
// True if the register is defined or used in some form, either directly or
// via a sub- or super-register.
- bool Def;
- bool Use;
+ bool Def = false;
+ bool Use = false;
};
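The Reference cleanup above is the C++11 modernization applied throughout this patch: defaults move from a hand-written constructor into in-class member initializers, and the constructor becomes `= default`. Side-by-side for a generic flags struct:

// Before: defaults buried in the constructor's init list.
struct RefOld {
  RefOld() : Def(false), Use(false) {}
  bool Def;
  bool Use;
};

// After: defaults sit next to the declarations; ctor is defaulted.
struct RefNew {
  RefNew() = default;
  bool Def = false;
  bool Use = false;
};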
class SystemZElimCompare : public MachineFunctionPass {
public:
static char ID;
+
SystemZElimCompare(const SystemZTargetMachine &tm)
- : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {}
+ : MachineFunctionPass(ID) {}
StringRef getPassName() const override {
return "SystemZ Comparison Elimination";
@@ -65,6 +74,7 @@ public:
bool processBlock(MachineBasicBlock &MBB);
bool runOnMachineFunction(MachineFunction &F) override;
+
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
@@ -84,16 +94,13 @@ private:
bool fuseCompareOperations(MachineInstr &Compare,
SmallVectorImpl<MachineInstr *> &CCUsers);
- const SystemZInstrInfo *TII;
- const TargetRegisterInfo *TRI;
+ const SystemZInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
};
char SystemZElimCompare::ID = 0;
-} // end anonymous namespace
-FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) {
- return new SystemZElimCompare(TM);
-}
+} // end anonymous namespace
// Return true if CC is live out of MBB.
static bool isCCLiveOut(MachineBasicBlock &MBB) {
@@ -167,7 +174,7 @@ static unsigned getCompareSourceReg(MachineInstr &Compare) {
reg = Compare.getOperand(0).getReg();
else if (isLoadAndTestAsCmp(Compare))
reg = Compare.getOperand(1).getReg();
- assert (reg);
+ assert(reg);
return reg;
}
@@ -216,9 +223,7 @@ bool SystemZElimCompare::convertToBRCT(
Branch->RemoveOperand(0);
Branch->setDesc(TII->get(BRCT));
MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
- MIB.addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
- .addOperand(Target);
+ MIB.add(MI.getOperand(0)).add(MI.getOperand(1)).add(Target);
// Add a CC def to BRCT(G), since we may have to split them again if the
// branch displacement overflows. BRCTH has a 32-bit displacement, so
// this is not necessary there.
@@ -261,10 +266,10 @@ bool SystemZElimCompare::convertToLoadAndTrap(
Branch->RemoveOperand(0);
Branch->setDesc(TII->get(LATOpcode));
MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
- .addOperand(MI.getOperand(2))
- .addOperand(MI.getOperand(3));
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3));
MI.eraseFromParent();
return true;
}
@@ -368,10 +373,8 @@ static bool isCompareZero(MachineInstr &Compare) {
return true;
default:
-
if (isLoadAndTestAsCmp(Compare))
return true;
-
return Compare.getNumExplicitOperands() == 2 &&
Compare.getOperand(1).isImm() && Compare.getOperand(1).getImm() == 0;
}
@@ -502,15 +505,15 @@ bool SystemZElimCompare::fuseCompareOperations(
Branch->setDesc(TII->get(FusedOpcode));
MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
for (unsigned I = 0; I < SrcNOps; I++)
- MIB.addOperand(Compare.getOperand(I));
- MIB.addOperand(CCMask);
+ MIB.add(Compare.getOperand(I));
+ MIB.add(CCMask);
if (Type == SystemZII::CompareAndBranch) {
// Only conditional branches define CC, as they may be converted back
// to a non-fused branch because of a long displacement. Conditional
// returns don't have that problem.
- MIB.addOperand(Target)
- .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
+ MIB.add(Target).addReg(SystemZ::CC,
+ RegState::ImplicitDefine | RegState::Dead);
}
if (Type == SystemZII::CompareAndSibcall)
@@ -573,3 +576,7 @@ bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) {
return Changed;
}
+
+FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) {
+ return new SystemZElimCompare(TM);
+}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2d0a06af18ae..84d3c7bed50a 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -194,6 +194,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMUL_LOHI, VT, Custom);
// Only z196 and above have native support for conversions to unsigned.
+ // On z10, promoting to i64 doesn't generate an inexact condition for
+ // values that are outside the i32 range but in the i64 range, so use
+ // the default expansion.
if (!Subtarget.hasFPExtension())
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
}
@@ -344,9 +347,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// There should be no need to check for float types other than v2f64
// since <2 x f32> isn't a legal type.
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
}
// Handle floating-point types.
@@ -2789,8 +2796,9 @@ SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
// but we need this case for bitcasts that are created during lowering
// and which are then lowered themselves.
if (auto *LoadN = dyn_cast<LoadSDNode>(In))
- return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(),
- LoadN->getMemOperand());
+ if (ISD::isNormalLoad(LoadN))
+ return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(),
+ LoadN->getMemOperand());
if (InVT == MVT::i32 && ResVT == MVT::f32) {
SDValue In64;
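Guarding the fold on ISD::isNormalLoad matters because DAG.getLoad builds a plain, non-extending, unindexed load; rebuilding an extending or indexed load that way would change semantics. What the predicate accepts, spelled out as an equivalent check (helper name invented):

// Equivalent in spirit to ISD::isNormalLoad: accept only plain loads.
static bool isPlainLoad(const SDNode *N) {
  const auto *Ld = dyn_cast<LoadSDNode>(N);
  return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
         Ld->getAddressingMode() == ISD::UNINDEXED;
}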
@@ -3802,7 +3810,7 @@ namespace {
struct GeneralShuffle {
GeneralShuffle(EVT vt) : VT(vt) {}
void addUndef();
- void add(SDValue, unsigned);
+ bool add(SDValue, unsigned);
SDValue getNode(SelectionDAG &, const SDLoc &);
// The operands of the shuffle.
@@ -3828,8 +3836,10 @@ void GeneralShuffle::addUndef() {
// Add an extra element to the shuffle, taking it from element Elem of Op.
// A null Op indicates a vector input whose value will be calculated later;
// there is at most one such input per shuffle and it always has the same
-// type as the result.
-void GeneralShuffle::add(SDValue Op, unsigned Elem) {
+// type as the result. Aborts and returns false if the source vector elements
+// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
+// LLVM semantics they become implicitly extended, but this case is rare and
+// not optimized here.
+bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
// The source vector can have wider elements than the result,
@@ -3837,8 +3847,12 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) {
// We want the least significant part.
EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
- assert(FromBytesPerElement >= BytesPerElement &&
- "Invalid EXTRACT_VECTOR_ELT");
+
+ // Return false if the source elements are smaller than their destination
+ // elements.
+ if (FromBytesPerElement < BytesPerElement)
+ return false;
+
unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
(FromBytesPerElement - BytesPerElement));
@@ -3856,13 +3870,13 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) {
break;
if (NewByte < 0) {
addUndef();
- return;
+ return true;
}
Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
Byte = unsigned(NewByte) % SystemZ::VectorBytes;
} else if (Op.isUndef()) {
addUndef();
- return;
+ return true;
} else
break;
}
@@ -3879,6 +3893,8 @@ void GeneralShuffle::add(SDValue Op, unsigned Elem) {
unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
for (unsigned I = 0; I < BytesPerElement; ++I)
Bytes.push_back(Base + I);
+
+ return true;
}
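Changing the signature from `void add(...)` plus an assert to `bool add(...)` is a hardening pattern: instead of crashing on a rare but legal input, every caller checks the result and returns SDValue(), handing the node back to generic lowering. A small compilable example mirroring the shape of the refactor (all names invented):

#include <vector>

// Invented example: report failure instead of asserting, so callers can
// abandon the custom lowering gracefully.
struct Shuffle {
  std::vector<unsigned> Bytes;
  bool add(unsigned FromBytes, unsigned ToBytes, unsigned Elem) {
    if (FromBytes < ToBytes)   // previously: assert(FromBytes >= ToBytes)
      return false;            // caller bails out of the custom path
    Bytes.push_back(Elem * FromBytes);
    return true;
  }
};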
// Return SDNodes for the completed shuffle.
@@ -4110,12 +4126,14 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op.getOperand(1).getOpcode() == ISD::Constant) {
unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- GS.add(Op.getOperand(0), Elem);
+ if (!GS.add(Op.getOperand(0), Elem))
+ return SDValue();
FoundOne = true;
} else if (Op.isUndef()) {
GS.addUndef();
} else {
- GS.add(SDValue(), ResidueOps.size());
+ if (!GS.add(SDValue(), ResidueOps.size()))
+ return SDValue();
ResidueOps.push_back(BVN->getOperand(I));
}
}
@@ -4354,9 +4372,9 @@ SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
int Elt = VSN->getMaskElt(I);
if (Elt < 0)
GS.addUndef();
- else
- GS.add(Op.getOperand(unsigned(Elt) / NumElements),
- unsigned(Elt) % NumElements);
+ else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
+ unsigned(Elt) % NumElements))
+ return SDValue();
}
return GS.getNode(DAG, SDLoc(VSN));
}
@@ -4722,9 +4740,12 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
// Return true if VT is a vector whose elements are a whole number of bytes
-// in width.
-static bool canTreatAsByteVector(EVT VT) {
- return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0;
+// in width. Also check for presence of vector support.
+bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
+ if (!Subtarget.hasVector())
+ return false;
+
+ return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
}
// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
@@ -4986,6 +5007,10 @@ SDValue SystemZTargetLowering::combineSTORE(
SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
SDNode *N, DAGCombinerInfo &DCI) const {
+
+ if (!Subtarget.hasVector())
+ return SDValue();
+
// Try to simplify a vector extraction.
if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
SDValue Op0 = N->getOperand(0);
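Both SystemZ combine changes above enforce the same rule: a target DAG combine must not manufacture node types the current subtarget cannot select, so each entry point now begins with a feature check. Minimal shape of the guard (names invented):

// Invented names; the guard shape used by the SystemZ combines above.
SDValue combineSomething(SDNode *N, SelectionDAG &DAG, bool HasVector) {
  if (!HasVector)
    return SDValue();   // decline: generic code keeps the original node
  // ... safe to build vector-typed nodes here ...
  return SDValue();
}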
@@ -5233,7 +5258,7 @@ static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
- .addOperand(Base)
+ .add(Base)
.addImm(0)
.addReg(0);
return Reg;
@@ -5322,8 +5347,11 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
if (Invert)
CCMask ^= CCValid;
BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
- .addReg(SrcReg).addOperand(Base).addImm(Disp)
- .addImm(CCValid).addImm(CCMask);
+ .addReg(SrcReg)
+ .add(Base)
+ .addImm(Disp)
+ .addImm(CCValid)
+ .addImm(CCMask);
MI.eraseFromParent();
return MBB;
}
@@ -5350,7 +5378,10 @@ MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
// # fallthrough to JoinMBB
MBB = FalseMBB;
BuildMI(MBB, DL, TII->get(StoreOpcode))
- .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
+ .addReg(SrcReg)
+ .add(Base)
+ .addImm(Disp)
+ .addReg(IndexReg);
MBB->addSuccessor(JoinMBB);
MI.eraseFromParent();
@@ -5415,8 +5446,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
// %OrigVal = L Disp(%Base)
// # fall through to LoopMBB
MBB = StartMBB;
- BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
- .addOperand(Base).addImm(Disp).addReg(0);
+ BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
MBB->addSuccessor(LoopMBB);
// LoopMBB:
@@ -5437,8 +5467,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
if (Invert) {
// Perform the operation normally and then invert every bit of the field.
unsigned Tmp = MRI.createVirtualRegister(RC);
- BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
- .addReg(RotatedOldVal).addOperand(Src2);
+ BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
if (BitSize <= 32)
// XILF with the upper BitSize bits set.
BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
@@ -5454,7 +5483,8 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
} else if (BinOpcode)
// A simple binary operation.
BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
- .addReg(RotatedOldVal).addOperand(Src2);
+ .addReg(RotatedOldVal)
+ .add(Src2);
else if (IsSubWord)
// Use RISBG to rotate Src2 into position and use it to replace the
// field in RotatedOldVal.
@@ -5465,7 +5495,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
.addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
- .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
+ .addReg(OldVal)
+ .addReg(NewVal)
+ .add(Base)
+ .addImm(Disp);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB);
@@ -5533,8 +5566,7 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
// %OrigVal = L Disp(%Base)
// # fall through to LoopMMB
MBB = StartMBB;
- BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
- .addOperand(Base).addImm(Disp).addReg(0);
+ BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
MBB->addSuccessor(LoopMBB);
// LoopMBB:
@@ -5581,7 +5613,10 @@ MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
.addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
- .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
+ .addReg(OldVal)
+ .addReg(NewVal)
+ .add(Base)
+ .addImm(Disp);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB);
@@ -5642,7 +5677,9 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
// # fall through to LoopMBB
MBB = StartMBB;
BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
- .addOperand(Base).addImm(Disp).addReg(0);
+ .add(Base)
+ .addImm(Disp)
+ .addReg(0);
MBB->addSuccessor(LoopMBB);
// LoopMBB:
@@ -5696,7 +5733,10 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
.addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
- .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp);
+ .addReg(OldVal)
+ .addReg(StoreVal)
+ .add(Base)
+ .addImm(Disp);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB);
@@ -5869,7 +5909,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
if (!isUInt<12>(DestDisp)) {
unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
- .addOperand(DestBase)
+ .add(DestBase)
.addImm(DestDisp)
.addReg(0);
DestBase = MachineOperand::CreateReg(Reg, false);
@@ -5878,15 +5918,18 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
if (!isUInt<12>(SrcDisp)) {
unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg)
- .addOperand(SrcBase)
+ .add(SrcBase)
.addImm(SrcDisp)
.addReg(0);
SrcBase = MachineOperand::CreateReg(Reg, false);
SrcDisp = 0;
}
BuildMI(*MBB, MI, DL, TII->get(Opcode))
- .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength)
- .addOperand(SrcBase).addImm(SrcDisp);
+ .add(DestBase)
+ .addImm(DestDisp)
+ .addImm(ThisLength)
+ .add(SrcBase)
+ .addImm(SrcDisp);
DestDisp += ThisLength;
SrcDisp += ThisLength;
Length -= ThisLength;
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 7a21a474c119..7d92a7355877 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -537,6 +537,7 @@ private:
unsigned UnpackHigh) const;
SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
+ bool canTreatAsByteVector(EVT VT) const;
SDValue combineExtract(const SDLoc &DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
unsigned Index, DAGCombinerInfo &DCI,
bool Force) const;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 3565d5f2c49c..c8ff9558cc88 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -11,12 +11,33 @@
//
//===----------------------------------------------------------------------===//
-#include "SystemZInstrInfo.h"
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZ.h"
#include "SystemZInstrBuilder.h"
-#include "SystemZTargetMachine.h"
-#include "llvm/CodeGen/LiveVariables.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
using namespace llvm;
@@ -58,12 +79,25 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
MachineInstr *EarlierMI = MF.CloneMachineInstr(&*MI);
MBB->insert(MI, EarlierMI);
- // Set up the two 64-bit registers.
+ // Set up the two 64-bit registers and remember super reg and its flags.
MachineOperand &HighRegOp = EarlierMI->getOperand(0);
MachineOperand &LowRegOp = MI->getOperand(0);
+ unsigned Reg128 = LowRegOp.getReg();
+ unsigned Reg128Killed = getKillRegState(LowRegOp.isKill());
+ unsigned Reg128Undef = getUndefRegState(LowRegOp.isUndef());
HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_h64));
LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_l64));
+ if (MI->mayStore()) {
+ // Add implicit uses of the super register in case one of the subregs is
+ // undefined. We could track liveness and skip storing an undefined
+ // subreg, but this is hopefully rare (discovered with llvm-stress).
+ // If Reg128 was killed, set kill flag on MI.
+ unsigned Reg128UndefImpl = (Reg128Undef | RegState::Implicit);
+ MachineInstrBuilder(MF, EarlierMI).addReg(Reg128, Reg128UndefImpl);
+ MachineInstrBuilder(MF, MI).addReg(Reg128, (Reg128UndefImpl | Reg128Killed));
+ }
+
// The address in the first (high) instruction is already correct.
// Adjust the offset in the second (low) instruction.
MachineOperand &HighOffsetOp = EarlierMI->getOperand(2);
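The splitMove fix attaches implicit uses of the original 128-bit register to both halves so liveness stays correct when one subregister is undef; the kill flag, if any, lands on the second (last) store. The getKillRegState/getUndefRegState helpers used above fold the optional flags into a single operand-state mask:

// Idiom used above: compose optional RegState flags for one operand.
unsigned State = RegState::Implicit
               | getUndefRegState(LowRegOp.isUndef())  // subreg may be undef
               | getKillRegState(LowRegOp.isKill());   // last use of Reg128
MachineInstrBuilder(MF, MI).addReg(Reg128, State);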
@@ -131,7 +165,8 @@ void SystemZInstrInfo::expandRIEPseudo(MachineInstr &MI, unsigned LowOpcode,
MI.setDesc(get(LowOpcodeK));
else {
emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, SrcReg,
- SystemZ::LR, 32, MI.getOperand(1).isKill());
+ SystemZ::LR, 32, MI.getOperand(1).isKill(),
+ MI.getOperand(1).isUndef());
MI.setDesc(get(DestIsHigh ? HighOpcode : LowOpcode));
MI.getOperand(1).setReg(DestReg);
MI.tieOperands(0, 1);
@@ -185,9 +220,15 @@ void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
// are low registers, otherwise use RISB[LH]G.
void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned Size) const {
- emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(),
- MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), LowOpcode,
- Size, MI.getOperand(1).isKill());
+ MachineInstrBuilder MIB =
+ emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(),
+ MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), LowOpcode,
+ Size, MI.getOperand(1).isKill(), MI.getOperand(1).isUndef());
+
+ // Keep the remaining operands as-is.
+ for (unsigned I = 2; I < MI.getNumOperands(); ++I)
+ MIB.add(MI.getOperand(I));
+
MI.eraseFromParent();
}
@@ -227,11 +268,13 @@ void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const {
// are low registers, otherwise use RISB[LH]G. Size is the number of bits
// taken from the low end of SrcReg (8 for LLCR, 16 for LLHR and 32 for LR).
// KillSrc is true if this move is the last use of SrcReg.
-void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, unsigned DestReg,
- unsigned SrcReg, unsigned LowLowOpcode,
- unsigned Size, bool KillSrc) const {
+MachineInstrBuilder
+SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned DestReg,
+ unsigned SrcReg, unsigned LowLowOpcode,
+ unsigned Size, bool KillSrc,
+ bool UndefSrc) const {
unsigned Opcode;
bool DestIsHigh = isHighReg(DestReg);
bool SrcIsHigh = isHighReg(SrcReg);
@@ -242,18 +285,16 @@ void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
else if (!DestIsHigh && SrcIsHigh)
Opcode = SystemZ::RISBLH;
else {
- BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
+ return BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc) | getUndefRegState(UndefSrc));
}
unsigned Rotate = (DestIsHigh != SrcIsHigh ? 32 : 0);
- BuildMI(MBB, MBBI, DL, get(Opcode), DestReg)
+ return BuildMI(MBB, MBBI, DL, get(Opcode), DestReg)
.addReg(DestReg, RegState::Undef)
- .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(SrcReg, getKillRegState(KillSrc) | getUndefRegState(UndefSrc))
.addImm(32 - Size).addImm(128 + 31).addImm(Rotate);
}
-
MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool NewMI,
unsigned OpIdx1,
@@ -282,7 +323,6 @@ MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI,
}
}
-
// If MI is a simple load or store for a frame object, return the register
// it loads or stores and set FrameIndex to the index of the frame object.
// Return 0 otherwise.
@@ -586,7 +626,6 @@ bool SystemZInstrInfo::optimizeCompareInstr(
removeIPMBasedCompare(Compare, SrcReg, MRI, &RI);
}
-
bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Pred,
unsigned TrueReg, unsigned FalseReg,
@@ -640,6 +679,12 @@ void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
else {
Opc = SystemZ::LOCR;
MRI.constrainRegClass(DstReg, &SystemZ::GR32BitRegClass);
+ unsigned TReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ unsigned FReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+ BuildMI(MBB, I, DL, get(TargetOpcode::COPY), TReg).addReg(TrueReg);
+ BuildMI(MBB, I, DL, get(TargetOpcode::COPY), FReg).addReg(FalseReg);
+ TrueReg = TReg;
+ FalseReg = FReg;
}
} else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC))
Opc = SystemZ::LOCGR;
@@ -706,7 +751,7 @@ bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
return true;
}
-bool SystemZInstrInfo::isPredicable(MachineInstr &MI) const {
+bool SystemZInstrInfo::isPredicable(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
if (Opcode == SystemZ::Return ||
Opcode == SystemZ::Trap ||
@@ -780,10 +825,11 @@ bool SystemZInstrInfo::PredicateInstruction(
MI.RemoveOperand(0);
MI.setDesc(get(SystemZ::CallBRCL));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addImm(CCValid).addImm(CCMask)
- .addOperand(FirstOp)
- .addRegMask(RegMask)
- .addReg(SystemZ::CC, RegState::Implicit);
+ .addImm(CCValid)
+ .addImm(CCMask)
+ .add(FirstOp)
+ .addRegMask(RegMask)
+ .addReg(SystemZ::CC, RegState::Implicit);
return true;
}
if (Opcode == SystemZ::CallBR) {
@@ -813,7 +859,8 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
if (SystemZ::GRX32BitRegClass.contains(DestReg, SrcReg)) {
- emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc);
+ emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc,
+ false);
return;
}
@@ -888,15 +935,19 @@ static bool isSimpleBD12Move(const MachineInstr *MI, unsigned Flag) {
}
namespace {
+
struct LogicOp {
- LogicOp() : RegSize(0), ImmLSB(0), ImmSize(0) {}
+ LogicOp() = default;
LogicOp(unsigned regSize, unsigned immLSB, unsigned immSize)
: RegSize(regSize), ImmLSB(immLSB), ImmSize(immSize) {}
explicit operator bool() const { return RegSize; }
- unsigned RegSize, ImmLSB, ImmSize;
+ unsigned RegSize = 0;
+ unsigned ImmLSB = 0;
+ unsigned ImmSize = 0;
};
+
} // end anonymous namespace
static LogicOp interpretAndImmediate(unsigned Opcode) {
@@ -976,12 +1027,12 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress(
MachineInstrBuilder MIB(
*MF, MF->CreateMachineInstr(get(ThreeOperandOpcode), MI.getDebugLoc(),
/*NoImplicit=*/true));
- MIB.addOperand(Dest);
+ MIB.add(Dest);
// Keep the kill state, but drop the tied flag.
MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg());
// Keep the remaining operands as-is.
for (unsigned I = 2; I < NumOps; ++I)
- MIB.addOperand(MI.getOperand(I));
+ MIB.add(MI.getOperand(I));
MBB->insert(MI, MIB);
return finishConvertToThreeAddress(&MI, MIB, LV);
}
@@ -1009,7 +1060,7 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress(
MachineOperand &Src = MI.getOperand(1);
MachineInstrBuilder MIB =
BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpcode))
- .addOperand(Dest)
+ .add(Dest)
.addReg(0)
.addReg(Src.getReg(), getKillRegState(Src.isKill()),
Src.getSubReg())
@@ -1040,7 +1091,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
MCRegUnitIterator CCUnit(SystemZ::CC, TRI);
LiveRange &CCLiveRange = LIS->getRegUnit(*CCUnit);
++CCUnit;
- assert (!CCUnit.isValid() && "CC only has one reg unit.");
+ assert(!CCUnit.isValid() && "CC only has one reg unit.");
SlotIndex MISlot =
LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
if (!CCLiveRange.liveAt(MISlot)) {
@@ -1091,7 +1142,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
unsigned StoreOpcode = Op1IsGPR ? SystemZ::STG : SystemZ::STD;
return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
get(StoreOpcode))
- .addOperand(MI.getOperand(1))
+ .add(MI.getOperand(1))
.addFrameIndex(FrameIndex)
.addImm(0)
.addReg(0);
@@ -1100,12 +1151,12 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
// destination register instead.
if (OpNum == 1) {
unsigned LoadOpcode = Op0IsGPR ? SystemZ::LG : SystemZ::LD;
- unsigned Dest = MI.getOperand(0).getReg();
return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
- get(LoadOpcode), Dest)
- .addFrameIndex(FrameIndex)
- .addImm(0)
- .addReg(0);
+ get(LoadOpcode))
+ .add(MI.getOperand(0))
+ .addFrameIndex(FrameIndex)
+ .addImm(0)
+ .addReg(0);
}
}
@@ -1132,7 +1183,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
.addFrameIndex(FrameIndex)
.addImm(0)
.addImm(Size)
- .addOperand(MI.getOperand(1))
+ .add(MI.getOperand(1))
.addImm(MI.getOperand(2).getImm())
.addMemOperand(MMO);
}
@@ -1140,7 +1191,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
if (isSimpleBD12Move(&MI, SystemZII::SimpleBDXStore)) {
return BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(),
get(SystemZ::MVC))
- .addOperand(MI.getOperand(1))
+ .add(MI.getOperand(1))
.addImm(MI.getOperand(2).getImm())
.addImm(Size)
.addFrameIndex(FrameIndex)
@@ -1164,7 +1215,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
MI.getDebugLoc(), get(MemOpcode));
for (unsigned I = 0; I < OpNum; ++I)
- MIB.addOperand(MI.getOperand(I));
+ MIB.add(MI.getOperand(I));
MIB.addFrameIndex(FrameIndex).addImm(Offset);
if (MemDesc.TSFlags & SystemZII::HasIndex)
MIB.addReg(0);
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 794b193a501e..b8be1f5f3921 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -16,16 +16,22 @@
#include "SystemZ.h"
#include "SystemZRegisterInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include <cstdint>
#define GET_INSTRINFO_HEADER
#include "SystemZGenInstrInfo.inc"
namespace llvm {
-class SystemZTargetMachine;
+class SystemZSubtarget;
namespace SystemZII {
+
enum {
// See comments in SystemZInstrFormats.td.
SimpleBDXLoad = (1 << 0),
@@ -43,12 +49,15 @@ enum {
CCMaskLast = (1 << 19),
IsLogical = (1 << 20)
};
+
static inline unsigned getAccessSize(unsigned int Flags) {
return (Flags & AccessSizeMask) >> AccessSizeShift;
}
+
static inline unsigned getCCValues(unsigned int Flags) {
return (Flags & CCValuesMask) >> CCValuesShift;
}
+
static inline unsigned getCompareZeroCCMask(unsigned int Flags) {
return (Flags & CompareZeroCCMaskMask) >> CompareZeroCCMaskShift;
}
@@ -64,6 +73,7 @@ enum {
// @INDNTPOFF
MO_INDNTPOFF = (2 << 0)
};
+
// Classifies a branch.
enum BranchType {
// An instruction that branches on the current value of CC.
@@ -93,6 +103,7 @@ enum BranchType {
// the result is nonzero.
BranchCTG
};
+
// Information about a branch instruction.
struct Branch {
// The type of the branch.
@@ -111,6 +122,7 @@ struct Branch {
const MachineOperand *target)
: Type(type), CCValid(ccValid), CCMask(ccMask), Target(target) {}
};
+
// Kinds of fused compares in compare-and-* instructions. Together with type
// of the converted compare, this identifies the compare-and-*
// instruction.
@@ -127,9 +139,9 @@ enum FusedCompareType {
// Trap
CompareAndTrap
};
+
} // end namespace SystemZII
-class SystemZSubtarget;
class SystemZInstrInfo : public SystemZGenInstrInfo {
const SystemZRegisterInfo RI;
SystemZSubtarget &STI;
@@ -149,9 +161,13 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned Size) const;
void expandLoadStackGuard(MachineInstr *MI) const;
- void emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
- unsigned LowLowOpcode, unsigned Size, bool KillSrc) const;
+
+ MachineInstrBuilder
+ emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
+ unsigned LowLowOpcode, unsigned Size, bool KillSrc,
+ bool UndefSrc) const;
+
virtual void anchor();
protected:
@@ -203,7 +219,7 @@ public:
unsigned FalseReg) const override;
bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
MachineRegisterInfo *MRI) const override;
- bool isPredicable(MachineInstr &MI) const override;
+ bool isPredicable(const MachineInstr &MI) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,
BranchProbability Probability) const override;
@@ -304,6 +320,7 @@ public:
areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
AliasAnalysis *AA = nullptr) const override;
};
+
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZINSTRINFO_H
diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td
index 738ea7a33729..0158fe6aec08 100644
--- a/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/lib/Target/SystemZ/SystemZInstrVector.td
@@ -56,17 +56,28 @@ def : VectorExtractSubreg<v4i32, VLGVF>;
//===----------------------------------------------------------------------===//
let Predicates = [FeatureVector] in {
- // Generate byte mask.
- def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
- def VONE : InherentVRIa<"vone", 0xE744, 0xffff>;
- def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
-
- // Generate mask.
- def VGM : BinaryVRIbGeneric<"vgm", 0xE746>;
- def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>;
- def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>;
- def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>;
- def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>;
+ let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
+ isReMaterializable = 1 in {
+
+ // Generate byte mask.
+ def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
+ def VONE : InherentVRIa<"vone", 0xE744, 0xffff>;
+ def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
+
+ // Generate mask.
+ def VGM : BinaryVRIbGeneric<"vgm", 0xE746>;
+ def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>;
+ def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>;
+ def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>;
+ def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>;
+
+ // Replicate immediate.
+ def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>;
+ def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>;
+ def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>;
+ def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>;
+ def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>;
+ }
// Load element immediate.
//
@@ -86,13 +97,6 @@ let Predicates = [FeatureVector] in {
def VLEIG : TernaryVRIa<"vleig", 0xE742, z_vector_insert,
v128g, v128g, imm64sx16, imm32zx1>;
}
-
- // Replicate immediate.
- def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>;
- def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>;
- def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>;
- def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>;
- def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp
index 14ff6afbd4ae..791f0334e0f1 100644
--- a/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -53,15 +53,21 @@
//
//===----------------------------------------------------------------------===//
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
#include "SystemZTargetMachine.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -70,72 +76,72 @@ using namespace llvm;
STATISTIC(LongBranches, "Number of long branches.");
namespace {
+
// Represents positional information about a basic block.
struct MBBInfo {
// The address that we currently assume the block has.
- uint64_t Address;
+ uint64_t Address = 0;
// The size of the block in bytes, excluding terminators.
// This value never changes.
- uint64_t Size;
+ uint64_t Size = 0;
// The minimum alignment of the block, as a log2 value.
// This value never changes.
- unsigned Alignment;
+ unsigned Alignment = 0;
// The number of terminators in this block. This value never changes.
- unsigned NumTerminators;
+ unsigned NumTerminators = 0;
- MBBInfo()
- : Address(0), Size(0), Alignment(0), NumTerminators(0) {}
+ MBBInfo() = default;
};
// Represents the state of a block terminator.
struct TerminatorInfo {
// If this terminator is a relaxable branch, this points to the branch
// instruction, otherwise it is null.
- MachineInstr *Branch;
+ MachineInstr *Branch = nullptr;
// The address that we currently assume the terminator has.
- uint64_t Address;
+ uint64_t Address = 0;
// The current size of the terminator in bytes.
- uint64_t Size;
+ uint64_t Size = 0;
// If Branch is nonnull, this is the number of the target block,
// otherwise it is unused.
- unsigned TargetBlock;
+ unsigned TargetBlock = 0;
// If Branch is nonnull, this is the length of the longest relaxed form,
// otherwise it is zero.
- unsigned ExtraRelaxSize;
+ unsigned ExtraRelaxSize = 0;
- TerminatorInfo() : Branch(nullptr), Size(0), TargetBlock(0),
- ExtraRelaxSize(0) {}
+ TerminatorInfo() = default;
};
// Used to keep track of the current position while iterating over the blocks.
struct BlockPosition {
// The address that we assume this position has.
- uint64_t Address;
+ uint64_t Address = 0;
// The number of low bits in Address that are known to be the same
// as the runtime address.
unsigned KnownBits;
- BlockPosition(unsigned InitialAlignment)
- : Address(0), KnownBits(InitialAlignment) {}
+ BlockPosition(unsigned InitialAlignment) : KnownBits(InitialAlignment) {}
};
class SystemZLongBranch : public MachineFunctionPass {
public:
static char ID;
+
SystemZLongBranch(const SystemZTargetMachine &tm)
- : MachineFunctionPass(ID), TII(nullptr) {}
+ : MachineFunctionPass(ID) {}
StringRef getPassName() const override { return "SystemZ Long Branch"; }
bool runOnMachineFunction(MachineFunction &F) override;
+
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
@@ -155,7 +161,7 @@ private:
void relaxBranch(TerminatorInfo &Terminator);
void relaxBranches();
- const SystemZInstrInfo *TII;
+ const SystemZInstrInfo *TII = nullptr;
MachineFunction *MF;
SmallVector<MBBInfo, 16> MBBs;
SmallVector<TerminatorInfo, 16> Terminators;
@@ -165,11 +171,8 @@ char SystemZLongBranch::ID = 0;
const uint64_t MaxBackwardRange = 0x10000;
const uint64_t MaxForwardRange = 0xfffe;
-} // end anonymous namespace
-FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) {
- return new SystemZLongBranch(TM);
-}
+} // end anonymous namespace
// Position describes the state immediately before Block. Update Block
// accordingly and move Position to the end of the block's non-terminator
@@ -354,13 +357,13 @@ void SystemZLongBranch::splitBranchOnCount(MachineInstr *MI,
MachineBasicBlock *MBB = MI->getParent();
DebugLoc DL = MI->getDebugLoc();
BuildMI(*MBB, MI, DL, TII->get(AddOpcode))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1))
- .addImm(-1);
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1))
+ .addImm(-1);
MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
- .addImm(SystemZ::CCMASK_ICMP)
- .addImm(SystemZ::CCMASK_CMP_NE)
- .addOperand(MI->getOperand(2));
+ .addImm(SystemZ::CCMASK_ICMP)
+ .addImm(SystemZ::CCMASK_CMP_NE)
+ .add(MI->getOperand(2));
// The implicit use of CC is a killing use.
BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
MI->eraseFromParent();
@@ -373,12 +376,12 @@ void SystemZLongBranch::splitCompareBranch(MachineInstr *MI,
MachineBasicBlock *MBB = MI->getParent();
DebugLoc DL = MI->getDebugLoc();
BuildMI(*MBB, MI, DL, TII->get(CompareOpcode))
- .addOperand(MI->getOperand(0))
- .addOperand(MI->getOperand(1));
+ .add(MI->getOperand(0))
+ .add(MI->getOperand(1));
MachineInstr *BRCL = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BRCL))
- .addImm(SystemZ::CCMASK_ICMP)
- .addOperand(MI->getOperand(2))
- .addOperand(MI->getOperand(3));
+ .addImm(SystemZ::CCMASK_ICMP)
+ .add(MI->getOperand(2))
+ .add(MI->getOperand(3));
// The implicit use of CC is a killing use.
BRCL->addRegisterKilled(SystemZ::CC, &TII->getRegisterInfo());
MI->eraseFromParent();
@@ -463,3 +466,7 @@ bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) {
relaxBranches();
return true;
}
+
+FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) {
+ return new SystemZLongBranch(TM);
+}
diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.h b/lib/Target/SystemZ/SystemZMachineScheduler.h
index b919758b70e7..12357e0348a9 100644
--- a/lib/Target/SystemZ/SystemZMachineScheduler.h
+++ b/lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -1,4 +1,4 @@
-//==-- SystemZMachineScheduler.h - SystemZ Scheduler Interface -*- C++ -*---==//
+//==- SystemZMachineScheduler.h - SystemZ Scheduler Interface ----*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -14,10 +14,10 @@
// usage of processor resources.
//===----------------------------------------------------------------------===//
-#include "SystemZInstrInfo.h"
#include "SystemZHazardRecognizer.h"
#include "llvm/CodeGen/MachineScheduler.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include <set>
#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
@@ -28,29 +28,29 @@ namespace llvm {
/// A MachineSchedStrategy implementation for SystemZ post RA scheduling.
class SystemZPostRASchedStrategy : public MachineSchedStrategy {
- ScheduleDAGMI *DAG;
+ ScheduleDAGMI *DAG;
/// A candidate during instruction evaluation.
struct Candidate {
- SUnit *SU;
+ SUnit *SU = nullptr;
/// The decoding cost.
- int GroupingCost;
+ int GroupingCost = 0;
/// The processor resources cost.
- int ResourcesCost;
+ int ResourcesCost = 0;
- Candidate() : SU(nullptr), GroupingCost(0), ResourcesCost(0) {}
+ Candidate() = default;
Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec);
// Compare two candidates.
bool operator<(const Candidate &other);
// Check if this node is free of cost ("as good as any").
- bool inline noCost() {
+ bool noCost() const {
return (GroupingCost <= 0 && !ResourcesCost);
}
- };
+ };
// A sorter for the Available set that makes sure that SUs are considered
// in the best order.
@@ -83,7 +83,7 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy {
// region.
SystemZHazardRecognizer HazardRec;
- public:
+public:
SystemZPostRASchedStrategy(const MachineSchedContext *C);
/// PostRA scheduling does not track pressure.
@@ -107,6 +107,6 @@ class SystemZPostRASchedStrategy : public MachineSchedStrategy {
void releaseBottomNode(SUnit *SU) override {};
};
-} // namespace llvm
+} // end namespace llvm
-#endif /* LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H */
+#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
diff --git a/lib/Target/SystemZ/SystemZScheduleZ13.td b/lib/Target/SystemZ/SystemZScheduleZ13.td
index e97d61d8355d..7aee6f52e9a7 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -855,8 +855,8 @@ def : InstRW<[VecXsPm], (instregex "VZERO$")>;
def : InstRW<[VecXsPm], (instregex "VONE$")>;
def : InstRW<[VecXsPm], (instregex "VGBM$")>;
def : InstRW<[VecXsPm], (instregex "VGM(B|F|G|H)?$")>;
-def : InstRW<[VecXsPm], (instregex "VLEI(B|F|G|H)$")>;
def : InstRW<[VecXsPm], (instregex "VREPI(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VLEI(B|F|G|H)$")>;
//===----------------------------------------------------------------------===//
// Vector: Loads
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index 83882fc0310a..263aff8b7bfb 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -167,10 +167,10 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
MI.RemoveOperand(0);
MI.setDesc(TII->get(Opcode));
MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
- .addOperand(Dest)
- .addOperand(Mode)
- .addOperand(Src)
- .addOperand(Suppress);
+ .add(Dest)
+ .add(Mode)
+ .add(Src)
+ .add(Suppress);
return true;
}
return false;
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 33fdb8f90825..ede5005fa491 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -7,14 +7,25 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZ.h"
+#include "SystemZMachineScheduler.h"
#include "SystemZTargetMachine.h"
#include "SystemZTargetTransformInfo.h"
-#include "SystemZMachineScheduler.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include <string>
using namespace llvm;
@@ -48,7 +59,7 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) {
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
StringRef FS) {
bool VectorABI = UsesVectorABI(CPU, FS);
- std::string Ret = "";
+ std::string Ret;
// Big endian.
Ret += "E";
@@ -96,14 +107,15 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, computeDataLayout(TT, CPU, FS), TT, CPU, FS, Options,
getEffectiveRelocModel(RM), CM, OL),
- TLOF(make_unique<TargetLoweringObjectFileELF>()),
+ TLOF(llvm::make_unique<TargetLoweringObjectFileELF>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
-SystemZTargetMachine::~SystemZTargetMachine() {}
+SystemZTargetMachine::~SystemZTargetMachine() = default;
namespace {
+
/// SystemZ Code Generator Pass Configuration Options.
class SystemZPassConfig : public TargetPassConfig {
public:
@@ -116,7 +128,8 @@ public:
ScheduleDAGInstrs *
createPostMachineScheduler(MachineSchedContext *C) const override {
- return new ScheduleDAGMI(C, make_unique<SystemZPostRASchedStrategy>(C),
+ return new ScheduleDAGMI(C,
+ llvm::make_unique<SystemZPostRASchedStrategy>(C),
/*RemoveKillFlags=*/true);
}
@@ -126,6 +139,7 @@ public:
void addPreSched2() override;
void addPreEmitPass() override;
};
+
} // end anonymous namespace
void SystemZPassConfig::addIRPasses() {
@@ -157,7 +171,6 @@ void SystemZPassConfig::addPreSched2() {
}
void SystemZPassConfig::addPreEmitPass() {
-
// Do instruction shortening before compare elimination because some
// vector instructions will be shortened into opcodes that compare
// elimination recognizes.
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 69cf9bc6e525..a10ca64fa632 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -1,4 +1,4 @@
-//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=//
+//=- SystemZTargetMachine.h - Define TargetMachine for SystemZ ----*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -16,15 +16,18 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H
#include "SystemZSubtarget.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
+#include <memory>
namespace llvm {
-class TargetFrameLowering;
-
class SystemZTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- SystemZSubtarget Subtarget;
+ SystemZSubtarget Subtarget;
public:
SystemZTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -34,20 +37,22 @@ public:
~SystemZTargetMachine() override;
const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
const SystemZSubtarget *getSubtargetImpl(const Function &) const override {
return &Subtarget;
}
+
// Override LLVMTargetMachine
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
TargetIRAnalysis getTargetIRAnalysis() override;
+
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
bool targetSchedulesPostRAScheduling() const override { return true; };
-
};
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETMACHINE_H
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index b10c0e09a0d4..e74c9a80515d 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -259,11 +259,8 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L,
}
}
if (isa<StoreInst>(&I)) {
- NumStores++;
Type *MemAccessTy = I.getOperand(0)->getType();
- if((MemAccessTy->isIntegerTy() || MemAccessTy->isFloatingPointTy()) &&
- (getDataLayout().getTypeSizeInBits(MemAccessTy) == 128))
- NumStores++; // 128 bit fp/int stores get split.
+ NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, 0, 0);
}
}
@@ -313,3 +310,547 @@ unsigned SystemZTTIImpl::getRegisterBitWidth(bool Vector) {
return 0;
}
+int SystemZTTIImpl::getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
+ TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args) {
+
+ // TODO: return a good value for BB-VECTORIZER that includes the
+ // immediate loads, which we do not want to count for the loop
+ // vectorizer, since they are hopefully hoisted out of the loop. This
+ // would require a new parameter 'InLoop', but it is not clear whether
+ // constant args are common enough to motivate this.
+
+ unsigned ScalarBits = Ty->getScalarSizeInBits();
+
+ if (Ty->isVectorTy()) {
+ assert(ST->hasVector() && "getArithmeticInstrCost() called with vector type.");
+ unsigned VF = Ty->getVectorNumElements();
+ unsigned NumVectors = getNumberOfParts(Ty);
+
+ // These vector operations are custom handled, but are still supported
+ // with one instruction per vector, regardless of element size.
+ if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
+ Opcode == Instruction::AShr) {
+ return NumVectors;
+ }
+
+ // These FP operations are supported with a single vector instruction for
+ // double (base implementation assumes float generally costs 2). For
+ // FP128, the scalar cost is 1, and there is no overhead since the values
+ // are already in scalar registers.
+ if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FMul || Opcode == Instruction::FDiv) {
+ switch (ScalarBits) {
+ case 32: {
+ // Return the cost of multiple scalar invocations plus the cost of
+ // inserting and extracting the values.
+ unsigned ScalarCost = getArithmeticInstrCost(Opcode, Ty->getScalarType());
+ unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args);
+ // FIXME: VF 2 for these FP operations are currently just as
+ // expensive as for VF 4.
+ if (VF == 2)
+ Cost *= 2;
+ return Cost;
+ }
+ case 64:
+ case 128:
+ return NumVectors;
+ default:
+ break;
+ }
+ }
+
+ // There is no native support for FRem.
+ if (Opcode == Instruction::FRem) {
+ unsigned Cost = (VF * LIBCALL_COST) + getScalarizationOverhead(Ty, Args);
+ // FIXME: VF 2 for float is currently just as expensive as for VF 4.
+ if (VF == 2 && ScalarBits == 32)
+ Cost *= 2;
+ return Cost;
+ }
+ } else { // Scalar:
+ // These FP operations are supported with a dedicated instruction for
+ // float, double and fp128 (base implementation assumes float generally
+ // costs 2).
+ if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FMul || Opcode == Instruction::FDiv)
+ return 1;
+
+ // There is no native support for FRem.
+ if (Opcode == Instruction::FRem)
+ return LIBCALL_COST;
+
+ if (Opcode == Instruction::LShr || Opcode == Instruction::AShr)
+ return (ScalarBits >= 32 ? 1 : 2 /*ext*/);
+
+ // Or requires one instruction, although it has custom handling for i64.
+ if (Opcode == Instruction::Or)
+ return 1;
+
+ if (Opcode == Instruction::Xor && ScalarBits == 1)
+ // 2 * ipm sequences ; xor ; shift ; compare
+ return 7;
+
+ // An extra extension for narrow types is needed.
+ if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem))
+ // sext of op(s) for narrow types
+ return (ScalarBits < 32 ? 4 : (ScalarBits == 32 ? 2 : 1));
+
+ if (Opcode == Instruction::UDiv || Opcode == Instruction::URem)
+ // Clearing of low 64 bit reg + sext of op(s) for narrow types + dl[g]r
+ return (ScalarBits < 32 ? 4 : 2);
+ }
+
+ // Fallback to the default implementation.
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
+ Opd1PropInfo, Opd2PropInfo, Args);
+}
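
A few of the scalar rules above, mirrored as a standalone sketch (the constants match the code; LIBCALL_COST is 30 in this patch):

#include <cassert>
unsigned scalarShiftCost(unsigned Bits) { return Bits >= 32 ? 1 : 2; }
unsigned scalarSDivCost(unsigned Bits) {
  return Bits < 32 ? 4 : (Bits == 32 ? 2 : 1);
}
unsigned scalarUDivCost(unsigned Bits) { return Bits < 32 ? 4 : 2; }
int main() {
  assert(scalarShiftCost(16) == 2); // narrow shifts pay for an extension
  assert(scalarSDivCost(32) == 2);  // sext of operand(s) plus the divide
  assert(scalarUDivCost(64) == 2);  // clear low 64-bit reg, then dlgr
}
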
+
+
+int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) {
+ assert(Tp->isVectorTy());
+ assert(ST->hasVector() && "getShuffleCost() called without vector support.");
+ unsigned NumVectors = getNumberOfParts(Tp);
+
+ // TODO: Since fp32 is expanded, the shuffle cost should always be 0.
+
+ // FP128 values are always in scalar registers, so there is no work
+ // involved with a shuffle, except for broadcast. In that case register
+ // moves are done with a single instruction per element.
+ if (Tp->getScalarType()->isFP128Ty())
+ return (Kind == TargetTransformInfo::SK_Broadcast ? NumVectors - 1 : 0);
+
+ switch (Kind) {
+ case TargetTransformInfo::SK_ExtractSubvector:
+ // ExtractSubvector Index indicates start offset.
+
+ // Extracting a subvector from the first index is a no-op.
+ return (Index == 0 ? 0 : NumVectors);
+
+ case TargetTransformInfo::SK_Broadcast:
+ // Loop vectorizer calls here to figure out the extra cost of
+ // broadcasting a loaded value to all elements of a vector. Since vlrep
+ // loads and replicates with a single instruction, adjust the returned
+ // value.
+ return NumVectors - 1;
+
+ default:
+ // SystemZ supports single-instruction permutation / replication.
+ return NumVectors;
+ }
+}
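
The resulting shuffle prices, restated as a sketch for a type spanning NumVectors 128-bit registers (fp128 excluded, per the early return above):

#include <cassert>
static int shuffleCostSketch(bool IsBroadcast, bool IsExtractAtZero,
                             int NumVectors) {
  if (IsExtractAtZero)
    return 0;              // extracting the leading subvector is a no-op
  if (IsBroadcast)
    return NumVectors - 1; // vlrep loads and replicates in one instruction
  return NumVectors;       // one permute/replicate per register otherwise
}
int main() { assert(shuffleCostSketch(true, false, 1) == 0); }
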
+
+// Return the log2 difference of the element sizes of the two vector types.
+static unsigned getElSizeLog2Diff(Type *Ty0, Type *Ty1) {
+ unsigned Bits0 = Ty0->getScalarSizeInBits();
+ unsigned Bits1 = Ty1->getScalarSizeInBits();
+
+ if (Bits1 > Bits0)
+ return (Log2_32(Bits1) - Log2_32(Bits0));
+
+ return (Log2_32(Bits0) - Log2_32(Bits1));
+}
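
A worked example of the helper, as a self-contained sketch: the result is the number of element-size halvings (or doublings) between the types, so i64 -> i8 gives |log2(64) - log2(8)| = 3.

#include <cassert>
static unsigned elSizeLog2DiffSketch(unsigned Bits0, unsigned Bits1) {
  unsigned L0 = 0, L1 = 0;
  for (unsigned V = Bits0; V >>= 1;) ++L0; // integer log2
  for (unsigned V = Bits1; V >>= 1;) ++L1;
  return L0 > L1 ? L0 - L1 : L1 - L0;
}
int main() { assert(elSizeLog2DiffSketch(64, 8) == 3); }
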
+
+// Return the number of instructions needed to truncate SrcTy to DstTy.
+unsigned SystemZTTIImpl::
+getVectorTruncCost(Type *SrcTy, Type *DstTy) {
+ assert(SrcTy->isVectorTy() && DstTy->isVectorTy());
+ assert(SrcTy->getPrimitiveSizeInBits() > DstTy->getPrimitiveSizeInBits() &&
+ "Packing must reduce size of vector type.");
+ assert(SrcTy->getVectorNumElements() == DstTy->getVectorNumElements() &&
+ "Packing should not change number of elements.");
+
+ // TODO: Since fp32 is expanded, the extract cost should always be 0.
+
+ unsigned NumParts = getNumberOfParts(SrcTy);
+ if (NumParts <= 2)
+ // Up to 2 vector registers can be truncated efficiently with pack or
+ // permute. The latter requires an immediate mask to be loaded, which
+ // typically gets hoisted out of a loop. TODO: return a good value for
+ // BB-VECTORIZER that includes the immediate loads, which we do not want
+ // to count for the loop vectorizer.
+ return 1;
+
+ unsigned Cost = 0;
+ unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
+ unsigned VF = SrcTy->getVectorNumElements();
+ for (unsigned P = 0; P < Log2Diff; ++P) {
+ if (NumParts > 1)
+ NumParts /= 2;
+ Cost += NumParts;
+ }
+
+ // Currently, isel emits a general mix of permute and pack instructions
+ // that follows the cost computation above, except for this case, which
+ // takes one instruction less:
+ if (VF == 8 && SrcTy->getScalarSizeInBits() == 64 &&
+ DstTy->getScalarSizeInBits() == 8)
+ Cost--;
+
+ return Cost;
+}
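
Worked example of the recurrence, as a standalone sketch: truncating <8 x i64> (four 128-bit parts) to <8 x i8> has Log2Diff 3, so the parts go 4 -> 2 -> 1 -> 1 for a cost of 2+1+1 = 4, and the special-cased pattern above then subtracts one, giving 3.

#include <cassert>
static unsigned vectorTruncCostSketch(unsigned NumParts, unsigned Log2Diff) {
  unsigned Cost = 0;
  for (unsigned P = 0; P < Log2Diff; ++P) {
    if (NumParts > 1)
      NumParts /= 2;
    Cost += NumParts; // one pack/permute per surviving register
  }
  return Cost;
}
int main() { assert(vectorTruncCostSketch(4, 3) == 4); }
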
+
+// Return the cost of converting a vector bitmask produced by a compare
+// (SrcTy), to the type of the select or extend instruction (DstTy).
+unsigned SystemZTTIImpl::
+getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy) {
+ assert(SrcTy->isVectorTy() && DstTy->isVectorTy() &&
+ "Should only be called with vector types.");
+
+ unsigned PackCost = 0;
+ unsigned SrcScalarBits = SrcTy->getScalarSizeInBits();
+ unsigned DstScalarBits = DstTy->getScalarSizeInBits();
+ unsigned Log2Diff = getElSizeLog2Diff(SrcTy, DstTy);
+ if (SrcScalarBits > DstScalarBits)
+ // The bitmask will be truncated.
+ PackCost = getVectorTruncCost(SrcTy, DstTy);
+ else if (SrcScalarBits < DstScalarBits) {
+ unsigned DstNumParts = getNumberOfParts(DstTy);
+ // Each vector select needs its part of the bitmask unpacked.
+ PackCost = Log2Diff * DstNumParts;
+ // Extra cost for moving part of mask before unpacking.
+ PackCost += DstNumParts - 1;
+ }
+
+ return PackCost;
+}
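
The widening direction in a compact sketch: one unpack per width doubling for each destination register, plus the moves that feed the upper parts. For example, <4 x i32> compare masks feeding <4 x i64> selects give Log2Diff 1 and DstNumParts 2, so 1*2 + 1 = 3.

#include <cassert>
static unsigned maskWidenCostSketch(unsigned Log2Diff, unsigned DstNumParts) {
  return Log2Diff * DstNumParts + (DstNumParts - 1);
}
int main() { assert(maskWidenCostSketch(1, 2) == 3); }
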
+
+// Return the type of the compared operands. This is needed to compute the
+// cost for a Select / ZExt or SExt instruction.
+static Type *getCmpOpsType(const Instruction *I, unsigned VF = 1) {
+ Type *OpTy = nullptr;
+ if (CmpInst *CI = dyn_cast<CmpInst>(I->getOperand(0)))
+ OpTy = CI->getOperand(0)->getType();
+ else if (Instruction *LogicI = dyn_cast<Instruction>(I->getOperand(0)))
+ if (CmpInst *CI0 = dyn_cast<CmpInst>(LogicI->getOperand(0)))
+ if (isa<CmpInst>(LogicI->getOperand(1)))
+ OpTy = CI0->getOperand(0)->getType();
+
+ if (OpTy != nullptr) {
+ if (VF == 1) {
+ assert(!OpTy->isVectorTy() && "Expected scalar type");
+ return OpTy;
+ }
+ // Return the potentially vectorized type based on 'I' and 'VF'. 'I' may
+ // be either scalar or already vectorized with a same or lesser VF.
+ Type *ElTy = OpTy->getScalarType();
+ return VectorType::get(ElTy, VF);
+ }
+
+ return nullptr;
+}
+
+int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) {
+ unsigned DstScalarBits = Dst->getScalarSizeInBits();
+ unsigned SrcScalarBits = Src->getScalarSizeInBits();
+
+ if (Src->isVectorTy()) {
+ assert(ST->hasVector() && "getCastInstrCost() called with vector type.");
+ assert(Dst->isVectorTy());
+ unsigned VF = Src->getVectorNumElements();
+ unsigned NumDstVectors = getNumberOfParts(Dst);
+ unsigned NumSrcVectors = getNumberOfParts(Src);
+
+ if (Opcode == Instruction::Trunc) {
+ if (Src->getScalarSizeInBits() == Dst->getScalarSizeInBits())
+ return 0; // Check for NOOP conversions.
+ return getVectorTruncCost(Src, Dst);
+ }
+
+ if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
+ if (SrcScalarBits >= 8) {
+ // ZExt/SExt will be handled with one unpack per doubling of width.
+ unsigned NumUnpacks = getElSizeLog2Diff(Src, Dst);
+
+ // For types that span multiple vector registers, some additional
+ // instructions are needed to set up the unpacking.
+ unsigned NumSrcVectorOps =
+ (NumUnpacks > 1 ? (NumDstVectors - NumSrcVectors)
+ : (NumDstVectors / 2));
+
+ return (NumUnpacks * NumDstVectors) + NumSrcVectorOps;
+ } else if (SrcScalarBits == 1) {
+ // This should be an extension of a compare i1 result.
+ // If we know the widths of the compared operands, get the cost of
+ // converting the compare bitmask to Dst. Otherwise assume the same
+ // widths.
+ unsigned Cost = 0;
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
+ if (CmpOpTy != nullptr)
+ Cost = getVectorBitmaskConversionCost(CmpOpTy, Dst);
+ if (Opcode == Instruction::ZExt)
+ // One 'vn' per dst vector with an immediate mask.
+ Cost += NumDstVectors;
+ return Cost;
+ }
+ }
+
+ if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP ||
+ Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI) {
+ // TODO: Fix the base implementation, which could simplify things a bit
+ // here (it seems to miss differentiating between scalar and vector types).
+
+ // Only 64 bit vector conversions are natively supported.
+ if (SrcScalarBits == 64 && DstScalarBits == 64)
+ return NumDstVectors;
+
+ // Return the cost of multiple scalar invocations plus the cost of
+ // inserting and extracting the values. Base implementation does not
+ // realize float->int gets scalarized.
+ unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(),
+ Src->getScalarType());
+ unsigned TotCost = VF * ScalarCost;
+ bool NeedsInserts = true, NeedsExtracts = true;
+ // FP128 registers do not get inserted or extracted.
+ if (DstScalarBits == 128 &&
+ (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP))
+ NeedsInserts = false;
+ if (SrcScalarBits == 128 &&
+ (Opcode == Instruction::FPToSI || Opcode == Instruction::FPToUI))
+ NeedsExtracts = false;
+
+ TotCost += getScalarizationOverhead(Dst, NeedsInserts, NeedsExtracts);
+
+ // FIXME: VF 2 for float<->i32 is currently just as expensive as for VF 4.
+ if (VF == 2 && SrcScalarBits == 32 && DstScalarBits == 32)
+ TotCost *= 2;
+
+ return TotCost;
+ }
+
+ if (Opcode == Instruction::FPTrunc) {
+ if (SrcScalarBits == 128) // fp128 -> double/float + inserts of elements.
+ return VF /*ldxbr/lexbr*/ + getScalarizationOverhead(Dst, true, false);
+ else // double -> float
+ return VF / 2 /*vledb*/ + std::max(1U, VF / 4 /*vperm*/);
+ }
+
+ if (Opcode == Instruction::FPExt) {
+ if (SrcScalarBits == 32 && DstScalarBits == 64) {
+ // float -> double is very rare and currently unoptimized. Instead of
+ // using vldeb, which can do two at a time, all conversions are
+ // scalarized.
+ return VF * 2;
+ }
+ // -> fp128. VF * lxdb/lxeb + extraction of elements.
+ return VF + getScalarizationOverhead(Src, false, true);
+ }
+ } else { // Scalar
+ assert(!Dst->isVectorTy());
+
+ if (Opcode == Instruction::SIToFP || Opcode == Instruction::UIToFP)
+ return (SrcScalarBits >= 32 ? 1 : 2 /*i8/i16 extend*/);
+
+ if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
+ Src->isIntegerTy(1)) {
+ // This should be extension of a compare i1 result, which is done with
+ // ipm and a varying sequence of instructions.
+ unsigned Cost = 0;
+ if (Opcode == Instruction::SExt)
+ Cost = (DstScalarBits < 64 ? 3 : 4);
+ if (Opcode == Instruction::ZExt)
+ Cost = 3;
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I) : nullptr);
+ if (CmpOpTy != nullptr && CmpOpTy->isFloatingPointTy())
+ // If operands of an fp type were compared, this costs +1.
+ Cost++;
+
+ return Cost;
+ }
+ }
+
+ return BaseT::getCastInstrCost(Opcode, Dst, Src, I);
+}
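
A worked example of the ZExt/SExt arithmetic above, as a sketch: widening <8 x i16> to <8 x i32> has one unpack per doubling (NumUnpacks 1), one source and two destination vectors, so NumSrcVectorOps is NumDstVectors/2 = 1 and the cost is 1*2 + 1 = 3.

#include <cassert>
static unsigned extCostSketch(unsigned NumUnpacks, unsigned NumSrcVectors,
                              unsigned NumDstVectors) {
  unsigned NumSrcVectorOps = NumUnpacks > 1 ? NumDstVectors - NumSrcVectors
                                            : NumDstVectors / 2;
  return NumUnpacks * NumDstVectors + NumSrcVectorOps;
}
int main() { assert(extCostSketch(1, 1, 2) == 3); } // <8 x i16> -> <8 x i32>
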
+
+int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) {
+ if (ValTy->isVectorTy()) {
+ assert(ST->hasVector() && "getCmpSelInstrCost() called with vector type.");
+ assert(CondTy == nullptr || CondTy->isVectorTy());
+ unsigned VF = ValTy->getVectorNumElements();
+
+ // Called with a compare instruction.
+ if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
+ unsigned PredicateExtraCost = 0;
+ if (I != nullptr) {
+ // Some predicates cost one or two extra instructions.
+ switch (cast<CmpInst>(I)->getPredicate()) {
+ case CmpInst::Predicate::ICMP_NE:
+ case CmpInst::Predicate::ICMP_UGE:
+ case CmpInst::Predicate::ICMP_ULE:
+ case CmpInst::Predicate::ICMP_SGE:
+ case CmpInst::Predicate::ICMP_SLE:
+ PredicateExtraCost = 1;
+ break;
+ case CmpInst::Predicate::FCMP_ONE:
+ case CmpInst::Predicate::FCMP_ORD:
+ case CmpInst::Predicate::FCMP_UEQ:
+ case CmpInst::Predicate::FCMP_UNO:
+ PredicateExtraCost = 2;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Float is handled with 2*vmr[lh]f + 2*vldeb + vfchdb for each pair of
+ // floats. FIXME: <2 x float> generates the same code as <4 x float>.
+ unsigned CmpCostPerVector = (ValTy->getScalarType()->isFloatTy() ? 10 : 1);
+ unsigned NumCmpVectors = getNumberOfParts(ValTy);
+
+ return NumCmpVectors * (CmpCostPerVector + PredicateExtraCost);
+ } else { // Called with a select instruction.
+ assert(Opcode == Instruction::Select);
+
+ // We can figure out the extra cost of packing / unpacking if the
+ // instruction was passed and the compare instruction is found.
+ unsigned PackCost = 0;
+ Type *CmpOpTy = ((I != nullptr) ? getCmpOpsType(I, VF) : nullptr);
+ if (CmpOpTy != nullptr)
+ PackCost =
+ getVectorBitmaskConversionCost(CmpOpTy, ValTy);
+
+ return getNumberOfParts(ValTy) /*vsel*/ + PackCost;
+ }
+ } else { // Scalar
+ switch (Opcode) {
+ case Instruction::ICmp: {
+ unsigned Cost = 1;
+ if (ValTy->isIntegerTy() && ValTy->getScalarSizeInBits() <= 16)
+ Cost += 2; // extend both operands
+ return Cost;
+ }
+ case Instruction::Select:
+ if (ValTy->isFloatingPointTy())
+ return 4; // No load on condition for FP, so this costs a conditional jump.
+ return 1; // Load On Condition.
+ }
+ }
+
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr);
+}
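
One worked data point for the vector-compare pricing, as a sketch: an fcmp one on <4 x float> pays the float expansion (10 per 128-bit part) plus the 2-instruction penalty for the ONE predicate, so a single part costs 12.

#include <cassert>
static unsigned vecCmpCostSketch(unsigned NumParts, bool IsFloat,
                                 unsigned PredicateExtraCost) {
  unsigned CmpCostPerVector = IsFloat ? 10 : 1;
  return NumParts * (CmpCostPerVector + PredicateExtraCost);
}
int main() { assert(vecCmpCostSketch(1, true, 2) == 12); }
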
+
+int SystemZTTIImpl::
+getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
+ // vlvgp will insert two grs into a vector register, so only count half the
+ // number of instructions.
+ if (Opcode == Instruction::InsertElement &&
+ Val->getScalarType()->isIntegerTy(64))
+ return ((Index % 2 == 0) ? 1 : 0);
+
+ if (Opcode == Instruction::ExtractElement) {
+ int Cost = ((Val->getScalarSizeInBits() == 1) ? 2 /*+test-under-mask*/ : 1);
+
+ // Give a slight penalty for moving out of vector pipeline to FXU unit.
+ if (Index == 0 && Val->getScalarType()->isIntegerTy())
+ Cost += 1;
+
+ return Cost;
+ }
+
+ return BaseT::getVectorInstrCost(Opcode, Val, Index);
+}
+
+int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment, unsigned AddressSpace,
+ const Instruction *I) {
+ assert(!Src->isVoidTy() && "Invalid type");
+
+ if (!Src->isVectorTy() && Opcode == Instruction::Load &&
+ I != nullptr && I->hasOneUse()) {
+ const Instruction *UserI = cast<Instruction>(*I->user_begin());
+ unsigned Bits = Src->getScalarSizeInBits();
+ bool FoldsLoad = false;
+ switch (UserI->getOpcode()) {
+ case Instruction::ICmp:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // This also makes sense for float operations, but it is disabled for
+ // now due to regressions.
+ // case Instruction::FCmp:
+ // case Instruction::FAdd:
+ // case Instruction::FSub:
+ // case Instruction::FMul:
+ // case Instruction::FDiv:
+ FoldsLoad = (Bits == 32 || Bits == 64);
+ break;
+ }
+
+ if (FoldsLoad) {
+ assert(UserI->getNumOperands() == 2 && "Expected to only handle binops.");
+
+ // UserI can't fold two loads, so when both operands are single-use
+ // loads, return 0 cost for only one of the pair.
+ for (unsigned i = 0; i < 2; ++i) {
+ if (UserI->getOperand(i) == I)
+ continue;
+ if (LoadInst *LI = dyn_cast<LoadInst>(UserI->getOperand(i))) {
+ if (LI->hasOneUse())
+ return i == 0;
+ }
+ }
+
+ return 0;
+ }
+ }
+
+ unsigned NumOps = getNumberOfParts(Src);
+
+ if (Src->getScalarSizeInBits() == 128)
+ // 128-bit scalars are held in a pair of 64-bit registers.
+ NumOps *= 2;
+
+ return NumOps;
+}
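
The two-load pairing rule above, mirrored as a hypothetical standalone sketch: a lone foldable load is free, and when both operands of the user are single-use loads, exactly one of the pair is billed.

#include <cassert>
static unsigned foldableLoadCostSketch(unsigned MyOpIdx,
                                       bool SiblingIsFoldableLoad) {
  if (!SiblingIsFoldableLoad)
    return 0;                  // the lone load folds into the user for free
  return MyOpIdx == 1 ? 1 : 0; // two loads: bill exactly one of the pair
}
int main() {
  assert(foldableLoadCostSketch(0, true) +
             foldableLoadCostSketch(1, true) == 1);
}
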
+
+int SystemZTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+ assert(isa<VectorType>(VecTy) &&
+ "Expect a vector type for interleaved memory op");
+
+ unsigned WideBits = (VecTy->isPtrOrPtrVectorTy() ?
+ (64U * VecTy->getVectorNumElements()) : VecTy->getPrimitiveSizeInBits());
+ assert(WideBits > 0 && "Could not compute size of vector");
+ int NumWideParts =
+ ((WideBits % 128U) ? ((WideBits / 128U) + 1) : (WideBits / 128U));
+
+ // How many source vectors are handled to produce a vectorized operand?
+ int NumElsPerVector = (VecTy->getVectorNumElements() / NumWideParts);
+ int NumSrcParts =
+ ((NumWideParts > NumElsPerVector) ? NumElsPerVector : NumWideParts);
+
+ // A load group may have gaps.
+ unsigned NumOperands =
+ ((Opcode == Instruction::Load) ? Indices.size() : Factor);
+
+ // Each needed permute takes two vectors as input.
+ if (NumSrcParts > 1)
+ NumSrcParts--;
+ int NumPermutes = NumSrcParts * NumOperands;
+
+ // Cost of load/store operations and the permutations needed.
+ return NumWideParts + NumPermutes;
+}
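
The interleaved-cost formula, mirrored standalone with one worked example (a factor-4 load of <16 x i32> using all four members: 4 wide parts plus 3*4 permutes = 16):

#include <algorithm>
#include <cassert>
static int interleavedCostSketch(unsigned WideBits, int NumElems,
                                 int NumOperands) {
  int NumWideParts = (WideBits + 127U) / 128U; // ceil to 128-bit registers
  int NumElsPerVector = NumElems / NumWideParts;
  int NumSrcParts = std::min(NumWideParts, NumElsPerVector);
  if (NumSrcParts > 1)
    --NumSrcParts;             // each permute consumes two source vectors
  return NumWideParts + NumSrcParts * NumOperands;
}
int main() { assert(interleavedCostSketch(512, 16, 4) == 16); }
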
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index f7d2d827f11b..3766ed45b8c4 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -27,6 +27,8 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
const SystemZSubtarget *getST() const { return ST; }
const SystemZTargetLowering *getTLI() const { return TLI; }
+ const unsigned LIBCALL_COST = 30;
+
public:
explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
@@ -53,6 +55,32 @@ public:
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
+ bool supportsEfficientVectorElementLoadStore() { return true; }
+ bool enableInterleavedAccessVectorization() { return true; }
+
+ int getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+ int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+ unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
+ unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I = nullptr);
+ int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+ int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace, const Instruction *I = nullptr);
+
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace);
/// @}
};
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 375f8511f7ad..50272fda56de 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -264,10 +264,7 @@ bool TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection(
// in discardable section
// FIXME: this isn't the right predicate, should be based on the MCSection
// for the function.
- if (F.isWeakForLinker())
- return true;
-
- return false;
+ return F.isWeakForLinker();
}
/// Given a mergable constant with the specified size and relocation
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 8a6d28490e8c..e8fe0a2b218e 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -74,10 +74,10 @@ void TargetMachine::resetTargetOptions(const Function &F) const {
Options.X = DefaultOptions.X; \
} while (0)
- RESET_OPTION(LessPreciseFPMADOption, "less-precise-fpmad");
RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
+ RESET_OPTION(NoSignedZerosFPMath, "no-signed-zeros-fp-math");
RESET_OPTION(NoTrappingFPMath, "no-trapping-math");
StringRef Denormal =
@@ -156,8 +156,11 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
bool IsTLS = GV && GV->isThreadLocal();
bool IsAccessViaCopyRelocs =
Options.MCOptions.MCPIECopyRelocations && GV && isa<GlobalVariable>(GV);
- // Check if we can use copy relocations.
- if (!IsTLS && (RM == Reloc::Static || IsAccessViaCopyRelocs))
+ Triple::ArchType Arch = TT.getArch();
+ bool IsPPC =
+ Arch == Triple::ppc || Arch == Triple::ppc64 || Arch == Triple::ppc64le;
+ // Check if we can use copy relocations. PowerPC has no copy relocations.
+ if (!IsTLS && !IsPPC && (RM == Reloc::Static || IsAccessViaCopyRelocs))
return true;
}
@@ -198,7 +201,7 @@ CodeGenOpt::Level TargetMachine::getOptLevel() const { return OptLevel; }
void TargetMachine::setOptLevel(CodeGenOpt::Level Level) { OptLevel = Level; }
TargetIRAnalysis TargetMachine::getTargetIRAnalysis() {
- return TargetIRAnalysis([this](const Function &F) {
+ return TargetIRAnalysis([](const Function &F) {
return TargetTransformInfo(F.getParent()->getDataLayout());
});
}
diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt
index d9c53ecc8d08..78b2cdb61b76 100644
--- a/lib/Target/WebAssembly/CMakeLists.txt
+++ b/lib/Target/WebAssembly/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_target(WebAssemblyCodeGen
WebAssemblyAsmPrinter.cpp
WebAssemblyCallIndirectFixup.cpp
WebAssemblyCFGStackify.cpp
+ WebAssemblyCFGSort.cpp
WebAssemblyExplicitLocals.cpp
WebAssemblyFastISel.cpp
WebAssemblyFixIrreducibleControlFlow.cpp
@@ -35,6 +36,7 @@ add_llvm_target(WebAssemblyCodeGen
WebAssemblyRegNumbering.cpp
WebAssemblyRegStackify.cpp
WebAssemblyReplacePhysRegs.cpp
+ WebAssemblyRuntimeLibcallSignatures.cpp
WebAssemblySelectionDAGInfo.cpp
WebAssemblySetP2AlignOperands.cpp
WebAssemblyStoreResults.cpp
diff --git a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index b4763ca60ab6..b5f53114d3e1 100644
--- a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -63,89 +63,8 @@ extern "C" void LLVMInitializeWebAssemblyDisassembler() {
MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
raw_ostream &OS, raw_ostream &CS) const {
- Size = 0;
- uint64_t Pos = 0;
- // Read the opcode.
- if (Pos + sizeof(uint64_t) > Bytes.size())
- return MCDisassembler::Fail;
- uint64_t Opcode = support::endian::read64le(Bytes.data() + Pos);
- Pos += sizeof(uint64_t);
+ // TODO: Implement disassembly.
- if (Opcode >= WebAssembly::INSTRUCTION_LIST_END)
- return MCDisassembler::Fail;
-
- MI.setOpcode(Opcode);
- const MCInstrDesc &Desc = MCII->get(Opcode);
- unsigned NumFixedOperands = Desc.NumOperands;
-
- // If it's variadic, read the number of extra operands.
- unsigned NumExtraOperands = 0;
- if (Desc.isVariadic()) {
- if (Pos + sizeof(uint64_t) > Bytes.size())
- return MCDisassembler::Fail;
- NumExtraOperands = support::endian::read64le(Bytes.data() + Pos);
- Pos += sizeof(uint64_t);
- }
-
- // Read the fixed operands. These are described by the MCInstrDesc.
- for (unsigned i = 0; i < NumFixedOperands; ++i) {
- const MCOperandInfo &Info = Desc.OpInfo[i];
- switch (Info.OperandType) {
- case MCOI::OPERAND_IMMEDIATE:
- case WebAssembly::OPERAND_LOCAL:
- case WebAssembly::OPERAND_P2ALIGN:
- case WebAssembly::OPERAND_BASIC_BLOCK: {
- if (Pos + sizeof(uint64_t) > Bytes.size())
- return MCDisassembler::Fail;
- uint64_t Imm = support::endian::read64le(Bytes.data() + Pos);
- Pos += sizeof(uint64_t);
- MI.addOperand(MCOperand::createImm(Imm));
- break;
- }
- case MCOI::OPERAND_REGISTER: {
- if (Pos + sizeof(uint64_t) > Bytes.size())
- return MCDisassembler::Fail;
- uint64_t Reg = support::endian::read64le(Bytes.data() + Pos);
- Pos += sizeof(uint64_t);
- MI.addOperand(MCOperand::createReg(Reg));
- break;
- }
- case WebAssembly::OPERAND_F32IMM:
- case WebAssembly::OPERAND_F64IMM: {
- // TODO: MC converts all floating point immediate operands to double.
- // This is fine for numeric values, but may cause NaNs to change bits.
- if (Pos + sizeof(uint64_t) > Bytes.size())
- return MCDisassembler::Fail;
- uint64_t Bits = support::endian::read64le(Bytes.data() + Pos);
- Pos += sizeof(uint64_t);
- double Imm;
- memcpy(&Imm, &Bits, sizeof(Imm));
- MI.addOperand(MCOperand::createFPImm(Imm));
- break;
- }
- default:
- llvm_unreachable("unimplemented operand kind");
- }
- }
-
- // Read the extra operands.
- assert(NumExtraOperands == 0 || Desc.isVariadic());
- for (unsigned i = 0; i < NumExtraOperands; ++i) {
- if (Pos + sizeof(uint64_t) > Bytes.size())
- return MCDisassembler::Fail;
- if (Desc.TSFlags & WebAssemblyII::VariableOpIsImmediate) {
- // Decode extra immediate operands.
- uint64_t Imm = support::endian::read64le(Bytes.data() + Pos);
- MI.addOperand(MCOperand::createImm(Imm));
- } else {
- // Decode extra register operands.
- uint64_t Reg = support::endian::read64le(Bytes.data() + Pos);
- MI.addOperand(MCOperand::createReg(Reg));
- }
- Pos += sizeof(uint64_t);
- }
-
- Size = Pos;
- return MCDisassembler::Success;
+ return MCDisassembler::Fail;
}
diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
index 0af13cffdb04..f31dde0ce48f 100644
--- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
+++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
@@ -242,3 +242,17 @@ const char *llvm::WebAssembly::TypeToString(MVT Ty) {
llvm_unreachable("unsupported type");
}
}
+
+const char *llvm::WebAssembly::TypeToString(wasm::ValType Type) {
+ switch (Type) {
+ case wasm::ValType::I32:
+ return "i32";
+ case wasm::ValType::I64:
+ return "i64";
+ case wasm::ValType::F32:
+ return "f32";
+ case wasm::ValType::F64:
+ return "f64";
+ }
+ llvm_unreachable("unsupported type");
+}
diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
index d11f99c1ff39..c6158720d62f 100644
--- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
+++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/Wasm.h"
namespace llvm {
@@ -50,6 +51,7 @@ public:
namespace WebAssembly {
const char *TypeToString(MVT Ty);
+const char *TypeToString(wasm::ValType Type);
} // end namespace WebAssembly
diff --git a/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt b/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
index fd41df7b9635..13c0fe915908 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
@@ -5,4 +5,5 @@ add_llvm_library(LLVMWebAssemblyDesc
WebAssemblyMCCodeEmitter.cpp
WebAssemblyMCTargetDesc.cpp
WebAssemblyTargetStreamer.cpp
+ WebAssemblyWasmObjectWriter.cpp
)
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index 97454a824a34..7c78285fbda4 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "MCTargetDesc/WebAssemblyFixupKinds.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCDirectives.h"
@@ -22,21 +23,22 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCWasmObjectWriter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
-class WebAssemblyAsmBackend final : public MCAsmBackend {
+class WebAssemblyAsmBackendELF final : public MCAsmBackend {
bool Is64Bit;
public:
- explicit WebAssemblyAsmBackend(bool Is64Bit)
+ explicit WebAssemblyAsmBackendELF(bool Is64Bit)
: MCAsmBackend(), Is64Bit(Is64Bit) {}
- ~WebAssemblyAsmBackend() override {}
+ ~WebAssemblyAsmBackendELF() override {}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override;
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
@@ -61,6 +63,95 @@ public:
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
};
+class WebAssemblyAsmBackend final : public MCAsmBackend {
+ bool Is64Bit;
+
+public:
+ explicit WebAssemblyAsmBackend(bool Is64Bit)
+ : MCAsmBackend(), Is64Bit(Is64Bit) {}
+ ~WebAssemblyAsmBackend() override {}
+
+ unsigned getNumFixupKinds() const override {
+ return WebAssembly::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override;
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
+
+ // No instruction requires relaxation
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ return false;
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst) const override { return false; }
+
+ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ MCInst &Res) const override {}
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
+};
+
+bool WebAssemblyAsmBackendELF::writeNopData(uint64_t Count,
+ MCObjectWriter *OW) const {
+ for (uint64_t i = 0; i < Count; ++i)
+ OW->write8(WebAssembly::Nop);
+
+ return true;
+}
+
+void WebAssemblyAsmBackendELF::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value,
+ bool IsPCRel, MCContext &Ctx) const {
+ const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind());
+ assert(Info.Flags == 0 && "WebAssembly does not use MCFixupKindInfo flags");
+
+ unsigned NumBytes = alignTo(Info.TargetSize, 8) / 8;
+ if (Value == 0)
+ return; // Doesn't change encoding.
+
+ // Shift the value into position.
+ Value <<= Info.TargetOffset;
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the
+ // bits from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+MCObjectWriter *
+WebAssemblyAsmBackendELF::createObjectWriter(raw_pwrite_stream &OS) const {
+ return createWebAssemblyELFObjectWriter(OS, Is64Bit, 0);
+}
+
+const MCFixupKindInfo &
+WebAssemblyAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[WebAssembly::NumTargetFixupKinds] = {
+ // This table *must* be in the order that the fixup_* kinds are defined in
+ // WebAssemblyFixupKinds.h.
+ //
+ // Name Offset (bits) Size (bits) Flags
+ { "fixup_code_sleb128_i32", 0, 5*8, 0 },
+ { "fixup_code_sleb128_i64", 0, 10*8, 0 },
+ { "fixup_code_uleb128_i32", 0, 5*8, 0 },
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
+}
+
bool WebAssemblyAsmBackend::writeNopData(uint64_t Count,
MCObjectWriter *OW) const {
if (Count == 0)
@@ -74,11 +165,11 @@ bool WebAssemblyAsmBackend::writeNopData(uint64_t Count,
void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
+ bool IsPCRel, MCContext &Ctx) const {
const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind());
assert(Info.Flags == 0 && "WebAssembly does not use MCFixupKindInfo flags");
- unsigned NumBytes = (Info.TargetSize + 7) / 8;
+ unsigned NumBytes = alignTo(Info.TargetSize, 8) / 8;
if (Value == 0)
return; // Doesn't change encoding.
@@ -96,10 +187,12 @@ void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
MCObjectWriter *
WebAssemblyAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
- return createWebAssemblyELFObjectWriter(OS, Is64Bit, 0);
+ return createWebAssemblyWasmObjectWriter(OS, Is64Bit);
}
} // end anonymous namespace
MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Triple &TT) {
+ if (TT.isOSBinFormatELF())
+ return new WebAssemblyAsmBackendELF(TT.isArch64Bit());
return new WebAssemblyAsmBackend(TT.isArch64Bit());
}
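
The 5*8- and 10*8-bit fixup sizes in the table above follow from LEB128's
worst case: ceil(32/7) = 5 bytes for a 32-bit value and ceil(64/7) = 10 for a
64-bit one, so the emitter reserves the maximum width up front and applyFixup
patches bytes in place without resizing code. A minimal standalone sketch of
padded ULEB128 emission (writePaddedULEB128 is an illustrative helper, not
LLVM API):

  #include <cstdint>
  #include <cstdio>

  // Encode Value as ULEB128, padding with continuation bytes up to PadTo.
  static unsigned writePaddedULEB128(uint64_t Value, uint8_t *Out,
                                     unsigned PadTo) {
    unsigned Count = 0;
    do {
      uint8_t Byte = Value & 0x7f;
      Value >>= 7;
      if (Value != 0 || Count + 1 < PadTo)
        Byte |= 0x80; // more bytes follow
      Out[Count++] = Byte;
    } while (Value != 0 || Count < PadTo);
    return Count;
  }

  int main() {
    uint8_t Buf[10];
    // A 32-bit index always occupies ceil(32/7) = 5 bytes when padded.
    unsigned N = writePaddedULEB128(3, Buf, 5);
    for (unsigned i = 0; i != N; ++i)
      printf("%02x ", Buf[i]); // prints: 83 80 80 80 00
    printf("\n");
  }
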
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
new file mode 100644
index 000000000000..b0af63c924bd
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h
@@ -0,0 +1,31 @@
+//=- WebAssemblyFixupKinds.h - WebAssembly Specific Fixup Entries -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYFIXUPKINDS_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace WebAssembly {
+enum Fixups {
+ fixup_code_sleb128_i32 = FirstTargetFixupKind, // 32-bit signed
+ fixup_code_sleb128_i64, // 64-bit signed
+ fixup_code_uleb128_i32, // 32-bit unsigned
+
+ fixup_code_global_index, // 32-bit unsigned
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+} // end namespace WebAssembly
+} // end namespace llvm
+
+#endif
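
Target-specific fixups are numbered upward from FirstTargetFixupKind (128 in
LLVM's MCFixup.h), which is what lets WebAssemblyAsmBackend::getFixupKindInfo
index its Infos table with Kind - FirstTargetFixupKind. A toy model of the
same numbering scheme, using hypothetical standalone enums rather than the
real MC headers:

  #include <cassert>

  enum { FirstTargetFixupKind = 128 }; // generic MC kinds sit below this
  enum Fixups {
    fixup_code_sleb128_i32 = FirstTargetFixupKind,
    fixup_code_sleb128_i64,
    fixup_code_uleb128_i32,
    fixup_code_global_index,
    LastTargetFixupKind,
    NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
  };

  int main() {
    static_assert(NumTargetFixupKinds == 4, "four target-specific kinds");
    assert(fixup_code_uleb128_i32 - FirstTargetFixupKind == 2); // table slot
  }
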
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
index d8c39216c53b..2dcec5263fa1 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -19,9 +19,9 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-mc-asm-info"
-WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {}
+WebAssemblyMCAsmInfoELF::~WebAssemblyMCAsmInfoELF() {}
-WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
+WebAssemblyMCAsmInfoELF::WebAssemblyMCAsmInfoELF(const Triple &T) {
PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
// TODO: What should MaxInstLength be?
@@ -51,3 +51,33 @@ WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
// WebAssembly's stack is never executable.
UsesNonexecutableStackSection = false;
}
+
+WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {}
+
+WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
+ PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4;
+
+ // TODO: What should MaxInstLength be?
+
+ UseDataRegionDirectives = true;
+
+ // Use .skip instead of .zero because .zero is confusing when used with two
+ // arguments (it doesn't actually zero things out).
+ ZeroDirective = "\t.skip\t";
+
+ Data8bitsDirective = "\t.int8\t";
+ Data16bitsDirective = "\t.int16\t";
+ Data32bitsDirective = "\t.int32\t";
+ Data64bitsDirective = "\t.int64\t";
+
+ AlignmentIsInBytes = false;
+ COMMDirectiveAlignmentIsInBytes = false;
+ LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment;
+
+ SupportsDebugInformation = true;
+
+ // For now, WebAssembly does not support exceptions.
+ ExceptionsType = ExceptionHandling::None;
+
+ // TODO: UseIntegratedAssembler?
+}
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
index 2dcf2cd3c892..d9547096190e 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h
@@ -16,12 +16,19 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H
#include "llvm/MC/MCAsmInfoELF.h"
+#include "llvm/MC/MCAsmInfoWasm.h"
namespace llvm {
class Triple;
-class WebAssemblyMCAsmInfo final : public MCAsmInfoELF {
+class WebAssemblyMCAsmInfoELF final : public MCAsmInfoELF {
+public:
+ explicit WebAssemblyMCAsmInfoELF(const Triple &T);
+ ~WebAssemblyMCAsmInfoELF() override;
+};
+
+class WebAssemblyMCAsmInfo final : public MCAsmInfoWasm {
public:
explicit WebAssemblyMCAsmInfo(const Triple &T);
~WebAssemblyMCAsmInfo() override;
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index d0e0eecd3002..a0b008947491 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "MCTargetDesc/WebAssemblyFixupKinds.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -35,6 +36,7 @@ STATISTIC(MCNumFixups, "Number of MC fixups created.");
namespace {
class WebAssemblyMCCodeEmitter final : public MCCodeEmitter {
const MCInstrInfo &MCII;
+ MCContext &Ctx;
// Implementation generated by tablegen.
uint64_t getBinaryCodeForInstr(const MCInst &MI,
@@ -46,12 +48,14 @@ class WebAssemblyMCCodeEmitter final : public MCCodeEmitter {
const MCSubtargetInfo &STI) const override;
public:
- explicit WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {}
+ WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), Ctx(ctx) {}
};
} // end anonymous namespace
-MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII) {
- return new WebAssemblyMCCodeEmitter(MCII);
+MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII,
+ MCContext &Ctx) {
+ return new WebAssemblyMCCodeEmitter(MCII, Ctx);
}
void WebAssemblyMCCodeEmitter::encodeInstruction(
@@ -63,6 +67,13 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
assert(Binary < UINT8_MAX && "Multi-byte opcodes not supported yet");
OS << uint8_t(Binary);
+ // For br_table instructions, encode the size of the table. In the MCInst,
+ // there's an index operand, one operand for each table entry, and the
+ // default operand.
+ if (MI.getOpcode() == WebAssembly::BR_TABLE_I32 ||
+ MI.getOpcode() == WebAssembly::BR_TABLE_I64)
+ encodeULEB128(MI.getNumOperands() - 2, OS);
+
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
const MCOperand &MO = MI.getOperand(i);
@@ -77,6 +88,12 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
encodeSLEB128(int32_t(MO.getImm()), OS);
} else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) {
encodeSLEB128(int64_t(MO.getImm()), OS);
+ } else if (Info.OperandType == WebAssembly::OPERAND_GLOBAL) {
+ Fixups.push_back(MCFixup::create(
+ OS.tell() - Start, MCConstantExpr::create(MO.getImm(), Ctx),
+ MCFixupKind(WebAssembly::fixup_code_global_index), MI.getLoc()));
+ ++MCNumFixups;
+ encodeULEB128(uint64_t(MO.getImm()), OS);
} else {
encodeULEB128(uint64_t(MO.getImm()), OS);
}
@@ -102,14 +119,28 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
support::endian::Writer<support::little>(OS).write<double>(d);
}
} else if (MO.isExpr()) {
+ const MCOperandInfo &Info = Desc.OpInfo[i];
+ llvm::MCFixupKind FixupKind;
+ size_t PaddedSize;
+ if (Info.OperandType == WebAssembly::OPERAND_I32IMM) {
+ FixupKind = MCFixupKind(WebAssembly::fixup_code_sleb128_i32);
+ PaddedSize = 5;
+ } else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) {
+ FixupKind = MCFixupKind(WebAssembly::fixup_code_sleb128_i64);
+ PaddedSize = 10;
+ } else if (Info.OperandType == WebAssembly::OPERAND_FUNCTION32 ||
+ Info.OperandType == WebAssembly::OPERAND_OFFSET32 ||
+ Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) {
+ FixupKind = MCFixupKind(WebAssembly::fixup_code_uleb128_i32);
+ PaddedSize = 5;
+ } else {
+ llvm_unreachable("unexpected symbolic operand kind");
+ }
Fixups.push_back(MCFixup::create(
OS.tell() - Start, MO.getExpr(),
- STI.getTargetTriple().isArch64Bit() ? FK_Data_8 : FK_Data_4,
- MI.getLoc()));
+ FixupKind, MI.getLoc()));
++MCNumFixups;
- encodeULEB128(STI.getTargetTriple().isArch64Bit() ? UINT64_MAX
- : uint64_t(UINT32_MAX),
- OS);
+ encodeULEB128(0, OS, PaddedSize - 1);
} else {
llvm_unreachable("unexpected operand kind");
}
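
For each symbolic operand the emitter records a fixup at the current offset
and then writes a zero placeholder of the operand's full padded width
(PaddedSize), so resolving the symbol later can never change the size of the
instruction stream. A minimal model of that bookkeeping, with illustrative
stand-in types rather than the real MCFixup/MCExpr:

  #include <cstddef>
  #include <cstdint>
  #include <string>
  #include <vector>

  struct PendingFixup {
    size_t Offset;      // byte offset of the placeholder in the code
    unsigned Kind;      // which fixup_code_* kind applies here
    std::string Symbol; // stand-in for the referenced MCExpr
  };

  struct CodeBuffer {
    std::vector<uint8_t> Bytes;
    std::vector<PendingFixup> Fixups;

    // Record a fixup, then reserve a 5-byte padded ULEB128 placeholder.
    void emitSymbolicULEB32(const std::string &Sym, unsigned Kind) {
      Fixups.push_back({Bytes.size(), Kind, Sym});
      for (int i = 0; i < 4; ++i)
        Bytes.push_back(0x80); // zero-value continuation bytes
      Bytes.push_back(0x00);   // final byte terminates the LEB128
    }
  };

  int main() {
    CodeBuffer CB;
    CB.emitSymbolicULEB32("callee", /*Kind=*/2);
    // CB.Bytes is now 80 80 80 80 00, with a fixup recorded at offset 0.
  }
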
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 3dc1ded17116..9fd3ec81c258 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -36,6 +36,8 @@ using namespace llvm;
static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo & /*MRI*/,
const Triple &TT) {
+ if (TT.isOSBinFormatELF())
+ return new WebAssemblyMCAsmInfoELF(TT);
return new WebAssemblyMCAsmInfo(TT);
}
@@ -71,8 +73,8 @@ static MCInstPrinter *createMCInstPrinter(const Triple & /*T*/,
static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo & /*MRI*/,
- MCContext & /*Ctx*/) {
- return createWebAssemblyMCCodeEmitter(MCII);
+ MCContext &Ctx) {
+ return createWebAssemblyMCCodeEmitter(MCII, Ctx);
}
static MCAsmBackend *createAsmBackend(const Target & /*T*/,
@@ -88,8 +90,12 @@ static MCSubtargetInfo *createMCSubtargetInfo(const Triple &TT, StringRef CPU,
}
static MCTargetStreamer *
-createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo & /*STI*/) {
- return new WebAssemblyTargetELFStreamer(S);
+createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ const Triple &TT = STI.getTargetTriple();
+ if (TT.isOSBinFormatELF())
+ return new WebAssemblyTargetELFStreamer(S);
+
+ return new WebAssemblyTargetWasmStreamer(S);
}
static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
@@ -135,12 +141,12 @@ extern "C" void LLVMInitializeWebAssemblyTargetMC() {
}
}
-WebAssembly::ValType WebAssembly::toValType(const MVT &Ty) {
+wasm::ValType WebAssembly::toValType(const MVT &Ty) {
switch (Ty.SimpleTy) {
- case MVT::i32: return WebAssembly::ValType::I32;
- case MVT::i64: return WebAssembly::ValType::I64;
- case MVT::f32: return WebAssembly::ValType::F32;
- case MVT::f64: return WebAssembly::ValType::F64;
+ case MVT::i32: return wasm::ValType::I32;
+ case MVT::i64: return wasm::ValType::I64;
+ case MVT::f32: return wasm::ValType::F32;
+ case MVT::f64: return wasm::ValType::F64;
default: llvm_unreachable("unexpected type");
}
}
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 8583b772deab..795658ca96b4 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -17,6 +17,7 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Wasm.h"
namespace llvm {
@@ -34,19 +35,25 @@ class raw_pwrite_stream;
Target &getTheWebAssemblyTarget32();
Target &getTheWebAssemblyTarget64();
-MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII);
+MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII,
+ MCContext &Ctx);
MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT);
MCObjectWriter *createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit, uint8_t OSABI);
+MCObjectWriter *createWebAssemblyWasmObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit);
+
namespace WebAssembly {
enum OperandType {
/// Basic block label in a branch construct.
OPERAND_BASIC_BLOCK = MCOI::OPERAND_FIRST_TARGET,
/// Local index.
OPERAND_LOCAL,
+ /// Global index.
+ OPERAND_GLOBAL,
/// 32-bit integer immediates.
OPERAND_I32IMM,
/// 64-bit integer immediates.
@@ -62,7 +69,9 @@ enum OperandType {
/// p2align immediate for load and store address alignment.
OPERAND_P2ALIGN,
/// signature immediate for block/loop.
- OPERAND_SIGNATURE
+ OPERAND_SIGNATURE,
+ /// type signature immediate for call_indirect.
+ OPERAND_TYPEINDEX,
};
} // end namespace WebAssembly
@@ -141,40 +150,25 @@ static const unsigned StoreP2AlignOperandNo = 0;
/// This is used to indicate block signatures.
enum class ExprType {
- Void = 0x40,
- I32 = 0x7f,
- I64 = 0x7e,
- F32 = 0x7d,
- F64 = 0x7c,
- I8x16 = 0x7b,
- I16x8 = 0x7a,
- I32x4 = 0x79,
- F32x4 = 0x78,
- B8x16 = 0x77,
- B16x8 = 0x76,
- B32x4 = 0x75
-};
-
-/// This is used to indicate local types.
-enum class ValType {
- I32 = 0x7f,
- I64 = 0x7e,
- F32 = 0x7d,
- F64 = 0x7c,
- I8x16 = 0x7b,
- I16x8 = 0x7a,
- I32x4 = 0x79,
- F32x4 = 0x78,
- B8x16 = 0x77,
- B16x8 = 0x76,
- B32x4 = 0x75
+ Void = -0x40,
+ I32 = -0x01,
+ I64 = -0x02,
+ F32 = -0x03,
+ F64 = -0x04,
+ I8x16 = -0x05,
+ I16x8 = -0x06,
+ I32x4 = -0x07,
+ F32x4 = -0x08,
+ B8x16 = -0x09,
+ B16x8 = -0x0a,
+ B32x4 = -0x0b
};
/// Instruction opcodes emitted via means other than CodeGen.
static const unsigned Nop = 0x01;
static const unsigned End = 0x0b;
-ValType toValType(const MVT &Ty);
+wasm::ValType toValType(const MVT &Ty);
} // end namespace WebAssembly
} // end namespace llvm
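
The ExprType renumbering above is not a behavior change for the single-byte
cases: the streamer now writes these values as SLEB128 (see emitValueType in
this patch), and for values in [-64, 63] SLEB128 is one byte whose low seven
bits carry the value, so sleb128(-0x01) is exactly the old 0x7f, down to
sleb128(-0x40) being the old 0x40. A quick standalone check (the helper and
its arithmetic-shift assumption are illustrative, not LLVM code):

  #include <cassert>
  #include <cstdint>

  static uint8_t sleb128FirstByte(int64_t V) {
    uint8_t Byte = V & 0x7f;
    V >>= 7; // arithmetic shift assumed, as on mainstream compilers
    // Values in [-64, 63] fit in one byte with no continuation bit.
    assert((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)));
    return Byte;
  }

  int main() {
    assert(sleb128FirstByte(-0x01) == 0x7f); // I32, formerly 0x7f
    assert(sleb128FirstByte(-0x02) == 0x7e); // I64, formerly 0x7e
    assert(sleb128FirstByte(-0x40) == 0x40); // Void, formerly 0x40
  }
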
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index 3cee8b2a1844..ad59f2f40587 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -18,9 +18,11 @@
#include "WebAssemblyMCTargetDesc.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/Support/ELF.h"
+#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
@@ -28,6 +30,10 @@ using namespace llvm;
WebAssemblyTargetStreamer::WebAssemblyTargetStreamer(MCStreamer &S)
: MCTargetStreamer(S) {}
+void WebAssemblyTargetStreamer::emitValueType(wasm::ValType Type) {
+ Streamer.EmitSLEB128IntValue(int32_t(Type));
+}
+
WebAssemblyTargetAsmStreamer::WebAssemblyTargetAsmStreamer(
MCStreamer &S, formatted_raw_ostream &OS)
: WebAssemblyTargetStreamer(S), OS(OS) {}
@@ -35,6 +41,9 @@ WebAssemblyTargetAsmStreamer::WebAssemblyTargetAsmStreamer(
WebAssemblyTargetELFStreamer::WebAssemblyTargetELFStreamer(MCStreamer &S)
: WebAssemblyTargetStreamer(S) {}
+WebAssemblyTargetWasmStreamer::WebAssemblyTargetWasmStreamer(MCStreamer &S)
+ : WebAssemblyTargetStreamer(S) {}
+
static void PrintTypes(formatted_raw_ostream &OS, ArrayRef<MVT> Types) {
bool First = true;
for (MVT Type : Types) {
@@ -47,14 +56,28 @@ static void PrintTypes(formatted_raw_ostream &OS, ArrayRef<MVT> Types) {
OS << '\n';
}
-void WebAssemblyTargetAsmStreamer::emitParam(ArrayRef<MVT> Types) {
- OS << "\t.param \t";
- PrintTypes(OS, Types);
+void WebAssemblyTargetAsmStreamer::emitParam(MCSymbol *Symbol,
+ ArrayRef<MVT> Types) {
+ if (!Types.empty()) {
+ OS << "\t.param \t";
+
+ // FIXME: Currently this applies to the "current" function; it may
+ // be cleaner to specify an explicit symbol as part of the directive.
+
+ PrintTypes(OS, Types);
+ }
}
-void WebAssemblyTargetAsmStreamer::emitResult(ArrayRef<MVT> Types) {
- OS << "\t.result \t";
- PrintTypes(OS, Types);
+void WebAssemblyTargetAsmStreamer::emitResult(MCSymbol *Symbol,
+ ArrayRef<MVT> Types) {
+ if (!Types.empty()) {
+ OS << "\t.result \t";
+
+ // FIXME: Currently this applies to the "current" function; it may
+ // be cleaner to specify an explicit symbol as part of the directive.
+
+ PrintTypes(OS, Types);
+ }
}
void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<MVT> Types) {
@@ -64,6 +87,31 @@ void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<MVT> Types) {
}
}
+void WebAssemblyTargetAsmStreamer::emitGlobal(
+ ArrayRef<wasm::Global> Globals) {
+ if (!Globals.empty()) {
+ OS << "\t.globalvar \t";
+
+ bool First = true;
+ for (const wasm::Global &G : Globals) {
+ if (First)
+ First = false;
+ else
+ OS << ", ";
+ OS << WebAssembly::TypeToString(G.Type);
+ if (!G.InitialModule.empty())
+ OS << '=' << G.InitialModule << ':' << G.InitialName;
+ else
+ OS << '=' << G.InitialValue;
+ }
+ OS << '\n';
+ }
+}
+
+void WebAssemblyTargetAsmStreamer::emitStackPointer(uint32_t Index) {
+ OS << "\t.stack_pointer\t" << Index << '\n';
+}
+
void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; }
void WebAssemblyTargetAsmStreamer::emitIndirectFunctionType(
@@ -88,18 +136,30 @@ void WebAssemblyTargetAsmStreamer::emitIndIdx(const MCExpr *Value) {
OS << "\t.indidx \t" << *Value << '\n';
}
-void WebAssemblyTargetELFStreamer::emitParam(ArrayRef<MVT> Types) {
+void WebAssemblyTargetELFStreamer::emitParam(MCSymbol *Symbol,
+ ArrayRef<MVT> Types) {
// Nothing to emit; params are declared as part of the function signature.
}
-void WebAssemblyTargetELFStreamer::emitResult(ArrayRef<MVT> Types) {
+void WebAssemblyTargetELFStreamer::emitResult(MCSymbol *Symbol,
+ ArrayRef<MVT> Types) {
// Nothing to emit; results are declared as part of the function signature.
}
void WebAssemblyTargetELFStreamer::emitLocal(ArrayRef<MVT> Types) {
Streamer.EmitULEB128IntValue(Types.size());
for (MVT Type : Types)
- Streamer.EmitIntValue(int64_t(WebAssembly::toValType(Type)), 1);
+ emitValueType(WebAssembly::toValType(Type));
+}
+
+void WebAssemblyTargetELFStreamer::emitGlobal(
+ ArrayRef<wasm::Global> Globals) {
+ llvm_unreachable(".globalvar encoding not yet implemented");
+}
+
+void WebAssemblyTargetELFStreamer::emitStackPointer(uint32_t Index) {
+ llvm_unreachable(".stack_pointer encoding not yet implemented");
}
void WebAssemblyTargetELFStreamer::emitEndFunc() {
@@ -117,4 +177,88 @@ void WebAssemblyTargetELFStreamer::emitIndirectFunctionType(
}
void WebAssemblyTargetELFStreamer::emitGlobalImport(StringRef name) {
-}
\ No newline at end of file
+}
+
+void WebAssemblyTargetWasmStreamer::emitParam(MCSymbol *Symbol,
+ ArrayRef<MVT> Types) {
+ SmallVector<wasm::ValType, 4> Params;
+ for (MVT Ty : Types)
+ Params.push_back(WebAssembly::toValType(Ty));
+
+ cast<MCSymbolWasm>(Symbol)->setParams(std::move(Params));
+}
+
+void WebAssemblyTargetWasmStreamer::emitResult(MCSymbol *Symbol,
+ ArrayRef<MVT> Types) {
+ SmallVector<wasm::ValType, 4> Returns;
+ for (MVT Ty : Types)
+ Returns.push_back(WebAssembly::toValType(Ty));
+
+ cast<MCSymbolWasm>(Symbol)->setReturns(std::move(Returns));
+}
+
+void WebAssemblyTargetWasmStreamer::emitLocal(ArrayRef<MVT> Types) {
+ SmallVector<std::pair<MVT, uint32_t>, 4> Grouped;
+ for (MVT Type : Types) {
+ if (Grouped.empty() || Grouped.back().first != Type)
+ Grouped.push_back(std::make_pair(Type, 1));
+ else
+ ++Grouped.back().second;
+ }
+
+ Streamer.EmitULEB128IntValue(Grouped.size());
+ for (auto Pair : Grouped) {
+ Streamer.EmitULEB128IntValue(Pair.second);
+ emitValueType(WebAssembly::toValType(Pair.first));
+ }
+}
+
+void WebAssemblyTargetWasmStreamer::emitGlobal(
+ ArrayRef<wasm::Global> Globals) {
+  // Encode the globals used by the function into the special .global_variables
+ // section. This will later be decoded and turned into contents for the
+ // Globals Section.
+ Streamer.PushSection();
+ Streamer.SwitchSection(Streamer.getContext()
+ .getWasmSection(".global_variables", 0, 0));
+ for (const wasm::Global &G : Globals) {
+ Streamer.EmitIntValue(int32_t(G.Type), 1);
+ Streamer.EmitIntValue(G.Mutable, 1);
+ if (G.InitialModule.empty()) {
+ Streamer.EmitIntValue(0, 1); // indicate that we have an int value
+ Streamer.EmitSLEB128IntValue(0);
+ } else {
+ Streamer.EmitIntValue(1, 1); // indicate that we have a module import
+ Streamer.EmitBytes(G.InitialModule);
+ Streamer.EmitIntValue(0, 1); // nul-terminate
+ Streamer.EmitBytes(G.InitialName);
+ Streamer.EmitIntValue(0, 1); // nul-terminate
+ }
+ }
+ Streamer.PopSection();
+}
+
+void WebAssemblyTargetWasmStreamer::emitStackPointer(uint32_t Index) {
+ Streamer.PushSection();
+ Streamer.SwitchSection(Streamer.getContext()
+ .getWasmSection(".stack_pointer", 0, 0));
+ Streamer.EmitIntValue(Index, 4);
+ Streamer.PopSection();
+}
+
+void WebAssemblyTargetWasmStreamer::emitEndFunc() {
+ llvm_unreachable(".end_func is not needed for direct wasm output");
+}
+
+void WebAssemblyTargetWasmStreamer::emitIndIdx(const MCExpr *Value) {
+ llvm_unreachable(".indidx encoding not yet implemented");
+}
+
+void WebAssemblyTargetWasmStreamer::emitIndirectFunctionType(
+    StringRef name, SmallVectorImpl<MVT> &Params,
+    SmallVectorImpl<MVT> &Results) {
+ // Nothing to emit here. TODO: Re-design how linking works and re-evaluate
+ // whether it's necessary for .o files to declare indirect function types.
+}
+
+void WebAssemblyTargetWasmStreamer::emitGlobalImport(StringRef name) {
+}
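
WebAssemblyTargetWasmStreamer::emitLocal above run-length encodes consecutive
locals of the same type, which is the shape the wasm binary format expects: a
group count, then (count, type) pairs. The same transformation in isolation
(groupRuns is an illustrative helper, with ints standing in for MVTs):

  #include <cassert>
  #include <cstdint>
  #include <utility>
  #include <vector>

  template <typename T>
  std::vector<std::pair<T, uint32_t>> groupRuns(const std::vector<T> &Items) {
    std::vector<std::pair<T, uint32_t>> Grouped;
    for (const T &Item : Items) {
      if (Grouped.empty() || Grouped.back().first != Item)
        Grouped.push_back({Item, 1});
      else
        ++Grouped.back().second;
    }
    return Grouped;
  }

  int main() {
    // Locals [i32, i32, i64, i64, i64] collapse into two groups.
    auto G = groupRuns<int>({32, 32, 64, 64, 64});
    assert(G.size() == 2 && G[0].second == 2 && G[1].second == 3);
  }
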
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index 23ac3190243a..68d6747298df 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -18,10 +18,12 @@
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Wasm.h"
namespace llvm {
class MCELFStreamer;
+class MCWasmStreamer;
/// WebAssembly-specific streamer interface, to implement support
/// WebAssembly-specific assembly directives.
@@ -30,11 +32,15 @@ public:
explicit WebAssemblyTargetStreamer(MCStreamer &S);
/// .param
- virtual void emitParam(ArrayRef<MVT> Types) = 0;
+ virtual void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) = 0;
/// .result
- virtual void emitResult(ArrayRef<MVT> Types) = 0;
+ virtual void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) = 0;
/// .local
virtual void emitLocal(ArrayRef<MVT> Types) = 0;
+ /// .globalvar
+ virtual void emitGlobal(ArrayRef<wasm::Global> Globals) = 0;
+ /// .stack_pointer
+ virtual void emitStackPointer(uint32_t Index) = 0;
/// .endfunc
virtual void emitEndFunc() = 0;
/// .functype
@@ -47,6 +53,9 @@ public:
virtual void emitIndIdx(const MCExpr *Value) = 0;
/// .import_global
virtual void emitGlobalImport(StringRef name) = 0;
+
+protected:
+ void emitValueType(wasm::ValType Type);
};
/// This part is for ascii assembly output
@@ -56,9 +65,11 @@ class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer {
public:
WebAssemblyTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
- void emitParam(ArrayRef<MVT> Types) override;
- void emitResult(ArrayRef<MVT> Types) override;
+ void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
+ void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
void emitLocal(ArrayRef<MVT> Types) override;
+ void emitGlobal(ArrayRef<wasm::Global> Globals) override;
+ void emitStackPointer(uint32_t Index) override;
void emitEndFunc() override;
void emitIndirectFunctionType(StringRef name,
SmallVectorImpl<MVT> &Params,
@@ -72,9 +83,29 @@ class WebAssemblyTargetELFStreamer final : public WebAssemblyTargetStreamer {
public:
explicit WebAssemblyTargetELFStreamer(MCStreamer &S);
- void emitParam(ArrayRef<MVT> Types) override;
- void emitResult(ArrayRef<MVT> Types) override;
+ void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
+ void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
+ void emitLocal(ArrayRef<MVT> Types) override;
+ void emitGlobal(ArrayRef<wasm::Global> Globals) override;
+ void emitStackPointer(uint32_t Index) override;
+ void emitEndFunc() override;
+ void emitIndirectFunctionType(StringRef name,
+ SmallVectorImpl<MVT> &Params,
+ SmallVectorImpl<MVT> &Results) override;
+ void emitIndIdx(const MCExpr *Value) override;
+ void emitGlobalImport(StringRef name) override;
+};
+
+/// This part is for Wasm object output
+class WebAssemblyTargetWasmStreamer final : public WebAssemblyTargetStreamer {
+public:
+ explicit WebAssemblyTargetWasmStreamer(MCStreamer &S);
+
+ void emitParam(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
+ void emitResult(MCSymbol *Symbol, ArrayRef<MVT> Types) override;
void emitLocal(ArrayRef<MVT> Types) override;
+ void emitGlobal(ArrayRef<wasm::Global> Globals) override;
+ void emitStackPointer(uint32_t Index) override;
void emitEndFunc() override;
void emitIndirectFunctionType(StringRef name,
SmallVectorImpl<MVT> &Params,
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
new file mode 100644
index 000000000000..2846ec5e9337
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -0,0 +1,92 @@
+//===-- WebAssemblyWasmObjectWriter.cpp - WebAssembly Wasm Writer ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file handles Wasm-specific object emission, converting LLVM's
+/// internal fixups into the appropriate relocations.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "MCTargetDesc/WebAssemblyFixupKinds.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/MCWasmObjectWriter.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Wasm.h"
+using namespace llvm;
+
+namespace {
+class WebAssemblyWasmObjectWriter final : public MCWasmObjectTargetWriter {
+public:
+ explicit WebAssemblyWasmObjectWriter(bool Is64Bit);
+
+private:
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const override;
+};
+} // end anonymous namespace
+
+WebAssemblyWasmObjectWriter::WebAssemblyWasmObjectWriter(bool Is64Bit)
+ : MCWasmObjectTargetWriter(Is64Bit) {}
+
+// Test whether the given expression computes a function address.
+static bool IsFunctionExpr(const MCExpr *Expr) {
+  if (const MCSymbolRefExpr *SyExp = dyn_cast<MCSymbolRefExpr>(Expr))
+    return cast<MCSymbolWasm>(SyExp->getSymbol()).isFunction();
+
+  if (const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Expr))
+    return IsFunctionExpr(BinOp->getLHS()) != IsFunctionExpr(BinOp->getRHS());
+
+  if (const MCUnaryExpr *UnOp = dyn_cast<MCUnaryExpr>(Expr))
+    return IsFunctionExpr(UnOp->getSubExpr());
+
+ return false;
+}
+
+unsigned WebAssemblyWasmObjectWriter::getRelocType(MCContext &Ctx,
+ const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ // WebAssembly functions are not allocated in the data address space. To
+ // resolve a pointer to a function, we must use a special relocation type.
+ bool IsFunction = IsFunctionExpr(Fixup.getValue());
+
+ assert(!IsPCRel);
+ switch (unsigned(Fixup.getKind())) {
+ case WebAssembly::fixup_code_sleb128_i32:
+ if (IsFunction)
+ return wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB;
+ return wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB;
+ case WebAssembly::fixup_code_sleb128_i64:
+ llvm_unreachable("fixup_sleb128_i64 not implemented yet");
+ case WebAssembly::fixup_code_uleb128_i32:
+ if (IsFunction)
+ return wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB;
+ return wasm::R_WEBASSEMBLY_GLOBAL_ADDR_LEB;
+ case FK_Data_4:
+ if (IsFunction)
+ return wasm::R_WEBASSEMBLY_TABLE_INDEX_I32;
+ return wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32;
+ case FK_Data_8:
+ llvm_unreachable("FK_Data_8 not implemented yet");
+ default:
+ llvm_unreachable("unimplemented fixup kind");
+ }
+}
+
+MCObjectWriter *llvm::createWebAssemblyWasmObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit) {
+ MCWasmObjectTargetWriter *MOTW = new WebAssemblyWasmObjectWriter(Is64Bit);
+ return createWasmObjectWriter(MOTW, OS);
+}
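
One detail of IsFunctionExpr worth spelling out: the '!=' between the two
recursive calls is a boolean XOR, so an expression counts as a function
address when exactly one side of a binary operator does. A hypothetical
walk-through, assuming f and g are symbols marked isFunction():

  //   IsFunctionExpr(f)        --> true
  //   IsFunctionExpr(f + 4)    --> true  != false --> true
  //   IsFunctionExpr(f - g)    --> true  != true  --> false (plain number)
  //   IsFunctionExpr(base + 8) --> false != false --> false

This is why a function difference like f - g falls through to the
GLOBAL_ADDR relocation kinds in getRelocType rather than a TABLE_INDEX one.
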
diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt
index 64991ad14071..3433b1553e8c 100644
--- a/lib/Target/WebAssembly/README.txt
+++ b/lib/Target/WebAssembly/README.txt
@@ -145,3 +145,24 @@ WebAssemblyRegStackify could be extended, or possibly rewritten, to take
advantage of the new opportunities.
//===---------------------------------------------------------------------===//
+
+Add support for mergeable sections in the Wasm writer, such as for strings and
+floating-point constants.
+
+//===---------------------------------------------------------------------===//
+
+The function @dynamic_alloca_redzone in test/CodeGen/WebAssembly/userstack.ll
+ends up with a tee_local in its prolog which has an unused result, requiring
+an extra drop:
+
+ get_global $push8=, 0
+ tee_local $push9=, 1, $pop8
+ drop $pop9
+ [...]
+
+The prologue code initially thinks it needs an FP register, but later it
+turns out to be unneeded, so this could be addressed either by being more
+clever about not inserting code for an FP in the first place, or by
+optimizing away the copy later.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/WebAssembly/WebAssembly.h b/lib/Target/WebAssembly/WebAssembly.h
index 8738263ad847..e04c4db19c8c 100644
--- a/lib/Target/WebAssembly/WebAssembly.h
+++ b/lib/Target/WebAssembly/WebAssembly.h
@@ -46,6 +46,7 @@ FunctionPass *createWebAssemblyRegStackify();
FunctionPass *createWebAssemblyRegColoring();
FunctionPass *createWebAssemblyExplicitLocals();
FunctionPass *createWebAssemblyFixIrreducibleControlFlow();
+FunctionPass *createWebAssemblyCFGSort();
FunctionPass *createWebAssemblyCFGStackify();
FunctionPass *createWebAssemblyLowerBrUnless();
FunctionPass *createWebAssemblyRegNumbering();
diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 5b4b82eb5603..d9c2dba5bace 100644
--- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -14,6 +14,7 @@
///
//===----------------------------------------------------------------------===//
+#include "WebAssemblyAsmPrinter.h"
#include "InstPrinter/WebAssemblyInstPrinter.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
@@ -21,13 +22,14 @@
#include "WebAssemblyMCInstLower.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblyRegisterInfo.h"
-#include "WebAssemblySubtarget.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -38,56 +40,6 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-namespace {
-
-class WebAssemblyAsmPrinter final : public AsmPrinter {
- const MachineRegisterInfo *MRI;
- WebAssemblyFunctionInfo *MFI;
-
-public:
- WebAssemblyAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)), MRI(nullptr), MFI(nullptr) {}
-
-private:
- StringRef getPassName() const override {
- return "WebAssembly Assembly Printer";
- }
-
- //===------------------------------------------------------------------===//
- // MachineFunctionPass Implementation.
- //===------------------------------------------------------------------===//
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- MRI = &MF.getRegInfo();
- MFI = MF.getInfo<WebAssemblyFunctionInfo>();
- return AsmPrinter::runOnMachineFunction(MF);
- }
-
- //===------------------------------------------------------------------===//
- // AsmPrinter Implementation.
- //===------------------------------------------------------------------===//
-
- void EmitEndOfAsmFile(Module &M) override;
- void EmitJumpTableInfo() override;
- void EmitConstantPool() override;
- void EmitFunctionBodyStart() override;
- void EmitFunctionBodyEnd() override;
- void EmitInstruction(const MachineInstr *MI) override;
- const MCExpr *lowerConstant(const Constant *CV) override;
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS) override;
- bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS) override;
-
- MVT getRegType(unsigned RegNo) const;
- std::string regToString(const MachineOperand &MO);
- WebAssemblyTargetStreamer *getTargetStreamer();
-};
-
-} // end anonymous namespace
-
//===----------------------------------------------------------------------===//
// Helpers.
//===----------------------------------------------------------------------===//
@@ -135,9 +87,19 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
}
for (const auto &G : M.globals()) {
if (!G.hasInitializer() && G.hasExternalLinkage()) {
+ uint16_t Size = M.getDataLayout().getTypeAllocSize(G.getValueType());
getTargetStreamer()->emitGlobalImport(G.getGlobalIdentifier());
+ OutStreamer->emitELFSize(getSymbol(&G),
+ MCConstantExpr::create(Size, OutContext));
}
}
+
+ if (!TM.getTargetTriple().isOSBinFormatELF()) {
+ MachineModuleInfoWasm &MMIW = MMI->getObjFileInfo<MachineModuleInfoWasm>();
+ getTargetStreamer()->emitGlobal(MMIW.getGlobals());
+ if (MMIW.hasStackPointerGlobal())
+ getTargetStreamer()->emitStackPointer(MMIW.getStackPointerGlobal());
+ }
}
void WebAssemblyAsmPrinter::EmitConstantPool() {
@@ -150,8 +112,7 @@ void WebAssemblyAsmPrinter::EmitJumpTableInfo() {
}
void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
- if (!MFI->getParams().empty())
- getTargetStreamer()->emitParam(MFI->getParams());
+ getTargetStreamer()->emitParam(CurrentFnSym, MFI->getParams());
SmallVector<MVT, 4> ResultVTs;
const Function &F(*MF->getFunction());
@@ -169,23 +130,26 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
// If the return type needs to be legalized it will get converted into
// passing a pointer.
if (ResultVTs.size() == 1)
- getTargetStreamer()->emitResult(ResultVTs);
-
- // FIXME: When ExplicitLocals is enabled by default, we won't need
- // to define the locals here (and MFI can go back to being pointer-to-const).
- for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
- unsigned VReg = TargetRegisterInfo::index2VirtReg(Idx);
- unsigned WAReg = MFI->getWAReg(VReg);
- // Don't declare unused registers.
- if (WAReg == WebAssemblyFunctionInfo::UnusedReg)
- continue;
- // Don't redeclare parameters.
- if (WAReg < MFI->getParams().size())
- continue;
- // Don't declare stackified registers.
- if (int(WAReg) < 0)
- continue;
- MFI->addLocal(getRegType(VReg));
+ getTargetStreamer()->emitResult(CurrentFnSym, ResultVTs);
+ else
+ getTargetStreamer()->emitResult(CurrentFnSym, ArrayRef<MVT>());
+
+ if (TM.getTargetTriple().isOSBinFormatELF()) {
+ assert(MFI->getLocals().empty());
+ for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
+ unsigned VReg = TargetRegisterInfo::index2VirtReg(Idx);
+ unsigned WAReg = MFI->getWAReg(VReg);
+ // Don't declare unused registers.
+ if (WAReg == WebAssemblyFunctionInfo::UnusedReg)
+ continue;
+ // Don't redeclare parameters.
+ if (WAReg < MFI->getParams().size())
+ continue;
+ // Don't declare stackified registers.
+ if (int(WAReg) < 0)
+ continue;
+ MFI->addLocal(getRegType(VReg));
+ }
}
getTargetStreamer()->emitLocal(MFI->getLocals());
@@ -194,7 +158,8 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
}
void WebAssemblyAsmPrinter::EmitFunctionBodyEnd() {
- getTargetStreamer()->emitEndFunc();
+ if (TM.getTargetTriple().isOSBinFormatELF())
+ getTargetStreamer()->emitEndFunc();
}
void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
new file mode 100644
index 000000000000..c8917b8d7e48
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
@@ -0,0 +1,77 @@
+// WebAssemblyAsmPrinter.h - WebAssembly implementation of AsmPrinter-*- C++ -*-
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYASMPRINTER_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYASMPRINTER_H
+
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class MCSymbol;
+class WebAssemblyFunctionInfo;
+class WebAssemblyTargetStreamer;
+class WebAssemblyMCInstLower;
+
+class LLVM_LIBRARY_VISIBILITY WebAssemblyAsmPrinter final : public AsmPrinter {
+ const WebAssemblySubtarget *Subtarget;
+ const MachineRegisterInfo *MRI;
+ WebAssemblyFunctionInfo *MFI;
+
+public:
+ explicit WebAssemblyAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)),
+ Subtarget(nullptr), MRI(nullptr), MFI(nullptr) {}
+
+ StringRef getPassName() const override {
+ return "WebAssembly Assembly Printer";
+ }
+
+ const WebAssemblySubtarget &getSubtarget() const { return *Subtarget; }
+
+ //===------------------------------------------------------------------===//
+ // MachineFunctionPass Implementation.
+ //===------------------------------------------------------------------===//
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ Subtarget = &MF.getSubtarget<WebAssemblySubtarget>();
+ MRI = &MF.getRegInfo();
+ MFI = MF.getInfo<WebAssemblyFunctionInfo>();
+ return AsmPrinter::runOnMachineFunction(MF);
+ }
+
+ //===------------------------------------------------------------------===//
+ // AsmPrinter Implementation.
+ //===------------------------------------------------------------------===//
+
+ void EmitEndOfAsmFile(Module &M) override;
+ void EmitJumpTableInfo() override;
+ void EmitConstantPool() override;
+ void EmitFunctionBodyStart() override;
+ void EmitFunctionBodyEnd() override;
+ void EmitInstruction(const MachineInstr *MI) override;
+ const MCExpr *lowerConstant(const Constant *CV) override;
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) override;
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS) override;
+
+ MVT getRegType(unsigned RegNo) const;
+ std::string regToString(const MachineOperand &MO);
+ WebAssemblyTargetStreamer *getTargetStreamer();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
new file mode 100644
index 000000000000..40e1928197bc
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp
@@ -0,0 +1,277 @@
+//===-- WebAssemblyCFGSort.cpp - CFG Sorting ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a CFG sorting pass.
+///
+/// This pass reorders the blocks in a function to put them into topological
+/// order, ignoring loop backedges, and without any loop being interrupted
+/// by a block not dominated by the loop header, with special care to keep the
+/// order as similar as possible to the original order.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-cfg-sort"
+
+namespace {
+class WebAssemblyCFGSort final : public MachineFunctionPass {
+ StringRef getPassName() const override { return "WebAssembly CFG Sort"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyCFGSort() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyCFGSort::ID = 0;
+FunctionPass *llvm::createWebAssemblyCFGSort() {
+ return new WebAssemblyCFGSort();
+}
+
+static void MaybeUpdateTerminator(MachineBasicBlock *MBB) {
+#ifndef NDEBUG
+ bool AnyBarrier = false;
+#endif
+ bool AllAnalyzable = true;
+ for (const MachineInstr &Term : MBB->terminators()) {
+#ifndef NDEBUG
+ AnyBarrier |= Term.isBarrier();
+#endif
+ AllAnalyzable &= Term.isBranch() && !Term.isIndirectBranch();
+ }
+ assert((AnyBarrier || AllAnalyzable) &&
+ "AnalyzeBranch needs to analyze any block with a fallthrough");
+ if (AllAnalyzable)
+ MBB->updateTerminator();
+}
+
+namespace {
+/// Sort blocks by their number.
+struct CompareBlockNumbers {
+ bool operator()(const MachineBasicBlock *A,
+ const MachineBasicBlock *B) const {
+ return A->getNumber() > B->getNumber();
+ }
+};
+/// Sort blocks by their number in the opposite order.
+struct CompareBlockNumbersBackwards {
+ bool operator()(const MachineBasicBlock *A,
+ const MachineBasicBlock *B) const {
+ return A->getNumber() < B->getNumber();
+ }
+};
+/// Bookkeeping for a loop to help ensure that we don't mix blocks not dominated
+/// by the loop header among the loop's blocks.
+struct Entry {
+ const MachineLoop *Loop;
+ unsigned NumBlocksLeft;
+
+ /// List of blocks not dominated by Loop's header that are deferred until
+ /// after all of Loop's blocks have been seen.
+ std::vector<MachineBasicBlock *> Deferred;
+
+ explicit Entry(const MachineLoop *L)
+ : Loop(L), NumBlocksLeft(L->getNumBlocks()) {}
+};
+} // end anonymous namespace
+
+/// Sort the blocks, taking special care to make sure that loops are not
+/// interrupted by blocks not dominated by their header.
+/// TODO: There are many opportunities for improving the heuristics here.
+/// Explore them.
+static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
+ const MachineDominatorTree &MDT) {
+ // Prepare for a topological sort: Record the number of predecessors each
+ // block has, ignoring loop backedges.
+ MF.RenumberBlocks();
+ SmallVector<unsigned, 16> NumPredsLeft(MF.getNumBlockIDs(), 0);
+ for (MachineBasicBlock &MBB : MF) {
+ unsigned N = MBB.pred_size();
+ if (MachineLoop *L = MLI.getLoopFor(&MBB))
+ if (L->getHeader() == &MBB)
+ for (const MachineBasicBlock *Pred : MBB.predecessors())
+ if (L->contains(Pred))
+ --N;
+ NumPredsLeft[MBB.getNumber()] = N;
+ }
+
+ // Topological sort the CFG, with additional constraints:
+ // - Between a loop header and the last block in the loop, there can be
+ // no blocks not dominated by the loop header.
+ // - It's desirable to preserve the original block order when possible.
+ // We use two ready lists; Preferred and Ready. Preferred has recently
+  // processed successors, to help preserve block sequences from the original
+ // order. Ready has the remaining ready blocks.
+ PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
+ CompareBlockNumbers>
+ Preferred;
+ PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
+ CompareBlockNumbersBackwards>
+ Ready;
+ SmallVector<Entry, 4> Loops;
+ for (MachineBasicBlock *MBB = &MF.front();;) {
+ const MachineLoop *L = MLI.getLoopFor(MBB);
+ if (L) {
+ // If MBB is a loop header, add it to the active loop list. We can't put
+ // any blocks that it doesn't dominate until we see the end of the loop.
+ if (L->getHeader() == MBB)
+ Loops.push_back(Entry(L));
+ // For each active loop the block is in, decrement the count. If MBB is
+ // the last block in an active loop, take it off the list and pick up any
+ // blocks deferred because the header didn't dominate them.
+ for (Entry &E : Loops)
+ if (E.Loop->contains(MBB) && --E.NumBlocksLeft == 0)
+ for (auto DeferredBlock : E.Deferred)
+ Ready.push(DeferredBlock);
+ while (!Loops.empty() && Loops.back().NumBlocksLeft == 0)
+ Loops.pop_back();
+ }
+ // The main topological sort logic.
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ // Ignore backedges.
+ if (MachineLoop *SuccL = MLI.getLoopFor(Succ))
+ if (SuccL->getHeader() == Succ && SuccL->contains(MBB))
+ continue;
+ // Decrement the predecessor count. If it's now zero, it's ready.
+ if (--NumPredsLeft[Succ->getNumber()] == 0)
+ Preferred.push(Succ);
+ }
+ // Determine the block to follow MBB. First try to find a preferred block,
+ // to preserve the original block order when possible.
+ MachineBasicBlock *Next = nullptr;
+ while (!Preferred.empty()) {
+ Next = Preferred.top();
+ Preferred.pop();
+      // If Next isn't dominated by the top active loop header, defer it
+      // until that loop is done.
+ if (!Loops.empty() &&
+ !MDT.dominates(Loops.back().Loop->getHeader(), Next)) {
+ Loops.back().Deferred.push_back(Next);
+ Next = nullptr;
+ continue;
+ }
+ // If Next was originally ordered before MBB, and it isn't because it was
+ // loop-rotated above the header, it's not preferred.
+ if (Next->getNumber() < MBB->getNumber() &&
+ (!L || !L->contains(Next) ||
+ L->getHeader()->getNumber() < Next->getNumber())) {
+ Ready.push(Next);
+ Next = nullptr;
+ continue;
+ }
+ break;
+ }
+ // If we didn't find a suitable block in the Preferred list, check the
+ // general Ready list.
+ if (!Next) {
+ // If there are no more blocks to process, we're done.
+ if (Ready.empty()) {
+ MaybeUpdateTerminator(MBB);
+ break;
+ }
+ for (;;) {
+ Next = Ready.top();
+ Ready.pop();
+ // If Next isn't dominated by the top active loop header, defer it until
+ // that loop is done.
+ if (!Loops.empty() &&
+ !MDT.dominates(Loops.back().Loop->getHeader(), Next)) {
+ Loops.back().Deferred.push_back(Next);
+ continue;
+ }
+ break;
+ }
+ }
+ // Move the next block into place and iterate.
+ Next->moveAfter(MBB);
+ MaybeUpdateTerminator(MBB);
+ MBB = Next;
+ }
+ assert(Loops.empty() && "Active loop list not finished");
+ MF.RenumberBlocks();
+
+#ifndef NDEBUG
+ SmallSetVector<MachineLoop *, 8> OnStack;
+
+ // Insert a sentinel representing the degenerate loop that starts at the
+ // function entry block and includes the entire function as a "loop" that
+ // executes once.
+ OnStack.insert(nullptr);
+
+ for (auto &MBB : MF) {
+ assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");
+
+ MachineLoop *Loop = MLI.getLoopFor(&MBB);
+ if (Loop && &MBB == Loop->getHeader()) {
+ // Loop header. The loop predecessor should be sorted above, and the other
+ // predecessors should be backedges below.
+ for (auto Pred : MBB.predecessors())
+ assert(
+ (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) &&
+ "Loop header predecessors must be loop predecessors or backedges");
+ assert(OnStack.insert(Loop) && "Loops should be declared at most once.");
+ } else {
+ // Not a loop header. All predecessors should be sorted above.
+ for (auto Pred : MBB.predecessors())
+ assert(Pred->getNumber() < MBB.getNumber() &&
+ "Non-loop-header predecessors should be topologically sorted");
+ assert(OnStack.count(MLI.getLoopFor(&MBB)) &&
+ "Blocks must be nested in their loops");
+ }
+ while (OnStack.size() > 1 && &MBB == LoopBottom(OnStack.back()))
+ OnStack.pop_back();
+ }
+ assert(OnStack.pop_back_val() == nullptr &&
+ "The function entry block shouldn't actually be a loop header");
+ assert(OnStack.empty() &&
+ "Control flow stack pushes and pops should be balanced.");
+#endif
+}
+
+bool WebAssemblyCFGSort::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** CFG Sorting **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ const auto &MLI = getAnalysis<MachineLoopInfo>();
+ auto &MDT = getAnalysis<MachineDominatorTree>();
+ // Liveness is not tracked for VALUE_STACK physreg.
+ MF.getRegInfo().invalidateLiveness();
+
+ // Sort the blocks, with contiguous loops.
+ SortBlocks(MF, MLI, MDT);
+
+ return true;
+}
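
The sort above is Kahn's topological sort with two ready lists: Preferred
pops the lowest-numbered block so runs of the original layout are extended,
and any preferred block that was originally ordered before the current one is
demoted to Ready. A toy model on a diamond CFG (loop handling omitted; plain
ints stand in for MachineBasicBlock numbers):

  #include <cstdio>
  #include <functional>
  #include <queue>
  #include <vector>

  int main() {
    // CFG: 0 -> {1, 2}, 1 -> {3}, 2 -> {3}; original order is 0, 1, 2, 3.
    std::vector<std::vector<int>> Succs = {{1, 2}, {3}, {3}, {}};
    std::vector<int> NumPredsLeft = {0, 1, 1, 2};

    std::priority_queue<int, std::vector<int>, std::greater<int>>
        Preferred; // pops lowest number, like CompareBlockNumbers
    std::priority_queue<int> Ready;

    int MBB = 0;
    printf("%d", MBB);
    for (;;) {
      for (int Succ : Succs[MBB])
        if (--NumPredsLeft[Succ] == 0)
          Preferred.push(Succ);
      int Next = -1;
      while (!Preferred.empty()) {
        Next = Preferred.top();
        Preferred.pop();
        if (Next < MBB) { // originally ordered earlier: demote
          Ready.push(Next);
          Next = -1;
          continue;
        }
        break;
      }
      if (Next < 0) {
        if (Ready.empty())
          break;
        Next = Ready.top();
        Ready.pop();
      }
      printf(" %d", Next);
      MBB = Next;
    }
    printf("\n"); // prints: 0 1 2 3
  }
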
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index 49b9754e6b62..bd11d1b46906 100644
--- a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -10,12 +10,7 @@
/// \file
/// \brief This file implements a CFG stacking pass.
///
-/// This pass reorders the blocks in a function to put them into topological
-/// order, ignoring loop backedges, and without any loop being interrupted
-/// by a block not dominated by the loop header, with special care to keep the
-/// order as similar as possible to the original order.
-///
-/// Then, it inserts BLOCK and LOOP markers to mark the start of scopes, since
+/// This pass inserts BLOCK and LOOP markers to mark the start of scopes, since
/// scope boundaries serve as the labels for WebAssembly's control transfers.
///
/// This is sufficient to convert arbitrary CFGs into a form that works on
@@ -28,8 +23,6 @@
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyUtilities.h"
-#include "llvm/ADT/PriorityQueue.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -68,217 +61,6 @@ FunctionPass *llvm::createWebAssemblyCFGStackify() {
return new WebAssemblyCFGStackify();
}
-/// Return the "bottom" block of a loop. This differs from
-/// MachineLoop::getBottomBlock in that it works even if the loop is
-/// discontiguous.
-static MachineBasicBlock *LoopBottom(const MachineLoop *Loop) {
- MachineBasicBlock *Bottom = Loop->getHeader();
- for (MachineBasicBlock *MBB : Loop->blocks())
- if (MBB->getNumber() > Bottom->getNumber())
- Bottom = MBB;
- return Bottom;
-}
-
-static void MaybeUpdateTerminator(MachineBasicBlock *MBB) {
-#ifndef NDEBUG
- bool AnyBarrier = false;
-#endif
- bool AllAnalyzable = true;
- for (const MachineInstr &Term : MBB->terminators()) {
-#ifndef NDEBUG
- AnyBarrier |= Term.isBarrier();
-#endif
- AllAnalyzable &= Term.isBranch() && !Term.isIndirectBranch();
- }
- assert((AnyBarrier || AllAnalyzable) &&
- "AnalyzeBranch needs to analyze any block with a fallthrough");
- if (AllAnalyzable)
- MBB->updateTerminator();
-}
-
-namespace {
-/// Sort blocks by their number.
-struct CompareBlockNumbers {
- bool operator()(const MachineBasicBlock *A,
- const MachineBasicBlock *B) const {
- return A->getNumber() > B->getNumber();
- }
-};
-/// Sort blocks by their number in the opposite order..
-struct CompareBlockNumbersBackwards {
- bool operator()(const MachineBasicBlock *A,
- const MachineBasicBlock *B) const {
- return A->getNumber() < B->getNumber();
- }
-};
-/// Bookkeeping for a loop to help ensure that we don't mix blocks not dominated
-/// by the loop header among the loop's blocks.
-struct Entry {
- const MachineLoop *Loop;
- unsigned NumBlocksLeft;
-
- /// List of blocks not dominated by Loop's header that are deferred until
- /// after all of Loop's blocks have been seen.
- std::vector<MachineBasicBlock *> Deferred;
-
- explicit Entry(const MachineLoop *L)
- : Loop(L), NumBlocksLeft(L->getNumBlocks()) {}
-};
-}
-
-/// Sort the blocks, taking special care to make sure that loops are not
-/// interrupted by blocks not dominated by their header.
-/// TODO: There are many opportunities for improving the heuristics here.
-/// Explore them.
-static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI,
- const MachineDominatorTree &MDT) {
- // Prepare for a topological sort: Record the number of predecessors each
- // block has, ignoring loop backedges.
- MF.RenumberBlocks();
- SmallVector<unsigned, 16> NumPredsLeft(MF.getNumBlockIDs(), 0);
- for (MachineBasicBlock &MBB : MF) {
- unsigned N = MBB.pred_size();
- if (MachineLoop *L = MLI.getLoopFor(&MBB))
- if (L->getHeader() == &MBB)
- for (const MachineBasicBlock *Pred : MBB.predecessors())
- if (L->contains(Pred))
- --N;
- NumPredsLeft[MBB.getNumber()] = N;
- }
-
- // Topological sort the CFG, with additional constraints:
- // - Between a loop header and the last block in the loop, there can be
- // no blocks not dominated by the loop header.
- // - It's desirable to preserve the original block order when possible.
- // We use two ready lists; Preferred and Ready. Preferred has recently
- // processed sucessors, to help preserve block sequences from the original
- // order. Ready has the remaining ready blocks.
- PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
- CompareBlockNumbers>
- Preferred;
- PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>,
- CompareBlockNumbersBackwards>
- Ready;
- SmallVector<Entry, 4> Loops;
- for (MachineBasicBlock *MBB = &MF.front();;) {
- const MachineLoop *L = MLI.getLoopFor(MBB);
- if (L) {
- // If MBB is a loop header, add it to the active loop list. We can't put
- // any blocks that it doesn't dominate until we see the end of the loop.
- if (L->getHeader() == MBB)
- Loops.push_back(Entry(L));
- // For each active loop the block is in, decrement the count. If MBB is
- // the last block in an active loop, take it off the list and pick up any
- // blocks deferred because the header didn't dominate them.
- for (Entry &E : Loops)
- if (E.Loop->contains(MBB) && --E.NumBlocksLeft == 0)
- for (auto DeferredBlock : E.Deferred)
- Ready.push(DeferredBlock);
- while (!Loops.empty() && Loops.back().NumBlocksLeft == 0)
- Loops.pop_back();
- }
- // The main topological sort logic.
- for (MachineBasicBlock *Succ : MBB->successors()) {
- // Ignore backedges.
- if (MachineLoop *SuccL = MLI.getLoopFor(Succ))
- if (SuccL->getHeader() == Succ && SuccL->contains(MBB))
- continue;
- // Decrement the predecessor count. If it's now zero, it's ready.
- if (--NumPredsLeft[Succ->getNumber()] == 0)
- Preferred.push(Succ);
- }
- // Determine the block to follow MBB. First try to find a preferred block,
- // to preserve the original block order when possible.
- MachineBasicBlock *Next = nullptr;
- while (!Preferred.empty()) {
- Next = Preferred.top();
- Preferred.pop();
- // If Next isn't dominated by the top active loop header, defer it until that
- // loop is done.
- if (!Loops.empty() &&
- !MDT.dominates(Loops.back().Loop->getHeader(), Next)) {
- Loops.back().Deferred.push_back(Next);
- Next = nullptr;
- continue;
- }
- // If Next was originally ordered before MBB, and that isn't simply because
- // it was loop-rotated above the header, it's not preferred.
- if (Next->getNumber() < MBB->getNumber() &&
- (!L || !L->contains(Next) ||
- L->getHeader()->getNumber() < Next->getNumber())) {
- Ready.push(Next);
- Next = nullptr;
- continue;
- }
- break;
- }
- // If we didn't find a suitable block in the Preferred list, check the
- // general Ready list.
- if (!Next) {
- // If there are no more blocks to process, we're done.
- if (Ready.empty()) {
- MaybeUpdateTerminator(MBB);
- break;
- }
- for (;;) {
- Next = Ready.top();
- Ready.pop();
- // If Next isn't dominated by the top active loop header, defer it until
- // that loop is done.
- if (!Loops.empty() &&
- !MDT.dominates(Loops.back().Loop->getHeader(), Next)) {
- Loops.back().Deferred.push_back(Next);
- continue;
- }
- break;
- }
- }
- // Move the next block into place and iterate.
- Next->moveAfter(MBB);
- MaybeUpdateTerminator(MBB);
- MBB = Next;
- }
- assert(Loops.empty() && "Active loop list not finished");
- MF.RenumberBlocks();
-
-#ifndef NDEBUG
- SmallSetVector<MachineLoop *, 8> OnStack;
-
- // Insert a sentinel representing the degenerate loop that starts at the
- // function entry block and includes the entire function as a "loop" that
- // executes once.
- OnStack.insert(nullptr);
-
- for (auto &MBB : MF) {
- assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative.");
-
- MachineLoop *Loop = MLI.getLoopFor(&MBB);
- if (Loop && &MBB == Loop->getHeader()) {
- // Loop header. The loop predecessor should be sorted above, and the other
- // predecessors should be backedges below.
- for (auto Pred : MBB.predecessors())
- assert(
- (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) &&
- "Loop header predecessors must be loop predecessors or backedges");
- assert(OnStack.insert(Loop) && "Loops should be declared at most once.");
- } else {
- // Not a loop header. All predecessors should be sorted above.
- for (auto Pred : MBB.predecessors())
- assert(Pred->getNumber() < MBB.getNumber() &&
- "Non-loop-header predecessors should be topologically sorted");
- assert(OnStack.count(MLI.getLoopFor(&MBB)) &&
- "Blocks must be nested in their loops");
- }
- while (OnStack.size() > 1 && &MBB == LoopBottom(OnStack.back()))
- OnStack.pop_back();
- }
- assert(OnStack.pop_back_val() == nullptr &&
- "The function entry block shouldn't actually be a loop header");
- assert(OnStack.empty() &&
- "Control flow stack pushes and pops should be balanced.");
-#endif
-}
-
/// Test whether Pred has any terminators explicitly branching to MBB, as
/// opposed to falling through. Note that it's possible (e.g. in unoptimized
/// code) for a branch instruction to both branch to a block and fall through
@@ -488,6 +270,15 @@ static void FixEndsAtEndOfFunction(
}
}
+// WebAssembly functions end with an end instruction, as if the function body
+// were a block.
+static void AppendEndToFunction(
+ MachineFunction &MF,
+ const WebAssemblyInstrInfo &TII) {
+ BuildMI(MF.back(), MF.back().end(), DebugLoc(),
+ TII.get(WebAssembly::END_FUNCTION));
+}
+
/// Insert LOOP and BLOCK markers at appropriate places.
static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
const WebAssemblyInstrInfo &TII,
@@ -555,6 +346,11 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
// Fix up block/loop signatures at the end of the function to conform to
// WebAssembly's rules.
FixEndsAtEndOfFunction(MF, MFI, BlockTops, LoopTops);
+
+ // Add an end instruction at the end of the function body.
+ if (!MF.getSubtarget<WebAssemblySubtarget>()
+ .getTargetTriple().isOSBinFormatELF())
+ AppendEndToFunction(MF, TII);
}
bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
@@ -569,9 +365,6 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
MF.getRegInfo().invalidateLiveness();
- // Sort the blocks, with contiguous loops.
- SortBlocks(MF, MLI, MDT);
-
// Place the BLOCK and LOOP markers to indicate the beginnings of scopes.
PlaceMarkers(MF, MLI, TII, MDT, MFI);
diff --git a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
index fc0a01ca30e5..bc6360aafd61 100644
--- a/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
@@ -97,15 +97,28 @@ bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) {
MI.setDesc(Desc);
// Rewrite argument order
- auto Uses = MI.explicit_uses();
- MachineInstr::mop_iterator it = Uses.begin();
- const MachineOperand MO = *it;
+ SmallVector<MachineOperand, 8> Ops;
+
+ // Set up a placeholder for the type signature immediate.
+ Ops.push_back(MachineOperand::CreateImm(0));
// Set up the flags immediate, which currently has no defined flags
// so it's always zero.
- it->ChangeToImmediate(0);
-
- MI.addOperand(MF, MO);
+ Ops.push_back(MachineOperand::CreateImm(0));
+
+ for (const MachineOperand &MO :
+ make_range(MI.operands_begin() +
+ MI.getDesc().getNumDefs() + 1,
+ MI.operands_begin() +
+ MI.getNumExplicitOperands()))
+ Ops.push_back(MO);
+ Ops.push_back(MI.getOperand(MI.getDesc().getNumDefs()));
+
+ // Replace the instruction's operands.
+ while (MI.getNumOperands() > MI.getDesc().getNumDefs())
+ MI.RemoveOperand(MI.getNumOperands() - 1);
+ for (const MachineOperand &MO : Ops)
+ MI.addOperand(MO);
DEBUG(dbgs() << " After transform: " << MI);
Changed = true;
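
The hunk above rewrites a call_indirect's operand list from [callee, args...]
to [typeindex, flags, args..., callee]. A minimal standalone sketch of that
reordering, using a plain std::vector and hypothetical names rather than the
MachineOperand API:

    #include <cassert>
    #include <vector>

    // Sketch: given explicit uses [callee, arg0, arg1, ...], produce the wasm
    // operand order [typeindex, flags, arg0, arg1, ..., callee].
    static std::vector<int> reorderCallIndirect(const std::vector<int> &Uses) {
      assert(!Uses.empty() && "expected at least the callee operand");
      std::vector<int> Ops;
      Ops.push_back(0); // placeholder for the type-signature immediate
      Ops.push_back(0); // flags immediate; no flags are defined yet, so zero
      Ops.insert(Ops.end(), Uses.begin() + 1, Uses.end()); // call arguments
      Ops.push_back(Uses.front()); // the callee moves to the end
      return Ops;
    }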
diff --git a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index 04ede7ff110c..41249117ae0e 100644
--- a/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -31,6 +31,14 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-explicit-locals"
+// A command-line option to disable this pass. Note that this produces output
+// which is not valid WebAssembly, though it may be more convenient for
+// writing LLVM unit tests.
+static cl::opt<bool> DisableWebAssemblyExplicitLocals(
+ "disable-wasm-explicit-locals", cl::ReallyHidden,
+ cl::desc("WebAssembly: Disable emission of get_local/set_local."),
+ cl::init(false));
+
namespace {
class WebAssemblyExplicitLocals final : public MachineFunctionPass {
StringRef getPassName() const override {
@@ -60,7 +68,25 @@ FunctionPass *llvm::createWebAssemblyExplicitLocals() {
/// if it doesn't yet have one.
static unsigned getLocalId(DenseMap<unsigned, unsigned> &Reg2Local,
unsigned &CurLocal, unsigned Reg) {
- return Reg2Local.insert(std::make_pair(Reg, CurLocal++)).first->second;
+ auto P = Reg2Local.insert(std::make_pair(Reg, CurLocal));
+ if (P.second)
+ ++CurLocal;
+ return P.first->second;
+}
+
+/// Get the appropriate drop opcode for the given register class.
+static unsigned getDropOpcode(const TargetRegisterClass *RC) {
+ if (RC == &WebAssembly::I32RegClass)
+ return WebAssembly::DROP_I32;
+ if (RC == &WebAssembly::I64RegClass)
+ return WebAssembly::DROP_I64;
+ if (RC == &WebAssembly::F32RegClass)
+ return WebAssembly::DROP_F32;
+ if (RC == &WebAssembly::F64RegClass)
+ return WebAssembly::DROP_F64;
+ if (RC == &WebAssembly::V128RegClass)
+ return WebAssembly::DROP_V128;
+ llvm_unreachable("Unexpected register class");
}
/// Get the appropriate get_local opcode for the given register class.
@@ -146,6 +172,10 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
"********** Function: "
<< MF.getName() << '\n');
+ // Disable this pass if directed to do so.
+ if (DisableWebAssemblyExplicitLocals)
+ return false;
+
// Disable this pass if we aren't doing direct wasm object emission.
if (MF.getSubtarget<WebAssemblySubtarget>()
.getTargetTriple().isOSBinFormatELF())
@@ -176,6 +206,12 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// Start assigning local numbers after the last parameter.
unsigned CurLocal = MFI.getParams().size();
+ // Precompute the set of registers that are unused, so that we can insert
+ // drops to their defs.
+ BitVector UseEmpty(MRI.getNumVirtRegs());
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i)
+ UseEmpty[i] = MRI.use_empty(TargetRegisterInfo::index2VirtReg(i));
+
// Visit each instruction in the function.
for (MachineBasicBlock &MBB : MF) {
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
@@ -224,15 +260,26 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
assert(MI.getDesc().getNumDefs() <= 1);
if (MI.getDesc().getNumDefs() == 1) {
unsigned OldReg = MI.getOperand(0).getReg();
- if (!MFI.isVRegStackified(OldReg) && !MRI.use_empty(OldReg)) {
- unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
+ if (!MFI.isVRegStackified(OldReg)) {
const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
unsigned NewReg = MRI.createVirtualRegister(RC);
auto InsertPt = std::next(MachineBasicBlock::iterator(&MI));
- unsigned Opc = getSetLocalOpcode(RC);
- BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc))
- .addImm(LocalId)
- .addReg(NewReg);
+ if (MI.getOpcode() == WebAssembly::IMPLICIT_DEF) {
+ MI.eraseFromParent();
+ Changed = true;
+ continue;
+ }
+ if (UseEmpty[TargetRegisterInfo::virtReg2Index(OldReg)]) {
+ unsigned Opc = getDropOpcode(RC);
+ BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc))
+ .addReg(NewReg);
+ } else {
+ unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
+ unsigned Opc = getSetLocalOpcode(RC);
+ BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc))
+ .addImm(LocalId)
+ .addReg(NewReg);
+ }
MI.getOperand(0).setReg(NewReg);
MFI.stackifyVReg(NewReg);
Changed = true;
@@ -278,13 +325,16 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
}
// Define the locals.
+ // TODO: Sort the locals for better compression.
+ MFI.setNumLocals(CurLocal - MFI.getParams().size());
for (size_t i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
auto I = Reg2Local.find(Reg);
if (I == Reg2Local.end() || I->second < MFI.getParams().size())
continue;
- MFI.addLocal(typeForRegClass(MRI.getRegClass(Reg)));
+ MFI.setLocal(I->second - MFI.getParams().size(),
+ typeForRegClass(MRI.getRegClass(Reg)));
Changed = true;
}
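
The getLocalId change above makes local allocation idempotent: the counter
advances only when the map insertion actually takes place, so querying the
same register twice no longer burns an extra local index. A standalone sketch
of the insert-or-get pattern, with std::map standing in for DenseMap and a
hypothetical name:

    #include <map>

    // Sketch: map a virtual register to a stable local index, allocating a
    // fresh index only the first time the register is seen.
    static unsigned getOrAssignLocal(std::map<unsigned, unsigned> &Reg2Local,
                                     unsigned &CurLocal, unsigned Reg) {
      auto P = Reg2Local.insert({Reg, CurLocal});
      if (P.second) // newly inserted, so the candidate index is now taken
        ++CurLocal;
      return P.first->second;
    }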
diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index bc7020fded8c..53698ff09b10 100644
--- a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -116,6 +116,8 @@ private:
case MVT::f32:
case MVT::f64:
return VT;
+ case MVT::f16:
+ return MVT::f32;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
@@ -594,12 +596,12 @@ bool WebAssemblyFastISel::fastLowerArguments() {
unsigned i = 0;
for (auto const &Arg : F->args()) {
- const AttributeSet &Attrs = F->getAttributes();
- if (Attrs.hasAttribute(i+1, Attribute::ByVal) ||
- Attrs.hasAttribute(i+1, Attribute::SwiftSelf) ||
- Attrs.hasAttribute(i+1, Attribute::SwiftError) ||
- Attrs.hasAttribute(i+1, Attribute::InAlloca) ||
- Attrs.hasAttribute(i+1, Attribute::Nest))
+ const AttributeList &Attrs = F->getAttributes();
+ if (Attrs.hasParamAttribute(i, Attribute::ByVal) ||
+ Attrs.hasParamAttribute(i, Attribute::SwiftSelf) ||
+ Attrs.hasParamAttribute(i, Attribute::SwiftError) ||
+ Attrs.hasParamAttribute(i, Attribute::InAlloca) ||
+ Attrs.hasParamAttribute(i, Attribute::Nest))
return false;
Type *ArgTy = Arg.getType();
@@ -744,19 +746,19 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
if (ArgTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
return false;
- const AttributeSet &Attrs = Call->getAttributes();
- if (Attrs.hasAttribute(i+1, Attribute::ByVal) ||
- Attrs.hasAttribute(i+1, Attribute::SwiftSelf) ||
- Attrs.hasAttribute(i+1, Attribute::SwiftError) ||
- Attrs.hasAttribute(i+1, Attribute::InAlloca) ||
- Attrs.hasAttribute(i+1, Attribute::Nest))
+ const AttributeList &Attrs = Call->getAttributes();
+ if (Attrs.hasParamAttribute(i, Attribute::ByVal) ||
+ Attrs.hasParamAttribute(i, Attribute::SwiftSelf) ||
+ Attrs.hasParamAttribute(i, Attribute::SwiftError) ||
+ Attrs.hasParamAttribute(i, Attribute::InAlloca) ||
+ Attrs.hasParamAttribute(i, Attribute::Nest))
return false;
unsigned Reg;
- if (Attrs.hasAttribute(i+1, Attribute::SExt))
+ if (Attrs.hasParamAttribute(i, Attribute::SExt))
Reg = getRegForSignedValue(V);
- else if (Attrs.hasAttribute(i+1, Attribute::ZExt))
+ else if (Attrs.hasParamAttribute(i, Attribute::ZExt))
Reg = getRegForUnsignedValue(V);
else
Reg = getRegForValue(V);
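
Both FastISel hunks apply the same mechanical rule from the AttributeSet to
AttributeList migration: the old interface addressed parameter i at attribute
index i + 1, with index 0 reserved for the return value, while
hasParamAttribute takes the zero-based parameter number directly. A sketch of
the retired convention (hypothetical helper, not an LLVM API):

    // Sketch: the old 1-based attribute slot for a zero-based parameter
    // number. Slot 0 addressed the return value, so parameter i was slot i+1.
    static unsigned oldAttrSlotForParam(unsigned ParamNo) {
      return ParamNo + 1;
    }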
diff --git a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
index adf904ee0269..76a2ff3f9803 100644
--- a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
@@ -84,7 +84,7 @@ static void FindUses(Value *V, Function &F,
// - Call with fewer arguments than needed: arguments are filled in with undef
// - Return value is not needed: drop it
// - Return value needed but not present: supply an undef
-//
+//
// For now, return nullptr without creating a wrapper if the wrapper cannot
// be generated due to incompatible types.
static Function *CreateWrapper(Function *F, FunctionType *Ty) {
@@ -148,6 +148,11 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
if (!Ty)
continue;
+ // Wasm varargs are not ABI-compatible with non-varargs. Just ignore
+ // such casts for now.
+ if (Ty->isVarArg() || F->isVarArg())
+ continue;
+
auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr));
if (Pair.second)
Pair.first->second = CreateWrapper(F, Ty);
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index a6a2c0bf06ae..4209bc333f23 100644
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -24,10 +24,11 @@
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "WebAssemblyTargetMachine.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -101,25 +102,35 @@ static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF,
MachineBasicBlock::iterator &InsertAddr,
MachineBasicBlock::iterator &InsertStore,
const DebugLoc &DL) {
- const char *ES = "__stack_pointer";
- auto *SPSymbol = MF.createExternalSymbolName(ES);
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetRegisterClass *PtrRC =
- MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
- unsigned Zero = MRI.createVirtualRegister(PtrRC);
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- BuildMI(MBB, InsertAddr, DL, TII->get(WebAssembly::CONST_I32), Zero)
- .addImm(0);
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)),
- MachineMemOperand::MOStore, 4, 4);
- BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32))
- .addImm(2) // p2align
- .addExternalSymbol(SPSymbol)
- .addReg(Zero)
- .addReg(SrcReg)
- .addMemOperand(MMO);
+ if (MF.getSubtarget<WebAssemblySubtarget>()
+ .getTargetTriple().isOSBinFormatELF()) {
+ const char *ES = "__stack_pointer";
+ auto *SPSymbol = MF.createExternalSymbolName(ES);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterClass *PtrRC =
+ MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
+ unsigned Zero = MRI.createVirtualRegister(PtrRC);
+
+ BuildMI(MBB, InsertAddr, DL, TII->get(WebAssembly::CONST_I32), Zero)
+ .addImm(0);
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)),
+ MachineMemOperand::MOStore, 4, 4);
+ BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32))
+ .addImm(2) // p2align
+ .addExternalSymbol(SPSymbol)
+ .addReg(Zero)
+ .addReg(SrcReg)
+ .addMemOperand(MMO);
+ } else {
+ MachineModuleInfoWasm &MMIW =
+ MF.getMMI().getObjFileInfo<MachineModuleInfoWasm>();
+ BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::SET_GLOBAL_I32))
+ .addImm(MMIW.getStackPointerGlobal())
+ .addReg(SrcReg);
+ }
}
MachineBasicBlock::iterator
@@ -151,27 +162,50 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
auto &MRI = MF.getRegInfo();
auto InsertPt = MBB.begin();
+ while (InsertPt != MBB.end() && WebAssembly::isArgument(*InsertPt))
+ ++InsertPt;
DebugLoc DL;
const TargetRegisterClass *PtrRC =
MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
- unsigned Zero = MRI.createVirtualRegister(PtrRC);
unsigned SPReg = WebAssembly::SP32;
if (StackSize)
SPReg = MRI.createVirtualRegister(PtrRC);
- const char *ES = "__stack_pointer";
- auto *SPSymbol = MF.createExternalSymbolName(ES);
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), Zero)
- .addImm(0);
- MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
- MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)),
- MachineMemOperand::MOLoad, 4, 4);
- // Load the SP value.
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg)
- .addImm(2) // p2align
- .addExternalSymbol(SPSymbol)
- .addReg(Zero) // addr
- .addMemOperand(LoadMMO);
+ if (MF.getSubtarget<WebAssemblySubtarget>()
+ .getTargetTriple().isOSBinFormatELF()) {
+ const char *ES = "__stack_pointer";
+ auto *SPSymbol = MF.createExternalSymbolName(ES);
+ unsigned Zero = MRI.createVirtualRegister(PtrRC);
+
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), Zero)
+ .addImm(0);
+ MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)),
+ MachineMemOperand::MOLoad, 4, 4);
+ // Load the SP value.
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg)
+ .addImm(2) // p2align
+ .addExternalSymbol(SPSymbol)
+ .addReg(Zero) // addr
+ .addMemOperand(LoadMMO);
+ } else {
+ auto &MMIW = MF.getMMI().getObjFileInfo<MachineModuleInfoWasm>();
+ if (!MMIW.hasStackPointerGlobal()) {
+ MMIW.setStackPointerGlobal(MMIW.getGlobals().size());
+
+ // Create the stack-pointer global. For now, just use the
+ // Emscripten/Binaryen ABI names.
+ wasm::Global G;
+ G.Type = wasm::ValType::I32;
+ G.Mutable = true;
+ G.InitialValue = 0;
+ G.InitialModule = "env";
+ G.InitialName = "STACKTOP";
+ MMIW.addGlobal(G);
+ }
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::GET_GLOBAL_I32), SPReg)
+ .addImm(MMIW.getStackPointerGlobal());
+ }
bool HasBP = hasBP(MF);
if (HasBP) {
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 6a7f75a6b3a1..31a5ca1f4cc2 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -95,6 +95,11 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Support minnan and maxnan, which otherwise default to expand.
setOperationAction(ISD::FMINNAN, T, Legal);
setOperationAction(ISD::FMAXNAN, T, Legal);
+ // WebAssembly currently has no builtin f16 support.
+ setOperationAction(ISD::FP16_TO_FP, T, Expand);
+ setOperationAction(ISD::FP_TO_FP16, T, Expand);
+ setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
+ setTruncStoreAction(T, MVT::f16, Expand);
}
for (auto T : {MVT::i32, MVT::i64}) {
@@ -253,7 +258,8 @@ bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
return true;
}
-bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
+bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
+ AttributeList Attr) const {
// The current thinking is that wasm engines will perform this optimization,
// so we can save on code size.
return true;
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 5bc723028e63..99d3d0d558f5 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -58,7 +58,7 @@ class WebAssemblyTargetLowering final : public TargetLowering {
unsigned AS) const override;
bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, unsigned Align,
bool *Fast) const override;
- bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;
+ bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index 047f4be066c0..73d1d4be293b 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -30,13 +30,15 @@ multiclass CALL<WebAssemblyRegClass vt, string prefix> {
[(set vt:$dst, (WebAssemblycall1 (i32 imm:$callee)))],
!strconcat(prefix, "call\t$dst, $callee"),
0x10>;
+
let isCodeGenOnly = 1 in {
def PCALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops),
[(set vt:$dst, (WebAssemblycall1 I32:$callee))],
"PSEUDO CALL INDIRECT\t$callee">;
} // isCodeGenOnly = 1
- def CALL_INDIRECT_#vt : I<(outs vt:$dst), (ins i32imm:$flags, variable_ops),
+ def CALL_INDIRECT_#vt : I<(outs vt:$dst),
+ (ins TypeIndex:$type, i32imm:$flags, variable_ops),
[],
!strconcat(prefix, "call_indirect\t$dst"),
0x11>;
@@ -48,6 +50,7 @@ multiclass SIMD_CALL<ValueType vt, string prefix> {
(WebAssemblycall1 (i32 imm:$callee)))],
!strconcat(prefix, "call\t$dst, $callee"),
0x10>;
+
let isCodeGenOnly = 1 in {
def PCALL_INDIRECT_#vt : SIMD_I<(outs V128:$dst),
(ins I32:$callee, variable_ops),
@@ -57,7 +60,8 @@ multiclass SIMD_CALL<ValueType vt, string prefix> {
} // isCodeGenOnly = 1
def CALL_INDIRECT_#vt : SIMD_I<(outs V128:$dst),
- (ins i32imm:$flags, variable_ops),
+ (ins TypeIndex:$type, i32imm:$flags,
+ variable_ops),
[],
!strconcat(prefix, "call_indirect\t$dst"),
0x11>;
@@ -76,13 +80,15 @@ let Uses = [SP32, SP64], isCall = 1 in {
def CALL_VOID : I<(outs), (ins function32_op:$callee, variable_ops),
[(WebAssemblycall0 (i32 imm:$callee))],
"call \t$callee", 0x10>;
+
let isCodeGenOnly = 1 in {
def PCALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops),
[(WebAssemblycall0 I32:$callee)],
"PSEUDO CALL INDIRECT\t$callee">;
} // isCodeGenOnly = 1
- def CALL_INDIRECT_VOID : I<(outs), (ins i32imm:$flags, variable_ops),
+ def CALL_INDIRECT_VOID : I<(outs),
+ (ins TypeIndex:$type, i32imm:$flags, variable_ops),
[],
"call_indirect\t", 0x11>;
} // Uses = [SP32,SP64], isCall = 1
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 1146431e6b77..39cb1ca336f2 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -64,9 +64,12 @@ let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in {
def BLOCK : I<(outs), (ins Signature:$sig), [], "block \t$sig", 0x02>;
def LOOP : I<(outs), (ins Signature:$sig), [], "loop \t$sig", 0x03>;
-// END_BLOCK and END_LOOP are represented with the same opcode in wasm.
+// END_BLOCK, END_LOOP, and END_FUNCTION are represented with the same opcode
+// in wasm.
def END_BLOCK : I<(outs), (ins), [], "end_block", 0x0b>;
def END_LOOP : I<(outs), (ins), [], "end_loop", 0x0b>;
+let isTerminator = 1, isBarrier = 1 in
+def END_FUNCTION : I<(outs), (ins), [], "end_function", 0x0b>;
} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
multiclass RETURN<WebAssemblyRegClass vt> {
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
index 030be0862a56..03c9c1f8d5c0 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
@@ -55,8 +55,8 @@ defm EQ : ComparisonFP<SETOEQ, "eq ", 0x5b, 0x61>;
defm NE : ComparisonFP<SETUNE, "ne ", 0x5c, 0x62>;
} // isCommutable = 1
defm LT : ComparisonFP<SETOLT, "lt ", 0x5d, 0x63>;
-defm LE : ComparisonFP<SETOLE, "le ", 0x5e, 0x64>;
-defm GT : ComparisonFP<SETOGT, "gt ", 0x5f, 0x65>;
+defm LE : ComparisonFP<SETOLE, "le ", 0x5f, 0x65>;
+defm GT : ComparisonFP<SETOGT, "gt ", 0x5e, 0x64>;
defm GE : ComparisonFP<SETOGE, "ge ", 0x60, 0x66>;
} // Defs = [ARGUMENTS]
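
The swap above is an encoding fix rather than a reordering: in the WebAssembly
binary format, gt precedes le, so the old definitions emitted le with gt's
opcode and vice versa. For reference, the spec opcode values as a C++ sketch:

    // WebAssembly MVP binary opcodes for ordered floating-point comparisons.
    enum WasmFPCompareOpcode : unsigned {
      F32Lt = 0x5d, F32Gt = 0x5e, F32Le = 0x5f, F32Ge = 0x60,
      F64Lt = 0x63, F64Gt = 0x64, F64Le = 0x65, F64Ge = 0x66,
    };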
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index 0e2d8bbaf64c..8846952e5af4 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -183,11 +183,9 @@ unsigned WebAssemblyInstrInfo::insertBranch(MachineBasicBlock &MBB,
assert(Cond.size() == 2 && "Expected a flag and a successor block");
if (Cond[0].getImm()) {
- BuildMI(&MBB, DL, get(WebAssembly::BR_IF)).addMBB(TBB).addOperand(Cond[1]);
+ BuildMI(&MBB, DL, get(WebAssembly::BR_IF)).addMBB(TBB).add(Cond[1]);
} else {
- BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS))
- .addMBB(TBB)
- .addOperand(Cond[1]);
+ BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS)).addMBB(TBB).add(Cond[1]);
}
if (!FBB)
return 1;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index dcfd1a42c6aa..a601b575f579 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -74,6 +74,9 @@ def bb_op : Operand<OtherVT>;
let OperandType = "OPERAND_LOCAL" in
def local_op : Operand<i32>;
+let OperandType = "OPERAND_GLOBAL" in
+def global_op : Operand<i32>;
+
let OperandType = "OPERAND_I32IMM" in
def i32imm_op : Operand<i32>;
@@ -104,6 +107,9 @@ def Signature : Operand<i32> {
}
} // OperandType = "OPERAND_SIGNATURE"
+let OperandType = "OPERAND_TYPEINDEX" in
+def TypeIndex : Operand<i32>;
+
} // OperandNamespace = "WebAssembly"
//===----------------------------------------------------------------------===//
@@ -178,6 +184,18 @@ let hasSideEffects = 0 in {
def TEE_LOCAL_#vt : I<(outs vt:$res), (ins local_op:$local, vt:$src), [],
"tee_local\t$res, $local, $src", 0x22>;
+ // Unused values must be dropped in some contexts.
+ def DROP_#vt : I<(outs), (ins vt:$src), [],
+ "drop\t$src", 0x1a>;
+
+ let mayLoad = 1 in
+ def GET_GLOBAL_#vt : I<(outs vt:$res), (ins global_op:$local), [],
+ "get_global\t$res, $local", 0x23>;
+
+ let mayStore = 1 in
+ def SET_GLOBAL_#vt : I<(outs), (ins global_op:$local, vt:$src), [],
+ "set_global\t$local, $src", 0x24>;
+
} // hasSideEffects = 0
}
defm : LOCAL<I32>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index b606ebb0a68d..25d77bb1f234 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -673,9 +673,9 @@ def CURRENT_MEMORY_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
Requires<[HasAddr32]>;
// Grow memory.
-def GROW_MEMORY_I32 : I<(outs), (ins i32imm:$flags, I32:$delta),
+def GROW_MEMORY_I32 : I<(outs I32:$dst), (ins i32imm:$flags, I32:$delta),
[],
- "grow_memory\t$delta", 0x40>,
+ "grow_memory\t$dst, $delta", 0x40>,
Requires<[HasAddr32]>;
} // Defs = [ARGUMENTS]
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
index 7ea5d05a1b21..744a3ed427af 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
@@ -118,7 +118,7 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
// delete the br_unless.
assert(Inverted);
BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::BR_IF))
- .addOperand(MI->getOperand(0))
+ .add(MI->getOperand(0))
.addReg(Cond);
MBB.erase(MI);
}
diff --git a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 72cb1ccbe668..947c0329bb6e 100644
--- a/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -412,7 +412,7 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) {
if (CI->doesNotReturn()) {
if (auto *F = dyn_cast<Function>(CI->getCalledValue()))
F->removeFnAttr(Attribute::NoReturn);
- CI->removeAttribute(AttributeSet::FunctionIndex, Attribute::NoReturn);
+ CI->removeAttribute(AttributeList::FunctionIndex, Attribute::NoReturn);
}
IRBuilder<> IRB(C);
@@ -435,25 +435,20 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) {
// Because we added the pointer to the callee as first argument, all
// argument attribute indices have to be incremented by one.
- SmallVector<AttributeSet, 8> AttributesVec;
- const AttributeSet &InvokePAL = CI->getAttributes();
- CallSite::arg_iterator AI = CI->arg_begin();
- unsigned i = 1; // Argument attribute index starts from 1
- for (unsigned e = CI->getNumArgOperands(); i <= e; ++AI, ++i) {
- if (InvokePAL.hasAttributes(i)) {
- AttrBuilder B(InvokePAL, i);
- AttributesVec.push_back(AttributeSet::get(C, i + 1, B));
- }
- }
- // Add any return attributes.
- if (InvokePAL.hasAttributes(AttributeSet::ReturnIndex))
- AttributesVec.push_back(AttributeSet::get(C, InvokePAL.getRetAttributes()));
- // Add any function attributes.
- if (InvokePAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(C, InvokePAL.getFnAttributes()));
+ SmallVector<AttributeSet, 8> ArgAttributes;
+ const AttributeList &InvokeAL = CI->getAttributes();
+
+ // No attributes for the callee pointer.
+ ArgAttributes.push_back(AttributeSet());
+ // Copy the argument attributes from the original
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i < e; ++i)
+ ArgAttributes.push_back(InvokeAL.getParamAttributes(i));
+
// Reconstruct the AttributesList based on the vector we constructed.
- AttributeSet NewCallPAL = AttributeSet::get(C, AttributesVec);
- NewCall->setAttributes(NewCallPAL);
+ AttributeList NewCallAL =
+ AttributeList::get(C, InvokeAL.getFnAttributes(),
+ InvokeAL.getRetAttributes(), ArgAttributes);
+ NewCall->setAttributes(NewCallAL);
CI->replaceAllUsesWith(NewCall);
@@ -624,7 +619,7 @@ void WebAssemblyLowerEmscriptenEHSjLj::createSetThrewFunction(Module &M) {
Function *F =
Function::Create(FTy, GlobalValue::ExternalLinkage, SetThrewFName, &M);
Argument *Arg1 = &*(F->arg_begin());
- Argument *Arg2 = &*(++F->arg_begin());
+ Argument *Arg2 = &*std::next(F->arg_begin());
Arg1->setName("threw");
Arg2->setName("value");
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
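
Note how the rewritten attribute plumbing above implements the "incremented by
one" rule from the comment: instead of re-indexing each attribute, it seeds
the per-argument list with a single empty entry for the new callee pointer. A
minimal sketch of that shift with plain strings and hypothetical names:

    #include <string>
    #include <vector>

    // Sketch: prepend one empty slot so the attribute entry for original
    // argument i lands at position i + 1, matching the inserted callee
    // pointer argument.
    static std::vector<std::string>
    shiftArgAttrs(const std::vector<std::string> &Orig) {
      std::vector<std::string> Shifted;
      Shifted.push_back(""); // no attributes for the new callee pointer
      Shifted.insert(Shifted.end(), Orig.begin(), Orig.end());
      return Shifted;
    }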
diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index 022a448590ec..ff186eb91503 100644
--- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -14,7 +14,10 @@
//===----------------------------------------------------------------------===//
#include "WebAssemblyMCInstLower.h"
+#include "WebAssemblyAsmPrinter.h"
#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblyRuntimeLibcallSignatures.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/Constants.h"
@@ -22,18 +25,85 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
MCSymbol *
WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
- return Printer.getSymbol(MO.getGlobal());
+ const GlobalValue *Global = MO.getGlobal();
+ MCSymbol *Sym = Printer.getSymbol(Global);
+ if (isa<MCSymbolELF>(Sym))
+ return Sym;
+
+ MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Sym);
+
+ if (const auto *FuncTy = dyn_cast<FunctionType>(Global->getValueType())) {
+ const MachineFunction &MF = *MO.getParent()->getParent()->getParent();
+ const TargetMachine &TM = MF.getTarget();
+ const Function &CurrentFunc = *MF.getFunction();
+
+ SmallVector<wasm::ValType, 4> Returns;
+ SmallVector<wasm::ValType, 4> Params;
+
+ wasm::ValType iPTR =
+ MF.getSubtarget<WebAssemblySubtarget>().hasAddr64() ?
+ wasm::ValType::I64 :
+ wasm::ValType::I32;
+
+ SmallVector<MVT, 4> ResultMVTs;
+ ComputeLegalValueVTs(CurrentFunc, TM, FuncTy->getReturnType(), ResultMVTs);
+ // WebAssembly can't currently handle returning tuples.
+ if (ResultMVTs.size() <= 1)
+ for (MVT ResultMVT : ResultMVTs)
+ Returns.push_back(WebAssembly::toValType(ResultMVT));
+ else
+ Params.push_back(iPTR);
+
+ for (Type *Ty : FuncTy->params()) {
+ SmallVector<MVT, 4> ParamMVTs;
+ ComputeLegalValueVTs(CurrentFunc, TM, Ty, ParamMVTs);
+ for (MVT ParamMVT : ParamMVTs)
+ Params.push_back(WebAssembly::toValType(ParamMVT));
+ }
+
+ if (FuncTy->isVarArg())
+ Params.push_back(iPTR);
+
+ WasmSym->setReturns(std::move(Returns));
+ WasmSym->setParams(std::move(Params));
+ WasmSym->setIsFunction(true);
+ }
+
+ return WasmSym;
}
MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
const MachineOperand &MO) const {
- return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+ const char *Name = MO.getSymbolName();
+ MCSymbol *Sym = Printer.GetExternalSymbolSymbol(Name);
+ if (isa<MCSymbolELF>(Sym))
+ return Sym;
+
+ MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Sym);
+ const WebAssemblySubtarget &Subtarget = Printer.getSubtarget();
+
+ // __stack_pointer is a global variable; all other external symbols used by
+ // CodeGen are functions.
+ if (strcmp(Name, "__stack_pointer") == 0)
+ return WasmSym;
+
+ SmallVector<wasm::ValType, 4> Returns;
+ SmallVector<wasm::ValType, 4> Params;
+ GetSignature(Subtarget, Name, Returns, Params);
+
+ WasmSym->setReturns(std::move(Returns));
+ WasmSym->setParams(std::move(Params));
+ WasmSym->setIsFunction(true);
+
+ return WasmSym;
}
MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym,
@@ -42,6 +112,9 @@ MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym,
MCSymbolRefExpr::VariantKind VK =
IsFunc ? MCSymbolRefExpr::VK_WebAssembly_FUNCTION
: MCSymbolRefExpr::VK_None;
+ if (!isa<MCSymbolELF>(Sym))
+ cast<MCSymbolWasm>(Sym)->setIsFunction(IsFunc);
+
const MCExpr *Expr = MCSymbolRefExpr::create(Sym, VK, Ctx);
if (Offset != 0) {
@@ -54,20 +127,34 @@ MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym,
return MCOperand::createExpr(Expr);
}
+// Return the WebAssembly type associated with the given register class.
+static wasm::ValType getType(const TargetRegisterClass *RC) {
+ if (RC == &WebAssembly::I32RegClass)
+ return wasm::ValType::I32;
+ if (RC == &WebAssembly::I64RegClass)
+ return wasm::ValType::I64;
+ if (RC == &WebAssembly::F32RegClass)
+ return wasm::ValType::F32;
+ if (RC == &WebAssembly::F64RegClass)
+ return wasm::ValType::F64;
+ llvm_unreachable("Unexpected register class");
+}
+
void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
+ const MCInstrDesc &Desc = MI->getDesc();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
switch (MO.getType()) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("unknown operand type");
case MachineOperand::MO_MachineBasicBlock:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("MachineBasicBlock operand should have been rewritten");
case MachineOperand::MO_Register: {
// Ignore all implicit register operands.
@@ -80,6 +167,41 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
break;
}
case MachineOperand::MO_Immediate:
+ if (i < Desc.NumOperands) {
+ const MCOperandInfo &Info = Desc.OpInfo[i];
+ if (Info.OperandType == WebAssembly::OPERAND_TYPEINDEX) {
+ MCSymbol *Sym = Printer.createTempSymbol("typeindex");
+ if (!isa<MCSymbolELF>(Sym)) {
+ SmallVector<wasm::ValType, 4> Returns;
+ SmallVector<wasm::ValType, 4> Params;
+
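+ // Reconstruct the callee's signature from register classes: the
+ // instruction's defs become results and its explicit register uses
+ // become parameters.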
+ const MachineRegisterInfo &MRI =
+ MI->getParent()->getParent()->getRegInfo();
+ for (const MachineOperand &MO : MI->defs())
+ Returns.push_back(getType(MRI.getRegClass(MO.getReg())));
+ for (const MachineOperand &MO : MI->explicit_uses())
+ if (MO.isReg())
+ Params.push_back(getType(MRI.getRegClass(MO.getReg())));
+
+ // call_indirect instructions have a callee operand at the end which
+ // doesn't count as a param.
+ if (WebAssembly::isCallIndirect(*MI))
+ Params.pop_back();
+
+ MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Sym);
+ WasmSym->setReturns(std::move(Returns));
+ WasmSym->setParams(std::move(Params));
+ WasmSym->setIsFunction(true);
+
+ const MCExpr *Expr =
+ MCSymbolRefExpr::create(WasmSym,
+ MCSymbolRefExpr::VK_WebAssembly_TYPEINDEX,
+ Ctx);
+ MCOp = MCOperand::createExpr(Expr);
+ break;
+ }
+ }
+ }
MCOp = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_FPImmediate: {
diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
index ab4ba1c28d53..d1d2794c3b8f 100644
--- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
+++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
@@ -20,7 +20,7 @@
#include "llvm/Support/Compiler.h"
namespace llvm {
-class AsmPrinter;
+class WebAssemblyAsmPrinter;
class MCContext;
class MCSymbol;
class MachineInstr;
@@ -29,7 +29,7 @@ class MachineOperand;
/// This class is used to lower a MachineInstr into an MCInst.
class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower {
MCContext &Ctx;
- AsmPrinter &Printer;
+ WebAssemblyAsmPrinter &Printer;
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
@@ -37,7 +37,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower {
bool IsFunc) const;
public:
- WebAssemblyMCInstLower(MCContext &ctx, AsmPrinter &printer)
+ WebAssemblyMCInstLower(MCContext &ctx, WebAssemblyAsmPrinter &printer)
: Ctx(ctx), Printer(printer) {}
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
};
diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index 756619bebbed..1fcbb7791d4e 100644
--- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -60,6 +60,8 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
void addResult(MVT VT) { Results.push_back(VT); }
const std::vector<MVT> &getResults() const { return Results; }
+ void setNumLocals(size_t NumLocals) { Locals.resize(NumLocals, MVT::i32); }
+ void setLocal(size_t i, MVT VT) { Locals[i] = VT; }
void addLocal(MVT VT) { Locals.push_back(VT); }
const std::vector<MVT> &getLocals() const { return Locals; }
diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
index 96520aa5d28c..f4c9a4ef6b9c 100644
--- a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -54,7 +54,7 @@ FunctionPass *llvm::createWebAssemblyOptimizeReturned() {
void OptimizeReturned::visitCallSite(CallSite CS) {
for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i)
- if (CS.paramHasAttr(1 + i, Attribute::Returned)) {
+ if (CS.paramHasAttr(i, Attribute::Returned)) {
Instruction *Inst = CS.getInstruction();
Value *Arg = CS.getArgOperand(i);
// Ignore constants, globals, undef, etc.
diff --git a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
index 32dde88c2234..d2fbc5a22308 100644
--- a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -80,19 +80,31 @@ static bool MaybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB,
return false;
if (&MBB != &MF.back())
return false;
- if (&MI != &MBB.back())
- return false;
+ if (MF.getSubtarget<WebAssemblySubtarget>()
+ .getTargetTriple().isOSBinFormatELF()) {
+ if (&MI != &MBB.back())
+ return false;
+ } else {
+ MachineBasicBlock::iterator End = MBB.end();
+ --End;
+ assert(End->getOpcode() == WebAssembly::END_FUNCTION);
+ --End;
+ if (&MI != &*End)
+ return false;
+ }
- // If the operand isn't stackified, insert a COPY to read the operand and
- // stackify it.
- MachineOperand &MO = MI.getOperand(0);
- unsigned Reg = MO.getReg();
- if (!MFI.isVRegStackified(Reg)) {
- unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
- BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(CopyLocalOpc), NewReg)
- .addReg(Reg);
- MO.setReg(NewReg);
- MFI.stackifyVReg(NewReg);
+ if (FallthroughOpc != WebAssembly::FALLTHROUGH_RETURN_VOID) {
+ // If the operand isn't stackified, insert a COPY to read the operand and
+ // stackify it.
+ MachineOperand &MO = MI.getOperand(0);
+ unsigned Reg = MO.getReg();
+ if (!MFI.isVRegStackified(Reg)) {
+ unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(CopyLocalOpc), NewReg)
+ .addReg(Reg);
+ MO.setReg(NewReg);
+ MFI.stackifyVReg(NewReg);
+ }
}
// Rewrite the return.
@@ -127,7 +139,7 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
if (Name == TLI.getLibcallName(RTLIB::MEMCPY) ||
Name == TLI.getLibcallName(RTLIB::MEMMOVE) ||
Name == TLI.getLibcallName(RTLIB::MEMSET)) {
- LibFunc::Func Func;
+ LibFunc Func;
if (LibInfo.getLibFunc(Name, Func)) {
const auto &Op2 = MI.getOperand(2);
if (!Op2.isReg())
@@ -188,9 +200,9 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
WebAssembly::COPY_V128);
break;
case WebAssembly::RETURN_VOID:
- if (!DisableWebAssemblyFallthroughReturnOpt &&
- &MBB == &MF.back() && &MI == &MBB.back())
- MI.setDesc(TII.get(WebAssembly::FALLTHROUGH_RETURN_VOID));
+ Changed |= MaybeRewriteToFallthrough(
+ MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_VOID,
+ WebAssembly::INSTRUCTION_LIST_END);
break;
}
diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 32ee09e45796..57d454746b06 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
@@ -152,7 +153,7 @@ static void QueryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
}
// Determine whether MI reads memory, writes memory, has side effects,
-// and/or uses the __stack_pointer value.
+// and/or uses the stack pointer value.
static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
bool &Write, bool &Effects, bool &StackPointer) {
assert(!MI.isPosition());
@@ -169,15 +170,28 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
if (MI.mayStore()) {
Write = true;
- // Check for stores to __stack_pointer.
- for (auto MMO : MI.memoperands()) {
- const MachinePointerInfo &MPI = MMO->getPointerInfo();
- if (MPI.V.is<const PseudoSourceValue *>()) {
- auto PSV = MPI.V.get<const PseudoSourceValue *>();
- if (const ExternalSymbolPseudoSourceValue *EPSV =
- dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
- if (StringRef(EPSV->getSymbol()) == "__stack_pointer")
- StackPointer = true;
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ if (MF.getSubtarget<WebAssemblySubtarget>()
+ .getTargetTriple().isOSBinFormatELF()) {
+ // Check for stores to __stack_pointer.
+ for (auto MMO : MI.memoperands()) {
+ const MachinePointerInfo &MPI = MMO->getPointerInfo();
+ if (MPI.V.is<const PseudoSourceValue *>()) {
+ auto PSV = MPI.V.get<const PseudoSourceValue *>();
+ if (const ExternalSymbolPseudoSourceValue *EPSV =
+ dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
+ if (StringRef(EPSV->getSymbol()) == "__stack_pointer")
+ StackPointer = true;
+ }
+ }
+ } else {
+ // Check for sets of the stack pointer.
+ const MachineModuleInfoWasm &MMIW =
+ MF.getMMI().getObjFileInfo<MachineModuleInfoWasm>();
+ if ((MI.getOpcode() == WebAssembly::SET_GLOBAL_I32 ||
+ MI.getOpcode() == WebAssembly::SET_GLOBAL_I64) &&
+ MI.getOperand(0).getImm() == MMIW.getStackPointerGlobal()) {
+ StackPointer = true;
}
}
} else if (MI.hasOrderedMemoryRef()) {
diff --git a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
new file mode 100644
index 000000000000..c02ef4a1c399
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -0,0 +1,1302 @@
+//===-- WebAssemblyRuntimeLibcallSignatures.cpp - Runtime Libcall Signatures -//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains signature information for runtime libcalls.
+///
+/// CodeGen uses external symbols, which it refers to by name. The WebAssembly
+/// target needs type information for all functions. This file contains a big
+/// table providing type signatures for all runtime library functions that LLVM
+/// uses.
+///
+/// This is currently a fairly heavy-handed solution.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyRuntimeLibcallSignatures.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+
+using namespace llvm;
+
+namespace {
+
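+// Each enumerator encodes a signature as results_func_params; for example,
+// i32_func_f32_f32 is "i32 (f32, f32)", and iPTR stands for a pointer-sized
+// integer.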
+enum RuntimeLibcallSignature {
+ func,
+ f32_func_f32,
+ f32_func_f64,
+ f32_func_i32,
+ f32_func_i64,
+ f32_func_i16,
+ f64_func_f32,
+ f64_func_f64,
+ f64_func_i32,
+ f64_func_i64,
+ i32_func_f32,
+ i32_func_f64,
+ i32_func_i32,
+ i64_func_f32,
+ i64_func_f64,
+ i64_func_i64,
+ f32_func_f32_f32,
+ f32_func_f32_i32,
+ f32_func_i64_i64,
+ f64_func_f64_f64,
+ f64_func_f64_i32,
+ f64_func_i64_i64,
+ i16_func_f32,
+ i8_func_i8_i8,
+ func_f32_iPTR_iPTR,
+ func_f64_iPTR_iPTR,
+ i16_func_i16_i16,
+ i32_func_f32_f32,
+ i32_func_f64_f64,
+ i32_func_i32_i32,
+ i64_func_i64_i64,
+ i64_i64_func_f32,
+ i64_i64_func_f64,
+ i16_i16_func_i16_i16,
+ i32_i32_func_i32_i32,
+ i64_i64_func_i64_i64,
+ i64_i64_func_i64_i64_i64_i64,
+ i64_i64_i64_i64_func_i64_i64_i64_i64,
+ i64_i64_func_i64_i64_i32,
+ iPTR_func_iPTR_i32_iPTR,
+ iPTR_func_iPTR_iPTR_iPTR,
+ f32_func_f32_f32_f32,
+ f64_func_f64_f64_f64,
+ func_i64_i64_iPTR_iPTR,
+ func_iPTR_f32,
+ func_iPTR_f64,
+ func_iPTR_i32,
+ func_iPTR_i64,
+ func_iPTR_i64_i64,
+ func_iPTR_i64_i64_i64_i64,
+ func_iPTR_i64_i64_i64_i64_i64_i64,
+ i32_func_i64_i64,
+ i32_func_i64_i64_i64_i64,
+ unsupported
+};
+
+} // end anonymous namespace
+
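+// The table is indexed positionally by RTLIB::Libcall, so the /* NAME */
+// comments must remain in the exact order of the RTLIB enumeration for the
+// entries to line up.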
+static const RuntimeLibcallSignature
+RuntimeLibcallSignatures[RTLIB::UNKNOWN_LIBCALL] = {
+// Integer
+/* SHL_I16 */ i16_func_i16_i16,
+/* SHL_I32 */ i32_func_i32_i32,
+/* SHL_I64 */ i64_func_i64_i64,
+/* SHL_I128 */ i64_i64_func_i64_i64_i32,
+/* SRL_I16 */ i16_func_i16_i16,
+/* SRL_I32 */ i32_func_i32_i32,
+/* SRL_I64 */ i64_func_i64_i64,
+/* SRL_I128 */ i64_i64_func_i64_i64_i32,
+/* SRA_I16 */ i16_func_i16_i16,
+/* SRA_I32 */ i32_func_i32_i32,
+/* SRA_I64 */ i64_func_i64_i64,
+/* SRA_I128 */ i64_i64_func_i64_i64_i32,
+/* MUL_I8 */ i8_func_i8_i8,
+/* MUL_I16 */ i16_func_i16_i16,
+/* MUL_I32 */ i32_func_i32_i32,
+/* MUL_I64 */ i64_func_i64_i64,
+/* MUL_I128 */ i64_i64_func_i64_i64_i64_i64,
+/* MULO_I32 */ i32_func_i32_i32,
+/* MULO_I64 */ i64_func_i64_i64,
+/* MULO_I128 */ i64_i64_func_i64_i64_i64_i64,
+/* SDIV_I8 */ i8_func_i8_i8,
+/* SDIV_I16 */ i16_func_i16_i16,
+/* SDIV_I32 */ i32_func_i32_i32,
+/* SDIV_I64 */ i64_func_i64_i64,
+/* SDIV_I128 */ i64_i64_func_i64_i64_i64_i64,
+/* UDIV_I8 */ i8_func_i8_i8,
+/* UDIV_I16 */ i16_func_i16_i16,
+/* UDIV_I32 */ i32_func_i32_i32,
+/* UDIV_I64 */ i64_func_i64_i64,
+/* UDIV_I128 */ i64_i64_func_i64_i64_i64_i64,
+/* SREM_I8 */ i8_func_i8_i8,
+/* SREM_I16 */ i16_func_i16_i16,
+/* SREM_I32 */ i32_func_i32_i32,
+/* SREM_I64 */ i64_func_i64_i64,
+/* SREM_I128 */ i64_i64_func_i64_i64_i64_i64,
+/* UREM_I8 */ i8_func_i8_i8,
+/* UREM_I16 */ i16_func_i16_i16,
+/* UREM_I32 */ i32_func_i32_i32,
+/* UREM_I64 */ i64_func_i64_i64,
+/* UREM_I128 */ i64_i64_func_i64_i64_i64_i64,
+/* SDIVREM_I8 */ i8_func_i8_i8,
+/* SDIVREM_I16 */ i16_i16_func_i16_i16,
+/* SDIVREM_I32 */ i32_i32_func_i32_i32,
+/* SDIVREM_I64 */ i64_func_i64_i64,
+/* SDIVREM_I128 */ i64_i64_i64_i64_func_i64_i64_i64_i64,
+/* UDIVREM_I8 */ i8_func_i8_i8,
+/* UDIVREM_I16 */ i16_i16_func_i16_i16,
+/* UDIVREM_I32 */ i32_i32_func_i32_i32,
+/* UDIVREM_I64 */ i64_i64_func_i64_i64,
+/* UDIVREM_I128 */ i64_i64_i64_i64_func_i64_i64_i64_i64,
+/* NEG_I32 */ i32_func_i32,
+/* NEG_I64 */ i64_func_i64,
+
+// FLOATING POINT
+/* ADD_F32 */ f32_func_f32_f32,
+/* ADD_F64 */ f64_func_f64_f64,
+/* ADD_F80 */ unsupported,
+/* ADD_F128 */ func_iPTR_i64_i64_i64_i64,
+/* ADD_PPCF128 */ unsupported,
+/* SUB_F32 */ f32_func_f32_f32,
+/* SUB_F64 */ f64_func_f64_f64,
+/* SUB_F80 */ unsupported,
+/* SUB_F128 */ func_iPTR_i64_i64_i64_i64,
+/* SUB_PPCF128 */ unsupported,
+/* MUL_F32 */ f32_func_f32_f32,
+/* MUL_F64 */ f64_func_f64_f64,
+/* MUL_F80 */ unsupported,
+/* MUL_F128 */ func_iPTR_i64_i64_i64_i64,
+/* MUL_PPCF128 */ unsupported,
+/* DIV_F32 */ f32_func_f32_f32,
+/* DIV_F64 */ f64_func_f64_f64,
+/* DIV_F80 */ unsupported,
+/* DIV_F128 */ func_iPTR_i64_i64_i64_i64,
+/* DIV_PPCF128 */ unsupported,
+/* REM_F32 */ f32_func_f32_f32,
+/* REM_F64 */ f64_func_f64_f64,
+/* REM_F80 */ unsupported,
+/* REM_F128 */ func_iPTR_i64_i64_i64_i64,
+/* REM_PPCF128 */ unsupported,
+/* FMA_F32 */ f32_func_f32_f32_f32,
+/* FMA_F64 */ f64_func_f64_f64_f64,
+/* FMA_F80 */ unsupported,
+/* FMA_F128 */ func_iPTR_i64_i64_i64_i64_i64_i64,
+/* FMA_PPCF128 */ unsupported,
+/* POWI_F32 */ f32_func_f32_i32,
+/* POWI_F64 */ f64_func_f64_i32,
+/* POWI_F80 */ unsupported,
+/* POWI_F128 */ func_iPTR_i64_i64_i64_i64,
+/* POWI_PPCF128 */ unsupported,
+/* SQRT_F32 */ f32_func_f32,
+/* SQRT_F64 */ f64_func_f64,
+/* SQRT_F80 */ unsupported,
+/* SQRT_F128 */ func_iPTR_i64_i64,
+/* SQRT_PPCF128 */ unsupported,
+/* LOG_F32 */ f32_func_f32,
+/* LOG_F64 */ f64_func_f64,
+/* LOG_F80 */ unsupported,
+/* LOG_F128 */ func_iPTR_i64_i64,
+/* LOG_PPCF128 */ unsupported,
+/* LOG2_F32 */ f32_func_f32,
+/* LOG2_F64 */ f64_func_f64,
+/* LOG2_F80 */ unsupported,
+/* LOG2_F128 */ func_iPTR_i64_i64,
+/* LOG2_PPCF128 */ unsupported,
+/* LOG10_F32 */ f32_func_f32,
+/* LOG10_F64 */ f64_func_f64,
+/* LOG10_F80 */ unsupported,
+/* LOG10_F128 */ func_iPTR_i64_i64,
+/* LOG10_PPCF128 */ unsupported,
+/* EXP_F32 */ f32_func_f32,
+/* EXP_F64 */ f64_func_f64,
+/* EXP_F80 */ unsupported,
+/* EXP_F128 */ func_iPTR_i64_i64,
+/* EXP_PPCF128 */ unsupported,
+/* EXP2_F32 */ f32_func_f32,
+/* EXP2_F64 */ f64_func_f64,
+/* EXP2_F80 */ unsupported,
+/* EXP2_F128 */ func_iPTR_i64_i64,
+/* EXP2_PPCF128 */ unsupported,
+/* SIN_F32 */ f32_func_f32,
+/* SIN_F64 */ f64_func_f64,
+/* SIN_F80 */ unsupported,
+/* SIN_F128 */ func_iPTR_i64_i64,
+/* SIN_PPCF128 */ unsupported,
+/* COS_F32 */ f32_func_f32,
+/* COS_F64 */ f64_func_f64,
+/* COS_F80 */ unsupported,
+/* COS_F128 */ func_iPTR_i64_i64,
+/* COS_PPCF128 */ unsupported,
+/* SINCOS_F32 */ func_f32_iPTR_iPTR,
+/* SINCOS_F64 */ func_f64_iPTR_iPTR,
+/* SINCOS_F80 */ unsupported,
+/* SINCOS_F128 */ func_i64_i64_iPTR_iPTR,
+/* SINCOS_PPCF128 */ unsupported,
+/* POW_F32 */ f32_func_f32_f32,
+/* POW_F64 */ f64_func_f64_f64,
+/* POW_F80 */ unsupported,
+/* POW_F128 */ func_iPTR_i64_i64_i64_i64,
+/* POW_PPCF128 */ unsupported,
+/* CEIL_F32 */ f32_func_f32,
+/* CEIL_F64 */ f64_func_f64,
+/* CEIL_F80 */ unsupported,
+/* CEIL_F128 */ func_iPTR_i64_i64,
+/* CEIL_PPCF128 */ unsupported,
+/* TRUNC_F32 */ f32_func_f32,
+/* TRUNC_F64 */ f64_func_f64,
+/* TRUNC_F80 */ unsupported,
+/* TRUNC_F128 */ func_iPTR_i64_i64,
+/* TRUNC_PPCF128 */ unsupported,
+/* RINT_F32 */ f32_func_f32,
+/* RINT_F64 */ f64_func_f64,
+/* RINT_F80 */ unsupported,
+/* RINT_F128 */ func_iPTR_i64_i64,
+/* RINT_PPCF128 */ unsupported,
+/* NEARBYINT_F32 */ f32_func_f32,
+/* NEARBYINT_F64 */ f64_func_f64,
+/* NEARBYINT_F80 */ unsupported,
+/* NEARBYINT_F128 */ func_iPTR_i64_i64,
+/* NEARBYINT_PPCF128 */ unsupported,
+/* ROUND_F32 */ f32_func_f32,
+/* ROUND_F64 */ f64_func_f64,
+/* ROUND_F80 */ unsupported,
+/* ROUND_F128 */ func_iPTR_i64_i64,
+/* ROUND_PPCF128 */ unsupported,
+/* FLOOR_F32 */ f32_func_f32,
+/* FLOOR_F64 */ f64_func_f64,
+/* FLOOR_F80 */ unsupported,
+/* FLOOR_F128 */ func_iPTR_i64_i64,
+/* FLOOR_PPCF128 */ unsupported,
+/* COPYSIGN_F32 */ f32_func_f32_f32,
+/* COPYSIGN_F64 */ f64_func_f64_f64,
+/* COPYSIGN_F80 */ unsupported,
+/* COPYSIGN_F128 */ func_iPTR_i64_i64_i64_i64,
+/* COPYSIGN_PPCF128 */ unsupported,
+/* FMIN_F32 */ f32_func_f32_f32,
+/* FMIN_F64 */ f64_func_f64_f64,
+/* FMIN_F80 */ unsupported,
+/* FMIN_F128 */ func_iPTR_i64_i64_i64_i64,
+/* FMIN_PPCF128 */ unsupported,
+/* FMAX_F32 */ f32_func_f32_f32,
+/* FMAX_F64 */ f64_func_f64_f64,
+/* FMAX_F80 */ unsupported,
+/* FMAX_F128 */ func_iPTR_i64_i64_i64_i64,
+/* FMAX_PPCF128 */ unsupported,
+
+// CONVERSION
+/* FPEXT_F32_PPCF128 */ unsupported,
+/* FPEXT_F64_PPCF128 */ unsupported,
+/* FPEXT_F64_F128 */ func_iPTR_f64,
+/* FPEXT_F32_F128 */ func_iPTR_f32,
+/* FPEXT_F32_F64 */ f64_func_f32,
+/* FPEXT_F16_F32 */ f32_func_i16,
+/* FPROUND_F32_F16 */ i16_func_f32,
+/* FPROUND_F64_F16 */ unsupported,
+/* FPROUND_F80_F16 */ unsupported,
+/* FPROUND_F128_F16 */ unsupported,
+/* FPROUND_PPCF128_F16 */ unsupported,
+/* FPROUND_F64_F32 */ f32_func_f64,
+/* FPROUND_F80_F32 */ unsupported,
+/* FPROUND_F128_F32 */ f32_func_i64_i64,
+/* FPROUND_PPCF128_F32 */ unsupported,
+/* FPROUND_F80_F64 */ unsupported,
+/* FPROUND_F128_F64 */ f64_func_i64_i64,
+/* FPROUND_PPCF128_F64 */ unsupported,
+/* FPTOSINT_F32_I32 */ i32_func_f32,
+/* FPTOSINT_F32_I64 */ i64_func_f32,
+/* FPTOSINT_F32_I128 */ i64_i64_func_f32,
+/* FPTOSINT_F64_I32 */ i32_func_f64,
+/* FPTOSINT_F64_I64 */ i64_func_f64,
+/* FPTOSINT_F64_I128 */ i64_i64_func_f64,
+/* FPTOSINT_F80_I32 */ unsupported,
+/* FPTOSINT_F80_I64 */ unsupported,
+/* FPTOSINT_F80_I128 */ unsupported,
+/* FPTOSINT_F128_I32 */ i32_func_i64_i64,
+/* FPTOSINT_F128_I64 */ i64_func_i64_i64,
+/* FPTOSINT_F128_I128 */ i64_i64_func_i64_i64,
+/* FPTOSINT_PPCF128_I32 */ unsupported,
+/* FPTOSINT_PPCF128_I64 */ unsupported,
+/* FPTOSINT_PPCF128_I128 */ unsupported,
+/* FPTOUINT_F32_I32 */ i32_func_f32,
+/* FPTOUINT_F32_I64 */ i64_func_f32,
+/* FPTOUINT_F32_I128 */ i64_i64_func_f32,
+/* FPTOUINT_F64_I32 */ i32_func_f64,
+/* FPTOUINT_F64_I64 */ i64_func_f64,
+/* FPTOUINT_F64_I128 */ i64_i64_func_f64,
+/* FPTOUINT_F80_I32 */ unsupported,
+/* FPTOUINT_F80_I64 */ unsupported,
+/* FPTOUINT_F80_I128 */ unsupported,
+/* FPTOUINT_F128_I32 */ i32_func_i64_i64,
+/* FPTOUINT_F128_I64 */ i64_func_i64_i64,
+/* FPTOUINT_F128_I128 */ i64_i64_func_i64_i64,
+/* FPTOUINT_PPCF128_I32 */ unsupported,
+/* FPTOUINT_PPCF128_I64 */ unsupported,
+/* FPTOUINT_PPCF128_I128 */ unsupported,
+/* SINTTOFP_I32_F32 */ f32_func_i32,
+/* SINTTOFP_I32_F64 */ f64_func_i32,
+/* SINTTOFP_I32_F80 */ unsupported,
+/* SINTTOFP_I32_F128 */ func_iPTR_i32,
+/* SINTTOFP_I32_PPCF128 */ unsupported,
+/* SINTTOFP_I64_F32 */ f32_func_i64,
+/* SINTTOFP_I64_F64 */ f64_func_i64,
+/* SINTTOFP_I64_F80 */ unsupported,
+/* SINTTOFP_I64_F128 */ func_iPTR_i64,
+/* SINTTOFP_I64_PPCF128 */ unsupported,
+/* SINTTOFP_I128_F32 */ f32_func_i64_i64,
+/* SINTTOFP_I128_F64 */ f64_func_i64_i64,
+/* SINTTOFP_I128_F80 */ unsupported,
+/* SINTTOFP_I128_F128 */ func_iPTR_i64_i64,
+/* SINTTOFP_I128_PPCF128 */ unsupported,
+/* UINTTOFP_I32_F32 */ f32_func_i32,
+/* UINTTOFP_I32_F64 */ f64_func_i32,
+/* UINTTOFP_I32_F80 */ unsupported,
+/* UINTTOFP_I32_F128 */ func_iPTR_i32,
+/* UINTTOFP_I32_PPCF128 */ unsupported,
+/* UINTTOFP_I64_F32 */ f32_func_i64,
+/* UINTTOFP_I64_F64 */ f64_func_i64,
+/* UINTTOFP_I64_F80 */ unsupported,
+/* UINTTOFP_I64_F128 */ func_iPTR_i64,
+/* UINTTOFP_I64_PPCF128 */ unsupported,
+/* UINTTOFP_I128_F32 */ f32_func_i64_i64,
+/* UINTTOFP_I128_F64 */ f64_func_i64_i64,
+/* UINTTOFP_I128_F80 */ unsupported,
+/* UINTTOFP_I128_F128 */ func_iPTR_i64_i64,
+/* UINTTOFP_I128_PPCF128 */ unsupported,
+
+// COMPARISON
+/* OEQ_F32 */ i32_func_f32_f32,
+/* OEQ_F64 */ i32_func_f64_f64,
+/* OEQ_F128 */ i32_func_i64_i64_i64_i64,
+/* OEQ_PPCF128 */ unsupported,
+/* UNE_F32 */ i32_func_f32_f32,
+/* UNE_F64 */ i32_func_f64_f64,
+/* UNE_F128 */ i32_func_i64_i64_i64_i64,
+/* UNE_PPCF128 */ unsupported,
+/* OGE_F32 */ i32_func_f32_f32,
+/* OGE_F64 */ i32_func_f64_f64,
+/* OGE_F128 */ i32_func_i64_i64_i64_i64,
+/* OGE_PPCF128 */ unsupported,
+/* OLT_F32 */ i32_func_f32_f32,
+/* OLT_F64 */ i32_func_f64_f64,
+/* OLT_F128 */ i32_func_i64_i64_i64_i64,
+/* OLT_PPCF128 */ unsupported,
+/* OLE_F32 */ i32_func_f32_f32,
+/* OLE_F64 */ i32_func_f64_f64,
+/* OLE_F128 */ i32_func_i64_i64_i64_i64,
+/* OLE_PPCF128 */ unsupported,
+/* OGT_F32 */ i32_func_f32_f32,
+/* OGT_F64 */ i32_func_f64_f64,
+/* OGT_F128 */ i32_func_i64_i64_i64_i64,
+/* OGT_PPCF128 */ unsupported,
+/* UO_F32 */ i32_func_f32_f32,
+/* UO_F64 */ i32_func_f64_f64,
+/* UO_F128 */ i32_func_i64_i64_i64_i64,
+/* UO_PPCF128 */ unsupported,
+/* O_F32 */ i32_func_f32_f32,
+/* O_F64 */ i32_func_f64_f64,
+/* O_F128 */ i32_func_i64_i64_i64_i64,
+/* O_PPCF128 */ unsupported,
+
+// MEMORY
+/* MEMCPY */ iPTR_func_iPTR_iPTR_iPTR,
+/* MEMSET */ iPTR_func_iPTR_i32_iPTR,
+/* MEMMOVE */ iPTR_func_iPTR_iPTR_iPTR,
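+// Note: MEMSET's second argument is the 'int' fill value from C's
+// void *memset(void *s, int c, size_t n), hence the i32 in its signature.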
+
+// ELEMENT-WISE ATOMIC MEMORY
+/* MEMCPY_ELEMENT_ATOMIC_1 */ iPTR_func_iPTR_iPTR_iPTR,
+/* MEMCPY_ELEMENT_ATOMIC_2 */ iPTR_func_iPTR_iPTR_iPTR,
+/* MEMCPY_ELEMENT_ATOMIC_4 */ iPTR_func_iPTR_iPTR_iPTR,
+/* MEMCPY_ELEMENT_ATOMIC_8 */ iPTR_func_iPTR_iPTR_iPTR,
+/* MEMCPY_ELEMENT_ATOMIC_16 */ iPTR_func_iPTR_iPTR_iPTR,
+
+// EXCEPTION HANDLING
+/* UNWIND_RESUME */ unsupported,
+
+// Note: there are two sets of atomics libcalls; see
+// <http://llvm.org/docs/Atomics.html> for more information on the
+// difference between them.
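+//
+// For illustration only (these prototypes are not part of this file and the
+// exact types depend on the C runtime): '__sync_*' calls encode the access
+// size in the name, e.g.
+//   int __sync_val_compare_and_swap_4(int *p, int expected, int desired);
+// while '__atomic_*' calls take an explicit memory-order argument, e.g.
+//   int __atomic_fetch_add_4(int *p, int value, int memorder);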
+
+// Atomic '__sync_*' libcalls.
+/* SYNC_VAL_COMPARE_AND_SWAP_1 */ unsupported,
+/* SYNC_VAL_COMPARE_AND_SWAP_2 */ unsupported,
+/* SYNC_VAL_COMPARE_AND_SWAP_4 */ unsupported,
+/* SYNC_VAL_COMPARE_AND_SWAP_8 */ unsupported,
+/* SYNC_VAL_COMPARE_AND_SWAP_16 */ unsupported,
+/* SYNC_LOCK_TEST_AND_SET_1 */ unsupported,
+/* SYNC_LOCK_TEST_AND_SET_2 */ unsupported,
+/* SYNC_LOCK_TEST_AND_SET_4 */ unsupported,
+/* SYNC_LOCK_TEST_AND_SET_8 */ unsupported,
+/* SYNC_LOCK_TEST_AND_SET_16 */ unsupported,
+/* SYNC_FETCH_AND_ADD_1 */ unsupported,
+/* SYNC_FETCH_AND_ADD_2 */ unsupported,
+/* SYNC_FETCH_AND_ADD_4 */ unsupported,
+/* SYNC_FETCH_AND_ADD_8 */ unsupported,
+/* SYNC_FETCH_AND_ADD_16 */ unsupported,
+/* SYNC_FETCH_AND_SUB_1 */ unsupported,
+/* SYNC_FETCH_AND_SUB_2 */ unsupported,
+/* SYNC_FETCH_AND_SUB_4 */ unsupported,
+/* SYNC_FETCH_AND_SUB_8 */ unsupported,
+/* SYNC_FETCH_AND_SUB_16 */ unsupported,
+/* SYNC_FETCH_AND_AND_1 */ unsupported,
+/* SYNC_FETCH_AND_AND_2 */ unsupported,
+/* SYNC_FETCH_AND_AND_4 */ unsupported,
+/* SYNC_FETCH_AND_AND_8 */ unsupported,
+/* SYNC_FETCH_AND_AND_16 */ unsupported,
+/* SYNC_FETCH_AND_OR_1 */ unsupported,
+/* SYNC_FETCH_AND_OR_2 */ unsupported,
+/* SYNC_FETCH_AND_OR_4 */ unsupported,
+/* SYNC_FETCH_AND_OR_8 */ unsupported,
+/* SYNC_FETCH_AND_OR_16 */ unsupported,
+/* SYNC_FETCH_AND_XOR_1 */ unsupported,
+/* SYNC_FETCH_AND_XOR_2 */ unsupported,
+/* SYNC_FETCH_AND_XOR_4 */ unsupported,
+/* SYNC_FETCH_AND_XOR_8 */ unsupported,
+/* SYNC_FETCH_AND_XOR_16 */ unsupported,
+/* SYNC_FETCH_AND_NAND_1 */ unsupported,
+/* SYNC_FETCH_AND_NAND_2 */ unsupported,
+/* SYNC_FETCH_AND_NAND_4 */ unsupported,
+/* SYNC_FETCH_AND_NAND_8 */ unsupported,
+/* SYNC_FETCH_AND_NAND_16 */ unsupported,
+/* SYNC_FETCH_AND_MAX_1 */ unsupported,
+/* SYNC_FETCH_AND_MAX_2 */ unsupported,
+/* SYNC_FETCH_AND_MAX_4 */ unsupported,
+/* SYNC_FETCH_AND_MAX_8 */ unsupported,
+/* SYNC_FETCH_AND_MAX_16 */ unsupported,
+/* SYNC_FETCH_AND_UMAX_1 */ unsupported,
+/* SYNC_FETCH_AND_UMAX_2 */ unsupported,
+/* SYNC_FETCH_AND_UMAX_4 */ unsupported,
+/* SYNC_FETCH_AND_UMAX_8 */ unsupported,
+/* SYNC_FETCH_AND_UMAX_16 */ unsupported,
+/* SYNC_FETCH_AND_MIN_1 */ unsupported,
+/* SYNC_FETCH_AND_MIN_2 */ unsupported,
+/* SYNC_FETCH_AND_MIN_4 */ unsupported,
+/* SYNC_FETCH_AND_MIN_8 */ unsupported,
+/* SYNC_FETCH_AND_MIN_16 */ unsupported,
+/* SYNC_FETCH_AND_UMIN_1 */ unsupported,
+/* SYNC_FETCH_AND_UMIN_2 */ unsupported,
+/* SYNC_FETCH_AND_UMIN_4 */ unsupported,
+/* SYNC_FETCH_AND_UMIN_8 */ unsupported,
+/* SYNC_FETCH_AND_UMIN_16 */ unsupported,
+
+// Atomic '__atomic_*' libcalls.
+/* ATOMIC_LOAD */ unsupported,
+/* ATOMIC_LOAD_1 */ unsupported,
+/* ATOMIC_LOAD_2 */ unsupported,
+/* ATOMIC_LOAD_4 */ unsupported,
+/* ATOMIC_LOAD_8 */ unsupported,
+/* ATOMIC_LOAD_16 */ unsupported,
+
+/* ATOMIC_STORE */ unsupported,
+/* ATOMIC_STORE_1 */ unsupported,
+/* ATOMIC_STORE_2 */ unsupported,
+/* ATOMIC_STORE_4 */ unsupported,
+/* ATOMIC_STORE_8 */ unsupported,
+/* ATOMIC_STORE_16 */ unsupported,
+
+/* ATOMIC_EXCHANGE */ unsupported,
+/* ATOMIC_EXCHANGE_1 */ unsupported,
+/* ATOMIC_EXCHANGE_2 */ unsupported,
+/* ATOMIC_EXCHANGE_4 */ unsupported,
+/* ATOMIC_EXCHANGE_8 */ unsupported,
+/* ATOMIC_EXCHANGE_16 */ unsupported,
+
+/* ATOMIC_COMPARE_EXCHANGE */ unsupported,
+/* ATOMIC_COMPARE_EXCHANGE_1 */ unsupported,
+/* ATOMIC_COMPARE_EXCHANGE_2 */ unsupported,
+/* ATOMIC_COMPARE_EXCHANGE_4 */ unsupported,
+/* ATOMIC_COMPARE_EXCHANGE_8 */ unsupported,
+/* ATOMIC_COMPARE_EXCHANGE_16 */ unsupported,
+
+/* ATOMIC_FETCH_ADD_1 */ unsupported,
+/* ATOMIC_FETCH_ADD_2 */ unsupported,
+/* ATOMIC_FETCH_ADD_4 */ unsupported,
+/* ATOMIC_FETCH_ADD_8 */ unsupported,
+/* ATOMIC_FETCH_ADD_16 */ unsupported,
+
+/* ATOMIC_FETCH_SUB_1 */ unsupported,
+/* ATOMIC_FETCH_SUB_2 */ unsupported,
+/* ATOMIC_FETCH_SUB_4 */ unsupported,
+/* ATOMIC_FETCH_SUB_8 */ unsupported,
+/* ATOMIC_FETCH_SUB_16 */ unsupported,
+
+/* ATOMIC_FETCH_AND_1 */ unsupported,
+/* ATOMIC_FETCH_AND_2 */ unsupported,
+/* ATOMIC_FETCH_AND_4 */ unsupported,
+/* ATOMIC_FETCH_AND_8 */ unsupported,
+/* ATOMIC_FETCH_AND_16 */ unsupported,
+
+/* ATOMIC_FETCH_OR_1 */ unsupported,
+/* ATOMIC_FETCH_OR_2 */ unsupported,
+/* ATOMIC_FETCH_OR_4 */ unsupported,
+/* ATOMIC_FETCH_OR_8 */ unsupported,
+/* ATOMIC_FETCH_OR_16 */ unsupported,
+
+/* ATOMIC_FETCH_XOR_1 */ unsupported,
+/* ATOMIC_FETCH_XOR_2 */ unsupported,
+/* ATOMIC_FETCH_XOR_4 */ unsupported,
+/* ATOMIC_FETCH_XOR_8 */ unsupported,
+/* ATOMIC_FETCH_XOR_16 */ unsupported,
+
+/* ATOMIC_FETCH_NAND_1 */ unsupported,
+/* ATOMIC_FETCH_NAND_2 */ unsupported,
+/* ATOMIC_FETCH_NAND_4 */ unsupported,
+/* ATOMIC_FETCH_NAND_8 */ unsupported,
+/* ATOMIC_FETCH_NAND_16 */ unsupported,
+
+// Stack Protector Fail.
+/* STACKPROTECTOR_CHECK_FAIL */ func,
+
+// Deoptimization.
+/* DEOPTIMIZE */ unsupported,
+
+};
+
+static const char *
+RuntimeLibcallNames[RTLIB::UNKNOWN_LIBCALL] = {
+/* SHL_I16 */ "__ashlhi3",
+/* SHL_I32 */ "__ashlsi3",
+/* SHL_I64 */ "__ashldi3",
+/* SHL_I128 */ "__ashlti3",
+/* SRL_I16 */ "__lshrhi3",
+/* SRL_I32 */ "__lshrsi3",
+/* SRL_I64 */ "__lshrdi3",
+/* SRL_I128 */ "__lshrti3",
+/* SRA_I16 */ "__ashrhi3",
+/* SRA_I32 */ "__ashrsi3",
+/* SRA_I64 */ "__ashrdi3",
+/* SRA_I128 */ "__ashrti3",
+/* MUL_I8 */ "__mulqi3",
+/* MUL_I16 */ "__mulhi3",
+/* MUL_I32 */ "__mulsi3",
+/* MUL_I64 */ "__muldi3",
+/* MUL_I128 */ "__multi3",
+/* MULO_I32 */ "__mulosi4",
+/* MULO_I64 */ "__mulodi4",
+/* MULO_I128 */ "__muloti4",
+/* SDIV_I8 */ "__divqi3",
+/* SDIV_I16 */ "__divhi3",
+/* SDIV_I32 */ "__divsi3",
+/* SDIV_I64 */ "__divdi3",
+/* SDIV_I128 */ "__divti3",
+/* UDIV_I8 */ "__udivqi3",
+/* UDIV_I16 */ "__udivhi3",
+/* UDIV_I32 */ "__udivsi3",
+/* UDIV_I64 */ "__udivdi3",
+/* UDIV_I128 */ "__udivti3",
+/* SREM_I8 */ "__modqi3",
+/* SREM_I16 */ "__modhi3",
+/* SREM_I32 */ "__modsi3",
+/* SREM_I64 */ "__moddi3",
+/* SREM_I128 */ "__modti3",
+/* UREM_I8 */ "__umodqi3",
+/* UREM_I16 */ "__umodhi3",
+/* UREM_I32 */ "__umodsi3",
+/* UREM_I64 */ "__umoddi3",
+/* UREM_I128 */ "__umodti3",
+/* SDIVREM_I8 */ nullptr,
+/* SDIVREM_I16 */ nullptr,
+/* SDIVREM_I32 */ nullptr,
+/* SDIVREM_I64 */ nullptr,
+/* SDIVREM_I128 */ nullptr,
+/* UDIVREM_I8 */ nullptr,
+/* UDIVREM_I16 */ nullptr,
+/* UDIVREM_I32 */ nullptr,
+/* UDIVREM_I64 */ nullptr,
+/* UDIVREM_I128 */ nullptr,
+/* NEG_I32 */ "__negsi2",
+/* NEG_I64 */ "__negdi2",
+/* ADD_F32 */ "__addsf3",
+/* ADD_F64 */ "__adddf3",
+/* ADD_F80 */ nullptr,
+/* ADD_F128 */ "__addtf3",
+/* ADD_PPCF128 */ nullptr,
+/* SUB_F32 */ "__subsf3",
+/* SUB_F64 */ "__subdf3",
+/* SUB_F80 */ nullptr,
+/* SUB_F128 */ "__subtf3",
+/* SUB_PPCF128 */ nullptr,
+/* MUL_F32 */ "__mulsf3",
+/* MUL_F64 */ "__muldf3",
+/* MUL_F80 */ nullptr,
+/* MUL_F128 */ "__multf3",
+/* MUL_PPCF128 */ nullptr,
+/* DIV_F32 */ "__divsf3",
+/* DIV_F64 */ "__divdf3",
+/* DIV_F80 */ nullptr,
+/* DIV_F128 */ "__divtf3",
+/* DIV_PPCF128 */ nullptr,
+/* REM_F32 */ "fmodf",
+/* REM_F64 */ "fmod",
+/* REM_F80 */ nullptr,
+/* REM_F128 */ "fmodl",
+/* REM_PPCF128 */ nullptr,
+/* FMA_F32 */ "fmaf",
+/* FMA_F64 */ "fma",
+/* FMA_F80 */ nullptr,
+/* FMA_F128 */ "fmal",
+/* FMA_PPCF128 */ nullptr,
+/* POWI_F32 */ "__powisf2",
+/* POWI_F64 */ "__powidf2",
+/* POWI_F80 */ nullptr,
+/* POWI_F128 */ "__powitf2",
+/* POWI_PPCF128 */ nullptr,
+/* SQRT_F32 */ "sqrtf",
+/* SQRT_F64 */ "sqrt",
+/* SQRT_F80 */ nullptr,
+/* SQRT_F128 */ "sqrtl",
+/* SQRT_PPCF128 */ nullptr,
+/* LOG_F32 */ "logf",
+/* LOG_F64 */ "log",
+/* LOG_F80 */ nullptr,
+/* LOG_F128 */ "logl",
+/* LOG_PPCF128 */ nullptr,
+/* LOG2_F32 */ "log2f",
+/* LOG2_F64 */ "log2",
+/* LOG2_F80 */ nullptr,
+/* LOG2_F128 */ "log2l",
+/* LOG2_PPCF128 */ nullptr,
+/* LOG10_F32 */ "log10f",
+/* LOG10_F64 */ "log10",
+/* LOG10_F80 */ nullptr,
+/* LOG10_F128 */ "log10l",
+/* LOG10_PPCF128 */ nullptr,
+/* EXP_F32 */ "expf",
+/* EXP_F64 */ "exp",
+/* EXP_F80 */ nullptr,
+/* EXP_F128 */ "expl",
+/* EXP_PPCF128 */ nullptr,
+/* EXP2_F32 */ "exp2f",
+/* EXP2_F64 */ "exp2",
+/* EXP2_F80 */ nullptr,
+/* EXP2_F128 */ "exp2l",
+/* EXP2_PPCF128 */ nullptr,
+/* SIN_F32 */ "sinf",
+/* SIN_F64 */ "sin",
+/* SIN_F80 */ nullptr,
+/* SIN_F128 */ "sinl",
+/* SIN_PPCF128 */ nullptr,
+/* COS_F32 */ "cosf",
+/* COS_F64 */ "cos",
+/* COS_F80 */ nullptr,
+/* COS_F128 */ "cosl",
+/* COS_PPCF128 */ nullptr,
+/* SINCOS_F32 */ "sincosf",
+/* SINCOS_F64 */ "sincos",
+/* SINCOS_F80 */ nullptr,
+/* SINCOS_F128 */ "sincosl",
+/* SINCOS_PPCF128 */ nullptr,
+/* POW_F32 */ "powf",
+/* POW_F64 */ "pow",
+/* POW_F80 */ nullptr,
+/* POW_F128 */ "powl",
+/* POW_PPCF128 */ nullptr,
+/* CEIL_F32 */ "ceilf",
+/* CEIL_F64 */ "ceil",
+/* CEIL_F80 */ nullptr,
+/* CEIL_F128 */ "ceill",
+/* CEIL_PPCF128 */ nullptr,
+/* TRUNC_F32 */ "truncf",
+/* TRUNC_F64 */ "trunc",
+/* TRUNC_F80 */ nullptr,
+/* TRUNC_F128 */ "truncl",
+/* TRUNC_PPCF128 */ nullptr,
+/* RINT_F32 */ "rintf",
+/* RINT_F64 */ "rint",
+/* RINT_F80 */ nullptr,
+/* RINT_F128 */ "rintl",
+/* RINT_PPCF128 */ nullptr,
+/* NEARBYINT_F32 */ "nearbyintf",
+/* NEARBYINT_F64 */ "nearbyint",
+/* NEARBYINT_F80 */ nullptr,
+/* NEARBYINT_F128 */ "nearbyintl",
+/* NEARBYINT_PPCF128 */ nullptr,
+/* ROUND_F32 */ "roundf",
+/* ROUND_F64 */ "round",
+/* ROUND_F80 */ nullptr,
+/* ROUND_F128 */ "roundl",
+/* ROUND_PPCF128 */ nullptr,
+/* FLOOR_F32 */ "floorf",
+/* FLOOR_F64 */ "floor",
+/* FLOOR_F80 */ nullptr,
+/* FLOOR_F128 */ "floorl",
+/* FLOOR_PPCF128 */ nullptr,
+/* COPYSIGN_F32 */ "copysignf",
+/* COPYSIGN_F64 */ "copysign",
+/* COPYSIGN_F80 */ nullptr,
+/* COPYSIGN_F128 */ "copysignl",
+/* COPYSIGN_PPCF128 */ nullptr,
+/* FMIN_F32 */ "fminf",
+/* FMIN_F64 */ "fmin",
+/* FMIN_F80 */ nullptr,
+/* FMIN_F128 */ "fminl",
+/* FMIN_PPCF128 */ nullptr,
+/* FMAX_F32 */ "fmaxf",
+/* FMAX_F64 */ "fmax",
+/* FMAX_F80 */ nullptr,
+/* FMAX_F128 */ "fmaxl",
+/* FMAX_PPCF128 */ nullptr,
+/* FPEXT_F32_PPCF128 */ nullptr,
+/* FPEXT_F64_PPCF128 */ nullptr,
+/* FPEXT_F64_F128 */ "__extenddftf2",
+/* FPEXT_F32_F128 */ "__extendsftf2",
+/* FPEXT_F32_F64 */ "__extendsfdf2",
+/* FPEXT_F16_F32 */ "__gnu_h2f_ieee",
+/* FPROUND_F32_F16 */ "__gnu_f2h_ieee",
+/* FPROUND_F64_F16 */ nullptr,
+/* FPROUND_F80_F16 */ nullptr,
+/* FPROUND_F128_F16 */ nullptr,
+/* FPROUND_PPCF128_F16 */ nullptr,
+/* FPROUND_F64_F32 */ "__truncdfsf2",
+/* FPROUND_F80_F32 */ "__truncxfsf2",
+/* FPROUND_F128_F32 */ "__trunctfsf2",
+/* FPROUND_PPCF128_F32 */ nullptr,
+/* FPROUND_F80_F64 */ "__truncxfdf2",
+/* FPROUND_F128_F64 */ "__trunctfdf2",
+/* FPROUND_PPCF128_F64 */ nullptr,
+/* FPTOSINT_F32_I32 */ "__fixsfsi",
+/* FPTOSINT_F32_I64 */ "__fixsfdi",
+/* FPTOSINT_F32_I128 */ "__fixsfti",
+/* FPTOSINT_F64_I32 */ "__fixdfsi",
+/* FPTOSINT_F64_I64 */ "__fixdfdi",
+/* FPTOSINT_F64_I128 */ "__fixdfti",
+/* FPTOSINT_F80_I32 */ "__fixxfsi",
+/* FPTOSINT_F80_I64 */ "__fixxfdi",
+/* FPTOSINT_F80_I128 */ "__fixxfti",
+/* FPTOSINT_F128_I32 */ "__fixtfsi",
+/* FPTOSINT_F128_I64 */ "__fixtfdi",
+/* FPTOSINT_F128_I128 */ "__fixtfti",
+/* FPTOSINT_PPCF128_I32 */ nullptr,
+/* FPTOSINT_PPCF128_I64 */ nullptr,
+/* FPTOSINT_PPCF128_I128 */ nullptr,
+/* FPTOUINT_F32_I32 */ "__fixunssfsi",
+/* FPTOUINT_F32_I64 */ "__fixunssfdi",
+/* FPTOUINT_F32_I128 */ "__fixunssfti",
+/* FPTOUINT_F64_I32 */ "__fixunsdfsi",
+/* FPTOUINT_F64_I64 */ "__fixunsdfdi",
+/* FPTOUINT_F64_I128 */ "__fixunsdfti",
+/* FPTOUINT_F80_I32 */ "__fixunsxfsi",
+/* FPTOUINT_F80_I64 */ "__fixunsxfdi",
+/* FPTOUINT_F80_I128 */ "__fixunsxfti",
+/* FPTOUINT_F128_I32 */ "__fixunstfsi",
+/* FPTOUINT_F128_I64 */ "__fixunstfdi",
+/* FPTOUINT_F128_I128 */ "__fixunstfti",
+/* FPTOUINT_PPCF128_I32 */ nullptr,
+/* FPTOUINT_PPCF128_I64 */ nullptr,
+/* FPTOUINT_PPCF128_I128 */ nullptr,
+/* SINTTOFP_I32_F32 */ "__floatsisf",
+/* SINTTOFP_I32_F64 */ "__floatsidf",
+/* SINTTOFP_I32_F80 */ nullptr,
+/* SINTTOFP_I32_F128 */ "__floatsitf",
+/* SINTTOFP_I32_PPCF128 */ nullptr,
+/* SINTTOFP_I64_F32 */ "__floatdisf",
+/* SINTTOFP_I64_F64 */ "__floatdidf",
+/* SINTTOFP_I64_F80 */ nullptr,
+/* SINTTOFP_I64_F128 */ "__floatditf",
+/* SINTTOFP_I64_PPCF128 */ nullptr,
+/* SINTTOFP_I128_F32 */ "__floattisf",
+/* SINTTOFP_I128_F64 */ "__floattidf",
+/* SINTTOFP_I128_F80 */ nullptr,
+/* SINTTOFP_I128_F128 */ "__floattitf",
+/* SINTTOFP_I128_PPCF128 */ nullptr,
+/* UINTTOFP_I32_F32 */ "__floatunsisf",
+/* UINTTOFP_I32_F64 */ "__floatunsidf",
+/* UINTTOFP_I32_F80 */ nullptr,
+/* UINTTOFP_I32_F128 */ "__floatunsitf",
+/* UINTTOFP_I32_PPCF128 */ nullptr,
+/* UINTTOFP_I64_F32 */ "__floatundisf",
+/* UINTTOFP_I64_F64 */ "__floatundidf",
+/* UINTTOFP_I64_F80 */ nullptr,
+/* UINTTOFP_I64_F128 */ "__floatunditf",
+/* UINTTOFP_I64_PPCF128 */ nullptr,
+/* UINTTOFP_I128_F32 */ "__floatuntisf",
+/* UINTTOFP_I128_F64 */ "__floatuntidf",
+/* UINTTOFP_I128_F80 */ nullptr,
+/* UINTTOFP_I128_F128 */ "__floatuntitf",
+/* UINTTOFP_I128_PPCF128 */ nullptr,
+/* OEQ_F32 */ "__eqsf2",
+/* OEQ_F64 */ "__eqdf2",
+/* OEQ_F128 */ "__eqtf2",
+/* OEQ_PPCF128 */ nullptr,
+/* UNE_F32 */ "__nesf2",
+/* UNE_F64 */ "__nedf2",
+/* UNE_F128 */ "__netf2",
+/* UNE_PPCF128 */ nullptr,
+/* OGE_F32 */ "__gesf2",
+/* OGE_F64 */ "__gedf2",
+/* OGE_F128 */ "__getf2",
+/* OGE_PPCF128 */ nullptr,
+/* OLT_F32 */ "__ltsf2",
+/* OLT_F64 */ "__ltdf2",
+/* OLT_F128 */ "__lttf2",
+/* OLT_PPCF128 */ nullptr,
+/* OLE_F32 */ "__lesf2",
+/* OLE_F64 */ "__ledf2",
+/* OLE_F128 */ "__letf2",
+/* OLE_PPCF128 */ nullptr,
+/* OGT_F32 */ "__gtsf2",
+/* OGT_F64 */ "__gtdf2",
+/* OGT_F128 */ "__gttf2",
+/* OGT_PPCF128 */ nullptr,
+/* UO_F32 */ "__unordsf2",
+/* UO_F64 */ "__unorddf2",
+/* UO_F128 */ "__unordtf2",
+/* UO_PPCF128 */ nullptr,
+/* O_F32 */ "__unordsf2",
+/* O_F64 */ "__unorddf2",
+/* O_F128 */ "__unordtf2",
+/* O_PPCF128 */ nullptr,
+/* MEMCPY */ "memcpy",
+/* MEMMOVE */ "memset",
+/* MEMSET */ "memmove",
+/* MEMCPY_ELEMENT_ATOMIC_1 */ "MEMCPY_ELEMENT_ATOMIC_1",
+/* MEMCPY_ELEMENT_ATOMIC_2 */ "MEMCPY_ELEMENT_ATOMIC_2",
+/* MEMCPY_ELEMENT_ATOMIC_4 */ "MEMCPY_ELEMENT_ATOMIC_4",
+/* MEMCPY_ELEMENT_ATOMIC_8 */ "MEMCPY_ELEMENT_ATOMIC_8",
+/* MEMCPY_ELEMENT_ATOMIC_16 */ "MEMCPY_ELEMENT_ATOMIC_16",
+/* UNWIND_RESUME */ "_Unwind_Resume",
+/* SYNC_VAL_COMPARE_AND_SWAP_1 */ "__sync_val_compare_and_swap_1",
+/* SYNC_VAL_COMPARE_AND_SWAP_2 */ "__sync_val_compare_and_swap_2",
+/* SYNC_VAL_COMPARE_AND_SWAP_4 */ "__sync_val_compare_and_swap_4",
+/* SYNC_VAL_COMPARE_AND_SWAP_8 */ "__sync_val_compare_and_swap_8",
+/* SYNC_VAL_COMPARE_AND_SWAP_16 */ "__sync_val_compare_and_swap_16",
+/* SYNC_LOCK_TEST_AND_SET_1 */ "__sync_lock_test_and_set_1",
+/* SYNC_LOCK_TEST_AND_SET_2 */ "__sync_lock_test_and_set_2",
+/* SYNC_LOCK_TEST_AND_SET_4 */ "__sync_lock_test_and_set_4",
+/* SYNC_LOCK_TEST_AND_SET_8 */ "__sync_lock_test_and_set_8",
+/* SYNC_LOCK_TEST_AND_SET_16 */ "__sync_lock_test_and_set_16",
+/* SYNC_FETCH_AND_ADD_1 */ "__sync_fetch_and_add_1",
+/* SYNC_FETCH_AND_ADD_2 */ "__sync_fetch_and_add_2",
+/* SYNC_FETCH_AND_ADD_4 */ "__sync_fetch_and_add_4",
+/* SYNC_FETCH_AND_ADD_8 */ "__sync_fetch_and_add_8",
+/* SYNC_FETCH_AND_ADD_16 */ "__sync_fetch_and_add_16",
+/* SYNC_FETCH_AND_SUB_1 */ "__sync_fetch_and_sub_1",
+/* SYNC_FETCH_AND_SUB_2 */ "__sync_fetch_and_sub_2",
+/* SYNC_FETCH_AND_SUB_4 */ "__sync_fetch_and_sub_4",
+/* SYNC_FETCH_AND_SUB_8 */ "__sync_fetch_and_sub_8",
+/* SYNC_FETCH_AND_SUB_16 */ "__sync_fetch_and_sub_16",
+/* SYNC_FETCH_AND_AND_1 */ "__sync_fetch_and_and_1",
+/* SYNC_FETCH_AND_AND_2 */ "__sync_fetch_and_and_2",
+/* SYNC_FETCH_AND_AND_4 */ "__sync_fetch_and_and_4",
+/* SYNC_FETCH_AND_AND_8 */ "__sync_fetch_and_and_8",
+/* SYNC_FETCH_AND_AND_16 */ "__sync_fetch_and_and_16",
+/* SYNC_FETCH_AND_OR_1 */ "__sync_fetch_and_or_1",
+/* SYNC_FETCH_AND_OR_2 */ "__sync_fetch_and_or_2",
+/* SYNC_FETCH_AND_OR_4 */ "__sync_fetch_and_or_4",
+/* SYNC_FETCH_AND_OR_8 */ "__sync_fetch_and_or_8",
+/* SYNC_FETCH_AND_OR_16 */ "__sync_fetch_and_or_16",
+/* SYNC_FETCH_AND_XOR_1 */ "__sync_fetch_and_xor_1",
+/* SYNC_FETCH_AND_XOR_2 */ "__sync_fetch_and_xor_2",
+/* SYNC_FETCH_AND_XOR_4 */ "__sync_fetch_and_xor_4",
+/* SYNC_FETCH_AND_XOR_8 */ "__sync_fetch_and_xor_8",
+/* SYNC_FETCH_AND_XOR_16 */ "__sync_fetch_and_xor_16",
+/* SYNC_FETCH_AND_NAND_1 */ "__sync_fetch_and_nand_1",
+/* SYNC_FETCH_AND_NAND_2 */ "__sync_fetch_and_nand_2",
+/* SYNC_FETCH_AND_NAND_4 */ "__sync_fetch_and_nand_4",
+/* SYNC_FETCH_AND_NAND_8 */ "__sync_fetch_and_nand_8",
+/* SYNC_FETCH_AND_NAND_16 */ "__sync_fetch_and_nand_16",
+/* SYNC_FETCH_AND_MAX_1 */ "__sync_fetch_and_max_1",
+/* SYNC_FETCH_AND_MAX_2 */ "__sync_fetch_and_max_2",
+/* SYNC_FETCH_AND_MAX_4 */ "__sync_fetch_and_max_4",
+/* SYNC_FETCH_AND_MAX_8 */ "__sync_fetch_and_max_8",
+/* SYNC_FETCH_AND_MAX_16 */ "__sync_fetch_and_max_16",
+/* SYNC_FETCH_AND_UMAX_1 */ "__sync_fetch_and_umax_1",
+/* SYNC_FETCH_AND_UMAX_2 */ "__sync_fetch_and_umax_2",
+/* SYNC_FETCH_AND_UMAX_4 */ "__sync_fetch_and_umax_4",
+/* SYNC_FETCH_AND_UMAX_8 */ "__sync_fetch_and_umax_8",
+/* SYNC_FETCH_AND_UMAX_16 */ "__sync_fetch_and_umax_16",
+/* SYNC_FETCH_AND_MIN_1 */ "__sync_fetch_and_min_1",
+/* SYNC_FETCH_AND_MIN_2 */ "__sync_fetch_and_min_2",
+/* SYNC_FETCH_AND_MIN_4 */ "__sync_fetch_and_min_4",
+/* SYNC_FETCH_AND_MIN_8 */ "__sync_fetch_and_min_8",
+/* SYNC_FETCH_AND_MIN_16 */ "__sync_fetch_and_min_16",
+/* SYNC_FETCH_AND_UMIN_1 */ "__sync_fetch_and_umin_1",
+/* SYNC_FETCH_AND_UMIN_2 */ "__sync_fetch_and_umin_2",
+/* SYNC_FETCH_AND_UMIN_4 */ "__sync_fetch_and_umin_4",
+/* SYNC_FETCH_AND_UMIN_8 */ "__sync_fetch_and_umin_8",
+/* SYNC_FETCH_AND_UMIN_16 */ "__sync_fetch_and_umin_16",
+
+/* ATOMIC_LOAD */ "__atomic_load",
+/* ATOMIC_LOAD_1 */ "__atomic_load_1",
+/* ATOMIC_LOAD_2 */ "__atomic_load_2",
+/* ATOMIC_LOAD_4 */ "__atomic_load_4",
+/* ATOMIC_LOAD_8 */ "__atomic_load_8",
+/* ATOMIC_LOAD_16 */ "__atomic_load_16",
+
+/* ATOMIC_STORE */ "__atomic_store",
+/* ATOMIC_STORE_1 */ "__atomic_store_1",
+/* ATOMIC_STORE_2 */ "__atomic_store_2",
+/* ATOMIC_STORE_4 */ "__atomic_store_4",
+/* ATOMIC_STORE_8 */ "__atomic_store_8",
+/* ATOMIC_STORE_16 */ "__atomic_store_16",
+
+/* ATOMIC_EXCHANGE */ "__atomic_exchange",
+/* ATOMIC_EXCHANGE_1 */ "__atomic_exchange_1",
+/* ATOMIC_EXCHANGE_2 */ "__atomic_exchange_2",
+/* ATOMIC_EXCHANGE_4 */ "__atomic_exchange_4",
+/* ATOMIC_EXCHANGE_8 */ "__atomic_exchange_8",
+/* ATOMIC_EXCHANGE_16 */ "__atomic_exchange_16",
+
+/* ATOMIC_COMPARE_EXCHANGE */ "__atomic_compare_exchange",
+/* ATOMIC_COMPARE_EXCHANGE_1 */ "__atomic_compare_exchange_1",
+/* ATOMIC_COMPARE_EXCHANGE_2 */ "__atomic_compare_exchange_2",
+/* ATOMIC_COMPARE_EXCHANGE_4 */ "__atomic_compare_exchange_4",
+/* ATOMIC_COMPARE_EXCHANGE_8 */ "__atomic_compare_exchange_8",
+/* ATOMIC_COMPARE_EXCHANGE_16 */ "__atomic_compare_exchange_16",
+
+/* ATOMIC_FETCH_ADD_1 */ "__atomic_fetch_add_1",
+/* ATOMIC_FETCH_ADD_2 */ "__atomic_fetch_add_2",
+/* ATOMIC_FETCH_ADD_4 */ "__atomic_fetch_add_4",
+/* ATOMIC_FETCH_ADD_8 */ "__atomic_fetch_add_8",
+/* ATOMIC_FETCH_ADD_16 */ "__atomic_fetch_add_16",
+/* ATOMIC_FETCH_SUB_1 */ "__atomic_fetch_sub_1",
+/* ATOMIC_FETCH_SUB_2 */ "__atomic_fetch_sub_2",
+/* ATOMIC_FETCH_SUB_4 */ "__atomic_fetch_sub_4",
+/* ATOMIC_FETCH_SUB_8 */ "__atomic_fetch_sub_8",
+/* ATOMIC_FETCH_SUB_16 */ "__atomic_fetch_sub_16",
+/* ATOMIC_FETCH_AND_1 */ "__atomic_fetch_and_1",
+/* ATOMIC_FETCH_AND_2 */ "__atomic_fetch_and_2",
+/* ATOMIC_FETCH_AND_4 */ "__atomic_fetch_and_4",
+/* ATOMIC_FETCH_AND_8 */ "__atomic_fetch_and_8",
+/* ATOMIC_FETCH_AND_16 */ "__atomic_fetch_and_16",
+/* ATOMIC_FETCH_OR_1 */ "__atomic_fetch_or_1",
+/* ATOMIC_FETCH_OR_2 */ "__atomic_fetch_or_2",
+/* ATOMIC_FETCH_OR_4 */ "__atomic_fetch_or_4",
+/* ATOMIC_FETCH_OR_8 */ "__atomic_fetch_or_8",
+/* ATOMIC_FETCH_OR_16 */ "__atomic_fetch_or_16",
+/* ATOMIC_FETCH_XOR_1 */ "__atomic_fetch_xor_1",
+/* ATOMIC_FETCH_XOR_2 */ "__atomic_fetch_xor_2",
+/* ATOMIC_FETCH_XOR_4 */ "__atomic_fetch_xor_4",
+/* ATOMIC_FETCH_XOR_8 */ "__atomic_fetch_xor_8",
+/* ATOMIC_FETCH_XOR_16 */ "__atomic_fetch_xor_16",
+/* ATOMIC_FETCH_NAND_1 */ "__atomic_fetch_nand_1",
+/* ATOMIC_FETCH_NAND_2 */ "__atomic_fetch_nand_2",
+/* ATOMIC_FETCH_NAND_4 */ "__atomic_fetch_nand_4",
+/* ATOMIC_FETCH_NAND_8 */ "__atomic_fetch_nand_8",
+/* ATOMIC_FETCH_NAND_16 */ "__atomic_fetch_nand_16",
+
+/* STACKPROTECTOR_CHECK_FAIL */ "__stack_chk_fail",
+
+/* DEOPTIMIZE */ "__llvm_deoptimize",
+};
+
+void llvm::GetSignature(const WebAssemblySubtarget &Subtarget,
+ RTLIB::Libcall LC, SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params) {
+ assert(Rets.empty());
+ assert(Params.empty());
+
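+ // Pointer-sized (iPTR) values lower to i64 on wasm64 targets and to i32 on
+ // wasm32 targets.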
+ WebAssembly::ExprType iPTR = Subtarget.hasAddr64() ?
+ WebAssembly::ExprType::I64 :
+ WebAssembly::ExprType::I32;
+
+ switch (RuntimeLibcallSignatures[LC]) {
+ case func:
+ break;
+ case f32_func_f32:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case f32_func_f64:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case f32_func_i32:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case f32_func_i64:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case f32_func_i16:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case f64_func_f32:
+ Rets.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case f64_func_f64:
+ Rets.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case f64_func_i32:
+ Rets.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case f64_func_i64:
+ Rets.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case i32_func_f32:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case i32_func_f64:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case i32_func_i32:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case i64_func_f32:
+ Rets.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case i64_func_f64:
+ Rets.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case i64_func_i64:
+ Rets.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case f32_func_f32_f32:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case f32_func_f32_i32:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case f32_func_i64_i64:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case f64_func_f64_f64:
+ Rets.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case f64_func_f64_i32:
+ Rets.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case f64_func_i64_i64:
+ Rets.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case i16_func_f32:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case i8_func_i8_i8:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case func_f32_iPTR_iPTR:
+ Params.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType(iPTR));
+ break;
+ case func_f64_iPTR_iPTR:
+ Params.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType(iPTR));
+ break;
+ case i16_func_i16_i16:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case i32_func_f32_f32:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case i32_func_f64_f64:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case i32_func_i32_i32:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case i64_func_i64_i64:
+ Rets.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
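+ // For the multiple-result signatures below, results are currently returned
+ // indirectly through a pointer argument prepended to Params; the #if 0
+ // blocks record the direct lowering intended once wasm gains
+ // multiple-return-value support.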
+ case i64_i64_func_f32:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+#else
+ Params.push_back(wasm::ValType(iPTR));
+#endif
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case i64_i64_func_f64:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+#else
+ Params.push_back(wasm::ValType(iPTR));
+#endif
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case i16_i16_func_i16_i16:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+ Rets.push_back(wasm::ValType::I32);
+ Rets.push_back(wasm::ValType::I32);
+#else
+ Params.push_back(wasm::ValType(iPTR));
+#endif
+ Params.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case i32_i32_func_i32_i32:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+ Rets.push_back(wasm::ValType::I32);
+ Rets.push_back(wasm::ValType::I32);
+#else
+ Params.push_back(wasm::ValType(iPTR));
+#endif
+ Params.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case i64_i64_func_i64_i64:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+#else
+ Params.push_back(wasm::ValType(iPTR));
+#endif
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case i64_i64_func_i64_i64_i64_i64:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+#else
+ Params.push_back(wasm::ValType(iPTR));
+#endif
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case i64_i64_i64_i64_func_i64_i64_i64_i64:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+#else
+ Params.push_back(wasm::ValType(iPTR));
+#endif
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case i64_i64_func_i64_i64_i32:
+#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+#else
+ Params.push_back(wasm::ValType(iPTR));
+#endif
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case iPTR_func_iPTR_i32_iPTR:
+ Rets.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType(iPTR));
+ break;
+ case iPTR_func_iPTR_iPTR_iPTR:
+ Rets.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType(iPTR));
+ break;
+ case f32_func_f32_f32_f32:
+ Rets.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F32);
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case f64_func_f64_f64_f64:
+ Rets.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F64);
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case func_i64_i64_iPTR_iPTR:
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType(iPTR));
+ break;
+ case func_iPTR_f32:
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType::F32);
+ break;
+ case func_iPTR_f64:
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType::F64);
+ break;
+ case func_iPTR_i32:
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case func_iPTR_i64:
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case func_iPTR_i64_i64:
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case func_iPTR_i64_i64_i64_i64:
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case func_iPTR_i64_i64_i64_i64_i64_i64:
+ Params.push_back(wasm::ValType(iPTR));
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case i32_func_i64_i64:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case i32_func_i64_i64_i64_i64:
+ Rets.push_back(wasm::ValType::I32);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case unsupported:
+ llvm_unreachable("unsupported runtime library signature");
+ }
+}
+
+void llvm::GetSignature(const WebAssemblySubtarget &Subtarget, const char *Name,
+ SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params) {
+ assert(strcmp(RuntimeLibcallNames[RTLIB::DEOPTIMIZE], "__llvm_deoptimize") ==
+ 0);
+
+ for (size_t i = 0, e = RTLIB::UNKNOWN_LIBCALL; i < e; ++i)
+ if (RuntimeLibcallNames[i] && strcmp(RuntimeLibcallNames[i], Name) == 0)
+ return GetSignature(Subtarget, RTLIB::Libcall(i), Rets, Params);
+
+ llvm_unreachable("unexpected runtime library name");
+}
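+
+// Example, using entries visible in the tables above: RTLIB::FMIN_F32
+// ("fminf") yields Rets == {F32} and Params == {F32, F32}, while
+// RTLIB::FMIN_F128 ("fminl") yields no Rets and
+// Params == {iPTR, I64, I64, I64, I64}, with the result stored through the
+// pointer argument.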
diff --git a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
new file mode 100644
index 000000000000..129067604784
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h
@@ -0,0 +1,37 @@
+//===-- WebAssemblyRuntimeLibcallSignatures.h - Libcall Signatures -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file provides signature information for runtime libcalls.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_RUNTIME_LIBCALL_SIGNATURES_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_RUNTIME_LIBCALL_SIGNATURES_H
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+
+namespace llvm {
+
+class WebAssemblySubtarget;
+
+extern void GetSignature(const WebAssemblySubtarget &Subtarget,
+ RTLIB::Libcall LC,
+ SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params);
+
+extern void GetSignature(const WebAssemblySubtarget &Subtarget,
+ const char *Name, SmallVectorImpl<wasm::ValType> &Rets,
+ SmallVectorImpl<wasm::ValType> &Params);
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_WEBASSEMBLY_RUNTIME_LIBCALL_SIGNATURES_H
diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
index 34ec6f2d34a7..a9aa781610ce 100644
--- a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
@@ -154,7 +154,7 @@ static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
if (!callReturnsInput)
return false;
- LibFunc::Func Func;
+ LibFunc Func;
if (!LibInfo.getLibFunc(Name, Func))
return false;
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index f5ef35a2ad40..44c794ef5da1 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -74,13 +74,25 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
: "e-m:e-p:32:32-i64:64-n32:64-S128",
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
CM, OL),
- TLOF(make_unique<WebAssemblyTargetObjectFile>()) {
+ TLOF(TT.isOSBinFormatELF() ?
+ static_cast<TargetLoweringObjectFile*>(
+ new WebAssemblyTargetObjectFileELF()) :
+ static_cast<TargetLoweringObjectFile*>(
+ new WebAssemblyTargetObjectFile())) {
// WebAssembly type-checks instructions, but a noreturn function with a return
// type that doesn't match the context will cause a check failure. So we lower
// LLVM 'unreachable' to ISD::TRAP and then lower that to WebAssembly's
// 'unreachable' instruction, which is meant for that case.
this->Options.TrapUnreachable = true;
+ // WebAssembly treats each function as an independent unit. Force
+ // -ffunction-sections, effectively, so that we can emit them independently.
+ if (!TT.isOSBinFormatELF()) {
+ this->Options.FunctionSections = true;
+ this->Options.DataSections = true;
+ this->Options.UniqueSectionNames = true;
+ }
+
initAsmInfo();
// Note that we don't use setRequiresStructuredCFG(true). It disables
@@ -260,13 +272,19 @@ void WebAssemblyPassConfig::addPreEmitPass() {
addPass(createWebAssemblyRegColoring());
}
+ // Eliminate multiple-entry loops. Do this before inserting explicit get_local
+ // and set_local operators because we create a new variable that we want
+ // converted into a local.
+ addPass(createWebAssemblyFixIrreducibleControlFlow());
+
// Insert explicit get_local and set_local operators.
addPass(createWebAssemblyExplicitLocals());
- // Eliminate multiple-entry loops.
- addPass(createWebAssemblyFixIrreducibleControlFlow());
+ // Sort the blocks of the CFG into topological order, a prerequisite for
+ // BLOCK and LOOP markers.
+ addPass(createWebAssemblyCFGSort());
- // Put the CFG in structured form; insert BLOCK and LOOP markers.
+ // Insert BLOCK and LOOP markers.
addPass(createWebAssemblyCFGStackify());
// Lower br_unless into br_if.
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
index 74e33b93e00d..b1fd108bc249 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
@@ -17,8 +17,14 @@
#include "WebAssemblyTargetMachine.h"
using namespace llvm;
-void WebAssemblyTargetObjectFile::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
+void WebAssemblyTargetObjectFileELF::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
}
+
+void WebAssemblyTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileWasm::Initialize(Ctx, TM);
+ InitializeWasm();
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
index 39e50c9c575d..ace87c9e442f 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
+++ b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
@@ -20,7 +20,13 @@
namespace llvm {
-class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFileELF {
+class WebAssemblyTargetObjectFileELF final
+ : public TargetLoweringObjectFileELF {
+public:
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+};
+
+class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFileWasm {
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
};
diff --git a/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
index a0049c147d2c..e32772d491cf 100644
--- a/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -15,6 +15,7 @@
#include "WebAssemblyUtilities.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
using namespace llvm;
bool WebAssembly::isArgument(const MachineInstr &MI) {
@@ -69,3 +70,28 @@ bool WebAssembly::isChild(const MachineInstr &MI,
return TargetRegisterInfo::isVirtualRegister(Reg) &&
MFI.isVRegStackified(Reg);
}
+
+bool WebAssembly::isCallIndirect(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case WebAssembly::CALL_INDIRECT_VOID:
+ case WebAssembly::CALL_INDIRECT_I32:
+ case WebAssembly::CALL_INDIRECT_I64:
+ case WebAssembly::CALL_INDIRECT_F32:
+ case WebAssembly::CALL_INDIRECT_F64:
+ case WebAssembly::CALL_INDIRECT_v16i8:
+ case WebAssembly::CALL_INDIRECT_v8i16:
+ case WebAssembly::CALL_INDIRECT_v4i32:
+ case WebAssembly::CALL_INDIRECT_v4f32:
+ return true;
+ default:
+ return false;
+ }
+}
+
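+// Choose the loop block with the highest layout number; unlike
+// MachineLoop::getBottomBlock, this also handles loops whose blocks are not
+// contiguous in layout order.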
+MachineBasicBlock *llvm::LoopBottom(const MachineLoop *Loop) {
+ MachineBasicBlock *Bottom = Loop->getHeader();
+ for (MachineBasicBlock *MBB : Loop->blocks())
+ if (MBB->getNumber() > Bottom->getNumber())
+ Bottom = MBB;
+ return Bottom;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyUtilities.h b/lib/Target/WebAssembly/WebAssemblyUtilities.h
index eb114403d14e..595491f1bf5b 100644
--- a/lib/Target/WebAssembly/WebAssemblyUtilities.h
+++ b/lib/Target/WebAssembly/WebAssemblyUtilities.h
@@ -18,7 +18,9 @@
namespace llvm {
+class MachineBasicBlock;
class MachineInstr;
+class MachineLoop;
class WebAssemblyFunctionInfo;
namespace WebAssembly {
@@ -27,8 +29,15 @@ bool isArgument(const MachineInstr &MI);
bool isCopy(const MachineInstr &MI);
bool isTee(const MachineInstr &MI);
bool isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI);
+bool isCallIndirect(const MachineInstr &MI);
} // end namespace WebAssembly
+
+/// Return the "bottom" block of a loop. This differs from
+/// MachineLoop::getBottomBlock in that it works even if the loop is
+/// discontiguous.
+MachineBasicBlock *LoopBottom(const MachineLoop *Loop);
+
} // end namespace llvm
#endif
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index c38a7d1dd44d..788fac62626b 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -1,4 +1,4 @@
-//===-- X86AsmInstrumentation.cpp - Instrument X86 inline assembly C++ -*-===//
+//===-- X86AsmInstrumentation.cpp - Instrument X86 inline assembly --------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,24 +7,31 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86AsmInstrumentation.h"
-#include "MCTargetDesc/X86BaseInfo.h"
#include "X86Operand.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SMLoc.h"
#include <algorithm>
#include <cassert>
+#include <cstdint>
+#include <limits>
+#include <memory>
#include <vector>
// Following comment describes how assembly instrumentation works.
@@ -91,30 +98,35 @@
// register as a frame register and temporarily override the current CFA
// register.
-namespace llvm {
-namespace {
+using namespace llvm;
static cl::opt<bool> ClAsanInstrumentAssembly(
"asan-instrument-assembly",
cl::desc("instrument assembly with AddressSanitizer checks"), cl::Hidden,
cl::init(false));
-const int64_t MinAllowedDisplacement = std::numeric_limits<int32_t>::min();
-const int64_t MaxAllowedDisplacement = std::numeric_limits<int32_t>::max();
+static const int64_t MinAllowedDisplacement =
+ std::numeric_limits<int32_t>::min();
+static const int64_t MaxAllowedDisplacement =
+ std::numeric_limits<int32_t>::max();
-int64_t ApplyDisplacementBounds(int64_t Displacement) {
+static int64_t ApplyDisplacementBounds(int64_t Displacement) {
return std::max(std::min(MaxAllowedDisplacement, Displacement),
MinAllowedDisplacement);
}
-void CheckDisplacementBounds(int64_t Displacement) {
+static void CheckDisplacementBounds(int64_t Displacement) {
assert(Displacement >= MinAllowedDisplacement &&
Displacement <= MaxAllowedDisplacement);
}
-bool IsStackReg(unsigned Reg) { return Reg == X86::RSP || Reg == X86::ESP; }
+static bool IsStackReg(unsigned Reg) {
+ return Reg == X86::RSP || Reg == X86::ESP;
+}
-bool IsSmallMemAccess(unsigned AccessSize) { return AccessSize < 8; }
+static bool IsSmallMemAccess(unsigned AccessSize) { return AccessSize < 8; }
+
+namespace {
class X86AddressSanitizer : public X86AsmInstrumentation {
public:
@@ -178,7 +190,7 @@ public:
X86AddressSanitizer(const MCSubtargetInfo *&STI)
: X86AsmInstrumentation(STI), RepPrefix(false), OrigSPOffset(0) {}
- ~X86AddressSanitizer() override {}
+ ~X86AddressSanitizer() override = default;
// X86AsmInstrumentation implementation:
void InstrumentAndEmitInstruction(const MCInst &Inst,
@@ -255,9 +267,11 @@ protected:
bool is64BitMode() const {
return STI->getFeatureBits()[X86::Mode64Bit];
}
+
bool is32BitMode() const {
return STI->getFeatureBits()[X86::Mode32Bit];
}
+
bool is16BitMode() const {
return STI->getFeatureBits()[X86::Mode16Bit];
}
@@ -498,7 +512,7 @@ public:
X86AddressSanitizer32(const MCSubtargetInfo *&STI)
: X86AddressSanitizer(STI) {}
- ~X86AddressSanitizer32() override {}
+ ~X86AddressSanitizer32() override = default;
unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
@@ -604,9 +618,9 @@ private:
EmitInstruction(
Out, MCInstBuilder(X86::PUSH32r).addReg(RegCtx.AddressReg(32)));
- MCSymbol *FnSym = Ctx.getOrCreateSymbol(llvm::Twine("__asan_report_") +
+ MCSymbol *FnSym = Ctx.getOrCreateSymbol(Twine("__asan_report_") +
(IsWrite ? "store" : "load") +
- llvm::Twine(AccessSize));
+ Twine(AccessSize));
const MCSymbolRefExpr *FnExpr =
MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FnExpr));
@@ -756,7 +770,7 @@ public:
X86AddressSanitizer64(const MCSubtargetInfo *&STI)
: X86AddressSanitizer(STI) {}
- ~X86AddressSanitizer64() override {}
+ ~X86AddressSanitizer64() override = default;
unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
@@ -875,15 +889,17 @@ private:
EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(X86::RDI).addReg(
RegCtx.AddressReg(64)));
}
- MCSymbol *FnSym = Ctx.getOrCreateSymbol(llvm::Twine("__asan_report_") +
+ MCSymbol *FnSym = Ctx.getOrCreateSymbol(Twine("__asan_report_") +
(IsWrite ? "store" : "load") +
- llvm::Twine(AccessSize));
+ Twine(AccessSize));
const MCSymbolRefExpr *FnExpr =
MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FnExpr));
}
};
+} // end anonymous namespace
+
void X86AddressSanitizer64::InstrumentMemOperandSmall(
X86Operand &Op, unsigned AccessSize, bool IsWrite,
const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
@@ -1022,12 +1038,10 @@ void X86AddressSanitizer64::InstrumentMOVSImpl(unsigned AccessSize,
RestoreFlags(Out);
}
-} // End anonymous namespace
-
X86AsmInstrumentation::X86AsmInstrumentation(const MCSubtargetInfo *&STI)
- : STI(STI), InitialFrameReg(0) {}
+ : STI(STI) {}
-X86AsmInstrumentation::~X86AsmInstrumentation() {}
+X86AsmInstrumentation::~X86AsmInstrumentation() = default;
void X86AsmInstrumentation::InstrumentAndEmitInstruction(
const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
@@ -1060,8 +1074,9 @@ unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx,
}
X86AsmInstrumentation *
-CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
- const MCContext &Ctx, const MCSubtargetInfo *&STI) {
+llvm::CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
+ const MCContext &Ctx,
+ const MCSubtargetInfo *&STI) {
Triple T(STI->getTargetTriple());
const bool hasCompilerRTSupport = T.isOSLinux();
if (ClAsanInstrumentAssembly && hasCompilerRTSupport &&
@@ -1073,5 +1088,3 @@ CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
}
return new X86AsmInstrumentation(STI);
}
-
-} // end llvm namespace
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
index 470ceadb0aa6..97a55cd8ad98 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
@@ -1,4 +1,4 @@
-//===- X86AsmInstrumentation.h - Instrument X86 inline assembly *- C++ -*-===//
+//===- X86AsmInstrumentation.h - Instrument X86 inline assembly -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,7 +11,6 @@
#define LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMINSTRUMENTATION_H
#include "llvm/ADT/SmallVector.h"
-
#include <memory>
namespace llvm {
@@ -23,7 +22,6 @@ class MCParsedAsmOperand;
class MCStreamer;
class MCSubtargetInfo;
class MCTargetOptions;
-
class X86AsmInstrumentation;
X86AsmInstrumentation *
@@ -43,7 +41,7 @@ public:
// Tries to instrument and emit instruction.
virtual void InstrumentAndEmitInstruction(
const MCInst &Inst,
- SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand> > &Operands,
+ SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>> &Operands,
MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
protected:
@@ -60,9 +58,9 @@ protected:
const MCSubtargetInfo *&STI;
- unsigned InitialFrameReg;
+ unsigned InitialFrameReg = 0;
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_X86_ASMPARSER_X86ASMINSTRUMENTATION_H
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index e692118f47fd..324da650e74e 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -98,6 +98,14 @@ private:
IC_REGISTER
};
+ enum IntelOperatorKind {
+ IOK_INVALID = 0,
+ IOK_LENGTH,
+ IOK_SIZE,
+ IOK_TYPE,
+ IOK_OFFSET
+ };
+
class InfixCalculator {
typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
@@ -704,10 +712,12 @@ private:
std::unique_ptr<X86Operand> ParseIntelOperand();
std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
- std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
+ unsigned IdentifyIntelOperator(StringRef Name);
+ unsigned ParseIntelOperator(unsigned OpKind);
std::unique_ptr<X86Operand>
ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
+ bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM);
bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
std::unique_ptr<X86Operand>
ParseIntelBracExpression(unsigned SegReg, SMLoc Start, int64_t ImmDisp,
@@ -814,6 +824,7 @@ private:
/// }
public:
+
X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
const MCInstrInfo &mii, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr),
@@ -1266,10 +1277,12 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> &AsmRewrites,
}
}
// Remove all the ImmPrefix rewrites within the brackets.
+ // We may also have some Imm rewrites as a result of an operator being
+ // applied; remove them as well.
for (AsmRewrite &AR : AsmRewrites) {
if (AR.Loc.getPointer() < StartInBrac.getPointer())
continue;
- if (AR.Kind == AOK_ImmPrefix)
+ if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm)
AR.Kind = AOK_Delete;
}
const char *SymLocPtr = SymName.data();
@@ -1286,6 +1299,30 @@ RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> &AsmRewrites,
}
}
+// Some binary bitwise operators have named synonyms.
+// Query a candidate string for such a named operator and, if so, invoke
+// the appropriate handler.
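+// For example, in Intel syntax "1 SHL 2" parses the same as "1 << 2".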
+bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) {
+ // A named operator must be all lowercase or all uppercase, not mixed case.
+ if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
+ return false;
+ if (Name.equals_lower("not"))
+ SM.onNot();
+ else if (Name.equals_lower("or"))
+ SM.onOr();
+ else if (Name.equals_lower("shl"))
+ SM.onLShift();
+ else if (Name.equals_lower("shr"))
+ SM.onRShift();
+ else if (Name.equals_lower("xor"))
+ SM.onXor();
+ else if (Name.equals_lower("and"))
+ SM.onAnd();
+ else
+ return false;
+ return true;
+}
+
bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
@@ -1324,31 +1361,36 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
const MCExpr *Val;
SMLoc IdentLoc = Tok.getLoc();
StringRef Identifier = Tok.getString();
+ UpdateLocLex = false;
if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
SM.onRegister(TmpReg);
- UpdateLocLex = false;
- break;
+ } else if (ParseIntelNamedOperator(Identifier, SM)) {
+ UpdateLocLex = true;
+ } else if (!isParsingInlineAsm()) {
+ if (getParser().parsePrimaryExpr(Val, End))
+ return Error(Tok.getLoc(), "Unexpected identifier!");
+ SM.onIdentifierExpr(Val, Identifier);
+ } else if (unsigned OpKind = IdentifyIntelOperator(Identifier)) {
+ if (OpKind == IOK_OFFSET)
+ return Error(IdentLoc, "parsing the OFFSET operator as part of "
+ "a compound immediate expression is not yet supported");
+ int64_t Val = ParseIntelOperator(OpKind);
+ if (!Val)
+ return true;
+ StringRef ErrMsg;
+ if (SM.onInteger(Val, ErrMsg))
+ return Error(IdentLoc, ErrMsg);
+ } else if (Identifier.find('.') != StringRef::npos &&
+ PrevTK == AsmToken::RBrac) {
+ return false;
} else {
- if (!isParsingInlineAsm()) {
- if (getParser().parsePrimaryExpr(Val, End))
- return Error(Tok.getLoc(), "Unexpected identifier!");
- } else {
- // This is a dot operator, not an adjacent identifier.
- if (Identifier.find('.') != StringRef::npos &&
- PrevTK == AsmToken::RBrac) {
- return false;
- } else {
- InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
- if (ParseIntelIdentifier(Val, Identifier, Info,
- /*Unevaluated=*/false, End))
- return true;
- }
- }
+ InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
+ if (ParseIntelIdentifier(Val, Identifier, Info,
+ /*Unevaluated=*/false, End))
+ return true;
SM.onIdentifierExpr(Val, Identifier);
- UpdateLocLex = false;
- break;
}
- return Error(Tok.getLoc(), "Unexpected identifier!");
+ break;
}
case AsmToken::Integer: {
StringRef ErrMsg;
@@ -1715,11 +1757,16 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
OffsetOfLoc, Identifier, Info.OpDecl);
}
-enum IntelOperatorKind {
- IOK_LENGTH,
- IOK_SIZE,
- IOK_TYPE
-};
+// Query a candidate string for being an Intel assembly operator.
+// Report back its kind, or IOK_INVALID if it does not evaluate to a known one.
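+// For example, both "SIZE" and "size" yield IOK_SIZE, while the mixed-case
+// "Size" yields IOK_INVALID.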
+unsigned X86AsmParser::IdentifyIntelOperator(StringRef Name) {
+ return StringSwitch<unsigned>(Name)
+ .Cases("TYPE","type",IOK_TYPE)
+ .Cases("SIZE","size",IOK_SIZE)
+ .Cases("LENGTH","length",IOK_LENGTH)
+ .Cases("OFFSET","offset",IOK_OFFSET)
+ .Default(IOK_INVALID);
+}
/// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
/// returns the number of elements in an array. It returns the value 1 for
@@ -1727,7 +1774,7 @@ enum IntelOperatorKind {
/// variable. A variable's size is the product of its LENGTH and TYPE. The
/// TYPE operator returns the size of a C or C++ type or variable. If the
/// variable is an array, TYPE returns the size of a single element.
-std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
+unsigned X86AsmParser::ParseIntelOperator(unsigned OpKind) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc TypeLoc = Tok.getLoc();
@@ -1739,11 +1786,13 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
StringRef Identifier = Tok.getString();
if (ParseIntelIdentifier(Val, Identifier, Info,
/*Unevaluated=*/true, End))
- return nullptr;
-
- if (!Info.OpDecl)
- return ErrorOperand(Start, "unable to lookup expression");
+ return 0;
+ if (!Info.OpDecl) {
+ Error(Start, "unable to lookup expression");
+ return 0;
+ }
+
unsigned CVal = 0;
switch(OpKind) {
default: llvm_unreachable("Unexpected operand kind!");
@@ -1757,8 +1806,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
unsigned Len = End.getPointer() - TypeLoc.getPointer();
InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal);
- const MCExpr *Imm = MCConstantExpr::create(CVal, getContext());
- return X86Operand::CreateImm(Imm, Start, End);
+ return CVal;
}
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
@@ -1766,18 +1814,12 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
const AsmToken &Tok = Parser.getTok();
SMLoc Start, End;
- // Offset, length, type and size operators.
- if (isParsingInlineAsm()) {
- StringRef AsmTokStr = Tok.getString();
- if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
+ // FIXME: The OFFSET operator should be handled as part of an immediate
+ // expression, like the other operators; currently it is only supported as
+ // a stand-alone operand.
+ if (isParsingInlineAsm())
+ if (IdentifyIntelOperator(Tok.getString()) == IOK_OFFSET)
return ParseIntelOffsetOfOperator();
- if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
- return ParseIntelOperator(IOK_LENGTH);
- if (AsmTokStr == "size" || AsmTokStr == "SIZE")
- return ParseIntelOperator(IOK_SIZE);
- if (AsmTokStr == "type" || AsmTokStr == "TYPE")
- return ParseIntelOperator(IOK_TYPE);
- }
bool PtrInOperand = false;
unsigned Size = getIntelMemOperandSize(Tok.getString());
@@ -2360,7 +2402,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Name == "lock" || Name == "rep" ||
Name == "repe" || Name == "repz" ||
Name == "repne" || Name == "repnz" ||
- Name == "rex64" || Name == "data16";
+ Name == "rex64" || Name == "data16" || Name == "data32";
bool CurlyAsEndOfStatement = false;
// This does the actual operand parsing. Don't parse any more if we have a
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index 9db1a8483bee..9f1fa6c65907 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -1,4 +1,4 @@
-//===-- X86Operand.h - Parsed X86 machine instruction --------------------===//
+//===- X86Operand.h - Parsed X86 machine instruction ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,12 +11,17 @@
#define LLVM_LIB_TARGET_X86_ASMPARSER_X86OPERAND_H
#include "X86AsmParserCommon.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/ADT/STLExtras.h"
-#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SMLoc.h"
+#include <cassert>
+#include <memory>
namespace llvm {
@@ -74,11 +79,14 @@ struct X86Operand : public MCParsedAsmOperand {
/// getStartLoc - Get the location of the first token of this operand.
SMLoc getStartLoc() const override { return StartLoc; }
+
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const override { return EndLoc; }
+
/// getLocRange - Get the range between the first and last token of this
/// operand.
SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
+
/// getOffsetOfLoc - Get the location of the offset operator.
SMLoc getOffsetOfLoc() const override { return OffsetOfLoc; }
@@ -271,6 +279,9 @@ struct X86Operand : public MCParsedAsmOperand {
bool isMem256_RC256X() const {
return isMem256() && isMemIndexReg(X86::YMM0, X86::YMM31);
}
+ bool isMem256_RC512() const {
+ return isMem256() && isMemIndexReg(X86::ZMM0, X86::ZMM31);
+ }
bool isMem512_RC256X() const {
return isMem512() && isMemIndexReg(X86::YMM0, X86::YMM31);
}
@@ -419,10 +430,12 @@ struct X86Operand : public MCParsedAsmOperand {
RegNo = getGR32FromGR64(RegNo);
Inst.addOperand(MCOperand::createReg(RegNo));
}
+
void addAVX512RCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
}
+
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
@@ -451,6 +464,7 @@ struct X86Operand : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::createReg(getMemBaseReg()));
Inst.addOperand(MCOperand::createReg(getMemSegReg()));
}
+
void addDstIdxOperands(MCInst &Inst, unsigned N) const {
assert((N == 1) && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(getMemBaseReg()));
@@ -541,6 +555,6 @@ struct X86Operand : public MCParsedAsmOperand {
}
};
-} // End of namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_X86_ASMPARSER_X86OPERAND_H
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 9dfd09022bdc..fc4adddc149b 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -10,11 +10,20 @@ tablegen(LLVM X86GenDAGISel.inc -gen-dag-isel)
tablegen(LLVM X86GenFastISel.inc -gen-fast-isel)
tablegen(LLVM X86GenCallingConv.inc -gen-callingconv)
tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM X86GenEVEX2VEXTables.inc -gen-x86-EVEX2VEX-tables)
+if(LLVM_BUILD_GLOBAL_ISEL)
+ tablegen(LLVM X86GenRegisterBank.inc -gen-register-bank)
+ tablegen(LLVM X86GenGlobalISel.inc -gen-global-isel)
+endif()
+
add_public_tablegen_target(X86CommonTableGen)
# Add GlobalISel files if the build option was enabled.
set(GLOBAL_ISEL_FILES
X86CallLowering.cpp
+ X86LegalizerInfo.cpp
+ X86RegisterBankInfo.cpp
+ X86InstructionSelector.cpp
)
if(LLVM_BUILD_GLOBAL_ISEL)
@@ -43,6 +52,7 @@ set(sources
X86EvexToVex.cpp
X86MCInstLower.cpp
X86MachineFunctionInfo.cpp
+ X86MacroFusion.cpp
X86OptimizeLEAs.cpp
X86PadShortFunction.cpp
X86RegisterInfo.cpp
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 0871888bbfcd..36ad23bb41c0 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -368,32 +368,49 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
bool isBranch = false;
uint64_t pcrel = 0;
- if (type == TYPE_RELv) {
+ if (type == TYPE_REL) {
isBranch = true;
pcrel = insn.startLocation +
insn.immediateOffset + insn.immediateSize;
- switch (insn.displacementSize) {
+ switch (operand.encoding) {
default:
break;
- case 1:
+ case ENCODING_Iv:
+ switch (insn.displacementSize) {
+ default:
+ break;
+ case 1:
+ if(immediate & 0x80)
+ immediate |= ~(0xffull);
+ break;
+ case 2:
+ if(immediate & 0x8000)
+ immediate |= ~(0xffffull);
+ break;
+ case 4:
+ if(immediate & 0x80000000)
+ immediate |= ~(0xffffffffull);
+ break;
+ case 8:
+ break;
+ }
+ break;
+ case ENCODING_IB:
if(immediate & 0x80)
immediate |= ~(0xffull);
break;
- case 2:
+ case ENCODING_IW:
if(immediate & 0x8000)
immediate |= ~(0xffffull);
break;
- case 4:
+ case ENCODING_ID:
if(immediate & 0x80000000)
immediate |= ~(0xffffffffull);
break;
- case 8:
- break;
}
}
// By default sign-extend all X86 immediates based on their encoding.
- else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 ||
- type == TYPE_IMM64 || type == TYPE_IMMv) {
+ else if (type == TYPE_IMM) {
switch (operand.encoding) {
default:
break;
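The masks in the cases above perform ordinary sign extension: when the top bit of the encoded width is set, every higher bit of the 64-bit immediate is filled with ones. A standalone sketch of the same computation:

#include <cassert>
#include <cstdint>

// Sign-extend the low `bytes` bytes of `imm` to 64 bits, mirroring the
// `immediate |= ~(0xff...ull)` idiom used in translateImmediate().
static uint64_t signExtend(uint64_t imm, unsigned bytes) {
  assert(bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
  if (bytes == 8)
    return imm; // already full width
  uint64_t signBit = 1ull << (bytes * 8 - 1);
  uint64_t mask = (1ull << (bytes * 8)) - 1;
  return (imm & signBit) ? (imm | ~mask) : imm;
}
// e.g. signExtend(0x80, 1) == 0xffffffffffffff80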
@@ -620,38 +637,17 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
}
switch (type) {
- case TYPE_XMM32:
- case TYPE_XMM64:
- case TYPE_XMM128:
+ case TYPE_XMM:
mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4)));
return;
- case TYPE_XMM256:
+ case TYPE_YMM:
mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4)));
return;
- case TYPE_XMM512:
+ case TYPE_ZMM:
mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));
return;
case TYPE_BNDR:
mcInst.addOperand(MCOperand::createReg(X86::BND0 + (immediate >> 4)));
- case TYPE_REL8:
- isBranch = true;
- pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
- if (immediate & 0x80)
- immediate |= ~(0xffull);
- break;
- case TYPE_REL16:
- isBranch = true;
- pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
- if (immediate & 0x8000)
- immediate |= ~(0xffffull);
- break;
- case TYPE_REL32:
- case TYPE_REL64:
- isBranch = true;
- pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
- if(immediate & 0x80000000)
- immediate |= ~(0xffffffffull);
- break;
default:
// operand is 64 bits wide. Do nothing.
break;
@@ -662,8 +658,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
mcInst, Dis))
mcInst.addOperand(MCOperand::createImm(immediate));
- if (type == TYPE_MOFFS8 || type == TYPE_MOFFS16 ||
- type == TYPE_MOFFS32 || type == TYPE_MOFFS64) {
+ if (type == TYPE_MOFFS) {
MCOperand segmentReg;
segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
mcInst.addOperand(segmentReg);
@@ -767,7 +762,27 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
Opcode == X86::VPGATHERDQYrm ||
Opcode == X86::VPGATHERQQrm ||
Opcode == X86::VPGATHERDDrm ||
- Opcode == X86::VPGATHERQDrm);
+ Opcode == X86::VPGATHERQDrm ||
+ Opcode == X86::VGATHERDPDZ128rm ||
+ Opcode == X86::VGATHERDPDZ256rm ||
+ Opcode == X86::VGATHERDPSZ128rm ||
+ Opcode == X86::VGATHERQPDZ128rm ||
+ Opcode == X86::VGATHERQPSZ128rm ||
+ Opcode == X86::VPGATHERDDZ128rm ||
+ Opcode == X86::VPGATHERDQZ128rm ||
+ Opcode == X86::VPGATHERDQZ256rm ||
+ Opcode == X86::VPGATHERQDZ128rm ||
+ Opcode == X86::VPGATHERQQZ128rm ||
+ Opcode == X86::VSCATTERDPDZ128mr ||
+ Opcode == X86::VSCATTERDPDZ256mr ||
+ Opcode == X86::VSCATTERDPSZ128mr ||
+ Opcode == X86::VSCATTERQPDZ128mr ||
+ Opcode == X86::VSCATTERQPSZ128mr ||
+ Opcode == X86::VPSCATTERDDZ128mr ||
+ Opcode == X86::VPSCATTERDQZ128mr ||
+ Opcode == X86::VPSCATTERDQZ256mr ||
+ Opcode == X86::VPSCATTERQDZ128mr ||
+ Opcode == X86::VPSCATTERQQZ128mr);
bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||
Opcode == X86::VGATHERDPSYrm ||
Opcode == X86::VGATHERQPSYrm ||
@@ -775,13 +790,49 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
Opcode == X86::VPGATHERDQZrm ||
Opcode == X86::VPGATHERQQYrm ||
Opcode == X86::VPGATHERDDYrm ||
- Opcode == X86::VPGATHERQDYrm);
+ Opcode == X86::VPGATHERQDYrm ||
+ Opcode == X86::VGATHERDPSZ256rm ||
+ Opcode == X86::VGATHERQPDZ256rm ||
+ Opcode == X86::VGATHERQPSZ256rm ||
+ Opcode == X86::VPGATHERDDZ256rm ||
+ Opcode == X86::VPGATHERQQZ256rm ||
+ Opcode == X86::VPGATHERQDZ256rm ||
+ Opcode == X86::VSCATTERDPDZmr ||
+ Opcode == X86::VPSCATTERDQZmr ||
+ Opcode == X86::VSCATTERDPSZ256mr ||
+ Opcode == X86::VSCATTERQPDZ256mr ||
+ Opcode == X86::VSCATTERQPSZ256mr ||
+ Opcode == X86::VPSCATTERDDZ256mr ||
+ Opcode == X86::VPSCATTERQQZ256mr ||
+ Opcode == X86::VPSCATTERQDZ256mr ||
+ Opcode == X86::VGATHERPF0DPDm ||
+ Opcode == X86::VGATHERPF1DPDm ||
+ Opcode == X86::VSCATTERPF0DPDm ||
+ Opcode == X86::VSCATTERPF1DPDm);
bool IndexIs512 = (Opcode == X86::VGATHERQPDZrm ||
Opcode == X86::VGATHERDPSZrm ||
Opcode == X86::VGATHERQPSZrm ||
Opcode == X86::VPGATHERQQZrm ||
Opcode == X86::VPGATHERDDZrm ||
- Opcode == X86::VPGATHERQDZrm);
+ Opcode == X86::VPGATHERQDZrm ||
+ Opcode == X86::VSCATTERQPDZmr ||
+ Opcode == X86::VSCATTERDPSZmr ||
+ Opcode == X86::VSCATTERQPSZmr ||
+ Opcode == X86::VPSCATTERQQZmr ||
+ Opcode == X86::VPSCATTERDDZmr ||
+ Opcode == X86::VPSCATTERQDZmr ||
+ Opcode == X86::VGATHERPF0DPSm ||
+ Opcode == X86::VGATHERPF0QPDm ||
+ Opcode == X86::VGATHERPF0QPSm ||
+ Opcode == X86::VGATHERPF1DPSm ||
+ Opcode == X86::VGATHERPF1QPDm ||
+ Opcode == X86::VGATHERPF1QPSm ||
+ Opcode == X86::VSCATTERPF0DPSm ||
+ Opcode == X86::VSCATTERPF0QPDm ||
+ Opcode == X86::VSCATTERPF0QPSm ||
+ Opcode == X86::VSCATTERPF1DPSm ||
+ Opcode == X86::VSCATTERPF1QPDm ||
+ Opcode == X86::VSCATTERPF1QPSm);
if (IndexIs128 || IndexIs256 || IndexIs512) {
unsigned IndexOffset = insn.sibIndex -
(insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
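The remap this hunk starts is mechanical: the SIB index was decoded as an ordinal in the GPR bank, and gather/scatter instructions reinterpret that ordinal in the vector bank of the appropriate width. A sketch with hypothetical register-base constants:

// SibIndex was decoded relative to the GPR bank (RAX/EAX base); the same
// ordinal is rebased onto XMM0/YMM0/ZMM0 depending on the index width.
static unsigned remapIndexReg(unsigned SibIndex, unsigned GprBase,
                              unsigned VecBase) {
  unsigned IndexOffset = SibIndex - GprBase; // ordinal within the bank
  return VecBase + IndexOffset;              // e.g. rcx -> xmm1/ymm1/zmm1
}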
@@ -909,38 +960,15 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_R64:
case TYPE_Rv:
case TYPE_MM64:
- case TYPE_XMM32:
- case TYPE_XMM64:
- case TYPE_XMM128:
- case TYPE_XMM256:
- case TYPE_XMM512:
- case TYPE_VK1:
- case TYPE_VK2:
- case TYPE_VK4:
- case TYPE_VK8:
- case TYPE_VK16:
- case TYPE_VK32:
- case TYPE_VK64:
+ case TYPE_XMM:
+ case TYPE_YMM:
+ case TYPE_ZMM:
+ case TYPE_VK:
case TYPE_DEBUGREG:
case TYPE_CONTROLREG:
case TYPE_BNDR:
return translateRMRegister(mcInst, insn);
case TYPE_M:
- case TYPE_M8:
- case TYPE_M16:
- case TYPE_M32:
- case TYPE_M64:
- case TYPE_M128:
- case TYPE_M256:
- case TYPE_M512:
- case TYPE_Mv:
- case TYPE_M32FP:
- case TYPE_M64FP:
- case TYPE_M80FP:
- case TYPE_M1616:
- case TYPE_M1632:
- case TYPE_M1664:
- case TYPE_LEA:
return translateRMMemory(mcInst, insn, Dis);
}
}
@@ -992,6 +1020,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
case ENCODING_WRITEMASK:
return translateMaskRegister(mcInst, insn.writemask);
CASE_ENCODING_RM:
+ CASE_ENCODING_VSIB:
return translateRM(mcInst, operand, insn, Dis);
case ENCODING_IB:
case ENCODING_IW:
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index ab64d6fcf70b..b7f637e9a8cd 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -650,11 +650,6 @@ static int readPrefixes(struct InternalInstruction* insn) {
insn->addressSize = (hasAdSize ? 4 : 8);
insn->displacementSize = 4;
insn->immediateSize = 4;
- } else if (insn->rexPrefix) {
- insn->registerSize = (hasOpSize ? 2 : 4);
- insn->addressSize = (hasAdSize ? 4 : 8);
- insn->displacementSize = (hasOpSize ? 2 : 4);
- insn->immediateSize = (hasOpSize ? 2 : 4);
} else {
insn->registerSize = (hasOpSize ? 2 : 4);
insn->addressSize = (hasAdSize ? 4 : 8);
@@ -1475,21 +1470,13 @@ static int readModRM(struct InternalInstruction* insn) {
return prefix##_EAX + index; \
case TYPE_R64: \
return prefix##_RAX + index; \
- case TYPE_XMM512: \
+ case TYPE_ZMM: \
return prefix##_ZMM0 + index; \
- case TYPE_XMM256: \
+ case TYPE_YMM: \
return prefix##_YMM0 + index; \
- case TYPE_XMM128: \
- case TYPE_XMM64: \
- case TYPE_XMM32: \
+ case TYPE_XMM: \
return prefix##_XMM0 + index; \
- case TYPE_VK1: \
- case TYPE_VK2: \
- case TYPE_VK4: \
- case TYPE_VK8: \
- case TYPE_VK16: \
- case TYPE_VK32: \
- case TYPE_VK64: \
+ case TYPE_VK: \
if (index > 7) \
*valid = 0; \
return prefix##_K0 + index; \
@@ -1562,6 +1549,7 @@ static int fixupReg(struct InternalInstruction *insn,
return -1;
break;
CASE_ENCODING_RM:
+ CASE_ENCODING_VSIB:
if (insn->eaBase >= insn->eaRegBase) {
insn->eaBase = (EABase)fixupRMValue(insn,
(OperandType)op->type,
@@ -1753,6 +1741,18 @@ static int readOperands(struct InternalInstruction* insn) {
case ENCODING_SI:
case ENCODING_DI:
break;
+ CASE_ENCODING_VSIB:
+    // VSIB can use the V2 bit, so check only the other bits.
+ if (needVVVV)
+ needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
+ if (readModRM(insn))
+ return -1;
+ if (fixupReg(insn, &Op))
+ return -1;
+ // Apply the AVX512 compressed displacement scaling factor.
+ if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
+ insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
+ break;
case ENCODING_REG:
CASE_ENCODING_RM:
if (readModRM(insn))
@@ -1774,8 +1774,7 @@ static int readOperands(struct InternalInstruction* insn) {
}
if (readImmediate(insn, 1))
return -1;
- if (Op.type == TYPE_XMM128 ||
- Op.type == TYPE_XMM256)
+ if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
sawRegImm = 1;
break;
case ENCODING_IW:
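The displacement scaling above leans on the enumerator layout added to X86DisassemblerDecoderCommon.h further down in this patch: each ENCODING_VSIB_CD* value sits one slot past the previous, so the distance from ENCODING_VSIB is the log2 of the compressed-displacement scale. A sketch of the arithmetic under that assumption:

#include <cstdint>

// Mirrors the assumed enumerator order: VSIB, CD2, CD4, CD8, CD16, CD32, CD64.
enum VSIBEncoding { VSIB = 0, VSIB_CD2, VSIB_CD4, VSIB_CD8,
                    VSIB_CD16, VSIB_CD32, VSIB_CD64 };

static int64_t scaleDisp8(int64_t disp8, VSIBEncoding enc) {
  return disp8 * (int64_t(1) << (enc - VSIB)); // e.g. disp8 == 1, VSIB_CD32 -> 32
}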
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 0a835b876d90..e0f4399b3687 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -339,6 +339,15 @@ enum ModRMDecisionType {
case ENCODING_RM_CD32: \
case ENCODING_RM_CD64
+#define CASE_ENCODING_VSIB \
+ case ENCODING_VSIB: \
+ case ENCODING_VSIB_CD2: \
+ case ENCODING_VSIB_CD4: \
+ case ENCODING_VSIB_CD8: \
+ case ENCODING_VSIB_CD16: \
+ case ENCODING_VSIB_CD32: \
+ case ENCODING_VSIB_CD64
+
// Physical encodings of instruction operands.
#define ENCODINGS \
ENUM_ENTRY(ENCODING_NONE, "") \
@@ -350,6 +359,13 @@ enum ModRMDecisionType {
ENUM_ENTRY(ENCODING_RM_CD16,"R/M operand with CDisp scaling of 16") \
ENUM_ENTRY(ENCODING_RM_CD32,"R/M operand with CDisp scaling of 32") \
ENUM_ENTRY(ENCODING_RM_CD64,"R/M operand with CDisp scaling of 64") \
+ ENUM_ENTRY(ENCODING_VSIB, "VSIB operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_VSIB_CD2, "VSIB operand with CDisp scaling of 2") \
+ ENUM_ENTRY(ENCODING_VSIB_CD4, "VSIB operand with CDisp scaling of 4") \
+ ENUM_ENTRY(ENCODING_VSIB_CD8, "VSIB operand with CDisp scaling of 8") \
+ ENUM_ENTRY(ENCODING_VSIB_CD16,"VSIB operand with CDisp scaling of 16") \
+ ENUM_ENTRY(ENCODING_VSIB_CD32,"VSIB operand with CDisp scaling of 32") \
+ ENUM_ENTRY(ENCODING_VSIB_CD64,"VSIB operand with CDisp scaling of 64") \
ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \
ENUM_ENTRY(ENCODING_WRITEMASK, "Register operand in EVEX.aaa byte.") \
ENUM_ENTRY(ENCODING_IB, "1-byte immediate") \
@@ -383,85 +399,38 @@ enum OperandEncoding {
// Semantic interpretations of instruction operands.
#define TYPES \
ENUM_ENTRY(TYPE_NONE, "") \
- ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \
- ENUM_ENTRY(TYPE_REL16, "2-byte") \
- ENUM_ENTRY(TYPE_REL32, "4-byte") \
- ENUM_ENTRY(TYPE_REL64, "8-byte") \
- ENUM_ENTRY(TYPE_PTR1616, "2+2-byte segment+offset address") \
- ENUM_ENTRY(TYPE_PTR1632, "2+4-byte") \
- ENUM_ENTRY(TYPE_PTR1664, "2+8-byte") \
+ ENUM_ENTRY(TYPE_REL, "immediate address") \
ENUM_ENTRY(TYPE_R8, "1-byte register operand") \
ENUM_ENTRY(TYPE_R16, "2-byte") \
ENUM_ENTRY(TYPE_R32, "4-byte") \
ENUM_ENTRY(TYPE_R64, "8-byte") \
- ENUM_ENTRY(TYPE_IMM8, "1-byte immediate operand") \
- ENUM_ENTRY(TYPE_IMM16, "2-byte") \
- ENUM_ENTRY(TYPE_IMM32, "4-byte") \
- ENUM_ENTRY(TYPE_IMM64, "8-byte") \
+ ENUM_ENTRY(TYPE_IMM, "immediate operand") \
ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \
ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \
ENUM_ENTRY(TYPE_AVX512ICC, "1-byte immediate operand for AVX512 icmp") \
ENUM_ENTRY(TYPE_UIMM8, "1-byte unsigned immediate operand") \
- ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \
- ENUM_ENTRY(TYPE_RM16, "2-byte") \
- ENUM_ENTRY(TYPE_RM32, "4-byte") \
- ENUM_ENTRY(TYPE_RM64, "8-byte") \
ENUM_ENTRY(TYPE_M, "Memory operand") \
- ENUM_ENTRY(TYPE_M8, "1-byte") \
- ENUM_ENTRY(TYPE_M16, "2-byte") \
- ENUM_ENTRY(TYPE_M32, "4-byte") \
- ENUM_ENTRY(TYPE_M64, "8-byte") \
- ENUM_ENTRY(TYPE_LEA, "Effective address") \
- ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \
- ENUM_ENTRY(TYPE_M256, "256-byte (AVX)") \
- ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \
- ENUM_ENTRY(TYPE_M1632, "2+4-byte") \
- ENUM_ENTRY(TYPE_M1664, "2+8-byte") \
- ENUM_ENTRY(TYPE_SRCIDX8, "1-byte memory at source index") \
- ENUM_ENTRY(TYPE_SRCIDX16, "2-byte memory at source index") \
- ENUM_ENTRY(TYPE_SRCIDX32, "4-byte memory at source index") \
- ENUM_ENTRY(TYPE_SRCIDX64, "8-byte memory at source index") \
- ENUM_ENTRY(TYPE_DSTIDX8, "1-byte memory at destination index") \
- ENUM_ENTRY(TYPE_DSTIDX16, "2-byte memory at destination index") \
- ENUM_ENTRY(TYPE_DSTIDX32, "4-byte memory at destination index") \
- ENUM_ENTRY(TYPE_DSTIDX64, "8-byte memory at destination index") \
- ENUM_ENTRY(TYPE_MOFFS8, "1-byte memory offset (relative to segment " \
- "base)") \
- ENUM_ENTRY(TYPE_MOFFS16, "2-byte") \
- ENUM_ENTRY(TYPE_MOFFS32, "4-byte") \
- ENUM_ENTRY(TYPE_MOFFS64, "8-byte") \
- ENUM_ENTRY(TYPE_M32FP, "32-bit IEE754 memory floating-point operand") \
- ENUM_ENTRY(TYPE_M64FP, "64-bit") \
- ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \
+ ENUM_ENTRY(TYPE_SRCIDX, "memory at source index") \
+ ENUM_ENTRY(TYPE_DSTIDX, "memory at destination index") \
+ ENUM_ENTRY(TYPE_MOFFS, "memory offset (relative to segment base)") \
ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \
ENUM_ENTRY(TYPE_MM64, "8-byte MMX register") \
- ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \
- ENUM_ENTRY(TYPE_XMM64, "8-byte") \
- ENUM_ENTRY(TYPE_XMM128, "16-byte") \
- ENUM_ENTRY(TYPE_XMM256, "32-byte") \
- ENUM_ENTRY(TYPE_XMM512, "64-byte") \
- ENUM_ENTRY(TYPE_VK1, "1-bit") \
- ENUM_ENTRY(TYPE_VK2, "2-bit") \
- ENUM_ENTRY(TYPE_VK4, "4-bit") \
- ENUM_ENTRY(TYPE_VK8, "8-bit") \
- ENUM_ENTRY(TYPE_VK16, "16-bit") \
- ENUM_ENTRY(TYPE_VK32, "32-bit") \
- ENUM_ENTRY(TYPE_VK64, "64-bit") \
+ ENUM_ENTRY(TYPE_XMM, "16-byte") \
+ ENUM_ENTRY(TYPE_YMM, "32-byte") \
+ ENUM_ENTRY(TYPE_ZMM, "64-byte") \
+ ENUM_ENTRY(TYPE_VK, "mask register") \
ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \
ENUM_ENTRY(TYPE_BNDR, "MPX bounds register") \
\
- ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \
ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \
- ENUM_ENTRY(TYPE_IMMv, "Immediate operand of operand size") \
ENUM_ENTRY(TYPE_RELv, "Immediate address of operand size") \
ENUM_ENTRY(TYPE_DUP0, "Duplicate of operand 0") \
ENUM_ENTRY(TYPE_DUP1, "operand 1") \
ENUM_ENTRY(TYPE_DUP2, "operand 2") \
ENUM_ENTRY(TYPE_DUP3, "operand 3") \
ENUM_ENTRY(TYPE_DUP4, "operand 4") \
- ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state")
#define ENUM_ENTRY(n, d) n,
enum OperandType {
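TYPES and ENCODINGS above are X-macro lists: the table is written once, and each client re-defines ENUM_ENTRY to extract the column it needs. A minimal sketch of the technique with a hypothetical list:

#define COLORS \
  ENUM_ENTRY(RED, "red") \
  ENUM_ENTRY(GREEN, "green")

// Expansion 1: the enum itself (this is what `#define ENUM_ENTRY(n, d) n,`
// above does for OperandType).
#define ENUM_ENTRY(n, d) n,
enum Color { COLORS };
#undef ENUM_ENTRY

// Expansion 2: a parallel table of the descriptions.
#define ENUM_ENTRY(n, d) d,
static const char *ColorNames[] = { COLORS };
#undef ENUM_ENTRY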
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 10b7e6ff5ee2..6aa700306744 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -12,19 +12,22 @@
//
//===----------------------------------------------------------------------===//
-#include "X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
-#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "X86ATTInstPrinter.h"
#include "X86InstComments.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cinttypes>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
@@ -61,6 +64,17 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
OS << "\tcallq\t";
printPCRelImm(MI, 0, OS);
}
+  // data16 and data32 share the same encoding, 0x66. data32 is valid only in
+  // 16-bit mode, while data16 is valid in the other modes.
+  // The Requires clause does not seem to be fully supported, which causes
+  // 0x66 to be interpreted as "data16" by the asm printer.
+  // Thus we adjust the opcode here in order to print the right instruction.
+ else if (MI->getOpcode() == X86::DATA16_PREFIX &&
+ (STI.getFeatureBits()[X86::Mode16Bit])) {
+ MCInst Data32MI(*MI);
+ Data32MI.setOpcode(X86::DATA32_PREFIX);
+ printInstruction(&Data32MI, OS);
+ }
// Try to print any aliases first.
else if (!printAliasInstr(MI, OS))
printInstruction(MI, OS);
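A minimal sketch of the ambiguity the comment describes (not printer code, just the mode rule): the prefix byte is fixed, and only the current mode's default operand size decides which mnemonic is right.

// 0x66 toggles the default operand size: in 16-bit mode it selects 32-bit
// operands ("data32"); in 32/64-bit modes it selects 16-bit ("data16").
static const char *prefixMnemonic(bool Mode16Bit) {
  return Mode16Bit ? "data32" : "data16"; // same encoding either way
}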
@@ -135,6 +149,7 @@ void X86ATTInstPrinter::printRoundingControl(const MCInst *MI, unsigned Op,
case 3: O << "{rz-sae}"; break;
}
}
+
/// printPCRelImm - This is used to print an immediate value that ends up
/// being encoded as a pc-relative value (e.g. for jumps and calls). These
/// print slightly differently than normal immediates. For example, a $ is not
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index bbb309076610..946c1c73f088 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -1,4 +1,4 @@
-//==- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -*- C++ -*-=//
+//=- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax --*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -137,6 +137,7 @@ public:
private:
bool HasCustomInstComment;
};
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 8594addb5dd4..6e062ec59347 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -1189,8 +1189,6 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
OS << ']';
--i; // For loop increments element #.
}
- //MI->print(OS, 0);
- OS << "\n";
// We successfully added a comment to this instruction.
return true;
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 4443edb8e342..a8c631ae282f 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -12,16 +12,18 @@
//
//===----------------------------------------------------------------------===//
-#include "X86IntelInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
-#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86InstComments.h"
+#include "X86IntelInstPrinter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include <cctype>
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 20cd7ffb2e63..ace31186a054 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -157,6 +157,6 @@ public:
}
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_X86_INSTPRINTER_X86INTELINSTPRINTER_H
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index e83ec9f4045a..a713af6aadb5 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -109,7 +109,7 @@ public:
}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const override {
+ uint64_t Value, bool IsPCRel, MCContext &Ctx) const override {
unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
assert(Fixup.getOffset() + Size <= DataSize &&
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index aab552547fac..d8953da4abb2 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -212,7 +212,12 @@ namespace X86II {
/// the offset from beginning of section.
///
/// This is the TLS offset for the COFF/Windows TLS mechanism.
- MO_SECREL
+ MO_SECREL,
+
+ /// MO_ABS8 - On a symbol operand this indicates that the symbol is known
+ /// to be an absolute symbol in range [0,128), so we can use the @ABS8
+ /// symbol modifier.
+ MO_ABS8,
};
enum : uint64_t {
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index da69da51df10..0b73df3a2ff8 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -13,24 +13,28 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
namespace {
- class X86ELFObjectWriter : public MCELFObjectTargetWriter {
- public:
- X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine);
- ~X86ELFObjectWriter() override;
+class X86ELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine);
+ ~X86ELFObjectWriter() override = default;
- protected:
- unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
- const MCFixup &Fixup, bool IsPCRel) const override;
- };
-}
+protected:
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const override;
+};
+
+} // end anonymous namespace
X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
uint16_t EMachine)
@@ -40,9 +44,6 @@ X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI,
(EMachine != ELF::EM_386) &&
(EMachine != ELF::EM_IAMCU)) {}
-X86ELFObjectWriter::~X86ELFObjectWriter()
-{}
-
enum X86_64RelType { RT64_64, RT64_32, RT64_32S, RT64_16, RT64_8 };
static X86_64RelType getType64(unsigned Kind,
@@ -96,6 +97,7 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
default:
llvm_unreachable("Unimplemented");
case MCSymbolRefExpr::VK_None:
+ case MCSymbolRefExpr::VK_X86_ABS8:
switch (Type) {
case RT64_64:
return IsPCRel ? ELF::R_X86_64_PC64 : ELF::R_X86_64_64;
@@ -219,6 +221,7 @@ static unsigned getRelocType32(MCContext &Ctx,
default:
llvm_unreachable("Unimplemented");
case MCSymbolRefExpr::VK_None:
+ case MCSymbolRefExpr::VK_X86_ABS8:
switch (Type) {
case RT32_32:
return IsPCRel ? ELF::R_386_PC32 : ELF::R_386_32;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 8045e7c6d872..10e2bbc64d3c 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -11,35 +11,43 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/X86MCTargetDesc.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
namespace {
+
class X86MCCodeEmitter : public MCCodeEmitter {
- X86MCCodeEmitter(const X86MCCodeEmitter &) = delete;
- void operator=(const X86MCCodeEmitter &) = delete;
const MCInstrInfo &MCII;
MCContext &Ctx;
+
public:
X86MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
: MCII(mcii), Ctx(ctx) {
}
-
- ~X86MCCodeEmitter() override {}
+ X86MCCodeEmitter(const X86MCCodeEmitter &) = delete;
+ X86MCCodeEmitter &operator=(const X86MCCodeEmitter &) = delete;
+ ~X86MCCodeEmitter() override = default;
bool is64BitMode(const MCSubtargetInfo &STI) const {
return STI.getFeatureBits()[X86::Mode64Bit];
@@ -106,8 +114,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
int ImmOffset = 0) const;
- inline static uint8_t ModRMByte(unsigned Mod, unsigned RegOpcode,
- unsigned RM) {
+ static uint8_t ModRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) {
assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
return RM | (RegOpcode << 3) | (Mod << 6);
}
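ModRMByte() packs the three ModR/M fields into their architectural bit positions: mod in bits 7-6, reg/opcode in bits 5-3, r/m in bits 2-0. A worked instance:

#include <cstdint>

constexpr uint8_t modRM(unsigned Mod, unsigned RegOpcode, unsigned RM) {
  return static_cast<uint8_t>(RM | (RegOpcode << 3) | (Mod << 6));
}
// Mod=3 (register-direct), reg/opcode=2, r/m=1 -> 0b11'010'001 == 0xD1.
static_assert(modRM(3, 2, 1) == 0xD1, "ModR/M packing");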
@@ -149,12 +156,6 @@ public:
} // end anonymous namespace
-MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx) {
- return new X86MCCodeEmitter(MCII, Ctx);
-}
-
/// isDisp8 - Return true if this signed displacement fits in an 8-bit
/// sign-extended field.
static bool isDisp8(int Value) {
@@ -1436,7 +1437,7 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM0r: case X86II::MRM1r:
case X86II::MRM2r: case X86II::MRM3r:
case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r: {
+ case X86II::MRM6r: case X86II::MRM7r:
if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
++CurOp;
if (HasEVEX_K) // Skip writemask
@@ -1446,13 +1447,12 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
(Form == X86II::MRMXr) ? 0 : Form-X86II::MRM0r,
CurByte, OS);
break;
- }
case X86II::MRMXm:
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m: {
+ case X86II::MRM6m: case X86II::MRM7m:
if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
++CurOp;
if (HasEVEX_K) // Skip writemask
@@ -1463,7 +1463,7 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
Rex, CurByte, OS, Fixups, STI);
CurOp += X86::AddrNumOperands;
break;
- }
+
case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2:
case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C5:
case X86II::MRM_C6: case X86II::MRM_C7: case X86II::MRM_C8:
@@ -1527,3 +1527,9 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
}
#endif
}
+
+MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new X86MCCodeEmitter(MCII, Ctx);
+}
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index 33376b6d1b90..d6777fc8aa6a 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -10,6 +10,7 @@
#include "MCTargetDesc/X86FixupKinds.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/Support/COFF.h"
@@ -17,28 +18,24 @@
using namespace llvm;
-namespace llvm {
- class MCObjectWriter;
-}
-
namespace {
- class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
- public:
- X86WinCOFFObjectWriter(bool Is64Bit);
- ~X86WinCOFFObjectWriter() override;
- unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsCrossSection,
- const MCAsmBackend &MAB) const override;
- };
-}
+class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
+public:
+ X86WinCOFFObjectWriter(bool Is64Bit);
+ ~X86WinCOFFObjectWriter() override = default;
+
+ unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsCrossSection,
+ const MCAsmBackend &MAB) const override;
+};
+
+} // end anonymous namespace
X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit)
: MCWinCOFFObjectTargetWriter(Is64Bit ? COFF::IMAGE_FILE_MACHINE_AMD64
: COFF::IMAGE_FILE_MACHINE_I386) {}
-X86WinCOFFObjectWriter::~X86WinCOFFObjectWriter() {}
-
unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target,
const MCFixup &Fixup,
bool IsCrossSection,
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 2cb80a482d06..fdcc7e1ab7b0 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -21,7 +21,10 @@ namespace llvm {
class FunctionPass;
class ImmutablePass;
+class InstructionSelector;
class PassRegistry;
+class X86RegisterBankInfo;
+class X86Subtarget;
class X86TargetMachine;
/// This pass converts a legalized DAG into a X86-specific DAG, ready for
@@ -92,6 +95,9 @@ void initializeFixupBWInstPassPass(PassRegistry &);
/// encoding when possible in order to reduce code size.
FunctionPass *createX86EvexToVexInsts();
+InstructionSelector *createX86InstructionSelector(X86Subtarget &,
+ X86RegisterBankInfo &);
+
void initializeEvexToVexInstPassPass(PassRegistry &);
} // End llvm namespace
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 83a23d4ad680..8fcc8e31d5d4 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -187,8 +187,6 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
"Support BMI2 instructions">;
def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true",
"Support RTM instructions">;
-def FeatureHLE : SubtargetFeature<"hle", "HasHLE", "true",
- "Support HLE">;
def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
"Support ADX instructions">;
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
@@ -202,6 +200,8 @@ def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
"Support LAHF and SAHF instructions">;
def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true",
"Enable MONITORX/MWAITX timer functionality">;
+def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true",
+ "Enable Cache Line Zero">;
def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
"Support MPX instructions">;
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
@@ -215,18 +215,10 @@ def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
-def FeatureINVPCID : SubtargetFeature<"invpcid", "HasInvPCId", "true",
- "Invalidate Process-Context Identifier">;
-def FeatureVMFUNC : SubtargetFeature<"vmfunc", "HasVMFUNC", "true",
- "VM Functions">;
-def FeatureSMAP : SubtargetFeature<"smap", "HasSMAP", "true",
- "Supervisor Mode Access Protection">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
"Enable Software Guard Extensions">;
def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true",
"Flush A Cache Line Optimized">;
-def FeaturePCOMMIT : SubtargetFeature<"pcommit", "HasPCOMMIT", "true",
- "Enable Persistent Commit">;
def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true",
"Cache Line Write Back">;
// TODO: This feature ought to be renamed.
@@ -246,11 +238,12 @@ def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
def FeatureSoftFloat
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
"Use software floating point features.">;
-// On at least some AMD processors, there is no performance hazard to writing
-// only the lower parts of a YMM register without clearing the upper part.
-def FeatureFastPartialYMMWrite
- : SubtargetFeature<"fast-partial-ymm-write", "HasFastPartialYMMWrite",
- "true", "Partial writes to YMM registers are fast">;
+// On some X86 processors, there is no performance hazard to writing only the
+// lower parts of a YMM or ZMM register without clearing the upper part.
+def FeatureFastPartialYMMorZMMWrite
+ : SubtargetFeature<"fast-partial-ymm-or-zmm-write",
+ "HasFastPartialYMMorZMMWrite",
+ "true", "Partial writes to YMM/ZMM registers are fast">;
// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
@@ -271,6 +264,15 @@ def FeatureFastLZCNT
"fast-lzcnt", "HasFastLZCNT", "true",
"LZCNT instructions are as fast as most simple integer ops">;
+
+// Sandy Bridge and newer processors can use SHLD with the same source on both
+// inputs to implement a rotate, avoiding the partial flag update of the
+// normal rotate instructions.
+def FeatureFastSHLDRotate
+ : SubtargetFeature<
+ "fast-shld-rotate", "HasFastSHLDRotate", "true",
+ "SHLD can be used as a faster rotate">;
+
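The rotate trick reads as follows: SHLD shifts the destination left and fills the vacated low bits from a second source, so when both operands name the same register the result is a rotate-left. A sketch of the value `shld $n, %reg, %reg` computes for 32-bit operands:

#include <cstdint>

static uint32_t shldSameSource(uint32_t x, unsigned n) {
  n &= 31; // the hardware masks the shift count
  return n == 0 ? x : (x << n) | (x >> (32 - n)); // rotate-left by n
}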
//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//
@@ -466,7 +468,8 @@ def SNBFeatures : ProcessorFeatures<[], [
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureLAHFSAHF,
- FeatureFastScalarFSQRT
+ FeatureFastScalarFSQRT,
+ FeatureFastSHLDRotate
]>;
class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
@@ -498,10 +501,6 @@ def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [
FeatureFMA,
FeatureLZCNT,
FeatureMOVBE,
- FeatureINVPCID,
- FeatureVMFUNC,
- FeatureRTM,
- FeatureHLE,
FeatureSlowIncDec
]>;
@@ -512,8 +511,7 @@ def : HaswellProc<"core-avx2">; // Legacy alias.
def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [
FeatureADX,
- FeatureRDSEED,
- FeatureSMAP
+ FeatureRDSEED
]>;
class BroadwellProc<string Name> : ProcModel<Name, HaswellModel,
BDWFeatures.Value, []>;
@@ -521,6 +519,7 @@ def : BroadwellProc<"broadwell">;
def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
FeatureMPX,
+ FeatureRTM,
FeatureXSAVEC,
FeatureXSAVES,
FeatureSGX,
@@ -547,7 +546,8 @@ class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel,
FeatureLZCNT,
FeatureBMI,
FeatureBMI2,
- FeatureFMA
+ FeatureFMA,
+ FeatureFastPartialYMMorZMMWrite
]>;
def : KnightsLandingProc<"knl">;
@@ -558,7 +558,6 @@ def SKXFeatures : ProcessorFeatures<SKLFeatures.Value, [
FeatureBWI,
FeatureVLX,
FeaturePKU,
- FeaturePCOMMIT,
FeatureCLWB
]>;
@@ -662,7 +661,7 @@ def : ProcessorModel<"btver2", BtVer2Model, [
FeatureXSAVEOPT,
FeatureSlowSHLD,
FeatureLAHFSAHF,
- FeatureFastPartialYMMWrite
+ FeatureFastPartialYMMorZMMWrite
]>;
// Bulldozer
@@ -771,6 +770,7 @@ def: ProcessorModel<"znver1", BtVer2Model, [
FeatureBMI,
FeatureBMI2,
FeatureCLFLUSHOPT,
+ FeatureCLZERO,
FeatureCMPXCHG16B,
FeatureF16C,
FeatureFMA,
@@ -788,7 +788,6 @@ def: ProcessorModel<"znver1", BtVer2Model, [
FeatureRDRAND,
FeatureRDSEED,
FeatureSHA,
- FeatureSMAP,
FeatureSSE4A,
FeatureSlowSHLD,
FeatureX87,
@@ -824,6 +823,7 @@ def : ProcessorModel<"x86-64", SandyBridgeModel,
//===----------------------------------------------------------------------===//
include "X86RegisterInfo.td"
+include "X86RegisterBanks.td"
//===----------------------------------------------------------------------===//
// Instruction Descriptions
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 6798253d0f6a..44bc373b0394 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -81,7 +81,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void LowerSTACKMAP(const MachineInstr &MI);
void LowerPATCHPOINT(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerSTATEPOINT(const MachineInstr &MI, X86MCInstLower &MCIL);
- void LowerFAULTING_LOAD_OP(const MachineInstr &MI, X86MCInstLower &MCIL);
+ void LowerFAULTING_OP(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerPATCHABLE_OP(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerTlsAddr(X86MCInstLower &MCInstLowering, const MachineInstr &MI);
@@ -92,6 +92,8 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void LowerPATCHABLE_RET(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
+ void LowerFENTRY_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
+
// Helper function that emits the XRay sleds we've collected for a particular
// function.
void EmitXRayTable();
diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp
index 78bd2add8c3b..765af67de160 100644
--- a/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -17,22 +17,35 @@
//
//===----------------------------------------------------------------------===//
-#include <algorithm>
-
-#include "X86.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "X86FrameLowering.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
using namespace llvm;
@@ -44,6 +57,7 @@ static cl::opt<bool>
cl::init(false), cl::Hidden);
namespace {
+
class X86CallFrameOptimization : public MachineFunctionPass {
public:
X86CallFrameOptimization() : MachineFunctionPass(ID) {}
@@ -53,30 +67,28 @@ public:
private:
// Information we know about a particular call site
struct CallContext {
- CallContext()
- : FrameSetup(nullptr), Call(nullptr), SPCopy(nullptr), ExpectedDist(0),
- MovVector(4, nullptr), NoStackParams(false), UsePush(false) {}
+ CallContext() : FrameSetup(nullptr), MovVector(4, nullptr) {}
// Iterator referring to the frame setup instruction
MachineBasicBlock::iterator FrameSetup;
// Actual call instruction
- MachineInstr *Call;
+ MachineInstr *Call = nullptr;
// A copy of the stack pointer
- MachineInstr *SPCopy;
+ MachineInstr *SPCopy = nullptr;
// The total displacement of all passed parameters
- int64_t ExpectedDist;
+ int64_t ExpectedDist = 0;
// The sequence of movs used to pass the parameters
SmallVector<MachineInstr *, 4> MovVector;
// True if this call site has no stack parameters
- bool NoStackParams;
+ bool NoStackParams = false;
// True if this call site can use push instructions
- bool UsePush;
+ bool UsePush = false;
};
typedef SmallVector<CallContext, 8> ContextVector;
@@ -102,7 +114,7 @@ private:
StringRef getPassName() const override { return "X86 Optimize Call Frame"; }
- const TargetInstrInfo *TII;
+ const X86InstrInfo *TII;
const X86FrameLowering *TFL;
const X86Subtarget *STI;
MachineRegisterInfo *MRI;
@@ -112,11 +124,8 @@ private:
};
char X86CallFrameOptimization::ID = 0;
-} // end anonymous namespace
-FunctionPass *llvm::createX86CallFrameOptimization() {
- return new X86CallFrameOptimization();
-}
+} // end anonymous namespace
// This checks whether the transformation is legal.
// Also returns false in cases where it's potentially legal, but
@@ -322,7 +331,6 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
// transformation.
const X86RegisterInfo &RegInfo =
*static_cast<const X86RegisterInfo *>(STI->getRegisterInfo());
- unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
// We expect to enter this at the beginning of a call sequence
assert(I->getOpcode() == TII->getCallFrameSetupOpcode());
@@ -331,8 +339,7 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
// How much do we adjust the stack? This puts an upper bound on
// the number of parameters actually passed on it.
- unsigned int MaxAdjust =
- FrameSetup->getOperand(0).getImm() >> Log2SlotSize;
+ unsigned int MaxAdjust = TII->getFrameSize(*FrameSetup) >> Log2SlotSize;
// A zero adjustment means no stack parameters
if (!MaxAdjust) {
@@ -425,7 +432,7 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
return;
Context.Call = &*I;
- if ((++I)->getOpcode() != FrameDestroyOpcode)
+ if ((++I)->getOpcode() != TII->getCallFrameDestroyOpcode())
return;
// Now, go through the vector, and see that we don't have any gaps,
@@ -455,7 +462,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
// PEI will end up finalizing the handling of this.
MachineBasicBlock::iterator FrameSetup = Context.FrameSetup;
MachineBasicBlock &MBB = *(FrameSetup->getParent());
- FrameSetup->getOperand(1).setImm(Context.ExpectedDist);
+ TII->setFrameAdjustment(*FrameSetup, Context.ExpectedDist);
DebugLoc DL = FrameSetup->getDebugLoc();
bool Is64Bit = STI->is64Bit();
@@ -482,11 +489,10 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
if (isInt<8>(Val))
PushOpcode = Is64Bit ? X86::PUSH64i8 : X86::PUSH32i8;
}
- Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode))
- .addOperand(PushOp);
+ Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).add(PushOp);
break;
case X86::MOV32mr:
- case X86::MOV64mr:
+ case X86::MOV64mr: {
unsigned int Reg = PushOp.getReg();
// If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg
@@ -496,9 +502,9 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
Reg = MRI->createVirtualRegister(&X86::GR64RegClass);
BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg);
BuildMI(MBB, Context.Call, DL, TII->get(X86::INSERT_SUBREG), Reg)
- .addReg(UndefReg)
- .addOperand(PushOp)
- .addImm(X86::sub_32bit);
+ .addReg(UndefReg)
+ .add(PushOp)
+ .addImm(X86::sub_32bit);
}
// If PUSHrmm is not slow on this target, try to fold the source of the
@@ -525,6 +531,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
}
break;
}
+ }
// For debugging, when using SP-based CFA, we need to adjust the CFA
// offset after each push.
@@ -584,3 +591,7 @@ MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush(
return &DefMI;
}
+
+FunctionPass *llvm::createX86CallFrameOptimization() {
+ return new X86CallFrameOptimization();
+}
diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp
index 5ae4962378d3..137ef166aaeb 100644
--- a/lib/Target/X86/X86CallLowering.cpp
+++ b/lib/Target/X86/X86CallLowering.cpp
@@ -14,12 +14,20 @@
//===----------------------------------------------------------------------===//
#include "X86CallLowering.h"
+#include "X86CallingConv.h"
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
+#include "X86TargetMachine.h"
+
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
+#include "X86GenCallingConv.inc"
+
#ifndef LLVM_BUILD_GLOBAL_ISEL
#error "This shouldn't be built without GISel"
#endif
@@ -27,20 +35,183 @@ using namespace llvm;
X86CallLowering::X86CallLowering(const X86TargetLowering &TLI)
: CallLowering(&TLI) {}
+void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL,
+ MachineRegisterInfo &MRI,
+ SplitArgTy PerformArgSplit) const {
+
+ const X86TargetLowering &TLI = *getTLI<X86TargetLowering>();
+ LLVMContext &Context = OrigArg.Ty->getContext();
+ EVT VT = TLI.getValueType(DL, OrigArg.Ty);
+ unsigned NumParts = TLI.getNumRegisters(Context, VT);
+
+ if (NumParts == 1) {
+    // Replace the original type (pointer -> GPR).
+ SplitArgs.emplace_back(OrigArg.Reg, VT.getTypeForEVT(Context),
+ OrigArg.Flags, OrigArg.IsFixed);
+ return;
+ }
+
+ SmallVector<uint64_t, 4> BitOffsets;
+ SmallVector<unsigned, 8> SplitRegs;
+
+ EVT PartVT = TLI.getRegisterType(Context, VT);
+ Type *PartTy = PartVT.getTypeForEVT(Context);
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ ArgInfo Info =
+ ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*PartTy, DL)),
+ PartTy, OrigArg.Flags};
+ SplitArgs.push_back(Info);
+ PerformArgSplit(Info.Reg, PartVT.getSizeInBits() * i);
+ }
+}
+
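A standalone mimic of the loop above (a hypothetical helper, not LLVM API): a value wider than one register is cut into register-sized parts, and the callback is told each part's bit offset, so an i64 on a 32-bit target reports offsets 0 and 32.

#include <cstdint>
#include <functional>

static void splitIntoParts(unsigned ValueBits, unsigned PartBits,
                           const std::function<void(unsigned, uint64_t)> &Split) {
  unsigned NumParts = ValueBits / PartBits;
  for (unsigned i = 0; i < NumParts; ++i)
    Split(/*PartIdx=*/i, /*BitOffset=*/uint64_t(PartBits) * i);
}
// splitIntoParts(64, 32, ...) invokes Split(0, 0) and Split(1, 32).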
+namespace {
+struct FuncReturnHandler : public CallLowering::ValueHandler {
+ FuncReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder &MIB, CCAssignFn *AssignFn)
+ : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+
+ unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ MachinePointerInfo &MPO) override {
+ llvm_unreachable("Don't know how to get a stack address yet");
+ }
+
+ void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ CCValAssign &VA) override {
+ MIB.addUse(PhysReg, RegState::Implicit);
+ unsigned ExtReg = extendRegister(ValVReg, VA);
+ MIRBuilder.buildCopy(PhysReg, ExtReg);
+ }
+
+ void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+ llvm_unreachable("Don't know how to assign a value to an address yet");
+ }
+
+ MachineInstrBuilder &MIB;
+};
+} // end anonymous namespace
+
bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val, unsigned VReg) const {
- // TODO: handle functions returning non-void values.
- if (Val)
- return false;
- MIRBuilder.buildInstr(X86::RET).addImm(0);
+ assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg");
+
+ auto MIB = MIRBuilder.buildInstrNoInsert(X86::RET).addImm(0);
+
+ if (VReg) {
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ auto &DL = MF.getDataLayout();
+ const Function &F = *MF.getFunction();
+
+ ArgInfo OrigArg{VReg, Val->getType()};
+ setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
+
+ SmallVector<ArgInfo, 8> SplitArgs;
+ splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+ [&](unsigned Reg, uint64_t Offset) {
+ MIRBuilder.buildExtract(Reg, VReg, Offset);
+ });
+ FuncReturnHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86);
+ if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
+ return false;
+ }
+
+ MIRBuilder.insertInstr(MIB);
return true;
}
+namespace {
+struct FormalArgHandler : public CallLowering::ValueHandler {
+ FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ CCAssignFn *AssignFn, const DataLayout &DL)
+ : ValueHandler(MIRBuilder, MRI, AssignFn), DL(DL) {}
+
+ unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ MachinePointerInfo &MPO) override {
+
+ auto &MFI = MIRBuilder.getMF().getFrameInfo();
+ int FI = MFI.CreateFixedObject(Size, Offset, true);
+ MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+
+ unsigned AddrReg = MRI.createGenericVirtualRegister(
+ LLT::pointer(0, DL.getPointerSizeInBits(0)));
+ MIRBuilder.buildFrameIndex(AddrReg, FI);
+ return AddrReg;
+ }
+
+ void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+
+ auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+ MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
+ 0);
+ MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+ }
+
+ void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ CCValAssign &VA) override {
+ MIRBuilder.getMBB().addLiveIn(PhysReg);
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ }
+
+ const DataLayout &DL;
+};
+} // end anonymous namespace
+
bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<unsigned> VRegs) const {
- // TODO: handle functions with one or more arguments.
- return F.arg_empty();
+ if (F.arg_empty())
+ return true;
+
+  // TODO: handle variadic functions
+ if (F.isVarArg())
+ return false;
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ auto DL = MF.getDataLayout();
+
+ SmallVector<ArgInfo, 8> SplitArgs;
+ unsigned Idx = 0;
+ for (auto &Arg : F.args()) {
+ ArgInfo OrigArg(VRegs[Idx], Arg.getType());
+ setArgFlags(OrigArg, Idx + 1, DL, F);
+ LLT Ty = MRI.getType(VRegs[Idx]);
+ unsigned Dst = VRegs[Idx];
+ bool Split = false;
+ splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+ [&](unsigned Reg, uint64_t Offset) {
+ if (!Split) {
+ Split = true;
+ Dst = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildUndef(Dst);
+ }
+ unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset);
+ Dst = Tmp;
+ });
+ if (Dst != VRegs[Idx])
+ MIRBuilder.buildCopy(VRegs[Idx], Dst);
+ Idx++;
+ }
+
+ MachineBasicBlock &MBB = MIRBuilder.getMBB();
+ if (!MBB.empty())
+ MIRBuilder.setInstr(*MBB.begin());
+
+ FormalArgHandler Handler(MIRBuilder, MRI, CC_X86, DL);
+ if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
+ return false;
+
+ // Move back to the end of the basic block.
+ MIRBuilder.setMBB(MBB);
+
+ return true;
}
diff --git a/lib/Target/X86/X86CallLowering.h b/lib/Target/X86/X86CallLowering.h
index f2672f09d855..204e6974c702 100644
--- a/lib/Target/X86/X86CallLowering.h
+++ b/lib/Target/X86/X86CallLowering.h
@@ -34,6 +34,14 @@ public:
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
+private:
+  /// A function of this type is used to perform the value-splitting action.
+ typedef std::function<void(unsigned, uint64_t)> SplitArgTy;
+
+ void splitToValueTypes(const ArgInfo &OrigArgInfo,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL, MachineRegisterInfo &MRI,
+ SplitArgTy SplitArg) const;
};
} // End of namespace llvm;
#endif
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index cf7bc981b8a5..6781d761a1c4 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -1074,6 +1074,8 @@ def CSR_32_AllRegs_AVX512 : CalleeSavedRegs<(add CSR_32_AllRegs,
(sequence "K%u", 0, 7))>;
def CSR_64_AllRegs : CalleeSavedRegs<(add CSR_64_MostRegs, RAX)>;
+def CSR_64_AllRegs_NoSSE : CalleeSavedRegs<(add RAX, RBX, RCX, RDX, RSI, RDI, R8, R9,
+ R10, R11, R12, R13, R14, R15, RBP)>;
def CSR_64_AllRegs_AVX : CalleeSavedRegs<(sub (add CSR_64_MostRegs, RAX,
(sequence "YMM%u", 0, 15)),
(sequence "XMM%u", 0, 15))>;
diff --git a/lib/Target/X86/X86EvexToVex.cpp b/lib/Target/X86/X86EvexToVex.cpp
index bdd1ab537bb2..6472bbbc9016 100755
--- a/lib/Target/X86/X86EvexToVex.cpp
+++ b/lib/Target/X86/X86EvexToVex.cpp
@@ -20,16 +20,30 @@
//===---------------------------------------------------------------------===//
#include "InstPrinter/X86InstComments.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "X86.h"
-#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
-#include "X86InstrTablesInfo.h"
-#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
-#include "X86TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
+// Including the generated EVEX2VEX tables.
+struct X86EvexToVexCompressTableEntry {
+ uint16_t EvexOpcode;
+ uint16_t VexOpcode;
+};
+#include "X86GenEVEX2VEXTables.inc"
+
#define EVEX2VEX_DESC "Compressing EVEX instrs to VEX encoding when possible"
#define EVEX2VEX_NAME "x86-evex-to-vex-compress"
@@ -56,8 +70,6 @@ class EvexToVexInstPass : public MachineFunctionPass {
public:
static char ID;
- StringRef getPassName() const override { return EVEX2VEX_DESC; }
-
EvexToVexInstPass() : MachineFunctionPass(ID) {
initializeEvexToVexInstPassPass(*PassRegistry::getPassRegistry());
@@ -72,6 +84,8 @@ public:
}
}
+ StringRef getPassName() const override { return EVEX2VEX_DESC; }
+
/// Loop over all of the basic blocks, replacing EVEX instructions
/// with equivalent VEX instructions when possible, to reduce code size.
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -88,13 +102,8 @@ private:
};
char EvexToVexInstPass::ID = 0;
-}
-INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false)
-
-FunctionPass *llvm::createX86EvexToVexInsts() {
- return new EvexToVexInstPass();
-}
+} // end anonymous namespace
bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
@@ -125,7 +134,6 @@ void EvexToVexInstPass::AddTableEntry(EvexToVexTableType &EvexToVexTable,
// For EVEX instructions that can be encoded using VEX encoding,
// replace them with the VEX encoding in order to reduce size.
bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
-
// VEX format.
// # of bytes: 0,2,3 1 1 0,1 0,1,2,4 0,1
// [Prefixes] [VEX] OPCODE ModR/M [SIB] [DISP] [IMM]
@@ -211,3 +219,9 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
MI.setAsmPrinterFlag(AC_EVEX_2_VEX);
return true;
}
+
+INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false)
+
+FunctionPass *llvm::createX86EvexToVexInsts() {
+ return new EvexToVexInstPass();
+}
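The pass now carries its own copy of the generated compression tables. A rough sketch of how such a table becomes the lookup used by CompressEvexToVexImpl (the table name X86EvexToVex128CompressTable is assumed to come from the generated .inc; the real pass populates its maps in AddTableEntry):

static void buildTable(DenseMap<uint16_t, uint16_t> &Table) {
  // One entry per compressible EVEX opcode.
  for (const X86EvexToVexCompressTableEntry &E : X86EvexToVex128CompressTable)
    Table[E.EvexOpcode] = E.VexOpcode;
}

static bool compress(MachineInstr &MI, const X86InstrInfo *TII,
                     const DenseMap<uint16_t, uint16_t> &Table) {
  auto It = Table.find(MI.getOpcode());
  if (It == Table.end())
    return false;
  MI.setDesc(TII->get(It->second)); // re-encode with the VEX opcode
  return true;
}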
diff --git a/lib/Target/X86/X86ExpandPseudo.cpp b/lib/Target/X86/X86ExpandPseudo.cpp
index 985acf92a2d4..5dfd95f71301 100644
--- a/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/lib/Target/X86/X86ExpandPseudo.cpp
@@ -77,9 +77,11 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
default:
return false;
case X86::TCRETURNdi:
+ case X86::TCRETURNdicc:
case X86::TCRETURNri:
case X86::TCRETURNmi:
case X86::TCRETURNdi64:
+ case X86::TCRETURNdi64cc:
case X86::TCRETURNri64:
case X86::TCRETURNmi64: {
bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
@@ -97,6 +99,10 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
Offset = StackAdj - MaxTCDelta;
assert(Offset >= 0 && "Offset should never be negative");
+ if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
+ assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
+ }
+
if (Offset) {
// Check for possible merge with preceding ADD instruction.
Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
@@ -105,12 +111,22 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Jump to label or value in register.
bool IsWin64 = STI->isTargetWin64();
- if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdi64) {
+ if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
+ Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
unsigned Op;
switch (Opcode) {
case X86::TCRETURNdi:
Op = X86::TAILJMPd;
break;
+ case X86::TCRETURNdicc:
+ Op = X86::TAILJMPd_CC;
+ break;
+ case X86::TCRETURNdi64cc:
+ assert(!MBB.getParent()->hasWinCFI() &&
+ "Conditional tail calls confuse "
+ "the Win64 unwinder.");
+ Op = X86::TAILJMPd64_CC;
+ break;
default:
// Note: Win64 uses REX prefixes for indirect jumps out of functions, but
// not direct ones.
@@ -126,13 +142,17 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB.addExternalSymbol(JumpTarget.getSymbolName(),
JumpTarget.getTargetFlags());
}
+ if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
+ MIB.addImm(MBBI->getOperand(2).getImm());
+ }
+
} else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
unsigned Op = (Opcode == X86::TCRETURNmi)
? X86::TAILJMPm
: (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
for (unsigned i = 0; i != 5; ++i)
- MIB.addOperand(MBBI->getOperand(i));
+ MIB.add(MBBI->getOperand(i));
} else if (Opcode == X86::TCRETURNri64) {
BuildMI(MBB, MBBI, DL,
TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
@@ -195,7 +215,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RETL));
}
for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
- MIB.addOperand(MBBI->getOperand(I));
+ MIB.add(MBBI->getOperand(I));
MBB.erase(MBBI);
return true;
}
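The new TCRETURNdicc/TCRETURNdi64cc pseudos expand exactly like their unconditional counterparts, except that they must not adjust the stack (the not-taken path has to see an unchanged stack pointer, hence the Offset == 0 assert) and they carry the condition code as a trailing immediate. A sketch of just the opcode mapping (not the exact switch above):

static unsigned directTailJumpOpcode(unsigned Pseudo) {
  switch (Pseudo) {
  case X86::TCRETURNdi:     return X86::TAILJMPd;
  case X86::TCRETURNdicc:   return X86::TAILJMPd_CC;
  case X86::TCRETURNdi64cc: return X86::TAILJMPd64_CC;
  default:
    llvm_unreachable("not a direct tail-call pseudo handled here");
  }
}

For the _CC variants, the condition-code immediate (operand 2 of the pseudo) is appended to the new instruction, as the MIB.addImm call above shows.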
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index c890fdd1e519..036f5d2610e4 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -367,6 +367,10 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
+ // TODO: Support this properly.
+ if (Subtarget->hasAVX512())
+ return false;
+ LLVM_FALLTHROUGH;
case MVT::i8:
Opc = X86::MOV8rm;
RC = &X86::GR8RegClass;
@@ -524,6 +528,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
X86AddressMode &AM,
MachineMemOperand *MMO, bool Aligned) {
+ bool HasSSE1 = Subtarget->hasSSE1();
bool HasSSE2 = Subtarget->hasSSE2();
bool HasSSE4A = Subtarget->hasSSE4A();
bool HasAVX = Subtarget->hasAVX();
@@ -537,6 +542,16 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
case MVT::f80: // No f80 support yet.
default: return false;
case MVT::i1: {
+ // In case ValReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(ValReg) == &X86::VK1RegClass) {
+ unsigned KValReg = ValReg;
+ ValReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ValReg)
+ .addReg(KValReg);
+ ValReg = fastEmitInst_extractsubreg(MVT::i8, ValReg, /*Kill=*/true,
+ X86::sub_8bit);
+ }
// Mask out all but lowest bit.
unsigned AndResult = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -574,6 +589,9 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
} else
Opc = X86::ST_Fp64m;
break;
+ case MVT::x86mmx:
+ Opc = (IsNonTemporal && HasSSE1) ? X86::MMX_MOVNTQmr : X86::MMX_MOVQ64mr;
+ break;
case MVT::v4f32:
if (Aligned) {
if (IsNonTemporal)
@@ -1268,6 +1286,16 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (SrcVT == MVT::i1) {
if (Outs[0].Flags.isSExt())
return false;
+ // In case SrcReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(SrcReg) == &X86::VK1RegClass) {
+ unsigned KSrcReg = SrcReg;
+ SrcReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), SrcReg)
+ .addReg(KSrcReg);
+ SrcReg = fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
+ X86::sub_8bit);
+ }
SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
SrcVT = MVT::i8;
}
@@ -1559,6 +1587,17 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
if (SrcVT == MVT::i1) {
+ // In case ResultReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(ResultReg) == &X86::VK1RegClass) {
+ unsigned KResultReg = ResultReg;
+ ResultReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(KResultReg);
+ ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
+ X86::sub_8bit);
+ }
+
// Set the high bits to zero.
ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
SrcVT = MVT::i8;
@@ -1740,10 +1779,12 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// In case OpReg is a K register, COPY to a GPR
if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
unsigned KOpReg = OpReg;
- OpReg = createResultReg(&X86::GR8RegClass);
+ OpReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), OpReg)
.addReg(KOpReg);
+ OpReg = fastEmitInst_extractsubreg(MVT::i8, OpReg, /*Kill=*/true,
+ X86::sub_8bit);
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(OpReg)
@@ -2084,10 +2125,12 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
// In case OpReg is a K register, COPY to a GPR
if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
unsigned KCondReg = CondReg;
- CondReg = createResultReg(&X86::GR8RegClass);
+ CondReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), CondReg)
.addReg(KCondReg, getKillRegState(CondIsKill));
+ CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
+ X86::sub_8bit);
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(CondReg, getKillRegState(CondIsKill))
@@ -2297,10 +2340,12 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
// In case OpReg is a K register, COPY to a GPR
if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
unsigned KCondReg = CondReg;
- CondReg = createResultReg(&X86::GR8RegClass);
+ CondReg = createResultReg(&X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), CondReg)
.addReg(KCondReg, getKillRegState(CondIsKill));
+ CondReg = fastEmitInst_extractsubreg(MVT::i8, CondReg, /*Kill=*/true,
+ X86::sub_8bit);
}
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
.addReg(CondReg, getKillRegState(CondIsKill))
@@ -2423,12 +2468,22 @@ bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
if (OpReg == 0)
return false;
+ unsigned ImplicitDefReg = 0;
+ if (Subtarget->hasAVX()) {
+ ImplicitDefReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+
+ }
+
unsigned ResultReg = createResultReg(RC);
MachineInstrBuilder MIB;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
ResultReg);
+
if (Subtarget->hasAVX())
- MIB.addReg(OpReg);
+ MIB.addReg(ImplicitDefReg);
+
MIB.addReg(OpReg);
updateValueMap(I, ResultReg);
return true;
@@ -2461,7 +2516,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
EVT DstVT = TLI.getValueType(DL, I->getType());
// This code only handles truncation to byte.
- if (DstVT != MVT::i8 && DstVT != MVT::i1)
+ // TODO: Support truncate to i1 with AVX512.
+ if (DstVT != MVT::i8 && (DstVT != MVT::i1 || Subtarget->hasAVX512()))
return false;
if (!TLI.isTypeLegal(SrcVT))
return false;
@@ -3105,8 +3161,8 @@ static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
return 0;
if (CS)
- if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) ||
- CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU())
+ if (CS->arg_empty() || !CS->paramHasAttr(0, Attribute::StructRet) ||
+ CS->paramHasAttr(0, Attribute::InReg) || Subtarget->isTargetMCU())
return 0;
return 4;
@@ -3266,6 +3322,16 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Handle zero-extension from i1 to i8, which is common.
if (ArgVT == MVT::i1) {
+ // In case SrcReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(ArgReg) == &X86::VK1RegClass) {
+ unsigned KArgReg = ArgReg;
+ ArgReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ArgReg)
+ .addReg(KArgReg);
+ ArgReg = fastEmitInst_extractsubreg(MVT::i8, ArgReg, /*Kill=*/true,
+ X86::sub_8bit);
+ }
// Set the high bits to zero.
ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
ArgVT = MVT::i8;
@@ -3463,6 +3529,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
CCValAssign &VA = RVLocs[i];
EVT CopyVT = VA.getValVT();
unsigned CopyReg = ResultReg + i;
+ unsigned SrcReg = VA.getLocReg();
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
@@ -3470,9 +3537,19 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
report_fatal_error("SSE register return with SSE disabled");
}
+ // If the return value is an i1 and AVX-512 is enabled, we need
+ // to do a fixup to make the copy legal.
+ if (CopyVT == MVT::i1 && SrcReg == X86::AL && Subtarget->hasAVX512()) {
+ // Need to copy to a GR32 first.
+ // TODO: MOVZX isn't great here. We don't care about the upper bits.
+ SrcReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(X86::MOVZX32rr8), SrcReg).addReg(X86::AL);
+ }
+
// If we prefer to use the value in xmm registers, copy it out as f80 and
// use a truncate to move it from fp stack reg to xmm reg.
- if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
+ if ((SrcReg == X86::FP0 || SrcReg == X86::FP1) &&
isScalarFPTypeInSSEReg(VA.getValVT())) {
CopyVT = MVT::f80;
CopyReg = createResultReg(&X86::RFP80RegClass);
@@ -3480,7 +3557,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Copy out the result.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg());
+ TII.get(TargetOpcode::COPY), CopyReg).addReg(SrcReg);
InRegs.push_back(VA.getLocReg());
// Round the f80 to the right size, which also moves it to the appropriate
@@ -3601,6 +3678,13 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
case MVT::i1:
+ if (Subtarget->hasAVX512()) {
+ // Need to copy to a VK1 register.
+ unsigned ResultReg = createResultReg(&X86::VK1RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(SrcReg);
+ return ResultReg;
+ }
case MVT::i8:
return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
X86::sub_8bit);
@@ -3622,7 +3706,12 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
unsigned Opc = 0;
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
- case MVT::i1: VT = MVT::i8; LLVM_FALLTHROUGH;
+ case MVT::i1:
+ // TODO: Support this properly.
+ if (Subtarget->hasAVX512())
+ return 0;
+ VT = MVT::i8;
+ LLVM_FALLTHROUGH;
case MVT::i8: Opc = X86::MOV8ri; break;
case MVT::i16: Opc = X86::MOV16ri; break;
case MVT::i32: Opc = X86::MOV32ri; break;
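The VK1-to-GR8 fixup recurs throughout this file. A hypothetical helper capturing the idiom (FastISel keeps it inline; the name is invented): AVX-512 mask registers have no direct copy to GR8, so the bit is copied to a GR32 first and the low byte extracted.

unsigned X86FastISel::copyMaskBitToGR8(unsigned KReg) {
  // COPY the VK1 value into a 32-bit GPR first...
  unsigned Reg32 = createResultReg(&X86::GR32RegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(TargetOpcode::COPY), Reg32)
      .addReg(KReg);
  // ...then take its low byte as the i8 value.
  return fastEmitInst_extractsubreg(MVT::i8, Reg32, /*Kill=*/true,
                                    X86::sub_8bit);
}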
diff --git a/lib/Target/X86/X86FixupBWInsts.cpp b/lib/Target/X86/X86FixupBWInsts.cpp
index 8bde4bf98d66..c28746f96439 100644
--- a/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/lib/Target/X86/X86FixupBWInsts.cpp
@@ -95,10 +95,9 @@ class FixupBWInstPass : public MachineFunctionPass {
// Change the MachineInstr \p MI into an equivalent 32-bit instruction if
// possible. Return the replacement instruction if OK, return nullptr
- // otherwise. Set WasCandidate to true or false depending on whether the
- // MI was a candidate for this sort of transformation.
- MachineInstr *tryReplaceInstr(MachineInstr *MI, MachineBasicBlock &MBB,
- bool &WasCandidate) const;
+ // otherwise.
+ MachineInstr *tryReplaceInstr(MachineInstr *MI, MachineBasicBlock &MBB) const;
+
public:
static char ID;
@@ -226,7 +225,7 @@ MachineInstr *FixupBWInstPass::tryReplaceLoad(unsigned New32BitOpcode,
unsigned NumArgs = MI->getNumOperands();
for (unsigned i = 1; i < NumArgs; ++i)
- MIB.addOperand(MI->getOperand(i));
+ MIB.add(MI->getOperand(i));
MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
@@ -264,17 +263,13 @@ MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const {
// Drop imp-defs/uses that would be redundant with the new def/use.
for (auto &Op : MI->implicit_operands())
if (Op.getReg() != (Op.isDef() ? NewDestReg : NewSrcReg))
- MIB.addOperand(Op);
+ MIB.add(Op);
return MIB;
}
-MachineInstr *FixupBWInstPass::tryReplaceInstr(
- MachineInstr *MI, MachineBasicBlock &MBB,
- bool &WasCandidate) const {
- MachineInstr *NewMI = nullptr;
- WasCandidate = false;
-
+MachineInstr *FixupBWInstPass::tryReplaceInstr(MachineInstr *MI,
+ MachineBasicBlock &MBB) const {
// See if this is an instruction of the type we are currently looking for.
switch (MI->getOpcode()) {
@@ -282,12 +277,9 @@ MachineInstr *FixupBWInstPass::tryReplaceInstr(
// Only replace 8-bit loads with the zero-extending versions if
// in an innermost loop and not optimizing for size. This takes
// an extra byte to encode, and provides limited performance upside.
- if (MachineLoop *ML = MLI->getLoopFor(&MBB)) {
- if (ML->begin() == ML->end() && !OptForSize) {
- NewMI = tryReplaceLoad(X86::MOVZX32rm8, MI);
- WasCandidate = true;
- }
- }
+ if (MachineLoop *ML = MLI->getLoopFor(&MBB))
+ if (ML->begin() == ML->end() && !OptForSize)
+ return tryReplaceLoad(X86::MOVZX32rm8, MI);
break;
case X86::MOV16rm:
@@ -295,9 +287,7 @@ MachineInstr *FixupBWInstPass::tryReplaceInstr(
// Code size is the same, and there is sometimes a perf advantage
// from eliminating a false dependence on the upper portion of
// the register.
- NewMI = tryReplaceLoad(X86::MOVZX32rm16, MI);
- WasCandidate = true;
- break;
+ return tryReplaceLoad(X86::MOVZX32rm16, MI);
case X86::MOV8rr:
case X86::MOV16rr:
@@ -305,16 +295,14 @@ MachineInstr *FixupBWInstPass::tryReplaceInstr(
// Code size is either less (16) or equal (8), and there is sometimes a
// perf advantage from eliminating a false dependence on the upper portion
// of the register.
- NewMI = tryReplaceCopy(MI);
- WasCandidate = true;
- break;
+ return tryReplaceCopy(MI);
default:
// nothing to do here.
break;
}
- return NewMI;
+ return nullptr;
}
void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
@@ -338,18 +326,11 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
// We run after PEI, so we need to AddPristinesAndCSRs.
LiveRegs.addLiveOuts(MBB);
- bool WasCandidate = false;
-
for (auto I = MBB.rbegin(); I != MBB.rend(); ++I) {
MachineInstr *MI = &*I;
- MachineInstr *NewMI = tryReplaceInstr(MI, MBB, WasCandidate);
-
- // Add this to replacements if it was a candidate, even if NewMI is
- // nullptr. We will revisit that in a bit.
- if (WasCandidate) {
+ if (MachineInstr *NewMI = tryReplaceInstr(MI, MBB))
MIReplacements.push_back(std::make_pair(MI, NewMI));
- }
// We're done with this instruction, update liveness for the next one.
LiveRegs.stepBackward(*MI);
@@ -359,9 +340,7 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
MachineInstr *MI = MIReplacements.back().first;
MachineInstr *NewMI = MIReplacements.back().second;
MIReplacements.pop_back();
- if (NewMI) {
- MBB.insert(MI, NewMI);
- MBB.erase(MI);
- }
+ MBB.insert(MI, NewMI);
+ MBB.erase(MI);
}
}
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index 12095917ca30..2cd4c1a3e7b3 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -120,8 +120,8 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
BuildMI(*MF, MI.getDebugLoc(),
TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
: X86::LEA64r))
- .addOperand(Dest)
- .addOperand(Src)
+ .add(Dest)
+ .add(Src)
.addImm(1)
.addReg(0)
.addImm(0)
@@ -287,8 +287,8 @@ bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I,
MachineInstr *NewMI =
BuildMI(*MFI, I, MI.getDebugLoc(), TII->get(NewOpcode))
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1));
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1));
MFI->erase(I);
I = static_cast<MachineBasicBlock::iterator>(NewMI);
return true;
@@ -377,9 +377,9 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
const MachineOperand &Src1 = MI.getOperand(SrcR1 == DstR ? 1 : 3);
const MachineOperand &Src2 = MI.getOperand(SrcR1 == DstR ? 3 : 1);
NewMI = BuildMI(*MF, MI.getDebugLoc(), TII->get(addrr_opcode))
- .addOperand(Dst)
- .addOperand(Src1)
- .addOperand(Src2);
+ .add(Dst)
+ .add(Src1)
+ .add(Src2);
MFI->insert(I, NewMI);
DEBUG(NewMI->dump(););
}
@@ -387,8 +387,8 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
if (MI.getOperand(4).getImm() != 0) {
const MachineOperand &SrcR = MI.getOperand(SrcR1 == DstR ? 1 : 3);
NewMI = BuildMI(*MF, MI.getDebugLoc(), TII->get(addri_opcode))
- .addOperand(Dst)
- .addOperand(SrcR)
+ .add(Dst)
+ .add(SrcR)
.addImm(MI.getOperand(4).getImm());
MFI->insert(I, NewMI);
DEBUG(NewMI->dump(););
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index cd690442bb9f..78e0bca4158e 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -252,40 +252,76 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
int64_t NumBytes, bool InEpilogue) const {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
+ MachineInstr::MIFlag Flag =
+ isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
- while (Offset) {
- if (Offset > Chunk) {
- // Rather than emit a long series of instructions for large offsets,
- // load the offset into a register and do one sub/add
- unsigned Reg = 0;
+ if (Offset > Chunk) {
+ // Rather than emit a long series of instructions for large offsets,
+ // load the offset into a register and do one sub/add
+ unsigned Reg = 0;
+ unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
- if (isSub && !isEAXLiveIn(MBB))
- Reg = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
+ if (isSub && !isEAXLiveIn(MBB))
+ Reg = Rax;
+ else
+ Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
+
+ unsigned MovRIOpc = Is64Bit ? X86::MOV64ri : X86::MOV32ri;
+ unsigned AddSubRROpc =
+ isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
+ if (Reg) {
+ BuildMI(MBB, MBBI, DL, TII.get(MovRIOpc), Reg)
+ .addImm(Offset)
+ .setMIFlag(Flag);
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
+ .addReg(StackPtr)
+ .addReg(Reg);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ return;
+ } else if (Offset > 8 * Chunk) {
+ // If we would need more than 8 add or sub instructions (a >16GB stack
+ // frame), it's worth spilling RAX to materialize this immediate.
+ // pushq %rax
+ // movabsq +-$Offset+-SlotSize, %rax
+ // addq %rsp, %rax
+ // xchg %rax, (%rsp)
+ // movq (%rsp), %rsp
+ assert(Is64Bit && "can't have 32-bit 16GB stack frame");
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
+ .addReg(Rax, RegState::Kill)
+ .setMIFlag(Flag);
+ // Subtract is not commutative, so negate the offset and always use add.
+ // Subtract 8 less and add 8 more to account for the PUSH we just did.
+ if (isSub)
+ Offset = -(Offset - SlotSize);
else
- Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
-
- if (Reg) {
- unsigned Opc = Is64Bit ? X86::MOV64ri : X86::MOV32ri;
- BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg)
- .addImm(Offset);
- Opc = isSub
- ? getSUBrrOpcode(Is64Bit)
- : getADDrrOpcode(Is64Bit);
- MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addReg(Reg);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
- Offset = 0;
- continue;
- }
+ Offset = Offset + SlotSize;
+ BuildMI(MBB, MBBI, DL, TII.get(MovRIOpc), Rax)
+ .addImm(Offset)
+ .setMIFlag(Flag);
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
+ .addReg(Rax)
+ .addReg(StackPtr);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ // Exchange the new SP in RAX with the top of the stack.
+ addRegOffset(
+ BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
+ StackPtr, false, 0);
+ // Load new SP from the top of the stack into RSP.
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
+ StackPtr, false, 0);
+ return;
}
+ }
+ while (Offset) {
uint64_t ThisVal = std::min(Offset, Chunk);
- if (ThisVal == (Is64Bit ? 8 : 4)) {
- // Use push / pop instead.
+ if (ThisVal == SlotSize) {
+ // Use push / pop for slot sized adjustments as a size optimization. We
+ // need to find a dead register when using pop.
unsigned Reg = isSub
? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
: findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
@@ -293,23 +329,16 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
unsigned Opc = isSub
? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
: (Is64Bit ? X86::POP64r : X86::POP32r);
- MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
- .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
- if (isSub)
- MI->setFlag(MachineInstr::FrameSetup);
- else
- MI->setFlag(MachineInstr::FrameDestroy);
+ BuildMI(MBB, MBBI, DL, TII.get(Opc))
+ .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
+ .setMIFlag(Flag);
Offset -= ThisVal;
continue;
}
}
- MachineInstrBuilder MI = BuildStackAdjustment(
- MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue);
- if (isSub)
- MI.setMIFlag(MachineInstr::FrameSetup);
- else
- MI.setMIFlag(MachineInstr::FrameDestroy);
+ BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
+ .setMIFlag(Flag);
Offset -= ThisVal;
}
@@ -959,6 +988,16 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
.getValueAsString()
.getAsInteger(0, StackProbeSize);
+ // Re-align the stack on 64-bit if the x86-interrupt calling convention is
+ // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
+ // stack alignment.
+ if (Fn->getCallingConv() == CallingConv::X86_INTR && Is64Bit &&
+ Fn->arg_size() == 2) {
+ StackSize += 8;
+ MFI.setStackSize(StackSize);
+ emitSPUpdate(MBB, MBBI, -8, /*InEpilogue=*/false);
+ }
+
// If this is x86-64 and the Red Zone is not disabled, if we are a leaf
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
@@ -2587,8 +2626,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
unsigned Opcode = I->getOpcode();
bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
DebugLoc DL = I->getDebugLoc();
- uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
- uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
+ uint64_t Amount = !reserveCallFrame ? TII.getFrameSize(*I) : 0;
+ uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
I = MBB.erase(I);
auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
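A worked example of the offset fix-up in the spill-RAX path above, assuming isSub and a 20 GB adjustment on x86-64 (SlotSize == 8). The PUSH itself already moves RSP down by SlotSize, which is why the immediate is computed from Offset - SlotSize rather than Offset:

// pushq %rax                   // RSP -= 8; RAX saved on the stack
// movabsq $-0x4fffffff8, %rax  // -(Offset - SlotSize)
// addq %rsp, %rax              // RAX = old RSP - Offset, the new SP
// xchgq %rax, (%rsp)           // stash new SP, restore original RAX
// movq (%rsp), %rsp            // RSP = new SP
constexpr uint64_t SlotSize = 8;
constexpr uint64_t Offset = 0x500000000; // 20 GB
constexpr int64_t Imm = -(int64_t)(Offset - SlotSize);
static_assert(Imm == -0x4FFFFFFF8LL, "push already moved RSP by SlotSize");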
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index e1b04d6dc300..863dc8b22968 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -20,6 +20,7 @@ namespace llvm {
class MachineInstrBuilder;
class MCCFIInstruction;
+class X86InstrInfo;
class X86Subtarget;
class X86RegisterInfo;
@@ -30,7 +31,7 @@ public:
// Cached subtarget predicates.
const X86Subtarget &STI;
- const TargetInstrInfo &TII;
+ const X86InstrInfo &TII;
const X86RegisterInfo *TRI;
unsigned SlotSize;
diff --git a/lib/Target/X86/X86GenRegisterBankInfo.def b/lib/Target/X86/X86GenRegisterBankInfo.def
new file mode 100644
index 000000000000..06be142432f7
--- /dev/null
+++ b/lib/Target/X86/X86GenRegisterBankInfo.def
@@ -0,0 +1,104 @@
+//===- X86GenRegisterBankInfo.def ----------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines all the static objects used by X86RegisterBankInfo.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+#ifdef GET_TARGET_REGBANK_INFO_IMPL
+RegisterBankInfo::PartialMapping X86GenRegisterBankInfo::PartMappings[]{
+ /* StartIdx, Length, RegBank */
+ // GPR value
+ {0, 8, X86::GPRRegBank}, // :0
+ {0, 16, X86::GPRRegBank}, // :1
+ {0, 32, X86::GPRRegBank}, // :2
+ {0, 64, X86::GPRRegBank}, // :3
+ // FR32/64 , xmm registers
+ {0, 32, X86::VECRRegBank}, // :4
+ {0, 64, X86::VECRRegBank}, // :5
+ // VR128/256/512
+ {0, 128, X86::VECRRegBank}, // :6
+ {0, 256, X86::VECRRegBank}, // :7
+ {0, 512, X86::VECRRegBank}, // :8
+};
+#endif // GET_TARGET_REGBANK_INFO_IMPL
+
+#ifdef GET_TARGET_REGBANK_INFO_CLASS
+enum PartialMappingIdx {
+ PMI_None = -1,
+ PMI_GPR8,
+ PMI_GPR16,
+ PMI_GPR32,
+ PMI_GPR64,
+ PMI_FP32,
+ PMI_FP64,
+ PMI_VEC128,
+ PMI_VEC256,
+ PMI_VEC512
+};
+#endif // GET_TARGET_REGBANK_INFO_CLASS
+
+#ifdef GET_TARGET_REGBANK_INFO_IMPL
+#define INSTR_3OP(INFO) INFO, INFO, INFO,
+#define BREAKDOWN(INDEX, NUM) \
+ { &X86GenRegisterBankInfo::PartMappings[INDEX], NUM }
+// ValueMappings.
+RegisterBankInfo::ValueMapping X86GenRegisterBankInfo::ValMappings[]{
+ /* BreakDown, NumBreakDowns */
+ // 3-operands instructions (all binary operations should end up with one of
+ // those mapping).
+ INSTR_3OP(BREAKDOWN(PMI_GPR8, 1)) // 0: GPR_8
+ INSTR_3OP(BREAKDOWN(PMI_GPR16, 1)) // 3: GPR_16
+ INSTR_3OP(BREAKDOWN(PMI_GPR32, 1)) // 6: GPR_32
+ INSTR_3OP(BREAKDOWN(PMI_GPR64, 1)) // 9: GPR_64
+ INSTR_3OP(BREAKDOWN(PMI_FP32, 1)) // 12: Fp32
+ INSTR_3OP(BREAKDOWN(PMI_FP64, 1)) // 15: Fp64
+ INSTR_3OP(BREAKDOWN(PMI_VEC128, 1)) // 18: Vec128
+ INSTR_3OP(BREAKDOWN(PMI_VEC256, 1)) // 21: Vec256
+ INSTR_3OP(BREAKDOWN(PMI_VEC512, 1)) // 24: Vec512
+};
+#undef INSTR_3OP
+#undef BREAKDOWN
+#endif // GET_TARGET_REGBANK_INFO_IMPL
+
+#ifdef GET_TARGET_REGBANK_INFO_CLASS
+enum ValueMappingIdx {
+ VMI_None = -1,
+ VMI_3OpsGpr8Idx = PMI_GPR8 * 3,
+ VMI_3OpsGpr16Idx = PMI_GPR16 * 3,
+ VMI_3OpsGpr32Idx = PMI_GPR32 * 3,
+ VMI_3OpsGpr64Idx = PMI_GPR64 * 3,
+ VMI_3OpsFp32Idx = PMI_FP32 * 3,
+ VMI_3OpsFp64Idx = PMI_FP64 * 3,
+ VMI_3OpsVec128Idx = PMI_VEC128 * 3,
+ VMI_3OpsVec256Idx = PMI_VEC256 * 3,
+ VMI_3OpsVec512Idx = PMI_VEC512 * 3,
+};
+#undef GET_TARGET_REGBANK_INFO_CLASS
+#endif // GET_TARGET_REGBANK_INFO_CLASS
+
+#ifdef GET_TARGET_REGBANK_INFO_IMPL
+#undef GET_TARGET_REGBANK_INFO_IMPL
+const RegisterBankInfo::ValueMapping *
+X86GenRegisterBankInfo::getValueMapping(PartialMappingIdx Idx,
+ unsigned NumOperands) {
+
+ // We can use the VMI_3Ops mapping for all these cases.
+ if (NumOperands <= 3 && (Idx >= PMI_GPR8 && Idx <= PMI_VEC512))
+ return &ValMappings[(unsigned)Idx * 3];
+
+ llvm_unreachable("Unsupported PartialMappingIdx.");
+}
+
+#endif // GET_TARGET_REGBANK_INFO_IMPL
+
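An illustrative use of the indexing scheme (assuming the enums and arrays above are visible at the point of use): each PartialMappingIdx owns three consecutive ValueMapping entries, one per operand of a 3-operand instruction, so an index is simply scaled by three.

// For a 32-bit GPR binary operation, all three operands share one mapping:
const RegisterBankInfo::ValueMapping *VM =
    X86GenRegisterBankInfo::getValueMapping(PMI_GPR32, /*NumOperands=*/3);
assert(VM == &X86GenRegisterBankInfo::ValMappings[VMI_3OpsGpr32Idx]);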
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 8ab4c0616880..eb5c56ff2ff9 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -188,7 +188,6 @@ namespace {
private:
void Select(SDNode *N) override;
- bool tryGather(SDNode *N, unsigned Opc);
bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
@@ -384,6 +383,16 @@ namespace {
bool ComplexPatternFuncMutatesDAG() const override {
return true;
}
+
+ bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const;
+
+ /// Returns whether this is a relocatable immediate in the range
+ /// [-2^Width .. 2^Width-1].
+ template <unsigned Width> bool isSExtRelocImm(SDNode *N) const {
+ if (auto *CN = dyn_cast<ConstantSDNode>(N))
+ return isInt<Width>(CN->getSExtValue());
+ return isSExtAbsoluteSymbolRef(Width, N);
+ }
};
}
@@ -709,7 +718,8 @@ bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
// For more information see http://people.redhat.com/drepper/tls.pdf
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
- Subtarget->isTargetGlibc())
+ (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
+ Subtarget->isTargetFuchsia()))
switch (N->getPointerInfo().getAddrSpace()) {
case 256:
AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
@@ -1325,8 +1335,8 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
AM.Scale = 1;
// Insert the new nodes into the topological ordering.
- insertDAGNode(*CurDAG, N, Zero);
- insertDAGNode(*CurDAG, N, Neg);
+ insertDAGNode(*CurDAG, Handle.getValue(), Zero);
+ insertDAGNode(*CurDAG, Handle.getValue(), Neg);
return false;
}
@@ -1789,6 +1799,21 @@ SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
}
+bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const {
+ if (N->getOpcode() == ISD::TRUNCATE)
+ N = N->getOperand(0).getNode();
+ if (N->getOpcode() != X86ISD::Wrapper)
+ return false;
+
+ auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0));
+ if (!GA)
+ return false;
+
+ Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
+ return CR && CR->getSignedMin().sge(-1ull << Width) &&
+ CR->getSignedMax().slt(1ull << Width);
+}
+
/// Test whether the given X86ISD::CMP node has any uses which require the SF
/// or OF bits to be accurate.
static bool hasNoSignedComparisonUses(SDNode *N) {
@@ -1905,6 +1930,8 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
SDValue Op = Chain.getOperand(i);
if (Op == Load.getValue(1)) {
ChainCheck = true;
+ // Drop Load, but keep its chain. No cycle check necessary.
+ ChainOps.push_back(Load.getOperand(0));
continue;
}
@@ -1954,39 +1981,6 @@ static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
llvm_unreachable("unrecognized size for LdVT");
}
-/// Customized ISel for GATHER operations.
-bool X86DAGToDAGISel::tryGather(SDNode *Node, unsigned Opc) {
- // Operands of Gather: VSrc, Base, VIdx, VMask, Scale
- SDValue Chain = Node->getOperand(0);
- SDValue VSrc = Node->getOperand(2);
- SDValue Base = Node->getOperand(3);
- SDValue VIdx = Node->getOperand(4);
- SDValue VMask = Node->getOperand(5);
- ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
- if (!Scale)
- return false;
-
- SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(),
- MVT::Other);
-
- SDLoc DL(Node);
-
- // Memory Operands: Base, Scale, Index, Disp, Segment
- SDValue Disp = CurDAG->getTargetConstant(0, DL, MVT::i32);
- SDValue Segment = CurDAG->getRegister(0, MVT::i32);
- const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue(), DL), VIdx,
- Disp, Segment, VMask, Chain};
- SDNode *ResNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops);
- // Node has 2 outputs: VDst and MVT::Other.
- // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
- // We replace VDst of Node with VDst of ResNode, and Other of Node with Other
- // of ResNode.
- ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
- ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2));
- CurDAG->RemoveDeadNode(Node);
- return true;
-}
-
void X86DAGToDAGISel::Select(SDNode *Node) {
MVT NVT = Node->getSimpleValueType(0);
unsigned Opc, MOpc;
@@ -2024,55 +2018,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
- case ISD::INTRINSIC_W_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
- switch (IntNo) {
- default: break;
- case Intrinsic::x86_avx2_gather_d_pd:
- case Intrinsic::x86_avx2_gather_d_pd_256:
- case Intrinsic::x86_avx2_gather_q_pd:
- case Intrinsic::x86_avx2_gather_q_pd_256:
- case Intrinsic::x86_avx2_gather_d_ps:
- case Intrinsic::x86_avx2_gather_d_ps_256:
- case Intrinsic::x86_avx2_gather_q_ps:
- case Intrinsic::x86_avx2_gather_q_ps_256:
- case Intrinsic::x86_avx2_gather_d_q:
- case Intrinsic::x86_avx2_gather_d_q_256:
- case Intrinsic::x86_avx2_gather_q_q:
- case Intrinsic::x86_avx2_gather_q_q_256:
- case Intrinsic::x86_avx2_gather_d_d:
- case Intrinsic::x86_avx2_gather_d_d_256:
- case Intrinsic::x86_avx2_gather_q_d:
- case Intrinsic::x86_avx2_gather_q_d_256: {
- if (!Subtarget->hasAVX2())
- break;
- unsigned Opc;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic");
- case Intrinsic::x86_avx2_gather_d_pd: Opc = X86::VGATHERDPDrm; break;
- case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
- case Intrinsic::x86_avx2_gather_q_pd: Opc = X86::VGATHERQPDrm; break;
- case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
- case Intrinsic::x86_avx2_gather_d_ps: Opc = X86::VGATHERDPSrm; break;
- case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
- case Intrinsic::x86_avx2_gather_q_ps: Opc = X86::VGATHERQPSrm; break;
- case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
- case Intrinsic::x86_avx2_gather_d_q: Opc = X86::VPGATHERDQrm; break;
- case Intrinsic::x86_avx2_gather_d_q_256: Opc = X86::VPGATHERDQYrm; break;
- case Intrinsic::x86_avx2_gather_q_q: Opc = X86::VPGATHERQQrm; break;
- case Intrinsic::x86_avx2_gather_q_q_256: Opc = X86::VPGATHERQQYrm; break;
- case Intrinsic::x86_avx2_gather_d_d: Opc = X86::VPGATHERDDrm; break;
- case Intrinsic::x86_avx2_gather_d_d_256: Opc = X86::VPGATHERDDYrm; break;
- case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break;
- case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break;
- }
- if (tryGather(Node, Opc))
- return;
- break;
- }
- }
- break;
- }
case X86ISD::GlobalBaseReg:
ReplaceNode(Node, getGlobalBaseReg());
return;
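isSExtRelocImm lets selection patterns accept not just constants but absolute symbols whose declared range fits the immediate field. The test in isolation (mirroring isSExtAbsoluteSymbolRef): with Width = 31, a symbol carrying an !absolute_symbol range of [0, 0x80000000) passes, since 0 >= -2^31 and 0x7fffffff < 2^31.

static bool fitsSExtImm(const ConstantRange &CR, unsigned Width) {
  // Accept ranges wholly inside [-2^Width, 2^Width - 1].
  return CR.getSignedMin().sge(-1ull << Width) &&
         CR.getSignedMax().slt(1ull << Width);
}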
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 08fe2bad281e..7ff483063ec2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -53,6 +53,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <bitset>
@@ -70,6 +71,13 @@ static cl::opt<bool> ExperimentalVectorWideningLegalization(
"rather than promotion."),
cl::Hidden);
+static cl::opt<int> ExperimentalPrefLoopAlignment(
+ "x86-experimental-pref-loop-alignment", cl::init(4),
+ cl::desc("Sets the preferable loop alignment for experiments "
+ "(the last x86-experimental-pref-loop-alignment bits"
+ " of the loop header PC will be 0)."),
+ cl::Hidden);
+
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -427,7 +435,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ExternalSymbol , VT, Custom);
setOperationAction(ISD::BlockAddress , VT, Custom);
}
- // 64-bit addm sub, shl, sra, srl (iff 32-bit x86)
+
+ // 64-bit shl, sra, srl (iff 32-bit x86)
for (auto VT : { MVT::i32, MVT::i64 }) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
@@ -782,6 +791,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
@@ -888,6 +898,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
+ setOperationAction(ISD::ABS, MVT::v16i8, Legal);
+ setOperationAction(ISD::ABS, MVT::v8i16, Legal);
+ setOperationAction(ISD::ABS, MVT::v4i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
@@ -922,6 +935,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// SSE41 brings specific instructions for doing vector sign extend even in
// cases where we don't have SRA.
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Legal);
+
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v2i64, Legal);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v4i32, Legal);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v8i16, Legal);
+
for (MVT VT : MVT::integer_vector_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
@@ -1065,6 +1086,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
+ setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
@@ -1126,7 +1148,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
}
@@ -1271,6 +1293,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
if (Subtarget.hasVLX()) {
+ setOperationAction(ISD::ABS, MVT::v4i64, Legal);
+ setOperationAction(ISD::ABS, MVT::v2i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
@@ -1357,16 +1381,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, MVT::v16i32, Legal);
setOperationAction(ISD::UMIN, MVT::v8i64, Legal);
- setOperationAction(ISD::ADD, MVT::v8i1, Expand);
- setOperationAction(ISD::ADD, MVT::v16i1, Expand);
- setOperationAction(ISD::SUB, MVT::v8i1, Expand);
- setOperationAction(ISD::SUB, MVT::v16i1, Expand);
- setOperationAction(ISD::MUL, MVT::v8i1, Expand);
- setOperationAction(ISD::MUL, MVT::v16i1, Expand);
+ setOperationAction(ISD::ADD, MVT::v8i1, Custom);
+ setOperationAction(ISD::ADD, MVT::v16i1, Custom);
+ setOperationAction(ISD::SUB, MVT::v8i1, Custom);
+ setOperationAction(ISD::SUB, MVT::v16i1, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i1, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i1, Custom);
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
+ setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
@@ -1441,7 +1466,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::MGATHER, VT, Legal);
@@ -1460,12 +1485,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
- setOperationAction(ISD::ADD, MVT::v32i1, Expand);
- setOperationAction(ISD::ADD, MVT::v64i1, Expand);
- setOperationAction(ISD::SUB, MVT::v32i1, Expand);
- setOperationAction(ISD::SUB, MVT::v64i1, Expand);
- setOperationAction(ISD::MUL, MVT::v32i1, Expand);
- setOperationAction(ISD::MUL, MVT::v64i1, Expand);
+ setOperationAction(ISD::ADD, MVT::v32i1, Custom);
+ setOperationAction(ISD::ADD, MVT::v64i1, Custom);
+ setOperationAction(ISD::SUB, MVT::v32i1, Custom);
+ setOperationAction(ISD::SUB, MVT::v64i1, Custom);
+ setOperationAction(ISD::MUL, MVT::v32i1, Custom);
+ setOperationAction(ISD::MUL, MVT::v64i1, Custom);
setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
@@ -1479,8 +1504,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i1, Custom);
@@ -1546,6 +1571,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
@@ -1574,9 +1600,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
for (auto VT : { MVT::v2i1, MVT::v4i1 }) {
- setOperationAction(ISD::ADD, VT, Expand);
- setOperationAction(ISD::SUB, VT, Expand);
- setOperationAction(ISD::MUL, VT, Expand);
+ setOperationAction(ISD::ADD, VT, Custom);
+ setOperationAction(ISD::SUB, VT, Custom);
+ setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::TRUNCATE, VT, Custom);
@@ -1671,6 +1697,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::SELECT);
@@ -1696,6 +1723,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
+ setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
+ setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
@@ -1712,7 +1741,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
MaxStoresPerMemmoveOptSize = 4;
- setPrefLoopAlignment(4); // 2^4 bytes.
+ // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
+ setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
// An out-of-order CPU can speculatively execute past a predictable branch,
// but a conditional move could be stalled by an expensive earlier operation.
@@ -1933,6 +1963,34 @@ bool X86TargetLowering::useSoftFloat() const {
return Subtarget.useSoftFloat();
}
+void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
+ ArgListTy &Args) const {
+
+ // Only relabel X86-32 for C / Stdcall CCs.
+ if (Subtarget.is64Bit())
+ return;
+ if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
+ return;
+ unsigned ParamRegs = 0;
+ if (auto *M = MF->getFunction()->getParent())
+ ParamRegs = M->getNumberRegisterParameters();
+
+ // Mark the first N integer arguments as being passed in registers.
+ for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
+ Type *T = Args[Idx].Ty;
+ if (T->isPointerTy() || T->isIntegerTy())
+ if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
+ unsigned numRegs = 1;
+ if (MF->getDataLayout().getTypeAllocSize(T) > 4)
+ numRegs = 2;
+ if (ParamRegs < numRegs)
+ return;
+ ParamRegs -= numRegs;
+ Args[Idx].IsInReg = true;
+ }
+ }
+}
+
const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
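markLibCallAttributes walks the libcall arguments in order, spending the module's register-parameter budget: an integer or pointer argument of at most 4 bytes costs one register, larger ones (up to 8 bytes) cost two, and marking stops as soon as the budget is exhausted. A simplified model of that consumption logic (argument types reduced to byte sizes; the helper is invented for illustration):

static unsigned countInRegArgs(unsigned ParamRegs, ArrayRef<unsigned> Sizes) {
  unsigned N = 0;
  for (unsigned Size : Sizes) {
    unsigned NumRegs = Size > 4 ? 2 : 1; // a 64-bit value needs a pair
    if (ParamRegs < NumRegs)
      break; // out of registers: this and later args stay in memory
    ParamRegs -= NumRegs;
    ++N;
  }
  return N;
}
// countInRegArgs(3, {4, 8, 4}) == 2: an i32 and an i64 are marked in-reg,
// and the trailing i32 is passed on the stack.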
@@ -2001,21 +2059,37 @@ unsigned X86TargetLowering::getAddressSpace() const {
return 256;
}
-Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
- // glibc has a special slot for the stack guard in tcbhead_t, use it instead
- // of the usual global variable (see sysdeps/{i386,x86_64}/nptl/tls.h)
- if (!Subtarget.isTargetGlibc())
- return TargetLowering::getIRStackGuard(IRB);
-
- // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs:
- // %gs:0x14 on i386
- unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
- unsigned AddressSpace = getAddressSpace();
+static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
+ return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
+ (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
+}
+
+static Constant* SegmentOffset(IRBuilder<> &IRB,
+ unsigned Offset, unsigned AddressSpace) {
return ConstantExpr::getIntToPtr(
ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
}
+Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
+ // glibc, bionic, and Fuchsia have a special slot for the stack guard in
+ // tcbhead_t; use it instead of the usual global variable (see
+ // sysdeps/{i386,x86_64}/nptl/tls.h)
+ if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
+ if (Subtarget.isTargetFuchsia()) {
+ // <magenta/tls.h> defines MX_TLS_STACK_GUARD_OFFSET with this value.
+ return SegmentOffset(IRB, 0x10, getAddressSpace());
+ } else {
+ // %fs:0x28, unless we're using a Kernel code model, in which case
+ // it's %gs:0x28; %gs:0x14 on i386.
+ unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
+ return SegmentOffset(IRB, Offset, getAddressSpace());
+ }
+ }
+
+ return TargetLowering::getIRStackGuard(IRB);
+}
+
void X86TargetLowering::insertSSPDeclarations(Module &M) const {
// MSVC CRT provides functionalities for stack protection.
if (Subtarget.getTargetTriple().isOSMSVCRT()) {
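A minimal sketch of what the glibc path above yields on x86-64. The stack-guard slot lives at %fs:0x28, and the X86 backend models segment bases as address spaces (256 = GS, 257 = FS, 258 = SS), so getIRStackGuard effectively returns an inttoptr constant that instruction selection folds into an FS-relative load:

// Assuming SegmentOffset and an IRBuilder<> IRB are in scope:
Value *GuardSlot = SegmentOffset(IRB, /*Offset=*/0x28, /*AddressSpace=*/257);
// i.e. a pointer of type i8** in addrspace(257) with value 0x28.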
@@ -2027,13 +2101,13 @@ void X86TargetLowering::insertSSPDeclarations(Module &M) const {
auto *SecurityCheckCookie = cast<Function>(
M.getOrInsertFunction("__security_check_cookie",
Type::getVoidTy(M.getContext()),
- Type::getInt8PtrTy(M.getContext()), nullptr));
+ Type::getInt8PtrTy(M.getContext())));
SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall);
SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
return;
}
- // glibc has a special slot for the stack guard.
- if (Subtarget.isTargetGlibc())
+ // glibc, bionic, and Fuchsia have a special slot for the stack guard.
+ if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
return;
TargetLowering::insertSSPDeclarations(M);
}
@@ -2056,21 +2130,23 @@ Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
if (Subtarget.getTargetTriple().isOSContiki())
return getDefaultSafeStackPointerLocation(IRB, false);
- if (!Subtarget.isTargetAndroid())
- return TargetLowering::getSafeStackPointerLocation(IRB);
-
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
- unsigned AddressSpace, Offset;
+ if (Subtarget.isTargetAndroid()) {
+ // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
+ // %gs:0x24 on i386
+ unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
+ return SegmentOffset(IRB, Offset, getAddressSpace());
+ }
- // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
- // %gs:0x24 on i386
- Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
- AddressSpace = getAddressSpace();
- return ConstantExpr::getIntToPtr(
- ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
- Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
+ // Fuchsia is similar.
+ if (Subtarget.isTargetFuchsia()) {
+ // <magenta/tls.h> defines MX_TLS_UNSAFE_SP_OFFSET with this value.
+ return SegmentOffset(IRB, 0x18, getAddressSpace());
+ }
+
+ return TargetLowering::getSafeStackPointerLocation(IRB);
}
bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
@@ -2179,6 +2255,11 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
++I, ++OutsIndex) {
CCValAssign &VA = RVLocs[I];
assert(VA.isRegLoc() && "Can only return in registers!");
+
+ // Add the register to the CalleeSaveDisableRegs list.
+ if (CallConv == CallingConv::X86_RegCall)
+ MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
+
SDValue ValToCopy = OutVals[OutsIndex];
EVT ValVT = ValToCopy.getValueType();
@@ -2253,6 +2334,10 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
assert(2 == RegsToPass.size() &&
"Expecting two registers after Pass64BitArgInRegs");
+
+ // Add the second register to the CalleeSaveDisableRegs list.
+ if (CallConv == CallingConv::X86_RegCall)
+ MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
} else {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
}
@@ -2309,6 +2394,10 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// RAX/EAX now acts like a return value.
RetOps.push_back(
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
+
+ // Add the returned register to the CalleeSaveDisableRegs list.
+ if (CallConv == CallingConv::X86_RegCall)
+ MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
}
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
@@ -2444,7 +2533,7 @@ static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
// Convert the i32 type into v32i1 type
Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
- // Concantenate the two values together
+ // Concatenate the two values together
return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
}
@@ -2488,8 +2577,10 @@ static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
SDValue X86TargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+ uint32_t *RegMask) const {
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
bool Is64Bit = Subtarget.is64Bit();
@@ -2503,6 +2594,14 @@ SDValue X86TargetLowering::LowerCallResult(
CCValAssign &VA = RVLocs[I];
EVT CopyVT = VA.getLocVT();
+ // In some calling conventions we need to remove the used registers
+ // from the register mask.
+ if (RegMask && CallConv == CallingConv::X86_RegCall) {
+ for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
+ }
+
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
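A register mask is a bitvector packed into 32-bit words, one bit per physical register, where a set bit means the register is preserved across the call. The loop above clears the bit for each return register and all of its sub-registers, so RegCall return values are correctly treated as clobbered. The bit arithmetic in isolation (the register number 19 is illustrative):

static void markClobbered(uint32_t *RegMask, unsigned PhysReg) {
  RegMask[PhysReg / 32] &= ~(1u << (PhysReg % 32)); // clear 'preserved' bit
}
// markClobbered(RegMask, 19) clears bit 19 of word 0.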
@@ -2669,6 +2768,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
+ MVT PtrVT = getPointerTy(DAG.getDataLayout());
// If value is passed by pointer we have address passed instead of the value
// itself. No need to extend if the mask value and location share the same
@@ -2686,13 +2786,16 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
// taken by a return address.
int Offset = 0;
if (CallConv == CallingConv::X86_INTR) {
- const X86Subtarget& Subtarget =
- static_cast<const X86Subtarget&>(DAG.getSubtarget());
// X86 interrupts may take one or two arguments.
// On the stack there will be no return address as in a regular call.
// The offset of the last argument needs to be set to -4/-8 bytes.
// The offset of the first argument (when there are two) should be set to 0 bytes.
Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
+ if (Subtarget.is64Bit() && Ins.size() == 2) {
+ // The stack pointer needs to be realigned for 64 bit handlers with error
+ // code, so the argument offset changes by 8 bytes.
+ Offset += 8;
+ }
}
// FIXME: For now, all byval parameter objects are marked mutable. This can be
@@ -2707,30 +2810,71 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
if (CallConv == CallingConv::X86_INTR) {
MFI.setObjectOffset(FI, Offset);
}
- return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- } else {
- int FI = MFI.CreateFixedObject(ValVT.getSizeInBits()/8,
- VA.getLocMemOffset(), isImmutable);
-
- // Set SExt or ZExt flag.
- if (VA.getLocInfo() == CCValAssign::ZExt) {
- MFI.setObjectZExt(FI, true);
- } else if (VA.getLocInfo() == CCValAssign::SExt) {
- MFI.setObjectSExt(FI, true);
+ return DAG.getFrameIndex(FI, PtrVT);
+ }
+
+ // This is an argument in memory. We might be able to perform copy elision.
+ if (Flags.isCopyElisionCandidate()) {
+ EVT ArgVT = Ins[i].ArgVT;
+ SDValue PartAddr;
+ if (Ins[i].PartOffset == 0) {
+ // If this is a one-part value or the first part of a multi-part value,
+ // create a stack object for the entire argument value type and return a
+ // load from our portion of it. This assumes that if the first part of an
+ // argument is in memory, the rest will also be in memory.
+ int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
+ /*Immutable=*/false);
+ PartAddr = DAG.getFrameIndex(FI, PtrVT);
+ return DAG.getLoad(
+ ValVT, dl, Chain, PartAddr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+ } else {
+ // This is not the first piece of an argument in memory. See if there is
+ // already a fixed stack object including this offset. If so, assume it
+ // was created by the PartOffset == 0 branch above and create a load from
+ // the appropriate offset into it.
+ int64_t PartBegin = VA.getLocMemOffset();
+ int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
+ int FI = MFI.getObjectIndexBegin();
+ for (; MFI.isFixedObjectIndex(FI); ++FI) {
+ int64_t ObjBegin = MFI.getObjectOffset(FI);
+ int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
+ if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
+ break;
+ }
+ if (MFI.isFixedObjectIndex(FI)) {
+ SDValue Addr =
+ DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
+ DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
+ return DAG.getLoad(
+ ValVT, dl, Chain, Addr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
+ Ins[i].PartOffset));
+ }
}
+ }
- // Adjust SP offset of interrupt parameter.
- if (CallConv == CallingConv::X86_INTR) {
- MFI.setObjectOffset(FI, Offset);
- }
+ int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
+ VA.getLocMemOffset(), isImmutable);
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- SDValue Val = DAG.getLoad(
- ValVT, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
- return ExtendedInMem ?
- DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
+ // Set SExt or ZExt flag.
+ if (VA.getLocInfo() == CCValAssign::ZExt) {
+ MFI.setObjectZExt(FI, true);
+ } else if (VA.getLocInfo() == CCValAssign::SExt) {
+ MFI.setObjectSExt(FI, true);
+ }
+
+ // Adjust SP offset of interrupt parameter.
+ if (CallConv == CallingConv::X86_INTR) {
+ MFI.setObjectOffset(FI, Offset);
}
+
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val = DAG.getLoad(
+ ValVT, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+ return ExtendedInMem ? DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)
+ : Val;
}
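// The PartOffset != 0 branch above scans the existing fixed stack objects for
// one whose byte range fully covers [PartBegin, PartEnd). A self-contained
// sketch of that interval-containment search, with a simple record type in
// place of MachineFrameInfo (illustrative, not LLVM API):
#include <cstdint>
#include <vector>

struct FixedObject { std::int64_t Begin, Size; };

// Return the index of an object covering [PartBegin, PartEnd), or -1.
static int findEnclosingObject(const std::vector<FixedObject> &Objs,
                               std::int64_t PartBegin, std::int64_t PartEnd) {
  for (int FI = 0, E = (int)Objs.size(); FI != E; ++FI) {
    std::int64_t ObjBegin = Objs[FI].Begin;
    std::int64_t ObjEnd = ObjBegin + Objs[FI].Size;
    if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
      return FI;
  }
  return -1;
}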
// FIXME: Get this from tablegen.
@@ -2781,12 +2925,14 @@ static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
}
+#ifndef NDEBUG
static bool isSortedByValueNo(const SmallVectorImpl<CCValAssign> &ArgLocs) {
return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
[](const CCValAssign &A, const CCValAssign &B) -> bool {
return A.getValNo() < B.getValNo();
});
}
+#endif
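// Wrapping the predicate in #ifndef NDEBUG matches its only remaining use
// sites: the llvm_unreachable checks below become assert()s, which compile
// away in release builds and would otherwise leave an unused static function
// warning. A sketch of the same pattern, with an illustrative Loc type:
#include <algorithm>
#include <cassert>
#include <vector>

struct Loc { unsigned ValNo; };

#ifndef NDEBUG
static bool isSortedByValNo(const std::vector<Loc> &Locs) {
  return std::is_sorted(Locs.begin(), Locs.end(),
                        [](const Loc &A, const Loc &B) {
                          return A.ValNo < B.ValNo;
                        });
}
#endif

static void checkLocs(const std::vector<Loc> &Locs) {
  assert(isSortedByValNo(Locs) && "locations must be sorted");
  (void)Locs; // release builds see no use
}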
SDValue X86TargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
@@ -2836,8 +2982,8 @@ SDValue X86TargetLowering::LowerFormalArguments(
// The next loop assumes that the locations are in the same order as the
// input arguments.
- if (!isSortedByValueNo(ArgLocs))
- llvm_unreachable("Argument Location list must be sorted before lowering");
+ assert(isSortedByValueNo(ArgLocs) &&
+ "Argument Location list must be sorted before lowering");
SDValue ArgValue;
for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
@@ -2853,7 +2999,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
"Currently the only custom case is when we split v64i1 to 2 regs");
// v64i1 values, in the regcall calling convention, that are
- // compiled to 32 bit arch, are splited up into two registers.
+ // compiled for a 32-bit arch, are split up into two registers.
ArgValue =
getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
} else {
@@ -3107,8 +3253,9 @@ SDValue X86TargetLowering::LowerFormalArguments(
MF.getTarget().Options.GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
- // X86 interrupts must pop the error code if present
- FuncInfo->setBytesToPopOnReturn(Is64Bit ? 8 : 4);
+ // X86 interrupts must pop the error code (and the alignment padding) if
+ // present.
+ FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
@@ -3146,6 +3293,12 @@ SDValue X86TargetLowering::LowerFormalArguments(
}
}
+ if (CallConv == CallingConv::X86_RegCall) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const auto &Pair : make_range(MRI.livein_begin(), MRI.livein_end()))
+ MF.getRegInfo().disableCalleeSavedRegister(Pair.first);
+ }
+
return Chain;
}
@@ -3348,8 +3501,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The next loop assumes that the locations are in the same order as the
// input arguments.
- if (!isSortedByValueNo(ArgLocs))
- llvm_unreachable("Argument Location list must be sorted before lowering");
+ assert(isSortedByValueNo(ArgLocs) &&
+ "Argument Location list must be sorted before lowering");
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization arguments are handle later.
@@ -3517,7 +3670,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (VA.isRegLoc()) {
if (VA.needsCustom()) {
assert((CallConv == CallingConv::X86_RegCall) &&
- "Expecting custome case only in regcall calling convention");
+ "Expecting custom case only in regcall calling convention");
// This means that we are in a special case where one argument was
// passed through two register locations - skip the next location.
++I;
@@ -3662,7 +3815,32 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Mask = RegInfo->getNoPreservedMask();
}
- Ops.push_back(DAG.getRegisterMask(Mask));
+ // Define a new register mask from the existing mask.
+ uint32_t *RegMask = nullptr;
+
+ // In some calling conventions we need to remove the used physical registers
+ // from the reg mask.
+ if (CallConv == CallingConv::X86_RegCall) {
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+
+ // Allocate a new Reg Mask and copy Mask.
+ RegMask = MF.allocateRegisterMask(TRI->getNumRegs());
+ unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
+ memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize);
+
+ // Make sure all sub-registers of the argument registers are reset
+ // in the RegMask.
+ for (auto const &RegPair : RegsToPass)
+ for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
+
+ // Create the RegMask Operand according to our updated mask.
+ Ops.push_back(DAG.getRegisterMask(RegMask));
+ } else {
+ // Create the RegMask Operand according to the static mask.
+ Ops.push_back(DAG.getRegisterMask(Mask));
+ }
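// The mask copy above sizes the allocation with round-up division: one bit
// per physical register, 32 bits per word. A standalone sketch of the copy,
// using std::vector instead of MF.allocateRegisterMask (illustrative only):
#include <cstdint>
#include <cstring>
#include <vector>

static std::vector<std::uint32_t> copyRegMask(const std::uint32_t *Mask,
                                              unsigned NumRegs) {
  unsigned NumWords = (NumRegs + 31) / 32; // round up to whole words
  std::vector<std::uint32_t> Copy(NumWords);
  std::memcpy(Copy.data(), Mask, sizeof(std::uint32_t) * NumWords);
  return Copy;
}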
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -3715,8 +3893,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
- Ins, dl, DAG, InVals);
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ InVals, RegMask);
}
//===----------------------------------------------------------------------===//
@@ -4132,6 +4310,7 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
return true;
// 'Faux' Target Shuffles.
case ISD::AND:
+ case X86ISD::ANDNP:
return true;
}
}
@@ -4448,6 +4627,11 @@ bool X86TargetLowering::isCtlzFast() const {
return Subtarget.hasFastLZCNT();
}
+bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
+ const Instruction &AndI) const {
+ return true;
+}
+
bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
if (!Subtarget.hasBMI())
return false;
@@ -4460,6 +4644,26 @@ bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
return true;
}
+MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
+ MVT VT = MVT::getIntegerVT(NumBits);
+ if (isTypeLegal(VT))
+ return VT;
+
+ // PMOVMSKB can handle this.
+ if (NumBits == 128 && isTypeLegal(MVT::v16i8))
+ return MVT::v16i8;
+
+ // VPMOVMSKB can handle this.
+ if (NumBits == 256 && isTypeLegal(MVT::v32i8))
+ return MVT::v32i8;
+
+ // TODO: Allow 64-bit type for 32-bit target.
+ // TODO: 512-bit types should be allowed, but make sure that those
+ // cases are handled in combineVectorSizedSetCCEquality().
+
+ return MVT::INVALID_SIMPLE_VALUE_TYPE;
+}
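// Returning v16i8/v32i8 here lets memcmp-style equality checks expand to a
// vector compare plus a mask extraction instead of a scalar loop. The shape
// of the 128-bit case, written with SSE2 intrinsics (assumes an SSE2 target;
// this sketches the resulting codegen, not the DAG combine itself):
#include <emmintrin.h>

static bool equal16Bytes(const void *A, const void *B) {
  __m128i VA = _mm_loadu_si128((const __m128i *)A);
  __m128i VB = _mm_loadu_si128((const __m128i *)B);
  __m128i Eq = _mm_cmpeq_epi8(VA, VB);    // PCMPEQB: per-byte equality
  return _mm_movemask_epi8(Eq) == 0xFFFF; // PMOVMSKB: all 16 lanes equal
}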
+
/// Val is the undef sentinel value or equal to the specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
return ((Val == SM_SentinelUndef) || (Val == CmpVal));
@@ -4555,28 +4759,30 @@ static bool canWidenShuffleElements(ArrayRef<int> Mask,
SmallVectorImpl<int> &WidenedMask) {
WidenedMask.assign(Mask.size() / 2, 0);
for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
+ int M0 = Mask[i];
+ int M1 = Mask[i + 1];
+
// If both elements are undef, it's trivial.
- if (Mask[i] == SM_SentinelUndef && Mask[i + 1] == SM_SentinelUndef) {
+ if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
WidenedMask[i / 2] = SM_SentinelUndef;
continue;
}
// Check for an undef mask and a mask value properly aligned to fit with
// a pair of values. If we find such a case, use the non-undef mask's value.
- if (Mask[i] == SM_SentinelUndef && Mask[i + 1] >= 0 &&
- Mask[i + 1] % 2 == 1) {
- WidenedMask[i / 2] = Mask[i + 1] / 2;
+ if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
+ WidenedMask[i / 2] = M1 / 2;
continue;
}
- if (Mask[i + 1] == SM_SentinelUndef && Mask[i] >= 0 && Mask[i] % 2 == 0) {
- WidenedMask[i / 2] = Mask[i] / 2;
+ if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
+ WidenedMask[i / 2] = M0 / 2;
continue;
}
// When zeroing, we need to spread the zeroing across both lanes to widen.
- if (Mask[i] == SM_SentinelZero || Mask[i + 1] == SM_SentinelZero) {
- if ((Mask[i] == SM_SentinelZero || Mask[i] == SM_SentinelUndef) &&
- (Mask[i + 1] == SM_SentinelZero || Mask[i + 1] == SM_SentinelUndef)) {
+ if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
+ if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
+ (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
WidenedMask[i / 2] = SM_SentinelZero;
continue;
}
@@ -4585,9 +4791,8 @@ static bool canWidenShuffleElements(ArrayRef<int> Mask,
// Finally check if the two mask values are adjacent and aligned with
// a pair.
- if (Mask[i] != SM_SentinelUndef && Mask[i] % 2 == 0 &&
- Mask[i] + 1 == Mask[i + 1]) {
- WidenedMask[i / 2] = Mask[i] / 2;
+ if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
+ WidenedMask[i / 2] = M0 / 2;
continue;
}
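// The final check above is the core widening rule: an even/odd element pair
// (M0, M0 + 1) starting at an even index maps to the single double-width
// index M0 / 2. A minimal sketch of that rule with the sentinel cases
// ignored (illustrative, not the full function):
#include <cstddef>
#include <vector>

static bool widenMask(const std::vector<int> &Mask,
                      std::vector<int> &Widened) {
  Widened.assign(Mask.size() / 2, 0);
  for (std::size_t i = 0; i + 1 < Mask.size(); i += 2) {
    int M0 = Mask[i], M1 = Mask[i + 1];
    if ((M0 % 2) != 0 || M0 + 1 != M1)
      return false; // pair is not adjacent and even-aligned
    Widened[i / 2] = M0 / 2;
  }
  return true;
}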
@@ -4770,9 +4975,10 @@ static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
return ConstsNode;
}
-static SDValue getConstVector(ArrayRef<APInt> Bits, SmallBitVector &Undefs,
+static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
- assert(Bits.size() == Undefs.size() && "Unequal constant and undef arrays");
+ assert(Bits.size() == Undefs.getBitWidth() &&
+ "Unequal constant and undef arrays");
SmallVector<SDValue, 32> Ops;
bool Split = false;
@@ -4844,10 +5050,6 @@ static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
VT.getVectorNumElements()/Factor);
- // Extract from UNDEF is UNDEF.
- if (Vec.isUndef())
- return DAG.getUNDEF(ResultVT);
-
// Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
@@ -4918,50 +5120,6 @@ static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
SelectionDAG &DAG, const SDLoc &dl) {
assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!");
-
- // For insertion into the zero index (low half) of a 256-bit vector, it is
- // more efficient to generate a blend with immediate instead of an insert*128.
- // We are still creating an INSERT_SUBVECTOR below with an undef node to
- // extend the subvector to the size of the result vector. Make sure that
- // we are not recursing on that node by checking for undef here.
- if (IdxVal == 0 && Result.getValueType().is256BitVector() &&
- !Result.isUndef()) {
- EVT ResultVT = Result.getValueType();
- SDValue ZeroIndex = DAG.getIntPtrConstant(0, dl);
- SDValue Undef = DAG.getUNDEF(ResultVT);
- SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Undef,
- Vec, ZeroIndex);
-
- // The blend instruction, and therefore its mask, depend on the data type.
- MVT ScalarType = ResultVT.getVectorElementType().getSimpleVT();
- if (ScalarType.isFloatingPoint()) {
- // Choose either vblendps (float) or vblendpd (double).
- unsigned ScalarSize = ScalarType.getSizeInBits();
- assert((ScalarSize == 64 || ScalarSize == 32) && "Unknown float type");
- unsigned MaskVal = (ScalarSize == 64) ? 0x03 : 0x0f;
- SDValue Mask = DAG.getConstant(MaskVal, dl, MVT::i8);
- return DAG.getNode(X86ISD::BLENDI, dl, ResultVT, Result, Vec256, Mask);
- }
-
- const X86Subtarget &Subtarget =
- static_cast<const X86Subtarget &>(DAG.getSubtarget());
-
- // AVX2 is needed for 256-bit integer blend support.
- // Integers must be cast to 32-bit because there is only vpblendd;
- // vpblendw can't be used for this because it has a handicapped mask.
-
- // If we don't have AVX2, then cast to float. Using a wrong domain blend
- // is still more efficient than using the wrong domain vinsertf128 that
- // will be created by InsertSubVector().
- MVT CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32;
-
- SDValue Mask = DAG.getConstant(0x0f, dl, MVT::i8);
- Result = DAG.getBitcast(CastVT, Result);
- Vec256 = DAG.getBitcast(CastVT, Vec256);
- Vec256 = DAG.getNode(X86ISD::BLENDI, dl, CastVT, Result, Vec256, Mask);
- return DAG.getBitcast(ResultVT, Vec256);
- }
-
return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
}
@@ -5023,7 +5181,8 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
if (Vec.isUndef()) {
if (IdxVal != 0) {
SDValue ShiftBits = DAG.getConstant(IdxVal, dl, MVT::i8);
- WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, WideSubVec, ShiftBits);
+ WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
+ ShiftBits);
}
return ExtractSubVec(WideSubVec);
}
@@ -5032,9 +5191,9 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
NumElems = WideOpVT.getVectorNumElements();
unsigned ShiftLeft = NumElems - SubVecNumElems;
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
- Vec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, WideSubVec,
- DAG.getConstant(ShiftLeft, dl, MVT::i8));
- Vec = ShiftRight ? DAG.getNode(X86ISD::VSRLI, dl, WideOpVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
+ DAG.getConstant(ShiftLeft, dl, MVT::i8));
+ Vec = ShiftRight ? DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
DAG.getConstant(ShiftRight, dl, MVT::i8)) : Vec;
return ExtractSubVec(Vec);
}
@@ -5043,8 +5202,8 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
// Zero lower bits of the Vec
SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
- Vec = DAG.getNode(X86ISD::VSRLI, dl, WideOpVT, Vec, ShiftBits);
- Vec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
// Merge them together, SubVec should be zero extended.
WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
getZeroVector(WideOpVT, Subtarget, DAG, dl),
@@ -5056,12 +5215,12 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
// Simple case when we put subvector in the upper part
if (IdxVal + SubVecNumElems == NumElems) {
// Zero upper bits of the Vec
- WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, WideSubVec,
+ WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
- Vec = DAG.getNode(X86ISD::VSHLI, dl, WideOpVT, Vec, ShiftBits);
- Vec = DAG.getNode(X86ISD::VSRLI, dl, WideOpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
return ExtractSubVec(Vec);
}
@@ -5094,26 +5253,38 @@ static SDValue concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
}
/// Returns a vector of specified type with all bits set.
-/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
-/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately.
+/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
/// Then bitcast to their original type, ensuring they get CSE'd.
-static SDValue getOnesVector(EVT VT, const X86Subtarget &Subtarget,
- SelectionDAG &DAG, const SDLoc &dl) {
+static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Expected a 128/256/512-bit vector type");
APInt Ones = APInt::getAllOnesValue(32);
unsigned NumElts = VT.getSizeInBits() / 32;
- SDValue Vec;
- if (!Subtarget.hasInt256() && NumElts == 8) {
- Vec = DAG.getConstant(Ones, dl, MVT::v4i32);
- Vec = concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
- } else {
- Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
- }
+ SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
return DAG.getBitcast(VT, Vec);
}
+static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In,
+ SelectionDAG &DAG) {
+ EVT InVT = In.getValueType();
+ assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode");
+
+ if (VT.is128BitVector() && InVT.is128BitVector())
+ return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT)
+ : DAG.getZeroExtendVectorInReg(In, DL, VT);
+
+ // For 256-bit vectors, we only need the lower (128-bit) input half.
+ // For 512-bit vectors, we only need the lower input half or quarter.
+ if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) {
+ int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
+ In = extractSubVector(In, 0, DAG, DL,
+ std::max(128, (int)VT.getSizeInBits() / Scale));
+ }
+
+ return DAG.getNode(Opc, DL, VT, In);
+}
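// The arithmetic behind the extract above: extending elements by a factor of
// Scale means an OutBits-wide result consumes only OutBits / Scale input
// bits, clamped to a whole 128-bit subvector. An illustrative helper (not
// LLVM API) showing the computation:
#include <algorithm>
#include <cassert>

static unsigned neededInputBits(unsigned OutBits, unsigned OutEltBits,
                                unsigned InEltBits) {
  assert(OutEltBits % InEltBits == 0 && "expected a whole extension scale");
  unsigned Scale = OutEltBits / InEltBits;
  return std::max(128u, OutBits / Scale);
}
// e.g. a 256-bit zext from 16-bit to 32-bit elements: 256 / 2 = 128 input bits.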
+
/// Generate unpacklo/unpackhi shuffle mask.
static void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
bool Unary) {
@@ -5199,9 +5370,10 @@ static const Constant *getTargetConstantFromNode(SDValue Op) {
// Extract raw constant bits from constant pools.
static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
- SmallBitVector &UndefElts,
- SmallVectorImpl<APInt> &EltBits) {
- assert(UndefElts.empty() && "Expected an empty UndefElts vector");
+ APInt &UndefElts,
+ SmallVectorImpl<APInt> &EltBits,
+ bool AllowWholeUndefs = true,
+ bool AllowPartialUndefs = true) {
assert(EltBits.empty() && "Expected an empty EltBits vector");
Op = peekThroughBitcasts(Op);
@@ -5211,56 +5383,83 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!");
unsigned NumElts = SizeInBits / EltSizeInBits;
+ unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
+ unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
+
// Extract all the undef/constant element data and pack into single bitsets.
APInt UndefBits(SizeInBits, 0);
APInt MaskBits(SizeInBits, 0);
// Split the undef/constant single bitset data into the target elements.
auto SplitBitData = [&]() {
- UndefElts = SmallBitVector(NumElts, false);
+ // Don't split if we don't allow undef bits.
+ bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
+ if (UndefBits.getBoolValue() && !AllowUndefs)
+ return false;
+
+ UndefElts = APInt(NumElts, 0);
EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
for (unsigned i = 0; i != NumElts; ++i) {
- APInt UndefEltBits = UndefBits.lshr(i * EltSizeInBits);
- UndefEltBits = UndefEltBits.zextOrTrunc(EltSizeInBits);
+ unsigned BitOffset = i * EltSizeInBits;
+ APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
- // Only treat an element as UNDEF if all bits are UNDEF, otherwise
- // treat it as zero.
+ // Only treat an element as UNDEF if all bits are UNDEF.
if (UndefEltBits.isAllOnesValue()) {
- UndefElts[i] = true;
+ if (!AllowWholeUndefs)
+ return false;
+ UndefElts.setBit(i);
continue;
}
- APInt Bits = MaskBits.lshr(i * EltSizeInBits);
- Bits = Bits.zextOrTrunc(EltSizeInBits);
+ // If only some bits are UNDEF then treat them as zero (or bail if not
+ // supported).
+ if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
+ return false;
+
+ APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
EltBits[i] = Bits.getZExtValue();
}
return true;
};
- auto ExtractConstantBits = [SizeInBits](const Constant *Cst, APInt &Mask,
- APInt &Undefs) {
+ // Collect constant bits and insert into mask/undef bit masks.
+ auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
+ unsigned BitOffset) {
if (!Cst)
return false;
unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits();
if (isa<UndefValue>(Cst)) {
- Mask = APInt::getNullValue(SizeInBits);
- Undefs = APInt::getLowBitsSet(SizeInBits, CstSizeInBits);
+ Undefs.setBits(BitOffset, BitOffset + CstSizeInBits);
return true;
}
if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
- Mask = CInt->getValue().zextOrTrunc(SizeInBits);
- Undefs = APInt::getNullValue(SizeInBits);
+ Mask.insertBits(CInt->getValue(), BitOffset);
return true;
}
if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
- Mask = CFP->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
- Undefs = APInt::getNullValue(SizeInBits);
+ Mask.insertBits(CFP->getValueAPF().bitcastToAPInt(), BitOffset);
return true;
}
return false;
};
+ // Extract constant bits from build vector.
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ const SDValue &Src = Op.getOperand(i);
+ unsigned BitOffset = i * SrcEltSizeInBits;
+ if (Src.isUndef()) {
+ UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
+ continue;
+ }
+ auto *Cst = cast<ConstantSDNode>(Src);
+ APInt Bits = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
+ MaskBits.insertBits(Bits, BitOffset);
+ }
+ return SplitBitData();
+ }
+
// Extract constant bits from constant pool vector.
if (auto *Cst = getTargetConstantFromNode(Op)) {
Type *CstTy = Cst->getType();
@@ -5268,117 +5467,59 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
return false;
unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
- for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i) {
- APInt Bits, Undefs;
- if (!ExtractConstantBits(Cst->getAggregateElement(i), Bits, Undefs))
+ for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i)
+ if (!CollectConstantBits(Cst->getAggregateElement(i), MaskBits, UndefBits,
+ i * CstEltSizeInBits))
return false;
- MaskBits |= Bits.shl(i * CstEltSizeInBits);
- UndefBits |= Undefs.shl(i * CstEltSizeInBits);
- }
return SplitBitData();
}
// Extract constant bits from a broadcasted constant pool scalar.
if (Op.getOpcode() == X86ISD::VBROADCAST &&
- EltSizeInBits <= Op.getScalarValueSizeInBits()) {
+ EltSizeInBits <= SrcEltSizeInBits) {
if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
- APInt Bits, Undefs;
- if (ExtractConstantBits(Broadcast, Bits, Undefs)) {
- unsigned NumBroadcastBits = Op.getScalarValueSizeInBits();
- unsigned NumBroadcastElts = SizeInBits / NumBroadcastBits;
- for (unsigned i = 0; i != NumBroadcastElts; ++i) {
- MaskBits |= Bits.shl(i * NumBroadcastBits);
- UndefBits |= Undefs.shl(i * NumBroadcastBits);
+ APInt Bits(SizeInBits, 0);
+ APInt Undefs(SizeInBits, 0);
+ if (CollectConstantBits(Broadcast, Bits, Undefs, 0)) {
+ for (unsigned i = 0; i != NumSrcElts; ++i) {
+ MaskBits |= Bits.shl(i * SrcEltSizeInBits);
+ UndefBits |= Undefs.shl(i * SrcEltSizeInBits);
}
return SplitBitData();
}
}
}
+ // Extract a rematerialized scalar constant insertion.
+ if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
+ Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
+ auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
+ MaskBits = CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
+ MaskBits = MaskBits.zext(SizeInBits);
+ return SplitBitData();
+ }
+
return false;
}
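// The rewrite above replaces shift/or and lshr/zextOrTrunc sequences with
// APInt::insertBits and APInt::extractBits at BitOffset = i * EltSizeInBits.
// The same pack/split, sketched on a single 64-bit word (illustrative; APInt
// handles arbitrary widths):
#include <cstdint>

static std::uint64_t insertBitsWord(std::uint64_t Dst, std::uint64_t Src,
                                    unsigned Off, unsigned Bits) {
  std::uint64_t FieldMask =
      (Bits >= 64 ? ~0ull : ((1ull << Bits) - 1)) << Off;
  return (Dst & ~FieldMask) | ((Src << Off) & FieldMask);
}

static std::uint64_t extractBitsWord(std::uint64_t Src, unsigned Off,
                                     unsigned Bits) {
  return (Src >> Off) & (Bits >= 64 ? ~0ull : ((1ull << Bits) - 1));
}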
-// TODO: Merge more of this with getTargetConstantBitsFromNode.
static bool getTargetShuffleMaskIndices(SDValue MaskNode,
unsigned MaskEltSizeInBits,
SmallVectorImpl<uint64_t> &RawMask) {
- MaskNode = peekThroughBitcasts(MaskNode);
-
- MVT VT = MaskNode.getSimpleValueType();
- assert(VT.isVector() && "Can't produce a non-vector with a build_vector!");
- unsigned NumMaskElts = VT.getSizeInBits() / MaskEltSizeInBits;
-
- // Split an APInt element into MaskEltSizeInBits sized pieces and
- // insert into the shuffle mask.
- auto SplitElementToMask = [&](APInt Element) {
- // Note that this is x86 and so always little endian: the low byte is
- // the first byte of the mask.
- int Split = VT.getScalarSizeInBits() / MaskEltSizeInBits;
- for (int i = 0; i < Split; ++i) {
- APInt RawElt = Element.getLoBits(MaskEltSizeInBits);
- Element = Element.lshr(MaskEltSizeInBits);
- RawMask.push_back(RawElt.getZExtValue());
- }
- };
-
- if (MaskNode.getOpcode() == X86ISD::VBROADCAST) {
- // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
- // TODO: Handle (VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0
- if (VT.getScalarSizeInBits() != MaskEltSizeInBits)
- return false;
- if (auto *CN = dyn_cast<ConstantSDNode>(MaskNode.getOperand(0))) {
- const APInt &MaskElement = CN->getAPIntValue();
- for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
- APInt RawElt = MaskElement.getLoBits(MaskEltSizeInBits);
- RawMask.push_back(RawElt.getZExtValue());
- }
- }
+ APInt UndefElts;
+ SmallVector<APInt, 64> EltBits;
+
+ // Extract the raw target constant bits.
+ // FIXME: We currently don't support UNDEF bits or mask entries.
+ if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
+ EltBits, /* AllowWholeUndefs */ false,
+ /* AllowPartialUndefs */ false))
return false;
- }
- if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL &&
- MaskNode.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
- SDValue MaskOp = MaskNode.getOperand(0).getOperand(0);
- if (auto *CN = dyn_cast<ConstantSDNode>(MaskOp)) {
- if ((MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0) {
- RawMask.push_back(CN->getZExtValue());
- RawMask.append(NumMaskElts - 1, 0);
- return true;
- }
-
- if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0) {
- unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits;
- SplitElementToMask(CN->getAPIntValue());
- RawMask.append((VT.getVectorNumElements() - 1) * ElementSplit, 0);
- return true;
- }
- }
- return false;
- }
-
- if (MaskNode.getOpcode() != ISD::BUILD_VECTOR)
- return false;
-
- // We can always decode if the buildvector is all zero constants,
- // but can't use isBuildVectorAllZeros as it might contain UNDEFs.
- if (all_of(MaskNode->ops(), X86::isZeroNode)) {
- RawMask.append(NumMaskElts, 0);
- return true;
- }
-
- // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
- if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
- return false;
-
- for (SDValue Op : MaskNode->ops()) {
- if (auto *CN = dyn_cast<ConstantSDNode>(Op.getNode()))
- SplitElementToMask(CN->getAPIntValue());
- else if (auto *CFN = dyn_cast<ConstantFPSDNode>(Op.getNode()))
- SplitElementToMask(CFN->getValueAPF().bitcastToAPInt());
- else
- return false;
- }
+ // Insert the extracted elements into the mask.
+ for (APInt Elt : EltBits)
+ RawMask.push_back(Elt.getZExtValue());
return true;
}
@@ -5405,6 +5546,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
case X86ISD::BLENDI:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::SHUFP:
ImmN = N->getOperand(N->getNumOperands()-1);
@@ -5473,8 +5615,18 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
IsUnary = true;
break;
case X86ISD::VBROADCAST: {
- // We only decode broadcasts of same-sized vectors at the moment.
- if (N->getOperand(0).getValueType() == VT) {
+ SDValue N0 = N->getOperand(0);
+ // See if we're broadcasting from index 0 of an EXTRACT_SUBVECTOR. If so,
+ // add the pre-extracted value to the Ops vector.
+ if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N0.getOperand(0).getValueType() == VT &&
+ N0.getConstantOperandVal(1) == 0)
+ Ops.push_back(N0.getOperand(0));
+
+ // We only decode broadcasts of same-sized vectors, unless the broadcast
+ // came from an extract from the original width. If we found one, we
+ // pushed it onto the Ops vector above.
+ if (N0.getValueType() == VT || !Ops.empty()) {
DecodeVectorBroadcast(VT, Mask);
IsUnary = true;
break;
@@ -5669,6 +5821,19 @@ static bool setTargetShuffleZeroElements(SDValue N,
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
+ assert((VT.getSizeInBits() % Mask.size()) == 0 &&
+ "Illegal split of shuffle value type");
+ unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size();
+
+ // Extract known constant input data.
+ APInt UndefSrcElts[2];
+ SmallVector<APInt, 32> SrcEltBits[2];
+ bool IsSrcConstant[2] = {
+ getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0],
+ SrcEltBits[0], true, false),
+ getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1],
+ SrcEltBits[1], true, false)};
+
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
int M = Mask[i];
@@ -5677,6 +5842,7 @@ static bool setTargetShuffleZeroElements(SDValue N,
continue;
// Determine shuffle input and normalize the mask.
+ unsigned SrcIdx = M / Size;
SDValue V = M < Size ? V1 : V2;
M %= Size;
@@ -5686,39 +5852,27 @@ static bool setTargetShuffleZeroElements(SDValue N,
continue;
}
- // Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements.
- if (V.getOpcode() != ISD::BUILD_VECTOR)
- continue;
-
- // If the BUILD_VECTOR has fewer elements then the (larger) source
- // element must be UNDEF/ZERO.
- // TODO: Is it worth testing the individual bits of a constant?
- if ((Size % V.getNumOperands()) == 0) {
- int Scale = Size / V->getNumOperands();
- SDValue Op = V.getOperand(M / Scale);
- if (Op.isUndef())
+ // SCALAR_TO_VECTOR - only the first element is defined, the rest are UNDEF.
+ // TODO: We currently only set UNDEF for integer types - floats use the same
+ // registers as vectors and many of the scalar folded loads rely on the
+ // SCALAR_TO_VECTOR pattern.
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ (Size % V.getValueType().getVectorNumElements()) == 0) {
+ int Scale = Size / V.getValueType().getVectorNumElements();
+ int Idx = M / Scale;
+ if (Idx != 0 && !VT.isFloatingPoint())
Mask[i] = SM_SentinelUndef;
- else if (X86::isZeroNode(Op))
+ else if (Idx == 0 && X86::isZeroNode(V.getOperand(0)))
Mask[i] = SM_SentinelZero;
continue;
}
- // If the BUILD_VECTOR has more elements then all the (smaller) source
- // elements must be all UNDEF or all ZERO.
- if ((V.getNumOperands() % Size) == 0) {
- int Scale = V->getNumOperands() / Size;
- bool AllUndef = true;
- bool AllZero = true;
- for (int j = 0; j < Scale; ++j) {
- SDValue Op = V.getOperand((M * Scale) + j);
- AllUndef &= Op.isUndef();
- AllZero &= X86::isZeroNode(Op);
- }
- if (AllUndef)
+ // Attempt to extract from the source's constant bits.
+ if (IsSrcConstant[SrcIdx]) {
+ if (UndefSrcElts[SrcIdx][M])
Mask[i] = SM_SentinelUndef;
- else if (AllZero)
+ else if (SrcEltBits[SrcIdx][M] == 0)
Mask[i] = SM_SentinelZero;
- continue;
}
}
@@ -5744,11 +5898,16 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
unsigned Opcode = N.getOpcode();
switch (Opcode) {
- case ISD::AND: {
+ case ISD::AND:
+ case X86ISD::ANDNP: {
// Attempt to decode as a per-byte mask.
- SmallBitVector UndefElts;
+ APInt UndefElts;
SmallVector<APInt, 32> EltBits;
- if (!getTargetConstantBitsFromNode(N.getOperand(1), 8, UndefElts, EltBits))
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ bool IsAndN = (X86ISD::ANDNP == Opcode);
+ uint64_t ZeroMask = IsAndN ? 255 : 0;
+ if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits))
return false;
for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
if (UndefElts[i]) {
@@ -5758,9 +5917,55 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
uint64_t ByteBits = EltBits[i].getZExtValue();
if (ByteBits != 0 && ByteBits != 255)
return false;
- Mask.push_back(ByteBits == 0 ? SM_SentinelZero : i);
+ Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i);
}
- Ops.push_back(N.getOperand(0));
+ Ops.push_back(IsAndN ? N1 : N0);
+ return true;
+ }
+ case ISD::SCALAR_TO_VECTOR: {
+ // Match against a scalar_to_vector of an extract from a similar vector.
+ SDValue N0 = N.getOperand(0);
+ if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ N0.getOperand(0).getValueType() != VT ||
+ !isa<ConstantSDNode>(N0.getOperand(1)) ||
+ NumElts <= N0.getConstantOperandVal(1) ||
+ !N->isOnlyUserOf(N0.getNode()))
+ return false;
+ Ops.push_back(N0.getOperand(0));
+ Mask.push_back(N0.getConstantOperandVal(1));
+ Mask.append(NumElts - 1, SM_SentinelUndef);
+ return true;
+ }
+ case X86ISD::PINSRB:
+ case X86ISD::PINSRW: {
+ SDValue InVec = N.getOperand(0);
+ SDValue InScl = N.getOperand(1);
+ uint64_t InIdx = N.getConstantOperandVal(2);
+ assert(InIdx < NumElts && "Illegal insertion index");
+
+ // Attempt to recognise a PINSR*(VEC, 0, Idx) shuffle pattern.
+ if (X86::isZeroNode(InScl)) {
+ Ops.push_back(InVec);
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(i == InIdx ? SM_SentinelZero : (int)i);
+ return true;
+ }
+
+ // Attempt to recognise a PINSR*(ASSERTZEXT(PEXTR*)) shuffle pattern.
+ // TODO: Expand this to support INSERT_VECTOR_ELT/etc.
+ unsigned ExOp =
+ (X86ISD::PINSRB == Opcode ? X86ISD::PEXTRB : X86ISD::PEXTRW);
+ if (InScl.getOpcode() != ISD::AssertZext ||
+ InScl.getOperand(0).getOpcode() != ExOp)
+ return false;
+
+ SDValue ExVec = InScl.getOperand(0).getOperand(0);
+ uint64_t ExIdx = InScl.getOperand(0).getConstantOperandVal(1);
+ assert(ExIdx < NumElts && "Illegal extraction index");
+ Ops.push_back(InVec);
+ Ops.push_back(ExVec);
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
return true;
}
case X86ISD::VSHLI:
@@ -5795,6 +6000,7 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
}
return true;
}
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
case X86ISD::VZEXT: {
// TODO - add support for VPMOVZX with smaller input vector types.
SDValue Src = N.getOperand(0);
@@ -5810,36 +6016,38 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
return false;
}
+/// Removes unused shuffle source inputs and adjusts the shuffle mask
+/// accordingly.
+static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
+ SmallVectorImpl<int> &Mask) {
+ int MaskWidth = Mask.size();
+ SmallVector<SDValue, 16> UsedInputs;
+ for (int i = 0, e = Inputs.size(); i < e; ++i) {
+ int lo = UsedInputs.size() * MaskWidth;
+ int hi = lo + MaskWidth;
+ if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
+ UsedInputs.push_back(Inputs[i]);
+ continue;
+ }
+ for (int &M : Mask)
+ if (lo <= M)
+ M -= MaskWidth;
+ }
+ Inputs = UsedInputs;
+}
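// A worked example of the rebasing above: with two 4-wide inputs and mask
// {4, 5, 6, 7}, input 0 is unused; dropping it shifts every index at or past
// its slot down by MaskWidth, leaving {0, 1, 2, 3} over a single input.
#include <cassert>
#include <vector>

static void rebaseExample() {
  std::vector<int> Mask = {4, 5, 6, 7};
  const int MaskWidth = 4;
  const int lo = 0; // slot of the dropped input 0
  for (int &M : Mask)
    if (M >= lo)
      M -= MaskWidth;
  assert(Mask == (std::vector<int>{0, 1, 2, 3}));
}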
+
/// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs
/// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the
/// remaining input indices in case we now have a unary shuffle and adjust the
-/// Op0/Op1 inputs accordingly.
+/// inputs accordingly.
/// Returns true if the target shuffle mask was decoded.
-static bool resolveTargetShuffleInputs(SDValue Op, SDValue &Op0, SDValue &Op1,
+static bool resolveTargetShuffleInputs(SDValue Op,
+ SmallVectorImpl<SDValue> &Inputs,
SmallVectorImpl<int> &Mask) {
- SmallVector<SDValue, 2> Ops;
- if (!setTargetShuffleZeroElements(Op, Mask, Ops))
- if (!getFauxShuffleMask(Op, Mask, Ops))
+ if (!setTargetShuffleZeroElements(Op, Mask, Inputs))
+ if (!getFauxShuffleMask(Op, Mask, Inputs))
return false;
- int NumElts = Mask.size();
- bool Op0InUse = any_of(Mask, [NumElts](int Idx) {
- return 0 <= Idx && Idx < NumElts;
- });
- bool Op1InUse = any_of(Mask, [NumElts](int Idx) { return NumElts <= Idx; });
-
- Op0 = Op0InUse ? Ops[0] : SDValue();
- Op1 = Op1InUse ? Ops[1] : SDValue();
-
- // We're only using Op1 - commute the mask and inputs.
- if (!Op0InUse && Op1InUse) {
- for (int &M : Mask)
- if (NumElts <= M)
- M -= NumElts;
- Op0 = Op1;
- Op1 = SDValue();
- }
-
+ resolveTargetShuffleInputsAndMask(Inputs, Mask);
return true;
}
@@ -5914,10 +6122,9 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
/// Custom lower build_vector of v16i8.
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
- unsigned NumNonZero, unsigned NumZero,
- SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- const TargetLowering &TLI) {
+ unsigned NumNonZero, unsigned NumZero,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
if (NumNonZero > 8)
return SDValue();
@@ -5928,18 +6135,26 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
// SSE4.1 - use PINSRB to insert each byte directly.
if (Subtarget.hasSSE41()) {
for (unsigned i = 0; i < 16; ++i) {
- bool isNonZero = (NonZeros & (1 << i)) != 0;
- if (isNonZero) {
+ bool IsNonZero = (NonZeros & (1 << i)) != 0;
+ if (IsNonZero) {
+ // If the build vector contains zeros or our first insertion is not the
+ // first index, then insert into a zero vector to break any register
+ // dependency; else use SCALAR_TO_VECTOR/VZEXT_MOVL.
if (First) {
- if (NumZero)
- V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
- else
- V = DAG.getUNDEF(MVT::v16i8);
First = false;
+ if (NumZero || 0 != i)
+ V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
+ else {
+ assert(0 == i && "Expected insertion into zero-index");
+ V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
+ V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
+ V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
+ V = DAG.getBitcast(MVT::v16i8, V);
+ continue;
+ }
}
- V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
- MVT::v16i8, V, Op.getOperand(i),
- DAG.getIntPtrConstant(i, dl));
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i8, V,
+ Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
}
}
@@ -5958,24 +6173,35 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
}
if ((i & 1) != 0) {
+ // FIXME: Investigate extending to i32 instead of just i16.
+ // FIXME: Investigate combining the first 4 bytes as an i32 instead.
SDValue ThisElt, LastElt;
- bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
+ bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0;
if (LastIsNonZero) {
- LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
- MVT::i16, Op.getOperand(i-1));
+ LastElt =
+ DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i - 1));
}
if (ThisIsNonZero) {
ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
- ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
- ThisElt, DAG.getConstant(8, dl, MVT::i8));
+ ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, ThisElt,
+ DAG.getConstant(8, dl, MVT::i8));
if (LastIsNonZero)
ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
} else
ThisElt = LastElt;
- if (ThisElt.getNode())
- V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
- DAG.getIntPtrConstant(i/2, dl));
+ if (ThisElt) {
+ if (1 == i) {
+ V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32)
+ : DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32);
+ V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
+ V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
+ V = DAG.getBitcast(MVT::v8i16, V);
+ } else {
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
+ DAG.getIntPtrConstant(i / 2, dl));
+ }
+ }
}
}
@@ -5986,8 +6212,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- const TargetLowering &TLI) {
+ const X86Subtarget &Subtarget) {
if (NumNonZero > 4)
return SDValue();
@@ -5995,18 +6220,26 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
SDValue V;
bool First = true;
for (unsigned i = 0; i < 8; ++i) {
- bool isNonZero = (NonZeros & (1 << i)) != 0;
- if (isNonZero) {
+ bool IsNonZero = (NonZeros & (1 << i)) != 0;
+ if (IsNonZero) {
+ // If the build vector contains zeros or our first insertion is not the
+ // first index, then insert into a zero vector to break any register
+ // dependency; else use SCALAR_TO_VECTOR/VZEXT_MOVL.
if (First) {
- if (NumZero)
- V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
- else
- V = DAG.getUNDEF(MVT::v8i16);
First = false;
+ if (NumZero || 0 != i)
+ V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
+ else {
+ assert(0 == i && "Expected insertion into zero-index");
+ V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
+ V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
+ V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
+ V = DAG.getBitcast(MVT::v8i16, V);
+ continue;
+ }
}
- V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
- MVT::v8i16, V, Op.getOperand(i),
- DAG.getIntPtrConstant(i, dl));
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V,
+ Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
}
}
@@ -6015,8 +6248,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
/// Custom lower build_vector of v4i32 or v4f32.
static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- const TargetLowering &TLI) {
+ const X86Subtarget &Subtarget) {
// Find all zeroable elements.
std::bitset<4> Zeroable;
for (int i=0; i < 4; ++i) {
@@ -6212,7 +6444,7 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
///
/// Example: <load i32 *a, load i32 *a+4, zero, undef> -> zextload a
static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
- SDLoc &DL, SelectionDAG &DAG,
+ const SDLoc &DL, SelectionDAG &DAG,
bool isAfterLegalize) {
unsigned NumElems = Elts.size();
@@ -6376,14 +6608,14 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
return SDValue();
}
-static Constant *getConstantVector(MVT VT, APInt SplatValue,
+static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
unsigned SplatBitSize, LLVMContext &C) {
unsigned ScalarSize = VT.getScalarSizeInBits();
unsigned NumElm = SplatBitSize / ScalarSize;
SmallVector<Constant *, 32> ConstantVec;
for (unsigned i = 0; i < NumElm; i++) {
- APInt Val = SplatValue.lshr(ScalarSize * i).trunc(ScalarSize);
+ APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
Constant *Const;
if (VT.isFloatingPoint()) {
assert((ScalarSize == 32 || ScalarSize == 64) &&
@@ -6664,6 +6896,7 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
SDValue ExtIdx = Op.getOperand(i).getOperand(1);
+
// Quit if non-constant index.
if (!isa<ConstantSDNode>(ExtIdx))
return SDValue();
@@ -6694,11 +6927,10 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask);
- for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
- unsigned Idx = InsertIndices[i];
+
+ for (unsigned Idx : InsertIndices)
NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
DAG.getIntPtrConstant(Idx, DL));
- }
return NV;
}
@@ -7347,7 +7579,7 @@ static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
(VT == MVT::v8i32 && Subtarget.hasInt256()))
return Op;
- return getOnesVector(VT, Subtarget, DAG, DL);
+ return getOnesVector(VT, DAG, DL);
}
return SDValue();
@@ -7418,7 +7650,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// a constant pool load than it is to do a movd + shuffle.
if (ExtVT == MVT::i64 && !Subtarget.is64Bit() &&
(!IsAllConstants || Idx == 0)) {
- if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
+ if (DAG.MaskedValueIsZero(Item, APInt::getHighBitsSet(64, 32))) {
// Handle SSE only.
assert(VT == MVT::v2i64 && "Expected an SSE value type!");
MVT VecVT = MVT::v4i32;
@@ -7561,17 +7793,17 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16)
if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero,
- DAG, Subtarget, *this))
+ DAG, Subtarget))
return V;
if (EVTBits == 16 && NumElems == 8)
if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero,
- DAG, Subtarget, *this))
+ DAG, Subtarget))
return V;
// If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS
if (EVTBits == 32 && NumElems == 4)
- if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget, *this))
+ if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget))
return V;
// If element VT is == 32 bits, turn it into a number of shuffles.
@@ -7767,7 +7999,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
SDValue IdxVal = DAG.getIntPtrConstant(NumElems/2, dl);
if (V1.isUndef())
- V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal);
if (IsZeroV1)
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V2, IdxVal);
@@ -7956,7 +8188,7 @@ static bool isShuffleEquivalent(SDValue V1, SDValue V2, ArrayRef<int> Mask,
ExpectedBV->getOperand(ExpectedMask[i] % Size))
return false;
}
-}
+ }
return true;
}
@@ -7986,6 +8218,41 @@ static bool isTargetShuffleEquivalent(ArrayRef<int> Mask,
return true;
}
+// Merges a general DAG shuffle mask and zeroable bit mask into a target shuffle
+// mask.
+static SmallVector<int, 64> createTargetShuffleMask(ArrayRef<int> Mask,
+ const APInt &Zeroable) {
+ int NumElts = Mask.size();
+ assert(NumElts == (int)Zeroable.getBitWidth() && "Mismatch mask sizes");
+
+ SmallVector<int, 64> TargetMask(NumElts, SM_SentinelUndef);
+ for (int i = 0; i != NumElts; ++i) {
+ int M = Mask[i];
+ if (M == SM_SentinelUndef)
+ continue;
+ assert(0 <= M && M < (2 * NumElts) && "Out of range shuffle index");
+ TargetMask[i] = (Zeroable[i] ? SM_SentinelZero : M);
+ }
+ return TargetMask;
+}
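// The merge above in miniature: zeroable lanes override the source index with
// the zero sentinel, undef lanes stay undef, everything else passes through.
// Sentinel values here are illustrative stand-ins for SM_SentinelUndef/Zero:
#include <cstddef>
#include <vector>

enum { kUndef = -1, kZero = -2 };

static std::vector<int> mergeZeroable(const std::vector<int> &Mask,
                                      const std::vector<bool> &Zeroable) {
  std::vector<int> Target(Mask.size(), kUndef);
  for (std::size_t i = 0; i != Mask.size(); ++i)
    if (Mask[i] != kUndef)
      Target[i] = Zeroable[i] ? kZero : Mask[i];
  return Target;
}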
+
+// Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd
+// instructions.
+static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) {
+ if (VT != MVT::v8i32 && VT != MVT::v8f32)
+ return false;
+
+ SmallVector<int, 8> Unpcklwd;
+ createUnpackShuffleMask(MVT::v8i16, Unpcklwd, /* Lo = */ true,
+ /* Unary = */ false);
+ SmallVector<int, 8> Unpckhwd;
+ createUnpackShuffleMask(MVT::v8i16, Unpckhwd, /* Lo = */ false,
+ /* Unary = */ false);
+ bool IsUnpackwdMask = (isTargetShuffleEquivalent(Mask, Unpcklwd) ||
+ isTargetShuffleEquivalent(Mask, Unpckhwd));
+ return IsUnpackwdMask;
+}
+
/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
///
/// This helper function produces an 8-bit shuffle immediate corresponding to
@@ -8009,7 +8276,7 @@ static unsigned getV4X86ShuffleImm(ArrayRef<int> Mask) {
return Imm;
}
-static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
+static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
SelectionDAG &DAG) {
return DAG.getConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
}
@@ -8022,9 +8289,9 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, SDLoc DL,
/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
/// as many lanes with this technique as possible to simplify the remaining
/// shuffle.
-static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
- SDValue V1, SDValue V2) {
- SmallBitVector Zeroable(Mask.size(), false);
+static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
+ SDValue V1, SDValue V2) {
+ APInt Zeroable(Mask.size(), 0);
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
@@ -8039,7 +8306,7 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
int M = Mask[i];
// Handle the easy cases.
if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
- Zeroable[i] = true;
+ Zeroable.setBit(i);
continue;
}
@@ -8057,17 +8324,19 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
int Scale = Size / V->getNumOperands();
SDValue Op = V.getOperand(M / Scale);
if (Op.isUndef() || X86::isZeroNode(Op))
- Zeroable[i] = true;
+ Zeroable.setBit(i);
else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
APInt Val = Cst->getAPIntValue();
Val = Val.lshr((M % Scale) * ScalarSizeInBits);
Val = Val.getLoBits(ScalarSizeInBits);
- Zeroable[i] = (Val == 0);
+ if (Val == 0)
+ Zeroable.setBit(i);
} else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
APInt Val = Cst->getValueAPF().bitcastToAPInt();
Val = Val.lshr((M % Scale) * ScalarSizeInBits);
Val = Val.getLoBits(ScalarSizeInBits);
- Zeroable[i] = (Val == 0);
+ if (Val == 0)
+ Zeroable.setBit(i);
}
continue;
}
@@ -8081,7 +8350,8 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
SDValue Op = V.getOperand((M * Scale) + j);
AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op));
}
- Zeroable[i] = AllZeroable;
+ if (AllZeroable)
+ Zeroable.setBit(i);
continue;
}
}
@@ -8096,19 +8366,20 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
//
// The function looks for a sub-mask whose nonzero elements are in
// increasing order. If such a sub-mask exists, the function returns true.
-static bool isNonZeroElementsInOrder(const SmallBitVector Zeroable,
- ArrayRef<int> Mask,const EVT &VectorType,
+static bool isNonZeroElementsInOrder(const APInt &Zeroable,
+ ArrayRef<int> Mask, const EVT &VectorType,
bool &IsZeroSideLeft) {
int NextElement = -1;
// Check if the Mask's nonzero elements are in increasing order.
- for (int i = 0, e = Zeroable.size(); i < e; i++) {
+ for (int i = 0, e = Mask.size(); i < e; i++) {
// Checks if the mask's zero elements are built from only zeros.
- if (Mask[i] == -1)
+ assert(Mask[i] >= -1 && "Out of bound mask element!");
+ if (Mask[i] < 0)
return false;
if (Zeroable[i])
continue;
// Find the lowest nonzero element.
- if (NextElement == -1) {
+ if (NextElement < 0) {
NextElement = Mask[i] != 0 ? VectorType.getVectorNumElements() : 0;
IsZeroSideLeft = NextElement != 0;
}
@@ -8124,7 +8395,7 @@ static bool isNonZeroElementsInOrder(const SmallBitVector Zeroable,
static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Size = Mask.size();
@@ -8179,19 +8450,9 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
const X86Subtarget &Subtarget, SelectionDAG &DAG,
const SDLoc &dl);
-// Function convertBitVectorToUnsigned - The function gets SmallBitVector
-// as argument and convert him to unsigned.
-// The output of the function is not(zeroable)
-static unsigned convertBitVectorToUnsiged(const SmallBitVector &Zeroable) {
- unsigned convertBit = 0;
- for (int i = 0, e = Zeroable.size(); i < e; i++)
- convertBit |= !(Zeroable[i]) << i;
- return convertBit;
-}
-
// X86 has dedicated shuffle that can be lowered to VEXPAND
static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
ArrayRef<int> Mask, SDValue &V1,
SDValue &V2, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -8199,7 +8460,7 @@ static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(),
IsLeftZeroSide))
return SDValue();
- unsigned VEXPANDMask = convertBitVectorToUnsiged(Zeroable);
+ unsigned VEXPANDMask = (~Zeroable).getZExtValue();
MVT IntegerType =
MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
SDValue MaskNode = DAG.getConstant(VEXPANDMask, DL, IntegerType);
@@ -8215,6 +8476,91 @@ static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
ZeroVector);
}
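// The (~Zeroable).getZExtValue() change in lowerVectorShuffleToEXPAND above
// exploits that the VEXPAND execution mask is exactly the complement of the
// zeroable lane set: a set bit consumes the next source element, a clear bit
// writes zero. Sketched with std::bitset at a fixed 8 lanes (illustrative):
#include <bitset>
#include <cstdint>

static std::uint8_t expandMaskFromZeroable(std::bitset<8> Zeroable) {
  return static_cast<std::uint8_t>((~Zeroable).to_ulong());
}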
+static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
+ unsigned &UnpackOpcode, bool IsUnary,
+ ArrayRef<int> TargetMask, SDLoc &DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ int NumElts = VT.getVectorNumElements();
+
+ bool Undef1 = true, Undef2 = true, Zero1 = true, Zero2 = true;
+ for (int i = 0; i != NumElts; i += 2) {
+ int M1 = TargetMask[i + 0];
+ int M2 = TargetMask[i + 1];
+ Undef1 &= (SM_SentinelUndef == M1);
+ Undef2 &= (SM_SentinelUndef == M2);
+ Zero1 &= isUndefOrZero(M1);
+ Zero2 &= isUndefOrZero(M2);
+ }
+ assert(!((Undef1 || Zero1) && (Undef2 || Zero2)) &&
+ "Zeroable shuffle detected");
+
+ // Attempt to match the target mask against the unpack lo/hi mask patterns.
+ SmallVector<int, 64> Unpckl, Unpckh;
+ createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, IsUnary);
+ if (isTargetShuffleEquivalent(TargetMask, Unpckl)) {
+ UnpackOpcode = X86ISD::UNPCKL;
+ V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
+ V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
+ return true;
+ }
+
+ createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, IsUnary);
+ if (isTargetShuffleEquivalent(TargetMask, Unpckh)) {
+ UnpackOpcode = X86ISD::UNPCKH;
+ V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
+ V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
+ return true;
+ }
+
+ // If an unary shuffle, attempt to match as an unpack lo/hi with zero.
+ if (IsUnary && (Zero1 || Zero2)) {
+ // Don't bother if we can blend instead.
+ if ((Subtarget.hasSSE41() || VT == MVT::v2i64 || VT == MVT::v2f64) &&
+ isSequentialOrUndefOrZeroInRange(TargetMask, 0, NumElts, 0))
+ return false;
+
+ bool MatchLo = true, MatchHi = true;
+ for (int i = 0; (i != NumElts) && (MatchLo || MatchHi); ++i) {
+ int M = TargetMask[i];
+
+ // Ignore if the input is known to be zero or the index is undef.
+ if ((((i & 1) == 0) && Zero1) || (((i & 1) == 1) && Zero2) ||
+ (M == SM_SentinelUndef))
+ continue;
+
+ MatchLo &= (M == Unpckl[i]);
+ MatchHi &= (M == Unpckh[i]);
+ }
+
+ if (MatchLo || MatchHi) {
+ UnpackOpcode = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
+ V2 = Zero2 ? getZeroVector(VT, Subtarget, DAG, DL) : V1;
+ V1 = Zero1 ? getZeroVector(VT, Subtarget, DAG, DL) : V1;
+ return true;
+ }
+ }
+
+ // If a binary shuffle, commute and try again.
+ if (!IsUnary) {
+ ShuffleVectorSDNode::commuteMask(Unpckl);
+ if (isTargetShuffleEquivalent(TargetMask, Unpckl)) {
+ UnpackOpcode = X86ISD::UNPCKL;
+ std::swap(V1, V2);
+ return true;
+ }
+
+ ShuffleVectorSDNode::commuteMask(Unpckh);
+ if (isTargetShuffleEquivalent(TargetMask, Unpckh)) {
+ UnpackOpcode = X86ISD::UNPCKH;
+ std::swap(V1, V2);
+ return true;
+ }
+ }
+
+ return false;
+}
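// Reference pattern the matcher above tests against: UNPCKL/UNPCKH interleave
// the low/high halves of their two inputs, so for N elements the lo mask is
// {0, N, 1, N+1, ...} and the hi mask starts at N/2. A unary unpack reuses
// input 0 for both operands. Sketch for a single 128-bit register, ignoring
// the per-lane repetition of wider vectors (illustrative, not LLVM API):
#include <vector>

static std::vector<int> unpackMask(int NumElts, bool Lo, bool Unary) {
  std::vector<int> Mask;
  int Base = Lo ? 0 : NumElts / 2;
  for (int i = 0; i != NumElts / 2; ++i) {
    Mask.push_back(Base + i);                         // element from input 0
    Mask.push_back(Base + i + (Unary ? 0 : NumElts)); // element from input 1
  }
  return Mask;
}
// e.g. unpackMask(4, /*Lo=*/true, /*Unary=*/false) yields {0, 4, 1, 5}.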
+
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,
@@ -8248,13 +8594,12 @@ static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,
/// one of the inputs being zeroable.
static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SelectionDAG &DAG) {
assert(!VT.isFloatingPoint() && "Floating point types are not supported");
MVT EltVT = VT.getVectorElementType();
SDValue Zero = DAG.getConstant(0, DL, EltVT);
- SDValue AllOnes =
- DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, EltVT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT);
SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
SDValue V;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
@@ -8286,10 +8631,8 @@ static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
SelectionDAG &DAG) {
assert(VT.isInteger() && "Only supports integer vector types!");
MVT EltVT = VT.getVectorElementType();
- int NumEltBits = EltVT.getSizeInBits();
SDValue Zero = DAG.getConstant(0, DL, EltVT);
- SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL,
- EltVT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT);
SmallVector<SDValue, 16> MaskOps;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
if (Mask[i] >= 0 && Mask[i] != i && Mask[i] != i + Size)
@@ -8307,51 +8650,81 @@ static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
return DAG.getNode(ISD::OR, DL, VT, V1, V2);
}
-/// \brief Try to emit a blend instruction for a shuffle.
-///
-/// This doesn't do any checks for the availability of instructions for blending
-/// these values. It relies on the availability of the X86ISD::BLENDI pattern to
-/// be matched in the backend with the type given. What it does check for is
-/// that the shuffle mask is a blend, or convertible into a blend with zero.
-static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Original,
- const SmallBitVector &Zeroable,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
- bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
- SmallVector<int, 8> Mask(Original.begin(), Original.end());
- bool ForceV1Zero = false, ForceV2Zero = false;
+static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
+ SDValue PreservedSrc,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG);
+
+static bool matchVectorShuffleAsBlend(SDValue V1, SDValue V2,
+ MutableArrayRef<int> TargetMask,
+ bool &ForceV1Zero, bool &ForceV2Zero,
+ uint64_t &BlendMask) {
+ bool V1IsZeroOrUndef =
+ V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode());
+ bool V2IsZeroOrUndef =
+ V2.isUndef() || ISD::isBuildVectorAllZeros(V2.getNode());
+
+ BlendMask = 0;
+ ForceV1Zero = false, ForceV2Zero = false;
+ assert(TargetMask.size() <= 64 && "Shuffle mask too big for blend mask");
// Attempt to generate the binary blend mask. If an input is zero then
// we can use any lane.
// TODO: generalize the zero matching to any scalar like isShuffleEquivalent.
- unsigned BlendMask = 0;
- for (int i = 0, Size = Mask.size(); i < Size; ++i) {
- int M = Mask[i];
- if (M < 0)
+ for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
+ int M = TargetMask[i];
+ if (M == SM_SentinelUndef)
continue;
if (M == i)
continue;
if (M == i + Size) {
- BlendMask |= 1u << i;
+ BlendMask |= 1ull << i;
continue;
}
- if (Zeroable[i]) {
- if (V1IsZero) {
+ if (M == SM_SentinelZero) {
+ if (V1IsZeroOrUndef) {
ForceV1Zero = true;
- Mask[i] = i;
+ TargetMask[i] = i;
continue;
}
- if (V2IsZero) {
+ if (V2IsZeroOrUndef) {
ForceV2Zero = true;
- BlendMask |= 1u << i;
- Mask[i] = i + Size;
+ BlendMask |= 1ull << i;
+ TargetMask[i] = i + Size;
continue;
}
}
- return SDValue(); // Shuffled input!
+ return false;
}
+ return true;
+}
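// Worked example: for TargetMask {0, 5, 2, 7} (Size = 4), lanes 1 and 3 take
// M == i + Size from V2, so BlendMask becomes 0b1010; lanes 0 and 2 satisfy
// M == i and stay clear. An SM_SentinelZero lane is redirected to whichever
// input is already zero/undef via ForceV1Zero/ForceV2Zero rather than
// failing the match.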
+
+static uint64_t scaleVectorShuffleBlendMask(uint64_t BlendMask, int Size,
+                                            int Scale) {
+ uint64_t ScaledMask = 0;
+ for (int i = 0; i != Size; ++i)
+ if (BlendMask & (1ull << i))
+ ScaledMask |= ((1ull << Scale) - 1) << (i * Scale);
+ return ScaledMask;
+}
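// Usage sketch: scaleVectorShuffleBlendMask(0b0101, /*Size=*/4, /*Scale=*/2)
// == 0b00110011: each set bit expands into Scale contiguous set bits, so a
// v4i64 blend mask becomes the equivalent blend mask on v8i32 elements.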
+
+/// \brief Try to emit a blend instruction for a shuffle.
+///
+/// This doesn't do any checks for the availability of instructions for blending
+/// these values. It relies on the availability of the X86ISD::BLENDI pattern to
+/// be matched in the backend with the type given. What it does check for is
+/// that the shuffle mask is a blend, or convertible into a blend with zero.
+static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Original,
+ const APInt &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ SmallVector<int, 64> Mask = createTargetShuffleMask(Original, Zeroable);
+
+ uint64_t BlendMask = 0;
+ bool ForceV1Zero = false, ForceV2Zero = false;
+ if (!matchVectorShuffleAsBlend(V1, V2, Mask, ForceV1Zero, ForceV2Zero,
+ BlendMask))
+ return SDValue();
// Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs.
if (ForceV1Zero)
@@ -8359,15 +8732,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
if (ForceV2Zero)
V2 = getZeroVector(VT, Subtarget, DAG, DL);
- auto ScaleBlendMask = [](unsigned BlendMask, int Size, int Scale) {
- unsigned ScaledMask = 0;
- for (int i = 0; i != Size; ++i)
- if (BlendMask & (1u << i))
- for (int j = 0; j != Scale; ++j)
- ScaledMask |= 1u << (i * Scale + j);
- return ScaledMask;
- };
-
switch (VT.SimpleTy) {
case MVT::v2f64:
case MVT::v4f32:
@@ -8387,7 +8751,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
if (Subtarget.hasAVX2()) {
// Scale the blend by the number of 32-bit dwords per element.
int Scale = VT.getScalarSizeInBits() / 32;
- BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale);
+ BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32;
V1 = DAG.getBitcast(BlendVT, V1);
V2 = DAG.getBitcast(BlendVT, V2);
@@ -8400,7 +8764,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
// For integer shuffles we need to expand the mask and cast the inputs to
// v8i16s prior to blending.
int Scale = 8 / VT.getVectorNumElements();
- BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale);
+ BlendMask = scaleVectorShuffleBlendMask(BlendMask, Mask.size(), Scale);
V1 = DAG.getBitcast(MVT::v8i16, V1);
V2 = DAG.getBitcast(MVT::v8i16, V2);
return DAG.getBitcast(VT,
@@ -8417,7 +8781,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
BlendMask = 0;
for (int i = 0; i < 8; ++i)
if (RepeatedMask[i] >= 8)
- BlendMask |= 1u << i;
+ BlendMask |= 1ull << i;
return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
}
@@ -8428,6 +8792,13 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
assert((VT.is128BitVector() || Subtarget.hasAVX2()) &&
"256-bit byte-blends require AVX2 support!");
+ if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
+ MVT IntegerType =
+ MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
+ SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
+ return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
+ }
+
// Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
if (SDValue Masked =
lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
@@ -8465,7 +8836,17 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
VT, DAG.getNode(ISD::VSELECT, DL, BlendVT,
DAG.getBuildVector(BlendVT, DL, VSELECTMask), V1, V2));
}
-
+ case MVT::v16f32:
+ case MVT::v8f64:
+ case MVT::v8i64:
+ case MVT::v16i32:
+ case MVT::v32i16:
+ case MVT::v64i8: {
+ MVT IntegerType =
+ MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
+ SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
+ return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
+ }
default:
llvm_unreachable("Not a supported integer vector type!");
}
@@ -8503,7 +8884,7 @@ static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT,
return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask);
}
-/// \brief Generic routine to decompose a shuffle and blend into indepndent
+/// \brief Generic routine to decompose a shuffle and blend into independent
/// blends and permutes.
///
/// This matches the extremely common pattern for handling combined
@@ -8757,7 +9138,7 @@ static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
unsigned ScalarSizeInBits,
ArrayRef<int> Mask, int MaskOffset,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget) {
int Size = Mask.size();
unsigned SizeInBits = Size * ScalarSizeInBits;
@@ -8819,7 +9200,7 @@ static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Size = Mask.size();
@@ -8855,12 +9236,12 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SelectionDAG &DAG) {
int Size = Mask.size();
int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
- assert(!Zeroable.all() && "Fully zeroable shuffle mask");
+ assert(!Zeroable.isAllOnesValue() && "Fully zeroable shuffle mask");
// Upper half must be undefined.
if (!isUndefInRange(Mask, HalfSize, HalfSize))
@@ -8987,7 +9368,7 @@ static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
/// Given a specific number of elements, element bit width, and extension
/// stride, produce either a zero or any extension based on the available
/// features of the subtarget. The extended elements are consecutive and
-/// begin and can start from an offseted element index in the input; to
+/// can begin at an offset element index in the input; to
/// avoid excess shuffling the offset must either be in the bottom lane
/// or at the start of a higher lane. All extended elements must be from
/// the same lane.
@@ -9027,21 +9408,14 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
// Found a valid zext mask! Try various lowering strategies based on the
// input type and available ISA extensions.
if (Subtarget.hasSSE41()) {
- // Not worth offseting 128-bit vectors if scale == 2, a pattern using
+ // Not worth offsetting 128-bit vectors if scale == 2, a pattern using
// PUNPCK will catch this in a later shuffle match.
if (Offset && Scale == 2 && VT.is128BitVector())
return SDValue();
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
InputV = ShuffleOffset(InputV);
-
- // For 256-bit vectors, we only need the lower (128-bit) input half.
- // For 512-bit vectors, we only need the lower input half or quarter.
- if (VT.getSizeInBits() > 128)
- InputV = extractSubVector(InputV, 0, DAG, DL,
- std::max(128, (int)VT.getSizeInBits() / Scale));
-
- InputV = DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV);
+ InputV = getExtendInVec(X86ISD::VZEXT, DL, ExtVT, InputV, DAG);
return DAG.getBitcast(VT, InputV);
}
@@ -9158,7 +9532,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
/// are both incredibly common and often quite performance sensitive.
static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable, const X86Subtarget &Subtarget,
+ const APInt &Zeroable, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Bits = VT.getSizeInBits();
int NumLanes = Bits / 128;
@@ -9314,7 +9688,7 @@ static bool isShuffleFoldableLoad(SDValue V) {
/// across all subtarget feature sets.
static SDValue lowerVectorShuffleAsElementInsertion(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable, const X86Subtarget &Subtarget,
+ const APInt &Zeroable, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT ExtVT = VT;
MVT EltVT = VT.getVectorElementType();
@@ -9612,7 +9986,16 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
if (((BroadcastIdx * EltSize) % 128) != 0)
return SDValue();
- MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 128 / EltSize);
+ // The shuffle input might have been a bitcast we looked through; look at
+ // the original input vector. Emit an EXTRACT_SUBVECTOR of that type; we'll
+ // later bitcast it to BroadcastVT.
+ MVT SrcVT = V.getSimpleValueType();
+ assert(SrcVT.getScalarSizeInBits() == BroadcastVT.getScalarSizeInBits() &&
+ "Unexpected vector element size");
+ assert((SrcVT.is256BitVector() || SrcVT.is512BitVector()) &&
+ "Unexpected vector size");
+
+ MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(), 128 / EltSize);
V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, V,
DAG.getIntPtrConstant(BroadcastIdx, DL));
}
@@ -9642,6 +10025,12 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
BroadcastVT = MVT::getVectorVT(MVT::f64, NumBroadcastElts);
}
+ // We only support broadcasting from 128-bit vectors to minimize the
+ // number of patterns we need to deal with in isel. So extract down to
+ // 128 bits.
+ if (SrcVT.getSizeInBits() > 128)
+ V = extract128BitVector(V, 0, DAG, DL);
+
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
}
@@ -9653,7 +10042,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
// elements are zeroable.
static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
unsigned &InsertPSMask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
ArrayRef<int> Mask,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");
@@ -9742,7 +10131,7 @@ static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
@@ -9877,7 +10266,7 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
/// it is better to avoid lowering through this for integer vectors where
/// possible.
static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -9959,7 +10348,7 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// it falls back to the floating point shuffle operation with appropriate bit
/// casting.
static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -10178,7 +10567,7 @@ static SDValue lowerVectorShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
/// domain crossing penalties, as these are sufficient to implement all v4f32
/// shuffles.
static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -10261,7 +10650,7 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// We try to handle these with integer-domain shuffles where we can, but for
/// blends we use the floating point domain blend instructions.
static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -10353,7 +10742,7 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// We implement this with SHUFPS because it can blend from two vectors.
// Because we're going to eventually use SHUFPS, we use SHUFPS even to build
- // up the inputs, bypassing domain shift penalties that we would encur if we
+ // up the inputs, bypassing domain shift penalties that we would incur if we
// directly used PSHUFD on Nehalem and older. For newer chips, this isn't
// relevant.
SDValue CastV1 = DAG.getBitcast(MVT::v4f32, V1);
@@ -10384,18 +10773,16 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!");
MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);
- assert(Mask.size() == 8 && "Shuffle mask length doen't match!");
+ assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
MutableArrayRef<int> LoMask = Mask.slice(0, 4);
MutableArrayRef<int> HiMask = Mask.slice(4, 4);
SmallVector<int, 4> LoInputs;
- std::copy_if(LoMask.begin(), LoMask.end(), std::back_inserter(LoInputs),
- [](int M) { return M >= 0; });
+ copy_if(LoMask, std::back_inserter(LoInputs), [](int M) { return M >= 0; });
std::sort(LoInputs.begin(), LoInputs.end());
LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end());
SmallVector<int, 4> HiInputs;
- std::copy_if(HiMask.begin(), HiMask.end(), std::back_inserter(HiInputs),
- [](int M) { return M >= 0; });
+ copy_if(HiMask, std::back_inserter(HiInputs), [](int M) { return M >= 0; });
std::sort(HiInputs.begin(), HiInputs.end());
HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end());
int NumLToL =
@@ -10574,7 +10961,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
};
if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
- else if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3))
+ if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3))
return balanceSides(HToHInputs, LToHInputs, LToLInputs, HToLInputs, 4, 0);
// At this point there are at most two inputs to the low and high halves from
@@ -10830,7 +11217,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
/// blend if only one input is used.
static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable, SelectionDAG &DAG, bool &V1InUse,
+ const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse,
bool &V2InUse) {
SDValue V1Mask[16];
SDValue V2Mask[16];
@@ -10891,7 +11278,7 @@ static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
/// halves of the inputs separately (making them have relatively few inputs)
/// and then concatenate them.
static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11075,7 +11462,7 @@ static int canLowerByDroppingEvenElements(ArrayRef<int> Mask,
/// the existing lowering for v8i16 blends on each half, finally PACK-ing them
/// back together.
static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11132,14 +11519,13 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (!canWidenViaDuplication(Mask))
return SDValue();
SmallVector<int, 4> LoInputs;
- std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(LoInputs),
- [](int M) { return M >= 0 && M < 8; });
+ copy_if(Mask, std::back_inserter(LoInputs),
+ [](int M) { return M >= 0 && M < 8; });
std::sort(LoInputs.begin(), LoInputs.end());
LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()),
LoInputs.end());
SmallVector<int, 4> HiInputs;
- std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(HiInputs),
- [](int M) { return M >= 8; });
+ copy_if(Mask, std::back_inserter(HiInputs), [](int M) { return M >= 8; });
std::sort(HiInputs.begin(), HiInputs.end());
HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()),
HiInputs.end());
@@ -11193,7 +11579,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
PostDupI16Shuffle[i / 2] = MappedMask;
else
assert(PostDupI16Shuffle[i / 2] == MappedMask &&
- "Conflicting entrties in the original shuffle!");
+ "Conflicting entries in the original shuffle!");
}
return DAG.getBitcast(
MVT::v16i8,
@@ -11365,7 +11751,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// dispatches to the lowering routines accordingly.
static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
switch (VT.SimpleTy) {
@@ -11621,7 +12007,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT,
/// \brief Handle lowering 2-lane 128-bit shuffles.
static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SmallVector<int, 4> WidenedMask;
@@ -12091,7 +12477,7 @@ static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
unsigned &ShuffleImm,
ArrayRef<int> Mask) {
int NumElts = VT.getVectorNumElements();
- assert(VT.getScalarType() == MVT::f64 &&
+ assert(VT.getScalarSizeInBits() == 64 &&
(NumElts == 2 || NumElts == 4 || NumElts == 8) &&
"Unexpected data type for VSHUFPD");
@@ -12127,6 +12513,9 @@ static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
+ assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) &&
+ "Unexpected data type for VSHUFPD");
+
unsigned Immediate = 0;
if (!matchVectorShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask))
return SDValue();
@@ -12153,7 +12542,7 @@ static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12250,7 +12639,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v4i64 shuffling..
static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12338,7 +12727,7 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12414,6 +12803,14 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
V1, V2, DAG, Subtarget))
return V;
+ // For non-AVX512, if the mask is of 16-bit elements in lane, try to split
+ // since after the split we get more efficient code, using vpunpcklwd and
+ // vpunpckhwd instead of vblend.
+ if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32))
+ if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2,
+ Mask, DAG))
+ return V;
+
// If we have AVX2 then we always want to lower with a blend because at v8 we
// can fully permute the elements.
if (Subtarget.hasAVX2())
@@ -12429,7 +12826,7 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v8i32 shuffling..
static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12445,6 +12842,15 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v8i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
+ // For non-AVX512, if the mask is of 16-bit elements in lane, try to split
+ // since after the split we get more efficient code than vblend by using
+ // vpunpcklwd and vpunpckhwd.
+ if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() &&
+ !Subtarget.hasAVX512())
+ if (SDValue V =
+ lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, DAG))
+ return V;
+
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
Zeroable, Subtarget, DAG))
return Blend;
@@ -12533,7 +12939,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v16i16 shuffling..
static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12619,7 +13025,7 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v32i8 shuffling..
static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12692,7 +13098,7 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// together based on the available instructions.
static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// If we have a single input to the zero element, insert that into V1 if we
@@ -12844,7 +13250,7 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12891,12 +13297,16 @@ static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
V2, DAG, Subtarget))
return V;
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return Blend;
+
return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
-static SDValue lowerV16F32VectorShuffle(SDLoc DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12925,6 +13335,10 @@ static SDValue lowerV16F32VectorShuffle(SDLoc DL, ArrayRef<int> Mask,
lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
return Unpck;
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return Blend;
+
// Otherwise, fall back to a SHUFPS sequence.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
}
@@ -12938,7 +13352,7 @@ static SDValue lowerV16F32VectorShuffle(SDLoc DL, ArrayRef<int> Mask,
/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12994,12 +13408,16 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
V2, DAG, Subtarget))
return V;
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return Blend;
+
return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -13062,12 +13480,15 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
V1, V2, DAG, Subtarget))
return V;
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return Blend;
return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -13109,12 +13530,16 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
}
}
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return Blend;
+
return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 64-lane 8-bit integer shuffles.
static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -13159,6 +13584,10 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
return V;
+ if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return Blend;
+
// FIXME: Implement direct support for this type!
return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
}
@@ -13170,7 +13599,7 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// together based on the available instructions.
static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
- const SmallBitVector &Zeroable,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
@@ -13251,7 +13680,7 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (ISD::isBuildVectorAllZeros(V1.getNode()))
V1 = getZeroVector(ExtVT, Subtarget, DAG, DL);
else if (ISD::isBuildVectorAllOnes(V1.getNode()))
- V1 = getOnesVector(ExtVT, Subtarget, DAG, DL);
+ V1 = getOnesVector(ExtVT, DAG, DL);
else
V1 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V1);
@@ -13260,7 +13689,7 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
else if (ISD::isBuildVectorAllZeros(V2.getNode()))
V2 = getZeroVector(ExtVT, Subtarget, DAG, DL);
else if (ISD::isBuildVectorAllOnes(V2.getNode()))
- V2 = getOnesVector(ExtVT, Subtarget, DAG, DL);
+ V2 = getOnesVector(ExtVT, DAG, DL);
else
V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2);
@@ -13392,8 +13821,8 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
// We actually see shuffles that are entirely re-arrangements of a set of
// zero inputs. This mostly happens while decomposing complex shuffles into
// simple ones. Directly lower these as a buildvector of zeros.
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
- if (Zeroable.all())
+ APInt Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ if (Zeroable.isAllOnesValue())
return getZeroVector(VT, Subtarget, DAG, DL);
// Try to collapse shuffles into using a vector type with fewer elements but
@@ -13569,10 +13998,14 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const
"Unexpected vector type in ExtractBitFromMaskVector");
// variable index can't be handled in mask registers,
- // extend vector to VR512
+ // extend vector to VR512/128
if (!isa<ConstantSDNode>(Idx)) {
- MVT ExtVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
- SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Vec);
+ unsigned NumElts = VecVT.getVectorNumElements();
+ // Extending v8i1/v16i1 to 512 bits gets better performance on KNL
+ // than extending to 128/256 bits.
+ unsigned VecSize = (NumElts <= 4 ? 128 : 512);
+ MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(VecSize/NumElts), NumElts);
+ SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVT, Vec);
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
ExtVT.getVectorElementType(), Ext, Idx);
return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
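  // Illustrative example: a variable-index extract from v8i1 or v16i1
  // sign-extends to v8i64 or v16i32 (VecSize = 512), extracts the wide
  // element, and truncates back to i1; v2i1/v4i1 take the 128-bit path
  // (v2i64/v4i32).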
@@ -13590,9 +14023,9 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const
}
unsigned MaxSift = VecVT.getVectorNumElements() - 1;
if (MaxSift - IdxVal)
- Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8));
- Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(MaxSift, dl, MVT::i8));
return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i1, Vec,
DAG.getIntPtrConstant(0, dl));
@@ -13610,24 +14043,36 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return ExtractBitFromMaskVector(Op, DAG);
if (!isa<ConstantSDNode>(Idx)) {
- if (VecVT.is512BitVector() ||
- (VecVT.is256BitVector() && Subtarget.hasInt256() &&
- VecVT.getScalarSizeInBits() == 32)) {
-
- MVT MaskEltVT =
- MVT::getIntegerVT(VecVT.getScalarSizeInBits());
- MVT MaskVT = MVT::getVectorVT(MaskEltVT, VecVT.getSizeInBits() /
- MaskEltVT.getSizeInBits());
+ // It's more profitable to go through memory (1 cycle throughput)
+ // than to use a VMOVD + VPERMV/PSHUFB sequence (2/3 cycles throughput).
+ // The IACA tool was used to get the performance estimates
+ // (https://software.intel.com/en-us/articles/intel-architecture-code-analyzer).
+ //
+ // Example: extractelement <16 x i8> %a, i32 %i
+ //
+ // Block Throughput: 3.00 Cycles
+ // Throughput Bottleneck: Port5
+ //
+ // | Num Of | Ports pressure in cycles | |
+ // | Uops | 0 - DV | 5 | 6 | 7 | |
+ // ---------------------------------------------
+ // | 1 | | 1.0 | | | CP | vmovd xmm1, edi
+ // | 1 | | 1.0 | | | CP | vpshufb xmm0, xmm0, xmm1
+ // | 2 | 1.0 | 1.0 | | | CP | vpextrb eax, xmm0, 0x0
+ // Total Num Of Uops: 4
+ //
+ //
+ // Block Throughput: 1.00 Cycles
+ // Throughput Bottleneck: PORT2_AGU, PORT3_AGU, Port4
+ //
+ // | | Ports pressure in cycles | |
+ // |Uops| 1 | 2 - D |3 - D | 4 | 5 | |
+ // ---------------------------------------------------------
+ // |2^ | | 0.5 | 0.5 |1.0| |CP| vmovaps xmmword ptr [rsp-0x18], xmm0
+ // |1 |0.5| | | |0.5| | lea rax, ptr [rsp-0x18]
+ // |1 | |0.5, 0.5|0.5, 0.5| | |CP| mov al, byte ptr [rdi+rax*1]
+ // Total Num Of Uops: 4
- Idx = DAG.getZExtOrTrunc(Idx, dl, MaskEltVT);
- auto PtrVT = getPointerTy(DAG.getDataLayout());
- SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT,
- getZeroVector(MaskVT, Subtarget, DAG, dl), Idx,
- DAG.getConstant(0, dl, PtrVT));
- SDValue Perm = DAG.getNode(X86ISD::VPERMV, dl, VecVT, Mask, Vec);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Perm,
- DAG.getConstant(0, dl, PtrVT));
- }
return SDValue();
}
@@ -13675,7 +14120,33 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
return Res;
- // TODO: handle v16i8.
+ // TODO: We only extract a single element from v16i8; we can probably afford
+ // to be more aggressive here before using the default approach of spilling
+ // to the stack.
+ if (VT.getSizeInBits() == 8 && Op->isOnlyUserOf(Vec.getNode())) {
+ // Extract either the lowest i32 or any i16, and extract the sub-byte.
+ int DWordIdx = IdxVal / 4;
+ if (DWordIdx == 0) {
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
+ DAG.getBitcast(MVT::v4i32, Vec),
+ DAG.getIntPtrConstant(DWordIdx, dl));
+ int ShiftVal = (IdxVal % 4) * 8;
+ if (ShiftVal != 0)
+ Res = DAG.getNode(ISD::SRL, dl, MVT::i32, Res,
+ DAG.getConstant(ShiftVal, dl, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ }
+
+ int WordIdx = IdxVal / 2;
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
+ DAG.getBitcast(MVT::v8i16, Vec),
+ DAG.getIntPtrConstant(WordIdx, dl));
+ int ShiftVal = (IdxVal % 2) * 8;
+ if (ShiftVal != 0)
+ Res = DAG.getNode(ISD::SRL, dl, MVT::i16, Res,
+ DAG.getConstant(ShiftVal, dl, MVT::i16));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
+ }
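  // Worked example: extracting byte 5 misses the DWordIdx == 0 fast path
  // (5 / 4 == 1), so the word path extracts i16 element 2 (bytes 4-5), shifts
  // right by (5 % 2) * 8 == 8 bits, and truncates the result to i8.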
if (VT.getSizeInBits() == 32) {
if (IdxVal == 0)
@@ -13734,7 +14205,7 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
if(Vec.isUndef()) {
if (IdxVal)
- EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
+ EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
return EltInVec;
}
@@ -13744,21 +14215,21 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
if (IdxVal == 0 ) {
// EltInVec already at correct index and other bits are 0.
// Clean the first bit in source vector.
- Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(1 , dl, MVT::i8));
- Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(1, dl, MVT::i8));
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
if (IdxVal == NumElems -1) {
// Move the bit to the last position inside the vector.
- EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
+ EltInVec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
// Clean the last bit in the source vector.
- Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTL, dl, VecVT, Vec,
DAG.getConstant(1, dl, MVT::i8));
- Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
DAG.getConstant(1 , dl, MVT::i8));
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
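  // In rough assembly terms (a sketch assuming v8i1 in k-registers and
  // AVX512DQ byte shifts), the IdxVal == NumElems - 1 path above becomes:
  //   kshiftlb k1, k1, 7   ; move the new bit to the top position
  //   kshiftlb k0, k0, 1   ; shift the old top bit out...
  //   kshiftrb k0, k0, 1   ; ...and shift back, clearing it
  //   korb     k0, k0, k1  ; merge the new bit in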
@@ -13790,17 +14261,21 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
auto *N2C = cast<ConstantSDNode>(N2);
unsigned IdxVal = N2C->getZExtValue();
- // If we are clearing out a element, we do this more efficiently with a
- // blend shuffle than a costly integer insertion.
- // TODO: would other rematerializable values (e.g. allbits) benefit as well?
+ bool IsZeroElt = X86::isZeroNode(N1);
+ bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1);
+
+ // If we are inserting an element, see if we can do this more efficiently
+ // with a blend shuffle against a rematerializable vector than with a costly
+ // integer insertion.
// TODO: pre-SSE41 targets will tend to use bit masking - this could still
// be beneficial if we are inserting several zeros and can combine the masks.
- if (X86::isZeroNode(N1) && Subtarget.hasSSE41() && NumElts <= 8) {
- SmallVector<int, 8> ClearMask;
+ if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() && NumElts <= 8) {
+ SmallVector<int, 8> BlendMask;
for (unsigned i = 0; i != NumElts; ++i)
- ClearMask.push_back(i == IdxVal ? i + NumElts : i);
- SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, dl);
- return DAG.getVectorShuffle(VT, dl, N0, ZeroVector, ClearMask);
+ BlendMask.push_back(i == IdxVal ? i + NumElts : i);
+ SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl)
+ : DAG.getConstant(-1, dl, VT);
+ return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask);
}
// If the vector is wider than 128 bits, extract the 128-bit subvector, insert
@@ -13837,25 +14312,27 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
}
assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
- if (Subtarget.hasSSE41()) {
- if (EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) {
- unsigned Opc;
- if (VT == MVT::v8i16) {
- Opc = X86ISD::PINSRW;
- } else {
- assert(VT == MVT::v16i8);
- Opc = X86ISD::PINSRB;
- }
-
- // Transform it so it match pinsr{b,w} which expects a GR32 as its second
- // argument.
- if (N1.getValueType() != MVT::i32)
- N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
- if (N2.getValueType() != MVT::i32)
- N2 = DAG.getIntPtrConstant(IdxVal, dl);
- return DAG.getNode(Opc, dl, VT, N0, N1, N2);
+ // Transform it so it matches pinsr{b,w}, which expects a GR32 as its second
+ // argument. SSE41 is required for pinsrb.
+ if (VT == MVT::v8i16 || (VT == MVT::v16i8 && Subtarget.hasSSE41())) {
+ unsigned Opc;
+ if (VT == MVT::v8i16) {
+ assert(Subtarget.hasSSE2() && "SSE2 required for PINSRW");
+ Opc = X86ISD::PINSRW;
+ } else {
+ assert(VT == MVT::v16i8 && "PINSRB requires v16i8 vector");
+ assert(Subtarget.hasSSE41() && "SSE41 required for PINSRB");
+ Opc = X86ISD::PINSRB;
}
+ if (N1.getValueType() != MVT::i32)
+ N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
+ if (N2.getValueType() != MVT::i32)
+ N2 = DAG.getIntPtrConstant(IdxVal, dl);
+ return DAG.getNode(Opc, dl, VT, N0, N1, N2);
+ }
+
+ if (Subtarget.hasSSE41()) {
if (EltVT == MVT::f32) {
// Bits [7:6] of the constant are the source select. This will always be
// zero here. The DAG Combiner may combine an extract_elt index into
@@ -13885,36 +14362,29 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
}
- if (EltVT == MVT::i32 || EltVT == MVT::i64) {
- // PINSR* works with constant index.
+ // PINSR* works with constant index.
+ if (EltVT == MVT::i32 || EltVT == MVT::i64)
return Op;
- }
}
- if (EltVT == MVT::i8)
- return SDValue();
-
- if (EltVT.getSizeInBits() == 16) {
- // Transform it so it match pinsrw which expects a 16-bit value in a GR32
- // as its second argument.
- if (N1.getValueType() != MVT::i32)
- N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
- if (N2.getValueType() != MVT::i32)
- N2 = DAG.getIntPtrConstant(IdxVal, dl);
- return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
- }
return SDValue();
}
-static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
SDLoc dl(Op);
MVT OpVT = Op.getSimpleValueType();
+ // It's always cheaper to replace an xor+movd pair with xorps, and doing so
+ // simplifies further combines.
+ if (X86::isZeroNode(Op.getOperand(0)))
+ return getZeroVector(OpVT, Subtarget, DAG, dl);
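  // E.g. (sketch): (v4i32 (scalar_to_vector (i32 0))) now becomes a zero
  // vector that selects to a single "xorps xmm0, xmm0" rather than
  // materializing the scalar zero and inserting it with
  // "xor eax, eax" + "movd xmm0, eax".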
+
// If this is a 256-bit vector result, first insert into a 128-bit
// vector and then insert into the 256-bit vector.
if (!OpVT.is128BitVector()) {
// Insert into a 128-bit vector.
- unsigned SizeFactor = OpVT.getSizeInBits()/128;
+ unsigned SizeFactor = OpVT.getSizeInBits() / 128;
MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(),
OpVT.getVectorNumElements() / SizeFactor);
@@ -13923,9 +14393,13 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
// Insert the 128-bit vector.
return insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl);
}
+ assert(OpVT.is128BitVector() && "Expected an SSE type!");
+
+ // Pass through a v4i32 SCALAR_TO_VECTOR as that's what we use in tblgen.
+ if (OpVT == MVT::v4i32)
+ return Op;
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
- assert(OpVT.is128BitVector() && "Expected an SSE type!");
return DAG.getBitcast(
OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, AnyExt));
}
@@ -13947,20 +14421,14 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
In.getSimpleValueType().is512BitVector()) &&
"Can only extract from 256-bit or 512-bit vectors");
- if (ResVT.is128BitVector())
- return extract128BitVector(In, IdxVal, DAG, dl);
- if (ResVT.is256BitVector())
- return extract256BitVector(In, IdxVal, DAG, dl);
-
- llvm_unreachable("Unimplemented!");
-}
+ // If the input is a buildvector, just emit a smaller one.
+ unsigned ElemsPerChunk = ResVT.getVectorNumElements();
+ if (In.getOpcode() == ISD::BUILD_VECTOR)
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, ResVT,
+ makeArrayRef(In->op_begin() + IdxVal, ElemsPerChunk));
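  // E.g. extracting the v4i32 subvector at index 4 of a v8i32 BUILD_VECTOR
  // rebuilds a smaller BUILD_VECTOR from operands 4..7 of the original node.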
-static bool areOnlyUsersOf(SDNode *N, ArrayRef<SDValue> ValidUsers) {
- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I)
- if (llvm::all_of(ValidUsers,
- [&I](SDValue V) { return V.getNode() != *I; }))
- return false;
- return true;
+ // Everything else is legal.
+ return Op;
}
// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
@@ -13968,83 +14436,9 @@ static bool areOnlyUsersOf(SDNode *N, ArrayRef<SDValue> ValidUsers) {
// the upper bits of a vector.
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- assert(Subtarget.hasAVX() && "INSERT_SUBVECTOR requires AVX");
-
- SDLoc dl(Op);
- SDValue Vec = Op.getOperand(0);
- SDValue SubVec = Op.getOperand(1);
- SDValue Idx = Op.getOperand(2);
-
- unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- MVT OpVT = Op.getSimpleValueType();
- MVT SubVecVT = SubVec.getSimpleValueType();
-
- if (OpVT.getVectorElementType() == MVT::i1)
- return insert1BitVector(Op, DAG, Subtarget);
-
- assert((OpVT.is256BitVector() || OpVT.is512BitVector()) &&
- "Can only insert into 256-bit or 512-bit vectors");
+ assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1);
- // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte
- // load:
- // (insert_subvector (insert_subvector undef, (load16 addr), 0),
- // (load16 addr + 16), Elts/2)
- // --> load32 addr
- // or:
- // (insert_subvector (insert_subvector undef, (load32 addr), 0),
- // (load32 addr + 32), Elts/2)
- // --> load64 addr
- // or a 16-byte or 32-byte broadcast:
- // (insert_subvector (insert_subvector undef, (load16 addr), 0),
- // (load16 addr), Elts/2)
- // --> X86SubVBroadcast(load16 addr)
- // or:
- // (insert_subvector (insert_subvector undef, (load32 addr), 0),
- // (load32 addr), Elts/2)
- // --> X86SubVBroadcast(load32 addr)
- if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
- Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
- OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) {
- auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
- if (Idx2 && Idx2->getZExtValue() == 0) {
- SDValue SubVec2 = Vec.getOperand(1);
- // If needed, look through bitcasts to get to the load.
- if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
- bool Fast;
- unsigned Alignment = FirstLd->getAlignment();
- unsigned AS = FirstLd->getAddressSpace();
- const X86TargetLowering *TLI = Subtarget.getTargetLowering();
- if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
- OpVT, AS, Alignment, &Fast) && Fast) {
- SDValue Ops[] = {SubVec2, SubVec};
- if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
- return Ld;
- }
- }
- // If lower/upper loads are the same and the only users of the load, then
- // lower to a VBROADCASTF128/VBROADCASTI128/etc.
- if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) {
- if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
- areOnlyUsersOf(SubVec2.getNode(), {Op, Vec})) {
- return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
- }
- }
- // If this is subv_broadcast insert into both halves, use a larger
- // subv_broadcast.
- if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) {
- return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT,
- SubVec.getOperand(0));
- }
- }
- }
-
- if (SubVecVT.is128BitVector())
- return insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
-
- if (SubVecVT.is256BitVector())
- return insert256BitVector(Vec, SubVec, IdxVal, DAG, dl);
-
- llvm_unreachable("Unimplemented!");
+ return insert1BitVector(Op, DAG, Subtarget);
}
// Returns the appropriate wrapper opcode for a global reference.
@@ -14062,7 +14456,7 @@ unsigned X86TargetLowering::getGlobalWrapperKind(const GlobalValue *GV) const {
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
-// their target countpart wrapped in the X86ISD::Wrapper node. Suppose N is
+// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
@@ -14438,7 +14832,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
Subtarget.isTargetWindowsItanium() ||
Subtarget.isTargetWindowsGNU()) {
// Just use the implicit TLS architecture
- // Need to generate someting similar to:
+ // Need to generate something similar to:
// mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
// ; from TEB
// mov ecx, dword [rel _tls_index]: Load index (from C runtime)
@@ -15489,32 +15883,21 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
// word to byte only under BWI
if (InVT == MVT::v16i16 && !Subtarget.hasBWI()) // v16i16 -> v16i8
return DAG.getNode(X86ISD::VTRUNC, DL, VT,
- DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In));
+ getExtendInVec(X86ISD::VSEXT, DL, MVT::v16i32, In, DAG));
return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
}
- // Truncate with PACKSS if we are truncating a vector comparison result.
- // TODO: We should be able to support other operations as long as we
- // we are saturating+packing zero/all bits only.
- auto IsPackableComparison = [](SDValue V) {
- unsigned Opcode = V.getOpcode();
- return (Opcode == X86ISD::PCMPGT || Opcode == X86ISD::PCMPEQ ||
- Opcode == X86ISD::CMPP);
- };
-
- if (IsPackableComparison(In) || (In.getOpcode() == ISD::CONCAT_VECTORS &&
- all_of(In->ops(), IsPackableComparison))) {
+ // Truncate with PACKSS if we are truncating a vector zero/all-bits result.
+ if (InVT.getScalarSizeInBits() == DAG.ComputeNumSignBits(In))
if (SDValue V = truncateVectorCompareWithPACKSS(VT, In, DL, DAG, Subtarget))
return V;
- }
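  // (Why the sign-bit check suffices: if ComputeNumSignBits equals the scalar
  // bit width, every element is all-zeros or all-ones, so the signed
  // saturation in PACKSS reproduces the exact 0/-1 value at the narrower
  // width.)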
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget.hasInt256()) {
static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
In = DAG.getBitcast(MVT::v8i32, In);
- In = DAG.getVectorShuffle(MVT::v8i32, DL, In, DAG.getUNDEF(MVT::v8i32),
- ShufMask);
+ In = DAG.getVectorShuffle(MVT::v8i32, DL, In, In, ShufMask);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
DAG.getIntPtrConstant(0, DL));
}
@@ -15530,30 +15913,20 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
}
if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
- // On AVX2, v8i32 -> v8i16 becomed PSHUFB.
+ // On AVX2, v8i32 -> v8i16 becomes PSHUFB.
if (Subtarget.hasInt256()) {
In = DAG.getBitcast(MVT::v32i8, In);
- SmallVector<SDValue,32> pshufbMask;
- for (unsigned i = 0; i < 2; ++i) {
- pshufbMask.push_back(DAG.getConstant(0x0, DL, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x1, DL, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x4, DL, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x5, DL, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x8, DL, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0x9, DL, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0xc, DL, MVT::i8));
- pshufbMask.push_back(DAG.getConstant(0xd, DL, MVT::i8));
- for (unsigned j = 0; j < 8; ++j)
- pshufbMask.push_back(DAG.getConstant(0x80, DL, MVT::i8));
- }
- SDValue BV = DAG.getBuildVector(MVT::v32i8, DL, pshufbMask);
- In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV);
+ // The PSHUFB mask:
+ static const int ShufMask1[] = { 0, 1, 4, 5, 8, 9, 12, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ 16, 17, 20, 21, 24, 25, 28, 29,
+ -1, -1, -1, -1, -1, -1, -1, -1 };
+ In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1);
In = DAG.getBitcast(MVT::v4i64, In);
- static const int ShufMask[] = {0, 2, -1, -1};
- In = DAG.getVectorShuffle(MVT::v4i64, DL, In, DAG.getUNDEF(MVT::v4i64),
- ShufMask);
+ static const int ShufMask2[] = {0, 2, -1, -1};
+ In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2);
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
DAG.getIntPtrConstant(0, DL));
return DAG.getBitcast(VT, In);
@@ -15572,9 +15945,8 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
-1, -1, -1, -1, -1, -1, -1, -1};
- SDValue Undef = DAG.getUNDEF(MVT::v16i8);
- OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, Undef, ShufMask1);
- OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, Undef, ShufMask1);
+ OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, OpLo, ShufMask1);
+ OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, OpHi, ShufMask1);
OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
@@ -15598,17 +15970,14 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
// Prepare truncation shuffle mask
for (unsigned i = 0; i != NumElems; ++i)
MaskVec[i] = i * 2;
- SDValue V = DAG.getVectorShuffle(NVT, DL, DAG.getBitcast(NVT, In),
- DAG.getUNDEF(NVT), MaskVec);
+ In = DAG.getBitcast(NVT, In);
+ SDValue V = DAG.getVectorShuffle(NVT, DL, In, In, MaskVec);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V,
DAG.getIntPtrConstant(0, DL));
}
-SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
-
MVT VT = Op.getSimpleValueType();
if (VT.isVector()) {
@@ -15616,8 +15985,7 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op,
SDValue Src = Op.getOperand(0);
SDLoc dl(Op);
if (VT == MVT::v2i64 && Src.getSimpleValueType() == MVT::v2f32) {
- return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI,
- dl, VT,
+ return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32)));
}
@@ -15891,7 +16259,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget &Subtarget,
for (unsigned i = 0, e = VecIns.size(); i < e; ++i)
VecIns[i] = DAG.getBitcast(TestVT, VecIns[i]);
- // If more than one full vectors are evaluated, OR them first before PTEST.
+ // If more than one full vector is evaluated, OR them first before PTEST.
for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; Slot += 2, e += 1) {
// Each iteration will OR 2 nodes and append the result until there is only
// 1 node left, i.e. the final OR'd value of all vectors.
@@ -15900,8 +16268,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget &Subtarget,
VecIns.push_back(DAG.getNode(ISD::OR, DL, TestVT, LHS, RHS));
}
- return DAG.getNode(X86ISD::PTEST, DL, MVT::i32,
- VecIns.back(), VecIns.back());
+ return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, VecIns.back(), VecIns.back());
}
/// \brief return true if \c Op has a use that doesn't just read flags.
@@ -16366,7 +16733,7 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
}
/// If we have at least two divisions that use the same divisor, convert to
-/// multplication by a reciprocal. This may need to be adjusted for a given
+/// multiplication by a reciprocal. This may need to be adjusted for a given
/// CPU if a division's cost is not at least twice the cost of a multiplication.
/// This is because we still need one division to calculate the reciprocal and
/// then we need two multiplies by that reciprocal as replacements for the
@@ -17241,12 +17608,14 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y
// (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y
// (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y
+ // (select ((and (x, 0x1)) == 0), y, (z ^ y)) -> (-(and (x, 0x1)) & z) ^ y
+ // (select ((and (x, 0x1)) == 0), y, (z | y)) -> (-(and (x, 0x1)) & z) | y
if (Cond.getOpcode() == X86ISD::SETCC &&
Cond.getOperand(1).getOpcode() == X86ISD::CMP &&
isNullConstant(Cond.getOperand(1).getOperand(1))) {
SDValue Cmp = Cond.getOperand(1);
-
- unsigned CondCode =cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
+ unsigned CondCode =
+ cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue();
if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
(CondCode == X86::COND_E || CondCode == X86::COND_NE)) {
@@ -17283,6 +17652,43 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (!isNullConstant(Op2))
Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
return Res;
+ } else if (!Subtarget.hasCMov() && CondCode == X86::COND_E &&
+ Cmp.getOperand(0).getOpcode() == ISD::AND &&
+ isOneConstant(Cmp.getOperand(0).getOperand(1))) {
+ SDValue CmpOp0 = Cmp.getOperand(0);
+ SDValue Src1, Src2;
+ // Returns true if Op2 is an XOR or OR operator and one of its operands
+ // is equal to Op1, i.e. the select has one of the forms
+ // (a, a op b) or (b, a op b).
+ auto isOrXorPattern = [&]() {
+ if ((Op2.getOpcode() == ISD::XOR || Op2.getOpcode() == ISD::OR) &&
+ (Op2.getOperand(0) == Op1 || Op2.getOperand(1) == Op1)) {
+ Src1 =
+ Op2.getOperand(0) == Op1 ? Op2.getOperand(1) : Op2.getOperand(0);
+ Src2 = Op1;
+ return true;
+ }
+ return false;
+ };
+
+ if (isOrXorPattern()) {
+ SDValue Neg;
+ unsigned int CmpSz = CmpOp0.getSimpleValueType().getSizeInBits();
+ // We need a mask of all zeros or all ones, with the same size as the
+ // other operands.
+ if (CmpSz > VT.getSizeInBits())
+ Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpOp0);
+ else if (CmpSz < VT.getSizeInBits())
+ Neg = DAG.getNode(ISD::AND, DL, VT,
+ DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpOp0.getOperand(0)),
+ DAG.getConstant(1, DL, VT));
+ else
+ Neg = CmpOp0;
+ SDValue Mask = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Neg); // -(and (x, 0x1))
+ SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); // Mask & z
+ return DAG.getNode(Op2.getOpcode(), DL, VT, And, Src2); // (Mask & z) op y
+ }
}
}
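The new branchless pattern above relies on -(x & 1) being all-zeros when the low bit is clear and all-ones when it is set. A standalone check of the identity (illustrative; not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Check (select ((x & 1) == 0), y, z ^ y) == (-(x & 1) & z) ^ y:
    // -(x & 1) is 0 or ~0, so the AND selects 0 or z before the XOR.
    int main() {
      for (uint32_t x = 0; x < 4; ++x)
        for (uint32_t y : {0u, 1u, 0xDEADBEEFu})
          for (uint32_t z : {0u, 7u, 0xCAFEBABEu}) {
            uint32_t sel = ((x & 1) == 0) ? y : (z ^ y);
            uint32_t branchless = ((0u - (x & 1)) & z) ^ y;
            assert(sel == branchless);
          }
      return 0;
    }

The same reasoning holds with | in place of ^, matching the second comment, since 0 | y == y.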
@@ -17423,17 +17829,10 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
// SKX processor
if ((InVTElt == MVT::i1) &&
- (((Subtarget.hasBWI() && Subtarget.hasVLX() &&
- VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() <= 16)) ||
-
- ((Subtarget.hasBWI() && VT.is512BitVector() &&
- VTElt.getSizeInBits() <= 16)) ||
+ (((Subtarget.hasBWI() && VTElt.getSizeInBits() <= 16)) ||
- ((Subtarget.hasDQI() && Subtarget.hasVLX() &&
- VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() >= 32)) ||
+ ((Subtarget.hasDQI() && VTElt.getSizeInBits() >= 32))))
- ((Subtarget.hasDQI() && VT.is512BitVector() &&
- VTElt.getSizeInBits() >= 32))))
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
unsigned NumElts = VT.getVectorNumElements();
@@ -17441,8 +17840,8 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
if (VT.is512BitVector() && InVTElt != MVT::i1 &&
(NumElts == 8 || NumElts == 16 || Subtarget.hasBWI())) {
if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT)
- return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0));
- return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+ return getExtendInVec(In.getOpcode(), dl, VT, In.getOperand(0), DAG);
+ return getExtendInVec(X86ISD::VSEXT, dl, VT, In, DAG);
}
if (InVTElt != MVT::i1)
@@ -17454,10 +17853,10 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
SDValue V;
if (Subtarget.hasDQI()) {
- V = DAG.getNode(X86ISD::VSEXT, dl, ExtVT, In);
+ V = getExtendInVec(X86ISD::VSEXT, dl, ExtVT, In, DAG);
assert(!VT.is512BitVector() && "Unexpected vector type");
} else {
- SDValue NegOne = getOnesVector(ExtVT, Subtarget, DAG, dl);
+ SDValue NegOne = getOnesVector(ExtVT, DAG, dl);
SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl);
V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
if (ExtVT == VT)
@@ -17506,11 +17905,15 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
assert((Op.getOpcode() != ISD::ZERO_EXTEND_VECTOR_INREG ||
InVT == MVT::v64i8) && "Zero extend only for v64i8 input!");
- // SSE41 targets can use the pmovsx* instructions directly.
- unsigned ExtOpc = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ?
- X86ISD::VSEXT : X86ISD::VZEXT;
- if (Subtarget.hasSSE41())
+ // SSE41 targets can use the pmovsx* instructions directly for 128-bit
+ // results, so those cases are legal and shouldn't occur here. AVX2/AVX512
+ // pmovsx* instructions still need to be handled here for 256/512-bit results.
+ if (Subtarget.hasInt256()) {
+ assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension");
+ unsigned ExtOpc = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ?
+ X86ISD::VSEXT : X86ISD::VZEXT;
return DAG.getNode(ExtOpc, dl, VT, In);
+ }
// We should only get here for sign extend.
assert(Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG &&
@@ -17595,8 +17998,8 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
VT.getVectorNumElements() / 2);
- OpLo = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpLo);
- OpHi = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpHi);
+ OpLo = DAG.getSignExtendVectorInReg(OpLo, dl, HalfVT);
+ OpHi = DAG.getSignExtendVectorInReg(OpHi, dl, HalfVT);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
@@ -17674,7 +18077,8 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,
MVT VT = Op.getValueType().getSimpleVT();
unsigned NumElts = VT.getVectorNumElements();
- if ((Subtarget.hasVLX() && Subtarget.hasBWI() && Subtarget.hasDQI()) ||
+ if ((Subtarget.hasBWI() && NumElts >= 32) ||
+ (Subtarget.hasDQI() && NumElts < 16) ||
NumElts == 16) {
// Load and extend - everything is legal
if (NumElts < 8) {
@@ -17703,7 +18107,7 @@ static SDValue LowerExtended1BitVectorLoad(SDValue Op,
if (NumElts <= 8) {
// A subset, assume that we have only AVX-512F
- unsigned NumBitsToLoad = NumElts < 8 ? 8 : NumElts;
+ unsigned NumBitsToLoad = 8;
MVT TypeToLoad = MVT::getIntegerVT(NumBitsToLoad);
SDValue Load = DAG.getLoad(TypeToLoad, dl, Ld->getChain(),
Ld->getBasePtr(),
@@ -17911,7 +18315,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget &Subtarget,
if (Ext == ISD::SEXTLOAD) {
// If we have SSE4.1, we can directly emit a VSEXT node.
if (Subtarget.hasSSE41()) {
- SDValue Sext = DAG.getNode(X86ISD::VSEXT, dl, RegVT, SlicedVec);
+ SDValue Sext = getExtendInVec(X86ISD::VSEXT, dl, RegVT, SlicedVec, DAG);
DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF);
return Sext;
}
@@ -18469,6 +18873,11 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
SelectionDAG &DAG) {
MVT ElementType = VT.getVectorElementType();
+ // Bitcast the source vector to the output type; this is mainly necessary
+ // for vXi8/vXi64 shifts.
+ if (VT != SrcOp.getSimpleValueType())
+ SrcOp = DAG.getBitcast(VT, SrcOp);
+
// Fold this packed shift into its first operand if ShiftAmt is 0.
if (ShiftAmt == 0)
return SrcOp;
@@ -18485,9 +18894,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
&& "Unknown target vector shift-by-constant node");
// Fold this packed vector shift into a build vector if SrcOp is a
- // vector of Constants or UNDEFs, and SrcOp valuetype is the same as VT.
- if (VT == SrcOp.getSimpleValueType() &&
- ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) {
+ // vector of Constants or UNDEFs.
+ if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) {
SmallVector<SDValue, 8> Elts;
unsigned NumElts = SrcOp->getNumOperands();
ConstantSDNode *ND;
@@ -18578,11 +18986,11 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
ShAmt.getOperand(0).getSimpleValueType() == MVT::i16) {
ShAmt = ShAmt.getOperand(0);
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v8i16, ShAmt);
- ShAmt = DAG.getNode(X86ISD::VZEXT, SDLoc(ShAmt), MVT::v2i64, ShAmt);
+ ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
} else if (Subtarget.hasSSE41() &&
ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
- ShAmt = DAG.getNode(X86ISD::VZEXT, SDLoc(ShAmt), MVT::v2i64, ShAmt);
+ ShAmt = DAG.getZeroExtendVectorInReg(ShAmt, SDLoc(ShAmt), MVT::v2i64);
} else {
SmallVector<SDValue, 4> ShOps = {ShAmt, DAG.getConstant(0, dl, SVT),
DAG.getUNDEF(SVT), DAG.getUNDEF(SVT)};
@@ -18853,6 +19261,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
SDValue Src2 = Op.getOperand(2);
SDValue passThru = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
+ if (IntrWithRoundingModeOpcode != 0) {
+ SDValue Rnd = Op.getOperand(5);
+ if (!isRoundModeCurDirection(Rnd))
+ return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
+ dl, VT, Src1, Src2, Rnd),
+ Mask, passThru, Subtarget, DAG);
+ }
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2),
Mask, passThru, Subtarget, DAG);
}
@@ -19306,6 +19722,15 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
Src2, Src1);
return DAG.getBitcast(VT, Res);
}
+ case MASK_BINOP: {
+ MVT VT = Op.getSimpleValueType();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits());
+
+ SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl);
+ SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl);
+ SDValue Res = DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Src2);
+ return DAG.getBitcast(VT, Res);
+ }
case FIXUPIMMS:
case FIXUPIMMS_MASKZ:
case FIXUPIMM:
@@ -19478,6 +19903,33 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+ case Intrinsic::x86_avx512_knot_w: {
+ SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
+ SDValue RHS = DAG.getConstant(1, dl, MVT::v16i1);
+ SDValue Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS, RHS);
+ return DAG.getBitcast(MVT::i16, Res);
+ }
+
+ case Intrinsic::x86_avx512_kandn_w: {
+ SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
+ // Invert LHS for the not.
+ LHS = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS,
+ DAG.getConstant(1, dl, MVT::v16i1));
+ SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
+ SDValue Res = DAG.getNode(ISD::AND, dl, MVT::v16i1, LHS, RHS);
+ return DAG.getBitcast(MVT::i16, Res);
+ }
+
+ case Intrinsic::x86_avx512_kxnor_w: {
+ SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
+ SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
+ SDValue Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, LHS, RHS);
+ // Invert result for the not.
+ Res = DAG.getNode(ISD::XOR, dl, MVT::v16i1, Res,
+ DAG.getConstant(1, dl, MVT::v16i1));
+ return DAG.getBitcast(MVT::i16, Res);
+ }
+
case Intrinsic::x86_sse42_pcmpistria128:
case Intrinsic::x86_sse42_pcmpestria128:
case Intrinsic::x86_sse42_pcmpistric128:
@@ -19603,6 +20055,28 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
}
}
+static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
+ SDValue Src, SDValue Mask, SDValue Base,
+ SDValue Index, SDValue ScaleOp, SDValue Chain,
+ const X86Subtarget &Subtarget) {
+ SDLoc dl(Op);
+ auto *C = cast<ConstantSDNode>(ScaleOp);
+ SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8);
+ EVT MaskVT = Mask.getValueType();
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
+ SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
+ SDValue Segment = DAG.getRegister(0, MVT::i32);
+ // If source is undef or we know it won't be used, use a zero vector
+ // to break register dependency.
+ // TODO: use undef instead and let ExecutionDepsFix deal with it?
+ if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
+ Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
+ SDValue Ops[] = {Src, Base, Scale, Index, Disp, Segment, Mask, Chain};
+ SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
+ SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
+ return DAG.getMergeValues(RetOps, dl);
+}
+
static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
@@ -19617,7 +20091,10 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
- if (Src.isUndef())
+ // If source is undef or we know it won't be used, use a zero vector
+ // to break register dependency.
+ // TODO: use undef instead and let ExecutionDepsFix deal with it?
+ if (Src.isUndef() || ISD::isBuildVectorAllOnes(VMask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
SDValue Ops[] = {Src, VMask, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
@@ -19656,7 +20133,6 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
MVT MaskVT =
MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
- //SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops);
return SDValue(Res, 0);
@@ -19928,6 +20404,16 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
SDValue(Result.getNode(), 2));
}
+ case GATHER_AVX2: {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Src = Op.getOperand(2);
+ SDValue Base = Op.getOperand(3);
+ SDValue Index = Op.getOperand(4);
+ SDValue Mask = Op.getOperand(5);
+ SDValue Scale = Op.getOperand(6);
+ return getAVX2GatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
+ Scale, Chain, Subtarget);
+ }
case GATHER: {
// gather(v1, mask, index, base, scale);
SDValue Chain = Op.getOperand(0);
@@ -19953,8 +20439,9 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
case PREFETCH: {
SDValue Hint = Op.getOperand(6);
unsigned HintVal = cast<ConstantSDNode>(Hint)->getZExtValue();
- assert(HintVal < 2 && "Wrong prefetch hint in intrinsic: should be 0 or 1");
- unsigned Opcode = (HintVal ? IntrData->Opc1 : IntrData->Opc0);
+ assert((HintVal == 2 || HintVal == 3) &&
+ "Wrong prefetch hint in intrinsic: should be 2 or 3");
+ unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0);
SDValue Chain = Op.getOperand(0);
SDValue Mask = Op.getOperand(2);
SDValue Index = Op.getOperand(3);
@@ -20368,7 +20855,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
// Check that ECX wasn't needed by an 'inreg' parameter.
FunctionType *FTy = Func->getFunctionType();
- const AttributeSet &Attrs = Func->getAttributes();
+ const AttributeList &Attrs = Func->getAttributes();
if (!Attrs.isEmpty() && !Func->isVarArg()) {
unsigned InRegCount = 0;
@@ -20802,9 +21289,10 @@ static SDValue Lower512IntArith(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
}
-static SDValue LowerADD(SDValue Op, SelectionDAG &DAG) {
- if (Op.getValueType() == MVT::i1)
- return DAG.getNode(ISD::XOR, SDLoc(Op), Op.getValueType(),
+static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ if (VT.getScalarType() == MVT::i1)
+ return DAG.getNode(ISD::XOR, SDLoc(Op), VT,
Op.getOperand(0), Op.getOperand(1));
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
@@ -20812,14 +21300,23 @@ static SDValue LowerADD(SDValue Op, SelectionDAG &DAG) {
return Lower256IntArith(Op, DAG);
}
-static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) {
- if (Op.getValueType() == MVT::i1)
- return DAG.getNode(ISD::XOR, SDLoc(Op), Op.getValueType(),
- Op.getOperand(0), Op.getOperand(1));
+static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
- return Lower256IntArith(Op, DAG);
+ MVT VT = Op.getSimpleValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ SDLoc dl(Op);
+ SDValue Src = Op.getOperand(0);
+ SDValue Lo = extract128BitVector(Src, 0, DAG, dl);
+ SDValue Hi = extract128BitVector(Src, NumElems / 2, DAG, dl);
+
+ MVT EltVT = VT.getVectorElementType();
+ MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(ISD::ABS, dl, NewVT, Lo),
+ DAG.getNode(ISD::ABS, dl, NewVT, Hi));
}
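LowerADD_SUB and (below) LowerMUL special-case i1 because arithmetic on a single bit is arithmetic mod 2: addition and subtraction both reduce to XOR, and multiplication reduces to AND. A quick exhaustive check of those identities (illustrative only):

    #include <cassert>

    int main() {
      for (unsigned a = 0; a < 2; ++a)
        for (unsigned b = 0; b < 2; ++b) {
          assert(((a + b) & 1) == (a ^ b)); // i1 add == xor
          assert(((a - b) & 1) == (a ^ b)); // i1 sub == xor
          assert(((a * b) & 1) == (a & b)); // i1 mul == and
        }
      return 0;
    }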
static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
@@ -20834,7 +21331,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
- if (VT == MVT::i1)
+ if (VT.getScalarType() == MVT::i1)
return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), Op.getOperand(1));
// Decompose 256-bit ops into smaller 128-bit ops.
@@ -20874,8 +21371,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
// Extract the lo parts and sign extend to i16
SDValue ALo, BLo;
if (Subtarget.hasSSE41()) {
- ALo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, A);
- BLo = DAG.getNode(X86ISD::VSEXT, dl, ExVT, B);
+ ALo = DAG.getSignExtendVectorInReg(A, dl, ExVT);
+ BLo = DAG.getSignExtendVectorInReg(B, dl, ExVT);
} else {
const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3,
-1, 4, -1, 5, -1, 6, -1, 7};
@@ -20894,8 +21391,8 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
-1, -1, -1, -1, -1, -1, -1, -1};
AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- AHi = DAG.getNode(X86ISD::VSEXT, dl, ExVT, AHi);
- BHi = DAG.getNode(X86ISD::VSEXT, dl, ExVT, BHi);
+ AHi = DAG.getSignExtendVectorInReg(AHi, dl, ExVT);
+ BHi = DAG.getSignExtendVectorInReg(BHi, dl, ExVT);
} else {
const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11,
-1, 12, -1, 13, -1, 14, -1, 15};
@@ -21056,8 +21553,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, HiMask));
}
- SDValue ExA = DAG.getNode(ExSSE41, dl, MVT::v16i16, A);
- SDValue ExB = DAG.getNode(ExSSE41, dl, MVT::v16i16, B);
+ SDValue ExA = getExtendInVec(ExSSE41, dl, MVT::v16i16, A, DAG);
+ SDValue ExB = getExtendInVec(ExSSE41, dl, MVT::v16i16, B, DAG);
SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v16i16, ExA, ExB);
SDValue MulH = DAG.getNode(ISD::SRL, dl, MVT::v16i16, Mul,
DAG.getConstant(8, dl, MVT::v16i16));
@@ -21073,8 +21570,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
// Extract the lo parts and zero/sign extend to i16.
SDValue ALo, BLo;
if (Subtarget.hasSSE41()) {
- ALo = DAG.getNode(ExSSE41, dl, ExVT, A);
- BLo = DAG.getNode(ExSSE41, dl, ExVT, B);
+ ALo = getExtendInVec(ExSSE41, dl, ExVT, A, DAG);
+ BLo = getExtendInVec(ExSSE41, dl, ExVT, B, DAG);
} else {
const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3,
-1, 4, -1, 5, -1, 6, -1, 7};
@@ -21093,8 +21590,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
-1, -1, -1, -1, -1, -1, -1, -1};
AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
- AHi = DAG.getNode(ExSSE41, dl, ExVT, AHi);
- BHi = DAG.getNode(ExSSE41, dl, ExVT, BHi);
+ AHi = getExtendInVec(ExSSE41, dl, ExVT, AHi, DAG);
+ BHi = getExtendInVec(ExSSE41, dl, ExVT, BHi, DAG);
} else {
const int ShufMask[] = {-1, 8, -1, 9, -1, 10, -1, 11,
-1, 12, -1, 13, -1, 14, -1, 15};
@@ -21148,8 +21645,8 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
MachinePointerInfo(), /* Alignment = */ 16);
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
Entry.Ty = PointerType::get(ArgTy,0);
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
}
@@ -21157,11 +21654,15 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
getPointerTy(DAG.getDataLayout()));
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(InChain)
- .setCallee(getLibcallCallingConv(LC),
- static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()),
- Callee, std::move(Args))
- .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
+ CLI.setDebugLoc(dl)
+ .setChain(InChain)
+ .setLibCallee(
+ getLibcallCallingConv(LC),
+ static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()), Callee,
+ std::move(Args))
+ .setInRegister()
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
return DAG.getBitcast(VT, CallInfo.first);
@@ -21269,15 +21770,15 @@ static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
if (VT.getScalarSizeInBits() < 16)
return false;
- if (VT.is512BitVector() &&
+ if (VT.is512BitVector() && Subtarget.hasAVX512() &&
(VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI()))
return true;
- bool LShift = VT.is128BitVector() ||
- (VT.is256BitVector() && Subtarget.hasInt256());
+ bool LShift = (VT.is128BitVector() && Subtarget.hasSSE2()) ||
+ (VT.is256BitVector() && Subtarget.hasInt256());
- bool AShift = LShift && (Subtarget.hasVLX() ||
- (VT != MVT::v2i64 && VT != MVT::v4i64));
+ bool AShift = LShift && (Subtarget.hasAVX512() ||
+ (VT != MVT::v2i64 && VT != MVT::v4i64));
return (Opcode == ISD::SRA) ? AShift : LShift;
}
@@ -21301,7 +21802,7 @@ static bool SupportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI())
return false;
- if (VT.is512BitVector() || Subtarget.hasVLX())
+ if (Subtarget.hasAVX512())
return true;
bool LShift = VT.is128BitVector() || VT.is256BitVector();
@@ -22062,10 +22563,10 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// An add of one will be selected as an INC. Note that INC doesn't
// set CF, so we can't do this for UADDO.
if (isOneConstant(RHS)) {
- BaseOp = X86ISD::INC;
- Cond = X86::COND_O;
- break;
- }
+ BaseOp = X86ISD::INC;
+ Cond = X86::COND_O;
+ break;
+ }
BaseOp = X86ISD::ADD;
Cond = X86::COND_O;
break;
@@ -22077,10 +22578,10 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// A subtract of one will be selected as a DEC. Note that DEC doesn't
// set CF, so we can't do this for USUBO.
if (isOneConstant(RHS)) {
- BaseOp = X86ISD::DEC;
- Cond = X86::COND_O;
- break;
- }
+ BaseOp = X86ISD::DEC;
+ Cond = X86::COND_O;
+ break;
+ }
BaseOp = X86ISD::SUB;
Cond = X86::COND_O;
break;
@@ -22470,7 +22971,7 @@ static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL,
// index into a in-register pre-computed pop count table. We then split up the
// input vector in two new ones: (1) a vector with only the shifted-right
// higher nibbles for each byte and (2) a vector with the lower nibbles (and
- // masked out higher ones) for each byte. PSHUB is used separately with both
+ // masked out higher ones) for each byte. PSHUFB is used separately with both
// to index the in-register table. Next, both are added and the result is an
// i8 vector where each element contains the pop count for its input byte.
//
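A scalar model of the PSHUFB-based popcount this comment describes (illustrative sketch; the patch performs the same steps on whole vectors):

    #include <cstdint>

    // Byte popcount via a 16-entry nibble table: PSHUFB performs the two
    // table lookups lane-by-lane across the vector, PADDB the final add.
    uint8_t popCountByteLUT(uint8_t V) {
      static const uint8_t LUT[16] = {0, 1, 1, 2, 1, 2, 2, 3,
                                      1, 2, 2, 3, 2, 3, 3, 4};
      uint8_t LoNibble = V & 0xF; // lower nibble, higher bits masked out
      uint8_t HiNibble = V >> 4;  // shifted-right higher nibble
      return LUT[LoNibble] + LUT[HiNibble];
    }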
@@ -22867,8 +23368,8 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
Entry.Node = Arg;
Entry.Ty = ArgTy;
- Entry.isSExt = false;
- Entry.isZExt = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
Args.push_back(Entry);
bool isF64 = ArgVT == MVT::f64;
@@ -22885,8 +23386,9 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
: (Type*)VectorType::get(ArgTy, 4);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::C, RetTy, Callee, std::move(Args));
+ CLI.setDebugLoc(dl)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
@@ -23086,7 +23588,7 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
// Mask element has to be i1.
MVT MaskEltTy = Mask.getSimpleValueType().getScalarType();
assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) &&
- "We handle 4x32, 4x64 and 2x64 vectors only in this casse");
+ "We handle 4x32, 4x64 and 2x64 vectors only in this case");
MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec);
@@ -23142,7 +23644,7 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
// Mask element has to be i1.
MVT MaskEltTy = Mask.getSimpleValueType().getScalarType();
assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) &&
- "We handle 4x32, 4x64 and 2x64 vectors only in this casse");
+ "We handle 4x32, 4x64 and 2x64 vectors only in this case");
MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec);
@@ -23202,7 +23704,7 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask);
- // The pass-thru value
+ // The pass-through value
MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
Src0 = ExtendToType(Src0, NewVT, DAG);
@@ -23284,7 +23786,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG);
- case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
@@ -23303,7 +23805,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SIGN_EXTEND_VECTOR_INREG:
return LowerEXTEND_VECTOR_INREG(Op, Subtarget, DAG);
case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, Subtarget, DAG);
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::LOAD: return LowerExtendedLoad(Op, Subtarget, DAG);
case ISD::FABS:
@@ -23360,12 +23862,13 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
- case ISD::ADD: return LowerADD(Op, DAG);
- case ISD::SUB: return LowerSUB(Op, DAG);
+ case ISD::ADD:
+ case ISD::SUB: return LowerADD_SUB(Op, DAG);
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN: return LowerMINMAX(Op, DAG);
+ case ISD::ABS: return LowerABS(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
@@ -23768,7 +24271,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
- case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::BLENDI: return "X86ISD::BLENDI";
@@ -23779,16 +24281,19 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
- case X86ISD::ABS: return "X86ISD::ABS";
case X86ISD::CONFLICT: return "X86ISD::CONFLICT";
case X86ISD::FMAX: return "X86ISD::FMAX";
+ case X86ISD::FMAXS: return "X86ISD::FMAXS";
case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND";
+ case X86ISD::FMAXS_RND: return "X86ISD::FMAXS_RND";
case X86ISD::FMIN: return "X86ISD::FMIN";
+ case X86ISD::FMINS: return "X86ISD::FMINS";
case X86ISD::FMIN_RND: return "X86ISD::FMIN_RND";
+ case X86ISD::FMINS_RND: return "X86ISD::FMINS_RND";
case X86ISD::FMAXC: return "X86ISD::FMAXC";
case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
- case X86ISD::FRSQRTS: return "X86ISD::FRSQRTS";
+ case X86ISD::FRSQRTS: return "X86ISD::FRSQRTS";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::FRCPS: return "X86ISD::FRCPS";
case X86ISD::EXTRQI: return "X86ISD::EXTRQI";
@@ -23827,7 +24332,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS";
case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS";
- case X86ISD::VINSERT: return "X86ISD::VINSERT";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND";
case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND";
@@ -23876,6 +24380,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::TESTNM: return "X86ISD::TESTNM";
case X86ISD::KORTEST: return "X86ISD::KORTEST";
case X86ISD::KTEST: return "X86ISD::KTEST";
+ case X86ISD::KSHIFTL: return "X86ISD::KSHIFTL";
+ case X86ISD::KSHIFTR: return "X86ISD::KSHIFTR";
case X86ISD::PACKSS: return "X86ISD::PACKSS";
case X86ISD::PACKUS: return "X86ISD::PACKUS";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
@@ -23976,9 +24482,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::RSQRT28: return "X86ISD::RSQRT28";
case X86ISD::RSQRT28S: return "X86ISD::RSQRT28S";
case X86ISD::FADD_RND: return "X86ISD::FADD_RND";
+ case X86ISD::FADDS_RND: return "X86ISD::FADDS_RND";
case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND";
+ case X86ISD::FSUBS_RND: return "X86ISD::FSUBS_RND";
case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND";
+ case X86ISD::FMULS_RND: return "X86ISD::FMULS_RND";
case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND";
+ case X86ISD::FDIVS_RND: return "X86ISD::FDIVS_RND";
case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND";
case X86ISD::FSQRTS_RND: return "X86ISD::FSQRTS_RND";
case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND";
@@ -24302,7 +24812,7 @@ static MachineBasicBlock *emitPCMPSTRM(MachineInstr &MI, MachineBasicBlock *BB,
for (unsigned i = 1; i < NumArgs; ++i) {
MachineOperand &Op = MI.getOperand(i);
if (!(Op.isReg() && Op.isImplicit()))
- MIB.addOperand(Op);
+ MIB.add(Op);
}
if (MI.hasOneMemOperand())
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
@@ -24338,7 +24848,7 @@ static MachineBasicBlock *emitPCMPSTRI(MachineInstr &MI, MachineBasicBlock *BB,
for (unsigned i = 1; i < NumArgs; ++i) {
MachineOperand &Op = MI.getOperand(i);
if (!(Op.isReg() && Op.isImplicit()))
- MIB.addOperand(Op);
+ MIB.add(Op);
}
if (MI.hasOneMemOperand())
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
@@ -24398,7 +24908,7 @@ static MachineBasicBlock *emitMonitor(MachineInstr &MI, MachineBasicBlock *BB,
unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
for (int i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
unsigned ValOps = X86::AddrNumOperands;
BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
@@ -24413,6 +24923,26 @@ static MachineBasicBlock *emitMonitor(MachineInstr &MI, MachineBasicBlock *BB,
return BB;
}
+static MachineBasicBlock *emitClzero(MachineInstr *MI, MachineBasicBlock *BB,
+ const X86Subtarget &Subtarget) {
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ // Address into RAX/EAX
+ unsigned MemOpc = Subtarget.is64Bit() ? X86::LEA64r : X86::LEA32r;
+ unsigned MemReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
+ for (int i = 0; i < X86::AddrNumOperands; ++i)
+ MIB.add(MI->getOperand(i));
+
+ // The instruction doesn't actually take any operands though.
+ BuildMI(*BB, MI, dl, TII->get(X86::CLZEROr));
+
+ MI->eraseFromParent(); // The pseudo is gone now.
+ return BB;
+}
+
+
+
MachineBasicBlock *
X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const {
@@ -24536,12 +25066,12 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Load the offset value into a register
OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
- .addOperand(Base)
- .addOperand(Scale)
- .addOperand(Index)
- .addDisp(Disp, UseFPOffset ? 4 : 0)
- .addOperand(Segment)
- .setMemRefs(MMOBegin, MMOEnd);
+ .add(Base)
+ .add(Scale)
+ .add(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .add(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
// Check if there is enough room left to pull this argument.
BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
@@ -24561,12 +25091,12 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Read the reg_save_area address.
unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
- .addOperand(Base)
- .addOperand(Scale)
- .addOperand(Index)
- .addDisp(Disp, 16)
- .addOperand(Segment)
- .setMemRefs(MMOBegin, MMOEnd);
+ .add(Base)
+ .add(Scale)
+ .add(Index)
+ .addDisp(Disp, 16)
+ .add(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
// Zero-extend the offset
unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
@@ -24588,13 +25118,13 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Store it back into the va_list.
BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
- .addOperand(Base)
- .addOperand(Scale)
- .addOperand(Index)
- .addDisp(Disp, UseFPOffset ? 4 : 0)
- .addOperand(Segment)
- .addReg(NextOffsetReg)
- .setMemRefs(MMOBegin, MMOEnd);
+ .add(Base)
+ .add(Scale)
+ .add(Index)
+ .addDisp(Disp, UseFPOffset ? 4 : 0)
+ .add(Segment)
+ .addReg(NextOffsetReg)
+ .setMemRefs(MMOBegin, MMOEnd);
// Jump to endMBB
BuildMI(offsetMBB, DL, TII->get(X86::JMP_1))
@@ -24608,12 +25138,12 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Load the overflow_area address into a register.
unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
- .addOperand(Base)
- .addOperand(Scale)
- .addOperand(Index)
- .addDisp(Disp, 8)
- .addOperand(Segment)
- .setMemRefs(MMOBegin, MMOEnd);
+ .add(Base)
+ .add(Scale)
+ .add(Index)
+ .addDisp(Disp, 8)
+ .add(Segment)
+ .setMemRefs(MMOBegin, MMOEnd);
// If we need to align it, do so. Otherwise, just copy the address
// to OverflowDestReg.
@@ -24644,13 +25174,13 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr &MI,
// Store the new overflow address.
BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
- .addOperand(Base)
- .addOperand(Scale)
- .addOperand(Index)
- .addDisp(Disp, 8)
- .addOperand(Segment)
- .addReg(NextAddrReg)
- .setMemRefs(MMOBegin, MMOEnd);
+ .add(Base)
+ .add(Scale)
+ .add(Index)
+ .addDisp(Disp, 8)
+ .add(Segment)
+ .addReg(NextAddrReg)
+ .setMemRefs(MMOBegin, MMOEnd);
// If we branched, emit the PHI to the front of endMBB.
if (offsetMBB) {
@@ -24867,7 +25397,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
//
// (CMOV (CMOV F, T, cc1), T, cc2)
//
- // to two successives branches. For that, we look for another CMOV as the
+ // to two successive branches. For that, we look for another CMOV as the
// following instruction.
//
// Without this, we would add a PHI between the two jumps, which ends up
@@ -25123,12 +25653,12 @@ X86TargetLowering::EmitLoweredAtomicFP(MachineInstr &MI,
// instruction using the same address operands.
if (Operand.isReg())
Operand.setIsKill(false);
- MIB.addOperand(Operand);
+ MIB.add(Operand);
}
MachineInstr *FOpMI = MIB;
MIB = BuildMI(*BB, MI, DL, TII->get(MOp));
for (int i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
MIB.addReg(FOpMI->getOperand(0).getReg(), RegState::Kill);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@@ -25508,7 +26038,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(MemOpndSlot + i), LabelOffset);
else
- MIB.addOperand(MI.getOperand(MemOpndSlot + i));
+ MIB.add(MI.getOperand(MemOpndSlot + i));
}
if (!UseImmLabel)
MIB.addReg(LabelReg);
@@ -25591,7 +26121,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
// Reload FP
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), FP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i)
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload IP
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
@@ -25599,7 +26129,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), LabelOffset);
else
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload SP
@@ -25608,7 +26138,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), SPOffset);
else
- MIB.addOperand(MI.getOperand(i));
+ MIB.add(MI.getOperand(i));
}
MIB.setMemRefs(MMOBegin, MMOEnd);
// Jump
@@ -25625,7 +26155,7 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const X86InstrInfo *TII = Subtarget.getInstrInfo();
MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
@@ -25644,8 +26174,6 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
VR = MRI->createVirtualRegister(TRC);
Op = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
- /* const X86InstrInfo *XII = static_cast<const X86InstrInfo *>(TII); */
-
if (Subtarget.is64Bit())
BuildMI(*MBB, MI, DL, TII->get(X86::LEA64r), VR)
.addReg(X86::RIP)
@@ -25655,7 +26183,7 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
.addReg(0);
else
BuildMI(*MBB, MI, DL, TII->get(X86::LEA32r), VR)
- .addReg(0) /* XII->getGlobalBaseReg(MF) */
+ .addReg(0) /* TII->getGlobalBaseReg(MF) */
.addImm(1)
.addReg(0)
.addMBB(DispatchBB, Subtarget.classifyBlockAddressReference())
@@ -25677,7 +26205,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MachineFunction *MF = BB->getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
MachineRegisterInfo *MRI = &MF->getRegInfo();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const X86InstrInfo *TII = Subtarget.getInstrInfo();
int FI = MFI.getFunctionContextIndex();
// Get a mapping of the call site numbers to all of the landing pads they're
@@ -25749,9 +26277,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MF->getOrCreateJumpTableInfo(getJumpTableEncoding());
unsigned MJTI = JTI->createJumpTableIndex(LPadList);
- const X86InstrInfo *XII = static_cast<const X86InstrInfo *>(TII);
- const X86RegisterInfo &RI = XII->getRegisterInfo();
-
+ const X86RegisterInfo &RI = TII->getRegisterInfo();
// Add a register mask with no preserved registers. This results in all
// registers being marked as clobbered.
if (RI.hasBasePointer(*MF)) {
@@ -25799,8 +26325,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
// N.B. the order the invoke BBs are processed in doesn't matter here.
SmallVector<MachineBasicBlock *, 64> MBBLPads;
- const MCPhysReg *SavedRegs =
- Subtarget.getRegisterInfo()->getCalleeSavedRegs(MF);
+ const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
for (MachineBasicBlock *MBB : InvokeBBs) {
// Remove the landing pad successor from the invoke block and replace it
// with the new dispatch block.
@@ -26033,6 +26558,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitMonitor(MI, BB, Subtarget, X86::MONITORrrr);
case X86::MONITORX:
return emitMonitor(MI, BB, Subtarget, X86::MONITORXrrr);
+
+ // Cache line zero
+ case X86::CLZERO:
+ return emitClzero(&MI, BB, Subtarget);
+
// PKU feature
case X86::WRPKRU:
return emitWRPKRU(MI, BB, Subtarget);
@@ -26137,10 +26667,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
unsigned BitWidth = KnownZero.getBitWidth();
unsigned Opc = Op.getOpcode();
+ EVT VT = Op.getValueType();
assert((Opc >= ISD::BUILTIN_OP_END ||
Opc == ISD::INTRINSIC_WO_CHAIN ||
Opc == ISD::INTRINSIC_W_CHAIN ||
@@ -26167,44 +26699,91 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
LLVM_FALLTHROUGH;
case X86ISD::SETCC:
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ KnownZero.setBits(1, BitWidth);
break;
case X86ISD::MOVMSK: {
unsigned NumLoBits = Op.getOperand(0).getValueType().getVectorNumElements();
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits);
+ KnownZero.setBits(NumLoBits, BitWidth);
+ break;
+ }
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI: {
+ if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ShiftImm->getAPIntValue().uge(VT.getScalarSizeInBits())) {
+ KnownZero = APInt::getAllOnesValue(BitWidth);
+ break;
+ }
+
+ DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth + 1);
+ unsigned ShAmt = ShiftImm->getZExtValue();
+ if (Opc == X86ISD::VSHLI) {
+ KnownZero = KnownZero << ShAmt;
+ KnownOne = KnownOne << ShAmt;
+ // Low bits are known zero.
+ KnownZero.setLowBits(ShAmt);
+ } else {
+ KnownZero = KnownZero.lshr(ShAmt);
+ KnownOne = KnownOne.lshr(ShAmt);
+ // High bits are known zero.
+ KnownZero.setHighBits(ShAmt);
+ }
+ }
break;
}
case X86ISD::VZEXT: {
SDValue N0 = Op.getOperand(0);
- unsigned NumElts = Op.getValueType().getVectorNumElements();
- unsigned InNumElts = N0.getValueType().getVectorNumElements();
- unsigned InBitWidth = N0.getValueType().getScalarSizeInBits();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ EVT SrcVT = N0.getValueType();
+ unsigned InNumElts = SrcVT.getVectorNumElements();
+ unsigned InBitWidth = SrcVT.getScalarSizeInBits();
+ assert(InNumElts >= NumElts && "Illegal VZEXT input");
KnownZero = KnownOne = APInt(InBitWidth, 0);
- APInt DemandedElts = APInt::getLowBitsSet(InNumElts, NumElts);
- DAG.computeKnownBits(N0, KnownZero, KnownOne, DemandedElts, Depth + 1);
+ APInt DemandedSrcElts = APInt::getLowBitsSet(InNumElts, NumElts);
+ DAG.computeKnownBits(N0, KnownZero, KnownOne, DemandedSrcElts, Depth + 1);
KnownOne = KnownOne.zext(BitWidth);
KnownZero = KnownZero.zext(BitWidth);
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - InBitWidth);
+ KnownZero.setBits(InBitWidth, BitWidth);
break;
}
}
}
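The VSHLI/VSRLI case added above follows the usual known-bits rule for constant shifts: shift both masks by the amount and mark the vacated bits as known zero. A scalar model of the left-shift direction (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint16_t KnownZero = 0xFF00; // high byte known to be 0
      uint16_t KnownOne  = 0x0001; // low bit known to be 1
      unsigned ShAmt = 4;
      // Left shift: both masks shift left, low ShAmt bits become known zero.
      uint16_t NewZero = static_cast<uint16_t>(KnownZero << ShAmt);
      uint16_t NewOne  = static_cast<uint16_t>(KnownOne << ShAmt);
      NewZero |= (1u << ShAmt) - 1;
      assert(NewZero == 0xF00F && NewOne == 0x0010);
      return 0;
    }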
unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
- SDValue Op, const SelectionDAG &DAG, unsigned Depth) const {
- // SETCC_CARRY sets the dest to ~0 for true or 0 for false.
- if (Op.getOpcode() == X86ISD::SETCC_CARRY)
- return Op.getScalarValueSizeInBits();
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+ unsigned Depth) const {
+ unsigned VTBits = Op.getScalarValueSizeInBits();
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case X86ISD::SETCC_CARRY:
+ // SETCC_CARRY sets the dest to ~0 for true or 0 for false.
+ return VTBits;
- if (Op.getOpcode() == X86ISD::VSEXT) {
- EVT VT = Op.getValueType();
- EVT SrcVT = Op.getOperand(0).getValueType();
- unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
- Tmp += VT.getScalarSizeInBits() - SrcVT.getScalarSizeInBits();
+ case X86ISD::VSEXT: {
+ SDValue Src = Op.getOperand(0);
+ unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
+ Tmp += VTBits - Src.getScalarValueSizeInBits();
return Tmp;
}
+ case X86ISD::VSRAI: {
+ SDValue Src = Op.getOperand(0);
+ unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1);
+ APInt ShiftVal = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+ ShiftVal += Tmp;
+ return ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
+ }
+
+ case X86ISD::PCMPGT:
+ case X86ISD::PCMPEQ:
+ case X86ISD::CMPP:
+ case X86ISD::VPCOM:
+ case X86ISD::VPCOMU:
+ // Vector compares return zero/all-bits result values.
+ return VTBits;
+ }
+
// Fallback case.
return 1;
}
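The VSRAI case above uses the fact that an arithmetic right shift by ShAmt duplicates the sign bit, so the result has min(SignBits + ShAmt, VTBits) sign bits. A numeric illustration (assumes the usual two's-complement behavior of >> on signed values):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      int16_t X = -2;                  // 0xFFFE: 15 sign bits
      unsigned Known = 15, ShAmt = 4, VTBits = 16;
      unsigned Result = std::min(Known + ShAmt, VTBits);
      int16_t Shifted = static_cast<int16_t>(X >> 4); // -1: 16 sign bits
      assert(Result == 16 && Shifted == -1);
      return 0;
    }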
@@ -26228,24 +26807,17 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- bool FloatDomain,
+ bool AllowFloatDomain, bool AllowIntDomain,
+ SDValue &V1, SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
- // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
- if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
- isUndefOrEqual(Mask[0], 0) &&
- isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
- Shuffle = X86ISD::VZEXT_MOVL;
- SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
- return true;
- }
-
- // Match against a VZEXT instruction.
- // TODO: Add 256/512-bit vector support.
- if (!FloatDomain && MaskVT.is128BitVector() && Subtarget.hasSSE41()) {
+ // Match against a ZERO_EXTEND_VECTOR_INREG/VZEXT instruction.
+ // TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
+ if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
unsigned MaxScale = 64 / MaskEltSize;
for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
bool Match = true;
@@ -26255,19 +26827,32 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
}
if (Match) {
- SrcVT = MaskVT;
+ unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
+ SrcVT = MVT::getVectorVT(MaskVT.getScalarType(), SrcSize / MaskEltSize);
+ if (SrcVT != MaskVT)
+ V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
DstVT = MVT::getVectorVT(DstVT, NumDstElts);
- Shuffle = X86ISD::VZEXT;
+ Shuffle = SrcVT != MaskVT ? unsigned(X86ISD::VZEXT)
+ : unsigned(ISD::ZERO_EXTEND_VECTOR_INREG);
return true;
}
}
}
+ // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
+ if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
+ isUndefOrEqual(Mask[0], 0) &&
+ isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
+ Shuffle = X86ISD::VZEXT_MOVL;
+ SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
+ return true;
+ }
+
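The Scale loop above recognizes zero-extension shuffle masks: for scale S, lane i*S must select source element i (or be undef) and the intervening lanes must be zero or undef. A scalar model of that test, using hypothetical sentinels in place of SM_SentinelUndef/SM_SentinelZero (sketch only):

    #include <cassert>
    #include <vector>

    // -1 stands for an undef lane, -2 for a zeroed lane (hypothetical
    // stand-ins for the SM_Sentinel* values used by the real code).
    static bool isZExtMask(const std::vector<int> &Mask, unsigned Scale) {
      for (unsigned i = 0, e = Mask.size() / Scale; i != e; ++i) {
        if (Mask[i * Scale] != -1 && Mask[i * Scale] != (int)i)
          return false; // must pick the i'th source element (or undef)
        for (unsigned j = 1; j != Scale; ++j)
          if (Mask[i * Scale + j] != -1 && Mask[i * Scale + j] != -2)
            return false; // trailing lanes must be zero/undef
      }
      return true;
    }

    int main() {
      assert(isZExtMask({0, -2, 1, -2}, 2));  // v4i32 mask == zext to 2 lanes
      assert(!isZExtMask({0, 1, -2, -2}, 2)); // not an extension pattern
      return 0;
    }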
// Check if we have SSE3 which will let us use MOVDDUP etc. The
// instructions are no slower than UNPCKLPD but have the option to
// fold the input operand, even when it is an unaligned memory load.
- if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && FloatDomain) {
+ if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) {
if (isTargetShuffleEquivalent(Mask, {0, 0})) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v2f64;
@@ -26285,7 +26870,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
}
- if (MaskVT.is256BitVector() && FloatDomain) {
+ if (MaskVT.is256BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles");
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2})) {
Shuffle = X86ISD::MOVDDUP;
@@ -26304,7 +26889,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
}
- if (MaskVT.is512BitVector() && FloatDomain) {
+ if (MaskVT.is512BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX512() &&
"AVX512 required for 512-bit vector shuffles");
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6})) {
@@ -26343,24 +26928,26 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// permute instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- bool FloatDomain,
+ bool AllowFloatDomain,
+ bool AllowIntDomain,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
bool ContainsZeros = false;
- SmallBitVector Zeroable(NumMaskElts, false);
+ APInt Zeroable(NumMaskElts, false);
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
- Zeroable[i] = isUndefOrZero(M);
+ if (isUndefOrZero(M))
+ Zeroable.setBit(i);
ContainsZeros |= (M == SM_SentinelZero);
}
// Attempt to match against byte/bit shifts.
// FIXME: Add 512-bit support.
- if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
- (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
+ if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle,
MaskVT.getScalarSizeInBits(), Mask,
0, Zeroable, Subtarget);
@@ -26423,19 +27010,21 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we
// had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
- if (FloatDomain && !Subtarget.hasAVX())
+ if ((AllowFloatDomain && !AllowIntDomain) && !Subtarget.hasAVX())
return false;
// Pre-AVX2 we must use float shuffles on 256-bit vectors.
- if (MaskVT.is256BitVector() && !Subtarget.hasAVX2())
- FloatDomain = true;
+ if (MaskVT.is256BitVector() && !Subtarget.hasAVX2()) {
+ AllowFloatDomain = true;
+ AllowIntDomain = false;
+ }
// Check for lane crossing permutes.
if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) {
// PERMPD/PERMQ permutes within a 256-bit vector (AVX2+).
if (Subtarget.hasAVX2() && MaskVT.is256BitVector() && Mask.size() == 4) {
Shuffle = X86ISD::VPERMI;
- ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
+ ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64);
PermuteImm = getV4X86ShuffleImm(Mask);
return true;
}
@@ -26443,7 +27032,7 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
SmallVector<int, 4> RepeatedMask;
if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) {
Shuffle = X86ISD::VPERMI;
- ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64);
+ ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64);
PermuteImm = getV4X86ShuffleImm(RepeatedMask);
return true;
}
@@ -26452,7 +27041,7 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
// VPERMILPD can permute with a non-repeating shuffle.
- if (FloatDomain && MaskScalarSizeInBits == 64) {
+ if (AllowFloatDomain && MaskScalarSizeInBits == 64) {
Shuffle = X86ISD::VPERMILPI;
ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size());
PermuteImm = 0;
@@ -26476,8 +27065,8 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
if (MaskScalarSizeInBits == 64)
scaleShuffleMask(2, RepeatedMask, WordMask);
- Shuffle = (FloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD);
- ShuffleVT = (FloatDomain ? MVT::f32 : MVT::i32);
+ Shuffle = (AllowFloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD);
+ ShuffleVT = (AllowFloatDomain ? MVT::f32 : MVT::i32);
ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
PermuteImm = getV4X86ShuffleImm(WordMask);
return true;
@@ -26487,34 +27076,36 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
// shuffle instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- bool FloatDomain, SDValue &V1, SDValue &V2,
+ bool AllowFloatDomain, bool AllowIntDomain,
+ SDValue &V1, SDValue &V2, SDLoc &DL,
+ SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
bool IsUnary) {
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
if (MaskVT.is128BitVector()) {
- if (isTargetShuffleEquivalent(Mask, {0, 0}) && FloatDomain) {
+ if (isTargetShuffleEquivalent(Mask, {0, 0}) && AllowFloatDomain) {
V2 = V1;
Shuffle = X86ISD::MOVLHPS;
ShuffleVT = MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(Mask, {1, 1}) && FloatDomain) {
+ if (isTargetShuffleEquivalent(Mask, {1, 1}) && AllowFloatDomain) {
V2 = V1;
Shuffle = X86ISD::MOVHLPS;
ShuffleVT = MVT::v4f32;
return true;
}
if (isTargetShuffleEquivalent(Mask, {0, 3}) && Subtarget.hasSSE2() &&
- (FloatDomain || !Subtarget.hasSSE41())) {
+ (AllowFloatDomain || !Subtarget.hasSSE41())) {
std::swap(V1, V2);
Shuffle = X86ISD::MOVSD;
ShuffleVT = MaskVT;
return true;
}
if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) &&
- (FloatDomain || !Subtarget.hasSSE41())) {
+ (AllowFloatDomain || !Subtarget.hasSSE41())) {
Shuffle = X86ISD::MOVSS;
ShuffleVT = MaskVT;
return true;
@@ -26527,57 +27118,12 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
(MaskVT.is256BitVector() && 32 <= EltSizeInBits && Subtarget.hasAVX()) ||
(MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512())) {
- MVT LegalVT = MaskVT;
- if (LegalVT.is256BitVector() && !Subtarget.hasAVX2())
- LegalVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64);
-
- SmallVector<int, 64> Unpckl, Unpckh;
- if (IsUnary) {
- createUnpackShuffleMask(MaskVT, Unpckl, true, true);
- if (isTargetShuffleEquivalent(Mask, Unpckl)) {
- V2 = V1;
- Shuffle = X86ISD::UNPCKL;
- ShuffleVT = LegalVT;
- return true;
- }
-
- createUnpackShuffleMask(MaskVT, Unpckh, false, true);
- if (isTargetShuffleEquivalent(Mask, Unpckh)) {
- V2 = V1;
- Shuffle = X86ISD::UNPCKH;
- ShuffleVT = LegalVT;
- return true;
- }
- } else {
- createUnpackShuffleMask(MaskVT, Unpckl, true, false);
- if (isTargetShuffleEquivalent(Mask, Unpckl)) {
- Shuffle = X86ISD::UNPCKL;
- ShuffleVT = LegalVT;
- return true;
- }
-
- createUnpackShuffleMask(MaskVT, Unpckh, false, false);
- if (isTargetShuffleEquivalent(Mask, Unpckh)) {
- Shuffle = X86ISD::UNPCKH;
- ShuffleVT = LegalVT;
- return true;
- }
-
- ShuffleVectorSDNode::commuteMask(Unpckl);
- if (isTargetShuffleEquivalent(Mask, Unpckl)) {
- std::swap(V1, V2);
- Shuffle = X86ISD::UNPCKL;
- ShuffleVT = LegalVT;
- return true;
- }
-
- ShuffleVectorSDNode::commuteMask(Unpckh);
- if (isTargetShuffleEquivalent(Mask, Unpckh)) {
- std::swap(V1, V2);
- Shuffle = X86ISD::UNPCKH;
- ShuffleVT = LegalVT;
- return true;
- }
+ if (matchVectorShuffleWithUNPCK(MaskVT, V1, V2, Shuffle, IsUnary, Mask, DL,
+ DAG, Subtarget)) {
+ ShuffleVT = MaskVT;
+ if (ShuffleVT.is256BitVector() && !Subtarget.hasAVX2())
+ ShuffleVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64);
+ return true;
}
}
@@ -26585,17 +27131,19 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- bool FloatDomain,
- SDValue &V1, SDValue &V2,
- SDLoc &DL, SelectionDAG &DAG,
+ bool AllowFloatDomain,
+ bool AllowIntDomain,
+ SDValue &V1, SDValue &V2, SDLoc &DL,
+ SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
+ unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
// Attempt to match against PALIGNR byte rotate.
- if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
- (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
+ if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
int ByteRotation = matchVectorShuffleAsByteRotate(MaskVT, V1, V2, Mask);
if (0 < ByteRotation) {
Shuffle = X86ISD::PALIGNR;
@@ -26606,77 +27154,74 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
// Attempt to combine to X86ISD::BLENDI.
- if (NumMaskElts <= 8 && ((Subtarget.hasSSE41() && MaskVT.is128BitVector()) ||
- (Subtarget.hasAVX() && MaskVT.is256BitVector()))) {
- // Determine a type compatible with X86ISD::BLENDI.
- // TODO - add 16i16 support (requires lane duplication).
- MVT BlendVT = MaskVT;
- if (Subtarget.hasAVX2()) {
- if (BlendVT == MVT::v4i64)
- BlendVT = MVT::v8i32;
- else if (BlendVT == MVT::v2i64)
- BlendVT = MVT::v4i32;
- } else {
- if (BlendVT == MVT::v2i64 || BlendVT == MVT::v4i32)
- BlendVT = MVT::v8i16;
- else if (BlendVT == MVT::v4i64)
- BlendVT = MVT::v4f64;
- else if (BlendVT == MVT::v8i32)
- BlendVT = MVT::v8f32;
- }
-
- unsigned BlendSize = BlendVT.getVectorNumElements();
- unsigned MaskRatio = BlendSize / NumMaskElts;
-
- // Can we blend with zero?
- if (isSequentialOrUndefOrZeroInRange(Mask, /*Pos*/ 0, /*Size*/ NumMaskElts,
- /*Low*/ 0) &&
- NumMaskElts <= BlendVT.getVectorNumElements()) {
- PermuteImm = 0;
- for (unsigned i = 0; i != BlendSize; ++i)
- if (Mask[i / MaskRatio] < 0)
- PermuteImm |= 1u << i;
-
- V2 = getZeroVector(BlendVT, Subtarget, DAG, DL);
- Shuffle = X86ISD::BLENDI;
- ShuffleVT = BlendVT;
- return true;
- }
-
- // Attempt to match as a binary blend.
- if (NumMaskElts <= BlendVT.getVectorNumElements()) {
- bool MatchBlend = true;
- for (int i = 0; i != (int)NumMaskElts; ++i) {
- int M = Mask[i];
- if (M == SM_SentinelUndef)
- continue;
- else if (M == SM_SentinelZero)
- MatchBlend = false;
- else if ((M != i) && (M != (i + (int)NumMaskElts)))
- MatchBlend = false;
- }
+ if ((NumMaskElts <= 8 && ((Subtarget.hasSSE41() && MaskVT.is128BitVector()) ||
+ (Subtarget.hasAVX() && MaskVT.is256BitVector()))) ||
+ (MaskVT == MVT::v16i16 && Subtarget.hasAVX2())) {
+ uint64_t BlendMask = 0;
+ bool ForceV1Zero = false, ForceV2Zero = false;
+ SmallVector<int, 8> TargetMask(Mask.begin(), Mask.end());
+ if (matchVectorShuffleAsBlend(V1, V2, TargetMask, ForceV1Zero, ForceV2Zero,
+ BlendMask)) {
+ if (MaskVT == MVT::v16i16) {
+ // We can only use v16i16 PBLENDW if the mask repeats in each 128-bit lane.
+ SmallVector<int, 8> RepeatedMask;
+ if (isRepeatedTargetShuffleMask(128, MaskVT, TargetMask,
+ RepeatedMask)) {
+ assert(RepeatedMask.size() == 8 &&
+ "Repeated mask size doesn't match!");
+ PermuteImm = 0;
+ for (int i = 0; i < 8; ++i)
+ if (RepeatedMask[i] >= 8)
+ PermuteImm |= 1 << i;
+ V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
+ V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
+ Shuffle = X86ISD::BLENDI;
+ ShuffleVT = MaskVT;
+ return true;
+ }
+ } else {
+ // Determine a type compatible with X86ISD::BLENDI.
+ ShuffleVT = MaskVT;
+ if (Subtarget.hasAVX2()) {
+ if (ShuffleVT == MVT::v4i64)
+ ShuffleVT = MVT::v8i32;
+ else if (ShuffleVT == MVT::v2i64)
+ ShuffleVT = MVT::v4i32;
+ } else {
+ if (ShuffleVT == MVT::v2i64 || ShuffleVT == MVT::v4i32)
+ ShuffleVT = MVT::v8i16;
+ else if (ShuffleVT == MVT::v4i64)
+ ShuffleVT = MVT::v4f64;
+ else if (ShuffleVT == MVT::v8i32)
+ ShuffleVT = MVT::v8f32;
+ }
- if (MatchBlend) {
- PermuteImm = 0;
- for (unsigned i = 0; i != BlendSize; ++i)
- if ((int)NumMaskElts <= Mask[i / MaskRatio])
- PermuteImm |= 1u << i;
+ if (!ShuffleVT.isFloatingPoint()) {
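+ // The blend mask is per ShuffleVT element, so each original mask bit is
+ // repeated Scale times, e.g. a v2i64 blend mask 0b10 becomes 0b1100 as
+ // v4i32.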
+ int Scale = EltSizeInBits / ShuffleVT.getScalarSizeInBits();
+ BlendMask =
+ scaleVectorShuffleBlendMask(BlendMask, NumMaskElts, Scale);
+ ShuffleVT = MVT::getIntegerVT(EltSizeInBits / Scale);
+ ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts * Scale);
+ }
+ V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1;
+ V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2;
+ PermuteImm = (unsigned)BlendMask;
Shuffle = X86ISD::BLENDI;
- ShuffleVT = BlendVT;
return true;
}
}
}
// Attempt to combine to INSERTPS.
- if (Subtarget.hasSSE41() && MaskVT == MVT::v4f32) {
- SmallBitVector Zeroable(4, false);
+ if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
+ MaskVT.is128BitVector()) {
+ APInt Zeroable(4, 0);
for (unsigned i = 0; i != NumMaskElts; ++i)
if (Mask[i] < 0)
- Zeroable[i] = true;
+ Zeroable.setBit(i);
- if (Zeroable.any() &&
+ if (Zeroable.getBoolValue() &&
matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
Shuffle = X86ISD::INSERTPS;
ShuffleVT = MVT::v4f32;
@@ -26685,22 +27230,26 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
// Attempt to combine to SHUFPD.
- if ((MaskVT == MVT::v2f64 && Subtarget.hasSSE2()) ||
- (MaskVT == MVT::v4f64 && Subtarget.hasAVX()) ||
- (MaskVT == MVT::v8f64 && Subtarget.hasAVX512())) {
+ if (AllowFloatDomain && EltSizeInBits == 64 &&
+ ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
+ (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
if (matchVectorShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) {
Shuffle = X86ISD::SHUFP;
- ShuffleVT = MaskVT;
+ ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64);
return true;
}
}
// Attempt to combine to SHUFPS.
- if ((MaskVT == MVT::v4f32 && Subtarget.hasSSE1()) ||
- (MaskVT == MVT::v8f32 && Subtarget.hasAVX()) ||
- (MaskVT == MVT::v16f32 && Subtarget.hasAVX512())) {
+ if (AllowFloatDomain && EltSizeInBits == 32 &&
+ ((MaskVT.is128BitVector() && Subtarget.hasSSE1()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
+ (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
SmallVector<int, 4> RepeatedMask;
if (isRepeatedTargetShuffleMask(128, MaskVT, Mask, RepeatedMask)) {
+ // Match each half of the repeated mask to determine if it just references
+ // one of the vectors, is zeroable, or is entirely undef.
auto MatchHalf = [&](unsigned Offset, int &S0, int &S1) {
int M0 = RepeatedMask[Offset];
int M1 = RepeatedMask[Offset + 1];
@@ -26732,7 +27281,7 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
V1 = Lo;
V2 = Hi;
Shuffle = X86ISD::SHUFP;
- ShuffleVT = MaskVT;
+ ShuffleVT = MVT::getVectorVT(MVT::f32, MaskVT.getSizeInBits() / 32);
PermuteImm = getV4X86ShuffleImm(ShufMask);
return true;
}
@@ -26764,7 +27313,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// here, we're not going to remove the operands we find.
bool UnaryShuffle = (Inputs.size() == 1);
SDValue V1 = peekThroughBitcasts(Inputs[0]);
- SDValue V2 = (UnaryShuffle ? V1 : peekThroughBitcasts(Inputs[1]));
+ SDValue V2 = (UnaryShuffle ? DAG.getUNDEF(V1.getValueType())
+ : peekThroughBitcasts(Inputs[1]));
MVT VT1 = V1.getSimpleValueType();
MVT VT2 = V2.getSimpleValueType();
@@ -26853,6 +27403,11 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
MVT ShuffleSrcVT, ShuffleVT;
unsigned Shuffle, PermuteImm;
+ // Which shuffle domains are permitted?
+ // Permit domain crossing at higher combine depths.
+ bool AllowFloatDomain = FloatDomain || (Depth > 3);
+ bool AllowIntDomain = !FloatDomain || (Depth > 3);
+
if (UnaryShuffle) {
// If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load
// directly if we don't shuffle the lower element and we shuffle the upper
@@ -26869,8 +27424,9 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
- if (matchUnaryVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget, Shuffle,
- ShuffleSrcVT, ShuffleVT)) {
+ if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
+ V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
+ ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -26884,8 +27440,9 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
- if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget,
- Shuffle, ShuffleVT, PermuteImm)) {
+ if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain,
+ AllowIntDomain, Subtarget, Shuffle,
+ ShuffleVT, PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -26901,8 +27458,9 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
}
- if (matchBinaryVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, Subtarget,
- Shuffle, ShuffleVT, UnaryShuffle)) {
+ if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
+ V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleVT,
+ UnaryShuffle)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -26918,8 +27476,9 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
- if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, DL,
- DAG, Subtarget, Shuffle, ShuffleVT,
+ if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain,
+ AllowIntDomain, V1, V2, DL, DAG,
+ Subtarget, Shuffle, ShuffleVT,
PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
@@ -27039,12 +27598,12 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) {
APInt Zero = APInt::getNullValue(MaskEltSizeInBits);
APInt AllOnes = APInt::getAllOnesValue(MaskEltSizeInBits);
- SmallBitVector UndefElts(NumMaskElts, false);
+ APInt UndefElts(NumMaskElts, 0);
SmallVector<APInt, 64> EltBits(NumMaskElts, Zero);
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef) {
- UndefElts[i] = true;
+ UndefElts.setBit(i);
continue;
}
if (M == SM_SentinelZero)
@@ -27228,8 +27787,8 @@ static bool combineX86ShufflesConstants(const SmallVectorImpl<SDValue> &Ops,
// Extract constant bits from each source op.
bool OneUseConstantOp = false;
- SmallVector<SmallBitVector, 4> UndefEltsOps(NumOps);
- SmallVector<SmallVector<APInt, 8>, 4> RawBitsOps(NumOps);
+ SmallVector<APInt, 16> UndefEltsOps(NumOps);
+ SmallVector<SmallVector<APInt, 16>, 16> RawBitsOps(NumOps);
for (unsigned i = 0; i != NumOps; ++i) {
SDValue SrcOp = Ops[i];
OneUseConstantOp |= SrcOp.hasOneUse();
@@ -27245,18 +27804,18 @@ static bool combineX86ShufflesConstants(const SmallVectorImpl<SDValue> &Ops,
return false;
// Shuffle the constant bits according to the mask.
- SmallBitVector UndefElts(NumMaskElts, false);
- SmallBitVector ZeroElts(NumMaskElts, false);
- SmallBitVector ConstantElts(NumMaskElts, false);
+ APInt UndefElts(NumMaskElts, 0);
+ APInt ZeroElts(NumMaskElts, 0);
+ APInt ConstantElts(NumMaskElts, 0);
SmallVector<APInt, 8> ConstantBitData(NumMaskElts,
APInt::getNullValue(MaskSizeInBits));
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef) {
- UndefElts[i] = true;
+ UndefElts.setBit(i);
continue;
} else if (M == SM_SentinelZero) {
- ZeroElts[i] = true;
+ ZeroElts.setBit(i);
continue;
}
assert(0 <= M && M < (int)(NumMaskElts * NumOps));
@@ -27266,21 +27825,21 @@ static bool combineX86ShufflesConstants(const SmallVectorImpl<SDValue> &Ops,
auto &SrcUndefElts = UndefEltsOps[SrcOpIdx];
if (SrcUndefElts[SrcMaskIdx]) {
- UndefElts[i] = true;
+ UndefElts.setBit(i);
continue;
}
auto &SrcEltBits = RawBitsOps[SrcOpIdx];
APInt &Bits = SrcEltBits[SrcMaskIdx];
if (!Bits) {
- ZeroElts[i] = true;
+ ZeroElts.setBit(i);
continue;
}
- ConstantElts[i] = true;
+ ConstantElts.setBit(i);
ConstantBitData[i] = Bits;
}
- assert((UndefElts | ZeroElts | ConstantElts).count() == NumMaskElts);
+ assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue());
// Create the constant data.
MVT MaskSVT;
@@ -27330,6 +27889,7 @@ static bool combineX86ShufflesConstants(const SmallVectorImpl<SDValue> &Ops,
static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
int SrcOpIndex, SDValue Root,
ArrayRef<int> RootMask,
+ ArrayRef<const SDNode*> SrcNodes,
int Depth, bool HasVariableMask,
SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -27353,13 +27913,17 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
"Can only combine shuffles of the same vector register size.");
// Extract target shuffle mask and resolve sentinels and inputs.
- SDValue Input0, Input1;
- SmallVector<int, 16> OpMask;
- if (!resolveTargetShuffleInputs(Op, Input0, Input1, OpMask))
+ SmallVector<int, 64> OpMask;
+ SmallVector<SDValue, 2> OpInputs;
+ if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask))
return false;
+ assert(OpInputs.size() <= 2 && "Too many shuffle inputs");
+ SDValue Input0 = (OpInputs.size() > 0 ? OpInputs[0] : SDValue());
+ SDValue Input1 = (OpInputs.size() > 1 ? OpInputs[1] : SDValue());
+
// Add the inputs to the Ops list, avoiding duplicates.
- SmallVector<SDValue, 8> Ops(SrcOps.begin(), SrcOps.end());
+ SmallVector<SDValue, 16> Ops(SrcOps.begin(), SrcOps.end());
int InputIdx0 = -1, InputIdx1 = -1;
for (int i = 0, e = Ops.size(); i < e; ++i) {
@@ -27392,8 +27956,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
(RootRatio == 1) != (OpRatio == 1)) &&
"Must not have a ratio for both incoming and op masks!");
- SmallVector<int, 16> Mask;
- Mask.reserve(MaskWidth);
+ SmallVector<int, 64> Mask((unsigned)MaskWidth, SM_SentinelUndef);
// Merge this shuffle operation's mask into our accumulated mask. Note that
// this shuffle's mask will be the first applied to the input, followed by the
@@ -27403,7 +27966,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
int RootIdx = i / RootRatio;
if (RootMask[RootIdx] < 0) {
// This is a zero or undef lane, we're done.
- Mask.push_back(RootMask[RootIdx]);
+ Mask[i] = RootMask[RootIdx];
continue;
}
@@ -27413,7 +27976,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
// than the SrcOp we're currently inserting.
if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) ||
(((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) {
- Mask.push_back(RootMaskedIdx);
+ Mask[i] = RootMaskedIdx;
continue;
}
@@ -27423,7 +27986,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
if (OpMask[OpIdx] < 0) {
// The incoming lanes are zero or undef, it doesn't matter which ones we
// are using.
- Mask.push_back(OpMask[OpIdx]);
+ Mask[i] = OpMask[OpIdx];
continue;
}
@@ -27439,7 +28002,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
OpMaskedIdx += InputIdx1 * MaskWidth;
}
- Mask.push_back(OpMaskedIdx);
+ Mask[i] = OpMaskedIdx;
}
// Handle the all undef/zero cases early.
@@ -27457,28 +28020,25 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
}
// Remove unused shuffle source ops.
- SmallVector<SDValue, 8> UsedOps;
- for (int i = 0, e = Ops.size(); i < e; ++i) {
- int lo = UsedOps.size() * MaskWidth;
- int hi = lo + MaskWidth;
- if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
- UsedOps.push_back(Ops[i]);
- continue;
- }
- for (int &M : Mask)
- if (lo <= M)
- M -= MaskWidth;
- }
- assert(!UsedOps.empty() && "Shuffle with no inputs detected");
- Ops = UsedOps;
+ resolveTargetShuffleInputsAndMask(Ops, Mask);
+ assert(!Ops.empty() && "Shuffle with no inputs detected");
HasVariableMask |= isTargetShuffleVariableMask(Op.getOpcode());
- // See if we can recurse into each shuffle source op (if it's a target shuffle).
+ // Update the list of shuffle nodes that have been combined so far.
+ SmallVector<const SDNode *, 16> CombinedNodes(SrcNodes.begin(),
+ SrcNodes.end());
+ CombinedNodes.push_back(Op.getNode());
+
+ // See if we can recurse into each shuffle source op (if it's a target
+ // shuffle). The source op should only be combined if it either has a
+ // single use (i.e. current Op) or all its users have already been combined.
for (int i = 0, e = Ops.size(); i < e; ++i)
- if (Ops[i].getNode()->hasOneUse() || Op->isOnlyUserOf(Ops[i].getNode()))
- if (combineX86ShufflesRecursively(Ops, i, Root, Mask, Depth + 1,
- HasVariableMask, DAG, DCI, Subtarget))
+ if (Ops[i].getNode()->hasOneUse() ||
+ SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode()))
+ if (combineX86ShufflesRecursively(Ops, i, Root, Mask, CombinedNodes,
+ Depth + 1, HasVariableMask, DAG, DCI,
+ Subtarget))
return true;
// Attempt to constant fold all of the constant source ops.
@@ -27495,7 +28055,7 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
// elements, and shrink them to the half-width mask. It does this in a loop
// so it will reduce the size of the mask to the minimal width mask which
// performs an equivalent shuffle.
- SmallVector<int, 16> WidenedMask;
+ SmallVector<int, 64> WidenedMask;
while (Mask.size() > 1 && canWidenShuffleElements(Mask, WidenedMask)) {
Mask = std::move(WidenedMask);
}
@@ -27561,8 +28121,7 @@ static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
/// altering anything.
static SDValue
combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
- SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG) {
assert(N.getOpcode() == X86ISD::PSHUFD &&
"Called with something other than an x86 128-bit half shuffle!");
SDLoc DL(N);
@@ -27842,19 +28401,20 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
}
case X86ISD::MOVSD:
case X86ISD::MOVSS: {
- bool isFloat = VT.isFloatingPoint();
SDValue V0 = peekThroughBitcasts(N->getOperand(0));
SDValue V1 = peekThroughBitcasts(N->getOperand(1));
- bool isFloat0 = V0.getSimpleValueType().isFloatingPoint();
- bool isFloat1 = V1.getSimpleValueType().isFloatingPoint();
bool isZero0 = ISD::isBuildVectorAllZeros(V0.getNode());
bool isZero1 = ISD::isBuildVectorAllZeros(V1.getNode());
- assert(!(isZero0 && isZero1) && "Zeroable shuffle detected.");
+ if (isZero0 && isZero1)
+ return SDValue();
// We often lower to MOVSD/MOVSS from integer as well as native float
// types; remove unnecessary domain-crossing bitcasts if we can to make it
// easier to combine shuffles later on. We've already accounted for the
// domain switching cost when we decided to lower with it.
+ bool isFloat = VT.isFloatingPoint();
+ bool isFloat0 = V0.getSimpleValueType().isFloatingPoint();
+ bool isFloat1 = V1.getSimpleValueType().isFloatingPoint();
if ((isFloat != isFloat0 || isZero0) && (isFloat != isFloat1 || isZero1)) {
MVT NewVT = isFloat ? (X86ISD::MOVSD == Opcode ? MVT::v2i64 : MVT::v4i32)
: (X86ISD::MOVSD == Opcode ? MVT::v2f64 : MVT::v4f32);
@@ -28025,7 +28585,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
break;
case X86ISD::PSHUFD:
- if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG, DCI))
+ if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG))
return NewN;
break;
@@ -28173,12 +28733,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
-
- // Don't create instructions with illegal types after legalize types has run.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
- return SDValue();
-
// If we have legalized the vector types, look for blends of FADD and FSUB
// nodes that we can fuse into an ADDSUB node.
if (TLI.isTypeLegal(VT))
@@ -28249,11 +28804,18 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
// load4, <0, 1, 2, 3> into a 128-bit load if the load addresses are
// consecutive, non-overlapping, and in the right order.
SmallVector<SDValue, 16> Elts;
- for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
- Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ if (SDValue Elt = getShuffleScalarElt(N, i, DAG, 0)) {
+ Elts.push_back(Elt);
+ continue;
+ }
+ Elts.clear();
+ break;
+ }
- if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true))
- return LD;
+ if (Elts.size() == VT.getVectorNumElements())
+ if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true))
+ return LD;
// For AVX2, we sometimes want to combine
// (vector_shuffle <mask> (concat_vectors t1, undef)
@@ -28276,7 +28838,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
// a particular chain.
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
- if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask,
+ if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
/*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget))
return SDValue(); // This routine will use CombineTo to replace N.
@@ -28303,18 +28865,13 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
EVT OriginalVT = InVec.getValueType();
- if (InVec.getOpcode() == ISD::BITCAST) {
- // Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
- EVT BCVT = InVec.getOperand(0).getValueType();
- if (!BCVT.isVector() ||
- BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
- return SDValue();
- InVec = InVec.getOperand(0);
- }
+ // Peek through bitcasts, but don't duplicate a load with other uses.
+ InVec = peekThroughOneUseBitcasts(InVec);
EVT CurrentVT = InVec.getValueType();
+ if (!CurrentVT.isVector() ||
+ CurrentVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
+ return SDValue();
if (!isTargetShuffle(InVec.getOpcode()))
return SDValue();
@@ -28393,19 +28950,41 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ EVT SrcVT = N0.getValueType();
+
+ // Since MMX types are special and don't usually play with other vector types,
+ // it's better to handle them early to be sure we emit efficient code by
+ // avoiding store-load conversions.
- // Detect bitcasts between i32 to x86mmx low word. Since MMX types are
- // special and don't usually play with other vector types, it's better to
- // handle them early to be sure we emit efficient code by avoiding
- // store-load conversions.
+ // Detect bitcasts from i32 to the x86mmx low word.
if (VT == MVT::x86mmx && N0.getOpcode() == ISD::BUILD_VECTOR &&
- N0.getValueType() == MVT::v2i32 &&
- isNullConstant(N0.getOperand(1))) {
+ SrcVT == MVT::v2i32 && isNullConstant(N0.getOperand(1))) {
SDValue N00 = N0->getOperand(0);
if (N00.getValueType() == MVT::i32)
return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00);
}
+ // Detect bitcasts from an element or subvector extraction to x86mmx.
+ if (VT == MVT::x86mmx &&
+ (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) &&
+ isNullConstant(N0.getOperand(1))) {
+ SDValue N00 = N0->getOperand(0);
+ if (N00.getValueType().is128BitVector())
+ return DAG.getNode(X86ISD::MOVDQ2Q, SDLoc(N00), VT,
+ DAG.getBitcast(MVT::v2i64, N00));
+ }
+
+ // Detect bitcasts from FP_TO_SINT to x86mmx.
+ if (VT == MVT::x86mmx && SrcVT == MVT::v2i32 &&
+ N0.getOpcode() == ISD::FP_TO_SINT) {
+ SDLoc DL(N0);
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
+ DAG.getUNDEF(MVT::v2i32));
+ return DAG.getNode(X86ISD::MOVDQ2Q, DL, VT,
+ DAG.getBitcast(MVT::v2i64, Res));
+ }
+
// Convert a bitcasted integer logic operation that has one bitcasted
// floating-point operand into a floating-point logic operation. This may
// create a load of a constant, but that is cheaper than materializing the
@@ -28511,12 +29090,18 @@ static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0,
if (SetCC.getOpcode() != ISD::SETCC)
return false;
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
- if (CC != ISD::SETGT)
+ if (CC != ISD::SETGT && CC != ISD::SETLT)
return false;
SDValue SelectOp1 = Select->getOperand(1);
SDValue SelectOp2 = Select->getOperand(2);
+ // The following instructions assume SelectOp1 is the subtraction operand
+ // and SelectOp2 is the negation operand.
+ // In the case of SETLT this is the other way around.
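+ // e.g. (vselect (setlt x, 0), (sub 0, x), x) has the negation first, so
+ // swapping restores the (vselect (setgt x, -1), x, (sub 0, x)) form.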
+ if (CC == ISD::SETLT)
+ std::swap(SelectOp1, SelectOp2);
+
// The second operand of the select should be the negation of the first
// operand, which is implemented as 0 - SelectOp1.
if (!(SelectOp2.getOpcode() == ISD::SUB &&
@@ -28529,8 +29114,17 @@ static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0,
if (SetCC.getOperand(0) != SelectOp1)
return false;
- // The second operand of the comparison can be either -1 or 0.
- if (!(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()) ||
+ // In the SETLT case, the second operand of the comparison can be either 1 or 0.
+ APInt SplatVal;
+ if ((CC == ISD::SETLT) &&
+ !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal) &&
+ SplatVal == 1) ||
+ (ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()))))
+ return false;
+
+ // In the SETGT case, the second operand of the comparison can be either -1 or 0.
+ if ((CC == ISD::SETGT) &&
+ !(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()) ||
ISD::isBuildVectorAllOnes(SetCC.getOperand(1).getNode())))
return false;
@@ -28576,17 +29170,92 @@ static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
return DAG.getNode(X86ISD::PSADBW, DL, SadVT, SadOp0, SadOp1);
}
+// Attempt to replace an all_of/any_of style horizontal reduction with a MOVMSK.
+static SDValue combineHorizontalPredicateResult(SDNode *Extract,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // Bail without SSE2 or with AVX512VL (which uses predicate registers).
+ if (!Subtarget.hasSSE2() || Subtarget.hasVLX())
+ return SDValue();
+
+ EVT ExtractVT = Extract->getValueType(0);
+ unsigned BitWidth = ExtractVT.getSizeInBits();
+ if (ExtractVT != MVT::i64 && ExtractVT != MVT::i32 && ExtractVT != MVT::i16 &&
+ ExtractVT != MVT::i8)
+ return SDValue();
+
+ // Check for OR(any_of) and AND(all_of) horizontal reduction patterns.
+ for (ISD::NodeType Op : {ISD::OR, ISD::AND}) {
+ SDValue Match = matchBinOpReduction(Extract, Op);
+ if (!Match)
+ continue;
+
+ // EXTRACT_VECTOR_ELT can require implicit extension of the vector element
+ // which we can't support here for now.
+ if (Match.getScalarValueSizeInBits() != BitWidth)
+ continue;
+
+ // We require AVX2 for PMOVMSKB for v16i16/v32i8.
+ unsigned MatchSizeInBits = Match.getValueSizeInBits();
+ if (!(MatchSizeInBits == 128 ||
+ (MatchSizeInBits == 256 &&
+ ((Subtarget.hasAVX() && BitWidth >= 32) || Subtarget.hasAVX2()))))
+ return SDValue();
+
+ // Don't bother performing this for 2-element vectors.
+ if (Match.getValueType().getVectorNumElements() <= 2)
+ return SDValue();
+
+ // Check that we are extracting a reduction of all sign bits.
+ if (DAG.ComputeNumSignBits(Match) != BitWidth)
+ return SDValue();
+
+ // For 32/64 bit comparisons use MOVMSKPS/MOVMSKPD, else PMOVMSKB.
+ MVT MaskVT;
+ if (64 == BitWidth || 32 == BitWidth)
+ MaskVT = MVT::getVectorVT(MVT::getFloatingPointVT(BitWidth),
+ MatchSizeInBits / BitWidth);
+ else
+ MaskVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8);
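+ // e.g. a 128-bit match with 32-bit elements uses v4f32 (MOVMSKPS), while
+ // 8/16-bit elements use v16i8 (PMOVMSKB).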
+
+ APInt CompareBits;
+ ISD::CondCode CondCode;
+ if (Op == ISD::OR) {
+ // any_of -> MOVMSK != 0
+ CompareBits = APInt::getNullValue(32);
+ CondCode = ISD::CondCode::SETNE;
+ } else {
+ // all_of -> MOVMSK == ((1 << NumElts) - 1)
+ CompareBits = APInt::getLowBitsSet(32, MaskVT.getVectorNumElements());
+ CondCode = ISD::CondCode::SETEQ;
+ }
+
+ // Perform the select as i32/i64 and then truncate to avoid partial register
+ // stalls.
+ unsigned ResWidth = std::max(BitWidth, 32u);
+ EVT ResVT = EVT::getIntegerVT(*DAG.getContext(), ResWidth);
+ SDLoc DL(Extract);
+ SDValue Zero = DAG.getConstant(0, DL, ResVT);
+ SDValue Ones = DAG.getAllOnesConstant(DL, ResVT);
+ SDValue Res = DAG.getBitcast(MaskVT, Match);
+ Res = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Res);
+ Res = DAG.getSelectCC(DL, Res, DAG.getConstant(CompareBits, DL, MVT::i32),
+ Ones, Zero, CondCode);
+ return DAG.getSExtOrTrunc(Res, DL, ExtractVT);
+ }
+
+ return SDValue();
+}
+
static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// PSADBW is only supported on SSE2 and up.
if (!Subtarget.hasSSE2())
return SDValue();
- // Verify the type we're extracting from is appropriate
- // TODO: There's nothing special about i32, any integer type above i16 should
- // work just as well.
+ // Verify the type we're extracting from is any integer type above i16.
EVT VT = Extract->getOperand(0).getValueType();
- if (!VT.isSimple() || !(VT.getVectorElementType() == MVT::i32))
+ if (!VT.isSimple() || !(VT.getVectorElementType().getSizeInBits() > 16))
return SDValue();
unsigned RegSize = 128;
@@ -28595,15 +29264,28 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
else if (Subtarget.hasAVX2())
RegSize = 256;
- // We only handle v16i32 for SSE2 / v32i32 for AVX2 / v64i32 for AVX512.
+ // We handle up to v16i* for SSE2 / v32i* for AVX2 / v64i* for AVX512.
// TODO: We should be able to handle larger vectors by splitting them before
// feeding them into several SADs, and then reducing over those.
- if (VT.getSizeInBits() / 4 > RegSize)
+ if (RegSize / VT.getVectorNumElements() < 8)
return SDValue();
// Match shuffle + add pyramid.
SDValue Root = matchBinOpReduction(Extract, ISD::ADD);
+ // The operand is expected to be zero extended from i8
+ // (verified in detectZextAbsDiff).
+ // In order to convert to i64 and above, an additional any/zero/sign
+ // extend is expected.
+ // The zero extend from 32 bits has no mathematical effect on the result.
+ // Also, the sign extend is effectively a zero extend here
+ // (it extends the sign bit, which is zero).
+ // So it is correct to skip the sign/zero extend instruction.
+ if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND ||
+ Root.getOpcode() == ISD::ZERO_EXTEND ||
+ Root.getOpcode() == ISD::ANY_EXTEND))
+ Root = Root.getOperand(0);
+
// If there was a match, we want Root to be a select that is the root of an
// abs-diff pattern.
if (!Root || (Root.getOpcode() != ISD::VSELECT))
@@ -28614,7 +29296,7 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
if (!detectZextAbsDiff(Root, Zext0, Zext1))
return SDValue();
- // Create the SAD instruction
+ // Create the SAD instruction.
SDLoc DL(Extract);
SDValue SAD = createPSADBW(DAG, Zext0, Zext1, DL);
@@ -28636,13 +29318,103 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
}
}
- // Return the lowest i32.
- MVT ResVT = MVT::getVectorVT(MVT::i32, SadVT.getSizeInBits() / 32);
+ MVT Type = Extract->getSimpleValueType(0);
+ unsigned TypeSizeInBits = Type.getSizeInBits();
+ // Return the lowest TypeSizeInBits bits.
+ MVT ResVT = MVT::getVectorVT(Type, SadVT.getSizeInBits() / TypeSizeInBits);
SAD = DAG.getNode(ISD::BITCAST, DL, ResVT, SAD);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, SAD,
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Type, SAD,
Extract->getOperand(1));
}
+// Attempt to peek through a target shuffle and extract the scalar from the
+// source.
+static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue Src = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+
+ EVT VT = N->getValueType(0);
+ EVT SrcVT = Src.getValueType();
+ EVT SrcSVT = SrcVT.getVectorElementType();
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+
+ // Don't attempt this for boolean mask vectors or unknown extraction indices.
+ if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx))
+ return SDValue();
+
+ // Resolve the target shuffle inputs and mask.
+ SmallVector<int, 16> Mask;
+ SmallVector<SDValue, 2> Ops;
+ if (!resolveTargetShuffleInputs(peekThroughBitcasts(Src), Ops, Mask))
+ return SDValue();
+
+ // Attempt to narrow/widen the shuffle mask to the correct size.
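+ // e.g. a v2i64 mask <0,3> scales up to the v4i32 mask <0,1,6,7>, and a
+ // v4i32 mask <0,1,6,7> widens back to the v2i64 mask <0,3>.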
+ if (Mask.size() != NumSrcElts) {
+ if ((NumSrcElts % Mask.size()) == 0) {
+ SmallVector<int, 16> ScaledMask;
+ int Scale = NumSrcElts / Mask.size();
+ scaleShuffleMask(Scale, Mask, ScaledMask);
+ Mask = std::move(ScaledMask);
+ } else if ((Mask.size() % NumSrcElts) == 0) {
+ SmallVector<int, 16> WidenedMask;
+ while (Mask.size() > NumSrcElts &&
+ canWidenShuffleElements(Mask, WidenedMask))
+ Mask = std::move(WidenedMask);
+ // TODO - investigate support for wider shuffle masks with known upper
+ // undef/zero elements for implicit zero-extension.
+ }
+ }
+
+ // Check if narrowing/widening failed.
+ if (Mask.size() != NumSrcElts)
+ return SDValue();
+
+ int SrcIdx = Mask[N->getConstantOperandVal(1)];
+ SDLoc dl(N);
+
+ // If the shuffle source element is undef/zero then we can just accept it.
+ if (SrcIdx == SM_SentinelUndef)
+ return DAG.getUNDEF(VT);
+
+ if (SrcIdx == SM_SentinelZero)
+ return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, dl, VT)
+ : DAG.getConstant(0, dl, VT);
+
+ SDValue SrcOp = Ops[SrcIdx / Mask.size()];
+ SrcOp = DAG.getBitcast(SrcVT, SrcOp);
+ SrcIdx = SrcIdx % Mask.size();
+
+ // We can only extract other elements from 128-bit vectors and in certain
+ // circumstances, depending on SSE-level.
+ // TODO: Investigate using extract_subvector for larger vectors.
+ // TODO: Investigate float/double extraction if it will be just stored.
+ if ((SrcVT == MVT::v4i32 || SrcVT == MVT::v2i64) &&
+ ((SrcIdx == 0 && Subtarget.hasSSE2()) || Subtarget.hasSSE41())) {
+ assert(SrcSVT == VT && "Unexpected extraction type");
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcSVT, SrcOp,
+ DAG.getIntPtrConstant(SrcIdx, dl));
+ }
+
+ if ((SrcVT == MVT::v8i16 && Subtarget.hasSSE2()) ||
+ (SrcVT == MVT::v16i8 && Subtarget.hasSSE41())) {
+ assert(VT.getSizeInBits() >= SrcSVT.getSizeInBits() &&
+ "Unexpected extraction type");
+ unsigned OpCode = (SrcVT == MVT::v8i16 ? X86ISD::PEXTRW : X86ISD::PEXTRB);
+ SDValue ExtOp = DAG.getNode(OpCode, dl, MVT::i32, SrcOp,
+ DAG.getIntPtrConstant(SrcIdx, dl));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, ExtOp,
+ DAG.getValueType(SrcSVT));
+ return DAG.getZExtOrTrunc(Assert, dl, VT);
+ }
+
+ return SDValue();
+}
+
/// Detect vector gather/scatter index generation and convert it from being a
/// bunch of shuffles and extracts into a somewhat faster sequence.
/// For i686, the best sequence is apparently storing the value and loading
@@ -28653,14 +29425,29 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI))
return NewOp;
+ if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget))
+ return NewOp;
+
SDValue InputVector = N->getOperand(0);
+ SDValue EltIdx = N->getOperand(1);
+
+ EVT SrcVT = InputVector.getValueType();
+ EVT VT = N->getValueType(0);
SDLoc dl(InputVector);
+
+ // Detect mmx extraction of all bits as an i64. It works better as a bitcast.
+ if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
+ VT == MVT::i64 && SrcVT == MVT::v1i64 && isNullConstant(EltIdx)) {
+ SDValue MMXSrc = InputVector.getOperand(0);
+
+ // The bitcast source is a direct mmx result.
+ if (MMXSrc.getValueType() == MVT::x86mmx)
+ return DAG.getBitcast(VT, InputVector);
+ }
+
// Detect mmx to i32 conversion through a v2i32 elt extract.
if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
- N->getValueType(0) == MVT::i32 &&
- InputVector.getValueType() == MVT::v2i32 &&
- isa<ConstantSDNode>(N->getOperand(1)) &&
- N->getConstantOperandVal(1) == 0) {
+ VT == MVT::i32 && SrcVT == MVT::v2i32 && isNullConstant(EltIdx)) {
SDValue MMXSrc = InputVector.getOperand(0);
// The bitcast source is a direct mmx result.
@@ -28668,15 +29455,11 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::MMX_MOVD2W, dl, MVT::i32, MMXSrc);
}
- EVT VT = N->getValueType(0);
-
- if (VT == MVT::i1 && isa<ConstantSDNode>(N->getOperand(1)) &&
- InputVector.getOpcode() == ISD::BITCAST &&
+ if (VT == MVT::i1 && InputVector.getOpcode() == ISD::BITCAST &&
+ isa<ConstantSDNode>(EltIdx) &&
isa<ConstantSDNode>(InputVector.getOperand(0))) {
- uint64_t ExtractedElt =
- cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
- uint64_t InputValue =
- cast<ConstantSDNode>(InputVector.getOperand(0))->getZExtValue();
+ uint64_t ExtractedElt = N->getConstantOperandVal(1);
+ uint64_t InputValue = InputVector.getConstantOperandVal(0);
uint64_t Res = (InputValue >> ExtractedElt) & 1;
return DAG.getConstant(Res, dl, MVT::i1);
}
@@ -28687,9 +29470,13 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (SDValue SAD = combineBasicSADPattern(N, DAG, Subtarget))
return SAD;
+ // Attempt to replace an all_of/any_of horizontal reduction with a MOVMSK.
+ if (SDValue Cmp = combineHorizontalPredicateResult(N, DAG, Subtarget))
+ return Cmp;
+
// Only operate on vectors of 4 elements, where the alternative shuffling
// gets to be more expensive.
- if (InputVector.getValueType() != MVT::v4i32)
+ if (SrcVT != MVT::v4i32)
return SDValue();
// Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a
@@ -28717,9 +29504,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Record which element was extracted.
- ExtractedElements |=
- 1 << cast<ConstantSDNode>(Extract->getOperand(1))->getZExtValue();
-
+ ExtractedElements |= 1 << Extract->getConstantOperandVal(1);
Uses.push_back(Extract);
}
@@ -28752,11 +29537,11 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
DAG.getNode(ISD::SRA, dl, MVT::i64, TopHalf, ShAmt));
} else {
// Store the value to a temporary stack slot.
- SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
+ SDValue StackPtr = DAG.CreateStackTemporary(SrcVT);
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
MachinePointerInfo());
- EVT ElementType = InputVector.getValueType().getVectorElementType();
+ EVT ElementType = SrcVT.getVectorElementType();
unsigned EltSize = ElementType.getSizeInBits() / 8;
// Replace each use (extract) with a load of the appropriate element.
@@ -28779,8 +29564,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
UE = Uses.end(); UI != UE; ++UI) {
SDNode *Extract = *UI;
- SDValue Idx = Extract->getOperand(1);
- uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ uint64_t IdxVal = Extract->getConstantOperandVal(1);
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), Vals[IdxVal]);
}
@@ -28788,6 +29572,16 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// TODO - merge with combineExtractVectorElt once it can handle the implicit
+// zero-extension of X86ISD::PINSRW/X86ISD::PINSRB in:
+// XFormVExtractWithShuffleIntoLoad, combineHorizontalPredicateResult and
+// combineBasicSADPattern.
+static SDValue combineExtractVectorElt_SSE(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ return combineExtractWithShuffle(N, DAG, DCI, Subtarget);
+}
+
/// If a vector select has an operand that is -1 or 0, try to simplify the
/// select to a bitwise logic operation.
static SDValue
@@ -28812,12 +29606,11 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
// This situation only applies to avx512.
if (FValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse() &&
CondVT.getVectorElementType() == MVT::i1) {
- //Invert the cond to not(cond) : xor(op,allones)=not(op)
- SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
- DAG.getConstant(APInt::getAllOnesValue(CondVT.getScalarSizeInBits()),
- DL, CondVT));
- //Vselect cond, op1, op2 = Vselect not(cond), op2, op1
- return DAG.getNode(ISD::VSELECT, DL, VT, CondNew, RHS, LHS);
+ // Invert the cond to not(cond) : xor(op,allones)=not(op)
+ SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getAllOnesConstant(DL, CondVT));
+ // Vselect cond, op1, op2 = Vselect not(cond), op2, op1
+ return DAG.getNode(ISD::VSELECT, DL, VT, CondNew, RHS, LHS);
}
// To use the condition operand as a bitwise mask, it must have elements that
@@ -28920,18 +29713,6 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
DAG.getConstant(ShAmt, DL, MVT::i8));
}
- // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst.
- if (FalseC->getAPIntValue() + 1 == TrueC->getAPIntValue()) {
- if (NeedsCondInvert) // Invert the condition if needed.
- Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
- DAG.getConstant(1, DL, Cond.getValueType()));
-
- // Zero extend the condition if needed.
- Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond);
- return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
- SDValue(FalseC, 0));
- }
-
// Optimize cases that will turn into an LEA instruction. This requires
// an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
@@ -29049,7 +29830,7 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
return false;
MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
// Only change element size, not type.
- if (VT.isInteger() != OpEltVT.isInteger())
+ if (EltVT.isInteger() != OpEltVT.isInteger())
return false;
uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
@@ -29063,7 +29844,7 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
DCI.AddToWorklist(Op1.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0, Op1,
- DAG.getConstant(Imm, DL, MVT::i8)));
+ DAG.getIntPtrConstant(Imm, DL)));
return true;
}
case ISD::EXTRACT_SUBVECTOR: {
@@ -29072,7 +29853,7 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
return false;
MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
// Only change element size, not type.
- if (VT.isInteger() != OpEltVT.isInteger())
+ if (EltVT.isInteger() != OpEltVT.isInteger())
return false;
uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
@@ -29084,7 +29865,23 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
DCI.AddToWorklist(Op0.getNode());
DCI.CombineTo(OrigOp.getNode(),
DAG.getNode(Opcode, DL, VT, Op0,
- DAG.getConstant(Imm, DL, MVT::i8)));
+ DAG.getIntPtrConstant(Imm, DL)));
+ return true;
+ }
+ case X86ISD::SUBV_BROADCAST: {
+ unsigned EltSize = EltVT.getSizeInBits();
+ if (EltSize != 32 && EltSize != 64)
+ return false;
+ // Only change element size, not type.
+ if (VT.isInteger() != Op.getSimpleValueType().isInteger())
+ return false;
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = MVT::getVectorVT(EltVT,
+ Op0.getSimpleValueType().getSizeInBits() / EltSize);
+ Op0 = DAG.getBitcast(Op0VT, Op.getOperand(0));
+ DCI.AddToWorklist(Op0.getNode());
+ DCI.CombineTo(OrigOp.getNode(),
+ DAG.getNode(Opcode, DL, VT, Op0));
return true;
}
}
@@ -29370,8 +30167,8 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// If this is a *dynamic* select (non-constant condition) and we can match
// this node with one of the variable blend instructions, restructure the
- // condition so that the blends can use the high bit of each element and use
- // SimplifyDemandedBits to simplify the condition operand.
+ // condition so that blends can use the high (sign) bit of each element and
+ // use SimplifyDemandedBits to simplify the condition operand.
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
!DCI.isBeforeLegalize() &&
!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
@@ -29406,49 +30203,45 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return SDValue();
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
- APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
-
+ APInt DemandedMask(APInt::getSignBit(BitWidth));
APInt KnownZero, KnownOne;
TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
DCI.isBeforeLegalizeOps());
if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) ||
TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne,
TLO)) {
- // If we changed the computation somewhere in the DAG, this change
- // will affect all users of Cond.
- // Make sure it is fine and update all the nodes so that we do not
- // use the generic VSELECT anymore. Otherwise, we may perform
- // wrong optimizations as we messed up with the actual expectation
+ // If we changed the computation somewhere in the DAG, this change will
+ // affect all users of Cond. Make sure it is fine and update all the nodes
+ // so that we do not use the generic VSELECT anymore. Otherwise, we may
+ // perform wrong optimizations as we messed with the actual expectation
// for the vector boolean values.
if (Cond != TLO.Old) {
- // Check all uses of that condition operand to check whether it will be
- // consumed by non-BLEND instructions, which may depend on all bits are
- // set properly.
- for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end();
- I != E; ++I)
- if (I->getOpcode() != ISD::VSELECT)
- // TODO: Add other opcodes eventually lowered into BLEND.
+ // Check all uses of the condition operand to see whether it will be
+ // consumed by non-BLEND instructions. Those may require that all bits
+ // are set properly.
+ for (SDNode *U : Cond->uses()) {
+ // TODO: Add other opcodes eventually lowered into BLEND.
+ if (U->getOpcode() != ISD::VSELECT)
return SDValue();
+ }
- // Update all the users of the condition, before committing the change,
- // so that the VSELECT optimizations that expect the correct vector
- // boolean value will not be triggered.
- for (SDNode::use_iterator I = Cond->use_begin(), E = Cond->use_end();
- I != E; ++I)
- DAG.ReplaceAllUsesOfValueWith(
- SDValue(*I, 0),
- DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(*I), I->getValueType(0),
- Cond, I->getOperand(1), I->getOperand(2)));
+ // Update all users of the condition before committing the change, so
+ // that the VSELECT optimizations that expect the correct vector boolean
+ // value will not be triggered.
+ for (SDNode *U : Cond->uses()) {
+ SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(U),
+ U->getValueType(0), Cond, U->getOperand(1),
+ U->getOperand(2));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
+ }
DCI.CommitTargetLoweringOpt(TLO);
return SDValue();
}
- // At this point, only Cond is changed. Change the condition
- // just for N to keep the opportunity to optimize all other
- // users their own way.
- DAG.ReplaceAllUsesOfValueWith(
- SDValue(N, 0),
- DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(N), N->getValueType(0),
- TLO.New, N->getOperand(1), N->getOperand(2)));
+ // Only Cond (rather than other nodes in the computation chain) was
+ // changed. Change the condition just for N to keep the opportunity to
+ // optimize all other users in their own way.
+ SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, DL, VT, TLO.New, LHS, RHS);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), SB);
return SDValue();
}
}
@@ -29456,7 +30249,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Look for vselects with LHS/RHS being bitcasted from an operation that
// can be executed on another type. Push the bitcast to the inputs of
// the operation. This exposes opportunities for using masking instructions.
- if (N->getOpcode() == ISD::VSELECT && !DCI.isBeforeLegalizeOps() &&
+ if (N->getOpcode() == ISD::VSELECT && DCI.isAfterLegalizeVectorOps() &&
CondVT.getVectorElementType() == MVT::i1) {
if (combineBitcastForMaskedOp(LHS, DAG, DCI))
return SDValue(N, 0);
@@ -30208,22 +31001,37 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
}
if (!NewMul) {
- assert(MulAmt != 0 && MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX)
- && "Both cases that could cause potential overflows should have "
- "already been handled.");
- if (isPowerOf2_64(MulAmt - 1))
- // (mul x, 2^N + 1) => (add (shl x, N), x)
- NewMul = DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
- DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
- DAG.getConstant(Log2_64(MulAmt - 1), DL,
- MVT::i8)));
-
- else if (isPowerOf2_64(MulAmt + 1))
- // (mul x, 2^N - 1) => (sub (shl x, N), x)
- NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::SHL, DL, VT,
- N->getOperand(0),
- DAG.getConstant(Log2_64(MulAmt + 1),
- DL, MVT::i8)), N->getOperand(0));
+ assert(MulAmt != 0 &&
+ MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
+ "Both cases that could cause potential overflows should have "
+ "already been handled.");
+ int64_t SignMulAmt = C->getSExtValue();
+ if ((SignMulAmt != INT64_MIN) && (SignMulAmt != INT64_MAX) &&
+ (SignMulAmt != -INT64_MAX)) {
+ int NumSign = SignMulAmt > 0 ? 1 : -1;
+ bool IsPowerOf2_64PlusOne = isPowerOf2_64(NumSign * SignMulAmt - 1);
+ bool IsPowerOf2_64MinusOne = isPowerOf2_64(NumSign * SignMulAmt + 1);
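+ // e.g. both 9 and -9 match the 2^N + 1 form and both 7 and -7 match the
+ // 2^N - 1 form; negative amounts are negated via the final subtract below.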
+ if (IsPowerOf2_64PlusOne) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ NewMul = DAG.getNode(
+ ISD::ADD, DL, VT, N->getOperand(0),
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(NumSign * SignMulAmt - 1), DL,
+ MVT::i8)));
+ } else if (IsPowerOf2_64MinusOne) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ NewMul = DAG.getNode(
+ ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(NumSign * SignMulAmt + 1), DL,
+ MVT::i8)),
+ N->getOperand(0));
+ }
+ // To negate, subtract the result from zero.
+ if ((IsPowerOf2_64PlusOne || IsPowerOf2_64MinusOne) && NumSign == -1)
+ NewMul =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul);
+ }
}
if (NewMul)
@@ -30396,42 +31204,95 @@ static SDValue combineShift(SDNode* N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue combineVectorShift(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
- assert((X86ISD::VSHLI == N->getOpcode() || X86ISD::VSRLI == N->getOpcode()) &&
- "Unexpected opcode");
+static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ unsigned Opcode = N->getOpcode();
+ assert((X86ISD::VSHLI == Opcode || X86ISD::VSRAI == Opcode ||
+ X86ISD::VSRLI == Opcode) &&
+ "Unexpected shift opcode");
+ bool LogicalShift = X86ISD::VSHLI == Opcode || X86ISD::VSRLI == Opcode;
EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
-
- // This fails for mask register (vXi1) shifts.
- if ((NumBitsPerElt % 8) != 0)
- return SDValue();
+ assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 &&
+ "Unexpected value type");
// Out of range logical bit shifts are guaranteed to be zero.
- APInt ShiftVal = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
- if (ShiftVal.zextOrTrunc(8).uge(NumBitsPerElt))
- return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+ // Out of range arithmetic bit shifts splat the sign bit.
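+ // e.g. (VSRAI v4i32 X, 36) is equivalent to (VSRAI v4i32 X, 31).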
+ APInt ShiftVal = cast<ConstantSDNode>(N1)->getAPIntValue();
+ if (ShiftVal.zextOrTrunc(8).uge(NumBitsPerElt)) {
+ if (LogicalShift)
+ return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+ else
+ ShiftVal = NumBitsPerElt - 1;
+ }
// Shift N0 by zero -> N0.
if (!ShiftVal)
- return N->getOperand(0);
+ return N0;
// Shift zero -> zero.
- if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+ // fold (VSRLI (VSRAI X, Y), 31) -> (VSRLI X, 31).
+ // This VSRLI only looks at the sign bit, which is unmodified by VSRAI.
+ // TODO - support other sra opcodes as needed.
+ if (Opcode == X86ISD::VSRLI && (ShiftVal + 1) == NumBitsPerElt &&
+ N0.getOpcode() == X86ISD::VSRAI)
+ return DAG.getNode(X86ISD::VSRLI, SDLoc(N), VT, N0.getOperand(0), N1);
+
// We can decode 'whole byte' logical bit shifts as shuffles.
- if ((ShiftVal.getZExtValue() % 8) == 0) {
+ if (LogicalShift && (ShiftVal.getZExtValue() % 8) == 0) {
SDValue Op(N, 0);
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
- if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask,
+ if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
/*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget))
return SDValue(); // This routine will use CombineTo to replace N.
}
+ // Constant Folding.
+ APInt UndefElts;
+ SmallVector<APInt, 32> EltBits;
+ if (N->isOnlyUserOf(N0.getNode()) &&
+ getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) {
+ assert(EltBits.size() == VT.getVectorNumElements() &&
+ "Unexpected shift value type");
+ unsigned ShiftImm = ShiftVal.getZExtValue();
+ for (APInt &Elt : EltBits) {
+ if (X86ISD::VSHLI == Opcode)
+ Elt = Elt.shl(ShiftImm);
+ else if (X86ISD::VSRAI == Opcode)
+ Elt = Elt.ashr(ShiftImm);
+ else
+ Elt = Elt.lshr(ShiftImm);
+ }
+ return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
+ }
+
+ return SDValue();
+}
+
+static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ assert(
+ ((N->getOpcode() == X86ISD::PINSRB && N->getValueType(0) == MVT::v16i8) ||
+ (N->getOpcode() == X86ISD::PINSRW &&
+ N->getValueType(0) == MVT::v8i16)) &&
+ "Unexpected vector insertion");
+
+ // Attempt to combine PINSRB/PINSRW patterns to a shuffle.
+ SDValue Op(N, 0);
+ SmallVector<int, 1> NonceMask; // Just a placeholder.
+ NonceMask.push_back(0);
+ combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
+ /*Depth*/ 1, /*HasVarMask*/ false, DAG,
+ DCI, Subtarget);
return SDValue();
}
@@ -30550,33 +31411,15 @@ static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
if (VT != MVT::v2i64 && VT != MVT::v4i64 && VT != MVT::v8i64)
return SDValue();
- // Canonicalize XOR to the left.
- if (N1.getOpcode() == ISD::XOR)
- std::swap(N0, N1);
+ if (N0.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
- if (N0.getOpcode() != ISD::XOR)
- return SDValue();
-
- SDValue N00 = N0->getOperand(0);
- SDValue N01 = N0->getOperand(1);
+ if (N1.getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
- N01 = peekThroughBitcasts(N01);
-
- // Either match a direct AllOnes for 128, 256, and 512-bit vectors, or an
- // insert_subvector building a 256-bit AllOnes vector.
- if (!ISD::isBuildVectorAllOnes(N01.getNode())) {
- if (!VT.is256BitVector() || N01->getOpcode() != ISD::INSERT_SUBVECTOR)
- return SDValue();
-
- SDValue V1 = N01->getOperand(0);
- SDValue V2 = N01->getOperand(1);
- if (V1.getOpcode() != ISD::INSERT_SUBVECTOR ||
- !V1.getOperand(0).isUndef() ||
- !ISD::isBuildVectorAllOnes(V1.getOperand(1).getNode()) ||
- !ISD::isBuildVectorAllOnes(V2.getNode()))
- return SDValue();
- }
- return DAG.getNode(X86ISD::ANDNP, DL, VT, N00, N1);
+ return SDValue();
}
// On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized
@@ -30696,38 +31539,34 @@ static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// If this is a PCMPEQ or PCMPGT result that is bitwise-anded with 1 (this is
-/// the x86 lowering of a SETCC + ZEXT), replace the 'and' with a shift-right to
-/// eliminate loading the vector constant mask value. This relies on the fact
-/// that a PCMP always creates an all-ones or all-zeros bitmask per element.
-static SDValue combinePCMPAnd1(SDNode *N, SelectionDAG &DAG) {
+/// If this is a zero/all-bits result that is bitwise-anded with a low-bits
+/// mask (Mask == 1 for the x86 lowering of a SETCC + ZEXT), replace the 'and'
+/// with a shift-right to eliminate loading the vector constant mask value.
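+/// e.g. for v4i32, ANDing a compare result with 1 becomes a logical
+/// shift-right of the compare result by 31.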
+static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
+ EVT VT0 = Op0.getValueType();
+ EVT VT1 = Op1.getValueType();
- // TODO: Use AssertSext to mark any nodes that have the property of producing
- // all-ones or all-zeros. Then check for that node rather than particular
- // opcodes.
- if (Op0.getOpcode() != X86ISD::PCMPEQ && Op0.getOpcode() != X86ISD::PCMPGT)
+ if (VT0 != VT1 || !VT0.isSimple() || !VT0.isInteger())
return SDValue();
- // The existence of the PCMP node guarantees that we have the required SSE2 or
- // AVX2 for a shift of this vector type, but there is no vector shift by
- // immediate for a vector with byte elements (PSRLB). 512-bit vectors use the
- // masked compare nodes, so they should not make it here.
- EVT VT0 = Op0.getValueType();
- EVT VT1 = Op1.getValueType();
- unsigned EltBitWidth = VT0.getScalarSizeInBits();
- if (VT0 != VT1 || EltBitWidth == 8)
+ APInt SplatVal;
+ if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) ||
+ !SplatVal.isMask())
return SDValue();
- assert(VT0.getSizeInBits() == 128 || VT0.getSizeInBits() == 256);
+ if (!SupportedVectorShiftWithImm(VT0.getSimpleVT(), Subtarget, ISD::SRL))
+ return SDValue();
- APInt SplatVal;
- if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) || SplatVal != 1)
+ unsigned EltBitWidth = VT0.getScalarSizeInBits();
+ if (EltBitWidth != DAG.ComputeNumSignBits(Op0))
return SDValue();
SDLoc DL(N);
- SDValue ShAmt = DAG.getConstant(EltBitWidth - 1, DL, MVT::i8);
+ unsigned ShiftVal = SplatVal.countTrailingOnes();
+ SDValue ShAmt = DAG.getConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt);
return DAG.getBitcast(N->getValueType(0), Shift);
}
@@ -30747,7 +31586,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineANDXORWithAllOnesIntoANDNP(N, DAG))
return R;
- if (SDValue ShiftRight = combinePCMPAnd1(N, DAG))
+ if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget))
return ShiftRight;
EVT VT = N->getValueType(0);
@@ -30760,7 +31599,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
SDValue Op(N, 0);
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
- if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask,
+ if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
/*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget))
return SDValue(); // This routine will use CombineTo to replace N.
@@ -30969,7 +31808,7 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
return N->getOpcode() == X86ISD::SETCC && N->hasOneUse() &&
X86::CondCode(N->getConstantOperandVal(0)) == X86::COND_E &&
N->getOperand(1).getOpcode() == X86ISD::CMP &&
- N->getOperand(1).getConstantOperandVal(1) == 0 &&
+ isNullConstant(N->getOperand(1).getOperand(1)) &&
N->getOperand(1).getValueType().bitsGE(MVT::i32);
};
@@ -31272,6 +32111,74 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
}
+/// Check if truncation with saturation from type \p SrcVT to \p DstVT
+/// is valid for the given \p Subtarget.
+static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
+ const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasAVX512())
+ return false;
+
+ // FIXME: Scalar type may be supported if we move it to a vector register.
+ if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512)
+ return false;
+
+ EVT SrcElVT = SrcVT.getScalarType();
+ EVT DstElVT = DstVT.getScalarType();
+ if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
+ return false;
+ if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
+ return false;
+ if (SrcVT.is512BitVector() || Subtarget.hasVLX())
+ return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
+ return false;
+}
+
+/// Detect a pattern of truncation with saturation:
+/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
+/// Return the source value to be truncated or SDValue() if the pattern was not
+/// matched.
+static SDValue detectUSatPattern(SDValue In, EVT VT) {
+ if (In.getOpcode() != ISD::UMIN)
+ return SDValue();
+
+ // Saturation with truncation. We truncate from InVT to VT.
+ assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() &&
+ "Unexpected types for truncate operation");
+
+ APInt C;
+ if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
+ // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according to
+ // the element size of the destination type.
+ return C.isMask(VT.getScalarSizeInBits()) ? In.getOperand(0) :
+ SDValue();
+ }
+ return SDValue();
+}
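detectUSatPattern recognizes the scalar idiom below lifted to vectors: clamping with umin at the destination's unsigned max before truncating is exactly a saturating truncate. A standalone C++ sketch (names are illustrative, not part of the patch):

#include <algorithm>
#include <cassert>
#include <cstdint>

// trunc(umin(X, UINT8_MAX)): the pattern a VPMOVUS* instruction does in one step.
static uint8_t TruncUSat(uint16_t X) {
  return (uint8_t)std::min<uint16_t>(X, 0xFF);
}

int main() {
  assert(TruncUSat(42) == 42);
  assert(TruncUSat(300) == 255); // saturates instead of wrapping to 44
  return 0;
}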
+
+/// Detect a pattern of truncation with saturation:
+/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
+/// The types should allow use of the VPMOVUS* instructions on AVX512.
+/// Return the source value to be truncated or SDValue() if the pattern was not
+/// matched.
+static SDValue detectAVX512USatPattern(SDValue In, EVT VT,
+ const X86Subtarget &Subtarget) {
+ if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
+ return SDValue();
+ return detectUSatPattern(In, VT);
+}
+
+static SDValue
+combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(In.getValueType()) || !TLI.isTypeLegal(VT))
+ return SDValue();
+ if (auto USatVal = detectUSatPattern(In, VT))
+ if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
+ return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
+ return SDValue();
+}
+
/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replaces this operation with the efficient
/// X86ISD::AVG instruction.
@@ -31664,7 +32571,7 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
Mld->getBasePtr(), NewMask, WideSrc0,
Mld->getMemoryVT(), Mld->getMemOperand(),
ISD::NON_EXTLOAD);
- SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd);
+ SDValue NewVec = getExtendInVec(X86ISD::VSEXT, dl, VT, WideLd, DAG);
return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
}
@@ -31838,6 +32745,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
+ if (SDValue Val =
+ detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
+ return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
+ dl, Val, St->getBasePtr(),
+ St->getMemoryVT(), St->getMemOperand(), DAG);
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
@@ -32198,13 +33111,30 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT SrcVT = Src.getValueType();
- auto IsRepeatedOpOrOneUseConstant = [](SDValue Op0, SDValue Op1) {
- // TODO: Add extra cases where we can truncate both inputs for the
- // cost of one (or none).
- // e.g. TRUNC( BINOP( EXT( X ), EXT( Y ) ) ) --> BINOP( X, Y )
+ auto IsRepeatedOpOrFreeTruncation = [VT](SDValue Op0, SDValue Op1) {
+ unsigned TruncSizeInBits = VT.getScalarSizeInBits();
+
+ // Repeated operand, so we are only trading one output truncation for
+ // one input truncation.
if (Op0 == Op1)
return true;
+ // See if either operand has been extended from a smaller/equal size to
+ // the truncation size, allowing a truncation to combine with the extend.
+ unsigned Opcode0 = Op0.getOpcode();
+ if ((Opcode0 == ISD::ANY_EXTEND || Opcode0 == ISD::SIGN_EXTEND ||
+ Opcode0 == ISD::ZERO_EXTEND) &&
+ Op0.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
+ return true;
+
+ unsigned Opcode1 = Op1.getOpcode();
+ if ((Opcode1 == ISD::ANY_EXTEND || Opcode1 == ISD::SIGN_EXTEND ||
+ Opcode1 == ISD::ZERO_EXTEND) &&
+ Op1.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
+ return true;
+
+ // See if either operand is a single-use constant which can be constant
+ // folded.
SDValue BC0 = peekThroughOneUseBitcasts(Op0);
SDValue BC1 = peekThroughOneUseBitcasts(Op1);
return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) ||
@@ -32236,7 +33166,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegalOrPromote(Opcode, VT) &&
- IsRepeatedOpOrOneUseConstant(Op0, Op1))
+ IsRepeatedOpOrFreeTruncation(Op0, Op1))
return TruncateArithmetic(Op0, Op1);
break;
}
@@ -32252,7 +33182,7 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
SDValue Op0 = Src.getOperand(0);
SDValue Op1 = Src.getOperand(1);
if (TLI.isOperationLegal(Opcode, VT) &&
- IsRepeatedOpOrOneUseConstant(Op0, Op1))
+ IsRepeatedOpOrFreeTruncation(Op0, Op1))
return TruncateArithmetic(Op0, Op1);
break;
}
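The free-truncation reasoning above follows from modular arithmetic: add, sub, and mul commute with truncation, so TRUNC(BINOP(EXT(X), EXT(Y))) == BINOP(X, Y). A standalone C++ check (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t X = 200, Y = 130;
  // Widening, operating, then truncating matches operating at the narrow width.
  assert((uint8_t)((uint32_t)X * (uint32_t)Y) == (uint8_t)(X * Y));
  assert((uint8_t)((uint32_t)X + (uint32_t)Y) == (uint8_t)(X + Y));
  return 0;
}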
@@ -32458,6 +33388,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;
+ // Try to combine truncation with unsigned saturation.
+ if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
+ return Val;
+
// The bitcast source is a direct mmx result.
// Detect bitcasts between i32 to x86mmx
if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
@@ -32804,6 +33738,34 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(SelectOpcode, DL, VT, IsOp0Nan, Op1, MinOrMax);
}
+/// Do target-specific dag combines on X86ISD::ANDNP nodes.
+static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ // ANDNP(0, x) -> x
+ if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
+ return N->getOperand(1);
+
+ // ANDNP(x, 0) -> 0
+ if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode()))
+ return getZeroVector(N->getSimpleValueType(0), Subtarget, DAG, SDLoc(N));
+
+ EVT VT = N->getValueType(0);
+
+ // Attempt to recursively combine a bitmask ANDNP with shuffles.
+ if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
+ SDValue Op(N, 0);
+ SmallVector<int, 1> NonceMask; // Just a placeholder.
+ NonceMask.push_back(0);
+ if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask, {},
+ /*Depth*/ 1, /*HasVarMask*/ false, DAG,
+ DCI, Subtarget))
+ return SDValue(); // This routine will use CombineTo to replace N.
+ }
+
+ return SDValue();
+}
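X86ISD::ANDNP computes (~A) & B, which is why an all-zeros first operand folds to B and an all-zeros second operand folds to zero. The same operation is exposed by the SSE2 andnot intrinsic, shown in this standalone sketch (not part of the patch):

#include <cassert>
#include <cstdint>
#include <emmintrin.h>

int main() {
  __m128i Zero = _mm_setzero_si128();
  __m128i B = _mm_set1_epi32(0x12345678);
  uint32_t Out[4];
  _mm_storeu_si128((__m128i *)Out, _mm_andnot_si128(Zero, B)); // (~0) & B == B
  assert(Out[0] == 0x12345678u);
  return 0;
}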
+
static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
// BT ignores high bits in the bit index operand.
@@ -33065,13 +34027,22 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
if (!DCI.isBeforeLegalizeOps()) {
if (InVT == MVT::i1) {
SDValue Zero = DAG.getConstant(0, DL, VT);
- SDValue AllOnes =
- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
return DAG.getNode(ISD::SELECT, DL, VT, N0, AllOnes, Zero);
}
return SDValue();
}
+ if (InVT == MVT::i1 && N0.getOpcode() == ISD::XOR &&
+ isAllOnesConstant(N0.getOperand(1)) && N0.hasOneUse()) {
+ // Inverting and sign-extending a boolean is the same as zero-extending and
+ // then subtracting 1, because 0 becomes -1 and 1 becomes 0. The subtract is
+ // efficiently lowered with an LEA or a DEC. This is the same as:
+ // select Bool, 0, -1.
+ // sext (xor Bool, -1) --> sub (zext Bool), 1
+ SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, Zext, DAG.getConstant(1, DL, VT));
+ }
+
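The fold above is easy to verify exhaustively over i1: sign-extending an inverted boolean gives -1 or 0, the same as zero-extending and subtracting 1. A standalone sketch:

#include <cassert>

int main() {
  for (int B : {0, 1})
    assert(-(B ^ 1) == B - 1); // sext(xor B, -1) == sub(zext B, 1) for i1 B
  return 0;
}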
if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget))
return V;
@@ -33212,8 +34183,47 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// Optimize x == -y --> x+y == 0
-/// x != -y --> x+y != 0
+/// Try to map a 128-bit or larger integer comparison to vector instructions
+/// before type legalization splits it up into chunks.
+static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+ assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
+
+ // We're looking for an oversized integer equality comparison, but ignore a
+ // comparison with zero because that gets special treatment in EmitTest().
+ SDValue X = SetCC->getOperand(0);
+ SDValue Y = SetCC->getOperand(1);
+ EVT OpVT = X.getValueType();
+ unsigned OpSize = OpVT.getSizeInBits();
+ if (!OpVT.isScalarInteger() || OpSize < 128 || isNullConstant(Y))
+ return SDValue();
+
+ // TODO: Use PXOR + PTEST for SSE4.1 or later?
+ // TODO: Add support for AVX-512.
+ EVT VT = SetCC->getValueType(0);
+ SDLoc DL(SetCC);
+ if ((OpSize == 128 && Subtarget.hasSSE2()) ||
+ (OpSize == 256 && Subtarget.hasAVX2())) {
+ EVT VecVT = OpSize == 128 ? MVT::v16i8 : MVT::v32i8;
+ SDValue VecX = DAG.getBitcast(VecVT, X);
+ SDValue VecY = DAG.getBitcast(VecVT, Y);
+
+ // If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
+ // setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
+ // setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
+ // setcc i256 X, Y, eq --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, eq
+ // setcc i256 X, Y, ne --> setcc (vpmovmskb (vpcmpeqb X, Y)), 0xFFFFFFFF, ne
+ SDValue Cmp = DAG.getNode(X86ISD::PCMPEQ, DL, VecVT, VecX, VecY);
+ SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp);
+ SDValue FFFFs = DAG.getConstant(OpSize == 128 ? 0xFFFF : 0xFFFFFFFF, DL,
+ MVT::i32);
+ return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC);
+ }
+
+ return SDValue();
+}
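The new combine mirrors what hand-written SSE2 equality loops do: compare all bytes at once and test the 16-bit byte mask against 0xFFFF. A standalone sketch with the corresponding intrinsics (illustrative, not LLVM code; the helper name is hypothetical):

#include <cassert>
#include <cstring>
#include <emmintrin.h>

static bool Equal16(const void *A, const void *B) {
  __m128i VA = _mm_loadu_si128((const __m128i *)A);
  __m128i VB = _mm_loadu_si128((const __m128i *)B);
  // pcmpeqb + pmovmskb: one bit per byte, all ones iff all 16 bytes match.
  return _mm_movemask_epi8(_mm_cmpeq_epi8(VA, VB)) == 0xFFFF;
}

int main() {
  char X[16] = "fifteen chars..", Y[16] = "fifteen chars..";
  assert(Equal16(X, Y) && memcmp(X, Y, 16) == 0);
  return 0;
}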
+
static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
@@ -33222,21 +34232,27 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDLoc DL(N);
- if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
- if (isNullConstant(LHS.getOperand(0)) && LHS.hasOneUse()) {
- SDValue addV = DAG.getNode(ISD::ADD, DL, LHS.getValueType(), RHS,
- LHS.getOperand(1));
- return DAG.getSetCC(DL, N->getValueType(0), addV,
- DAG.getConstant(0, DL, addV.getValueType()), CC);
+ if (CC == ISD::SETNE || CC == ISD::SETEQ) {
+ EVT OpVT = LHS.getValueType();
+ // 0-x == y --> x+y == 0
+ // 0-x != y --> x+y != 0
+ if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
+ LHS.hasOneUse()) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, RHS, LHS.getOperand(1));
+ return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
}
- if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB)
- if (isNullConstant(RHS.getOperand(0)) && RHS.hasOneUse()) {
- SDValue addV = DAG.getNode(ISD::ADD, DL, RHS.getValueType(), LHS,
- RHS.getOperand(1));
- return DAG.getSetCC(DL, N->getValueType(0), addV,
- DAG.getConstant(0, DL, addV.getValueType()), CC);
+ // x == 0-y --> x+y == 0
+ // x != 0-y --> x+y != 0
+ if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
+ RHS.hasOneUse()) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
+ return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
}
+ if (SDValue V = combineVectorSizedSetCCEquality(N, DAG, Subtarget))
+ return V;
+ }
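Both negation rewrites above rely on two's complement: X == -Y holds exactly when X + Y wraps to zero, including the INT_MIN edge case. A standalone check:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x80000000u, Y = 0x80000000u; // negating INT_MIN wraps to itself
  assert(X == 0u - Y && X + Y == 0u);
  return 0;
}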
+
if (VT.getScalarType() == MVT::i1 &&
(CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
bool IsSEXT0 =
@@ -33293,56 +34309,13 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-// Helper function of performSETCCCombine. It is to materialize "setb reg"
-// as "sbb reg,reg", since it can be extended without zext and produces
-// an all-ones bit which is more useful than 0/1 in some cases.
-static SDValue MaterializeSETB(const SDLoc &DL, SDValue EFLAGS,
- SelectionDAG &DAG, MVT VT) {
- if (VT == MVT::i8)
- return DAG.getNode(ISD::AND, DL, VT,
- DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
- DAG.getConstant(X86::COND_B, DL, MVT::i8),
- EFLAGS),
- DAG.getConstant(1, DL, VT));
- assert (VT == MVT::i1 && "Unexpected type for SECCC node");
- return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1,
- DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8,
- DAG.getConstant(X86::COND_B, DL, MVT::i8),
- EFLAGS));
-}
-
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
SDValue EFLAGS = N->getOperand(1);
- if (CC == X86::COND_A) {
- // Try to convert COND_A into COND_B in an attempt to facilitate
- // materializing "setb reg".
- //
- // Do not flip "e > c", where "c" is a constant, because Cmp instruction
- // cannot take an immediate as its first operand.
- //
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
- EFLAGS.getValueType().isInteger() &&
- !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
- SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
- EFLAGS.getNode()->getVTList(),
- EFLAGS.getOperand(1), EFLAGS.getOperand(0));
- SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
- return MaterializeSETB(DL, NewEFLAGS, DAG, N->getSimpleValueType(0));
- }
- }
-
- // Materialize "setb reg" as "sbb reg,reg", since it can be extended without
- // a zext and produces an all-ones bit which is more useful than 0/1 in some
- // cases.
- if (CC == X86::COND_B)
- return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0));
-
// Try to simplify the EFLAGS and condition code operands.
if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG))
return getSETCC(CC, Flags, DL, DAG);
@@ -33352,7 +34325,6 @@ static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG,
/// Optimize branch condition evaluation.
static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
SDValue EFLAGS = N->getOperand(3);
@@ -33538,45 +34510,159 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// fold (add Y, (sete X, 0)) -> adc 0, Y
-/// (add Y, (setne X, 0)) -> sbb -1, Y
-/// (sub (sete X, 0), Y) -> sbb 0, Y
-/// (sub (setne X, 0), Y) -> adc -1, Y
-static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
+/// Materialize "setb reg" as "sbb reg,reg", since it produces an all-ones bit
+/// which is more useful than 0/1 in some cases.
+static SDValue materializeSBB(SDNode *N, SDValue EFLAGS, SelectionDAG &DAG) {
SDLoc DL(N);
+ // "Condition code B" is also known as "the carry flag" (CF).
+ SDValue CF = DAG.getConstant(X86::COND_B, DL, MVT::i8);
+ SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, CF, EFLAGS);
+ MVT VT = N->getSimpleValueType(0);
+ if (VT == MVT::i8)
+ return DAG.getNode(ISD::AND, DL, VT, SBB, DAG.getConstant(1, DL, VT));
- // Look through ZExts.
- SDValue Ext = N->getOperand(N->getOpcode() == ISD::SUB ? 1 : 0);
- if (Ext.getOpcode() != ISD::ZERO_EXTEND || !Ext.hasOneUse())
- return SDValue();
+ assert(VT == MVT::i1 && "Unexpected type for SETCC node");
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SBB);
+}
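In C terms, the value materializeSBB produces is -(unsigned)Cond: zero or an all-ones mask, which is often more useful than a 0/1 byte. A standalone sketch:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 1, B = 2;
  uint32_t Mask = -(uint32_t)(A < B); // what "sbb reg,reg" leaves after a cmp
  assert(Mask == 0xFFFFFFFFu);
  return 0;
}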
+
+/// If this is an add or subtract where one operand is produced by a cmp+setcc,
+/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
+/// with CMP+{ADC, SBB}.
+static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
+ bool IsSub = N->getOpcode() == ISD::SUB;
+ SDValue X = N->getOperand(0);
+ SDValue Y = N->getOperand(1);
+
+ // If this is an add, canonicalize a zext operand to the RHS.
+ // TODO: Incomplete? What if both sides are zexts?
+ if (!IsSub && X.getOpcode() == ISD::ZERO_EXTEND &&
+ Y.getOpcode() != ISD::ZERO_EXTEND)
+ std::swap(X, Y);
+
+ // Look through a one-use zext.
+ bool PeekedThroughZext = false;
+ if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse()) {
+ Y = Y.getOperand(0);
+ PeekedThroughZext = true;
+ }
- SDValue SetCC = Ext.getOperand(0);
- if (SetCC.getOpcode() != X86ISD::SETCC || !SetCC.hasOneUse())
+ // If this is an add, canonicalize a setcc operand to the RHS.
+ // TODO: Incomplete? What if both sides are setcc?
+ // TODO: Should we allow peeking through a zext of the other operand?
+ if (!IsSub && !PeekedThroughZext && X.getOpcode() == X86ISD::SETCC &&
+ Y.getOpcode() != X86ISD::SETCC)
+ std::swap(X, Y);
+
+ if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse())
return SDValue();
- X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0);
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0);
+
+ if (CC == X86::COND_B) {
+ // X + SETB Z --> X + (mask SBB Z, Z)
+ // X - SETB Z --> X - (mask SBB Z, Z)
+ // TODO: Produce ADC/SBB here directly and avoid SETCC_CARRY?
+ SDValue SBB = materializeSBB(Y.getNode(), Y.getOperand(1), DAG);
+ if (SBB.getValueSizeInBits() != VT.getSizeInBits())
+ SBB = DAG.getZExtOrTrunc(SBB, DL, VT);
+ return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB);
+ }
+
+ if (CC == X86::COND_A) {
+ SDValue EFLAGS = Y->getOperand(1);
+ // Try to convert COND_A into COND_B in an attempt to facilitate
+ // materializing "setb reg".
+ //
+ // Do not flip "e > c", where "c" is a constant, because Cmp instruction
+ // cannot take an immediate as its first operand.
+ //
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
+ EFLAGS.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+ SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
+ EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
+ SDValue SBB = materializeSBB(Y.getNode(), NewEFLAGS, DAG);
+ if (SBB.getValueSizeInBits() != VT.getSizeInBits())
+ SBB = DAG.getZExtOrTrunc(SBB, DL, VT);
+ return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB);
+ }
+ }
+
if (CC != X86::COND_E && CC != X86::COND_NE)
return SDValue();
- SDValue Cmp = SetCC.getOperand(1);
+ SDValue Cmp = Y.getOperand(1);
if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
!X86::isZeroNode(Cmp.getOperand(1)) ||
!Cmp.getOperand(0).getValueType().isInteger())
return SDValue();
- SDValue CmpOp0 = Cmp.getOperand(0);
- SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, CmpOp0,
- DAG.getConstant(1, DL, CmpOp0.getValueType()));
+ // (cmp Z, 1) sets the carry flag if Z is 0.
+ SDValue Z = Cmp.getOperand(0);
+ SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z,
+ DAG.getConstant(1, DL, Z.getValueType()));
+
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
- SDValue OtherVal = N->getOperand(N->getOpcode() == ISD::SUB ? 0 : 1);
+ // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1)
+ // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1)
if (CC == X86::COND_NE)
- return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::ADC : X86ISD::SBB,
- DL, OtherVal.getValueType(), OtherVal,
- DAG.getConstant(-1ULL, DL, OtherVal.getValueType()),
- NewCmp);
- return DAG.getNode(N->getOpcode() == ISD::SUB ? X86ISD::SBB : X86ISD::ADC,
- DL, OtherVal.getValueType(), OtherVal,
- DAG.getConstant(0, DL, OtherVal.getValueType()), NewCmp);
+ return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X,
+ DAG.getConstant(-1ULL, DL, VT), NewCmp);
+
+ // X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1)
+ // X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1)
+ return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X,
+ DAG.getConstant(0, DL, VT), NewCmp);
+}
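The arithmetic behind the final two rewrites: for unsigned Z, "cmp Z, 1" sets the carry flag exactly when Z == 0, and ADC/SBB fold that flag into the add or subtract. A standalone check of both identities (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t Z : {0u, 7u})
    for (uint32_t X : {0u, 5u}) {
      uint32_t CF = Z < 1;                // carry out of "cmp Z, 1"
      assert(X + (Z != 0) == X + 1 - CF); // sbb X, -1 --> X - (-1) - CF
      assert(X + (Z == 0) == X + 0 + CF); // adc X, 0  --> X + 0 + CF
    }
  return 0;
}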
+
+static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ SDValue MulOp = N->getOperand(0);
+ SDValue Phi = N->getOperand(1);
+
+ if (MulOp.getOpcode() != ISD::MUL)
+ std::swap(MulOp, Phi);
+ if (MulOp.getOpcode() != ISD::MUL)
+ return SDValue();
+
+ ShrinkMode Mode;
+ if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ unsigned RegSize = 128;
+ if (Subtarget.hasBWI())
+ RegSize = 512;
+ else if (Subtarget.hasAVX2())
+ RegSize = 256;
+ unsigned VectorSize = VT.getVectorNumElements() * 16;
+ // If the vector size is less than 128 bits or greater than the supported
+ // RegSize, do not use PMADD.
+ if (VectorSize < 128 || VectorSize > RegSize)
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
+ VT.getVectorNumElements());
+ EVT MAddVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
+ VT.getVectorNumElements() / 2);
+
+ // Shrink the operands of mul.
+ SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(0));
+ SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, MulOp->getOperand(1));
+
+ // The madd vector size is half of the original vector size.
+ SDValue Madd = DAG.getNode(X86ISD::VPMADDWD, DL, MAddVT, N0, N1);
+ // Fill the rest of the output with 0
+ SDValue Zero = getZeroVector(Madd.getSimpleValueType(), Subtarget, DAG, DL);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Madd, Zero);
+ return DAG.getNode(ISD::ADD, DL, VT, Concat, Phi);
}
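VPMADDWD multiplies signed 16-bit lanes and adds adjacent products into 32-bit lanes, which is why the mul operands are shrunk to i16 and the madd result is half as wide. The corresponding SSE2 intrinsic, in a standalone sketch:

#include <cassert>
#include <cstdint>
#include <emmintrin.h>

int main() {
  __m128i A = _mm_set1_epi16(3), B = _mm_set1_epi16(4);
  int32_t Out[4];
  _mm_storeu_si128((__m128i *)Out, _mm_madd_epi16(A, B)); // each lane: 3*4 + 3*4
  assert(Out[0] == 24);
  return 0;
}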
static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
@@ -33656,6 +34742,8 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
if (Flags->hasVectorReduction()) {
if (SDValue Sad = combineLoopSADPattern(N, DAG, Subtarget))
return Sad;
+ if (SDValue MAdd = combineLoopMAddPattern(N, DAG, Subtarget))
+ return MAdd;
}
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
@@ -33667,7 +34755,7 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
isHorizontalBinOp(Op0, Op1, true))
return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, Op0, Op1);
- return OptimizeConditionalInDecrement(N, DAG);
+ return combineAddOrSubToADCOrSBB(N, DAG);
}
static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
@@ -33700,36 +34788,44 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
isHorizontalBinOp(Op0, Op1, false))
return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
- return OptimizeConditionalInDecrement(N, DAG);
+ return combineAddOrSubToADCOrSBB(N, DAG);
}
static SDValue combineVSZext(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
+ if (DCI.isBeforeLegalize())
+ return SDValue();
+
SDLoc DL(N);
unsigned Opcode = N->getOpcode();
MVT VT = N->getSimpleValueType(0);
MVT SVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = SVT.getSizeInBits();
+
SDValue Op = N->getOperand(0);
MVT OpVT = Op.getSimpleValueType();
MVT OpEltVT = OpVT.getVectorElementType();
- unsigned InputBits = OpEltVT.getSizeInBits() * VT.getVectorNumElements();
+ unsigned OpEltSizeInBits = OpEltVT.getSizeInBits();
+ unsigned InputBits = OpEltSizeInBits * NumElts;
// Perform any constant folding.
// FIXME: Reduce constant pool usage and don't fold when OptSize is enabled.
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
- unsigned NumDstElts = VT.getVectorNumElements();
- SmallBitVector Undefs(NumDstElts, false);
- SmallVector<APInt, 4> Vals(NumDstElts, APInt(SVT.getSizeInBits(), 0));
- for (unsigned i = 0; i != NumDstElts; ++i) {
- SDValue OpElt = Op.getOperand(i);
- if (OpElt.getOpcode() == ISD::UNDEF) {
- Undefs[i] = true;
+ APInt UndefElts;
+ SmallVector<APInt, 64> EltBits;
+ if (getTargetConstantBitsFromNode(Op, OpEltSizeInBits, UndefElts, EltBits)) {
+ APInt Undefs(NumElts, 0);
+ SmallVector<APInt, 4> Vals(NumElts, APInt(EltSizeInBits, 0));
+ bool IsZEXT =
+ (Opcode == X86ISD::VZEXT) || (Opcode == ISD::ZERO_EXTEND_VECTOR_INREG);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (UndefElts[i]) {
+ Undefs.setBit(i);
continue;
}
- APInt Cst = cast<ConstantSDNode>(OpElt.getNode())->getAPIntValue();
- Vals[i] = Opcode == X86ISD::VZEXT ? Cst.zextOrTrunc(SVT.getSizeInBits())
- : Cst.sextOrTrunc(SVT.getSizeInBits());
+ Vals[i] = IsZEXT ? EltBits[i].zextOrTrunc(EltSizeInBits)
+ : EltBits[i].sextOrTrunc(EltSizeInBits);
}
return getConstVector(Vals, Undefs, VT, DAG, DL);
}
@@ -33829,7 +34925,7 @@ static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
if (N->getOperand(0) == N->getOperand(1)) {
if (N->getOpcode() == X86ISD::PCMPEQ)
- return getOnesVector(VT, Subtarget, DAG, DL);
+ return getOnesVector(VT, DAG, DL);
if (N->getOpcode() == X86ISD::PCMPGT)
return getZeroVector(VT, Subtarget, DAG, DL);
}
@@ -33837,6 +34933,98 @@ static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDLoc dl(N);
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ MVT OpVT = N->getSimpleValueType(0);
+ MVT SubVecVT = SubVec.getSimpleValueType();
+
+ // If this is an insert of an extract, combine to a shuffle. Don't do this
+ // if the insert or extract can be represented with a subvector operation.
+ if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ SubVec.getOperand(0).getSimpleValueType() == OpVT &&
+ (IdxVal != 0 || !Vec.isUndef())) {
+ int ExtIdxVal = cast<ConstantSDNode>(SubVec.getOperand(1))->getZExtValue();
+ if (ExtIdxVal != 0) {
+ int VecNumElts = OpVT.getVectorNumElements();
+ int SubVecNumElts = SubVecVT.getVectorNumElements();
+ SmallVector<int, 64> Mask(VecNumElts);
+ // First create an identity shuffle mask.
+ for (int i = 0; i != VecNumElts; ++i)
+ Mask[i] = i;
+ // Now insert the extracted portion.
+ for (int i = 0; i != SubVecNumElts; ++i)
+ Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;
+
+ return DAG.getVectorShuffle(OpVT, dl, Vec, SubVec.getOperand(0), Mask);
+ }
+ }
+
+ // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte
+ // load:
+ // (insert_subvector (insert_subvector undef, (load16 addr), 0),
+ // (load16 addr + 16), Elts/2)
+ // --> load32 addr
+ // or:
+ // (insert_subvector (insert_subvector undef, (load32 addr), 0),
+ // (load32 addr + 32), Elts/2)
+ // --> load64 addr
+ // or a 16-byte or 32-byte broadcast:
+ // (insert_subvector (insert_subvector undef, (load16 addr), 0),
+ // (load16 addr), Elts/2)
+ // --> X86SubVBroadcast(load16 addr)
+ // or:
+ // (insert_subvector (insert_subvector undef, (load32 addr), 0),
+ // (load32 addr), Elts/2)
+ // --> X86SubVBroadcast(load32 addr)
+ if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
+ Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) {
+ auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
+ if (Idx2 && Idx2->getZExtValue() == 0) {
+ SDValue SubVec2 = Vec.getOperand(1);
+ // If needed, look through bitcasts to get to the load.
+ if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
+ bool Fast;
+ unsigned Alignment = FirstLd->getAlignment();
+ unsigned AS = FirstLd->getAddressSpace();
+ const X86TargetLowering *TLI = Subtarget.getTargetLowering();
+ if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ OpVT, AS, Alignment, &Fast) && Fast) {
+ SDValue Ops[] = {SubVec2, SubVec};
+ if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
+ return Ld;
+ }
+ }
+ // If the lower and upper loads are the same and they are the only users of
+ // the load, then lower to a VBROADCASTF128/VBROADCASTI128/etc.
+ if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) {
+ if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
+ SDNode::areOnlyUsersOf({N, Vec.getNode()}, SubVec2.getNode())) {
+ return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
+ }
+ }
+ // If this is a subv_broadcast inserted into both halves, use a larger
+ // subv_broadcast.
+ if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) {
+ return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT,
+ SubVec.getOperand(0));
+ }
+ }
+ }
+
+ return SDValue();
+}
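The shuffle-mask construction in the insert-of-extract case can be checked in isolation: start from the identity permutation, then point the insertion window at the extracted elements of the second operand (indices offset by VecNumElts). A standalone sketch with assumed sizes:

#include <cassert>
#include <vector>

int main() {
  const int VecNumElts = 8, SubVecNumElts = 2, IdxVal = 4, ExtIdxVal = 2;
  std::vector<int> Mask(VecNumElts);
  for (int i = 0; i != VecNumElts; ++i)
    Mask[i] = i;                                   // identity shuffle
  for (int i = 0; i != SubVecNumElts; ++i)
    Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts; // elements of operand 2
  assert(Mask[4] == 10 && Mask[5] == 11 && Mask[0] == 0);
  return 0;
}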
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
@@ -33845,6 +35033,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::EXTRACT_VECTOR_ELT:
return combineExtractVectorElt(N, DAG, DCI, Subtarget);
+ case X86ISD::PEXTRW:
+ case X86ISD::PEXTRB:
+ return combineExtractVectorElt_SSE(N, DAG, DCI, Subtarget);
+ case ISD::INSERT_SUBVECTOR:
+ return combineInsertSubvector(N, DAG, DCI, Subtarget);
case ISD::VSELECT:
case ISD::SELECT:
case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget);
@@ -33870,6 +35063,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
case ISD::FNEG: return combineFneg(N, DAG, Subtarget);
case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
+ case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget);
case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget);
case X86ISD::FXOR:
@@ -33884,12 +35078,18 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget);
case ISD::SETCC: return combineSetCC(N, DAG, Subtarget);
- case X86ISD::SETCC: return combineX86SetCC(N, DAG, DCI, Subtarget);
- case X86ISD::BRCOND: return combineBrCond(N, DAG, DCI, Subtarget);
+ case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget);
+ case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
case X86ISD::VSHLI:
- case X86ISD::VSRLI: return combineVectorShift(N, DAG, DCI, Subtarget);
+ case X86ISD::VSRAI:
+ case X86ISD::VSRLI:
+ return combineVectorShiftImm(N, DAG, DCI, Subtarget);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
case X86ISD::VSEXT:
case X86ISD::VZEXT: return combineVSZext(N, DAG, DCI, Subtarget);
+ case X86ISD::PINSRB:
+ case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::INSERTPS:
case X86ISD::PALIGNR:
@@ -34717,10 +35917,20 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return Res;
}
- // 'A' means EAX + EDX.
+ // 'A' means [ER]AX + [ER]DX.
if (Constraint == "A") {
- Res.first = X86::EAX;
- Res.second = &X86::GR32_ADRegClass;
+ if (Subtarget.is64Bit()) {
+ Res.first = X86::RAX;
+ Res.second = &X86::GR64_ADRegClass;
+ } else if (Subtarget.is32Bit()) {
+ Res.first = X86::EAX;
+ Res.second = &X86::GR32_ADRegClass;
+ } else if (Subtarget.is16Bit()) {
+ Res.first = X86::AX;
+ Res.second = &X86::GR16_ADRegClass;
+ } else {
+ llvm_unreachable("Expecting 64, 32 or 16 bit subtarget");
+ }
return Res;
}
return Res;
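The "A" constraint names the AX/DX register pair; the classic use is 32-bit code reading the time-stamp counter, where "=A" binds a 64-bit value to EDX:EAX. A GCC/Clang sketch that is valid on 32-bit x86 only (on x86-64 the constraint's pair semantics differ, which is what the change above addresses):

#include <cstdint>

// i386-only: "=A" ties the 64-bit result to the EDX:EAX pair.
static inline uint64_t ReadTSC32() {
  uint64_t V;
  asm volatile("rdtsc" : "=A"(V));
  return V;
}

int main() {
  (void)ReadTSC32();
  return 0;
}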
@@ -34812,7 +36022,7 @@ int X86TargetLowering::getScalingFactorCost(const DataLayout &DL,
return -1;
}
-bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
+bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// Integer division on x86 is expensive. However, when aggressively optimizing
// for code size, we prefer to use a div instruction, as it is usually smaller
// than the alternative sequence.
@@ -34820,8 +36030,8 @@ bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
// integer division, leaving the division as-is is a loss even in terms of
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
- bool OptSize = Attr.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::MinSize);
+ bool OptSize =
+ Attr.hasAttribute(AttributeList::FunctionIndex, Attribute::MinSize);
return OptSize && !VT.isVector();
}
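The alternative sequence the comment refers to is the usual strength reduction of division by a constant into a widening multiply and shift: faster than div, but larger. For example, x/10 for 32-bit unsigned x, checked in a standalone sketch of the well-known magic-number identity:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 9u, 10u, 123456789u, 4294967295u})
    assert(X / 10 == (uint32_t)(((uint64_t)X * 0xCCCCCCCDull) >> 35));
  return 0;
}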
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 37f9353042b1..ab4910daca02 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -149,8 +149,7 @@ namespace llvm {
WrapperRIP,
/// Copies a 64-bit value from the low word of an XMM vector
- /// to an MMX vector. If you think this is too close to the previous
- /// mnemonic, so do I; blame Intel.
+ /// to an MMX vector.
MOVDQ2Q,
/// Copies a 32-bit value from the low word of an MMX
@@ -179,7 +178,7 @@ namespace llvm {
/// Insert the lower 16-bits of a 32-bit value to a vector,
/// corresponds to X86::PINSRW.
- PINSRW, MMX_PINSRW,
+ PINSRW,
/// Shuffle 16 8-bit values within a vector.
PSHUFB,
@@ -195,21 +194,21 @@ namespace llvm {
/// Blend where the selector is an immediate.
BLENDI,
- /// Blend where the condition has been shrunk.
- /// This is used to emphasize that the condition mask is
- /// no more valid for generic VSELECT optimizations.
+ /// Dynamic (non-constant condition) vector blend where only the sign bits
+ /// of the condition elements are used. This is used to enforce that the
+ /// condition mask is not valid for generic VSELECT optimizations.
SHRUNKBLEND,
/// Combined add and sub on an FP vector.
ADDSUB,
// FP vector ops with rounding mode.
- FADD_RND,
- FSUB_RND,
- FMUL_RND,
- FDIV_RND,
- FMAX_RND,
- FMIN_RND,
+ FADD_RND, FADDS_RND,
+ FSUB_RND, FSUBS_RND,
+ FMUL_RND, FMULS_RND,
+ FDIV_RND, FDIVS_RND,
+ FMAX_RND, FMAXS_RND,
+ FMIN_RND, FMINS_RND,
FSQRT_RND, FSQRTS_RND,
// FP vector get exponent.
@@ -239,9 +238,6 @@ namespace llvm {
FHADD,
FHSUB,
- // Integer absolute value
- ABS,
-
// Detect Conflicts Within a Vector
CONFLICT,
@@ -251,6 +247,9 @@ namespace llvm {
/// Commutative FMIN and FMAX.
FMAXC, FMINC,
+ /// Scalar intrinsic floating point max and min.
+ FMAXS, FMINS,
+
/// Floating point reciprocal-sqrt and reciprocal approximation.
/// Note that these typically require refinement
/// in order to obtain suitable precision.
@@ -320,6 +319,9 @@ namespace llvm {
// Vector shift elements by immediate
VSHLI, VSRLI, VSRAI,
+ // Shifts of mask registers.
+ KSHIFTL, KSHIFTR,
+
// Bit rotate by immediate
VROTLI, VROTRI,
@@ -443,8 +445,7 @@ namespace llvm {
// Broadcast subvector to vector.
SUBV_BROADCAST,
- // Insert/Extract vector element.
- VINSERT,
+ // Extract vector element.
VEXTRACT,
/// SSE4A Extraction and Insertion.
@@ -686,6 +687,9 @@ namespace llvm {
unsigned getJumpTableEncoding() const override;
bool useSoftFloat() const override;
+ void markLibCallAttributes(MachineFunction *MF, unsigned CC,
+ ArgListTy &Args) const override;
+
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
return MVT::i8;
}
@@ -806,8 +810,17 @@ namespace llvm {
return false;
}
+ bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+
bool hasAndNotCompare(SDValue Y) const override;
+ bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
+ return VT.isScalarInteger();
+ }
+
+ /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
+ MVT hasFastEqualityCompare(unsigned NumBits) const override;
+
/// Return the value type to use for ISD::SETCC.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
@@ -817,11 +830,13 @@ namespace llvm {
void computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
/// Determine the number of bits in the operation that are sign bits.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const override;
@@ -984,6 +999,10 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ bool convertSelectOfConstantsToMath() const override {
+ return true;
+ }
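Returning true here lets the generic combiner turn a select between two constants into arithmetic on the condition bit, as in this standalone illustration:

#include <cassert>

int main() {
  for (bool C : {false, true})
    assert((C ? 3 : 4) == 4 - (int)C); // select C, 3, 4 --> sub 4, zext(C)
  return 0;
}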
+
/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
/// with this index.
bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override;
@@ -1035,7 +1054,7 @@ namespace llvm {
/// \brief Customize the preferred legalization strategy for certain types.
LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
- bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;
+ bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool supportSwiftError() const override;
@@ -1076,7 +1095,8 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ uint32_t *RegMask) const;
SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::InputArg> &ArgInfo,
const SDLoc &dl, SelectionDAG &DAG,
@@ -1138,8 +1158,7 @@ namespace llvm {
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_TO_INT(SDValue Op, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl,
SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index ba1aede3c1a0..08b501ff20bf 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -38,7 +38,9 @@ multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn, []>;
}
-multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, bit Commutable = 0,
+ string Ver = ""> {
+ let isCommutable = Commutable in
def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
[(set VR64:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>;
@@ -63,25 +65,25 @@ multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn, string Ver = ""> {
(bitconvert (load_mmx addr:$src))))]>;
}
-defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">;
+defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", 1>;
defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">;
defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">;
-defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">;
-defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
+defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd", 1>;
+defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq", 1>;
defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">;
defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">;
defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">;
-defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">;
+defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul", 1>;
defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">;
defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">;
defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">;
-defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">;
-defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">;
+defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub", 1>;
+defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr", 1>;
defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
-defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
+defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", 1>;
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
@@ -98,6 +100,6 @@ def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
// "3DNowA" instructions
defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">;
-defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">;
-defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">;
+defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", 0, "a">;
+defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", 0, "a">;
defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">;
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 230d1700b8d2..c38c13bb9757 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -34,13 +34,6 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
ValueType KVT = !cast<ValueType>(!if (!eq (NumElts, 1), "i1",
"v" # NumElts # "i1"));
- // The GPR register class that can hold the write mask. Use GR8 for fewer
- // than 8 elements. Use shift-right and equal to work around the lack of
- // !lt in tablegen.
- RegisterClass MRC =
- !cast<RegisterClass>("GR" #
- !if (!eq (!srl(NumElts, 3), 0), 8, NumElts));
-
// Suffix used in the instruction mnemonic.
string Suffix = suffix;
@@ -69,6 +62,9 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
// The corresponding memory operand, e.g. i512mem for VR512.
X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
+ // FP scalar memory operand for intrinsics - ssmem/sdmem.
+ Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
+ !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
// Load patterns
// Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
@@ -89,6 +85,12 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
+ ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
+ !cast<ComplexPattern>("sse_load_f32"),
+ !if (!eq (EltTypeName, "f64"),
+ !cast<ComplexPattern>("sse_load_f64"),
+ ?));
+
// The corresponding float type, e.g. v16f32 for v16i32
// Note: For EltSize < 32, FloatVT is illegal and TableGen
// fails to compile, so we choose FloatVT = VT
@@ -207,7 +209,7 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
Pattern, itin>;
// Prefer over VMOV*rrk Pat<>
- let AddedComplexity = 20, isCommutable = IsKCommutable in
+ let isCommutable = IsKCommutable in
def NAME#k: AVX512<O, F, Outs, MaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
"$dst {${mask}}, "#IntelSrcAsm#"}",
@@ -219,7 +221,7 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
// A zero mask does not add any restrictions to the commute-operands
// transformation, so it is OK to use IsCommutable instead of IsKCommutable.
- let AddedComplexity = 30, isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
+ let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, "#IntelSrcAsm#"}",
@@ -250,6 +252,23 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
MaskingConstraint, NoItinerary, IsCommutable,
IsKCommutable>;
+// Similar to AVX512_maskable_common, but with scalar types.
+multiclass AVX512_maskable_fp_common<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs,
+ dag Ins, dag MaskingIns, dag ZeroMaskingIns,
+ string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ SDNode Select = vselect,
+ string MaskingConstraint = "",
+ InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0,
+ bit IsKCommutable = 0> :
+ AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
+ AttSrcAsm, IntelSrcAsm,
+ [], [], [],
+ MaskingConstraint, NoItinerary, IsCommutable,
+ IsKCommutable>;
+
// This multiclass generates the unconditional/non-masking, the masking, and
// the zero-masking variants of the vector instruction. In the masking case,
// the preserved vector elements come from a new dummy input operand tied to
// $dst.
@@ -460,7 +479,7 @@ def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
}
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, Predicates = [HasVLX], SchedRW = [WriteZero] in {
+ isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
[(set VR128X:$dst, (v4i32 immAllZerosV))]>;
def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
@@ -470,7 +489,7 @@ def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasVLX, HasDQI] in {
+ isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
[(set FR32X:$dst, fp32imm0)]>;
def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
@@ -484,7 +503,7 @@ multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, X86VectorVTInfo To
PatFrag vinsert_insert> {
let ExeDomain = To.ExeDomain in {
defm rr : AVX512_maskable<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
- (ins To.RC:$src1, From.RC:$src2, i32u8imm:$src3),
+ (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
"vinsert" # From.EltTypeName # "x" # From.NumElts,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(vinsert_insert:$src3 (To.VT To.RC:$src1),
@@ -492,7 +511,7 @@ multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, X86VectorVTInfo To
(iPTR imm))>, AVX512AIi8Base, EVEX_4V;
defm rm : AVX512_maskable<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
- (ins To.RC:$src1, From.MemOp:$src2, i32u8imm:$src3),
+ (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
"vinsert" # From.EltTypeName # "x" # From.NumElts,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(vinsert_insert:$src3 (To.VT To.RC:$src1),
@@ -625,14 +644,14 @@ multiclass vextract_for_size<int Opcode,
// vextract_extract), we are interested only in patterns without a mask;
// the intrinsics' pattern match is generated below.
defm rr : AVX512_maskable_in_asm<Opcode, MRMDestReg, To, (outs To.RC:$dst),
- (ins From.RC:$src1, i32u8imm:$idx),
+ (ins From.RC:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x" # To.NumElts,
"$idx, $src1", "$src1, $idx",
[(set To.RC:$dst, (vextract_extract:$idx (From.VT From.RC:$src1),
(iPTR imm)))]>,
AVX512AIi8Base, EVEX;
def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
- (ins To.MemOp:$dst, From.RC:$src1, i32u8imm:$idx),
+ (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x" # To.NumElts #
"\t{$idx, $src1, $dst|$dst, $src1, $idx}",
[(store (To.VT (vextract_extract:$idx
@@ -642,7 +661,7 @@ multiclass vextract_for_size<int Opcode,
let mayStore = 1, hasSideEffects = 0 in
def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
(ins To.MemOp:$dst, To.KRCWM:$mask,
- From.RC:$src1, i32u8imm:$idx),
+ From.RC:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x" # To.NumElts #
"\t{$idx, $src1, $dst {${mask}}|"
"$dst {${mask}}, $src1, $idx}",
@@ -846,32 +865,20 @@ def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
// broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
-
- let isCodeGenOnly = 1 in {
- def r_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst),
- (ins SrcInfo.FRC:$src), OpcodeStr#"\t{$src, $dst|$dst, $src}",
- [(set DestInfo.RC:$dst, (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)))]>,
- Requires<[HasAVX512]>, T8PD, EVEX;
-
- let Constraints = "$src0 = $dst" in
- def rk_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst),
- (ins DestInfo.RC:$src0, DestInfo.KRCWM:$mask, SrcInfo.FRC:$src),
- OpcodeStr#"\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
- [(set DestInfo.RC:$dst,
- (vselect DestInfo.KRCWM:$mask,
- (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
- DestInfo.RC:$src0))]>,
- Requires<[HasAVX512]>, T8PD, EVEX, EVEX_K;
-
- def rkz_s : I< opc, MRMSrcReg, (outs DestInfo.RC:$dst),
- (ins DestInfo.KRCWM:$mask, SrcInfo.FRC:$src),
- OpcodeStr#"\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
- [(set DestInfo.RC:$dst,
- (vselect DestInfo.KRCWM:$mask,
- (DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
- DestInfo.ImmAllZerosV))]>,
- Requires<[HasAVX512]>, T8PD, EVEX, EVEX_KZ;
- } // let isCodeGenOnly = 1 in
+ def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
+ (!cast<Instruction>(NAME#DestInfo.ZSuffix#r)
+ (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
+ def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
+ (X86VBroadcast SrcInfo.FRC:$src),
+ DestInfo.RC:$src0)),
+ (!cast<Instruction>(NAME#DestInfo.ZSuffix#rk)
+ DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
+ (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
+ def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
+ (X86VBroadcast SrcInfo.FRC:$src),
+ DestInfo.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#DestInfo.ZSuffix#rkz)
+ DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
}
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
@@ -892,7 +899,6 @@ multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
(SrcInfo.VT (scalar_to_vector
(SrcInfo.ScalarLdFrag addr:$src))))),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#m) addr:$src)>;
- let AddedComplexity = 20 in
def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
(X86VBroadcast
(SrcInfo.VT (scalar_to_vector
@@ -900,7 +906,6 @@ multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
DestInfo.RC:$src0)),
(!cast<Instruction>(NAME#DestInfo.ZSuffix#mk)
DestInfo.RC:$src0, DestInfo.KRCWM:$mask, addr:$src)>;
- let AddedComplexity = 30 in
def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
(X86VBroadcast
(SrcInfo.VT (scalar_to_vector
@@ -951,39 +956,42 @@ def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
(VBROADCASTSDZm addr:$src)>;
multiclass avx512_int_broadcast_reg<bits<8> opc, X86VectorVTInfo _,
+ SDPatternOperator OpNode,
RegisterClass SrcRC> {
+ let ExeDomain = _.ExeDomain in
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins SrcRC:$src),
"vpbroadcast"##_.Suffix, "$src", "$src",
- (_.VT (X86VBroadcast SrcRC:$src))>, T8PD, EVEX;
+ (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX;
}
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
+ SDPatternOperator OpNode,
RegisterClass SrcRC, Predicate prd> {
let Predicates = [prd] in
- defm Z : avx512_int_broadcast_reg<opc, _.info512, SrcRC>, EVEX_V512;
+ defm Z : avx512_int_broadcast_reg<opc, _.info512, OpNode, SrcRC>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_int_broadcast_reg<opc, _.info256, SrcRC>, EVEX_V256;
- defm Z128 : avx512_int_broadcast_reg<opc, _.info128, SrcRC>, EVEX_V128;
+ defm Z256 : avx512_int_broadcast_reg<opc, _.info256, OpNode, SrcRC>, EVEX_V256;
+ defm Z128 : avx512_int_broadcast_reg<opc, _.info128, OpNode, SrcRC>, EVEX_V128;
}
}
let isCodeGenOnly = 1 in {
-defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info, GR8,
- HasBWI>;
-defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info, GR16,
- HasBWI>;
+defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info,
+ X86VBroadcast, GR8, HasBWI>;
+defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info,
+ X86VBroadcast, GR16, HasBWI>;
}
let isAsmParserOnly = 1 in {
defm VPBROADCASTBr_Alt : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info,
- GR32, HasBWI>;
+ null_frag, GR32, HasBWI>;
defm VPBROADCASTWr_Alt : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info,
- GR32, HasBWI>;
+ null_frag, GR32, HasBWI>;
}
-defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info, GR32,
- HasAVX512>;
-defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info, GR64,
- HasAVX512>, VEX_W;
+defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
+ X86VBroadcast, GR32, HasAVX512>;
+defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
+ X86VBroadcast, GR64, HasAVX512>, VEX_W;
def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
(VPBROADCASTDrZrkz VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
@@ -1035,7 +1043,18 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
AVX5128IBase, EVEX;
}
+let Predicates = [HasAVX512] in {
+ // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
+ def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQZm addr:$src)>;
+}
+
let Predicates = [HasVLX, HasBWI] in {
+ // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQZ128m addr:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQZ256m addr:$src)>;
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
// This means we'll encounter truncated i32 loads; match that here.
def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
@@ -1075,18 +1094,12 @@ def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
-def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
- (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
- (v8f32 VR256X:$src), 1)>;
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
(VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v4f64 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v4i64 VR256X:$src), 1)>;
-def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
- (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
- (v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v16i16 VR256X:$src), 1)>;
@@ -1098,46 +1111,6 @@ def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4rm addr:$src)>;
-
-// Provide fallback in case the load node that is used in the patterns above
-// is used by additional users, which prevents the pattern selection.
-def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
- (VINSERTF64x4Zrr
- (VINSERTF32x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1),
- (EXTRACT_SUBREG
- (v8f64 (VINSERTF32x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1)), sub_ymm), 1)>;
-def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
- (VINSERTI64x4Zrr
- (VINSERTI32x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1),
- (EXTRACT_SUBREG
- (v8i64 (VINSERTI32x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1)), sub_ymm), 1)>;
-
-def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
- (VINSERTI64x4Zrr
- (VINSERTI32x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1),
- (EXTRACT_SUBREG
- (v32i16 (VINSERTI32x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1)), sub_ymm), 1)>;
-def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
- (VINSERTI64x4Zrr
- (VINSERTI32x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1),
- (EXTRACT_SUBREG
- (v64i8 (VINSERTI32x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1)), sub_ymm), 1)>;
}
let Predicates = [HasVLX] in {
@@ -1209,25 +1182,6 @@ def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
-def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
- (VINSERTF64x4Zrr
- (VINSERTF32x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1),
- (EXTRACT_SUBREG
- (v16f32 (VINSERTF32x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1)), sub_ymm), 1)>;
-def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
- (VINSERTI64x4Zrr
- (VINSERTI32x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1),
- (EXTRACT_SUBREG
- (v16i32 (VINSERTI32x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1)), sub_ymm), 1)>;
-
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
(VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
@@ -1265,25 +1219,6 @@ def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
(VINSERTI32x8Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8i32 VR256X:$src), 1)>;
-
-def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
- (VINSERTF32x8Zrr
- (VINSERTF32x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1),
- (EXTRACT_SUBREG
- (v16f32 (VINSERTF32x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1)), sub_ymm), 1)>;
-def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
- (VINSERTI32x8Zrr
- (VINSERTI32x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1),
- (EXTRACT_SUBREG
- (v16i32 (VINSERTI32x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
- VR128X:$src, sub_xmm),
- VR128X:$src, 1)), sub_ymm), 1)>;
}
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
@@ -1310,6 +1245,13 @@ defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
avx512vl_f32_info, avx512vl_f64_info>;
+let Predicates = [HasVLX] in {
+def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
+ (VBROADCASTSSZ256r (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
+def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
+ (VBROADCASTSDZ256r (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
+}
+
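A hedged C approximation of the new VLX patterns above (assuming -mavx512f -mavx512vl): a broadcast whose source is a 256-bit register is served by VBROADCASTSS ymm, xmm after extracting the low subregister.

  #include <immintrin.h>
  __m256 bcast_lane0(__m256 v) {
    return _mm256_broadcastss_ps(_mm256_castps256_ps128(v));
  }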
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
(VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
@@ -1604,13 +1546,13 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc)>, EVEX_4V;
+ let mayLoad = 1 in
defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
+ (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"$src2, $src1", "$src1, $src2",
- (OpNode (_.VT _.RC:$src1),
- (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
+ (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
@@ -1629,6 +1571,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V;
+ let mayLoad = 1 in
defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
@@ -1667,8 +1610,10 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>
}
let Predicates = [HasAVX512] in {
+ let ExeDomain = SSEPackedSingle in
defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd>,
AVX512XSIi8Base;
+ let ExeDomain = SSEPackedDouble in
defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd>,
AVX512XDIi8Base, VEX_W;
}
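For context, a small C sketch of the scalar compare-into-mask this multiclass implements (assuming -mavx512f); with the IntScalarMemOp operand the memory form only reads the scalar, not a full 128 bits:

  #include <immintrin.h>
  __mmask8 lt_mask(__m128 a, const float *p) {
    __m128 b = _mm_load_ss(p);                 // only 32 bits are read
    return _mm_cmp_ss_mask(a, b, _CMP_LT_OS);  // VCMPSS k, xmm, m32
  }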
@@ -2087,22 +2032,20 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set _.KRC:$dst,(or _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
- let AddedComplexity = 20 in {
- def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
- (ins _.MemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix##
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set _.KRC:$dst,
- (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
- (i32 imm:$src2)))], NoItinerary>;
- def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
- (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix##
- "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
- [(set _.KRC:$dst,(or _.KRCWM:$mask,
+ def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
+ (ins _.MemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix##
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _.KRC:$dst,
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
- (i32 imm:$src2))))], NoItinerary>, EVEX_K;
- }
+ (i32 imm:$src2)))], NoItinerary>;
+ def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
+ (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
+ OpcodeStr##_.Suffix##
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
+ [(set _.KRC:$dst,(or _.KRCWM:$mask,
+ (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
+ (i32 imm:$src2))))], NoItinerary>, EVEX_K;
}
}
@@ -2242,28 +2185,26 @@ let Predicates = [HasBWI] in {
// GR from/to mask register
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
- (COPY_TO_REGCLASS GR16:$src, VK16)>;
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
- (COPY_TO_REGCLASS VK16:$src, GR16)>;
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
- (COPY_TO_REGCLASS GR8:$src, VK8)>;
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
- (COPY_TO_REGCLASS VK8:$src, GR8)>;
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
(KMOVWrk VK16:$src)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
- (i32 (INSERT_SUBREG (IMPLICIT_DEF),
- (i16 (COPY_TO_REGCLASS VK16:$src, GR16)), sub_16bit))>;
+ (COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
- (MOVZX32rr8 (COPY_TO_REGCLASS VK8:$src, GR8))>, Requires<[NoDQI]>;
+ (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit))>, Requires<[NoDQI]>;
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
(KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
- (i32 (INSERT_SUBREG (IMPLICIT_DEF),
- (i8 (COPY_TO_REGCLASS VK8:$src, GR8)), sub_8bit))>;
+ (COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
(COPY_TO_REGCLASS GR32:$src, VK32)>;
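These GR-to-mask moves go through GR32 because KMOVW only transfers to and from 32-bit GPRs; a rough C sketch (assuming -mavx512f) of an i16 bitconvert reaching a mask register:

  #include <immintrin.h>
  __m512 masked_add(__m512 a, __m512 b, unsigned short bits) {
    __mmask16 m = (__mmask16)bits; // i16 -> v16i1: widened to GR32, then KMOVW
    return _mm512_mask_add_ps(a, m, a, b);
  }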
@@ -2296,20 +2237,20 @@ let Predicates = [HasDQI] in {
let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(store VK1:$src, addr:$dst),
(MOV8mr addr:$dst,
- (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
- sub_8bit))>;
+ (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)),
+ sub_8bit)))>;
def : Pat<(store VK2:$src, addr:$dst),
(MOV8mr addr:$dst,
- (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK2:$src, VK16)),
- sub_8bit))>;
+ (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK2:$src, GR32)),
+ sub_8bit)))>;
def : Pat<(store VK4:$src, addr:$dst),
(MOV8mr addr:$dst,
- (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK4:$src, VK16)),
- sub_8bit))>;
+ (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK4:$src, GR32)),
+ sub_8bit)))>;
def : Pat<(store VK8:$src, addr:$dst),
(MOV8mr addr:$dst,
- (EXTRACT_SUBREG (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
- sub_8bit))>;
+ (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
+ sub_8bit)))>;
def : Pat<(v8i1 (load addr:$src)),
(COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
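A sketch of the store path without DQI (assumption: -mavx512f only, so no KMOVB is available):

  #include <immintrin.h>
  void store_mask8(unsigned char *p, __mmask8 m) {
    *p = (unsigned char)m; // mask to GR32, then MOV8mr of the low byte
  }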
@@ -2340,44 +2281,41 @@ let Predicates = [HasBWI] in {
let Predicates = [HasAVX512] in {
def : Pat<(i1 (trunc (i64 GR64:$src))),
- (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 (EXTRACT_SUBREG $src, sub_32bit),
- (i32 1))), VK1)>;
+ (COPY_TO_REGCLASS (AND32ri8 (EXTRACT_SUBREG $src, sub_32bit),
+ (i32 1)), VK1)>;
def : Pat<(i1 (trunc (i32 GR32:$src))),
- (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 $src, (i32 1))), VK1)>;
+ (COPY_TO_REGCLASS (AND32ri8 $src, (i32 1)), VK1)>;
def : Pat<(i1 (trunc (i32 (assertzext_i1 GR32:$src)))),
(COPY_TO_REGCLASS GR32:$src, VK1)>;
def : Pat<(i1 (trunc (i8 GR8:$src))),
(COPY_TO_REGCLASS
- (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- GR8:$src, sub_8bit), (i32 1))),
- VK1)>;
+ (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR8:$src, sub_8bit), (i32 1)), VK1)>;
def : Pat<(i1 (trunc (i16 GR16:$src))),
(COPY_TO_REGCLASS
- (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- GR16:$src, sub_16bit), (i32 1))),
- VK1)>;
+ (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR16:$src, sub_16bit), (i32 1)), VK1)>;
def : Pat<(i32 (zext VK1:$src)),
- (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
+ (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1))>;
def : Pat<(i32 (anyext VK1:$src)),
(COPY_TO_REGCLASS VK1:$src, GR32)>;
def : Pat<(i8 (zext VK1:$src)),
(EXTRACT_SUBREG
- (AND32ri8 (KMOVWrk
- (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
+ (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_8bit)>;
def : Pat<(i8 (anyext VK1:$src)),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_8bit)>;
def : Pat<(i64 (zext VK1:$src)),
- (AND64ri8 (SUBREG_TO_REG (i64 0),
- (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
+ (SUBREG_TO_REG (i64 0),
+ (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_32bit)>;
def : Pat<(i64 (anyext VK1:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
@@ -2385,8 +2323,7 @@ let Predicates = [HasAVX512] in {
def : Pat<(i16 (zext VK1:$src)),
(EXTRACT_SUBREG
- (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
- sub_16bit)>;
+ (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_16bit)>;
def : Pat<(i16 (anyext VK1:$src)),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_16bit)>;
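For the i1-truncate patterns, a hedged C sketch (assuming -mavx512f) of a scalar condition becoming a one-bit mask; dropping the KMOVWkr lets the AND stay on the GPR side:

  #include <immintrin.h>
  __m128 pick_low(__m128 a, __m128 b, int cond) {
    __mmask8 m = (__mmask8)(cond & 1);   // i1 trunc: AND32ri8, then copy to VK1
    return _mm_mask_move_ss(a, m, a, b); // low lane = cond ? b : a
  }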
@@ -2440,15 +2377,6 @@ multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot>;
-multiclass avx512_mask_unop_int<string IntName, string InstName> {
- let Predicates = [HasAVX512] in
- def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
- (i16 GR16:$src)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
- (v16i1 (COPY_TO_REGCLASS GR16:$src, VK16))), GR16)>;
-}
-defm : avx512_mask_unop_int<"knot", "KNOT">;
-
// KNL does not support KMOVB; the 8-bit mask is promoted to 16-bit.
let Predicates = [HasAVX512, NoDQI] in
def : Pat<(vnot VK8:$src),
@@ -2497,21 +2425,6 @@ defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>;
defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, 0>;
defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>;
-multiclass avx512_mask_binop_int<string IntName, string InstName> {
- let Predicates = [HasAVX512] in
- def : Pat<(!cast<Intrinsic>("int_x86_avx512_"##IntName##"_w")
- (i16 GR16:$src1), (i16 GR16:$src2)),
- (COPY_TO_REGCLASS (!cast<Instruction>(InstName##"Wrr")
- (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)),
- (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>;
-}
-
-defm : avx512_mask_binop_int<"kand", "KAND">;
-defm : avx512_mask_binop_int<"kandn", "KANDN">;
-defm : avx512_mask_binop_int<"kor", "KOR">;
-defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
-defm : avx512_mask_binop_int<"kxor", "KXOR">;
-
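The deleted avx512_mask_*_int multiclasses existed only for the old GR16-based intrinsic forms; mask operations written in C still reach the K-instructions through the plain SDNode patterns, with 8-bit masks promoted as noted above. A sketch (assuming -mavx512f):

  #include <immintrin.h>
  __mmask16 mask_ops(__mmask16 a, __mmask16 b) {
    return _mm512_kand(_mm512_knot(a), b); // KNOTW + KANDW
  }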
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
Instruction Inst> {
// With AVX512F, 8-bit mask is promoted to 16-bit mask,
@@ -2613,8 +2526,8 @@ multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
}
}
-defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>;
-defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86vsrli>;
+defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>;
+defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>;
// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
@@ -2625,7 +2538,6 @@ multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
}
multiclass avx512_mask_setop_w<PatFrag Val> {
- defm B : avx512_mask_setop<VK8, v8i1, Val>;
defm W : avx512_mask_setop<VK16, v16i1, Val>;
defm D : avx512_mask_setop<VK32, v32i1, Val>;
defm Q : avx512_mask_setop<VK64, v64i1, Val>;
@@ -2642,9 +2554,11 @@ let Predicates = [HasAVX512] in {
def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
- def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>;
- def : Pat<(i1 1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
- def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
+ let AddedComplexity = 10 in { // To optimize isel table.
+ def : Pat<(i1 0), (COPY_TO_REGCLASS (KSET0W), VK1)>;
+ def : Pat<(i1 1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
+ def : Pat<(i1 -1), (COPY_TO_REGCLASS (KSHIFTRWri (KSET1W), (i8 15)), VK1)>;
+ }
}
// Patterns for kmask insert_subvector/extract_subvector to/from index=0
@@ -2695,12 +2609,12 @@ def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 32))),
// Patterns for kmask shift
multiclass mask_shift_lowering<RegisterClass RC, ValueType VT> {
- def : Pat<(VT (X86vshli RC:$src, (i8 imm:$imm))),
+ def : Pat<(VT (X86kshiftl RC:$src, (i8 imm:$imm))),
(VT (COPY_TO_REGCLASS
(KSHIFTLWri (COPY_TO_REGCLASS RC:$src, VK16),
(I8Imm $imm)),
RC))>;
- def : Pat<(VT (X86vsrli RC:$src, (i8 imm:$imm))),
+ def : Pat<(VT (X86kshiftr RC:$src, (i8 imm:$imm))),
(VT (COPY_TO_REGCLASS
(KSHIFTRWri (COPY_TO_REGCLASS RC:$src, VK16),
(I8Imm $imm)),
@@ -2738,7 +2652,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
[(set _.RC:$dst, (_.VT (bitconvert (ld_frag addr:$src))))],
_.ExeDomain>, EVEX;
- let Constraints = "$src0 = $dst" in {
+ let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
!strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
@@ -3160,6 +3074,10 @@ def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src)
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64X:$dst, (bitconvert GR64:$src))],
IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
+def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
+ EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
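A sketch of what the new load form catches (assuming x86-64 with -mavx512f): an i64 load whose only use is a bitconvert to f64 can now stay a single vector-domain VMOVQ:

  #include <string.h>
  double load_as_double_bits(const long long *p) {
    long long v = *p;
    double d;
    memcpy(&d, &v, sizeof d); // bitconvert (loadi64 addr): VMOVQ xmm, m64
    return d;
  }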
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64X:$src))],
@@ -3272,20 +3190,22 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode,
(scalar_to_vector _.FRC:$src2))))],
_.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V;
def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
- (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
+ (ins _.KRCWM:$mask, _.RC:$src1, _.FRC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src1, $src2}"),
[(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
- (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+ (_.VT (OpNode _.RC:$src1,
+ (scalar_to_vector _.FRC:$src2))),
_.ImmAllZerosV)))],
_.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ;
let Constraints = "$src0 = $dst" in
def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
- (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
+ (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.FRC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2}"),
[(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
- (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+ (_.VT (OpNode _.RC:$src1,
+ (scalar_to_vector _.FRC:$src2))),
(_.VT _.RC:$src0))))],
_.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K;
let canFoldAsLoad = 1, isReMaterializable = 1 in
@@ -3335,8 +3255,7 @@ def : Pat<(_.VT (OpNode _.RC:$src0,
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr#rrk)
(COPY_TO_REGCLASS _.FRC:$src2, _.RC),
(COPY_TO_REGCLASS GR32:$mask, VK1WM),
- (_.VT _.RC:$src0),
- (COPY_TO_REGCLASS _.FRC:$src1, _.RC)),
+ (_.VT _.RC:$src0), _.FRC:$src1),
_.RC)>;
def : Pat<(_.VT (OpNode _.RC:$src0,
@@ -3346,10 +3265,8 @@ def : Pat<(_.VT (OpNode _.RC:$src0,
(_.EltVT ZeroFP))))))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstrStr#rrkz)
(COPY_TO_REGCLASS GR32:$mask, VK1WM),
- (_.VT _.RC:$src0),
- (COPY_TO_REGCLASS _.FRC:$src1, _.RC)),
+ (_.VT _.RC:$src0), _.FRC:$src1),
_.RC)>;
-
}
multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
@@ -3359,14 +3276,31 @@ def : Pat<(masked_store addr:$dst, Mask,
(_.info512.VT (insert_subvector undef,
(_.info256.VT (insert_subvector undef,
(_.info128.VT _.info128.RC:$src),
- (i64 0))),
- (i64 0)))),
+ (iPTR 0))),
+ (iPTR 0)))),
(!cast<Instruction>(InstrStr#mrk) addr:$dst,
(i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)),
(COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
}
+multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
+ AVX512VLVectorVTInfo _,
+ dag Mask, RegisterClass MaskRC,
+ SubRegIndex subreg> {
+
+def : Pat<(masked_store addr:$dst, Mask,
+ (_.info512.VT (insert_subvector undef,
+ (_.info256.VT (insert_subvector undef,
+ (_.info128.VT _.info128.RC:$src),
+ (iPTR 0))),
+ (iPTR 0)))),
+ (!cast<Instruction>(InstrStr#mrk) addr:$dst,
+ (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
+ (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
+
+}
+
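A hedged C sketch of the masked scalar store this multiclass lowers (assuming -mavx512f); the GR8/GR16 mask is first widened to GR32 with INSERT_SUBREG, matching the pattern above:

  #include <immintrin.h>
  void store_low(float *p, unsigned char k, __m128 v) {
    _mm_mask_store_ss(p, (__mmask8)k, v); // VMOVSS m32 {k1}, xmm
  }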
multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
dag Mask, RegisterClass MaskRC> {
@@ -3374,7 +3308,7 @@ def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (masked_load addr:$srcAddr, Mask,
(_.info512.VT (bitconvert
(v16i32 immAllZerosV))))),
- (i64 0))),
+ (iPTR 0))),
(!cast<Instruction>(InstrStr#rmkz)
(i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)),
addr:$srcAddr)>;
@@ -3384,53 +3318,81 @@ def : Pat<(_.info128.VT (extract_subvector
(_.info512.VT (insert_subvector undef,
(_.info256.VT (insert_subvector undef,
(_.info128.VT (X86vzmovl _.info128.RC:$src)),
- (i64 0))),
- (i64 0))))),
- (i64 0))),
+ (iPTR 0))),
+ (iPTR 0))))),
+ (iPTR 0))),
(!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
(i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)),
addr:$srcAddr)>;
}
+multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
+ AVX512VLVectorVTInfo _,
+ dag Mask, RegisterClass MaskRC,
+ SubRegIndex subreg> {
+
+def : Pat<(_.info128.VT (extract_subvector
+ (_.info512.VT (masked_load addr:$srcAddr, Mask,
+ (_.info512.VT (bitconvert
+ (v16i32 immAllZerosV))))),
+ (iPTR 0))),
+ (!cast<Instruction>(InstrStr#rmkz)
+ (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
+ addr:$srcAddr)>;
+
+def : Pat<(_.info128.VT (extract_subvector
+ (_.info512.VT (masked_load addr:$srcAddr, Mask,
+ (_.info512.VT (insert_subvector undef,
+ (_.info256.VT (insert_subvector undef,
+ (_.info128.VT (X86vzmovl _.info128.RC:$src)),
+ (iPTR 0))),
+ (iPTR 0))))),
+ (iPTR 0))),
+ (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
+ (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM)),
+ addr:$srcAddr)>;
+
+}
+
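And the matching masked scalar load, again as a sketch under -mavx512f:

  #include <immintrin.h>
  __m128 load_low_z(const float *p, unsigned char k) {
    return _mm_maskz_load_ss((__mmask8)k, p); // VMOVSS xmm {k1}{z}, m32
  }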
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
-defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
- (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16>;
-defm : avx512_store_scalar_lowering<"VMOVSDZ", avx512vl_f64_info,
- (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8>;
+defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
+ (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
+defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
+ (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
-defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
- (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16>;
-defm : avx512_load_scalar_lowering<"VMOVSDZ", avx512vl_f64_info,
- (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8>;
+defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
+ (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
+defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
+ (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
(COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
- VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
+ VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
(COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
- VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
+ VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
- (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
+ (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM)),
(COPY_TO_REGCLASS VR128X:$src, FR32X))>;
let hasSideEffects = 0 in
defm VMOVSSZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f32x_info,
- (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2),
+ (outs VR128X:$dst), (ins VR128X:$src1, FR32X:$src2),
"vmovss.s", "$src2, $src1", "$src1, $src2", []>,
XS, EVEX_4V, VEX_LIG;
let hasSideEffects = 0 in
-defm VMOVSSDrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f64x_info,
- (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2),
+defm VMOVSDZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f64x_info,
+ (outs VR128X:$dst), (ins VR128X:$src1, FR64X:$src2),
"vmovsd.s", "$src2, $src1", "$src1, $src2", []>,
XD, EVEX_4V, VEX_LIG, VEX_W;
@@ -3439,31 +3401,31 @@ let Predicates = [HasAVX512] in {
// Move a scalar to XMM zero-extended: zero a VR128X, then do a
// MOVS{S,D} into the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
- (VMOVSSZrr (v4f32 (V_SET0)), FR32X:$src)>;
+ (VMOVSSZrr (v4f32 (AVX512_128_SET0)), FR32X:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
- (VMOVSSZrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ (VMOVSSZrr (v4f32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
- (VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ (VMOVSSZrr (v4i32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
- (VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
+ (VMOVSDZrr (v2f64 (AVX512_128_SET0)), FR64X:$src)>;
}
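A minimal C sketch of the zero-extending move described above (assuming -mavx512f):

  #include <immintrin.h>
  __m128 scalar_to_vec(float x) {
    // Zeroed register, then MOVSS into the low lane; with this patch the
    // zero idiom is AVX512_128_SET0 instead of the SSE V_SET0.
    return _mm_set_ss(x);
  }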
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSZrr (v4f32 (V_SET0)),
+ (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSZrr (v4i32 (V_SET0)),
+ (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSZrr (v4f32 (V_SET0)),
+ (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSZrr (v4i32 (V_SET0)),
+ (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
let AddedComplexity = 20 in {
@@ -3525,11 +3487,11 @@ let Predicates = [HasAVX512] in {
}
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (V_SET0)),
+ (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
FR32X:$src)), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
(v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (V_SET0)),
+ (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
FR64X:$src)), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
@@ -3538,18 +3500,18 @@ let Predicates = [HasAVX512] in {
// Move low f64 and clear high bits.
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSDZrr (v2f64 (V_SET0)),
+ (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSDZrr (v2f64 (V_SET0)),
+ (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
- (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
+ (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
- (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
+ (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;
// Extract and store.
@@ -3582,10 +3544,6 @@ let Predicates = [HasAVX512] in {
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
- def : Pat<(v4f32 (X86Movsd VR128X:$src1, VR128X:$src2)),
- (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
- def : Pat<(v4i32 (X86Movsd VR128X:$src1, VR128X:$src2)),
- (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
// 256-bit variants
def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
@@ -3635,6 +3593,8 @@ let Predicates = [HasAVX512] in {
}
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
let AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
+ (VMOVDI2PDIZrm addr:$src)>;
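The new zextloadi64i32 pattern corresponds to C like the following (a sketch, assuming x86-64 with -mavx512f), where one VMOVD both loads the i32 and zero-fills the rest of the vector:

  #include <immintrin.h>
  __m128i widen_u32(const unsigned *p) {
    return _mm_cvtsi64_si128((long long)*p); // ideally one VMOVD xmm, m32
  }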
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
@@ -3669,42 +3629,26 @@ let Predicates = [HasAVX512] in {
def : Pat<(v8i64 (X86vzload addr:$src)),
(SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
}
-
-def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
- (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
-
-def : Pat<(v8i64 (X86Vinsert (bc_v8i64 (v16i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
- (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
-
-def : Pat<(v16i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
- (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src2), sub_xmm)>;
-
-def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
- (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
-
//===----------------------------------------------------------------------===//
// AVX-512 - Non-temporals
//===----------------------------------------------------------------------===//
let SchedRW = [WriteLoad] in {
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
(ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
- [(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
- SSEPackedInt>, EVEX, T8PD, EVEX_V512,
+ [], SSEPackedInt>, EVEX, T8PD, EVEX_V512,
EVEX_CD8<64, CD8VF>;
let Predicates = [HasVLX] in {
def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
(ins i256mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
- [(set VR256X:$dst, (int_x86_avx2_movntdqa addr:$src))],
- SSEPackedInt>, EVEX, T8PD, EVEX_V256,
+ [], SSEPackedInt>, EVEX, T8PD, EVEX_V256,
EVEX_CD8<64, CD8VF>;
def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
(ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
- [(set VR128X:$dst, (int_x86_sse41_movntdqa addr:$src))],
- SSEPackedInt>, EVEX, T8PD, EVEX_V128,
+ [], SSEPackedInt>, EVEX, T8PD, EVEX_V128,
EVEX_CD8<64, CD8VF>;
}
}
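The explicit intrinsic patterns are dropped here, presumably because the movntdqa intrinsics are handled before isel; C-level non-temporal loads still select these definitions. A sketch (assuming -mavx512f):

  #include <immintrin.h>
  __m512i nt_load(const void *p) {
    return _mm512_stream_load_si512((void *)p); // VMOVNTDQA zmm, m512
  }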
@@ -4150,8 +4094,7 @@ let Predicates = [HasDQI, NoVLX] in {
//===----------------------------------------------------------------------===//
multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _, OpndItins itins,
- bit IsCommutable = 0> {
+ X86VectorVTInfo _, bit IsCommutable = 0> {
defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -4159,7 +4102,7 @@ multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(bitconvert (_.VT _.RC:$src2)))),
(_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
_.RC:$src2)))),
- itins.rr, IsCommutable>,
+ IIC_SSE_BIT_P_RR, IsCommutable>,
AVX512BIBase, EVEX_4V;
defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -4169,14 +4112,13 @@ multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(bitconvert (_.LdFrag addr:$src2)))),
(_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)))))),
- itins.rm>,
+ IIC_SSE_BIT_P_RM>,
AVX512BIBase, EVEX_4V;
}
multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _, OpndItins itins,
- bit IsCommutable = 0> :
- avx512_logic_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
+ X86VectorVTInfo _, bit IsCommutable = 0> :
+ avx512_logic_rm<opc, OpcodeStr, OpNode, _, IsCommutable> {
defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
@@ -4189,58 +4131,48 @@ multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
(bitconvert
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2)))))))),
- itins.rm>,
+ IIC_SSE_BIT_P_RM>,
AVX512BIBase, EVEX_4V, EVEX_B;
}
multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
- AVX512VLVectorVTInfo VTInfo, OpndItins itins,
- Predicate prd, bit IsCommutable = 0> {
- let Predicates = [prd] in
- defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
+ AVX512VLVectorVTInfo VTInfo,
+ bit IsCommutable = 0> {
+ let Predicates = [HasAVX512] in
+ defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info512,
IsCommutable>, EVEX_V512;
- let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
+ let Predicates = [HasAVX512, HasVLX] in {
+ defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
IsCommutable>, EVEX_V256;
- defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
+ defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
IsCommutable>, EVEX_V128;
}
}
multiclass avx512_logic_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_logic_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
- itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
+ IsCommutable>, EVEX_CD8<32, CD8VF>;
}
multiclass avx512_logic_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
- OpndItins itins, Predicate prd,
bit IsCommutable = 0> {
defm NAME : avx512_logic_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
- itins, prd, IsCommutable>,
- VEX_W, EVEX_CD8<64, CD8VF>;
+ IsCommutable>,
+ VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
- SDNode OpNode, OpndItins itins, Predicate prd,
- bit IsCommutable = 0> {
- defm Q : avx512_logic_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, itins, prd,
- IsCommutable>;
-
- defm D : avx512_logic_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, itins, prd,
- IsCommutable>;
+ SDNode OpNode, bit IsCommutable = 0> {
+ defm Q : avx512_logic_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, IsCommutable>;
+ defm D : avx512_logic_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, IsCommutable>;
}
-defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
- SSE_INTALU_ITINS_P, HasAVX512, 1>;
-defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
- SSE_INTALU_ITINS_P, HasAVX512, 1>;
-defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
- SSE_INTALU_ITINS_P, HasAVX512, 1>;
-defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
- SSE_INTALU_ITINS_P, HasAVX512, 0>;
+defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, 1>;
+defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, 1>;
+defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, 1>;
+defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp>;
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
@@ -4252,16 +4184,16 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 FROUND_CURRENT)),
+ (_.VT (VecNode _.RC:$src1, _.RC:$src2,
+ (i32 FROUND_CURRENT))),
itins.rr>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
+ (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (VecNode (_.VT _.RC:$src1),
- (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
- (i32 FROUND_CURRENT)),
+ (_.VT (VecNode _.RC:$src1,
+ _.ScalarIntMemCPat:$src2,
+ (i32 FROUND_CURRENT))),
itins.rm>;
let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -4291,13 +4223,43 @@ multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo
EVEX_B, EVEX_RC;
}
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
- SDNode VecNode, OpndItins itins, bit IsCommutable> {
- let ExeDomain = _.ExeDomain in
+ SDNode OpNode, SDNode VecNode, SDNode SaeNode,
+ OpndItins itins, bit IsCommutable> {
+ let ExeDomain = _.ExeDomain in {
+ defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (VecNode _.RC:$src1, _.RC:$src2)),
+ itins.rr>;
+
+ defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (VecNode _.RC:$src1,
+ _.ScalarIntMemCPat:$src2)),
+ itins.rm>;
+
+ let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
+ def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.FRC:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
+ itins.rr> {
+ let isCommutable = IsCommutable;
+ }
+ def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1,
+ (_.ScalarLdFrag addr:$src2)))], itins.rm>;
+ }
+
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
- (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 FROUND_NO_EXC))>, EVEX_B;
+ }
}
multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -4316,31 +4278,29 @@ multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode VecNode,
+ SDNode VecNode, SDNode SaeNode,
SizeItins itins, bit IsCommutable> {
- defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
- itins.s, IsCommutable>,
- avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, VecNode,
- itins.s, IsCommutable>,
+ defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
+ VecNode, SaeNode, itins.s, IsCommutable>,
XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
- defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
- itins.d, IsCommutable>,
- avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, VecNode,
- itins.d, IsCommutable>,
+ defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
+ VecNode, SaeNode, itins.d, IsCommutable>,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
-defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnd, SSE_ALU_ITINS_S, 1>;
-defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_MUL_ITINS_S, 1>;
-defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnd, SSE_ALU_ITINS_S, 0>;
-defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_DIV_ITINS_S, 0>;
-defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fminRnd, SSE_ALU_ITINS_S, 0>;
-defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxRnd, SSE_ALU_ITINS_S, 0>;
+defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, SSE_ALU_ITINS_S, 1>;
+defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, SSE_MUL_ITINS_S, 1>;
+defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, SSE_ALU_ITINS_S, 0>;
+defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, SSE_DIV_ITINS_S, 0>;
+defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
+ SSE_ALU_ITINS_S, 0>;
+defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
+ SSE_ALU_ITINS_S, 0>;
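A brief C sketch of the scalar form (assuming -mavx512f); the comment below explains the commutability caveat:

  #include <immintrin.h>
  __m128 min_low(__m128 a, __m128 b) {
    // MINSS returns the second operand when the inputs are unordered or
    // are oppositely-signed zeros, so the operand order is significant.
    return _mm_min_ss(a, b); // VMINSS
  }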
// MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
// X86fminc and X86fmaxc instead of X86fmin and X86fmax.
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, SDNode OpNode, OpndItins itins> {
- let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
+ let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -4598,6 +4558,7 @@ let Predicates = [HasVLX,HasDQI] in {
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
@@ -4613,10 +4574,12 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))), (i32 FROUND_CURRENT))>,
EVEX_4V, EVEX_B;
+ }
}
multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
@@ -4627,6 +4590,7 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src1,
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT))>;
+ }
}
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> {
@@ -4899,6 +4863,33 @@ defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl>;
+// Use the 512-bit VPSRA/VPSRAI versions to implement v2i64/v4i64 when VLX is unavailable.
+let Predicates = [HasAVX512, NoVLX] in {
+ def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
+ (EXTRACT_SUBREG (v8i64
+ (VPSRAQZrr
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ VR128X:$src2)), sub_ymm)>;
+
+ def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
+ (EXTRACT_SUBREG (v8i64
+ (VPSRAQZrr
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
+ VR128X:$src2)), sub_xmm)>;
+
+ def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
+ (EXTRACT_SUBREG (v8i64
+ (VPSRAQZri
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ imm:$src2)), sub_ymm)>;
+
+ def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
+ (EXTRACT_SUBREG (v8i64
+ (VPSRAQZri
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
+ imm:$src2)), sub_xmm)>;
+}
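In C terms (a sketch, assuming Clang or GCC with -mavx512f but not -mavx512vl), the widening these patterns perform looks like:

  // AVX/AVX2 have no 64-bit arithmetic right shift; with AVX512F alone
  // the ymm operand is inserted into a zmm, VPSRAQ runs, and the low
  // ymm is extracted again.
  typedef long long v4i64 __attribute__((vector_size(32)));
  v4i64 sra64(v4i64 v) { return v >> 3; }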
+
//===-------------------------------------------------------------------===//
// Variable Bit Shifts
//===-------------------------------------------------------------------===//
@@ -4932,6 +4923,7 @@ multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}
+
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
@@ -4955,12 +4947,13 @@ multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
}
// Use the 512-bit version to implement the 128/256-bit variants when VLX is unavailable.
-multiclass avx512_var_shift_w_lowering<AVX512VLVectorVTInfo _, SDNode OpNode> {
- let Predicates = [HasBWI, NoVLX] in {
+multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
+ SDNode OpNode, list<Predicate> p> {
+ let Predicates = p in {
def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
(_.info256.VT _.info256.RC:$src2))),
(EXTRACT_SUBREG
- (!cast<Instruction>(NAME#"WZrr")
+ (!cast<Instruction>(OpcodeStr#"Zrr")
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
sub_ymm)>;
@@ -4968,13 +4961,12 @@ multiclass avx512_var_shift_w_lowering<AVX512VLVectorVTInfo _, SDNode OpNode> {
def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
(_.info128.VT _.info128.RC:$src2))),
(EXTRACT_SUBREG
- (!cast<Instruction>(NAME#"WZrr")
+ (!cast<Instruction>(OpcodeStr#"Zrr")
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
(INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
sub_xmm)>;
}
}
-
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasBWI] in
@@ -4990,19 +4982,22 @@ multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
}
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
- avx512_var_shift_w<0x12, "vpsllvw", shl>,
- avx512_var_shift_w_lowering<avx512vl_i16_info, shl>;
+ avx512_var_shift_w<0x12, "vpsllvw", shl>;
defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
- avx512_var_shift_w<0x11, "vpsravw", sra>,
- avx512_var_shift_w_lowering<avx512vl_i16_info, sra>;
+ avx512_var_shift_w<0x11, "vpsravw", sra>;
defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
- avx512_var_shift_w<0x10, "vpsrlvw", srl>,
- avx512_var_shift_w_lowering<avx512vl_i16_info, srl>;
+ avx512_var_shift_w<0x10, "vpsrlvw", srl>;
+
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
+defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
+defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
+
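A sketch of the same widening trick for the variable word shifts (assuming -mavx512bw without -mavx512vl):

  typedef short v16i16 __attribute__((vector_size(32)));
  v16i16 var_shl(v16i16 a, v16i16 b) {
    return a << b; // widened to VPSLLVW zmm, then extracted back to ymm
  }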
// Special handling for VPSRAV intrinsics.
multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
list<Predicate> p> {
@@ -5013,7 +5008,6 @@ multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
(!cast<Instruction>(InstrStr#_.ZSuffix##rm)
_.RC:$src1, addr:$src2)>;
- let AddedComplexity = 20 in {
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
@@ -5023,8 +5017,6 @@ multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
_.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, addr:$src2)>;
- }
- let AddedComplexity = 30 in {
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
@@ -5034,7 +5026,6 @@ multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
_.RC:$src1, addr:$src2)>;
- }
}
}
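A small C sketch of the X86vsrav node these patterns serve (assuming -mavx2 for the unmasked form; the masked variants come from the vselect patterns above):

  #include <immintrin.h>
  __m256i srav(__m256i a, __m256i counts) {
    return _mm256_srav_epi32(a, counts); // VPSRAVD
  }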
@@ -5046,14 +5037,12 @@ multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
(X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
_.RC:$src1, addr:$src2)>;
- let AddedComplexity = 20 in
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, addr:$src2)>;
- let AddedComplexity = 30 in
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(X86vsrav _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2))),
@@ -5251,6 +5240,7 @@ let Predicates = [HasAVX512] in {
//===----------------------------------------------------------------------===//
multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in
def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, f64mem:$src2),
!strconcat(OpcodeStr,
@@ -5599,7 +5589,7 @@ multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
"$src3, $src2", "$src2, $src3", RHS_VEC_r, 1, 1>, AVX512FMA3Base;
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src2, _.ScalarMemOp:$src3), OpcodeStr,
+ (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", RHS_VEC_m, 1, 1>, AVX512FMA3Base;
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -5625,13 +5615,13 @@ multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
string OpcodeStr, SDNode OpNode, SDNode OpNodeRnds1,
SDNode OpNodeRnds3, X86VectorVTInfo _ , string SUFF> {
-
+ let ExeDomain = _.ExeDomain in {
defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix , _ ,
// Operands for the intrinsic are in 1-2-3 order to preserve passthru
// semantics.
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 FROUND_CURRENT))),
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2,
- (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))), (i32 FROUND_CURRENT))),
+ _.ScalarIntMemCPat:$src3, (i32 FROUND_CURRENT))),
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
@@ -5641,8 +5631,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix , _ ,
(_.VT (OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 FROUND_CURRENT))),
- (_.VT (OpNodeRnds3 _.RC:$src2,
- (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
+ (_.VT (OpNodeRnds3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
_.RC:$src1, (i32 FROUND_CURRENT))),
(_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
(i32 imm:$rc))),
@@ -5653,8 +5642,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix , _ ,
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 FROUND_CURRENT))),
- (_.VT (OpNodeRnds1 _.RC:$src1,
- (_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
+ (_.VT (OpNodeRnds1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
_.RC:$src2, (i32 FROUND_CURRENT))),
(_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src3, _.RC:$src2,
(i32 imm:$rc))),
@@ -5662,6 +5650,7 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
_.FRC:$src2))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src3), _.FRC:$src2)))>;
+ }
}
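The 1-2-3 operand order called out above is visible at the C level (a sketch, assuming -mavx512f): when the mask bit is clear, the result keeps the low element of the first source.

  #include <immintrin.h>
  __m128 fma_mask(__m128 a, __mmask8 k, __m128 b, __m128 c) {
    // k[0] == 0 keeps a's low element: exactly the passthru that the
    // pattern's operand order preserves.
    return _mm_mask_fmadd_ss(a, k, b, c); // VFMADD213SS {k1}
  }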
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
@@ -5692,6 +5681,7 @@ defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub,
let Constraints = "$src1 = $dst" in {
multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
@@ -5711,6 +5701,7 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src1,
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B;
+ }
}
} // Constraints = "$src1 = $dst"
@@ -5878,10 +5869,10 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT ,
!strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
[(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
EVEX, VEX_LIG, EVEX_B, EVEX_RC;
- def rm : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
+ def rm : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode
- (SrcVT.VT (scalar_to_vector (SrcVT.ScalarLdFrag addr:$src))),
+ (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
(i32 FROUND_CURRENT)))]>,
EVEX, VEX_LIG;
} // Predicates = [HasAVX512]
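A C sketch of the scalar conversion with explicit rounding that these defs cover (assuming -mavx512f); the IntScalarMemOp change lets the memory form fold a genuine scalar load:

  #include <immintrin.h>
  int cvt_rn(__m128 a) {
    return _mm_cvt_roundss_si32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  }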
@@ -5918,20 +5909,20 @@ defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info,
let Predicates = [HasAVX512] in {
def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
(VCVTSS2SIZrr VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse_cvtss2si (sse_load_f32 addr:$src))),
- (VCVTSS2SIZrm addr:$src)>;
+ def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
+ (VCVTSS2SIZrm sse_load_f32:$src)>;
def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
(VCVTSS2SI64Zrr VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse_cvtss2si64 (sse_load_f32 addr:$src))),
- (VCVTSS2SI64Zrm addr:$src)>;
+ def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
+ (VCVTSS2SI64Zrm sse_load_f32:$src)>;
def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
(VCVTSD2SIZrr VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse2_cvtsd2si (sse_load_f64 addr:$src))),
- (VCVTSD2SIZrm addr:$src)>;
+ def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
+ (VCVTSD2SIZrm sse_load_f64:$src)>;
def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
(VCVTSD2SI64Zrr VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (sse_load_f64 addr:$src))),
- (VCVTSD2SI64Zrm addr:$src)>;
+ def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
+ (VCVTSD2SI64Zrm sse_load_f64:$src)>;
} // HasAVX512
let Predicates = [HasAVX512] in {
@@ -6018,7 +6009,7 @@ let Predicates = [HasAVX512] in {
EVEX,VEX_LIG , EVEX_B;
let mayLoad = 1, hasSideEffects = 0 in
def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
- (ins _SrcRC.MemOp:$src),
+ (ins _SrcRC.IntScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[]>, EVEX, VEX_LIG;
@@ -6055,47 +6046,58 @@ defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
let Predicates = [HasAVX512] in {
def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
(VCVTTSS2SIZrr_Int VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse_cvttss2si (sse_load_f32 addr:$src))),
- (VCVTTSS2SIZrm_Int addr:$src)>;
+ def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
+ (VCVTTSS2SIZrm_Int ssmem:$src)>;
def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
(VCVTTSS2SI64Zrr_Int VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse_cvttss2si64 (sse_load_f32 addr:$src))),
- (VCVTTSS2SI64Zrm_Int addr:$src)>;
+ def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
+ (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
(VCVTTSD2SIZrr_Int VR128X:$src)>;
- def : Pat<(i32 (int_x86_sse2_cvttsd2si (sse_load_f64 addr:$src))),
- (VCVTTSD2SIZrm_Int addr:$src)>;
+ def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
+ (VCVTTSD2SIZrm_Int sdmem:$src)>;
def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
(VCVTTSD2SI64Zrr_Int VR128X:$src)>;
- def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (sse_load_f64 addr:$src))),
- (VCVTTSD2SI64Zrm_Int addr:$src)>;
+ def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
+ (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
} // HasAVX512
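The ComplexPattern operands (sse_load_f32/sse_load_f64) let the truncating conversions fold partial vector loads; a sketch (assuming -mavx512f):

  #include <immintrin.h>
  int cvtt_from_mem(const float *p) {
    __m128 v = _mm_load_ss(p);   // matches sse_load_f32
    return _mm_cvttss_si32(v);   // ideally folds to VCVTTSS2SI r32, m32
  }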
//===----------------------------------------------------------------------===//
// AVX-512 Convert form float to double and back
//===----------------------------------------------------------------------===//
multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode> {
- defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
(_Src.VT _Src.RC:$src2),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
- defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
+ defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
- (_Src.VT (scalar_to_vector
- (_Src.ScalarLdFrag addr:$src2))),
+ (_Src.VT _Src.ScalarIntMemCPat:$src2),
(i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+
+ let isCodeGenOnly = 1, hasSideEffects = 0 in {
+ def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _Src.FRC:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
+ let mayLoad = 1 in
+ def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+ }
}
// Scalar Conversion with SAE - suppress all exceptions
multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
- defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
@@ -6107,7 +6109,7 @@ multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTIn
// Scalar Conversion with rounding control (RC)
multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNodeRnd> {
- defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
(_.VT (OpNodeRnd (_.VT _.RC:$src1),
@@ -6140,39 +6142,36 @@ defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
X86fpextRnd,f32x_info, f64x_info >;
def : Pat<(f64 (fpextend FR32X:$src)),
- (COPY_TO_REGCLASS (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X),
- (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>,
+ (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, FR64X), FR32X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(f64 (fpextend (loadf32 addr:$src))),
- (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
+ (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512]>;
def : Pat<(f64 (extloadf32 addr:$src)),
- (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
+ (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX512, OptForSize]>;
def : Pat<(f64 (extloadf32 addr:$src)),
- (COPY_TO_REGCLASS (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)),
- (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>,
+ (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
Requires<[HasAVX512, OptForSpeed]>;
def : Pat<(f32 (fpround FR64X:$src)),
- (COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),
- (COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>,
+ (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, FR32X), FR64X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector
(f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
- (VCVTSD2SSZrr VR128X:$dst, VR128X:$src)>,
+ (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
def : Pat<(v2f64 (X86Movsd
(v2f64 VR128X:$dst),
(v2f64 (scalar_to_vector
(f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
- (VCVTSS2SDZrr VR128X:$dst, VR128X:$src)>,
+ (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
Requires<[HasAVX512]>;
//===----------------------------------------------------------------------===//
@@ -6808,7 +6807,7 @@ let Predicates = [HasAVX512] in {
let Predicates = [HasVLX] in {
defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem>,
EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
- defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f128mem>,
+ defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>,
EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
}
}
@@ -6917,7 +6916,7 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
- let AddedComplexity = 20 , Predicates = [HasAVX512] in {
+ let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -6942,6 +6941,7 @@ defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>,
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(_.FloatVT (OpNode _.RC:$src))>, EVEX, T8PD;
@@ -6955,6 +6955,7 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.FloatVT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
EVEX, T8PD, EVEX_B;
+ }
}
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
@@ -6986,7 +6987,7 @@ defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>;
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode> {
-
+ let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -7005,6 +7006,7 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(OpNode (_.VT _.RC:$src1),
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT))>;
+ }
}
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode> {
@@ -7024,7 +7026,7 @@ defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V;
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode OpNode> {
-
+ let ExeDomain = _.ExeDomain in {
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>;
@@ -7041,9 +7043,11 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
(OpNode (_.FloatVT
(X86VBroadcast (_.ScalarLdFrag addr:$src))),
(i32 FROUND_CURRENT))>, EVEX_B;
+ }
}
multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
SDNode OpNode> {
+ let ExeDomain = _.ExeDomain in
defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr,
"{sae}, $src", "$src, {sae}",
@@ -7084,6 +7088,7 @@ defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>,
multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
SDNode OpNodeRnd, X86VectorVTInfo _>{
+ let ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
(_.VT (OpNodeRnd _.RC:$src, (i32 imm:$rc)))>,
@@ -7092,6 +7097,7 @@ multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
+ let ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(_.FloatVT (OpNode _.RC:$src))>, EVEX;
@@ -7106,6 +7112,7 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
(OpNode (_.FloatVT
(X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
EVEX, EVEX_B;
+ }
}
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
@@ -7143,7 +7150,7 @@ multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
string SUFF, SDNode OpNode, SDNode OpNodeRnd> {
-
+ let ExeDomain = _.ExeDomain in {
defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -7176,6 +7183,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
}
+ }
def : Pat<(_.EltVT (OpNode _.FRC:$src)),
(!cast<Instruction>(NAME#SUFF#Zr)
@@ -7480,11 +7488,11 @@ multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
}
multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
+ SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasBWI] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v8i16x_info,
- v16i8x_info, i64mem, LdFrag, OpNode>,
+ v16i8x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v16i16x_info,
@@ -7499,11 +7507,11 @@ multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
}
multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
+ SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
- v16i8x_info, i32mem, LdFrag, OpNode>,
+ v16i8x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
@@ -7518,11 +7526,11 @@ multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
}
multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
+ SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
- v16i8x_info, i16mem, LdFrag, OpNode>,
+ v16i8x_info, i16mem, LdFrag, InVecNode>,
EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
@@ -7537,11 +7545,11 @@ multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
}
multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
+ SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
- v8i16x_info, i64mem, LdFrag, OpNode>,
+ v8i16x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
@@ -7556,11 +7564,11 @@ multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
}
multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
+ SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
- v8i16x_info, i32mem, LdFrag, OpNode>,
+ v8i16x_info, i32mem, LdFrag, InVecNode>,
EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
@@ -7575,12 +7583,12 @@ multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
}
multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode,
+ SDPatternOperator OpNode, SDPatternOperator InVecNode,
string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
- v4i32x_info, i64mem, LdFrag, OpNode>,
+ v4i32x_info, i64mem, LdFrag, InVecNode>,
EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
@@ -7594,19 +7602,19 @@ multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
}
}
-defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, "z">;
-defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, "z">;
-defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, "z">;
-defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, "z">;
-defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, "z">;
-defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, "z">;
+defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z">;
+defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z">;
+defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z">;
+defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z">;
+defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z">;
+defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z">;
-defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, "s">;
-defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, "s">;
-defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, "s">;
-defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, "s">;
-defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, "s">;
-defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">;
+defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s">;
+defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s">;
+defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s">;
+defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s">;
+defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s">;
+defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s">;
// EXTLOAD patterns, implemented using vpmovz
multiclass avx512_ext_lowering<string InstrStr, X86VectorVTInfo To,
@@ -7649,69 +7657,69 @@ let Predicates = [HasAVX512] in {
defm : avx512_ext_lowering<"DQZ", v8i64_info, v8i32x_info, extloadvi32>;
}
-multiclass AVX512_pmovx_patterns<string OpcPrefix, string ExtTy,
- SDNode ExtOp, PatFrag ExtLoad16> {
+multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
+ SDNode InVecOp, PatFrag ExtLoad16> {
// 128-bit patterns
let Predicates = [HasVLX, HasBWI] in {
- def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
- def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
}
let Predicates = [HasVLX] in {
- def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
+ def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v4i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v8i16 (vzmovl_v4i32 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
- def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
+ def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
}
// 256-bit patterns
@@ -7790,8 +7798,8 @@ multiclass AVX512_pmovx_patterns<string OpcPrefix, string ExtTy,
}
}
-defm : AVX512_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
-defm : AVX512_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
+defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec, extloadi32i16>;
+defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec, loadi16_anyext>;
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
@@ -7832,7 +7840,7 @@ multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
mgatherv16i32>, EVEX_V512;
- defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz512mem,
+ defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256xmem,
mgatherv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
@@ -7889,7 +7897,7 @@ multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
mscatterv16i32>, EVEX_V512;
- defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz512mem,
+ defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256xmem,
mscatterv8i64>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
@@ -7922,7 +7930,7 @@ defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
- VK8WM, vz512mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+ VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
@@ -7934,7 +7942,7 @@ defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
- VK8WM, vz512mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+ VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
@@ -7946,7 +7954,7 @@ defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
- VK8WM, vz512mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+ VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
@@ -7958,7 +7966,7 @@ defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
- VK8WM, vz512mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+ VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
@@ -7982,6 +7990,17 @@ def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
[(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))]>, EVEX;
}
+// Use the 512-bit version to implement the 128/256-bit variants when VLX is
+// not available.
+multiclass avx512_convert_mask_to_vector_lowering<X86VectorVTInfo X86Info,
+ X86VectorVTInfo _> {
+
+ def : Pat<(X86Info.VT (X86vsext (X86Info.KVT X86Info.KRC:$src))),
+ (X86Info.VT (EXTRACT_SUBREG
+ (_.VT (!cast<Instruction>(NAME#"Zrr")
+ (_.KVT (COPY_TO_REGCLASS X86Info.KRC:$src,_.KRC)))),
+ X86Info.SubRegIdx))>;
+}
+
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
@@ -7991,20 +8010,17 @@ let Predicates = [prd] in
defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
}
-}
+let Predicates = [prd, NoVLX] in {
+ defm Z256_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info256,VTInfo.info512>;
+ defm Z128_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info128,VTInfo.info512>;
+ }
-multiclass avx512_convert_mask_to_vector<string OpcodeStr> {
- defm NAME##B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, OpcodeStr,
- HasBWI>;
- defm NAME##W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, OpcodeStr,
- HasBWI>, VEX_W;
- defm NAME##D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, OpcodeStr,
- HasDQI>;
- defm NAME##Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, OpcodeStr,
- HasDQI>, VEX_W;
}
-defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;
+defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
+defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
+defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
+defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
@@ -8319,6 +8335,7 @@ multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in
defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, {sae}, $src2, $src1",
@@ -8466,6 +8483,39 @@ defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>,
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+let Predicates = [HasAVX512] in {
+// Provide a fallback in case the load node used in the broadcast patterns
+// above has additional users, which prevents those patterns from matching.
+def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
+ (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ 0)>;
+def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
+ (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ 0)>;
+
+def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
+ (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ 0)>;
+def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
+ (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ 0)>;
+
+def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
+ (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ 0)>;
+
+def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
+ (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ 0)>;
+}
+
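[Editor's illustration, not part of the patch: the fallback above reproduces a 128-bit-to-512-bit subvector broadcast with a cross-lane shuffle whose immediate selects lane 0 for every destination lane. In Intel intrinsics terms, assuming AVX-512F is available, the equivalent would be:]

    #include <immintrin.h>

    // Broadcast a 128-bit vector to all four 128-bit lanes of a 512-bit
    // vector, mirroring the VSHUFF64X2-with-immediate-0 fallback above.
    static __m512d broadcast128(__m128d X) {
      __m512d V = _mm512_castpd128_pd512(X); // upper 384 bits undefined
      return _mm512_shuffle_f64x2(V, V, 0);  // pick lane 0 for every lane
    }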
multiclass avx512_valign<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I> {
defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign>,
AVX512AIi8Base, EVEX_4V;
@@ -8503,6 +8553,7 @@ defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" ,
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr,
"$src1", "$src1",
@@ -8513,6 +8564,7 @@ multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src1", "$src1",
(_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>;
+ }
}
multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -8577,66 +8629,7 @@ multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
HasBWI>;
}
-defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>;
-
-def avx512_v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
- VR128X:$src))>;
-def avx512_v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128X:$src, (i8 15)))>;
-def avx512_v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128X:$src, (i8 31)))>;
-def avx512_v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
- VR256X:$src))>;
-def avx512_v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256X:$src, (i8 15)))>;
-def avx512_v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256X:$src, (i8 31)))>;
-
-let Predicates = [HasBWI, HasVLX] in {
- def : Pat<(xor
- (bc_v2i64 (avx512_v16i1sextv16i8)),
- (bc_v2i64 (add (v16i8 VR128X:$src), (avx512_v16i1sextv16i8)))),
- (VPABSBZ128rr VR128X:$src)>;
- def : Pat<(xor
- (bc_v2i64 (avx512_v8i1sextv8i16)),
- (bc_v2i64 (add (v8i16 VR128X:$src), (avx512_v8i1sextv8i16)))),
- (VPABSWZ128rr VR128X:$src)>;
- def : Pat<(xor
- (bc_v4i64 (avx512_v32i1sextv32i8)),
- (bc_v4i64 (add (v32i8 VR256X:$src), (avx512_v32i1sextv32i8)))),
- (VPABSBZ256rr VR256X:$src)>;
- def : Pat<(xor
- (bc_v4i64 (avx512_v16i1sextv16i16)),
- (bc_v4i64 (add (v16i16 VR256X:$src), (avx512_v16i1sextv16i16)))),
- (VPABSWZ256rr VR256X:$src)>;
-}
-let Predicates = [HasAVX512, HasVLX] in {
- def : Pat<(xor
- (bc_v2i64 (avx512_v4i1sextv4i32)),
- (bc_v2i64 (add (v4i32 VR128X:$src), (avx512_v4i1sextv4i32)))),
- (VPABSDZ128rr VR128X:$src)>;
- def : Pat<(xor
- (bc_v4i64 (avx512_v8i1sextv8i32)),
- (bc_v4i64 (add (v8i32 VR256X:$src), (avx512_v8i1sextv8i32)))),
- (VPABSDZ256rr VR256X:$src)>;
-}
-
-let Predicates = [HasAVX512] in {
-def : Pat<(xor
- (bc_v8i64 (v16i1sextv16i32)),
- (bc_v8i64 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
- (VPABSDZrr VR512:$src)>;
-def : Pat<(xor
- (bc_v8i64 (v8i1sextv8i64)),
- (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
- (VPABSQZrr VR512:$src)>;
-}
-let Predicates = [HasBWI] in {
-def : Pat<(xor
- (bc_v8i64 (v64i1sextv64i8)),
- (bc_v8i64 (add (v64i8 VR512:$src), (v64i1sextv64i8)))),
- (VPABSBZrr VR512:$src)>;
-def : Pat<(xor
- (bc_v8i64 (v32i1sextv32i16)),
- (bc_v8i64 (add (v32i16 VR512:$src), (v32i1sextv32i16)))),
- (VPABSWZrr VR512:$src)>;
-}
+defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>;
multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
@@ -8663,6 +8656,7 @@ defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>;
multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
(_.VT (OpNode (_.VT _.RC:$src)))>, EVEX;
@@ -8671,6 +8665,7 @@ multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (OpNode (_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src)))))>,
EVEX, EVEX_CD8<_.EltSize, CD8VH>;
+ }
}
multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -8947,6 +8942,68 @@ multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
HasBWI>, EVEX_4V;
+// Transforms to swizzle an immediate to enable better matching when the
+// memory operand isn't in the right place.
+def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
+ // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
+ uint8_t Imm = N->getZExtValue();
+ // Swap bits 1/4 and 3/6.
+ uint8_t NewImm = Imm & 0xa5;
+ if (Imm & 0x02) NewImm |= 0x10;
+ if (Imm & 0x10) NewImm |= 0x02;
+ if (Imm & 0x08) NewImm |= 0x40;
+ if (Imm & 0x40) NewImm |= 0x08;
+ return getI8Imm(NewImm, SDLoc(N));
+}]>;
+def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
+  // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
+ uint8_t Imm = N->getZExtValue();
+ // Swap bits 2/4 and 3/5.
+ uint8_t NewImm = Imm & 0xc3;
+ if (Imm & 0x04) NewImm |= 0x10;
+ if (Imm & 0x10) NewImm |= 0x04;
+ if (Imm & 0x08) NewImm |= 0x20;
+ if (Imm & 0x20) NewImm |= 0x08;
+ return getI8Imm(NewImm, SDLoc(N));
+}]>;
+def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
+ // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
+ uint8_t Imm = N->getZExtValue();
+ // Swap bits 1/2 and 5/6.
+ uint8_t NewImm = Imm & 0x99;
+ if (Imm & 0x02) NewImm |= 0x04;
+ if (Imm & 0x04) NewImm |= 0x02;
+ if (Imm & 0x20) NewImm |= 0x40;
+ if (Imm & 0x40) NewImm |= 0x20;
+ return getI8Imm(NewImm, SDLoc(N));
+}]>;
+def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
+  // Convert a VPTERNLOG immediate by moving operand 0 to the end.
+ uint8_t Imm = N->getZExtValue();
+ // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
+ uint8_t NewImm = Imm & 0x81;
+ if (Imm & 0x02) NewImm |= 0x04;
+ if (Imm & 0x04) NewImm |= 0x10;
+ if (Imm & 0x08) NewImm |= 0x40;
+ if (Imm & 0x10) NewImm |= 0x02;
+ if (Imm & 0x20) NewImm |= 0x08;
+ if (Imm & 0x40) NewImm |= 0x20;
+ return getI8Imm(NewImm, SDLoc(N));
+}]>;
+def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
+ // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
+ uint8_t Imm = N->getZExtValue();
+ // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
+ uint8_t NewImm = Imm & 0x81;
+ if (Imm & 0x02) NewImm |= 0x10;
+ if (Imm & 0x04) NewImm |= 0x02;
+ if (Imm & 0x08) NewImm |= 0x20;
+ if (Imm & 0x10) NewImm |= 0x04;
+ if (Imm & 0x20) NewImm |= 0x40;
+ if (Imm & 0x40) NewImm |= 0x08;
+ return getI8Imm(NewImm, SDLoc(N));
+}]>;
+
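[Editor's aside, not part of the patch: the swizzles above permute the VPTERNLOG truth table, whose immediate bit at index (src1<<2 | src2<<1 | src3) gives the result for that input combination. A standalone C++ sanity check for the 321 and 213 transforms, under that assumption:]

    #include <cassert>
    #include <cstdint>

    // Reference semantics: bit (a<<2 | b<<1 | c) of the immediate is the
    // result for inputs a (operand 0), b (operand 1), c (operand 2).
    static bool ternlog(uint8_t Imm, bool A, bool B, bool C) {
      return (Imm >> ((A << 2) | (B << 1) | C)) & 1;
    }

    // Same swizzle as VPTERNLOG321_imm8: swap bits 1/4 and 3/6.
    static uint8_t swapOp0Op2(uint8_t Imm) {
      uint8_t NewImm = Imm & 0xa5;
      if (Imm & 0x02) NewImm |= 0x10;
      if (Imm & 0x10) NewImm |= 0x02;
      if (Imm & 0x08) NewImm |= 0x40;
      if (Imm & 0x40) NewImm |= 0x08;
      return NewImm;
    }

    // Same swizzle as VPTERNLOG213_imm8: swap bits 2/4 and 3/5.
    static uint8_t swapOp0Op1(uint8_t Imm) {
      uint8_t NewImm = Imm & 0xc3;
      if (Imm & 0x04) NewImm |= 0x10;
      if (Imm & 0x10) NewImm |= 0x04;
      if (Imm & 0x08) NewImm |= 0x20;
      if (Imm & 0x20) NewImm |= 0x08;
      return NewImm;
    }

    int main() {
      for (unsigned Imm = 0; Imm < 256; ++Imm)
        for (unsigned V = 0; V < 8; ++V) {
          bool A = V & 4, B = V & 2, C = V & 1;
          // Swapping operands 0 and 2 with the 321 immediate is a no-op.
          assert(ternlog(Imm, A, B, C) == ternlog(swapOp0Op2(Imm), C, B, A));
          // Swapping operands 0 and 1 with the 213 immediate is a no-op.
          assert(ternlog(Imm, A, B, C) == ternlog(swapOp0Op1(Imm), B, A, C));
        }
      return 0;
    }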
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
@@ -8975,6 +9032,141 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(i8 imm:$src4)), 1, 0>, EVEX_B,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}// Constraints = "$src1 = $dst"
+
+ // Additional patterns for matching passthru operand in other positions.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
+
+ // Additional patterns for matching loads in other positions.
+ def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
+ addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ def : Pat<(_.VT (OpNode _.RC:$src1,
+ (bitconvert (_.LdFrag addr:$src3)),
+ _.RC:$src2, (i8 imm:$src4))),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
+ addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+
+ // Additional patterns for matching zero masking with loads in other
+ // positions.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (bitconvert (_.LdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
+ _.RC:$src2, (i8 imm:$src4)),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+
+ // Additional patterns for matching masked loads with different
+ // operand orders.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
+ _.RC:$src2, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (bitconvert (_.LdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src2, _.RC:$src1,
+ (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
+ _.RC:$src1, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (bitconvert (_.LdFrag addr:$src3)),
+ _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
+
+ // Additional patterns for matching broadcasts in other positions.
+ def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
+ addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ def : Pat<(_.VT (OpNode _.RC:$src1,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, (i8 imm:$src4))),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
+ addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+
+ // Additional patterns for matching zero masking with broadcasts in other
+ // positions.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
+ _.KRCWM:$mask, _.RC:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src1,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, (i8 imm:$src4)),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
+ _.KRCWM:$mask, _.RC:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 imm:$src4))>;
+
+ // Additional patterns for matching masked broadcasts with different
+ // operand orders.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src1,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src2, _.RC:$src1,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ (i8 imm:$src4)), _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src2,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src1, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
}
multiclass avx512_common_ternlog<string OpcodeStr, AVX512VLVectorVTInfo _>{
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index ba970bc2048e..dcce7b9951f2 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -147,7 +147,7 @@ addOffset(const MachineInstrBuilder &MIB, int Offset) {
static inline const MachineInstrBuilder &
addOffset(const MachineInstrBuilder &MIB, const MachineOperand& Offset) {
- return MIB.addImm(1).addReg(0).addOperand(Offset).addReg(0);
+ return MIB.addImm(1).addReg(0).add(Offset).addReg(0);
}
/// addRegOffset - This function is used to add a memory reference of the form
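[Editor's note: addOffset appends the scale (1), index (0), displacement, and segment (0) operands of an x86 memory reference; the base register comes from the caller, e.g. via addRegOffset. A hedged sketch of typical use, assuming the in-tree backend headers and that X86::MOV32rm is visible at the call site:]

    #include "X86InstrBuilder.h"
    using namespace llvm;

    // Sketch: emit DstReg = load dword [BaseReg + Disp] before iterator I.
    static void emitLoadRegOffset(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  const DebugLoc &DL,
                                  const TargetInstrInfo &TII, unsigned DstReg,
                                  unsigned BaseReg, int Disp) {
      // addRegOffset = addReg(Base) followed by addOffset, yielding the full
      // base/scale/index/disp/segment operand sequence.
      addRegOffset(BuildMI(MBB, I, DL, TII.get(X86::MOV32rm), DstReg),
                   BaseReg, /*isKill=*/false, Disp);
    }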
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
index c73c95019f8d..b85abfb9ca7f 100644
--- a/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -110,3 +110,9 @@ defm SETGE : SETCC<0x9D, "setge", X86_COND_GE>; // signed greater or equal
defm SETLE : SETCC<0x9E, "setle", X86_COND_LE>; // signed less than or equal
defm SETG : SETCC<0x9F, "setg", X86_COND_G>; // signed greater than
+// SALC is an undocumented instruction. Information about it can be found at
+// http://www.rcollins.org/secrets/opcodes/SALC.html
+// Set AL if carry.
+let Uses = [EFLAGS], Defs = [AL] in {
+ def SALC : I<0xD6, RawFrm, (outs), (ins), "salc", []>, Requires<[Not64BitMode]>;
+}
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 3c27eb8077d0..e592c2b3c0aa 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -259,20 +259,20 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
// Alias instruction mapping movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
- isPseudo = 1, AddedComplexity = 20 in
+ isPseudo = 1, AddedComplexity = 10 in
def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
// Other widths can also make use of the 32-bit xor, which may have a smaller
// encoding and avoid partial register updates.
+let AddedComplexity = 10 in {
def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
-def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {
- let AddedComplexity = 20;
+def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)>;
}
let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
- AddedComplexity = 15 in {
+ AddedComplexity = 10 in {
// Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,
// which only require 3 bytes compared to MOV32ri which requires 5.
let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {
@@ -287,7 +287,7 @@ let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;
}
-let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in {
+let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 5 in {
// AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1.
// FIXME: Add itinerary class and Schedule.
def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "",
@@ -772,11 +772,11 @@ defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b",
// the pseudo. The argument feeding EBX is ebx_input.
//
// The additional argument, $ebx_save, is a temporary register used to
-// save the value of RBX accross the actual instruction.
+// save the value of RBX across the actual instruction.
//
// To make sure the register assigned to $ebx_save does not interfere with
// the definition of the actual instruction, we use a definition $dst which
-// is tied to $rbx_save. That way, the live-range of $rbx_save spans accross
+// is tied to $rbx_save. That way, the live-range of $rbx_save spans across
// the instruction and we are sure we will have a valid register to restore
// the value of RBX.
let Defs = [EAX, EDX, EBX, EFLAGS], Uses = [EAX, ECX, EDX],
@@ -1743,6 +1743,12 @@ def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>;
def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>;
def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;
+// sub reg, relocImm
+def : Pat<(X86sub_flag GR64:$src1, i64relocImmSExt8_su:$src2),
+ (SUB64ri8 GR64:$src1, i64relocImmSExt8_su:$src2)>;
+def : Pat<(X86sub_flag GR64:$src1, i64relocImmSExt32_su:$src2),
+ (SUB64ri32 GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
// mul reg, reg
def : Pat<(mul GR16:$src1, GR16:$src2),
(IMUL16rr GR16:$src1, GR16:$src2)>;
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index 2f260c48df47..4ea223e82be9 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -264,6 +264,21 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
"jmp{l}\t{*}$dst", [], IIC_JMP_MEM>;
}
+// Conditional tail calls are similar to the above, but they are branches
+// rather than barriers, and they use EFLAGS.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
+ isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
+ let Uses = [ESP, EFLAGS] in {
+ def TCRETURNdicc : PseudoI<(outs),
+ (ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>;
+
+ // This gets substituted to a conditional jump instruction in MC lowering.
+ def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs),
+ (ins i32imm_pcrel:$dst, i32imm:$cond),
+ "",
+ [], IIC_JMP_REL>;
+}
+
//===----------------------------------------------------------------------===//
// Call Instructions...
@@ -325,3 +340,19 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
"rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
}
}
+
+// Conditional tail calls are similar to the above, but they are branches
+// rather than barriers, and they use EFLAGS.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
+ isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
+ let Uses = [RSP, EFLAGS] in {
+ def TCRETURNdi64cc : PseudoI<(outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$offset,
+ i32imm:$cond), []>;
+
+ // This gets substituted to a conditional jump instruction in MC lowering.
+ def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$cond),
+ "",
+ [], IIC_JMP_REL>;
+}
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index 4b19f801dae1..1941ae57f0f1 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -191,13 +191,15 @@ multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, string PackTy, string Suff,
SDNode OpNode, RegisterClass RC,
- X86MemOperand x86memop> {
- defm NAME#132#Suff : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
- x86memop, RC>;
- defm NAME#213#Suff : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
- x86memop, RC, OpNode>;
- defm NAME#231#Suff : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy),
- x86memop, RC>;
+ X86MemOperand x86memop> {
+ let Predicates = [HasFMA, NoAVX512] in {
+ defm NAME#132#Suff : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
+ x86memop, RC>;
+ defm NAME#213#Suff : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
+ x86memop, RC, OpNode>;
+ defm NAME#231#Suff : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy),
+ x86memop, RC>;
+ }
}
// The FMA 213 form is created for lowering of scalar FMA intrinsics
diff --git a/lib/Target/X86/X86InstrFMA3Info.cpp b/lib/Target/X86/X86InstrFMA3Info.cpp
index db83497ee69d..00ef65cdb6bd 100644
--- a/lib/Target/X86/X86InstrFMA3Info.cpp
+++ b/lib/Target/X86/X86InstrFMA3Info.cpp
@@ -16,11 +16,14 @@
#include "X86InstrInfo.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Threading.h"
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
/// This flag is used in the call to llvm::call_once() below to make the
/// initialization of the map 'OpcodeToGroup' thread safe.
-LLVM_DEFINE_ONCE_FLAG(InitGroupsOnceFlag);
+static llvm::once_flag InitGroupsOnceFlag;
static ManagedStatic<X86InstrFMA3Info> X86InstrFMA3InfoObj;
X86InstrFMA3Info *X86InstrFMA3Info::getX86InstrFMA3Info() {
diff --git a/lib/Target/X86/X86InstrFMA3Info.h b/lib/Target/X86/X86InstrFMA3Info.h
index 025cee3b2b90..e3568160da46 100644
--- a/lib/Target/X86/X86InstrFMA3Info.h
+++ b/lib/Target/X86/X86InstrFMA3Info.h
@@ -1,4 +1,4 @@
-//===-- X86InstrFMA3Info.h - X86 FMA3 Instruction Information -------------===//
+//===- X86InstrFMA3Info.h - X86 FMA3 Instruction Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,9 +18,11 @@
#include "X86.h"
#include "llvm/ADT/DenseMap.h"
#include <cassert>
+#include <cstdint>
#include <set>
namespace llvm {
+
/// This class is used to group {132, 213, 231} forms of FMA opcodes together.
/// Each of the groups has either 3 register opcodes, 3 memory opcodes,
/// or 6 register and memory opcodes. Also, each group has an attributes field
@@ -201,7 +203,7 @@ public:
static X86InstrFMA3Info *getX86InstrFMA3Info();
/// Constructor. Just creates an object of the class.
- X86InstrFMA3Info() {}
+ X86InstrFMA3Info() = default;
/// Destructor. Deallocates the memory used for FMA3 Groups.
~X86InstrFMA3Info() {
@@ -310,6 +312,7 @@ public:
return rm_iterator(getX86InstrFMA3Info()->OpcodeToGroup.end());
}
};
-} // namespace llvm
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_X86_UTILS_X86INSTRFMA3INFO_H
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 10f3839ea8ed..11b1d070ef2f 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -631,6 +631,9 @@ let Defs = [FPSW] in
def FNINIT : I<0xDB, MRM_E3, (outs), (ins), "fninit", [], IIC_FNINIT>;
def FFREE : FPI<0xDD, MRM0r, (outs), (ins RST:$reg),
"ffree\t$reg", IIC_FFREE>;
+def FFREEP : FPI<0xDF, MRM0r, (outs), (ins RST:$reg),
+ "ffreep\t$reg", IIC_FFREE>;
+
// Clear exceptions
let Defs = [FPSW] in
@@ -665,15 +668,16 @@ def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", [], IIC_FCOMPP>;
let Predicates = [HasFXSR] in {
def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
- "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB;
+ "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB;
def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst),
- "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)],
- IIC_FXSAVE>, TB, Requires<[In64BitMode]>;
+ "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)],
+ IIC_FXSAVE>, TB, Requires<[In64BitMode]>;
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
- "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, TB;
+ "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>,
+ TB;
def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
- "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)],
- IIC_FXRSTOR>, TB, Requires<[In64BitMode]>;
+ "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)],
+ IIC_FXRSTOR>, TB, Requires<[In64BitMode]>;
} // Predicates = [FeatureFXSR]
} // SchedRW
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 610756aa37da..c2fe786732dc 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -199,7 +199,8 @@ class TAPS : TA { Prefix OpPrefix = PS; }
class TAPD : TA { Prefix OpPrefix = PD; }
class TAXD : TA { Prefix OpPrefix = XD; }
class VEX { Encoding OpEnc = EncVEX; }
-class VEX_W { bit hasVEX_WPrefix = 1; }
+class VEX_W { bits<2> VEX_WPrefix = 1; }
+class VEX_WIG { bits<2> VEX_WPrefix = 2; }
class VEX_4V : VEX { bit hasVEX_4V = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
@@ -270,7 +271,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasREPPrefix = 0; // Does this inst have a REP prefix?
Encoding OpEnc = EncNormal; // Encoding used by this instruction
bits<2> OpEncBits = OpEnc.Value;
- bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field?
+ bits<2> VEX_WPrefix = 0; // Does this inst set the VEX_W field?
bit hasVEX_4V = 0; // Does this inst require the VEX.VVVV field?
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit
@@ -317,7 +318,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{28-27} = ExeDomain.Value;
let TSFlags{30-29} = OpEncBits;
let TSFlags{38-31} = Opcode;
- let TSFlags{39} = hasVEX_WPrefix;
+  // Currently there is no need for a second bit in TSFlags - W Ignore is
+  // equivalent to 0.
+ let TSFlags{39} = VEX_WPrefix{0};
let TSFlags{40} = hasVEX_4V;
let TSFlags{41} = hasVEX_L;
let TSFlags{42} = hasEVEX_K;
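[Editor's illustration, assuming only the low bit is ever consumed, as the comment above states: the three VEX_WPrefix values collapse onto one TSFlags bit because W-ignore encodes the same as W=0.]

    #include <cstdint>

    enum VEXWPrefix : uint8_t { VEX_W0 = 0, VEX_W1 = 1, VEX_WIG = 2 };

    // TSFlags bit 39 stores only VEX_WPrefix{0}; VEX_WIG (2) therefore reads
    // back as W=0, which is the intended equivalence.
    static inline bool tsFlagsWBit(VEXWPrefix P) { return P & 1; }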
@@ -453,7 +455,7 @@ class SI_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
Domain d = GenericDomain>
: I<o, F, outs, ins, asm, pattern, itin, d> {
let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512],
- !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX],
+ !if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX],
!if(!eq(OpPrefix.Value, XS.Value), [UseSSE1],
!if(!eq(OpPrefix.Value, XD.Value), [UseSSE2],
!if(!eq(OpPrefix.Value, PD.Value), [UseSSE2],
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index c5689d7c698c..9867ba84bb9b 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -27,21 +27,19 @@ def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1,
//===----------------------------------------------------------------------===//
def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
-def load_mvmmx : PatFrag<(ops node:$ptr),
- (x86mmx (MMX_X86movw2d (load node:$ptr)))>;
//===----------------------------------------------------------------------===//
// SSE specific DAG Nodes.
//===----------------------------------------------------------------------===//
-def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<1, 2>,
- SDTCisFP<1>, SDTCisVT<3, i8>,
- SDTCisVec<1>]>;
-def SDTX86CmpTestSae : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
- SDTCisSameAs<1, 2>, SDTCisInt<3>]>;
+def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i8>]>;
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+def X86fmins : SDNode<"X86ISD::FMINS", SDTFPBinOp>;
+def X86fmaxs : SDNode<"X86ISD::FMAXS", SDTFPBinOp>;
// Commutative and Associative FMIN and FMAX.
def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp,
@@ -200,6 +198,15 @@ def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>;
def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>;
def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
+def X86kshiftl : SDNode<"X86ISD::KSHIFTL",
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>,
+ SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i8>]>>;
+def X86kshiftr : SDNode<"X86ISD::KSHIFTR",
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>,
+ SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i8>]>>;
+
def X86vrotli : SDNode<"X86ISD::VROTLI", SDTIntShiftOp>;
def X86vrotri : SDNode<"X86ISD::VROTRI", SDTIntShiftOp>;
@@ -230,10 +237,11 @@ def X86vpermil2 : SDNode<"X86ISD::VPERMIL2",
SDTCisSameAs<0,2>,
SDTCisSameSizeAs<0,3>,
SDTCisSameNumEltsAs<0, 3>,
+ SDTCisFP<0>, SDTCisInt<3>,
SDTCisVT<4, i8>]>>;
def X86vpperm : SDNode<"X86ISD::VPPERM",
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>]>>;
+ SDTCisSameAs<0,2>, SDTCisSameAs<0, 3>]>>;
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
@@ -300,13 +308,17 @@ def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
def SDTShuff2OpM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameSizeAs<0,2>,
- SDTCisSameNumEltsAs<0,2>]>;
+ SDTCisSameNumEltsAs<0,2>,
+ SDTCisFP<0>, SDTCisInt<2>]>;
def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisVT<2, i8>]>;
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>;
-def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
+def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>,
+ SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>]>;
def SDTFPTernaryOpImmRound: SDTypeProfile<1, 5, [SDTCisFP<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>,
SDTCisInt<3>,
@@ -314,8 +326,10 @@ def SDTFPTernaryOpImmRound: SDTypeProfile<1, 5, [SDTCisFP<0>, SDTCisSameAs<0,1>,
SDTCisSameNumEltsAs<0, 3>,
SDTCisVT<4, i32>,
SDTCisVT<5, i32>]>;
-def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>]>;
def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDTVBroadcastm : SDTypeProfile<1, 1, [SDTCisVec<0>,
@@ -324,9 +338,9 @@ def SDTVBroadcastm : SDTypeProfile<1, 1, [SDTCisVec<0>,
def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
-def SDTTernlog : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>, SDTCisSameAs<0,3>,
- SDTCisVT<4, i8>]>;
+def SDTTernlog : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisVec<0>,
+ SDTCisSameAs<0,1>, SDTCisSameAs<0,2>,
+ SDTCisSameAs<0,3>, SDTCisVT<4, i8>]>;
def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc.
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisVT<3, i32>]>;
@@ -334,16 +348,13 @@ def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc.
def SDTFPUnaryOpRound : SDTypeProfile<1, 2, [ // fsqrt_round, fgetexp_round, etc.
SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisVT<2, i32>]>;
-def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>,
- SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>,
- SDTCisVT<4, i32>]>;
+ SDTCisFP<0>, SDTCisVT<4, i32>]>;
def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>;
def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
-def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>;
def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
@@ -367,17 +378,28 @@ def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
-def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>,
+ SDTCisVec<1>, SDTCisInt<1>,
SDTCisSameSizeAs<0,1>,
- SDTCisSameAs<1,2>]>;
+ SDTCisSameAs<1,2>,
+ SDTCisOpSmallerThanOp<0, 1>]>;
def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>;
def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>;
def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
-def X86vpmaddubsw : SDNode<"X86ISD::VPMADDUBSW" , SDTPack>;
-def X86vpmaddwd : SDNode<"X86ISD::VPMADDWD" , SDTPack, [SDNPCommutative]>;
+def X86vpmaddubsw : SDNode<"X86ISD::VPMADDUBSW",
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, i8>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<1,2>]>>;
+def X86vpmaddwd : SDNode<"X86ISD::VPMADDWD",
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i32>,
+ SDTCVecEltisVT<1, i16>,
+ SDTCisSameSizeAs<0,1>,
+ SDTCisSameAs<1,2>]>,
+ [SDNPCommutative]>;
def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>;
def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>;
@@ -414,8 +436,8 @@ def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImmRound>;
def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImmRound>;
def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImmRound>;
def X86Vfpclass : SDNode<"X86ISD::VFPCLASS",
- SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>,
- SDTCisVec<1>, SDTCisFP<1>,
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>,
+ SDTCisFP<1>,
SDTCisSameNumEltsAs<0,1>,
SDTCisVT<2, i32>]>, []>;
def X86Vfpclasss : SDNode<"X86ISD::VFPCLASSS",
@@ -428,9 +450,6 @@ def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
-def X86Vinsert : SDNode<"X86ISD::VINSERT", SDTypeProfile<1, 3,
- [SDTCisSameAs<0, 1>, SDTCisEltOfVec<2, 1>,
- SDTCisPtrTy<3>]>, []>;
def X86Vextract : SDNode<"X86ISD::VEXTRACT", SDTypeProfile<1, 2,
[SDTCisEltOfVec<0, 1>, SDTCisVec<1>,
SDTCisPtrTy<2>]>, []>;
@@ -440,24 +459,30 @@ def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>;
def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>;
+def X86faddRnds : SDNode<"X86ISD::FADDS_RND", SDTFPBinOpRound>;
def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>;
+def X86fsubRnds : SDNode<"X86ISD::FSUBS_RND", SDTFPBinOpRound>;
def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>;
+def X86fmulRnds : SDNode<"X86ISD::FMULS_RND", SDTFPBinOpRound>;
def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>;
-def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
+def X86fdivRnds : SDNode<"X86ISD::FDIVS_RND", SDTFPBinOpRound>;
+def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>;
+def X86fmaxRnds : SDNode<"X86ISD::FMAXS_RND", SDTFPBinOpRound>;
+def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
+def X86fminRnds : SDNode<"X86ISD::FMINS_RND", SDTFPBinOpRound>;
def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>;
def X86scalefs : SDNode<"X86ISD::SCALEFS", SDTFPBinOpRound>;
-def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
def X86fsqrtRnds : SDNode<"X86ISD::FSQRTS_RND", SDTFPBinOpRound>;
def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
def X86fgetexpRnds : SDNode<"X86ISD::FGETEXPS_RND", SDTFPBinOpRound>;
-def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
-def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
-def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>;
-def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>;
-def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>;
-def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>;
+def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFPTernaryOp>;
+def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp>;
+def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp>;
+def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp>;
+def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFPTernaryOp>;
+def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFPTernaryOp>;
def X86FmaddRnd : SDNode<"X86ISD::FMADD_RND", SDTFmaRound>;
def X86FnmaddRnd : SDNode<"X86ISD::FNMADD_RND", SDTFmaRound>;
@@ -478,8 +503,10 @@ def X86FnmaddRnds3 : SDNode<"X86ISD::FNMADDS3_RND", SDTFmaRound>;
def X86FmsubRnds3 : SDNode<"X86ISD::FMSUBS3_RND", SDTFmaRound>;
def X86FnmsubRnds3 : SDNode<"X86ISD::FNMSUBS3_RND", SDTFmaRound>;
-def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTFma>;
-def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTFma>;
+def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
+def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma>;
+def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTIFma>;
def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", SDTFPUnaryOpRound>;
def X86rcp28 : SDNode<"X86ISD::RCP28", SDTFPUnaryOpRound>;
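
The SDTypeProfile changes above tighten the type constraints TableGen uses to infer and validate the types in instruction-selection patterns: SDTCisFP<0> requires a floating-point result, SDTCisVT<3, i32> pins an operand to one specific value type, and SDTCisSameAs<0,1> ties two positions together. As a rough conceptual sketch only (plain C++, not the actual TableGen machinery; every name below is hypothetical), a constraint list amounts to a sequence of checks over the result and operand types, with index 0 naming the result and indices 1..N the operands:

// Illustrative sketch of constraint checking; not LLVM code.
#include <cassert>
#include <vector>

enum class Kind { FP, Int, SameAs, IsVT };

struct TypeInfo { bool isFP; bool isInt; int vt; };

struct Constraint {
  Kind kind;
  int opA; // result/operand index the constraint applies to
  int opB; // second index (SameAs) or required VT (IsVT)
};

static bool check(const std::vector<TypeInfo> &types,
                  const std::vector<Constraint> &cs) {
  for (const Constraint &c : cs) {
    const TypeInfo &a = types[c.opA];
    switch (c.kind) {
    case Kind::FP:     if (!a.isFP) return false; break;
    case Kind::Int:    if (!a.isInt) return false; break;
    case Kind::SameAs: if (a.vt != types[c.opB].vt) return false; break;
    case Kind::IsVT:   if (a.vt != c.opB) return false; break;
    }
  }
  return true;
}

int main() {
  // Rough analogue of SDTFPBinOpRound: the result and both FP operands
  // share a type, and the rounding-mode operand is a fixed integer VT.
  enum { f32 = 1, i32 = 2 };
  std::vector<TypeInfo> fadd = {{true, false, f32},  // result
                                {true, false, f32},  // op 1
                                {true, false, f32},  // op 2
                                {false, true, i32}}; // op 3: rounding mode
  std::vector<Constraint> cs = {{Kind::FP, 0, 0},
                                {Kind::SameAs, 0, 1},
                                {Kind::SameAs, 0, 2},
                                {Kind::IsVT, 3, i32}};
  assert(check(fadd, cs));
  return 0;
}

The practical effect of tightening a profile (e.g. adding SDTCisFP<0> or splitting SDTFma into FP and integer variants) is that mis-typed patterns are rejected at build time and TableGen has enough information to infer the remaining types without ambiguity.
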
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 627b6120b048..7b456fd68343 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -414,17 +414,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VEXTRACTI64x2Zrr,X86::VEXTRACTI64x2Zmr, TB_FOLDED_STORE },
{ X86::VEXTRACTI64x4Zrr,X86::VEXTRACTI64x4Zmr, TB_FOLDED_STORE },
{ X86::VEXTRACTPSZrr, X86::VEXTRACTPSZmr, TB_FOLDED_STORE },
- { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE },
{ X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
{ X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
{ X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
{ X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
- { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE },
- { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE },
{ X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE },
{ X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE },
{ X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE },
{ X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE },
+ { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE },
+ { X86::VMOVPQIto64Zrr, X86::VMOVPQI2QIZmr, TB_FOLDED_STORE },
+ { X86::VMOVSDto64Zrr, X86::VMOVSDto64Zmr, TB_FOLDED_STORE },
+ { X86::VMOVSS2DIZrr, X86::VMOVSS2DIZmr, TB_FOLDED_STORE },
+ { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE },
+ { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE },
+ { X86::VPEXTRDZrr, X86::VPEXTRDZmr, TB_FOLDED_STORE },
+ { X86::VPEXTRQZrr, X86::VPEXTRQZmr, TB_FOLDED_STORE },
{ X86::VPMOVDBZrr, X86::VPMOVDBZmr, TB_FOLDED_STORE },
{ X86::VPMOVDWZrr, X86::VPMOVDWZmr, TB_FOLDED_STORE },
{ X86::VPMOVQDZrr, X86::VPMOVQDZmr, TB_FOLDED_STORE },
@@ -867,11 +872,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// AVX-512 foldable instructions
{ X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE },
- { X86::VBROADCASTSSZr_s, X86::VBROADCASTSSZm, TB_NO_REVERSE },
{ X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE },
- { X86::VBROADCASTSDZr_s, X86::VBROADCASTSDZm, TB_NO_REVERSE },
{ X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
- { X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm, TB_NO_REVERSE },
+ { X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 },
+ { X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 },
{ X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
{ X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 },
{ X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 },
@@ -883,8 +887,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 },
{ X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 },
{ X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 },
+ { X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm, TB_NO_REVERSE },
+ { X86::VPABSBZrr, X86::VPABSBZrm, 0 },
{ X86::VPABSDZrr, X86::VPABSDZrm, 0 },
{ X86::VPABSQZrr, X86::VPABSQZrm, 0 },
+ { X86::VPABSWZrr, X86::VPABSWZrm, 0 },
{ X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 },
{ X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 },
{ X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
@@ -904,12 +911,21 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZri, X86::VPSHUFDZmi, 0 },
{ X86::VPSHUFHWZri, X86::VPSHUFHWZmi, 0 },
{ X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 },
+ { X86::VPSLLDQZ512rr, X86::VPSLLDQZ512rm, 0 },
+ { X86::VPSLLDZri, X86::VPSLLDZmi, 0 },
+ { X86::VPSLLQZri, X86::VPSLLQZmi, 0 },
+ { X86::VPSLLWZri, X86::VPSLLWZmi, 0 },
+ { X86::VPSRADZri, X86::VPSRADZmi, 0 },
+ { X86::VPSRAQZri, X86::VPSRAQZmi, 0 },
+ { X86::VPSRAWZri, X86::VPSRAWZmi, 0 },
+ { X86::VPSRLDQZ512rr, X86::VPSRLDQZ512rm, 0 },
+ { X86::VPSRLDZri, X86::VPSRLDZmi, 0 },
+ { X86::VPSRLQZri, X86::VPSRLQZmi, 0 },
+ { X86::VPSRLWZri, X86::VPSRLWZmi, 0 },
// AVX-512 foldable instructions (256-bit versions)
{ X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256r_s, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
{ X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256r_s, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
{ X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
{ X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
{ X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
@@ -920,6 +936,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 },
{ X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 },
{ X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 },
+ { X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0 },
+ { X86::VPABSDZ256rr, X86::VPABSDZ256rm, 0 },
+ { X86::VPABSQZ256rr, X86::VPABSQZ256rm, 0 },
+ { X86::VPABSWZ256rr, X86::VPABSWZ256rm, 0 },
{ X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 },
{ X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 },
{ X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 },
@@ -939,10 +959,20 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZ256ri, X86::VPSHUFDZ256mi, 0 },
{ X86::VPSHUFHWZ256ri, X86::VPSHUFHWZ256mi, 0 },
{ X86::VPSHUFLWZ256ri, X86::VPSHUFLWZ256mi, 0 },
+ { X86::VPSLLDQZ256rr, X86::VPSLLDQZ256rm, 0 },
+ { X86::VPSLLDZ256ri, X86::VPSLLDZ256mi, 0 },
+ { X86::VPSLLQZ256ri, X86::VPSLLQZ256mi, 0 },
+ { X86::VPSLLWZ256ri, X86::VPSLLWZ256mi, 0 },
+ { X86::VPSRADZ256ri, X86::VPSRADZ256mi, 0 },
+ { X86::VPSRAQZ256ri, X86::VPSRAQZ256mi, 0 },
+ { X86::VPSRAWZ256ri, X86::VPSRAWZ256mi, 0 },
+ { X86::VPSRLDQZ256rr, X86::VPSRLDQZ256rm, 0 },
+ { X86::VPSRLDZ256ri, X86::VPSRLDZ256mi, 0 },
+ { X86::VPSRLQZ256ri, X86::VPSRLQZ256mi, 0 },
+ { X86::VPSRLWZ256ri, X86::VPSRLWZ256mi, 0 },
// AVX-512 foldable instructions (128-bit versions)
{ X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ128r_s, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
{ X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
{ X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
{ X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
@@ -953,6 +983,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 },
{ X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 },
{ X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 },
+ { X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0 },
+ { X86::VPABSDZ128rr, X86::VPABSDZ128rm, 0 },
+ { X86::VPABSQZ128rr, X86::VPABSQZ128rm, 0 },
+ { X86::VPABSWZ128rr, X86::VPABSWZ128rm, 0 },
{ X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 },
{ X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 },
{ X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE },
@@ -970,6 +1004,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZ128ri, X86::VPSHUFDZ128mi, 0 },
{ X86::VPSHUFHWZ128ri, X86::VPSHUFHWZ128mi, 0 },
{ X86::VPSHUFLWZ128ri, X86::VPSHUFLWZ128mi, 0 },
+ { X86::VPSLLDQZ128rr, X86::VPSLLDQZ128rm, 0 },
+ { X86::VPSLLDZ128ri, X86::VPSLLDZ128mi, 0 },
+ { X86::VPSLLQZ128ri, X86::VPSLLQZ128mi, 0 },
+ { X86::VPSLLWZ128ri, X86::VPSLLWZ128mi, 0 },
+ { X86::VPSRADZ128ri, X86::VPSRADZ128mi, 0 },
+ { X86::VPSRAQZ128ri, X86::VPSRAQZ128mi, 0 },
+ { X86::VPSRAWZ128ri, X86::VPSRAWZ128mi, 0 },
+ { X86::VPSRLDQZ128rr, X86::VPSRLDQZ128rm, 0 },
+ { X86::VPSRLDZ128ri, X86::VPSRLDZ128mi, 0 },
+ { X86::VPSRLQZ128ri, X86::VPSRLQZ128mi, 0 },
+ { X86::VPSRLWZ128ri, X86::VPSRLWZ128mi, 0 },
// F16C foldable instructions
{ X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, 0 },
@@ -1170,18 +1215,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PINSRWrri, X86::PINSRWrmi, 0 },
{ X86::PMADDUBSWrr, X86::PMADDUBSWrm, TB_ALIGN_16 },
{ X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
+ { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 },
+ { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 },
{ X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
{ X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
- { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
- { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
+ { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 },
+ { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 },
{ X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 },
{ X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 },
+ { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
+ { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
{ X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 },
{ X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 },
- { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 },
- { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 },
- { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 },
- { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 },
{ X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
{ X86::PMULHRSWrr, X86::PMULHRSWrm, TB_ALIGN_16 },
{ X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
@@ -1340,8 +1385,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PMULHRWrr, X86::PMULHRWrm, 0 },
// AVX 128-bit versions of foldable instructions
- { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
- { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, TB_NO_REVERSE },
{ X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 },
{ X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 },
{ X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 },
@@ -1350,8 +1393,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 },
{ X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 },
{ X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
- { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
- { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, TB_NO_REVERSE },
{ X86::VADDPDrr, X86::VADDPDrm, 0 },
{ X86::VADDPSrr, X86::VADDPSrm, 0 },
{ X86::VADDSDrr, X86::VADDSDrm, 0 },
@@ -1458,18 +1499,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPINSRWrri, X86::VPINSRWrmi, 0 },
{ X86::VPMADDUBSWrr, X86::VPMADDUBSWrm, 0 },
{ X86::VPMADDWDrr, X86::VPMADDWDrm, 0 },
+ { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 },
+ { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 },
{ X86::VPMAXSWrr, X86::VPMAXSWrm, 0 },
{ X86::VPMAXUBrr, X86::VPMAXUBrm, 0 },
- { X86::VPMINSWrr, X86::VPMINSWrm, 0 },
- { X86::VPMINUBrr, X86::VPMINUBrm, 0 },
+ { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 },
+ { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 },
{ X86::VPMINSBrr, X86::VPMINSBrm, 0 },
{ X86::VPMINSDrr, X86::VPMINSDrm, 0 },
+ { X86::VPMINSWrr, X86::VPMINSWrm, 0 },
+ { X86::VPMINUBrr, X86::VPMINUBrm, 0 },
{ X86::VPMINUDrr, X86::VPMINUDrm, 0 },
{ X86::VPMINUWrr, X86::VPMINUWrm, 0 },
- { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 },
- { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 },
- { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 },
- { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 },
{ X86::VPMULDQrr, X86::VPMULDQrm, 0 },
{ X86::VPMULHRSWrr, X86::VPMULHRSWrm, 0 },
{ X86::VPMULHUWrr, X86::VPMULHUWrm, 0 },
@@ -1626,18 +1667,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 },
{ X86::VPMADDUBSWYrr, X86::VPMADDUBSWYrm, 0 },
{ X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 },
+ { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 },
+ { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 },
{ X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 },
{ X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 },
- { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 },
- { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 },
+ { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 },
+ { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 },
{ X86::VPMINSBYrr, X86::VPMINSBYrm, 0 },
{ X86::VPMINSDYrr, X86::VPMINSDYrm, 0 },
+ { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 },
+ { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 },
{ X86::VPMINUDYrr, X86::VPMINUDYrm, 0 },
{ X86::VPMINUWYrr, X86::VPMINUWYrm, 0 },
- { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 },
- { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 },
- { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 },
- { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 },
{ X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 },
{ X86::VPMULDQYrr, X86::VPMULDQYrm, 0 },
{ X86::VPMULHRSWYrr, X86::VPMULHRSWYrm, 0 },
@@ -1732,7 +1773,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// XOP foldable instructions
{ X86::VPCMOVrrr, X86::VPCMOVrmr, 0 },
- { X86::VPCMOVrrrY, X86::VPCMOVrmrY, 0 },
+ { X86::VPCMOVYrrr, X86::VPCMOVYrmr, 0 },
{ X86::VPCOMBri, X86::VPCOMBmi, 0 },
{ X86::VPCOMDri, X86::VPCOMDmi, 0 },
{ X86::VPCOMQri, X86::VPCOMQmi, 0 },
@@ -1742,9 +1783,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPCOMUQri, X86::VPCOMUQmi, 0 },
{ X86::VPCOMUWri, X86::VPCOMUWmi, 0 },
{ X86::VPERMIL2PDrr, X86::VPERMIL2PDmr, 0 },
- { X86::VPERMIL2PDrrY, X86::VPERMIL2PDmrY, 0 },
+ { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYmr, 0 },
{ X86::VPERMIL2PSrr, X86::VPERMIL2PSmr, 0 },
- { X86::VPERMIL2PSrrY, X86::VPERMIL2PSmrY, 0 },
+ { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYmr, 0 },
{ X86::VPMACSDDrr, X86::VPMACSDDrm, 0 },
{ X86::VPMACSDQHrr, X86::VPMACSDQHrm, 0 },
{ X86::VPMACSDQLrr, X86::VPMACSDQLrm, 0 },
@@ -1800,8 +1841,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VANDNPSZrr, X86::VANDNPSZrm, 0 },
{ X86::VANDPDZrr, X86::VANDPDZrm, 0 },
{ X86::VANDPSZrr, X86::VANDPSZrm, 0 },
- { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE },
- { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE },
{ X86::VCMPPDZrri, X86::VCMPPDZrmi, 0 },
{ X86::VCMPPSZrri, X86::VCMPPSZrmi, 0 },
{ X86::VCMPSDZrr, X86::VCMPSDZrm, 0 },
@@ -1842,6 +1881,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMINSDZrr_Int, X86::VMINSDZrm_Int, TB_NO_REVERSE },
{ X86::VMINSSZrr, X86::VMINSSZrm, 0 },
{ X86::VMINSSZrr_Int, X86::VMINSSZrm_Int, TB_NO_REVERSE },
+ { X86::VMOVLHPSZrr, X86::VMOVHPSZ128rm, TB_NO_REVERSE },
{ X86::VMULPDZrr, X86::VMULPDZrm, 0 },
{ X86::VMULPSZrr, X86::VMULPSZrm, 0 },
{ X86::VMULSDZrr, X86::VMULSDZrm, 0 },
@@ -1850,6 +1890,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMULSSZrr_Int, X86::VMULSSZrm_Int, TB_NO_REVERSE },
{ X86::VORPDZrr, X86::VORPDZrm, 0 },
{ X86::VORPSZrr, X86::VORPSZrm, 0 },
+ { X86::VPACKSSDWZrr, X86::VPACKSSDWZrm, 0 },
+ { X86::VPACKSSWBZrr, X86::VPACKSSWBZrm, 0 },
+ { X86::VPACKUSDWZrr, X86::VPACKUSDWZrm, 0 },
+ { X86::VPACKUSWBZrr, X86::VPACKUSWBZrm, 0 },
{ X86::VPADDBZrr, X86::VPADDBZrm, 0 },
{ X86::VPADDDZrr, X86::VPADDDZrm, 0 },
{ X86::VPADDQZrr, X86::VPADDQZrm, 0 },
@@ -1863,6 +1907,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDNDZrr, X86::VPANDNDZrm, 0 },
{ X86::VPANDNQZrr, X86::VPANDNQZrm, 0 },
{ X86::VPANDQZrr, X86::VPANDQZrm, 0 },
+ { X86::VPAVGBZrr, X86::VPAVGBZrm, 0 },
+ { X86::VPAVGWZrr, X86::VPAVGWZrm, 0 },
{ X86::VPCMPBZrri, X86::VPCMPBZrmi, 0 },
{ X86::VPCMPDZrri, X86::VPCMPDZrmi, 0 },
{ X86::VPCMPEQBZrr, X86::VPCMPEQBZrm, 0 },
@@ -1887,26 +1933,55 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
{ X86::VPERMQZrr, X86::VPERMQZrm, 0 },
{ X86::VPERMWZrr, X86::VPERMWZrm, 0 },
+ { X86::VPINSRBZrr, X86::VPINSRBZrm, 0 },
+ { X86::VPINSRDZrr, X86::VPINSRDZrm, 0 },
+ { X86::VPINSRQZrr, X86::VPINSRQZrm, 0 },
+ { X86::VPINSRWZrr, X86::VPINSRWZrm, 0 },
{ X86::VPMADDUBSWZrr, X86::VPMADDUBSWZrm, 0 },
{ X86::VPMADDWDZrr, X86::VPMADDWDZrm, 0 },
+ { X86::VPMAXSBZrr, X86::VPMAXSBZrm, 0 },
{ X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 },
{ X86::VPMAXSQZrr, X86::VPMAXSQZrm, 0 },
+ { X86::VPMAXSWZrr, X86::VPMAXSWZrm, 0 },
+ { X86::VPMAXUBZrr, X86::VPMAXUBZrm, 0 },
{ X86::VPMAXUDZrr, X86::VPMAXUDZrm, 0 },
{ X86::VPMAXUQZrr, X86::VPMAXUQZrm, 0 },
+ { X86::VPMAXUWZrr, X86::VPMAXUWZrm, 0 },
+ { X86::VPMINSBZrr, X86::VPMINSBZrm, 0 },
{ X86::VPMINSDZrr, X86::VPMINSDZrm, 0 },
{ X86::VPMINSQZrr, X86::VPMINSQZrm, 0 },
+ { X86::VPMINSWZrr, X86::VPMINSWZrm, 0 },
+ { X86::VPMINUBZrr, X86::VPMINUBZrm, 0 },
{ X86::VPMINUDZrr, X86::VPMINUDZrm, 0 },
{ X86::VPMINUQZrr, X86::VPMINUQZrm, 0 },
+ { X86::VPMINUWZrr, X86::VPMINUWZrm, 0 },
{ X86::VPMULDQZrr, X86::VPMULDQZrm, 0 },
+ { X86::VPMULLDZrr, X86::VPMULLDZrm, 0 },
+ { X86::VPMULLQZrr, X86::VPMULLQZrm, 0 },
+ { X86::VPMULLWZrr, X86::VPMULLWZrm, 0 },
{ X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 },
{ X86::VPORDZrr, X86::VPORDZrm, 0 },
{ X86::VPORQZrr, X86::VPORQZrm, 0 },
+ { X86::VPSADBWZ512rr, X86::VPSADBWZ512rm, 0 },
{ X86::VPSHUFBZrr, X86::VPSHUFBZrm, 0 },
+ { X86::VPSLLDZrr, X86::VPSLLDZrm, 0 },
+ { X86::VPSLLQZrr, X86::VPSLLQZrm, 0 },
{ X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
{ X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
+ { X86::VPSLLVWZrr, X86::VPSLLVWZrm, 0 },
+ { X86::VPSLLWZrr, X86::VPSLLWZrm, 0 },
+ { X86::VPSRADZrr, X86::VPSRADZrm, 0 },
+ { X86::VPSRAQZrr, X86::VPSRAQZrm, 0 },
{ X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
+ { X86::VPSRAVQZrr, X86::VPSRAVQZrm, 0 },
+ { X86::VPSRAVWZrr, X86::VPSRAVWZrm, 0 },
+ { X86::VPSRAWZrr, X86::VPSRAWZrm, 0 },
+ { X86::VPSRLDZrr, X86::VPSRLDZrm, 0 },
+ { X86::VPSRLQZrr, X86::VPSRLQZrm, 0 },
{ X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 },
{ X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 },
+ { X86::VPSRLVWZrr, X86::VPSRLVWZrm, 0 },
+ { X86::VPSRLWZrr, X86::VPSRLWZrm, 0 },
{ X86::VPSUBBZrr, X86::VPSUBBZrm, 0 },
{ X86::VPSUBDZrr, X86::VPSUBDZrm, 0 },
{ X86::VPSUBQZrr, X86::VPSUBQZrm, 0 },
@@ -1957,9 +2032,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VANDPDZ256rr, X86::VANDPDZ256rm, 0 },
{ X86::VANDPSZ128rr, X86::VANDPSZ128rm, 0 },
{ X86::VANDPSZ256rr, X86::VANDPSZ256rm, 0 },
- { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
{ X86::VCMPPDZ128rri, X86::VCMPPDZ128rmi, 0 },
{ X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0 },
{ X86::VCMPPSZ128rri, X86::VCMPPSZ128rmi, 0 },
@@ -1996,6 +2068,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VORPDZ256rr, X86::VORPDZ256rm, 0 },
{ X86::VORPSZ128rr, X86::VORPSZ128rm, 0 },
{ X86::VORPSZ256rr, X86::VORPSZ256rm, 0 },
+ { X86::VPACKSSDWZ256rr, X86::VPACKSSDWZ256rm, 0 },
+ { X86::VPACKSSDWZ128rr, X86::VPACKSSDWZ128rm, 0 },
+ { X86::VPACKSSWBZ256rr, X86::VPACKSSWBZ256rm, 0 },
+ { X86::VPACKSSWBZ128rr, X86::VPACKSSWBZ128rm, 0 },
+ { X86::VPACKUSDWZ256rr, X86::VPACKUSDWZ256rm, 0 },
+ { X86::VPACKUSDWZ128rr, X86::VPACKUSDWZ128rm, 0 },
+ { X86::VPACKUSWBZ256rr, X86::VPACKUSWBZ256rm, 0 },
+ { X86::VPACKUSWBZ128rr, X86::VPACKUSWBZ128rm, 0 },
{ X86::VPADDBZ128rr, X86::VPADDBZ128rm, 0 },
{ X86::VPADDBZ256rr, X86::VPADDBZ256rm, 0 },
{ X86::VPADDDZ128rr, X86::VPADDDZ128rm, 0 },
@@ -2022,6 +2102,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDNQZ256rr, X86::VPANDNQZ256rm, 0 },
{ X86::VPANDQZ128rr, X86::VPANDQZ128rm, 0 },
{ X86::VPANDQZ256rr, X86::VPANDQZ256rm, 0 },
+ { X86::VPAVGBZ128rr, X86::VPAVGBZ128rm, 0 },
+ { X86::VPAVGBZ256rr, X86::VPAVGBZ256rm, 0 },
+ { X86::VPAVGWZ128rr, X86::VPAVGWZ128rm, 0 },
+ { X86::VPAVGWZ256rr, X86::VPAVGWZ256rm, 0 },
{ X86::VPCMPBZ128rri, X86::VPCMPBZ128rmi, 0 },
{ X86::VPCMPBZ256rri, X86::VPCMPBZ256rmi, 0 },
{ X86::VPCMPDZ128rri, X86::VPCMPDZ128rmi, 0 },
@@ -2070,12 +2154,92 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPMADDUBSWZ256rr, X86::VPMADDUBSWZ256rm, 0 },
{ X86::VPMADDWDZ128rr, X86::VPMADDWDZ128rm, 0 },
{ X86::VPMADDWDZ256rr, X86::VPMADDWDZ256rm, 0 },
+ { X86::VPMAXSBZ128rr, X86::VPMAXSBZ128rm, 0 },
+ { X86::VPMAXSBZ256rr, X86::VPMAXSBZ256rm, 0 },
+ { X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rm, 0 },
+ { X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rm, 0 },
+ { X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rm, 0 },
+ { X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rm, 0 },
+ { X86::VPMAXSWZ128rr, X86::VPMAXSWZ128rm, 0 },
+ { X86::VPMAXSWZ256rr, X86::VPMAXSWZ256rm, 0 },
+ { X86::VPMAXUBZ128rr, X86::VPMAXUBZ128rm, 0 },
+ { X86::VPMAXUBZ256rr, X86::VPMAXUBZ256rm, 0 },
+ { X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rm, 0 },
+ { X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rm, 0 },
+ { X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rm, 0 },
+ { X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rm, 0 },
+ { X86::VPMAXUWZ128rr, X86::VPMAXUWZ128rm, 0 },
+ { X86::VPMAXUWZ256rr, X86::VPMAXUWZ256rm, 0 },
+ { X86::VPMINSBZ128rr, X86::VPMINSBZ128rm, 0 },
+ { X86::VPMINSBZ256rr, X86::VPMINSBZ256rm, 0 },
+ { X86::VPMINSDZ128rr, X86::VPMINSDZ128rm, 0 },
+ { X86::VPMINSDZ256rr, X86::VPMINSDZ256rm, 0 },
+ { X86::VPMINSQZ128rr, X86::VPMINSQZ128rm, 0 },
+ { X86::VPMINSQZ256rr, X86::VPMINSQZ256rm, 0 },
+ { X86::VPMINSWZ128rr, X86::VPMINSWZ128rm, 0 },
+ { X86::VPMINSWZ256rr, X86::VPMINSWZ256rm, 0 },
+ { X86::VPMINUBZ128rr, X86::VPMINUBZ128rm, 0 },
+ { X86::VPMINUBZ256rr, X86::VPMINUBZ256rm, 0 },
+ { X86::VPMINUDZ128rr, X86::VPMINUDZ128rm, 0 },
+ { X86::VPMINUDZ256rr, X86::VPMINUDZ256rm, 0 },
+ { X86::VPMINUQZ128rr, X86::VPMINUQZ128rm, 0 },
+ { X86::VPMINUQZ256rr, X86::VPMINUQZ256rm, 0 },
+ { X86::VPMINUWZ128rr, X86::VPMINUWZ128rm, 0 },
+ { X86::VPMINUWZ256rr, X86::VPMINUWZ256rm, 0 },
+ { X86::VPMULDQZ128rr, X86::VPMULDQZ128rm, 0 },
+ { X86::VPMULDQZ256rr, X86::VPMULDQZ256rm, 0 },
+ { X86::VPMULLDZ128rr, X86::VPMULLDZ128rm, 0 },
+ { X86::VPMULLDZ256rr, X86::VPMULLDZ256rm, 0 },
+ { X86::VPMULLQZ128rr, X86::VPMULLQZ128rm, 0 },
+ { X86::VPMULLQZ256rr, X86::VPMULLQZ256rm, 0 },
+ { X86::VPMULLWZ128rr, X86::VPMULLWZ128rm, 0 },
+ { X86::VPMULLWZ256rr, X86::VPMULLWZ256rm, 0 },
+ { X86::VPMULUDQZ128rr, X86::VPMULUDQZ128rm, 0 },
+ { X86::VPMULUDQZ256rr, X86::VPMULUDQZ256rm, 0 },
{ X86::VPORDZ128rr, X86::VPORDZ128rm, 0 },
{ X86::VPORDZ256rr, X86::VPORDZ256rm, 0 },
{ X86::VPORQZ128rr, X86::VPORQZ128rm, 0 },
{ X86::VPORQZ256rr, X86::VPORQZ256rm, 0 },
+ { X86::VPSADBWZ128rr, X86::VPSADBWZ128rm, 0 },
+ { X86::VPSADBWZ256rr, X86::VPSADBWZ256rm, 0 },
{ X86::VPSHUFBZ128rr, X86::VPSHUFBZ128rm, 0 },
{ X86::VPSHUFBZ256rr, X86::VPSHUFBZ256rm, 0 },
+ { X86::VPSLLDZ128rr, X86::VPSLLDZ128rm, 0 },
+ { X86::VPSLLDZ256rr, X86::VPSLLDZ256rm, 0 },
+ { X86::VPSLLQZ128rr, X86::VPSLLQZ128rm, 0 },
+ { X86::VPSLLQZ256rr, X86::VPSLLQZ256rm, 0 },
+ { X86::VPSLLVDZ128rr, X86::VPSLLVDZ128rm, 0 },
+ { X86::VPSLLVDZ256rr, X86::VPSLLVDZ256rm, 0 },
+ { X86::VPSLLVQZ128rr, X86::VPSLLVQZ128rm, 0 },
+ { X86::VPSLLVQZ256rr, X86::VPSLLVQZ256rm, 0 },
+ { X86::VPSLLVWZ128rr, X86::VPSLLVWZ128rm, 0 },
+ { X86::VPSLLVWZ256rr, X86::VPSLLVWZ256rm, 0 },
+ { X86::VPSLLWZ128rr, X86::VPSLLWZ128rm, 0 },
+ { X86::VPSLLWZ256rr, X86::VPSLLWZ256rm, 0 },
+ { X86::VPSRADZ128rr, X86::VPSRADZ128rm, 0 },
+ { X86::VPSRADZ256rr, X86::VPSRADZ256rm, 0 },
+ { X86::VPSRAQZ128rr, X86::VPSRAQZ128rm, 0 },
+ { X86::VPSRAQZ256rr, X86::VPSRAQZ256rm, 0 },
+ { X86::VPSRAVDZ128rr, X86::VPSRAVDZ128rm, 0 },
+ { X86::VPSRAVDZ256rr, X86::VPSRAVDZ256rm, 0 },
+ { X86::VPSRAVQZ128rr, X86::VPSRAVQZ128rm, 0 },
+ { X86::VPSRAVQZ256rr, X86::VPSRAVQZ256rm, 0 },
+ { X86::VPSRAVWZ128rr, X86::VPSRAVWZ128rm, 0 },
+ { X86::VPSRAVWZ256rr, X86::VPSRAVWZ256rm, 0 },
+ { X86::VPSRAWZ128rr, X86::VPSRAWZ128rm, 0 },
+ { X86::VPSRAWZ256rr, X86::VPSRAWZ256rm, 0 },
+ { X86::VPSRLDZ128rr, X86::VPSRLDZ128rm, 0 },
+ { X86::VPSRLDZ256rr, X86::VPSRLDZ256rm, 0 },
+ { X86::VPSRLQZ128rr, X86::VPSRLQZ128rm, 0 },
+ { X86::VPSRLQZ256rr, X86::VPSRLQZ256rm, 0 },
+ { X86::VPSRLVDZ128rr, X86::VPSRLVDZ128rm, 0 },
+ { X86::VPSRLVDZ256rr, X86::VPSRLVDZ256rm, 0 },
+ { X86::VPSRLVQZ128rr, X86::VPSRLVQZ128rm, 0 },
+ { X86::VPSRLVQZ256rr, X86::VPSRLVQZ256rm, 0 },
+ { X86::VPSRLVWZ128rr, X86::VPSRLVWZ128rm, 0 },
+ { X86::VPSRLVWZ256rr, X86::VPSRLVWZ256rm, 0 },
+ { X86::VPSRLWZ128rr, X86::VPSRLWZ128rm, 0 },
+ { X86::VPSRLWZ256rr, X86::VPSRLWZ256rm, 0 },
{ X86::VPSUBBZ128rr, X86::VPSUBBZ128rm, 0 },
{ X86::VPSUBBZ256rr, X86::VPSUBBZ256rm, 0 },
{ X86::VPSUBDZ128rr, X86::VPSUBDZ128rm, 0 },
@@ -2112,6 +2276,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPXORDZ256rr, X86::VPXORDZ256rm, 0 },
{ X86::VPXORQZ128rr, X86::VPXORQZ128rm, 0 },
{ X86::VPXORQZ256rr, X86::VPXORQZ256rm, 0 },
+ { X86::VSHUFPDZ128rri, X86::VSHUFPDZ128rmi, 0 },
+ { X86::VSHUFPDZ256rri, X86::VSHUFPDZ256rmi, 0 },
+ { X86::VSHUFPSZ128rri, X86::VSHUFPSZ128rmi, 0 },
+ { X86::VSHUFPSZ256rri, X86::VSHUFPSZ256rmi, 0 },
{ X86::VSUBPDZ128rr, X86::VSUBPDZ128rm, 0 },
{ X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0 },
{ X86::VSUBPSZ128rr, X86::VSUBPSZ128rm, 0 },
@@ -2130,6 +2298,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 },
// AVX-512 masked foldable instructions
+ { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE },
+ { X86::VPABSBZrrkz, X86::VPABSBZrmkz, 0 },
+ { X86::VPABSDZrrkz, X86::VPABSDZrmkz, 0 },
+ { X86::VPABSQZrrkz, X86::VPABSQZrmkz, 0 },
+ { X86::VPABSWZrrkz, X86::VPABSWZrmkz, 0 },
{ X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 },
{ X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 },
{ X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 },
@@ -2149,8 +2323,23 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZrikz, X86::VPSHUFDZmikz, 0 },
{ X86::VPSHUFHWZrikz, X86::VPSHUFHWZmikz, 0 },
{ X86::VPSHUFLWZrikz, X86::VPSHUFLWZmikz, 0 },
+ { X86::VPSLLDZrikz, X86::VPSLLDZmikz, 0 },
+ { X86::VPSLLQZrikz, X86::VPSLLQZmikz, 0 },
+ { X86::VPSLLWZrikz, X86::VPSLLWZmikz, 0 },
+ { X86::VPSRADZrikz, X86::VPSRADZmikz, 0 },
+ { X86::VPSRAQZrikz, X86::VPSRAQZmikz, 0 },
+ { X86::VPSRAWZrikz, X86::VPSRAWZmikz, 0 },
+ { X86::VPSRLDZrikz, X86::VPSRLDZmikz, 0 },
+ { X86::VPSRLQZrikz, X86::VPSRLQZmikz, 0 },
+ { X86::VPSRLWZrikz, X86::VPSRLWZmikz, 0 },
// AVX-512VL 256-bit masked foldable instructions
+ { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE },
+ { X86::VPABSBZ256rrkz, X86::VPABSBZ256rmkz, 0 },
+ { X86::VPABSDZ256rrkz, X86::VPABSDZ256rmkz, 0 },
+ { X86::VPABSQZ256rrkz, X86::VPABSQZ256rmkz, 0 },
+ { X86::VPABSWZ256rrkz, X86::VPABSWZ256rmkz, 0 },
{ X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 },
{ X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 },
{ X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 },
@@ -2170,8 +2359,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZ256rikz, X86::VPSHUFDZ256mikz, 0 },
{ X86::VPSHUFHWZ256rikz, X86::VPSHUFHWZ256mikz, 0 },
{ X86::VPSHUFLWZ256rikz, X86::VPSHUFLWZ256mikz, 0 },
+ { X86::VPSLLDZ256rikz, X86::VPSLLDZ256mikz, 0 },
+ { X86::VPSLLQZ256rikz, X86::VPSLLQZ256mikz, 0 },
+ { X86::VPSLLWZ256rikz, X86::VPSLLWZ256mikz, 0 },
+ { X86::VPSRADZ256rikz, X86::VPSRADZ256mikz, 0 },
+ { X86::VPSRAQZ256rikz, X86::VPSRAQZ256mikz, 0 },
+ { X86::VPSRAWZ256rikz, X86::VPSRAWZ256mikz, 0 },
+ { X86::VPSRLDZ256rikz, X86::VPSRLDZ256mikz, 0 },
+ { X86::VPSRLQZ256rikz, X86::VPSRLQZ256mikz, 0 },
+ { X86::VPSRLWZ256rikz, X86::VPSRLWZ256mikz, 0 },
// AVX-512VL 128-bit masked foldable instructions
+ { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
+ { X86::VPABSBZ128rrkz, X86::VPABSBZ128rmkz, 0 },
+ { X86::VPABSDZ128rrkz, X86::VPABSDZ128rmkz, 0 },
+ { X86::VPABSQZ128rrkz, X86::VPABSQZ128rmkz, 0 },
+ { X86::VPABSWZ128rrkz, X86::VPABSWZ128rmkz, 0 },
{ X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 },
{ X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 },
{ X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE },
@@ -2189,6 +2392,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZ128rikz, X86::VPSHUFDZ128mikz, 0 },
{ X86::VPSHUFHWZ128rikz, X86::VPSHUFHWZ128mikz, 0 },
{ X86::VPSHUFLWZ128rikz, X86::VPSHUFLWZ128mikz, 0 },
+ { X86::VPSLLDZ128rikz, X86::VPSLLDZ128mikz, 0 },
+ { X86::VPSLLQZ128rikz, X86::VPSLLQZ128mikz, 0 },
+ { X86::VPSLLWZ128rikz, X86::VPSLLWZ128mikz, 0 },
+ { X86::VPSRADZ128rikz, X86::VPSRADZ128mikz, 0 },
+ { X86::VPSRAQZ128rikz, X86::VPSRAQZ128mikz, 0 },
+ { X86::VPSRAWZ128rikz, X86::VPSRAWZ128mikz, 0 },
+ { X86::VPSRLDZ128rikz, X86::VPSRLDZ128mikz, 0 },
+ { X86::VPSRLQZ128rikz, X86::VPSRLQZ128mikz, 0 },
+ { X86::VPSRLWZ128rikz, X86::VPSRLWZ128mikz, 0 },
// AES foldable instructions
{ X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
@@ -2262,23 +2474,14 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// XOP foldable instructions
{ X86::VPCMOVrrr, X86::VPCMOVrrm, 0 },
- { X86::VPCMOVrrrY, X86::VPCMOVrrmY, 0 },
+ { X86::VPCMOVYrrr, X86::VPCMOVYrrm, 0 },
{ X86::VPERMIL2PDrr, X86::VPERMIL2PDrm, 0 },
- { X86::VPERMIL2PDrrY, X86::VPERMIL2PDrmY, 0 },
+ { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYrm, 0 },
{ X86::VPERMIL2PSrr, X86::VPERMIL2PSrm, 0 },
- { X86::VPERMIL2PSrrY, X86::VPERMIL2PSrmY, 0 },
+ { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYrm, 0 },
{ X86::VPPERMrrr, X86::VPPERMrrm, 0 },
// AVX-512 instructions with 3 source operands.
- { X86::VBLENDMPDZrr, X86::VBLENDMPDZrm, 0 },
- { X86::VBLENDMPSZrr, X86::VBLENDMPSZrm, 0 },
- { X86::VPBLENDMDZrr, X86::VPBLENDMDZrm, 0 },
- { X86::VPBLENDMQZrr, X86::VPBLENDMQZrm, 0 },
- { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE },
- { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE },
{ X86::VPERMI2Brr, X86::VPERMI2Brm, 0 },
{ X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
{ X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
@@ -2329,6 +2532,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// AVX-512 masked instructions
{ X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 },
{ X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
+ { X86::VADDSDZrr_Intkz, X86::VADDSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VADDSSZrr_Intkz, X86::VADDSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VALIGNDZrrikz, X86::VALIGNDZrmikz, 0 },
{ X86::VALIGNQZrrikz, X86::VALIGNQZrmikz, 0 },
{ X86::VANDNPDZrrkz, X86::VANDNPDZrmkz, 0 },
@@ -2337,6 +2542,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VANDPSZrrkz, X86::VANDPSZrmkz, 0 },
{ X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
{ X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
+ { X86::VDIVSDZrr_Intkz, X86::VDIVSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VDIVSSZrr_Intkz, X86::VDIVSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VINSERTF32x4Zrrkz, X86::VINSERTF32x4Zrmkz, 0 },
{ X86::VINSERTF32x8Zrrkz, X86::VINSERTF32x8Zrmkz, 0 },
{ X86::VINSERTF64x2Zrrkz, X86::VINSERTF64x2Zrmkz, 0 },
@@ -2349,14 +2556,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 },
{ X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 },
{ X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 },
+ { X86::VMAXSDZrr_Intkz, X86::VMAXSDZrm_Intkz, 0 },
+ { X86::VMAXSSZrr_Intkz, X86::VMAXSSZrm_Intkz, 0 },
{ X86::VMINCPDZrrkz, X86::VMINCPDZrmkz, 0 },
{ X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0 },
{ X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 },
{ X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 },
+ { X86::VMINSDZrr_Intkz, X86::VMINSDZrm_Intkz, 0 },
+ { X86::VMINSSZrr_Intkz, X86::VMINSSZrm_Intkz, 0 },
{ X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 },
{ X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
+ { X86::VMULSDZrr_Intkz, X86::VMULSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VMULSSZrr_Intkz, X86::VMULSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VORPDZrrkz, X86::VORPDZrmkz, 0 },
{ X86::VORPSZrrkz, X86::VORPSZrmkz, 0 },
+ { X86::VPACKSSDWZrrkz, X86::VPACKSSDWZrmkz, 0 },
+ { X86::VPACKSSWBZrrkz, X86::VPACKSSWBZrmkz, 0 },
+ { X86::VPACKUSDWZrrkz, X86::VPACKUSDWZrmkz, 0 },
+ { X86::VPACKUSWBZrrkz, X86::VPACKUSWBZrmkz, 0 },
{ X86::VPADDBZrrkz, X86::VPADDBZrmkz, 0 },
{ X86::VPADDDZrrkz, X86::VPADDDZrmkz, 0 },
{ X86::VPADDQZrrkz, X86::VPADDQZrmkz, 0 },
@@ -2370,6 +2587,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDNDZrrkz, X86::VPANDNDZrmkz, 0 },
{ X86::VPANDNQZrrkz, X86::VPANDNQZrmkz, 0 },
{ X86::VPANDQZrrkz, X86::VPANDQZrmkz, 0 },
+ { X86::VPAVGBZrrkz, X86::VPAVGBZrmkz, 0 },
+ { X86::VPAVGWZrrkz, X86::VPAVGWZrmkz, 0 },
{ X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0 },
{ X86::VPERMDZrrkz, X86::VPERMDZrmkz, 0 },
{ X86::VPERMILPDZrrkz, X86::VPERMILPDZrmkz, 0 },
@@ -2380,9 +2599,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMWZrrkz, X86::VPERMWZrmkz, 0 },
{ X86::VPMADDUBSWZrrkz, X86::VPMADDUBSWZrmkz, 0 },
{ X86::VPMADDWDZrrkz, X86::VPMADDWDZrmkz, 0 },
+ { X86::VPMAXSBZrrkz, X86::VPMAXSBZrmkz, 0 },
+ { X86::VPMAXSDZrrkz, X86::VPMAXSDZrmkz, 0 },
+ { X86::VPMAXSQZrrkz, X86::VPMAXSQZrmkz, 0 },
+ { X86::VPMAXSWZrrkz, X86::VPMAXSWZrmkz, 0 },
+ { X86::VPMAXUBZrrkz, X86::VPMAXUBZrmkz, 0 },
+ { X86::VPMAXUDZrrkz, X86::VPMAXUDZrmkz, 0 },
+ { X86::VPMAXUQZrrkz, X86::VPMAXUQZrmkz, 0 },
+ { X86::VPMAXUWZrrkz, X86::VPMAXUWZrmkz, 0 },
+ { X86::VPMINSBZrrkz, X86::VPMINSBZrmkz, 0 },
+ { X86::VPMINSDZrrkz, X86::VPMINSDZrmkz, 0 },
+ { X86::VPMINSQZrrkz, X86::VPMINSQZrmkz, 0 },
+ { X86::VPMINSWZrrkz, X86::VPMINSWZrmkz, 0 },
+ { X86::VPMINUBZrrkz, X86::VPMINUBZrmkz, 0 },
+ { X86::VPMINUDZrrkz, X86::VPMINUDZrmkz, 0 },
+ { X86::VPMINUQZrrkz, X86::VPMINUQZrmkz, 0 },
+ { X86::VPMINUWZrrkz, X86::VPMINUWZrmkz, 0 },
+ { X86::VPMULLDZrrkz, X86::VPMULLDZrmkz, 0 },
+ { X86::VPMULLQZrrkz, X86::VPMULLQZrmkz, 0 },
+ { X86::VPMULLWZrrkz, X86::VPMULLWZrmkz, 0 },
+ { X86::VPMULDQZrrkz, X86::VPMULDQZrmkz, 0 },
+ { X86::VPMULUDQZrrkz, X86::VPMULUDQZrmkz, 0 },
{ X86::VPORDZrrkz, X86::VPORDZrmkz, 0 },
{ X86::VPORQZrrkz, X86::VPORQZrmkz, 0 },
{ X86::VPSHUFBZrrkz, X86::VPSHUFBZrmkz, 0 },
+ { X86::VPSLLDZrrkz, X86::VPSLLDZrmkz, 0 },
+ { X86::VPSLLQZrrkz, X86::VPSLLQZrmkz, 0 },
+ { X86::VPSLLVDZrrkz, X86::VPSLLVDZrmkz, 0 },
+ { X86::VPSLLVQZrrkz, X86::VPSLLVQZrmkz, 0 },
+ { X86::VPSLLVWZrrkz, X86::VPSLLVWZrmkz, 0 },
+ { X86::VPSLLWZrrkz, X86::VPSLLWZrmkz, 0 },
+ { X86::VPSRADZrrkz, X86::VPSRADZrmkz, 0 },
+ { X86::VPSRAQZrrkz, X86::VPSRAQZrmkz, 0 },
+ { X86::VPSRAVDZrrkz, X86::VPSRAVDZrmkz, 0 },
+ { X86::VPSRAVQZrrkz, X86::VPSRAVQZrmkz, 0 },
+ { X86::VPSRAVWZrrkz, X86::VPSRAVWZrmkz, 0 },
+ { X86::VPSRAWZrrkz, X86::VPSRAWZrmkz, 0 },
+ { X86::VPSRLDZrrkz, X86::VPSRLDZrmkz, 0 },
+ { X86::VPSRLQZrrkz, X86::VPSRLQZrmkz, 0 },
+ { X86::VPSRLVDZrrkz, X86::VPSRLVDZrmkz, 0 },
+ { X86::VPSRLVQZrrkz, X86::VPSRLVQZrmkz, 0 },
+ { X86::VPSRLVWZrrkz, X86::VPSRLVWZrmkz, 0 },
+ { X86::VPSRLWZrrkz, X86::VPSRLWZrmkz, 0 },
{ X86::VPSUBBZrrkz, X86::VPSUBBZrmkz, 0 },
{ X86::VPSUBDZrrkz, X86::VPSUBDZrmkz, 0 },
{ X86::VPSUBQZrrkz, X86::VPSUBQZrmkz, 0 },
@@ -2401,8 +2659,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPUNPCKLWDZrrkz, X86::VPUNPCKLWDZrmkz, 0 },
{ X86::VPXORDZrrkz, X86::VPXORDZrmkz, 0 },
{ X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 },
+ { X86::VSHUFPDZrrikz, X86::VSHUFPDZrmikz, 0 },
+ { X86::VSHUFPSZrrikz, X86::VSHUFPSZrmikz, 0 },
{ X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
{ X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 },
+ { X86::VSUBSDZrr_Intkz, X86::VSUBSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VSUBSSZrr_Intkz, X86::VSUBSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VUNPCKHPDZrrkz, X86::VUNPCKHPDZrmkz, 0 },
{ X86::VUNPCKHPSZrrkz, X86::VUNPCKHPSZrmkz, 0 },
{ X86::VUNPCKLPDZrrkz, X86::VUNPCKLPDZrmkz, 0 },
@@ -2437,6 +2699,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 },
{ X86::VORPDZ256rrkz, X86::VORPDZ256rmkz, 0 },
{ X86::VORPSZ256rrkz, X86::VORPSZ256rmkz, 0 },
+ { X86::VPACKSSDWZ256rrkz, X86::VPACKSSDWZ256rmkz, 0 },
+ { X86::VPACKSSWBZ256rrkz, X86::VPACKSSWBZ256rmkz, 0 },
+ { X86::VPACKUSDWZ256rrkz, X86::VPACKUSDWZ256rmkz, 0 },
+ { X86::VPACKUSWBZ256rrkz, X86::VPACKUSWBZ256rmkz, 0 },
{ X86::VPADDBZ256rrkz, X86::VPADDBZ256rmkz, 0 },
{ X86::VPADDDZ256rrkz, X86::VPADDDZ256rmkz, 0 },
{ X86::VPADDQZ256rrkz, X86::VPADDQZ256rmkz, 0 },
@@ -2450,6 +2716,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDNDZ256rrkz, X86::VPANDNDZ256rmkz, 0 },
{ X86::VPANDNQZ256rrkz, X86::VPANDNQZ256rmkz, 0 },
{ X86::VPANDQZ256rrkz, X86::VPANDQZ256rmkz, 0 },
+ { X86::VPAVGBZ256rrkz, X86::VPAVGBZ256rmkz, 0 },
+ { X86::VPAVGWZ256rrkz, X86::VPAVGWZ256rmkz, 0 },
{ X86::VPERMBZ256rrkz, X86::VPERMBZ256rmkz, 0 },
{ X86::VPERMDZ256rrkz, X86::VPERMDZ256rmkz, 0 },
{ X86::VPERMILPDZ256rrkz, X86::VPERMILPDZ256rmkz, 0 },
@@ -2460,9 +2728,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMWZ256rrkz, X86::VPERMWZ256rmkz, 0 },
{ X86::VPMADDUBSWZ256rrkz, X86::VPMADDUBSWZ256rmkz, 0 },
{ X86::VPMADDWDZ256rrkz, X86::VPMADDWDZ256rmkz, 0 },
+ { X86::VPMAXSBZ256rrkz, X86::VPMAXSBZ256rmkz, 0 },
+ { X86::VPMAXSDZ256rrkz, X86::VPMAXSDZ256rmkz, 0 },
+ { X86::VPMAXSQZ256rrkz, X86::VPMAXSQZ256rmkz, 0 },
+ { X86::VPMAXSWZ256rrkz, X86::VPMAXSWZ256rmkz, 0 },
+ { X86::VPMAXUBZ256rrkz, X86::VPMAXUBZ256rmkz, 0 },
+ { X86::VPMAXUDZ256rrkz, X86::VPMAXUDZ256rmkz, 0 },
+ { X86::VPMAXUQZ256rrkz, X86::VPMAXUQZ256rmkz, 0 },
+ { X86::VPMAXUWZ256rrkz, X86::VPMAXUWZ256rmkz, 0 },
+ { X86::VPMINSBZ256rrkz, X86::VPMINSBZ256rmkz, 0 },
+ { X86::VPMINSDZ256rrkz, X86::VPMINSDZ256rmkz, 0 },
+ { X86::VPMINSQZ256rrkz, X86::VPMINSQZ256rmkz, 0 },
+ { X86::VPMINSWZ256rrkz, X86::VPMINSWZ256rmkz, 0 },
+ { X86::VPMINUBZ256rrkz, X86::VPMINUBZ256rmkz, 0 },
+ { X86::VPMINUDZ256rrkz, X86::VPMINUDZ256rmkz, 0 },
+ { X86::VPMINUQZ256rrkz, X86::VPMINUQZ256rmkz, 0 },
+ { X86::VPMINUWZ256rrkz, X86::VPMINUWZ256rmkz, 0 },
+ { X86::VPMULDQZ256rrkz, X86::VPMULDQZ256rmkz, 0 },
+ { X86::VPMULLDZ256rrkz, X86::VPMULLDZ256rmkz, 0 },
+ { X86::VPMULLQZ256rrkz, X86::VPMULLQZ256rmkz, 0 },
+ { X86::VPMULLWZ256rrkz, X86::VPMULLWZ256rmkz, 0 },
+ { X86::VPMULUDQZ256rrkz, X86::VPMULUDQZ256rmkz, 0 },
{ X86::VPORDZ256rrkz, X86::VPORDZ256rmkz, 0 },
{ X86::VPORQZ256rrkz, X86::VPORQZ256rmkz, 0 },
{ X86::VPSHUFBZ256rrkz, X86::VPSHUFBZ256rmkz, 0 },
+ { X86::VPSLLDZ256rrkz, X86::VPSLLDZ256rmkz, 0 },
+ { X86::VPSLLQZ256rrkz, X86::VPSLLQZ256rmkz, 0 },
+ { X86::VPSLLVDZ256rrkz, X86::VPSLLVDZ256rmkz, 0 },
+ { X86::VPSLLVQZ256rrkz, X86::VPSLLVQZ256rmkz, 0 },
+ { X86::VPSLLVWZ256rrkz, X86::VPSLLVWZ256rmkz, 0 },
+ { X86::VPSLLWZ256rrkz, X86::VPSLLWZ256rmkz, 0 },
+ { X86::VPSRADZ256rrkz, X86::VPSRADZ256rmkz, 0 },
+ { X86::VPSRAQZ256rrkz, X86::VPSRAQZ256rmkz, 0 },
+ { X86::VPSRAVDZ256rrkz, X86::VPSRAVDZ256rmkz, 0 },
+ { X86::VPSRAVQZ256rrkz, X86::VPSRAVQZ256rmkz, 0 },
+ { X86::VPSRAVWZ256rrkz, X86::VPSRAVWZ256rmkz, 0 },
+ { X86::VPSRAWZ256rrkz, X86::VPSRAWZ256rmkz, 0 },
+ { X86::VPSRLDZ256rrkz, X86::VPSRLDZ256rmkz, 0 },
+ { X86::VPSRLQZ256rrkz, X86::VPSRLQZ256rmkz, 0 },
+ { X86::VPSRLVDZ256rrkz, X86::VPSRLVDZ256rmkz, 0 },
+ { X86::VPSRLVQZ256rrkz, X86::VPSRLVQZ256rmkz, 0 },
+ { X86::VPSRLVWZ256rrkz, X86::VPSRLVWZ256rmkz, 0 },
+ { X86::VPSRLWZ256rrkz, X86::VPSRLWZ256rmkz, 0 },
{ X86::VPSUBBZ256rrkz, X86::VPSUBBZ256rmkz, 0 },
{ X86::VPSUBDZ256rrkz, X86::VPSUBDZ256rmkz, 0 },
{ X86::VPSUBQZ256rrkz, X86::VPSUBQZ256rmkz, 0 },
@@ -2481,6 +2788,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPUNPCKLWDZ256rrkz, X86::VPUNPCKLWDZ256rmkz, 0 },
{ X86::VPXORDZ256rrkz, X86::VPXORDZ256rmkz, 0 },
{ X86::VPXORQZ256rrkz, X86::VPXORQZ256rmkz, 0 },
+ { X86::VSHUFPDZ256rrikz, X86::VSHUFPDZ256rmikz, 0 },
+ { X86::VSHUFPSZ256rrikz, X86::VSHUFPSZ256rmikz, 0 },
{ X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 },
{ X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 },
{ X86::VUNPCKHPDZ256rrkz, X86::VUNPCKHPDZ256rmkz, 0 },
@@ -2513,6 +2822,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 },
{ X86::VORPDZ128rrkz, X86::VORPDZ128rmkz, 0 },
{ X86::VORPSZ128rrkz, X86::VORPSZ128rmkz, 0 },
+ { X86::VPACKSSDWZ128rrkz, X86::VPACKSSDWZ128rmkz, 0 },
+ { X86::VPACKSSWBZ128rrkz, X86::VPACKSSWBZ128rmkz, 0 },
+ { X86::VPACKUSDWZ128rrkz, X86::VPACKUSDWZ128rmkz, 0 },
+ { X86::VPACKUSWBZ128rrkz, X86::VPACKUSWBZ128rmkz, 0 },
{ X86::VPADDBZ128rrkz, X86::VPADDBZ128rmkz, 0 },
{ X86::VPADDDZ128rrkz, X86::VPADDDZ128rmkz, 0 },
{ X86::VPADDQZ128rrkz, X86::VPADDQZ128rmkz, 0 },
@@ -2526,15 +2839,56 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDNDZ128rrkz, X86::VPANDNDZ128rmkz, 0 },
{ X86::VPANDNQZ128rrkz, X86::VPANDNQZ128rmkz, 0 },
{ X86::VPANDQZ128rrkz, X86::VPANDQZ128rmkz, 0 },
+ { X86::VPAVGBZ128rrkz, X86::VPAVGBZ128rmkz, 0 },
+ { X86::VPAVGWZ128rrkz, X86::VPAVGWZ128rmkz, 0 },
{ X86::VPERMBZ128rrkz, X86::VPERMBZ128rmkz, 0 },
{ X86::VPERMILPDZ128rrkz, X86::VPERMILPDZ128rmkz, 0 },
{ X86::VPERMILPSZ128rrkz, X86::VPERMILPSZ128rmkz, 0 },
{ X86::VPERMWZ128rrkz, X86::VPERMWZ128rmkz, 0 },
{ X86::VPMADDUBSWZ128rrkz, X86::VPMADDUBSWZ128rmkz, 0 },
{ X86::VPMADDWDZ128rrkz, X86::VPMADDWDZ128rmkz, 0 },
+ { X86::VPMAXSBZ128rrkz, X86::VPMAXSBZ128rmkz, 0 },
+ { X86::VPMAXSDZ128rrkz, X86::VPMAXSDZ128rmkz, 0 },
+ { X86::VPMAXSQZ128rrkz, X86::VPMAXSQZ128rmkz, 0 },
+ { X86::VPMAXSWZ128rrkz, X86::VPMAXSWZ128rmkz, 0 },
+ { X86::VPMAXUBZ128rrkz, X86::VPMAXUBZ128rmkz, 0 },
+ { X86::VPMAXUDZ128rrkz, X86::VPMAXUDZ128rmkz, 0 },
+ { X86::VPMAXUQZ128rrkz, X86::VPMAXUQZ128rmkz, 0 },
+ { X86::VPMAXUWZ128rrkz, X86::VPMAXUWZ128rmkz, 0 },
+ { X86::VPMINSBZ128rrkz, X86::VPMINSBZ128rmkz, 0 },
+ { X86::VPMINSDZ128rrkz, X86::VPMINSDZ128rmkz, 0 },
+ { X86::VPMINSQZ128rrkz, X86::VPMINSQZ128rmkz, 0 },
+ { X86::VPMINSWZ128rrkz, X86::VPMINSWZ128rmkz, 0 },
+ { X86::VPMINUBZ128rrkz, X86::VPMINUBZ128rmkz, 0 },
+ { X86::VPMINUDZ128rrkz, X86::VPMINUDZ128rmkz, 0 },
+ { X86::VPMINUQZ128rrkz, X86::VPMINUQZ128rmkz, 0 },
+ { X86::VPMINUWZ128rrkz, X86::VPMINUWZ128rmkz, 0 },
+ { X86::VPMULDQZ128rrkz, X86::VPMULDQZ128rmkz, 0 },
+ { X86::VPMULLDZ128rrkz, X86::VPMULLDZ128rmkz, 0 },
+ { X86::VPMULLQZ128rrkz, X86::VPMULLQZ128rmkz, 0 },
+ { X86::VPMULLWZ128rrkz, X86::VPMULLWZ128rmkz, 0 },
+ { X86::VPMULUDQZ128rrkz, X86::VPMULUDQZ128rmkz, 0 },
{ X86::VPORDZ128rrkz, X86::VPORDZ128rmkz, 0 },
{ X86::VPORQZ128rrkz, X86::VPORQZ128rmkz, 0 },
{ X86::VPSHUFBZ128rrkz, X86::VPSHUFBZ128rmkz, 0 },
+ { X86::VPSLLDZ128rrkz, X86::VPSLLDZ128rmkz, 0 },
+ { X86::VPSLLQZ128rrkz, X86::VPSLLQZ128rmkz, 0 },
+ { X86::VPSLLVDZ128rrkz, X86::VPSLLVDZ128rmkz, 0 },
+ { X86::VPSLLVQZ128rrkz, X86::VPSLLVQZ128rmkz, 0 },
+ { X86::VPSLLVWZ128rrkz, X86::VPSLLVWZ128rmkz, 0 },
+ { X86::VPSLLWZ128rrkz, X86::VPSLLWZ128rmkz, 0 },
+ { X86::VPSRADZ128rrkz, X86::VPSRADZ128rmkz, 0 },
+ { X86::VPSRAQZ128rrkz, X86::VPSRAQZ128rmkz, 0 },
+ { X86::VPSRAVDZ128rrkz, X86::VPSRAVDZ128rmkz, 0 },
+ { X86::VPSRAVQZ128rrkz, X86::VPSRAVQZ128rmkz, 0 },
+ { X86::VPSRAVWZ128rrkz, X86::VPSRAVWZ128rmkz, 0 },
+ { X86::VPSRAWZ128rrkz, X86::VPSRAWZ128rmkz, 0 },
+ { X86::VPSRLDZ128rrkz, X86::VPSRLDZ128rmkz, 0 },
+ { X86::VPSRLQZ128rrkz, X86::VPSRLQZ128rmkz, 0 },
+ { X86::VPSRLVDZ128rrkz, X86::VPSRLVDZ128rmkz, 0 },
+ { X86::VPSRLVQZ128rrkz, X86::VPSRLVQZ128rmkz, 0 },
+ { X86::VPSRLVWZ128rrkz, X86::VPSRLVWZ128rmkz, 0 },
+ { X86::VPSRLWZ128rrkz, X86::VPSRLWZ128rmkz, 0 },
{ X86::VPSUBBZ128rrkz, X86::VPSUBBZ128rmkz, 0 },
{ X86::VPSUBDZ128rrkz, X86::VPSUBDZ128rmkz, 0 },
{ X86::VPSUBQZ128rrkz, X86::VPSUBQZ128rmkz, 0 },
@@ -2553,6 +2907,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPUNPCKLWDZ128rrkz, X86::VPUNPCKLWDZ128rmkz, 0 },
{ X86::VPXORDZ128rrkz, X86::VPXORDZ128rmkz, 0 },
{ X86::VPXORQZ128rrkz, X86::VPXORQZ128rmkz, 0 },
+ { X86::VSHUFPDZ128rrikz, X86::VSHUFPDZ128rmikz, 0 },
+ { X86::VSHUFPSZ128rrikz, X86::VSHUFPSZ128rmikz, 0 },
{ X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 },
{ X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 },
{ X86::VUNPCKHPDZ128rrkz, X86::VUNPCKHPDZ128rmkz, 0 },
@@ -2563,6 +2919,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VXORPSZ128rrkz, X86::VXORPSZ128rmkz, 0 },
// AVX-512 masked foldable instructions
+ { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE },
+ { X86::VPABSBZrrk, X86::VPABSBZrmk, 0 },
+ { X86::VPABSDZrrk, X86::VPABSDZrmk, 0 },
+ { X86::VPABSQZrrk, X86::VPABSQZrmk, 0 },
+ { X86::VPABSWZrrk, X86::VPABSWZrmk, 0 },
{ X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 },
{ X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 },
{ X86::VPERMPDZrik, X86::VPERMPDZmik, 0 },
@@ -2582,8 +2944,23 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZrik, X86::VPSHUFDZmik, 0 },
{ X86::VPSHUFHWZrik, X86::VPSHUFHWZmik, 0 },
{ X86::VPSHUFLWZrik, X86::VPSHUFLWZmik, 0 },
+ { X86::VPSLLDZrik, X86::VPSLLDZmik, 0 },
+ { X86::VPSLLQZrik, X86::VPSLLQZmik, 0 },
+ { X86::VPSLLWZrik, X86::VPSLLWZmik, 0 },
+ { X86::VPSRADZrik, X86::VPSRADZmik, 0 },
+ { X86::VPSRAQZrik, X86::VPSRAQZmik, 0 },
+ { X86::VPSRAWZrik, X86::VPSRAWZmik, 0 },
+ { X86::VPSRLDZrik, X86::VPSRLDZmik, 0 },
+ { X86::VPSRLQZrik, X86::VPSRLQZmik, 0 },
+ { X86::VPSRLWZrik, X86::VPSRLWZmik, 0 },
// AVX-512VL 256-bit masked foldable instructions
+ { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE },
+ { X86::VPABSBZ256rrk, X86::VPABSBZ256rmk, 0 },
+ { X86::VPABSDZ256rrk, X86::VPABSDZ256rmk, 0 },
+ { X86::VPABSQZ256rrk, X86::VPABSQZ256rmk, 0 },
+ { X86::VPABSWZ256rrk, X86::VPABSWZ256rmk, 0 },
{ X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 },
{ X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 },
{ X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 },
@@ -2603,8 +2980,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZ256rik, X86::VPSHUFDZ256mik, 0 },
{ X86::VPSHUFHWZ256rik, X86::VPSHUFHWZ256mik, 0 },
{ X86::VPSHUFLWZ256rik, X86::VPSHUFLWZ256mik, 0 },
+ { X86::VPSLLDZ256rik, X86::VPSLLDZ256mik, 0 },
+ { X86::VPSLLQZ256rik, X86::VPSLLQZ256mik, 0 },
+ { X86::VPSLLWZ256rik, X86::VPSLLWZ256mik, 0 },
+ { X86::VPSRADZ256rik, X86::VPSRADZ256mik, 0 },
+ { X86::VPSRAQZ256rik, X86::VPSRAQZ256mik, 0 },
+ { X86::VPSRAWZ256rik, X86::VPSRAWZ256mik, 0 },
+ { X86::VPSRLDZ256rik, X86::VPSRLDZ256mik, 0 },
+ { X86::VPSRLQZ256rik, X86::VPSRLQZ256mik, 0 },
+ { X86::VPSRLWZ256rik, X86::VPSRLWZ256mik, 0 },
// AVX-512VL 128-bit masked foldable instructions
+ { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE },
+ { X86::VPABSBZ128rrk, X86::VPABSBZ128rmk, 0 },
+ { X86::VPABSDZ128rrk, X86::VPABSDZ128rmk, 0 },
+ { X86::VPABSQZ128rrk, X86::VPABSQZ128rmk, 0 },
+ { X86::VPABSWZ128rrk, X86::VPABSWZ128rmk, 0 },
{ X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 },
{ X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 },
{ X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE },
@@ -2622,6 +3013,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSHUFDZ128rik, X86::VPSHUFDZ128mik, 0 },
{ X86::VPSHUFHWZ128rik, X86::VPSHUFHWZ128mik, 0 },
{ X86::VPSHUFLWZ128rik, X86::VPSHUFLWZ128mik, 0 },
+ { X86::VPSLLDZ128rik, X86::VPSLLDZ128mik, 0 },
+ { X86::VPSLLQZ128rik, X86::VPSLLQZ128mik, 0 },
+ { X86::VPSLLWZ128rik, X86::VPSLLWZ128mik, 0 },
+ { X86::VPSRADZ128rik, X86::VPSRADZ128mik, 0 },
+ { X86::VPSRAQZ128rik, X86::VPSRAQZ128mik, 0 },
+ { X86::VPSRAWZ128rik, X86::VPSRAWZ128mik, 0 },
+ { X86::VPSRLDZ128rik, X86::VPSRLDZ128mik, 0 },
+ { X86::VPSRLQZ128rik, X86::VPSRLQZ128mik, 0 },
+ { X86::VPSRLWZ128rik, X86::VPSRLWZ128mik, 0 },
};
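
Each entry in the fold tables above pairs a register-form opcode with the opcode to use when one operand is folded into a memory reference, plus flags such as TB_ALIGN_16 (the memory operand must be suitably aligned), TB_FOLDED_STORE (the fold produces a store), or TB_NO_REVERSE (the fold cannot be undone). A minimal sketch of how such a table can be consumed (C++; hypothetical names, not the surrounding LLVM code, which registers these arrays into a map as the loop below shows):

// Illustrative lookup over a {RegOp, MemOp, Flags} table; not LLVM code.
#include <cstddef>
#include <cstdint>
#include <optional>

struct FoldEntry {
  unsigned RegOp;  // register-register opcode
  unsigned MemOp;  // opcode with one operand folded to memory
  uint16_t Flags;  // alignment / store / reversibility requirements
};

static std::optional<FoldEntry>
lookupFold(const FoldEntry *Table, size_t N, unsigned RegOp) {
  // Linear scan for clarity; tables of this size are better served by a
  // sorted array with binary search or a hash map built once at startup.
  for (size_t I = 0; I != N; ++I)
    if (Table[I].RegOp == RegOp)
      return Table[I];
  return std::nullopt;
}

Keeping the entries as flat, statically initialized arrays (as done here) avoids dynamic initialization cost and lets the constructor below insert them all into the real lookup structure in one pass.
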
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) {
@@ -2651,6 +3051,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// AVX-512 foldable masked instructions
{ X86::VADDPDZrrk, X86::VADDPDZrmk, 0 },
{ X86::VADDPSZrrk, X86::VADDPSZrmk, 0 },
+ { X86::VADDSDZrr_Intk, X86::VADDSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VADDSSZrr_Intk, X86::VADDSSZrm_Intk, TB_NO_REVERSE },
{ X86::VALIGNDZrrik, X86::VALIGNDZrmik, 0 },
{ X86::VALIGNQZrrik, X86::VALIGNQZrmik, 0 },
{ X86::VANDNPDZrrk, X86::VANDNPDZrmk, 0 },
@@ -2659,6 +3061,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VANDPSZrrk, X86::VANDPSZrmk, 0 },
{ X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
{ X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
+ { X86::VDIVSDZrr_Intk, X86::VDIVSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VDIVSSZrr_Intk, X86::VDIVSSZrm_Intk, TB_NO_REVERSE },
{ X86::VINSERTF32x4Zrrk, X86::VINSERTF32x4Zrmk, 0 },
{ X86::VINSERTF32x8Zrrk, X86::VINSERTF32x8Zrmk, 0 },
{ X86::VINSERTF64x2Zrrk, X86::VINSERTF64x2Zrmk, 0 },
@@ -2671,14 +3075,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0 },
{ X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 },
{ X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 },
+ { X86::VMAXSDZrr_Intk, X86::VMAXSDZrm_Intk, 0 },
+ { X86::VMAXSSZrr_Intk, X86::VMAXSSZrm_Intk, 0 },
{ X86::VMINCPDZrrk, X86::VMINCPDZrmk, 0 },
{ X86::VMINCPSZrrk, X86::VMINCPSZrmk, 0 },
{ X86::VMINPDZrrk, X86::VMINPDZrmk, 0 },
{ X86::VMINPSZrrk, X86::VMINPSZrmk, 0 },
+ { X86::VMINSDZrr_Intk, X86::VMINSDZrm_Intk, 0 },
+ { X86::VMINSSZrr_Intk, X86::VMINSSZrm_Intk, 0 },
{ X86::VMULPDZrrk, X86::VMULPDZrmk, 0 },
{ X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
+ { X86::VMULSDZrr_Intk, X86::VMULSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VMULSSZrr_Intk, X86::VMULSSZrm_Intk, TB_NO_REVERSE },
{ X86::VORPDZrrk, X86::VORPDZrmk, 0 },
{ X86::VORPSZrrk, X86::VORPSZrmk, 0 },
+ { X86::VPACKSSDWZrrk, X86::VPACKSSDWZrmk, 0 },
+ { X86::VPACKSSWBZrrk, X86::VPACKSSWBZrmk, 0 },
+ { X86::VPACKUSDWZrrk, X86::VPACKUSDWZrmk, 0 },
+ { X86::VPACKUSWBZrrk, X86::VPACKUSWBZrmk, 0 },
{ X86::VPADDBZrrk, X86::VPADDBZrmk, 0 },
{ X86::VPADDDZrrk, X86::VPADDDZrmk, 0 },
{ X86::VPADDQZrrk, X86::VPADDQZrmk, 0 },
@@ -2692,6 +3106,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDNDZrrk, X86::VPANDNDZrmk, 0 },
{ X86::VPANDNQZrrk, X86::VPANDNQZrmk, 0 },
{ X86::VPANDQZrrk, X86::VPANDQZrmk, 0 },
+ { X86::VPAVGBZrrk, X86::VPAVGBZrmk, 0 },
+ { X86::VPAVGWZrrk, X86::VPAVGWZrmk, 0 },
{ X86::VPERMBZrrk, X86::VPERMBZrmk, 0 },
{ X86::VPERMDZrrk, X86::VPERMDZrmk, 0 },
{ X86::VPERMI2Brrk, X86::VPERMI2Brmk, 0 },
@@ -2714,9 +3130,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMWZrrk, X86::VPERMWZrmk, 0 },
{ X86::VPMADDUBSWZrrk, X86::VPMADDUBSWZrmk, 0 },
{ X86::VPMADDWDZrrk, X86::VPMADDWDZrmk, 0 },
+ { X86::VPMAXSBZrrk, X86::VPMAXSBZrmk, 0 },
+ { X86::VPMAXSDZrrk, X86::VPMAXSDZrmk, 0 },
+ { X86::VPMAXSQZrrk, X86::VPMAXSQZrmk, 0 },
+ { X86::VPMAXSWZrrk, X86::VPMAXSWZrmk, 0 },
+ { X86::VPMAXUBZrrk, X86::VPMAXUBZrmk, 0 },
+ { X86::VPMAXUDZrrk, X86::VPMAXUDZrmk, 0 },
+ { X86::VPMAXUQZrrk, X86::VPMAXUQZrmk, 0 },
+ { X86::VPMAXUWZrrk, X86::VPMAXUWZrmk, 0 },
+ { X86::VPMINSBZrrk, X86::VPMINSBZrmk, 0 },
+ { X86::VPMINSDZrrk, X86::VPMINSDZrmk, 0 },
+ { X86::VPMINSQZrrk, X86::VPMINSQZrmk, 0 },
+ { X86::VPMINSWZrrk, X86::VPMINSWZrmk, 0 },
+ { X86::VPMINUBZrrk, X86::VPMINUBZrmk, 0 },
+ { X86::VPMINUDZrrk, X86::VPMINUDZrmk, 0 },
+ { X86::VPMINUQZrrk, X86::VPMINUQZrmk, 0 },
+ { X86::VPMINUWZrrk, X86::VPMINUWZrmk, 0 },
+ { X86::VPMULDQZrrk, X86::VPMULDQZrmk, 0 },
+ { X86::VPMULLDZrrk, X86::VPMULLDZrmk, 0 },
+ { X86::VPMULLQZrrk, X86::VPMULLQZrmk, 0 },
+ { X86::VPMULLWZrrk, X86::VPMULLWZrmk, 0 },
+ { X86::VPMULUDQZrrk, X86::VPMULUDQZrmk, 0 },
{ X86::VPORDZrrk, X86::VPORDZrmk, 0 },
{ X86::VPORQZrrk, X86::VPORQZrmk, 0 },
{ X86::VPSHUFBZrrk, X86::VPSHUFBZrmk, 0 },
+ { X86::VPSLLDZrrk, X86::VPSLLDZrmk, 0 },
+ { X86::VPSLLQZrrk, X86::VPSLLQZrmk, 0 },
+ { X86::VPSLLVDZrrk, X86::VPSLLVDZrmk, 0 },
+ { X86::VPSLLVQZrrk, X86::VPSLLVQZrmk, 0 },
+ { X86::VPSLLVWZrrk, X86::VPSLLVWZrmk, 0 },
+ { X86::VPSLLWZrrk, X86::VPSLLWZrmk, 0 },
+ { X86::VPSRADZrrk, X86::VPSRADZrmk, 0 },
+ { X86::VPSRAQZrrk, X86::VPSRAQZrmk, 0 },
+ { X86::VPSRAVDZrrk, X86::VPSRAVDZrmk, 0 },
+ { X86::VPSRAVQZrrk, X86::VPSRAVQZrmk, 0 },
+ { X86::VPSRAVWZrrk, X86::VPSRAVWZrmk, 0 },
+ { X86::VPSRAWZrrk, X86::VPSRAWZrmk, 0 },
+ { X86::VPSRLDZrrk, X86::VPSRLDZrmk, 0 },
+ { X86::VPSRLQZrrk, X86::VPSRLQZrmk, 0 },
+ { X86::VPSRLVDZrrk, X86::VPSRLVDZrmk, 0 },
+ { X86::VPSRLVQZrrk, X86::VPSRLVQZrmk, 0 },
+ { X86::VPSRLVWZrrk, X86::VPSRLVWZrmk, 0 },
+ { X86::VPSRLWZrrk, X86::VPSRLWZrmk, 0 },
{ X86::VPSUBBZrrk, X86::VPSUBBZrmk, 0 },
{ X86::VPSUBDZrrk, X86::VPSUBDZrmk, 0 },
{ X86::VPSUBQZrrk, X86::VPSUBQZrmk, 0 },
@@ -2736,8 +3191,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPUNPCKLWDZrrk, X86::VPUNPCKLWDZrmk, 0 },
{ X86::VPXORDZrrk, X86::VPXORDZrmk, 0 },
{ X86::VPXORQZrrk, X86::VPXORQZrmk, 0 },
+ { X86::VSHUFPDZrrik, X86::VSHUFPDZrmik, 0 },
+ { X86::VSHUFPSZrrik, X86::VSHUFPSZrmik, 0 },
{ X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 },
{ X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 },
+ { X86::VSUBSDZrr_Intk, X86::VSUBSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VSUBSSZrr_Intk, X86::VSUBSSZrm_Intk, TB_NO_REVERSE },
{ X86::VUNPCKHPDZrrk, X86::VUNPCKHPDZrmk, 0 },
{ X86::VUNPCKHPSZrrk, X86::VUNPCKHPSZrmk, 0 },
{ X86::VUNPCKLPDZrrk, X86::VUNPCKLPDZrmk, 0 },
@@ -2772,6 +3231,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 },
{ X86::VORPDZ256rrk, X86::VORPDZ256rmk, 0 },
{ X86::VORPSZ256rrk, X86::VORPSZ256rmk, 0 },
+ { X86::VPACKSSDWZ256rrk, X86::VPACKSSDWZ256rmk, 0 },
+ { X86::VPACKSSWBZ256rrk, X86::VPACKSSWBZ256rmk, 0 },
+ { X86::VPACKUSDWZ256rrk, X86::VPACKUSDWZ256rmk, 0 },
+ { X86::VPACKUSWBZ256rrk, X86::VPACKUSWBZ256rmk, 0 },
{ X86::VPADDBZ256rrk, X86::VPADDBZ256rmk, 0 },
{ X86::VPADDDZ256rrk, X86::VPADDDZ256rmk, 0 },
{ X86::VPADDQZ256rrk, X86::VPADDQZ256rmk, 0 },
@@ -2785,6 +3248,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDNDZ256rrk, X86::VPANDNDZ256rmk, 0 },
{ X86::VPANDNQZ256rrk, X86::VPANDNQZ256rmk, 0 },
{ X86::VPANDQZ256rrk, X86::VPANDQZ256rmk, 0 },
+ { X86::VPAVGBZ256rrk, X86::VPAVGBZ256rmk, 0 },
+ { X86::VPAVGWZ256rrk, X86::VPAVGWZ256rmk, 0 },
{ X86::VPERMBZ256rrk, X86::VPERMBZ256rmk, 0 },
{ X86::VPERMDZ256rrk, X86::VPERMDZ256rmk, 0 },
{ X86::VPERMI2B256rrk, X86::VPERMI2B256rmk, 0 },
@@ -2807,9 +3272,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMWZ256rrk, X86::VPERMWZ256rmk, 0 },
{ X86::VPMADDUBSWZ256rrk, X86::VPMADDUBSWZ256rmk, 0 },
{ X86::VPMADDWDZ256rrk, X86::VPMADDWDZ256rmk, 0 },
+ { X86::VPMAXSBZ256rrk, X86::VPMAXSBZ256rmk, 0 },
+ { X86::VPMAXSDZ256rrk, X86::VPMAXSDZ256rmk, 0 },
+ { X86::VPMAXSQZ256rrk, X86::VPMAXSQZ256rmk, 0 },
+ { X86::VPMAXSWZ256rrk, X86::VPMAXSWZ256rmk, 0 },
+ { X86::VPMAXUBZ256rrk, X86::VPMAXUBZ256rmk, 0 },
+ { X86::VPMAXUDZ256rrk, X86::VPMAXUDZ256rmk, 0 },
+ { X86::VPMAXUQZ256rrk, X86::VPMAXUQZ256rmk, 0 },
+ { X86::VPMAXUWZ256rrk, X86::VPMAXUWZ256rmk, 0 },
+ { X86::VPMINSBZ256rrk, X86::VPMINSBZ256rmk, 0 },
+ { X86::VPMINSDZ256rrk, X86::VPMINSDZ256rmk, 0 },
+ { X86::VPMINSQZ256rrk, X86::VPMINSQZ256rmk, 0 },
+ { X86::VPMINSWZ256rrk, X86::VPMINSWZ256rmk, 0 },
+ { X86::VPMINUBZ256rrk, X86::VPMINUBZ256rmk, 0 },
+ { X86::VPMINUDZ256rrk, X86::VPMINUDZ256rmk, 0 },
+ { X86::VPMINUQZ256rrk, X86::VPMINUQZ256rmk, 0 },
+ { X86::VPMINUWZ256rrk, X86::VPMINUWZ256rmk, 0 },
+ { X86::VPMULDQZ256rrk, X86::VPMULDQZ256rmk, 0 },
+ { X86::VPMULLDZ256rrk, X86::VPMULLDZ256rmk, 0 },
+ { X86::VPMULLQZ256rrk, X86::VPMULLQZ256rmk, 0 },
+ { X86::VPMULLWZ256rrk, X86::VPMULLWZ256rmk, 0 },
+ { X86::VPMULUDQZ256rrk, X86::VPMULUDQZ256rmk, 0 },
{ X86::VPORDZ256rrk, X86::VPORDZ256rmk, 0 },
{ X86::VPORQZ256rrk, X86::VPORQZ256rmk, 0 },
{ X86::VPSHUFBZ256rrk, X86::VPSHUFBZ256rmk, 0 },
+ { X86::VPSLLDZ256rrk, X86::VPSLLDZ256rmk, 0 },
+ { X86::VPSLLQZ256rrk, X86::VPSLLQZ256rmk, 0 },
+ { X86::VPSLLVDZ256rrk, X86::VPSLLVDZ256rmk, 0 },
+ { X86::VPSLLVQZ256rrk, X86::VPSLLVQZ256rmk, 0 },
+ { X86::VPSLLVWZ256rrk, X86::VPSLLVWZ256rmk, 0 },
+ { X86::VPSLLWZ256rrk, X86::VPSLLWZ256rmk, 0 },
+ { X86::VPSRADZ256rrk, X86::VPSRADZ256rmk, 0 },
+ { X86::VPSRAQZ256rrk, X86::VPSRAQZ256rmk, 0 },
+ { X86::VPSRAVDZ256rrk, X86::VPSRAVDZ256rmk, 0 },
+ { X86::VPSRAVQZ256rrk, X86::VPSRAVQZ256rmk, 0 },
+ { X86::VPSRAVWZ256rrk, X86::VPSRAVWZ256rmk, 0 },
+ { X86::VPSRAWZ256rrk, X86::VPSRAWZ256rmk, 0 },
+ { X86::VPSRLDZ256rrk, X86::VPSRLDZ256rmk, 0 },
+ { X86::VPSRLQZ256rrk, X86::VPSRLQZ256rmk, 0 },
+ { X86::VPSRLVDZ256rrk, X86::VPSRLVDZ256rmk, 0 },
+ { X86::VPSRLVQZ256rrk, X86::VPSRLVQZ256rmk, 0 },
+ { X86::VPSRLVWZ256rrk, X86::VPSRLVWZ256rmk, 0 },
+ { X86::VPSRLWZ256rrk, X86::VPSRLWZ256rmk, 0 },
{ X86::VPSUBBZ256rrk, X86::VPSUBBZ256rmk, 0 },
{ X86::VPSUBDZ256rrk, X86::VPSUBDZ256rmk, 0 },
{ X86::VPSUBQZ256rrk, X86::VPSUBQZ256rmk, 0 },
@@ -2830,6 +3334,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPUNPCKLWDZ256rrk, X86::VPUNPCKLWDZ256rmk, 0 },
{ X86::VPXORDZ256rrk, X86::VPXORDZ256rmk, 0 },
{ X86::VPXORQZ256rrk, X86::VPXORQZ256rmk, 0 },
+ { X86::VSHUFPDZ256rrik, X86::VSHUFPDZ256rmik, 0 },
+ { X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmik, 0 },
{ X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 },
{ X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 },
{ X86::VUNPCKHPDZ256rrk, X86::VUNPCKHPDZ256rmk, 0 },
@@ -2862,6 +3368,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 },
{ X86::VORPDZ128rrk, X86::VORPDZ128rmk, 0 },
{ X86::VORPSZ128rrk, X86::VORPSZ128rmk, 0 },
+ { X86::VPACKSSDWZ128rrk, X86::VPACKSSDWZ128rmk, 0 },
+ { X86::VPACKSSWBZ128rrk, X86::VPACKSSWBZ128rmk, 0 },
+ { X86::VPACKUSDWZ128rrk, X86::VPACKUSDWZ128rmk, 0 },
+ { X86::VPACKUSWBZ128rrk, X86::VPACKUSWBZ128rmk, 0 },
{ X86::VPADDBZ128rrk, X86::VPADDBZ128rmk, 0 },
{ X86::VPADDDZ128rrk, X86::VPADDDZ128rmk, 0 },
{ X86::VPADDQZ128rrk, X86::VPADDQZ128rmk, 0 },
@@ -2875,6 +3385,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPANDNDZ128rrk, X86::VPANDNDZ128rmk, 0 },
{ X86::VPANDNQZ128rrk, X86::VPANDNQZ128rmk, 0 },
{ X86::VPANDQZ128rrk, X86::VPANDQZ128rmk, 0 },
+ { X86::VPAVGBZ128rrk, X86::VPAVGBZ128rmk, 0 },
+ { X86::VPAVGWZ128rrk, X86::VPAVGWZ128rmk, 0 },
{ X86::VPERMBZ128rrk, X86::VPERMBZ128rmk, 0 },
{ X86::VPERMI2B128rrk, X86::VPERMI2B128rmk, 0 },
{ X86::VPERMI2D128rrk, X86::VPERMI2D128rmk, 0 },
@@ -2893,9 +3405,48 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMWZ128rrk, X86::VPERMWZ128rmk, 0 },
{ X86::VPMADDUBSWZ128rrk, X86::VPMADDUBSWZ128rmk, 0 },
{ X86::VPMADDWDZ128rrk, X86::VPMADDWDZ128rmk, 0 },
+ { X86::VPMAXSBZ128rrk, X86::VPMAXSBZ128rmk, 0 },
+ { X86::VPMAXSDZ128rrk, X86::VPMAXSDZ128rmk, 0 },
+ { X86::VPMAXSQZ128rrk, X86::VPMAXSQZ128rmk, 0 },
+ { X86::VPMAXSWZ128rrk, X86::VPMAXSWZ128rmk, 0 },
+ { X86::VPMAXUBZ128rrk, X86::VPMAXUBZ128rmk, 0 },
+ { X86::VPMAXUDZ128rrk, X86::VPMAXUDZ128rmk, 0 },
+ { X86::VPMAXUQZ128rrk, X86::VPMAXUQZ128rmk, 0 },
+ { X86::VPMAXUWZ128rrk, X86::VPMAXUWZ128rmk, 0 },
+ { X86::VPMINSBZ128rrk, X86::VPMINSBZ128rmk, 0 },
+ { X86::VPMINSDZ128rrk, X86::VPMINSDZ128rmk, 0 },
+ { X86::VPMINSQZ128rrk, X86::VPMINSQZ128rmk, 0 },
+ { X86::VPMINSWZ128rrk, X86::VPMINSWZ128rmk, 0 },
+ { X86::VPMINUBZ128rrk, X86::VPMINUBZ128rmk, 0 },
+ { X86::VPMINUDZ128rrk, X86::VPMINUDZ128rmk, 0 },
+ { X86::VPMINUQZ128rrk, X86::VPMINUQZ128rmk, 0 },
+ { X86::VPMINUWZ128rrk, X86::VPMINUWZ128rmk, 0 },
+ { X86::VPMULDQZ128rrk, X86::VPMULDQZ128rmk, 0 },
+ { X86::VPMULLDZ128rrk, X86::VPMULLDZ128rmk, 0 },
+ { X86::VPMULLQZ128rrk, X86::VPMULLQZ128rmk, 0 },
+ { X86::VPMULLWZ128rrk, X86::VPMULLWZ128rmk, 0 },
+ { X86::VPMULUDQZ128rrk, X86::VPMULUDQZ128rmk, 0 },
{ X86::VPORDZ128rrk, X86::VPORDZ128rmk, 0 },
{ X86::VPORQZ128rrk, X86::VPORQZ128rmk, 0 },
{ X86::VPSHUFBZ128rrk, X86::VPSHUFBZ128rmk, 0 },
+ { X86::VPSLLDZ128rrk, X86::VPSLLDZ128rmk, 0 },
+ { X86::VPSLLQZ128rrk, X86::VPSLLQZ128rmk, 0 },
+ { X86::VPSLLVDZ128rrk, X86::VPSLLVDZ128rmk, 0 },
+ { X86::VPSLLVQZ128rrk, X86::VPSLLVQZ128rmk, 0 },
+ { X86::VPSLLVWZ128rrk, X86::VPSLLVWZ128rmk, 0 },
+ { X86::VPSLLWZ128rrk, X86::VPSLLWZ128rmk, 0 },
+ { X86::VPSRADZ128rrk, X86::VPSRADZ128rmk, 0 },
+ { X86::VPSRAQZ128rrk, X86::VPSRAQZ128rmk, 0 },
+ { X86::VPSRAVDZ128rrk, X86::VPSRAVDZ128rmk, 0 },
+ { X86::VPSRAVQZ128rrk, X86::VPSRAVQZ128rmk, 0 },
+ { X86::VPSRAVWZ128rrk, X86::VPSRAVWZ128rmk, 0 },
+ { X86::VPSRAWZ128rrk, X86::VPSRAWZ128rmk, 0 },
+ { X86::VPSRLDZ128rrk, X86::VPSRLDZ128rmk, 0 },
+ { X86::VPSRLQZ128rrk, X86::VPSRLQZ128rmk, 0 },
+ { X86::VPSRLVDZ128rrk, X86::VPSRLVDZ128rmk, 0 },
+ { X86::VPSRLVQZ128rrk, X86::VPSRLVQZ128rmk, 0 },
+ { X86::VPSRLVWZ128rrk, X86::VPSRLVWZ128rmk, 0 },
+ { X86::VPSRLWZ128rrk, X86::VPSRLWZ128rmk, 0 },
{ X86::VPSUBBZ128rrk, X86::VPSUBBZ128rmk, 0 },
{ X86::VPSUBDZ128rrk, X86::VPSUBDZ128rmk, 0 },
{ X86::VPSUBQZ128rrk, X86::VPSUBQZ128rmk, 0 },
@@ -2916,6 +3467,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPUNPCKLWDZ128rrk, X86::VPUNPCKLWDZ128rmk, 0 },
{ X86::VPXORDZ128rrk, X86::VPXORDZ128rmk, 0 },
{ X86::VPXORQZ128rrk, X86::VPXORQZ128rmk, 0 },
+ { X86::VSHUFPDZ128rrik, X86::VSHUFPDZ128rmik, 0 },
+ { X86::VSHUFPSZ128rrik, X86::VSHUFPSZ128rmik, 0 },
{ X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 },
{ X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 },
{ X86::VUNPCKHPDZ128rrk, X86::VUNPCKHPDZ128rmk, 0 },
@@ -3063,18 +3616,13 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
const MachineFunction *MF = MI.getParent()->getParent();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
- if (MI.getOpcode() == getCallFrameSetupOpcode() ||
- MI.getOpcode() == getCallFrameDestroyOpcode()) {
+ if (isFrameInstr(MI)) {
unsigned StackAlign = TFI->getStackAlignment();
- int SPAdj =
- (MI.getOperand(0).getImm() + StackAlign - 1) / StackAlign * StackAlign;
-
- SPAdj -= MI.getOperand(1).getImm();
-
- if (MI.getOpcode() == getCallFrameSetupOpcode())
- return SPAdj;
- else
- return -SPAdj;
+ int SPAdj = alignTo(getFrameSize(MI), StackAlign);
+ SPAdj -= getFrameAdjustment(MI);
+ if (!isFrameSetup(MI))
+ SPAdj = -SPAdj;
+ return SPAdj;
}
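
The rewritten arithmetic is worth a sanity check in isolation. A minimal standalone sketch, assuming getFrameSize(MI) reads operand 0's immediate and getFrameAdjustment(MI) reads operand 1's; the free functions below are stand-ins invented for the example:

#include <cassert>
#include <cstdint>

// Same rounding the deleted open-coded expression performed.
static int64_t alignTo(int64_t Value, int64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

// FrameSize stands in for getFrameSize(MI), InternalAdj for
// getFrameAdjustment(MI).
static int64_t spAdjust(int64_t FrameSize, int64_t InternalAdj,
                        int64_t StackAlign, bool IsSetup) {
  int64_t SPAdj = alignTo(FrameSize, StackAlign) - InternalAdj;
  return IsSetup ? SPAdj : -SPAdj;
}

int main() {
  // 20 bytes of outgoing arguments, 16-byte stack alignment, 8 bytes
  // already adjusted inside the setup..destroy sequence (e.g. a push).
  assert(spAdjust(20, 8, 16, /*IsSetup=*/true) == 24);   // alignTo(20,16)=32
  assert(spAdjust(20, 8, 16, /*IsSetup=*/false) == -24); // destroy negates
}
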
// To know whether a call adjusts the stack, we need information
@@ -3569,7 +4117,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
const DebugLoc &DL = Orig.getDebugLoc();
BuildMI(MBB, I, DL, get(X86::MOV32ri))
- .addOperand(Orig.getOperand(0))
+ .add(Orig.getOperand(0))
.addImm(Value);
} else {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
@@ -3654,10 +4202,10 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
// Virtual register of the wrong class, we have to create a temporary 64-bit
// vreg to feed into the LEA.
NewSrc = MF.getRegInfo().createVirtualRegister(RC);
- MachineInstr *Copy = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
- get(TargetOpcode::COPY))
- .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
- .addOperand(Src);
+ MachineInstr *Copy =
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
+ .add(Src);
// Which is obviously going to be dead after we're done with it.
isKill = true;
@@ -3823,10 +4371,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return nullptr;
NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
- .addOperand(Dest)
+ .add(Dest)
.addReg(0)
.addImm(1ULL << ShAmt)
- .addOperand(Src)
+ .add(Src)
.addImm(0)
.addReg(0);
break;
@@ -3848,14 +4396,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstrBuilder MIB =
BuildMI(MF, MI.getDebugLoc(), get(Opc))
- .addOperand(Dest)
+ .add(Dest)
.addReg(0)
.addImm(1ULL << ShAmt)
.addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
.addImm(0)
.addReg(0);
if (ImplicitOp.getReg() != 0)
- MIB.addOperand(ImplicitOp);
+ MIB.add(ImplicitOp);
NewMI = MIB;
break;
@@ -3869,10 +4417,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
: nullptr;
NewMI = BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r))
- .addOperand(Dest)
+ .add(Dest)
.addReg(0)
.addImm(1ULL << ShAmt)
- .addOperand(Src)
+ .add(Src)
.addImm(0)
.addReg(0);
break;
@@ -3891,11 +4439,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstrBuilder MIB =
BuildMI(MF, MI.getDebugLoc(), get(Opc))
- .addOperand(Dest)
+ .add(Dest)
.addReg(SrcReg,
getKillRegState(isKill) | getUndefRegState(isUndef));
if (ImplicitOp.getReg() != 0)
- MIB.addOperand(ImplicitOp);
+ MIB.add(ImplicitOp);
NewMI = addOffset(MIB, 1);
break;
@@ -3905,10 +4453,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
: nullptr;
assert(MI.getNumOperands() >= 2 && "Unknown inc instruction!");
- NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r))
- .addOperand(Dest)
- .addOperand(Src),
- 1);
+ NewMI = addOffset(
+ BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src), 1);
break;
case X86::DEC64r:
case X86::DEC32r: {
@@ -3924,11 +4470,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
- .addOperand(Dest)
+ .add(Dest)
.addReg(SrcReg, getUndefRegState(isUndef) |
getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
- MIB.addOperand(ImplicitOp);
+ MIB.add(ImplicitOp);
NewMI = addOffset(MIB, -1);
@@ -3939,10 +4485,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
: nullptr;
assert(MI.getNumOperands() >= 2 && "Unknown dec instruction!");
- NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r))
- .addOperand(Dest)
- .addOperand(Src),
- -1);
+ NewMI = addOffset(
+ BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src), -1);
break;
case X86::ADD64rr:
case X86::ADD64rr_DB:
@@ -3970,12 +4514,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
SrcReg2, isKill2, isUndef2, ImplicitOp2, LV))
return nullptr;
- MachineInstrBuilder MIB =
- BuildMI(MF, MI.getDebugLoc(), get(Opc)).addOperand(Dest);
+ MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest);
if (ImplicitOp.getReg() != 0)
- MIB.addOperand(ImplicitOp);
+ MIB.add(ImplicitOp);
if (ImplicitOp2.getReg() != 0)
- MIB.addOperand(ImplicitOp2);
+ MIB.add(ImplicitOp2);
NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);
@@ -3995,9 +4538,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Src2 = MI.getOperand(2).getReg();
bool isKill2 = MI.getOperand(2).isKill();
- NewMI = addRegReg(
- BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).addOperand(Dest),
- Src.getReg(), Src.isKill(), Src2, isKill2);
+ NewMI = addRegReg(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest),
+ Src.getReg(), Src.isKill(), Src2, isKill2);
// Preserve undefness of the operands.
bool isUndef = MI.getOperand(1).isUndef();
@@ -4014,10 +4556,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD64ri32_DB:
case X86::ADD64ri8_DB:
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
- .addOperand(Dest)
- .addOperand(Src),
- MI.getOperand(2));
+ NewMI = addOffset(
+ BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src),
+ MI.getOperand(2));
break;
case X86::ADD32ri:
case X86::ADD32ri8:
@@ -4034,11 +4575,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
- .addOperand(Dest)
+ .add(Dest)
.addReg(SrcReg, getUndefRegState(isUndef) |
getKillRegState(isKill));
if (ImplicitOp.getReg() != 0)
- MIB.addOperand(ImplicitOp);
+ MIB.add(ImplicitOp);
NewMI = addOffset(MIB, MI.getOperand(2));
break;
@@ -4051,12 +4592,136 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV)
: nullptr;
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r))
- .addOperand(Dest)
- .addOperand(Src),
- MI.getOperand(2));
+ NewMI = addOffset(
+ BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src),
+ MI.getOperand(2));
+ break;
+
+ case X86::VMOVDQU8Z128rmk:
+ case X86::VMOVDQU8Z256rmk:
+ case X86::VMOVDQU8Zrmk:
+ case X86::VMOVDQU16Z128rmk:
+ case X86::VMOVDQU16Z256rmk:
+ case X86::VMOVDQU16Zrmk:
+ case X86::VMOVDQU32Z128rmk: case X86::VMOVDQA32Z128rmk:
+ case X86::VMOVDQU32Z256rmk: case X86::VMOVDQA32Z256rmk:
+ case X86::VMOVDQU32Zrmk: case X86::VMOVDQA32Zrmk:
+ case X86::VMOVDQU64Z128rmk: case X86::VMOVDQA64Z128rmk:
+ case X86::VMOVDQU64Z256rmk: case X86::VMOVDQA64Z256rmk:
+ case X86::VMOVDQU64Zrmk: case X86::VMOVDQA64Zrmk:
+ case X86::VMOVUPDZ128rmk: case X86::VMOVAPDZ128rmk:
+ case X86::VMOVUPDZ256rmk: case X86::VMOVAPDZ256rmk:
+ case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk:
+ case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk:
+ case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk:
+ case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: {
+ unsigned Opc;
+ switch (MIOpc) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
+ case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
+ case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
+ case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
+ case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
+ case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
+ case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
+ case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
+ case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
+ case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
+ case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
+ case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
+ case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
+ case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
+ case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
+ case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
+ case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
+ case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
+ case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
+ case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
+ case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
+ case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
+ case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
+ case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
+ case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
+ case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
+ case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
+ case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
+ case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
+ case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
+ }
+
+ NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
+ .add(Dest)
+ .add(MI.getOperand(2))
+ .add(Src)
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5))
+ .add(MI.getOperand(6))
+ .add(MI.getOperand(7));
break;
}
+ case X86::VMOVDQU8Z128rrk:
+ case X86::VMOVDQU8Z256rrk:
+ case X86::VMOVDQU8Zrrk:
+ case X86::VMOVDQU16Z128rrk:
+ case X86::VMOVDQU16Z256rrk:
+ case X86::VMOVDQU16Zrrk:
+ case X86::VMOVDQU32Z128rrk: case X86::VMOVDQA32Z128rrk:
+ case X86::VMOVDQU32Z256rrk: case X86::VMOVDQA32Z256rrk:
+ case X86::VMOVDQU32Zrrk: case X86::VMOVDQA32Zrrk:
+ case X86::VMOVDQU64Z128rrk: case X86::VMOVDQA64Z128rrk:
+ case X86::VMOVDQU64Z256rrk: case X86::VMOVDQA64Z256rrk:
+ case X86::VMOVDQU64Zrrk: case X86::VMOVDQA64Zrrk:
+ case X86::VMOVUPDZ128rrk: case X86::VMOVAPDZ128rrk:
+ case X86::VMOVUPDZ256rrk: case X86::VMOVAPDZ256rrk:
+ case X86::VMOVUPDZrrk: case X86::VMOVAPDZrrk:
+ case X86::VMOVUPSZ128rrk: case X86::VMOVAPSZ128rrk:
+ case X86::VMOVUPSZ256rrk: case X86::VMOVAPSZ256rrk:
+ case X86::VMOVUPSZrrk: case X86::VMOVAPSZrrk: {
+ unsigned Opc;
+ switch (MIOpc) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::VMOVDQU8Z128rrk: Opc = X86::VPBLENDMBZ128rrk; break;
+ case X86::VMOVDQU8Z256rrk: Opc = X86::VPBLENDMBZ256rrk; break;
+ case X86::VMOVDQU8Zrrk: Opc = X86::VPBLENDMBZrrk; break;
+ case X86::VMOVDQU16Z128rrk: Opc = X86::VPBLENDMWZ128rrk; break;
+ case X86::VMOVDQU16Z256rrk: Opc = X86::VPBLENDMWZ256rrk; break;
+ case X86::VMOVDQU16Zrrk: Opc = X86::VPBLENDMWZrrk; break;
+ case X86::VMOVDQU32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
+ case X86::VMOVDQU32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
+ case X86::VMOVDQU32Zrrk: Opc = X86::VPBLENDMDZrrk; break;
+ case X86::VMOVDQU64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
+ case X86::VMOVDQU64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
+ case X86::VMOVDQU64Zrrk: Opc = X86::VPBLENDMQZrrk; break;
+ case X86::VMOVUPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break;
+ case X86::VMOVUPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break;
+ case X86::VMOVUPDZrrk: Opc = X86::VBLENDMPDZrrk; break;
+ case X86::VMOVUPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break;
+ case X86::VMOVUPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break;
+ case X86::VMOVUPSZrrk: Opc = X86::VBLENDMPSZrrk; break;
+ case X86::VMOVDQA32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
+ case X86::VMOVDQA32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
+ case X86::VMOVDQA32Zrrk: Opc = X86::VPBLENDMDZrrk; break;
+ case X86::VMOVDQA64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
+ case X86::VMOVDQA64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
+ case X86::VMOVDQA64Zrrk: Opc = X86::VPBLENDMQZrrk; break;
+ case X86::VMOVAPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break;
+ case X86::VMOVAPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break;
+ case X86::VMOVAPDZrrk: Opc = X86::VBLENDMPDZrrk; break;
+ case X86::VMOVAPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break;
+ case X86::VMOVAPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break;
+ case X86::VMOVAPSZrrk: Opc = X86::VBLENDMPSZrrk; break;
+ }
+
+ NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
+ .add(Dest)
+ .add(MI.getOperand(2))
+ .add(Src)
+ .add(MI.getOperand(3));
+ break;
+ }
+ }
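
Both new case groups exploit the fact that a masked move and a masked blend compute the same per-lane select; the move just ties the passthru operand to the destination, while BLENDM reads it as an ordinary source, which is what makes a three-address form possible. A self-contained sketch of the shared lane semantics (the scalar containers are stand-ins, not the backend's representation):

#include <array>
#include <bitset>
#include <cstdint>

using V16 = std::array<uint32_t, 16>;

// Lane semantics shared by, e.g., VMOVDQA32Zrrk and VPBLENDMDZrrk: lane i
// takes Src when mask bit i is set, otherwise the passthru value survives.
V16 blendLanes(const V16 &Passthru, std::bitset<16> K, const V16 &Src) {
  V16 Dst;
  for (unsigned i = 0; i != 16; ++i)
    Dst[i] = K[i] ? Src[i] : Passthru[i];
  return Dst;
}
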
if (!NewMI) return nullptr;
@@ -4337,6 +5002,18 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
+ case X86::PFSUBrr:
+ case X86::PFSUBRrr: {
+ // PFSUB x, y: x = x - y
+ // PFSUBR x, y: x = y - x
+ unsigned Opc =
+ (X86::PFSUBRrr == MI.getOpcode() ? X86::PFSUBrr : X86::PFSUBRrr);
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.setDesc(get(Opc));
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
case X86::BLENDPDrri:
case X86::BLENDPSrri:
case X86::PBLENDWrri:
@@ -4606,18 +5283,30 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
- case X86::VPTERNLOGDZrrik: case X86::VPTERNLOGDZrmik:
- case X86::VPTERNLOGDZ128rrik: case X86::VPTERNLOGDZ128rmik:
- case X86::VPTERNLOGDZ256rrik: case X86::VPTERNLOGDZ256rmik:
- case X86::VPTERNLOGQZrrik: case X86::VPTERNLOGQZrmik:
- case X86::VPTERNLOGQZ128rrik: case X86::VPTERNLOGQZ128rmik:
- case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGQZ256rmik:
+ case X86::VPTERNLOGDZrrik:
+ case X86::VPTERNLOGDZ128rrik:
+ case X86::VPTERNLOGDZ256rrik:
+ case X86::VPTERNLOGQZrrik:
+ case X86::VPTERNLOGQZ128rrik:
+ case X86::VPTERNLOGQZ256rrik:
case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
- case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz: {
+ case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
+ case X86::VPTERNLOGDZ128rmbi:
+ case X86::VPTERNLOGDZ256rmbi:
+ case X86::VPTERNLOGDZrmbi:
+ case X86::VPTERNLOGQZ128rmbi:
+ case X86::VPTERNLOGQZ256rmbi:
+ case X86::VPTERNLOGQZrmbi:
+ case X86::VPTERNLOGDZ128rmbikz:
+ case X86::VPTERNLOGDZ256rmbikz:
+ case X86::VPTERNLOGDZrmbikz:
+ case X86::VPTERNLOGQZ128rmbikz:
+ case X86::VPTERNLOGQZ256rmbikz:
+ case X86::VPTERNLOGQZrmbikz: {
auto &WorkingMI = cloneIfNew(MI);
if (!commuteVPTERNLOG(WorkingMI, OpIdx1, OpIdx2))
return nullptr;
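
Commuting any VPTERNLOG form, including the broadcast variants added above, also requires rewriting the imm8 inside commuteVPTERNLOG (not shown in this hunk): the immediate is a truth table indexed by the three sources, so swapping two sources permutes the table's index bits. A hedged standalone model of that permutation for the first two sources:

#include <cassert>
#include <cstdint>

// Imm8 bit at index (a<<2)|(b<<1)|c holds the result for inputs a, b, c.
uint8_t swapFirstTwoSources(uint8_t Imm) {
  uint8_t New = 0;
  for (unsigned Idx = 0; Idx != 8; ++Idx) {
    unsigned A = (Idx >> 2) & 1, B = (Idx >> 1) & 1, C = Idx & 1;
    New |= ((Imm >> Idx) & 1) << ((B << 2) | (A << 1) | C);
  }
  return New;
}

int main() {
  // 0xCA computes a ? b : c; with the first two sources swapped it must
  // compute b ? a : c, whose truth table is 0xE2.
  assert(swapFirstTwoSources(0xCA) == 0xE2);
}
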
@@ -4798,18 +5487,30 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
- case X86::VPTERNLOGDZrrik: case X86::VPTERNLOGDZrmik:
- case X86::VPTERNLOGDZ128rrik: case X86::VPTERNLOGDZ128rmik:
- case X86::VPTERNLOGDZ256rrik: case X86::VPTERNLOGDZ256rmik:
- case X86::VPTERNLOGQZrrik: case X86::VPTERNLOGQZrmik:
- case X86::VPTERNLOGQZ128rrik: case X86::VPTERNLOGQZ128rmik:
- case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGQZ256rmik:
+ case X86::VPTERNLOGDZrrik:
+ case X86::VPTERNLOGDZ128rrik:
+ case X86::VPTERNLOGDZ256rrik:
+ case X86::VPTERNLOGQZrrik:
+ case X86::VPTERNLOGQZ128rrik:
+ case X86::VPTERNLOGQZ256rrik:
case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
+ case X86::VPTERNLOGDZ128rmbi:
+ case X86::VPTERNLOGDZ256rmbi:
+ case X86::VPTERNLOGDZrmbi:
+ case X86::VPTERNLOGQZ128rmbi:
+ case X86::VPTERNLOGQZ256rmbi:
+ case X86::VPTERNLOGQZrmbi:
+ case X86::VPTERNLOGDZ128rmbikz:
+ case X86::VPTERNLOGDZ256rmbikz:
+ case X86::VPTERNLOGDZrmbikz:
+ case X86::VPTERNLOGQZ128rmbikz:
+ case X86::VPTERNLOGQZ256rmbikz:
+ case X86::VPTERNLOGQZrmbikz:
return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
default:
const X86InstrFMA3Group *FMA3Group =
@@ -5108,6 +5809,95 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
return !isPredicated(MI);
}
+bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool X86InstrInfo::canMakeTailCallConditional(
+ SmallVectorImpl<MachineOperand> &BranchCond,
+ const MachineInstr &TailCall) const {
+ if (TailCall.getOpcode() != X86::TCRETURNdi &&
+ TailCall.getOpcode() != X86::TCRETURNdi64) {
+ // Only direct calls can be done with a conditional branch.
+ return false;
+ }
+
+ const MachineFunction *MF = TailCall.getParent()->getParent();
+ if (Subtarget.isTargetWin64() && MF->hasWinCFI()) {
+ // Conditional tail calls confuse the Win64 unwinder.
+ return false;
+ }
+
+ assert(BranchCond.size() == 1);
+ if (BranchCond[0].getImm() > X86::LAST_VALID_COND) {
+ // Can't make a conditional tail call with this condition.
+ return false;
+ }
+
+ const X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
+ if (X86FI->getTCReturnAddrDelta() != 0 ||
+ TailCall.getOperand(1).getImm() != 0) {
+ // A conditional tail call cannot do any stack adjustment.
+ return false;
+ }
+
+ return true;
+}
+
+void X86InstrInfo::replaceBranchWithTailCall(
+ MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond,
+ const MachineInstr &TailCall) const {
+ assert(canMakeTailCallConditional(BranchCond, TailCall));
+
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ assert(I->isBranch() && "Can't find the branch to replace!");
+
+ X86::CondCode CC = getCondFromBranchOpc(I->getOpcode());
+ assert(BranchCond.size() == 1);
+ if (CC != BranchCond[0].getImm())
+ continue;
+
+ break;
+ }
+
+ unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc
+ : X86::TCRETURNdi64cc;
+
+ auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc));
+ MIB.add(TailCall.getOperand(0)); // Destination.
+ MIB.addImm(0); // Stack offset (not used).
+ MIB.add(BranchCond[0]); // Condition.
+ MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters.
+
+ // Add implicit uses and defs of all live regs potentially clobbered by the
+ // call. This way they still appear live across the call.
+ LivePhysRegs LiveRegs(&getRegisterInfo());
+ LiveRegs.addLiveOuts(MBB);
+ SmallVector<std::pair<unsigned, const MachineOperand *>, 8> Clobbers;
+ LiveRegs.stepForward(*MIB, Clobbers);
+ for (const auto &C : Clobbers) {
+ MIB.addReg(C.first, RegState::Implicit);
+ MIB.addReg(C.first, RegState::Implicit | RegState::Define);
+ }
+
+ I->eraseFromParent();
+}
+
// Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may
// not be a fallthrough MBB now due to layout changes). Return nullptr if the
// fallthrough MBB cannot be identified.
@@ -5514,8 +6304,6 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
// SrcReg(MaskReg) -> DestReg(GR64)
// SrcReg(MaskReg) -> DestReg(GR32)
- // SrcReg(MaskReg) -> DestReg(GR16)
- // SrcReg(MaskReg) -> DestReg(GR8)
 // All KMASK RegClasses hold the same k registers, so the register can be
 // tested against any of them.
if (X86::VK16RegClass.contains(SrcReg)) {
@@ -5525,20 +6313,10 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
}
if (X86::GR32RegClass.contains(DestReg))
return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
- if (X86::GR16RegClass.contains(DestReg)) {
- DestReg = getX86SubSuperRegister(DestReg, 32);
- return X86::KMOVWrk;
- }
- if (X86::GR8RegClass.contains(DestReg)) {
- DestReg = getX86SubSuperRegister(DestReg, 32);
- return Subtarget.hasDQI() ? X86::KMOVBrk : X86::KMOVWrk;
- }
}
// SrcReg(GR64) -> DestReg(MaskReg)
// SrcReg(GR32) -> DestReg(MaskReg)
- // SrcReg(GR16) -> DestReg(MaskReg)
- // SrcReg(GR8) -> DestReg(MaskReg)
 // All KMASK RegClasses hold the same k registers, so the register can be
 // tested against any of them.
if (X86::VK16RegClass.contains(DestReg)) {
@@ -5548,14 +6326,6 @@ static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
}
if (X86::GR32RegClass.contains(SrcReg))
return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
- if (X86::GR16RegClass.contains(SrcReg)) {
- SrcReg = getX86SubSuperRegister(SrcReg, 32);
- return X86::KMOVWkr;
- }
- if (X86::GR8RegClass.contains(SrcReg)) {
- SrcReg = getX86SubSuperRegister(SrcReg, 32);
- return Subtarget.hasDQI() ? X86::KMOVBkr : X86::KMOVWkr;
- }
}
@@ -5965,7 +6735,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
+ MIB.add(Addr[i]);
MIB.addReg(SrcReg, getKillRegState(isKill));
(*MIB).setMemRefs(MMOBegin, MMOEnd);
NewMIs.push_back(MIB);
@@ -6000,7 +6770,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
- MIB.addOperand(Addr[i]);
+ MIB.add(Addr[i]);
(*MIB).setMemRefs(MMOBegin, MMOEnd);
NewMIs.push_back(MIB);
}
@@ -6017,12 +6787,14 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
case X86::CMP16ri:
case X86::CMP16ri8:
case X86::CMP8ri:
- if (!MI.getOperand(1).isImm())
- return false;
SrcReg = MI.getOperand(0).getReg();
SrcReg2 = 0;
- CmpMask = ~0;
- CmpValue = MI.getOperand(1).getImm();
+ if (MI.getOperand(1).isImm()) {
+ CmpMask = ~0;
+ CmpValue = MI.getOperand(1).getImm();
+ } else {
+ CmpMask = CmpValue = 0;
+ }
return true;
// A SUB can be used to perform comparison.
case X86::SUB64rm:
@@ -6031,7 +6803,7 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
case X86::SUB8rm:
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
- CmpMask = ~0;
+ CmpMask = 0;
CmpValue = 0;
return true;
case X86::SUB64rr:
@@ -6040,7 +6812,7 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
case X86::SUB8rr:
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = MI.getOperand(2).getReg();
- CmpMask = ~0;
+ CmpMask = 0;
CmpValue = 0;
return true;
case X86::SUB64ri32:
@@ -6050,12 +6822,14 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
case X86::SUB16ri:
case X86::SUB16ri8:
case X86::SUB8ri:
- if (!MI.getOperand(2).isImm())
- return false;
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
- CmpMask = ~0;
- CmpValue = MI.getOperand(2).getImm();
+ if (MI.getOperand(2).isImm()) {
+ CmpMask = ~0;
+ CmpValue = MI.getOperand(2).getImm();
+ } else {
+ CmpMask = CmpValue = 0;
+ }
return true;
case X86::CMP64rr:
case X86::CMP32rr:
@@ -6063,7 +6837,7 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
case X86::CMP8rr:
SrcReg = MI.getOperand(0).getReg();
SrcReg2 = MI.getOperand(1).getReg();
- CmpMask = ~0;
+ CmpMask = 0;
CmpValue = 0;
return true;
case X86::TEST8rr:
@@ -6089,8 +6863,8 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
/// SrcReg, SrcRegs: register operands for FlagI.
/// ImmValue: immediate for FlagI if it takes an immediate.
inline static bool isRedundantFlagInstr(MachineInstr &FlagI, unsigned SrcReg,
- unsigned SrcReg2, int ImmValue,
- MachineInstr &OI) {
+ unsigned SrcReg2, int ImmMask,
+ int ImmValue, MachineInstr &OI) {
if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) ||
(FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) ||
(FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) ||
@@ -6101,7 +6875,8 @@ inline static bool isRedundantFlagInstr(MachineInstr &FlagI, unsigned SrcReg,
OI.getOperand(2).getReg() == SrcReg)))
return true;
- if (((FlagI.getOpcode() == X86::CMP64ri32 &&
+ if (ImmMask != 0 &&
+ ((FlagI.getOpcode() == X86::CMP64ri32 &&
OI.getOpcode() == X86::SUB64ri32) ||
(FlagI.getOpcode() == X86::CMP64ri8 &&
OI.getOpcode() == X86::SUB64ri8) ||
@@ -6288,7 +7063,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// If we are comparing against zero, check whether we can use MI to update
// EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
- bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0);
+ bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);
if (IsCmpZero && MI->getParent() != CmpInstr.getParent())
return false;
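
After this change CmpMask doubles as a validity flag for CmpValue: analyzeCompare reports CmpMask == 0 instead of failing outright when a compare's operand is not an immediate, and the consumers only trust CmpValue when the mask is nonzero. A minimal model of the new contract (the struct and names are invented for illustration):

#include <cassert>

struct CmpInfo {
  int Mask;  // ~0 when Value carries a real immediate, 0 otherwise
  int Value; // meaningful only when Mask != 0
};

bool isCmpAgainstZero(const CmpInfo &C) { return C.Mask != 0 && C.Value == 0; }

int main() {
  assert(isCmpAgainstZero({~0, 0}));  // cmp $0, %reg
  assert(!isCmpAgainstZero({0, 0}));  // symbolic operand: no known immediate
  assert(!isCmpAgainstZero({~0, 5})); // cmp $5, %reg
}
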
@@ -6338,8 +7113,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
for (; RI != RE; ++RI) {
MachineInstr &Instr = *RI;
// Check whether CmpInstr can be made redundant by the current instruction.
- if (!IsCmpZero &&
- isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, Instr)) {
+ if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask,
+ CmpValue, Instr)) {
Sub = &Instr;
break;
}
@@ -6764,14 +7539,33 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
assert(HasAVX && "AVX not supported");
return Expand2AddrUndef(MIB, get(X86::VXORPSYrr));
case X86::AVX512_128_SET0:
- return Expand2AddrUndef(MIB, get(X86::VPXORDZ128rr));
- case X86::AVX512_256_SET0:
- return Expand2AddrUndef(MIB, get(X86::VPXORDZ256rr));
+ case X86::AVX512_FsFLD0SS:
+ case X86::AVX512_FsFLD0SD: {
+ bool HasVLX = Subtarget.hasVLX();
+ unsigned SrcReg = MIB->getOperand(0).getReg();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
+ return Expand2AddrUndef(MIB,
+ get(HasVLX ? X86::VPXORDZ128rr : X86::VXORPSrr));
+ // Extended register without VLX. Use a larger XOR.
+ SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm, &X86::VR512RegClass);
+ MIB->getOperand(0).setReg(SrcReg);
+ return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
+ }
+ case X86::AVX512_256_SET0: {
+ bool HasVLX = Subtarget.hasVLX();
+ unsigned SrcReg = MIB->getOperand(0).getReg();
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (HasVLX || TRI->getEncodingValue(SrcReg) < 16)
+ return Expand2AddrUndef(MIB,
+ get(HasVLX ? X86::VPXORDZ256rr : X86::VXORPSYrr));
+ // Extended register without VLX. Use a larger XOR.
+ SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm, &X86::VR512RegClass);
+ MIB->getOperand(0).setReg(SrcReg);
+ return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
+ }
case X86::AVX512_512_SET0:
return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
- case X86::AVX512_FsFLD0SS:
- case X86::AVX512_FsFLD0SD:
- return Expand2AddrUndef(MIB, get(X86::VXORPSZ128rr));
case X86::V_SETALLONES:
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
case X86::AVX2_SETALLONES:
@@ -6838,11 +7632,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// registers, since it is not usable as a write mask.
// FIXME: A more advanced approach would be to choose the best input mask
// register based on context.
- case X86::KSET0B:
case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0);
case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0);
case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0);
- case X86::KSET1B:
case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0);
case X86::KSET1D: return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0);
case X86::KSET1Q: return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0);
@@ -6860,7 +7652,7 @@ static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs,
if (NumAddrOps < 4) {
 // FrameIndex only - add an immediate offset (whether it's zero or not).
for (unsigned i = 0; i != NumAddrOps; ++i)
- MIB.addOperand(MOs[i]);
+ MIB.add(MOs[i]);
addOffset(MIB, PtrOffset);
} else {
// General Memory Addressing - we need to add any offset to an existing
@@ -6871,7 +7663,7 @@ static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs,
if (i == 3 && PtrOffset != 0) {
MIB.addDisp(MO, PtrOffset);
} else {
- MIB.addOperand(MO);
+ MIB.add(MO);
}
}
}
@@ -6893,11 +7685,11 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
unsigned NumOps = MI.getDesc().getNumOperands() - 2;
for (unsigned i = 0; i != NumOps; ++i) {
MachineOperand &MO = MI.getOperand(i + 2);
- MIB.addOperand(MO);
+ MIB.add(MO);
}
for (unsigned i = NumOps + 2, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- MIB.addOperand(MO);
+ MIB.add(MO);
}
MachineBasicBlock *MBB = InsertPt->getParent();
@@ -6922,7 +7714,7 @@ static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode,
assert(MO.isReg() && "Expected to fold into reg operand!");
addOperands(MIB, MOs, PtrOffset);
} else {
- MIB.addOperand(MO);
+ MIB.add(MO);
}
}
@@ -7226,7 +8018,7 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
return false;
}
-/// Inform the ExeDepsFix pass how many idle
+/// Inform the ExecutionDepsFix pass how many idle
/// instructions we would like before a partial register update.
unsigned X86InstrInfo::getPartialRegUpdateClearance(
const MachineInstr &MI, unsigned OpNum,
@@ -7344,11 +8136,15 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
case X86::VCVTUSI642SDZrrb_Int:
case X86::VCVTUSI642SDZrm_Int:
case X86::VCVTSD2SSZrr:
- case X86::VCVTSD2SSZrrb:
+ case X86::VCVTSD2SSZrr_Int:
+ case X86::VCVTSD2SSZrrb_Int:
case X86::VCVTSD2SSZrm:
+ case X86::VCVTSD2SSZrm_Int:
case X86::VCVTSS2SDZrr:
- case X86::VCVTSS2SDZrrb:
+ case X86::VCVTSS2SDZrr_Int:
+ case X86::VCVTSS2SDZrrb_Int:
case X86::VCVTSS2SDZrm:
+ case X86::VCVTSS2SDZrm_Int:
case X86::VRNDSCALESDr:
case X86::VRNDSCALESDrb:
case X86::VRNDSCALESDm:
@@ -7375,8 +8171,8 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
return false;
}
-/// Inform the ExeDepsFix pass how many idle instructions we would like before
-/// certain undef register reads.
+/// Inform the ExecutionDepsFix pass how many idle instructions we would like
+/// before certain undef register reads.
///
/// This catches the VCVTSI2SD family of instructions:
///
@@ -7522,6 +8318,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::MINSSrr_Int: case X86::VMINSSrr_Int: case X86::VMINSSZrr_Int:
case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::VMULSSZrr_Int:
case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int:
+ case X86::VADDSSZrr_Intk: case X86::VADDSSZrr_Intkz:
+ case X86::VDIVSSZrr_Intk: case X86::VDIVSSZrr_Intkz:
+ case X86::VMAXSSZrr_Intk: case X86::VMAXSSZrr_Intkz:
+ case X86::VMINSSZrr_Intk: case X86::VMINSSZrr_Intkz:
+ case X86::VMULSSZrr_Intk: case X86::VMULSSZrr_Intkz:
+ case X86::VSUBSSZrr_Intk: case X86::VSUBSSZrr_Intkz:
case X86::VFMADDSS4rr_Int: case X86::VFNMADDSS4rr_Int:
case X86::VFMSUBSS4rr_Int: case X86::VFNMSUBSS4rr_Int:
case X86::VFMADD132SSr_Int: case X86::VFNMADD132SSr_Int:
@@ -7536,6 +8338,18 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VFMSUB132SSZr_Int: case X86::VFNMSUB132SSZr_Int:
case X86::VFMSUB213SSZr_Int: case X86::VFNMSUB213SSZr_Int:
case X86::VFMSUB231SSZr_Int: case X86::VFNMSUB231SSZr_Int:
+ case X86::VFMADD132SSZr_Intk: case X86::VFNMADD132SSZr_Intk:
+ case X86::VFMADD213SSZr_Intk: case X86::VFNMADD213SSZr_Intk:
+ case X86::VFMADD231SSZr_Intk: case X86::VFNMADD231SSZr_Intk:
+ case X86::VFMSUB132SSZr_Intk: case X86::VFNMSUB132SSZr_Intk:
+ case X86::VFMSUB213SSZr_Intk: case X86::VFNMSUB213SSZr_Intk:
+ case X86::VFMSUB231SSZr_Intk: case X86::VFNMSUB231SSZr_Intk:
+ case X86::VFMADD132SSZr_Intkz: case X86::VFNMADD132SSZr_Intkz:
+ case X86::VFMADD213SSZr_Intkz: case X86::VFNMADD213SSZr_Intkz:
+ case X86::VFMADD231SSZr_Intkz: case X86::VFNMADD231SSZr_Intkz:
+ case X86::VFMSUB132SSZr_Intkz: case X86::VFNMSUB132SSZr_Intkz:
+ case X86::VFMSUB213SSZr_Intkz: case X86::VFNMSUB213SSZr_Intkz:
+ case X86::VFMSUB231SSZr_Intkz: case X86::VFNMSUB231SSZr_Intkz:
return false;
default:
return true;
@@ -7555,6 +8369,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::MINSDrr_Int: case X86::VMINSDrr_Int: case X86::VMINSDZrr_Int:
case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::VMULSDZrr_Int:
case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int:
+ case X86::VADDSDZrr_Intk: case X86::VADDSDZrr_Intkz:
+ case X86::VDIVSDZrr_Intk: case X86::VDIVSDZrr_Intkz:
+ case X86::VMAXSDZrr_Intk: case X86::VMAXSDZrr_Intkz:
+ case X86::VMINSDZrr_Intk: case X86::VMINSDZrr_Intkz:
+ case X86::VMULSDZrr_Intk: case X86::VMULSDZrr_Intkz:
+ case X86::VSUBSDZrr_Intk: case X86::VSUBSDZrr_Intkz:
case X86::VFMADDSD4rr_Int: case X86::VFNMADDSD4rr_Int:
case X86::VFMSUBSD4rr_Int: case X86::VFNMSUBSD4rr_Int:
case X86::VFMADD132SDr_Int: case X86::VFNMADD132SDr_Int:
@@ -7569,6 +8389,18 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
case X86::VFMSUB132SDZr_Int: case X86::VFNMSUB132SDZr_Int:
case X86::VFMSUB213SDZr_Int: case X86::VFNMSUB213SDZr_Int:
case X86::VFMSUB231SDZr_Int: case X86::VFNMSUB231SDZr_Int:
+ case X86::VFMADD132SDZr_Intk: case X86::VFNMADD132SDZr_Intk:
+ case X86::VFMADD213SDZr_Intk: case X86::VFNMADD213SDZr_Intk:
+ case X86::VFMADD231SDZr_Intk: case X86::VFNMADD231SDZr_Intk:
+ case X86::VFMSUB132SDZr_Intk: case X86::VFNMSUB132SDZr_Intk:
+ case X86::VFMSUB213SDZr_Intk: case X86::VFNMSUB213SDZr_Intk:
+ case X86::VFMSUB231SDZr_Intk: case X86::VFNMSUB231SDZr_Intk:
+ case X86::VFMADD132SDZr_Intkz: case X86::VFNMADD132SDZr_Intkz:
+ case X86::VFMADD213SDZr_Intkz: case X86::VFNMADD213SDZr_Intkz:
+ case X86::VFMADD231SDZr_Intkz: case X86::VFNMADD231SDZr_Intkz:
+ case X86::VFMSUB132SDZr_Intkz: case X86::VFNMSUB132SDZr_Intkz:
+ case X86::VFMSUB213SDZr_Intkz: case X86::VFNMSUB213SDZr_Intkz:
+ case X86::VFMSUB231SDZr_Intkz: case X86::VFNMSUB231SDZr_Intkz:
return false;
default:
return true;
@@ -7800,11 +8632,11 @@ bool X86InstrInfo::unfoldMemoryOperand(
if (FoldedStore)
MIB.addReg(Reg, RegState::Define);
for (MachineOperand &BeforeOp : BeforeOps)
- MIB.addOperand(BeforeOp);
+ MIB.add(BeforeOp);
if (FoldedLoad)
MIB.addReg(Reg);
for (MachineOperand &AfterOp : AfterOps)
- MIB.addOperand(AfterOp);
+ MIB.add(AfterOp);
for (MachineOperand &ImpOp : ImpOps) {
MIB.addReg(ImpOp.getReg(),
getDefRegState(ImpOp.isDef()) |
@@ -8143,28 +8975,29 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
break;
}
- // Check if chain operands and base addresses match.
- if (Load1->getOperand(0) != Load2->getOperand(0) ||
- Load1->getOperand(5) != Load2->getOperand(5))
+ // Lambda to check whether both loads have the same value at a given operand
+ // index.
+ auto HasSameOp = [&](int I) {
+ return Load1->getOperand(I) == Load2->getOperand(I);
+ };
+
+ // All operands except the displacement should match.
+ if (!HasSameOp(X86::AddrBaseReg) || !HasSameOp(X86::AddrScaleAmt) ||
+ !HasSameOp(X86::AddrIndexReg) || !HasSameOp(X86::AddrSegmentReg))
return false;
- // Segment operands should match as well.
- if (Load1->getOperand(4) != Load2->getOperand(4))
+
+ // The chain operand (operand 5) must match as well.
+ if (!HasSameOp(5))
return false;
- // Scale should be 1, Index should be Reg0.
- if (Load1->getOperand(1) == Load2->getOperand(1) &&
- Load1->getOperand(2) == Load2->getOperand(2)) {
- if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1)
- return false;
- // Now let's examine the displacements.
- if (isa<ConstantSDNode>(Load1->getOperand(3)) &&
- isa<ConstantSDNode>(Load2->getOperand(3))) {
- Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue();
- Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue();
- return true;
- }
- }
- return false;
+ // Now check whether the displacements are constants.
+ auto Disp1 = dyn_cast<ConstantSDNode>(Load1->getOperand(X86::AddrDisp));
+ auto Disp2 = dyn_cast<ConstantSDNode>(Load2->getOperand(X86::AddrDisp));
+ if (!Disp1 || !Disp2)
+ return false;
+
+ Offset1 = Disp1->getSExtValue();
+ Offset2 = Disp2->getSExtValue();
+ return true;
}
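
Note that the rewrite also relaxes the old requirement that the scale be exactly 1; the scales now merely have to match. The slots being compared are the standard five-part x86 address plus the SDNode chain; a stand-in restatement with plain arrays instead of SDNode operands:

#include <cstdint>

// Slots: [0]=BaseReg [1]=ScaleAmt [2]=IndexReg [3]=Disp [4]=SegmentReg,
// with a chain identifier carried in slot 5 for this sketch.
bool sameBaseDifferentDisp(const int64_t (&A)[6], const int64_t (&B)[6]) {
  return A[0] == B[0] && A[1] == B[1] && A[2] == B[2] && A[4] == B[4] &&
         A[5] == B[5]; // slot 3, the displacement, may differ
}
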
bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
@@ -8215,165 +9048,6 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
return true;
}
-bool X86InstrInfo::shouldScheduleAdjacent(const MachineInstr &First,
- const MachineInstr &Second) const {
- // Check if this processor supports macro-fusion. Since this is a minor
- // heuristic, we haven't specifically reserved a feature. hasAVX is a decent
- // proxy for SandyBridge+.
- if (!Subtarget.hasAVX())
- return false;
-
- enum {
- FuseTest,
- FuseCmp,
- FuseInc
- } FuseKind;
-
- switch (Second.getOpcode()) {
- default:
- return false;
- case X86::JE_1:
- case X86::JNE_1:
- case X86::JL_1:
- case X86::JLE_1:
- case X86::JG_1:
- case X86::JGE_1:
- FuseKind = FuseInc;
- break;
- case X86::JB_1:
- case X86::JBE_1:
- case X86::JA_1:
- case X86::JAE_1:
- FuseKind = FuseCmp;
- break;
- case X86::JS_1:
- case X86::JNS_1:
- case X86::JP_1:
- case X86::JNP_1:
- case X86::JO_1:
- case X86::JNO_1:
- FuseKind = FuseTest;
- break;
- }
- switch (First.getOpcode()) {
- default:
- return false;
- case X86::TEST8rr:
- case X86::TEST16rr:
- case X86::TEST32rr:
- case X86::TEST64rr:
- case X86::TEST8ri:
- case X86::TEST16ri:
- case X86::TEST32ri:
- case X86::TEST32i32:
- case X86::TEST64i32:
- case X86::TEST64ri32:
- case X86::TEST8rm:
- case X86::TEST16rm:
- case X86::TEST32rm:
- case X86::TEST64rm:
- case X86::TEST8ri_NOREX:
- case X86::AND16i16:
- case X86::AND16ri:
- case X86::AND16ri8:
- case X86::AND16rm:
- case X86::AND16rr:
- case X86::AND32i32:
- case X86::AND32ri:
- case X86::AND32ri8:
- case X86::AND32rm:
- case X86::AND32rr:
- case X86::AND64i32:
- case X86::AND64ri32:
- case X86::AND64ri8:
- case X86::AND64rm:
- case X86::AND64rr:
- case X86::AND8i8:
- case X86::AND8ri:
- case X86::AND8rm:
- case X86::AND8rr:
- return true;
- case X86::CMP16i16:
- case X86::CMP16ri:
- case X86::CMP16ri8:
- case X86::CMP16rm:
- case X86::CMP16rr:
- case X86::CMP32i32:
- case X86::CMP32ri:
- case X86::CMP32ri8:
- case X86::CMP32rm:
- case X86::CMP32rr:
- case X86::CMP64i32:
- case X86::CMP64ri32:
- case X86::CMP64ri8:
- case X86::CMP64rm:
- case X86::CMP64rr:
- case X86::CMP8i8:
- case X86::CMP8ri:
- case X86::CMP8rm:
- case X86::CMP8rr:
- case X86::ADD16i16:
- case X86::ADD16ri:
- case X86::ADD16ri8:
- case X86::ADD16ri8_DB:
- case X86::ADD16ri_DB:
- case X86::ADD16rm:
- case X86::ADD16rr:
- case X86::ADD16rr_DB:
- case X86::ADD32i32:
- case X86::ADD32ri:
- case X86::ADD32ri8:
- case X86::ADD32ri8_DB:
- case X86::ADD32ri_DB:
- case X86::ADD32rm:
- case X86::ADD32rr:
- case X86::ADD32rr_DB:
- case X86::ADD64i32:
- case X86::ADD64ri32:
- case X86::ADD64ri32_DB:
- case X86::ADD64ri8:
- case X86::ADD64ri8_DB:
- case X86::ADD64rm:
- case X86::ADD64rr:
- case X86::ADD64rr_DB:
- case X86::ADD8i8:
- case X86::ADD8mi:
- case X86::ADD8mr:
- case X86::ADD8ri:
- case X86::ADD8rm:
- case X86::ADD8rr:
- case X86::SUB16i16:
- case X86::SUB16ri:
- case X86::SUB16ri8:
- case X86::SUB16rm:
- case X86::SUB16rr:
- case X86::SUB32i32:
- case X86::SUB32ri:
- case X86::SUB32ri8:
- case X86::SUB32rm:
- case X86::SUB32rr:
- case X86::SUB64i32:
- case X86::SUB64ri32:
- case X86::SUB64ri8:
- case X86::SUB64rm:
- case X86::SUB64rr:
- case X86::SUB8i8:
- case X86::SUB8ri:
- case X86::SUB8rm:
- case X86::SUB8rr:
- return FuseKind == FuseCmp || FuseKind == FuseInc;
- case X86::INC16r:
- case X86::INC32r:
- case X86::INC64r:
- case X86::INC8r:
- case X86::DEC16r:
- case X86::DEC32r:
- case X86::DEC64r:
- case X86::DEC8r:
- return FuseKind == FuseInc;
- }
-}
-
bool X86InstrInfo::
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 1 && "Invalid X86 branch condition!");
@@ -8424,6 +9098,7 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
{ X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
{ X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr },
+ { X86::MOVSDmr, X86::MOVSDmr, X86::MOVPQI2QImr },
{ X86::MOVSSmr, X86::MOVSSmr, X86::MOVPDI2DImr },
{ X86::MOVSDrm, X86::MOVSDrm, X86::MOVQI2PQIrm },
{ X86::MOVSSrm, X86::MOVSSrm, X86::MOVDI2PDIrm },
@@ -8443,6 +9118,7 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
{ X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
{ X86::VMOVLPSmr, X86::VMOVLPDmr, X86::VMOVPQI2QImr },
+ { X86::VMOVSDmr, X86::VMOVSDmr, X86::VMOVPQI2QImr },
{ X86::VMOVSSmr, X86::VMOVSSmr, X86::VMOVPDI2DImr },
{ X86::VMOVSDrm, X86::VMOVSDrm, X86::VMOVQI2PQIrm },
{ X86::VMOVSSrm, X86::VMOVSSrm, X86::VMOVDI2PDIrm },
@@ -8465,7 +9141,7 @@ static const uint16_t ReplaceableInstrs[][3] = {
// AVX512 support
{ X86::VMOVLPSZ128mr, X86::VMOVLPDZ128mr, X86::VMOVPQI2QIZmr },
{ X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
- { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
+ { X86::VMOVNTPSZ256mr, X86::VMOVNTPDZ256mr, X86::VMOVNTDQZ256mr },
{ X86::VMOVNTPSZmr, X86::VMOVNTPDZmr, X86::VMOVNTDQZmr },
{ X86::VMOVSDZmr, X86::VMOVSDZmr, X86::VMOVPQI2QIZmr },
{ X86::VMOVSSZmr, X86::VMOVSSZmr, X86::VMOVPDI2DIZmr },
@@ -8493,10 +9169,6 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
{ X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
{ X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
{ X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
- { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
- { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
- { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
- { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
{ X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
{ X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr },
{ X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm},
@@ -8508,6 +9180,14 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
{ X86::VBROADCASTF128, X86::VBROADCASTF128, X86::VBROADCASTI128 },
};
+static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
+ { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
+ { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
+};
+
static const uint16_t ReplaceableInstrsAVX512[][4] = {
// Two integer columns for 64-bit and 32-bit elements.
//PackedSingle PackedDouble PackedInt PackedInt
@@ -8769,16 +9449,25 @@ X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
validDomains = 0xe;
} else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
+ } else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) {
+ // Insert/extract instructions should only affect the domain if AVX2
+ // is enabled.
+ if (!Subtarget.hasAVX2())
+ return std::make_pair(0, 0);
+ validDomains = 0xe;
} else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) {
validDomains = 0xe;
- } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQ)) {
- validDomains = Subtarget.hasDQI() ? 0xe : 0x8;
- } else if (const uint16_t *table = lookupAVX512(opcode, domain,
+ } else if (Subtarget.hasDQI() && lookupAVX512(opcode, domain,
+ ReplaceableInstrsAVX512DQ)) {
+ validDomains = 0xe;
+ } else if (Subtarget.hasDQI()) {
+ if (const uint16_t *table = lookupAVX512(opcode, domain,
ReplaceableInstrsAVX512DQMasked)) {
- if (domain == 1 || (domain == 3 && table[3] == opcode))
- validDomains = Subtarget.hasDQI() ? 0xa : 0x8;
- else
- validDomains = Subtarget.hasDQI() ? 0xc : 0x8;
+ if (domain == 1 || (domain == 3 && table[3] == opcode))
+ validDomains = 0xa;
+ else
+ validDomains = 0xc;
+ }
}
}
return std::make_pair(domain, validDomains);
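
The hex constants threaded through this function are 4-bit reachability masks over the execution domains (bit 1 = PackedSingle, bit 2 = PackedDouble, bit 3 = PackedInt; bit 0, the generic domain, stays clear). A compile-time check of the encodings used above:

#include <initializer_list>

constexpr unsigned maskFor(std::initializer_list<unsigned> Domains) {
  unsigned M = 0;
  for (unsigned D : Domains)
    M |= 1u << D;
  return M;
}

static_assert(maskFor({1, 2, 3}) == 0xe, "PS|PD|PI");
static_assert(maskFor({1, 2}) == 0x6, "PS|PD: AVX1, no 256-bit int ops");
static_assert(maskFor({1, 3}) == 0xa, "PS|PI");
static_assert(maskFor({2, 3}) == 0xc, "PD|PI");
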
@@ -8794,6 +9483,11 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
"256-bit vector operations only available in AVX2");
table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
}
+ if (!table) { // try the other table
+ assert(Subtarget.hasAVX2() &&
+ "256-bit insert/extract only available in AVX2");
+ table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2InsertExtract);
+ }
if (!table) { // try the AVX512 table
assert(Subtarget.hasAVX512() && "Requires AVX-512");
table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512);
@@ -9457,28 +10151,6 @@ X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
return makeArrayRef(TargetFlags);
}
-bool X86InstrInfo::isTailCall(const MachineInstr &Inst) const {
- switch (Inst.getOpcode()) {
- case X86::TCRETURNdi:
- case X86::TCRETURNmi:
- case X86::TCRETURNri:
- case X86::TCRETURNdi64:
- case X86::TCRETURNmi64:
- case X86::TCRETURNri64:
- case X86::TAILJMPd:
- case X86::TAILJMPm:
- case X86::TAILJMPr:
- case X86::TAILJMPd64:
- case X86::TAILJMPm64:
- case X86::TAILJMPr64:
- case X86::TAILJMPm64_REX:
- case X86::TAILJMPr64_REX:
- return true;
- default:
- return false;
- }
-}
-
namespace {
/// Create Global Base Reg pass. This initializes the PIC
/// global base register for x86-32.
@@ -9665,3 +10337,124 @@ namespace {
char LDTLSCleanup::ID = 0;
FunctionPass*
llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
+
+unsigned X86InstrInfo::getOutliningBenefit(size_t SequenceSize,
+ size_t Occurrences,
+ bool CanBeTailCall) const {
+ unsigned NotOutlinedSize = SequenceSize * Occurrences;
+ unsigned OutlinedSize;
+
+ // Is it a tail call?
+ if (CanBeTailCall) {
+ // If yes, we don't have to include a return instruction; it's already in
+ // our sequence. So we have one occurrence of the sequence + #Occurrences
+ // calls.
+ OutlinedSize = SequenceSize + Occurrences;
+ } else {
+ // If not, add one for the return instruction.
+ OutlinedSize = (SequenceSize + 1) + Occurrences;
+ }
+
+ // Return the number of instructions saved by outlining this sequence.
+ return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;
+}
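
The accounting is in instruction counts, not bytes. The same math as a standalone function with a worked example:

#include <cassert>
#include <cstddef>

unsigned outliningBenefit(size_t SequenceSize, size_t Occurrences,
                          bool CanBeTailCall) {
  unsigned NotOutlined = SequenceSize * Occurrences;
  // One copy of the sequence (plus a return if it isn't a tail call), plus
  // one call per occurrence.
  unsigned Outlined =
      (CanBeTailCall ? SequenceSize : SequenceSize + 1) + Occurrences;
  return NotOutlined > Outlined ? NotOutlined - Outlined : 0;
}

int main() {
  assert(outliningBenefit(10, 3, false) == 16); // 30 before vs 11 + 3 calls
  assert(outliningBenefit(2, 2, false) == 0);   // 4 vs 5: not profitable
}
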
+
+bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {
+ return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
+}
+
+X86GenInstrInfo::MachineOutlinerInstrType
+X86InstrInfo::getOutliningType(MachineInstr &MI) const {
+
+ // Don't allow debug values to impact outlining type.
+ if (MI.isDebugValue() || MI.isIndirectDebugValue())
+ return MachineOutlinerInstrType::Invisible;
+
+ // Is this a tail call? If yes, we can outline as a tail call.
+ if (isTailCall(MI))
+ return MachineOutlinerInstrType::Legal;
+
+ // Is this the terminator of a basic block?
+ if (MI.isTerminator() || MI.isReturn()) {
+
+ // Does its parent have any successors in its MachineFunction?
+ if (MI.getParent()->succ_empty())
+ return MachineOutlinerInstrType::Legal;
+
+ // It does, so we can't tail call it.
+ return MachineOutlinerInstrType::Illegal;
+ }
+
+ // Don't outline anything that modifies or reads from the stack pointer.
+ //
+ // FIXME: There are instructions which are being manually built without
+ // explicit uses/defs so we also have to check the MCInstrDesc. We should be
+ // able to remove the extra checks once those are fixed up. For example,
+ // sometimes we might get something like %RAX<def> = POP64r 1. This won't be
+ // caught by modifiesRegister or readsRegister even though the instruction
+ // really ought to be formed so that modifiesRegister/readsRegister would
+ // catch it.
+ if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) ||
+ MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) ||
+ MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP))
+ return MachineOutlinerInstrType::Illegal;
+
+ // Outlined calls change the instruction pointer, so don't read from it.
+ if (MI.readsRegister(X86::RIP, &RI) ||
+ MI.getDesc().hasImplicitUseOfPhysReg(X86::RIP) ||
+ MI.getDesc().hasImplicitDefOfPhysReg(X86::RIP))
+ return MachineOutlinerInstrType::Illegal;
+
+ // Positions can't safely be outlined.
+ if (MI.isPosition())
+ return MachineOutlinerInstrType::Illegal;
+
+ // Make sure none of the operands of this instruction do anything tricky.
+ for (const MachineOperand &MOP : MI.operands())
+ if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
+ MOP.isTargetIndex())
+ return MachineOutlinerInstrType::Illegal;
+
+ return MachineOutlinerInstrType::Legal;
+}
+
+void X86InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool IsTailCall) const {
+
+ // If we're a tail call, we already have a return, so don't do anything.
+ if (IsTailCall)
+ return;
+
+ // We're a normal call, so our sequence doesn't have a return instruction.
+ // Add it in.
+ MachineInstr *retq = BuildMI(MF, DebugLoc(), get(X86::RETQ));
+ MBB.insert(MBB.end(), retq);
+}
+
+void X86InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool IsTailCall) const {
+}
+
+MachineBasicBlock::iterator
+X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &It,
+ MachineFunction &MF,
+ bool IsTailCall) const {
+ // Is it a tail call?
+ if (IsTailCall) {
+ // Yes, just insert a JMP.
+ It = MBB.insert(It,
+ BuildMI(MF, DebugLoc(), get(X86::JMP_1))
+ .addGlobalAddress(M.getNamedValue(MF.getName())));
+ } else {
+ // No, insert a call.
+ It = MBB.insert(It,
+ BuildMI(MF, DebugLoc(), get(X86::CALL64pcrel32))
+ .addGlobalAddress(M.getNamedValue(MF.getName())));
+ }
+
+ return It;
+}
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index acfdef4da7a3..2fee48570ce1 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -182,6 +182,20 @@ public:
///
const X86RegisterInfo &getRegisterInfo() const { return RI; }
+ /// Returns the stack pointer adjustment that happens inside the frame
+ /// setup..destroy sequence (e.g. by pushes, or inside the callee).
+ int64_t getFrameAdjustment(const MachineInstr &I) const {
+ assert(isFrameInstr(I));
+ return I.getOperand(1).getImm();
+ }
+
+ /// Sets the stack pointer adjustment made inside the frame setup..destroy
+ /// sequence delimited by this instruction.
+ void setFrameAdjustment(MachineInstr &I, int64_t V) const {
+ assert(isFrameInstr(I));
+ I.getOperand(1).setImm(V);
+ }
+
/// getSPAdjust - This returns the stack pointer adjustment made by
/// this instruction. For x86, we need to handle more complex call
/// sequences involving PUSHes.
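Per the accessors added above, the adjustment lives in operand 1 of the frame setup/destroy pseudos (ADJCALLSTACKDOWN/UP on x86). A toy model of that bookkeeping, with a plain operand vector standing in for MachineInstr (illustrative only):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct ToyFrameInstr {
      bool IsFrameInstr;               // true for the setup/destroy pseudos
      std::vector<int64_t> Operands;   // operand 1 carries the SP adjustment
    };

    static int64_t getFrameAdjustment(const ToyFrameInstr &I) {
      assert(I.IsFrameInstr && "not a frame setup/destroy pseudo");
      return I.Operands[1];
    }

    static void setFrameAdjustment(ToyFrameInstr &I, int64_t V) {
      assert(I.IsFrameInstr && "not a frame setup/destroy pseudo");
      I.Operands[1] = V;
    }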
@@ -316,6 +330,13 @@ public:
// Branch analysis.
bool isUnpredicatedTerminator(const MachineInstr &MI) const override;
+ bool isUnconditionalTailCall(const MachineInstr &MI) const override;
+ bool canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond,
+ const MachineInstr &TailCall) const override;
+ void replaceBranchWithTailCall(MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ const MachineInstr &TailCall) const override;
+
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
@@ -436,9 +457,6 @@ public:
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const override;
- bool shouldScheduleAdjacent(const MachineInstr &First,
- const MachineInstr &Second) const override;
-
void getNoopForMachoTarget(MCInst &NopInst) const override;
bool
@@ -539,8 +557,28 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableDirectMachineOperandTargetFlags() const override;
- bool isTailCall(const MachineInstr &Inst) const override;
+ unsigned getOutliningBenefit(size_t SequenceSize,
+ size_t Occurrences,
+ bool CanBeTailCall) const override;
+
+ bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
+
+ llvm::X86GenInstrInfo::MachineOutlinerInstrType
+ getOutliningType(MachineInstr &MI) const override;
+
+ void insertOutlinerEpilogue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool IsTailCall) const override;
+
+ void insertOutlinerPrologue(MachineBasicBlock &MBB,
+ MachineFunction &MF,
+ bool IsTailCall) const override;
+ MachineBasicBlock::iterator
+ insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &It,
+ MachineFunction &MF,
+ bool IsTailCall) const override;
protected:
/// Commutes the operands in the given instruction by changing the operands
/// order and/or changing the instruction's opcode and/or the immediate value
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 38036715a25a..e31d2769047b 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -318,6 +318,7 @@ let RenderMethod = "addMemOperands", SuperClasses = [X86MemAsmOperand] in {
def X86Mem128_RC256XOperand : AsmOperandClass { let Name = "Mem128_RC256X"; }
def X86Mem256_RC256XOperand : AsmOperandClass { let Name = "Mem256_RC256X"; }
def X86Mem512_RC256XOperand : AsmOperandClass { let Name = "Mem512_RC256X"; }
+ def X86Mem256_RC512Operand : AsmOperandClass { let Name = "Mem256_RC512"; }
def X86Mem512_RC512Operand : AsmOperandClass { let Name = "Mem512_RC512"; }
}
@@ -374,9 +375,10 @@ def vy256mem : X86VMemOperand<VR256, "printi256mem", X86Mem256_RC256Operand>;
def vx64xmem : X86VMemOperand<VR128X, "printi64mem", X86Mem64_RC128XOperand>;
def vx128xmem : X86VMemOperand<VR128X, "printi128mem", X86Mem128_RC128XOperand>;
def vx256xmem : X86VMemOperand<VR128X, "printi256mem", X86Mem256_RC128XOperand>;
-def vy128xmem : X86VMemOperand<VR256, "printi128mem", X86Mem128_RC256XOperand>;
+def vy128xmem : X86VMemOperand<VR256X, "printi128mem", X86Mem128_RC256XOperand>;
def vy256xmem : X86VMemOperand<VR256X, "printi256mem", X86Mem256_RC256XOperand>;
def vy512mem : X86VMemOperand<VR256X, "printi512mem", X86Mem512_RC256XOperand>;
+def vz256xmem : X86VMemOperand<VR512, "printi256mem", X86Mem256_RC512Operand>;
def vz512mem : X86VMemOperand<VR512, "printi512mem", X86Mem512_RC512Operand>;
// A version of i8mem for use on x86-64 and x32 that uses a NOREX GPR instead
@@ -831,7 +833,6 @@ def HasXSAVEC : Predicate<"Subtarget->hasXSAVEC()">;
def HasXSAVES : Predicate<"Subtarget->hasXSAVES()">;
def HasPCLMUL : Predicate<"Subtarget->hasPCLMUL()">;
def HasFMA : Predicate<"Subtarget->hasFMA()">;
-def UseFMAOnAVX : Predicate<"Subtarget->hasFMA() && !Subtarget->hasAVX512()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
def HasXOP : Predicate<"Subtarget->hasXOP()">;
def HasTBM : Predicate<"Subtarget->hasTBM()">;
@@ -848,8 +849,6 @@ def HasVBMI : Predicate<"Subtarget->hasVBMI()">,
def HasIFMA : Predicate<"Subtarget->hasIFMA()">,
AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">;
def HasRTM : Predicate<"Subtarget->hasRTM()">;
-def HasHLE : Predicate<"Subtarget->hasHLE()">;
-def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
def HasADX : Predicate<"Subtarget->hasADX()">;
def HasSHA : Predicate<"Subtarget->hasSHA()">;
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
@@ -857,9 +856,11 @@ def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">;
+def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasMPX : Predicate<"Subtarget->hasMPX()">;
+def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">;
@@ -895,6 +896,7 @@ def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
+def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;
//===----------------------------------------------------------------------===//
@@ -931,6 +933,15 @@ def i32immSExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
def i64immSExt8 : ImmLeaf<i64, [{ return isInt<8>(Imm); }]>;
def i64immSExt32 : ImmLeaf<i64, [{ return isInt<32>(Imm); }]>;
+// FIXME: Ideally we would just replace the above i*immSExt* matchers with
+// relocImm-based matchers, but then FastISel would be unable to use them.
+def i64relocImmSExt8 : PatLeaf<(i64 relocImm), [{
+ return isSExtRelocImm<8>(N);
+}]>;
+def i64relocImmSExt32 : PatLeaf<(i64 relocImm), [{
+ return isSExtRelocImm<32>(N);
+}]>;
+
// If we have multiple users of an immediate, it's much smaller to reuse
// the register, rather than encode the immediate in every instruction.
// This has the risk of increasing register pressure from stretched live
@@ -971,6 +982,13 @@ def i64immSExt8_su : PatLeaf<(i64immSExt8), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
+def i64relocImmSExt8_su : PatLeaf<(i64relocImmSExt8), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i64relocImmSExt32_su : PatLeaf<(i64relocImmSExt32), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+
// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
// unsigned field.
def i64immZExt32 : ImmLeaf<i64, [{ return isUInt<32>(Imm); }]>;
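All of the immediate matchers in this block reduce to range checks; isInt<N>/isUInt<N> are the LLVM helpers behind them, and the relocImm variants additionally accept a relocatable expression (omitted below). A standalone sketch of the range tests themselves:

    #include <cstdint>
    #include <cstdio>

    // True if V is representable as an N-bit sign-extended immediate.
    template <unsigned N> static bool isIntN(int64_t V) {
      return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
    }
    // True if V fits in an N-bit unsigned field.
    template <unsigned N> static bool isUIntN(uint64_t V) {
      return V < (UINT64_C(1) << N);
    }

    int main() {
      printf("%d %d\n", isIntN<8>(-128), isIntN<8>(128));        // 1 0
      printf("%d %d\n", isUIntN<32>(0xffffffffULL),
                        isUIntN<32>(0x100000000ULL));            // 1 0
    }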
@@ -1106,13 +1124,15 @@ def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [],
IIC_POP_REG>, OpSize32, Requires<[Not64BitMode]>;
def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", [],
IIC_POP_REG>, OpSize16;
-def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", [],
- IIC_POP_MEM>, OpSize16;
def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", [],
IIC_POP_REG>, OpSize32, Requires<[Not64BitMode]>;
+} // mayLoad, SchedRW
+let mayStore = 1, mayLoad = 1, SchedRW = [WriteRMW] in {
+def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", [],
+ IIC_POP_MEM>, OpSize16;
def POP32rmm: I<0x8F, MRM0m, (outs), (ins i32mem:$dst), "pop{l}\t$dst", [],
IIC_POP_MEM>, OpSize32, Requires<[Not64BitMode]>;
-} // mayLoad, SchedRW
+} // mayStore, mayLoad, WriteRMW
let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
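The POP reclassification above matches what the instruction really does: pop with a memory destination both loads (the stack slot) and stores (the destination), hence mayLoad plus mayStore and the read-modify-write scheduling class. A scalar model of pop m64 (illustrative; note RSP is bumped before the destination address would be computed):

    #include <cstdint>

    static void pop64_to_mem(uint64_t *&RSP, uint64_t *Dst) {
      uint64_t V = *RSP;  // the load half  -> mayLoad
      ++RSP;              // stack pointer adjustment
      *Dst = V;           // the store half -> mayStore
    }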
@@ -1194,9 +1214,10 @@ def POP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "pop{q}\t$reg", [],
IIC_POP_REG>, OpSize32, Requires<[In64BitMode]>;
def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [],
IIC_POP_REG>, OpSize32, Requires<[In64BitMode]>;
+} // mayLoad, SchedRW
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in
def POP64rmm: I<0x8F, MRM0m, (outs), (ins i64mem:$dst), "pop{q}\t$dst", [],
IIC_POP_MEM>, OpSize32, Requires<[In64BitMode]>;
-} // mayLoad, SchedRW
let mayStore = 1, SchedRW = [WriteStore] in {
def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>;
@@ -1965,7 +1986,12 @@ def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>,
Requires<[In64BitMode]>;
// Data16 instruction prefix
-def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>;
+def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>,
+ Requires<[Not16BitMode]>;
+
+// Data32 instruction prefix
+def DATA32_PREFIX : I<0x66, RawFrm, (outs), (ins), "data32", []>,
+ Requires<[In16BitMode]>;
// Repeat string operation instruction prefixes
// These use the DF flag in the EFLAGS register to inc or dec ESI/EDI, with ECX as the count
@@ -2079,6 +2105,7 @@ def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
"arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_REG>,
Requires<[Not64BitMode]>;
+let mayStore = 1 in
def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>,
Requires<[Not64BitMode]>;
@@ -2448,8 +2475,19 @@ def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>,
//===----------------------------------------------------------------------===//
// CLZERO Instruction
//
-let Uses = [EAX] in
-def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, TB;
+let SchedRW = [WriteSystem] in {
+ let Uses = [EAX] in
+ def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", [], IIC_SSE_CLZERO>,
+ TB, Requires<[HasCLZERO]>;
+
+ let usesCustomInserter = 1 in {
+ def CLZERO : PseudoI<(outs), (ins i32mem:$src1),
+ [(int_x86_clzero addr:$src1)]>, Requires<[HasCLZERO]>;
+ }
+} // SchedRW
+
+def : InstAlias<"clzero\t{%eax|eax}", (CLZEROr)>, Requires<[Not64BitMode]>;
+def : InstAlias<"clzero\t{%rax|rax}", (CLZEROr)>, Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
// Pattern fragments to auto generate TBM instructions.
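For context: CLZERO zeroes the whole cache line containing the address in (E/R)AX, which is why the real instruction takes no explicit operands while the pseudo carries the pointer for the custom inserter. A rough portable analogue, assuming a 64-byte line (the actual line size is implementation-defined):

    #include <cstdint>
    #include <cstring>

    static void clzero_model(void *Addr) {
      constexpr uintptr_t LineSize = 64;  // assumption: typical line size
      uintptr_t Base = reinterpret_cast<uintptr_t>(Addr) & ~(LineSize - 1);
      std::memset(reinterpret_cast<void *>(Base), 0, LineSize);
    }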
@@ -2522,10 +2560,10 @@ let Predicates = [HasTBM] in {
// Memory Instructions
//
+let Predicates = [HasCLFLUSHOPT] in
def CLFLUSHOPT : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
"clflushopt\t$src", [(int_x86_clflushopt addr:$src)]>, PD;
def CLWB : I<0xAE, MRM6m, (outs), (ins i8mem:$src), "clwb\t$src", []>, PD;
-def PCOMMIT : I<0xAE, MRM_F8, (outs), (ins), "pcommit", []>, PD;
//===----------------------------------------------------------------------===//
@@ -2977,7 +3015,7 @@ def : InstAlias<"mov\t{$mem, $seg|$seg, $mem}", (MOV32sm SEGMENT_REG:$seg, i32me
def : InstAlias<"mov\t{$seg, $mem|$mem, $seg}", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg), 0>;
// Match 'movq <largeimm>, <reg>' as an alias for movabsq.
-def : InstAlias<"movq\t{$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>;
+def : InstAlias<"mov{q}\t{$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>;
// Match 'movq GR64, MMX' as an alias for movd.
def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 0bb106823983..dc3800ce381b 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -294,6 +294,7 @@ def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
[(set VR64:$dst, (load_mmx addr:$src))],
IIC_MMX_MOVQ_RM>;
} // SchedRW
+
let SchedRW = [WriteStore] in
def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -378,7 +379,6 @@ defm MMX_PHADD : SS3I_binop_rm_int_mm<0x02, "phaddd", int_x86_ssse3_phadd_d,
defm MMX_PHADDSW : SS3I_binop_rm_int_mm<0x03, "phaddsw",int_x86_ssse3_phadd_sw,
MMX_PHADDSUBW>;
-
// -- Subtraction
defm MMX_PSUBB : MMXI_binop_rm_int<0xF8, "psubb", int_x86_mmx_psub_b,
MMX_INTALU_ITINS>;
@@ -479,13 +479,6 @@ defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
int_x86_mmx_psrl_q, int_x86_mmx_psrli_q,
MMX_SHIFT_ITINS>;
-def : Pat<(int_x86_mmx_psrl_w VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRLWrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psrl_d VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRLDrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psrl_q VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRLQrm VR64:$src1, addr:$src2)>;
-
defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
int_x86_mmx_psll_w, int_x86_mmx_pslli_w,
MMX_SHIFT_ITINS>;
@@ -496,13 +489,6 @@ defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
int_x86_mmx_psll_q, int_x86_mmx_pslli_q,
MMX_SHIFT_ITINS>;
-def : Pat<(int_x86_mmx_psll_w VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSLLWrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psll_d VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSLLDrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psll_q VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSLLQrm VR64:$src1, addr:$src2)>;
-
defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
int_x86_mmx_psra_w, int_x86_mmx_psrai_w,
MMX_SHIFT_ITINS>;
@@ -510,11 +496,6 @@ defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_mmx_psra_d, int_x86_mmx_psrai_d,
MMX_SHIFT_ITINS>;
-def : Pat<(int_x86_mmx_psra_w VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRAWrm VR64:$src1, addr:$src2)>;
-def : Pat<(int_x86_mmx_psra_d VR64:$src1, (load_mvmmx addr:$src2)),
- (MMX_PSRADrm VR64:$src1, addr:$src2)>;
-
// Comparison Instructions
defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b,
MMX_INTALU_ITINS>;
@@ -576,9 +557,6 @@ def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
imm:$src2))],
IIC_MMX_PSHUF>, Sched<[WriteShuffleLd]>;
-
-
-
// -- Conversion Instructions
defm MMX_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
@@ -639,7 +617,6 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
[(set GR32orGR64:$dst,
(int_x86_mmx_pmovmskb VR64:$src))]>;
-
// Low word of XMM to MMX.
def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1,
[SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>;
@@ -670,6 +647,16 @@ def : Pat<(f64 (bitconvert (x86mmx VR64:$src))),
(MMX_MOVQ2FR64rr VR64:$src)>;
def : Pat<(x86mmx (bitconvert (f64 FR64:$src))),
(MMX_MOVFR642Qrr FR64:$src)>;
+def : Pat<(x86mmx (MMX_X86movdq2q
+ (bc_v2i64 (v4i32 (int_x86_sse2_cvtps2dq VR128:$src))))),
+ (MMX_CVTPS2PIirr VR128:$src)>;
+def : Pat<(x86mmx (MMX_X86movdq2q
+ (bc_v2i64 (v4i32 (fp_to_sint (v4f32 VR128:$src)))))),
+ (MMX_CVTTPS2PIirr VR128:$src)>;
+def : Pat<(x86mmx (MMX_X86movdq2q
+ (bc_v2i64 (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
+ (MMX_CVTPD2PIirr VR128:$src)>;
+def : Pat<(x86mmx (MMX_X86movdq2q
+ (bc_v2i64 (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
+ (MMX_CVTTPD2PIirr VR128:$src)>;
}
-
-
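The new patterns above fold "convert in XMM, then MOVDQ2Q the low half" into the direct MMX conversions; both routes produce the same two low-lane results. A scalar sketch of the CVTPS2PI case, using std::lrintf to model the default round-to-nearest-even mode:

    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    int main() {
      float Src[4] = {1.5f, -2.5f, 3.0f, 4.0f};
      // Route 1: cvtps2dq converts all four lanes, movdq2q keeps the low two.
      // Route 2: cvtps2pi converts just the low two lanes directly.
      int32_t Lo0 = (int32_t)std::lrintf(Src[0]);  // 2  (ties round to even)
      int32_t Lo1 = (int32_t)std::lrintf(Src[1]);  // -2 (ties round to even)
      printf("%d %d\n", Lo0, Lo1);
    }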
diff --git a/lib/Target/X86/X86InstrMPX.td b/lib/Target/X86/X86InstrMPX.td
index 309f601d1fce..104ba2a174db 100644
--- a/lib/Target/X86/X86InstrMPX.td
+++ b/lib/Target/X86/X86InstrMPX.td
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> {
+let mayLoad = 1 in {
def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins i32mem:$src),
OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
Requires<[HasMPX, Not64BitMode]>;
@@ -21,16 +22,19 @@ multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> {
OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
Requires<[HasMPX, In64BitMode]>;
}
+}
defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS;
multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> {
+let mayLoad = 1 in {
def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i32mem:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rm: RI<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i64mem:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, In64BitMode]>;
+}
def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2),
OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, Not64BitMode]>;
@@ -45,16 +49,18 @@ defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD;
def BNDMOVRMrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX]>;
+let mayLoad = 1 in {
def BNDMOVRM32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, Not64BitMode]>;
def BNDMOVRM64rm : RI<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, In64BitMode]>;
-
+}
def BNDMOVMRrr : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX]>;
+let mayStore = 1 in {
def BNDMOVMR32mr : I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
"bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, Not64BitMode]>;
@@ -65,6 +71,8 @@ def BNDMOVMR64mr : RI<0x1B, MRMDestMem, (outs), (ins i128mem:$dst, BNDR:$src),
def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
"bndstx\t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
+}
+let mayLoad = 1 in
def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
"bndldx\t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 1812d01711d1..e1bf28cbf612 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -259,8 +259,8 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
- SDPatternOperator Int, RegisterClass RC,
- string asm, Operand memopr,
+ SDPatternOperator OpNode, RegisterClass RC,
+ ValueType VT, string asm, Operand memopr,
ComplexPattern mem_cpat, Domain d,
OpndItins itins, bit Is2Addr = 1> {
let isCodeGenOnly = 1, hasSideEffects = 0 in {
@@ -268,14 +268,14 @@ let isCodeGenOnly = 1, hasSideEffects = 0 in {
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr, d>,
+ [(set RC:$dst, (VT (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
Sched<[itins.Sched]>;
let mayLoad = 1 in
def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, mem_cpat:$src2))], itins.rm, d>,
+ [(set RC:$dst, (VT (OpNode RC:$src1, mem_cpat:$src2)))], itins.rm, d>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
@@ -446,9 +446,9 @@ def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero] in {
def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoVLX_Or_NoDQI]>;
+ [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2, NoVLX_Or_NoDQI]>;
+ [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2, NoAVX512]>;
}
//===----------------------------------------------------------------------===//
@@ -461,12 +461,12 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, Predicates = [NoVLX], SchedRW = [WriteZero] in {
+ isPseudo = 1, SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4f32 immAllZerosV))]>;
}
-let Predicates = [NoVLX] in
+let Predicates = [NoAVX512] in
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
@@ -475,7 +475,7 @@ def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isPseudo = 1, Predicates = [HasAVX, NoVLX], SchedRW = [WriteZero] in {
+ isPseudo = 1, Predicates = [NoAVX512], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8i32 immAllZerosV))]>;
}
@@ -491,7 +491,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
[(set VR256:$dst, (v8i32 immAllOnesV))]>;
}
-
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
@@ -527,12 +526,12 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
// AVX
defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}", d>,
- VEX_4V, VEX_LIG;
+ VEX_4V, VEX_LIG, VEX_WIG;
def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR, d>,
- VEX, VEX_LIG, Sched<[WriteStore]>;
+ VEX, VEX_LIG, Sched<[WriteStore]>, VEX_WIG;
// SSE1 & 2
let Constraints = "$src1 = $dst" in {
defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
@@ -552,7 +551,7 @@ multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
- IIC_SSE_MOV_S_RM, d>, VEX, VEX_LIG, Sched<[WriteLoad]>;
+ IIC_SSE_MOV_S_RM, d>, VEX, VEX_LIG, Sched<[WriteLoad]>, VEX_WIG;
def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))],
@@ -644,10 +643,6 @@ let Predicates = [UseAVX] in {
(VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
(VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
- def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
- def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
// 256-bit variants
def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
@@ -738,10 +733,6 @@ let Predicates = [UseSSE2] in {
(MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
(MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
- def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
- def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
// is during lowering, where it's not possible to recognize the fold because
@@ -786,29 +777,29 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in
let Predicates = [HasAVX, NoVLX] in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
- PS, VEX;
+ PS, VEX, VEX_WIG;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
"movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
- PD, VEX;
+ PD, VEX, VEX_WIG;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
"movups", SSEPackedSingle, SSE_MOVU_ITINS>,
- PS, VEX;
+ PS, VEX, VEX_WIG;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
"movupd", SSEPackedDouble, SSE_MOVU_ITINS>,
- PD, VEX;
+ PD, VEX, VEX_WIG;
defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
"movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
- PS, VEX, VEX_L;
+ PS, VEX, VEX_L, VEX_WIG;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
"movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
- PD, VEX, VEX_L;
+ PD, VEX, VEX_L, VEX_WIG;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
"movups", SSEPackedSingle, SSE_MOVU_ITINS>,
- PS, VEX, VEX_L;
+ PS, VEX, VEX_L, VEX_WIG;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
"movupd", SSEPackedDouble, SSE_MOVU_ITINS>,
- PD, VEX, VEX_L;
+ PD, VEX, VEX_L, VEX_WIG;
}
let Predicates = [UseSSE1] in {
@@ -832,35 +823,35 @@ let SchedRW = [WriteStore], Predicates = [HasAVX, NoVLX] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)],
- IIC_SSE_MOVA_P_MR>, VEX;
+ IIC_SSE_MOVA_P_MR>, VEX, VEX_WIG;
def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(alignedstore (v2f64 VR128:$src), addr:$dst)],
- IIC_SSE_MOVA_P_MR>, VEX;
+ IIC_SSE_MOVA_P_MR>, VEX, VEX_WIG;
def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
[(store (v4f32 VR128:$src), addr:$dst)],
- IIC_SSE_MOVU_P_MR>, VEX;
+ IIC_SSE_MOVU_P_MR>, VEX, VEX_WIG;
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v2f64 VR128:$src), addr:$dst)],
- IIC_SSE_MOVU_P_MR>, VEX;
+ IIC_SSE_MOVU_P_MR>, VEX, VEX_WIG;
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore256 (v8f32 VR256:$src), addr:$dst)],
- IIC_SSE_MOVA_P_MR>, VEX, VEX_L;
+ IIC_SSE_MOVA_P_MR>, VEX, VEX_L, VEX_WIG;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(alignedstore256 (v4f64 VR256:$src), addr:$dst)],
- IIC_SSE_MOVA_P_MR>, VEX, VEX_L;
+ IIC_SSE_MOVA_P_MR>, VEX, VEX_L, VEX_WIG;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movups\t{$src, $dst|$dst, $src}",
[(store (v8f32 VR256:$src), addr:$dst)],
- IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
+ IIC_SSE_MOVU_P_MR>, VEX, VEX_L, VEX_WIG;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)],
- IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
+ IIC_SSE_MOVU_P_MR>, VEX, VEX_L, VEX_WIG;
} // SchedRW
// For disassembler
@@ -869,35 +860,35 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG;
def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movapd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG;
def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movups\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG;
def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movupd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG;
def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movaps\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG;
def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movapd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG;
def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movups\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG;
def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movupd\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG;
}
// Aliases to help the assembler pick two byte VEX encodings by swapping the
@@ -955,24 +946,10 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
IIC_SSE_MOVU_P_RR>;
}
-// Use vmovaps/vmovups for AVX integer load/store.
let Predicates = [HasAVX, NoVLX] in {
- // 128-bit load/store
- def : Pat<(alignedloadv2i64 addr:$src),
- (VMOVAPSrm addr:$src)>;
- def : Pat<(loadv2i64 addr:$src),
- (VMOVUPSrm addr:$src)>;
-
- def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
- (VMOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
- (VMOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v2i64 VR128:$src), addr:$dst),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v4i32 VR128:$src), addr:$dst),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
-
- // 256-bit load/store
+ // 256-bit loads/stores need to use the floating point forms in case we
+ // don't have AVX2. Execution domain fixing will convert them to integer
+ // if AVX2 is available and changing the domain is beneficial.
def : Pat<(alignedloadv4i64 addr:$src),
(VMOVAPSYrm addr:$src)>;
def : Pat<(loadv4i64 addr:$src),
@@ -981,10 +958,18 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVAPSYmr addr:$dst, VR256:$src)>;
def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
(VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
def : Pat<(store (v4i64 VR256:$src), addr:$dst),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
def : Pat<(store (v8i32 VR256:$src), addr:$dst),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v16i16 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v32i8 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
// Special patterns for storing subvector extracts of lower 128-bits
// Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
@@ -994,18 +979,6 @@ let Predicates = [HasAVX, NoVLX] in {
def : Pat<(alignedstore (v4f32 (extract_subvector
(v8f32 VR256:$src), (iPTR 0))), addr:$dst),
(VMOVAPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
- def : Pat<(alignedstore (v2i64 (extract_subvector
- (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVAPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
- def : Pat<(alignedstore (v4i32 (extract_subvector
- (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVAPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
- def : Pat<(alignedstore (v8i16 (extract_subvector
- (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
- def : Pat<(alignedstore (v16i8 (extract_subvector
- (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVAPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
def : Pat<(store (v2f64 (extract_subvector
(v4f64 VR256:$src), (iPTR 0))), addr:$dst),
@@ -1013,40 +986,6 @@ let Predicates = [HasAVX, NoVLX] in {
def : Pat<(store (v4f32 (extract_subvector
(v8f32 VR256:$src), (iPTR 0))), addr:$dst),
(VMOVUPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
- def : Pat<(store (v2i64 (extract_subvector
- (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVUPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
- def : Pat<(store (v4i32 (extract_subvector
- (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
- def : Pat<(store (v8i16 (extract_subvector
- (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
- def : Pat<(store (v16i8 (extract_subvector
- (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
- (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
-}
-
-let Predicates = [HasAVX, NoVLX] in {
- // 128-bit load/store
- def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
- (VMOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
- (VMOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v8i16 VR128:$src), addr:$dst),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v16i8 VR128:$src), addr:$dst),
- (VMOVUPSmr addr:$dst, VR128:$src)>;
-
- // 256-bit load/store
- def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
- (VMOVAPSYmr addr:$dst, VR256:$src)>;
- def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
- (VMOVAPSYmr addr:$dst, VR256:$src)>;
- def : Pat<(store (v16i16 VR256:$src), addr:$dst),
- (VMOVUPSYmr addr:$dst, VR256:$src)>;
- def : Pat<(store (v32i8 VR256:$src), addr:$dst),
- (VMOVUPSYmr addr:$dst, VR256:$src)>;
}
// Use movaps / movups for SSE integer load / store (one byte shorter).
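Execution domain fixing, mentioned in the comment above, is a late pass that rewrites an instruction into an equivalent opcode from another execution domain when that avoids a domain-crossing penalty. A toy table-driven sketch of the idea (opcode names taken from this file; the real pass works over opcode numbers):

    #include <array>
    #include <cstdio>

    // Each row holds equivalent opcodes in the {float, double, integer} domains.
    struct ReplaceableRow { const char *Ops[3]; };

    static const std::array<ReplaceableRow, 2> Table = {{
        {{"VMOVAPSYrm", "VMOVAPDYrm", "VMOVDQAYrm"}},  // aligned 256-bit load
        {{"VMOVUPSYrm", "VMOVUPDYrm", "VMOVDQUYrm"}},  // unaligned 256-bit load
    }};

    int main() {
      unsigned WantedDomain = 2;  // integer domain, only legal with AVX2
      printf("%s\n", Table[0].Ops[WantedDomain]);  // VMOVDQAYrm
    }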
@@ -1107,7 +1046,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
let Predicates = [UseAVX] in
defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- itin>, VEX_4V;
+ itin>, VEX_4V, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
@@ -1126,12 +1065,12 @@ def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)),
(iPTR 0))), addr:$dst)],
- IIC_SSE_MOV_LH>, VEX;
+ IIC_SSE_MOV_LH>, VEX, VEX_WIG;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)],
- IIC_SSE_MOV_LH>, VEX;
+ IIC_SSE_MOV_LH>, VEX, VEX_WIG;
}// UseAVX
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
@@ -1238,12 +1177,12 @@ def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
[(store (f64 (extractelt
(X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
(bc_v2f64 (v4f32 VR128:$src))),
- (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX, VEX_WIG;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
- (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX, VEX_WIG;
} // UseAVX
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
@@ -1343,14 +1282,14 @@ let AddedComplexity = 20, Predicates = [UseAVX] in {
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>,
- VEX_4V, Sched<[WriteFShuffle]>;
+ VEX_4V, Sched<[WriteFShuffle]>, VEX_WIG;
def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>,
- VEX_4V, Sched<[WriteFShuffle]>;
+ VEX_4V, Sched<[WriteFShuffle]>, VEX_WIG;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
@@ -1725,11 +1664,11 @@ defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, loadv2i64,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, SSE_CVT_PS>,
- PS, VEX, Requires<[HasAVX, NoVLX]>;
+ PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG;
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, loadv4i64,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, SSE_CVT_PS>,
- PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>;
+ PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG;
defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memopv2i64,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
@@ -1777,20 +1716,21 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
// Convert scalar double to scalar single
let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
- (ins FR64:$src1, FR64:$src2),
+ (ins FR32:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
- Sched<[WriteCvtF2F]>;
+ Sched<[WriteCvtF2F]>, VEX_WIG;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
- (ins FR64:$src1, f64mem:$src2),
+ (ins FR32:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
- Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG;
}
-def : Pat<(f32 (fpround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
+def : Pat<(f32 (fpround FR64:$src)),
+ (VCVTSD2SSrr (COPY_TO_REGCLASS FR64:$src, FR32), FR64:$src)>,
Requires<[UseAVX]>;
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
@@ -1810,15 +1750,15 @@ def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>,
- Sched<[WriteCvtF2F]>;
+ IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, VEX_WIG,
+ Requires<[HasAVX]>, Sched<[WriteCvtF2F]>;
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss
VR128:$src1, sse_load_f64:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>,
- Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+ IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, VEX_WIG,
+ Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in {
def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
@@ -1842,30 +1782,30 @@ def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
// SSE2 instructions with XS prefix
let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
- (ins FR32:$src1, FR32:$src2),
+ (ins FR64:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RR>,
XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
- Sched<[WriteCvtF2F]>;
+ Sched<[WriteCvtF2F]>, VEX_WIG;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
- (ins FR32:$src1, f32mem:$src2),
+ (ins FR64:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[], IIC_SSE_CVT_Scalar_RM>,
XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
- Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+ Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG;
}
def : Pat<(f64 (fpextend FR32:$src)),
- (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>;
+ (VCVTSS2SDrr (COPY_TO_REGCLASS FR32:$src, FR64), FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(fpextend (loadf32 addr:$src)),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
+ (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
Requires<[UseAVX, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
- (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
+ (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
Requires<[UseAVX, OptForSpeed]>;
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
@@ -1895,15 +1835,15 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
- IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>,
- Sched<[WriteCvtF2F]>;
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, VEX_WIG,
+ Requires<[HasAVX]>, Sched<[WriteCvtF2F]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
- IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>,
- Sched<[WriteCvtF2FLd, ReadAfterLd]>;
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, VEX_WIG,
+ Requires<[HasAVX]>, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -1999,22 +1939,22 @@ def : Pat<(v4f32 (X86Movss
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
- IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
+ IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>, VEX_WIG;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(int_x86_sse2_cvtps2dq (loadv4f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
+ IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>, VEX_WIG;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2dq_256 VR256:$src))],
- IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>, VEX_WIG;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(int_x86_avx_cvt_ps2dq_256 (loadv8f32 addr:$src)))],
- IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
+ IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>, VEX_WIG;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
@@ -2035,7 +1975,7 @@ def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
- VEX, Sched<[WriteCvtF2I]>;
+ VEX, Sched<[WriteCvtF2I]>, VEX_WIG;
// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
@@ -2044,7 +1984,7 @@ def VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtpd2dq{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX,
- Sched<[WriteCvtF2ILd]>;
+ Sched<[WriteCvtF2ILd]>, VEX_WIG;
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
(VCVTPD2DQrm VR128:$dst, f128mem:$src), 0>;
@@ -2053,12 +1993,12 @@ def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtF2I]>;
+ VEX, VEX_L, Sched<[WriteCvtF2I]>, VEX_WIG;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
+ VEX, VEX_L, Sched<[WriteCvtF2ILd]>, VEX_WIG;
def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
(VCVTPD2DQYrr VR128:$dst, VR256:$src), 0>;
def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
@@ -2083,23 +2023,23 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (fp_to_sint (v4f32 VR128:$src))))],
- IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
+ IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>, VEX_WIG;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (fp_to_sint (loadv4f32 addr:$src))))],
- IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
+ IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>, VEX_WIG;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v8i32 (fp_to_sint (v8f32 VR256:$src))))],
- IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
+ IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>, VEX_WIG;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v8i32 (fp_to_sint (loadv8f32 addr:$src))))],
IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
- Sched<[WriteCvtF2ILd]>;
+ Sched<[WriteCvtF2ILd]>, VEX_WIG;
}
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -2118,7 +2058,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvttp2si (v2f64 VR128:$src))))],
- IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>, VEX_WIG;
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
@@ -2132,7 +2072,7 @@ def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvttp2si (loadv2f64 addr:$src))))],
- IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>, VEX_WIG;
def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQrm VR128:$dst, f128mem:$src), 0>;
@@ -2142,12 +2082,12 @@ def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (fp_to_sint (v4f64 VR256:$src))))],
- IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>, VEX_WIG;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (fp_to_sint (loadv4f64 addr:$src))))],
- IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>, VEX_WIG;
}
def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;
@@ -2193,19 +2133,19 @@ let Predicates = [HasAVX, NoVLX] in {
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))],
- IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;
+ IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>, VEX_WIG;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, PS, VEX, Sched<[WriteCvtF2FLd]>;
+ IIC_SSE_CVT_PD_RM>, PS, VEX, Sched<[WriteCvtF2FLd]>, VEX_WIG;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (v4f64 (fpextend (v4f32 VR128:$src))))],
- IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>;
+ IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>, VEX_WIG;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
+ IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>, VEX_WIG;
}
let Predicates = [UseSSE2] in {
@@ -2225,30 +2165,30 @@ let hasSideEffects = 0, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))]>,
- VEX, Sched<[WriteCvtI2FLd]>;
+ (v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))))]>,
+ VEX, Sched<[WriteCvtI2FLd]>, VEX_WIG;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2f64 (X86VSintToFP (v4i32 VR128:$src))))]>,
- VEX, Sched<[WriteCvtI2F]>;
+ VEX, Sched<[WriteCvtI2F]>, VEX_WIG;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))))]>,
- VEX, VEX_L, Sched<[WriteCvtI2FLd]>;
+ VEX, VEX_L, Sched<[WriteCvtI2FLd]>, VEX_WIG;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v4f64 (sint_to_fp (v4i32 VR128:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtI2F]>;
+ VEX, VEX_L, Sched<[WriteCvtI2F]>, VEX_WIG;
}
let hasSideEffects = 0, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))],
+ (v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))))],
IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -2276,7 +2216,7 @@ let Predicates = [HasAVX, NoVLX] in
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))],
- IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>;
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>, VEX_WIG;
// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
@@ -2285,7 +2225,7 @@ let Predicates = [HasAVX, NoVLX] in
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>, VEX_WIG;
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
(VCVTPD2PSrm VR128:$dst, f128mem:$src), 0>;
@@ -2294,11 +2234,11 @@ let Predicates = [HasAVX, NoVLX] in {
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (fpround VR256:$src))],
- IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>;
+ IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>, VEX_WIG;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (fpround (loadv4f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>, VEX_WIG;
}
def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
(VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>;
@@ -2368,21 +2308,25 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
}
}
+let ExeDomain = SSEPackedSingle in
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSE_ALU_F32S, i8immZExt5>, XS, VEX_4V, VEX_LIG;
+ SSE_ALU_F32S, i8immZExt5>, XS, VEX_4V, VEX_LIG, VEX_WIG;
+let ExeDomain = SSEPackedDouble in
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSE_ALU_F32S, i8immZExt5>, // same latency as 32 bit compare
- XD, VEX_4V, VEX_LIG;
+ XD, VEX_4V, VEX_LIG, VEX_WIG;
let Constraints = "$src1 = $dst" in {
+ let ExeDomain = SSEPackedSingle in
defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32,
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
"cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S,
i8immZExt3>, XS;
+ let ExeDomain = SSEPackedDouble in
defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
"cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
@@ -2398,6 +2342,7 @@ multiclass sse12_cmp_scalar_int<Operand memop, Operand CC,
VR128:$src, immLeaf:$cc))],
itins.rr>,
Sched<[itins.Sched]>;
+let mayLoad = 1 in
def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
@@ -2408,18 +2353,22 @@ multiclass sse12_cmp_scalar_int<Operand memop, Operand CC,
let isCodeGenOnly = 1 in {
// Aliases to match intrinsics which expect XMM operand(s).
+ let ExeDomain = SSEPackedSingle in
defm Int_VCMPSS : sse12_cmp_scalar_int<ssmem, AVXCC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
SSE_ALU_F32S, i8immZExt5, sse_load_f32>,
XS, VEX_4V;
+ let ExeDomain = SSEPackedDouble in
defm Int_VCMPSD : sse12_cmp_scalar_int<sdmem, AVXCC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
SSE_ALU_F32S, i8immZExt5, sse_load_f64>, // same latency as f32
XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
+ let ExeDomain = SSEPackedSingle in
defm Int_CMPSS : sse12_cmp_scalar_int<ssmem, SSECC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
SSE_ALU_F32S, i8immZExt3, sse_load_f32>, XS;
+ let ExeDomain = SSEPackedDouble in
defm Int_CMPSD : sse12_cmp_scalar_int<sdmem, SSECC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
SSE_ALU_F64S, i8immZExt3, sse_load_f64>,
@@ -2437,6 +2386,7 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
IIC_SSE_COMIS_RR>,
Sched<[WriteFAdd]>;
+let mayLoad = 1 in
def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
@@ -2454,6 +2404,7 @@ multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
[(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
IIC_SSE_COMIS_RR>,
Sched<[WriteFAdd]>;
+let mayLoad = 1 in
def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
@@ -2464,26 +2415,26 @@ multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
let Defs = [EFLAGS] in {
defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
- "ucomiss">, PS, VEX, VEX_LIG;
+ "ucomiss">, PS, VEX, VEX_LIG, VEX_WIG;
defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
- "ucomisd">, PD, VEX, VEX_LIG;
+ "ucomisd">, PD, VEX, VEX_LIG, VEX_WIG;
let Pattern = []<dag> in {
defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32,
- "comiss">, PS, VEX, VEX_LIG;
+ "comiss">, PS, VEX, VEX_LIG, VEX_WIG;
defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64,
- "comisd">, PD, VEX, VEX_LIG;
+ "comisd">, PD, VEX, VEX_LIG, VEX_WIG;
}
let isCodeGenOnly = 1 in {
defm Int_VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
- sse_load_f32, "ucomiss">, PS, VEX;
+ sse_load_f32, "ucomiss">, PS, VEX, VEX_WIG;
defm Int_VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
- sse_load_f64, "ucomisd">, PD, VEX;
+ sse_load_f64, "ucomisd">, PD, VEX, VEX_WIG;
defm Int_VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
- sse_load_f32, "comiss">, PS, VEX;
+ sse_load_f32, "comiss">, PS, VEX, VEX_WIG;
defm Int_VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
- sse_load_f64, "comisd">, PD, VEX;
+ sse_load_f64, "comisd">, PD, VEX, VEX_WIG;
}
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss">, PS;
@@ -2512,18 +2463,19 @@ let Defs = [EFLAGS] in {
// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
- Operand CC, Intrinsic Int, string asm,
+ Operand CC, ValueType VT, string asm,
string asm_alt, Domain d, ImmLeaf immLeaf,
PatFrag ld_frag, OpndItins itins = SSE_ALU_F32P> {
let isCommutable = 1 in
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, RC:$src2, immLeaf:$cc))],
+ [(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, immLeaf:$cc)))],
itins.rr, d>,
Sched<[WriteFAdd]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2), immLeaf:$cc))],
+ [(set RC:$dst,
+ (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), immLeaf:$cc)))],
itins.rm, d>,
Sched<[WriteFAddLd, ReadAfterLd]>;
@@ -2540,67 +2492,33 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
}
}
-defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps,
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, v4f32,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedSingle, i8immZExt5, loadv4f32>, PS, VEX_4V;
-defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd,
+ SSEPackedSingle, i8immZExt5, loadv4f32>, PS, VEX_4V, VEX_WIG;
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, v2f64,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SSEPackedDouble, i8immZExt5, loadv2f64>, PD, VEX_4V;
-defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
+ SSEPackedDouble, i8immZExt5, loadv2f64>, PD, VEX_4V, VEX_WIG;
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, v8f32,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSEPackedSingle, i8immZExt5, loadv8f32>, PS, VEX_4V, VEX_L;
-defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, v4f64,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSEPackedDouble, i8immZExt5, loadv4f64>, PD, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in {
- defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
+ defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, v4f32,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SSEPackedSingle, i8immZExt5, memopv4f32, SSE_ALU_F32P>, PS;
- defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
+ defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, v2f64,
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SSEPackedDouble, i8immZExt5, memopv2f64, SSE_ALU_F64P>, PD;
}
-let Predicates = [HasAVX] in {
-def : Pat<(v4f32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
- (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
-def : Pat<(v4f32 (X86cmpp (v4f32 VR128:$src1), (loadv4f32 addr:$src2), imm:$cc)),
- (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
-def : Pat<(v2f64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
- (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2f64 (X86cmpp (v2f64 VR128:$src1), (loadv2f64 addr:$src2), imm:$cc)),
- (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
-
-def : Pat<(v8f32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
- (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
-def : Pat<(v8f32 (X86cmpp (v8f32 VR256:$src1), (loadv8f32 addr:$src2), imm:$cc)),
- (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
-def : Pat<(v4f64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
- (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
-def : Pat<(v4f64 (X86cmpp (v4f64 VR256:$src1), (loadv4f64 addr:$src2), imm:$cc)),
- (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
-}
-
-let Predicates = [UseSSE1] in {
-def : Pat<(v4f32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
- (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
-def : Pat<(v4f32 (X86cmpp (v4f32 VR128:$src1), (memopv4f32 addr:$src2), imm:$cc)),
- (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
-}
-
-let Predicates = [UseSSE2] in {
-def : Pat<(v2f64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
- (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2f64 (X86cmpp (v2f64 VR128:$src1), (memopv2f64 addr:$src2), imm:$cc)),
- (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
-}
-
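// Why the standalone patterns above became redundant: sse12_cmp_packed now
// takes a ValueType and instantiates the X86cmpp match itself, so each
// CMPPS/CMPPD/VCMPPS*/VCMPPD* def carries a pattern of the shape
//
//   [(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, immLeaf:$cc)))]
//
// (a reduced sketch of the pattern emitted by the multiclass), and
// instruction selection reaches the instructions directly.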
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions
//===----------------------------------------------------------------------===//
@@ -2624,16 +2542,16 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
let Predicates = [HasAVX, NoVLX] in {
defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- loadv4f32, SSEPackedSingle>, PS, VEX_4V;
+ loadv4f32, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- loadv8f32, SSEPackedSingle>, PS, VEX_4V, VEX_L;
+ loadv8f32, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
"shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- loadv2f64, SSEPackedDouble>, PD, VEX_4V;
+ loadv2f64, SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
"shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- loadv4f64, SSEPackedDouble>, PD, VEX_4V, VEX_L;
+ loadv4f64, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2715,29 +2633,29 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
let Predicates = [HasAVX, NoVLX] in {
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32,
VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, PS, VEX_4V;
+ SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64,
VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, PD, VEX_4V;
+ SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32,
VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, PS, VEX_4V;
+ SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64,
VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, PD, VEX_4V;
+ SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32,
VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, PS, VEX_4V, VEX_L;
+ SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64,
VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, PD, VEX_4V, VEX_L;
+ SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32,
VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, PS, VEX_4V, VEX_L;
+ SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64,
VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, PD, VEX_4V, VEX_L;
+ SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
}// Predicates = [HasAVX, NoVLX]
let Constraints = "$src1 = $dst" in {
defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
@@ -2789,13 +2707,13 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, ValueType vt,
let Predicates = [HasAVX] in {
defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
- SSEPackedSingle>, PS, VEX;
+ SSEPackedSingle>, PS, VEX, VEX_WIG;
defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
- SSEPackedDouble>, PD, VEX;
+ SSEPackedDouble>, PD, VEX, VEX_WIG;
defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps",
- SSEPackedSingle>, PS, VEX, VEX_L;
+ SSEPackedSingle>, PS, VEX, VEX_L, VEX_WIG;
defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd",
- SSEPackedDouble>, PD, VEX, VEX_L;
+ SSEPackedDouble>, PD, VEX, VEX_L, VEX_WIG;
}
defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
@@ -2839,7 +2757,7 @@ multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
OpndItins itins, bit IsCommutable = 0, Predicate prd> {
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
- VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
+ VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
@@ -2848,7 +2766,7 @@ let Constraints = "$src1 = $dst" in
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
OpVT256, VR256, loadv4i64, i256mem, itins,
- IsCommutable, 0>, VEX_4V, VEX_L;
+ IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG;
}
// These are ordered here for pattern ordering requirements with the fp versions
@@ -2876,7 +2794,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
[(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
(bc_v4i64 (v8f32 VR256:$src2))))],
[(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
- (loadv4i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_L;
+ (loadv4i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_L, VEX_WIG;
defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f256mem,
@@ -2884,14 +2802,14 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
(bc_v4i64 (v4f64 VR256:$src2))))],
[(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
(loadv4i64 addr:$src2)))], 0>,
- PD, VEX_4V, VEX_L;
+ PD, VEX_4V, VEX_L, VEX_WIG;
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
(bc_v2i64 (v4f32 VR128:$src2))))],
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (loadv2i64 addr:$src2)))], 0>, PS, VEX_4V;
+ (loadv2i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_WIG;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
@@ -2899,7 +2817,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
(bc_v2i64 (v2f64 VR128:$src2))))],
[(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
(loadv2i64 addr:$src2)))], 0>,
- PD, VEX_4V;
+ PD, VEX_4V, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -3065,17 +2983,17 @@ multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR128, v4f32, f128mem, loadv4f32,
- SSEPackedSingle, itins.s, 0>, PS, VEX_4V;
+ SSEPackedSingle, itins.s, 0>, PS, VEX_4V, VEX_WIG;
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
VR128, v2f64, f128mem, loadv2f64,
- SSEPackedDouble, itins.d, 0>, PD, VEX_4V;
+ SSEPackedDouble, itins.d, 0>, PD, VEX_4V, VEX_WIG;
defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
OpNode, VR256, v8f32, f256mem, loadv8f32,
- SSEPackedSingle, itins.s, 0>, PS, VEX_4V, VEX_L;
+ SSEPackedSingle, itins.s, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
OpNode, VR256, v4f64, f256mem, loadv4f64,
- SSEPackedDouble, itins.d, 0>, PD, VEX_4V, VEX_L;
+ SSEPackedDouble, itins.d, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -3092,10 +3010,10 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
SizeItins itins> {
defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
OpNode, FR32, f32mem, SSEPackedSingle, itins.s, 0>,
- XS, VEX_4V, VEX_LIG;
+ XS, VEX_4V, VEX_LIG, VEX_WIG;
defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
OpNode, FR64, f64mem, SSEPackedDouble, itins.d, 0>,
- XD, VEX_4V, VEX_LIG;
+ XD, VEX_4V, VEX_LIG, VEX_WIG;
let Constraints = "$src1 = $dst" in {
defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
@@ -3108,21 +3026,20 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
- SDPatternOperator IntSS,
- SDPatternOperator IntSD,
+ SDPatternOperator OpNode,
SizeItins itins> {
- defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, IntSS, VR128,
+ defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
- SSEPackedSingle, itins.s, 0>, XS, VEX_4V, VEX_LIG;
- defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, IntSD, VR128,
+ SSEPackedSingle, itins.s, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
+ defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
- SSEPackedDouble, itins.d, 0>, XD, VEX_4V, VEX_LIG;
+ SSEPackedDouble, itins.d, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;
let Constraints = "$src1 = $dst" in {
- defm SS : sse12_fp_scalar_int<opc, OpcodeStr, IntSS, VR128,
+ defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
SSEPackedSingle, itins.s>, XS;
- defm SD : sse12_fp_scalar_int<opc, OpcodeStr, IntSD, VR128,
+ defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
SSEPackedDouble, itins.d>, XD;
}
@@ -3131,29 +3048,23 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
// Binary Arithmetic instructions
defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x58, "add", null_frag, null_frag,
- SSE_ALU_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SSE_ALU_ITINS_S>;
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, null_frag,
- SSE_MUL_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SSE_MUL_ITINS_S>;
let isCommutable = 0 in {
defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, null_frag,
- SSE_ALU_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, SSE_ALU_ITINS_S>;
defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, null_frag,
- SSE_DIV_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, SSE_DIV_ITINS_S>;
defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5F, "max", int_x86_sse_max_ss,
- int_x86_sse2_max_sd, SSE_ALU_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SSE_ALU_ITINS_S>;
defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5D, "min", int_x86_sse_min_ss,
- int_x86_sse2_min_sd, SSE_ALU_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SSE_ALU_ITINS_S>;
}
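// A sketch of the null_frag convention used above (assuming its usual
// TableGen semantics): passing null_frag leaves the *_Int variants defined
// for encoding and memory-folding purposes but with no selection pattern,
// e.g.
//
//   defm FOO : basic_sse12_fp_binop_s_int<0x58, "foo", null_frag,
//                                         SSE_ALU_ITINS_S>;  // hypothetical
//
// MIN/MAX instead pass X86fmins/X86fmaxs because scalar FP min/max keep a
// dedicated node; their NaN and signed-zero ordering has no plain-IR form.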
let isCodeGenOnly = 1 in {
@@ -3400,7 +3311,7 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
Sched<[itins.Sched.Folded, ReadAfterLd]>,
Requires<[target, OptForSize]>;
- let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in {
+ let isCodeGenOnly = 1, Constraints = "$src1 = $dst", ExeDomain = d in {
def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
@@ -3444,7 +3355,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[], itins.rm, d>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
- let isCodeGenOnly = 1 in {
+ let isCodeGenOnly = 1, ExeDomain = d in {
def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -3465,7 +3376,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
// which has a clobber before the rcp, vs.
// vrcpss mem, %xmm0, %xmm0
// TODO: In theory, we could fold the load, and avoid the stall caused by
- // the partial register store, either in ExeDepFix or with smarter RA.
+ // the partial register store, either in ExecutionDepsFix or with smarter RA.
let Predicates = [UseAVX] in {
def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r)
(ScalarVT (IMPLICIT_DEF)), RC:$src)>;
@@ -3495,22 +3406,22 @@ let Predicates = prds in {
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v4f32 (OpNode VR128:$src)))],
- itins.rr>, VEX, Sched<[itins.Sched]>;
+ itins.rr>, VEX, Sched<[itins.Sched]>, VEX_WIG;
def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))],
- itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>, VEX_WIG;
def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
- itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>, VEX_WIG;
def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))],
- itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>, VEX_WIG;
}
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -3531,22 +3442,22 @@ let Predicates = [HasAVX] in {
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v2f64 (OpNode VR128:$src)))],
- itins.rr>, VEX, Sched<[itins.Sched]>;
+ itins.rr>, VEX, Sched<[itins.Sched]>, VEX_WIG;
def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))],
- itins.rm>, VEX, Sched<[itins.Sched.Folded]>;
+ itins.rm>, VEX, Sched<[itins.Sched.Folded]>, VEX_WIG;
def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
- itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>;
+ itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>, VEX_WIG;
def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))],
- itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>;
+ itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>, VEX_WIG;
}
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -3567,7 +3478,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
f32mem,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
- SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG;
+ SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG, VEX_WIG;
}
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -3579,7 +3490,7 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
f64mem,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
OpNode, SSEPackedDouble, itins, "SD">,
- XD, VEX_4V, VEX_LIG;
+ XD, VEX_4V, VEX_LIG, VEX_WIG;
}
// Square root.
@@ -3647,41 +3558,41 @@ def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src),
addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
+ IIC_SSE_MOVNT>, VEX, VEX_WIG;
def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntpd\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2f64 VR128:$src),
addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
+ IIC_SSE_MOVNT>, VEX, VEX_WIG;
let ExeDomain = SSEPackedInt in
def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
+ (ins i128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2i64 VR128:$src),
addr:$dst)],
- IIC_SSE_MOVNT>, VEX;
+ IIC_SSE_MOVNT>, VEX, VEX_WIG;
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v8f32 VR256:$src),
addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
+ IIC_SSE_MOVNT>, VEX, VEX_L, VEX_WIG;
def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntpd\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f64 VR256:$src),
addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
+ IIC_SSE_MOVNT>, VEX, VEX_L, VEX_WIG;
let ExeDomain = SSEPackedInt in
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
+ (ins i256mem:$dst, VR256:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4i64 VR256:$src),
addr:$dst)],
- IIC_SSE_MOVNT>, VEX, VEX_L;
+ IIC_SSE_MOVNT>, VEX, VEX_L, VEX_WIG;
}
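// Note on the f128mem -> i128mem switch above (an assumption about intent):
// both operand classes describe a 128-bit memory reference and encode
// identically; they differ in operand printing/parsing, so the change makes
// the integer-domain movntdq operands match their v2i64/v4i64 patterns:
//
//   (ins i128mem:$dst, VR128:$src)  // integer-typed 128-bit memory operand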
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
@@ -3797,20 +3708,18 @@ def : Pat<(X86MFence), (MFENCE)>;
//===----------------------------------------------------------------------===//
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
- "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
- IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>;
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
+ IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>, VEX_WIG;
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
- "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
- IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>;
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
+ IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>, VEX_WIG;
-let Predicates = [UseSSE1] in {
def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
- "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
- IIC_SSE_LDMXCSR>, TB, Sched<[WriteLoad]>;
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
+ IIC_SSE_LDMXCSR>, TB, Sched<[WriteLoad]>;
def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
- "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
- IIC_SSE_STMXCSR>, TB, Sched<[WriteStore]>;
-}
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
+ IIC_SSE_STMXCSR>, TB, Sched<[WriteStore]>;
//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
@@ -3821,16 +3730,16 @@ let ExeDomain = SSEPackedInt in { // SSE integer instructions
let hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
- VEX;
+ VEX, VEX_WIG;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
- VEX, VEX_L;
+ VEX, VEX_L, VEX_WIG;
def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
- VEX;
+ VEX, VEX_WIG;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
- VEX, VEX_L;
+ VEX, VEX_L, VEX_WIG;
}
// For Disassembler
@@ -3839,54 +3748,58 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVA_P_RR>,
- VEX;
+ VEX, VEX_WIG;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
+ IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG;
def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}", [],
IIC_SSE_MOVU_P_RR>,
- VEX;
+ VEX, VEX_WIG;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
"movdqu\t{$src, $dst|$dst, $src}", [],
- IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
+ IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG;
}
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
hasSideEffects = 0, SchedRW = [WriteLoad] in {
+let Predicates = [HasAVX,NoVLX] in
def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
- VEX;
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (alignedloadv2i64 addr:$src))],
+ IIC_SSE_MOVA_P_RM>, VEX, VEX_WIG;
def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
- VEX, VEX_L;
-let Predicates = [HasAVX] in {
- def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
- XS, VEX;
- def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
- XS, VEX, VEX_L;
-}
+ VEX, VEX_L, VEX_WIG;
+let Predicates = [HasAVX,NoVLX] in
+def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (loadv2i64 addr:$src))],
+ IIC_SSE_MOVU_P_RM>, XS, VEX, VEX_WIG;
+def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
+ XS, VEX, VEX_L, VEX_WIG;
}
let mayStore = 1, hasSideEffects = 0, SchedRW = [WriteStore] in {
+let Predicates = [HasAVX,NoVLX] in
def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
- VEX;
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2i64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>, VEX, VEX_WIG;
def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
- VEX, VEX_L;
-let Predicates = [HasAVX] in {
+ VEX, VEX_L, VEX_WIG;
+let Predicates = [HasAVX,NoVLX] in
def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
- XS, VEX;
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [(store (v2i64 VR128:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>,
+ XS, VEX, VEX_WIG;
def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
- XS, VEX, VEX_L;
-}
+ XS, VEX, VEX_L, VEX_WIG;
}
let SchedRW = [WriteMove] in {
@@ -3949,6 +3862,50 @@ def : InstAlias<"vmovdqu\t{$src, $dst|$dst, $src}",
def : InstAlias<"vmovdqu\t{$src, $dst|$dst, $src}",
(VMOVDQUYrr_REV VR256L:$dst, VR256H:$src), 0>;
+let Predicates = [HasAVX, NoVLX] in {
+ // Additional patterns for other integer sizes.
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (VMOVDQAmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (VMOVDQAmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (VMOVDQAmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (VMOVDQUmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (VMOVDQUmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (VMOVDQUmr addr:$dst, VR128:$src)>;
+
+ // Special patterns for storing subvector extracts of the lower 128 bits.
+ // It's cheaper to just use VMOVDQA/VMOVDQU instead of VEXTRACTF128mr.
+ def : Pat<(alignedstore (v2i64 (extract_subvector
+ (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQAmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v4i32 (extract_subvector
+ (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQAmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v8i16 (extract_subvector
+ (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQAmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v16i8 (extract_subvector
+ (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQAmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+
+ def : Pat<(store (v2i64 (extract_subvector
+ (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQUmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v4i32 (extract_subvector
+ (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQUmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v8i16 (extract_subvector
+ (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQUmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+ def : Pat<(store (v16i8 (extract_subvector
+ (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQUmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+}
+
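// A worked example of the trade-off the comment above describes (assembly
// shown only for illustration): for the low xmm half of a ymm register,
//
//   vextractf128 $0, %ymm0, (%rdi)  // needs an immediate, longer encoding
//   vmovdqa      %xmm0, (%rdi)      // plain 16-byte store, shorter encoding
//
// write identical memory, so the patterns select the cheaper plain store on
// the sub_xmm subregister instead of VEXTRACTF128mr.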
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//
@@ -4037,12 +3994,12 @@ defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
- loadv2i64, i128mem, SSE_PMADD, 0>, VEX_4V;
+ loadv2i64, i128mem, SSE_PMADD, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16,
VR256, loadv4i64, i256mem, SSE_PMADD,
- 0>, VEX_4V, VEX_L;
+ 0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
memopv2i64, i128mem, SSE_PMADD>;
@@ -4050,11 +4007,11 @@ defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128,
loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 0>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256,
loadv4i64, i256mem, SSE_INTMUL_ITINS_P, 0>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128,
memopv2i64, i128mem, SSE_INTALU_ITINS_P>;
@@ -4062,11 +4019,11 @@ defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128,
let Predicates = [HasAVX, NoVLX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 0>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX] in
defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
VR256, loadv4i64, i256mem,
- SSE_INTMUL_ITINS_P, 0>, VEX_4V, VEX_L;
+ SSE_INTMUL_ITINS_P, 0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
memopv2i64, i128mem, SSE_INTMUL_ITINS_P>;
@@ -4113,11 +4070,11 @@ multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm,
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
OpNode, OpNode2, VR128, DstVT128, SrcVT,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
OpNode, OpNode2, VR256, DstVT256, SrcVT,
- loadv2i64, 0>, VEX_4V, VEX_L;
+ loadv2i64, 0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2,
VR128, DstVT128, SrcVT, memopv2i64>;
@@ -4138,10 +4095,10 @@ multiclass PDI_binop_ri_all<bits<8> opc, Format ImmForm, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm V#NAME : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode,
- VR128, v16i8, 0>, VEX_4V;
+ VR128, v16i8, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm V#NAME#Y : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode,
- VR256, v32i8, 0>, VEX_4V, VEX_L;
+ VR256, v32i8, 0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_ri<opc, ImmForm, OpcodeStr, OpNode, VR128, v16i8>;
}
@@ -4202,7 +4159,7 @@ let Predicates = [HasAVX, prd] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>;
+ IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>, VEX_WIG;
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
!strconcat("v", OpcodeStr,
@@ -4210,7 +4167,7 @@ let Predicates = [HasAVX, prd] in {
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)),
(i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX,
- Sched<[WriteShuffleLd]>;
+ Sched<[WriteShuffleLd]>, VEX_WIG;
}
let Predicates = [HasAVX2, prd] in {
@@ -4220,7 +4177,7 @@ let Predicates = [HasAVX2, prd] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
- IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>;
+ IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>, VEX_WIG;
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, u8imm:$src2),
!strconcat("v", OpcodeStr,
@@ -4228,7 +4185,7 @@ let Predicates = [HasAVX2, prd] in {
[(set VR256:$dst,
(vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)),
(i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, VEX_L,
- Sched<[WriteShuffleLd]>;
+ Sched<[WriteShuffleLd]>, VEX_WIG;
}
let Predicates = [UseSSE2] in {
@@ -4257,20 +4214,6 @@ defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw,
defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw,
NoVLX_Or_NoBWI>, XD;
-let Predicates = [HasAVX] in {
- def : Pat<(v4f32 (X86PShufd (loadv4f32 addr:$src1), (i8 imm:$imm))),
- (VPSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (VPSHUFDri VR128:$src1, imm:$imm)>;
-}
-
-let Predicates = [UseSSE2] in {
- def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
- (PSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (PSHUFDri VR128:$src1, imm:$imm)>;
-}
-
//===---------------------------------------------------------------------===//
// Packed Integer Pack Instructions (SSE & AVX)
//===---------------------------------------------------------------------===//
@@ -4364,24 +4307,24 @@ multiclass sse4_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus,
loadv2i64, 0>, VEX_4V;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPACKUSWB : sse2_pack_y<0x67, "vpackuswb", v32i8, v16i16, X86Packus>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPACKUSDW : sse4_pack_y<0x2B, "vpackusdw", v16i16, v8i32, X86Packus>,
VEX_4V, VEX_L;
}
@@ -4443,44 +4386,44 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
- loadv2i64, 0>, VEX_4V;
+ loadv2i64, 0>, VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -4565,14 +4508,14 @@ def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))],
- IIC_SSE_MOVMSK>, VEX;
+ IIC_SSE_MOVMSK>, VEX, VEX_WIG;
let Predicates = [HasAVX2] in {
def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(ins VR256:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))]>,
- VEX, VEX_L;
+ VEX, VEX_L, VEX_WIG;
}
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src),
@@ -4593,13 +4536,13 @@ def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
- IIC_SSE_MASKMOV>, VEX;
+ IIC_SSE_MASKMOV>, VEX, VEX_WIG;
let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in
def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
- IIC_SSE_MASKMOV>, VEX;
+ IIC_SSE_MASKMOV>, VEX, VEX_WIG;
let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
@@ -4725,19 +4668,6 @@ def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
} // ExeDomain = SSEPackedInt
-
-def : Pat<(v8i32 (X86Vinsert (v8i32 immAllZerosV), GR32:$src2, (iPTR 0))),
- (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;
-
-def : Pat<(v4i64 (X86Vinsert (bc_v4i64 (v8i32 immAllZerosV)), GR64:$src2, (iPTR 0))),
- (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>;
-
-def : Pat<(v8i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))),
- (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;
-
-def : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
- (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>;
-
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
@@ -4758,12 +4688,12 @@ def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
} //SchedRW
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
-def VMOVPQIto64rm : VRS2I<0x7E, MRMDestMem, (outs),
+def VMOVPQIto64mr : VRS2I<0x7E, MRMDestMem, (outs),
(ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
-def MOVPQIto64rm : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+def MOVPQIto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
} // ExeDomain = SSEPackedInt
@@ -4837,6 +4767,8 @@ let Predicates = [UseAVX] in {
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
// These instructions also write zeros in the high part of a 256-bit register.
let AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
+ (VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
@@ -4866,6 +4798,8 @@ let Predicates = [UseSSE2] in {
(MOV64toPQIrr GR64:$src)>;
}
let AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
+ (MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
@@ -4903,7 +4837,7 @@ def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
- VEX, Requires<[UseAVX]>;
+ VEX, Requires<[UseAVX]>, VEX_WIG;
def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4920,7 +4854,7 @@ def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (extractelt (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)],
- IIC_SSE_MOVDQ>, VEX;
+ IIC_SSE_MOVDQ>, VEX, VEX_WIG;
def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (extractelt (v2i64 VR128:$src),
@@ -4932,7 +4866,7 @@ def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
SchedRW = [WriteVecLogic] in {
def VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, VEX;
+ "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, VEX, VEX_WIG;
def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>;
}
@@ -4978,7 +4912,7 @@ def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
IIC_SSE_MOVQ_RR>,
- XS, VEX, Requires<[UseAVX]>;
+ XS, VEX, Requires<[UseAVX]>, VEX_WIG;
let AddedComplexity = 15 in
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -5016,13 +4950,13 @@ def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
let Predicates = [HasAVX, NoVLX] in {
defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
- v4f32, VR128, loadv4f32, f128mem>, VEX;
+ v4f32, VR128, loadv4f32, f128mem>, VEX, VEX_WIG;
defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
- v4f32, VR128, loadv4f32, f128mem>, VEX;
+ v4f32, VR128, loadv4f32, f128mem>, VEX, VEX_WIG;
defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
- v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L;
+ v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L, VEX_WIG;
defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
- v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L;
+ v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L, VEX_WIG;
}
defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
memopv4f32, f128mem>;
@@ -5090,8 +5024,8 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
}
let Predicates = [HasAVX, NoVLX] in {
- defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
- defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L;
+ defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX, VEX_WIG;
+ defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L, VEX_WIG;
}
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
@@ -5108,16 +5042,6 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVDDUPYrr VR256:$src)>;
}
-let Predicates = [HasAVX] in {
- def : Pat<(X86Movddup (bc_v2f64 (loadv4f32 addr:$src))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
- def : Pat<(X86Movddup (bc_v2f64 (loadv2i64 addr:$src))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
- def : Pat<(X86Movddup (bc_v2f64
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
-}
-
let Predicates = [HasAVX, NoVLX] in
def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
(VMOVDDUPrm addr:$src)>;
@@ -5128,13 +5052,6 @@ def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
let Predicates = [UseSSE3] in {
def : Pat<(X86Movddup (memopv2f64 addr:$src)),
(MOVDDUPrm addr:$src)>;
- def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
- (MOVDDUPrm addr:$src)>;
- def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
- (MOVDDUPrm addr:$src)>;
- def : Pat<(X86Movddup (bc_v2f64
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
- (MOVDDUPrm addr:$src)>;
}
//===---------------------------------------------------------------------===//
@@ -5145,11 +5062,11 @@ let SchedRW = [WriteLoad] in {
let Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX, VEX_WIG;
def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
- VEX, VEX_L;
+ VEX, VEX_L, VEX_WIG;
}
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
@@ -5183,15 +5100,15 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
- f128mem, SSE_ALU_F32P, loadv4f32, 0>, XD, VEX_4V;
+ f128mem, SSE_ALU_F32P, loadv4f32, 0>, XD, VEX_4V, VEX_WIG;
defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
- f256mem, SSE_ALU_F32P, loadv8f32, 0>, XD, VEX_4V, VEX_L;
+ f256mem, SSE_ALU_F32P, loadv8f32, 0>, XD, VEX_4V, VEX_L, VEX_WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
- f128mem, SSE_ALU_F64P, loadv2f64, 0>, PD, VEX_4V;
+ f128mem, SSE_ALU_F64P, loadv2f64, 0>, PD, VEX_4V, VEX_WIG;
defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
- f256mem, SSE_ALU_F64P, loadv4f64, 0>, PD, VEX_4V, VEX_L;
+ f256mem, SSE_ALU_F64P, loadv4f64, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
}
}
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
@@ -5278,23 +5195,23 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
- X86fhadd, loadv4f32, 0>, VEX_4V;
+ X86fhadd, loadv4f32, 0>, VEX_4V, VEX_WIG;
defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
- X86fhsub, loadv4f32, 0>, VEX_4V;
+ X86fhsub, loadv4f32, 0>, VEX_4V, VEX_WIG;
defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
- X86fhadd, loadv8f32, 0>, VEX_4V, VEX_L;
+ X86fhadd, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
- X86fhsub, loadv8f32, 0>, VEX_4V, VEX_L;
+ X86fhsub, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
- X86fhadd, loadv2f64, 0>, VEX_4V;
+ X86fhadd, loadv2f64, 0>, VEX_4V, VEX_WIG;
defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
- X86fhsub, loadv2f64, 0>, VEX_4V;
+ X86fhsub, loadv2f64, 0>, VEX_4V, VEX_WIG;
defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
- X86fhadd, loadv4f64, 0>, VEX_4V, VEX_L;
+ X86fhadd, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
- X86fhsub, loadv4f64, 0>, VEX_4V, VEX_L;
+ X86fhsub, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
}
}
@@ -5352,84 +5269,24 @@ multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
Sched<[WriteVecALULd]>;
}
-// Helper fragments to match sext vXi1 to vXiY.
-def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
- VR128:$src))>;
-def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i8 15)))>;
-def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i8 31)))>;
-def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
- VR256:$src))>;
-def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i8 15)))>;
-def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>;
-
-let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, X86Abs, loadv2i64>, VEX;
- defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, X86Abs, loadv2i64>, VEX;
-}
-let Predicates = [HasAVX, NoVLX] in {
- defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, X86Abs, loadv2i64>, VEX;
-}
-
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
- def : Pat<(xor
- (bc_v2i64 (v16i1sextv16i8)),
- (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
- (VPABSBrr VR128:$src)>;
- def : Pat<(xor
- (bc_v2i64 (v8i1sextv8i16)),
- (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
- (VPABSWrr VR128:$src)>;
+ defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, loadv2i64>, VEX, VEX_WIG;
+ defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, loadv2i64>, VEX, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(xor
- (bc_v2i64 (v4i1sextv4i32)),
- (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
- (VPABSDrr VR128:$src)>;
-}
-
-let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, X86Abs>, VEX, VEX_L;
- defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, X86Abs>, VEX, VEX_L;
-}
-let Predicates = [HasAVX2, NoVLX] in {
- defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, X86Abs>, VEX, VEX_L;
+ defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, loadv2i64>, VEX, VEX_WIG;
}
-
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- def : Pat<(xor
- (bc_v4i64 (v32i1sextv32i8)),
- (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))),
- (VPABSBYrr VR256:$src)>;
- def : Pat<(xor
- (bc_v4i64 (v16i1sextv16i16)),
- (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))),
- (VPABSWYrr VR256:$src)>;
+ defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs>, VEX, VEX_L, VEX_WIG;
+ defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs>, VEX, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
- def : Pat<(xor
- (bc_v4i64 (v8i1sextv8i32)),
- (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))),
- (VPABSDYrr VR256:$src)>;
+ defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs>, VEX, VEX_L, VEX_WIG;
}
-defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, X86Abs, memopv2i64>;
-defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, X86Abs, memopv2i64>;
-defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, X86Abs, memopv2i64>;
-
-let Predicates = [UseSSSE3] in {
- def : Pat<(xor
- (bc_v2i64 (v16i1sextv16i8)),
- (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
- (PABSBrr VR128:$src)>;
- def : Pat<(xor
- (bc_v2i64 (v8i1sextv8i16)),
- (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
- (PABSWrr VR128:$src)>;
- def : Pat<(xor
- (bc_v2i64 (v4i1sextv4i32)),
- (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
- (PABSDrr VR128:$src)>;
-}
+defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, memopv2i64>;
+defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, memopv2i64>;
+defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, memopv2i64>;
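// The deleted sext/xor patterns matched the classic branchless abs
// expansion, which a generic abs node now represents directly (a sketch of
// the identity; the recognition itself lives outside this file):
//
//   mask   = x >>s (w - 1)   // arithmetic shift: all-ones iff x is negative
//   abs(x) = (x + mask) ^ mask
//
// so PABSB/PABSW/PABSD select from the plain abs operator above.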
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
@@ -5527,45 +5384,45 @@ let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
VR128, loadv2i64, i128mem,
- SSE_PSHUFB, 0>, VEX_4V;
+ SSE_PSHUFB, 0>, VEX_4V, VEX_WIG;
defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
v16i8, VR128, loadv2i64, i128mem,
- SSE_PMADD, 0>, VEX_4V;
+ SSE_PMADD, 0>, VEX_4V, VEX_WIG;
}
defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
VR128, loadv2i64, i128mem,
- SSE_PMULHRSW, 0>, VEX_4V;
+ SSE_PMULHRSW, 0>, VEX_4V, VEX_WIG;
}
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
loadv2i64, i128mem,
- SSE_PHADDSUBW, 0>, VEX_4V;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_WIG;
defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
loadv2i64, i128mem,
- SSE_PHADDSUBD, 0>, VEX_4V;
+ SSE_PHADDSUBD, 0>, VEX_4V, VEX_WIG;
defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
loadv2i64, i128mem,
- SSE_PHADDSUBW, 0>, VEX_4V;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_WIG;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
loadv2i64, i128mem,
SSE_PHADDSUBD, 0>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128,
- SSE_PSIGN, loadv2i64, 0>, VEX_4V;
+ SSE_PSIGN, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw",
int_x86_ssse3_psign_w_128,
- SSE_PSIGN, loadv2i64, 0>, VEX_4V;
+ SSE_PSIGN, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd",
int_x86_ssse3_psign_d_128,
- SSE_PSIGN, loadv2i64, 0>, VEX_4V;
+ SSE_PSIGN, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128,
- SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V;
+ SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128,
- SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V;
+ SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V, VEX_WIG;
}
}
@@ -5573,42 +5430,42 @@ let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
VR256, loadv4i64, i256mem,
- SSE_PSHUFB, 0>, VEX_4V, VEX_L;
+ SSE_PSHUFB, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
v32i8, VR256, loadv4i64, i256mem,
- SSE_PMADD, 0>, VEX_4V, VEX_L;
+ SSE_PMADD, 0>, VEX_4V, VEX_L, VEX_WIG;
}
defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
VR256, loadv4i64, i256mem,
- SSE_PMULHRSW, 0>, VEX_4V, VEX_L;
+ SSE_PMULHRSW, 0>, VEX_4V, VEX_L, VEX_WIG;
}
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
VR256, loadv4i64, i256mem,
- SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
loadv4i64, i256mem,
- SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
VR256, loadv4i64, i256mem,
- SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
+ SSE_PHADDSUBW, 0>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPSIGNBY : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
- WriteVecALU>, VEX_4V, VEX_L;
+ WriteVecALU>, VEX_4V, VEX_L, VEX_WIG;
defm VPSIGNWY : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
- WriteVecALU>, VEX_4V, VEX_L;
+ WriteVecALU>, VEX_4V, VEX_L, VEX_WIG;
defm VPSIGNDY : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d,
- WriteVecALU>, VEX_4V, VEX_L;
+ WriteVecALU>, VEX_4V, VEX_L, VEX_WIG;
defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw,
- WriteVecALU>, VEX_4V, VEX_L;
+ WriteVecALU>, VEX_4V, VEX_L, VEX_WIG;
defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
int_x86_avx2_phsub_sw,
- WriteVecALU>, VEX_4V, VEX_L;
+ WriteVecALU>, VEX_4V, VEX_L, VEX_WIG;
}
}
@@ -5686,9 +5543,9 @@ multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> {
}
let Predicates = [HasAVX] in
- defm VPALIGNR : ssse3_palignr<"vpalignr", 0>, VEX_4V;
+ defm VPALIGNR : ssse3_palignr<"vpalignr", 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2] in
- defm VPALIGNR : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L;
+ defm VPALIGNR : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
defm PALIGNR : ssse3_palignr<"palignr">;
@@ -5779,10 +5636,10 @@ multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, SSEItins>;
let Predicates = [HasAVX, prd] in
defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
- VR128, VR128, AVXItins>, VEX;
+ VR128, VR128, AVXItins>, VEX, VEX_WIG;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
- VR256, VR128, AVX2Itins>, VEX, VEX_L;
+ VR256, VR128, AVX2Itins>, VEX, VEX_L, VEX_WIG;
}
multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
@@ -6010,12 +5867,12 @@ multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
}
}
-defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
-defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
+defm : SS41I_pmovx_patterns<"VPMOVSX", "s", sext_invec, extloadi32i16>;
+defm : SS41I_pmovx_patterns<"VPMOVZX", "z", zext_invec, loadi16_anyext>;
let Predicates = [UseSSE41] in {
- defm : SS41I_pmovx_patterns<"PMOVSX", "s", X86vsext, extloadi32i16>;
- defm : SS41I_pmovx_patterns<"PMOVZX", "z", X86vzext, loadi16_anyext>;
+ defm : SS41I_pmovx_patterns<"PMOVSX", "s", sext_invec, extloadi32i16>;
+ defm : SS41I_pmovx_patterns<"PMOVZX", "z", zext_invec, loadi16_anyext>;
}
//===----------------------------------------------------------------------===//
@@ -6103,20 +5960,20 @@ multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR64:$dst,
(extractelt (v2i64 VR128:$src1), imm:$src2))]>,
- Sched<[WriteShuffle]>, REX_W;
+ Sched<[WriteShuffle]>;
let SchedRW = [WriteShuffleLd, WriteRMW] in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i64mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (v2i64 VR128:$src1), imm:$src2),
- addr:$dst)]>, REX_W;
+ addr:$dst)]>;
}
let Predicates = [HasAVX, NoDQI] in
defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
-defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
+defm PEXTRQ : SS41I_extract64<0x16, "pextrq">, REX_W;
/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
@@ -6140,7 +5997,7 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr,
let ExeDomain = SSEPackedSingle in {
let Predicates = [UseAVX] in
- defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
+ defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX, VEX_WIG;
defm EXTRACTPS : SS41I_extractf32<0x17, "extractps", SSE_EXTRACT_ITINS>;
}
@@ -6268,7 +6125,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
let ExeDomain = SSEPackedSingle in {
let Predicates = [UseAVX] in
- defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
+ defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>;
}
@@ -6461,14 +6318,14 @@ let Predicates = [HasAVX] in {
defm VROUND : sse41_fp_unop_p<0x08, 0x09, "vround", f128mem, VR128,
loadv4f32, loadv2f64,
int_x86_sse41_round_ps,
- int_x86_sse41_round_pd>, VEX;
+ int_x86_sse41_round_pd>, VEX, VEX_WIG;
defm VROUNDY : sse41_fp_unop_p<0x08, 0x09, "vround", f256mem, VR256,
loadv8f32, loadv4f64,
int_x86_avx_round_ps_256,
- int_x86_avx_round_pd_256>, VEX, VEX_L;
+ int_x86_avx_round_pd_256>, VEX, VEX_L, VEX_WIG;
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround",
int_x86_sse41_round_ss,
- int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
+ int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG, VEX_WIG;
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
}
@@ -6606,20 +6463,20 @@ let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
- Sched<[WriteVecLogic]>, VEX;
+ Sched<[WriteVecLogic]>, VEX, VEX_WIG;
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
- Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX;
+ Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_WIG;
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
- Sched<[WriteVecLogic]>, VEX, VEX_L;
+ Sched<[WriteVecLogic]>, VEX, VEX_L, VEX_WIG;
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
- Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_L;
+ Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_L, VEX_WIG;
}
let Defs = [EFLAGS] in {
@@ -6722,7 +6579,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
int_x86_sse41_phminposuw, loadv2i64,
- WriteVecIMul>, VEX;
+ WriteVecIMul>, VEX, VEX_WIG;
defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
int_x86_sse41_phminposuw, memopv2i64,
WriteVecIMul>;
@@ -6778,65 +6635,65 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX, NoVLX] in {
defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
defm VPMULDQ : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v2i64, v4i32,
VR128, loadv2i64, i128mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPMULDQY : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v4i64, v8i32,
VR256, loadv4i64, i256mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -6864,18 +6721,18 @@ let Constraints = "$src1 = $dst" in {
let Predicates = [HasAVX, NoVLX] in {
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_PMULLD_ITINS>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V;
+ VEX_4V, VEX_WIG;
}
let Predicates = [HasAVX2] in {
defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_PMULLD_ITINS>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
- VEX_4V, VEX_L;
+ VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -6945,52 +6802,52 @@ let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
VR128, loadv2i64, i128mem, 0,
- DEFAULT_ITINS_MPSADSCHED>, VEX_4V;
+ DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_WIG;
}
let ExeDomain = SSEPackedSingle in {
defm VBLENDPS : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v4f32,
VR128, loadv4f32, f128mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_WIG;
defm VBLENDPSY : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v8f32,
VR256, loadv8f32, f256mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L;
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L, VEX_WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VBLENDPD : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
VR128, loadv2f64, f128mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V;
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_WIG;
defm VBLENDPDY : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
VR256, loadv4f64, f256mem, 0,
- DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L;
+ DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L, VEX_WIG;
}
defm VPBLENDW : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
VR128, loadv2i64, i128mem, 0,
- DEFAULT_ITINS_BLENDSCHED>, VEX_4V;
+ DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_WIG;
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
VR128, loadv4f32, f128mem, 0,
- SSE_DPPS_ITINS>, VEX_4V;
+ SSE_DPPS_ITINS>, VEX_4V, VEX_WIG;
let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
VR128, loadv2f64, f128mem, 0,
- SSE_DPPS_ITINS>, VEX_4V;
+ SSE_DPPS_ITINS>, VEX_4V, VEX_WIG;
let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
VR256, loadv8f32, i256mem, 0,
- SSE_DPPS_ITINS>, VEX_4V, VEX_L;
+ SSE_DPPS_ITINS>, VEX_4V, VEX_L, VEX_WIG;
}
let Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
VR256, loadv4i64, i256mem, 0,
- DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_L;
+ DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_L, VEX_WIG;
}
defm VPBLENDWY : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
VR256, loadv4i64, i256mem, 0,
- DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
+ DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -7020,6 +6877,19 @@ let Constraints = "$src1 = $dst" in {
SSE_DPPD_ITINS>;
}
+// For insertion into the zero index (low half) of a 256-bit vector, it is
+// more efficient to generate a blend with an immediate than an insert*128.
+let Predicates = [HasAVX] in {
+def : Pat<(insert_subvector (v4f64 VR256:$src1), (v2f64 VR128:$src2), (iPTR 0)),
+ (VBLENDPDYrri VR256:$src1,
+ (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0x3)>;
+def : Pat<(insert_subvector (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR 0)),
+ (VBLENDPSYrri VR256:$src1,
+ (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+}
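+// A note on the immediates above (editor's sketch): the widened 128-bit value
+// is the blend's second source, so imm 0x3 (the two low f64 lanes) or 0xf
+// (the four low f32 lanes) takes the low half from $src2 and keeps the upper
+// half of $src1.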
+
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop,
@@ -7165,14 +7035,14 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr,
- "\t{$src2, $dst|$dst, $src2}"),
+ "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))],
itins.rr>, Sched<[itins.Sched]>;
def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, x86memop:$src2),
!strconcat(OpcodeStr,
- "\t{$src2, $dst|$dst, $src2}"),
+ "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
[(set VR128:$dst,
(IntId VR128:$src1,
(bitconvert (mem_frag addr:$src2)), XMM0))],
@@ -7193,18 +7063,18 @@ defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem,
DEFAULT_ITINS_VARBLENDSCHED>;
// Aliases with the implicit xmm0 argument
-def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
- (BLENDVPDrr0 VR128:$dst, VR128:$src2)>;
-def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
- (BLENDVPDrm0 VR128:$dst, f128mem:$src2)>;
-def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
- (BLENDVPSrr0 VR128:$dst, VR128:$src2)>;
-def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
- (BLENDVPSrm0 VR128:$dst, f128mem:$src2)>;
-def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
- (PBLENDVBrr0 VR128:$dst, VR128:$src2)>;
-def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
- (PBLENDVBrm0 VR128:$dst, i128mem:$src2)>;
+def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
+ (BLENDVPDrr0 VR128:$dst, VR128:$src2), 0>;
+def : InstAlias<"blendvpd\t{$src2, $dst|$dst, $src2}",
+ (BLENDVPDrm0 VR128:$dst, f128mem:$src2), 0>;
+def : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
+ (BLENDVPSrr0 VR128:$dst, VR128:$src2), 0>;
+def : InstAlias<"blendvps\t{$src2, $dst|$dst, $src2}",
+ (BLENDVPSrm0 VR128:$dst, f128mem:$src2), 0>;
+def : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
+ (PBLENDVBrr0 VR128:$dst, VR128:$src2), 0>;
+def : InstAlias<"pblendvb\t{$src2, $dst|$dst, $src2}",
+ (PBLENDVBrm0 VR128:$dst, i128mem:$src2), 0>;
let Predicates = [UseSSE41] in {
def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
@@ -7228,17 +7098,14 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
let SchedRW = [WriteLoad] in {
let Predicates = [HasAVX, NoVLX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovntdqa\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
- VEX;
+ "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
+ VEX, VEX_WIG;
let Predicates = [HasAVX2, NoVLX] in
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "vmovntdqa\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>,
- VEX, VEX_L;
+ "vmovntdqa\t{$src, $dst|$dst, $src}", []>,
+ VEX, VEX_L, VEX_WIG;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movntdqa\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
+ "movntdqa\t{$src, $dst|$dst, $src}", []>;
} // SchedRW
let Predicates = [HasAVX2, NoVLX] in {
@@ -7295,11 +7162,11 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX] in
defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
- loadv2i64, i128mem, 0>, VEX_4V;
+ loadv2i64, i128mem, 0>, VEX_4V, VEX_WIG;
let Predicates = [HasAVX2] in
defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
- loadv4i64, i256mem, 0>, VEX_4V, VEX_L;
+ loadv4i64, i256mem, 0>, VEX_4V, VEX_L, VEX_WIG;
let Constraints = "$src1 = $dst" in
defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
@@ -7323,7 +7190,7 @@ multiclass pseudo_pcmpistrm<string asm, PatFrag ld_frag> {
let Defs = [EFLAGS], usesCustomInserter = 1 in {
defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128", loadv2i64>,
- Requires<[HasAVX]>;
+ Requires<[HasAVX]>, VEX_WIG;
defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128", memopv2i64>,
Requires<[UseSSE42]>;
}
@@ -7397,7 +7264,7 @@ multiclass pseudo_pcmpistri<string asm, PatFrag ld_frag> {
let Defs = [EFLAGS], usesCustomInserter = 1 in {
defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI", loadv2i64>,
- Requires<[HasAVX]>;
+ Requires<[HasAVX]>, VEX_WIG;
defm PCMPISTRI : pseudo_pcmpistri<"#PCMPISTRI", memopv2i64>,
Requires<[UseSSE42]>;
}
@@ -7515,14 +7382,18 @@ multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId,
bit UsesXMM0 = 0> {
def rr : I<Opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !if(UsesXMM0,
+ !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
[!if(UsesXMM0,
(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)),
(set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, T8;
def rm : I<Opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !if(UsesXMM0,
+ !strconcat(OpcodeStr, "\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}"),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}")),
[!if(UsesXMM0,
(set VR128:$dst, (IntId VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2)), XMM0)),
@@ -7557,10 +7428,10 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
}
// Aliases with explicit %xmm0
-def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
- (SHA256RNDS2rr VR128:$dst, VR128:$src2)>;
-def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}",
- (SHA256RNDS2rm VR128:$dst, i128mem:$src2)>;
+def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
+ (SHA256RNDS2rr VR128:$dst, VR128:$src2), 0>;
+def : InstAlias<"sha256rnds2\t{$src2, $dst|$dst, $src2}",
+ (SHA256RNDS2rm VR128:$dst, i128mem:$src2), 0>;
//===----------------------------------------------------------------------===//
// AES-NI Instructions
@@ -7588,13 +7459,13 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
// Perform One Round of an AES Encryption/Decryption Flow
let Predicates = [HasAVX, HasAES] in {
defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
- int_x86_aesni_aesenc, loadv2i64, 0>, VEX_4V;
+ int_x86_aesni_aesenc, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
- int_x86_aesni_aesenclast, loadv2i64, 0>, VEX_4V;
+ int_x86_aesni_aesenclast, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
- int_x86_aesni_aesdec, loadv2i64, 0>, VEX_4V;
+ int_x86_aesni_aesdec, loadv2i64, 0>, VEX_4V, VEX_WIG;
defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
- int_x86_aesni_aesdeclast, loadv2i64, 0>, VEX_4V;
+ int_x86_aesni_aesdeclast, loadv2i64, 0>, VEX_4V, VEX_WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -7615,12 +7486,12 @@ let Predicates = [HasAVX, HasAES] in {
"vaesimc\t{$src1, $dst|$dst, $src1}",
[(set VR128:$dst,
(int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>,
- VEX;
+ VEX, VEX_WIG;
def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"vaesimc\t{$src1, $dst|$dst, $src1}",
[(set VR128:$dst, (int_x86_aesni_aesimc (loadv2i64 addr:$src1)))]>,
- Sched<[WriteAESIMCLd]>, VEX;
+ Sched<[WriteAESIMCLd]>, VEX, VEX_WIG;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1),
@@ -7640,13 +7511,13 @@ let Predicates = [HasAVX, HasAES] in {
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
- Sched<[WriteAESKeyGen]>, VEX;
+ Sched<[WriteAESKeyGen]>, VEX, VEX_WIG;
def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>,
- Sched<[WriteAESKeyGenLd]>, VEX;
+ Sched<[WriteAESKeyGenLd]>, VEX, VEX_WIG;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, u8imm:$src2),
@@ -7672,14 +7543,14 @@ def VPCLMULQDQrr : AVXPCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128:$dst,
(int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>,
- Sched<[WriteCLMul]>;
+ Sched<[WriteCLMul]>, VEX_WIG;
def VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
(loadv2i64 addr:$src2), imm:$src3))]>,
- Sched<[WriteCLMulLd, ReadAfterLd]>;
+ Sched<[WriteCLMulLd, ReadAfterLd]>, VEX_WIG;
// Carry-less Multiplication instructions
let Constraints = "$src1 = $dst" in {
@@ -7879,6 +7750,15 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
[]>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L;
}
+
+// Without AVX2 we need to concatenate two v4i32 V_SETALLONES to create a
+// 256-bit all-ones value.
+let Predicates = [HasAVX1Only] in
+def : Pat<(v8i32 immAllOnesV),
+ (VINSERTF128rr
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), (V_SETALLONES), sub_xmm),
+ (V_SETALLONES), 1)>;
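+// V_SETALLONES is the 128-bit all-ones pseudo (materialized as, in effect, a
+// pcmpeqd of a register with itself), so this should expand to a
+// compare-with-self plus a vinsertf128 rather than a constant-pool load.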
+
multiclass vinsert_lowering<string InstrStr, ValueType From, ValueType To,
PatFrag memop_frag> {
def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),
@@ -8029,41 +7909,6 @@ let ExeDomain = SSEPackedDouble in {
loadv4i64, v4f64, v4i64>, VEX_L;
}
-let Predicates = [HasAVX, NoVLX] in {
-def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (v8i32 VR256:$src2))),
- (VPERMILPSYrr VR256:$src1, VR256:$src2)>;
-def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
- (VPERMILPSYrm VR256:$src1, addr:$src2)>;
-def : Pat<(v4f64 (X86VPermilpv VR256:$src1, (v4i64 VR256:$src2))),
- (VPERMILPDYrr VR256:$src1, VR256:$src2)>;
-def : Pat<(v4f64 (X86VPermilpv VR256:$src1, (loadv4i64 addr:$src2))),
- (VPERMILPDYrm VR256:$src1, addr:$src2)>;
-
-def : Pat<(v8i32 (X86VPermilpi VR256:$src1, (i8 imm:$imm))),
- (VPERMILPSYri VR256:$src1, imm:$imm)>;
-def : Pat<(v4i64 (X86VPermilpi VR256:$src1, (i8 imm:$imm))),
- (VPERMILPDYri VR256:$src1, imm:$imm)>;
-def : Pat<(v8i32 (X86VPermilpi (bc_v8i32 (loadv4i64 addr:$src1)),
- (i8 imm:$imm))),
- (VPERMILPSYmi addr:$src1, imm:$imm)>;
-def : Pat<(v4i64 (X86VPermilpi (loadv4i64 addr:$src1), (i8 imm:$imm))),
- (VPERMILPDYmi addr:$src1, imm:$imm)>;
-
-def : Pat<(v4f32 (X86VPermilpv VR128:$src1, (v4i32 VR128:$src2))),
- (VPERMILPSrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v4f32 (X86VPermilpv VR128:$src1, (bc_v4i32 (loadv2i64 addr:$src2)))),
- (VPERMILPSrm VR128:$src1, addr:$src2)>;
-def : Pat<(v2f64 (X86VPermilpv VR128:$src1, (v2i64 VR128:$src2))),
- (VPERMILPDrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2f64 (X86VPermilpv VR128:$src1, (loadv2i64 addr:$src2))),
- (VPERMILPDrm VR128:$src1, addr:$src2)>;
-
-def : Pat<(v2i64 (X86VPermilpi VR128:$src1, (i8 imm:$imm))),
- (VPERMILPDri VR128:$src1, imm:$imm)>;
-def : Pat<(v2i64 (X86VPermilpi (loadv2i64 addr:$src1), (i8 imm:$imm))),
- (VPERMILPDmi addr:$src1, imm:$imm)>;
-}
-
//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//
@@ -8118,15 +7963,16 @@ def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
//
+// Note: these instructions do not affect YMM16-YMM31.
let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
// Zero All YMM registers
def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
- [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L, Requires<[HasAVX]>;
+ [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L, Requires<[HasAVX]>, VEX_WIG;
// Zero Upper bits of YMM registers
def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
- [(int_x86_avx_vzeroupper)]>, PS, VEX, Requires<[HasAVX]>;
+ [(int_x86_avx_vzeroupper)]>, PS, VEX, Requires<[HasAVX]>, VEX_WIG;
}
//===----------------------------------------------------------------------===//
@@ -8235,6 +8081,46 @@ defm VPBLENDD : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v4i32,
defm VPBLENDDY : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v8i32,
VR256, loadv4i64, i256mem>, VEX_L;
+// For insertion into the zero index (low half) of a 256-bit vector, it is
+// more efficient to generate a blend with an immediate than an insert*128.
+let Predicates = [HasAVX2] in {
+def : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)),
+ (VPBLENDDYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+def : Pat<(insert_subvector (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR 0)),
+ (VPBLENDDYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+def : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0)),
+ (VPBLENDDYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
+ (VPBLENDDYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+}
+
+let Predicates = [HasAVX1Only] in {
+def : Pat<(insert_subvector (v8i32 VR256:$src1), (v4i32 VR128:$src2), (iPTR 0)),
+ (VBLENDPSYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+def : Pat<(insert_subvector (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR 0)),
+ (VBLENDPSYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+def : Pat<(insert_subvector (v16i16 VR256:$src1), (v8i16 VR128:$src2), (iPTR 0)),
+ (VBLENDPSYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+def : Pat<(insert_subvector (v32i8 VR256:$src1), (v16i8 VR128:$src2), (iPTR 0)),
+ (VBLENDPSYrri VR256:$src1,
+ (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
+ VR128:$src2, sub_xmm), 0xf)>;
+}
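+// VPBLENDD requires AVX2, so under AVX1-only the integer inserts above are
+// routed through VBLENDPSY instead; the float blend is bitwise-exact for this
+// lane-select use, at worst costing a domain crossing.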
+
//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the
// destination operand
@@ -8282,6 +8168,11 @@ defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
v2i64, v4i64, NoVLX>;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+  // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
+ def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQrm addr:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
+ (VPBROADCASTQYrm addr:$src)>;
  // loadi16 is tricky to fold because !isTypeDesirableForOp justifiably
  // rejects i16 operations.
// This means we'll encounter truncated i32 loads; match that here.
def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
@@ -8296,7 +8187,7 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
(VPBROADCASTWYrm addr:$src)>;
}
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX] in {
// Provide aliases for broadcast from the same register class that
// automatically does the extract.
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))),
@@ -8343,18 +8234,13 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
}
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
- (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
+ (VPBROADCASTDrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
- (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
- def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
- (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
-
- // The patterns for VPBROADCASTD are not needed because they would match
- // the exact same thing as VBROADCASTSS patterns.
-
+ (VPBROADCASTDYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
- (VPBROADCASTQrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
- // The v4i64 pattern is not needed because VBROADCASTSDYrr already match.
+ (VPBROADCASTQrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
+ def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
+ (VPBROADCASTQYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
}
// AVX1 broadcast patterns
@@ -8377,15 +8263,15 @@ let Predicates = [HasAVX, NoVLX] in {
let Predicates = [HasAVX1Only] in {
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
- (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
+ (VPERMILPSri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
(VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
- (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
- (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
+ (VPERMILPSri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
+ (VPERMILPSri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
(VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
- (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
- (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;
+ (VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_xmm),
+ (VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128)), 1)>;
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
(VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
@@ -8399,7 +8285,7 @@ let Predicates = [HasAVX1Only] in {
(VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
def : Pat<(v2i64 (X86VBroadcast i64:$src)),
- (VMOVDDUPrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
+ (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44)>;
}
//===----------------------------------------------------------------------===//
@@ -8407,7 +8293,8 @@ let Predicates = [HasAVX1Only] in {
//
multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
- ValueType OpVT, X86FoldableSchedWrite Sched> {
+ ValueType OpVT, X86FoldableSchedWrite Sched,
+ X86MemOperand memOp> {
let Predicates = [HasAVX2, NoVLX] in {
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2),
@@ -8417,7 +8304,7 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
(OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
Sched<[Sched]>, VEX_4V, VEX_L;
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, i256mem:$src2),
+ (ins VR256:$src1, memOp:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
@@ -8427,12 +8314,15 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
}
}
-defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256>;
+defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256,
+ i256mem>;
let ExeDomain = SSEPackedSingle in
-defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256>;
+defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256,
+ f256mem>;
multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
- ValueType OpVT, X86FoldableSchedWrite Sched> {
+ ValueType OpVT, X86FoldableSchedWrite Sched,
+ X86MemOperand memOp> {
let Predicates = [HasAVX2, NoVLX] in {
def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, u8imm:$src2),
@@ -8442,7 +8332,7 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
(OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
Sched<[Sched]>, VEX, VEX_L;
def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
- (ins i256mem:$src1, u8imm:$src2),
+ (ins memOp:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
@@ -8453,10 +8343,10 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
}
defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64,
- WriteShuffle256>, VEX_W;
+ WriteShuffle256, i256mem>, VEX_W;
let ExeDomain = SSEPackedDouble in
defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
- WriteFShuffle256>, VEX_W;
+ WriteFShuffle256, f256mem>, VEX_W;
//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index e2be73532157..0efb383e1c8d 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -340,75 +340,71 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
let hasSideEffects = 0 in {
let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
+
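+// RCL/RCR rotate through the carry flag, so every form reads EFLAGS; the
+// variable-count forms additionally read CL.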
+let Uses = [CL, EFLAGS] in {
+def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcl{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
+def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcl{w}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize16;
+def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcl{l}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize32;
+def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcl{q}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
+} // Uses = [CL, EFLAGS]
+
+let Uses = [EFLAGS] in {
def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t$dst", [], IIC_SR>;
def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$cnt),
"rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
-let Uses = [CL] in
-def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
- "rcl{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
-
def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
"rcl{w}\t$dst", [], IIC_SR>, OpSize16;
def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$cnt),
"rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16;
-let Uses = [CL] in
-def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
- "rcl{w}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize16;
-
def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
"rcl{l}\t$dst", [], IIC_SR>, OpSize32;
def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$cnt),
"rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32;
-let Uses = [CL] in
-def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
- "rcl{l}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize32;
-
-
def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
"rcl{q}\t$dst", [], IIC_SR>;
def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt),
"rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
-let Uses = [CL] in
-def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
- "rcl{q}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
+} // Uses = [EFLAGS]
+let Uses = [CL, EFLAGS] in {
+def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
+ "rcr{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
+def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "rcr{w}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize16;
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "rcr{l}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize32;
+def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
+ "rcr{q}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
+} // Uses = [CL, EFLAGS]
+let Uses = [EFLAGS] in {
def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
"rcr{b}\t$dst", [], IIC_SR>;
def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$cnt),
"rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
-let Uses = [CL] in
-def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
- "rcr{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
-
def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
"rcr{w}\t$dst", [], IIC_SR>, OpSize16;
def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$cnt),
"rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16;
-let Uses = [CL] in
-def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
- "rcr{w}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize16;
-
def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
"rcr{l}\t$dst", [], IIC_SR>, OpSize32;
def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$cnt),
"rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32;
-let Uses = [CL] in
-def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
- "rcr{l}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize32;
-
def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
"rcr{q}\t$dst", [], IIC_SR>;
def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt),
"rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
-let Uses = [CL] in
-def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
- "rcr{q}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
+} // Uses = [EFLAGS]
} // Constraints = "$src1 = $dst"
-let SchedRW = [WriteShiftLd, WriteRMW] in {
+let SchedRW = [WriteShiftLd, WriteRMW], mayStore = 1 in {
+let Uses = [EFLAGS] in {
def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t$dst", [], IIC_SR>;
def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, u8imm:$cnt),
@@ -442,8 +438,9 @@ def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
"rcr{q}\t$dst", [], IIC_SR>;
def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, u8imm:$cnt),
"rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
+} // Uses = [EFLAGS]
-let Uses = [CL] in {
+let Uses = [CL, EFLAGS] in {
def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
@@ -461,7 +458,7 @@ def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
"rcr{l}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize32;
def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
"rcr{q}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
-}
+} // Uses = [CL, EFLAGS]
} // SchedRW
} // hasSideEffects = 0
@@ -665,19 +662,19 @@ def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, u8imm:$src),
// Rotate by 1
def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
"ror{b}\t$dst",
- [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ [(store (rotl (loadi8 addr:$dst), (i8 7)), addr:$dst)],
IIC_SR>;
def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
"ror{w}\t$dst",
- [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ [(store (rotl (loadi16 addr:$dst), (i8 15)), addr:$dst)],
IIC_SR>, OpSize16;
def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
"ror{l}\t$dst",
- [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ [(store (rotl (loadi32 addr:$dst), (i8 31)), addr:$dst)],
IIC_SR>, OpSize32;
def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
"ror{q}\t$dst",
- [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ [(store (rotl (loadi64 addr:$dst), (i8 63)), addr:$dst)],
IIC_SR>;
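// A rotate right by 1 is equivalent to a rotate left by width-1 (e.g. for
// i32, rotr x, 1 == rotl x, 31), so these memory forms still match once the
// rotate reaches instruction selection in rotl form.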
} // SchedRW
@@ -849,6 +846,15 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
} // Defs = [EFLAGS]
+// On Sandy Bridge and newer Intel processors, rotates by immediate are faster
+// when emitted as SHLD, which avoids the partial flag update incurred by the
+// normal rotate instructions.
+let Predicates = [HasFastSHLDRotate], AddedComplexity = 5 in {
+ def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
+ (SHLD32rri8 GR32:$src, GR32:$src, imm:$shamt)>;
+ def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
+ (SHLD64rri8 GR64:$src, GR64:$src, imm:$shamt)>;
+}
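+// SHLD with identical register operands is a plain rotate left, e.g.
+// shld $5, %eax, %eax == rol $5, %eax, but unlike ROL it fully redefines the
+// arithmetic flags, so no partial EFLAGS merge is required.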
+
def ROT32L2R_imm8 : SDNodeXForm<imm, [{
// Convert a ROTL shamt to a ROTR shamt on a 32-bit integer.
return getI8Imm(32 - N->getZExtValue(), SDLoc(N));
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 9265d64b3230..2e5350ce979e 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -173,27 +173,28 @@ def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize32;
def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>;
-
+let mayStore = 1 in {
def MOV16ms : I<0x8C, MRMDestMem, (outs), (ins i16mem:$dst, SEGMENT_REG:$src),
"mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>, OpSize16;
def MOV32ms : I<0x8C, MRMDestMem, (outs), (ins i32mem:$dst, SEGMENT_REG:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>, OpSize32;
def MOV64ms : RI<0x8C, MRMDestMem, (outs), (ins i64mem:$dst, SEGMENT_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_MEM_SR>;
-
+}
def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
"mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>, OpSize16;
def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>, OpSize32;
def MOV64sr : RI<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_REG>;
-
+let mayLoad = 1 in {
def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
"mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>, OpSize16;
def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
"mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>, OpSize32;
def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
"mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>;
+}
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -202,6 +203,7 @@ def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src),
let SchedRW = [WriteSystem] in {
def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB;
+let mayLoad = 1 in
def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"lar{w}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB,
OpSize16;
@@ -210,6 +212,7 @@ def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
OpSize16;
// The i16mem operand in LAR32rm and the GR32 operand in LAR32rr are not typos.
+let mayLoad = 1 in
def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB,
OpSize32;
@@ -217,23 +220,27 @@ def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB,
OpSize32;
// The i16mem operand in LAR64rm and the GR32 operand in LAR64rr are not typos.
+let mayLoad = 1 in
def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB;
def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB;
+let mayLoad = 1 in
def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"lsl{w}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB,
OpSize16;
def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"lsl{w}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB,
OpSize16;
+let mayLoad = 1 in
def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB,
OpSize32;
def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB,
OpSize32;
+let mayLoad = 1 in
def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB;
def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
@@ -248,11 +255,13 @@ def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
"str{l}\t$dst", [], IIC_STR>, TB, OpSize32;
def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
"str{q}\t$dst", [], IIC_STR>, TB;
+let mayStore = 1 in
def STRm : I<0x00, MRM1m, (outs), (ins i16mem:$dst),
"str{w}\t$dst", [], IIC_STR>, TB;
def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
"ltr{w}\t$src", [], IIC_LTR>, TB;
+let mayLoad = 1 in
def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
"ltr{w}\t$src", [], IIC_LTR>, TB;
@@ -377,12 +386,14 @@ def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg),
"verr\t$seg", [], IIC_VERR>, TB;
-def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
- "verr\t$seg", [], IIC_VERR>, TB;
def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg),
"verw\t$seg", [], IIC_VERW_MEM>, TB;
+let mayLoad = 1 in {
+def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg),
+ "verr\t$seg", [], IIC_VERR>, TB;
def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg),
"verw\t$seg", [], IIC_VERW_REG>, TB;
+}
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -403,6 +414,7 @@ def SIDT64m : I<0x01, MRM1m, (outs), (ins opaque80mem:$dst),
"sidt{q}\t$dst", []>, TB, Requires <[In64BitMode]>;
def SLDT16r : I<0x00, MRM0r, (outs GR16:$dst), (ins),
"sldt{w}\t$dst", [], IIC_SLDT>, TB, OpSize16;
+let mayStore = 1 in
def SLDT16m : I<0x00, MRM0m, (outs), (ins i16mem:$dst),
"sldt{w}\t$dst", [], IIC_SLDT>, TB;
def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
@@ -412,6 +424,7 @@ def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
// extension.
def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
"sldt{q}\t$dst", [], IIC_SLDT>, TB;
+let mayStore = 1 in
def SLDT64m : RI<0x00, MRM0m, (outs), (ins i16mem:$dst),
"sldt{q}\t$dst", [], IIC_SLDT>, TB;
@@ -429,6 +442,7 @@ def LIDT64m : I<0x01, MRM3m, (outs), (ins opaque80mem:$src),
"lidt{q}\t$src", [], IIC_LIDT>, TB, Requires<[In64BitMode]>;
def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
"lldt{w}\t$src", [], IIC_LLDT_REG>, TB;
+let mayLoad = 1 in
def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
"lldt{w}\t$src", [], IIC_LLDT_MEM>, TB;
} // SchedRW
@@ -459,6 +473,7 @@ def SMSW16m : I<0x01, MRM4m, (outs), (ins i16mem:$dst),
def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
"lmsw{w}\t$src", [], IIC_LMSW_MEM>, TB;
+let mayLoad = 1 in
def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
"lmsw{w}\t$src", [], IIC_LMSW_REG>, TB;
diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td
index 7267d752653e..38ac8be94483 100644
--- a/lib/Target/X86/X86InstrTSX.td
+++ b/lib/Target/X86/X86InstrTSX.td
@@ -25,9 +25,9 @@ def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins),
let isBranch = 1, isTerminator = 1, Defs = [EAX] in {
def XBEGIN_2 : Ii16PCRel<0xc7, MRM_F8, (outs), (ins brtarget16:$dst),
- "xbegin\t$dst", []>, OpSize16, Requires<[HasRTM]>;
+ "xbegin\t$dst", []>, OpSize16;
def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget32:$dst),
- "xbegin\t$dst", []>, OpSize32, Requires<[HasRTM]>;
+ "xbegin\t$dst", []>, OpSize32;
}
def XEND : I<0x01, MRM_D5, (outs), (ins),
@@ -35,7 +35,7 @@ def XEND : I<0x01, MRM_D5, (outs), (ins),
let Defs = [EFLAGS] in
def XTEST : I<0x01, MRM_D6, (outs), (ins),
- "xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasTSX]>;
+ "xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasRTM]>;
def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
"xabort\t$imm",
@@ -44,7 +44,7 @@ def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
// HLE prefixes
let isAsmParserOnly = 1 in {
-def XACQUIRE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "xacquire", []>, Requires<[HasHLE]>;
-def XRELEASE_PREFIX : I<0xF3, RawFrm, (outs), (ins), "xrelease", []>, Requires<[HasHLE]>;
+def XACQUIRE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "xacquire", []>;
+def XRELEASE_PREFIX : I<0xF3, RawFrm, (outs), (ins), "xrelease", []>;
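+// These pseudos reuse the REPNE/REP prefix bytes (0xF2/0xF3), which pre-HLE
+// processors ignore on lockable instructions; that backward compatibility is
+// presumably why the HasHLE predicate could be dropped and the prefixes
+// accepted unconditionally.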
}
diff --git a/lib/Target/X86/X86InstrTablesInfo.h b/lib/Target/X86/X86InstrTablesInfo.h
deleted file mode 100755
index 415a891bfd97..000000000000
--- a/lib/Target/X86/X86InstrTablesInfo.h
+++ /dev/null
@@ -1,1162 +0,0 @@
-//===-- X86InstrTablesInfo.h - X86 Instruction Tables -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains related X86 Instruction Information Tables.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_X86_X86INSTRTABLESINFO_H
-#define LLVM_LIB_TARGET_X86_X86INSTRTABLESINFO_H
-
-using namespace llvm;
-
-struct X86EvexToVexCompressTableEntry {
- uint16_t EvexOpcode;
- uint16_t VexOpcode;
-};
-
-
-
-// X86 EVEX encoded instructions that have a VEX 128 encoding
-// (table format: <EVEX opcode, VEX-128 opcode>).
-static const X86EvexToVexCompressTableEntry X86EvexToVex128CompressTable[] = {
- // EVEX scalar with corresponding VEX.
- { X86::Int_VCOMISDZrm , X86::Int_VCOMISDrm },
- { X86::Int_VCOMISDZrr , X86::Int_VCOMISDrr },
- { X86::Int_VCOMISSZrm , X86::Int_VCOMISSrm },
- { X86::Int_VCOMISSZrr , X86::Int_VCOMISSrr },
- { X86::Int_VUCOMISDZrm , X86::Int_VUCOMISDrm },
- { X86::Int_VUCOMISDZrr , X86::Int_VUCOMISDrr },
- { X86::Int_VUCOMISSZrm , X86::Int_VUCOMISSrm },
- { X86::Int_VUCOMISSZrr , X86::Int_VUCOMISSrr },
- { X86::VADDSDZrm , X86::VADDSDrm },
- { X86::VADDSDZrm_Int , X86::VADDSDrm_Int },
- { X86::VADDSDZrr , X86::VADDSDrr },
- { X86::VADDSDZrr_Int , X86::VADDSDrr_Int },
- { X86::VADDSSZrm , X86::VADDSSrm },
- { X86::VADDSSZrm_Int , X86::VADDSSrm_Int },
- { X86::VADDSSZrr , X86::VADDSSrr },
- { X86::VADDSSZrr_Int , X86::VADDSSrr_Int },
- { X86::VCOMISDZrm , X86::VCOMISDrm },
- { X86::VCOMISDZrr , X86::VCOMISDrr },
- { X86::VCOMISSZrm , X86::VCOMISSrm },
- { X86::VCOMISSZrr , X86::VCOMISSrr },
- { X86::VCVTSD2SI64Zrm , X86::VCVTSD2SI64rm },
- { X86::VCVTSD2SI64Zrr , X86::VCVTSD2SI64rr },
- { X86::VCVTSD2SIZrm , X86::VCVTSD2SIrm },
- { X86::VCVTSD2SIZrr , X86::VCVTSD2SIrr },
- { X86::VCVTSD2SSZrm , X86::VCVTSD2SSrm },
- { X86::VCVTSD2SSZrr , X86::VCVTSD2SSrr },
- { X86::VCVTSI2SDZrm , X86::VCVTSI2SDrm },
- { X86::VCVTSI2SDZrm_Int , X86::Int_VCVTSI2SDrm },
- { X86::VCVTSI2SDZrr , X86::VCVTSI2SDrr },
- { X86::VCVTSI2SDZrr_Int , X86::Int_VCVTSI2SDrr },
- { X86::VCVTSI2SSZrm , X86::VCVTSI2SSrm },
- { X86::VCVTSI2SSZrm_Int , X86::Int_VCVTSI2SSrm },
- { X86::VCVTSI2SSZrr , X86::VCVTSI2SSrr },
- { X86::VCVTSI2SSZrr_Int , X86::Int_VCVTSI2SSrr },
- { X86::VCVTSS2SDZrm , X86::VCVTSS2SDrm },
- { X86::VCVTSS2SDZrr , X86::VCVTSS2SDrr },
- { X86::VCVTSS2SI64Zrm , X86::VCVTSS2SI64rm },
- { X86::VCVTSS2SI64Zrr , X86::VCVTSS2SI64rr },
- { X86::VCVTSS2SIZrm , X86::VCVTSS2SIrm },
- { X86::VCVTSS2SIZrr , X86::VCVTSS2SIrr },
- { X86::VCVTTSD2SI64Zrm , X86::VCVTTSD2SI64rm },
- { X86::VCVTTSD2SI64Zrm_Int , X86::Int_VCVTTSD2SI64rm },
- { X86::VCVTTSD2SI64Zrr , X86::VCVTTSD2SI64rr },
- { X86::VCVTTSD2SI64Zrr_Int , X86::Int_VCVTTSD2SI64rr },
- { X86::VCVTTSD2SIZrm , X86::VCVTTSD2SIrm },
- { X86::VCVTTSD2SIZrm_Int , X86::Int_VCVTTSD2SIrm },
- { X86::VCVTTSD2SIZrr , X86::VCVTTSD2SIrr },
- { X86::VCVTTSD2SIZrr_Int , X86::Int_VCVTTSD2SIrr },
- { X86::VCVTTSS2SI64Zrm , X86::VCVTTSS2SI64rm },
- { X86::VCVTTSS2SI64Zrm_Int , X86::Int_VCVTTSS2SI64rm },
- { X86::VCVTTSS2SI64Zrr , X86::VCVTTSS2SI64rr },
- { X86::VCVTTSS2SI64Zrr_Int , X86::Int_VCVTTSS2SI64rr },
- { X86::VCVTTSS2SIZrm , X86::VCVTTSS2SIrm },
- { X86::VCVTTSS2SIZrm_Int , X86::Int_VCVTTSS2SIrm },
- { X86::VCVTTSS2SIZrr , X86::VCVTTSS2SIrr },
- { X86::VCVTTSS2SIZrr_Int , X86::Int_VCVTTSS2SIrr },
- { X86::VDIVSDZrm , X86::VDIVSDrm },
- { X86::VDIVSDZrm_Int , X86::VDIVSDrm_Int },
- { X86::VDIVSDZrr , X86::VDIVSDrr },
- { X86::VDIVSDZrr_Int , X86::VDIVSDrr_Int },
- { X86::VDIVSSZrm , X86::VDIVSSrm },
- { X86::VDIVSSZrm_Int , X86::VDIVSSrm_Int },
- { X86::VDIVSSZrr , X86::VDIVSSrr },
- { X86::VDIVSSZrr_Int , X86::VDIVSSrr_Int },
- { X86::VFMADD132SDZm , X86::VFMADD132SDm },
- { X86::VFMADD132SDZm_Int , X86::VFMADD132SDm_Int },
- { X86::VFMADD132SDZr , X86::VFMADD132SDr },
- { X86::VFMADD132SDZr_Int , X86::VFMADD132SDr_Int },
- { X86::VFMADD132SSZm , X86::VFMADD132SSm },
- { X86::VFMADD132SSZm_Int , X86::VFMADD132SSm_Int },
- { X86::VFMADD132SSZr , X86::VFMADD132SSr },
- { X86::VFMADD132SSZr_Int , X86::VFMADD132SSr_Int },
- { X86::VFMADD213SDZm , X86::VFMADD213SDm },
- { X86::VFMADD213SDZm_Int , X86::VFMADD213SDm_Int },
- { X86::VFMADD213SDZr , X86::VFMADD213SDr },
- { X86::VFMADD213SDZr_Int , X86::VFMADD213SDr_Int },
- { X86::VFMADD213SSZm , X86::VFMADD213SSm },
- { X86::VFMADD213SSZm_Int , X86::VFMADD213SSm_Int },
- { X86::VFMADD213SSZr , X86::VFMADD213SSr },
- { X86::VFMADD213SSZr_Int , X86::VFMADD213SSr_Int },
- { X86::VFMADD231SDZm , X86::VFMADD231SDm },
- { X86::VFMADD231SDZm_Int , X86::VFMADD231SDm_Int },
- { X86::VFMADD231SDZr , X86::VFMADD231SDr },
- { X86::VFMADD231SDZr_Int , X86::VFMADD231SDr_Int },
- { X86::VFMADD231SSZm , X86::VFMADD231SSm },
- { X86::VFMADD231SSZm_Int , X86::VFMADD231SSm_Int },
- { X86::VFMADD231SSZr , X86::VFMADD231SSr },
- { X86::VFMADD231SSZr_Int , X86::VFMADD231SSr_Int },
- { X86::VFMSUB132SDZm , X86::VFMSUB132SDm },
- { X86::VFMSUB132SDZm_Int , X86::VFMSUB132SDm_Int },
- { X86::VFMSUB132SDZr , X86::VFMSUB132SDr },
- { X86::VFMSUB132SDZr_Int , X86::VFMSUB132SDr_Int },
- { X86::VFMSUB132SSZm , X86::VFMSUB132SSm },
- { X86::VFMSUB132SSZm_Int , X86::VFMSUB132SSm_Int },
- { X86::VFMSUB132SSZr , X86::VFMSUB132SSr },
- { X86::VFMSUB132SSZr_Int , X86::VFMSUB132SSr_Int },
- { X86::VFMSUB213SDZm , X86::VFMSUB213SDm },
- { X86::VFMSUB213SDZm_Int , X86::VFMSUB213SDm_Int },
- { X86::VFMSUB213SDZr , X86::VFMSUB213SDr },
- { X86::VFMSUB213SDZr_Int , X86::VFMSUB213SDr_Int },
- { X86::VFMSUB213SSZm , X86::VFMSUB213SSm },
- { X86::VFMSUB213SSZm_Int , X86::VFMSUB213SSm_Int },
- { X86::VFMSUB213SSZr , X86::VFMSUB213SSr },
- { X86::VFMSUB213SSZr_Int , X86::VFMSUB213SSr_Int },
- { X86::VFMSUB231SDZm , X86::VFMSUB231SDm },
- { X86::VFMSUB231SDZm_Int , X86::VFMSUB231SDm_Int },
- { X86::VFMSUB231SDZr , X86::VFMSUB231SDr },
- { X86::VFMSUB231SDZr_Int , X86::VFMSUB231SDr_Int },
- { X86::VFMSUB231SSZm , X86::VFMSUB231SSm },
- { X86::VFMSUB231SSZm_Int , X86::VFMSUB231SSm_Int },
- { X86::VFMSUB231SSZr , X86::VFMSUB231SSr },
- { X86::VFMSUB231SSZr_Int , X86::VFMSUB231SSr_Int },
- { X86::VFNMADD132SDZm , X86::VFNMADD132SDm },
- { X86::VFNMADD132SDZm_Int , X86::VFNMADD132SDm_Int },
- { X86::VFNMADD132SDZr , X86::VFNMADD132SDr },
- { X86::VFNMADD132SDZr_Int , X86::VFNMADD132SDr_Int },
- { X86::VFNMADD132SSZm , X86::VFNMADD132SSm },
- { X86::VFNMADD132SSZm_Int , X86::VFNMADD132SSm_Int },
- { X86::VFNMADD132SSZr , X86::VFNMADD132SSr },
- { X86::VFNMADD132SSZr_Int , X86::VFNMADD132SSr_Int },
- { X86::VFNMADD213SDZm , X86::VFNMADD213SDm },
- { X86::VFNMADD213SDZm_Int , X86::VFNMADD213SDm_Int },
- { X86::VFNMADD213SDZr , X86::VFNMADD213SDr },
- { X86::VFNMADD213SDZr_Int , X86::VFNMADD213SDr_Int },
- { X86::VFNMADD213SSZm , X86::VFNMADD213SSm },
- { X86::VFNMADD213SSZm_Int , X86::VFNMADD213SSm_Int },
- { X86::VFNMADD213SSZr , X86::VFNMADD213SSr },
- { X86::VFNMADD213SSZr_Int , X86::VFNMADD213SSr_Int },
- { X86::VFNMADD231SDZm , X86::VFNMADD231SDm },
- { X86::VFNMADD231SDZm_Int , X86::VFNMADD231SDm_Int },
- { X86::VFNMADD231SDZr , X86::VFNMADD231SDr },
- { X86::VFNMADD231SDZr_Int , X86::VFNMADD231SDr_Int },
- { X86::VFNMADD231SSZm , X86::VFNMADD231SSm },
- { X86::VFNMADD231SSZm_Int , X86::VFNMADD231SSm_Int },
- { X86::VFNMADD231SSZr , X86::VFNMADD231SSr },
- { X86::VFNMADD231SSZr_Int , X86::VFNMADD231SSr_Int },
- { X86::VFNMSUB132SDZm , X86::VFNMSUB132SDm },
- { X86::VFNMSUB132SDZm_Int , X86::VFNMSUB132SDm_Int },
- { X86::VFNMSUB132SDZr , X86::VFNMSUB132SDr },
- { X86::VFNMSUB132SDZr_Int , X86::VFNMSUB132SDr_Int },
- { X86::VFNMSUB132SSZm , X86::VFNMSUB132SSm },
- { X86::VFNMSUB132SSZm_Int , X86::VFNMSUB132SSm_Int },
- { X86::VFNMSUB132SSZr , X86::VFNMSUB132SSr },
- { X86::VFNMSUB132SSZr_Int , X86::VFNMSUB132SSr_Int },
- { X86::VFNMSUB213SDZm , X86::VFNMSUB213SDm },
- { X86::VFNMSUB213SDZm_Int , X86::VFNMSUB213SDm_Int },
- { X86::VFNMSUB213SDZr , X86::VFNMSUB213SDr },
- { X86::VFNMSUB213SDZr_Int , X86::VFNMSUB213SDr_Int },
- { X86::VFNMSUB213SSZm , X86::VFNMSUB213SSm },
- { X86::VFNMSUB213SSZm_Int , X86::VFNMSUB213SSm_Int },
- { X86::VFNMSUB213SSZr , X86::VFNMSUB213SSr },
- { X86::VFNMSUB213SSZr_Int , X86::VFNMSUB213SSr_Int },
- { X86::VFNMSUB231SDZm , X86::VFNMSUB231SDm },
- { X86::VFNMSUB231SDZm_Int , X86::VFNMSUB231SDm_Int },
- { X86::VFNMSUB231SDZr , X86::VFNMSUB231SDr },
- { X86::VFNMSUB231SDZr_Int , X86::VFNMSUB231SDr_Int },
- { X86::VFNMSUB231SSZm , X86::VFNMSUB231SSm },
- { X86::VFNMSUB231SSZm_Int , X86::VFNMSUB231SSm_Int },
- { X86::VFNMSUB231SSZr , X86::VFNMSUB231SSr },
- { X86::VFNMSUB231SSZr_Int , X86::VFNMSUB231SSr_Int },
- { X86::VMAXCSDZrm , X86::VMAXCSDrm },
- { X86::VMAXCSDZrr , X86::VMAXCSDrr },
- { X86::VMAXCSSZrm , X86::VMAXCSSrm },
- { X86::VMAXCSSZrr , X86::VMAXCSSrr },
- { X86::VMAXSDZrm , X86::VMAXSDrm },
- { X86::VMAXSDZrm_Int , X86::VMAXSDrm_Int },
- { X86::VMAXSDZrr , X86::VMAXSDrr },
- { X86::VMAXSDZrr_Int , X86::VMAXSDrr_Int },
- { X86::VMAXSSZrm , X86::VMAXSSrm },
- { X86::VMAXSSZrm_Int , X86::VMAXSSrm_Int },
- { X86::VMAXSSZrr , X86::VMAXSSrr },
- { X86::VMAXSSZrr_Int , X86::VMAXSSrr_Int },
- { X86::VMINCSDZrm , X86::VMINCSDrm },
- { X86::VMINCSDZrr , X86::VMINCSDrr },
- { X86::VMINCSSZrm , X86::VMINCSSrm },
- { X86::VMINCSSZrr , X86::VMINCSSrr },
- { X86::VMINSDZrm , X86::VMINSDrm },
- { X86::VMINSDZrm_Int , X86::VMINSDrm_Int },
- { X86::VMINSDZrr , X86::VMINSDrr },
- { X86::VMINSDZrr_Int , X86::VMINSDrr_Int },
- { X86::VMINSSZrm , X86::VMINSSrm },
- { X86::VMINSSZrm_Int , X86::VMINSSrm_Int },
- { X86::VMINSSZrr , X86::VMINSSrr },
- { X86::VMINSSZrr_Int , X86::VMINSSrr_Int },
- { X86::VMOV64toSDZrr , X86::VMOV64toSDrr },
- { X86::VMOVDI2SSZrm , X86::VMOVDI2SSrm },
- { X86::VMOVDI2SSZrr , X86::VMOVDI2SSrr },
- { X86::VMOVSDZmr , X86::VMOVSDmr },
- { X86::VMOVSDZrm , X86::VMOVSDrm },
- { X86::VMOVSDZrr , X86::VMOVSDrr },
- { X86::VMOVSSZmr , X86::VMOVSSmr },
- { X86::VMOVSSZrm , X86::VMOVSSrm },
- { X86::VMOVSSZrr , X86::VMOVSSrr },
- { X86::VMOVSSZrr_REV , X86::VMOVSSrr_REV },
- { X86::VMULSDZrm , X86::VMULSDrm },
- { X86::VMULSDZrm_Int , X86::VMULSDrm_Int },
- { X86::VMULSDZrr , X86::VMULSDrr },
- { X86::VMULSDZrr_Int , X86::VMULSDrr_Int },
- { X86::VMULSSZrm , X86::VMULSSrm },
- { X86::VMULSSZrm_Int , X86::VMULSSrm_Int },
- { X86::VMULSSZrr , X86::VMULSSrr },
- { X86::VMULSSZrr_Int , X86::VMULSSrr_Int },
- { X86::VSQRTSDZm , X86::VSQRTSDm },
- { X86::VSQRTSDZm_Int , X86::VSQRTSDm_Int },
- { X86::VSQRTSDZr , X86::VSQRTSDr },
- { X86::VSQRTSDZr_Int , X86::VSQRTSDr_Int },
- { X86::VSQRTSSZm , X86::VSQRTSSm },
- { X86::VSQRTSSZm_Int , X86::VSQRTSSm_Int },
- { X86::VSQRTSSZr , X86::VSQRTSSr },
- { X86::VSQRTSSZr_Int , X86::VSQRTSSr_Int },
- { X86::VSUBSDZrm , X86::VSUBSDrm },
- { X86::VSUBSDZrm_Int , X86::VSUBSDrm_Int },
- { X86::VSUBSDZrr , X86::VSUBSDrr },
- { X86::VSUBSDZrr_Int , X86::VSUBSDrr_Int },
- { X86::VSUBSSZrm , X86::VSUBSSrm },
- { X86::VSUBSSZrm_Int , X86::VSUBSSrm_Int },
- { X86::VSUBSSZrr , X86::VSUBSSrr },
- { X86::VSUBSSZrr_Int , X86::VSUBSSrr_Int },
- { X86::VUCOMISDZrm , X86::VUCOMISDrm },
- { X86::VUCOMISDZrr , X86::VUCOMISDrr },
- { X86::VUCOMISSZrm , X86::VUCOMISSrm },
- { X86::VUCOMISSZrr , X86::VUCOMISSrr },
-
- { X86::VMOV64toPQIZrr , X86::VMOV64toPQIrr },
- { X86::VMOV64toSDZrr , X86::VMOV64toSDrr },
- { X86::VMOVDI2PDIZrm , X86::VMOVDI2PDIrm },
- { X86::VMOVDI2PDIZrr , X86::VMOVDI2PDIrr },
- { X86::VMOVLHPSZrr , X86::VMOVLHPSrr },
- { X86::VMOVHLPSZrr , X86::VMOVHLPSrr },
- { X86::VMOVPDI2DIZmr , X86::VMOVPDI2DImr },
- { X86::VMOVPDI2DIZrr , X86::VMOVPDI2DIrr },
- { X86::VMOVPQI2QIZmr , X86::VMOVPQI2QImr },
- { X86::VMOVPQIto64Zrr , X86::VMOVPQIto64rr },
- { X86::VMOVQI2PQIZrm , X86::VMOVQI2PQIrm },
- { X86::VMOVZPQILo2PQIZrr , X86::VMOVZPQILo2PQIrr },
-
- { X86::VPEXTRBZmr , X86::VPEXTRBmr },
- { X86::VPEXTRBZrr , X86::VPEXTRBrr },
- { X86::VPEXTRDZmr , X86::VPEXTRDmr },
- { X86::VPEXTRDZrr , X86::VPEXTRDrr },
- { X86::VPEXTRQZmr , X86::VPEXTRQmr },
- { X86::VPEXTRQZrr , X86::VPEXTRQrr },
- { X86::VPEXTRWZmr , X86::VPEXTRWmr },
- { X86::VPEXTRWZrr , X86::VPEXTRWri },
-
- { X86::VPINSRBZrm , X86::VPINSRBrm },
- { X86::VPINSRBZrr , X86::VPINSRBrr },
- { X86::VPINSRDZrm , X86::VPINSRDrm },
- { X86::VPINSRDZrr , X86::VPINSRDrr },
- { X86::VPINSRQZrm , X86::VPINSRQrm },
- { X86::VPINSRQZrr , X86::VPINSRQrr },
- { X86::VPINSRWZrm , X86::VPINSRWrmi },
- { X86::VPINSRWZrr , X86::VPINSRWrri },
-
- // EVEX 128 with corresponding VEX.
- { X86::VADDPDZ128rm , X86::VADDPDrm },
- { X86::VADDPDZ128rr , X86::VADDPDrr },
- { X86::VADDPSZ128rm , X86::VADDPSrm },
- { X86::VADDPSZ128rr , X86::VADDPSrr },
- { X86::VANDNPDZ128rm , X86::VANDNPDrm },
- { X86::VANDNPDZ128rr , X86::VANDNPDrr },
- { X86::VANDNPSZ128rm , X86::VANDNPSrm },
- { X86::VANDNPSZ128rr , X86::VANDNPSrr },
- { X86::VANDPDZ128rm , X86::VANDPDrm },
- { X86::VANDPDZ128rr , X86::VANDPDrr },
- { X86::VANDPSZ128rm , X86::VANDPSrm },
- { X86::VANDPSZ128rr , X86::VANDPSrr },
- { X86::VBROADCASTSSZ128m , X86::VBROADCASTSSrm },
- { X86::VBROADCASTSSZ128r , X86::VBROADCASTSSrr },
- { X86::VBROADCASTSSZ128r_s , X86::VBROADCASTSSrr },
- { X86::VCVTDQ2PDZ128rm , X86::VCVTDQ2PDrm },
- { X86::VCVTDQ2PDZ128rr , X86::VCVTDQ2PDrr },
- { X86::VCVTDQ2PSZ128rm , X86::VCVTDQ2PSrm },
- { X86::VCVTDQ2PSZ128rr , X86::VCVTDQ2PSrr },
- { X86::VCVTPD2DQZ128rm , X86::VCVTPD2DQrm },
- { X86::VCVTPD2DQZ128rr , X86::VCVTPD2DQrr },
- { X86::VCVTPD2PSZ128rm , X86::VCVTPD2PSrm },
- { X86::VCVTPD2PSZ128rr , X86::VCVTPD2PSrr },
- { X86::VCVTPH2PSZ128rm , X86::VCVTPH2PSrm },
- { X86::VCVTPH2PSZ128rr , X86::VCVTPH2PSrr },
- { X86::VCVTPS2DQZ128rm , X86::VCVTPS2DQrm },
- { X86::VCVTPS2DQZ128rr , X86::VCVTPS2DQrr },
- { X86::VCVTPS2PDZ128rm , X86::VCVTPS2PDrm },
- { X86::VCVTPS2PDZ128rr , X86::VCVTPS2PDrr },
- { X86::VCVTPS2PHZ128mr , X86::VCVTPS2PHmr },
- { X86::VCVTPS2PHZ128rr , X86::VCVTPS2PHrr },
- { X86::VCVTTPD2DQZ128rm , X86::VCVTTPD2DQrm },
- { X86::VCVTTPD2DQZ128rr , X86::VCVTTPD2DQrr },
- { X86::VCVTTPS2DQZ128rm , X86::VCVTTPS2DQrm },
- { X86::VCVTTPS2DQZ128rr , X86::VCVTTPS2DQrr },
- { X86::VDIVPDZ128rm , X86::VDIVPDrm },
- { X86::VDIVPDZ128rr , X86::VDIVPDrr },
- { X86::VDIVPSZ128rm , X86::VDIVPSrm },
- { X86::VDIVPSZ128rr , X86::VDIVPSrr },
- { X86::VFMADD132PDZ128m , X86::VFMADD132PDm },
- { X86::VFMADD132PDZ128r , X86::VFMADD132PDr },
- { X86::VFMADD132PSZ128m , X86::VFMADD132PSm },
- { X86::VFMADD132PSZ128r , X86::VFMADD132PSr },
- { X86::VFMADD213PDZ128m , X86::VFMADD213PDm },
- { X86::VFMADD213PDZ128r , X86::VFMADD213PDr },
- { X86::VFMADD213PSZ128m , X86::VFMADD213PSm },
- { X86::VFMADD213PSZ128r , X86::VFMADD213PSr },
- { X86::VFMADD231PDZ128m , X86::VFMADD231PDm },
- { X86::VFMADD231PDZ128r , X86::VFMADD231PDr },
- { X86::VFMADD231PSZ128m , X86::VFMADD231PSm },
- { X86::VFMADD231PSZ128r , X86::VFMADD231PSr },
- { X86::VFMADDSUB132PDZ128m , X86::VFMADDSUB132PDm },
- { X86::VFMADDSUB132PDZ128r , X86::VFMADDSUB132PDr },
- { X86::VFMADDSUB132PSZ128m , X86::VFMADDSUB132PSm },
- { X86::VFMADDSUB132PSZ128r , X86::VFMADDSUB132PSr },
- { X86::VFMADDSUB213PDZ128m , X86::VFMADDSUB213PDm },
- { X86::VFMADDSUB213PDZ128r , X86::VFMADDSUB213PDr },
- { X86::VFMADDSUB213PSZ128m , X86::VFMADDSUB213PSm },
- { X86::VFMADDSUB213PSZ128r , X86::VFMADDSUB213PSr },
- { X86::VFMADDSUB231PDZ128m , X86::VFMADDSUB231PDm },
- { X86::VFMADDSUB231PDZ128r , X86::VFMADDSUB231PDr },
- { X86::VFMADDSUB231PSZ128m , X86::VFMADDSUB231PSm },
- { X86::VFMADDSUB231PSZ128r , X86::VFMADDSUB231PSr },
- { X86::VFMSUB132PDZ128m , X86::VFMSUB132PDm },
- { X86::VFMSUB132PDZ128r , X86::VFMSUB132PDr },
- { X86::VFMSUB132PSZ128m , X86::VFMSUB132PSm },
- { X86::VFMSUB132PSZ128r , X86::VFMSUB132PSr },
- { X86::VFMSUB213PDZ128m , X86::VFMSUB213PDm },
- { X86::VFMSUB213PDZ128r , X86::VFMSUB213PDr },
- { X86::VFMSUB213PSZ128m , X86::VFMSUB213PSm },
- { X86::VFMSUB213PSZ128r , X86::VFMSUB213PSr },
- { X86::VFMSUB231PDZ128m , X86::VFMSUB231PDm },
- { X86::VFMSUB231PDZ128r , X86::VFMSUB231PDr },
- { X86::VFMSUB231PSZ128m , X86::VFMSUB231PSm },
- { X86::VFMSUB231PSZ128r , X86::VFMSUB231PSr },
- { X86::VFMSUBADD132PDZ128m , X86::VFMSUBADD132PDm },
- { X86::VFMSUBADD132PDZ128r , X86::VFMSUBADD132PDr },
- { X86::VFMSUBADD132PSZ128m , X86::VFMSUBADD132PSm },
- { X86::VFMSUBADD132PSZ128r , X86::VFMSUBADD132PSr },
- { X86::VFMSUBADD213PDZ128m , X86::VFMSUBADD213PDm },
- { X86::VFMSUBADD213PDZ128r , X86::VFMSUBADD213PDr },
- { X86::VFMSUBADD213PSZ128m , X86::VFMSUBADD213PSm },
- { X86::VFMSUBADD213PSZ128r , X86::VFMSUBADD213PSr },
- { X86::VFMSUBADD231PDZ128m , X86::VFMSUBADD231PDm },
- { X86::VFMSUBADD231PDZ128r , X86::VFMSUBADD231PDr },
- { X86::VFMSUBADD231PSZ128m , X86::VFMSUBADD231PSm },
- { X86::VFMSUBADD231PSZ128r , X86::VFMSUBADD231PSr },
- { X86::VFNMADD132PDZ128m , X86::VFNMADD132PDm },
- { X86::VFNMADD132PDZ128r , X86::VFNMADD132PDr },
- { X86::VFNMADD132PSZ128m , X86::VFNMADD132PSm },
- { X86::VFNMADD132PSZ128r , X86::VFNMADD132PSr },
- { X86::VFNMADD213PDZ128m , X86::VFNMADD213PDm },
- { X86::VFNMADD213PDZ128r , X86::VFNMADD213PDr },
- { X86::VFNMADD213PSZ128m , X86::VFNMADD213PSm },
- { X86::VFNMADD213PSZ128r , X86::VFNMADD213PSr },
- { X86::VFNMADD231PDZ128m , X86::VFNMADD231PDm },
- { X86::VFNMADD231PDZ128r , X86::VFNMADD231PDr },
- { X86::VFNMADD231PSZ128m , X86::VFNMADD231PSm },
- { X86::VFNMADD231PSZ128r , X86::VFNMADD231PSr },
- { X86::VFNMSUB132PDZ128m , X86::VFNMSUB132PDm },
- { X86::VFNMSUB132PDZ128r , X86::VFNMSUB132PDr },
- { X86::VFNMSUB132PSZ128m , X86::VFNMSUB132PSm },
- { X86::VFNMSUB132PSZ128r , X86::VFNMSUB132PSr },
- { X86::VFNMSUB213PDZ128m , X86::VFNMSUB213PDm },
- { X86::VFNMSUB213PDZ128r , X86::VFNMSUB213PDr },
- { X86::VFNMSUB213PSZ128m , X86::VFNMSUB213PSm },
- { X86::VFNMSUB213PSZ128r , X86::VFNMSUB213PSr },
- { X86::VFNMSUB231PDZ128m , X86::VFNMSUB231PDm },
- { X86::VFNMSUB231PDZ128r , X86::VFNMSUB231PDr },
- { X86::VFNMSUB231PSZ128m , X86::VFNMSUB231PSm },
- { X86::VFNMSUB231PSZ128r , X86::VFNMSUB231PSr },
- { X86::VMAXCPDZ128rm , X86::VMAXCPDrm },
- { X86::VMAXCPDZ128rr , X86::VMAXCPDrr },
- { X86::VMAXCPSZ128rm , X86::VMAXCPSrm },
- { X86::VMAXCPSZ128rr , X86::VMAXCPSrr },
- { X86::VMAXPDZ128rm , X86::VMAXPDrm },
- { X86::VMAXPDZ128rr , X86::VMAXPDrr },
- { X86::VMAXPSZ128rm , X86::VMAXPSrm },
- { X86::VMAXPSZ128rr , X86::VMAXPSrr },
- { X86::VMINCPDZ128rm , X86::VMINCPDrm },
- { X86::VMINCPDZ128rr , X86::VMINCPDrr },
- { X86::VMINCPSZ128rm , X86::VMINCPSrm },
- { X86::VMINCPSZ128rr , X86::VMINCPSrr },
- { X86::VMINPDZ128rm , X86::VMINPDrm },
- { X86::VMINPDZ128rr , X86::VMINPDrr },
- { X86::VMINPSZ128rm , X86::VMINPSrm },
- { X86::VMINPSZ128rr , X86::VMINPSrr },
- { X86::VMOVAPDZ128mr , X86::VMOVAPDmr },
- { X86::VMOVAPDZ128rm , X86::VMOVAPDrm },
- { X86::VMOVAPDZ128rr , X86::VMOVAPDrr },
- { X86::VMOVAPDZ128rr_REV , X86::VMOVAPDrr_REV },
- { X86::VMOVAPSZ128mr , X86::VMOVAPSmr },
- { X86::VMOVAPSZ128rm , X86::VMOVAPSrm },
- { X86::VMOVAPSZ128rr , X86::VMOVAPSrr },
- { X86::VMOVAPSZ128rr_REV , X86::VMOVAPSrr_REV },
- { X86::VMOVDDUPZ128rm , X86::VMOVDDUPrm },
- { X86::VMOVDDUPZ128rr , X86::VMOVDDUPrr },
- { X86::VMOVDQA32Z128mr , X86::VMOVDQAmr },
- { X86::VMOVDQA32Z128rm , X86::VMOVDQArm },
- { X86::VMOVDQA32Z128rr , X86::VMOVDQArr },
- { X86::VMOVDQA32Z128rr_REV , X86::VMOVDQArr_REV },
- { X86::VMOVDQA64Z128mr , X86::VMOVDQAmr },
- { X86::VMOVDQA64Z128rm , X86::VMOVDQArm },
- { X86::VMOVDQA64Z128rr , X86::VMOVDQArr },
- { X86::VMOVDQA64Z128rr_REV , X86::VMOVDQArr_REV },
- { X86::VMOVDQU16Z128mr , X86::VMOVDQUmr },
- { X86::VMOVDQU16Z128rm , X86::VMOVDQUrm },
- { X86::VMOVDQU16Z128rr , X86::VMOVDQUrr },
- { X86::VMOVDQU16Z128rr_REV , X86::VMOVDQUrr_REV },
- { X86::VMOVDQU32Z128mr , X86::VMOVDQUmr },
- { X86::VMOVDQU32Z128rm , X86::VMOVDQUrm },
- { X86::VMOVDQU32Z128rr , X86::VMOVDQUrr },
- { X86::VMOVDQU32Z128rr_REV , X86::VMOVDQUrr_REV },
- { X86::VMOVDQU64Z128mr , X86::VMOVDQUmr },
- { X86::VMOVDQU64Z128rm , X86::VMOVDQUrm },
- { X86::VMOVDQU64Z128rr , X86::VMOVDQUrr },
- { X86::VMOVDQU64Z128rr_REV , X86::VMOVDQUrr_REV },
- { X86::VMOVDQU8Z128mr , X86::VMOVDQUmr },
- { X86::VMOVDQU8Z128rm , X86::VMOVDQUrm },
- { X86::VMOVDQU8Z128rr , X86::VMOVDQUrr },
- { X86::VMOVDQU8Z128rr_REV , X86::VMOVDQUrr_REV },
- { X86::VMOVHPDZ128mr , X86::VMOVHPDmr },
- { X86::VMOVHPDZ128rm , X86::VMOVHPDrm },
- { X86::VMOVHPSZ128mr , X86::VMOVHPSmr },
- { X86::VMOVHPSZ128rm , X86::VMOVHPSrm },
- { X86::VMOVLPDZ128mr , X86::VMOVLPDmr },
- { X86::VMOVLPDZ128rm , X86::VMOVLPDrm },
- { X86::VMOVLPSZ128mr , X86::VMOVLPSmr },
- { X86::VMOVLPSZ128rm , X86::VMOVLPSrm },
- { X86::VMOVNTDQAZ128rm , X86::VMOVNTDQArm },
- { X86::VMOVNTDQZ128mr , X86::VMOVNTDQmr },
- { X86::VMOVNTPDZ128mr , X86::VMOVNTPDmr },
- { X86::VMOVNTPSZ128mr , X86::VMOVNTPSmr },
- { X86::VMOVSHDUPZ128rm , X86::VMOVSHDUPrm },
- { X86::VMOVSHDUPZ128rr , X86::VMOVSHDUPrr },
- { X86::VMOVSLDUPZ128rm , X86::VMOVSLDUPrm },
- { X86::VMOVSLDUPZ128rr , X86::VMOVSLDUPrr },
- { X86::VMOVUPDZ128mr , X86::VMOVUPDmr },
- { X86::VMOVUPDZ128rm , X86::VMOVUPDrm },
- { X86::VMOVUPDZ128rr , X86::VMOVUPDrr },
- { X86::VMOVUPDZ128rr_REV , X86::VMOVUPDrr_REV },
- { X86::VMOVUPSZ128mr , X86::VMOVUPSmr },
- { X86::VMOVUPSZ128rm , X86::VMOVUPSrm },
- { X86::VMOVUPSZ128rr , X86::VMOVUPSrr },
- { X86::VMOVUPSZ128rr_REV , X86::VMOVUPSrr_REV },
- { X86::VMULPDZ128rm , X86::VMULPDrm },
- { X86::VMULPDZ128rr , X86::VMULPDrr },
- { X86::VMULPSZ128rm , X86::VMULPSrm },
- { X86::VMULPSZ128rr , X86::VMULPSrr },
- { X86::VORPDZ128rm , X86::VORPDrm },
- { X86::VORPDZ128rr , X86::VORPDrr },
- { X86::VORPSZ128rm , X86::VORPSrm },
- { X86::VORPSZ128rr , X86::VORPSrr },
- { X86::VPABSBZ128rm , X86::VPABSBrm },
- { X86::VPABSBZ128rr , X86::VPABSBrr },
- { X86::VPABSDZ128rm , X86::VPABSDrm },
- { X86::VPABSDZ128rr , X86::VPABSDrr },
- { X86::VPABSWZ128rm , X86::VPABSWrm },
- { X86::VPABSWZ128rr , X86::VPABSWrr },
- { X86::VPACKSSDWZ128rm , X86::VPACKSSDWrm },
- { X86::VPACKSSDWZ128rr , X86::VPACKSSDWrr },
- { X86::VPACKSSWBZ128rm , X86::VPACKSSWBrm },
- { X86::VPACKSSWBZ128rr , X86::VPACKSSWBrr },
- { X86::VPACKUSDWZ128rm , X86::VPACKUSDWrm },
- { X86::VPACKUSDWZ128rr , X86::VPACKUSDWrr },
- { X86::VPACKUSWBZ128rm , X86::VPACKUSWBrm },
- { X86::VPACKUSWBZ128rr , X86::VPACKUSWBrr },
- { X86::VPADDBZ128rm , X86::VPADDBrm },
- { X86::VPADDBZ128rr , X86::VPADDBrr },
- { X86::VPADDDZ128rm , X86::VPADDDrm },
- { X86::VPADDDZ128rr , X86::VPADDDrr },
- { X86::VPADDQZ128rm , X86::VPADDQrm },
- { X86::VPADDQZ128rr , X86::VPADDQrr },
- { X86::VPADDSBZ128rm , X86::VPADDSBrm },
- { X86::VPADDSBZ128rr , X86::VPADDSBrr },
- { X86::VPADDSWZ128rm , X86::VPADDSWrm },
- { X86::VPADDSWZ128rr , X86::VPADDSWrr },
- { X86::VPADDUSBZ128rm , X86::VPADDUSBrm },
- { X86::VPADDUSBZ128rr , X86::VPADDUSBrr },
- { X86::VPADDUSWZ128rm , X86::VPADDUSWrm },
- { X86::VPADDUSWZ128rr , X86::VPADDUSWrr },
- { X86::VPADDWZ128rm , X86::VPADDWrm },
- { X86::VPADDWZ128rr , X86::VPADDWrr },
- { X86::VPALIGNRZ128rmi , X86::VPALIGNRrmi },
- { X86::VPALIGNRZ128rri , X86::VPALIGNRrri },
- { X86::VPANDDZ128rm , X86::VPANDrm },
- { X86::VPANDDZ128rr , X86::VPANDrr },
- { X86::VPANDQZ128rm , X86::VPANDrm },
- { X86::VPANDQZ128rr , X86::VPANDrr },
- { X86::VPAVGBZ128rm , X86::VPAVGBrm },
- { X86::VPAVGBZ128rr , X86::VPAVGBrr },
- { X86::VPAVGWZ128rm , X86::VPAVGWrm },
- { X86::VPAVGWZ128rr , X86::VPAVGWrr },
- { X86::VPBROADCASTBZ128m , X86::VPBROADCASTBrm },
- { X86::VPBROADCASTBZ128r , X86::VPBROADCASTBrr },
- { X86::VPBROADCASTDZ128m , X86::VPBROADCASTDrm },
- { X86::VPBROADCASTDZ128r , X86::VPBROADCASTDrr },
- { X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm },
- { X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr },
- { X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm },
- { X86::VPBROADCASTWZ128r , X86::VPBROADCASTWrr },
- { X86::VPERMILPDZ128mi , X86::VPERMILPDmi },
- { X86::VPERMILPDZ128ri , X86::VPERMILPDri },
- { X86::VPERMILPDZ128rm , X86::VPERMILPDrm },
- { X86::VPERMILPDZ128rr , X86::VPERMILPDrr },
- { X86::VPERMILPSZ128mi , X86::VPERMILPSmi },
- { X86::VPERMILPSZ128ri , X86::VPERMILPSri },
- { X86::VPERMILPSZ128rm , X86::VPERMILPSrm },
- { X86::VPERMILPSZ128rr , X86::VPERMILPSrr },
- { X86::VPMADDUBSWZ128rm , X86::VPMADDUBSWrm },
- { X86::VPMADDUBSWZ128rr , X86::VPMADDUBSWrr },
- { X86::VPMADDWDZ128rm , X86::VPMADDWDrm },
- { X86::VPMADDWDZ128rr , X86::VPMADDWDrr },
- { X86::VPMAXSBZ128rm , X86::VPMAXSBrm },
- { X86::VPMAXSBZ128rr , X86::VPMAXSBrr },
- { X86::VPMAXSDZ128rm , X86::VPMAXSDrm },
- { X86::VPMAXSDZ128rr , X86::VPMAXSDrr },
- { X86::VPMAXSWZ128rm , X86::VPMAXSWrm },
- { X86::VPMAXSWZ128rr , X86::VPMAXSWrr },
- { X86::VPMAXUBZ128rm , X86::VPMAXUBrm },
- { X86::VPMAXUBZ128rr , X86::VPMAXUBrr },
- { X86::VPMAXUDZ128rm , X86::VPMAXUDrm },
- { X86::VPMAXUDZ128rr , X86::VPMAXUDrr },
- { X86::VPMAXUWZ128rm , X86::VPMAXUWrm },
- { X86::VPMAXUWZ128rr , X86::VPMAXUWrr },
- { X86::VPMINSBZ128rm , X86::VPMINSBrm },
- { X86::VPMINSBZ128rr , X86::VPMINSBrr },
- { X86::VPMINSDZ128rm , X86::VPMINSDrm },
- { X86::VPMINSDZ128rr , X86::VPMINSDrr },
- { X86::VPMINSWZ128rm , X86::VPMINSWrm },
- { X86::VPMINSWZ128rr , X86::VPMINSWrr },
- { X86::VPMINUBZ128rm , X86::VPMINUBrm },
- { X86::VPMINUBZ128rr , X86::VPMINUBrr },
- { X86::VPMINUDZ128rm , X86::VPMINUDrm },
- { X86::VPMINUDZ128rr , X86::VPMINUDrr },
- { X86::VPMINUWZ128rm , X86::VPMINUWrm },
- { X86::VPMINUWZ128rr , X86::VPMINUWrr },
- { X86::VPMOVSXBDZ128rm , X86::VPMOVSXBDrm },
- { X86::VPMOVSXBDZ128rr , X86::VPMOVSXBDrr },
- { X86::VPMOVSXBQZ128rm , X86::VPMOVSXBQrm },
- { X86::VPMOVSXBQZ128rr , X86::VPMOVSXBQrr },
- { X86::VPMOVSXBWZ128rm , X86::VPMOVSXBWrm },
- { X86::VPMOVSXBWZ128rr , X86::VPMOVSXBWrr },
- { X86::VPMOVSXDQZ128rm , X86::VPMOVSXDQrm },
- { X86::VPMOVSXDQZ128rr , X86::VPMOVSXDQrr },
- { X86::VPMOVSXWDZ128rm , X86::VPMOVSXWDrm },
- { X86::VPMOVSXWDZ128rr , X86::VPMOVSXWDrr },
- { X86::VPMOVSXWQZ128rm , X86::VPMOVSXWQrm },
- { X86::VPMOVSXWQZ128rr , X86::VPMOVSXWQrr },
- { X86::VPMOVZXBDZ128rm , X86::VPMOVZXBDrm },
- { X86::VPMOVZXBDZ128rr , X86::VPMOVZXBDrr },
- { X86::VPMOVZXBQZ128rm , X86::VPMOVZXBQrm },
- { X86::VPMOVZXBQZ128rr , X86::VPMOVZXBQrr },
- { X86::VPMOVZXBWZ128rm , X86::VPMOVZXBWrm },
- { X86::VPMOVZXBWZ128rr , X86::VPMOVZXBWrr },
- { X86::VPMOVZXDQZ128rm , X86::VPMOVZXDQrm },
- { X86::VPMOVZXDQZ128rr , X86::VPMOVZXDQrr },
- { X86::VPMOVZXWDZ128rm , X86::VPMOVZXWDrm },
- { X86::VPMOVZXWDZ128rr , X86::VPMOVZXWDrr },
- { X86::VPMOVZXWQZ128rm , X86::VPMOVZXWQrm },
- { X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr },
- { X86::VPMULDQZ128rm , X86::VPMULDQrm },
- { X86::VPMULDQZ128rr , X86::VPMULDQrr },
- { X86::VPMULHRSWZ128rm , X86::VPMULHRSWrm },
- { X86::VPMULHRSWZ128rr , X86::VPMULHRSWrr },
- { X86::VPMULHUWZ128rm , X86::VPMULHUWrm },
- { X86::VPMULHUWZ128rr , X86::VPMULHUWrr },
- { X86::VPMULHWZ128rm , X86::VPMULHWrm },
- { X86::VPMULHWZ128rr , X86::VPMULHWrr },
- { X86::VPMULLDZ128rm , X86::VPMULLDrm },
- { X86::VPMULLDZ128rr , X86::VPMULLDrr },
- { X86::VPMULLWZ128rm , X86::VPMULLWrm },
- { X86::VPMULLWZ128rr , X86::VPMULLWrr },
- { X86::VPMULUDQZ128rm , X86::VPMULUDQrm },
- { X86::VPMULUDQZ128rr , X86::VPMULUDQrr },
- { X86::VPORDZ128rm , X86::VPORrm },
- { X86::VPORDZ128rr , X86::VPORrr },
- { X86::VPORQZ128rm , X86::VPORrm },
- { X86::VPORQZ128rr , X86::VPORrr },
- { X86::VPSADBWZ128rm , X86::VPSADBWrm },
- { X86::VPSADBWZ128rr , X86::VPSADBWrr },
- { X86::VPSHUFBZ128rm , X86::VPSHUFBrm },
- { X86::VPSHUFBZ128rr , X86::VPSHUFBrr },
- { X86::VPSHUFDZ128mi , X86::VPSHUFDmi },
- { X86::VPSHUFDZ128ri , X86::VPSHUFDri },
- { X86::VPSHUFHWZ128mi , X86::VPSHUFHWmi },
- { X86::VPSHUFHWZ128ri , X86::VPSHUFHWri },
- { X86::VPSHUFLWZ128mi , X86::VPSHUFLWmi },
- { X86::VPSHUFLWZ128ri , X86::VPSHUFLWri },
- { X86::VPSLLDQZ128rr , X86::VPSLLDQri },
- { X86::VPSLLDZ128ri , X86::VPSLLDri },
- { X86::VPSLLDZ128rm , X86::VPSLLDrm },
- { X86::VPSLLDZ128rr , X86::VPSLLDrr },
- { X86::VPSLLQZ128ri , X86::VPSLLQri },
- { X86::VPSLLQZ128rm , X86::VPSLLQrm },
- { X86::VPSLLQZ128rr , X86::VPSLLQrr },
- { X86::VPSLLVDZ128rm , X86::VPSLLVDrm },
- { X86::VPSLLVDZ128rr , X86::VPSLLVDrr },
- { X86::VPSLLVQZ128rm , X86::VPSLLVQrm },
- { X86::VPSLLVQZ128rr , X86::VPSLLVQrr },
- { X86::VPSLLWZ128ri , X86::VPSLLWri },
- { X86::VPSLLWZ128rm , X86::VPSLLWrm },
- { X86::VPSLLWZ128rr , X86::VPSLLWrr },
- { X86::VPSRADZ128ri , X86::VPSRADri },
- { X86::VPSRADZ128rm , X86::VPSRADrm },
- { X86::VPSRADZ128rr , X86::VPSRADrr },
- { X86::VPSRAVDZ128rm , X86::VPSRAVDrm },
- { X86::VPSRAVDZ128rr , X86::VPSRAVDrr },
- { X86::VPSRAWZ128ri , X86::VPSRAWri },
- { X86::VPSRAWZ128rm , X86::VPSRAWrm },
- { X86::VPSRAWZ128rr , X86::VPSRAWrr },
- { X86::VPSRLDQZ128rr , X86::VPSRLDQri },
- { X86::VPSRLDZ128ri , X86::VPSRLDri },
- { X86::VPSRLDZ128rm , X86::VPSRLDrm },
- { X86::VPSRLDZ128rr , X86::VPSRLDrr },
- { X86::VPSRLQZ128ri , X86::VPSRLQri },
- { X86::VPSRLQZ128rm , X86::VPSRLQrm },
- { X86::VPSRLQZ128rr , X86::VPSRLQrr },
- { X86::VPSRLVDZ128rm , X86::VPSRLVDrm },
- { X86::VPSRLVDZ128rr , X86::VPSRLVDrr },
- { X86::VPSRLVQZ128rm , X86::VPSRLVQrm },
- { X86::VPSRLVQZ128rr , X86::VPSRLVQrr },
- { X86::VPSRLWZ128ri , X86::VPSRLWri },
- { X86::VPSRLWZ128rm , X86::VPSRLWrm },
- { X86::VPSRLWZ128rr , X86::VPSRLWrr },
- { X86::VPSUBBZ128rm , X86::VPSUBBrm },
- { X86::VPSUBBZ128rr , X86::VPSUBBrr },
- { X86::VPSUBDZ128rm , X86::VPSUBDrm },
- { X86::VPSUBDZ128rr , X86::VPSUBDrr },
- { X86::VPSUBQZ128rm , X86::VPSUBQrm },
- { X86::VPSUBQZ128rr , X86::VPSUBQrr },
- { X86::VPSUBSBZ128rm , X86::VPSUBSBrm },
- { X86::VPSUBSBZ128rr , X86::VPSUBSBrr },
- { X86::VPSUBSWZ128rm , X86::VPSUBSWrm },
- { X86::VPSUBSWZ128rr , X86::VPSUBSWrr },
- { X86::VPSUBUSBZ128rm , X86::VPSUBUSBrm },
- { X86::VPSUBUSBZ128rr , X86::VPSUBUSBrr },
- { X86::VPSUBUSWZ128rm , X86::VPSUBUSWrm },
- { X86::VPSUBUSWZ128rr , X86::VPSUBUSWrr },
- { X86::VPSUBWZ128rm , X86::VPSUBWrm },
- { X86::VPSUBWZ128rr , X86::VPSUBWrr },
- { X86::VPUNPCKHBWZ128rm , X86::VPUNPCKHBWrm },
- { X86::VPUNPCKHBWZ128rr , X86::VPUNPCKHBWrr },
- { X86::VPUNPCKHDQZ128rm , X86::VPUNPCKHDQrm },
- { X86::VPUNPCKHDQZ128rr , X86::VPUNPCKHDQrr },
- { X86::VPUNPCKHQDQZ128rm , X86::VPUNPCKHQDQrm },
- { X86::VPUNPCKHQDQZ128rr , X86::VPUNPCKHQDQrr },
- { X86::VPUNPCKHWDZ128rm , X86::VPUNPCKHWDrm },
- { X86::VPUNPCKHWDZ128rr , X86::VPUNPCKHWDrr },
- { X86::VPUNPCKLBWZ128rm , X86::VPUNPCKLBWrm },
- { X86::VPUNPCKLBWZ128rr , X86::VPUNPCKLBWrr },
- { X86::VPUNPCKLDQZ128rm , X86::VPUNPCKLDQrm },
- { X86::VPUNPCKLDQZ128rr , X86::VPUNPCKLDQrr },
- { X86::VPUNPCKLQDQZ128rm , X86::VPUNPCKLQDQrm },
- { X86::VPUNPCKLQDQZ128rr , X86::VPUNPCKLQDQrr },
- { X86::VPUNPCKLWDZ128rm , X86::VPUNPCKLWDrm },
- { X86::VPUNPCKLWDZ128rr , X86::VPUNPCKLWDrr },
- { X86::VPXORDZ128rm , X86::VPXORrm },
- { X86::VPXORDZ128rr , X86::VPXORrr },
- { X86::VPXORQZ128rm , X86::VPXORrm },
- { X86::VPXORQZ128rr , X86::VPXORrr },
- { X86::VSHUFPDZ128rmi , X86::VSHUFPDrmi },
- { X86::VSHUFPDZ128rri , X86::VSHUFPDrri },
- { X86::VSHUFPSZ128rmi , X86::VSHUFPSrmi },
- { X86::VSHUFPSZ128rri , X86::VSHUFPSrri },
- { X86::VSQRTPDZ128m , X86::VSQRTPDm },
- { X86::VSQRTPDZ128r , X86::VSQRTPDr },
- { X86::VSQRTPSZ128m , X86::VSQRTPSm },
- { X86::VSQRTPSZ128r , X86::VSQRTPSr },
- { X86::VSUBPDZ128rm , X86::VSUBPDrm },
- { X86::VSUBPDZ128rr , X86::VSUBPDrr },
- { X86::VSUBPSZ128rm , X86::VSUBPSrm },
- { X86::VSUBPSZ128rr , X86::VSUBPSrr },
- { X86::VUNPCKHPDZ128rm , X86::VUNPCKHPDrm },
- { X86::VUNPCKHPDZ128rr , X86::VUNPCKHPDrr },
- { X86::VUNPCKHPSZ128rm , X86::VUNPCKHPSrm },
- { X86::VUNPCKHPSZ128rr , X86::VUNPCKHPSrr },
- { X86::VUNPCKLPDZ128rm , X86::VUNPCKLPDrm },
- { X86::VUNPCKLPDZ128rr , X86::VUNPCKLPDrr },
- { X86::VUNPCKLPSZ128rm , X86::VUNPCKLPSrm },
- { X86::VUNPCKLPSZ128rr , X86::VUNPCKLPSrr },
- { X86::VXORPDZ128rm , X86::VXORPDrm },
- { X86::VXORPDZ128rr , X86::VXORPDrr },
- { X86::VXORPSZ128rm , X86::VXORPSrm },
- { X86::VXORPSZ128rr , X86::VXORPSrr },
-};
-
-
-// X86 EVEX encoded instructions that have a VEX 256 encoding
-// (table format: <EVEX opcode, VEX-256 opcode>).
- static const X86EvexToVexCompressTableEntry X86EvexToVex256CompressTable[] = {
- { X86::VADDPDZ256rm , X86::VADDPDYrm },
- { X86::VADDPDZ256rr , X86::VADDPDYrr },
- { X86::VADDPSZ256rm , X86::VADDPSYrm },
- { X86::VADDPSZ256rr , X86::VADDPSYrr },
- { X86::VANDNPDZ256rm , X86::VANDNPDYrm },
- { X86::VANDNPDZ256rr , X86::VANDNPDYrr },
- { X86::VANDNPSZ256rm , X86::VANDNPSYrm },
- { X86::VANDNPSZ256rr , X86::VANDNPSYrr },
- { X86::VANDPDZ256rm , X86::VANDPDYrm },
- { X86::VANDPDZ256rr , X86::VANDPDYrr },
- { X86::VANDPSZ256rm , X86::VANDPSYrm },
- { X86::VANDPSZ256rr , X86::VANDPSYrr },
- { X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm },
- { X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr },
- { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr },
- { X86::VBROADCASTSSZ256m , X86::VBROADCASTSSYrm },
- { X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr },
- { X86::VBROADCASTSSZ256r_s , X86::VBROADCASTSSYrr },
- { X86::VCVTDQ2PDZ256rm , X86::VCVTDQ2PDYrm },
- { X86::VCVTDQ2PDZ256rr , X86::VCVTDQ2PDYrr },
- { X86::VCVTDQ2PSZ256rm , X86::VCVTDQ2PSYrm },
- { X86::VCVTDQ2PSZ256rr , X86::VCVTDQ2PSYrr },
- { X86::VCVTPD2DQZ256rm , X86::VCVTPD2DQYrm },
- { X86::VCVTPD2DQZ256rr , X86::VCVTPD2DQYrr },
- { X86::VCVTPD2PSZ256rm , X86::VCVTPD2PSYrm },
- { X86::VCVTPD2PSZ256rr , X86::VCVTPD2PSYrr },
- { X86::VCVTPH2PSZ256rm , X86::VCVTPH2PSYrm },
- { X86::VCVTPH2PSZ256rr , X86::VCVTPH2PSYrr },
- { X86::VCVTPS2DQZ256rm , X86::VCVTPS2DQYrm },
- { X86::VCVTPS2DQZ256rr , X86::VCVTPS2DQYrr },
- { X86::VCVTPS2PDZ256rm , X86::VCVTPS2PDYrm },
- { X86::VCVTPS2PDZ256rr , X86::VCVTPS2PDYrr },
- { X86::VCVTPS2PHZ256mr , X86::VCVTPS2PHYmr },
- { X86::VCVTPS2PHZ256rr , X86::VCVTPS2PHYrr },
- { X86::VCVTTPD2DQZ256rm , X86::VCVTTPD2DQYrm },
- { X86::VCVTTPD2DQZ256rr , X86::VCVTTPD2DQYrr },
- { X86::VCVTTPS2DQZ256rm , X86::VCVTTPS2DQYrm },
- { X86::VCVTTPS2DQZ256rr , X86::VCVTTPS2DQYrr },
- { X86::VDIVPDZ256rm , X86::VDIVPDYrm },
- { X86::VDIVPDZ256rr , X86::VDIVPDYrr },
- { X86::VDIVPSZ256rm , X86::VDIVPSYrm },
- { X86::VDIVPSZ256rr , X86::VDIVPSYrr },
- { X86::VEXTRACTF32x4Z256mr , X86::VEXTRACTF128mr },
- { X86::VEXTRACTF64x2Z256mr , X86::VEXTRACTF128mr },
- { X86::VEXTRACTF32x4Z256rr , X86::VEXTRACTF128rr },
- { X86::VEXTRACTF64x2Z256rr , X86::VEXTRACTF128rr },
- { X86::VEXTRACTI32x4Z256mr , X86::VEXTRACTI128mr },
- { X86::VEXTRACTI64x2Z256mr , X86::VEXTRACTI128mr },
- { X86::VEXTRACTI32x4Z256rr , X86::VEXTRACTI128rr },
- { X86::VEXTRACTI64x2Z256rr , X86::VEXTRACTI128rr },
- { X86::VFMADD132PDZ256m , X86::VFMADD132PDYm },
- { X86::VFMADD132PDZ256r , X86::VFMADD132PDYr },
- { X86::VFMADD132PSZ256m , X86::VFMADD132PSYm },
- { X86::VFMADD132PSZ256r , X86::VFMADD132PSYr },
- { X86::VFMADD213PDZ256m , X86::VFMADD213PDYm },
- { X86::VFMADD213PDZ256r , X86::VFMADD213PDYr },
- { X86::VFMADD213PSZ256m , X86::VFMADD213PSYm },
- { X86::VFMADD213PSZ256r , X86::VFMADD213PSYr },
- { X86::VFMADD231PDZ256m , X86::VFMADD231PDYm },
- { X86::VFMADD231PDZ256r , X86::VFMADD231PDYr },
- { X86::VFMADD231PSZ256m , X86::VFMADD231PSYm },
- { X86::VFMADD231PSZ256r , X86::VFMADD231PSYr },
- { X86::VFMADDSUB132PDZ256m , X86::VFMADDSUB132PDYm },
- { X86::VFMADDSUB132PDZ256r , X86::VFMADDSUB132PDYr },
- { X86::VFMADDSUB132PSZ256m , X86::VFMADDSUB132PSYm },
- { X86::VFMADDSUB132PSZ256r , X86::VFMADDSUB132PSYr },
- { X86::VFMADDSUB213PDZ256m , X86::VFMADDSUB213PDYm },
- { X86::VFMADDSUB213PDZ256r , X86::VFMADDSUB213PDYr },
- { X86::VFMADDSUB213PSZ256m , X86::VFMADDSUB213PSYm },
- { X86::VFMADDSUB213PSZ256r , X86::VFMADDSUB213PSYr },
- { X86::VFMADDSUB231PDZ256m , X86::VFMADDSUB231PDYm },
- { X86::VFMADDSUB231PDZ256r , X86::VFMADDSUB231PDYr },
- { X86::VFMADDSUB231PSZ256m , X86::VFMADDSUB231PSYm },
- { X86::VFMADDSUB231PSZ256r , X86::VFMADDSUB231PSYr },
- { X86::VFMSUB132PDZ256m , X86::VFMSUB132PDYm },
- { X86::VFMSUB132PDZ256r , X86::VFMSUB132PDYr },
- { X86::VFMSUB132PSZ256m , X86::VFMSUB132PSYm },
- { X86::VFMSUB132PSZ256r , X86::VFMSUB132PSYr },
- { X86::VFMSUB213PDZ256m , X86::VFMSUB213PDYm },
- { X86::VFMSUB213PDZ256r , X86::VFMSUB213PDYr },
- { X86::VFMSUB213PSZ256m , X86::VFMSUB213PSYm },
- { X86::VFMSUB213PSZ256r , X86::VFMSUB213PSYr },
- { X86::VFMSUB231PDZ256m , X86::VFMSUB231PDYm },
- { X86::VFMSUB231PDZ256r , X86::VFMSUB231PDYr },
- { X86::VFMSUB231PSZ256m , X86::VFMSUB231PSYm },
- { X86::VFMSUB231PSZ256r , X86::VFMSUB231PSYr },
- { X86::VFMSUBADD132PDZ256m , X86::VFMSUBADD132PDYm },
- { X86::VFMSUBADD132PDZ256r , X86::VFMSUBADD132PDYr },
- { X86::VFMSUBADD132PSZ256m , X86::VFMSUBADD132PSYm },
- { X86::VFMSUBADD132PSZ256r , X86::VFMSUBADD132PSYr },
- { X86::VFMSUBADD213PDZ256m , X86::VFMSUBADD213PDYm },
- { X86::VFMSUBADD213PDZ256r , X86::VFMSUBADD213PDYr },
- { X86::VFMSUBADD213PSZ256m , X86::VFMSUBADD213PSYm },
- { X86::VFMSUBADD213PSZ256r , X86::VFMSUBADD213PSYr },
- { X86::VFMSUBADD231PDZ256m , X86::VFMSUBADD231PDYm },
- { X86::VFMSUBADD231PDZ256r , X86::VFMSUBADD231PDYr },
- { X86::VFMSUBADD231PSZ256m , X86::VFMSUBADD231PSYm },
- { X86::VFMSUBADD231PSZ256r , X86::VFMSUBADD231PSYr },
- { X86::VFNMADD132PDZ256m , X86::VFNMADD132PDYm },
- { X86::VFNMADD132PDZ256r , X86::VFNMADD132PDYr },
- { X86::VFNMADD132PSZ256m , X86::VFNMADD132PSYm },
- { X86::VFNMADD132PSZ256r , X86::VFNMADD132PSYr },
- { X86::VFNMADD213PDZ256m , X86::VFNMADD213PDYm },
- { X86::VFNMADD213PDZ256r , X86::VFNMADD213PDYr },
- { X86::VFNMADD213PSZ256m , X86::VFNMADD213PSYm },
- { X86::VFNMADD213PSZ256r , X86::VFNMADD213PSYr },
- { X86::VFNMADD231PDZ256m , X86::VFNMADD231PDYm },
- { X86::VFNMADD231PDZ256r , X86::VFNMADD231PDYr },
- { X86::VFNMADD231PSZ256m , X86::VFNMADD231PSYm },
- { X86::VFNMADD231PSZ256r , X86::VFNMADD231PSYr },
- { X86::VFNMSUB132PDZ256m , X86::VFNMSUB132PDYm },
- { X86::VFNMSUB132PDZ256r , X86::VFNMSUB132PDYr },
- { X86::VFNMSUB132PSZ256m , X86::VFNMSUB132PSYm },
- { X86::VFNMSUB132PSZ256r , X86::VFNMSUB132PSYr },
- { X86::VFNMSUB213PDZ256m , X86::VFNMSUB213PDYm },
- { X86::VFNMSUB213PDZ256r , X86::VFNMSUB213PDYr },
- { X86::VFNMSUB213PSZ256m , X86::VFNMSUB213PSYm },
- { X86::VFNMSUB213PSZ256r , X86::VFNMSUB213PSYr },
- { X86::VFNMSUB231PDZ256m , X86::VFNMSUB231PDYm },
- { X86::VFNMSUB231PDZ256r , X86::VFNMSUB231PDYr },
- { X86::VFNMSUB231PSZ256m , X86::VFNMSUB231PSYm },
- { X86::VFNMSUB231PSZ256r , X86::VFNMSUB231PSYr },
- { X86::VINSERTF32x4Z256rm , X86::VINSERTF128rm },
- { X86::VINSERTF64x2Z256rm , X86::VINSERTF128rm },
- { X86::VINSERTF32x4Z256rr , X86::VINSERTF128rr },
- { X86::VINSERTF64x2Z256rr , X86::VINSERTF128rr },
- { X86::VINSERTI32x4Z256rm , X86::VINSERTI128rm },
- { X86::VINSERTI64x2Z256rm , X86::VINSERTI128rm },
- { X86::VINSERTI32x4Z256rr , X86::VINSERTI128rr },
- { X86::VINSERTI64x2Z256rr , X86::VINSERTI128rr },
- { X86::VMAXCPDZ256rm , X86::VMAXCPDYrm },
- { X86::VMAXCPDZ256rr , X86::VMAXCPDYrr },
- { X86::VMAXCPSZ256rm , X86::VMAXCPSYrm },
- { X86::VMAXCPSZ256rr , X86::VMAXCPSYrr },
- { X86::VMAXPDZ256rm , X86::VMAXPDYrm },
- { X86::VMAXPDZ256rr , X86::VMAXPDYrr },
- { X86::VMAXPSZ256rm , X86::VMAXPSYrm },
- { X86::VMAXPSZ256rr , X86::VMAXPSYrr },
- { X86::VMINCPDZ256rm , X86::VMINCPDYrm },
- { X86::VMINCPDZ256rr , X86::VMINCPDYrr },
- { X86::VMINCPSZ256rm , X86::VMINCPSYrm },
- { X86::VMINCPSZ256rr , X86::VMINCPSYrr },
- { X86::VMINPDZ256rm , X86::VMINPDYrm },
- { X86::VMINPDZ256rr , X86::VMINPDYrr },
- { X86::VMINPSZ256rm , X86::VMINPSYrm },
- { X86::VMINPSZ256rr , X86::VMINPSYrr },
- { X86::VMOVAPDZ256mr , X86::VMOVAPDYmr },
- { X86::VMOVAPDZ256rm , X86::VMOVAPDYrm },
- { X86::VMOVAPDZ256rr , X86::VMOVAPDYrr },
- { X86::VMOVAPDZ256rr_REV , X86::VMOVAPDYrr_REV },
- { X86::VMOVAPSZ256mr , X86::VMOVAPSYmr },
- { X86::VMOVAPSZ256rm , X86::VMOVAPSYrm },
- { X86::VMOVAPSZ256rr , X86::VMOVAPSYrr },
- { X86::VMOVAPSZ256rr_REV , X86::VMOVAPSYrr_REV },
- { X86::VMOVDDUPZ256rm , X86::VMOVDDUPYrm },
- { X86::VMOVDDUPZ256rr , X86::VMOVDDUPYrr },
- { X86::VMOVDQA32Z256mr , X86::VMOVDQAYmr },
- { X86::VMOVDQA32Z256rm , X86::VMOVDQAYrm },
- { X86::VMOVDQA32Z256rr , X86::VMOVDQAYrr },
- { X86::VMOVDQA32Z256rr_REV , X86::VMOVDQAYrr_REV },
- { X86::VMOVDQA64Z256mr , X86::VMOVDQAYmr },
- { X86::VMOVDQA64Z256rm , X86::VMOVDQAYrm },
- { X86::VMOVDQA64Z256rr , X86::VMOVDQAYrr },
- { X86::VMOVDQA64Z256rr_REV , X86::VMOVDQAYrr_REV },
- { X86::VMOVDQU16Z256mr , X86::VMOVDQUYmr },
- { X86::VMOVDQU16Z256rm , X86::VMOVDQUYrm },
- { X86::VMOVDQU16Z256rr , X86::VMOVDQUYrr },
- { X86::VMOVDQU16Z256rr_REV , X86::VMOVDQUYrr_REV },
- { X86::VMOVDQU32Z256mr , X86::VMOVDQUYmr },
- { X86::VMOVDQU32Z256rm , X86::VMOVDQUYrm },
- { X86::VMOVDQU32Z256rr , X86::VMOVDQUYrr },
- { X86::VMOVDQU32Z256rr_REV , X86::VMOVDQUYrr_REV },
- { X86::VMOVDQU64Z256mr , X86::VMOVDQUYmr },
- { X86::VMOVDQU64Z256rm , X86::VMOVDQUYrm },
- { X86::VMOVDQU64Z256rr , X86::VMOVDQUYrr },
- { X86::VMOVDQU64Z256rr_REV , X86::VMOVDQUYrr_REV },
- { X86::VMOVDQU8Z256mr , X86::VMOVDQUYmr },
- { X86::VMOVDQU8Z256rm , X86::VMOVDQUYrm },
- { X86::VMOVDQU8Z256rr , X86::VMOVDQUYrr },
- { X86::VMOVDQU8Z256rr_REV , X86::VMOVDQUYrr_REV },
- { X86::VMOVNTDQAZ256rm , X86::VMOVNTDQAYrm },
- { X86::VMOVNTDQZ256mr , X86::VMOVNTDQYmr },
- { X86::VMOVNTPDZ256mr , X86::VMOVNTPDYmr },
- { X86::VMOVNTPSZ256mr , X86::VMOVNTPSYmr },
- { X86::VMOVSHDUPZ256rm , X86::VMOVSHDUPYrm },
- { X86::VMOVSHDUPZ256rr , X86::VMOVSHDUPYrr },
- { X86::VMOVSLDUPZ256rm , X86::VMOVSLDUPYrm },
- { X86::VMOVSLDUPZ256rr , X86::VMOVSLDUPYrr },
- { X86::VMOVUPDZ256mr , X86::VMOVUPDYmr },
- { X86::VMOVUPDZ256rm , X86::VMOVUPDYrm },
- { X86::VMOVUPDZ256rr , X86::VMOVUPDYrr },
- { X86::VMOVUPDZ256rr_REV , X86::VMOVUPDYrr_REV },
- { X86::VMOVUPSZ256mr , X86::VMOVUPSYmr },
- { X86::VMOVUPSZ256rm , X86::VMOVUPSYrm },
- { X86::VMOVUPSZ256rr , X86::VMOVUPSYrr },
- { X86::VMOVUPSZ256rr_REV , X86::VMOVUPSYrr_REV },
- { X86::VMULPDZ256rm , X86::VMULPDYrm },
- { X86::VMULPDZ256rr , X86::VMULPDYrr },
- { X86::VMULPSZ256rm , X86::VMULPSYrm },
- { X86::VMULPSZ256rr , X86::VMULPSYrr },
- { X86::VORPDZ256rm , X86::VORPDYrm },
- { X86::VORPDZ256rr , X86::VORPDYrr },
- { X86::VORPSZ256rm , X86::VORPSYrm },
- { X86::VORPSZ256rr , X86::VORPSYrr },
- { X86::VPABSBZ256rm , X86::VPABSBYrm },
- { X86::VPABSBZ256rr , X86::VPABSBYrr },
- { X86::VPABSDZ256rm , X86::VPABSDYrm },
- { X86::VPABSDZ256rr , X86::VPABSDYrr },
- { X86::VPABSWZ256rm , X86::VPABSWYrm },
- { X86::VPABSWZ256rr , X86::VPABSWYrr },
- { X86::VPACKSSDWZ256rm , X86::VPACKSSDWYrm },
- { X86::VPACKSSDWZ256rr , X86::VPACKSSDWYrr },
- { X86::VPACKSSWBZ256rm , X86::VPACKSSWBYrm },
- { X86::VPACKSSWBZ256rr , X86::VPACKSSWBYrr },
- { X86::VPACKUSDWZ256rm , X86::VPACKUSDWYrm },
- { X86::VPACKUSDWZ256rr , X86::VPACKUSDWYrr },
- { X86::VPACKUSWBZ256rm , X86::VPACKUSWBYrm },
- { X86::VPACKUSWBZ256rr , X86::VPACKUSWBYrr },
- { X86::VPADDBZ256rm , X86::VPADDBYrm },
- { X86::VPADDBZ256rr , X86::VPADDBYrr },
- { X86::VPADDDZ256rm , X86::VPADDDYrm },
- { X86::VPADDDZ256rr , X86::VPADDDYrr },
- { X86::VPADDQZ256rm , X86::VPADDQYrm },
- { X86::VPADDQZ256rr , X86::VPADDQYrr },
- { X86::VPADDSBZ256rm , X86::VPADDSBYrm },
- { X86::VPADDSBZ256rr , X86::VPADDSBYrr },
- { X86::VPADDSWZ256rm , X86::VPADDSWYrm },
- { X86::VPADDSWZ256rr , X86::VPADDSWYrr },
- { X86::VPADDUSBZ256rm , X86::VPADDUSBYrm },
- { X86::VPADDUSBZ256rr , X86::VPADDUSBYrr },
- { X86::VPADDUSWZ256rm , X86::VPADDUSWYrm },
- { X86::VPADDUSWZ256rr , X86::VPADDUSWYrr },
- { X86::VPADDWZ256rm , X86::VPADDWYrm },
- { X86::VPADDWZ256rr , X86::VPADDWYrr },
- { X86::VPALIGNRZ256rmi , X86::VPALIGNRYrmi },
- { X86::VPALIGNRZ256rri , X86::VPALIGNRYrri },
- { X86::VPANDDZ256rm , X86::VPANDYrm },
- { X86::VPANDDZ256rr , X86::VPANDYrr },
- { X86::VPANDQZ256rm , X86::VPANDYrm },
- { X86::VPANDQZ256rr , X86::VPANDYrr },
- { X86::VPAVGBZ256rm , X86::VPAVGBYrm },
- { X86::VPAVGBZ256rr , X86::VPAVGBYrr },
- { X86::VPAVGWZ256rm , X86::VPAVGWYrm },
- { X86::VPAVGWZ256rr , X86::VPAVGWYrr },
- { X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm },
- { X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr },
- { X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm },
- { X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr },
- { X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm },
- { X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr },
- { X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm },
- { X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr },
- { X86::VPERMDZ256rm , X86::VPERMDYrm },
- { X86::VPERMDZ256rr , X86::VPERMDYrr },
- { X86::VPERMILPDZ256mi , X86::VPERMILPDYmi },
- { X86::VPERMILPDZ256ri , X86::VPERMILPDYri },
- { X86::VPERMILPDZ256rm , X86::VPERMILPDYrm },
- { X86::VPERMILPDZ256rr , X86::VPERMILPDYrr },
- { X86::VPERMILPSZ256mi , X86::VPERMILPSYmi },
- { X86::VPERMILPSZ256ri , X86::VPERMILPSYri },
- { X86::VPERMILPSZ256rm , X86::VPERMILPSYrm },
- { X86::VPERMILPSZ256rr , X86::VPERMILPSYrr },
- { X86::VPERMPDZ256mi , X86::VPERMPDYmi },
- { X86::VPERMPDZ256ri , X86::VPERMPDYri },
- { X86::VPERMPSZ256rm , X86::VPERMPSYrm },
- { X86::VPERMPSZ256rr , X86::VPERMPSYrr },
- { X86::VPERMQZ256mi , X86::VPERMQYmi },
- { X86::VPERMQZ256ri , X86::VPERMQYri },
- { X86::VPMADDUBSWZ256rm , X86::VPMADDUBSWYrm },
- { X86::VPMADDUBSWZ256rr , X86::VPMADDUBSWYrr },
- { X86::VPMADDWDZ256rm , X86::VPMADDWDYrm },
- { X86::VPMADDWDZ256rr , X86::VPMADDWDYrr },
- { X86::VPMAXSBZ256rm , X86::VPMAXSBYrm },
- { X86::VPMAXSBZ256rr , X86::VPMAXSBYrr },
- { X86::VPMAXSDZ256rm , X86::VPMAXSDYrm },
- { X86::VPMAXSDZ256rr , X86::VPMAXSDYrr },
- { X86::VPMAXSWZ256rm , X86::VPMAXSWYrm },
- { X86::VPMAXSWZ256rr , X86::VPMAXSWYrr },
- { X86::VPMAXUBZ256rm , X86::VPMAXUBYrm },
- { X86::VPMAXUBZ256rr , X86::VPMAXUBYrr },
- { X86::VPMAXUDZ256rm , X86::VPMAXUDYrm },
- { X86::VPMAXUDZ256rr , X86::VPMAXUDYrr },
- { X86::VPMAXUWZ256rm , X86::VPMAXUWYrm },
- { X86::VPMAXUWZ256rr , X86::VPMAXUWYrr },
- { X86::VPMINSBZ256rm , X86::VPMINSBYrm },
- { X86::VPMINSBZ256rr , X86::VPMINSBYrr },
- { X86::VPMINSDZ256rm , X86::VPMINSDYrm },
- { X86::VPMINSDZ256rr , X86::VPMINSDYrr },
- { X86::VPMINSWZ256rm , X86::VPMINSWYrm },
- { X86::VPMINSWZ256rr , X86::VPMINSWYrr },
- { X86::VPMINUBZ256rm , X86::VPMINUBYrm },
- { X86::VPMINUBZ256rr , X86::VPMINUBYrr },
- { X86::VPMINUDZ256rm , X86::VPMINUDYrm },
- { X86::VPMINUDZ256rr , X86::VPMINUDYrr },
- { X86::VPMINUWZ256rm , X86::VPMINUWYrm },
- { X86::VPMINUWZ256rr , X86::VPMINUWYrr },
- { X86::VPMOVSXBDZ256rm , X86::VPMOVSXBDYrm },
- { X86::VPMOVSXBDZ256rr , X86::VPMOVSXBDYrr },
- { X86::VPMOVSXBQZ256rm , X86::VPMOVSXBQYrm },
- { X86::VPMOVSXBQZ256rr , X86::VPMOVSXBQYrr },
- { X86::VPMOVSXBWZ256rm , X86::VPMOVSXBWYrm },
- { X86::VPMOVSXBWZ256rr , X86::VPMOVSXBWYrr },
- { X86::VPMOVSXDQZ256rm , X86::VPMOVSXDQYrm },
- { X86::VPMOVSXDQZ256rr , X86::VPMOVSXDQYrr },
- { X86::VPMOVSXWDZ256rm , X86::VPMOVSXWDYrm },
- { X86::VPMOVSXWDZ256rr , X86::VPMOVSXWDYrr },
- { X86::VPMOVSXWQZ256rm , X86::VPMOVSXWQYrm },
- { X86::VPMOVSXWQZ256rr , X86::VPMOVSXWQYrr },
- { X86::VPMOVZXBDZ256rm , X86::VPMOVZXBDYrm },
- { X86::VPMOVZXBDZ256rr , X86::VPMOVZXBDYrr },
- { X86::VPMOVZXBQZ256rm , X86::VPMOVZXBQYrm },
- { X86::VPMOVZXBQZ256rr , X86::VPMOVZXBQYrr },
- { X86::VPMOVZXBWZ256rm , X86::VPMOVZXBWYrm },
- { X86::VPMOVZXBWZ256rr , X86::VPMOVZXBWYrr },
- { X86::VPMOVZXDQZ256rm , X86::VPMOVZXDQYrm },
- { X86::VPMOVZXDQZ256rr , X86::VPMOVZXDQYrr },
- { X86::VPMOVZXWDZ256rm , X86::VPMOVZXWDYrm },
- { X86::VPMOVZXWDZ256rr , X86::VPMOVZXWDYrr },
- { X86::VPMOVZXWQZ256rm , X86::VPMOVZXWQYrm },
- { X86::VPMOVZXWQZ256rr , X86::VPMOVZXWQYrr },
- { X86::VPMULDQZ256rm , X86::VPMULDQYrm },
- { X86::VPMULDQZ256rr , X86::VPMULDQYrr },
- { X86::VPMULHRSWZ256rm , X86::VPMULHRSWYrm },
- { X86::VPMULHRSWZ256rr , X86::VPMULHRSWYrr },
- { X86::VPMULHUWZ256rm , X86::VPMULHUWYrm },
- { X86::VPMULHUWZ256rr , X86::VPMULHUWYrr },
- { X86::VPMULHWZ256rm , X86::VPMULHWYrm },
- { X86::VPMULHWZ256rr , X86::VPMULHWYrr },
- { X86::VPMULLDZ256rm , X86::VPMULLDYrm },
- { X86::VPMULLDZ256rr , X86::VPMULLDYrr },
- { X86::VPMULLWZ256rm , X86::VPMULLWYrm },
- { X86::VPMULLWZ256rr , X86::VPMULLWYrr },
- { X86::VPMULUDQZ256rm , X86::VPMULUDQYrm },
- { X86::VPMULUDQZ256rr , X86::VPMULUDQYrr },
- { X86::VPORDZ256rm , X86::VPORYrm },
- { X86::VPORDZ256rr , X86::VPORYrr },
- { X86::VPORQZ256rm , X86::VPORYrm },
- { X86::VPORQZ256rr , X86::VPORYrr },
- { X86::VPSADBWZ256rm , X86::VPSADBWYrm },
- { X86::VPSADBWZ256rr , X86::VPSADBWYrr },
- { X86::VPSHUFBZ256rm , X86::VPSHUFBYrm },
- { X86::VPSHUFBZ256rr , X86::VPSHUFBYrr },
- { X86::VPSHUFDZ256mi , X86::VPSHUFDYmi },
- { X86::VPSHUFDZ256ri , X86::VPSHUFDYri },
- { X86::VPSHUFHWZ256mi , X86::VPSHUFHWYmi },
- { X86::VPSHUFHWZ256ri , X86::VPSHUFHWYri },
- { X86::VPSHUFLWZ256mi , X86::VPSHUFLWYmi },
- { X86::VPSHUFLWZ256ri , X86::VPSHUFLWYri },
- { X86::VPSLLDQZ256rr , X86::VPSLLDQYri },
- { X86::VPSLLDZ256ri , X86::VPSLLDYri },
- { X86::VPSLLDZ256rm , X86::VPSLLDYrm },
- { X86::VPSLLDZ256rr , X86::VPSLLDYrr },
- { X86::VPSLLQZ256ri , X86::VPSLLQYri },
- { X86::VPSLLQZ256rm , X86::VPSLLQYrm },
- { X86::VPSLLQZ256rr , X86::VPSLLQYrr },
- { X86::VPSLLVDZ256rm , X86::VPSLLVDYrm },
- { X86::VPSLLVDZ256rr , X86::VPSLLVDYrr },
- { X86::VPSLLVQZ256rm , X86::VPSLLVQYrm },
- { X86::VPSLLVQZ256rr , X86::VPSLLVQYrr },
- { X86::VPSLLWZ256ri , X86::VPSLLWYri },
- { X86::VPSLLWZ256rm , X86::VPSLLWYrm },
- { X86::VPSLLWZ256rr , X86::VPSLLWYrr },
- { X86::VPSRADZ256ri , X86::VPSRADYri },
- { X86::VPSRADZ256rm , X86::VPSRADYrm },
- { X86::VPSRADZ256rr , X86::VPSRADYrr },
- { X86::VPSRAVDZ256rm , X86::VPSRAVDYrm },
- { X86::VPSRAVDZ256rr , X86::VPSRAVDYrr },
- { X86::VPSRAWZ256ri , X86::VPSRAWYri },
- { X86::VPSRAWZ256rm , X86::VPSRAWYrm },
- { X86::VPSRAWZ256rr , X86::VPSRAWYrr },
- { X86::VPSRLDQZ256rr , X86::VPSRLDQYri },
- { X86::VPSRLDZ256ri , X86::VPSRLDYri },
- { X86::VPSRLDZ256rm , X86::VPSRLDYrm },
- { X86::VPSRLDZ256rr , X86::VPSRLDYrr },
- { X86::VPSRLQZ256ri , X86::VPSRLQYri },
- { X86::VPSRLQZ256rm , X86::VPSRLQYrm },
- { X86::VPSRLQZ256rr , X86::VPSRLQYrr },
- { X86::VPSRLVDZ256rm , X86::VPSRLVDYrm },
- { X86::VPSRLVDZ256rr , X86::VPSRLVDYrr },
- { X86::VPSRLVQZ256rm , X86::VPSRLVQYrm },
- { X86::VPSRLVQZ256rr , X86::VPSRLVQYrr },
- { X86::VPSRLWZ256ri , X86::VPSRLWYri },
- { X86::VPSRLWZ256rm , X86::VPSRLWYrm },
- { X86::VPSRLWZ256rr , X86::VPSRLWYrr },
- { X86::VPSUBBZ256rm , X86::VPSUBBYrm },
- { X86::VPSUBBZ256rr , X86::VPSUBBYrr },
- { X86::VPSUBDZ256rm , X86::VPSUBDYrm },
- { X86::VPSUBDZ256rr , X86::VPSUBDYrr },
- { X86::VPSUBQZ256rm , X86::VPSUBQYrm },
- { X86::VPSUBQZ256rr , X86::VPSUBQYrr },
- { X86::VPSUBSBZ256rm , X86::VPSUBSBYrm },
- { X86::VPSUBSBZ256rr , X86::VPSUBSBYrr },
- { X86::VPSUBSWZ256rm , X86::VPSUBSWYrm },
- { X86::VPSUBSWZ256rr , X86::VPSUBSWYrr },
- { X86::VPSUBUSBZ256rm , X86::VPSUBUSBYrm },
- { X86::VPSUBUSBZ256rr , X86::VPSUBUSBYrr },
- { X86::VPSUBUSWZ256rm , X86::VPSUBUSWYrm },
- { X86::VPSUBUSWZ256rr , X86::VPSUBUSWYrr },
- { X86::VPSUBWZ256rm , X86::VPSUBWYrm },
- { X86::VPSUBWZ256rr , X86::VPSUBWYrr },
- { X86::VPUNPCKHBWZ256rm , X86::VPUNPCKHBWYrm },
- { X86::VPUNPCKHBWZ256rr , X86::VPUNPCKHBWYrr },
- { X86::VPUNPCKHDQZ256rm , X86::VPUNPCKHDQYrm },
- { X86::VPUNPCKHDQZ256rr , X86::VPUNPCKHDQYrr },
- { X86::VPUNPCKHQDQZ256rm , X86::VPUNPCKHQDQYrm },
- { X86::VPUNPCKHQDQZ256rr , X86::VPUNPCKHQDQYrr },
- { X86::VPUNPCKHWDZ256rm , X86::VPUNPCKHWDYrm },
- { X86::VPUNPCKHWDZ256rr , X86::VPUNPCKHWDYrr },
- { X86::VPUNPCKLBWZ256rm , X86::VPUNPCKLBWYrm },
- { X86::VPUNPCKLBWZ256rr , X86::VPUNPCKLBWYrr },
- { X86::VPUNPCKLDQZ256rm , X86::VPUNPCKLDQYrm },
- { X86::VPUNPCKLDQZ256rr , X86::VPUNPCKLDQYrr },
- { X86::VPUNPCKLQDQZ256rm , X86::VPUNPCKLQDQYrm },
- { X86::VPUNPCKLQDQZ256rr , X86::VPUNPCKLQDQYrr },
- { X86::VPUNPCKLWDZ256rm , X86::VPUNPCKLWDYrm },
- { X86::VPUNPCKLWDZ256rr , X86::VPUNPCKLWDYrr },
- { X86::VPXORDZ256rm , X86::VPXORYrm },
- { X86::VPXORDZ256rr , X86::VPXORYrr },
- { X86::VPXORQZ256rm , X86::VPXORYrm },
- { X86::VPXORQZ256rr , X86::VPXORYrr },
- { X86::VSHUFPDZ256rmi , X86::VSHUFPDYrmi },
- { X86::VSHUFPDZ256rri , X86::VSHUFPDYrri },
- { X86::VSHUFPSZ256rmi , X86::VSHUFPSYrmi },
- { X86::VSHUFPSZ256rri , X86::VSHUFPSYrri },
- { X86::VSQRTPDZ256m , X86::VSQRTPDYm },
- { X86::VSQRTPDZ256r , X86::VSQRTPDYr },
- { X86::VSQRTPSZ256m , X86::VSQRTPSYm },
- { X86::VSQRTPSZ256r , X86::VSQRTPSYr },
- { X86::VSUBPDZ256rm , X86::VSUBPDYrm },
- { X86::VSUBPDZ256rr , X86::VSUBPDYrr },
- { X86::VSUBPSZ256rm , X86::VSUBPSYrm },
- { X86::VSUBPSZ256rr , X86::VSUBPSYrr },
- { X86::VUNPCKHPDZ256rm , X86::VUNPCKHPDYrm },
- { X86::VUNPCKHPDZ256rr , X86::VUNPCKHPDYrr },
- { X86::VUNPCKHPSZ256rm , X86::VUNPCKHPSYrm },
- { X86::VUNPCKHPSZ256rr , X86::VUNPCKHPSYrr },
- { X86::VUNPCKLPDZ256rm , X86::VUNPCKLPDYrm },
- { X86::VUNPCKLPDZ256rr , X86::VUNPCKLPDYrr },
- { X86::VUNPCKLPSZ256rm , X86::VUNPCKLPSYrm },
- { X86::VUNPCKLPSZ256rr , X86::VUNPCKLPSYrr },
- { X86::VXORPDZ256rm , X86::VXORPDYrm },
- { X86::VXORPDZ256rr , X86::VXORPDYrr },
- { X86::VXORPSZ256rm , X86::VXORPSYrm },
- { X86::VXORPSZ256rr , X86::VXORPSYrr },
-};
-
-#endif
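The tables deleted above pair each EVEX-encoded opcode with its shorter VEX equivalent, so a compression pass can rewrite an instruction with a simple table lookup. A minimal sketch of such a lookup in C++, assuming the table is kept sorted by EVEX opcode; the field names and the helper function are illustrative assumptions, not taken from this patch:

#include <algorithm>
#include <cstddef>
#include <cstdint>

struct X86EvexToVexCompressTableEntry {
  uint16_t EvexOpcode; // field name assumed, not from this patch
  uint16_t VexOpcode;  // field name assumed, not from this patch
};

// Binary-search a table sorted by EVEX opcode; returns 0 when the
// instruction has no VEX equivalent in the table.
static uint16_t lookupVexOpcode(const X86EvexToVexCompressTableEntry *Table,
                                size_t N, uint16_t EvexOpc) {
  const X86EvexToVexCompressTableEntry *End = Table + N;
  const X86EvexToVexCompressTableEntry *I = std::lower_bound(
      Table, End, EvexOpc,
      [](const X86EvexToVexCompressTableEntry &E, uint16_t Opc) {
        return E.EvexOpcode < Opc;
      });
  return (I != End && I->EvexOpcode == EvexOpc) ? I->VexOpcode : 0;
}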
diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td
index 2ea27a934b47..315a69e6a2a2 100644
--- a/lib/Target/X86/X86InstrVMX.td
+++ b/lib/Target/X86/X86InstrVMX.td
@@ -43,22 +43,26 @@ def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
"vmptrld\t$vmcs", []>, PS;
def VMPTRSTm : I<0xC7, MRM7m, (outs), (ins i64mem:$vmcs),
"vmptrst\t$vmcs", []>, TB;
-def VMREAD64rm : I<0x78, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
-def VMREAD32rm : I<0x78, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
-def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
+let mayStore = 1 in {
+def VMREAD64mr : I<0x78, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
+def VMREAD32mr : I<0x78, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
+}
def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
-def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
+let mayLoad = 1 in {
+def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
+def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
+}
// 0F 01 C4
def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
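The VMX change above moves the memory forms under explicit mayStore/mayLoad blocks (and renames VMREAD*rm to VMREAD*mr, matching the memory-destination operand convention), because these defs have empty patterns and TableGen cannot infer their memory behavior from a pattern. A short C++ sketch of why the flags matter; the helper is hypothetical, but mayLoad() and mayStore() are the real MachineInstr queries that downstream passes rely on:

#include "llvm/CodeGen/MachineInstr.h"

// Passes consult these flags, not the selection pattern, when deciding
// whether an instruction may touch memory; hand-written defs with empty
// patterns therefore have to set the flags explicitly.
static bool touchesMemory(const llvm::MachineInstr &MI) {
  return MI.mayLoad() || MI.mayStore();
}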
diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td
index 2b296e1e5b85..53224431c0e9 100644
--- a/lib/Target/X86/X86InstrXOP.td
+++ b/lib/Target/X86/X86InstrXOP.td
@@ -183,6 +183,27 @@ let ExeDomain = SSEPackedInt in {
defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>;
}
+// IFMA patterns - for cases where we can safely ignore the overflow bits from
+// the multiply or easily match with existing intrinsics.
+let Predicates = [HasXOP] in {
+ def : Pat<(v8i16 (add (mul (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
+ (v8i16 VR128:$src3))),
+ (VPMACSWWrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v4i32 (add (mul (v4i32 VR128:$src1), (v4i32 VR128:$src2)),
+ (v4i32 VR128:$src3))),
+ (VPMACSDDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v2i64 (add (X86pmuldq (X86PShufd (v4i32 VR128:$src1), (i8 -11)),
+ (X86PShufd (v4i32 VR128:$src2), (i8 -11))),
+ (v2i64 VR128:$src3))),
+ (VPMACSDQHrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v2i64 (add (X86pmuldq (v4i32 VR128:$src1), (v4i32 VR128:$src2)),
+ (v2i64 VR128:$src3))),
+ (VPMACSDQLrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+ def : Pat<(v4i32 (add (X86vpmaddwd (v8i16 VR128:$src1), (v8i16 VR128:$src2)),
+ (v4i32 VR128:$src3))),
+ (VPMADCSWDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+}
+
// Instruction where second source can be memory, third must be imm8
multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128> {
let isCommutable = 1 in
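The HasXOP patterns added in the hunk above map generic multiply-accumulate DAGs straight onto the XOP VPMACS*/VPMADCSWD instructions, so the multiply-add shape no longer has to be expressed through intrinsics. A rough illustration in C++ using Clang/GCC vector extensions; the typedef and the assumed -mxop compile flag are illustrative, not part of the patch:

// With the patterns above, this generic multiply-add on a v8i16 vector
// can be selected directly to vpmacsww, no XOP intrinsic required.
typedef short v8i16 __attribute__((vector_size(16)));

v8i16 macc16(v8i16 a, v8i16 b, v8i16 c) {
  return a * b + c; // matches (add (mul a, b), c) -> VPMACSWWrr
}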
@@ -269,159 +290,87 @@ let ExeDomain = SSEPackedInt in {
}
// Instruction where either second or third source can be memory
-multiclass xop4op_int<bits<8> opc, string OpcodeStr,
- Intrinsic Int128, Intrinsic Int256> {
- // 128-bit Instruction
- def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
+multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, ValueType VT> {
+ def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2, VR128:$src3))]>,
- XOP_4V;
- def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i128mem:$src3),
+ [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
+ (X86andnp RC:$src3, RC:$src2))))]>, XOP_4V;
+ def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (Int128 VR128:$src1, VR128:$src2,
- (bitconvert (loadv2i64 addr:$src3))))]>,
+ [(set RC:$dst, (VT (or (and (load addr:$src3), RC:$src1),
+ (X86andnp (load addr:$src3), RC:$src2))))]>,
XOP_4V, VEX_W;
- def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, VR128:$src3),
+ def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (Int128 VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
- VR128:$src3))]>,
+ [(set RC:$dst, (VT (or (and RC:$src3, RC:$src1),
+ (X86andnp RC:$src3, (load addr:$src2)))))]>,
XOP_4V;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, XOP_4V, VEX_W;
-
- // 256-bit Instruction
- def rrrY : IXOPi8Reg<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2, VR256:$src3))]>,
- XOP_4V, VEX_L;
- def rrmY : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, i256mem:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR256:$dst,
- (Int256 VR256:$src1, VR256:$src2,
- (bitconvert (loadv4i64 addr:$src3))))]>,
- XOP_4V, VEX_W, VEX_L;
- def rmrY : IXOPi8Reg<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, f256mem:$src2, VR256:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR256:$dst,
- (Int256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2)),
- VR256:$src3))]>,
- XOP_4V, VEX_L;
- // For disassembler
- let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rrrY_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, VEX_W, VEX_L;
}
let ExeDomain = SSEPackedInt in {
- defm VPCMOV : xop4op_int<0xA2, "vpcmov",
- int_x86_xop_vpcmov, int_x86_xop_vpcmov_256>;
+ defm VPCMOV : xop4op_int<0xA2, "vpcmov", VR128, i128mem, v2i64>;
+ defm VPCMOVY : xop4op_int<0xA2, "vpcmov", VR256, i256mem, v4i64>, VEX_L;
}
-let Predicates = [HasXOP] in {
- def : Pat<(v2i64 (or (and VR128:$src3, VR128:$src1),
- (X86andnp VR128:$src3, VR128:$src2))),
- (VPCMOVrrr VR128:$src1, VR128:$src2, VR128:$src3)>;
-
- def : Pat<(v4i64 (or (and VR256:$src3, VR256:$src1),
- (X86andnp VR256:$src3, VR256:$src2))),
- (VPCMOVrrrY VR256:$src1, VR256:$src2, VR256:$src3)>;
-}
-
-multiclass xop5op<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType vt128, ValueType vt256,
- ValueType id128, ValueType id256,
- PatFrag ld_128, PatFrag ld_256> {
- def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),
+multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand intmemop, X86MemOperand fpmemop,
+ ValueType VT, PatFrag FPLdFrag,
+ PatFrag IntLdFrag> {
+ def rr : IXOP5<Opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR128:$dst,
- (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
- (id128 VR128:$src3), (i8 imm:$src4))))]>;
- def rm : IXOP5<opc, MRMSrcMemOp4, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i128mem:$src3, u8imm:$src4),
+ [(set RC:$dst,
+ (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>;
+ def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, intmemop:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR128:$dst,
- (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
- (id128 (bitconvert (loadv2i64 addr:$src3))),
- (i8 imm:$src4))))]>,
- VEX_W;
- def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2, VR128:$src3, u8imm:$src4),
+ [(set RC:$dst,
+ (VT (X86vpermil2 RC:$src1, RC:$src2,
+ (bitconvert (IntLdFrag addr:$src3)),
+ (i8 imm:$src4))))]>, VEX_W;
+ def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR128:$dst,
- (vt128 (OpNode (vt128 VR128:$src1),
- (vt128 (bitconvert (ld_128 addr:$src2))),
- (id128 VR128:$src3), (i8 imm:$src4))))]>;
+ [(set RC:$dst,
+ (VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
+ RC:$src3, (i8 imm:$src4))))]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rr_REV : IXOP5<opc, MRMSrcRegOp4, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),
+ def rr_REV : IXOP5<Opc, MRMSrcRegOp4, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[]>, VEX_W;
-
- def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR256:$dst,
- (vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2),
- (id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L;
- def rmY : IXOP5<opc, MRMSrcMemOp4, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, i256mem:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR256:$dst,
- (vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2),
- (id256 (bitconvert (loadv4i64 addr:$src3))),
- (i8 imm:$src4))))]>, VEX_W, VEX_L;
- def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, f256mem:$src2, VR256:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- [(set VR256:$dst,
- (vt256 (OpNode (vt256 VR256:$src1),
- (vt256 (bitconvert (ld_256 addr:$src2))),
- (id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L;
- // For disassembler
- let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rrY_REV : IXOP5<opc, MRMSrcRegOp4, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4),
- !strconcat(OpcodeStr,
- "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>, VEX_W, VEX_L;
}
-let ExeDomain = SSEPackedDouble in
- defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", X86vpermil2, v2f64, v4f64,
- v2i64, v4i64, loadv2f64, loadv4f64>;
+let ExeDomain = SSEPackedDouble in {
+ defm VPERMIL2PD : xop_vpermil2<0x49, "vpermil2pd", VR128, i128mem, f128mem,
+ v2f64, loadv2f64, loadv2i64>;
+ defm VPERMIL2PDY : xop_vpermil2<0x49, "vpermil2pd", VR256, i256mem, f256mem,
+ v4f64, loadv4f64, loadv4i64>, VEX_L;
+}
-let ExeDomain = SSEPackedSingle in
- defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", X86vpermil2, v4f32, v8f32,
- v4i32, v8i32, loadv4f32, loadv8f32>;
+let ExeDomain = SSEPackedSingle in {
+ defm VPERMIL2PS : xop_vpermil2<0x48, "vpermil2ps", VR128, i128mem, f128mem,
+ v4f32, loadv4f32, loadv2i64>;
+ defm VPERMIL2PSY : xop_vpermil2<0x48, "vpermil2ps", VR256, i256mem, f256mem,
+ v8f32, loadv8f32, loadv4i64>, VEX_L;
+}
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
new file mode 100644
index 000000000000..6cc5e8b63597
--- /dev/null
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -0,0 +1,516 @@
+//===- X86InstructionSelector.cpp -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the InstructionSelector class for
+/// X86.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86RegisterBankInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "X86-isel"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+namespace {
+
+class X86InstructionSelector : public InstructionSelector {
+public:
+ X86InstructionSelector(const X86Subtarget &STI,
+ const X86RegisterBankInfo &RBI);
+
+ bool select(MachineInstr &I) const override;
+
+private:
+ /// tblgen-erated 'select' implementation, used as the initial selector for
+ /// the patterns that don't require complex C++.
+ bool selectImpl(MachineInstr &I) const;
+
+ // TODO: remove after selectImpl supports patterns with predicates.
+ unsigned getFAddOp(LLT &Ty, const RegisterBank &RB) const;
+ unsigned getFSubOp(LLT &Ty, const RegisterBank &RB) const;
+ unsigned getAddOp(LLT &Ty, const RegisterBank &RB) const;
+ unsigned getSubOp(LLT &Ty, const RegisterBank &RB) const;
+ unsigned getLoadStoreOp(LLT &Ty, const RegisterBank &RB, unsigned Opc,
+ uint64_t Alignment) const;
+
+ bool selectBinaryOp(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
+ bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
+ bool selectFrameIndex(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
+ bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
+
+ const X86Subtarget &STI;
+ const X86InstrInfo &TII;
+ const X86RegisterInfo &TRI;
+ const X86RegisterBankInfo &RBI;
+
+#define GET_GLOBALISEL_TEMPORARIES_DECL
+#include "X86GenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_DECL
+};
+
+} // end anonymous namespace
+
+#define GET_GLOBALISEL_IMPL
+#include "X86GenGlobalISel.inc"
+#undef GET_GLOBALISEL_IMPL
+
+X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI,
+ const X86RegisterBankInfo &RBI)
+ : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()),
+ TRI(*STI.getRegisterInfo()), RBI(RBI)
+#define GET_GLOBALISEL_TEMPORARIES_INIT
+#include "X86GenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_INIT
+{
+}
+
+// FIXME: This should be target-independent, inferred from the types declared
+// for each class in the bank.
+static const TargetRegisterClass *
+getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB) {
+ if (RB.getID() == X86::GPRRegBankID) {
+ if (Ty.getSizeInBits() == 32)
+ return &X86::GR32RegClass;
+ if (Ty.getSizeInBits() == 64)
+ return &X86::GR64RegClass;
+ }
+ if (RB.getID() == X86::VECRRegBankID) {
+ if (Ty.getSizeInBits() == 32)
+ return &X86::FR32XRegClass;
+ if (Ty.getSizeInBits() == 64)
+ return &X86::FR64XRegClass;
+ if (Ty.getSizeInBits() == 128)
+ return &X86::VR128XRegClass;
+ if (Ty.getSizeInBits() == 256)
+ return &X86::VR256XRegClass;
+ if (Ty.getSizeInBits() == 512)
+ return &X86::VR512RegClass;
+ }
+
+ llvm_unreachable("Unknown RegBank!");
+}
+
+// Set X86 Opcode and constrain DestReg.
+static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+
+ unsigned DstReg = I.getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ assert(I.isCopy() && "Generic operators do not allow physical registers");
+ return true;
+ }
+
+ const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+ const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
+ (void)DstSize;
+ unsigned SrcReg = I.getOperand(1).getReg();
+ const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
+ (void)SrcSize;
+ assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) &&
+ "No phys reg on generic operators");
+ assert((DstSize == SrcSize ||
+ // Copies are a means to set up initial types; the number of
+ // bits may not exactly match.
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) &&
+ "Copy with different width?!");
+
+ const TargetRegisterClass *RC = nullptr;
+
+ switch (RegBank.getID()) {
+ case X86::GPRRegBankID:
+ assert((DstSize <= 64) && "GPRs cannot get more than 64-bit width values.");
+ RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank);
+ break;
+ case X86::VECRRegBankID:
+ RC = getRegClassForTypeOnBank(MRI.getType(DstReg), RegBank);
+ break;
+ default:
+ llvm_unreachable("Unknown RegBank!");
+ }
+
+ // No need to constrain SrcReg. It will get constrained when
+ // we hit another of its uses or defs.
+ // Copies do not have constraints.
+ const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg);
+ if (!OldRC || !RC->hasSubClassEq(OldRC)) {
+ if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
+ DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
+ << " operand\n");
+ return false;
+ }
+ }
+ I.setDesc(TII.get(X86::COPY));
+ return true;
+}
+
+bool X86InstructionSelector::select(MachineInstr &I) const {
+ assert(I.getParent() && "Instruction should be in a basic block!");
+ assert(I.getParent()->getParent() && "Instruction should be in a function!");
+
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned Opcode = I.getOpcode();
+ if (!isPreISelGenericOpcode(Opcode)) {
+ // Certain non-generic instructions also need some special handling.
+
+ if (I.isCopy())
+ return selectCopy(I, TII, MRI, TRI, RBI);
+
+ // TODO: handle more cases - LOAD_STACK_GUARD, PHI
+ return true;
+ }
+
+ assert(I.getNumOperands() == I.getNumExplicitOperands() &&
+ "Generic instruction has unexpected implicit operands\n");
+
+ // TODO: This should be implemented by tblgen, pattern with predicate not
+ // supported yet.
+ if (selectBinaryOp(I, MRI, MF))
+ return true;
+ if (selectLoadStoreOp(I, MRI, MF))
+ return true;
+ if (selectFrameIndex(I, MRI, MF))
+ return true;
+ if (selectConstant(I, MRI, MF))
+ return true;
+
+ return selectImpl(I);
+}
+
+unsigned X86InstructionSelector::getFAddOp(LLT &Ty,
+ const RegisterBank &RB) const {
+
+ if (X86::VECRRegBankID != RB.getID())
+ return TargetOpcode::G_FADD;
+
+ if (Ty == LLT::scalar(32)) {
+ if (STI.hasAVX512()) {
+ return X86::VADDSSZrr;
+ } else if (STI.hasAVX()) {
+ return X86::VADDSSrr;
+ } else if (STI.hasSSE1()) {
+ return X86::ADDSSrr;
+ }
+ } else if (Ty == LLT::scalar(64)) {
+ if (STI.hasAVX512()) {
+ return X86::VADDSDZrr;
+ } else if (STI.hasAVX()) {
+ return X86::VADDSDrr;
+ } else if (STI.hasSSE2()) {
+ return X86::ADDSDrr;
+ }
+ } else if (Ty == LLT::vector(4, 32)) {
+ if ((STI.hasAVX512()) && (STI.hasVLX())) {
+ return X86::VADDPSZ128rr;
+ } else if (STI.hasAVX()) {
+ return X86::VADDPSrr;
+ } else if (STI.hasSSE1()) {
+ return X86::ADDPSrr;
+ }
+ }
+
+ return TargetOpcode::G_FADD;
+}
+
+unsigned X86InstructionSelector::getFSubOp(LLT &Ty,
+ const RegisterBank &RB) const {
+
+ if (X86::VECRRegBankID != RB.getID())
+ return TargetOpcode::G_FSUB;
+
+ if (Ty == LLT::scalar(32)) {
+ if (STI.hasAVX512()) {
+ return X86::VSUBSSZrr;
+ } else if (STI.hasAVX()) {
+ return X86::VSUBSSrr;
+ } else if (STI.hasSSE1()) {
+ return X86::SUBSSrr;
+ }
+ } else if (Ty == LLT::scalar(64)) {
+ if (STI.hasAVX512()) {
+ return X86::VSUBSDZrr;
+ } else if (STI.hasAVX()) {
+ return X86::VSUBSDrr;
+ } else if (STI.hasSSE2()) {
+ return X86::SUBSDrr;
+ }
+ } else if (Ty == LLT::vector(4, 32)) {
+ if ((STI.hasAVX512()) && (STI.hasVLX())) {
+ return X86::VSUBPSZ128rr;
+ } else if (STI.hasAVX()) {
+ return X86::VSUBPSrr;
+ } else if (STI.hasSSE1()) {
+ return X86::SUBPSrr;
+ }
+ }
+
+ return TargetOpcode::G_FSUB;
+}
+
+unsigned X86InstructionSelector::getAddOp(LLT &Ty,
+ const RegisterBank &RB) const {
+
+ if (X86::VECRRegBankID != RB.getID())
+ return TargetOpcode::G_ADD;
+
+ if (Ty == LLT::vector(4, 32)) {
+ if (STI.hasAVX512() && STI.hasVLX()) {
+ return X86::VPADDDZ128rr;
+ } else if (STI.hasAVX()) {
+ return X86::VPADDDrr;
+ } else if (STI.hasSSE2()) {
+ return X86::PADDDrr;
+ }
+ }
+
+ return TargetOpcode::G_ADD;
+}
+
+unsigned X86InstructionSelector::getSubOp(LLT &Ty,
+ const RegisterBank &RB) const {
+
+ if (X86::VECRRegBankID != RB.getID())
+ return TargetOpcode::G_SUB;
+
+ if (Ty == LLT::vector(4, 32)) {
+ if (STI.hasAVX512() && STI.hasVLX()) {
+ return X86::VPSUBDZ128rr;
+ } else if (STI.hasAVX()) {
+ return X86::VPSUBDrr;
+ } else if (STI.hasSSE2()) {
+ return X86::PSUBDrr;
+ }
+ }
+
+ return TargetOpcode::G_SUB;
+}
+
+bool X86InstructionSelector::selectBinaryOp(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+
+ const unsigned DefReg = I.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DefReg);
+ const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
+
+ unsigned NewOpc = I.getOpcode();
+
+ switch (NewOpc) {
+ case TargetOpcode::G_FADD:
+ NewOpc = getFAddOp(Ty, RB);
+ break;
+ case TargetOpcode::G_FSUB:
+ NewOpc = getFSubOp(Ty, RB);
+ break;
+ case TargetOpcode::G_ADD:
+ NewOpc = getAddOp(Ty, RB);
+ break;
+ case TargetOpcode::G_SUB:
+ NewOpc = getSubOp(Ty, RB);
+ break;
+ default:
+ break;
+ }
+
+ if (NewOpc == I.getOpcode())
+ return false;
+
+ I.setDesc(TII.get(NewOpc));
+
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
+unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB,
+ unsigned Opc,
+ uint64_t Alignment) const {
+ bool Isload = (Opc == TargetOpcode::G_LOAD);
+ bool HasAVX = STI.hasAVX();
+ bool HasAVX512 = STI.hasAVX512();
+ bool HasVLX = STI.hasVLX();
+
+ if (Ty == LLT::scalar(8)) {
+ if (X86::GPRRegBankID == RB.getID())
+ return Isload ? X86::MOV8rm : X86::MOV8mr;
+ } else if (Ty == LLT::scalar(16)) {
+ if (X86::GPRRegBankID == RB.getID())
+ return Isload ? X86::MOV16rm : X86::MOV16mr;
+ } else if (Ty == LLT::scalar(32)) {
+ if (X86::GPRRegBankID == RB.getID())
+ return Isload ? X86::MOV32rm : X86::MOV32mr;
+ if (X86::VECRRegBankID == RB.getID())
+ return Isload ? (HasAVX512 ? X86::VMOVSSZrm
+ : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm)
+ : (HasAVX512 ? X86::VMOVSSZmr
+ : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
+ } else if (Ty == LLT::scalar(64)) {
+ if (X86::GPRRegBankID == RB.getID())
+ return Isload ? X86::MOV64rm : X86::MOV64mr;
+ if (X86::VECRRegBankID == RB.getID())
+ return Isload ? (HasAVX512 ? X86::VMOVSDZrm
+ : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm)
+ : (HasAVX512 ? X86::VMOVSDZmr
+ : HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
+ } else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
+ if (Alignment >= 16)
+ return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
+ : HasAVX512
+ ? X86::VMOVAPSZ128rm_NOVLX
+ : HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm)
+ : (HasVLX ? X86::VMOVAPSZ128mr
+ : HasAVX512
+ ? X86::VMOVAPSZ128mr_NOVLX
+ : HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
+ else
+ return Isload ? (HasVLX ? X86::VMOVUPSZ128rm
+ : HasAVX512
+ ? X86::VMOVUPSZ128rm_NOVLX
+ : HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm)
+ : (HasVLX ? X86::VMOVUPSZ128mr
+ : HasAVX512
+ ? X86::VMOVUPSZ128mr_NOVLX
+ : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
+ }
+ return Opc;
+}
+
+bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+
+ unsigned Opc = I.getOpcode();
+
+ if (Opc != TargetOpcode::G_STORE && Opc != TargetOpcode::G_LOAD)
+ return false;
+
+ const unsigned DefReg = I.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DefReg);
+ const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
+
+ auto &MemOp = **I.memoperands_begin();
+ unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlignment());
+ if (NewOpc == Opc)
+ return false;
+
+ I.setDesc(TII.get(NewOpc));
+ MachineInstrBuilder MIB(MF, I);
+ if (Opc == TargetOpcode::G_LOAD)
+ addOffset(MIB, 0);
+ else {
+ // G_STORE takes (VAL, Addr); X86 store instructions take (Addr, VAL).
+ I.RemoveOperand(0);
+ addOffset(MIB, 0).addUse(DefReg);
+ }
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
+bool X86InstructionSelector::selectFrameIndex(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ if (I.getOpcode() != TargetOpcode::G_FRAME_INDEX)
+ return false;
+
+ const unsigned DefReg = I.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DefReg);
+
+ // Use LEA to calculate frame index.
+ unsigned NewOpc;
+ if (Ty == LLT::pointer(0, 64))
+ NewOpc = X86::LEA64r;
+ else if (Ty == LLT::pointer(0, 32))
+ NewOpc = STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r;
+ else
+ llvm_unreachable("Can't select G_FRAME_INDEX, unsupported type.");
+
+ I.setDesc(TII.get(NewOpc));
+ MachineInstrBuilder MIB(MF, I);
+ addOffset(MIB, 0);
+
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
+bool X86InstructionSelector::selectConstant(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ if (I.getOpcode() != TargetOpcode::G_CONSTANT)
+ return false;
+
+ const unsigned DefReg = I.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DefReg);
+
+ assert(Ty.isScalar() && "invalid element type.");
+
+ uint64_t Val = 0;
+ if (I.getOperand(1).isCImm()) {
+ Val = I.getOperand(1).getCImm()->getZExtValue();
+ I.getOperand(1).ChangeToImmediate(Val);
+ } else if (I.getOperand(1).isImm()) {
+ Val = I.getOperand(1).getImm();
+ } else
+ llvm_unreachable("Unsupported operand type.");
+
+ unsigned NewOpc;
+ switch (Ty.getSizeInBits()) {
+ case 8:
+ NewOpc = X86::MOV8ri;
+ break;
+ case 16:
+ NewOpc = X86::MOV16ri;
+ break;
+ case 32:
+ NewOpc = X86::MOV32ri;
+ break;
+ case 64: {
+ // TODO: if isUInt<32>(Val), X86::MOV32ri can be used instead.
+ if (isInt<32>(Val))
+ NewOpc = X86::MOV64ri32;
+ else
+ NewOpc = X86::MOV64ri;
+ break;
+ }
+ default:
+ llvm_unreachable("Can't select G_CONSTANT, unsupported type.");
+ }
+
+ I.setDesc(TII.get(NewOpc));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
+
+InstructionSelector *
+llvm::createX86InstructionSelector(X86Subtarget &Subtarget,
+ X86RegisterBankInfo &RBI) {
+ return new X86InstructionSelector(Subtarget, RBI);
+}
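The 64-bit G_CONSTANT case above turns on whether the value survives sign-extension from 32 bits. A minimal standalone sketch of that check (the sample values and the printout are illustrative, not part of the patch; isInt<32> is the real helper from llvm/Support/MathExtras.h that selectConstant calls):

#include "llvm/Support/MathExtras.h"
#include <cstdint>
#include <cstdio>

int main() {
  // 0xffffffffffffffff is -1 as an int64_t, so it still selects the compact
  // MOV64ri32 encoding even though it fails isUInt<32>.
  for (uint64_t Val : {UINT64_C(42), UINT64_C(0x7fffffff),
                       UINT64_C(0x80000000), UINT64_C(0xffffffffffffffff)})
    std::printf("%#llx -> %s\n", (unsigned long long)Val,
                llvm::isInt<32>((int64_t)Val) ? "MOV64ri32" : "MOV64ri");
  return 0;
}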
diff --git a/lib/Target/X86/X86InterleavedAccess.cpp b/lib/Target/X86/X86InterleavedAccess.cpp
index d9edf4676faf..806d6cc888f0 100644
--- a/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/lib/Target/X86/X86InterleavedAccess.cpp
@@ -19,6 +19,7 @@
using namespace llvm;
+namespace {
/// \brief This class holds necessary information to represent an interleaved
/// access group and supports utilities to lower the group into
/// X86-specific instructions/intrinsics.
@@ -27,7 +28,6 @@ using namespace llvm;
/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
/// %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <0, 2, 4, 6>
/// %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <1, 3, 5, 7>
-
class X86InterleavedAccessGroup {
/// \brief Reference to the wide-load instruction of an interleaved access
/// group.
@@ -95,6 +95,7 @@ public:
/// instructions/intrinsics.
bool lowerIntoOptimizedSequence();
};
+} // end anonymous namespace
bool X86InterleavedAccessGroup::isSupported() const {
VectorType *ShuffleVecTy = Shuffles[0]->getType();
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 63a02af02faa..2a40399ba571 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -36,7 +36,7 @@ enum IntrinsicType : uint16_t {
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
EXPAND_FROM_MEM,
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
- FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
+ FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK, GATHER_AVX2, MASK_BINOP,
};
struct IntrinsicData {
@@ -67,6 +67,23 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_d, GATHER_AVX2, X86::VPGATHERDDrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_d_256, GATHER_AVX2, X86::VPGATHERDDYrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_pd, GATHER_AVX2, X86::VGATHERDPDrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_pd_256, GATHER_AVX2, X86::VGATHERDPDYrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_ps, GATHER_AVX2, X86::VGATHERDPSrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_ps_256, GATHER_AVX2, X86::VGATHERDPSYrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_q, GATHER_AVX2, X86::VPGATHERDQrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_d_q_256, GATHER_AVX2, X86::VPGATHERDQYrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_d, GATHER_AVX2, X86::VPGATHERQDrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_d_256, GATHER_AVX2, X86::VPGATHERQDYrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_pd, GATHER_AVX2, X86::VGATHERQPDrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_pd_256, GATHER_AVX2, X86::VGATHERQPDYrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_ps, GATHER_AVX2, X86::VGATHERQPSrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_ps_256, GATHER_AVX2, X86::VGATHERQPSYrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_q, GATHER_AVX2, X86::VPGATHERQQrm, 0),
+ X86_INTRINSIC_DATA(avx2_gather_q_q_256, GATHER_AVX2, X86::VPGATHERQQYrm, 0),
+
X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
@@ -325,6 +342,8 @@ static const IntrinsicData* getIntrinsicWithChain(uint16_t IntNo) {
* the alphabetical order.
*/
static const IntrinsicData IntrinsicsWithoutChain[] = {
+ X86_INTRINSIC_DATA(avx_cmp_pd_256, INTR_TYPE_3OP, X86ISD::CMPP, 0),
+ X86_INTRINSIC_DATA(avx_cmp_ps_256, INTR_TYPE_3OP, X86ISD::CMPP, 0),
X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0),
X86_INTRINSIC_DATA(avx_cvt_pd2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx_cvtdq2_ps_256, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),
@@ -351,9 +370,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx_vpermilvar_pd_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_ps, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(avx_vpermilvar_ps_256, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
- X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx2_pabs_b, INTR_TYPE_1OP, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx2_pabs_d, INTR_TYPE_1OP, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx2_pabs_w, INTR_TYPE_1OP, ISD::ABS, 0),
X86_INTRINSIC_DATA(avx2_packssdw, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
@@ -421,18 +440,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_cvtd2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_cvtd2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_cvtd2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2b_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2b_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2b_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2d_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2d_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2d_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2q_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2q_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2q_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2w_128, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2w_256, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_cvtmask2w_512, CONVERT_MASK_TO_VEC, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx512_cvtq2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_cvtq2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_cvtq2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
@@ -455,18 +462,20 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_cvtw2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
+ X86_INTRINSIC_DATA(avx512_kand_w, MASK_BINOP, ISD::AND, 0),
+ X86_INTRINSIC_DATA(avx512_kor_w, MASK_BINOP, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_kunpck_bw, KUNPCK, ISD::CONCAT_VECTORS, 0),
X86_INTRINSIC_DATA(avx512_kunpck_dq, KUNPCK, ISD::CONCAT_VECTORS, 0),
X86_INTRINSIC_DATA(avx512_kunpck_wd, KUNPCK, ISD::CONCAT_VECTORS, 0),
-
+ X86_INTRINSIC_DATA(avx512_kxor_w, MASK_BINOP, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_mask_add_ps_512, INTR_TYPE_2OP_MASK, ISD::FADD,
X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FADD_RND, 0),
+ X86ISD::FADDS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FADD_RND, 0),
+ X86ISD::FADDS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_256, BRCST32x2_TO_VEC,
X86ISD::VBROADCAST, 0),
X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_512, BRCST32x2_TO_VEC,
@@ -720,9 +729,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_div_ps_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FDIV_RND, 0),
+ X86ISD::FDIVS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FDIV_RND, 0),
+ X86ISD::FDIVS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG,
@@ -795,74 +804,42 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VGETMANTS, 0),
X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM,
X86ISD::VGETMANTS, 0),
- X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK,
- ISD::CTLZ, 0),
- X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK,
- ISD::CTLZ, 0),
- X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_512, INTR_TYPE_1OP_MASK,
- ISD::CTLZ, 0),
- X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_128, INTR_TYPE_1OP_MASK,
- ISD::CTLZ, 0),
- X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_256, INTR_TYPE_1OP_MASK,
- ISD::CTLZ, 0),
- X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_512, INTR_TYPE_1OP_MASK,
- ISD::CTLZ, 0),
- X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
X86ISD::FMAX_RND),
- X86_INTRINSIC_DATA(avx512_mask_max_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_max_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
X86ISD::FMAX_RND),
- X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMAX_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMAX_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_min_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_min_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FMAXS, X86ISD::FMAXS_RND),
+ X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FMAXS, X86ISD::FMAXS_RND),
X86_INTRINSIC_DATA(avx512_mask_min_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
X86ISD::FMIN_RND),
- X86_INTRINSIC_DATA(avx512_mask_min_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_min_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
X86ISD::FMIN_RND),
- X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMIN_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMIN_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FMINS, X86ISD::FMINS_RND),
+ X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FMINS, X86ISD::FMINS_RND),
X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
X86ISD::FMUL_RND),
X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
X86ISD::FMUL_RND),
X86_INTRINSIC_DATA(avx512_mask_mul_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMUL_RND, 0),
+ X86ISD::FMULS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMUL_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packssdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packssdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packssdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packsswb_128, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packsswb_256, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packsswb_512, INTR_TYPE_2OP_MASK, X86ISD::PACKSS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packusdw_128, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packusdw_256, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packusdw_512, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packuswb_128, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packuswb_256, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_packuswb_512, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
+ X86ISD::FMULS_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_d_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_q_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_128, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_256, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pabs_w_512, INTR_TYPE_1OP_MASK, ISD::ABS, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
@@ -1191,9 +1168,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_sub_ps_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
X86ISD::FSUB_RND),
X86_INTRINSIC_DATA(avx512_mask_sub_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FSUB_RND, 0),
+ X86ISD::FSUBS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FSUB_RND, 0),
+ X86ISD::FSUBS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
@@ -1486,6 +1463,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpmadd52l_uq_512, FMA_OP_MASKZ,
X86ISD::VPMADD52L, 0),
+ X86_INTRINSIC_DATA(avx512_packssdw_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
+ X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
+ X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_pmul_dq_512, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_pmulu_dq_512, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
@@ -1613,6 +1594,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(fma_vfnmsub_pd_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(fma_vfnmsub_ps, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(fma_vfnmsub_ps_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
+ X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0),
X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse_comigt_ss, COMI, X86ISD::COMI, ISD::SETGT),
@@ -1620,7 +1602,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse_comilt_ss, COMI, X86ISD::COMI, ISD::SETLT),
X86_INTRINSIC_DATA(sse_comineq_ss, COMI, X86ISD::COMI, ISD::SETNE),
X86_INTRINSIC_DATA(sse_max_ps, INTR_TYPE_2OP, X86ISD::FMAX, 0),
+ X86_INTRINSIC_DATA(sse_max_ss, INTR_TYPE_2OP, X86ISD::FMAXS, 0),
X86_INTRINSIC_DATA(sse_min_ps, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(sse_min_ss, INTR_TYPE_2OP, X86ISD::FMINS, 0),
X86_INTRINSIC_DATA(sse_movmsk_ps, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse_rcp_ps, INTR_TYPE_1OP, X86ISD::FRCP, 0),
X86_INTRINSIC_DATA(sse_rsqrt_ps, INTR_TYPE_1OP, X86ISD::FRSQRT, 0),
@@ -1631,6 +1615,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse_ucomile_ss, COMI, X86ISD::UCOMI, ISD::SETLE),
X86_INTRINSIC_DATA(sse_ucomilt_ss, COMI, X86ISD::UCOMI, ISD::SETLT),
X86_INTRINSIC_DATA(sse_ucomineq_ss, COMI, X86ISD::UCOMI, ISD::SETNE),
+ X86_INTRINSIC_DATA(sse2_cmp_pd, INTR_TYPE_3OP, X86ISD::CMPP, 0),
X86_INTRINSIC_DATA(sse2_comieq_sd, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse2_comige_sd, COMI, X86ISD::COMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse2_comigt_sd, COMI, X86ISD::COMI, ISD::SETGT),
@@ -1643,7 +1628,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),
+ X86_INTRINSIC_DATA(sse2_max_sd, INTR_TYPE_2OP, X86ISD::FMAXS, 0),
X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(sse2_min_sd, INTR_TYPE_2OP, X86ISD::FMINS, 0),
X86_INTRINSIC_DATA(sse2_movmsk_pd, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
@@ -1696,9 +1683,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0),
X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0),
- X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, X86ISD::ABS, 0),
- X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, X86ISD::ABS, 0),
+ X86_INTRINSIC_DATA(ssse3_pabs_b_128, INTR_TYPE_1OP, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(ssse3_pabs_d_128, INTR_TYPE_1OP, ISD::ABS, 0),
+ X86_INTRINSIC_DATA(ssse3_pabs_w_128, INTR_TYPE_1OP, ISD::ABS, 0),
X86_INTRINSIC_DATA(ssse3_phadd_d_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, X86ISD::HSUB, 0),
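The intrinsic tables are kept sorted because lookup is a binary search on the intrinsic id, which is why the new avx2_gather_* and avx512_k*_w rows are spliced in alphabetically rather than appended. A hedged sketch of the lookup shape (the helper name is invented here; the Id field and ordering follow the IntrinsicData comparator implied by the "alphabetical order" comment):

#include <algorithm>
#include <cstdint>

// Illustrative only: locate the row for IntNo in a sorted IntrinsicData
// table; returns nullptr when the intrinsic has no entry.
static const IntrinsicData *lookupIntrinsic(const IntrinsicData *Begin,
                                            const IntrinsicData *End,
                                            uint16_t IntNo) {
  const IntrinsicData *I = std::lower_bound(
      Begin, End, IntNo,
      [](const IntrinsicData &D, uint16_t Id) { return D.Id < Id; });
  return (I != End && I->Id == IntNo) ? I : nullptr;
}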
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
new file mode 100644
index 000000000000..c2dc762fec5e
--- /dev/null
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -0,0 +1,142 @@
+//===- X86LegalizerInfo.cpp -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the MachineLegalizer class for X86.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "X86LegalizerInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Target/TargetOpcodes.h"
+
+using namespace llvm;
+using namespace TargetOpcode;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
+ const X86TargetMachine &TM)
+ : Subtarget(STI), TM(TM) {
+
+ setLegalizerInfo32bit();
+ setLegalizerInfo64bit();
+ setLegalizerInfoSSE1();
+ setLegalizerInfoSSE2();
+
+ computeTables();
+}
+
+void X86LegalizerInfo::setLegalizerInfo32bit() {
+
+ if (Subtarget.is64Bit())
+ return;
+
+ const LLT p0 = LLT::pointer(0, 32);
+ const LLT s1 = LLT::scalar(1);
+ const LLT s8 = LLT::scalar(8);
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
+
+ for (unsigned BinOp : {G_ADD, G_SUB})
+ for (auto Ty : {s8, s16, s32})
+ setAction({BinOp, Ty}, Legal);
+
+ for (unsigned MemOp : {G_LOAD, G_STORE}) {
+ for (auto Ty : {s8, s16, s32, p0})
+ setAction({MemOp, Ty}, Legal);
+
+ // And everything's fine in addrspace 0.
+ setAction({MemOp, 1, p0}, Legal);
+ }
+
+ // Pointer-handling
+ setAction({G_FRAME_INDEX, p0}, Legal);
+
+ // Constants
+ for (auto Ty : {s8, s16, s32, p0})
+ setAction({TargetOpcode::G_CONSTANT, Ty}, Legal);
+
+ setAction({TargetOpcode::G_CONSTANT, s1}, WidenScalar);
+ setAction({TargetOpcode::G_CONSTANT, s64}, NarrowScalar);
+}
+
+void X86LegalizerInfo::setLegalizerInfo64bit() {
+
+ if (!Subtarget.is64Bit())
+ return;
+
+ const LLT p0 = LLT::pointer(0, TM.getPointerSize() * 8);
+ const LLT s1 = LLT::scalar(1);
+ const LLT s8 = LLT::scalar(8);
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
+
+ for (unsigned BinOp : {G_ADD, G_SUB})
+ for (auto Ty : {s8, s16, s32, s64})
+ setAction({BinOp, Ty}, Legal);
+
+ for (unsigned MemOp : {G_LOAD, G_STORE}) {
+ for (auto Ty : {s8, s16, s32, s64, p0})
+ setAction({MemOp, Ty}, Legal);
+
+ // And everything's fine in addrspace 0.
+ setAction({MemOp, 1, p0}, Legal);
+ }
+
+ // Pointer-handling
+ setAction({G_FRAME_INDEX, p0}, Legal);
+
+ // Constants
+ for (auto Ty : {s8, s16, s32, s64, p0})
+ setAction({TargetOpcode::G_CONSTANT, Ty}, Legal);
+
+ setAction({TargetOpcode::G_CONSTANT, s1}, WidenScalar);
+}
+
+void X86LegalizerInfo::setLegalizerInfoSSE1() {
+ if (!Subtarget.hasSSE1())
+ return;
+
+ const LLT s32 = LLT::scalar(32);
+ const LLT v4s32 = LLT::vector(4, 32);
+ const LLT v2s64 = LLT::vector(2, 64);
+
+ for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
+ for (auto Ty : {s32, v4s32})
+ setAction({BinOp, Ty}, Legal);
+
+ for (unsigned MemOp : {G_LOAD, G_STORE})
+ for (auto Ty : {v4s32, v2s64})
+ setAction({MemOp, Ty}, Legal);
+}
+
+void X86LegalizerInfo::setLegalizerInfoSSE2() {
+ if (!Subtarget.hasSSE2())
+ return;
+
+ const LLT s64 = LLT::scalar(64);
+ const LLT v4s32 = LLT::vector(4, 32);
+ const LLT v2s64 = LLT::vector(2, 64);
+
+ for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
+ for (auto Ty : {s64, v2s64})
+ setAction({BinOp, Ty}, Legal);
+
+ for (unsigned BinOp : {G_ADD, G_SUB})
+ for (auto Ty : {v4s32})
+ setAction({BinOp, Ty}, Legal);
+}
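The whole file is variations on one pattern: declare an LLT, map (opcode, type) pairs to an action, then freeze the tables with computeTables(). A hedged sketch of the same pattern for a hypothetical target, using only the API already visible above (the class name and the rules are illustrative):

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/Target/TargetOpcodes.h"

namespace {
class ToyLegalizerInfo : public llvm::LegalizerInfo {
public:
  ToyLegalizerInfo() {
    using namespace llvm;
    const LLT s8 = LLT::scalar(8);
    const LLT s16 = LLT::scalar(16);
    const LLT s32 = LLT::scalar(32);
    // 16/32-bit adds are natively selectable; narrower ones get widened.
    for (auto Ty : {s16, s32})
      setAction({TargetOpcode::G_ADD, Ty}, Legal);
    setAction({TargetOpcode::G_ADD, s8}, WidenScalar);
    computeTables(); // Must run once, after all actions are registered.
  }
};
} // end anonymous namespace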
diff --git a/lib/Target/X86/X86LegalizerInfo.h b/lib/Target/X86/X86LegalizerInfo.h
new file mode 100644
index 000000000000..3f00898b4232
--- /dev/null
+++ b/lib/Target/X86/X86LegalizerInfo.h
@@ -0,0 +1,43 @@
+//===- X86LegalizerInfo.h -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the MachineLegalizer class for X86.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_X86MACHINELEGALIZER_H
+#define LLVM_LIB_TARGET_X86_X86MACHINELEGALIZER_H
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+namespace llvm {
+
+class X86Subtarget;
+class X86TargetMachine;
+
+/// This class provides the legalization rules for the X86 target.
+class X86LegalizerInfo : public LegalizerInfo {
+private:
+ /// Keep a reference to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget &Subtarget;
+ const X86TargetMachine &TM;
+
+public:
+ X86LegalizerInfo(const X86Subtarget &STI, const X86TargetMachine &TM);
+
+private:
+ void setLegalizerInfo32bit();
+ void setLegalizerInfo64bit();
+ void setLegalizerInfoSSE1();
+ void setLegalizerInfoSSE2();
+};
+} // namespace llvm
+#endif
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index feeb2fd5993c..550e3543a71e 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -102,7 +102,7 @@ void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
}
void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
- OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
+ OutStreamer->EmitInstruction(Inst, getSubtargetInfo(), EnablePrintSchedInfo);
SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
}
@@ -215,6 +215,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_GOT: RefKind = MCSymbolRefExpr::VK_GOT; break;
case X86II::MO_GOTOFF: RefKind = MCSymbolRefExpr::VK_GOTOFF; break;
case X86II::MO_PLT: RefKind = MCSymbolRefExpr::VK_PLT; break;
+ case X86II::MO_ABS8: RefKind = MCSymbolRefExpr::VK_X86_ABS8; break;
case X86II::MO_PIC_BASE_OFFSET:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
Expr = MCSymbolRefExpr::create(Sym, Ctx);
@@ -357,7 +358,7 @@ X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
const MachineOperand &MO) const {
switch (MO.getType()) {
default:
- MI->dump();
+ MI->print(errs());
llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
@@ -498,11 +499,16 @@ ReSimplify:
break;
}
- // TAILJMPd, TAILJMPd64 - Lower to the correct jump instruction.
+ // TAILJMPd, TAILJMPd64, TAILJMPd_CC, TAILJMPd64_CC - Lower to the correct
+ // jump instruction.
{ unsigned Opcode;
case X86::TAILJMPr: Opcode = X86::JMP32r; goto SetTailJmpOpcode;
case X86::TAILJMPd:
case X86::TAILJMPd64: Opcode = X86::JMP_1; goto SetTailJmpOpcode;
+ case X86::TAILJMPd_CC:
+ case X86::TAILJMPd64_CC:
+ Opcode = X86::GetCondBranchFromCond(
+ static_cast<X86::CondCode>(MI->getOperand(1).getImm()));
+ goto SetTailJmpOpcode;
SetTailJmpOpcode:
MCOperand Saved = OutMI.getOperand(0);
@@ -888,30 +894,47 @@ void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
SM.recordStatepoint(MI);
}
-void X86AsmPrinter::LowerFAULTING_LOAD_OP(const MachineInstr &MI,
- X86MCInstLower &MCIL) {
- // FAULTING_LOAD_OP <def>, <MBB handler>, <load opcode>, <load operands>
+void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
+ X86MCInstLower &MCIL) {
+ // FAULTING_OP <def>, <faulting kind>, <MBB handler>,
+ // <opcode>, <operands>
- unsigned LoadDefRegister = MI.getOperand(0).getReg();
- MCSymbol *HandlerLabel = MI.getOperand(1).getMBB()->getSymbol();
- unsigned LoadOpcode = MI.getOperand(2).getImm();
- unsigned LoadOperandsBeginIdx = 3;
+ unsigned DefRegister = FaultingMI.getOperand(0).getReg();
+ FaultMaps::FaultKind FK =
+ static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
+ MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
+ unsigned Opcode = FaultingMI.getOperand(3).getImm();
+ unsigned OperandsBeginIdx = 4;
- FM.recordFaultingOp(FaultMaps::FaultingLoad, HandlerLabel);
+ assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
+ FM.recordFaultingOp(FK, HandlerLabel);
- MCInst LoadMI;
- LoadMI.setOpcode(LoadOpcode);
+ MCInst MI;
+ MI.setOpcode(Opcode);
- if (LoadDefRegister != X86::NoRegister)
- LoadMI.addOperand(MCOperand::createReg(LoadDefRegister));
+ if (DefRegister != X86::NoRegister)
+ MI.addOperand(MCOperand::createReg(DefRegister));
- for (auto I = MI.operands_begin() + LoadOperandsBeginIdx,
- E = MI.operands_end();
+ for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
+ E = FaultingMI.operands_end();
I != E; ++I)
- if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, *I))
- LoadMI.addOperand(MaybeOperand.getValue());
+ if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
+ MI.addOperand(MaybeOperand.getValue());
+
+ OutStreamer->EmitInstruction(MI, getSubtargetInfo());
+}
- OutStreamer->EmitInstruction(LoadMI, getSubtargetInfo());
+void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
+ X86MCInstLower &MCIL) {
+ bool Is64Bits = Subtarget->is64Bit();
+ MCContext &Ctx = OutStreamer->getContext();
+ MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
+ const MCSymbolRefExpr *Op =
+ MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);
+
+ EmitAndCountInstruction(
+ MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
+ .addExpr(Op));
}
void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
@@ -1276,9 +1299,11 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::TAILJMPr:
case X86::TAILJMPm:
case X86::TAILJMPd:
+ case X86::TAILJMPd_CC:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
case X86::TAILJMPd64:
+ case X86::TAILJMPd64_CC:
case X86::TAILJMPr64_REX:
case X86::TAILJMPm64_REX:
// Lower these as normal, but add some comments.
@@ -1367,8 +1392,11 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case TargetOpcode::STATEPOINT:
return LowerSTATEPOINT(*MI, MCInstLowering);
- case TargetOpcode::FAULTING_LOAD_OP:
- return LowerFAULTING_LOAD_OP(*MI, MCInstLowering);
+ case TargetOpcode::FAULTING_OP:
+ return LowerFAULTING_OP(*MI, MCInstLowering);
+
+ case TargetOpcode::FENTRY_CALL:
+ return LowerFENTRY_CALL(*MI, MCInstLowering);
case TargetOpcode::PATCHABLE_OP:
return LowerPATCHABLE_OP(*MI, MCInstLowering);
@@ -1501,7 +1529,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 64> Mask;
DecodePSHUFBMask(C, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
+ !EnablePrintSchedInfo);
}
break;
}
@@ -1572,15 +1601,16 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPERMILPMask(C, ElSize, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
+ !EnablePrintSchedInfo);
}
break;
}
case X86::VPERMIL2PDrm:
case X86::VPERMIL2PSrm:
- case X86::VPERMIL2PDrmY:
- case X86::VPERMIL2PSrmY: {
+ case X86::VPERMIL2PDYrm:
+ case X86::VPERMIL2PSYrm: {
if (!OutStreamer->isVerboseAsm())
break;
assert(MI->getNumOperands() >= 8 &&
@@ -1593,8 +1623,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
unsigned ElSize;
switch (MI->getOpcode()) {
default: llvm_unreachable("Invalid opcode");
- case X86::VPERMIL2PSrm: case X86::VPERMIL2PSrmY: ElSize = 32; break;
- case X86::VPERMIL2PDrm: case X86::VPERMIL2PDrmY: ElSize = 64; break;
+ case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
+ case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
}
const MachineOperand &MaskOp = MI->getOperand(6);
@@ -1602,7 +1632,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
+ !EnablePrintSchedInfo);
}
break;
}
@@ -1618,7 +1649,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPPERMMask(C, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
+ !EnablePrintSchedInfo);
}
break;
}
@@ -1678,7 +1710,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
CS << "?";
}
CS << "]";
- OutStreamer->AddComment(CS.str());
+ OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
} else if (auto *CV = dyn_cast<ConstantVector>(C)) {
CS << "<";
for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
@@ -1710,7 +1742,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
CS << ">";
- OutStreamer->AddComment(CS.str());
+ OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
}
}
break;
diff --git a/lib/Target/X86/X86MachineFunctionInfo.cpp b/lib/Target/X86/X86MachineFunctionInfo.cpp
index c9e636f1eb00..3fcb642424ad 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.cpp
+++ b/lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -9,6 +9,7 @@
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -20,11 +21,8 @@ void X86MachineFunctionInfo::setRestoreBasePointer(const MachineFunction *MF) {
const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
MF->getSubtarget().getRegisterInfo());
unsigned SlotSize = RegInfo->getSlotSize();
- for (const MCPhysReg *CSR =
- RegInfo->X86RegisterInfo::getCalleeSavedRegs(MF);
- unsigned Reg = *CSR;
- ++CSR)
- {
+ for (const MCPhysReg *CSR = MF->getRegInfo().getCalleeSavedRegs();
+ unsigned Reg = *CSR; ++CSR) {
if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
RestoreBasePointerOffset -= SlotSize;
}
diff --git a/lib/Target/X86/X86MacroFusion.cpp b/lib/Target/X86/X86MacroFusion.cpp
new file mode 100644
index 000000000000..dd21e2b7c4a1
--- /dev/null
+++ b/lib/Target/X86/X86MacroFusion.cpp
@@ -0,0 +1,271 @@
+//===- X86MacroFusion.cpp - X86 Macro Fusion ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the DAG scheduling mutation to
+// pair instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MacroFusion.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define DEBUG_TYPE "misched"
+
+STATISTIC(NumFused, "Number of instr pairs fused");
+
+using namespace llvm;
+
+static cl::opt<bool> EnableMacroFusion("x86-misched-fusion", cl::Hidden,
+ cl::desc("Enable scheduling for macro fusion."), cl::init(true));
+
+namespace {
+
+/// \brief Verify that the instruction pair, First and Second,
+/// should be scheduled back to back. If either instruction is unspecified,
+/// then verify that the other instruction may be part of a pair at all.
+static bool shouldScheduleAdjacent(const X86Subtarget &ST,
+ const MachineInstr *First,
+ const MachineInstr *Second) {
+ // Check if this processor supports macro-fusion. Since this is a minor
+ // heuristic, we haven't specifically reserved a feature. hasAVX is a decent
+ // proxy for SandyBridge+.
+ if (!ST.hasAVX())
+ return false;
+
+ enum {
+ FuseTest,
+ FuseCmp,
+ FuseInc
+ } FuseKind;
+
+ assert((First || Second) && "At least one instr must be specified");
+ unsigned FirstOpcode = First
+ ? First->getOpcode()
+ : static_cast<unsigned>(X86::INSTRUCTION_LIST_END);
+ unsigned SecondOpcode = Second
+ ? Second->getOpcode()
+ : static_cast<unsigned>(X86::INSTRUCTION_LIST_END);
+
+ switch (SecondOpcode) {
+ default:
+ return false;
+ case X86::JE_1:
+ case X86::JNE_1:
+ case X86::JL_1:
+ case X86::JLE_1:
+ case X86::JG_1:
+ case X86::JGE_1:
+ FuseKind = FuseInc;
+ break;
+ case X86::JB_1:
+ case X86::JBE_1:
+ case X86::JA_1:
+ case X86::JAE_1:
+ FuseKind = FuseCmp;
+ break;
+ case X86::JS_1:
+ case X86::JNS_1:
+ case X86::JP_1:
+ case X86::JNP_1:
+ case X86::JO_1:
+ case X86::JNO_1:
+ FuseKind = FuseTest;
+ break;
+ }
+
+ switch (FirstOpcode) {
+ default:
+ return false;
+ case X86::TEST8rr:
+ case X86::TEST16rr:
+ case X86::TEST32rr:
+ case X86::TEST64rr:
+ case X86::TEST8ri:
+ case X86::TEST16ri:
+ case X86::TEST32ri:
+ case X86::TEST32i32:
+ case X86::TEST64i32:
+ case X86::TEST64ri32:
+ case X86::TEST8rm:
+ case X86::TEST16rm:
+ case X86::TEST32rm:
+ case X86::TEST64rm:
+ case X86::TEST8ri_NOREX:
+ case X86::AND16i16:
+ case X86::AND16ri:
+ case X86::AND16ri8:
+ case X86::AND16rm:
+ case X86::AND16rr:
+ case X86::AND32i32:
+ case X86::AND32ri:
+ case X86::AND32ri8:
+ case X86::AND32rm:
+ case X86::AND32rr:
+ case X86::AND64i32:
+ case X86::AND64ri32:
+ case X86::AND64ri8:
+ case X86::AND64rm:
+ case X86::AND64rr:
+ case X86::AND8i8:
+ case X86::AND8ri:
+ case X86::AND8rm:
+ case X86::AND8rr:
+ return true;
+ case X86::CMP16i16:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP16rm:
+ case X86::CMP16rr:
+ case X86::CMP32i32:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP32rm:
+ case X86::CMP32rr:
+ case X86::CMP64i32:
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP64rm:
+ case X86::CMP64rr:
+ case X86::CMP8i8:
+ case X86::CMP8ri:
+ case X86::CMP8rm:
+ case X86::CMP8rr:
+ case X86::ADD16i16:
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ case X86::ADD16ri8_DB:
+ case X86::ADD16ri_DB:
+ case X86::ADD16rm:
+ case X86::ADD16rr:
+ case X86::ADD16rr_DB:
+ case X86::ADD32i32:
+ case X86::ADD32ri:
+ case X86::ADD32ri8:
+ case X86::ADD32ri8_DB:
+ case X86::ADD32ri_DB:
+ case X86::ADD32rm:
+ case X86::ADD32rr:
+ case X86::ADD32rr_DB:
+ case X86::ADD64i32:
+ case X86::ADD64ri32:
+ case X86::ADD64ri32_DB:
+ case X86::ADD64ri8:
+ case X86::ADD64ri8_DB:
+ case X86::ADD64rm:
+ case X86::ADD64rr:
+ case X86::ADD64rr_DB:
+ case X86::ADD8i8:
+ case X86::ADD8mi:
+ case X86::ADD8mr:
+ case X86::ADD8ri:
+ case X86::ADD8rm:
+ case X86::ADD8rr:
+ case X86::SUB16i16:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB16rm:
+ case X86::SUB16rr:
+ case X86::SUB32i32:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB32rm:
+ case X86::SUB32rr:
+ case X86::SUB64i32:
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB64rm:
+ case X86::SUB64rr:
+ case X86::SUB8i8:
+ case X86::SUB8ri:
+ case X86::SUB8rm:
+ case X86::SUB8rr:
+ return FuseKind == FuseCmp || FuseKind == FuseInc;
+ case X86::INC16r:
+ case X86::INC32r:
+ case X86::INC64r:
+ case X86::INC8r:
+ case X86::DEC16r:
+ case X86::DEC32r:
+ case X86::DEC64r:
+ case X86::DEC8r:
+ return FuseKind == FuseInc;
+ case X86::INSTRUCTION_LIST_END:
+ return true;
+ }
+}
+
+/// \brief Post-process the DAG to create cluster edges between instructions
+/// that may be fused by the processor into a single operation.
+class X86MacroFusion : public ScheduleDAGMutation {
+public:
+ X86MacroFusion() {}
+
+ void apply(ScheduleDAGInstrs *DAGInstrs) override;
+};
+
+void X86MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
+ const X86Subtarget &ST = DAG->MF.getSubtarget<X86Subtarget>();
+
+ // For now, assume targets can only fuse with the branch.
+ SUnit &ExitSU = DAG->ExitSU;
+ MachineInstr *Branch = ExitSU.getInstr();
+ if (!Branch || !shouldScheduleAdjacent(ST, nullptr, Branch))
+ return;
+
+ for (SDep &PredDep : ExitSU.Preds) {
+ if (PredDep.isWeak())
+ continue;
+ SUnit &SU = *PredDep.getSUnit();
+ MachineInstr &Pred = *SU.getInstr();
+ if (!shouldScheduleAdjacent(ST, &Pred, Branch))
+ continue;
+
+ // Create a single weak edge from SU to ExitSU. The only effect is to cause
+ // bottom-up scheduling to heavily prioritize the clustered SU. There is no
+ // need to copy predecessor edges from ExitSU to SU, since top-down
+ // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
+ // of SU, we could create an artificial edge from the deepest root, but it
+ // hasn't been needed yet.
+ bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster));
+ (void)Success;
+ assert(Success && "No DAG nodes should be reachable from ExitSU");
+
+ // Adjust latency of data deps between the nodes.
+ for (SDep &PredDep : ExitSU.Preds)
+ if (PredDep.getSUnit() == &SU)
+ PredDep.setLatency(0);
+ for (SDep &SuccDep : SU.Succs)
+ if (SuccDep.getSUnit() == &ExitSU)
+ SuccDep.setLatency(0);
+
+ ++NumFused;
+ DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse ";
+ SU.print(dbgs(), DAG);
+ dbgs() << " - ExitSU"
+ << " / " << DAG->TII->getName(Pred.getOpcode()) << " - "
+ << DAG->TII->getName(Branch->getOpcode()) << '\n';);
+
+ break;
+ }
+}
+
+} // end anonymous namespace
+
+namespace llvm {
+
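+/// Create a DAG mutation that enables X86 macro fusion. May return nullptr
+/// when macro fusion is disabled; addMutation() is expected to treat a null
+/// mutation as a no-op.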
+std::unique_ptr<ScheduleDAGMutation>
+createX86MacroFusionDAGMutation() {
+ return EnableMacroFusion ? make_unique<X86MacroFusion>() : nullptr;
+}
+
+} // end namespace llvm
diff --git a/lib/Target/X86/X86MacroFusion.h b/lib/Target/X86/X86MacroFusion.h
new file mode 100644
index 000000000000..e630f802e8e6
--- /dev/null
+++ b/lib/Target/X86/X86MacroFusion.h
@@ -0,0 +1,30 @@
+//===- X86MacroFusion.h - X86 Macro Fusion --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 definition of the DAG scheduling mutation to pair
+// instructions back to back.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstrInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+
+//===----------------------------------------------------------------------===//
+// X86MacroFusion - DAG post-processing to encourage fusion of macro ops.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+/// Note that you have to add:
+/// DAG.addMutation(createX86MacroFusionDAGMutation());
+/// to X86PassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation>
+createX86MacroFusionDAGMutation();
+
+} // end namespace llvm
diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp
index e1447006cd18..debb192732e5 100644
--- a/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -389,9 +389,6 @@ bool OptimizeLEAPass::isReplaceable(const MachineInstr &First,
assert(isLEA(First) && isLEA(Last) &&
"The function works only with LEA instructions");
- // Get new address displacement.
- AddrDispShift = getAddrDispShift(Last, 1, First, 1);
-
// Make sure that LEA def registers belong to the same class. There may be
// instructions (like MOV8mr_NOREX) which allow a limited set of registers to
// be used as their operands, so we must be sure that replacing one LEA
@@ -400,10 +397,13 @@ bool OptimizeLEAPass::isReplaceable(const MachineInstr &First,
MRI->getRegClass(Last.getOperand(0).getReg()))
return false;
+ // Get new address displacement.
+ AddrDispShift = getAddrDispShift(Last, 1, First, 1);
+
// Loop over all uses of the Last LEA to check that its def register is
// used only as address base for memory accesses. If so, it can be
// replaced, otherwise - no.
- for (auto &MO : MRI->use_operands(Last.getOperand(0).getReg())) {
+ for (auto &MO : MRI->use_nodbg_operands(Last.getOperand(0).getReg())) {
MachineInstr &MI = *MO.getParent();
// Get the number of the first memory operand.
@@ -563,8 +563,9 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
// Loop over all uses of the Last LEA and update their operands. Note
// that the correctness of this has already been checked in the
// isReplaceable function.
- for (auto UI = MRI->use_begin(Last.getOperand(0).getReg()),
- UE = MRI->use_end();
+ unsigned LastVReg = Last.getOperand(0).getReg();
+ for (auto UI = MRI->use_nodbg_begin(LastVReg),
+ UE = MRI->use_nodbg_end();
UI != UE;) {
MachineOperand &MO = *UI++;
MachineInstr &MI = *MO.getParent();
@@ -586,6 +587,9 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
Op.setOffset(Op.getOffset() + AddrDispShift);
}
+ // Mark debug values referring to Last LEA as undefined.
+ MRI->markUsesInDebugValueAsUndef(LastVReg);
+
// Since we can possibly extend register lifetime, clear kill flags.
MRI->clearKillFlags(First.getOperand(0).getReg());
@@ -594,7 +598,7 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
// By this moment, all of the Last LEA's uses must be replaced. So we
// can freely remove it.
- assert(MRI->use_empty(Last.getOperand(0).getReg()) &&
+ assert(MRI->use_empty(LastVReg) &&
"The LEA's def register must have no uses");
Last.eraseFromParent();
diff --git a/lib/Target/X86/X86RegisterBankInfo.cpp b/lib/Target/X86/X86RegisterBankInfo.cpp
new file mode 100644
index 000000000000..d395c826e6bf
--- /dev/null
+++ b/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -0,0 +1,243 @@
+//===- X86RegisterBankInfo.cpp -----------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the RegisterBankInfo class for X86.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "X86RegisterBankInfo.h"
+#include "X86InstrInfo.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define GET_TARGET_REGBANK_IMPL
+#include "X86GenRegisterBank.inc"
+
+using namespace llvm;
+// This file will be TableGen'ed at some point.
+#define GET_TARGET_REGBANK_INFO_IMPL
+#include "X86GenRegisterBankInfo.def"
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+X86RegisterBankInfo::X86RegisterBankInfo(const TargetRegisterInfo &TRI)
+ : X86GenRegisterBankInfo() {
+
+ // Validate RegBank initialization.
+ const RegisterBank &RBGPR = getRegBank(X86::GPRRegBankID);
+ (void)RBGPR;
+ assert(&X86::GPRRegBank == &RBGPR && "Incorrect RegBank initialization.");
+
+ // The GPR register bank is fully defined by all the registers in
+ // GR64 + its subclasses.
+ assert(RBGPR.covers(*TRI.getRegClass(X86::GR64RegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.getSize() == 64 && "GPRs should hold up to 64-bit");
+}
+
+const RegisterBank &X86RegisterBankInfo::getRegBankFromRegClass(
+ const TargetRegisterClass &RC) const {
+
+ if (X86::GR8RegClass.hasSubClassEq(&RC) ||
+ X86::GR16RegClass.hasSubClassEq(&RC) ||
+ X86::GR32RegClass.hasSubClassEq(&RC) ||
+ X86::GR64RegClass.hasSubClassEq(&RC))
+ return getRegBank(X86::GPRRegBankID);
+
+ if (X86::FR32XRegClass.hasSubClassEq(&RC) ||
+ X86::FR64XRegClass.hasSubClassEq(&RC) ||
+ X86::VR128XRegClass.hasSubClassEq(&RC) ||
+ X86::VR256XRegClass.hasSubClassEq(&RC) ||
+ X86::VR512RegClass.hasSubClassEq(&RC))
+ return getRegBank(X86::VECRRegBankID);
+
+ llvm_unreachable("Unsupported register kind yet.");
+}
+
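+// Map a value of type Ty onto a partial-mapping index: scalar integers and
+// pointers select a GPR mapping by bit width, FP scalars an FP mapping, and
+// vectors a VEC mapping.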
+X86GenRegisterBankInfo::PartialMappingIdx
+X86GenRegisterBankInfo::getPartialMappingIdx(const LLT &Ty, bool isFP) {
+ if ((Ty.isScalar() && !isFP) || Ty.isPointer()) {
+ switch (Ty.getSizeInBits()) {
+ case 8:
+ return PMI_GPR8;
+ case 16:
+ return PMI_GPR16;
+ case 32:
+ return PMI_GPR32;
+ case 64:
+ return PMI_GPR64;
+ default:
+ llvm_unreachable("Unsupported register size.");
+ }
+ } else if (Ty.isScalar()) {
+ switch (Ty.getSizeInBits()) {
+ case 32:
+ return PMI_FP32;
+ case 64:
+ return PMI_FP64;
+ default:
+ llvm_unreachable("Unsupported register size.");
+ }
+ } else {
+ switch (Ty.getSizeInBits()) {
+ case 128:
+ return PMI_VEC128;
+ case 256:
+ return PMI_VEC256;
+ case 512:
+ return PMI_VEC512;
+ default:
+ llvm_unreachable("Unsupported register size.");
+ }
+ }
+
+ return PMI_None;
+}
+
+void X86RegisterBankInfo::getInstrPartialMappingIdxs(
+ const MachineInstr &MI, const MachineRegisterInfo &MRI, const bool isFP,
+ SmallVectorImpl<PartialMappingIdx> &OpRegBankIdx) {
+
+ unsigned NumOperands = MI.getNumOperands();
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
+ auto &MO = MI.getOperand(Idx);
+ if (!MO.isReg())
+ OpRegBankIdx[Idx] = PMI_None;
+ else
+ OpRegBankIdx[Idx] = getPartialMappingIdx(MRI.getType(MO.getReg()), isFP);
+ }
+}
+
+bool X86RegisterBankInfo::getInstrValueMapping(
+ const MachineInstr &MI,
+ const SmallVectorImpl<PartialMappingIdx> &OpRegBankIdx,
+ SmallVectorImpl<const ValueMapping *> &OpdsMapping) {
+
+ unsigned NumOperands = MI.getNumOperands();
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
+ if (!MI.getOperand(Idx).isReg())
+ continue;
+
+ auto Mapping = getValueMapping(OpRegBankIdx[Idx], 1);
+ if (!Mapping->isValid())
+ return false;
+
+ OpdsMapping[Idx] = Mapping;
+ }
+ return true;
+}
+
+RegisterBankInfo::InstructionMapping
+X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI, bool isFP) {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned NumOperands = MI.getNumOperands();
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+
+ if (NumOperands != 3 || (Ty != MRI.getType(MI.getOperand(1).getReg())) ||
+ (Ty != MRI.getType(MI.getOperand(2).getReg())))
+ llvm_unreachable("Unsupported operand mapping yet.");
+
+ auto Mapping = getValueMapping(getPartialMappingIdx(Ty, isFP), 3);
+ return InstructionMapping{DefaultMappingID, 1, Mapping, NumOperands};
+}
+
+RegisterBankInfo::InstructionMapping
+X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ auto Opc = MI.getOpcode();
+
+ // Try the default logic for non-generic instructions that are either copies
+ // or already have some operands assigned to banks.
+ if (!isPreISelGenericOpcode(Opc)) {
+ InstructionMapping Mapping = getInstrMappingImpl(MI);
+ if (Mapping.isValid())
+ return Mapping;
+ }
+
+ switch (Opc) {
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ return getSameOperandsMapping(MI, false);
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ return getSameOperandsMapping(MI, true);
+ default:
+ break;
+ }
+
+ unsigned NumOperands = MI.getNumOperands();
+
+ // Track the bank of each register; use the NotFP mapping (all scalars in GPRs).
+ SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
+ getInstrPartialMappingIdxs(MI, MRI, /* isFP */ false, OpRegBankIdx);
+
+ // Finally construct the computed mapping.
+ SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
+ if (!getInstrValueMapping(MI, OpRegBankIdx, OpdsMapping))
+ return InstructionMapping();
+
+ return InstructionMapping{DefaultMappingID, /* Cost */ 1,
+ getOperandsMapping(OpdsMapping), NumOperands};
+}
+
+void X86RegisterBankInfo::applyMappingImpl(
+ const OperandsMapper &OpdMapper) const {
+ return applyDefaultMapping(OpdMapper);
+}
+
+RegisterBankInfo::InstructionMappings
+X86RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const {
+
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_STORE: {
+ // We are going to try to map 32/64-bit loads/stores to PMI_FP32/PMI_FP64.
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
+ if (Size != 32 && Size != 64)
+ break;
+
+ unsigned NumOperands = MI.getNumOperands();
+
+ // Track the bank of each register; use the FP mapping (all scalars in VEC).
+ SmallVector<PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
+ getInstrPartialMappingIdxs(MI, MRI, /* isFP */ true, OpRegBankIdx);
+
+ // Finally construct the computed mapping.
+ SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
+ if (!getInstrValueMapping(MI, OpRegBankIdx, OpdsMapping))
+ break;
+
+ RegisterBankInfo::InstructionMapping Mapping = InstructionMapping{
+ /*ID*/ 1, /*Cost*/ 1, getOperandsMapping(OpdsMapping), NumOperands};
+ InstructionMappings AltMappings;
+ AltMappings.emplace_back(std::move(Mapping));
+ return AltMappings;
+ }
+ default:
+ break;
+ }
+ return RegisterBankInfo::getInstrAlternativeMappings(MI);
+}
diff --git a/lib/Target/X86/X86RegisterBankInfo.h b/lib/Target/X86/X86RegisterBankInfo.h
new file mode 100644
index 000000000000..a1e01a9ab949
--- /dev/null
+++ b/lib/Target/X86/X86RegisterBankInfo.h
@@ -0,0 +1,81 @@
+//===- X86RegisterBankInfo ---------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the RegisterBankInfo class for X86.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_X86REGISTERBANKINFO_H
+#define LLVM_LIB_TARGET_X86_X86REGISTERBANKINFO_H
+
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+
+#define GET_REGBANK_DECLARATIONS
+#include "X86GenRegisterBank.inc"
+
+namespace llvm {
+
+class LLT;
+
+class X86GenRegisterBankInfo : public RegisterBankInfo {
+protected:
+#define GET_TARGET_REGBANK_CLASS
+#include "X86GenRegisterBank.inc"
+#define GET_TARGET_REGBANK_INFO_CLASS
+#include "X86GenRegisterBankInfo.def"
+
+ static RegisterBankInfo::PartialMapping PartMappings[];
+ static RegisterBankInfo::ValueMapping ValMappings[];
+
+ static PartialMappingIdx getPartialMappingIdx(const LLT &Ty, bool isFP);
+ static const RegisterBankInfo::ValueMapping *
+ getValueMapping(PartialMappingIdx Idx, unsigned NumOperands);
+};
+
+class TargetRegisterInfo;
+
+/// This class provides the information for the target register banks.
+class X86RegisterBankInfo final : public X86GenRegisterBankInfo {
+private:
+ /// Get an instruction mapping.
+ /// \return An InstructionMappings with a statically allocated
+ /// OperandsMapping.
+ static InstructionMapping getSameOperandsMapping(const MachineInstr &MI,
+ bool isFP);
+
+ /// Track the bank of each instruction operand (register).
+ static void
+ getInstrPartialMappingIdxs(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI, const bool isFP,
+ SmallVectorImpl<PartialMappingIdx> &OpRegBankIdx);
+
+ /// Construct the instruction ValueMapping from PartialMappingIdxs
+ /// \return true if mapping succeeded.
+ static bool
+ getInstrValueMapping(const MachineInstr &MI,
+ const SmallVectorImpl<PartialMappingIdx> &OpRegBankIdx,
+ SmallVectorImpl<const ValueMapping *> &OpdsMapping);
+
+public:
+ X86RegisterBankInfo(const TargetRegisterInfo &TRI);
+
+ const RegisterBank &
+ getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
+
+ InstructionMappings
+ getInstrAlternativeMappings(const MachineInstr &MI) const override;
+
+ /// See RegisterBankInfo::applyMapping.
+ void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+
+ InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_X86_X86REGISTERBANKINFO_H
diff --git a/lib/Target/X86/X86RegisterBanks.td b/lib/Target/X86/X86RegisterBanks.td
new file mode 100644
index 000000000000..6d17cd53a0c1
--- /dev/null
+++ b/lib/Target/X86/X86RegisterBanks.td
@@ -0,0 +1,17 @@
+//=- X86RegisterBanks.td - Describe the X86 Register Banks ----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the register banks used by the X86 GlobalISel backend.
+//
+//===----------------------------------------------------------------------===//
+
+/// General Purpose Registers: RAX, RCX,...
+def GPRRegBank : RegisterBank<"GPR", [GR64]>;
+
+/// Floating Point/Vector Registers
+def VECRRegBank : RegisterBank<"VECR", [VR512]>;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 65f438f94b04..9bab9a4cf3ba 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -80,7 +80,7 @@ X86RegisterInfo::X86RegisterInfo(const Triple &TT)
bool
X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
- // ExeDepsFixer and PostRAScheduler require liveness.
+ // ExecutionDepsFixer and PostRAScheduler require liveness.
return true;
}
@@ -337,7 +337,9 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_64_AllRegs_AVX512_SaveList;
if (HasAVX)
return CSR_64_AllRegs_AVX_SaveList;
- return CSR_64_AllRegs_SaveList;
+ if (HasSSE)
+ return CSR_64_AllRegs_SaveList;
+ return CSR_64_AllRegs_NoSSE_SaveList;
} else {
if (HasAVX512)
return CSR_32_AllRegs_AVX512_SaveList;
@@ -447,7 +449,9 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return CSR_64_AllRegs_AVX512_RegMask;
if (HasAVX)
return CSR_64_AllRegs_AVX_RegMask;
- return CSR_64_AllRegs_RegMask;
+ if (HasSSE)
+ return CSR_64_AllRegs_RegMask;
+ return CSR_64_AllRegs_NoSSE_RegMask;
} else {
if (HasAVX512)
return CSR_32_AllRegs_AVX512_RegMask;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 372a15aff15a..c177ba1d52f7 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -189,22 +189,22 @@ def XMM13: X86Reg<"xmm13", 13>, DwarfRegNum<[30, -2, -2]>;
def XMM14: X86Reg<"xmm14", 14>, DwarfRegNum<[31, -2, -2]>;
def XMM15: X86Reg<"xmm15", 15>, DwarfRegNum<[32, -2, -2]>;
-def XMM16: X86Reg<"xmm16", 16>, DwarfRegNum<[60, -2, -2]>;
-def XMM17: X86Reg<"xmm17", 17>, DwarfRegNum<[61, -2, -2]>;
-def XMM18: X86Reg<"xmm18", 18>, DwarfRegNum<[62, -2, -2]>;
-def XMM19: X86Reg<"xmm19", 19>, DwarfRegNum<[63, -2, -2]>;
-def XMM20: X86Reg<"xmm20", 20>, DwarfRegNum<[64, -2, -2]>;
-def XMM21: X86Reg<"xmm21", 21>, DwarfRegNum<[65, -2, -2]>;
-def XMM22: X86Reg<"xmm22", 22>, DwarfRegNum<[66, -2, -2]>;
-def XMM23: X86Reg<"xmm23", 23>, DwarfRegNum<[67, -2, -2]>;
-def XMM24: X86Reg<"xmm24", 24>, DwarfRegNum<[68, -2, -2]>;
-def XMM25: X86Reg<"xmm25", 25>, DwarfRegNum<[69, -2, -2]>;
-def XMM26: X86Reg<"xmm26", 26>, DwarfRegNum<[70, -2, -2]>;
-def XMM27: X86Reg<"xmm27", 27>, DwarfRegNum<[71, -2, -2]>;
-def XMM28: X86Reg<"xmm28", 28>, DwarfRegNum<[72, -2, -2]>;
-def XMM29: X86Reg<"xmm29", 29>, DwarfRegNum<[73, -2, -2]>;
-def XMM30: X86Reg<"xmm30", 30>, DwarfRegNum<[74, -2, -2]>;
-def XMM31: X86Reg<"xmm31", 31>, DwarfRegNum<[75, -2, -2]>;
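+// Per the x86-64 psABI, XMM16-XMM31 are assigned DWARF register numbers 67-82.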
+def XMM16: X86Reg<"xmm16", 16>, DwarfRegNum<[67, -2, -2]>;
+def XMM17: X86Reg<"xmm17", 17>, DwarfRegNum<[68, -2, -2]>;
+def XMM18: X86Reg<"xmm18", 18>, DwarfRegNum<[69, -2, -2]>;
+def XMM19: X86Reg<"xmm19", 19>, DwarfRegNum<[70, -2, -2]>;
+def XMM20: X86Reg<"xmm20", 20>, DwarfRegNum<[71, -2, -2]>;
+def XMM21: X86Reg<"xmm21", 21>, DwarfRegNum<[72, -2, -2]>;
+def XMM22: X86Reg<"xmm22", 22>, DwarfRegNum<[73, -2, -2]>;
+def XMM23: X86Reg<"xmm23", 23>, DwarfRegNum<[74, -2, -2]>;
+def XMM24: X86Reg<"xmm24", 24>, DwarfRegNum<[75, -2, -2]>;
+def XMM25: X86Reg<"xmm25", 25>, DwarfRegNum<[76, -2, -2]>;
+def XMM26: X86Reg<"xmm26", 26>, DwarfRegNum<[77, -2, -2]>;
+def XMM27: X86Reg<"xmm27", 27>, DwarfRegNum<[78, -2, -2]>;
+def XMM28: X86Reg<"xmm28", 28>, DwarfRegNum<[79, -2, -2]>;
+def XMM29: X86Reg<"xmm29", 29>, DwarfRegNum<[80, -2, -2]>;
+def XMM30: X86Reg<"xmm30", 30>, DwarfRegNum<[81, -2, -2]>;
+def XMM31: X86Reg<"xmm31", 31>, DwarfRegNum<[82, -2, -2]>;
} // CostPerUse
@@ -437,8 +437,10 @@ def LOW32_ADDR_ACCESS : RegisterClass<"X86", [i32], 32, (add GR32, RIP)>;
def LOW32_ADDR_ACCESS_RBP : RegisterClass<"X86", [i32], 32,
(add LOW32_ADDR_ACCESS, RBP)>;
-// A class to support the 'A' assembler constraint: EAX then EDX.
+// A class to support the 'A' assembler constraint: [ER]AX then [ER]DX.
+def GR16_AD : RegisterClass<"X86", [i16], 16, (add AX, DX)>;
def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)>;
+def GR64_AD : RegisterClass<"X86", [i64], 64, (add RAX, RDX)>;
// Scalar SSE2 floating point registers.
def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index 35257f89100c..7f7efd7cad3f 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -366,6 +366,7 @@ def IIC_SSE_MWAIT : InstrItinClass;
def IIC_SSE_MONITOR : InstrItinClass;
def IIC_SSE_MWAITX : InstrItinClass;
def IIC_SSE_MONITORX : InstrItinClass;
+def IIC_SSE_CLZERO : InstrItinClass;
def IIC_SSE_PREFETCH : InstrItinClass;
def IIC_SSE_PAUSE : InstrItinClass;
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index f031a281e5dd..9da8a18965ea 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -85,10 +85,12 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(bzeroEntry, IntPtr), std::move(Args))
- .setDiscardResult();
+ CLI.setDebugLoc(dl)
+ .setChain(Chain)
+ .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(bzeroEntry, IntPtr),
+ std::move(Args))
+ .setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
return CallResult.second;
diff --git a/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index 11115524c810..2cebb76022ef 100644
--- a/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -14,7 +14,7 @@
#include "X86ShuffleDecodeConstantPool.h"
#include "Utils/X86ShuffleDecode.h"
-#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/IR/Constants.h"
@@ -25,7 +25,7 @@
namespace llvm {
static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
- SmallBitVector &UndefElts,
+ APInt &UndefElts,
SmallVectorImpl<uint64_t> &RawMask) {
// It is not an error for shuffle masks to not be a vector of
// MaskEltSizeInBits because the constant pool uniques constants by their
@@ -49,6 +49,33 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
unsigned NumCstElts = CstTy->getVectorNumElements();
+ assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
+ "Unaligned shuffle mask size");
+
+ unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
+ UndefElts = APInt(NumMaskElts, 0);
+ RawMask.resize(NumMaskElts, 0);
+
+ // Fast path - if the constants match the mask size then copy direct.
+ if (MaskEltSizeInBits == CstEltSizeInBits) {
+ assert(NumCstElts == NumMaskElts && "Unaligned shuffle mask size");
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ Constant *COp = C->getAggregateElement(i);
+ if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
+ return false;
+
+ if (isa<UndefValue>(COp)) {
+ UndefElts.setBit(i);
+ RawMask[i] = 0;
+ continue;
+ }
+
+ auto *Elt = cast<ConstantInt>(COp);
+ RawMask[i] = Elt->getValue().getZExtValue();
+ }
+ return true;
+ }
+
// Extract all the undef/constant element data and pack into single bitsets.
APInt UndefBits(CstSizeInBits, 0);
APInt MaskBits(CstSizeInBits, 0);
@@ -57,39 +84,30 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
return false;
+ unsigned BitOffset = i * CstEltSizeInBits;
+
if (isa<UndefValue>(COp)) {
- APInt EltUndef = APInt::getLowBitsSet(CstSizeInBits, CstEltSizeInBits);
- UndefBits |= EltUndef.shl(i * CstEltSizeInBits);
+ UndefBits.setBits(BitOffset, BitOffset + CstEltSizeInBits);
continue;
}
- APInt EltBits = cast<ConstantInt>(COp)->getValue();
- EltBits = EltBits.zextOrTrunc(CstSizeInBits);
- MaskBits |= EltBits.shl(i * CstEltSizeInBits);
+ MaskBits.insertBits(cast<ConstantInt>(COp)->getValue(), BitOffset);
}
// Now extract the undef/constant bit data into the raw shuffle masks.
- assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
- "Unaligned shuffle mask size");
-
- unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
- UndefElts = SmallBitVector(NumMaskElts, false);
- RawMask.resize(NumMaskElts, 0);
-
for (unsigned i = 0; i != NumMaskElts; ++i) {
- APInt EltUndef = UndefBits.lshr(i * MaskEltSizeInBits);
- EltUndef = EltUndef.zextOrTrunc(MaskEltSizeInBits);
+ unsigned BitOffset = i * MaskEltSizeInBits;
+ APInt EltUndef = UndefBits.extractBits(MaskEltSizeInBits, BitOffset);
// Only treat the element as UNDEF if all bits are UNDEF, otherwise
// treat it as zero.
if (EltUndef.isAllOnesValue()) {
- UndefElts[i] = true;
+ UndefElts.setBit(i);
RawMask[i] = 0;
continue;
}
- APInt EltBits = MaskBits.lshr(i * MaskEltSizeInBits);
- EltBits = EltBits.zextOrTrunc(MaskEltSizeInBits);
+ APInt EltBits = MaskBits.extractBits(MaskEltSizeInBits, BitOffset);
RawMask[i] = EltBits.getZExtValue();
}
@@ -104,8 +122,8 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
"Unexpected vector size.");
// The shuffle mask requires a byte vector.
- SmallBitVector UndefElts;
- SmallVector<uint64_t, 32> RawMask;
+ APInt UndefElts;
+ SmallVector<uint64_t, 64> RawMask;
if (!extractConstantMask(C, 8, UndefElts, RawMask))
return;
@@ -145,8 +163,8 @@ void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size.");
// The shuffle mask requires elements the same size as the target.
- SmallBitVector UndefElts;
- SmallVector<uint64_t, 8> RawMask;
+ APInt UndefElts;
+ SmallVector<uint64_t, 16> RawMask;
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
@@ -180,7 +198,7 @@ void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
assert((MaskTySize == 128 || MaskTySize == 256) && "Unexpected vector size.");
// The shuffle mask requires elements the same size as the target.
- SmallBitVector UndefElts;
+ APInt UndefElts;
SmallVector<uint64_t, 8> RawMask;
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
@@ -231,8 +249,8 @@ void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
"Unexpected vector size.");
// The shuffle mask requires a byte vector.
- SmallBitVector UndefElts;
- SmallVector<uint64_t, 32> RawMask;
+ APInt UndefElts;
+ SmallVector<uint64_t, 16> RawMask;
if (!extractConstantMask(C, 8, UndefElts, RawMask))
return;
@@ -286,8 +304,8 @@ void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
"Unexpected vector element size.");
// The shuffle mask requires elements the same size as the target.
- SmallBitVector UndefElts;
- SmallVector<uint64_t, 8> RawMask;
+ APInt UndefElts;
+ SmallVector<uint64_t, 64> RawMask;
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
@@ -314,8 +332,8 @@ void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
"Unexpected vector element size.");
// The shuffle mask requires elements the same size as the target.
- SmallBitVector UndefElts;
- SmallVector<uint64_t, 8> RawMask;
+ APInt UndefElts;
+ SmallVector<uint64_t, 64> RawMask;
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 586bb7bd7b1a..92a68759195c 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -11,19 +11,23 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/X86BaseInfo.h"
#include "X86Subtarget.h"
-#include "X86InstrInfo.h"
#include "X86TargetMachine.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
+#include <cassert>
+#include <string>
#if defined(_MSC_VER)
#include <intrin.h>
@@ -93,8 +97,17 @@ unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
return X86II::MO_NO_FLAG;
// Absolute symbols can be referenced directly.
- if (GV && GV->isAbsoluteSymbolRef())
- return X86II::MO_NO_FLAG;
+ if (GV) {
+ if (Optional<ConstantRange> CR = GV->getAbsoluteSymbolRange()) {
+ // See if we can use the 8-bit immediate form. Note that some instructions
+ // will sign extend the immediate operand, so to be conservative we only
+ // accept the range [0,128).
+ if (CR->getUnsignedMax().ult(128))
+ return X86II::MO_ABS8;
+ return X86II::MO_NO_FLAG;
+ }
+ }
if (TM.shouldAssumeDSOLocal(M, GV))
return classifyLocalReference(GV);
@@ -195,7 +208,6 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
FullFS = "+sahf";
}
-
// Parse features string and set the CPU.
ParseSubtargetFeatures(CPUName, FullFS);
@@ -263,7 +275,6 @@ void X86Subtarget::initializeEnvironment() {
HasVBMI = false;
HasIFMA = false;
HasRTM = false;
- HasHLE = false;
HasERI = false;
HasCDI = false;
HasPFI = false;
@@ -277,6 +288,7 @@ void X86Subtarget::initializeEnvironment() {
HasRDSEED = false;
HasLAHFSAHF = false;
HasMWAITX = false;
+ HasCLZERO = false;
HasMPX = false;
IsBTMemSlow = false;
IsPMULLDSlow = false;
@@ -286,10 +298,11 @@ void X86Subtarget::initializeEnvironment() {
HasSSEUnalignedMem = false;
HasCmpxchg16b = false;
UseLeaForSP = false;
- HasFastPartialYMMWrite = false;
+ HasFastPartialYMMorZMMWrite = false;
HasFastScalarFSQRT = false;
HasFastVectorFSQRT = false;
HasFastLZCNT = false;
+ HasFastSHLDRotate = false;
HasSlowDivide32 = false;
HasSlowDivide64 = false;
PadShortFunctions = false;
@@ -321,7 +334,7 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
TargetTriple.getEnvironment() != Triple::CODE16),
In16BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() == Triple::CODE16),
- TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) {
// Determine the PICStyle based on the target selected.
if (!isPositionIndependent())
@@ -359,4 +372,3 @@ const RegisterBankInfo *X86Subtarget::getRegBankInfo() const {
bool X86Subtarget::enableEarlyIfConversion() const {
return hasCMov() && X86EarlyIfConv;
}
-
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index d80dc4a9b5e8..d0d88d326949 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -18,33 +18,36 @@
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86SelectionDAGInfo.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include <string>
+#include <memory>
#define GET_SUBTARGETINFO_HEADER
#include "X86GenSubtargetInfo.inc"
namespace llvm {
+
class GlobalValue;
-class StringRef;
-class TargetMachine;
/// The X86 backend supports a number of different styles of PIC.
///
namespace PICStyles {
+
enum Style {
StubPIC, // Used on i386-darwin in pic mode.
GOT, // Used on 32 bit elf on when in pic mode.
RIPRel, // Used on X86-64 when in pic mode.
None // Set when not in pic mode.
};
-}
-class X86Subtarget final : public X86GenSubtargetInfo {
+} // end namespace PICStyles
+class X86Subtarget final : public X86GenSubtargetInfo {
protected:
enum X86SSEEnum {
NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
@@ -96,10 +99,13 @@ protected:
/// Target has XSAVE instructions
bool HasXSAVE;
+
/// Target has XSAVEOPT instructions
bool HasXSAVEOPT;
+
/// Target has XSAVEC instructions
bool HasXSAVEC;
+
/// Target has XSAVES instructions
bool HasXSAVES;
@@ -148,9 +154,6 @@ protected:
/// Processor has RTM instructions.
bool HasRTM;
- /// Processor has HLE.
- bool HasHLE;
-
/// Processor has ADX instructions.
bool HasADX;
@@ -169,6 +172,9 @@ protected:
/// Processor has MONITORX/MWAITX instructions.
bool HasMWAITX;
+ /// Processor has Cache Line Zero instruction
+ bool HasCLZERO;
+
/// Processor has Prefetch with intent to Write instruction
bool HasPFPREFETCHWT1;
@@ -201,8 +207,8 @@ protected:
bool UseLeaForSP;
/// True if there is no performance penalty to writing only the lower parts
- /// of a YMM register without clearing the upper part.
- bool HasFastPartialYMMWrite;
+ /// of a YMM or ZMM register without clearing the upper part.
+ bool HasFastPartialYMMorZMMWrite;
/// True if hardware SQRTSS instruction is at least as fast (latency) as
/// RSQRTSS followed by a Newton-Raphson iteration.
@@ -223,6 +229,9 @@ protected:
/// True if LZCNT instruction is fast.
bool HasFastLZCNT;
+ /// True if SHLD based rotate is fast.
+ bool HasFastSHLDRotate;
+
/// True if the short functions should be padded to prevent
/// a stall when returning too early.
bool PadShortFunctions;
@@ -265,24 +274,12 @@ protected:
/// Processor supports MPX - Memory Protection Extensions
bool HasMPX;
- /// Processor supports Invalidate Process-Context Identifier
- bool HasInvPCId;
-
- /// Processor has VM Functions
- bool HasVMFUNC;
-
- /// Processor has Supervisor Mode Access Protection
- bool HasSMAP;
-
/// Processor has Software Guard Extensions
bool HasSGX;
/// Processor supports Flush Cache Line instruction
bool HasCLFLUSHOPT;
- /// Processor has Persistent Commit feature
- bool HasPCOMMIT;
-
/// Processor supports Cache Line Write Back instruction
bool HasCLWB;
@@ -307,8 +304,8 @@ protected:
/// This is used to avoid ifndefs spreading around while GISel is
/// an optional library.
std::unique_ptr<GISelAccessor> GISel;
-private:
+private:
/// Override the stack alignment.
unsigned StackAlignOverride;
@@ -341,13 +338,17 @@ public:
const X86TargetLowering *getTargetLowering() const override {
return &TLInfo;
}
+
const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
+
const X86FrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
+
const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
+
const X86RegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
@@ -370,12 +371,14 @@ public:
const InstructionSelector *getInstructionSelector() const override;
const LegalizerInfo *getLegalizerInfo() const override;
const RegisterBankInfo *getRegBankInfo() const override;
+
private:
/// Initialize the full set of dependencies so we can use an initializer
/// list for X86Subtarget.
X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
void initializeEnvironment();
void initSubtargetFeatures(StringRef CPU, StringRef FS);
+
public:
/// Is this x86_64? (disregarding specific ABI / programming model)
bool is64Bit() const {
@@ -432,9 +435,9 @@ public:
bool hasPCLMUL() const { return HasPCLMUL; }
// Prefer FMA4 to FMA - its better for commutation/memory folding and
// has equal or better performance on all supported targets.
- bool hasFMA() const { return HasFMA && !HasFMA4; }
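+ // All AVX-512 capable processors support FMA, so treat AVX-512 as implying
+ // the FMA feature even when the flag is not set explicitly.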
+ bool hasFMA() const { return (HasFMA || hasAVX512()) && !HasFMA4; }
bool hasFMA4() const { return HasFMA4; }
- bool hasAnyFMA() const { return hasFMA() || hasFMA4() || hasAVX512(); }
+ bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
bool hasXOP() const { return HasXOP; }
bool hasTBM() const { return HasTBM; }
bool hasMOVBE() const { return HasMOVBE; }
@@ -447,13 +450,13 @@ public:
bool hasVBMI() const { return HasVBMI; }
bool hasIFMA() const { return HasIFMA; }
bool hasRTM() const { return HasRTM; }
- bool hasHLE() const { return HasHLE; }
bool hasADX() const { return HasADX; }
bool hasSHA() const { return HasSHA; }
bool hasPRFCHW() const { return HasPRFCHW; }
bool hasRDSEED() const { return HasRDSEED; }
bool hasLAHFSAHF() const { return HasLAHFSAHF; }
bool hasMWAITX() const { return HasMWAITX; }
+ bool hasCLZERO() const { return HasCLZERO; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isSHLDSlow() const { return IsSHLDSlow; }
bool isPMULLDSlow() const { return IsPMULLDSlow; }
@@ -462,10 +465,13 @@ public:
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
- bool hasFastPartialYMMWrite() const { return HasFastPartialYMMWrite; }
+ bool hasFastPartialYMMorZMMWrite() const {
+ return HasFastPartialYMMorZMMWrite;
+ }
bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }
bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
bool hasFastLZCNT() const { return HasFastLZCNT; }
+ bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }
bool padShortFunctions() const { return PadShortFunctions; }
@@ -481,8 +487,9 @@ public:
bool hasVLX() const { return HasVLX; }
bool hasPKU() const { return HasPKU; }
bool hasMPX() const { return HasMPX; }
+ bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
- virtual bool isXRaySupported() const override { return is64Bit(); }
+ bool isXRaySupported() const override { return is64Bit(); }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
bool isSLM() const { return X86ProcFamily == IntelSLM; }
@@ -513,6 +520,7 @@ public:
bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
+ bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
bool isTargetWindowsMSVC() const {
return TargetTriple.isWindowsMSVCEnvironment();
@@ -616,6 +624,9 @@ public:
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
+ // TODO: Update the regression tests and return true.
+ bool supportPrintSchedInfo() const override { return false; }
+
bool enableEarlyIfConversion() const override;
/// Return the instruction itineraries based on the subtarget selection.
@@ -628,6 +639,6 @@ public:
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index aa5cfc64e9eb..03a1958121ab 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -11,22 +11,47 @@
//
//===----------------------------------------------------------------------===//
-#include "X86TargetMachine.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86.h"
#include "X86CallLowering.h"
+#include "X86LegalizerInfo.h"
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+#include "X86RegisterBankInfo.h"
+#endif
+#include "X86MacroFusion.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "X86TargetTransformInfo.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/ExecutionDepsFix.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
+#include <memory>
+#include <string>
+
using namespace llvm;
static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
@@ -34,8 +59,11 @@ static cl::opt<bool> EnableMachineCombinerPass("x86-machine-combiner",
cl::init(true), cl::Hidden);
namespace llvm {
+
void initializeWinEHStatePassPass(PassRegistry &);
-}
+void initializeX86ExecutionDepsFixPass(PassRegistry &);
+
+} // end namespace llvm
extern "C" void LLVMInitializeX86Target() {
// Register the target.
@@ -47,27 +75,28 @@ extern "C" void LLVMInitializeX86Target() {
initializeWinEHStatePassPass(PR);
initializeFixupBWInstPassPass(PR);
initializeEvexToVexInstPassPass(PR);
+ initializeX86ExecutionDepsFixPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
if (TT.isOSBinFormatMachO()) {
if (TT.getArch() == Triple::x86_64)
- return make_unique<X86_64MachoTargetObjectFile>();
- return make_unique<TargetLoweringObjectFileMachO>();
+ return llvm::make_unique<X86_64MachoTargetObjectFile>();
+ return llvm::make_unique<TargetLoweringObjectFileMachO>();
}
if (TT.isOSFreeBSD())
- return make_unique<X86FreeBSDTargetObjectFile>();
+ return llvm::make_unique<X86FreeBSDTargetObjectFile>();
if (TT.isOSLinux() || TT.isOSNaCl())
- return make_unique<X86LinuxNaClTargetObjectFile>();
+ return llvm::make_unique<X86LinuxNaClTargetObjectFile>();
if (TT.isOSFuchsia())
- return make_unique<X86FuchsiaTargetObjectFile>();
+ return llvm::make_unique<X86FuchsiaTargetObjectFile>();
if (TT.isOSBinFormatELF())
- return make_unique<X86ELFTargetObjectFile>();
+ return llvm::make_unique<X86ELFTargetObjectFile>();
if (TT.isKnownWindowsMSVCEnvironment() || TT.isWindowsCoreCLREnvironment())
- return make_unique<X86WindowsTargetObjectFile>();
+ return llvm::make_unique<X86WindowsTargetObjectFile>();
if (TT.isOSBinFormatCOFF())
- return make_unique<TargetLoweringObjectFileCOFF>();
+ return llvm::make_unique<TargetLoweringObjectFileCOFF>();
llvm_unreachable("unknown subtarget type");
}
@@ -177,31 +206,37 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
initAsmInfo();
}
-X86TargetMachine::~X86TargetMachine() {}
+X86TargetMachine::~X86TargetMachine() = default;
#ifdef LLVM_BUILD_GLOBAL_ISEL
namespace {
+
struct X86GISelActualAccessor : public GISelAccessor {
- std::unique_ptr<CallLowering> CL;
- X86GISelActualAccessor(CallLowering* CL): CL(CL) {}
+ std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+
const CallLowering *getCallLowering() const override {
- return CL.get();
+ return CallLoweringInfo.get();
}
+
const InstructionSelector *getInstructionSelector() const override {
- //TODO: Implement
- return nullptr;
+ return InstSelector.get();
}
+
const LegalizerInfo *getLegalizerInfo() const override {
- //TODO: Implement
- return nullptr;
+ return Legalizer.get();
}
+
const RegisterBankInfo *getRegBankInfo() const override {
- //TODO: Implement
- return nullptr;
+ return RegBankInfo.get();
}
};
-} // End anonymous namespace.
+
+} // end anonymous namespace
#endif
+
const X86Subtarget *
X86TargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
@@ -244,8 +279,14 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
#ifndef LLVM_BUILD_GLOBAL_ISEL
GISelAccessor *GISel = new GISelAccessor();
#else
- X86GISelActualAccessor *GISel = new X86GISelActualAccessor(
- new X86CallLowering(*I->getTargetLowering()));
+ X86GISelActualAccessor *GISel = new X86GISelActualAccessor();
+
+ GISel->CallLoweringInfo.reset(new X86CallLowering(*I->getTargetLowering()));
+ GISel->Legalizer.reset(new X86LegalizerInfo(*I, *this));
+
+ auto *RBI = new X86RegisterBankInfo(*I->getRegisterInfo());
+ GISel->RegBankInfo.reset(RBI);
+ GISel->InstSelector.reset(createX86InstructionSelector(*I, *RBI));
#endif
I->setGISelAccessor(*GISel);
}
@@ -270,12 +311,12 @@ TargetIRAnalysis X86TargetMachine::getTargetIRAnalysis() {
});
}
-
//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
namespace {
+
/// X86 Code Generator Pass Configuration Options.
class X86PassConfig : public TargetPassConfig {
public:
@@ -289,7 +330,7 @@ public:
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override {
ScheduleDAGMILive *DAG = createGenericSchedLive(C);
- DAG->addMutation(createMacroFusionDAGMutation(DAG->TII));
+ DAG->addMutation(createX86MacroFusionDAGMutation());
return DAG;
}
@@ -301,14 +342,28 @@ public:
bool addRegBankSelect() override;
bool addGlobalInstructionSelect() override;
#endif
-bool addILPOpts() override;
+ bool addILPOpts() override;
bool addPreISel() override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
void addPreEmitPass() override;
void addPreSched2() override;
};
-} // namespace
+
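+/// Runs the generic execution-dependency fix over the X86 vector register
+/// file (VR128X, which covers XMM0-XMM31).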
+class X86ExecutionDepsFix : public ExecutionDepsFix {
+public:
+ static char ID;
+ X86ExecutionDepsFix() : ExecutionDepsFix(ID, X86::VR128XRegClass) {}
+ StringRef getPassName() const override {
+ return "X86 Execution Dependency Fix";
+ }
+};
+char X86ExecutionDepsFix::ID;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(X86ExecutionDepsFix, "x86-execution-deps-fix",
+ "X86 Execution Dependency Fix", false, false)
TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
return new X86PassConfig(this, PM);
@@ -343,17 +398,17 @@ bool X86PassConfig::addIRTranslator() {
}
bool X86PassConfig::addLegalizeMachineIR() {
- //TODO: Implement
+ addPass(new Legalizer());
return false;
}
bool X86PassConfig::addRegBankSelect() {
- //TODO: Implement
+ addPass(new RegBankSelect());
return false;
}
bool X86PassConfig::addGlobalInstructionSelect() {
- //TODO: Implement
+ addPass(new InstructionSelect());
return false;
}
#endif
@@ -391,7 +446,7 @@ void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
void X86PassConfig::addPreEmitPass() {
if (getOptLevel() != CodeGenOpt::None)
- addPass(createExecutionDependencyFixPass(&X86::VR128XRegClass));
+ addPass(new X86ExecutionDepsFix());
if (UseVZeroUpper)
addPass(createX86IssueVZeroUpperPass());
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index d756d07926dd..cf933f52604e 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -13,14 +13,20 @@
#ifndef LLVM_LIB_TARGET_X86_X86TARGETMACHINE_H
#define LLVM_LIB_TARGET_X86_X86TARGETMACHINE_H
-#include "X86InstrInfo.h"
+
#include "X86Subtarget.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
+#include <memory>
namespace llvm {
class StringRef;
+class X86Subtarget;
+class X86RegisterBankInfo;
class X86TargetMachine final : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
@@ -32,17 +38,19 @@ public:
Optional<Reloc::Model> RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
~X86TargetMachine() override;
+
const X86Subtarget *getSubtargetImpl(const Function &F) const override;
TargetIRAnalysis getTargetIRAnalysis() override;
// Set up the pass pipeline.
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_X86_X86TARGETMACHINE_H
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 5715d826862e..b742fb472372 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -78,7 +78,7 @@ unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) {
return 8;
}
-unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) {
+unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) const {
if (Vector) {
if (ST->hasAVX512())
return 512;
@@ -95,6 +95,10 @@ unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) {
return 32;
}
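+// Vector loads and stores can use the widest legal vector register, so simply
+// forward the vector register bit width.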
+unsigned X86TTIImpl::getLoadStoreVecRegBitWidth(unsigned) const {
+ return getRegisterBitWidth(true);
+}
+
unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
// If the loop will not be vectorized, don't interleave the loop.
// Let regular unroll to unroll the loop, which saves the overflow
@@ -114,7 +118,7 @@ unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
}
int X86TTIImpl::getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
+ unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo,
@@ -207,6 +211,10 @@ int X86TTIImpl::getArithmeticInstrCost(
}
static const CostTblEntry AVX512UniformConstCostTable[] = {
+ { ISD::SRA, MVT::v2i64, 1 }, // vpsraq
+ { ISD::SRA, MVT::v4i64, 1 }, // vpsraq
+ { ISD::SRA, MVT::v8i64, 1 }, // vpsraq
+
{ ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence
{ ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
};
@@ -319,6 +327,14 @@ int X86TTIImpl::getArithmeticInstrCost(
return LT.first * Entry->Cost;
static const CostTblEntry AVX512BWCostTable[] = {
+ { ISD::SHL, MVT::v8i16, 1 }, // vpsllvw
+ { ISD::SRL, MVT::v8i16, 1 }, // vpsrlvw
+ { ISD::SRA, MVT::v8i16, 1 }, // vpsravw
+
+ { ISD::SHL, MVT::v16i16, 1 }, // vpsllvw
+ { ISD::SRL, MVT::v16i16, 1 }, // vpsrlvw
+ { ISD::SRA, MVT::v16i16, 1 }, // vpsravw
+
{ ISD::SHL, MVT::v32i16, 1 }, // vpsllvw
{ ISD::SRL, MVT::v32i16, 1 }, // vpsrlvw
{ ISD::SRA, MVT::v32i16, 1 }, // vpsravw
@@ -347,8 +363,12 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v16i32, 1 },
{ ISD::SRL, MVT::v16i32, 1 },
{ ISD::SRA, MVT::v16i32, 1 },
+
{ ISD::SHL, MVT::v8i64, 1 },
{ ISD::SRL, MVT::v8i64, 1 },
+
+ { ISD::SRA, MVT::v2i64, 1 },
+ { ISD::SRA, MVT::v4i64, 1 },
{ ISD::SRA, MVT::v8i64, 1 },
{ ISD::MUL, MVT::v32i8, 13 }, // extend/pmullw/trunc sequence.
@@ -595,7 +615,6 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
- { ISD::SHL, MVT::v8i32, 2*2*5 }, // We optimized this using mul.
{ ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence.
{ ISD::SHL, MVT::v4i64, 2*4 }, // splat+shuffle sequence.
@@ -804,7 +823,14 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
{ TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
- { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb
+ { TTI::SK_Alternate, MVT::v32i8, 1 }, // vpblendvb
+
+ { TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq
+ { TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd
+ { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vperm2i128 + 2 * vpshufb
+ // + vpblendvb
+ { TTI::SK_PermuteSingleSrc, MVT::v32i8, 4 } // vperm2i128 + 2 * vpshufb
+ // + vpblendvb
};
if (ST->hasAVX2())
@@ -861,7 +887,10 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
{ TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por
- { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por
+ { TTI::SK_Alternate, MVT::v16i8, 3 }, // pshufb + pshufb + por
+
+ { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // pshufb
+ { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 } // pshufb
};
if (ST->hasSSSE3())
@@ -886,7 +915,10 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
{ TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
{ TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
- { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por
+ { TTI::SK_Alternate, MVT::v16i8, 3 }, // pand + pandn + por
+
+ { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // pshufd
+ { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 } // pshufd
};
if (ST->hasSSE2())
@@ -906,7 +938,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
-int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -1272,7 +1305,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
-int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
+int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I) {
// Legalize the type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
@@ -1338,11 +1372,12 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
- return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF) {
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) {
// Costs should match the codegen from:
// BITREVERSE: llvm\test\CodeGen\X86\vector-bitreverse.ll
// BSWAP: llvm\test\CodeGen\X86\bswap-vector.ll
@@ -1418,8 +1453,8 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::FSQRT, MVT::v4f64, 43 }, // SNB from http://www.agner.org/
};
static const CostTblEntry SSE42CostTbl[] = {
- { ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
+ { ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
};
static const CostTblEntry SSSE3CostTbl[] = {
{ ISD::BITREVERSE, MVT::v2i64, 5 },
@@ -1443,6 +1478,10 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTTZ, MVT::v16i8, 9 }
};
static const CostTblEntry SSE2CostTbl[] = {
+ { ISD::BITREVERSE, MVT::v2i64, 29 },
+ { ISD::BITREVERSE, MVT::v4i32, 27 },
+ { ISD::BITREVERSE, MVT::v8i16, 27 },
+ { ISD::BITREVERSE, MVT::v16i8, 20 },
{ ISD::BSWAP, MVT::v2i64, 7 },
{ ISD::BSWAP, MVT::v4i32, 7 },
{ ISD::BSWAP, MVT::v8i16, 7 },
@@ -1462,8 +1501,16 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::FSQRT, MVT::v2f64, 32 }, // Nehalem from http://www.agner.org/
};
static const CostTblEntry SSE1CostTbl[] = {
- { ISD::FSQRT, MVT::f32, 28 }, // Pentium III from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/
+ { ISD::FSQRT, MVT::f32, 28 }, // Pentium III from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/
+ };
+ static const CostTblEntry X64CostTbl[] = { // 64-bit targets
+ { ISD::BITREVERSE, MVT::i64, 14 }
+ };
+ static const CostTblEntry X86CostTbl[] = { // 32 or 64-bit targets
+ { ISD::BITREVERSE, MVT::i32, 14 },
+ { ISD::BITREVERSE, MVT::i16, 14 },
+ { ISD::BITREVERSE, MVT::i8, 11 }
};
unsigned ISD = ISD::DELETED_NODE;
@@ -1523,12 +1570,19 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
- return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF);
+ if (ST->is64Bit())
+ if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF,
+ ScalarizationCostPassed);
}
int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF) {
- return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF);
+ ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
+ return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF);
}
int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
@@ -1562,22 +1616,8 @@ int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
}
-int X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
- assert (Ty->isVectorTy() && "Can only scalarize vectors");
- int Cost = 0;
-
- for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
- if (Insert)
- Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i);
- if (Extract)
- Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i);
- }
-
- return Cost;
-}
-
int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) {
+ unsigned AddressSpace, const Instruction *I) {
// Handle non-power-of-two vectors such as <3 x float>
if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
unsigned NumElem = VTy->getVectorNumElements();
@@ -2132,7 +2172,7 @@ bool X86TTIImpl::enableInterleavedAccessVectorization() {
// TODO: We expect this to be beneficial regardless of arch,
// but there are currently some unexplained performance artifacts on Atom.
// As a temporary solution, disable on Atom.
- return !(ST->isAtom() || ST->isSLM());
+ return !(ST->isAtom());
}
// Get estimation for interleaved load/store operations and strided load.
diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h
index ecaaf951cff7..9bef9e80c395 100644
--- a/lib/Target/X86/X86TargetTransformInfo.h
+++ b/lib/Target/X86/X86TargetTransformInfo.h
@@ -33,8 +33,6 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
const X86Subtarget *ST;
const X86TargetLowering *TLI;
- int getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
-
const X86Subtarget *getST() const { return ST; }
const X86TargetLowering *getTLI() const { return TLI; }
@@ -53,7 +51,8 @@ public:
/// @{
unsigned getNumberOfRegisters(bool Vector);
- unsigned getRegisterBitWidth(bool Vector);
+ unsigned getRegisterBitWidth(bool Vector) const;
+ unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
unsigned getMaxInterleaveFactor(unsigned VF);
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
@@ -63,11 +62,13 @@ public:
TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
ArrayRef<const Value *> Args = ArrayRef<const Value *>());
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
- int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
- int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+ int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr);
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I = nullptr);
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace);
+ unsigned AddressSpace, const Instruction *I = nullptr);
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace);
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
@@ -76,9 +77,11 @@ public:
const SCEV *Ptr);
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF);
+ ArrayRef<Type *> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX);
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF);
+ ArrayRef<Value *> Args, FastMathFlags FMF,
+ unsigned VF = 1);
int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm);
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 9766b84be652..d17dfac6a997 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -56,11 +56,11 @@ namespace {
// Core algorithm state:
// BlockState - Each block is either:
- // - PASS_THROUGH: There are neither YMM dirtying instructions nor
+ // - PASS_THROUGH: There are neither YMM/ZMM dirtying instructions nor
// vzeroupper instructions in this block.
// - EXITS_CLEAN: There is (or will be) a vzeroupper instruction in this
- // block that will ensure that YMM is clean on exit.
- // - EXITS_DIRTY: An instruction in the block dirties YMM and no
+ // block that will ensure that YMM/ZMM is clean on exit.
+ // - EXITS_DIRTY: An instruction in the block dirties YMM/ZMM and no
// subsequent vzeroupper in the block clears it.
//
// AddedToDirtySuccessors - This flag is raised when a block is added to the
@@ -97,6 +97,7 @@ FunctionPass *llvm::createX86IssueVZeroUpperPass() {
return new VZeroUpperInserter();
}
+#ifndef NDEBUG
const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) {
switch (ST) {
case PASS_THROUGH: return "Pass-through";
@@ -105,52 +106,56 @@ const char* VZeroUpperInserter::getBlockExitStateName(BlockExitState ST) {
}
llvm_unreachable("Invalid block exit state.");
}
+#endif
-static bool isYmmReg(unsigned Reg) {
- return (Reg >= X86::YMM0 && Reg <= X86::YMM15);
+/// VZEROUPPER cleans state that is related to Y/ZMM0-15 only.
+/// Thus, there is no need to check for Y/ZMM16 and above.
+static bool isYmmOrZmmReg(unsigned Reg) {
+ return (Reg >= X86::YMM0 && Reg <= X86::YMM15) ||
+ (Reg >= X86::ZMM0 && Reg <= X86::ZMM15);
}
-static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
+static bool checkFnHasLiveInYmmOrZmm(MachineRegisterInfo &MRI) {
for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(),
E = MRI.livein_end(); I != E; ++I)
- if (isYmmReg(I->first))
+ if (isYmmOrZmmReg(I->first))
return true;
return false;
}
-static bool clobbersAllYmmRegs(const MachineOperand &MO) {
+static bool clobbersAllYmmAndZmmRegs(const MachineOperand &MO) {
for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
if (!MO.clobbersPhysReg(reg))
return false;
}
+ for (unsigned reg = X86::ZMM0; reg <= X86::ZMM15; ++reg) {
+ if (!MO.clobbersPhysReg(reg))
+ return false;
+ }
return true;
}
-static bool hasYmmReg(MachineInstr &MI) {
+static bool hasYmmOrZmmReg(MachineInstr &MI) {
for (const MachineOperand &MO : MI.operands()) {
- if (MI.isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO))
+ if (MI.isCall() && MO.isRegMask() && !clobbersAllYmmAndZmmRegs(MO))
return true;
if (!MO.isReg())
continue;
if (MO.isDebug())
continue;
- if (isYmmReg(MO.getReg()))
+ if (isYmmOrZmmReg(MO.getReg()))
return true;
}
return false;
}
-/// Check if any YMM register will be clobbered by this instruction.
-static bool callClobbersAnyYmmReg(MachineInstr &MI) {
+/// Check if given call instruction has a RegMask operand.
+static bool callHasRegMask(MachineInstr &MI) {
assert(MI.isCall() && "Can only be called on call instructions.");
for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isRegMask())
- continue;
- for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
- if (MO.clobbersPhysReg(reg))
- return true;
- }
+ if (MO.isRegMask())
+ return true;
}
return false;
}
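// Editor's note: a self-contained sketch (hypothetical, heavily simplified
// from the pass above) of the per-block exit-state computation described in
// the BlockState comment: a block starts as PASS_THROUGH, any YMM/ZMM
// dirtying instruction drives it to EXITS_DIRTY, and a vzeroupper (existing
// or inserted) brings it back to EXITS_CLEAN.
namespace vzu_sketch {
enum BlockExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };
enum InstKind { REGULAR, DIRTIES_YMM_OR_ZMM, VZERO };

static BlockExitState blockExitState(const InstKind *Insts, unsigned N) {
  BlockExitState State = PASS_THROUGH;
  for (unsigned I = 0; I != N; ++I) {
    if (Insts[I] == VZERO)
      State = EXITS_CLEAN;          // upper halves are clean from here on
    else if (Insts[I] == DIRTIES_YMM_OR_ZMM)
      State = EXITS_DIRTY;          // dirty until a later vzeroupper
  }
  return State;
}
} // end namespace vzu_sketch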
@@ -175,17 +180,20 @@ void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
/// Loop over all of the instructions in the basic block, inserting vzeroupper
/// instructions before function calls.
void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
-
// Start by assuming that the block is PASS_THROUGH which implies no unguarded
// calls.
BlockExitState CurState = PASS_THROUGH;
BlockStates[MBB.getNumber()].FirstUnguardedCall = MBB.end();
for (MachineInstr &MI : MBB) {
+ bool IsCall = MI.isCall();
+ bool IsReturn = MI.isReturn();
+ bool IsControlFlow = IsCall || IsReturn;
+
// No need for vzeroupper before iret in an interrupt handler function;
- // epilogue will restore YMM registers if needed.
- bool IsReturnFromX86INTR = IsX86INTR && MI.isReturn();
- bool IsControlFlow = MI.isCall() || MI.isReturn();
+ // the epilogue will restore YMM/ZMM registers if needed.
+ if (IsX86INTR && IsReturn)
+ continue;
// An existing VZERO* instruction resets the state.
if (MI.getOpcode() == X86::VZEROALL || MI.getOpcode() == X86::VZEROUPPER) {
@@ -194,30 +202,30 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
}
// Shortcut: don't need to check regular instructions in dirty state.
- if ((!IsControlFlow || IsReturnFromX86INTR) && CurState == EXITS_DIRTY)
+ if (!IsControlFlow && CurState == EXITS_DIRTY)
continue;
- if (hasYmmReg(MI)) {
- // We found a ymm-using instruction; this could be an AVX instruction,
- // or it could be control flow.
+ if (hasYmmOrZmmReg(MI)) {
+ // We found a ymm/zmm-using instruction; this could be an AVX/AVX512
+ // instruction, or it could be control flow.
CurState = EXITS_DIRTY;
continue;
}
// Check for control-flow out of the current function (which might
// indirectly execute SSE instructions).
- if (!IsControlFlow || IsReturnFromX86INTR)
+ if (!IsControlFlow)
continue;
- // If the call won't clobber any YMM register, skip it as well. It usually
- // happens on helper function calls (such as '_chkstk', '_ftol2') where
- // standard calling convention is not used (RegMask is not used to mark
- // register clobbered and register usage (def/imp-def/use) is well-defined
- // and explicitly specified.
- if (MI.isCall() && !callClobbersAnyYmmReg(MI))
+ // If the call has no RegMask, skip it as well. This usually happens on
+ // helper function calls (such as '_chkstk', '_ftol2') where a standard
+ // calling convention is not used (RegMask is not used to mark clobbered
+ // registers, and register usage (def/imp-def/use) is well-defined and
+ // explicitly specified).
+ if (IsCall && !callHasRegMask(MI))
continue;
- // The VZEROUPPER instruction resets the upper 128 bits of all AVX
+ // The VZEROUPPER instruction resets the upper 128 bits of YMM0-YMM15
// registers. In addition, the processor changes back to Clean state, after
// which execution of SSE instructions or AVX instructions has no transition
// penalty. Add the VZEROUPPER instruction before any function call/return
@@ -226,7 +234,7 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
// predecessor block.
if (CurState == EXITS_DIRTY) {
// After the inserted VZEROUPPER the state becomes clean again, but
- // other YMM may appear before other subsequent calls or even before
+ // other YMM/ZMM may appear before other subsequent calls or even before
// the end of the BB.
insertVZeroUpper(MI, MBB);
CurState = EXITS_CLEAN;
@@ -257,30 +265,32 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
/// function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
- if (!ST.hasAVX() || ST.hasAVX512() || ST.hasFastPartialYMMWrite())
+ if (!ST.hasAVX() || ST.hasFastPartialYMMorZMMWrite())
return false;
TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
EverMadeChange = false;
IsX86INTR = MF.getFunction()->getCallingConv() == CallingConv::X86_INTR;
- bool FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);
-
- // Fast check: if the function doesn't use any ymm registers, we don't need
- // to insert any VZEROUPPER instructions. This is constant-time, so it is
- // cheap in the common case of no ymm use.
- bool YMMUsed = FnHasLiveInYmm;
- if (!YMMUsed) {
- const TargetRegisterClass *RC = &X86::VR256RegClass;
- for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e;
- i++) {
- if (!MRI.reg_nodbg_empty(*i)) {
- YMMUsed = true;
- break;
+ bool FnHasLiveInYmmOrZmm = checkFnHasLiveInYmmOrZmm(MRI);
+
+ // Fast check: if the function doesn't use any ymm/zmm registers, we don't
+ // need to insert any VZEROUPPER instructions. This is constant-time, so it
+ // is cheap in the common case of no ymm/zmm use.
+ bool YmmOrZmmUsed = FnHasLiveInYmmOrZmm;
+ const TargetRegisterClass *RCs[2] = {&X86::VR256RegClass, &X86::VR512RegClass};
+ for (auto *RC : RCs) {
+ if (!YmmOrZmmUsed) {
+ for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); i != e;
+ i++) {
+ if (!MRI.reg_nodbg_empty(*i)) {
+ YmmOrZmmUsed = true;
+ break;
+ }
}
}
}
- if (!YMMUsed) {
+ if (!YmmOrZmmUsed) {
return false;
}
@@ -294,9 +304,9 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
processBasicBlock(MBB);
- // If any YMM regs are live-in to this function, add the entry block to the
- // DirtySuccessors list
- if (FnHasLiveInYmm)
+ // If any YMM/ZMM regs are live-in to this function, add the entry block to
+ // the DirtySuccessors list
+ if (FnHasLiveInYmmOrZmm)
addDirtySuccessor(MF.front());
// Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
index 500c84d2a418..b03c1852281d 100644
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
@@ -12,13 +12,15 @@
//===----------------------------------------------------------------------===//
#include "XCoreInstPrinter.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
using namespace llvm;
#define DEBUG_TYPE "asm-printer"
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
index dc513f7b225b..8a7efe2e39c6 100644
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
@@ -15,6 +15,8 @@
#ifndef LLVM_LIB_TARGET_XCORE_INSTPRINTER_XCOREINSTPRINTER_H
#define LLVM_LIB_TARGET_XCORE_INSTPRINTER_XCOREINSTPRINTER_H
+
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
@@ -32,12 +34,14 @@ public:
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
const MCSubtargetInfo &STI) override;
+
private:
void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O);
void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
};
+
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_XCORE_INSTPRINTER_XCOREINSTPRINTER_H
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index c5859b7786f7..5fc58d831319 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -11,15 +11,19 @@
//
//===----------------------------------------------------------------------===//
-#include "XCoreMCTargetDesc.h"
#include "InstPrinter/XCoreInstPrinter.h"
-#include "XCoreMCAsmInfo.h"
+#include "MCTargetDesc/XCoreMCAsmInfo.h"
+#include "MCTargetDesc/XCoreMCTargetDesc.h"
#include "XCoreTargetStreamer.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -79,20 +83,25 @@ static MCInstPrinter *createXCoreMCInstPrinter(const Triple &T,
}
XCoreTargetStreamer::XCoreTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
-XCoreTargetStreamer::~XCoreTargetStreamer() {}
+
+XCoreTargetStreamer::~XCoreTargetStreamer() = default;
namespace {
class XCoreTargetAsmStreamer : public XCoreTargetStreamer {
formatted_raw_ostream &OS;
+
public:
XCoreTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+
void emitCCTopData(StringRef Name) override;
void emitCCTopFunction(StringRef Name) override;
void emitCCBottomData(StringRef Name) override;
void emitCCBottomFunction(StringRef Name) override;
};
+} // end anonymous namespace
+
XCoreTargetAsmStreamer::XCoreTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS)
: XCoreTargetStreamer(S), OS(OS) {}
@@ -112,7 +121,6 @@ void XCoreTargetAsmStreamer::emitCCBottomData(StringRef Name) {
void XCoreTargetAsmStreamer::emitCCBottomFunction(StringRef Name) {
OS << "\t.cc_bottom " << Name << ".function\n";
}
-}
static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
index ac0f3fefbae7..1dc384fadf69 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -14,13 +14,13 @@
#ifndef LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
#define LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
-#include "llvm/Support/DataTypes.h"
-
namespace llvm {
+
class Target;
+
Target &getTheXCoreTarget();
-} // End llvm namespace
+} // end namespace llvm
// Defines symbolic names for XCore registers. This defines a mapping from
// register name to register number.
@@ -36,4 +36,4 @@ Target &getTheXCoreTarget();
#define GET_SUBTARGETINFO_ENUM
#include "XCoreGenSubtargetInfo.inc"
-#endif
+#endif // LLVM_LIB_TARGET_XCORE_MCTARGETDESC_XCOREMCTARGETDESC_H
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index e0e2e0319964..a752357400b3 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -238,7 +238,7 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
report_fatal_error("emitPrologue unsupported alignment: "
+ Twine(MFI.getMaxAlignment()));
- const AttributeSet &PAL = MF.getFunction()->getAttributes();
+ const AttributeList &PAL = MF.getFunction()->getAttributes();
if (PAL.hasAttrSomewhere(Attribute::Nest))
BuildMI(MBB, MBBI, dl, TII.get(XCore::LDWSP_ru6), XCore::R11).addImm(0);
// FIX: Needs addMemOperand() but can't use getFixedStack() or getStack().
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 9244d594460f..45437815fa37 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -483,7 +483,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(DL).setChain(Chain).setCallee(
+ CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
CallingConv::C, IntPtrTy,
DAG.getExternalSymbol("__misaligned_load",
getPointerTy(DAG.getDataLayout())),
@@ -1824,6 +1824,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
void XCoreTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 41813bbb8156..188f4f1fa06b 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -202,6 +202,7 @@ namespace llvm {
void computeKnownBitsForTargetNode(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
+ const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
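// Editor's note: a minimal sketch (hypothetical helper, not XCore's actual
// override) of what the new DemandedElts parameter enables: a target can
// bail out, or compute tighter bits, when only some vector lanes are live.
// Assumes llvm::APInt from "llvm/ADT/APInt.h".
static void computeKnownBitsSketch(llvm::APInt &KnownZero,
                                   llvm::APInt &KnownOne,
                                   const llvm::APInt &DemandedElts) {
  // Start from "nothing known", exactly as the XCore override above does.
  KnownZero = KnownOne = llvm::APInt(KnownZero.getBitWidth(), 0);
  if (DemandedElts == 0)
    return; // no lanes demanded: nothing useful to report
  // Per-node logic would go here, consulting only the demanded lanes.
}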
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index cdcc52fdc32d..cf469ec3cf1a 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -14,50 +14,39 @@
#ifndef LLVM_LIB_TARGET_XCORE_XCOREMACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_XCORE_XCOREMACHINEFUNCTIONINFO_H
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include <cassert>
+#include <utility>
#include <vector>
namespace llvm {
-// Forward declarations
-class Function;
-
/// XCoreFunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private XCore target-specific information for each
/// MachineFunction.
class XCoreFunctionInfo : public MachineFunctionInfo {
- virtual void anchor();
- bool LRSpillSlotSet;
+ bool LRSpillSlotSet = false;
int LRSpillSlot;
- bool FPSpillSlotSet;
+ bool FPSpillSlotSet = false;
int FPSpillSlot;
- bool EHSpillSlotSet;
+ bool EHSpillSlotSet = false;
int EHSpillSlot[2];
unsigned ReturnStackOffset;
- bool ReturnStackOffsetSet;
- int VarArgsFrameIndex;
- mutable int CachedEStackSize;
+ bool ReturnStackOffsetSet = false;
+ int VarArgsFrameIndex = 0;
+ mutable int CachedEStackSize = -1;
std::vector<std::pair<MachineBasicBlock::iterator, CalleeSavedInfo>>
SpillLabels;
+ virtual void anchor();
+
public:
- XCoreFunctionInfo() :
- LRSpillSlotSet(false),
- FPSpillSlotSet(false),
- EHSpillSlotSet(false),
- ReturnStackOffsetSet(false),
- VarArgsFrameIndex(0),
- CachedEStackSize(-1) {}
+ XCoreFunctionInfo() = default;
- explicit XCoreFunctionInfo(MachineFunction &MF) :
- LRSpillSlotSet(false),
- FPSpillSlotSet(false),
- EHSpillSlotSet(false),
- ReturnStackOffsetSet(false),
- VarArgsFrameIndex(0),
- CachedEStackSize(-1) {}
+ explicit XCoreFunctionInfo(MachineFunction &MF) {}
- ~XCoreFunctionInfo() {}
+ ~XCoreFunctionInfo() override = default;
void setVarArgsFrameIndex(int off) { VarArgsFrameIndex = off; }
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
@@ -101,6 +90,7 @@ public:
return SpillLabels;
}
};
-} // End llvm namespace
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_XCORE_XCOREMACHINEFUNCTIONINFO_H
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index c03b0afceba3..646309e02de8 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -35,11 +35,11 @@ SDValue XCoreSelectionDAGInfo::EmitTargetCodeForMemcpy(
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Chain)
- .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__memcpy_4",
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args))
+ .setLibCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(
+ "__memcpy_4", TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args))
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index bf3138f2164a..e28e05c7f6a8 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -10,15 +10,19 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/XCoreMCTargetDesc.h"
+#include "XCore.h"
#include "XCoreTargetMachine.h"
#include "XCoreTargetObjectFile.h"
#include "XCoreTargetTransformInfo.h"
-#include "XCore.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/TargetRegistry.h"
+
using namespace llvm;
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
@@ -38,14 +42,15 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const Triple &TT,
: LLVMTargetMachine(
T, "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32",
TT, CPU, FS, Options, getEffectiveRelocModel(RM), CM, OL),
- TLOF(make_unique<XCoreTargetObjectFile>()),
+ TLOF(llvm::make_unique<XCoreTargetObjectFile>()),
Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
-XCoreTargetMachine::~XCoreTargetMachine() {}
+XCoreTargetMachine::~XCoreTargetMachine() = default;
namespace {
+
/// XCore Code Generator Pass Configuration Options.
class XCorePassConfig : public TargetPassConfig {
public:
@@ -61,7 +66,8 @@ public:
bool addInstSelector() override;
void addPreEmitPass() override;
};
-} // namespace
+
+} // end anonymous namespace
TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) {
return new XCorePassConfig(this, PM);
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 4bd25bc8776c..2b53f01a996d 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -15,13 +15,19 @@
#define LLVM_LIB_TARGET_XCORE_XCORETARGETMACHINE_H
#include "XCoreSubtarget.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
+#include <memory>
namespace llvm {
class XCoreTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
XCoreSubtarget Subtarget;
+
public:
XCoreTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -38,6 +44,7 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
TargetIRAnalysis getTargetIRAnalysis() override;
+
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
@@ -45,4 +52,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_XCORE_XCORETARGETMACHINE_H
diff --git a/lib/Transforms/Coroutines/CoroElide.cpp b/lib/Transforms/Coroutines/CoroElide.cpp
index 99974d8da64c..c6ac3f614ff7 100644
--- a/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/lib/Transforms/Coroutines/CoroElide.cpp
@@ -92,7 +92,7 @@ static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) {
// Given a resume function @f.resume(%f.frame* %frame), returns %f.frame type.
static Type *getFrameType(Function *Resume) {
- auto *ArgType = Resume->getArgumentList().front().getType();
+ auto *ArgType = Resume->arg_begin()->getType();
return cast<PointerType>(ArgType)->getElementType();
}
@@ -127,7 +127,8 @@ void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) {
// is spilled into the coroutine frame and recreate the alignment information
// here. Possibly we will need to do a mini SROA here and break the coroutine
// frame into individual AllocaInst recreating the original alignment.
- auto *Frame = new AllocaInst(FrameTy, "", InsertPt);
+ const DataLayout &DL = F->getParent()->getDataLayout();
+ auto *Frame = new AllocaInst(FrameTy, DL.getAllocaAddrSpace(), "", InsertPt);
auto *FrameVoidPtr =
new BitCastInst(Frame, Type::getInt8PtrTy(C), "vFrame", InsertPt);
diff --git a/lib/Transforms/Coroutines/CoroFrame.cpp b/lib/Transforms/Coroutines/CoroFrame.cpp
index bb28558a29e2..19e6789dfa74 100644
--- a/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -133,6 +133,7 @@ struct SuspendCrossingInfo {
};
} // end anonymous namespace
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
BitVector const &BV) const {
dbgs() << Label << ":";
@@ -151,6 +152,7 @@ LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
}
dbgs() << "\n";
}
+#endif
SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
: Mapping(F) {
@@ -420,15 +422,31 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
report_fatal_error("Coroutines cannot handle non static allocas yet");
} else {
// Otherwise, create a store instruction storing the value into the
- // coroutine frame. For, argument, we will place the store instruction
- // right after the coroutine frame pointer instruction, i.e. bitcase of
- // coro.begin from i8* to %f.frame*. For all other values, the spill is
- // placed immediately after the definition.
- Builder.SetInsertPoint(
- isa<Argument>(CurrentValue)
- ? FramePtr->getNextNode()
- : dyn_cast<Instruction>(E.def())->getNextNode());
+ // coroutine frame.
+
+ Instruction *InsertPt = nullptr;
+ if (isa<Argument>(CurrentValue)) {
+ // For arguments, we will place the store instruction right after
+ // the coroutine frame pointer instruction, i.e. bitcast of
+ // coro.begin from i8* to %f.frame*.
+ InsertPt = FramePtr->getNextNode();
+ } else if (auto *II = dyn_cast<InvokeInst>(CurrentValue)) {
+ // If we are spilling the result of an invoke instruction, split the
+ // normal edge and insert the spill in the new block.
+ auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest());
+ InsertPt = NewBB->getTerminator();
+ } else if (isa<PHINode>(CurrentValue)) {
+ // Skip past PHI nodes and EH pad instructions.
+ InsertPt =
+ &*cast<Instruction>(E.def())->getParent()->getFirstInsertionPt();
+ } else {
+ // For all other values, the spill is placed immediately after
+ // the definition.
+ assert(!isa<TerminatorInst>(E.def()) && "unexpected terminator");
+ InsertPt = cast<Instruction>(E.def())->getNextNode();
+ }
+ Builder.SetInsertPoint(InsertPt);
auto *G = Builder.CreateConstInBoundsGEP2_32(
FrameTy, FramePtr, 0, Index,
CurrentValue->getName() + Twine(".spill.addr"));
@@ -484,7 +502,7 @@ static void rewritePHIs(BasicBlock &BB) {
// loop:
// %n.val = phi i32[%n, %entry], [%inc, %loop]
//
- // It will create:
+ // It will create:
//
// loop.from.entry:
// %n.loop.pre = phi i32 [%n, %entry]
@@ -687,13 +705,12 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
Spills.emplace_back(&I, U);
// Rewrite materializable instructions to be materialized at the use point.
- std::sort(Spills.begin(), Spills.end());
DEBUG(dump("Materializations", Spills));
rewriteMaterializableInstructions(Builder, Spills);
// Collect the spills for arguments and other not-materializable values.
Spills.clear();
- for (Argument &A : F.getArgumentList())
+ for (Argument &A : F.args())
for (User *U : A.users())
if (Checker.isDefinitionAcrossSuspend(A, U))
Spills.emplace_back(&A, U);
@@ -719,7 +736,6 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
Spills.emplace_back(&I, U);
}
}
- std::sort(Spills.begin(), Spills.end());
DEBUG(dump("Spills", Spills));
moveSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin);
Shape.FrameTy = buildFrameType(F, Shape, Spills);
diff --git a/lib/Transforms/Coroutines/CoroInstr.h b/lib/Transforms/Coroutines/CoroInstr.h
index e03cef4bfc46..5c666bdfea1f 100644
--- a/lib/Transforms/Coroutines/CoroInstr.h
+++ b/lib/Transforms/Coroutines/CoroInstr.h
@@ -23,6 +23,9 @@
// the Coroutine library.
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TRANSFORMS_COROUTINES_COROINSTR_H
+#define LLVM_LIB_TRANSFORMS_COROUTINES_COROINSTR_H
+
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -316,3 +319,5 @@ public:
};
} // End namespace llvm.
+
+#endif
diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp
index 7a3f4f60bae9..ab648f884c5b 100644
--- a/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -22,6 +22,7 @@
#include "CoroInternal.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
@@ -144,6 +145,33 @@ static void replaceFallthroughCoroEnd(IntrinsicInst *End,
BB->getTerminator()->eraseFromParent();
}
+// In resumers, we replace an unwind coro.end with 'true' to force an
+// immediate unwind to the caller.
+static void replaceUnwindCoroEnds(coro::Shape &Shape, ValueToValueMapTy &VMap) {
+ if (Shape.CoroEnds.empty())
+ return;
+
+ LLVMContext &Context = Shape.CoroEnds.front()->getContext();
+ auto *True = ConstantInt::getTrue(Context);
+ for (CoroEndInst *CE : Shape.CoroEnds) {
+ if (!CE->isUnwind())
+ continue;
+
+ auto *NewCE = cast<IntrinsicInst>(VMap[CE]);
+
+ // If coro.end has an associated bundle, add cleanupret instruction.
+ if (auto Bundle = NewCE->getOperandBundle(LLVMContext::OB_funclet)) {
+ Value *FromPad = Bundle->Inputs[0];
+ auto *CleanupRet = CleanupReturnInst::Create(FromPad, nullptr, NewCE);
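+ // Editor's note: splitBasicBlock moves NewCE (and everything after it)
+ // into a new block and leaves an unconditional branch behind CleanupRet;
+ // erasing that branch below makes the cleanupret the block terminator.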
+ NewCE->getParent()->splitBasicBlock(NewCE);
+ CleanupRet->getParent()->getTerminator()->eraseFromParent();
+ }
+
+ NewCE->replaceAllUsesWith(True);
+ NewCE->eraseFromParent();
+ }
+}
+
// Rewrite final suspend point handling. We do not use suspend index to
// represent the final suspend point. Instead we zero-out ResumeFnAddr in the
// coroutine frame, since it is undefined behavior to resume a coroutine
@@ -157,9 +185,9 @@ static void handleFinalSuspend(IRBuilder<> &Builder, Value *FramePtr,
coro::Shape &Shape, SwitchInst *Switch,
bool IsDestroy) {
assert(Shape.HasFinalSuspend);
- auto FinalCase = --Switch->case_end();
- BasicBlock *ResumeBB = FinalCase.getCaseSuccessor();
- Switch->removeCase(FinalCase);
+ auto FinalCaseIt = std::prev(Switch->case_end());
+ BasicBlock *ResumeBB = FinalCaseIt->getCaseSuccessor();
+ Switch->removeCase(FinalCaseIt);
if (IsDestroy) {
BasicBlock *OldSwitchBB = Switch->getParent();
auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch");
@@ -195,7 +223,7 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
// Replace all args with undefs. The buildCoroutineFrame algorithm has
// already rewritten accesses to the args that occur after suspend points
// with loads and stores to/from the coroutine frame.
- for (Argument &A : F.getArgumentList())
+ for (Argument &A : F.args())
VMap[&A] = UndefValue::get(A.getType());
SmallVector<ReturnInst *, 4> Returns;
@@ -216,9 +244,9 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
// Remove old return attributes.
NewF->removeAttributes(
- AttributeSet::ReturnIndex,
- AttributeSet::get(
- NewF->getContext(), AttributeSet::ReturnIndex,
+ AttributeList::ReturnIndex,
+ AttributeList::get(
+ NewF->getContext(), AttributeList::ReturnIndex,
AttributeFuncs::typeIncompatible(NewF->getReturnType())));
// Make AllocaSpillBlock the new entry block.
@@ -236,7 +264,7 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
IRBuilder<> Builder(&NewF->getEntryBlock().front());
// Remap frame pointer.
- Argument *NewFramePtr = &NewF->getArgumentList().front();
+ Argument *NewFramePtr = &*NewF->arg_begin();
Value *OldFramePtr = cast<Value>(VMap[Shape.FramePtr]);
NewFramePtr->takeName(OldFramePtr);
OldFramePtr->replaceAllUsesWith(NewFramePtr);
@@ -270,9 +298,7 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
// Remove coro.end intrinsics.
replaceFallthroughCoroEnd(Shape.CoroEnds.front(), VMap);
- // FIXME: coming in upcoming patches:
- // replaceUnwindCoroEnds(Shape.CoroEnds, VMap);
-
+ replaceUnwindCoroEnds(Shape, VMap);
// Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
// to suppress deallocation code.
coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
@@ -284,8 +310,16 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
}
static void removeCoroEnds(coro::Shape &Shape) {
- for (CoroEndInst *CE : Shape.CoroEnds)
+ if (Shape.CoroEnds.empty())
+ return;
+
+ LLVMContext &Context = Shape.CoroEnds.front()->getContext();
+ auto *False = ConstantInt::getFalse(Context);
+
+ for (CoroEndInst *CE : Shape.CoroEnds) {
+ CE->replaceAllUsesWith(False);
CE->eraseFromParent();
+ }
}
static void replaceFrameSize(coro::Shape &Shape) {
diff --git a/lib/Transforms/Coroutines/Coroutines.cpp b/lib/Transforms/Coroutines/Coroutines.cpp
index 877ec34b4d3b..ea48043f9381 100644
--- a/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/lib/Transforms/Coroutines/Coroutines.cpp
@@ -245,9 +245,9 @@ void coro::Shape::buildFrom(Function &F) {
if (CoroBegin)
report_fatal_error(
"coroutine should have exactly one defining @llvm.coro.begin");
- CB->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
- CB->addAttribute(AttributeSet::ReturnIndex, Attribute::NoAlias);
- CB->removeAttribute(AttributeSet::FunctionIndex,
+ CB->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ CB->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+ CB->removeAttribute(AttributeList::FunctionIndex,
Attribute::NoDuplicate);
CoroBegin = CB;
}
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 65b7bad3b1ed..a2c8a32dfe86 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -29,8 +29,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -38,6 +39,7 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CFG.h"
@@ -51,323 +53,400 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
#include <set>
using namespace llvm;
#define DEBUG_TYPE "argpromotion"
-STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted");
+STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
-STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted");
-STATISTIC(NumArgumentsDead , "Number of dead pointer args eliminated");
+STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted");
+STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");
-namespace {
- /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
- ///
- struct ArgPromotion : public CallGraphSCCPass {
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- getAAResultsAnalysisUsage(AU);
- CallGraphSCCPass::getAnalysisUsage(AU);
- }
+/// A vector used to hold the indices of a single GEP instruction
+typedef std::vector<uint64_t> IndicesVector;
- bool runOnSCC(CallGraphSCC &SCC) override;
- static char ID; // Pass identification, replacement for typeid
- explicit ArgPromotion(unsigned maxElements = 3)
- : CallGraphSCCPass(ID), maxElements(maxElements) {
- initializeArgPromotionPass(*PassRegistry::getPassRegistry());
- }
+/// doPromotion - This method actually performs the promotion of the specified
+/// arguments, and returns the new function. At this point, we know that it's
+/// safe to do so.
+static Function *
+doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
+ SmallPtrSetImpl<Argument *> &ByValArgsToTransform,
+ Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
+ ReplaceCallSite) {
- private:
+ // Start by computing a new prototype for the function, which is the same as
+ // the old function, but has modified arguments.
+ FunctionType *FTy = F->getFunctionType();
+ std::vector<Type *> Params;
- using llvm::Pass::doInitialization;
- bool doInitialization(CallGraph &CG) override;
- /// The maximum number of elements to expand, or 0 for unlimited.
- unsigned maxElements;
- };
-}
+ typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
-/// A vector used to hold the indices of a single GEP instruction
-typedef std::vector<uint64_t> IndicesVector;
+ // ScalarizedElements - If we are promoting a pointer that has elements
+ // accessed out of it, keep track of which elements are accessed so that we
+ // can add one argument for each.
+ //
+ // Arguments that are directly loaded will have a zero element value here, to
+ // handle cases where there are both a direct load and GEP accesses.
+ //
+ std::map<Argument *, ScalarizeTable> ScalarizedElements;
-static CallGraphNode *
-PromoteArguments(CallGraphNode *CGN, CallGraph &CG,
- function_ref<AAResults &(Function &F)> AARGetter,
- unsigned MaxElements);
-static bool isDenselyPacked(Type *type, const DataLayout &DL);
-static bool canPaddingBeAccessed(Argument *Arg);
-static bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, AAResults &AAR,
- unsigned MaxElements);
-static CallGraphNode *
-DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
- SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG);
+ // OriginalLoads - Keep track of a representative load instruction from the
+ // original function so that we can tell the alias analysis implementation
+ // what the new GEP/Load instructions we are inserting look like.
+ // We need to keep the original loads for each argument and the elements
+ // of the argument that are accessed.
+ std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads;
-char ArgPromotion::ID = 0;
-INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
- "Promote 'by reference' arguments to scalars", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
- "Promote 'by reference' arguments to scalars", false, false)
+ // ArgAttrVec - Keep track of the parameter attributes for the arguments
+ // that we are *not* promoting. For the ones that we do promote, the
+ // parameter attributes are lost.
+ SmallVector<AttributeSet, 8> ArgAttrVec;
+ AttributeList PAL = F->getAttributes();
-Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
- return new ArgPromotion(maxElements);
-}
+ // First, determine the new argument list
+ unsigned ArgIndex = 0;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+ ++I, ++ArgIndex) {
+ if (ByValArgsToTransform.count(&*I)) {
+ // Simple byval argument? Just add all the struct element types.
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ StructType *STy = cast<StructType>(AgTy);
+ Params.insert(Params.end(), STy->element_begin(), STy->element_end());
+ ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(),
+ AttributeSet());
+ ++NumByValArgsPromoted;
+ } else if (!ArgsToPromote.count(&*I)) {
+ // Unchanged argument
+ Params.push_back(I->getType());
+ ArgAttrVec.push_back(PAL.getParamAttributes(ArgIndex));
+ } else if (I->use_empty()) {
+ // Dead argument (which are always marked as promotable)
+ ++NumArgumentsDead;
+ } else {
+ // Okay, this is being promoted. This means that the only uses are loads
+ // or GEPs which are only used by loads.
-static bool runImpl(CallGraphSCC &SCC, CallGraph &CG,
- function_ref<AAResults &(Function &F)> AARGetter,
- unsigned MaxElements) {
- bool Changed = false, LocalChange;
+ // In this table, we will track which indices are loaded from the argument
+ // (where direct loads are tracked as no indices).
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+ for (User *U : I->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ Type *SrcTy;
+ if (LoadInst *L = dyn_cast<LoadInst>(UI))
+ SrcTy = L->getType();
+ else
+ SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
+ IndicesVector Indices;
+ Indices.reserve(UI->getNumOperands() - 1);
+ // Since loads will only have a single operand, and GEPs only a single
+ // non-index operand, this will record direct loads without any indices,
+ // and gep+loads with the GEP indices.
+ for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
+ II != IE; ++II)
+ Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Indices.size() == 1 && Indices.front() == 0)
+ Indices.clear();
+ ArgIndices.insert(std::make_pair(SrcTy, Indices));
+ LoadInst *OrigLoad;
+ if (LoadInst *L = dyn_cast<LoadInst>(UI))
+ OrigLoad = L;
+ else
+ // Take any load; we will use it only to update alias analysis.
+ OrigLoad = cast<LoadInst>(UI->user_back());
+ OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
+ }
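+ // Editor's note: a minimal sketch (illustration only, not part of this
+ // patch) of the index canonicalization applied above. A GEP whose single
+ // index is 0 addresses the same memory as a direct load of the pointer,
+ // so it is recorded with an empty index list and shares that slot:
+ //
+ //   IndicesVector canonicalize(IndicesVector Idx) {
+ //     if (Idx.size() == 1 && Idx.front() == 0)
+ //       Idx.clear(); // "gep %p, 0" loads the same address as "%p"
+ //     return Idx;
+ //   }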
- do { // Iterate until we stop promoting from this SCC.
- LocalChange = false;
- // Attempt to promote arguments from all functions in this SCC.
- for (CallGraphNode *OldNode : SCC) {
- if (CallGraphNode *NewNode =
- PromoteArguments(OldNode, CG, AARGetter, MaxElements)) {
- LocalChange = true;
- SCC.ReplaceNode(OldNode, NewNode);
+ // Add a parameter to the function for each element passed in.
+ for (const auto &ArgIndex : ArgIndices) {
+ // Not allowed to dereference ->begin() if size() is 0.
+ Params.push_back(GetElementPtrInst::getIndexedType(
+ cast<PointerType>(I->getType()->getScalarType())->getElementType(),
+ ArgIndex.second));
+ ArgAttrVec.push_back(AttributeSet());
+ assert(Params.back());
}
+
+ if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
+ ++NumArgumentsPromoted;
+ else
+ ++NumAggregatesPromoted;
}
- Changed |= LocalChange; // Remember that we changed something.
- } while (LocalChange);
-
- return Changed;
-}
+ }
-bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
- if (skipSCC(SCC))
- return false;
+ Type *RetTy = FTy->getReturnType();
- // Get the callgraph information that we need to update to reflect our
- // changes.
- CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ // Construct the new function type using the new arguments.
+ FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
- // We compute dedicated AA results for each function in the SCC as needed. We
- // use a lambda referencing external objects so that they live long enough to
- // be queried, but we re-use them each time.
- Optional<BasicAAResult> BAR;
- Optional<AAResults> AAR;
- auto AARGetter = [&](Function &F) -> AAResults & {
- BAR.emplace(createLegacyPMBasicAAResult(*this, F));
- AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
- return *AAR;
- };
-
- return runImpl(SCC, CG, AARGetter, maxElements);
-}
+ // Create the new function body and insert it into the module.
+ Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
+ NF->copyAttributesFrom(F);
-/// \brief Checks if a type could have padding bytes.
-static bool isDenselyPacked(Type *type, const DataLayout &DL) {
+ // Patch the pointer to LLVM function in debug info descriptor.
+ NF->setSubprogram(F->getSubprogram());
+ F->setSubprogram(nullptr);
- // There is no size information, so be conservative.
- if (!type->isSized())
- return false;
+ DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
+ << "From: " << *F);
- // If the alloc size is not equal to the storage size, then there are padding
- // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
- if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
- return false;
+ // Recompute the parameter attributes list based on the new arguments for
+ // the function.
+ NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(),
+ PAL.getRetAttributes(), ArgAttrVec));
+ ArgAttrVec.clear();
- if (!isa<CompositeType>(type))
- return true;
+ F->getParent()->getFunctionList().insert(F->getIterator(), NF);
+ NF->takeName(F);
- // For homogenous sequential types, check for padding within members.
- if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
- return isDenselyPacked(seqTy->getElementType(), DL);
+ // Loop over all of the callers of the function, transforming the call sites
+ // to pass in the loaded pointers.
+ //
+ SmallVector<Value *, 16> Args;
+ while (!F->use_empty()) {
+ CallSite CS(F->user_back());
+ assert(CS.getCalledFunction() == F);
+ Instruction *Call = CS.getInstruction();
+ const AttributeList &CallPAL = CS.getAttributes();
- // Check for padding within and between elements of a struct.
- StructType *StructTy = cast<StructType>(type);
- const StructLayout *Layout = DL.getStructLayout(StructTy);
- uint64_t StartPos = 0;
- for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
- Type *ElTy = StructTy->getElementType(i);
- if (!isDenselyPacked(ElTy, DL))
- return false;
- if (StartPos != Layout->getElementOffsetInBits(i))
- return false;
- StartPos += DL.getTypeAllocSizeInBits(ElTy);
- }
+ // Loop over the operands, inserting GEP and loads in the caller as
+ // appropriate.
+ CallSite::arg_iterator AI = CS.arg_begin();
+ ArgIndex = 1;
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
+ ++I, ++AI, ++ArgIndex)
+ if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
+ Args.push_back(*AI); // Unmodified argument
+ ArgAttrVec.push_back(CallPAL.getAttributes(ArgIndex));
+ } else if (ByValArgsToTransform.count(&*I)) {
+ // Emit a GEP and load for each element of the struct.
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = {
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx = GetElementPtrInst::Create(
+ STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
+ // TODO: Tell AA about the new values?
+ Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call));
+ ArgAttrVec.push_back(AttributeSet());
+ }
+ } else if (!I->use_empty()) {
+ // Non-dead argument: insert GEPs and loads as appropriate.
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+ // Store the Value* version of the indices in here, but declare it now
+ // for reuse.
+ std::vector<Value *> Ops;
+ for (const auto &ArgIndex : ArgIndices) {
+ Value *V = *AI;
+ LoadInst *OrigLoad =
+ OriginalLoads[std::make_pair(&*I, ArgIndex.second)];
+ if (!ArgIndex.second.empty()) {
+ Ops.reserve(ArgIndex.second.size());
+ Type *ElTy = V->getType();
+ for (unsigned long II : ArgIndex.second) {
+ // Use i32 to index structs, and i64 for others (pointers/arrays).
+ // This satisfies GEP constraints.
+ Type *IdxTy =
+ (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext())
+ : Type::getInt64Ty(F->getContext()));
+ Ops.push_back(ConstantInt::get(IdxTy, II));
+ // Keep track of the type we're currently indexing.
+ if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
+ ElTy = ElPTy->getElementType();
+ else
+ ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
+ }
+ // And create a GEP to extract those indices.
+ V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
+ V->getName() + ".idx", Call);
+ Ops.clear();
+ }
+ // Since we're replacing a load, make sure we take the alignment
+ // of the previous load.
+ LoadInst *newLoad = new LoadInst(V, V->getName() + ".val", Call);
+ newLoad->setAlignment(OrigLoad->getAlignment());
+ // Transfer the AA info too.
+ AAMDNodes AAInfo;
+ OrigLoad->getAAMetadata(AAInfo);
+ newLoad->setAAMetadata(AAInfo);
- return true;
-}
+ Args.push_back(newLoad);
+ ArgAttrVec.push_back(AttributeSet());
+ }
+ }
-/// \brief Checks if the padding bytes of an argument could be accessed.
-static bool canPaddingBeAccessed(Argument *arg) {
+ // Push any varargs arguments on the list.
+ for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
+ Args.push_back(*AI);
+ ArgAttrVec.push_back(CallPAL.getAttributes(ArgIndex));
+ }
- assert(arg->hasByValAttr());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CS.getOperandBundlesAsDefs(OpBundles);
- // Track all the pointers to the argument to make sure they are not captured.
- SmallPtrSet<Value *, 16> PtrValues;
- PtrValues.insert(arg);
+ CallSite NewCS;
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, OpBundles, "", Call);
+ } else {
+ auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call);
+ NewCall->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
+ NewCS = NewCall;
+ }
+ NewCS.setCallingConv(CS.getCallingConv());
+ NewCS.setAttributes(
+ AttributeList::get(F->getContext(), CallPAL.getFnAttributes(),
+ CallPAL.getRetAttributes(), ArgAttrVec));
+ NewCS->setDebugLoc(Call->getDebugLoc());
+ uint64_t W;
+ if (Call->extractProfTotalWeight(W))
+ NewCS->setProfWeight(W);
+ Args.clear();
+ ArgAttrVec.clear();
- // Track all of the stores.
- SmallVector<StoreInst *, 16> Stores;
+ // Update the callgraph to know that the callsite has been transformed.
+ if (ReplaceCallSite)
+ (*ReplaceCallSite)(CS, NewCS);
- // Scan through the uses recursively to make sure the pointer is always used
- // sanely.
- SmallVector<Value *, 16> WorkList;
- WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end());
- while (!WorkList.empty()) {
- Value *V = WorkList.back();
- WorkList.pop_back();
- if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
- if (PtrValues.insert(V).second)
- WorkList.insert(WorkList.end(), V->user_begin(), V->user_end());
- } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
- Stores.push_back(Store);
- } else if (!isa<LoadInst>(V)) {
- return true;
+ if (!Call->use_empty()) {
+ Call->replaceAllUsesWith(NewCS.getInstruction());
+ NewCS->takeName(Call);
}
- }
-// Check to make sure the pointers aren't captured
- for (StoreInst *Store : Stores)
- if (PtrValues.count(Store->getValueOperand()))
- return true;
-
- return false;
-}
+ // Finally, remove the old call from the program, reducing the use-count of
+ // F.
+ Call->eraseFromParent();
+ }
-/// PromoteArguments - This method checks the specified function to see if there
-/// are any promotable arguments and if it is safe to promote the function (for
-/// example, all callers are direct). If safe to promote some arguments, it
-/// calls the DoPromotion method.
-///
-static CallGraphNode *
-PromoteArguments(CallGraphNode *CGN, CallGraph &CG,
- function_ref<AAResults &(Function &F)> AARGetter,
- unsigned MaxElements) {
- Function *F = CGN->getFunction();
+ const DataLayout &DL = F->getParent()->getDataLayout();
- // Make sure that it is local to this module.
- if (!F || !F->hasLocalLinkage()) return nullptr;
+ // Since we have now created the new function, splice the body of the old
+ // function right into the new function, leaving the old rotting hulk of the
+ // function empty.
+ NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
- // Don't promote arguments for variadic functions. Adding, removing, or
- // changing non-pack parameters can change the classification of pack
- // parameters. Frontends encode that classification at the call site in the
- // IR, while in the callee the classification is determined dynamically based
- // on the number of registers consumed so far.
- if (F->isVarArg()) return nullptr;
+ // Loop over the argument list, transferring uses of the old arguments over
+ // to the new arguments, and transferring the names over as well.
+ //
+ for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
+ I2 = NF->arg_begin();
+ I != E; ++I) {
+ if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
+ // If this is an unmodified argument, move the name and users over to the
+ // new version.
+ I->replaceAllUsesWith(&*I2);
+ I2->takeName(&*I);
+ ++I2;
+ continue;
+ }
- // First check: see if there are any pointer arguments! If not, quick exit.
- SmallVector<Argument*, 16> PointerArgs;
- for (Argument &I : F->args())
- if (I.getType()->isPointerTy())
- PointerArgs.push_back(&I);
- if (PointerArgs.empty()) return nullptr;
+ if (ByValArgsToTransform.count(&*I)) {
+ // In the callee, we create an alloca, and store each of the new incoming
+ // arguments into the alloca.
+ Instruction *InsertPt = &NF->begin()->front();
- // Second check: make sure that all callers are direct callers. We can't
- // transform functions that have indirect callers. Also see if the function
- // is self-recursive.
- bool isSelfRecursive = false;
- for (Use &U : F->uses()) {
- CallSite CS(U.getUser());
- // Must be a direct call.
- if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;
-
- if (CS.getInstruction()->getParent()->getParent() == F)
- isSelfRecursive = true;
- }
-
- const DataLayout &DL = F->getParent()->getDataLayout();
+ // Just add all the struct element types.
+ Type *AgTy = cast<PointerType>(I->getType())->getElementType();
+ Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
+ "", InsertPt);
+ StructType *STy = cast<StructType>(AgTy);
+ Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
+ nullptr};
- AAResults &AAR = AARGetter(*F);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
+ Value *Idx = GetElementPtrInst::Create(
+ AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
+ InsertPt);
+ I2->setName(I->getName() + "." + Twine(i));
+ new StoreInst(&*I2++, Idx, InsertPt);
+ }
- // Check to see which arguments are promotable. If an argument is promotable,
- // add it to ArgsToPromote.
- SmallPtrSet<Argument*, 8> ArgsToPromote;
- SmallPtrSet<Argument*, 8> ByValArgsToTransform;
- for (Argument *PtrArg : PointerArgs) {
- Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+ // Anything that used the arg should now use the alloca.
+ I->replaceAllUsesWith(TheAlloca);
+ TheAlloca->takeName(&*I);
- // Replace sret attribute with noalias. This reduces register pressure by
- // avoiding a register copy.
- if (PtrArg->hasStructRetAttr()) {
- unsigned ArgNo = PtrArg->getArgNo();
- F->setAttributes(
- F->getAttributes()
- .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
- .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
- for (Use &U : F->uses()) {
- CallSite CS(U.getUser());
- CS.setAttributes(
- CS.getAttributes()
- .removeAttribute(F->getContext(), ArgNo + 1,
- Attribute::StructRet)
- .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+ // If the alloca is used in a call, we must clear the tail flag since
+ // the callee now uses an alloca from the caller.
+ for (User *U : TheAlloca->users()) {
+ CallInst *Call = dyn_cast<CallInst>(U);
+ if (!Call)
+ continue;
+ Call->setTailCall(false);
}
+ continue;
}
- // If this is a byval argument, and if the aggregate type is small, just
- // pass the elements, which is always safe, if the passed value is densely
- // packed or if we can prove the padding bytes are never accessed. This does
- // not apply to inalloca.
- bool isSafeToPromote =
- PtrArg->hasByValAttr() &&
- (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
- if (isSafeToPromote) {
- if (StructType *STy = dyn_cast<StructType>(AgTy)) {
- if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
- DEBUG(dbgs() << "argpromotion disable promoting argument '"
- << PtrArg->getName() << "' because it would require adding more"
- << " than " << MaxElements << " arguments to the function.\n");
- continue;
- }
-
- // If all the elements are single-value types, we can promote it.
- bool AllSimple = true;
- for (const auto *EltTy : STy->elements()) {
- if (!EltTy->isSingleValueType()) {
- AllSimple = false;
- break;
- }
+ if (I->use_empty())
+ continue;
+
+ // Otherwise, if we promoted this argument, then all users are load
+ // instructions (or GEPs with only load users), and all loads should be
+ // using the new argument that we added.
+ ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+
+ while (!I->use_empty()) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
+ assert(ArgIndices.begin()->second.empty() &&
+ "Load element should sort to front!");
+ I2->setName(I->getName() + ".val");
+ LI->replaceAllUsesWith(&*I2);
+ LI->eraseFromParent();
+ DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
+ << "' in function '" << F->getName() << "'\n");
+ } else {
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
+ IndicesVector Operands;
+ Operands.reserve(GEP->getNumIndices());
+ for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
+ II != IE; ++II)
+ Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());
+
+ // GEPs with a single 0 index can be merged with direct loads
+ if (Operands.size() == 1 && Operands.front() == 0)
+ Operands.clear();
+
+ Function::arg_iterator TheArg = I2;
+ for (ScalarizeTable::iterator It = ArgIndices.begin();
+ It->second != Operands; ++It, ++TheArg) {
+ assert(It != ArgIndices.end() && "GEP not handled??");
}
- // Safe to transform, don't even bother trying to "promote" it.
- // Passing the elements as a scalar will allow sroa to hack on
- // the new alloca we introduce.
- if (AllSimple) {
- ByValArgsToTransform.insert(PtrArg);
- continue;
+ std::string NewName = I->getName();
+ for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
+ NewName += "." + utostr(Operands[i]);
}
- }
- }
+ NewName += ".val";
+ TheArg->setName(NewName);
- // If the argument is a recursive type and we're in a recursive
- // function, we could end up infinitely peeling the function argument.
- if (isSelfRecursive) {
- if (StructType *STy = dyn_cast<StructType>(AgTy)) {
- bool RecursiveType = false;
- for (const auto *EltTy : STy->elements()) {
- if (EltTy == PtrArg->getType()) {
- RecursiveType = true;
- break;
- }
+ DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
+ << "' of function '" << NF->getName() << "'\n");
+
+ // All of the uses must be load instructions. Replace them all with
+ // the matching argument found above (TheArg).
+ while (!GEP->use_empty()) {
+ LoadInst *L = cast<LoadInst>(GEP->user_back());
+ L->replaceAllUsesWith(&*TheArg);
+ L->eraseFromParent();
}
- if (RecursiveType)
- continue;
+ GEP->eraseFromParent();
}
}
-
- // Otherwise, see if we can promote the pointer to its value.
- if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
- MaxElements))
- ArgsToPromote.insert(PtrArg);
- }
- // No promotable pointer arguments.
- if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
- return nullptr;
+ // Increment I2 past all of the arguments added for this promoted pointer.
+ std::advance(I2, ArgIndices.size());
+ }
- return DoPromotion(F, ArgsToPromote, ByValArgsToTransform, CG);
+ return NF;
}
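+
+// Naming of the promoted pieces, sketched for a hypothetical argument %s that
+// is loaded both directly and through a GEP at index 1 (matching the
+// ".val" / ".<idx>.val" suffixes assigned above):
+//
+//   direct load (empty index list)  -> new argument %s.val
+//   load of a GEP at index 1        -> new argument %s.1.val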
/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
/// all callers pass in a valid pointer for the specified function argument.
-static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
+static bool allCallersPassInValidPointerForArgument(Argument *Arg) {
Function *Callee = Arg->getParent();
const DataLayout &DL = Callee->getParent()->getDataLayout();
@@ -390,26 +469,25 @@ static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
/// elements in Prefix is the same as the corresponding elements in Longer.
///
/// This means it also returns true when Prefix and Longer are equal!
-static bool IsPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
+static bool isPrefix(const IndicesVector &Prefix, const IndicesVector &Longer) {
if (Prefix.size() > Longer.size())
return false;
return std::equal(Prefix.begin(), Prefix.end(), Longer.begin());
}
-
/// Checks if Indices, or a prefix of Indices, is in Set.
-static bool PrefixIn(const IndicesVector &Indices,
+static bool prefixIn(const IndicesVector &Indices,
std::set<IndicesVector> &Set) {
- std::set<IndicesVector>::iterator Low;
- Low = Set.upper_bound(Indices);
- if (Low != Set.begin())
- Low--;
- // Low is now the last element smaller than or equal to Indices. This means
- // it points to a prefix of Indices (possibly Indices itself), if such
- // prefix exists.
- //
- // This load is safe if any prefix of its operands is safe to load.
- return Low != Set.end() && IsPrefix(*Low, Indices);
+ std::set<IndicesVector>::iterator Low;
+ Low = Set.upper_bound(Indices);
+ if (Low != Set.begin())
+ Low--;
+ // Low is now the last element smaller than or equal to Indices. This means
+ // it points to a prefix of Indices (possibly Indices itself), if such
+ // prefix exists.
+ //
+ // This load is safe if any prefix of its operands is safe to load.
+ return Low != Set.end() && isPrefix(*Low, Indices);
}
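+
+// A minimal sketch of the prefix semantics used here, assuming IndicesVector
+// holds constant GEP indices:
+//
+//   IndicesVector A = {0};     // a direct load of the argument
+//   IndicesVector B = {0, 1};  // a load through a GEP at indices 0, 1
+//   isPrefix(A, B);            // true:  A matches the first element of B
+//   isPrefix(B, A);            // false: B is longer than A
+//   isPrefix(A, A);            // true:  equal vectors count as prefixes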
/// Mark the given indices (ToMark) as safe in the given set of indices
@@ -417,7 +495,7 @@ static bool PrefixIn(const IndicesVector &Indices,
/// is already a prefix of Indices in Safe, Indices are implicitly marked safe
/// already. Furthermore, any indices that Indices is itself a prefix of are
/// removed from Safe (since they are implicitly safe because of Indices now).
-static void MarkIndicesSafe(const IndicesVector &ToMark,
+static void markIndicesSafe(const IndicesVector &ToMark,
std::set<IndicesVector> &Safe) {
std::set<IndicesVector>::iterator Low;
Low = Safe.upper_bound(ToMark);
@@ -428,7 +506,7 @@ static void MarkIndicesSafe(const IndicesVector &ToMark,
// means it points to a prefix of Indices (possibly Indices itself), if
// such prefix exists.
if (Low != Safe.end()) {
- if (IsPrefix(*Low, ToMark))
+ if (isPrefix(*Low, ToMark))
// If there is already a prefix of these indices (or exactly these
// indices) marked as safe, don't bother adding these indices
return;
@@ -441,7 +519,7 @@ static void MarkIndicesSafe(const IndicesVector &ToMark,
++Low;
// If ToMark is a prefix of longer index list(s), remove those
std::set<IndicesVector>::iterator End = Safe.end();
- while (Low != End && IsPrefix(ToMark, *Low)) {
+ while (Low != End && isPrefix(ToMark, *Low)) {
std::set<IndicesVector>::iterator Remove = Low;
++Low;
Safe.erase(Remove);
@@ -486,7 +564,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg))
+ if (isByValOrInAlloca || allCallersPassInValidPointerForArgument(Arg))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
// First, iterate the entry block and mark loads of (geps of) arguments as
@@ -512,25 +590,26 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
return false;
// Indices checked out, mark them as safe
- MarkIndicesSafe(Indices, SafeToUnconditionallyLoad);
+ markIndicesSafe(Indices, SafeToUnconditionallyLoad);
Indices.clear();
}
} else if (V == Arg) {
// Direct loads are equivalent to a GEP with a single 0 index.
- MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
+ markIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
}
}
// Now, iterate all uses of the argument to see if there are any uses that are
// not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
- SmallVector<LoadInst*, 16> Loads;
+ SmallVector<LoadInst *, 16> Loads;
IndicesVector Operands;
for (Use &U : Arg->uses()) {
User *UR = U.getUser();
Operands.clear();
if (LoadInst *LI = dyn_cast<LoadInst>(UR)) {
// Don't hack volatile/atomic loads
- if (!LI->isSimple()) return false;
+ if (!LI->isSimple())
+ return false;
Loads.push_back(LI);
// Direct loads are equivalent to a GEP with a zero index and then a load.
Operands.push_back(0);
@@ -547,30 +626,31 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
}
// Ensure that all of the indices are constants.
- for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end();
- i != e; ++i)
+ for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end(); i != e;
+ ++i)
if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
Operands.push_back(C->getSExtValue());
else
- return false; // Not a constant operand GEP!
+ return false; // Not a constant operand GEP!
// Ensure that the only users of the GEP are load instructions.
for (User *GEPU : GEP->users())
if (LoadInst *LI = dyn_cast<LoadInst>(GEPU)) {
// Don't hack volatile/atomic loads
- if (!LI->isSimple()) return false;
+ if (!LI->isSimple())
+ return false;
Loads.push_back(LI);
} else {
// Other uses than load?
return false;
}
} else {
- return false; // Not a load or a GEP.
+ return false; // Not a load or a GEP.
}
// Now, see if it is safe to promote this load / loads of this GEP. Loading
// is safe if Operands, or a prefix of Operands, is marked as safe.
- if (!PrefixIn(Operands, SafeToUnconditionallyLoad))
+ if (!prefixIn(Operands, SafeToUnconditionallyLoad))
return false;
// See if we are already promoting a load with these indices. If not, check
@@ -579,8 +659,10 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
if (ToPromote.find(Operands) == ToPromote.end()) {
if (MaxElements > 0 && ToPromote.size() == MaxElements) {
DEBUG(dbgs() << "argpromotion not promoting argument '"
- << Arg->getName() << "' because it would require adding more "
- << "than " << MaxElements << " arguments to the function.\n");
+ << Arg->getName()
+ << "' because it would require adding more "
+ << "than " << MaxElements
+ << " arguments to the function.\n");
// We limit aggregate promotion to only promoting up to a fixed number
// of elements of the aggregate.
return false;
@@ -589,7 +671,8 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
}
}
- if (Loads.empty()) return true; // No users, this is a dead argument.
+ if (Loads.empty())
+ return true; // No users, this is a dead argument.
// Okay, now we know that the argument is only used by load instructions and
// it is safe to unconditionally perform all of them. Use alias analysis to
@@ -598,7 +681,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
// Because there could be several/many load instructions, remember which
// blocks we know to be transparent to the load.
- df_iterator_default_set<BasicBlock*, 16> TranspBlocks;
+ df_iterator_default_set<BasicBlock *, 16> TranspBlocks;
for (LoadInst *Load : Loads) {
// Check to see if the load is invalidated from the start of the block to
@@ -607,7 +690,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
MemoryLocation Loc = MemoryLocation::get(Load);
if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod))
- return false; // Pointer is invalidated!
+ return false; // Pointer is invalidated!
// Now check every path from the entry block to the load for transparency.
// To do this, we perform a depth first search on the inverse CFG from the
@@ -625,416 +708,352 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
return true;
}
-/// DoPromotion - This method actually performs the promotion of the specified
-/// arguments, and returns the new function. At this point, we know that it's
-/// safe to do so.
-static CallGraphNode *
-DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
- SmallPtrSetImpl<Argument *> &ByValArgsToTransform, CallGraph &CG) {
+/// \brief Checks if a type could have padding bytes.
+static bool isDenselyPacked(Type *type, const DataLayout &DL) {
- // Start by computing a new prototype for the function, which is the same as
- // the old function, but has modified arguments.
- FunctionType *FTy = F->getFunctionType();
- std::vector<Type*> Params;
+ // There is no size information, so be conservative.
+ if (!type->isSized())
+ return false;
- typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable;
+ // If the alloc size is not equal to the storage size, then there are padding
+ // bytes. For x86_fp80 on x86-64, size: 80 alloc size: 128.
+ if (DL.getTypeSizeInBits(type) != DL.getTypeAllocSizeInBits(type))
+ return false;
- // ScalarizedElements - If we are promoting a pointer that has elements
- // accessed out of it, keep track of which elements are accessed so that we
- // can add one argument for each.
- //
- // Arguments that are directly loaded will have a zero element value here, to
- // handle cases where there are both a direct load and GEP accesses.
- //
- std::map<Argument*, ScalarizeTable> ScalarizedElements;
+ if (!isa<CompositeType>(type))
+ return true;
- // OriginalLoads - Keep track of a representative load instruction from the
- // original function so that we can tell the alias analysis implementation
- // what the new GEP/Load instructions we are inserting look like.
- // We need to keep the original loads for each argument and the elements
- // of the argument that are accessed.
- std::map<std::pair<Argument*, IndicesVector>, LoadInst*> OriginalLoads;
+ // For homogeneous sequential types, check for padding within members.
+ if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
+ return isDenselyPacked(seqTy->getElementType(), DL);
- // Attribute - Keep track of the parameter attributes for the arguments
- // that we are *not* promoting. For the ones that we do promote, the parameter
- // attributes are lost
- SmallVector<AttributeSet, 8> AttributesVec;
- const AttributeSet &PAL = F->getAttributes();
+ // Check for padding within and between elements of a struct.
+ StructType *StructTy = cast<StructType>(type);
+ const StructLayout *Layout = DL.getStructLayout(StructTy);
+ uint64_t StartPos = 0;
+ for (unsigned i = 0, E = StructTy->getNumElements(); i < E; ++i) {
+ Type *ElTy = StructTy->getElementType(i);
+ if (!isDenselyPacked(ElTy, DL))
+ return false;
+ if (StartPos != Layout->getElementOffsetInBits(i))
+ return false;
+ StartPos += DL.getTypeAllocSizeInBits(ElTy);
+ }
- // Add any return attributes.
- if (PAL.hasAttributes(AttributeSet::ReturnIndex))
- AttributesVec.push_back(AttributeSet::get(F->getContext(),
- PAL.getRetAttributes()));
+ return true;
+}
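+
+// A rough illustration of the check above (assuming a typical x86-64
+// DataLayout; the exact answers depend on the module's layout string):
+//
+//   { i32, i32 }  -> densely packed: no padding between or after elements
+//   { i32, i64 }  -> not packed: 4 padding bytes follow the i32 member
+//   [4 x i16]     -> packed, because the i16 element type is itself packed
+//   x86_fp80      -> not packed: 80 bits stored in a 128-bit alloc slot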
- // First, determine the new argument list
- unsigned ArgIndex = 1;
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
- ++I, ++ArgIndex) {
- if (ByValArgsToTransform.count(&*I)) {
- // Simple byval argument? Just add all the struct element types.
- Type *AgTy = cast<PointerType>(I->getType())->getElementType();
- StructType *STy = cast<StructType>(AgTy);
- Params.insert(Params.end(), STy->element_begin(), STy->element_end());
- ++NumByValArgsPromoted;
- } else if (!ArgsToPromote.count(&*I)) {
- // Unchanged argument
- Params.push_back(I->getType());
- AttributeSet attrs = PAL.getParamAttributes(ArgIndex);
- if (attrs.hasAttributes(ArgIndex)) {
- AttrBuilder B(attrs, ArgIndex);
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Params.size(), B));
- }
- } else if (I->use_empty()) {
- // Dead argument (which are always marked as promotable)
- ++NumArgumentsDead;
- } else {
- // Okay, this is being promoted. This means that the only uses are loads
- // or GEPs which are only used by loads
+/// \brief Checks if the padding bytes of an argument could be accessed.
+static bool canPaddingBeAccessed(Argument *arg) {
- // In this table, we will track which indices are loaded from the argument
- // (where direct loads are tracked as no indices).
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
- for (User *U : I->users()) {
- Instruction *UI = cast<Instruction>(U);
- Type *SrcTy;
- if (LoadInst *L = dyn_cast<LoadInst>(UI))
- SrcTy = L->getType();
- else
- SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
- IndicesVector Indices;
- Indices.reserve(UI->getNumOperands() - 1);
- // Since loads will only have a single operand, and GEPs only a single
- // non-index operand, this will record direct loads without any indices,
- // and gep+loads with the GEP indices.
- for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
- II != IE; ++II)
- Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
- // GEPs with a single 0 index can be merged with direct loads
- if (Indices.size() == 1 && Indices.front() == 0)
- Indices.clear();
- ArgIndices.insert(std::make_pair(SrcTy, Indices));
- LoadInst *OrigLoad;
- if (LoadInst *L = dyn_cast<LoadInst>(UI))
- OrigLoad = L;
- else
- // Take any load, we will use it only to update Alias Analysis
- OrigLoad = cast<LoadInst>(UI->user_back());
- OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
- }
+ assert(arg->hasByValAttr());
- // Add a parameter to the function for each element passed in.
- for (const auto &ArgIndex : ArgIndices) {
- // not allowed to dereference ->begin() if size() is 0
- Params.push_back(GetElementPtrInst::getIndexedType(
- cast<PointerType>(I->getType()->getScalarType())->getElementType(),
- ArgIndex.second));
- assert(Params.back());
- }
+ // Track all the pointers to the argument to make sure they are not captured.
+ SmallPtrSet<Value *, 16> PtrValues;
+ PtrValues.insert(arg);
- if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
- ++NumArgumentsPromoted;
- else
- ++NumAggregatesPromoted;
+ // Track all of the stores.
+ SmallVector<StoreInst *, 16> Stores;
+
+ // Scan through the uses recursively to make sure the pointer is always used
+ // sanely.
+ SmallVector<Value *, 16> WorkList;
+ WorkList.insert(WorkList.end(), arg->user_begin(), arg->user_end());
+ while (!WorkList.empty()) {
+ Value *V = WorkList.back();
+ WorkList.pop_back();
+ if (isa<GetElementPtrInst>(V) || isa<PHINode>(V)) {
+ if (PtrValues.insert(V).second)
+ WorkList.insert(WorkList.end(), V->user_begin(), V->user_end());
+ } else if (StoreInst *Store = dyn_cast<StoreInst>(V)) {
+ Stores.push_back(Store);
+ } else if (!isa<LoadInst>(V)) {
+ return true;
}
}
- // Add any function attributes.
- if (PAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(FTy->getContext(),
- PAL.getFnAttributes()));
+ // Check to make sure the pointers aren't captured
+ for (StoreInst *Store : Stores)
+ if (PtrValues.count(Store->getValueOperand()))
+ return true;
- Type *RetTy = FTy->getReturnType();
+ return false;
+}
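+
+// A sketch of the cases above in IR form, assuming %arg is the byval
+// argument under inspection:
+//
+//   %p = getelementptr %struct.S, %struct.S* %arg, i32 0, i32 1 ; tracked
+//   %v = load i32, i32* %p                   ; load: fine
+//   store i32 0, i32* %p                     ; storing *to* %p: fine
+//   store %struct.S* %arg, %struct.S** %q    ; storing %arg itself escapes
+//                                            ; the pointer -> return true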
- // Construct the new function type using the new arguments.
- FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());
+/// promoteArguments - This method checks the specified function to see if there
+/// are any promotable arguments and if it is safe to promote the function (for
+/// example, all callers are direct). If safe to promote some arguments, it
+/// calls the doPromotion method.
+///
+static Function *
+promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
+ unsigned MaxElements,
+ Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
+ ReplaceCallSite) {
+ // Make sure that it is local to this module.
+ if (!F->hasLocalLinkage())
+ return nullptr;
- // Create the new function body and insert it into the module.
- Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
- NF->copyAttributesFrom(F);
+ // Don't promote arguments for variadic functions. Adding, removing, or
+ // changing non-pack parameters can change the classification of pack
+ // parameters. Frontends encode that classification at the call site in the
+ // IR, while in the callee the classification is determined dynamically based
+ // on the number of registers consumed so far.
+ if (F->isVarArg())
+ return nullptr;
- // Patch the pointer to LLVM function in debug info descriptor.
- NF->setSubprogram(F->getSubprogram());
- F->setSubprogram(nullptr);
+ // First check: see if there are any pointer arguments! If not, quick exit.
+ SmallVector<Argument *, 16> PointerArgs;
+ for (Argument &I : F->args())
+ if (I.getType()->isPointerTy())
+ PointerArgs.push_back(&I);
+ if (PointerArgs.empty())
+ return nullptr;
- DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
- << "From: " << *F);
-
- // Recompute the parameter attributes list based on the new arguments for
- // the function.
- NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec));
- AttributesVec.clear();
+ // Second check: make sure that all callers are direct callers. We can't
+ // transform functions that have indirect callers. Also see if the function
+ // is self-recursive.
+ bool isSelfRecursive = false;
+ for (Use &U : F->uses()) {
+ CallSite CS(U.getUser());
+ // Must be a direct call.
+ if (CS.getInstruction() == nullptr || !CS.isCallee(&U))
+ return nullptr;
- F->getParent()->getFunctionList().insert(F->getIterator(), NF);
- NF->takeName(F);
+ if (CS.getInstruction()->getParent()->getParent() == F)
+ isSelfRecursive = true;
+ }
- // Get a new callgraph node for NF.
- CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
+ const DataLayout &DL = F->getParent()->getDataLayout();
- // Loop over all of the callers of the function, transforming the call sites
- // to pass in the loaded pointers.
- //
- SmallVector<Value*, 16> Args;
- while (!F->use_empty()) {
- CallSite CS(F->user_back());
- assert(CS.getCalledFunction() == F);
- Instruction *Call = CS.getInstruction();
- const AttributeSet &CallPAL = CS.getAttributes();
+ AAResults &AAR = AARGetter(*F);
- // Add any return attributes.
- if (CallPAL.hasAttributes(AttributeSet::ReturnIndex))
- AttributesVec.push_back(AttributeSet::get(F->getContext(),
- CallPAL.getRetAttributes()));
+ // Check to see which arguments are promotable. If an argument is promotable,
+ // add it to ArgsToPromote.
+ SmallPtrSet<Argument *, 8> ArgsToPromote;
+ SmallPtrSet<Argument *, 8> ByValArgsToTransform;
+ for (Argument *PtrArg : PointerArgs) {
+ Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
- // Loop over the operands, inserting GEP and loads in the caller as
- // appropriate.
- CallSite::arg_iterator AI = CS.arg_begin();
- ArgIndex = 1;
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I, ++AI, ++ArgIndex)
- if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
- Args.push_back(*AI); // Unmodified argument
+ // Replace sret attribute with noalias. This reduces register pressure by
+ // avoiding a register copy.
+ if (PtrArg->hasStructRetAttr()) {
+ unsigned ArgNo = PtrArg->getArgNo();
+ F->setAttributes(
+ F->getAttributes()
+ .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
+ .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+ for (Use &U : F->uses()) {
+ CallSite CS(U.getUser());
+ CS.setAttributes(
+ CS.getAttributes()
+ .removeAttribute(F->getContext(), ArgNo + 1,
+ Attribute::StructRet)
+ .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
+ }
+ }
- if (CallPAL.hasAttributes(ArgIndex)) {
- AttrBuilder B(CallPAL, ArgIndex);
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Args.size(), B));
- }
- } else if (ByValArgsToTransform.count(&*I)) {
- // Emit a GEP and load for each element of the struct.
- Type *AgTy = cast<PointerType>(I->getType())->getElementType();
- StructType *STy = cast<StructType>(AgTy);
- Value *Idxs[2] = {
- ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx = GetElementPtrInst::Create(
- STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
- // TODO: Tell AA about the new values?
- Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
+ // If this is a byval argument, and if the aggregate type is small, just
+ // pass the elements, which is always safe if the passed value is densely
+ // packed or if we can prove the padding bytes are never accessed. This does
+ // not apply to inalloca.
+ bool isSafeToPromote =
+ PtrArg->hasByValAttr() &&
+ (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
+ if (isSafeToPromote) {
+ if (StructType *STy = dyn_cast<StructType>(AgTy)) {
+ if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
+ DEBUG(dbgs() << "argpromotion disable promoting argument '"
+ << PtrArg->getName()
+ << "' because it would require adding more"
+ << " than " << MaxElements
+ << " arguments to the function.\n");
+ continue;
}
- } else if (!I->use_empty()) {
- // Non-dead argument: insert GEPs and loads as appropriate.
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
- // Store the Value* version of the indices in here, but declare it now
- // for reuse.
- std::vector<Value*> Ops;
- for (const auto &ArgIndex : ArgIndices) {
- Value *V = *AI;
- LoadInst *OrigLoad =
- OriginalLoads[std::make_pair(&*I, ArgIndex.second)];
- if (!ArgIndex.second.empty()) {
- Ops.reserve(ArgIndex.second.size());
- Type *ElTy = V->getType();
- for (unsigned long II : ArgIndex.second) {
- // Use i32 to index structs, and i64 for others (pointers/arrays).
- // This satisfies GEP constraints.
- Type *IdxTy = (ElTy->isStructTy() ?
- Type::getInt32Ty(F->getContext()) :
- Type::getInt64Ty(F->getContext()));
- Ops.push_back(ConstantInt::get(IdxTy, II));
- // Keep track of the type we're currently indexing.
- if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
- ElTy = ElPTy->getElementType();
- else
- ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
- }
- // And create a GEP to extract those indices.
- V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
- V->getName() + ".idx", Call);
- Ops.clear();
+
+ // If all the elements are single-value types, we can promote it.
+ bool AllSimple = true;
+ for (const auto *EltTy : STy->elements()) {
+ if (!EltTy->isSingleValueType()) {
+ AllSimple = false;
+ break;
}
- // Since we're replacing a load make sure we take the alignment
- // of the previous load.
- LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
- newLoad->setAlignment(OrigLoad->getAlignment());
- // Transfer the AA info too.
- AAMDNodes AAInfo;
- OrigLoad->getAAMetadata(AAInfo);
- newLoad->setAAMetadata(AAInfo);
+ }
- Args.push_back(newLoad);
+ // Safe to transform; don't even bother trying to "promote" it.
+ // Passing the elements as scalars will allow SROA to hack on
+ // the new alloca we introduce.
+ if (AllSimple) {
+ ByValArgsToTransform.insert(PtrArg);
+ continue;
}
}
+ }
- // Push any varargs arguments on the list.
- for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
- Args.push_back(*AI);
- if (CallPAL.hasAttributes(ArgIndex)) {
- AttrBuilder B(CallPAL, ArgIndex);
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ // If the argument is a recursive type and we're in a recursive
+ // function, we could end up infinitely peeling the function argument.
+ if (isSelfRecursive) {
+ if (StructType *STy = dyn_cast<StructType>(AgTy)) {
+ bool RecursiveType = false;
+ for (const auto *EltTy : STy->elements()) {
+ if (EltTy == PtrArg->getType()) {
+ RecursiveType = true;
+ break;
+ }
+ }
+ if (RecursiveType)
+ continue;
}
}
- // Add any function attributes.
- if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(Call->getContext(),
- CallPAL.getFnAttributes()));
+ // Otherwise, see if we can promote the pointer to its value.
+ if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
+ MaxElements))
+ ArgsToPromote.insert(PtrArg);
+ }
+
+ // No promotable pointer arguments.
+ if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
+ return nullptr;
- SmallVector<OperandBundleDef, 1> OpBundles;
- CS.getOperandBundlesAsDefs(OpBundles);
+ return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
+}
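+
+// The net effect, sketched on a hypothetical function @f with a single
+// promotable pointer argument:
+//
+//   define internal i32 @f(i32* %p) {      define internal i32 @f(i32 %p.val) {
+//     %v = load i32, i32* %p          =>     ret i32 %p.val
+//     ret i32 %v                           }
+//   }
+//
+// Each caller is rewritten to perform the load and pass the value directly.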
- Instruction *New;
- if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, OpBundles, "", Call);
- cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
- cast<InvokeInst>(New)->setAttributes(AttributeSet::get(II->getContext(),
- AttributesVec));
- } else {
- New = CallInst::Create(NF, Args, OpBundles, "", Call);
- cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
- cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(),
- AttributesVec));
- cast<CallInst>(New)->setTailCallKind(
- cast<CallInst>(Call)->getTailCallKind());
- }
- New->setDebugLoc(Call->getDebugLoc());
- Args.clear();
- AttributesVec.clear();
+PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG,
+ CGSCCUpdateResult &UR) {
+ bool Changed = false, LocalChange;
- // Update the callgraph to know that the callsite has been transformed.
- CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
- CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN);
+ // Iterate until we stop promoting from this SCC.
+ do {
+ LocalChange = false;
- if (!Call->use_empty()) {
- Call->replaceAllUsesWith(New);
- New->takeName(Call);
+ for (LazyCallGraph::Node &N : C) {
+ Function &OldF = N.getFunction();
+
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+ // FIXME: This lambda must only be used with this function. We should
+ // skip the lambda and just get the AA results directly.
+ auto AARGetter = [&](Function &F) -> AAResults & {
+ assert(&F == &OldF && "Called with an unexpected function!");
+ return FAM.getResult<AAManager>(F);
+ };
+
+ Function *NewF = promoteArguments(&OldF, AARGetter, 3u, None);
+ if (!NewF)
+ continue;
+ LocalChange = true;
+
+ // Directly substitute the functions in the call graph. Note that this
+ // requires the old function to be completely dead and fully
+ // replaced by the new function. It does no call graph updates; it merely
+ // swaps out the particular function mapped to a particular node in the
+ // graph.
+ C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
+ OldF.eraseFromParent();
}
- // Finally, remove the old call from the program, reducing the use-count of
- // F.
- Call->eraseFromParent();
- }
-
- // Since we have now created the new function, splice the body of the old
- // function right into the new function, leaving the old rotting hulk of the
- // function empty.
- NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());
+ Changed |= LocalChange;
+ } while (LocalChange);
- // Loop over the argument list, transferring uses of the old arguments over to
- // the new arguments, also transferring over the names as well.
- //
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
- I2 = NF->arg_begin(); I != E; ++I) {
- if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
- // If this is an unmodified argument, move the name and users over to the
- // new version.
- I->replaceAllUsesWith(&*I2);
- I2->takeName(&*I);
- ++I2;
- continue;
- }
+ if (!Changed)
+ return PreservedAnalyses::all();
- if (ByValArgsToTransform.count(&*I)) {
- // In the callee, we create an alloca, and store each of the new incoming
- // arguments into the alloca.
- Instruction *InsertPt = &NF->begin()->front();
+ return PreservedAnalyses::none();
+}
- // Just add all the struct element types.
- Type *AgTy = cast<PointerType>(I->getType())->getElementType();
- Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt);
- StructType *STy = cast<StructType>(AgTy);
- Value *Idxs[2] = {
- ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
+namespace {
+/// ArgPromotion - The 'by reference' to 'by value' argument promotion pass.
+///
+struct ArgPromotion : public CallGraphSCCPass {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ getAAResultsAnalysisUsage(AU);
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
- Value *Idx = GetElementPtrInst::Create(
- AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
- InsertPt);
- I2->setName(I->getName()+"."+Twine(i));
- new StoreInst(&*I2++, Idx, InsertPt);
- }
+ bool runOnSCC(CallGraphSCC &SCC) override;
+ static char ID; // Pass identification, replacement for typeid
+ explicit ArgPromotion(unsigned MaxElements = 3)
+ : CallGraphSCCPass(ID), MaxElements(MaxElements) {
+ initializeArgPromotionPass(*PassRegistry::getPassRegistry());
+ }
- // Anything that used the arg should now use the alloca.
- I->replaceAllUsesWith(TheAlloca);
- TheAlloca->takeName(&*I);
+private:
+ using llvm::Pass::doInitialization;
+ bool doInitialization(CallGraph &CG) override;
+ /// The maximum number of elements to expand, or 0 for unlimited.
+ unsigned MaxElements;
+};
+}
- // If the alloca is used in a call, we must clear the tail flag since
- // the callee now uses an alloca from the caller.
- for (User *U : TheAlloca->users()) {
- CallInst *Call = dyn_cast<CallInst>(U);
- if (!Call)
- continue;
- Call->setTailCall(false);
- }
- continue;
- }
+char ArgPromotion::ID = 0;
+INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false)
- if (I->use_empty())
- continue;
+Pass *llvm::createArgumentPromotionPass(unsigned MaxElements) {
+ return new ArgPromotion(MaxElements);
+}
- // Otherwise, if we promoted this argument, then all users are load
- // instructions (or GEPs with only load users), and all loads should be
- // using the new argument that we added.
- ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
+bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
+ if (skipSCC(SCC))
+ return false;
- while (!I->use_empty()) {
- if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
- assert(ArgIndices.begin()->second.empty() &&
- "Load element should sort to front!");
- I2->setName(I->getName()+".val");
- LI->replaceAllUsesWith(&*I2);
- LI->eraseFromParent();
- DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
- << "' in function '" << F->getName() << "'\n");
- } else {
- GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
- IndicesVector Operands;
- Operands.reserve(GEP->getNumIndices());
- for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
- II != IE; ++II)
- Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());
+ // Get the callgraph information that we need to update to reflect our
+ // changes.
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
- // GEPs with a single 0 index can be merged with direct loads
- if (Operands.size() == 1 && Operands.front() == 0)
- Operands.clear();
+ LegacyAARGetter AARGetter(*this);
- Function::arg_iterator TheArg = I2;
- for (ScalarizeTable::iterator It = ArgIndices.begin();
- It->second != Operands; ++It, ++TheArg) {
- assert(It != ArgIndices.end() && "GEP not handled??");
- }
+ bool Changed = false, LocalChange;
- std::string NewName = I->getName();
- for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
- NewName += "." + utostr(Operands[i]);
- }
- NewName += ".val";
- TheArg->setName(NewName);
+ // Iterate until we stop promoting from this SCC.
+ do {
+ LocalChange = false;
+ // Attempt to promote arguments from all functions in this SCC.
+ for (CallGraphNode *OldNode : SCC) {
+ Function *OldF = OldNode->getFunction();
+ if (!OldF)
+ continue;
+
+ auto ReplaceCallSite = [&](CallSite OldCS, CallSite NewCS) {
+ Function *Caller = OldCS.getInstruction()->getParent()->getParent();
+ CallGraphNode *NewCalleeNode =
+ CG.getOrInsertFunction(NewCS.getCalledFunction());
+ CallGraphNode *CallerNode = CG[Caller];
+ CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode);
+ };
+
+ if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements,
+ {ReplaceCallSite})) {
+ LocalChange = true;
- DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
- << "' of function '" << NF->getName() << "'\n");
+ // Update the call graph for the newly promoted function.
+ CallGraphNode *NewNode = CG.getOrInsertFunction(NewF);
+ NewNode->stealCalledFunctionsFrom(OldNode);
+ if (OldNode->getNumReferences() == 0)
+ delete CG.removeFunctionFromModule(OldNode);
+ else
+ OldF->setLinkage(Function::ExternalLinkage);
- // All of the uses must be load instructions. Replace them all with
- // the argument specified by ArgNo.
- while (!GEP->use_empty()) {
- LoadInst *L = cast<LoadInst>(GEP->user_back());
- L->replaceAllUsesWith(&*TheArg);
- L->eraseFromParent();
- }
- GEP->eraseFromParent();
+ // And update the SCC we're iterating as well.
+ SCC.ReplaceNode(OldNode, NewNode);
}
}
+ // Remember that we changed something.
+ Changed |= LocalChange;
+ } while (LocalChange);
- // Increment I2 past all of the arguments added for this promoted pointer.
- std::advance(I2, ArgIndices.size());
- }
-
- NF_CGN->stealCalledFunctionsFrom(CG[F]);
-
- // Now that the old function is dead, delete it. If there is a dangling
- // reference to the CallgraphNode, just leave the dead function around for
- // someone else to nuke.
- CallGraphNode *CGN = CG[F];
- if (CGN->getNumReferences() == 0)
- delete CG.removeFunctionFromModule(CGN);
- else
- F->setLinkage(Function::ExternalLinkage);
-
- return NF_CGN;
+ return Changed;
}
bool ArgPromotion::doInitialization(CallGraph &CG) {
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index d75ed206ad23..62b5a9c9ba26 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -60,6 +60,23 @@ static bool IsBetterCanonical(const GlobalVariable &A,
return A.hasGlobalUnnamedAddr();
}
+static bool hasMetadataOtherThanDebugLoc(const GlobalVariable *GV) {
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ GV->getAllMetadata(MDs);
+ for (const auto &V : MDs)
+ if (V.first != LLVMContext::MD_dbg)
+ return true;
+ return false;
+}
+
+static void copyDebugLocMetadata(const GlobalVariable *From,
+ GlobalVariable *To) {
+ SmallVector<DIGlobalVariableExpression *, 1> MDs;
+ From->getDebugInfo(MDs);
+ for (auto MD : MDs)
+ To->addDebugInfo(MD);
+}
+
static unsigned getAlignment(GlobalVariable *GV) {
unsigned Align = GV->getAlignment();
if (Align)
@@ -113,6 +130,10 @@ static bool mergeConstants(Module &M) {
if (GV->isWeakForLinker())
continue;
+ // Don't touch globals with metadata other than !dbg.
+ if (hasMetadataOtherThanDebugLoc(GV))
+ continue;
+
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
@@ -155,6 +176,9 @@ static bool mergeConstants(Module &M) {
if (!Slot->hasGlobalUnnamedAddr() && !GV->hasGlobalUnnamedAddr())
continue;
+ if (hasMetadataOtherThanDebugLoc(GV))
+ continue;
+
if (!GV->hasGlobalUnnamedAddr())
Slot->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
@@ -178,6 +202,8 @@ static bool mergeConstants(Module &M) {
getAlignment(Replacements[i].second)));
}
+ copyDebugLocMetadata(Replacements[i].first, Replacements[i].second);
+
// Eliminate any uses of the dead global.
Replacements[i].first->replaceAllUsesWith(Replacements[i].second);
diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp
index ba2e60dee3bc..1b111de06157 100644
--- a/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -98,8 +98,11 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
LLVMContext &Ctx = M.getContext();
Constant *C = M.getOrInsertFunction(
"__cfi_check", Type::getVoidTy(Ctx), Type::getInt64Ty(Ctx),
- Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx), nullptr);
+ Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx));
Function *F = dyn_cast<Function>(C);
+ // Take over the existing function. The frontend emits a weak stub so that the
+ // linker knows about the symbol; this pass replaces the function body.
+ F->deleteBody();
F->setAlignment(4096);
auto args = F->arg_begin();
Value &CallSiteTypeId = *(args++);
@@ -117,7 +120,7 @@ void CrossDSOCFI::buildCFICheck(Module &M) {
IRBuilder<> IRBFail(TrapBB);
Constant *CFICheckFailFn = M.getOrInsertFunction(
"__cfi_check_fail", Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx),
- Type::getInt8PtrTy(Ctx), nullptr);
+ Type::getInt8PtrTy(Ctx));
IRBFail.CreateCall(CFICheckFailFn, {&CFICheckFailData, &Addr});
IRBFail.CreateBr(ExitBB);
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 1a5ed4692211..375b74c494d9 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -166,41 +166,43 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);
// Drop any attributes that were on the vararg arguments.
- AttributeSet PAL = CS.getAttributes();
+ AttributeList PAL = CS.getAttributes();
if (!PAL.isEmpty() && PAL.getSlotIndex(PAL.getNumSlots() - 1) > NumArgs) {
- SmallVector<AttributeSet, 8> AttributesVec;
+ SmallVector<AttributeList, 8> AttributesVec;
for (unsigned i = 0; PAL.getSlotIndex(i) <= NumArgs; ++i)
AttributesVec.push_back(PAL.getSlotAttributes(i));
- if (PAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(Fn.getContext(),
- PAL.getFnAttributes()));
- PAL = AttributeSet::get(Fn.getContext(), AttributesVec);
+ if (PAL.hasAttributes(AttributeList::FunctionIndex))
+ AttributesVec.push_back(AttributeList::get(Fn.getContext(),
+ AttributeList::FunctionIndex,
+ PAL.getFnAttributes()));
+ PAL = AttributeList::get(Fn.getContext(), AttributesVec);
}
SmallVector<OperandBundleDef, 1> OpBundles;
CS.getOperandBundlesAsDefs(OpBundles);
- Instruction *New;
+ CallSite NewCS;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, OpBundles, "", Call);
- cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
- cast<InvokeInst>(New)->setAttributes(PAL);
+ NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, OpBundles, "", Call);
} else {
- New = CallInst::Create(NF, Args, OpBundles, "", Call);
- cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
- cast<CallInst>(New)->setAttributes(PAL);
- cast<CallInst>(New)->setTailCallKind(
- cast<CallInst>(Call)->getTailCallKind());
+ NewCS = CallInst::Create(NF, Args, OpBundles, "", Call);
+ cast<CallInst>(NewCS.getInstruction())
+ ->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
}
- New->setDebugLoc(Call->getDebugLoc());
+ NewCS.setCallingConv(CS.getCallingConv());
+ NewCS.setAttributes(PAL);
+ NewCS->setDebugLoc(Call->getDebugLoc());
+ uint64_t W;
+ if (Call->extractProfTotalWeight(W))
+ NewCS->setProfWeight(W);
Args.clear();
if (!Call->use_empty())
- Call->replaceAllUsesWith(New);
+ Call->replaceAllUsesWith(NewCS.getInstruction());
- New->takeName(Call);
+ NewCS->takeName(Call);
// Finally, remove the old call from the program, reducing the use-count of
// F.
@@ -681,8 +683,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
bool HasLiveReturnedArg = false;
// Set up to build a new list of parameter attributes.
- SmallVector<AttributeSet, 8> AttributesVec;
- const AttributeSet &PAL = F->getAttributes();
+ SmallVector<AttributeSet, 8> ArgAttrVec;
+ const AttributeList &PAL = F->getAttributes();
// Remember which arguments are still alive.
SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
@@ -696,16 +698,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
if (LiveValues.erase(Arg)) {
Params.push_back(I->getType());
ArgAlive[i] = true;
-
- // Get the original parameter attributes (skipping the first one, that is
- // for the return value.
- if (PAL.hasAttributes(i + 1)) {
- AttrBuilder B(PAL, i + 1);
- if (B.contains(Attribute::Returned))
- HasLiveReturnedArg = true;
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Params.size(), B));
- }
+ ArgAttrVec.push_back(PAL.getParamAttributes(i));
+ HasLiveReturnedArg |= PAL.hasParamAttribute(i, Attribute::Returned);
} else {
++NumArgumentsEliminated;
DEBUG(dbgs() << "DeadArgumentEliminationPass - Removing argument " << i
@@ -779,30 +773,24 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
assert(NRetTy && "No new return type found?");
// The existing function return attributes.
- AttributeSet RAttrs = PAL.getRetAttributes();
+ AttrBuilder RAttrs(PAL.getRetAttributes());
// Remove any incompatible attributes, but only if we removed all return
// values. Otherwise, ensure that we don't have any conflicting attributes
// here. Currently, this should not be possible, but special handling might be
// required when new return value attributes are added.
if (NRetTy->isVoidTy())
- RAttrs = RAttrs.removeAttributes(NRetTy->getContext(),
- AttributeSet::ReturnIndex,
- AttributeFuncs::typeIncompatible(NRetTy));
+ RAttrs.remove(AttributeFuncs::typeIncompatible(NRetTy));
else
- assert(!AttrBuilder(RAttrs, AttributeSet::ReturnIndex).
- overlaps(AttributeFuncs::typeIncompatible(NRetTy)) &&
+ assert(!RAttrs.overlaps(AttributeFuncs::typeIncompatible(NRetTy)) &&
"Return attributes no longer compatible?");
- if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
- AttributesVec.push_back(AttributeSet::get(NRetTy->getContext(), RAttrs));
-
- if (PAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(F->getContext(),
- PAL.getFnAttributes()));
+ AttributeSet RetAttrs = AttributeSet::get(F->getContext(), RAttrs);
// Reconstruct the AttributesList based on the vector we constructed.
- AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec);
+ assert(ArgAttrVec.size() == Params.size());
+ AttributeList NewPAL = AttributeList::get(
+ F->getContext(), PAL.getFnAttributes(), RetAttrs, ArgAttrVec);
// Create the new function type based on the recomputed parameters.
FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
@@ -829,18 +817,14 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
CallSite CS(F->user_back());
Instruction *Call = CS.getInstruction();
- AttributesVec.clear();
- const AttributeSet &CallPAL = CS.getAttributes();
-
- // The call return attributes.
- AttributeSet RAttrs = CallPAL.getRetAttributes();
+ ArgAttrVec.clear();
+ const AttributeList &CallPAL = CS.getAttributes();
- // Adjust in case the function was changed to return void.
- RAttrs = RAttrs.removeAttributes(NRetTy->getContext(),
- AttributeSet::ReturnIndex,
- AttributeFuncs::typeIncompatible(NF->getReturnType()));
- if (RAttrs.hasAttributes(AttributeSet::ReturnIndex))
- AttributesVec.push_back(AttributeSet::get(NF->getContext(), RAttrs));
+ // Adjust the call return attributes in case the function was changed to
+ // return void.
+ AttrBuilder RAttrs(CallPAL.getRetAttributes());
+ RAttrs.remove(AttributeFuncs::typeIncompatible(NRetTy));
+ AttributeSet RetAttrs = AttributeSet::get(F->getContext(), RAttrs);
// Declare these outside of the loops, so we can reuse them for the second
// loop, which loops over the varargs.
@@ -852,57 +836,55 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
if (ArgAlive[i]) {
Args.push_back(*I);
// Get original parameter attributes, but skip return attributes.
- if (CallPAL.hasAttributes(i + 1)) {
- AttrBuilder B(CallPAL, i + 1);
+ AttributeSet Attrs = CallPAL.getParamAttributes(i);
+ if (NRetTy != RetTy && Attrs.hasAttribute(Attribute::Returned)) {
// If the return type has changed, then get rid of 'returned' on the
// call site. The alternative is to make all 'returned' attributes on
// call sites keep the return value alive just like 'returned'
- // attributes on function declaration but it's less clearly a win
- // and this is not an expected case anyway
- if (NRetTy != RetTy && B.contains(Attribute::Returned))
- B.removeAttribute(Attribute::Returned);
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Args.size(), B));
+ // attributes on function declarations, but it's less clearly a win and
+ // this is not an expected case anyway.
+ ArgAttrVec.push_back(AttributeSet::get(
+ F->getContext(),
+ AttrBuilder(Attrs).removeAttribute(Attribute::Returned)));
+ } else {
+ // Otherwise, use the original attributes.
+ ArgAttrVec.push_back(Attrs);
}
}
// Push any varargs arguments on the list. Don't forget their attributes.
for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
Args.push_back(*I);
- if (CallPAL.hasAttributes(i + 1)) {
- AttrBuilder B(CallPAL, i + 1);
- AttributesVec.
- push_back(AttributeSet::get(F->getContext(), Args.size(), B));
- }
+ ArgAttrVec.push_back(CallPAL.getParamAttributes(i));
}
- if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
- AttributesVec.push_back(AttributeSet::get(Call->getContext(),
- CallPAL.getFnAttributes()));
-
// Reconstruct the AttributesList based on the vector we constructed.
- AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec);
+ assert(ArgAttrVec.size() == Args.size());
+ AttributeList NewCallPAL = AttributeList::get(
+ F->getContext(), CallPAL.getFnAttributes(), RetAttrs, ArgAttrVec);
SmallVector<OperandBundleDef, 1> OpBundles;
CS.getOperandBundlesAsDefs(OpBundles);
- Instruction *New;
+ CallSite NewCS;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
- New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args, OpBundles, "", Call->getParent());
- cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
- cast<InvokeInst>(New)->setAttributes(NewCallPAL);
+ NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+ Args, OpBundles, "", Call->getParent());
} else {
- New = CallInst::Create(NF, Args, OpBundles, "", Call);
- cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
- cast<CallInst>(New)->setAttributes(NewCallPAL);
- cast<CallInst>(New)->setTailCallKind(
- cast<CallInst>(Call)->getTailCallKind());
+ NewCS = CallInst::Create(NF, Args, OpBundles, "", Call);
+ cast<CallInst>(NewCS.getInstruction())
+ ->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
}
- New->setDebugLoc(Call->getDebugLoc());
-
+ NewCS.setCallingConv(CS.getCallingConv());
+ NewCS.setAttributes(NewCallPAL);
+ NewCS->setDebugLoc(Call->getDebugLoc());
+ uint64_t W;
+ if (Call->extractProfTotalWeight(W))
+ NewCS->setProfWeight(W);
Args.clear();
+ ArgAttrVec.clear();
+ Instruction *New = NewCS.getInstruction();
if (!Call->use_empty()) {
if (New->getType() == Call->getType()) {
// Return type not changed? Just replace users then.
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 402a66552c24..4d13b3f40688 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -49,31 +49,35 @@ STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
-namespace {
-typedef SmallSetVector<Function *, 8> SCCNodeSet;
-}
+// FIXME: This is disabled by default to avoid exposing security vulnerabilities
+// in C/C++ code compiled by clang:
+// http://lists.llvm.org/pipermail/cfe-dev/2017-January/052066.html
+static cl::opt<bool> EnableNonnullArgPropagation(
+ "enable-nonnull-arg-prop", cl::Hidden,
+ cl::desc("Try to propagate nonnull argument attributes from callsites to "
+ "caller functions."));
namespace {
-/// The three kinds of memory access relevant to 'readonly' and
-/// 'readnone' attributes.
-enum MemoryAccessKind {
- MAK_ReadNone = 0,
- MAK_ReadOnly = 1,
- MAK_MayWrite = 2
-};
+typedef SmallSetVector<Function *, 8> SCCNodeSet;
}
-static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR,
+/// Returns the memory access attribute for function F using AAR for AA results,
+/// where SCCNodes is the current SCC.
+///
+/// If ThisBody is true, this function may examine the function body and will
+/// return a result pertaining to this copy of the function. If it is false, the
+/// result will be based only on AA results for the function declaration; it
+/// will be assumed that some other (perhaps less optimized) version of the
+/// function may be selected at link time.
+static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
+ AAResults &AAR,
const SCCNodeSet &SCCNodes) {
FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F);
if (MRB == FMRB_DoesNotAccessMemory)
// Already perfect!
return MAK_ReadNone;
- // Non-exact function definitions may not be selected at link time, and an
- // alternative version that writes to memory may be selected. See the comment
- // on GlobalValue::isDefinitionExact for more details.
- if (!F.hasExactDefinition()) {
+ if (!ThisBody) {
if (AliasAnalysis::onlyReadsMemory(MRB))
return MAK_ReadOnly;
@@ -172,9 +176,14 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR,
return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
}
+MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F,
+ AAResults &AAR) {
+ return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {});
+}
+
/// Deduce readonly/readnone attributes for the SCC.
template <typename AARGetterT>
-static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) {
+static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
// Check if any of the functions in the SCC read or write memory. If they
// write memory then they can't be marked readnone or readonly.
bool ReadsMemory = false;
@@ -182,7 +191,11 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) {
// Call the callable parameter to look up AA results for this function.
AAResults &AAR = AARGetter(*F);
- switch (checkFunctionMemoryAccess(*F, AAR, SCCNodes)) {
+ // Non-exact function definitions may not be selected at link time, and an
+ // alternative version that writes to memory may be selected. See the
+ // comment on GlobalValue::isDefinitionExact for more details.
+ switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(),
+ AAR, SCCNodes)) {
case MAK_MayWrite:
return false;
case MAK_ReadOnly:
@@ -212,11 +225,11 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) {
AttrBuilder B;
B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
F->removeAttributes(
- AttributeSet::FunctionIndex,
- AttributeSet::get(F->getContext(), AttributeSet::FunctionIndex, B));
+ AttributeList::FunctionIndex,
+ AttributeList::get(F->getContext(), AttributeList::FunctionIndex, B));
// Add in the new attribute.
- F->addAttribute(AttributeSet::FunctionIndex,
+ F->addAttribute(AttributeList::FunctionIndex,
ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
if (ReadsMemory)
@@ -522,7 +535,7 @@ static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
if (Value *RetArg = FindRetArg()) {
auto *A = cast<Argument>(RetArg);
- A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(AttributeList::get(F->getContext(), A->getArgNo() + 1, B));
++NumReturned;
Changed = true;
}
@@ -531,6 +544,49 @@ static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
return Changed;
}
+/// If a callsite has arguments that are also arguments to the parent function,
+/// try to propagate attributes from the callsite's arguments to the parent's
+/// arguments. This may be important because inlining can cause information loss
+/// when attribute knowledge disappears with the inlined call.
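+///
+/// Illustrative example (hypothetical IR): given
+///   declare void @callee(i32* nonnull)
+///   define void @caller(i32* %p) { call void @callee(i32* %p) ... }
+/// a call in the entry block that is guaranteed to execute lets %p be
+/// marked nonnull on @caller's own argument list as well.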
+static bool addArgumentAttrsFromCallsites(Function &F) {
+ if (!EnableNonnullArgPropagation)
+ return false;
+
+ bool Changed = false;
+
+ // For an argument attribute to transfer from a callsite to the parent, the
+ // call must be guaranteed to execute every time the parent is called.
+ // Conservatively, just check for calls in the entry block that are guaranteed
+ // to execute.
+ // TODO: This could be enhanced by testing if the callsite post-dominates the
+ // entry block or by doing simple forward walks or backward walks to the
+ // callsite.
+ BasicBlock &Entry = F.getEntryBlock();
+ for (Instruction &I : Entry) {
+ if (auto CS = CallSite(&I)) {
+ if (auto *CalledFunc = CS.getCalledFunction()) {
+ for (auto &CSArg : CalledFunc->args()) {
+ if (!CSArg.hasNonNullAttr())
+ continue;
+
+ // If the non-null callsite argument operand is an argument to 'F'
+ // (the caller) and the call is guaranteed to execute, then the value
+ // must be non-null throughout 'F'.
+ auto *FArg = dyn_cast<Argument>(CS.getArgOperand(CSArg.getArgNo()));
+ if (FArg && !FArg->hasNonNullAttr()) {
+ FArg->addAttr(Attribute::NonNull);
+ Changed = true;
+ }
+ }
+ }
+ }
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ break;
+ }
+
+ return Changed;
+}
+
/// Deduce nocapture attributes for the SCC.
static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
bool Changed = false;
@@ -549,6 +605,8 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (!F->hasExactDefinition())
continue;
+ Changed |= addArgumentAttrsFromCallsites(*F);
+
// Functions that are readonly (or readnone) and nounwind and don't return
// a value can't capture arguments. Don't analyze them.
if (F->onlyReadsMemory() && F->doesNotThrow() &&
@@ -556,7 +614,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
++A) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
- A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(AttributeList::get(F->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
Changed = true;
}
@@ -576,7 +634,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (Tracker.Uses.empty()) {
// If it's trivially not captured, mark it nocapture now.
A->addAttr(
- AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
+ AttributeList::get(F->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
Changed = true;
} else {
@@ -604,7 +662,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (R != Attribute::None) {
AttrBuilder B;
B.addAttribute(R);
- A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
Changed = true;
R == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
}
@@ -629,7 +687,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (ArgumentSCC[0]->Uses.size() == 1 &&
ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
Argument *A = ArgumentSCC[0]->Definition;
- A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
Changed = true;
}
@@ -671,7 +729,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
++NumNoCapture;
Changed = true;
}
@@ -708,8 +766,9 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
// Clear out existing readonly/readnone attributes
- A->removeAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, R));
- A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
+ A->removeAttr(
+ AttributeList::get(A->getContext(), A->getArgNo() + 1, R));
+ A->addAttr(AttributeList::get(A->getContext(), A->getArgNo() + 1, B));
ReadAttr == Attribute::ReadOnly ? ++NumReadOnlyArg : ++NumReadNoneArg;
Changed = true;
}
@@ -769,7 +828,7 @@ static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
case Instruction::Call:
case Instruction::Invoke: {
CallSite CS(RVI);
- if (CS.paramHasAttr(0, Attribute::NoAlias))
+ if (CS.hasRetAttr(Attribute::NoAlias))
break;
if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
break;
@@ -905,7 +964,7 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
// pointers.
for (Function *F : SCCNodes) {
// Already nonnull.
- if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::NonNull))
continue;
@@ -926,7 +985,7 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
// Mark the function eagerly since we may discover a function
// which prevents us from speculating about the entire SCC
DEBUG(dbgs() << "Eagerly marking " << F->getName() << " as nonnull\n");
- F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
++NumNonNullReturn;
MadeChange = true;
}
@@ -939,13 +998,13 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
if (SCCReturnsNonNull) {
for (Function *F : SCCNodes) {
- if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::NonNull) ||
!F->getReturnType()->isPointerTy())
continue;
DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
- F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
++NumNonNullReturn;
MadeChange = true;
}
@@ -1163,19 +1222,7 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
if (skipSCC(SCC))
return false;
-
- // We compute dedicated AA results for each function in the SCC as needed. We
- // use a lambda referencing external objects so that they live long enough to
- // be queried, but we re-use them each time.
- Optional<BasicAAResult> BAR;
- Optional<AAResults> AAR;
- auto AARGetter = [&](Function &F) -> AAResults & {
- BAR.emplace(createLegacyPMBasicAAResult(*this, F));
- AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
- return *AAR;
- };
-
- return runImpl(SCC, AARGetter);
+ return runImpl(SCC, LegacyAARGetter(*this));
}
namespace {
@@ -1275,16 +1322,9 @@ PreservedAnalyses
ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
auto &CG = AM.getResult<CallGraphAnalysis>(M);
- bool Changed = deduceFunctionAttributeInRPO(M, CG);
-
- // CallGraphAnalysis holds AssertingVH and must be invalidated eagerly so
- // that other passes don't delete stuff from under it.
- // FIXME: We need to invalidate this to avoid PR28400. Is there a better
- // solution?
- AM.invalidate<CallGraphAnalysis>(M);
-
- if (!Changed)
+ if (!deduceFunctionAttributeInRPO(M, CG))
return PreservedAnalyses::all();
+
PreservedAnalyses PA;
PA.preserve<CallGraphAnalysis>();
return PA;
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 6b32f6c31f72..d66411f04cc4 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -75,12 +75,6 @@ static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
static cl::opt<bool> ComputeDead("compute-dead", cl::init(true), cl::Hidden,
cl::desc("Compute dead symbols"));
-// Temporary allows the function import pass to disable always linking
-// referenced discardable symbols.
-static cl::opt<bool>
- DontForceImportReferencedDiscardableSymbols("disable-force-link-odr",
- cl::init(false), cl::Hidden);
-
static cl::opt<bool> EnableImportMetadata(
"enable-import-metadata", cl::init(
#if !defined(NDEBUG)
@@ -124,7 +118,7 @@ namespace {
static const GlobalValueSummary *
selectCallee(const ModuleSummaryIndex &Index,
const GlobalValueSummaryList &CalleeSummaryList,
- unsigned Threshold) {
+ unsigned Threshold, StringRef CallerModulePath) {
auto It = llvm::find_if(
CalleeSummaryList,
[&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
@@ -145,6 +139,21 @@ selectCallee(const ModuleSummaryIndex &Index,
auto *Summary = cast<FunctionSummary>(GVSummary);
+ // If this is a local function, make sure we import the copy
+ // in the caller's module. The only time a local function can
+ // share an entry in the index is if there is a local with the same name
+ // in another module that had the same source file name (in a different
+ // directory), where each was compiled in its own directory so there
+ // was no distinguishing path.
+ // However, do the import from another module if there is only one
+ // entry in the list - in that case this must be a reference due
+ // to indirect call profile data, since a function pointer can point to
+ // a local in another module.
+ if (GlobalValue::isLocalLinkage(Summary->linkage()) &&
+ CalleeSummaryList.size() > 1 &&
+ Summary->modulePath() != CallerModulePath)
+ return false;
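+
+ // Illustrative scenario (hypothetical): a/foo.c and b/foo.c each define
+ // "static void f()" and each was compiled as plain "foo.c"; both locals
+ // then collapse to one GUID with two summary entries, and only the entry
+ // whose modulePath() matches the caller's module may be imported.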
+
if (Summary->instCount() > Threshold)
return false;
@@ -163,11 +172,13 @@ selectCallee(const ModuleSummaryIndex &Index,
/// null if there's no match.
static const GlobalValueSummary *selectCallee(GlobalValue::GUID GUID,
unsigned Threshold,
- const ModuleSummaryIndex &Index) {
+ const ModuleSummaryIndex &Index,
+ StringRef CallerModulePath) {
auto CalleeSummaryList = Index.findGlobalValueSummaryList(GUID);
if (CalleeSummaryList == Index.end())
return nullptr; // This function does not have a summary
- return selectCallee(Index, CalleeSummaryList->second, Threshold);
+ return selectCallee(Index, CalleeSummaryList->second, Threshold,
+ CallerModulePath);
}
using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */,
@@ -186,6 +197,15 @@ static void computeImportForFunction(
auto GUID = Edge.first.getGUID();
DEBUG(dbgs() << " edge -> " << GUID << " Threshold:" << Threshold << "\n");
+ if (Index.findGlobalValueSummaryList(GUID) == Index.end()) {
+ // For SamplePGO, the indirect call targets for local functions will
+ // have their original names annotated in the profile. We try to find the
+ // corresponding PGOFuncName as the GUID.
+ GUID = Index.getGUIDFromOriginalID(GUID);
+ if (GUID == 0)
+ continue;
+ }
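+
+ // Illustrative case (hypothetical): the profile names a local "foo" by the
+ // GUID of its original name, while the index keys it by the GUID of its
+ // PGOFuncName (e.g. "foo.c:foo"); getGUIDFromOriginalID bridges the two
+ // and returns 0 when no mapping exists.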
+
if (DefinedGVSummaries.count(GUID)) {
DEBUG(dbgs() << "ignored! Target already in destination module.\n");
continue;
@@ -202,7 +222,8 @@ static void computeImportForFunction(
const auto NewThreshold =
Threshold * GetBonusMultiplier(Edge.second.Hotness);
- auto *CalleeSummary = selectCallee(GUID, NewThreshold, Index);
+ auto *CalleeSummary =
+ selectCallee(GUID, NewThreshold, Index, Summary.modulePath());
if (!CalleeSummary) {
DEBUG(dbgs() << "ignored! No qualifying callee with summary found.\n");
continue;
@@ -522,6 +543,23 @@ llvm::EmitImportsFiles(StringRef ModulePath, StringRef OutputFilename,
/// Fixup WeakForLinker linkages in \p TheModule based on summary analysis.
void llvm::thinLTOResolveWeakForLinkerModule(
Module &TheModule, const GVSummaryMapTy &DefinedGlobals) {
+ auto ConvertToDeclaration = [](GlobalValue &GV) {
+ DEBUG(dbgs() << "Converting to a declaration: `" << GV.getName() << "\n");
+ if (Function *F = dyn_cast<Function>(&GV)) {
+ F->deleteBody();
+ F->clearMetadata();
+ } else if (GlobalVariable *V = dyn_cast<GlobalVariable>(&GV)) {
+ V->setInitializer(nullptr);
+ V->setLinkage(GlobalValue::ExternalLinkage);
+ V->clearMetadata();
+ } else
+ // For now we don't resolve or drop aliases. Once we do we'll
+ // need to add support here for creating either a function or
+ // variable declaration, and return the new GlobalValue* for
+ // the caller to use.
+ llvm_unreachable("Expected function or variable");
+ };
+
auto updateLinkage = [&](GlobalValue &GV) {
if (!GlobalValue::isWeakForLinker(GV.getLinkage()))
return;
@@ -532,18 +570,25 @@ void llvm::thinLTOResolveWeakForLinkerModule(
auto NewLinkage = GS->second->linkage();
if (NewLinkage == GV.getLinkage())
return;
- DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName() << "` from "
- << GV.getLinkage() << " to " << NewLinkage << "\n");
- GV.setLinkage(NewLinkage);
- // Remove functions converted to available_externally from comdats,
+ // Check for a non-prevailing def that has interposable linkage
+ // (e.g. non-odr weak or linkonce). In that case we can't simply
+ // convert to available_externally, since it would lose the
+ // interposable property and possibly get inlined. Simply drop
+ // the definition in that case.
+ if (GlobalValue::isAvailableExternallyLinkage(NewLinkage) &&
+ GlobalValue::isInterposableLinkage(GV.getLinkage()))
+ ConvertToDeclaration(GV);
+ else {
+ DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName() << "` from "
+ << GV.getLinkage() << " to " << NewLinkage << "\n");
+ GV.setLinkage(NewLinkage);
+ }
+ // Remove declarations from comdats, including available_externally
// as this is a declaration for the linker, and will be dropped eventually.
// It is illegal for comdats to contain declarations.
auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
- if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
- assert(GO->hasAvailableExternallyLinkage() &&
- "Expected comdat on definition (possibly available external)");
+ if (GO && GO->isDeclarationForLinker() && GO->hasComdat())
GO->setComdat(nullptr);
- }
};
// Process functions and globals now
@@ -562,7 +607,7 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
// the current module.
StringSet<> AsmUndefinedRefs;
ModuleSymbolTable::CollectAsmSymbols(
- Triple(TheModule.getTargetTriple()), TheModule.getModuleInlineAsm(),
+ TheModule,
[&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
if (Flags & object::BasicSymbolRef::SF_Undefined)
AsmUndefinedRefs.insert(Name);
@@ -617,14 +662,12 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
// index.
//
Expected<bool> FunctionImporter::importFunctions(
- Module &DestModule, const FunctionImporter::ImportMapTy &ImportList,
- bool ForceImportReferencedDiscardableSymbols) {
+ Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {
DEBUG(dbgs() << "Starting import for Module "
<< DestModule.getModuleIdentifier() << "\n");
unsigned ImportedCount = 0;
- // Linker that will be used for importing function
- Linker TheLinker(DestModule);
+ IRMover Mover(DestModule);
// Do the actual import of functions now, one Module at a time
std::set<StringRef> ModuleNameOrderedList;
for (auto &FunctionsToImportPerModule : ImportList) {
@@ -648,7 +691,7 @@ Expected<bool> FunctionImporter::importFunctions(
auto &ImportGUIDs = FunctionsToImportPerModule->second;
// Find the globals to import
- DenseSet<const GlobalValue *> GlobalsToImport;
+ SetVector<GlobalValue *> GlobalsToImport;
for (Function &F : *SrcModule) {
if (!F.hasName())
continue;
@@ -687,6 +730,13 @@ Expected<bool> FunctionImporter::importFunctions(
}
}
for (GlobalAlias &GA : SrcModule->aliases()) {
+ // FIXME: This should eventually be controlled entirely by the summary.
+ if (FunctionImportGlobalProcessing::doImportAsDefinition(
+ &GA, &GlobalsToImport)) {
+ GlobalsToImport.insert(&GA);
+ continue;
+ }
+
if (!GA.hasName())
continue;
auto GUID = GA.getGUID();
@@ -731,12 +781,9 @@ Expected<bool> FunctionImporter::importFunctions(
<< " from " << SrcModule->getSourceFileName() << "\n";
}
- // Instruct the linker that the client will take care of linkonce resolution
- unsigned Flags = Linker::Flags::None;
- if (!ForceImportReferencedDiscardableSymbols)
- Flags |= Linker::Flags::DontForceLinkLinkonceODR;
-
- if (TheLinker.linkInModule(std::move(SrcModule), Flags, &GlobalsToImport))
+ if (Mover.move(std::move(SrcModule), GlobalsToImport.getArrayRef(),
+ [](GlobalValue &, IRMover::ValueAdder) {},
+ /*IsPerformingImport=*/true))
report_fatal_error("Function Import: link error");
ImportedCount += GlobalsToImport.size();
@@ -796,8 +843,7 @@ static bool doImportingForModule(Module &M) {
return loadFile(Identifier, M.getContext());
};
FunctionImporter Importer(*Index, ModuleLoader);
- Expected<bool> Result = Importer.importFunctions(
- M, ImportList, !DontForceImportReferencedDiscardableSymbols);
+ Expected<bool> Result = Importer.importFunctions(M, ImportList);
// FIXME: Probably need to propagate Errors through the pass manager.
if (!Result) {
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 7a04de3d12db..c91e8b454927 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -25,7 +25,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
-#include <unordered_map>
+
using namespace llvm;
#define DEBUG_TYPE "globaldce"
@@ -50,7 +50,14 @@ namespace {
if (skipModule(M))
return false;
+ // We need a minimally functional dummy module analysis manager. It needs
+ // to at least know about the possibility of proxying a function analysis
+ // manager.
+ FunctionAnalysisManager DummyFAM;
ModuleAnalysisManager DummyMAM;
+ DummyMAM.registerPass(
+ [&] { return FunctionAnalysisManagerModuleProxy(DummyFAM); });
+
auto PA = Impl.run(M, DummyMAM);
return !PA.areAllPreserved();
}
@@ -78,9 +85,67 @@ static bool isEmptyFunction(Function *F) {
return RI.getReturnValue() == nullptr;
}
-PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
+/// Compute the set of GlobalValues that depend on V.
+/// The recursion stops as soon as a GlobalValue is encountered.
+void GlobalDCEPass::ComputeDependencies(Value *V,
+ SmallPtrSetImpl<GlobalValue *> &Deps) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ Function *Parent = I->getParent()->getParent();
+ Deps.insert(Parent);
+ } else if (auto *GV = dyn_cast<GlobalValue>(V)) {
+ Deps.insert(GV);
+ } else if (auto *CE = dyn_cast<Constant>(V)) {
+ // Avoid walking the whole tree of a big ConstantExpr multiple times.
+ auto Where = ConstantDependenciesCache.find(CE);
+ if (Where != ConstantDependenciesCache.end()) {
+ auto const &K = Where->second;
+ Deps.insert(K.begin(), K.end());
+ } else {
+ SmallPtrSetImpl<GlobalValue *> &LocalDeps = ConstantDependenciesCache[CE];
+ for (User *CEUser : CE->users())
+ ComputeDependencies(CEUser, LocalDeps);
+ Deps.insert(LocalDeps.begin(), LocalDeps.end());
+ }
+ }
+}
+
+void GlobalDCEPass::UpdateGVDependencies(GlobalValue &GV) {
+ SmallPtrSet<GlobalValue *, 8> Deps;
+ for (User *User : GV.users())
+ ComputeDependencies(User, Deps);
+ Deps.erase(&GV); // Remove self-reference.
+ for (GlobalValue *GVU : Deps) {
+ GVDependencies.insert(std::make_pair(GVU, &GV));
+ }
+}
+
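+// Illustrative example (hypothetical module): for
+//   @g = internal global i32 0
+//   define void @f() { ...; store i32 1, i32* @g; ... }
+// walking @g's users reaches the store, whose parent function @f is the
+// recorded dependency; GVDependencies then maps @f -> @g, so marking @f
+// live later marks @g live as well.
+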
+/// Mark a GlobalValue as live.
+void GlobalDCEPass::MarkLive(GlobalValue &GV,
+ SmallVectorImpl<GlobalValue *> *Updates) {
+ auto const Ret = AliveGlobals.insert(&GV);
+ if (!Ret.second)
+ return;
+
+ if (Updates)
+ Updates->push_back(&GV);
+ if (Comdat *C = GV.getComdat()) {
+ for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
+ MarkLive(*CM.second, Updates); // Recursion depth is only two because only
+ // globals in the same comdat are visited.
+ }
+}
+
+PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
bool Changed = false;
+ // The algorithm first computes the set L of global variables that are
+ // trivially live. Then it walks the initialization of these variables to
+ // compute the globals used to initialize them, which effectively builds a
+ // directed graph where nodes are global variables, and an edge from A to B
+ // means B is used to initialize A. Finally, it propagates the liveness
+ // information through the graph starting from the nodes in L. Nodes not
+ // marked as alive are discarded.
+
// Remove empty functions from the global ctors list.
Changed |= optimizeGlobalCtorsList(M, isEmptyFunction);
@@ -103,21 +168,39 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
// initializer.
if (!GO.isDeclaration() && !GO.hasAvailableExternallyLinkage())
if (!GO.isDiscardableIfUnused())
- GlobalIsNeeded(&GO);
+ MarkLive(GO);
+
+ UpdateGVDependencies(GO);
}
+ // Compute direct dependencies of aliases.
for (GlobalAlias &GA : M.aliases()) {
Changed |= RemoveUnusedGlobalValue(GA);
// Externally visible aliases are needed.
if (!GA.isDiscardableIfUnused())
- GlobalIsNeeded(&GA);
+ MarkLive(GA);
+
+ UpdateGVDependencies(GA);
}
+ // Compute direct dependencies of ifuncs.
for (GlobalIFunc &GIF : M.ifuncs()) {
Changed |= RemoveUnusedGlobalValue(GIF);
// Externally visible ifuncs are needed.
if (!GIF.isDiscardableIfUnused())
- GlobalIsNeeded(&GIF);
+ MarkLive(GIF);
+
+ UpdateGVDependencies(GIF);
+ }
+
+ // Propagate liveness from collected Global Values through the computed
+ // dependencies.
+ SmallVector<GlobalValue *, 8> NewLiveGVs{AliveGlobals.begin(),
+ AliveGlobals.end()};
+ while (!NewLiveGVs.empty()) {
+ GlobalValue *LGV = NewLiveGVs.pop_back_val();
+ for (auto &&GVD : make_range(GVDependencies.equal_range(LGV)))
+ MarkLive(*GVD.second, &NewLiveGVs);
}
// Now that all globals which are needed are in the AliveGlobals set, we loop
@@ -154,7 +237,7 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
GA.setAliasee(nullptr);
}
- // The third pass drops targets of ifuncs which are dead...
+ // The fourth pass drops targets of ifuncs which are dead...
std::vector<GlobalIFunc*> DeadIFuncs;
for (GlobalIFunc &GIF : M.ifuncs())
if (!AliveGlobals.count(&GIF)) {
@@ -188,7 +271,8 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
// Make sure that all memory is released
AliveGlobals.clear();
- SeenConstants.clear();
+ ConstantDependenciesCache.clear();
+ GVDependencies.clear();
ComdatMembers.clear();
if (Changed)
@@ -196,60 +280,6 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
return PreservedAnalyses::all();
}
-/// GlobalIsNeeded - the specific global value as needed, and
-/// recursively mark anything that it uses as also needed.
-void GlobalDCEPass::GlobalIsNeeded(GlobalValue *G) {
- // If the global is already in the set, no need to reprocess it.
- if (!AliveGlobals.insert(G).second)
- return;
-
- if (Comdat *C = G->getComdat()) {
- for (auto &&CM : make_range(ComdatMembers.equal_range(C)))
- GlobalIsNeeded(CM.second);
- }
-
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(G)) {
- // If this is a global variable, we must make sure to add any global values
- // referenced by the initializer to the alive set.
- if (GV->hasInitializer())
- MarkUsedGlobalsAsNeeded(GV->getInitializer());
- } else if (GlobalIndirectSymbol *GIS = dyn_cast<GlobalIndirectSymbol>(G)) {
- // The target of a global alias or ifunc is needed.
- MarkUsedGlobalsAsNeeded(GIS->getIndirectSymbol());
- } else {
- // Otherwise this must be a function object. We have to scan the body of
- // the function looking for constants and global values which are used as
- // operands. Any operands of these types must be processed to ensure that
- // any globals used will be marked as needed.
- Function *F = cast<Function>(G);
-
- for (Use &U : F->operands())
- MarkUsedGlobalsAsNeeded(cast<Constant>(U.get()));
-
- for (BasicBlock &BB : *F)
- for (Instruction &I : BB)
- for (Use &U : I.operands())
- if (GlobalValue *GV = dyn_cast<GlobalValue>(U))
- GlobalIsNeeded(GV);
- else if (Constant *C = dyn_cast<Constant>(U))
- MarkUsedGlobalsAsNeeded(C);
- }
-}
-
-void GlobalDCEPass::MarkUsedGlobalsAsNeeded(Constant *C) {
- if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return GlobalIsNeeded(GV);
-
- // Loop over all of the operands of the constant, adding any globals they
- // use to the list of needed globals.
- for (Use &U : C->operands()) {
- // If we've already processed this constant there's no need to do it again.
- Constant *Op = dyn_cast<Constant>(U);
- if (Op && SeenConstants.insert(Op).second)
- MarkUsedGlobalsAsNeeded(Op);
- }
-}
-
// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
// GlobalValue, looking for the constant pointer ref that may be pointing to it.
// If found, check to see if the constant pointer ref is safe to destroy, and if
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 5b0d5e3bc01e..ade4f21ceb52 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1819,12 +1819,14 @@ static bool processInternalGlobal(
GS.AccessingFunction->doesNotRecurse() &&
isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
LookupDomTree)) {
+ const DataLayout &DL = GV->getParent()->getDataLayout();
+
DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
->getEntryBlock().begin());
Type *ElemTy = GV->getValueType();
// FIXME: Pass Global's alignment when globals have alignment
- AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr,
+ AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr,
GV->getName(), &FirstI);
if (!isa<UndefValue>(GV->getInitializer()))
new StoreInst(GV->getInitializer(), Alloca, &FirstI);
@@ -1977,7 +1979,7 @@ static void ChangeCalleesToFastCall(Function *F) {
}
}
-static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) {
+static AttributeList StripNest(LLVMContext &C, const AttributeList &Attrs) {
for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
unsigned Index = Attrs.getSlotIndex(i);
if (!Attrs.getSlotAttributes(i).hasAttribute(Index, Attribute::Nest))
@@ -2387,7 +2389,7 @@ OptimizeGlobalAliases(Module &M,
}
static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
- LibFunc::Func F = LibFunc::cxa_atexit;
+ LibFunc F = LibFunc_cxa_atexit;
if (!TLI->has(F))
return nullptr;
@@ -2396,7 +2398,7 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
return nullptr;
// Make sure that the function has the correct prototype.
- if (!TLI->getLibFunc(*Fn, F) || F != LibFunc::cxa_atexit)
+ if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
return nullptr;
return Fn;
diff --git a/lib/Transforms/IPO/GlobalSplit.cpp b/lib/Transforms/IPO/GlobalSplit.cpp
index bbbd096e89c0..4705ebe265ae 100644
--- a/lib/Transforms/IPO/GlobalSplit.cpp
+++ b/lib/Transforms/IPO/GlobalSplit.cpp
@@ -85,7 +85,16 @@ bool splitGlobal(GlobalVariable &GV) {
uint64_t ByteOffset = cast<ConstantInt>(
cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
->getZExtValue();
- if (ByteOffset < SplitBegin || ByteOffset >= SplitEnd)
+ // Type metadata may be attached one byte after the end of the vtable, for
+ // classes without virtual methods in the Itanium ABI. AFAIK, it is never
+ // attached to the first byte of a vtable. Subtract one to get the right
+ // slice.
+ // This is making an assumption that vtable groups are the only kinds of
+ // global variables that !type metadata can be attached to, and that they
+ // are either Itanium ABI vtable groups or contain a single vtable (i.e.
+ // Microsoft ABI vtables).
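+ // Illustrative layout (hypothetical): with a first slice at [0,16) and a
+ // second at [16,32), !type metadata at ByteOffset 16 really describes the
+ // first slice (one past its end); the ByteOffset - 1 adjustment below
+ // assigns it there.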
+ uint64_t AttachedTo = (ByteOffset == 0) ? ByteOffset : ByteOffset - 1;
+ if (AttachedTo < SplitBegin || AttachedTo >= SplitEnd)
continue;
SplitGV->addMetadata(
LLVMContext::MD_type,
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index 916135e33cd5..349807496dc2 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -136,7 +136,13 @@ static bool PropagateConstantReturn(Function &F) {
// For more details, see GlobalValue::mayBeDerefined.
if (!F.isDefinitionExact())
return false;
-
+
+ // Don't touch naked functions. They may contain asm returning a
+ // value we don't see, so we may end up interprocedurally propagating
+ // the return value incorrectly.
+ if (F.hasFnAttribute(Attribute::Naked))
+ return false;
+
// Check to see if this function returns a constant.
SmallVector<Value *,4> RetVals;
StructType *STy = dyn_cast<StructType>(F.getReturnType());
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 1770445b413f..50e7cc89a3b3 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -48,7 +48,7 @@ public:
}
explicit SimpleInliner(InlineParams Params)
- : LegacyInlinerBase(ID), Params(Params) {
+ : LegacyInlinerBase(ID), Params(std::move(Params)) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
@@ -61,7 +61,8 @@ public:
[&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
- return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache, PSI);
+ return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache,
+ /*GetBFI=*/None, PSI);
}
bool runOnSCC(CallGraphSCC &SCC) override;
@@ -92,8 +93,12 @@ Pass *llvm::createFunctionInliningPass(int Threshold) {
}
Pass *llvm::createFunctionInliningPass(unsigned OptLevel,
- unsigned SizeOptLevel) {
- return new SimpleInliner(llvm::getInlineParams(OptLevel, SizeOptLevel));
+ unsigned SizeOptLevel,
+ bool DisableInlineHotCallSite) {
+ auto Param = llvm::getInlineParams(OptLevel, SizeOptLevel);
+ if (DisableInlineHotCallSite)
+ Param.HotCallSiteThreshold = 0;
+ return new SimpleInliner(Param);
}
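
// Illustrative call (hypothetical driver code):
//   createFunctionInliningPass(/*OptLevel=*/2, /*SizeOptLevel=*/0,
//                              /*DisableInlineHotCallSite=*/true)
// zeroes HotCallSiteThreshold, so hot call sites are effectively never
// inlined by this pass instance.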
Pass *llvm::createFunctionInliningPass(InlineParams &Params) {
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 3f4731c937d1..6c83c99ae3be 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
@@ -260,8 +261,8 @@ static bool InlineCallIfPossible(
/// Return true if inlining of CS can block the caller from being
/// inlined which is proved to be more beneficial. \p IC is the
/// estimated inline cost associated with callsite \p CS.
-/// \p TotalAltCost will be set to the estimated cost of inlining the caller
-/// if \p CS is suppressed for inlining.
+/// \p TotalSecondaryCost will be set to the estimated cost of inlining the
+/// caller if \p CS is suppressed for inlining.
static bool
shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
int &TotalSecondaryCost,
@@ -288,7 +289,7 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
// treating them as truly abstract units etc.
TotalSecondaryCost = 0;
// The candidate cost to be imposed upon the current function.
- int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
+ int CandidateCost = IC.getCost() - 1;
// This bool tracks what happens if we do NOT inline C into B.
bool callerWillBeRemoved = Caller->hasLocalLinkage();
// This bool tracks what happens if we DO inline C into B.
@@ -325,7 +326,7 @@ shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
// one is set very low by getInlineCost, in anticipation that Caller will
// be removed entirely. We did not account for this above unless there
// is only one caller of Caller.
- if (callerWillBeRemoved && !Caller->use_empty())
+ if (callerWillBeRemoved && !Caller->hasOneUse())
TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;
if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
@@ -342,6 +343,7 @@ static bool shouldInline(CallSite CS,
InlineCost IC = GetInlineCost(CS);
Instruction *Call = CS.getInstruction();
Function *Callee = CS.getCalledFunction();
+ Function *Caller = CS.getCaller();
if (IC.isAlways()) {
DEBUG(dbgs() << " Inlining: cost=always"
@@ -355,19 +357,20 @@ static bool shouldInline(CallSite CS,
if (IC.isNever()) {
DEBUG(dbgs() << " NOT Inlining: cost=never"
<< ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "NeverInline", Call)
- << NV("Callee", Callee)
- << " should never be inlined (cost=never)");
+ ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
+ << NV("Callee", Callee) << " not inlined into "
+ << NV("Caller", Caller)
+ << " because it should never be inlined (cost=never)");
return false;
}
- Function *Caller = CS.getCaller();
if (!IC) {
DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
<< ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << "\n");
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
- << NV("Callee", Callee) << " too costly to inline (cost="
+ ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
+ << NV("Callee", Callee) << " not inlined into "
+ << NV("Caller", Caller) << " because too costly to inline (cost="
<< NV("Cost", IC.getCost()) << ", threshold="
<< NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
return false;
@@ -378,8 +381,8 @@ static bool shouldInline(CallSite CS,
DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction()
<< " Cost = " << IC.getCost()
<< ", outer Cost = " << TotalSecondaryCost << '\n');
- ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE,
- "IncreaseCostInOtherContexts", Call)
+ ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
+ Call)
<< "Not inlining. Cost of inlining " << NV("Callee", Callee)
<< " increases the cost of inlining " << NV("Caller", Caller)
<< " in other contexts");
@@ -552,16 +555,11 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
// If the policy determines that we should inline this function,
// try to do so.
- using namespace ore;
- if (!shouldInline(CS, GetInlineCost, ORE)) {
- ORE.emit(
- OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
- << NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", Caller));
+ if (!shouldInline(CS, GetInlineCost, ORE))
continue;
- }
// Attempt to inline the function.
+ using namespace ore;
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
InlineHistoryID, InsertLifetime, AARGetter,
ImportedFunctionsStats)) {
@@ -638,22 +636,12 @@ bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
ACT = &getAnalysis<AssumptionCacheTracker>();
PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- // We compute dedicated AA results for each function in the SCC as needed. We
- // use a lambda referencing external objects so that they live long enough to
- // be queried, but we re-use them each time.
- Optional<BasicAAResult> BAR;
- Optional<AAResults> AAR;
- auto AARGetter = [&](Function &F) -> AAResults & {
- BAR.emplace(createLegacyPMBasicAAResult(*this, F));
- AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
- return *AAR;
- };
auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
return ACT->getAssumptionCache(F);
};
return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime,
[this](CallSite CS) { return getInlineCost(CS); },
- AARGetter, ImportedFunctionsStats);
+ LegacyAARGetter(*this), ImportedFunctionsStats);
}
/// Remove now-dead linkonce functions at the end of
@@ -750,9 +738,6 @@ bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
CGSCCAnalysisManager &AM, LazyCallGraph &CG,
CGSCCUpdateResult &UR) {
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
- .getManager();
const ModuleAnalysisManager &MAM =
AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
bool Changed = false;
@@ -761,35 +746,52 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
Module &M = *InitialC.begin()->getFunction().getParent();
ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);
- std::function<AssumptionCache &(Function &)> GetAssumptionCache =
- [&](Function &F) -> AssumptionCache & {
- return FAM.getResult<AssumptionAnalysis>(F);
- };
-
- // Setup the data structure used to plumb customization into the
- // `InlineFunction` routine.
- InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache);
+ // We use a single common worklist for calls across the entire SCC. We
+ // process these in-order and append new calls introduced during inlining to
+ // the end.
+ //
+ // Note that this particular order of processing is actually critical to
+ // avoid very bad behaviors. Consider *highly connected* call graphs where
+ // each function contains a small amount of code and a couple of calls to
+ // other functions. Because the LLVM inliner is fundamentally a bottom-up
+ // inliner, it can handle gracefully the fact that these all appear to be
+ // reasonable inlining candidates as it will flatten things until they become
+ // too big to inline, and then move on and flatten another batch.
+ //
+ // However, when processing call edges *within* an SCC we cannot rely on this
+ // bottom-up behavior. As a consequence, with heavily connected *SCCs* of
+ // functions we can end up incrementally inlining N calls into each of
+ // N functions because each incremental inlining decision looks good and we
+ // don't have a topological ordering to prevent explosions.
+ //
+ // To compensate for this, we don't process transitive edges made immediate
+ // by inlining until we've done one pass of inlining across the entire SCC.
+ // Large, highly connected SCCs still lead to some amount of code bloat in
+ // this model, but it is uniformly spread across all the functions in the SCC
+ // and eventually they all become too large to inline, rather than
+ // incrementally making a single function grow in a super-linear fashion.
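+ //
+ // Illustrative worst case (hypothetical): an SCC {f1, ..., fN} in which
+ // every fi calls every fj; eagerly chasing edges made immediate by
+ // inlining would let each function absorb O(N) bodies per visit, which
+ // the one-pass-at-a-time policy above avoids.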
+ SmallVector<std::pair<CallSite, int>, 16> Calls;
- auto GetInlineCost = [&](CallSite CS) {
- Function &Callee = *CS.getCalledFunction();
- auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
- return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, PSI);
- };
+ // Populate the initial list of calls in this SCC.
+ for (auto &N : InitialC) {
+ // We want to generally process call sites top-down in order for
+ // simplifications stemming from replacing the call with the returned value
+ // after inlining to be visible to subsequent inlining decisions.
+ // FIXME: Using the instruction sequence is a really bad way to do this.
+ // Instead we should do an actual RPO walk of the function body.
+ for (Instruction &I : instructions(N.getFunction()))
+ if (auto CS = CallSite(&I))
+ if (Function *Callee = CS.getCalledFunction())
+ if (!Callee->isDeclaration())
+ Calls.push_back({CS, -1});
+ }
+ if (Calls.empty())
+ return PreservedAnalyses::all();
- // We use a worklist of nodes to process so that we can handle if the SCC
- // structure changes and some nodes are no longer part of the current SCC. We
- // also need to use an updatable pointer for the SCC as a consequence.
- SmallVector<LazyCallGraph::Node *, 16> Nodes;
- for (auto &N : InitialC)
- Nodes.push_back(&N);
+ // Capture updatable variables for the current SCC and RefSCC.
auto *C = &InitialC;
auto *RC = &C->getOuterRefSCC();
- // We also use a secondary worklist of call sites within a particular node to
- // allow quickly continuing to inline through newly inlined call sites where
- // possible.
- SmallVector<std::pair<CallSite, int>, 16> Calls;
-
// When inlining a callee produces new call sites, we want to keep track of
// the fact that they were inlined from the callee. This allows us to avoid
// infinite inlining in some obscure cases. To represent this, we use an
@@ -805,34 +807,58 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// defer deleting these to make it easier to handle the call graph updates.
SmallVector<Function *, 4> DeadFunctions;
- do {
- auto &N = *Nodes.pop_back_val();
+ // Loop forward over all of the calls. Note that we cannot cache the size as
+ // inlining can introduce new calls that need to be processed.
+ for (int i = 0; i < (int)Calls.size(); ++i) {
+ // We expect the calls to typically be batched with sequences of calls that
+ // have the same caller, so we first set up some shared infrastructure for
+ // this caller. We also do any pruning we can at this layer on the caller
+ // alone.
+ Function &F = *Calls[i].first.getCaller();
+ LazyCallGraph::Node &N = *CG.lookup(F);
if (CG.lookupSCC(N) != C)
continue;
- Function &F = N.getFunction();
if (F.hasFnAttribute(Attribute::OptimizeNone))
continue;
+ DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n");
+
+ // Get a FunctionAnalysisManager via a proxy for this particular node. We
+ // do this each time we visit a node as the SCC may have changed and as
+ // we're going to mutate this particular function we want to make sure the
+ // proxy is in place to forward any invalidation events. We can use the
+ // manager we get here for looking up results for functions other than this
+ // node however because those functions aren't going to be mutated by this
+ // pass.
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG)
+ .getManager();
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache =
+ [&](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+ auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
+ return FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+
+ auto GetInlineCost = [&](CallSite CS) {
+ Function &Callee = *CS.getCalledFunction();
+ auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
+ return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI},
+ PSI);
+ };
+
// Get the remarks emission analysis for the caller.
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- // We want to generally process call sites top-down in order for
- // simplifications stemming from replacing the call with the returned value
- // after inlining to be visible to subsequent inlining decisions. So we
- // walk the function backwards and then process the back of the vector.
- // FIXME: Using reverse is a really bad way to do this. Instead we should
- // do an actual PO walk of the function body.
- for (Instruction &I : reverse(instructions(F)))
- if (auto CS = CallSite(&I))
- if (Function *Callee = CS.getCalledFunction())
- if (!Callee->isDeclaration())
- Calls.push_back({CS, -1});
-
+ // Now process as many calls as we have within this caller in the sequence.
+ // We bail out as soon as the caller has to change so we can update the
+ // call graph and prepare the context of that new caller.
bool DidInline = false;
- while (!Calls.empty()) {
+ for (; i < (int)Calls.size() && Calls[i].first.getCaller() == &F; ++i) {
int InlineHistoryID;
CallSite CS;
- std::tie(CS, InlineHistoryID) = Calls.pop_back_val();
+ std::tie(CS, InlineHistoryID) = Calls[i];
Function &Callee = *CS.getCalledFunction();
if (InlineHistoryID != -1 &&
@@ -843,6 +869,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (!shouldInline(CS, GetInlineCost, ORE))
continue;
+ // Setup the data structure used to plumb customization into the
+ // `InlineFunction` routine.
+ InlineFunctionInfo IFI(
+ /*cg=*/nullptr, &GetAssumptionCache,
+ &FAM.getResult<BlockFrequencyAnalysis>(*(CS.getCaller())),
+ &FAM.getResult<BlockFrequencyAnalysis>(Callee));
+
if (!InlineFunction(CS, IFI))
continue;
DidInline = true;
@@ -870,6 +903,12 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// made dead by this operation on other functions).
Callee.removeDeadConstantUsers();
if (Callee.use_empty()) {
+ Calls.erase(
+ std::remove_if(Calls.begin() + i + 1, Calls.end(),
+ [&Callee](const std::pair<CallSite, int> &Call) {
+ return Call.first.getCaller() == &Callee;
+ }),
+ Calls.end());
// Clear the body and queue the function itself for deletion when we
// finish inlining and call graph updates.
// Note that after this point, it is an error to do anything other
@@ -882,6 +921,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
}
}
+ // Back the call index up by one to put us in a good position to go around
+ // the outer loop.
+ --i;
+
if (!DidInline)
continue;
Changed = true;
@@ -896,8 +939,8 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// below.
for (Function *InlinedCallee : InlinedCallees) {
LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
- for (LazyCallGraph::Edge &E : CalleeN)
- RC->insertTrivialRefEdge(N, *E.getNode());
+ for (LazyCallGraph::Edge &E : *CalleeN)
+ RC->insertTrivialRefEdge(N, E.getNode());
}
InlinedCallees.clear();
@@ -908,8 +951,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// re-use the exact same logic for updating the call graph to reflect the
// change..
C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
+ DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n");
RC = &C->getOuterRefSCC();
- } while (!Nodes.empty());
+ }
// Now that we've finished inlining all of the calls across this SCC, delete
// all of the trivially dead functions, updating the call graph and the CGSCC
@@ -920,8 +964,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// sets.
for (Function *DeadF : DeadFunctions) {
// Get the necessary information out of the call graph and nuke the
- // function there.
+ // function there. Also, clear out any cached analyses.
auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(DeadC, CG)
+ .getManager();
+ FAM.clear(*DeadF);
+ AM.clear(DeadC);
auto &DeadRC = DeadC.getOuterRefSCC();
CG.removeDeadFunction(*DeadF);
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index deb7e819480b..785207efbe5c 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -42,8 +42,6 @@
using namespace llvm;
using namespace lowertypetests;
-using SummaryAction = LowerTypeTestsSummaryAction;
-
#define DEBUG_TYPE "lowertypetests"
STATISTIC(ByteArraySizeBits, "Byte array size in bits");
@@ -57,13 +55,13 @@ static cl::opt<bool> AvoidReuse(
cl::desc("Try to avoid reuse of byte array addresses using aliases"),
cl::Hidden, cl::init(true));
-static cl::opt<SummaryAction> ClSummaryAction(
+static cl::opt<PassSummaryAction> ClSummaryAction(
"lowertypetests-summary-action",
cl::desc("What to do with the summary when running this pass"),
- cl::values(clEnumValN(SummaryAction::None, "none", "Do nothing"),
- clEnumValN(SummaryAction::Import, "import",
+ cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
+ clEnumValN(PassSummaryAction::Import, "import",
"Import typeid resolutions from summary and globals"),
- clEnumValN(SummaryAction::Export, "export",
+ clEnumValN(PassSummaryAction::Export, "export",
"Export typeid resolutions to summary and globals")),
cl::Hidden);
@@ -234,8 +232,8 @@ public:
class LowerTypeTestsModule {
Module &M;
- SummaryAction Action;
- ModuleSummaryIndex *Summary;
+ ModuleSummaryIndex *ExportSummary;
+ const ModuleSummaryIndex *ImportSummary;
bool LinkerSubsectionsViaSymbols;
Triple::ArchType Arch;
@@ -253,15 +251,21 @@ class LowerTypeTestsModule {
// Indirect function call index assignment counter for WebAssembly
uint64_t IndirectIndex = 1;
- // Mapping from type identifiers to the call sites that test them.
- DenseMap<Metadata *, std::vector<CallInst *>> TypeTestCallSites;
+ // Mapping from type identifiers to the call sites that test them, as well as
+ // whether the type identifier needs to be exported to ThinLTO backends as
+ // part of the regular LTO phase of the ThinLTO pipeline (see exportTypeId).
+ struct TypeIdUserInfo {
+ std::vector<CallInst *> CallSites;
+ bool IsExported = false;
+ };
+ DenseMap<Metadata *, TypeIdUserInfo> TypeIdUsers;
/// This structure describes how to lower type tests for a particular type
/// identifier. It is either built directly from the global analysis (during
/// regular LTO or the regular LTO phase of ThinLTO), or indirectly using type
/// identifier summaries and external symbol references (in ThinLTO backends).
struct TypeIdLowering {
- TypeTestResolution::Kind TheKind;
+ TypeTestResolution::Kind TheKind = TypeTestResolution::Unsat;
/// All except Unsat: the start address within the combined global.
Constant *OffsetedGlobal;
@@ -274,9 +278,6 @@ class LowerTypeTestsModule {
/// covering members of this type identifier as a multiple of 2^AlignLog2.
Constant *SizeM1;
- /// ByteArray, Inline, AllOnes: range of SizeM1 expressed as a bit width.
- unsigned SizeM1BitWidth;
-
/// ByteArray: the byte array to test the address against.
Constant *TheByteArray;
@@ -291,6 +292,10 @@ class LowerTypeTestsModule {
Function *WeakInitializerFn = nullptr;
+ void exportTypeId(StringRef TypeId, const TypeIdLowering &TIL);
+ TypeIdLowering importTypeId(StringRef TypeId);
+ void importTypeTest(CallInst *CI);
+
BitSetInfo
buildBitSet(Metadata *TypeId,
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
@@ -327,8 +332,8 @@ class LowerTypeTestsModule {
void createJumpTable(Function *F, ArrayRef<GlobalTypeMember *> Functions);
public:
- LowerTypeTestsModule(Module &M, SummaryAction Action,
- ModuleSummaryIndex *Summary);
+ LowerTypeTestsModule(Module &M, ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary);
bool lower();
// Lower the module using the action and summary passed as command line
@@ -341,15 +346,17 @@ struct LowerTypeTests : public ModulePass {
bool UseCommandLine = false;
- SummaryAction Action;
- ModuleSummaryIndex *Summary;
+ ModuleSummaryIndex *ExportSummary;
+ const ModuleSummaryIndex *ImportSummary;
LowerTypeTests() : ModulePass(ID), UseCommandLine(true) {
initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}
- LowerTypeTests(SummaryAction Action, ModuleSummaryIndex *Summary)
- : ModulePass(ID), Action(Action), Summary(Summary) {
+ LowerTypeTests(ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary)
+ : ModulePass(ID), ExportSummary(ExportSummary),
+ ImportSummary(ImportSummary) {
initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
}
@@ -358,7 +365,7 @@ struct LowerTypeTests : public ModulePass {
return false;
if (UseCommandLine)
return LowerTypeTestsModule::runForTesting(M);
- return LowerTypeTestsModule(M, Action, Summary).lower();
+ return LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower();
}
};
@@ -368,9 +375,10 @@ INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false,
false)
char LowerTypeTests::ID = 0;
-ModulePass *llvm::createLowerTypeTestsPass(SummaryAction Action,
- ModuleSummaryIndex *Summary) {
- return new LowerTypeTests(Action, Summary);
+ModulePass *
+llvm::createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary) {
+ return new LowerTypeTests(ExportSummary, ImportSummary);
}
/// Build a bit set for TypeId using the object layouts in
@@ -494,10 +502,11 @@ Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
return createMaskedBitTest(B, TIL.InlineBits, BitOffset);
} else {
Constant *ByteArray = TIL.TheByteArray;
- if (!LinkerSubsectionsViaSymbols && AvoidReuse) {
+ if (!LinkerSubsectionsViaSymbols && AvoidReuse && !ImportSummary) {
// Each use of the byte array uses a different alias. This makes the
// backend less likely to reuse previously computed byte array addresses,
// improving the security of the CFI mechanism based on this pass.
+ // This won't work when importing because TheByteArray is external.
ByteArray = GlobalAlias::create(Int8Ty, 0, GlobalValue::PrivateLinkage,
"bits_use", ByteArray, &M);
}
@@ -593,8 +602,7 @@ Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
IntPtrTy));
Value *BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
- Constant *BitSizeConst = ConstantExpr::getZExt(TIL.SizeM1, IntPtrTy);
- Value *OffsetInRange = B.CreateICmpULE(BitOffset, BitSizeConst);
+ Value *OffsetInRange = B.CreateICmpULE(BitOffset, TIL.SizeM1);
// If the bit set is all ones, testing against it is unnecessary.
if (TIL.TheKind == TypeTestResolution::AllOnes)
@@ -687,6 +695,123 @@ void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
}
}
+/// Export the given type identifier so that ThinLTO backends may import it.
+/// Type identifiers are exported by adding coarse-grained information about how
+/// to test the type identifier to the summary, and creating symbols in the
+/// object file (aliases and absolute symbols) containing fine-grained
+/// information about the type identifier.
+void LowerTypeTestsModule::exportTypeId(StringRef TypeId,
+ const TypeIdLowering &TIL) {
+ TypeTestResolution &TTRes =
+ ExportSummary->getOrInsertTypeIdSummary(TypeId).TTRes;
+ TTRes.TheKind = TIL.TheKind;
+
+ auto ExportGlobal = [&](StringRef Name, Constant *C) {
+ GlobalAlias *GA =
+ GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
+ "__typeid_" + TypeId + "_" + Name, C, &M);
+ GA->setVisibility(GlobalValue::HiddenVisibility);
+ };
+
+ if (TIL.TheKind != TypeTestResolution::Unsat)
+ ExportGlobal("global_addr", TIL.OffsetedGlobal);
+
+ if (TIL.TheKind == TypeTestResolution::ByteArray ||
+ TIL.TheKind == TypeTestResolution::Inline ||
+ TIL.TheKind == TypeTestResolution::AllOnes) {
+ ExportGlobal("align", ConstantExpr::getIntToPtr(TIL.AlignLog2, Int8PtrTy));
+ ExportGlobal("size_m1", ConstantExpr::getIntToPtr(TIL.SizeM1, Int8PtrTy));
+
+ uint64_t BitSize = cast<ConstantInt>(TIL.SizeM1)->getZExtValue() + 1;
+ if (TIL.TheKind == TypeTestResolution::Inline)
+ TTRes.SizeM1BitWidth = (BitSize <= 32) ? 5 : 6;
+ else
+ TTRes.SizeM1BitWidth = (BitSize <= 128) ? 7 : 32;
+ }
+
+ if (TIL.TheKind == TypeTestResolution::ByteArray) {
+ ExportGlobal("byte_array", TIL.TheByteArray);
+ ExportGlobal("bit_mask", TIL.BitMask);
+ }
+
+ if (TIL.TheKind == TypeTestResolution::Inline)
+ ExportGlobal("inline_bits",
+ ConstantExpr::getIntToPtr(TIL.InlineBits, Int8PtrTy));
+}
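+
+// Illustrative output (hypothetical type id "_ZTS1A"): hidden aliases such
+// as __typeid__ZTS1A_global_addr, __typeid__ZTS1A_align and, for byte-array
+// kinds, __typeid__ZTS1A_byte_array / __typeid__ZTS1A_bit_mask, which
+// importTypeId below resolves in ThinLTO backends.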
+
+LowerTypeTestsModule::TypeIdLowering
+LowerTypeTestsModule::importTypeId(StringRef TypeId) {
+ const TypeIdSummary *TidSummary = ImportSummary->getTypeIdSummary(TypeId);
+ if (!TidSummary)
+ return {}; // Unsat: no globals match this type id.
+ const TypeTestResolution &TTRes = TidSummary->TTRes;
+
+ TypeIdLowering TIL;
+ TIL.TheKind = TTRes.TheKind;
+
+ auto ImportGlobal = [&](StringRef Name, unsigned AbsWidth) {
+ Constant *C =
+ M.getOrInsertGlobal(("__typeid_" + TypeId + "_" + Name).str(), Int8Ty);
+ auto *GV = dyn_cast<GlobalVariable>(C);
+ // We only need to set metadata if the global is newly created, in which
+ // case it would not have hidden visibility.
+ if (!GV || GV->getVisibility() == GlobalValue::HiddenVisibility)
+ return C;
+
+ GV->setVisibility(GlobalValue::HiddenVisibility);
+ auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
+ auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
+ auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
+ GV->setMetadata(LLVMContext::MD_absolute_symbol,
+ MDNode::get(M.getContext(), {MinC, MaxC}));
+ };
+ if (AbsWidth == IntPtrTy->getBitWidth())
+ SetAbsRange(~0ull, ~0ull); // Full set.
+ else if (AbsWidth)
+ SetAbsRange(0, 1ull << AbsWidth);
+ return C;
+ };
+
+ if (TIL.TheKind != TypeTestResolution::Unsat)
+ TIL.OffsetedGlobal = ImportGlobal("global_addr", 0);
+
+ if (TIL.TheKind == TypeTestResolution::ByteArray ||
+ TIL.TheKind == TypeTestResolution::Inline ||
+ TIL.TheKind == TypeTestResolution::AllOnes) {
+ TIL.AlignLog2 = ConstantExpr::getPtrToInt(ImportGlobal("align", 8), Int8Ty);
+ TIL.SizeM1 = ConstantExpr::getPtrToInt(
+ ImportGlobal("size_m1", TTRes.SizeM1BitWidth), IntPtrTy);
+ }
+
+ if (TIL.TheKind == TypeTestResolution::ByteArray) {
+ TIL.TheByteArray = ImportGlobal("byte_array", 0);
+ TIL.BitMask = ImportGlobal("bit_mask", 8);
+ }
+
+ if (TIL.TheKind == TypeTestResolution::Inline)
+ TIL.InlineBits = ConstantExpr::getPtrToInt(
+ ImportGlobal("inline_bits", 1 << TTRes.SizeM1BitWidth),
+ TTRes.SizeM1BitWidth <= 5 ? Int32Ty : Int64Ty);
+
+ return TIL;
+}
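
The !absolute_symbol metadata attached by ImportGlobal encodes the value range the linker may assign to each imported symbol: [0, 2^AbsWidth) for narrow constants, the {-1, -1} "full set" encoding when AbsWidth matches the pointer width, and no range at all when AbsWidth is 0. A small sketch of that selection, assuming a 64-bit pointer width for illustration:

// Sketch of the absolute-symbol range selection in importTypeId's
// ImportGlobal helper. Standalone C++17, not the LLVM API.
#include <cstdint>
#include <iostream>
#include <optional>
#include <utility>

// Returns the {Min, Max} pair stored in !absolute_symbol metadata, or
// nothing when AbsWidth is 0 (no range is attached in that case).
std::optional<std::pair<uint64_t, uint64_t>> absRange(unsigned AbsWidth,
                                                      unsigned PtrBits = 64) {
  if (AbsWidth == PtrBits)
    return std::pair<uint64_t, uint64_t>{~0ull, ~0ull}; // {-1, -1}: full set
  if (AbsWidth)
    return std::pair<uint64_t, uint64_t>{0, 1ull << AbsWidth}; // [0, 2^AbsWidth)
  return std::nullopt;
}

int main() {
  if (auto R = absRange(8)) // e.g. the "align" symbol
    std::cout << '[' << R->first << ", " << R->second << ")\n"; // [0, 256)
}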
+
+void LowerTypeTestsModule::importTypeTest(CallInst *CI) {
+ auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (!TypeIdMDVal)
+ report_fatal_error("Second argument of llvm.type.test must be metadata");
+
+ auto TypeIdStr = dyn_cast<MDString>(TypeIdMDVal->getMetadata());
+ if (!TypeIdStr)
+ report_fatal_error(
+ "Second argument of llvm.type.test must be a metadata string");
+
+ TypeIdLowering TIL = importTypeId(TypeIdStr->getString());
+ Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL);
+ CI->replaceAllUsesWith(Lowered);
+ CI->eraseFromParent();
+}
+
void LowerTypeTestsModule::lowerTypeTestCalls(
ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
@@ -708,16 +833,12 @@ void LowerTypeTestsModule::lowerTypeTestCalls(
TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr(
Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)),
TIL.AlignLog2 = ConstantInt::get(Int8Ty, BSI.AlignLog2);
+ TIL.SizeM1 = ConstantInt::get(IntPtrTy, BSI.BitSize - 1);
if (BSI.isAllOnes()) {
TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single
: TypeTestResolution::AllOnes;
- TIL.SizeM1BitWidth = (BSI.BitSize <= 128) ? 7 : 32;
- TIL.SizeM1 = ConstantInt::get((BSI.BitSize <= 128) ? Int8Ty : Int32Ty,
- BSI.BitSize - 1);
} else if (BSI.BitSize <= 64) {
TIL.TheKind = TypeTestResolution::Inline;
- TIL.SizeM1BitWidth = (BSI.BitSize <= 32) ? 5 : 6;
- TIL.SizeM1 = ConstantInt::get(Int8Ty, BSI.BitSize - 1);
uint64_t InlineBits = 0;
for (auto Bit : BSI.Bits)
InlineBits |= uint64_t(1) << Bit;
@@ -728,17 +849,19 @@ void LowerTypeTestsModule::lowerTypeTestCalls(
(BSI.BitSize <= 32) ? Int32Ty : Int64Ty, InlineBits);
} else {
TIL.TheKind = TypeTestResolution::ByteArray;
- TIL.SizeM1BitWidth = (BSI.BitSize <= 128) ? 7 : 32;
- TIL.SizeM1 = ConstantInt::get((BSI.BitSize <= 128) ? Int8Ty : Int32Ty,
- BSI.BitSize - 1);
++NumByteArraysCreated;
ByteArrayInfo *BAI = createByteArray(BSI);
TIL.TheByteArray = BAI->ByteArray;
TIL.BitMask = BAI->MaskGlobal;
}
+ TypeIdUserInfo &TIUI = TypeIdUsers[TypeId];
+
+ if (TIUI.IsExported)
+ exportTypeId(cast<MDString>(TypeId)->getString(), TIL);
+
// Lower each call to llvm.type.test for this type identifier.
- for (CallInst *CI : TypeTestCallSites[TypeId]) {
+ for (CallInst *CI : TIUI.CallSites) {
++NumTypeTestCallsLowered;
Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL);
CI->replaceAllUsesWith(Lowered);
@@ -757,9 +880,9 @@ void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) {
report_fatal_error(
"A member of a type identifier may not have an explicit section");
- if (isa<GlobalVariable>(GO) && GO->isDeclarationForLinker())
- report_fatal_error(
- "A global var member of a type identifier must be a definition");
+ // FIXME: We previously checked that a global variable member of a type
+ // identifier must be a definition, but the IR linker may leave type
+ // metadata on declarations. We should restore this check after fixing
+ // PR31759.
auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Type->getOperand(0));
if (!OffsetConstMD)
@@ -1173,13 +1296,11 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
}
/// Lower all type tests in this module.
-LowerTypeTestsModule::LowerTypeTestsModule(Module &M, SummaryAction Action,
- ModuleSummaryIndex *Summary)
- : M(M), Action(Action), Summary(Summary) {
- // FIXME: Use these fields.
- (void)this->Action;
- (void)this->Summary;
-
+LowerTypeTestsModule::LowerTypeTestsModule(
+ Module &M, ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary)
+ : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
+ assert(!(ExportSummary && ImportSummary));
Triple TargetTriple(M.getTargetTriple());
LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
Arch = TargetTriple.getArch();
@@ -1203,7 +1324,11 @@ bool LowerTypeTestsModule::runForTesting(Module &M) {
ExitOnErr(errorCodeToError(In.error()));
}
- bool Changed = LowerTypeTestsModule(M, ClSummaryAction, &Summary).lower();
+ bool Changed =
+ LowerTypeTestsModule(
+ M, ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
+ ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr)
+ .lower();
if (!ClWriteSummary.empty()) {
ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
@@ -1222,9 +1347,18 @@ bool LowerTypeTestsModule::runForTesting(Module &M) {
bool LowerTypeTestsModule::lower() {
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
- if (!TypeTestFunc || TypeTestFunc->use_empty())
+ if ((!TypeTestFunc || TypeTestFunc->use_empty()) && !ExportSummary)
return false;
+ if (ImportSummary) {
+ for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
+ UI != UE;) {
+ auto *CI = cast<CallInst>((*UI++).getUser());
+ importTypeTest(CI);
+ }
+ return true;
+ }
+
// Equivalence class set containing type identifiers and the globals that
// reference them. This is used to partition the set of type identifiers in
// the module into disjoint sets.
@@ -1248,6 +1382,9 @@ bool LowerTypeTestsModule::lower() {
unsigned I = 0;
SmallVector<MDNode *, 2> Types;
for (GlobalObject &GO : M.global_objects()) {
+ if (isa<GlobalVariable>(GO) && GO.isDeclarationForLinker())
+ continue;
+
Types.clear();
GO.getMetadata(LLVMContext::MD_type, Types);
if (Types.empty())
@@ -1262,33 +1399,57 @@ bool LowerTypeTestsModule::lower() {
}
}
- for (const Use &U : TypeTestFunc->uses()) {
- auto CI = cast<CallInst>(U.getUser());
+ auto AddTypeIdUse = [&](Metadata *TypeId) -> TypeIdUserInfo & {
+ // Add the call site to the list of call sites for this type identifier. We
+ // also use TypeIdUsers to keep track of whether we have seen this type
+ // identifier before. If we have, we don't need to re-add the referenced
+ // globals to the equivalence class.
+ auto Ins = TypeIdUsers.insert({TypeId, {}});
+ if (Ins.second) {
+ // Add the type identifier to the equivalence class.
+ GlobalClassesTy::iterator GCI = GlobalClasses.insert(TypeId);
+ GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
+
+ // Add the referenced globals to the type identifier's equivalence class.
+ for (GlobalTypeMember *GTM : TypeIdInfo[TypeId].RefGlobals)
+ CurSet = GlobalClasses.unionSets(
+ CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM)));
+ }
+
+ return Ins.first->second;
+ };
- auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
- if (!BitSetMDVal)
- report_fatal_error("Second argument of llvm.type.test must be metadata");
- auto BitSet = BitSetMDVal->getMetadata();
+ if (TypeTestFunc) {
+ for (const Use &U : TypeTestFunc->uses()) {
+ auto CI = cast<CallInst>(U.getUser());
- // Add the call site to the list of call sites for this type identifier. We
- // also use TypeTestCallSites to keep track of whether we have seen this
- // type identifier before. If we have, we don't need to re-add the
- // referenced globals to the equivalence class.
- std::pair<DenseMap<Metadata *, std::vector<CallInst *>>::iterator, bool>
- Ins = TypeTestCallSites.insert(
- std::make_pair(BitSet, std::vector<CallInst *>()));
- Ins.first->second.push_back(CI);
- if (!Ins.second)
- continue;
+ auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (!TypeIdMDVal)
+ report_fatal_error("Second argument of llvm.type.test must be metadata");
+ auto TypeId = TypeIdMDVal->getMetadata();
+ AddTypeIdUse(TypeId).CallSites.push_back(CI);
+ }
+ }
- // Add the type identifier to the equivalence class.
- GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet);
- GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
+ if (ExportSummary) {
+ DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
+ for (auto &P : TypeIdInfo) {
+ if (auto *TypeId = dyn_cast<MDString>(P.first))
+ MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
+ TypeId);
+ }
- // Add the referenced globals to the type identifier's equivalence class.
- for (GlobalTypeMember *GTM : TypeIdInfo[BitSet].RefGlobals)
- CurSet = GlobalClasses.unionSets(
- CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM)));
+ for (auto &P : *ExportSummary) {
+ for (auto &S : P.second) {
+ auto *FS = dyn_cast<FunctionSummary>(S.get());
+ if (!FS)
+ continue;
+ // FIXME: Only add live functions.
+ for (GlobalValue::GUID G : FS->type_tests())
+ for (Metadata *MD : MetadataByGUID[G])
+ AddTypeIdUse(MD).IsExported = true;
+ }
+ }
}
if (GlobalClasses.empty())
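
The grouping above is a classic union-find: each new type identifier is unioned with every global it references, so type identifiers that share a global collapse into one disjoint set and are lowered together. A tiny self-contained sketch of that behavior (hypothetical names, plain C++ in place of llvm::EquivalenceClasses):

// Minimal union-find sketch of how type ids and the globals they reference
// collapse into disjoint sets. All names here are hypothetical.
#include <iostream>
#include <map>
#include <string>

std::map<std::string, std::string> Parent;

std::string find(const std::string &X) {
  if (Parent.find(X) == Parent.end())
    Parent[X] = X;
  if (Parent[X] != X)
    Parent[X] = find(Parent[X]); // path compression
  return Parent[X];
}

void unite(const std::string &A, const std::string &B) {
  std::string RootA = find(A), RootB = find(B);
  Parent[RootA] = RootB;
}

int main() {
  // typeid1 is referenced by vtables A and B; typeid2 only by B.
  unite("typeid1", "A");
  unite("typeid1", "B");
  unite("typeid2", "B");
  // All four names now share one leader: they form a single disjoint set
  // and will be laid out and lowered together.
  std::cout << (find("typeid2") == find("A")) << '\n'; // 1
}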
@@ -1349,8 +1510,9 @@ bool LowerTypeTestsModule::lower() {
PreservedAnalyses LowerTypeTestsPass::run(Module &M,
ModuleAnalysisManager &AM) {
- bool Changed =
- LowerTypeTestsModule(M, SummaryAction::None, /*Summary=*/nullptr).lower();
+ bool Changed = LowerTypeTestsModule(M, /*ExportSummary=*/nullptr,
+ /*ImportSummary=*/nullptr)
+ .lower();
if (!Changed)
return PreservedAnalyses::all();
return PreservedAnalyses::none();
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index e0bb0eb42b59..771770ddc060 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -96,8 +96,10 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
@@ -127,6 +129,26 @@ static cl::opt<unsigned> NumFunctionsForSanityCheck(
"'0' disables this check. Works only with '-debug' key."),
cl::init(0), cl::Hidden);
+// Under option -mergefunc-preserve-debug-info we:
+// - Do not create a new function for a thunk.
+// - Retain the debug info for a thunk's parameters (and associated
+// instructions for the debug info) from the entry block.
+// Note: -debug will display the algorithm at work.
+// - Create debug-info for the call (to the shared implementation) made by
+// a thunk and its return value.
+// - Erase the rest of the function, retaining the (minimally sized) entry
+// block to create a thunk.
+// - Leave a thunk's call sites pointing at the thunk even when both occur
+// within the same translation unit, to aid debuggability. Note that this
+// behavior differs from the underlying -mergefunc implementation, which
+// modifies the thunk's call sites to point to the shared implementation
+// when both occur within the same translation unit.
+static cl::opt<bool>
+ MergeFunctionsPDI("mergefunc-preserve-debug-info", cl::Hidden,
+ cl::init(false),
+ cl::desc("Preserve debug info in thunk when mergefunc "
+ "transformations are made."));
+
namespace {
class FunctionNode {
@@ -215,8 +237,21 @@ private:
/// Replace G with a thunk or an alias to F. Deletes G.
void writeThunkOrAlias(Function *F, Function *G);
- /// Replace G with a simple tail call to bitcast(F). Also replace direct uses
- /// of G with bitcast(F). Deletes G.
+ /// Fill PDIUnrelatedWL with instructions from the entry block that are
+ /// unrelated to parameter-related debug info.
+ void filterInstsUnrelatedToPDI(BasicBlock *GEntryBlock,
+ std::vector<Instruction *> &PDIUnrelatedWL);
+
+ /// Erase the rest of the CFG (i.e. barring the entry block).
+ void eraseTail(Function *G);
+
+ /// Erase from the entry block the instructions in PDIUnrelatedWL, as they
+ /// are unrelated to the parameter debug info.
+ void eraseInstsUnrelatedToPDI(std::vector<Instruction *> &PDIUnrelatedWL);
+
+ /// Replace G with a simple tail call to bitcast(F). Also (unless
+ /// MergeFunctionsPDI holds) replace direct uses of G with bitcast(F)
+ /// and delete G.
void writeThunk(Function *F, Function *G);
/// Replace G with an alias to F. Deletes G.
@@ -269,8 +304,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
if (Res1 != -Res2) {
dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber
<< "\n";
- F1->dump();
- F2->dump();
+ dbgs() << *F1 << '\n' << *F2 << '\n';
Valid = false;
}
@@ -305,9 +339,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
<< TripleNumber << "\n";
dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", "
<< Res4 << "\n";
- F1->dump();
- F2->dump();
- F3->dump();
+ dbgs() << *F1 << '\n' << *F2 << '\n' << *F3 << '\n';
Valid = false;
}
}
@@ -400,19 +432,15 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
// Transferring other attributes may help other optimizations, but that
// should be done uniformly and not in this ad-hoc way.
auto &Context = New->getContext();
- auto NewFuncAttrs = New->getAttributes();
- auto CallSiteAttrs = CS.getAttributes();
-
- CallSiteAttrs = CallSiteAttrs.addAttributes(
- Context, AttributeSet::ReturnIndex, NewFuncAttrs.getRetAttributes());
-
- for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) {
- AttributeSet Attrs = NewFuncAttrs.getParamAttributes(argIdx);
- if (Attrs.getNumSlots())
- CallSiteAttrs = CallSiteAttrs.addAttributes(Context, argIdx, Attrs);
- }
-
- CS.setAttributes(CallSiteAttrs);
+ auto NewPAL = New->getAttributes();
+ SmallVector<AttributeSet, 4> NewArgAttrs;
+ for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++)
+ NewArgAttrs.push_back(NewPAL.getParamAttributes(argIdx));
+ // Don't transfer function attributes from the callee to the call site.
+ // They typically aren't relevant to the calling convention or ABI.
+ CS.setAttributes(AttributeList::get(Context, /*FnAttrs=*/AttributeSet(),
+ NewPAL.getRetAttributes(),
+ NewArgAttrs));
remove(CS.getInstruction()->getParent()->getParent());
U->set(BitcastNew);
@@ -461,51 +489,242 @@ static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
return Builder.CreateBitCast(V, DestTy);
}
-// Replace G with a simple tail call to bitcast(F). Also replace direct uses
-// of G with bitcast(F). Deletes G.
+// Erase from the entry block the instructions in PDIUnrelatedWL, as they are
+// unrelated to the parameter debug info.
+void MergeFunctions::eraseInstsUnrelatedToPDI(
+ std::vector<Instruction *> &PDIUnrelatedWL) {
+
+ DEBUG(dbgs() << " Erasing instructions (in reverse order of appearance in "
+ "entry block) unrelated to parameter debug info from entry "
+ "block: {\n");
+ while (!PDIUnrelatedWL.empty()) {
+ Instruction *I = PDIUnrelatedWL.back();
+ DEBUG(dbgs() << " Deleting Instruction: ");
+ DEBUG(I->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ I->eraseFromParent();
+ PDIUnrelatedWL.pop_back();
+ }
+ DEBUG(dbgs() << " } // Done erasing instructions unrelated to parameter "
+ "debug info from entry block. \n");
+}
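
Erasing from the back of the worklist is deliberate: instructions were collected in program order, so popping from the back deletes users before the values they depend on. A standalone sketch of the pattern, with strings standing in for instructions:

// Reverse-order erase sketch: the worklist was filled in program order, so
// popping from the back removes users before their operands.
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> PDIUnrelatedWL = {"%a = alloca i32",
                                             "%b = load i32, %a",
                                             "%c = add i32 %b, 1"};
  while (!PDIUnrelatedWL.empty()) {
    std::cout << "Deleting: " << PDIUnrelatedWL.back() << '\n';
    PDIUnrelatedWL.pop_back(); // "%c" goes first, its operand "%b" later
  }
}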
+
+// Reduce G to its entry block.
+void MergeFunctions::eraseTail(Function *G) {
+
+ std::vector<BasicBlock *> WorklistBB;
+ for (Function::iterator BBI = std::next(G->begin()), BBE = G->end();
+ BBI != BBE; ++BBI) {
+ BBI->dropAllReferences();
+ WorklistBB.push_back(&*BBI);
+ }
+ while (!WorklistBB.empty()) {
+ BasicBlock *BB = WorklistBB.back();
+ BB->eraseFromParent();
+ WorklistBB.pop_back();
+ }
+}
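
Dropping all references before erasing any block matters because non-entry blocks may reference each other (branches, phis); deleting one while another still points at it would leave dangling uses. A sketch of the same drop-then-erase pattern on a toy block graph:

// Drop-then-erase sketch: clear cross-references first, delete second,
// so no node is destroyed while another still points at it.
#include <memory>
#include <vector>

struct Block {
  std::vector<Block *> Succs; // references into other blocks
  void dropAllReferences() { Succs.clear(); }
};

int main() {
  std::vector<std::unique_ptr<Block>> CFG;
  for (int i = 0; i < 3; ++i)
    CFG.push_back(std::make_unique<Block>());
  CFG[0]->Succs = {CFG[1].get(), CFG[2].get()};
  CFG[1]->Succs = {CFG[2].get()};
  CFG[2]->Succs = {CFG[1].get()}; // cycle between blocks 1 and 2

  // Phase 1: break every edge out of the blocks to be deleted.
  for (size_t i = 1; i < CFG.size(); ++i)
    CFG[i]->dropAllReferences();
  // Phase 2: now deletion order no longer matters.
  CFG.resize(1); // erase all non-entry blocks
}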
+
+// We consider the following instructions from the entry block to be related
+// to parameter debug info:
+// - @llvm.dbg.declare
+// - stores from the incoming parameters to locations on the stack-frame
+// - allocas that create these locations on the stack-frame
+// - @llvm.dbg.value
+// - the entry block's terminator
+// The rest are unrelated to debug info for the parameters; fill up
+// PDIUnrelatedWL with such instructions.
+void MergeFunctions::filterInstsUnrelatedToPDI(
+ BasicBlock *GEntryBlock, std::vector<Instruction *> &PDIUnrelatedWL) {
+
+ std::set<Instruction *> PDIRelated;
+ for (BasicBlock::iterator BI = GEntryBlock->begin(), BIE = GEntryBlock->end();
+ BI != BIE; ++BI) {
+ if (auto *DVI = dyn_cast<DbgValueInst>(&*BI)) {
+ DEBUG(dbgs() << " Deciding: ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ DILocalVariable *DILocVar = DVI->getVariable();
+ if (DILocVar->isParameter()) {
+ DEBUG(dbgs() << " Include (parameter): ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIRelated.insert(&*BI);
+ } else {
+ DEBUG(dbgs() << " Delete (!parameter): ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ } else if (auto *DDI = dyn_cast<DbgDeclareInst>(&*BI)) {
+ DEBUG(dbgs() << " Deciding: ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ DILocalVariable *DILocVar = DDI->getVariable();
+ if (DILocVar->isParameter()) {
+ DEBUG(dbgs() << " Parameter: ");
+ DEBUG(DILocVar->print(dbgs()));
+ AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
+ if (AI) {
+ DEBUG(dbgs() << " Processing alloca users: ");
+ DEBUG(dbgs() << "\n");
+ for (User *U : AI->users()) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+ if (Value *Arg = SI->getValueOperand()) {
+ if (dyn_cast<Argument>(Arg)) {
+ DEBUG(dbgs() << " Include: ");
+ DEBUG(AI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIRelated.insert(AI);
+ DEBUG(dbgs() << " Include (parameter): ");
+ DEBUG(SI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIRelated.insert(SI);
+ DEBUG(dbgs() << " Include: ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIRelated.insert(&*BI);
+ } else {
+ DEBUG(dbgs() << " Delete (!parameter): ");
+ DEBUG(SI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+ } else {
+ DEBUG(dbgs() << " Defer: ");
+ DEBUG(U->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+ } else {
+ DEBUG(dbgs() << " Delete (alloca NULL): ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ } else {
+ DEBUG(dbgs() << " Delete (!parameter): ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ } else if (dyn_cast<TerminatorInst>(BI) == GEntryBlock->getTerminator()) {
+ DEBUG(dbgs() << " Will Include Terminator: ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIRelated.insert(&*BI);
+ } else {
+ DEBUG(dbgs() << " Defer: ");
+ DEBUG(BI->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+ DEBUG(dbgs()
+ << " Report parameter debug info related/related instructions: {\n");
+ for (BasicBlock::iterator BI = GEntryBlock->begin(), BE = GEntryBlock->end();
+ BI != BE; ++BI) {
+
+ Instruction *I = &*BI;
+ if (PDIRelated.find(I) == PDIRelated.end()) {
+ DEBUG(dbgs() << " !PDIRelated: ");
+ DEBUG(I->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ PDIUnrelatedWL.push_back(I);
+ } else {
+ DEBUG(dbgs() << " PDIRelated: ");
+ DEBUG(I->print(dbgs()));
+ DEBUG(dbgs() << "\n");
+ }
+ }
+ DEBUG(dbgs() << " }\n");
+}
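
The partition is computed as a complement: the first walk collects everything PDI-related into a set, and a second walk pushes whatever is absent from that set onto the worklist. A compact sketch of the two-pass partition, again with strings standing in for instructions:

// Two-pass partition sketch: mark related items, then collect the rest.
#include <iostream>
#include <set>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> EntryBlock = {"%p.addr = alloca", "dbg.declare %p",
                                         "store %p", "%t = mul", "ret"};
  std::set<std::string> PDIRelated = {"%p.addr = alloca", "dbg.declare %p",
                                      "store %p", "ret"};
  std::vector<std::string> PDIUnrelatedWL;
  for (const std::string &I : EntryBlock)
    if (!PDIRelated.count(I))
      PDIUnrelatedWL.push_back(I); // only "%t = mul" lands here
  for (const std::string &I : PDIUnrelatedWL)
    std::cout << "unrelated: " << I << '\n';
}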
+
+// Replace G with a simple tail call to bitcast(F). Also (unless
+// MergeFunctionsPDI holds) replace direct uses of G with bitcast(F)
+// and delete G. Under MergeFunctionsPDI, we use G itself to create
+// the thunk, as we preserve the debug info (and associated instructions)
+// from G's entry block pertaining to G's incoming arguments, which are
+// passed on as corresponding arguments in the call that G makes to F.
+// For better debuggability, under MergeFunctionsPDI, we do not modify G's
+// call sites to point to F even when within the same translation unit.
void MergeFunctions::writeThunk(Function *F, Function *G) {
- if (!G->isInterposable()) {
- // Redirect direct callers of G to F.
+ if (!G->isInterposable() && !MergeFunctionsPDI) {
+ // Redirect direct callers of G to F. (See note on MergeFunctionsPDI
+ // above).
replaceDirectCallers(G, F);
}
// If G was internal then we may have replaced all uses of G with F. If so,
- // stop here and delete G. There's no need for a thunk.
- if (G->hasLocalLinkage() && G->use_empty()) {
+ // stop here and delete G. There's no need for a thunk. (See note on
+ // MergeFunctionsPDI above).
+ if (G->hasLocalLinkage() && G->use_empty() && !MergeFunctionsPDI) {
G->eraseFromParent();
return;
}
- Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
- G->getParent());
- BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
- IRBuilder<> Builder(BB);
+ BasicBlock *GEntryBlock = nullptr;
+ std::vector<Instruction *> PDIUnrelatedWL;
+ BasicBlock *BB = nullptr;
+ Function *NewG = nullptr;
+ if (MergeFunctionsPDI) {
+ DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) Do not create a new "
+ "function as thunk; retain original: "
+ << G->getName() << "()\n");
+ GEntryBlock = &G->getEntryBlock();
+ DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) filter parameter related "
+ "debug info for "
+ << G->getName() << "() {\n");
+ filterInstsUnrelatedToPDI(GEntryBlock, PDIUnrelatedWL);
+ GEntryBlock->getTerminator()->eraseFromParent();
+ BB = GEntryBlock;
+ } else {
+ NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
+ G->getParent());
+ BB = BasicBlock::Create(F->getContext(), "", NewG);
+ }
+ IRBuilder<> Builder(BB);
+ Function *H = MergeFunctionsPDI ? G : NewG;
SmallVector<Value *, 16> Args;
unsigned i = 0;
FunctionType *FFTy = F->getFunctionType();
- for (Argument & AI : NewG->args()) {
+ for (Argument & AI : H->args()) {
Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i)));
++i;
}
CallInst *CI = Builder.CreateCall(F, Args);
+ ReturnInst *RI = nullptr;
CI->setTailCall();
CI->setCallingConv(F->getCallingConv());
CI->setAttributes(F->getAttributes());
- if (NewG->getReturnType()->isVoidTy()) {
- Builder.CreateRetVoid();
+ if (H->getReturnType()->isVoidTy()) {
+ RI = Builder.CreateRetVoid();
} else {
- Builder.CreateRet(createCast(Builder, CI, NewG->getReturnType()));
+ RI = Builder.CreateRet(createCast(Builder, CI, H->getReturnType()));
}
- NewG->copyAttributesFrom(G);
- NewG->takeName(G);
- removeUsers(G);
- G->replaceAllUsesWith(NewG);
- G->eraseFromParent();
+ if (MergeFunctionsPDI) {
+ DISubprogram *DIS = G->getSubprogram();
+ if (DIS) {
+ DebugLoc CIDbgLoc = DebugLoc::get(DIS->getScopeLine(), 0, DIS);
+ DebugLoc RIDbgLoc = DebugLoc::get(DIS->getScopeLine(), 0, DIS);
+ CI->setDebugLoc(CIDbgLoc);
+ RI->setDebugLoc(RIDbgLoc);
+ } else {
+ DEBUG(dbgs() << "writeThunk: (MergeFunctionsPDI) No DISubprogram for "
+ << G->getName() << "()\n");
+ }
+ eraseTail(G);
+ eraseInstsUnrelatedToPDI(PDIUnrelatedWL);
+ DEBUG(dbgs() << "} // End of parameter related debug info filtering for: "
+ << G->getName() << "()\n");
+ } else {
+ NewG->copyAttributesFrom(G);
+ NewG->takeName(G);
+ removeUsers(G);
+ G->replaceAllUsesWith(NewG);
+ G->eraseFromParent();
+ }
- DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n');
+ DEBUG(dbgs() << "writeThunk: " << H->getName() << '\n');
++NumThunksWritten;
}
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 7ef3fc1fc2a7..a2f6e5639d9d 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -33,7 +33,7 @@ STATISTIC(NumPartialInlined, "Number of functions partially inlined");
namespace {
struct PartialInlinerImpl {
- PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(IFI) {}
+ PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {}
bool run(Module &M);
Function *unswitchFunction(Function *F);
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 941efb210d1c..f11b58d1adc4 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -93,10 +93,6 @@ static cl::opt<CFLAAType>
clEnumValN(CFLAAType::Both, "both",
"Enable both variants of CFL-AA")));
-static cl::opt<bool>
-EnableMLSM("mlsm", cl::init(true), cl::Hidden,
- cl::desc("Enable motion of merged load and store"));
-
static cl::opt<bool> EnableLoopInterchange(
"enable-loopinterchange", cl::init(false), cl::Hidden,
cl::desc("Enable the new, experimental LoopInterchange Pass"));
@@ -141,8 +137,8 @@ static cl::opt<int> PreInlineThreshold(
"(default = 75)"));
static cl::opt<bool> EnableGVNHoist(
- "enable-gvn-hoist", cl::init(false), cl::Hidden,
- cl::desc("Enable the GVN hoisting pass"));
+ "enable-gvn-hoist", cl::init(true), cl::Hidden,
+ cl::desc("Enable the GVN hoisting pass (default = on)"));
static cl::opt<bool>
DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false),
@@ -172,6 +168,7 @@ PassManagerBuilder::PassManagerBuilder() {
PGOInstrUse = RunPGOInstrUse;
PrepareForThinLTO = EnablePrepareForThinLTO;
PerformThinLTO = false;
+ DivergentTarget = false;
}
PassManagerBuilder::~PassManagerBuilder() {
@@ -248,8 +245,6 @@ void PassManagerBuilder::populateFunctionPassManager(
FPM.add(createCFGSimplificationPass());
FPM.add(createSROAPass());
FPM.add(createEarlyCSEPass());
- if(EnableGVNHoist)
- FPM.add(createGVNHoistPass());
FPM.add(createLowerExpectIntrinsicPass());
}
@@ -294,6 +289,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// Break up aggregate allocas, using SSAUpdater.
MPM.add(createSROAPass());
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
+ if (EnableGVNHoist)
+ MPM.add(createGVNHoistPass());
// Speculative execution if the target has divergent branches; otherwise nop.
MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
MPM.add(createJumpThreadingPass()); // Thread jumps.
@@ -305,29 +302,34 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createLibCallsShrinkWrapPass());
addExtensionsToPM(EP_Peephole, MPM);
+ // Optimize memory intrinsic calls based on the profiled size information.
+ if (SizeLevel == 0)
+ MPM.add(createPGOMemOPSizeOptLegacyPass());
+
MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
MPM.add(createLICMPass()); // Hoist loop invariants
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
MPM.add(createCFGSimplificationPass());
addInstructionCombiningPass(MPM);
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
+ addExtensionsToPM(EP_LateLoopOptimizations, MPM);
MPM.add(createLoopDeletionPass()); // Delete dead loops
+
if (EnableLoopInterchange) {
MPM.add(createLoopInterchangePass()); // Interchange loops
MPM.add(createCFGSimplificationPass());
}
if (!DisableUnrollLoops)
- MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops
+ MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
if (OptLevel > 1) {
- if (EnableMLSM)
- MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
+ MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
MPM.add(NewGVN ? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
}
@@ -369,7 +371,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
// BBVectorize may have significantly shortened a loop body; unroll again.
if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass());
+ MPM.add(createLoopUnrollPass(OptLevel));
}
}
@@ -434,7 +436,16 @@ void PassManagerBuilder::populateModulePassManager(
// earlier in the pass pipeline, here before globalopt. Otherwise imported
// available_externally functions look unreferenced and are removed.
if (PerformThinLTO)
- MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true));
+ MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true,
+ !PGOSampleUse.empty()));
+
+ // For SamplePGO in the ThinLTO compile phase, we do not want to unroll
+ // loops, as doing so would change the CFG too much, making the second
+ // profile annotation in the backend more difficult.
+ bool PrepareForThinLTOUsingPGOSampleProfile =
+ PrepareForThinLTO && !PGOSampleUse.empty();
+ if (PrepareForThinLTOUsingPGOSampleProfile)
+ DisableUnrollLoops = true;
if (!DisableUnitAtATime) {
// Infer attributes about declarations if possible.
@@ -454,14 +465,18 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
}
- if (!PerformThinLTO) {
+ // For SamplePGO in the ThinLTO compile phase, we do not want to do indirect
+ // call promotion, as it would change the CFG too much, making the second
+ // profile annotation in the backend more difficult.
+ if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile) {
/// PGO instrumentation is added during the compile phase for ThinLTO; do
/// not run it a second time.
addPGOInstrPasses(MPM);
// Indirect call promotion that promotes intra-module targets only.
// For ThinLTO this is done earlier due to interactions with globalopt
// for imported functions.
- MPM.add(createPGOIndirectCallPromotionLegacyPass());
+ MPM.add(
+ createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty()));
}
if (EnableNonLTOGlobalsModRef)
@@ -589,7 +604,7 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createCorrelatedValuePropagationPass());
addInstructionCombiningPass(MPM);
MPM.add(createLICMPass());
- MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
+ MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
MPM.add(createCFGSimplificationPass());
addInstructionCombiningPass(MPM);
}
@@ -615,16 +630,16 @@ void PassManagerBuilder::populateModulePassManager(
// BBVectorize may have significantly shortened a loop body; unroll again.
if (!DisableUnrollLoops)
- MPM.add(createLoopUnrollPass());
+ MPM.add(createLoopUnrollPass(OptLevel));
}
}
addExtensionsToPM(EP_Peephole, MPM);
- MPM.add(createCFGSimplificationPass());
+ MPM.add(createLateCFGSimplificationPass()); // Switches to lookup tables
addInstructionCombiningPass(MPM);
if (!DisableUnrollLoops) {
- MPM.add(createLoopUnrollPass()); // Unroll small loops
+ MPM.add(createLoopUnrollPass(OptLevel)); // Unroll small loops
// LoopUnroll may generate some redundancy to clean up.
addInstructionCombiningPass(MPM);
@@ -684,7 +699,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// left by the earlier promotion pass that promotes intra-module targets.
// This two-step promotion is to save the compile time. For LTO, it should
// produce the same result as if we only do promotion here.
- PM.add(createPGOIndirectCallPromotionLegacyPass(true));
+ PM.add(
+ createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty()));
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
@@ -703,7 +719,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createGlobalSplitPass());
// Apply whole-program devirtualization and virtual constant propagation.
- PM.add(createWholeProgramDevirtPass());
+ PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr));
// That's all we need at opt level 1.
if (OptLevel == 1)
@@ -759,8 +775,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
PM.add(createLICMPass()); // Hoist loop invariants.
- if (EnableMLSM)
- PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
+ PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
PM.add(NewGVN ? createNewGVNPass()
: createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
PM.add(createMemCpyOptPass()); // Remove dead memcpys.
@@ -775,11 +790,11 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createLoopInterchangePass());
if (!DisableUnrollLoops)
- PM.add(createSimpleLoopUnrollPass()); // Unroll small loops
+ PM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops
PM.add(createLoopVectorizePass(true, LoopVectorize));
// The vectorizer may have significantly shortened a loop body; unroll again.
if (!DisableUnrollLoops)
- PM.add(createLoopUnrollPass());
+ PM.add(createLoopUnrollPass(OptLevel));
// Now that we've optimized loops (in particular loop induction variables),
// we may have exposed more scalar opportunities. Run parts of the scalar
@@ -833,6 +848,23 @@ void PassManagerBuilder::populateThinLTOPassManager(
if (VerifyInput)
PM.add(createVerifierPass());
+ if (ImportSummary) {
+ // These passes import type identifier resolutions for whole-program
+ // devirtualization and CFI. They must run early because other passes may
+ // disturb the specific instruction patterns that these passes look for,
+ // creating dependencies on resolutions that may not appear in the summary.
+ //
+ // For example, GVN may transform the pattern assume(type.test) appearing in
+ // two basic blocks into assume(phi(type.test, type.test)), which would
+ // transform a dependency on a WPD resolution into a dependency on a type
+ // identifier resolution for CFI.
+ //
+ // Also, WPD has access to more precise information than ICP and can
+ // devirtualize more effectively, so it should operate on the IR first.
+ PM.add(createWholeProgramDevirtPass(nullptr, ImportSummary));
+ PM.add(createLowerTypeTestsPass(nullptr, ImportSummary));
+ }
+
populateModulePassManager(PM);
if (VerifyOutput)
@@ -857,8 +889,7 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) {
// Lower type metadata and the type.test intrinsic. This pass supports Clang's
// control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at
// link time if CFI is enabled. The pass does nothing if CFI is disabled.
- PM.add(createLowerTypeTestsPass(LowerTypeTestsSummaryAction::None,
- /*Summary=*/nullptr));
+ PM.add(createLowerTypeTestsPass(ExportSummary, nullptr));
if (OptLevel != 0)
addLateLTOOptimizationPasses(PM);
diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp
index 6a43f8dbac48..3371de6e3d14 100644
--- a/lib/Transforms/IPO/SampleProfile.cpp
+++ b/lib/Transforms/IPO/SampleProfile.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -43,6 +44,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -50,6 +52,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <cctype>
@@ -159,8 +162,11 @@ protected:
ErrorOr<uint64_t> getInstWeight(const Instruction &I);
ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
+ std::vector<const FunctionSamples *>
+ findIndirectCallFunctionSamples(const Instruction &I) const;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
- bool inlineHotFunctions(Function &F);
+ bool inlineHotFunctions(Function &F,
+ DenseSet<GlobalValue::GUID> &ImportGUIDs);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
@@ -173,7 +179,7 @@ protected:
void buildEdges(Function &F);
bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
void computeDominanceAndLoopInfo(Function &F);
- unsigned getOffset(unsigned L, unsigned H) const;
+ unsigned getOffset(const DILocation *DIL) const;
void clearFunctionData();
/// \brief Map basic blocks to their computed weights.
@@ -326,11 +332,12 @@ SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const {
// If there are inlined callsites in this function, count the samples found
// in the respective bodies. However, do not bother counting callees with 0
// total samples; these are callees that were never invoked at runtime.
- for (const auto &I : FS->getCallsiteSamples()) {
- const FunctionSamples *CalleeSamples = &I.second;
- if (callsiteIsHot(FS, CalleeSamples))
- Count += countUsedRecords(CalleeSamples);
- }
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Count += countUsedRecords(CalleeSamples);
+ }
return Count;
}
@@ -343,11 +350,12 @@ SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const {
unsigned Count = FS->getBodySamples().size();
// Only count records in hot callsites.
- for (const auto &I : FS->getCallsiteSamples()) {
- const FunctionSamples *CalleeSamples = &I.second;
- if (callsiteIsHot(FS, CalleeSamples))
- Count += countBodyRecords(CalleeSamples);
- }
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Count += countBodyRecords(CalleeSamples);
+ }
return Count;
}
@@ -362,11 +370,12 @@ SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const {
Total += I.second.getSamples();
// Only count samples in hot callsites.
- for (const auto &I : FS->getCallsiteSamples()) {
- const FunctionSamples *CalleeSamples = &I.second;
- if (callsiteIsHot(FS, CalleeSamples))
- Total += countBodySamples(CalleeSamples);
- }
+ for (const auto &I : FS->getCallsiteSamples())
+ for (const auto &J : I.second) {
+ const FunctionSamples *CalleeSamples = &J.second;
+ if (callsiteIsHot(FS, CalleeSamples))
+ Total += countBodySamples(CalleeSamples);
+ }
return Total;
}
@@ -398,15 +407,11 @@ void SampleProfileLoader::clearFunctionData() {
CoverageTracker.clear();
}
-/// \brief Returns the offset of lineno \p L to head_lineno \p H
-///
-/// \param L Lineno
-/// \param H Header lineno of the function
-///
-/// \returns offset to the header lineno. 16 bits are used to represent offset.
+/// Returns the line offset to the start line of the subprogram.
/// We assume that a single function will not exceed 65535 LOC.
-unsigned SampleProfileLoader::getOffset(unsigned L, unsigned H) const {
- return (L - H) & 0xffff;
+unsigned SampleProfileLoader::getOffset(const DILocation *DIL) const {
+ return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
+ 0xffff;
}
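
The offset is the instruction's line minus the subprogram's start line, masked to 16 bits to match the profile encoding. A quick standalone check of the arithmetic:

// Line-offset sketch: profiles key samples by (line - function start line)
// masked to 16 bits, per the 65535-LOC assumption above.
#include <cstdint>
#include <iostream>

uint32_t getOffset(uint32_t InstLine, uint32_t SubprogramLine) {
  return (InstLine - SubprogramLine) & 0xffff;
}

int main() {
  std::cout << getOffset(120, 100) << '\n'; // 20
  // Unsigned wraparound keeps the mask well-defined even if debug info
  // places an instruction "before" the subprogram's start line.
  std::cout << getOffset(100, 120) << '\n'; // 65516
}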
/// \brief Print the weight of edge \p E on stream \p OS.
@@ -451,8 +456,7 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
/// \param Inst Instruction to query.
///
/// \returns the weight of \p Inst.
-ErrorOr<uint64_t>
-SampleProfileLoader::getInstWeight(const Instruction &Inst) {
+ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
const DebugLoc &DLoc = Inst.getDebugLoc();
if (!DLoc)
return std::error_code();
@@ -470,19 +474,14 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) {
// If a call/invoke instruction is inlined in profile, but not inlined here,
// it means that the inlined callsite has no sample, thus the call
// instruction should have 0 count.
- bool IsCall = isa<CallInst>(Inst) || isa<InvokeInst>(Inst);
- if (IsCall && findCalleeFunctionSamples(Inst))
+ if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
+ findCalleeFunctionSamples(Inst))
return 0;
const DILocation *DIL = DLoc;
- unsigned Lineno = DLoc.getLine();
- unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();
-
- uint32_t LineOffset = getOffset(Lineno, HeaderLineno);
- uint32_t Discriminator = DIL->getDiscriminator();
- ErrorOr<uint64_t> R = IsCall
- ? FS->findCallSamplesAt(LineOffset, Discriminator)
- : FS->findSamplesAt(LineOffset, Discriminator);
+ uint32_t LineOffset = getOffset(DIL);
+ uint32_t Discriminator = DIL->getBaseDiscriminator();
+ ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
if (R) {
bool FirstMark =
CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
@@ -491,13 +490,14 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) {
LLVMContext &Ctx = F->getContext();
emitOptimizationRemark(
Ctx, DEBUG_TYPE, *F, DLoc,
- Twine("Applied ") + Twine(*R) + " samples from profile (offset: " +
- Twine(LineOffset) +
+ Twine("Applied ") + Twine(*R) +
+ " samples from profile (offset: " + Twine(LineOffset) +
((Discriminator) ? Twine(".") + Twine(Discriminator) : "") + ")");
}
- DEBUG(dbgs() << " " << Lineno << "." << DIL->getDiscriminator() << ":"
- << Inst << " (line offset: " << Lineno - HeaderLineno << "."
- << DIL->getDiscriminator() << " - weight: " << R.get()
+ DEBUG(dbgs() << " " << DLoc.getLine() << "."
+ << DIL->getBaseDiscriminator() << ":" << Inst
+ << " (line offset: " << LineOffset << "."
+ << DIL->getBaseDiscriminator() << " - weight: " << R.get()
<< ")\n");
}
return R;
@@ -511,8 +511,7 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) {
/// \param BB The basic block to query.
///
/// \returns the weight for \p BB.
-ErrorOr<uint64_t>
-SampleProfileLoader::getBlockWeight(const BasicBlock *BB) {
+ErrorOr<uint64_t> SampleProfileLoader::getBlockWeight(const BasicBlock *BB) {
uint64_t Max = 0;
bool HasWeight = false;
for (auto &I : BB->getInstList()) {
@@ -565,16 +564,49 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
if (!DIL) {
return nullptr;
}
- DISubprogram *SP = DIL->getScope()->getSubprogram();
- if (!SP)
- return nullptr;
+
+ StringRef CalleeName;
+ if (const CallInst *CI = dyn_cast<CallInst>(&Inst))
+ if (Function *Callee = CI->getCalledFunction())
+ CalleeName = Callee->getName();
const FunctionSamples *FS = findFunctionSamples(Inst);
if (FS == nullptr)
return nullptr;
- return FS->findFunctionSamplesAt(LineLocation(
- getOffset(DIL->getLine(), SP->getLine()), DIL->getDiscriminator()));
+ return FS->findFunctionSamplesAt(
+ LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), CalleeName);
+}
+
+/// Returns a vector of FunctionSamples that are the indirect call targets
+/// of \p Inst. The vector is sorted in descending order of total sample count.
+std::vector<const FunctionSamples *>
+SampleProfileLoader::findIndirectCallFunctionSamples(
+ const Instruction &Inst) const {
+ const DILocation *DIL = Inst.getDebugLoc();
+ std::vector<const FunctionSamples *> R;
+
+ if (!DIL) {
+ return R;
+ }
+
+ const FunctionSamples *FS = findFunctionSamples(Inst);
+ if (FS == nullptr)
+ return R;
+
+ if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(
+ LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) {
+ if (M->size() == 0)
+ return R;
+ for (const auto &NameFS : *M) {
+ R.push_back(&NameFS.second);
+ }
+ std::sort(R.begin(), R.end(),
+ [](const FunctionSamples *L, const FunctionSamples *R) {
+ return L->getTotalSamples() > R->getTotalSamples();
+ });
+ }
+ return R;
}
/// \brief Get the FunctionSamples for an instruction.
@@ -588,23 +620,23 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
/// \returns the FunctionSamples pointer to the inlined instance.
const FunctionSamples *
SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
- SmallVector<LineLocation, 10> S;
+ SmallVector<std::pair<LineLocation, StringRef>, 10> S;
const DILocation *DIL = Inst.getDebugLoc();
- if (!DIL) {
+ if (!DIL)
return Samples;
- }
+
+ const DILocation *PrevDIL = DIL;
for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
- DISubprogram *SP = DIL->getScope()->getSubprogram();
- if (!SP)
- return nullptr;
- S.push_back(LineLocation(getOffset(DIL->getLine(), SP->getLine()),
- DIL->getDiscriminator()));
+ S.push_back(std::make_pair(
+ LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()),
+ PrevDIL->getScope()->getSubprogram()->getLinkageName()));
+ PrevDIL = DIL;
}
if (S.size() == 0)
return Samples;
const FunctionSamples *FS = Samples;
for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
- FS = FS->findFunctionSamplesAt(S[i]);
+ FS = FS->findFunctionSamplesAt(S[i].first, S[i].second);
}
return FS;
}
@@ -614,14 +646,17 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
/// Iteratively traverse all callsites of the function \p F, and find if
/// the corresponding inlined instance exists and is hot in profile. If
/// it is hot enough, inline the callsites and add new callsites of the
-/// callee into the caller.
-///
-/// TODO: investigate the possibility of not invoking InlineFunction directly.
+/// callee into the caller. If the call is an indirect call, first promote
+/// it to a direct call. Each indirect call is limited to a single target.
///
/// \param F function to perform iterative inlining.
+/// \param ImportGUIDs a set to be updated to include all GUIDs that come
+/// from a different module but are inlined in the profiled binary.
///
/// \returns True if there is any inline happened.
-bool SampleProfileLoader::inlineHotFunctions(Function &F) {
+bool SampleProfileLoader::inlineHotFunctions(
+ Function &F, DenseSet<GlobalValue::GUID> &ImportGUIDs) {
+ DenseSet<Instruction *> PromotedInsns;
bool Changed = false;
LLVMContext &Ctx = F.getContext();
std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](
@@ -647,18 +682,42 @@ bool SampleProfileLoader::inlineHotFunctions(Function &F) {
}
for (auto I : CIS) {
InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr);
- CallSite CS(I);
- Function *CalledFunction = CS.getCalledFunction();
- if (!CalledFunction || !CalledFunction->getSubprogram())
+ Function *CalledFunction = CallSite(I).getCalledFunction();
+ Instruction *DI = I;
+ if (!CalledFunction && !PromotedInsns.count(I) &&
+ CallSite(I).isIndirectCall())
+ for (const auto *FS : findIndirectCallFunctionSamples(*I)) {
+ auto CalleeFunctionName = FS->getName();
+ const char *Reason = "Callee function not available";
+ CalledFunction = F.getParent()->getFunction(CalleeFunctionName);
+ if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) {
+ // The indirect target was promoted and inlined in the profile; as a
+ // result, we do not have profile info for the branch probability.
+ // We set the probability to 80% taken to indicate that the static
+ // call is likely taken.
+ DI = dyn_cast<Instruction>(
+ promoteIndirectCall(I, CalledFunction, 80, 100, false)
+ ->stripPointerCasts());
+ PromotedInsns.insert(I);
+ } else {
+ DEBUG(dbgs() << "\nFailed to promote indirect call to "
+ << CalleeFunctionName << " because " << Reason
+ << "\n");
+ continue;
+ }
+ }
+ if (!CalledFunction || !CalledFunction->getSubprogram()) {
+ findCalleeFunctionSamples(*I)->findImportedFunctions(
+ ImportGUIDs, F.getParent(),
+ Samples->getTotalSamples() * SampleProfileHotThreshold / 100);
continue;
+ }
DebugLoc DLoc = I->getDebugLoc();
- uint64_t NumSamples = findCalleeFunctionSamples(*I)->getTotalSamples();
- if (InlineFunction(CS, IFI)) {
+ if (InlineFunction(CallSite(DI), IFI)) {
LocalChanged = true;
emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
Twine("inlined hot callee '") +
- CalledFunction->getName() + "' with " +
- Twine(NumSamples) + " samples into '" +
+ CalledFunction->getName() + "' into '" +
F.getName() + "'");
}
}
@@ -994,6 +1053,26 @@ void SampleProfileLoader::buildEdges(Function &F) {
}
}
+/// Sorts the CallTargetMap \p M by count in descending order and stores the
+/// sorted result in \p Sorted. Returns the sum of the counts.
+static uint64_t SortCallTargets(SmallVector<InstrProfValueData, 2> &Sorted,
+ const SampleRecord::CallTargetMap &M) {
+ Sorted.clear();
+ uint64_t Sum = 0;
+ for (auto I = M.begin(); I != M.end(); ++I) {
+ Sum += I->getValue();
+ Sorted.push_back({Function::getGUID(I->getKey()), I->getValue()});
+ }
+ std::sort(Sorted.begin(), Sorted.end(),
+ [](const InstrProfValueData &L, const InstrProfValueData &R) {
+ if (L.Count == R.Count)
+ return L.Value > R.Value;
+ else
+ return L.Count > R.Count;
+ });
+ return Sum;
+}
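
The comparator sorts by descending count and breaks ties on the GUID, also descending, so the resulting order is deterministic across runs. A self-contained sketch using the same comparator shape (the struct below is a stand-in for InstrProfValueData):

// Sketch of SortCallTargets' ordering on a stand-in for InstrProfValueData.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

struct ValueData {
  uint64_t Value; // target GUID
  uint64_t Count; // sample count
};

int main() {
  std::vector<ValueData> Targets = {{7, 50}, {9, 50}, {3, 200}};
  uint64_t Sum = 0;
  for (const ValueData &V : Targets)
    Sum += V.Count;
  std::sort(Targets.begin(), Targets.end(),
            [](const ValueData &L, const ValueData &R) {
              if (L.Count == R.Count)
                return L.Value > R.Value; // deterministic tie-break
              return L.Count > R.Count;   // hottest target first
            });
  for (const ValueData &V : Targets)
    std::cout << V.Value << ": " << V.Count << '\n'; // 3:200, 9:50, 7:50
  std::cout << "total = " << Sum << '\n';            // 300
}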
+
/// \brief Propagate weights into edges
///
/// The following rules are applied to every block BB in the CFG:
@@ -1015,10 +1094,6 @@ void SampleProfileLoader::propagateWeights(Function &F) {
bool Changed = true;
unsigned I = 0;
- // Add an entry count to the function using the samples gathered
- // at the function entry.
- F.setEntryCount(Samples->getHeadSamples() + 1);
-
// If BB weight is larger than its corresponding loop's header BB weight,
// use the BB weight to replace the loop header BB weight.
for (auto &BI : F) {
@@ -1071,13 +1146,32 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (BlockWeights[BB]) {
for (auto &I : BB->getInstList()) {
- if (CallInst *CI = dyn_cast<CallInst>(&I)) {
- if (!dyn_cast<IntrinsicInst>(&I)) {
- SmallVector<uint32_t, 1> Weights;
- Weights.push_back(BlockWeights[BB]);
- CI->setMetadata(LLVMContext::MD_prof,
- MDB.createBranchWeights(Weights));
- }
+ if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
+ continue;
+ CallSite CS(&I);
+ if (!CS.getCalledFunction()) {
+ const DebugLoc &DLoc = I.getDebugLoc();
+ if (!DLoc)
+ continue;
+ const DILocation *DIL = DLoc;
+ uint32_t LineOffset = getOffset(DIL);
+ uint32_t Discriminator = DIL->getBaseDiscriminator();
+
+ const FunctionSamples *FS = findFunctionSamples(I);
+ if (!FS)
+ continue;
+ auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
+ if (!T || T.get().size() == 0)
+ continue;
+ SmallVector<InstrProfValueData, 2> SortedCallTargets;
+ uint64_t Sum = SortCallTargets(SortedCallTargets, T.get());
+ annotateValueSite(*I.getParent()->getParent()->getParent(), I,
+ SortedCallTargets, Sum, IPVK_IndirectCallTarget,
+ SortedCallTargets.size());
+ } else if (!dyn_cast<IntrinsicInst>(&I)) {
+ SmallVector<uint32_t, 1> Weights;
+ Weights.push_back(BlockWeights[BB]);
+ I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
}
}
}
@@ -1115,9 +1209,13 @@ void SampleProfileLoader::propagateWeights(Function &F) {
}
}
+ uint64_t TempWeight;
// Only set weights if there is at least one non-zero weight.
// In any other case, let the analyzer set weights.
- if (MaxWeight > 0) {
+ // Do not set weights if weights are already present. In ThinLTO, the
+ // profile annotation is done twice. If the first annotation already set
+ // the weights, the second pass does not need to set them.
+ if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) {
DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
TI->setMetadata(llvm::LLVMContext::MD_prof,
MDB.createBranchWeights(Weights));
@@ -1228,12 +1326,19 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
<< ": " << getFunctionLoc(F) << "\n");
- Changed |= inlineHotFunctions(F);
+ DenseSet<GlobalValue::GUID> ImportGUIDs;
+ Changed |= inlineHotFunctions(F, ImportGUIDs);
// Compute basic block weights.
Changed |= computeBlockWeights(F);
if (Changed) {
+ // Add an entry count to the function using the samples gathered at the
+ // function entry. Also set the GUIDs that come from a different
+ // module but are inlined in the profiled binary. This aims to make
+ // the IR match the profiled binary before annotation.
+ F.setEntryCount(Samples->getHeadSamples() + 1, &ImportGUIDs);
+
// Compute dominance and loop info needed for propagation.
computeDominanceAndLoopInfo(F);
@@ -1329,7 +1434,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
bool SampleProfileLoader::runOnFunction(Function &F) {
F.setEntryCount(0);
Samples = Reader->getSamplesFor(F);
- if (!Samples->empty())
+ if (Samples && !Samples->empty())
return emitAnnotations(F);
return false;
}
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 8f6f161428e8..fb64367eef91 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -323,6 +323,14 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
LiveGVs.insert(GVE);
}
+ std::set<DICompileUnit *> LiveCUs;
+ // Any CU referenced from a subprogram is live.
+ for (DISubprogram *SP : F.subprograms()) {
+ if (SP->getUnit())
+ LiveCUs.insert(SP->getUnit());
+ }
+
+ bool HasDeadCUs = false;
for (DICompileUnit *DIC : F.compile_units()) {
// Create our live global variable list.
bool GlobalVariableChange = false;
@@ -341,6 +349,11 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
GlobalVariableChange = true;
}
+ if (!LiveGlobalVariables.empty())
+ LiveCUs.insert(DIC);
+ else if (!LiveCUs.count(DIC))
+ HasDeadCUs = true;
+
// If we found dead global variables, replace the current global
// variable list with our new live global variable list.
if (GlobalVariableChange) {
@@ -352,5 +365,16 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
LiveGlobalVariables.clear();
}
+ if (HasDeadCUs) {
+ // Delete the old node and replace it with a new one.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
+ NMD->clearOperands();
+ if (!LiveCUs.empty()) {
+ for (DICompileUnit *CU : LiveCUs)
+ NMD->addOperand(CU);
+ }
+ Changed = true;
+ }
+
return Changed;
}
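
The liveness rule has two sources: any CU referenced from a subprogram is live up front, and a CU that still owns live global variables becomes live during the walk; only if some CU ends up in neither set is llvm.dbg.cu rebuilt. A small sketch of that computation:

// Sketch: compute the set of live compile units from two liveness sources.
// CU names are hypothetical.
#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

int main() {
  std::set<std::string> LiveCUs;
  // Source 1: CUs referenced from subprograms.
  std::vector<std::string> SubprogramUnits = {"cu.a", "cu.b"};
  for (const std::string &CU : SubprogramUnits)
    LiveCUs.insert(CU);
  // Source 2: CUs that still own live global variables.
  std::vector<std::pair<std::string, bool>> AllCUs = {
      {"cu.a", false}, {"cu.b", true}, {"cu.dead", false}};
  bool HasDeadCUs = false;
  for (const auto &CU : AllCUs) {
    if (CU.second)
      LiveCUs.insert(CU.first);
    else if (!LiveCUs.count(CU.first))
      HasDeadCUs = true;
  }
  if (HasDeadCUs) // rebuild the list with only live CUs
    for (const std::string &CU : LiveCUs)
      std::cout << CU << '\n'; // cu.a, cu.b
}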
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 3680cfc813a1..65deb82cd2a5 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -14,16 +14,21 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;
@@ -41,23 +46,14 @@ namespace {
std::string getModuleId(Module *M) {
MD5 Md5;
bool ExportsSymbols = false;
- auto AddGlobal = [&](GlobalValue &GV) {
+ for (auto &GV : M->global_values()) {
if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
!GV.hasExternalLinkage())
- return;
+ continue;
ExportsSymbols = true;
Md5.update(GV.getName());
Md5.update(ArrayRef<uint8_t>{0});
- };
-
- for (auto &F : *M)
- AddGlobal(F);
- for (auto &GV : M->globals())
- AddGlobal(GV);
- for (auto &GA : M->aliases())
- AddGlobal(GA);
- for (auto &IF : M->ifuncs())
- AddGlobal(IF);
+ }
if (!ExportsSymbols)
return "";
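
Each contributing name is followed by a NUL byte in the hash input, so {"ab", "c"} and {"a", "bc"} produce different module ids, and a module exporting no symbols gets an empty id, which the caller treats as "write a regular LTO module". A sketch of the scheme, with std::hash standing in for the MD5 used by the real code:

// Module-id sketch: hash NUL-separated exported names; an empty result means
// "no exports". std::hash stands in for MD5 purely for illustration.
#include <functional>
#include <iostream>
#include <string>
#include <vector>

std::string getModuleId(const std::vector<std::string> &ExportedNames) {
  if (ExportedNames.empty())
    return ""; // no exports: caller writes a regular LTO module instead
  std::string Buf;
  for (const std::string &Name : ExportedNames) {
    Buf += Name;
    Buf += '\0'; // separator prevents concatenation collisions
  }
  return "$" + std::to_string(std::hash<std::string>{}(Buf));
}

int main() {
  std::cout << getModuleId({"foo", "bar"}) << '\n';
  std::cout << (getModuleId({}) == "") << '\n'; // 1
}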
@@ -73,15 +69,21 @@ std::string getModuleId(Module *M) {
// Promote each local-linkage entity defined by ExportM and used by ImportM by
// changing visibility and appending the given ModuleId.
void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
- auto PromoteInternal = [&](GlobalValue &ExportGV) {
+ DenseMap<const Comdat *, Comdat *> RenamedComdats;
+ for (auto &ExportGV : ExportM.global_values()) {
if (!ExportGV.hasLocalLinkage())
- return;
+ continue;
- GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
+ auto Name = ExportGV.getName();
+ GlobalValue *ImportGV = ImportM.getNamedValue(Name);
if (!ImportGV || ImportGV->use_empty())
- return;
+ continue;
+
+ std::string NewName = (Name + ModuleId).str();
- std::string NewName = (ExportGV.getName() + ModuleId).str();
+ if (const auto *C = ExportGV.getComdat())
+ if (C->getName() == Name)
+ RenamedComdats.try_emplace(C, ExportM.getOrInsertComdat(NewName));
ExportGV.setName(NewName);
ExportGV.setLinkage(GlobalValue::ExternalLinkage);
@@ -89,16 +91,15 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
ImportGV->setName(NewName);
ImportGV->setVisibility(GlobalValue::HiddenVisibility);
- };
+ }
- for (auto &F : ExportM)
- PromoteInternal(F);
- for (auto &GV : ExportM.globals())
- PromoteInternal(GV);
- for (auto &GA : ExportM.aliases())
- PromoteInternal(GA);
- for (auto &IF : ExportM.ifuncs())
- PromoteInternal(IF);
+ if (!RenamedComdats.empty())
+ for (auto &GO : ExportM.global_objects())
+ if (auto *C = GO.getComdat()) {
+ auto Replacement = RenamedComdats.find(C);
+ if (Replacement != RenamedComdats.end())
+ GO.setComdat(Replacement->second);
+ }
}
// Promote all internal (i.e. distinct) type ids used by the module by replacing
@@ -194,24 +195,7 @@ void simplifyExternals(Module &M) {
}
void filterModule(
- Module *M, std::function<bool(const GlobalValue *)> ShouldKeepDefinition) {
- for (Function &F : *M) {
- if (ShouldKeepDefinition(&F))
- continue;
-
- F.deleteBody();
- F.clearMetadata();
- }
-
- for (GlobalVariable &GV : M->globals()) {
- if (ShouldKeepDefinition(&GV))
- continue;
-
- GV.setInitializer(nullptr);
- GV.setLinkage(GlobalValue::ExternalLinkage);
- GV.clearMetadata();
- }
-
+ Module *M, function_ref<bool(const GlobalValue *)> ShouldKeepDefinition) {
for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E;) {
GlobalAlias *GA = &*I++;
@@ -219,7 +203,7 @@ void filterModule(
continue;
GlobalObject *GO;
- if (I->getValueType()->isFunctionTy())
+ if (GA->getValueType()->isFunctionTy())
GO = Function::Create(cast<FunctionType>(GA->getValueType()),
GlobalValue::ExternalLinkage, "", M);
else
@@ -231,53 +215,168 @@ void filterModule(
GA->replaceAllUsesWith(GO);
GA->eraseFromParent();
}
+
+ for (Function &F : *M) {
+ if (ShouldKeepDefinition(&F))
+ continue;
+
+ F.deleteBody();
+ F.setComdat(nullptr);
+ F.clearMetadata();
+ }
+
+ for (GlobalVariable &GV : M->globals()) {
+ if (ShouldKeepDefinition(&GV))
+ continue;
+
+ GV.setInitializer(nullptr);
+ GV.setLinkage(GlobalValue::ExternalLinkage);
+ GV.setComdat(nullptr);
+ GV.clearMetadata();
+ }
+}
+
+void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {
+ if (auto *F = dyn_cast<Function>(C))
+ return Fn(F);
+ if (isa<GlobalValue>(C))
+ return;
+ for (Value *Op : C->operands())
+ forEachVirtualFunction(cast<Constant>(Op), Fn);
}
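A minimal usage sketch for this helper (the variable names are illustrative, not from the patch): the recursion descends through nested constant aggregates, invokes Fn once per Function it reaches, and stops at any other GlobalValue so that referenced globals are not traversed.

  // Illustrative only: enumerate the virtual functions named by a vtable's
  // initializer. VTableGV is assumed to be a defined GlobalVariable.
  forEachVirtualFunction(VTableGV->getInitializer(), [&](Function *F) {
    errs() << "virtual function: " << F->getName() << "\n";
  });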
// If it's possible to split M into regular and thin LTO parts, do so and write
// a multi-module bitcode file with the two parts to OS. Otherwise, write only a
// regular LTO bitcode file to OS.
-void splitAndWriteThinLTOBitcode(raw_ostream &OS, Module &M) {
+void splitAndWriteThinLTOBitcode(
+ raw_ostream &OS, raw_ostream *ThinLinkOS,
+ function_ref<AAResults &(Function &)> AARGetter, Module &M) {
std::string ModuleId = getModuleId(&M);
if (ModuleId.empty()) {
// We couldn't generate a module ID for this module, so just write it out as a
// regular LTO module.
WriteBitcodeToFile(&M, OS);
+ if (ThinLinkOS)
+ // We don't have a ThinLTO part, but still write the module to the
+ // ThinLinkOS if requested so that the expected output file is produced.
+ WriteBitcodeToFile(&M, *ThinLinkOS);
return;
}
promoteTypeIds(M, ModuleId);
- auto IsInMergedM = [&](const GlobalValue *GV) {
- auto *GVar = dyn_cast<GlobalVariable>(GV->getBaseObject());
- if (!GVar)
- return false;
-
+ // Returns whether a global has attached type metadata. Such globals may
+ // participate in CFI or whole-program devirtualization, so they need to
+ // appear in the merged module instead of the thin LTO module.
+ auto HasTypeMetadata = [&](const GlobalObject *GO) {
SmallVector<MDNode *, 1> MDs;
- GVar->getMetadata(LLVMContext::MD_type, MDs);
+ GO->getMetadata(LLVMContext::MD_type, MDs);
return !MDs.empty();
};
+ // Collect the set of virtual functions that are eligible for virtual constant
+ // propagation. Each eligible function must not access memory, must return
+ // an integer of width <=64 bits, must take at least one argument, must not
+ // use its first argument (assumed to be "this"), and all arguments other
+ // than the first must be integers of width <=64 bits.
+ //
+ // Note that we test whether this copy of the function is readnone, rather
+ // than testing function attributes, which must hold for any copy of the
+ // function, even a less optimized version substituted at link time. This is
+ // sound because the virtual constant propagation optimizations effectively
+ // inline all implementations of the virtual function into each call site,
+ // rather than using function attributes to perform local optimization.
+ std::set<const Function *> EligibleVirtualFns;
+ // If any member of a comdat lives in MergedM, put all members of that
+ // comdat in MergedM to keep the comdat together.
+ DenseSet<const Comdat *> MergedMComdats;
+ for (GlobalVariable &GV : M.globals())
+ if (HasTypeMetadata(&GV)) {
+ if (const auto *C = GV.getComdat())
+ MergedMComdats.insert(C);
+ forEachVirtualFunction(GV.getInitializer(), [&](Function *F) {
+ auto *RT = dyn_cast<IntegerType>(F->getReturnType());
+ if (!RT || RT->getBitWidth() > 64 || F->arg_empty() ||
+ !F->arg_begin()->use_empty())
+ return;
+ for (auto &Arg : make_range(std::next(F->arg_begin()), F->arg_end())) {
+ auto *ArgT = dyn_cast<IntegerType>(Arg.getType());
+ if (!ArgT || ArgT->getBitWidth() > 64)
+ return;
+ }
+ if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
+ EligibleVirtualFns.insert(F);
+ });
+ }
+
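For instance (hypothetical signatures), a readnone function of type i32 (%struct.A*, i32) whose body never touches its first argument is eligible under the rules above; a function returning i128, taking a pointer-typed second argument, or reading memory is not.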
ValueToValueMapTy VMap;
- std::unique_ptr<Module> MergedM(CloneModule(&M, VMap, IsInMergedM));
+ std::unique_ptr<Module> MergedM(
+ CloneModule(&M, VMap, [&](const GlobalValue *GV) -> bool {
+ if (const auto *C = GV->getComdat())
+ if (MergedMComdats.count(C))
+ return true;
+ if (auto *F = dyn_cast<Function>(GV))
+ return EligibleVirtualFns.count(F);
+ if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
+ return HasTypeMetadata(GVar);
+ return false;
+ }));
+ StripDebugInfo(*MergedM);
+
+ for (Function &F : *MergedM)
+ if (!F.isDeclaration()) {
+ // Reset the linkage of all functions eligible for virtual constant
+ // propagation. The canonical definitions live in the thin LTO module so
+ // that they can be imported.
+ F.setLinkage(GlobalValue::AvailableExternallyLinkage);
+ F.setComdat(nullptr);
+ }
- filterModule(&M, [&](const GlobalValue *GV) { return !IsInMergedM(GV); });
+ // Remove all globals with type metadata, globals with comdats that live in
+ // MergedM, and aliases pointing to such globals from the thin LTO module.
+ filterModule(&M, [&](const GlobalValue *GV) {
+ if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
+ if (HasTypeMetadata(GVar))
+ return false;
+ if (const auto *C = GV->getComdat())
+ if (MergedMComdats.count(C))
+ return false;
+ return true;
+ });
promoteInternals(*MergedM, M, ModuleId);
promoteInternals(M, *MergedM, ModuleId);
simplifyExternals(*MergedM);
- SmallVector<char, 0> Buffer;
- BitcodeWriter W(Buffer);
// FIXME: Try to re-use BSI and PFI from the original module here.
ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr);
- W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
- /*GenerateHash=*/true);
- W.writeModule(MergedM.get());
+ SmallVector<char, 0> Buffer;
+ BitcodeWriter W(Buffer);
+ // Save the module hash produced for the full bitcode, which will
+ // be used in the backends, and use that in the minimized bitcode
+ // produced for the full link.
+ ModuleHash ModHash = {{0}};
+ W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
+ /*GenerateHash=*/true, &ModHash);
+ W.writeModule(MergedM.get());
OS << Buffer;
+
+ // If a minimized bitcode module was requested for the thin link,
+ // strip the debug info (the merged module was already stripped above)
+ // and write it to the given OS.
+ if (ThinLinkOS) {
+ Buffer.clear();
+ BitcodeWriter W2(Buffer);
+ StripDebugInfo(M);
+ W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
+ /*GenerateHash=*/false, &ModHash);
+ W2.writeModule(MergedM.get());
+ *ThinLinkOS << Buffer;
+ }
}
// Returns whether this module needs to be split because it uses type metadata.
@@ -292,28 +391,45 @@ bool requiresSplit(Module &M) {
return false;
}
-void writeThinLTOBitcode(raw_ostream &OS, Module &M,
- const ModuleSummaryIndex *Index) {
+void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
+ function_ref<AAResults &(Function &)> AARGetter,
+ Module &M, const ModuleSummaryIndex *Index) {
// See if this module has any type metadata. If so, we need to split it.
if (requiresSplit(M))
- return splitAndWriteThinLTOBitcode(OS, M);
+ return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
// Otherwise we can just write it out as a regular module.
+
+ // Save the module hash produced for the full bitcode, which will
+ // be used in the backends, and use that in the minimized bitcode
+ // produced for the full link.
+ ModuleHash ModHash = {{0}};
WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
- /*GenerateHash=*/true);
+ /*GenerateHash=*/true, &ModHash);
+ // If a minimized bitcode module was requested for the thin link,
+ // strip the debug info and write it to the given OS.
+ if (ThinLinkOS) {
+ StripDebugInfo(M);
+ WriteBitcodeToFile(&M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false,
+ Index,
+ /*GenerateHash=*/false, &ModHash);
+ }
}
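A hedged sketch of how a driver might wire the two output streams (the file names, wrapper function, and error handling are assumptions, not part of this patch):

  // Illustrative only: emit the full module to a.bc and, in the same run,
  // a minimized module for the thin link to a.thinlink.bc.
  void writeBoth(Module &M) {
    std::error_code EC;
    raw_fd_ostream OS("a.bc", EC, sys::fs::F_None);
    raw_fd_ostream ThinLinkOS("a.thinlink.bc", EC, sys::fs::F_None);
    legacy::PassManager PM;
    PM.add(createWriteThinLTOBitcodePass(OS, &ThinLinkOS));
    PM.run(M);
  }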
class WriteThinLTOBitcode : public ModulePass {
raw_ostream &OS; // raw_ostream to print on
+ // The output stream on which to emit a minimized module for use
+ // just in the thin link, if requested.
+ raw_ostream *ThinLinkOS;
public:
static char ID; // Pass identification, replacement for typeid
- WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) {
+ WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()), ThinLinkOS(nullptr) {
initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
}
- explicit WriteThinLTOBitcode(raw_ostream &o)
- : ModulePass(ID), OS(o) {
+ explicit WriteThinLTOBitcode(raw_ostream &o, raw_ostream *ThinLinkOS)
+ : ModulePass(ID), OS(o), ThinLinkOS(ThinLinkOS) {
initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
}
@@ -322,12 +438,14 @@ public:
bool runOnModule(Module &M) override {
const ModuleSummaryIndex *Index =
&(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
- writeThinLTOBitcode(OS, M, Index);
+ writeThinLTOBitcode(OS, ThinLinkOS, LegacyAARGetter(*this), M, Index);
return true;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ModuleSummaryIndexWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
} // anonymous namespace
@@ -335,10 +453,13 @@ public:
char WriteThinLTOBitcode::ID = 0;
INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
"Write ThinLTO Bitcode", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
"Write ThinLTO Bitcode", false, true)
-ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) {
- return new WriteThinLTOBitcode(Str);
+ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
+ raw_ostream *ThinLinkOS) {
+ return new WriteThinLTOBitcode(Str, ThinLinkOS);
}
diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 844cc0f70eed..cb7d487b68b0 100644
--- a/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -25,6 +25,20 @@
// returns 0, or a single vtable's function returns 1, replace each virtual
// call with a comparison of the vptr against that vtable's address.
//
+// This pass is intended to be used during the regular and thin LTO pipelines.
+// During regular LTO, the pass determines the best optimization for each
+// virtual call and applies the resolutions directly to virtual calls that are
+// eligible for virtual call optimization (i.e. calls that use either the
+// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). During
+// ThinLTO, the pass operates in two phases:
+// - Export phase: this is run during the thin link over a single merged module
+// that contains all vtables with !type metadata that participate in the link.
+// The pass computes a resolution for each virtual call and stores it in the
+// type identifier summary.
+// - Import phase: this is run during the thin backends over the individual
+// modules. The pass applies the resolutions previously computed during the
+// export phase to each eligible virtual call.
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -35,6 +49,8 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -54,12 +70,16 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include <algorithm>
#include <cstddef>
@@ -72,6 +92,26 @@ using namespace wholeprogramdevirt;
#define DEBUG_TYPE "wholeprogramdevirt"
+static cl::opt<PassSummaryAction> ClSummaryAction(
+ "wholeprogramdevirt-summary-action",
+ cl::desc("What to do with the summary when running this pass"),
+ cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
+ clEnumValN(PassSummaryAction::Import, "import",
+ "Import typeid resolutions from summary and globals"),
+ clEnumValN(PassSummaryAction::Export, "export",
+ "Export typeid resolutions to summary and globals")),
+ cl::Hidden);
+
+static cl::opt<std::string> ClReadSummary(
+ "wholeprogramdevirt-read-summary",
+ cl::desc("Read summary from given YAML file before running pass"),
+ cl::Hidden);
+
+static cl::opt<std::string> ClWriteSummary(
+ "wholeprogramdevirt-write-summary",
+ cl::desc("Write summary to given YAML file after running pass"),
+ cl::Hidden);
+
// Find the minimum offset that we may store a value of size Size bits at. If
// IsAfter is set, look for an offset before the object, otherwise look for an
// offset after the object.
@@ -259,15 +299,92 @@ struct VirtualCallSite {
}
};
+// Call site information collected for a specific VTableSlot and possibly a list
+// of constant integer arguments. The grouping by arguments is handled by the
+// VTableSlotInfo class.
+struct CallSiteInfo {
+ /// The set of call sites for this slot. Used during regular LTO and the
+ /// import phase of ThinLTO (as well as the export phase of ThinLTO for any
+ /// call sites that appear in the merged module itself); in each of these
+ /// cases we are directly operating on the call sites at the IR level.
+ std::vector<VirtualCallSite> CallSites;
+
+ // These fields are used during the export phase of ThinLTO and reflect
+ // information collected from function summaries.
+
+ /// Whether any function summary contains an llvm.assume(llvm.type.test) for
+ /// this slot.
+ bool SummaryHasTypeTestAssumeUsers;
+
+ /// CFI-specific: a vector containing the list of function summaries that use
+ /// the llvm.type.checked.load intrinsic and therefore will require
+ /// resolutions for llvm.type.test in order to implement CFI checks if
+ /// devirtualization was unsuccessful. If devirtualization was successful, the
+ /// pass will clear this vector by calling markDevirt(). If at the end of the
+ /// pass the vector is non-empty, we will need to add a use of llvm.type.test
+ /// to each of the function summaries in the vector.
+ std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
+
+ bool isExported() const {
+ return SummaryHasTypeTestAssumeUsers ||
+ !SummaryTypeCheckedLoadUsers.empty();
+ }
+
+ /// As explained in the comment for SummaryTypeCheckedLoadUsers.
+ void markDevirt() { SummaryTypeCheckedLoadUsers.clear(); }
+};
+
+// Call site information collected for a specific VTableSlot.
+struct VTableSlotInfo {
+ // The set of call sites which do not have all constant integer arguments
+ // (excluding "this").
+ CallSiteInfo CSInfo;
+
+ // The set of call sites with all constant integer arguments (excluding
+ // "this"), grouped by argument list.
+ std::map<std::vector<uint64_t>, CallSiteInfo> ConstCSInfo;
+
+ void addCallSite(Value *VTable, CallSite CS, unsigned *NumUnsafeUses);
+
+private:
+ CallSiteInfo &findCallSiteInfo(CallSite CS);
+};
+
+CallSiteInfo &VTableSlotInfo::findCallSiteInfo(CallSite CS) {
+ std::vector<uint64_t> Args;
+ auto *CI = dyn_cast<IntegerType>(CS.getType());
+ if (!CI || CI->getBitWidth() > 64 || CS.arg_empty())
+ return CSInfo;
+ for (auto &&Arg : make_range(CS.arg_begin() + 1, CS.arg_end())) {
+ auto *CI = dyn_cast<ConstantInt>(Arg);
+ if (!CI || CI->getBitWidth() > 64)
+ return CSInfo;
+ Args.push_back(CI->getZExtValue());
+ }
+ return ConstCSInfo[Args];
+}
+
+void VTableSlotInfo::addCallSite(Value *VTable, CallSite CS,
+ unsigned *NumUnsafeUses) {
+ findCallSiteInfo(CS).CallSites.push_back({VTable, CS, NumUnsafeUses});
+}
+
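To illustrate the bucketing (hypothetical IR; %fp is a loaded vtable entry): a call returning an integer of width <=64 whose non-'this' arguments are all such integer constants is keyed by those values, while anything else lands in the fallback bucket.

  %r1 = call i32 %fp(i8* %obj, i32 1, i32 2)  ; -> ConstCSInfo[{1, 2}]
  %r2 = call i32 %fp(i8* %obj, i32 %n)        ; -> CSInfo (non-constant arg)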
struct DevirtModule {
Module &M;
+ function_ref<AAResults &(Function &)> AARGetter;
+
+ ModuleSummaryIndex *ExportSummary;
+ const ModuleSummaryIndex *ImportSummary;
+
IntegerType *Int8Ty;
PointerType *Int8PtrTy;
IntegerType *Int32Ty;
+ IntegerType *Int64Ty;
+ IntegerType *IntPtrTy;
bool RemarksEnabled;
- MapVector<VTableSlot, std::vector<VirtualCallSite>> CallSlots;
+ MapVector<VTableSlot, VTableSlotInfo> CallSlots;
// This map keeps track of the number of "unsafe" uses of a loaded function
// pointer. The key is the associated llvm.type.test intrinsic call generated
@@ -279,11 +396,18 @@ struct DevirtModule {
// true.
std::map<CallInst *, unsigned> NumUnsafeUsesForTypeTest;
- DevirtModule(Module &M)
- : M(M), Int8Ty(Type::getInt8Ty(M.getContext())),
+ DevirtModule(Module &M, function_ref<AAResults &(Function &)> AARGetter,
+ ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary)
+ : M(M), AARGetter(AARGetter), ExportSummary(ExportSummary),
+ ImportSummary(ImportSummary), Int8Ty(Type::getInt8Ty(M.getContext())),
Int8PtrTy(Type::getInt8PtrTy(M.getContext())),
Int32Ty(Type::getInt32Ty(M.getContext())),
- RemarksEnabled(areRemarksEnabled()) {}
+ Int64Ty(Type::getInt64Ty(M.getContext())),
+ IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)),
+ RemarksEnabled(areRemarksEnabled()) {
+ assert(!(ExportSummary && ImportSummary));
+ }
bool areRemarksEnabled();
@@ -298,57 +422,169 @@ struct DevirtModule {
tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
const std::set<TypeMemberInfo> &TypeMemberInfos,
uint64_t ByteOffset);
+
+ void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn,
+ bool &IsExported);
bool trySingleImplDevirt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites);
+ VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res);
+
bool tryEvaluateFunctionsWithArgs(
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- ArrayRef<ConstantInt *> Args);
- bool tryUniformRetValOpt(IntegerType *RetType,
- MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites);
+ ArrayRef<uint64_t> Args);
+
+ void applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
+ uint64_t TheRetVal);
+ bool tryUniformRetValOpt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
+ CallSiteInfo &CSInfo,
+ WholeProgramDevirtResolution::ByArg *Res);
+
+ // Returns the global symbol name that is used to export information about the
+ // given vtable slot and list of arguments.
+ std::string getGlobalName(VTableSlot Slot, ArrayRef<uint64_t> Args,
+ StringRef Name);
+
+ // This function is called during the export phase to create a symbol
+ // definition containing information about the given vtable slot and list of
+ // arguments.
+ void exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args, StringRef Name,
+ Constant *C);
+
+ // This function is called during the import phase to create a reference to
+ // the symbol definition created during the export phase.
+ Constant *importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
+ StringRef Name, unsigned AbsWidth = 0);
+
+ void applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, bool IsOne,
+ Constant *UniqueMemberAddr);
bool tryUniqueRetValOpt(unsigned BitWidth,
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites);
+ CallSiteInfo &CSInfo,
+ WholeProgramDevirtResolution::ByArg *Res,
+ VTableSlot Slot, ArrayRef<uint64_t> Args);
+
+ void applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
+ Constant *Byte, Constant *Bit);
bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- ArrayRef<VirtualCallSite> CallSites);
+ VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res, VTableSlot Slot);
void rebuildGlobal(VTableBits &B);
+ // Apply the summary resolution for Slot to all virtual calls in SlotInfo.
+ void importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo);
+
+ // If we were able to eliminate all unsafe uses for a type checked load,
+ // eliminate the associated type tests by replacing them with true.
+ void removeRedundantTypeTests();
+
bool run();
+
+ // Lower the module using the action and summary passed as command line
+ // arguments. For testing purposes only.
+ static bool runForTesting(Module &M,
+ function_ref<AAResults &(Function &)> AARGetter);
};
struct WholeProgramDevirt : public ModulePass {
static char ID;
- WholeProgramDevirt() : ModulePass(ID) {
+ bool UseCommandLine = false;
+
+ ModuleSummaryIndex *ExportSummary;
+ const ModuleSummaryIndex *ImportSummary;
+
+ WholeProgramDevirt() : ModulePass(ID), UseCommandLine(true) {
+ initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
+ }
+
+ WholeProgramDevirt(ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary)
+ : ModulePass(ID), ExportSummary(ExportSummary),
+ ImportSummary(ImportSummary) {
initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
+ if (UseCommandLine)
+ return DevirtModule::runForTesting(M, LegacyAARGetter(*this));
+ return DevirtModule(M, LegacyAARGetter(*this), ExportSummary, ImportSummary)
+ .run();
+ }
- return DevirtModule(M).run();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
} // end anonymous namespace
-INITIALIZE_PASS(WholeProgramDevirt, "wholeprogramdevirt",
- "Whole program devirtualization", false, false)
+INITIALIZE_PASS_BEGIN(WholeProgramDevirt, "wholeprogramdevirt",
+ "Whole program devirtualization", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(WholeProgramDevirt, "wholeprogramdevirt",
+ "Whole program devirtualization", false, false)
char WholeProgramDevirt::ID = 0;
-ModulePass *llvm::createWholeProgramDevirtPass() {
- return new WholeProgramDevirt;
+ModulePass *
+llvm::createWholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
+ const ModuleSummaryIndex *ImportSummary) {
+ return new WholeProgramDevirt(ExportSummary, ImportSummary);
}
PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
- ModuleAnalysisManager &) {
- if (!DevirtModule(M).run())
+ ModuleAnalysisManager &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto AARGetter = [&](Function &F) -> AAResults & {
+ return FAM.getResult<AAManager>(F);
+ };
+ if (!DevirtModule(M, AARGetter, nullptr, nullptr).run())
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
+bool DevirtModule::runForTesting(
+ Module &M, function_ref<AAResults &(Function &)> AARGetter) {
+ ModuleSummaryIndex Summary;
+
+ // Handle the command-line summary arguments. This code is for testing
+ // purposes only, so we handle errors directly.
+ if (!ClReadSummary.empty()) {
+ ExitOnError ExitOnErr("-wholeprogramdevirt-read-summary: " + ClReadSummary +
+ ": ");
+ auto ReadSummaryFile =
+ ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
+
+ yaml::Input In(ReadSummaryFile->getBuffer());
+ In >> Summary;
+ ExitOnErr(errorCodeToError(In.error()));
+ }
+
+ bool Changed =
+ DevirtModule(
+ M, AARGetter,
+ ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
+ ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr)
+ .run();
+
+ if (!ClWriteSummary.empty()) {
+ ExitOnError ExitOnErr(
+ "-wholeprogramdevirt-write-summary: " + ClWriteSummary + ": ");
+ std::error_code EC;
+ raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::F_Text);
+ ExitOnErr(errorCodeToError(EC));
+
+ yaml::Output Out(OS);
+ Out << Summary;
+ }
+
+ return Changed;
+}
+
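Concretely, the testing hooks above correspond to opt invocations along these lines (file names are illustrative):

  opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export \
      -wholeprogramdevirt-write-summary=summary.yaml merged.bc -o merged.out.bc
  opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=import \
      -wholeprogramdevirt-read-summary=summary.yaml backend.bc -o backend.out.bc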
void DevirtModule::buildTypeIdentifierMap(
std::vector<VTableBits> &Bits,
DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
@@ -443,9 +679,31 @@ bool DevirtModule::tryFindVirtualCallTargets(
return !TargetsForSlot.empty();
}
+void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
+ Constant *TheFn, bool &IsExported) {
+ auto Apply = [&](CallSiteInfo &CSInfo) {
+ for (auto &&VCallSite : CSInfo.CallSites) {
+ if (RemarksEnabled)
+ VCallSite.emitRemark("single-impl", TheFn->getName());
+ VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast(
+ TheFn, VCallSite.CS.getCalledValue()->getType()));
+ // This use is no longer unsafe.
+ if (VCallSite.NumUnsafeUses)
+ --*VCallSite.NumUnsafeUses;
+ }
+ if (CSInfo.isExported()) {
+ IsExported = true;
+ CSInfo.markDevirt();
+ }
+ };
+ Apply(SlotInfo.CSInfo);
+ for (auto &P : SlotInfo.ConstCSInfo)
+ Apply(P.second);
+}
+
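Per call site the rewrite looks like this in IR (the callee and types are illustrative):

  ; before: indirect call through the loaded virtual function pointer
  %r = call i32 %fptr(i8* %obj)
  ; after: direct call, bitcast to the call site's function pointer type
  %r = call i32 bitcast (i32 (%struct.A*)* @_ZN1A1fEv to i32 (i8*)*)(i8* %obj)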
bool DevirtModule::trySingleImplDevirt(
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites) {
+ VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res) {
// See if the program contains a single implementation of this virtual
// function.
Function *TheFn = TargetsForSlot[0].Fn;
@@ -453,39 +711,51 @@ bool DevirtModule::trySingleImplDevirt(
if (TheFn != Target.Fn)
return false;
+ // If so, update each call site to call that implementation directly.
if (RemarksEnabled)
TargetsForSlot[0].WasDevirt = true;
- // If so, update each call site to call that implementation directly.
- for (auto &&VCallSite : CallSites) {
- if (RemarksEnabled)
- VCallSite.emitRemark("single-impl", TheFn->getName());
- VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast(
- TheFn, VCallSite.CS.getCalledValue()->getType()));
- // This use is no longer unsafe.
- if (VCallSite.NumUnsafeUses)
- --*VCallSite.NumUnsafeUses;
+
+ bool IsExported = false;
+ applySingleImplDevirt(SlotInfo, TheFn, IsExported);
+ if (!IsExported)
+ return false;
+
+ // If the only implementation has local linkage, we must promote to external
+ // to make it visible to thin LTO objects. We can only get here during the
+ // ThinLTO export phase.
+ if (TheFn->hasLocalLinkage()) {
+ TheFn->setLinkage(GlobalValue::ExternalLinkage);
+ TheFn->setVisibility(GlobalValue::HiddenVisibility);
+ TheFn->setName(TheFn->getName() + "$merged");
}
+
+ Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
+ Res->SingleImplName = TheFn->getName();
+
return true;
}
bool DevirtModule::tryEvaluateFunctionsWithArgs(
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- ArrayRef<ConstantInt *> Args) {
+ ArrayRef<uint64_t> Args) {
// Evaluate each function and store the result in each target's RetVal
// field.
for (VirtualCallTarget &Target : TargetsForSlot) {
if (Target.Fn->arg_size() != Args.size() + 1)
return false;
- for (unsigned I = 0; I != Args.size(); ++I)
- if (Target.Fn->getFunctionType()->getParamType(I + 1) !=
- Args[I]->getType())
- return false;
Evaluator Eval(M.getDataLayout(), nullptr);
SmallVector<Constant *, 2> EvalArgs;
EvalArgs.push_back(
Constant::getNullValue(Target.Fn->getFunctionType()->getParamType(0)));
- EvalArgs.insert(EvalArgs.end(), Args.begin(), Args.end());
+ for (unsigned I = 0; I != Args.size(); ++I) {
+ auto *ArgTy = dyn_cast<IntegerType>(
+ Target.Fn->getFunctionType()->getParamType(I + 1));
+ if (!ArgTy)
+ return false;
+ EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
+ }
+
Constant *RetVal;
if (!Eval.EvaluateFunction(Target.Fn, RetVal, EvalArgs) ||
!isa<ConstantInt>(RetVal))
@@ -495,9 +765,18 @@ bool DevirtModule::tryEvaluateFunctionsWithArgs(
return true;
}
+void DevirtModule::applyUniformRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
+ uint64_t TheRetVal) {
+ for (auto Call : CSInfo.CallSites)
+ Call.replaceAndErase(
+ "uniform-ret-val", FnName, RemarksEnabled,
+ ConstantInt::get(cast<IntegerType>(Call.CS.getType()), TheRetVal));
+ CSInfo.markDevirt();
+}
+
bool DevirtModule::tryUniformRetValOpt(
- IntegerType *RetType, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites) {
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot, CallSiteInfo &CSInfo,
+ WholeProgramDevirtResolution::ByArg *Res) {
// Uniform return value optimization. If all functions return the same
// constant, replace all calls with that constant.
uint64_t TheRetVal = TargetsForSlot[0].RetVal;
@@ -505,19 +784,77 @@ bool DevirtModule::tryUniformRetValOpt(
if (Target.RetVal != TheRetVal)
return false;
- auto TheRetValConst = ConstantInt::get(RetType, TheRetVal);
- for (auto Call : CallSites)
- Call.replaceAndErase("uniform-ret-val", TargetsForSlot[0].Fn->getName(),
- RemarksEnabled, TheRetValConst);
+ if (CSInfo.isExported()) {
+ Res->TheKind = WholeProgramDevirtResolution::ByArg::UniformRetVal;
+ Res->Info = TheRetVal;
+ }
+
+ applyUniformRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), TheRetVal);
if (RemarksEnabled)
for (auto &&Target : TargetsForSlot)
Target.WasDevirt = true;
return true;
}
+std::string DevirtModule::getGlobalName(VTableSlot Slot,
+ ArrayRef<uint64_t> Args,
+ StringRef Name) {
+ std::string FullName = "__typeid_";
+ raw_string_ostream OS(FullName);
+ OS << cast<MDString>(Slot.TypeID)->getString() << '_' << Slot.ByteOffset;
+ for (uint64_t Arg : Args)
+ OS << '_' << Arg;
+ OS << '_' << Name;
+ return OS.str();
+}
+
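For example (illustrative values), the slot {type id "_ZTS1A", byte offset 8} with constant argument list {1} and Name "byte" yields the symbol "__typeid__ZTS1A_8_1_byte".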
+void DevirtModule::exportGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
+ StringRef Name, Constant *C) {
+ GlobalAlias *GA = GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
+ getGlobalName(Slot, Args, Name), C, &M);
+ GA->setVisibility(GlobalValue::HiddenVisibility);
+}
+
+Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef<uint64_t> Args,
+ StringRef Name, unsigned AbsWidth) {
+ Constant *C = M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Ty);
+ auto *GV = dyn_cast<GlobalVariable>(C);
+ // We only need to set metadata if the global is newly created, in which
+ // case it would not have hidden visibility.
+ if (!GV || GV->getVisibility() == GlobalValue::HiddenVisibility)
+ return C;
+
+ GV->setVisibility(GlobalValue::HiddenVisibility);
+ auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
+ auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
+ auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
+ GV->setMetadata(LLVMContext::MD_absolute_symbol,
+ MDNode::get(M.getContext(), {MinC, MaxC}));
+ };
+ if (AbsWidth == IntPtrTy->getBitWidth())
+ SetAbsRange(~0ull, ~0ull); // Full set.
+ else if (AbsWidth)
+ SetAbsRange(0, 1ull << AbsWidth);
+ return GV;
+}
+
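On the import side this produces declarations along these lines (illustrative; AbsWidth == 8 encodes the half-open range [0, 256), while AbsWidth equal to the pointer width encodes the full set):

  @__typeid__ZTS1A_8_1_bit = external hidden global i8, !absolute_symbol !0
  !0 = !{i64 0, i64 256}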
+void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName,
+ bool IsOne,
+ Constant *UniqueMemberAddr) {
+ for (auto &&Call : CSInfo.CallSites) {
+ IRBuilder<> B(Call.CS.getInstruction());
+ Value *Cmp = B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
+ Call.VTable, UniqueMemberAddr);
+ Cmp = B.CreateZExt(Cmp, Call.CS->getType());
+ Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, Cmp);
+ }
+ CSInfo.markDevirt();
+}
+
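In IR terms the rewrite is roughly the following (vtable symbol and offset are hypothetical; for IsOne the predicate is eq, otherwise ne):

  %cmp = icmp eq i8* %vtable, getelementptr (i8, i8* bitcast ([3 x i8*]* @vtB to i8*), i64 16)
  %r = zext i1 %cmp to i32    ; widened to the call's return type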
bool DevirtModule::tryUniqueRetValOpt(
unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- MutableArrayRef<VirtualCallSite> CallSites) {
+ CallSiteInfo &CSInfo, WholeProgramDevirtResolution::ByArg *Res,
+ VTableSlot Slot, ArrayRef<uint64_t> Args) {
// IsOne controls whether we look for a 0 or a 1.
auto tryUniqueRetValOptFor = [&](bool IsOne) {
const TypeMemberInfo *UniqueMember = nullptr;
@@ -533,16 +870,23 @@ bool DevirtModule::tryUniqueRetValOpt(
// checked for a uniform return value in tryUniformRetValOpt.
assert(UniqueMember);
- // Replace each call with the comparison.
- for (auto &&Call : CallSites) {
- IRBuilder<> B(Call.CS.getInstruction());
- Value *OneAddr = B.CreateBitCast(UniqueMember->Bits->GV, Int8PtrTy);
- OneAddr = B.CreateConstGEP1_64(OneAddr, UniqueMember->Offset);
- Value *Cmp = B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
- Call.VTable, OneAddr);
- Call.replaceAndErase("unique-ret-val", TargetsForSlot[0].Fn->getName(),
- RemarksEnabled, Cmp);
+ Constant *UniqueMemberAddr =
+ ConstantExpr::getBitCast(UniqueMember->Bits->GV, Int8PtrTy);
+ UniqueMemberAddr = ConstantExpr::getGetElementPtr(
+ Int8Ty, UniqueMemberAddr,
+ ConstantInt::get(Int64Ty, UniqueMember->Offset));
+
+ if (CSInfo.isExported()) {
+ Res->TheKind = WholeProgramDevirtResolution::ByArg::UniqueRetVal;
+ Res->Info = IsOne;
+
+ exportGlobal(Slot, Args, "unique_member", UniqueMemberAddr);
}
+
+ // Replace each call with the comparison.
+ applyUniqueRetValOpt(CSInfo, TargetsForSlot[0].Fn->getName(), IsOne,
+ UniqueMemberAddr);
+
// Update devirtualization statistics for targets.
if (RemarksEnabled)
for (auto &&Target : TargetsForSlot)
@@ -560,9 +904,30 @@ bool DevirtModule::tryUniqueRetValOpt(
return false;
}
+void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
+ Constant *Byte, Constant *Bit) {
+ for (auto Call : CSInfo.CallSites) {
+ auto *RetType = cast<IntegerType>(Call.CS.getType());
+ IRBuilder<> B(Call.CS.getInstruction());
+ Value *Addr = B.CreateGEP(Int8Ty, Call.VTable, Byte);
+ if (RetType->getBitWidth() == 1) {
+ Value *Bits = B.CreateLoad(Addr);
+ Value *BitsAndBit = B.CreateAnd(Bits, Bit);
+ auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
+ Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
+ IsBitSet);
+ } else {
+ Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
+ Value *Val = B.CreateLoad(RetType, ValAddr);
+ Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled, Val);
+ }
+ }
+ CSInfo.markDevirt();
+}
+
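Illustratively, a 1-bit return becomes a bit test against the byte at vtable+Byte, and a wider integer return becomes a direct load (offset and bit values hypothetical):

  %addr = getelementptr i8, i8* %vtable, i32 -17
  %bits = load i8, i8* %addr
  %and  = and i8 %bits, 2         ; Bit == 1 << OffsetBit
  %r    = icmp ne i8 %and, 0      ; the devirtualized i1 result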
bool DevirtModule::tryVirtualConstProp(
- MutableArrayRef<VirtualCallTarget> TargetsForSlot,
- ArrayRef<VirtualCallSite> CallSites) {
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res, VTableSlot Slot) {
// This only works if the function returns an integer.
auto RetType = dyn_cast<IntegerType>(TargetsForSlot[0].Fn->getReturnType());
if (!RetType)
@@ -571,55 +936,38 @@ bool DevirtModule::tryVirtualConstProp(
if (BitWidth > 64)
return false;
- // Make sure that each function does not access memory, takes at least one
- // argument, does not use its first argument (which we assume is 'this'),
- // and has the same return type.
+ // Make sure that each function is defined, does not access memory, takes at
+ // least one argument, does not use its first argument (which we assume is
+ // 'this'), and has the same return type.
+ //
+ // Note that we test whether this copy of the function is readnone, rather
+ // than testing function attributes, which must hold for any copy of the
+ // function, even a less optimized version substituted at link time. This is
+ // sound because the virtual constant propagation optimizations effectively
+ // inline all implementations of the virtual function into each call site,
+ // rather than using function attributes to perform local optimization.
for (VirtualCallTarget &Target : TargetsForSlot) {
- if (!Target.Fn->doesNotAccessMemory() || Target.Fn->arg_empty() ||
- !Target.Fn->arg_begin()->use_empty() ||
+ if (Target.Fn->isDeclaration() ||
+ computeFunctionBodyMemoryAccess(*Target.Fn, AARGetter(*Target.Fn)) !=
+ MAK_ReadNone ||
+ Target.Fn->arg_empty() || !Target.Fn->arg_begin()->use_empty() ||
Target.Fn->getReturnType() != RetType)
return false;
}
- // Group call sites by the list of constant arguments they pass.
- // The comparator ensures deterministic ordering.
- struct ByAPIntValue {
- bool operator()(const std::vector<ConstantInt *> &A,
- const std::vector<ConstantInt *> &B) const {
- return std::lexicographical_compare(
- A.begin(), A.end(), B.begin(), B.end(),
- [](ConstantInt *AI, ConstantInt *BI) {
- return AI->getValue().ult(BI->getValue());
- });
- }
- };
- std::map<std::vector<ConstantInt *>, std::vector<VirtualCallSite>,
- ByAPIntValue>
- VCallSitesByConstantArg;
- for (auto &&VCallSite : CallSites) {
- std::vector<ConstantInt *> Args;
- if (VCallSite.CS.getType() != RetType)
- continue;
- for (auto &&Arg :
- make_range(VCallSite.CS.arg_begin() + 1, VCallSite.CS.arg_end())) {
- if (!isa<ConstantInt>(Arg))
- break;
- Args.push_back(cast<ConstantInt>(&Arg));
- }
- if (Args.size() + 1 != VCallSite.CS.arg_size())
- continue;
-
- VCallSitesByConstantArg[Args].push_back(VCallSite);
- }
-
- for (auto &&CSByConstantArg : VCallSitesByConstantArg) {
+ for (auto &&CSByConstantArg : SlotInfo.ConstCSInfo) {
if (!tryEvaluateFunctionsWithArgs(TargetsForSlot, CSByConstantArg.first))
continue;
- if (tryUniformRetValOpt(RetType, TargetsForSlot, CSByConstantArg.second))
+ WholeProgramDevirtResolution::ByArg *ResByArg = nullptr;
+ if (Res)
+ ResByArg = &Res->ResByArg[CSByConstantArg.first];
+
+ if (tryUniformRetValOpt(TargetsForSlot, CSByConstantArg.second, ResByArg))
continue;
- if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second))
+ if (tryUniqueRetValOpt(BitWidth, TargetsForSlot, CSByConstantArg.second,
+ ResByArg, Slot, CSByConstantArg.first))
continue;
// Find an allocation offset in bits in all vtables associated with the
@@ -659,26 +1007,20 @@ bool DevirtModule::tryVirtualConstProp(
for (auto &&Target : TargetsForSlot)
Target.WasDevirt = true;
- // Rewrite each call to a load from OffsetByte/OffsetBit.
- for (auto Call : CSByConstantArg.second) {
- IRBuilder<> B(Call.CS.getInstruction());
- Value *Addr = B.CreateConstGEP1_64(Call.VTable, OffsetByte);
- if (BitWidth == 1) {
- Value *Bits = B.CreateLoad(Addr);
- Value *Bit = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
- Value *BitsAndBit = B.CreateAnd(Bits, Bit);
- auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
- Call.replaceAndErase("virtual-const-prop-1-bit",
- TargetsForSlot[0].Fn->getName(),
- RemarksEnabled, IsBitSet);
- } else {
- Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
- Value *Val = B.CreateLoad(RetType, ValAddr);
- Call.replaceAndErase("virtual-const-prop",
- TargetsForSlot[0].Fn->getName(),
- RemarksEnabled, Val);
- }
+ Constant *ByteConst = ConstantInt::get(Int32Ty, OffsetByte);
+ Constant *BitConst = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
+
+ if (CSByConstantArg.second.isExported()) {
+ ResByArg->TheKind = WholeProgramDevirtResolution::ByArg::VirtualConstProp;
+ exportGlobal(Slot, CSByConstantArg.first, "byte",
+ ConstantExpr::getIntToPtr(ByteConst, Int8PtrTy));
+ exportGlobal(Slot, CSByConstantArg.first, "bit",
+ ConstantExpr::getIntToPtr(BitConst, Int8PtrTy));
}
+
+ // Rewrite each call to a load from OffsetByte/OffsetBit.
+ applyVirtualConstProp(CSByConstantArg.second,
+ TargetsForSlot[0].Fn->getName(), ByteConst, BitConst);
}
return true;
}
@@ -733,7 +1075,11 @@ bool DevirtModule::areRemarksEnabled() {
if (FL.empty())
return false;
const Function &Fn = FL.front();
- auto DI = OptimizationRemark(DEBUG_TYPE, Fn, DebugLoc(), "");
+
+ const auto &BBL = Fn.getBasicBlockList();
+ if (BBL.empty())
+ return false;
+ auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBL.front());
return DI.isEnabled();
}
@@ -766,8 +1112,8 @@ void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc,
Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
if (SeenPtrs.insert(Ptr).second) {
for (DevirtCallSite Call : DevirtCalls) {
- CallSlots[{TypeId, Call.Offset}].push_back(
- {CI->getArgOperand(0), Call.CS, nullptr});
+ CallSlots[{TypeId, Call.Offset}].addCallSite(CI->getArgOperand(0),
+ Call.CS, nullptr);
}
}
}
@@ -853,14 +1199,79 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
if (HasNonCallUses)
++NumUnsafeUses;
for (DevirtCallSite Call : DevirtCalls) {
- CallSlots[{TypeId, Call.Offset}].push_back(
- {Ptr, Call.CS, &NumUnsafeUses});
+ CallSlots[{TypeId, Call.Offset}].addCallSite(Ptr, Call.CS,
+ &NumUnsafeUses);
}
CI->eraseFromParent();
}
}
+void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
+ const TypeIdSummary *TidSummary =
+ ImportSummary->getTypeIdSummary(cast<MDString>(Slot.TypeID)->getString());
+ if (!TidSummary)
+ return;
+ auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
+ if (ResI == TidSummary->WPDRes.end())
+ return;
+ const WholeProgramDevirtResolution &Res = ResI->second;
+
+ if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
+ // The type of the function in the declaration is irrelevant because every
+ // call site will cast it to the correct type.
+ auto *SingleImpl = M.getOrInsertFunction(
+ Res.SingleImplName, Type::getVoidTy(M.getContext()));
+
+ // This is the import phase so we should not be exporting anything.
+ bool IsExported = false;
+ applySingleImplDevirt(SlotInfo, SingleImpl, IsExported);
+ assert(!IsExported);
+ }
+
+ for (auto &CSByConstantArg : SlotInfo.ConstCSInfo) {
+ auto I = Res.ResByArg.find(CSByConstantArg.first);
+ if (I == Res.ResByArg.end())
+ continue;
+ auto &ResByArg = I->second;
+ // FIXME: We should figure out what to do about the "function name" argument
+ // to the apply* functions, as the function names are unavailable during the
+ // importing phase. For now we just pass the empty string. This does not
+ // impact correctness because the function names are just used for remarks.
+ switch (ResByArg.TheKind) {
+ case WholeProgramDevirtResolution::ByArg::UniformRetVal:
+ applyUniformRetValOpt(CSByConstantArg.second, "", ResByArg.Info);
+ break;
+ case WholeProgramDevirtResolution::ByArg::UniqueRetVal: {
+ Constant *UniqueMemberAddr =
+ importGlobal(Slot, CSByConstantArg.first, "unique_member");
+ applyUniqueRetValOpt(CSByConstantArg.second, "", ResByArg.Info,
+ UniqueMemberAddr);
+ break;
+ }
+ case WholeProgramDevirtResolution::ByArg::VirtualConstProp: {
+ Constant *Byte = importGlobal(Slot, CSByConstantArg.first, "byte", 32);
+ Byte = ConstantExpr::getPtrToInt(Byte, Int32Ty);
+ Constant *Bit = importGlobal(Slot, CSByConstantArg.first, "bit", 8);
+ Bit = ConstantExpr::getPtrToInt(Bit, Int8Ty);
+ applyVirtualConstProp(CSByConstantArg.second, "", Byte, Bit);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
+void DevirtModule::removeRedundantTypeTests() {
+ auto True = ConstantInt::getTrue(M.getContext());
+ for (auto &&U : NumUnsafeUsesForTypeTest) {
+ if (U.second == 0) {
+ U.first->replaceAllUsesWith(True);
+ U.first->eraseFromParent();
+ }
+ }
+}
+
bool DevirtModule::run() {
Function *TypeTestFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
@@ -868,7 +1279,11 @@ bool DevirtModule::run() {
M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
- if ((!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
+ // Normally if there are no users of the devirtualization intrinsics in the
+ // module, this pass has nothing to do. But if we are exporting, we also need
+ // to handle any users that appear only in the function summaries.
+ if (!ExportSummary &&
+ (!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
AssumeFunc->use_empty()) &&
(!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
return false;
@@ -879,6 +1294,17 @@ bool DevirtModule::run() {
if (TypeCheckedLoadFunc)
scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
+ if (ImportSummary) {
+ for (auto &S : CallSlots)
+ importResolution(S.first, S.second);
+
+ removeRedundantTypeTests();
+
+ // The rest of the code is only necessary when exporting or during regular
+ // LTO, so we are done.
+ return true;
+ }
+
// Rebuild type metadata into a map for easy lookup.
std::vector<VTableBits> Bits;
DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
@@ -886,6 +1312,53 @@ bool DevirtModule::run() {
if (TypeIdMap.empty())
return true;
+ // Collect information from summary about which calls to try to devirtualize.
+ if (ExportSummary) {
+ DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
+ for (auto &P : TypeIdMap) {
+ if (auto *TypeId = dyn_cast<MDString>(P.first))
+ MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
+ TypeId);
+ }
+
+ for (auto &P : *ExportSummary) {
+ for (auto &S : P.second) {
+ auto *FS = dyn_cast<FunctionSummary>(S.get());
+ if (!FS)
+ continue;
+ // FIXME: Only add live functions.
+ for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
+ for (Metadata *MD : MetadataByGUID[VF.GUID]) {
+ CallSlots[{MD, VF.Offset}].CSInfo.SummaryHasTypeTestAssumeUsers =
+ true;
+ }
+ }
+ for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
+ for (Metadata *MD : MetadataByGUID[VF.GUID]) {
+ CallSlots[{MD, VF.Offset}]
+ .CSInfo.SummaryTypeCheckedLoadUsers.push_back(FS);
+ }
+ }
+ for (const FunctionSummary::ConstVCall &VC :
+ FS->type_test_assume_const_vcalls()) {
+ for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
+ CallSlots[{MD, VC.VFunc.Offset}]
+ .ConstCSInfo[VC.Args]
+ .SummaryHasTypeTestAssumeUsers = true;
+ }
+ }
+ for (const FunctionSummary::ConstVCall &VC :
+ FS->type_checked_load_const_vcalls()) {
+ for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) {
+ CallSlots[{MD, VC.VFunc.Offset}]
+ .ConstCSInfo[VC.Args]
+ .SummaryTypeCheckedLoadUsers.push_back(FS);
+ }
+ }
+ }
+ }
+ }
+
// For each (type, offset) pair:
bool DidVirtualConstProp = false;
std::map<std::string, Function*> DevirtTargets;
@@ -894,19 +1367,39 @@ bool DevirtModule::run() {
// function implementation at offset S.first.ByteOffset, and add to
// TargetsForSlot.
std::vector<VirtualCallTarget> TargetsForSlot;
- if (!tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID],
- S.first.ByteOffset))
- continue;
-
- if (!trySingleImplDevirt(TargetsForSlot, S.second) &&
- tryVirtualConstProp(TargetsForSlot, S.second))
+ if (tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID],
+ S.first.ByteOffset)) {
+ WholeProgramDevirtResolution *Res = nullptr;
+ if (ExportSummary && isa<MDString>(S.first.TypeID))
+ Res = &ExportSummary
+ ->getOrInsertTypeIdSummary(
+ cast<MDString>(S.first.TypeID)->getString())
+ .WPDRes[S.first.ByteOffset];
+
+ if (!trySingleImplDevirt(TargetsForSlot, S.second, Res) &&
+ tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first))
DidVirtualConstProp = true;
- // Collect functions devirtualized at least for one call site for stats.
- if (RemarksEnabled)
- for (const auto &T : TargetsForSlot)
- if (T.WasDevirt)
- DevirtTargets[T.Fn->getName()] = T.Fn;
+ // Collect functions devirtualized at least for one call site for stats.
+ if (RemarksEnabled)
+ for (const auto &T : TargetsForSlot)
+ if (T.WasDevirt)
+ DevirtTargets[T.Fn->getName()] = T.Fn;
+ }
+
+ // CFI-specific: if we are exporting and any llvm.type.checked.load
+ // intrinsics were *not* devirtualized, we need to add the resulting
+ // llvm.type.test intrinsics to the function summaries so that the
+ // LowerTypeTests pass will export them.
+ if (ExportSummary && isa<MDString>(S.first.TypeID)) {
+ auto GUID =
+ GlobalValue::getGUID(cast<MDString>(S.first.TypeID)->getString());
+ for (auto FS : S.second.CSInfo.SummaryTypeCheckedLoadUsers)
+ FS->addTypeTest(GUID);
+ for (auto &CCS : S.second.ConstCSInfo)
+ for (auto FS : CCS.second.SummaryTypeCheckedLoadUsers)
+ FS->addTypeTest(GUID);
+ }
}
if (RemarksEnabled) {
@@ -914,23 +1407,12 @@ bool DevirtModule::run() {
for (const auto &DT : DevirtTargets) {
Function *F = DT.second;
DISubprogram *SP = F->getSubprogram();
- DebugLoc DL = SP ? DebugLoc::get(SP->getScopeLine(), 0, SP) : DebugLoc();
- emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, DL,
+ emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, SP,
Twine("devirtualized ") + F->getName());
}
}
- // If we were able to eliminate all unsafe uses for a type checked load,
- // eliminate the type test by replacing it with true.
- if (TypeCheckedLoadFunc) {
- auto True = ConstantInt::getTrue(M.getContext());
- for (auto &&U : NumUnsafeUsesForTypeTest) {
- if (U.second == 0) {
- U.first->replaceAllUsesWith(True);
- U.first->eraseFromParent();
- }
- }
- }
+ removeRedundantTypeTests();
// Rebuild each global we touched as part of virtual constant propagation to
// include the before and after bytes.
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 2d34c1cc74bd..174ec8036274 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -902,7 +902,7 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS,
APInt RHSKnownOne(BitWidth, 0);
computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
- // Addition of two 2's compliment numbers having opposite signs will never
+ // Addition of two 2's complement numbers having opposite signs will never
// overflow.
if ((LHSKnownOne[BitWidth - 1] && RHSKnownZero[BitWidth - 1]) ||
(LHSKnownZero[BitWidth - 1] && RHSKnownOne[BitWidth - 1]))
@@ -939,7 +939,7 @@ bool InstCombiner::WillNotOverflowSignedSub(Value *LHS, Value *RHS,
APInt RHSKnownOne(BitWidth, 0);
computeKnownBits(RHS, RHSKnownZero, RHSKnownOne, 0, &CxtI);
- // Subtraction of two 2's compliment numbers having identical signs will
+ // Subtraction of two 2's complement numbers having identical signs will
// never overflow.
if ((LHSKnownOne[BitWidth - 1] && RHSKnownOne[BitWidth - 1]) ||
(LHSKnownZero[BitWidth - 1] && RHSKnownZero[BitWidth - 1]))
@@ -1042,43 +1042,42 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Value *V = SimplifyUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
- const APInt *Val;
- if (match(RHS, m_APInt(Val))) {
- // X + (signbit) --> X ^ signbit
- if (Val->isSignBit())
+ const APInt *RHSC;
+ if (match(RHS, m_APInt(RHSC))) {
+ if (RHSC->isSignBit()) {
+ // If wrapping is not allowed, then the addition must set the sign bit:
+ // X + (signbit) --> X | signbit
+ if (I.hasNoSignedWrap() || I.hasNoUnsignedWrap())
+ return BinaryOperator::CreateOr(LHS, RHS);
+
+ // If wrapping is allowed, then the addition flips the sign bit of LHS:
+ // X + (signbit) --> X ^ signbit
return BinaryOperator::CreateXor(LHS, RHS);
+ }
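Worked i8 example: X + 0x80 always flips the top bit of X. nuw means the add cannot carry out of bit 7, and nsw means the signed sum X + (-128) stays >= -128; either way X's top bit must already be 0, so X | 0x80, X ^ 0x80, and X + 0x80 all agree, and the 'or' form additionally records that the sign bit of the result is known set.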
// Is this add the last step in a convoluted sext?
Value *X;
const APInt *C;
if (match(LHS, m_ZExt(m_Xor(m_Value(X), m_APInt(C)))) &&
C->isMinSignedValue() &&
- C->sext(LHS->getType()->getScalarSizeInBits()) == *Val) {
+ C->sext(LHS->getType()->getScalarSizeInBits()) == *RHSC) {
// add(zext(xor i16 X, -32768), -32768) --> sext X
return CastInst::Create(Instruction::SExt, X, LHS->getType());
}
- if (Val->isNegative() &&
+ if (RHSC->isNegative() &&
match(LHS, m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C)))) &&
- Val->sge(-C->sext(Val->getBitWidth()))) {
+ RHSC->sge(-C->sext(RHSC->getBitWidth()))) {
// (add (zext (add nuw X, C)), Val) -> (zext (add nuw X, C+Val))
- return CastInst::Create(
- Instruction::ZExt,
- Builder->CreateNUWAdd(
- X, Constant::getIntegerValue(X->getType(),
- *C + Val->trunc(C->getBitWidth()))),
- I.getType());
+ Constant *NewC =
+ ConstantInt::get(X->getType(), *C + RHSC->trunc(C->getBitWidth()));
+ return new ZExtInst(Builder->CreateNUWAdd(X, NewC), I.getType());
}
}
// FIXME: Use the match above instead of dyn_cast to allow these transforms
// for splat vectors.
if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- // See if SimplifyDemandedBits can simplify this. This handles stuff like
- // (X & 254)+1 -> (X&254)|1
- if (SimplifyDemandedInstructionBits(I))
- return &I;
-
// zext(bool) + C -> bool ? C + 1 : C
if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS))
if (ZI->getSrcTy()->isIntegerTy(1))
@@ -1129,8 +1128,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
}
- if (isa<Constant>(RHS) && isa<PHINode>(LHS))
- if (Instruction *NV = FoldOpIntoPhi(I))
+ if (isa<Constant>(RHS))
+ if (Instruction *NV = foldOpWithConstantIntoOperand(I))
return NV;
if (I.getType()->getScalarType()->isIntegerTy(1))
@@ -1201,11 +1200,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateAnd(NewAdd, C2);
}
}
-
- // Try to fold constant add into select arguments.
- if (SelectInst *SI = dyn_cast<SelectInst>(LHS))
- if (Instruction *R = FoldOpIntoSelect(I, SI))
- return R;
}
// add (select X 0 (sub n A)) A --> select X A n
@@ -1253,7 +1247,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// (add (sext x), (sext y)) --> (sext (add int x, y))
if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) {
- // Only do this if x/y have the same type, if at last one of them has a
+ // Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of sexts), and if the
// integer add will not overflow.
if (LHSConv->getOperand(0)->getType() ==
@@ -1290,7 +1284,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// (add (zext x), (zext y)) --> (zext (add int x, y))
if (auto *RHSConv = dyn_cast<ZExtInst>(RHS)) {
- // Only do this if x/y have the same type, if at last one of them has a
+ // Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of zexts), and if the
// integer add will not overflow.
if (LHSConv->getOperand(0)->getType() ==
@@ -1311,13 +1305,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
{
Value *A = nullptr, *B = nullptr;
if (match(RHS, m_Xor(m_Value(A), m_Value(B))) &&
- (match(LHS, m_And(m_Specific(A), m_Specific(B))) ||
- match(LHS, m_And(m_Specific(B), m_Specific(A)))))
+ match(LHS, m_c_And(m_Specific(A), m_Specific(B))))
return BinaryOperator::CreateOr(A, B);
if (match(LHS, m_Xor(m_Value(A), m_Value(B))) &&
- (match(RHS, m_And(m_Specific(A), m_Specific(B))) ||
- match(RHS, m_And(m_Specific(B), m_Specific(A)))))
+ match(RHS, m_c_And(m_Specific(A), m_Specific(B))))
return BinaryOperator::CreateOr(A, B);
}
@@ -1325,8 +1317,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
{
Value *A = nullptr, *B = nullptr;
if (match(RHS, m_Or(m_Value(A), m_Value(B))) &&
- (match(LHS, m_And(m_Specific(A), m_Specific(B))) ||
- match(LHS, m_And(m_Specific(B), m_Specific(A))))) {
+ match(LHS, m_c_And(m_Specific(A), m_Specific(B)))) {
auto *New = BinaryOperator::CreateAdd(A, B);
New->setHasNoSignedWrap(I.hasNoSignedWrap());
New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
@@ -1334,8 +1325,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
if (match(LHS, m_Or(m_Value(A), m_Value(B))) &&
- (match(RHS, m_And(m_Specific(A), m_Specific(B))) ||
- match(RHS, m_And(m_Specific(B), m_Specific(A))))) {
+ match(RHS, m_c_And(m_Specific(A), m_Specific(B)))) {
auto *New = BinaryOperator::CreateAdd(A, B);
New->setHasNoSignedWrap(I.hasNoSignedWrap());
New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
@@ -1394,6 +1384,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
// Check for (fadd double (sitofp x), y), see if we can merge this into an
// integer add followed by a promotion.
if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
+ Value *LHSIntVal = LHSConv->getOperand(0);
+
// (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
// ... if the constant fits in the integer value. This is useful for things
// like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
@@ -1401,12 +1393,12 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
// instcombined.
if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {
Constant *CI =
- ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());
+ ConstantExpr::getFPToSI(CFP, LHSIntVal->getType());
if (LHSConv->hasOneUse() &&
ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
+ WillNotOverflowSignedAdd(LHSIntVal, CI, I)) {
// Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+ Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
CI, "addconv");
return new SIToFPInst(NewAdd, I.getType());
}
@@ -1414,17 +1406,17 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
// (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
- // Only do this if x/y have the same type, if at last one of them has a
+ Value *RHSIntVal = RHSConv->getOperand(0);
+
+ // Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of int->fp conversions),
// and if the integer add will not overflow.
- if (LHSConv->getOperand(0)->getType() ==
- RHSConv->getOperand(0)->getType() &&
+ if (LHSIntVal->getType() == RHSIntVal->getType() &&
(LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0), I)) {
+ WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) {
// Insert the new integer add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
- RHSConv->getOperand(0),"addconv");
+ Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
+ RHSIntVal, "addconv");
return new SIToFPInst(NewAdd, I.getType());
}
}
@@ -1562,7 +1554,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return Res;
}
- if (I.getType()->isIntegerTy(1))
+ if (I.getType()->getScalarType()->isIntegerTy(1))
return BinaryOperator::CreateXor(Op0, Op1);
// Replace (-1 - A) with (~A).
@@ -1580,14 +1572,16 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
+ // Try to fold constant sub into PHI values.
+ if (PHINode *PN = dyn_cast<PHINode>(Op1))
+ if (Instruction *R = foldOpIntoPhi(I, PN))
+ return R;
+
// C-(X+C2) --> (C-C2)-X
Constant *C2;
if (match(Op1, m_Add(m_Value(X), m_Constant(C2))))
return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
- if (SimplifyDemandedInstructionBits(I))
- return &I;
-
// Fold (sub 0, (zext bool to B)) --> (sext bool to B)
if (C->isNullValue() && match(Op1, m_ZExt(m_Value(X))))
if (X->getType()->getScalarType()->isIntegerTy(1))
@@ -1622,11 +1616,11 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
// zero.
- if ((*Op0C + 1).isPowerOf2()) {
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(&I, KnownZero, KnownOne, 0, &I);
- if ((*Op0C | KnownZero).isAllOnesValue())
+ if (Op0C->isMask()) {
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ computeKnownBits(Op1, RHSKnownZero, RHSKnownOne, 0, &I);
+ if ((*Op0C | RHSKnownZero).isAllOnesValue())
return BinaryOperator::CreateXor(Op1, Op0);
}
}
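The rewritten guard checks the same identity as before, but against the right operand: when Op0C is a mask 0...01...1 and Op1 has no bits outside it, the subtraction borrows nothing, so M - X == M ^ X. A standalone check with a hypothetical 4-bit mask (plain C++):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t M = 0x0F; // low-bit mask, as Op0C->isMask() requires
  for (uint8_t X = 0; X <= M; ++X) // bits outside M known zero
    // With no borrows possible, subtracting clears exactly X's bits.
    assert((uint8_t)(M - X) == (uint8_t)(M ^ X));
  return 0;
}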
@@ -1634,8 +1628,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
{
Value *Y;
// X-(X+Y) == -Y X-(Y+X) == -Y
- if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) ||
- match(Op1, m_Add(m_Value(Y), m_Specific(Op0))))
+ if (match(Op1, m_c_Add(m_Specific(Op0), m_Value(Y))))
return BinaryOperator::CreateNeg(Y);
// (X-Y)-X == -Y
@@ -1645,18 +1638,16 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// (sub (or A, B) (xor A, B)) --> (and A, B)
{
- Value *A = nullptr, *B = nullptr;
+ Value *A, *B;
if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
- (match(Op0, m_Or(m_Specific(A), m_Specific(B))) ||
- match(Op0, m_Or(m_Specific(B), m_Specific(A)))))
+ match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
return BinaryOperator::CreateAnd(A, B);
}
- if (Op0->hasOneUse()) {
- Value *Y = nullptr;
+ {
+ Value *Y;
// ((X | Y) - X) --> (~X & Y)
- if (match(Op0, m_Or(m_Value(Y), m_Specific(Op1))) ||
- match(Op0, m_Or(m_Specific(Op1), m_Value(Y))))
+ if (match(Op0, m_OneUse(m_c_Or(m_Value(Y), m_Specific(Op1)))))
return BinaryOperator::CreateAnd(
Y, Builder->CreateNot(Op1, Op1->getName() + ".not"));
}
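The one-use requirement moved into the matcher, but the underlying identity is unchanged: X's bits are always present in X | Y, so the subtraction never borrows and strips exactly those bits. A standalone 8-bit check (plain C++, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 256; ++Y)
      // ((X | Y) - X) --> (~X & Y)
      assert((uint8_t)((X | Y) - X) == (uint8_t)(~X & Y));
  return 0;
}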
@@ -1664,7 +1655,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (Op1->hasOneUse()) {
Value *X = nullptr, *Y = nullptr, *Z = nullptr;
Constant *C = nullptr;
- Constant *CI = nullptr;
// (X - (Y - Z)) --> (X + (Z - Y)).
if (match(Op1, m_Sub(m_Value(Y), m_Value(Z))))
@@ -1673,8 +1663,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// (X - (X & Y)) --> (X & ~Y)
//
- if (match(Op1, m_And(m_Value(Y), m_Specific(Op0))) ||
- match(Op1, m_And(m_Specific(Op0), m_Value(Y))))
+ if (match(Op1, m_c_And(m_Value(Y), m_Specific(Op0))))
return BinaryOperator::CreateAnd(Op0,
Builder->CreateNot(Y, Y->getName() + ".not"));
@@ -1702,14 +1691,14 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// X - A*-B -> X + A*B
// X - -A*B -> X + A*B
Value *A, *B;
- if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) ||
- match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B))))
+ Constant *CI;
+ if (match(Op1, m_c_Mul(m_Value(A), m_Neg(m_Value(B)))))
return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B));
// X - A*CI -> X + A*-CI
- // X - CI*A -> X + A*-CI
- if (match(Op1, m_Mul(m_Value(A), m_Constant(CI))) ||
- match(Op1, m_Mul(m_Constant(CI), m_Value(A)))) {
+ // No need to handle commuted multiply because multiply handling will
+ // ensure the constant is moved to the right-hand side.
+ if (match(Op1, m_Mul(m_Value(A), m_Constant(CI)))) {
Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI));
return BinaryOperator::CreateAdd(Op0, NewMul);
}
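Both rewrites depend on negation distributing over multiplication in two's-complement arithmetic: X - A*C == X + A*(-C) modulo 2^n. A brute-force check at 8 bits (a standalone sketch, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned A = 0; A < 256; ++A)
      for (unsigned C = 0; C < 256; ++C) {
        uint8_t Sub = (uint8_t)(X - A * C);
        // Negating the constant and adding gives the same wrapped result.
        uint8_t Add = (uint8_t)(X + A * (uint8_t)(0 - C));
        assert(Sub == Add);
      }
  return 0;
}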
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index da5384a86aac..b2a41c699202 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -137,9 +137,8 @@ Value *InstCombiner::SimplifyBSwap(BinaryOperator &I) {
}
/// This handles expressions of the form ((val OP C1) & C2), where
-/// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is
-/// guaranteed to be a binary operator.
-Instruction *InstCombiner::OptAndOp(Instruction *Op,
+/// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'.
+Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
ConstantInt *OpRHS,
ConstantInt *AndRHS,
BinaryOperator &TheAnd) {
@@ -149,6 +148,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
Together = ConstantExpr::getAnd(AndRHS, OpRHS);
switch (Op->getOpcode()) {
+ default: break;
case Instruction::Xor:
if (Op->hasOneUse()) {
// (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
@@ -159,13 +159,6 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
break;
case Instruction::Or:
if (Op->hasOneUse()){
- if (Together != OpRHS) {
- // (X | C1) & C2 --> (X | (C1&C2)) & C2
- Value *Or = Builder->CreateOr(X, Together);
- Or->takeName(Op);
- return BinaryOperator::CreateAnd(Or, AndRHS);
- }
-
ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together);
if (TogetherCI && !TogetherCI->isZero()){
// (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1
@@ -302,178 +295,91 @@ Value *InstCombiner::insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi,
return Builder->CreateICmp(Pred, VMinusLo, HiMinusLo);
}
-/// Returns true iff Val consists of one contiguous run of 1s with any number
-/// of 0s on either side. The 1s are allowed to wrap from LSB to MSB,
-/// so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is
-/// not, since all 1s are not contiguous.
-static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
- const APInt& V = Val->getValue();
- uint32_t BitWidth = Val->getType()->getBitWidth();
- if (!APIntOps::isShiftedMask(BitWidth, V)) return false;
-
- // look for the first zero bit after the run of ones
- MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
- // look for the first non-zero bit
- ME = V.getActiveBits();
- return true;
-}
-
-/// This is part of an expression (LHS +/- RHS) & Mask, where isSub determines
-/// whether the operator is a sub. If we can fold one of the following xforms:
+/// Classify (icmp eq (A & B), C) and (icmp ne (A & B), C) as matching patterns
+/// that can be simplified.
+/// One of A and B is considered the mask. The other is the value. This is
+/// described as the "AMask" or "BMask" part of the enum. If the enum contains
+/// only "Mask", then both A and B can be considered masks. If A is the mask,
+/// then it was proven that (A & C) == C. This is trivial if C == A or C == 0.
+/// If both A and C are constants, this proof is also easy.
+/// For the following explanations, we assume that A is the mask.
///
-/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
-/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
-/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+/// "AllOnes" declares that the comparison is true only if (A & B) == A or all
+/// bits of A are set in B.
+/// Example: (icmp eq (A & 3), 3) -> AMask_AllOnes
///
-/// return (A +/- B).
+/// "AllZeros" declares that the comparison is true only if (A & B) == 0 or all
+/// bits of A are cleared in B.
+/// Example: (icmp eq (A & 3), 0) -> Mask_AllZeros
+///
+/// "Mixed" declares that (A & B) == C and C might or might not contain any
+/// number of one bits and zero bits.
+/// Example: (icmp eq (A & 3), 1) -> AMask_Mixed
+///
+/// "Not" means that in above descriptions "==" should be replaced by "!=".
+/// Example: (icmp ne (A & 3), 3) -> AMask_NotAllOnes
///
-Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
- ConstantInt *Mask, bool isSub,
- Instruction &I) {
- Instruction *LHSI = dyn_cast<Instruction>(LHS);
- if (!LHSI || LHSI->getNumOperands() != 2 ||
- !isa<ConstantInt>(LHSI->getOperand(1))) return nullptr;
-
- ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
-
- switch (LHSI->getOpcode()) {
- default: return nullptr;
- case Instruction::And:
- if (ConstantExpr::getAnd(N, Mask) == Mask) {
- // If the AndRHS is a power of two minus one (0+1+), this is simple.
- if ((Mask->getValue().countLeadingZeros() +
- Mask->getValue().countPopulation()) ==
- Mask->getValue().getBitWidth())
- break;
-
- // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+
- // part, we don't need any explicit masks to take them out of A. If that
- // is all N is, ignore it.
- uint32_t MB = 0, ME = 0;
- if (isRunOfOnes(Mask, MB, ME)) { // begin/end bit of run, inclusive
- uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
- APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1));
- if (MaskedValueIsZero(RHS, Mask, 0, &I))
- break;
- }
- }
- return nullptr;
- case Instruction::Or:
- case Instruction::Xor:
- // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
- if ((Mask->getValue().countLeadingZeros() +
- Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
- && ConstantExpr::getAnd(N, Mask)->isNullValue())
- break;
- return nullptr;
- }
-
- if (isSub)
- return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
- return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
-}
-
-/// enum for classifying (icmp eq (A & B), C) and (icmp ne (A & B), C)
-/// One of A and B is considered the mask, the other the value. This is
-/// described as the "AMask" or "BMask" part of the enum. If the enum
-/// contains only "Mask", then both A and B can be considered masks.
-/// If A is the mask, then it was proven, that (A & C) == C. This
-/// is trivial if C == A, or C == 0. If both A and C are constants, this
-/// proof is also easy.
-/// For the following explanations we assume that A is the mask.
-/// The part "AllOnes" declares, that the comparison is true only
-/// if (A & B) == A, or all bits of A are set in B.
-/// Example: (icmp eq (A & 3), 3) -> FoldMskICmp_AMask_AllOnes
-/// The part "AllZeroes" declares, that the comparison is true only
-/// if (A & B) == 0, or all bits of A are cleared in B.
-/// Example: (icmp eq (A & 3), 0) -> FoldMskICmp_Mask_AllZeroes
-/// The part "Mixed" declares, that (A & B) == C and C might or might not
-/// contain any number of one bits and zero bits.
-/// Example: (icmp eq (A & 3), 1) -> FoldMskICmp_AMask_Mixed
-/// The Part "Not" means, that in above descriptions "==" should be replaced
-/// by "!=".
-/// Example: (icmp ne (A & 3), 3) -> FoldMskICmp_AMask_NotAllOnes
/// If the mask A contains a single bit, then the following are equivalent:
/// (icmp eq (A & B), A) equals (icmp ne (A & B), 0)
/// (icmp ne (A & B), A) equals (icmp eq (A & B), 0)
enum MaskedICmpType {
- FoldMskICmp_AMask_AllOnes = 1,
- FoldMskICmp_AMask_NotAllOnes = 2,
- FoldMskICmp_BMask_AllOnes = 4,
- FoldMskICmp_BMask_NotAllOnes = 8,
- FoldMskICmp_Mask_AllZeroes = 16,
- FoldMskICmp_Mask_NotAllZeroes = 32,
- FoldMskICmp_AMask_Mixed = 64,
- FoldMskICmp_AMask_NotMixed = 128,
- FoldMskICmp_BMask_Mixed = 256,
- FoldMskICmp_BMask_NotMixed = 512
+ AMask_AllOnes = 1,
+ AMask_NotAllOnes = 2,
+ BMask_AllOnes = 4,
+ BMask_NotAllOnes = 8,
+ Mask_AllZeros = 16,
+ Mask_NotAllZeros = 32,
+ AMask_Mixed = 64,
+ AMask_NotMixed = 128,
+ BMask_Mixed = 256,
+ BMask_NotMixed = 512
};
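The doc comment's last claim, that a single-bit mask makes the AllOnes form coincide with the negated AllZeros form, can be verified exhaustively. A standalone sketch (plain C++; the particular bit chosen is arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t A = 0x08; // a single-bit mask
  for (unsigned B = 0; B < 256; ++B) {
    // (A & B) is either 0 or A, so the two predicates coincide.
    assert(((A & B) == A) == ((A & B) != 0));
    assert(((A & B) != A) == ((A & B) == 0));
  }
  return 0;
}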
-/// Return the set of pattern classes (from MaskedICmpType)
-/// that (icmp SCC (A & B), C) satisfies.
-static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
- ICmpInst::Predicate SCC)
-{
+/// Return the set of patterns (from MaskedICmpType) that (icmp SCC (A & B), C)
+/// satisfies.
+static unsigned getMaskedICmpType(Value *A, Value *B, Value *C,
+ ICmpInst::Predicate Pred) {
ConstantInt *ACst = dyn_cast<ConstantInt>(A);
ConstantInt *BCst = dyn_cast<ConstantInt>(B);
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
- bool icmp_eq = (SCC == ICmpInst::ICMP_EQ);
- bool icmp_abit = (ACst && !ACst->isZero() &&
- ACst->getValue().isPowerOf2());
- bool icmp_bbit = (BCst && !BCst->isZero() &&
- BCst->getValue().isPowerOf2());
- unsigned result = 0;
+ bool IsEq = (Pred == ICmpInst::ICMP_EQ);
+ bool IsAPow2 = (ACst && !ACst->isZero() && ACst->getValue().isPowerOf2());
+ bool IsBPow2 = (BCst && !BCst->isZero() && BCst->getValue().isPowerOf2());
+ unsigned MaskVal = 0;
if (CCst && CCst->isZero()) {
// if C is zero, then both A and B qualify as masks
- result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes |
- FoldMskICmp_AMask_Mixed |
- FoldMskICmp_BMask_Mixed)
- : (FoldMskICmp_Mask_NotAllZeroes |
- FoldMskICmp_AMask_NotMixed |
- FoldMskICmp_BMask_NotMixed));
- if (icmp_abit)
- result |= (icmp_eq ? (FoldMskICmp_AMask_NotAllOnes |
- FoldMskICmp_AMask_NotMixed)
- : (FoldMskICmp_AMask_AllOnes |
- FoldMskICmp_AMask_Mixed));
- if (icmp_bbit)
- result |= (icmp_eq ? (FoldMskICmp_BMask_NotAllOnes |
- FoldMskICmp_BMask_NotMixed)
- : (FoldMskICmp_BMask_AllOnes |
- FoldMskICmp_BMask_Mixed));
- return result;
+ MaskVal |= (IsEq ? (Mask_AllZeros | AMask_Mixed | BMask_Mixed)
+ : (Mask_NotAllZeros | AMask_NotMixed | BMask_NotMixed));
+ if (IsAPow2)
+ MaskVal |= (IsEq ? (AMask_NotAllOnes | AMask_NotMixed)
+ : (AMask_AllOnes | AMask_Mixed));
+ if (IsBPow2)
+ MaskVal |= (IsEq ? (BMask_NotAllOnes | BMask_NotMixed)
+ : (BMask_AllOnes | BMask_Mixed));
+ return MaskVal;
}
+
if (A == C) {
- result |= (icmp_eq ? (FoldMskICmp_AMask_AllOnes |
- FoldMskICmp_AMask_Mixed)
- : (FoldMskICmp_AMask_NotAllOnes |
- FoldMskICmp_AMask_NotMixed));
- if (icmp_abit)
- result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
- FoldMskICmp_AMask_NotMixed)
- : (FoldMskICmp_Mask_AllZeroes |
- FoldMskICmp_AMask_Mixed));
- } else if (ACst && CCst &&
- ConstantExpr::getAnd(ACst, CCst) == CCst) {
- result |= (icmp_eq ? FoldMskICmp_AMask_Mixed
- : FoldMskICmp_AMask_NotMixed);
+ MaskVal |= (IsEq ? (AMask_AllOnes | AMask_Mixed)
+ : (AMask_NotAllOnes | AMask_NotMixed));
+ if (IsAPow2)
+ MaskVal |= (IsEq ? (Mask_NotAllZeros | AMask_NotMixed)
+ : (Mask_AllZeros | AMask_Mixed));
+ } else if (ACst && CCst && ConstantExpr::getAnd(ACst, CCst) == CCst) {
+ MaskVal |= (IsEq ? AMask_Mixed : AMask_NotMixed);
}
+
if (B == C) {
- result |= (icmp_eq ? (FoldMskICmp_BMask_AllOnes |
- FoldMskICmp_BMask_Mixed)
- : (FoldMskICmp_BMask_NotAllOnes |
- FoldMskICmp_BMask_NotMixed));
- if (icmp_bbit)
- result |= (icmp_eq ? (FoldMskICmp_Mask_NotAllZeroes |
- FoldMskICmp_BMask_NotMixed)
- : (FoldMskICmp_Mask_AllZeroes |
- FoldMskICmp_BMask_Mixed));
- } else if (BCst && CCst &&
- ConstantExpr::getAnd(BCst, CCst) == CCst) {
- result |= (icmp_eq ? FoldMskICmp_BMask_Mixed
- : FoldMskICmp_BMask_NotMixed);
- }
- return result;
+ MaskVal |= (IsEq ? (BMask_AllOnes | BMask_Mixed)
+ : (BMask_NotAllOnes | BMask_NotMixed));
+ if (IsBPow2)
+ MaskVal |= (IsEq ? (Mask_NotAllZeros | BMask_NotMixed)
+ : (Mask_AllZeros | BMask_Mixed));
+ } else if (BCst && CCst && ConstantExpr::getAnd(BCst, CCst) == CCst) {
+ MaskVal |= (IsEq ? BMask_Mixed : BMask_NotMixed);
+ }
+
+ return MaskVal;
}
/// Convert an analysis of a masked ICmp into its equivalent if all boolean
@@ -482,32 +388,30 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
/// involves swapping those bits over.
static unsigned conjugateICmpMask(unsigned Mask) {
unsigned NewMask;
- NewMask = (Mask & (FoldMskICmp_AMask_AllOnes | FoldMskICmp_BMask_AllOnes |
- FoldMskICmp_Mask_AllZeroes | FoldMskICmp_AMask_Mixed |
- FoldMskICmp_BMask_Mixed))
+ NewMask = (Mask & (AMask_AllOnes | BMask_AllOnes | Mask_AllZeros |
+ AMask_Mixed | BMask_Mixed))
<< 1;
- NewMask |=
- (Mask & (FoldMskICmp_AMask_NotAllOnes | FoldMskICmp_BMask_NotAllOnes |
- FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_AMask_NotMixed |
- FoldMskICmp_BMask_NotMixed))
- >> 1;
+ NewMask |= (Mask & (AMask_NotAllOnes | BMask_NotAllOnes | Mask_NotAllZeros |
+ AMask_NotMixed | BMask_NotMixed))
+ >> 1;
return NewMask;
}
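Each "Not" class sits one bit above its positive counterpart, so conjugation is the pairwise bit swap implemented above, and applying it twice must be the identity. A standalone check of that involution (plain C++ mirroring the enum values, not LLVM code):

#include <cassert>

int main() {
  // Positive classes occupy bits 1|4|16|64|256; each "Not" class is the
  // next bit up, so conjugation is a pairwise bit swap.
  const unsigned PosBits = 1 | 4 | 16 | 64 | 256;
  const unsigned NegBits = PosBits << 1;
  for (unsigned Mask = 0; Mask < 1024; ++Mask) {
    unsigned Conj = ((Mask & PosBits) << 1) | ((Mask & NegBits) >> 1);
    unsigned Twice = ((Conj & PosBits) << 1) | ((Conj & NegBits) >> 1);
    assert(Twice == Mask); // conjugation is an involution
  }
  return 0;
}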
-/// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
-/// Return the set of pattern classes (from MaskedICmpType)
-/// that both LHS and RHS satisfy.
-static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
- Value*& B, Value*& C,
- Value*& D, Value*& E,
- ICmpInst *LHS, ICmpInst *RHS,
- ICmpInst::Predicate &LHSCC,
- ICmpInst::Predicate &RHSCC) {
- if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0;
+/// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E).
+/// Return the set of pattern classes (from MaskedICmpType) that both LHS and
+/// RHS satisfy.
+static unsigned getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
+ Value *&D, Value *&E, ICmpInst *LHS,
+ ICmpInst *RHS,
+ ICmpInst::Predicate &PredL,
+ ICmpInst::Predicate &PredR) {
+ if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType())
+ return 0;
// vectors are not (yet?) supported
- if (LHS->getOperand(0)->getType()->isVectorTy()) return 0;
+ if (LHS->getOperand(0)->getType()->isVectorTy())
+ return 0;
// Here comes the tricky part:
// LHS might be of the form L11 & L12 == X, X == L21 & L22,
@@ -517,9 +421,9 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
// above.
Value *L1 = LHS->getOperand(0);
Value *L2 = LHS->getOperand(1);
- Value *L11,*L12,*L21,*L22;
+ Value *L11, *L12, *L21, *L22;
// Check whether the icmp can be decomposed into a bit test.
- if (decomposeBitTestICmp(LHS, LHSCC, L11, L12, L2)) {
+ if (decomposeBitTestICmp(LHS, PredL, L11, L12, L2)) {
L21 = L22 = L1 = nullptr;
} else {
// Look for ANDs in the LHS icmp.
@@ -543,22 +447,26 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
}
// Bail if LHS was an icmp that can't be decomposed into an equality.
- if (!ICmpInst::isEquality(LHSCC))
+ if (!ICmpInst::isEquality(PredL))
return 0;
Value *R1 = RHS->getOperand(0);
Value *R2 = RHS->getOperand(1);
- Value *R11,*R12;
- bool ok = false;
- if (decomposeBitTestICmp(RHS, RHSCC, R11, R12, R2)) {
+ Value *R11, *R12;
+ bool Ok = false;
+ if (decomposeBitTestICmp(RHS, PredR, R11, R12, R2)) {
if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
- A = R11; D = R12;
+ A = R11;
+ D = R12;
} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
- A = R12; D = R11;
+ A = R12;
+ D = R11;
} else {
return 0;
}
- E = R2; R1 = nullptr; ok = true;
+ E = R2;
+ R1 = nullptr;
+ Ok = true;
} else if (R1->getType()->isIntegerTy()) {
if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) {
// As before, model no mask as a trivial mask if it'll let us do an
@@ -568,46 +476,62 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
}
if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
- A = R11; D = R12; E = R2; ok = true;
+ A = R11;
+ D = R12;
+ E = R2;
+ Ok = true;
} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
- A = R12; D = R11; E = R2; ok = true;
+ A = R12;
+ D = R11;
+ E = R2;
+ Ok = true;
}
}
// Bail if RHS was an icmp that can't be decomposed into an equality.
- if (!ICmpInst::isEquality(RHSCC))
+ if (!ICmpInst::isEquality(PredR))
return 0;
// Look for ANDs on the right side of the RHS icmp.
- if (!ok && R2->getType()->isIntegerTy()) {
+ if (!Ok && R2->getType()->isIntegerTy()) {
if (!match(R2, m_And(m_Value(R11), m_Value(R12)))) {
R11 = R2;
R12 = Constant::getAllOnesValue(R2->getType());
}
if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
- A = R11; D = R12; E = R1; ok = true;
+ A = R11;
+ D = R12;
+ E = R1;
+ Ok = true;
} else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
- A = R12; D = R11; E = R1; ok = true;
+ A = R12;
+ D = R11;
+ E = R1;
+ Ok = true;
} else {
return 0;
}
}
- if (!ok)
+ if (!Ok)
return 0;
if (L11 == A) {
- B = L12; C = L2;
+ B = L12;
+ C = L2;
} else if (L12 == A) {
- B = L11; C = L2;
+ B = L11;
+ C = L2;
} else if (L21 == A) {
- B = L22; C = L1;
+ B = L22;
+ C = L1;
} else if (L22 == A) {
- B = L21; C = L1;
+ B = L21;
+ C = L1;
}
- unsigned LeftType = getTypeOfMaskedICmp(A, B, C, LHSCC);
- unsigned RightType = getTypeOfMaskedICmp(A, D, E, RHSCC);
+ unsigned LeftType = getMaskedICmpType(A, B, C, PredL);
+ unsigned RightType = getMaskedICmpType(A, D, E, PredR);
return LeftType & RightType;
}
@@ -616,12 +540,14 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
llvm::InstCombiner::BuilderTy *Builder) {
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
- ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
- unsigned Mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS,
- LHSCC, RHSCC);
- if (Mask == 0) return nullptr;
- assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) &&
- "foldLogOpOfMaskedICmpsHelper must return an equality predicate.");
+ ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
+ unsigned Mask =
+ getMaskedTypeForICmpPair(A, B, C, D, E, LHS, RHS, PredL, PredR);
+ if (Mask == 0)
+ return nullptr;
+
+ assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
+ "Expected equality predicates for masked type of icmps.");
// In full generality:
// (icmp (A & B) Op C) | (icmp (A & D) Op E)
@@ -642,7 +568,7 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
Mask = conjugateICmpMask(Mask);
}
- if (Mask & FoldMskICmp_Mask_AllZeroes) {
+ if (Mask & Mask_AllZeros) {
// (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
// -> (icmp eq (A & (B|D)), 0)
Value *NewOr = Builder->CreateOr(B, D);
@@ -653,14 +579,14 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
Value *Zero = Constant::getNullValue(A->getType());
return Builder->CreateICmp(NewCC, NewAnd, Zero);
}
- if (Mask & FoldMskICmp_BMask_AllOnes) {
+ if (Mask & BMask_AllOnes) {
// (icmp eq (A & B), B) & (icmp eq (A & D), D)
// -> (icmp eq (A & (B|D)), (B|D))
Value *NewOr = Builder->CreateOr(B, D);
Value *NewAnd = Builder->CreateAnd(A, NewOr);
return Builder->CreateICmp(NewCC, NewAnd, NewOr);
}
- if (Mask & FoldMskICmp_AMask_AllOnes) {
+ if (Mask & AMask_AllOnes) {
// (icmp eq (A & B), A) & (icmp eq (A & D), A)
// -> (icmp eq (A & (B&D)), A)
Value *NewAnd1 = Builder->CreateAnd(B, D);
@@ -672,11 +598,13 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// their actual values. This isn't strictly necessary, just a "handle the
// easy cases for now" decision.
ConstantInt *BCst = dyn_cast<ConstantInt>(B);
- if (!BCst) return nullptr;
+ if (!BCst)
+ return nullptr;
ConstantInt *DCst = dyn_cast<ConstantInt>(D);
- if (!DCst) return nullptr;
+ if (!DCst)
+ return nullptr;
- if (Mask & (FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_BMask_NotAllOnes)) {
+ if (Mask & (Mask_NotAllZeros | BMask_NotAllOnes)) {
// (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and
// (icmp ne (A & B), B) & (icmp ne (A & D), D)
// -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0)
@@ -689,7 +617,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
else if (NewMask == DCst->getValue())
return RHS;
}
- if (Mask & FoldMskICmp_AMask_NotAllOnes) {
+
+ if (Mask & AMask_NotAllOnes) {
// (icmp ne (A & B), B) & (icmp ne (A & D), D)
// -> (icmp ne (A & B), A) or (icmp ne (A & D), A)
// Only valid if one of the masks is a superset of the other (check "B|D" is
@@ -701,7 +630,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
else if (NewMask == DCst->getValue())
return RHS;
}
- if (Mask & FoldMskICmp_BMask_Mixed) {
+
+ if (Mask & BMask_Mixed) {
// (icmp eq (A & B), C) & (icmp eq (A & D), E)
// We already know that B & C == C && D & E == E.
// If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
@@ -713,23 +643,28 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// (icmp ne (A & B), B) & (icmp eq (A & D), D)
// with B and D, having a single bit set.
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
- if (!CCst) return nullptr;
+ if (!CCst)
+ return nullptr;
ConstantInt *ECst = dyn_cast<ConstantInt>(E);
- if (!ECst) return nullptr;
- if (LHSCC != NewCC)
+ if (!ECst)
+ return nullptr;
+ if (PredL != NewCC)
CCst = cast<ConstantInt>(ConstantExpr::getXor(BCst, CCst));
- if (RHSCC != NewCC)
+ if (PredR != NewCC)
ECst = cast<ConstantInt>(ConstantExpr::getXor(DCst, ECst));
+
// If there is a conflict, we should actually return a false for the
// whole construct.
if (((BCst->getValue() & DCst->getValue()) &
(CCst->getValue() ^ ECst->getValue())) != 0)
return ConstantInt::get(LHS->getType(), !IsAnd);
+
Value *NewOr1 = Builder->CreateOr(B, D);
Value *NewOr2 = ConstantExpr::getOr(CCst, ECst);
Value *NewAnd = Builder->CreateAnd(A, NewOr1);
return Builder->CreateICmp(NewCC, NewAnd, NewOr2);
}
+
return nullptr;
}
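The "positive" folds in this function are plain set algebra; the Mask_AllZeros case, for instance, says A is disjoint from B and from D exactly when it is disjoint from their union. A brute-force check at 8 bits (standalone C++, not part of the patch):

#include <cassert>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B)
      for (unsigned D = 0; D < 256; ++D) {
        // (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
        //   <=> (icmp eq (A & (B | D)), 0)
        bool Orig = ((A & B) == 0) && ((A & D) == 0);
        bool Folded = (A & (B | D)) == 0;
        assert(Orig == Folded);
      }
  return 0;
}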
@@ -789,12 +724,67 @@ Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
return Builder->CreateICmp(NewPred, Input, RangeEnd);
}
+static Value *
+foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
+ bool JoinedByAnd,
+ InstCombiner::BuilderTy *Builder) {
+ Value *X = LHS->getOperand(0);
+ if (X != RHS->getOperand(0))
+ return nullptr;
+
+ const APInt *C1, *C2;
+ if (!match(LHS->getOperand(1), m_APInt(C1)) ||
+ !match(RHS->getOperand(1), m_APInt(C2)))
+ return nullptr;
+
+ // We only handle (X != C1 && X != C2) and (X == C1 || X == C2).
+ ICmpInst::Predicate Pred = LHS->getPredicate();
+ if (Pred != RHS->getPredicate())
+ return nullptr;
+ if (JoinedByAnd && Pred != ICmpInst::ICMP_NE)
+ return nullptr;
+ if (!JoinedByAnd && Pred != ICmpInst::ICMP_EQ)
+ return nullptr;
+
+ // The larger unsigned constant goes on the right.
+ if (C1->ugt(*C2))
+ std::swap(C1, C2);
+
+ APInt Xor = *C1 ^ *C2;
+ if (Xor.isPowerOf2()) {
+ // If C1 and C2 differ by only one bit, then set that bit in X and
+ // compare against the larger constant:
+ // (X == C1 || X == C2) --> (X | (C1 ^ C2)) == C2
+ // (X != C1 && X != C2) --> (X | (C1 ^ C2)) != C2
+ // We choose an 'or' with a Pow2 constant rather than the inverse mask with
+ // 'and' because that may lead to smaller codegen from a smaller constant.
+ Value *Or = Builder->CreateOr(X, ConstantInt::get(X->getType(), Xor));
+ return Builder->CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2));
+ }
+
+ // Special case: get the ordering right when the values wrap around zero.
+ // I.e., we assumed the constants were unsigned when swapping earlier.
+ if (*C1 == 0 && C2->isAllOnesValue())
+ std::swap(C1, C2);
+
+ if (*C1 == *C2 - 1) {
+ // (X == 13 || X == 14) --> X - 13 <=u 1
+ // (X != 13 && X != 14) --> X - 13 >u 1
+ // An 'add' is the canonical IR form, so favor that over a 'sub'.
+ Value *Add = Builder->CreateAdd(X, ConstantInt::get(X->getType(), -(*C1)));
+ auto NewPred = JoinedByAnd ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULE;
+ return Builder->CreateICmp(NewPred, Add, ConstantInt::get(X->getType(), 1));
+ }
+
+ return nullptr;
+}
+
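Both rewrites in this new helper are easy to validate exhaustively at a small width: the 'or' trick when the constants differ in one bit, and the add-based range test when they are consecutive. A standalone check at 8 bits (plain C++, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned C1 = 0; C1 < 256; ++C1)
    for (unsigned C2 = 0; C2 < 256; ++C2) {
      unsigned Lo = C1 < C2 ? C1 : C2, Hi = C1 < C2 ? C2 : C1;
      unsigned Xor = Lo ^ Hi;
      bool OneBitApart = Xor != 0 && (Xor & (Xor - 1)) == 0;
      for (unsigned X = 0; X < 256; ++X) {
        bool Orig = (X == C1) || (X == C2);
        if (OneBitApart) // (X == C1 || X == C2) --> (X | (C1 ^ C2)) == Hi
          assert(Orig == ((X | Xor) == Hi));
        if (Hi == Lo + 1) // (X == 13 || X == 14) --> X - 13 <=u 1
          assert(Orig == ((uint8_t)(X - Lo) <= 1));
      }
    }
  return 0;
}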
/// Fold (icmp)&(icmp) if possible.
Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
- ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+ ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
// (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
- if (PredicatesFoldable(LHSCC, RHSCC)) {
+ if (PredicatesFoldable(PredL, PredR)) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
LHS->getOperand(1) == RHS->getOperand(0))
LHS->swapOperands();
@@ -819,86 +809,90 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/false))
return V;
+ if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, true, Builder))
+ return V;
+
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
- Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
- ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
- ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
- if (!LHSCst || !RHSCst) return nullptr;
+ Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
+ ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ if (!LHSC || !RHSC)
+ return nullptr;
- if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ if (LHSC == RHSC && PredL == PredR) {
// (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
// where C is a power of 2 or
// (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
- if ((LHSCC == ICmpInst::ICMP_ULT && LHSCst->getValue().isPowerOf2()) ||
- (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero())) {
- Value *NewOr = Builder->CreateOr(Val, Val2);
- return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ if ((PredL == ICmpInst::ICMP_ULT && LHSC->getValue().isPowerOf2()) ||
+ (PredL == ICmpInst::ICMP_EQ && LHSC->isZero())) {
+ Value *NewOr = Builder->CreateOr(LHS0, RHS0);
+ return Builder->CreateICmp(PredL, NewOr, LHSC);
}
}
// (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
// where CMAX is the all ones value for the truncated type,
// iff the lower bits of C2 and CA are zero.
- if (LHSCC == ICmpInst::ICMP_EQ && LHSCC == RHSCC &&
- LHS->hasOneUse() && RHS->hasOneUse()) {
+ if (PredL == ICmpInst::ICMP_EQ && PredL == PredR && LHS->hasOneUse() &&
+ RHS->hasOneUse()) {
Value *V;
- ConstantInt *AndCst, *SmallCst = nullptr, *BigCst = nullptr;
+ ConstantInt *AndC, *SmallC = nullptr, *BigC = nullptr;
// (trunc x) == C1 & (and x, CA) == C2
// (and x, CA) == C2 & (trunc x) == C1
- if (match(Val2, m_Trunc(m_Value(V))) &&
- match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
- SmallCst = RHSCst;
- BigCst = LHSCst;
- } else if (match(Val, m_Trunc(m_Value(V))) &&
- match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) {
- SmallCst = LHSCst;
- BigCst = RHSCst;
+ if (match(RHS0, m_Trunc(m_Value(V))) &&
+ match(LHS0, m_And(m_Specific(V), m_ConstantInt(AndC)))) {
+ SmallC = RHSC;
+ BigC = LHSC;
+ } else if (match(LHS0, m_Trunc(m_Value(V))) &&
+ match(RHS0, m_And(m_Specific(V), m_ConstantInt(AndC)))) {
+ SmallC = LHSC;
+ BigC = RHSC;
}
- if (SmallCst && BigCst) {
- unsigned BigBitSize = BigCst->getType()->getBitWidth();
- unsigned SmallBitSize = SmallCst->getType()->getBitWidth();
+ if (SmallC && BigC) {
+ unsigned BigBitSize = BigC->getType()->getBitWidth();
+ unsigned SmallBitSize = SmallC->getType()->getBitWidth();
// Check that the low bits are zero.
APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
- if ((Low & AndCst->getValue()) == 0 && (Low & BigCst->getValue()) == 0) {
- Value *NewAnd = Builder->CreateAnd(V, Low | AndCst->getValue());
- APInt N = SmallCst->getValue().zext(BigBitSize) | BigCst->getValue();
- Value *NewVal = ConstantInt::get(AndCst->getType()->getContext(), N);
- return Builder->CreateICmp(LHSCC, NewAnd, NewVal);
+ if ((Low & AndC->getValue()) == 0 && (Low & BigC->getValue()) == 0) {
+ Value *NewAnd = Builder->CreateAnd(V, Low | AndC->getValue());
+ APInt N = SmallC->getValue().zext(BigBitSize) | BigC->getValue();
+ Value *NewVal = ConstantInt::get(AndC->getType()->getContext(), N);
+ return Builder->CreateICmp(PredL, NewAnd, NewVal);
}
}
}
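The widened compare merges the truncation test and the masked test into one, provided the low SmallBitSize bits of both AndC and BigC are zero so the two halves cannot interfere. A standalone check with hypothetical constants, widening i8 into i16 (plain C++):

#include <cassert>
#include <cstdint>

int main() {
  const uint16_t AndC = 0xF000, BigC = 0x3000; // low 8 bits are zero
  const uint8_t SmallC = 0x5A;
  const uint16_t CMax = 0x00FF; // all-ones at the truncated width
  for (unsigned X = 0; X < 65536; ++X) {
    bool Orig = ((uint8_t)X == SmallC) && ((X & AndC) == BigC);
    // (trunc x) == C1 & (x & CA) == C2 --> (x & (CA|CMAX)) == zext(C1)|C2
    bool Folded = (X & (AndC | CMax)) == ((uint16_t)SmallC | BigC);
    assert(Orig == Folded);
  }
  return 0;
}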
// From here on, we only handle:
// (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
- if (Val != Val2) return nullptr;
+ if (LHS0 != RHS0)
+ return nullptr;
- // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
- if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
- RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
- LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
- RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+ // ICMP_[US][GL]E X, C is folded to ICMP_[US][GL]T elsewhere.
+ if (PredL == ICmpInst::ICMP_UGE || PredL == ICmpInst::ICMP_ULE ||
+ PredR == ICmpInst::ICMP_UGE || PredR == ICmpInst::ICMP_ULE ||
+ PredL == ICmpInst::ICMP_SGE || PredL == ICmpInst::ICMP_SLE ||
+ PredR == ICmpInst::ICMP_SGE || PredR == ICmpInst::ICMP_SLE)
return nullptr;
// We can't fold (ugt x, C) & (sgt x, C2).
- if (!PredicatesFoldable(LHSCC, RHSCC))
+ if (!PredicatesFoldable(PredL, PredR))
return nullptr;
// Ensure that the larger constant is on the RHS.
bool ShouldSwap;
- if (CmpInst::isSigned(LHSCC) ||
- (ICmpInst::isEquality(LHSCC) &&
- CmpInst::isSigned(RHSCC)))
- ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+ if (CmpInst::isSigned(PredL) ||
+ (ICmpInst::isEquality(PredL) && CmpInst::isSigned(PredR)))
+ ShouldSwap = LHSC->getValue().sgt(RHSC->getValue());
else
- ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+ ShouldSwap = LHSC->getValue().ugt(RHSC->getValue());
if (ShouldSwap) {
std::swap(LHS, RHS);
- std::swap(LHSCst, RHSCst);
- std::swap(LHSCC, RHSCC);
+ std::swap(LHSC, RHSC);
+ std::swap(PredL, PredR);
}
// At this point, we know we have two icmp instructions
@@ -907,113 +901,95 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know
// (from the icmp folding check above), that the two constants
// are not equal and that the larger constant is on the RHS
- assert(LHSCst != RHSCst && "Compares not folded above?");
+ assert(LHSC != RHSC && "Compares not folded above?");
- switch (LHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
+ switch (PredL) {
+ default:
+ llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13
- case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13
- case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13
+ switch (PredR) {
+ default:
+ llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13
+ case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13
+ case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13
return LHS;
}
case ICmpInst::ICMP_NE:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
+ switch (PredR) {
+ default:
+ llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_ULT:
- if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
- return Builder->CreateICmpULT(Val, LHSCst);
- if (LHSCst->isNullValue()) // (X != 0 & X u< 14) -> X-1 u< 13
- return insertRangeTest(Val, LHSCst->getValue() + 1, RHSCst->getValue(),
+ if (LHSC == SubOne(RHSC)) // (X != 13 & X u< 14) -> X < 13
+ return Builder->CreateICmpULT(LHS0, LHSC);
+ if (LHSC->isNullValue()) // (X != 0 & X u< 14) -> X-1 u< 13
+ return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
false, true);
- break; // (X != 13 & X u< 15) -> no change
+ break; // (X != 13 & X u< 15) -> no change
case ICmpInst::ICMP_SLT:
- if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
- return Builder->CreateICmpSLT(Val, LHSCst);
- break; // (X != 13 & X s< 15) -> no change
- case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15
- case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15
- case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
+ if (LHSC == SubOne(RHSC)) // (X != 13 & X s< 14) -> X < 13
+ return Builder->CreateICmpSLT(LHS0, LHSC);
+ break; // (X != 13 & X s< 15) -> no change
+ case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15
+ case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15
return RHS;
case ICmpInst::ICMP_NE:
- // Special case to get the ordering right when the values wrap around
- // zero.
- if (LHSCst->getValue() == 0 && RHSCst->getValue().isAllOnesValue())
- std::swap(LHSCst, RHSCst);
- if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1
- Constant *AddCST = ConstantExpr::getNeg(LHSCst);
- Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
- return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1),
- Val->getName()+".cmp");
- }
- break; // (X != 13 & X != 15) -> no change
+ // Potential folds for this case should already be handled.
+ break;
}
break;
case ICmpInst::ICMP_ULT:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false
- case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false
+ switch (PredR) {
+ default:
+ llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false
+ case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
- case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change
- break;
- case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13
- case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13
+ case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13
+ case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13
return LHS;
- case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change
- break;
}
break;
case ICmpInst::ICMP_SLT:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change
- break;
- case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13
- case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13
+ switch (PredR) {
+ default:
+ llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13
+ case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13
return LHS;
- case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change
- break;
}
break;
case ICmpInst::ICMP_UGT:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
- case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
+ switch (PredR) {
+ default:
+ llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15
return RHS;
- case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change
- break;
case ICmpInst::ICMP_NE:
- if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14
- return Builder->CreateICmp(LHSCC, Val, RHSCst);
- break; // (X u> 13 & X != 15) -> no change
- case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
- return insertRangeTest(Val, LHSCst->getValue() + 1, RHSCst->getValue(),
+ if (RHSC == AddOne(LHSC)) // (X u> 13 & X != 14) -> X u> 14
+ return Builder->CreateICmp(PredL, LHS0, RHSC);
+ break; // (X u> 13 & X != 15) -> no change
+ case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
+ return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
false, true);
- case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change
- break;
}
break;
case ICmpInst::ICMP_SGT:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15
- case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15
+ switch (PredR) {
+ default:
+ llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15
+ case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15
return RHS;
- case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change
- break;
case ICmpInst::ICMP_NE:
- if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14
- return Builder->CreateICmp(LHSCC, Val, RHSCst);
- break; // (X s> 13 & X != 15) -> no change
- case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
- return insertRangeTest(Val, LHSCst->getValue() + 1, RHSCst->getValue(),
- true, true);
- case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change
- break;
+ if (RHSC == AddOne(LHSC)) // (X s> 13 & X != 14) -> X s> 14
+ return Builder->CreateICmp(PredL, LHS0, RHSC);
+ break; // (X s> 13 & X != 15) -> no change
+ case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
+ return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), true,
+ true);
}
break;
}
@@ -1314,39 +1290,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
break;
}
- case Instruction::Add:
- // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
- // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
- // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
- if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
- return BinaryOperator::CreateAnd(V, AndRHS);
- if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
- return BinaryOperator::CreateAnd(V, AndRHS); // Add commutes
- break;
-
case Instruction::Sub:
- // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS.
- // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
- // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
- if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
- return BinaryOperator::CreateAnd(V, AndRHS);
-
// -x & 1 -> x & 1
if (AndRHSMask == 1 && match(Op0LHS, m_Zero()))
return BinaryOperator::CreateAnd(Op0RHS, AndRHS);
- // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
- // has 1's for all bits that the subtraction with A might affect.
- if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) {
- uint32_t BitWidth = AndRHSMask.getBitWidth();
- uint32_t Zeros = AndRHSMask.countLeadingZeros();
- APInt Mask = APInt::getLowBitsSet(BitWidth, BitWidth - Zeros);
-
- if (MaskedValueIsZero(Op0LHS, Mask, 0, &I)) {
- Value *NewNeg = Builder->CreateNeg(Op0RHS);
- return BinaryOperator::CreateAnd(NewNeg, AndRHS);
- }
- }
break;
case Instruction::Shl:
@@ -1361,6 +1309,33 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
break;
}
+ // ((C1 OP zext(X)) & C2) -> zext((trunc(C1) OP X) & C2) if C2 fits in the
+ // bitwidth of X and OP behaves well when given trunc(C1) and X.
+ switch (Op0I->getOpcode()) {
+ default:
+ break;
+ case Instruction::Xor:
+ case Instruction::Or:
+ case Instruction::Mul:
+ case Instruction::Add:
+ case Instruction::Sub:
+ Value *X;
+ ConstantInt *C1;
+ if (match(Op0I, m_c_BinOp(m_ZExt(m_Value(X)), m_ConstantInt(C1)))) {
+ if (AndRHSMask.isIntN(X->getType()->getScalarSizeInBits())) {
+ auto *TruncC1 = ConstantExpr::getTrunc(C1, X->getType());
+ Value *BinOp;
+ if (isa<ZExtInst>(Op0LHS))
+ BinOp = Builder->CreateBinOp(Op0I->getOpcode(), X, TruncC1);
+ else
+ BinOp = Builder->CreateBinOp(Op0I->getOpcode(), TruncC1, X);
+ auto *TruncC2 = ConstantExpr::getTrunc(AndRHS, X->getType());
+ auto *And = Builder->CreateAnd(BinOp, TruncC2);
+ return new ZExtInst(And, I.getType());
+ }
+ }
+ }
+
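For the listed opcodes the low bits of the result depend only on the low bits of the operands, so when AndRHS fits in X's width the whole computation can be done narrow and zero-extended afterwards. A standalone check for the 'sub' case with hypothetical constants (plain C++):

#include <cassert>
#include <cstdint>

int main() {
  const uint16_t C1 = 0x1234; // wide constant
  const uint16_t C2 = 0x00FF; // fits in the 8-bit width of X
  for (unsigned X = 0; X < 256; ++X) {
    // Wide form: operate on zext(X) at 16 bits, then mask.
    uint16_t Wide = (uint16_t)((uint16_t)(C1 - X) & C2);
    // Narrow form: operate at 8 bits, mask, then zero-extend.
    uint8_t Diff = (uint8_t)((uint8_t)C1 - (uint8_t)X);
    uint16_t Narrow = (uint16_t)(Diff & (uint8_t)C2);
    assert(Wide == Narrow);
  }
  return 0;
}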
if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1)))
if (Instruction *Res = OptAndOp(Op0I, Op0CI, AndRHS, I))
return Res;
@@ -1381,10 +1356,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return BinaryOperator::CreateAnd(NewCast, C3);
}
}
+ }
+ if (isa<Constant>(Op1))
if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I))
return FoldedLogic;
- }
if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
return DeMorgan;
@@ -1630,15 +1606,15 @@ static Value *matchSelectFromAndOr(Value *A, Value *C, Value *B, Value *D,
/// Fold (icmp)|(icmp) if possible.
Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Instruction *CxtI) {
- ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+ ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
// Fold (iszero(A & K1) | iszero(A & K2)) -> (A & (K1 | K2)) != (K1 | K2)
// if K1 and K2 are a one-bit mask.
- ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
- ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
+ ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));
+ ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS->getOperand(1));
- if (LHS->getPredicate() == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero() &&
- RHS->getPredicate() == ICmpInst::ICMP_EQ && RHSCst && RHSCst->isZero()) {
+ if (LHS->getPredicate() == ICmpInst::ICMP_EQ && LHSC && LHSC->isZero() &&
+ RHS->getPredicate() == ICmpInst::ICMP_EQ && RHSC && RHSC->isZero()) {
BinaryOperator *LAnd = dyn_cast<BinaryOperator>(LHS->getOperand(0));
BinaryOperator *RAnd = dyn_cast<BinaryOperator>(RHS->getOperand(0));
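With K1 and K2 single-bit masks, "either bit of A is clear" is the complement of "both bits are set", which is exactly what the combined compare tests. A standalone check with hypothetical bits (plain C++, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t K1 = 0x04, K2 = 0x20; // one-bit masks
  for (unsigned A = 0; A < 256; ++A) {
    bool Orig = ((A & K1) == 0) || ((A & K2) == 0);
    // (A & (K1 | K2)) != (K1 | K2) is false only when both bits are set.
    bool Folded = (A & (K1 | K2)) != (K1 | K2);
    assert(Orig == Folded);
  }
  return 0;
}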
@@ -1680,52 +1656,52 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// 4) LowRange1 ^ LowRange2 and HighRange1 ^ HighRange2 are one-bit mask.
// This implies all values in the two ranges differ by exactly one bit.
- if ((LHSCC == ICmpInst::ICMP_ULT || LHSCC == ICmpInst::ICMP_ULE) &&
- LHSCC == RHSCC && LHSCst && RHSCst && LHS->hasOneUse() &&
- RHS->hasOneUse() && LHSCst->getType() == RHSCst->getType() &&
- LHSCst->getValue() == (RHSCst->getValue())) {
+ if ((PredL == ICmpInst::ICMP_ULT || PredL == ICmpInst::ICMP_ULE) &&
+ PredL == PredR && LHSC && RHSC && LHS->hasOneUse() && RHS->hasOneUse() &&
+ LHSC->getType() == RHSC->getType() &&
+ LHSC->getValue() == (RHSC->getValue())) {
Value *LAdd = LHS->getOperand(0);
Value *RAdd = RHS->getOperand(0);
Value *LAddOpnd, *RAddOpnd;
- ConstantInt *LAddCst, *RAddCst;
- if (match(LAdd, m_Add(m_Value(LAddOpnd), m_ConstantInt(LAddCst))) &&
- match(RAdd, m_Add(m_Value(RAddOpnd), m_ConstantInt(RAddCst))) &&
- LAddCst->getValue().ugt(LHSCst->getValue()) &&
- RAddCst->getValue().ugt(LHSCst->getValue())) {
-
- APInt DiffCst = LAddCst->getValue() ^ RAddCst->getValue();
- if (LAddOpnd == RAddOpnd && DiffCst.isPowerOf2()) {
- ConstantInt *MaxAddCst = nullptr;
- if (LAddCst->getValue().ult(RAddCst->getValue()))
- MaxAddCst = RAddCst;
+ ConstantInt *LAddC, *RAddC;
+ if (match(LAdd, m_Add(m_Value(LAddOpnd), m_ConstantInt(LAddC))) &&
+ match(RAdd, m_Add(m_Value(RAddOpnd), m_ConstantInt(RAddC))) &&
+ LAddC->getValue().ugt(LHSC->getValue()) &&
+ RAddC->getValue().ugt(LHSC->getValue())) {
+
+ APInt DiffC = LAddC->getValue() ^ RAddC->getValue();
+ if (LAddOpnd == RAddOpnd && DiffC.isPowerOf2()) {
+ ConstantInt *MaxAddC = nullptr;
+ if (LAddC->getValue().ult(RAddC->getValue()))
+ MaxAddC = RAddC;
else
- MaxAddCst = LAddCst;
+ MaxAddC = LAddC;
- APInt RRangeLow = -RAddCst->getValue();
- APInt RRangeHigh = RRangeLow + LHSCst->getValue();
- APInt LRangeLow = -LAddCst->getValue();
- APInt LRangeHigh = LRangeLow + LHSCst->getValue();
+ APInt RRangeLow = -RAddC->getValue();
+ APInt RRangeHigh = RRangeLow + LHSC->getValue();
+ APInt LRangeLow = -LAddC->getValue();
+ APInt LRangeHigh = LRangeLow + LHSC->getValue();
APInt LowRangeDiff = RRangeLow ^ LRangeLow;
APInt HighRangeDiff = RRangeHigh ^ LRangeHigh;
APInt RangeDiff = LRangeLow.sgt(RRangeLow) ? LRangeLow - RRangeLow
: RRangeLow - LRangeLow;
if (LowRangeDiff.isPowerOf2() && LowRangeDiff == HighRangeDiff &&
- RangeDiff.ugt(LHSCst->getValue())) {
- Value *MaskCst = ConstantInt::get(LAddCst->getType(), ~DiffCst);
+ RangeDiff.ugt(LHSC->getValue())) {
+ Value *MaskC = ConstantInt::get(LAddC->getType(), ~DiffC);
- Value *NewAnd = Builder->CreateAnd(LAddOpnd, MaskCst);
- Value *NewAdd = Builder->CreateAdd(NewAnd, MaxAddCst);
- return (Builder->CreateICmp(LHS->getPredicate(), NewAdd, LHSCst));
+ Value *NewAnd = Builder->CreateAnd(LAddOpnd, MaskC);
+ Value *NewAdd = Builder->CreateAdd(NewAnd, MaxAddC);
+ return (Builder->CreateICmp(LHS->getPredicate(), NewAdd, LHSC));
}
}
}
}
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
- if (PredicatesFoldable(LHSCC, RHSCC)) {
+ if (PredicatesFoldable(PredL, PredR)) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
LHS->getOperand(1) == RHS->getOperand(0))
LHS->swapOperands();
@@ -1743,25 +1719,25 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if (Value *V = foldLogOpOfMaskedICmps(LHS, RHS, false, Builder))
return V;
- Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
+ Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
if (LHS->hasOneUse() || RHS->hasOneUse()) {
// (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
// (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1)
Value *A = nullptr, *B = nullptr;
- if (LHSCC == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero()) {
- B = Val;
- if (RHSCC == ICmpInst::ICMP_ULT && Val == RHS->getOperand(1))
- A = Val2;
- else if (RHSCC == ICmpInst::ICMP_UGT && Val == Val2)
+ if (PredL == ICmpInst::ICMP_EQ && LHSC && LHSC->isZero()) {
+ B = LHS0;
+ if (PredR == ICmpInst::ICMP_ULT && LHS0 == RHS->getOperand(1))
+ A = RHS0;
+ else if (PredR == ICmpInst::ICMP_UGT && LHS0 == RHS0)
A = RHS->getOperand(1);
}
// (icmp ult A, B) | (icmp eq B, 0) -> (icmp ule A, B-1)
// (icmp ugt B, A) | (icmp eq B, 0) -> (icmp ule A, B-1)
- else if (RHSCC == ICmpInst::ICMP_EQ && RHSCst && RHSCst->isZero()) {
- B = Val2;
- if (LHSCC == ICmpInst::ICMP_ULT && Val2 == LHS->getOperand(1))
- A = Val;
- else if (LHSCC == ICmpInst::ICMP_UGT && Val2 == Val)
+ else if (PredR == ICmpInst::ICMP_EQ && RHSC && RHSC->isZero()) {
+ B = RHS0;
+ if (PredL == ICmpInst::ICMP_ULT && RHS0 == LHS->getOperand(1))
+ A = LHS0;
+ else if (PredL == ICmpInst::ICMP_UGT && LHS0 == RHS0)
A = LHS->getOperand(1);
}
if (A && B)
@@ -1778,54 +1754,58 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/true))
return V;
+ if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, false, Builder))
+ return V;
+
// This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
- if (!LHSCst || !RHSCst) return nullptr;
+ if (!LHSC || !RHSC)
+ return nullptr;
- if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ if (LHSC == RHSC && PredL == PredR) {
// (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
- if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
- Value *NewOr = Builder->CreateOr(Val, Val2);
- return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ if (PredL == ICmpInst::ICMP_NE && LHSC->isZero()) {
+ Value *NewOr = Builder->CreateOr(LHS0, RHS0);
+ return Builder->CreateICmp(PredL, NewOr, LHSC);
}
}
// (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
// iff C2 + CA == C1.
- if (LHSCC == ICmpInst::ICMP_ULT && RHSCC == ICmpInst::ICMP_EQ) {
- ConstantInt *AddCst;
- if (match(Val, m_Add(m_Specific(Val2), m_ConstantInt(AddCst))))
- if (RHSCst->getValue() + AddCst->getValue() == LHSCst->getValue())
- return Builder->CreateICmpULE(Val, LHSCst);
+ if (PredL == ICmpInst::ICMP_ULT && PredR == ICmpInst::ICMP_EQ) {
+ ConstantInt *AddC;
+ if (match(LHS0, m_Add(m_Specific(RHS0), m_ConstantInt(AddC))))
+ if (RHSC->getValue() + AddC->getValue() == LHSC->getValue())
+ return Builder->CreateICmpULE(LHS0, LHSC);
}
// From here on, we only handle:
// (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
- if (Val != Val2) return nullptr;
+ if (LHS0 != RHS0)
+ return nullptr;
- // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
- if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
- RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
- LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
- RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
+ // ICMP_[US][GL]E X, C is folded to ICMP_[US][GL]T elsewhere.
+ if (PredL == ICmpInst::ICMP_UGE || PredL == ICmpInst::ICMP_ULE ||
+ PredR == ICmpInst::ICMP_UGE || PredR == ICmpInst::ICMP_ULE ||
+ PredL == ICmpInst::ICMP_SGE || PredL == ICmpInst::ICMP_SLE ||
+ PredR == ICmpInst::ICMP_SGE || PredR == ICmpInst::ICMP_SLE)
return nullptr;
// We can't fold (ugt x, C) | (sgt x, C2).
- if (!PredicatesFoldable(LHSCC, RHSCC))
+ if (!PredicatesFoldable(PredL, PredR))
return nullptr;
// Ensure that the larger constant is on the RHS.
bool ShouldSwap;
- if (CmpInst::isSigned(LHSCC) ||
- (ICmpInst::isEquality(LHSCC) &&
- CmpInst::isSigned(RHSCC)))
- ShouldSwap = LHSCst->getValue().sgt(RHSCst->getValue());
+ if (CmpInst::isSigned(PredL) ||
+ (ICmpInst::isEquality(PredL) && CmpInst::isSigned(PredR)))
+ ShouldSwap = LHSC->getValue().sgt(RHSC->getValue());
else
- ShouldSwap = LHSCst->getValue().ugt(RHSCst->getValue());
+ ShouldSwap = LHSC->getValue().ugt(RHSC->getValue());
if (ShouldSwap) {
std::swap(LHS, RHS);
- std::swap(LHSCst, RHSCst);
- std::swap(LHSCC, RHSCC);
+ std::swap(LHSC, RHSC);
+ std::swap(PredL, PredR);
}
// At this point, we know we have two icmp instructions
@@ -1834,127 +1814,98 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the
// icmp folding check above), that the two constants are not
// equal.
- assert(LHSCst != RHSCst && "Compares not folded above?");
+ assert(LHSC != RHSC && "Compares not folded above?");
- switch (LHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
+ switch (PredL) {
+ default:
+ llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
+ switch (PredR) {
+ default:
+ llvm_unreachable("Unknown integer condition code!");
case ICmpInst::ICMP_EQ:
- if (LHS->getOperand(0) == RHS->getOperand(0)) {
- // if LHSCst and RHSCst differ only by one bit:
- // (A == C1 || A == C2) -> (A | (C1 ^ C2)) == C2
- assert(LHSCst->getValue().ule(LHSCst->getValue()));
-
- APInt Xor = LHSCst->getValue() ^ RHSCst->getValue();
- if (Xor.isPowerOf2()) {
- Value *Cst = Builder->getInt(Xor);
- Value *Or = Builder->CreateOr(LHS->getOperand(0), Cst);
- return Builder->CreateICmp(ICmpInst::ICMP_EQ, Or, RHSCst);
- }
- }
-
- if (LHSCst == SubOne(RHSCst)) {
- // (X == 13 | X == 14) -> X-13 <u 2
- Constant *AddCST = ConstantExpr::getNeg(LHSCst);
- Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");
- AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
- return Builder->CreateICmpULT(Add, AddCST);
- }
-
- break; // (X == 13 | X == 15) -> no change
- case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
- case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
+ // Potential folds for this case should already be handled.
+ break;
+ case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change
+ case ICmpInst::ICMP_SGT: // (X == 13 | X s> 14) -> no change
break;
- case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15
- case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15
- case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15
+ case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15
+ case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15
return RHS;
}
break;
case ICmpInst::ICMP_NE:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13
- case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13
- case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13
+ switch (PredR) {
+ default:
+ llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13
+ case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13
+ case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13
return LHS;
- case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true
- case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true
- case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
+ case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true
+ case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
return Builder->getTrue();
}
case ICmpInst::ICMP_ULT:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change
+ switch (PredR) {
+ default:
+ llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change
break;
- case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2
- // If RHSCst is [us]MAXINT, it is always false. Not handling
+ case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2
+ // If RHSC is [us]MAXINT, it is always false. Not handling
// this can cause overflow.
- if (RHSCst->isMaxValue(false))
+ if (RHSC->isMaxValue(false))
return LHS;
- return insertRangeTest(Val, LHSCst->getValue(), RHSCst->getValue() + 1,
+ return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue() + 1,
false, false);
- case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change
- break;
- case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15
- case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15
+ case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15
return RHS;
- case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change
- break;
}
break;
case ICmpInst::ICMP_SLT:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change
+ switch (PredR) {
+ default:
+ llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s< 13 | X == 14) -> no change
break;
- case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2
- // If RHSCst is [us]MAXINT, it is always false. Not handling
+ case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) s> 2
+        // If RHSC is [us]MAXINT, the RHS compare is always false. Not
        // handling this case would let the range test below overflow.
- if (RHSCst->isMaxValue(true))
+ if (RHSC->isMaxValue(true))
return LHS;
- return insertRangeTest(Val, LHSCst->getValue(), RHSCst->getValue() + 1,
- true, false);
- case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change
- break;
- case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15
- case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15
+ return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue() + 1, true,
+ false);
+ case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15
+ case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15
return RHS;
- case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change
- break;
}
break;
case ICmpInst::ICMP_UGT:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13
- case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13
+ switch (PredR) {
+ default:
+ llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13
+ case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13
return LHS;
- case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change
- break;
- case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true
- case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true
+ case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true
+ case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true
return Builder->getTrue();
- case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change
- break;
}
break;
case ICmpInst::ICMP_SGT:
- switch (RHSCC) {
- default: llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13
- case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13
+ switch (PredR) {
+ default:
+ llvm_unreachable("Unknown integer condition code!");
+ case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13
+ case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13
return LHS;
- case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change
- break;
- case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true
- case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true
+ case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true
+ case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true
return Builder->getTrue();
- case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change
- break;
}
break;
}
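As a sanity check (standalone C++, not part of the patch), the unsigned-wraparound identity behind the range folds above, e.g. (X u< 13 | X u> 15) -> (X-13) u> 2, can be verified exhaustively over i8:

#include <cassert>
#include <cstdint>

int main() {
  // (X u< 13 || X u> 15) == ((X - 13) u> 2): the subtraction wraps for
  // X < 13, which is exactly what makes the single compare sufficient.
  for (unsigned V = 0; V < 256; ++V) {
    uint8_t X = static_cast<uint8_t>(V);
    assert(((X < 13) || (X > 15)) == (static_cast<uint8_t>(X - 13) > 2));
  }
  return 0;
}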
@@ -2100,17 +2051,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
ConstantInt *C1 = nullptr; Value *X = nullptr;
- // (X & C1) | C2 --> (X | C2) & (C1|C2)
- // iff (C1 & C2) == 0.
- if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
- (RHS->getValue() & C1->getValue()) != 0 &&
- Op0->hasOneUse()) {
- Value *Or = Builder->CreateOr(X, RHS);
- Or->takeName(Op0);
- return BinaryOperator::CreateAnd(Or,
- Builder->getInt(RHS->getValue() | C1->getValue()));
- }
-
// (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) &&
Op0->hasOneUse()) {
@@ -2119,45 +2059,51 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return BinaryOperator::CreateXor(Or,
Builder->getInt(C1->getValue() & ~RHS->getValue()));
}
+ }
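For reference, the (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) rewrite kept above is an unconditional bitwise identity; a standalone exhaustive check over i8 (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // Per bit: where C2 is set both sides are 1; elsewhere both reduce to X ^ C1.
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C1 = 0; C1 < 256; ++C1)
      for (unsigned C2 = 0; C2 < 256; ++C2)
        assert(static_cast<uint8_t>((X ^ C1) | C2) ==
               static_cast<uint8_t>((X | C2) ^ (C1 & ~C2)));
  return 0;
}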
+ if (isa<Constant>(Op1))
if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I))
return FoldedLogic;
- }
// Given an OR instruction, check to see if this is a bswap.
if (Instruction *BSwap = MatchBSwap(I))
return BSwap;
- Value *A = nullptr, *B = nullptr;
- ConstantInt *C1 = nullptr, *C2 = nullptr;
+ {
+ Value *A;
+ const APInt *C;
+ // (X^C)|Y -> (X|Y)^C iff Y&C == 0
+ if (match(Op0, m_OneUse(m_Xor(m_Value(A), m_APInt(C)))) &&
+ MaskedValueIsZero(Op1, *C, 0, &I)) {
+ Value *NOr = Builder->CreateOr(A, Op1);
+ NOr->takeName(Op0);
+ return BinaryOperator::CreateXor(NOr,
+ cast<Instruction>(Op0)->getOperand(1));
+ }
- // (X^C)|Y -> (X|Y)^C iff Y&C == 0
- if (Op0->hasOneUse() &&
- match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
- MaskedValueIsZero(Op1, C1->getValue(), 0, &I)) {
- Value *NOr = Builder->CreateOr(A, Op1);
- NOr->takeName(Op0);
- return BinaryOperator::CreateXor(NOr, C1);
+ // Y|(X^C) -> (X|Y)^C iff Y&C == 0
+ if (match(Op1, m_OneUse(m_Xor(m_Value(A), m_APInt(C)))) &&
+ MaskedValueIsZero(Op0, *C, 0, &I)) {
+ Value *NOr = Builder->CreateOr(A, Op0);
+ NOr->takeName(Op0);
+ return BinaryOperator::CreateXor(NOr,
+ cast<Instruction>(Op1)->getOperand(1));
+ }
}
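The (X^C)|Y fold is sound only under the Y&C == 0 side condition that MaskedValueIsZero establishes; a standalone exhaustive check over i8 (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // When Y & C == 0, the or and the xor act on disjoint bits and commute.
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 0; C < 256; ++C)
      for (unsigned Y = 0; Y < 256; ++Y)
        if ((Y & C) == 0)
          assert(static_cast<uint8_t>((X ^ C) | Y) ==
                 static_cast<uint8_t>((X | Y) ^ C));
  return 0;
}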
- // Y|(X^C) -> (X|Y)^C iff Y&C == 0
- if (Op1->hasOneUse() &&
- match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&
- MaskedValueIsZero(Op0, C1->getValue(), 0, &I)) {
- Value *NOr = Builder->CreateOr(A, Op0);
- NOr->takeName(Op0);
- return BinaryOperator::CreateXor(NOr, C1);
- }
+ Value *A, *B;
// ((~A & B) | A) -> (A | B)
- if (match(Op0, m_And(m_Not(m_Value(A)), m_Value(B))) &&
- match(Op1, m_Specific(A)))
- return BinaryOperator::CreateOr(A, B);
+ if (match(Op0, m_c_And(m_Not(m_Specific(Op1)), m_Value(A))))
+ return BinaryOperator::CreateOr(A, Op1);
+ if (match(Op1, m_c_And(m_Not(m_Specific(Op0)), m_Value(A))))
+ return BinaryOperator::CreateOr(Op0, A);
// ((A & B) | ~A) -> (~A | B)
- if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
- match(Op1, m_Not(m_Specific(A))))
- return BinaryOperator::CreateOr(Builder->CreateNot(A), B);
+ // The NOT is guaranteed to be in the RHS by complexity ordering.
+ if (match(Op1, m_Not(m_Value(A))) &&
+ match(Op0, m_c_And(m_Specific(A), m_Value(B))))
+ return BinaryOperator::CreateOr(Op1, B);
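Both absorption folds above are plain Boolean identities; a standalone i8 check (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      // ((~A & B) | A) == (A | B)
      assert(static_cast<uint8_t>((~A & B) | A) ==
             static_cast<uint8_t>(A | B));
      // ((A & B) | ~A) == (~A | B)
      assert(static_cast<uint8_t>((A & B) | ~A) ==
             static_cast<uint8_t>(~A | B));
    }
  return 0;
}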
// (A & ~B) | (A ^ B) -> (A ^ B)
// (~B & A) | (A ^ B) -> (A ^ B)
@@ -2177,8 +2123,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
match(Op1, m_And(m_Value(B), m_Value(D)))) {
Value *V1 = nullptr, *V2 = nullptr;
- C1 = dyn_cast<ConstantInt>(C);
- C2 = dyn_cast<ConstantInt>(D);
+ ConstantInt *C1 = dyn_cast<ConstantInt>(C);
+ ConstantInt *C2 = dyn_cast<ConstantInt>(D);
if (C1 && C2) { // (A & C1)|(B & C2)
if ((C1->getValue() & C2->getValue()) == 0) {
// ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
@@ -2403,6 +2349,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// be simplified by a later pass either, so we try swapping the inner/outer
// ORs in the hopes that we'll be able to simplify it this way.
// (X|C) | V --> (X|V) | C
+ ConstantInt *C1;
if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) &&
match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) {
Value *Inner = Builder->CreateOr(A, Op1);
@@ -2493,23 +2440,22 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
- if (Constant *RHS = dyn_cast<Constant>(Op1)) {
- if (RHS->isAllOnesValue() && Op0->hasOneUse())
- // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
- if (CmpInst *CI = dyn_cast<CmpInst>(Op0))
- return CmpInst::Create(CI->getOpcode(),
- CI->getInversePredicate(),
- CI->getOperand(0), CI->getOperand(1));
+ // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B
+ ICmpInst::Predicate Pred;
+ if (match(Op0, m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))) &&
+ match(Op1, m_AllOnes())) {
+ cast<CmpInst>(Op0)->setPredicate(CmpInst::getInversePredicate(Pred));
+ return replaceInstUsesWith(I, Op0);
}
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1)) {
// fold (xor(zext(cmp)), 1) and (xor(sext(cmp)), -1) to ext(!cmp).
if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {
if (CI->hasOneUse() && Op0C->hasOneUse()) {
Instruction::CastOps Opcode = Op0C->getOpcode();
if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
- (RHS == ConstantExpr::getCast(Opcode, Builder->getTrue(),
+ (RHSC == ConstantExpr::getCast(Opcode, Builder->getTrue(),
Op0C->getDestTy()))) {
CI->setPredicate(CI->getInversePredicate());
return CastInst::Create(Opcode, CI, Op0C->getType());
@@ -2520,26 +2466,23 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
// ~(c-X) == X-c-1 == X+(-c-1)
- if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())
+ if (Op0I->getOpcode() == Instruction::Sub && RHSC->isAllOnesValue())
if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) {
Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C);
- Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C,
- ConstantInt::get(I.getType(), 1));
- return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);
+ return BinaryOperator::CreateAdd(Op0I->getOperand(1),
+ SubOne(NegOp0I0C));
}
if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) {
if (Op0I->getOpcode() == Instruction::Add) {
          // ~(X+c) --> (-c-1)-X
- if (RHS->isAllOnesValue()) {
+ if (RHSC->isAllOnesValue()) {
Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);
- return BinaryOperator::CreateSub(
- ConstantExpr::getSub(NegOp0CI,
- ConstantInt::get(I.getType(), 1)),
- Op0I->getOperand(0));
- } else if (RHS->getValue().isSignBit()) {
+ return BinaryOperator::CreateSub(SubOne(NegOp0CI),
+ Op0I->getOperand(0));
+ } else if (RHSC->getValue().isSignBit()) {
// (X + C) ^ signbit -> (X + C + signbit)
- Constant *C = Builder->getInt(RHS->getValue() + Op0CI->getValue());
+ Constant *C = Builder->getInt(RHSC->getValue() + Op0CI->getValue());
return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);
}
@@ -2547,10 +2490,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0
if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue(),
0, &I)) {
- Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
+ Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHSC);
// Anything in both C1 and C2 is known to be zero, remove it from
// NewRHS.
- Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
+ Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHSC);
NewRHS = ConstantExpr::getAnd(NewRHS,
ConstantExpr::getNot(CommonBits));
Worklist.Add(Op0I);
@@ -2568,7 +2511,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
E1->getOpcode() == Instruction::Xor &&
(C1 = dyn_cast<ConstantInt>(E1->getOperand(1)))) {
// fold (C1 >> C2) ^ C3
- ConstantInt *C2 = Op0CI, *C3 = RHS;
+ ConstantInt *C2 = Op0CI, *C3 = RHSC;
APInt FoldConst = C1->getValue().lshr(C2->getValue());
FoldConst ^= C3->getValue();
// Prepare the two operands.
@@ -2582,27 +2525,26 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
}
+ }
+ if (isa<Constant>(Op1))
if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I))
return FoldedLogic;
- }
- BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1);
- if (Op1I) {
+ {
Value *A, *B;
- if (match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
- if (A == Op0) { // B^(B|A) == (A|B)^B
- Op1I->swapOperands();
- I.swapOperands();
- std::swap(Op0, Op1);
- } else if (B == Op0) { // B^(A|B) == (A|B)^B
+ if (match(Op1, m_OneUse(m_Or(m_Value(A), m_Value(B))))) {
+ if (A == Op0) { // A^(A|B) == A^(B|A)
+ cast<BinaryOperator>(Op1)->swapOperands();
+ std::swap(A, B);
+ }
+ if (B == Op0) { // A^(B|A) == (B|A)^A
I.swapOperands(); // Simplified below.
std::swap(Op0, Op1);
}
- } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&
- Op1I->hasOneUse()){
+ } else if (match(Op1, m_OneUse(m_And(m_Value(A), m_Value(B))))) {
if (A == Op0) { // A^(A&B) -> A^(B&A)
- Op1I->swapOperands();
+ cast<BinaryOperator>(Op1)->swapOperands();
std::swap(A, B);
}
if (B == Op0) { // A^(B&A) -> (B&A)^A
@@ -2612,65 +2554,63 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
- BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);
- if (Op0I) {
+ {
Value *A, *B;
- if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
- Op0I->hasOneUse()) {
+ if (match(Op0, m_OneUse(m_Or(m_Value(A), m_Value(B))))) {
if (A == Op1) // (B|A)^B == (A|B)^B
std::swap(A, B);
if (B == Op1) // (A|B)^B == A & ~B
return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1));
- } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
- Op0I->hasOneUse()){
+ } else if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B))))) {
if (A == Op1) // (A&B)^A -> (B&A)^A
std::swap(A, B);
+ const APInt *C;
if (B == Op1 && // (B&A)^A == ~B & A
- !isa<ConstantInt>(Op1)) { // Canonical form is (B&C)^C
+ !match(Op1, m_APInt(C))) { // Canonical form is (B&C)^C
return BinaryOperator::CreateAnd(Builder->CreateNot(A), Op1);
}
}
}
- if (Op0I && Op1I) {
+ {
Value *A, *B, *C, *D;
// (A & B)^(A | B) -> A ^ B
- if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
- match(Op1I, m_Or(m_Value(C), m_Value(D)))) {
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Or(m_Value(C), m_Value(D)))) {
if ((A == C && B == D) || (A == D && B == C))
return BinaryOperator::CreateXor(A, B);
}
// (A | B)^(A & B) -> A ^ B
- if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
- match(Op1I, m_And(m_Value(C), m_Value(D)))) {
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_And(m_Value(C), m_Value(D)))) {
if ((A == C && B == D) || (A == D && B == C))
return BinaryOperator::CreateXor(A, B);
}
// (A | ~B) ^ (~A | B) -> A ^ B
// (~B | A) ^ (~A | B) -> A ^ B
- if (match(Op0I, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1I, m_Or(m_Not(m_Specific(A)), m_Specific(B))))
+ if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_Or(m_Not(m_Specific(A)), m_Specific(B))))
return BinaryOperator::CreateXor(A, B);
// (~A | B) ^ (A | ~B) -> A ^ B
- if (match(Op0I, m_Or(m_Not(m_Value(A)), m_Value(B))) &&
- match(Op1I, m_Or(m_Specific(A), m_Not(m_Specific(B))))) {
+ if (match(Op0, m_Or(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_Or(m_Specific(A), m_Not(m_Specific(B))))) {
return BinaryOperator::CreateXor(A, B);
}
// (A & ~B) ^ (~A & B) -> A ^ B
// (~B & A) ^ (~A & B) -> A ^ B
- if (match(Op0I, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1I, m_And(m_Not(m_Specific(A)), m_Specific(B))))
+ if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1, m_And(m_Not(m_Specific(A)), m_Specific(B))))
return BinaryOperator::CreateXor(A, B);
// (~A & B) ^ (A & ~B) -> A ^ B
- if (match(Op0I, m_And(m_Not(m_Value(A)), m_Value(B))) &&
- match(Op1I, m_And(m_Specific(A), m_Not(m_Specific(B))))) {
+ if (match(Op0, m_And(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B))))) {
return BinaryOperator::CreateXor(A, B);
}
// (A ^ C)^(A | B) -> ((~A) & B) ^ C
- if (match(Op0I, m_Xor(m_Value(D), m_Value(C))) &&
- match(Op1I, m_Or(m_Value(A), m_Value(B)))) {
+ if (match(Op0, m_Xor(m_Value(D), m_Value(C))) &&
+ match(Op1, m_Or(m_Value(A), m_Value(B)))) {
if (D == A)
return BinaryOperator::CreateXor(
Builder->CreateAnd(Builder->CreateNot(A), B), C);
@@ -2679,8 +2619,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
Builder->CreateAnd(Builder->CreateNot(B), A), C);
}
// (A | B)^(A ^ C) -> ((~A) & B) ^ C
- if (match(Op0I, m_Or(m_Value(A), m_Value(B))) &&
- match(Op1I, m_Xor(m_Value(D), m_Value(C)))) {
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Xor(m_Value(D), m_Value(C)))) {
if (D == A)
return BinaryOperator::CreateXor(
Builder->CreateAnd(Builder->CreateNot(A), B), C);
@@ -2689,12 +2629,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
Builder->CreateAnd(Builder->CreateNot(B), A), C);
}
// (A & B) ^ (A ^ B) -> (A | B)
- if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&
- match(Op1I, m_Xor(m_Specific(A), m_Specific(B))))
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ match(Op1, m_c_Xor(m_Specific(A), m_Specific(B))))
return BinaryOperator::CreateOr(A, B);
// (A ^ B) ^ (A & B) -> (A | B)
- if (match(Op0I, m_Xor(m_Value(A), m_Value(B))) &&
- match(Op1I, m_And(m_Specific(A), m_Specific(B))))
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Op1, m_c_And(m_Specific(A), m_Specific(B))))
return BinaryOperator::CreateOr(A, B);
}
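The xor identities this block now matches commutatively are all verifiable the same way; a standalone i8 check of the main ones (not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t AxB = static_cast<uint8_t>(A ^ B);
      assert(static_cast<uint8_t>((A & B) ^ (A | B)) == AxB);
      assert(static_cast<uint8_t>((A | ~B) ^ (~A | B)) == AxB);
      assert(static_cast<uint8_t>((A & ~B) ^ (~A & B)) == AxB);
      assert(static_cast<uint8_t>((A & B) ^ (A ^ B)) ==
             static_cast<uint8_t>(A | B));
    }
  return 0;
}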
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 2ef82ba3ed8c..69484f47223f 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -60,6 +60,12 @@ using namespace PatternMatch;
STATISTIC(NumSimplified, "Number of library calls simplified");
+static cl::opt<unsigned> UnfoldElementAtomicMemcpyMaxElements(
+ "unfold-element-atomic-memcpy-max-elements",
+ cl::init(16),
+ cl::desc("Maximum number of elements in atomic memcpy the optimizer is "
+ "allowed to unfold"));
+
/// Return the specified type promoted as it would be to pass through a va_arg
/// area.
static Type *getPromotedType(Type *Ty) {
@@ -70,27 +76,6 @@ static Type *getPromotedType(Type *Ty) {
return Ty;
}
-/// Given an aggregate type which ultimately holds a single scalar element,
-/// like {{{type}}} or [1 x type], return type.
-static Type *reduceToSingleValueType(Type *T) {
- while (!T->isSingleValueType()) {
- if (StructType *STy = dyn_cast<StructType>(T)) {
- if (STy->getNumElements() == 1)
- T = STy->getElementType(0);
- else
- break;
- } else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) {
- if (ATy->getNumElements() == 1)
- T = ATy->getElementType();
- else
- break;
- } else
- break;
- }
-
- return T;
-}
-
/// Return a constant boolean vector that has true elements in all positions
/// where the input constant data vector has an element with the sign bit set.
static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
@@ -108,6 +93,78 @@ static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
return ConstantVector::get(BoolVec);
}
+Instruction *
+InstCombiner::SimplifyElementAtomicMemCpy(ElementAtomicMemCpyInst *AMI) {
+  // Try to unfold this intrinsic into a sequence of explicit atomic loads and
+  // stores.
+  // First, check that the number of elements is a compile-time constant.
+ auto *NumElementsCI = dyn_cast<ConstantInt>(AMI->getNumElements());
+ if (!NumElementsCI)
+ return nullptr;
+
+ // Check that there are not too many elements.
+ uint64_t NumElements = NumElementsCI->getZExtValue();
+ if (NumElements >= UnfoldElementAtomicMemcpyMaxElements)
+ return nullptr;
+
+  // Don't unfold into illegal integers.
+  uint64_t ElementSizeInBits = AMI->getElementSizeInBytes() * 8;
+  if (!getDataLayout().isLegalInteger(ElementSizeInBits))
+ return nullptr;
+
+  // Cast the source and destination to the correct type. Intrinsic input
+  // arguments are usually represented as i8*.
+  // Operands are often explicitly cast to i8*, and we could simply strip
+  // those casts instead of inserting new ones. However, it's easier to rely
+  // on other InstCombine rules, which cover the trivial cases anyway.
+ Value *Src = AMI->getRawSource();
+ Value *Dst = AMI->getRawDest();
+  Type *ElementPointerType = Type::getIntNPtrTy(
+      AMI->getContext(), ElementSizeInBits,
+      Src->getType()->getPointerAddressSpace());
+
+ Value *SrcCasted = Builder->CreatePointerCast(Src, ElementPointerType,
+ "memcpy_unfold.src_casted");
+ Value *DstCasted = Builder->CreatePointerCast(Dst, ElementPointerType,
+ "memcpy_unfold.dst_casted");
+
+ for (uint64_t i = 0; i < NumElements; ++i) {
+ // Get current element addresses
+ ConstantInt *ElementIdxCI =
+ ConstantInt::get(AMI->getContext(), APInt(64, i));
+ Value *SrcElementAddr =
+ Builder->CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr");
+ Value *DstElementAddr =
+ Builder->CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr");
+
+ // Load from the source. Transfer alignment information and mark load as
+ // unordered atomic.
+ LoadInst *Load = Builder->CreateLoad(SrcElementAddr, "memcpy_unfold.val");
+ Load->setOrdering(AtomicOrdering::Unordered);
+    // We know the alignment of the first element. The verifier also
+    // guarantees that the element size is less than or equal to the first
+    // element's alignment and that both values are powers of two.
+ // This means that all subsequent accesses are at least element size
+ // aligned.
+ // TODO: We can infer better alignment but there is no evidence that this
+ // will matter.
+ Load->setAlignment(i == 0 ? AMI->getSrcAlignment()
+ : AMI->getElementSizeInBytes());
+ Load->setDebugLoc(AMI->getDebugLoc());
+
+ // Store loaded value via unordered atomic store.
+ StoreInst *Store = Builder->CreateStore(Load, DstElementAddr);
+ Store->setOrdering(AtomicOrdering::Unordered);
+ Store->setAlignment(i == 0 ? AMI->getDstAlignment()
+ : AMI->getElementSizeInBytes());
+ Store->setDebugLoc(AMI->getDebugLoc());
+ }
+
+  // Set the number of elements of the copy to 0; the instruction will be
+  // deleted on the next iteration.
+ AMI->setNumElements(Constant::getNullValue(NumElementsCI->getType()));
+ return AMI;
+}
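The effect of the unfolding is easiest to see in a C++ analogue. A sketch (hypothetical, not part of the patch) of what a four-element i32 copy becomes, using relaxed atomics as the nearest std::atomic stand-in for LLVM's weaker unordered ordering:

#include <atomic>
#include <cstdint>

// Each element is copied with its own element-wide atomic load and store;
// the first access carries the intrinsic's alignment, later ones are at
// least element-size aligned.
void unfoldedCopy(std::atomic<uint32_t> *Dst, const std::atomic<uint32_t> *Src) {
  for (unsigned I = 0; I != 4; ++I) {
    uint32_t V = Src[I].load(std::memory_order_relaxed);
    Dst[I].store(V, std::memory_order_relaxed);
  }
}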
+
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &AC, &DT);
unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT);
@@ -144,41 +201,19 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
- // Memcpy forces the use of i8* for the source and destination. That means
- // that if you're using memcpy to move one double around, you'll get a cast
- // from double* to i8*. We'd much rather use a double load+store rather than
- // an i64 load+store, here because this improves the odds that the source or
- // dest address will be promotable. See if we can find a better type than the
- // integer datatype.
- Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
+ // If the memcpy has metadata describing the members, see if we can get the
+ // TBAA tag describing our copy.
MDNode *CopyMD = nullptr;
- if (StrippedDest != MI->getArgOperand(0)) {
- Type *SrcETy = cast<PointerType>(StrippedDest->getType())
- ->getElementType();
- if (SrcETy->isSized() && DL.getTypeStoreSize(SrcETy) == Size) {
- // The SrcETy might be something like {{{double}}} or [1 x double]. Rip
- // down through these levels if so.
- SrcETy = reduceToSingleValueType(SrcETy);
-
- if (SrcETy->isSingleValueType()) {
- NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
- NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
-
- // If the memcpy has metadata describing the members, see if we can
- // get the TBAA tag describing our copy.
- if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
- if (M->getNumOperands() == 3 && M->getOperand(0) &&
- mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
- mdconst::extract<ConstantInt>(M->getOperand(0))->isNullValue() &&
- M->getOperand(1) &&
- mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
- mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
- Size &&
- M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
- CopyMD = cast<MDNode>(M->getOperand(2));
- }
- }
- }
+ if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
+ if (M->getNumOperands() == 3 && M->getOperand(0) &&
+ mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
+ mdconst::extract<ConstantInt>(M->getOperand(0))->isNullValue() &&
+ M->getOperand(1) &&
+ mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
+ mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
+ Size &&
+ M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
+ CopyMD = cast<MDNode>(M->getOperand(2));
}
// If the memcpy/memmove provides better alignment info than we can
@@ -510,6 +545,131 @@ static Value *simplifyX86varShift(const IntrinsicInst &II,
return Builder.CreateAShr(Vec, ShiftVec);
}
+static Value *simplifyX86muldq(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Arg0 = II.getArgOperand(0);
+ Value *Arg1 = II.getArgOperand(1);
+ Type *ResTy = II.getType();
+ assert(Arg0->getType()->getScalarSizeInBits() == 32 &&
+ Arg1->getType()->getScalarSizeInBits() == 32 &&
+ ResTy->getScalarSizeInBits() == 64 && "Unexpected muldq/muludq types");
+
+ // muldq/muludq(undef, undef) -> zero (matches generic mul behavior)
+ if (isa<UndefValue>(Arg0) || isa<UndefValue>(Arg1))
+ return ConstantAggregateZero::get(ResTy);
+
+ // Constant folding.
+ // PMULDQ = (mul(vXi64 sext(shuffle<0,2,..>(Arg0)),
+ // vXi64 sext(shuffle<0,2,..>(Arg1))))
+ // PMULUDQ = (mul(vXi64 zext(shuffle<0,2,..>(Arg0)),
+ // vXi64 zext(shuffle<0,2,..>(Arg1))))
+ if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))
+ return nullptr;
+
+ unsigned NumElts = ResTy->getVectorNumElements();
+ assert(Arg0->getType()->getVectorNumElements() == (2 * NumElts) &&
+ Arg1->getType()->getVectorNumElements() == (2 * NumElts) &&
+ "Unexpected muldq/muludq types");
+
+ unsigned IntrinsicID = II.getIntrinsicID();
+ bool IsSigned = (Intrinsic::x86_sse41_pmuldq == IntrinsicID ||
+ Intrinsic::x86_avx2_pmul_dq == IntrinsicID ||
+ Intrinsic::x86_avx512_pmul_dq_512 == IntrinsicID);
+
+ SmallVector<unsigned, 16> ShuffleMask;
+ for (unsigned i = 0; i != NumElts; ++i)
+ ShuffleMask.push_back(i * 2);
+
+ auto *LHS = Builder.CreateShuffleVector(Arg0, Arg0, ShuffleMask);
+ auto *RHS = Builder.CreateShuffleVector(Arg1, Arg1, ShuffleMask);
+
+ if (IsSigned) {
+ LHS = Builder.CreateSExt(LHS, ResTy);
+ RHS = Builder.CreateSExt(RHS, ResTy);
+ } else {
+ LHS = Builder.CreateZExt(LHS, ResTy);
+ RHS = Builder.CreateZExt(RHS, ResTy);
+ }
+
+ return Builder.CreateMul(LHS, RHS);
+}
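At the scalar level each result lane multiplies the even 32-bit source lanes after zero- or sign-extension, which is what the shuffle-then-extend expansion above models. A one-lane reference (standalone, not part of the patch):

#include <cstdint>

// One PMULUDQ lane: zero-extend both 32-bit inputs, multiply in 64 bits.
uint64_t pmuludqLane(uint32_t A, uint32_t B) {
  return static_cast<uint64_t>(A) * static_cast<uint64_t>(B);
}

// One PMULDQ lane: the same, but sign-extended.
int64_t pmuldqLane(int32_t A, int32_t B) {
  return static_cast<int64_t>(A) * static_cast<int64_t>(B);
}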
+
+static Value *simplifyX86pack(IntrinsicInst &II, InstCombiner &IC,
+ InstCombiner::BuilderTy &Builder, bool IsSigned) {
+ Value *Arg0 = II.getArgOperand(0);
+ Value *Arg1 = II.getArgOperand(1);
+ Type *ResTy = II.getType();
+
+  // Fast path: if both inputs are undef, the result is undef.
+ if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
+ return UndefValue::get(ResTy);
+
+ Type *ArgTy = Arg0->getType();
+ unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
+ unsigned NumDstElts = ResTy->getVectorNumElements();
+ unsigned NumSrcElts = ArgTy->getVectorNumElements();
+ assert(NumDstElts == (2 * NumSrcElts) && "Unexpected packing types");
+
+ unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
+ unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
+ unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
+ assert(ArgTy->getScalarSizeInBits() == (2 * DstScalarSizeInBits) &&
+ "Unexpected packing types");
+
+ // Constant folding.
+ auto *Cst0 = dyn_cast<Constant>(Arg0);
+ auto *Cst1 = dyn_cast<Constant>(Arg1);
+ if (!Cst0 || !Cst1)
+ return nullptr;
+
+ SmallVector<Constant *, 32> Vals;
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
+ unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
+ auto *Cst = (Elt >= NumSrcEltsPerLane) ? Cst1 : Cst0;
+ auto *COp = Cst->getAggregateElement(SrcIdx);
+ if (COp && isa<UndefValue>(COp)) {
+ Vals.push_back(UndefValue::get(ResTy->getScalarType()));
+ continue;
+ }
+
+ auto *CInt = dyn_cast_or_null<ConstantInt>(COp);
+ if (!CInt)
+ return nullptr;
+
+ APInt Val = CInt->getValue();
+ assert(Val.getBitWidth() == ArgTy->getScalarSizeInBits() &&
+ "Unexpected constant bitwidth");
+
+ if (IsSigned) {
+ // PACKSS: Truncate signed value with signed saturation.
+ // Source values less than dst minint are saturated to minint.
+ // Source values greater than dst maxint are saturated to maxint.
+ if (Val.isSignedIntN(DstScalarSizeInBits))
+ Val = Val.trunc(DstScalarSizeInBits);
+ else if (Val.isNegative())
+ Val = APInt::getSignedMinValue(DstScalarSizeInBits);
+ else
+ Val = APInt::getSignedMaxValue(DstScalarSizeInBits);
+ } else {
+ // PACKUS: Truncate signed value with unsigned saturation.
+ // Source values less than zero are saturated to zero.
+ // Source values greater than dst maxuint are saturated to maxuint.
+ if (Val.isIntN(DstScalarSizeInBits))
+ Val = Val.trunc(DstScalarSizeInBits);
+ else if (Val.isNegative())
+ Val = APInt::getNullValue(DstScalarSizeInBits);
+ else
+ Val = APInt::getAllOnesValue(DstScalarSizeInBits);
+ }
+
+ Vals.push_back(ConstantInt::get(ResTy->getScalarType(), Val));
+ }
+ }
+
+ return ConstantVector::get(Vals);
+}
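The per-element saturation implemented above matches the usual scalar definition; a reference for the i16 -> i8 case (standalone, not part of the patch):

#include <cstdint>

// PACKSS element: truncate with signed saturation.
int8_t packssElt(int16_t V) {
  if (V < INT8_MIN) return INT8_MIN;
  if (V > INT8_MAX) return INT8_MAX;
  return static_cast<int8_t>(V);
}

// PACKUS element: signed input, unsigned saturation.
uint8_t packusElt(int16_t V) {
  if (V < 0) return 0;
  if (V > UINT8_MAX) return UINT8_MAX;
  return static_cast<uint8_t>(V);
}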
+
static Value *simplifyX86movmsk(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder) {
Value *Arg = II.getArgOperand(0);
@@ -1330,6 +1490,27 @@ static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
return true;
}
+// Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
+//
+// A single NaN input is folded to minnum, so we rely on that folding for
+// handling NaNs.
+static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
+ const APFloat &Src2) {
+ APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
+
+ APFloat::cmpResult Cmp0 = Max3.compare(Src0);
+ assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
+ if (Cmp0 == APFloat::cmpEqual)
+ return maxnum(Src1, Src2);
+
+ APFloat::cmpResult Cmp1 = Max3.compare(Src1);
+ assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
+ if (Cmp1 == APFloat::cmpEqual)
+ return maxnum(Src0, Src2);
+
+ return maxnum(Src0, Src1);
+}
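For finite inputs this computes the median of the three values; a standalone cross-check against a min/max formulation (not part of the patch; NaNs excluded, since the caller folds a NaN operand to minnum before constant folding):

#include <algorithm>
#include <cassert>

static double med3(double A, double B, double C) {
  return std::max(std::min(A, B), std::min(std::max(A, B), C));
}

int main() {
  assert(med3(1.0, 5.0, 3.0) == 3.0);
  assert(med3(5.0, 1.0, 3.0) == 3.0);
  assert(med3(3.0, 3.0, 1.0) == 3.0); // a tie resolves to the repeated value
  return 0;
}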
+
// Returns true iff the 2 intrinsics have the same operands, limiting the
// comparison to the first NumOperands.
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
@@ -1373,6 +1554,254 @@ static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID,
return false;
}
+// Convert NVVM intrinsics to target-generic LLVM code where possible.
+static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
+ // Each NVVM intrinsic we can simplify can be replaced with one of:
+ //
+ // * an LLVM intrinsic,
+ // * an LLVM cast operation,
+ // * an LLVM binary operation, or
+ // * ad-hoc LLVM IR for the particular operation.
+
+ // Some transformations are only valid when the module's
+ // flush-denormals-to-zero (ftz) setting is true/false, whereas other
+ // transformations are valid regardless of the module's ftz setting.
+ enum FtzRequirementTy {
+ FTZ_Any, // Any ftz setting is ok.
+ FTZ_MustBeOn, // Transformation is valid only if ftz is on.
+ FTZ_MustBeOff, // Transformation is valid only if ftz is off.
+ };
+ // Classes of NVVM intrinsics that can't be replaced one-to-one with a
+ // target-generic intrinsic, cast op, or binary op but that we can nonetheless
+ // simplify.
+ enum SpecialCase {
+ SPC_Reciprocal,
+ };
+
+ // SimplifyAction is a poor-man's variant (plus an additional flag) that
+ // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
+ struct SimplifyAction {
+ // Invariant: At most one of these Optionals has a value.
+ Optional<Intrinsic::ID> IID;
+ Optional<Instruction::CastOps> CastOp;
+ Optional<Instruction::BinaryOps> BinaryOp;
+ Optional<SpecialCase> Special;
+
+ FtzRequirementTy FtzRequirement = FTZ_Any;
+
+ SimplifyAction() = default;
+
+ SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
+ : IID(IID), FtzRequirement(FtzReq) {}
+
+ // Cast operations don't have anything to do with FTZ, so we skip that
+ // argument.
+ SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}
+
+ SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
+ : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
+
+ SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
+ : Special(Special), FtzRequirement(FtzReq) {}
+ };
+
+  // Try to generate a SimplifyAction describing how to replace our
+  // IntrinsicInst with target-generic LLVM IR.
+ const SimplifyAction Action = [II]() -> SimplifyAction {
+ switch (II->getIntrinsicID()) {
+
+ // NVVM intrinsics that map directly to LLVM intrinsics.
+ case Intrinsic::nvvm_ceil_d:
+ return {Intrinsic::ceil, FTZ_Any};
+ case Intrinsic::nvvm_ceil_f:
+ return {Intrinsic::ceil, FTZ_MustBeOff};
+ case Intrinsic::nvvm_ceil_ftz_f:
+ return {Intrinsic::ceil, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fabs_d:
+ return {Intrinsic::fabs, FTZ_Any};
+ case Intrinsic::nvvm_fabs_f:
+ return {Intrinsic::fabs, FTZ_MustBeOff};
+ case Intrinsic::nvvm_fabs_ftz_f:
+ return {Intrinsic::fabs, FTZ_MustBeOn};
+ case Intrinsic::nvvm_floor_d:
+ return {Intrinsic::floor, FTZ_Any};
+ case Intrinsic::nvvm_floor_f:
+ return {Intrinsic::floor, FTZ_MustBeOff};
+ case Intrinsic::nvvm_floor_ftz_f:
+ return {Intrinsic::floor, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fma_rn_d:
+ return {Intrinsic::fma, FTZ_Any};
+ case Intrinsic::nvvm_fma_rn_f:
+ return {Intrinsic::fma, FTZ_MustBeOff};
+ case Intrinsic::nvvm_fma_rn_ftz_f:
+ return {Intrinsic::fma, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fmax_d:
+ return {Intrinsic::maxnum, FTZ_Any};
+ case Intrinsic::nvvm_fmax_f:
+ return {Intrinsic::maxnum, FTZ_MustBeOff};
+ case Intrinsic::nvvm_fmax_ftz_f:
+ return {Intrinsic::maxnum, FTZ_MustBeOn};
+ case Intrinsic::nvvm_fmin_d:
+ return {Intrinsic::minnum, FTZ_Any};
+ case Intrinsic::nvvm_fmin_f:
+ return {Intrinsic::minnum, FTZ_MustBeOff};
+ case Intrinsic::nvvm_fmin_ftz_f:
+ return {Intrinsic::minnum, FTZ_MustBeOn};
+ case Intrinsic::nvvm_round_d:
+ return {Intrinsic::round, FTZ_Any};
+ case Intrinsic::nvvm_round_f:
+ return {Intrinsic::round, FTZ_MustBeOff};
+ case Intrinsic::nvvm_round_ftz_f:
+ return {Intrinsic::round, FTZ_MustBeOn};
+ case Intrinsic::nvvm_sqrt_rn_d:
+ return {Intrinsic::sqrt, FTZ_Any};
+ case Intrinsic::nvvm_sqrt_f:
+ // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
+ // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
+ // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
+ // the versions with explicit ftz-ness.
+ return {Intrinsic::sqrt, FTZ_Any};
+ case Intrinsic::nvvm_sqrt_rn_f:
+ return {Intrinsic::sqrt, FTZ_MustBeOff};
+ case Intrinsic::nvvm_sqrt_rn_ftz_f:
+ return {Intrinsic::sqrt, FTZ_MustBeOn};
+ case Intrinsic::nvvm_trunc_d:
+ return {Intrinsic::trunc, FTZ_Any};
+ case Intrinsic::nvvm_trunc_f:
+ return {Intrinsic::trunc, FTZ_MustBeOff};
+ case Intrinsic::nvvm_trunc_ftz_f:
+ return {Intrinsic::trunc, FTZ_MustBeOn};
+
+ // NVVM intrinsics that map to LLVM cast operations.
+ //
+  // Note that LLVM's target-generic conversion operators correspond to the rz
+  // (round to zero) versions of the nvvm conversion intrinsics, even though
+  // almost everything else here uses the rn (round to nearest even) nvvm ops.
+ case Intrinsic::nvvm_d2i_rz:
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_d2ll_rz:
+ case Intrinsic::nvvm_f2ll_rz:
+ return {Instruction::FPToSI};
+ case Intrinsic::nvvm_d2ui_rz:
+ case Intrinsic::nvvm_f2ui_rz:
+ case Intrinsic::nvvm_d2ull_rz:
+ case Intrinsic::nvvm_f2ull_rz:
+ return {Instruction::FPToUI};
+ case Intrinsic::nvvm_i2d_rz:
+ case Intrinsic::nvvm_i2f_rz:
+ case Intrinsic::nvvm_ll2d_rz:
+ case Intrinsic::nvvm_ll2f_rz:
+ return {Instruction::SIToFP};
+ case Intrinsic::nvvm_ui2d_rz:
+ case Intrinsic::nvvm_ui2f_rz:
+ case Intrinsic::nvvm_ull2d_rz:
+ case Intrinsic::nvvm_ull2f_rz:
+ return {Instruction::UIToFP};
+
+ // NVVM intrinsics that map to LLVM binary ops.
+ case Intrinsic::nvvm_add_rn_d:
+ return {Instruction::FAdd, FTZ_Any};
+ case Intrinsic::nvvm_add_rn_f:
+ return {Instruction::FAdd, FTZ_MustBeOff};
+ case Intrinsic::nvvm_add_rn_ftz_f:
+ return {Instruction::FAdd, FTZ_MustBeOn};
+ case Intrinsic::nvvm_mul_rn_d:
+ return {Instruction::FMul, FTZ_Any};
+ case Intrinsic::nvvm_mul_rn_f:
+ return {Instruction::FMul, FTZ_MustBeOff};
+ case Intrinsic::nvvm_mul_rn_ftz_f:
+ return {Instruction::FMul, FTZ_MustBeOn};
+ case Intrinsic::nvvm_div_rn_d:
+ return {Instruction::FDiv, FTZ_Any};
+ case Intrinsic::nvvm_div_rn_f:
+ return {Instruction::FDiv, FTZ_MustBeOff};
+ case Intrinsic::nvvm_div_rn_ftz_f:
+ return {Instruction::FDiv, FTZ_MustBeOn};
+
+ // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
+ // need special handling.
+ //
+  // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
+ // as well.
+ case Intrinsic::nvvm_rcp_rn_d:
+ return {SPC_Reciprocal, FTZ_Any};
+ case Intrinsic::nvvm_rcp_rn_f:
+ return {SPC_Reciprocal, FTZ_MustBeOff};
+ case Intrinsic::nvvm_rcp_rn_ftz_f:
+ return {SPC_Reciprocal, FTZ_MustBeOn};
+
+ // We do not currently simplify intrinsics that give an approximate answer.
+ // These include:
+ //
+ // - nvvm_cos_approx_{f,ftz_f}
+ // - nvvm_ex2_approx_{d,f,ftz_f}
+ // - nvvm_lg2_approx_{d,f,ftz_f}
+ // - nvvm_sin_approx_{f,ftz_f}
+ // - nvvm_sqrt_approx_{f,ftz_f}
+ // - nvvm_rsqrt_approx_{d,f,ftz_f}
+ // - nvvm_div_approx_{ftz_d,ftz_f,f}
+ // - nvvm_rcp_approx_ftz_d
+ //
+ // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
+ // means that fastmath is enabled in the intrinsic. Unfortunately only
+ // binary operators (currently) have a fastmath bit in SelectionDAG, so this
+ // information gets lost and we can't select on it.
+ //
+  // TODO: div and rcp are lowered to a binary op, so we could in theory
+  // lower them to "fast fdiv".
+
+ default:
+ return {};
+ }
+ }();
+
+  // If Action.FtzRequirement is not satisfied by the function's ftz state, we
+  // can bail out now. (Notice that in the case that IID is not an NVVM
+  // intrinsic, we don't have to look up any function attributes, as
+  // FtzRequirement will be FTZ_Any.)
+ if (Action.FtzRequirement != FTZ_Any) {
+ bool FtzEnabled =
+ II->getFunction()->getFnAttribute("nvptx-f32ftz").getValueAsString() ==
+ "true";
+
+ if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
+ return nullptr;
+ }
+
+ // Simplify to target-generic intrinsic.
+ if (Action.IID) {
+ SmallVector<Value *, 4> Args(II->arg_operands());
+ // All the target-generic intrinsics currently of interest to us have one
+ // type argument, equal to that of the nvvm intrinsic's argument.
+ Type *Tys[] = {II->getArgOperand(0)->getType()};
+ return CallInst::Create(
+ Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
+ }
+
+ // Simplify to target-generic binary op.
+ if (Action.BinaryOp)
+ return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
+ II->getArgOperand(1), II->getName());
+
+ // Simplify to target-generic cast op.
+ if (Action.CastOp)
+ return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
+ II->getName());
+
+ // All that's left are the special cases.
+ if (!Action.Special)
+ return nullptr;
+
+ switch (*Action.Special) {
+ case SPC_Reciprocal:
+ // Simplify reciprocal.
+ return BinaryOperator::Create(
+ Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
+ II->getArgOperand(0), II->getName());
+ }
+ llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
+}
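The ftz gating reduces to a small predicate; a minimal model (standalone sketch, not part of the patch) of the check performed above against the nvptx-f32ftz attribute:

enum FtzRequirementTy { FTZ_Any, FTZ_MustBeOn, FTZ_MustBeOff };

// Returns true if a rewrite with requirement Req may fire given the
// function's ftz state.
static bool ftzGateAllows(FtzRequirementTy Req, bool FtzEnabled) {
  if (Req == FTZ_Any)
    return true; // valid regardless of the ftz setting
  return FtzEnabled == (Req == FTZ_MustBeOn);
}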
+
Instruction *InstCombiner::visitVAStartInst(VAStartInst &I) {
removeTriviallyEmptyRange(I, Intrinsic::vastart, Intrinsic::vaend, *this);
return nullptr;
@@ -1462,6 +1891,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Changed) return II;
}
+ if (auto *AMI = dyn_cast<ElementAtomicMemCpyInst>(II)) {
+ if (Constant *C = dyn_cast<Constant>(AMI->getNumElements()))
+ if (C->isNullValue())
+ return eraseInstFromFunction(*AMI);
+
+ if (Instruction *I = SimplifyElementAtomicMemCpy(AMI))
+ return I;
+ }
+
+ if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
+ return I;
+
auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
unsigned DemandedWidth) {
APInt UndefElts(Width, 0);
@@ -1581,8 +2022,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return replaceInstUsesWith(*II, V);
break;
}
- case Intrinsic::fma:
case Intrinsic::fmuladd: {
+ // Canonicalize fast fmuladd to the separate fmul + fadd.
+ if (II->hasUnsafeAlgebra()) {
+ BuilderTy::FastMathFlagGuard Guard(*Builder);
+ Builder->setFastMathFlags(II->getFastMathFlags());
+ Value *Mul = Builder->CreateFMul(II->getArgOperand(0),
+ II->getArgOperand(1));
+ Value *Add = Builder->CreateFAdd(Mul, II->getArgOperand(2));
+ Add->takeName(II);
+ return replaceInstUsesWith(*II, Add);
+ }
+
+ LLVM_FALLTHROUGH;
+ }
+ case Intrinsic::fma: {
Value *Src0 = II->getArgOperand(0);
Value *Src1 = II->getArgOperand(1);
@@ -1631,6 +2085,26 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return SelectInst::Create(Cond, Call0, Call1);
}
+ LLVM_FALLTHROUGH;
+ }
+ case Intrinsic::ceil:
+ case Intrinsic::floor:
+ case Intrinsic::round:
+ case Intrinsic::nearbyint:
+ case Intrinsic::rint:
+ case Intrinsic::trunc: {
+ Value *ExtSrc;
+ if (match(II->getArgOperand(0), m_FPExt(m_Value(ExtSrc))) &&
+ II->getArgOperand(0)->hasOneUse()) {
+      // intrin (fpext x) -> fpext (intrin x), since fpext is exact and the
+      // operation gives the same result at the narrower type.
+ Value *F = Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(),
+ { ExtSrc->getType() });
+ CallInst *NewFabs = Builder->CreateCall(F, ExtSrc);
+ NewFabs->copyFastMathFlags(II);
+ NewFabs->takeName(II);
+ return new FPExtInst(NewFabs, II->getType());
+ }
+
break;
}
case Intrinsic::cos:
@@ -1863,6 +2337,37 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return II;
break;
}
+ case Intrinsic::x86_avx512_mask_cmp_pd_128:
+ case Intrinsic::x86_avx512_mask_cmp_pd_256:
+ case Intrinsic::x86_avx512_mask_cmp_pd_512:
+ case Intrinsic::x86_avx512_mask_cmp_ps_128:
+ case Intrinsic::x86_avx512_mask_cmp_ps_256:
+ case Intrinsic::x86_avx512_mask_cmp_ps_512: {
+ // Folding cmp(sub(a,b),0) -> cmp(a,b) and cmp(0,sub(a,b)) -> cmp(b,a)
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+ bool Arg0IsZero = match(Arg0, m_Zero());
+ if (Arg0IsZero)
+ std::swap(Arg0, Arg1);
+ Value *A, *B;
+    // This fold requires only the NINF (no infinities) fast-math flag,
+    // since inf minus inf is nan.
+    // NSZ (no signed zeros) is not needed because zeros of any sign are
+    // equal for both compares.
+ // NNAN is not needed because nans compare the same for both compares.
+ // The compare intrinsic uses the above assumptions and therefore
+ // doesn't require additional flags.
+ if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&
+ match(Arg1, m_Zero()) &&
+ cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {
+ if (Arg0IsZero)
+ std::swap(A, B);
+ II->setArgOperand(0, A);
+ II->setArgOperand(1, B);
+ return II;
+ }
+ break;
+ }
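The NINF requirement exists because the subtraction can manufacture a NaN from infinite inputs even though the direct compare is well defined; a standalone demonstration (not part of the patch):

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  double Inf = std::numeric_limits<double>::infinity();
  assert(!(Inf < Inf));          // direct compare: ordered and false
  assert(std::isnan(Inf - Inf)); // sub form: NaN, compares unordered
  return 0;
}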
case Intrinsic::x86_avx512_mask_add_ps_512:
case Intrinsic::x86_avx512_mask_div_ps_512:
@@ -2130,6 +2635,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx2_pmulu_dq:
case Intrinsic::x86_avx512_pmul_dq_512:
case Intrinsic::x86_avx512_pmulu_dq_512: {
+ if (Value *V = simplifyX86muldq(*II, *Builder))
+ return replaceInstUsesWith(*II, V);
+
unsigned VWidth = II->getType()->getVectorNumElements();
APInt UndefElts(VWidth, 0);
APInt DemandedElts = APInt::getAllOnesValue(VWidth);
@@ -2141,6 +2649,64 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ case Intrinsic::x86_sse2_packssdw_128:
+ case Intrinsic::x86_sse2_packsswb_128:
+ case Intrinsic::x86_avx2_packssdw:
+ case Intrinsic::x86_avx2_packsswb:
+ case Intrinsic::x86_avx512_packssdw_512:
+ case Intrinsic::x86_avx512_packsswb_512:
+ if (Value *V = simplifyX86pack(*II, *this, *Builder, true))
+ return replaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_sse2_packuswb_128:
+ case Intrinsic::x86_sse41_packusdw:
+ case Intrinsic::x86_avx2_packusdw:
+ case Intrinsic::x86_avx2_packuswb:
+ case Intrinsic::x86_avx512_packusdw_512:
+ case Intrinsic::x86_avx512_packuswb_512:
+ if (Value *V = simplifyX86pack(*II, *this, *Builder, false))
+ return replaceInstUsesWith(*II, V);
+ break;
+
+ case Intrinsic::x86_pclmulqdq: {
+ if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
+ unsigned Imm = C->getZExtValue();
+
+ bool MadeChange = false;
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+ unsigned VWidth = Arg0->getType()->getVectorNumElements();
+ APInt DemandedElts(VWidth, 0);
+
+ APInt UndefElts1(VWidth, 0);
+ DemandedElts = (Imm & 0x01) ? 2 : 1;
+ if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts,
+ UndefElts1)) {
+ II->setArgOperand(0, V);
+ MadeChange = true;
+ }
+
+ APInt UndefElts2(VWidth, 0);
+ DemandedElts = (Imm & 0x10) ? 2 : 1;
+ if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts,
+ UndefElts2)) {
+ II->setArgOperand(1, V);
+ MadeChange = true;
+ }
+
+      // If either demanded input element is undef, fold the result to zero.
+ if (UndefElts1[(Imm & 0x01) ? 1 : 0] ||
+ UndefElts2[(Imm & 0x10) ? 1 : 0])
+ return replaceInstUsesWith(*II,
+ ConstantAggregateZero::get(II->getType()));
+
+ if (MadeChange)
+ return II;
+ }
+ break;
+ }
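The immediate decoding above follows the PCLMULQDQ convention: bit 0 of the immediate selects which i64 lane of the first operand participates, and bit 4 selects for the second. A scalar model (standalone, not part of the patch):

// Which of the two i64 lanes of an operand the immediate demands.
static unsigned demandedLane(unsigned Imm, bool SecondOperand) {
  return (Imm & (SecondOperand ? 0x10 : 0x01)) ? 1 : 0;
}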
+
case Intrinsic::x86_sse41_insertps:
if (Value *V = simplifyX86insertps(*II, *Builder))
return replaceInstUsesWith(*II, V);
@@ -2531,9 +3097,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
-
case Intrinsic::amdgcn_rcp: {
- if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
+ Value *Src = II->getArgOperand(0);
+
+ // TODO: Move to ConstantFolding/InstSimplify?
+ if (isa<UndefValue>(Src))
+ return replaceInstUsesWith(CI, Src);
+
+ if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
const APFloat &ArgVal = C->getValueAPF();
APFloat Val(ArgVal.getSemantics(), 1.0);
APFloat::opStatus Status = Val.divide(ArgVal,
@@ -2546,6 +3117,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ case Intrinsic::amdgcn_rsq: {
+ Value *Src = II->getArgOperand(0);
+
+ // TODO: Move to ConstantFolding/InstSimplify?
+ if (isa<UndefValue>(Src))
+ return replaceInstUsesWith(CI, Src);
+ break;
+ }
case Intrinsic::amdgcn_frexp_mant:
case Intrinsic::amdgcn_frexp_exp: {
Value *Src = II->getArgOperand(0);
@@ -2650,6 +3229,274 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), Result));
}
+ case Intrinsic::amdgcn_cvt_pkrtz: {
+ Value *Src0 = II->getArgOperand(0);
+ Value *Src1 = II->getArgOperand(1);
+ if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
+ if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
+ const fltSemantics &HalfSem
+ = II->getType()->getScalarType()->getFltSemantics();
+ bool LosesInfo;
+ APFloat Val0 = C0->getValueAPF();
+ APFloat Val1 = C1->getValueAPF();
+ Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
+ Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
+
+ Constant *Folded = ConstantVector::get({
+ ConstantFP::get(II->getContext(), Val0),
+ ConstantFP::get(II->getContext(), Val1) });
+ return replaceInstUsesWith(*II, Folded);
+ }
+ }
+
+ if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
+ return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
+
+ break;
+ }
+ case Intrinsic::amdgcn_ubfe:
+ case Intrinsic::amdgcn_sbfe: {
+ // Decompose simple cases into standard shifts.
+ Value *Src = II->getArgOperand(0);
+ if (isa<UndefValue>(Src))
+ return replaceInstUsesWith(*II, Src);
+
+ unsigned Width;
+ Type *Ty = II->getType();
+ unsigned IntSize = Ty->getIntegerBitWidth();
+
+ ConstantInt *CWidth = dyn_cast<ConstantInt>(II->getArgOperand(2));
+ if (CWidth) {
+ Width = CWidth->getZExtValue();
+ if ((Width & (IntSize - 1)) == 0)
+ return replaceInstUsesWith(*II, ConstantInt::getNullValue(Ty));
+
+ if (Width >= IntSize) {
+ // Hardware ignores high bits, so remove those.
+ II->setArgOperand(2, ConstantInt::get(CWidth->getType(),
+ Width & (IntSize - 1)));
+ return II;
+ }
+ }
+
+ unsigned Offset;
+ ConstantInt *COffset = dyn_cast<ConstantInt>(II->getArgOperand(1));
+ if (COffset) {
+ Offset = COffset->getZExtValue();
+ if (Offset >= IntSize) {
+ II->setArgOperand(1, ConstantInt::get(COffset->getType(),
+ Offset & (IntSize - 1)));
+ return II;
+ }
+ }
+
+ bool Signed = II->getIntrinsicID() == Intrinsic::amdgcn_sbfe;
+
+ // TODO: Also emit sub if only width is constant.
+ if (!CWidth && COffset && Offset == 0) {
+ Constant *KSize = ConstantInt::get(COffset->getType(), IntSize);
+ Value *ShiftVal = Builder->CreateSub(KSize, II->getArgOperand(2));
+ ShiftVal = Builder->CreateZExt(ShiftVal, II->getType());
+
+ Value *Shl = Builder->CreateShl(Src, ShiftVal);
+ Value *RightShift = Signed ?
+ Builder->CreateAShr(Shl, ShiftVal) :
+ Builder->CreateLShr(Shl, ShiftVal);
+ RightShift->takeName(II);
+ return replaceInstUsesWith(*II, RightShift);
+ }
+
+ if (!CWidth || !COffset)
+ break;
+
+ // TODO: This allows folding to undef when the hardware has specific
+ // behavior?
+ if (Offset + Width < IntSize) {
+ Value *Shl = Builder->CreateShl(Src, IntSize - Offset - Width);
+ Value *RightShift = Signed ?
+ Builder->CreateAShr(Shl, IntSize - Width) :
+ Builder->CreateLShr(Shl, IntSize - Width);
+ RightShift->takeName(II);
+ return replaceInstUsesWith(*II, RightShift);
+ }
+
+ Value *RightShift = Signed ?
+ Builder->CreateAShr(Src, Offset) :
+ Builder->CreateLShr(Src, Offset);
+
+ RightShift->takeName(II);
+ return replaceInstUsesWith(*II, RightShift);
+ }
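With in-range constant operands the extract reduces to the pair of shifts emitted above; a 32-bit scalar reference (standalone, not part of the patch, assuming the preconditions already handled earlier: Width != 0 and Offset + Width < 32):

#include <cstdint>

uint32_t ubfe32(uint32_t Src, unsigned Offset, unsigned Width) {
  return (Src << (32 - Offset - Width)) >> (32 - Width);
}

int32_t sbfe32(int32_t Src, unsigned Offset, unsigned Width) {
  // Shift left in the unsigned domain to avoid signed-overflow UB, then use
  // an arithmetic right shift to replicate the field's sign bit.
  int32_t Shl = static_cast<int32_t>(static_cast<uint32_t>(Src)
                                     << (32 - Offset - Width));
  return Shl >> (32 - Width);
}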
+ case Intrinsic::amdgcn_exp:
+ case Intrinsic::amdgcn_exp_compr: {
+ ConstantInt *En = dyn_cast<ConstantInt>(II->getArgOperand(1));
+ if (!En) // Illegal.
+ break;
+
+ unsigned EnBits = En->getZExtValue();
+ if (EnBits == 0xf)
+ break; // All inputs enabled.
+
+ bool IsCompr = II->getIntrinsicID() == Intrinsic::amdgcn_exp_compr;
+ bool Changed = false;
+ for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
+ if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
+ (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
+ Value *Src = II->getArgOperand(I + 2);
+ if (!isa<UndefValue>(Src)) {
+ II->setArgOperand(I + 2, UndefValue::get(Src->getType()));
+ Changed = true;
+ }
+ }
+ }
+
+ if (Changed)
+ return II;
+
+    break;
+  }
+ case Intrinsic::amdgcn_fmed3: {
+ // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
+ // for the shader.
+
+ Value *Src0 = II->getArgOperand(0);
+ Value *Src1 = II->getArgOperand(1);
+ Value *Src2 = II->getArgOperand(2);
+
+ bool Swap = false;
+ // Canonicalize constants to RHS operands.
+ //
+ // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
+ if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
+ std::swap(Src0, Src1);
+ Swap = true;
+ }
+
+ if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
+ std::swap(Src1, Src2);
+ Swap = true;
+ }
+
+ if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
+ std::swap(Src0, Src1);
+ Swap = true;
+ }
+
+ if (Swap) {
+ II->setArgOperand(0, Src0);
+ II->setArgOperand(1, Src1);
+ II->setArgOperand(2, Src2);
+ return II;
+ }
+
+ if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) {
+ CallInst *NewCall = Builder->CreateMinNum(Src0, Src1);
+ NewCall->copyFastMathFlags(II);
+ NewCall->takeName(II);
+ return replaceInstUsesWith(*II, NewCall);
+ }
+
+ if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
+ if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
+ if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
+ APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
+ C2->getValueAPF());
+ return replaceInstUsesWith(*II,
+ ConstantFP::get(Builder->getContext(), Result));
+ }
+ }
+ }
+
+ break;
+ }
+ case Intrinsic::amdgcn_icmp:
+ case Intrinsic::amdgcn_fcmp: {
+ const ConstantInt *CC = dyn_cast<ConstantInt>(II->getArgOperand(2));
+ if (!CC)
+ break;
+
+ // Guard against invalid arguments.
+ int64_t CCVal = CC->getZExtValue();
+ bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp;
+ if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
+ CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
+ (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
+ CCVal > CmpInst::LAST_FCMP_PREDICATE)))
+ break;
+
+ Value *Src0 = II->getArgOperand(0);
+ Value *Src1 = II->getArgOperand(1);
+
+ if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
+ if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
+ Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
+ return replaceInstUsesWith(*II,
+ ConstantExpr::getSExt(CCmp, II->getType()));
+ }
+
+ // Canonicalize constants to RHS.
+ CmpInst::Predicate SwapPred
+ = CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
+ II->setArgOperand(0, Src1);
+ II->setArgOperand(1, Src0);
+ II->setArgOperand(2, ConstantInt::get(CC->getType(),
+ static_cast<int>(SwapPred)));
+ return II;
+ }
+
+ if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
+ break;
+
+ // Canonicalize compare eq with true value to compare != 0
+ // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
+ // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
+ // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
+ // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
+ Value *ExtSrc;
+ if (CCVal == CmpInst::ICMP_EQ &&
+ ((match(Src1, m_One()) && match(Src0, m_ZExt(m_Value(ExtSrc)))) ||
+ (match(Src1, m_AllOnes()) && match(Src0, m_SExt(m_Value(ExtSrc))))) &&
+ ExtSrc->getType()->isIntegerTy(1)) {
+ II->setArgOperand(1, ConstantInt::getNullValue(Src1->getType()));
+ II->setArgOperand(2, ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
+ return II;
+ }
+
+ CmpInst::Predicate SrcPred;
+ Value *SrcLHS;
+ Value *SrcRHS;
+
+ // Fold compare eq/ne with 0 from a compare result as the predicate to the
+ // intrinsic. The typical use is a wave vote function in the library, which
+ // will be fed from a user code condition compared with 0. Fold in the
+ // redundant compare.
+
+ // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
+ // -> llvm.amdgcn.[if]cmp(a, b, pred)
+ //
+ // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
+ // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
+ if (match(Src1, m_Zero()) &&
+ match(Src0,
+ m_ZExtOrSExt(m_Cmp(SrcPred, m_Value(SrcLHS), m_Value(SrcRHS))))) {
+ if (CCVal == CmpInst::ICMP_EQ)
+ SrcPred = CmpInst::getInversePredicate(SrcPred);
+
+ Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) ?
+ Intrinsic::amdgcn_fcmp : Intrinsic::amdgcn_icmp;
+
+ Value *NewF = Intrinsic::getDeclaration(II->getModule(), NewIID,
+ SrcLHS->getType());
+ Value *Args[] = { SrcLHS, SrcRHS,
+ ConstantInt::get(CC->getType(), SrcPred) };
+ CallInst *NewCall = Builder->CreateCall(NewF, Args);
+ NewCall->takeName(II);
+ return replaceInstUsesWith(*II, NewCall);
+ }
+
+ break;
+ }
case Intrinsic::stackrestore: {
// If the save is right next to the restore, remove the restore. This can
// happen when variable allocas are DCE'd.
@@ -2790,7 +3637,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// isKnownNonNull -> nonnull attribute
if (isKnownNonNullAt(DerivedPtr, II, &DT))
- II->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ II->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
}
// TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
@@ -2799,11 +3646,38 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
break;
}
- }
+ case Intrinsic::experimental_guard: {
+ // Is this guard followed by another guard?
+ Instruction *NextInst = II->getNextNode();
+ Value *NextCond = nullptr;
+ if (match(NextInst,
+ m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
+ Value *CurrCond = II->getArgOperand(0);
+
+      // Remove a guard that is immediately preceded by an identical guard.
+ if (CurrCond == NextCond)
+ return eraseInstFromFunction(*NextInst);
+
+ // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
+ II->setArgOperand(0, Builder->CreateAnd(CurrCond, NextCond));
+ return eraseInstFromFunction(*NextInst);
+ }
+ break;
+ }
+ }
return visitCallSite(II);
}
+// Fence instruction simplification
+Instruction *InstCombiner::visitFenceInst(FenceInst &FI) {
+ // Remove identical consecutive fences.
+ if (auto *NFI = dyn_cast<FenceInst>(FI.getNextNode()))
+ if (FI.isIdenticalTo(NFI))
+ return eraseInstFromFunction(FI);
+ return nullptr;
+}
+
// InvokeInst simplification
//
Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
@@ -2950,7 +3824,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
for (Value *V : CS.args()) {
if (V->getType()->isPointerTy() &&
- !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) &&
+ !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
isKnownNonNullAt(V, CS.getInstruction(), &DT))
Indices.push_back(ArgNo + 1);
ArgNo++;
@@ -2959,7 +3833,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
assert(ArgNo == CS.arg_size() && "sanity check");
if (!Indices.empty()) {
- AttributeSet AS = CS.getAttributes();
+ AttributeList AS = CS.getAttributes();
LLVMContext &Ctx = CS.getInstruction()->getContext();
AS = AS.addAttribute(Ctx, Indices,
Attribute::get(Ctx, Attribute::NonNull));
@@ -3081,7 +3955,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false;
Instruction *Caller = CS.getInstruction();
- const AttributeSet &CallerPAL = CS.getAttributes();
+ const AttributeList &CallerPAL = CS.getAttributes();
// Okay, this is a cast from a function to a different type. Unless doing so
// would cause a type conversion of one of our arguments, change this call to
@@ -3108,7 +3982,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
}
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
- AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
+ AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
return false; // Attribute not compatible with transformed value.
}
@@ -3149,8 +4023,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
return false; // Cannot transform this parameter value.
- if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
- overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
+ if (AttrBuilder(CallerPAL.getParamAttributes(i))
+ .overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
return false; // Attribute not compatible with transformed value.
if (CS.isInAllocaArgument(i))
@@ -3158,9 +4032,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// If the parameter is passed as a byval argument, then we have to have a
// sized type and the sized type has to have the same size as the old type.
- if (ParamTy != ActTy &&
- CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
- Attribute::ByVal)) {
+ if (ParamTy != ActTy && CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
if (!ParamPTy || !ParamPTy->getElementType()->isSized())
return false;
@@ -3205,7 +4077,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
break;
// Check if it has an attribute that's incompatible with varargs.
- AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1);
+ AttributeList PAttrs = CallerPAL.getSlotAttributes(i - 1);
if (PAttrs.hasAttribute(Index, Attribute::StructRet))
return false;
}
@@ -3213,44 +4085,37 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Okay, we decided that this is a safe thing to do: go ahead and start
// inserting cast instructions as necessary.
- std::vector<Value*> Args;
+ SmallVector<Value *, 8> Args;
+ SmallVector<AttributeSet, 8> ArgAttrs;
Args.reserve(NumActualArgs);
- SmallVector<AttributeSet, 8> attrVec;
- attrVec.reserve(NumCommonArgs);
+ ArgAttrs.reserve(NumActualArgs);
// Get any return attributes.
- AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
+ AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
- // Add the new return attributes.
- if (RAttrs.hasAttributes())
- attrVec.push_back(AttributeSet::get(Caller->getContext(),
- AttributeSet::ReturnIndex, RAttrs));
-
AI = CS.arg_begin();
for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
Type *ParamTy = FT->getParamType(i);
- if ((*AI)->getType() == ParamTy) {
- Args.push_back(*AI);
- } else {
- Args.push_back(Builder->CreateBitOrPointerCast(*AI, ParamTy));
- }
+ Value *NewArg = *AI;
+ if ((*AI)->getType() != ParamTy)
+ NewArg = Builder->CreateBitOrPointerCast(*AI, ParamTy);
+ Args.push_back(NewArg);
// Add any parameter attributes.
- AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
- if (PAttrs.hasAttributes())
- attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1,
- PAttrs));
+ ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
}
// If the function takes more arguments than the call was taking, add them
// now.
- for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
+ for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
Args.push_back(Constant::getNullValue(FT->getParamType(i)));
+ ArgAttrs.push_back(AttributeSet());
+ }
// If we are removing arguments to the function, emit an obnoxious warning.
if (FT->getNumParams() < NumActualArgs) {
@@ -3259,54 +4124,56 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Add all of the arguments in their promoted form to the arg list.
for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
Type *PTy = getPromotedType((*AI)->getType());
+ Value *NewArg = *AI;
if (PTy != (*AI)->getType()) {
// Must promote to pass through va_arg area!
Instruction::CastOps opcode =
CastInst::getCastOpcode(*AI, false, PTy, false);
- Args.push_back(Builder->CreateCast(opcode, *AI, PTy));
- } else {
- Args.push_back(*AI);
+ NewArg = Builder->CreateCast(opcode, *AI, PTy);
}
+ Args.push_back(NewArg);
// Add any parameter attributes.
- AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
- if (PAttrs.hasAttributes())
- attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1,
- PAttrs));
+ ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
}
}
}
AttributeSet FnAttrs = CallerPAL.getFnAttributes();
- if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex))
- attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs));
if (NewRetTy->isVoidTy())
Caller->setName(""); // Void type should not have a name.
- const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
- attrVec);
+ assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
+ "missing argument attributes");
+ LLVMContext &Ctx = Callee->getContext();
+ AttributeList NewCallerPAL = AttributeList::get(
+ Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
SmallVector<OperandBundleDef, 1> OpBundles;
CS.getOperandBundlesAsDefs(OpBundles);
- Instruction *NC;
+ CallSite NewCS;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
- NC = Builder->CreateInvoke(Callee, II->getNormalDest(), II->getUnwindDest(),
- Args, OpBundles);
- NC->takeName(II);
- cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
- cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
+ NewCS = Builder->CreateInvoke(Callee, II->getNormalDest(),
+ II->getUnwindDest(), Args, OpBundles);
} else {
- CallInst *CI = cast<CallInst>(Caller);
- NC = Builder->CreateCall(Callee, Args, OpBundles);
- NC->takeName(CI);
- cast<CallInst>(NC)->setTailCallKind(CI->getTailCallKind());
- cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
- cast<CallInst>(NC)->setAttributes(NewCallerPAL);
+ NewCS = Builder->CreateCall(Callee, Args, OpBundles);
+ cast<CallInst>(NewCS.getInstruction())
+ ->setTailCallKind(cast<CallInst>(Caller)->getTailCallKind());
}
+ NewCS->takeName(Caller);
+ NewCS.setCallingConv(CS.getCallingConv());
+ NewCS.setAttributes(NewCallerPAL);
+
+ // Preserve the weight metadata for the new call instruction. The metadata
+ // is used by SamplePGO to check the callsite's hotness.
+ uint64_t W;
+ if (Caller->extractProfTotalWeight(W))
+ NewCS->setProfWeight(W);
// Insert a cast of the return type as necessary.
+ Instruction *NC = NewCS.getInstruction();
Value *NV = NC;
if (OldRetTy != NV->getType() && !Caller->use_empty()) {
if (!NV->getType()->isVoidTy()) {
@@ -3351,7 +4218,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
Value *Callee = CS.getCalledValue();
PointerType *PTy = cast<PointerType>(Callee->getType());
FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
- const AttributeSet &Attrs = CS.getAttributes();
+ AttributeList Attrs = CS.getAttributes();
// If the call already has the 'nest' attribute somewhere then give up -
// otherwise 'nest' would occur twice after splicing in the chain.
@@ -3364,50 +4231,46 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
FunctionType *NestFTy = cast<FunctionType>(NestF->getValueType());
- const AttributeSet &NestAttrs = NestF->getAttributes();
+ AttributeList NestAttrs = NestF->getAttributes();
if (!NestAttrs.isEmpty()) {
- unsigned NestIdx = 1;
+ unsigned NestArgNo = 0;
Type *NestTy = nullptr;
AttributeSet NestAttr;
// Look for a parameter marked with the 'nest' attribute.
for (FunctionType::param_iterator I = NestFTy->param_begin(),
- E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
- if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) {
+ E = NestFTy->param_end();
+ I != E; ++NestArgNo, ++I) {
+ AttributeSet AS = NestAttrs.getParamAttributes(NestArgNo);
+ if (AS.hasAttribute(Attribute::Nest)) {
// Record the parameter type and any other attributes.
NestTy = *I;
- NestAttr = NestAttrs.getParamAttributes(NestIdx);
+ NestAttr = AS;
break;
}
+ }
if (NestTy) {
Instruction *Caller = CS.getInstruction();
std::vector<Value*> NewArgs;
+ std::vector<AttributeSet> NewArgAttrs;
NewArgs.reserve(CS.arg_size() + 1);
-
- SmallVector<AttributeSet, 8> NewAttrs;
- NewAttrs.reserve(Attrs.getNumSlots() + 1);
+ NewArgAttrs.reserve(CS.arg_size());
// Insert the nest argument into the call argument list, which may
// mean appending it. Likewise for attributes.
- // Add any result attributes.
- if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
- NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
- Attrs.getRetAttributes()));
-
{
- unsigned Idx = 1;
+ unsigned ArgNo = 0;
CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
do {
- if (Idx == NestIdx) {
+ if (ArgNo == NestArgNo) {
// Add the chain argument and attributes.
Value *NestVal = Tramp->getArgOperand(2);
if (NestVal->getType() != NestTy)
NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
NewArgs.push_back(NestVal);
- NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
- NestAttr));
+ NewArgAttrs.push_back(NestAttr);
}
if (I == E)
@@ -3415,23 +4278,13 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// Add the original argument and attributes.
NewArgs.push_back(*I);
- AttributeSet Attr = Attrs.getParamAttributes(Idx);
- if (Attr.hasAttributes(Idx)) {
- AttrBuilder B(Attr, Idx);
- NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
- Idx + (Idx >= NestIdx), B));
- }
+ NewArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
- ++Idx;
+ ++ArgNo;
++I;
} while (true);
}
- // Add any function attributes.
- if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
- NewAttrs.push_back(AttributeSet::get(FTy->getContext(),
- Attrs.getFnAttributes()));
-
// The trampoline may have been bitcast to a bogus type (FTy).
// Handle this by synthesizing a new function type, equal to FTy
// with the chain parameter inserted.
@@ -3442,12 +4295,12 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// Insert the chain's type into the list of parameter types, which may
// mean appending it.
{
- unsigned Idx = 1;
+ unsigned ArgNo = 0;
FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end();
do {
- if (Idx == NestIdx)
+ if (ArgNo == NestArgNo)
// Add the chain's type.
NewTypes.push_back(NestTy);
@@ -3457,7 +4310,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// Add the original type.
NewTypes.push_back(*I);
- ++Idx;
+ ++ArgNo;
++I;
} while (true);
}
@@ -3470,8 +4323,9 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
NestF->getType() == PointerType::getUnqual(NewFTy) ?
NestF : ConstantExpr::getBitCast(NestF,
PointerType::getUnqual(NewFTy));
- const AttributeSet &NewPAL =
- AttributeSet::get(FTy->getContext(), NewAttrs);
+ AttributeList NewPAL =
+ AttributeList::get(FTy->getContext(), Attrs.getFnAttributes(),
+ Attrs.getRetAttributes(), NewArgAttrs);
SmallVector<OperandBundleDef, 1> OpBundles;
CS.getOperandBundlesAsDefs(OpBundles);
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index e74b590e2b7c..25683132c786 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -274,12 +274,12 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
return NV;
// If we are casting a PHI, then fold the cast into the PHI.
- if (isa<PHINode>(Src)) {
+ if (auto *PN = dyn_cast<PHINode>(Src)) {
// Don't do this if it would create a PHI node with an illegal type from a
// legal type.
if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
- ShouldChangeType(CI.getType(), Src->getType()))
- if (Instruction *NV = FoldOpIntoPhi(CI))
+ shouldChangeType(CI.getType(), Src->getType()))
+ if (Instruction *NV = foldOpIntoPhi(CI, PN))
return NV;
}
@@ -447,7 +447,7 @@ static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC,
Instruction *InstCombiner::shrinkBitwiseLogic(TruncInst &Trunc) {
Type *SrcTy = Trunc.getSrcTy();
Type *DestTy = Trunc.getType();
- if (isa<IntegerType>(SrcTy) && !ShouldChangeType(SrcTy, DestTy))
+ if (isa<IntegerType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
return nullptr;
BinaryOperator *LogicOp;
@@ -463,6 +463,56 @@ Instruction *InstCombiner::shrinkBitwiseLogic(TruncInst &Trunc) {
return BinaryOperator::Create(LogicOp->getOpcode(), NarrowOp0, NarrowC);
}
+/// Try to narrow the width of a splat shuffle. This could be generalized to any
+/// shuffle with a constant operand, but we limit the transform to avoid
+/// creating a shuffle type that targets may not be able to lower effectively.
+static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
+ InstCombiner::BuilderTy &Builder) {
+ auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
+ if (Shuf && Shuf->hasOneUse() && isa<UndefValue>(Shuf->getOperand(1)) &&
+ Shuf->getMask()->getSplatValue() &&
+ Shuf->getType() == Shuf->getOperand(0)->getType()) {
+ // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Undef, SplatMask
+ Constant *NarrowUndef = UndefValue::get(Trunc.getType());
+ Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
+ return new ShuffleVectorInst(NarrowOp, NarrowUndef, Shuf->getMask());
+ }
+
+ return nullptr;
+}
+
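+// A sketch of the shuffle narrowing described above (names hypothetical):
+// the splat is a shuffle of <4 x i32> with a zero mask, and the trunc is
+// pushed through the shuffle so the splat is performed on the narrow type:
+//
+//   define <4 x i16> @narrow_splat(<4 x i32> %x) {
+//     ; trunc (shuf X, undef, SplatMask) --> shuf (trunc X), undef, SplatMask
+//     %splat = shufflevector <4 x i32> %x, <4 x i32> undef,
+//                            <4 x i32> zeroinitializer
+//     %t = trunc <4 x i32> %splat to <4 x i16>
+//     ret <4 x i16> %t
+//   }
+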
+/// Try to narrow the width of an insert element. This could be generalized for
+/// any vector constant, but we limit the transform to insertion into undef to
+/// avoid potential backend problems from unsupported insertion widths. This
+/// could also be extended to handle the case of inserting a scalar constant
+/// into a vector variable.
+static Instruction *shrinkInsertElt(CastInst &Trunc,
+ InstCombiner::BuilderTy &Builder) {
+ Instruction::CastOps Opcode = Trunc.getOpcode();
+ assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
+ "Unexpected instruction for shrinking");
+
+ auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
+ if (!InsElt || !InsElt->hasOneUse())
+ return nullptr;
+
+ Type *DestTy = Trunc.getType();
+ Type *DestScalarTy = DestTy->getScalarType();
+ Value *VecOp = InsElt->getOperand(0);
+ Value *ScalarOp = InsElt->getOperand(1);
+ Value *Index = InsElt->getOperand(2);
+
+ if (isa<UndefValue>(VecOp)) {
+ // trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
+ // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
+ UndefValue *NarrowUndef = UndefValue::get(DestTy);
+ Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
+ return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
+ }
+
+ return nullptr;
+}
+
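+// A corresponding sketch for the insertelement case (names hypothetical);
+// because the vector operand is undef, only the inserted scalar needs to be
+// truncated:
+//
+//   define <4 x i16> @narrow_inselt(i32 %x) {
+//     ; trunc (inselt undef, X, 0) --> inselt undef, (trunc X), 0
+//     %ins = insertelement <4 x i32> undef, i32 %x, i32 0
+//     %t = trunc <4 x i32> %ins to <4 x i16>
+//     ret <4 x i16> %t
+//   }
+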
Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (Instruction *Result = commonCastTransforms(CI))
return Result;
@@ -488,7 +538,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// type. Only do this if the dest type is a simple type; don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
canEvaluateTruncated(Src, DestTy, *this, &CI)) {
// If this cast is a truncate, evaluating in a different type always
@@ -554,8 +604,14 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (Instruction *I = shrinkBitwiseLogic(CI))
return I;
+ if (Instruction *I = shrinkSplatShuffle(CI, *Builder))
+ return I;
+
+ if (Instruction *I = shrinkInsertElt(CI, *Builder))
+ return I;
+
if (Src->hasOneUse() && isa<IntegerType>(SrcTy) &&
- ShouldChangeType(SrcTy, DestTy)) {
+ shouldChangeType(SrcTy, DestTy)) {
// Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
// dest type is native and cst < dest size.
if (match(Src, m_Shl(m_Value(A), m_ConstantInt(Cst))) &&
@@ -838,11 +894,6 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
if (Instruction *Result = commonCastTransforms(CI))
return Result;
- // See if we can simplify any instructions used by the input whose sole
- // purpose is to compute bits we don't care about.
- if (SimplifyDemandedInstructionBits(CI))
- return &CI;
-
Value *Src = CI.getOperand(0);
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
@@ -851,10 +902,10 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// expression tree to something weird like i93 unless the source is also
// strange.
unsigned BitsToClear;
- if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
- assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
- "Unreasonable BitsToClear");
+ assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
+ "Can't clear more bits than in SrcTy");
// Okay, we can transform this! Insert the new expression now.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
@@ -1124,11 +1175,6 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
if (Instruction *I = commonCastTransforms(CI))
return I;
- // See if we can simplify any instructions used by the input whose sole
- // purpose is to compute bits we don't care about.
- if (SimplifyDemandedInstructionBits(CI))
- return &CI;
-
Value *Src = CI.getOperand(0);
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
@@ -1145,7 +1191,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// type. Only do this if the dest type is a simple type; don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
+ if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
canEvaluateSExtd(Src, DestTy)) {
// Okay, we can transform this! Insert the new expression now.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
@@ -1167,18 +1213,16 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
ShAmt);
}
- // If this input is a trunc from our destination, then turn sext(trunc(x))
+ // If the input is a trunc from the destination type, then turn sext(trunc(x))
// into shifts.
- if (TruncInst *TI = dyn_cast<TruncInst>(Src))
- if (TI->hasOneUse() && TI->getOperand(0)->getType() == DestTy) {
- uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
- uint32_t DestBitSize = DestTy->getScalarSizeInBits();
-
- // We need to emit a shl + ashr to do the sign extend.
- Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
- Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext");
- return BinaryOperator::CreateAShr(Res, ShAmt);
- }
+ Value *X;
+ if (match(Src, m_OneUse(m_Trunc(m_Value(X)))) && X->getType() == DestTy) {
+ // sext(trunc(X)) --> ashr(shl(X, C), C)
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
+ Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
+ return BinaryOperator::CreateAShr(Builder->CreateShl(X, ShAmt), ShAmt);
+ }
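
A sketch of the rewritten fold (function name hypothetical): truncating i32 to i8 and sign-extending back becomes a shift pair on the original value:

define i32 @sext_trunc(i32 %x) {
  ; sext(trunc(X)) --> ashr(shl(X, 24), 24) when X already has the dest type
  %t = trunc i32 %x to i8
  %s = sext i8 %t to i32
  ret i32 %s
}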
if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
return transformSExtICmp(ICI, CI);
@@ -1225,17 +1269,15 @@ static Constant *fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
return nullptr;
}
-/// If this is a floating-point extension instruction, look
-/// through it until we get the source value.
+/// Look through floating-point extensions until we get the source value.
static Value *lookThroughFPExtensions(Value *V) {
- if (Instruction *I = dyn_cast<Instruction>(V))
- if (I->getOpcode() == Instruction::FPExt)
- return lookThroughFPExtensions(I->getOperand(0));
+ while (auto *FPExt = dyn_cast<FPExtInst>(V))
+ V = FPExt->getOperand(0);
// If this value is a constant, return the constant in the smallest FP type
// that can accurately represent it. This allows us to turn
// (float)((double)X+2.0) into x+2.0f.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (auto *CFP = dyn_cast<ConstantFP>(V)) {
if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext()))
return V; // No constant folding of this.
// See if the value can be truncated to half and then reextended.
@@ -1392,24 +1434,49 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI.getOperand(0));
if (II) {
switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::fabs: {
- // (fptrunc (fabs x)) -> (fabs (fptrunc x))
- Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0),
- CI.getType());
- Type *IntrinsicType[] = { CI.getType() };
- Function *Overload = Intrinsic::getDeclaration(
- CI.getModule(), II->getIntrinsicID(), IntrinsicType);
-
- SmallVector<OperandBundleDef, 1> OpBundles;
- II->getOperandBundlesAsDefs(OpBundles);
-
- Value *Args[] = { InnerTrunc };
- return CallInst::Create(Overload, Args, OpBundles, II->getName());
+ default: break;
+ case Intrinsic::fabs:
+ case Intrinsic::ceil:
+ case Intrinsic::floor:
+ case Intrinsic::rint:
+ case Intrinsic::round:
+ case Intrinsic::nearbyint:
+ case Intrinsic::trunc: {
+ Value *Src = II->getArgOperand(0);
+ if (!Src->hasOneUse())
+ break;
+
+ // Except for fabs, this transformation requires the input of the unary FP
+ // operation to be itself an fpext from the type to which we're
+ // truncating.
+ if (II->getIntrinsicID() != Intrinsic::fabs) {
+ FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
+ if (!FPExtSrc || FPExtSrc->getOperand(0)->getType() != CI.getType())
+ break;
}
+
+ // Do unary FP operation on smaller type.
+ // (fptrunc (fabs x)) -> (fabs (fptrunc x))
+ Value *InnerTrunc = Builder->CreateFPTrunc(Src, CI.getType());
+ Type *IntrinsicType[] = { CI.getType() };
+ Function *Overload = Intrinsic::getDeclaration(
+ CI.getModule(), II->getIntrinsicID(), IntrinsicType);
+
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ II->getOperandBundlesAsDefs(OpBundles);
+
+ Value *Args[] = { InnerTrunc };
+ CallInst *NewCI = CallInst::Create(Overload, Args,
+ OpBundles, II->getName());
+ NewCI->copyFastMathFlags(II);
+ return NewCI;
+ }
}
}
+ if (Instruction *I = shrinkInsertElt(CI, *Builder))
+ return I;
+
return nullptr;
}
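
A sketch of the widened fold for the rounding intrinsics (function name hypothetical; for fabs the inner fpext is not required). The double floor should collapse to a float floor on the original value:

declare double @llvm.floor.f64(double)

define float @narrow_floor(float %x) {
  ; fptrunc (floor (fpext X)) --> floor(X), computed in the narrow type
  %e = fpext float %x to double
  %f = call double @llvm.floor.f64(double %e)
  %t = fptrunc double %f to float
  ret float %t
}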
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 428f94bb5e93..bbafa9e9f468 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -230,7 +230,9 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
return nullptr;
uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
- if (ArrayElementCount > 1024) return nullptr; // Don't blow up on huge arrays.
+ // Don't blow up on huge arrays.
+ if (ArrayElementCount > MaxArraySizeForCombine)
+ return nullptr;
// There are many forms of this optimization we can handle; for now, just do
// the simple index into a single-dimensional array.
@@ -1663,7 +1665,7 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
(Cmp.isEquality() || (!C1->isNegative() && !C2->isNegative()))) {
// TODO: Is this a good transform for vectors? Wider types may reduce
// throughput. Should this transform be limited (even for scalars) by using
- // ShouldChangeType()?
+ // shouldChangeType()?
if (!Cmp.getType()->isVectorTy()) {
Type *WideType = W->getType();
unsigned WideScalarBits = WideType->getScalarSizeInBits();
@@ -1792,6 +1794,15 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
ConstantInt::get(V->getType(), 1));
}
+ // X | C == C --> X <=u C
+ // X | C != C --> X >u C
+ // iff C+1 is a power of 2 (C is a bitmask of the low bits)
+ if (Cmp.isEquality() && Cmp.getOperand(1) == Or->getOperand(1) &&
+ (*C + 1).isPowerOf2()) {
+ Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
+ return new ICmpInst(Pred, Or->getOperand(0), Or->getOperand(1));
+ }
+
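
A sketch of the new fold (function name hypothetical): with C = 7, C+1 is a power of two, so the or-with-mask equality becomes an unsigned range check:

define i1 @or_masked_cmp(i8 %x) {
  ; X | 7 == 7 --> X <=u 7   (7 + 1 is a power of 2)
  %or = or i8 %x, 7
  %cmp = icmp eq i8 %or, 7
  ret i1 %cmp
}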
if (!Cmp.isEquality() || *C != 0 || !Or->hasOneUse())
return nullptr;
@@ -1914,61 +1925,89 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
ICmpInst::Predicate Pred = Cmp.getPredicate();
Value *X = Shl->getOperand(0);
- if (Cmp.isEquality()) {
- // If the shift is NUW, then it is just shifting out zeros, no need for an
- // AND.
- Constant *LShrC = ConstantInt::get(Shl->getType(), C->lshr(*ShiftAmt));
- if (Shl->hasNoUnsignedWrap())
- return new ICmpInst(Pred, X, LShrC);
-
- // If the shift is NSW and we compare to 0, then it is just shifting out
- // sign bits, no need for an AND either.
- if (Shl->hasNoSignedWrap() && *C == 0)
- return new ICmpInst(Pred, X, LShrC);
-
- if (Shl->hasOneUse()) {
- // Otherwise, strength reduce the shift into an and.
- Constant *Mask = ConstantInt::get(Shl->getType(),
- APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue()));
-
- Value *And = Builder->CreateAnd(X, Mask, Shl->getName() + ".mask");
- return new ICmpInst(Pred, And, LShrC);
+ Type *ShType = Shl->getType();
+
+ // NSW guarantees that we are only shifting out sign bits from the high bits,
+ // so we can ASHR the compare constant without needing a mask and eliminate
+ // the shift.
+ if (Shl->hasNoSignedWrap()) {
+ if (Pred == ICmpInst::ICMP_SGT) {
+ // icmp Pred (shl nsw X, ShiftAmt), C --> icmp Pred X, (C >>s ShiftAmt)
+ APInt ShiftedC = C->ashr(*ShiftAmt);
+ return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
+ }
+ if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) {
+ // This is the same code as the SGT case, but assert the pre-condition
+ // that is needed for this to work with equality predicates.
+ assert(C->ashr(*ShiftAmt).shl(*ShiftAmt) == *C &&
+ "Compare known true or false was not folded");
+ APInt ShiftedC = C->ashr(*ShiftAmt);
+ return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
+ }
+ if (Pred == ICmpInst::ICMP_SLT) {
+ // SLE is the same as above, but SLE is canonicalized to SLT, so convert:
+ // (X << S) <=s C is equiv to X <=s (C >> S) for all C
+ // (X << S) <s (C + 1) is equiv to X <s (C >> S) + 1 if C <s SMAX
+ // (X << S) <s C is equiv to X <s ((C - 1) >> S) + 1 if C >s SMIN
+ assert(!C->isMinSignedValue() && "Unexpected icmp slt");
+ APInt ShiftedC = (*C - 1).ashr(*ShiftAmt) + 1;
+ return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
+ }
+ // If this is a signed comparison to 0 and the shift is sign preserving,
+ // use the shift LHS operand instead; isSignTest may change 'Pred', so only
+ // do that if we're sure to not continue on in this function.
+ if (isSignTest(Pred, *C))
+ return new ICmpInst(Pred, X, Constant::getNullValue(ShType));
+ }
+
+ // NUW guarantees that we are only shifting out zero bits from the high bits,
+ // so we can LSHR the compare constant without needing a mask and eliminate
+ // the shift.
+ if (Shl->hasNoUnsignedWrap()) {
+ if (Pred == ICmpInst::ICMP_UGT) {
+ // icmp Pred (shl nuw X, ShiftAmt), C --> icmp Pred X, (C >>u ShiftAmt)
+ APInt ShiftedC = C->lshr(*ShiftAmt);
+ return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
+ }
+ if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) {
+ // This is the same code as the UGT case, but assert the pre-condition
+ // that is needed for this to work with equality predicates.
+ assert(C->lshr(*ShiftAmt).shl(*ShiftAmt) == *C &&
+ "Compare known true or false was not folded");
+ APInt ShiftedC = C->lshr(*ShiftAmt);
+ return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
+ }
+ if (Pred == ICmpInst::ICMP_ULT) {
+ // ULE is the same as above, but ULE is canonicalized to ULT, so convert:
+ // (X << S) <=u C is equiv to X <=u (C >> S) for all C
+ // (X << S) <u (C + 1) is equiv to X <u (C >> S) + 1 if C <u ~0u
+ // (X << S) <u C is equiv to X <u ((C - 1) >> S) + 1 if C >u 0
+ assert(C->ugt(0) && "ult 0 should have been eliminated");
+ APInt ShiftedC = (*C - 1).lshr(*ShiftAmt) + 1;
+ return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
}
}
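
Two sketches of the new flag-based folds (function names hypothetical); in both, the compare constant is shifted instead of masking the shifted value:

define i1 @shl_nsw_sgt(i8 %x) {
  ; icmp sgt (shl nsw X, 3), 16 --> icmp sgt X, 2   (2 == 16 ashr 3)
  %shl = shl nsw i8 %x, 3
  %cmp = icmp sgt i8 %shl, 16
  ret i1 %cmp
}

define i1 @shl_nuw_ugt(i8 %x) {
  ; icmp ugt (shl nuw X, 3), 16 --> icmp ugt X, 2   (2 == 16 lshr 3)
  %shl = shl nuw i8 %x, 3
  %cmp = icmp ugt i8 %shl, 16
  ret i1 %cmp
}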
- // If this is a signed comparison to 0 and the shift is sign preserving,
- // use the shift LHS operand instead; isSignTest may change 'Pred', so only
- // do that if we're sure to not continue on in this function.
- if (Shl->hasNoSignedWrap() && isSignTest(Pred, *C))
- return new ICmpInst(Pred, X, Constant::getNullValue(X->getType()));
+ if (Cmp.isEquality() && Shl->hasOneUse()) {
+ // Strength-reduce the shift into an 'and'.
+ Constant *Mask = ConstantInt::get(
+ ShType,
+ APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue()));
+ Value *And = Builder->CreateAnd(X, Mask, Shl->getName() + ".mask");
+ Constant *LShrC = ConstantInt::get(ShType, C->lshr(*ShiftAmt));
+ return new ICmpInst(Pred, And, LShrC);
+ }
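
When neither wrap flag is present, a sketch of the strength reduction (name hypothetical): the five low bits of %x survive the i8 shift by 3, so the compare is rewritten against the masked value:

define i1 @shl_eq_mask(i8 %x) {
  ; icmp eq (shl X, 3), 16 --> icmp eq (and X, 31), 2
  %shl = shl i8 %x, 3
  %cmp = icmp eq i8 %shl, 16
  ret i1 %cmp
}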
// Otherwise, if this is a comparison of the sign bit, simplify to and/test.
bool TrueIfSigned = false;
if (Shl->hasOneUse() && isSignBitCheck(Pred, *C, TrueIfSigned)) {
// (X << 31) <s 0 --> (X & 1) != 0
Constant *Mask = ConstantInt::get(
- X->getType(),
+ ShType,
APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1));
Value *And = Builder->CreateAnd(X, Mask, Shl->getName() + ".mask");
return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
- And, Constant::getNullValue(And->getType()));
- }
-
- // When the shift is nuw and pred is >u or <=u, comparison only really happens
- // in the pre-shifted bits. Since InstSimplify canonicalizes <=u into <u, the
- // <=u case can be further converted to match <u (see below).
- if (Shl->hasNoUnsignedWrap() &&
- (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT)) {
- // Derivation for the ult case:
- // (X << S) <=u C is equiv to X <=u (C >> S) for all C
- // (X << S) <u (C + 1) is equiv to X <u (C >> S) + 1 if C <u ~0u
- // (X << S) <u C is equiv to X <u ((C - 1) >> S) + 1 if C >u 0
- assert((Pred != ICmpInst::ICMP_ULT || C->ugt(0)) &&
- "Encountered `ult 0` that should have been eliminated by "
- "InstSimplify.");
- APInt ShiftedC = Pred == ICmpInst::ICMP_ULT ? (*C - 1).lshr(*ShiftAmt) + 1
- : C->lshr(*ShiftAmt);
- return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), ShiftedC));
+ And, Constant::getNullValue(ShType));
}
// Transform (icmp pred iM (shl iM %v, N), C)
@@ -1981,8 +2020,8 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
if (Shl->hasOneUse() && Amt != 0 && C->countTrailingZeros() >= Amt &&
DL.isLegalInteger(TypeBits - Amt)) {
Type *TruncTy = IntegerType::get(Cmp.getContext(), TypeBits - Amt);
- if (X->getType()->isVectorTy())
- TruncTy = VectorType::get(TruncTy, X->getType()->getVectorNumElements());
+ if (ShType->isVectorTy())
+ TruncTy = VectorType::get(TruncTy, ShType->getVectorNumElements());
Constant *NewC =
ConstantInt::get(TruncTy, C->ashr(*ShiftAmt).trunc(TypeBits - Amt));
return new ICmpInst(Pred, Builder->CreateTrunc(X, TruncTy), NewC);
@@ -2342,8 +2381,24 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
// Fold icmp pred (add X, C2), C.
Value *X = Add->getOperand(0);
Type *Ty = Add->getType();
- auto CR =
- ConstantRange::makeExactICmpRegion(Cmp.getPredicate(), *C).subtract(*C2);
+ CmpInst::Predicate Pred = Cmp.getPredicate();
+
+ // If the add does not wrap, we can always adjust the compare by subtracting
+ // the constants. Equality comparisons are handled elsewhere. SGE/SLE are
+ // canonicalized to SGT/SLT.
+ if (Add->hasNoSignedWrap() &&
+ (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) {
+ bool Overflow;
+ APInt NewC = C->ssub_ov(*C2, Overflow);
+ // If there is overflow, the result must be true or false.
+ // TODO: Can we assert there is no overflow because InstSimplify always
+ // handles those cases?
+ if (!Overflow)
+ // icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2)
+ return new ICmpInst(Pred, X, ConstantInt::get(Ty, NewC));
+ }
+
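
A sketch of the nsw add fold (name hypothetical); since 20 - 5 does not overflow in i8, the compare constant is simply adjusted:

define i1 @add_nsw_sgt(i8 %x) {
  ; icmp sgt (add nsw X, 5), 20 --> icmp sgt X, 15
  %add = add nsw i8 %x, 5
  %cmp = icmp sgt i8 %add, 20
  ret i1 %cmp
}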
+ auto CR = ConstantRange::makeExactICmpRegion(Pred, *C).subtract(*C2);
const APInt &Upper = CR.getUpper();
const APInt &Lower = CR.getLower();
if (Cmp.isSigned()) {
@@ -2364,16 +2419,14 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
// X+C <u C2 -> (X & -C2) == C
// iff C & (C2-1) == 0
// C2 is a power of 2
- if (Cmp.getPredicate() == ICmpInst::ICMP_ULT && C->isPowerOf2() &&
- (*C2 & (*C - 1)) == 0)
+ if (Pred == ICmpInst::ICMP_ULT && C->isPowerOf2() && (*C2 & (*C - 1)) == 0)
return new ICmpInst(ICmpInst::ICMP_EQ, Builder->CreateAnd(X, -(*C)),
ConstantExpr::getNeg(cast<Constant>(Y)));
// X+C >u C2 -> (X & ~C2) != C
// iff C & C2 == 0
// C2+1 is a power of 2
- if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && (*C + 1).isPowerOf2() &&
- (*C2 & *C) == 0)
+ if (Pred == ICmpInst::ICMP_UGT && (*C + 1).isPowerOf2() && (*C2 & *C) == 0)
return new ICmpInst(ICmpInst::ICMP_NE, Builder->CreateAnd(X, ~(*C)),
ConstantExpr::getNeg(cast<Constant>(Y)));
@@ -2656,7 +2709,7 @@ Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
// block. If in the same block, we're encouraging jump threading. If
// not, we are just pessimizing the code by making an i1 phi.
if (LHSI->getParent() == I.getParent())
- if (Instruction *NV = FoldOpIntoPhi(I))
+ if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
return NV;
break;
case Instruction::Select: {
@@ -2767,12 +2820,6 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
D = BO1->getOperand(1);
}
- // icmp (X+cst) < 0 --> X < -cst
- if (NoOp0WrapProblem && ICmpInst::isSigned(Pred) && match(Op1, m_Zero()))
- if (ConstantInt *RHSC = dyn_cast_or_null<ConstantInt>(B))
- if (!RHSC->isMinValue(/*isSigned=*/true))
- return new ICmpInst(Pred, A, ConstantExpr::getNeg(RHSC));
-
// icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
return new ICmpInst(Pred, A == Op1 ? B : A,
@@ -2847,6 +2894,31 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One()))
return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
+ // TODO: The subtraction-related identities shown below also hold, but
+ // canonicalization from (X -nuw 1) to (X + -1) means that the combinations
+ // wouldn't happen even if they were implemented.
+ //
+ // icmp ult (X - 1), Y -> icmp ule X, Y
+ // icmp uge (X - 1), Y -> icmp ugt X, Y
+ // icmp ugt X, (Y - 1) -> icmp uge X, Y
+ // icmp ule X, (Y - 1) -> icmp ult X, Y
+
+ // icmp ule (X + 1), Y -> icmp ult X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_ULE && match(B, m_One()))
+ return new ICmpInst(CmpInst::ICMP_ULT, A, Op1);
+
+ // icmp ugt (X + 1), Y -> icmp uge X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_UGT && match(B, m_One()))
+ return new ICmpInst(CmpInst::ICMP_UGE, A, Op1);
+
+ // icmp uge X, (Y + 1) -> icmp ugt X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_UGE && match(D, m_One()))
+ return new ICmpInst(CmpInst::ICMP_UGT, Op0, C);
+
+ // icmp ult X, (Y + 1) -> icmp ule X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_ULT && match(D, m_One()))
+ return new ICmpInst(CmpInst::ICMP_ULE, Op0, C);
+
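
A sketch of one of the four add-one folds (name hypothetical), assuming the increment carries nuw so there is no wrap problem on operand 0:

define i1 @ule_add_one(i8 %x, i8 %y) {
  ; icmp ule (X +nuw 1), Y --> icmp ult X, Y
  %inc = add nuw i8 %x, 1
  %cmp = icmp ule i8 %inc, %y
  ret i1 %cmp
}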
// if C1 has greater magnitude than C2:
// icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
// s.t. C3 = C1 - C2
@@ -3738,16 +3810,14 @@ static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth,
// greater than the RHS must differ in a bit higher than these due to carry.
case ICmpInst::ICMP_UGT: {
unsigned trailingOnes = RHS.countTrailingOnes();
- APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingOnes);
- return ~lowBitsSet;
+ return APInt::getBitsSetFrom(BitWidth, trailingOnes);
}
// Similarly, for a ULT comparison, we don't care about the trailing zeros.
// Any value less than the RHS must differ in a higher bit because of carries.
case ICmpInst::ICMP_ULT: {
unsigned trailingZeros = RHS.countTrailingZeros();
- APInt lowBitsSet = APInt::getLowBitsSet(BitWidth, trailingZeros);
- return ~lowBitsSet;
+ return APInt::getBitsSetFrom(BitWidth, trailingZeros);
}
default:
@@ -3887,7 +3957,7 @@ bool InstCombiner::replacedSelectWithOperand(SelectInst *SI,
assert((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!");
if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) {
BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1);
- // The check for the unique predecessor is not the best that can be
+ // The check for the single predecessor is not the best that can be
// done. But it protects efficiently against cases like when SI's
// home block has two successors, Succ and Succ1, and Succ1 is a predecessor
// of Succ. Then SI can't be replaced by SIOpd because the use that gets
@@ -3895,8 +3965,10 @@ bool InstCombiner::replacedSelectWithOperand(SelectInst *SI,
// guarantees that the path all uses of SI (outside SI's parent) are on
// is disjoint from all other paths out of SI. But that information
// is more expensive to compute, and the trade-off here is in favor
- // of compile-time.
- if (Succ->getUniquePredecessor() && dominatesAllUses(SI, Icmp, Succ)) {
+ // of compile-time. Note also that we check for a single predecessor and
+ // not just uniqueness; this handles the situation when Succ and Succ1
+ // point to the same basic block.
+ if (Succ->getSinglePredecessor() && dominatesAllUses(SI, Icmp, Succ)) {
NumSel++;
SI->replaceUsesOutsideBlock(SI->getOperand(SIOpd), SI->getParent());
return true;
@@ -3932,12 +4004,12 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
- if (SimplifyDemandedBits(I.getOperandUse(0),
+ if (SimplifyDemandedBits(&I, 0,
getDemandedBitsLHSMask(I, BitWidth, IsSignBit),
Op0KnownZero, Op0KnownOne, 0))
return &I;
- if (SimplifyDemandedBits(I.getOperandUse(1), APInt::getAllOnesValue(BitWidth),
+ if (SimplifyDemandedBits(&I, 1, APInt::getAllOnesValue(BitWidth),
Op1KnownZero, Op1KnownOne, 0))
return &I;
@@ -4801,7 +4873,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
// block. If in the same block, we're encouraging jump threading. If
// not, we are just pessimizing the code by making an i1 phi.
if (LHSI->getParent() == I.getParent())
- if (Instruction *NV = FoldOpIntoPhi(I))
+ if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
return NV;
break;
case Instruction::SIToFP:
diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h
index 2847ce858e79..71000063ab3c 100644
--- a/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -28,6 +28,9 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/IR/DIBuilder.h"
#define DEBUG_TYPE "instcombine"
@@ -40,21 +43,29 @@ class DbgDeclareInst;
class MemIntrinsic;
class MemSetInst;
-/// \brief Assign a complexity or rank value to LLVM Values.
+/// Assign a complexity or rank value to LLVM Values. This is used to reduce
+/// the amount of pattern matching needed for compares and commutative
+/// instructions. For example, if we have:
+/// icmp ugt X, Constant
+/// or
+/// xor (add X, Constant), cast Z
+///
+/// We do not have to consider the commuted variants of these patterns because
+/// canonicalization based on complexity guarantees the above ordering.
///
/// This routine maps IR values to various complexity ranks:
/// 0 -> undef
/// 1 -> Constants
/// 2 -> Other non-instructions
/// 3 -> Arguments
-/// 3 -> Unary operations
-/// 4 -> Other instructions
+/// 4 -> Cast and (f)neg/not instructions
+/// 5 -> Other instructions
static inline unsigned getComplexity(Value *V) {
if (isa<Instruction>(V)) {
- if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) ||
- BinaryOperator::isNot(V))
- return 3;
- return 4;
+ if (isa<CastInst>(V) || BinaryOperator::isNeg(V) ||
+ BinaryOperator::isFNeg(V) || BinaryOperator::isNot(V))
+ return 4;
+ return 5;
}
if (isa<Argument>(V))
return 3;
@@ -289,6 +300,7 @@ public:
Instruction *visitLoadInst(LoadInst &LI);
Instruction *visitStoreInst(StoreInst &SI);
Instruction *visitBranchInst(BranchInst &BI);
+ Instruction *visitFenceInst(FenceInst &FI);
Instruction *visitSwitchInst(SwitchInst &SI);
Instruction *visitReturnInst(ReturnInst &RI);
Instruction *visitInsertValueInst(InsertValueInst &IV);
@@ -313,9 +325,14 @@ public:
bool replacedSelectWithOperand(SelectInst *SI, const ICmpInst *Icmp,
const unsigned SIOpd);
+ /// Try to replace instruction \p I with value \p V, where \p I and \p V
+ /// are pointers of the same element type but in different address spaces.
+ /// \return true if successful.
+ bool replacePointer(Instruction &I, Value *V);
+
private:
- bool ShouldChangeType(unsigned FromBitWidth, unsigned ToBitWidth) const;
- bool ShouldChangeType(Type *From, Type *To) const;
+ bool shouldChangeType(unsigned FromBitWidth, unsigned ToBitWidth) const;
+ bool shouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const;
Type *FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
@@ -456,8 +473,9 @@ public:
/// methods should return the value returned by this function.
Instruction *eraseInstFromFunction(Instruction &I) {
DEBUG(dbgs() << "IC: ERASE " << I << '\n');
-
assert(I.use_empty() && "Cannot erase instruction that is used!");
+ salvageDebugInfo(I);
+
// Make sure that we reprocess all operands now that we reduced their
// use counts.
if (I.getNumOperands() < 8) {
@@ -499,6 +517,9 @@ public:
return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT);
}
+ /// Maximum size of an array considered when transforming.
+ uint64_t MaxArraySizeForCombine;
+
private:
/// \brief Performs a few simplifications for operators which are associative
/// or commutative.
@@ -518,8 +539,16 @@ private:
Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero,
APInt &KnownOne, unsigned Depth,
Instruction *CxtI);
- bool SimplifyDemandedBits(Use &U, const APInt &DemandedMask, APInt &KnownZero,
+ bool SimplifyDemandedBits(Instruction *I, unsigned Op,
+ const APInt &DemandedMask, APInt &KnownZero,
APInt &KnownOne, unsigned Depth = 0);
+ /// Helper routine of SimplifyDemandedUseBits. It computes KnownZero/KnownOne
+ /// bits. It also tries to handle simplifications that can be done based on
+ /// DemandedMask, but without modifying the Instruction.
+ Value *SimplifyMultipleUseDemandedBits(Instruction *I,
+ const APInt &DemandedMask,
+ APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth, Instruction *CxtI);
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
/// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence.
Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl,
@@ -540,7 +569,7 @@ private:
/// Given a binary operator, cast instruction, or select which has a PHI node
/// as operand #0, see if we can fold the instruction into the PHI (which is
/// only possible if all operands to the PHI are constants).
- Instruction *FoldOpIntoPhi(Instruction &I);
+ Instruction *foldOpIntoPhi(Instruction &I, PHINode *PN);
/// Given an instruction with a select as one operand and a constant as the
/// other operand, try to fold the binary operator into the select arguments.
@@ -549,7 +578,7 @@ private:
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
/// This is a convenience wrapper function for the above two functions.
- Instruction *foldOpWithConstantIntoOperand(Instruction &I);
+ Instruction *foldOpWithConstantIntoOperand(BinaryOperator &I);
/// \brief Try to rotate an operation below a PHI node, using PHI nodes for
/// its operands.
@@ -628,16 +657,16 @@ private:
SelectPatternFlavor SPF2, Value *C);
Instruction *foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
- Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
+ Instruction *OptAndOp(BinaryOperator *Op, ConstantInt *OpRHS,
ConstantInt *AndRHS, BinaryOperator &TheAnd);
- Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
- bool isSub, Instruction &I);
Value *insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi,
bool isSigned, bool Inside);
Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
Instruction *MatchBSwap(BinaryOperator &I);
bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
+
+ Instruction *SimplifyElementAtomicMemCpy(ElementAtomicMemCpyInst *AMI);
Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
Instruction *SimplifyMemSet(MemSetInst *MI);
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 49e516e9c176..6288e054f1bc 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -12,13 +12,15 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -223,6 +225,107 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
return nullptr;
}
+namespace {
+// If I and V are pointers to different address spaces, it is not valid to
+// use replaceAllUsesWith, since I and V have different types. A
+// non-target-specific transformation should not use addrspacecast on V,
+// since the two address spaces may be disjoint depending on the target.
+//
+// This class chases down uses of the old pointer until it reaches load
+// instructions, then replaces the old pointer in those loads with the new
+// pointer. If it encounters a bitcast or GEP along the way, it creates a
+// new bitcast or GEP with the new pointer and uses that in the load
+// instruction.
+class PointerReplacer {
+public:
+ PointerReplacer(InstCombiner &IC) : IC(IC) {}
+ void replacePointer(Instruction &I, Value *V);
+
+private:
+ void findLoadAndReplace(Instruction &I);
+ void replace(Instruction *I);
+ Value *getReplacement(Value *I);
+
+ SmallVector<Instruction *, 4> Path;
+ MapVector<Value *, Value *> WorkMap;
+ InstCombiner &IC;
+};
+} // end anonymous namespace
+
+void PointerReplacer::findLoadAndReplace(Instruction &I) {
+ for (auto U : I.users()) {
+ auto *Inst = dyn_cast<Instruction>(&*U);
+ if (!Inst)
+ return;
+ DEBUG(dbgs() << "Found pointer user: " << *U << '\n');
+ if (isa<LoadInst>(Inst)) {
+ for (auto P : Path)
+ replace(P);
+ replace(Inst);
+ } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
+ Path.push_back(Inst);
+ findLoadAndReplace(*Inst);
+ Path.pop_back();
+ } else {
+ return;
+ }
+ }
+}
+
+Value *PointerReplacer::getReplacement(Value *V) {
+ auto Loc = WorkMap.find(V);
+ if (Loc != WorkMap.end())
+ return Loc->second;
+ return nullptr;
+}
+
+void PointerReplacer::replace(Instruction *I) {
+ if (getReplacement(I))
+ return;
+
+ if (auto *LT = dyn_cast<LoadInst>(I)) {
+ auto *V = getReplacement(LT->getPointerOperand());
+ assert(V && "Operand not replaced");
+ auto *NewI = new LoadInst(V);
+ NewI->takeName(LT);
+ IC.InsertNewInstWith(NewI, *LT);
+ IC.replaceInstUsesWith(*LT, NewI);
+ WorkMap[LT] = NewI;
+ } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ auto *V = getReplacement(GEP->getPointerOperand());
+ assert(V && "Operand not replaced");
+ SmallVector<Value *, 8> Indices;
+ Indices.append(GEP->idx_begin(), GEP->idx_end());
+ auto *NewI = GetElementPtrInst::Create(
+ V->getType()->getPointerElementType(), V, Indices);
+ IC.InsertNewInstWith(NewI, *GEP);
+ NewI->takeName(GEP);
+ WorkMap[GEP] = NewI;
+ } else if (auto *BC = dyn_cast<BitCastInst>(I)) {
+ auto *V = getReplacement(BC->getOperand(0));
+ assert(V && "Operand not replaced");
+ auto *NewT = PointerType::get(BC->getType()->getPointerElementType(),
+ V->getType()->getPointerAddressSpace());
+ auto *NewI = new BitCastInst(V, NewT);
+ IC.InsertNewInstWith(NewI, *BC);
+ NewI->takeName(BC);
+ WorkMap[BC] = NewI;
+ } else {
+ llvm_unreachable("should never reach here");
+ }
+}
+
+void PointerReplacer::replacePointer(Instruction &I, Value *V) {
+#ifndef NDEBUG
+ auto *PT = cast<PointerType>(I.getType());
+ auto *NT = cast<PointerType>(V->getType());
+ assert(PT != NT && PT->getElementType() == NT->getElementType() &&
+ "Invalid usage");
+#endif
+ WorkMap[&I] = V;
+ findLoadAndReplace(I);
+}
+
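+// A hedged end-to-end sketch of the case this enables (the global, the
+// function, and the five-operand memcpy signature of this era are
+// assumptions): the alloca is initialized from a constant global in a
+// different address space, so instead of RAUW'ing the alloca, the load is
+// rewritten to read through a GEP on @g directly:
+//
+//   @g = addrspace(2) constant [8 x i32] zeroinitializer
+//
+//   declare void @llvm.memcpy.p0i8.p2i8.i64(i8*, i8 addrspace(2)*,
+//                                           i64, i32, i1)
+//
+//   define i32 @read_copy(i64 %i) {
+//     %a = alloca [8 x i32]
+//     %ac = bitcast [8 x i32]* %a to i8*
+//     %gc = bitcast [8 x i32] addrspace(2)* @g to i8 addrspace(2)*
+//     call void @llvm.memcpy.p0i8.p2i8.i64(i8* %ac, i8 addrspace(2)* %gc,
+//                                          i64 32, i32 4, i1 false)
+//     %p = getelementptr [8 x i32], [8 x i32]* %a, i64 0, i64 %i
+//     %v = load i32, i32* %p
+//     ret i32 %v
+//   }
+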
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
if (auto *I = simplifyAllocaArraySize(*this, AI))
return I;
@@ -293,12 +396,22 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
eraseInstFromFunction(*ToDelete[i]);
Constant *TheSrc = cast<Constant>(Copy->getSource());
- Constant *Cast
- = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
- Instruction *NewI = replaceInstUsesWith(AI, Cast);
- eraseInstFromFunction(*Copy);
- ++NumGlobalCopies;
- return NewI;
+ auto *SrcTy = TheSrc->getType();
+ auto *DestTy = PointerType::get(AI.getType()->getPointerElementType(),
+ SrcTy->getPointerAddressSpace());
+ Constant *Cast =
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, DestTy);
+ if (AI.getType()->getPointerAddressSpace() ==
+ SrcTy->getPointerAddressSpace()) {
+ Instruction *NewI = replaceInstUsesWith(AI, Cast);
+ eraseInstFromFunction(*Copy);
+ ++NumGlobalCopies;
+ return NewI;
+ } else {
+ PointerReplacer PtrReplacer(*this);
+ PtrReplacer.replacePointer(AI, Cast);
+ ++NumGlobalCopies;
+ }
}
}
}
@@ -608,7 +721,7 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
// arrays of arbitrary size but this has a terrible impact on compile time.
// The threshold here is chosen arbitrarily and may need a little bit of
// tuning.
- if (NumElements > 1024)
+ if (NumElements > IC.MaxArraySizeForCombine)
return nullptr;
const DataLayout &DL = IC.getDataLayout();
@@ -1113,7 +1226,7 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
// arrays of arbitrary size but this has a terrible impact on compile time.
// The threshold here is chosen arbitrarily and may need a little bit of
// tuning.
- if (NumElements > 1024)
+ if (NumElements > IC.MaxArraySizeForCombine)
return false;
const DataLayout &DL = IC.getDataLayout();
@@ -1268,8 +1381,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
break;
}
- // Don't skip over loads or things that can modify memory.
- if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
+ // Don't skip over loads, throws or things that can modify memory.
+ if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
break;
}
@@ -1392,8 +1505,8 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
}
// If we find something that may be using or overwriting the stored
// value, or if we run out of instructions, we can't do the xform.
- if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
- BBI == OtherBB->begin())
+ if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
+ BBI->mayWriteToMemory() || BBI == OtherBB->begin())
return false;
}
@@ -1402,7 +1515,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
// StoreBB.
for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
// FIXME: This should really be AA driven.
- if (I->mayReadFromMemory() || I->mayWriteToMemory())
+ if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
return false;
}
}
@@ -1425,7 +1538,9 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
SI.getOrdering(),
SI.getSynchScope());
InsertNewInstBefore(NewSI, *BBI);
- NewSI->setDebugLoc(OtherStore->getDebugLoc());
+ // The debug locations of the original instructions might differ; merge them.
+ NewSI->setDebugLoc(DILocation::getMergedLocation(SI.getDebugLoc(),
+ OtherStore->getDebugLoc()));
// If the two stores had AA tags, merge them.
AAMDNodes AATags;
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 45a19fb0f1f2..f1ac82057e6c 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -298,39 +298,33 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
// (X / Y) * Y = X - (X % Y)
// (X / Y) * -Y = (X % Y) - X
{
- Value *Op1C = Op1;
- BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
- if (!BO ||
- (BO->getOpcode() != Instruction::UDiv &&
- BO->getOpcode() != Instruction::SDiv)) {
- Op1C = Op0;
- BO = dyn_cast<BinaryOperator>(Op1);
+ Value *Y = Op1;
+ BinaryOperator *Div = dyn_cast<BinaryOperator>(Op0);
+ if (!Div || (Div->getOpcode() != Instruction::UDiv &&
+ Div->getOpcode() != Instruction::SDiv)) {
+ Y = Op0;
+ Div = dyn_cast<BinaryOperator>(Op1);
}
- Value *Neg = dyn_castNegVal(Op1C);
- if (BO && BO->hasOneUse() &&
- (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) &&
- (BO->getOpcode() == Instruction::UDiv ||
- BO->getOpcode() == Instruction::SDiv)) {
- Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);
+ Value *Neg = dyn_castNegVal(Y);
+ if (Div && Div->hasOneUse() &&
+ (Div->getOperand(1) == Y || Div->getOperand(1) == Neg) &&
+ (Div->getOpcode() == Instruction::UDiv ||
+ Div->getOpcode() == Instruction::SDiv)) {
+ Value *X = Div->getOperand(0), *DivOp1 = Div->getOperand(1);
// If the division is exact, X % Y is zero, so we end up with X or -X.
- if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO))
- if (SDiv->isExact()) {
- if (Op1BO == Op1C)
- return replaceInstUsesWith(I, Op0BO);
- return BinaryOperator::CreateNeg(Op0BO);
- }
-
- Value *Rem;
- if (BO->getOpcode() == Instruction::UDiv)
- Rem = Builder->CreateURem(Op0BO, Op1BO);
- else
- Rem = Builder->CreateSRem(Op0BO, Op1BO);
- Rem->takeName(BO);
+ if (Div->isExact()) {
+ if (DivOp1 == Y)
+ return replaceInstUsesWith(I, X);
+ return BinaryOperator::CreateNeg(X);
+ }
- if (Op1BO == Op1C)
- return BinaryOperator::CreateSub(Op0BO, Rem);
- return BinaryOperator::CreateSub(Rem, Op0BO);
+ auto RemOpc = Div->getOpcode() == Instruction::UDiv ? Instruction::URem
+ : Instruction::SRem;
+ Value *Rem = Builder->CreateBinOp(RemOpc, X, DivOp1);
+ if (DivOp1 == Y)
+ return BinaryOperator::CreateSub(X, Rem);
+ return BinaryOperator::CreateSub(Rem, X);
}
}
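
A sketch of the udiv flavor of this fold (name hypothetical); without the exact flag, the product becomes a subtract of the remainder:

define i32 @mul_of_udiv(i32 %x, i32 %y) {
  ; (X /u Y) * Y --> X - (X %u Y)
  %div = udiv i32 %x, %y
  %mul = mul i32 %div, %y
  ret i32 %mul
}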
@@ -1461,16 +1455,16 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) {
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
- } else if (isa<PHINode>(Op0I)) {
+ } else if (auto *PN = dyn_cast<PHINode>(Op0I)) {
using namespace llvm::PatternMatch;
const APInt *Op1Int;
if (match(Op1, m_APInt(Op1Int)) && !Op1Int->isMinValue() &&
(I.getOpcode() == Instruction::URem ||
!Op1Int->isMinSignedValue())) {
- // FoldOpIntoPhi will speculate instructions to the end of the PHI's
+ // foldOpIntoPhi will speculate instructions to the end of the PHI's
// predecessor blocks, so do this only if we know the srem or urem
// will not fault.
- if (Instruction *NV = FoldOpIntoPhi(I))
+ if (Instruction *NV = foldOpIntoPhi(I, PN))
return NV;
}
}
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 4cbffe9533b7..85e5b6ba2dc2 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -457,8 +457,8 @@ Instruction *InstCombiner::FoldPHIArgZextsIntoPHI(PHINode &Phi) {
}
// The more common cases of a phi with no constant operands or just one
- // variable operand are handled by FoldPHIArgOpIntoPHI() and FoldOpIntoPhi()
- // respectively. FoldOpIntoPhi() wants to do the opposite transform that is
+ // variable operand are handled by FoldPHIArgOpIntoPHI() and foldOpIntoPhi()
+ // respectively. foldOpIntoPhi() wants to do the opposite transform that is
// performed here. It tries to replicate a cast in the phi operand's basic
// block to expose other folding opportunities. Thus, InstCombine will
// infinite loop without this check.
@@ -507,7 +507,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
// Be careful about transforming integer PHIs. We don't want to pessimize
// the code by turning an i32 into an i1293.
if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) {
- if (!ShouldChangeType(PN.getType(), CastSrcTy))
+ if (!shouldChangeType(PN.getType(), CastSrcTy))
return nullptr;
}
} else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 36644845352e..693b6c95c169 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -120,6 +120,16 @@ static Constant *getSelectFoldableConstant(Instruction *I) {
/// We have (select c, TI, FI), and we know that TI and FI have the same opcode.
Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
Instruction *FI) {
+ // Don't break up min/max patterns. The hasOneUse checks below prevent that
+ // for most cases, but vector min/max with bitcasts can be transformed. If the
+ // one-use restrictions are eased for other patterns, we still don't want to
+ // obfuscate min/max.
+ if ((match(&SI, m_SMin(m_Value(), m_Value())) ||
+ match(&SI, m_SMax(m_Value(), m_Value())) ||
+ match(&SI, m_UMin(m_Value(), m_Value())) ||
+ match(&SI, m_UMax(m_Value(), m_Value()))))
+ return nullptr;
+
// If this is a cast from the same type, merge.
if (TI->getNumOperands() == 1 && TI->isCast()) {
Type *FIOpndTy = FI->getOperand(0)->getType();
@@ -364,7 +374,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
/// into:
/// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false)
static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
- InstCombiner::BuilderTy *Builder) {
+ InstCombiner::BuilderTy *Builder) {
ICmpInst::Predicate Pred = ICI->getPredicate();
Value *CmpLHS = ICI->getOperand(0);
Value *CmpRHS = ICI->getOperand(1);
@@ -395,13 +405,12 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
if (match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) ||
match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS)))) {
IntrinsicInst *II = cast<IntrinsicInst>(Count);
- IRBuilder<> Builder(II);
    // Explicitly clear the 'is_zero_undef' flag.
IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone());
Type *Ty = NewI->getArgOperand(1)->getType();
NewI->setArgOperand(1, Constant::getNullValue(Ty));
- Builder.Insert(NewI);
- return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType());
+ Builder->Insert(NewI);
+ return Builder->CreateZExtOrTrunc(NewI, ValueOnZero->getType());
}
return nullptr;
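
An illustrative check (plain C++, not from the patch) of why the select is redundant: the intrinsic with the zero-is-undef flag cleared already returns the bit width at zero, which is exactly the value the select supplies. __builtin_ctz stands in for llvm.cttz with the flag set.

#include <cassert>
#include <cstdint>

// Models llvm.cttz.i32(X, /*is_zero_undef=*/false): defined at zero.
static unsigned cttz32(uint32_t X) {
  return X == 0 ? 32u : (unsigned)__builtin_ctz(X);
}

int main() {
  for (uint32_t X = 0; X <= 0xFFFF; ++X) {
    // select (icmp eq X, 0), 32, cttz(X, true) --> cttz(X, false)
    unsigned Sel = (X == 0) ? 32u : (unsigned)__builtin_ctz(X);
    assert(Sel == cttz32(X));
  }
  return 0;
}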
@@ -500,18 +509,16 @@ static bool adjustMinMax(SelectInst &Sel, ICmpInst &Cmp) {
return true;
}
-/// If this is an integer min/max where the select's 'true' operand is a
-/// constant, canonicalize that constant to the 'false' operand:
-/// select (icmp Pred X, C), C, X --> select (icmp Pred' X, C), X, C
+/// If this is an integer min/max (icmp + select) with a constant operand,
+/// create the canonical icmp for the min/max operation and canonicalize the
+/// constant to the 'false' operand of the select:
+/// select (icmp Pred X, C1), C2, X --> select (icmp Pred' X, C2), X, C2
+/// Note: if C1 != C2, this will change the icmp constant to the existing
+/// constant operand of the select.
static Instruction *
canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp,
InstCombiner::BuilderTy &Builder) {
- // TODO: We should also canonicalize min/max when the select has a different
- // constant value than the cmp constant, but we need to fix the backend first.
- if (!Cmp.hasOneUse() || !isa<Constant>(Cmp.getOperand(1)) ||
- !isa<Constant>(Sel.getTrueValue()) ||
- isa<Constant>(Sel.getFalseValue()) ||
- Cmp.getOperand(1) != Sel.getTrueValue())
+ if (!Cmp.hasOneUse() || !isa<Constant>(Cmp.getOperand(1)))
return nullptr;
// Canonicalize the compare predicate based on whether we have min or max.
@@ -526,16 +533,25 @@ canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp,
default: return nullptr;
}
- // Canonicalize the constant to the right side.
- if (isa<Constant>(LHS))
- std::swap(LHS, RHS);
+ // Is this already canonical?
+ if (Cmp.getOperand(0) == LHS && Cmp.getOperand(1) == RHS &&
+ Cmp.getPredicate() == NewPred)
+ return nullptr;
+
+ // Create the canonical compare and plug it into the select.
+ Sel.setCondition(Builder.CreateICmp(NewPred, LHS, RHS));
- Value *NewCmp = Builder.CreateICmp(NewPred, LHS, RHS);
- SelectInst *NewSel = SelectInst::Create(NewCmp, LHS, RHS, "", nullptr, &Sel);
+ // If the select operands did not change, we're done.
+ if (Sel.getTrueValue() == LHS && Sel.getFalseValue() == RHS)
+ return &Sel;
- // We swapped the select operands, so swap the metadata too.
- NewSel->swapProfMetadata();
- return NewSel;
+ // If we are swapping the select operands, swap the metadata too.
+ assert(Sel.getTrueValue() == RHS && Sel.getFalseValue() == LHS &&
+ "Unexpected results from matchSelectPattern");
+ Sel.setTrueValue(LHS);
+ Sel.setFalseValue(RHS);
+ Sel.swapProfMetadata();
+ return &Sel;
}
/// Visit a SelectInst that has an ICmpInst as its first operand.
@@ -786,7 +802,9 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
// This transform is performance neutral if we can elide at least one xor from
// the set of three operands, since we'll be tacking on an xor at the very
// end.
- if (IsFreeOrProfitableToInvert(A, NotA, ElidesXor) &&
+ if (SelectPatternResult::isMinOrMax(SPF1) &&
+ SelectPatternResult::isMinOrMax(SPF2) &&
+ IsFreeOrProfitableToInvert(A, NotA, ElidesXor) &&
IsFreeOrProfitableToInvert(B, NotB, ElidesXor) &&
IsFreeOrProfitableToInvert(C, NotC, ElidesXor) && ElidesXor) {
if (!NotA)
@@ -1035,8 +1053,10 @@ static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) {
// If the select condition element is false, choose from the 2nd vector.
Mask.push_back(ConstantInt::get(Int32Ty, i + NumElts));
} else if (isa<UndefValue>(Elt)) {
- // If the select condition element is undef, the shuffle mask is undef.
- Mask.push_back(UndefValue::get(Int32Ty));
+ // Undef in a select condition (choose one of the operands) does not mean
+ // the same thing as undef in a shuffle mask (any value is acceptable), so
+ // give up.
+ return nullptr;
} else {
// Bail out on a constant expression.
return nullptr;
@@ -1364,11 +1384,11 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
// See if we can fold the select into a phi node if the condition is a select.
- if (isa<PHINode>(SI.getCondition()))
+ if (auto *PN = dyn_cast<PHINode>(SI.getCondition()))
// The true/false values have to be live in the PHI predecessor's blocks.
if (canSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
canSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
- if (Instruction *NV = FoldOpIntoPhi(SI))
+ if (Instruction *NV = foldOpIntoPhi(SI, PN))
return NV;
if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
@@ -1450,6 +1470,20 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
}
+ // If we can compute the condition, there's no need for a select.
+ // Like the above fold, we are attempting to reduce compile-time cost by
+ // putting this fold here with limitations rather than in InstSimplify.
+ // The motivation for this call into value tracking is to take advantage of
+ // the assumption cache, so make sure that is populated.
+ if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) {
+ APInt KnownOne(1, 0), KnownZero(1, 0);
+ computeKnownBits(CondVal, KnownZero, KnownOne, 0, &SI);
+ if (KnownOne == 1)
+ return replaceInstUsesWith(SI, TrueVal);
+ if (KnownZero == 1)
+ return replaceInstUsesWith(SI, FalseVal);
+ }
+
if (Instruction *BitCastSel = foldSelectCmpBitcasts(SI, *Builder))
return BitCastSel;
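
A toy model (plain C++; the function and parameter names are hypothetical, not the patch's API) of the early-out above: KnownOne/KnownZero stand in for the 1-bit facts computeKnownBits derives from the assumption cache, and if either is proven, the select collapses to one arm.

#include <cassert>

static int foldSelect(bool KnownOne, bool KnownZero, int TrueVal,
                      int FalseVal, int Unfolded) {
  if (KnownOne)    // condition proven true: keep only the true arm
    return TrueVal;
  if (KnownZero)   // condition proven false: keep only the false arm
    return FalseVal;
  return Unfolded; // nothing proven: the select must stay
}

int main() {
  assert(foldSelect(true, false, 1, 2, -1) == 1);
  assert(foldSelect(false, true, 1, 2, -1) == 2);
  assert(foldSelect(false, false, 1, 2, -1) == -1);
  return 0;
}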
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 4ff9b64ac57c..9aa679c60e47 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -22,8 +22,8 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
- assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ assert(Op0->getType() == Op1->getType());
// See if we can fold away this shift.
if (SimplifyDemandedInstructionBits(I))
@@ -65,63 +65,60 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
}
/// Return true if we can simplify two logical (either left or right) shifts
-/// that have constant shift amounts.
-static bool canEvaluateShiftedShift(unsigned FirstShiftAmt,
- bool IsFirstShiftLeft,
- Instruction *SecondShift, InstCombiner &IC,
+/// that have constant shift amounts: OuterShift (InnerShift X, C1), C2.
+static bool canEvaluateShiftedShift(unsigned OuterShAmt, bool IsOuterShl,
+ Instruction *InnerShift, InstCombiner &IC,
Instruction *CxtI) {
- assert(SecondShift->isLogicalShift() && "Unexpected instruction type");
+ assert(InnerShift->isLogicalShift() && "Unexpected instruction type");
- // We need constant shifts.
- auto *SecondShiftConst = dyn_cast<ConstantInt>(SecondShift->getOperand(1));
- if (!SecondShiftConst)
+ // We need constant scalar or constant splat shifts.
+ const APInt *InnerShiftConst;
+ if (!match(InnerShift->getOperand(1), m_APInt(InnerShiftConst)))
return false;
- unsigned SecondShiftAmt = SecondShiftConst->getZExtValue();
- bool IsSecondShiftLeft = SecondShift->getOpcode() == Instruction::Shl;
-
- // We can always fold shl(c1) + shl(c2) -> shl(c1+c2).
- // We can always fold lshr(c1) + lshr(c2) -> lshr(c1+c2).
- if (IsFirstShiftLeft == IsSecondShiftLeft)
+ // Two logical shifts in the same direction:
+ // shl (shl X, C1), C2 --> shl X, C1 + C2
+ // lshr (lshr X, C1), C2 --> lshr X, C1 + C2
+ bool IsInnerShl = InnerShift->getOpcode() == Instruction::Shl;
+ if (IsInnerShl == IsOuterShl)
return true;
- // We can always fold lshr(c) + shl(c) -> and(c2).
- // We can always fold shl(c) + lshr(c) -> and(c2).
- if (FirstShiftAmt == SecondShiftAmt)
+ // Equal shift amounts in opposite directions become bitwise 'and':
+ // lshr (shl X, C), C --> and X, C'
+ // shl (lshr X, C), C --> and X, C'
+ unsigned InnerShAmt = InnerShiftConst->getZExtValue();
+ if (InnerShAmt == OuterShAmt)
return true;
- unsigned TypeWidth = SecondShift->getType()->getScalarSizeInBits();
-
// If the 2nd shift is bigger than the 1st, we can fold:
- // lshr(c1) + shl(c2) -> shl(c3) + and(c4) or
- // shl(c1) + lshr(c2) -> lshr(c3) + and(c4),
+ // lshr (shl X, C1), C2 --> and (shl X, C1 - C2), C3
+ // shl (lshr X, C1), C2 --> and (lshr X, C1 - C2), C3
// but it isn't profitable unless we know the and'd out bits are already zero.
- // Also check that the 2nd shift is valid (less than the type width) or we'll
- // crash trying to produce the bit mask for the 'and'.
- if (SecondShiftAmt > FirstShiftAmt && SecondShiftAmt < TypeWidth) {
- unsigned MaskShift = IsSecondShiftLeft ? TypeWidth - SecondShiftAmt
- : SecondShiftAmt - FirstShiftAmt;
- APInt Mask = APInt::getLowBitsSet(TypeWidth, FirstShiftAmt) << MaskShift;
- if (IC.MaskedValueIsZero(SecondShift->getOperand(0), Mask, 0, CxtI))
+ // Also, check that the inner shift is valid (less than the type width) or
+ // we'll crash trying to produce the bit mask for the 'and'.
+ unsigned TypeWidth = InnerShift->getType()->getScalarSizeInBits();
+ if (InnerShAmt > OuterShAmt && InnerShAmt < TypeWidth) {
+ unsigned MaskShift =
+ IsInnerShl ? TypeWidth - InnerShAmt : InnerShAmt - OuterShAmt;
+ APInt Mask = APInt::getLowBitsSet(TypeWidth, OuterShAmt) << MaskShift;
+ if (IC.MaskedValueIsZero(InnerShift->getOperand(0), Mask, 0, CxtI))
return true;
}
return false;
}
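
Numeric spot-checks (plain C++, 32-bit assumption; the multiplier is just a value scrambler) for the first two cases above: same-direction shift amounts add, and equal opposite shifts reduce to a mask.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t V = 0; V <= 0xFFFF; ++V) {
    uint32_t X = V * 0x9E3779B9u; // arbitrary 32-bit test values
    for (unsigned C1 = 0; C1 < 16; ++C1) {
      for (unsigned C2 = 0; C1 + C2 < 32; ++C2) {
        assert(((X << C1) << C2) == (X << (C1 + C2))); // shl of shl
        assert(((X >> C1) >> C2) == (X >> (C1 + C2))); // lshr of lshr
      }
      assert(((X << C1) >> C1) == (X & (0xFFFFFFFFu >> C1))); // lshr of shl
      assert(((X >> C1) << C1) == (X & (0xFFFFFFFFu << C1))); // shl of lshr
    }
  }
  return 0;
}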
-/// See if we can compute the specified value, but shifted
-/// logically to the left or right by some number of bits. This should return
-/// true if the expression can be computed for the same cost as the current
-/// expression tree. This is used to eliminate extraneous shifting from things
-/// like:
+/// See if we can compute the specified value, but shifted logically to the left
+/// or right by some number of bits. This should return true if the expression
+/// can be computed for the same cost as the current expression tree. This is
+/// used to eliminate extraneous shifting from things like:
/// %C = shl i128 %A, 64
/// %D = shl i128 %B, 96
/// %E = or i128 %C, %D
/// %F = lshr i128 %E, 64
-/// where the client will ask if E can be computed shifted right by 64-bits. If
-/// this succeeds, the GetShiftedValue function will be called to produce the
-/// value.
-static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
+/// where the client will ask if E can be computed shifted right by 64 bits. If
+/// this succeeds, getShiftedValue() will be called to produce the value.
+static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
InstCombiner &IC, Instruction *CxtI) {
// We can always evaluate constants shifted.
if (isa<Constant>(V))
@@ -165,8 +162,8 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
case Instruction::Or:
case Instruction::Xor:
    // Bitwise operators can all be arbitrarily evaluated shifted.
- return CanEvaluateShifted(I->getOperand(0), NumBits, IsLeftShift, IC, I) &&
- CanEvaluateShifted(I->getOperand(1), NumBits, IsLeftShift, IC, I);
+ return canEvaluateShifted(I->getOperand(0), NumBits, IsLeftShift, IC, I) &&
+ canEvaluateShifted(I->getOperand(1), NumBits, IsLeftShift, IC, I);
case Instruction::Shl:
case Instruction::LShr:
@@ -176,8 +173,8 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
SelectInst *SI = cast<SelectInst>(I);
Value *TrueVal = SI->getTrueValue();
Value *FalseVal = SI->getFalseValue();
- return CanEvaluateShifted(TrueVal, NumBits, IsLeftShift, IC, SI) &&
- CanEvaluateShifted(FalseVal, NumBits, IsLeftShift, IC, SI);
+ return canEvaluateShifted(TrueVal, NumBits, IsLeftShift, IC, SI) &&
+ canEvaluateShifted(FalseVal, NumBits, IsLeftShift, IC, SI);
}
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
@@ -185,16 +182,79 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
for (Value *IncValue : PN->incoming_values())
- if (!CanEvaluateShifted(IncValue, NumBits, IsLeftShift, IC, PN))
+ if (!canEvaluateShifted(IncValue, NumBits, IsLeftShift, IC, PN))
return false;
return true;
}
}
}
-/// When CanEvaluateShifted returned true for an expression,
-/// this value inserts the new computation that produces the shifted value.
-static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
+/// Fold OuterShift (InnerShift X, C1), C2.
+/// See canEvaluateShiftedShift() for the constraints on these instructions.
+static Value *foldShiftedShift(BinaryOperator *InnerShift, unsigned OuterShAmt,
+ bool IsOuterShl,
+ InstCombiner::BuilderTy &Builder) {
+ bool IsInnerShl = InnerShift->getOpcode() == Instruction::Shl;
+ Type *ShType = InnerShift->getType();
+ unsigned TypeWidth = ShType->getScalarSizeInBits();
+
+ // We only accept shifts-by-a-constant in canEvaluateShifted().
+ const APInt *C1;
+ match(InnerShift->getOperand(1), m_APInt(C1));
+ unsigned InnerShAmt = C1->getZExtValue();
+
+ // Change the shift amount and clear the appropriate IR flags.
+ auto NewInnerShift = [&](unsigned ShAmt) {
+ InnerShift->setOperand(1, ConstantInt::get(ShType, ShAmt));
+ if (IsInnerShl) {
+ InnerShift->setHasNoUnsignedWrap(false);
+ InnerShift->setHasNoSignedWrap(false);
+ } else {
+ InnerShift->setIsExact(false);
+ }
+ return InnerShift;
+ };
+
+ // Two logical shifts in the same direction:
+ // shl (shl X, C1), C2 --> shl X, C1 + C2
+ // lshr (lshr X, C1), C2 --> lshr X, C1 + C2
+ if (IsInnerShl == IsOuterShl) {
+ // If this is an oversized composite shift, then unsigned shifts get 0.
+ if (InnerShAmt + OuterShAmt >= TypeWidth)
+ return Constant::getNullValue(ShType);
+
+ return NewInnerShift(InnerShAmt + OuterShAmt);
+ }
+
+ // Equal shift amounts in opposite directions become bitwise 'and':
+ // lshr (shl X, C), C --> and X, C'
+ // shl (lshr X, C), C --> and X, C'
+ if (InnerShAmt == OuterShAmt) {
+ APInt Mask = IsInnerShl
+ ? APInt::getLowBitsSet(TypeWidth, TypeWidth - OuterShAmt)
+ : APInt::getHighBitsSet(TypeWidth, TypeWidth - OuterShAmt);
+ Value *And = Builder.CreateAnd(InnerShift->getOperand(0),
+ ConstantInt::get(ShType, Mask));
+ if (auto *AndI = dyn_cast<Instruction>(And)) {
+ AndI->moveBefore(InnerShift);
+ AndI->takeName(InnerShift);
+ }
+ return And;
+ }
+
+ assert(InnerShAmt > OuterShAmt &&
+ "Unexpected opposite direction logical shift pair");
+
+ // In general, we would need an 'and' for this transform, but
+ // canEvaluateShiftedShift() guarantees that the masked-off bits are not used.
+ // lshr (shl X, C1), C2 --> shl X, C1 - C2
+ // shl (lshr X, C1), C2 --> lshr X, C1 - C2
+ return NewInnerShift(InnerShAmt - OuterShAmt);
+}
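
And a check (plain C++, 32-bit assumption, fixed amounts for illustration) for the final case: when the inner amount is larger, the pair folds to one shift by the difference. That is safe here because the operand's range keeps the masked-off bits zero, which is the same precondition canEvaluateShiftedShift() verifies with MaskedValueIsZero.

#include <cassert>
#include <cstdint>

int main() {
  const unsigned C1 = 8, C2 = 3; // inner shl amount > outer lshr amount
  for (uint32_t X = 0; X < 0x10000; ++X) {
    // lshr (shl X, C1), C2 --> shl X, (C1 - C2); no 'and' is needed
    // because X < 2^16 guarantees the would-be-masked bits are zero.
    assert(((X << C1) >> C2) == (X << (C1 - C2)));
  }
  return 0;
}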
+
+/// When canEvaluateShifted() returns true for an expression, this function
+/// inserts the new computation that produces the shifted value.
+static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
InstCombiner &IC, const DataLayout &DL) {
// We can always evaluate constants shifted.
if (Constant *C = dyn_cast<Constant>(V)) {
@@ -220,100 +280,21 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
case Instruction::Xor:
    // Bitwise operators can all be arbitrarily evaluated shifted.
I->setOperand(
- 0, GetShiftedValue(I->getOperand(0), NumBits, isLeftShift, IC, DL));
+ 0, getShiftedValue(I->getOperand(0), NumBits, isLeftShift, IC, DL));
I->setOperand(
- 1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
+ 1, getShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
return I;
- case Instruction::Shl: {
- BinaryOperator *BO = cast<BinaryOperator>(I);
- unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
-
- // We only accept shifts-by-a-constant in CanEvaluateShifted.
- ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
-
- // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
- if (isLeftShift) {
- // If this is oversized composite shift, then unsigned shifts get 0.
- unsigned NewShAmt = NumBits+CI->getZExtValue();
- if (NewShAmt >= TypeWidth)
- return Constant::getNullValue(I->getType());
-
- BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
- BO->setHasNoUnsignedWrap(false);
- BO->setHasNoSignedWrap(false);
- return I;
- }
-
- // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have
- // zeros.
- if (CI->getValue() == NumBits) {
- APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
- V = IC.Builder->CreateAnd(BO->getOperand(0),
- ConstantInt::get(BO->getContext(), Mask));
- if (Instruction *VI = dyn_cast<Instruction>(V)) {
- VI->moveBefore(BO);
- VI->takeName(BO);
- }
- return V;
- }
-
- // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
- // the and won't be needed.
- assert(CI->getZExtValue() > NumBits);
- BO->setOperand(1, ConstantInt::get(BO->getType(),
- CI->getZExtValue() - NumBits));
- BO->setHasNoUnsignedWrap(false);
- BO->setHasNoSignedWrap(false);
- return BO;
- }
- // FIXME: This is almost identical to the SHL case. Refactor both cases into
- // a helper function.
- case Instruction::LShr: {
- BinaryOperator *BO = cast<BinaryOperator>(I);
- unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
- // We only accept shifts-by-a-constant in CanEvaluateShifted.
- ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
-
- // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
- if (!isLeftShift) {
- // If this is oversized composite shift, then unsigned shifts get 0.
- unsigned NewShAmt = NumBits+CI->getZExtValue();
- if (NewShAmt >= TypeWidth)
- return Constant::getNullValue(BO->getType());
-
- BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
- BO->setIsExact(false);
- return I;
- }
-
- // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have
- // zeros.
- if (CI->getValue() == NumBits) {
- APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
- V = IC.Builder->CreateAnd(I->getOperand(0),
- ConstantInt::get(BO->getContext(), Mask));
- if (Instruction *VI = dyn_cast<Instruction>(V)) {
- VI->moveBefore(I);
- VI->takeName(I);
- }
- return V;
- }
-
- // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
- // the and won't be needed.
- assert(CI->getZExtValue() > NumBits);
- BO->setOperand(1, ConstantInt::get(BO->getType(),
- CI->getZExtValue() - NumBits));
- BO->setIsExact(false);
- return BO;
- }
+ case Instruction::Shl:
+ case Instruction::LShr:
+ return foldShiftedShift(cast<BinaryOperator>(I), NumBits, isLeftShift,
+ *(IC.Builder));
case Instruction::Select:
I->setOperand(
- 1, GetShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
+ 1, getShiftedValue(I->getOperand(1), NumBits, isLeftShift, IC, DL));
I->setOperand(
- 2, GetShiftedValue(I->getOperand(2), NumBits, isLeftShift, IC, DL));
+ 2, getShiftedValue(I->getOperand(2), NumBits, isLeftShift, IC, DL));
return I;
case Instruction::PHI: {
// We can change a phi if we can change all operands. Note that we never
@@ -321,215 +302,39 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
// instructions with a single use.
PHINode *PN = cast<PHINode>(I);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i), NumBits,
+ PN->setIncomingValue(i, getShiftedValue(PN->getIncomingValue(i), NumBits,
isLeftShift, IC, DL));
return PN;
}
}
}
-/// Try to fold (X << C1) << C2, where the shifts are some combination of
-/// shl/ashr/lshr.
-static Instruction *
-foldShiftByConstOfShiftByConst(BinaryOperator &I, ConstantInt *COp1,
- InstCombiner::BuilderTy *Builder) {
- Value *Op0 = I.getOperand(0);
- uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
-
- // Find out if this is a shift of a shift by a constant.
- BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
- if (ShiftOp && !ShiftOp->isShift())
- ShiftOp = nullptr;
-
- if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
-
- // This is a constant shift of a constant shift. Be careful about hiding
- // shl instructions behind bit masks. They are used to represent multiplies
- // by a constant, and it is important that simple arithmetic expressions
- // are still recognizable by scalar evolution.
- //
- // The transforms applied to shl are very similar to the transforms applied
- // to mul by constant. We can be more aggressive about optimizing right
- // shifts.
- //
- // Combinations of right and left shifts will still be optimized in
- // DAGCombine where scalar evolution no longer applies.
-
- ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
- uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
- uint32_t ShiftAmt2 = COp1->getLimitedValue(TypeBits);
- assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
- if (ShiftAmt1 == 0)
- return nullptr; // Will be simplified in the future.
- Value *X = ShiftOp->getOperand(0);
-
- IntegerType *Ty = cast<IntegerType>(I.getType());
-
- // Check for (X << c1) << c2 and (X >> c1) >> c2
- if (I.getOpcode() == ShiftOp->getOpcode()) {
- uint32_t AmtSum = ShiftAmt1 + ShiftAmt2; // Fold into one big shift.
- // If this is an oversized composite shift, then unsigned shifts become
- // zero (handled in InstSimplify) and ashr saturates.
- if (AmtSum >= TypeBits) {
- if (I.getOpcode() != Instruction::AShr)
- return nullptr;
- AmtSum = TypeBits - 1; // Saturate to 31 for i32 ashr.
- }
-
- return BinaryOperator::Create(I.getOpcode(), X,
- ConstantInt::get(Ty, AmtSum));
- }
-
- if (ShiftAmt1 == ShiftAmt2) {
- // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
- if (I.getOpcode() == Instruction::LShr &&
- ShiftOp->getOpcode() == Instruction::Shl) {
- APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
- return BinaryOperator::CreateAnd(
- X, ConstantInt::get(I.getContext(), Mask));
- }
- } else if (ShiftAmt1 < ShiftAmt2) {
- uint32_t ShiftDiff = ShiftAmt2 - ShiftAmt1;
-
- // (X >>?,exact C1) << C2 --> X << (C2-C1)
- // The inexact version is deferred to DAGCombine so we don't hide shl
- // behind a bit mask.
- if (I.getOpcode() == Instruction::Shl &&
- ShiftOp->getOpcode() != Instruction::Shl && ShiftOp->isExact()) {
- assert(ShiftOp->getOpcode() == Instruction::LShr ||
- ShiftOp->getOpcode() == Instruction::AShr);
- ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
- BinaryOperator *NewShl =
- BinaryOperator::Create(Instruction::Shl, X, ShiftDiffCst);
- NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
- NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
- return NewShl;
- }
-
- // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
- if (I.getOpcode() == Instruction::LShr &&
- ShiftOp->getOpcode() == Instruction::Shl) {
- ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
- // (X <<nuw C1) >>u C2 --> X >>u (C2-C1)
- if (ShiftOp->hasNoUnsignedWrap()) {
- BinaryOperator *NewLShr =
- BinaryOperator::Create(Instruction::LShr, X, ShiftDiffCst);
- NewLShr->setIsExact(I.isExact());
- return NewLShr;
- }
- Value *Shift = Builder->CreateLShr(X, ShiftDiffCst);
-
- APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(
- Shift, ConstantInt::get(I.getContext(), Mask));
- }
-
- // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
- // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
- if (I.getOpcode() == Instruction::AShr &&
- ShiftOp->getOpcode() == Instruction::Shl) {
- if (ShiftOp->hasNoSignedWrap()) {
- // (X <<nsw C1) >>s C2 --> X >>s (C2-C1)
- ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
- BinaryOperator *NewAShr =
- BinaryOperator::Create(Instruction::AShr, X, ShiftDiffCst);
- NewAShr->setIsExact(I.isExact());
- return NewAShr;
- }
- }
- } else {
- assert(ShiftAmt2 < ShiftAmt1);
- uint32_t ShiftDiff = ShiftAmt1 - ShiftAmt2;
-
- // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
- // The inexact version is deferred to DAGCombine so we don't hide shl
- // behind a bit mask.
- if (I.getOpcode() == Instruction::Shl &&
- ShiftOp->getOpcode() != Instruction::Shl && ShiftOp->isExact()) {
- ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
- BinaryOperator *NewShr =
- BinaryOperator::Create(ShiftOp->getOpcode(), X, ShiftDiffCst);
- NewShr->setIsExact(true);
- return NewShr;
- }
-
- // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
- if (I.getOpcode() == Instruction::LShr &&
- ShiftOp->getOpcode() == Instruction::Shl) {
- ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
- if (ShiftOp->hasNoUnsignedWrap()) {
- // (X <<nuw C1) >>u C2 --> X <<nuw (C1-C2)
- BinaryOperator *NewShl =
- BinaryOperator::Create(Instruction::Shl, X, ShiftDiffCst);
- NewShl->setHasNoUnsignedWrap(true);
- return NewShl;
- }
- Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
-
- APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(
- Shift, ConstantInt::get(I.getContext(), Mask));
- }
-
- // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
- // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
- if (I.getOpcode() == Instruction::AShr &&
- ShiftOp->getOpcode() == Instruction::Shl) {
- if (ShiftOp->hasNoSignedWrap()) {
- // (X <<nsw C1) >>s C2 --> X <<nsw (C1-C2)
- ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
- BinaryOperator *NewShl =
- BinaryOperator::Create(Instruction::Shl, X, ShiftDiffCst);
- NewShl->setHasNoSignedWrap(true);
- return NewShl;
- }
- }
- }
- }
-
- return nullptr;
-}
-
Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
BinaryOperator &I) {
bool isLeftShift = I.getOpcode() == Instruction::Shl;
- ConstantInt *COp1 = nullptr;
- if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(Op1))
- COp1 = dyn_cast_or_null<ConstantInt>(CV->getSplatValue());
- else if (ConstantVector *CV = dyn_cast<ConstantVector>(Op1))
- COp1 = dyn_cast_or_null<ConstantInt>(CV->getSplatValue());
- else
- COp1 = dyn_cast<ConstantInt>(Op1);
-
- if (!COp1)
+ const APInt *Op1C;
+ if (!match(Op1, m_APInt(Op1C)))
return nullptr;
// See if we can propagate this shift into the input, this covers the trivial
// cast of lshr(shl(x,c1),c2) as well as other more complex cases.
if (I.getOpcode() != Instruction::AShr &&
- CanEvaluateShifted(Op0, COp1->getZExtValue(), isLeftShift, *this, &I)) {
+ canEvaluateShifted(Op0, Op1C->getZExtValue(), isLeftShift, *this, &I)) {
 DEBUG(dbgs() << "ICE: getShiftedValue propagating shift through expression"
" to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
return replaceInstUsesWith(
- I, GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this, DL));
+ I, getShiftedValue(Op0, Op1C->getZExtValue(), isLeftShift, *this, DL));
}
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
+ unsigned TypeBits = Op0->getType()->getScalarSizeInBits();
- assert(!COp1->uge(TypeBits) &&
+ assert(!Op1C->uge(TypeBits) &&
"Shift over the type width should have been removed already");
- // ((X*C1) << C2) == (X * (C1 << C2))
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
- if (BO->getOpcode() == Instruction::Mul && isLeftShift)
- if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
- return BinaryOperator::CreateMul(BO->getOperand(0),
- ConstantExpr::getShl(BOOp, Op1));
-
if (Instruction *FoldedShift = foldOpWithConstantIntoOperand(I))
return FoldedShift;
@@ -544,7 +349,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
isa<ConstantInt>(TrOp->getOperand(1))) {
// Okay, we'll do this xform. Make the shift of shift.
- Constant *ShAmt = ConstantExpr::getZExt(COp1, TrOp->getType());
+ Constant *ShAmt =
+ ConstantExpr::getZExt(cast<Constant>(Op1), TrOp->getType());
// (shift2 (shift1 & 0x00FF), c2)
Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
@@ -561,10 +367,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
// shift. We know that it is a logical shift by a constant, so adjust the
// mask as appropriate.
if (I.getOpcode() == Instruction::Shl)
- MaskV <<= COp1->getZExtValue();
+ MaskV <<= Op1C->getZExtValue();
else {
assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
- MaskV = MaskV.lshr(COp1->getZExtValue());
+ MaskV = MaskV.lshr(Op1C->getZExtValue());
}
// shift1 & 0x00FF
@@ -598,7 +404,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
// (X + (Y << C))
Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
Op0BO->getOperand(1)->getName());
- uint32_t Op1Val = COp1->getLimitedValue(TypeBits);
+ unsigned Op1Val = Op1C->getLimitedValue(TypeBits);
APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
Constant *Mask = ConstantInt::get(I.getContext(), Bits);
@@ -634,7 +440,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
// (X + (Y << C))
Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
Op0BO->getOperand(0)->getName());
- uint32_t Op1Val = COp1->getLimitedValue(TypeBits);
+ unsigned Op1Val = Op1C->getLimitedValue(TypeBits);
APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
Constant *Mask = ConstantInt::get(I.getContext(), Bits);
@@ -705,9 +511,6 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
}
}
- if (Instruction *Folded = foldShiftByConstOfShiftByConst(I, COp1, Builder))
- return Folded;
-
return nullptr;
}
@@ -715,59 +518,97 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V =
- SimplifyShlInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC))
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyShlInst(Op0, Op1, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
if (Instruction *V = commonShiftTransforms(I))
return V;
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) {
- unsigned ShAmt = Op1C->getZExtValue();
-
- // Turn:
- // %zext = zext i32 %V to i64
- // %res = shl i64 %V, 8
- //
- // Into:
- // %shl = shl i32 %V, 8
- // %res = zext i32 %shl to i64
- //
- // This is only valid if %V would have zeros shifted out.
- if (auto *ZI = dyn_cast<ZExtInst>(I.getOperand(0))) {
- unsigned SrcBitWidth = ZI->getSrcTy()->getScalarSizeInBits();
- if (ShAmt < SrcBitWidth &&
- MaskedValueIsZero(ZI->getOperand(0),
- APInt::getHighBitsSet(SrcBitWidth, ShAmt), 0, &I)) {
- auto *Shl = Builder->CreateShl(ZI->getOperand(0), ShAmt);
- return new ZExtInst(Shl, I.getType());
+ const APInt *ShAmtAPInt;
+ if (match(Op1, m_APInt(ShAmtAPInt))) {
+ unsigned ShAmt = ShAmtAPInt->getZExtValue();
+ unsigned BitWidth = I.getType()->getScalarSizeInBits();
+ Type *Ty = I.getType();
+
+ // shl (zext X), ShAmt --> zext (shl X, ShAmt)
+ // This is only valid if X would have zeros shifted out.
+ Value *X;
+ if (match(Op0, m_ZExt(m_Value(X)))) {
+ unsigned SrcWidth = X->getType()->getScalarSizeInBits();
+ if (ShAmt < SrcWidth &&
+ MaskedValueIsZero(X, APInt::getHighBitsSet(SrcWidth, ShAmt), 0, &I))
+ return new ZExtInst(Builder->CreateShl(X, ShAmt), Ty);
+ }
+
+ // (X >>u C) << C --> X & (-1 << C)
+ if (match(Op0, m_LShr(m_Value(X), m_Specific(Op1)))) {
+ APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt));
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
+ }
+
+ // Be careful about hiding shl instructions behind bit masks. They are used
+ // to represent multiplies by a constant, and it is important that simple
+ // arithmetic expressions are still recognizable by scalar evolution.
+ // The inexact versions are deferred to DAGCombine, so we don't hide shl
+ // behind a bit mask.
+ const APInt *ShOp1;
+ if (match(Op0, m_CombineOr(m_Exact(m_LShr(m_Value(X), m_APInt(ShOp1))),
+ m_Exact(m_AShr(m_Value(X), m_APInt(ShOp1)))))) {
+ unsigned ShrAmt = ShOp1->getZExtValue();
+ if (ShrAmt < ShAmt) {
+ // If C1 < C2: (X >>?,exact C1) << C2 --> X << (C2 - C1)
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShrAmt);
+ auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
+ NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
+ return NewShl;
}
+ if (ShrAmt > ShAmt) {
+ // If C1 > C2: (X >>?exact C1) << C2 --> X >>?exact (C1 - C2)
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmt);
+ auto *NewShr = BinaryOperator::Create(
+ cast<BinaryOperator>(Op0)->getOpcode(), X, ShiftDiff);
+ NewShr->setIsExact(true);
+ return NewShr;
+ }
+ }
+
+ if (match(Op0, m_Shl(m_Value(X), m_APInt(ShOp1)))) {
+ unsigned AmtSum = ShAmt + ShOp1->getZExtValue();
+ // Oversized shifts are simplified to zero in InstSimplify.
+ if (AmtSum < BitWidth)
+ // (X << C1) << C2 --> X << (C1 + C2)
+ return BinaryOperator::CreateShl(X, ConstantInt::get(Ty, AmtSum));
}
// If the shifted-out value is known-zero, then this is a NUW shift.
if (!I.hasNoUnsignedWrap() &&
- MaskedValueIsZero(I.getOperand(0),
- APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt), 0,
- &I)) {
+ MaskedValueIsZero(Op0, APInt::getHighBitsSet(BitWidth, ShAmt), 0, &I)) {
I.setHasNoUnsignedWrap();
return &I;
}
- // If the shifted out value is all signbits, this is a NSW shift.
- if (!I.hasNoSignedWrap() &&
- ComputeNumSignBits(I.getOperand(0), 0, &I) > ShAmt) {
+ // If the shifted-out value is all signbits, then this is a NSW shift.
+ if (!I.hasNoSignedWrap() && ComputeNumSignBits(Op0, 0, &I) > ShAmt) {
I.setHasNoSignedWrap();
return &I;
}
}
- // (C1 << A) << C2 -> (C1 << C2) << A
- Constant *C1, *C2;
- Value *A;
- if (match(I.getOperand(0), m_OneUse(m_Shl(m_Constant(C1), m_Value(A)))) &&
- match(I.getOperand(1), m_Constant(C2)))
- return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A);
+ Constant *C1;
+ if (match(Op1, m_Constant(C1))) {
+ Constant *C2;
+ Value *X;
+ // (C2 << X) << C1 --> (C2 << C1) << X
+ if (match(Op0, m_OneUse(m_Shl(m_Constant(C2), m_Value(X)))))
+ return BinaryOperator::CreateShl(ConstantExpr::getShl(C2, C1), X);
+
+ // (X * C2) << C1 --> X * (C2 << C1)
+ if (match(Op0, m_Mul(m_Value(X), m_Constant(C2))))
+ return BinaryOperator::CreateMul(X, ConstantExpr::getShl(C2, C1));
+ }
return nullptr;
}
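
Spot-checks (plain C++, fixed widths chosen for illustration) for three of the shl folds above: hoisting shl through zext when no set bits are shifted out, the lshr-then-shl mask form, and the multiply rewrite.

#include <cassert>
#include <cstdint>

int main() {
  // shl (zext i8 X to i32), 3 --> zext (shl i8 X, 3), valid when the top
  // 3 bits of X are zero (X < 32), so nothing is shifted out in i8.
  for (uint32_t X8 = 0; X8 < 32; ++X8) {
    uint32_t Wide = (uint32_t)X8 << 3;
    uint32_t Narrow = (uint8_t)(X8 << 3); // shift in i8, then zero-extend
    assert(Wide == Narrow);
  }
  for (uint32_t X = 0; X < 0x10000; ++X) {
    assert(((X >> 4) << 4) == (X & 0xFFFFFFF0u)); // (X >>u C) << C
    assert(((X * 7u) << 4) == (X * (7u << 4)));   // (X * C2) << C1
  }
  return 0;
}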
@@ -776,43 +617,83 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
- DL, &TLI, &DT, &AC))
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
if (Instruction *R = commonShiftTransforms(I))
return R;
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
- unsigned ShAmt = Op1C->getZExtValue();
-
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op0)) {
- unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
+ Type *Ty = I.getType();
+ const APInt *ShAmtAPInt;
+ if (match(Op1, m_APInt(ShAmtAPInt))) {
+ unsigned ShAmt = ShAmtAPInt->getZExtValue();
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ auto *II = dyn_cast<IntrinsicInst>(Op0);
+ if (II && isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmt &&
+ (II->getIntrinsicID() == Intrinsic::ctlz ||
+ II->getIntrinsicID() == Intrinsic::cttz ||
+ II->getIntrinsicID() == Intrinsic::ctpop)) {
// ctlz.i32(x)>>5 --> zext(x == 0)
// cttz.i32(x)>>5 --> zext(x == 0)
// ctpop.i32(x)>>5 --> zext(x == -1)
- if ((II->getIntrinsicID() == Intrinsic::ctlz ||
- II->getIntrinsicID() == Intrinsic::cttz ||
- II->getIntrinsicID() == Intrinsic::ctpop) &&
- isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmt) {
- bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop;
- Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0);
- Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS);
- return new ZExtInst(Cmp, II->getType());
+ bool IsPop = II->getIntrinsicID() == Intrinsic::ctpop;
+ Constant *RHS = ConstantInt::getSigned(Ty, IsPop ? -1 : 0);
+ Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS);
+ return new ZExtInst(Cmp, Ty);
+ }
+
+ Value *X;
+ const APInt *ShOp1;
+ if (match(Op0, m_Shl(m_Value(X), m_APInt(ShOp1)))) {
+ unsigned ShlAmt = ShOp1->getZExtValue();
+ if (ShlAmt < ShAmt) {
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShlAmt);
+ if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
+ // (X <<nuw C1) >>u C2 --> X >>u (C2 - C1)
+ auto *NewLShr = BinaryOperator::CreateLShr(X, ShiftDiff);
+ NewLShr->setIsExact(I.isExact());
+ return NewLShr;
+ }
+ // (X << C1) >>u C2 --> (X >>u (C2 - C1)) & (-1 >> C2)
+ Value *NewLShr = Builder->CreateLShr(X, ShiftDiff, "", I.isExact());
+ APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
+ return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask));
}
+ if (ShlAmt > ShAmt) {
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmt - ShAmt);
+ if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
+ // (X <<nuw C1) >>u C2 --> X <<nuw (C1 - C2)
+ auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
+ NewShl->setHasNoUnsignedWrap(true);
+ return NewShl;
+ }
+      // (X << C1) >>u C2 --> (X << (C1 - C2)) & (-1 >> C2)
+ Value *NewShl = Builder->CreateShl(X, ShiftDiff);
+ APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
+ return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
+ }
+ assert(ShlAmt == ShAmt);
+ // (X << C) >>u C --> X & (-1 >>u C)
+ APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
+ return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
+ }
+
+ if (match(Op0, m_LShr(m_Value(X), m_APInt(ShOp1)))) {
+ unsigned AmtSum = ShAmt + ShOp1->getZExtValue();
+ // Oversized shifts are simplified to zero in InstSimplify.
+ if (AmtSum < BitWidth)
+ // (X >>u C1) >>u C2 --> X >>u (C1 + C2)
+ return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
}
// If the shifted-out value is known-zero, then this is an exact shift.
if (!I.isExact() &&
- MaskedValueIsZero(Op0, APInt::getLowBitsSet(Op1C->getBitWidth(), ShAmt),
- 0, &I)){
+ MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) {
I.setIsExact();
return &I;
}
}
-
return nullptr;
}
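
A check (plain C++) of the intrinsic fold above for i32: the count results lie in [0, 32], so bit 5 survives the shift only for the extreme input. ctlz32 models llvm.ctlz with a defined result at zero; the multiplier is an arbitrary value scrambler.

#include <cassert>
#include <cstdint>

static unsigned ctlz32(uint32_t X) {
  return X == 0 ? 32u : (unsigned)__builtin_clz(X);
}

int main() {
  for (uint32_t V = 0; V <= 0x1FFFF; ++V) {
    uint32_t X = V * 0x45D9F3Bu; // arbitrary values, including 0 at V == 0
    assert((ctlz32(X) >> 5) == (unsigned)(X == 0)); // ctlz(X) >> 5
    unsigned Pop = (unsigned)__builtin_popcount(X);
    assert((Pop >> 5) == (unsigned)(X == 0xFFFFFFFFu)); // ctpop(X) >> 5
  }
  // Hit the all-ones input for ctpop explicitly.
  assert(((unsigned)__builtin_popcount(0xFFFFFFFFu) >> 5) == 1u);
  return 0;
}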
@@ -820,48 +701,66 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
- DL, &TLI, &DT, &AC))
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
if (Instruction *R = commonShiftTransforms(I))
return R;
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-
- if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
- unsigned ShAmt = Op1C->getZExtValue();
+ Type *Ty = I.getType();
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ const APInt *ShAmtAPInt;
+ if (match(Op1, m_APInt(ShAmtAPInt))) {
+ unsigned ShAmt = ShAmtAPInt->getZExtValue();
- // If the input is a SHL by the same constant (ashr (shl X, C), C), then we
- // have a sign-extend idiom.
+ // If the shift amount equals the difference in width of the destination
+ // and source scalar types:
+ // ashr (shl (zext X), C), C --> sext X
Value *X;
- if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) {
- // If the input is an extension from the shifted amount value, e.g.
- // %x = zext i8 %A to i32
- // %y = shl i32 %x, 24
- // %z = ashr %y, 24
- // then turn this into "z = sext i8 A to i32".
- if (ZExtInst *ZI = dyn_cast<ZExtInst>(X)) {
- uint32_t SrcBits = ZI->getOperand(0)->getType()->getScalarSizeInBits();
- uint32_t DestBits = ZI->getType()->getScalarSizeInBits();
- if (Op1C->getZExtValue() == DestBits-SrcBits)
- return new SExtInst(ZI->getOperand(0), ZI->getType());
+ if (match(Op0, m_Shl(m_ZExt(m_Value(X)), m_Specific(Op1))) &&
+ ShAmt == BitWidth - X->getType()->getScalarSizeInBits())
+ return new SExtInst(X, Ty);
+
+ // We can't handle (X << C1) >>s C2. It shifts arbitrary bits in. However,
+ // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
+ const APInt *ShOp1;
+ if (match(Op0, m_NSWShl(m_Value(X), m_APInt(ShOp1)))) {
+ unsigned ShlAmt = ShOp1->getZExtValue();
+ if (ShlAmt < ShAmt) {
+ // (X <<nsw C1) >>s C2 --> X >>s (C2 - C1)
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShlAmt);
+ auto *NewAShr = BinaryOperator::CreateAShr(X, ShiftDiff);
+ NewAShr->setIsExact(I.isExact());
+ return NewAShr;
}
+ if (ShlAmt > ShAmt) {
+ // (X <<nsw C1) >>s C2 --> X <<nsw (C1 - C2)
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmt - ShAmt);
+ auto *NewShl = BinaryOperator::Create(Instruction::Shl, X, ShiftDiff);
+ NewShl->setHasNoSignedWrap(true);
+ return NewShl;
+ }
+ }
+
+ if (match(Op0, m_AShr(m_Value(X), m_APInt(ShOp1)))) {
+ unsigned AmtSum = ShAmt + ShOp1->getZExtValue();
+ // Oversized arithmetic shifts replicate the sign bit.
+ AmtSum = std::min(AmtSum, BitWidth - 1);
+ // (X >>s C1) >>s C2 --> X >>s (C1 + C2)
+ return BinaryOperator::CreateAShr(X, ConstantInt::get(Ty, AmtSum));
}
// If the shifted-out value is known-zero, then this is an exact shift.
if (!I.isExact() &&
- MaskedValueIsZero(Op0, APInt::getLowBitsSet(Op1C->getBitWidth(), ShAmt),
- 0, &I)) {
+ MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) {
I.setIsExact();
return &I;
}
}
// See if we can turn a signed shr into an unsigned shr.
- if (MaskedValueIsZero(Op0,
- APInt::getSignBit(I.getType()->getScalarSizeInBits()),
- 0, &I))
+ if (MaskedValueIsZero(Op0, APInt::getSignBit(BitWidth), 0, &I))
return BinaryOperator::CreateLShr(Op0, Op1);
return nullptr;
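
A check of the sign-extension idiom above (plain C++; assumes two's-complement int32_t with an arithmetic right shift, which every LLVM target provides and C++20 guarantees): shifting a zero-extended i8 up by 24 and arithmetically back down reproduces sext.

#include <cassert>
#include <cstdint>

int main() {
  for (int V = -128; V <= 127; ++V) {
    int8_t X = (int8_t)V;
    uint32_t Z = (uint8_t)X;              // zext i8 X to i32
    int32_t R = (int32_t)(Z << 24) >> 24; // shl 24, then ashr 24
    assert(R == (int32_t)X);              // == sext i8 X to i32
  }
  return 0;
}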
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 8b930bd95dfe..4e6f02058d83 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -30,18 +30,20 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
assert(I && "No instruction?");
assert(OpNo < I->getNumOperands() && "Operand index too large");
- // If the operand is not a constant integer, nothing to do.
- ConstantInt *OpC = dyn_cast<ConstantInt>(I->getOperand(OpNo));
- if (!OpC) return false;
+  // The operand must be a constant integer or a splat of a constant integer.
+ Value *Op = I->getOperand(OpNo);
+ const APInt *C;
+ if (!match(Op, m_APInt(C)))
+ return false;
// If there are no bits set that aren't demanded, nothing to do.
- Demanded = Demanded.zextOrTrunc(OpC->getValue().getBitWidth());
- if ((~Demanded & OpC->getValue()) == 0)
+ Demanded = Demanded.zextOrTrunc(C->getBitWidth());
+ if ((~Demanded & *C) == 0)
return false;
// This instruction is producing bits that are not demanded. Shrink the RHS.
- Demanded &= OpC->getValue();
- I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));
+ Demanded &= *C;
+ I->setOperand(OpNo, ConstantInt::get(Op->getType(), Demanded));
return true;
}
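
A toy version (plain C++, illustrative constants) of the shrink above: constant bits outside the demanded mask cannot affect any demanded result bit, so the constant can be replaced by its masked value.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 0x00FF00FFu;
  const uint32_t Demanded = 0x000000FFu; // caller only uses the low byte
  const uint32_t Shrunk = C & Demanded;  // 0xFF: simpler, equivalent RHS
  for (uint32_t X = 0; X < 0x20000; ++X)
    assert(((X & C) & Demanded) == ((X & Shrunk) & Demanded));
  return 0;
}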
@@ -66,12 +68,13 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
/// This form of SimplifyDemandedBits simplifies the specified instruction
/// operand if possible, updating it in place. It returns true if it made any
/// change and false otherwise.
-bool InstCombiner::SimplifyDemandedBits(Use &U, const APInt &DemandedMask,
+bool InstCombiner::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
+ const APInt &DemandedMask,
APInt &KnownZero, APInt &KnownOne,
unsigned Depth) {
- auto *UserI = dyn_cast<Instruction>(U.getUser());
+ Use &U = I->getOperandUse(OpNo);
Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero,
- KnownOne, Depth, UserI);
+ KnownOne, Depth, I);
if (!NewVal) return false;
U = NewVal;
return true;
@@ -114,9 +117,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
KnownOne.getBitWidth() == BitWidth &&
"Value *V, DemandedMask, KnownZero and KnownOne "
"must have same BitWidth");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- // We know all of the bits for a constant!
- KnownOne = CI->getValue() & DemandedMask;
+ const APInt *C;
+ if (match(V, m_APInt(C))) {
+ // We know all of the bits for a scalar constant or a splat vector constant!
+ KnownOne = *C & DemandedMask;
KnownZero = ~KnownOne & DemandedMask;
return nullptr;
}
@@ -138,9 +142,6 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (Depth == 6) // Limit search depth.
return nullptr;
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
-
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI);
@@ -151,107 +152,43 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// we can't do any simplifications of the operands, because DemandedMask
// only reflects the bits demanded by *one* of the users.
if (Depth != 0 && !I->hasOneUse()) {
- // Despite the fact that we can't simplify this instruction in all User's
- // context, we can at least compute the knownzero/knownone bits, and we can
- // do simplifications that apply to *just* the one user if we know that
- // this instruction has a simpler value in that context.
- if (I->getOpcode() == Instruction::And) {
- // If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
- CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
- CxtI);
-
- // If all of the demanded bits are known 1 on one side, return the other.
- // These bits cannot contribute to the result of the 'and' in this
- // context.
- if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
- (DemandedMask & ~LHSKnownZero))
- return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
- (DemandedMask & ~RHSKnownZero))
- return I->getOperand(1);
-
- // If all of the demanded bits in the inputs are known zeros, return zero.
- if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
- return Constant::getNullValue(VTy);
-
- } else if (I->getOpcode() == Instruction::Or) {
- // We can simplify (X|Y) -> X or Y in the user's context if we know that
- // only bits from X or Y are demanded.
-
- // If either the LHS or the RHS are One, the result is One.
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
- CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
- CxtI);
-
- // If all of the demanded bits are known zero on one side, return the
- // other. These bits cannot contribute to the result of the 'or' in this
- // context.
- if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
- (DemandedMask & ~LHSKnownOne))
- return I->getOperand(0);
- if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
- (DemandedMask & ~RHSKnownOne))
- return I->getOperand(1);
-
- // If all of the potentially set bits on one side are known to be set on
- // the other side, just use the 'other' side.
- if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
- (DemandedMask & (~RHSKnownZero)))
- return I->getOperand(0);
- if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
- (DemandedMask & (~LHSKnownZero)))
- return I->getOperand(1);
- } else if (I->getOpcode() == Instruction::Xor) {
- // We can simplify (X^Y) -> X or Y in the user's context if we know that
- // only bits from X or Y are demanded.
-
- computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
- CxtI);
- computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
- CxtI);
-
- // If all of the demanded bits are known zero on one side, return the
- // other.
- if ((DemandedMask & RHSKnownZero) == DemandedMask)
- return I->getOperand(0);
- if ((DemandedMask & LHSKnownZero) == DemandedMask)
- return I->getOperand(1);
- }
-
- // Compute the KnownZero/KnownOne bits to simplify things downstream.
- computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
- return nullptr;
+ return SimplifyMultipleUseDemandedBits(I, DemandedMask, KnownZero, KnownOne,
+ Depth, CxtI);
}
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+
 // If this is the root being simplified, allow it to have multiple uses;
// just set the DemandedMask to all bits so that we can try to simplify the
// operands. This allows visitTruncInst (for example) to simplify the
// operand of a trunc without duplicating all the logic below.
if (Depth == 0 && !V->hasOneUse())
- DemandedMask = APInt::getAllOnesValue(BitWidth);
+ DemandedMask.setAllBits();
switch (I->getOpcode()) {
default:
computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
break;
- case Instruction::And:
+ case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
- RHSKnownOne, Depth + 1) ||
- SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownZero,
- LHSKnownZero, LHSKnownOne, Depth + 1))
+ if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne,
+ Depth + 1) ||
+ SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnownZero, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+  // Output known-0 bits are known to be clear if zero in either the LHS or RHS.
+ APInt IKnownZero = RHSKnownZero | LHSKnownZero;
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ APInt IKnownOne = RHSKnownOne & LHSKnownOne;
+
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & ((RHSKnownZero | LHSKnownZero)|
- (RHSKnownOne & LHSKnownOne))) == DemandedMask)
- return Constant::getIntegerValue(VTy, RHSKnownOne & LHSKnownOne);
+ if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ return Constant::getIntegerValue(VTy, IKnownOne);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
@@ -262,34 +199,33 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
(DemandedMask & ~RHSKnownZero))
return I->getOperand(1);
- // If all of the demanded bits in the inputs are known zeros, return zero.
- if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask)
- return Constant::getNullValue(VTy);
-
// If the RHS is a constant, see if we can simplify it.
if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))
return I;
- // Output known-1 bits are only known if set in both the LHS & RHS.
- KnownOne = RHSKnownOne & LHSKnownOne;
- // Output known-0 are known to be clear if zero in either the LHS | RHS.
- KnownZero = RHSKnownZero | LHSKnownZero;
+ KnownZero = std::move(IKnownZero);
+ KnownOne = std::move(IKnownOne);
break;
- case Instruction::Or:
+ }
+ case Instruction::Or: {
// If either the LHS or the RHS are One, the result is One.
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
- RHSKnownOne, Depth + 1) ||
- SimplifyDemandedBits(I->getOperandUse(0), DemandedMask & ~RHSKnownOne,
- LHSKnownZero, LHSKnownOne, Depth + 1))
+ if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne,
+ Depth + 1) ||
+ SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnownOne, LHSKnownZero,
+ LHSKnownOne, Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ APInt IKnownZero = RHSKnownZero & LHSKnownZero;
+  // Output known-1 bits are known to be set if set in either the LHS or RHS.
+ APInt IKnownOne = RHSKnownOne | LHSKnownOne;
+
// If the client is only demanding bits that we know, return the known
// constant.
- if ((DemandedMask & ((RHSKnownZero & LHSKnownZero)|
- (RHSKnownOne | LHSKnownOne))) == DemandedMask)
- return Constant::getIntegerValue(VTy, RHSKnownOne | LHSKnownOne);
+ if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ return Constant::getIntegerValue(VTy, IKnownOne);
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
@@ -313,16 +249,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (ShrinkDemandedConstant(I, 1, DemandedMask))
return I;
- // Output known-0 bits are only known if clear in both the LHS & RHS.
- KnownZero = RHSKnownZero & LHSKnownZero;
- // Output known-1 are known to be set if set in either the LHS | RHS.
- KnownOne = RHSKnownOne | LHSKnownOne;
+ KnownZero = std::move(IKnownZero);
+ KnownOne = std::move(IKnownOne);
break;
+ }
case Instruction::Xor: {
- if (SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, RHSKnownZero,
- RHSKnownOne, Depth + 1) ||
- SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, LHSKnownZero,
- LHSKnownOne, Depth + 1))
+ if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnownZero, RHSKnownOne,
+ Depth + 1) ||
+ SimplifyDemandedBits(I, 0, DemandedMask, LHSKnownZero, LHSKnownOne,
+ Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -400,9 +335,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
// Output known-0 bits are known if clear or set in both the LHS & RHS.
- KnownZero= (RHSKnownZero & LHSKnownZero) | (RHSKnownOne & LHSKnownOne);
+ KnownZero = std::move(IKnownZero);
    // Output known-1 bits are known to be set if set in only one of the LHS or RHS.
- KnownOne = (RHSKnownZero & LHSKnownOne) | (RHSKnownOne & LHSKnownZero);
+ KnownOne = std::move(IKnownOne);
break;
}
case Instruction::Select:
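
A minimal model (plain C++, 4-bit values for brevity; the Known struct is a stand-in for LLVM's known-bits pair) of the rules used in the And/Or cases above: AND knows a zero where either input is zero and a one only where both are, and OR is the dual. The loop enumerates every concrete value consistent with the assumed facts.

#include <cassert>
#include <cstdint>

struct Known { uint32_t Zero, One; }; // disjoint known-0 / known-1 masks

static bool consistent(uint32_t V, Known K) {
  return (V & K.Zero) == 0 && (V & K.One) == K.One;
}

int main() {
  Known L = {0b0100, 0b1000}; // LHS: bit 3 is 1, bit 2 is 0, rest unknown
  Known R = {0b0001, 0b1000}; // RHS: bit 3 is 1, bit 0 is 0, rest unknown
  Known And = {L.Zero | R.Zero, L.One & R.One}; // rules from the code above
  Known Or = {L.Zero & R.Zero, L.One | R.One};
  for (uint32_t A = 0; A < 16; ++A)
    for (uint32_t B = 0; B < 16; ++B) {
      if (!consistent(A, L) || !consistent(B, R))
        continue;
      assert(consistent(A & B, And)); // derived facts hold for AND
      assert(consistent(A | B, Or));  // and for OR
    }
  return 0;
}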
@@ -412,10 +347,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN)
return nullptr;
- if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero,
- RHSKnownOne, Depth + 1) ||
- SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero,
- LHSKnownOne, Depth + 1))
+ if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnownZero, RHSKnownOne,
+ Depth + 1) ||
+ SimplifyDemandedBits(I, 1, DemandedMask, LHSKnownZero, LHSKnownOne,
+ Depth + 1))
return I;
assert(!(RHSKnownZero & RHSKnownOne) && "Bits known to be one AND zero?");
assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");
@@ -434,8 +369,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
DemandedMask = DemandedMask.zext(truncBf);
KnownZero = KnownZero.zext(truncBf);
KnownOne = KnownOne.zext(truncBf);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
- KnownOne, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne,
+ Depth + 1))
return I;
DemandedMask = DemandedMask.trunc(BitWidth);
KnownZero = KnownZero.trunc(BitWidth);
@@ -460,8 +395,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Don't touch a vector-to-scalar bitcast.
return nullptr;
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
- KnownOne, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne,
+ Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
break;
@@ -472,15 +407,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
DemandedMask = DemandedMask.trunc(SrcBitWidth);
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask, KnownZero,
- KnownOne, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMask, KnownZero, KnownOne,
+ Depth + 1))
return I;
DemandedMask = DemandedMask.zext(BitWidth);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
// The top bits are known to be zero.
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+ KnownZero.setBitsFrom(SrcBitWidth);
break;
}
case Instruction::SExt: {
@@ -490,7 +425,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt InputDemandedBits = DemandedMask &
APInt::getLowBitsSet(BitWidth, SrcBitWidth);
- APInt NewBits(APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth));
+ APInt NewBits(APInt::getBitsSetFrom(BitWidth, SrcBitWidth));
// If any of the sign extended bits are demanded, we know that the sign
// bit is demanded.
if ((NewBits & DemandedMask) != 0)
@@ -499,8 +434,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth);
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), InputDemandedBits, KnownZero,
- KnownOne, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, InputDemandedBits, KnownZero, KnownOne,
+ Depth + 1))
return I;
InputDemandedBits = InputDemandedBits.zext(BitWidth);
KnownZero = KnownZero.zext(BitWidth);
@@ -530,11 +465,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Right fill the mask of bits for this ADD/SUB to demand the most
// significant bit and all those below it.
APInt DemandedFromOps(APInt::getLowBitsSet(BitWidth, BitWidth-NLZ));
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth + 1) ||
+ if (ShrinkDemandedConstant(I, 0, DemandedFromOps) ||
+ SimplifyDemandedBits(I, 0, DemandedFromOps, LHSKnownZero, LHSKnownOne,
+ Depth + 1) ||
ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
- SimplifyDemandedBits(I->getOperandUse(1), DemandedFromOps,
- LHSKnownZero, LHSKnownOne, Depth + 1)) {
+ SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnownZero, RHSKnownOne,
+ Depth + 1)) {
// Disable the nsw and nuw flags here: We can no longer guarantee that
// we won't wrap after simplification. Removing the nsw/nuw flags is
// legal here because the top bit is not demanded.
@@ -543,6 +479,15 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
BinOP.setHasNoUnsignedWrap(false);
return I;
}
+
+ // If we are known to be adding/subtracting zeros to every bit below
+ // the highest demanded bit, we just return the other side.
+ if ((DemandedFromOps & RHSKnownZero) == DemandedFromOps)
+ return I->getOperand(0);
+    // We can't do this with the LHS for subtraction, because sub is not
+    // commutative.
+ if (I->getOpcode() == Instruction::Add &&
+ (DemandedFromOps & LHSKnownZero) == DemandedFromOps)
+ return I->getOperand(1);
}
// Otherwise just hand the add/sub off to computeKnownBits to fill in
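
A minimal sketch of the early-exit added above, with illustrative masks (hypothetical values, not code from this patch): when only the low 8 bits of an add are demanded and the RHS is known to be zero in those bits, the add collapses to its LHS.

#include "llvm/ADT/APInt.h"
using llvm::APInt;

bool rhsContributesNothing() {
  unsigned BitWidth = 32;
  // Only the low byte of the result is demanded (NLZ == 24).
  APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, 8);
  // Suppose the RHS is "shl i32 %t, 8": its low byte is known zero.
  APInt RHSKnownZero = APInt::getLowBitsSet(BitWidth, 8);
  // Mirrors the check above; true means "return I->getOperand(0)".
  return (DemandedFromOps & RHSKnownZero) == DemandedFromOps;
}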
@@ -569,19 +514,19 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the shift is NUW/NSW, then it does demand the high bits.
ShlOperator *IOp = cast<ShlOperator>(I);
if (IOp->hasNoSignedWrap())
- DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+ DemandedMaskIn.setHighBits(ShiftAmt+1);
else if (IOp->hasNoUnsignedWrap())
- DemandedMaskIn |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+ DemandedMaskIn.setHighBits(ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
- KnownOne, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne,
+ Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
KnownZero <<= ShiftAmt;
KnownOne <<= ShiftAmt;
// low bits known zero.
if (ShiftAmt)
- KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ KnownZero.setLowBits(ShiftAmt);
}
break;
case Instruction::LShr:
@@ -595,19 +540,16 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (cast<LShrOperator>(I)->isExact())
- DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ DemandedMaskIn.setLowBits(ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
- KnownOne, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne,
+ Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
- KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
- KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
- if (ShiftAmt) {
- // Compute the new bits that are at the top now.
- APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
- KnownZero |= HighBits; // high bits known zero.
- }
+ KnownZero = KnownZero.lshr(ShiftAmt);
+ KnownOne = KnownOne.lshr(ShiftAmt);
+ if (ShiftAmt)
+ KnownZero.setHighBits(ShiftAmt); // high bits known zero.
}
break;
case Instruction::AShr:
@@ -635,26 +577,26 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If any of the "high bits" are demanded, we should set the sign bit as
// demanded.
if (DemandedMask.countLeadingZeros() <= ShiftAmt)
- DemandedMaskIn.setBit(BitWidth-1);
+ DemandedMaskIn.setSignBit();
// If the shift is exact, then it does demand the low bits (and knows that
// they are zero).
if (cast<AShrOperator>(I)->isExact())
- DemandedMaskIn |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+ DemandedMaskIn.setLowBits(ShiftAmt);
- if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMaskIn, KnownZero,
- KnownOne, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne,
+ Depth + 1))
return I;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
// Compute the new bits that are at the top now.
APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
- KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
- KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
+ KnownZero = KnownZero.lshr(ShiftAmt);
+ KnownOne = KnownOne.lshr(ShiftAmt);
// Handle the sign bits.
APInt SignBit(APInt::getSignBit(BitWidth));
// Adjust to where it is now in the mask.
- SignBit = APIntOps::lshr(SignBit, ShiftAmt);
+ SignBit = SignBit.lshr(ShiftAmt);
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
@@ -683,8 +625,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt LowBits = RA - 1;
APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), Mask2, LHSKnownZero,
- LHSKnownOne, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, Mask2, LHSKnownZero, LHSKnownOne,
+ Depth + 1))
return I;
// The low bits of LHS are unchanged by the srem.
@@ -693,12 +635,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If LHS is non-negative or has all low bits zero, then the upper bits
// are all zero.
- if (LHSKnownZero[BitWidth-1] || ((LHSKnownZero & LowBits) == LowBits))
+ if (LHSKnownZero.isNegative() || ((LHSKnownZero & LowBits) == LowBits))
KnownZero |= ~LowBits;
// If LHS is negative and not all low bits are zero, then the upper bits
// are all one.
- if (LHSKnownOne[BitWidth-1] && ((LHSKnownOne & LowBits) != 0))
+ if (LHSKnownOne.isNegative() && ((LHSKnownOne & LowBits) != 0))
KnownOne |= ~LowBits;
assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?");
@@ -713,21 +655,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
CxtI);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
- KnownZero.setBit(KnownZero.getBitWidth() - 1);
+ KnownZero.setSignBit();
}
break;
case Instruction::URem: {
APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- if (SimplifyDemandedBits(I->getOperandUse(0), AllOnes, KnownZero2,
- KnownOne2, Depth + 1) ||
- SimplifyDemandedBits(I->getOperandUse(1), AllOnes, KnownZero2,
- KnownOne2, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, AllOnes, KnownZero2, KnownOne2, Depth + 1) ||
+ SimplifyDemandedBits(I, 1, AllOnes, KnownZero2, KnownOne2, Depth + 1))
return I;
unsigned Leaders = KnownZero2.countLeadingOnes();
- Leaders = std::max(Leaders,
- KnownZero2.countLeadingOnes());
KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
break;
}
@@ -792,11 +730,11 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return ConstantInt::getNullValue(VTy);
// We know that the upper bits are set to zero.
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - ArgWidth);
+ KnownZero.setBitsFrom(ArgWidth);
return nullptr;
}
case Intrinsic::x86_sse42_crc32_64_64:
- KnownZero = APInt::getHighBitsSet(64, 32);
+ KnownZero.setBitsFrom(32);
return nullptr;
}
}
@@ -811,6 +749,150 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return nullptr;
}
+/// Helper routine of SimplifyDemandedUseBits. It computes KnownZero/KnownOne
+/// bits. It also tries to handle simplifications that can be done based on
+/// DemandedMask, but without modifying the Instruction.
+Value *InstCombiner::SimplifyMultipleUseDemandedBits(Instruction *I,
+ const APInt &DemandedMask,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ unsigned Depth,
+ Instruction *CxtI) {
+ unsigned BitWidth = DemandedMask.getBitWidth();
+ Type *ITy = I->getType();
+
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
+
+  // Even though we can't simplify this instruction in every user's context,
+  // we can at least compute the knownzero/knownone bits, and we can
+ // do simplifications that apply to *just* the one user if we know that
+ // this instruction has a simpler value in that context.
+ switch (I->getOpcode()) {
+ case Instruction::And: {
+ // If either the LHS or the RHS are Zero, the result is zero.
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
+ CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
+ CxtI);
+
+    // Output known-0 bits are known to be clear if zero in either the LHS | RHS.
+ APInt IKnownZero = RHSKnownZero | LHSKnownZero;
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ APInt IKnownOne = RHSKnownOne & LHSKnownOne;
+
+ // If the client is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ return Constant::getIntegerValue(ITy, IKnownOne);
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) ==
+ (DemandedMask & ~LHSKnownZero))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) ==
+ (DemandedMask & ~RHSKnownZero))
+ return I->getOperand(1);
+
+ KnownZero = std::move(IKnownZero);
+ KnownOne = std::move(IKnownOne);
+ break;
+ }
+ case Instruction::Or: {
+ // We can simplify (X|Y) -> X or Y in the user's context if we know that
+ // only bits from X or Y are demanded.
+
+ // If either the LHS or the RHS are One, the result is One.
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
+ CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
+ CxtI);
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ APInt IKnownZero = RHSKnownZero & LHSKnownZero;
+    // Output known-1 bits are known to be set if set in either the LHS | RHS.
+ APInt IKnownOne = RHSKnownOne | LHSKnownOne;
+
+ // If the client is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ return Constant::getIntegerValue(ITy, IKnownOne);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other. These bits cannot contribute to the result of the 'or' in this
+ // context.
+ if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
+ (DemandedMask & ~LHSKnownOne))
+ return I->getOperand(0);
+ if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) ==
+ (DemandedMask & ~RHSKnownOne))
+ return I->getOperand(1);
+
+ // If all of the potentially set bits on one side are known to be set on
+ // the other side, just use the 'other' side.
+ if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) ==
+ (DemandedMask & (~RHSKnownZero)))
+ return I->getOperand(0);
+ if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) ==
+ (DemandedMask & (~LHSKnownZero)))
+ return I->getOperand(1);
+
+ KnownZero = std::move(IKnownZero);
+ KnownOne = std::move(IKnownOne);
+ break;
+ }
+ case Instruction::Xor: {
+ // We can simplify (X^Y) -> X or Y in the user's context if we know that
+ // only bits from X or Y are demanded.
+
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth + 1,
+ CxtI);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth + 1,
+ CxtI);
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt IKnownZero = (RHSKnownZero & LHSKnownZero) |
+ (RHSKnownOne & LHSKnownOne);
+    // Output known-1 bits are known to be set if set in only one of the LHS, RHS.
+ APInt IKnownOne = (RHSKnownZero & LHSKnownOne) |
+ (RHSKnownOne & LHSKnownZero);
+
+ // If the client is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask)
+ return Constant::getIntegerValue(ITy, IKnownOne);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other.
+ if ((DemandedMask & RHSKnownZero) == DemandedMask)
+ return I->getOperand(0);
+ if ((DemandedMask & LHSKnownZero) == DemandedMask)
+ return I->getOperand(1);
+
+    KnownZero = std::move(IKnownZero);
+    KnownOne = std::move(IKnownOne);
+ break;
+ }
+ default:
+ // Compute the KnownZero/KnownOne bits to simplify things downstream.
+ computeKnownBits(I, KnownZero, KnownOne, Depth, CxtI);
+
+ // If this user is only demanding bits that we know, return the known
+ // constant.
+ if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
+ return Constant::getIntegerValue(ITy, KnownOne);
+
+ break;
+ }
+
+ return nullptr;
+}
+
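
To make the 'or' early-exits above concrete, here is a small sketch with made-up known-bits values (not from the patch): the RHS of (X | Y) is droppable for this user when every demanded bit that is not already known one on the LHS is known zero on the RHS.

#include "llvm/ADT/APInt.h"
using llvm::APInt;

bool orSimplifiesToLHS() {
  // This user only reads the low nibble of (X | Y).
  APInt DemandedMask(8, 0x0F);
  APInt LHSKnownOne(8, 0x00);  // nothing known one in X
  APInt RHSKnownZero(8, 0xFF); // Y is known to be zero everywhere
  // Same test as above; true means (X | Y) -> X for this user.
  return (DemandedMask & ~LHSKnownOne & RHSKnownZero) ==
         (DemandedMask & ~LHSKnownOne);
}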
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
@@ -849,7 +931,7 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
unsigned ShrAmt = ShrOp1.getZExtValue();
KnownOne.clearAllBits();
- KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
+ KnownZero.setLowBits(ShlAmt - 1);
KnownZero &= DemandedMask;
APInt BitMask1(APInt::getAllOnesValue(BitWidth));
@@ -1472,14 +1554,136 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
}
+ case Intrinsic::x86_sse2_packssdw_128:
+ case Intrinsic::x86_sse2_packsswb_128:
+ case Intrinsic::x86_sse2_packuswb_128:
+ case Intrinsic::x86_sse41_packusdw:
+ case Intrinsic::x86_avx2_packssdw:
+ case Intrinsic::x86_avx2_packsswb:
+ case Intrinsic::x86_avx2_packusdw:
+ case Intrinsic::x86_avx2_packuswb:
+ case Intrinsic::x86_avx512_packssdw_512:
+ case Intrinsic::x86_avx512_packsswb_512:
+ case Intrinsic::x86_avx512_packusdw_512:
+ case Intrinsic::x86_avx512_packuswb_512: {
+ auto *Ty0 = II->getArgOperand(0)->getType();
+ unsigned InnerVWidth = Ty0->getVectorNumElements();
+ assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");
+
+ unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
+ unsigned VWidthPerLane = VWidth / NumLanes;
+ unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
+
+ // Per lane, pack the elements of the first input and then the second.
+ // e.g.
+ // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
+ // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
+ for (int OpNum = 0; OpNum != 2; ++OpNum) {
+ APInt OpDemandedElts(InnerVWidth, 0);
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ unsigned LaneIdx = Lane * VWidthPerLane;
+ for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
+ unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
+ if (DemandedElts[Idx])
+ OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
+ }
+ }
+
+ // Demand elements from the operand.
+ auto *Op = II->getArgOperand(OpNum);
+ APInt OpUndefElts(InnerVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(Op, OpDemandedElts, OpUndefElts,
+ Depth + 1);
+ if (TmpV) {
+ II->setArgOperand(OpNum, TmpV);
+ MadeChange = true;
+ }
+
+ // Pack the operand's UNDEF elements, one lane at a time.
+ OpUndefElts = OpUndefElts.zext(VWidth);
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
+ LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
+ LaneElts = LaneElts.shl(InnerVWidthPerLane * (2 * Lane + OpNum));
+ UndefElts |= LaneElts;
+ }
+ }
+ break;
+ }
+
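The index arithmetic in the loop above is easier to check with a standalone sketch (illustrative only) that prints the result-to-operand mapping for the v32i8 PACK(v16i16, v16i16) shape from the comment:

#include <cstdio>

int main() {
  const unsigned VWidth = 32, InnerVWidth = 16, NumLanes = 2;
  const unsigned VWidthPerLane = VWidth / NumLanes;           // 16
  const unsigned InnerVWidthPerLane = InnerVWidth / NumLanes; // 8
  for (unsigned OpNum = 0; OpNum != 2; ++OpNum)
    for (unsigned Lane = 0; Lane != NumLanes; ++Lane)
      for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt)
        // Same formulas as above: result[Idx] reads opN[lane-local index].
        std::printf("result[%2u] <- op%u[%2u]\n",
                    Lane * VWidthPerLane + Elt + InnerVWidthPerLane * OpNum,
                    OpNum, Lane * InnerVWidthPerLane + Elt);
  return 0;
}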
+ // PSHUFB
+ case Intrinsic::x86_ssse3_pshuf_b_128:
+ case Intrinsic::x86_avx2_pshuf_b:
+ case Intrinsic::x86_avx512_pshuf_b_512:
+ // PERMILVAR
+ case Intrinsic::x86_avx_vpermilvar_ps:
+ case Intrinsic::x86_avx_vpermilvar_ps_256:
+ case Intrinsic::x86_avx512_vpermilvar_ps_512:
+ case Intrinsic::x86_avx_vpermilvar_pd:
+ case Intrinsic::x86_avx_vpermilvar_pd_256:
+ case Intrinsic::x86_avx512_vpermilvar_pd_512:
+ // PERMV
+ case Intrinsic::x86_avx2_permd:
+ case Intrinsic::x86_avx2_permps: {
+ Value *Op1 = II->getArgOperand(1);
+ TmpV = SimplifyDemandedVectorElts(Op1, DemandedElts, UndefElts,
+ Depth + 1);
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+ break;
+ }
+
 // SSE4A instructions leave the upper 64 bits of the 128-bit result
// in an undefined state.
case Intrinsic::x86_sse4a_extrq:
case Intrinsic::x86_sse4a_extrqi:
case Intrinsic::x86_sse4a_insertq:
case Intrinsic::x86_sse4a_insertqi:
- UndefElts |= APInt::getHighBitsSet(VWidth, VWidth / 2);
+ UndefElts.setHighBits(VWidth / 2);
break;
+ case Intrinsic::amdgcn_buffer_load:
+ case Intrinsic::amdgcn_buffer_load_format: {
+ if (VWidth == 1 || !DemandedElts.isMask())
+ return nullptr;
+
+    // TODO: Handle 3-element vectors when supported in code gen.
+ unsigned NewNumElts = PowerOf2Ceil(DemandedElts.countTrailingOnes());
+ if (NewNumElts == VWidth)
+ return nullptr;
+
+ Module *M = II->getParent()->getParent()->getParent();
+ Type *EltTy = V->getType()->getVectorElementType();
+
+ Type *NewTy = (NewNumElts == 1) ? EltTy :
+ VectorType::get(EltTy, NewNumElts);
+
+ Function *NewIntrin = Intrinsic::getDeclaration(M, II->getIntrinsicID(),
+ NewTy);
+
+ SmallVector<Value *, 5> Args;
+ for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
+ Args.push_back(II->getArgOperand(I));
+
+ IRBuilderBase::InsertPointGuard Guard(*Builder);
+ Builder->SetInsertPoint(II);
+
+ CallInst *NewCall = Builder->CreateCall(NewIntrin, Args);
+ NewCall->takeName(II);
+ NewCall->copyMetadata(*II);
+ if (NewNumElts == 1) {
+ return Builder->CreateInsertElement(UndefValue::get(V->getType()),
+ NewCall, static_cast<uint64_t>(0));
+ }
+
+ SmallVector<uint32_t, 8> EltMask;
+ for (unsigned I = 0; I < VWidth; ++I)
+ EltMask.push_back(I);
+
+ Value *Shuffle = Builder->CreateShuffleVector(
+ NewCall, UndefValue::get(NewTy), EltMask);
+
+ MadeChange = true;
+ return Shuffle;
+ }
}
break;
}
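
A concrete (hypothetical) instance of the narrowing above: an amdgcn.buffer.load of <4 x float> whose users only read lanes 0 and 1 becomes a <2 x float> load that is shuffled back to the original type. The mask math reduces to:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/MathExtras.h"
using llvm::APInt;

unsigned narrowedEltCount() {
  APInt DemandedElts(4, 0x3); // lanes 0 and 1 are read
  if (!DemandedElts.isMask()) // must be a contiguous run of low lanes
    return 4;                 // cannot narrow
  return llvm::PowerOf2Ceil(DemandedElts.countTrailingOnes()); // -> 2
}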
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index b2477f6c8633..e89b400a4afc 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -645,6 +645,36 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
return new ShuffleVectorInst(InsertFirst, UndefValue::get(VT), ZeroMask);
}
+/// If we have an insertelement instruction feeding into another insertelement
+/// and the 2nd is inserting a constant into the vector, canonicalize that
+/// constant insertion before the insertion of a variable:
+///
+/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
+/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
+///
+/// This has the potential to eliminate the 2nd insertelement instruction
+/// via constant folding of the scalar constant into a vector constant.
+static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
+ InstCombiner::BuilderTy &Builder) {
+ auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
+ if (!InsElt1 || !InsElt1->hasOneUse())
+ return nullptr;
+
+ Value *X, *Y;
+ Constant *ScalarC;
+ ConstantInt *IdxC1, *IdxC2;
+ if (match(InsElt1->getOperand(0), m_Value(X)) &&
+ match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
+ match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
+ match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
+ match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
+ Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
+ return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
+ }
+
+ return nullptr;
+}
+
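For a concrete (hypothetical) IR instance of the pattern above: insertelement (insertelement <4 x i32> %x, i32 %y, i32 0), i32 7, i32 1 becomes insertelement (insertelement <4 x i32> %x, i32 7, i32 1), i32 %y, i32 0, and if %x is itself a constant vector the inner insert then folds away entirely.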
/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
/// --> shufflevector X, CVec', Mask'
static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
@@ -806,6 +836,9 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
return Shuf;
+ if (Instruction *NewInsElt = hoistInsEltConst(IE, *Builder))
+ return NewInsElt;
+
// Turn a sequence of inserts that broadcasts a scalar into a single
// insert + shufflevector.
if (Instruction *Broadcast = foldInsSequenceIntoBroadcast(IE))
@@ -1107,12 +1140,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
SmallVector<int, 16> Mask = SVI.getShuffleMask();
Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
- bool MadeChange = false;
-
- // Undefined shuffle mask -> undefined value.
- if (isa<UndefValue>(SVI.getOperand(2)))
- return replaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
+ if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(),
+ SVI.getType(), DL, &TLI, &DT, &AC))
+ return replaceInstUsesWith(SVI, V);
+ bool MadeChange = false;
unsigned VWidth = SVI.getType()->getVectorNumElements();
APInt UndefElts(VWidth, 0);
@@ -1209,7 +1241,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (isShuffleExtractingFromLHS(SVI, Mask)) {
Value *V = LHS;
unsigned MaskElems = Mask.size();
- unsigned BegIdx = Mask.front();
VectorType *SrcTy = cast<VectorType>(V->getType());
unsigned VecBitWidth = SrcTy->getBitWidth();
unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
@@ -1223,6 +1254,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// Only visit bitcasts that weren't previously handled.
BCs.push_back(BC);
for (BitCastInst *BC : BCs) {
+ unsigned BegIdx = Mask.front();
Type *TgtTy = BC->getDestTy();
unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
if (!TgtElemBitWidth)
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 27fc34d23175..88ef17bbc8fa 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -82,18 +82,24 @@ static cl::opt<bool>
EnableExpensiveCombines("expensive-combines",
cl::desc("Enable expensive instruction combines"));
+static cl::opt<unsigned>
+MaxArraySize("instcombine-maxarray-size", cl::init(1024),
+ cl::desc("Maximum array size considered when doing a combine"));
+
Value *InstCombiner::EmitGEPOffset(User *GEP) {
return llvm::EmitGEPOffset(Builder, DL, GEP);
}
/// Return true if it is desirable to convert an integer computation from a
/// given bit width to a new bit width.
-/// We don't want to convert from a legal to an illegal type for example or from
-/// a smaller to a larger illegal type.
-bool InstCombiner::ShouldChangeType(unsigned FromWidth,
+/// We don't want to convert from a legal to an illegal type or from a smaller
+/// to a larger illegal type. A width of '1' is always treated as a legal type
+/// because i1 is a fundamental type in IR, and there are many specialized
+/// optimizations for i1 types.
+bool InstCombiner::shouldChangeType(unsigned FromWidth,
unsigned ToWidth) const {
- bool FromLegal = DL.isLegalInteger(FromWidth);
- bool ToLegal = DL.isLegalInteger(ToWidth);
+ bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
+ bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
// If this is a legal integer from type, and the result would be an illegal
// type, don't do the transformation.
@@ -109,14 +115,16 @@ bool InstCombiner::ShouldChangeType(unsigned FromWidth,
}
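
For example, on a typical target whose datalayout declares only n8:16:32:64 integers as legal, shouldChangeType(32, 1) used to fail because width 1 is not datalayout-legal; with the special case above, i1 is always treated as legal, so conversions involving it are no longer rejected on legality grounds alone.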
/// Return true if it is desirable to convert a computation from 'From' to 'To'.
-/// We don't want to convert from a legal to an illegal type for example or from
-/// a smaller to a larger illegal type.
-bool InstCombiner::ShouldChangeType(Type *From, Type *To) const {
+/// We don't want to convert from a legal to an illegal type or from a smaller
+/// to a larger illegal type. i1 is always treated as a legal type because it is
+/// a fundamental type in IR, and there are many specialized optimizations for
+/// i1 types.
+bool InstCombiner::shouldChangeType(Type *From, Type *To) const {
assert(From->isIntegerTy() && To->isIntegerTy());
unsigned FromWidth = From->getPrimitiveSizeInBits();
unsigned ToWidth = To->getPrimitiveSizeInBits();
- return ShouldChangeType(FromWidth, ToWidth);
+ return shouldChangeType(FromWidth, ToWidth);
}
 // Return true if No Signed Wrap should be maintained for I.
@@ -447,16 +455,11 @@ static bool RightDistributesOverLeft(Instruction::BinaryOps LOp,
/// This function returns the identity value for the given opcode, which can be used to
/// factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
-static Value *getIdentityValue(Instruction::BinaryOps OpCode, Value *V) {
+static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
if (isa<Constant>(V))
return nullptr;
- if (OpCode == Instruction::Mul)
- return ConstantInt::get(V->getType(), 1);
-
- // TODO: We can handle other cases e.g. Instruction::And, Instruction::Or etc.
-
- return nullptr;
+ return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
}
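
A short sketch of what the generalized helper yields (per the ConstantExpr::getBinOpIdentity API as understood here; callers must still handle nullptr for opcodes with no identity, such as the left operand of Sub):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

Constant *mulIdentity(Type *Int32Ty) {
  // Mul -> 1, Add -> 0; opcodes without a defined identity yield nullptr.
  return ConstantExpr::getBinOpIdentity(Instruction::Mul, Int32Ty);
}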
/// This function factors binary ops which can be combined using distributive
@@ -468,8 +471,7 @@ static Value *getIdentityValue(Instruction::BinaryOps OpCode, Value *V) {
static Instruction::BinaryOps
getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode,
BinaryOperator *Op, Value *&LHS, Value *&RHS) {
- if (!Op)
- return Instruction::BinaryOpsEnd;
+ assert(Op && "Expected a binary operator");
LHS = Op->getOperand(0);
RHS = Op->getOperand(1);
@@ -499,11 +501,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
const DataLayout &DL, BinaryOperator &I,
Instruction::BinaryOps InnerOpcode, Value *A,
Value *B, Value *C, Value *D) {
-
- // If any of A, B, C, D are null, we can not factor I, return early.
- // Checking A and C should be enough.
- if (!A || !C || !B || !D)
- return nullptr;
+ assert(A && B && C && D && "All values must be provided");
Value *V = nullptr;
Value *SimplifiedInst = nullptr;
@@ -564,13 +562,11 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
if (isa<OverflowingBinaryOperator>(&I))
HasNSW = I.hasNoSignedWrap();
- if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS))
- if (isa<OverflowingBinaryOperator>(Op0))
- HasNSW &= Op0->hasNoSignedWrap();
+ if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS))
+ HasNSW &= LOBO->hasNoSignedWrap();
- if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS))
- if (isa<OverflowingBinaryOperator>(Op1))
- HasNSW &= Op1->hasNoSignedWrap();
+ if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS))
+ HasNSW &= ROBO->hasNoSignedWrap();
// We can propagate 'nsw' if we know that
// %Y = mul nsw i16 %X, C
@@ -599,31 +595,39 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+ Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
- // Factorization.
- Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
- auto TopLevelOpcode = I.getOpcode();
- auto LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
- auto RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);
-
- // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
- // a common term.
- if (LHSOpcode == RHSOpcode) {
- if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D))
- return V;
- }
-
- // The instruction has the form "(A op' B) op (C)". Try to factorize common
- // term.
- if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS,
- getIdentityValue(LHSOpcode, RHS)))
- return V;
+ {
+ // Factorization.
+ Value *A, *B, *C, *D;
+ Instruction::BinaryOps LHSOpcode, RHSOpcode;
+ if (Op0)
+ LHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op0, A, B);
+ if (Op1)
+ RHSOpcode = getBinOpsForFactorization(TopLevelOpcode, Op1, C, D);
+
+ // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
+ // a common term.
+ if (Op0 && Op1 && LHSOpcode == RHSOpcode)
+ if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D))
+ return V;
+
+ // The instruction has the form "(A op' B) op (C)". Try to factorize common
+ // term.
+ if (Op0)
+ if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
+ if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS,
+ Ident))
+ return V;
- // The instruction has the form "(B) op (C op' D)". Try to factorize common
- // term.
- if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS,
- getIdentityValue(RHSOpcode, LHS), C, D))
- return V;
+ // The instruction has the form "(B) op (C op' D)". Try to factorize common
+ // term.
+ if (Op1)
+ if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
+ if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS, Ident,
+ C, D))
+ return V;
+ }
// Expansion.
if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
@@ -720,6 +724,21 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
if (C->getType()->getElementType()->isIntegerTy())
return ConstantExpr::getNeg(C);
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+ for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
+ Constant *Elt = CV->getAggregateElement(i);
+ if (!Elt)
+ return nullptr;
+
+ if (isa<UndefValue>(Elt))
+ continue;
+
+ if (!isa<ConstantInt>(Elt))
+ return nullptr;
+ }
+ return ConstantExpr::getNeg(CV);
+ }
+
return nullptr;
}
@@ -820,8 +839,29 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
}
-Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
- PHINode *PN = cast<PHINode>(I.getOperand(0));
+static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV,
+ InstCombiner *IC) {
+ bool ConstIsRHS = isa<Constant>(I->getOperand(1));
+ Constant *C = cast<Constant>(I->getOperand(ConstIsRHS));
+
+ if (auto *InC = dyn_cast<Constant>(InV)) {
+ if (ConstIsRHS)
+ return ConstantExpr::get(I->getOpcode(), InC, C);
+ return ConstantExpr::get(I->getOpcode(), C, InC);
+ }
+
+ Value *Op0 = InV, *Op1 = C;
+ if (!ConstIsRHS)
+ std::swap(Op0, Op1);
+
+ Value *RI = IC->Builder->CreateBinOp(I->getOpcode(), Op0, Op1, "phitmp");
+ auto *FPInst = dyn_cast<Instruction>(RI);
+ if (FPInst && isa<FPMathOperator>(FPInst))
+ FPInst->copyFastMathFlags(I);
+ return RI;
+}
+
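As a (hypothetical) example of what this helper produces: for fadd fast (phi [1.0, %a], [%v, %b]), 2.0, the %a arm folds to the constant 3.0 through ConstantExpr::get, while the %b arm becomes %phitmp = fadd fast float %v, 2.0, with the fast-math flags carried over by copyFastMathFlags.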
+Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
unsigned NumPHIValues = PN->getNumIncomingValues();
if (NumPHIValues == 0)
return nullptr;
@@ -902,7 +942,11 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
// Beware of ConstantExpr: it may eventually evaluate to getNullValue,
// even if currently isNullValue gives false.
Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
- if (InC && !isa<ConstantExpr>(InC))
+      // For vector constants, we cannot use isNullValue to fold into
+      // FalseVInPred versus TrueVInPred: a vector with a mix of zero and
+      // nonzero elements is not a null value, so we would incorrectly fold
+      // the whole vector to TrueVInPred.
+ if (InC && !isa<ConstantExpr>(InC) && isa<ConstantInt>(InC))
InV = InC->isNullValue() ? FalseVInPred : TrueVInPred;
else
InV = Builder->CreateSelect(PN->getIncomingValue(i),
@@ -923,15 +967,9 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
C, "phitmp");
NewPN->addIncoming(InV, PN->getIncomingBlock(i));
}
- } else if (I.getNumOperands() == 2) {
- Constant *C = cast<Constant>(I.getOperand(1));
+ } else if (auto *BO = dyn_cast<BinaryOperator>(&I)) {
for (unsigned i = 0; i != NumPHIValues; ++i) {
- Value *InV = nullptr;
- if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
- InV = ConstantExpr::get(I.getOpcode(), InC, C);
- else
- InV = Builder->CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
- PN->getIncomingValue(i), C, "phitmp");
+ Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i), this);
NewPN->addIncoming(InV, PN->getIncomingBlock(i));
}
} else {
@@ -957,14 +995,14 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
return replaceInstUsesWith(I, NewPN);
}
-Instruction *InstCombiner::foldOpWithConstantIntoOperand(Instruction &I) {
+Instruction *InstCombiner::foldOpWithConstantIntoOperand(BinaryOperator &I) {
assert(isa<Constant>(I.getOperand(1)) && "Unexpected operand type");
if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
return NewSel;
- } else if (isa<PHINode>(I.getOperand(0))) {
- if (Instruction *NewPhi = FoldOpIntoPhi(I))
+ } else if (auto *PN = dyn_cast<PHINode>(I.getOperand(0))) {
+ if (Instruction *NewPhi = foldOpIntoPhi(I, PN))
return NewPhi;
}
return nullptr;
@@ -1315,22 +1353,19 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth);
assert(cast<VectorType>(RHS->getType())->getNumElements() == VWidth);
- // If both arguments of binary operation are shuffles, which use the same
- // mask and shuffle within a single vector, it is worthwhile to move the
- // shuffle after binary operation:
+ // If both arguments of the binary operation are shuffles that use the same
+ // mask and shuffle within a single vector, move the shuffle after the binop:
// Op(shuffle(v1, m), shuffle(v2, m)) -> shuffle(Op(v1, v2), m)
- if (isa<ShuffleVectorInst>(LHS) && isa<ShuffleVectorInst>(RHS)) {
- ShuffleVectorInst *LShuf = cast<ShuffleVectorInst>(LHS);
- ShuffleVectorInst *RShuf = cast<ShuffleVectorInst>(RHS);
- if (isa<UndefValue>(LShuf->getOperand(1)) &&
- isa<UndefValue>(RShuf->getOperand(1)) &&
- LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType() &&
- LShuf->getMask() == RShuf->getMask()) {
- Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0),
- RShuf->getOperand(0), Builder);
- return Builder->CreateShuffleVector(NewBO,
- UndefValue::get(NewBO->getType()), LShuf->getMask());
- }
+ auto *LShuf = dyn_cast<ShuffleVectorInst>(LHS);
+ auto *RShuf = dyn_cast<ShuffleVectorInst>(RHS);
+ if (LShuf && RShuf && LShuf->getMask() == RShuf->getMask() &&
+ isa<UndefValue>(LShuf->getOperand(1)) &&
+ isa<UndefValue>(RShuf->getOperand(1)) &&
+ LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType()) {
+ Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0),
+ RShuf->getOperand(0), Builder);
+ return Builder->CreateShuffleVector(
+ NewBO, UndefValue::get(NewBO->getType()), LShuf->getMask());
}
// If one argument is a shuffle within one vector, the other is a constant,
@@ -1559,27 +1594,21 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Replace: gep (gep %P, long B), long A, ...
// With: T = long A+B; gep %P, T, ...
//
- Value *Sum;
Value *SO1 = Src->getOperand(Src->getNumOperands()-1);
Value *GO1 = GEP.getOperand(1);
- if (SO1 == Constant::getNullValue(SO1->getType())) {
- Sum = GO1;
- } else if (GO1 == Constant::getNullValue(GO1->getType())) {
- Sum = SO1;
- } else {
- // If they aren't the same type, then the input hasn't been processed
- // by the loop above yet (which canonicalizes sequential index types to
- // intptr_t). Just avoid transforming this until the input has been
- // normalized.
- if (SO1->getType() != GO1->getType())
- return nullptr;
- // Only do the combine when GO1 and SO1 are both constants. Only in
- // this case, we are sure the cost after the merge is never more than
- // that before the merge.
- if (!isa<Constant>(GO1) || !isa<Constant>(SO1))
- return nullptr;
- Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
- }
+
+ // If they aren't the same type, then the input hasn't been processed
+ // by the loop above yet (which canonicalizes sequential index types to
+ // intptr_t). Just avoid transforming this until the input has been
+ // normalized.
+ if (SO1->getType() != GO1->getType())
+ return nullptr;
+
+  Value *Sum = SimplifyAddInst(GO1, SO1, false, false, DL, &TLI, &DT, &AC);
+ // Only do the combine when we are sure the cost after the
+ // merge is never more than that before the merge.
+ if (Sum == nullptr)
+ return nullptr;
// Update the GEP in place if possible.
if (Src->getNumOperands() == 2) {
@@ -1654,14 +1683,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
- // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
- Value *StrippedPtr = PtrOp->stripPointerCasts();
- PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType());
-
// We do not handle pointer-vector geps here.
- if (!StrippedPtrTy)
+ if (GEP.getType()->isVectorTy())
return nullptr;
+ // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
+ Value *StrippedPtr = PtrOp->stripPointerCasts();
+ PointerType *StrippedPtrTy = cast<PointerType>(StrippedPtr->getType());
+
if (StrippedPtr != PtrOp) {
bool HasZeroPointerIndex = false;
if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
@@ -2239,11 +2268,11 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
ConstantInt *AddRHS;
if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
// Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
- for (SwitchInst::CaseIt CaseIter : SI.cases()) {
- Constant *NewCase = ConstantExpr::getSub(CaseIter.getCaseValue(), AddRHS);
+ for (auto Case : SI.cases()) {
+ Constant *NewCase = ConstantExpr::getSub(Case.getCaseValue(), AddRHS);
assert(isa<ConstantInt>(NewCase) &&
"Result of expression should be constant");
- CaseIter.setValue(cast<ConstantInt>(NewCase));
+ Case.setValue(cast<ConstantInt>(NewCase));
}
SI.setCondition(Op0);
return &SI;
@@ -2275,9 +2304,9 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
Value *NewCond = Builder->CreateTrunc(Cond, Ty, "trunc");
SI.setCondition(NewCond);
- for (SwitchInst::CaseIt CaseIter : SI.cases()) {
- APInt TruncatedCase = CaseIter.getCaseValue()->getValue().trunc(NewWidth);
- CaseIter.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
+ for (auto Case : SI.cases()) {
+ APInt TruncatedCase = Case.getCaseValue()->getValue().trunc(NewWidth);
+ Case.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
}
return &SI;
}
@@ -2934,8 +2963,8 @@ bool InstCombiner::run() {
Result->takeName(I);
// Push the new instruction and any users onto the worklist.
- Worklist.Add(Result);
Worklist.AddUsersToWorkList(*Result);
+ Worklist.Add(Result);
// Insert the new instruction into the basic block...
BasicBlock *InstParent = I->getParent();
@@ -2958,8 +2987,8 @@ bool InstCombiner::run() {
if (isInstructionTriviallyDead(I, &TLI)) {
eraseInstFromFunction(*I);
} else {
- Worklist.Add(I);
Worklist.AddUsersToWorkList(*I);
+ Worklist.Add(I);
}
}
MadeIRChange = true;
@@ -3022,12 +3051,11 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
}
// See if we can constant fold its operands.
- for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); i != e;
- ++i) {
- if (!isa<ConstantVector>(i) && !isa<ConstantExpr>(i))
+ for (Use &U : Inst->operands()) {
+ if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
continue;
- auto *C = cast<Constant>(i);
+ auto *C = cast<Constant>(U);
Constant *&FoldRes = FoldedConstants[C];
if (!FoldRes)
FoldRes = ConstantFoldConstant(C, DL, TLI);
@@ -3035,7 +3063,10 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
FoldRes = C;
if (FoldRes != C) {
- *i = FoldRes;
+ DEBUG(dbgs() << "IC: ConstFold operand of: " << *Inst
+ << "\n Old = " << *C
+ << "\n New = " << *FoldRes << '\n');
+ U = FoldRes;
MadeIRChange = true;
}
}
@@ -3055,17 +3086,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
- // See if this is an explicit destination.
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i)
- if (i.getCaseValue() == Cond) {
- BasicBlock *ReachableBB = i.getCaseSuccessor();
- Worklist.push_back(ReachableBB);
- continue;
- }
-
- // Otherwise it is the default destination.
- Worklist.push_back(SI->getDefaultDest());
+ Worklist.push_back(SI->findCaseValue(Cond)->getCaseSuccessor());
continue;
}
}
@@ -3152,6 +3173,7 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
InstCombiner IC(Worklist, &Builder, F.optForMinSize(), ExpensiveCombines,
AA, AC, TLI, DT, DL, LI);
+ IC.MaxArraySizeForCombine = MaxArraySize;
Changed |= IC.run();
if (!Changed)
@@ -3176,9 +3198,10 @@ PreservedAnalyses InstCombinePass::run(Function &F,
return PreservedAnalyses::all();
// Mark all the analyses that instcombine updates as preserved.
- // FIXME: This should also 'preserve the CFG'.
PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<AAManager>();
+ PA.preserve<GlobalsAA>();
return PA;
}
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index f5e9e7dd5a93..94cfc69ed555 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -80,6 +80,7 @@ static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37;
static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36;
static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;
static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46;
+static const uint64_t kPS4CPU_ShadowOffset64 = 1ULL << 40;
static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;
// The shadow memory space is dynamically allocated.
static const uint64_t kWindowsShadowOffset64 = kDynamicShadowSentinel;
@@ -380,6 +381,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
bool IsAndroid = TargetTriple.isAndroid();
bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS();
bool IsFreeBSD = TargetTriple.isOSFreeBSD();
+ bool IsPS4CPU = TargetTriple.isPS4CPU();
bool IsLinux = TargetTriple.isOSLinux();
bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64 ||
TargetTriple.getArch() == llvm::Triple::ppc64le;
@@ -392,6 +394,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
TargetTriple.getArch() == llvm::Triple::mips64el;
bool IsAArch64 = TargetTriple.getArch() == llvm::Triple::aarch64;
bool IsWindows = TargetTriple.isOSWindows();
+ bool IsFuchsia = TargetTriple.isOSFuchsia();
ShadowMapping Mapping;
@@ -412,12 +415,18 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
else
Mapping.Offset = kDefaultShadowOffset32;
} else { // LongSize == 64
- if (IsPPC64)
+ // Fuchsia is always PIE, which means that the beginning of the address
+ // space is always available.
+ if (IsFuchsia)
+ Mapping.Offset = 0;
+ else if (IsPPC64)
Mapping.Offset = kPPC64_ShadowOffset64;
else if (IsSystemZ)
Mapping.Offset = kSystemZ_ShadowOffset64;
else if (IsFreeBSD)
Mapping.Offset = kFreeBSD_ShadowOffset64;
+ else if (IsPS4CPU)
+ Mapping.Offset = kPS4CPU_ShadowOffset64;
else if (IsLinux && IsX86_64) {
if (IsKasan)
Mapping.Offset = kLinuxKasan_ShadowOffset64;
@@ -456,9 +465,9 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
 // offset is not necessarily 1/8th of the address space. On SystemZ,
// we could OR the constant in a single instruction, but it's more
// efficient to load it once and use indexed addressing.
- Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ
- && !(Mapping.Offset & (Mapping.Offset - 1))
- && Mapping.Offset != kDynamicShadowSentinel;
+ Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ && !IsPS4CPU &&
+ !(Mapping.Offset & (Mapping.Offset - 1)) &&
+ Mapping.Offset != kDynamicShadowSentinel;
return Mapping;
}
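
All of the offsets above feed the usual ASan shadow computation; a simplified sketch (the default Scale is 3, and the constants here are illustrative, not from the patch):

#include <cstdint>

uint64_t shadowFor(uint64_t Addr, uint64_t Offset, bool OrShadowOffset) {
  const uint64_t kShadowScale = 3; // 8 bytes of app memory per shadow byte
  uint64_t Shifted = Addr >> kShadowScale;
  // When Offset is a lone high bit, OR equals ADD and can be cheaper to
  // materialize on some targets; see the OrShadowOffset logic above.
  return OrShadowOffset ? (Shifted | Offset) : (Shifted + Offset);
}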
@@ -567,8 +576,6 @@ struct AddressSanitizer : public FunctionPass {
Type *IntptrTy;
ShadowMapping Mapping;
DominatorTree *DT;
- Function *AsanCtorFunction = nullptr;
- Function *AsanInitFunction = nullptr;
Function *AsanHandleNoReturnFunc;
Function *AsanPtrCmpFunction, *AsanPtrSubFunction;
// This array is indexed by AccessIsWrite, Experiment and log2(AccessSize).
@@ -1561,31 +1568,31 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) {
// Declare our poisoning and unpoisoning functions.
AsanPoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr));
+ kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy));
AsanPoisonGlobals->setLinkage(Function::ExternalLinkage);
AsanUnpoisonGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanUnpoisonGlobalsName, IRB.getVoidTy(), nullptr));
+ kAsanUnpoisonGlobalsName, IRB.getVoidTy()));
AsanUnpoisonGlobals->setLinkage(Function::ExternalLinkage);
// Declare functions that register/unregister globals.
AsanRegisterGlobals = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
+ kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy));
AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
AsanUnregisterGlobals = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanUnregisterGlobalsName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, nullptr));
+ IntptrTy, IntptrTy));
AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
// Declare the functions that find globals in a shared object and then invoke
// the (un)register function on them.
AsanRegisterImageGlobals =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr));
+ kAsanRegisterImageGlobalsName, IRB.getVoidTy(), IntptrTy));
AsanRegisterImageGlobals->setLinkage(Function::ExternalLinkage);
AsanUnregisterImageGlobals =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy, nullptr));
+ kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy));
AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage);
}
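
These hunks track the IR-level API change that dropped the nullptr end-of-arguments sentinel from Module::getOrInsertFunction, which now takes the parameter types directly. A minimal sketch of the new call shape (the hook name is made up):

#include "llvm/IR/Module.h"
using namespace llvm;

Constant *declareExampleHook(Module &M, Type *VoidTy, Type *IntptrTy) {
  // No trailing nullptr sentinel anymore.
  return M.getOrInsertFunction("__asan_example_hook", VoidTy, IntptrTy,
                               IntptrTy);
}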
@@ -1618,11 +1625,12 @@ void AddressSanitizerModule::SetComdatForGlobalMetadata(
GlobalVariable *
AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer,
StringRef OriginalName) {
- GlobalVariable *Metadata =
- new GlobalVariable(M, Initializer->getType(), false,
- GlobalVariable::InternalLinkage, Initializer,
- Twine("__asan_global_") +
- GlobalValue::getRealLinkageName(OriginalName));
+ auto Linkage = TargetTriple.isOSBinFormatMachO()
+ ? GlobalVariable::InternalLinkage
+ : GlobalVariable::PrivateLinkage;
+ GlobalVariable *Metadata = new GlobalVariable(
+ M, Initializer->getType(), false, Linkage, Initializer,
+ Twine("__asan_global_") + GlobalValue::getRealLinkageName(OriginalName));
Metadata->setSection(getGlobalMetadataSection());
return Metadata;
}
@@ -1862,7 +1870,8 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
GlobalValue *InstrumentedGlobal = NewGlobal;
bool CanUsePrivateAliases =
- TargetTriple.isOSBinFormatELF() || TargetTriple.isOSBinFormatMachO();
+ TargetTriple.isOSBinFormatELF() || TargetTriple.isOSBinFormatMachO() ||
+ TargetTriple.isOSBinFormatWasm();
if (CanUsePrivateAliases && ClUsePrivateAliasForGlobals) {
// Create local alias for NewGlobal to avoid crash on ODR between
// instrumented and non-instrumented libraries.
@@ -1926,13 +1935,19 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel);
initializeCallbacks(M);
- bool Changed = false;
+ if (CompileKernel)
+ return false;
+
+ Function *AsanCtorFunction;
+ std::tie(AsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
+ M, kAsanModuleCtorName, kAsanInitName, /*InitArgTypes=*/{},
+ /*InitArgs=*/{}, kAsanVersionCheckName);
+ appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority);
+ bool Changed = false;
// TODO(glider): temporarily disabled globals instrumentation for KASan.
- if (ClGlobals && !CompileKernel) {
- Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
- assert(CtorFunc);
- IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
+ if (ClGlobals) {
+ IRBuilder<> IRB(AsanCtorFunction->getEntryBlock().getTerminator());
Changed |= InstrumentGlobals(IRB, M);
}
@@ -1949,49 +1964,60 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
const std::string ExpStr = Exp ? "exp_" : "";
const std::string SuffixStr = CompileKernel ? "N" : "_n";
const std::string EndingStr = Recover ? "_noabort" : "";
- Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr;
- AsanErrorCallbackSized[AccessIsWrite][Exp] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr + EndingStr,
- IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
- AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr,
- IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
- for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
- AccessSizeIndex++) {
- const std::string Suffix = TypeStr + itostr(1ULL << AccessSizeIndex);
- AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr,
- IRB.getVoidTy(), IntptrTy, ExpType, nullptr));
- AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- ClMemoryAccessCallbackPrefix + ExpStr + Suffix + EndingStr,
- IRB.getVoidTy(), IntptrTy, ExpType, nullptr));
+
+ SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy};
+ SmallVector<Type *, 2> Args1{1, IntptrTy};
+ if (Exp) {
+ Type *ExpType = Type::getInt32Ty(*C);
+ Args2.push_back(ExpType);
+ Args1.push_back(ExpType);
}
- }
+ AsanErrorCallbackSized[AccessIsWrite][Exp] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr +
+ EndingStr,
+ FunctionType::get(IRB.getVoidTy(), Args2, false)));
+
+ AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + ExpStr + TypeStr + "N" + EndingStr,
+ FunctionType::get(IRB.getVoidTy(), Args2, false)));
+
+ for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
+ AccessSizeIndex++) {
+ const std::string Suffix = TypeStr + itostr(1ULL << AccessSizeIndex);
+ AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr,
+ FunctionType::get(IRB.getVoidTy(), Args1, false)));
+
+ AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + ExpStr + Suffix + EndingStr,
+ FunctionType::get(IRB.getVoidTy(), Args1, false)));
+ }
+ }
}
const std::string MemIntrinCallbackPrefix =
CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
AsanMemmove = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
MemIntrinCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr));
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
AsanMemcpy = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
MemIntrinCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, nullptr));
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy));
AsanMemset = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
MemIntrinCallbackPrefix + "memset", IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, nullptr));
+ IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy));
AsanHandleNoReturnFunc = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy(), nullptr));
+ M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy()));
AsanPtrCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
+ kAsanPtrCmp, IRB.getVoidTy(), IntptrTy, IntptrTy));
AsanPtrSubFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
+ kAsanPtrSub, IRB.getVoidTy(), IntptrTy, IntptrTy));
// We insert an empty inline asm after __asan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
StringRef(""), StringRef(""),
@@ -2001,7 +2027,6 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
// virtual
bool AddressSanitizer::doInitialization(Module &M) {
// Initialize the private fields. No one has accessed them before.
-
GlobalsMD.init(M);
C = &(M.getContext());
@@ -2009,13 +2034,6 @@ bool AddressSanitizer::doInitialization(Module &M) {
IntptrTy = Type::getIntNTy(*C, LongSize);
TargetTriple = Triple(M.getTargetTriple());
- if (!CompileKernel) {
- std::tie(AsanCtorFunction, AsanInitFunction) =
- createSanitizerCtorAndInitFunctions(
- M, kAsanModuleCtorName, kAsanInitName,
- /*InitArgTypes=*/{}, /*InitArgs=*/{}, kAsanVersionCheckName);
- appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority);
- }
Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel);
return true;
}
@@ -2034,6 +2052,8 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// We cannot just ignore these methods, because they may call other
// instrumented functions.
if (F.getName().find(" load]") != std::string::npos) {
+ Function *AsanInitFunction =
+ declareSanitizerInitFunction(*F.getParent(), kAsanInitName, {});
IRBuilder<> IRB(&F.front(), F.front().begin());
IRB.CreateCall(AsanInitFunction, {});
return true;
@@ -2081,7 +2101,6 @@ void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
}
bool AddressSanitizer::runOnFunction(Function &F) {
- if (&F == AsanCtorFunction) return false;
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false;
if (F.getName().startswith("__asan_")) return false;
@@ -2175,8 +2194,9 @@ bool AddressSanitizer::runOnFunction(Function &F) {
(ClInstrumentationWithCallsThreshold >= 0 &&
ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold);
const DataLayout &DL = F.getParent()->getDataLayout();
- ObjectSizeOffsetVisitor ObjSizeVis(DL, TLI, F.getContext(),
- /*RoundToAlign=*/true);
+ ObjectSizeOpts ObjSizeOpts;
+ ObjSizeOpts.RoundToAlign = true;
+ ObjectSizeOffsetVisitor ObjSizeVis(DL, TLI, F.getContext(), ObjSizeOpts);
// Instrument.
int NumInstrumented = 0;
@@ -2234,18 +2254,18 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
std::string Suffix = itostr(i);
AsanStackMallocFunc[i] = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanStackMallocNameTemplate + Suffix, IntptrTy,
- IntptrTy, nullptr));
+ IntptrTy));
AsanStackFreeFunc[i] = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanStackFreeNameTemplate + Suffix,
- IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
+ IRB.getVoidTy(), IntptrTy, IntptrTy));
}
if (ASan.UseAfterScope) {
AsanPoisonStackMemoryFunc = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanPoisonStackMemoryName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, nullptr));
+ IntptrTy, IntptrTy));
AsanUnpoisonStackMemoryFunc = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(kAsanUnpoisonStackMemoryName, IRB.getVoidTy(),
- IntptrTy, IntptrTy, nullptr));
+ IntptrTy, IntptrTy));
}
for (size_t Val : {0x00, 0xf1, 0xf2, 0xf3, 0xf5, 0xf8}) {
@@ -2254,14 +2274,14 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
Name << std::setw(2) << std::setfill('0') << std::hex << Val;
AsanSetShadowFunc[Val] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
+ Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy));
}
AsanAllocaPoisonFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
+ kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy));
AsanAllocasUnpoisonFunc =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
+ kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy));
}
void FunctionStackPoisoner::copyToShadowInline(ArrayRef<uint8_t> ShadowMask,
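The recurring mechanical change in this file is dropping the trailing null sentinel from Module::getOrInsertFunction; the parameter types are now passed as a plain variadic list. A minimal sketch of the new declaration style (the callback name and helper function are illustrative, not part of this patch):

    // Declares void __tool_callback(i8*, intptr) under the sentinel-free API.
    static Constant *declareTwoArgCallback(Module &M, Type *IntptrTy) {
      IRBuilder<> IRB(M.getContext());
      // The type list simply ends at the last parameter; no trailing nullptr.
      return M.getOrInsertFunction("__tool_callback", IRB.getVoidTy(),
                                   IRB.getInt8PtrTy(), IntptrTy);
    }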
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index b34d5b8c45a7..4e454f0c95b6 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -254,7 +254,7 @@ class DataFlowSanitizer : public ModulePass {
MDNode *ColdCallWeights;
DFSanABIList ABIList;
DenseMap<Value *, Function *> UnwrappedFnMap;
- AttributeSet ReadOnlyNoneAttrs;
+ AttributeList ReadOnlyNoneAttrs;
bool DFSanRuntimeShadowMask;
Value *getShadowAddress(Value *Addr, Instruction *Pos);
@@ -331,6 +331,10 @@ class DFSanVisitor : public InstVisitor<DFSanVisitor> {
DFSanFunction &DFSF;
DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}
+ const DataLayout &getDataLayout() const {
+ return DFSF.F->getParent()->getDataLayout();
+ }
+
void visitOperandShadowInst(Instruction &I);
void visitBinaryOperator(BinaryOperator &BO);
@@ -539,16 +543,17 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
F->getParent());
NewF->copyAttributesFrom(F);
NewF->removeAttributes(
- AttributeSet::ReturnIndex,
- AttributeSet::get(F->getContext(), AttributeSet::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewFT->getReturnType())));
+ AttributeList::ReturnIndex,
+ AttributeList::get(
+ F->getContext(), AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewFT->getReturnType())));
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
if (F->isVarArg()) {
NewF->removeAttributes(
- AttributeSet::FunctionIndex,
- AttributeSet().addAttribute(*Ctx, AttributeSet::FunctionIndex,
- "split-stack"));
+ AttributeList::FunctionIndex,
+ AttributeList().addAttribute(*Ctx, AttributeList::FunctionIndex,
+ "split-stack"));
CallInst::Create(DFSanVarargWrapperFn,
IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
BB);
@@ -580,8 +585,7 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
Function::arg_iterator AI = F->arg_begin(); ++AI;
for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N)
Args.push_back(&*AI);
- CallInst *CI =
- CallInst::Create(&F->getArgumentList().front(), Args, "", BB);
+ CallInst *CI = CallInst::Create(&*F->arg_begin(), Args, "", BB);
ReturnInst *RI;
if (FT->getReturnType()->isVoidTy())
RI = ReturnInst::Create(*Ctx, BB);
@@ -595,7 +599,7 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
DFSanVisitor(DFSF).visitCallInst(*CI);
if (!FT->getReturnType()->isVoidTy())
new StoreInst(DFSF.getShadow(RI->getReturnValue()),
- &F->getArgumentList().back(), RI);
+ &*std::prev(F->arg_end()), RI);
}
return C;
@@ -622,26 +626,26 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy);
if (Function *F = dyn_cast<Function>(DFSanUnionFn)) {
- F->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
- F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
- F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
+ F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
+ F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
F->addAttribute(1, Attribute::ZExt);
F->addAttribute(2, Attribute::ZExt);
}
DFSanCheckedUnionFn = Mod->getOrInsertFunction("dfsan_union", DFSanUnionFnTy);
if (Function *F = dyn_cast<Function>(DFSanCheckedUnionFn)) {
- F->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
- F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
- F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
+ F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
+ F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
F->addAttribute(1, Attribute::ZExt);
F->addAttribute(2, Attribute::ZExt);
}
DFSanUnionLoadFn =
Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy);
if (Function *F = dyn_cast<Function>(DFSanUnionLoadFn)) {
- F->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
- F->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly);
- F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ F->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
+ F->addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly);
+ F->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
}
DFSanUnimplementedFn =
Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
@@ -696,7 +700,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
AttrBuilder B;
B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
- ReadOnlyNoneAttrs = AttributeSet::get(*Ctx, AttributeSet::FunctionIndex, B);
+ ReadOnlyNoneAttrs = AttributeList::get(*Ctx, AttributeList::FunctionIndex, B);
// First, change the ABI of every function in the module. ABI-listed
// functions keep their original ABI and get a wrapper function.
@@ -717,9 +721,10 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
Function *NewF = Function::Create(NewFT, F.getLinkage(), "", &M);
NewF->copyAttributesFrom(&F);
NewF->removeAttributes(
- AttributeSet::ReturnIndex,
- AttributeSet::get(NewF->getContext(), AttributeSet::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewFT->getReturnType())));
+ AttributeList::ReturnIndex,
+ AttributeList::get(
+ NewF->getContext(), AttributeList::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewFT->getReturnType())));
for (Function::arg_iterator FArg = F.arg_begin(),
NewFArg = NewF->arg_begin(),
FArgEnd = F.arg_end();
@@ -758,7 +763,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
&F, std::string("dfsw$") + std::string(F.getName()),
GlobalValue::LinkOnceODRLinkage, NewFT);
if (getInstrumentedABI() == IA_TLS)
- NewF->removeAttributes(AttributeSet::FunctionIndex, ReadOnlyNoneAttrs);
+ NewF->removeAttributes(AttributeList::FunctionIndex, ReadOnlyNoneAttrs);
Value *WrappedFnCst =
ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
@@ -906,7 +911,7 @@ Value *DFSanFunction::getShadow(Value *V) {
break;
}
case DataFlowSanitizer::IA_Args: {
- unsigned ArgIdx = A->getArgNo() + F->getArgumentList().size() / 2;
+ unsigned ArgIdx = A->getArgNo() + F->arg_size() / 2;
Function::arg_iterator i = F->arg_begin();
while (ArgIdx--)
++i;
@@ -983,7 +988,7 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
IRBuilder<> IRB(Pos);
if (AvoidNewBlocks) {
CallInst *Call = IRB.CreateCall(DFS.DFSanCheckedUnionFn, {V1, V2});
- Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
Call->addAttribute(1, Attribute::ZExt);
Call->addAttribute(2, Attribute::ZExt);
@@ -996,7 +1001,7 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
Ne, Pos, /*Unreachable=*/false, DFS.ColdCallWeights, &DT));
IRBuilder<> ThenIRB(BI);
CallInst *Call = ThenIRB.CreateCall(DFS.DFSanUnionFn, {V1, V2});
- Call->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
Call->addAttribute(1, Attribute::ZExt);
Call->addAttribute(2, Attribute::ZExt);
@@ -1099,7 +1104,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
CallInst *FallbackCall = FallbackIRB.CreateCall(
DFS.DFSanUnionLoadFn,
{ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
- FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
// Compare each of the shadows stored in the loaded 64 bits to each other,
// by computing (WideShadow rotl ShadowWidth) == WideShadow.
@@ -1156,7 +1161,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align,
IRBuilder<> IRB(Pos);
CallInst *FallbackCall = IRB.CreateCall(
DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
- FallbackCall->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt);
+ FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
return FallbackCall;
}
@@ -1446,7 +1451,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
// Custom functions returning non-void will write to the return label.
if (!FT->getReturnType()->isVoidTy()) {
- CustomFn->removeAttributes(AttributeSet::FunctionIndex,
+ CustomFn->removeAttributes(AttributeList::FunctionIndex,
DFSF.DFS.ReadOnlyNoneAttrs);
}
}
@@ -1481,7 +1486,8 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy,
CS.arg_size() - FT->getNumParams());
auto *LabelVAAlloca = new AllocaInst(
- LabelVATy, "labelva", &DFSF.F->getEntryBlock().front());
+ LabelVATy, getDataLayout().getAllocaAddrSpace(),
+ "labelva", &DFSF.F->getEntryBlock().front());
for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) {
auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n);
@@ -1494,8 +1500,9 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
if (!FT->getReturnType()->isVoidTy()) {
if (!DFSF.LabelReturnAlloca) {
DFSF.LabelReturnAlloca =
- new AllocaInst(DFSF.DFS.ShadowTy, "labelreturn",
- &DFSF.F->getEntryBlock().front());
+ new AllocaInst(DFSF.DFS.ShadowTy,
+ getDataLayout().getAllocaAddrSpace(),
+ "labelreturn", &DFSF.F->getEntryBlock().front());
}
Args.push_back(DFSF.LabelReturnAlloca);
}
@@ -1574,7 +1581,8 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
unsigned VarArgSize = CS.arg_size() - FT->getNumParams();
ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize);
AllocaInst *VarArgShadow =
- new AllocaInst(VarArgArrayTy, "", &DFSF.F->getEntryBlock().front());
+ new AllocaInst(VarArgArrayTy, getDataLayout().getAllocaAddrSpace(),
+ "", &DFSF.F->getEntryBlock().front());
Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0));
for (unsigned n = 0; i != e; ++i, ++n) {
IRB.CreateStore(
@@ -1593,7 +1601,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
}
NewCS.setCallingConv(CS.getCallingConv());
NewCS.setAttributes(CS.getAttributes().removeAttributes(
- *DFSF.DFS.Ctx, AttributeSet::ReturnIndex,
+ *DFSF.DFS.Ctx, AttributeList::ReturnIndex,
AttributeFuncs::typeIncompatible(NewCS.getInstruction()->getType())));
if (Next) {
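Most of the DataFlowSanitizer churn in this file is the AttributeSet-to-AttributeList rename; the index enumerators and call shapes carry over unchanged. A hedged sketch of the renamed API (the wrapper function is illustrative):

    // Strip return attributes that are invalid for the new return type.
    static void dropIncompatibleReturnAttrs(Function *F, FunctionType *NewFT) {
      F->removeAttributes(
          AttributeList::ReturnIndex,
          AttributeList::get(F->getContext(), AttributeList::ReturnIndex,
                             AttributeFuncs::typeIncompatible(NewFT->getReturnType())));
    }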
diff --git a/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp b/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
index 05eba6c4dc69..7dea1dee756a 100644
--- a/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
@@ -267,35 +267,35 @@ void EfficiencySanitizer::initializeCallbacks(Module &M) {
SmallString<32> AlignedLoadName("__esan_aligned_load" + ByteSizeStr);
EsanAlignedLoad[Idx] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- AlignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ AlignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
SmallString<32> AlignedStoreName("__esan_aligned_store" + ByteSizeStr);
EsanAlignedStore[Idx] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- AlignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ AlignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
SmallString<32> UnalignedLoadName("__esan_unaligned_load" + ByteSizeStr);
EsanUnalignedLoad[Idx] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- UnalignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ UnalignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
SmallString<32> UnalignedStoreName("__esan_unaligned_store" + ByteSizeStr);
EsanUnalignedStore[Idx] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- UnalignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ UnalignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy()));
}
EsanUnalignedLoadN = checkSanitizerInterfaceFunction(
M.getOrInsertFunction("__esan_unaligned_loadN", IRB.getVoidTy(),
- IRB.getInt8PtrTy(), IntptrTy, nullptr));
+ IRB.getInt8PtrTy(), IntptrTy));
EsanUnalignedStoreN = checkSanitizerInterfaceFunction(
M.getOrInsertFunction("__esan_unaligned_storeN", IRB.getVoidTy(),
- IRB.getInt8PtrTy(), IntptrTy, nullptr));
+ IRB.getInt8PtrTy(), IntptrTy));
MemmoveFn = checkSanitizerInterfaceFunction(
M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy, nullptr));
+ IRB.getInt8PtrTy(), IntptrTy));
MemcpyFn = checkSanitizerInterfaceFunction(
M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy, nullptr));
+ IRB.getInt8PtrTy(), IntptrTy));
MemsetFn = checkSanitizerInterfaceFunction(
M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt32Ty(), IntptrTy, nullptr));
+ IRB.getInt32Ty(), IntptrTy));
}
bool EfficiencySanitizer::shouldIgnoreStructType(StructType *StructTy) {
@@ -533,7 +533,7 @@ void EfficiencySanitizer::createDestructor(Module &M, Constant *ToolInfoArg) {
IRBuilder<> IRB_Dtor(EsanDtorFunction->getEntryBlock().getTerminator());
Function *EsanExit = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(EsanExitName, IRB_Dtor.getVoidTy(),
- Int8PtrTy, nullptr));
+ Int8PtrTy));
EsanExit->setLinkage(Function::ExternalLinkage);
IRB_Dtor.CreateCall(EsanExit, {ToolInfoArg});
appendToGlobalDtors(M, EsanDtorFunction, EsanCtorAndDtorPriority);
@@ -757,7 +757,7 @@ bool EfficiencySanitizer::instrumentGetElementPtr(Instruction *I, Module &M) {
return false;
}
Type *SourceTy = GepInst->getSourceElementType();
- StructType *StructTy;
+ StructType *StructTy = nullptr;
ConstantInt *Idx;
// Check if GEP calculates address from a struct array.
if (isa<StructType>(SourceTy)) {
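The one behavioral fix in EfficiencySanitizer is initializing StructTy to nullptr so no path can read an indeterminate pointer when the GEP source is neither a struct nor a struct array. A sketch of the pattern, with the surrounding control flow paraphrased rather than quoted:

    static bool hasStructSource(Type *SourceTy) {
      StructType *StructTy = nullptr; // now defined on every path
      if (isa<StructType>(SourceTy))
        StructTy = cast<StructType>(SourceTy);
      // ... the real pass also derives StructTy from arrays of structs ...
      return StructTy != nullptr; // bail out cleanly instead of reading garbage
    }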
diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 1ba13bdfe05a..61d627673c90 100644
--- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -1,4 +1,4 @@
-//===-- IndirectCallPromotion.cpp - Promote indirect calls to direct calls ===//
+//===-- IndirectCallPromotion.cpp - Optimizations based on value profiling ===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,6 +17,8 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/Analysis/IndirectCallSiteVisitor.h"
#include "llvm/IR/BasicBlock.h"
@@ -40,6 +42,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/PGOInstrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -53,6 +56,8 @@ using namespace llvm;
STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
+STATISTIC(NumOfPGOMemOPOpt, "Number of memop intrinsics optimized.");
+STATISTIC(NumOfPGOMemOPAnnotate, "Number of memop intrinsics annotated.");
// Command line option to disable indirect-call promotion, defaulting to
// false. This is for debugging purposes.
@@ -80,6 +85,12 @@ static cl::opt<bool> ICPLTOMode("icp-lto", cl::init(false), cl::Hidden,
cl::desc("Run indirect-call promotion in LTO "
"mode"));
+// Set if the pass is called in SamplePGO mode. In SamplePGO mode the pass
+// additionally attaches prof metadata to the created direct call.
+static cl::opt<bool>
+ ICPSamplePGOMode("icp-samplepgo", cl::init(false), cl::Hidden,
+ cl::desc("Run indirect-call promotion in SamplePGO mode"));
+
// If the option is set to true, only call instructions will be considered for
// transformation -- invoke instructions will be ignored.
static cl::opt<bool>
@@ -100,13 +111,51 @@ static cl::opt<bool>
ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
cl::desc("Dump IR after transformation happens"));
+// The minimum call count to optimize memory intrinsic calls.
+static cl::opt<unsigned>
+ MemOPCountThreshold("pgo-memop-count-threshold", cl::Hidden, cl::ZeroOrMore,
+ cl::init(1000),
+ cl::desc("The minimum count to optimize memory "
+ "intrinsic calls"));
+
+// Command line option to disable memory intrinsic optimization. The default is
+// false. This is for debugging purposes.
+static cl::opt<bool> DisableMemOPOPT("disable-memop-opt", cl::init(false),
+ cl::Hidden, cl::desc("Disable memory intrinsic optimization"));
+
+// The percent threshold to optimize memory intrinsic calls.
+static cl::opt<unsigned>
+ MemOPPercentThreshold("pgo-memop-percent-threshold", cl::init(40),
+ cl::Hidden, cl::ZeroOrMore,
+ cl::desc("The percentage threshold for the "
+ "memory intrinsic calls optimization"));
+
+// Maximum number of versions for optimizing memory intrinsic calls.
+static cl::opt<unsigned>
+ MemOPMaxVersion("pgo-memop-max-version", cl::init(3), cl::Hidden,
+ cl::ZeroOrMore,
+ cl::desc("The max version for the optimized memory "
+ "intrinsic calls"));
+
+// Scale the counts from the annotation using the BB count value.
+static cl::opt<bool>
+ MemOPScaleCount("pgo-memop-scale-count", cl::init(true), cl::Hidden,
+ cl::desc("Scale the memop size counts using the basic "
+ " block count value"));
+
+// This option sets the range of precise profile memop sizes.
+extern cl::opt<std::string> MemOPSizeRange;
+
+// This option sets the value that groups large memop sizes.
+extern cl::opt<unsigned> MemOPSizeLarge;
+
namespace {
class PGOIndirectCallPromotionLegacyPass : public ModulePass {
public:
static char ID;
- PGOIndirectCallPromotionLegacyPass(bool InLTO = false)
- : ModulePass(ID), InLTO(InLTO) {
+ PGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false)
+ : ModulePass(ID), InLTO(InLTO), SamplePGO(SamplePGO) {
initializePGOIndirectCallPromotionLegacyPassPass(
*PassRegistry::getPassRegistry());
}
@@ -119,6 +168,28 @@ private:
// If this pass is called in LTO, we need special handling of the PGOFuncName
// for the static variables due to LTO's internalization.
bool InLTO;
+
+ // If this pass is called in SamplePGO, we need to add the prof metadata to
+ // the promoted direct call.
+ bool SamplePGO;
+};
+
+class PGOMemOPSizeOptLegacyPass : public FunctionPass {
+public:
+ static char ID;
+
+ PGOMemOPSizeOptLegacyPass() : FunctionPass(ID) {
+ initializePGOMemOPSizeOptLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "PGOMemOPSize"; }
+
+private:
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
};
} // end anonymous namespace
@@ -128,8 +199,22 @@ INITIALIZE_PASS(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom",
"direct calls.",
false, false)
-ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO) {
- return new PGOIndirectCallPromotionLegacyPass(InLTO);
+ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO,
+ bool SamplePGO) {
+ return new PGOIndirectCallPromotionLegacyPass(InLTO, SamplePGO);
+}
+
+char PGOMemOPSizeOptLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
+ "Optimize memory intrinsic using its size value profile",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
+ "Optimize memory intrinsic using its size value profile",
+ false, false)
+
+FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() {
+ return new PGOMemOPSizeOptLegacyPass();
}
namespace {
@@ -144,17 +229,11 @@ private:
// defines.
InstrProfSymtab *Symtab;
- enum TargetStatus {
- OK, // Should be able to promote.
- NotAvailableInModule, // Cannot find the target in current module.
- ReturnTypeMismatch, // Return type mismatch b/w target and indirect-call.
- NumArgsMismatch, // Number of arguments does not match.
- ArgTypeMismatch // Type mismatch in the arguments (cannot bitcast).
- };
+ bool SamplePGO;
// Test if we can legally promote this direct-call of Target.
- TargetStatus isPromotionLegal(Instruction *Inst, uint64_t Target,
- Function *&F);
+ bool isPromotionLegal(Instruction *Inst, uint64_t Target, Function *&F,
+ const char **Reason = nullptr);
// A struct that records the direct target and its call count.
struct PromotionCandidate {
@@ -172,91 +251,77 @@ private:
Instruction *Inst, const ArrayRef<InstrProfValueData> &ValueDataRef,
uint64_t TotalCount, uint32_t NumCandidates);
- // Main function that transforms Inst (either a indirect-call instruction, or
- // an invoke instruction , to a conditional call to F. This is like:
- // if (Inst.CalledValue == F)
- // F(...);
- // else
- // Inst(...);
- // end
- // TotalCount is the profile count value that the instruction executes.
- // Count is the profile count value that F is the target function.
- // These two values are being used to update the branch weight.
- void promote(Instruction *Inst, Function *F, uint64_t Count,
- uint64_t TotalCount);
-
// Promote a list of targets for one indirect-call callsite. Return
// the number of promotions.
uint32_t tryToPromote(Instruction *Inst,
const std::vector<PromotionCandidate> &Candidates,
uint64_t &TotalCount);
- static const char *StatusToString(const TargetStatus S) {
- switch (S) {
- case OK:
- return "OK to promote";
- case NotAvailableInModule:
- return "Cannot find the target";
- case ReturnTypeMismatch:
- return "Return type mismatch";
- case NumArgsMismatch:
- return "The number of arguments mismatch";
- case ArgTypeMismatch:
- return "Argument Type mismatch";
- }
- llvm_unreachable("Should not reach here");
- }
-
// Noncopyable
ICallPromotionFunc(const ICallPromotionFunc &other) = delete;
ICallPromotionFunc &operator=(const ICallPromotionFunc &other) = delete;
public:
- ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab)
- : F(Func), M(Modu), Symtab(Symtab) {
- }
+ ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab,
+ bool SamplePGO)
+ : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO) {}
bool processFunction();
};
} // end anonymous namespace
-ICallPromotionFunc::TargetStatus
-ICallPromotionFunc::isPromotionLegal(Instruction *Inst, uint64_t Target,
- Function *&TargetFunction) {
- Function *DirectCallee = Symtab->getFunction(Target);
- if (DirectCallee == nullptr)
- return NotAvailableInModule;
+bool llvm::isLegalToPromote(Instruction *Inst, Function *F,
+ const char **Reason) {
// Check the return type.
Type *CallRetType = Inst->getType();
if (!CallRetType->isVoidTy()) {
- Type *FuncRetType = DirectCallee->getReturnType();
+ Type *FuncRetType = F->getReturnType();
if (FuncRetType != CallRetType &&
- !CastInst::isBitCastable(FuncRetType, CallRetType))
- return ReturnTypeMismatch;
+ !CastInst::isBitCastable(FuncRetType, CallRetType)) {
+ if (Reason)
+ *Reason = "Return type mismatch";
+ return false;
+ }
}
// Check if the arguments are compatible with the parameters
- FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
+ FunctionType *DirectCalleeType = F->getFunctionType();
unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
CallSite CS(Inst);
unsigned ArgNum = CS.arg_size();
- if (ParamNum != ArgNum && !DirectCalleeType->isVarArg())
- return NumArgsMismatch;
+ if (ParamNum != ArgNum && !DirectCalleeType->isVarArg()) {
+ if (Reason)
+ *Reason = "The number of arguments mismatch";
+ return false;
+ }
for (unsigned I = 0; I < ParamNum; ++I) {
Type *PTy = DirectCalleeType->getFunctionParamType(I);
Type *ATy = CS.getArgument(I)->getType();
if (PTy == ATy)
continue;
- if (!CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy))
- return ArgTypeMismatch;
+ if (!CastInst::castIsValid(Instruction::BitCast, CS.getArgument(I), PTy)) {
+ if (Reason)
+ *Reason = "Argument type mismatch";
+ return false;
+ }
}
DEBUG(dbgs() << " #" << NumOfPGOICallPromotion << " Promote the icall to "
- << Symtab->getFuncName(Target) << "\n");
- TargetFunction = DirectCallee;
- return OK;
+ << F->getName() << "\n");
+ return true;
+}
+
+bool ICallPromotionFunc::isPromotionLegal(Instruction *Inst, uint64_t Target,
+ Function *&TargetFunction,
+ const char **Reason) {
+ TargetFunction = Symtab->getFunction(Target);
+ if (TargetFunction == nullptr) {
+ *Reason = "Cannot find the target";
+ return false;
+ }
+ return isLegalToPromote(Inst, TargetFunction, Reason);
}
// Indirect-call promotion heuristic. The direct targets are sorted based on
@@ -296,10 +361,9 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite(
break;
}
Function *TargetFunction = nullptr;
- TargetStatus Status = isPromotionLegal(Inst, Target, TargetFunction);
- if (Status != OK) {
+ const char *Reason = nullptr;
+ if (!isPromotionLegal(Inst, Target, TargetFunction, &Reason)) {
StringRef TargetFuncName = Symtab->getFuncName(Target);
- const char *Reason = StatusToString(Status);
DEBUG(dbgs() << " Not promote: " << Reason << "\n");
emitOptimizationRemarkMissed(
F.getContext(), "pgo-icall-prom", F, Inst->getDebugLoc(),
@@ -532,8 +596,14 @@ static void insertCallRetPHI(Instruction *Inst, Instruction *CallResult,
// Ret = phi(Ret1, Ret2);
// It adds type casts for the args that do not match the parameters and for
// the return value. Branch weights metadata is also updated.
-void ICallPromotionFunc::promote(Instruction *Inst, Function *DirectCallee,
- uint64_t Count, uint64_t TotalCount) {
+// If \p AttachProfToDirectCall is true, prof metadata is attached to the
+// new direct call to contain \p Count. This is used by SamplePGO inliner to
+// check callsite hotness.
+// Returns the promoted direct call instruction.
+Instruction *llvm::promoteIndirectCall(Instruction *Inst,
+ Function *DirectCallee, uint64_t Count,
+ uint64_t TotalCount,
+ bool AttachProfToDirectCall) {
assert(DirectCallee != nullptr);
BasicBlock *BB = Inst->getParent();
// Just to suppress the non-debug build warning.
@@ -548,6 +618,14 @@ void ICallPromotionFunc::promote(Instruction *Inst, Function *DirectCallee,
Instruction *NewInst =
createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB);
+ if (AttachProfToDirectCall) {
+ SmallVector<uint32_t, 1> Weights;
+ Weights.push_back(Count);
+ MDBuilder MDB(NewInst->getContext());
+ cast<Instruction>(NewInst->stripPointerCasts())
+ ->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+ }
+
// Move Inst from MergeBB to IndirectCallBB.
Inst->removeFromParent();
IndirectCallBB->getInstList().insert(IndirectCallBB->getFirstInsertionPt(),
@@ -576,9 +654,10 @@ void ICallPromotionFunc::promote(Instruction *Inst, Function *DirectCallee,
DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n");
emitOptimizationRemark(
- F.getContext(), "pgo-icall-prom", F, Inst->getDebugLoc(),
+ BB->getContext(), "pgo-icall-prom", *BB->getParent(), Inst->getDebugLoc(),
Twine("Promote indirect call to ") + DirectCallee->getName() +
" with count " + Twine(Count) + " out of " + Twine(TotalCount));
+ return NewInst;
}
// Promote indirect-call to conditional direct-call for one callsite.
@@ -589,7 +668,7 @@ uint32_t ICallPromotionFunc::tryToPromote(
for (auto &C : Candidates) {
uint64_t Count = C.Count;
- promote(Inst, C.TargetFunction, Count, TotalCount);
+ promoteIndirectCall(Inst, C.TargetFunction, Count, TotalCount, SamplePGO);
assert(TotalCount >= Count);
TotalCount -= Count;
NumOfPGOICallPromotion++;
@@ -630,7 +709,7 @@ bool ICallPromotionFunc::processFunction() {
}
// A wrapper function that does the actual work.
-static bool promoteIndirectCalls(Module &M, bool InLTO) {
+static bool promoteIndirectCalls(Module &M, bool InLTO, bool SamplePGO) {
if (DisableICP)
return false;
InstrProfSymtab Symtab;
@@ -641,7 +720,7 @@ static bool promoteIndirectCalls(Module &M, bool InLTO) {
continue;
if (F.hasFnAttribute(Attribute::OptimizeNone))
continue;
- ICallPromotionFunc ICallPromotion(F, &M, &Symtab);
+ ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO);
bool FuncChanged = ICallPromotion.processFunction();
if (ICPDUMPAFTER && FuncChanged) {
DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
@@ -658,12 +737,289 @@ static bool promoteIndirectCalls(Module &M, bool InLTO) {
bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) {
// Command-line option has the priority for InLTO.
- return promoteIndirectCalls(M, InLTO | ICPLTOMode);
+ return promoteIndirectCalls(M, InLTO | ICPLTOMode,
+ SamplePGO | ICPSamplePGOMode);
}
-PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, ModuleAnalysisManager &AM) {
- if (!promoteIndirectCalls(M, InLTO | ICPLTOMode))
+PreservedAnalyses PGOIndirectCallPromotion::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (!promoteIndirectCalls(M, InLTO | ICPLTOMode,
+ SamplePGO | ICPSamplePGOMode))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
+
+namespace {
+class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
+public:
+ MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI)
+ : Func(Func), BFI(BFI), Changed(false) {
+ ValueDataArray =
+ llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
+ // Get the MemOPSize range information from the option MemOPSizeRange.
+ getMemOPSizeRangeFromOption(MemOPSizeRange, PreciseRangeStart,
+ PreciseRangeLast);
+ }
+ bool isChanged() const { return Changed; }
+ void perform() {
+ WorkList.clear();
+ visit(Func);
+
+ for (auto &MI : WorkList) {
+ ++NumOfPGOMemOPAnnotate;
+ if (perform(MI)) {
+ Changed = true;
+ ++NumOfPGOMemOPOpt;
+ DEBUG(dbgs() << "MemOP calls: " << MI->getCalledFunction()->getName()
+ << "is Transformed.\n");
+ }
+ }
+ }
+
+ void visitMemIntrinsic(MemIntrinsic &MI) {
+ Value *Length = MI.getLength();
+ // Do not perform this on constant-length calls.
+ if (isa<ConstantInt>(Length))
+ return;
+ WorkList.push_back(&MI);
+ }
+
+private:
+ Function &Func;
+ BlockFrequencyInfo &BFI;
+ bool Changed;
+ std::vector<MemIntrinsic *> WorkList;
+ // Start of the precise range.
+ int64_t PreciseRangeStart;
+ // Last value of the precise range.
+ int64_t PreciseRangeLast;
+ // The space to read the profile annotation.
+ std::unique_ptr<InstrProfValueData[]> ValueDataArray;
+ bool perform(MemIntrinsic *MI);
+
+ // This kind shows which group the value falls in. For PreciseValue, we have
+ // the profile count for that value. LargeGroup groups the values that are in
+ // range [LargeValue, +inf). NonLargeGroup groups the rest of values.
+ enum MemOPSizeKind { PreciseValue, NonLargeGroup, LargeGroup };
+
+ MemOPSizeKind getMemOPSizeKind(int64_t Value) const {
+ if (Value == MemOPSizeLarge && MemOPSizeLarge != 0)
+ return LargeGroup;
+ if (Value == PreciseRangeLast + 1)
+ return NonLargeGroup;
+ return PreciseValue;
+ }
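+
+ // Example, assuming the profile runtime clamps out-of-range sizes: with a
+ // precise range ending at 8 and -memop-size-large=8192, a recorded value of
+ // 5 is PreciseValue, 9 (PreciseRangeLast + 1) is NonLargeGroup, and 8192 is
+ // LargeGroup.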
+};
+
+static const char *getMIName(const MemIntrinsic *MI) {
+ switch (MI->getIntrinsicID()) {
+ case Intrinsic::memcpy:
+ return "memcpy";
+ case Intrinsic::memmove:
+ return "memmove";
+ case Intrinsic::memset:
+ return "memset";
+ default:
+ return "unknown";
+ }
+}
+
+static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
+ assert(Count <= TotalCount);
+ if (Count < MemOPCountThreshold)
+ return false;
+ if (Count < TotalCount * MemOPPercentThreshold / 100)
+ return false;
+ return true;
+}
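+// With the defaults above, a size value is versioned only if it was observed
+// at least 1000 times (pgo-memop-count-threshold) and accounts for at least
+// 40% (pgo-memop-percent-threshold) of the remaining total count.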
+
+static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
+ uint64_t Denom) {
+ if (!MemOPScaleCount)
+ return Count;
+ bool Overflowed;
+ uint64_t ScaleCount = SaturatingMultiply(Count, Num, &Overflowed);
+ return ScaleCount / Denom;
+}
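+// For example, a raw size count of 800 scaled by a BB count (Num) of 1000
+// over a saved total (Denom) of 2000 yields 800 * 1000 / 2000 = 400; the
+// multiply saturates on overflow, and Overflowed is not otherwise consulted.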
+
+bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
+ assert(MI);
+ if (MI->getIntrinsicID() == Intrinsic::memmove)
+ return false;
+
+ uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2;
+ uint64_t TotalCount;
+ if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions,
+ ValueDataArray.get(), NumVals, TotalCount))
+ return false;
+
+ uint64_t ActualCount = TotalCount;
+ uint64_t SavedTotalCount = TotalCount;
+ if (MemOPScaleCount) {
+ auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent());
+ if (!BBEdgeCount)
+ return false;
+ ActualCount = *BBEdgeCount;
+ }
+
+ if (ActualCount < MemOPCountThreshold)
+ return false;
+
+ ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
+ TotalCount = ActualCount;
+ if (MemOPScaleCount)
+ DEBUG(dbgs() << "Scale counts: numberator = " << ActualCount
+ << " denominator = " << SavedTotalCount << "\n");
+
+ // Keep track of the count for the default case:
+ uint64_t RemainCount = TotalCount;
+ SmallVector<uint64_t, 16> SizeIds;
+ SmallVector<uint64_t, 16> CaseCounts;
+ uint64_t MaxCount = 0;
+ unsigned Version = 0;
+ // Default case is in the front -- save the slot here.
+ CaseCounts.push_back(0);
+ for (auto &VD : VDs) {
+ int64_t V = VD.Value;
+ uint64_t C = VD.Count;
+ if (MemOPScaleCount)
+ C = getScaledCount(C, ActualCount, SavedTotalCount);
+
+ // Only care about precise values here.
+ if (getMemOPSizeKind(V) != PreciseValue)
+ continue;
+
+ // ValueCounts are sorted on the count. Break at the first unprofitable
+ // value.
+ if (!isProfitable(C, RemainCount))
+ break;
+
+ SizeIds.push_back(V);
+ CaseCounts.push_back(C);
+ if (C > MaxCount)
+ MaxCount = C;
+
+ assert(RemainCount >= C);
+ RemainCount -= C;
+
+ if (++Version > MemOPMaxVersion && MemOPMaxVersion != 0)
+ break;
+ }
+
+ if (Version == 0)
+ return false;
+
+ CaseCounts[0] = RemainCount;
+ if (RemainCount > MaxCount)
+ MaxCount = RemainCount;
+
+ uint64_t SumForOpt = TotalCount - RemainCount;
+ DEBUG(dbgs() << "Read one memory intrinsic profile: " << SumForOpt << " vs "
+ << TotalCount << "\n");
+ DEBUG(for (auto &VD : VDs) {
+   dbgs() << " (" << VD.Value << "," << VD.Count << ")\n";
+ });
+
+ DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
+ << " Versions\n");
+
+ // mem_op(..., size)
+ // ==>
+ // switch (size) {
+ // case s1:
+ // mem_op(..., s1);
+ // goto merge_bb;
+ // case s2:
+ // mem_op(..., s2);
+ // goto merge_bb;
+ // ...
+ // default:
+ // mem_op(..., size);
+ // goto merge_bb;
+ // }
+ // merge_bb:
+
+ BasicBlock *BB = MI->getParent();
+ DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
+ DEBUG(dbgs() << *BB << "\n");
+
+ BasicBlock *DefaultBB = SplitBlock(BB, MI);
+ BasicBlock::iterator It(*MI);
+ ++It;
+ assert(It != DefaultBB->end());
+ BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It));
+ DefaultBB->setName("MemOP.Default");
+ MergeBB->setName("MemOP.Merge");
+
+ auto &Ctx = Func.getContext();
+ IRBuilder<> IRB(BB);
+ BB->getTerminator()->eraseFromParent();
+ Value *SizeVar = MI->getLength();
+ SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
+
+ // Clear the value profile data.
+ MI->setMetadata(LLVMContext::MD_prof, nullptr);
+
+ DEBUG(dbgs() << "\n\n== Basic Block After==\n");
+
+ for (uint64_t SizeId : SizeIds) {
+ ConstantInt *CaseSizeId = ConstantInt::get(Type::getInt64Ty(Ctx), SizeId);
+ BasicBlock *CaseBB = BasicBlock::Create(
+ Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
+ Instruction *NewInst = MI->clone();
+ // Set the length argument to this case's constant size.
+ cast<MemIntrinsic>(NewInst)->setLength(CaseSizeId);
+ CaseBB->getInstList().push_back(NewInst);
+ IRBuilder<> IRBCase(CaseBB);
+ IRBCase.CreateBr(MergeBB);
+ SI->addCase(CaseSizeId, CaseBB);
+ DEBUG(dbgs() << *CaseBB << "\n");
+ }
+ setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
+
+ DEBUG(dbgs() << *BB << "\n");
+ DEBUG(dbgs() << *DefaultBB << "\n");
+ DEBUG(dbgs() << *MergeBB << "\n");
+
+ emitOptimizationRemark(Func.getContext(), "memop-opt", Func,
+ MI->getDebugLoc(),
+ Twine("optimize ") + getMIName(MI) + " with count " +
+ Twine(SumForOpt) + " out of " + Twine(TotalCount) +
+ " for " + Twine(Version) + " versions");
+
+ return true;
+}
+} // namespace
+
+static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI) {
+ if (DisableMemOPOPT)
+ return false;
+
+ if (F.hasFnAttribute(Attribute::OptimizeForSize))
+ return false;
+ MemOPSizeOpt MemOPSizeOpt(F, BFI);
+ MemOPSizeOpt.perform();
+ return MemOPSizeOpt.isChanged();
+}
+
+bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) {
+ BlockFrequencyInfo &BFI =
+ getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
+ return PGOMemOPSizeOptImpl(F, BFI);
+}
+
+namespace llvm {
+char &PGOMemOPSizeOptID = PGOMemOPSizeOptLegacyPass::ID;
+
+PreservedAnalyses PGOMemOPSizeOpt::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+ bool Changed = PGOMemOPSizeOptImpl(F, BFI);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ auto PA = PreservedAnalyses();
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
+} // namespace llvm
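The new size-profile optimization is exposed to both pass managers. A usage sketch, assuming the pass types added in this patch are declared in the usual headers (llvm/Transforms/PGOInstrumentation.h et al.):

    FunctionPassManager FPM;
    FPM.addPass(PGOMemOPSizeOpt()); // new pass manager

    legacy::FunctionPassManager LegacyFPM(&M); // M is an existing Module
    LegacyFPM.add(createPGOMemOPSizeOptLegacyPass()); // legacy pass manager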
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index adea7e772447..d91ac6ac7883 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -14,18 +14,58 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/InstrProfiling.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <string>
using namespace llvm;
#define DEBUG_TYPE "instrprof"
+// The start and end values of the precise value profile range for memory
+// intrinsic sizes.
+cl::opt<std::string> MemOPSizeRange(
+ "memop-size-range",
+ cl::desc("Set the range of size in memory intrinsic calls to be profiled "
+ "precisely, in a format of <start_val>:<end_val>"),
+ cl::init(""));
+
+// The value considered to be a large value in memory intrinsic size profiling.
+cl::opt<unsigned> MemOPSizeLarge(
+ "memop-size-large",
+ cl::desc("Set large value thresthold in memory intrinsic size profiling. "
+ "Value of 0 disables the large value profiling."),
+ cl::init(8192));
+
namespace {
cl::opt<bool> DoNameCompression("enable-name-compression",
@@ -41,6 +81,7 @@ cl::opt<bool> ValueProfileStaticAlloc(
"vp-static-alloc",
cl::desc("Do static counter allocation for value profiler"),
cl::init(true));
+
cl::opt<double> NumCountersPerValueSite(
"vp-counters-per-site",
cl::desc("The average number of profile counters allocated "
@@ -56,9 +97,11 @@ class InstrProfilingLegacyPass : public ModulePass {
public:
static char ID;
- InstrProfilingLegacyPass() : ModulePass(ID), InstrProf() {}
+
+ InstrProfilingLegacyPass() : ModulePass(ID) {}
InstrProfilingLegacyPass(const InstrProfOptions &Options)
: ModulePass(ID), InstrProf(Options) {}
+
StringRef getPassName() const override {
return "Frontend instrumentation-based coverage lowering";
}
@@ -73,7 +116,7 @@ public:
}
};
-} // anonymous namespace
+} // end anonymous namespace
PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
@@ -97,30 +140,6 @@ llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) {
return new InstrProfilingLegacyPass(Options);
}
-bool InstrProfiling::isMachO() const {
- return Triple(M->getTargetTriple()).isOSBinFormatMachO();
-}
-
-/// Get the section name for the counter variables.
-StringRef InstrProfiling::getCountersSection() const {
- return getInstrProfCountersSectionName(isMachO());
-}
-
-/// Get the section name for the name variables.
-StringRef InstrProfiling::getNameSection() const {
- return getInstrProfNameSectionName(isMachO());
-}
-
-/// Get the section name for the profile data variables.
-StringRef InstrProfiling::getDataSection() const {
- return getInstrProfDataSectionName(isMachO());
-}
-
-/// Get the section name for the coverage mapping data.
-StringRef InstrProfiling::getCoverageSection() const {
- return getInstrProfCoverageSectionName(isMachO());
-}
-
static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr);
if (Inc)
@@ -137,6 +156,9 @@ bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
NamesSize = 0;
ProfileDataMap.clear();
UsedVars.clear();
+ getMemOPSizeRangeFromOption(MemOPSizeRange, MemOPSizeRangeStart,
+ MemOPSizeRangeLast);
+ TT = Triple(M.getTargetTriple());
// We did not know how many value sites there would be inside
// the instrumented function. This is counting the number of instrumented
@@ -189,17 +211,34 @@ bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
}
static Constant *getOrInsertValueProfilingCall(Module &M,
- const TargetLibraryInfo &TLI) {
+ const TargetLibraryInfo &TLI,
+ bool IsRange = false) {
LLVMContext &Ctx = M.getContext();
auto *ReturnTy = Type::getVoidTy(M.getContext());
- Type *ParamTypes[] = {
+
+ Constant *Res;
+ if (!IsRange) {
+ Type *ParamTypes[] = {
#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
#include "llvm/ProfileData/InstrProfData.inc"
- };
- auto *ValueProfilingCallTy =
- FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
- Constant *Res = M.getOrInsertFunction(getInstrProfValueProfFuncName(),
- ValueProfilingCallTy);
+ };
+ auto *ValueProfilingCallTy =
+ FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
+ Res = M.getOrInsertFunction(getInstrProfValueProfFuncName(),
+ ValueProfilingCallTy);
+ } else {
+ Type *RangeParamTypes[] = {
+#define VALUE_RANGE_PROF 1
+#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
+#include "llvm/ProfileData/InstrProfData.inc"
+#undef VALUE_RANGE_PROF
+ };
+ auto *ValueRangeProfilingCallTy =
+ FunctionType::get(ReturnTy, makeArrayRef(RangeParamTypes), false);
+ Res = M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
+ ValueRangeProfilingCallTy);
+ }
+
if (Function *FunRes = dyn_cast<Function>(Res)) {
if (auto AK = TLI.getExtAttrForI32Param(false))
FunRes->addAttribute(3, AK);
@@ -208,7 +247,6 @@ static Constant *getOrInsertValueProfilingCall(Module &M,
}
void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
-
GlobalVariable *Name = Ind->getName();
uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
uint64_t Index = Ind->getIndex()->getZExtValue();
@@ -222,7 +260,6 @@ void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
}
void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
-
GlobalVariable *Name = Ind->getName();
auto It = ProfileDataMap.find(Name);
assert(It != ProfileDataMap.end() && It->second.DataVar &&
@@ -235,11 +272,25 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
Index += It->second.NumValueSites[Kind];
IRBuilder<> Builder(Ind);
- Value *Args[3] = {Ind->getTargetValue(),
- Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
- Builder.getInt32(Index)};
- CallInst *Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI),
- Args);
+ bool IsRange = (Ind->getValueKind()->getZExtValue() ==
+ llvm::InstrProfValueKind::IPVK_MemOPSize);
+ CallInst *Call = nullptr;
+ if (!IsRange) {
+ Value *Args[3] = {Ind->getTargetValue(),
+ Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
+ Builder.getInt32(Index)};
+ Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args);
+ } else {
+ Value *Args[6] = {
+ Ind->getTargetValue(),
+ Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
+ Builder.getInt32(Index),
+ Builder.getInt64(MemOPSizeRangeStart),
+ Builder.getInt64(MemOPSizeRangeLast),
+ Builder.getInt64(MemOPSizeLarge == 0 ? INT64_MIN : MemOPSizeLarge)};
+ Call =
+ Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args);
+ }
if (auto AK = TLI->getExtAttrForI32Param(false))
Call->addAttribute(3, AK);
Ind->replaceAllUsesWith(Call);
@@ -259,7 +310,6 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
}
void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
-
ConstantArray *Names =
cast<ConstantArray>(CoverageNamesVar->getInitializer());
for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
@@ -270,7 +320,9 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
Name->setLinkage(GlobalValue::PrivateLinkage);
ReferencedNames.push_back(Name);
+ NC->dropAllReferences();
}
+ CoverageNamesVar->eraseFromParent();
}
/// Get the name of a profiling variable for a particular function.
@@ -367,7 +419,8 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
Constant::getNullValue(CounterTy),
getVarName(Inc, getInstrProfCountersVarPrefix()));
CounterPtr->setVisibility(NamePtr->getVisibility());
- CounterPtr->setSection(getCountersSection());
+ CounterPtr->setSection(
+ getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
CounterPtr->setAlignment(8);
CounterPtr->setComdat(ProfileVarsComdat);
@@ -376,7 +429,6 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
// the current function.
Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(*M)) {
-
uint64_t NS = 0;
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
NS += PD.NumValueSites[Kind];
@@ -388,11 +440,12 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
Constant::getNullValue(ValuesTy),
getVarName(Inc, getInstrProfValuesVarPrefix()));
ValuesVar->setVisibility(NamePtr->getVisibility());
- ValuesVar->setSection(getInstrProfValuesSectionName(isMachO()));
+ ValuesVar->setSection(
+ getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
ValuesVar->setAlignment(8);
ValuesVar->setComdat(ProfileVarsComdat);
ValuesPtrExpr =
- ConstantExpr::getBitCast(ValuesVar, llvm::Type::getInt8PtrTy(Ctx));
+ ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
}
}
@@ -421,7 +474,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
ConstantStruct::get(DataTy, DataVals),
getVarName(Inc, getInstrProfDataVarPrefix()));
Data->setVisibility(NamePtr->getVisibility());
- Data->setSection(getDataSection());
+ Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT);
Data->setComdat(ProfileVarsComdat);
@@ -481,9 +534,10 @@ void InstrProfiling::emitVNodes() {
ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
auto *VNodesVar = new GlobalVariable(
- *M, VNodesTy, false, llvm::GlobalValue::PrivateLinkage,
+ *M, VNodesTy, false, GlobalValue::PrivateLinkage,
Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
- VNodesVar->setSection(getInstrProfVNodesSectionName(isMachO()));
+ VNodesVar->setSection(
+ getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
UsedVars.push_back(VNodesVar);
}
@@ -496,18 +550,22 @@ void InstrProfiling::emitNameData() {
std::string CompressedNameStr;
if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
DoNameCompression)) {
- llvm::report_fatal_error(toString(std::move(E)), false);
+ report_fatal_error(toString(std::move(E)), false);
}
auto &Ctx = M->getContext();
- auto *NamesVal = llvm::ConstantDataArray::getString(
+ auto *NamesVal = ConstantDataArray::getString(
Ctx, StringRef(CompressedNameStr), false);
- NamesVar = new llvm::GlobalVariable(*M, NamesVal->getType(), true,
- llvm::GlobalValue::PrivateLinkage,
- NamesVal, getInstrProfNamesVarName());
+ NamesVar = new GlobalVariable(*M, NamesVal->getType(), true,
+ GlobalValue::PrivateLinkage, NamesVal,
+ getInstrProfNamesVarName());
NamesSize = CompressedNameStr.size();
- NamesVar->setSection(getNameSection());
+ NamesVar->setSection(
+ getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
UsedVars.push_back(NamesVar);
+
+ for (auto *NamePtr : ReferencedNames)
+ NamePtr->eraseFromParent();
}
void InstrProfiling::emitRegistration() {
@@ -550,7 +608,6 @@ void InstrProfiling::emitRegistration() {
}
void InstrProfiling::emitRuntimeHook() {
-
// We expect the linker to be invoked with the -u<hook_var> flag on Linux,
// in which case there is no need to emit the user function.
if (Triple(M->getTargetTriple()).isOSLinux())
@@ -600,7 +657,6 @@ void InstrProfiling::emitInitialization() {
GlobalVariable *ProfileNameVar = new GlobalVariable(
*M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage,
ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR));
- Triple TT(M->getTargetTriple());
if (TT.supportsCOMDAT()) {
ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
ProfileNameVar->setComdat(M->getOrInsertComdat(
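InstrProfiling now derives every profile section name from one helper keyed on the section kind and the module's object format, with the Triple cached once per run. A hedged sketch using the enumerators seen above:

    Triple TT(M.getTargetTriple());
    // One helper replaces getCountersSection(), getNameSection(), etc.; the
    // object format (ELF, Mach-O, COFF) selects the naming convention.
    auto CntsSection = getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat());
    auto NameSection = getInstrProfSectionName(IPSK_name, TT.getObjectFormat());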
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index 2963d08752c4..7bb62d2c8455 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -63,6 +63,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializePGOInstrumentationGenLegacyPassPass(Registry);
initializePGOInstrumentationUseLegacyPassPass(Registry);
initializePGOIndirectCallPromotionLegacyPassPass(Registry);
+ initializePGOMemOPSizeOptLegacyPassPass(Registry);
initializeInstrProfilingLegacyPassPass(Registry);
initializeMemorySanitizerPass(Registry);
initializeThreadSanitizerPass(Registry);
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index fafb0fcbd017..190f05db4b0c 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -425,7 +425,7 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
// which is not yet implemented.
StringRef WarningFnName = Recover ? "__msan_warning"
: "__msan_warning_noreturn";
- WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), nullptr);
+ WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy());
for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
AccessSizeIndex++) {
@@ -433,31 +433,31 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
- IRB.getInt32Ty(), nullptr);
+ IRB.getInt32Ty());
FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
- IRB.getInt8PtrTy(), IRB.getInt32Ty(), nullptr);
+ IRB.getInt8PtrTy(), IRB.getInt32Ty());
}
MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
"__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
- IRB.getInt8PtrTy(), IntptrTy, nullptr);
+ IRB.getInt8PtrTy(), IntptrTy);
MsanPoisonStackFn =
M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(),
- IRB.getInt8PtrTy(), IntptrTy, nullptr);
+ IRB.getInt8PtrTy(), IntptrTy);
MsanChainOriginFn = M.getOrInsertFunction(
- "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty(), nullptr);
+ "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty());
MemmoveFn = M.getOrInsertFunction(
"__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy, nullptr);
+ IRB.getInt8PtrTy(), IntptrTy);
MemcpyFn = M.getOrInsertFunction(
"__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IntptrTy, nullptr);
+ IntptrTy);
MemsetFn = M.getOrInsertFunction(
"__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
- IntptrTy, nullptr);
+ IntptrTy);
// Create globals.
RetvalTLS = new GlobalVariable(
@@ -1037,15 +1037,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
OriginMap[V] = Origin;
}
+ Constant *getCleanShadow(Type *OrigTy) {
+ Type *ShadowTy = getShadowTy(OrigTy);
+ if (!ShadowTy)
+ return nullptr;
+ return Constant::getNullValue(ShadowTy);
+ }
+
/// \brief Create a clean shadow value for a given value.
///
/// Clean shadow (all zeroes) means all bits of the value are defined
/// (initialized).
Constant *getCleanShadow(Value *V) {
- Type *ShadowTy = getShadowTy(V);
- if (!ShadowTy)
- return nullptr;
- return Constant::getNullValue(ShadowTy);
+ return getCleanShadow(V->getType());
}
/// \brief Create a dirty shadow of a given shadow type.
@@ -1942,7 +1946,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (ClCheckAccessAddress)
insertShadowCheck(Addr, &I);
- // FIXME: use ClStoreCleanOrigin
// FIXME: factor out common code from materializeStores
if (MS.TrackOrigins)
IRB.CreateStore(getOrigin(&I, 1), getOriginPtr(Addr, IRB, 1));
@@ -2325,11 +2328,49 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
+ void handleStmxcsr(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *Addr = I.getArgOperand(0);
+ Type *Ty = IRB.getInt32Ty();
+ Value *ShadowPtr = getShadowPtr(Addr, Ty, IRB);
+
+ IRB.CreateStore(getCleanShadow(Ty),
+ IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo()));
+
+ if (ClCheckAccessAddress)
+ insertShadowCheck(Addr, &I);
+ }
+
+ void handleLdmxcsr(IntrinsicInst &I) {
+ if (!InsertChecks) return;
+
+ IRBuilder<> IRB(&I);
+ Value *Addr = I.getArgOperand(0);
+ Type *Ty = IRB.getInt32Ty();
+ unsigned Alignment = 1;
+
+ if (ClCheckAccessAddress)
+ insertShadowCheck(Addr, &I);
+
+ Value *Shadow = IRB.CreateAlignedLoad(getShadowPtr(Addr, Ty, IRB),
+ Alignment, "_ldmxcsr");
+ Value *Origin = MS.TrackOrigins
+ ? IRB.CreateLoad(getOriginPtr(Addr, IRB, Alignment))
+ : getCleanOrigin();
+ insertShadowCheck(Shadow, Origin, &I);
+ }
+
void visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
case llvm::Intrinsic::bswap:
handleBswap(I);
break;
+ case llvm::Intrinsic::x86_sse_stmxcsr:
+ handleStmxcsr(I);
+ break;
+ case llvm::Intrinsic::x86_sse_ldmxcsr:
+ handleLdmxcsr(I);
+ break;
case llvm::Intrinsic::x86_avx512_vcvtsd2usi64:
case llvm::Intrinsic::x86_avx512_vcvtsd2usi32:
case llvm::Intrinsic::x86_avx512_vcvtss2usi64:
@@ -2566,10 +2607,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
AttrBuilder B;
B.addAttribute(Attribute::ReadOnly)
.addAttribute(Attribute::ReadNone);
- Func->removeAttributes(AttributeSet::FunctionIndex,
- AttributeSet::get(Func->getContext(),
- AttributeSet::FunctionIndex,
- B));
+ Func->removeAttributes(AttributeList::FunctionIndex,
+ AttributeList::get(Func->getContext(),
+ AttributeList::FunctionIndex,
+ B));
}
maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
@@ -2597,7 +2638,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
" Shadow: " << *ArgShadow << "\n");
bool ArgIsInitialized = false;
const DataLayout &DL = F.getParent()->getDataLayout();
- if (CS.paramHasAttr(i + 1, Attribute::ByVal)) {
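+ // paramHasAttr now takes a 0-based argument number; the old 1-based
+ // attribute index (where 0 meant the return value) is gone.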
+ if (CS.paramHasAttr(i, Attribute::ByVal)) {
assert(A->getType()->isPointerTy() &&
"ByVal argument is not a pointer!");
Size = DL.getTypeAllocSize(A->getType()->getPointerElementType());
@@ -2690,7 +2731,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
} else {
Value *Shadow = getShadow(RetVal);
IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
- // FIXME: make it conditional if ClStoreCleanOrigin==0
if (MS.TrackOrigins)
IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
}
@@ -2717,15 +2757,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, getCleanOrigin());
IRBuilder<> IRB(I.getNextNode());
const DataLayout &DL = F.getParent()->getDataLayout();
- uint64_t Size = DL.getTypeAllocSize(I.getAllocatedType());
+ uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
+ Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
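+ // 'alloca T, N' poisons N * sizeof(T) bytes; N may be dynamic, so the
+ // length is computed with a runtime multiply when needed.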
+ if (I.isArrayAllocation())
+ Len = IRB.CreateMul(Len, I.getArraySize());
if (PoisonStack && ClPoisonStackWithCall) {
IRB.CreateCall(MS.MsanPoisonStackFn,
- {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
- ConstantInt::get(MS.IntptrTy, Size)});
+ {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
} else {
Value *ShadowBase = getShadowPtr(&I, Type::getInt8PtrTy(*MS.C), IRB);
Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
- IRB.CreateMemSet(ShadowBase, PoisonValue, Size, I.getAlignment());
+ IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlignment());
}
if (PoisonStack && MS.TrackOrigins) {
@@ -2742,8 +2784,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
StackDescription.str());
IRB.CreateCall(MS.MsanSetAllocaOrigin4Fn,
- {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
- ConstantInt::get(MS.IntptrTy, Size),
+ {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()),
IRB.CreatePointerCast(&F, MS.IntptrTy)});
}
@@ -2935,7 +2976,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
Value *A = *ArgIt;
unsigned ArgNo = CS.getArgumentNo(ArgIt);
bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
- bool IsByVal = CS.paramHasAttr(ArgNo + 1, Attribute::ByVal);
+ bool IsByVal = CS.paramHasAttr(ArgNo, Attribute::ByVal);
if (IsByVal) {
// ByVal arguments always go to the overflow area.
// Fixed arguments passed through the overflow area will be stepped
@@ -3456,7 +3497,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
Value *A = *ArgIt;
unsigned ArgNo = CS.getArgumentNo(ArgIt);
bool IsFixed = ArgNo < CS.getFunctionType()->getNumParams();
- bool IsByVal = CS.paramHasAttr(ArgNo + 1, Attribute::ByVal);
+ bool IsByVal = CS.paramHasAttr(ArgNo, Attribute::ByVal);
if (IsByVal) {
assert(A->getType()->isPointerTy());
Type *RealTy = A->getType()->getPointerElementType();
@@ -3618,9 +3659,9 @@ bool MemorySanitizer::runOnFunction(Function &F) {
AttrBuilder B;
B.addAttribute(Attribute::ReadOnly)
.addAttribute(Attribute::ReadNone);
- F.removeAttributes(AttributeSet::FunctionIndex,
- AttributeSet::get(F.getContext(),
- AttributeSet::FunctionIndex, B));
+ F.removeAttributes(
+ AttributeList::FunctionIndex,
+ AttributeList::get(F.getContext(), AttributeList::FunctionIndex, B));
return Visitor.runOnFunction();
}
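The AttributeSet-to-AttributeList changes in this file are mechanical: only the scope of FunctionIndex and the list type change, not the call shape. A minimal sketch of the new spelling, mirroring the hunks above (LLVM 4.x-era Function API assumed):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// MemorySanitizer must drop readonly/readnone before it adds shadow writes.
static void stripMemoryAttrs(Function &F) {
  AttrBuilder B;
  B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone);
  F.removeAttributes(
      AttributeList::FunctionIndex,
      AttributeList::get(F.getContext(), AttributeList::FunctionIndex, B));
}
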
diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 04f9a64bef9f..990bcec109de 100644
--- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -58,8 +58,10 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/IndirectCallSiteVisitor.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -71,7 +73,9 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/JamCRC.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -87,6 +91,7 @@ using namespace llvm;
STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
STATISTIC(NumOfPGOSelectInsts, "Number of select instructions instrumented.");
+STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
STATISTIC(NumOfPGOEdge, "Number of edges.");
STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
@@ -116,6 +121,13 @@ static cl::opt<unsigned> MaxNumAnnotations(
cl::desc("Max number of annotations for a single indirect "
"call callsite"));
+// Command line option to set the maximum number of value annotations
+// to write to the metadata for a single memop intrinsic.
+static cl::opt<unsigned> MaxNumMemOPAnnotations(
+ "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Max number of preicise value annotations for a single memop"
+ "intrinsic"));
+
// Command line option to control appending FunctionHash to the name of a COMDAT
// function. This is to avoid the hash mismatch caused by the preinliner.
static cl::opt<bool> DoComdatRenaming(
@@ -125,24 +137,59 @@ static cl::opt<bool> DoComdatRenaming(
// Command line option to enable/disable the warning about missing profile
// information.
-static cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function",
- cl::init(false),
- cl::Hidden);
+static cl::opt<bool>
+ PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
+ cl::desc("Use this option to turn on/off "
+ "warnings about missing profile data for "
+ "functions."));
// Command line option to enable/disable the warning about a hash mismatch in
// the profile data.
-static cl::opt<bool> NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false),
- cl::Hidden);
+static cl::opt<bool>
+ NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
+ cl::desc("Use this option to turn off/on "
+ "warnings about profile cfg mismatch."));
// Command line option to enable/disable the warning about a hash mismatch in
// the profile data for Comdat functions, which often turns out to be false
// positive due to the pre-instrumentation inline.
-static cl::opt<bool> NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat",
- cl::init(true), cl::Hidden);
+static cl::opt<bool>
+ NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true),
+ cl::Hidden,
+ cl::desc("The option is used to turn on/off "
+ "warnings about hash mismatch for comdat "
+ "functions."));
// Command line option to enable/disable select instruction instrumentation.
-static cl::opt<bool> PGOInstrSelect("pgo-instr-select", cl::init(true),
- cl::Hidden);
+static cl::opt<bool>
+ PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
+ cl::desc("Use this option to turn on/off SELECT "
+ "instruction instrumentation. "));
+
+// Command line option to turn on CFG dot dump of raw profile counts
+static cl::opt<bool>
+ PGOViewRawCounts("pgo-view-raw-counts", cl::init(false), cl::Hidden,
+ cl::desc("A boolean option to show CFG dag "
+ "with raw profile counts from "
+ "profile data. See also option "
+ "-pgo-view-counts. To limit graph "
+ "display to only one function, use "
+ "filtering option -view-bfi-func-name."));
+
+// Command line option to enable/disable memop intrinsic call size profiling.
+static cl::opt<bool>
+ PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
+ cl::desc("Use this option to turn on/off "
+ "memory instrinsic size profiling."));
+
+// Command line option to turn on CFG dot dump after profile annotation.
+// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
+extern cl::opt<bool> PGOViewCounts;
+
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+
namespace {
/// The select instruction visitor plays three roles specified
@@ -167,6 +214,7 @@ struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
SelectInstVisitor(Function &Func) : F(Func) {}
void countSelects(Function &Func) {
+ NSIs = 0;
Mode = VM_counting;
visit(Func);
}
@@ -196,9 +244,54 @@ struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
void annotateOneSelectInst(SelectInst &SI);
// Visit \p SI instruction and perform tasks according to visit mode.
void visitSelectInst(SelectInst &SI);
+ // Return the number of select instructions. This needs to be called after
+ // countSelects().
unsigned getNumOfSelectInsts() const { return NSIs; }
};
+/// Instruction Visitor class to visit memory intrinsic calls.
+struct MemIntrinsicVisitor : public InstVisitor<MemIntrinsicVisitor> {
+ Function &F;
+ unsigned NMemIs = 0; // Number of memIntrinsics instrumented.
+ VisitMode Mode = VM_counting; // Visiting mode.
+ unsigned CurCtrId = 0; // Current counter index.
+ unsigned TotalNumCtrs = 0; // Total number of counters
+ GlobalVariable *FuncNameVar = nullptr;
+ uint64_t FuncHash = 0;
+ PGOUseFunc *UseFunc = nullptr;
+ std::vector<Instruction *> Candidates;
+
+ MemIntrinsicVisitor(Function &Func) : F(Func) {}
+
+ void countMemIntrinsics(Function &Func) {
+ NMemIs = 0;
+ Mode = VM_counting;
+ visit(Func);
+ }
+
+ void instrumentMemIntrinsics(Function &Func, unsigned TotalNC,
+ GlobalVariable *FNV, uint64_t FHash) {
+ Mode = VM_instrument;
+ TotalNumCtrs = TotalNC;
+ FuncHash = FHash;
+ FuncNameVar = FNV;
+ visit(Func);
+ }
+
+ std::vector<Instruction *> findMemIntrinsics(Function &Func) {
+ Candidates.clear();
+ Mode = VM_annotate;
+ visit(Func);
+ return Candidates;
+ }
+
+ // Instrument one memory intrinsic call instruction.
+ void instrumentOneMemIntrinsic(MemIntrinsic &MI);
+ // Visit \p MI instruction and perform tasks according to visit mode.
+ void visitMemIntrinsic(MemIntrinsic &MI);
+ unsigned getNumOfMemIntrinsics() const { return NMemIs; }
+};
+
class PGOInstrumentationGenLegacyPass : public ModulePass {
public:
static char ID;
@@ -316,8 +409,9 @@ private:
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
public:
- std::vector<Instruction *> IndirectCallSites;
+ std::vector<std::vector<Instruction *>> ValueSites;
SelectInstVisitor SIVisitor;
+ MemIntrinsicVisitor MIVisitor;
std::string FuncName;
GlobalVariable *FuncNameVar;
// CFG hash value for this function.
@@ -347,13 +441,16 @@ public:
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
BlockFrequencyInfo *BFI = nullptr)
- : F(Func), ComdatMembers(ComdatMembers), SIVisitor(Func), FunctionHash(0),
- MST(F, BPI, BFI) {
+ : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1),
+ SIVisitor(Func), MIVisitor(Func), FunctionHash(0), MST(F, BPI, BFI) {
// This should be done before CFG hash computation.
SIVisitor.countSelects(Func);
+ MIVisitor.countMemIntrinsics(Func);
NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
- IndirectCallSites = findIndirectCallSites(Func);
+ NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
+ ValueSites[IPVK_IndirectCallTarget] = findIndirectCallSites(Func);
+ ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func);
FuncName = getPGOFuncName(F);
computeCFGHash();
@@ -405,7 +502,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
}
JC.update(Indexes);
FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
- (uint64_t)IndirectCallSites.size() << 48 |
+ (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
(uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
}
@@ -552,7 +649,7 @@ static void instrumentOneFunc(
return;
unsigned NumIndirectCallSites = 0;
- for (auto &I : FuncInfo.IndirectCallSites) {
+ for (auto &I : FuncInfo.ValueSites[IPVK_IndirectCallTarget]) {
CallSite CS(I);
Value *Callee = CS.getCalledValue();
DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = "
@@ -565,10 +662,14 @@ static void instrumentOneFunc(
{llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
Builder.getInt64(FuncInfo.FunctionHash),
Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()),
- Builder.getInt32(llvm::InstrProfValueKind::IPVK_IndirectCallTarget),
+ Builder.getInt32(IPVK_IndirectCallTarget),
Builder.getInt32(NumIndirectCallSites++)});
}
NumOfPGOICall += NumIndirectCallSites;
+
+ // Now instrument memop intrinsic calls.
+ FuncInfo.MIVisitor.instrumentMemIntrinsics(
+ F, NumCounters, FuncInfo.FuncNameVar, FuncInfo.FunctionHash);
}
// This class represents a CFG edge in profile use compilation.
@@ -653,8 +754,11 @@ public:
// Set the branch weights based on the count values.
void setBranchWeights();
- // Annotate the indirect call sites.
- void annotateIndirectCallSites();
+ // Annotate the value profile call sites for all value kinds.
+ void annotateValueSites();
+
+ // Annotate the value profile call sites for one value kind.
+ void annotateValueSites(uint32_t Kind);
// The hotness of the function from the profile count.
enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
@@ -677,6 +781,8 @@ public:
return FuncInfo.findBBInfo(BB);
}
+ Function &getFunc() const { return F; }
+
private:
Function &F;
Module *M;
@@ -761,7 +867,7 @@ void PGOUseFunc::setInstrumentedCounts(
NewEdge1.InMST = true;
getBBInfo(InstrBB).setBBInfoCount(CountValue);
}
- ProfileCountSize = CountFromProfile.size();
+ ProfileCountSize = CountFromProfile.size();
CountPosition = I;
}
@@ -932,21 +1038,6 @@ void PGOUseFunc::populateCounters() {
DEBUG(FuncInfo.dumpInfo("after reading profile."));
}
-static void setProfMetadata(Module *M, Instruction *TI,
- ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
- MDBuilder MDB(M->getContext());
- assert(MaxCount > 0 && "Bad max count");
- uint64_t Scale = calculateCountScale(MaxCount);
- SmallVector<unsigned, 4> Weights;
- for (const auto &ECI : EdgeCounts)
- Weights.push_back(scaleBranchCount(ECI, Scale));
-
- DEBUG(dbgs() << "Weight is: ";
- for (const auto &W : Weights) { dbgs() << W << " "; }
- dbgs() << "\n";);
- TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
-}
-
// Assign the scaled count values to the BB with multiple out edges.
void PGOUseFunc::setBranchWeights() {
// Generate MD_prof metadata for every branch instruction.
@@ -990,8 +1081,8 @@ void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
{llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
- Builder.getInt64(FuncHash),
- Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step});
+ Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
+ Builder.getInt32(*CurCtrIdx), Step});
++(*CurCtrIdx);
}
@@ -1020,9 +1111,9 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
if (SI.getCondition()->getType()->isVectorTy())
return;
- NSIs++;
switch (Mode) {
case VM_counting:
+ NSIs++;
return;
case VM_instrument:
instrumentOneSelectInst(SI);
@@ -1035,35 +1126,79 @@ void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
llvm_unreachable("Unknown visiting mode");
}
-// Traverse all the indirect callsites and annotate the instructions.
-void PGOUseFunc::annotateIndirectCallSites() {
+void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) {
+ Module *M = F.getParent();
+ IRBuilder<> Builder(&MI);
+ Type *Int64Ty = Builder.getInt64Ty();
+ Type *I8PtrTy = Builder.getInt8PtrTy();
+ Value *Length = MI.getLength();
+ assert(!isa<ConstantInt>(Length));
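+ // instrprof_value_profile operands: (function-name var, function hash,
+ // profiled value, value kind, counter index); here the profiled value is
+ // the dynamic length of the memory operation.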
+ Builder.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
+ {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
+ Builder.getInt64(FuncHash), Builder.CreatePtrToInt(Length, Int64Ty),
+ Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)});
+ ++CurCtrId;
+}
+
+void MemIntrinsicVisitor::visitMemIntrinsic(MemIntrinsic &MI) {
+ if (!PGOInstrMemOP)
+ return;
+ Value *Length = MI.getLength();
+ // Do not instrument constant-length calls.
+ if (isa<ConstantInt>(Length))
+ return;
+
+ switch (Mode) {
+ case VM_counting:
+ NMemIs++;
+ return;
+ case VM_instrument:
+ instrumentOneMemIntrinsic(MI);
+ return;
+ case VM_annotate:
+ Candidates.push_back(&MI);
+ return;
+ }
+ llvm_unreachable("Unknown visiting mode");
+}
+
+// Traverse all value sites and annotate the instructions for all value kinds.
+void PGOUseFunc::annotateValueSites() {
if (DisableValueProfiling)
return;
// Create the PGOFuncName meta data.
createPGOFuncNameMetadata(F, FuncInfo.FuncName);
- unsigned IndirectCallSiteIndex = 0;
- auto &IndirectCallSites = FuncInfo.IndirectCallSites;
- unsigned NumValueSites =
- ProfileRecord.getNumValueSites(IPVK_IndirectCallTarget);
- if (NumValueSites != IndirectCallSites.size()) {
- std::string Msg =
- std::string("Inconsistent number of indirect call sites: ") +
- F.getName().str();
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+ annotateValueSites(Kind);
+}
+
+// Annotate the instructions for a specific value kind.
+void PGOUseFunc::annotateValueSites(uint32_t Kind) {
+ unsigned ValueSiteIndex = 0;
+ auto &ValueSites = FuncInfo.ValueSites[Kind];
+ unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
+ if (NumValueSites != ValueSites.size()) {
auto &Ctx = M->getContext();
- Ctx.diagnose(
- DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
+ Ctx.diagnose(DiagnosticInfoPGOProfile(
+ M->getName().data(),
+ Twine("Inconsistent number of value sites for kind = ") + Twine(Kind) +
+ " in " + F.getName().str(),
+ DS_Warning));
return;
}
- for (auto &I : IndirectCallSites) {
- DEBUG(dbgs() << "Read one indirect call instrumentation: Index="
- << IndirectCallSiteIndex << " out of " << NumValueSites
- << "\n");
- annotateValueSite(*M, *I, ProfileRecord, IPVK_IndirectCallTarget,
- IndirectCallSiteIndex, MaxNumAnnotations);
- IndirectCallSiteIndex++;
+ for (auto &I : ValueSites) {
+ DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
+ << "): Index = " << ValueSiteIndex << " out of "
+ << NumValueSites << "\n");
+ annotateValueSite(*M, *I, ProfileRecord,
+ static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
+ Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
+ : MaxNumAnnotations);
+ ValueSiteIndex++;
}
}
} // end anonymous namespace
@@ -1196,12 +1331,29 @@ static bool annotateAllFunctions(
continue;
Func.populateCounters();
Func.setBranchWeights();
- Func.annotateIndirectCallSites();
+ Func.annotateValueSites();
PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
if (FreqAttr == PGOUseFunc::FFA_Cold)
ColdFunctions.push_back(&F);
else if (FreqAttr == PGOUseFunc::FFA_Hot)
HotFunctions.push_back(&F);
+ if (PGOViewCounts && (ViewBlockFreqFuncName.empty() ||
+ F.getName().equals(ViewBlockFreqFuncName))) {
+ LoopInfo LI{DominatorTree(F)};
+ std::unique_ptr<BranchProbabilityInfo> NewBPI =
+ llvm::make_unique<BranchProbabilityInfo>(F, LI);
+ std::unique_ptr<BlockFrequencyInfo> NewBFI =
+ llvm::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
+
+ NewBFI->view();
+ }
+ if (PGOViewRawCounts && (ViewBlockFreqFuncName.empty() ||
+ F.getName().equals(ViewBlockFreqFuncName))) {
+ if (ViewBlockFreqFuncName.empty())
+ WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
+ else
+ ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
+ }
}
M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext()));
// Set function hotness attribute from the profile.
@@ -1257,3 +1409,90 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
return annotateAllFunctions(M, ProfileFileName, LookupBPI, LookupBFI);
}
+
+namespace llvm {
+void setProfMetadata(Module *M, Instruction *TI, ArrayRef<uint64_t> EdgeCounts,
+ uint64_t MaxCount) {
+ MDBuilder MDB(M->getContext());
+ assert(MaxCount > 0 && "Bad max count");
+ uint64_t Scale = calculateCountScale(MaxCount);
+ SmallVector<unsigned, 4> Weights;
+ for (const auto &ECI : EdgeCounts)
+ Weights.push_back(scaleBranchCount(ECI, Scale));
+
+ DEBUG(dbgs() << "Weight is: ";
+ for (const auto &W : Weights) { dbgs() << W << " "; }
+ dbgs() << "\n";);
+ TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+}
+
+template <> struct GraphTraits<PGOUseFunc *> {
+ typedef const BasicBlock *NodeRef;
+ typedef succ_const_iterator ChildIteratorType;
+ typedef pointer_iterator<Function::const_iterator> nodes_iterator;
+
+ static NodeRef getEntryNode(const PGOUseFunc *G) {
+ return &G->getFunc().front();
+ }
+ static ChildIteratorType child_begin(const NodeRef N) {
+ return succ_begin(N);
+ }
+ static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
+ static nodes_iterator nodes_begin(const PGOUseFunc *G) {
+ return nodes_iterator(G->getFunc().begin());
+ }
+ static nodes_iterator nodes_end(const PGOUseFunc *G) {
+ return nodes_iterator(G->getFunc().end());
+ }
+};
+
+static std::string getSimpleNodeName(const BasicBlock *Node) {
+ if (!Node->getName().empty())
+ return Node->getName();
+
+ std::string SimpleNodeName;
+ raw_string_ostream OS(SimpleNodeName);
+ Node->printAsOperand(OS, false);
+ return OS.str();
+}
+
+template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
+ explicit DOTGraphTraits(bool isSimple = false)
+ : DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const PGOUseFunc *G) {
+ return G->getFunc().getName();
+ }
+
+ std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+
+ OS << getSimpleNodeName(Node) << ":\\l";
+ UseBBInfo *BI = Graph->findBBInfo(Node);
+ OS << "Count : ";
+ if (BI && BI->CountValid)
+ OS << BI->CountValue << "\\l";
+ else
+ OS << "Unknown\\l";
+
+ if (!PGOInstrSelect)
+ return Result;
+
+ for (auto It = Node->begin(); It != Node->end(); ++It) {
+ auto *I = &*It;
+ if (!isa<SelectInst>(I))
+ continue;
+ // Display scaled counts for SELECT instruction:
+ OS << "SELECT : { T = ";
+ uint64_t TC, FC;
+ bool HasProf = I->extractProfMetadata(TC, FC);
+ if (!HasProf)
+ OS << "Unknown, F = Unknown }\\l";
+ else
+ OS << TC << ", F = " << FC << " }\\l";
+ }
+ return Result;
+ }
+};
+} // namespace llvm
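With the GraphTraits and DOTGraphTraits specializations above, the generic GraphWriter entry points can treat a PGOUseFunc like any other graph. A sketch of a caller inside this same file (the Interactive flag is hypothetical; the pass derives the equivalent choice from -view-bfi-func-name, as shown above):

#include "llvm/Support/GraphWriter.h"

// Render the count-annotated CFG: to a .dot file in batch mode, or in a
// viewer when the user filtered to a single function.
static void renderRawCounts(PGOUseFunc *Func, bool Interactive) {
  if (Interactive)
    ViewGraph(Func, Twine("PGORawCounts_") + Func->getFunc().getName());
  else
    WriteGraph(Func, Twine("PGORawCounts_") + Func->getFunc().getName());
}
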
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 5b4b1fb77134..fa0c7cc5a4c5 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -78,7 +78,6 @@ static const char *const SanCovTraceSwitchName = "__sanitizer_cov_trace_switch";
static const char *const SanCovModuleCtorName = "sancov.module_ctor";
static const uint64_t SanCtorAndDtorPriority = 2;
-static const char *const SanCovTracePCGuardSection = "__sancov_guards";
static const char *const SanCovTracePCGuardName =
"__sanitizer_cov_trace_pc_guard";
static const char *const SanCovTracePCGuardInitName =
@@ -95,7 +94,7 @@ static cl::opt<unsigned> ClCoverageBlockThreshold(
"sanitizer-coverage-block-threshold",
cl::desc("Use a callback with a guard check inside it if there are"
" more than this number of blocks."),
- cl::Hidden, cl::init(500));
+ cl::Hidden, cl::init(0));
static cl::opt<bool>
ClExperimentalTracing("sanitizer-coverage-experimental-tracing",
@@ -216,6 +215,9 @@ private:
SanCovWithCheckFunction->getNumUses() + SanCovTraceBB->getNumUses() +
SanCovTraceEnter->getNumUses();
}
+ StringRef getSanCovTracePCGuardSection() const;
+ StringRef getSanCovTracePCGuardSectionStart() const;
+ StringRef getSanCovTracePCGuardSectionEnd() const;
Function *SanCovFunction;
Function *SanCovWithCheckFunction;
Function *SanCovIndirCallFunction, *SanCovTracePCIndir;
@@ -227,6 +229,7 @@ private:
InlineAsm *EmptyAsm;
Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy;
Module *CurModule;
+ Triple TargetTriple;
LLVMContext *C;
const DataLayout *DL;
@@ -246,6 +249,7 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
C = &(M.getContext());
DL = &M.getDataLayout();
CurModule = &M;
+ TargetTriple = Triple(M.getTargetTriple());
HasSancovGuardsSection = false;
IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());
IntptrPtrTy = PointerType::getUnqual(IntptrTy);
@@ -258,39 +262,39 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
Int32Ty = IRB.getInt32Ty();
SanCovFunction = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovName, VoidTy, Int32PtrTy, nullptr));
+ M.getOrInsertFunction(SanCovName, VoidTy, Int32PtrTy));
SanCovWithCheckFunction = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovWithCheckName, VoidTy, Int32PtrTy, nullptr));
+ M.getOrInsertFunction(SanCovWithCheckName, VoidTy, Int32PtrTy));
SanCovTracePCIndir = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy, nullptr));
+ M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy));
SanCovIndirCallFunction =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr));
+ SanCovIndirCallName, VoidTy, IntptrTy, IntptrTy));
SanCovTraceCmpFunction[0] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceCmp1, VoidTy, IRB.getInt8Ty(), IRB.getInt8Ty(), nullptr));
+ SanCovTraceCmp1, VoidTy, IRB.getInt8Ty(), IRB.getInt8Ty()));
SanCovTraceCmpFunction[1] = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(SanCovTraceCmp2, VoidTy, IRB.getInt16Ty(),
- IRB.getInt16Ty(), nullptr));
+ IRB.getInt16Ty()));
SanCovTraceCmpFunction[2] = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(SanCovTraceCmp4, VoidTy, IRB.getInt32Ty(),
- IRB.getInt32Ty(), nullptr));
+ IRB.getInt32Ty()));
SanCovTraceCmpFunction[3] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceCmp8, VoidTy, Int64Ty, Int64Ty, nullptr));
+ SanCovTraceCmp8, VoidTy, Int64Ty, Int64Ty));
SanCovTraceDivFunction[0] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceDiv4, VoidTy, IRB.getInt32Ty(), nullptr));
+ SanCovTraceDiv4, VoidTy, IRB.getInt32Ty()));
SanCovTraceDivFunction[1] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceDiv8, VoidTy, Int64Ty, nullptr));
+ SanCovTraceDiv8, VoidTy, Int64Ty));
SanCovTraceGepFunction =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceGep, VoidTy, IntptrTy, nullptr));
+ SanCovTraceGep, VoidTy, IntptrTy));
SanCovTraceSwitchFunction =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy, nullptr));
+ SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy));
// We insert an empty inline asm after cov callbacks to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
@@ -298,13 +302,13 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
/*hasSideEffects=*/true);
SanCovTracePC = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovTracePCName, VoidTy, nullptr));
+ M.getOrInsertFunction(SanCovTracePCName, VoidTy));
SanCovTracePCGuard = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTracePCGuardName, VoidTy, Int32PtrTy, nullptr));
+ SanCovTracePCGuardName, VoidTy, Int32PtrTy));
SanCovTraceEnter = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovTraceEnterName, VoidTy, Int32PtrTy, nullptr));
+ M.getOrInsertFunction(SanCovTraceEnterName, VoidTy, Int32PtrTy));
SanCovTraceBB = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(SanCovTraceBBName, VoidTy, Int32PtrTy, nullptr));
+ M.getOrInsertFunction(SanCovTraceBBName, VoidTy, Int32PtrTy));
// At this point we create a dummy array of guards because we don't
// know how many elements we will need.
@@ -363,22 +367,28 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
if (Options.TracePCGuard) {
if (HasSancovGuardsSection) {
Function *CtorFunc;
- std::string SectionName(SanCovTracePCGuardSection);
- GlobalVariable *Bounds[2];
- const char *Prefix[2] = {"__start_", "__stop_"};
- for (int i = 0; i < 2; i++) {
- Bounds[i] = new GlobalVariable(M, Int32PtrTy, false,
- GlobalVariable::ExternalLinkage, nullptr,
- Prefix[i] + SectionName);
- Bounds[i]->setVisibility(GlobalValue::HiddenVisibility);
- }
+ GlobalVariable *SecStart = new GlobalVariable(
+ M, Int32PtrTy, false, GlobalVariable::ExternalLinkage, nullptr,
+ getSanCovTracePCGuardSectionStart());
+ SecStart->setVisibility(GlobalValue::HiddenVisibility);
+ GlobalVariable *SecEnd = new GlobalVariable(
+ M, Int32PtrTy, false, GlobalVariable::ExternalLinkage, nullptr,
+ getSanCovTracePCGuardSectionEnd());
+ SecEnd->setVisibility(GlobalValue::HiddenVisibility);
+
std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
M, SanCovModuleCtorName, SanCovTracePCGuardInitName,
{Int32PtrTy, Int32PtrTy},
- {IRB.CreatePointerCast(Bounds[0], Int32PtrTy),
- IRB.CreatePointerCast(Bounds[1], Int32PtrTy)});
-
- appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
+ {IRB.CreatePointerCast(SecStart, Int32PtrTy),
+ IRB.CreatePointerCast(SecEnd, Int32PtrTy)});
+
+ if (TargetTriple.supportsCOMDAT()) {
+ // Use comdat to dedup CtorFunc.
+ CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName));
+ appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc);
+ } else {
+ appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
+ }
}
} else if (!Options.TracePC) {
Function *CtorFunc;
@@ -435,6 +445,11 @@ static bool shouldInstrumentBlock(const Function& F, const BasicBlock *BB, const
if (isa<UnreachableInst>(BB->getTerminator()))
return false;
+ // Don't insert coverage into blocks without a valid insertion point
+ // (catchswitch blocks).
+ if (BB->getFirstInsertionPt() == BB->end())
+ return false;
+
if (!ClPruneBlocks || &F.getEntryBlock() == BB)
return true;
@@ -517,7 +532,7 @@ void SanitizerCoverageModule::CreateFunctionGuardArray(size_t NumGuards,
Constant::getNullValue(ArrayOfInt32Ty), "__sancov_gen_");
if (auto Comdat = F.getComdat())
FunctionGuardArray->setComdat(Comdat);
- FunctionGuardArray->setSection(SanCovTracePCGuardSection);
+ FunctionGuardArray->setSection(getSanCovTracePCGuardSection());
}
bool SanitizerCoverageModule::InjectCoverage(Function &F,
@@ -755,6 +770,27 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
}
}
+StringRef SanitizerCoverageModule::getSanCovTracePCGuardSection() const {
+ if (TargetTriple.getObjectFormat() == Triple::COFF)
+ return ".SCOV$M";
+ if (TargetTriple.isOSBinFormatMachO())
+ return "__DATA,__sancov_guards";
+ return "__sancov_guards";
+}
+
+StringRef SanitizerCoverageModule::getSanCovTracePCGuardSectionStart() const {
+ if (TargetTriple.isOSBinFormatMachO())
+ return "\1section$start$__DATA$__sancov_guards";
+ return "__start___sancov_guards";
+}
+
+StringRef SanitizerCoverageModule::getSanCovTracePCGuardSectionEnd() const {
+ if (TargetTriple.isOSBinFormatMachO())
+ return "\1section$end$__DATA$__sancov_guards";
+ return "__stop___sancov_guards";
+}
+
char SanitizerCoverageModule::ID = 0;
INITIALIZE_PASS_BEGIN(SanitizerCoverageModule, "sancov",
"SanitizerCoverage: TODO."
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 52035c79a4a3..9260217bd5e6 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -155,17 +155,18 @@ FunctionPass *llvm::createThreadSanitizerPass() {
void ThreadSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(M.getContext());
- AttributeSet Attr;
- Attr = Attr.addAttribute(M.getContext(), AttributeSet::FunctionIndex, Attribute::NoUnwind);
+ AttributeList Attr;
+ Attr = Attr.addAttribute(M.getContext(), AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
// Initialize the callbacks.
TsanFuncEntry = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_func_entry", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ "__tsan_func_entry", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
TsanFuncExit = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("__tsan_func_exit", Attr, IRB.getVoidTy(), nullptr));
+ M.getOrInsertFunction("__tsan_func_exit", Attr, IRB.getVoidTy()));
TsanIgnoreBegin = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_ignore_thread_begin", Attr, IRB.getVoidTy(), nullptr));
+ "__tsan_ignore_thread_begin", Attr, IRB.getVoidTy()));
TsanIgnoreEnd = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_ignore_thread_end", Attr, IRB.getVoidTy(), nullptr));
+ "__tsan_ignore_thread_end", Attr, IRB.getVoidTy()));
OrdTy = IRB.getInt32Ty();
for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
const unsigned ByteSize = 1U << i;
@@ -174,31 +175,31 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
std::string BitSizeStr = utostr(BitSize);
SmallString<32> ReadName("__tsan_read" + ByteSizeStr);
TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- ReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ ReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
SmallString<32> WriteName("__tsan_write" + ByteSizeStr);
TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- WriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ WriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr);
TsanUnalignedRead[i] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + ByteSizeStr);
TsanUnalignedWrite[i] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
Type *Ty = Type::getIntNTy(M.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load");
TsanAtomicLoad[i] = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(AtomicLoadName, Attr, Ty, PtrTy, OrdTy, nullptr));
+ M.getOrInsertFunction(AtomicLoadName, Attr, Ty, PtrTy, OrdTy));
SmallString<32> AtomicStoreName("__tsan_atomic" + BitSizeStr + "_store");
TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- AtomicStoreName, Attr, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr));
+ AtomicStoreName, Attr, IRB.getVoidTy(), PtrTy, Ty, OrdTy));
for (int op = AtomicRMWInst::FIRST_BINOP;
op <= AtomicRMWInst::LAST_BINOP; ++op) {
@@ -222,33 +223,33 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
continue;
SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart);
TsanAtomicRMW[op][i] = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(RMWName, Attr, Ty, PtrTy, Ty, OrdTy, nullptr));
+ M.getOrInsertFunction(RMWName, Attr, Ty, PtrTy, Ty, OrdTy));
}
SmallString<32> AtomicCASName("__tsan_atomic" + BitSizeStr +
"_compare_exchange_val");
TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- AtomicCASName, Attr, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr));
+ AtomicCASName, Attr, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy));
}
TsanVptrUpdate = checkSanitizerInterfaceFunction(
M.getOrInsertFunction("__tsan_vptr_update", Attr, IRB.getVoidTy(),
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), nullptr));
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy()));
TsanVptrLoad = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_vptr_read", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ "__tsan_vptr_read", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy()));
TsanAtomicThreadFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_atomic_thread_fence", Attr, IRB.getVoidTy(), OrdTy, nullptr));
+ "__tsan_atomic_thread_fence", Attr, IRB.getVoidTy(), OrdTy));
TsanAtomicSignalFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_atomic_signal_fence", Attr, IRB.getVoidTy(), OrdTy, nullptr));
+ "__tsan_atomic_signal_fence", Attr, IRB.getVoidTy(), OrdTy));
MemmoveFn = checkSanitizerInterfaceFunction(
M.getOrInsertFunction("memmove", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy, nullptr));
+ IRB.getInt8PtrTy(), IntptrTy));
MemcpyFn = checkSanitizerInterfaceFunction(
M.getOrInsertFunction("memcpy", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy, nullptr));
+ IRB.getInt8PtrTy(), IntptrTy));
MemsetFn = checkSanitizerInterfaceFunction(
M.getOrInsertFunction("memset", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt32Ty(), IntptrTy, nullptr));
+ IRB.getInt32Ty(), IntptrTy));
}
bool ThreadSanitizer::doInitialization(Module &M) {
@@ -271,7 +272,7 @@ static bool isVtableAccess(Instruction *I) {
// Do not instrument known races/"benign races" that come from compiler
// instrumentation. The user has no way of suppressing them.
-static bool shouldInstrumentReadWriteFromAddress(Value *Addr) {
+static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) {
// Peel off GEPs and BitCasts.
Addr = Addr->stripInBoundsOffsets();
@@ -279,8 +280,9 @@ static bool shouldInstrumentReadWriteFromAddress(Value *Addr) {
if (GV->hasSection()) {
StringRef SectionName = GV->getSection();
// Check if the global is in the PGO counters section.
- if (SectionName.endswith(getInstrProfCountersSectionName(
- /*AddSegment=*/false)))
+ auto OF = Triple(M->getTargetTriple()).getObjectFormat();
+ if (SectionName.endswith(
+ getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
return false;
}
@@ -342,13 +344,13 @@ void ThreadSanitizer::chooseInstructionsToInstrument(
for (Instruction *I : reverse(Local)) {
if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
Value *Addr = Store->getPointerOperand();
- if (!shouldInstrumentReadWriteFromAddress(Addr))
+ if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr))
continue;
WriteTargets.insert(Addr);
} else {
LoadInst *Load = cast<LoadInst>(I);
Value *Addr = Load->getPointerOperand();
- if (!shouldInstrumentReadWriteFromAddress(Addr))
+ if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr))
continue;
if (WriteTargets.count(Addr)) {
// We will write to this temp, so no reason to analyze the read.
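The extra Module parameter threaded through shouldInstrumentReadWriteFromAddress exists because profile-counter section names are now object-format dependent, so the triple is needed to reconstruct them. A sketch of the lookup (assuming the InstrProf.h API of this era):

#include "llvm/ADT/Triple.h"
#include "llvm/IR/Module.h"
#include "llvm/ProfileData/InstrProf.h"
using namespace llvm;

// True if Section is the profile-counter section for this module's object
// format (e.g. "__llvm_prf_cnts" on ELF).
static bool isProfCounterSection(const Module &M, StringRef Section) {
  auto OF = Triple(M.getTargetTriple()).getObjectFormat();
  return Section.endswith(
      getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false));
}
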
diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index c74827210364..c541fa4c8bee 100644
--- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -127,9 +127,8 @@ private:
LLVMContext &C = TheModule->getContext();
Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
- AttributeSet Attr =
- AttributeSet().addAttribute(C, AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
+ AttributeList Attr = AttributeList().addAttribute(
+ C, AttributeList::FunctionIndex, Attribute::NoUnwind);
FunctionType *Fty = FunctionType::get(Type::getVoidTy(C), Params,
/*isVarArg=*/false);
return Decl = TheModule->getOrInsertFunction(Name, Fty, Attr);
@@ -144,10 +143,10 @@ private:
Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
Type *Params[] = { I8X };
FunctionType *Fty = FunctionType::get(I8X, Params, /*isVarArg=*/false);
- AttributeSet Attr = AttributeSet();
+ AttributeList Attr = AttributeList();
if (NoUnwind)
- Attr = Attr.addAttribute(C, AttributeSet::FunctionIndex,
+ Attr = Attr.addAttribute(C, AttributeList::FunctionIndex,
Attribute::NoUnwind);
return Decl = TheModule->getOrInsertFunction(Name, Fty, Attr);
@@ -162,9 +161,8 @@ private:
Type *I8XX = PointerType::getUnqual(I8X);
Type *Params[] = { I8XX, I8X };
- AttributeSet Attr =
- AttributeSet().addAttribute(C, AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
+ AttributeList Attr = AttributeList().addAttribute(
+ C, AttributeList::FunctionIndex, Attribute::NoUnwind);
Attr = Attr.addAttribute(C, 1, Attribute::NoCapture);
FunctionType *Fty = FunctionType::get(Type::getVoidTy(C), Params,
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 23c1f5990ba5..a86eaaec7641 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -394,6 +394,7 @@ void ObjCARCContract::tryToContractReleaseIntoStoreStrong(Instruction *Release,
DEBUG(llvm::dbgs() << " New Store Strong: " << *StoreStrong << "\n");
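+ // Retain is also erased before this function returns; make sure the
+ // caller's iterator is not left pointing at a deleted instruction.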
+ if (&*Iter == Retain) ++Iter;
if (&*Iter == Store) ++Iter;
Store->eraseFromParent();
Release->eraseFromParent();
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 136d54a6cb75..3c73376c9906 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -85,41 +85,6 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
return nullptr;
}
-/// This is a wrapper around getUnderlyingObjCPtr along the lines of
-/// GetUnderlyingObjects except that it returns early when it sees the first
-/// alloca.
-static inline bool AreAnyUnderlyingObjectsAnAlloca(const Value *V,
- const DataLayout &DL) {
- SmallPtrSet<const Value *, 4> Visited;
- SmallVector<const Value *, 4> Worklist;
- Worklist.push_back(V);
- do {
- const Value *P = Worklist.pop_back_val();
- P = GetUnderlyingObjCPtr(P, DL);
-
- if (isa<AllocaInst>(P))
- return true;
-
- if (!Visited.insert(P).second)
- continue;
-
- if (const SelectInst *SI = dyn_cast<const SelectInst>(P)) {
- Worklist.push_back(SI->getTrueValue());
- Worklist.push_back(SI->getFalseValue());
- continue;
- }
-
- if (const PHINode *PN = dyn_cast<const PHINode>(P)) {
- for (Value *IncValue : PN->incoming_values())
- Worklist.push_back(IncValue);
- continue;
- }
- } while (!Worklist.empty());
-
- return false;
-}
-
-
/// @}
///
/// \defgroup ARCOpt ARC Optimization.
@@ -481,9 +446,6 @@ namespace {
/// MDKind identifiers.
ARCMDKindCache MDKindCache;
- // This is used to track if a pointer is stored into an alloca.
- DenseSet<const Value *> MultiOwnersSet;
-
/// A flag indicating whether this optimization pass should run.
bool Run;
@@ -524,8 +486,7 @@ namespace {
PairUpRetainsAndReleases(DenseMap<const BasicBlock *, BBState> &BBStates,
BlotMapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases, Module *M,
- SmallVectorImpl<Instruction *> &NewRetains,
- SmallVectorImpl<Instruction *> &NewReleases,
+ Instruction *Retain,
SmallVectorImpl<Instruction *> &DeadInsts,
RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
Value *Arg, bool KnownSafe,
@@ -1155,29 +1116,6 @@ bool ObjCARCOpt::VisitInstructionBottomUp(
case ARCInstKind::None:
// These are irrelevant.
return NestingDetected;
- case ARCInstKind::User:
- // If we have a store into an alloca of a pointer we are tracking, the
- // pointer has multiple owners implying that we must be more conservative.
- //
- // This comes up in the context of a pointer being ``KnownSafe''. In the
- // presence of a block being initialized, the frontend will emit the
- // objc_retain on the original pointer and the release on the pointer loaded
- // from the alloca. The optimizer will through the provenance analysis
- // realize that the two are related, but since we only require KnownSafe in
- // one direction, will match the inner retain on the original pointer with
- // the guard release on the original pointer. This is fixed by ensuring that
- // in the presence of allocas we only unconditionally remove pointers if
- // both our retain and our release are KnownSafe.
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- const DataLayout &DL = BB->getModule()->getDataLayout();
- if (AreAnyUnderlyingObjectsAnAlloca(SI->getPointerOperand(), DL)) {
- auto I = MyStates.findPtrBottomUpState(
- GetRCIdentityRoot(SI->getValueOperand()));
- if (I != MyStates.bottom_up_ptr_end())
- MultiOwnersSet.insert(I->first);
- }
- }
- break;
default:
break;
}
@@ -1540,8 +1478,7 @@ bool ObjCARCOpt::PairUpRetainsAndReleases(
DenseMap<const BasicBlock *, BBState> &BBStates,
BlotMapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases, Module *M,
- SmallVectorImpl<Instruction *> &NewRetains,
- SmallVectorImpl<Instruction *> &NewReleases,
+ Instruction *Retain,
SmallVectorImpl<Instruction *> &DeadInsts, RRInfo &RetainsToMove,
RRInfo &ReleasesToMove, Value *Arg, bool KnownSafe,
bool &AnyPairsCompletelyEliminated) {
@@ -1549,7 +1486,6 @@ bool ObjCARCOpt::PairUpRetainsAndReleases(
// is already incremented, we can similarly ignore possible decrements unless
// we are dealing with a retainable object with multiple provenance sources.
bool KnownSafeTD = true, KnownSafeBU = true;
- bool MultipleOwners = false;
bool CFGHazardAfflicted = false;
// Connect the dots between the top-down-collected RetainsToMove and
@@ -1561,14 +1497,13 @@ bool ObjCARCOpt::PairUpRetainsAndReleases(
unsigned OldCount = 0;
unsigned NewCount = 0;
bool FirstRelease = true;
- for (;;) {
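+ // The retain/release worklists are now loop-local: each candidate retain
+ // seeds a fresh NewRetains, and NewReleases is rebuilt on every pass.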
+ for (SmallVector<Instruction *, 4> NewRetains{Retain};;) {
+ SmallVector<Instruction *, 4> NewReleases;
for (Instruction *NewRetain : NewRetains) {
auto It = Retains.find(NewRetain);
assert(It != Retains.end());
const RRInfo &NewRetainRRI = It->second;
KnownSafeTD &= NewRetainRRI.KnownSafe;
- MultipleOwners =
- MultipleOwners || MultiOwnersSet.count(GetArgRCIdentityRoot(NewRetain));
for (Instruction *NewRetainRelease : NewRetainRRI.Calls) {
auto Jt = Releases.find(NewRetainRelease);
if (Jt == Releases.end())
@@ -1691,7 +1626,6 @@ bool ObjCARCOpt::PairUpRetainsAndReleases(
}
}
}
- NewReleases.clear();
if (NewRetains.empty()) break;
}
@@ -1745,10 +1679,6 @@ bool ObjCARCOpt::PerformCodePlacement(
DEBUG(dbgs() << "\n== ObjCARCOpt::PerformCodePlacement ==\n");
bool AnyPairsCompletelyEliminated = false;
- RRInfo RetainsToMove;
- RRInfo ReleasesToMove;
- SmallVector<Instruction *, 4> NewRetains;
- SmallVector<Instruction *, 4> NewReleases;
SmallVector<Instruction *, 8> DeadInsts;
// Visit each retain.
@@ -1780,9 +1710,10 @@ bool ObjCARCOpt::PerformCodePlacement(
// Connect the dots between the top-down-collected RetainsToMove and
// bottom-up-collected ReleasesToMove to form sets of related calls.
- NewRetains.push_back(Retain);
+ RRInfo RetainsToMove, ReleasesToMove;
+
bool PerformMoveCalls = PairUpRetainsAndReleases(
- BBStates, Retains, Releases, M, NewRetains, NewReleases, DeadInsts,
+ BBStates, Retains, Releases, M, Retain, DeadInsts,
RetainsToMove, ReleasesToMove, Arg, KnownSafe,
AnyPairsCompletelyEliminated);
@@ -1792,12 +1723,6 @@ bool ObjCARCOpt::PerformCodePlacement(
MoveCalls(Arg, RetainsToMove, ReleasesToMove,
Retains, Releases, DeadInsts, M);
}
-
- // Clean up state for next retain.
- NewReleases.clear();
- NewRetains.clear();
- RetainsToMove.clear();
- ReleasesToMove.clear();
}
// Now that we're done moving everything, we can delete the newly dead
@@ -1987,9 +1912,6 @@ bool ObjCARCOpt::OptimizeSequences(Function &F) {
Releases,
F.getParent());
- // Cleanup.
- MultiOwnersSet.clear();
-
return AnyPairsCompletelyEliminated && NestingDetected;
}
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index adc903cab31b..5b467dc9fe12 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -41,8 +41,8 @@ using namespace llvm;
STATISTIC(NumRemoved, "Number of instructions removed");
STATISTIC(NumBranchesRemoved, "Number of branch instructions removed");
-// This is a tempoary option until we change the interface
-// to this pass based on optimization level.
+// This is a temporary option until we change the interface to this pass based
+// on optimization level.
static cl::opt<bool> RemoveControlFlowFlag("adce-remove-control-flow",
cl::init(true), cl::Hidden);
@@ -110,7 +110,7 @@ class AggressiveDeadCodeElimination {
/// The set of blocks which we have determined whose control
/// dependence sources must be live and which have not had
- /// those dependences analyized.
+ /// those dependences analyzed.
SmallPtrSet<BasicBlock *, 16> NewLiveBlocks;
/// Set up auxiliary data structures for Instructions and BasicBlocks and
@@ -145,7 +145,7 @@ class AggressiveDeadCodeElimination {
/// was removed.
bool removeDeadInstructions();
- /// Identify connected sections of the control flow grap which have
+ /// Identify connected sections of the control flow graph which have
/// dead terminators and rewrite the control flow graph to remove them.
void updateDeadRegions();
@@ -234,7 +234,7 @@ void AggressiveDeadCodeElimination::initialize() {
return Iter != end() && Iter->second;
}
} State;
-
+
State.reserve(F.size());
// Iterate over blocks in depth-first pre-order and
// treat all edges to a block already seen as loop back edges
@@ -262,25 +262,6 @@ void AggressiveDeadCodeElimination::initialize() {
continue;
auto *BB = BBInfo.BB;
if (!PDT.getNode(BB)) {
- markLive(BBInfo.Terminator);
- continue;
- }
- for (auto *Succ : successors(BB))
- if (!PDT.getNode(Succ)) {
- markLive(BBInfo.Terminator);
- break;
- }
- }
-
- // Mark blocks live if there is no path from the block to the
- // return of the function or a successor for which this is true.
- // This protects IDFCalculator which cannot handle such blocks.
- for (auto &BBInfoPair : BlockInfo) {
- auto &BBInfo = BBInfoPair.second;
- if (BBInfo.terminatorIsLive())
- continue;
- auto *BB = BBInfo.BB;
- if (!PDT.getNode(BB)) {
DEBUG(dbgs() << "Not post-dominated by return: " << BB->getName()
<< '\n';);
markLive(BBInfo.Terminator);
@@ -579,7 +560,7 @@ void AggressiveDeadCodeElimination::updateDeadRegions() {
PreferredSucc = Info;
}
assert((PreferredSucc && PreferredSucc->PostOrder > 0) &&
- "Failed to find safe successor for dead branc");
+ "Failed to find safe successor for dead branch");
bool First = true;
for (auto *Succ : successors(BB)) {
if (!First || Succ != PreferredSucc->BB)
@@ -594,13 +575,13 @@ void AggressiveDeadCodeElimination::updateDeadRegions() {
// reverse top-sort order
void AggressiveDeadCodeElimination::computeReversePostOrder() {
-
- // This provides a post-order numbering of the reverse conrtol flow graph
+
+ // This provides a post-order numbering of the reverse control flow graph
// Note that it is incomplete in the presence of infinite loops but we don't
// need to number blocks which don't reach the end of the function since
// all branches in those blocks are forced live.
-
- // For each block without successors, extend the DFS from the bloack
+
+ // For each block without successors, extend the DFS from the block
// backward through the graph
SmallPtrSet<BasicBlock*, 16> Visited;
unsigned PostOrder = 0;
@@ -644,8 +625,8 @@ PreservedAnalyses ADCEPass::run(Function &F, FunctionAnalysisManager &FAM) {
if (!AggressiveDeadCodeElimination(F, PDT).performDeadCodeElimination())
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
- auto PA = PreservedAnalyses();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
return PA;
}
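The PreservedAnalyses pattern introduced here recurs in the BDCE, ConstantHoisting, and DCE hunks below: a pass that rewrites instructions but never alters the CFG can preserve every CFG-dependent analysis in one call. A generic sketch (the pass and its helper are hypothetical):

#include "llvm/IR/PassManager.h"
using namespace llvm;

struct LocalRewritePass : PassInfoMixin<LocalRewritePass> {
  static bool rewriteInstructions(Function &F); // hypothetical; CFG untouched

  PreservedAnalyses run(Function &F, FunctionAnalysisManager &) {
    if (!rewriteInstructions(F))
      return PreservedAnalyses::all();
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>(); // DominatorTree, LoopInfo, etc. stay valid
    return PA;
  }
};
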
diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index c1df3173c0fc..fd931c521c8f 100644
--- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -438,19 +438,13 @@ AlignmentFromAssumptionsPass::run(Function &F, FunctionAnalysisManager &AM) {
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
- bool Changed = runImpl(F, AC, &SE, &DT);
-
- // FIXME: We need to invalidate this to avoid PR28400. Is there a better
- // solution?
- AM.invalidate<ScalarEvolutionAnalysis>(F);
-
- if (!Changed)
+ if (!runImpl(F, AC, &SE, &DT))
return PreservedAnalyses::all();
+
PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<AAManager>();
PA.preserve<ScalarEvolutionAnalysis>();
PA.preserve<GlobalsAA>();
- PA.preserve<LoopAnalysis>();
- PA.preserve<DominatorTreeAnalysis>();
return PA;
}
diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp
index 251b38707769..61e8700f1cd6 100644
--- a/lib/Transforms/Scalar/BDCE.cpp
+++ b/lib/Transforms/Scalar/BDCE.cpp
@@ -80,8 +80,8 @@ PreservedAnalyses BDCEPass::run(Function &F, FunctionAnalysisManager &AM) {
if (!bitTrackingDCE(F, DB))
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
- auto PA = PreservedAnalyses();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
return PA;
}
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 06d3d6a73954..b323ab3bd443 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_library(LLVMScalarOpts
IVUsersPrinter.cpp
InductiveRangeCheckElimination.cpp
IndVarSimplify.cpp
+ InferAddressSpaces.cpp
JumpThreading.cpp
LICM.cpp
LoopAccessAnalysisPrinter.cpp
@@ -29,6 +30,7 @@ add_llvm_library(LLVMScalarOpts
LoopInterchange.cpp
LoopLoadElimination.cpp
LoopPassManager.cpp
+ LoopPredication.cpp
LoopRerollPass.cpp
LoopRotation.cpp
LoopSimplifyCFG.cpp
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index 38262514c9ec..ee6333e88716 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -136,8 +136,16 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst,
if (Idx != ~0U && isa<PHINode>(Inst))
return cast<PHINode>(Inst)->getIncomingBlock(Idx)->getTerminator();
- BasicBlock *IDom = DT->getNode(Inst->getParent())->getIDom()->getBlock();
- return IDom->getTerminator();
+ // This must be an EH pad. Iterate over immediate dominators until we find a
+ // non-EH pad. We need to skip over catchswitch blocks, which are both EH pads
+ // and terminators.
+ auto IDom = DT->getNode(Inst->getParent())->getIDom();
+ while (IDom->getBlock()->isEHPad()) {
+ assert(Entry != IDom->getBlock() && "eh pad in entry block");
+ IDom = IDom->getIDom();
+ }
+
+ return IDom->getBlock()->getTerminator();
}
/// \brief Find an insertion point that dominates all uses.
@@ -289,8 +297,8 @@ void ConstantHoistingPass::collectConstantCandidates(Function &Fn) {
// bit widths (APInt Operator- does not like that). If the value cannot be
// represented in uint64 we return an "empty" APInt. This is then interpreted
// as the value is not in range.
-static llvm::Optional<APInt> calculateOffsetDiff(APInt V1, APInt V2)
-{
+static llvm::Optional<APInt> calculateOffsetDiff(const APInt &V1,
+ const APInt &V2) {
llvm::Optional<APInt> Res = None;
unsigned BW = V1.getBitWidth() > V2.getBitWidth() ?
V1.getBitWidth() : V2.getBitWidth();
@@ -623,6 +631,7 @@ PreservedAnalyses ConstantHoistingPass::run(Function &F,
if (!runImpl(F, TTI, DT, F.getEntryBlock()))
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
- return PreservedAnalyses::none();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 84f9373ae914..c843c61ea94e 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -235,9 +235,8 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) {
// Analyse each switch case in turn. This is done in reverse order so that
// removing a case doesn't cause trouble for the iteration.
bool Changed = false;
- for (SwitchInst::CaseIt CI = SI->case_end(), CE = SI->case_begin(); CI-- != CE;
- ) {
- ConstantInt *Case = CI.getCaseValue();
+ for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
+ ConstantInt *Case = CI->getCaseValue();
// Check to see if the switch condition is equal to/not equal to the case
// value on every incoming edge, equal/not equal being the same each time.
@@ -270,8 +269,9 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) {
if (State == LazyValueInfo::False) {
// This case never fires - remove it.
- CI.getCaseSuccessor()->removePredecessor(BB);
- SI->removeCase(CI); // Does not invalidate the iterator.
+ CI->getCaseSuccessor()->removePredecessor(BB);
+ CI = SI->removeCase(CI);
+ CE = SI->case_end();
// The condition can be modified by removePredecessor's PHI simplification
// logic.
@@ -279,7 +279,9 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) {
++NumDeadCases;
Changed = true;
- } else if (State == LazyValueInfo::True) {
+ continue;
+ }
+ if (State == LazyValueInfo::True) {
// This case always fires. Arrange for the switch to be turned into an
// unconditional branch by replacing the switch condition with the case
// value.
@@ -288,6 +290,9 @@ static bool processSwitch(SwitchInst *SI, LazyValueInfo *LVI) {
Changed = true;
break;
}
+
+ // Increment the case iterator since we didn't delete it.
+ ++CI;
}
if (Changed)
@@ -308,7 +313,7 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
// Try to mark pointer typed parameters as non-null. We skip the
// relatively expensive analysis for constants which are obviously either
// null or non-null to start with.
- if (Type && !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) &&
+ if (Type && !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
!isa<Constant>(V) &&
LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
ConstantPointerNull::get(Type),
@@ -322,7 +327,7 @@ static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
if (Indices.empty())
return false;
- AttributeSet AS = CS.getAttributes();
+ AttributeList AS = CS.getAttributes();
LLVMContext &Ctx = CS.getInstruction()->getContext();
AS = AS.addAttribute(Ctx, Indices, Attribute::get(Ctx, Attribute::NonNull));
CS.setAttributes(AS);
@@ -570,10 +575,6 @@ CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F);
bool Changed = runImpl(F, LVI);
- // FIXME: We need to invalidate LVI to avoid PR28400. Is there a better
- // solution?
- AM.invalidate<LazyValueAnalysis>(F);
-
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
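The switch loop above drops reverse iteration in favor of the erase-returns-next idiom. A sketch of that idiom on its own, assuming removeCase returns the iterator following the erased case (isDeadCase and BB are stand-ins for the LVI query and the switch's parent block):

  for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
    if (isDeadCase(*CI)) {
      CI->getCaseSuccessor()->removePredecessor(BB);
      CI = SI->removeCase(CI); // iterator of the next case
      CE = SI->case_end();     // refresh; removal may invalidate end()
      continue;
    }
    ++CI;
  }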
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index cc2a3cfaf9d1..07a0ba9b1222 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -124,9 +124,12 @@ static bool eliminateDeadCode(Function &F, TargetLibraryInfo *TLI) {
}
PreservedAnalyses DCEPass::run(Function &F, FunctionAnalysisManager &AM) {
- if (eliminateDeadCode(F, AM.getCachedResult<TargetLibraryAnalysis>(F)))
- return PreservedAnalyses::none();
- return PreservedAnalyses::all();
+ if (!eliminateDeadCode(F, AM.getCachedResult<TargetLibraryAnalysis>(F)))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
namespace {
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 4d4c3baef3f5..1ec38e56aa4c 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -135,13 +135,13 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) {
if (auto CS = CallSite(I)) {
if (Function *F = CS.getCalledFunction()) {
StringRef FnName = F->getName();
- if (TLI.has(LibFunc::strcpy) && FnName == TLI.getName(LibFunc::strcpy))
+ if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy))
return true;
- if (TLI.has(LibFunc::strncpy) && FnName == TLI.getName(LibFunc::strncpy))
+ if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy))
return true;
- if (TLI.has(LibFunc::strcat) && FnName == TLI.getName(LibFunc::strcat))
+ if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat))
return true;
- if (TLI.has(LibFunc::strncat) && FnName == TLI.getName(LibFunc::strncat))
+ if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat))
return true;
}
}
@@ -287,19 +287,14 @@ static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
}
namespace {
-enum OverwriteResult {
- OverwriteBegin,
- OverwriteComplete,
- OverwriteEnd,
- OverwriteUnknown
-};
+enum OverwriteResult { OW_Begin, OW_Complete, OW_End, OW_Unknown };
}
-/// Return 'OverwriteComplete' if a store to the 'Later' location completely
-/// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of
-/// the 'Earlier' location is completely overwritten by 'Later',
-/// 'OverwriteBegin' if the beginning of the 'Earlier' location is overwritten
-/// by 'Later', or 'OverwriteUnknown' if nothing can be determined.
+/// Return 'OW_Complete' if a store to the 'Later' location completely
+/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
+/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
+/// beginning of the 'Earlier' location is overwritten by 'Later', or
+/// 'OW_Unknown' if nothing can be determined.
static OverwriteResult isOverwrite(const MemoryLocation &Later,
const MemoryLocation &Earlier,
const DataLayout &DL,
@@ -310,7 +305,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// If we don't know the sizes of either access, then we can't do a comparison.
if (Later.Size == MemoryLocation::UnknownSize ||
Earlier.Size == MemoryLocation::UnknownSize)
- return OverwriteUnknown;
+ return OW_Unknown;
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
@@ -320,7 +315,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
if (P1 == P2) {
// Make sure that the Later size is >= the Earlier size.
if (Later.Size >= Earlier.Size)
- return OverwriteComplete;
+ return OW_Complete;
}
// Check to see if the later store is to the entire object (either a global,
@@ -332,13 +327,13 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
if (UO1 != UO2)
- return OverwriteUnknown;
+ return OW_Unknown;
// If the "Later" store is to a recognizable object, get its size.
uint64_t ObjectSize = getPointerSize(UO2, DL, TLI);
if (ObjectSize != MemoryLocation::UnknownSize)
if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size)
- return OverwriteComplete;
+ return OW_Complete;
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
@@ -350,7 +345,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// If the base pointers still differ, we have two completely different stores.
if (BP1 != BP2)
- return OverwriteUnknown;
+ return OW_Unknown;
// The later store completely overlaps the earlier store if:
//
@@ -370,7 +365,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
if (EarlierOff >= LaterOff &&
Later.Size >= Earlier.Size &&
uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
- return OverwriteComplete;
+ return OW_Complete;
// We may now overlap, although the overlap is not complete. There might also
// be other incomplete overlaps, and together, they might cover the complete
@@ -428,7 +423,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
") Composite Later [" <<
ILI->second << ", " << ILI->first << ")\n");
++NumCompletePartials;
- return OverwriteComplete;
+ return OW_Complete;
}
}
@@ -443,7 +438,7 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
if (!EnablePartialOverwriteTracking &&
(LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) &&
int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)))
- return OverwriteEnd;
+ return OW_End;
// Finally, we also need to check if the later store overwrites the beginning
// of the earlier store.
@@ -458,11 +453,11 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
(LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff)) {
assert(int64_t(LaterOff + Later.Size) <
int64_t(EarlierOff + Earlier.Size) &&
- "Expect to be handled as OverwriteComplete");
- return OverwriteBegin;
+ "Expect to be handled as OW_Complete");
+ return OW_Begin;
}
// Otherwise, they don't completely overlap.
- return OverwriteUnknown;
+ return OW_Unknown;
}
/// If 'Inst' might be a self read (i.e. a noop copy of a
@@ -551,7 +546,7 @@ static bool memoryIsNotModifiedBetween(Instruction *FirstI,
Instruction *I = &*BI;
if (I->mayWriteToMemory() && I != SecondI) {
auto Res = AA->getModRefInfo(I, MemLoc);
- if (Res != MRI_NoModRef)
+ if (Res & MRI_Mod)
return false;
}
}
@@ -909,7 +904,7 @@ static bool tryToShortenBegin(Instruction *EarlierWrite,
if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) {
assert(LaterStart + LaterSize < EarlierStart + EarlierSize &&
- "Should have been handled as OverwriteComplete");
+ "Should have been handled as OW_Complete");
if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
LaterSize, false)) {
IntervalMap.erase(OII);
@@ -1105,7 +1100,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
OverwriteResult OR =
isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset,
DepWrite, IOL);
- if (OR == OverwriteComplete) {
+ if (OR == OW_Complete) {
DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
@@ -1117,15 +1112,15 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
// We erased DepWrite; start over.
InstDep = MD->getDependency(Inst);
continue;
- } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||
- ((OR == OverwriteBegin &&
+ } else if ((OR == OW_End && isShortenableAtTheEnd(DepWrite)) ||
+ ((OR == OW_Begin &&
isShortenableAtTheBeginning(DepWrite)))) {
assert(!EnablePartialOverwriteTracking && "Do not expect to perform "
"when partial-overwrite "
"tracking is enabled");
int64_t EarlierSize = DepLoc.Size;
int64_t LaterSize = Loc.Size;
- bool IsOverwriteEnd = (OR == OverwriteEnd);
+ bool IsOverwriteEnd = (OR == OW_End);
MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
InstWriteOffset, LaterSize, IsOverwriteEnd);
}
@@ -1186,8 +1181,9 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
if (!eliminateDeadStores(F, AA, MD, DT, TLI))
return PreservedAnalyses::all();
+
PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
PA.preserve<MemoryDependenceAnalysis>();
return PA;
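The memoryIsNotModifiedBetween change relies on ModRefInfo being a bitmask, with MRI_Ref and MRI_Mod as independent bits and MRI_ModRef as their union. A sketch of the tightened test under that assumption:

  ModRefInfo Res = AA->getModRefInfo(I, MemLoc);
  // Old: bail on any ModRef (Res != MRI_NoModRef), which also rejected
  // harmless read-only accesses. New: bail only if the Mod bit is set.
  if (Res & MRI_Mod)
    return false; // I may write to MemLoc between the two instructions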
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 16e08ee58fbe..04479b6e49ac 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -19,6 +19,8 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -32,7 +34,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/MemorySSA.h"
#include <deque>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -253,6 +254,7 @@ public:
DominatorTree &DT;
AssumptionCache &AC;
MemorySSA *MSSA;
+ std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
typedef RecyclingAllocator<
BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value *>> AllocatorTy;
typedef ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
@@ -315,7 +317,9 @@ public:
/// \brief Set up the EarlyCSE runner for a particular function.
EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA)
- : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA), CurrentGeneration(0) {}
+ : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA),
+ MSSAUpdater(make_unique<MemorySSAUpdater>(MSSA)), CurrentGeneration(0) {
+ }
bool run();
@@ -388,7 +392,7 @@ private:
ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
: IsTargetMemInst(false), Inst(Inst) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
- if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1)
+ if (TTI.getTgtMemIntrinsic(II, Info))
IsTargetMemInst = true;
}
bool isLoad() const {
@@ -400,17 +404,14 @@ private:
return isa<StoreInst>(Inst);
}
bool isAtomic() const {
- if (IsTargetMemInst) {
- assert(Info.IsSimple && "need to refine IsSimple in TTI");
- return false;
- }
+ if (IsTargetMemInst)
+ return Info.Ordering != AtomicOrdering::NotAtomic;
return Inst->isAtomic();
}
bool isUnordered() const {
- if (IsTargetMemInst) {
- assert(Info.IsSimple && "need to refine IsSimple in TTI");
- return true;
- }
+ if (IsTargetMemInst)
+ return Info.isUnordered();
+
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
return LI->isUnordered();
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
@@ -421,10 +422,9 @@ private:
}
bool isVolatile() const {
- if (IsTargetMemInst) {
- assert(Info.IsSimple && "need to refine IsSimple in TTI");
- return false;
- }
+ if (IsTargetMemInst)
+ return Info.IsVolatile;
+
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
return LI->isVolatile();
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
@@ -517,7 +517,7 @@ private:
if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
PhisToCheck.push_back(MP);
- MSSA->removeMemoryAccess(WI);
+ MSSAUpdater->removeMemoryAccess(WI);
for (MemoryPhi *MP : PhisToCheck) {
MemoryAccess *FirstIn = MP->getIncomingValue(0);
@@ -587,27 +587,28 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// which reaches this block where the condition might hold a different
// value. Since we're adding this to the scoped hash table (like any other
// def), it will have been popped if we encounter a future merge block.
- if (BasicBlock *Pred = BB->getSinglePredecessor())
- if (auto *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
- if (BI->isConditional())
- if (auto *CondInst = dyn_cast<Instruction>(BI->getCondition()))
- if (SimpleValue::canHandle(CondInst)) {
- assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
- auto *ConditionalConstant = (BI->getSuccessor(0) == BB) ?
- ConstantInt::getTrue(BB->getContext()) :
- ConstantInt::getFalse(BB->getContext());
- AvailableValues.insert(CondInst, ConditionalConstant);
- DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
- << CondInst->getName() << "' as " << *ConditionalConstant
- << " in " << BB->getName() << "\n");
- // Replace all dominated uses with the known value.
- if (unsigned Count =
- replaceDominatedUsesWith(CondInst, ConditionalConstant, DT,
- BasicBlockEdge(Pred, BB))) {
- Changed = true;
- NumCSECVP = NumCSECVP + Count;
- }
- }
+ if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+ auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
+ if (BI && BI->isConditional()) {
+ auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
+ if (CondInst && SimpleValue::canHandle(CondInst)) {
+ assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
+ auto *TorF = (BI->getSuccessor(0) == BB)
+ ? ConstantInt::getTrue(BB->getContext())
+ : ConstantInt::getFalse(BB->getContext());
+ AvailableValues.insert(CondInst, TorF);
+ DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
+ << CondInst->getName() << "' as " << *TorF << " in "
+ << BB->getName() << "\n");
+ // Replace all dominated uses with the known value.
+ if (unsigned Count = replaceDominatedUsesWith(
+ CondInst, TorF, DT, BasicBlockEdge(Pred, BB))) {
+ Changed = true;
+ NumCSECVP = NumCSECVP + Count;
+ }
+ }
+ }
+ }
/// LastStore - Keep track of the last non-volatile store that we saw... for
/// as long as there is no instruction that reads memory. If we see a store
@@ -761,12 +762,13 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
- // If this instruction may read from memory, forget LastStore.
- // Load/store intrinsics will indicate both a read and a write to
- // memory. The target may override this (e.g. so that a store intrinsic
- // does not read from memory, and thus will be treated the same as a
- // regular store for commoning purposes).
- if (Inst->mayReadFromMemory() &&
+ // If this instruction may read from memory or throw (and potentially read
+ // from memory in the exception handler), forget LastStore. Load/store
+ // intrinsics will indicate both a read and a write to memory. The target
+ // may override this (e.g. so that a store intrinsic does not read from
+ // memory, and thus will be treated the same as a regular store for
+ // commoning purposes).
+ if ((Inst->mayReadFromMemory() || Inst->mayThrow()) &&
!(MemInst.isValid() && !MemInst.mayReadFromMemory()))
LastStore = nullptr;
@@ -967,10 +969,8 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
if (!CSE.run())
return PreservedAnalyses::all();
- // CSE preserves the dominator tree because it doesn't mutate the CFG.
- // FIXME: Bundle this with other CFG-preservation.
PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
if (UseMemorySSA)
PA.preserve<MemorySSAAnalysis>();
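EarlyCSE now routes MemorySSA mutations through a MemorySSAUpdater rather than calling MemorySSA directly. A minimal sketch of the ownership pattern, using 2017-era llvm::make_unique; DeadInst is a hypothetical instruction being erased:

  auto MSSAUpdater = make_unique<MemorySSAUpdater>(MSSA); // MSSA owned by the pass
  if (MemoryAccess *MA = MSSA->getMemoryAccess(DeadInst))
    MSSAUpdater->removeMemoryAccess(MA); // keeps MemorySSA consistent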
diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp
index 545036d724ef..8a5af6195f1b 100644
--- a/lib/Transforms/Scalar/Float2Int.cpp
+++ b/lib/Transforms/Scalar/Float2Int.cpp
@@ -516,11 +516,10 @@ FunctionPass *createFloat2IntPass() { return new Float2IntLegacyPass(); }
PreservedAnalyses Float2IntPass::run(Function &F, FunctionAnalysisManager &) {
if (!runImpl(F))
return PreservedAnalyses::all();
- else {
- // FIXME: This should also 'preserve the CFG'.
- PreservedAnalyses PA;
- PA.preserve<GlobalsAA>();
- return PA;
- }
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<GlobalsAA>();
+ return PA;
}
} // End namespace llvm
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 0137378b828b..be696df548d5 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -36,7 +36,6 @@
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalVariable.h"
@@ -51,9 +50,12 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/VNCoercion.h"
+
#include <vector>
using namespace llvm;
using namespace llvm::gvn;
+using namespace llvm::VNCoercion;
using namespace PatternMatch;
#define DEBUG_TYPE "gvn"
@@ -595,11 +597,12 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<GlobalsAA>();
+ PA.preserve<TargetLibraryAnalysis>();
return PA;
}
-LLVM_DUMP_METHOD
-void GVN::dump(DenseMap<uint32_t, Value*>& d) {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) {
errs() << "{\n";
for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
E = d.end(); I != E; ++I) {
@@ -608,6 +611,7 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
}
errs() << "}\n";
}
+#endif
/// Return true if we can prove that the value
/// we're analyzing is fully available in the specified block. As we go, keep
@@ -690,442 +694,6 @@ SpeculationFailure:
}
-/// Return true if CoerceAvailableValueToLoadType will succeed.
-static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
- Type *LoadTy,
- const DataLayout &DL) {
- // If the loaded or stored value is an first class array or struct, don't try
- // to transform them. We need to be able to bitcast to integer.
- if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
- StoredVal->getType()->isStructTy() ||
- StoredVal->getType()->isArrayTy())
- return false;
-
- // The store has to be at least as big as the load.
- if (DL.getTypeSizeInBits(StoredVal->getType()) <
- DL.getTypeSizeInBits(LoadTy))
- return false;
-
- return true;
-}
-
-/// If we saw a store of a value to memory, and
-/// then a load from a must-aliased pointer of a different type, try to coerce
-/// the stored value. LoadedTy is the type of the load we want to replace.
-/// IRB is IRBuilder used to insert new instructions.
-///
-/// If we can't do it, return null.
-static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
- IRBuilder<> &IRB,
- const DataLayout &DL) {
- assert(CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
- "precondition violation - materialization can't fail");
-
- if (auto *C = dyn_cast<Constant>(StoredVal))
- if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
- StoredVal = FoldedStoredVal;
-
- // If this is already the right type, just return it.
- Type *StoredValTy = StoredVal->getType();
-
- uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy);
- uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy);
-
- // If the store and reload are the same size, we can always reuse it.
- if (StoredValSize == LoadedValSize) {
- // Pointer to Pointer -> use bitcast.
- if (StoredValTy->getScalarType()->isPointerTy() &&
- LoadedTy->getScalarType()->isPointerTy()) {
- StoredVal = IRB.CreateBitCast(StoredVal, LoadedTy);
- } else {
- // Convert source pointers to integers, which can be bitcast.
- if (StoredValTy->getScalarType()->isPointerTy()) {
- StoredValTy = DL.getIntPtrType(StoredValTy);
- StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy);
- }
-
- Type *TypeToCastTo = LoadedTy;
- if (TypeToCastTo->getScalarType()->isPointerTy())
- TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
-
- if (StoredValTy != TypeToCastTo)
- StoredVal = IRB.CreateBitCast(StoredVal, TypeToCastTo);
-
- // Cast to pointer if the load needs a pointer type.
- if (LoadedTy->getScalarType()->isPointerTy())
- StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy);
- }
-
- if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
- if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
- StoredVal = FoldedStoredVal;
-
- return StoredVal;
- }
-
- // If the loaded value is smaller than the available value, then we can
- // extract out a piece from it. If the available value is too small, then we
- // can't do anything.
- assert(StoredValSize >= LoadedValSize &&
- "CanCoerceMustAliasedValueToLoad fail");
-
- // Convert source pointers to integers, which can be manipulated.
- if (StoredValTy->getScalarType()->isPointerTy()) {
- StoredValTy = DL.getIntPtrType(StoredValTy);
- StoredVal = IRB.CreatePtrToInt(StoredVal, StoredValTy);
- }
-
- // Convert vectors and fp to integer, which can be manipulated.
- if (!StoredValTy->isIntegerTy()) {
- StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
- StoredVal = IRB.CreateBitCast(StoredVal, StoredValTy);
- }
-
- // If this is a big-endian system, we need to shift the value down to the low
- // bits so that a truncate will work.
- if (DL.isBigEndian()) {
- uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) -
- DL.getTypeStoreSizeInBits(LoadedTy);
- StoredVal = IRB.CreateLShr(StoredVal, ShiftAmt, "tmp");
- }
-
- // Truncate the integer to the right size now.
- Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
- StoredVal = IRB.CreateTrunc(StoredVal, NewIntTy, "trunc");
-
- if (LoadedTy != NewIntTy) {
- // If the result is a pointer, inttoptr.
- if (LoadedTy->getScalarType()->isPointerTy())
- StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy, "inttoptr");
- else
- // Otherwise, bitcast.
- StoredVal = IRB.CreateBitCast(StoredVal, LoadedTy, "bitcast");
- }
-
- if (auto *C = dyn_cast<Constant>(StoredVal))
- if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
- StoredVal = FoldedStoredVal;
-
- return StoredVal;
-}
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering memory write (store,
-/// memset, memcpy, memmove). This means that the write *may* provide bits used
-/// by the load but we can't be sure because the pointers don't mustalias.
-///
-/// Check this case to see if there is anything more we can do before we give
-/// up. This returns -1 if we have to give up, or a byte number in the stored
-/// value of the piece that feeds the load.
-static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
- Value *WritePtr,
- uint64_t WriteSizeInBits,
- const DataLayout &DL) {
- // If the loaded or stored value is a first class array or struct, don't try
- // to transform them. We need to be able to bitcast to integer.
- if (LoadTy->isStructTy() || LoadTy->isArrayTy())
- return -1;
-
- int64_t StoreOffset = 0, LoadOffset = 0;
- Value *StoreBase =
- GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
- Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
- if (StoreBase != LoadBase)
- return -1;
-
- // If the load and store are to the exact same address, they should have been
- // a must alias. AA must have gotten confused.
- // FIXME: Study to see if/when this happens. One case is forwarding a memset
- // to a load from the base of the memset.
-
- // If the load and store don't overlap at all, the store doesn't provide
- // anything to the load. In this case, they really don't alias at all, AA
- // must have gotten confused.
- uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);
-
- if ((WriteSizeInBits & 7) | (LoadSize & 7))
- return -1;
- uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
- LoadSize /= 8;
-
-
- bool isAAFailure = false;
- if (StoreOffset < LoadOffset)
- isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
- else
- isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
-
- if (isAAFailure)
- return -1;
-
- // If the Load isn't completely contained within the stored bits, we don't
- // have all the bits to feed it. We could do something crazy in the future
- // (issue a smaller load then merge the bits in) but this seems unlikely to be
- // valuable.
- if (StoreOffset > LoadOffset ||
- StoreOffset+StoreSize < LoadOffset+LoadSize)
- return -1;
-
- // Okay, we can do this transformation. Return the number of bytes into the
- // store that the load is.
- return LoadOffset-StoreOffset;
-}
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering store.
-static int AnalyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
- StoreInst *DepSI) {
- // Cannot handle reading from store of first-class aggregate yet.
- if (DepSI->getValueOperand()->getType()->isStructTy() ||
- DepSI->getValueOperand()->getType()->isArrayTy())
- return -1;
-
- const DataLayout &DL = DepSI->getModule()->getDataLayout();
- Value *StorePtr = DepSI->getPointerOperand();
- uint64_t StoreSize =DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
- return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
- StorePtr, StoreSize, DL);
-}
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being clobbered by another load. See if
-/// the other load can feed into the second load.
-static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr,
- LoadInst *DepLI, const DataLayout &DL){
- // Cannot handle reading from store of first-class aggregate yet.
- if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
- return -1;
-
- Value *DepPtr = DepLI->getPointerOperand();
- uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
- int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
- if (R != -1) return R;
-
- // If we have a load/load clobber an DepLI can be widened to cover this load,
- // then we should widen it!
- int64_t LoadOffs = 0;
- const Value *LoadBase =
- GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
- unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
-
- unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
- LoadBase, LoadOffs, LoadSize, DepLI);
- if (Size == 0) return -1;
-
- // Check non-obvious conditions enforced by MDA which we rely on for being
- // able to materialize this potentially available value
- assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
- assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");
-
- return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, DL);
-}
-
-
-
-static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
- MemIntrinsic *MI,
- const DataLayout &DL) {
- // If the mem operation is a non-constant size, we can't handle it.
- ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
- if (!SizeCst) return -1;
- uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
-
- // If this is memset, we just need to see if the offset is valid in the size
- // of the memset..
- if (MI->getIntrinsicID() == Intrinsic::memset)
- return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
- MemSizeInBits, DL);
-
- // If we have a memcpy/memmove, the only case we can handle is if this is a
- // copy from constant memory. In that case, we can read directly from the
- // constant memory.
- MemTransferInst *MTI = cast<MemTransferInst>(MI);
-
- Constant *Src = dyn_cast<Constant>(MTI->getSource());
- if (!Src) return -1;
-
- GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
- if (!GV || !GV->isConstant()) return -1;
-
- // See if the access is within the bounds of the transfer.
- int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
- MI->getDest(), MemSizeInBits, DL);
- if (Offset == -1)
- return Offset;
-
- unsigned AS = Src->getType()->getPointerAddressSpace();
- // Otherwise, see if we can constant fold a load from the constant with the
- // offset applied as appropriate.
- Src = ConstantExpr::getBitCast(Src,
- Type::getInt8PtrTy(Src->getContext(), AS));
- Constant *OffsetCst =
- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
- OffsetCst);
- Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL))
- return Offset;
- return -1;
-}
-
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering store. This means
-/// that the store provides bits used by the load but we the pointers don't
-/// mustalias. Check this case to see if there is anything more we can do
-/// before we give up.
-static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
- Type *LoadTy,
- Instruction *InsertPt, const DataLayout &DL){
- LLVMContext &Ctx = SrcVal->getType()->getContext();
-
- uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
- uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
-
- IRBuilder<> Builder(InsertPt);
-
- // Compute which bits of the stored value are being used by the load. Convert
- // to an integer type to start with.
- if (SrcVal->getType()->getScalarType()->isPointerTy())
- SrcVal = Builder.CreatePtrToInt(SrcVal,
- DL.getIntPtrType(SrcVal->getType()));
- if (!SrcVal->getType()->isIntegerTy())
- SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8));
-
- // Shift the bits to the least significant depending on endianness.
- unsigned ShiftAmt;
- if (DL.isLittleEndian())
- ShiftAmt = Offset*8;
- else
- ShiftAmt = (StoreSize-LoadSize-Offset)*8;
-
- if (ShiftAmt)
- SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt);
-
- if (LoadSize != StoreSize)
- SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8));
-
- return CoerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
-}
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering load. This means
-/// that the load *may* provide bits used by the load but we can't be sure
-/// because the pointers don't mustalias. Check this case to see if there is
-/// anything more we can do before we give up.
-static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset,
- Type *LoadTy, Instruction *InsertPt,
- GVN &gvn) {
- const DataLayout &DL = SrcVal->getModule()->getDataLayout();
- // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
- // widen SrcVal out to a larger load.
- unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
- unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
- if (Offset+LoadSize > SrcValStoreSize) {
- assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
- assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
- // If we have a load/load clobber an DepLI can be widened to cover this
- // load, then we should widen it to the next power of 2 size big enough!
- unsigned NewLoadSize = Offset+LoadSize;
- if (!isPowerOf2_32(NewLoadSize))
- NewLoadSize = NextPowerOf2(NewLoadSize);
-
- Value *PtrVal = SrcVal->getPointerOperand();
-
- // Insert the new load after the old load. This ensures that subsequent
- // memdep queries will find the new load. We can't easily remove the old
- // load completely because it is already in the value numbering table.
- IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
- Type *DestPTy =
- IntegerType::get(LoadTy->getContext(), NewLoadSize*8);
- DestPTy = PointerType::get(DestPTy,
- PtrVal->getType()->getPointerAddressSpace());
- Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
- PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
- LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
- NewLoad->takeName(SrcVal);
- NewLoad->setAlignment(SrcVal->getAlignment());
-
- DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
- DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
-
- // Replace uses of the original load with the wider load. On a big endian
- // system, we need to shift down to get the relevant bits.
- Value *RV = NewLoad;
- if (DL.isBigEndian())
- RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8);
- RV = Builder.CreateTrunc(RV, SrcVal->getType());
- SrcVal->replaceAllUsesWith(RV);
-
- // We would like to use gvn.markInstructionForDeletion here, but we can't
- // because the load is already memoized into the leader map table that GVN
- // tracks. It is potentially possible to remove the load from the table,
- // but then there all of the operations based on it would need to be
- // rehashed. Just leave the dead load around.
- gvn.getMemDep().removeInstruction(SrcVal);
- SrcVal = NewLoad;
- }
-
- return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
-}
-
-
-/// This function is called when we have a
-/// memdep query of a load that ends up being a clobbering mem intrinsic.
-static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
- Type *LoadTy, Instruction *InsertPt,
- const DataLayout &DL){
- LLVMContext &Ctx = LoadTy->getContext();
- uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy)/8;
-
- IRBuilder<> Builder(InsertPt);
-
- // We know that this method is only called when the mem transfer fully
- // provides the bits for the load.
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
- // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
- // independently of what the offset is.
- Value *Val = MSI->getValue();
- if (LoadSize != 1)
- Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
-
- Value *OneElt = Val;
-
- // Splat the value out to the right number of bits.
- for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
- // If we can double the number of bytes set, do it.
- if (NumBytesSet*2 <= LoadSize) {
- Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
- Val = Builder.CreateOr(Val, ShVal);
- NumBytesSet <<= 1;
- continue;
- }
-
- // Otherwise insert one byte at a time.
- Value *ShVal = Builder.CreateShl(Val, 1*8);
- Val = Builder.CreateOr(OneElt, ShVal);
- ++NumBytesSet;
- }
-
- return CoerceAvailableValueToLoadType(Val, LoadTy, Builder, DL);
- }
-
- // Otherwise, this is a memcpy/memmove from a constant global.
- MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
- Constant *Src = cast<Constant>(MTI->getSource());
- unsigned AS = Src->getType()->getPointerAddressSpace();
-
- // Otherwise, see if we can constant fold a load from the constant with the
- // offset applied as appropriate.
- Src = ConstantExpr::getBitCast(Src,
- Type::getInt8PtrTy(Src->getContext(), AS));
- Constant *OffsetCst =
- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
- OffsetCst);
- Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL);
-}
/// Given a set of loads specified by ValuesPerBlock,
@@ -1171,7 +739,7 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *LI,
if (isSimpleValue()) {
Res = getSimpleValue();
if (Res->getType() != LoadTy) {
- Res = GetStoreValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
+ Res = getStoreValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
<< *getSimpleValue() << '\n'
@@ -1182,14 +750,20 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *LI,
if (Load->getType() == LoadTy && Offset == 0) {
Res = Load;
} else {
- Res = GetLoadValueForLoad(Load, Offset, LoadTy, InsertPt, gvn);
-
+ Res = getLoadValueForLoad(Load, Offset, LoadTy, InsertPt, DL);
+ // We would like to use gvn.markInstructionForDeletion here, but we can't
+ // because the load is already memoized into the leader map table that GVN
+ // tracks. It is potentially possible to remove the load from the table,
+ // but then all of the operations based on it would need to be
+ // rehashed. Just leave the dead load around.
+ gvn.getMemDep().removeInstruction(Load);
DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " "
<< *getCoercedLoadValue() << '\n'
- << *Res << '\n' << "\n\n\n");
+ << *Res << '\n'
+ << "\n\n\n");
}
} else if (isMemIntrinValue()) {
- Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
+ Res = getMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy,
InsertPt, DL);
DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
<< " " << *getMemIntrinValue() << '\n'
@@ -1258,7 +832,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// Can't forward from non-atomic to atomic without violating memory model.
if (Address && LI->isAtomic() <= DepSI->isAtomic()) {
int Offset =
- AnalyzeLoadFromClobberingStore(LI->getType(), Address, DepSI);
+ analyzeLoadFromClobberingStore(LI->getType(), Address, DepSI, DL);
if (Offset != -1) {
Res = AvailableValue::get(DepSI->getValueOperand(), Offset);
return true;
@@ -1276,7 +850,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// Can't forward from non-atomic to atomic without violating memory model.
if (DepLI != LI && Address && LI->isAtomic() <= DepLI->isAtomic()) {
int Offset =
- AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
+ analyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, DL);
if (Offset != -1) {
Res = AvailableValue::getLoad(DepLI, Offset);
@@ -1289,7 +863,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// forward a value on from it.
if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
if (Address && !LI->isAtomic()) {
- int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
+ int Offset = analyzeLoadFromClobberingMemInst(LI->getType(), Address,
DepMI, DL);
if (Offset != -1) {
Res = AvailableValue::getMI(DepMI, Offset);
@@ -1334,7 +908,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// different types if we have to. If the stored value is larger or equal to
// the loaded value, we can reuse it.
if (S->getValueOperand()->getType() != LI->getType() &&
- !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
+ !canCoerceMustAliasedValueToLoad(S->getValueOperand(),
LI->getType(), DL))
return false;
@@ -1351,7 +925,7 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
// If the stored value is larger or equal to the loaded value, we can reuse
// it.
if (LD->getType() != LI->getType() &&
- !CanCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
+ !canCoerceMustAliasedValueToLoad(LD, LI->getType(), DL))
return false;
// Can't forward from non-atomic to atomic without violating memory model.
@@ -1713,7 +1287,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// If instruction I has debug info, then we should not update it.
// Also, if I has a null DebugLoc, then it is still potentially incorrect
// to propagate LI's DebugLoc because LI may not post-dominate I.
- if (LI->getDebugLoc() && ValuesPerBlock.size() != 1)
+ if (LI->getDebugLoc() && LI->getParent() == I->getParent())
I->setDebugLoc(LI->getDebugLoc());
if (V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
@@ -1795,7 +1369,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
- // Note that if 'I' is a load being replaced by some operation,
+ // Note that if 'I' is a load being replaced by some operation,
// for example, by an arithmetic operation, then andIRFlags()
// would just erase all math flags from the original arithmetic
// operation, which is clearly not wanted and not needed.
@@ -2187,11 +1761,11 @@ bool GVN::processInstruction(Instruction *I) {
for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
i != e; ++i) {
- BasicBlock *Dst = i.getCaseSuccessor();
+ BasicBlock *Dst = i->getCaseSuccessor();
// If there is only a single edge, propagate the case value into it.
if (SwitchEdges.lookup(Dst) == 1) {
BasicBlockEdge E(Parent, Dst);
- Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E, true);
+ Changed |= propagateEquality(SwitchCond, i->getCaseValue(), E, true);
}
}
return Changed;
@@ -2581,21 +2155,12 @@ bool GVN::iterateOnFunction(Function &F) {
// Top-down walk of the dominator tree
bool Changed = false;
- // Save the blocks this function have before transformation begins. GVN may
- // split critical edge, and hence may invalidate the RPO/DT iterator.
- //
- std::vector<BasicBlock *> BBVect;
- BBVect.reserve(256);
// Needed for value numbering with phi construction to work.
+ // RPOT walks the graph in its constructor and will not be invalidated during
+ // processBlock.
ReversePostOrderTraversal<Function *> RPOT(&F);
- for (ReversePostOrderTraversal<Function *>::rpo_iterator RI = RPOT.begin(),
- RE = RPOT.end();
- RI != RE; ++RI)
- BBVect.push_back(*RI);
-
- for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
- I != E; I++)
- Changed |= processBlock(*I);
+ for (BasicBlock *BB : RPOT)
+ Changed |= processBlock(BB);
return Changed;
}
@@ -2783,6 +2348,7 @@ public:
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<TargetLibraryInfoWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
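The coercion helpers deleted above now live behind Transforms/Utils/VNCoercion. For reference, a condensed sketch of the integer fast path they implement — extract LoadSize bytes at Offset from a wider stored value — assuming SrcVal has already been bitcast to an integer type (names here are illustrative, not the library's):

  static Value *extractBytes(Value *SrcVal, unsigned Offset, Type *LoadTy,
                             IRBuilder<> &B, const DataLayout &DL) {
    uint64_t StoreSize = DL.getTypeStoreSize(SrcVal->getType());
    uint64_t LoadSize = DL.getTypeStoreSize(LoadTy);
    // Shift the wanted bytes down to bit 0; big-endian keeps them high.
    unsigned ShiftAmt = DL.isLittleEndian()
                            ? Offset * 8
                            : (StoreSize - LoadSize - Offset) * 8;
    if (ShiftAmt)
      SrcVal = B.CreateLShr(SrcVal, ShiftAmt);
    if (LoadSize != StoreSize)
      SrcVal = B.CreateTrunc(
          SrcVal, IntegerType::get(LoadTy->getContext(), LoadSize * 8));
    return SrcVal;
  }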
diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp
index f8e1d2e1a08a..6adfe130d148 100644
--- a/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/lib/Transforms/Scalar/GVNHoist.cpp
@@ -17,16 +17,39 @@
// is disabled in the following cases.
// 1. Scalars across calls.
// 2. geps when corresponding load/store cannot be hoisted.
+//
+// TODO: Hoist from >2 successors. Currently GVNHoist will not hoist stores
+// in this case because it works on two instructions at a time.
+// entry:
+// switch i32 %c1, label %exit1 [
+// i32 0, label %sw0
+// i32 1, label %sw1
+// ]
+//
+// sw0:
+// store i32 1, i32* @G
+// br label %exit
+//
+// sw1:
+// store i32 1, i32* @G
+// br label %exit
+//
+// exit1:
+// store i32 1, i32* @G
+// ret void
+// exit:
+// ret void
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/MemorySSA.h"
using namespace llvm;
@@ -60,7 +83,7 @@ static cl::opt<int>
cl::desc("Maximum length of dependent chains to hoist "
"(default = 10, unlimited = -1)"));
-namespace {
+namespace llvm {
// Provides a sorting function based on the execution order of two instructions.
struct SortByDFSIn {
@@ -72,13 +95,6 @@ public:
// Returns true when A executes before B.
bool operator()(const Instruction *A, const Instruction *B) const {
- // FIXME: libc++ has a std::sort() algorithm that will call the compare
- // function on the same element. Once PR20837 is fixed and some more years
- // pass by and all the buildbots have moved to a corrected std::sort(),
- // enable the following assert:
- //
- // assert(A != B);
-
const BasicBlock *BA = A->getParent();
const BasicBlock *BB = B->getParent();
unsigned ADFS, BDFS;
@@ -202,6 +218,7 @@ public:
GVNHoist(DominatorTree *DT, AliasAnalysis *AA, MemoryDependenceResults *MD,
MemorySSA *MSSA)
: DT(DT), AA(AA), MD(MD), MSSA(MSSA),
+ MSSAUpdater(make_unique<MemorySSAUpdater>(MSSA)),
HoistingGeps(false),
HoistedCtr(0)
{ }
@@ -249,9 +266,11 @@ private:
AliasAnalysis *AA;
MemoryDependenceResults *MD;
MemorySSA *MSSA;
+ std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
const bool HoistingGeps;
DenseMap<const Value *, unsigned> DFSNumber;
BBSideEffectsSet BBSideEffects;
+ DenseSet<const BasicBlock*> HoistBarrier;
int HoistedCtr;
enum InsKind { Unknown, Scalar, Load, Store };
@@ -307,8 +326,8 @@ private:
continue;
}
- // Check for end of function, calls that do not return, etc.
- if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator()))
+ // We reached a leaf basic block => not all paths have this instruction.
+ if (!BB->getTerminator()->getNumSuccessors())
return false;
// When reaching the back-edge of a loop, there may be a path through the
@@ -360,7 +379,7 @@ private:
ReachedNewPt = true;
}
}
- if (defClobbersUseOrDef(Def, MU, *AA))
+ if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA))
return true;
}
@@ -387,7 +406,8 @@ private:
// executed between the execution of NewBB and OldBB. Hoisting an expression
// from OldBB into NewBB has to be safe on all execution paths.
for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) {
- if (*I == NewBB) {
+ const BasicBlock *BB = *I;
+ if (BB == NewBB) {
// Stop traversal when reaching HoistPt.
I.skipChildren();
continue;
@@ -398,11 +418,17 @@ private:
return true;
// Impossible to hoist with exceptions on the path.
- if (hasEH(*I))
+ if (hasEH(BB))
+ return true;
+
+ // No instruction after a hoist barrier in a basic block was selected
+ // for hoisting, so instructions selected within a basic block that
+ // contains a hoist barrier can still be hoisted.
+ if ((BB != OldBB) && HoistBarrier.count(BB))
return true;
// Check that we do not move a store past loads.
- if (hasMemoryUse(NewPt, Def, *I))
+ if (hasMemoryUse(NewPt, Def, BB))
return true;
// -1 is unlimited number of blocks on all paths.
@@ -419,17 +445,18 @@ private:
// Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
// return true when the counter NBBsOnAllPaths reaches 0, except when it is
// initialized to -1 which is unlimited.
- bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *BB,
+ bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB,
int &NBBsOnAllPaths) {
- assert(DT->dominates(HoistPt, BB) && "Invalid path");
+ assert(DT->dominates(HoistPt, SrcBB) && "Invalid path");
// Walk all basic blocks reachable in depth-first iteration on
// the inverse CFG from BBInsn to NewHoistPt. These blocks are all the
// blocks that may be executed between the execution of NewHoistPt and
// BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe
// on all execution paths.
- for (auto I = idf_begin(BB), E = idf_end(BB); I != E;) {
- if (*I == HoistPt) {
+ for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E;) {
+ const BasicBlock *BB = *I;
+ if (BB == HoistPt) {
// Stop traversal when reaching NewHoistPt.
I.skipChildren();
continue;
@@ -440,7 +467,13 @@ private:
return true;
// Impossible to hoist with exceptions on the path.
- if (hasEH(*I))
+ if (hasEH(BB))
+ return true;
+
+ // No instruction after a hoist barrier in a basic block was selected
+ // for hoisting, so instructions selected within a basic block that
+ // contains a hoist barrier can still be hoisted.
+ if ((BB != SrcBB) && HoistBarrier.count(BB))
return true;
// -1 is unlimited number of blocks on all paths.
@@ -626,6 +659,8 @@ private:
// Compute the insertion point and the list of expressions to be hoisted.
SmallVecInsn InstructionsToHoist;
for (auto I : V)
+ // We don't need to check for hoist-barriers here because if
+ // I->getParent() is a barrier block, then I precedes the barrier.
if (!hasEH(I->getParent()))
InstructionsToHoist.push_back(I);
@@ -809,9 +844,9 @@ private:
// legal when the ld/st is not moved past its current definition.
MemoryAccess *Def = OldMemAcc->getDefiningAccess();
NewMemAcc =
- MSSA->createMemoryAccessInBB(Repl, Def, HoistPt, MemorySSA::End);
+ MSSAUpdater->createMemoryAccessInBB(Repl, Def, HoistPt, MemorySSA::End);
OldMemAcc->replaceAllUsesWith(NewMemAcc);
- MSSA->removeMemoryAccess(OldMemAcc);
+ MSSAUpdater->removeMemoryAccess(OldMemAcc);
}
}
@@ -850,7 +885,7 @@ private:
// Update the uses of the old MSSA access with NewMemAcc.
MemoryAccess *OldMA = MSSA->getMemoryAccess(I);
OldMA->replaceAllUsesWith(NewMemAcc);
- MSSA->removeMemoryAccess(OldMA);
+ MSSAUpdater->removeMemoryAccess(OldMA);
}
Repl->andIRFlags(I);
@@ -872,7 +907,7 @@ private:
auto In = Phi->incoming_values();
if (all_of(In, [&](Use &U) { return U == NewMemAcc; })) {
Phi->replaceAllUsesWith(NewMemAcc);
- MSSA->removeMemoryAccess(Phi);
+ MSSAUpdater->removeMemoryAccess(Phi);
}
}
}
@@ -896,6 +931,12 @@ private:
for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
int InstructionNb = 0;
for (Instruction &I1 : *BB) {
+ // If I1 cannot guarantee progress, subsequent instructions
+ // in BB cannot be hoisted anyway.
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I1)) {
+ HoistBarrier.insert(BB);
+ break;
+ }
// Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting
// deeper may increase the register pressure and compilation time.
if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB)
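The barrier logic added above can be summarized in isolation: the first instruction in a block that may not transfer execution to its successor (a throwing call, say) poisons the rest of that block for hoisting. A sketch, assuming HoistBarrier is the DenseSet declared earlier in this diff:

  for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
    for (Instruction &I : *BB)
      if (!isGuaranteedToTransferExecutionToSuccessor(&I)) {
        HoistBarrier.insert(BB); // nothing after I in BB may be hoisted past it
        break;
      }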
diff --git a/lib/Transforms/Scalar/GuardWidening.cpp b/lib/Transforms/Scalar/GuardWidening.cpp
index b05ef002a456..7019287954a1 100644
--- a/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/lib/Transforms/Scalar/GuardWidening.cpp
@@ -568,8 +568,7 @@ bool GuardWideningImpl::combineRangeChecks(
return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength;
};
- std::copy_if(Checks.begin(), Checks.end(),
- std::back_inserter(CurrentChecks), IsCurrentCheck);
+ copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck);
Checks.erase(remove_if(Checks, IsCurrentCheck), Checks.end());
assert(CurrentChecks.size() != 0 && "We know we have at least one!");
@@ -658,8 +657,12 @@ PreservedAnalyses GuardWideningPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
- bool Changed = GuardWideningImpl(DT, PDT, LI).run();
- return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+ if (!GuardWideningImpl(DT, PDT, LI).run())
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) {
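copy_if here is llvm's range wrapper from ADT/STLExtras.h, which forwards to std::copy_if with begin()/end() filled in. A trivial usage sketch:

  SmallVector<int, 8> Src = {1, 2, 3, 4};
  SmallVector<int, 8> Evens;
  copy_if(Src, std::back_inserter(Evens),
          [](int X) { return X % 2 == 0; }); // Evens == {2, 4}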
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 1752fb75eb1b..dcb2a4a0c6e6 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -231,8 +231,9 @@ static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
bool isExact = false;
// See if we can convert this to an int64_t
uint64_t UIntVal;
- if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
- &isExact) != APFloat::opOK || !isExact)
+ if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
+ APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
+ !isExact)
return false;
IntVal = UIntVal;
return true;
@@ -906,7 +907,7 @@ class WidenIV {
SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
enum ExtendKind { ZeroExtended, SignExtended, Unknown };
- // A map tracking the kind of extension used to widen each narrow IV
+ // A map tracking the kind of extension used to widen each narrow IV
// and narrow IV user.
// Key: pointer to a narrow IV or IV user.
// Value: the kind of extension used to widen this Instruction.
@@ -1608,7 +1609,7 @@ void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
return;
CmpInst::Predicate P =
- TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
+ TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
auto CmpConstrainedLHSRange =
@@ -1634,7 +1635,7 @@ void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
UpdateRangeFromGuards(NarrowUser);
BasicBlock *NarrowUserBB = NarrowUser->getParent();
- // If NarrowUserBB is statically unreachable asking dominator queries may
+ // If NarrowUserBB is statically unreachable asking dominator queries may
// yield surprising results. (e.g. the block may not have a dom tree node)
if (!DT->isReachableFromEntry(NarrowUserBB))
return;
@@ -2152,6 +2153,8 @@ linearFunctionTestReplace(Loop *L,
Value *CmpIndVar = IndVar;
const SCEV *IVCount = BackedgeTakenCount;
+ assert(L->getLoopLatch() && "Loop no longer in simplified form?");
+
// If the exiting block is the same as the backedge block, we prefer to
// compare against the post-incremented value, otherwise we must compare
// against the preincremented value.
@@ -2376,6 +2379,7 @@ bool IndVarSimplify::run(Loop *L) {
// Loop::getCanonicalInductionVariable only supports loops with preheaders,
// and we're in trouble if we can't find the induction variable even when
// we've manually inserted one.
+ // - LFTR relies on having a single backedge.
if (!L->isLoopSimplifyForm())
return false;
@@ -2492,8 +2496,9 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
if (!IVS.run(&L))
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
- return getLoopPassPreservedAnalyses();
+ auto PA = getLoopPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
namespace {
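The ConvertToSInt change above reflects an APFloat API update: convertToInteger now takes a MutableArrayRef<integerPart> rather than a raw pointer. The same call in isolation:

  uint64_t UIntVal; // integerPart is uint64_t
  bool isExact = false;
  APFloat::opStatus S = APF.convertToInteger(
      makeMutableArrayRef(UIntVal), 64, /*IsSigned=*/true,
      APFloat::rmTowardZero, &isExact);
  bool ConvertedOK = (S == APFloat::opOK) && isExact;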
diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 8e81541c2337..85db6e5e1105 100644
--- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -446,6 +446,15 @@ struct LoopStructure {
BasicBlock *LatchExit;
unsigned LatchBrExitIdx;
+ // The loop represented by this instance of LoopStructure is semantically
+ // equivalent to:
+ //
+ // intN_ty inc = IndVarIncreasing ? 1 : -1;
+ // pred_ty predicate = IndVarIncreasing ? ICMP_SLT : ICMP_SGT;
+ //
+ // for (intN_ty iv = IndVarStart; predicate(iv, LoopExitAt); iv = IndVarNext)
+ // ... body ...
+
Value *IndVarNext;
Value *IndVarStart;
Value *LoopExitAt;
@@ -789,6 +798,10 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
return None;
}
+ const SCEV *StartNext = IndVarNext->getStart();
+ const SCEV *Addend = SE.getNegativeSCEV(IndVarNext->getStepRecurrence(SE));
+ const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
+
ConstantInt *One = ConstantInt::get(IndVarTy, 1);
// TODO: generalize the predicates here to also match their unsigned variants.
if (IsIncreasing) {
@@ -809,10 +822,22 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
return None;
}
+ if (!SE.isLoopEntryGuardedByCond(
+ &L, CmpInst::ICMP_SLT, IndVarStart,
+ SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())))) {
+ FailureReason = "Induction variable start not bounded by upper limit";
+ return None;
+ }
+
IRBuilder<> B(Preheader->getTerminator());
RightValue = B.CreateAdd(RightValue, One);
+ } else {
+ if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SLT, IndVarStart,
+ RightSCEV)) {
+ FailureReason = "Induction variable start not bounded by upper limit";
+ return None;
+ }
}
-
} else {
bool FoundExpectedPred =
(Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 1) ||
@@ -831,15 +856,24 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
return None;
}
+ if (!SE.isLoopEntryGuardedByCond(
+ &L, CmpInst::ICMP_SGT, IndVarStart,
+ SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())))) {
+ FailureReason = "Induction variable start not bounded by lower limit";
+ return None;
+ }
+
IRBuilder<> B(Preheader->getTerminator());
RightValue = B.CreateSub(RightValue, One);
+ } else {
+ if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SGT, IndVarStart,
+ RightSCEV)) {
+ FailureReason = "Induction variable start not bounded by lower limit";
+ return None;
+ }
}
}
- const SCEV *StartNext = IndVarNext->getStart();
- const SCEV *Addend = SE.getNegativeSCEV(IndVarNext->getStepRecurrence(SE));
- const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);
-
BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx);
assert(SE.getLoopDisposition(LatchCount, &L) ==
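For reference, the start-value computation hoisted earlier in this hunk relies on a simple SCEV identity: if IndVarNext is the post-increment recurrence {S,+,Step}, the value of the induction variable before the first increment is S - Step. A sketch of that arithmetic as the patch performs it (names taken from the diff; illustrative only):

  // IndVarNext = {S,+,Step}; the IV's start value is S - Step.
  const SCEV *StartNext = IndVarNext->getStart();              // S
  const SCEV *Addend =
      SE.getNegativeSCEV(IndVarNext->getStepRecurrence(SE));   // -Step
  const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend);  // S - Step

Hoisting this above the predicate checks lets the new isLoopEntryGuardedByCond queries validate that IndVarStart is bounded by the loop's limit before any IR is rewritten.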
diff --git a/lib/Target/NVPTX/NVPTXInferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp
index f4940c937a2d..5d8701431a2c 100644
--- a/lib/Target/NVPTX/NVPTXInferAddressSpaces.cpp
+++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -89,13 +89,11 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "nvptx-infer-addrspace"
-
-#include "NVPTX.h"
-#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
@@ -105,19 +103,30 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
+#define DEBUG_TYPE "infer-address-spaces"
+
using namespace llvm;
namespace {
-const unsigned ADDRESS_SPACE_UNINITIALIZED = (unsigned)-1;
+static const unsigned UninitializedAddressSpace = ~0u;
using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
-/// \brief NVPTXInferAddressSpaces
-class NVPTXInferAddressSpaces: public FunctionPass {
+/// \brief InferAddressSpaces
+class InferAddressSpaces : public FunctionPass {
+ /// Target-specific address space whose uses should be replaced if
+ /// possible.
+ unsigned FlatAddrSpace;
+
public:
static char ID;
- NVPTXInferAddressSpaces() : FunctionPass(ID) {}
+ InferAddressSpaces() : FunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
bool runOnFunction(Function &F) override;
@@ -125,30 +134,51 @@ private:
// Returns the new address space of V if updated; otherwise, returns None.
Optional<unsigned>
updateAddressSpace(const Value &V,
- const ValueToAddrSpaceMapTy &InferredAddrSpace);
+ const ValueToAddrSpaceMapTy &InferredAddrSpace) const;
// Tries to infer the specific address space of each address expression in
// Postorder.
void inferAddressSpaces(const std::vector<Value *> &Postorder,
- ValueToAddrSpaceMapTy *InferredAddrSpace);
+ ValueToAddrSpaceMapTy *InferredAddrSpace) const;
+
+ bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
- // Changes the generic address expressions in function F to point to specific
+ // Changes the flat address expressions in function F to point to specific
// address spaces if InferredAddrSpace says so. Postorder is the postorder of
- // all generic address expressions in the use-def graph of function F.
+ // all flat expressions in the use-def graph of function F.
bool
rewriteWithNewAddressSpaces(const std::vector<Value *> &Postorder,
const ValueToAddrSpaceMapTy &InferredAddrSpace,
- Function *F);
+ Function *F) const;
+
+ void appendsFlatAddressExpressionToPostorderStack(
+ Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
+ DenseSet<Value *> *Visited) const;
+
+ bool rewriteIntrinsicOperands(IntrinsicInst *II,
+ Value *OldV, Value *NewV) const;
+ void collectRewritableIntrinsicOperands(
+ IntrinsicInst *II,
+ std::vector<std::pair<Value *, bool>> *PostorderStack,
+ DenseSet<Value *> *Visited) const;
+
+ std::vector<Value *> collectFlatAddressExpressions(Function &F) const;
+
+ Value *cloneValueWithNewAddressSpace(
+ Value *V, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace,
+ SmallVectorImpl<const Use *> *UndefUsesToFix) const;
+ unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
};
} // end anonymous namespace
-char NVPTXInferAddressSpaces::ID = 0;
+char InferAddressSpaces::ID = 0;
namespace llvm {
-void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
+void initializeInferAddressSpacesPass(PassRegistry &);
}
-INITIALIZE_PASS(NVPTXInferAddressSpaces, "nvptx-infer-addrspace",
- "Infer address spaces",
+
+INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
false, false)
// Returns true if V is an address expression.
@@ -163,6 +193,7 @@ static bool isAddressExpression(const Value &V) {
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
case Instruction::GetElementPtr:
+ case Instruction::Select:
return true;
default:
return false;
@@ -174,7 +205,7 @@ static bool isAddressExpression(const Value &V) {
// Precondition: V is an address expression.
static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
assert(isAddressExpression(V));
- const Operator& Op = cast<Operator>(V);
+ const Operator &Op = cast<Operator>(V);
switch (Op.getOpcode()) {
case Instruction::PHI: {
auto IncomingValues = cast<PHINode>(Op).incoming_values();
@@ -185,42 +216,113 @@ static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
case Instruction::AddrSpaceCast:
case Instruction::GetElementPtr:
return {Op.getOperand(0)};
+ case Instruction::Select:
+ return {Op.getOperand(1), Op.getOperand(2)};
default:
llvm_unreachable("Unexpected instruction type.");
}
}
-// If V is an unvisited generic address expression, appends V to PostorderStack
+// TODO: Move logic to TTI?
+bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
+ Value *OldV,
+ Value *NewV) const {
+ Module *M = II->getParent()->getParent()->getParent();
+
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::amdgcn_atomic_inc:
+ case Intrinsic::amdgcn_atomic_dec: {
+ const ConstantInt *IsVolatile = dyn_cast<ConstantInt>(II->getArgOperand(4));
+ if (!IsVolatile || !IsVolatile->isNullValue())
+ return false;
+
+ LLVM_FALLTHROUGH;
+ }
+ case Intrinsic::objectsize: {
+ Type *DestTy = II->getType();
+ Type *SrcTy = NewV->getType();
+ Function *NewDecl =
+ Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
+ II->setArgOperand(0, NewV);
+ II->setCalledFunction(NewDecl);
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+// TODO: Move logic to TTI?
+void InferAddressSpaces::collectRewritableIntrinsicOperands(
+ IntrinsicInst *II, std::vector<std::pair<Value *, bool>> *PostorderStack,
+ DenseSet<Value *> *Visited) const {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::objectsize:
+ case Intrinsic::amdgcn_atomic_inc:
+ case Intrinsic::amdgcn_atomic_dec:
+ appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
+ PostorderStack, Visited);
+ break;
+ default:
+ break;
+ }
+}
+
+// If V is an unvisited flat address expression, appends V to PostorderStack
// and marks it as visited.
-static void appendsGenericAddressExpressionToPostorderStack(
+void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
- DenseSet<Value *> *Visited) {
+ DenseSet<Value *> *Visited) const {
assert(V->getType()->isPointerTy());
if (isAddressExpression(*V) &&
- V->getType()->getPointerAddressSpace() ==
- AddressSpace::ADDRESS_SPACE_GENERIC) {
+ V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
if (Visited->insert(V).second)
PostorderStack->push_back(std::make_pair(V, false));
}
}
-// Returns all generic address expressions in function F. The elements are
+// Returns all flat address expressions in function F. The elements are
// ordered in postorder.
-static std::vector<Value *> collectGenericAddressExpressions(Function &F) {
+std::vector<Value *>
+InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
// This function implements a non-recursive postorder traversal of a partial
// use-def graph of function F.
- std::vector<std::pair<Value*, bool>> PostorderStack;
+ std::vector<std::pair<Value *, bool>> PostorderStack;
// The set of visited expressions.
- DenseSet<Value*> Visited;
+ DenseSet<Value *> Visited;
+
+ auto PushPtrOperand = [&](Value *Ptr) {
+ appendsFlatAddressExpressionToPostorderStack(Ptr, &PostorderStack,
+ &Visited);
+ };
+
// We only explore address expressions that are reachable from loads and
// stores for now because we aim at generating faster loads and stores.
for (Instruction &I : instructions(F)) {
- if (isa<LoadInst>(I)) {
- appendsGenericAddressExpressionToPostorderStack(
- I.getOperand(0), &PostorderStack, &Visited);
- } else if (isa<StoreInst>(I)) {
- appendsGenericAddressExpressionToPostorderStack(
- I.getOperand(1), &PostorderStack, &Visited);
+ if (auto *LI = dyn_cast<LoadInst>(&I))
+ PushPtrOperand(LI->getPointerOperand());
+ else if (auto *SI = dyn_cast<StoreInst>(&I))
+ PushPtrOperand(SI->getPointerOperand());
+ else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
+ PushPtrOperand(RMW->getPointerOperand());
+ else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
+ PushPtrOperand(CmpX->getPointerOperand());
+ else if (auto *MI = dyn_cast<MemIntrinsic>(&I)) {
+ // For memset/memcpy/memmove, any pointer operand can be replaced.
+ PushPtrOperand(MI->getRawDest());
+
+ // Handle 2nd operand for memcpy/memmove.
+ if (auto *MTI = dyn_cast<MemTransferInst>(MI))
+ PushPtrOperand(MTI->getRawSource());
+ } else if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ collectRewritableIntrinsicOperands(II, &PostorderStack, &Visited);
+ else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(&I)) {
+ // FIXME: Handle vectors of pointers
+ if (Cmp->getOperand(0)->getType()->isPointerTy()) {
+ PushPtrOperand(Cmp->getOperand(0));
+ PushPtrOperand(Cmp->getOperand(1));
+ }
}
}
@@ -236,8 +338,8 @@ static std::vector<Value *> collectGenericAddressExpressions(Function &F) {
// Otherwise, adds its operands to the stack and explores them.
PostorderStack.back().second = true;
for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) {
- appendsGenericAddressExpressionToPostorderStack(
- PtrOperand, &PostorderStack, &Visited);
+ appendsFlatAddressExpressionToPostorderStack(PtrOperand, &PostorderStack,
+ &Visited);
}
}
return Postorder;
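The traversal above is the usual two-phase-stack formulation of a non-recursive postorder: each stack entry carries a bool that flips from "just discovered" to "operands expanded". A condensed sketch of the loop that consumes PostorderStack, reconstructed from the surrounding hunks rather than quoted verbatim:

  std::vector<Value *> Postorder;
  while (!PostorderStack.empty()) {
    // Second visit: all pointer operands have been explored; emit the node.
    if (PostorderStack.back().second) {
      Postorder.push_back(PostorderStack.back().first);
      PostorderStack.pop_back();
      continue;
    }
    // First visit: mark as expanded, then push unvisited pointer operands.
    PostorderStack.back().second = true;
    for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first))
      appendsFlatAddressExpressionToPostorderStack(PtrOperand, &PostorderStack,
                                                   &Visited);
  }
  return Postorder;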
@@ -251,12 +353,18 @@ static Value *operandWithNewAddressSpaceOrCreateUndef(
const ValueToValueMapTy &ValueWithNewAddrSpace,
SmallVectorImpl<const Use *> *UndefUsesToFix) {
Value *Operand = OperandUse.get();
+
+ Type *NewPtrTy =
+ Operand->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
+
+ if (Constant *C = dyn_cast<Constant>(Operand))
+ return ConstantExpr::getAddrSpaceCast(C, NewPtrTy);
+
if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
return NewOperand;
UndefUsesToFix->push_back(&OperandUse);
- return UndefValue::get(
- Operand->getType()->getPointerElementType()->getPointerTo(NewAddrSpace));
+ return UndefValue::get(NewPtrTy);
}
// Returns a clone of `I` with its operands converted to those specified in
@@ -277,7 +385,7 @@ static Value *cloneInstructionWithNewAddressSpace(
if (I->getOpcode() == Instruction::AddrSpaceCast) {
Value *Src = I->getOperand(0);
- // Because `I` is generic, the source address space must be specific.
+ // Because `I` is flat, the source address space must be specific.
// Therefore, the inferred address space must be the source space, according
// to our algorithm.
assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
@@ -293,7 +401,7 @@ static Value *cloneInstructionWithNewAddressSpace(
NewPointerOperands.push_back(nullptr);
else
NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
- OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
+ OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
}
switch (I->getOpcode()) {
@@ -318,6 +426,11 @@ static Value *cloneInstructionWithNewAddressSpace(
NewGEP->setIsInBounds(GEP->isInBounds());
return NewGEP;
}
+ case Instruction::Select: {
+ assert(I->getType()->isPointerTy());
+ return SelectInst::Create(I->getOperand(0), NewPointerOperands[1],
+ NewPointerOperands[2], "", nullptr, I);
+ }
default:
llvm_unreachable("Unexpected opcode");
}
@@ -327,13 +440,13 @@ static Value *cloneInstructionWithNewAddressSpace(
// constant expression `CE` with its operands replaced as specified in
// ValueWithNewAddrSpace.
static Value *cloneConstantExprWithNewAddressSpace(
- ConstantExpr *CE, unsigned NewAddrSpace,
- const ValueToValueMapTy &ValueWithNewAddrSpace) {
+ ConstantExpr *CE, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace) {
Type *TargetType =
- CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
+ CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
- // Because CE is generic, the source address space must be specific.
+ // Because CE is flat, the source address space must be specific.
// Therefore, the inferred address space must be the source space according
// to our algorithm.
assert(CE->getOperand(0)->getType()->getPointerAddressSpace() ==
@@ -341,6 +454,24 @@ static Value *cloneConstantExprWithNewAddressSpace(
return ConstantExpr::getBitCast(CE->getOperand(0), TargetType);
}
+ if (CE->getOpcode() == Instruction::BitCast) {
+ if (Value *NewOperand = ValueWithNewAddrSpace.lookup(CE->getOperand(0)))
+ return ConstantExpr::getBitCast(cast<Constant>(NewOperand), TargetType);
+ return ConstantExpr::getAddrSpaceCast(CE, TargetType);
+ }
+
+ if (CE->getOpcode() == Instruction::Select) {
+ Constant *Src0 = CE->getOperand(1);
+ Constant *Src1 = CE->getOperand(2);
+ if (Src0->getType()->getPointerAddressSpace() ==
+ Src1->getType()->getPointerAddressSpace()) {
+
+ return ConstantExpr::getSelect(
+ CE->getOperand(0), ConstantExpr::getAddrSpaceCast(Src0, TargetType),
+ ConstantExpr::getAddrSpaceCast(Src1, TargetType));
+ }
+ }
+
// Computes the operands of the new constant expression.
SmallVector<Constant *, 4> NewOperands;
for (unsigned Index = 0; Index < CE->getNumOperands(); ++Index) {
@@ -362,30 +493,29 @@ static Value *cloneConstantExprWithNewAddressSpace(
// Needs to specify the source type while constructing a getelementptr
// constant expression.
return CE->getWithOperands(
- NewOperands, TargetType, /*OnlyIfReduced=*/false,
- NewOperands[0]->getType()->getPointerElementType());
+ NewOperands, TargetType, /*OnlyIfReduced=*/false,
+ NewOperands[0]->getType()->getPointerElementType());
}
return CE->getWithOperands(NewOperands, TargetType);
}
// Returns a clone of the value `V`, with its operands replaced as specified in
-// ValueWithNewAddrSpace. This function is called on every generic address
+// ValueWithNewAddrSpace. This function is called on every flat address
// expression whose address space needs to be modified, in postorder.
//
// See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix.
-static Value *
-cloneValueWithNewAddressSpace(Value *V, unsigned NewAddrSpace,
- const ValueToValueMapTy &ValueWithNewAddrSpace,
- SmallVectorImpl<const Use *> *UndefUsesToFix) {
- // All values in Postorder are generic address expressions.
+Value *InferAddressSpaces::cloneValueWithNewAddressSpace(
+ Value *V, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace,
+ SmallVectorImpl<const Use *> *UndefUsesToFix) const {
+ // All values in Postorder are flat address expressions.
assert(isAddressExpression(*V) &&
- V->getType()->getPointerAddressSpace() ==
- AddressSpace::ADDRESS_SPACE_GENERIC);
+ V->getType()->getPointerAddressSpace() == FlatAddrSpace);
if (Instruction *I = dyn_cast<Instruction>(V)) {
Value *NewV = cloneInstructionWithNewAddressSpace(
- I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
+ I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
if (Instruction *NewI = dyn_cast<Instruction>(NewV)) {
if (NewI->getParent() == nullptr) {
NewI->insertBefore(I);
@@ -396,63 +526,68 @@ cloneValueWithNewAddressSpace(Value *V, unsigned NewAddrSpace,
}
return cloneConstantExprWithNewAddressSpace(
- cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
+ cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
}
// Defines the join operation on the address space lattice (see the file header
// comments).
-static unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) {
- if (AS1 == AddressSpace::ADDRESS_SPACE_GENERIC ||
- AS2 == AddressSpace::ADDRESS_SPACE_GENERIC)
- return AddressSpace::ADDRESS_SPACE_GENERIC;
+unsigned InferAddressSpaces::joinAddressSpaces(unsigned AS1,
+ unsigned AS2) const {
+ if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace)
+ return FlatAddrSpace;
- if (AS1 == ADDRESS_SPACE_UNINITIALIZED)
+ if (AS1 == UninitializedAddressSpace)
return AS2;
- if (AS2 == ADDRESS_SPACE_UNINITIALIZED)
+ if (AS2 == UninitializedAddressSpace)
return AS1;
- // The join of two different specific address spaces is generic.
- return AS1 == AS2 ? AS1 : (unsigned)AddressSpace::ADDRESS_SPACE_GENERIC;
+ // The join of two different specific address spaces is flat.
+ return (AS1 == AS2) ? AS1 : FlatAddrSpace;
}
-bool NVPTXInferAddressSpaces::runOnFunction(Function &F) {
+bool InferAddressSpaces::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- // Collects all generic address expressions in postorder.
- std::vector<Value *> Postorder = collectGenericAddressExpressions(F);
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ FlatAddrSpace = TTI.getFlatAddressSpace();
+ if (FlatAddrSpace == UninitializedAddressSpace)
+ return false;
+
+ // Collects all flat address expressions in postorder.
+ std::vector<Value *> Postorder = collectFlatAddressExpressions(F);
// Runs a data-flow analysis to refine the address spaces of every expression
// in Postorder.
ValueToAddrSpaceMapTy InferredAddrSpace;
inferAddressSpaces(Postorder, &InferredAddrSpace);
- // Changes the address spaces of the generic address expressions who are
- // inferred to point to a specific address space.
+ // Changes the address spaces of the flat address expressions that are inferred
+ // to point to a specific address space.
return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
}
-void NVPTXInferAddressSpaces::inferAddressSpaces(
+void InferAddressSpaces::inferAddressSpaces(
const std::vector<Value *> &Postorder,
- ValueToAddrSpaceMapTy *InferredAddrSpace) {
+ ValueToAddrSpaceMapTy *InferredAddrSpace) const {
SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
// Initially, all expressions are in the uninitialized address space.
for (Value *V : Postorder)
- (*InferredAddrSpace)[V] = ADDRESS_SPACE_UNINITIALIZED;
+ (*InferredAddrSpace)[V] = UninitializedAddressSpace;
while (!Worklist.empty()) {
- Value* V = Worklist.pop_back_val();
+ Value *V = Worklist.pop_back_val();
// Tries to update the address space of the stack top according to the
// address spaces of its operands.
- DEBUG(dbgs() << "Updating the address space of\n"
- << " " << *V << "\n");
+ DEBUG(dbgs() << "Updating the address space of\n " << *V << '\n');
Optional<unsigned> NewAS = updateAddressSpace(*V, *InferredAddrSpace);
if (!NewAS.hasValue())
continue;
// If any updates are made, adds its users to the worklist because
// their address spaces may also need to be updated.
- DEBUG(dbgs() << " to " << NewAS.getValue() << "\n");
+ DEBUG(dbgs() << " to " << NewAS.getValue() << '\n');
(*InferredAddrSpace)[V] = NewAS.getValue();
for (Value *User : V->users()) {
@@ -461,15 +596,15 @@ void NVPTXInferAddressSpaces::inferAddressSpaces(
continue;
auto Pos = InferredAddrSpace->find(User);
- // Our algorithm only updates the address spaces of generic address
+ // Our algorithm only updates the address spaces of flat address
// expressions, which are those in InferredAddrSpace.
if (Pos == InferredAddrSpace->end())
continue;
// Function updateAddressSpace moves the address space down a lattice
- // path. Therefore, nothing to do if User is already inferred as
- // generic (the bottom element in the lattice).
- if (Pos->second == AddressSpace::ADDRESS_SPACE_GENERIC)
+ // path. Therefore, nothing to do if User is already inferred as flat (the
+ // bottom element in the lattice).
+ if (Pos->second == FlatAddrSpace)
continue;
Worklist.insert(User);
@@ -477,35 +612,177 @@ void NVPTXInferAddressSpaces::inferAddressSpaces(
}
}
-Optional<unsigned> NVPTXInferAddressSpaces::updateAddressSpace(
- const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) {
+Optional<unsigned> InferAddressSpaces::updateAddressSpace(
+ const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const {
assert(InferredAddrSpace.count(&V));
// The new inferred address space equals the join of the address spaces
// of all its pointer operands.
- unsigned NewAS = ADDRESS_SPACE_UNINITIALIZED;
- for (Value *PtrOperand : getPointerOperands(V)) {
- unsigned OperandAS;
- if (InferredAddrSpace.count(PtrOperand))
- OperandAS = InferredAddrSpace.lookup(PtrOperand);
+ unsigned NewAS = UninitializedAddressSpace;
+
+ const Operator &Op = cast<Operator>(V);
+ if (Op.getOpcode() == Instruction::Select) {
+ Value *Src0 = Op.getOperand(1);
+ Value *Src1 = Op.getOperand(2);
+
+ auto I = InferredAddrSpace.find(Src0);
+ unsigned Src0AS = (I != InferredAddrSpace.end()) ?
+ I->second : Src0->getType()->getPointerAddressSpace();
+
+ auto J = InferredAddrSpace.find(Src1);
+ unsigned Src1AS = (J != InferredAddrSpace.end()) ?
+ J->second : Src1->getType()->getPointerAddressSpace();
+
+ auto *C0 = dyn_cast<Constant>(Src0);
+ auto *C1 = dyn_cast<Constant>(Src1);
+
+ // If one of the inputs is a constant, we may be able to do a constant
+ // addrspacecast of it. Defer inferring the address space until the input
+ // address space is known.
+ if ((C1 && Src0AS == UninitializedAddressSpace) ||
+ (C0 && Src1AS == UninitializedAddressSpace))
+ return None;
+
+ if (C0 && isSafeToCastConstAddrSpace(C0, Src1AS))
+ NewAS = Src1AS;
+ else if (C1 && isSafeToCastConstAddrSpace(C1, Src0AS))
+ NewAS = Src0AS;
else
- OperandAS = PtrOperand->getType()->getPointerAddressSpace();
- NewAS = joinAddressSpaces(NewAS, OperandAS);
- // join(generic, *) = generic. So we can break if NewAS is already generic.
- if (NewAS == AddressSpace::ADDRESS_SPACE_GENERIC)
- break;
+ NewAS = joinAddressSpaces(Src0AS, Src1AS);
+ } else {
+ for (Value *PtrOperand : getPointerOperands(V)) {
+ auto I = InferredAddrSpace.find(PtrOperand);
+ unsigned OperandAS = I != InferredAddrSpace.end() ?
+ I->second : PtrOperand->getType()->getPointerAddressSpace();
+
+ // join(flat, *) = flat. So we can break if NewAS is already flat.
+ NewAS = joinAddressSpaces(NewAS, OperandAS);
+ if (NewAS == FlatAddrSpace)
+ break;
+ }
}
unsigned OldAS = InferredAddrSpace.lookup(&V);
- assert(OldAS != AddressSpace::ADDRESS_SPACE_GENERIC);
+ assert(OldAS != FlatAddrSpace);
if (OldAS == NewAS)
return None;
return NewAS;
}
-bool NVPTXInferAddressSpaces::rewriteWithNewAddressSpaces(
- const std::vector<Value *> &Postorder,
- const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) {
+/// \returns true if \p U is the pointer operand of a memory instruction with
+/// a single pointer operand that can have its address space changed by simply
+/// mutating the use to a new value.
+static bool isSimplePointerUseValidToReplace(Use &U) {
+ User *Inst = U.getUser();
+ unsigned OpNo = U.getOperandNo();
+
+ if (auto *LI = dyn_cast<LoadInst>(Inst))
+ return OpNo == LoadInst::getPointerOperandIndex() && !LI->isVolatile();
+
+ if (auto *SI = dyn_cast<StoreInst>(Inst))
+ return OpNo == StoreInst::getPointerOperandIndex() && !SI->isVolatile();
+
+ if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
+ return OpNo == AtomicRMWInst::getPointerOperandIndex() && !RMW->isVolatile();
+
+ if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
+ return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
+ !CmpX->isVolatile();
+ }
+
+ return false;
+}
+
+/// Update memory intrinsic uses that require more complex processing than
+/// simple memory instructions. Thse require re-mangling and may have multiple
+/// pointer operands.
+static bool handleMemIntrinsicPtrUse(MemIntrinsic *MI, Value *OldV,
+ Value *NewV) {
+ IRBuilder<> B(MI);
+ MDNode *TBAA = MI->getMetadata(LLVMContext::MD_tbaa);
+ MDNode *ScopeMD = MI->getMetadata(LLVMContext::MD_alias_scope);
+ MDNode *NoAliasMD = MI->getMetadata(LLVMContext::MD_noalias);
+
+ if (auto *MSI = dyn_cast<MemSetInst>(MI)) {
+ B.CreateMemSet(NewV, MSI->getValue(),
+ MSI->getLength(), MSI->getAlignment(),
+ false, // isVolatile
+ TBAA, ScopeMD, NoAliasMD);
+ } else if (auto *MTI = dyn_cast<MemTransferInst>(MI)) {
+ Value *Src = MTI->getRawSource();
+ Value *Dest = MTI->getRawDest();
+
+ // Be careful in case this is a self-to-self copy.
+ if (Src == OldV)
+ Src = NewV;
+
+ if (Dest == OldV)
+ Dest = NewV;
+
+ if (isa<MemCpyInst>(MTI)) {
+ MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct);
+ B.CreateMemCpy(Dest, Src, MTI->getLength(),
+ MTI->getAlignment(),
+ false, // isVolatile
+ TBAA, TBAAStruct, ScopeMD, NoAliasMD);
+ } else {
+ assert(isa<MemMoveInst>(MTI));
+ B.CreateMemMove(Dest, Src, MTI->getLength(),
+ MTI->getAlignment(),
+ false, // isVolatile
+ TBAA, ScopeMD, NoAliasMD);
+ }
+ } else
+ llvm_unreachable("unhandled MemIntrinsic");
+
+ MI->eraseFromParent();
+ return true;
+}
+
+// \returns true if it is OK to change the address space of constant \p C with
+// a ConstantExpr addrspacecast.
+bool InferAddressSpaces::isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const {
+ assert(NewAS != UninitializedAddressSpace);
+
+ unsigned SrcAS = C->getType()->getPointerAddressSpace();
+ if (SrcAS == NewAS || isa<UndefValue>(C))
+ return true;
+
+ // Prevent illegal casts between different non-flat address spaces.
+ if (SrcAS != FlatAddrSpace && NewAS != FlatAddrSpace)
+ return false;
+
+ if (isa<ConstantPointerNull>(C))
+ return true;
+
+ if (auto *Op = dyn_cast<Operator>(C)) {
+ // If we already have a constant addrspacecast, it should be safe to cast it
+ // off.
+ if (Op->getOpcode() == Instruction::AddrSpaceCast)
+ return isSafeToCastConstAddrSpace(cast<Constant>(Op->getOperand(0)), NewAS);
+
+ if (Op->getOpcode() == Instruction::IntToPtr &&
+ Op->getType()->getPointerAddressSpace() == FlatAddrSpace)
+ return true;
+ }
+
+ return false;
+}
+
+static Value::use_iterator skipToNextUser(Value::use_iterator I,
+ Value::use_iterator End) {
+ User *CurUser = I->getUser();
+ ++I;
+
+ while (I != End && I->getUser() == CurUser)
+ ++I;
+
+ return I;
+}
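skipToNextUser exists because the rewrite loop below works per user rather than per use: one instruction can reference V through several operands (for example a memcpy whose source and destination are the same pointer), and handlers such as handleMemIntrinsicPtrUse may erase the user outright. A sketch of the resulting iteration pattern, with processUser standing in for the body of the real loop (hypothetical name):

  for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E;) {
    Use &U = *I;
    I = skipToNextUser(I, E); // advance past all of this user's uses first
    processUser(U);           // hypothetical handler; may erase U's user
  }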
+
+bool InferAddressSpaces::rewriteWithNewAddressSpaces(
+ const std::vector<Value *> &Postorder,
+ const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
// For each address expression to be modified, creates a clone of it with its
// pointer operands converted to the new address space. Since the pointer
// operands are converted, the clone is naturally in the new address space by
@@ -516,7 +793,7 @@ bool NVPTXInferAddressSpaces::rewriteWithNewAddressSpaces(
unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
ValueWithNewAddrSpace[V] = cloneValueWithNewAddressSpace(
- V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
+ V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
}
}
@@ -524,7 +801,7 @@ bool NVPTXInferAddressSpaces::rewriteWithNewAddressSpaces(
return false;
// Fixes all the undef uses generated by cloneInstructionWithNewAddressSpace.
- for (const Use* UndefUse : UndefUsesToFix) {
+ for (const Use *UndefUse : UndefUsesToFix) {
User *V = UndefUse->getUser();
User *NewV = cast<User>(ValueWithNewAddrSpace.lookup(V));
unsigned OperandNo = UndefUse->getOperandNo();
@@ -538,39 +815,82 @@ bool NVPTXInferAddressSpaces::rewriteWithNewAddressSpaces(
if (NewV == nullptr)
continue;
- SmallVector<Use *, 4> Uses;
- for (Use &U : V->uses())
- Uses.push_back(&U);
- DEBUG(dbgs() << "Replacing the uses of " << *V << "\n to\n " << *NewV
- << "\n");
- for (Use *U : Uses) {
- if (isa<LoadInst>(U->getUser()) ||
- (isa<StoreInst>(U->getUser()) && U->getOperandNo() == 1)) {
- // If V is used as the pointer operand of a load/store, sets the pointer
- // operand to NewV. This replacement does not change the element type,
- // so the resultant load/store is still valid.
- U->set(NewV);
- } else if (isa<Instruction>(U->getUser())) {
- // Otherwise, replaces the use with generic(NewV).
- // TODO: Some optimization opportunities are missed. For example, in
- // %0 = icmp eq float* %p, %q
- // if both p and q are inferred to be shared, we can rewrite %0 as
- // %0 = icmp eq float addrspace(3)* %new_p, %new_q
- // instead of currently
- // %generic_p = addrspacecast float addrspace(3)* %new_p to float*
- // %generic_q = addrspacecast float addrspace(3)* %new_q to float*
- // %0 = icmp eq float* %generic_p, %generic_q
+ DEBUG(dbgs() << "Replacing the uses of " << *V
+ << "\n with\n " << *NewV << '\n');
+
+ Value::use_iterator I, E, Next;
+ for (I = V->use_begin(), E = V->use_end(); I != E; ) {
+ Use &U = *I;
+
+ // A user may reference the same pointer in multiple operands. Skip to
+ // the next user.
+ I = skipToNextUser(I, E);
+
+ if (isSimplePointerUseValidToReplace(U)) {
+ // If V is used as the pointer operand of a compatible memory operation,
+ // sets the pointer operand to NewV. This replacement does not change
+ // the element type, so the resultant load/store is still valid.
+ U.set(NewV);
+ continue;
+ }
+
+ User *CurUser = U.getUser();
+ // Handle more complex cases like intrinsics that need to be remangled.
+ if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
+ if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
+ continue;
+ }
+
+ if (auto *II = dyn_cast<IntrinsicInst>(CurUser)) {
+ if (rewriteIntrinsicOperands(II, V, NewV))
+ continue;
+ }
+
+ if (isa<Instruction>(CurUser)) {
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(CurUser)) {
+ // If we can infer that both pointers are in the same addrspace,
+ // transform e.g.
+ // %cmp = icmp eq float* %p, %q
+ // into
+ // %cmp = icmp eq float addrspace(3)* %new_p, %new_q
+
+ unsigned NewAS = NewV->getType()->getPointerAddressSpace();
+ int SrcIdx = U.getOperandNo();
+ int OtherIdx = (SrcIdx == 0) ? 1 : 0;
+ Value *OtherSrc = Cmp->getOperand(OtherIdx);
+
+ if (Value *OtherNewV = ValueWithNewAddrSpace.lookup(OtherSrc)) {
+ if (OtherNewV->getType()->getPointerAddressSpace() == NewAS) {
+ Cmp->setOperand(OtherIdx, OtherNewV);
+ Cmp->setOperand(SrcIdx, NewV);
+ continue;
+ }
+ }
+
+ // Even if the type mismatches, we can cast the constant.
+ if (auto *KOtherSrc = dyn_cast<Constant>(OtherSrc)) {
+ if (isSafeToCastConstAddrSpace(KOtherSrc, NewAS)) {
+ Cmp->setOperand(SrcIdx, NewV);
+ Cmp->setOperand(OtherIdx,
+ ConstantExpr::getAddrSpaceCast(KOtherSrc, NewV->getType()));
+ continue;
+ }
+ }
+ }
+
+ // Otherwise, replaces the use with flat(NewV).
if (Instruction *I = dyn_cast<Instruction>(V)) {
BasicBlock::iterator InsertPos = std::next(I->getIterator());
while (isa<PHINode>(InsertPos))
++InsertPos;
- U->set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
+ U.set(new AddrSpaceCastInst(NewV, V->getType(), "", &*InsertPos));
} else {
- U->set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
- V->getType()));
+ U.set(ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV),
+ V->getType()));
}
}
}
+
if (V->use_empty())
RecursivelyDeleteTriviallyDeadInstructions(V);
}
@@ -578,6 +898,6 @@ bool NVPTXInferAddressSpaces::rewriteWithNewAddressSpaces(
return true;
}
-FunctionPass *llvm::createNVPTXInferAddressSpacesPass() {
- return new NVPTXInferAddressSpaces();
+FunctionPass *llvm::createInferAddressSpacesPass() {
+ return new InferAddressSpaces();
}
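Taken together, the inference in this file is a forward data-flow problem over a flat lattice: the uninitialized state on top, each specific address space in the middle, and the flat address space at the bottom. A self-contained sketch equivalent to InferAddressSpaces::joinAddressSpaces above (Flat and Uninit stand in for FlatAddrSpace and UninitializedAddressSpace):

  unsigned join(unsigned AS1, unsigned AS2, unsigned Flat) {
    const unsigned Uninit = ~0u;            // top of the lattice
    if (AS1 == Flat || AS2 == Flat)
      return Flat;                          // join(flat, *) = flat
    if (AS1 == Uninit)
      return AS2;                           // join(top, x) = x
    if (AS2 == Uninit)
      return AS1;
    return AS1 == AS2 ? AS1 : Flat;         // distinct specifics meet at flat
  }

Every update moves a value monotonically down this lattice, which is why the worklist loop in inferAddressSpaces terminates.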
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 1870c3deb4f3..08eb95a1a3d3 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
@@ -30,11 +31,13 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
@@ -89,6 +92,7 @@ namespace {
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<LazyValueInfoWrapperPass>();
AU.addPreserved<LazyValueInfoWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
@@ -104,6 +108,7 @@ INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
"Jump Threading", false, false)
INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
@@ -121,6 +126,7 @@ bool JumpThreading::runOnFunction(Function &F) {
return false;
auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
+ auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
bool HasProfileData = F.getEntryCount().hasValue();
@@ -129,7 +135,8 @@ bool JumpThreading::runOnFunction(Function &F) {
BPI.reset(new BranchProbabilityInfo(F, LI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- return Impl.runImpl(F, TLI, LVI, HasProfileData, std::move(BFI),
+
+ return Impl.runImpl(F, TLI, LVI, AA, HasProfileData, std::move(BFI),
std::move(BPI));
}
@@ -138,6 +145,8 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &LVI = AM.getResult<LazyValueAnalysis>(F);
+ auto &AA = AM.getResult<AAManager>(F);
+
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
bool HasProfileData = F.getEntryCount().hasValue();
@@ -146,12 +155,9 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
BPI.reset(new BranchProbabilityInfo(F, LI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed =
- runImpl(F, &TLI, &LVI, HasProfileData, std::move(BFI), std::move(BPI));
- // FIXME: We need to invalidate LVI to avoid PR28400. Is there a better
- // solution?
- AM.invalidate<LazyValueAnalysis>(F);
+ bool Changed = runImpl(F, &TLI, &LVI, &AA, HasProfileData, std::move(BFI),
+ std::move(BPI));
if (!Changed)
return PreservedAnalyses::all();
@@ -161,18 +167,23 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
}
bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
- LazyValueInfo *LVI_, bool HasProfileData_,
+ LazyValueInfo *LVI_, AliasAnalysis *AA_,
+ bool HasProfileData_,
std::unique_ptr<BlockFrequencyInfo> BFI_,
std::unique_ptr<BranchProbabilityInfo> BPI_) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TLI = TLI_;
LVI = LVI_;
+ AA = AA_;
BFI.reset();
BPI.reset();
// When profile data is available, we need to update edge weights after
// successful jump threading, which requires both BPI and BFI being available.
HasProfileData = HasProfileData_;
+ auto *GuardDecl = F.getParent()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ HasGuards = GuardDecl && !GuardDecl->use_empty();
if (HasProfileData) {
BPI = std::move(BPI_);
BFI = std::move(BFI_);
@@ -226,26 +237,13 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
BB != &BB->getParent()->getEntryBlock() &&
// If the terminator is the only non-phi instruction, try to nuke it.
BB->getFirstNonPHIOrDbg()->isTerminator() && !LoopHeaders.count(BB)) {
- // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
- // block, we have to make sure it isn't in the LoopHeaders set. We
- // reinsert afterward if needed.
- bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
- BasicBlock *Succ = BI->getSuccessor(0);
-
// FIXME: It is always conservatively correct to drop the info
// for a block even if it doesn't get erased. This isn't totally
// awesome, but it allows us to use AssertingVH to prevent nasty
// dangling pointer issues within LazyValueInfo.
LVI->eraseBlock(BB);
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
Changed = true;
- // If we deleted BB and BB was the header of a loop, then the
- // successor is now the header of the loop.
- BB = Succ;
- }
-
- if (ErasedFromLoopHeaders)
- LoopHeaders.insert(BB);
}
}
EverChanged |= Changed;
@@ -255,10 +253,13 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
return EverChanged;
}
-/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
-/// thread across it. Stop scanning the block when passing the threshold.
-static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
+/// Return the cost of duplicating a piece of this block, from the first
+/// non-phi up to (but not including) the StopAt instruction, in order to
+/// thread across it. Stop scanning the block when exceeding the threshold.
+/// If duplication is impossible, returns ~0U.
+static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
+ Instruction *StopAt,
unsigned Threshold) {
+ assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
/// Ignore PHI nodes; these will be flattened when duplication happens.
BasicBlock::const_iterator I(BB->getFirstNonPHI());
@@ -266,15 +267,17 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
// branch, so they shouldn't count against the duplication cost.
unsigned Bonus = 0;
- const TerminatorInst *BBTerm = BB->getTerminator();
- // Threading through a switch statement is particularly profitable. If this
- // block ends in a switch, decrease its cost to make it more likely to happen.
- if (isa<SwitchInst>(BBTerm))
- Bonus = 6;
-
- // The same holds for indirect branches, but slightly more so.
- if (isa<IndirectBrInst>(BBTerm))
- Bonus = 8;
+ if (BB->getTerminator() == StopAt) {
+ // Threading through a switch statement is particularly profitable. If this
+ // block ends in a switch, decrease its cost to make it more likely to
+ // happen.
+ if (isa<SwitchInst>(StopAt))
+ Bonus = 6;
+
+ // The same holds for indirect branches, but slightly more so.
+ if (isa<IndirectBrInst>(StopAt))
+ Bonus = 8;
+ }
// Bump the threshold up so the early exit from the loop doesn't skip the
// terminator-based Size adjustment at the end.
@@ -283,7 +286,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
- for (; !isa<TerminatorInst>(I); ++I) {
+ for (; &*I != StopAt; ++I) {
// Stop scanning the block if we've reached the threshold.
if (Size > Threshold)
@@ -729,6 +732,10 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
if (TryToUnfoldSelectInCurrBB(BB))
return true;
+ // See if we can propagate guards to predecessors.
+ if (HasGuards && ProcessGuards(BB))
+ return true;
+
// What kind of constant we're looking for.
ConstantPreference Preference = WantInteger;
@@ -804,7 +811,6 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
return false;
}
-
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
// If we're branching on a conditional, LVI might be able to determine
// it's value at the branch instruction. We only handle comparisons
@@ -812,7 +818,12 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
// TODO: This should be extended to handle switches as well.
BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
- if (CondBr && CondConst && CondBr->isConditional()) {
+ if (CondBr && CondConst) {
+ // We should have returned as soon as we turned a conditional branch
+ // unconditional, because it is no longer interesting as far as jump
+ // threading is concerned.
+ assert(CondBr->isConditional() && "Threading on unconditional terminator");
+
LazyValueInfo::Tristate Ret =
LVI->getPredicateAt(CondCmp->getPredicate(), CondCmp->getOperand(0),
CondConst, CondBr);
@@ -835,10 +846,12 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
}
return true;
}
- }
- if (CondBr && CondConst && TryToUnfoldSelect(CondCmp, BB))
- return true;
+ // We did not manage to simplify this branch, try to see whether
+ // CondCmp depends on a known phi-select pattern.
+ if (TryToUnfoldSelect(CondCmp, BB))
+ return true;
+ }
}
// Check for some cases that are worth simplifying. Right now we want to look
@@ -857,7 +870,6 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
if (SimplifyPartiallyRedundantLoad(LI))
return true;
-
// Handle a variety of cases where we are branching on something derived from
// a PHI node in the current block. If we can prove that any predecessors
// compute a predictable value based on a PHI node, thread those predecessors.
@@ -871,7 +883,6 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) {
if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
return ProcessBranchOnPHI(PN);
-
// If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
if (CondInst->getOpcode() == Instruction::Xor &&
CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
@@ -920,6 +931,14 @@ bool JumpThreadingPass::ProcessImpliedCondition(BasicBlock *BB) {
return false;
}
+/// Return true if Op is an instruction defined in the given block.
+static bool isOpDefinedInBlock(Value *Op, BasicBlock *BB) {
+ if (Instruction *OpInst = dyn_cast<Instruction>(Op))
+ if (OpInst->getParent() == BB)
+ return true;
+ return false;
+}
+
/// SimplifyPartiallyRedundantLoad - If LI is an obviously partially redundant
/// load instruction, eliminate it by replacing it with a PHI node. This is an
/// important optimization that encourages jump threading, and needs to be run
@@ -942,18 +961,17 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
Value *LoadedPtr = LI->getOperand(0);
- // If the loaded operand is defined in the LoadBB, it can't be available.
- // TODO: Could do simple PHI translation, that would be fun :)
- if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
- if (PtrOp->getParent() == LoadBB)
- return false;
+ // If the loaded operand is defined in the LoadBB and it is not a phi,
+ // it can't be available in predecessors.
+ if (isOpDefinedInBlock(LoadedPtr, LoadBB) && !isa<PHINode>(LoadedPtr))
+ return false;
// Scan a few instructions up from the load, to see if it is obviously live at
// the entry to its block.
BasicBlock::iterator BBIt(LI);
bool IsLoadCSE;
- if (Value *AvailableVal =
- FindAvailableLoadedValue(LI, LoadBB, BBIt, DefMaxInstsToScan, nullptr, &IsLoadCSE)) {
+ if (Value *AvailableVal = FindAvailableLoadedValue(
+ LI, LoadBB, BBIt, DefMaxInstsToScan, AA, &IsLoadCSE)) {
// If the value of the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
@@ -997,12 +1015,34 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (!PredsScanned.insert(PredBB).second)
continue;
- // Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
- Value *PredAvailable = FindAvailableLoadedValue(LI, PredBB, BBIt,
- DefMaxInstsToScan,
- nullptr,
- &IsLoadCSE);
+ unsigned NumScanedInst = 0;
+ Value *PredAvailable = nullptr;
+ // NOTE: We don't CSE loads that are volatile or anything stronger than
+ // unordered; that should have been checked when we entered the function.
+ assert(LI->isUnordered() && "Attempting to CSE volatile or atomic loads");
+ // If this is a load on a phi pointer, phi-translate it and search
+ // for available load/store to the pointer in predecessors.
+ Value *Ptr = LoadedPtr->DoPHITranslation(LoadBB, PredBB);
+ PredAvailable = FindAvailablePtrLoadStore(
+ Ptr, LI->getType(), LI->isAtomic(), PredBB, BBIt, DefMaxInstsToScan,
+ AA, &IsLoadCSE, &NumScanedInst);
+
+ // If PredBB has a single predecessor, continue scanning through the
+ // single predecessor.
+ BasicBlock *SinglePredBB = PredBB;
+ while (!PredAvailable && SinglePredBB && BBIt == SinglePredBB->begin() &&
+ NumScanedInst < DefMaxInstsToScan) {
+ SinglePredBB = SinglePredBB->getSinglePredecessor();
+ if (SinglePredBB) {
+ BBIt = SinglePredBB->end();
+ PredAvailable = FindAvailablePtrLoadStore(
+ Ptr, LI->getType(), LI->isAtomic(), SinglePredBB, BBIt,
+ (DefMaxInstsToScan - NumScanedInst), AA, &IsLoadCSE,
+ &NumScanedInst);
+ }
+ }
+
if (!PredAvailable) {
OneUnavailablePred = PredBB;
continue;
@@ -1062,10 +1102,10 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (UnavailablePred) {
assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
"Can't handle critical edge here!");
- LoadInst *NewVal =
- new LoadInst(LoadedPtr, LI->getName() + ".pr", false,
- LI->getAlignment(), LI->getOrdering(), LI->getSynchScope(),
- UnavailablePred->getTerminator());
+ LoadInst *NewVal = new LoadInst(
+ LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred),
+ LI->getName() + ".pr", false, LI->getAlignment(), LI->getOrdering(),
+ LI->getSynchScope(), UnavailablePred->getTerminator());
NewVal->setDebugLoc(LI->getDebugLoc());
if (AATags)
NewVal->setAAMetadata(AATags);
@@ -1229,7 +1269,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
- DestBB = SI->findCaseValue(cast<ConstantInt>(Val)).getCaseSuccessor();
+ DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
} else {
assert(isa<IndirectBrInst>(BB->getTerminator())
&& "Unexpected terminator");
@@ -1468,7 +1508,8 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
return false;
}
- unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB, BBDupThreshold);
+ unsigned JumpThreadCost =
+ getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
if (JumpThreadCost > BBDupThreshold) {
DEBUG(dbgs() << " Not threading BB '" << BB->getName()
<< "' - Cost is too high: " << JumpThreadCost << "\n");
@@ -1756,7 +1797,8 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
return false;
}
- unsigned DuplicationCost = getJumpThreadDuplicationCost(BB, BBDupThreshold);
+ unsigned DuplicationCost =
+ getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
if (DuplicationCost > BBDupThreshold) {
DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
@@ -1888,10 +1930,10 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
/// TryToUnfoldSelect - Look for blocks of the form
/// bb1:
/// %a = select
-/// br bb
+/// br bb2
///
/// bb2:
-/// %p = phi [%a, %bb] ...
+/// %p = phi [%a, %bb1] ...
/// %c = icmp %p
/// br i1 %c
///
@@ -2021,3 +2063,130 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
return false;
}
+
+/// Try to propagate a guard from the current BB into one of its predecessors
+/// in case another branch of execution implies that the condition of this
+/// guard is always true. Currently we only process the simplest case that
+/// looks like:
+///
+/// Start:
+/// %cond = ...
+/// br i1 %cond, label %T1, label %F1
+/// T1:
+/// br label %Merge
+/// F1:
+/// br label %Merge
+/// Merge:
+/// %condGuard = ...
+/// call void(i1, ...) @llvm.experimental.guard( i1 %condGuard )[ "deopt"() ]
+///
+/// And cond either implies condGuard or !condGuard. In this case all the
+/// instructions before the guard can be duplicated in both branches, and the
+/// guard is then threaded to one of them.
+bool JumpThreadingPass::ProcessGuards(BasicBlock *BB) {
+ using namespace PatternMatch;
+ // We only want to deal with two predecessors.
+ BasicBlock *Pred1, *Pred2;
+ auto PI = pred_begin(BB), PE = pred_end(BB);
+ if (PI == PE)
+ return false;
+ Pred1 = *PI++;
+ if (PI == PE)
+ return false;
+ Pred2 = *PI++;
+ if (PI != PE)
+ return false;
+ if (Pred1 == Pred2)
+ return false;
+
+ // Try to thread one of the guards of the block.
+ // TODO: Look up deeper than to immediate predecessor?
+ auto *Parent = Pred1->getSinglePredecessor();
+ if (!Parent || Parent != Pred2->getSinglePredecessor())
+ return false;
+
+ if (auto *BI = dyn_cast<BranchInst>(Parent->getTerminator()))
+ for (auto &I : *BB)
+ if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>()))
+ if (ThreadGuard(BB, cast<IntrinsicInst>(&I), BI))
+ return true;
+
+ return false;
+}
+
+/// Try to propagate the guard from BB, which is the lower block of a diamond,
+/// to one of its branches, in case the diamond's condition implies the
+/// guard's condition.
+bool JumpThreadingPass::ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard,
+ BranchInst *BI) {
+ assert(BI->getNumSuccessors() == 2 && "Wrong number of successors?");
+ assert(BI->isConditional() && "Unconditional branch has 2 successors?");
+ Value *GuardCond = Guard->getArgOperand(0);
+ Value *BranchCond = BI->getCondition();
+ BasicBlock *TrueDest = BI->getSuccessor(0);
+ BasicBlock *FalseDest = BI->getSuccessor(1);
+
+ auto &DL = BB->getModule()->getDataLayout();
+ bool TrueDestIsSafe = false;
+ bool FalseDestIsSafe = false;
+
+ // True dest is safe if BranchCond => GuardCond.
+ auto Impl = isImpliedCondition(BranchCond, GuardCond, DL);
+ if (Impl && *Impl)
+ TrueDestIsSafe = true;
+ else {
+ // False dest is safe if !BranchCond => GuardCond.
+ Impl =
+ isImpliedCondition(BranchCond, GuardCond, DL, /* InvertAPred */ true);
+ if (Impl && *Impl)
+ FalseDestIsSafe = true;
+ }
+
+ if (!TrueDestIsSafe && !FalseDestIsSafe)
+ return false;
+
+ BasicBlock *UnguardedBlock = TrueDestIsSafe ? TrueDest : FalseDest;
+ BasicBlock *GuardedBlock = FalseDestIsSafe ? TrueDest : FalseDest;
+
+ ValueToValueMapTy UnguardedMapping, GuardedMapping;
+ Instruction *AfterGuard = Guard->getNextNode();
+ unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold);
+ if (Cost > BBDupThreshold)
+ return false;
+ // Duplicate all instructions before the guard and the guard itself to the
+ // branch where implication is not proved.
+ GuardedBlock = DuplicateInstructionsInSplitBetween(
+ BB, GuardedBlock, AfterGuard, GuardedMapping);
+ assert(GuardedBlock && "Could not create the guarded block?");
+ // Duplicate all instructions before the guard in the unguarded branch.
+ // Since we have successfully duplicated the guarded block and this block
+ // has fewer instructions, we expect it to succeed.
+ UnguardedBlock = DuplicateInstructionsInSplitBetween(BB, UnguardedBlock,
+ Guard, UnguardedMapping);
+ assert(UnguardedBlock && "Could not create the unguarded block?");
+ DEBUG(dbgs() << "Moved guard " << *Guard << " to block "
+ << GuardedBlock->getName() << "\n");
+
+ // Some instructions before the guard may still have uses. For them, we need
+ // to create Phi nodes merging their copies in both guarded and unguarded
+ // branches. Those instructions that have no uses can be just removed.
+ SmallVector<Instruction *, 4> ToRemove;
+ for (auto BI = BB->begin(); &*BI != AfterGuard; ++BI)
+ if (!isa<PHINode>(&*BI))
+ ToRemove.push_back(&*BI);
+
+ Instruction *InsertionPoint = &*BB->getFirstInsertionPt();
+ assert(InsertionPoint && "Empty block?");
+ // Substitute with Phis & remove.
+ for (auto *Inst : reverse(ToRemove)) {
+ if (!Inst->use_empty()) {
+ PHINode *NewPN = PHINode::Create(Inst->getType(), 2);
+ NewPN->addIncoming(UnguardedMapping[Inst], UnguardedBlock);
+ NewPN->addIncoming(GuardedMapping[Inst], GuardedBlock);
+ NewPN->insertBefore(InsertionPoint);
+ Inst->replaceAllUsesWith(NewPN);
+ }
+ Inst->eraseFromParent();
+ }
+ return true;
+}
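The heart of ThreadGuard is the pair of implication queries: the true successor is safe to enter unguarded when BranchCond implies GuardCond, and the false successor is safe when the negation of BranchCond implies GuardCond. A reduced sketch of that decision, using the same isImpliedCondition signature the diff relies on:

  Optional<bool> Impl = isImpliedCondition(BranchCond, GuardCond, DL);
  bool TrueDestIsSafe = Impl && *Impl;
  bool FalseDestIsSafe = false;
  if (!TrueDestIsSafe) {
    Impl = isImpliedCondition(BranchCond, GuardCond, DL,
                              /* InvertAPred */ true);
    FalseDestIsSafe = Impl && *Impl;
  }
  if (!TrueDestIsSafe && !FalseDestIsSafe)
    return false; // neither branch proves the guard; nothing to thread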
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index f51d11c04cb2..340c81fed0fd 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -77,10 +77,16 @@ STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
STATISTIC(NumPromoted, "Number of memory locations promoted to registers");
+/// Memory promotion is enabled by default.
static cl::opt<bool>
- DisablePromotion("disable-licm-promotion", cl::Hidden,
+ DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false),
cl::desc("Disable memory promotion in LICM pass"));
+static cl::opt<uint32_t> MaxNumUsesTraversed(
+ "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
+ cl::desc("Max num uses visited for identifying load "
+ "invariance in loop using invariant start (default = 8)"));
+
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo);
@@ -201,9 +207,9 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, ORE, true))
return PreservedAnalyses::all();
- // FIXME: There is no setPreservesCFG in the new PM. When that becomes
- // available, it should be used here.
- return getLoopPassPreservedAnalyses();
+ auto PA = getLoopPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
char LegacyLICMPass::ID = 0;
@@ -425,6 +431,29 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
continue;
}
+ // Attempt to move floating-point division out of the loop by converting
+ // it to a reciprocal multiplication.
+ if (I.getOpcode() == Instruction::FDiv &&
+ CurLoop->isLoopInvariant(I.getOperand(1)) &&
+ I.hasAllowReciprocal()) {
+ auto Divisor = I.getOperand(1);
+ auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0);
+ auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor);
+ ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags());
+ ReciprocalDivisor->insertBefore(&I);
+
+ auto Product = BinaryOperator::CreateFMul(I.getOperand(0),
+ ReciprocalDivisor);
+ Product->setFastMathFlags(I.getFastMathFlags());
+ Product->insertAfter(&I);
+ I.replaceAllUsesWith(Product);
+ I.eraseFromParent();
+
+ hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE);
+ Changed = true;
+ continue;
+ }
+
// Try hoisting the instruction out to the preheader. We can only do this
// if all of the operands of the instruction are loop invariant and if it
// is safe to hoist the instruction.
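The effect of the new FDiv rewrite is easiest to see on a small loop. A source-level illustration (not from the patch), assuming fast-math flags that permit reciprocal transforms:

  // Before: one division per iteration; the divisor d is loop-invariant.
  for (int i = 0; i < n; ++i)
    y[i] = x[i] / d;

  // After: the reciprocal is computed once in the preheader and the loop
  // body keeps only a multiplication.
  double r = 1.0 / d;
  for (int i = 0; i < n; ++i)
    y[i] = x[i] * r;

Note that only the reciprocal is hoisted; the multiply replaces the division in place, and both new instructions inherit the original instruction's fast-math flags.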
@@ -461,7 +490,10 @@ void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
SafetyInfo->MayThrow = SafetyInfo->HeaderMayThrow;
// Iterate over loop instructions and compute safety info.
- for (Loop::block_iterator BB = CurLoop->block_begin(),
+ // Skip the header, as its safety info has already been computed and
+ // stored in HeaderMayThrow. The first block in the loop's block list is
+ // guaranteed to be the header.
+ assert(Header == *CurLoop->getBlocks().begin() && "First block must be header");
+ for (Loop::block_iterator BB = std::next(CurLoop->block_begin()),
BBE = CurLoop->block_end();
(BB != BBE) && !SafetyInfo->MayThrow; ++BB)
for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
@@ -477,6 +509,59 @@ void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
SafetyInfo->BlockColors = colorEHFunclets(*Fn);
}
+// Return true if LI is invariant within the scope of the loop. LI is
+// invariant if CurLoop is dominated by an invariant.start representing the
+// same memory location and size as the memory location LI loads from, and
+// the invariant.start has no uses.
+static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
+ Loop *CurLoop) {
+ Value *Addr = LI->getOperand(0);
+ const DataLayout &DL = LI->getModule()->getDataLayout();
+ const uint32_t LocSizeInBits = DL.getTypeSizeInBits(
+ cast<PointerType>(Addr->getType())->getElementType());
+
+ // If the type is i8 addrspace(x)*, we know this is the type of the
+ // llvm.invariant.start operand.
+ auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()),
+ LI->getPointerAddressSpace());
+ unsigned BitcastsVisited = 0;
+ // Look through bitcasts until we reach the i8* type (this is the
+ // invariant.start operand type).
+ while (Addr->getType() != PtrInt8Ty) {
+ auto *BC = dyn_cast<BitCastInst>(Addr);
+ // Avoid traversing a long chain of bitcasts.
+ if (++BitcastsVisited > MaxNumUsesTraversed || !BC)
+ return false;
+ Addr = BC->getOperand(0);
+ }
+
+ unsigned UsesVisited = 0;
+ // Traverse all uses of the load operand value, to see if invariant.start is
+ // one of the uses, and whether it dominates the load instruction.
+ for (auto *U : Addr->users()) {
+    // Avoid traversing a load operand with a large number of users.
+ if (++UsesVisited > MaxNumUsesTraversed)
+ return false;
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
+    // If there are escaping uses of the invariant.start instruction, the load
+    // may be non-invariant.
+ if (!II || II->getIntrinsicID() != Intrinsic::invariant_start ||
+ II->hasNUsesOrMore(1))
+ continue;
+ unsigned InvariantSizeInBits =
+ cast<ConstantInt>(II->getArgOperand(0))->getSExtValue() * 8;
+ // Confirm the invariant.start location size contains the load operand size
+ // in bits. Also, the invariant.start should dominate the load, and we
+ // should not hoist the load out of a loop that contains this dominating
+ // invariant.start.
+ if (LocSizeInBits <= InvariantSizeInBits &&
+ DT->properlyDominates(II->getParent(), CurLoop->getHeader()))
+ return true;
+ }
+
+ return false;
+}
+
bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
Loop *CurLoop, AliasSetTracker *CurAST,
LoopSafetyInfo *SafetyInfo,
@@ -493,6 +578,10 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (LI->getMetadata(LLVMContext::MD_invariant_load))
return true;
+ // This checks for an invariant.start dominating the load.
+ if (isLoadInvariantInLoop(LI, DT, CurLoop))
+ return true;
+
// Don't hoist loads which have may-aliased stores in loop.
uint64_t Size = 0;
if (LI->getType()->isSized())
@@ -782,7 +871,7 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I
<< "\n");
ORE->emit(OptimizationRemark(DEBUG_TYPE, "Hoisted", &I)
- << "hosting " << ore::NV("Inst", &I));
+ << "hoisting " << ore::NV("Inst", &I));
// Metadata can be dependent on conditions we are hoisting above.
// Conservatively strip all metadata on the instruction unless we were
@@ -852,6 +941,7 @@ class LoopPromoter : public LoadAndStorePromoter {
LoopInfo &LI;
DebugLoc DL;
int Alignment;
+ bool UnorderedAtomic;
AAMDNodes AATags;
Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
@@ -875,10 +965,11 @@ public:
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
- const AAMDNodes &AATags)
+ bool UnorderedAtomic, const AAMDNodes &AATags)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
- LI(li), DL(std::move(dl)), Alignment(alignment), AATags(AATags) {}
+ LI(li), DL(std::move(dl)), Alignment(alignment),
+        UnorderedAtomic(UnorderedAtomic), AATags(AATags) {}
bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction *> &) const override {
@@ -902,6 +993,8 @@ public:
Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
Instruction *InsertPos = LoopInsertPts[i];
StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
+ if (UnorderedAtomic)
+ NewSI->setOrdering(AtomicOrdering::Unordered);
NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
if (AATags)
@@ -992,18 +1085,41 @@ bool llvm::promoteLoopAccessesToScalars(
// We start with an alignment of one and try to find instructions that allow
// us to prove better alignment.
unsigned Alignment = 1;
+ // Keep track of which types of access we see
+ bool SawUnorderedAtomic = false;
+ bool SawNotAtomic = false;
AAMDNodes AATags;
const DataLayout &MDL = Preheader->getModule()->getDataLayout();
+  // Do we know this object does not escape?
+ bool IsKnownNonEscapingObject = false;
if (SafetyInfo->MayThrow) {
// If a loop can throw, we have to insert a store along each unwind edge.
// That said, we can't actually make the unwind edge explicit. Therefore,
// we have to prove that the store is dead along the unwind edge.
//
- // Currently, this code just special-cases alloca instructions.
- if (!isa<AllocaInst>(GetUnderlyingObject(SomePtr, MDL)))
- return false;
+    // If the underlying object is neither an alloca nor a pointer that does
+    // not escape, then we cannot effectively prove that the store is dead
+    // along the unwind edge: the caller of this function could have ways to
+    // access the pointed-to object.
+ Value *Object = GetUnderlyingObject(SomePtr, MDL);
+ // If this is a base pointer we do not understand, simply bail.
+ // We only handle alloca and return value from alloc-like fn right now.
+ if (!isa<AllocaInst>(Object)) {
+ if (!isAllocLikeFn(Object, TLI))
+ return false;
+      // If this is an alloc-like fn, there are more constraints we need to
+      // verify. More specifically, we must make sure that the pointer can not
+      // escape.
+      //
+      // NOTE: PointerMayBeCaptured is not enough as the pointer may have
+      // escaped even though it is not captured by the enclosing function.
+      // Standard allocation functions like malloc, calloc, and operator new
+      // return values which can be assumed not to have previously escaped.
+ if (PointerMayBeCaptured(Object, true, true))
+ return false;
+ IsKnownNonEscapingObject = true;
+ }
}
// Check that all of the pointers in the alias set have the same type. We
@@ -1029,8 +1145,11 @@ bool llvm::promoteLoopAccessesToScalars(
// it.
if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
assert(!Load->isVolatile() && "AST broken");
- if (!Load->isSimple())
+ if (!Load->isUnordered())
return false;
+
+ SawUnorderedAtomic |= Load->isAtomic();
+ SawNotAtomic |= !Load->isAtomic();
if (!DereferenceableInPH)
DereferenceableInPH = isSafeToExecuteUnconditionally(
@@ -1041,9 +1160,12 @@ bool llvm::promoteLoopAccessesToScalars(
if (UI->getOperand(1) != ASIV)
continue;
assert(!Store->isVolatile() && "AST broken");
- if (!Store->isSimple())
+ if (!Store->isUnordered())
return false;
+ SawUnorderedAtomic |= Store->isAtomic();
+ SawNotAtomic |= !Store->isAtomic();
+
// If the store is guaranteed to execute, both properties are satisfied.
// We may want to check if a store is guaranteed to execute even if we
// already know that promotion is safe, since it may have higher
@@ -1096,6 +1218,12 @@ bool llvm::promoteLoopAccessesToScalars(
}
}
+ // If we found both an unordered atomic instruction and a non-atomic memory
+ // access, bail. We can't blindly promote non-atomic to atomic since we
+  // might not be able to lower the result. We can't downgrade since that
+  // would violate the memory model. Also, align 0 is an error for atomics.
+ if (SawUnorderedAtomic && SawNotAtomic)
+ return false;
// If we couldn't prove we can hoist the load, bail.
if (!DereferenceableInPH)
@@ -1106,10 +1234,15 @@ bool llvm::promoteLoopAccessesToScalars(
// stores along paths which originally didn't have them without violating the
// memory model.
if (!SafeToInsertStore) {
- Value *Object = GetUnderlyingObject(SomePtr, MDL);
- SafeToInsertStore =
- (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
+ // If this is a known non-escaping object, it is safe to insert the stores.
+ if (IsKnownNonEscapingObject)
+ SafeToInsertStore = true;
+ else {
+ Value *Object = GetUnderlyingObject(SomePtr, MDL);
+ SafeToInsertStore =
+ (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
!PointerMayBeCaptured(Object, true, true);
+ }
}
// If we've still failed to prove we can sink the store, give up.
@@ -1134,12 +1267,15 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVector<PHINode *, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags);
+ InsertPts, PIC, *CurAST, *LI, DL, Alignment,
+ SawUnorderedAtomic, AATags);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
LoadInst *PreheaderLoad = new LoadInst(
SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator());
+ if (SawUnorderedAtomic)
+ PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
PreheaderLoad->setAlignment(Alignment);
PreheaderLoad->setDebugLoc(DL);
if (AATags)
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
index 389f1c595aa4..02215d3450c2 100644
--- a/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/lib/Transforms/Scalar/LoadCombine.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -53,18 +54,20 @@ struct LoadPOPPair {
class LoadCombine : public BasicBlockPass {
LLVMContext *C;
AliasAnalysis *AA;
+ DominatorTree *DT;
public:
LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) {
initializeLoadCombinePass(*PassRegistry::getPassRegistry());
}
-
+
using llvm::Pass::doInitialization;
bool doInitialization(Function &) override;
bool runOnBasicBlock(BasicBlock &BB) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
}
@@ -234,6 +237,14 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
return false;
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+  // Skip analyzing dead blocks (not forward reachable from function entry).
+  if (!DT->isReachableFromEntry(&BB)) {
+    DEBUG(dbgs() << "LC: skipping unreachable " << BB.getName() << " in "
+                 << BB.getParent()->getName() << "\n");
+ return false;
+ }
IRBuilder<TargetFolder> TheBuilder(
BB.getContext(), TargetFolder(BB.getModule()->getDataLayout()));
@@ -245,13 +256,17 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
bool Combined = false;
unsigned Index = 0;
for (auto &I : BB) {
- if (I.mayThrow() || (I.mayWriteToMemory() && AST.containsUnknown(&I))) {
+ if (I.mayThrow() || AST.containsUnknown(&I)) {
if (combineLoads(LoadMap))
Combined = true;
LoadMap.clear();
AST.clear();
continue;
}
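+    // Remember writes in the alias set tracker so that a later load that may
+    // alias one of them triggers the flush above, instead of flushing on
+    // every write unconditionally.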
+ if (I.mayWriteToMemory()) {
+ AST.add(&I);
+ continue;
+ }
LoadInst *LI = dyn_cast<LoadInst>(&I);
if (!LI)
continue;
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index cca75a365024..73e8ce0e1d93 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -29,32 +29,31 @@ using namespace llvm;
STATISTIC(NumDeleted, "Number of loops deleted");
-/// isLoopDead - Determined if a loop is dead. This assumes that we've already
-/// checked for unique exit and exiting blocks, and that the code is in LCSSA
-/// form.
-bool LoopDeletionPass::isLoopDead(Loop *L, ScalarEvolution &SE,
- SmallVectorImpl<BasicBlock *> &exitingBlocks,
- SmallVectorImpl<BasicBlock *> &exitBlocks,
- bool &Changed, BasicBlock *Preheader) {
- BasicBlock *exitBlock = exitBlocks[0];
-
+/// Determines if a loop is dead.
+///
+/// This assumes that we've already checked for unique exit and exiting blocks,
+/// and that the code is in LCSSA form.
+static bool isLoopDead(Loop *L, ScalarEvolution &SE,
+ SmallVectorImpl<BasicBlock *> &ExitingBlocks,
+ BasicBlock *ExitBlock, bool &Changed,
+ BasicBlock *Preheader) {
// Make sure that all PHI entries coming from the loop are loop invariant.
// Because the code is in LCSSA form, any values used outside of the loop
// must pass through a PHI in the exit block, meaning that this check is
// sufficient to guarantee that no loop-variant values are used outside
// of the loop.
- BasicBlock::iterator BI = exitBlock->begin();
+ BasicBlock::iterator BI = ExitBlock->begin();
bool AllEntriesInvariant = true;
bool AllOutgoingValuesSame = true;
while (PHINode *P = dyn_cast<PHINode>(BI)) {
- Value *incoming = P->getIncomingValueForBlock(exitingBlocks[0]);
+ Value *incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
// Make sure all exiting blocks produce the same incoming value for the exit
// block. If there are different incoming values for different exiting
// blocks, then it is impossible to statically determine which value should
// be used.
AllOutgoingValuesSame =
- all_of(makeArrayRef(exitingBlocks).slice(1), [&](BasicBlock *BB) {
+ all_of(makeArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) {
return incoming == P->getIncomingValueForBlock(BB);
});
@@ -78,33 +77,37 @@ bool LoopDeletionPass::isLoopDead(Loop *L, ScalarEvolution &SE,
// Make sure that no instructions in the block have potential side-effects.
// This includes instructions that could write to memory, and loads that are
- // marked volatile. This could be made more aggressive by using aliasing
- // information to identify readonly and readnone calls.
- for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
- LI != LE; ++LI) {
- for (Instruction &I : **LI) {
- if (I.mayHaveSideEffects())
- return false;
- }
- }
-
+ // marked volatile.
+  for (auto &BB : L->blocks())
+    if (any_of(*BB, [](Instruction &I) { return I.mayHaveSideEffects(); }))
+      return false;
return true;
}
-/// Remove dead loops, by which we mean loops that do not impact the observable
-/// behavior of the program other than finite running time. Note we do ensure
-/// that this never remove a loop that might be infinite, as doing so could
-/// change the halting/non-halting nature of a program. NOTE: This entire
-/// process relies pretty heavily on LoopSimplify and LCSSA in order to make
-/// various safety checks work.
-bool LoopDeletionPass::runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
- LoopInfo &loopInfo) {
+/// Remove a loop if it is dead.
+///
+/// A loop is considered dead if it does not impact the observable behavior of
+/// the program other than finite running time. This never removes a loop that
+/// might be infinite, as doing so could change the halting/non-halting nature
+/// of a program.
+///
+/// This entire process relies pretty heavily on LoopSimplify form and LCSSA in
+/// order to make various safety checks work.
+///
+/// \returns true if any changes were made. This may mutate the loop even if it
+/// is unable to delete it due to hoisting trivially loop invariant
+/// instructions out of the loop.
+///
+/// This also updates the relevant analysis information in \p DT, \p SE, and \p
+/// LI. It also updates the loop PM if an updater struct is provided.
+static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+ LoopInfo &LI, LPMUpdater *Updater = nullptr) {
assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
// We can only remove the loop if there is a preheader that we can
// branch from after removing it.
- BasicBlock *preheader = L->getLoopPreheader();
- if (!preheader)
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader)
return false;
// If LoopSimplify form is not available, stay out of trouble.
@@ -116,22 +119,20 @@ bool LoopDeletionPass::runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
if (L->begin() != L->end())
return false;
- SmallVector<BasicBlock *, 4> exitingBlocks;
- L->getExitingBlocks(exitingBlocks);
-
- SmallVector<BasicBlock *, 4> exitBlocks;
- L->getUniqueExitBlocks(exitBlocks);
+ SmallVector<BasicBlock *, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
// We require that the loop only have a single exit block. Otherwise, we'd
// be in the situation of needing to be able to solve statically which exit
// block will be branched to, or trying to preserve the branching logic in
// a loop invariant manner.
- if (exitBlocks.size() != 1)
+ BasicBlock *ExitBlock = L->getUniqueExitBlock();
+ if (!ExitBlock)
return false;
// Finally, we have to check that the loop really is dead.
bool Changed = false;
- if (!isLoopDead(L, SE, exitingBlocks, exitBlocks, Changed, preheader))
+ if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader))
return Changed;
// Don't remove loops for which we can't solve the trip count.
@@ -142,11 +143,13 @@ bool LoopDeletionPass::runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
// Now that we know the removal is safe, remove the loop by changing the
// branch from the preheader to go to the single exit block.
- BasicBlock *exitBlock = exitBlocks[0];
-
+ //
// Because we're deleting a large chunk of code at once, the sequence in which
- // we remove things is very important to avoid invalidation issues. Don't
- // mess with this unless you have good reason and know what you're doing.
+ // we remove things is very important to avoid invalidation issues.
+
+ // If we have an LPM updater, tell it about the loop being removed.
+ if (Updater)
+ Updater->markLoopAsDeleted(*L);
// Tell ScalarEvolution that the loop is deleted. Do this before
// deleting the loop so that ScalarEvolution can look at the loop
@@ -154,19 +157,19 @@ bool LoopDeletionPass::runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
SE.forgetLoop(L);
// Connect the preheader directly to the exit block.
- TerminatorInst *TI = preheader->getTerminator();
- TI->replaceUsesOfWith(L->getHeader(), exitBlock);
+ TerminatorInst *TI = Preheader->getTerminator();
+ TI->replaceUsesOfWith(L->getHeader(), ExitBlock);
// Rewrite phis in the exit block to get their inputs from
// the preheader instead of the exiting block.
- BasicBlock *exitingBlock = exitingBlocks[0];
- BasicBlock::iterator BI = exitBlock->begin();
+ BasicBlock *ExitingBlock = ExitingBlocks[0];
+ BasicBlock::iterator BI = ExitBlock->begin();
while (PHINode *P = dyn_cast<PHINode>(BI)) {
- int j = P->getBasicBlockIndex(exitingBlock);
+ int j = P->getBasicBlockIndex(ExitingBlock);
assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
- P->setIncomingBlock(j, preheader);
- for (unsigned i = 1; i < exitingBlocks.size(); ++i)
- P->removeIncomingValue(exitingBlocks[i]);
+ P->setIncomingBlock(j, Preheader);
+ for (unsigned i = 1; i < ExitingBlocks.size(); ++i)
+ P->removeIncomingValue(ExitingBlocks[i]);
++BI;
}
@@ -175,11 +178,11 @@ bool LoopDeletionPass::runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
SmallVector<DomTreeNode*, 8> ChildNodes;
for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end();
LI != LE; ++LI) {
- // Move all of the block's children to be children of the preheader, which
+ // Move all of the block's children to be children of the Preheader, which
// allows us to remove the domtree entry for the block.
ChildNodes.insert(ChildNodes.begin(), DT[*LI]->begin(), DT[*LI]->end());
for (DomTreeNode *ChildNode : ChildNodes) {
- DT.changeImmediateDominator(ChildNode, DT[preheader]);
+ DT.changeImmediateDominator(ChildNode, DT[Preheader]);
}
ChildNodes.clear();
@@ -204,22 +207,19 @@ bool LoopDeletionPass::runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
SmallPtrSet<BasicBlock *, 8> blocks;
blocks.insert(L->block_begin(), L->block_end());
for (BasicBlock *BB : blocks)
- loopInfo.removeBlock(BB);
+ LI.removeBlock(BB);
// The last step is to update LoopInfo now that we've eliminated this loop.
- loopInfo.markAsRemoved(L);
- Changed = true;
-
+ LI.markAsRemoved(L);
++NumDeleted;
- return Changed;
+ return true;
}
PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
- LPMUpdater &) {
- bool Changed = runImpl(&L, AR.DT, AR.SE, AR.LI);
- if (!Changed)
+ LPMUpdater &Updater) {
+ if (!deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, &Updater))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
@@ -257,8 +257,7 @@ bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &) {
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- LoopInfo &loopInfo = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- LoopDeletionPass Impl;
- return Impl.runImpl(L, DT, SE, loopInfo);
+ return deleteLoopIfDead(L, DT, SE, LI);
}
diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp
index 19716b28ad66..3624bba10345 100644
--- a/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -812,29 +812,29 @@ private:
const RuntimePointerChecking *RtPtrChecking) {
SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
- std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
- [&](const RuntimePointerChecking::PointerCheck &Check) {
- for (unsigned PtrIdx1 : Check.first->Members)
- for (unsigned PtrIdx2 : Check.second->Members)
- // Only include this check if there is a pair of pointers
- // that require checking and the pointers fall into
- // separate partitions.
- //
- // (Note that we already know at this point that the two
- // pointer groups need checking but it doesn't follow
- // that each pair of pointers within the two groups need
- // checking as well.
- //
- // In other words we don't want to include a check just
- // because there is a pair of pointers between the two
- // pointer groups that require checks and a different
- // pair whose pointers fall into different partitions.)
- if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
- !RuntimePointerChecking::arePointersInSamePartition(
- PtrToPartition, PtrIdx1, PtrIdx2))
- return true;
- return false;
- });
+ copy_if(AllChecks, std::back_inserter(Checks),
+ [&](const RuntimePointerChecking::PointerCheck &Check) {
+ for (unsigned PtrIdx1 : Check.first->Members)
+ for (unsigned PtrIdx2 : Check.second->Members)
+ // Only include this check if there is a pair of pointers
+ // that require checking and the pointers fall into
+ // separate partitions.
+ //
+ // (Note that we already know at this point that the two
+ // pointer groups need checking but it doesn't follow
+ // that each pair of pointers within the two groups need
+ // checking as well.
+ //
+ // In other words we don't want to include a check just
+ // because there is a pair of pointers between the two
+ // pointer groups that require checks and a different
+ // pair whose pointers fall into different partitions.)
+ if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) &&
+ !RuntimePointerChecking::arePointersInSamePartition(
+ PtrToPartition, PtrIdx1, PtrIdx2))
+ return true;
+ return false;
+ });
return Checks;
}
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 5fec51c095d0..946d85d7360f 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -236,9 +236,9 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
ApplyCodeSizeHeuristics =
L->getHeader()->getParent()->optForSize() && UseLIRCodeSizeHeurs;
- HasMemset = TLI->has(LibFunc::memset);
- HasMemsetPattern = TLI->has(LibFunc::memset_pattern16);
- HasMemcpy = TLI->has(LibFunc::memcpy);
+ HasMemset = TLI->has(LibFunc_memset);
+ HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
+ HasMemcpy = TLI->has(LibFunc_memcpy);
if (HasMemset || HasMemsetPattern || HasMemcpy)
if (SE->hasLoopInvariantBackedgeTakenCount(L))
@@ -823,7 +823,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Module *M = TheStore->getModule();
Value *MSP =
M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(),
- Int8PtrTy, Int8PtrTy, IntPtr, (void *)nullptr);
+ Int8PtrTy, Int8PtrTy, IntPtr);
inferLibFuncAttributes(*M->getFunction("memset_pattern16"), *TLI);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 69102d10ff60..28e71ca05436 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -189,7 +189,9 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
if (!SimplifyLoopInst(&L, &AR.DT, &AR.LI, &AR.AC, &AR.TLI))
return PreservedAnalyses::all();
- return getLoopPassPreservedAnalyses();
+ auto PA = getLoopPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
char LoopInstSimplifyLegacyPass::ID = 0;
diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp
index e9f84edd1cbf..9f3875a3027f 100644
--- a/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -39,7 +39,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
+
using namespace llvm;
#define DEBUG_TYPE "loop-interchange"
diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp
index 8fb580183e30..cf63cb660db8 100644
--- a/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -20,13 +20,14 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -45,9 +46,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
-#include <forward_list>
-#include <cassert>
#include <algorithm>
+#include <cassert>
+#include <forward_list>
#include <set>
#include <tuple>
#include <utility>
@@ -373,15 +374,15 @@ public:
const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
- std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
- [&](const RuntimePointerChecking::PointerCheck &Check) {
- for (auto PtrIdx1 : Check.first->Members)
- for (auto PtrIdx2 : Check.second->Members)
- if (needsChecking(PtrIdx1, PtrIdx2,
- PtrsWrittenOnFwdingPath, CandLoadPtrs))
- return true;
- return false;
- });
+ copy_if(AllChecks, std::back_inserter(Checks),
+ [&](const RuntimePointerChecking::PointerCheck &Check) {
+ for (auto PtrIdx1 : Check.first->Members)
+ for (auto PtrIdx2 : Check.second->Members)
+ if (needsChecking(PtrIdx1, PtrIdx2, PtrsWrittenOnFwdingPath,
+ CandLoadPtrs))
+ return true;
+ return false;
+ });
DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size() << "):\n");
DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
@@ -558,6 +559,32 @@ private:
PredicatedScalarEvolution PSE;
};
+static bool
+eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT,
+ function_ref<const LoopAccessInfo &(Loop &)> GetLAI) {
+ // Build up a worklist of inner-loops to transform to avoid iterator
+ // invalidation.
+ // FIXME: This logic comes from other passes that actually change the loop
+ // nest structure. It isn't clear this is necessary (or useful) for a pass
+ // which merely optimizes the use of loads in a loop.
+ SmallVector<Loop *, 8> Worklist;
+
+ for (Loop *TopLevelLoop : LI)
+ for (Loop *L : depth_first(TopLevelLoop))
+ // We only handle inner-most loops.
+ if (L->empty())
+ Worklist.push_back(L);
+
+ // Now walk the identified inner loops.
+ bool Changed = false;
+ for (Loop *L : Worklist) {
+ // The actual work is performed by LoadEliminationForLoop.
+ LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT);
+ Changed |= LEL.processLoop();
+ }
+ return Changed;
+}
+
/// \brief The pass. Most of the work is delegated to the per-loop
/// LoadEliminationForLoop class.
class LoopLoadElimination : public FunctionPass {
@@ -570,32 +597,14 @@ public:
if (skipFunction(F))
return false;
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- // Build up a worklist of inner-loops to vectorize. This is necessary as the
- // act of distributing a loop creates new loops and can invalidate iterators
- // across the loops.
- SmallVector<Loop *, 8> Worklist;
-
- for (Loop *TopLevelLoop : *LI)
- for (Loop *L : depth_first(TopLevelLoop))
- // We only handle inner-most loops.
- if (L->empty())
- Worklist.push_back(L);
-
- // Now walk the identified inner loops.
- bool Changed = false;
- for (Loop *L : Worklist) {
- const LoopAccessInfo &LAI = LAA->getInfo(L);
- // The actual work is performed by LoadEliminationForLoop.
- LoadEliminationForLoop LEL(L, LI, LAI, DT);
- Changed |= LEL.processLoop();
- }
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
// Process each loop nest in the function.
- return Changed;
+ return eliminateLoadsAcrossLoops(
+ F, LI, DT,
+ [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); });
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -631,4 +640,28 @@ FunctionPass *createLoopLoadEliminationPass() {
return new LoopLoadElimination();
}
+PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &AA = AM.getResult<AAManager>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+
+ auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
+ bool Changed = eliminateLoadsAcrossLoops(
+ F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & {
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI};
+ return LAM.getResult<LoopAccessAnalysis>(L, AR);
+ });
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ return PA;
+}
+
} // end namespace llvm
diff --git a/lib/Transforms/Scalar/LoopPassManager.cpp b/lib/Transforms/Scalar/LoopPassManager.cpp
index 028f4bba8b1d..10f6fcdcfdb7 100644
--- a/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -42,6 +42,13 @@ PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
break;
}
+#ifndef NDEBUG
+ // Verify the loop structure and LCSSA form before visiting the loop.
+ L.verifyLoop();
+ assert(L.isRecursivelyLCSSAForm(AR.DT, AR.LI) &&
+ "Loops must remain in LCSSA form!");
+#endif
+
// Update the analysis manager as each pass runs and potentially
// invalidates analyses.
AM.invalidate(L, PassPA);
diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp
new file mode 100644
index 000000000000..0ce604429326
--- /dev/null
+++ b/lib/Transforms/Scalar/LoopPredication.cpp
@@ -0,0 +1,282 @@
+//===-- LoopPredication.cpp - Guard based loop predication pass -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LoopPredication pass tries to convert loop-variant range checks into
+// loop-invariant ones by widening checks across loop iterations. For example,
+// it will convert
+//
+// for (i = 0; i < n; i++) {
+// guard(i < len);
+// ...
+// }
+//
+// to
+//
+// for (i = 0; i < n; i++) {
+// guard(n - 1 < len);
+// ...
+// }
+//
+// After this transformation the condition of the guard is loop invariant, so
+// loop-unswitch can later unswitch the loop on this condition, which
+// basically predicates the loop by the widened condition:
+//
+// if (n - 1 < len)
+// for (i = 0; i < n; i++) {
+// ...
+// }
+// else
+// deoptimize
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LoopPredication.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+#define DEBUG_TYPE "loop-predication"
+
+using namespace llvm;
+
+namespace {
+class LoopPredication {
+ ScalarEvolution *SE;
+
+ Loop *L;
+ const DataLayout *DL;
+ BasicBlock *Preheader;
+
+ Optional<Value *> widenICmpRangeCheck(ICmpInst *ICI, SCEVExpander &Expander,
+ IRBuilder<> &Builder);
+ bool widenGuardConditions(IntrinsicInst *II, SCEVExpander &Expander);
+
+public:
+  LoopPredication(ScalarEvolution *SE) : SE(SE) {}
+ bool runOnLoop(Loop *L);
+};
+
+class LoopPredicationLegacyPass : public LoopPass {
+public:
+ static char ID;
+ LoopPredicationLegacyPass() : LoopPass(ID) {
+ initializeLoopPredicationLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ getLoopAnalysisUsage(AU);
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+ if (skipLoop(L))
+ return false;
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ LoopPredication LP(SE);
+ return LP.runOnLoop(L);
+ }
+};
+
+char LoopPredicationLegacyPass::ID = 0;
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(LoopPredicationLegacyPass, "loop-predication",
+ "Loop predication", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_END(LoopPredicationLegacyPass, "loop-predication",
+ "Loop predication", false, false)
+
+Pass *llvm::createLoopPredicationPass() {
+ return new LoopPredicationLegacyPass();
+}
+
+PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
+ LoopStandardAnalysisResults &AR,
+ LPMUpdater &U) {
+ LoopPredication LP(&AR.SE);
+ if (!LP.runOnLoop(&L))
+ return PreservedAnalyses::all();
+
+ return getLoopPassPreservedAnalyses();
+}
+
+/// If ICI can be widened to a loop-invariant condition, emit that condition
+/// in the loop preheader and return it; otherwise return None.
+Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
+ SCEVExpander &Expander,
+ IRBuilder<> &Builder) {
+ DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
+ DEBUG(ICI->dump());
+
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+ const SCEV *LHSS = SE->getSCEV(LHS);
+ if (isa<SCEVCouldNotCompute>(LHSS))
+ return None;
+ const SCEV *RHSS = SE->getSCEV(RHS);
+ if (isa<SCEVCouldNotCompute>(RHSS))
+ return None;
+
+  // Canonicalize RHS to be the loop-invariant bound and LHS to be the
+  // loop-computable index.
+ if (SE->isLoopInvariant(LHSS, L)) {
+ std::swap(LHS, RHS);
+ std::swap(LHSS, RHSS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+ if (!SE->isLoopInvariant(RHSS, L) || !isSafeToExpand(RHSS, *SE))
+ return None;
+
+ const SCEVAddRecExpr *IndexAR = dyn_cast<SCEVAddRecExpr>(LHSS);
+ if (!IndexAR || IndexAR->getLoop() != L)
+ return None;
+
+ DEBUG(dbgs() << "IndexAR: ");
+ DEBUG(IndexAR->dump());
+
+ bool IsIncreasing = false;
+ if (!SE->isMonotonicPredicate(IndexAR, Pred, IsIncreasing))
+ return None;
+
+  // If the predicate is increasing, the condition can change from false to
+  // true as the loop progresses; in this case take the value on the first
+  // iteration for the widened check. Otherwise the condition can change from
+  // true to false as the loop progresses, so take the value on the last
+  // iteration.
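+  //
+  // E.g., for guard(i < len) with i = {0,+,1} and trip count n, the condition
+  // becomes false as i grows, so the value of i on the last iteration, n - 1,
+  // is used, yielding the widened check n - 1 < len.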
+ const SCEV *NewLHSS = IsIncreasing
+ ? IndexAR->getStart()
+ : SE->getSCEVAtScope(IndexAR, L->getParentLoop());
+ if (NewLHSS == IndexAR) {
+ DEBUG(dbgs() << "Can't compute NewLHSS!\n");
+ return None;
+ }
+
+ DEBUG(dbgs() << "NewLHSS: ");
+ DEBUG(NewLHSS->dump());
+
+ if (!SE->isLoopInvariant(NewLHSS, L) || !isSafeToExpand(NewLHSS, *SE))
+ return None;
+
+ DEBUG(dbgs() << "NewLHSS is loop invariant and safe to expand. Expand!\n");
+
+ Type *Ty = LHS->getType();
+ Instruction *InsertAt = Preheader->getTerminator();
+ assert(Ty == RHS->getType() && "icmp operands have different types?");
+ Value *NewLHS = Expander.expandCodeFor(NewLHSS, Ty, InsertAt);
+ Value *NewRHS = Expander.expandCodeFor(RHSS, Ty, InsertAt);
+ return Builder.CreateICmp(Pred, NewLHS, NewRHS);
+}
+
+bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
+ SCEVExpander &Expander) {
+ DEBUG(dbgs() << "Processing guard:\n");
+ DEBUG(Guard->dump());
+
+ IRBuilder<> Builder(cast<Instruction>(Preheader->getTerminator()));
+
+  // The guard condition is expected to be in the form:
+  //   cond1 && cond2 && cond3 ...
+  // Iterate over the subconditions looking for icmp conditions which can be
+  // widened across loop iterations. Collect the resulting subconditions,
+  // widened or not, in the Checks vector.
+ SmallVector<Value *, 4> Worklist(1, Guard->getOperand(0));
+ SmallPtrSet<Value *, 4> Visited;
+
+ SmallVector<Value *, 4> Checks;
+
+ unsigned NumWidened = 0;
+ do {
+ Value *Condition = Worklist.pop_back_val();
+ if (!Visited.insert(Condition).second)
+ continue;
+
+ Value *LHS, *RHS;
+ using namespace llvm::PatternMatch;
+ if (match(Condition, m_And(m_Value(LHS), m_Value(RHS)))) {
+ Worklist.push_back(LHS);
+ Worklist.push_back(RHS);
+ continue;
+ }
+
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) {
+ if (auto NewRangeCheck = widenICmpRangeCheck(ICI, Expander, Builder)) {
+ Checks.push_back(NewRangeCheck.getValue());
+ NumWidened++;
+ continue;
+ }
+ }
+
+ // Save the condition as is if we can't widen it
+ Checks.push_back(Condition);
+  } while (!Worklist.empty());
+
+ if (NumWidened == 0)
+ return false;
+
+ // Emit the new guard condition
+ Builder.SetInsertPoint(Guard);
+ Value *LastCheck = nullptr;
+ for (auto *Check : Checks)
+ if (!LastCheck)
+ LastCheck = Check;
+ else
+ LastCheck = Builder.CreateAnd(LastCheck, Check);
+ Guard->setOperand(0, LastCheck);
+
+ DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
+ return true;
+}
+
+bool LoopPredication::runOnLoop(Loop *Loop) {
+ L = Loop;
+
+ DEBUG(dbgs() << "Analyzing ");
+ DEBUG(L->dump());
+
+ Module *M = L->getHeader()->getModule();
+
+  // There is nothing to do if the module doesn't use guards.
+ auto *GuardDecl =
+ M->getFunction(Intrinsic::getName(Intrinsic::experimental_guard));
+ if (!GuardDecl || GuardDecl->use_empty())
+ return false;
+
+ DL = &M->getDataLayout();
+
+ Preheader = L->getLoopPreheader();
+ if (!Preheader)
+ return false;
+
+ // Collect all the guards into a vector and process later, so as not
+ // to invalidate the instruction iterator.
+ SmallVector<IntrinsicInst *, 4> Guards;
+ for (const auto BB : L->blocks())
+ for (auto &I : *BB)
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::experimental_guard)
+ Guards.push_back(II);
+
+ SCEVExpander Expander(*SE, *DL, "loop-predication");
+
+ bool Changed = false;
+ for (auto *Guard : Guards)
+ Changed |= widenGuardConditions(Guard, Expander);
+
+ return Changed;
+}
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index cc83069d5f52..e5689368de80 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -79,7 +79,8 @@ private:
/// to merge the two values. Do this now.
static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
BasicBlock *OrigPreheader,
- ValueToValueMapTy &ValueMap) {
+ ValueToValueMapTy &ValueMap,
+ SmallVectorImpl<PHINode*> *InsertedPHIs) {
// Remove PHI node entries that are no longer live.
BasicBlock::iterator I, E = OrigHeader->end();
for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
@@ -87,7 +88,7 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
// Now fix up users of the instructions in OrigHeader, inserting PHI nodes
// as necessary.
- SSAUpdater SSA;
+ SSAUpdater SSA(InsertedPHIs);
for (I = OrigHeader->begin(); I != E; ++I) {
Value *OrigHeaderVal = &*I;
@@ -174,6 +175,38 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
}
}
+/// Propagate dbg.value intrinsics through the newly inserted Phis.
+static void insertDebugValues(BasicBlock *OrigHeader,
+ SmallVectorImpl<PHINode*> &InsertedPHIs) {
+ ValueToValueMapTy DbgValueMap;
+
+ // Map existing PHI nodes to their dbg.values.
+ for (auto &I : *OrigHeader) {
+ if (auto DbgII = dyn_cast<DbgInfoIntrinsic>(&I)) {
+ if (auto *Loc = dyn_cast_or_null<PHINode>(DbgII->getVariableLocation()))
+ DbgValueMap.insert({Loc, DbgII});
+ }
+ }
+
+ // Then iterate through the new PHIs and look to see if they use one of the
+ // previously mapped PHIs. If so, insert a new dbg.value intrinsic that will
+ // propagate the info through the new PHI.
+ LLVMContext &C = OrigHeader->getContext();
+ for (auto PHI : InsertedPHIs) {
+ for (auto VI : PHI->operand_values()) {
+ auto V = DbgValueMap.find(VI);
+ if (V != DbgValueMap.end()) {
+ auto *DbgII = cast<DbgInfoIntrinsic>(V->second);
+ Instruction *NewDbgII = DbgII->clone();
+ auto PhiMAV = MetadataAsValue::get(C, ValueAsMetadata::get(PHI));
+ NewDbgII->setOperand(0, PhiMAV);
+ BasicBlock *Parent = PHI->getParent();
+ NewDbgII->insertBefore(Parent->getFirstNonPHIOrDbgOrLifetime());
+ }
+ }
+ }
+}
+
/// Rotate loop LP. Return true if the loop is rotated.
///
/// \param SimplifiedLatch is true if the latch was just folded into the final
@@ -347,9 +380,18 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// remove the corresponding incoming values from the PHI nodes in OrigHeader.
LoopEntryBranch->eraseFromParent();
+
+ SmallVector<PHINode*, 2> InsertedPHIs;
// If there were any uses of instructions in the duplicated block outside the
// loop, update them, inserting PHI nodes as required
- RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap);
+ RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap,
+ &InsertedPHIs);
+
+ // Attach dbg.value intrinsics to the new phis if that phi uses a value that
+ // previously had debug metadata attached. This keeps the debug info
+ // up-to-date in the loop body.
+ if (!InsertedPHIs.empty())
+ insertDebugValues(OrigHeader, InsertedPHIs);
// NewHeader is now the header of the loop.
L->moveToHeader(NewHeader);
@@ -634,6 +676,7 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
bool Changed = LR.processLoop(&L);
if (!Changed)
return PreservedAnalyses::all();
+
return getLoopPassPreservedAnalyses();
}
diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index 16061212ba38..a5a81c33a8eb 100644
--- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -69,6 +69,7 @@ PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM,
LPMUpdater &) {
if (!simplifyLoopCFG(L, AR.DT, AR.LI))
return PreservedAnalyses::all();
+
return getLoopPassPreservedAnalyses();
}
diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp
index f3f415275c0e..c9d55b4594fe 100644
--- a/lib/Transforms/Scalar/LoopSink.cpp
+++ b/lib/Transforms/Scalar/LoopSink.cpp
@@ -1,4 +1,4 @@
-//===-- LoopSink.cpp - Loop Sink Pass ------------------------===//
+//===-- LoopSink.cpp - Loop Sink Pass -------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -28,8 +28,10 @@
// InsertBBs = UseBBs - DomBBs + BB
// For BB in InsertBBs:
// Insert I at BB's beginning
+//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/LoopSink.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
@@ -297,6 +299,42 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
return Changed;
}
+PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
+ LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
+ // Nothing to do if there are no loops.
+ if (LI.empty())
+ return PreservedAnalyses::all();
+
+ AAResults &AA = FAM.getResult<AAManager>(F);
+ DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+
+ // We want to do a postorder walk over the loops. Since loops are a tree this
+ // is equivalent to a reversed preorder walk and preorder is easy to compute
+ // without recursion. Since we reverse the preorder, we will visit siblings
+ // in reverse program order. This isn't expected to matter at all but is more
+ // consistent with sinking algorithms which generally work bottom-up.
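+  //
+  // For example, if loop A contains loop B and is followed by sibling loop C,
+  // the preorder is [A, B, C]; popping from the back visits C, then B, then
+  // A.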
+ SmallVector<Loop *, 4> PreorderLoops = LI.getLoopsInPreorder();
+
+ bool Changed = false;
+ do {
+ Loop &L = *PreorderLoops.pop_back_val();
+
+ // Note that we don't pass SCEV here because it is only used to invalidate
+    // loops in SCEV, and we don't preserve (or request) SCEV at all, making
+    // that unnecessary.
+ Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI,
+ /*ScalarEvolution*/ nullptr);
+ } while (!PreorderLoops.empty());
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
namespace {
struct LegacyLoopSinkPass : public LoopPass {
static char ID;
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 194587a85e7c..af137f6faa63 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -129,6 +129,17 @@ static cl::opt<bool> EnablePhiElim(
"enable-lsr-phielim", cl::Hidden, cl::init(true),
cl::desc("Enable LSR phi elimination"));
+// This flag adds the instruction count to the solution cost comparison.
+static cl::opt<bool> InsnsCost(
+ "lsr-insns-cost", cl::Hidden, cl::init(false),
+ cl::desc("Add instruction count to a LSR cost model"));
+
+// Flag to choose how to narrow a complex LSR solution.
+static cl::opt<bool> LSRExpNarrow(
+ "lsr-exp-narrow", cl::Hidden, cl::init(false),
+ cl::desc("Narrow LSR complex solution using"
+ " expectation of registers number"));
+
#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
@@ -181,10 +192,11 @@ void RegSortData::print(raw_ostream &OS) const {
OS << "[NumUses=" << UsedByIndices.count() << ']';
}
-LLVM_DUMP_METHOD
-void RegSortData::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void RegSortData::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
@@ -295,9 +307,13 @@ struct Formula {
/// canonical representation of a formula is
/// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
/// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
+  /// 3. The reg containing a recurrent expr related to the current loop
+  /// should be put in the ScaledReg.
/// #1 enforces that the scaled register is always used when at least two
/// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
/// #2 enforces that 1 * reg is reg.
+  /// #3 ensures that regs invariant with respect to the current loop can be
+  /// combined together in LSR codegen.
  /// This invariant can be temporarily broken while building a formula.
/// However, every formula inserted into the LSRInstance must be in canonical
/// form.
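+  /// For example, reg1 + reg2 is stored as 1*reg2 + reg1 (Scale = 1,
+  /// ScaledReg = reg2, BaseRegs = {reg1}); if reg2 is an addrec of the
+  /// current loop and reg1 is not, rule #3 keeps reg2 in ScaledReg.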
@@ -318,12 +334,14 @@ struct Formula {
void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
- bool isCanonical() const;
+ bool isCanonical(const Loop &L) const;
- void canonicalize();
+ void canonicalize(const Loop &L);
bool unscale();
+ bool hasZeroEnd() const;
+
size_t getNumRegs() const;
Type *getType() const;
@@ -410,16 +428,35 @@ void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
BaseRegs.push_back(Sum);
HasBaseReg = true;
}
- canonicalize();
+ canonicalize(*L);
}
/// \brief Check whether or not this formula satisfies the canonical
/// representation.
/// \see Formula::BaseRegs.
-bool Formula::isCanonical() const {
- if (ScaledReg)
- return Scale != 1 || !BaseRegs.empty();
- return BaseRegs.size() <= 1;
+bool Formula::isCanonical(const Loop &L) const {
+ if (!ScaledReg)
+ return BaseRegs.size() <= 1;
+
+ if (Scale != 1)
+ return true;
+
+ if (Scale == 1 && BaseRegs.empty())
+ return false;
+
+ const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
+ if (SAR && SAR->getLoop() == &L)
+ return true;
+
+  // If ScaledReg is not a recurrent expr, or it is but its loop is not the
+  // current loop, while BaseRegs contains a recurrent expr reg related to
+  // the current loop, we want to swap the reg in BaseRegs with ScaledReg.
+ auto I =
+ find_if(make_range(BaseRegs.begin(), BaseRegs.end()), [&](const SCEV *S) {
+ return isa<const SCEVAddRecExpr>(S) &&
+ (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
+ });
+ return I == BaseRegs.end();
}
/// \brief Helper method to morph a formula into its canonical representation.
@@ -428,21 +465,33 @@ bool Formula::isCanonical() const {
/// field. Otherwise, we would have to do special cases everywhere in LSR
/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
/// On the other hand, 1*reg should be canonicalized into reg.
-void Formula::canonicalize() {
- if (isCanonical())
+void Formula::canonicalize(const Loop &L) {
+ if (isCanonical(L))
return;
// So far we did not need this case. This is easy to implement but it is
// useless to maintain dead code. Beside it could hurt compile time.
assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
+
// Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.
- ScaledReg = BaseRegs.back();
- BaseRegs.pop_back();
- Scale = 1;
- size_t BaseRegsSize = BaseRegs.size();
- size_t Try = 0;
- // If ScaledReg is an invariant, try to find a variant expression.
- while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg))
- std::swap(ScaledReg, BaseRegs[Try++]);
+ if (!ScaledReg) {
+ ScaledReg = BaseRegs.back();
+ BaseRegs.pop_back();
+ Scale = 1;
+ }
+
+ // If ScaledReg is an invariant with respect to L, find the reg from
+  // BaseRegs containing the recurrent expr related to Loop L. Swap the
+ // reg with ScaledReg.
+ const SCEVAddRecExpr *SAR = dyn_cast<const SCEVAddRecExpr>(ScaledReg);
+ if (!SAR || SAR->getLoop() != &L) {
+ auto I = find_if(make_range(BaseRegs.begin(), BaseRegs.end()),
+ [&](const SCEV *S) {
+ return isa<const SCEVAddRecExpr>(S) &&
+ (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
+ });
+ if (I != BaseRegs.end())
+ std::swap(ScaledReg, *I);
+ }
}
/// \brief Get rid of the scale in the formula.
@@ -458,6 +507,14 @@ bool Formula::unscale() {
return true;
}
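+
+/// Return true if the formula is just a single base register with no
+/// immediate offsets and no scaled register.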
+bool Formula::hasZeroEnd() const {
+ if (UnfoldedOffset || BaseOffset)
+ return false;
+ if (BaseRegs.size() != 1 || ScaledReg)
+ return false;
+ return true;
+}
+
/// Return the total number of register operands used by this formula. This does
/// not include register uses implied by non-constant addrec strides.
size_t Formula::getNumRegs() const {
@@ -534,10 +591,11 @@ void Formula::print(raw_ostream &OS) const {
}
}
-LLVM_DUMP_METHOD
-void Formula::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void Formula::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// Return true if the given addrec can be sign-extended without changing its
/// value.
@@ -711,7 +769,7 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
bool isAddress = isa<LoadInst>(Inst);
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (SI->getOperand(1) == OperandVal)
+ if (SI->getPointerOperand() == OperandVal)
isAddress = true;
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// Addressing modes can also be folded into prefetches and a variety
@@ -723,6 +781,12 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
isAddress = true;
break;
}
+ } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
+ if (RMW->getPointerOperand() == OperandVal)
+ isAddress = true;
+ } else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
+ if (CmpX->getPointerOperand() == OperandVal)
+ isAddress = true;
}
return isAddress;
}
@@ -735,6 +799,10 @@ static MemAccessTy getAccessType(const Instruction *Inst) {
AccessTy.AddrSpace = SI->getPointerAddressSpace();
} else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
AccessTy.AddrSpace = LI->getPointerAddressSpace();
+ } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
+ AccessTy.AddrSpace = RMW->getPointerAddressSpace();
+ } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
+ AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
}
// All pointers have the same requirements, so canonicalize them to an
@@ -875,7 +943,8 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F);
// Get the cost of the scaling factor used in F for LU.
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
- const LSRUse &LU, const Formula &F);
+ const LSRUse &LU, const Formula &F,
+ const Loop &L);
namespace {
@@ -883,6 +952,7 @@ namespace {
class Cost {
/// TODO: Some of these could be merged. Also, a lexical ordering
/// isn't always optimal.
+ unsigned Insns;
unsigned NumRegs;
unsigned AddRecCost;
unsigned NumIVMuls;
@@ -893,8 +963,8 @@ class Cost {
public:
Cost()
- : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
- SetupCost(0), ScaleCost(0) {}
+ : Insns(0), NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0),
+ ImmCost(0), SetupCost(0), ScaleCost(0) {}
bool operator<(const Cost &Other) const;
@@ -903,9 +973,9 @@ public:
#ifndef NDEBUG
// Once any of the metrics loses, they must all remain losers.
bool isValid() {
- return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
+ return ((Insns | NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
| ImmCost | SetupCost | ScaleCost) != ~0u)
- || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
+ || ((Insns & NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
& ImmCost & SetupCost & ScaleCost) == ~0u);
}
#endif
@@ -1067,7 +1137,8 @@ public:
}
bool HasFormulaWithSameRegs(const Formula &F) const;
- bool InsertFormula(const Formula &F);
+ float getNotSelectedProbability(const SCEV *Reg) const;
+ bool InsertFormula(const Formula &F, const Loop &L);
void DeleteFormula(Formula &F);
void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
@@ -1083,17 +1154,23 @@ void Cost::RateRegister(const SCEV *Reg,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
- // If this is an addrec for another loop, don't second-guess its addrec phi
- // nodes. LSR isn't currently smart enough to reason about more than one
- // loop at a time. LSR has already run on inner loops, will not run on outer
- // loops, and cannot be expected to change sibling loops.
+ // If this is an addrec for another loop, it should be an invariant
+    // with respect to L, since L is the innermost loop (at least for now,
+    // LSR only handles innermost loops).
if (AR->getLoop() != L) {
      // If the AddRec exists, consider its register free and leave it alone.
if (isExistingPhi(AR, SE))
return;
- // Otherwise, do not consider this formula at all.
- Lose();
+      // It is bad to allow LSR for the current loop to add induction
+      // variables for its sibling loops.
+ if (!AR->getLoop()->contains(L)) {
+ Lose();
+ return;
+ }
+
+ // Otherwise, it will be an invariant with respect to Loop L.
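+      // E.g., an addrec of an outer loop enclosing L, such as {0,+,1}<outer>,
+      // is invariant within L and simply occupies one register.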
+ ++NumRegs;
return;
}
AddRecCost += 1; /// TODO: This should be a function of the stride.
@@ -1150,8 +1227,11 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSetImpl<const SCEV *> *LoserRegs) {
- assert(F.isCanonical() && "Cost is accurate only for canonical formula");
+ assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
// Tally up the registers.
+ unsigned PrevAddRecCost = AddRecCost;
+ unsigned PrevNumRegs = NumRegs;
+ unsigned PrevNumBaseAdds = NumBaseAdds;
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
Lose();
@@ -1171,6 +1251,18 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
return;
}
+ // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
+  // an additional instruction (at least a fill).
+ unsigned TTIRegNum = TTI.getNumberOfRegisters(false) - 1;
+ if (NumRegs > TTIRegNum) {
+    // If the cost already exceeded TTIRegNum, then only the newly added
+    // registers can add new instructions.
+ if (PrevNumRegs > TTIRegNum)
+ Insns += (NumRegs - PrevNumRegs);
+ else
+ Insns += (NumRegs - TTIRegNum);
+ }
+
// Determine how many (unfolded) adds we'll need inside the loop.
size_t NumBaseParts = F.getNumRegs();
if (NumBaseParts > 1)
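
The spill accounting in the hunk above is incremental, since RateFormula keeps running totals across calls on the same Cost object. A minimal standalone sketch of the rule, with hypothetical names:

// TTIRegNum = TTI.getNumberOfRegisters(false) - 1, as in the patch.
unsigned spillInsns(unsigned PrevNumRegs, unsigned NumRegs, unsigned TTIRegNum) {
  if (NumRegs <= TTIRegNum)
    return 0;                      // still fits in registers; no fill expected
  if (PrevNumRegs > TTIRegNum)
    return NumRegs - PrevNumRegs;  // already over budget: charge only new regs
  return NumRegs - TTIRegNum;      // first crossing: charge the overflow
}
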
@@ -1181,7 +1273,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
NumBaseAdds += (F.UnfoldedOffset != 0);
// Accumulate non-free scaling amounts.
- ScaleCost += getScalingFactorCost(TTI, LU, F);
+ ScaleCost += getScalingFactorCost(TTI, LU, F, *L);
// Tally up the non-zero immediates.
for (const LSRFixup &Fixup : LU.Fixups) {
@@ -1199,11 +1291,30 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
!TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
NumBaseAdds++;
}
+
+ // If the ICmpZero formula does not end in 0, it cannot be replaced by just
+ // an add or sub. We will need to compare the final result of the AddRec,
+ // which means we will need an additional instruction.
+ // For -10 + {0, +, 1}:
+ // i = i + 1;
+ // cmp i, 10
+ //
+ // For {-10, +, 1}:
+ // i = i + 1;
+ if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd())
+ Insns++;
+ // Each new AddRec adds 1 instruction to the calculation.
+ Insns += (AddRecCost - PrevAddRecCost);
+
+ // BaseAdds adds instructions for unfolded registers.
+ if (LU.Kind != LSRUse::ICmpZero)
+ Insns += NumBaseAdds - PrevNumBaseAdds;
assert(isValid() && "invalid cost");
}
/// Set this cost to a losing value.
void Cost::Lose() {
+ Insns = ~0u;
NumRegs = ~0u;
AddRecCost = ~0u;
NumIVMuls = ~0u;
@@ -1215,6 +1326,8 @@ void Cost::Lose() {
/// Choose the lower cost.
bool Cost::operator<(const Cost &Other) const {
+ if (InsnsCost && Insns != Other.Insns)
+ return Insns < Other.Insns;
return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
ImmCost, SetupCost) <
std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
@@ -1223,6 +1336,7 @@ bool Cost::operator<(const Cost &Other) const {
}
void Cost::print(raw_ostream &OS) const {
+ OS << Insns << " instruction" << (Insns == 1 ? " " : "s ");
OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
if (AddRecCost != 0)
OS << ", with addrec cost " << AddRecCost;
@@ -1239,10 +1353,11 @@ void Cost::print(raw_ostream &OS) const {
OS << ", plus " << SetupCost << " setup cost";
}
-LLVM_DUMP_METHOD
-void Cost::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void Cost::dump() const {
print(errs()); errs() << '\n';
}
+#endif
LSRFixup::LSRFixup()
: UserInst(nullptr), OperandValToReplace(nullptr),
@@ -1285,10 +1400,11 @@ void LSRFixup::print(raw_ostream &OS) const {
OS << ", Offset=" << Offset;
}
-LLVM_DUMP_METHOD
-void LSRFixup::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LSRFixup::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// Test whether this use has a formula with the same registers as the given
/// formula.
@@ -1300,10 +1416,19 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
return Uniquifier.count(Key);
}
+/// Return the probability of selecting a formula that does not reference Reg.
+float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
+ unsigned FNum = 0;
+ for (const Formula &F : Formulae)
+ if (F.referencesReg(Reg))
+ FNum++;
+ return ((float)(Formulae.size() - FNum)) / Formulae.size();
+}
+
/// If the given formula has not yet been inserted, add it to the list, and
/// return true. Return false otherwise. The formula must be in canonical form.
-bool LSRUse::InsertFormula(const Formula &F) {
- assert(F.isCanonical() && "Invalid canonical representation");
+bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
+ assert(F.isCanonical(L) && "Invalid canonical representation");
if (!Formulae.empty() && RigidFormula)
return false;
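
As a hedged worked example of getNotSelectedProbability (the Use1 figures come from the probability table added further down in this patch):

// Use1 has three formulae, two of which reference reg(a):
//   reg(a) + reg({0,+,1}), reg(a) + reg({-1,+,1}) + 1, reg({a,+,1})
// so the probability of selecting a formula without reg(a) is (3 - 2)/3 = 1/3.
float notSelected(unsigned NumFormulae, unsigned NumReferencing) {
  return float(NumFormulae - NumReferencing) / NumFormulae;
}
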
@@ -1391,10 +1516,11 @@ void LSRUse::print(raw_ostream &OS) const {
OS << ", widest fixup type: " << *WidestFixupType;
}
-LLVM_DUMP_METHOD
-void LSRUse::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LSRUse::dump() const {
print(errs()); errs() << '\n';
}
+#endif
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, MemAccessTy AccessTy,
@@ -1472,7 +1598,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
int64_t MinOffset, int64_t MaxOffset,
LSRUse::KindType Kind, MemAccessTy AccessTy,
- const Formula &F) {
+ const Formula &F, const Loop &L) {
// For the purpose of isAMCompletelyFolded either having a canonical formula
// or a scale not equal to zero is correct.
// Problems may arise from non canonical formulae having a scale == 0.
@@ -1480,7 +1606,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
// However, when we generate the scaled formulae, we first check that the
// scaling factor is profitable before computing the actual ScaledReg for
// compile time sake.
- assert((F.isCanonical() || F.Scale != 0));
+ assert((F.isCanonical(L) || F.Scale != 0));
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
}
@@ -1515,14 +1641,15 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
}
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
- const LSRUse &LU, const Formula &F) {
+ const LSRUse &LU, const Formula &F,
+ const Loop &L) {
if (!F.Scale)
return 0;
// If the use is not completely folded in that instruction, we will have to
// pay an extra cost only for scale != 1.
if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, F))
+ LU.AccessTy, F, L))
return F.Scale != 1;
switch (LU.Kind) {
@@ -1772,6 +1899,7 @@ class LSRInstance {
void NarrowSearchSpaceByDetectingSupersets();
void NarrowSearchSpaceByCollapsingUnrolledCode();
void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ void NarrowSearchSpaceByDeletingCostlyFormulas();
void NarrowSearchSpaceByPickingWinnerRegs();
void NarrowSearchSpaceUsingHeuristics();
@@ -2492,7 +2620,12 @@ static Value *getWideOperand(Value *Oper) {
static bool isCompatibleIVType(Value *LVal, Value *RVal) {
Type *LType = LVal->getType();
Type *RType = RVal->getType();
- return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy());
+ return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy() &&
+ // Different address spaces mean (possibly)
+ // different pointer implementation types,
+ // e.g. i16 vs i32, so disallow that.
+ (LType->getPointerAddressSpace() ==
+ RType->getPointerAddressSpace()));
}
/// Return an approximation of this SCEV expression's "base", or NULL for any
@@ -2989,8 +3122,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
User::op_iterator UseI =
find(UserInst->operands(), U.getOperandValToReplace());
assert(UseI != UserInst->op_end() && "cannot find IV operand");
- if (IVIncSet.count(UseI))
+ if (IVIncSet.count(UseI)) {
+ DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
continue;
+ }
LSRUse::KindType Kind = LSRUse::Basic;
MemAccessTy AccessTy;
@@ -3025,8 +3160,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
- N = TransformForPostIncUse(Normalize, N, CI, nullptr,
- TmpPostIncLoops, SE, DT);
+ N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
}
@@ -3108,7 +3242,8 @@ bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
// Do not insert formula that we will not be able to expand.
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
"Formula is illegal");
- if (!LU.InsertFormula(F))
+
+ if (!LU.InsertFormula(F, *L))
return false;
CountRegisters(F, LUIdx);
@@ -3347,7 +3482,7 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
F.BaseRegs.push_back(*J);
// We may have changed the number of register in base regs, adjust the
// formula accordingly.
- F.canonicalize();
+ F.canonicalize(*L);
if (InsertFormula(LU, LUIdx, F))
// If that formula hadn't been seen before, recurse to find more like
@@ -3359,7 +3494,7 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
/// Split out subexpressions from adds and the bases of addrecs.
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
Formula Base, unsigned Depth) {
- assert(Base.isCanonical() && "Input must be in the canonical form");
+ assert(Base.isCanonical(*L) && "Input must be in the canonical form");
// Arbitrarily cap recursion to protect compile time.
if (Depth >= 3)
return;
@@ -3400,7 +3535,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
// rather than proceed with zero in a register.
if (!Sum->isZero()) {
F.BaseRegs.push_back(Sum);
- F.canonicalize();
+ F.canonicalize(*L);
(void)InsertFormula(LU, LUIdx, F);
}
}
@@ -3457,7 +3592,7 @@ void LSRInstance::GenerateConstantOffsetsImpl(
F.ScaledReg = nullptr;
} else
F.deleteBaseReg(F.BaseRegs[Idx]);
- F.canonicalize();
+ F.canonicalize(*L);
} else if (IsScaledReg)
F.ScaledReg = NewG;
else
@@ -3620,10 +3755,10 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (LU.Kind == LSRUse::ICmpZero &&
!Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
continue;
- // For each addrec base reg, apply the scale, if possible.
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
- if (const SCEVAddRecExpr *AR =
- dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
+ // For each addrec base reg, if its loop is the current loop, apply the scale.
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i]);
+ if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
const SCEV *FactorS = SE.getConstant(IntTy, Factor);
if (FactorS->isZero())
continue;
@@ -3637,11 +3772,17 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// The canonical representation of 1*reg is reg, which is already in
// Base. In that case, do not try to insert the formula, it will be
// rejected anyway.
- if (F.Scale == 1 && F.BaseRegs.empty())
+ if (F.Scale == 1 && (F.BaseRegs.empty() ||
+ (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
continue;
+ // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate a
+ // non-canonical Formula whose ScaledReg's loop is not L.
+ if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
+ F.canonicalize(*L);
(void)InsertFormula(LU, LUIdx, F);
}
}
+ }
}
}
@@ -3697,10 +3838,11 @@ void WorkItem::print(raw_ostream &OS) const {
<< " , add offset " << Imm;
}
-LLVM_DUMP_METHOD
-void WorkItem::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void WorkItem::dump() const {
print(errs()); errs() << '\n';
}
+#endif
/// Look for registers which are a constant distance apart and try to form reuse
/// opportunities between them.
@@ -3821,7 +3963,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
continue;
// OK, looks good.
- NewF.canonicalize();
+ NewF.canonicalize(*this->L);
(void)InsertFormula(LU, LUIdx, NewF);
} else {
// Use the immediate in a base register.
@@ -3853,7 +3995,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
goto skip_formula;
// Ok, looks good.
- NewF.canonicalize();
+ NewF.canonicalize(*this->L);
(void)InsertFormula(LU, LUIdx, NewF);
break;
skip_formula:;
@@ -4165,6 +4307,144 @@ void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
}
}
+/// Delete formulas with a high expected register count.
+/// Assuming we don't know which formula will be selected (we have already
+/// deleted all clearly inefficient ones), compute for each register the
+/// probability of it not being selected.
+/// For example,
+/// Use1:
+/// reg(a) + reg({0,+,1})
+/// reg(a) + reg({-1,+,1}) + 1
+/// reg({a,+,1})
+/// Use2:
+/// reg(b) + reg({0,+,1})
+/// reg(b) + reg({-1,+,1}) + 1
+/// reg({b,+,1})
+/// Use3:
+/// reg(c) + reg(b) + reg({0,+,1})
+/// reg(c) + reg({b,+,1})
+///
+/// Probability of not selecting
+/// Use1 Use2 Use3
+/// reg(a) (1/3) * 1 * 1
+/// reg(b) 1 * (1/3) * (1/2)
+/// reg({0,+,1}) (2/3) * (2/3) * (1/2)
+/// reg({-1,+,1}) (2/3) * (2/3) * 1
+/// reg({a,+,1}) (2/3) * 1 * 1
+/// reg({b,+,1}) 1 * (2/3) * (2/3)
+/// reg(c) 1 * 1 * 0
+///
+/// Now compute the mathematical expectation of the register count for each
+/// formula. Note that for each use we exclude the use's own not-selected
+/// probability. For example, for Use1 the probability for reg(a) is just
+/// 1 * 1 (excluding the probability 1/3 of not selecting it for Use1).
+/// Use1:
+/// reg(a) + reg({0,+,1}) 1 + 1/3 -- to be deleted
+/// reg(a) + reg({-1,+,1}) + 1 1 + 4/9 -- to be deleted
+/// reg({a,+,1}) 1
+/// Use2:
+/// reg(b) + reg({0,+,1}) 1/2 + 1/3 -- to be deleted
+/// reg(b) + reg({-1,+,1}) + 1 1/2 + 2/3 -- to be deleted
+/// reg({b,+,1}) 2/3
+/// Use3:
+/// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
+/// reg(c) + reg({b,+,1}) 1 + 2/3
+
+void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
+ if (EstimateSearchSpaceComplexity() < ComplexityLimit)
+ return;
+ // Ok, we have too many formulae on our hands to conveniently handle.
+ // Use a rough heuristic to thin out the list.
+
+ // Set of Regs which are guaranteed to be used in the final solution, i.e.
+ // used in every formula of some use (in the example above this is reg(c)).
+ // We can skip them in calculations.
+ SmallPtrSet<const SCEV *, 4> UniqRegs;
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ // Map each register to the probability of it not being selected.
+ DenseMap <const SCEV *, float> RegNumMap;
+ for (const SCEV *Reg : RegUses) {
+ if (UniqRegs.count(Reg))
+ continue;
+ float PNotSel = 1;
+ for (const LSRUse &LU : Uses) {
+ if (!LU.Regs.count(Reg))
+ continue;
+ float P = LU.getNotSelectedProbability(Reg);
+ if (P != 0.0)
+ PNotSel *= P;
+ else
+ UniqRegs.insert(Reg);
+ }
+ RegNumMap.insert(std::make_pair(Reg, PNotSel));
+ }
+
+ DEBUG(dbgs() << "Narrowing the search space by deleting costly formulas\n");
+
+ // Delete formulas with a high expected register count.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ // If there is nothing to delete, continue.
+ if (LU.Formulae.size() < 2)
+ continue;
+ // This is a temporary solution for performance testing. Float should be
+ // replaced with a rounding-independent type (based on integers) to avoid
+ // different results across host builds.
+ float FMinRegNum = LU.Formulae[0].getNumRegs();
+ float FMinARegNum = LU.Formulae[0].getNumRegs();
+ size_t MinIdx = 0;
+ for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
+ Formula &F = LU.Formulae[i];
+ float FRegNum = 0;
+ float FARegNum = 0;
+ for (const SCEV *BaseReg : F.BaseRegs) {
+ if (UniqRegs.count(BaseReg))
+ continue;
+ FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
+ if (isa<SCEVAddRecExpr>(BaseReg))
+ FARegNum +=
+ RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
+ }
+ if (const SCEV *ScaledReg = F.ScaledReg) {
+ if (!UniqRegs.count(ScaledReg)) {
+ FRegNum +=
+ RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
+ if (isa<SCEVAddRecExpr>(ScaledReg))
+ FARegNum +=
+ RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
+ }
+ }
+ if (FMinRegNum > FRegNum ||
+ (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
+ FMinRegNum = FRegNum;
+ FMinARegNum = FARegNum;
+ MinIdx = i;
+ }
+ }
+ DEBUG(dbgs() << " The formula "; LU.Formulae[MinIdx].print(dbgs());
+ dbgs() << " with min reg num " << FMinRegNum << '\n');
+ if (MinIdx != 0)
+ std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
+ while (LU.Formulae.size() != 1) {
+ DEBUG(dbgs() << " Deleting "; LU.Formulae.back().print(dbgs());
+ dbgs() << '\n');
+ LU.Formulae.pop_back();
+ }
+ LU.RecomputeRegs(LUIdx, RegUses);
+ assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
+ Formula &F = LU.Formulae[0];
+ DEBUG(dbgs() << " Leaving only "; F.print(dbgs()); dbgs() << '\n');
+ // When we choose the formula, its regs become unique (certain to be used).
+ UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+ if (F.ScaledReg)
+ UniqRegs.insert(F.ScaledReg);
+ }
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+}
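
To make the "exclude the use's own probability" step concrete, here is a hedged sketch reproducing the Use2 numbers from the comment above (the helper name and ArrayRef plumbing are illustrative only):

// For Use2's formula reg(b) + reg({0,+,1}) in the worked example:
//   overall PNotSel(reg(b))       = 1 * (1/3) * (1/2)     = 1/6
//   overall PNotSel(reg({0,+,1})) = (2/3) * (2/3) * (1/2) = 2/9
// Dividing out Use2's own factors (1/3 and 2/3) yields 1/2 + 1/3, the
// expectation listed for that formula above.
float formulaExpectation(ArrayRef<float> OverallPNotSel,
                         ArrayRef<float> ThisUsePNotSel) {
  float Expectation = 0;
  for (size_t I = 0, N = OverallPNotSel.size(); I != N; ++I)
    Expectation += OverallPNotSel[I] / ThisUsePNotSel[I]; // drop this use's factor
  return Expectation;
}
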
+
+
/// Pick a register which seems likely to be profitable, and then in any use
/// which has any reference to that register, delete all formulae which do not
/// reference that register.
@@ -4237,7 +4517,10 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
NarrowSearchSpaceByDetectingSupersets();
NarrowSearchSpaceByCollapsingUnrolledCode();
NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
- NarrowSearchSpaceByPickingWinnerRegs();
+ if (LSRExpNarrow)
+ NarrowSearchSpaceByDeletingCostlyFormulas();
+ else
+ NarrowSearchSpaceByPickingWinnerRegs();
}
/// This is the recursive solver.
@@ -4515,11 +4798,7 @@ Value *LSRInstance::Expand(const LSRUse &LU,
assert(!Reg->isZero() && "Zero allocated in a base register!");
// If we're expanding for a post-inc user, make the post-inc adjustment.
- PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
- Reg = TransformForPostIncUse(Denormalize, Reg,
- LF.UserInst, LF.OperandValToReplace,
- Loops, SE, DT);
-
+ Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE);
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
}
@@ -4530,9 +4809,7 @@ Value *LSRInstance::Expand(const LSRUse &LU,
// If we're expanding for a post-inc user, make the post-inc adjustment.
PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
- ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
- LF.UserInst, LF.OperandValToReplace,
- Loops, SE, DT);
+ ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE);
if (LU.Kind == LSRUse::ICmpZero) {
// Expand ScaleReg as if it was part of the base regs.
@@ -4975,10 +5252,11 @@ void LSRInstance::print(raw_ostream &OS) const {
print_uses(OS);
}
-LLVM_DUMP_METHOD
-void LSRInstance::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LSRInstance::dump() const {
print(errs()); errs() << '\n';
}
+#endif
namespace {
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index c7f91226d222..62aa6ee48069 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -44,7 +44,11 @@ using namespace llvm;
static cl::opt<unsigned>
UnrollThreshold("unroll-threshold", cl::Hidden,
- cl::desc("The baseline cost threshold for loop unrolling"));
+ cl::desc("The cost threshold for loop unrolling"));
+
+static cl::opt<unsigned> UnrollPartialThreshold(
+ "unroll-partial-threshold", cl::Hidden,
+ cl::desc("The cost threshold for partial loop unrolling"));
static cl::opt<unsigned> UnrollMaxPercentThresholdBoost(
"unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
@@ -106,10 +110,19 @@ static cl::opt<unsigned> FlatLoopTripCountThreshold(
"aggressively unrolled."));
static cl::opt<bool>
- UnrollAllowPeeling("unroll-allow-peeling", cl::Hidden,
+ UnrollAllowPeeling("unroll-allow-peeling", cl::init(true), cl::Hidden,
cl::desc("Allows loops to be peeled when the dynamic "
"trip count is known to be low."));
+// This option isn't ever intended to be enabled; it serves to allow
+// experiments that check the assumptions about when this kind of revisit is
+// necessary.
+static cl::opt<bool> UnrollRevisitChildLoops(
+ "unroll-revisit-child-loops", cl::Hidden,
+ cl::desc("Enqueue and re-visit child loops in the loop PM after unrolling. "
+ "This shouldn't typically be needed as child loops (or their "
+ "clones) were already visited."));
+
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
@@ -118,16 +131,17 @@ static const unsigned NoThreshold = UINT_MAX;
/// Gather the various unrolling parameters based on the defaults, compiler
/// flags, TTI overrides and user specified parameters.
static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
- Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
- Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
- Optional<bool> UserRuntime, Optional<bool> UserUpperBound) {
+ Loop *L, const TargetTransformInfo &TTI, int OptLevel,
+ Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
+ Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
+ Optional<bool> UserUpperBound) {
TargetTransformInfo::UnrollingPreferences UP;
// Set up the defaults
- UP.Threshold = 150;
+ UP.Threshold = OptLevel > 2 ? 300 : 150;
UP.MaxPercentThresholdBoost = 400;
UP.OptSizeThreshold = 0;
- UP.PartialThreshold = UP.Threshold;
+ UP.PartialThreshold = 150;
UP.PartialOptSizeThreshold = 0;
UP.Count = 0;
UP.PeelCount = 0;
@@ -141,7 +155,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.AllowExpensiveTripCount = false;
UP.Force = false;
UP.UpperBound = false;
- UP.AllowPeeling = false;
+ UP.AllowPeeling = true;
// Override with any target specific settings
TTI.getUnrollingPreferences(L, UP);
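
The key behavioral change in this hunk is that the partial-unroll budget no longer tracks the full-unroll budget. A hedged restatement of the new defaults, before TTI and command-line overrides apply (helper names are illustrative):

unsigned defaultThreshold(int OptLevel) {
  return OptLevel > 2 ? 300 : 150;  // full-unroll budget doubles at -O3
}
unsigned defaultPartialThreshold() {
  return 150;                       // fixed; no longer mirrors Threshold
}
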
@@ -153,10 +167,10 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
}
// Apply any user values specified by cl::opt
- if (UnrollThreshold.getNumOccurrences() > 0) {
+ if (UnrollThreshold.getNumOccurrences() > 0)
UP.Threshold = UnrollThreshold;
- UP.PartialThreshold = UnrollThreshold;
- }
+ if (UnrollPartialThreshold.getNumOccurrences() > 0)
+ UP.PartialThreshold = UnrollPartialThreshold;
if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
UP.MaxPercentThresholdBoost = UnrollMaxPercentThresholdBoost;
if (UnrollMaxCount.getNumOccurrences() > 0)
@@ -495,7 +509,7 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
KnownSucc = SI->getSuccessor(0);
else if (ConstantInt *SimpleCondVal =
dyn_cast<ConstantInt>(SimpleCond))
- KnownSucc = SI->findCaseValue(SimpleCondVal).getCaseSuccessor();
+ KnownSucc = SI->findCaseValue(SimpleCondVal)->getCaseSuccessor();
}
}
if (KnownSucc) {
@@ -770,7 +784,15 @@ static bool computeUnrollCount(
}
}
- // 4rd priority is partial unrolling.
+ // 4th priority is loop peeling
+ computePeelCount(L, LoopSize, UP, TripCount);
+ if (UP.PeelCount) {
+ UP.Runtime = false;
+ UP.Count = 1;
+ return ExplicitUnroll;
+ }
+
+ // 5th priority is partial unrolling.
// Try partial unroll only when TripCount could be statically calculated.
if (TripCount) {
UP.Partial |= ExplicitUnroll;
@@ -833,14 +855,6 @@ static bool computeUnrollCount(
<< "Unable to fully unroll loop as directed by unroll(full) pragma "
"because loop has a runtime trip count.");
- // 5th priority is loop peeling
- computePeelCount(L, LoopSize, UP);
- if (UP.PeelCount) {
- UP.Runtime = false;
- UP.Count = 1;
- return ExplicitUnroll;
- }
-
// 6th priority is runtime unrolling.
// Don't unroll a runtime trip count loop when it is disabled.
if (HasRuntimeUnrollDisablePragma(L)) {
@@ -914,7 +928,7 @@ static bool computeUnrollCount(
static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
ScalarEvolution *SE, const TargetTransformInfo &TTI,
AssumptionCache &AC, OptimizationRemarkEmitter &ORE,
- bool PreserveLCSSA,
+ bool PreserveLCSSA, int OptLevel,
Optional<unsigned> ProvidedCount,
Optional<unsigned> ProvidedThreshold,
Optional<bool> ProvidedAllowPartial,
@@ -934,7 +948,7 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
bool NotDuplicatable;
bool Convergent;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
+ L, TTI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
ProvidedRuntime, ProvidedUpperBound);
// Exit early if unrolling is disabled.
if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
@@ -1034,16 +1048,17 @@ namespace {
class LoopUnroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopUnroll(Optional<unsigned> Threshold = None,
+ LoopUnroll(int OptLevel = 2, Optional<unsigned> Threshold = None,
Optional<unsigned> Count = None,
Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
Optional<bool> UpperBound = None)
- : LoopPass(ID), ProvidedCount(std::move(Count)),
+ : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)),
ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound) {
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
+ int OptLevel;
Optional<unsigned> ProvidedCount;
Optional<unsigned> ProvidedThreshold;
Optional<bool> ProvidedAllowPartial;
@@ -1068,7 +1083,7 @@ public:
OptimizationRemarkEmitter ORE(&F);
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
- return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA,
+ return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel,
ProvidedCount, ProvidedThreshold,
ProvidedAllowPartial, ProvidedRuntime,
ProvidedUpperBound);
@@ -1094,26 +1109,27 @@ INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
- int Runtime, int UpperBound) {
+Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count,
+ int AllowPartial, int Runtime,
+ int UpperBound) {
// TODO: It would make more sense for this function to take the optionals
// directly, but that's dangerous since it would silently break out of tree
// callers.
- return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold),
- Count == -1 ? None : Optional<unsigned>(Count),
- AllowPartial == -1 ? None
- : Optional<bool>(AllowPartial),
- Runtime == -1 ? None : Optional<bool>(Runtime),
- UpperBound == -1 ? None : Optional<bool>(UpperBound));
+ return new LoopUnroll(
+ OptLevel, Threshold == -1 ? None : Optional<unsigned>(Threshold),
+ Count == -1 ? None : Optional<unsigned>(Count),
+ AllowPartial == -1 ? None : Optional<bool>(AllowPartial),
+ Runtime == -1 ? None : Optional<bool>(Runtime),
+ UpperBound == -1 ? None : Optional<bool>(UpperBound));
}
-Pass *llvm::createSimpleLoopUnrollPass() {
- return llvm::createLoopUnrollPass(-1, -1, 0, 0, 0);
+Pass *llvm::createSimpleLoopUnrollPass(int OptLevel) {
+ return llvm::createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0);
}
PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
- LPMUpdater &) {
+ LPMUpdater &Updater) {
const auto &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
Function *F = L.getHeader()->getParent();
@@ -1124,12 +1140,84 @@ PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not "
"cached at a higher level");
- bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
- /*PreserveLCSSA*/ true, ProvidedCount,
- ProvidedThreshold, ProvidedAllowPartial,
- ProvidedRuntime, ProvidedUpperBound);
-
+ // Keep track of the previous loop structure so we can identify new loops
+ // created by unrolling.
+ Loop *ParentL = L.getParentLoop();
+ SmallPtrSet<Loop *, 4> OldLoops;
+ if (ParentL)
+ OldLoops.insert(ParentL->begin(), ParentL->end());
+ else
+ OldLoops.insert(AR.LI.begin(), AR.LI.end());
+
+ // The API here is quite complex to call, but there are only two interesting
+ // states we support: partial and full (or "simple") unrolling. However, to
+ // enable these we actually pass None in for the optionals to avoid
+ // providing an explicit choice.
+ Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam;
+ if (!AllowPartialUnrolling)
+ AllowPartialParam = RuntimeParam = UpperBoundParam = false;
+ bool Changed = tryToUnrollLoop(
+ &L, AR.DT, &AR.LI, &AR.SE, AR.TTI, AR.AC, *ORE,
+ /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
+ /*Threshold*/ None, AllowPartialParam, RuntimeParam, UpperBoundParam);
if (!Changed)
return PreservedAnalyses::all();
+
+ // The parent must not be damaged by unrolling!
+#ifndef NDEBUG
+ if (ParentL)
+ ParentL->verifyLoop();
+#endif
+
+ // Unrolling can do several things to introduce new loops into a loop nest:
+ // - Partial unrolling clones child loops within the current loop. If it
+ // uses a remainder, then it can also create any number of sibling loops.
+ // - Full unrolling clones child loops within the current loop but then
+ // removes the current loop making all of the children appear to be new
+ // sibling loops.
+ // - Loop peeling can directly introduce new sibling loops by peeling one
+ // iteration.
+ //
+ // When a new loop appears as a sibling loop, either from peeling an
+ // iteration or fully unrolling, its nesting structure has fundamentally
+ // changed and we want to revisit it to reflect that.
+ //
+ // When unrolling has removed the current loop, we need to tell the
+ // infrastructure that it is gone.
+ //
+ // Finally, we support a debugging/testing mode where we revisit child loops
+ // as well. These are not expected to require further optimizations as either
+ // they or the loop they were cloned from have been directly visited already.
+ // But the debugging mode allows us to check this assumption.
+ bool IsCurrentLoopValid = false;
+ SmallVector<Loop *, 4> SibLoops;
+ if (ParentL)
+ SibLoops.append(ParentL->begin(), ParentL->end());
+ else
+ SibLoops.append(AR.LI.begin(), AR.LI.end());
+ erase_if(SibLoops, [&](Loop *SibLoop) {
+ if (SibLoop == &L) {
+ IsCurrentLoopValid = true;
+ return true;
+ }
+
+ // Otherwise erase the loop from the list if it was in the old loops.
+ return OldLoops.count(SibLoop) != 0;
+ });
+ Updater.addSiblingLoops(SibLoops);
+
+ if (!IsCurrentLoopValid) {
+ Updater.markLoopAsDeleted(L);
+ } else {
+ // We can only walk child loops if the current loop remained valid.
+ if (UnrollRevisitChildLoops) {
+ // Walk *all* of the child loops. This is a highly speculative mode
+ // anyway, so look for any simplifications that arose from partial
+ // unrolling or from peeling off iterations.
+ SmallVector<Loop *, 4> ChildLoops(L.begin(), L.end());
+ Updater.addChildLoops(ChildLoops);
+ }
+ }
+
return getLoopPassPreservedAnalyses();
}
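
The new-sibling detection above amounts to a set difference against the pre-unroll snapshot, plus a liveness check on the current loop. A hedged standalone sketch (helper name hypothetical):

// Collect loops at the current nesting level that are new since the snapshot,
// and detect whether the just-unrolled loop survived.
SmallVector<Loop *, 4> findNewSiblings(ArrayRef<Loop *> Now,
                                       const SmallPtrSetImpl<Loop *> &OldLoops,
                                       Loop *Current, bool &CurrentAlive) {
  SmallVector<Loop *, 4> NewSibs;
  CurrentAlive = false;
  for (Loop *L : Now) {
    if (L == Current) {
      CurrentAlive = true;   // the current loop is still in the nest
      continue;              // never report it as a new sibling
    }
    if (!OldLoops.count(L))
      NewSibs.push_back(L);  // not in the snapshot: created by unroll/peel
  }
  return NewSibs;
}
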
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 76fe91884c7b..a99c9999c619 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -33,6 +33,7 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -47,6 +48,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/CommandLine.h"
@@ -77,19 +79,6 @@ static cl::opt<unsigned>
Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
cl::init(100), cl::Hidden);
-static cl::opt<bool>
-LoopUnswitchWithBlockFrequency("loop-unswitch-with-block-frequency",
- cl::init(false), cl::Hidden,
- cl::desc("Enable the use of the block frequency analysis to access PGO "
- "heuristics to minimize code growth in cold regions."));
-
-static cl::opt<unsigned>
-ColdnessThreshold("loop-unswitch-coldness-threshold", cl::init(1), cl::Hidden,
- cl::desc("Coldness threshold in percentage. The loop header frequency "
- "(relative to the entry frequency) is compared with this "
- "threshold to determine if non-trivial unswitching should be "
- "enabled."));
-
namespace {
class LUAnalysisCache {
@@ -174,13 +163,6 @@ namespace {
LUAnalysisCache BranchesInfo;
- bool EnabledPGO;
-
- // BFI and ColdEntryFreq are only used when PGO and
- // LoopUnswitchWithBlockFrequency are enabled.
- BlockFrequencyInfo BFI;
- BlockFrequency ColdEntryFreq;
-
bool OptimizeForSize;
bool redoLoop;
@@ -199,12 +181,14 @@ namespace {
// NewBlocks contained cloned copy of basic blocks from LoopBlocks.
std::vector<BasicBlock*> NewBlocks;
+ bool hasBranchDivergence;
+
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopUnswitch(bool Os = false) :
+ explicit LoopUnswitch(bool Os = false, bool hasBranchDivergence = false) :
LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
currentLoop(nullptr), DT(nullptr), loopHeader(nullptr),
- loopPreheader(nullptr) {
+ loopPreheader(nullptr), hasBranchDivergence(hasBranchDivergence) {
initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
}
@@ -217,6 +201,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (hasBranchDivergence)
+ AU.addRequired<DivergenceAnalysis>();
getLoopAnalysisUsage(AU);
}
@@ -255,6 +241,11 @@ namespace {
TerminatorInst *TI);
void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
+
+ /// Given that Invariant is known not to be equal to Val, simplify
+ /// instructions in the loop.
+ Value *SimplifyInstructionWithNotEqual(Instruction *Inst, Value *Invariant,
+ Constant *Val);
};
}
@@ -381,16 +372,35 @@ INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
-Pass *llvm::createLoopUnswitchPass(bool Os) {
- return new LoopUnswitch(Os);
+Pass *llvm::createLoopUnswitchPass(bool Os, bool hasBranchDivergence) {
+ return new LoopUnswitch(Os, hasBranchDivergence);
}
+/// Operator chain lattice.
+enum OperatorChain {
+ OC_OpChainNone, ///< There is no operator.
+ OC_OpChainOr, ///< There are only ORs.
+ OC_OpChainAnd, ///< There are only ANDs.
+ OC_OpChainMixed ///< There are ANDs and ORs.
+};
+
/// Cond is a condition that occurs in L. If it is invariant in the loop, or has
/// an invariant piece, return the invariant. Otherwise, return null.
+///
+/// NOTE: FindLIVLoopCondition will not return a partial LIV by walking up a
+/// mixed operator chain, as we cannot reliably find a value which will
+/// simplify the operator chain. If the chain is AND-only or OR-only, we can
+/// use 0 or ~0 to simplify the chain.
+///
+/// NOTE: In the case of a partial LIV and a mixed operator chain, we may be
+/// able to simplify the condition itself to a loop-variant condition, but at
+/// the cost of creating an entirely new loop.
static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
+ OperatorChain &ParentChain,
DenseMap<Value *, Value *> &Cache) {
auto CacheIt = Cache.find(Cond);
if (CacheIt != Cache.end())
@@ -414,21 +424,53 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
return Cond;
}
+ // Walk up the operator chain to find partial invariant conditions.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))
if (BO->getOpcode() == Instruction::And ||
BO->getOpcode() == Instruction::Or) {
- // If either the left or right side is invariant, we can unswitch on this,
- // which will cause the branch to go away in one loop and the condition to
- // simplify in the other one.
- if (Value *LHS =
- FindLIVLoopCondition(BO->getOperand(0), L, Changed, Cache)) {
- Cache[Cond] = LHS;
- return LHS;
+ // Given the previous operator, compute the current operator chain status.
+ OperatorChain NewChain;
+ switch (ParentChain) {
+ case OC_OpChainNone:
+ NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd :
+ OC_OpChainOr;
+ break;
+ case OC_OpChainOr:
+ NewChain = BO->getOpcode() == Instruction::Or ? OC_OpChainOr :
+ OC_OpChainMixed;
+ break;
+ case OC_OpChainAnd:
+ NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd :
+ OC_OpChainMixed;
+ break;
+ case OC_OpChainMixed:
+ NewChain = OC_OpChainMixed;
+ break;
}
- if (Value *RHS =
- FindLIVLoopCondition(BO->getOperand(1), L, Changed, Cache)) {
- Cache[Cond] = RHS;
- return RHS;
+
+ // If we reach a Mixed state, we do not want to keep walking up, as we cannot
+ // reliably find a value that will simplify the chain. With this check, we
+ // return null at the first sight of a mixed chain, and the caller will either
+ // backtrack to find a partial LIV in the other operand or return null.
+ if (NewChain != OC_OpChainMixed) {
+ // Update the current operator chain type before we search up the chain.
+ ParentChain = NewChain;
+ // If either the left or right side is invariant, we can unswitch on this,
+ // which will cause the branch to go away in one loop and the condition to
+ // simplify in the other one.
+ if (Value *LHS = FindLIVLoopCondition(BO->getOperand(0), L, Changed,
+ ParentChain, Cache)) {
+ Cache[Cond] = LHS;
+ return LHS;
+ }
+ // We did not manage to find a partial LIV in operand(0). Backtrack and try
+ // operand(1).
+ ParentChain = NewChain;
+ if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed,
+ ParentChain, Cache)) {
+ Cache[Cond] = RHS;
+ return RHS;
+ }
}
}
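
The chain-status update in this hunk is a small lattice join. A hedged sketch of the transition (function name hypothetical):

// Join the parent chain state with the current And/Or opcode: the state
// starts at None, stays And-only or Or-only while opcodes agree, and
// saturates at Mixed (where the walk gives up) as soon as they differ.
static OperatorChain advanceChain(OperatorChain Parent, unsigned Opcode) {
  OperatorChain This =
      Opcode == Instruction::And ? OC_OpChainAnd : OC_OpChainOr;
  if (Parent == OC_OpChainNone)
    return This;
  if (Parent == OC_OpChainMixed || Parent != This)
    return OC_OpChainMixed;
  return Parent;
}
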
@@ -436,9 +478,21 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed,
return nullptr;
}
-static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
+/// Cond is a condition that occurs in L. If it is invariant in the loop, or has
+/// an invariant piece, return the invariant along with the operator chain type.
+/// Otherwise, return null.
+static std::pair<Value *, OperatorChain> FindLIVLoopCondition(Value *Cond,
+ Loop *L,
+ bool &Changed) {
DenseMap<Value *, Value *> Cache;
- return FindLIVLoopCondition(Cond, L, Changed, Cache);
+ OperatorChain OpChain = OC_OpChainNone;
+ Value *FCond = FindLIVLoopCondition(Cond, L, Changed, OpChain, Cache);
+
+ // If we do find a LIV, it cannot have been obtained by walking up a mixed
+ // operator chain.
+ assert((!FCond || OpChain != OC_OpChainMixed) &&
+ "Do not expect a partial LIV with mixed operator chain");
+ return {FCond, OpChain};
}
bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
@@ -457,19 +511,6 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
if (SanitizeMemory)
computeLoopSafetyInfo(&SafetyInfo, L);
- EnabledPGO = F->getEntryCount().hasValue();
-
- if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
- BranchProbabilityInfo BPI(*F, *LI);
- BFI.calculate(*L->getHeader()->getParent(), BPI, *LI);
-
- // Use BranchProbability to compute a minimum frequency based on
- // function entry baseline frequency. Loops with headers below this
- // frequency are considered as cold.
- const BranchProbability ColdProb(ColdnessThreshold, 100);
- ColdEntryFreq = BlockFrequency(BFI.getEntryFreq()) * ColdProb;
- }
-
bool Changed = false;
do {
assert(currentLoop->isLCSSAForm(*DT));
@@ -581,19 +622,9 @@ bool LoopUnswitch::processCurrentLoop() {
loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
return false;
- if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
- // Compute the weighted frequency of the hottest block in the
- // loop (loopHeader in this case since inner loops should be
- // processed before outer loop). If it is less than ColdFrequency,
- // we should not unswitch.
- BlockFrequency LoopEntryFreq = BFI.getBlockFreq(loopHeader);
- if (LoopEntryFreq < ColdEntryFreq)
- return false;
- }
-
for (IntrinsicInst *Guard : Guards) {
Value *LoopCond =
- FindLIVLoopCondition(Guard->getOperand(0), currentLoop, Changed);
+ FindLIVLoopCondition(Guard->getOperand(0), currentLoop, Changed).first;
if (LoopCond &&
UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) {
// NB! Unswitching (if successful) could have erased some of the
@@ -634,7 +665,7 @@ bool LoopUnswitch::processCurrentLoop() {
// See if this, or some part of it, is loop invariant. If so, we can
// unswitch on it if we desire.
Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
- currentLoop, Changed);
+ currentLoop, Changed).first;
if (LoopCond &&
UnswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) {
++NumBranches;
@@ -642,24 +673,48 @@ bool LoopUnswitch::processCurrentLoop() {
}
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
- currentLoop, Changed);
+ Value *SC = SI->getCondition();
+ Value *LoopCond;
+ OperatorChain OpChain;
+ std::tie(LoopCond, OpChain) =
+ FindLIVLoopCondition(SC, currentLoop, Changed);
+
unsigned NumCases = SI->getNumCases();
if (LoopCond && NumCases) {
// Find a value to unswitch on:
// FIXME: this should choose the most expensive case!
// FIXME: scan for a case with a non-critical edge?
Constant *UnswitchVal = nullptr;
-
- // Do not process same value again and again.
- // At this point we have some cases already unswitched and
- // some not yet unswitched. Let's find the first not yet unswitched one.
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
- Constant *UnswitchValCandidate = i.getCaseValue();
- if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) {
- UnswitchVal = UnswitchValCandidate;
- break;
+ // Find a case value such that at least one case value is unswitched
+ // out.
+ if (OpChain == OC_OpChainAnd) {
+ // If the chain only has ANDs and the switch has a case value of 0,
+ // dropping a 0 into the chain will unswitch out the 0 case value.
+ auto *AllZero = cast<ConstantInt>(Constant::getNullValue(SC->getType()));
+ if (BranchesInfo.isUnswitched(SI, AllZero))
+ continue;
+ // We are unswitching 0 out.
+ UnswitchVal = AllZero;
+ } else if (OpChain == OC_OpChainOr) {
+ // If the chain only has ORs and the switch has a case value of ~0,
+ // dropping a ~0 into the chain will unswitch out the ~0 case value.
+ auto *AllOne = cast<ConstantInt>(Constant::getAllOnesValue(SC->getType()));
+ if (BranchesInfo.isUnswitched(SI, AllOne))
+ continue;
+ // We are unswitching ~0 out.
+ UnswitchVal = AllOne;
+ } else {
+ assert(OpChain == OC_OpChainNone &&
+ "Expect to unswitch on trivial chain");
+ // Do not process same value again and again.
+ // At this point we have some cases already unswitched and
+ // some not yet unswitched. Let's find the first not yet unswitched one.
+ for (auto Case : SI->cases()) {
+ Constant *UnswitchValCandidate = Case.getCaseValue();
+ if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) {
+ UnswitchVal = UnswitchValCandidate;
+ break;
+ }
}
}
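
Why 0 and ~0 are the right substitutes is easiest to see on a tiny expression; a hedged illustration, not taken from the patch:

// An AND-only chain with the invariant forced to 0 collapses the whole switch
// condition to 0, so the 0 case is fully decided in that loop copy; dually,
// an OR-only chain with the invariant forced to ~0 folds to ~0.
unsigned andChain(unsigned LIV, unsigned X, unsigned Y) { return (LIV & X) & Y; }
unsigned orChain(unsigned LIV, unsigned X, unsigned Y) { return (LIV | X) | Y; }
// andChain(0, X, Y) == 0 and orChain(~0u, X, Y) == ~0u for every X and Y.
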
@@ -668,6 +723,11 @@ bool LoopUnswitch::processCurrentLoop() {
if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
++NumSwitches;
+ // In case of a full LIV, UnswitchVal is the value we unswitched out.
+ // In case of a partial LIV, we only unswitch when it is an AND-chain
+ // or OR-chain. In both cases the switch input value simplifies to
+ // UnswitchVal.
+ BranchesInfo.setUnswitched(SI, UnswitchVal);
return true;
}
}
@@ -678,7 +738,7 @@ bool LoopUnswitch::processCurrentLoop() {
BBI != E; ++BBI)
if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
- currentLoop, Changed);
+ currentLoop, Changed).first;
if (LoopCond && UnswitchIfProfitable(LoopCond,
ConstantInt::getTrue(Context))) {
++NumSelects;
@@ -753,6 +813,15 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val,
<< ". Cost too high.\n");
return false;
}
+ if (hasBranchDivergence &&
+ getAnalysis<DivergenceAnalysis>().isDivergent(LoopCond)) {
+ DEBUG(dbgs() << "NOT unswitching loop %"
+ << currentLoop->getHeader()->getName()
+ << " at non-trivial condition '" << *Val
+ << "' == " << *LoopCond << "\n"
+ << ". Condition is divergent.\n");
+ return false;
+ }
UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI);
return true;
@@ -899,7 +968,6 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
if (I.mayHaveSideEffects())
return false;
- // FIXME: add check for constant foldable switch instructions.
if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
if (BI->isUnconditional()) {
CurrentBB = BI->getSuccessor(0);
@@ -911,7 +979,16 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
// Found a trivial condition candidate: non-foldable conditional branch.
break;
}
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
+ // At this point, any constant-foldable instructions should probably
+ // have been folded already.
+ ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
+ if (!Cond)
+ break;
+ // Find the target block we are definitely going to.
+ CurrentBB = SI->findCaseValue(Cond)->getCaseSuccessor();
} else {
+ // We do not understand these terminator instructions.
break;
}
@@ -929,7 +1006,7 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
return false;
Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
- currentLoop, Changed);
+ currentLoop, Changed).first;
// Unswitch only if the trivial condition itself is an LIV (not
// partial LIV which could occur in and/or)
@@ -960,7 +1037,7 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
// If this isn't switching on an invariant condition, we can't unswitch it.
Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
- currentLoop, Changed);
+ currentLoop, Changed).first;
// Unswitch only if the trivial condition itself is an LIV (not
// partial LIV which could occur in and/or)
@@ -973,13 +1050,12 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
// this.
// Note that we can't trivially unswitch on the default case or
// on already unswitched cases.
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
+ for (auto Case : SI->cases()) {
BasicBlock *LoopExitCandidate;
- if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
- i.getCaseSuccessor()))) {
+ if ((LoopExitCandidate =
+ isTrivialLoopExitBlock(currentLoop, Case.getCaseSuccessor()))) {
// Okay, we found a trivial case, remember the value that is trivial.
- ConstantInt *CaseVal = i.getCaseValue();
+ ConstantInt *CaseVal = Case.getCaseValue();
// Check that it was not unswitched before, since already-unswitched
// trivial values look trivial too.
@@ -998,6 +1074,9 @@ bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB,
nullptr);
+
+ // We are only unswitching full LIV.
+ BranchesInfo.setUnswitched(SI, CondVal);
++NumSwitches;
return true;
}
@@ -1253,18 +1332,38 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
if (!UI || !L->contains(UI))
continue;
- Worklist.push_back(UI);
+ // At this point, we know LIC is definitely not Val. Try to use some simple
+ // logic to simplify the user w.r.t. the context.
+ if (Value *Replacement = SimplifyInstructionWithNotEqual(UI, LIC, Val)) {
+ if (LI->replacementPreservesLCSSAForm(UI, Replacement)) {
+ // This in-loop instruction has been simplified w.r.t. its context,
+ // i.e. LIC != Val; make sure we propagate its replacement value to
+ // all its users.
+ //
+ // We cannot delete UI, the LIC user, yet, because that would invalidate
+ // the LIC->users() iterator. However, we can make this instruction
+ // dead by replacing all its users, and push it onto the worklist so that
+ // it can be properly deleted and its operands simplified.
+ UI->replaceAllUsesWith(Replacement);
+ }
+ }
- // TODO: We could do other simplifications, for example, turning
- // 'icmp eq LIC, Val' -> false.
+ // This is a LIC user, push it into the worklist so that SimplifyCode can
+ // attempt to simplify it.
+ Worklist.push_back(UI);
// If we know that LIC is not Val, use this info to simplify code.
SwitchInst *SI = dyn_cast<SwitchInst>(UI);
if (!SI || !isa<ConstantInt>(Val)) continue;
- SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
+ // NOTE: if a case value for the switch is unswitched out, we record it
+ // after the unswitch finishes. We cannot record it here, as the switch
+ // is not a direct user of the partial LIV.
+ SwitchInst::CaseHandle DeadCase =
+ *SI->findCaseValue(cast<ConstantInt>(Val));
// Default case is live for multiple values.
- if (DeadCase == SI->case_default()) continue;
+ if (DeadCase == *SI->case_default())
+ continue;
// Found a dead case value. Don't remove PHI nodes in the
// successor if they become single-entry, those PHI nodes may
@@ -1274,8 +1373,6 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
BasicBlock *SISucc = DeadCase.getCaseSuccessor();
BasicBlock *Latch = L->getLoopLatch();
- BranchesInfo.setUnswitched(SI, Val);
-
if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
// If the DeadCase successor dominates the loop latch, then the
// transformation isn't safe since it will delete the sole predecessor edge
@@ -1397,3 +1494,27 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
}
}
}
+
+/// Simple simplifications we can do given the information that Invariant is
+/// definitely not equal to Val.
+Value *LoopUnswitch::SimplifyInstructionWithNotEqual(Instruction *Inst,
+ Value *Invariant,
+ Constant *Val) {
+ // icmp eq cond, val -> false
+ ICmpInst *CI = dyn_cast<ICmpInst>(Inst);
+ if (CI && CI->isEquality()) {
+ Value *Op0 = CI->getOperand(0);
+ Value *Op1 = CI->getOperand(1);
+ if ((Op0 == Invariant && Op1 == Val) || (Op0 == Val && Op1 == Invariant)) {
+ LLVMContext &Ctx = Inst->getContext();
+ if (CI->getPredicate() == CmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(Ctx);
+ else
+ return ConstantInt::getTrue(Ctx);
+ }
+ }
+
+ // FIXME: there may be other opportunities, e.g. comparison with floating
+ // point, or Invariant - Val != 0, etc.
+ return nullptr;
+}
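
A hedged note on what the helper decides, and what it deliberately leaves alone, given Invariant = %liv, Val = 5, and the loop copy where %liv != 5:

//   %c = icmp eq i32 %liv, 5   ->  i1 false  (equality decided by %liv != 5)
//   %c = icmp ne i32 %liv, 5   ->  i1 true
//   %c = icmp slt i32 %liv, 5  ->  nullptr   (ordering is not decided; the
//                                             isEquality() guard skips it)
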
diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 52975ef35153..a143b9a3c645 100644
--- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -67,11 +67,11 @@ static bool handleSwitchExpect(SwitchInst &SI) {
if (!ExpectedValue)
return false;
- SwitchInst::CaseIt Case = SI.findCaseValue(ExpectedValue);
+ SwitchInst::CaseHandle Case = *SI.findCaseValue(ExpectedValue);
unsigned n = SI.getNumCases(); // +1 for default case.
SmallVector<uint32_t, 16> Weights(n + 1, UnlikelyBranchWeight);
- if (Case == SI.case_default())
+ if (Case == *SI.case_default())
Weights[0] = LikelyBranchWeight;
else
Weights[Case.getCaseIndex() + 1] = LikelyBranchWeight;
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 1b590140f70a..a3f3f25c1e0f 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -12,20 +12,49 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
#define DEBUG_TYPE "memcpyopt"
@@ -119,6 +148,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
return true;
}
+namespace {
/// Represents a range of memset'd bytes with the ByteVal value.
/// This allows us to analyze stores like:
@@ -130,7 +160,6 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
/// the first store, we make a range [1, 2). The second store extends the range
/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
/// two ranges into [0, 3) which is memset'able.
-namespace {
struct MemsetRange {
// Start/End - A semi range that describes the span that this range covers.
// The range is closed at the start and open at the end: [Start, End).
@@ -148,7 +177,8 @@ struct MemsetRange {
bool isProfitableToUseMemset(const DataLayout &DL) const;
};
-} // end anon namespace
+
+} // end anonymous namespace
bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
// If we found more than 4 stores to merge or 16 bytes, use memset.
@@ -192,13 +222,14 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
return TheStores.size() > NumPointerStores+NumByteStores;
}
-
namespace {
+
class MemsetRanges {
/// A sorted list of the memset ranges.
SmallVector<MemsetRange, 8> Ranges;
typedef SmallVectorImpl<MemsetRange>::iterator range_iterator;
const DataLayout &DL;
+
public:
MemsetRanges(const DataLayout &DL) : DL(DL) {}
@@ -231,8 +262,7 @@ public:
};
-} // end anon namespace
-
+} // end anonymous namespace
/// Add a new store to the MemsetRanges data structure. This adds a
/// new range for the specified store at the specified offset, merging into
@@ -299,48 +329,36 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
//===----------------------------------------------------------------------===//
namespace {
- class MemCpyOptLegacyPass : public FunctionPass {
- MemCpyOptPass Impl;
- public:
- static char ID; // Pass identification, replacement for typeid
- MemCpyOptLegacyPass() : FunctionPass(ID) {
- initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override;
-
- private:
- // This transformation requires dominator postdominator info
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<MemoryDependenceWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<MemoryDependenceWrapperPass>();
- }
+class MemCpyOptLegacyPass : public FunctionPass {
+ MemCpyOptPass Impl;
- // Helper functions
- bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
- bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
- bool processMemCpy(MemCpyInst *M);
- bool processMemMove(MemMoveInst *M);
- bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
- uint64_t cpyLen, unsigned cpyAlign, CallInst *C);
- bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
- bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep);
- bool performMemCpyToMemSetOptzn(MemCpyInst *M, MemSetInst *MDep);
- bool processByValArgument(CallSite CS, unsigned ArgNo);
- Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
- Value *ByteVal);
-
- bool iterateOnFunction(Function &F);
- };
+public:
+ static char ID; // Pass identification, replacement for typeid
- char MemCpyOptLegacyPass::ID = 0;
-}
+ MemCpyOptLegacyPass() : FunctionPass(ID) {
+ initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+private:
+ // This transformation requires dominator and postdominator info
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<MemoryDependenceWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addPreserved<MemoryDependenceWrapperPass>();
+ }
+};
+
+char MemCpyOptLegacyPass::ID = 0;
+
+} // end anonymous namespace
/// The public interface to this file...
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
@@ -523,14 +541,15 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
if (Args.erase(C))
NeedLift = true;
else if (MayAlias) {
- NeedLift = any_of(MemLocs, [C, &AA](const MemoryLocation &ML) {
+ NeedLift = llvm::any_of(MemLocs, [C, &AA](const MemoryLocation &ML) {
return AA.getModRefInfo(C, ML);
});
if (!NeedLift)
- NeedLift = any_of(CallSites, [C, &AA](const ImmutableCallSite &CS) {
- return AA.getModRefInfo(C, CS);
- });
+ NeedLift =
+ llvm::any_of(CallSites, [C, &AA](const ImmutableCallSite &CS) {
+ return AA.getModRefInfo(C, CS);
+ });
}
if (!NeedLift)
@@ -567,7 +586,7 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
}
// We made it, we need to lift
- for (auto *I : reverse(ToLift)) {
+ for (auto *I : llvm::reverse(ToLift)) {
DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
I->moveBefore(P);
}
@@ -761,7 +780,6 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
return false;
}
-
/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
@@ -914,6 +932,17 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
if (MR != MRI_NoModRef)
return false;
+ // We can't create address space casts here because we don't know if they're
+ // safe for the target.
+ if (cpySrc->getType()->getPointerAddressSpace() !=
+ cpyDest->getType()->getPointerAddressSpace())
+ return false;
+ for (unsigned i = 0; i < CS.arg_size(); ++i)
+ if (CS.getArgument(i)->stripPointerCasts() == cpySrc &&
+ cpySrc->getType()->getPointerAddressSpace() !=
+ CS.getArgument(i)->getType()->getPointerAddressSpace())
+ return false;
+
// All the checks have passed, so do the transformation.
bool changedArgument = false;
for (unsigned i = 0; i < CS.arg_size(); ++i)
@@ -1240,7 +1269,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
AliasAnalysis &AA = LookupAliasAnalysis();
- if (!TLI->has(LibFunc::memmove))
+ if (!TLI->has(LibFunc_memmove))
return false;
// See if the pointers alias.
@@ -1306,6 +1335,11 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {
CS.getInstruction(), &AC, &DT) < ByValAlign)
return false;
+ // The address space of the memcpy source must match the byval argument
+ if (MDep->getSource()->getType()->getPointerAddressSpace() !=
+ ByValArg->getType()->getPointerAddressSpace())
+ return false;
+
// Verify that the copied-from memory doesn't change in between the memcpy and
// the byval call.
// memcpy(a <- b)
@@ -1375,7 +1409,6 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
}
PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
-
auto &MD = AM.getResult<MemoryDependenceAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
@@ -1393,7 +1426,9 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
LookupAssumptionCache, LookupDomTree);
if (!MadeChange)
return PreservedAnalyses::all();
+
PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
PA.preserve<MemoryDependenceAnalysis>();
return PA;
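For context, the preservation idiom introduced here is small enough to show in isolation. This is a minimal sketch of a hypothetical pass, not this patch's code:

#include "llvm/IR/PassManager.h"
using namespace llvm;

struct MyPass : PassInfoMixin<MyPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    bool MadeChange = false; // ... transform F without altering the CFG ...
    if (!MadeChange)
      return PreservedAnalyses::all();
    PreservedAnalyses PA;
    // One call preserves every analysis in the CFG set (dominators, loops,
    // ...), replacing per-analysis preserve<> calls in CFG-preserving passes.
    PA.preserveSet<CFGAnalyses>();
    return PA;
  }
};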
@@ -1414,10 +1449,10 @@ bool MemCpyOptPass::runImpl(
// If we don't have at least memset and memcpy, there is little point of doing
// anything here. These are required by a freestanding implementation, so if
// even they are disabled, there is no point in trying hard.
- if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy))
+ if (!TLI->has(LibFunc_memset) || !TLI->has(LibFunc_memcpy))
return false;
- while (1) {
+ while (true) {
if (!iterateOnFunction(F))
break;
MadeChange = true;
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 6a64c6b3619c..acd3ef6791be 100644
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -19,6 +19,8 @@
// thinks it safe to do so. This optimization helps with eg. hiding load
// latencies, triggering if-conversion, and reducing static code size.
//
+// NOTE: This code no longer performs load hoisting; that is subsumed by GVNHoist.
+//
//===----------------------------------------------------------------------===//
//
//
@@ -87,7 +89,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
@@ -118,16 +119,6 @@ private:
void removeInstruction(Instruction *Inst);
BasicBlock *getDiamondTail(BasicBlock *BB);
bool isDiamondHead(BasicBlock *BB);
- // Routines for hoisting loads
- bool isLoadHoistBarrierInRange(const Instruction &Start,
- const Instruction &End, LoadInst *LI,
- bool SafeToLoadUnconditionally);
- LoadInst *canHoistFromBlock(BasicBlock *BB, LoadInst *LI);
- void hoistInstruction(BasicBlock *BB, Instruction *HoistCand,
- Instruction *ElseInst);
- bool isSafeToHoist(Instruction *I) const;
- bool hoistLoad(BasicBlock *BB, LoadInst *HoistCand, LoadInst *ElseInst);
- bool mergeLoads(BasicBlock *BB);
// Routines for sinking stores
StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI);
PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1);
@@ -188,169 +179,6 @@ bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
return true;
}
-///
-/// \brief True when instruction is a hoist barrier for a load
-///
-/// Whenever an instruction could possibly modify the value
-/// being loaded or protect against the load from happening
-/// it is considered a hoist barrier.
-///
-bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(
- const Instruction &Start, const Instruction &End, LoadInst *LI,
- bool SafeToLoadUnconditionally) {
- if (!SafeToLoadUnconditionally)
- for (const Instruction &Inst :
- make_range(Start.getIterator(), End.getIterator()))
- if (!isGuaranteedToTransferExecutionToSuccessor(&Inst))
- return true;
- MemoryLocation Loc = MemoryLocation::get(LI);
- return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
-}
-
-///
-/// \brief Decide if a load can be hoisted
-///
-/// When there is a load in \p BB to the same address as \p LI
-/// and it can be hoisted from \p BB, return that load.
-/// Otherwise return Null.
-///
-LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1,
- LoadInst *Load0) {
- BasicBlock *BB0 = Load0->getParent();
- BasicBlock *Head = BB0->getSinglePredecessor();
- bool SafeToLoadUnconditionally = isSafeToLoadUnconditionally(
- Load0->getPointerOperand(), Load0->getAlignment(),
- Load0->getModule()->getDataLayout(),
- /*ScanFrom=*/Head->getTerminator());
- for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE;
- ++BBI) {
- Instruction *Inst = &*BBI;
-
- // Only merge and hoist loads when their result in used only in BB
- auto *Load1 = dyn_cast<LoadInst>(Inst);
- if (!Load1 || Inst->isUsedOutsideOfBlock(BB1))
- continue;
-
- MemoryLocation Loc0 = MemoryLocation::get(Load0);
- MemoryLocation Loc1 = MemoryLocation::get(Load1);
- if (Load0->isSameOperationAs(Load1) && AA->isMustAlias(Loc0, Loc1) &&
- !isLoadHoistBarrierInRange(BB1->front(), *Load1, Load1,
- SafeToLoadUnconditionally) &&
- !isLoadHoistBarrierInRange(BB0->front(), *Load0, Load0,
- SafeToLoadUnconditionally)) {
- return Load1;
- }
- }
- return nullptr;
-}
-
-///
-/// \brief Merge two equivalent instructions \p HoistCand and \p ElseInst into
-/// \p BB
-///
-/// BB is the head of a diamond
-///
-void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB,
- Instruction *HoistCand,
- Instruction *ElseInst) {
- DEBUG(dbgs() << " Hoist Instruction into BB \n"; BB->dump();
- dbgs() << "Instruction Left\n"; HoistCand->dump(); dbgs() << "\n";
- dbgs() << "Instruction Right\n"; ElseInst->dump(); dbgs() << "\n");
- // Hoist the instruction.
- assert(HoistCand->getParent() != BB);
-
- // Intersect optional metadata.
- HoistCand->andIRFlags(ElseInst);
- HoistCand->dropUnknownNonDebugMetadata();
-
- // Prepend point for instruction insert
- Instruction *HoistPt = BB->getTerminator();
-
- // Merged instruction
- Instruction *HoistedInst = HoistCand->clone();
-
- // Hoist instruction.
- HoistedInst->insertBefore(HoistPt);
-
- HoistCand->replaceAllUsesWith(HoistedInst);
- removeInstruction(HoistCand);
- // Replace the else block instruction.
- ElseInst->replaceAllUsesWith(HoistedInst);
- removeInstruction(ElseInst);
-}
-
-///
-/// \brief Return true if no operand of \p I is defined in I's parent block
-///
-bool MergedLoadStoreMotion::isSafeToHoist(Instruction *I) const {
- BasicBlock *Parent = I->getParent();
- for (Use &U : I->operands())
- if (auto *Instr = dyn_cast<Instruction>(&U))
- if (Instr->getParent() == Parent)
- return false;
- return true;
-}
-
-///
-/// \brief Merge two equivalent loads and GEPs and hoist into diamond head
-///
-bool MergedLoadStoreMotion::hoistLoad(BasicBlock *BB, LoadInst *L0,
- LoadInst *L1) {
- // Only one definition?
- auto *A0 = dyn_cast<Instruction>(L0->getPointerOperand());
- auto *A1 = dyn_cast<Instruction>(L1->getPointerOperand());
- if (A0 && A1 && A0->isIdenticalTo(A1) && isSafeToHoist(A0) &&
- A0->hasOneUse() && (A0->getParent() == L0->getParent()) &&
- A1->hasOneUse() && (A1->getParent() == L1->getParent()) &&
- isa<GetElementPtrInst>(A0)) {
- DEBUG(dbgs() << "Hoist Instruction into BB \n"; BB->dump();
- dbgs() << "Instruction Left\n"; L0->dump(); dbgs() << "\n";
- dbgs() << "Instruction Right\n"; L1->dump(); dbgs() << "\n");
- hoistInstruction(BB, A0, A1);
- hoistInstruction(BB, L0, L1);
- return true;
- }
- return false;
-}
-
-///
-/// \brief Try to hoist two loads to same address into diamond header
-///
-/// Starting from a diamond head block, iterate over the instructions in one
-/// successor block and try to match a load in the second successor.
-///
-bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
- bool MergedLoads = false;
- assert(isDiamondHead(BB));
- BranchInst *BI = cast<BranchInst>(BB->getTerminator());
- BasicBlock *Succ0 = BI->getSuccessor(0);
- BasicBlock *Succ1 = BI->getSuccessor(1);
- // #Instructions in Succ1 for Compile Time Control
- int Size1 = Succ1->size();
- int NLoads = 0;
- for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end();
- BBI != BBE;) {
- Instruction *I = &*BBI;
- ++BBI;
-
- // Don't move non-simple (atomic, volatile) loads.
- auto *L0 = dyn_cast<LoadInst>(I);
- if (!L0 || !L0->isSimple() || L0->isUsedOutsideOfBlock(Succ0))
- continue;
-
- ++NLoads;
- if (NLoads * Size1 >= MagicCompileTimeControl)
- break;
- if (LoadInst *L1 = canHoistFromBlock(Succ1, L0)) {
- bool Res = hoistLoad(BB, L0, L1);
- MergedLoads |= Res;
- // Don't attempt to hoist above loads that had not been hoisted.
- if (!Res)
- break;
- }
- }
- return MergedLoads;
-}
///
/// \brief True when instruction is a sink barrier for a store
@@ -534,7 +362,6 @@ bool MergedLoadStoreMotion::run(Function &F, MemoryDependenceResults *MD,
// Hoist equivalent loads and sink stores
// outside diamonds when possible
if (isDiamondHead(BB)) {
- Changed |= mergeLoads(BB);
Changed |= mergeStores(getDiamondTail(BB));
}
}
@@ -596,8 +423,8 @@ MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) {
if (!Impl.run(F, MD, AA))
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
PA.preserve<MemoryDependenceAnalysis>();
return PA;
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp
index 0a3bf7b4c31b..c5bf2f28d185 100644
--- a/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -156,20 +156,12 @@ PreservedAnalyses NaryReassociatePass::run(Function &F,
auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
- bool Changed = runImpl(F, AC, DT, SE, TLI, TTI);
-
- // FIXME: We need to invalidate this to avoid PR28400. Is there a better
- // solution?
- AM.invalidate<ScalarEvolutionAnalysis>(F);
-
- if (!Changed)
+ if (!runImpl(F, AC, DT, SE, TLI, TTI))
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<ScalarEvolutionAnalysis>();
- PA.preserve<TargetLibraryAnalysis>();
return PA;
}
diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp
index 57e6e3ddad94..3d8ce888867e 100644
--- a/lib/Transforms/Scalar/NewGVN.cpp
+++ b/lib/Transforms/Scalar/NewGVN.cpp
@@ -17,6 +17,27 @@
/// "A Sparse Algorithm for Predicated Global Value Numbering" from
/// Karthik Gargi.
///
+/// A brief overview of the algorithm: The algorithm is essentially the same as
+/// the standard RPO value numbering algorithm (a good reference is the paper
+/// "SCC based value numbering" by L. Taylor Simpson) with one major difference:
+/// The RPO algorithm proceeds, on every iteration, to process every reachable
+/// block and every instruction in that block. This is because the standard RPO
+/// algorithm does not track what things have the same value number, it only
+/// tracks what the value number of a given operation is (the mapping is
+/// operation -> value number). Thus, when a value number of an operation
+/// changes, it must reprocess everything to ensure all uses of a value number
+/// get updated properly. In contrast, the sparse algorithm we use *also*
+/// tracks what operations have a given value number (IE it also tracks the
+/// reverse mapping from value number -> operations with that value number), so
+/// that it only needs to reprocess the instructions that are affected when
+/// something's value number changes. The rest of the algorithm is devoted to
+/// performing symbolic evaluation, forward propagation, and simplification of
+/// operations based on the value numbers deduced so far.
+///
+/// We also do not perform elimination by using any published algorithm. All
+/// published algorithms are O(Instructions). Instead, we use a technique that
+/// is O(number of operations with the same value number), enabling us to skip
+/// trying to eliminate things that have unique value numbers.
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/NewGVN.h"
@@ -40,13 +61,10 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalVariable.h"
@@ -55,24 +73,25 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/PredIteratorCache.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/MemorySSA.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/PredicateInfo.h"
+#include "llvm/Transforms/Utils/VNCoercion.h"
+#include <numeric>
#include <unordered_map>
#include <utility>
#include <vector>
using namespace llvm;
using namespace PatternMatch;
using namespace llvm::GVNExpression;
-
+using namespace llvm::VNCoercion;
#define DEBUG_TYPE "newgvn"
STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted");
@@ -87,6 +106,15 @@ STATISTIC(NumGVNAvoidedSortedLeaderChanges,
"Number of avoided sorted leader changes");
STATISTIC(NumGVNNotMostDominatingLeader,
"Number of times a member dominated it's new classes' leader");
+STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated");
+DEBUG_COUNTER(VNCounter, "newgvn-vn",
+ "Controls which instructions are value numbered")
+
+// Currently store defining access refinement is too slow due to basicaa being
+// egregiously slow. This flag lets us keep it working while we work on this
+// issue.
+static cl::opt<bool> EnableStoreRefinement("enable-store-refinement",
+ cl::init(false), cl::Hidden);
//===----------------------------------------------------------------------===//
// GVN Pass
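DebugCounter counters like the one added above are typically consulted through DebugCounter::shouldExecute (llvm/Support/DebugCounter.h); the exact guard site inside NewGVN is not visible in this hunk, so treat the helper below as a sketch.

#include "llvm/Support/DebugCounter.h"
using namespace llvm;

DEBUG_COUNTER(VNCounter, "newgvn-vn",
              "Controls which instructions are value numbered")

// With -debug-counter=newgvn-vn-skip=N,newgvn-vn-count=M only instructions
// N+1..N+M are value numbered, letting a miscompile be bisected down to a
// single instruction.
static bool shouldValueNumberInstruction() {
  return DebugCounter::shouldExecute(VNCounter);
}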
@@ -105,6 +133,77 @@ PHIExpression::~PHIExpression() = default;
}
}
+// Tarjan's SCC finding algorithm with Nuutila's improvements
+// SCCIterator is actually fairly complex for the simple thing we want.
+// It also wants to hand us SCC's that are unrelated to the phi node we ask
+// about, and have us process them there or risk redoing work.
+// Graph traits over a filter iterator also doesn't work that well here.
+// This SCC finder is specialized to walk use-def chains, and only follows
+// instructions, not generic values (arguments, etc).
+struct TarjanSCC {
+
+ TarjanSCC() : Components(1) {}
+
+ void Start(const Instruction *Start) {
+ if (Root.lookup(Start) == 0)
+ FindSCC(Start);
+ }
+
+ const SmallPtrSetImpl<const Value *> &getComponentFor(const Value *V) const {
+ unsigned ComponentID = ValueToComponent.lookup(V);
+
+ assert(ComponentID > 0 &&
+ "Asking for a component for a value we never processed");
+ return Components[ComponentID];
+ }
+
+private:
+ void FindSCC(const Instruction *I) {
+ Root[I] = ++DFSNum;
+ // Store the DFS Number we had before it possibly gets incremented.
+ unsigned int OurDFS = DFSNum;
+ for (auto &Op : I->operands()) {
+ if (auto *InstOp = dyn_cast<Instruction>(Op)) {
+ if (Root.lookup(Op) == 0)
+ FindSCC(InstOp);
+ if (!InComponent.count(Op))
+ Root[I] = std::min(Root.lookup(I), Root.lookup(Op));
+ }
+ }
+    // See if we really were the root of a component, by seeing if we still
+    // have our DFSNumber. If we do, we are the root of the component, and we
+    // have completed a component. If we do not, we are not the root of a
+    // component, and belong on the component stack.
+ if (Root.lookup(I) == OurDFS) {
+ unsigned ComponentID = Components.size();
+ Components.resize(Components.size() + 1);
+ auto &Component = Components.back();
+ Component.insert(I);
+ DEBUG(dbgs() << "Component root is " << *I << "\n");
+ InComponent.insert(I);
+ ValueToComponent[I] = ComponentID;
+ // Pop a component off the stack and label it.
+ while (!Stack.empty() && Root.lookup(Stack.back()) >= OurDFS) {
+ auto *Member = Stack.back();
+ DEBUG(dbgs() << "Component member is " << *Member << "\n");
+ Component.insert(Member);
+ InComponent.insert(Member);
+ ValueToComponent[Member] = ComponentID;
+ Stack.pop_back();
+ }
+ } else {
+ // Part of a component, push to stack
+ Stack.push_back(I);
+ }
+ }
+ unsigned int DFSNum = 1;
+ SmallPtrSet<const Value *, 8> InComponent;
+ DenseMap<const Value *, unsigned int> Root;
+ SmallVector<const Value *, 8> Stack;
+  // Store the components as a vector of ptr sets, because we need the topo
+  // order of SCCs, but not individual member order.
+ SmallVector<SmallPtrSet<const Value *, 8>, 8> Components;
+ DenseMap<const Value *, unsigned> ValueToComponent;
+};
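A brief usage sketch for the finder above; isPartOfCycle is a hypothetical caller, not part of this patch.

static bool isPartOfCycle(const llvm::PHINode *PN) {
  TarjanSCC SCCFinder;
  SCCFinder.Start(PN); // runs FindSCC from this phi if not yet processed
  // Components come back in topological order; a component with more than
  // one member means the phi sits on a use-def cycle.
  return SCCFinder.getComponentFor(PN).size() > 1;
}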
// Congruence classes represent the set of expressions/instructions
// that are all the same *during some scope in the function*.
// That is, because of the way we perform equality propagation, and
@@ -115,43 +214,152 @@ PHIExpression::~PHIExpression() = default;
// For any Value in the Member set, it is valid to replace any dominated member
// with that Value.
//
-// Every congruence class has a leader, and the leader is used to
-// symbolize instructions in a canonical way (IE every operand of an
-// instruction that is a member of the same congruence class will
-// always be replaced with leader during symbolization).
-// To simplify symbolization, we keep the leader as a constant if class can be
-// proved to be a constant value.
-// Otherwise, the leader is a randomly chosen member of the value set, it does
-// not matter which one is chosen.
-// Each congruence class also has a defining expression,
-// though the expression may be null. If it exists, it can be used for forward
-// propagation and reassociation of values.
-//
-struct CongruenceClass {
- using MemberSet = SmallPtrSet<Value *, 4>;
+// Every congruence class has a leader, and the leader is used to symbolize
+// instructions in a canonical way (IE every operand of an instruction that is a
+// member of the same congruence class will always be replaced with leader
+// during symbolization). To simplify symbolization, we keep the leader as a
+// constant if class can be proved to be a constant value. Otherwise, the
+// leader is the member of the value set with the smallest DFS number. Each
+// congruence class also has a defining expression, though the expression may be
+// null. If it exists, it can be used for forward propagation and reassociation
+// of values.
+
+// For memory, we also track a representative MemoryAccess, and a set of memory
+// members for MemoryPhis (which have no real instructions). Note that for
+// memory, it seems tempting to try to split the memory members into a
+// MemoryCongruenceClass or something. Unfortunately, this does not work
+// easily. The value numbering of a given memory expression depends on the
+// leader of the memory congruence class, and the leader of memory congruence
+// class depends on the value numbering of a given memory expression. This
+// leads to wasted propagation, and in some cases, missed optimization. For
+// example: If we had value numbered two stores together before, but now do not,
+// we move them to a new value congruence class. This in turn will move at
+// least one of the memorydefs to a new memory congruence class. This affects
+// the value numbering of the stores we just value numbered (because the memory
+// congruence class is part of the value number). So while theoretically
+// possible to split them up, it turns out to be *incredibly* complicated to get
+// it to work right, because of the interdependency. While structurally
+// slightly messier, it is algorithmically much simpler and faster to do what we
+// do here, and track them both at once in the same class.
+// Note: The default iterators for this class iterate over values
+class CongruenceClass {
+public:
+ using MemberType = Value;
+ using MemberSet = SmallPtrSet<MemberType *, 4>;
+ using MemoryMemberType = MemoryPhi;
+ using MemoryMemberSet = SmallPtrSet<const MemoryMemberType *, 2>;
+
+ explicit CongruenceClass(unsigned ID) : ID(ID) {}
+ CongruenceClass(unsigned ID, Value *Leader, const Expression *E)
+ : ID(ID), RepLeader(Leader), DefiningExpr(E) {}
+ unsigned getID() const { return ID; }
+ // True if this class has no members left. This is mainly used for assertion
+ // purposes, and for skipping empty classes.
+ bool isDead() const {
+ // If it's both dead from a value perspective, and dead from a memory
+ // perspective, it's really dead.
+ return empty() && memory_empty();
+ }
+ // Leader functions
+ Value *getLeader() const { return RepLeader; }
+ void setLeader(Value *Leader) { RepLeader = Leader; }
+ const std::pair<Value *, unsigned int> &getNextLeader() const {
+ return NextLeader;
+ }
+  void resetNextLeader() { NextLeader = {nullptr, ~0U}; }
+
+ void addPossibleNextLeader(std::pair<Value *, unsigned int> LeaderPair) {
+ if (LeaderPair.second < NextLeader.second)
+ NextLeader = LeaderPair;
+ }
+
+ Value *getStoredValue() const { return RepStoredValue; }
+ void setStoredValue(Value *Leader) { RepStoredValue = Leader; }
+ const MemoryAccess *getMemoryLeader() const { return RepMemoryAccess; }
+ void setMemoryLeader(const MemoryAccess *Leader) { RepMemoryAccess = Leader; }
+
+ // Forward propagation info
+ const Expression *getDefiningExpr() const { return DefiningExpr; }
+ void setDefiningExpr(const Expression *E) { DefiningExpr = E; }
+
+ // Value member set
+ bool empty() const { return Members.empty(); }
+ unsigned size() const { return Members.size(); }
+ MemberSet::const_iterator begin() const { return Members.begin(); }
+ MemberSet::const_iterator end() const { return Members.end(); }
+ void insert(MemberType *M) { Members.insert(M); }
+ void erase(MemberType *M) { Members.erase(M); }
+ void swap(MemberSet &Other) { Members.swap(Other); }
+
+ // Memory member set
+ bool memory_empty() const { return MemoryMembers.empty(); }
+ unsigned memory_size() const { return MemoryMembers.size(); }
+ MemoryMemberSet::const_iterator memory_begin() const {
+ return MemoryMembers.begin();
+ }
+ MemoryMemberSet::const_iterator memory_end() const {
+ return MemoryMembers.end();
+ }
+ iterator_range<MemoryMemberSet::const_iterator> memory() const {
+ return make_range(memory_begin(), memory_end());
+ }
+ void memory_insert(const MemoryMemberType *M) { MemoryMembers.insert(M); }
+ void memory_erase(const MemoryMemberType *M) { MemoryMembers.erase(M); }
+
+ // Store count
+ unsigned getStoreCount() const { return StoreCount; }
+ void incStoreCount() { ++StoreCount; }
+ void decStoreCount() {
+ assert(StoreCount != 0 && "Store count went negative");
+ --StoreCount;
+ }
+
+  // Return true if two congruence classes are equivalent to each other. This
+  // means that every field but the ID number and the dead field is equivalent.
+ bool isEquivalentTo(const CongruenceClass *Other) const {
+ if (!Other)
+ return false;
+ if (this == Other)
+ return true;
+
+ if (std::tie(StoreCount, RepLeader, RepStoredValue, RepMemoryAccess) !=
+ std::tie(Other->StoreCount, Other->RepLeader, Other->RepStoredValue,
+ Other->RepMemoryAccess))
+ return false;
+ if (DefiningExpr != Other->DefiningExpr)
+ if (!DefiningExpr || !Other->DefiningExpr ||
+ *DefiningExpr != *Other->DefiningExpr)
+ return false;
+    // Compare the member sets through ordered copies, since pointer-set
+    // iteration order is not deterministic.
+    std::set<Value *> AMembers(Members.begin(), Members.end());
+    std::set<Value *> BMembers(Other->Members.begin(), Other->Members.end());
+ return AMembers == BMembers;
+ }
+
+private:
unsigned ID;
// Representative leader.
Value *RepLeader = nullptr;
+ // The most dominating leader after our current leader, because the member set
+ // is not sorted and is expensive to keep sorted all the time.
+ std::pair<Value *, unsigned int> NextLeader = {nullptr, ~0U};
+ // If this is represented by a store, the value of the store.
+ Value *RepStoredValue = nullptr;
+ // If this class contains MemoryDefs or MemoryPhis, this is the leading memory
+ // access.
+ const MemoryAccess *RepMemoryAccess = nullptr;
// Defining Expression.
const Expression *DefiningExpr = nullptr;
// Actual members of this class.
MemberSet Members;
-
- // True if this class has no members left. This is mainly used for assertion
- // purposes, and for skipping empty classes.
- bool Dead = false;
-
+ // This is the set of MemoryPhis that exist in the class. MemoryDefs and
+ // MemoryUses have real instructions representing them, so we only need to
+ // track MemoryPhis here.
+ MemoryMemberSet MemoryMembers;
// Number of stores in this congruence class.
// This is used so we can detect store equivalence changes properly.
int StoreCount = 0;
-
- // The most dominating leader after our current leader, because the member set
- // is not sorted and is expensive to keep sorted all the time.
- std::pair<Value *, unsigned int> NextLeader = {nullptr, ~0U};
-
- explicit CongruenceClass(unsigned ID) : ID(ID) {}
- CongruenceClass(unsigned ID, Value *Leader, const Expression *E)
- : ID(ID), RepLeader(Leader), DefiningExpr(E) {}
};
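To make the NextLeader bookkeeping above concrete, here is a hypothetical driver (A, B, and their DFS numbers are made up):

void trackReplacementLeader(CongruenceClass &CC, llvm::Value *A,
                            llvm::Value *B) {
  CC.resetNextLeader();             // {nullptr, ~0U}: no candidate yet
  CC.addPossibleNextLeader({A, 7}); // first candidate always wins
  CC.addPossibleNextLeader({B, 3}); // kept, since 3 < 7
  // getNextLeader().first is now B, the most dominating remaining member,
  // found without keeping the unsorted member set ordered.
}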
namespace llvm {
@@ -180,19 +388,34 @@ template <> struct DenseMapInfo<const Expression *> {
};
} // end namespace llvm
-class NewGVN : public FunctionPass {
+namespace {
+class NewGVN {
+ Function &F;
DominatorTree *DT;
- const DataLayout *DL;
- const TargetLibraryInfo *TLI;
AssumptionCache *AC;
+ const TargetLibraryInfo *TLI;
AliasAnalysis *AA;
MemorySSA *MSSA;
MemorySSAWalker *MSSAWalker;
+ const DataLayout &DL;
+ std::unique_ptr<PredicateInfo> PredInfo;
BumpPtrAllocator ExpressionAllocator;
ArrayRecycler<Value *> ArgRecycler;
+ TarjanSCC SCCFinder;
+
+ // Number of function arguments, used by ranking
+ unsigned int NumFuncArgs;
+
+ // RPOOrdering of basic blocks
+ DenseMap<const DomTreeNode *, unsigned> RPOOrdering;
// Congruence class info.
- CongruenceClass *InitialClass;
+
+ // This class is called INITIAL in the paper. It is the class everything
+  // starts out in, and represents any value. Being an optimistic analysis,
+ // anything in the TOP class has the value TOP, which is indeterminate and
+ // equivalent to everything.
+ CongruenceClass *TOPClass;
std::vector<CongruenceClass *> CongruenceClasses;
unsigned NextCongruenceNum;
@@ -200,13 +423,38 @@ class NewGVN : public FunctionPass {
DenseMap<Value *, CongruenceClass *> ValueToClass;
DenseMap<Value *, const Expression *> ValueToExpression;
+ // Mapping from predicate info we used to the instructions we used it with.
+ // In order to correctly ensure propagation, we must keep track of what
+ // comparisons we used, so that when the values of the comparisons change, we
+ // propagate the information to the places we used the comparison.
+ DenseMap<const Value *, SmallPtrSet<Instruction *, 2>> PredicateToUsers;
+  // Mapping from a MemoryAccess we used to the MemoryAccesses we used it
+  // with. It has the same rationale as PredicateToUsers. When we skip
+  // MemoryAccesses for stores, we can no longer rely solely on the def-use
+  // chains of MemorySSA.
+ DenseMap<const MemoryAccess *, SmallPtrSet<MemoryAccess *, 2>> MemoryToUsers;
+
// A table storing which memorydefs/phis represent a memory state provably
// equivalent to another memory state.
// We could use the congruence class machinery, but the MemoryAccess's are
// abstract memory states, so they can only ever be equivalent to each other,
// and not to constants, etc.
- DenseMap<const MemoryAccess *, MemoryAccess *> MemoryAccessEquiv;
-
+ DenseMap<const MemoryAccess *, CongruenceClass *> MemoryAccessToClass;
+
+ // We could, if we wanted, build MemoryPhiExpressions and
+ // MemoryVariableExpressions, etc, and value number them the same way we value
+ // number phi expressions. For the moment, this seems like overkill. They
+ // can only exist in one of three states: they can be TOP (equal to
+ // everything), Equivalent to something else, or unique. Because we do not
+ // create expressions for them, we need to simulate leader change not just
+ // when they change class, but when they change state. Note: We can do the
+  // same thing for phis, and avoid having phi expressions if we wanted. We
+  // should eventually unify in one direction or the other, so this is a little
+  // bit of an experiment to see which turns out easier to maintain.
+ enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique };
+ DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState;
+
+ enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle };
+ DenseMap<const PHINode *, PhiCycleState> PhiCycleState;
// Expression to class mapping.
using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>;
ExpressionClassMap ExpressionToClass;
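The two reverse maps above follow the same pattern; here is a hedged sketch of how such a map is consulted when a predicate's value changes (names are illustrative, not the pass's API):

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static DenseMap<const Value *, SmallPtrSet<Instruction *, 2>> PredicateToUsers;

// Once the comparison itself gets a new value number, every instruction that
// was simplified using it must be re-queued for value numbering.
static void touchPredicateUsers(const Value *Pred,
                                SmallVectorImpl<Instruction *> &Worklist) {
  for (Instruction *U : PredicateToUsers.lookup(Pred))
    Worklist.push_back(U);
}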
@@ -231,8 +479,6 @@ class NewGVN : public FunctionPass {
BitVector TouchedInstructions;
DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange;
- DenseMap<const DomTreeNode *, std::pair<unsigned, unsigned>>
- DominatedInstRange;
#ifndef NDEBUG
// Debugging for how many times each block and instruction got processed.
@@ -240,56 +486,42 @@ class NewGVN : public FunctionPass {
#endif
// DFS info.
- DenseMap<const BasicBlock *, std::pair<int, int>> DFSDomMap;
+ // This contains a mapping from Instructions to DFS numbers.
+ // The numbering starts at 1. An instruction with DFS number zero
+ // means that the instruction is dead.
DenseMap<const Value *, unsigned> InstrDFS;
+
+ // This contains the mapping DFS numbers to instructions.
SmallVector<Value *, 32> DFSToInstr;
// Deletion info.
SmallPtrSet<Instruction *, 8> InstructionsToErase;
public:
- static char ID; // Pass identification, replacement for typeid.
- NewGVN() : FunctionPass(ID) {
- initializeNewGVNPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
- bool runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
- TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA);
+ NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+ TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA,
+ const DataLayout &DL)
+ : F(F), DT(DT), AC(AC), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL),
+ PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)) {}
+ bool runGVN();
private:
- // This transformation requires dominator postdominator info.
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
-
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-
// Expression handling.
- const Expression *createExpression(Instruction *, const BasicBlock *);
- const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *,
- const BasicBlock *);
- PHIExpression *createPHIExpression(Instruction *);
+ const Expression *createExpression(Instruction *);
+ const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *);
+ PHIExpression *createPHIExpression(Instruction *, bool &HasBackedge,
+ bool &AllConstant);
const VariableExpression *createVariableExpression(Value *);
const ConstantExpression *createConstantExpression(Constant *);
- const Expression *createVariableOrConstant(Value *V, const BasicBlock *B);
+ const Expression *createVariableOrConstant(Value *V);
const UnknownExpression *createUnknownExpression(Instruction *);
- const StoreExpression *createStoreExpression(StoreInst *, MemoryAccess *,
- const BasicBlock *);
+ const StoreExpression *createStoreExpression(StoreInst *,
+ const MemoryAccess *);
LoadExpression *createLoadExpression(Type *, Value *, LoadInst *,
- MemoryAccess *, const BasicBlock *);
-
- const CallExpression *createCallExpression(CallInst *, MemoryAccess *,
- const BasicBlock *);
- const AggregateValueExpression *
- createAggregateValueExpression(Instruction *, const BasicBlock *);
- bool setBasicExpressionInfo(Instruction *, BasicExpression *,
- const BasicBlock *);
+ const MemoryAccess *);
+ const CallExpression *createCallExpression(CallInst *, const MemoryAccess *);
+ const AggregateValueExpression *createAggregateValueExpression(Instruction *);
+ bool setBasicExpressionInfo(Instruction *, BasicExpression *);
// Congruence class handling.
CongruenceClass *createCongruenceClass(Value *Leader, const Expression *E) {
@@ -298,9 +530,21 @@ private:
return result;
}
+ CongruenceClass *createMemoryClass(MemoryAccess *MA) {
+ auto *CC = createCongruenceClass(nullptr, nullptr);
+ CC->setMemoryLeader(MA);
+ return CC;
+ }
+ CongruenceClass *ensureLeaderOfMemoryClass(MemoryAccess *MA) {
+ auto *CC = getMemoryClass(MA);
+ if (CC->getMemoryLeader() != MA)
+ CC = createMemoryClass(MA);
+ return CC;
+ }
+
CongruenceClass *createSingletonCongruenceClass(Value *Member) {
CongruenceClass *CClass = createCongruenceClass(Member, nullptr);
- CClass->Members.insert(Member);
+ CClass->insert(Member);
ValueToClass[Member] = CClass;
return CClass;
}
@@ -313,37 +557,49 @@ private:
// Symbolic evaluation.
const Expression *checkSimplificationResults(Expression *, Instruction *,
Value *);
- const Expression *performSymbolicEvaluation(Value *, const BasicBlock *);
- const Expression *performSymbolicLoadEvaluation(Instruction *,
- const BasicBlock *);
- const Expression *performSymbolicStoreEvaluation(Instruction *,
- const BasicBlock *);
- const Expression *performSymbolicCallEvaluation(Instruction *,
- const BasicBlock *);
- const Expression *performSymbolicPHIEvaluation(Instruction *,
- const BasicBlock *);
- bool setMemoryAccessEquivTo(MemoryAccess *From, MemoryAccess *To);
- const Expression *performSymbolicAggrValueEvaluation(Instruction *,
- const BasicBlock *);
+ const Expression *performSymbolicEvaluation(Value *);
+ const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *,
+ Instruction *, MemoryAccess *);
+ const Expression *performSymbolicLoadEvaluation(Instruction *);
+ const Expression *performSymbolicStoreEvaluation(Instruction *);
+ const Expression *performSymbolicCallEvaluation(Instruction *);
+ const Expression *performSymbolicPHIEvaluation(Instruction *);
+ const Expression *performSymbolicAggrValueEvaluation(Instruction *);
+ const Expression *performSymbolicCmpEvaluation(Instruction *);
+ const Expression *performSymbolicPredicateInfoEvaluation(Instruction *);
// Congruence finding.
- // Templated to allow them to work both on BB's and BB-edges.
- template <class T>
- Value *lookupOperandLeader(Value *, const User *, const T &) const;
+ bool someEquivalentDominates(const Instruction *, const Instruction *) const;
+ Value *lookupOperandLeader(Value *) const;
void performCongruenceFinding(Instruction *, const Expression *);
- void moveValueToNewCongruenceClass(Instruction *, CongruenceClass *,
- CongruenceClass *);
+ void moveValueToNewCongruenceClass(Instruction *, const Expression *,
+ CongruenceClass *, CongruenceClass *);
+ void moveMemoryToNewCongruenceClass(Instruction *, MemoryAccess *,
+ CongruenceClass *, CongruenceClass *);
+ Value *getNextValueLeader(CongruenceClass *) const;
+ const MemoryAccess *getNextMemoryLeader(CongruenceClass *) const;
+ bool setMemoryClass(const MemoryAccess *From, CongruenceClass *To);
+ CongruenceClass *getMemoryClass(const MemoryAccess *MA) const;
+ const MemoryAccess *lookupMemoryLeader(const MemoryAccess *) const;
+ bool isMemoryAccessTop(const MemoryAccess *) const;
+
+ // Ranking
+ unsigned int getRank(const Value *) const;
+ bool shouldSwapOperands(const Value *, const Value *) const;
+
// Reachability handling.
void updateReachableEdge(BasicBlock *, BasicBlock *);
void processOutgoingEdges(TerminatorInst *, BasicBlock *);
- bool isOnlyReachableViaThisEdge(const BasicBlockEdge &) const;
- Value *findConditionEquivalence(Value *, BasicBlock *) const;
- MemoryAccess *lookupMemoryAccessEquiv(MemoryAccess *) const;
+ Value *findConditionEquivalence(Value *) const;
// Elimination.
struct ValueDFS;
- void convertDenseToDFSOrdered(CongruenceClass::MemberSet &,
- SmallVectorImpl<ValueDFS> &);
+ void convertClassToDFSOrdered(const CongruenceClass &,
+ SmallVectorImpl<ValueDFS> &,
+ DenseMap<const Value *, unsigned int> &,
+ SmallPtrSetImpl<Instruction *> &) const;
+ void convertClassToLoadsAndStores(const CongruenceClass &,
+ SmallVectorImpl<ValueDFS> &) const;
bool eliminateInstructions(Function &);
void replaceInstruction(Instruction *, Value *);
@@ -355,35 +611,58 @@ private:
// Various instruction touch utilities
void markUsersTouched(Value *);
- void markMemoryUsersTouched(MemoryAccess *);
- void markLeaderChangeTouched(CongruenceClass *CC);
+ void markMemoryUsersTouched(const MemoryAccess *);
+ void markMemoryDefTouched(const MemoryAccess *);
+ void markPredicateUsersTouched(Instruction *);
+ void markValueLeaderChangeTouched(CongruenceClass *CC);
+ void markMemoryLeaderChangeTouched(CongruenceClass *CC);
+ void addPredicateUsers(const PredicateBase *, Instruction *);
+ void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U);
+
+ // Main loop of value numbering
+ void iterateTouchedInstructions();
// Utilities.
void cleanupTables();
std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned);
void updateProcessedCount(Value *V);
void verifyMemoryCongruency() const;
+ void verifyIterationSettled(Function &F);
bool singleReachablePHIPath(const MemoryAccess *, const MemoryAccess *) const;
-};
-
-char NewGVN::ID = 0;
+ BasicBlock *getBlockForValue(Value *V) const;
+ void deleteExpression(const Expression *E);
+ unsigned InstrToDFSNum(const Value *V) const {
+ assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses");
+ return InstrDFS.lookup(V);
+ }
-// createGVNPass - The public interface to this file.
-FunctionPass *llvm::createNewGVNPass() { return new NewGVN(); }
+ unsigned InstrToDFSNum(const MemoryAccess *MA) const {
+ return MemoryToDFSNum(MA);
+ }
+ Value *InstrFromDFSNum(unsigned DFSNum) { return DFSToInstr[DFSNum]; }
+ // Given a MemoryAccess, return the relevant instruction DFS number. Note:
+ // This deliberately takes a value so it can be used with Use's, which will
+ // auto-convert to Value's but not to MemoryAccess's.
+ unsigned MemoryToDFSNum(const Value *MA) const {
+ assert(isa<MemoryAccess>(MA) &&
+ "This should not be used with instructions");
+ return isa<MemoryUseOrDef>(MA)
+ ? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst())
+ : InstrDFS.lookup(MA);
+ }
+ bool isCycleFree(const PHINode *PN);
+ template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
+ // Debug counter info. When verifying, we have to reset the value numbering
+ // debug counter to the same state it started in to get the same results.
+ std::pair<int, int> StartingVNCounter;
+};
+} // end anonymous namespace
template <typename T>
static bool equalsLoadStoreHelper(const T &LHS, const Expression &RHS) {
- if ((!isa<LoadExpression>(RHS) && !isa<StoreExpression>(RHS)) ||
- !LHS.BasicExpression::equals(RHS)) {
+ if (!isa<LoadExpression>(RHS) && !isa<StoreExpression>(RHS))
return false;
- } else if (const auto *L = dyn_cast<LoadExpression>(&RHS)) {
- if (LHS.getDefiningAccess() != L->getDefiningAccess())
- return false;
- } else if (const auto *S = dyn_cast<StoreExpression>(&RHS)) {
- if (LHS.getDefiningAccess() != S->getDefiningAccess())
- return false;
- }
- return true;
+ return LHS.MemoryExpression::equals(RHS);
}
bool LoadExpression::equals(const Expression &Other) const {
@@ -391,7 +670,13 @@ bool LoadExpression::equals(const Expression &Other) const {
}
bool StoreExpression::equals(const Expression &Other) const {
- return equalsLoadStoreHelper(*this, Other);
+ if (!equalsLoadStoreHelper(*this, Other))
+ return false;
+ // Make sure that store vs store includes the value operand.
+ if (const auto *S = dyn_cast<StoreExpression>(&Other))
+ if (getStoredValue() != S->getStoredValue())
+ return false;
+ return true;
}
#ifndef NDEBUG
@@ -400,16 +685,28 @@ static std::string getBlockName(const BasicBlock *B) {
}
#endif
-INITIALIZE_PASS_BEGIN(NewGVN, "newgvn", "Global Value Numbering", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_END(NewGVN, "newgvn", "Global Value Numbering", false, false)
+// Get the basic block from an instruction/memory value.
+BasicBlock *NewGVN::getBlockForValue(Value *V) const {
+ if (auto *I = dyn_cast<Instruction>(V))
+ return I->getParent();
+ else if (auto *MP = dyn_cast<MemoryPhi>(V))
+ return MP->getBlock();
+ llvm_unreachable("Should have been able to figure out a block for our value");
+ return nullptr;
+}
-PHIExpression *NewGVN::createPHIExpression(Instruction *I) {
+// Delete a definitely dead expression, so it can be reused by the expression
+// allocator. Some of these are not in creation functions, so we have to accept
+// const versions.
+void NewGVN::deleteExpression(const Expression *E) {
+ assert(isa<BasicExpression>(E));
+ auto *BE = cast<BasicExpression>(E);
+ const_cast<BasicExpression *>(BE)->deallocateOperands(ArgRecycler);
+ ExpressionAllocator.Deallocate(E);
+}
+
+PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
+ bool &AllConstant) {
BasicBlock *PHIBlock = I->getParent();
auto *PN = cast<PHINode>(I);
auto *E =
@@ -419,28 +716,32 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I) {
E->setType(I->getType());
E->setOpcode(I->getOpcode());
- auto ReachablePhiArg = [&](const Use &U) {
- return ReachableBlocks.count(PN->getIncomingBlock(U));
- };
+ unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock));
- // Filter out unreachable operands
- auto Filtered = make_filter_range(PN->operands(), ReachablePhiArg);
+ // Filter out unreachable phi operands.
+ auto Filtered = make_filter_range(PN->operands(), [&](const Use &U) {
+ return ReachableEdges.count({PN->getIncomingBlock(U), PHIBlock});
+ });
std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
[&](const Use &U) -> Value * {
+ auto *BB = PN->getIncomingBlock(U);
+ auto *DTN = DT->getNode(BB);
+ if (RPOOrdering.lookup(DTN) >= PHIRPO)
+ HasBackedge = true;
+ AllConstant &= isa<UndefValue>(U) || isa<Constant>(U);
+
// Don't try to transform self-defined phis.
if (U == PN)
return PN;
- const BasicBlockEdge BBE(PN->getIncomingBlock(U), PHIBlock);
- return lookupOperandLeader(U, I, BBE);
+ return lookupOperandLeader(U);
});
return E;
}
// Set basic expression info (Arguments, type, opcode) for Expression
// E from Instruction I in block B.
-bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E,
- const BasicBlock *B) {
+bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E) {
bool AllConstant = true;
if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
E->setType(GEP->getSourceElementType());
@@ -452,7 +753,7 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E,
// Transform the operand array into an operand leader array, and keep track of
// whether all members are constant.
std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) {
- auto Operand = lookupOperandLeader(O, I, B);
+ auto Operand = lookupOperandLeader(O);
AllConstant &= isa<Constant>(Operand);
return Operand;
});
@@ -461,8 +762,7 @@ bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E,
}
const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T,
- Value *Arg1, Value *Arg2,
- const BasicBlock *B) {
+ Value *Arg1, Value *Arg2) {
auto *E = new (ExpressionAllocator) BasicExpression(2);
E->setType(T);
@@ -473,13 +773,13 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T,
// of their operands get the same value number by sorting the operand value
// numbers. Since all commutative instructions have two operands it is more
// efficient to sort by hand rather than using, say, std::sort.
- if (Arg1 > Arg2)
+ if (shouldSwapOperands(Arg1, Arg2))
std::swap(Arg1, Arg2);
}
- E->op_push_back(lookupOperandLeader(Arg1, nullptr, B));
- E->op_push_back(lookupOperandLeader(Arg2, nullptr, B));
+ E->op_push_back(lookupOperandLeader(Arg1));
+ E->op_push_back(lookupOperandLeader(Arg2));
- Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), *DL, TLI,
+ Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), DL, TLI,
DT, AC);
if (const Expression *SimplifiedE = checkSimplificationResults(E, nullptr, V))
return SimplifiedE;
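The swap above canonicalizes commutative operand order. A minimal sketch of the idea follows, with a placeholder rank; the patch's getRank appears to be a deterministic order over constants, arguments, and DFS numbers, unlike the raw pointer comparison it replaces.

#include <cstdint>
#include <utility>

static std::uintptr_t rank(const void *V) {
  return reinterpret_cast<std::uintptr_t>(V); // placeholder ordering only
}

template <typename T> static void canonicalizeCommutative(T *&L, T *&R) {
  if (rank(L) > rank(R)) // plays the role of shouldSwapOperands above
    std::swap(L, R);
}
// After canonicalization, a+b and b+a carry identical operand lists, so
// their expressions hash and compare equal and share one congruence class.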
@@ -502,40 +802,32 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E,
NumGVNOpsSimplified++;
assert(isa<BasicExpression>(E) &&
"We should always have had a basic expression here");
-
- cast<BasicExpression>(E)->deallocateOperands(ArgRecycler);
- ExpressionAllocator.Deallocate(E);
+ deleteExpression(E);
return createConstantExpression(C);
} else if (isa<Argument>(V) || isa<GlobalVariable>(V)) {
if (I)
DEBUG(dbgs() << "Simplified " << *I << " to "
<< " variable " << *V << "\n");
- cast<BasicExpression>(E)->deallocateOperands(ArgRecycler);
- ExpressionAllocator.Deallocate(E);
+ deleteExpression(E);
return createVariableExpression(V);
}
CongruenceClass *CC = ValueToClass.lookup(V);
- if (CC && CC->DefiningExpr) {
+ if (CC && CC->getDefiningExpr()) {
if (I)
DEBUG(dbgs() << "Simplified " << *I << " to "
<< " expression " << *V << "\n");
NumGVNOpsSimplified++;
- assert(isa<BasicExpression>(E) &&
- "We should always have had a basic expression here");
- cast<BasicExpression>(E)->deallocateOperands(ArgRecycler);
- ExpressionAllocator.Deallocate(E);
- return CC->DefiningExpr;
+ deleteExpression(E);
+ return CC->getDefiningExpr();
}
return nullptr;
}
-const Expression *NewGVN::createExpression(Instruction *I,
- const BasicBlock *B) {
-
+const Expression *NewGVN::createExpression(Instruction *I) {
auto *E = new (ExpressionAllocator) BasicExpression(I->getNumOperands());
- bool AllConstant = setBasicExpressionInfo(I, E, B);
+ bool AllConstant = setBasicExpressionInfo(I, E);
if (I->isCommutative()) {
// Ensure that commutative instructions that only differ by a permutation
@@ -543,7 +835,7 @@ const Expression *NewGVN::createExpression(Instruction *I,
// numbers. Since all commutative instructions have two operands it is more
// efficient to sort by hand rather than using, say, std::sort.
assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
- if (E->getOperand(0) > E->getOperand(1))
+ if (shouldSwapOperands(E->getOperand(0), E->getOperand(1)))
E->swapOperands(0, 1);
}
@@ -559,48 +851,43 @@ const Expression *NewGVN::createExpression(Instruction *I,
// Sort the operand value numbers so x<y and y>x get the same value
// number.
CmpInst::Predicate Predicate = CI->getPredicate();
- if (E->getOperand(0) > E->getOperand(1)) {
+ if (shouldSwapOperands(E->getOperand(0), E->getOperand(1))) {
E->swapOperands(0, 1);
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
E->setOpcode((CI->getOpcode() << 8) | Predicate);
// TODO: 25% of our time is spent in SimplifyCmpInst with pointer operands
- // TODO: Since we noop bitcasts, we may need to check types before
- // simplifying, so that we don't end up simplifying based on a wrong
- // type assumption. We should clean this up so we can use constants of the
- // wrong type
-
assert(I->getOperand(0)->getType() == I->getOperand(1)->getType() &&
"Wrong types on cmp instruction");
- if ((E->getOperand(0)->getType() == I->getOperand(0)->getType() &&
- E->getOperand(1)->getType() == I->getOperand(1)->getType())) {
- Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1),
- *DL, TLI, DT, AC);
- if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
- return SimplifiedE;
- }
+ assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() &&
+ E->getOperand(1)->getType() == I->getOperand(1)->getType()));
+ Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1),
+ DL, TLI, DT, AC);
+ if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
+ return SimplifiedE;
} else if (isa<SelectInst>(I)) {
if (isa<Constant>(E->getOperand(0)) ||
- (E->getOperand(1)->getType() == I->getOperand(1)->getType() &&
- E->getOperand(2)->getType() == I->getOperand(2)->getType())) {
+ E->getOperand(0) == E->getOperand(1)) {
+ assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() &&
+ E->getOperand(2)->getType() == I->getOperand(2)->getType());
Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1),
- E->getOperand(2), *DL, TLI, DT, AC);
+ E->getOperand(2), DL, TLI, DT, AC);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
}
} else if (I->isBinaryOp()) {
Value *V = SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1),
- *DL, TLI, DT, AC);
+ DL, TLI, DT, AC);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
} else if (auto *BI = dyn_cast<BitCastInst>(I)) {
- Value *V = SimplifyInstruction(BI, *DL, TLI, DT, AC);
+ Value *V = SimplifyInstruction(BI, DL, TLI, DT, AC);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
} else if (isa<GetElementPtrInst>(I)) {
Value *V = SimplifyGEPInst(E->getType(),
ArrayRef<Value *>(E->op_begin(), E->op_end()),
- *DL, TLI, DT, AC);
+ DL, TLI, DT, AC);
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
} else if (AllConstant) {
@@ -615,7 +902,7 @@ const Expression *NewGVN::createExpression(Instruction *I,
for (Value *Arg : E->operands())
C.emplace_back(cast<Constant>(Arg));
- if (Value *V = ConstantFoldInstOperands(I, C, *DL, TLI))
+ if (Value *V = ConstantFoldInstOperands(I, C, DL, TLI))
if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
return SimplifiedE;
}
@@ -623,18 +910,18 @@ const Expression *NewGVN::createExpression(Instruction *I,
}
const AggregateValueExpression *
-NewGVN::createAggregateValueExpression(Instruction *I, const BasicBlock *B) {
+NewGVN::createAggregateValueExpression(Instruction *I) {
if (auto *II = dyn_cast<InsertValueInst>(I)) {
auto *E = new (ExpressionAllocator)
AggregateValueExpression(I->getNumOperands(), II->getNumIndices());
- setBasicExpressionInfo(I, E, B);
+ setBasicExpressionInfo(I, E);
E->allocateIntOperands(ExpressionAllocator);
std::copy(II->idx_begin(), II->idx_end(), int_op_inserter(E));
return E;
} else if (auto *EI = dyn_cast<ExtractValueInst>(I)) {
auto *E = new (ExpressionAllocator)
AggregateValueExpression(I->getNumOperands(), EI->getNumIndices());
- setBasicExpressionInfo(EI, E, B);
+ setBasicExpressionInfo(EI, E);
E->allocateIntOperands(ExpressionAllocator);
std::copy(EI->idx_begin(), EI->idx_end(), int_op_inserter(E));
return E;
@@ -648,12 +935,10 @@ const VariableExpression *NewGVN::createVariableExpression(Value *V) {
return E;
}
-const Expression *NewGVN::createVariableOrConstant(Value *V,
- const BasicBlock *B) {
- auto Leader = lookupOperandLeader(V, nullptr, B);
- if (auto *C = dyn_cast<Constant>(Leader))
+const Expression *NewGVN::createVariableOrConstant(Value *V) {
+ if (auto *C = dyn_cast<Constant>(V))
return createConstantExpression(C);
- return createVariableExpression(Leader);
+ return createVariableExpression(V);
}
const ConstantExpression *NewGVN::createConstantExpression(Constant *C) {
@@ -669,40 +954,90 @@ const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) {
}
const CallExpression *NewGVN::createCallExpression(CallInst *CI,
- MemoryAccess *HV,
- const BasicBlock *B) {
+ const MemoryAccess *MA) {
// FIXME: Add operand bundles for calls.
auto *E =
- new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, HV);
- setBasicExpressionInfo(CI, E, B);
+ new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, MA);
+ setBasicExpressionInfo(CI, E);
return E;
}
+// Return true if some equivalent of instruction Inst dominates instruction U.
+bool NewGVN::someEquivalentDominates(const Instruction *Inst,
+ const Instruction *U) const {
+ auto *CC = ValueToClass.lookup(Inst);
+ // This must be an instruction because we are only called from phi nodes
+ // in the case that the value it needs to check against is an instruction.
+
+  // The most likely candidates for dominance are the leader and the next leader.
+  // The leader or next leader will dominate in all cases where there is an
+ // equivalent that is higher up in the dom tree.
+ // We can't *only* check them, however, because the
+ // dominator tree could have an infinite number of non-dominating siblings
+ // with instructions that are in the right congruence class.
+ // A
+ // B C D E F G
+ // |
+ // H
+ // Instruction U could be in H, with equivalents in every other sibling.
+ // Depending on the rpo order picked, the leader could be the equivalent in
+ // any of these siblings.
+ if (!CC)
+ return false;
+ if (DT->dominates(cast<Instruction>(CC->getLeader()), U))
+ return true;
+ if (CC->getNextLeader().first &&
+ DT->dominates(cast<Instruction>(CC->getNextLeader().first), U))
+ return true;
+ return llvm::any_of(*CC, [&](const Value *Member) {
+ return Member != CC->getLeader() &&
+ DT->dominates(cast<Instruction>(Member), U);
+ });
+}
+
// See if we have a congruence class and leader for this operand, and if so,
// return it. Otherwise, return the operand itself.
-template <class T>
-Value *NewGVN::lookupOperandLeader(Value *V, const User *U, const T &B) const {
+Value *NewGVN::lookupOperandLeader(Value *V) const {
CongruenceClass *CC = ValueToClass.lookup(V);
- if (CC && (CC != InitialClass))
- return CC->RepLeader;
+ if (CC) {
+    // Everything in TOP is represented by undef, as it can be any value.
+ // We do have to make sure we get the type right though, so we can't set the
+ // RepLeader to undef.
+ if (CC == TOPClass)
+ return UndefValue::get(V->getType());
+ return CC->getStoredValue() ? CC->getStoredValue() : CC->getLeader();
+ }
+
return V;
}
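// A toy restatement (std::unordered_map and an invented ToyClass type, not
// the pass's data structures) of the rule above: members of TOP act as a
// typed undef, store classes answer with their stored value, and everything
// else answers with the class leader.
#include <unordered_map>

struct ToyClass {
  int Leader = 0;
  int StoredValue = 0; // 0 means "no stored value" in this toy model.
  bool IsTop = false;
};

int lookupLeader(int V, int TypedUndef,
                 const std::unordered_map<int, const ToyClass *> &ValueToClass) {
  auto It = ValueToClass.find(V);
  if (It == ValueToClass.end())
    return V; // No class yet: the value represents itself.
  if (It->second->IsTop)
    return TypedUndef; // Everything in TOP can be any value.
  return It->second->StoredValue ? It->second->StoredValue
                                 : It->second->Leader;
}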
-MemoryAccess *NewGVN::lookupMemoryAccessEquiv(MemoryAccess *MA) const {
- MemoryAccess *Result = MemoryAccessEquiv.lookup(MA);
- return Result ? Result : MA;
+const MemoryAccess *NewGVN::lookupMemoryLeader(const MemoryAccess *MA) const {
+ auto *CC = getMemoryClass(MA);
+ assert(CC->getMemoryLeader() &&
+ "Every MemoryAccess should be mapped to a "
+ "congruence class with a represenative memory "
+ "access");
+ return CC->getMemoryLeader();
+}
+
+// Return true if the MemoryAccess is really equivalent to everything. This is
+// equivalent to the lattice value "TOP" in most lattices. This is the initial
+// state of all MemoryAccesses.
+bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const {
+ return getMemoryClass(MA) == TOPClass;
}
LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp,
- LoadInst *LI, MemoryAccess *DA,
- const BasicBlock *B) {
- auto *E = new (ExpressionAllocator) LoadExpression(1, LI, DA);
+ LoadInst *LI,
+ const MemoryAccess *MA) {
+ auto *E =
+ new (ExpressionAllocator) LoadExpression(1, LI, lookupMemoryLeader(MA));
E->allocateOperands(ArgRecycler, ExpressionAllocator);
E->setType(LoadType);
// Give stores and loads the same opcode so they value number together.
E->setOpcode(0);
- E->op_push_back(lookupOperandLeader(PointerOp, LI, B));
+ E->op_push_back(PointerOp);
if (LI)
E->setAlignment(LI->getAlignment());
@@ -713,16 +1048,16 @@ LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp,
}
const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI,
- MemoryAccess *DA,
- const BasicBlock *B) {
- auto *E =
- new (ExpressionAllocator) StoreExpression(SI->getNumOperands(), SI, DA);
+ const MemoryAccess *MA) {
+ auto *StoredValueLeader = lookupOperandLeader(SI->getValueOperand());
+ auto *E = new (ExpressionAllocator)
+ StoreExpression(SI->getNumOperands(), SI, StoredValueLeader, MA);
E->allocateOperands(ArgRecycler, ExpressionAllocator);
E->setType(SI->getValueOperand()->getType());
// Give store and loads same opcode so they value number together.
E->setOpcode(0);
- E->op_push_back(lookupOperandLeader(SI->getPointerOperand(), SI, B));
+ E->op_push_back(lookupOperandLeader(SI->getPointerOperand()));
// TODO: Value number heap versions. We may be able to discover
// things alias analysis can't on it's own (IE that a store and a
@@ -730,44 +1065,140 @@ const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI,
return E;
}
-// Utility function to check whether the congruence class has a member other
-// than the given instruction.
-bool hasMemberOtherThanUs(const CongruenceClass *CC, Instruction *I) {
- // Either it has more than one store, in which case it must contain something
- // other than us (because it's indexed by value), or if it only has one store
- // right now, that member should not be us.
- return CC->StoreCount > 1 || CC->Members.count(I) == 0;
-}
-
-const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
- const BasicBlock *B) {
+const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) {
// Unlike loads, we never try to eliminate stores, so we do not check if they
// are simple and avoid value numbering them.
auto *SI = cast<StoreInst>(I);
- MemoryAccess *StoreAccess = MSSA->getMemoryAccess(SI);
- // See if we are defined by a previous store expression, it already has a
- // value, and it's the same value as our current store. FIXME: Right now, we
- // only do this for simple stores, we should expand to cover memcpys, etc.
+ auto *StoreAccess = MSSA->getMemoryAccess(SI);
+ // Get the expression, if any, for the RHS of the MemoryDef.
+ const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess();
+ if (EnableStoreRefinement)
+ StoreRHS = MSSAWalker->getClobberingMemoryAccess(StoreAccess);
+ // If we bypassed the use-def chains, make sure we add a use.
+ if (StoreRHS != StoreAccess->getDefiningAccess())
+ addMemoryUsers(StoreRHS, StoreAccess);
+
+ StoreRHS = lookupMemoryLeader(StoreRHS);
+ // If we are defined by ourselves, use the live on entry def.
+ if (StoreRHS == StoreAccess)
+ StoreRHS = MSSA->getLiveOnEntryDef();
+
if (SI->isSimple()) {
- // Get the expression, if any, for the RHS of the MemoryDef.
- MemoryAccess *StoreRHS = lookupMemoryAccessEquiv(
- cast<MemoryDef>(StoreAccess)->getDefiningAccess());
- const Expression *OldStore = createStoreExpression(SI, StoreRHS, B);
- CongruenceClass *CC = ExpressionToClass.lookup(OldStore);
+ // See if we are defined by a previous store expression, it already has a
+ // value, and it's the same value as our current store. FIXME: Right now, we
+ // only do this for simple stores, we should expand to cover memcpys, etc.
+ const auto *LastStore = createStoreExpression(SI, StoreRHS);
+ const auto *LastCC = ExpressionToClass.lookup(LastStore);
// Basically, check if the congruence class the store is in is defined by a
// store that isn't us, and has the same value. MemorySSA takes care of
// ensuring the store has the same memory state as us already.
- if (CC && CC->DefiningExpr && isa<StoreExpression>(CC->DefiningExpr) &&
- CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B) &&
- hasMemberOtherThanUs(CC, I))
- return createStoreExpression(SI, StoreRHS, B);
+ // The RepStoredValue gets nulled if all the stores disappear in a class, so
+ // we don't need to check if the class contains a store besides us.
+ if (LastCC &&
+ LastCC->getStoredValue() == lookupOperandLeader(SI->getValueOperand()))
+ return LastStore;
+ deleteExpression(LastStore);
+ // Also check if our value operand is defined by a load of the same memory
+ // location, and the memory state is the same as it was then (otherwise, it
+ // could have been overwritten later. See test32 in
+ // transforms/DeadStoreElimination/simple.ll).
+ if (auto *LI =
+ dyn_cast<LoadInst>(lookupOperandLeader(SI->getValueOperand()))) {
+ if ((lookupOperandLeader(LI->getPointerOperand()) ==
+ lookupOperandLeader(SI->getPointerOperand())) &&
+ (lookupMemoryLeader(MSSA->getMemoryAccess(LI)->getDefiningAccess()) ==
+ StoreRHS))
+ return createVariableExpression(LI);
+ }
}
- return createStoreExpression(SI, StoreAccess, B);
+ // If the store is not equivalent to anything, value number it as a store that
+ // produces a unique memory state (instead of using its MemoryUse, we use
+ // its MemoryDef).
+ return createStoreExpression(SI, StoreAccess);
}
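// A compressed sketch (invented structs, not the pass's types) of the two
// redundancy tests applied above: a store joins an existing store class when
// it writes the same value under the same incoming memory state, and a store
// is a noop when it writes back a value just loaded from the same address.
struct ToyStore { int Addr; int Value; int MemStateIn; };
struct ToyLoad  { int Addr; int Result; int MemState; };

bool storeIsRedundant(const ToyStore &SI, const ToyStore *EquivClassStore,
                      const ToyLoad *ValueDefiningLoad) {
  if (EquivClassStore && EquivClassStore->Value == SI.Value)
    return true; // Same value as the store class we would join.
  if (ValueDefiningLoad && ValueDefiningLoad->Addr == SI.Addr &&
      ValueDefiningLoad->MemState == SI.MemStateIn &&
      ValueDefiningLoad->Result == SI.Value)
    return true; // Storing back what was just loaded from there.
  return false;
}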
-const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I,
- const BasicBlock *B) {
+// See if we can extract the value of a loaded pointer from a load, a store, or
+// a memory instruction.
+const Expression *
+NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
+ LoadInst *LI, Instruction *DepInst,
+ MemoryAccess *DefiningAccess) {
+ assert((!LI || LI->isSimple()) && "Not a simple load");
+ if (auto *DepSI = dyn_cast<StoreInst>(DepInst)) {
+ // Can't forward from non-atomic to atomic without violating memory model.
+ // Also, we don't need to coerce if they are the same type; we will just
+ // propagate.
+ if (LI->isAtomic() > DepSI->isAtomic() ||
+ LoadType == DepSI->getValueOperand()->getType())
+ return nullptr;
+ int Offset = analyzeLoadFromClobberingStore(LoadType, LoadPtr, DepSI, DL);
+ if (Offset >= 0) {
+ if (auto *C = dyn_cast<Constant>(
+ lookupOperandLeader(DepSI->getValueOperand()))) {
+ DEBUG(dbgs() << "Coercing load from store " << *DepSI << " to constant "
+ << *C << "\n");
+ return createConstantExpression(
+ getConstantStoreValueForLoad(C, Offset, LoadType, DL));
+ }
+ }
+
+ } else if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
+ // Can't forward from non-atomic to atomic without violating memory model.
+ if (LI->isAtomic() > DepLI->isAtomic())
+ return nullptr;
+ int Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, DepLI, DL);
+ if (Offset >= 0) {
+ // We can coerce a constant load into a load
+ if (auto *C = dyn_cast<Constant>(lookupOperandLeader(DepLI)))
+ if (auto *PossibleConstant =
+ getConstantLoadValueForLoad(C, Offset, LoadType, DL)) {
+ DEBUG(dbgs() << "Coercing load from load " << *LI << " to constant "
+ << *PossibleConstant << "\n");
+ return createConstantExpression(PossibleConstant);
+ }
+ }
+
+ } else if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
+ int Offset = analyzeLoadFromClobberingMemInst(LoadType, LoadPtr, DepMI, DL);
+ if (Offset >= 0) {
+ if (auto *PossibleConstant =
+ getConstantMemInstValueForLoad(DepMI, Offset, LoadType, DL)) {
+ DEBUG(dbgs() << "Coercing load from meminst " << *DepMI
+ << " to constant " << *PossibleConstant << "\n");
+ return createConstantExpression(PossibleConstant);
+ }
+ }
+ }
+
+ // All of the below are only true if the loaded pointer is produced
+ // by the dependent instruction.
+ if (LoadPtr != lookupOperandLeader(DepInst) &&
+ !AA->isMustAlias(LoadPtr, DepInst))
+ return nullptr;
+ // If this load really doesn't depend on anything, then we must be loading an
+ // undef value. This can happen when loading from a fresh allocation with no
+ // intervening stores, for example. Note that this is only true in the case
+ // that the result of the allocation is pointer equal to the load ptr.
+ if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) {
+ return createConstantExpression(UndefValue::get(LoadType));
+ }
+ // If this load occurs right after a lifetime begin,
+ // then the loaded value is undefined.
+ else if (auto *II = dyn_cast<IntrinsicInst>(DepInst)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ return createConstantExpression(UndefValue::get(LoadType));
+ }
+ // If this load follows a calloc (which zero initializes memory),
+ // then the loaded value is zero
+ else if (isCallocLikeFn(DepInst, TLI)) {
+ return createConstantExpression(Constant::getNullValue(LoadType));
+ }
+
+ return nullptr;
+}
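// A self-contained illustration (plain integers and a little-endian
// assumption; the real code works on LLVM constants via the analyze*/get*
// helpers) of the offset-based coercion idea: a narrow load that lands inside
// a wider store of a known constant is answered by slicing the stored bytes
// at the computed offset.
#include <cstdint>
#include <cstring>

uint32_t sliceStoredConstant(uint64_t StoredValue, unsigned ByteOffset) {
  unsigned char Bytes[sizeof StoredValue];
  std::memcpy(Bytes, &StoredValue, sizeof Bytes);
  uint32_t Loaded;
  std::memcpy(&Loaded, Bytes + ByteOffset, sizeof Loaded); // Offset <= 4 here.
  return Loaded;
}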
+
+const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) {
auto *LI = cast<LoadInst>(I);
// We can eliminate in favor of non-simple loads, but we won't be able to
@@ -775,7 +1206,7 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I,
if (!LI->isSimple())
return nullptr;
- Value *LoadAddressLeader = lookupOperandLeader(LI->getPointerOperand(), I, B);
+ Value *LoadAddressLeader = lookupOperandLeader(LI->getPointerOperand());
// Load of undef is undef.
if (isa<UndefValue>(LoadAddressLeader))
return createConstantExpression(UndefValue::get(LI->getType()));
@@ -788,61 +1219,233 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I,
// If the defining instruction is not reachable, replace with undef.
if (!ReachableBlocks.count(DefiningInst->getParent()))
return createConstantExpression(UndefValue::get(LI->getType()));
+ // This will handle stores and memory insts. We only do this if the
+ // defining access has a different type, or it is a pointer produced by
+ // certain memory operations that cause the memory to have a fixed value
+ // (IE things like calloc).
+ if (const auto *CoercionResult =
+ performSymbolicLoadCoercion(LI->getType(), LoadAddressLeader, LI,
+ DefiningInst, DefiningAccess))
+ return CoercionResult;
}
}
- const Expression *E =
- createLoadExpression(LI->getType(), LI->getPointerOperand(), LI,
- lookupMemoryAccessEquiv(DefiningAccess), B);
+ const Expression *E = createLoadExpression(LI->getType(), LoadAddressLeader,
+ LI, DefiningAccess);
return E;
}
+const Expression *
+NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) {
+ auto *PI = PredInfo->getPredicateInfoFor(I);
+ if (!PI)
+ return nullptr;
+
+ DEBUG(dbgs() << "Found predicate info from instruction !\n");
+
+ auto *PWC = dyn_cast<PredicateWithCondition>(PI);
+ if (!PWC)
+ return nullptr;
+
+ auto *CopyOf = I->getOperand(0);
+ auto *Cond = PWC->Condition;
+
+ // If this is a copy of the condition, it must be either true or false,
+ // depending on the predicate info type and edge.
+ if (CopyOf == Cond) {
+ // We should not need to add predicate users because the predicate info is
+ // already a use of this operand.
+ if (isa<PredicateAssume>(PI))
+ return createConstantExpression(ConstantInt::getTrue(Cond->getType()));
+ if (auto *PBranch = dyn_cast<PredicateBranch>(PI)) {
+ if (PBranch->TrueEdge)
+ return createConstantExpression(ConstantInt::getTrue(Cond->getType()));
+ return createConstantExpression(ConstantInt::getFalse(Cond->getType()));
+ }
+ if (auto *PSwitch = dyn_cast<PredicateSwitch>(PI))
+ return createConstantExpression(cast<Constant>(PSwitch->CaseValue));
+ }
+
+ // Not a copy of the condition, so see what the predicates tell us about this
+ // value. First, though, we check to make sure the value is actually a copy
+ // of one of the condition operands. It's possible, in certain cases, for it
+ // to be a copy of a predicateinfo copy. In particular, if two branch
+ // operations use the same condition, and one branch dominates the other, we
+ // will end up with a copy of a copy. This is currently a small deficiency in
+ // predicateinfo. What will end up happening here is that we will value
+ // number both copies the same anyway.
+
+ // Everything below relies on the condition being a comparison.
+ auto *Cmp = dyn_cast<CmpInst>(Cond);
+ if (!Cmp)
+ return nullptr;
+
+ if (CopyOf != Cmp->getOperand(0) && CopyOf != Cmp->getOperand(1)) {
+ DEBUG(dbgs() << "Copy is not of any condition operands!");
+ return nullptr;
+ }
+ Value *FirstOp = lookupOperandLeader(Cmp->getOperand(0));
+ Value *SecondOp = lookupOperandLeader(Cmp->getOperand(1));
+ bool SwappedOps = false;
+ // Sort the ops
+ if (shouldSwapOperands(FirstOp, SecondOp)) {
+ std::swap(FirstOp, SecondOp);
+ SwappedOps = true;
+ }
+ CmpInst::Predicate Predicate =
+ SwappedOps ? Cmp->getSwappedPredicate() : Cmp->getPredicate();
+
+ if (isa<PredicateAssume>(PI)) {
+ // If the comparison is true when the operands are equal, then we know the
+ // operands are equal, because assumes must always be true.
+ if (CmpInst::isTrueWhenEqual(Predicate)) {
+ addPredicateUsers(PI, I);
+ return createVariableOrConstant(FirstOp);
+ }
+ }
+ if (const auto *PBranch = dyn_cast<PredicateBranch>(PI)) {
+ // If we are *not* a copy of the comparison, we may equal to the other
+ // operand when the predicate implies something about equality of
+ // operations. In particular, if the comparison is true/false when the
+ // operands are equal, and we are on the right edge, we know this operation
+ // is equal to something.
+ if ((PBranch->TrueEdge && Predicate == CmpInst::ICMP_EQ) ||
+ (!PBranch->TrueEdge && Predicate == CmpInst::ICMP_NE)) {
+ addPredicateUsers(PI, I);
+ return createVariableOrConstant(FirstOp);
+ }
+ // Handle the special case of floating point.
+ if (((PBranch->TrueEdge && Predicate == CmpInst::FCMP_OEQ) ||
+ (!PBranch->TrueEdge && Predicate == CmpInst::FCMP_UNE)) &&
+ isa<ConstantFP>(FirstOp) && !cast<ConstantFP>(FirstOp)->isZero()) {
+ addPredicateUsers(PI, I);
+ return createConstantExpression(cast<Constant>(FirstOp));
+ }
+ }
+ return nullptr;
+}
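// A toy model (invented enum and struct, not PredicateInfo's types) of the
// equality rules used above: an assume of a true-when-equal predicate, the
// true edge of x == y, or the false edge of x != y all let the copy take the
// other operand's value.
enum class ToyPred { EQ, NE };

struct ToyPredicateInfo {
  ToyPred Pred;
  bool IsAssume;
  bool TrueEdge; // Only meaningful for branches.
};

bool impliesOperandsEqual(const ToyPredicateInfo &PI) {
  if (PI.IsAssume)
    return PI.Pred == ToyPred::EQ; // assume(x == y) always holds.
  return (PI.TrueEdge && PI.Pred == ToyPred::EQ) ||
         (!PI.TrueEdge && PI.Pred == ToyPred::NE);
}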
+
// Evaluate read only and pure calls, and create an expression result.
-const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I,
- const BasicBlock *B) {
+const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I) {
auto *CI = cast<CallInst>(I);
- if (AA->doesNotAccessMemory(CI))
- return createCallExpression(CI, nullptr, B);
- if (AA->onlyReadsMemory(CI)) {
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ // Instrinsics with the returned attribute are copies of arguments.
+ if (auto *ReturnedValue = II->getReturnedArgOperand()) {
+ if (II->getIntrinsicID() == Intrinsic::ssa_copy)
+ if (const auto *Result = performSymbolicPredicateInfoEvaluation(I))
+ return Result;
+ return createVariableOrConstant(ReturnedValue);
+ }
+ }
+ if (AA->doesNotAccessMemory(CI)) {
+ return createCallExpression(CI, TOPClass->getMemoryLeader());
+ } else if (AA->onlyReadsMemory(CI)) {
MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(CI);
- return createCallExpression(CI, lookupMemoryAccessEquiv(DefiningAccess), B);
+ return createCallExpression(CI, DefiningAccess);
}
return nullptr;
}
-// Update the memory access equivalence table to say that From is equal to To,
+// Retrieve the memory class for a given MemoryAccess.
+CongruenceClass *NewGVN::getMemoryClass(const MemoryAccess *MA) const {
+ auto *Result = MemoryAccessToClass.lookup(MA);
+ assert(Result && "Should have found memory class");
+ return Result;
+}
+
+// Update the MemoryAccess equivalence table to say that From is equal to To,
// and return true if this is different from what already existed in the table.
-bool NewGVN::setMemoryAccessEquivTo(MemoryAccess *From, MemoryAccess *To) {
- DEBUG(dbgs() << "Setting " << *From << " equivalent to ");
- if (!To)
- DEBUG(dbgs() << "itself");
- else
- DEBUG(dbgs() << *To);
+bool NewGVN::setMemoryClass(const MemoryAccess *From,
+ CongruenceClass *NewClass) {
+ assert(NewClass &&
+ "Every MemoryAccess should be getting mapped to a non-null class");
+ DEBUG(dbgs() << "Setting " << *From);
+ DEBUG(dbgs() << " equivalent to congruence class ");
+ DEBUG(dbgs() << NewClass->getID() << " with current MemoryAccess leader ");
+ DEBUG(dbgs() << *NewClass->getMemoryLeader());
DEBUG(dbgs() << "\n");
- auto LookupResult = MemoryAccessEquiv.find(From);
+
+ auto LookupResult = MemoryAccessToClass.find(From);
bool Changed = false;
// If it's already in the table, see if the value changed.
- if (LookupResult != MemoryAccessEquiv.end()) {
- if (To && LookupResult->second != To) {
+ if (LookupResult != MemoryAccessToClass.end()) {
+ auto *OldClass = LookupResult->second;
+ if (OldClass != NewClass) {
+ // If this is a phi, we have to handle memory member updates.
+ if (auto *MP = dyn_cast<MemoryPhi>(From)) {
+ OldClass->memory_erase(MP);
+ NewClass->memory_insert(MP);
+ // This may have killed the class if it had no non-memory members
+ if (OldClass->getMemoryLeader() == From) {
+ if (OldClass->memory_empty()) {
+ OldClass->setMemoryLeader(nullptr);
+ } else {
+ OldClass->setMemoryLeader(getNextMemoryLeader(OldClass));
+ DEBUG(dbgs() << "Memory class leader change for class "
+ << OldClass->getID() << " to "
+ << *OldClass->getMemoryLeader()
+ << " due to removal of a memory member " << *From
+ << "\n");
+ markMemoryLeaderChangeTouched(OldClass);
+ }
+ }
+ }
// It wasn't equivalent before, and now it is.
- LookupResult->second = To;
- Changed = true;
- } else if (!To) {
- // It used to be equivalent to something, and now it's not.
- MemoryAccessEquiv.erase(LookupResult);
+ LookupResult->second = NewClass;
Changed = true;
}
- } else {
- assert(!To &&
- "Memory equivalence should never change from nothing to something");
}
return Changed;
}
+
+// Determine if a phi is cycle-free. That means the values in the phi don't
+// depend on any expressions that can change value as a result of the phi.
+// For example, a non-cycle free phi would be v = phi(0, v+1).
+bool NewGVN::isCycleFree(const PHINode *PN) {
+ // In order to compute cycle-freeness, we do SCC finding on the phi, and see
+ // what kind of SCC it ends up in. If it is a singleton, it is cycle-free.
+ // If it is not in a singleton, it is only cycle free if the other members are
+ // all phi nodes (as they do not compute anything, they are copies). TODO:
+ // There are likely a few other intrinsics or expressions that could be
+ // included here, but this happens so infrequently already that it is not
+ // likely to be worth it.
+ auto PCS = PhiCycleState.lookup(PN);
+ if (PCS == PCS_Unknown) {
+ SCCFinder.Start(PN);
+ auto &SCC = SCCFinder.getComponentFor(PN);
+ // It's cycle-free if its size is 1 or the SCC is *only* phi nodes.
+ if (SCC.size() == 1)
+ PhiCycleState.insert({PN, PCS_CycleFree});
+ else {
+ bool AllPhis =
+ llvm::all_of(SCC, [](const Value *V) { return isa<PHINode>(V); });
+ PCS = AllPhis ? PCS_CycleFree : PCS_Cycle;
+ for (auto *Member : SCC)
+ if (auto *MemberPhi = dyn_cast<PHINode>(Member))
+ PhiCycleState.insert({MemberPhi, PCS});
+ }
+ }
+ if (PCS == PCS_Cycle)
+ return false;
+ return true;
+}
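// A sketch of the classification above, assuming the SCC has already been
// computed by a standard SCC finder (the types here are invented): a phi is
// cycle-free iff its SCC is a singleton or contains only phis, which are
// pure copies and compute nothing new.
#include <algorithm>
#include <vector>

struct ToyNode { bool IsPhi; };

bool sccIsCycleFree(const std::vector<const ToyNode *> &SCC) {
  return SCC.size() == 1 ||
         std::all_of(SCC.begin(), SCC.end(),
                     [](const ToyNode *N) { return N->IsPhi; });
}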
+
// Evaluate PHI nodes symbolically, and create an expression result.
-const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
- const BasicBlock *B) {
- auto *E = cast<PHIExpression>(createPHIExpression(I));
+const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) {
+ // True if one of the incoming phi edges is a backedge.
+ bool HasBackedge = false;
+ // AllConstant tracks whether all the *original* phi operands were constant.
+ // This is really shorthand for "this phi cannot cycle due to forward
+ // propagation", as any change in value of the phi is guaranteed not to later
+ // change the value of the phi. IE it can't be v = phi(undef, v+1).
+ bool AllConstant = true;
+ auto *E =
+ cast<PHIExpression>(createPHIExpression(I, HasBackedge, AllConstant));
// We match the semantics of SimplifyPhiNode from InstructionSimplify here.
// See if all arguments are the same.
@@ -861,14 +1464,15 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
if (Filtered.begin() == Filtered.end()) {
DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef"
<< "\n");
- E->deallocateOperands(ArgRecycler);
- ExpressionAllocator.Deallocate(E);
+ deleteExpression(E);
return createConstantExpression(UndefValue::get(I->getType()));
}
+ unsigned NumOps = 0;
Value *AllSameValue = *(Filtered.begin());
++Filtered.begin();
// Can't use std::equal here, sadly, because filter.begin moves.
- if (llvm::all_of(Filtered, [AllSameValue](const Value *V) {
+ if (llvm::all_of(Filtered, [AllSameValue, &NumOps](const Value *V) {
+ ++NumOps;
return V == AllSameValue;
})) {
// In LLVM's non-standard representation of phi nodes, it's possible to have
@@ -881,27 +1485,32 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
// We also special case undef, so that if we have an undef, we can't use the
// common value unless it dominates the phi block.
if (HasUndef) {
+ // If we have undef and at least one other value, this is really a
+ // multivalued phi, and we need to know if it's cycle free in order to
+ // evaluate whether we can ignore the undef. The other parts of this are
+ // just shortcuts. If there is no backedge, or all operands are
+ // constants, or all operands are ignored but the undef, it also must be
+ // cycle free.
+ if (!AllConstant && HasBackedge && NumOps > 0 &&
+ !isa<UndefValue>(AllSameValue) && !isCycleFree(cast<PHINode>(I)))
+ return E;
+
// Only have to check for instructions
if (auto *AllSameInst = dyn_cast<Instruction>(AllSameValue))
- if (!DT->dominates(AllSameInst, I))
+ if (!someEquivalentDominates(AllSameInst, I))
return E;
}
NumGVNPhisAllSame++;
DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue
<< "\n");
- E->deallocateOperands(ArgRecycler);
- ExpressionAllocator.Deallocate(E);
- if (auto *C = dyn_cast<Constant>(AllSameValue))
- return createConstantExpression(C);
- return createVariableExpression(AllSameValue);
+ deleteExpression(E);
+ return createVariableOrConstant(AllSameValue);
}
return E;
}
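// A toy version of the filtering above (0 plays undef in this model; the
// dominance and cycle-freeness guards are elided): drop undef operands, and
// if every remaining operand agrees, the phi takes that value.
#include <optional>
#include <vector>

// Returns the single agreed value, 0 when everything was undef, or nullopt
// to keep the phi because it is genuinely multivalued.
std::optional<int> simplifyToyPhi(const std::vector<int> &Ops) {
  std::optional<int> Same;
  for (int Op : Ops) {
    if (Op == 0)
      continue; // Skip undef operands.
    if (Same && *Same != Op)
      return std::nullopt; // Truly multivalued: keep the phi.
    Same = Op;
  }
  return Same ? *Same : 0; // All agreed, or everything was undef.
}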
-const Expression *
-NewGVN::performSymbolicAggrValueEvaluation(Instruction *I,
- const BasicBlock *B) {
+const Expression *NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) {
if (auto *EI = dyn_cast<ExtractValueInst>(I)) {
auto *II = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
if (II && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
@@ -931,19 +1540,130 @@ NewGVN::performSymbolicAggrValueEvaluation(Instruction *I,
// expression.
assert(II->getNumArgOperands() == 2 &&
"Expect two args for recognised intrinsics.");
- return createBinaryExpression(Opcode, EI->getType(),
- II->getArgOperand(0),
- II->getArgOperand(1), B);
+ return createBinaryExpression(
+ Opcode, EI->getType(), II->getArgOperand(0), II->getArgOperand(1));
}
}
}
- return createAggregateValueExpression(I, B);
+ return createAggregateValueExpression(I);
+}
+const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) {
+ auto *CI = cast<CmpInst>(I);
+ // See if our operands are equal to those of a previous predicate, and if so,
+ // if it implies true or false.
+ auto Op0 = lookupOperandLeader(CI->getOperand(0));
+ auto Op1 = lookupOperandLeader(CI->getOperand(1));
+ auto OurPredicate = CI->getPredicate();
+ if (shouldSwapOperands(Op0, Op1)) {
+ std::swap(Op0, Op1);
+ OurPredicate = CI->getSwappedPredicate();
+ }
+
+ // Avoid processing the same info twice
+ const PredicateBase *LastPredInfo = nullptr;
+ // See if we know something about the comparison itself, like it is the target
+ // of an assume.
+ auto *CmpPI = PredInfo->getPredicateInfoFor(I);
+ if (dyn_cast_or_null<PredicateAssume>(CmpPI))
+ return createConstantExpression(ConstantInt::getTrue(CI->getType()));
+
+ if (Op0 == Op1) {
+ // This condition does not depend on predicates, no need to add users
+ if (CI->isTrueWhenEqual())
+ return createConstantExpression(ConstantInt::getTrue(CI->getType()));
+ else if (CI->isFalseWhenEqual())
+ return createConstantExpression(ConstantInt::getFalse(CI->getType()));
+ }
+
+ // NOTE: Because we are comparing both operands here and below, and using
+ // previous comparisons, we rely on the fact that predicateinfo knows to mark
+ // comparisons that use renamed operands as users of the earlier comparisons.
+ // It is *not* enough to just mark predicateinfo renamed operands as users of
+ // the earlier comparisons, because the *other* operand may have changed in a
+ // previous iteration.
+ // Example:
+ // icmp slt %a, %b
+ // %b.0 = ssa.copy(%b)
+ // false branch:
+ // icmp slt %c, %b.0
+
+ // %c and %a may start out equal, and thus, the code below will say the second
+ // icmp is false. %c may become equal to something else, and in that case the
+ // second icmp *must* be reexamined, but would not be if only the renamed
+ // operands are considered users of the icmp.
+
+ // *Currently* we only check one level of comparisons back, and only mark one
+ // level back as touched when changes happen. If you modify this code to look
+ // back farther through comparisons, you *must* mark the appropriate
+ // comparisons as users in PredicateInfo.cpp, or you will cause bugs.
+
+ // See if we know something just from the operands themselves.
+
+ // See if our operands have predicate info, so that we may be able to derive
+ // something from a previous comparison.
+ for (const auto &Op : CI->operands()) {
+ auto *PI = PredInfo->getPredicateInfoFor(Op);
+ if (const auto *PBranch = dyn_cast_or_null<PredicateBranch>(PI)) {
+ if (PI == LastPredInfo)
+ continue;
+ LastPredInfo = PI;
+
+ // TODO: Along the false edge, we may know more things too, like icmp of
+ // same operands is false.
+ // TODO: We only handle actual comparison conditions below, not and/or.
+ auto *BranchCond = dyn_cast<CmpInst>(PBranch->Condition);
+ if (!BranchCond)
+ continue;
+ auto *BranchOp0 = lookupOperandLeader(BranchCond->getOperand(0));
+ auto *BranchOp1 = lookupOperandLeader(BranchCond->getOperand(1));
+ auto BranchPredicate = BranchCond->getPredicate();
+ if (shouldSwapOperands(BranchOp0, BranchOp1)) {
+ std::swap(BranchOp0, BranchOp1);
+ BranchPredicate = BranchCond->getSwappedPredicate();
+ }
+ if (BranchOp0 == Op0 && BranchOp1 == Op1) {
+ if (PBranch->TrueEdge) {
+ // If we know the previous predicate is true and we are on the true
+ // edge, then we may be implied true or false.
+ if (CmpInst::isImpliedTrueByMatchingCmp(OurPredicate,
+ BranchPredicate)) {
+ addPredicateUsers(PI, I);
+ return createConstantExpression(
+ ConstantInt::getTrue(CI->getType()));
+ }
+
+ if (CmpInst::isImpliedFalseByMatchingCmp(OurPredicate,
+ BranchPredicate)) {
+ addPredicateUsers(PI, I);
+ return createConstantExpression(
+ ConstantInt::getFalse(CI->getType()));
+ }
+
+ } else {
+ // Just handle the ne and eq cases, where if we have the same
+ // operands, we may know something.
+ if (BranchPredicate == OurPredicate) {
+ addPredicateUsers(PI, I);
+ // Same predicate, same ops; we know it was false, so this is false.
+ return createConstantExpression(
+ ConstantInt::getFalse(CI->getType()));
+ } else if (BranchPredicate ==
+ CmpInst::getInversePredicate(OurPredicate)) {
+ addPredicateUsers(PI, I);
+ // Inverse predicate, we know the other was false, so this is true.
+ return createConstantExpression(
+ ConstantInt::getTrue(CI->getType()));
+ }
+ }
+ }
+ }
+ }
+ // createExpression will take care of simplifyCmpInst.
+ return createExpression(I);
}
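// A small worked instance (two predicates only, invented enum) of the
// implication rules above: if "a slt b" is known true on the taken edge, a
// later "a slt b" on the same operands folds to true, and "a sge b" (its
// inverse) folds to false; anything else stays unknown.
enum class CmpPred { SLT, SGE };

// Returns 1 (folds to true), 0 (folds to false), or -1 (unknown).
int foldByKnownCmp(CmpPred Ours, CmpPred KnownTrue) {
  if (Ours == KnownTrue)
    return 1; // Same predicate, same operands.
  if ((Ours == CmpPred::SGE && KnownTrue == CmpPred::SLT) ||
      (Ours == CmpPred::SLT && KnownTrue == CmpPred::SGE))
    return 0; // Inverse predicate of a known-true comparison.
  return -1;
}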
// Substitute and symbolize the value before value numbering.
-const Expression *NewGVN::performSymbolicEvaluation(Value *V,
- const BasicBlock *B) {
+const Expression *NewGVN::performSymbolicEvaluation(Value *V) {
const Expression *E = nullptr;
if (auto *C = dyn_cast<Constant>(V))
E = createConstantExpression(C);
@@ -957,24 +1677,27 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V,
switch (I->getOpcode()) {
case Instruction::ExtractValue:
case Instruction::InsertValue:
- E = performSymbolicAggrValueEvaluation(I, B);
+ E = performSymbolicAggrValueEvaluation(I);
break;
case Instruction::PHI:
- E = performSymbolicPHIEvaluation(I, B);
+ E = performSymbolicPHIEvaluation(I);
break;
case Instruction::Call:
- E = performSymbolicCallEvaluation(I, B);
+ E = performSymbolicCallEvaluation(I);
break;
case Instruction::Store:
- E = performSymbolicStoreEvaluation(I, B);
+ E = performSymbolicStoreEvaluation(I);
break;
case Instruction::Load:
- E = performSymbolicLoadEvaluation(I, B);
+ E = performSymbolicLoadEvaluation(I);
break;
case Instruction::BitCast: {
- E = createExpression(I, B);
+ E = createExpression(I);
+ } break;
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ E = performSymbolicCmpEvaluation(I);
} break;
-
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -993,8 +1716,6 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V,
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
- case Instruction::ICmp:
- case Instruction::FCmp:
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -1011,7 +1732,7 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V,
case Instruction::InsertElement:
case Instruction::ShuffleVector:
case Instruction::GetElementPtr:
- E = createExpression(I, B);
+ E = createExpression(I);
break;
default:
return nullptr;
@@ -1020,129 +1741,297 @@ const Expression *NewGVN::performSymbolicEvaluation(Value *V,
return E;
}
-// There is an edge from 'Src' to 'Dst'. Return true if every path from
-// the entry block to 'Dst' passes via this edge. In particular 'Dst'
-// must not be reachable via another edge from 'Src'.
-bool NewGVN::isOnlyReachableViaThisEdge(const BasicBlockEdge &E) const {
-
- // While in theory it is interesting to consider the case in which Dst has
- // more than one predecessor, because Dst might be part of a loop which is
- // only reachable from Src, in practice it is pointless since at the time
- // GVN runs all such loops have preheaders, which means that Dst will have
- // been changed to have only one predecessor, namely Src.
- const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
- const BasicBlock *Src = E.getStart();
- assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
- (void)Src;
- return Pred != nullptr;
-}
-
void NewGVN::markUsersTouched(Value *V) {
// Now mark the users as touched.
for (auto *User : V->users()) {
assert(isa<Instruction>(User) && "Use of value not within an instruction?");
- TouchedInstructions.set(InstrDFS[User]);
+ TouchedInstructions.set(InstrToDFSNum(User));
}
}
-void NewGVN::markMemoryUsersTouched(MemoryAccess *MA) {
- for (auto U : MA->users()) {
- if (auto *MUD = dyn_cast<MemoryUseOrDef>(U))
- TouchedInstructions.set(InstrDFS[MUD->getMemoryInst()]);
- else
- TouchedInstructions.set(InstrDFS[U]);
+void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) {
+ DEBUG(dbgs() << "Adding memory user " << *U << " to " << *To << "\n");
+ MemoryToUsers[To].insert(U);
+}
+
+void NewGVN::markMemoryDefTouched(const MemoryAccess *MA) {
+ TouchedInstructions.set(MemoryToDFSNum(MA));
+}
+
+void NewGVN::markMemoryUsersTouched(const MemoryAccess *MA) {
+ if (isa<MemoryUse>(MA))
+ return;
+ for (auto U : MA->users())
+ TouchedInstructions.set(MemoryToDFSNum(U));
+ const auto Result = MemoryToUsers.find(MA);
+ if (Result != MemoryToUsers.end()) {
+ for (auto *User : Result->second)
+ TouchedInstructions.set(MemoryToDFSNum(User));
+ MemoryToUsers.erase(Result);
+ }
+}
+
+// Add I to the set of users of a given predicate.
+void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) {
+ if (auto *PBranch = dyn_cast<PredicateBranch>(PB))
+ PredicateToUsers[PBranch->Condition].insert(I);
+ else if (auto *PAssume = dyn_cast<PredicateAssume>(PB))
+ PredicateToUsers[PAssume->Condition].insert(I);
+}
+
+// Touch all the predicates that depend on this instruction.
+void NewGVN::markPredicateUsersTouched(Instruction *I) {
+ const auto Result = PredicateToUsers.find(I);
+ if (Result != PredicateToUsers.end()) {
+ for (auto *User : Result->second)
+ TouchedInstructions.set(InstrToDFSNum(User));
+ PredicateToUsers.erase(Result);
}
}
+// Mark users affected by a memory leader change.
+void NewGVN::markMemoryLeaderChangeTouched(CongruenceClass *CC) {
+ for (auto M : CC->memory())
+ markMemoryDefTouched(M);
+}
+
// Touch the instructions that need to be updated after a congruence class has a
// leader change, and mark changed values.
-void NewGVN::markLeaderChangeTouched(CongruenceClass *CC) {
- for (auto M : CC->Members) {
+void NewGVN::markValueLeaderChangeTouched(CongruenceClass *CC) {
+ for (auto M : *CC) {
if (auto *I = dyn_cast<Instruction>(M))
- TouchedInstructions.set(InstrDFS[I]);
+ TouchedInstructions.set(InstrToDFSNum(I));
LeaderChanges.insert(M);
}
}
+// Given a range of things that have instruction DFS numbers, this will return
+// the member of the range with the smallest DFS number.
+template <class T, class Range>
+T *NewGVN::getMinDFSOfRange(const Range &R) const {
+ std::pair<T *, unsigned> MinDFS = {nullptr, ~0U};
+ for (const auto X : R) {
+ auto DFSNum = InstrToDFSNum(X);
+ if (DFSNum < MinDFS.second)
+ MinDFS = {X, DFSNum};
+ }
+ return MinDFS.first;
+}
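// The same selection written against std::min_element (toy integer members
// and a hypothetical DFS-number map), for comparison; like the loop above, it
// assumes the range is non-empty.
#include <algorithm>
#include <unordered_map>
#include <vector>

int minDFSMember(const std::vector<int> &Members,
                 const std::unordered_map<int, unsigned> &DFSNum) {
  return *std::min_element(Members.begin(), Members.end(),
                           [&](int A, int B) {
                             return DFSNum.at(A) < DFSNum.at(B);
                           });
}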
+
+// This function returns the MemoryAccess that should be the next leader of
+// congruence class CC, under the assumption that the current leader is going to
+// disappear.
+const MemoryAccess *NewGVN::getNextMemoryLeader(CongruenceClass *CC) const {
+ // TODO: If this ends up too slow, we can maintain a next memory leader like we
+ // do for regular leaders.
+ // Make sure there will be a leader to find
+ assert((CC->getStoreCount() > 0 || !CC->memory_empty()) &&
+ "Can't get next leader if there is none");
+ if (CC->getStoreCount() > 0) {
+ if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first))
+ return MSSA->getMemoryAccess(NL);
+ // Find the store with the minimum DFS number.
+ auto *V = getMinDFSOfRange<Value>(make_filter_range(
+ *CC, [&](const Value *V) { return isa<StoreInst>(V); }));
+ return MSSA->getMemoryAccess(cast<StoreInst>(V));
+ }
+ assert(CC->getStoreCount() == 0);
+
+ // Given our assertion, hitting this part must mean
+ // !OldClass->memory_empty()
+ if (CC->memory_size() == 1)
+ return *CC->memory_begin();
+ return getMinDFSOfRange<const MemoryPhi>(CC->memory());
+}
+
+// This function returns the next value leader of a congruence class, under the
+// assumption that the current leader is going away. This should end up being
+// the next most dominating member.
+Value *NewGVN::getNextValueLeader(CongruenceClass *CC) const {
+ // We don't need to sort members if there is only 1, and we don't care about
+ // sorting the TOP class because everything either gets out of it or is
+ // unreachable.
+
+ if (CC->size() == 1 || CC == TOPClass) {
+ return *(CC->begin());
+ } else if (CC->getNextLeader().first) {
+ ++NumGVNAvoidedSortedLeaderChanges;
+ return CC->getNextLeader().first;
+ } else {
+ ++NumGVNSortedLeaderChanges;
+ // NOTE: If this ends up too slow, we can maintain a dual structure for
+ // member testing/insertion, or keep things mostly sorted, and sort only
+ // here, or use SparseBitVector or ....
+ return getMinDFSOfRange<Value>(*CC);
+ }
+}
+
+// Move a MemoryAccess, currently in OldClass, to NewClass, including updates to
+// the memory members, etc for the move.
+//
+// The invariants of this function are:
+//
+// I must be moving to NewClass from OldClass. The StoreCount of OldClass and
+// NewClass is expected to have been updated for I already if it is a store.
+// The OldClass memory leader has not been updated yet if I was the leader.
+void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I,
+ MemoryAccess *InstMA,
+ CongruenceClass *OldClass,
+ CongruenceClass *NewClass) {
+ // If the leader is I, and we had a representative MemoryAccess, it should
+ // be the MemoryAccess of OldClass.
+ assert((!InstMA || !OldClass->getMemoryLeader() ||
+ OldClass->getLeader() != I ||
+ OldClass->getMemoryLeader() == InstMA) &&
+ "Representative MemoryAccess mismatch");
+ // First, see what happens to the new class
+ if (!NewClass->getMemoryLeader()) {
+ // Should be a new class, or a store becoming a leader of a new class.
+ assert(NewClass->size() == 1 ||
+ (isa<StoreInst>(I) && NewClass->getStoreCount() == 1));
+ NewClass->setMemoryLeader(InstMA);
+ // Mark it touched if we didn't just create a singleton
+ DEBUG(dbgs() << "Memory class leader change for class " << NewClass->getID()
+ << " due to new memory instruction becoming leader\n");
+ markMemoryLeaderChangeTouched(NewClass);
+ }
+ setMemoryClass(InstMA, NewClass);
+ // Now, fixup the old class if necessary
+ if (OldClass->getMemoryLeader() == InstMA) {
+ if (OldClass->getStoreCount() != 0 || !OldClass->memory_empty()) {
+ OldClass->setMemoryLeader(getNextMemoryLeader(OldClass));
+ DEBUG(dbgs() << "Memory class leader change for class "
+ << OldClass->getID() << " to "
+ << *OldClass->getMemoryLeader()
+ << " due to removal of old leader " << *InstMA << "\n");
+ markMemoryLeaderChangeTouched(OldClass);
+ } else
+ OldClass->setMemoryLeader(nullptr);
+ }
+}
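// The old-class fixup above, restated as a tiny decision function (toy
// state; the names are invented). It returns the replacement leader id, -1
// for "no memory leader left", or nullopt when the departing access was not
// the leader and nothing changes.
#include <optional>

std::optional<int> fixupOldMemoryLeader(bool DepartingWasLeader,
                                        int RemainingStoreCount,
                                        bool HasMemoryPhis,
                                        int NextBestLeader) {
  if (!DepartingWasLeader)
    return std::nullopt; // Leader unchanged.
  if (RemainingStoreCount > 0 || HasMemoryPhis)
    return NextBestLeader; // Promote the next most dominating candidate.
  return -1; // The class has no memory side left.
}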
+
// Move a value, currently in OldClass, to be part of NewClass
-// Update OldClass for the move (including changing leaders, etc)
-void NewGVN::moveValueToNewCongruenceClass(Instruction *I,
+// Update OldClass and NewClass for the move (including changing leaders, etc).
+void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E,
CongruenceClass *OldClass,
CongruenceClass *NewClass) {
- DEBUG(dbgs() << "New congruence class for " << I << " is " << NewClass->ID
- << "\n");
-
- if (I == OldClass->NextLeader.first)
- OldClass->NextLeader = {nullptr, ~0U};
+ if (I == OldClass->getNextLeader().first)
+ OldClass->resetNextLeader();
// It's possible, though unlikely, for us to discover equivalences such
// that the current leader does not dominate the old one.
// This statistic tracks how often this happens.
// We assert on phi nodes when this happens, currently, for debugging, because
// we want to make sure we name phi node cycles properly.
- if (isa<Instruction>(NewClass->RepLeader) && NewClass->RepLeader &&
- I != NewClass->RepLeader &&
- DT->properlyDominates(
- I->getParent(),
- cast<Instruction>(NewClass->RepLeader)->getParent())) {
- ++NumGVNNotMostDominatingLeader;
- assert(!isa<PHINode>(I) &&
- "New class for instruction should not be dominated by instruction");
- }
-
- if (NewClass->RepLeader != I) {
- auto DFSNum = InstrDFS.lookup(I);
- if (DFSNum < NewClass->NextLeader.second)
- NewClass->NextLeader = {I, DFSNum};
+ if (isa<Instruction>(NewClass->getLeader()) && NewClass->getLeader() &&
+ I != NewClass->getLeader()) {
+ auto *IBB = I->getParent();
+ auto *NCBB = cast<Instruction>(NewClass->getLeader())->getParent();
+ bool Dominated =
+ IBB == NCBB && InstrToDFSNum(I) < InstrToDFSNum(NewClass->getLeader());
+ Dominated = Dominated || DT->properlyDominates(IBB, NCBB);
+ if (Dominated) {
+ ++NumGVNNotMostDominatingLeader;
+ assert(
+ !isa<PHINode>(I) &&
+ "New class for instruction should not be dominated by instruction");
+ }
}
- OldClass->Members.erase(I);
- NewClass->Members.insert(I);
- if (isa<StoreInst>(I)) {
- --OldClass->StoreCount;
- assert(OldClass->StoreCount >= 0);
- ++NewClass->StoreCount;
- assert(NewClass->StoreCount > 0);
+ if (NewClass->getLeader() != I)
+ NewClass->addPossibleNextLeader({I, InstrToDFSNum(I)});
+
+ OldClass->erase(I);
+ NewClass->insert(I);
+ // Handle our special casing of stores.
+ if (auto *SI = dyn_cast<StoreInst>(I)) {
+ OldClass->decStoreCount();
+ // Okay, so when do we want to make a store a leader of a class?
+ // If we have a store defined by an earlier load, we want the earlier load
+ // to lead the class.
+ // If we have a store defined by something else, we want the store to lead
+ // the class so everything else gets the "something else" as a value.
+ // If we have a store as the single member of the class, we want the store
+ // as the leader
+ if (NewClass->getStoreCount() == 0 && !NewClass->getStoredValue()) {
+ // If it's a store expression we are using, it means we are not equivalent
+ // to something earlier.
+ if (isa<StoreExpression>(E)) {
+ assert(lookupOperandLeader(SI->getValueOperand()) !=
+ NewClass->getLeader());
+ NewClass->setStoredValue(lookupOperandLeader(SI->getValueOperand()));
+ markValueLeaderChangeTouched(NewClass);
+ // Shift the new class leader to be the store
+ DEBUG(dbgs() << "Changing leader of congruence class "
+ << NewClass->getID() << " from " << *NewClass->getLeader()
+ << " to " << *SI << " because store joined class\n");
+ // If we changed the leader, we have to mark it changed because we don't
+ // know what it will do to symbolic evaluation.
+ NewClass->setLeader(SI);
+ }
+ // We rely on the code below handling the MemoryAccess change.
+ }
+ NewClass->incStoreCount();
}
-
+ // True if there are no memory instructions left in a class that had memory
+ // instructions before.
+
+ // If it's not a memory use, set the MemoryAccess equivalence
+ auto *InstMA = dyn_cast_or_null<MemoryDef>(MSSA->getMemoryAccess(I));
+ bool InstWasMemoryLeader = InstMA && OldClass->getMemoryLeader() == InstMA;
+ if (InstMA)
+ moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass);
ValueToClass[I] = NewClass;
// See if we destroyed the class or need to swap leaders.
- if (OldClass->Members.empty() && OldClass != InitialClass) {
- if (OldClass->DefiningExpr) {
- OldClass->Dead = true;
- DEBUG(dbgs() << "Erasing expression " << OldClass->DefiningExpr
+ if (OldClass->empty() && OldClass != TOPClass) {
+ if (OldClass->getDefiningExpr()) {
+ DEBUG(dbgs() << "Erasing expression " << OldClass->getDefiningExpr()
<< " from table\n");
- ExpressionToClass.erase(OldClass->DefiningExpr);
+ ExpressionToClass.erase(OldClass->getDefiningExpr());
}
- } else if (OldClass->RepLeader == I) {
+ } else if (OldClass->getLeader() == I) {
// When the leader changes, the value numbering of
// everything may change due to symbolization changes, so we need to
// reprocess.
- DEBUG(dbgs() << "Leader change!\n");
+ DEBUG(dbgs() << "Value class leader change for class " << OldClass->getID()
+ << "\n");
++NumGVNLeaderChanges;
- // We don't need to sort members if there is only 1, and we don't care about
- // sorting the initial class because everything either gets out of it or is
- // unreachable.
- if (OldClass->Members.size() == 1 || OldClass == InitialClass) {
- OldClass->RepLeader = *(OldClass->Members.begin());
- } else if (OldClass->NextLeader.first) {
- ++NumGVNAvoidedSortedLeaderChanges;
- OldClass->RepLeader = OldClass->NextLeader.first;
- OldClass->NextLeader = {nullptr, ~0U};
- } else {
- ++NumGVNSortedLeaderChanges;
- // TODO: If this ends up to slow, we can maintain a dual structure for
- // member testing/insertion, or keep things mostly sorted, and sort only
- // here, or ....
- std::pair<Value *, unsigned> MinDFS = {nullptr, ~0U};
- for (const auto X : OldClass->Members) {
- auto DFSNum = InstrDFS.lookup(X);
- if (DFSNum < MinDFS.second)
- MinDFS = {X, DFSNum};
- }
- OldClass->RepLeader = MinDFS.first;
+ // Destroy the stored value if there are no more stores to represent it.
+ // Note that this is basically clean up for the expression removal that
+ // happens below. If we remove stores from a class, we may leave it as a
+ // class of equivalent memory phis.
+ if (OldClass->getStoreCount() == 0) {
+ if (OldClass->getStoredValue())
+ OldClass->setStoredValue(nullptr);
+ }
+ // If we destroy the old access leader and it's a store, we have to
+ // effectively destroy the congruence class. When it comes to scalars,
+ // anything with the same value is as good as any other. That means that
+ // one leader is as good as another, and as long as you have some leader for
+ // the value, you are good. When it comes to *memory states*, only one
+ // particular thing really represents the definition of a given memory
+ // state. Once it goes away, we need to re-evaluate which pieces of memory
+ // are really still equivalent. The best way to do this is to re-value
+ // number things. The only way to really make that happen is to destroy the
+ // rest of the class. In order to effectively destroy the class, we reset
+ // ExpressionToClass for each by using the ValueToExpression mapping. The
+ // members later get marked as touched due to the leader change. We will
+ // create new congruence classes, and the pieces that are still equivalent
+ // will end back together in a new class. If this becomes too expensive, it
+ // is possible to use a versioning scheme for the congruence classes to
+ // avoid the expressions finding this old class. Note that the situation is
+ // different for memory phis, because they are evaluated anew each time, and
+ // they become equal not by hashing, but by seeing if all operands are the
+ // same (or only one is reachable).
+ if (OldClass->getStoreCount() > 0 && InstWasMemoryLeader) {
+ DEBUG(dbgs() << "Kicking everything out of class " << OldClass->getID()
+ << " because MemoryAccess leader changed");
+ for (auto Member : *OldClass)
+ ExpressionToClass.erase(ValueToExpression.lookup(Member));
}
- markLeaderChangeTouched(OldClass);
+ OldClass->setLeader(getNextValueLeader(OldClass));
+ OldClass->resetNextLeader();
+ markValueLeaderChangeTouched(OldClass);
}
}
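// A compressed sketch (toy class, value side only, invented names) of the
// bookkeeping order above: move the member, keep store counts in step, and
// only afterwards decide whether the old class needs its next leader.
#include <set>

struct ToyCC {
  std::set<int> Members;
  int Leader = -1;
  int StoreCount = 0;
};

void moveMember(int I, bool IsStore, ToyCC &Old, ToyCC &New,
                int (*NextLeader)(const ToyCC &)) {
  Old.Members.erase(I);
  New.Members.insert(I);
  if (IsStore) {
    --Old.StoreCount;
    ++New.StoreCount;
  }
  if (!Old.Members.empty() && Old.Leader == I)
    Old.Leader = NextLeader(Old); // The leader left: promote the next best.
}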
@@ -1150,12 +2039,12 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I,
void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
ValueToExpression[I] = E;
// This is guaranteed to return something, since it will at least find
- // INITIAL.
+ // TOP.
CongruenceClass *IClass = ValueToClass[I];
assert(IClass && "Should have found a IClass");
// Dead classes should have been eliminated from the mapping.
- assert(!IClass->Dead && "Found a dead class");
+ assert(!IClass->isDead() && "Found a dead class");
CongruenceClass *EClass;
if (const auto *VE = dyn_cast<VariableExpression>(E)) {
@@ -1171,79 +2060,52 @@ void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
// Constants and variables should always be made the leader.
if (const auto *CE = dyn_cast<ConstantExpression>(E)) {
- NewClass->RepLeader = CE->getConstantValue();
+ NewClass->setLeader(CE->getConstantValue());
} else if (const auto *SE = dyn_cast<StoreExpression>(E)) {
StoreInst *SI = SE->getStoreInst();
- NewClass->RepLeader =
- lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
+ NewClass->setLeader(SI);
+ NewClass->setStoredValue(lookupOperandLeader(SI->getValueOperand()));
+ // The RepMemoryAccess field will be filled in properly by the
+ // moveValueToNewCongruenceClass call.
} else {
- NewClass->RepLeader = I;
+ NewClass->setLeader(I);
}
assert(!isa<VariableExpression>(E) &&
"VariableExpression should have been handled already");
EClass = NewClass;
DEBUG(dbgs() << "Created new congruence class for " << *I
- << " using expression " << *E << " at " << NewClass->ID
- << " and leader " << *(NewClass->RepLeader) << "\n");
- DEBUG(dbgs() << "Hash value was " << E->getHashValue() << "\n");
+ << " using expression " << *E << " at " << NewClass->getID()
+ << " and leader " << *(NewClass->getLeader()));
+ if (NewClass->getStoredValue())
+ DEBUG(dbgs() << " and stored value " << *(NewClass->getStoredValue()));
+ DEBUG(dbgs() << "\n");
} else {
EClass = lookupResult.first->second;
if (isa<ConstantExpression>(E))
- assert(isa<Constant>(EClass->RepLeader) &&
+ assert((isa<Constant>(EClass->getLeader()) ||
+ (EClass->getStoredValue() &&
+ isa<Constant>(EClass->getStoredValue()))) &&
"Any class with a constant expression should have a "
"constant leader");
assert(EClass && "Somehow don't have an eclass");
- assert(!EClass->Dead && "We accidentally looked up a dead class");
+ assert(!EClass->isDead() && "We accidentally looked up a dead class");
}
}
bool ClassChanged = IClass != EClass;
bool LeaderChanged = LeaderChanges.erase(I);
if (ClassChanged || LeaderChanged) {
- DEBUG(dbgs() << "Found class " << EClass->ID << " for expression " << E
+ DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " << *E
<< "\n");
-
if (ClassChanged)
- moveValueToNewCongruenceClass(I, IClass, EClass);
+ moveValueToNewCongruenceClass(I, E, IClass, EClass);
markUsersTouched(I);
- if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
- // If this is a MemoryDef, we need to update the equivalence table. If
- // we determined the expression is congruent to a different memory
- // state, use that different memory state. If we determined it didn't,
- // we update that as well. Right now, we only support store
- // expressions.
- if (!isa<MemoryUse>(MA) && isa<StoreExpression>(E) &&
- EClass->Members.size() != 1) {
- auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
- setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
- } else {
- setMemoryAccessEquivTo(MA, nullptr);
- }
+ if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
markMemoryUsersTouched(MA);
- }
- } else if (auto *SI = dyn_cast<StoreInst>(I)) {
- // There is, sadly, one complicating thing for stores. Stores do not
- // produce values, only consume them. However, in order to make loads and
- // stores value number the same, we ignore the value operand of the store.
- // But the value operand will still be the leader of our class, and thus, it
- // may change. Because the store is a use, the store will get reprocessed,
- // but nothing will change about it, and so nothing above will catch it
- // (since the class will not change). In order to make sure everything ends
- // up okay, we need to recheck the leader of the class. Since stores of
- // different values value number differently due to different memorydefs, we
- // are guaranteed the leader is always the same between stores in the same
- // class.
- DEBUG(dbgs() << "Checking store leader\n");
- auto ProperLeader =
- lookupOperandLeader(SI->getValueOperand(), SI, SI->getParent());
- if (EClass->RepLeader != ProperLeader) {
- DEBUG(dbgs() << "Store leader changed, fixing\n");
- EClass->RepLeader = ProperLeader;
- markLeaderChangeTouched(EClass);
- markMemoryUsersTouched(MSSA->getMemoryAccess(SI));
- }
+ if (auto *CI = dyn_cast<CmpInst>(I))
+ markPredicateUsersTouched(CI);
}
}
@@ -1267,11 +2129,11 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
// they are the only thing that depend on new edges. Anything using their
// values will get propagated to if necessary.
if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To))
- TouchedInstructions.set(InstrDFS[MemPhi]);
+ TouchedInstructions.set(InstrToDFSNum(MemPhi));
auto BI = To->begin();
while (isa<PHINode>(BI)) {
- TouchedInstructions.set(InstrDFS[&*BI]);
+ TouchedInstructions.set(InstrToDFSNum(&*BI));
++BI;
}
}
@@ -1280,8 +2142,8 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
// Given a predicate condition (from a switch, cmp, or whatever) and a block,
// see if we know some constant value for it already.
-Value *NewGVN::findConditionEquivalence(Value *Cond, BasicBlock *B) const {
- auto Result = lookupOperandLeader(Cond, nullptr, B);
+Value *NewGVN::findConditionEquivalence(Value *Cond) const {
+ auto Result = lookupOperandLeader(Cond);
if (isa<Constant>(Result))
return Result;
return nullptr;
@@ -1293,10 +2155,10 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
BranchInst *BR;
if ((BR = dyn_cast<BranchInst>(TI)) && BR->isConditional()) {
Value *Cond = BR->getCondition();
- Value *CondEvaluated = findConditionEquivalence(Cond, B);
+ Value *CondEvaluated = findConditionEquivalence(Cond);
if (!CondEvaluated) {
if (auto *I = dyn_cast<Instruction>(Cond)) {
- const Expression *E = createExpression(I, B);
+ const Expression *E = createExpression(I);
if (const auto *CE = dyn_cast<ConstantExpression>(E)) {
CondEvaluated = CE->getConstantValue();
}
@@ -1329,13 +2191,13 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
Value *SwitchCond = SI->getCondition();
- Value *CondEvaluated = findConditionEquivalence(SwitchCond, B);
+ Value *CondEvaluated = findConditionEquivalence(SwitchCond);
// See if we were able to turn this switch statement into a constant.
if (CondEvaluated && isa<ConstantInt>(CondEvaluated)) {
auto *CondVal = cast<ConstantInt>(CondEvaluated);
// We should be able to get case value for this.
- auto CaseVal = SI->findCaseValue(CondVal);
- if (CaseVal.getCaseSuccessor() == SI->getDefaultDest()) {
+ auto Case = *SI->findCaseValue(CondVal);
+ if (Case.getCaseSuccessor() == SI->getDefaultDest()) {
// We proved the value is outside of the range of the case.
// We can't do anything other than mark the default dest as reachable,
// and go home.
@@ -1343,7 +2205,7 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
return;
}
// Now get where it goes and mark it reachable.
- BasicBlock *TargetBlock = CaseVal.getCaseSuccessor();
+ BasicBlock *TargetBlock = Case.getCaseSuccessor();
updateReachableEdge(B, TargetBlock);
} else {
for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
@@ -1361,45 +2223,66 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
}
// This also may be a memory defining terminator, in which case, set it
- // equivalent to nothing.
- if (MemoryAccess *MA = MSSA->getMemoryAccess(TI))
- setMemoryAccessEquivTo(MA, nullptr);
+ // equivalent only to itself.
+ //
+ auto *MA = MSSA->getMemoryAccess(TI);
+ if (MA && !isa<MemoryUse>(MA)) {
+ auto *CC = ensureLeaderOfMemoryClass(MA);
+ if (setMemoryClass(MA, CC))
+ markMemoryUsersTouched(MA);
+ }
}
}
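// A toy instance (plain ints for case values and block ids) of the switch
// pruning above: with a constant leader for the condition, only the matching
// case's successor, or the default when nothing matches, becomes reachable.
#include <map>

int reachableSwitchSuccessor(int CondLeader,
                             const std::map<int, int> &CaseToSucc,
                             int DefaultSucc) {
  auto It = CaseToSucc.find(CondLeader);
  return It == CaseToSucc.end() ? DefaultSucc : It->second;
}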
-// The algorithm initially places the values of the routine in the INITIAL
-// congruence
-// class. The leader of INITIAL is the undetermined value `TOP`.
-// When the algorithm has finished, values still in INITIAL are unreachable.
+// The algorithm initially places the values of the routine in the TOP
+// congruence class. The leader of TOP is the undetermined value `undef`.
+// When the algorithm has finished, values still in TOP are unreachable.
void NewGVN::initializeCongruenceClasses(Function &F) {
- // FIXME now i can't remember why this is 2
- NextCongruenceNum = 2;
- // Initialize all other instructions to be in INITIAL class.
- CongruenceClass::MemberSet InitialValues;
- InitialClass = createCongruenceClass(nullptr, nullptr);
+ NextCongruenceNum = 0;
+
+ // Note that even though we use the live on entry def as a representative
+ // MemoryAccess, it is *not* the same as the actual live on entry def. We
+ // have no real equivalent to undef for MemoryAccesses, and so we really
+ // should be checking whether the MemoryAccess is top if we want to know if it
+ // is equivalent to everything. Otherwise, what this really signifies is that
+ // the access "reaches all the way back to the beginning of the function".
+
+ // Initialize all other instructions to be in TOP class.
+ TOPClass = createCongruenceClass(nullptr, nullptr);
+ TOPClass->setMemoryLeader(MSSA->getLiveOnEntryDef());
+ // The live on entry def gets put into its own class.
+ MemoryAccessToClass[MSSA->getLiveOnEntryDef()] =
+ createMemoryClass(MSSA->getLiveOnEntryDef());
+
for (auto &B : F) {
- if (auto *MP = MSSA->getMemoryAccess(&B))
- MemoryAccessEquiv.insert({MP, MSSA->getLiveOnEntryDef()});
+ // All MemoryAccesses are equivalent to live on entry to start. They must
+ // be initialized to something so that initial changes are noticed. For
+ // the maximal answer, we initialize them all to be the same as
+ // liveOnEntry.
+ auto *MemoryBlockDefs = MSSA->getBlockDefs(&B);
+ if (MemoryBlockDefs)
+ for (const auto &Def : *MemoryBlockDefs) {
+ MemoryAccessToClass[&Def] = TOPClass;
+ auto *MD = dyn_cast<MemoryDef>(&Def);
+ // Insert the memory phis into the member list.
+ if (!MD) {
+ const MemoryPhi *MP = cast<MemoryPhi>(&Def);
+ TOPClass->memory_insert(MP);
+ MemoryPhiState.insert({MP, MPS_TOP});
+ }
- for (auto &I : B) {
- InitialValues.insert(&I);
- ValueToClass[&I] = InitialClass;
- // All memory accesses are equivalent to live on entry to start. They must
- // be initialized to something so that initial changes are noticed. For
- // the maximal answer, we initialize them all to be the same as
- // liveOnEntry. Note that to save time, we only initialize the
- // MemoryDef's for stores and all MemoryPhis to be equal. Right now, no
- // other expression can generate a memory equivalence. If we start
- // handling memcpy/etc, we can expand this.
- if (isa<StoreInst>(&I)) {
- MemoryAccessEquiv.insert(
- {MSSA->getMemoryAccess(&I), MSSA->getLiveOnEntryDef()});
- ++InitialClass->StoreCount;
- assert(InitialClass->StoreCount > 0);
+ if (MD && isa<StoreInst>(MD->getMemoryInst()))
+ TOPClass->incStoreCount();
}
+ for (auto &I : B) {
+ // Don't insert void terminators into the class. We don't value number
+ // them, and they just end up sitting in TOP.
+ if (isa<TerminatorInst>(I) && I.getType()->isVoidTy())
+ continue;
+ TOPClass->insert(&I);
+ ValueToClass[&I] = TOPClass;
}
}
- InitialClass->Members.swap(InitialValues);
// Initialize arguments to be in their own unique congruence classes
for (auto &FA : F.args())
@@ -1408,8 +2291,8 @@ void NewGVN::initializeCongruenceClasses(Function &F) {
void NewGVN::cleanupTables() {
for (unsigned i = 0, e = CongruenceClasses.size(); i != e; ++i) {
- DEBUG(dbgs() << "Congruence class " << CongruenceClasses[i]->ID << " has "
- << CongruenceClasses[i]->Members.size() << " members\n");
+ DEBUG(dbgs() << "Congruence class " << CongruenceClasses[i]->getID()
+ << " has " << CongruenceClasses[i]->size() << " members\n");
// Make sure we delete the congruence class (probably worth switching to
// a unique_ptr at some point).
delete CongruenceClasses[i];
@@ -1427,15 +2310,14 @@ void NewGVN::cleanupTables() {
#ifndef NDEBUG
ProcessedCount.clear();
#endif
- DFSDomMap.clear();
InstrDFS.clear();
InstructionsToErase.clear();
-
DFSToInstr.clear();
BlockInstRange.clear();
TouchedInstructions.clear();
- DominatedInstRange.clear();
- MemoryAccessEquiv.clear();
+ MemoryAccessToClass.clear();
+ PredicateToUsers.clear();
+ MemoryToUsers.clear();
}
std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
@@ -1447,6 +2329,16 @@ std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
}
for (auto &I : *B) {
+ // There's no need to call isInstructionTriviallyDead more than once on
+ // an instruction. Therefore, once we know that an instruction is dead,
+ // we change its DFS number so that it doesn't get value numbered.
+ if (isInstructionTriviallyDead(&I, TLI)) {
+ InstrDFS[&I] = 0;
+ DEBUG(dbgs() << "Skipping trivially dead instruction " << I << "\n");
+ markInstructionForDeletion(&I);
+ continue;
+ }
+
InstrDFS[&I] = End++;
DFSToInstr.emplace_back(&I);
}
@@ -1462,7 +2354,7 @@ void NewGVN::updateProcessedCount(Value *V) {
if (ProcessedCount.count(V) == 0) {
ProcessedCount.insert({V, 1});
} else {
- ProcessedCount[V] += 1;
+ ++ProcessedCount[V];
assert(ProcessedCount[V] < 100 &&
"Seem to have processed the same Value a lot");
}
@@ -1472,26 +2364,33 @@ void NewGVN::updateProcessedCount(Value *V) {
void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) {
// If all the arguments are the same, the MemoryPhi has the same value as the
// argument.
- // Filter out unreachable blocks from our operands.
+ // Filter out unreachable blocks and self phis from our operands.
+ const BasicBlock *PHIBlock = MP->getBlock();
auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) {
- return ReachableBlocks.count(MP->getIncomingBlock(U));
+ return lookupMemoryLeader(cast<MemoryAccess>(U)) != MP &&
+ !isMemoryAccessTop(cast<MemoryAccess>(U)) &&
+ ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock});
});
-
- assert(Filtered.begin() != Filtered.end() &&
- "We should not be processing a MemoryPhi in a completely "
- "unreachable block");
+ // If all that is left is nothing, our MemoryPhi is undef. We keep it in
+ // TOPClass. Note: The only case where this should happen is if we have at
+ // least one self-argument.
+ if (Filtered.begin() == Filtered.end()) {
+ if (setMemoryClass(MP, TOPClass))
+ markMemoryUsersTouched(MP);
+ return;
+ }
// Transform the remaining operands into operand leaders.
// FIXME: mapped_iterator should have a range version.
auto LookupFunc = [&](const Use &U) {
- return lookupMemoryAccessEquiv(cast<MemoryAccess>(U));
+ return lookupMemoryLeader(cast<MemoryAccess>(U));
};
auto MappedBegin = map_iterator(Filtered.begin(), LookupFunc);
auto MappedEnd = map_iterator(Filtered.end(), LookupFunc);
// And now check if all the elements are equal.
// Sadly, we can't use std::equal since these are random access iterators.
- MemoryAccess *AllSameValue = *MappedBegin;
+ const auto *AllSameValue = *MappedBegin;
++MappedBegin;
bool AllEqual = std::all_of(
MappedBegin, MappedEnd,
@@ -1501,8 +2400,18 @@ void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) {
DEBUG(dbgs() << "Memory Phi value numbered to " << *AllSameValue << "\n");
else
DEBUG(dbgs() << "Memory Phi value numbered to itself\n");
-
- if (setMemoryAccessEquivTo(MP, AllEqual ? AllSameValue : nullptr))
+ // If it's equal to something, it's in that class. Otherwise, it has to be in
+ // a class where it is the leader (other things may be equivalent to it, but
+ // it needs to start off in its own class, which means it must have been the
+ // leader, and it can't have stopped being the leader because it was never
+ // removed).
+ CongruenceClass *CC =
+ AllEqual ? getMemoryClass(AllSameValue) : ensureLeaderOfMemoryClass(MP);
+ auto OldState = MemoryPhiState.lookup(MP);
+ assert(OldState != MPS_Invalid && "Invalid memory phi state");
+ auto NewState = AllEqual ? MPS_Equivalent : MPS_Unique;
+ MemoryPhiState[MP] = NewState;
+ if (setMemoryClass(MP, CC) || OldState != NewState)
markMemoryUsersTouched(MP);
}
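The MemoryPhi logic above boils down to one question: after filtering out unreachable, TOP, and self operands, do the survivors all share one leader? A self-contained sketch of that check, detached from MemorySSA (the names are illustrative, not from the patch):

// Sketch: return the common leader of a filtered operand list, or nullptr
// when the operands disagree and the phi must keep its own class.
#include <algorithm>
#include <vector>

template <typename T>
static const T *commonLeader(const std::vector<const T *> &Leaders) {
  if (Leaders.empty())
    return nullptr; // nothing reachable feeds the phi
  const T *First = Leaders.front();
  bool AllEqual = std::all_of(Leaders.begin(), Leaders.end(),
                              [&](const T *L) { return L == First; });
  return AllEqual ? First : nullptr;
}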
@@ -1510,21 +2419,25 @@ void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) {
// congruence finding, and updating mappings.
void NewGVN::valueNumberInstruction(Instruction *I) {
DEBUG(dbgs() << "Processing instruction " << *I << "\n");
- if (isInstructionTriviallyDead(I, TLI)) {
- DEBUG(dbgs() << "Skipping unused instruction\n");
- markInstructionForDeletion(I);
- return;
- }
if (!I->isTerminator()) {
- const auto *Symbolized = performSymbolicEvaluation(I, I->getParent());
+ const Expression *Symbolized = nullptr;
+ if (DebugCounter::shouldExecute(VNCounter)) {
+ Symbolized = performSymbolicEvaluation(I);
+ } else {
+ // Mark the instruction as unused so we don't value number it again.
+ InstrDFS[I] = 0;
+ }
// If we couldn't come up with a symbolic expression, use the unknown
// expression
- if (Symbolized == nullptr)
+ if (Symbolized == nullptr) {
Symbolized = createUnknownExpression(I);
+ }
+
performCongruenceFinding(I, Symbolized);
} else {
// Handle terminators that return values. All of them produce values we
- // don't currently understand.
+ // don't currently understand. We don't place non-value producing
+ // terminators in a class.
if (!I->getType()->isVoidTy()) {
auto *Symbolized = createUnknownExpression(I);
performCongruenceFinding(I, Symbolized);
@@ -1539,72 +2452,102 @@ bool NewGVN::singleReachablePHIPath(const MemoryAccess *First,
const MemoryAccess *Second) const {
if (First == Second)
return true;
-
- if (auto *FirstDef = dyn_cast<MemoryUseOrDef>(First)) {
- auto *DefAccess = FirstDef->getDefiningAccess();
- return singleReachablePHIPath(DefAccess, Second);
- } else {
- auto *MP = cast<MemoryPhi>(First);
- auto ReachableOperandPred = [&](const Use &U) {
- return ReachableBlocks.count(MP->getIncomingBlock(U));
- };
- auto FilteredPhiArgs =
- make_filter_range(MP->operands(), ReachableOperandPred);
- SmallVector<const Value *, 32> OperandList;
- std::copy(FilteredPhiArgs.begin(), FilteredPhiArgs.end(),
- std::back_inserter(OperandList));
- bool Okay = OperandList.size() == 1;
- if (!Okay)
- Okay = std::equal(OperandList.begin(), OperandList.end(),
- OperandList.begin());
- if (Okay)
- return singleReachablePHIPath(cast<MemoryAccess>(OperandList[0]), Second);
+ if (MSSA->isLiveOnEntryDef(First))
return false;
+
+ const auto *EndDef = First;
+ for (auto *ChainDef : optimized_def_chain(First)) {
+ if (ChainDef == Second)
+ return true;
+ if (MSSA->isLiveOnEntryDef(ChainDef))
+ return false;
+ EndDef = ChainDef;
}
+ auto *MP = cast<MemoryPhi>(EndDef);
+ auto ReachableOperandPred = [&](const Use &U) {
+ return ReachableEdges.count({MP->getIncomingBlock(U), MP->getBlock()});
+ };
+ auto FilteredPhiArgs =
+ make_filter_range(MP->operands(), ReachableOperandPred);
+ SmallVector<const Value *, 32> OperandList;
+ std::copy(FilteredPhiArgs.begin(), FilteredPhiArgs.end(),
+ std::back_inserter(OperandList));
+ bool Okay = OperandList.size() == 1;
+ if (!Okay)
+ Okay =
+ std::equal(OperandList.begin(), OperandList.end(), OperandList.begin());
+ if (Okay)
+ return singleReachablePHIPath(cast<MemoryAccess>(OperandList[0]), Second);
+ return false;
}
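The rewritten walk above follows the linear chain of defining accesses until it finds Second, hits liveOnEntry, or lands on a phi that it then recurses through. A stripped-down sketch of the chain walk; Defining is a hypothetical accessor standing in for the optimized def chain:

// Sketch: walk a singly-linked def chain from First looking for Second.
// Defining(Cur) returns the next defining access, or nullptr at the end.
template <typename AccessT, typename DefiningFn>
static bool reachesAlongChain(const AccessT *First, const AccessT *Second,
                              DefiningFn Defining) {
  for (const AccessT *Cur = First; Cur; Cur = Defining(Cur))
    if (Cur == Second)
      return true;
  return false; // chain ended (e.g. at liveOnEntry) without hitting Second
}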
// Verify that the memory equivalence table makes sense relative to the
// congruence classes. Note that this checking is not perfect, and is currently
-// subject to very rare false negatives. It is only useful for testing/debugging.
+// subject to very rare false negatives. It is only useful for
+// testing/debugging.
void NewGVN::verifyMemoryCongruency() const {
- // Anything equivalent in the memory access table should be in the same
+#ifndef NDEBUG
+ // Verify that the memory class table and the classes' memory member sets match.
+ for (const auto *CC : CongruenceClasses) {
+ if (CC == TOPClass || CC->isDead())
+ continue;
+ if (CC->getStoreCount() != 0) {
+ assert((CC->getStoredValue() || !isa<StoreInst>(CC->getLeader())) &&
+ "Any class with a store as a "
+ "leader should have a "
+ "representative stored value\n");
+ assert(CC->getMemoryLeader() &&
+ "Any congruence class with a store should "
+ "have a representative access\n");
+ }
+
+ if (CC->getMemoryLeader())
+ assert(MemoryAccessToClass.lookup(CC->getMemoryLeader()) == CC &&
+ "Representative MemoryAccess does not appear to be reverse "
+ "mapped properly");
+ for (auto M : CC->memory())
+ assert(MemoryAccessToClass.lookup(M) == CC &&
+ "Memory member does not appear to be reverse mapped properly");
+ }
+
+ // Anything equivalent in the MemoryAccess table should be in the same
// congruence class.
// Filter out the unreachable and trivially dead entries, because they may
// never have been updated if the instructions were not processed.
auto ReachableAccessPred =
- [&](const std::pair<const MemoryAccess *, MemoryAccess *> Pair) {
+ [&](const std::pair<const MemoryAccess *, CongruenceClass *> Pair) {
bool Result = ReachableBlocks.count(Pair.first->getBlock());
if (!Result)
return false;
+ if (MSSA->isLiveOnEntryDef(Pair.first))
+ return true;
if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first))
return !isInstructionTriviallyDead(MemDef->getMemoryInst());
+ if (MemoryToDFSNum(Pair.first) == 0)
+ return false;
return true;
};
- auto Filtered = make_filter_range(MemoryAccessEquiv, ReachableAccessPred);
+ auto Filtered = make_filter_range(MemoryAccessToClass, ReachableAccessPred);
for (auto KV : Filtered) {
- assert(KV.first != KV.second &&
- "We added a useless equivalence to the memory equivalence table");
- // Unreachable instructions may not have changed because we never process
- // them.
- if (!ReachableBlocks.count(KV.first->getBlock()))
- continue;
+ assert(KV.second != TOPClass &&
+ "Memory not unreachable but ended up in TOP");
if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) {
- auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second);
+ auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second->getMemoryLeader());
if (FirstMUD && SecondMUD)
assert((singleReachablePHIPath(FirstMUD, SecondMUD) ||
- ValueToClass.lookup(FirstMUD->getMemoryInst()) ==
- ValueToClass.lookup(SecondMUD->getMemoryInst())) &&
- "The instructions for these memory operations should have "
- "been in the same congruence class or reachable through"
- "a single argument phi");
+ ValueToClass.lookup(FirstMUD->getMemoryInst()) ==
+ ValueToClass.lookup(SecondMUD->getMemoryInst())) &&
+ "The instructions for these memory operations should have "
+ "been in the same congruence class or reachable through"
+ "a single argument phi");
} else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) {
-
// We can only sanely verify that MemoryDefs in the operand list all have
// the same class.
auto ReachableOperandPred = [&](const Use &U) {
- return ReachableBlocks.count(FirstMP->getIncomingBlock(U)) &&
+ return ReachableEdges.count(
+ {FirstMP->getIncomingBlock(U), FirstMP->getBlock()}) &&
isa<MemoryDef>(U);
};
@@ -1622,19 +2565,127 @@ void NewGVN::verifyMemoryCongruency() const {
"All MemoryPhi arguments should be in the same class");
}
}
+#endif
+}
+
+// Verify that the sparse propagation we did actually found the maximal
+// fixpoint. We do this by storing the value-to-class mapping, touching all
+// instructions, and redoing the iteration to see if anything changed.
+void NewGVN::verifyIterationSettled(Function &F) {
+#ifndef NDEBUG
+ DEBUG(dbgs() << "Beginning iteration verification\n");
+ if (DebugCounter::isCounterSet(VNCounter))
+ DebugCounter::setCounterValue(VNCounter, StartingVNCounter);
+
+ // Note that we have to store the actual classes, as we may change existing
+ // classes during iteration. This is because our memory iteration propagation
+ // is not perfect, and so may waste a little work. But it should generate
+ // exactly the same congruence classes we have now, with different IDs.
+ std::map<const Value *, CongruenceClass> BeforeIteration;
+
+ for (auto &KV : ValueToClass) {
+ if (auto *I = dyn_cast<Instruction>(KV.first))
+ // Skip unused/dead instructions.
+ if (InstrToDFSNum(I) == 0)
+ continue;
+ BeforeIteration.insert({KV.first, *KV.second});
+ }
+
+ TouchedInstructions.set();
+ TouchedInstructions.reset(0);
+ iterateTouchedInstructions();
+ DenseSet<std::pair<const CongruenceClass *, const CongruenceClass *>>
+ EqualClasses;
+ for (const auto &KV : ValueToClass) {
+ if (auto *I = dyn_cast<Instruction>(KV.first))
+ // Skip unused/dead instructions.
+ if (InstrToDFSNum(I) == 0)
+ continue;
+ // We could sink these uses, but I think this adds a bit of clarity here as
+ // to what we are comparing.
+ auto *BeforeCC = &BeforeIteration.find(KV.first)->second;
+ auto *AfterCC = KV.second;
+ // Note that the classes can't change at this point, so we memoize the pairs
+ // that are equal.
+ if (!EqualClasses.count({BeforeCC, AfterCC})) {
+ assert(BeforeCC->isEquivalentTo(AfterCC) &&
+ "Value number changed after main loop completed!");
+ EqualClasses.insert({BeforeCC, AfterCC});
+ }
+ }
+#endif
+}
+
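verifyIterationSettled above is the classic snapshot-and-rerun fixpoint check. A generic sketch of the idea, assuming a copyable state map and an Iterate step that should be a no-op at a true fixpoint (all names invented for illustration):

// Sketch: a fixpoint has settled iff re-running the propagation changes
// nothing. The real pass compares classes by equivalence, not identity.
#include <cassert>
#include <map>

template <typename K, typename V, typename IterateFn>
static void verifySettled(std::map<K, V> &State, IterateFn Iterate) {
  const std::map<K, V> Before = State; // snapshot the mapping
  Iterate();                           // redo one full propagation round
  assert(Before == State && "State changed after it claimed to settle");
}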
+// This is the main value numbering loop; it iterates over the initial touched
+// instruction set, propagating value numbers and marking things touched, until
+// the set of touched instructions is completely empty.
+void NewGVN::iterateTouchedInstructions() {
+ unsigned int Iterations = 0;
+ // Figure out where TouchedInstructions starts.
+ int FirstInstr = TouchedInstructions.find_first();
+ // Nothing set, nothing to iterate, just return.
+ if (FirstInstr == -1)
+ return;
+ BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr));
+ while (TouchedInstructions.any()) {
+ ++Iterations;
+ // Walk through all the instructions in all the blocks in RPO.
+ // TODO: As we hit a new block, we should push and pop equalities into a
+ // table lookupOperandLeader can use, to catch things PredicateInfo
+ // might miss, like edge-only equivalences.
+ for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1;
+ InstrNum = TouchedInstructions.find_next(InstrNum)) {
+
+ // This instruction was found to be dead. We don't bother looking
+ // at it again.
+ if (InstrNum == 0) {
+ TouchedInstructions.reset(InstrNum);
+ continue;
+ }
+
+ Value *V = InstrFromDFSNum(InstrNum);
+ BasicBlock *CurrBlock = getBlockForValue(V);
+
+ // If we hit a new block, do reachability processing.
+ if (CurrBlock != LastBlock) {
+ LastBlock = CurrBlock;
+ bool BlockReachable = ReachableBlocks.count(CurrBlock);
+ const auto &CurrInstRange = BlockInstRange.lookup(CurrBlock);
+
+ // If it's not reachable, erase any touched instructions and move on.
+ if (!BlockReachable) {
+ TouchedInstructions.reset(CurrInstRange.first, CurrInstRange.second);
+ DEBUG(dbgs() << "Skipping instructions in block "
+ << getBlockName(CurrBlock)
+ << " because it is unreachable\n");
+ continue;
+ }
+ updateProcessedCount(CurrBlock);
+ }
+
+ if (auto *MP = dyn_cast<MemoryPhi>(V)) {
+ DEBUG(dbgs() << "Processing MemoryPhi " << *MP << "\n");
+ valueNumberMemoryPhi(MP);
+ } else if (auto *I = dyn_cast<Instruction>(V)) {
+ valueNumberInstruction(I);
+ } else {
+ llvm_unreachable("Should have been a MemoryPhi or Instruction");
+ }
+ updateProcessedCount(V);
+ // Reset after processing (because we may mark ourselves as touched when
+ // we propagate equalities).
+ TouchedInstructions.reset(InstrNum);
+ }
+ }
+ NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations);
}
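The loop above drains a bit-vector worklist in DFS-number order; processing one entry may re-set bits, including earlier ones, so the outer loop keeps sweeping until nothing is set. A simplified sketch of that shape, with Process standing in for the value numbering step:

// Sketch: sweep a bit-vector worklist until empty. Process(I) may set other
// bits, including ones at lower indices; those are caught on the next sweep.
#include <vector>

template <typename Fn>
static void drainWorklist(std::vector<bool> &Touched, Fn Process) {
  bool DidWork = true;
  while (DidWork) {
    DidWork = false;
    for (unsigned I = 0, E = Touched.size(); I != E; ++I) {
      if (!Touched[I])
        continue;
      Process(I);
      Touched[I] = false; // reset after processing, as in the pass
      DidWork = true;
    }
  }
}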
// This is the main transformation entry point.
-bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
- TargetLibraryInfo *_TLI, AliasAnalysis *_AA,
- MemorySSA *_MSSA) {
+bool NewGVN::runGVN() {
+ if (DebugCounter::isCounterSet(VNCounter))
+ StartingVNCounter = DebugCounter::getCounterValue(VNCounter);
bool Changed = false;
- DT = _DT;
- AC = _AC;
- TLI = _TLI;
- AA = _AA;
- MSSA = _MSSA;
- DL = &F.getParent()->getDataLayout();
+ NumFuncArgs = F.arg_size();
MSSAWalker = MSSA->getWalker();
// Count number of instructions for sizing of hash tables, and come
@@ -1642,15 +2693,14 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
unsigned ICount = 1;
// Add an empty instruction to account for the fact that we start at 1
DFSToInstr.emplace_back(nullptr);
- // Note: We want RPO traversal of the blocks, which is not quite the same as
- // dominator tree order, particularly with regard whether backedges get
- // visited first or second, given a block with multiple successors.
+ // Note: We want ideal RPO traversal of the blocks, which is not quite the
+ // same as dominator tree order, particularly with regard to whether backedges
+ // get visited first or second, given a block with multiple successors.
// If we visit in the wrong order, we will end up performing N times as many
// iterations.
// The dominator tree does guarantee that, for a given dom tree node, its
// parent must occur before it in the RPO ordering. Thus, we only need to sort
// the siblings.
- DenseMap<const DomTreeNode *, unsigned> RPOOrdering;
ReversePostOrderTraversal<Function *> RPOT(&F);
unsigned Counter = 0;
for (auto &B : RPOT) {
@@ -1663,7 +2713,7 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
auto *Node = DT->getNode(B);
if (Node->getChildren().size() > 1)
std::sort(Node->begin(), Node->end(),
- [&RPOOrdering](const DomTreeNode *A, const DomTreeNode *B) {
+ [&](const DomTreeNode *A, const DomTreeNode *B) {
return RPOOrdering[A] < RPOOrdering[B];
});
}
@@ -1689,7 +2739,6 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
}
TouchedInstructions.resize(ICount);
- DominatedInstRange.reserve(F.size());
// Ensure we don't end up resizing the expressionToClass map, as
// that can be quite expensive. At most, we have one expression per
// instruction.
@@ -1701,62 +2750,10 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
ReachableBlocks.insert(&F.getEntryBlock());
initializeCongruenceClasses(F);
-
- unsigned int Iterations = 0;
- // We start out in the entry block.
- BasicBlock *LastBlock = &F.getEntryBlock();
- while (TouchedInstructions.any()) {
- ++Iterations;
- // Walk through all the instructions in all the blocks in RPO.
- for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1;
- InstrNum = TouchedInstructions.find_next(InstrNum)) {
- assert(InstrNum != 0 && "Bit 0 should never be set, something touched an "
- "instruction not in the lookup table");
- Value *V = DFSToInstr[InstrNum];
- BasicBlock *CurrBlock = nullptr;
-
- if (auto *I = dyn_cast<Instruction>(V))
- CurrBlock = I->getParent();
- else if (auto *MP = dyn_cast<MemoryPhi>(V))
- CurrBlock = MP->getBlock();
- else
- llvm_unreachable("DFSToInstr gave us an unknown type of instruction");
-
- // If we hit a new block, do reachability processing.
- if (CurrBlock != LastBlock) {
- LastBlock = CurrBlock;
- bool BlockReachable = ReachableBlocks.count(CurrBlock);
- const auto &CurrInstRange = BlockInstRange.lookup(CurrBlock);
-
- // If it's not reachable, erase any touched instructions and move on.
- if (!BlockReachable) {
- TouchedInstructions.reset(CurrInstRange.first, CurrInstRange.second);
- DEBUG(dbgs() << "Skipping instructions in block "
- << getBlockName(CurrBlock)
- << " because it is unreachable\n");
- continue;
- }
- updateProcessedCount(CurrBlock);
- }
-
- if (auto *MP = dyn_cast<MemoryPhi>(V)) {
- DEBUG(dbgs() << "Processing MemoryPhi " << *MP << "\n");
- valueNumberMemoryPhi(MP);
- } else if (auto *I = dyn_cast<Instruction>(V)) {
- valueNumberInstruction(I);
- } else {
- llvm_unreachable("Should have been a MemoryPhi or Instruction");
- }
- updateProcessedCount(V);
- // Reset after processing (because we may mark ourselves as touched when
- // we propagate equalities).
- TouchedInstructions.reset(InstrNum);
- }
- }
- NumGVNMaxIterations = std::max(NumGVNMaxIterations.getValue(), Iterations);
-#ifndef NDEBUG
+ iterateTouchedInstructions();
verifyMemoryCongruency();
-#endif
+ verifyIterationSettled(F);
+
Changed |= eliminateInstructions(F);
// Delete all instructions marked for deletion.
@@ -1783,36 +2780,6 @@ bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
return Changed;
}
-bool NewGVN::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
- return runGVN(F, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
- &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
- &getAnalysis<AAResultsWrapperPass>().getAAResults(),
- &getAnalysis<MemorySSAWrapperPass>().getMSSA());
-}
-
-PreservedAnalyses NewGVNPass::run(Function &F, AnalysisManager<Function> &AM) {
- NewGVN Impl;
-
- // Apparently the order in which we get these results matter for
- // the old GVN (see Chandler's comment in GVN.cpp). I'll keep
- // the same order here, just in case.
- auto &AC = AM.getResult<AssumptionAnalysis>(F);
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
- auto &AA = AM.getResult<AAManager>(F);
- auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
- bool Changed = Impl.runGVN(F, &DT, &AC, &TLI, &AA, &MSSA);
- if (!Changed)
- return PreservedAnalyses::all();
- PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<GlobalsAA>();
- return PA;
-}
-
// Return true if V is a value that will always be available (i.e. can
// be placed anywhere) in the function. We don't do globals here
// because they are often worse to put in place.
@@ -1821,21 +2788,15 @@ static bool alwaysAvailable(Value *V) {
return isa<Constant>(V) || isa<Argument>(V);
}
-// Get the basic block from an instruction/value.
-static BasicBlock *getBlockForValue(Value *V) {
- if (auto *I = dyn_cast<Instruction>(V))
- return I->getParent();
- return nullptr;
-}
-
struct NewGVN::ValueDFS {
int DFSIn = 0;
int DFSOut = 0;
int LocalNum = 0;
- // Only one of these will be set.
- Value *Val = nullptr;
+ // Only one of Def and U will be set.
+ // The bool in the Def tells us whether the Def is the stored value of a
+ // store.
+ PointerIntPair<Value *, 1, bool> Def;
Use *U = nullptr;
-
bool operator<(const ValueDFS &Other) const {
// It's not enough that any given field be less than - we have sets
// of fields that need to be evaluated together to give a proper ordering.
@@ -1875,89 +2836,151 @@ struct NewGVN::ValueDFS {
// but .val and .u.
// It does not matter what order we replace these operands in.
// You will always end up with the same IR, and this is guaranteed.
- return std::tie(DFSIn, DFSOut, LocalNum, Val, U) <
- std::tie(Other.DFSIn, Other.DFSOut, Other.LocalNum, Other.Val,
+ return std::tie(DFSIn, DFSOut, LocalNum, Def, U) <
+ std::tie(Other.DFSIn, Other.DFSOut, Other.LocalNum, Other.Def,
Other.U);
}
};
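The (DFSIn, DFSOut) pair carried by ValueDFS encodes dominator-tree DFS intervals: A dominates B exactly when A's interval contains B's. That containment test is what the elimination stack's scope checks rely on; a standalone two-line sketch:

// Sketch: dominance as dom-tree DFS interval containment.
static bool dominatesByDFS(int AIn, int AOut, int BIn, int BOut) {
  return AIn <= BIn && AOut >= BOut; // A's interval contains B's
}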
-void NewGVN::convertDenseToDFSOrdered(
- CongruenceClass::MemberSet &Dense,
- SmallVectorImpl<ValueDFS> &DFSOrderedSet) {
+// This function converts the set of members for a congruence class from
+// values to sets of defs and uses with associated DFS info. The total number
+// of reachable uses for each value is stored in UseCounts, and instructions
+// that seem dead (have no non-dead uses) are stored in ProbablyDead.
+void NewGVN::convertClassToDFSOrdered(
+ const CongruenceClass &Dense, SmallVectorImpl<ValueDFS> &DFSOrderedSet,
+ DenseMap<const Value *, unsigned int> &UseCounts,
+ SmallPtrSetImpl<Instruction *> &ProbablyDead) const {
for (auto D : Dense) {
// First add the value.
BasicBlock *BB = getBlockForValue(D);
// Constants are handled prior to ever calling this function, so
// we should only be left with instructions as members.
assert(BB && "Should have figured out a basic block for value");
- ValueDFS VD;
-
- std::pair<int, int> DFSPair = DFSDomMap[BB];
- assert(DFSPair.first != -1 && DFSPair.second != -1 && "Invalid DFS Pair");
- VD.DFSIn = DFSPair.first;
- VD.DFSOut = DFSPair.second;
- VD.Val = D;
- // If it's an instruction, use the real local dfs number.
- if (auto *I = dyn_cast<Instruction>(D))
- VD.LocalNum = InstrDFS[I];
- else
- llvm_unreachable("Should have been an instruction");
-
- DFSOrderedSet.emplace_back(VD);
-
- // Now add the users.
- for (auto &U : D->uses()) {
+ ValueDFS VDDef;
+ DomTreeNode *DomNode = DT->getNode(BB);
+ VDDef.DFSIn = DomNode->getDFSNumIn();
+ VDDef.DFSOut = DomNode->getDFSNumOut();
+ // If it's a store, use the leader of the value operand if it's always
+ // available; otherwise use the value operand itself. TODO: We could do
+ // dominance checks to find a dominating leader, but it's not worth it ATM.
+ if (auto *SI = dyn_cast<StoreInst>(D)) {
+ auto Leader = lookupOperandLeader(SI->getValueOperand());
+ if (alwaysAvailable(Leader)) {
+ VDDef.Def.setPointer(Leader);
+ } else {
+ VDDef.Def.setPointer(SI->getValueOperand());
+ VDDef.Def.setInt(true);
+ }
+ } else {
+ VDDef.Def.setPointer(D);
+ }
+ assert(isa<Instruction>(D) &&
+ "The dense set member should always be an instruction");
+ VDDef.LocalNum = InstrToDFSNum(D);
+ DFSOrderedSet.emplace_back(VDDef);
+ Instruction *Def = cast<Instruction>(D);
+ unsigned int UseCount = 0;
+ // Now add the uses.
+ for (auto &U : Def->uses()) {
if (auto *I = dyn_cast<Instruction>(U.getUser())) {
- ValueDFS VD;
+ // Don't try to replace into dead uses
+ if (InstructionsToErase.count(I))
+ continue;
+ ValueDFS VDUse;
// Put the phi node uses in the incoming block.
BasicBlock *IBlock;
if (auto *P = dyn_cast<PHINode>(I)) {
IBlock = P->getIncomingBlock(U);
// Make phi node users appear last in the incoming block
// they are from.
- VD.LocalNum = InstrDFS.size() + 1;
+ VDUse.LocalNum = InstrDFS.size() + 1;
} else {
IBlock = I->getParent();
- VD.LocalNum = InstrDFS[I];
+ VDUse.LocalNum = InstrToDFSNum(I);
}
- std::pair<int, int> DFSPair = DFSDomMap[IBlock];
- VD.DFSIn = DFSPair.first;
- VD.DFSOut = DFSPair.second;
- VD.U = &U;
- DFSOrderedSet.emplace_back(VD);
+
+ // Skip uses in unreachable blocks, as we're going
+ // to delete them.
+ if (ReachableBlocks.count(IBlock) == 0)
+ continue;
+
+ DomTreeNode *DomNode = DT->getNode(IBlock);
+ VDUse.DFSIn = DomNode->getDFSNumIn();
+ VDUse.DFSOut = DomNode->getDFSNumOut();
+ VDUse.U = &U;
+ ++UseCount;
+ DFSOrderedSet.emplace_back(VDUse);
}
}
+
+ // If there are no uses, it's probably dead (but it may have side-effects,
+ // so it is not definitely dead). Otherwise, store the number of uses so we
+ // can track whether it becomes dead later.
+ if (UseCount == 0)
+ ProbablyDead.insert(Def);
+ else
+ UseCounts[Def] = UseCount;
}
}
-static void patchReplacementInstruction(Instruction *I, Value *Repl) {
- // Patch the replacement so that it is not more restrictive than the value
- // being replaced.
- auto *Op = dyn_cast<BinaryOperator>(I);
- auto *ReplOp = dyn_cast<BinaryOperator>(Repl);
+// This function converts the set of members for a congruence class from
+// values to the set of defs for loads and stores, with associated DFS info.
+void NewGVN::convertClassToLoadsAndStores(
+ const CongruenceClass &Dense,
+ SmallVectorImpl<ValueDFS> &LoadsAndStores) const {
+ for (auto D : Dense) {
+ if (!isa<LoadInst>(D) && !isa<StoreInst>(D))
+ continue;
- if (Op && ReplOp)
- ReplOp->andIRFlags(Op);
+ BasicBlock *BB = getBlockForValue(D);
+ ValueDFS VD;
+ DomTreeNode *DomNode = DT->getNode(BB);
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.Def.setPointer(D);
- if (auto *ReplInst = dyn_cast<Instruction>(Repl)) {
- // FIXME: If both the original and replacement value are part of the
- // same control-flow region (meaning that the execution of one
- // guarentees the executation of the other), then we can combine the
- // noalias scopes here and do better than the general conservative
- // answer used in combineMetadata().
+ // If it's an instruction, use the real local dfs number.
+ if (auto *I = dyn_cast<Instruction>(D))
+ VD.LocalNum = InstrToDFSNum(I);
+ else
+ llvm_unreachable("Should have been an instruction");
- // In general, GVN unifies expressions over different control-flow
- // regions, and so we need a conservative combination of the noalias
- // scopes.
- unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_range,
- LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
- LLVMContext::MD_invariant_group};
- combineMetadata(ReplInst, I, KnownIDs);
+ LoadsAndStores.emplace_back(VD);
}
}
+static void patchReplacementInstruction(Instruction *I, Value *Repl) {
+ auto *ReplInst = dyn_cast<Instruction>(Repl);
+ if (!ReplInst)
+ return;
+
+ // Patch the replacement so that it is not more restrictive than the value
+ // being replaced.
+ // Note that if 'I' is a load being replaced by some operation,
+ // for example, by an arithmetic operation, then andIRFlags()
+ // would just erase all math flags from the original arithmetic
+ // operation, which is clearly not wanted and not needed.
+ if (!isa<LoadInst>(I))
+ ReplInst->andIRFlags(I);
+
+ // FIXME: If both the original and replacement value are part of the
+ // same control-flow region (meaning that the execution of one
+ // guarantees the execution of the other), then we can combine the
+ // noalias scopes here and do better than the general conservative
+ // answer used in combineMetadata().
+
+ // In general, GVN unifies expressions over different control-flow
+ // regions, and so we need a conservative combination of the noalias
+ // scopes.
+ static const unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_invariant_group};
+ combineMetadata(ReplInst, I, KnownIDs);
+}
+
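The andIRFlags call above intersects IR flags so the surviving instruction is no stronger than either original. A small worked example of why that matters, with made-up value names:

// Sketch: if %a = add nsw i32 %x, %y replaces %b = add i32 %x, %y, the nsw
// flag must be dropped; %b's users never relied on no-signed-wrap, and
// keeping it could wrongly introduce poison at those uses.
#include "llvm/IR/Instruction.h"
using namespace llvm;

static void weakenReplacement(Instruction *Repl, const Instruction *Orig) {
  Repl->andIRFlags(Orig); // keep only the flags common to both instructions
}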
static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
patchReplacementInstruction(I, Repl);
I->replaceAllUsesWith(Repl);
@@ -1967,10 +2990,6 @@ void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) {
DEBUG(dbgs() << " BasicBlock Dead:" << *BB);
++NumGVNBlocksDeleted;
- // Check to see if there are non-terminating instructions to delete.
- if (isa<TerminatorInst>(BB->begin()))
- return;
-
// Delete the instructions backwards, as this reduces the number of def-use
// and use-def chain updates needed. Start after the terminator.
auto StartPoint = BB->rbegin();
@@ -1987,6 +3006,11 @@ void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) {
Inst.eraseFromParent();
++NumGVNInstrDeleted;
}
+ // Now insert something that simplifycfg will turn into an unreachable.
+ Type *Int8Ty = Type::getInt8Ty(BB->getContext());
+ new StoreInst(UndefValue::get(Int8Ty),
+ Constant::getNullValue(Int8Ty->getPointerTo()),
+ BB->getTerminator());
}
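The trailing store above writes through a null pointer, which is immediate undefined behavior; SimplifyCFG recognizes that and folds the block tail into unreachable. The same marker in isolation, as a sketch (the helper name is invented):

// Sketch: plant a store-to-null so later passes rewrite this point as
// unreachable. Mirrors the insertion in deleteInstructionsInBlock above.
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
using namespace llvm;

static void markUnreachableBefore(Instruction *InsertPt) {
  Type *Int8Ty = Type::getInt8Ty(InsertPt->getContext());
  new StoreInst(UndefValue::get(Int8Ty),
                Constant::getNullValue(Int8Ty->getPointerTo()), InsertPt);
}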
void NewGVN::markInstructionForDeletion(Instruction *I) {
@@ -2086,59 +3110,59 @@ bool NewGVN::eliminateInstructions(Function &F) {
}
}
}
- DomTreeNode *Node = DT->getNode(&B);
- if (Node)
- DFSDomMap[&B] = {Node->getDFSNumIn(), Node->getDFSNumOut()};
}
- for (CongruenceClass *CC : CongruenceClasses) {
- // FIXME: We should eventually be able to replace everything still
- // in the initial class with undef, as they should be unreachable.
- // Right now, initial still contains some things we skip value
- // numbering of (UNREACHABLE's, for example).
- if (CC == InitialClass || CC->Dead)
+ // Map to store the use counts
+ DenseMap<const Value *, unsigned int> UseCounts;
+ for (CongruenceClass *CC : reverse(CongruenceClasses)) {
+ // Track the equivalent store info so we can decide whether to try
+ // dead store elimination.
+ SmallVector<ValueDFS, 8> PossibleDeadStores;
+ SmallPtrSet<Instruction *, 8> ProbablyDead;
+ if (CC->isDead() || CC->empty())
continue;
- assert(CC->RepLeader && "We should have had a leader");
+ // Everything still in the TOP class is unreachable or dead.
+ if (CC == TOPClass) {
+#ifndef NDEBUG
+ for (auto M : *CC)
+ assert((!ReachableBlocks.count(cast<Instruction>(M)->getParent()) ||
+ InstructionsToErase.count(cast<Instruction>(M))) &&
+ "Everything in TOP should be unreachable or dead at this "
+ "point");
+#endif
+ continue;
+ }
+ assert(CC->getLeader() && "We should have had a leader");
// If this is a leader that is always available, and it's a
// constant or has no equivalences, just replace everything with
// it. We then update the congruence class with whatever members
// are left.
- if (alwaysAvailable(CC->RepLeader)) {
- SmallPtrSet<Value *, 4> MembersLeft;
- for (auto M : CC->Members) {
-
+ Value *Leader =
+ CC->getStoredValue() ? CC->getStoredValue() : CC->getLeader();
+ if (alwaysAvailable(Leader)) {
+ CongruenceClass::MemberSet MembersLeft;
+ for (auto M : *CC) {
Value *Member = M;
-
// Void things have no uses we can replace.
- if (Member == CC->RepLeader || Member->getType()->isVoidTy()) {
+ if (Member == Leader || !isa<Instruction>(Member) ||
+ Member->getType()->isVoidTy()) {
MembersLeft.insert(Member);
continue;
}
-
- DEBUG(dbgs() << "Found replacement " << *(CC->RepLeader) << " for "
- << *Member << "\n");
- // Due to equality propagation, these may not always be
- // instructions, they may be real values. We don't really
- // care about trying to replace the non-instructions.
- if (auto *I = dyn_cast<Instruction>(Member)) {
- assert(CC->RepLeader != I &&
- "About to accidentally remove our leader");
- replaceInstruction(I, CC->RepLeader);
- AnythingReplaced = true;
-
- continue;
- } else {
- MembersLeft.insert(I);
- }
+ DEBUG(dbgs() << "Found replacement " << *(Leader) << " for " << *Member
+ << "\n");
+ auto *I = cast<Instruction>(Member);
+ assert(Leader != I && "About to accidentally remove our leader");
+ replaceInstruction(I, Leader);
+ AnythingReplaced = true;
}
- CC->Members.swap(MembersLeft);
-
+ CC->swap(MembersLeft);
} else {
- DEBUG(dbgs() << "Eliminating in congruence class " << CC->ID << "\n");
+ DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID()
+ << "\n");
// If this is a singleton, we can skip it.
- if (CC->Members.size() != 1) {
-
+ if (CC->size() != 1) {
// This is a stack because equality replacement/etc may place
// constants in the middle of the member list, and we want to use
// those constant values in preference to the current leader, over
@@ -2147,24 +3171,19 @@ bool NewGVN::eliminateInstructions(Function &F) {
// Convert the members to DFS ordered sets and then merge them.
SmallVector<ValueDFS, 8> DFSOrderedSet;
- convertDenseToDFSOrdered(CC->Members, DFSOrderedSet);
+ convertClassToDFSOrdered(*CC, DFSOrderedSet, UseCounts, ProbablyDead);
// Sort the whole thing.
std::sort(DFSOrderedSet.begin(), DFSOrderedSet.end());
-
for (auto &VD : DFSOrderedSet) {
int MemberDFSIn = VD.DFSIn;
int MemberDFSOut = VD.DFSOut;
- Value *Member = VD.Val;
- Use *MemberUse = VD.U;
-
- if (Member) {
- // We ignore void things because we can't get a value from them.
- // FIXME: We could actually use this to kill dead stores that are
- // dominated by equivalent earlier stores.
- if (Member->getType()->isVoidTy())
- continue;
- }
+ Value *Def = VD.Def.getPointer();
+ bool FromStore = VD.Def.getInt();
+ Use *U = VD.U;
+ // We ignore void things because we can't get a value from them.
+ if (Def && Def->getType()->isVoidTy())
+ continue;
if (EliminationStack.empty()) {
DEBUG(dbgs() << "Elimination Stack is empty\n");
@@ -2189,69 +3208,240 @@ bool NewGVN::eliminateInstructions(Function &F) {
// start using, we also push.
// Otherwise, we walk along, processing members who are
// dominated by this scope, and eliminate them.
- bool ShouldPush =
- Member && (EliminationStack.empty() || isa<Constant>(Member));
+ bool ShouldPush = Def && EliminationStack.empty();
bool OutOfScope =
!EliminationStack.isInScope(MemberDFSIn, MemberDFSOut);
if (OutOfScope || ShouldPush) {
// Sync to our current scope.
EliminationStack.popUntilDFSScope(MemberDFSIn, MemberDFSOut);
- ShouldPush |= Member && EliminationStack.empty();
+ bool ShouldPush = Def && EliminationStack.empty();
if (ShouldPush) {
- EliminationStack.push_back(Member, MemberDFSIn, MemberDFSOut);
+ EliminationStack.push_back(Def, MemberDFSIn, MemberDFSOut);
+ }
+ }
+
+ // Skip the Defs; we only want to eliminate on their uses, but mark
+ // dominated defs as dead.
+ if (Def) {
+ // For anything in this case, what and how we value number
+ // guarantees that any side-effects that would have occurred (i.e.
+ // throwing, etc.) can be proven to either still occur (because it's
+ // dominated by something that has the same side-effects), or never
+ // occur. Otherwise, we would not have been able to prove it value
+ // equivalent to something else. For these things, we can just mark
+ // it all dead. Note that this is different from the "ProbablyDead"
+ // set, whose members may not be dominated by anything and thus are
+ // only easy to prove dead if they are also side-effect free. Note that
+ // because stores are put in terms of the stored value, we skip
+ // stored values here. If the stored value is really dead, it will
+ // still be marked for deletion when we process it in its own class.
+ if (!EliminationStack.empty() && Def != EliminationStack.back() &&
+ isa<Instruction>(Def) && !FromStore)
+ markInstructionForDeletion(cast<Instruction>(Def));
+ continue;
+ }
+ // At this point, we know it is a Use we are trying to possibly
+ // replace.
+
+ assert(isa<Instruction>(U->get()) &&
+ "Current def should have been an instruction");
+ assert(isa<Instruction>(U->getUser()) &&
+ "Current user should have been an instruction");
+
+ // If the thing we are replacing into is already marked to be dead,
+ // this use is dead. Note that this is true regardless of whether
+ // we have anything dominating the use or not. We do this here
+ // because we are already walking all the uses anyway.
+ Instruction *InstUse = cast<Instruction>(U->getUser());
+ if (InstructionsToErase.count(InstUse)) {
+ auto &UseCount = UseCounts[U->get()];
+ if (--UseCount == 0) {
+ ProbablyDead.insert(cast<Instruction>(U->get()));
}
}
// If we get to this point and the stack is empty, we must have a use
- // with nothing we can use to eliminate it, just skip it.
+ // with nothing we can use to eliminate this use, so just skip it.
if (EliminationStack.empty())
continue;
- // Skip the Value's, we only want to eliminate on their uses.
- if (Member)
- continue;
- Value *Result = EliminationStack.back();
+ Value *DominatingLeader = EliminationStack.back();
// Don't replace our existing users with ourselves.
- if (MemberUse->get() == Result)
+ if (U->get() == DominatingLeader)
continue;
-
- DEBUG(dbgs() << "Found replacement " << *Result << " for "
- << *MemberUse->get() << " in " << *(MemberUse->getUser())
- << "\n");
+ DEBUG(dbgs() << "Found replacement " << *DominatingLeader << " for "
+ << *U->get() << " in " << *(U->getUser()) << "\n");
// If we replaced something in an instruction, handle the patching of
- // metadata.
- if (auto *ReplacedInst = dyn_cast<Instruction>(MemberUse->get()))
- patchReplacementInstruction(ReplacedInst, Result);
-
- assert(isa<Instruction>(MemberUse->getUser()));
- MemberUse->set(Result);
+ // metadata. Skip this if we are replacing predicateinfo with its
+ // original operand, as we already know we can just drop it.
+ auto *ReplacedInst = cast<Instruction>(U->get());
+ auto *PI = PredInfo->getPredicateInfoFor(ReplacedInst);
+ if (!PI || DominatingLeader != PI->OriginalOp)
+ patchReplacementInstruction(ReplacedInst, DominatingLeader);
+ U->set(DominatingLeader);
+ // This is now a use of the dominating leader, which means if the
+ // dominating leader was dead, it's now live!
+ auto &LeaderUseCount = UseCounts[DominatingLeader];
+ // It's about to be alive again.
+ if (LeaderUseCount == 0 && isa<Instruction>(DominatingLeader))
+ ProbablyDead.erase(cast<Instruction>(DominatingLeader));
+ ++LeaderUseCount;
AnythingReplaced = true;
}
}
}
+ // At this point, anything still in the ProbablyDead set is actually dead
+ // if it would be trivially dead.
+ for (auto *I : ProbablyDead)
+ if (wouldInstructionBeTriviallyDead(I))
+ markInstructionForDeletion(I);
+
// Cleanup the congruence class.
- SmallPtrSet<Value *, 4> MembersLeft;
- for (Value *Member : CC->Members) {
- if (Member->getType()->isVoidTy()) {
+ CongruenceClass::MemberSet MembersLeft;
+ for (auto *Member : *CC)
+ if (!isa<Instruction>(Member) ||
+ !InstructionsToErase.count(cast<Instruction>(Member)))
MembersLeft.insert(Member);
- continue;
- }
-
- if (auto *MemberInst = dyn_cast<Instruction>(Member)) {
- if (isInstructionTriviallyDead(MemberInst)) {
- // TODO: Don't mark loads of undefs.
- markInstructionForDeletion(MemberInst);
- continue;
+ CC->swap(MembersLeft);
+
+ // If we have possible dead stores to look at, try to eliminate them.
+ if (CC->getStoreCount() > 0) {
+ convertClassToLoadsAndStores(*CC, PossibleDeadStores);
+ std::sort(PossibleDeadStores.begin(), PossibleDeadStores.end());
+ ValueDFSStack EliminationStack;
+ for (auto &VD : PossibleDeadStores) {
+ int MemberDFSIn = VD.DFSIn;
+ int MemberDFSOut = VD.DFSOut;
+ Instruction *Member = cast<Instruction>(VD.Def.getPointer());
+ if (EliminationStack.empty() ||
+ !EliminationStack.isInScope(MemberDFSIn, MemberDFSOut)) {
+ // Sync to our current scope.
+ EliminationStack.popUntilDFSScope(MemberDFSIn, MemberDFSOut);
+ if (EliminationStack.empty()) {
+ EliminationStack.push_back(Member, MemberDFSIn, MemberDFSOut);
+ continue;
+ }
}
+ // We already did load elimination, so nothing to do here.
+ if (isa<LoadInst>(Member))
+ continue;
+ assert(!EliminationStack.empty());
+ Instruction *Leader = cast<Instruction>(EliminationStack.back());
+ (void)Leader;
+ assert(DT->dominates(Leader->getParent(), Member->getParent()));
+ // Member is dominated by Leader, and thus dead.
+ DEBUG(dbgs() << "Marking dead store " << *Member
+ << " that is dominated by " << *Leader << "\n");
+ markInstructionForDeletion(Member);
+ CC->erase(Member);
+ ++NumGVNDeadStores;
}
- MembersLeft.insert(Member);
}
- CC->Members.swap(MembersLeft);
}
return AnythingReplaced;
}
+
+// This function provides a global ranking of operations so that we can place
+// them in a canonical order. Note that rank alone is not necessarily enough
+// for a complete ordering, as constants all have the same rank. However, an
+// operation with all-constant operands will generally have been simplified
+// away, so the order of constants rarely matters.
+unsigned int NewGVN::getRank(const Value *V) const {
+ // Prefer undef to anything else
+ if (isa<UndefValue>(V))
+ return 0;
+ if (isa<Constant>(V))
+ return 1;
+ else if (auto *A = dyn_cast<Argument>(V))
+ return 2 + A->getArgNo();
+
+ // Need to shift the instruction DFS by the number of arguments + 3 to
+ // account for the constant and argument ranking above.
+ unsigned Result = InstrToDFSNum(V);
+ if (Result > 0)
+ return 3 + NumFuncArgs + Result;
+ // Unreachable or something else, just return a really large number.
+ return ~0;
+}
+
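To make the tiers concrete, here is a self-contained toy version of the same ranking (undef, then constants, then arguments by position, then instructions by DFS number, then everything else); illustrative only, not the pass's code:

// Sketch of the rank tiers used by getRank above.
enum class Kind { Undef, Constant, Argument, Instruction, Unknown };

static unsigned toyRank(Kind K, unsigned ArgNo, unsigned DFSNum,
                        unsigned NumArgs) {
  switch (K) {
  case Kind::Undef:       return 0;                    // most preferred
  case Kind::Constant:    return 1;
  case Kind::Argument:    return 2 + ArgNo;            // ordered by position
  case Kind::Instruction: return 3 + NumArgs + DFSNum; // ordered by DFS
  case Kind::Unknown:     break;
  }
  return ~0u; // unreachable or unknown values sort last
}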
+// This function decides whether two operands of a commutative operation
+// should have their order swapped when canonicalizing.
+bool NewGVN::shouldSwapOperands(const Value *A, const Value *B) const {
+ // Because we only care about a total ordering, and don't rewrite expressions
+ // in this order, we order by rank, which will give a strict weak ordering to
+ // everything but constants, and then we order by pointer address.
+ return std::make_pair(getRank(A), A) > std::make_pair(getRank(B), B);
+}
+
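A typical use of the comparison above is canonicalizing the operands of a commutative expression before hashing, so that add(a, b) and add(b, a) produce the same key. A sketch under that assumption; swapIfNeeded is hypothetical:

// Sketch: put the lower-ranked (more canonical) operand first.
#include <utility>

template <typename V, typename RankFn>
static void swapIfNeeded(V *&A, V *&B, RankFn Rank) {
  if (std::make_pair(Rank(A), A) > std::make_pair(Rank(B), B))
    std::swap(A, B);
}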
+class NewGVNLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid.
+ NewGVNLegacyPass() : FunctionPass(ID) {
+ initializeNewGVNLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnFunction(Function &F) override;
+
+private:
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+
+bool NewGVNLegacyPass::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+ return NewGVN(F, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ &getAnalysis<AAResultsWrapperPass>().getAAResults(),
+ &getAnalysis<MemorySSAWrapperPass>().getMSSA(),
+ F.getParent()->getDataLayout())
+ .runGVN();
+}
+
+INITIALIZE_PASS_BEGIN(NewGVNLegacyPass, "newgvn", "Global Value Numbering",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_END(NewGVNLegacyPass, "newgvn", "Global Value Numbering", false,
+ false)
+
+char NewGVNLegacyPass::ID = 0;
+
+// createNewGVNPass - The public interface to this file.
+FunctionPass *llvm::createNewGVNPass() { return new NewGVNLegacyPass(); }
+
+PreservedAnalyses NewGVNPass::run(Function &F, AnalysisManager<Function> &AM) {
+ // Apparently the order in which we get these results matters for
+ // the old GVN (see Chandler's comment in GVN.cpp). I'll keep
+ // the same order here, just in case.
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &AA = AM.getResult<AAManager>(F);
+ auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+ bool Changed =
+ NewGVN(F, &DT, &AC, &TLI, &AA, &MSSA, F.getParent()->getDataLayout())
+ .runGVN();
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 1a7ddc9585ba..1bfecea2f61e 100644
--- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -66,7 +66,7 @@ static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
// Add attribute "readnone" so that backend can use a native sqrt instruction
// for this call. Insert a FP compare instruction and a conditional branch
// at the end of CurrBB.
- Call->addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
+ Call->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
CurrBB.getTerminator()->eraseFromParent();
Builder.SetInsertPoint(&CurrBB);
Value *FCmp = Builder.CreateFCmpOEQ(Call, Call);
@@ -98,14 +98,14 @@ static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
// Skip if the function either has local linkage or is not a known library
// function.
- LibFunc::Func LibFunc;
+ LibFunc LF;
if (CalledFunc->hasLocalLinkage() || !CalledFunc->hasName() ||
- !TLI->getLibFunc(CalledFunc->getName(), LibFunc))
+ !TLI->getLibFunc(CalledFunc->getName(), LF))
continue;
- switch (LibFunc) {
- case LibFunc::sqrtf:
- case LibFunc::sqrt:
+ switch (LF) {
+ case LibFunc_sqrtf:
+ case LibFunc_sqrt:
if (TTI->haveFastSqrt(Call->getType()) &&
optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
break;
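For context on the rename in this hunk: the LibFunc identifiers moved from LibFunc::sqrt-style scoped enumerators to plain LibFunc_sqrt values, and LibFunc itself now names the enum type. A minimal sketch of the updated lookup; the helper is invented for illustration:

// Sketch: classify a call target with the renamed LibFunc API.
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
using namespace llvm;

static bool isSqrtLibCall(const TargetLibraryInfo &TLI, const Function &F) {
  LibFunc LF;
  if (F.hasLocalLinkage() || !F.hasName() || !TLI.getLibFunc(F.getName(), LF))
    return false;
  return LF == LibFunc_sqrt || LF == LibFunc_sqrtf;
}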
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 65c814d7a63b..3dcab6090789 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -1069,8 +1069,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
///
/// Ops is the top-level list of add operands we're trying to factor.
static void FindSingleUseMultiplyFactors(Value *V,
- SmallVectorImpl<Value*> &Factors,
- const SmallVectorImpl<ValueEntry> &Ops) {
+ SmallVectorImpl<Value*> &Factors) {
BinaryOperator *BO = isReassociableOp(V, Instruction::Mul, Instruction::FMul);
if (!BO) {
Factors.push_back(V);
@@ -1078,8 +1077,8 @@ static void FindSingleUseMultiplyFactors(Value *V,
}
// Otherwise, add the LHS and RHS to the list of factors.
- FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops);
- FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops);
+ FindSingleUseMultiplyFactors(BO->getOperand(1), Factors);
+ FindSingleUseMultiplyFactors(BO->getOperand(0), Factors);
}
/// Optimize a series of operands to an 'and', 'or', or 'xor' instruction.
@@ -1499,7 +1498,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
// Compute all of the factors of this added value.
SmallVector<Value*, 8> Factors;
- FindSingleUseMultiplyFactors(BOp, Factors, Ops);
+ FindSingleUseMultiplyFactors(BOp, Factors);
assert(Factors.size() > 1 && "Bad linearize!");
// Add one to FactorOccurrences for each unique factor in this op.
@@ -2236,8 +2235,8 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
ValueRankMap.clear();
if (MadeChange) {
- // FIXME: This should also 'preserve the CFG'.
- auto PA = PreservedAnalyses();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
return PA;
}
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 1de742050cb3..f344eb151464 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -365,6 +365,11 @@ findBaseDefiningValueOfVector(Value *I) {
// for particular shufflevector patterns.
return BaseDefiningValueResult(I, false);
+ // The behavior of getelementptr instructions is the same for vector and
+ // non-vector data types.
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
+ return findBaseDefiningValue(GEP->getPointerOperand());
+
// A PHI or Select is a base defining value. The outer findBasePointer
// algorithm is responsible for constructing a base value for this BDV.
assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
@@ -634,7 +639,7 @@ static BDVState meetBDVStateImpl(const BDVState &LHS, const BDVState &RHS) {
// Values of type BDVState form a lattice, and this function implements the meet
// operation.
-static BDVState meetBDVState(BDVState LHS, BDVState RHS) {
+static BDVState meetBDVState(const BDVState &LHS, const BDVState &RHS) {
BDVState Result = meetBDVStateImpl(LHS, RHS);
assert(Result == meetBDVStateImpl(RHS, LHS) &&
"Math is wrong: meet does not commute!");
@@ -1123,14 +1128,14 @@ normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
// Create a new attribute set containing only attributes which can be
// transferred from the original call to the safepoint.
-static AttributeSet legalizeCallAttributes(AttributeSet AS) {
- AttributeSet Ret;
+static AttributeList legalizeCallAttributes(AttributeList AS) {
+ AttributeList Ret;
for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) {
unsigned Index = AS.getSlotIndex(Slot);
- if (Index == AttributeSet::ReturnIndex ||
- Index == AttributeSet::FunctionIndex) {
+ if (Index == AttributeList::ReturnIndex ||
+ Index == AttributeList::FunctionIndex) {
for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) {
@@ -1148,7 +1153,7 @@ static AttributeSet legalizeCallAttributes(AttributeSet AS) {
Ret = Ret.addAttributes(
AS.getContext(), Index,
- AttributeSet::get(AS.getContext(), Index, AttrBuilder(Attr)));
+ AttributeList::get(AS.getContext(), Index, AttrBuilder(Attr)));
}
}
@@ -1299,12 +1304,11 @@ static StringRef getDeoptLowering(CallSite CS) {
const char *DeoptLowering = "deopt-lowering";
if (CS.hasFnAttr(DeoptLowering)) {
// FIXME: CallSite has a *really* confusing interface around attributes
- // with values.
- const AttributeSet &CSAS = CS.getAttributes();
- if (CSAS.hasAttribute(AttributeSet::FunctionIndex,
- DeoptLowering))
- return CSAS.getAttribute(AttributeSet::FunctionIndex,
- DeoptLowering).getValueAsString();
+ // with values.
+ const AttributeList &CSAS = CS.getAttributes();
+ if (CSAS.hasAttribute(AttributeList::FunctionIndex, DeoptLowering))
+ return CSAS.getAttribute(AttributeList::FunctionIndex, DeoptLowering)
+ .getValueAsString();
Function *F = CS.getCalledFunction();
assert(F && F->hasFnAttribute(DeoptLowering));
return F->getFnAttribute(DeoptLowering).getValueAsString();
@@ -1388,7 +1392,6 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// Create the statepoint given all the arguments
Instruction *Token = nullptr;
- AttributeSet ReturnAttrs;
if (CS.isCall()) {
CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
CallInst *Call = Builder.CreateGCStatepointCall(
@@ -1400,11 +1403,12 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// Currently we will fail on parameter attributes and on certain
// function attributes.
- AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
+ AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
// If we can handle this set of attributes, set up function attrs directly
// on the statepoint and return attrs later for the gc_result intrinsic.
- Call->setAttributes(NewAttrs.getFnAttributes());
- ReturnAttrs = NewAttrs.getRetAttributes();
+ Call->setAttributes(AttributeList::get(Call->getContext(),
+ AttributeList::FunctionIndex,
+ NewAttrs.getFnAttributes()));
Token = Call;
@@ -1428,11 +1432,12 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
// Currently we will fail on parameter attributes and on certain
// function attributes.
- AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
+ AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes());
// If we can handle this set of attributes, set up function attrs directly
// on the statepoint and return attrs later for the gc_result intrinsic.
- Invoke->setAttributes(NewAttrs.getFnAttributes());
- ReturnAttrs = NewAttrs.getRetAttributes();
+ Invoke->setAttributes(AttributeList::get(Invoke->getContext(),
+ AttributeList::FunctionIndex,
+ NewAttrs.getFnAttributes()));
Token = Invoke;
@@ -1478,7 +1483,9 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
StringRef Name =
CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), Name);
- GCResult->setAttributes(CS.getAttributes().getRetAttributes());
+ GCResult->setAttributes(
+ AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex,
+ CS.getAttributes().getRetAttributes()));
// We cannot RAUW or delete CS.getInstruction() because it could be in the
// live set of some other safepoint, in which case that safepoint's
@@ -1615,8 +1622,10 @@ static void relocationViaAlloca(
// Emit alloca for "LiveValue" and record it in "allocaMap" and
// "PromotableAllocas"
+ const DataLayout &DL = F.getParent()->getDataLayout();
auto emitAllocaFor = [&](Value *LiveValue) {
- AllocaInst *Alloca = new AllocaInst(LiveValue->getType(), "",
+ AllocaInst *Alloca = new AllocaInst(LiveValue->getType(),
+ DL.getAllocaAddrSpace(), "",
F.getEntryBlock().getFirstNonPHI());
AllocaMap[LiveValue] = Alloca;
PromotableAllocas.push_back(Alloca);
@@ -1873,7 +1882,7 @@ chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
"non noop cast is found during rematerialization");
Type *SrcTy = CI->getOperand(0)->getType();
- Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy);
+ Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy, CI);
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
// Cost of the address calculation
@@ -2304,7 +2313,7 @@ static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
if (!R.empty())
AH.setAttributes(AH.getAttributes().removeAttributes(
- Ctx, Index, AttributeSet::get(Ctx, Index, R)));
+ Ctx, Index, AttributeList::get(Ctx, Index, R)));
}
void
@@ -2316,7 +2325,7 @@ RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) {
RemoveNonValidAttrAtIndex(Ctx, F, A.getArgNo() + 1);
if (isa<PointerType>(F.getReturnType()))
- RemoveNonValidAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex);
+ RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex);
}
void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
@@ -2351,7 +2360,7 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
if (isa<PointerType>(CS.getArgument(i)->getType()))
RemoveNonValidAttrAtIndex(Ctx, CS, i + 1);
if (isa<PointerType>(CS.getType()))
- RemoveNonValidAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex);
+ RemoveNonValidAttrAtIndex(Ctx, CS, AttributeList::ReturnIndex);
}
}
}
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index ede381c4c243..8908dae2f545 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -140,6 +140,14 @@ public:
return nullptr;
}
+ /// getBlockAddress - If this is a constant with a BlockAddress value, return
+ /// it; otherwise return null.
+ BlockAddress *getBlockAddress() const {
+ if (isConstant())
+ return dyn_cast<BlockAddress>(getConstant());
+ return nullptr;
+ }
+
void markForcedConstant(Constant *V) {
assert(isUnknown() && "Can't force a defined value!");
Val.setInt(forcedconstant);
@@ -306,20 +314,14 @@ public:
return MRVFunctionsTracked;
}
- void markOverdefined(Value *V) {
- assert(!V->getType()->isStructTy() &&
- "structs should use markAnythingOverdefined");
- markOverdefined(ValueState[V], V);
- }
-
- /// markAnythingOverdefined - Mark the specified value overdefined. This
+ /// markOverdefined - Mark the specified value overdefined. This
/// works with both scalars and structs.
- void markAnythingOverdefined(Value *V) {
+ void markOverdefined(Value *V) {
if (auto *STy = dyn_cast<StructType>(V->getType()))
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
markOverdefined(getStructValueState(V, i), V);
else
- markOverdefined(V);
+ markOverdefined(ValueState[V], V);
}
// isStructLatticeConstant - Return true if all the lattice values
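With markAnythingOverdefined folded into markOverdefined above, a single entry point now dispatches on the value's type. A standalone model of that dispatch in plain C++ (a simplified stand-in, not the SCCPSolver class):

#include <cstdio>
#include <map>
#include <string>

struct Value { std::string Name; unsigned NumStructElems; };

struct Solver {
  std::map<std::string, bool> Overdefined; // one lattice cell per name

  void markCell(const std::string &Cell) {
    if (!Overdefined[Cell]) {
      Overdefined[Cell] = true;
      std::printf("overdefined: %s\n", Cell.c_str());
    }
  }

  // Single entry point: struct values fan out to one cell per element,
  // scalars go straight through.
  void markOverdefined(const Value &V) {
    if (V.NumStructElems)
      for (unsigned i = 0; i != V.NumStructElems; ++i)
        markCell(V.Name + "." + std::to_string(i));
    else
      markCell(V.Name);
  }
};

int main() {
  Solver S;
  S.markOverdefined({"x", 0});   // scalar: one cell
  S.markOverdefined({"agg", 2}); // struct: cells agg.0 and agg.1
}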
@@ -513,12 +515,12 @@ private:
void visitCmpInst(CmpInst &I);
void visitExtractValueInst(ExtractValueInst &EVI);
void visitInsertValueInst(InsertValueInst &IVI);
- void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); }
+ void visitLandingPadInst(LandingPadInst &I) { markOverdefined(&I); }
void visitFuncletPadInst(FuncletPadInst &FPI) {
- markAnythingOverdefined(&FPI);
+ markOverdefined(&FPI);
}
void visitCatchSwitchInst(CatchSwitchInst &CPI) {
- markAnythingOverdefined(&CPI);
+ markOverdefined(&CPI);
visitTerminatorInst(CPI);
}
@@ -538,16 +540,16 @@ private:
void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
void visitFenceInst (FenceInst &I) { /*returns void*/ }
void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
- markAnythingOverdefined(&I);
+ markOverdefined(&I);
}
void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); }
void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
- void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); }
+ void visitVAArgInst (Instruction &I) { markOverdefined(&I); }
void visitInstruction(Instruction &I) {
// If a new instruction is added to LLVM that we don't handle.
DEBUG(dbgs() << "SCCP: Don't know how to handle: " << I << '\n');
- markAnythingOverdefined(&I); // Just in case
+ markOverdefined(&I); // Just in case
}
};
@@ -602,14 +604,36 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
return;
}
- Succs[SI->findCaseValue(CI).getSuccessorIndex()] = true;
+ Succs[SI->findCaseValue(CI)->getSuccessorIndex()] = true;
return;
}
- // TODO: This could be improved if the operand is a [cast of a] BlockAddress.
- if (isa<IndirectBrInst>(&TI)) {
- // Just mark all destinations executable!
- Succs.assign(TI.getNumSuccessors(), true);
+ // If the address of an indirect branch is a known blockaddress, mark only
+ // that target as executable.
+ if (auto *IBR = dyn_cast<IndirectBrInst>(&TI)) {
+ // Casts are folded by visitCastInst.
+ LatticeVal IBRValue = getValueState(IBR->getAddress());
+ BlockAddress *Addr = IBRValue.getBlockAddress();
+ if (!Addr) { // Overdefined or unknown condition?
+ // All destinations are executable!
+ if (!IBRValue.isUnknown())
+ Succs.assign(TI.getNumSuccessors(), true);
+ return;
+ }
+
+ BasicBlock *T = Addr->getBasicBlock();
+ assert(Addr->getFunction() == T->getParent() &&
+ "Block address of a different function?");
+ for (unsigned i = 0; i < IBR->getNumSuccessors(); ++i) {
+ // This is the target.
+ if (IBR->getDestination(i) == T) {
+ Succs[i] = true;
+ return;
+ }
+ }
+
+ // If we didn't find our destination in the IBR successor list, then we
+ // have undefined behavior. It is safe to assume no successor is executable.
return;
}
@@ -659,13 +683,21 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
if (!CI)
return !SCValue.isUnknown();
- return SI->findCaseValue(CI).getCaseSuccessor() == To;
+ return SI->findCaseValue(CI)->getCaseSuccessor() == To;
}
- // Just mark all destinations executable!
- // TODO: This could be improved if the operand is a [cast of a] BlockAddress.
- if (isa<IndirectBrInst>(TI))
- return true;
+ // If the address of an indirect branch is a known blockaddress, the only
+ // feasible edge is the one leading to that block.
+ if (auto *IBR = dyn_cast<IndirectBrInst>(TI)) {
+ LatticeVal IBRValue = getValueState(IBR->getAddress());
+ BlockAddress *Addr = IBRValue.getBlockAddress();
+
+ if (!Addr)
+ return !IBRValue.isUnknown();
+
+ // At this point, the indirectbr is branching on a blockaddress.
+ return Addr->getBasicBlock() == To;
+ }
DEBUG(dbgs() << "Unknown terminator instruction: " << *TI << '\n');
llvm_unreachable("SCCP: Don't know how to handle this terminator!");
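Taken together, the two indirectbr hunks above implement one rule: an unknown address contributes no executable edges yet, an overdefined address makes every successor feasible, and a known blockaddress selects exactly one. A standalone model in plain C++ (not the LLVM types):

#include <string>
#include <vector>

enum class Lattice { Unknown, KnownBlock, Overdefined };

struct IndirectBr { std::vector<std::string> Succs; };

static std::vector<bool> feasibleSuccs(const IndirectBr &IBR, Lattice L,
                                       const std::string &KnownTarget) {
  std::vector<bool> Feasible(IBR.Succs.size(), false);
  if (L == Lattice::Unknown)
    return Feasible;                  // optimistically: nothing executable yet
  if (L == Lattice::Overdefined) {
    Feasible.assign(IBR.Succs.size(), true);
    return Feasible;
  }
  for (size_t i = 0; i != IBR.Succs.size(); ++i)
    if (IBR.Succs[i] == KnownTarget) {
      Feasible[i] = true;             // exactly one target is executable
      break;
    }
  // A blockaddress missing from the successor list is UB; leaving every
  // edge infeasible is a safe interpretation, matching the code above.
  return Feasible;
}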
@@ -693,7 +725,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
// If this PN returns a struct, just mark the result overdefined.
// TODO: We could do a lot better than this if code actually uses this.
if (PN.getType()->isStructTy())
- return markAnythingOverdefined(&PN);
+ return markOverdefined(&PN);
if (getValueState(&PN).isOverdefined())
return; // Quick exit
@@ -803,7 +835,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
 // If this returns a struct, mark all elements overdefined; we don't track
 // structs in structs.
if (EVI.getType()->isStructTy())
- return markAnythingOverdefined(&EVI);
+ return markOverdefined(&EVI);
// If this is extracting from more than one level of struct, we don't know.
if (EVI.getNumIndices() != 1)
@@ -828,7 +860,7 @@ void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
 // If this has more than one index, we can't handle it; drive all results to
 // undef.
if (IVI.getNumIndices() != 1)
- return markAnythingOverdefined(&IVI);
+ return markOverdefined(&IVI);
Value *Aggr = IVI.getAggregateOperand();
unsigned Idx = *IVI.idx_begin();
@@ -857,7 +889,7 @@ void SCCPSolver::visitSelectInst(SelectInst &I) {
// If this select returns a struct, just mark the result overdefined.
// TODO: We could do a lot better than this if code actually uses this.
if (I.getType()->isStructTy())
- return markAnythingOverdefined(&I);
+ return markOverdefined(&I);
LatticeVal CondValue = getValueState(I.getCondition());
if (CondValue.isUnknown())
@@ -910,9 +942,16 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
// Otherwise, one of our operands is overdefined. Try to produce something
// better than overdefined with some tricks.
-
- // If this is an AND or OR with 0 or -1, it doesn't matter that the other
- // operand is overdefined.
+ // If this is 0 / Y, the result is always zero, so it doesn't matter that
+ // the second operand is overdefined.
+ if (I.getOpcode() == Instruction::UDiv || I.getOpcode() == Instruction::SDiv)
+ if (V1State.isConstant() && V1State.getConstant()->isNullValue())
+ return markConstant(IV, &I, V1State.getConstant());
+
+ // If this is:
+ // -> AND or MUL with 0
+ // -> OR with -1
+ // it doesn't matter that the other operand is overdefined.
if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Mul ||
I.getOpcode() == Instruction::Or) {
LatticeVal *NonOverdefVal = nullptr;
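The 0 / Y rule only needs the dividend: for every execution that avoids the UB of Y == 0, the quotient is zero, so Y being overdefined is irrelevant. A tiny self-contained analogue (C++17 for std::optional):

#include <cassert>
#include <cstdint>
#include <optional>

static std::optional<int64_t> foldSDiv(std::optional<int64_t> LHS,
                                       std::optional<int64_t> RHS) {
  if (LHS && *LHS == 0)
    return 0;               // 0 / Y == 0 for every well-defined Y
  if (LHS && RHS && *RHS != 0)
    return *LHS / *RHS;     // both operands known: ordinary fold
  return std::nullopt;      // otherwise not foldable at this level
}

int main() {
  assert(foldSDiv(0, std::nullopt) == 0); // divisor unknown, still folds
  assert(foldSDiv(10, 2) == 5);
  assert(!foldSDiv(std::nullopt, 2));
}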
@@ -1021,7 +1060,7 @@ void SCCPSolver::visitStoreInst(StoreInst &SI) {
void SCCPSolver::visitLoadInst(LoadInst &I) {
// If this load is of a struct, just mark the result overdefined.
if (I.getType()->isStructTy())
- return markAnythingOverdefined(&I);
+ return markOverdefined(&I);
LatticeVal PtrVal = getValueState(I.getOperand(0));
if (PtrVal.isUnknown()) return; // The pointer is not resolved yet!
@@ -1107,7 +1146,7 @@ CallOverdefined:
}
// Otherwise, we don't know anything about this call, mark it overdefined.
- return markAnythingOverdefined(I);
+ return markOverdefined(I);
}
// If this is a local function that doesn't have its address taken, mark its
@@ -1483,6 +1522,31 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
return true;
}
+ if (auto *IBR = dyn_cast<IndirectBrInst>(TI)) {
+ // Indirect branch with no successors? It is safe to assume it
+ // branches to no target.
+ if (IBR->getNumSuccessors() < 1)
+ continue;
+
+ if (!getValueState(IBR->getAddress()).isUnknown())
+ continue;
+
+ // If the input to SCCP is actually branch on undef, fix the undef to
+ // the first successor of the indirect branch.
+ if (isa<UndefValue>(IBR->getAddress())) {
+ IBR->setAddress(BlockAddress::get(IBR->getSuccessor(0)));
+ markEdgeExecutable(&BB, IBR->getSuccessor(0));
+ return true;
+ }
+
+ // Otherwise, it is a branch on a symbolic value which is currently
+ // considered to be undef. Handle this by forcing the input value to the
+ // branch to the first successor.
+ markForcedConstant(IBR->getAddress(),
+ BlockAddress::get(IBR->getSuccessor(0)));
+ return true;
+ }
+
if (auto *SI = dyn_cast<SwitchInst>(TI)) {
if (!SI->getNumCases() || !getValueState(SI->getCondition()).isUnknown())
continue;
@@ -1490,12 +1554,12 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// If the input to SCCP is actually switch on undef, fix the undef to
// the first constant.
if (isa<UndefValue>(SI->getCondition())) {
- SI->setCondition(SI->case_begin().getCaseValue());
- markEdgeExecutable(&BB, SI->case_begin().getCaseSuccessor());
+ SI->setCondition(SI->case_begin()->getCaseValue());
+ markEdgeExecutable(&BB, SI->case_begin()->getCaseSuccessor());
return true;
}
- markForcedConstant(SI->getCondition(), SI->case_begin().getCaseValue());
+ markForcedConstant(SI->getCondition(), SI->case_begin()->getCaseValue());
return true;
}
}
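Illustrative sketch of the undef-resolution step for indirectbr added above, assuming an IndirectBrInst *IBR whose address is still unknown after solving: the address is deterministically pinned to the first successor's blockaddress.

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void resolveUndefIndirectBr(IndirectBrInst *IBR) {
  if (IBR->getNumSuccessors() < 1)
    return; // no successors: treated as branching nowhere
  // BlockAddress::get(BB) builds the blockaddress constant for BB inside
  // its parent function; this is what setAddress / markForcedConstant
  // receive in the hunk above.
  IBR->setAddress(BlockAddress::get(IBR->getSuccessor(0)));
}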
@@ -1545,7 +1609,7 @@ static bool runSCCP(Function &F, const DataLayout &DL,
// Mark all arguments to the function as being overdefined.
for (Argument &AI : F.args())
- Solver.markAnythingOverdefined(&AI);
+ Solver.markOverdefined(&AI);
// Solve for constants.
bool ResolvedUndefs = true;
@@ -1728,7 +1792,7 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
// Assume nothing about the incoming arguments.
for (Argument &AI : F.args())
- Solver.markAnythingOverdefined(&AI);
+ Solver.markOverdefined(&AI);
}
// Loop over global variables. We inform the solver about any internal global
@@ -1817,32 +1881,9 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
if (!I) continue;
bool Folded = ConstantFoldTerminator(I->getParent());
- if (!Folded) {
- // The constant folder may not have been able to fold the terminator
- // if this is a branch or switch on undef. Fold it manually as a
- // branch to the first successor.
-#ifndef NDEBUG
- if (auto *BI = dyn_cast<BranchInst>(I)) {
- assert(BI->isConditional() && isa<UndefValue>(BI->getCondition()) &&
- "Branch should be foldable!");
- } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
- assert(isa<UndefValue>(SI->getCondition()) && "Switch should fold");
- } else {
- llvm_unreachable("Didn't fold away reference to block!");
- }
-#endif
-
- // Make this an uncond branch to the first successor.
- TerminatorInst *TI = I->getParent()->getTerminator();
- BranchInst::Create(TI->getSuccessor(0), TI);
-
- // Remove entries in successor phi nodes to remove edges.
- for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
- TI->getSuccessor(i)->removePredecessor(TI->getParent());
-
- // Remove the old terminator.
- TI->eraseFromParent();
- }
+ assert(Folded &&
+ "Expect TerminatorInst on ConstantInt or BlockAddress to be folded");
+ (void) Folded;
}
// Finally, delete the basic block.
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index bfcb15530ef5..d01e91a7f235 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1825,6 +1825,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// Rank the remaining candidate vector types. This is easy because we know
// they're all integer vectors. We sort by ascending number of elements.
auto RankVectorTypes = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
+ (void)DL;
assert(DL.getTypeSizeInBits(RHSTy) == DL.getTypeSizeInBits(LHSTy) &&
"Cannot have vector types of different sizes!");
assert(RHSTy->getElementType()->isIntegerTy() &&
@@ -2294,7 +2295,8 @@ private:
#endif
return getAdjustedPtr(IRB, DL, &NewAI,
- APInt(DL.getPointerSizeInBits(), Offset), PointerTy,
+ APInt(DL.getPointerTypeSizeInBits(PointerTy), Offset),
+ PointerTy,
#ifndef NDEBUG
Twine(OldName) + "."
#else
@@ -2369,6 +2371,8 @@ private:
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
+ unsigned AS = LI.getPointerAddressSpace();
+
Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
: LI.getType();
const bool IsLoadPastEnd = DL.getTypeStoreSize(TargetTy) > SliceSize;
@@ -2387,6 +2391,10 @@ private:
LI.isVolatile(), LI.getName());
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+
+ // Try to preserve the nonnull metadata.
+ if (TargetTy->isPointerTy())
+ NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
V = NewLI;
// If this is an integer load past the end of the slice (which means the
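Illustrative sketch of the metadata-preservation step above, assuming an original LoadInst &OldLI and a freshly created LoadInst *NewLI: only !nonnull is copied, and only when the new load still produces a pointer.

#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

static void preserveNonNull(const LoadInst &OldLI, LoadInst *NewLI) {
  // copyMetadata with a whitelist copies just the named kinds; other
  // metadata may not remain valid on the rewritten load.
  if (NewLI->getType()->isPointerTy())
    NewLI->copyMetadata(OldLI, LLVMContext::MD_nonnull);
}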
@@ -2401,7 +2409,7 @@ private:
"endian_shift");
}
} else {
- Type *LTy = TargetTy->getPointerTo();
+ Type *LTy = TargetTy->getPointerTo(AS);
LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
getSliceAlign(TargetTy),
LI.isVolatile(), LI.getName());
@@ -2429,7 +2437,7 @@ private:
// the computed value, and then replace the placeholder with LI, leaving
// LI only used for this computation.
Value *Placeholder =
- new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
+ new LoadInst(UndefValue::get(LI.getType()->getPointerTo(AS)));
V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
"insert");
LI.replaceAllUsesWith(V);
@@ -2542,7 +2550,8 @@ private:
NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
SI.isVolatile());
} else {
- Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo());
+ unsigned AS = SI.getPointerAddressSpace();
+ Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo(AS));
NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
SI.isVolatile());
}
@@ -3857,7 +3866,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
if (Alignment <= DL.getABITypeAlignment(SliceTy))
Alignment = 0;
NewAI = new AllocaInst(
- SliceTy, nullptr, Alignment,
+ SliceTy, AI.getType()->getAddressSpace(), nullptr, Alignment,
AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
++NumNewAllocas;
}
@@ -4184,7 +4193,7 @@ bool SROA::promoteAllocas(Function &F) {
NumPromoted += PromotableAllocas.size();
DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
- PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);
+ PromoteMemToReg(PromotableAllocas, *DT, AC);
PromotableAllocas.clear();
return true;
}
@@ -4234,9 +4243,8 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
if (!Changed)
return PreservedAnalyses::all();
- // FIXME: Even when promoting allocas we should preserve some abstract set of
- // CFG-specific analyses.
PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
return PA;
}
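The FIXME above is resolved by the preserveSet API. A minimal sketch of the idiom for a new-PM function pass that rewrites instructions but never the CFG, assuming CFGAnalyses as declared in llvm/IR/PassManager.h at the time of this patch:

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

struct NoCFGChangePass : PassInfoMixin<NoCFGChangePass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    bool Changed = false;
    // ... rewrite instructions in F without touching blocks or edges ...
    if (!Changed)
      return PreservedAnalyses::all();
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>(); // DominatorTree, LoopInfo, and friends
    return PA;
  }
};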
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index afe7483006ae..00e3c95f6f06 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -43,13 +43,14 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeDSELegacyPassPass(Registry);
initializeGuardWideningLegacyPassPass(Registry);
initializeGVNLegacyPassPass(Registry);
- initializeNewGVNPass(Registry);
+ initializeNewGVNLegacyPassPass(Registry);
initializeEarlyCSELegacyPassPass(Registry);
initializeEarlyCSEMemSSALegacyPassPass(Registry);
initializeGVNHoistLegacyPassPass(Registry);
initializeFlattenCFGPassPass(Registry);
initializeInductiveRangeCheckEliminationPass(Registry);
initializeIndVarSimplifyLegacyPassPass(Registry);
+ initializeInferAddressSpacesPass(Registry);
initializeJumpThreadingPass(Registry);
initializeLegacyLICMPassPass(Registry);
initializeLegacyLoopSinkPassPass(Registry);
@@ -58,6 +59,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopAccessLegacyAnalysisPass(Registry);
initializeLoopInstSimplifyLegacyPassPass(Registry);
initializeLoopInterchangePass(Registry);
+ initializeLoopPredicationLegacyPassPass(Registry);
initializeLoopRotateLegacyPassPass(Registry);
initializeLoopStrengthReducePass(Registry);
initializeLoopRerollPass(Registry);
@@ -79,6 +81,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeIPSCCPLegacyPassPass(Registry);
initializeSROALegacyPassPass(Registry);
initializeCFGSimplifyPassPass(Registry);
+ initializeLateCFGSimplifyPassPass(Registry);
initializeStructurizeCFGPass(Registry);
initializeSinkingLegacyPassPass(Registry);
initializeTailCallElimPass(Registry);
@@ -115,6 +118,10 @@ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createCFGSimplificationPass());
}
+void LLVMAddLateCFGSimplificationPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLateCFGSimplificationPass());
+}
+
void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createDeadStoreEliminationPass());
}
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index 39969e27367f..c0c09a7e43fe 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -520,12 +520,25 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
unsigned NumElems = VT->getNumElements();
unsigned NumIndices = GEPI.getNumIndices();
- Scatterer Base = scatter(&GEPI, GEPI.getOperand(0));
+ // The base pointer might be a scalar even if it's a vector GEP. In that
+ // case, splat the pointer into a vector value, and scatter that vector.
+ Value *Op0 = GEPI.getOperand(0);
+ if (!Op0->getType()->isVectorTy())
+ Op0 = Builder.CreateVectorSplat(NumElems, Op0);
+ Scatterer Base = scatter(&GEPI, Op0);
SmallVector<Scatterer, 8> Ops;
Ops.resize(NumIndices);
- for (unsigned I = 0; I < NumIndices; ++I)
- Ops[I] = scatter(&GEPI, GEPI.getOperand(I + 1));
+ for (unsigned I = 0; I < NumIndices; ++I) {
+ Value *Op = GEPI.getOperand(I + 1);
+
+ // The indices might be scalars even if it's a vector GEP. In that case,
+ // splat the scalar into a vector value, and scatter that vector.
+ if (!Op->getType()->isVectorTy())
+ Op = Builder.CreateVectorSplat(NumElems, Op);
+
+ Ops[I] = scatter(&GEPI, Op);
+ }
ValueVector Res;
Res.resize(NumElems);
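Illustrative sketch of the operand-normalization step above, assuming an IRBuilder<> Builder: any scalar operand of a vector GEP is broadcast to one value per lane before scattering.

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *splatIfScalar(IRBuilder<> &Builder, Value *Op,
                            unsigned NumElems) {
  if (Op->getType()->isVectorTy())
    return Op;                               // already one value per lane
  // CreateVectorSplat lowers to insertelement + shufflevector.
  return Builder.CreateVectorSplat(NumElems, Op);
}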
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index f2723bd7af82..8754c714c5b2 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -130,7 +130,8 @@ static bool mergeEmptyReturnBlocks(Function &F) {
/// iterating until no more changes are made.
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
AssumptionCache *AC,
- unsigned BonusInstThreshold) {
+ unsigned BonusInstThreshold,
+ bool LateSimplifyCFG) {
bool Changed = false;
bool LocalChange = true;
@@ -145,7 +146,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
// Loop over all of the basic blocks and remove them if they are unneeded.
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
- if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC, &LoopHeaders)) {
+ if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC, &LoopHeaders, LateSimplifyCFG)) {
LocalChange = true;
++NumSimpl;
}
@@ -156,10 +157,12 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
}
static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
- AssumptionCache *AC, int BonusInstThreshold) {
+ AssumptionCache *AC, int BonusInstThreshold,
+ bool LateSimplifyCFG) {
bool EverChanged = removeUnreachableBlocks(F);
EverChanged |= mergeEmptyReturnBlocks(F);
- EverChanged |= iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold);
+ EverChanged |= iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold,
+ LateSimplifyCFG);
// If neither pass changed anything, we're done.
if (!EverChanged) return false;
@@ -173,7 +176,8 @@ static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
return true;
do {
- EverChanged = iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold);
+ EverChanged = iterativelySimplifyCFG(F, TTI, AC, BonusInstThreshold,
+ LateSimplifyCFG);
EverChanged |= removeUnreachableBlocks(F);
} while (EverChanged);
@@ -181,17 +185,19 @@ static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
}
SimplifyCFGPass::SimplifyCFGPass()
- : BonusInstThreshold(UserBonusInstThreshold) {}
+ : BonusInstThreshold(UserBonusInstThreshold),
+ LateSimplifyCFG(true) {}
-SimplifyCFGPass::SimplifyCFGPass(int BonusInstThreshold)
- : BonusInstThreshold(BonusInstThreshold) {}
+SimplifyCFGPass::SimplifyCFGPass(int BonusInstThreshold, bool LateSimplifyCFG)
+ : BonusInstThreshold(BonusInstThreshold),
+ LateSimplifyCFG(LateSimplifyCFG) {}
PreservedAnalyses SimplifyCFGPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
- if (!simplifyFunctionCFG(F, TTI, &AC, BonusInstThreshold))
+ if (!simplifyFunctionCFG(F, TTI, &AC, BonusInstThreshold, LateSimplifyCFG))
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<GlobalsAA>();
@@ -199,16 +205,17 @@ PreservedAnalyses SimplifyCFGPass::run(Function &F,
}
namespace {
-struct CFGSimplifyPass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
+struct BaseCFGSimplifyPass : public FunctionPass {
unsigned BonusInstThreshold;
std::function<bool(const Function &)> PredicateFtor;
+ bool LateSimplifyCFG;
- CFGSimplifyPass(int T = -1,
- std::function<bool(const Function &)> Ftor = nullptr)
- : FunctionPass(ID), PredicateFtor(std::move(Ftor)) {
+ BaseCFGSimplifyPass(int T, bool LateSimplifyCFG,
+ std::function<bool(const Function &)> Ftor,
+ char &ID)
+ : FunctionPass(ID), PredicateFtor(std::move(Ftor)),
+ LateSimplifyCFG(LateSimplifyCFG) {
BonusInstThreshold = (T == -1) ? UserBonusInstThreshold : unsigned(T);
- initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
if (skipFunction(F) || (PredicateFtor && !PredicateFtor(F)))
@@ -218,7 +225,7 @@ struct CFGSimplifyPass : public FunctionPass {
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- return simplifyFunctionCFG(F, TTI, AC, BonusInstThreshold);
+ return simplifyFunctionCFG(F, TTI, AC, BonusInstThreshold, LateSimplifyCFG);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -227,6 +234,26 @@ struct CFGSimplifyPass : public FunctionPass {
AU.addPreserved<GlobalsAAWrapperPass>();
}
};
+
+struct CFGSimplifyPass : public BaseCFGSimplifyPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ CFGSimplifyPass(int T = -1,
+ std::function<bool(const Function &)> Ftor = nullptr)
+ : BaseCFGSimplifyPass(T, false, Ftor, ID) {
+ initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+ }
+};
+
+struct LateCFGSimplifyPass : public BaseCFGSimplifyPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ LateCFGSimplifyPass(int T = -1,
+ std::function<bool(const Function &)> Ftor = nullptr)
+ : BaseCFGSimplifyPass(T, true, Ftor, ID) {
+ initializeLateCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+ }
+};
}
char CFGSimplifyPass::ID = 0;
@@ -237,9 +264,24 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
false)
+char LateCFGSimplifyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LateCFGSimplifyPass, "latesimplifycfg",
+ "Simplify the CFG more aggressively", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(LateCFGSimplifyPass, "latesimplifycfg",
+ "Simplify the CFG more aggressively", false, false)
+
// Public interface to the CFGSimplification pass
FunctionPass *
llvm::createCFGSimplificationPass(int Threshold,
- std::function<bool(const Function &)> Ftor) {
+ std::function<bool(const Function &)> Ftor) {
return new CFGSimplifyPass(Threshold, std::move(Ftor));
}
+
+// Public interface to the LateCFGSimplification pass
+FunctionPass *
+llvm::createLateCFGSimplificationPass(int Threshold,
+ std::function<bool(const Function &)> Ftor) {
+ return new LateCFGSimplifyPass(Threshold, std::move(Ftor));
+}
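Illustrative use of the new entry point (assuming the header declares the same parameter defaults as createCFGSimplificationPass): the late variant runs the same simplifications plus the more aggressive rewrites gated behind LateSimplifyCFG, intended for late in the pipeline.

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;

static void addCFGCleanup(legacy::PassManagerBase &PM, bool Late) {
  if (Late)
    PM.add(createLateCFGSimplificationPass()); // new in this patch
  else
    PM.add(createCFGSimplificationPass());
}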
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index c3f14a0f4b1e..102e9eaeab77 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -164,13 +164,14 @@ static bool SinkInstruction(Instruction *Inst,
// Instructions can only be sunk if all their uses are in blocks
// dominated by one of the successors.
- // Look at all the postdominators and see if we can sink it in one.
+ // Look at all the dominated blocks and see if we can sink it in one.
DomTreeNode *DTN = DT.getNode(Inst->getParent());
for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end();
I != E && SuccToSinkTo == nullptr; ++I) {
BasicBlock *Candidate = (*I)->getBlock();
- if ((*I)->getIDom()->getBlock() == Inst->getParent() &&
- IsAcceptableTarget(Inst, Candidate, DT, LI))
+ // A node always immediately dominates its children in the dominator
+ // tree, so the old immediate-dominator check was redundant.
+ if (IsAcceptableTarget(Inst, Candidate, DT, LI))
SuccToSinkTo = Candidate;
}
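The dropped condition above was a tautology; a standalone model (plain C++) of the invariant the comment states:

#include <vector>

struct DomNode {
  DomNode *IDom = nullptr;
  std::vector<DomNode *> Children;
};

// In a well-formed dominator tree this always returns true: every child of
// a node is, by construction, immediately dominated by that node, which is
// why the patch can drop the getIDom() comparison.
static bool childrenHaveThisIDom(const DomNode &N) {
  for (const DomNode *C : N.Children)
    if (C->IDom != &N)
      return false;
  return true;
}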
@@ -262,9 +263,8 @@ PreservedAnalyses SinkingPass::run(Function &F, FunctionAnalysisManager &AM) {
if (!iterativelySinkInstructions(F, DT, LI, AA))
return PreservedAnalyses::all();
- auto PA = PreservedAnalyses();
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<LoopAnalysis>();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
return PA;
}
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
index 2e95926c0b3f..4c9746b8c691 100644
--- a/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -102,6 +102,10 @@ FunctionPass *llvm::createAddDiscriminatorsPass() {
return new AddDiscriminatorsLegacyPass();
}
+static bool shouldHaveDiscriminator(const Instruction *I) {
+ return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I);
+}
+
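Illustrative restatement of the new predicate, spelling out the two intrinsic cases (the name here is hypothetical; the logic mirrors shouldHaveDiscriminator above):

#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

static bool wantsDiscriminator(const Instruction &I) {
  if (!isa<IntrinsicInst>(&I))
    return true;                // ordinary instructions always qualify
  // Memory intrinsics (llvm.memcpy and friends) keep a discriminator,
  // since SROA may expand them into real loads and stores later.
  return isa<MemIntrinsic>(&I);
}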
/// \brief Assign DWARF discriminators.
///
/// To assign discriminators, we examine the boundaries of every
@@ -176,7 +180,13 @@ static bool addDiscriminators(Function &F) {
// discriminator for this instruction.
for (BasicBlock &B : F) {
for (auto &I : B.getInstList()) {
- if (isa<IntrinsicInst>(&I))
+ // Not all intrinsic calls should have a discriminator.
+ // We want to avoid a non-deterministic assignment of discriminators at
+ // different debug levels. We still allow discriminators on memory
+ // intrinsic calls because those can be early expanded by SROA into
+ // pairs of loads and stores, and the expanded load/store instructions
+ // should have a valid discriminator.
+ if (!shouldHaveDiscriminator(&I))
continue;
const DILocation *DIL = I.getDebugLoc();
if (!DIL)
@@ -190,8 +200,8 @@ static bool addDiscriminators(Function &F) {
// discriminator is needed to distinguish both instructions.
// Only the lowest 7 bits are used to represent a discriminator to fit
// it in 1 byte ULEB128 representation.
- unsigned Discriminator = (R.second ? ++LDM[L] : LDM[L]) & 0x7f;
- I.setDebugLoc(DIL->cloneWithDiscriminator(Discriminator));
+ unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];
+ I.setDebugLoc(DIL->setBaseDiscriminator(Discriminator));
DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
<< DIL->getColumn() << ":" << Discriminator << " " << I
<< "\n");
@@ -207,6 +217,10 @@ static bool addDiscriminators(Function &F) {
LocationSet CallLocations;
for (auto &I : B.getInstList()) {
CallInst *Current = dyn_cast<CallInst>(&I);
+ // We bypass intrinsic calls for the following two reasons:
+ // 1) We want to avoid a non-deterministic assignment of
+ // discriminators.
+ // 2) We want to minimize the number of base discriminators used.
if (!Current || isa<IntrinsicInst>(&I))
continue;
@@ -216,8 +230,8 @@ static bool addDiscriminators(Function &F) {
Location L =
std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
if (!CallLocations.insert(L).second) {
- Current->setDebugLoc(
- CurrentDIL->cloneWithDiscriminator((++LDM[L]) & 0x7f));
+ unsigned Discriminator = ++LDM[L];
+ Current->setDebugLoc(CurrentDIL->setBaseDiscriminator(Discriminator));
Changed = true;
}
}
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index b90349d3cdad..22af21d55c01 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -438,7 +438,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// The new block unconditionally branches to the old block.
BranchInst *BI = BranchInst::Create(BB, NewBB);
- BI->setDebugLoc(BB->getFirstNonPHI()->getDebugLoc());
+ BI->setDebugLoc(BB->getFirstNonPHIOrDbg()->getDebugLoc());
// Move the edges from Preds to point to NewBB instead of BB.
for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
@@ -646,9 +646,10 @@ llvm::SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
}
if (LI) {
- Loop *L = LI->getLoopFor(Head);
- L->addBasicBlockToLoop(ThenBlock, *LI);
- L->addBasicBlockToLoop(Tail, *LI);
+ if (Loop *L = LI->getLoopFor(Head)) {
+ L->addBasicBlockToLoop(ThenBlock, *LI);
+ L->addBasicBlockToLoop(Tail, *LI);
+ }
}
return CheckTerm;
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index e61b04fbdd57..6cd9f1614991 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -96,9 +96,9 @@ static bool setDoesNotAlias(Function &F, unsigned n) {
}
static bool setNonNull(Function &F, unsigned n) {
- assert((n != AttributeSet::ReturnIndex ||
- F.getReturnType()->isPointerTy()) &&
- "nonnull applies only to pointers");
+ assert(
+ (n != AttributeList::ReturnIndex || F.getReturnType()->isPointerTy()) &&
+ "nonnull applies only to pointers");
if (F.getAttributes().hasAttribute(n, Attribute::NonNull))
return false;
F.addAttribute(n, Attribute::NonNull);
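A sketch of the renamed index constants (illustrative, not from the patch): AttributeList keeps the old special indices, with the return value at index 0, parameters starting at 1, and the function itself at ~0U.

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
using namespace llvm;

static bool returnsNonNull(const Function &F) {
  return F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                        Attribute::NonNull);
}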
@@ -107,255 +107,255 @@ static bool setNonNull(Function &F, unsigned n) {
}
bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
- LibFunc::Func TheLibFunc;
+ LibFunc TheLibFunc;
if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc)))
return false;
bool Changed = false;
switch (TheLibFunc) {
- case LibFunc::strlen:
+ case LibFunc_strlen:
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::strchr:
- case LibFunc::strrchr:
+ case LibFunc_strchr:
+ case LibFunc_strrchr:
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
return Changed;
- case LibFunc::strtol:
- case LibFunc::strtod:
- case LibFunc::strtof:
- case LibFunc::strtoul:
- case LibFunc::strtoll:
- case LibFunc::strtold:
- case LibFunc::strtoull:
+ case LibFunc_strtol:
+ case LibFunc_strtod:
+ case LibFunc_strtof:
+ case LibFunc_strtoul:
+ case LibFunc_strtoll:
+ case LibFunc_strtold:
+ case LibFunc_strtoull:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::strcpy:
- case LibFunc::stpcpy:
- case LibFunc::strcat:
- case LibFunc::strncat:
- case LibFunc::strncpy:
- case LibFunc::stpncpy:
+ case LibFunc_strcpy:
+ case LibFunc_stpcpy:
+ case LibFunc_strcat:
+ case LibFunc_strncat:
+ case LibFunc_strncpy:
+ case LibFunc_stpncpy:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::strxfrm:
+ case LibFunc_strxfrm:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::strcmp: // 0,1
- case LibFunc::strspn: // 0,1
- case LibFunc::strncmp: // 0,1
- case LibFunc::strcspn: // 0,1
- case LibFunc::strcoll: // 0,1
- case LibFunc::strcasecmp: // 0,1
- case LibFunc::strncasecmp: //
+ case LibFunc_strcmp: // 0,1
+ case LibFunc_strspn: // 0,1
+ case LibFunc_strncmp: // 0,1
+ case LibFunc_strcspn: // 0,1
+ case LibFunc_strcoll: // 0,1
+ case LibFunc_strcasecmp: // 0,1
+ case LibFunc_strncasecmp: //
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::strstr:
- case LibFunc::strpbrk:
+ case LibFunc_strstr:
+ case LibFunc_strpbrk:
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::strtok:
- case LibFunc::strtok_r:
+ case LibFunc_strtok:
+ case LibFunc_strtok_r:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::scanf:
+ case LibFunc_scanf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::setbuf:
- case LibFunc::setvbuf:
+ case LibFunc_setbuf:
+ case LibFunc_setvbuf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::strdup:
- case LibFunc::strndup:
+ case LibFunc_strdup:
+ case LibFunc_strndup:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::stat:
- case LibFunc::statvfs:
+ case LibFunc_stat:
+ case LibFunc_statvfs:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::sscanf:
+ case LibFunc_sscanf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::sprintf:
+ case LibFunc_sprintf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::snprintf:
+ case LibFunc_snprintf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 3);
Changed |= setOnlyReadsMemory(F, 3);
return Changed;
- case LibFunc::setitimer:
+ case LibFunc_setitimer:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
Changed |= setDoesNotCapture(F, 3);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::system:
+ case LibFunc_system:
// May throw; "system" is a valid pthread cancellation point.
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::malloc:
+ case LibFunc_malloc:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
return Changed;
- case LibFunc::memcmp:
+ case LibFunc_memcmp:
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::memchr:
- case LibFunc::memrchr:
+ case LibFunc_memchr:
+ case LibFunc_memrchr:
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotThrow(F);
return Changed;
- case LibFunc::modf:
- case LibFunc::modff:
- case LibFunc::modfl:
+ case LibFunc_modf:
+ case LibFunc_modff:
+ case LibFunc_modfl:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::memcpy:
- case LibFunc::mempcpy:
- case LibFunc::memccpy:
- case LibFunc::memmove:
+ case LibFunc_memcpy:
+ case LibFunc_mempcpy:
+ case LibFunc_memccpy:
+ case LibFunc_memmove:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::memcpy_chk:
+ case LibFunc_memcpy_chk:
Changed |= setDoesNotThrow(F);
return Changed;
- case LibFunc::memalign:
+ case LibFunc_memalign:
Changed |= setDoesNotAlias(F, 0);
return Changed;
- case LibFunc::mkdir:
+ case LibFunc_mkdir:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::mktime:
+ case LibFunc_mktime:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::realloc:
+ case LibFunc_realloc:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::read:
+ case LibFunc_read:
// May throw; "read" is a valid pthread cancellation point.
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::rewind:
+ case LibFunc_rewind:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::rmdir:
- case LibFunc::remove:
- case LibFunc::realpath:
+ case LibFunc_rmdir:
+ case LibFunc_remove:
+ case LibFunc_realpath:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::rename:
+ case LibFunc_rename:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::readlink:
+ case LibFunc_readlink:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::write:
+ case LibFunc_write:
// May throw; "write" is a valid pthread cancellation point.
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::bcopy:
+ case LibFunc_bcopy:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::bcmp:
+ case LibFunc_bcmp:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::bzero:
+ case LibFunc_bzero:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::calloc:
+ case LibFunc_calloc:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
return Changed;
- case LibFunc::chmod:
- case LibFunc::chown:
+ case LibFunc_chmod:
+ case LibFunc_chown:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::ctermid:
- case LibFunc::clearerr:
- case LibFunc::closedir:
+ case LibFunc_ctermid:
+ case LibFunc_clearerr:
+ case LibFunc_closedir:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::atoi:
- case LibFunc::atol:
- case LibFunc::atof:
- case LibFunc::atoll:
+ case LibFunc_atoi:
+ case LibFunc_atol:
+ case LibFunc_atof:
+ case LibFunc_atoll:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::access:
+ case LibFunc_access:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::fopen:
+ case LibFunc_fopen:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -363,150 +363,150 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::fdopen:
+ case LibFunc_fdopen:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::feof:
- case LibFunc::free:
- case LibFunc::fseek:
- case LibFunc::ftell:
- case LibFunc::fgetc:
- case LibFunc::fseeko:
- case LibFunc::ftello:
- case LibFunc::fileno:
- case LibFunc::fflush:
- case LibFunc::fclose:
- case LibFunc::fsetpos:
- case LibFunc::flockfile:
- case LibFunc::funlockfile:
- case LibFunc::ftrylockfile:
+ case LibFunc_feof:
+ case LibFunc_free:
+ case LibFunc_fseek:
+ case LibFunc_ftell:
+ case LibFunc_fgetc:
+ case LibFunc_fseeko:
+ case LibFunc_ftello:
+ case LibFunc_fileno:
+ case LibFunc_fflush:
+ case LibFunc_fclose:
+ case LibFunc_fsetpos:
+ case LibFunc_flockfile:
+ case LibFunc_funlockfile:
+ case LibFunc_ftrylockfile:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::ferror:
+ case LibFunc_ferror:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F);
return Changed;
- case LibFunc::fputc:
- case LibFunc::fstat:
- case LibFunc::frexp:
- case LibFunc::frexpf:
- case LibFunc::frexpl:
- case LibFunc::fstatvfs:
+ case LibFunc_fputc:
+ case LibFunc_fstat:
+ case LibFunc_frexp:
+ case LibFunc_frexpf:
+ case LibFunc_frexpl:
+ case LibFunc_fstatvfs:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::fgets:
+ case LibFunc_fgets:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 3);
return Changed;
- case LibFunc::fread:
+ case LibFunc_fread:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 4);
return Changed;
- case LibFunc::fwrite:
+ case LibFunc_fwrite:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 4);
// FIXME: readonly #1?
return Changed;
- case LibFunc::fputs:
+ case LibFunc_fputs:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::fscanf:
- case LibFunc::fprintf:
+ case LibFunc_fscanf:
+ case LibFunc_fprintf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::fgetpos:
+ case LibFunc_fgetpos:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::getc:
- case LibFunc::getlogin_r:
- case LibFunc::getc_unlocked:
+ case LibFunc_getc:
+ case LibFunc_getlogin_r:
+ case LibFunc_getc_unlocked:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::getenv:
+ case LibFunc_getenv:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyReadsMemory(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::gets:
- case LibFunc::getchar:
+ case LibFunc_gets:
+ case LibFunc_getchar:
Changed |= setDoesNotThrow(F);
return Changed;
- case LibFunc::getitimer:
+ case LibFunc_getitimer:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::getpwnam:
+ case LibFunc_getpwnam:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::ungetc:
+ case LibFunc_ungetc:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::uname:
+ case LibFunc_uname:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::unlink:
+ case LibFunc_unlink:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::unsetenv:
+ case LibFunc_unsetenv:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::utime:
- case LibFunc::utimes:
+ case LibFunc_utime:
+ case LibFunc_utimes:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::putc:
+ case LibFunc_putc:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::puts:
- case LibFunc::printf:
- case LibFunc::perror:
+ case LibFunc_puts:
+ case LibFunc_printf:
+ case LibFunc_perror:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::pread:
+ case LibFunc_pread:
// May throw; "pread" is a valid pthread cancellation point.
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::pwrite:
+ case LibFunc_pwrite:
// May throw; "pwrite" is a valid pthread cancellation point.
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::putchar:
+ case LibFunc_putchar:
Changed |= setDoesNotThrow(F);
return Changed;
- case LibFunc::popen:
+ case LibFunc_popen:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -514,132 +514,132 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::pclose:
+ case LibFunc_pclose:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::vscanf:
+ case LibFunc_vscanf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::vsscanf:
+ case LibFunc_vsscanf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::vfscanf:
+ case LibFunc_vfscanf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::valloc:
+ case LibFunc_valloc:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
return Changed;
- case LibFunc::vprintf:
+ case LibFunc_vprintf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::vfprintf:
- case LibFunc::vsprintf:
+ case LibFunc_vfprintf:
+ case LibFunc_vsprintf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::vsnprintf:
+ case LibFunc_vsnprintf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 3);
Changed |= setOnlyReadsMemory(F, 3);
return Changed;
- case LibFunc::open:
+ case LibFunc_open:
// May throw; "open" is a valid pthread cancellation point.
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::opendir:
+ case LibFunc_opendir:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::tmpfile:
+ case LibFunc_tmpfile:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
return Changed;
- case LibFunc::times:
+ case LibFunc_times:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::htonl:
- case LibFunc::htons:
- case LibFunc::ntohl:
- case LibFunc::ntohs:
+ case LibFunc_htonl:
+ case LibFunc_htons:
+ case LibFunc_ntohl:
+ case LibFunc_ntohs:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAccessMemory(F);
return Changed;
- case LibFunc::lstat:
+ case LibFunc_lstat:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::lchown:
+ case LibFunc_lchown:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::qsort:
+ case LibFunc_qsort:
// May throw; places call through function pointer.
Changed |= setDoesNotCapture(F, 4);
return Changed;
- case LibFunc::dunder_strdup:
- case LibFunc::dunder_strndup:
+ case LibFunc_dunder_strdup:
+ case LibFunc_dunder_strndup:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::dunder_strtok_r:
+ case LibFunc_dunder_strtok_r:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::under_IO_getc:
+ case LibFunc_under_IO_getc:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::under_IO_putc:
+ case LibFunc_under_IO_putc:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::dunder_isoc99_scanf:
+ case LibFunc_dunder_isoc99_scanf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::stat64:
- case LibFunc::lstat64:
- case LibFunc::statvfs64:
+ case LibFunc_stat64:
+ case LibFunc_lstat64:
+ case LibFunc_statvfs64:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::dunder_isoc99_sscanf:
+ case LibFunc_dunder_isoc99_sscanf:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::fopen64:
+ case LibFunc_fopen64:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setDoesNotCapture(F, 1);
@@ -647,26 +647,26 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
- case LibFunc::fseeko64:
- case LibFunc::ftello64:
+ case LibFunc_fseeko64:
+ case LibFunc_ftello64:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc::tmpfile64:
+ case LibFunc_tmpfile64:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotAlias(F, 0);
return Changed;
- case LibFunc::fstat64:
- case LibFunc::fstatvfs64:
+ case LibFunc_fstat64:
+ case LibFunc_fstatvfs64:
Changed |= setDoesNotThrow(F);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::open64:
+ case LibFunc_open64:
// May throw; "open" is a valid pthread cancellation point.
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc::gettimeofday:
+ case LibFunc_gettimeofday:
// Currently some platforms have the restrict keyword on the arguments to
// gettimeofday. To be conservative, do not add noalias to gettimeofday's
// arguments.
@@ -674,29 +674,29 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
return Changed;
- case LibFunc::Znwj: // new(unsigned int)
- case LibFunc::Znwm: // new(unsigned long)
- case LibFunc::Znaj: // new[](unsigned int)
- case LibFunc::Znam: // new[](unsigned long)
- case LibFunc::msvc_new_int: // new(unsigned int)
- case LibFunc::msvc_new_longlong: // new(unsigned long long)
- case LibFunc::msvc_new_array_int: // new[](unsigned int)
- case LibFunc::msvc_new_array_longlong: // new[](unsigned long long)
+ case LibFunc_Znwj: // new(unsigned int)
+ case LibFunc_Znwm: // new(unsigned long)
+ case LibFunc_Znaj: // new[](unsigned int)
+ case LibFunc_Znam: // new[](unsigned long)
+ case LibFunc_msvc_new_int: // new(unsigned int)
+ case LibFunc_msvc_new_longlong: // new(unsigned long long)
+ case LibFunc_msvc_new_array_int: // new[](unsigned int)
+ case LibFunc_msvc_new_array_longlong: // new[](unsigned long long)
// Operator new always returns a nonnull noalias pointer
- Changed |= setNonNull(F, AttributeSet::ReturnIndex);
- Changed |= setDoesNotAlias(F, AttributeSet::ReturnIndex);
+ Changed |= setNonNull(F, AttributeList::ReturnIndex);
+ Changed |= setDoesNotAlias(F, AttributeList::ReturnIndex);
return Changed;
//TODO: add LibFunc entries for:
- //case LibFunc::memset_pattern4:
- //case LibFunc::memset_pattern8:
- case LibFunc::memset_pattern16:
+ //case LibFunc_memset_pattern4:
+ //case LibFunc_memset_pattern8:
+ case LibFunc_memset_pattern16:
Changed |= setOnlyAccessesArgMemory(F);
Changed |= setDoesNotCapture(F, 1);
Changed |= setDoesNotCapture(F, 2);
Changed |= setOnlyReadsMemory(F, 2);
return Changed;
// int __nvvm_reflect(const char *)
- case LibFunc::nvvm_reflect:
+ case LibFunc_nvvm_reflect:
Changed |= setDoesNotAccessMemory(F);
Changed |= setDoesNotThrow(F);
return Changed;
@@ -717,13 +717,13 @@ Value *llvm::castToCStr(Value *V, IRBuilder<> &B) {
Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::strlen))
+ if (!TLI->has(LibFunc_strlen))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
Constant *StrLen = M->getOrInsertFunction("strlen", DL.getIntPtrType(Context),
- B.getInt8PtrTy(), nullptr);
+ B.getInt8PtrTy());
inferLibFuncAttributes(*M->getFunction("strlen"), *TLI);
CallInst *CI = B.CreateCall(StrLen, castToCStr(Ptr, B), "strlen");
if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
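The dropped nullptr above reflects a signature change: Module::getOrInsertFunction moved from C-style varargs (which required a trailing sentinel) to a typed variadic form. A sketch, assuming a Module &M and already-built types:

#include "llvm/IR/Module.h"
using namespace llvm;

static Constant *declareStrlen(Module &M, Type *IntPtrTy, Type *I8PtrTy) {
  // Old form: M.getOrInsertFunction("strlen", IntPtrTy, I8PtrTy, nullptr);
  // New form: the parameter types are simply the remaining arguments.
  return M.getOrInsertFunction("strlen", IntPtrTy, I8PtrTy);
}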
@@ -734,14 +734,14 @@ Value *llvm::emitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL,
Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::strchr))
+ if (!TLI->has(LibFunc_strchr))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
Type *I8Ptr = B.getInt8PtrTy();
Type *I32Ty = B.getInt32Ty();
Constant *StrChr =
- M->getOrInsertFunction("strchr", I8Ptr, I8Ptr, I32Ty, nullptr);
+ M->getOrInsertFunction("strchr", I8Ptr, I8Ptr, I32Ty);
inferLibFuncAttributes(*M->getFunction("strchr"), *TLI);
CallInst *CI = B.CreateCall(
StrChr, {castToCStr(Ptr, B), ConstantInt::get(I32Ty, C)}, "strchr");
@@ -752,14 +752,14 @@ Value *llvm::emitStrChr(Value *Ptr, char C, IRBuilder<> &B,
Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::strncmp))
+ if (!TLI->has(LibFunc_strncmp))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *StrNCmp = M->getOrInsertFunction("strncmp", B.getInt32Ty(),
B.getInt8PtrTy(), B.getInt8PtrTy(),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
inferLibFuncAttributes(*M->getFunction("strncmp"), *TLI);
CallInst *CI = B.CreateCall(
StrNCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, "strncmp");
@@ -772,12 +772,12 @@ Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
const TargetLibraryInfo *TLI, StringRef Name) {
- if (!TLI->has(LibFunc::strcpy))
+ if (!TLI->has(LibFunc_strcpy))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
Type *I8Ptr = B.getInt8PtrTy();
- Value *StrCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr, nullptr);
+ Value *StrCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr);
inferLibFuncAttributes(*M->getFunction(Name), *TLI);
CallInst *CI =
B.CreateCall(StrCpy, {castToCStr(Dst, B), castToCStr(Src, B)}, Name);
@@ -788,13 +788,13 @@ Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
const TargetLibraryInfo *TLI, StringRef Name) {
- if (!TLI->has(LibFunc::strncpy))
+ if (!TLI->has(LibFunc_strncpy))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
Type *I8Ptr = B.getInt8PtrTy();
Value *StrNCpy = M->getOrInsertFunction(Name, I8Ptr, I8Ptr, I8Ptr,
- Len->getType(), nullptr);
+ Len->getType());
inferLibFuncAttributes(*M->getFunction(Name), *TLI);
CallInst *CI = B.CreateCall(
StrNCpy, {castToCStr(Dst, B), castToCStr(Src, B), Len}, "strncpy");
@@ -806,18 +806,18 @@ Value *llvm::emitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
IRBuilder<> &B, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::memcpy_chk))
+ if (!TLI->has(LibFunc_memcpy_chk))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
- AttributeSet AS;
- AS = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex,
- Attribute::NoUnwind);
+ AttributeList AS;
+ AS = AttributeList::get(M->getContext(), AttributeList::FunctionIndex,
+ Attribute::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCpy = M->getOrInsertFunction(
- "__memcpy_chk", AttributeSet::get(M->getContext(), AS), B.getInt8PtrTy(),
+ "__memcpy_chk", AttributeList::get(M->getContext(), AS), B.getInt8PtrTy(),
B.getInt8PtrTy(), B.getInt8PtrTy(), DL.getIntPtrType(Context),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
Dst = castToCStr(Dst, B);
Src = castToCStr(Src, B);
CallInst *CI = B.CreateCall(MemCpy, {Dst, Src, Len, ObjSize});
@@ -828,14 +828,14 @@ Value *llvm::emitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::memchr))
+ if (!TLI->has(LibFunc_memchr))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemChr = M->getOrInsertFunction("memchr", B.getInt8PtrTy(),
B.getInt8PtrTy(), B.getInt32Ty(),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
inferLibFuncAttributes(*M->getFunction("memchr"), *TLI);
CallInst *CI = B.CreateCall(MemChr, {castToCStr(Ptr, B), Val, Len}, "memchr");
@@ -847,14 +847,14 @@ Value *llvm::emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
Value *llvm::emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::memcmp))
+ if (!TLI->has(LibFunc_memcmp))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
Value *MemCmp = M->getOrInsertFunction("memcmp", B.getInt32Ty(),
B.getInt8PtrTy(), B.getInt8PtrTy(),
- DL.getIntPtrType(Context), nullptr);
+ DL.getIntPtrType(Context));
inferLibFuncAttributes(*M->getFunction("memcmp"), *TLI);
CallInst *CI = B.CreateCall(
MemCmp, {castToCStr(Ptr1, B), castToCStr(Ptr2, B), Len}, "memcmp");
@@ -881,13 +881,13 @@ static void appendTypeSuffix(Value *Op, StringRef &Name,
}
Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
- const AttributeSet &Attrs) {
+ const AttributeList &Attrs) {
SmallString<20> NameBuffer;
appendTypeSuffix(Op, Name, NameBuffer);
Module *M = B.GetInsertBlock()->getModule();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
- Op->getType(), nullptr);
+ Op->getType());
CallInst *CI = B.CreateCall(Callee, Op, Name);
CI->setAttributes(Attrs);
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
@@ -897,13 +897,13 @@ Value *llvm::emitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
}
Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
- IRBuilder<> &B, const AttributeSet &Attrs) {
+ IRBuilder<> &B, const AttributeList &Attrs) {
SmallString<20> NameBuffer;
appendTypeSuffix(Op1, Name, NameBuffer);
Module *M = B.GetInsertBlock()->getModule();
Value *Callee = M->getOrInsertFunction(Name, Op1->getType(), Op1->getType(),
- Op2->getType(), nullptr);
+ Op2->getType());
CallInst *CI = B.CreateCall(Callee, {Op1, Op2}, Name);
CI->setAttributes(Attrs);
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
@@ -914,12 +914,12 @@ Value *llvm::emitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::putchar))
+ if (!TLI->has(LibFunc_putchar))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
- Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
- B.getInt32Ty(), nullptr);
+ Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
+                                         B.getInt32Ty());
+ inferLibFuncAttributes(*M->getFunction("putchar"), *TLI);
CallInst *CI = B.CreateCall(PutChar,
B.CreateIntCast(Char,
B.getInt32Ty(),
@@ -934,12 +934,12 @@ Value *llvm::emitPutChar(Value *Char, IRBuilder<> &B,
Value *llvm::emitPutS(Value *Str, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::puts))
+ if (!TLI->has(LibFunc_puts))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
Value *PutS =
- M->getOrInsertFunction("puts", B.getInt32Ty(), B.getInt8PtrTy(), nullptr);
+ M->getOrInsertFunction("puts", B.getInt32Ty(), B.getInt8PtrTy());
inferLibFuncAttributes(*M->getFunction("puts"), *TLI);
CallInst *CI = B.CreateCall(PutS, castToCStr(Str, B), "puts");
if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
@@ -949,12 +949,12 @@ Value *llvm::emitPutS(Value *Str, IRBuilder<> &B,
Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::fputc))
+ if (!TLI->has(LibFunc_fputc))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
Constant *F = M->getOrInsertFunction("fputc", B.getInt32Ty(), B.getInt32Ty(),
- File->getType(), nullptr);
+ File->getType());
if (File->getType()->isPointerTy())
inferLibFuncAttributes(*M->getFunction("fputc"), *TLI);
Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true,
@@ -968,13 +968,13 @@ Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B,
Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,
const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::fputs))
+ if (!TLI->has(LibFunc_fputs))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
- StringRef FPutsName = TLI->getName(LibFunc::fputs);
+ StringRef FPutsName = TLI->getName(LibFunc_fputs);
Constant *F = M->getOrInsertFunction(
- FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType(), nullptr);
+ FPutsName, B.getInt32Ty(), B.getInt8PtrTy(), File->getType());
if (File->getType()->isPointerTy())
inferLibFuncAttributes(*M->getFunction(FPutsName), *TLI);
CallInst *CI = B.CreateCall(F, {castToCStr(Str, B), File}, "fputs");
@@ -986,16 +986,16 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,
Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
- if (!TLI->has(LibFunc::fwrite))
+ if (!TLI->has(LibFunc_fwrite))
return nullptr;
Module *M = B.GetInsertBlock()->getModule();
LLVMContext &Context = B.GetInsertBlock()->getContext();
- StringRef FWriteName = TLI->getName(LibFunc::fwrite);
+ StringRef FWriteName = TLI->getName(LibFunc_fwrite);
Constant *F = M->getOrInsertFunction(
FWriteName, DL.getIntPtrType(Context), B.getInt8PtrTy(),
- DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType(),
- nullptr);
+ DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType());
+
if (File->getType()->isPointerTy())
inferLibFuncAttributes(*M->getFunction(FWriteName), *TLI);
CallInst *CI =
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index bc2cef26edcb..1cfe3bd53648 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -17,6 +17,8 @@
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -36,12 +38,21 @@ namespace {
: SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
};
- struct DivPhiNodes {
- PHINode *Quotient;
- PHINode *Remainder;
+ struct QuotRemPair {
+ Value *Quotient;
+ Value *Remainder;
- DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
- : Quotient(InQuotient), Remainder(InRemainder) {}
+ QuotRemPair(Value *InQuotient, Value *InRemainder)
+ : Quotient(InQuotient), Remainder(InRemainder) {}
+ };
+
+ /// A quotient and remainder, plus a BB from which they logically "originate".
+ /// If you use Quotient or Remainder in a Phi node, you should use BB as its
+ /// corresponding predecessor.
+ struct QuotRemWithBB {
+ BasicBlock *BB = nullptr;
+ Value *Quotient = nullptr;
+ Value *Remainder = nullptr;
};
}
@@ -69,159 +80,376 @@ namespace llvm {
}
};
- typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
+ typedef DenseMap<DivOpInfo, QuotRemPair> DivCacheTy;
+ typedef DenseMap<unsigned, unsigned> BypassWidthsTy;
+ typedef SmallPtrSet<Instruction *, 4> VisitedSetTy;
}
-// insertFastDiv - Substitutes the div/rem instruction with code that checks the
-// value of the operands and uses a shorter-faster div/rem instruction when
-// possible and the longer-slower div/rem instruction otherwise.
-static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
- bool UseDivOp, bool UseSignedOp,
- DivCacheTy &PerBBDivCache) {
- Function *F = I->getParent()->getParent();
- // Get instruction operands
- Value *Dividend = I->getOperand(0);
- Value *Divisor = I->getOperand(1);
+namespace {
+enum ValueRange {
+ /// Operand definitely fits into BypassType. No runtime checks are needed.
+ VALRNG_KNOWN_SHORT,
+ /// A runtime check is required, as value range is unknown.
+ VALRNG_UNKNOWN,
+ /// Operand is unlikely to fit into BypassType. The bypassing should be
+ /// disabled.
+ VALRNG_LIKELY_LONG
+};
+
+class FastDivInsertionTask {
+ bool IsValidTask = false;
+ Instruction *SlowDivOrRem = nullptr;
+ IntegerType *BypassType = nullptr;
+ BasicBlock *MainBB = nullptr;
+
+ bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
+ ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
+ QuotRemWithBB createSlowBB(BasicBlock *Successor);
+ QuotRemWithBB createFastBB(BasicBlock *Successor);
+ QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
+ BasicBlock *PhiBB);
+ Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
+ Optional<QuotRemPair> insertFastDivAndRem();
+
+ bool isSignedOp() {
+ return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
+ SlowDivOrRem->getOpcode() == Instruction::SRem;
+ }
+ bool isDivisionOp() {
+ return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
+ SlowDivOrRem->getOpcode() == Instruction::UDiv;
+ }
+ Type *getSlowType() { return SlowDivOrRem->getType(); }
+
+public:
+ FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
+ Value *getReplacement(DivCacheTy &Cache);
+};
+} // anonymous namespace
+
+FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
+ const BypassWidthsTy &BypassWidths) {
+ switch (I->getOpcode()) {
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ SlowDivOrRem = I;
+ break;
+ default:
+ // I is not a div/rem operation.
+ return;
+ }
- if (isa<ConstantInt>(Divisor)) {
- // Division by a constant should have been been solved and replaced earlier
- // in the pipeline.
- return false;
+ // Skip division on vector types. Only optimize integer instructions.
+ IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
+ if (!SlowType)
+ return;
+
+ // Skip if this bitwidth is not bypassed.
+ auto BI = BypassWidths.find(SlowType->getBitWidth());
+ if (BI == BypassWidths.end())
+ return;
+
+ // Get type for div/rem instruction with bypass bitwidth.
+ IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
+ BypassType = BT;
+
+ // The original basic block.
+ MainBB = I->getParent();
+
+ // The instruction is indeed a slow div or rem operation.
+ IsValidTask = true;
+}
+
+/// Reuses a previously-computed quotient or remainder from the current BB if
+/// the operands and operation are identical. Otherwise calls
+/// insertFastDivAndRem to perform the optimization and caches the resulting
+/// quotient and remainder. If no replacement can be generated, nullptr is
+/// returned.
+Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
+ // First, make sure that the task is valid.
+ if (!IsValidTask)
+ return nullptr;
+
+ // Then, look for a value in Cache.
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+ DivOpInfo Key(isSignedOp(), Dividend, Divisor);
+ auto CacheI = Cache.find(Key);
+
+ if (CacheI == Cache.end()) {
+ // If previous instance does not exist, try to insert fast div.
+ Optional<QuotRemPair> OptResult = insertFastDivAndRem();
+ // Bail out if insertFastDivAndRem has failed.
+ if (!OptResult)
+ return nullptr;
+ CacheI = Cache.insert({Key, *OptResult}).first;
}
- // If the numerator is a constant, bail if it doesn't fit into BypassType.
- if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend))
- if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth())
+ QuotRemPair &Value = CacheI->second;
+ return isDivisionOp() ? Value.Quotient : Value.Remainder;
+}
+
+/// \brief Check if a value looks like a hash.
+///
+/// The routine is expected to detect values computed using the most common hash
+/// algorithms. Typically, hash computations end with one of the following
+/// instructions:
+///
+/// 1) MUL with a constant wider than BypassType
+/// 2) XOR instruction
+///
+/// And even if we are wrong and the value is not a hash, it is still quite
+/// unlikely that such values will fit into BypassType.
+///
+/// To detect string hash algorithms like FNV we have to look through PHI-nodes.
+/// It is implemented as a depth-first search for values that look neither long
+/// nor hash-like.
+bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Xor:
+ return true;
+ case Instruction::Mul: {
+ // After Constant Hoisting pass, long constants may be represented as
+ // bitcast instructions. As a result, some constants may look like an
+ // instruction at first, and an additional check is necessary to find out if
+ // an operand is actually a constant.
+ Value *Op1 = I->getOperand(1);
+ ConstantInt *C = dyn_cast<ConstantInt>(Op1);
+ if (!C && isa<BitCastInst>(Op1))
+ C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
+ return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
+ }
+ case Instruction::PHI: {
+ // Stop the IR traversal on pathological input; this limits the recursion
+ // depth.
+ if (Visited.size() >= 16)
return false;
+ // Do not visit nodes that have already been visited. Return true here,
+ // since it means that no value reachable through this node has been shown
+ // to be non-hash-like.
+ if (Visited.find(I) != Visited.end())
+ return true;
+ Visited.insert(I);
+ return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
+ // Ignore undef values as they probably don't affect the division
+ // operands.
+ return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
+ isa<UndefValue>(V);
+ });
+ }
+ default:
+ return false;
+ }
+}
+
+/// Check if an integer value fits into our bypass type.
+ValueRange FastDivInsertionTask::getValueRange(Value *V,
+ VisitedSetTy &Visited) {
+ unsigned ShortLen = BypassType->getBitWidth();
+ unsigned LongLen = V->getType()->getIntegerBitWidth();
+
+ assert(LongLen > ShortLen && "Value type must be wider than BypassType");
+ unsigned HiBits = LongLen - ShortLen;
+
+ const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
+ APInt Zeros(LongLen, 0), Ones(LongLen, 0);
- // Basic Block is split before divide
- BasicBlock *MainBB = &*I->getParent();
- BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
-
- // Add new basic block for slow divide operation
- BasicBlock *SlowBB =
- BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
- SlowBB->moveBefore(SuccessorBB);
- IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
- Value *SlowQuotientV;
- Value *SlowRemainderV;
- if (UseSignedOp) {
- SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
- SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
+ computeKnownBits(V, Zeros, Ones, DL);
+
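+ // If all of the high bits are known to be zero, the value certainly fits
+ // into BypassType.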
+ if (Zeros.countLeadingOnes() >= HiBits)
+ return VALRNG_KNOWN_SHORT;
+
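+ // If any of the high bits is known to be one, the value certainly does not
+ // fit into BypassType.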
+ if (Ones.countLeadingZeros() < HiBits)
+ return VALRNG_LIKELY_LONG;
+
+ // Long integer divisions are often used in hashtable implementations. It's
+ // not worth bypassing such divisions because hash values are extremely
+ // unlikely to have enough leading zeros. The call below tries to detect
+ // values that are unlikely to fit into BypassType (including hashes).
+ if (isHashLikeValue(V, Visited))
+ return VALRNG_LIKELY_LONG;
+
+ return VALRNG_UNKNOWN;
+}
+
+/// Add new basic block for slow div and rem operations and put it before
+/// SuccessorBB.
+QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
+ QuotRemWithBB DivRemPair;
+ DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
+ MainBB->getParent(), SuccessorBB);
+ IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
+
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+
+ if (isSignedOp()) {
+ DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
+ DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
} else {
- SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
- SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
+ DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
+ DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
}
- SlowBuilder.CreateBr(SuccessorBB);
-
- // Add new basic block for fast divide operation
- BasicBlock *FastBB =
- BasicBlock::Create(F->getContext(), "", MainBB->getParent(), SuccessorBB);
- FastBB->moveBefore(SlowBB);
- IRBuilder<> FastBuilder(FastBB, FastBB->begin());
- Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
- BypassType);
- Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
- BypassType);
-
- // udiv/urem because optimization only handles positive numbers
- Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV);
- Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
- ShortDivisorV);
- Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
- ShortQuotientV,
- Dividend->getType());
- Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
- ShortRemainderV,
- Dividend->getType());
- FastBuilder.CreateBr(SuccessorBB);
-
- // Phi nodes for result of div and rem
- IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
- PHINode *QuoPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
- QuoPhi->addIncoming(SlowQuotientV, SlowBB);
- QuoPhi->addIncoming(FastQuotientV, FastBB);
- PHINode *RemPhi = SuccessorBuilder.CreatePHI(I->getType(), 2);
- RemPhi->addIncoming(SlowRemainderV, SlowBB);
- RemPhi->addIncoming(FastRemainderV, FastBB);
-
- // Replace I with appropriate phi node
- if (UseDivOp)
- I->replaceAllUsesWith(QuoPhi);
- else
- I->replaceAllUsesWith(RemPhi);
- I->eraseFromParent();
- // Combine operands into a single value with OR for value testing below
- MainBB->getInstList().back().eraseFromParent();
- IRBuilder<> MainBuilder(MainBB, MainBB->end());
+ Builder.CreateBr(SuccessorBB);
+ return DivRemPair;
+}
+
+/// Add new basic block for fast div and rem operations and put it before
+/// SuccessorBB.
+QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
+ QuotRemWithBB DivRemPair;
+ DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
+ MainBB->getParent(), SuccessorBB);
+ IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
+
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+ Value *ShortDivisorV =
+ Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
+ Value *ShortDividendV =
+ Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
+
+ // udiv/urem because this optimization only handles positive numbers.
+ Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
+ Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
+ DivRemPair.Quotient =
+ Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
+ DivRemPair.Remainder =
+ Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
+ Builder.CreateBr(SuccessorBB);
+
+ return DivRemPair;
+}
- // We should have bailed out above if the divisor is a constant, but the
- // dividend may still be a constant. Set OrV to our non-constant operands
- // OR'ed together.
- assert(!isa<ConstantInt>(Divisor));
+/// Creates Phi nodes for result of Div and Rem.
+QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
+ QuotRemWithBB &RHS,
+ BasicBlock *PhiBB) {
+ IRBuilder<> Builder(PhiBB, PhiBB->begin());
+ PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
+ QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
+ QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
+ PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
+ RemPhi->addIncoming(LHS.Remainder, LHS.BB);
+ RemPhi->addIncoming(RHS.Remainder, RHS.BB);
+ return QuotRemPair(QuoPhi, RemPhi);
+}
+
+/// Creates a runtime check to test whether both the divisor and dividend fit
+/// into BypassType. The check is inserted at the end of MainBB. A true result
+/// means that the operands fit. Either operand may be null if it does not
+/// need a runtime check.
+Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
+ assert((Op1 || Op2) && "Nothing to check");
+ IRBuilder<> Builder(MainBB, MainBB->end());
Value *OrV;
- if (!isa<ConstantInt>(Dividend))
- OrV = MainBuilder.CreateOr(Dividend, Divisor);
+ if (Op1 && Op2)
+ OrV = Builder.CreateOr(Op1, Op2);
else
- OrV = Divisor;
+ OrV = Op1 ? Op1 : Op2;
// BitMask is inverted to check if the operands are
// larger than the bypass type
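+ // For example, bypassing a 64-bit division with a 32-bit one gives
+ // BitMask == 0xFFFFFFFF00000000, so the check below succeeds only when
+ // both operands have their upper 32 bits clear.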
uint64_t BitMask = ~BypassType->getBitMask();
- Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
-
- // Compare operand values and branch
- Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
- Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
- MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
-
- // Cache phi nodes to be used later in place of other instances
- // of div or rem with the same sign, dividend, and divisor
- DivOpInfo Key(UseSignedOp, Dividend, Divisor);
- DivPhiNodes Value(QuoPhi, RemPhi);
- PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
- return true;
+ Value *AndV = Builder.CreateAnd(OrV, BitMask);
+
+ // Compare operand values
+ Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
+ return Builder.CreateICmpEQ(AndV, ZeroV);
}
-// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder from
-// the current BB if operands and operation are identical. Otherwise calls
-// insertFastDiv to perform the optimization and caches the resulting dividend
-// and remainder.
-static bool reuseOrInsertFastDiv(Instruction *I, IntegerType *BypassType,
- bool UseDivOp, bool UseSignedOp,
- DivCacheTy &PerBBDivCache) {
- // Get instruction operands
- DivOpInfo Key(UseSignedOp, I->getOperand(0), I->getOperand(1));
- DivCacheTy::iterator CacheI = PerBBDivCache.find(Key);
-
- if (CacheI == PerBBDivCache.end()) {
- // If previous instance does not exist, insert fast div
- return insertFastDiv(I, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
+/// Substitutes the div/rem instruction with code that checks the value of the
+/// operands and uses a shorter, faster div/rem instruction when possible.
+Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
+ Value *Dividend = SlowDivOrRem->getOperand(0);
+ Value *Divisor = SlowDivOrRem->getOperand(1);
+
+ if (isa<ConstantInt>(Divisor)) {
+ // Keep division by a constant for DAGCombiner.
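+ // DAGCombiner lowers such divisions into cheaper multiply/shift
+ // sequences, so a runtime bypass would only add overhead.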
+ return None;
}
- // Replace operation value with previously generated phi node
- DivPhiNodes &Value = CacheI->second;
- if (UseDivOp) {
- // Replace all uses of div instruction with quotient phi node
- I->replaceAllUsesWith(Value.Quotient);
+ VisitedSetTy SetL;
+ ValueRange DividendRange = getValueRange(Dividend, SetL);
+ if (DividendRange == VALRNG_LIKELY_LONG)
+ return None;
+
+ VisitedSetTy SetR;
+ ValueRange DivisorRange = getValueRange(Divisor, SetR);
+ if (DivisorRange == VALRNG_LIKELY_LONG)
+ return None;
+
+ bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
+ bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
+
+ if (DividendShort && DivisorShort) {
+ // If both operands are known to be short then just replace the long
+ // division with a short one in-place.
+
+ IRBuilder<> Builder(SlowDivOrRem);
+ Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
+ Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
+ Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
+ Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
+ Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
+ Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
+ return QuotRemPair(ExtDiv, ExtRem);
+ } else if (DividendShort && !isSignedOp()) {
+ // If the division is unsigned and Dividend is known to be short, then
+ // either
+ // 1) Divisor is less than or equal to Dividend, and the result can be
+ //    computed with a short division.
+ // 2) Divisor is greater than Dividend. In this case, no division is needed
+ // at all: The quotient is 0 and the remainder is equal to Dividend.
+ //
+ // So instead of checking at runtime whether Divisor fits into BypassType,
+ // we emit a runtime check to differentiate between these two cases. This
+ // lets us entirely avoid a long div.
+
+ // Split the basic block before the div/rem.
+ BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
+ // Remove the unconditional branch from MainBB to SuccessorBB.
+ MainBB->getInstList().back().eraseFromParent();
+ QuotRemWithBB Long;
+ Long.BB = MainBB;
+ Long.Quotient = ConstantInt::get(getSlowType(), 0);
+ Long.Remainder = Dividend;
+ QuotRemWithBB Fast = createFastBB(SuccessorBB);
+ QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
+ IRBuilder<> Builder(MainBB, MainBB->end());
+ Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
+ Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
+ return Result;
} else {
- // Replace all uses of rem instruction with remainder phi node
- I->replaceAllUsesWith(Value.Remainder);
+ // General case. Create both slow and fast div/rem pairs and choose one of
+ // them at runtime.
+
+ // Split the basic block before the div/rem.
+ BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
+ // Remove the unconditional branch from MainBB to SuccessorBB.
+ MainBB->getInstList().back().eraseFromParent();
+ QuotRemWithBB Fast = createFastBB(SuccessorBB);
+ QuotRemWithBB Slow = createSlowBB(SuccessorBB);
+ QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
+ Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
+ DivisorShort ? nullptr : Divisor);
+ IRBuilder<> Builder(MainBB, MainBB->end());
+ Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
+ return Result;
}
-
- // Remove redundant operation
- I->eraseFromParent();
- return true;
}
-// bypassSlowDivision - This optimization identifies DIV instructions in a BB
-// that can be profitably bypassed and carried out with a shorter, faster
-// divide.
-bool llvm::bypassSlowDivision(
- BasicBlock *BB, const DenseMap<unsigned int, unsigned int> &BypassWidths) {
- DivCacheTy DivCache;
+/// This optimization identifies DIV/REM instructions in a BB that can be
+/// profitably bypassed and carried out with a shorter, faster divide.
+bool llvm::bypassSlowDivision(BasicBlock *BB,
+ const BypassWidthsTy &BypassWidths) {
+ DivCacheTy PerBBDivCache;
bool MadeChange = false;
Instruction* Next = &*BB->begin();
@@ -231,42 +459,20 @@ bool llvm::bypassSlowDivision(
Instruction* I = Next;
Next = Next->getNextNode();
- // Get instruction details
- unsigned Opcode = I->getOpcode();
- bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
- bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
- bool UseSignedOp = Opcode == Instruction::SDiv ||
- Opcode == Instruction::SRem;
-
- // Only optimize div or rem ops
- if (!UseDivOp && !UseRemOp)
- continue;
-
- // Skip division on vector types, only optimize integer instructions
- if (!I->getType()->isIntegerTy())
- continue;
-
- // Get bitwidth of div/rem instruction
- IntegerType *T = cast<IntegerType>(I->getType());
- unsigned int bitwidth = T->getBitWidth();
-
- // Continue if bitwidth is not bypassed
- DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);
- if (BI == BypassWidths.end())
- continue;
-
- // Get type for div/rem instruction with bypass bitwidth
- IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
-
- MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
+ FastDivInsertionTask Task(I, BypassWidths);
+ if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
+ I->replaceAllUsesWith(Replacement);
+ I->eraseFromParent();
+ MadeChange = true;
+ }
}
// Above we eagerly create divs and rems, as pairs, so that we can efficiently
// create divrem machine instructions. Now erase any unused divs / rems so we
// don't leave extra instructions sitting around.
- for (auto &KV : DivCache)
- for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder})
- RecursivelyDeleteTriviallyDeadInstructions(Phi);
+ for (auto &KV : PerBBDivCache)
+ for (Value *V : {KV.second.Quotient, KV.second.Remainder})
+ RecursivelyDeleteTriviallyDeadInstructions(V);
return MadeChange;
}
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 69889ec72f90..7a21c03da221 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -31,12 +31,13 @@ add_llvm_library(LLVMTransformUtils
LoopUtils.cpp
LoopVersioning.cpp
LowerInvoke.cpp
+ LowerMemIntrinsics.cpp
LowerSwitch.cpp
Mem2Reg.cpp
- MemorySSA.cpp
MetaRenamer.cpp
ModuleUtils.cpp
NameAnonGlobals.cpp
+ PredicateInfo.cpp
PromoteMemoryToRegister.cpp
StripGCRelocates.cpp
SSAUpdater.cpp
@@ -51,6 +52,7 @@ add_llvm_library(LLVMTransformUtils
UnifyFunctionExitNodes.cpp
Utils.cpp
ValueMapper.cpp
+ VNCoercion.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 4d33e22fecfb..385c12302e04 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -90,9 +90,9 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
assert(VMap.count(&I) && "No mapping from source argument specified!");
#endif
- // Copy all attributes other than those stored in the AttributeSet. We need
- // to remap the parameter indices of the AttributeSet.
- AttributeSet NewAttrs = NewFunc->getAttributes();
+ // Copy all attributes other than those stored in the AttributeList. We need
+ // to remap the parameter indices of the AttributeList.
+ AttributeList NewAttrs = NewFunc->getAttributes();
NewFunc->copyAttributesFrom(OldFunc);
NewFunc->setAttributes(NewAttrs);
@@ -103,22 +103,20 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
TypeMapper, Materializer));
- AttributeSet OldAttrs = OldFunc->getAttributes();
+ SmallVector<AttributeSet, 4> NewArgAttrs(NewFunc->arg_size());
+ AttributeList OldAttrs = OldFunc->getAttributes();
+
// Clone any argument attributes that are present in the VMap.
- for (const Argument &OldArg : OldFunc->args())
+ for (const Argument &OldArg : OldFunc->args()) {
if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) {
- AttributeSet attrs =
- OldAttrs.getParamAttributes(OldArg.getArgNo() + 1);
- if (attrs.getNumSlots() > 0)
- NewArg->addAttr(attrs);
+ NewArgAttrs[NewArg->getArgNo()] =
+ OldAttrs.getParamAttributes(OldArg.getArgNo());
}
+ }
NewFunc->setAttributes(
- NewFunc->getAttributes()
- .addAttributes(NewFunc->getContext(), AttributeSet::ReturnIndex,
- OldAttrs.getRetAttributes())
- .addAttributes(NewFunc->getContext(), AttributeSet::FunctionIndex,
- OldAttrs.getFnAttributes()));
+ AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(),
+ OldAttrs.getRetAttributes(), NewArgAttrs));
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
OldFunc->getAllMetadata(MDs);
@@ -353,7 +351,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
Cond = dyn_cast_or_null<ConstantInt>(V);
}
if (Cond) { // Constant fold to uncond branch!
- SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond);
+ SwitchInst::ConstCaseHandle Case = *SI->findCaseValue(Cond);
BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor());
VMap[OldTI] = BranchInst::Create(Dest, NewBB);
ToClone.push_back(Dest);
@@ -747,3 +745,40 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
return NewLoop;
}
+
+/// \brief Duplicate the non-Phi instructions from the beginning of the block
+/// up to the StopAt instruction into a split block between BB and its
+/// predecessor.
+BasicBlock *
+llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,
+ Instruction *StopAt,
+ ValueToValueMapTy &ValueMapping) {
+ // We are going to have to map operands from the original BB block to the new
+ // copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
+ // account for entry from PredBB.
+ BasicBlock::iterator BI = BB->begin();
+ for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
+ ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
+
+ BasicBlock *NewBB = SplitEdge(PredBB, BB);
+ NewBB->setName(PredBB->getName() + ".split");
+ Instruction *NewTerm = NewBB->getTerminator();
+
+ // Clone the non-phi instructions of BB into NewBB, keeping track of the
+ // mapping and using it to remap operands in the cloned instructions.
+ for (; StopAt != &*BI; ++BI) {
+ Instruction *New = BI->clone();
+ New->setName(BI->getName());
+ New->insertBefore(NewTerm);
+ ValueMapping[&*BI] = New;
+
+ // Remap operands to patch up intra-block references.
+ for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
+ if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) {
+ auto I = ValueMapping.find(Inst);
+ if (I != ValueMapping.end())
+ New->setOperand(i, I->second);
+ }
+ }
+
+ return NewBB;
+}
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 7ebeb615d248..4e9d67252d6c 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -20,6 +20,15 @@
#include "llvm-c/Core.h"
using namespace llvm;
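+// Copy the comdat, if any, from Src to Dst, recreating it in Dst's module and
+// preserving the selection kind.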
+static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) {
+ const Comdat *SC = Src->getComdat();
+ if (!SC)
+ return;
+ Comdat *DC = Dst->getParent()->getOrInsertComdat(SC->getName());
+ DC->setSelectionKind(SC->getSelectionKind());
+ Dst->setComdat(DC);
+}
+
/// This is not as easy as it might seem because we have to worry about making
/// copies of global variables and functions, and making their (initializers and
/// references, respectively) refer to the right globals.
@@ -124,6 +133,8 @@ std::unique_ptr<Module> llvm::CloneModule(
I->getAllMetadata(MDs);
for (auto MD : MDs)
GV->addMetadata(MD.first, *MapMetadata(MD.second, VMap));
+
+ copyComdat(GV, &*I);
}
// Similarly, copy over function bodies now...
@@ -153,6 +164,8 @@ std::unique_ptr<Module> llvm::CloneModule(
if (I.hasPersonalityFn())
F->setPersonalityFn(MapValue(I.getPersonalityFn(), VMap));
+
+ copyComdat(F, &I);
}
// And aliases
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index c514c9c9cd4a..644d93b727b3 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -362,9 +362,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
// "target-features" attribute allowing it to be lowered.
// FIXME: This should be changed to check to see if a specific
// attribute can not be inherited.
- AttributeSet OldFnAttrs = oldFunction->getAttributes().getFnAttributes();
- AttrBuilder AB(OldFnAttrs, AttributeSet::FunctionIndex);
- for (auto Attr : AB.td_attrs())
+ AttrBuilder AB(oldFunction->getAttributes().getFnAttributes());
+ for (const auto &Attr : AB.td_attrs())
newFunction->addFnAttr(Attr.first, Attr.second);
newFunction->getBasicBlockList().push_back(newRootNode);
@@ -440,8 +439,10 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// Emit a call to the new function, passing in: *pointer to struct (if
// aggregating parameters), or plain inputs and allocated memory for outputs
std::vector<Value*> params, StructValues, ReloadOutputs, Reloads;
-
- LLVMContext &Context = newFunction->getContext();
+
+ Module *M = newFunction->getParent();
+ LLVMContext &Context = M->getContext();
+ const DataLayout &DL = M->getDataLayout();
// Add inputs as params, or to be filled into the struct
for (Value *input : inputs)
@@ -456,8 +457,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
StructValues.push_back(output);
} else {
AllocaInst *alloca =
- new AllocaInst(output->getType(), nullptr, output->getName() + ".loc",
- &codeReplacer->getParent()->front().front());
+ new AllocaInst(output->getType(), DL.getAllocaAddrSpace(),
+ nullptr, output->getName() + ".loc",
+ &codeReplacer->getParent()->front().front());
ReloadOutputs.push_back(alloca);
params.push_back(alloca);
}
@@ -473,7 +475,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// Allocate a struct at the beginning of this function
StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
- Struct = new AllocaInst(StructArgTy, nullptr, "structArg",
+ Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr,
+ "structArg",
&codeReplacer->getParent()->front().front());
params.push_back(Struct);
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index 75a1dde57c4c..0eee6e19efac 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -28,15 +28,17 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
return nullptr;
}
+ Function *F = I.getParent()->getParent();
+ const DataLayout &DL = F->getParent()->getDataLayout();
+
// Create a stack slot to hold the value.
AllocaInst *Slot;
if (AllocaPoint) {
- Slot = new AllocaInst(I.getType(), nullptr,
+ Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr,
I.getName()+".reg2mem", AllocaPoint);
} else {
- Function *F = I.getParent()->getParent();
- Slot = new AllocaInst(I.getType(), nullptr, I.getName() + ".reg2mem",
- &F->getEntryBlock().front());
+ Slot = new AllocaInst(I.getType(), DL.getAllocaAddrSpace(), nullptr,
+ I.getName() + ".reg2mem", &F->getEntryBlock().front());
}
// We cannot demote invoke instructions to the stack if their normal edge
@@ -110,14 +112,17 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
return nullptr;
}
+ const DataLayout &DL = P->getModule()->getDataLayout();
+
// Create a stack slot to hold the value.
AllocaInst *Slot;
if (AllocaPoint) {
- Slot = new AllocaInst(P->getType(), nullptr,
+ Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr,
P->getName()+".reg2mem", AllocaPoint);
} else {
Function *F = P->getParent()->getParent();
- Slot = new AllocaInst(P->getType(), nullptr, P->getName() + ".reg2mem",
+ Slot = new AllocaInst(P->getType(), DL.getAllocaAddrSpace(), nullptr,
+ P->getName() + ".reg2mem",
&F->getEntryBlock().front());
}
diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp
index 4adf1754253d..59f176e2f231 100644
--- a/lib/Transforms/Utils/Evaluator.cpp
+++ b/lib/Transforms/Utils/Evaluator.cpp
@@ -16,6 +16,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/GlobalVariable.h"
@@ -486,7 +487,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
ConstantInt *Val =
dyn_cast<ConstantInt>(getVal(SI->getCondition()));
if (!Val) return false; // Cannot determine.
- NextBB = SI->findCaseValue(Val).getCaseSuccessor();
+ NextBB = SI->findCaseValue(Val)->getCaseSuccessor();
} else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp
index 81a7c4ceffab..73a0b2737e95 100644
--- a/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/lib/Transforms/Utils/FunctionComparator.cpp
@@ -74,14 +74,14 @@ int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
return L.compare(R);
}
-int FunctionComparator::cmpAttrs(const AttributeSet L,
- const AttributeSet R) const {
+int FunctionComparator::cmpAttrs(const AttributeList L,
+ const AttributeList R) const {
if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots()))
return Res;
for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) {
- AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i),
- RE = R.end(i);
+ AttributeList::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i),
+ RE = R.end(i);
for (; LI != LE && RI != RE; ++LI, ++RI) {
Attribute LA = *LI;
Attribute RA = *RI;
diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp
index 9844190ef84a..b00f4b14068a 100644
--- a/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -21,11 +21,11 @@ using namespace llvm;
/// Checks if we should import SGV as a definition, otherwise import as a
/// declaration.
bool FunctionImportGlobalProcessing::doImportAsDefinition(
- const GlobalValue *SGV, DenseSet<const GlobalValue *> *GlobalsToImport) {
+ const GlobalValue *SGV, SetVector<GlobalValue *> *GlobalsToImport) {
// For alias, we tie the definition to the base object. Extract it and recurse
if (auto *GA = dyn_cast<GlobalAlias>(SGV)) {
- if (GA->hasWeakAnyLinkage())
+ if (GA->isInterposable())
return false;
const GlobalObject *GO = GA->getBaseObject();
if (!GO->hasLinkOnceODRLinkage())
@@ -34,7 +34,7 @@ bool FunctionImportGlobalProcessing::doImportAsDefinition(
GO, GlobalsToImport);
}
// Only import the globals requested for importing.
- if (GlobalsToImport->count(SGV))
+ if (GlobalsToImport->count(const_cast<GlobalValue *>(SGV)))
return true;
// Otherwise no.
return false;
@@ -57,7 +57,8 @@ bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
return false;
if (isPerformingImport()) {
- assert((!GlobalsToImport->count(SGV) || !isNonRenamableLocal(*SGV)) &&
+ assert((!GlobalsToImport->count(const_cast<GlobalValue *>(SGV)) ||
+ !isNonRenamableLocal(*SGV)) &&
"Attempting to promote non-renamable local");
// We don't know for sure yet if we are importing this value (as either
// a reference or a def), since we are simply walking all values in the
@@ -254,9 +255,8 @@ bool FunctionImportGlobalProcessing::run() {
return false;
}
-bool llvm::renameModuleForThinLTO(
- Module &M, const ModuleSummaryIndex &Index,
- DenseSet<const GlobalValue *> *GlobalsToImport) {
+bool llvm::renameModuleForThinLTO(Module &M, const ModuleSummaryIndex &Index,
+ SetVector<GlobalValue *> *GlobalsToImport) {
FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport);
return ThinLTOProcessing.run();
}
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
index 74ebcda8355c..ba4b78ac758a 100644
--- a/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -10,9 +10,22 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cassert>
using namespace llvm;
@@ -175,13 +188,9 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
return false;
}
+GlobalStatus::GlobalStatus() = default;
+
bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) {
SmallPtrSet<const PHINode *, 16> PhiUsers;
return analyzeGlobalAux(V, GS, PhiUsers);
}
-
-GlobalStatus::GlobalStatus()
- : IsCompared(false), IsLoaded(false), StoredType(NotStored),
- StoredOnceValue(nullptr), AccessingFunction(nullptr),
- HasMultipleAccessingFunctions(false), HasNonInstructionUser(false),
- Ordering(AtomicOrdering::NotAtomic) {}
diff --git a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
index ed018bb73107..b8c12ad5ea84 100644
--- a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
+++ b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
@@ -62,6 +62,8 @@ void ImportedFunctionsInliningStatistics::recordInline(const Function &Caller,
void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) {
ModuleName = M.getName();
for (const auto &F : M.functions()) {
+ if (F.isDeclaration())
+ continue;
AllFunctions++;
ImportedFunctions += int(F.getMetadata("thinlto_src_module") != nullptr);
}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index a40079ca8e76..5d6fbc3325ff 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -20,10 +20,12 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
@@ -40,8 +42,8 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
@@ -1107,26 +1109,23 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
bool DTCalculated = false;
Function *CalledFunc = CS.getCalledFunction();
- for (Function::arg_iterator I = CalledFunc->arg_begin(),
- E = CalledFunc->arg_end();
- I != E; ++I) {
- unsigned Align = I->getType()->isPointerTy() ? I->getParamAlignment() : 0;
- if (Align && !I->hasByValOrInAllocaAttr() && !I->hasNUses(0)) {
+ for (Argument &Arg : CalledFunc->args()) {
+ unsigned Align = Arg.getType()->isPointerTy() ? Arg.getParamAlignment() : 0;
+ if (Align && !Arg.hasByValOrInAllocaAttr() && !Arg.hasNUses(0)) {
if (!DTCalculated) {
- DT.recalculate(const_cast<Function&>(*CS.getInstruction()->getParent()
- ->getParent()));
+ DT.recalculate(*CS.getCaller());
DTCalculated = true;
}
// If we can already prove the asserted alignment in the context of the
// caller, then don't bother inserting the assumption.
- Value *Arg = CS.getArgument(I->getArgNo());
- if (getKnownAlignment(Arg, DL, CS.getInstruction(), AC, &DT) >= Align)
+ Value *ArgVal = CS.getArgument(Arg.getArgNo());
+ if (getKnownAlignment(ArgVal, DL, CS.getInstruction(), AC, &DT) >= Align)
continue;
- CallInst *NewAssumption = IRBuilder<>(CS.getInstruction())
- .CreateAlignmentAssumption(DL, Arg, Align);
- AC->registerAssumption(NewAssumption);
+ CallInst *NewAsmp = IRBuilder<>(CS.getInstruction())
+ .CreateAlignmentAssumption(DL, ArgVal, Align);
+ AC->registerAssumption(NewAsmp);
}
}
}
@@ -1140,7 +1139,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
ValueToValueMapTy &VMap,
InlineFunctionInfo &IFI) {
CallGraph &CG = *IFI.CG;
- const Function *Caller = CS.getInstruction()->getParent()->getParent();
+ const Function *Caller = CS.getCaller();
const Function *Callee = CS.getCalledFunction();
CallGraphNode *CalleeNode = CG[Callee];
CallGraphNode *CallerNode = CG[Caller];
@@ -1225,7 +1224,8 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
PointerType *ArgTy = cast<PointerType>(Arg->getType());
Type *AggTy = ArgTy->getElementType();
- Function *Caller = TheCall->getParent()->getParent();
+ Function *Caller = TheCall->getFunction();
+ const DataLayout &DL = Caller->getParent()->getDataLayout();
// If the called function is readonly, then it could not mutate the caller's
// copy of the byval'd memory. In this case, it is safe to elide the copy and
@@ -1239,31 +1239,30 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
AssumptionCache *AC =
IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr;
- const DataLayout &DL = Caller->getParent()->getDataLayout();
// If the pointer is already known to be sufficiently aligned, or if we can
// round it up to a larger alignment, then we don't need a temporary.
if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, AC) >=
ByValAlignment)
return Arg;
-
+
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
// for code quality, but rarely happens and is required for correctness.
}
// Create the alloca. If we have DataLayout, use nice alignment.
- unsigned Align =
- Caller->getParent()->getDataLayout().getPrefTypeAlignment(AggTy);
+ unsigned Align = DL.getPrefTypeAlignment(AggTy);
// If the byval had an alignment specified, we *must* use at least that
// alignment, as it is required by the byval argument (and uses of the
// pointer inside the callee).
Align = std::max(Align, ByValAlignment);
-
- Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(),
+
+ Value *NewAlloca = new AllocaInst(AggTy, DL.getAllocaAddrSpace(),
+ nullptr, Align, Arg->getName(),
&*Caller->begin()->begin());
IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
-
+
// Uses of the argument in the function should use our new alloca
// instead.
return NewAlloca;
@@ -1393,6 +1392,89 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
}
}
}
+/// Update the block frequencies of the caller after a callee has been inlined.
+///
+/// Each block cloned into the caller has its block frequency scaled by the
+/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
+/// callee's entry block gets the same frequency as the callsite block and the
+/// relative frequencies of all cloned blocks remain the same after cloning.
+static void updateCallerBFI(BasicBlock *CallSiteBlock,
+ const ValueToValueMapTy &VMap,
+ BlockFrequencyInfo *CallerBFI,
+ BlockFrequencyInfo *CalleeBFI,
+ const BasicBlock &CalleeEntryBlock) {
+ SmallPtrSet<BasicBlock *, 16> ClonedBBs;
+ for (auto const &Entry : VMap) {
+ if (!isa<BasicBlock>(Entry.first) || !Entry.second)
+ continue;
+ auto *OrigBB = cast<BasicBlock>(Entry.first);
+ auto *ClonedBB = cast<BasicBlock>(Entry.second);
+ uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency();
+ if (!ClonedBBs.insert(ClonedBB).second) {
+ // Multiple blocks in the callee might get mapped to one cloned block in
+ // the caller since we prune the callee as we clone it. When that happens,
+ // we want to use the maximum among the original blocks' frequencies.
+ uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency();
+ if (NewFreq > Freq)
+ Freq = NewFreq;
+ }
+ CallerBFI->setBlockFreq(ClonedBB, Freq);
+ }
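+ // Pin the entry clone to the call site's frequency and scale the remaining
+ // cloned blocks by the same ratio.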
+ BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));
+ CallerBFI->setBlockFreqAndScale(
+ EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(),
+ ClonedBBs);
+}
+
+/// Update the branch metadata for cloned call instructions.
+static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
+ const Optional<uint64_t> &CalleeEntryCount,
+ const Instruction *TheCall) {
+ if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1)
+ return;
+ Optional<uint64_t> CallSiteCount =
+ ProfileSummaryInfo::getProfileCount(TheCall, nullptr);
+ uint64_t CallCount =
+ std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0,
+ CalleeEntryCount.getValue());
+
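+ // Scale the weights of the cloned calls by CallCount/CalleeEntryCount, and
+ // the calls remaining in the original callee body by
+ // (CalleeEntryCount - CallCount)/CalleeEntryCount.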
+ for (auto const &Entry : VMap)
+ if (isa<CallInst>(Entry.first))
+ if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
+ CI->updateProfWeight(CallCount, CalleeEntryCount.getValue());
+ for (BasicBlock &BB : *Callee)
+ // No need to update the callsite if it is pruned during inlining.
+ if (VMap.count(&BB))
+ for (Instruction &I : BB)
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ CI->updateProfWeight(CalleeEntryCount.getValue() - CallCount,
+ CalleeEntryCount.getValue());
+}
+
+/// Update the entry count of callee after inlining.
+///
+/// The callsite's block count is subtracted from the callee's function entry
+/// count.
+static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB,
+ Instruction *CallInst, Function *Callee) {
+ // If the callee has an original count of N, and the estimated count of
+ // callsite is M, the new callee count is set to N - M. M is estimated from
+ // the caller's entry count, its entry block frequency and the block frequency
+ // of the callsite.
+ Optional<uint64_t> CalleeCount = Callee->getEntryCount();
+ if (!CalleeCount.hasValue())
+ return;
+ Optional<uint64_t> CallCount =
+ ProfileSummaryInfo::getProfileCount(CallInst, CallerBFI);
+ if (!CallCount.hasValue())
+ return;
+ // Since CallCount is an estimate, it could exceed the original callee
+ // count; in that case the new callee count has to be clamped to 0.
+ if (CallCount.getValue() > CalleeCount.getValue())
+ Callee->setEntryCount(0);
+ else
+ Callee->setEntryCount(CalleeCount.getValue() - CallCount.getValue());
+}
/// This function inlines the called function into the basic block of the
/// caller. This returns false if it is not possible to inline this call.
@@ -1405,13 +1487,13 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
AAResults *CalleeAAR, bool InsertLifetime) {
Instruction *TheCall = CS.getInstruction();
- assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
- "Instruction not in function!");
+ assert(TheCall->getParent() && TheCall->getFunction()
+ && "Instruction not in function!");
// If IFI has any state in it, zap it before we fill it in.
IFI.reset();
-
- const Function *CalledFunc = CS.getCalledFunction();
+
+ Function *CalledFunc = CS.getCalledFunction();
if (!CalledFunc || // Can't inline external function or indirect
CalledFunc->isDeclaration() || // call, or call to a vararg function!
CalledFunc->getFunctionType()->isVarArg()) return false;
@@ -1548,7 +1630,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// matches up the formal to the actual argument values.
CallSite::arg_iterator AI = CS.arg_begin();
unsigned ArgNo = 0;
- for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
+ for (Function::arg_iterator I = CalledFunc->arg_begin(),
E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
Value *ActualArg = *AI;
@@ -1578,10 +1660,18 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
/*ModuleLevelChanges=*/false, Returns, ".i",
&InlinedFunctionInfo, TheCall);
-
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
+ if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
+ // Update the BFI of blocks cloned into the caller.
+ updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
+ CalledFunc->front());
+
+ updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall);
+ // Update the profile count of callee.
+ updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc);
+
// Inject byval arguments initialization.
for (std::pair<Value*, Value*> &Init : ByValInit)
HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
@@ -2087,6 +2177,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
CalledFunc->getName() + ".exit");
}
+ if (IFI.CallerBFI) {
+ // Copy original BB's block frequency to AfterCallBB
+ IFI.CallerBFI->setBlockFreq(
+ AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency());
+ }
+
// Change the branch that used to go to AfterCallBB to branch to the first
// basic block of the inlined function.
//
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 68c6b74d5e5b..49b4bd92faf4 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -87,7 +87,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
Instruction *I = Worklist.pop_back_val();
BasicBlock *InstBB = I->getParent();
Loop *L = LI.getLoopFor(InstBB);
- if (!LoopExitBlocks.count(L))
+ assert(L && "Instruction belongs to a BB that's not part of a loop");
+ if (!LoopExitBlocks.count(L))
L->getExitBlocks(LoopExitBlocks[L]);
assert(LoopExitBlocks.count(L));
const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L];
@@ -105,7 +106,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
for (Use &U : I->uses()) {
Instruction *User = cast<Instruction>(U.getUser());
BasicBlock *UserBB = User->getParent();
- if (PHINode *PN = dyn_cast<PHINode>(User))
+ if (auto *PN = dyn_cast<PHINode>(User))
UserBB = PN->getIncomingBlock(U);
if (InstBB != UserBB && !L->contains(UserBB))
@@ -123,7 +124,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// DomBB dominates the value, so adjust DomBB to the normal destination
// block, which is effectively where the value is first usable.
BasicBlock *DomBB = InstBB;
- if (InvokeInst *Inv = dyn_cast<InvokeInst>(I))
+ if (auto *Inv = dyn_cast<InvokeInst>(I))
DomBB = Inv->getNormalDest();
DomTreeNode *DomNode = DT.getNode(DomBB);
@@ -188,7 +189,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// block.
Instruction *User = cast<Instruction>(UseToRewrite->getUser());
BasicBlock *UserBB = User->getParent();
- if (PHINode *PN = dyn_cast<PHINode>(User))
+ if (auto *PN = dyn_cast<PHINode>(User))
UserBB = PN->getIncomingBlock(*UseToRewrite);
if (isa<PHINode>(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) {
@@ -237,40 +238,75 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
return Changed;
}
-/// Return true if the specified block dominates at least
-/// one of the blocks in the specified list.
-static bool
-blockDominatesAnExit(BasicBlock *BB,
- DominatorTree &DT,
- const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
- DomTreeNode *DomNode = DT.getNode(BB);
- return any_of(ExitBlocks, [&](BasicBlock *EB) {
- return DT.dominates(DomNode, DT.getNode(EB));
- });
+// Compute the set of BasicBlocks in the loop `L` dominating at least one exit.
+static void computeBlocksDominatingExits(
+ Loop &L, DominatorTree &DT, SmallVector<BasicBlock *, 8> &ExitBlocks,
+ SmallPtrSet<BasicBlock *, 8> &BlocksDominatingExits) {
+ SmallVector<BasicBlock *, 8> BBWorklist;
+
+ // We start from the exit blocks, as every block trivially dominates itself
+ // (not strictly).
+ for (BasicBlock *BB : ExitBlocks)
+ BBWorklist.push_back(BB);
+
+ while (!BBWorklist.empty()) {
+ BasicBlock *BB = BBWorklist.pop_back_val();
+
+ // Check if this is a loop header. If this is the case, we're done.
+ if (L.getHeader() == BB)
+ continue;
+
+ // Otherwise, add its immediate dominator to the worklist, unless we
+ // visited it already.
+ BasicBlock *IDomBB = DT.getNode(BB)->getIDom()->getBlock();
+
+ // Exit blocks can have an immediate dominator not belonging to the
+ // loop. If an exit block is immediately dominated by a block outside the
+ // loop, then not all paths from that dominator to the exit block go
+ // through the loop.
+ // Example:
+ //
+ // |---- A
+ // | |
+ // | B<--
+ // | | |
+ // |---> C --
+ // |
+ // D
+ //
+ // C is the exit block of the loop and it's immediately dominated by A,
+ // which doesn't belong to the loop.
+ if (!L.contains(IDomBB))
+ continue;
+
+ if (BlocksDominatingExits.insert(IDomBB).second)
+ BBWorklist.push_back(IDomBB);
+ }
}
bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI,
ScalarEvolution *SE) {
bool Changed = false;
- // Get the set of exiting blocks.
SmallVector<BasicBlock *, 8> ExitBlocks;
L.getExitBlocks(ExitBlocks);
-
if (ExitBlocks.empty())
return false;
+ SmallPtrSet<BasicBlock *, 8> BlocksDominatingExits;
+
+ // We want to avoid use-scanning by leveraging dominance information.
+ // If a block doesn't dominate any of the loop exits, then none of the
+ // values defined in that block can be used outside the loop.
+ // We compute the set of blocks fulfilling this condition in advance by
+ // walking the dominator tree upwards until we hit a loop header.
+ computeBlocksDominatingExits(L, DT, ExitBlocks, BlocksDominatingExits);
+
SmallVector<Instruction *, 8> Worklist;
// Look at all the instructions in the loop, checking to see if they have uses
// outside the loop. If so, put them into the worklist to rewrite those uses.
- for (BasicBlock *BB : L.blocks()) {
- // For large loops, avoid use-scanning by using dominance information: In
- // particular, if a block does not dominate any of the loop exits, then none
- // of the values defined in the block could be used outside the loop.
- if (!blockDominatesAnExit(BB, DT, ExitBlocks))
- continue;
-
+ for (BasicBlock *BB : BlocksDominatingExits) {
for (Instruction &I : *BB) {
// Reject two common cases fast: instructions with no uses (like stores)
// and instructions with one use that is in the same block as this.
@@ -395,8 +431,8 @@ PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) {
if (!formLCSSAOnAllLoops(&LI, DT, SE))
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<BasicAA>();
PA.preserve<GlobalsAA>();
PA.preserve<SCEVAA>();
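For context, LCSSA form requires every value defined in a loop and used outside of it to flow through a PHI node in an exit block. A minimal sketch of that rewrite, assuming ExitBB has a single in-loop predecessor (illustrative only; the pass above handles the general multi-exit case):

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Route an out-of-loop use of Inst through a single-entry ".lcssa" phi.
    static void insertLCSSAPhi(Instruction *Inst, BasicBlock *ExitBB,
                               BasicBlock *InLoopPred,
                               Instruction *OutsideUser) {
      PHINode *PN = PHINode::Create(Inst->getType(), /*NumReservedValues=*/1,
                                    Inst->getName() + ".lcssa",
                                    &ExitBB->front());
      PN->addIncoming(Inst, InLoopPred);
      OutsideUser->replaceUsesOfWith(Inst, PN);
    }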
diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index d97cd7582eaa..fe93d6927c63 100644
--- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -100,12 +100,12 @@ private:
bool perform(CallInst *CI);
void checkCandidate(CallInst &CI);
void shrinkWrapCI(CallInst *CI, Value *Cond);
- bool performCallDomainErrorOnly(CallInst *CI, const LibFunc::Func &Func);
- bool performCallErrors(CallInst *CI, const LibFunc::Func &Func);
- bool performCallRangeErrorOnly(CallInst *CI, const LibFunc::Func &Func);
- Value *generateOneRangeCond(CallInst *CI, const LibFunc::Func &Func);
- Value *generateTwoRangeCond(CallInst *CI, const LibFunc::Func &Func);
- Value *generateCondForPow(CallInst *CI, const LibFunc::Func &Func);
+ bool performCallDomainErrorOnly(CallInst *CI, const LibFunc &Func);
+ bool performCallErrors(CallInst *CI, const LibFunc &Func);
+ bool performCallRangeErrorOnly(CallInst *CI, const LibFunc &Func);
+ Value *generateOneRangeCond(CallInst *CI, const LibFunc &Func);
+ Value *generateTwoRangeCond(CallInst *CI, const LibFunc &Func);
+ Value *generateCondForPow(CallInst *CI, const LibFunc &Func);
// Create an OR of two conditions.
Value *createOrCond(CallInst *CI, CmpInst::Predicate Cmp, float Val,
@@ -141,44 +141,44 @@ private:
// Perform the transformation to calls with errno set by domain error.
bool LibCallsShrinkWrap::performCallDomainErrorOnly(CallInst *CI,
- const LibFunc::Func &Func) {
+ const LibFunc &Func) {
Value *Cond = nullptr;
switch (Func) {
- case LibFunc::acos: // DomainError: (x < -1 || x > 1)
- case LibFunc::acosf: // Same as acos
- case LibFunc::acosl: // Same as acos
- case LibFunc::asin: // DomainError: (x < -1 || x > 1)
- case LibFunc::asinf: // Same as asin
- case LibFunc::asinl: // Same as asin
+ case LibFunc_acos: // DomainError: (x < -1 || x > 1)
+ case LibFunc_acosf: // Same as acos
+ case LibFunc_acosl: // Same as acos
+ case LibFunc_asin: // DomainError: (x < -1 || x > 1)
+ case LibFunc_asinf: // Same as asin
+ case LibFunc_asinl: // Same as asin
{
++NumWrappedTwoCond;
Cond = createOrCond(CI, CmpInst::FCMP_OLT, -1.0f, CmpInst::FCMP_OGT, 1.0f);
break;
}
- case LibFunc::cos: // DomainError: (x == +inf || x == -inf)
- case LibFunc::cosf: // Same as cos
- case LibFunc::cosl: // Same as cos
- case LibFunc::sin: // DomainError: (x == +inf || x == -inf)
- case LibFunc::sinf: // Same as sin
- case LibFunc::sinl: // Same as sin
+ case LibFunc_cos: // DomainError: (x == +inf || x == -inf)
+ case LibFunc_cosf: // Same as cos
+ case LibFunc_cosl: // Same as cos
+ case LibFunc_sin: // DomainError: (x == +inf || x == -inf)
+ case LibFunc_sinf: // Same as sin
+ case LibFunc_sinl: // Same as sin
{
++NumWrappedTwoCond;
Cond = createOrCond(CI, CmpInst::FCMP_OEQ, INFINITY, CmpInst::FCMP_OEQ,
-INFINITY);
break;
}
- case LibFunc::acosh: // DomainError: (x < 1)
- case LibFunc::acoshf: // Same as acosh
- case LibFunc::acoshl: // Same as acosh
+ case LibFunc_acosh: // DomainError: (x < 1)
+ case LibFunc_acoshf: // Same as acosh
+ case LibFunc_acoshl: // Same as acosh
{
++NumWrappedOneCond;
Cond = createCond(CI, CmpInst::FCMP_OLT, 1.0f);
break;
}
- case LibFunc::sqrt: // DomainError: (x < 0)
- case LibFunc::sqrtf: // Same as sqrt
- case LibFunc::sqrtl: // Same as sqrt
+ case LibFunc_sqrt: // DomainError: (x < 0)
+ case LibFunc_sqrtf: // Same as sqrt
+ case LibFunc_sqrtl: // Same as sqrt
{
++NumWrappedOneCond;
Cond = createCond(CI, CmpInst::FCMP_OLT, 0.0f);
@@ -193,31 +193,31 @@ bool LibCallsShrinkWrap::performCallDomainErrorOnly(CallInst *CI,
// Perform the transformation to calls with errno set by range error.
bool LibCallsShrinkWrap::performCallRangeErrorOnly(CallInst *CI,
- const LibFunc::Func &Func) {
+ const LibFunc &Func) {
Value *Cond = nullptr;
switch (Func) {
- case LibFunc::cosh:
- case LibFunc::coshf:
- case LibFunc::coshl:
- case LibFunc::exp:
- case LibFunc::expf:
- case LibFunc::expl:
- case LibFunc::exp10:
- case LibFunc::exp10f:
- case LibFunc::exp10l:
- case LibFunc::exp2:
- case LibFunc::exp2f:
- case LibFunc::exp2l:
- case LibFunc::sinh:
- case LibFunc::sinhf:
- case LibFunc::sinhl: {
+ case LibFunc_cosh:
+ case LibFunc_coshf:
+ case LibFunc_coshl:
+ case LibFunc_exp:
+ case LibFunc_expf:
+ case LibFunc_expl:
+ case LibFunc_exp10:
+ case LibFunc_exp10f:
+ case LibFunc_exp10l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2l:
+ case LibFunc_sinh:
+ case LibFunc_sinhf:
+ case LibFunc_sinhl: {
Cond = generateTwoRangeCond(CI, Func);
break;
}
- case LibFunc::expm1: // RangeError: (709, inf)
- case LibFunc::expm1f: // RangeError: (88, inf)
- case LibFunc::expm1l: // RangeError: (11356, inf)
+ case LibFunc_expm1: // RangeError: (709, inf)
+ case LibFunc_expm1f: // RangeError: (88, inf)
+ case LibFunc_expm1l: // RangeError: (11356, inf)
{
Cond = generateOneRangeCond(CI, Func);
break;
@@ -231,15 +231,15 @@ bool LibCallsShrinkWrap::performCallRangeErrorOnly(CallInst *CI,
// Perform the transformation to calls with errno set by combination of errors.
bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
- const LibFunc::Func &Func) {
+ const LibFunc &Func) {
Value *Cond = nullptr;
switch (Func) {
- case LibFunc::atanh: // DomainError: (x < -1 || x > 1)
+ case LibFunc_atanh: // DomainError: (x < -1 || x > 1)
// PoleError: (x == -1 || x == 1)
// Overall Cond: (x <= -1 || x >= 1)
- case LibFunc::atanhf: // Same as atanh
- case LibFunc::atanhl: // Same as atanh
+ case LibFunc_atanhf: // Same as atanh
+ case LibFunc_atanhl: // Same as atanh
{
if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError)
return false;
@@ -247,20 +247,20 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f);
break;
}
- case LibFunc::log: // DomainError: (x < 0)
+ case LibFunc_log: // DomainError: (x < 0)
// PoleError: (x == 0)
// Overall Cond: (x <= 0)
- case LibFunc::logf: // Same as log
- case LibFunc::logl: // Same as log
- case LibFunc::log10: // Same as log
- case LibFunc::log10f: // Same as log
- case LibFunc::log10l: // Same as log
- case LibFunc::log2: // Same as log
- case LibFunc::log2f: // Same as log
- case LibFunc::log2l: // Same as log
- case LibFunc::logb: // Same as log
- case LibFunc::logbf: // Same as log
- case LibFunc::logbl: // Same as log
+ case LibFunc_logf: // Same as log
+ case LibFunc_logl: // Same as log
+ case LibFunc_log10: // Same as log
+ case LibFunc_log10f: // Same as log
+ case LibFunc_log10l: // Same as log
+ case LibFunc_log2: // Same as log
+ case LibFunc_log2f: // Same as log
+ case LibFunc_log2l: // Same as log
+ case LibFunc_logb: // Same as log
+ case LibFunc_logbf: // Same as log
+ case LibFunc_logbl: // Same as log
{
if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError)
return false;
@@ -268,11 +268,11 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f);
break;
}
- case LibFunc::log1p: // DomainError: (x < -1)
+ case LibFunc_log1p: // DomainError: (x < -1)
// PoleError: (x == -1)
// Overall Cond: (x <= -1)
- case LibFunc::log1pf: // Same as log1p
- case LibFunc::log1pl: // Same as log1p
+ case LibFunc_log1pf: // Same as log1p
+ case LibFunc_log1pl: // Same as log1p
{
if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError)
return false;
@@ -280,11 +280,11 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f);
break;
}
- case LibFunc::pow: // DomainError: x < 0 and y is noninteger
+ case LibFunc_pow: // DomainError: x < 0 and y is noninteger
// PoleError: x == 0 and y < 0
// RangeError: overflow or underflow
- case LibFunc::powf:
- case LibFunc::powl: {
+ case LibFunc_powf:
+ case LibFunc_powl: {
if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError ||
!LibCallsShrinkWrapDoRangeError)
return false;
@@ -313,7 +313,7 @@ void LibCallsShrinkWrap::checkCandidate(CallInst &CI) {
if (!CI.use_empty())
return;
- LibFunc::Func Func;
+ LibFunc Func;
Function *Callee = CI.getCalledFunction();
if (!Callee)
return;
@@ -333,16 +333,16 @@ void LibCallsShrinkWrap::checkCandidate(CallInst &CI) {
// Generate the upper bound condition for RangeError.
Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI,
- const LibFunc::Func &Func) {
+ const LibFunc &Func) {
float UpperBound;
switch (Func) {
- case LibFunc::expm1: // RangeError: (709, inf)
+ case LibFunc_expm1: // RangeError: (709, inf)
UpperBound = 709.0f;
break;
- case LibFunc::expm1f: // RangeError: (88, inf)
+ case LibFunc_expm1f: // RangeError: (88, inf)
UpperBound = 88.0f;
break;
- case LibFunc::expm1l: // RangeError: (11356, inf)
+ case LibFunc_expm1l: // RangeError: (11356, inf)
UpperBound = 11356.0f;
break;
default:
@@ -355,57 +355,57 @@ Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI,
// Generate the lower and upper bound condition for RangeError.
Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI,
- const LibFunc::Func &Func) {
+ const LibFunc &Func) {
float UpperBound, LowerBound;
switch (Func) {
- case LibFunc::cosh: // RangeError: (x < -710 || x > 710)
- case LibFunc::sinh: // Same as cosh
+ case LibFunc_cosh: // RangeError: (x < -710 || x > 710)
+ case LibFunc_sinh: // Same as cosh
LowerBound = -710.0f;
UpperBound = 710.0f;
break;
- case LibFunc::coshf: // RangeError: (x < -89 || x > 89)
- case LibFunc::sinhf: // Same as coshf
+ case LibFunc_coshf: // RangeError: (x < -89 || x > 89)
+ case LibFunc_sinhf: // Same as coshf
LowerBound = -89.0f;
UpperBound = 89.0f;
break;
- case LibFunc::coshl: // RangeError: (x < -11357 || x > 11357)
- case LibFunc::sinhl: // Same as coshl
+ case LibFunc_coshl: // RangeError: (x < -11357 || x > 11357)
+ case LibFunc_sinhl: // Same as coshl
LowerBound = -11357.0f;
UpperBound = 11357.0f;
break;
- case LibFunc::exp: // RangeError: (x < -745 || x > 709)
+ case LibFunc_exp: // RangeError: (x < -745 || x > 709)
LowerBound = -745.0f;
UpperBound = 709.0f;
break;
- case LibFunc::expf: // RangeError: (x < -103 || x > 88)
+ case LibFunc_expf: // RangeError: (x < -103 || x > 88)
LowerBound = -103.0f;
UpperBound = 88.0f;
break;
- case LibFunc::expl: // RangeError: (x < -11399 || x > 11356)
+ case LibFunc_expl: // RangeError: (x < -11399 || x > 11356)
LowerBound = -11399.0f;
UpperBound = 11356.0f;
break;
- case LibFunc::exp10: // RangeError: (x < -323 || x > 308)
+ case LibFunc_exp10: // RangeError: (x < -323 || x > 308)
LowerBound = -323.0f;
UpperBound = 308.0f;
break;
- case LibFunc::exp10f: // RangeError: (x < -45 || x > 38)
+ case LibFunc_exp10f: // RangeError: (x < -45 || x > 38)
LowerBound = -45.0f;
UpperBound = 38.0f;
break;
- case LibFunc::exp10l: // RangeError: (x < -4950 || x > 4932)
+ case LibFunc_exp10l: // RangeError: (x < -4950 || x > 4932)
LowerBound = -4950.0f;
UpperBound = 4932.0f;
break;
- case LibFunc::exp2: // RangeError: (x < -1074 || x > 1023)
+ case LibFunc_exp2: // RangeError: (x < -1074 || x > 1023)
LowerBound = -1074.0f;
UpperBound = 1023.0f;
break;
- case LibFunc::exp2f: // RangeError: (x < -149 || x > 127)
+ case LibFunc_exp2f: // RangeError: (x < -149 || x > 127)
LowerBound = -149.0f;
UpperBound = 127.0f;
break;
- case LibFunc::exp2l: // RangeError: (x < -16445 || x > 11383)
+ case LibFunc_exp2l: // RangeError: (x < -16445 || x > 11383)
LowerBound = -16445.0f;
UpperBound = 11383.0f;
break;
@@ -434,9 +434,9 @@ Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI,
// (i.e. we might invoke the calls that will not set the errno.).
//
Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,
- const LibFunc::Func &Func) {
- // FIXME: LibFunc::powf and powl TBD.
- if (Func != LibFunc::pow) {
+ const LibFunc &Func) {
+ // FIXME: LibFunc_powf and powl TBD.
+ if (Func != LibFunc_pow) {
DEBUG(dbgs() << "Not handled powf() and powl()\n");
return nullptr;
}
@@ -516,7 +516,7 @@ void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) {
// Perform the transformation to a single candidate.
bool LibCallsShrinkWrap::perform(CallInst *CI) {
- LibFunc::Func Func;
+ LibFunc Func;
Function *Callee = CI->getCalledFunction();
assert(Callee && "perform() should apply to a non-empty callee");
TLI.getLibFunc(*Callee, Func);
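In source terms, the pass guards a dead libcall, kept only for its errno side effect, with the exact condition under which errno can be set. A hand-written sketch of the resulting shape for sqrtf:

    #include <cmath>

    void use_errno_only(float x) {
      // Before: the call is dead except for errno, so it always executes.
      (void)sqrtf(x);
    }

    void shrink_wrapped(float x) {
      // After: only invoke the call when the domain error (x < 0) is possible.
      if (x < 0.0f)
        (void)sqrtf(x);
    }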
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 6e4174aa0cda..18b29226c2ef 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -126,21 +126,20 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// If the default is unreachable, ignore it when searching for TheOnlyDest.
if (isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg()) &&
SI->getNumCases() > 0) {
- TheOnlyDest = SI->case_begin().getCaseSuccessor();
+ TheOnlyDest = SI->case_begin()->getCaseSuccessor();
}
// Figure out which case it goes to.
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
- i != e; ++i) {
+ for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) {
// Found case matching a constant operand?
- if (i.getCaseValue() == CI) {
- TheOnlyDest = i.getCaseSuccessor();
+ if (i->getCaseValue() == CI) {
+ TheOnlyDest = i->getCaseSuccessor();
break;
}
// Check to see if this branch is going to the same place as the default
// dest. If so, eliminate it as an explicit compare.
- if (i.getCaseSuccessor() == DefaultDest) {
+ if (i->getCaseSuccessor() == DefaultDest) {
MDNode *MD = SI->getMetadata(LLVMContext::MD_prof);
unsigned NCases = SI->getNumCases();
// Fold the case metadata into the default if there will be any branches
@@ -154,7 +153,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
Weights.push_back(CI->getValue().getZExtValue());
}
// Merge weight of this case to the default weight.
- unsigned idx = i.getCaseIndex();
+ unsigned idx = i->getCaseIndex();
Weights[0] += Weights[idx+1];
// Remove weight for this case.
std::swap(Weights[idx+1], Weights.back());
@@ -165,15 +164,19 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
}
// Remove this entry.
DefaultDest->removePredecessor(SI->getParent());
- SI->removeCase(i);
- --i; --e;
+ i = SI->removeCase(i);
+ e = SI->case_end();
continue;
}
// Otherwise, check to see if the switch only branches to one destination.
// We do this by reseting "TheOnlyDest" to null when we find two non-equal
// destinations.
- if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = nullptr;
+ if (i->getCaseSuccessor() != TheOnlyDest)
+ TheOnlyDest = nullptr;
+
+ // Increment this iterator as we haven't removed the case.
+ ++i;
}
if (CI && !TheOnlyDest) {
@@ -209,7 +212,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
if (SI->getNumCases() == 1) {
// Otherwise, we can fold this switch into a conditional branch
// instruction if it has only one non-default destination.
- SwitchInst::CaseIt FirstCase = SI->case_begin();
+ auto FirstCase = *SI->case_begin();
Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
FirstCase.getCaseValue(), "cond");
@@ -287,7 +290,15 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
///
bool llvm::isInstructionTriviallyDead(Instruction *I,
const TargetLibraryInfo *TLI) {
- if (!I->use_empty() || isa<TerminatorInst>(I)) return false;
+ if (!I->use_empty())
+ return false;
+ return wouldInstructionBeTriviallyDead(I, TLI);
+}
+
+bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
+ const TargetLibraryInfo *TLI) {
+ if (isa<TerminatorInst>(I))
+ return false;
// We don't want the landingpad-like instructions removed by anything this
// general.
@@ -307,7 +318,8 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
return true;
}
- if (!I->mayHaveSideEffects()) return true;
+ if (!I->mayHaveSideEffects())
+ return true;
// Special case intrinsics that "may have side effects" but can be deleted
// when dead.
@@ -334,7 +346,8 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
}
}
- if (isAllocLikeFn(I, TLI)) return true;
+ if (isAllocLikeFn(I, TLI))
+ return true;
if (CallInst *CI = isFreeCall(I, TLI))
if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
@@ -1075,11 +1088,11 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
// Since we can't guarantee that the original dbg.declare instrinsic
// is removed by LowerDbgDeclare(), we need to make sure that we are
// not inserting the same dbg.value intrinsic over and over.
- DbgValueList DbgValues;
- FindAllocaDbgValues(DbgValues, APN);
- for (auto DVI : DbgValues) {
- assert (DVI->getValue() == APN);
- assert (DVI->getOffset() == 0);
+ SmallVector<DbgValueInst *, 1> DbgValues;
+ findDbgValues(DbgValues, APN);
+ for (auto *DVI : DbgValues) {
+ assert(DVI->getValue() == APN);
+ assert(DVI->getOffset() == 0);
if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr))
return true;
}
@@ -1241,9 +1254,7 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
return nullptr;
}
-/// FindAllocaDbgValues - Finds the llvm.dbg.value intrinsics describing the
-/// alloca 'V', if any.
-void llvm::FindAllocaDbgValues(DbgValueList &DbgValues, Value *V) {
+void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
if (auto *L = LocalAsMetadata::getIfExists(V))
if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
for (User *U : MDV->users())
@@ -1251,36 +1262,32 @@ void llvm::FindAllocaDbgValues(DbgValueList &DbgValues, Value *V) {
DbgValues.push_back(DVI);
}
-static void DIExprAddDeref(SmallVectorImpl<uint64_t> &Expr) {
- Expr.push_back(dwarf::DW_OP_deref);
-}
-
-static void DIExprAddOffset(SmallVectorImpl<uint64_t> &Expr, int Offset) {
+static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) {
if (Offset > 0) {
- Expr.push_back(dwarf::DW_OP_plus);
- Expr.push_back(Offset);
+ Ops.push_back(dwarf::DW_OP_plus);
+ Ops.push_back(Offset);
} else if (Offset < 0) {
- Expr.push_back(dwarf::DW_OP_minus);
- Expr.push_back(-Offset);
+ Ops.push_back(dwarf::DW_OP_minus);
+ Ops.push_back(-Offset);
}
}
-static DIExpression *BuildReplacementDIExpr(DIBuilder &Builder,
- DIExpression *DIExpr, bool Deref,
- int Offset) {
+/// Prepend \p DIExpr with a deref and offset operation.
+static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr,
+ bool Deref, int64_t Offset) {
if (!Deref && !Offset)
return DIExpr;
// Create a copy of the original DIDescriptor for user variable, prepending
// "deref" operation to a list of address elements, as new llvm.dbg.declare
// will take a value storing address of the memory for variable, not
// alloca itself.
- SmallVector<uint64_t, 4> NewDIExpr;
+ SmallVector<uint64_t, 4> Ops;
if (Deref)
- DIExprAddDeref(NewDIExpr);
- DIExprAddOffset(NewDIExpr, Offset);
+ Ops.push_back(dwarf::DW_OP_deref);
+ appendOffset(Ops, Offset);
if (DIExpr)
- NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
- return Builder.createExpression(NewDIExpr);
+ Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ return Builder.createExpression(Ops);
}
bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
@@ -1294,7 +1301,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
auto *DIExpr = DDI->getExpression();
assert(DIVar && "Missing variable");
- DIExpr = BuildReplacementDIExpr(Builder, DIExpr, Deref, Offset);
+ DIExpr = prependDIExpr(Builder, DIExpr, Deref, Offset);
// Insert llvm.dbg.declare immediately after the original alloca, and remove
// old llvm.dbg.declare.
@@ -1326,11 +1333,11 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
// Insert the offset immediately after the first deref.
// We could just change the offset argument of dbg.value, but it's unsigned...
if (Offset) {
- SmallVector<uint64_t, 4> NewDIExpr;
- DIExprAddDeref(NewDIExpr);
- DIExprAddOffset(NewDIExpr, Offset);
- NewDIExpr.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());
- DIExpr = Builder.createExpression(NewDIExpr);
+ SmallVector<uint64_t, 4> Ops;
+ Ops.push_back(dwarf::DW_OP_deref);
+ appendOffset(Ops, Offset);
+ Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());
+ DIExpr = Builder.createExpression(Ops);
}
Builder.insertDbgValueIntrinsic(NewAddress, DVI->getOffset(), DIVar, DIExpr,
@@ -1349,6 +1356,53 @@ void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
}
}
+void llvm::salvageDebugInfo(Instruction &I) {
+ SmallVector<DbgValueInst *, 1> DbgValues;
+ auto &M = *I.getModule();
+
+ auto MDWrap = [&](Value *V) {
+ return MetadataAsValue::get(I.getContext(), ValueAsMetadata::get(V));
+ };
+
+ if (isa<BitCastInst>(&I)) {
+ findDbgValues(DbgValues, &I);
+ for (auto *DVI : DbgValues) {
+ // Bitcasts are entirely irrelevant for debug info. Rewrite the dbg.value
+ // to use the cast's source.
+ DVI->setOperand(0, MDWrap(I.getOperand(0)));
+ DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
+ }
+ } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ findDbgValues(DbgValues, &I);
+ for (auto *DVI : DbgValues) {
+ unsigned BitWidth =
+ M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace());
+ APInt Offset(BitWidth, 0);
+ // Rewrite a constant GEP into a DIExpression.
+ if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) {
+ auto *DIExpr = DVI->getExpression();
+ DIBuilder DIB(M, /*AllowUnresolved*/ false);
+ // GEP offsets are i32 and thus always fit into an int64_t.
+ DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue());
+ DVI->setOperand(0, MDWrap(I.getOperand(0)));
+ DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
+ DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
+ }
+ }
+ } else if (isa<LoadInst>(&I)) {
+ findDbgValues(DbgValues, &I);
+ for (auto *DVI : DbgValues) {
+ // Rewrite the load into DW_OP_deref.
+ auto *DIExpr = DVI->getExpression();
+ DIBuilder DIB(M, /*AllowUnresolved*/ false);
+ DIExpr = prependDIExpr(DIB, DIExpr, WithDeref, 0);
+ DVI->setOperand(0, MDWrap(I.getOperand(0)));
+ DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));
+ DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n');
+ }
+ }
+}
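Each case above re-points the dbg.value at the instruction's operand and folds the lost computation into the DIExpression. The expression-building step in isolation, mirroring prependDIExpr with plain containers (opcode values are the standard DWARF ones):

    #include <cstdint>
    #include <vector>

    // DWARF expression opcodes (values from the DWARF specification).
    enum : uint64_t { DW_OP_deref = 0x06, DW_OP_minus = 0x1c,
                      DW_OP_plus = 0x22 };

    // Prepend a deref and/or constant offset to an existing expression.
    static std::vector<uint64_t> prependOps(const std::vector<uint64_t> &Expr,
                                            bool Deref, int64_t Offset) {
      std::vector<uint64_t> Ops;
      if (Deref)
        Ops.push_back(DW_OP_deref);
      if (Offset > 0) {
        Ops.push_back(DW_OP_plus);
        Ops.push_back(static_cast<uint64_t>(Offset));
      } else if (Offset < 0) {
        Ops.push_back(DW_OP_minus);
        Ops.push_back(static_cast<uint64_t>(-Offset));
      }
      Ops.insert(Ops.end(), Expr.begin(), Expr.end());
      return Ops;
    }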
+
unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
unsigned NumDeadInst = 0;
// Delete the instructions backwards, as it has a reduced likelihood of
@@ -2068,9 +2122,9 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
void llvm::maybeMarkSanitizerLibraryCallNoBuiltin(
CallInst *CI, const TargetLibraryInfo *TLI) {
Function *F = CI->getCalledFunction();
- LibFunc::Func Func;
+ LibFunc Func;
if (F && !F->hasLocalLinkage() && F->hasName() &&
TLI->getLibFunc(F->getName(), Func) && TLI->hasOptimizedCodeGen(Func) &&
!F->doesNotAccessMemory())
- CI->addAttribute(AttributeSet::FunctionIndex, Attribute::NoBuiltin);
+ CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin);
}
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 00cda2af00c6..e7ba19665d59 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -645,14 +645,7 @@ ReprocessLoop:
// loop-invariant instructions out of the way to open up more
// opportunities, and the disadvantage of having the responsibility
// to preserve dominator information.
- bool UniqueExit = true;
- if (!ExitBlocks.empty())
- for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
- if (ExitBlocks[i] != ExitBlocks[0]) {
- UniqueExit = false;
- break;
- }
- if (UniqueExit) {
+ if (ExitBlockSet.size() == 1) {
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BasicBlock *ExitingBlock = ExitingBlocks[i];
if (!ExitingBlock->getSinglePredecessor()) continue;
@@ -735,6 +728,17 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
bool PreserveLCSSA) {
bool Changed = false;
+#ifndef NDEBUG
+ // If we're asked to preserve LCSSA, the loop nest needs to start in LCSSA
+ // form.
+ if (PreserveLCSSA) {
+ assert(DT && "DT not available.");
+ assert(LI && "LI not available.");
+ assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
+ "Requested to preserve LCSSA, but it's already broken.");
+ }
+#endif
+
// Worklist maintains our depth-first queue of loops in this nest to process.
SmallVector<Loop *, 4> Worklist;
Worklist.push_back(L);
@@ -814,15 +818,6 @@ bool LoopSimplify::runOnFunction(Function &F) {
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-#ifndef NDEBUG
- if (PreserveLCSSA) {
- assert(DT && "DT not available.");
- assert(LI && "LI not available.");
- bool InLCSSA = all_of(
- *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); });
- assert(InLCSSA && "Requested to preserve LCSSA, but it's already broken.");
- }
-#endif
// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
@@ -846,17 +841,14 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
- // FIXME: This pass should verify that the loops on which it's operating
- // are in canonical SSA form, and that the pass itself preserves this form.
+ // Note that we don't preserve LCSSA in the new PM; if you need it, run
+ // LCSSA after simplifying the loops.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, SE, AC, true /* PreserveLCSSA */);
-
- // FIXME: We need to invalidate this to avoid PR28400. Is there a better
- // solution?
- AM.invalidate<ScalarEvolutionAnalysis>(F);
+ Changed |= simplifyLoop(*I, DT, LI, SE, AC, /*PreserveLCSSA*/ false);
if (!Changed)
return PreservedAnalyses::all();
+
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
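Given the note above, a new-PM pipeline that still wants LCSSA schedules it explicitly after the simplification pass. A minimal sketch, assuming the standard pass headers:

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Utils/LCSSA.h"
    #include "llvm/Transforms/Utils/LoopSimplify.h"

    using namespace llvm;

    // Canonicalize loop structure first, then restore LCSSA form.
    static FunctionPassManager makeLoopCanonicalizationPipeline() {
      FunctionPassManager FPM;
      FPM.addPass(LoopSimplifyPass());
      FPM.addPass(LCSSAPass());
      return FPM;
    }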
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index e346ebd6a000..3c669ce644e2 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
@@ -51,6 +52,16 @@ UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
cl::desc("Allow runtime unrolled loops to be unrolled "
"with epilog instead of prolog."));
+static cl::opt<bool>
+UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,
+ cl::desc("Verify domtree after unrolling"),
+#ifdef NDEBUG
+ cl::init(false)
+#else
+ cl::init(true)
+#endif
+ );
+
/// Convert the instruction operands from referencing the current values into
/// those specified by VMap.
static inline void remapInstruction(Instruction *I,
@@ -205,6 +216,45 @@ const Loop* llvm::addClonedBlockToLoopInfo(BasicBlock *OriginalBB,
}
}
+/// The function chooses which type of unroll (epilog or prolog) is more
+/// profitable.
+/// Epilog unroll is more profitable when there is a PHI that starts from a
+/// constant: in this case the epilog will leave the PHI starting from the
+/// constant, whereas the prolog will convert it to a non-constant.
+///
+/// loop:
+/// PN = PHI [I, Latch], [CI, PreHeader]
+/// I = foo(PN)
+/// ...
+///
+/// Epilog unroll case.
+/// loop:
+/// PN = PHI [I2, Latch], [CI, PreHeader]
+/// I1 = foo(PN)
+/// I2 = foo(I1)
+/// ...
+/// Prolog unroll case.
+/// NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
+/// loop:
+/// PN = PHI [I2, Latch], [NewPN, PreHeader]
+/// I1 = foo(PN)
+/// I2 = foo(I1)
+/// ...
+///
+static bool isEpilogProfitable(Loop *L) {
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ assert(PreHeader && Header);
+ for (Instruction &BBI : *Header) {
+ PHINode *PN = dyn_cast<PHINode>(&BBI);
+ if (!PN)
+ break;
+ if (isa<ConstantInt>(PN->getIncomingValueForBlock(PreHeader)))
+ return true;
+ }
+ return false;
+}
+
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
@@ -296,8 +346,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
Count = TripCount;
// Don't enter the unroll code if there is nothing to do.
- if (TripCount == 0 && Count < 2 && PeelCount == 0)
+ if (TripCount == 0 && Count < 2 && PeelCount == 0) {
+ DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");
return false;
+ }
assert(Count > 0);
assert(TripMultiple > 0);
@@ -330,7 +382,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
"and peeling for the same loop");
if (PeelCount)
- peelLoop(L, PeelCount, LI, SE, DT, PreserveLCSSA);
+ peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
// Loops containing convergent instructions must have a count that divides
// their TripMultiple.
@@ -346,14 +398,22 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
"convergent operation.");
});
+ bool EpilogProfitability =
+ UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
+ : isEpilogProfitable(L);
+
if (RuntimeTripCount && TripMultiple % Count != 0 &&
!UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
- UnrollRuntimeEpilog, LI, SE, DT,
+ EpilogProfitability, LI, SE, DT,
PreserveLCSSA)) {
if (Force)
RuntimeTripCount = false;
- else
+ else {
+ DEBUG(
+ dbgs() << "Wont unroll; remainder loop could not be generated"
+ "when assuming runtime trip count\n");
return false;
+ }
}
// Notify ScalarEvolution that the loop will be substantially changed,
@@ -446,6 +506,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
for (Loop *SubLoop : *L)
LoopsToSimplify.insert(SubLoop);
+ if (Header->getParent()->isDebugInfoForProfiling())
+ for (BasicBlock *BB : L->getBlocks())
+ for (Instruction &I : *BB)
+ if (const DILocation *DIL = I.getDebugLoc())
+ I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count));
+
for (unsigned It = 1; It != Count; ++It) {
std::vector<BasicBlock*> NewBlocks;
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
@@ -456,19 +522,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
Header->getParent()->getBasicBlockList().push_back(New);
+ assert((*BB != Header || LI->getLoopFor(*BB) == L) &&
+ "Header should not be in a sub-loop");
// Tell LI about New.
- if (*BB == Header) {
- assert(LI->getLoopFor(*BB) == L && "Header should not be in a sub-loop");
- L->addBasicBlockToLoop(New, *LI);
- } else {
- const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
- if (OldLoop) {
- LoopsToSimplify.insert(NewLoops[OldLoop]);
+ const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
+ if (OldLoop) {
+ LoopsToSimplify.insert(NewLoops[OldLoop]);
- // Forget the old loop, since its inputs may have changed.
- if (SE)
- SE->forgetLoop(OldLoop);
- }
+ // Forget the old loop, since its inputs may have changed.
+ if (SE)
+ SE->forgetLoop(OldLoop);
}
if (*BB == Header)
@@ -615,14 +678,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
Term->eraseFromParent();
}
}
+
// Update dominators of blocks we might reach through exits.
// Immediate dominator of such block might change, because we add more
// routes which can lead to the exit: we can now reach it from the copied
- // iterations too. Thus, the new idom of the block will be the nearest
- // common dominator of the previous idom and common dominator of all copies of
- // the previous idom. This is equivalent to the nearest common dominator of
- // the previous idom and the first latch, which dominates all copies of the
- // previous idom.
+ // iterations too.
if (DT && Count > 1) {
for (auto *BB : OriginalLoopBlocks) {
auto *BBDomNode = DT->getNode(BB);
@@ -632,12 +692,38 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
if (!L->contains(ChildBB))
ChildrenToUpdate.push_back(ChildBB);
}
- BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latches[0]);
+ BasicBlock *NewIDom;
+ if (BB == LatchBlock) {
+ // The latch is special because we emit unconditional branches in
+ // some cases where the original loop contained a conditional branch.
+ // Since the latch is always at the bottom of the loop, if the latch
+ // dominated an exit before unrolling, the new dominator of that exit
+ // must also be a latch. Specifically, the dominator is the first
+ // latch which ends in a conditional branch, or the last latch if
+ // there is no such latch.
+ NewIDom = Latches.back();
+ for (BasicBlock *IterLatch : Latches) {
+ TerminatorInst *Term = IterLatch->getTerminator();
+ if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
+ NewIDom = IterLatch;
+ break;
+ }
+ }
+ } else {
+ // The new idom of the block will be the nearest common dominator
+ // of all copies of the previous idom. This is equivalent to the
+ // nearest common dominator of the previous idom and the first latch,
+ // which dominates all copies of the previous idom.
+ NewIDom = DT->findNearestCommonDominator(BB, LatchBlock);
+ }
for (auto *ChildBB : ChildrenToUpdate)
DT->changeImmediateDominator(ChildBB, NewIDom);
}
}
+ if (DT && UnrollVerifyDomtree)
+ DT->verifyDomTree();
+
// Merge adjacent basic blocks, if possible.
SmallPtrSet<Loop *, 4> ForgottenLoops;
for (BasicBlock *Latch : Latches) {
@@ -655,13 +741,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
}
}
- // FIXME: We only preserve DT info for complete unrolling now. Incrementally
- // updating domtree after partial loop unrolling should also be easy.
- if (DT && !CompletelyUnroll)
- DT->recalculate(*L->getHeader()->getParent());
- else if (DT)
- DEBUG(DT->verifyDomTree());
-
// Simplify any new induction variables in the partially unrolled loop.
if (SE && !CompletelyUnroll && Count > 1) {
SmallVector<WeakVH, 16> DeadInsts;
@@ -721,29 +800,29 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
// at least one layer outside of the loop that was unrolled so that any
// changes to the parent loop exposed by the unrolling are considered.
if (DT) {
- if (!OuterL && !CompletelyUnroll)
- OuterL = L;
if (OuterL) {
// OuterL includes all loops for which we can break loop-simplify, so
// it's sufficient to simplify only it (it'll recursively simplify inner
// loops too).
+ if (NeedToFixLCSSA) {
+ // LCSSA must be performed on the outermost affected loop. The unrolled
+ // loop's last loop latch is guaranteed to be in the outermost loop
+ // after LoopInfo's been updated by markAsRemoved.
+ Loop *LatchLoop = LI->getLoopFor(Latches.back());
+ Loop *FixLCSSALoop = OuterL;
+ if (!FixLCSSALoop->contains(LatchLoop))
+ while (FixLCSSALoop->getParentLoop() != LatchLoop)
+ FixLCSSALoop = FixLCSSALoop->getParentLoop();
+
+ formLCSSARecursively(*FixLCSSALoop, *DT, LI, SE);
+ } else if (PreserveLCSSA) {
+ assert(OuterL->isLCSSAForm(*DT) &&
+ "Loops should be in LCSSA form after loop-unroll.");
+ }
+
// TODO: That potentially might be compile-time expensive. We should try
// to fix the loop-simplified form incrementally.
simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);
-
- // LCSSA must be performed on the outermost affected loop. The unrolled
- // loop's last loop latch is guaranteed to be in the outermost loop after
- // LoopInfo's been updated by markAsRemoved.
- Loop *LatchLoop = LI->getLoopFor(Latches.back());
- if (!OuterL->contains(LatchLoop))
- while (OuterL->getParentLoop() != LatchLoop)
- OuterL = OuterL->getParentLoop();
-
- if (NeedToFixLCSSA)
- formLCSSARecursively(*OuterL, *DT, LI, SE);
- else
- assert(OuterL->isLCSSAForm(*DT) &&
- "Loops should be in LCSSA form after loop-unroll.");
} else {
// Simplify loops for which we might've broken loop-simplify form.
for (Loop *SubLoop : LoopsToSimplify)
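Earlier in this file, the epilog/prolog decision honors an explicit -unroll-runtime-epilog setting and only otherwise consults isEpilogProfitable. The occurrence-count pattern in isolation (the flag name here is made up for illustration):

    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<bool> DemoEpilog("demo-epilog",
                                          llvm::cl::init(false));

    // Prefer what the user asked for on the command line; fall back to the
    // heuristic only when the flag never appeared.
    static bool chooseEpilog(bool HeuristicSaysEpilog) {
      return DemoEpilog.getNumOccurrences() ? DemoEpilog : HeuristicSaysEpilog;
    }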
diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 842cf31f2e3d..73c14f5606b7 100644
--- a/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -28,6 +28,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <algorithm>
@@ -55,12 +56,20 @@ static bool canPeel(Loop *L) {
if (!L->getExitingBlock() || !L->getUniqueExitBlock())
return false;
+ // Don't try to peel loops where the latch is not the exiting block.
+ // This can be an indication of two different things:
+ // 1) The loop is not rotated.
+ // 2) The loop contains irreducible control flow that involves the latch.
+ if (L->getLoopLatch() != L->getExitingBlock())
+ return false;
+
return true;
}
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
- TargetTransformInfo::UnrollingPreferences &UP) {
+ TargetTransformInfo::UnrollingPreferences &UP,
+ unsigned &TripCount) {
UP.PeelCount = 0;
if (!canPeel(L))
return;
@@ -69,6 +78,39 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
if (!L->empty())
return;
+ // Try to find a Phi node that takes a loop-invariant value as the input
+ // from its only back edge. If there is such a Phi, peeling one iteration
+ // from the loop is profitable, because starting from the 2nd iteration we
+ // will have the invariant instead of this Phi.
+ if (LoopSize <= UP.Threshold) {
+ BasicBlock *BackEdge = L->getLoopLatch();
+ assert(BackEdge && "Loop is not in simplified form?");
+ BasicBlock *Header = L->getHeader();
+ // Iterate over Phis to find one with invariant input on back edge.
+ bool FoundCandidate = false;
+ PHINode *Phi;
+ for (auto BI = Header->begin(); isa<PHINode>(&*BI); ++BI) {
+ Phi = cast<PHINode>(&*BI);
+ Value *Input = Phi->getIncomingValueForBlock(BackEdge);
+ if (L->isLoopInvariant(Input)) {
+ FoundCandidate = true;
+ break;
+ }
+ }
+ if (FoundCandidate) {
+ DEBUG(dbgs() << "Peel one iteration to get rid of " << *Phi
+ << " because starting from 2nd iteration it is always"
+ << " an invariant\n");
+ UP.PeelCount = 1;
+ return;
+ }
+ }
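In source terms, the heuristic targets a loop like the following; after peeling one iteration the PHI disappears and the body reads the invariant directly (sketch):

    void use(int);  // opaque sink, for illustration

    // Before peeling: the header PHI 'p' carries 'init' only on iteration 0,
    // and the loop-invariant 'inv' on every back edge.
    void before(int n, int init, int inv) {
      int p = init;
      for (int i = 0; i < n; ++i) {
        use(p);
        p = inv;
      }
    }

    // After peeling one iteration, the PHI is gone: from the 2nd iteration
    // on, the loop body uses the invariant directly.
    void after(int n, int init, int inv) {
      if (n > 0) {
        use(init);             // peeled first iteration
        for (int i = 1; i < n; ++i)
          use(inv);
      }
    }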
+
+ // Bail out if we know the statically calculated trip count.
+ // In that case we would rather use partial unrolling.
+ if (TripCount)
+ return;
+
// If the user provided a peel count, use that.
bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
if (UserPeelCount) {
@@ -164,7 +206,8 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
BasicBlock *InsertBot, BasicBlock *Exit,
SmallVectorImpl<BasicBlock *> &NewBlocks,
LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
- ValueToValueMapTy &LVMap, LoopInfo *LI) {
+ ValueToValueMapTy &LVMap, DominatorTree *DT,
+ LoopInfo *LI) {
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
@@ -185,6 +228,17 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
ParentLoop->addBasicBlockToLoop(NewBB, *LI);
VMap[*BB] = NewBB;
+
+ // If dominator tree is available, insert nodes to represent cloned blocks.
+ if (DT) {
+ if (Header == *BB)
+ DT->addNewBlock(NewBB, InsertTop);
+ else {
+ DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
+ // VMap must contain an entry for IDom, as the iteration order is RPO.
+ DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
+ }
+ }
}
// Hook-up the control flow for the newly inserted blocks.
@@ -198,11 +252,13 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
// The backedge now goes to the "bottom", which is either the loop's real
// header (for the last peeled iteration) or the copied header of the next
// iteration (for every other iteration)
- BranchInst *LatchBR =
- cast<BranchInst>(cast<BasicBlock>(VMap[Latch])->getTerminator());
+ BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
+ BranchInst *LatchBR = cast<BranchInst>(NewLatch->getTerminator());
unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
LatchBR->setSuccessor(HeaderIdx, InsertBot);
LatchBR->setSuccessor(1 - HeaderIdx, Exit);
+ if (DT)
+ DT->changeImmediateDominator(InsertBot, NewLatch);
// The new copy of the loop body starts with a bunch of PHI nodes
// that pick an incoming value from either the preheader, or the previous
@@ -257,7 +313,7 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
/// optimizations.
bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
ScalarEvolution *SE, DominatorTree *DT,
- bool PreserveLCSSA) {
+ AssumptionCache *AC, bool PreserveLCSSA) {
if (!canPeel(L))
return false;
@@ -358,7 +414,24 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
CurHeaderWeight = 1;
cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
- NewBlocks, LoopBlocks, VMap, LVMap, LI);
+ NewBlocks, LoopBlocks, VMap, LVMap, DT, LI);
+
+ // Remap to use values from the current iteration instead of the
+ // previous one.
+ remapInstructionsInBlocks(NewBlocks, VMap);
+
+ if (DT) {
+ // The latches of the cloned iterations dominate the loop exit, so the
+ // idom of the latter is the latch of the first cloned iteration, just as
+ // the original preheader dominates the original loop body.
+ if (Iter == 0)
+ DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch]));
+#ifndef NDEBUG
+ if (VerifyDomInfo)
+ DT->verifyDomTree();
+#endif
+ }
+
updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter,
PeelCount, ExitWeight);
@@ -369,10 +442,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
F->getBasicBlockList().splice(InsertTop->getIterator(),
F->getBasicBlockList(),
NewBlocks[0]->getIterator(), F->end());
-
- // Remap to use values from the current iteration instead of the
- // previous one.
- remapInstructionsInBlocks(NewBlocks, VMap);
}
// Now adjust the phi nodes in the loop header to get their initial values
@@ -405,9 +474,16 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
}
// If the loop is nested, we changed the parent loop, update SE.
- if (Loop *ParentLoop = L->getParentLoop())
+ if (Loop *ParentLoop = L->getParentLoop()) {
SE->forgetLoop(ParentLoop);
+ // FIXME: Incrementally update loop-simplify
+ simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA);
+ } else {
+ // FIXME: Incrementally update loop-simplify
+ simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA);
+ }
+
NumPeeled++;
return true;
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index d3ea1564115b..85db734fb182 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -146,6 +146,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// Add the branch to the exit block (around the unrolled loop)
B.CreateCondBr(BrLoopExit, Exit, NewPreHeader);
InsertPt->eraseFromParent();
+ if (DT)
+ DT->changeImmediateDominator(Exit, PrologExit);
}
/// Connect the unrolling epilog code to the original loop.
@@ -260,13 +262,20 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
IRBuilder<> B(InsertPt);
Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
assert(Exit && "Loop must have a single exit block only");
- // Split the exit to maintain loop canonicalization guarantees
+ // Split the epilogue exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
PreserveLCSSA);
// Add the branch to the exit block (around the unrolling loop)
B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
InsertPt->eraseFromParent();
+ if (DT)
+ DT->changeImmediateDominator(Exit, NewExit);
+
+ // Split the main loop exit to maintain canonicalization guarantees.
+ SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
+ SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI,
+ PreserveLCSSA);
}
/// Create a clone of the blocks in a loop and connect them together.
@@ -284,27 +293,17 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
BasicBlock *Preheader,
std::vector<BasicBlock *> &NewBlocks,
LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
- LoopInfo *LI) {
+ DominatorTree *DT, LoopInfo *LI) {
StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
Function *F = Header->getParent();
LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
- Loop *NewLoop = nullptr;
Loop *ParentLoop = L->getParentLoop();
- if (CreateRemainderLoop) {
- NewLoop = new Loop();
- if (ParentLoop)
- ParentLoop->addChildLoop(NewLoop);
- else
- LI->addTopLevelLoop(NewLoop);
- }
-
NewLoopsMap NewLoops;
- if (NewLoop)
- NewLoops[L] = NewLoop;
- else if (ParentLoop)
+ NewLoops[ParentLoop] = ParentLoop;
+ if (!CreateRemainderLoop)
NewLoops[L] = ParentLoop;
// For each block in the original loop, create a new copy,
@@ -312,7 +311,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
NewBlocks.push_back(NewBB);
-
+
// If we're unrolling the outermost loop, there's no remainder loop,
// and this block isn't in a nested loop, then the new block is not
// in any loop. Otherwise, add it to loopinfo.
@@ -326,6 +325,17 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
InsertTop->getTerminator()->setSuccessor(0, NewBB);
}
+ if (DT) {
+ if (Header == *BB) {
+ // The header is dominated by the preheader.
+ DT->addNewBlock(NewBB, InsertTop);
+ } else {
+ // Copy information from original loop to unrolled loop.
+ BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
+ DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
+ }
+ }
+
if (Latch == *BB) {
// For the last block, if CreateRemainderLoop is false, create a direct
// jump to InsertBot. If not, create a loop back to cloned head.
@@ -376,7 +386,9 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter,
NewPHI->setIncomingValue(idx, V);
}
}
- if (NewLoop) {
+ if (CreateRemainderLoop) {
+ Loop *NewLoop = NewLoops[L];
+ assert(NewLoop && "L should have been cloned");
// Add unroll disable metadata to disable future unrolling for this loop.
SmallVector<Metadata *, 4> MDs;
// Reserve first location for self reference to the LoopID metadata node.
@@ -599,6 +611,12 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
// Branch to either remainder (extra iterations) loop or unrolling loop.
B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
PreHeaderBR->eraseFromParent();
+ if (DT) {
+ if (UseEpilogRemainder)
+ DT->changeImmediateDominator(NewExit, PreHeader);
+ else
+ DT->changeImmediateDominator(PrologExit, PreHeader);
+ }
Function *F = Header->getParent();
// Get an ordered list of blocks in the loop to help with the ordering of the
// cloned blocks in the prolog/epilog code
@@ -623,7 +641,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
- InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI);
+ InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
// Insert the cloned blocks into the function.
F->getBasicBlockList().splice(InsertBot->getIterator(),
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index c8efa9efc7f3..175d013a011d 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -230,8 +230,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// - PHI:
// - All uses of the PHI must be the reduction (safe).
// - Otherwise, not safe.
- // - By one instruction outside of the loop (safe).
- // - By further instructions outside of the loop (not safe).
+ // - By instructions outside of the loop (safe).
+ // * One value may have several outside users, but all outside
+ // uses must be of the same value.
// - By an instruction that is not part of the reduction (not safe).
// This is either:
// * An instruction type other than PHI or the reduction operation.
@@ -297,10 +298,15 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// Check if we found the exit user.
BasicBlock *Parent = UI->getParent();
if (!TheLoop->contains(Parent)) {
- // Exit if you find multiple outside users or if the header phi node is
- // being used. In this case the user uses the value of the previous
- // iteration, in which case we would loose "VF-1" iterations of the
- // reduction operation if we vectorize.
+ // If we already know this instruction is used externally, move on to
+ // the next user.
+ if (ExitInstruction == Cur)
+ continue;
+
+ // Exit if you find multiple values used outside or if the header phi
+ // node is being used. In this case the user uses the value of the
+ // previous iteration, in which case we would lose "VF-1" iterations of
+ // the reduction operation if we vectorize.
if (ExitInstruction != nullptr || Cur == Phi)
return false;
@@ -547,13 +553,14 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
return false;
- // Ensure every user of the phi node is dominated by the previous value. The
- // dominance requirement ensures the loop vectorizer will not need to
+ // Ensure every user of the phi node is dominated by the previous value.
+ // The dominance requirement ensures the loop vectorizer will not need to
// vectorize the initial value prior to the first iteration of the loop.
for (User *U : Phi->users())
- if (auto *I = dyn_cast<Instruction>(U))
+ if (auto *I = dyn_cast<Instruction>(U)) {
if (!DT->dominates(Previous, I))
return false;
+ }
return true;
}
diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
new file mode 100644
index 000000000000..c7cb561b5e21
--- /dev/null
+++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -0,0 +1,231 @@
+//===- LowerMemIntrinsics.cpp -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+
+using namespace llvm;
+
+void llvm::createMemCpyLoop(Instruction *InsertBefore,
+ Value *SrcAddr, Value *DstAddr, Value *CopyLen,
+ unsigned SrcAlign, unsigned DestAlign,
+ bool SrcIsVolatile, bool DstIsVolatile) {
+ Type *TypeOfCopyLen = CopyLen->getType();
+
+ BasicBlock *OrigBB = InsertBefore->getParent();
+ Function *F = OrigBB->getParent();
+ BasicBlock *NewBB =
+ InsertBefore->getParent()->splitBasicBlock(InsertBefore, "split");
+ BasicBlock *LoopBB = BasicBlock::Create(F->getContext(), "loadstoreloop",
+ F, NewBB);
+
+ OrigBB->getTerminator()->setSuccessor(0, LoopBB);
+ IRBuilder<> Builder(OrigBB->getTerminator());
+
+ // SrcAddr and DstAddr are expected to be pointer types,
+ // so no check is made here.
+ unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace();
+ unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+
+ // Cast pointers to (char *)
+ SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS));
+ DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS));
+
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB);
+
+ // load from SrcAddr+LoopIndex
+ // TODO: we can leverage the align parameter of llvm.memcpy for more efficient
+ // word-sized loads and stores.
+ Value *Element =
+ LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP(
+ LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex),
+ SrcIsVolatile);
+ // store at DstAddr+LoopIndex
+ LoopBuilder.CreateStore(Element,
+ LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(),
+ DstAddr, LoopIndex),
+ DstIsVolatile);
+
+ // The value for LoopIndex coming from the backedge is (LoopIndex + 1).
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
+ NewBB);
+}
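
A simplified C++ rendition of the control flow this helper emits (a sketch, not part of the patch). Note the do/while shape: the original block branches into the loop unconditionally, so callers are assumed to guarantee a non-zero CopyLen.

#include <cstddef>

void memcpyAsLoop(unsigned char *Dst, const unsigned char *Src,
                  std::size_t N) {
  std::size_t I = 0;
  do {
    Dst[I] = Src[I]; // byte-wise load from Src+I, store to Dst+I
    ++I;             // backedge value: LoopIndex + 1
  } while (I < N);   // icmp ult NewIndex, CopyLen
}
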
+
+// Lower memmove to IR. memmove is required to correctly copy overlapping memory
+// regions; therefore, it has to check the relative positions of the source and
+// destination pointers and choose the copy direction accordingly.
+//
+// The code below is an IR rendition of this C function:
+//
+// void* memmove(void* dst, const void* src, size_t n) {
+// unsigned char* d = dst;
+// const unsigned char* s = src;
+// if (s < d) {
+// // copy backwards
+// while (n--) {
+// d[n] = s[n];
+// }
+// } else {
+// // copy forward
+// for (size_t i = 0; i < n; ++i) {
+// d[i] = s[i];
+// }
+// }
+// return dst;
+// }
+static void createMemMoveLoop(Instruction *InsertBefore,
+ Value *SrcAddr, Value *DstAddr, Value *CopyLen,
+ unsigned SrcAlign, unsigned DestAlign,
+ bool SrcIsVolatile, bool DstIsVolatile) {
+ Type *TypeOfCopyLen = CopyLen->getType();
+ BasicBlock *OrigBB = InsertBefore->getParent();
+ Function *F = OrigBB->getParent();
+
+ // Create a comparison of src and dst, based on which we jump to either
+ // the forward-copy part of the function (if src >= dst) or the backwards-copy
+ // part (if src < dst).
+ // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else
+ // structure. Its block terminators (unconditional branches) are replaced by
+ // the appropriate conditional branches when the loop is built.
+ ICmpInst *PtrCompare = new ICmpInst(InsertBefore, ICmpInst::ICMP_ULT,
+ SrcAddr, DstAddr, "compare_src_dst");
+ TerminatorInst *ThenTerm, *ElseTerm;
+ SplitBlockAndInsertIfThenElse(PtrCompare, InsertBefore, &ThenTerm,
+ &ElseTerm);
+
+ // Each part of the function consists of two blocks:
+ // copy_backwards: used to skip the loop when n == 0
+ // copy_backwards_loop: the actual backwards loop BB
+ // copy_forward: used to skip the loop when n == 0
+ // copy_forward_loop: the actual forward loop BB
+ BasicBlock *CopyBackwardsBB = ThenTerm->getParent();
+ CopyBackwardsBB->setName("copy_backwards");
+ BasicBlock *CopyForwardBB = ElseTerm->getParent();
+ CopyForwardBB->setName("copy_forward");
+ BasicBlock *ExitBB = InsertBefore->getParent();
+ ExitBB->setName("memmove_done");
+
+ // Initial comparison of n == 0 that lets us skip the loops altogether. Shared
+ // between both backwards and forward copy clauses.
+ ICmpInst *CompareN =
+ new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen,
+ ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0");
+
+ // Copying backwards.
+ BasicBlock *LoopBB =
+ BasicBlock::Create(F->getContext(), "copy_backwards_loop", F, CopyForwardBB);
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
+ Value *IndexPtr = LoopBuilder.CreateSub(
+ LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
+ Value *Element = LoopBuilder.CreateLoad(
+ LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element");
+ LoopBuilder.CreateStore(Element,
+ LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr));
+ LoopBuilder.CreateCondBr(
+ LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)),
+ ExitBB, LoopBB);
+ LoopPhi->addIncoming(IndexPtr, LoopBB);
+ LoopPhi->addIncoming(CopyLen, CopyBackwardsBB);
+ BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm);
+ ThenTerm->eraseFromParent();
+
+ // Copying forward.
+ BasicBlock *FwdLoopBB =
+ BasicBlock::Create(F->getContext(), "copy_forward_loop", F, ExitBB);
+ IRBuilder<> FwdLoopBuilder(FwdLoopBB);
+ PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr");
+ Value *FwdElement = FwdLoopBuilder.CreateLoad(
+ FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element");
+ FwdLoopBuilder.CreateStore(
+ FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi));
+ Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd(
+ FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment");
+ FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen),
+ ExitBB, FwdLoopBB);
+ FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB);
+ FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB);
+
+ BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm);
+ ElseTerm->eraseFromParent();
+}
+
+static void createMemSetLoop(Instruction *InsertBefore,
+ Value *DstAddr, Value *CopyLen, Value *SetValue,
+ unsigned Align, bool IsVolatile) {
+ BasicBlock *OrigBB = InsertBefore->getParent();
+ Function *F = OrigBB->getParent();
+ BasicBlock *NewBB =
+ OrigBB->splitBasicBlock(InsertBefore, "split");
+ BasicBlock *LoopBB
+ = BasicBlock::Create(F->getContext(), "loadstoreloop", F, NewBB);
+
+ OrigBB->getTerminator()->setSuccessor(0, LoopBB);
+ IRBuilder<> Builder(OrigBB->getTerminator());
+
+ // Cast pointer to the type of value getting stored
+ unsigned dstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace();
+ DstAddr = Builder.CreateBitCast(DstAddr,
+ PointerType::get(SetValue->getType(), dstAS));
+
+ IRBuilder<> LoopBuilder(LoopBB);
+ PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLen->getType(), 0);
+ LoopIndex->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB);
+
+ LoopBuilder.CreateStore(
+ SetValue,
+ LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex),
+ IsVolatile);
+
+ Value *NewIndex =
+ LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLen->getType(), 1));
+ LoopIndex->addIncoming(NewIndex, LoopBB);
+
+ LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB,
+ NewBB);
+}
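
The memset lowering above has the same unconditional-entry loop shape; a companion sketch under the same non-zero-length assumption (byte-sized set value shown, though the real loop strides by SetValue's type):

#include <cstddef>

void memsetAsLoop(unsigned char *Dst, unsigned char V, std::size_t N) {
  std::size_t I = 0;
  do {
    Dst[I] = V; // store SetValue at DstAddr + LoopIndex
    ++I;
  } while (I < N);
}
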
+
+void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy) {
+ createMemCpyLoop(/* InsertBefore */ Memcpy,
+ /* SrcAddr */ Memcpy->getRawSource(),
+ /* DstAddr */ Memcpy->getRawDest(),
+ /* CopyLen */ Memcpy->getLength(),
+ /* SrcAlign */ Memcpy->getAlignment(),
+ /* DestAlign */ Memcpy->getAlignment(),
+ /* SrcIsVolatile */ Memcpy->isVolatile(),
+ /* DstIsVolatile */ Memcpy->isVolatile());
+}
+
+void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
+ createMemMoveLoop(/* InsertBefore */ Memmove,
+ /* SrcAddr */ Memmove->getRawSource(),
+ /* DstAddr */ Memmove->getRawDest(),
+ /* CopyLen */ Memmove->getLength(),
+ /* SrcAlign */ Memmove->getAlignment(),
+ /* DestAlign */ Memmove->getAlignment(),
+ /* SrcIsVolatile */ Memmove->isVolatile(),
+ /* DstIsVolatile */ Memmove->isVolatile());
+}
+
+void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
+ createMemSetLoop(/* InsertBefore */ Memset,
+ /* DstAddr */ Memset->getRawDest(),
+ /* CopyLen */ Memset->getLength(),
+ /* SetValue */ Memset->getValue(),
+ /* Alignment */ Memset->getAlignment(),
+ Memset->isVolatile());
+}
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 75cd3bc8b2bf..b375d51005d5 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -356,10 +356,10 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
unsigned numCmps = 0;
// Start with "simple" cases
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
- Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(),
- i.getCaseSuccessor()));
-
+ for (auto Case : SI->cases())
+ Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(),
+ Case.getCaseSuccessor()));
+
std::sort(Cases.begin(), Cases.end(), CaseCmp());
// Merge case into clusters
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 24b3b12930ac..b659a2e4463f 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -46,7 +46,7 @@ static bool promoteMemoryToRegister(Function &F, DominatorTree &DT,
if (Allocas.empty())
break;
- PromoteMemToReg(Allocas, DT, nullptr, &AC);
+ PromoteMemToReg(Allocas, DT, &AC);
NumPromoted += Allocas.size();
Changed = true;
}
@@ -59,8 +59,9 @@ PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) {
if (!promoteMemoryToRegister(F, DT, AC))
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
- return PreservedAnalyses::none();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
namespace {
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index c999bd008fef..481c6aa29c3a 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -16,6 +16,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
@@ -67,6 +68,7 @@ namespace {
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.setPreservesAll();
}
@@ -110,9 +112,15 @@ namespace {
}
// Rename all functions
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
for (auto &F : M) {
StringRef Name = F.getName();
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1))
+ LibFunc Tmp;
+ // Leave library functions alone because their presence or absence could
+ // affect the behavior of other passes.
+ if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ TLI.getLibFunc(F, Tmp))
continue;
F.setName(renamer.newName());
@@ -139,8 +147,11 @@ namespace {
}
char MetaRenamer::ID = 0;
-INITIALIZE_PASS(MetaRenamer, "metarenamer",
- "Assign new names to everything", false, false)
+INITIALIZE_PASS_BEGIN(MetaRenamer, "metarenamer",
+ "Assign new names to everything", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(MetaRenamer, "metarenamer",
+ "Assign new names to everything", false, false)
//===----------------------------------------------------------------------===//
//
// MetaRenamer - Rename everything with metasyntactic names.
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index 0d623df77a67..dbe42c201dd4 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -130,13 +130,25 @@ void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
if (isa<Function>(FuncOrBitcast))
return cast<Function>(FuncOrBitcast);
- FuncOrBitcast->dump();
+ FuncOrBitcast->print(errs());
+ errs() << '\n';
std::string Err;
raw_string_ostream Stream(Err);
Stream << "Sanitizer interface function redefined: " << *FuncOrBitcast;
report_fatal_error(Err);
}
+Function *llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
+ ArrayRef<Type *> InitArgTypes) {
+ assert(!InitName.empty() && "Expected init function name");
+ Function *F = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ InitName,
+ FunctionType::get(Type::getVoidTy(M.getContext()), InitArgTypes, false),
+ AttributeList()));
+ F->setLinkage(Function::ExternalLinkage);
+ return F;
+}
+
std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
Module &M, StringRef CtorName, StringRef InitName,
ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
@@ -144,22 +156,19 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
assert(!InitName.empty() && "Expected init function name");
assert(InitArgs.size() == InitArgTypes.size() &&
"Sanitizer's init function expects different number of arguments");
+ Function *InitFunction =
+ declareSanitizerInitFunction(M, InitName, InitArgTypes);
Function *Ctor = Function::Create(
FunctionType::get(Type::getVoidTy(M.getContext()), false),
GlobalValue::InternalLinkage, CtorName, &M);
BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB));
- Function *InitFunction =
- checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- InitName, FunctionType::get(IRB.getVoidTy(), InitArgTypes, false),
- AttributeSet()));
- InitFunction->setLinkage(Function::ExternalLinkage);
IRB.CreateCall(InitFunction, InitArgs);
if (!VersionCheckName.empty()) {
Function *VersionCheckFunction =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
- AttributeSet()));
+ AttributeList()));
IRB.CreateCall(VersionCheckFunction, {});
}
return std::make_pair(Ctor, InitFunction);
diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp
new file mode 100644
index 000000000000..8877aeafecde
--- /dev/null
+++ b/lib/Transforms/Utils/PredicateInfo.cpp
@@ -0,0 +1,782 @@
+//===-- PredicateInfo.cpp - PredicateInfo Builder --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------===//
+//
+// This file implements the PredicateInfo class.
+//
+//===----------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/PredicateInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/OrderedBasicBlock.h"
+#include "llvm/IR/AssemblyAnnotationWriter.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <algorithm>
+#define DEBUG_TYPE "predicateinfo"
+using namespace llvm;
+using namespace PatternMatch;
+using namespace llvm::PredicateInfoClasses;
+
+INITIALIZE_PASS_BEGIN(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
+ "PredicateInfo Printer", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(PredicateInfoPrinterLegacyPass, "print-predicateinfo",
+ "PredicateInfo Printer", false, false)
+static cl::opt<bool> VerifyPredicateInfo(
+ "verify-predicateinfo", cl::init(false), cl::Hidden,
+ cl::desc("Verify PredicateInfo in legacy printer pass."));
+namespace {
+DEBUG_COUNTER(RenameCounter, "predicateinfo-rename",
+ "Controls which variables are renamed with predicateinfo")
+// Given predicate info attached to a branching terminator, get the
+// branching block.
+const BasicBlock *getBranchBlock(const PredicateBase *PB) {
+ assert(isa<PredicateWithEdge>(PB) &&
+ "Only branches and switches should have PHIOnly defs that "
+ "require branch blocks.");
+ return cast<PredicateWithEdge>(PB)->From;
+}
+
+// Given predicate info attached to a branching terminator, get the
+// branching terminator.
+static Instruction *getBranchTerminator(const PredicateBase *PB) {
+ assert(isa<PredicateWithEdge>(PB) &&
+ "Not a predicate info type we know how to get a terminator from.");
+ return cast<PredicateWithEdge>(PB)->From->getTerminator();
+}
+
+// Given predicate info attached to a branching terminator, get the
+// edge this predicate info represents.
+const std::pair<BasicBlock *, BasicBlock *>
+getBlockEdge(const PredicateBase *PB) {
+ assert(isa<PredicateWithEdge>(PB) &&
+ "Not a predicate info type we know how to get an edge from.");
+ const auto *PEdge = cast<PredicateWithEdge>(PB);
+ return std::make_pair(PEdge->From, PEdge->To);
+}
+}
+
+namespace llvm {
+namespace PredicateInfoClasses {
+enum LocalNum {
+ // Operations that must appear first in the block.
+ LN_First,
+ // Operations that are somewhere in the middle of the block, and are sorted on
+ // demand.
+ LN_Middle,
+ // Operations that must appear last in a block, like successor phi node uses.
+ LN_Last
+};
+
+// Associate global and local DFS info with defs and uses, so we can sort them
+// into a global domination ordering.
+struct ValueDFS {
+ int DFSIn = 0;
+ int DFSOut = 0;
+ unsigned int LocalNum = LN_Middle;
+ // Only one of Def or Use will be set.
+ Value *Def = nullptr;
+ Use *U = nullptr;
+ // Neither PInfo nor EdgeOnly participate in the ordering
+ PredicateBase *PInfo = nullptr;
+ bool EdgeOnly = false;
+};
+
+// This compares ValueDFS structures, creating OrderedBasicBlocks where
+// necessary to compare uses/defs in the same block. Doing so allows us to walk
+// the minimum number of instructions necessary to compute our def/use ordering.
+struct ValueDFS_Compare {
+ DenseMap<const BasicBlock *, std::unique_ptr<OrderedBasicBlock>> &OBBMap;
+ ValueDFS_Compare(
+ DenseMap<const BasicBlock *, std::unique_ptr<OrderedBasicBlock>> &OBBMap)
+ : OBBMap(OBBMap) {}
+ bool operator()(const ValueDFS &A, const ValueDFS &B) const {
+ if (&A == &B)
+ return false;
+ // The only case we can't directly compare them is when they are in the same
+ // block, and both have localnum == middle. In that case, we have to use
+ // comesbefore to see what the real ordering is, because they are in the
+ // same basic block.
+
+ bool SameBlock = std::tie(A.DFSIn, A.DFSOut) == std::tie(B.DFSIn, B.DFSOut);
+
+ // We want to put the def that will get used for a given set of phi uses,
+ // before those phi uses.
+ // So we sort by edge, then by def.
+ // Note that only phi node uses and defs can come last.
+ if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last)
+ return comparePHIRelated(A, B);
+
+ if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle)
+ return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, A.Def, A.U) <
+ std::tie(B.DFSIn, B.DFSOut, B.LocalNum, B.Def, B.U);
+ return localComesBefore(A, B);
+ }
+
+ // For a phi use, or a non-materialized def, return the edge it represents.
+ const std::pair<BasicBlock *, BasicBlock *>
+ getBlockEdge(const ValueDFS &VD) const {
+ if (!VD.Def && VD.U) {
+ auto *PHI = cast<PHINode>(VD.U->getUser());
+ return std::make_pair(PHI->getIncomingBlock(*VD.U), PHI->getParent());
+ }
+ // This is really a non-materialized def.
+ return ::getBlockEdge(VD.PInfo);
+ }
+
+ // For two phi related values, return the ordering.
+ bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const {
+ auto &ABlockEdge = getBlockEdge(A);
+ auto &BBlockEdge = getBlockEdge(B);
+ // Now sort by block edge and then defs before uses.
+ return std::tie(ABlockEdge, A.Def, A.U) < std::tie(BBlockEdge, B.Def, B.U);
+ }
+
+ // Get the definition of an instruction that occurs in the middle of a block.
+ Value *getMiddleDef(const ValueDFS &VD) const {
+ if (VD.Def)
+ return VD.Def;
+ // It's possible for the defs and uses to be null. For branches, the local
+ // numbering will say the placed predicateinfos should go first (i.e.,
+ // LN_First), so we won't be in this function. For assumes, we will end
+ // up here, because we need to order the def we will place relative to the
+ // assume. So for the purpose of ordering, we pretend the def is the assume
+ // because that is where we will insert the info.
+ if (!VD.U) {
+ assert(VD.PInfo &&
+ "No def, no use, and no predicateinfo should not occur");
+ assert(isa<PredicateAssume>(VD.PInfo) &&
+ "Middle of block should only occur for assumes");
+ return cast<PredicateAssume>(VD.PInfo)->AssumeInst;
+ }
+ return nullptr;
+ }
+
+ // Return either the Def, if it's not null, or the user of the Use, if the def
+ // is null.
+ const Instruction *getDefOrUser(const Value *Def, const Use *U) const {
+ if (Def)
+ return cast<Instruction>(Def);
+ return cast<Instruction>(U->getUser());
+ }
+
+ // This performs the necessary local basic block ordering checks to tell
+ // whether A comes before B, where both are in the same basic block.
+ bool localComesBefore(const ValueDFS &A, const ValueDFS &B) const {
+ auto *ADef = getMiddleDef(A);
+ auto *BDef = getMiddleDef(B);
+
+ // See if we have real values or uses. If we have real values, we are
+ // guaranteed they are instructions or arguments. No matter what, we are
+ // guaranteed they are in the same block if they are instructions.
+ auto *ArgA = dyn_cast_or_null<Argument>(ADef);
+ auto *ArgB = dyn_cast_or_null<Argument>(BDef);
+
+ if (ArgA && !ArgB)
+ return true;
+ if (ArgB && !ArgA)
+ return false;
+ if (ArgA && ArgB)
+ return ArgA->getArgNo() < ArgB->getArgNo();
+
+ auto *AInst = getDefOrUser(ADef, A.U);
+ auto *BInst = getDefOrUser(BDef, B.U);
+
+ auto *BB = AInst->getParent();
+ auto LookupResult = OBBMap.find(BB);
+ if (LookupResult != OBBMap.end())
+ return LookupResult->second->dominates(AInst, BInst);
+
+ auto Result = OBBMap.insert({BB, make_unique<OrderedBasicBlock>(BB)});
+ return Result.first->second->dominates(AInst, BInst);
+ }
+};
+
+} // namespace PredicateInfoClasses
+
+bool PredicateInfo::stackIsInScope(const ValueDFSStack &Stack,
+ const ValueDFS &VDUse) const {
+ if (Stack.empty())
+ return false;
+ // If the top of the stack is an edge-only def, the use must be a phi use
+ // coming over that same edge; any other use means we need to pop the stack.
+ // We deliberately sort phi uses next to the defs they must go with so that
+ // we know it's time to pop the stack when we hit the end of the phi uses
+ // for a given def.
+ if (Stack.back().EdgeOnly) {
+ if (!VDUse.U)
+ return false;
+ auto *PHI = dyn_cast<PHINode>(VDUse.U->getUser());
+ if (!PHI)
+ return false;
+ // Check edge
+ BasicBlock *EdgePred = PHI->getIncomingBlock(*VDUse.U);
+ if (EdgePred != getBranchBlock(Stack.back().PInfo))
+ return false;
+
+ // Use dominates, which knows how to handle edge dominance.
+ return DT.dominates(getBlockEdge(Stack.back().PInfo), *VDUse.U);
+ }
+
+ return (VDUse.DFSIn >= Stack.back().DFSIn &&
+ VDUse.DFSOut <= Stack.back().DFSOut);
+}
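
The non-edge case at the end is the classic dominator-tree interval test; a self-contained sketch of just that check (simplified types, not the patch's code):

struct DFSNum { int In; int Out; };

// In a dominator tree numbered by DFS entry/exit times, Use lies in Def's
// subtree (i.e., Def's scope) exactly when Use's interval nests inside Def's.
bool inScope(DFSNum Def, DFSNum Use) {
  return Use.In >= Def.In && Use.Out <= Def.Out;
}
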
+
+void PredicateInfo::popStackUntilDFSScope(ValueDFSStack &Stack,
+ const ValueDFS &VD) {
+ while (!Stack.empty() && !stackIsInScope(Stack, VD))
+ Stack.pop_back();
+}
+
+// Convert the uses of Op into a vector of uses, associating global and local
+// DFS info with each one.
+void PredicateInfo::convertUsesToDFSOrdered(
+ Value *Op, SmallVectorImpl<ValueDFS> &DFSOrderedSet) {
+ for (auto &U : Op->uses()) {
+ if (auto *I = dyn_cast<Instruction>(U.getUser())) {
+ ValueDFS VD;
+ // Put the phi node uses in the incoming block.
+ BasicBlock *IBlock;
+ if (auto *PN = dyn_cast<PHINode>(I)) {
+ IBlock = PN->getIncomingBlock(U);
+ // Make phi node users appear last in the incoming block
+ // they are from.
+ VD.LocalNum = LN_Last;
+ } else {
+ // If it's not a phi node use, it is somewhere in the middle of the
+ // block.
+ IBlock = I->getParent();
+ VD.LocalNum = LN_Middle;
+ }
+ DomTreeNode *DomNode = DT.getNode(IBlock);
+ // It's possible our use is in an unreachable block. Skip it if so.
+ if (!DomNode)
+ continue;
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.U = &U;
+ DFSOrderedSet.push_back(VD);
+ }
+ }
+}
+
+// Collect relevant operations from Comparison that we may want to insert copies
+// for.
+void collectCmpOps(CmpInst *Comparison, SmallVectorImpl<Value *> &CmpOperands) {
+ auto *Op0 = Comparison->getOperand(0);
+ auto *Op1 = Comparison->getOperand(1);
+ if (Op0 == Op1)
+ return;
+ CmpOperands.push_back(Comparison);
+ // Only want real values, not constants. Additionally, operands with one use
+ // are only being used in the comparison, which means they will not be useful
+ // for us to consider for predicateinfo.
+ //
+ if ((isa<Instruction>(Op0) || isa<Argument>(Op0)) && !Op0->hasOneUse())
+ CmpOperands.push_back(Op0);
+ if ((isa<Instruction>(Op1) || isa<Argument>(Op1)) && !Op1->hasOneUse())
+ CmpOperands.push_back(Op1);
+}
+
+// Add Op, PB to the list of value infos for Op, and mark Op to be renamed.
+void PredicateInfo::addInfoFor(SmallPtrSetImpl<Value *> &OpsToRename, Value *Op,
+ PredicateBase *PB) {
+ OpsToRename.insert(Op);
+ auto &OperandInfo = getOrCreateValueInfo(Op);
+ AllInfos.push_back(PB);
+ OperandInfo.Infos.push_back(PB);
+}
+
+// Process an assume instruction and place relevant operations we want to rename
+// into OpsToRename.
+void PredicateInfo::processAssume(IntrinsicInst *II, BasicBlock *AssumeBB,
+ SmallPtrSetImpl<Value *> &OpsToRename) {
+ // See if we have a comparison we support
+ SmallVector<Value *, 8> CmpOperands;
+ SmallVector<Value *, 2> ConditionsToProcess;
+ CmpInst::Predicate Pred;
+ Value *Operand = II->getOperand(0);
+ if (m_c_And(m_Cmp(Pred, m_Value(), m_Value()),
+ m_Cmp(Pred, m_Value(), m_Value()))
+ .match(II->getOperand(0))) {
+ ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(0));
+ ConditionsToProcess.push_back(cast<BinaryOperator>(Operand)->getOperand(1));
+ ConditionsToProcess.push_back(Operand);
+ } else if (isa<CmpInst>(Operand)) {
+ ConditionsToProcess.push_back(Operand);
+ }
+ for (auto Cond : ConditionsToProcess) {
+ if (auto *Cmp = dyn_cast<CmpInst>(Cond)) {
+ collectCmpOps(Cmp, CmpOperands);
+ // Now add our copy infos for our operands
+ for (auto *Op : CmpOperands) {
+ auto *PA = new PredicateAssume(Op, II, Cmp);
+ addInfoFor(OpsToRename, Op, PA);
+ }
+ CmpOperands.clear();
+ } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) {
+ // Otherwise, it should be an AND.
+ assert(BinOp->getOpcode() == Instruction::And &&
+ "Should have been an AND");
+ auto *PA = new PredicateAssume(BinOp, II, BinOp);
+ addInfoFor(OpsToRename, BinOp, PA);
+ } else {
+ llvm_unreachable("Unknown type of condition");
+ }
+ }
+}
+
+// Process a block terminating branch, and place relevant operations to be
+// renamed into OpsToRename.
+void PredicateInfo::processBranch(BranchInst *BI, BasicBlock *BranchBB,
+ SmallPtrSetImpl<Value *> &OpsToRename) {
+ BasicBlock *FirstBB = BI->getSuccessor(0);
+ BasicBlock *SecondBB = BI->getSuccessor(1);
+ SmallVector<BasicBlock *, 2> SuccsToProcess;
+ SuccsToProcess.push_back(FirstBB);
+ SuccsToProcess.push_back(SecondBB);
+ SmallVector<Value *, 2> ConditionsToProcess;
+
+ auto InsertHelper = [&](Value *Op, bool isAnd, bool isOr, Value *Cond) {
+ for (auto *Succ : SuccsToProcess) {
+ // Don't try to insert on a self-edge. This is mainly because we will
+ // eliminate it during renaming anyway.
+ if (Succ == BranchBB)
+ continue;
+ bool TakenEdge = (Succ == FirstBB);
+ // For and, only insert on the true edge
+ // For or, only insert on the false edge
+ if ((isAnd && !TakenEdge) || (isOr && TakenEdge))
+ continue;
+ PredicateBase *PB =
+ new PredicateBranch(Op, BranchBB, Succ, Cond, TakenEdge);
+ addInfoFor(OpsToRename, Op, PB);
+ if (!Succ->getSinglePredecessor())
+ EdgeUsesOnly.insert({BranchBB, Succ});
+ }
+ };
+
+ // Match combinations of conditions.
+ CmpInst::Predicate Pred;
+ bool isAnd = false;
+ bool isOr = false;
+ SmallVector<Value *, 8> CmpOperands;
+ if (match(BI->getCondition(), m_And(m_Cmp(Pred, m_Value(), m_Value()),
+ m_Cmp(Pred, m_Value(), m_Value()))) ||
+ match(BI->getCondition(), m_Or(m_Cmp(Pred, m_Value(), m_Value()),
+ m_Cmp(Pred, m_Value(), m_Value())))) {
+ auto *BinOp = cast<BinaryOperator>(BI->getCondition());
+ if (BinOp->getOpcode() == Instruction::And)
+ isAnd = true;
+ else if (BinOp->getOpcode() == Instruction::Or)
+ isOr = true;
+ ConditionsToProcess.push_back(BinOp->getOperand(0));
+ ConditionsToProcess.push_back(BinOp->getOperand(1));
+ ConditionsToProcess.push_back(BI->getCondition());
+ } else if (isa<CmpInst>(BI->getCondition())) {
+ ConditionsToProcess.push_back(BI->getCondition());
+ }
+ for (auto Cond : ConditionsToProcess) {
+ if (auto *Cmp = dyn_cast<CmpInst>(Cond)) {
+ collectCmpOps(Cmp, CmpOperands);
+ // Now add our copy infos for our operands
+ for (auto *Op : CmpOperands)
+ InsertHelper(Op, isAnd, isOr, Cmp);
+ } else if (auto *BinOp = dyn_cast<BinaryOperator>(Cond)) {
+ // This must be an AND or an OR.
+ assert((BinOp->getOpcode() == Instruction::And ||
+ BinOp->getOpcode() == Instruction::Or) &&
+ "Should have been an AND or an OR");
+ // The actual value of the binop is not subject to the same restrictions
+ // as the comparison. It's either true or false on the true/false branch.
+ InsertHelper(BinOp, false, false, BinOp);
+ } else {
+ llvm_unreachable("Unknown type of condition");
+ }
+ CmpOperands.clear();
+ }
+}
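
A source-level illustration (hypothetical, not from the patch) of why InsertHelper attaches 'and' predicates only to the true edge and 'or' predicates only to the false edge:

#include <cassert>

// For a branch on `A && B`, each conjunct is individually known only on the
// taken edge; on the false edge either one may have failed. Dually, for
// `A || B` each disjunct is known to be false only on the non-taken edge.
int classify(bool A, bool B) {
  if (A && B) {
    assert(A && "known true on the taken edge");
    assert(B && "known true on the taken edge");
    return 1;
  }
  return 0; // here neither A nor B is individually determined
}
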
+// Process a block terminating switch, and place relevant operations to be
+// renamed into OpsToRename.
+void PredicateInfo::processSwitch(SwitchInst *SI, BasicBlock *BranchBB,
+ SmallPtrSetImpl<Value *> &OpsToRename) {
+ Value *Op = SI->getCondition();
+ if ((!isa<Instruction>(Op) && !isa<Argument>(Op)) || Op->hasOneUse())
+ return;
+
+ // Remember how many outgoing edges there are to every successor.
+ SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *TargetBlock = SI->getSuccessor(i);
+ ++SwitchEdges[TargetBlock];
+ }
+
+ // Now propagate info for each case value
+ for (auto C : SI->cases()) {
+ BasicBlock *TargetBlock = C.getCaseSuccessor();
+ if (SwitchEdges.lookup(TargetBlock) == 1) {
+ PredicateSwitch *PS = new PredicateSwitch(
+ Op, SI->getParent(), TargetBlock, C.getCaseValue(), SI);
+ addInfoFor(OpsToRename, Op, PS);
+ if (!TargetBlock->getSinglePredecessor())
+ EdgeUsesOnly.insert({BranchBB, TargetBlock});
+ }
+ }
+}
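
A minimal sketch of the multiplicity rule above (successors modeled as plain ints; illustrative code, not the patch's): a case value is only known on entry to a successor when exactly one switch edge targets it.

#include <cstddef>
#include <unordered_map>
#include <vector>

std::vector<std::size_t>
casesWithKnownValue(const std::vector<int> &SuccOfCase) {
  std::unordered_map<int, unsigned> SwitchEdges;
  for (int S : SuccOfCase)
    ++SwitchEdges[S]; // count outgoing edges per successor
  std::vector<std::size_t> Result;
  for (std::size_t C = 0; C != SuccOfCase.size(); ++C)
    if (SwitchEdges[SuccOfCase[C]] == 1) // sole edge into this block
      Result.push_back(C);
  return Result;
}
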
+
+// Build predicate info for our function
+void PredicateInfo::buildPredicateInfo() {
+ DT.updateDFSNumbers();
+ // Collect operands to rename from all conditional branch terminators, as well
+ // as assume statements.
+ SmallPtrSet<Value *, 8> OpsToRename;
+ for (auto DTN : depth_first(DT.getRootNode())) {
+ BasicBlock *BranchBB = DTN->getBlock();
+ if (auto *BI = dyn_cast<BranchInst>(BranchBB->getTerminator())) {
+ if (!BI->isConditional())
+ continue;
+ processBranch(BI, BranchBB, OpsToRename);
+ } else if (auto *SI = dyn_cast<SwitchInst>(BranchBB->getTerminator())) {
+ processSwitch(SI, BranchBB, OpsToRename);
+ }
+ }
+ for (auto &Assume : AC.assumptions()) {
+ if (auto *II = dyn_cast_or_null<IntrinsicInst>(Assume))
+ processAssume(II, II->getParent(), OpsToRename);
+ }
+ // Now rename all our operations.
+ renameUses(OpsToRename);
+}
+
+// Given the renaming stack, make all the operands currently on the stack real
+// by inserting them into the IR. Return the last operation's value.
+Value *PredicateInfo::materializeStack(unsigned int &Counter,
+ ValueDFSStack &RenameStack,
+ Value *OrigOp) {
+ // Find the first thing we have to materialize
+ auto RevIter = RenameStack.rbegin();
+ for (; RevIter != RenameStack.rend(); ++RevIter)
+ if (RevIter->Def)
+ break;
+
+ size_t Start = RevIter - RenameStack.rbegin();
+ // The maximum number of things we should be trying to materialize at once
+ // right now is 4, depending on whether we had an assume, a branch, and
+ // whether both used an 'and' of conditions.
+ for (auto RenameIter = RenameStack.end() - Start;
+ RenameIter != RenameStack.end(); ++RenameIter) {
+ auto *Op =
+ RenameIter == RenameStack.begin() ? OrigOp : (RenameIter - 1)->Def;
+ ValueDFS &Result = *RenameIter;
+ auto *ValInfo = Result.PInfo;
+ // For edge predicates, we can just place the operand in the block before
+ // the terminator. For assume, we have to place it right before the assume
+ // to ensure we dominate all of our uses. Always insert right before the
+ // relevant instruction (terminator, assume), so that we insert in proper
+ // order in the case of multiple predicateinfo in the same block.
+ if (isa<PredicateWithEdge>(ValInfo)) {
+ IRBuilder<> B(getBranchTerminator(ValInfo));
+ Function *IF = Intrinsic::getDeclaration(
+ F.getParent(), Intrinsic::ssa_copy, Op->getType());
+ CallInst *PIC =
+ B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
+ PredicateMap.insert({PIC, ValInfo});
+ Result.Def = PIC;
+ } else {
+ auto *PAssume = dyn_cast<PredicateAssume>(ValInfo);
+ assert(PAssume &&
+ "Should not have gotten here without it being an assume");
+ IRBuilder<> B(PAssume->AssumeInst);
+ Function *IF = Intrinsic::getDeclaration(
+ F.getParent(), Intrinsic::ssa_copy, Op->getType());
+ CallInst *PIC = B.CreateCall(IF, Op);
+ PredicateMap.insert({PIC, ValInfo});
+ Result.Def = PIC;
+ }
+ }
+ return RenameStack.back().Def;
+}
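
The scan-back-then-chain structure of materializeStack, as a runnable sketch with strings standing in for IR values (names and types invented for illustration):

#include <string>
#include <vector>

struct Frame { std::string Def; }; // empty Def == copy not materialized yet

// Precondition: Stack is non-empty.
std::string materialize(std::vector<Frame> &Stack, const std::string &OrigOp) {
  // Scan from the top of the stack down to the newest materialized def;
  // every frame above it still needs a copy.
  std::size_t First = Stack.size();
  while (First > 0 && Stack[First - 1].Def.empty())
    --First;
  // Create the missing copies top-down, each one fed by the previous def.
  for (std::size_t I = First; I != Stack.size(); ++I) {
    const std::string &In = (I == 0) ? OrigOp : Stack[I - 1].Def;
    Stack[I].Def = "ssa_copy(" + In + ")"; // stands in for the IRBuilder call
  }
  return Stack.back().Def;
}
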
+
+// Instead of the standard SSA renaming algorithm, which is O(Number of
+// instructions), and walks the entire dominator tree, we walk only the defs +
+// uses. The standard SSA renaming algorithm does not really rely on the
+// dominator tree except to order the stack push/pops of the renaming stacks, so
+// that defs end up getting pushed before hitting the correct uses. This does
+// not require the dominator tree, only the *order* of the dominator tree. The
+// complete and correct ordering of the defs and uses in the dominator tree is
+// contained in the DFS numbering of the dominator tree. So we sort the defs and
+// uses into the DFS ordering, and then just use the renaming stack as per
+// normal, pushing when we hit a def (which is a predicateinfo instruction),
+// popping when we are out of the dfs scope for that def, and replacing any uses
+// with top of stack if it exists. In order to handle liveness without
+// propagating liveness info, we don't actually insert the predicateinfo
+// instruction def until we see a use that it would dominate. Once we see such
+// a use, we materialize the predicateinfo instruction in the right place and
+// use it.
+//
+// TODO: Use this algorithm to perform fast single-variable renaming in
+// PromoteMemToReg and MemorySSA.
+void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpsToRename) {
+ ValueDFS_Compare Compare(OBBMap);
+ // Compute liveness, and rename in O(uses) per Op.
+ for (auto *Op : OpsToRename) {
+ unsigned Counter = 0;
+ SmallVector<ValueDFS, 16> OrderedUses;
+ const auto &ValueInfo = getValueInfo(Op);
+ // Insert the possible copies into the def/use list.
+ // They will become real copies if we find a real use for them, and are
+ // never created otherwise.
+ for (auto &PossibleCopy : ValueInfo.Infos) {
+ ValueDFS VD;
+ // Determine where we are going to place the copy by the copy type.
+ // The predicate info for branches always comes first; it will get
+ // materialized in the split block at the top of the block.
+ // The predicate info for assumes will be somewhere in the middle;
+ // it will get materialized in front of the assume.
+ if (const auto *PAssume = dyn_cast<PredicateAssume>(PossibleCopy)) {
+ VD.LocalNum = LN_Middle;
+ DomTreeNode *DomNode = DT.getNode(PAssume->AssumeInst->getParent());
+ if (!DomNode)
+ continue;
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.PInfo = PossibleCopy;
+ OrderedUses.push_back(VD);
+ } else if (isa<PredicateWithEdge>(PossibleCopy)) {
+ // If we can only do phi uses, we treat it like it's in the branch
+ // block, and handle it specially. We know that it goes last, and only
+ // dominates phi uses.
+ auto BlockEdge = getBlockEdge(PossibleCopy);
+ if (EdgeUsesOnly.count(BlockEdge)) {
+ VD.LocalNum = LN_Last;
+ auto *DomNode = DT.getNode(BlockEdge.first);
+ if (DomNode) {
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.PInfo = PossibleCopy;
+ VD.EdgeOnly = true;
+ OrderedUses.push_back(VD);
+ }
+ } else {
+ // Otherwise, we are in the split block (even though we perform
+ // insertion in the branch block).
+ // Insert a possible copy at the split block and before the branch.
+ VD.LocalNum = LN_First;
+ auto *DomNode = DT.getNode(BlockEdge.second);
+ if (DomNode) {
+ VD.DFSIn = DomNode->getDFSNumIn();
+ VD.DFSOut = DomNode->getDFSNumOut();
+ VD.PInfo = PossibleCopy;
+ OrderedUses.push_back(VD);
+ }
+ }
+ }
+ }
+
+ convertUsesToDFSOrdered(Op, OrderedUses);
+ std::sort(OrderedUses.begin(), OrderedUses.end(), Compare);
+ SmallVector<ValueDFS, 8> RenameStack;
+ // For each use, sorted into dfs order, push values and replaces uses with
+ // top of stack, which will represent the reaching def.
+ for (auto &VD : OrderedUses) {
+ // We currently do not materialize copy over copy, but we should decide if
+ // we want to.
+ bool PossibleCopy = VD.PInfo != nullptr;
+ if (RenameStack.empty()) {
+ DEBUG(dbgs() << "Rename Stack is empty\n");
+ } else {
+ DEBUG(dbgs() << "Rename Stack Top DFS numbers are ("
+ << RenameStack.back().DFSIn << ","
+ << RenameStack.back().DFSOut << ")\n");
+ }
+
+ DEBUG(dbgs() << "Current DFS numbers are (" << VD.DFSIn << ","
+ << VD.DFSOut << ")\n");
+
+ bool ShouldPush = (VD.Def || PossibleCopy);
+ bool OutOfScope = !stackIsInScope(RenameStack, VD);
+ if (OutOfScope || ShouldPush) {
+ // Sync to our current scope.
+ popStackUntilDFSScope(RenameStack, VD);
+ if (ShouldPush) {
+ RenameStack.push_back(VD);
+ }
+ }
+ // If we get to this point and the stack is empty, we must have a use
+ // with no renaming needed; just skip it.
+ if (RenameStack.empty())
+ continue;
+ // Skip values, only want to rename the uses
+ if (VD.Def || PossibleCopy)
+ continue;
+ if (!DebugCounter::shouldExecute(RenameCounter)) {
+ DEBUG(dbgs() << "Skipping execution due to debug counter\n");
+ continue;
+ }
+ ValueDFS &Result = RenameStack.back();
+
+ // If the possible copy dominates something, materialize our stack up to
+ // this point. This ensures every comparison that affects our operation
+ // ends up with predicateinfo.
+ if (!Result.Def)
+ Result.Def = materializeStack(Counter, RenameStack, Op);
+
+ DEBUG(dbgs() << "Found replacement " << *Result.Def << " for "
+ << *VD.U->get() << " in " << *(VD.U->getUser()) << "\n");
+ assert(DT.dominates(cast<Instruction>(Result.Def), *VD.U) &&
+ "Predicateinfo def should have dominated this use");
+ VD.U->set(Result.Def);
+ }
+ }
+}
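
The algorithm described in the comment before renameUses compresses to a short loop over DFS-sorted items; a simplified, runnable sketch (IDs stand in for values; the scope test matches stackIsInScope's non-edge case):

#include <utility>
#include <vector>

struct Item { int In; int Out; bool IsDef; int Id; };

// Items must arrive sorted by (In, Out), i.e. dominator-tree DFS order.
std::vector<std::pair<int, int>> renameSketch(const std::vector<Item> &Items) {
  std::vector<Item> Stack;
  std::vector<std::pair<int, int>> UseToDef; // (use id, reaching def id)
  for (const Item &It : Items) {
    // Pop defs whose DFS interval no longer contains this item.
    while (!Stack.empty() &&
           !(It.In >= Stack.back().In && It.Out <= Stack.back().Out))
      Stack.pop_back();
    if (It.IsDef)
      Stack.push_back(It); // push the new reaching def
    else if (!Stack.empty())
      UseToDef.push_back({It.Id, Stack.back().Id}); // rewrite the use
  }
  return UseToDef;
}
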
+
+PredicateInfo::ValueInfo &PredicateInfo::getOrCreateValueInfo(Value *Operand) {
+ auto OIN = ValueInfoNums.find(Operand);
+ if (OIN == ValueInfoNums.end()) {
+ // This will grow it
+ ValueInfos.resize(ValueInfos.size() + 1);
+ // This will use the new size and give us a 0 based number of the info
+ auto InsertResult = ValueInfoNums.insert({Operand, ValueInfos.size() - 1});
+ assert(InsertResult.second && "Value info number already existed?");
+ return ValueInfos[InsertResult.first->second];
+ }
+ return ValueInfos[OIN->second];
+}
+
+const PredicateInfo::ValueInfo &
+PredicateInfo::getValueInfo(Value *Operand) const {
+ auto OINI = ValueInfoNums.lookup(Operand);
+ assert(OINI != 0 && "Operand was not really in the Value Info Numbers");
+ assert(OINI < ValueInfos.size() &&
+ "Value Info Number greater than size of Value Info Table");
+ return ValueInfos[OINI];
+}
+
+PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,
+ AssumptionCache &AC)
+ : F(F), DT(DT), AC(AC) {
+ // Push an empty operand info so that we can detect 0 as not finding one
+ ValueInfos.resize(1);
+ buildPredicateInfo();
+}
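
The reserved slot set up in the constructor makes a default-constructed lookup result self-checking; the same pattern in a generic, self-contained sketch (std containers standing in for the LLVM ones):

#include <cassert>
#include <cstddef>
#include <string>
#include <unordered_map>
#include <vector>

struct InfoTable {
  std::unordered_map<std::string, std::size_t> Nums;
  std::vector<int> Infos = std::vector<int>(1); // slot 0 = "absent" sentinel

  int &getOrCreate(const std::string &Key) {
    auto It = Nums.find(Key);
    if (It == Nums.end()) {
      Infos.emplace_back();
      It = Nums.insert({Key, Infos.size() - 1}).first;
    }
    return Infos[It->second];
  }

  int &get(const std::string &Key) {
    // Like DenseMap::lookup, a missing key yields 0 -- the reserved slot.
    auto It = Nums.find(Key);
    std::size_t N = It == Nums.end() ? 0 : It->second;
    assert(N != 0 && "key was never inserted");
    return Infos[N];
  }
};
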
+
+PredicateInfo::~PredicateInfo() {}
+
+void PredicateInfo::verifyPredicateInfo() const {}
+
+char PredicateInfoPrinterLegacyPass::ID = 0;
+
+PredicateInfoPrinterLegacyPass::PredicateInfoPrinterLegacyPass()
+ : FunctionPass(ID) {
+ initializePredicateInfoPrinterLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+}
+
+void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+}
+
+bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto PredInfo = make_unique<PredicateInfo>(F, DT, AC);
+ PredInfo->print(dbgs());
+ if (VerifyPredicateInfo)
+ PredInfo->verifyPredicateInfo();
+ return false;
+}
+
+PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ OS << "PredicateInfo for function: " << F.getName() << "\n";
+ make_unique<PredicateInfo>(F, DT, AC)->print(OS);
+
+ return PreservedAnalyses::all();
+}
+
+/// \brief An assembly annotator class to print PredicateInfo information in
+/// comments.
+class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter {
+ friend class PredicateInfo;
+ const PredicateInfo *PredInfo;
+
+public:
+ PredicateInfoAnnotatedWriter(const PredicateInfo *M) : PredInfo(M) {}
+
+ virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
+ formatted_raw_ostream &OS) {}
+
+ virtual void emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS) {
+ if (const auto *PI = PredInfo->getPredicateInfoFor(I)) {
+ OS << "; Has predicate info\n";
+ if (const auto *PB = dyn_cast<PredicateBranch>(PI)) {
+ OS << "; branch predicate info { TrueEdge: " << PB->TrueEdge
+ << " Comparison:" << *PB->Condition << " Edge: [";
+ PB->From->printAsOperand(OS);
+ OS << ",";
+ PB->To->printAsOperand(OS);
+ OS << "] }\n";
+ } else if (const auto *PS = dyn_cast<PredicateSwitch>(PI)) {
+ OS << "; switch predicate info { CaseValue: " << *PS->CaseValue
+ << " Switch:" << *PS->Switch << " Edge: [";
+ PS->From->printAsOperand(OS);
+ OS << ",";
+ PS->To->printAsOperand(OS);
+ OS << "] }\n";
+ } else if (const auto *PA = dyn_cast<PredicateAssume>(PI)) {
+ OS << "; assume predicate info {"
+ << " Comparison:" << *PA->Condition << " }\n";
+ }
+ }
+ }
+};
+
+void PredicateInfo::print(raw_ostream &OS) const {
+ PredicateInfoAnnotatedWriter Writer(this);
+ F.print(OS, &Writer);
+}
+
+void PredicateInfo::dump() const {
+ PredicateInfoAnnotatedWriter Writer(this);
+ F.print(dbgs(), &Writer);
+}
+
+PreservedAnalyses PredicateInfoVerifierPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ make_unique<PredicateInfo>(F, DT, AC)->verifyPredicateInfo();
+
+ return PreservedAnalyses::all();
+}
+}
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 35faa6f65efd..a33b85c4ee69 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -23,6 +22,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -38,6 +38,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
using namespace llvm;
@@ -225,9 +226,6 @@ struct PromoteMem2Reg {
DominatorTree &DT;
DIBuilder DIB;
- /// An AliasSetTracker object to update. If null, don't update it.
- AliasSetTracker *AST;
-
/// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
AssumptionCache *AC;
@@ -269,10 +267,10 @@ struct PromoteMem2Reg {
public:
PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- AliasSetTracker *AST, AssumptionCache *AC)
+ AssumptionCache *AC)
: Allocas(Allocas.begin(), Allocas.end()), DT(DT),
DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false),
- AST(AST), AC(AC) {}
+ AC(AC) {}
void run();
@@ -301,6 +299,18 @@ private:
} // end of anonymous namespace
+/// Given a LoadInst LI, this adds assume(LI != null) after it.
+static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
+ Function *AssumeIntrinsic =
+ Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
+ ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
+ Constant::getNullValue(LI->getType()));
+ LoadNotNull->insertAfter(LI);
+ CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
+ CI->insertAfter(LoadNotNull);
+ AC->registerAssumption(CI);
+}
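
A source-level analogue of what addAssumeNonNull preserves (hypothetical example; __builtin_assume is clang's stand-in for the icmp-plus-llvm.assume pair the helper builds):

// When promotion deletes a load tagged !nonnull, the fact would vanish with
// it; restating it as an assume keeps it visible to later passes.
int deref(int **PP) {
  int *V = *PP;                   // was: %V = load ..., !nonnull
  __builtin_assume(V != nullptr); // inserted: icmp ne + call @llvm.assume
  return *V;                      // uses of the erased load now use V
}
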
+
static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
// Knowing that this alloca is promotable, we know that it's safe to kill all
// instructions except for load and store.
@@ -334,9 +344,8 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
/// and thus must be phi-ed with undef. We fall back to the standard alloca
/// promotion algorithm in that case.
static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
- LargeBlockInfo &LBI,
- DominatorTree &DT,
- AliasSetTracker *AST) {
+ LargeBlockInfo &LBI, DominatorTree &DT,
+ AssumptionCache *AC) {
StoreInst *OnlyStore = Info.OnlyStore;
bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
BasicBlock *StoreBB = OnlyStore->getParent();
@@ -387,9 +396,15 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
// code.
if (ReplVal == LI)
ReplVal = UndefValue::get(LI->getType());
+
+ // If the load was marked as nonnull, we don't want to lose
+ // that information when we erase this load. So we preserve
+ // it with an assume.
+ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+ !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+ addAssumeNonNull(AC, LI);
+
LI->replaceAllUsesWith(ReplVal);
- if (AST && LI->getType()->isPointerTy())
- AST->deleteValue(LI);
LI->eraseFromParent();
LBI.deleteValue(LI);
}
@@ -410,8 +425,6 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
Info.OnlyStore->eraseFromParent();
LBI.deleteValue(Info.OnlyStore);
- if (AST)
- AST->deleteValue(AI);
AI->eraseFromParent();
LBI.deleteValue(AI);
return true;
@@ -435,7 +448,8 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
/// }
static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
LargeBlockInfo &LBI,
- AliasSetTracker *AST) {
+ DominatorTree &DT,
+ AssumptionCache *AC) {
// The trickiest case to handle is when we have large blocks. Because of this,
// this code is optimized assuming that large blocks happen. This does not
// significantly pessimize the small block case. This uses LargeBlockInfo to
@@ -476,13 +490,18 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// There is no store before this load, bail out (load may be affected
// by the following stores - see main comment).
return false;
- }
- else
+ } else {
// Otherwise, there was a store before this load, the load takes its value.
- LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));
+ // Note, if the load was marked as nonnull we don't want to lose that
+ // information when we erase it. So we preserve it with an assume.
+ Value *ReplVal = std::prev(I)->second->getOperand(0);
+ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+ !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+ addAssumeNonNull(AC, LI);
+
+ LI->replaceAllUsesWith(ReplVal);
+ }
- if (AST && LI->getType()->isPointerTy())
- AST->deleteValue(LI);
LI->eraseFromParent();
LBI.deleteValue(LI);
}
@@ -499,8 +518,6 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
LBI.deleteValue(SI);
}
- if (AST)
- AST->deleteValue(AI);
AI->eraseFromParent();
LBI.deleteValue(AI);
@@ -517,8 +534,6 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
void PromoteMem2Reg::run() {
Function &F = *DT.getRoot()->getParent();
- if (AST)
- PointerAllocaValues.resize(Allocas.size());
AllocaDbgDeclares.resize(Allocas.size());
AllocaInfo Info;
@@ -536,8 +551,6 @@ void PromoteMem2Reg::run() {
if (AI->use_empty()) {
// If there are no uses of the alloca, just delete it now.
- if (AST)
- AST->deleteValue(AI);
AI->eraseFromParent();
// Remove the alloca from the Allocas list, since it has been processed
@@ -553,7 +566,7 @@ void PromoteMem2Reg::run() {
// If there is only a single store to this value, replace any loads of
// it that are directly dominated by the definition with the value stored.
if (Info.DefiningBlocks.size() == 1) {
- if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
+ if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AC)) {
// The alloca has been processed, move on.
RemoveFromAllocasList(AllocaNum);
++NumSingleStore;
@@ -564,7 +577,7 @@ void PromoteMem2Reg::run() {
// If the alloca is only read and written in one basic block, just perform a
// linear sweep over the block to eliminate it.
if (Info.OnlyUsedInOneBlock &&
- promoteSingleBlockAlloca(AI, Info, LBI, AST)) {
+ promoteSingleBlockAlloca(AI, Info, LBI, DT, AC)) {
// The alloca has been processed, move on.
RemoveFromAllocasList(AllocaNum);
continue;
@@ -578,11 +591,6 @@ void PromoteMem2Reg::run() {
BBNumbers[&BB] = ID++;
}
- // If we have an AST to keep updated, remember some pointer value that is
- // stored into the alloca.
- if (AST)
- PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;
-
// Remember the dbg.declare intrinsic describing this alloca, if any.
if (Info.DbgDeclare)
AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;
@@ -662,8 +670,6 @@ void PromoteMem2Reg::run() {
// tree. Just delete the users now.
if (!A->use_empty())
A->replaceAllUsesWith(UndefValue::get(A->getType()));
- if (AST)
- AST->deleteValue(A);
A->eraseFromParent();
}
@@ -694,8 +700,6 @@ void PromoteMem2Reg::run() {
// If this PHI node merges one value and/or undefs, get the value.
if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) {
- if (AST && PN->getType()->isPointerTy())
- AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
NewPhiNodes.erase(I++);
@@ -863,10 +867,6 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
&BB->front());
++NumPHIInsert;
PhiToAllocaMap[PN] = AllocaNo;
-
- if (AST && PN->getType()->isPointerTy())
- AST->copyValue(PointerAllocaValues[AllocaNo], PN);
-
return true;
}
@@ -940,10 +940,15 @@ NextIteration:
Value *V = IncomingVals[AI->second];
+ // If the load was marked as nonnull, we don't want to lose
+ // that information when we erase this load. So we preserve
+ // it with an assume.
+ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+ !llvm::isKnownNonNullAt(V, LI, &DT))
+ addAssumeNonNull(AC, LI);
+
// Anything using the load now uses the current value.
LI->replaceAllUsesWith(V);
- if (AST && LI->getType()->isPointerTy())
- AST->deleteValue(LI);
BB->getInstList().erase(LI);
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
// Delete this instruction and mark the name as the current holder of the
@@ -987,10 +992,10 @@ NextIteration:
}
void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
- AliasSetTracker *AST, AssumptionCache *AC) {
+ AssumptionCache *AC) {
// If there is nothing to do, bail out...
if (Allocas.empty())
return;
- PromoteMem2Reg(Allocas, DT, AST, AC).run();
+ PromoteMem2Reg(Allocas, DT, AC).run();
}
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 8e93ee757a15..8b6a2c3766d2 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -11,20 +11,29 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+#include <cassert>
+#include <utility>
using namespace llvm;
@@ -36,7 +45,7 @@ static AvailableValsTy &getAvailableVals(void *AV) {
}
SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
- : AV(nullptr), ProtoType(nullptr), ProtoName(), InsertedPHIs(NewPHI) {}
+ : InsertedPHIs(NewPHI) {}
SSAUpdater::~SSAUpdater() {
delete static_cast<AvailableValsTy*>(AV);
@@ -205,6 +214,7 @@ void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
}
namespace llvm {
+
template<>
class SSAUpdaterTraits<SSAUpdater> {
public:
@@ -230,6 +240,7 @@ public:
PHI_iterator &operator++() { ++idx; return *this; }
bool operator==(const PHI_iterator& x) const { return idx == x.idx; }
bool operator!=(const PHI_iterator& x) const { return !operator==(x); }
+
Value *getIncomingValue() { return PHI->getIncomingValue(idx); }
BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); }
};
@@ -303,7 +314,7 @@ public:
}
};
-} // End llvm namespace
+} // end namespace llvm
/// Check to see if AvailableVals has an entry for the specified BB and if so,
/// return it. If not, construct SSA form by first calculating the required
@@ -337,14 +348,12 @@ LoadAndStorePromoter(ArrayRef<const Instruction*> Insts,
SSA.Initialize(SomeVal->getType(), BaseName);
}
-
void LoadAndStorePromoter::
run(const SmallVectorImpl<Instruction*> &Insts) const {
-
// First step: bucket up uses of the alloca by the block they occur in.
// This is important because we have to handle multiple defs/uses in a block
// ourselves: SSAUpdater is purely for cross-block references.
- DenseMap<BasicBlock*, TinyPtrVector<Instruction*> > UsesByBlock;
+ DenseMap<BasicBlock*, TinyPtrVector<Instruction*>> UsesByBlock;
for (Instruction *User : Insts)
UsesByBlock[User->getParent()].push_back(User);
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 7b0bddbbb831..127a44df5344 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -169,6 +170,8 @@ class SimplifyCFGOpt {
unsigned BonusInstThreshold;
AssumptionCache *AC;
SmallPtrSetImpl<BasicBlock *> *LoopHeaders;
+ // See comments in SimplifyCFGOpt::SimplifySwitch.
+ bool LateSimplifyCFG;
Value *isValueEqualityComparison(TerminatorInst *TI);
BasicBlock *GetValueEqualityComparisonCases(
TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases);
@@ -192,9 +195,10 @@ class SimplifyCFGOpt {
public:
SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
unsigned BonusInstThreshold, AssumptionCache *AC,
- SmallPtrSetImpl<BasicBlock *> *LoopHeaders)
+ SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
+ bool LateSimplifyCFG)
: TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC),
- LoopHeaders(LoopHeaders) {}
+ LoopHeaders(LoopHeaders), LateSimplifyCFG(LateSimplifyCFG) {}
bool run(BasicBlock *BB);
};
@@ -710,10 +714,9 @@ BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
TerminatorInst *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cases.reserve(SI->getNumCases());
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e;
- ++i)
- Cases.push_back(
- ValueEqualityComparisonCase(i.getCaseValue(), i.getCaseSuccessor()));
+ for (auto Case : SI->cases())
+ Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
+ Case.getCaseSuccessor()));
return SI->getDefaultDest();
}
@@ -846,12 +849,12 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
}
for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
--i;
- if (DeadCases.count(i.getCaseValue())) {
+ if (DeadCases.count(i->getCaseValue())) {
if (HasWeight) {
- std::swap(Weights[i.getCaseIndex() + 1], Weights.back());
+ std::swap(Weights[i->getCaseIndex() + 1], Weights.back());
Weights.pop_back();
}
- i.getCaseSuccessor()->removePredecessor(TI->getParent());
+ i->getCaseSuccessor()->removePredecessor(TI->getParent());
SI->removeCase(i);
}
}
@@ -996,8 +999,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
SmallSetVector<BasicBlock*, 4> FailBlocks;
if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
for (auto *Succ : FailBlocks) {
- std::vector<BasicBlock*> Blocks = { TI->getParent() };
- if (!SplitBlockPredecessors(Succ, Blocks, ".fold.split"))
+ if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split"))
return false;
}
}
@@ -1280,7 +1282,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
if (!isa<CallInst>(I1))
I1->setDebugLoc(
DILocation::getMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()));
-
+
I2->eraseFromParent();
Changed = true;
@@ -1472,29 +1474,28 @@ static bool canSinkInstructions(
return false;
}
+ // Because SROA can't handle speculating stores of selects, try not
+ // to sink loads or stores of allocas when we'd have to create a PHI for
+ // the address operand. Also, because it is likely that loads or stores
+ // of allocas will disappear when Mem2Reg/SROA is run, don't sink them.
+ // This can cause code churn which can have unintended consequences down
+ // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
+ // FIXME: This is a workaround for a deficiency in SROA - see
+ // https://llvm.org/bugs/show_bug.cgi?id=30188
+ if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
+ return isa<AllocaInst>(I->getOperand(1));
+ }))
+ return false;
+ if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
+ return isa<AllocaInst>(I->getOperand(0));
+ }))
+ return false;
+
for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
if (I0->getOperand(OI)->getType()->isTokenTy())
// Don't touch any operand of token type.
return false;
- // Because SROA can't handle speculating stores of selects, try not
- // to sink loads or stores of allocas when we'd have to create a PHI for
- // the address operand. Also, because it is likely that loads or stores
- // of allocas will disappear when Mem2Reg/SROA is run, don't sink them.
- // This can cause code churn which can have unintended consequences down
- // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
- // FIXME: This is a workaround for a deficiency in SROA - see
- // https://llvm.org/bugs/show_bug.cgi?id=30188
- if (OI == 1 && isa<StoreInst>(I0) &&
- any_of(Insts, [](const Instruction *I) {
- return isa<AllocaInst>(I->getOperand(1));
- }))
- return false;
- if (OI == 0 && isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
- return isa<AllocaInst>(I->getOperand(0));
- }))
- return false;
-
auto SameAsI0 = [&I0, OI](const Instruction *I) {
assert(I->getNumOperands() == I0->getNumOperands());
return I->getOperand(OI) == I0->getOperand(OI);
@@ -1546,7 +1547,7 @@ static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
}))
return false;
}
-
+
// We don't need to do any more checking here; canSinkLastInstruction should
// have done it all for us.
SmallVector<Value*, 4> NewOperands;
@@ -1653,7 +1654,7 @@ namespace {
bool isValid() const {
return !Fail;
}
-
+
void operator -- () {
if (Fail)
return;
@@ -1699,7 +1700,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
// / \
// [f(1)] [if]
// | | \
- // | | \
+ // | | |
// | [f(2)]|
// \ | /
// [ end ]
@@ -1737,7 +1738,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
}
if (UnconditionalPreds.size() < 2)
return false;
-
+
bool Changed = false;
// We take a two-step approach to tail sinking. First we scan from the end of
// each block upwards in lockstep. If the n'th instruction from the end of each
@@ -1767,7 +1768,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
NumPHIInsts++;
-
+
return NumPHIInsts <= 1;
};
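The NumPHIInsts bound above is a ceiling division: every value PHI-d across the predecessors costs one PHI node, and any remainder forces an extra one. A minimal standalone sketch of that arithmetic (plain C++, not part of this patch; the helper name is illustrative):
#include <cassert>

// Hypothetical helper mirroring the rounding-up division in the lambda.
unsigned numPHIInsts(unsigned NumPHIdValues, unsigned NumPreds) {
  unsigned N = NumPHIdValues / NumPreds;
  if (NumPHIdValues % NumPreds != 0)
    ++N; // a partial group still needs a whole PHI node
  return N;
}

int main() {
  assert(numPHIInsts(4, 2) == 2); // evenly divided
  assert(numPHIInsts(5, 2) == 3); // remainder rounds up
  return 0;
}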
@@ -1790,7 +1791,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
}
if (!Profitable)
return false;
-
+
DEBUG(dbgs() << "SINK: Splitting edge\n");
// We have a conditional edge and we're going to sink some instructions.
// Insert a new block postdominating all blocks we're going to sink from.
@@ -1800,7 +1801,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
return false;
Changed = true;
}
-
+
// Now that we've analyzed all potential sinking candidates, perform the
// actual sink. We iteratively sink the last non-terminator of the source
// blocks into their common successor unless doing so would require too
@@ -1826,7 +1827,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
DEBUG(dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
break;
}
-
+
if (!sinkLastInstruction(UnconditionalPreds))
return Changed;
NumSinkCommons++;
@@ -2078,6 +2079,9 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
Value *S = Builder.CreateSelect(
BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI);
SpeculatedStore->setOperand(0, S);
+ SpeculatedStore->setDebugLoc(
+ DILocation::getMergedLocation(
+ BI->getDebugLoc(), SpeculatedStore->getDebugLoc()));
}
// Metadata can be dependent on the condition we are hoisting above.
@@ -2147,7 +2151,8 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
/// If we have a conditional branch on a PHI node value that is defined in the
/// same block as the branch and if any PHI entries are constants, thread edges
/// corresponding to that entry to be branches to their ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
+static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
+ AssumptionCache *AC) {
BasicBlock *BB = BI->getParent();
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
// NOTE: we currently cannot transform this case if the PHI node is used
@@ -2239,6 +2244,11 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
// Insert the new instruction into its new home.
if (N)
EdgeBB->getInstList().insert(InsertPt, N);
+
+ // Register the new instruction with the assumption cache if necessary.
+ if (auto *II = dyn_cast_or_null<IntrinsicInst>(N))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ AC->registerAssumption(II);
}
// Loop over all of the edges from PredBB to BB, changing them to branch
@@ -2251,7 +2261,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
}
// Recurse, simplifying any other constants.
- return FoldCondBranchOnPHI(BI, DL) | true;
+ return FoldCondBranchOnPHI(BI, DL, AC) | true;
}
return false;
@@ -3433,8 +3443,8 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
// Find the relevant condition and destinations.
Value *Condition = Select->getCondition();
- BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor();
- BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor();
+ BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
+ BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
// Get weight for TrueBB and FalseBB.
uint32_t TrueWeight = 0, FalseWeight = 0;
@@ -3444,9 +3454,9 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
GetBranchWeights(SI, Weights);
if (Weights.size() == 1 + SI->getNumCases()) {
TrueWeight =
- (uint32_t)Weights[SI->findCaseValue(TrueVal).getSuccessorIndex()];
+ (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
FalseWeight =
- (uint32_t)Weights[SI->findCaseValue(FalseVal).getSuccessorIndex()];
+ (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
}
}
@@ -4148,15 +4158,16 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
}
}
} else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
- for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e;
- ++i)
- if (i.getCaseSuccessor() == BB) {
- BB->removePredecessor(SI->getParent());
- SI->removeCase(i);
- --i;
- --e;
- Changed = true;
+ for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) {
+ if (i->getCaseSuccessor() != BB) {
+ ++i;
+ continue;
}
+ BB->removePredecessor(SI->getParent());
+ i = SI->removeCase(i);
+ e = SI->case_end();
+ Changed = true;
+ }
} else if (auto *II = dyn_cast<InvokeInst>(TI)) {
if (II->getUnwindDest() == BB) {
removeUnwindEdge(TI->getParent());
@@ -4239,18 +4250,18 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
SmallVector<ConstantInt *, 16> CasesA;
SmallVector<ConstantInt *, 16> CasesB;
- for (SwitchInst::CaseIt I : SI->cases()) {
- BasicBlock *Dest = I.getCaseSuccessor();
+ for (auto Case : SI->cases()) {
+ BasicBlock *Dest = Case.getCaseSuccessor();
if (!DestA)
DestA = Dest;
if (Dest == DestA) {
- CasesA.push_back(I.getCaseValue());
+ CasesA.push_back(Case.getCaseValue());
continue;
}
if (!DestB)
DestB = Dest;
if (Dest == DestB) {
- CasesB.push_back(I.getCaseValue());
+ CasesB.push_back(Case.getCaseValue());
continue;
}
return false; // More than two destinations.
@@ -4375,7 +4386,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
bool HasDefault =
!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
const unsigned NumUnknownBits =
- Bits - (KnownZero.Or(KnownOne)).countPopulation();
+ Bits - (KnownZero | KnownOne).countPopulation();
assert(NumUnknownBits <= Bits);
if (HasDefault && DeadCases.empty() &&
NumUnknownBits < 64 /* avoid overflow */ &&
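For the known-bits arithmetic in this hunk: KnownZero | KnownOne collects every bit the analysis has proven, so subtracting its population count from the bit width yields the number of still-unknown bits, and 2^NumUnknownBits bounds how many distinct condition values remain reachable. A standalone sketch (plain C++, not LLVM's APInt; the 8-bit known masks are made up for illustration):
#include <bitset>
#include <iostream>

int main() {
  // Suppose the analysis proved bit 7 is zero and bits 0-1 are one.
  unsigned KnownZero = 0x80, KnownOne = 0x03, Bits = 8;
  unsigned NumUnknownBits =
      Bits - std::bitset<8>(KnownZero | KnownOne).count();
  // If the switch has a live case for each reachable value, the default
  // destination can be proven dead.
  std::cout << "unknown bits: " << NumUnknownBits              // 5
            << ", reachable values: " << (1u << NumUnknownBits) // 32
            << '\n';
  return 0;
}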
@@ -4400,17 +4411,17 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
// Remove dead cases from the switch.
for (ConstantInt *DeadCase : DeadCases) {
- SwitchInst::CaseIt Case = SI->findCaseValue(DeadCase);
- assert(Case != SI->case_default() &&
+ SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
+ assert(CaseI != SI->case_default() &&
"Case was not found. Probably mistake in DeadCases forming.");
if (HasWeight) {
- std::swap(Weights[Case.getCaseIndex() + 1], Weights.back());
+ std::swap(Weights[CaseI->getCaseIndex() + 1], Weights.back());
Weights.pop_back();
}
// Prune unused values from PHI nodes.
- Case.getCaseSuccessor()->removePredecessor(SI->getParent());
- SI->removeCase(Case);
+ CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
+ SI->removeCase(CaseI);
}
if (HasWeight && Weights.size() >= 2) {
SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
@@ -4464,10 +4475,9 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
typedef DenseMap<PHINode *, SmallVector<int, 4>> ForwardingNodesMap;
ForwardingNodesMap ForwardingNodes;
- for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E;
- ++I) {
- ConstantInt *CaseValue = I.getCaseValue();
- BasicBlock *CaseDest = I.getCaseSuccessor();
+ for (auto Case : SI->cases()) {
+ ConstantInt *CaseValue = Case.getCaseValue();
+ BasicBlock *CaseDest = Case.getCaseSuccessor();
int PhiIndex;
PHINode *PHI =
@@ -5202,8 +5212,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// common destination, as well as the min and max case values.
assert(SI->case_begin() != SI->case_end());
SwitchInst::CaseIt CI = SI->case_begin();
- ConstantInt *MinCaseVal = CI.getCaseValue();
- ConstantInt *MaxCaseVal = CI.getCaseValue();
+ ConstantInt *MinCaseVal = CI->getCaseValue();
+ ConstantInt *MaxCaseVal = CI->getCaseValue();
BasicBlock *CommonDest = nullptr;
typedef SmallVector<std::pair<ConstantInt *, Constant *>, 4> ResultListTy;
@@ -5213,7 +5223,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
SmallVector<PHINode *, 4> PHIs;
for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
- ConstantInt *CaseVal = CI.getCaseValue();
+ ConstantInt *CaseVal = CI->getCaseValue();
if (CaseVal->getValue().slt(MinCaseVal->getValue()))
MinCaseVal = CaseVal;
if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
@@ -5222,7 +5232,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Resulting value at phi nodes for this case value.
typedef SmallVector<std::pair<PHINode *, Constant *>, 4> ResultsTy;
ResultsTy Results;
- if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
+ if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
Results, DL, TTI))
return false;
@@ -5503,11 +5513,10 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
auto *Rot = Builder.CreateOr(LShr, Shl);
SI->replaceUsesOfWith(SI->getCondition(), Rot);
- for (SwitchInst::CaseIt C = SI->case_begin(), E = SI->case_end(); C != E;
- ++C) {
- auto *Orig = C.getCaseValue();
+ for (auto Case : SI->cases()) {
+ auto *Orig = Case.getCaseValue();
auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
- C.setValue(
+ Case.setValue(
cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
}
return true;
@@ -5553,7 +5562,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (ForwardSwitchConditionToPHI(SI))
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
- if (SwitchToLookupTable(SI, Builder, DL, TTI))
+ // The conversion from switch to lookup tables results in difficult-to-analyze
+ // code and makes pruning branches much harder. This is a problem if the
+ // switch expression itself can still be restricted as a result of inlining
+ // or CVP. Therefore, only apply this transformation during late steps of
+ // the optimisation chain.
+ if (LateSimplifyCFG && SwitchToLookupTable(SI, Builder, DL, TTI))
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
if (ReduceSwitchRange(SI, Builder, DL, TTI))
@@ -5833,7 +5847,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// through this block if any PHI node entries are constants.
if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
if (PN->getParent() == BI->getParent())
- if (FoldCondBranchOnPHI(BI, DL))
+ if (FoldCondBranchOnPHI(BI, DL, AC))
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
// Scan predecessor blocks for conditional branches.
@@ -6012,8 +6026,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
///
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
unsigned BonusInstThreshold, AssumptionCache *AC,
- SmallPtrSetImpl<BasicBlock *> *LoopHeaders) {
+ SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
+ bool LateSimplifyCFG) {
return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(),
- BonusInstThreshold, AC, LoopHeaders)
+ BonusInstThreshold, AC, LoopHeaders, LateSimplifyCFG)
.run(BB);
}
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 6b1d3dc41330..a4cc6a031ad4 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -35,6 +35,9 @@ using namespace llvm;
STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
+STATISTIC(
+ NumSimplifiedSDiv,
+ "Number of IV signed division operations converted to unsigned division");
STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
namespace {
@@ -75,6 +78,7 @@ namespace {
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
bool IsSigned);
+ bool eliminateSDiv(BinaryOperator *SDiv);
bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand);
};
}
@@ -265,6 +269,33 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
Changed = true;
}
+bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {
+ // Get the SCEVs for the SDiv operands.
+ auto *N = SE->getSCEV(SDiv->getOperand(0));
+ auto *D = SE->getSCEV(SDiv->getOperand(1));
+
+ // Simplify unnecessary loops away.
+ const Loop *L = LI->getLoopFor(SDiv->getParent());
+ N = SE->getSCEVAtScope(N, L);
+ D = SE->getSCEVAtScope(D, L);
+
+ // Replace sdiv by udiv if both of the operands are non-negative
+ if (SE->isKnownNonNegative(N) && SE->isKnownNonNegative(D)) {
+ auto *UDiv = BinaryOperator::Create(
+ BinaryOperator::UDiv, SDiv->getOperand(0), SDiv->getOperand(1),
+ SDiv->getName() + ".udiv", SDiv);
+ UDiv->setIsExact(SDiv->isExact());
+ SDiv->replaceAllUsesWith(UDiv);
+ DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n');
+ ++NumSimplifiedSDiv;
+ Changed = true;
+ DeadInsts.push_back(SDiv);
+ return true;
+ }
+
+ return false;
+}
+
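The legality argument behind eliminateSDiv is the identity it relies on: when both operands are provably non-negative, signed and unsigned division produce the same bits, and udiv is simpler to reason about and often cheaper to lower. A standalone sanity check of that identity (plain C++, not part of this patch):
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t N = 0; N <= 1000; ++N)
    for (int32_t D = 1; D <= 50; ++D)
      // Non-negative dividend, positive divisor: sdiv == udiv bit-for-bit.
      assert(N / D == int32_t(uint32_t(N) / uint32_t(D)));
  return 0;
}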
/// SimplifyIVUsers helper for eliminating useless
/// remainder operations operating on an induction variable.
void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
@@ -426,12 +457,15 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,
eliminateIVComparison(ICmp, IVOperand);
return true;
}
- if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
- bool IsSigned = Rem->getOpcode() == Instruction::SRem;
- if (IsSigned || Rem->getOpcode() == Instruction::URem) {
- eliminateIVRemainder(Rem, IVOperand, IsSigned);
+ if (BinaryOperator *Bin = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSRem = Bin->getOpcode() == Instruction::SRem;
+ if (IsSRem || Bin->getOpcode() == Instruction::URem) {
+ eliminateIVRemainder(Bin, IVOperand, IsSRem);
return true;
}
+
+ if (Bin->getOpcode() == Instruction::SDiv)
+ return eliminateSDiv(Bin);
}
if (auto *CI = dyn_cast<CallInst>(UseInst))
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index 1220490123ce..f6070868de44 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -35,7 +36,8 @@ using namespace llvm;
STATISTIC(NumSimplified, "Number of redundant instructions removed");
static bool runImpl(Function &F, const DominatorTree *DT,
- const TargetLibraryInfo *TLI, AssumptionCache *AC) {
+ const TargetLibraryInfo *TLI, AssumptionCache *AC,
+ OptimizationRemarkEmitter *ORE) {
const DataLayout &DL = F.getParent()->getDataLayout();
SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
@@ -54,7 +56,7 @@ static bool runImpl(Function &F, const DominatorTree *DT,
// Don't waste time simplifying unused instructions.
if (!I->use_empty()) {
- if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) {
+ if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC, ORE)) {
// Mark all uses for resimplification next time round the loop.
for (User *U : I->users())
Next->insert(cast<Instruction>(U));
@@ -95,6 +97,7 @@ namespace {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
/// runOnFunction - Remove instructions that simplify.
@@ -108,7 +111,10 @@ namespace {
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
AssumptionCache *AC =
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- return runImpl(F, DT, TLI, AC);
+ OptimizationRemarkEmitter *ORE =
+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+
+ return runImpl(F, DT, TLI, AC, ORE);
}
};
}
@@ -119,6 +125,7 @@ INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
"Remove redundant instructions", false, false)
char &llvm::InstructionSimplifierID = InstSimplifier::ID;
@@ -133,9 +140,12 @@ PreservedAnalyses InstSimplifierPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
- bool Changed = runImpl(F, &DT, &TLI, &AC);
+ auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ bool Changed = runImpl(F, &DT, &TLI, &AC, &ORE);
if (!Changed)
return PreservedAnalyses::all();
- // FIXME: This should also 'preserve the CFG'.
- return PreservedAnalyses::none();
+
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 8eaeb1073a76..aa71e3669ea2 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -51,9 +51,9 @@ static cl::opt<bool>
// Helper Functions
//===----------------------------------------------------------------------===//
-static bool ignoreCallingConv(LibFunc::Func Func) {
- return Func == LibFunc::abs || Func == LibFunc::labs ||
- Func == LibFunc::llabs || Func == LibFunc::strlen;
+static bool ignoreCallingConv(LibFunc Func) {
+ return Func == LibFunc_abs || Func == LibFunc_labs ||
+ Func == LibFunc_llabs || Func == LibFunc_strlen;
}
static bool isCallingConvCCompatible(CallInst *CI) {
@@ -123,8 +123,8 @@ static bool callHasFloatingPointArgument(const CallInst *CI) {
/// \brief Check whether the overloaded unary floating point function
/// corresponding to \a Ty is available.
static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
- LibFunc::Func DoubleFn, LibFunc::Func FloatFn,
- LibFunc::Func LongDoubleFn) {
+ LibFunc DoubleFn, LibFunc FloatFn,
+ LibFunc LongDoubleFn) {
switch (Ty->getTypeID()) {
case Type::FloatTyID:
return TLI->has(FloatFn);
@@ -809,9 +809,9 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
// TODO: Does this belong in BuildLibCalls or should all of those similar
// functions be moved here?
-static Value *emitCalloc(Value *Num, Value *Size, const AttributeSet &Attrs,
+static Value *emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
IRBuilder<> &B, const TargetLibraryInfo &TLI) {
- LibFunc::Func Func;
+ LibFunc Func;
if (!TLI.getLibFunc("calloc", Func) || !TLI.has(Func))
return nullptr;
@@ -819,7 +819,7 @@ static Value *emitCalloc(Value *Num, Value *Size, const AttributeSet &Attrs,
const DataLayout &DL = M->getDataLayout();
IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext()));
Value *Calloc = M->getOrInsertFunction("calloc", Attrs, B.getInt8PtrTy(),
- PtrType, PtrType, nullptr);
+ PtrType, PtrType);
CallInst *CI = B.CreateCall(Calloc, { Num, Size }, "calloc");
if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts()))
@@ -846,9 +846,9 @@ static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
// Is the inner call really malloc()?
Function *InnerCallee = Malloc->getCalledFunction();
- LibFunc::Func Func;
+ LibFunc Func;
if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
- Func != LibFunc::malloc)
+ Func != LibFunc_malloc)
return nullptr;
// The memset must cover the same number of bytes that are malloc'd.
@@ -948,6 +948,20 @@ static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
return B.CreateFPExt(V, B.getDoubleTy());
}
+// Replace a libcall \p CI with a call to intrinsic \p IID
+static Value *replaceUnaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) {
+ // Propagate fast-math flags from the existing call to the new call.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+
+ Module *M = CI->getModule();
+ Value *V = CI->getArgOperand(0);
+ Function *F = Intrinsic::getDeclaration(M, IID, CI->getType());
+ CallInst *NewCall = B.CreateCall(F, V);
+ NewCall->takeName(CI);
+ return NewCall;
+}
+
/// Shrink double -> float for binary functions like 'fmin/fmax'.
static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
@@ -1041,9 +1055,9 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
// pow(10.0, x) -> exp10(x)
if (Op1C->isExactlyValue(10.0) &&
- hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f,
- LibFunc::exp10l))
- return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp10), B,
+ hasUnaryFloatFn(TLI, Op1->getType(), LibFunc_exp10, LibFunc_exp10f,
+ LibFunc_exp10l))
+ return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc_exp10), B,
Callee->getAttributes());
}
@@ -1055,10 +1069,10 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
// pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
auto *OpC = dyn_cast<CallInst>(Op1);
if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) {
- LibFunc::Func Func;
+ LibFunc Func;
Function *OpCCallee = OpC->getCalledFunction();
if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) &&
- TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) {
+ TLI->has(Func) && (Func == LibFunc_exp || Func == LibFunc_exp2)) {
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul");
@@ -1075,17 +1089,20 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
return ConstantFP::get(CI->getType(), 1.0);
if (Op2C->isExactlyValue(-0.5) &&
- hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf,
- LibFunc::sqrtl)) {
+ hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf,
+ LibFunc_sqrtl)) {
// If -ffast-math:
// pow(x, -0.5) -> 1.0 / sqrt(x)
if (CI->hasUnsafeAlgebra()) {
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
- // Here we cannot lower to an intrinsic because C99 sqrt() and llvm.sqrt
- // are not guaranteed to have the same semantics.
- Value *Sqrt = emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B,
+ // TODO: If the pow call is an intrinsic, we should lower to the sqrt
+ // intrinsic, so we match errno semantics. We also should check that the
+ // target can in fact lower the sqrt intrinsic -- we currently have no way
+ // to ask this question other than asking whether the target has a sqrt
+ // libcall, which is a sufficient but not necessary condition.
+ Value *Sqrt = emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc_sqrt), B,
Callee->getAttributes());
return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Sqrt, "sqrtrecip");
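The rewrite above treats pow(x, -0.5) and 1.0/sqrt(x) as interchangeable; they agree only up to rounding, which is why the transform is gated on unsafe algebra. A standalone spot check (plain C++, not part of this patch):
#include <cmath>
#include <cstdio>

int main() {
  for (double X : {0.5, 2.0, 3.0, 1e6}) {
    double PowForm = std::pow(X, -0.5);
    double RecipSqrt = 1.0 / std::sqrt(X);
    // Usually identical, but the rounding paths differ, so strict IEEE
    // mode may not treat the two expressions as equivalent.
    std::printf("x=%g pow=%.17g 1/sqrt=%.17g\n", X, PowForm, RecipSqrt);
  }
  return 0;
}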
@@ -1093,19 +1110,17 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
}
if (Op2C->isExactlyValue(0.5) &&
- hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::sqrt, LibFunc::sqrtf,
- LibFunc::sqrtl) &&
- hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf,
- LibFunc::fabsl)) {
+ hasUnaryFloatFn(TLI, Op2->getType(), LibFunc_sqrt, LibFunc_sqrtf,
+ LibFunc_sqrtl)) {
// In -ffast-math, pow(x, 0.5) -> sqrt(x).
if (CI->hasUnsafeAlgebra()) {
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
- // Unlike other math intrinsics, sqrt has differerent semantics
- // from the libc function. See LangRef for details.
- return emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B,
+ // TODO: As above, we should lower to the sqrt intrinsic if the pow is an
+ // intrinsic, to match errno semantics.
+ return emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc_sqrt), B,
Callee->getAttributes());
}
@@ -1115,9 +1130,16 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
// TODO: In finite-only mode, this could be just fabs(sqrt(x)).
Value *Inf = ConstantFP::getInfinity(CI->getType());
Value *NegInf = ConstantFP::getInfinity(CI->getType(), true);
+
+ // TODO: As above, we should lower to the sqrt intrinsic if the pow is an
+ // intrinsic, to match errno semantics.
Value *Sqrt = emitUnaryFloatFnCall(Op1, "sqrt", B, Callee->getAttributes());
- Value *FAbs =
- emitUnaryFloatFnCall(Sqrt, "fabs", B, Callee->getAttributes());
+
+ Module *M = Callee->getParent();
+ Function *FabsF = Intrinsic::getDeclaration(M, Intrinsic::fabs,
+ CI->getType());
+ Value *FAbs = B.CreateCall(FabsF, Sqrt);
+
Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf);
Value *Sel = B.CreateSelect(FCmp, Inf, FAbs);
return Sel;
@@ -1173,11 +1195,11 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
Value *Op = CI->getArgOperand(0);
// Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
// Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
- LibFunc::Func LdExp = LibFunc::ldexpl;
+ LibFunc LdExp = LibFunc_ldexpl;
if (Op->getType()->isFloatTy())
- LdExp = LibFunc::ldexpf;
+ LdExp = LibFunc_ldexpf;
else if (Op->getType()->isDoubleTy())
- LdExp = LibFunc::ldexp;
+ LdExp = LibFunc_ldexp;
if (TLI->has(LdExp)) {
Value *LdExpArg = nullptr;
@@ -1197,7 +1219,7 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
Module *M = CI->getModule();
Value *NewCallee =
M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(),
- Op->getType(), B.getInt32Ty(), nullptr);
+ Op->getType(), B.getInt32Ty());
CallInst *CI = B.CreateCall(NewCallee, {One, LdExpArg});
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
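The exp2-to-ldexp rewrite in this function rests on the identity exp2(n) == ldexp(1.0, n) for integral n; both sides are exact powers of two, so on common libm implementations the equality is bit-exact. A standalone check under that assumption (plain C++, not part of this patch):
#include <cassert>
#include <cmath>

int main() {
  for (int N = -100; N <= 100; ++N)
    // Both sides are exactly representable powers of two; a reasonable
    // libm returns them exactly, making the comparison bit-exact.
    assert(std::exp2(double(N)) == std::ldexp(1.0, N));
  return 0;
}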
@@ -1208,15 +1230,6 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
return Ret;
}
-Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- StringRef Name = Callee->getName();
- if (Name == "fabs" && hasFloatVersion(Name))
- return optimizeUnaryDoubleFP(CI, B, false);
-
- return nullptr;
-}
-
Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
// If we can shrink the call to a float function rather than a double
@@ -1280,17 +1293,17 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
FMF.setUnsafeAlgebra();
B.setFastMathFlags(FMF);
- LibFunc::Func Func;
+ LibFunc Func;
Function *F = OpC->getCalledFunction();
if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
- Func == LibFunc::pow) || F->getIntrinsicID() == Intrinsic::pow))
+ Func == LibFunc_pow) || F->getIntrinsicID() == Intrinsic::pow))
return B.CreateFMul(OpC->getArgOperand(1),
emitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B,
Callee->getAttributes()), "mul");
// log(exp2(y)) -> y*log(2)
if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) &&
- TLI->has(Func) && Func == LibFunc::exp2)
+ TLI->has(Func) && Func == LibFunc_exp2)
return B.CreateFMul(
OpC->getArgOperand(0),
emitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0),
@@ -1302,8 +1315,11 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
- if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" ||
- Callee->getIntrinsicID() == Intrinsic::sqrt))
+ // TODO: Once we have a way (other than checking for the existence of the
+ // libcall) to tell whether our target can lower @llvm.sqrt, relax the
+ // condition below.
+ if (TLI->has(LibFunc_sqrtf) && (Callee->getName() == "sqrt" ||
+ Callee->getIntrinsicID() == Intrinsic::sqrt))
Ret = optimizeUnaryDoubleFP(CI, B, true);
if (!CI->hasUnsafeAlgebra())
@@ -1385,12 +1401,12 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
// tan(atan(x)) -> x
// tanf(atanf(x)) -> x
// tanl(atanl(x)) -> x
- LibFunc::Func Func;
+ LibFunc Func;
Function *F = OpC->getCalledFunction();
if (F && TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) &&
- ((Func == LibFunc::atan && Callee->getName() == "tan") ||
- (Func == LibFunc::atanf && Callee->getName() == "tanf") ||
- (Func == LibFunc::atanl && Callee->getName() == "tanl")))
+ ((Func == LibFunc_atan && Callee->getName() == "tan") ||
+ (Func == LibFunc_atanf && Callee->getName() == "tanf") ||
+ (Func == LibFunc_atanl && Callee->getName() == "tanl")))
Ret = OpC->getArgOperand(0);
return Ret;
}
@@ -1427,7 +1443,7 @@ static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg,
Module *M = OrigCallee->getParent();
Value *Callee = M->getOrInsertFunction(Name, OrigCallee->getAttributes(),
- ResTy, ArgTy, nullptr);
+ ResTy, ArgTy);
if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
// If the argument is an instruction, it must dominate all uses so put our
@@ -1508,24 +1524,24 @@ void LibCallSimplifier::classifyArgUse(
return;
Function *Callee = CI->getCalledFunction();
- LibFunc::Func Func;
+ LibFunc Func;
if (!Callee || !TLI->getLibFunc(*Callee, Func) || !TLI->has(Func) ||
!isTrigLibCall(CI))
return;
if (IsFloat) {
- if (Func == LibFunc::sinpif)
+ if (Func == LibFunc_sinpif)
SinCalls.push_back(CI);
- else if (Func == LibFunc::cospif)
+ else if (Func == LibFunc_cospif)
CosCalls.push_back(CI);
- else if (Func == LibFunc::sincospif_stret)
+ else if (Func == LibFunc_sincospif_stret)
SinCosCalls.push_back(CI);
} else {
- if (Func == LibFunc::sinpi)
+ if (Func == LibFunc_sinpi)
SinCalls.push_back(CI);
- else if (Func == LibFunc::cospi)
+ else if (Func == LibFunc_cospi)
CosCalls.push_back(CI);
- else if (Func == LibFunc::sincospi_stret)
+ else if (Func == LibFunc_sincospi_stret)
SinCosCalls.push_back(CI);
}
}
@@ -1609,7 +1625,7 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B,
// Proceedings of PACT'98, Oct. 1998, IEEE
if (!CI->hasFnAttr(Attribute::Cold) &&
isReportingError(Callee, CI, StreamArg)) {
- CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold);
+ CI->addAttribute(AttributeList::FunctionIndex, Attribute::Cold);
}
return nullptr;
@@ -1699,7 +1715,7 @@ Value *LibCallSimplifier::optimizePrintF(CallInst *CI, IRBuilder<> &B) {
// printf(format, ...) -> iprintf(format, ...) if no floating point
// arguments.
- if (TLI->has(LibFunc::iprintf) && !callHasFloatingPointArgument(CI)) {
+ if (TLI->has(LibFunc_iprintf) && !callHasFloatingPointArgument(CI)) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
Constant *IPrintFFn =
M->getOrInsertFunction("iprintf", FT, Callee->getAttributes());
@@ -1780,7 +1796,7 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {
// sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
// point arguments.
- if (TLI->has(LibFunc::siprintf) && !callHasFloatingPointArgument(CI)) {
+ if (TLI->has(LibFunc_siprintf) && !callHasFloatingPointArgument(CI)) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
Constant *SIPrintFFn =
M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
@@ -1850,7 +1866,7 @@ Value *LibCallSimplifier::optimizeFPrintF(CallInst *CI, IRBuilder<> &B) {
// fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
// floating point arguments.
- if (TLI->has(LibFunc::fiprintf) && !callHasFloatingPointArgument(CI)) {
+ if (TLI->has(LibFunc_fiprintf) && !callHasFloatingPointArgument(CI)) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
Constant *FIPrintFFn =
M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
@@ -1929,7 +1945,7 @@ Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {
}
bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
- LibFunc::Func Func;
+ LibFunc Func;
SmallString<20> FloatFuncName = FuncName;
FloatFuncName += 'f';
if (TLI->getLibFunc(FloatFuncName, Func))
@@ -1939,7 +1955,7 @@ bool LibCallSimplifier::hasFloatVersion(StringRef FuncName) {
Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
IRBuilder<> &Builder) {
- LibFunc::Func Func;
+ LibFunc Func;
Function *Callee = CI->getCalledFunction();
// Check for string/memory library functions.
if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
@@ -1948,51 +1964,51 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
isCallingConvCCompatible(CI)) &&
"Optimizing string/memory libcall would change the calling convention");
switch (Func) {
- case LibFunc::strcat:
+ case LibFunc_strcat:
return optimizeStrCat(CI, Builder);
- case LibFunc::strncat:
+ case LibFunc_strncat:
return optimizeStrNCat(CI, Builder);
- case LibFunc::strchr:
+ case LibFunc_strchr:
return optimizeStrChr(CI, Builder);
- case LibFunc::strrchr:
+ case LibFunc_strrchr:
return optimizeStrRChr(CI, Builder);
- case LibFunc::strcmp:
+ case LibFunc_strcmp:
return optimizeStrCmp(CI, Builder);
- case LibFunc::strncmp:
+ case LibFunc_strncmp:
return optimizeStrNCmp(CI, Builder);
- case LibFunc::strcpy:
+ case LibFunc_strcpy:
return optimizeStrCpy(CI, Builder);
- case LibFunc::stpcpy:
+ case LibFunc_stpcpy:
return optimizeStpCpy(CI, Builder);
- case LibFunc::strncpy:
+ case LibFunc_strncpy:
return optimizeStrNCpy(CI, Builder);
- case LibFunc::strlen:
+ case LibFunc_strlen:
return optimizeStrLen(CI, Builder);
- case LibFunc::strpbrk:
+ case LibFunc_strpbrk:
return optimizeStrPBrk(CI, Builder);
- case LibFunc::strtol:
- case LibFunc::strtod:
- case LibFunc::strtof:
- case LibFunc::strtoul:
- case LibFunc::strtoll:
- case LibFunc::strtold:
- case LibFunc::strtoull:
+ case LibFunc_strtol:
+ case LibFunc_strtod:
+ case LibFunc_strtof:
+ case LibFunc_strtoul:
+ case LibFunc_strtoll:
+ case LibFunc_strtold:
+ case LibFunc_strtoull:
return optimizeStrTo(CI, Builder);
- case LibFunc::strspn:
+ case LibFunc_strspn:
return optimizeStrSpn(CI, Builder);
- case LibFunc::strcspn:
+ case LibFunc_strcspn:
return optimizeStrCSpn(CI, Builder);
- case LibFunc::strstr:
+ case LibFunc_strstr:
return optimizeStrStr(CI, Builder);
- case LibFunc::memchr:
+ case LibFunc_memchr:
return optimizeMemChr(CI, Builder);
- case LibFunc::memcmp:
+ case LibFunc_memcmp:
return optimizeMemCmp(CI, Builder);
- case LibFunc::memcpy:
+ case LibFunc_memcpy:
return optimizeMemCpy(CI, Builder);
- case LibFunc::memmove:
+ case LibFunc_memmove:
return optimizeMemMove(CI, Builder);
- case LibFunc::memset:
+ case LibFunc_memset:
return optimizeMemSet(CI, Builder);
default:
break;
@@ -2005,7 +2021,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
if (CI->isNoBuiltin())
return nullptr;
- LibFunc::Func Func;
+ LibFunc Func;
Function *Callee = CI->getCalledFunction();
StringRef FuncName = Callee->getName();
@@ -2029,8 +2045,6 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
return optimizePow(CI, Builder);
case Intrinsic::exp2:
return optimizeExp2(CI, Builder);
- case Intrinsic::fabs:
- return optimizeFabs(CI, Builder);
case Intrinsic::log:
return optimizeLog(CI, Builder);
case Intrinsic::sqrt:
@@ -2067,114 +2081,117 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
if (Value *V = optimizeStringMemoryLibCall(CI, Builder))
return V;
switch (Func) {
- case LibFunc::cosf:
- case LibFunc::cos:
- case LibFunc::cosl:
+ case LibFunc_cosf:
+ case LibFunc_cos:
+ case LibFunc_cosl:
return optimizeCos(CI, Builder);
- case LibFunc::sinpif:
- case LibFunc::sinpi:
- case LibFunc::cospif:
- case LibFunc::cospi:
+ case LibFunc_sinpif:
+ case LibFunc_sinpi:
+ case LibFunc_cospif:
+ case LibFunc_cospi:
return optimizeSinCosPi(CI, Builder);
- case LibFunc::powf:
- case LibFunc::pow:
- case LibFunc::powl:
+ case LibFunc_powf:
+ case LibFunc_pow:
+ case LibFunc_powl:
return optimizePow(CI, Builder);
- case LibFunc::exp2l:
- case LibFunc::exp2:
- case LibFunc::exp2f:
+ case LibFunc_exp2l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
return optimizeExp2(CI, Builder);
- case LibFunc::fabsf:
- case LibFunc::fabs:
- case LibFunc::fabsl:
- return optimizeFabs(CI, Builder);
- case LibFunc::sqrtf:
- case LibFunc::sqrt:
- case LibFunc::sqrtl:
+ case LibFunc_fabsf:
+ case LibFunc_fabs:
+ case LibFunc_fabsl:
+ return replaceUnaryCall(CI, Builder, Intrinsic::fabs);
+ case LibFunc_sqrtf:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtl:
return optimizeSqrt(CI, Builder);
- case LibFunc::ffs:
- case LibFunc::ffsl:
- case LibFunc::ffsll:
+ case LibFunc_ffs:
+ case LibFunc_ffsl:
+ case LibFunc_ffsll:
return optimizeFFS(CI, Builder);
- case LibFunc::fls:
- case LibFunc::flsl:
- case LibFunc::flsll:
+ case LibFunc_fls:
+ case LibFunc_flsl:
+ case LibFunc_flsll:
return optimizeFls(CI, Builder);
- case LibFunc::abs:
- case LibFunc::labs:
- case LibFunc::llabs:
+ case LibFunc_abs:
+ case LibFunc_labs:
+ case LibFunc_llabs:
return optimizeAbs(CI, Builder);
- case LibFunc::isdigit:
+ case LibFunc_isdigit:
return optimizeIsDigit(CI, Builder);
- case LibFunc::isascii:
+ case LibFunc_isascii:
return optimizeIsAscii(CI, Builder);
- case LibFunc::toascii:
+ case LibFunc_toascii:
return optimizeToAscii(CI, Builder);
- case LibFunc::printf:
+ case LibFunc_printf:
return optimizePrintF(CI, Builder);
- case LibFunc::sprintf:
+ case LibFunc_sprintf:
return optimizeSPrintF(CI, Builder);
- case LibFunc::fprintf:
+ case LibFunc_fprintf:
return optimizeFPrintF(CI, Builder);
- case LibFunc::fwrite:
+ case LibFunc_fwrite:
return optimizeFWrite(CI, Builder);
- case LibFunc::fputs:
+ case LibFunc_fputs:
return optimizeFPuts(CI, Builder);
- case LibFunc::log:
- case LibFunc::log10:
- case LibFunc::log1p:
- case LibFunc::log2:
- case LibFunc::logb:
+ case LibFunc_log:
+ case LibFunc_log10:
+ case LibFunc_log1p:
+ case LibFunc_log2:
+ case LibFunc_logb:
return optimizeLog(CI, Builder);
- case LibFunc::puts:
+ case LibFunc_puts:
return optimizePuts(CI, Builder);
- case LibFunc::tan:
- case LibFunc::tanf:
- case LibFunc::tanl:
+ case LibFunc_tan:
+ case LibFunc_tanf:
+ case LibFunc_tanl:
return optimizeTan(CI, Builder);
- case LibFunc::perror:
+ case LibFunc_perror:
return optimizeErrorReporting(CI, Builder);
- case LibFunc::vfprintf:
- case LibFunc::fiprintf:
+ case LibFunc_vfprintf:
+ case LibFunc_fiprintf:
return optimizeErrorReporting(CI, Builder, 0);
- case LibFunc::fputc:
+ case LibFunc_fputc:
return optimizeErrorReporting(CI, Builder, 1);
- case LibFunc::ceil:
- case LibFunc::floor:
- case LibFunc::rint:
- case LibFunc::round:
- case LibFunc::nearbyint:
- case LibFunc::trunc:
- if (hasFloatVersion(FuncName))
- return optimizeUnaryDoubleFP(CI, Builder, false);
- return nullptr;
- case LibFunc::acos:
- case LibFunc::acosh:
- case LibFunc::asin:
- case LibFunc::asinh:
- case LibFunc::atan:
- case LibFunc::atanh:
- case LibFunc::cbrt:
- case LibFunc::cosh:
- case LibFunc::exp:
- case LibFunc::exp10:
- case LibFunc::expm1:
- case LibFunc::sin:
- case LibFunc::sinh:
- case LibFunc::tanh:
+ case LibFunc_ceil:
+ return replaceUnaryCall(CI, Builder, Intrinsic::ceil);
+ case LibFunc_floor:
+ return replaceUnaryCall(CI, Builder, Intrinsic::floor);
+ case LibFunc_round:
+ return replaceUnaryCall(CI, Builder, Intrinsic::round);
+ case LibFunc_nearbyint:
+ return replaceUnaryCall(CI, Builder, Intrinsic::nearbyint);
+ case LibFunc_rint:
+ return replaceUnaryCall(CI, Builder, Intrinsic::rint);
+ case LibFunc_trunc:
+ return replaceUnaryCall(CI, Builder, Intrinsic::trunc);
+ case LibFunc_acos:
+ case LibFunc_acosh:
+ case LibFunc_asin:
+ case LibFunc_asinh:
+ case LibFunc_atan:
+ case LibFunc_atanh:
+ case LibFunc_cbrt:
+ case LibFunc_cosh:
+ case LibFunc_exp:
+ case LibFunc_exp10:
+ case LibFunc_expm1:
+ case LibFunc_sin:
+ case LibFunc_sinh:
+ case LibFunc_tanh:
if (UnsafeFPShrink && hasFloatVersion(FuncName))
return optimizeUnaryDoubleFP(CI, Builder, true);
return nullptr;
- case LibFunc::copysign:
+ case LibFunc_copysign:
if (hasFloatVersion(FuncName))
return optimizeBinaryDoubleFP(CI, Builder);
return nullptr;
- case LibFunc::fminf:
- case LibFunc::fmin:
- case LibFunc::fminl:
- case LibFunc::fmaxf:
- case LibFunc::fmax:
- case LibFunc::fmaxl:
+ case LibFunc_fminf:
+ case LibFunc_fmin:
+ case LibFunc_fminl:
+ case LibFunc_fmaxf:
+ case LibFunc_fmax:
+ case LibFunc_fmaxl:
return optimizeFMinFMax(CI, Builder);
default:
return nullptr;
@@ -2211,16 +2228,10 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
// * log(exp10(y)) -> y*log(10)
// * log(sqrt(x)) -> 0.5*log(x)
//
-// lround, lroundf, lroundl:
-// * lround(cnst) -> cnst'
-//
// pow, powf, powl:
// * pow(sqrt(x),y) -> pow(x,y*0.5)
// * pow(pow(x,y),z)-> pow(x,y*z)
//
-// round, roundf, roundl:
-// * round(cnst) -> cnst'
-//
// signbit:
// * signbit(cnst) -> cnst'
// * signbit(nncst) -> 0 (if pstv is a non-negative constant)
@@ -2230,10 +2241,6 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
// * sqrt(Nroot(x)) -> pow(x,1/(2*N))
// * sqrt(pow(x,y)) -> pow(|x|,y*0.5)
//
-// trunc, truncf, truncl:
-// * trunc(cnst) -> cnst'
-//
-//
//===----------------------------------------------------------------------===//
// Fortified Library Call Optimizations
@@ -2300,7 +2307,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
IRBuilder<> &B,
- LibFunc::Func Func) {
+ LibFunc Func) {
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
const DataLayout &DL = CI->getModule()->getDataLayout();
@@ -2308,7 +2315,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
*ObjSize = CI->getArgOperand(2);
// __stpcpy_chk(x,x,...) -> x+strlen(x)
- if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
+ if (Func == LibFunc_stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
Value *StrLen = emitStrLen(Src, B, DL, TLI);
return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
}
@@ -2334,14 +2341,14 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
// If the function was an __stpcpy_chk, and we were able to fold it into
// a __memcpy_chk, we still need to return the correct end pointer.
- if (Ret && Func == LibFunc::stpcpy_chk)
+ if (Ret && Func == LibFunc_stpcpy_chk)
return B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(SizeTTy, Len - 1));
return Ret;
}
Value *FortifiedLibCallSimplifier::optimizeStrpNCpyChk(CallInst *CI,
IRBuilder<> &B,
- LibFunc::Func Func) {
+ LibFunc Func) {
Function *Callee = CI->getCalledFunction();
StringRef Name = Callee->getName();
if (isFortifiedCallFoldable(CI, 3, 2, false)) {
@@ -2366,7 +2373,7 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
//
// PR23093.
- LibFunc::Func Func;
+ LibFunc Func;
Function *Callee = CI->getCalledFunction();
SmallVector<OperandBundleDef, 2> OpBundles;
@@ -2384,17 +2391,17 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
return nullptr;
switch (Func) {
- case LibFunc::memcpy_chk:
+ case LibFunc_memcpy_chk:
return optimizeMemCpyChk(CI, Builder);
- case LibFunc::memmove_chk:
+ case LibFunc_memmove_chk:
return optimizeMemMoveChk(CI, Builder);
- case LibFunc::memset_chk:
+ case LibFunc_memset_chk:
return optimizeMemSetChk(CI, Builder);
- case LibFunc::stpcpy_chk:
- case LibFunc::strcpy_chk:
+ case LibFunc_stpcpy_chk:
+ case LibFunc_strcpy_chk:
return optimizeStrpCpyChk(CI, Builder, Func);
- case LibFunc::stpncpy_chk:
- case LibFunc::strncpy_chk:
+ case LibFunc_stpncpy_chk:
+ case LibFunc_strncpy_chk:
return optimizeStrpNCpyChk(CI, Builder, Func);
default:
break;
diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp
index 7b9de2eadc61..7106483c3bd2 100644
--- a/lib/Transforms/Utils/Utils.cpp
+++ b/lib/Transforms/Utils/Utils.cpp
@@ -35,9 +35,8 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeUnifyFunctionExitNodesPass(Registry);
initializeInstSimplifierPass(Registry);
initializeMetaRenamerPass(Registry);
- initializeMemorySSAWrapperPassPass(Registry);
- initializeMemorySSAPrinterLegacyPassPass(Registry);
initializeStripGCRelocatesPass(Registry);
+ initializePredicateInfoPrinterLegacyPassPass(Registry);
}
/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses.
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
new file mode 100644
index 000000000000..4aeea02b1b1b
--- /dev/null
+++ b/lib/Transforms/Utils/VNCoercion.cpp
@@ -0,0 +1,482 @@
+#include "llvm/Transforms/Utils/VNCoercion.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "vncoerce"
+namespace llvm {
+namespace VNCoercion {
+
+/// Return true if coerceAvailableValueToLoadType will succeed.
+bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
+ const DataLayout &DL) {
+ // If the loaded or stored value is a first class array or struct, don't try
+ // to transform them. We need to be able to bitcast to integer.
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
+ StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
+ return false;
+
+ // The store has to be at least as big as the load.
+ if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy))
+ return false;
+
+ return true;
+}
+
+template <class T, class HelperClass>
+static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy,
+ HelperClass &Helper,
+ const DataLayout &DL) {
+ assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
+ "precondition violation - materialization can't fail");
+ if (auto *C = dyn_cast<Constant>(StoredVal))
+ if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
+ StoredVal = FoldedStoredVal;
+
+ // If this is already the right type, just return it.
+ Type *StoredValTy = StoredVal->getType();
+
+ uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy);
+ uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy);
+
+ // If the store and reload are the same size, we can always reuse it.
+ if (StoredValSize == LoadedValSize) {
+ // Pointer to Pointer -> use bitcast.
+ if (StoredValTy->getScalarType()->isPointerTy() &&
+ LoadedTy->getScalarType()->isPointerTy()) {
+ StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
+ } else {
+ // Convert source pointers to integers, which can be bitcast.
+ if (StoredValTy->getScalarType()->isPointerTy()) {
+ StoredValTy = DL.getIntPtrType(StoredValTy);
+ StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
+ }
+
+ Type *TypeToCastTo = LoadedTy;
+ if (TypeToCastTo->getScalarType()->isPointerTy())
+ TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
+
+ if (StoredValTy != TypeToCastTo)
+ StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo);
+
+ // Cast to pointer if the load needs a pointer type.
+ if (LoadedTy->getScalarType()->isPointerTy())
+ StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
+ }
+
+ if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
+ if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
+ StoredVal = FoldedStoredVal;
+
+ return StoredVal;
+ }
+ // If the loaded value is smaller than the available value, then we can
+ // extract out a piece from it. If the available value is too small, then we
+ // can't do anything.
+ assert(StoredValSize >= LoadedValSize &&
+ "canCoerceMustAliasedValueToLoad fail");
+
+ // Convert source pointers to integers, which can be manipulated.
+ if (StoredValTy->getScalarType()->isPointerTy()) {
+ StoredValTy = DL.getIntPtrType(StoredValTy);
+ StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
+ }
+
+ // Convert vectors and fp to integer, which can be manipulated.
+ if (!StoredValTy->isIntegerTy()) {
+ StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
+ StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy);
+ }
+
+ // If this is a big-endian system, we need to shift the value down to the low
+ // bits so that a truncate will work.
+ if (DL.isBigEndian()) {
+ uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy) -
+ DL.getTypeStoreSizeInBits(LoadedTy);
+ StoredVal = Helper.CreateLShr(
+ StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt));
+ }
+
+ // Truncate the integer to the right size now.
+ Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
+ StoredVal = Helper.CreateTruncOrBitCast(StoredVal, NewIntTy);
+
+ if (LoadedTy != NewIntTy) {
+ // If the result is a pointer, inttoptr.
+ if (LoadedTy->getScalarType()->isPointerTy())
+ StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
+ else
+ // Otherwise, bitcast.
+ StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
+ }
+
+ if (auto *C = dyn_cast<Constant>(StoredVal))
+ if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
+ StoredVal = FoldedStoredVal;
+
+ return StoredVal;
+}
+
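For intuition on the shift-and-truncate path above: reusing a wider stored integer for a narrower load means taking the bytes at the load's address, which hold the low-order bits on little-endian targets and the high-order bits on big-endian ones, hence the lshr before the trunc. A standalone sketch (plain C++, not part of this patch; assumes a little-endian host):
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Stored = 0xAABBCCDD; // 32-bit value written to memory
  uint16_t ViaMemory;           // 16-bit load from the same address
  std::memcpy(&ViaMemory, &Stored, sizeof(ViaMemory));
  // Little-endian: the first two bytes hold the low bits, so a plain
  // truncation reproduces the load; big-endian would need Stored >> 16.
  assert(ViaMemory == uint16_t(Stored));
  return 0;
}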
+/// If we saw a store of a value to memory, and
+/// then a load from a must-aliased pointer of a different type, try to coerce
+/// the stored value. LoadedTy is the type of the load we want to replace.
+/// IRB is IRBuilder used to insert new instructions.
+///
+/// If we can't do it, return null.
+Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
+ IRBuilder<> &IRB, const DataLayout &DL) {
+ return coerceAvailableValueToLoadTypeHelper(StoredVal, LoadedTy, IRB, DL);
+}
+
+/// This function is called when we have a memdep query of a load that ends up
+/// being a clobbering memory write (store, memset, memcpy, memmove). This
+/// means that the write *may* provide bits used by the load but we can't be
+/// sure because the pointers don't must-alias.
+///
+/// Check this case to see if there is anything more we can do before we give
+/// up. This returns -1 if we have to give up, or a byte number in the stored
+/// value of the piece that feeds the load.
+static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
+ Value *WritePtr,
+ uint64_t WriteSizeInBits,
+ const DataLayout &DL) {
+ // If the loaded or stored value is a first class array or struct, don't try
+ // to transform them. We need to be able to bitcast to integer.
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy())
+ return -1;
+
+ int64_t StoreOffset = 0, LoadOffset = 0;
+ Value *StoreBase =
+ GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
+ Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
+ if (StoreBase != LoadBase)
+ return -1;
+
+ // If the load and store are to the exact same address, they should have been
+ // a must alias. AA must have gotten confused.
+ // FIXME: Study to see if/when this happens. One case is forwarding a memset
+ // to a load from the base of the memset.
+
+ // If the load and store don't overlap at all, the store doesn't provide
+ // anything to the load. In this case, they really don't alias at all, AA
+ // must have gotten confused.
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy);
+
+ if ((WriteSizeInBits & 7) | (LoadSize & 7))
+ return -1;
+ uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
+ LoadSize /= 8;
+
+ bool isAAFailure = false;
+ if (StoreOffset < LoadOffset)
+ isAAFailure = StoreOffset + int64_t(StoreSize) <= LoadOffset;
+ else
+ isAAFailure = LoadOffset + int64_t(LoadSize) <= StoreOffset;
+
+ if (isAAFailure)
+ return -1;
+
+ // If the load isn't completely contained within the stored bits, we don't
+ // have all the bits to feed it. We could do something crazy in the future
+ // (issue a smaller load and then merge the bits in), but this seems unlikely
+ // to be valuable.
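+ // For example, a 4-byte load at offset 6 of an 8-byte store would need bytes
+ // 6..9, but the store only provides bytes 0..7.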
+ if (StoreOffset > LoadOffset ||
+ StoreOffset + StoreSize < LoadOffset + LoadSize)
+ return -1;
+
+ // Okay, we can do this transformation. Return the number of bytes into the
+ // store that the load is.
+ return LoadOffset - StoreOffset;
+}
+
+/// This function is called when we have a memdep query of a load that ends up
+/// being a clobbering store.
+int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
+ StoreInst *DepSI, const DataLayout &DL) {
+ // Cannot handle reading from a store of a first-class aggregate yet.
+ if (DepSI->getValueOperand()->getType()->isStructTy() ||
+ DepSI->getValueOperand()->getType()->isArrayTy())
+ return -1;
+
+ Value *StorePtr = DepSI->getPointerOperand();
+ uint64_t StoreSize =
+ DL.getTypeSizeInBits(DepSI->getValueOperand()->getType());
+ return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
+ DL);
+}
+
+/// This function is called when we have a memdep query of a load that ends up
+/// being clobbered by another load. See if the other load can feed into the
+/// second load.
+int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
+ const DataLayout &DL) {
+ // Cannot handle reading from a load of a first-class aggregate yet.
+ if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
+ return -1;
+
+ Value *DepPtr = DepLI->getPointerOperand();
+ uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType());
+ int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
+ if (R != -1)
+ return R;
+
+ // If we have a load/load clobber and DepLI can be widened to cover this load,
+ // then we should widen it!
+ int64_t LoadOffs = 0;
+ const Value *LoadBase =
+ GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+
+ unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
+ LoadBase, LoadOffs, LoadSize, DepLI);
+ if (Size == 0)
+ return -1;
+
+ // Check non-obvious conditions enforced by MDA which we rely on for being
+ // able to materialize this potentially available value.
+ assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
+ assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");
+
+ return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL);
+}
+
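+/// This function is called when we have a memdep query of a load that ends up
+/// being a clobbering mem intrinsic (memset or memcpy/memmove). Returns the
+/// byte offset of the load within the write, or -1 if the load cannot be
+/// satisfied by the intrinsic.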
+int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
+ MemIntrinsic *MI, const DataLayout &DL) {
+ // If the mem operation is a non-constant size, we can't handle it.
+ ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
+ if (!SizeCst)
+ return -1;
+ uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8;
+
+ // If this is a memset, we just need to see if the offset is valid within the
+ // size of the memset.
+ if (MI->getIntrinsicID() == Intrinsic::memset)
+ return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+ MemSizeInBits, DL);
+
+ // If we have a memcpy/memmove, the only case we can handle is if this is a
+ // copy from constant memory. In that case, we can read directly from the
+ // constant memory.
+ MemTransferInst *MTI = cast<MemTransferInst>(MI);
+
+ Constant *Src = dyn_cast<Constant>(MTI->getSource());
+ if (!Src)
+ return -1;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, DL));
+ if (!GV || !GV->isConstant())
+ return -1;
+
+ // See if the access is within the bounds of the transfer.
+ int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+ MemSizeInBits, DL);
+ if (Offset == -1)
+ return Offset;
+
+ unsigned AS = Src->getType()->getPointerAddressSpace();
+ // Otherwise, see if we can constant fold a load from the constant with the
+ // offset applied as appropriate.
+ Src =
+ ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+ OffsetCst);
+ Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
+ if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL))
+ return Offset;
+ return -1;
+}
+
+template <class T, class HelperClass>
+static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy,
+ HelperClass &Helper,
+ const DataLayout &DL) {
+ LLVMContext &Ctx = SrcVal->getType()->getContext();
+
+ uint64_t StoreSize = (DL.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
+ uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8;
+ // Compute which bits of the stored value are being used by the load. Convert
+ // to an integer type to start with.
+ if (SrcVal->getType()->getScalarType()->isPointerTy())
+ SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
+ if (!SrcVal->getType()->isIntegerTy())
+ SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));
+
+ // Shift the bits to the least significant depending on endianness.
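+ // For example, with an 8-byte store, a 2-byte load, and Offset == 4, the
+ // shift amount is 4 * 8 = 32 bits on little-endian and (8 - 2 - 4) * 8 = 16
+ // bits on big-endian.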
+ unsigned ShiftAmt;
+ if (DL.isLittleEndian())
+ ShiftAmt = Offset * 8;
+ else
+ ShiftAmt = (StoreSize - LoadSize - Offset) * 8;
+ if (ShiftAmt)
+ SrcVal = Helper.CreateLShr(SrcVal,
+ ConstantInt::get(SrcVal->getType(), ShiftAmt));
+
+ if (LoadSize != StoreSize)
+ SrcVal = Helper.CreateTruncOrBitCast(SrcVal,
+ IntegerType::get(Ctx, LoadSize * 8));
+ return SrcVal;
+}
+
+/// This function is called when we have a memdep query of a load that ends up
+/// being a clobbering store. This means that the store provides bits used by
+/// the load but the pointers don't must-alias. Check this case to see if
+/// there is anything more we can do before we give up.
+Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
+ Instruction *InsertPt, const DataLayout &DL) {
+
+ IRBuilder<> Builder(InsertPt);
+ SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
+ return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, Builder, DL);
+}
+
+Constant *getConstantStoreValueForLoad(Constant *SrcVal, unsigned Offset,
+ Type *LoadTy, const DataLayout &DL) {
+ ConstantFolder F;
+ SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, F, DL);
+ return coerceAvailableValueToLoadTypeHelper(SrcVal, LoadTy, F, DL);
+}
+
+/// This function is called when we have a memdep query of a load that ends up
+/// being a clobbering load. This means that the clobbering load *may* provide
+/// bits used by this load, but we can't be sure because the pointers don't
+/// must-alias.
+/// Check this case to see if there is anything more we can do before we give
+/// up.
+Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
+ Instruction *InsertPt, const DataLayout &DL) {
+ // If Offset+LoadTy exceeds the size of SrcVal, then we must be widening
+ // SrcVal out to a larger load.
+ unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+ if (Offset + LoadSize > SrcValStoreSize) {
+ assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
+ assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
+ // If we have a load/load clobber and DepLI can be widened to cover this
+ // load, then we should widen it to the next power-of-2 size big enough!
+ unsigned NewLoadSize = Offset + LoadSize;
+ if (!isPowerOf2_32(NewLoadSize))
+ NewLoadSize = NextPowerOf2(NewLoadSize);
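+ // For example, Offset == 3 with a 4-byte load requires 7 bytes and widens
+ // to an 8-byte load.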
+
+ Value *PtrVal = SrcVal->getPointerOperand();
+ // Insert the new load after the old load. This ensures that subsequent
+ // memdep queries will find the new load. We can't easily remove the old
+ // load completely because it is already in the value numbering table.
+ IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
+ Type *DestPTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
+ DestPTy =
+ PointerType::get(DestPTy, PtrVal->getType()->getPointerAddressSpace());
+ Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
+ PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
+ LoadInst *NewLoad = Builder.CreateLoad(PtrVal);
+ NewLoad->takeName(SrcVal);
+ NewLoad->setAlignment(SrcVal->getAlignment());
+
+ DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
+ DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
+
+ // Replace uses of the original load with the wider load. On a big endian
+ // system, we need to shift down to get the relevant bits.
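+ // For example, after widening a 4-byte load to 8 bytes on a big-endian
+ // target, the original bytes sit in the high half, so shift right by
+ // (8 - 4) * 8 = 32 bits before truncating.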
+ Value *RV = NewLoad;
+ if (DL.isBigEndian())
+ RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8);
+ RV = Builder.CreateTrunc(RV, SrcVal->getType());
+ SrcVal->replaceAllUsesWith(RV);
+
+ SrcVal = NewLoad;
+ }
+
+ return getStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
+}
+
+Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset,
+ Type *LoadTy, const DataLayout &DL) {
+ unsigned SrcValStoreSize = DL.getTypeStoreSize(SrcVal->getType());
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy);
+ if (Offset + LoadSize > SrcValStoreSize)
+ return nullptr;
+ return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL);
+}
+
+template <class T, class HelperClass>
+T *getMemInstValueForLoadHelper(MemIntrinsic *SrcInst, unsigned Offset,
+ Type *LoadTy, HelperClass &Helper,
+ const DataLayout &DL) {
+ LLVMContext &Ctx = LoadTy->getContext();
+ uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy) / 8;
+
+ // We know that this method is only called when the memory intrinsic fully
+ // provides the bits for the load.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
+ // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
+ // regardless of the offset.
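+ // For example, splatting the byte 0xAB out to four bytes proceeds by
+ // doubling: 0xAB -> 0xABAB -> 0xABABABAB.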
+ T *Val = cast<T>(MSI->getValue());
+ if (LoadSize != 1)
+ Val =
+ Helper.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8));
+ T *OneElt = Val;
+
+ // Splat the value out to the right number of bits.
+ for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
+ // If we can double the number of bytes set, do it.
+ if (NumBytesSet * 2 <= LoadSize) {
+ T *ShVal = Helper.CreateShl(
+ Val, ConstantInt::get(Val->getType(), NumBytesSet * 8));
+ Val = Helper.CreateOr(Val, ShVal);
+ NumBytesSet <<= 1;
+ continue;
+ }
+
+ // Otherwise insert one byte at a time.
+ T *ShVal = Helper.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8));
+ Val = Helper.CreateOr(OneElt, ShVal);
+ ++NumBytesSet;
+ }
+
+ return coerceAvailableValueToLoadTypeHelper(Val, LoadTy, Helper, DL);
+ }
+
+ // Otherwise, this is a memcpy/memmove from a constant global.
+ MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
+ Constant *Src = cast<Constant>(MTI->getSource());
+ unsigned AS = Src->getType()->getPointerAddressSpace();
+
+ // See if we can constant fold a load from the constant with the offset
+ // applied as appropriate.
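+ // For example, a 4-byte load at Offset 4 of a copy from a constant global
+ // becomes a constant-folded load from a GEP four bytes into that global.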
+ Src =
+ ConstantExpr::getBitCast(Src, Type::getInt8PtrTy(Src->getContext(), AS));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()), Src,
+ OffsetCst);
+ Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
+ return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL);
+}
+
+/// This function is called when we have a memdep query of a load that ends up
+/// being a clobbering mem intrinsic.
+Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+ Type *LoadTy, Instruction *InsertPt,
+ const DataLayout &DL) {
+ IRBuilder<> Builder(InsertPt);
+ return getMemInstValueForLoadHelper<Value, IRBuilder<>>(SrcInst, Offset,
+ LoadTy, Builder, DL);
+}
+
+Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+ Type *LoadTy, const DataLayout &DL) {
+ // The only case where the result of analyzeLoadFromClobberingMemInst cannot
+ // be converted to a constant is when it is a memset of a non-constant value.
+ if (auto *MSI = dyn_cast<MemSetInst>(SrcInst))
+ if (!isa<Constant>(MSI->getValue()))
+ return nullptr;
+ ConstantFolder F;
+ return getMemInstValueForLoadHelper<Constant, ConstantFolder>(SrcInst, Offset,
+ LoadTy, F, DL);
+}
+} // namespace VNCoercion
+} // namespace llvm
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 0e9baaf8649d..f77c10b6dd47 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -681,6 +681,7 @@ void MDNodeMapper::mapNodesInPOT(UniquedGraph &G) {
remapOperands(*ClonedN, [this, &D, &G](Metadata *Old) {
if (Optional<Metadata *> MappedOp = getMappedOp(Old))
return *MappedOp;
+ (void)D;
assert(G.Info[Old].ID > D.ID && "Expected a forward reference");
return &G.getFwdReference(*cast<MDNode>(Old));
});
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index c01740b27d59..c83b3f7b225b 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -494,13 +494,13 @@ namespace {
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
// For stores, it is the value type, not the pointer type that matters
// because the value is what will come from a vector register.
-
+
Value *IVal = SI->getValueOperand();
T1 = IVal->getType();
} else {
T1 = I->getType();
}
-
+
if (CastInst *CI = dyn_cast<CastInst>(I))
T2 = CI->getSrcTy();
else
@@ -547,10 +547,11 @@ namespace {
// Returns the cost of the provided instruction using TTI.
// This does not handle loads and stores.
unsigned getInstrCost(unsigned Opcode, Type *T1, Type *T2,
- TargetTransformInfo::OperandValueKind Op1VK =
+ TargetTransformInfo::OperandValueKind Op1VK =
TargetTransformInfo::OK_AnyValue,
TargetTransformInfo::OperandValueKind Op2VK =
- TargetTransformInfo::OK_AnyValue) {
+ TargetTransformInfo::OK_AnyValue,
+ const Instruction *I = nullptr) {
switch (Opcode) {
default: break;
case Instruction::GetElementPtr:
@@ -584,7 +585,7 @@ namespace {
case Instruction::Select:
case Instruction::ICmp:
case Instruction::FCmp:
- return TTI->getCmpSelInstrCost(Opcode, T1, T2);
+ return TTI->getCmpSelInstrCost(Opcode, T1, T2, I);
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
@@ -598,7 +599,7 @@ namespace {
case Instruction::FPTrunc:
case Instruction::BitCast:
case Instruction::ShuffleVector:
- return TTI->getCastInstrCost(Opcode, T1, T2);
+ return TTI->getCastInstrCost(Opcode, T1, T2, I);
}
return 1;
@@ -894,7 +895,7 @@ namespace {
// vectors that has a scalar condition results in a malformed select.
// FIXME: We could probably be smarter about this by rewriting the select
// with different types instead.
- return (SI->getCondition()->getType()->isVectorTy() ==
+ return (SI->getCondition()->getType()->isVectorTy() ==
SI->getTrueValue()->getType()->isVectorTy());
} else if (isa<CmpInst>(I)) {
if (!Config.VectorizeCmp)
@@ -1044,14 +1045,14 @@ namespace {
return false;
}
} else if (TTI) {
- unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2);
- unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2);
- Type *VT1 = getVecTypeForPair(IT1, JT1),
- *VT2 = getVecTypeForPair(IT2, JT2);
TargetTransformInfo::OperandValueKind Op1VK =
TargetTransformInfo::OK_AnyValue;
TargetTransformInfo::OperandValueKind Op2VK =
TargetTransformInfo::OK_AnyValue;
+ unsigned ICost = getInstrCost(I->getOpcode(), IT1, IT2, Op1VK, Op2VK, I);
+ unsigned JCost = getInstrCost(J->getOpcode(), JT1, JT2, Op1VK, Op2VK, J);
+ Type *VT1 = getVecTypeForPair(IT1, JT1),
+ *VT2 = getVecTypeForPair(IT2, JT2);
// On some targets (example X86) the cost of a vector shift may vary
// depending on whether the second operand is a Uniform or
@@ -1090,7 +1091,7 @@ namespace {
// but this cost is ignored (because insert and extract element
// instructions are assigned a zero depth factor and are not really
// fused in general).
- unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2, Op1VK, Op2VK);
+ unsigned VCost = getInstrCost(I->getOpcode(), VT1, VT2, Op1VK, Op2VK, I);
if (VCost > ICost + JCost)
return false;
@@ -1127,39 +1128,51 @@ namespace {
FastMathFlags FMFCI;
if (auto *FPMOCI = dyn_cast<FPMathOperator>(CI))
FMFCI = FPMOCI->getFastMathFlags();
+ SmallVector<Value *, 4> IArgs(CI->arg_operands());
+ unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, IArgs, FMFCI);
- SmallVector<Type*, 4> Tys;
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i)
- Tys.push_back(CI->getArgOperand(i)->getType());
- unsigned ICost = TTI->getIntrinsicInstrCost(IID, IT1, Tys, FMFCI);
-
- Tys.clear();
CallInst *CJ = cast<CallInst>(J);
FastMathFlags FMFCJ;
if (auto *FPMOCJ = dyn_cast<FPMathOperator>(CJ))
FMFCJ = FPMOCJ->getFastMathFlags();
- for (unsigned i = 0, ie = CJ->getNumArgOperands(); i != ie; ++i)
- Tys.push_back(CJ->getArgOperand(i)->getType());
- unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, Tys, FMFCJ);
+ SmallVector<Value *, 4> JArgs(CJ->arg_operands());
+ unsigned JCost = TTI->getIntrinsicInstrCost(IID, JT1, JArgs, FMFCJ);
- Tys.clear();
assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
"Intrinsic argument counts differ");
+ SmallVector<Type*, 4> Tys;
+ SmallVector<Value *, 4> VecArgs;
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
- IID == Intrinsic::cttz) && i == 1)
+ IID == Intrinsic::cttz) && i == 1) {
Tys.push_back(CI->getArgOperand(i)->getType());
- else
+ VecArgs.push_back(CI->getArgOperand(i));
+ }
+ else {
Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(),
CJ->getArgOperand(i)->getType()));
+ // Add both operands, and then count their scalarization overhead
+ // with VF 1.
+ VecArgs.push_back(CI->getArgOperand(i));
+ VecArgs.push_back(CJ->getArgOperand(i));
+ }
}
+ // Compute the scalarization cost here with the original operands (to
+ // check for uniqueness etc.), and then call getIntrinsicInstrCost()
+ // with the constructed vector types.
+ Type *RetTy = getVecTypeForPair(IT1, JT1);
+ unsigned ScalarizationCost = 0;
+ if (!RetTy->isVoidTy())
+ ScalarizationCost += TTI->getScalarizationOverhead(RetTy, true, false);
+ ScalarizationCost += TTI->getOperandsScalarizationOverhead(VecArgs, 1);
+
FastMathFlags FMFV = FMFCI;
FMFV &= FMFCJ;
- Type *RetTy = getVecTypeForPair(IT1, JT1);
- unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV);
+ unsigned VCost = TTI->getIntrinsicInstrCost(IID, RetTy, Tys, FMFV,
+ ScalarizationCost);
if (VCost > ICost + JCost)
return false;
@@ -2502,7 +2515,7 @@ namespace {
if (I2 == I1 || isa<UndefValue>(I2))
I2 = nullptr;
}
-
+
if (HEE) {
Value *I3 = HEE->getOperand(0);
if (!I2 && I3 != I1)
@@ -2693,14 +2706,14 @@ namespace {
// so extend the smaller vector to be the same length as the larger one.
Instruction *NLOp;
if (numElemL > 1) {
-
+
std::vector<Constant *> Mask(numElemH);
unsigned v = 0;
for (; v < numElemL; ++v)
Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
for (; v < numElemH; ++v)
Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
-
+
NLOp = new ShuffleVectorInst(LOp, UndefValue::get(ArgTypeL),
ConstantVector::get(Mask),
getReplacementName(IBeforeJ ? I : J,
@@ -2710,7 +2723,7 @@ namespace {
getReplacementName(IBeforeJ ? I : J,
true, o, 1));
}
-
+
NLOp->insertBefore(IBeforeJ ? J : I);
LOp = NLOp;
}
@@ -2720,7 +2733,7 @@ namespace {
if (numElemH == 1 && expandIEChain(Context, I, J, o, LOp, numElemL,
ArgTypeH, VArgType, IBeforeJ)) {
Instruction *S =
- InsertElementInst::Create(LOp, HOp,
+ InsertElementInst::Create(LOp, HOp,
ConstantInt::get(Type::getInt32Ty(Context),
numElemL),
getReplacementName(IBeforeJ ? I : J,
@@ -2737,7 +2750,7 @@ namespace {
Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
for (; v < numElemL; ++v)
Mask[v] = UndefValue::get(Type::getInt32Ty(Context));
-
+
NHOp = new ShuffleVectorInst(HOp, UndefValue::get(ArgTypeH),
ConstantVector::get(Mask),
getReplacementName(IBeforeJ ? I : J,
diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index c44a393cf846..4409d7a404f8 100644
--- a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -432,9 +432,12 @@ Vectorizer::splitOddVectorElts(ArrayRef<Instruction *> Chain,
unsigned ElementSizeBytes = ElementSizeBits / 8;
unsigned SizeBytes = ElementSizeBytes * Chain.size();
unsigned NumLeft = (SizeBytes - (SizeBytes % 4)) / ElementSizeBytes;
- if (NumLeft == Chain.size())
- --NumLeft;
- else if (NumLeft == 0)
+ if (NumLeft == Chain.size()) {
+ if ((NumLeft & 1) == 0)
+ NumLeft /= 2; // Split even in half
+ else
+ --NumLeft; // Split off last element
+ } else if (NumLeft == 0)
NumLeft = 1;
return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
}
@@ -588,7 +591,7 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
continue;
// Make sure all the users of a vector are constant-index extracts.
- if (isa<VectorType>(Ty) && !all_of(LI->users(), [LI](const User *U) {
+ if (isa<VectorType>(Ty) && !all_of(LI->users(), [](const User *U) {
const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
return EEI && isa<ConstantInt>(EEI->getOperand(1));
}))
@@ -622,7 +625,7 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
if (TySize > VecRegSize / 2)
continue;
- if (isa<VectorType>(Ty) && !all_of(SI->users(), [SI](const User *U) {
+ if (isa<VectorType>(Ty) && !all_of(SI->users(), [](const User *U) {
const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
return EEI && isa<ConstantInt>(EEI->getOperand(1));
}))
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index dac7032fa08f..595b2ec88943 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -50,6 +50,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -92,6 +93,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/Transforms/Vectorize.h"
@@ -266,21 +268,6 @@ static bool hasCyclesInLoopBody(const Loop &L) {
return false;
}
-/// \brief This modifies LoopAccessReport to initialize message with
-/// loop-vectorizer-specific part.
-class VectorizationReport : public LoopAccessReport {
-public:
- VectorizationReport(Instruction *I = nullptr)
- : LoopAccessReport("loop not vectorized: ", I) {}
-
- /// \brief This allows promotion of the loop-access analysis report into the
- /// loop-vectorizer report. It modifies the message to add the
- /// loop-vectorizer-specific part of the message.
- explicit VectorizationReport(const LoopAccessReport &R)
- : LoopAccessReport(Twine("loop not vectorized: ") + R.str(),
- R.getInstr()) {}
-};
-
/// A helper function for converting Scalar types to vector types.
/// If the incoming type is void, we return void. If the VF is 1, we return
/// the scalar type.
@@ -290,31 +277,9 @@ static Type *ToVectorTy(Type *Scalar, unsigned VF) {
return VectorType::get(Scalar, VF);
}
-/// A helper function that returns GEP instruction and knows to skip a
-/// 'bitcast'. The 'bitcast' may be skipped if the source and the destination
-/// pointee types of the 'bitcast' have the same size.
-/// For example:
-/// bitcast double** %var to i64* - can be skipped
-/// bitcast double** %var to i8* - can not
-static GetElementPtrInst *getGEPInstruction(Value *Ptr) {
-
- if (isa<GetElementPtrInst>(Ptr))
- return cast<GetElementPtrInst>(Ptr);
-
- if (isa<BitCastInst>(Ptr) &&
- isa<GetElementPtrInst>(cast<BitCastInst>(Ptr)->getOperand(0))) {
- Type *BitcastTy = Ptr->getType();
- Type *GEPTy = cast<BitCastInst>(Ptr)->getSrcTy();
- if (!isa<PointerType>(BitcastTy) || !isa<PointerType>(GEPTy))
- return nullptr;
- Type *Pointee1Ty = cast<PointerType>(BitcastTy)->getPointerElementType();
- Type *Pointee2Ty = cast<PointerType>(GEPTy)->getPointerElementType();
- const DataLayout &DL = cast<BitCastInst>(Ptr)->getModule()->getDataLayout();
- if (DL.getTypeSizeInBits(Pointee1Ty) == DL.getTypeSizeInBits(Pointee2Ty))
- return cast<GetElementPtrInst>(cast<BitCastInst>(Ptr)->getOperand(0));
- }
- return nullptr;
-}
+// FIXME: The following helper functions have multiple implementations
+// in the project. They can be effectively organized in a common Load/Store
+// utilities unit.
/// A helper function that returns the pointer operand of a load or store
/// instruction.
@@ -326,6 +291,34 @@ static Value *getPointerOperand(Value *I) {
return nullptr;
}
+/// A helper function that returns the type of a loaded or stored value.
+static Type *getMemInstValueType(Value *I) {
+ assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
+ "Expected Load or Store instruction");
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ return LI->getType();
+ return cast<StoreInst>(I)->getValueOperand()->getType();
+}
+
+/// A helper function that returns the alignment of a load or store
+/// instruction.
+static unsigned getMemInstAlignment(Value *I) {
+ assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
+ "Expected Load or Store instruction");
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ return LI->getAlignment();
+ return cast<StoreInst>(I)->getAlignment();
+}
+
+/// A helper function that returns the address space of the pointer operand of
+/// a load or store instruction.
+static unsigned getMemInstAddressSpace(Value *I) {
+ assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
+ "Expected Load or Store instruction");
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ return LI->getPointerAddressSpace();
+ return cast<StoreInst>(I)->getPointerAddressSpace();
+}
+
/// A helper function that returns true if the given type is irregular. The
/// type is irregular if its allocated size doesn't equal the store size of an
/// element of the corresponding vector type at the given vectorization factor.
@@ -351,6 +344,23 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL, unsigned VF) {
/// we always assume predicated blocks have a 50% chance of executing.
static unsigned getReciprocalPredBlockProb() { return 2; }
+/// A helper function that adds a 'fast' flag to floating-point operations.
+static Value *addFastMathFlag(Value *V) {
+ if (isa<FPMathOperator>(V)) {
+ FastMathFlags Flags;
+ Flags.setUnsafeAlgebra();
+ cast<Instruction>(V)->setFastMathFlags(Flags);
+ }
+ return V;
+}
+
+/// A helper function that returns an integer or floating-point constant with
+/// value C.
+static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
+ return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
+ : ConstantFP::get(Ty, C);
+}
+
/// InnerLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
/// This class performs the widening of scalars into vectors, or multiple
@@ -428,10 +438,17 @@ protected:
/// Copy and widen the instructions from the old loop.
virtual void vectorizeLoop();
+ /// Handle all cross-iteration phis in the header.
+ void fixCrossIterationPHIs();
+
/// Fix a first-order recurrence. This is the second phase of vectorizing
/// this phi node.
void fixFirstOrderRecurrence(PHINode *Phi);
+ /// Fix a reduction cross-iteration phi. This is the second phase of
+ /// vectorizing this phi node.
+ void fixReduction(PHINode *Phi);
+
/// \brief The Loop exit block may have single value PHI nodes where the
/// incoming value is 'Undef'. While vectorizing we only handled real values
/// that were defined inside the loop. Here we fix the 'undef case'.
@@ -448,7 +465,8 @@ protected:
/// Collect the instructions from the original loop that would be trivially
/// dead in the vectorized loop if generated.
- void collectTriviallyDeadInstructions();
+ void collectTriviallyDeadInstructions(
+ SmallPtrSetImpl<Instruction *> &DeadInstructions);
/// Shrinks vector element sizes to the smallest bitwidth they can be legally
/// represented as.
@@ -462,14 +480,14 @@ protected:
/// and DST.
VectorParts createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
- /// A helper function to vectorize a single BB within the innermost loop.
- void vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV);
+ /// A helper function to vectorize a single instruction within the innermost
+ /// loop.
+ void vectorizeInstruction(Instruction &I);
/// Vectorize a single PHINode in a block. This method handles the induction
/// variable canonicalization. It supports both VF = 1 for unrolled loops and
/// arbitrary length vectors.
- void widenPHIInstruction(Instruction *PN, unsigned UF, unsigned VF,
- PhiVector *PV);
+ void widenPHIInstruction(Instruction *PN, unsigned UF, unsigned VF);
/// Insert the new loop to the loop hierarchy and pass manager
/// and update the analysis passes.
@@ -504,20 +522,21 @@ protected:
/// \p EntryVal is the value from the original loop that maps to the steps.
/// Note that \p EntryVal doesn't have to be an induction variable (e.g., it
/// can be a truncate instruction).
- void buildScalarSteps(Value *ScalarIV, Value *Step, Value *EntryVal);
-
- /// Create a vector induction phi node based on an existing scalar one. This
- /// currently only works for integer induction variables with a constant
- /// step. \p EntryVal is the value from the original loop that maps to the
- /// vector phi node. If \p EntryVal is a truncate instruction, instead of
- /// widening the original IV, we widen a version of the IV truncated to \p
- /// EntryVal's type.
- void createVectorIntInductionPHI(const InductionDescriptor &II,
- Instruction *EntryVal);
-
- /// Widen an integer induction variable \p IV. If \p Trunc is provided, the
- /// induction variable will first be truncated to the corresponding type.
- void widenIntInduction(PHINode *IV, TruncInst *Trunc = nullptr);
+ void buildScalarSteps(Value *ScalarIV, Value *Step, Value *EntryVal,
+ const InductionDescriptor &ID);
+
+ /// Create a vector induction phi node based on an existing scalar one. \p
+ /// EntryVal is the value from the original loop that maps to the vector phi
+ /// node, and \p Step is the loop-invariant step. If \p EntryVal is a
+ /// truncate instruction, instead of widening the original IV, we widen a
+ /// version of the IV truncated to \p EntryVal's type.
+ void createVectorIntOrFpInductionPHI(const InductionDescriptor &II,
+ Value *Step, Instruction *EntryVal);
+
+ /// Widen an integer or floating-point induction variable \p IV. If \p Trunc
+ /// is provided, the integer induction variable will first be truncated to
+ /// the corresponding type.
+ void widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc = nullptr);
/// Returns true if an instruction \p I should be scalarized instead of
/// vectorized for the chosen vectorization factor.
@@ -583,6 +602,10 @@ protected:
/// vector of instructions.
void addMetadata(ArrayRef<Value *> To, Instruction *From);
+ /// \brief Set the debug location in the builder using the debug location in
+ /// the instruction.
+ void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr);
+
/// This is a helper class for maintaining vectorization state. It's used for
/// mapping values from the original loop to their corresponding values in
/// the new loop. Two mappings are maintained: one for vectorized values and
@@ -777,14 +800,6 @@ protected:
// Record whether runtime checks are added.
bool AddedSafetyChecks;
- // Holds instructions from the original loop whose counterparts in the
- // vectorized loop would be trivially dead if generated. For example,
- // original induction update instructions can become dead because we
- // separately emit induction "steps" when generating code for the new loop.
- // Similarly, we create a new latch condition when setting up the structure
- // of the new loop, so the old one can become dead.
- SmallPtrSet<Instruction *, 4> DeadInstructions;
-
// Holds the end values for each induction variable. We save the end values
// so we can later fix-up the external users of the induction variables.
DenseMap<PHINode *, Value *> IVEndValues;
@@ -803,8 +818,6 @@ public:
UnrollFactor, LVL, CM) {}
private:
- void scalarizeInstruction(Instruction *Instr,
- bool IfPredicateInstr = false) override;
void vectorizeMemoryInstruction(Instruction *Instr) override;
Value *getBroadcastInstrs(Value *V) override;
Value *getStepVector(Value *Val, int StartIdx, Value *Step,
@@ -832,12 +845,14 @@ static Instruction *getDebugLocFromInstOrOperands(Instruction *I) {
return I;
}
-/// \brief Set the debug location in the builder using the debug location in the
-/// instruction.
-static void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) {
- if (const Instruction *Inst = dyn_cast_or_null<Instruction>(Ptr))
- B.SetCurrentDebugLocation(Inst->getDebugLoc());
- else
+void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) {
+ if (const Instruction *Inst = dyn_cast_or_null<Instruction>(Ptr)) {
+ const DILocation *DIL = Inst->getDebugLoc();
+ if (DIL && Inst->getFunction()->isDebugInfoForProfiling())
+ B.SetCurrentDebugLocation(DIL->cloneWithDuplicationFactor(UF * VF));
+ else
+ B.SetCurrentDebugLocation(DIL);
+ } else
B.SetCurrentDebugLocation(DebugLoc());
}
@@ -1497,14 +1512,6 @@ private:
OptimizationRemarkEmitter &ORE;
};
-static void emitAnalysisDiag(const Loop *TheLoop,
- const LoopVectorizeHints &Hints,
- OptimizationRemarkEmitter &ORE,
- const LoopAccessReport &Message) {
- const char *Name = Hints.vectorizeAnalysisPassName();
- LoopAccessReport::emitAnalysis(Message, TheLoop, Name, ORE);
-}
-
static void emitMissedWarning(Function *F, Loop *L,
const LoopVectorizeHints &LH,
OptimizationRemarkEmitter *ORE) {
@@ -1512,13 +1519,17 @@ static void emitMissedWarning(Function *F, Loop *L,
if (LH.getForce() == LoopVectorizeHints::FK_Enabled) {
if (LH.getWidth() != 1)
- emitLoopVectorizeWarning(
- F->getContext(), *F, L->getStartLoc(),
- "failed explicitly specified loop vectorization");
+ ORE->emit(DiagnosticInfoOptimizationFailure(
+ DEBUG_TYPE, "FailedRequestedVectorization",
+ L->getStartLoc(), L->getHeader())
+ << "loop not vectorized: "
+ << "failed explicitly specified loop vectorization");
else if (LH.getInterleave() != 1)
- emitLoopInterleaveWarning(
- F->getContext(), *F, L->getStartLoc(),
- "failed explicitly specified loop interleaving");
+ ORE->emit(DiagnosticInfoOptimizationFailure(
+ DEBUG_TYPE, "FailedRequestedInterleaving", L->getStartLoc(),
+ L->getHeader())
+ << "loop not interleaved: "
+ << "failed explicitly specified loop interleaving");
}
}
@@ -1546,7 +1557,7 @@ public:
LoopVectorizeHints *H)
: NumPredStores(0), TheLoop(L), PSE(PSE), TLI(TLI), TTI(TTI), DT(DT),
GetLAA(GetLAA), LAI(nullptr), ORE(ORE), InterleaveInfo(PSE, L, DT, LI),
- Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false),
+ PrimaryInduction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false),
Requirements(R), Hints(H) {}
/// ReductionList contains the reduction descriptors for all
@@ -1566,8 +1577,8 @@ public:
/// loop, only that it is legal to do so.
bool canVectorize();
- /// Returns the Induction variable.
- PHINode *getInduction() { return Induction; }
+ /// Returns the primary induction variable.
+ PHINode *getPrimaryInduction() { return PrimaryInduction; }
/// Returns the reduction variables found in the loop.
ReductionList *getReductionVars() { return &Reductions; }
@@ -1607,12 +1618,6 @@ public:
/// Returns true if the value V is uniform within the loop.
bool isUniform(Value *V);
- /// Returns true if \p I is known to be uniform after vectorization.
- bool isUniformAfterVectorization(Instruction *I) { return Uniforms.count(I); }
-
- /// Returns true if \p I is known to be scalar after vectorization.
- bool isScalarAfterVectorization(Instruction *I) { return Scalars.count(I); }
-
/// Returns the information that we collected about runtime memory check.
const RuntimePointerChecking *getRuntimePointerChecking() const {
return LAI->getRuntimePointerChecking();
@@ -1689,15 +1694,9 @@ public:
/// instructions that may divide by zero.
bool isScalarWithPredication(Instruction *I);
- /// Returns true if \p I is a memory instruction that has a consecutive or
- /// consecutive-like pointer operand. Consecutive-like pointers are pointers
- /// that are treated like consecutive pointers during vectorization. The
- /// pointer operands of interleaved accesses are an example.
- bool hasConsecutiveLikePtrOperand(Instruction *I);
-
- /// Returns true if \p I is a memory instruction that must be scalarized
- /// during vectorization.
- bool memoryInstructionMustBeScalarized(Instruction *I, unsigned VF = 1);
+ /// Returns true if \p I is a memory instruction with consecutive memory
+ /// access that can be widened.
+ bool memoryInstructionCanBeWidened(Instruction *I, unsigned VF = 1);
private:
/// Check if a single basic block loop is vectorizable.
@@ -1715,24 +1714,6 @@ private:
/// transformation.
bool canVectorizeWithIfConvert();
- /// Collect the instructions that are uniform after vectorization. An
- /// instruction is uniform if we represent it with a single scalar value in
- /// the vectorized loop corresponding to each vector iteration. Examples of
- /// uniform instructions include pointer operands of consecutive or
- /// interleaved memory accesses. Note that although uniformity implies an
- /// instruction will be scalar, the reverse is not true. In general, a
- /// scalarized instruction will be represented by VF scalar values in the
- /// vectorized loop, each corresponding to an iteration of the original
- /// scalar loop.
- void collectLoopUniforms();
-
- /// Collect the instructions that are scalar after vectorization. An
- /// instruction is scalar if it is known to be uniform or will be scalarized
- /// during vectorization. Non-uniform scalarized instructions will be
- /// represented by VF values in the vectorized loop, each corresponding to an
- /// iteration of the original scalar loop.
- void collectLoopScalars();
-
/// Return true if all of the instructions in the block can be speculatively
/// executed. \p SafePtrs is a list of addresses that are known to be legal
/// and we know that we can read from them without segfault.
@@ -1744,14 +1725,6 @@ private:
void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
SmallPtrSetImpl<Value *> &AllowedExit);
- /// Report an analysis message to assist the user in diagnosing loops that are
- /// not vectorized. These are handled as LoopAccessReport rather than
- /// VectorizationReport because the << operator of VectorizationReport returns
- /// LoopAccessReport.
- void emitAnalysis(const LoopAccessReport &Message) const {
- emitAnalysisDiag(TheLoop, *Hints, *ORE, Message);
- }
-
/// Create an analysis remark that explains why vectorization failed
///
/// \p RemarkName is the identifier for the remark. If \p I is passed it is
@@ -1804,9 +1777,9 @@ private:
// --- vectorization state --- //
- /// Holds the integer induction variable. This is the counter of the
+ /// Holds the primary induction variable. This is the counter of the
/// loop.
- PHINode *Induction;
+ PHINode *PrimaryInduction;
/// Holds the reduction variables.
ReductionList Reductions;
/// Holds all of the induction variables that we found in the loop.
@@ -1822,12 +1795,6 @@ private:
/// vars which can be accessed from outside the loop.
SmallPtrSet<Value *, 4> AllowedExit;
- /// Holds the instructions known to be uniform after vectorization.
- SmallPtrSet<Instruction *, 4> Uniforms;
-
- /// Holds the instructions known to be scalar after vectorization.
- SmallPtrSet<Instruction *, 4> Scalars;
-
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
@@ -1861,16 +1828,26 @@ public:
: TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
AC(AC), ORE(ORE), TheFunction(F), Hints(Hints) {}
+ /// \return An upper bound for the vectorization factor, or None if
+ /// vectorization should be avoided up front.
+ Optional<unsigned> computeMaxVF(bool OptForSize);
+
/// Information about vectorization costs
struct VectorizationFactor {
unsigned Width; // Vector width with best cost
unsigned Cost; // Cost of the loop with that width
};
/// \return The most profitable vectorization factor and the cost of that VF.
- /// This method checks every power of two up to VF. If UserVF is not ZERO
+ /// This method checks every power of two up to MaxVF. If UserVF is not ZERO
/// then this vectorization factor will be selected if vectorization is
/// possible.
- VectorizationFactor selectVectorizationFactor(bool OptForSize);
+ VectorizationFactor selectVectorizationFactor(unsigned MaxVF);
+
+ /// Setup cost-based decisions for user vectorization factor.
+ void selectUserVectorizationFactor(unsigned UserVF) {
+ collectUniformsAndScalars(UserVF);
+ collectInstsToScalarize(UserVF);
+ }
/// \return The size (in bits) of the smallest and widest types in the code
/// that needs to be vectorized. We ignore values that remain scalar such as
@@ -1884,6 +1861,15 @@ public:
unsigned selectInterleaveCount(bool OptForSize, unsigned VF,
unsigned LoopCost);
+ /// A memory access instruction may be vectorized in more than one way, and
+ /// the form it takes after vectorization depends on cost. This function
+ /// makes cost-based widening decisions for Load/Store instructions and
+ /// collects them in a map. This decision map is used for building the lists
+ /// of loop-uniform and loop-scalar instructions. The calculated cost is
+ /// saved with the widening decision in order to avoid redundant
+ /// calculations.
+ void setCostBasedWideningDecision(unsigned VF);
+
/// \brief A struct that represents some properties of the register usage
/// of a loop.
struct RegisterUsage {
@@ -1918,14 +1904,118 @@ public:
return Scalars->second.count(I);
}
+ /// Returns true if \p I is known to be uniform after vectorization.
+ bool isUniformAfterVectorization(Instruction *I, unsigned VF) const {
+ if (VF == 1)
+ return true;
+ assert(Uniforms.count(VF) && "VF not yet analyzed for uniformity");
+ auto UniformsPerVF = Uniforms.find(VF);
+ return UniformsPerVF->second.count(I);
+ }
+
+ /// Returns true if \p I is known to be scalar after vectorization.
+ bool isScalarAfterVectorization(Instruction *I, unsigned VF) const {
+ if (VF == 1)
+ return true;
+ assert(Scalars.count(VF) && "Scalar values are not calculated for VF");
+ auto ScalarsPerVF = Scalars.find(VF);
+ return ScalarsPerVF->second.count(I);
+ }
+
/// \returns True if instruction \p I can be truncated to a smaller bitwidth
/// for vectorization factor \p VF.
bool canTruncateToMinimalBitwidth(Instruction *I, unsigned VF) const {
return VF > 1 && MinBWs.count(I) && !isProfitableToScalarize(I, VF) &&
- !Legal->isScalarAfterVectorization(I);
+ !isScalarAfterVectorization(I, VF);
+ }
+
+ /// Decision that was taken during cost calculation for memory instruction.
+ enum InstWidening {
+ CM_Unknown,
+ CM_Widen,
+ CM_Interleave,
+ CM_GatherScatter,
+ CM_Scalarize
+ };
+
+ /// Save vectorization decision \p W and \p Cost taken by the cost model for
+ /// instruction \p I and vector width \p VF.
+ void setWideningDecision(Instruction *I, unsigned VF, InstWidening W,
+ unsigned Cost) {
+ assert(VF >= 2 && "Expected VF >=2");
+ WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, Cost);
+ }
+
+ /// Save vectorization decision \p W and \p Cost taken by the cost model for
+ /// interleaving group \p Grp and vector width \p VF.
+ void setWideningDecision(const InterleaveGroup *Grp, unsigned VF,
+ InstWidening W, unsigned Cost) {
+ assert(VF >= 2 && "Expected VF >=2");
+ /// Broadcast this decision to all instructions inside the group.
+ /// But the cost will be assigned to one instruction only.
+ for (unsigned i = 0; i < Grp->getFactor(); ++i) {
+ if (auto *I = Grp->getMember(i)) {
+ if (Grp->getInsertPos() == I)
+ WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, Cost);
+ else
+ WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, 0);
+ }
+ }
+ }
+
+ /// Return the cost model decision for the given instruction \p I and vector
+ /// width \p VF. Return CM_Unknown if this instruction did not pass
+ /// through the cost modeling.
+ InstWidening getWideningDecision(Instruction *I, unsigned VF) {
+ assert(VF >= 2 && "Expected VF >=2");
+ std::pair<Instruction *, unsigned> InstOnVF = std::make_pair(I, VF);
+ auto Itr = WideningDecisions.find(InstOnVF);
+ if (Itr == WideningDecisions.end())
+ return CM_Unknown;
+ return Itr->second.first;
+ }
+
+ /// Return the vectorization cost for the given instruction \p I and vector
+ /// width \p VF.
+ unsigned getWideningCost(Instruction *I, unsigned VF) {
+ assert(VF >= 2 && "Expected VF >=2");
+ std::pair<Instruction *, unsigned> InstOnVF = std::make_pair(I, VF);
+ assert(WideningDecisions.count(InstOnVF) && "The cost is not calculated");
+ return WideningDecisions[InstOnVF].second;
+ }
+
+ /// Return True if instruction \p I is an optimizable truncate whose operand
+ /// is an induction variable. Such a truncate will be removed by adding a new
+ /// induction variable with the destination type.
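+ /// For example, a 'trunc i64 %iv to i32' of an i64 induction variable can be
+ /// replaced by a new i32 induction variable with truncated start and step.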
+ bool isOptimizableIVTruncate(Instruction *I, unsigned VF) {
+
+ // If the instruction is not a truncate, return false.
+ auto *Trunc = dyn_cast<TruncInst>(I);
+ if (!Trunc)
+ return false;
+
+ // Get the source and destination types of the truncate.
+ Type *SrcTy = ToVectorTy(cast<CastInst>(I)->getSrcTy(), VF);
+ Type *DestTy = ToVectorTy(cast<CastInst>(I)->getDestTy(), VF);
+
+ // If the truncate is free for the given types, return false. Replacing a
+ // free truncate with an induction variable would add an induction variable
+ // update instruction to each iteration of the loop. We exclude from this
+ // check the primary induction variable since it will need an update
+ // instruction regardless.
+ Value *Op = Trunc->getOperand(0);
+ if (Op != Legal->getPrimaryInduction() && TTI.isTruncateFree(SrcTy, DestTy))
+ return false;
+
+ // If the truncated value is not an induction variable, return false.
+ return Legal->isInductionVariable(Op);
}
private:
+ /// \return An upper bound for the vectorization factor, larger than zero.
+ /// One is returned if vectorization should be avoided due to cost.
+ unsigned computeFeasibleMaxVF(bool OptForSize);
+
/// The vectorization cost is a combination of the cost itself and a boolean
/// indicating whether any of the contributing operations will actually
/// operate on
@@ -1949,6 +2039,26 @@ private:
/// the vector type as an output parameter.
unsigned getInstructionCost(Instruction *I, unsigned VF, Type *&VectorTy);
+ /// Calculate vectorization cost of memory instruction \p I.
+ unsigned getMemoryInstructionCost(Instruction *I, unsigned VF);
+
+ /// The cost computation for scalarized memory instruction.
+ unsigned getMemInstScalarizationCost(Instruction *I, unsigned VF);
+
+ /// The cost computation for interleaving group of memory instructions.
+ unsigned getInterleaveGroupCost(Instruction *I, unsigned VF);
+
+ /// The cost computation for Gather/Scatter instruction.
+ unsigned getGatherScatterCost(Instruction *I, unsigned VF);
+
+ /// The cost computation for widening instruction \p I with consecutive
+ /// memory access.
+ unsigned getConsecutiveMemOpCost(Instruction *I, unsigned VF);
+
+ /// The cost calculation for Load instruction \p I with uniform pointer -
+ /// scalar load + broadcast.
+ unsigned getUniformMemOpCost(Instruction *I, unsigned VF);
+
/// Returns whether the instruction is a load or store and will be a emitted
/// as a vector operation.
bool isConsecutiveLoadOrStore(Instruction *I);
@@ -1972,12 +2082,24 @@ private:
/// pairs.
typedef DenseMap<Instruction *, unsigned> ScalarCostsTy;
+ /// A set containing all BasicBlocks that are known to be present after
+ /// vectorization as predicated blocks.
+ SmallPtrSet<BasicBlock *, 4> PredicatedBBsAfterVectorization;
+
/// A map holding scalar costs for different vectorization factors. The
/// presence of a cost for an instruction in the mapping indicates that the
/// instruction will be scalarized when vectorizing with the associated
/// vectorization factor. The entries are VF-ScalarCostTy pairs.
DenseMap<unsigned, ScalarCostsTy> InstsToScalarize;
+ /// Holds the instructions known to be uniform after vectorization.
+ /// The data is collected per VF.
+ DenseMap<unsigned, SmallPtrSet<Instruction *, 4>> Uniforms;
+
+ /// Holds the instructions known to be scalar after vectorization.
+ /// The data is collected per VF.
+ DenseMap<unsigned, SmallPtrSet<Instruction *, 4>> Scalars;
+
/// Returns the expected difference in cost from scalarizing the expression
/// feeding a predicated instruction \p PredInst. The instructions to
/// scalarize and their scalar costs are collected in \p ScalarCosts. A
@@ -1990,6 +2112,44 @@ private:
/// the loop.
void collectInstsToScalarize(unsigned VF);
+ /// Collect the instructions that are uniform after vectorization. An
+ /// instruction is uniform if we represent it with a single scalar value in
+ /// the vectorized loop corresponding to each vector iteration. Examples of
+ /// uniform instructions include pointer operands of consecutive or
+ /// interleaved memory accesses. Note that although uniformity implies an
+ /// instruction will be scalar, the reverse is not true. In general, a
+ /// scalarized instruction will be represented by VF scalar values in the
+ /// vectorized loop, each corresponding to an iteration of the original
+ /// scalar loop.
+ void collectLoopUniforms(unsigned VF);
+
+ /// Collect the instructions that are scalar after vectorization. An
+ /// instruction is scalar if it is known to be uniform or will be scalarized
+ /// during vectorization. Non-uniform scalarized instructions will be
+ /// represented by VF values in the vectorized loop, each corresponding to an
+ /// iteration of the original scalar loop.
+ void collectLoopScalars(unsigned VF);
+
+ /// Collect Uniform and Scalar values for the given \p VF.
+ /// The sets depend on CM decision for Load/Store instructions
+ /// that may be vectorized as interleave, gather-scatter or scalarized.
+ void collectUniformsAndScalars(unsigned VF) {
+ // Do the analysis once.
+ if (VF == 1 || Uniforms.count(VF))
+ return;
+ setCostBasedWideningDecision(VF);
+ collectLoopUniforms(VF);
+ collectLoopScalars(VF);
+ }
+
+ /// Keeps the cost model's vectorization decision and cost for each
+ /// instruction. Right now it is used for memory instructions only.
+ typedef DenseMap<std::pair<Instruction *, unsigned>,
+ std::pair<InstWidening, unsigned>>
+ DecisionList;
+
+ DecisionList WideningDecisions;
+
public:
/// The loop that we evaluate.
Loop *TheLoop;
@@ -2019,6 +2179,23 @@ public:
SmallPtrSet<const Value *, 16> VecValuesToIgnore;
};
+/// LoopVectorizationPlanner - drives the vectorization process after having
+/// passed Legality checks.
+class LoopVectorizationPlanner {
+public:
+ LoopVectorizationPlanner(LoopVectorizationCostModel &CM) : CM(CM) {}
+
+ ~LoopVectorizationPlanner() {}
+
+ /// Plan how to best vectorize, return the best VF and its cost.
+ LoopVectorizationCostModel::VectorizationFactor plan(bool OptForSize,
+ unsigned UserVF);
+
+private:
+ /// The profitability analysis.
+ LoopVectorizationCostModel &CM;
+};
+
/// \brief This holds vectorization requirements that must be verified late in
/// the process. The requirements are set by legalize and costmodel. Once
/// vectorization has been determined to be possible and profitable the
@@ -2134,8 +2311,6 @@ struct LoopVectorize : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
AU.addRequired<BlockFrequencyInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
@@ -2156,7 +2331,7 @@ struct LoopVectorize : public FunctionPass {
//===----------------------------------------------------------------------===//
// Implementation of LoopVectorizationLegality, InnerLoopVectorizer and
-// LoopVectorizationCostModel.
+// LoopVectorizationCostModel and LoopVectorizationPlanner.
//===----------------------------------------------------------------------===//
Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
@@ -2176,27 +2351,51 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
return Shuf;
}
-void InnerLoopVectorizer::createVectorIntInductionPHI(
- const InductionDescriptor &II, Instruction *EntryVal) {
+void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
+ const InductionDescriptor &II, Value *Step, Instruction *EntryVal) {
Value *Start = II.getStartValue();
- ConstantInt *Step = II.getConstIntStepValue();
- assert(Step && "Can not widen an IV with a non-constant step");
// Construct the initial value of the vector IV in the vector loop preheader
auto CurrIP = Builder.saveIP();
Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
if (isa<TruncInst>(EntryVal)) {
+ assert(Start->getType()->isIntegerTy() &&
+ "Truncation requires an integer type");
auto *TruncType = cast<IntegerType>(EntryVal->getType());
- Step = ConstantInt::getSigned(TruncType, Step->getSExtValue());
+ Step = Builder.CreateTrunc(Step, TruncType);
Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
}
Value *SplatStart = Builder.CreateVectorSplat(VF, Start);
- Value *SteppedStart = getStepVector(SplatStart, 0, Step);
+ Value *SteppedStart =
+ getStepVector(SplatStart, 0, Step, II.getInductionOpcode());
+
+ // We create vector phi nodes for both integer and floating-point induction
+ // variables. Here, we determine the kind of arithmetic we will perform.
+ Instruction::BinaryOps AddOp;
+ Instruction::BinaryOps MulOp;
+ if (Step->getType()->isIntegerTy()) {
+ AddOp = Instruction::Add;
+ MulOp = Instruction::Mul;
+ } else {
+ AddOp = II.getInductionOpcode();
+ MulOp = Instruction::FMul;
+ }
+
+ // Multiply the vectorization factor by the step using integer or
+ // floating-point arithmetic as appropriate.
+ Value *ConstVF = getSignedIntOrFpConstant(Step->getType(), VF);
+ Value *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, Step, ConstVF));
+
+ // Create a vector splat to use in the induction update.
+ //
+ // FIXME: If the step is non-constant, we create the vector splat with
+ // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
+ // handle a constant vector splat.
+ Value *SplatVF = isa<Constant>(Mul)
+ ? ConstantVector::getSplat(VF, cast<Constant>(Mul))
+ : Builder.CreateVectorSplat(VF, Mul);
Builder.restoreIP(CurrIP);
- Value *SplatVF =
- ConstantVector::getSplat(VF, ConstantInt::getSigned(Start->getType(),
- VF * Step->getSExtValue()));
// We may need to add the step a number of times, depending on the unroll
// factor. The last of those goes into the PHI.
PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind",
@@ -2205,8 +2404,8 @@ void InnerLoopVectorizer::createVectorIntInductionPHI(
VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
Entry[Part] = LastInduction;
- LastInduction = cast<Instruction>(
- Builder.CreateAdd(LastInduction, SplatVF, "step.add"));
+ LastInduction = cast<Instruction>(addFastMathFlag(
+ Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")));
}
VectorLoopValueMap.initVector(EntryVal, Entry);
if (isa<TruncInst>(EntryVal))
@@ -2225,7 +2424,7 @@ void InnerLoopVectorizer::createVectorIntInductionPHI(
}
bool InnerLoopVectorizer::shouldScalarizeInstruction(Instruction *I) const {
- return Legal->isScalarAfterVectorization(I) ||
+ return Cost->isScalarAfterVectorization(I, VF) ||
Cost->isProfitableToScalarize(I, VF);
}
@@ -2239,7 +2438,10 @@ bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const {
return any_of(IV->users(), isScalarInst);
}
-void InnerLoopVectorizer::widenIntInduction(PHINode *IV, TruncInst *Trunc) {
+void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
+
+ assert((IV->getType()->isIntegerTy() || IV != OldInduction) &&
+ "Primary induction variable must have an integer type");
auto II = Legal->getInductionVars()->find(IV);
assert(II != Legal->getInductionVars()->end() && "IV is not an induction");
@@ -2251,9 +2453,6 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, TruncInst *Trunc) {
// induction variable.
Value *ScalarIV = nullptr;
- // The step of the induction.
- Value *Step = nullptr;
-
// The value from the original loop to which we are mapping the new induction
// variable.
Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
@@ -2266,45 +2465,49 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, TruncInst *Trunc) {
// least one user in the loop that is not widened.
auto NeedsScalarIV = VF > 1 && needsScalarInduction(EntryVal);
- // If the induction variable has a constant integer step value, go ahead and
- // get it now.
- if (ID.getConstIntStepValue())
- Step = ID.getConstIntStepValue();
+ // Generate code for the induction step. Note that induction steps are
+ // required to be loop-invariant.
+ assert(PSE.getSE()->isLoopInvariant(ID.getStep(), OrigLoop) &&
+ "Induction step should be loop invariant");
+ auto &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
+ Value *Step = nullptr;
+ if (PSE.getSE()->isSCEVable(IV->getType())) {
+ SCEVExpander Exp(*PSE.getSE(), DL, "induction");
+ Step = Exp.expandCodeFor(ID.getStep(), ID.getStep()->getType(),
+ LoopVectorPreHeader->getTerminator());
+ } else {
+ Step = cast<SCEVUnknown>(ID.getStep())->getValue();
+ }
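+
+ // A sketch of the expansion: for an induction described by the SCEV
+ // {%start,+,%n}, ID.getStep() is %n, and expandCodeFor emits (or reuses) a
+ // computation of %n ahead of the preheader terminator. Floating-point
+ // induction types are not SCEVable, so their step is already available as a
+ // plain Value (the SCEVUnknown case).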
// Try to create a new independent vector induction variable. If we can't
// create the phi node, we will splat the scalar induction variable in each
// loop iteration.
- if (VF > 1 && IV->getType() == Induction->getType() && Step &&
- !shouldScalarizeInstruction(EntryVal)) {
- createVectorIntInductionPHI(ID, EntryVal);
+ if (VF > 1 && !shouldScalarizeInstruction(EntryVal)) {
+ createVectorIntOrFpInductionPHI(ID, Step, EntryVal);
VectorizedIV = true;
}
// If we haven't yet vectorized the induction variable, or if we will create
// a scalar one, we need to define the scalar induction variable and step
// values. If we were given a truncation type, truncate the canonical
- // induction variable and constant step. Otherwise, derive these values from
- // the induction descriptor.
+ // induction variable and step. Otherwise, derive these values from the
+ // induction descriptor.
if (!VectorizedIV || NeedsScalarIV) {
+ ScalarIV = Induction;
+ if (IV != OldInduction) {
+ ScalarIV = IV->getType()->isIntegerTy()
+ ? Builder.CreateSExtOrTrunc(Induction, IV->getType())
+ : Builder.CreateCast(Instruction::SIToFP, Induction,
+ IV->getType());
+ ScalarIV = ID.transform(Builder, ScalarIV, PSE.getSE(), DL);
+ ScalarIV->setName("offset.idx");
+ }
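+ // The transform above yields, e.g. for a float IV with start 1.0 and step
+ // 0.5 (an FAdd induction), offset.idx == 1.0 + sitofp(Induction) * 0.5.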
if (Trunc) {
auto *TruncType = cast<IntegerType>(Trunc->getType());
- assert(Step && "Truncation requires constant integer step");
- auto StepInt = cast<ConstantInt>(Step)->getSExtValue();
- ScalarIV = Builder.CreateCast(Instruction::Trunc, Induction, TruncType);
- Step = ConstantInt::getSigned(TruncType, StepInt);
- } else {
- ScalarIV = Induction;
- auto &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
- if (IV != OldInduction) {
- ScalarIV = Builder.CreateSExtOrTrunc(ScalarIV, IV->getType());
- ScalarIV = ID.transform(Builder, ScalarIV, PSE.getSE(), DL);
- ScalarIV->setName("offset.idx");
- }
- if (!Step) {
- SCEVExpander Exp(*PSE.getSE(), DL, "induction");
- Step = Exp.expandCodeFor(ID.getStep(), ID.getStep()->getType(),
- &*Builder.GetInsertPoint());
- }
+ assert(Step->getType()->isIntegerTy() &&
+ "Truncation requires an integer step");
+ ScalarIV = Builder.CreateTrunc(ScalarIV, TruncType);
+ Step = Builder.CreateTrunc(Step, TruncType);
}
}
@@ -2314,7 +2517,8 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, TruncInst *Trunc) {
Value *Broadcasted = getBroadcastInstrs(ScalarIV);
VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part] = getStepVector(Broadcasted, VF * Part, Step);
+ Entry[Part] =
+ getStepVector(Broadcasted, VF * Part, Step, ID.getInductionOpcode());
VectorLoopValueMap.initVector(EntryVal, Entry);
if (Trunc)
addMetadata(Entry, Trunc);
@@ -2327,7 +2531,7 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, TruncInst *Trunc) {
// in the loop in the common case prior to InstCombine. We will be trading
// one vector extract for each scalar step.
if (NeedsScalarIV)
- buildScalarSteps(ScalarIV, Step, EntryVal);
+ buildScalarSteps(ScalarIV, Step, EntryVal, ID);
}
Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
@@ -2387,30 +2591,43 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
}
void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
- Value *EntryVal) {
+ Value *EntryVal,
+ const InductionDescriptor &ID) {
// We shouldn't have to build scalar steps if we aren't vectorizing.
assert(VF > 1 && "VF should be greater than one");
// Get the value type and ensure it and the step have the same type.
Type *ScalarIVTy = ScalarIV->getType()->getScalarType();
- assert(ScalarIVTy->isIntegerTy() && ScalarIVTy == Step->getType() &&
- "Val and Step should have the same integer type");
+ assert(ScalarIVTy == Step->getType() &&
+ "Val and Step should have the same type");
+
+ // We build scalar steps for both integer and floating-point induction
+ // variables. Here, we determine the kind of arithmetic we will perform.
+ Instruction::BinaryOps AddOp;
+ Instruction::BinaryOps MulOp;
+ if (ScalarIVTy->isIntegerTy()) {
+ AddOp = Instruction::Add;
+ MulOp = Instruction::Mul;
+ } else {
+ AddOp = ID.getInductionOpcode();
+ MulOp = Instruction::FMul;
+ }
// Determine the number of scalars we need to generate for each unroll
// iteration. If EntryVal is uniform, we only need to generate the first
// lane. Otherwise, we generate all VF values.
unsigned Lanes =
- Legal->isUniformAfterVectorization(cast<Instruction>(EntryVal)) ? 1 : VF;
+ Cost->isUniformAfterVectorization(cast<Instruction>(EntryVal), VF) ? 1 : VF;
// Compute the scalar steps and save the results in VectorLoopValueMap.
ScalarParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
Entry[Part].resize(VF);
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
- auto *StartIdx = ConstantInt::get(ScalarIVTy, VF * Part + Lane);
- auto *Mul = Builder.CreateMul(StartIdx, Step);
- auto *Add = Builder.CreateAdd(ScalarIV, Mul);
+ auto *StartIdx = getSignedIntOrFpConstant(ScalarIVTy, VF * Part + Lane);
+ auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
+ auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
Entry[Part][Lane] = Add;
}
}
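// For example, with an integer IV, Step == 1, VF == 4, and UF == 2, the
// loop above produces ScalarIV + 0 .. ScalarIV + 3 for part 0 and
// ScalarIV + 4 .. ScalarIV + 7 for part 1; if EntryVal is uniform, only
// lane 0 of each part is generated.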
@@ -2469,7 +2686,7 @@ InnerLoopVectorizer::getVectorValue(Value *V) {
// known to be uniform after vectorization, this corresponds to lane zero
// of the last unroll iteration. Otherwise, the last instruction is the one
// we created for the last vector lane of the last unroll iteration.
- unsigned LastLane = Legal->isUniformAfterVectorization(I) ? 0 : VF - 1;
+ unsigned LastLane = Cost->isUniformAfterVectorization(I, VF) ? 0 : VF - 1;
auto *LastInst = cast<Instruction>(getScalarValue(V, UF - 1, LastLane));
// Set the insert point after the last scalarized instruction. This ensures
@@ -2486,7 +2703,7 @@ InnerLoopVectorizer::getVectorValue(Value *V) {
// VectorLoopValueMap, we will only generate the insertelements once.
for (unsigned Part = 0; Part < UF; ++Part) {
Value *VectorValue = nullptr;
- if (Legal->isUniformAfterVectorization(I)) {
+ if (Cost->isUniformAfterVectorization(I, VF)) {
VectorValue = getBroadcastInstrs(getScalarValue(V, Part, 0));
} else {
VectorValue = UndefValue::get(VectorType::get(V->getType(), VF));
@@ -2515,8 +2732,9 @@ Value *InnerLoopVectorizer::getScalarValue(Value *V, unsigned Part,
if (OrigLoop->isLoopInvariant(V))
return V;
- assert(Lane > 0 ? !Legal->isUniformAfterVectorization(cast<Instruction>(V))
- : true && "Uniform values only have lane zero");
+ assert(Lane > 0 ?
+ !Cost->isUniformAfterVectorization(cast<Instruction>(V), VF)
+ : true && "Uniform values only have lane zero");
// If the value from the original loop has not been vectorized, it is
// represented by UF x VF scalar values in the new loop. Return the requested
@@ -2551,102 +2769,6 @@ Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
"reverse");
}
-// Get a mask to interleave \p NumVec vectors into a wide vector.
-// I.e. <0, VF, VF*2, ..., VF*(NumVec-1), 1, VF+1, VF*2+1, ...>
-// E.g. For 2 interleaved vectors, if VF is 4, the mask is:
-// <0, 4, 1, 5, 2, 6, 3, 7>
-static Constant *getInterleavedMask(IRBuilder<> &Builder, unsigned VF,
- unsigned NumVec) {
- SmallVector<Constant *, 16> Mask;
- for (unsigned i = 0; i < VF; i++)
- for (unsigned j = 0; j < NumVec; j++)
- Mask.push_back(Builder.getInt32(j * VF + i));
-
- return ConstantVector::get(Mask);
-}
-
-// Get the strided mask starting from index \p Start.
-// I.e. <Start, Start + Stride, ..., Start + Stride*(VF-1)>
-static Constant *getStridedMask(IRBuilder<> &Builder, unsigned Start,
- unsigned Stride, unsigned VF) {
- SmallVector<Constant *, 16> Mask;
- for (unsigned i = 0; i < VF; i++)
- Mask.push_back(Builder.getInt32(Start + i * Stride));
-
- return ConstantVector::get(Mask);
-}
-
-// Get a mask of two parts: The first part consists of sequential integers
-// starting from 0, The second part consists of UNDEFs.
-// I.e. <0, 1, 2, ..., NumInt - 1, undef, ..., undef>
-static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned NumInt,
- unsigned NumUndef) {
- SmallVector<Constant *, 16> Mask;
- for (unsigned i = 0; i < NumInt; i++)
- Mask.push_back(Builder.getInt32(i));
-
- Constant *Undef = UndefValue::get(Builder.getInt32Ty());
- for (unsigned i = 0; i < NumUndef; i++)
- Mask.push_back(Undef);
-
- return ConstantVector::get(Mask);
-}
-
-// Concatenate two vectors with the same element type. The 2nd vector should
-// not have more elements than the 1st vector. If the 2nd vector has less
-// elements, extend it with UNDEFs.
-static Value *ConcatenateTwoVectors(IRBuilder<> &Builder, Value *V1,
- Value *V2) {
- VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
- VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
- assert(VecTy1 && VecTy2 &&
- VecTy1->getScalarType() == VecTy2->getScalarType() &&
- "Expect two vectors with the same element type");
-
- unsigned NumElts1 = VecTy1->getNumElements();
- unsigned NumElts2 = VecTy2->getNumElements();
- assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
-
- if (NumElts1 > NumElts2) {
- // Extend with UNDEFs.
- Constant *ExtMask =
- getSequentialMask(Builder, NumElts2, NumElts1 - NumElts2);
- V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask);
- }
-
- Constant *Mask = getSequentialMask(Builder, NumElts1 + NumElts2, 0);
- return Builder.CreateShuffleVector(V1, V2, Mask);
-}
-
-// Concatenate vectors in the given list. All vectors have the same type.
-static Value *ConcatenateVectors(IRBuilder<> &Builder,
- ArrayRef<Value *> InputList) {
- unsigned NumVec = InputList.size();
- assert(NumVec > 1 && "Should be at least two vectors");
-
- SmallVector<Value *, 8> ResList;
- ResList.append(InputList.begin(), InputList.end());
- do {
- SmallVector<Value *, 8> TmpList;
- for (unsigned i = 0; i < NumVec - 1; i += 2) {
- Value *V0 = ResList[i], *V1 = ResList[i + 1];
- assert((V0->getType() == V1->getType() || i == NumVec - 2) &&
- "Only the last vector may have a different type");
-
- TmpList.push_back(ConcatenateTwoVectors(Builder, V0, V1));
- }
-
- // Push the last vector if the total number of vectors is odd.
- if (NumVec % 2 != 0)
- TmpList.push_back(ResList[NumVec - 1]);
-
- ResList = TmpList;
- NumVec = ResList.size();
- } while (NumVec > 1);
-
- return ResList[0];
-}
-
// Try to vectorize the interleave group that \p Instr belongs to.
//
// E.g. Translate the following interleaved load group (factor = 3):
@@ -2683,15 +2805,13 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
if (Instr != Group->getInsertPos())
return;
- LoadInst *LI = dyn_cast<LoadInst>(Instr);
- StoreInst *SI = dyn_cast<StoreInst>(Instr);
Value *Ptr = getPointerOperand(Instr);
// Prepare for the vector type of the interleaved load/store.
- Type *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
+ Type *ScalarTy = getMemInstValueType(Instr);
unsigned InterleaveFactor = Group->getFactor();
Type *VecTy = VectorType::get(ScalarTy, InterleaveFactor * VF);
- Type *PtrTy = VecTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
+ Type *PtrTy = VecTy->getPointerTo(getMemInstAddressSpace(Instr));
// Prepare for the new pointers.
setDebugLocFromInst(Builder, Ptr);
@@ -2731,7 +2851,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
Value *UndefVec = UndefValue::get(VecTy);
// Vectorize the interleaved load group.
- if (LI) {
+ if (isa<LoadInst>(Instr)) {
// For each unroll part, create a wide load for the group.
SmallVector<Value *, 2> NewLoads;
@@ -2752,7 +2872,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
continue;
VectorParts Entry(UF);
- Constant *StrideMask = getStridedMask(Builder, I, InterleaveFactor, VF);
+ Constant *StrideMask = createStrideMask(Builder, I, InterleaveFactor, VF);
for (unsigned Part = 0; Part < UF; Part++) {
Value *StridedVec = Builder.CreateShuffleVector(
NewLoads[Part], UndefVec, StrideMask, "strided.vec");
@@ -2796,10 +2916,10 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
}
// Concatenate all vectors into a wide vector.
- Value *WideVec = ConcatenateVectors(Builder, StoredVecs);
+ Value *WideVec = concatenateVectors(Builder, StoredVecs);
// Interleave the elements in the wide vector.
- Constant *IMask = getInterleavedMask(Builder, VF, InterleaveFactor);
+ Constant *IMask = createInterleaveMask(Builder, VF, InterleaveFactor);
Value *IVec = Builder.CreateShuffleVector(WideVec, UndefVec, IMask,
"interleaved.vec");
@@ -2816,103 +2936,44 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
assert((LI || SI) && "Invalid Load/Store instruction");
- // Try to vectorize the interleave group if this access is interleaved.
- if (Legal->isAccessInterleaved(Instr))
+ LoopVectorizationCostModel::InstWidening Decision =
+ Cost->getWideningDecision(Instr, VF);
+ assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
+ "CM decision should be taken at this point");
+ if (Decision == LoopVectorizationCostModel::CM_Interleave)
return vectorizeInterleaveGroup(Instr);
- Type *ScalarDataTy = LI ? LI->getType() : SI->getValueOperand()->getType();
+ Type *ScalarDataTy = getMemInstValueType(Instr);
Type *DataTy = VectorType::get(ScalarDataTy, VF);
Value *Ptr = getPointerOperand(Instr);
- unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
+ unsigned Alignment = getMemInstAlignment(Instr);
// An alignment of 0 means target abi alignment. We need to use the scalar's
// target abi alignment in such a case.
const DataLayout &DL = Instr->getModule()->getDataLayout();
if (!Alignment)
Alignment = DL.getABITypeAlignment(ScalarDataTy);
- unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
+ unsigned AddressSpace = getMemInstAddressSpace(Instr);
// Scalarize the memory instruction if necessary.
- if (Legal->memoryInstructionMustBeScalarized(Instr, VF))
+ if (Decision == LoopVectorizationCostModel::CM_Scalarize)
return scalarizeInstruction(Instr, Legal->isScalarWithPredication(Instr));
// Determine if the pointer operand of the access is either consecutive or
// reverse consecutive.
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
bool Reverse = ConsecutiveStride < 0;
-
- // Determine if either a gather or scatter operation is legal.
bool CreateGatherScatter =
- !ConsecutiveStride && Legal->isLegalGatherOrScatter(Instr);
+ (Decision == LoopVectorizationCostModel::CM_GatherScatter);
VectorParts VectorGep;
// Handle consecutive loads/stores.
- GetElementPtrInst *Gep = getGEPInstruction(Ptr);
if (ConsecutiveStride) {
- if (Gep) {
- unsigned NumOperands = Gep->getNumOperands();
-#ifndef NDEBUG
- // The original GEP that identified as a consecutive memory access
- // should have only one loop-variant operand.
- unsigned NumOfLoopVariantOps = 0;
- for (unsigned i = 0; i < NumOperands; ++i)
- if (!PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)),
- OrigLoop))
- NumOfLoopVariantOps++;
- assert(NumOfLoopVariantOps == 1 &&
- "Consecutive GEP should have only one loop-variant operand");
-#endif
- GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
- Gep2->setName("gep.indvar");
-
- // A new GEP is created for a 0-lane value of the first unroll iteration.
- // The GEPs for the rest of the unroll iterations are computed below as an
- // offset from this GEP.
- for (unsigned i = 0; i < NumOperands; ++i)
- // We can apply getScalarValue() for all GEP indices. It returns an
- // original value for loop-invariant operand and 0-lane for consecutive
- // operand.
- Gep2->setOperand(i, getScalarValue(Gep->getOperand(i),
- 0, /* First unroll iteration */
- 0 /* 0-lane of the vector */ ));
- setDebugLocFromInst(Builder, Gep);
- Ptr = Builder.Insert(Gep2);
-
- } else { // No GEP
- setDebugLocFromInst(Builder, Ptr);
- Ptr = getScalarValue(Ptr, 0, 0);
- }
+ Ptr = getScalarValue(Ptr, 0, 0);
} else {
// At this point we should have a vector version of the GEP for Gather or Scatter.
assert(CreateGatherScatter && "The instruction should be scalarized");
- if (Gep) {
- // Vectorizing GEP, across UF parts. We want to get a vector value for base
- // and each index that's defined inside the loop, even if it is
- // loop-invariant but wasn't hoisted out. Otherwise we want to keep them
- // scalar.
- SmallVector<VectorParts, 4> OpsV;
- for (Value *Op : Gep->operands()) {
- Instruction *SrcInst = dyn_cast<Instruction>(Op);
- if (SrcInst && OrigLoop->contains(SrcInst))
- OpsV.push_back(getVectorValue(Op));
- else
- OpsV.push_back(VectorParts(UF, Op));
- }
- for (unsigned Part = 0; Part < UF; ++Part) {
- SmallVector<Value *, 4> Ops;
- Value *GEPBasePtr = OpsV[0][Part];
- for (unsigned i = 1; i < Gep->getNumOperands(); i++)
- Ops.push_back(OpsV[i][Part]);
- Value *NewGep = Builder.CreateGEP(GEPBasePtr, Ops, "VectorGep");
- cast<GetElementPtrInst>(NewGep)->setIsInBounds(Gep->isInBounds());
- assert(NewGep->getType()->isVectorTy() && "Expected vector GEP");
-
- NewGep =
- Builder.CreateBitCast(NewGep, VectorType::get(Ptr->getType(), VF));
- VectorGep.push_back(NewGep);
- }
- } else
- VectorGep = getVectorValue(Ptr);
+ VectorGep = getVectorValue(Ptr);
}
VectorParts Mask = createBlockInMask(Instr->getParent());
@@ -3027,7 +3088,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
// Determine the number of scalars we need to generate for each unroll
// iteration. If the instruction is uniform, we only need to generate the
// first lane. Otherwise, we generate all VF values.
- unsigned Lanes = Legal->isUniformAfterVectorization(Instr) ? 1 : VF;
+ unsigned Lanes = Cost->isUniformAfterVectorization(Instr, VF) ? 1 : VF;
// For each vector unroll 'part':
for (unsigned Part = 0; Part < UF; ++Part) {
@@ -3038,7 +3099,9 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
// Start if-block.
Value *Cmp = nullptr;
if (IfPredicateInstr) {
- Cmp = Builder.CreateExtractElement(Cond[Part], Builder.getInt32(Lane));
+ Cmp = Cond[Part];
+ if (Cmp->getType()->isVectorTy())
+ Cmp = Builder.CreateExtractElement(Cmp, Builder.getInt32(Lane));
Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp,
ConstantInt::get(Cmp->getType(), 1));
}
@@ -3346,7 +3409,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
// - counts from zero, stepping by one
// - is the size of the widest induction variable type
// then we create a new one.
- OldInduction = Legal->getInduction();
+ OldInduction = Legal->getPrimaryInduction();
Type *IdxTy = Legal->getWidestInductionType();
// Split the single block loop into the two loop structure described above.
@@ -3543,7 +3606,7 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
namespace {
struct CSEDenseMapInfo {
- static bool canHandle(Instruction *I) {
+ static bool canHandle(const Instruction *I) {
return isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
isa<ShuffleVectorInst>(I) || isa<GetElementPtrInst>(I);
}
@@ -3553,12 +3616,12 @@ struct CSEDenseMapInfo {
static inline Instruction *getTombstoneKey() {
return DenseMapInfo<Instruction *>::getTombstoneKey();
}
- static unsigned getHashValue(Instruction *I) {
+ static unsigned getHashValue(const Instruction *I) {
assert(canHandle(I) && "Unknown instruction!");
return hash_combine(I->getOpcode(), hash_combine_range(I->value_op_begin(),
I->value_op_end()));
}
- static bool isEqual(Instruction *LHS, Instruction *RHS) {
+ static bool isEqual(const Instruction *LHS, const Instruction *RHS) {
if (LHS == getEmptyKey() || RHS == getEmptyKey() ||
LHS == getTombstoneKey() || RHS == getTombstoneKey())
return LHS == RHS;
@@ -3589,51 +3652,6 @@ static void cse(BasicBlock *BB) {
}
}
-/// \brief Adds a 'fast' flag to floating point operations.
-static Value *addFastMathFlag(Value *V) {
- if (isa<FPMathOperator>(V)) {
- FastMathFlags Flags;
- Flags.setUnsafeAlgebra();
- cast<Instruction>(V)->setFastMathFlags(Flags);
- }
- return V;
-}
-
-/// \brief Estimate the overhead of scalarizing a value based on its type.
-/// Insert and Extract are set if the result needs to be inserted and/or
-/// extracted from vectors.
-static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract,
- const TargetTransformInfo &TTI) {
- if (Ty->isVoidTy())
- return 0;
-
- assert(Ty->isVectorTy() && "Can only scalarize vectors");
- unsigned Cost = 0;
-
- for (unsigned I = 0, E = Ty->getVectorNumElements(); I < E; ++I) {
- if (Extract)
- Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, Ty, I);
- if (Insert)
- Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, I);
- }
-
- return Cost;
-}
-
-/// \brief Estimate the overhead of scalarizing an Instruction based on the
-/// types of its operands and return value.
-static unsigned getScalarizationOverhead(SmallVectorImpl<Type *> &OpTys,
- Type *RetTy,
- const TargetTransformInfo &TTI) {
- unsigned ScalarizationCost =
- getScalarizationOverhead(RetTy, true, false, TTI);
-
- for (Type *Ty : OpTys)
- ScalarizationCost += getScalarizationOverhead(Ty, false, true, TTI);
-
- return ScalarizationCost;
-}
-
/// \brief Estimate the overhead of scalarizing an instruction. This is a
/// convenience wrapper for the type-based getScalarizationOverhead API.
static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
@@ -3641,14 +3659,24 @@ static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
if (VF == 1)
return 0;
+ unsigned Cost = 0;
Type *RetTy = ToVectorTy(I->getType(), VF);
+ if (!RetTy->isVoidTy() &&
+ (!isa<LoadInst>(I) ||
+ !TTI.supportsEfficientVectorElementLoadStore()))
+ Cost += TTI.getScalarizationOverhead(RetTy, true, false);
- SmallVector<Type *, 4> OpTys;
- unsigned OperandsNum = I->getNumOperands();
- for (unsigned OpInd = 0; OpInd < OperandsNum; ++OpInd)
- OpTys.push_back(ToVectorTy(I->getOperand(OpInd)->getType(), VF));
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ SmallVector<const Value *, 4> Operands(CI->arg_operands());
+ Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
+ }
+ else if (!isa<StoreInst>(I) ||
+ !TTI.supportsEfficientVectorElementLoadStore()) {
+ SmallVector<const Value *, 4> Operands(I->operand_values());
+ Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
+ }
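+
+ // For example, scalarizing a <4 x i32> add pays for inserting the four
+ // results and extracting four elements per operand; a scalarized load may
+ // skip the insert cost and a scalarized store the extract cost when the
+ // target supports efficient vector element loads and stores.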
- return getScalarizationOverhead(OpTys, RetTy, TTI);
+ return Cost;
}
// Estimate cost of a call instruction CI if it were vectorized with factor VF.
@@ -3681,7 +3709,7 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
// Compute costs of unpacking argument values for the scalar calls and
// packing the return values to a vector.
- unsigned ScalarizationCost = getScalarizationOverhead(Tys, RetTy, TTI);
+ unsigned ScalarizationCost = getScalarizationOverhead(CI, VF, TTI);
unsigned Cost = ScalarCallCost * VF + ScalarizationCost;
@@ -3709,16 +3737,12 @@ static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
assert(ID && "Expected intrinsic call!");
- Type *RetTy = ToVectorTy(CI->getType(), VF);
- SmallVector<Type *, 4> Tys;
- for (Value *ArgOperand : CI->arg_operands())
- Tys.push_back(ToVectorTy(ArgOperand->getType(), VF));
-
FastMathFlags FMF;
if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
FMF = FPMO->getFastMathFlags();
- return TTI.getIntrinsicInstrCost(ID, RetTy, Tys, FMF);
+ SmallVector<Value *, 4> Operands(CI->arg_operands());
+ return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF);
}
static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
@@ -3861,30 +3885,27 @@ void InnerLoopVectorizer::vectorizeLoop() {
// the cost-model.
//
//===------------------------------------------------===//
- Constant *Zero = Builder.getInt32(0);
- // In order to support recurrences we need to be able to vectorize Phi nodes.
- // Phi nodes have cycles, so we need to vectorize them in two stages. First,
- // we create a new vector PHI node with no incoming edges. We use this value
- // when we vectorize all of the instructions that use the PHI. Next, after
- // all of the instructions in the block are complete we add the new incoming
- // edges to the PHI. At this point all of the instructions in the basic block
- // are vectorized, so we can use them to construct the PHI.
- PhiVector PHIsToFix;
-
- // Collect instructions from the original loop that will become trivially
- // dead in the vectorized loop. We don't need to vectorize these
- // instructions.
- collectTriviallyDeadInstructions();
+ // Collect instructions from the original loop that will become trivially dead
+ // in the vectorized loop. We don't need to vectorize these instructions. For
+ // example, original induction update instructions can become dead because we
+ // separately emit induction "steps" when generating code for the new loop.
+ // Similarly, we create a new latch condition when setting up the structure
+ // of the new loop, so the old one can become dead.
+ SmallPtrSet<Instruction *, 4> DeadInstructions;
+ collectTriviallyDeadInstructions(DeadInstructions);
// Scan the loop in a topological order to ensure that defs are vectorized
// before users.
LoopBlocksDFS DFS(OrigLoop);
DFS.perform(LI);
- // Vectorize all of the blocks in the original loop.
+ // Vectorize all instructions in the original loop that will not become
+ // trivially dead when vectorized.
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
- vectorizeBlockInLoop(BB, &PHIsToFix);
+ for (Instruction &I : *BB)
+ if (!DeadInstructions.count(&I))
+ vectorizeInstruction(I);
// Insert truncates and extends for any truncated instructions as hints to
// InstCombine.
@@ -3892,224 +3913,10 @@ void InnerLoopVectorizer::vectorizeLoop() {
truncateToMinimalBitwidths();
// At this point every instruction in the original loop is widened to a
- // vector form. Now we need to fix the recurrences in PHIsToFix. These PHI
+ // vector form. Now we need to fix the recurrences in the loop. These PHI
// nodes are currently empty because we did not want to introduce cycles.
// This is the second stage of vectorizing recurrences.
- for (PHINode *Phi : PHIsToFix) {
- assert(Phi && "Unable to recover vectorized PHI");
-
- // Handle first-order recurrences that need to be fixed.
- if (Legal->isFirstOrderRecurrence(Phi)) {
- fixFirstOrderRecurrence(Phi);
- continue;
- }
-
- // If the phi node is not a first-order recurrence, it must be a reduction.
- // Get it's reduction variable descriptor.
- assert(Legal->isReductionVariable(Phi) &&
- "Unable to find the reduction variable");
- RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[Phi];
-
- RecurrenceDescriptor::RecurrenceKind RK = RdxDesc.getRecurrenceKind();
- TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue();
- Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
- RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =
- RdxDesc.getMinMaxRecurrenceKind();
- setDebugLocFromInst(Builder, ReductionStartValue);
-
- // We need to generate a reduction vector from the incoming scalar.
- // To do so, we need to generate the 'identity' vector and override
- // one of the elements with the incoming scalar reduction. We need
- // to do it in the vector-loop preheader.
- Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator());
-
- // This is the vector-clone of the value that leaves the loop.
- const VectorParts &VectorExit = getVectorValue(LoopExitInst);
- Type *VecTy = VectorExit[0]->getType();
-
- // Find the reduction identity variable. Zero for addition, or, xor,
- // one for multiplication, -1 for And.
- Value *Identity;
- Value *VectorStart;
- if (RK == RecurrenceDescriptor::RK_IntegerMinMax ||
- RK == RecurrenceDescriptor::RK_FloatMinMax) {
- // MinMax reduction have the start value as their identify.
- if (VF == 1) {
- VectorStart = Identity = ReductionStartValue;
- } else {
- VectorStart = Identity =
- Builder.CreateVectorSplat(VF, ReductionStartValue, "minmax.ident");
- }
- } else {
- // Handle other reduction kinds:
- Constant *Iden = RecurrenceDescriptor::getRecurrenceIdentity(
- RK, VecTy->getScalarType());
- if (VF == 1) {
- Identity = Iden;
- // This vector is the Identity vector where the first element is the
- // incoming scalar reduction.
- VectorStart = ReductionStartValue;
- } else {
- Identity = ConstantVector::getSplat(VF, Iden);
-
- // This vector is the Identity vector where the first element is the
- // incoming scalar reduction.
- VectorStart =
- Builder.CreateInsertElement(Identity, ReductionStartValue, Zero);
- }
- }
-
- // Fix the vector-loop phi.
-
- // Reductions do not have to start at zero. They can start with
- // any loop invariant values.
- const VectorParts &VecRdxPhi = getVectorValue(Phi);
- BasicBlock *Latch = OrigLoop->getLoopLatch();
- Value *LoopVal = Phi->getIncomingValueForBlock(Latch);
- const VectorParts &Val = getVectorValue(LoopVal);
- for (unsigned part = 0; part < UF; ++part) {
- // Make sure to add the reduction stat value only to the
- // first unroll part.
- Value *StartVal = (part == 0) ? VectorStart : Identity;
- cast<PHINode>(VecRdxPhi[part])
- ->addIncoming(StartVal, LoopVectorPreHeader);
- cast<PHINode>(VecRdxPhi[part])
- ->addIncoming(Val[part], LoopVectorBody);
- }
-
- // Before each round, move the insertion point right between
- // the PHIs and the values we are going to write.
- // This allows us to write both PHINodes and the extractelement
- // instructions.
- Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
-
- VectorParts &RdxParts = VectorLoopValueMap.getVector(LoopExitInst);
- setDebugLocFromInst(Builder, LoopExitInst);
-
- // If the vector reduction can be performed in a smaller type, we truncate
- // then extend the loop exit value to enable InstCombine to evaluate the
- // entire expression in the smaller type.
- if (VF > 1 && Phi->getType() != RdxDesc.getRecurrenceType()) {
- Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
- Builder.SetInsertPoint(LoopVectorBody->getTerminator());
- for (unsigned part = 0; part < UF; ++part) {
- Value *Trunc = Builder.CreateTrunc(RdxParts[part], RdxVecTy);
- Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)
- : Builder.CreateZExt(Trunc, VecTy);
- for (Value::user_iterator UI = RdxParts[part]->user_begin();
- UI != RdxParts[part]->user_end();)
- if (*UI != Trunc) {
- (*UI++)->replaceUsesOfWith(RdxParts[part], Extnd);
- RdxParts[part] = Extnd;
- } else {
- ++UI;
- }
- }
- Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
- for (unsigned part = 0; part < UF; ++part)
- RdxParts[part] = Builder.CreateTrunc(RdxParts[part], RdxVecTy);
- }
-
- // Reduce all of the unrolled parts into a single vector.
- Value *ReducedPartRdx = RdxParts[0];
- unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK);
- setDebugLocFromInst(Builder, ReducedPartRdx);
- for (unsigned part = 1; part < UF; ++part) {
- if (Op != Instruction::ICmp && Op != Instruction::FCmp)
- // Floating point operations had to be 'fast' to enable the reduction.
- ReducedPartRdx = addFastMathFlag(
- Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxParts[part],
- ReducedPartRdx, "bin.rdx"));
- else
- ReducedPartRdx = RecurrenceDescriptor::createMinMaxOp(
- Builder, MinMaxKind, ReducedPartRdx, RdxParts[part]);
- }
-
- if (VF > 1) {
- // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
- // and vector ops, reducing the set of values being computed by half each
- // round.
- assert(isPowerOf2_32(VF) &&
- "Reduction emission only supported for pow2 vectors!");
- Value *TmpVec = ReducedPartRdx;
- SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
- for (unsigned i = VF; i != 1; i >>= 1) {
- // Move the upper half of the vector to the lower half.
- for (unsigned j = 0; j != i / 2; ++j)
- ShuffleMask[j] = Builder.getInt32(i / 2 + j);
-
- // Fill the rest of the mask with undef.
- std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
- UndefValue::get(Builder.getInt32Ty()));
-
- Value *Shuf = Builder.CreateShuffleVector(
- TmpVec, UndefValue::get(TmpVec->getType()),
- ConstantVector::get(ShuffleMask), "rdx.shuf");
-
- if (Op != Instruction::ICmp && Op != Instruction::FCmp)
- // Floating point operations had to be 'fast' to enable the reduction.
- TmpVec = addFastMathFlag(Builder.CreateBinOp(
- (Instruction::BinaryOps)Op, TmpVec, Shuf, "bin.rdx"));
- else
- TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind,
- TmpVec, Shuf);
- }
-
- // The result is in the first element of the vector.
- ReducedPartRdx =
- Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
-
- // If the reduction can be performed in a smaller type, we need to extend
- // the reduction to the wider type before we branch to the original loop.
- if (Phi->getType() != RdxDesc.getRecurrenceType())
- ReducedPartRdx =
- RdxDesc.isSigned()
- ? Builder.CreateSExt(ReducedPartRdx, Phi->getType())
- : Builder.CreateZExt(ReducedPartRdx, Phi->getType());
- }
-
- // Create a phi node that merges control-flow from the backedge-taken check
- // block and the middle block.
- PHINode *BCBlockPhi = PHINode::Create(Phi->getType(), 2, "bc.merge.rdx",
- LoopScalarPreHeader->getTerminator());
- for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
- BCBlockPhi->addIncoming(ReductionStartValue, LoopBypassBlocks[I]);
- BCBlockPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
-
- // Now, we need to fix the users of the reduction variable
- // inside and outside of the scalar remainder loop.
- // We know that the loop is in LCSSA form. We need to update the
- // PHI nodes in the exit blocks.
- for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
- LEE = LoopExitBlock->end();
- LEI != LEE; ++LEI) {
- PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
- if (!LCSSAPhi)
- break;
-
- // All PHINodes need to have a single entry edge, or two if
- // we already fixed them.
- assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
-
- // We found our reduction value exit-PHI. Update it with the
- // incoming bypass edge.
- if (LCSSAPhi->getIncomingValue(0) == LoopExitInst) {
- // Add an edge coming from the bypass.
- LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
- break;
- }
- } // end of the LCSSA phi scan.
-
- // Fix the scalar loop reduction variable with the incoming reduction sum
- // from the vector body and from the backedge value.
- int IncomingEdgeBlockIdx =
- Phi->getBasicBlockIndex(OrigLoop->getLoopLatch());
- assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
- // Pick the other block.
- int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
- Phi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
- Phi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
- } // end of for each Phi in PHIsToFix.
+ fixCrossIterationPHIs();
// Update the dominator tree.
//
@@ -4134,6 +3941,25 @@ void InnerLoopVectorizer::vectorizeLoop() {
cse(LoopVectorBody);
}
+void InnerLoopVectorizer::fixCrossIterationPHIs() {
+ // In order to support recurrences we need to be able to vectorize Phi nodes.
+ // Phi nodes have cycles, so we need to vectorize them in two stages. This is
+ // stage #2: We now need to fix the recurrences by adding incoming edges to
+ // the currently empty PHI nodes. At this point every instruction in the
+ // original loop is widened to a vector form so we can use them to construct
+ // the incoming edges.
+ for (Instruction &I : *OrigLoop->getHeader()) {
+ PHINode *Phi = dyn_cast<PHINode>(&I);
+ if (!Phi)
+ break;
+ // Handle first-order recurrences and reductions that need to be fixed.
+ if (Legal->isFirstOrderRecurrence(Phi))
+ fixFirstOrderRecurrence(Phi);
+ else if (Legal->isReductionVariable(Phi))
+ fixReduction(Phi);
+ }
+}
+
void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// This is the second phase of vectorizing first-order recurrences. An
@@ -4212,15 +4038,17 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
auto *VecPhi = Builder.CreatePHI(VectorInit->getType(), 2, "vector.recur");
VecPhi->addIncoming(VectorInit, LoopVectorPreHeader);
- // Get the vectorized previous value. We ensured the previous values was an
- // instruction when detecting the recurrence.
+ // Get the vectorized previous value.
auto &PreviousParts = getVectorValue(Previous);
- // Set the insertion point to be after this instruction. We ensured the
- // previous value dominated all uses of the phi when detecting the
- // recurrence.
- Builder.SetInsertPoint(
- &*++BasicBlock::iterator(cast<Instruction>(PreviousParts[UF - 1])));
+ // Set the insertion point after the previous value if it is an instruction.
+ // Note that the previous value may have been constant-folded so it is not
+ // guaranteed to be an instruction in the vector loop.
+ if (LI->getLoopFor(LoopVectorBody)->isLoopInvariant(PreviousParts[UF - 1]))
+ Builder.SetInsertPoint(&*LoopVectorBody->getFirstInsertionPt());
+ else
+ Builder.SetInsertPoint(
+ &*++BasicBlock::iterator(cast<Instruction>(PreviousParts[UF - 1])));
// We will construct a vector for the recurrence by combining the values for
// the current and previous iterations. This is the required shuffle mask.
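// E.g. for VF 4 the mask is <3, 4, 5, 6>: lane 0 takes the last element of
// the vector from the previous iteration, and the remaining lanes take the
// first three elements of the current iteration's vector.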
@@ -4251,18 +4079,33 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// Extract the last vector element in the middle block. This will be the
// initial value for the recurrence when jumping to the scalar loop.
- auto *Extract = Incoming;
+ auto *ExtractForScalar = Incoming;
if (VF > 1) {
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
- Extract = Builder.CreateExtractElement(Extract, Builder.getInt32(VF - 1),
- "vector.recur.extract");
- }
+ ExtractForScalar = Builder.CreateExtractElement(
+ ExtractForScalar, Builder.getInt32(VF - 1), "vector.recur.extract");
+ }
+ // Extract the second-to-last element in the middle block if the
+ // Phi is used outside the loop. We need to extract the phi itself
+ // and not the last element (the phi update in the current iteration). This
+ // will be the value when jumping to the exit block from the LoopMiddleBlock,
+ // when the scalar loop is not run at all.
+ Value *ExtractForPhiUsedOutsideLoop = nullptr;
+ if (VF > 1)
+ ExtractForPhiUsedOutsideLoop = Builder.CreateExtractElement(
+ Incoming, Builder.getInt32(VF - 2), "vector.recur.extract.for.phi");
+ // When the loop is unrolled without vectorizing, initialize
+ // ExtractForPhiUsedOutsideLoop with the value just prior to the unrolled
+ // value of `Incoming`. This is analogous to the vectorized case above:
+ // extracting the second-to-last element when VF > 1.
+ else if (UF > 1)
+ ExtractForPhiUsedOutsideLoop = PreviousParts[UF - 2];
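+ // E.g. with VF 4, ExtractForPhiUsedOutsideLoop is lane 2 (VF - 2) of
+ // `Incoming`; with VF 1 and UF 2, it is the value of the first unrolled
+ // part.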
// Fix the initial value of the original recurrence in the scalar loop.
Builder.SetInsertPoint(&*LoopScalarPreHeader->begin());
auto *Start = Builder.CreatePHI(Phi->getType(), 2, "scalar.recur.init");
for (auto *BB : predecessors(LoopScalarPreHeader)) {
- auto *Incoming = BB == LoopMiddleBlock ? Extract : ScalarInit;
+ auto *Incoming = BB == LoopMiddleBlock ? ExtractForScalar : ScalarInit;
Start->addIncoming(Incoming, BB);
}
@@ -4279,12 +4122,218 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
if (!LCSSAPhi)
break;
if (LCSSAPhi->getIncomingValue(0) == Phi) {
- LCSSAPhi->addIncoming(Extract, LoopMiddleBlock);
+ LCSSAPhi->addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
break;
}
}
}
+void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
+ Constant *Zero = Builder.getInt32(0);
+
+ // Get its reduction variable descriptor.
+ assert(Legal->isReductionVariable(Phi) &&
+ "Unable to find the reduction variable");
+ RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[Phi];
+
+ RecurrenceDescriptor::RecurrenceKind RK = RdxDesc.getRecurrenceKind();
+ TrackingVH<Value> ReductionStartValue = RdxDesc.getRecurrenceStartValue();
+ Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
+ RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =
+ RdxDesc.getMinMaxRecurrenceKind();
+ setDebugLocFromInst(Builder, ReductionStartValue);
+
+ // We need to generate a reduction vector from the incoming scalar.
+ // To do so, we need to generate the 'identity' vector and override
+ // one of the elements with the incoming scalar reduction. We need
+ // to do it in the vector-loop preheader.
+ Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator());
+
+ // This is the vector-clone of the value that leaves the loop.
+ const VectorParts &VectorExit = getVectorValue(LoopExitInst);
+ Type *VecTy = VectorExit[0]->getType();
+
+ // Find the reduction identity variable: zero for addition, Or, and Xor;
+ // one for multiplication; -1 for And.
+ Value *Identity;
+ Value *VectorStart;
+ if (RK == RecurrenceDescriptor::RK_IntegerMinMax ||
+ RK == RecurrenceDescriptor::RK_FloatMinMax) {
+ // MinMax reductions have the start value as their identity.
+ if (VF == 1) {
+ VectorStart = Identity = ReductionStartValue;
+ } else {
+ VectorStart = Identity =
+ Builder.CreateVectorSplat(VF, ReductionStartValue, "minmax.ident");
+ }
+ } else {
+ // Handle other reduction kinds:
+ Constant *Iden = RecurrenceDescriptor::getRecurrenceIdentity(
+ RK, VecTy->getScalarType());
+ if (VF == 1) {
+ Identity = Iden;
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ VectorStart = ReductionStartValue;
+ } else {
+ Identity = ConstantVector::getSplat(VF, Iden);
+
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ VectorStart =
+ Builder.CreateInsertElement(Identity, ReductionStartValue, Zero);
+ }
+ }
+
+ // Fix the vector-loop phi.
+
+ // Reductions do not have to start at zero. They can start with
+ // any loop invariant values.
+ const VectorParts &VecRdxPhi = getVectorValue(Phi);
+ BasicBlock *Latch = OrigLoop->getLoopLatch();
+ Value *LoopVal = Phi->getIncomingValueForBlock(Latch);
+ const VectorParts &Val = getVectorValue(LoopVal);
+ for (unsigned part = 0; part < UF; ++part) {
+ // Make sure to add the reduction start value only to the
+ // first unroll part.
+ Value *StartVal = (part == 0) ? VectorStart : Identity;
+ cast<PHINode>(VecRdxPhi[part])
+ ->addIncoming(StartVal, LoopVectorPreHeader);
+ cast<PHINode>(VecRdxPhi[part])
+ ->addIncoming(Val[part], LI->getLoopFor(LoopVectorBody)->getLoopLatch());
+ }
+
+ // Before each round, move the insertion point right between
+ // the PHIs and the values we are going to write.
+ // This allows us to write both PHINodes and the extractelement
+ // instructions.
+ Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
+
+ VectorParts &RdxParts = VectorLoopValueMap.getVector(LoopExitInst);
+ setDebugLocFromInst(Builder, LoopExitInst);
+
+ // If the vector reduction can be performed in a smaller type, we truncate
+ // then extend the loop exit value to enable InstCombine to evaluate the
+ // entire expression in the smaller type.
+ if (VF > 1 && Phi->getType() != RdxDesc.getRecurrenceType()) {
+ Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
+ Builder.SetInsertPoint(LoopVectorBody->getTerminator());
+ for (unsigned part = 0; part < UF; ++part) {
+ Value *Trunc = Builder.CreateTrunc(RdxParts[part], RdxVecTy);
+ Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)
+ : Builder.CreateZExt(Trunc, VecTy);
+ for (Value::user_iterator UI = RdxParts[part]->user_begin();
+ UI != RdxParts[part]->user_end();)
+ if (*UI != Trunc) {
+ (*UI++)->replaceUsesOfWith(RdxParts[part], Extnd);
+ RdxParts[part] = Extnd;
+ } else {
+ ++UI;
+ }
+ }
+ Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
+ for (unsigned part = 0; part < UF; ++part)
+ RdxParts[part] = Builder.CreateTrunc(RdxParts[part], RdxVecTy);
+ }
+
+ // Reduce all of the unrolled parts into a single vector.
+ Value *ReducedPartRdx = RdxParts[0];
+ unsigned Op = RecurrenceDescriptor::getRecurrenceBinOp(RK);
+ setDebugLocFromInst(Builder, ReducedPartRdx);
+ for (unsigned part = 1; part < UF; ++part) {
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
+ // Floating point operations had to be 'fast' to enable the reduction.
+ ReducedPartRdx = addFastMathFlag(
+ Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxParts[part],
+ ReducedPartRdx, "bin.rdx"));
+ else
+ ReducedPartRdx = RecurrenceDescriptor::createMinMaxOp(
+ Builder, MinMaxKind, ReducedPartRdx, RdxParts[part]);
+ }
+
+ if (VF > 1) {
+ // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
+ // and vector ops, reducing the set of values being computed by half each
+ // round.
+ assert(isPowerOf2_32(VF) &&
+ "Reduction emission only supported for pow2 vectors!");
+ Value *TmpVec = ReducedPartRdx;
+ SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
+ for (unsigned i = VF; i != 1; i >>= 1) {
+ // Move the upper half of the vector to the lower half.
+ for (unsigned j = 0; j != i / 2; ++j)
+ ShuffleMask[j] = Builder.getInt32(i / 2 + j);
+
+ // Fill the rest of the mask with undef.
+ std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
+ UndefValue::get(Builder.getInt32Ty()));
+
+ Value *Shuf = Builder.CreateShuffleVector(
+ TmpVec, UndefValue::get(TmpVec->getType()),
+ ConstantVector::get(ShuffleMask), "rdx.shuf");
+
+ if (Op != Instruction::ICmp && Op != Instruction::FCmp)
+ // Floating point operations had to be 'fast' to enable the reduction.
+ TmpVec = addFastMathFlag(Builder.CreateBinOp(
+ (Instruction::BinaryOps)Op, TmpVec, Shuf, "bin.rdx"));
+ else
+ TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind,
+ TmpVec, Shuf);
+ }
+
+ // The result is in the first element of the vector.
+ ReducedPartRdx =
+ Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
+
+ // If the reduction can be performed in a smaller type, we need to extend
+ // the reduction to the wider type before we branch to the original loop.
+ if (Phi->getType() != RdxDesc.getRecurrenceType())
+ ReducedPartRdx =
+ RdxDesc.isSigned()
+ ? Builder.CreateSExt(ReducedPartRdx, Phi->getType())
+ : Builder.CreateZExt(ReducedPartRdx, Phi->getType());
+ }
+
+ // Create a phi node that merges control-flow from the backedge-taken check
+ // block and the middle block.
+ PHINode *BCBlockPhi = PHINode::Create(Phi->getType(), 2, "bc.merge.rdx",
+ LoopScalarPreHeader->getTerminator());
+ for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ BCBlockPhi->addIncoming(ReductionStartValue, LoopBypassBlocks[I]);
+ BCBlockPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
+
+ // Now, we need to fix the users of the reduction variable
+ // inside and outside of the scalar remainder loop.
+ // We know that the loop is in LCSSA form. We need to update the
+ // PHI nodes in the exit blocks.
+ for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
+ LEE = LoopExitBlock->end();
+ LEI != LEE; ++LEI) {
+ PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
+ if (!LCSSAPhi)
+ break;
+
+ // All PHINodes need to have a single entry edge, or two if
+ // we already fixed them.
+ assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
+
+ // We found a reduction value exit-PHI. Update it with the
+ // incoming bypass edge.
+ if (LCSSAPhi->getIncomingValue(0) == LoopExitInst)
+ LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
+ } // end of the LCSSA phi scan.
+
+ // Fix the scalar loop reduction variable with the incoming reduction sum
+ // from the vector body and from the backedge value.
+ int IncomingEdgeBlockIdx =
+ Phi->getBasicBlockIndex(OrigLoop->getLoopLatch());
+ assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
+ // Pick the other block.
+ int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
+ Phi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
+ Phi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
+}
+
void InnerLoopVectorizer::fixLCSSAPHIs() {
for (Instruction &LEI : *LoopExitBlock) {
auto *LCSSAPhi = dyn_cast<PHINode>(&LEI);
@@ -4296,7 +4345,8 @@ void InnerLoopVectorizer::fixLCSSAPHIs() {
}
}
-void InnerLoopVectorizer::collectTriviallyDeadInstructions() {
+void InnerLoopVectorizer::collectTriviallyDeadInstructions(
+ SmallPtrSetImpl<Instruction *> &DeadInstructions) {
BasicBlock *Latch = OrigLoop->getLoopLatch();
// We create new control-flow for the vectorized loop, so the original
@@ -4563,9 +4613,12 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
}
void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
- unsigned VF, PhiVector *PV) {
+ unsigned VF) {
PHINode *P = cast<PHINode>(PN);
- // Handle recurrences.
+ // In order to support recurrences we need to be able to vectorize Phi nodes.
+ // Phi nodes have cycles, so we need to vectorize them in two stages. This is
+ // stage #1: We create a new vector PHI node with no incoming edges. We'll use
+ // this value when we vectorize all of the instructions that use the PHI.
if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(P)) {
VectorParts Entry(UF);
for (unsigned part = 0; part < UF; ++part) {
@@ -4576,7 +4629,6 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt());
}
VectorLoopValueMap.initVector(P, Entry);
- PV->push_back(P);
return;
}
@@ -4631,7 +4683,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
case InductionDescriptor::IK_NoInduction:
llvm_unreachable("Unknown induction");
case InductionDescriptor::IK_IntInduction:
- return widenIntInduction(P);
+ case InductionDescriptor::IK_FpInduction:
+ return widenIntOrFpInduction(P);
case InductionDescriptor::IK_PtrInduction: {
// Handle the pointer induction variable case.
assert(P->getType()->isPointerTy() && "Unexpected type.");
@@ -4641,7 +4694,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
// Determine the number of scalars we need to generate for each unroll
// iteration. If the instruction is uniform, we only need to generate the
// first lane. Otherwise, we generate all VF values.
- unsigned Lanes = Legal->isUniformAfterVectorization(P) ? 1 : VF;
+ unsigned Lanes = Cost->isUniformAfterVectorization(P, VF) ? 1 : VF;
// These are the scalar results. Notice that we don't generate vector GEPs
// because scalar GEPs result in better code.
ScalarParts Entry(UF);
@@ -4658,30 +4711,6 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
VectorLoopValueMap.initScalar(P, Entry);
return;
}
- case InductionDescriptor::IK_FpInduction: {
- assert(P->getType() == II.getStartValue()->getType() &&
- "Types must match");
- // Handle other induction variables that are now based on the
- // canonical one.
- assert(P != OldInduction && "Primary induction can be integer only");
-
- Value *V = Builder.CreateCast(Instruction::SIToFP, Induction, P->getType());
- V = II.transform(Builder, V, PSE.getSE(), DL);
- V->setName("fp.offset.idx");
-
- // Now we have scalar op: %fp.offset.idx = StartVal +/- Induction*StepVal
-
- Value *Broadcasted = getBroadcastInstrs(V);
- // After broadcasting the induction variable we need to make the vector
- // consecutive by adding StepVal*0, StepVal*1, StepVal*2, etc.
- Value *StepVal = cast<SCEVUnknown>(II.getStep())->getValue();
- VectorParts Entry(UF);
- for (unsigned part = 0; part < UF; ++part)
- Entry[part] = getStepVector(Broadcasted, VF * part, StepVal,
- II.getInductionOpcode());
- VectorLoopValueMap.initVector(P, Entry);
- return;
- }
}
}
@@ -4703,269 +4732,323 @@ static bool mayDivideByZero(Instruction &I) {
return !CInt || CInt->isZero();
}
-void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
- // For each instruction in the old loop.
- for (Instruction &I : *BB) {
-
- // If the instruction will become trivially dead when vectorized, we don't
- // need to generate it.
- if (DeadInstructions.count(&I))
- continue;
+void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
+ // Scalarize instructions that should remain scalar after vectorization.
+ if (VF > 1 &&
+ !(isa<BranchInst>(&I) || isa<PHINode>(&I) || isa<DbgInfoIntrinsic>(&I)) &&
+ shouldScalarizeInstruction(&I)) {
+ scalarizeInstruction(&I, Legal->isScalarWithPredication(&I));
+ return;
+ }
- // Scalarize instructions that should remain scalar after vectorization.
- if (VF > 1 &&
- !(isa<BranchInst>(&I) || isa<PHINode>(&I) ||
- isa<DbgInfoIntrinsic>(&I)) &&
- shouldScalarizeInstruction(&I)) {
- scalarizeInstruction(&I, Legal->isScalarWithPredication(&I));
- continue;
- }
+ switch (I.getOpcode()) {
+ case Instruction::Br:
+ // Nothing to do for PHIs and BR, since we already took care of the
+ // loop control flow instructions.
+ break;
+ case Instruction::PHI: {
+ // Vectorize PHINodes.
+ widenPHIInstruction(&I, UF, VF);
+ break;
+ } // End of PHI.
+ case Instruction::GetElementPtr: {
+ // Construct a vector GEP by widening the operands of the scalar GEP as
+ // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
+ // results in a vector of pointers when at least one operand of the GEP
+ // is vector-typed. Thus, to keep the representation compact, we only use
+ // vector-typed operands for loop-varying values.
+ auto *GEP = cast<GetElementPtrInst>(&I);
+ VectorParts Entry(UF);
- switch (I.getOpcode()) {
- case Instruction::Br:
- // Nothing to do for PHIs and BR, since we already took care of the
- // loop control flow instructions.
- continue;
- case Instruction::PHI: {
- // Vectorize PHINodes.
- widenPHIInstruction(&I, UF, VF, PV);
- continue;
- } // End of PHI.
-
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::SRem:
- case Instruction::URem:
- // Scalarize with predication if this instruction may divide by zero and
- // block execution is conditional, otherwise fallthrough.
- if (Legal->isScalarWithPredication(&I)) {
- scalarizeInstruction(&I, true);
- continue;
- }
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- // Just widen binops.
- auto *BinOp = cast<BinaryOperator>(&I);
- setDebugLocFromInst(Builder, BinOp);
- const VectorParts &A = getVectorValue(BinOp->getOperand(0));
- const VectorParts &B = getVectorValue(BinOp->getOperand(1));
-
- // Use this vector value for all users of the original instruction.
- VectorParts Entry(UF);
+ if (VF > 1 && OrigLoop->hasLoopInvariantOperands(GEP)) {
+ // If we are vectorizing, but the GEP has only loop-invariant operands,
+ // the GEP we build (by only using vector-typed operands for
+ // loop-varying values) would be a scalar pointer. Thus, to ensure we
+ // produce a vector of pointers, we need to either arbitrarily pick an
+ // operand to broadcast, or broadcast a clone of the original GEP.
+ // Here, we broadcast a clone of the original.
+ //
+ // TODO: If at some point we decide to scalarize instructions having
+ // loop-invariant operands, this special case will no longer be
+ // required. We would add the scalarization decision to
+ // collectLoopScalars() and teach getVectorValue() to broadcast
+ // the lane-zero scalar value.
+ auto *Clone = Builder.Insert(GEP->clone());
+ for (unsigned Part = 0; Part < UF; ++Part)
+ Entry[Part] = Builder.CreateVectorSplat(VF, Clone);
+ } else {
+ // If the GEP has at least one loop-varying operand, we are sure to
+ // produce a vector of pointers. But if we are only unrolling, we want
+ // to produce a scalar GEP for each unroll part. Thus, the GEP we
+ // produce with the code below will be scalar (if VF == 1) or vector
+ // (otherwise). Note that for the unroll-only case, we still maintain
+ // values in the vector mapping with initVector, as we do for other
+ // instructions.
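+ // E.g. for a scalar GEP "%g = getelementptr inbounds i32, i32* %base,
+ // i64 %iv" with VF 4, %base stays scalar while %iv is widened, so the
+ // new GEP produces a <4 x i32*> vector of pointers.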
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]);
- if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))
- VecOp->copyIRFlags(BinOp);
+ // The pointer operand of the new GEP. If it's loop-invariant, we
+ // won't broadcast it.
+ auto *Ptr = OrigLoop->isLoopInvariant(GEP->getPointerOperand())
+ ? GEP->getPointerOperand()
+ : getVectorValue(GEP->getPointerOperand())[Part];
+
+ // Collect all the indices for the new GEP. If any index is
+ // loop-invariant, we won't broadcast it.
+ SmallVector<Value *, 4> Indices;
+ for (auto &U : make_range(GEP->idx_begin(), GEP->idx_end())) {
+ if (OrigLoop->isLoopInvariant(U.get()))
+ Indices.push_back(U.get());
+ else
+ Indices.push_back(getVectorValue(U.get())[Part]);
+ }
- Entry[Part] = V;
+        // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
+        // but it should be a vector otherwise.
+ auto *NewGEP = GEP->isInBounds()
+ ? Builder.CreateInBoundsGEP(Ptr, Indices)
+ : Builder.CreateGEP(Ptr, Indices);
+ assert((VF == 1 || NewGEP->getType()->isVectorTy()) &&
+ "NewGEP is not a pointer vector");
+ Entry[Part] = NewGEP;
}
+ }
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, BinOp);
+ VectorLoopValueMap.initVector(&I, Entry);
+ addMetadata(Entry, GEP);
+ break;
+ }
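As an aside for readers following the widening rule above, here is a minimal, self-contained sketch in plain C++ (not the LLVM API; the types and names are illustrative assumptions): loop-invariant operands stay scalar, and only loop-varying operands are swapped for their per-part vector values.

#include <iostream>
#include <string>
#include <vector>

struct Operand {
  std::string Name;
  bool LoopInvariant;
};

// Build the operand list of the widened GEP for one unroll part: invariant
// operands stay as-is, varying ones use their vectorized counterpart.
std::vector<std::string> widenGEPOperands(const std::vector<Operand> &Ops,
                                          unsigned Part) {
  std::vector<std::string> Widened;
  for (const Operand &Op : Ops)
    Widened.push_back(Op.LoopInvariant
                          ? Op.Name
                          : Op.Name + ".vec." + std::to_string(Part));
  return Widened;
}

int main() {
  // %gep = getelementptr %base, %i -- %base is invariant, %i varies.
  std::vector<Operand> Ops = {{"%base", true}, {"%i", false}};
  for (const std::string &S : widenGEPOperands(Ops, /*Part=*/0))
    std::cout << S << ' '; // prints: %base %i.vec.0
  std::cout << '\n';
}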
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ // Scalarize with predication if this instruction may divide by zero and
+ // block execution is conditional, otherwise fallthrough.
+ if (Legal->isScalarWithPredication(&I)) {
+ scalarizeInstruction(&I, true);
break;
}
- case Instruction::Select: {
- // Widen selects.
- // If the selector is loop invariant we can create a select
- // instruction with a scalar condition. Otherwise, use vector-select.
- auto *SE = PSE.getSE();
- bool InvariantCond =
- SE->isLoopInvariant(PSE.getSCEV(I.getOperand(0)), OrigLoop);
- setDebugLocFromInst(Builder, &I);
-
- // The condition can be loop invariant but still defined inside the
- // loop. This means that we can't just use the original 'cond' value.
- // We have to take the 'vectorized' value and pick the first lane.
- // Instcombine will make this a no-op.
- const VectorParts &Cond = getVectorValue(I.getOperand(0));
- const VectorParts &Op0 = getVectorValue(I.getOperand(1));
- const VectorParts &Op1 = getVectorValue(I.getOperand(2));
-
- auto *ScalarCond = getScalarValue(I.getOperand(0), 0, 0);
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // Just widen binops.
+ auto *BinOp = cast<BinaryOperator>(&I);
+ setDebugLocFromInst(Builder, BinOp);
+ const VectorParts &A = getVectorValue(BinOp->getOperand(0));
+ const VectorParts &B = getVectorValue(BinOp->getOperand(1));
+
+ // Use this vector value for all users of the original instruction.
+ VectorParts Entry(UF);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]);
+
+ if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))
+ VecOp->copyIRFlags(BinOp);
+
+ Entry[Part] = V;
+ }
- VectorParts Entry(UF);
- for (unsigned Part = 0; Part < UF; ++Part) {
- Entry[Part] = Builder.CreateSelect(
- InvariantCond ? ScalarCond : Cond[Part], Op0[Part], Op1[Part]);
- }
+ VectorLoopValueMap.initVector(&I, Entry);
+ addMetadata(Entry, BinOp);
+ break;
+ }
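A rough model of the UF-by-VF representation the binop case relies on, as a sketch with made-up values (plain C++, not LLVM code): each of the UF parts is one VF-wide vector, and widening a binary operator simply combines matching parts lane by lane.

#include <array>
#include <cstdio>

constexpr unsigned UF = 2, VF = 4;
using VectorPart = std::array<int, VF>; // one VF-wide vector value

// Widen an addition: combine the matching parts of A and B lane by lane.
VectorPart widenAdd(const VectorPart &A, const VectorPart &B) {
  VectorPart R{};
  for (unsigned Lane = 0; Lane < VF; ++Lane)
    R[Lane] = A[Lane] + B[Lane];
  return R;
}

int main() {
  std::array<VectorPart, UF> A = {{{0, 1, 2, 3}, {4, 5, 6, 7}}};
  std::array<VectorPart, UF> B = {{{1, 1, 1, 1}, {1, 1, 1, 1}}};
  for (unsigned Part = 0; Part < UF; ++Part) {
    VectorPart Entry = widenAdd(A[Part], B[Part]);
    for (int Lane : Entry)
      std::printf("%d ", Lane); // part 0: 1 2 3 4, part 1: 5 6 7 8
  }
  std::printf("\n");
}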
+ case Instruction::Select: {
+ // Widen selects.
+ // If the selector is loop invariant we can create a select
+ // instruction with a scalar condition. Otherwise, use vector-select.
+ auto *SE = PSE.getSE();
+ bool InvariantCond =
+ SE->isLoopInvariant(PSE.getSCEV(I.getOperand(0)), OrigLoop);
+ setDebugLocFromInst(Builder, &I);
+
+ // The condition can be loop invariant but still defined inside the
+ // loop. This means that we can't just use the original 'cond' value.
+ // We have to take the 'vectorized' value and pick the first lane.
+ // Instcombine will make this a no-op.
+ const VectorParts &Cond = getVectorValue(I.getOperand(0));
+ const VectorParts &Op0 = getVectorValue(I.getOperand(1));
+ const VectorParts &Op1 = getVectorValue(I.getOperand(2));
+
+ auto *ScalarCond = getScalarValue(I.getOperand(0), 0, 0);
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, &I);
- break;
+ VectorParts Entry(UF);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Entry[Part] = Builder.CreateSelect(
+ InvariantCond ? ScalarCond : Cond[Part], Op0[Part], Op1[Part]);
}
- case Instruction::ICmp:
- case Instruction::FCmp: {
- // Widen compares. Generate vector compares.
- bool FCmp = (I.getOpcode() == Instruction::FCmp);
- auto *Cmp = dyn_cast<CmpInst>(&I);
- setDebugLocFromInst(Builder, Cmp);
- const VectorParts &A = getVectorValue(Cmp->getOperand(0));
- const VectorParts &B = getVectorValue(Cmp->getOperand(1));
- VectorParts Entry(UF);
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *C = nullptr;
- if (FCmp) {
- C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]);
- cast<FCmpInst>(C)->copyFastMathFlags(Cmp);
- } else {
- C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]);
- }
- Entry[Part] = C;
+ VectorLoopValueMap.initVector(&I, Entry);
+ addMetadata(Entry, &I);
+ break;
+ }
+
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ // Widen compares. Generate vector compares.
+ bool FCmp = (I.getOpcode() == Instruction::FCmp);
+ auto *Cmp = dyn_cast<CmpInst>(&I);
+ setDebugLocFromInst(Builder, Cmp);
+ const VectorParts &A = getVectorValue(Cmp->getOperand(0));
+ const VectorParts &B = getVectorValue(Cmp->getOperand(1));
+ VectorParts Entry(UF);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *C = nullptr;
+ if (FCmp) {
+ C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]);
+ cast<FCmpInst>(C)->copyFastMathFlags(Cmp);
+ } else {
+ C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]);
}
+ Entry[Part] = C;
+ }
+
+ VectorLoopValueMap.initVector(&I, Entry);
+ addMetadata(Entry, &I);
+ break;
+ }
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, &I);
+ case Instruction::Store:
+ case Instruction::Load:
+ vectorizeMemoryInstruction(&I);
+ break;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ auto *CI = dyn_cast<CastInst>(&I);
+ setDebugLocFromInst(Builder, CI);
+
+ // Optimize the special case where the source is a constant integer
+ // induction variable. Notice that we can only optimize the 'trunc' case
+ // because (a) FP conversions lose precision, (b) sext/zext may wrap, and
+ // (c) other casts depend on pointer size.
+ if (Cost->isOptimizableIVTruncate(CI, VF)) {
+ widenIntOrFpInduction(cast<PHINode>(CI->getOperand(0)),
+ cast<TruncInst>(CI));
break;
}
- case Instruction::Store:
- case Instruction::Load:
- vectorizeMemoryInstruction(&I);
+    // Vectorize casts.
+ Type *DestTy =
+ (VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF);
+
+ const VectorParts &A = getVectorValue(CI->getOperand(0));
+ VectorParts Entry(UF);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy);
+ VectorLoopValueMap.initVector(&I, Entry);
+ addMetadata(Entry, &I);
+ break;
+ }
+
+ case Instruction::Call: {
+ // Ignore dbg intrinsics.
+ if (isa<DbgInfoIntrinsic>(I))
break;
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast: {
- auto *CI = dyn_cast<CastInst>(&I);
- setDebugLocFromInst(Builder, CI);
-
- // Optimize the special case where the source is a constant integer
- // induction variable. Notice that we can only optimize the 'trunc' case
- // because (a) FP conversions lose precision, (b) sext/zext may wrap, and
- // (c) other casts depend on pointer size.
- auto ID = Legal->getInductionVars()->lookup(OldInduction);
- if (isa<TruncInst>(CI) && CI->getOperand(0) == OldInduction &&
- ID.getConstIntStepValue()) {
- widenIntInduction(OldInduction, cast<TruncInst>(CI));
- break;
- }
+ setDebugLocFromInst(Builder, &I);
- /// Vectorize casts.
- Type *DestTy =
- (VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF);
+ Module *M = I.getParent()->getParent()->getParent();
+ auto *CI = cast<CallInst>(&I);
- const VectorParts &A = getVectorValue(CI->getOperand(0));
- VectorParts Entry(UF);
- for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy);
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, &I);
+ StringRef FnName = CI->getCalledFunction()->getName();
+ Function *F = CI->getCalledFunction();
+ Type *RetTy = ToVectorTy(CI->getType(), VF);
+ SmallVector<Type *, 4> Tys;
+ for (Value *ArgOperand : CI->arg_operands())
+ Tys.push_back(ToVectorTy(ArgOperand->getType(), VF));
+
+ Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
+ if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end ||
+ ID == Intrinsic::lifetime_start)) {
+ scalarizeInstruction(&I);
+ break;
+ }
+    // The flag indicates whether we use an intrinsic or a regular call for
+    // the vectorized version of the instruction, i.e., whether the intrinsic
+    // call is more beneficial than the library call.
+ bool NeedToScalarize;
+ unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
+ bool UseVectorIntrinsic =
+ ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
+ if (!UseVectorIntrinsic && NeedToScalarize) {
+ scalarizeInstruction(&I);
break;
}
- case Instruction::Call: {
- // Ignore dbg intrinsics.
- if (isa<DbgInfoIntrinsic>(I))
- break;
- setDebugLocFromInst(Builder, &I);
-
- Module *M = BB->getParent()->getParent();
- auto *CI = cast<CallInst>(&I);
-
- StringRef FnName = CI->getCalledFunction()->getName();
- Function *F = CI->getCalledFunction();
- Type *RetTy = ToVectorTy(CI->getType(), VF);
- SmallVector<Type *, 4> Tys;
- for (Value *ArgOperand : CI->arg_operands())
- Tys.push_back(ToVectorTy(ArgOperand->getType(), VF));
-
- Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
- if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end ||
- ID == Intrinsic::lifetime_start)) {
- scalarizeInstruction(&I);
- break;
- }
- // The flag shows whether we use Intrinsic or a usual Call for vectorized
- // version of the instruction.
- // Is it beneficial to perform intrinsic call compared to lib call?
- bool NeedToScalarize;
- unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
- bool UseVectorIntrinsic =
- ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
- if (!UseVectorIntrinsic && NeedToScalarize) {
- scalarizeInstruction(&I);
- break;
- }
-
- VectorParts Entry(UF);
- for (unsigned Part = 0; Part < UF; ++Part) {
- SmallVector<Value *, 4> Args;
- for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
- Value *Arg = CI->getArgOperand(i);
- // Some intrinsics have a scalar argument - don't replace it with a
- // vector.
- if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) {
- const VectorParts &VectorArg = getVectorValue(CI->getArgOperand(i));
- Arg = VectorArg[Part];
- }
- Args.push_back(Arg);
+ VectorParts Entry(UF);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ SmallVector<Value *, 4> Args;
+ for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ Value *Arg = CI->getArgOperand(i);
+ // Some intrinsics have a scalar argument - don't replace it with a
+ // vector.
+ if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) {
+ const VectorParts &VectorArg = getVectorValue(CI->getArgOperand(i));
+ Arg = VectorArg[Part];
}
+ Args.push_back(Arg);
+ }
- Function *VectorF;
- if (UseVectorIntrinsic) {
- // Use vector version of the intrinsic.
- Type *TysForDecl[] = {CI->getType()};
- if (VF > 1)
- TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF);
- VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
- } else {
- // Use vector version of the library call.
- StringRef VFnName = TLI->getVectorizedFunction(FnName, VF);
- assert(!VFnName.empty() && "Vector function name is empty.");
- VectorF = M->getFunction(VFnName);
- if (!VectorF) {
- // Generate a declaration
- FunctionType *FTy = FunctionType::get(RetTy, Tys, false);
- VectorF =
- Function::Create(FTy, Function::ExternalLinkage, VFnName, M);
- VectorF->copyAttributesFrom(F);
- }
+ Function *VectorF;
+ if (UseVectorIntrinsic) {
+ // Use vector version of the intrinsic.
+ Type *TysForDecl[] = {CI->getType()};
+ if (VF > 1)
+ TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+ VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
+ } else {
+ // Use vector version of the library call.
+ StringRef VFnName = TLI->getVectorizedFunction(FnName, VF);
+ assert(!VFnName.empty() && "Vector function name is empty.");
+ VectorF = M->getFunction(VFnName);
+ if (!VectorF) {
+        // Generate a declaration.
+ FunctionType *FTy = FunctionType::get(RetTy, Tys, false);
+ VectorF =
+ Function::Create(FTy, Function::ExternalLinkage, VFnName, M);
+ VectorF->copyAttributesFrom(F);
}
- assert(VectorF && "Can't create vector function.");
-
- SmallVector<OperandBundleDef, 1> OpBundles;
- CI->getOperandBundlesAsDefs(OpBundles);
- CallInst *V = Builder.CreateCall(VectorF, Args, OpBundles);
+ }
+ assert(VectorF && "Can't create vector function.");
- if (isa<FPMathOperator>(V))
- V->copyFastMathFlags(CI);
+ SmallVector<OperandBundleDef, 1> OpBundles;
+ CI->getOperandBundlesAsDefs(OpBundles);
+ CallInst *V = Builder.CreateCall(VectorF, Args, OpBundles);
- Entry[Part] = V;
- }
+ if (isa<FPMathOperator>(V))
+ V->copyFastMathFlags(CI);
- VectorLoopValueMap.initVector(&I, Entry);
- addMetadata(Entry, &I);
- break;
+ Entry[Part] = V;
}
- default:
- // All other instructions are unsupported. Scalarize them.
- scalarizeInstruction(&I);
- break;
- } // end of switch.
- } // end of for_each instr.
+ VectorLoopValueMap.initVector(&I, Entry);
+ addMetadata(Entry, &I);
+ break;
+ }
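The decision structure above can be summarized by a small sketch (plain C++ with hypothetical cost numbers; the variable names mirror, but do not call, the real helpers): prefer the vector intrinsic when one exists and is no more expensive than the vector library call, and scalarize when neither option is available or profitable.

#include <cstdio>

int main() {
  bool HasIntrinsicID = true;   // getVectorIntrinsicIDForCall(...) != 0
  unsigned IntrinsicCost = 6;   // assumed getVectorIntrinsicCost(...)
  unsigned CallCost = 8;        // assumed getVectorCallCost(...)
  bool NeedToScalarize = false; // no vector library version available?

  bool UseVectorIntrinsic = HasIntrinsicID && IntrinsicCost <= CallCost;
  if (!UseVectorIntrinsic && NeedToScalarize)
    std::puts("scalarize");
  else
    std::puts(UseVectorIntrinsic ? "vector intrinsic" : "vector lib call");
}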
+
+ default:
+ // All other instructions are unsupported. Scalarize them.
+ scalarizeInstruction(&I);
+ break;
+ } // end of switch.
}
void InnerLoopVectorizer::updateAnalysis() {
@@ -4976,11 +5059,10 @@ void InnerLoopVectorizer::updateAnalysis() {
assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) &&
"Entry does not dominate exit.");
- // We don't predicate stores by this point, so the vector body should be a
- // single loop.
- DT->addNewBlock(LoopVectorBody, LoopVectorPreHeader);
-
- DT->addNewBlock(LoopMiddleBlock, LoopVectorBody);
+ DT->addNewBlock(LI->getLoopFor(LoopVectorBody)->getHeader(),
+ LoopVectorPreHeader);
+ DT->addNewBlock(LoopMiddleBlock,
+ LI->getLoopFor(LoopVectorBody)->getLoopLatch());
DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]);
DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
DT->changeImmediateDominator(LoopExitBlock, LoopBypassBlocks[0]);
@@ -5145,12 +5227,6 @@ bool LoopVectorizationLegality::canVectorize() {
if (UseInterleaved)
InterleaveInfo.analyzeInterleaving(*getSymbolicStrides());
- // Collect all instructions that are known to be uniform after vectorization.
- collectLoopUniforms();
-
- // Collect all instructions that are known to be scalar after vectorization.
- collectLoopScalars();
-
unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
@@ -5234,8 +5310,8 @@ void LoopVectorizationLegality::addInductionPhi(
// one if there are multiple (no good reason for doing this other
// than it is expedient). We've checked that it begins at zero and
// steps by one, so this is a canonical induction variable.
- if (!Induction || PhiTy == WidestIndTy)
- Induction = Phi;
+ if (!PrimaryInduction || PhiTy == WidestIndTy)
+ PrimaryInduction = Phi;
}
// Both the PHI node itself, and the "post-increment" value feeding
@@ -5398,7 +5474,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
} // next instr.
}
- if (!Induction) {
+ if (!PrimaryInduction) {
DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
if (Inductions.empty()) {
ORE->emit(createMissedAnalysis("NoInductionVariable")
@@ -5410,46 +5486,166 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Now we know the widest induction type, check if our found induction
// is the same size. If it's not, unset it here and InnerLoopVectorizer
// will create another.
- if (Induction && WidestIndTy != Induction->getType())
- Induction = nullptr;
+ if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
+ PrimaryInduction = nullptr;
return true;
}
-void LoopVectorizationLegality::collectLoopScalars() {
+void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
+
+ // We should not collect Scalars more than once per VF. Right now, this
+ // function is called from collectUniformsAndScalars(), which already does
+ // this check. Collecting Scalars for VF=1 does not make any sense.
+ assert(VF >= 2 && !Scalars.count(VF) &&
+ "This function should not be visited twice for the same VF");
+
+ SmallSetVector<Instruction *, 8> Worklist;
+
+ // These sets are used to seed the analysis with pointers used by memory
+ // accesses that will remain scalar.
+ SmallSetVector<Instruction *, 8> ScalarPtrs;
+ SmallPtrSet<Instruction *, 8> PossibleNonScalarPtrs;
+
+ // A helper that returns true if the use of Ptr by MemAccess will be scalar.
+ // The pointer operands of loads and stores will be scalar as long as the
+ // memory access is not a gather or scatter operation. The value operand of a
+ // store will remain scalar if the store is scalarized.
+ auto isScalarUse = [&](Instruction *MemAccess, Value *Ptr) {
+ InstWidening WideningDecision = getWideningDecision(MemAccess, VF);
+ assert(WideningDecision != CM_Unknown &&
+ "Widening decision should be ready at this moment");
+ if (auto *Store = dyn_cast<StoreInst>(MemAccess))
+ if (Ptr == Store->getValueOperand())
+ return WideningDecision == CM_Scalarize;
+ assert(Ptr == getPointerOperand(MemAccess) &&
+ "Ptr is neither a value or pointer operand");
+ return WideningDecision != CM_GatherScatter;
+ };
+
+ // A helper that returns true if the given value is a bitcast or
+ // getelementptr instruction contained in the loop.
+ auto isLoopVaryingBitCastOrGEP = [&](Value *V) {
+ return ((isa<BitCastInst>(V) && V->getType()->isPointerTy()) ||
+ isa<GetElementPtrInst>(V)) &&
+ !TheLoop->isLoopInvariant(V);
+ };
- // If an instruction is uniform after vectorization, it will remain scalar.
- Scalars.insert(Uniforms.begin(), Uniforms.end());
+ // A helper that evaluates a memory access's use of a pointer. If the use
+ // will be a scalar use, and the pointer is only used by memory accesses, we
+ // place the pointer in ScalarPtrs. Otherwise, the pointer is placed in
+ // PossibleNonScalarPtrs.
+ auto evaluatePtrUse = [&](Instruction *MemAccess, Value *Ptr) {
- // Collect the getelementptr instructions that will not be vectorized. A
- // getelementptr instruction is only vectorized if it is used for a legal
- // gather or scatter operation.
+ // We only care about bitcast and getelementptr instructions contained in
+ // the loop.
+ if (!isLoopVaryingBitCastOrGEP(Ptr))
+ return;
+
+ // If the pointer has already been identified as scalar (e.g., if it was
+ // also identified as uniform), there's nothing to do.
+ auto *I = cast<Instruction>(Ptr);
+ if (Worklist.count(I))
+ return;
+
+ // If the use of the pointer will be a scalar use, and all users of the
+ // pointer are memory accesses, place the pointer in ScalarPtrs. Otherwise,
+ // place the pointer in PossibleNonScalarPtrs.
+ if (isScalarUse(MemAccess, Ptr) && all_of(I->users(), [&](User *U) {
+ return isa<LoadInst>(U) || isa<StoreInst>(U);
+ }))
+ ScalarPtrs.insert(I);
+ else
+ PossibleNonScalarPtrs.insert(I);
+ };
+
+ // We seed the scalars analysis with three classes of instructions: (1)
+ // instructions marked uniform-after-vectorization, (2) bitcast and
+ // getelementptr instructions used by memory accesses requiring a scalar use,
+ // and (3) pointer induction variables and their update instructions (we
+ // currently only scalarize these).
+ //
+ // (1) Add to the worklist all instructions that have been identified as
+ // uniform-after-vectorization.
+ Worklist.insert(Uniforms[VF].begin(), Uniforms[VF].end());
+
+ // (2) Add to the worklist all bitcast and getelementptr instructions used by
+ // memory accesses requiring a scalar use. The pointer operands of loads and
+  // stores will be scalar as long as the memory access is not a gather or
+ // scatter operation. The value operand of a store will remain scalar if the
+ // store is scalarized.
for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
- if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
- Scalars.insert(GEP);
- continue;
+ if (auto *Load = dyn_cast<LoadInst>(&I)) {
+ evaluatePtrUse(Load, Load->getPointerOperand());
+ } else if (auto *Store = dyn_cast<StoreInst>(&I)) {
+ evaluatePtrUse(Store, Store->getPointerOperand());
+ evaluatePtrUse(Store, Store->getValueOperand());
}
- auto *Ptr = getPointerOperand(&I);
- if (!Ptr)
- continue;
- auto *GEP = getGEPInstruction(Ptr);
- if (GEP && isLegalGatherOrScatter(&I))
- Scalars.erase(GEP);
+ }
+ for (auto *I : ScalarPtrs)
+ if (!PossibleNonScalarPtrs.count(I)) {
+ DEBUG(dbgs() << "LV: Found scalar instruction: " << *I << "\n");
+ Worklist.insert(I);
}
+ // (3) Add to the worklist all pointer induction variables and their update
+ // instructions.
+ //
+ // TODO: Once we are able to vectorize pointer induction variables we should
+ // no longer insert them into the worklist here.
+ auto *Latch = TheLoop->getLoopLatch();
+ for (auto &Induction : *Legal->getInductionVars()) {
+ auto *Ind = Induction.first;
+ auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
+ if (Induction.second.getKind() != InductionDescriptor::IK_PtrInduction)
+ continue;
+ Worklist.insert(Ind);
+ Worklist.insert(IndUpdate);
+ DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n");
+ DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate << "\n");
+ }
+
+ // Expand the worklist by looking through any bitcasts and getelementptr
+ // instructions we've already identified as scalar. This is similar to the
+ // expansion step in collectLoopUniforms(); however, here we're only
+ // expanding to include additional bitcasts and getelementptr instructions.
+ unsigned Idx = 0;
+ while (Idx != Worklist.size()) {
+ Instruction *Dst = Worklist[Idx++];
+ if (!isLoopVaryingBitCastOrGEP(Dst->getOperand(0)))
+ continue;
+ auto *Src = cast<Instruction>(Dst->getOperand(0));
+ if (all_of(Src->users(), [&](User *U) -> bool {
+ auto *J = cast<Instruction>(U);
+ return !TheLoop->contains(J) || Worklist.count(J) ||
+ ((isa<LoadInst>(J) || isa<StoreInst>(J)) &&
+ isScalarUse(J, Src));
+ })) {
+ Worklist.insert(Src);
+ DEBUG(dbgs() << "LV: Found scalar instruction: " << *Src << "\n");
+ }
+ }
+
// An induction variable will remain scalar if all users of the induction
// variable and induction variable update remain scalar.
- auto *Latch = TheLoop->getLoopLatch();
- for (auto &Induction : *getInductionVars()) {
+ for (auto &Induction : *Legal->getInductionVars()) {
auto *Ind = Induction.first;
auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
+ // We already considered pointer induction variables, so there's no reason
+ // to look at their users again.
+ //
+ // TODO: Once we are able to vectorize pointer induction variables we
+ // should no longer skip over them here.
+ if (Induction.second.getKind() == InductionDescriptor::IK_PtrInduction)
+ continue;
+
// Determine if all users of the induction variable are scalar after
// vectorization.
auto ScalarInd = all_of(Ind->users(), [&](User *U) -> bool {
auto *I = cast<Instruction>(U);
- return I == IndUpdate || !TheLoop->contains(I) || Scalars.count(I);
+ return I == IndUpdate || !TheLoop->contains(I) || Worklist.count(I);
});
if (!ScalarInd)
continue;
@@ -5458,23 +5654,19 @@ void LoopVectorizationLegality::collectLoopScalars() {
// scalar after vectorization.
auto ScalarIndUpdate = all_of(IndUpdate->users(), [&](User *U) -> bool {
auto *I = cast<Instruction>(U);
- return I == Ind || !TheLoop->contains(I) || Scalars.count(I);
+ return I == Ind || !TheLoop->contains(I) || Worklist.count(I);
});
if (!ScalarIndUpdate)
continue;
// The induction variable and its update instruction will remain scalar.
- Scalars.insert(Ind);
- Scalars.insert(IndUpdate);
+ Worklist.insert(Ind);
+ Worklist.insert(IndUpdate);
+ DEBUG(dbgs() << "LV: Found scalar instruction: " << *Ind << "\n");
+ DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate << "\n");
}
-}
-bool LoopVectorizationLegality::hasConsecutiveLikePtrOperand(Instruction *I) {
- if (isAccessInterleaved(I))
- return true;
- if (auto *Ptr = getPointerOperand(I))
- return isConsecutivePtr(Ptr);
- return false;
+ Scalars[VF].insert(Worklist.begin(), Worklist.end());
}
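The expansion step above is a standard worklist fixed point. A minimal sketch of the pattern, with toy integer "instructions" and a single-operand def-use map standing in for the real IR (and ignoring the all-users check for brevity):

#include <cstdio>
#include <map>
#include <set>
#include <vector>

// Each instruction names the single operand it may pull into the set.
std::map<int, int> OperandOf = {{3, 2}, {2, 1}}; // 3 uses 2, 2 uses 1

int main() {
  std::vector<int> Worklist = {3}; // seeded scalars
  std::set<int> Scalars(Worklist.begin(), Worklist.end());
  for (size_t Idx = 0; Idx < Worklist.size(); ++Idx) {
    auto It = OperandOf.find(Worklist[Idx]);
    if (It != OperandOf.end() && Scalars.insert(It->second).second)
      Worklist.push_back(It->second); // newly discovered scalar
  }
  for (int I : Scalars)
    std::printf("%d ", I); // 1 2 3
  std::printf("\n");
}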
bool LoopVectorizationLegality::isScalarWithPredication(Instruction *I) {
@@ -5494,48 +5686,48 @@ bool LoopVectorizationLegality::isScalarWithPredication(Instruction *I) {
return false;
}
-bool LoopVectorizationLegality::memoryInstructionMustBeScalarized(
- Instruction *I, unsigned VF) {
-
- // If the memory instruction is in an interleaved group, it will be
- // vectorized and its pointer will remain uniform.
- if (isAccessInterleaved(I))
- return false;
-
+bool LoopVectorizationLegality::memoryInstructionCanBeWidened(Instruction *I,
+ unsigned VF) {
// Get and ensure we have a valid memory instruction.
LoadInst *LI = dyn_cast<LoadInst>(I);
StoreInst *SI = dyn_cast<StoreInst>(I);
assert((LI || SI) && "Invalid memory instruction");
- // If the pointer operand is uniform (loop invariant), the memory instruction
- // will be scalarized.
auto *Ptr = getPointerOperand(I);
- if (LI && isUniform(Ptr))
- return true;
- // If the pointer operand is non-consecutive and neither a gather nor a
- // scatter operation is legal, the memory instruction will be scalarized.
- if (!isConsecutivePtr(Ptr) && !isLegalGatherOrScatter(I))
- return true;
+ // In order to be widened, the pointer should be consecutive, first of all.
+ if (!isConsecutivePtr(Ptr))
+ return false;
// If the instruction is a store located in a predicated block, it will be
// scalarized.
if (isScalarWithPredication(I))
- return true;
+ return false;
// If the instruction's allocated size doesn't equal it's type size, it
// requires padding and will be scalarized.
auto &DL = I->getModule()->getDataLayout();
auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
if (hasIrregularType(ScalarTy, DL, VF))
- return true;
+ return false;
- // Otherwise, the memory instruction should be vectorized if the rest of the
- // loop is.
- return false;
+ return true;
}
-void LoopVectorizationLegality::collectLoopUniforms() {
+void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
+
+ // We should not collect Uniforms more than once per VF. Right now,
+ // this function is called from collectUniformsAndScalars(), which
+ // already does this check. Collecting Uniforms for VF=1 does not make any
+ // sense.
+
+ assert(VF >= 2 && !Uniforms.count(VF) &&
+ "This function should not be visited twice for the same VF");
+
+  // Create the entry for this VF up front: even if we find no uniform
+  // values, the entry ensures we won't analyze this VF again, since
+  // Uniforms.count(VF) will return 1.
+ Uniforms[VF].clear();
+
// We now know that the loop is vectorizable!
// Collect instructions inside the loop that will remain uniform after
// vectorization.
@@ -5568,6 +5760,14 @@ void LoopVectorizationLegality::collectLoopUniforms() {
// Holds pointer operands of instructions that are possibly non-uniform.
SmallPtrSet<Instruction *, 8> PossibleNonUniformPtrs;
+ auto isUniformDecision = [&](Instruction *I, unsigned VF) {
+ InstWidening WideningDecision = getWideningDecision(I, VF);
+ assert(WideningDecision != CM_Unknown &&
+ "Widening decision should be ready at this moment");
+
+ return (WideningDecision == CM_Widen ||
+ WideningDecision == CM_Interleave);
+ };
// Iterate over the instructions in the loop, and collect all
// consecutive-like pointer operands in ConsecutiveLikePtrs. If it's possible
// that a consecutive-like pointer operand will be scalarized, we collect it
@@ -5590,25 +5790,18 @@ void LoopVectorizationLegality::collectLoopUniforms() {
return getPointerOperand(U) == Ptr;
});
- // Ensure the memory instruction will not be scalarized, making its
- // pointer operand non-uniform. If the pointer operand is used by some
- // instruction other than a memory access, we're not going to check if
- // that other instruction may be scalarized here. Thus, conservatively
- // assume the pointer operand may be non-uniform.
- if (!UsersAreMemAccesses || memoryInstructionMustBeScalarized(&I))
+ // Ensure the memory instruction will not be scalarized or used by
+ // gather/scatter, making its pointer operand non-uniform. If the pointer
+ // operand is used by any instruction other than a memory access, we
+ // conservatively assume the pointer operand may be non-uniform.
+ if (!UsersAreMemAccesses || !isUniformDecision(&I, VF))
PossibleNonUniformPtrs.insert(Ptr);
// If the memory instruction will be vectorized and its pointer operand
- // is consecutive-like, the pointer operand should remain uniform.
- else if (hasConsecutiveLikePtrOperand(&I))
- ConsecutiveLikePtrs.insert(Ptr);
-
- // Otherwise, if the memory instruction will be vectorized and its
- // pointer operand is non-consecutive-like, the memory instruction should
- // be a gather or scatter operation. Its pointer operand will be
- // non-uniform.
+      // is consecutive-like or part of an interleave group, the pointer
+      // operand should remain uniform.
else
- PossibleNonUniformPtrs.insert(Ptr);
+ ConsecutiveLikePtrs.insert(Ptr);
}
// Add to the Worklist all consecutive and consecutive-like pointers that
@@ -5632,7 +5825,9 @@ void LoopVectorizationLegality::collectLoopUniforms() {
continue;
auto *OI = cast<Instruction>(OV);
if (all_of(OI->users(), [&](User *U) -> bool {
- return isOutOfScope(U) || Worklist.count(cast<Instruction>(U));
+ auto *J = cast<Instruction>(U);
+ return !TheLoop->contains(J) || Worklist.count(J) ||
+ (OI == getPointerOperand(J) && isUniformDecision(J, VF));
})) {
Worklist.insert(OI);
DEBUG(dbgs() << "LV: Found uniform instruction: " << *OI << "\n");
@@ -5643,7 +5838,7 @@ void LoopVectorizationLegality::collectLoopUniforms() {
// Returns true if Ptr is the pointer operand of a memory access instruction
// I, and I is known to not require scalarization.
auto isVectorizedMemAccessUse = [&](Instruction *I, Value *Ptr) -> bool {
- return getPointerOperand(I) == Ptr && !memoryInstructionMustBeScalarized(I);
+ return getPointerOperand(I) == Ptr && isUniformDecision(I, VF);
};
// For an instruction to be added into Worklist above, all its users inside
@@ -5652,7 +5847,7 @@ void LoopVectorizationLegality::collectLoopUniforms() {
// nodes separately. An induction variable will remain uniform if all users
// of the induction variable and induction variable update remain uniform.
// The code below handles both pointer and non-pointer induction variables.
- for (auto &Induction : Inductions) {
+ for (auto &Induction : *Legal->getInductionVars()) {
auto *Ind = Induction.first;
auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
@@ -5683,7 +5878,7 @@ void LoopVectorizationLegality::collectLoopUniforms() {
DEBUG(dbgs() << "LV: Found uniform instruction: " << *IndUpdate << "\n");
}
- Uniforms.insert(Worklist.begin(), Worklist.end());
+ Uniforms[VF].insert(Worklist.begin(), Worklist.end());
}
bool LoopVectorizationLegality::canVectorizeMemory() {
@@ -5823,7 +6018,7 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
// An alignment of 0 means target ABI alignment.
- unsigned Align = LI ? LI->getAlignment() : SI->getAlignment();
+ unsigned Align = getMemInstAlignment(&I);
if (!Align)
Align = DL.getABITypeAlignment(PtrTy->getElementType());
@@ -5978,6 +6173,11 @@ void InterleavedAccessInfo::analyzeInterleaving(
if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
continue;
+  // Ignore A if the memory objects of A and B don't belong to the same
+  // address space.
+ if (getMemInstAddressSpace(A) != getMemInstAddressSpace(B))
+ continue;
+
// Calculate the distance from A to B.
const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
@@ -6020,36 +6220,36 @@ void InterleavedAccessInfo::analyzeInterleaving(
if (Group->getNumMembers() != Group->getFactor())
releaseGroup(Group);
- // Remove interleaved groups with gaps (currently only loads) whose memory
- // accesses may wrap around. We have to revisit the getPtrStride analysis,
- // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
+ // Remove interleaved groups with gaps (currently only loads) whose memory
+ // accesses may wrap around. We have to revisit the getPtrStride analysis,
+ // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
// not check wrapping (see documentation there).
- // FORNOW we use Assume=false;
- // TODO: Change to Assume=true but making sure we don't exceed the threshold
+ // FORNOW we use Assume=false;
+ // TODO: Change to Assume=true but making sure we don't exceed the threshold
// of runtime SCEV assumptions checks (thereby potentially failing to
- // vectorize altogether).
+ // vectorize altogether).
// Additional optional optimizations:
- // TODO: If we are peeling the loop and we know that the first pointer doesn't
+ // TODO: If we are peeling the loop and we know that the first pointer doesn't
// wrap then we can deduce that all pointers in the group don't wrap.
- // This means that we can forcefully peel the loop in order to only have to
- // check the first pointer for no-wrap. When we'll change to use Assume=true
+ // This means that we can forcefully peel the loop in order to only have to
+ // check the first pointer for no-wrap. When we'll change to use Assume=true
// we'll only need at most one runtime check per interleaved group.
//
for (InterleaveGroup *Group : LoadGroups) {
    // Case 1: A full group. We can skip the checks; for full groups, if the wide
- // load would wrap around the address space we would do a memory access at
- // nullptr even without the transformation.
- if (Group->getNumMembers() == Group->getFactor())
+ // load would wrap around the address space we would do a memory access at
+ // nullptr even without the transformation.
+ if (Group->getNumMembers() == Group->getFactor())
continue;
- // Case 2: If first and last members of the group don't wrap this implies
+ // Case 2: If first and last members of the group don't wrap this implies
// that all the pointers in the group don't wrap.
// So we check only group member 0 (which is always guaranteed to exist),
- // and group member Factor - 1; If the latter doesn't exist we rely on
+ // and group member Factor - 1; If the latter doesn't exist we rely on
    // peeling (if it is a non-reversed access -- see Case 3).
Value *FirstMemberPtr = getPointerOperand(Group->getMember(0));
- if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
/*ShouldCheckWrap=*/true)) {
DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
"first group member potentially pointer-wrapping.\n");
@@ -6065,8 +6265,7 @@ void InterleavedAccessInfo::analyzeInterleaving(
"last group member potentially pointer-wrapping.\n");
releaseGroup(Group);
}
- }
- else {
+ } else {
// Case 3: A non-reversed interleaved load group with gaps: We need
// to execute at least one scalar epilogue iteration. This will ensure
// we don't speculatively access memory out-of-bounds. We only need
@@ -6082,27 +6281,62 @@ void InterleavedAccessInfo::analyzeInterleaving(
}
}
-LoopVectorizationCostModel::VectorizationFactor
-LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
- // Width 1 means no vectorize
- VectorizationFactor Factor = {1U, 0U};
- if (OptForSize && Legal->getRuntimePointerChecking()->Need) {
+Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) {
+ if (!EnableCondStoresVectorization && Legal->getNumPredStores()) {
+ ORE->emit(createMissedAnalysis("ConditionalStore")
+ << "store that is conditionally executed prevents vectorization");
+ DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n");
+ return None;
+ }
+
+ if (!OptForSize) // Remaining checks deal with scalar loop when OptForSize.
+ return computeFeasibleMaxVF(OptForSize);
+
+ if (Legal->getRuntimePointerChecking()->Need) {
ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize")
<< "runtime pointer checks needed. Enable vectorization of this "
"loop with '#pragma clang loop vectorize(enable)' when "
"compiling with -Os/-Oz");
DEBUG(dbgs()
<< "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n");
- return Factor;
+ return None;
}
- if (!EnableCondStoresVectorization && Legal->getNumPredStores()) {
- ORE->emit(createMissedAnalysis("ConditionalStore")
- << "store that is conditionally executed prevents vectorization");
- DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n");
- return Factor;
+ // If we optimize the program for size, avoid creating the tail loop.
+ unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
+ DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
+
+ // If we don't know the precise trip count, don't try to vectorize.
+ if (TC < 2) {
+ ORE->emit(
+ createMissedAnalysis("UnknownLoopCountComplexCFG")
+ << "unable to calculate the loop count due to complex control flow");
+ DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
+ return None;
}
+ unsigned MaxVF = computeFeasibleMaxVF(OptForSize);
+
+ if (TC % MaxVF != 0) {
+ // If the trip count that we found modulo the vectorization factor is not
+ // zero then we require a tail.
+ // FIXME: look for a smaller MaxVF that does divide TC rather than give up.
+ // FIXME: return None if loop requiresScalarEpilog(<MaxVF>), or look for a
+ // smaller MaxVF that does not require a scalar epilog.
+
+ ORE->emit(createMissedAnalysis("NoTailLoopWithOptForSize")
+ << "cannot optimize for size and vectorize at the "
+ "same time. Enable vectorization of this loop "
+ "with '#pragma clang loop vectorize(enable)' "
+ "when compiling with -Os/-Oz");
+ DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
+ return None;
+ }
+
+ return MaxVF;
+}
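A worked sketch of the -Os/-Oz tail check above, in plain C++ with assumed trip counts: a tail loop is required exactly when the known trip count is not a multiple of MaxVF.

#include <cstdio>

// A tail loop is needed iff the trip count is not a multiple of MaxVF.
static bool requiresTailLoop(unsigned TC, unsigned MaxVF) {
  return TC % MaxVF != 0;
}

int main() {
  std::printf("%d\n", requiresTailLoop(16, 8)); // 0: 16 iterations fill two
                                                // full vector iterations
  std::printf("%d\n", requiresTailLoop(17, 8)); // 1: one leftover iteration,
                                                // so computeMaxVF gives up
                                                // under -Os/-Oz
}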
+
+unsigned LoopVectorizationCostModel::computeFeasibleMaxVF(bool OptForSize) {
MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
unsigned SmallestType, WidestType;
std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
@@ -6136,7 +6370,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
assert(MaxVectorSize <= 64 && "Did not expect to pack so many elements"
" into one vector!");
- unsigned VF = MaxVectorSize;
+ unsigned MaxVF = MaxVectorSize;
if (MaximizeBandwidth && !OptForSize) {
// Collect all viable vectorization factors.
SmallVector<unsigned, 8> VFs;
@@ -6152,54 +6386,16 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true);
for (int i = RUs.size() - 1; i >= 0; --i) {
if (RUs[i].MaxLocalUsers <= TargetNumRegisters) {
- VF = VFs[i];
+ MaxVF = VFs[i];
break;
}
}
}
+ return MaxVF;
+}
- // If we optimize the program for size, avoid creating the tail loop.
- if (OptForSize) {
- unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
- DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
-
- // If we don't know the precise trip count, don't try to vectorize.
- if (TC < 2) {
- ORE->emit(
- createMissedAnalysis("UnknownLoopCountComplexCFG")
- << "unable to calculate the loop count due to complex control flow");
- DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
- return Factor;
- }
-
- // Find the maximum SIMD width that can fit within the trip count.
- VF = TC % MaxVectorSize;
-
- if (VF == 0)
- VF = MaxVectorSize;
- else {
- // If the trip count that we found modulo the vectorization factor is not
- // zero then we require a tail.
- ORE->emit(createMissedAnalysis("NoTailLoopWithOptForSize")
- << "cannot optimize for size and vectorize at the "
- "same time. Enable vectorization of this loop "
- "with '#pragma clang loop vectorize(enable)' "
- "when compiling with -Os/-Oz");
- DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
- return Factor;
- }
- }
-
- int UserVF = Hints->getWidth();
- if (UserVF != 0) {
- assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
- DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
-
- Factor.Width = UserVF;
- collectInstsToScalarize(UserVF);
- return Factor;
- }
-
+LoopVectorizationCostModel::VectorizationFactor
+LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) {
float Cost = expectedCost(1).first;
#ifndef NDEBUG
const float ScalarCost = Cost;
@@ -6209,12 +6405,12 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled;
// Ignore scalar width, because the user explicitly wants vectorization.
- if (ForceVectorization && VF > 1) {
+ if (ForceVectorization && MaxVF > 1) {
Width = 2;
Cost = expectedCost(Width).first / (float)Width;
}
- for (unsigned i = 2; i <= VF; i *= 2) {
+ for (unsigned i = 2; i <= MaxVF; i *= 2) {
// Notice that the vector loop needs to be executed less times, so
// we need to divide the cost of the vector loops by the width of
// the vector elements.
@@ -6238,8 +6434,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
<< "LV: Vectorization seems to be not beneficial, "
<< "but was forced by a user.\n");
DEBUG(dbgs() << "LV: Selecting VF: " << Width << ".\n");
- Factor.Width = Width;
- Factor.Cost = Width * Cost;
+ VectorizationFactor Factor = {Width, (unsigned)(Width * Cost)};
return Factor;
}
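The selection loop above can be modeled by a short sketch (plain C++; the costs are invented): pick the power-of-two width up to MaxVF that minimizes the expected cost per lane, since the vector loop executes fewer times.

#include <cstdio>
#include <map>

int main() {
  // Hypothetical expectedCost(VF) values for one loop body.
  std::map<unsigned, float> ExpectedCost = {
      {1, 10.f}, {2, 12.f}, {4, 16.f}, {8, 40.f}};
  unsigned MaxVF = 8, Width = 1;
  float Best = ExpectedCost[1] / 1;
  for (unsigned VF = 2; VF <= MaxVF; VF *= 2) {
    float PerLane = ExpectedCost[VF] / VF; // the vector loop runs TC/VF times
    if (PerLane < Best) {
      Best = PerLane;
      Width = VF;
    }
  }
  std::printf("Selected VF: %u\n", Width); // 4 (cost 16/4 = 4 per lane)
}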
@@ -6277,9 +6472,16 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() {
T = ST->getValueOperand()->getType();
// Ignore loaded pointer types and stored pointer types that are not
- // consecutive. However, we do want to take consecutive stores/loads of
- // pointer vectors into account.
- if (T->isPointerTy() && !isConsecutiveLoadOrStore(&I))
+ // vectorizable.
+ //
+ // FIXME: The check here attempts to predict whether a load or store will
+ // be vectorized. We only know this for certain after a VF has
+ // been selected. Here, we assume that if an access can be
+ // vectorized, it will be. We should also look at extending this
+ // optimization to non-pointer types.
+ //
+ if (T->isPointerTy() && !isConsecutiveLoadOrStore(&I) &&
+ !Legal->isAccessInterleaved(&I) && !Legal->isLegalGatherOrScatter(&I))
continue;
MinWidth = std::min(MinWidth,
@@ -6562,12 +6764,13 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size());
continue;
}
-
+ collectUniformsAndScalars(VFs[j]);
// Count the number of live intervals.
unsigned RegUsage = 0;
for (auto Inst : OpenIntervals) {
// Skip ignored values for VF > 1.
- if (VecValuesToIgnore.count(Inst))
+ if (VecValuesToIgnore.count(Inst) ||
+ isScalarAfterVectorization(Inst, VFs[j]))
continue;
RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
}
@@ -6628,6 +6831,9 @@ void LoopVectorizationCostModel::collectInstsToScalarize(unsigned VF) {
ScalarCostsTy ScalarCosts;
if (computePredInstDiscount(&I, ScalarCosts, VF) >= 0)
ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end());
+
+ // Remember that BB will remain after vectorization.
+ PredicatedBBsAfterVectorization.insert(BB);
}
}
}
@@ -6636,7 +6842,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
Instruction *PredInst, DenseMap<Instruction *, unsigned> &ScalarCosts,
unsigned VF) {
- assert(!Legal->isUniformAfterVectorization(PredInst) &&
+ assert(!isUniformAfterVectorization(PredInst, VF) &&
"Instruction marked uniform-after-vectorization will be predicated");
// Initialize the discount to zero, meaning that the scalar version and the
@@ -6657,7 +6863,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
// already be scalar to avoid traversing chains that are unlikely to be
// beneficial.
if (!I->hasOneUse() || PredInst->getParent() != I->getParent() ||
- Legal->isScalarAfterVectorization(I))
+ isScalarAfterVectorization(I, VF))
return false;
// If the instruction is scalar with predication, it will be analyzed
@@ -6677,7 +6883,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
// the lane zero values for uniforms rather than asserting.
for (Use &U : I->operands())
if (auto *J = dyn_cast<Instruction>(U.get()))
- if (Legal->isUniformAfterVectorization(J))
+ if (isUniformAfterVectorization(J, VF))
return false;
// Otherwise, we can scalarize the instruction.
@@ -6690,7 +6896,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
// and their return values are inserted into vectors. Thus, an extract would
// still be required.
auto needsExtract = [&](Instruction *I) -> bool {
- return TheLoop->contains(I) && !Legal->isScalarAfterVectorization(I);
+ return TheLoop->contains(I) && !isScalarAfterVectorization(I, VF);
};
// Compute the expected cost discount from scalarizing the entire expression
@@ -6717,8 +6923,8 @@ int LoopVectorizationCostModel::computePredInstDiscount(
// Compute the scalarization overhead of needed insertelement instructions
// and phi nodes.
if (Legal->isScalarWithPredication(I) && !I->getType()->isVoidTy()) {
- ScalarCost += getScalarizationOverhead(ToVectorTy(I->getType(), VF), true,
- false, TTI);
+ ScalarCost += TTI.getScalarizationOverhead(ToVectorTy(I->getType(), VF),
+ true, false);
ScalarCost += VF * TTI.getCFInstrCost(Instruction::PHI);
}
@@ -6733,8 +6939,8 @@ int LoopVectorizationCostModel::computePredInstDiscount(
if (canBeScalarized(J))
Worklist.push_back(J);
else if (needsExtract(J))
- ScalarCost += getScalarizationOverhead(ToVectorTy(J->getType(), VF),
- false, true, TTI);
+ ScalarCost += TTI.getScalarizationOverhead(
+ ToVectorTy(J->getType(),VF), false, true);
}
// Scale the total scalar cost by block probability.
@@ -6753,6 +6959,9 @@ LoopVectorizationCostModel::VectorizationCostTy
LoopVectorizationCostModel::expectedCost(unsigned VF) {
VectorizationCostTy Cost;
+ // Collect Uniform and Scalar instructions after vectorization with VF.
+ collectUniformsAndScalars(VF);
+
// Collect the instructions (and their associated costs) that will be more
// profitable to scalarize.
collectInstsToScalarize(VF);
@@ -6832,11 +7041,141 @@ static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
Legal->hasStride(I->getOperand(1));
}
+unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
+ unsigned VF) {
+ Type *ValTy = getMemInstValueType(I);
+ auto SE = PSE.getSE();
+
+ unsigned Alignment = getMemInstAlignment(I);
+ unsigned AS = getMemInstAddressSpace(I);
+ Value *Ptr = getPointerOperand(I);
+ Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
+
+ // Figure out whether the access is strided and get the stride value
+  // if it's known at compile time.
+ const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, SE, TheLoop);
+
+ // Get the cost of the scalar memory instruction and address computation.
+ unsigned Cost = VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
+
+ Cost += VF *
+ TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), Alignment,
+ AS, I);
+
+ // Get the overhead of the extractelement and insertelement instructions
+ // we might create due to scalarization.
+ Cost += getScalarizationOverhead(I, VF, TTI);
+
+ // If we have a predicated store, it may not be executed for each vector
+ // lane. Scale the cost by the probability of executing the predicated
+ // block.
+ if (Legal->isScalarWithPredication(I))
+ Cost /= getReciprocalPredBlockProb();
+
+ return Cost;
+}
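To make the scaling at the end concrete, here is a worked sketch with assumed per-target costs (getReciprocalPredBlockProb() is taken to be 2, i.e. the predicated block is assumed to execute half the time):

#include <cstdio>

int main() {
  unsigned VF = 4;
  unsigned AddrCost = 1, MemOpCost = 2, ScalarizationOverhead = 8; // assumed
  unsigned Cost = VF * AddrCost + VF * MemOpCost + ScalarizationOverhead; // 20
  bool Predicated = true;
  unsigned ReciprocalPredBlockProb = 2; // block runs for half the lanes
  if (Predicated)
    Cost /= ReciprocalPredBlockProb;
  std::printf("scalarized cost: %u\n", Cost); // 10
}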
+
+unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
+ unsigned VF) {
+ Type *ValTy = getMemInstValueType(I);
+ Type *VectorTy = ToVectorTy(ValTy, VF);
+ unsigned Alignment = getMemInstAlignment(I);
+ Value *Ptr = getPointerOperand(I);
+ unsigned AS = getMemInstAddressSpace(I);
+ int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+
+ assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
+ "Stride should be 1 or -1 for consecutive memory access");
+ unsigned Cost = 0;
+ if (Legal->isMaskRequired(I))
+ Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
+ else
+ Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I);
+
+ bool Reverse = ConsecutiveStride < 0;
+ if (Reverse)
+ Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
+ return Cost;
+}
+
+unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
+ unsigned VF) {
+ LoadInst *LI = cast<LoadInst>(I);
+ Type *ValTy = LI->getType();
+ Type *VectorTy = ToVectorTy(ValTy, VF);
+ unsigned Alignment = LI->getAlignment();
+ unsigned AS = LI->getPointerAddressSpace();
+
+ return TTI.getAddressComputationCost(ValTy) +
+ TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) +
+ TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy);
+}
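The uniform-load cost above is the sum of three scalar-side pieces. A trivial sketch with hypothetical per-target numbers, just to show the shape of the sum (one address computation, one scalar load, one broadcast shuffle):

#include <cstdio>

int main() {
  unsigned AddressComputation = 1, ScalarLoad = 2, BroadcastShuffle = 1;
  std::printf("uniform load cost: %u\n",
              AddressComputation + ScalarLoad + BroadcastShuffle); // 4
}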
+
+unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
+ unsigned VF) {
+ Type *ValTy = getMemInstValueType(I);
+ Type *VectorTy = ToVectorTy(ValTy, VF);
+ unsigned Alignment = getMemInstAlignment(I);
+ Value *Ptr = getPointerOperand(I);
+
+ return TTI.getAddressComputationCost(VectorTy) +
+ TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
+ Legal->isMaskRequired(I), Alignment);
+}
+
+unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
+ unsigned VF) {
+ Type *ValTy = getMemInstValueType(I);
+ Type *VectorTy = ToVectorTy(ValTy, VF);
+ unsigned AS = getMemInstAddressSpace(I);
+
+ auto Group = Legal->getInterleavedAccessGroup(I);
+ assert(Group && "Fail to get an interleaved access group.");
+
+ unsigned InterleaveFactor = Group->getFactor();
+ Type *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
+
+ // Holds the indices of existing members in an interleaved load group.
+ // An interleaved store group doesn't need this as it doesn't allow gaps.
+ SmallVector<unsigned, 4> Indices;
+ if (isa<LoadInst>(I)) {
+ for (unsigned i = 0; i < InterleaveFactor; i++)
+ if (Group->getMember(i))
+ Indices.push_back(i);
+ }
+
+ // Calculate the cost of the whole interleaved group.
+ unsigned Cost = TTI.getInterleavedMemoryOpCost(I->getOpcode(), WideVecTy,
+ Group->getFactor(), Indices,
+ Group->getAlignment(), AS);
+
+ if (Group->isReverse())
+ Cost += Group->getNumMembers() *
+ TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
+ return Cost;
+}
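The Indices collection above only records existing members, since interleaved load groups may have gaps. A small sketch with an assumed factor-3 group whose middle member is missing:

#include <cstdio>
#include <vector>

int main() {
  // An interleave-factor-3 load group where member 1 is missing (a gap):
  // only the indices of existing members are passed to the cost query.
  std::vector<bool> HasMember = {true, false, true};
  std::vector<unsigned> Indices;
  for (unsigned i = 0; i < HasMember.size(); i++)
    if (HasMember[i])
      Indices.push_back(i);
  for (unsigned i : Indices)
    std::printf("%u ", i); // 0 2
  std::printf("\n");
}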
+
+unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
+ unsigned VF) {
+
+ // Calculate scalar cost only. Vectorization cost should be ready at this
+ // moment.
+ if (VF == 1) {
+ Type *ValTy = getMemInstValueType(I);
+ unsigned Alignment = getMemInstAlignment(I);
+    unsigned AS = getMemInstAddressSpace(I);
+
+ return TTI.getAddressComputationCost(ValTy) +
+ TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I);
+ }
+ return getWideningCost(I, VF);
+}
+
LoopVectorizationCostModel::VectorizationCostTy
LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
// If we know that this instruction will remain uniform, check the cost of
// the scalar version.
- if (Legal->isUniformAfterVectorization(I))
+ if (isUniformAfterVectorization(I, VF))
VF = 1;
if (VF > 1 && isProfitableToScalarize(I, VF))
@@ -6850,6 +7189,79 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
return VectorizationCostTy(C, TypeNotScalarized);
}
+void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
+ if (VF == 1)
+ return;
+ for (BasicBlock *BB : TheLoop->blocks()) {
+ // For each instruction in the old loop.
+ for (Instruction &I : *BB) {
+ Value *Ptr = getPointerOperand(&I);
+ if (!Ptr)
+ continue;
+
+ if (isa<LoadInst>(&I) && Legal->isUniform(Ptr)) {
+ // Scalar load + broadcast
+ unsigned Cost = getUniformMemOpCost(&I, VF);
+ setWideningDecision(&I, VF, CM_Scalarize, Cost);
+ continue;
+ }
+
+ // We assume that widening is the best solution when possible.
+ if (Legal->memoryInstructionCanBeWidened(&I, VF)) {
+ unsigned Cost = getConsecutiveMemOpCost(&I, VF);
+ setWideningDecision(&I, VF, CM_Widen, Cost);
+ continue;
+ }
+
+ // Choose between Interleaving, Gather/Scatter or Scalarization.
+ unsigned InterleaveCost = UINT_MAX;
+ unsigned NumAccesses = 1;
+ if (Legal->isAccessInterleaved(&I)) {
+ auto Group = Legal->getInterleavedAccessGroup(&I);
+ assert(Group && "Fail to get an interleaved access group.");
+
+ // Make one decision for the whole group.
+ if (getWideningDecision(&I, VF) != CM_Unknown)
+ continue;
+
+ NumAccesses = Group->getNumMembers();
+ InterleaveCost = getInterleaveGroupCost(&I, VF);
+ }
+
+ unsigned GatherScatterCost =
+ Legal->isLegalGatherOrScatter(&I)
+ ? getGatherScatterCost(&I, VF) * NumAccesses
+ : UINT_MAX;
+
+ unsigned ScalarizationCost =
+ getMemInstScalarizationCost(&I, VF) * NumAccesses;
+
+      // Choose the better solution for the current VF, record the decision,
+      // and use it during vectorization.
+ unsigned Cost;
+ InstWidening Decision;
+ if (InterleaveCost <= GatherScatterCost &&
+ InterleaveCost < ScalarizationCost) {
+ Decision = CM_Interleave;
+ Cost = InterleaveCost;
+ } else if (GatherScatterCost < ScalarizationCost) {
+ Decision = CM_GatherScatter;
+ Cost = GatherScatterCost;
+ } else {
+ Decision = CM_Scalarize;
+ Cost = ScalarizationCost;
+ }
+      // If the instruction belongs to an interleave group, the whole group
+      // receives the same decision. The group as a whole is charged the
+      // cost, but that cost is actually assigned to a single instruction.
+ if (auto Group = Legal->getInterleavedAccessGroup(&I))
+ setWideningDecision(Group, VF, Decision, Cost);
+ else
+ setWideningDecision(&I, VF, Decision, Cost);
+ }
+ }
+}
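The three-way choice above can be restated as a self-contained sketch (plain C++; the costs are hypothetical). Note the tie-breaking mirrors the code: interleaving wins ties against gather/scatter but must be strictly cheaper than scalarization.

#include <climits>
#include <cstdio>

int main() {
  // Hypothetical costs for one memory access at a given VF; UINT_MAX marks
  // an option that is not legal for this access.
  unsigned InterleaveCost = 12, GatherScatterCost = UINT_MAX,
           ScalarizationCost = 20;
  const char *Decision;
  unsigned Cost;
  if (InterleaveCost <= GatherScatterCost &&
      InterleaveCost < ScalarizationCost) {
    Decision = "interleave";
    Cost = InterleaveCost;
  } else if (GatherScatterCost < ScalarizationCost) {
    Decision = "gather/scatter";
    Cost = GatherScatterCost;
  } else {
    Decision = "scalarize";
    Cost = ScalarizationCost;
  }
  std::printf("%s (cost %u)\n", Decision, Cost); // interleave (cost 12)
}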
+
unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
unsigned VF,
Type *&VectorTy) {
@@ -6868,7 +7280,31 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
// instruction cost.
return 0;
case Instruction::Br: {
- return TTI.getCFInstrCost(I->getOpcode());
+ // In cases of scalarized and predicated instructions, there will be VF
+    // predicated blocks in the vectorized loop. Each branch around these
+    // blocks also requires an extract of the corresponding i1 element of
+    // the vector compare.
+ bool ScalarPredicatedBB = false;
+ BranchInst *BI = cast<BranchInst>(I);
+ if (VF > 1 && BI->isConditional() &&
+ (PredicatedBBsAfterVectorization.count(BI->getSuccessor(0)) ||
+ PredicatedBBsAfterVectorization.count(BI->getSuccessor(1))))
+ ScalarPredicatedBB = true;
+
+ if (ScalarPredicatedBB) {
+ // Return cost for branches around scalarized and predicated blocks.
+ Type *Vec_i1Ty =
+ VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF);
+ return (TTI.getScalarizationOverhead(Vec_i1Ty, false, true) +
+ (TTI.getCFInstrCost(Instruction::Br) * VF));
+ } else if (I->getParent() == TheLoop->getLoopLatch() || VF == 1)
+ // The back-edge branch will remain, as will all scalar branches.
+ return TTI.getCFInstrCost(Instruction::Br);
+ else
+ // This branch will be eliminated by if-conversion.
+ return 0;
+ // Note: We currently assume zero cost for an unconditional branch inside
+ // a predicated block since it will become a fall-through, although we
+ // may decide in the future to call TTI for all branches.
}
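A worked sketch of the predicated-block branch cost above, with assumed TTI numbers (both the scalarization overhead of the i1 vector and the per-branch cost are invented): the total is one extraction-overhead term plus VF scalar branches.

#include <cstdio>

int main() {
  unsigned VF = 4;
  unsigned ExtractI1Overhead = 4; // assumed TTI.getScalarizationOverhead(...)
  unsigned BranchCost = 1;        // assumed TTI.getCFInstrCost(Br)
  std::printf("cost: %u\n", ExtractI1Overhead + BranchCost * VF); // cost: 8
}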
case Instruction::PHI: {
auto *Phi = cast<PHINode>(I);
@@ -6969,7 +7405,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
if (!ScalarCond)
CondTy = VectorType::get(CondTy, VF);
- return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
+ return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, I);
}
case Instruction::ICmp:
case Instruction::FCmp: {
@@ -6978,130 +7414,12 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF))
ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]);
VectorTy = ToVectorTy(ValTy, VF);
- return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
+ return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, I);
}
case Instruction::Store:
case Instruction::Load: {
- StoreInst *SI = dyn_cast<StoreInst>(I);
- LoadInst *LI = dyn_cast<LoadInst>(I);
- Type *ValTy = (SI ? SI->getValueOperand()->getType() : LI->getType());
- VectorTy = ToVectorTy(ValTy, VF);
-
- unsigned Alignment = SI ? SI->getAlignment() : LI->getAlignment();
- unsigned AS =
- SI ? SI->getPointerAddressSpace() : LI->getPointerAddressSpace();
- Value *Ptr = getPointerOperand(I);
- // We add the cost of address computation here instead of with the gep
- // instruction because only here we know whether the operation is
- // scalarized.
- if (VF == 1)
- return TTI.getAddressComputationCost(VectorTy) +
- TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
-
- if (LI && Legal->isUniform(Ptr)) {
- // Scalar load + broadcast
- unsigned Cost = TTI.getAddressComputationCost(ValTy->getScalarType());
- Cost += TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
- Alignment, AS);
- return Cost +
- TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, ValTy);
- }
-
- // For an interleaved access, calculate the total cost of the whole
- // interleave group.
- if (Legal->isAccessInterleaved(I)) {
- auto Group = Legal->getInterleavedAccessGroup(I);
- assert(Group && "Fail to get an interleaved access group.");
-
- // Only calculate the cost once at the insert position.
- if (Group->getInsertPos() != I)
- return 0;
-
- unsigned InterleaveFactor = Group->getFactor();
- Type *WideVecTy =
- VectorType::get(VectorTy->getVectorElementType(),
- VectorTy->getVectorNumElements() * InterleaveFactor);
-
- // Holds the indices of existing members in an interleaved load group.
- // An interleaved store group doesn't need this as it doesn't allow gaps.
- SmallVector<unsigned, 4> Indices;
- if (LI) {
- for (unsigned i = 0; i < InterleaveFactor; i++)
- if (Group->getMember(i))
- Indices.push_back(i);
- }
-
- // Calculate the cost of the whole interleaved group.
- unsigned Cost = TTI.getInterleavedMemoryOpCost(
- I->getOpcode(), WideVecTy, Group->getFactor(), Indices,
- Group->getAlignment(), AS);
-
- if (Group->isReverse())
- Cost +=
- Group->getNumMembers() *
- TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
-
- // FIXME: The interleaved load group with a huge gap could be even more
- // expensive than scalar operations. Then we could ignore such group and
- // use scalar operations instead.
- return Cost;
- }
-
- // Check if the memory instruction will be scalarized.
- if (Legal->memoryInstructionMustBeScalarized(I, VF)) {
- unsigned Cost = 0;
- Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
-
- // Figure out whether the access is strided and get the stride value
- // if it's known in compile time
- const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, SE, TheLoop);
-
- // Get the cost of the scalar memory instruction and address computation.
- Cost += VF * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
- Cost += VF *
- TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
- Alignment, AS);
-
- // Get the overhead of the extractelement and insertelement instructions
- // we might create due to scalarization.
- Cost += getScalarizationOverhead(I, VF, TTI);
-
- // If we have a predicated store, it may not be executed for each vector
- // lane. Scale the cost by the probability of executing the predicated
- // block.
- if (Legal->isScalarWithPredication(I))
- Cost /= getReciprocalPredBlockProb();
-
- return Cost;
- }
-
- // Determine if the pointer operand of the access is either consecutive or
- // reverse consecutive.
- int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
- bool Reverse = ConsecutiveStride < 0;
-
- // Determine if either a gather or scatter operation is legal.
- bool UseGatherOrScatter =
- !ConsecutiveStride && Legal->isLegalGatherOrScatter(I);
-
- unsigned Cost = TTI.getAddressComputationCost(VectorTy);
- if (UseGatherOrScatter) {
- assert(ConsecutiveStride == 0 &&
- "Gather/Scatter are not used for consecutive stride");
- return Cost +
- TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
- Legal->isMaskRequired(I), Alignment);
- }
- // Wide load/stores.
- if (Legal->isMaskRequired(I))
- Cost +=
- TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
- else
- Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);
-
- if (Reverse)
- Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, 0);
- return Cost;
+ VectorTy = ToVectorTy(getMemInstValueType(I), VF);
+ return getMemoryInstructionCost(I, VF);
}
case Instruction::ZExt:
case Instruction::SExt:
@@ -7115,12 +7433,14 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
- // We optimize the truncation of induction variable.
- // The cost of these is the same as the scalar operation.
- if (I->getOpcode() == Instruction::Trunc &&
- Legal->isInductionVariable(I->getOperand(0)))
- return TTI.getCastInstrCost(I->getOpcode(), I->getType(),
- I->getOperand(0)->getType());
+ // We optimize the truncation of induction variables having constant
+ // integer steps. The cost of these truncations is the same as the scalar
+ // operation.
+ if (isOptimizableIVTruncate(I, VF)) {
+ auto *Trunc = cast<TruncInst>(I);
+ return TTI.getCastInstrCost(Instruction::Trunc, Trunc->getDestTy(),
+ Trunc->getSrcTy(), Trunc);
+ }
Type *SrcScalarTy = I->getOperand(0)->getType();
Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF);
@@ -7143,7 +7463,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
}
}
- return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
+ return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I);
}
case Instruction::Call: {
bool NeedToScalarize;
@@ -7172,9 +7492,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
@@ -7206,81 +7524,34 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
VecValuesToIgnore.insert(Casts.begin(), Casts.end());
}
-
- // Insert values known to be scalar into VecValuesToIgnore. This is a
- // conservative estimation of the values that will later be scalarized.
- //
- // FIXME: Even though an instruction is not scalar-after-vectoriztion, it may
- // still be scalarized. For example, we may find an instruction to be
- // more profitable for a given vectorization factor if it were to be
- // scalarized. But at this point, we haven't yet computed the
- // vectorization factor.
- for (auto *BB : TheLoop->getBlocks())
- for (auto &I : *BB)
- if (Legal->isScalarAfterVectorization(&I))
- VecValuesToIgnore.insert(&I);
}
-void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
- bool IfPredicateInstr) {
- assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
- // Holds vector parameters or scalars, in case of uniform vals.
- SmallVector<VectorParts, 4> Params;
-
- setDebugLocFromInst(Builder, Instr);
-
- // Does this instruction return a value ?
- bool IsVoidRetTy = Instr->getType()->isVoidTy();
-
- // Initialize a new scalar map entry.
- ScalarParts Entry(UF);
-
- VectorParts Cond;
- if (IfPredicateInstr)
- Cond = createBlockInMask(Instr->getParent());
-
- // For each vector unroll 'part':
- for (unsigned Part = 0; Part < UF; ++Part) {
- Entry[Part].resize(1);
- // For each scalar that we create:
-
- // Start an "if (pred) a[i] = ..." block.
- Value *Cmp = nullptr;
- if (IfPredicateInstr) {
- if (Cond[Part]->getType()->isVectorTy())
- Cond[Part] =
- Builder.CreateExtractElement(Cond[Part], Builder.getInt32(0));
- Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cond[Part],
- ConstantInt::get(Cond[Part]->getType(), 1));
- }
-
- Instruction *Cloned = Instr->clone();
- if (!IsVoidRetTy)
- Cloned->setName(Instr->getName() + ".cloned");
-
- // Replace the operands of the cloned instructions with their scalar
- // equivalents in the new loop.
- for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
- auto *NewOp = getScalarValue(Instr->getOperand(op), Part, 0);
- Cloned->setOperand(op, NewOp);
- }
+LoopVectorizationCostModel::VectorizationFactor
+LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) {
- // Place the cloned scalar in the new loop.
- Builder.Insert(Cloned);
+  // Width 1 means no vectorization; cost 0 means the cost is not yet computed.
+ const LoopVectorizationCostModel::VectorizationFactor NoVectorization = {1U,
+ 0U};
+ Optional<unsigned> MaybeMaxVF = CM.computeMaxVF(OptForSize);
+ if (!MaybeMaxVF.hasValue()) // Cases considered too costly to vectorize.
+ return NoVectorization;
- // Add the cloned scalar to the scalar map entry.
- Entry[Part][0] = Cloned;
+ if (UserVF) {
+ DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
+ assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
+ // Collect the instructions (and their associated costs) that will be more
+ // profitable to scalarize.
+ CM.selectUserVectorizationFactor(UserVF);
+ return {UserVF, 0};
+ }
- // If we just cloned a new assumption, add it the assumption cache.
- if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
- if (II->getIntrinsicID() == Intrinsic::assume)
- AC->registerAssumption(II);
+ unsigned MaxVF = MaybeMaxVF.getValue();
+ assert(MaxVF != 0 && "MaxVF is zero.");
+ if (MaxVF == 1)
+ return NoVectorization;
- // End if-block.
- if (IfPredicateInstr)
- PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp));
- }
- VectorLoopValueMap.initScalar(Instr, Entry);
+ // Select the optimal vectorization factor.
+ return CM.selectVectorizationFactor(MaxVF);
}
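
A condensed standalone model of the planning entry point above. std::optional stands in for LLVM's Optional, selectBest for the cost-model search, and OptForSize is omitted since it is folded into the MaxVF computation; everything here is illustrative:

#include <cstdio>
#include <optional>

struct VectorizationFactor { unsigned Width, Cost; };

static VectorizationFactor plan(unsigned UserVF, std::optional<unsigned> MaxVF,
                                VectorizationFactor (*selectBest)(unsigned)) {
  const VectorizationFactor NoVectorization = {1, 0}; // width 1: don't vectorize
  if (!MaxVF)
    return NoVectorization;  // deemed too costly up front
  if (UserVF)
    return {UserVF, 0};      // honor a user-forced width verbatim
  if (*MaxVF == 1)
    return NoVectorization;
  return selectBest(*MaxVF); // search widths up to MaxVF
}

int main() {
  auto Best = [](unsigned MaxVF) { return VectorizationFactor{MaxVF, 42}; };
  std::printf("%u\n", plan(0, 8, Best).Width); // 8
}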
void InnerLoopUnroller::vectorizeMemoryInstruction(Instruction *Instr) {
@@ -7414,11 +7685,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}
- // Use the cost model.
- LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F,
- &Hints);
- CM.collectValuesToIgnore();
-
// Check the function attributes to find out if this function should be
// optimized for size.
bool OptForSize =
@@ -7464,9 +7730,20 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}
- // Select the optimal vectorization factor.
- const LoopVectorizationCostModel::VectorizationFactor VF =
- CM.selectVectorizationFactor(OptForSize);
+ // Use the cost model.
+ LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F,
+ &Hints);
+ CM.collectValuesToIgnore();
+
+ // Use the planner for vectorization.
+ LoopVectorizationPlanner LVP(CM);
+
+ // Get user vectorization factor.
+ unsigned UserVF = Hints.getWidth();
+
+ // Plan how to best vectorize, return the best VF and its cost.
+ LoopVectorizationCostModel::VectorizationFactor VF =
+ LVP.plan(OptForSize, UserVF);
// Select the interleave count.
unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost);
@@ -7522,10 +7799,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
const char *VAPassName = Hints.vectorizeAnalysisPassName();
if (!VectorizeLoop && !InterleaveLoop) {
      // Do not vectorize or interleave the loop.
- ORE->emit(OptimizationRemarkAnalysis(VAPassName, VecDiagMsg.first,
+ ORE->emit(OptimizationRemarkMissed(VAPassName, VecDiagMsg.first,
L->getStartLoc(), L->getHeader())
<< VecDiagMsg.second);
- ORE->emit(OptimizationRemarkAnalysis(LV_NAME, IntDiagMsg.first,
+ ORE->emit(OptimizationRemarkMissed(LV_NAME, IntDiagMsg.first,
L->getStartLoc(), L->getHeader())
<< IntDiagMsg.second);
return false;
@@ -7621,6 +7898,16 @@ bool LoopVectorizePass::runImpl(
if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2)
return false;
+ bool Changed = false;
+
+ // The vectorizer requires loops to be in simplified form.
+ // Since simplification may add new inner loops, it has to run before the
+ // legality and profitability checks. This means running the loop vectorizer
+  // will simplify all loops, regardless of whether anything ends up being
+ // vectorized.
+ for (auto &L : *LI)
+ Changed |= simplifyLoop(L, DT, LI, SE, AC, false /* PreserveLCSSA */);
+
// Build up a worklist of inner-loops to vectorize. This is necessary as
// the act of vectorizing or partially unrolling a loop creates new loops
// and can invalidate iterators across the loops.
@@ -7632,9 +7919,15 @@ bool LoopVectorizePass::runImpl(
LoopsAnalyzed += Worklist.size();
// Now walk the identified inner loops.
- bool Changed = false;
- while (!Worklist.empty())
- Changed |= processLoop(Worklist.pop_back_val());
+ while (!Worklist.empty()) {
+ Loop *L = Worklist.pop_back_val();
+
+ // For the inner loops we actually process, form LCSSA to simplify the
+ // transform.
+ Changed |= formLCSSARecursively(*L, *DT, LI, SE);
+
+ Changed |= processLoop(L);
+ }
// Process each loop nest in the function.
return Changed;
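
The reordering in this hunk is subtle: simplification must run over every loop before any legality check (it can create new inner loops), while LCSSA formation is now deferred to the loops actually visited. A toy driver preserving only that phase ordering, with Loop and the per-phase actions as placeholders:

#include <vector>

struct Loop { bool Simplified = false, LCSSA = false, Processed = false; };

static bool runShape(std::vector<Loop> &Loops) {
  bool Changed = false;
  for (Loop &L : Loops) { // phase 1: simplify all loops up front
    L.Simplified = true;
    Changed = true;
  }
  for (Loop &L : Loops) { // phase 2: worklist processing
    L.LCSSA = true;       // formLCSSARecursively would go here
    L.Processed = true;   // then processLoop
  }
  return Changed;
}

int main() {
  std::vector<Loop> Ls(3);
  return runShape(Ls) ? 0 : 1;
}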
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 328f27002960..da3ac06ab464 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -39,6 +39,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
@@ -90,6 +91,10 @@ static cl::opt<unsigned> MinTreeSize(
"slp-min-tree-size", cl::init(3), cl::Hidden,
cl::desc("Only vectorize small trees if they are fully vectorizable"));
+static cl::opt<bool>
+ ViewSLPTree("view-slp-tree", cl::Hidden,
+ cl::desc("Display the SLP trees with Graphviz"));
+
// Limit the number of alias checks. The limit is chosen so that
// it has no negative effect on the llvm benchmarks.
static const unsigned AliasedCheckLimit = 10;
@@ -212,14 +217,14 @@ static unsigned getSameOpcode(ArrayRef<Value *> VL) {
/// Flag set: NSW, NUW, exact, and all of fast-math.
static void propagateIRFlags(Value *I, ArrayRef<Value *> VL) {
if (auto *VecOp = dyn_cast<Instruction>(I)) {
- if (auto *Intersection = dyn_cast<Instruction>(VL[0])) {
- // Intersection is initialized to the 0th scalar,
- // so start counting from index '1'.
+ if (auto *I0 = dyn_cast<Instruction>(VL[0])) {
+      // VecOp is initialized to the 0th scalar, so start counting from index
+ // '1'.
+ VecOp->copyIRFlags(I0);
for (int i = 1, e = VL.size(); i < e; ++i) {
if (auto *Scalar = dyn_cast<Instruction>(VL[i]))
- Intersection->andIRFlags(Scalar);
+ VecOp->andIRFlags(Scalar);
}
- VecOp->copyIRFlags(Intersection);
}
}
}
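
A standalone model of the flag propagation just fixed above: the vector op starts from the 0th scalar's flags and intersects with each remaining scalar, so a flag survives only if every scalar had it. The bitmask encoding below is purely illustrative:

#include <cstdio>
#include <vector>

// Assumes a non-empty flag list; bit 0 models nsw, bit 1 models nuw.
static unsigned intersectFlags(const std::vector<unsigned> &ScalarFlags) {
  unsigned Flags = ScalarFlags[0];       // copyIRFlags(I0)
  for (size_t i = 1; i < ScalarFlags.size(); ++i)
    Flags &= ScalarFlags[i];             // andIRFlags(Scalar)
  return Flags;
}

int main() {
  // One add lacking nuw drops it from the vector add; nsw survives.
  std::printf("%u\n", intersectFlags({3, 3, 1})); // prints 1
}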
@@ -304,6 +309,8 @@ public:
typedef SmallVector<Instruction *, 16> InstrList;
typedef SmallPtrSet<Value *, 16> ValueSet;
typedef SmallVector<StoreInst *, 8> StoreList;
+ typedef MapVector<Value *, SmallVector<Instruction *, 2>>
+ ExtraValueToDebugLocsMap;
BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li,
@@ -330,6 +337,10 @@ public:
/// \brief Vectorize the tree that starts with the elements in \p VL.
/// Returns the vectorized root.
Value *vectorizeTree();
+ /// Vectorize the tree but with the list of externally used values \p
+  /// ExternallyUsedValues. Values in this MapVector can be replaced by the
+  /// generated extractelement instructions.
+ Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues);
/// \returns the cost incurred by unwanted spills and fills, caused by
/// holding live values over call sites.
@@ -343,6 +354,13 @@ public:
/// the purpose of scheduling and extraction in the \p UserIgnoreLst.
void buildTree(ArrayRef<Value *> Roots,
ArrayRef<Value *> UserIgnoreLst = None);
+ /// Construct a vectorizable tree that starts at \p Roots, ignoring users for
+ /// the purpose of scheduling and extraction in the \p UserIgnoreLst taking
+  /// into account (and updating it, if required) the list of externally used
+ /// values stored in \p ExternallyUsedValues.
+ void buildTree(ArrayRef<Value *> Roots,
+ ExtraValueToDebugLocsMap &ExternallyUsedValues,
+ ArrayRef<Value *> UserIgnoreLst = None);
/// Clear the internal data structures that are created by 'buildTree'.
void deleteTree() {
@@ -404,7 +422,7 @@ private:
int getEntryCost(TreeEntry *E);
/// This is the recursive part of buildTree.
- void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth);
+  void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth, int UserTreeIdx);
/// \returns True if the ExtractElement/ExtractValue instructions in VL can
/// be vectorized to use the original vector (or aggregate "bitcast" to a vector).
@@ -451,8 +469,9 @@ private:
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right);
struct TreeEntry {
- TreeEntry() : Scalars(), VectorizedValue(nullptr),
- NeedToGather(0) {}
+ TreeEntry(std::vector<TreeEntry> &Container)
+ : Scalars(), VectorizedValue(nullptr), NeedToGather(0),
+ Container(Container) {}
/// \returns true if the scalars in VL are equal to this entry.
bool isSame(ArrayRef<Value *> VL) const {
@@ -468,11 +487,24 @@ private:
/// Do we need to gather this sequence ?
bool NeedToGather;
+
+ /// Points back to the VectorizableTree.
+ ///
+    /// Only used for Graphviz right now. Unfortunately GraphTraits::NodeRef has
+ /// to be a pointer and needs to be able to initialize the child iterator.
+ /// Thus we need a reference back to the container to translate the indices
+ /// to entries.
+ std::vector<TreeEntry> &Container;
+
+ /// The TreeEntry index containing the user of this entry. We can actually
+ /// have multiple users so the data structure is not truly a tree.
+ SmallVector<int, 1> UserTreeIndices;
};
/// Create a new VectorizableTree entry.
- TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized) {
- VectorizableTree.emplace_back();
+ TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized,
+ int &UserTreeIdx) {
+ VectorizableTree.emplace_back(VectorizableTree);
int idx = VectorizableTree.size() - 1;
TreeEntry *Last = &VectorizableTree[idx];
Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
@@ -485,6 +517,10 @@ private:
} else {
MustGather.insert(VL.begin(), VL.end());
}
+
+ if (UserTreeIdx >= 0)
+ Last->UserTreeIndices.push_back(UserTreeIdx);
+ UserTreeIdx = idx;
return Last;
}
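
A minimal model of the UserTreeIdx threading introduced here: each new entry records which entry requested it, and the in/out index is updated so the caller's recursive calls for the operands attach to this entry. -1 marks the root call; types are simplified stand-ins:

#include <vector>

struct Entry { std::vector<int> UserTreeIndices; };

static int newEntry(std::vector<Entry> &Tree, int &UserTreeIdx) {
  Tree.emplace_back();
  int Idx = static_cast<int>(Tree.size()) - 1;
  if (UserTreeIdx >= 0)
    Tree[Idx].UserTreeIndices.push_back(UserTreeIdx);
  UserTreeIdx = Idx;
  return Idx;
}

int main() {
  std::vector<Entry> Tree;
  int User = -1;
  newEntry(Tree, User);              // root: no user recorded, User -> 0
  newEntry(Tree, User);              // operand bundle: its user is entry 0
  return Tree[1].UserTreeIndices[0]; // 0
}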
@@ -558,7 +594,9 @@ private:
SmallVector<std::unique_ptr<Instruction>, 8> DeletedInstructions;
/// A list of values that need to extracted out of the tree.
- /// This list holds pairs of (Internal Scalar : External User).
+ /// This list holds pairs of (Internal Scalar : External User). External User
+  /// can be nullptr, which means that this Internal Scalar will be used later,
+ /// after vectorization.
UserList ExternalUses;
/// Values used only by @llvm.assume calls.
@@ -706,6 +744,8 @@ private:
return os;
}
#endif
+ friend struct GraphTraits<BoUpSLP *>;
+ friend struct DOTGraphTraits<BoUpSLP *>;
/// Contains all scheduling data for a basic block.
///
@@ -916,17 +956,98 @@ private:
/// original width.
MapVector<Value *, std::pair<uint64_t, bool>> MinBWs;
};
+} // end namespace slpvectorizer
+
+template <> struct GraphTraits<BoUpSLP *> {
+ typedef BoUpSLP::TreeEntry TreeEntry;
+
+ /// NodeRef has to be a pointer per the GraphWriter.
+ typedef TreeEntry *NodeRef;
+
+ /// \brief Add the VectorizableTree to the index iterator to be able to return
+ /// TreeEntry pointers.
+ struct ChildIteratorType
+ : public iterator_adaptor_base<ChildIteratorType,
+ SmallVector<int, 1>::iterator> {
+
+ std::vector<TreeEntry> &VectorizableTree;
+
+ ChildIteratorType(SmallVector<int, 1>::iterator W,
+ std::vector<TreeEntry> &VT)
+ : ChildIteratorType::iterator_adaptor_base(W), VectorizableTree(VT) {}
+
+ NodeRef operator*() { return &VectorizableTree[*I]; }
+ };
+
+ static NodeRef getEntryNode(BoUpSLP &R) { return &R.VectorizableTree[0]; }
+
+ static ChildIteratorType child_begin(NodeRef N) {
+ return {N->UserTreeIndices.begin(), N->Container};
+ }
+ static ChildIteratorType child_end(NodeRef N) {
+ return {N->UserTreeIndices.end(), N->Container};
+ }
+
+ /// For the node iterator we just need to turn the TreeEntry iterator into a
+ /// TreeEntry* iterator so that it dereferences to NodeRef.
+ typedef pointer_iterator<std::vector<TreeEntry>::iterator> nodes_iterator;
+
+ static nodes_iterator nodes_begin(BoUpSLP *R) {
+ return nodes_iterator(R->VectorizableTree.begin());
+ }
+ static nodes_iterator nodes_end(BoUpSLP *R) {
+ return nodes_iterator(R->VectorizableTree.end());
+ }
+
+ static unsigned size(BoUpSLP *R) { return R->VectorizableTree.size(); }
+};
+
+template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
+ typedef BoUpSLP::TreeEntry TreeEntry;
+
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(const TreeEntry *Entry, const BoUpSLP *R) {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ if (isSplat(Entry->Scalars)) {
+ OS << "<splat> " << *Entry->Scalars[0];
+ return Str;
+ }
+ for (auto V : Entry->Scalars) {
+ OS << *V;
+ if (std::any_of(
+ R->ExternalUses.begin(), R->ExternalUses.end(),
+ [&](const BoUpSLP::ExternalUser &EU) { return EU.Scalar == V; }))
+ OS << " <extract>";
+ OS << "\n";
+ }
+ return Str;
+ }
+
+ static std::string getNodeAttributes(const TreeEntry *Entry,
+ const BoUpSLP *) {
+ if (Entry->NeedToGather)
+ return "color=red";
+ return "";
+ }
+};
} // end namespace llvm
-} // end namespace slpvectorizer
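
A standalone sketch of the node labelling logic added in the DOTGraphTraits specialization: splat bundles collapse to a single "<splat>" line, and scalars extracted for external users get an "<extract>" marker. isExternallyUsed is a placeholder for the ExternalUses scan:

#include <cstdio>
#include <sstream>
#include <string>
#include <vector>

static std::string nodeLabel(const std::vector<std::string> &Scalars,
                             bool IsSplat,
                             bool (*isExternallyUsed)(const std::string &)) {
  if (IsSplat)
    return "<splat> " + Scalars[0];
  std::ostringstream OS;
  for (const std::string &S : Scalars) {
    OS << S;
    if (isExternallyUsed(S))
      OS << " <extract>";
    OS << "\n";
  }
  return OS.str();
}

int main() {
  auto Used = [](const std::string &S) { return S == "%a"; };
  std::printf("%s", nodeLabel({"%a", "%b"}, false, Used).c_str());
}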
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
ArrayRef<Value *> UserIgnoreLst) {
+ ExtraValueToDebugLocsMap ExternallyUsedValues;
+ buildTree(Roots, ExternallyUsedValues, UserIgnoreLst);
+}
+void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
+ ExtraValueToDebugLocsMap &ExternallyUsedValues,
+ ArrayRef<Value *> UserIgnoreLst) {
deleteTree();
UserIgnoreList = UserIgnoreLst;
if (!allSameType(Roots))
return;
- buildTree_rec(Roots, 0);
+ buildTree_rec(Roots, 0, -1);
// Collect the values that we need to extract from the tree.
for (TreeEntry &EIdx : VectorizableTree) {
@@ -940,6 +1061,14 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
if (Entry->NeedToGather)
continue;
+ // Check if the scalar is externally used as an extra arg.
+ auto ExtI = ExternallyUsedValues.find(Scalar);
+ if (ExtI != ExternallyUsedValues.end()) {
+ DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane " <<
+ Lane << " from " << *Scalar << ".\n");
+ ExternalUses.emplace_back(Scalar, nullptr, Lane);
+ continue;
+ }
for (User *U : Scalar->users()) {
DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
@@ -976,28 +1105,28 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
}
}
-
-void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
+void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
+ int UserTreeIdx) {
bool isAltShuffle = false;
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
if (Depth == RecursionMaxDepth) {
DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
// Don't handle vectors.
if (VL[0]->getType()->isVectorTy()) {
DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
if (SI->getValueOperand()->getType()->isVectorTy()) {
DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
unsigned Opcode = getSameOpcode(VL);
@@ -1014,7 +1143,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
// If all of the operands are identical or constant we have a simple solution.
if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !Opcode) {
DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
@@ -1026,7 +1155,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (EphValues.count(VL[i])) {
DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] <<
") is ephemeral.\n");
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
}
@@ -1039,10 +1168,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n");
if (E->Scalars[i] != VL[i]) {
DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
}
+      // Record the reuse of the tree node. FIXME: currently this is only used
+ // properly draw the graph rather than for the actual vectorization.
+ E->UserTreeIndices.push_back(UserTreeIdx);
DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *VL[0] << ".\n");
return;
}
@@ -1052,7 +1184,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (ScalarToTreeEntry.count(VL[i])) {
DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] <<
") is already in tree.\n");
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
}
@@ -1062,7 +1194,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
if (MustGather.count(VL[i])) {
DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
}
@@ -1076,7 +1208,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
// Don't go into unreachable blocks. They may contain instructions with
// dependency cycles which confuse the final scheduling.
DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
@@ -1085,7 +1217,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (unsigned j = i+1; j < e; ++j)
if (VL[i] == VL[j]) {
DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
@@ -1100,7 +1232,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
assert((!BS.getScheduleData(VL[0]) ||
!BS.getScheduleData(VL[0])->isPartOfBundle()) &&
"tryScheduleBundle should cancelScheduling on failure");
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
@@ -1117,12 +1249,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (Term) {
DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
}
- newTreeEntry(VL, true);
+ newTreeEntry(VL, true, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
@@ -1132,7 +1264,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock(
PH->getIncomingBlock(i)));
- buildTree_rec(Operands, Depth + 1);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
@@ -1144,7 +1276,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
} else {
BS.cancelScheduling(VL);
}
- newTreeEntry(VL, Reuse);
+ newTreeEntry(VL, Reuse, UserTreeIdx);
return;
}
case Instruction::Load: {
@@ -1160,7 +1292,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (DL->getTypeSizeInBits(ScalarTy) !=
DL->getTypeAllocSizeInBits(ScalarTy)) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
return;
}
@@ -1171,7 +1303,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
LoadInst *L = cast<LoadInst>(VL[i]);
if (!L->isSimple()) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
}
@@ -1193,7 +1325,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (Consecutive) {
++NumLoadsWantToKeepOrder;
- newTreeEntry(VL, true);
+ newTreeEntry(VL, true, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of loads.\n");
return;
}
@@ -1208,7 +1340,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
if (ReverseConsecutive) {
++NumLoadsWantToChangeOrder;
@@ -1235,12 +1367,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
if (Ty != SrcTy || !isValidElementType(Ty)) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n");
return;
}
}
- newTreeEntry(VL, true);
+ newTreeEntry(VL, true, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of casts.\n");
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
@@ -1249,7 +1381,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth+1);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
@@ -1263,13 +1395,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (Cmp->getPredicate() != P0 ||
Cmp->getOperand(0)->getType() != ComparedTy) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n");
return;
}
}
- newTreeEntry(VL, true);
+ newTreeEntry(VL, true, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of compares.\n");
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
@@ -1278,7 +1410,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth+1);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
@@ -1301,7 +1433,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- newTreeEntry(VL, true);
+ newTreeEntry(VL, true, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
// Sort operands of the instructions so that each side is more likely to
@@ -1309,8 +1441,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right);
- buildTree_rec(Left, Depth + 1);
- buildTree_rec(Right, Depth + 1);
+ buildTree_rec(Left, Depth + 1, UserTreeIdx);
+ buildTree_rec(Right, Depth + 1, UserTreeIdx);
return;
}
@@ -1320,7 +1452,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth+1);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
@@ -1330,7 +1462,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
}
@@ -1343,7 +1475,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (Ty0 != CurTy) {
DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
}
@@ -1355,12 +1487,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(
dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
return;
}
}
- newTreeEntry(VL, true);
+ newTreeEntry(VL, true, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
for (unsigned i = 0, e = 2; i < e; ++i) {
ValueList Operands;
@@ -1368,7 +1500,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth + 1);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
@@ -1377,19 +1509,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
return;
}
- newTreeEntry(VL, true);
+ newTreeEntry(VL, true, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of stores.\n");
ValueList Operands;
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(0));
- buildTree_rec(Operands, Depth + 1);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
return;
}
case Instruction::Call: {
@@ -1400,7 +1532,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
if (!isTriviallyVectorizable(ID)) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
}
@@ -1414,7 +1546,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
!CI->hasIdenticalOperandBundleSchema(*CI2)) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
<< "\n");
return;
@@ -1425,7 +1557,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
Value *A1J = CI2->getArgOperand(1);
if (A1I != A1J) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
<< " argument "<< A1I<<"!=" << A1J
<< "\n");
@@ -1438,14 +1570,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
CI->op_begin() + CI->getBundleOperandsEndIndex(),
CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI << "!="
<< *VL[i] << '\n');
return;
}
}
- newTreeEntry(VL, true);
+ newTreeEntry(VL, true, UserTreeIdx);
for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
ValueList Operands;
// Prepare the operand vector.
@@ -1453,7 +1585,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
CallInst *CI2 = dyn_cast<CallInst>(j);
Operands.push_back(CI2->getArgOperand(i));
}
- buildTree_rec(Operands, Depth + 1);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
@@ -1462,19 +1594,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
// then do not vectorize this instruction.
if (!isAltShuffle) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return;
}
- newTreeEntry(VL, true);
+ newTreeEntry(VL, true, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
// Reorder operands if reordering would enable vectorization.
if (isa<BinaryOperator>(VL0)) {
ValueList Left, Right;
reorderAltShuffleOperands(VL, Left, Right);
- buildTree_rec(Left, Depth + 1);
- buildTree_rec(Right, Depth + 1);
+ buildTree_rec(Left, Depth + 1, UserTreeIdx);
+ buildTree_rec(Right, Depth + 1, UserTreeIdx);
return;
}
@@ -1484,13 +1616,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth + 1);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
default:
BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
+ newTreeEntry(VL, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
return;
}
@@ -1570,6 +1702,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
Type *ScalarTy = VL[0]->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
ScalarTy = SI->getValueOperand()->getType();
+ else if (CmpInst *CI = dyn_cast<CmpInst>(VL[0]))
+ ScalarTy = CI->getOperand(0)->getType();
VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
// If we have computed a smaller type for the expression, update VecTy so
@@ -1599,7 +1733,13 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
int DeadCost = 0;
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
Instruction *E = cast<Instruction>(VL[i]);
- if (E->hasOneUse())
+      // If all of its users are going to be vectorized, the instruction can
+      // be considered dead. Likewise, if it has only one user, it is sure to
+      // be vectorized.
+ if (E->hasOneUse() ||
+ std::all_of(E->user_begin(), E->user_end(), [this](User *U) {
+ return ScalarToTreeEntry.count(U) > 0;
+ }))
// Take credit for instruction that will become dead.
DeadCost +=
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
@@ -1624,10 +1764,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
// Calculate the cost of this instruction.
int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
- VL0->getType(), SrcTy);
+ VL0->getType(), SrcTy, VL0);
VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
- int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
+ int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy, VL0);
return VecCost - ScalarCost;
}
case Instruction::FCmp:
@@ -1636,8 +1776,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
// Calculate the cost of this instruction.
VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
int ScalarCost = VecTy->getNumElements() *
- TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty());
- int VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy);
+ TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty(), VL0);
+ int VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy, VL0);
return VecCost - ScalarCost;
}
case Instruction::Add:
@@ -1720,18 +1860,18 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
// Cost of wide load - cost of scalar loads.
unsigned alignment = dyn_cast<LoadInst>(VL0)->getAlignment();
int ScalarLdCost = VecTy->getNumElements() *
- TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0);
+ TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0);
int VecLdCost = TTI->getMemoryOpCost(Instruction::Load,
- VecTy, alignment, 0);
+ VecTy, alignment, 0, VL0);
return VecLdCost - ScalarLdCost;
}
case Instruction::Store: {
// We know that we can merge the stores. Calculate the cost.
unsigned alignment = dyn_cast<StoreInst>(VL0)->getAlignment();
int ScalarStCost = VecTy->getNumElements() *
- TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0);
+ TTI->getMemoryOpCost(Instruction::Store, ScalarTy, alignment, 0, VL0);
int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
- VecTy, alignment, 0);
+ VecTy, alignment, 0, VL0);
return VecStCost - ScalarStCost;
}
case Instruction::Call: {
@@ -1739,12 +1879,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
// Calculate the cost of the scalar and vector calls.
- SmallVector<Type*, 4> ScalarTys, VecTys;
- for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op) {
+ SmallVector<Type*, 4> ScalarTys;
+ for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op)
ScalarTys.push_back(CI->getArgOperand(op)->getType());
- VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(),
- VecTy->getNumElements()));
- }
FastMathFlags FMF;
if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
@@ -1753,7 +1890,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
int ScalarCallCost = VecTy->getNumElements() *
TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF);
- int VecCallCost = TTI->getIntrinsicInstrCost(ID, VecTy, VecTys, FMF);
+ SmallVector<Value *, 4> Args(CI->arg_operands());
+ int VecCallCost = TTI->getIntrinsicInstrCost(ID, CI->getType(), Args, FMF,
+ VecTy->getNumElements());
DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost
<< " (" << VecCallCost << "-" << ScalarCallCost << ")"
@@ -1947,9 +2086,18 @@ int BoUpSLP::getTreeCost() {
int SpillCost = getSpillCost();
Cost += SpillCost + ExtractCost;
- DEBUG(dbgs() << "SLP: Spill Cost = " << SpillCost << ".\n"
- << "SLP: Extract Cost = " << ExtractCost << ".\n"
- << "SLP: Total Cost = " << Cost << ".\n");
+ std::string Str;
+ {
+ raw_string_ostream OS(Str);
+ OS << "SLP: Spill Cost = " << SpillCost << ".\n"
+ << "SLP: Extract Cost = " << ExtractCost << ".\n"
+ << "SLP: Total Cost = " << Cost << ".\n";
+ }
+ DEBUG(dbgs() << Str);
+
+ if (ViewSLPTree)
+ ViewGraph(this, "SLP" + F->getName(), false, Str);
+
return Cost;
}
@@ -2702,6 +2850,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *BoUpSLP::vectorizeTree() {
+ ExtraValueToDebugLocsMap ExternallyUsedValues;
+ return vectorizeTree(ExternallyUsedValues);
+}
+
+Value *
+BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
// All blocks must be scheduled before any instructions are inserted.
for (auto &BSIter : BlocksSchedules) {
@@ -2744,7 +2898,7 @@ Value *BoUpSLP::vectorizeTree() {
// Skip users that we already RAUW. This happens when one instruction
// has multiple uses of the same value.
- if (!is_contained(Scalar->users(), User))
+ if (User && !is_contained(Scalar->users(), User))
continue;
assert(ScalarToTreeEntry.count(Scalar) && "Invalid scalar");
@@ -2756,6 +2910,28 @@ Value *BoUpSLP::vectorizeTree() {
assert(Vec && "Can't find vectorizable value");
Value *Lane = Builder.getInt32(ExternalUse.Lane);
+    // If User == nullptr, the Scalar is used as an extra argument. Generate an
+ // ExtractElement instruction and update the record for this scalar in
+ // ExternallyUsedValues.
+ if (!User) {
+ assert(ExternallyUsedValues.count(Scalar) &&
+ "Scalar with nullptr as an external user must be registered in "
+ "ExternallyUsedValues map");
+ if (auto *VecI = dyn_cast<Instruction>(Vec)) {
+ Builder.SetInsertPoint(VecI->getParent(),
+ std::next(VecI->getIterator()));
+ } else {
+ Builder.SetInsertPoint(&F->getEntryBlock().front());
+ }
+ Value *Ex = Builder.CreateExtractElement(Vec, Lane);
+ Ex = extend(ScalarRoot, Ex, Scalar->getType());
+ CSEBlocks.insert(cast<Instruction>(Scalar)->getParent());
+ auto &Locs = ExternallyUsedValues[Scalar];
+ ExternallyUsedValues.insert({Ex, Locs});
+ ExternallyUsedValues.erase(Scalar);
+ continue;
+ }
+
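
The map update in the block above is easy to misread, so here is a reduced model of just the key hand-off: after emitting an extract for a scalar that is an extra reduction argument, its logged debug locations move to the new extract, and later reduction emission references the extract instead. The real container is a MapVector keyed by Value*; strings are stand-ins:

#include <map>
#include <string>
#include <vector>

using ExtraValues = std::map<std::string, std::vector<std::string>>;

static void remapToExtract(ExtraValues &EV, const std::string &Scalar,
                           const std::string &Extract) {
  std::vector<std::string> Locs = EV[Scalar];
  EV.insert({Extract, Locs});
  EV.erase(Scalar);
}

int main() {
  ExtraValues EV;
  EV["%scalar"] = {"loc0", "loc1"};
  remapToExtract(EV, "%scalar", "%extract");
  return EV.count("%extract") == 1 && EV.count("%scalar") == 0 ? 0 : 1;
}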
// Generate extracts for out-of-tree users.
// Find the insertion point for the extractelement lane.
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
@@ -3264,7 +3440,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
// sorted by the original instruction location. This lets the final schedule
// be as close as possible to the original instruction order.
struct ScheduleDataCompare {
- bool operator()(ScheduleData *SD1, ScheduleData *SD2) {
+ bool operator()(ScheduleData *SD1, ScheduleData *SD2) const {
return SD2->SchedulingPriority < SD1->SchedulingPriority;
}
};
@@ -3645,9 +3821,9 @@ PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &A
bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB);
if (!Changed)
return PreservedAnalyses::all();
+
PreservedAnalyses PA;
- PA.preserve<LoopAnalysis>();
- PA.preserve<DominatorTreeAnalysis>();
+ PA.preserveSet<CFGAnalyses>();
PA.preserve<AAManager>();
PA.preserve<GlobalsAA>();
return PA;
@@ -4026,36 +4202,40 @@ bool SLPVectorizerPass::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
if (!V)
return false;
+ Value *P = V->getParent();
+
+ // Vectorize in current basic block only.
+ auto *Op0 = dyn_cast<Instruction>(V->getOperand(0));
+ auto *Op1 = dyn_cast<Instruction>(V->getOperand(1));
+ if (!Op0 || !Op1 || Op0->getParent() != P || Op1->getParent() != P)
+ return false;
+
// Try to vectorize V.
- if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
+ if (tryToVectorizePair(Op0, Op1, R))
return true;
- BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
- BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
+ auto *A = dyn_cast<BinaryOperator>(Op0);
+ auto *B = dyn_cast<BinaryOperator>(Op1);
// Try to skip B.
if (B && B->hasOneUse()) {
- BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
- BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
- if (tryToVectorizePair(A, B0, R)) {
+ auto *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
+ auto *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
+ if (B0 && B0->getParent() == P && tryToVectorizePair(A, B0, R))
return true;
- }
- if (tryToVectorizePair(A, B1, R)) {
+ if (B1 && B1->getParent() == P && tryToVectorizePair(A, B1, R))
return true;
- }
}
// Try to skip A.
if (A && A->hasOneUse()) {
- BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
- BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
- if (tryToVectorizePair(A0, B, R)) {
+ auto *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
+ auto *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
+ if (A0 && A0->getParent() == P && tryToVectorizePair(A0, B, R))
return true;
- }
- if (tryToVectorizePair(A1, B, R)) {
+ if (A1 && A1->getParent() == P && tryToVectorizePair(A1, B, R))
return true;
- }
}
- return 0;
+ return false;
}
/// \brief Generate a shuffle mask to be used in a reduction tree.
@@ -4119,37 +4299,41 @@ namespace {
class HorizontalReduction {
SmallVector<Value *, 16> ReductionOps;
SmallVector<Value *, 32> ReducedVals;
+  // Use a MapVector to keep the output order deterministic.
+ MapVector<Instruction *, Value *> ExtraArgs;
- BinaryOperator *ReductionRoot;
- // After successfull horizontal reduction vectorization attempt for PHI node
- // vectorizer tries to update root binary op by combining vectorized tree and
- // the ReductionPHI node. But during vectorization this ReductionPHI can be
- // vectorized itself and replaced by the undef value, while the instruction
- // itself is marked for deletion. This 'marked for deletion' PHI node then can
- // be used in new binary operation, causing "Use still stuck around after Def
- // is destroyed" crash upon PHI node deletion.
- WeakVH ReductionPHI;
+ BinaryOperator *ReductionRoot = nullptr;
/// The opcode of the reduction.
- unsigned ReductionOpcode;
+ Instruction::BinaryOps ReductionOpcode = Instruction::BinaryOpsEnd;
/// The opcode of the values we perform a reduction on.
- unsigned ReducedValueOpcode;
+ unsigned ReducedValueOpcode = 0;
/// Should we model this reduction as a pairwise reduction tree or a tree that
/// splits the vector in halves and adds those halves.
- bool IsPairwiseReduction;
+ bool IsPairwiseReduction = false;
+
+ /// Checks if the ParentStackElem.first should be marked as a reduction
+  /// operation with an extra argument or as an extra argument itself.
+ void markExtraArg(std::pair<Instruction *, unsigned> &ParentStackElem,
+ Value *ExtraArg) {
+ if (ExtraArgs.count(ParentStackElem.first)) {
+ ExtraArgs[ParentStackElem.first] = nullptr;
+ // We ran into something like:
+ // ParentStackElem.first = ExtraArgs[ParentStackElem.first] + ExtraArg.
+ // The whole ParentStackElem.first should be considered as an extra value
+ // in this case.
+ // Do not perform analysis of remaining operands of ParentStackElem.first
+ // instruction, this whole instruction is an extra argument.
+ ParentStackElem.second = ParentStackElem.first->getNumOperands();
+ } else {
+ // We ran into something like:
+ // ParentStackElem.first += ... + ExtraArg + ...
+ ExtraArgs[ParentStackElem.first] = ExtraArg;
+ }
+ }
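
A standalone model of markExtraArg as introduced above: the first extra operand of a reduction node is remembered; a second one demotes the node itself to an extra argument, and the operand cursor is pushed past the end so the node's remaining operands are not analyzed. Node is a simplified stand-in for Instruction:

#include <map>
#include <utility>

struct Node { unsigned NumOperands; };

static void markExtraArg(std::pair<Node *, unsigned> &ParentStackElem,
                         void *ExtraArg, std::map<Node *, void *> &ExtraArgs) {
  Node *Parent = ParentStackElem.first;
  if (ExtraArgs.count(Parent)) {
    ExtraArgs[Parent] = nullptr;                  // whole node is extra
    ParentStackElem.second = Parent->NumOperands; // skip remaining operands
  } else {
    ExtraArgs[Parent] = ExtraArg;                 // Parent += ... + ExtraArg
  }
}

int main() {
  Node N{2};
  std::map<Node *, void *> Extra;
  std::pair<Node *, unsigned> Frame{&N, 1};
  int A = 0, B = 0;
  markExtraArg(Frame, &A, Extra); // first extra operand: remembered
  markExtraArg(Frame, &B, Extra); // second: N itself becomes the extra value
  return Extra[&N] == nullptr && Frame.second == 2 ? 0 : 1;
}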
public:
- /// The width of one full horizontal reduction operation.
- unsigned ReduxWidth;
-
- /// Minimal width of available vector registers. It's used to determine
- /// ReduxWidth.
- unsigned MinVecRegSize;
-
- HorizontalReduction(unsigned MinVecRegSize)
- : ReductionRoot(nullptr), ReductionOpcode(0), ReducedValueOpcode(0),
- IsPairwiseReduction(false), ReduxWidth(0),
- MinVecRegSize(MinVecRegSize) {}
+ HorizontalReduction() = default;
/// \brief Try to find a reduction tree.
bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {
@@ -4176,21 +4360,14 @@ public:
if (!isValidElementType(Ty))
return false;
- const DataLayout &DL = B->getModule()->getDataLayout();
ReductionOpcode = B->getOpcode();
ReducedValueOpcode = 0;
- // FIXME: Register size should be a parameter to this function, so we can
- // try different vectorization factors.
- ReduxWidth = MinVecRegSize / DL.getTypeSizeInBits(Ty);
ReductionRoot = B;
- ReductionPHI = Phi;
-
- if (ReduxWidth < 4)
- return false;
// We currently only support adds.
- if (ReductionOpcode != Instruction::Add &&
- ReductionOpcode != Instruction::FAdd)
+ if ((ReductionOpcode != Instruction::Add &&
+ ReductionOpcode != Instruction::FAdd) ||
+ !B->isAssociative())
return false;
// Post order traverse the reduction tree starting at B. We only handle true
@@ -4202,30 +4379,26 @@ public:
unsigned EdgeToVist = Stack.back().second++;
bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode;
- // Only handle trees in the current basic block.
- if (TreeN->getParent() != B->getParent())
- return false;
-
- // Each tree node needs to have one user except for the ultimate
- // reduction.
- if (!TreeN->hasOneUse() && TreeN != B)
- return false;
-
      // Postorder visit.
if (EdgeToVist == 2 || IsReducedValue) {
- if (IsReducedValue) {
- // Make sure that the opcodes of the operations that we are going to
- // reduce match.
- if (!ReducedValueOpcode)
- ReducedValueOpcode = TreeN->getOpcode();
- else if (ReducedValueOpcode != TreeN->getOpcode())
- return false;
+ if (IsReducedValue)
ReducedVals.push_back(TreeN);
- } else {
- // We need to be able to reassociate the adds.
- if (!TreeN->isAssociative())
- return false;
- ReductionOps.push_back(TreeN);
+ else {
+ auto I = ExtraArgs.find(TreeN);
+ if (I != ExtraArgs.end() && !I->second) {
+ // Check if TreeN is an extra argument of its parent operation.
+ if (Stack.size() <= 1) {
+ // TreeN can't be an extra argument as it is a root reduction
+ // operation.
+ return false;
+ }
+ // Yes, TreeN is an extra argument, do not add it to a list of
+ // reduction operations.
+ // Stack[Stack.size() - 2] always points to the parent operation.
+ markExtraArg(Stack[Stack.size() - 2], TreeN);
+ ExtraArgs.erase(TreeN);
+ } else
+ ReductionOps.push_back(TreeN);
}
// Retract.
Stack.pop_back();
@@ -4242,13 +4415,44 @@ public:
// reduced value class.
if (I && (!ReducedValueOpcode || I->getOpcode() == ReducedValueOpcode ||
I->getOpcode() == ReductionOpcode)) {
- if (!ReducedValueOpcode && I->getOpcode() != ReductionOpcode)
+ // Only handle trees in the current basic block.
+ if (I->getParent() != B->getParent()) {
+ // I is an extra argument for TreeN (its parent operation).
+ markExtraArg(Stack.back(), I);
+ continue;
+ }
+
+ // Each tree node needs to have one user except for the ultimate
+ // reduction.
+ if (!I->hasOneUse() && I != B) {
+ // I is an extra argument for TreeN (its parent operation).
+ markExtraArg(Stack.back(), I);
+ continue;
+ }
+
+ if (I->getOpcode() == ReductionOpcode) {
+ // We need to be able to reassociate the reduction operations.
+ if (!I->isAssociative()) {
+ // I is an extra argument for TreeN (its parent operation).
+ markExtraArg(Stack.back(), I);
+ continue;
+ }
+ } else if (ReducedValueOpcode &&
+ ReducedValueOpcode != I->getOpcode()) {
+ // Make sure that the opcodes of the operations that we are going to
+ // reduce match.
+ // I is an extra argument for TreeN (its parent operation).
+ markExtraArg(Stack.back(), I);
+ continue;
+ } else if (!ReducedValueOpcode)
ReducedValueOpcode = I->getOpcode();
+
Stack.push_back(std::make_pair(I, 0));
continue;
}
- return false;
}
+ // NextV is an extra argument for TreeN (its parent operation).
+ markExtraArg(Stack.back(), NextV);
}
return true;
}
@@ -4259,10 +4463,15 @@ public:
if (ReducedVals.empty())
return false;
+  // If there are enough reduction values, reduce to the nearest power of
+  // two. We can safely generate oversized vectors and rely on the backend
+  // to split them into legal sizes.
unsigned NumReducedVals = ReducedVals.size();
- if (NumReducedVals < ReduxWidth)
+ if (NumReducedVals < 4)
return false;
+ unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
+
Value *VectorizedTree = nullptr;
IRBuilder<> Builder(ReductionRoot);
FastMathFlags Unsafe;
@@ -4270,20 +4479,26 @@ public:
Builder.setFastMathFlags(Unsafe);
unsigned i = 0;
- for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
+ BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
+    // The same extra argument may be used several times, so log each attempt
+ // to use it.
+ for (auto &Pair : ExtraArgs)
+ ExternallyUsedValues[Pair.second].push_back(Pair.first);
+ while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
- V.buildTree(VL, ReductionOps);
+ V.buildTree(VL, ExternallyUsedValues, ReductionOps);
if (V.shouldReorder()) {
SmallVector<Value *, 8> Reversed(VL.rbegin(), VL.rend());
- V.buildTree(Reversed, ReductionOps);
+ V.buildTree(Reversed, ExternallyUsedValues, ReductionOps);
}
if (V.isTreeTinyAndNotFullyVectorizable())
- continue;
+ break;
V.computeMinimumValueSizes();
// Estimate cost.
- int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
+ int Cost =
+ V.getTreeCost() + getReductionCost(TTI, ReducedVals[i], ReduxWidth);
if (Cost >= -SLPCostThreshold)
break;
@@ -4292,33 +4507,44 @@ public:
// Vectorize a tree.
DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
- Value *VectorizedRoot = V.vectorizeTree();
+ Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
// Emit a reduction.
- Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder);
+ Value *ReducedSubTree =
+ emitReduction(VectorizedRoot, Builder, ReduxWidth, ReductionOps);
if (VectorizedTree) {
Builder.SetCurrentDebugLocation(Loc);
- VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree,
- ReducedSubTree, "bin.rdx");
+ VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree,
+ ReducedSubTree, "bin.rdx");
+ propagateIRFlags(VectorizedTree, ReductionOps);
} else
VectorizedTree = ReducedSubTree;
+ i += ReduxWidth;
+ ReduxWidth = PowerOf2Floor(NumReducedVals - i);
}
if (VectorizedTree) {
// Finish the reduction.
for (; i < NumReducedVals; ++i) {
- Builder.SetCurrentDebugLocation(
- cast<Instruction>(ReducedVals[i])->getDebugLoc());
- VectorizedTree = createBinOp(Builder, ReductionOpcode, VectorizedTree,
- ReducedVals[i]);
+ auto *I = cast<Instruction>(ReducedVals[i]);
+ Builder.SetCurrentDebugLocation(I->getDebugLoc());
+ VectorizedTree =
+ Builder.CreateBinOp(ReductionOpcode, VectorizedTree, I);
+ propagateIRFlags(VectorizedTree, ReductionOps);
+ }
+ for (auto &Pair : ExternallyUsedValues) {
+ assert(!Pair.second.empty() &&
+ "At least one DebugLoc must be inserted");
+ // Add each externally used value to the final reduction.
+ for (auto *I : Pair.second) {
+ Builder.SetCurrentDebugLocation(I->getDebugLoc());
+ VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree,
+ Pair.first, "bin.extra");
+ propagateIRFlags(VectorizedTree, I);
+ }
}
// Update users.
- if (ReductionPHI && !isa<UndefValue>(ReductionPHI)) {
- assert(ReductionRoot && "Need a reduction operation");
- ReductionRoot->setOperand(0, VectorizedTree);
- ReductionRoot->setOperand(1, ReductionPHI);
- } else
- ReductionRoot->replaceAllUsesWith(VectorizedTree);
+ ReductionRoot->replaceAllUsesWith(VectorizedTree);
}
return VectorizedTree != nullptr;
}
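
The width schedule introduced above replaces the fixed ReduxWidth with a shrinking power-of-two sequence. A small self-contained trace of the loop bounds, with powerOf2Floor standing in for llvm::PowerOf2Floor; the real code may also break out early on cost grounds, which this sketch ignores:

#include <cstdio>

static unsigned powerOf2Floor(unsigned N) {
  unsigned P = 1;
  while (P * 2 <= N)
    P *= 2;
  return P;
}

int main() {
  unsigned NumReducedVals = 13, i = 0;
  unsigned ReduxWidth = powerOf2Floor(NumReducedVals); // 8
  while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
    std::printf("vector chunk [%u, %u)\n", i, i + ReduxWidth);
    i += ReduxWidth;
    ReduxWidth = powerOf2Floor(NumReducedVals - i);
  }
  // Prints chunks [0, 8) and [8, 12); the last value is finished with
  // scalar ops, matching the trailing loop in tryToReduce.
  std::printf("%u leftover(s) reduced with scalar ops\n", NumReducedVals - i);
}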
@@ -4329,7 +4555,8 @@ public:
private:
/// \brief Calculate the cost of a reduction.
- int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal) {
+ int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal,
+ unsigned ReduxWidth) {
Type *ScalarTy = FirstReducedVal->getType();
Type *VecTy = VectorType::get(ScalarTy, ReduxWidth);
@@ -4352,15 +4579,9 @@ private:
return VecReduxCost - ScalarReduxCost;
}
- static Value *createBinOp(IRBuilder<> &Builder, unsigned Opcode, Value *L,
- Value *R, const Twine &Name = "") {
- if (Opcode == Instruction::FAdd)
- return Builder.CreateFAdd(L, R, Name);
- return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, L, R, Name);
- }
-
/// \brief Emit a horizontal reduction of the vectorized value.
- Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder) {
+ Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder,
+ unsigned ReduxWidth, ArrayRef<Value *> RedOps) {
assert(VectorizedValue && "Need to have a vectorized tree node");
assert(isPowerOf2_32(ReduxWidth) &&
"We only handle power-of-two reductions for now");
@@ -4378,15 +4599,16 @@ private:
Value *RightShuf = Builder.CreateShuffleVector(
TmpVec, UndefValue::get(TmpVec->getType()), (RightMask),
"rdx.shuf.r");
- TmpVec = createBinOp(Builder, ReductionOpcode, LeftShuf, RightShuf,
- "bin.rdx");
+ TmpVec = Builder.CreateBinOp(ReductionOpcode, LeftShuf, RightShuf,
+ "bin.rdx");
} else {
Value *UpperHalf =
createRdxShuffleMask(ReduxWidth, i, false, false, Builder);
Value *Shuf = Builder.CreateShuffleVector(
TmpVec, UndefValue::get(TmpVec->getType()), UpperHalf, "rdx.shuf");
- TmpVec = createBinOp(Builder, ReductionOpcode, TmpVec, Shuf, "bin.rdx");
+ TmpVec = Builder.CreateBinOp(ReductionOpcode, TmpVec, Shuf, "bin.rdx");
}
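+      // Intersect and propagate the IR flags (nsw/nuw, fast-math) of all the
+      // scalar reduction operations onto the newly created vector operation.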
+ propagateIRFlags(TmpVec, RedOps);
}
// The result is in the first element of the vector.
@@ -4438,16 +4660,19 @@ static bool findBuildVector(InsertElementInst *FirstInsertElem,
static bool findBuildAggregate(InsertValueInst *IV,
SmallVectorImpl<Value *> &BuildVector,
SmallVectorImpl<Value *> &BuildVectorOpds) {
- if (!IV->hasOneUse())
- return false;
- Value *V = IV->getAggregateOperand();
- if (!isa<UndefValue>(V)) {
- InsertValueInst *I = dyn_cast<InsertValueInst>(V);
- if (!I || !findBuildAggregate(I, BuildVector, BuildVectorOpds))
+ Value *V;
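+  // Walk the chain of insertvalue instructions from the last insert back to
+  // the first, then reverse both lists so the operands end up in source order.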
+ do {
+ BuildVector.push_back(IV);
+ BuildVectorOpds.push_back(IV->getInsertedValueOperand());
+ V = IV->getAggregateOperand();
+ if (isa<UndefValue>(V))
+ break;
+ IV = dyn_cast<InsertValueInst>(V);
+ if (!IV || !IV->hasOneUse())
return false;
- }
- BuildVector.push_back(IV);
- BuildVectorOpds.push_back(IV->getInsertedValueOperand());
+ } while (true);
+ std::reverse(BuildVector.begin(), BuildVector.end());
+ std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end());
return true;
}
@@ -4507,29 +4732,137 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
return nullptr;
}
+namespace {
+/// Tracks an instruction and the progress of analyzing its children.
+class WeakVHWithLevel final : public CallbackVH {
+  /// Operand index of the instruction currently being analyzed.
+ unsigned Level = 0;
+ /// Is this the instruction that should be vectorized, or are we now
+ /// processing children (i.e. operands of this instruction) for potential
+ /// vectorization?
+ bool IsInitial = true;
+
+public:
+ explicit WeakVHWithLevel() = default;
+  WeakVHWithLevel(Value *V) : CallbackVH(V) {}
+  /// Restart the analysis of children each time the tracked value is replaced
+  /// by a new instruction.
+ void allUsesReplacedWith(Value *New) override {
+ setValPtr(New);
+ Level = 0;
+ IsInitial = true;
+ }
+ /// Check if the instruction was not deleted during vectorization.
+  bool isValid() const { return getValPtr() != nullptr; }
+  /// Must the instruction itself be vectorized?
+ bool isInitial() const { return IsInitial; }
+ /// Try to vectorize children.
+ void clearInitial() { IsInitial = false; }
+ /// Are all children processed already?
+ bool isFinal() const {
+ assert(getValPtr() &&
+ (isa<Instruction>(getValPtr()) &&
+ cast<Instruction>(getValPtr())->getNumOperands() >= Level));
+ return getValPtr() &&
+ cast<Instruction>(getValPtr())->getNumOperands() == Level;
+ }
+ /// Get next child operation.
+ Value *nextOperand() {
+ assert(getValPtr() && isa<Instruction>(getValPtr()) &&
+ cast<Instruction>(getValPtr())->getNumOperands() > Level);
+ return cast<Instruction>(getValPtr())->getOperand(Level++);
+ }
+ virtual ~WeakVHWithLevel() = default;
+};
+} // namespace
+
/// \brief Attempt to reduce a horizontal reduction.
/// If it is legal to match a horizontal reduction feeding
-/// the phi node P with reduction operators BI, then check if it
-/// can be done.
+/// the phi node P with reduction operators Root in a basic block BB, then check
+/// if it can be done.
/// \returns true if a horizontal reduction was matched and reduced.
/// \returns false if a horizontal reduction was not matched.
-static bool canMatchHorizontalReduction(PHINode *P, BinaryOperator *BI,
- BoUpSLP &R, TargetTransformInfo *TTI,
- unsigned MinRegSize) {
+static bool canBeVectorized(
+ PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
+ TargetTransformInfo *TTI,
+ const function_ref<bool(BinaryOperator *, BoUpSLP &)> Vectorize) {
if (!ShouldVectorizeHor)
return false;
- HorizontalReduction HorRdx(MinRegSize);
- if (!HorRdx.matchAssociativeReduction(P, BI))
+ if (!Root)
return false;
- // If there is a sufficient number of reduction values, reduce
- // to a nearby power-of-2. Can safely generate oversized
- // vectors and rely on the backend to split them to legal sizes.
- HorRdx.ReduxWidth =
- std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));
+ if (Root->getParent() != BB)
+ return false;
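+  // Iterative depth-first traversal of Root's operand tree. WeakVHWithLevel
+  // remembers the operand index per node, so the walk can resume correctly
+  // even when instructions are replaced during vectorization.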
+ SmallVector<WeakVHWithLevel, 8> Stack(1, Root);
+ SmallSet<Value *, 8> VisitedInstrs;
+ bool Res = false;
+ while (!Stack.empty()) {
+ Value *V = Stack.back();
+ if (!V) {
+ Stack.pop_back();
+ continue;
+ }
+ auto *Inst = dyn_cast<Instruction>(V);
+ if (!Inst || isa<PHINode>(Inst)) {
+ Stack.pop_back();
+ continue;
+ }
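+    // On the first visit, try to match and vectorize the node itself before
+    // descending into its operands.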
+ if (Stack.back().isInitial()) {
+ Stack.back().clearInitial();
+ if (auto *BI = dyn_cast<BinaryOperator>(Inst)) {
+ HorizontalReduction HorRdx;
+ if (HorRdx.matchAssociativeReduction(P, BI)) {
+ if (HorRdx.tryToReduce(R, TTI)) {
+ Res = true;
+ P = nullptr;
+ continue;
+ }
+ }
+ if (P) {
+ Inst = dyn_cast<Instruction>(BI->getOperand(0));
+ if (Inst == P)
+ Inst = dyn_cast<Instruction>(BI->getOperand(1));
+ if (!Inst) {
+ P = nullptr;
+ continue;
+ }
+ }
+ }
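+      // The reduction phi only guides matching at the root; clear it before
+      // attempting plain vectorization of this node.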
+ P = nullptr;
+ if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) {
+ Res = true;
+ continue;
+ }
+ }
+ if (Stack.back().isFinal()) {
+ Stack.pop_back();
+ continue;
+ }
- return HorRdx.tryToReduce(R, TTI);
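+    // Otherwise descend into the next operand, staying within the basic block
+    // and respecting the recursion depth limit.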
+ if (auto *NextV = dyn_cast<Instruction>(Stack.back().nextOperand()))
+ if (NextV->getParent() == BB && VisitedInstrs.insert(NextV).second &&
+ Stack.size() < RecursionMaxDepth)
+ Stack.push_back(NextV);
+ }
+ return Res;
+}
+
+bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
+ BasicBlock *BB, BoUpSLP &R,
+ TargetTransformInfo *TTI) {
+ if (!V)
+ return false;
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
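+  // Only a binary-operator root can feed the reduction phi; drop the phi
+  // otherwise.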
+ if (!isa<BinaryOperator>(I))
+ P = nullptr;
+ // Try to match and vectorize a horizontal reduction.
+ return canBeVectorized(P, I, BB, R, TTI,
+ [this](BinaryOperator *BI, BoUpSLP &R) -> bool {
+ return tryToVectorize(BI, R);
+ });
}
bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
@@ -4599,67 +4932,42 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (P->getNumIncomingValues() != 2)
return Changed;
- Value *Rdx = getReductionValue(DT, P, BB, LI);
-
- // Check if this is a Binary Operator.
- BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
- if (!BI)
- continue;
-
// Try to match and vectorize a horizontal reduction.
- if (canMatchHorizontalReduction(P, BI, R, TTI, R.getMinVecRegSize())) {
+ if (vectorizeRootInstruction(P, getReductionValue(DT, P, BB, LI), BB, R,
+ TTI)) {
Changed = true;
it = BB->begin();
e = BB->end();
continue;
}
-
- Value *Inst = BI->getOperand(0);
- if (Inst == P)
- Inst = BI->getOperand(1);
-
- if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) {
- // We would like to start over since some instructions are deleted
- // and the iterator may become invalid value.
- Changed = true;
- it = BB->begin();
- e = BB->end();
- continue;
- }
-
continue;
}
- if (ShouldStartVectorizeHorAtStore)
- if (StoreInst *SI = dyn_cast<StoreInst>(it))
- if (BinaryOperator *BinOp =
- dyn_cast<BinaryOperator>(SI->getValueOperand())) {
- if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,
- R.getMinVecRegSize()) ||
- tryToVectorize(BinOp, R)) {
- Changed = true;
- it = BB->begin();
- e = BB->end();
- continue;
- }
+ if (ShouldStartVectorizeHorAtStore) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(it)) {
+ // Try to match and vectorize a horizontal reduction.
+ if (vectorizeRootInstruction(nullptr, SI->getValueOperand(), BB, R,
+ TTI)) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
}
+ }
+ }
// Try to vectorize horizontal reductions feeding into a return.
- if (ReturnInst *RI = dyn_cast<ReturnInst>(it))
- if (RI->getNumOperands() != 0)
- if (BinaryOperator *BinOp =
- dyn_cast<BinaryOperator>(RI->getOperand(0))) {
- DEBUG(dbgs() << "SLP: Found a return to vectorize.\n");
- if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,
- R.getMinVecRegSize()) ||
- tryToVectorizePair(BinOp->getOperand(0), BinOp->getOperand(1),
- R)) {
- Changed = true;
- it = BB->begin();
- e = BB->end();
- continue;
- }
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(it)) {
+ if (RI->getNumOperands() != 0) {
+ // Try to match and vectorize a horizontal reduction.
+ if (vectorizeRootInstruction(nullptr, RI->getOperand(0), BB, R, TTI)) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
}
+ }
+ }
// Try to vectorize trees that start at compare instructions.
if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
@@ -4672,16 +4980,14 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
continue;
}
- for (int i = 0; i < 2; ++i) {
- if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) {
- if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) {
- Changed = true;
- // We would like to start over since some instructions are deleted
- // and the iterator may become invalid value.
- it = BB->begin();
- e = BB->end();
- break;
- }
+ for (int I = 0; I < 2; ++I) {
+ if (vectorizeRootInstruction(nullptr, CI->getOperand(I), BB, R, TTI)) {
+ Changed = true;
+ // We would like to start over since some instructions are deleted
+ // and the iterator may become invalid value.
+ it = BB->begin();
+ e = BB->end();
+ break;
}
}
continue;
diff --git a/lib/XRay/CMakeLists.txt b/lib/XRay/CMakeLists.txt
index 6c1acba79bfa..8d558209d8ee 100644
--- a/lib/XRay/CMakeLists.txt
+++ b/lib/XRay/CMakeLists.txt
@@ -1,4 +1,5 @@
add_llvm_library(LLVMXRay
+ InstrumentationMap.cpp
Trace.cpp
ADDITIONAL_HEADER_DIRS
@@ -7,7 +8,9 @@ add_llvm_library(LLVMXRay
DEPENDS
LLVMSupport
+ LLVMObject
LINK_LIBS
LLVMSupport
+ LLVMObject
)
diff --git a/lib/XRay/InstrumentationMap.cpp b/lib/XRay/InstrumentationMap.cpp
new file mode 100644
index 000000000000..431c251feb65
--- /dev/null
+++ b/lib/XRay/InstrumentationMap.cpp
@@ -0,0 +1,198 @@
+//===- InstrumentationMap.cpp - XRay Instrumentation Map ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the InstrumentationMap type for XRay sleds.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/XRay/InstrumentationMap.h"
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <system_error>
+#include <vector>
+
+using namespace llvm;
+using namespace xray;
+
+Optional<int32_t> InstrumentationMap::getFunctionId(uint64_t Addr) const {
+ auto I = FunctionIds.find(Addr);
+ if (I != FunctionIds.end())
+ return I->second;
+ return None;
+}
+
+Optional<uint64_t> InstrumentationMap::getFunctionAddr(int32_t FuncId) const {
+ auto I = FunctionAddresses.find(FuncId);
+ if (I != FunctionAddresses.end())
+ return I->second;
+ return None;
+}
+
+static Error
+loadELF64(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
+ InstrumentationMap::SledContainer &Sleds,
+ InstrumentationMap::FunctionAddressMap &FunctionAddresses,
+ InstrumentationMap::FunctionAddressReverseMap &FunctionIds) {
+ // Find the section named "xray_instr_map".
+ if (!ObjFile.getBinary()->isELF() ||
+ !(ObjFile.getBinary()->getArch() == Triple::x86_64 ||
+ ObjFile.getBinary()->getArch() == Triple::ppc64le))
+ return make_error<StringError>(
+ "File format not supported (only does ELF little endian 64-bit).",
+ std::make_error_code(std::errc::not_supported));
+
+ StringRef Contents = "";
+ const auto &Sections = ObjFile.getBinary()->sections();
+ auto I = llvm::find_if(Sections, [&](object::SectionRef Section) {
+ StringRef Name = "";
+ if (Section.getName(Name))
+ return false;
+ return Name == "xray_instr_map";
+ });
+
+ if (I == Sections.end())
+ return make_error<StringError>(
+ "Failed to find XRay instrumentation map.",
+ std::make_error_code(std::errc::executable_format_error));
+
+ if (I->getContents(Contents))
+ return errorCodeToError(
+ std::make_error_code(std::errc::executable_format_error));
+
+ // Copy the instrumentation map data into the Sleds data structure.
+ auto C = Contents.bytes_begin();
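+  // Each ELF64 sled entry occupies 32 bytes: a 64-bit sled address, a 64-bit
+  // function address, a one-byte kind, a one-byte always-instrument flag, and
+  // padding up to the entry size.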
+ static constexpr size_t ELF64SledEntrySize = 32;
+
+  if ((Contents.bytes_end() - C) % ELF64SledEntrySize != 0)
+ return make_error<StringError>(
+ Twine("Instrumentation map entries not evenly divisible by size of "
+ "an XRay sled entry in ELF64."),
+ std::make_error_code(std::errc::executable_format_error));
+
+ int32_t FuncId = 1;
+ uint64_t CurFn = 0;
+ for (; C != Contents.bytes_end(); C += ELF64SledEntrySize) {
+ DataExtractor Extractor(
+ StringRef(reinterpret_cast<const char *>(C), ELF64SledEntrySize), true,
+ 8);
+ Sleds.push_back({});
+ auto &Entry = Sleds.back();
+ uint32_t OffsetPtr = 0;
+ Entry.Address = Extractor.getU64(&OffsetPtr);
+ Entry.Function = Extractor.getU64(&OffsetPtr);
+ auto Kind = Extractor.getU8(&OffsetPtr);
+ static constexpr SledEntry::FunctionKinds Kinds[] = {
+ SledEntry::FunctionKinds::ENTRY, SledEntry::FunctionKinds::EXIT,
+ SledEntry::FunctionKinds::TAIL,
+ };
+    if (Kind >= array_lengthof(Kinds))
+ return errorCodeToError(
+ std::make_error_code(std::errc::executable_format_error));
+ Entry.Kind = Kinds[Kind];
+ Entry.AlwaysInstrument = Extractor.getU8(&OffsetPtr) != 0;
+
+    // We replicate the function id generation scheme implemented in the XRay
+    // runtime.
+ // FIXME: Figure out how to keep this consistent with the XRay runtime.
+ if (CurFn == 0) {
+ CurFn = Entry.Function;
+ FunctionAddresses[FuncId] = Entry.Function;
+ FunctionIds[Entry.Function] = FuncId;
+ }
+ if (Entry.Function != CurFn) {
+ ++FuncId;
+ CurFn = Entry.Function;
+ FunctionAddresses[FuncId] = Entry.Function;
+ FunctionIds[Entry.Function] = FuncId;
+ }
+ }
+ return Error::success();
+}
+
+static Error
+loadYAML(int Fd, size_t FileSize, StringRef Filename,
+ InstrumentationMap::SledContainer &Sleds,
+ InstrumentationMap::FunctionAddressMap &FunctionAddresses,
+ InstrumentationMap::FunctionAddressReverseMap &FunctionIds) {
+ std::error_code EC;
+ sys::fs::mapped_file_region MappedFile(
+ Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
+ if (EC)
+ return make_error<StringError>(
+ Twine("Failed memory-mapping file '") + Filename + "'.", EC);
+
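+  // The YAML form of an instrumentation map is a flat sequence of sled
+  // entries.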
+ std::vector<YAMLXRaySledEntry> YAMLSleds;
+ yaml::Input In(StringRef(MappedFile.data(), MappedFile.size()));
+ In >> YAMLSleds;
+ if (In.error())
+ return make_error<StringError>(
+ Twine("Failed loading YAML document from '") + Filename + "'.",
+ In.error());
+
+ Sleds.reserve(YAMLSleds.size());
+ for (const auto &Y : YAMLSleds) {
+ FunctionAddresses[Y.FuncId] = Y.Function;
+ FunctionIds[Y.Function] = Y.FuncId;
+ Sleds.push_back(
+ SledEntry{Y.Address, Y.Function, Y.Kind, Y.AlwaysInstrument});
+ }
+ return Error::success();
+}
+
+// FIXME: Create error types that encapsulate a bit more information than what
+// StringError instances contain.
+Expected<InstrumentationMap>
+llvm::xray::loadInstrumentationMap(StringRef Filename) {
+ // At this point we assume the file is an object file -- and if that doesn't
+ // work, we treat it as YAML.
+ // FIXME: Extend to support non-ELF and non-x86_64 binaries.
+
+ InstrumentationMap Map;
+ auto ObjectFileOrError = object::ObjectFile::createObjectFile(Filename);
+ if (!ObjectFileOrError) {
+ auto E = ObjectFileOrError.takeError();
+ // We try to load it as YAML if the ELF load didn't work.
+ int Fd;
+ if (sys::fs::openFileForRead(Filename, Fd))
+ return std::move(E);
+
+ uint64_t FileSize;
+ if (sys::fs::file_size(Filename, FileSize))
+ return std::move(E);
+
+ // If the file is empty, we return the original error.
+ if (FileSize == 0)
+ return std::move(E);
+
+ // From this point on the errors will be only for the YAML parts, so we
+ // consume the errors at this point.
+ consumeError(std::move(E));
+ if (auto E = loadYAML(Fd, FileSize, Filename, Map.Sleds,
+ Map.FunctionAddresses, Map.FunctionIds))
+ return std::move(E);
+ } else if (auto E = loadELF64(Filename, *ObjectFileOrError, Map.Sleds,
+ Map.FunctionAddresses, Map.FunctionIds)) {
+ return std::move(E);
+ }
+ return Map;
+}
diff --git a/lib/XRay/Trace.cpp b/lib/XRay/Trace.cpp
index 51000c777de8..d2984697c8a9 100644
--- a/lib/XRay/Trace.cpp
+++ b/lib/XRay/Trace.cpp
@@ -24,8 +24,8 @@ using llvm::yaml::Input;
using XRayRecordStorage =
std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
-Error NaiveLogLoader(StringRef Data, XRayFileHeader &FileHeader,
- std::vector<XRayRecord> &Records) {
+// Populates the FileHeader reference by reading the first 32 bytes of the file.
+Error readBinaryFormatHeader(StringRef Data, XRayFileHeader &FileHeader) {
// FIXME: Maybe deduce whether the data is little or big-endian using some
// magic bytes in the beginning of the file?
@@ -37,16 +37,6 @@ Error NaiveLogLoader(StringRef Data, XRayFileHeader &FileHeader,
// (4) uint32 : bitfield
// (8) uint64 : cycle frequency
// (16) - : padding
- //
- if (Data.size() < 32)
- return make_error<StringError>(
- "Not enough bytes for an XRay log.",
- std::make_error_code(std::errc::invalid_argument));
-
- if (Data.size() - 32 == 0 || Data.size() % 32 != 0)
- return make_error<StringError>(
- "Invalid-sized XRay data.",
- std::make_error_code(std::errc::invalid_argument));
DataExtractor HeaderExtractor(Data, true, 8);
uint32_t OffsetPtr = 0;
@@ -56,11 +46,29 @@ Error NaiveLogLoader(StringRef Data, XRayFileHeader &FileHeader,
FileHeader.ConstantTSC = Bitfield & 1uL;
FileHeader.NonstopTSC = Bitfield & 1uL << 1;
FileHeader.CycleFrequency = HeaderExtractor.getU64(&OffsetPtr);
-
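+  // Preserve the remaining 16 header bytes for format-specific use; FDR mode
+  // stores its per-thread buffer size here.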
+ std::memcpy(&FileHeader.FreeFormData, Data.bytes_begin() + OffsetPtr, 16);
if (FileHeader.Version != 1)
return make_error<StringError>(
Twine("Unsupported XRay file version: ") + Twine(FileHeader.Version),
std::make_error_code(std::errc::invalid_argument));
+ return Error::success();
+}
+
+Error loadNaiveFormatLog(StringRef Data, XRayFileHeader &FileHeader,
+ std::vector<XRayRecord> &Records) {
+ // Check that there is at least a header
+ if (Data.size() < 32)
+ return make_error<StringError>(
+ "Not enough bytes for an XRay log.",
+ std::make_error_code(std::errc::invalid_argument));
+
+ if (Data.size() - 32 == 0 || Data.size() % 32 != 0)
+ return make_error<StringError>(
+ "Invalid-sized XRay data.",
+ std::make_error_code(std::errc::invalid_argument));
+
+ if (auto E = readBinaryFormatHeader(Data, FileHeader))
+ return E;
// Each record after the header will be 32 bytes, in the following format:
//
@@ -98,9 +106,327 @@ Error NaiveLogLoader(StringRef Data, XRayFileHeader &FileHeader,
return Error::success();
}
-Error YAMLLogLoader(StringRef Data, XRayFileHeader &FileHeader,
- std::vector<XRayRecord> &Records) {
+/// When reading from a Flight Data Recorder mode log, metadata records are
+/// sparse compared to packed function records, so we must maintain state as we
+/// read through the sequence of entries. This allows the reader to denormalize
+/// the CPUId and Thread Id onto each Function Record and transform delta
+/// encoded TSC values into absolute encodings on each record.
+struct FDRState {
+ uint16_t CPUId;
+ uint16_t ThreadId;
+ uint64_t BaseTSC;
+ /// Encode some of the state transitions for the FDR log reader as explicit
+ /// checks. These are expectations for the next Record in the stream.
+ enum class Token {
+ NEW_BUFFER_RECORD_OR_EOF,
+ WALLCLOCK_RECORD,
+ NEW_CPU_ID_RECORD,
+ FUNCTION_SEQUENCE,
+ SCAN_TO_END_OF_THREAD_BUF,
+ };
+ Token Expects;
+  // Each thread's buffer may have trailing garbage to scan past, so we track
+  // our progress.
+ uint64_t CurrentBufferSize;
+ uint64_t CurrentBufferConsumed;
+};
+
+Twine fdrStateToTwine(const FDRState::Token &State) {
+  switch (State) {
+ case FDRState::Token::NEW_BUFFER_RECORD_OR_EOF:
+ return "NEW_BUFFER_RECORD_OR_EOF";
+ case FDRState::Token::WALLCLOCK_RECORD:
+ return "WALLCLOCK_RECORD";
+ case FDRState::Token::NEW_CPU_ID_RECORD:
+ return "NEW_CPU_ID_RECORD";
+ case FDRState::Token::FUNCTION_SEQUENCE:
+ return "FUNCTION_SEQUENCE";
+ case FDRState::Token::SCAN_TO_END_OF_THREAD_BUF:
+ return "SCAN_TO_END_OF_THREAD_BUF";
+ }
+ return "UNKNOWN";
+}
+
+/// State transition when a NewBufferRecord is encountered.
+Error processFDRNewBufferRecord(FDRState &State, uint8_t RecordFirstByte,
+ DataExtractor &RecordExtractor) {
+ if (State.Expects != FDRState::Token::NEW_BUFFER_RECORD_OR_EOF)
+ return make_error<StringError>(
+ "Malformed log. Read New Buffer record kind out of sequence",
+ std::make_error_code(std::errc::executable_format_error));
+ uint32_t OffsetPtr = 1; // 1 byte into record.
+ State.ThreadId = RecordExtractor.getU16(&OffsetPtr);
+ State.Expects = FDRState::Token::WALLCLOCK_RECORD;
+ return Error::success();
+}
+
+/// State transition when an EndOfBufferRecord is encountered.
+Error processFDREndOfBufferRecord(FDRState &State, uint8_t RecordFirstByte,
+ DataExtractor &RecordExtractor) {
+ if (State.Expects == FDRState::Token::NEW_BUFFER_RECORD_OR_EOF)
+ return make_error<StringError>(
+ "Malformed log. Received EOB message without current buffer.",
+ std::make_error_code(std::errc::executable_format_error));
+ State.Expects = FDRState::Token::SCAN_TO_END_OF_THREAD_BUF;
+ return Error::success();
+}
+
+/// State transition when a NewCPUIdRecord is encountered.
+Error processFDRNewCPUIdRecord(FDRState &State, uint8_t RecordFirstByte,
+ DataExtractor &RecordExtractor) {
+ if (State.Expects != FDRState::Token::FUNCTION_SEQUENCE &&
+ State.Expects != FDRState::Token::NEW_CPU_ID_RECORD)
+ return make_error<StringError>(
+ "Malformed log. Read NewCPUId record kind out of sequence",
+ std::make_error_code(std::errc::executable_format_error));
+ uint32_t OffsetPtr = 1; // Read starting after the first byte.
+ State.CPUId = RecordExtractor.getU16(&OffsetPtr);
+ State.BaseTSC = RecordExtractor.getU64(&OffsetPtr);
+ State.Expects = FDRState::Token::FUNCTION_SEQUENCE;
+ return Error::success();
+}
+
+/// State transition when a TSCWrapRecord (overflow detection) is encountered.
+Error processFDRTSCWrapRecord(FDRState &State, uint8_t RecordFirstByte,
+ DataExtractor &RecordExtractor) {
+ if (State.Expects != FDRState::Token::FUNCTION_SEQUENCE)
+ return make_error<StringError>(
+ "Malformed log. Read TSCWrap record kind out of sequence",
+ std::make_error_code(std::errc::executable_format_error));
+ uint32_t OffsetPtr = 1; // Read starting after the first byte.
+ State.BaseTSC = RecordExtractor.getU64(&OffsetPtr);
+ return Error::success();
+}
+
+/// State transition when a WallTimeMarkerRecord is encountered.
+Error processFDRWallTimeRecord(FDRState &State, uint8_t RecordFirstByte,
+ DataExtractor &RecordExtractor) {
+ if (State.Expects != FDRState::Token::WALLCLOCK_RECORD)
+ return make_error<StringError>(
+ "Malformed log. Read Wallclock record kind out of sequence",
+ std::make_error_code(std::errc::executable_format_error));
+ // We don't encode the wall time into any of the records.
+ // XRayRecords are concerned with the TSC instead.
+ State.Expects = FDRState::Token::NEW_CPU_ID_RECORD;
+ return Error::success();
+}
+
+/// Advances the state machine for reading the FDR record type by reading one
+/// Metadata Record and updating the State appropriately based on the kind of
+/// record encountered. The RecordKind is encoded in the first byte of the
+/// Record, which the caller should pass in because they have already read it
+/// to determine that this is a metadata record as opposed to a function record.
+Error processFDRMetadataRecord(FDRState &State, uint8_t RecordFirstByte,
+ DataExtractor &RecordExtractor) {
+ // The remaining 7 bits are the RecordKind enum.
+ uint8_t RecordKind = RecordFirstByte >> 1;
+ switch (RecordKind) {
+ case 0: // NewBuffer
+ if (auto E =
+ processFDRNewBufferRecord(State, RecordFirstByte, RecordExtractor))
+ return E;
+ break;
+ case 1: // EndOfBuffer
+ if (auto E = processFDREndOfBufferRecord(State, RecordFirstByte,
+ RecordExtractor))
+ return E;
+ break;
+ case 2: // NewCPUId
+ if (auto E =
+ processFDRNewCPUIdRecord(State, RecordFirstByte, RecordExtractor))
+ return E;
+ break;
+ case 3: // TSCWrap
+ if (auto E =
+ processFDRTSCWrapRecord(State, RecordFirstByte, RecordExtractor))
+ return E;
+ break;
+ case 4: // WallTimeMarker
+ if (auto E =
+ processFDRWallTimeRecord(State, RecordFirstByte, RecordExtractor))
+ return E;
+ break;
+ default:
+    // Cast the record kind to unsigned so the error prints a number rather
+    // than a char.
+ return make_error<StringError>(
+ Twine("Illegal metadata record type: ")
+ .concat(Twine(static_cast<unsigned>(RecordKind))),
+ std::make_error_code(std::errc::executable_format_error));
+ }
+ return Error::success();
+}
+
+/// Reads a function record from an FDR format log, appending a new XRayRecord
+/// to the vector being populated and updating the State with a new value
+/// reference value to interpret TSC deltas.
+///
+/// The XRayRecord constructed includes information from the function record
+/// processed here as well as Thread ID and CPU ID formerly extracted into
+/// State.
+Error processFDRFunctionRecord(FDRState &State, uint8_t RecordFirstByte,
+ DataExtractor &RecordExtractor,
+ std::vector<XRayRecord> &Records) {
+ switch (State.Expects) {
+ case FDRState::Token::NEW_BUFFER_RECORD_OR_EOF:
+ return make_error<StringError>(
+ "Malformed log. Received Function Record before new buffer setup.",
+ std::make_error_code(std::errc::executable_format_error));
+ case FDRState::Token::WALLCLOCK_RECORD:
+ return make_error<StringError>(
+ "Malformed log. Received Function Record when expecting wallclock.",
+ std::make_error_code(std::errc::executable_format_error));
+ case FDRState::Token::NEW_CPU_ID_RECORD:
+ return make_error<StringError>(
+ "Malformed log. Received Function Record before first CPU record.",
+ std::make_error_code(std::errc::executable_format_error));
+ default:
+ Records.emplace_back();
+ auto &Record = Records.back();
+ Record.RecordType = 0; // Record is type NORMAL.
+ // Strip off record type bit and use the next three bits.
+ uint8_t RecordType = (RecordFirstByte >> 1) & 0x07;
+ switch (RecordType) {
+ case static_cast<uint8_t>(RecordTypes::ENTER):
+ Record.Type = RecordTypes::ENTER;
+ break;
+ case static_cast<uint8_t>(RecordTypes::EXIT):
+ case 2: // TAIL_EXIT is not yet defined in RecordTypes.
+ Record.Type = RecordTypes::EXIT;
+ break;
+ default:
+      // Cast to unsigned when building the error so the record type isn't
+      // interpreted as a char.
+ return make_error<StringError>(
+ Twine("Illegal function record type: ")
+ .concat(Twine(static_cast<unsigned>(RecordType))),
+ std::make_error_code(std::errc::executable_format_error));
+ }
+ Record.CPU = State.CPUId;
+ Record.TId = State.ThreadId;
+      // Back up and re-read the first 32 bits, including the byte we already
+      // consumed; the low 4 bits hold the record discriminator and type, and
+      // the remaining 28 bits are the FunctionId.
+ uint32_t OffsetPtr = 0;
+ // Despite function Id being a signed int on XRayRecord,
+ // when it is written to an FDR format, the top bits are truncated,
+ // so it is effectively an unsigned value. When we shift off the
+ // top four bits, we want the shift to be logical, so we read as
+ // uint32_t.
+ uint32_t FuncIdBitField = RecordExtractor.getU32(&OffsetPtr);
+ Record.FuncId = FuncIdBitField >> 4;
+ // FunctionRecords have a 32 bit delta from the previous absolute TSC
+ // or TSC delta. If this would overflow, we should read a TSCWrap record
+ // with an absolute TSC reading.
+      uint64_t NewTSC = State.BaseTSC + RecordExtractor.getU32(&OffsetPtr);
+      State.BaseTSC = NewTSC;
+      Record.TSC = NewTSC;
+ }
+ return Error::success();
+}
+
+/// Reads a log in FDR mode for version 1 of this binary format. FDR mode is
+/// defined as part of the compiler-rt project in xray_fdr_logging.h, and such
+/// a log consists of the familiar 32 byte XRayFileHeader, followed by
+/// sequences of interspersed 16 byte Metadata Records and 8 byte Function
+/// Records.
+///
+/// The following is an attempt to document the grammar of the format, which is
+/// parsed by this function for little-endian machines. Since the format makes
+/// use of BitFields, when we support big-endian architectures we will need to
+/// adjust not only the endianness parameter to llvm's RecordExtractor, but
+/// also the bit twiddling logic, which is consistent with the little-endian
+/// convention that BitFields within a struct are packed into the least
+/// significant bits of the address they belong to.
+///
+/// We expect a format complying with the grammar in the following pseudo-EBNF.
+///
+/// FDRLog: XRayFileHeader ThreadBuffer*
+/// XRayFileHeader: 32 bytes to identify the log as FDR with machine metadata.
+/// ThreadBuffer: BufSize NewBuffer WallClockTime NewCPUId FunctionSequence EOB
+/// BufSize: 8 byte unsigned integer indicating how large the buffer is.
+/// NewBuffer: 16 byte metadata record with Thread Id.
+/// WallClockTime: 16 byte metadata record with human readable time.
+/// NewCPUId: 16 byte metadata record with CPUId and a 64 bit TSC reading.
+/// EOB: 16 byte record in a thread buffer plus mem garbage to fill BufSize.
+/// FunctionSequence: NewCPUId | TSCWrap | FunctionRecord
+/// TSCWrap: 16 byte metadata record with a full 64 bit TSC reading.
+/// FunctionRecord: 8 byte record with FunctionId, entry/exit, and TSC delta.
+Error loadFDRLog(StringRef Data, XRayFileHeader &FileHeader,
+ std::vector<XRayRecord> &Records) {
+ if (Data.size() < 32)
+ return make_error<StringError>(
+ "Not enough bytes for an XRay log.",
+ std::make_error_code(std::errc::invalid_argument));
+
+ // For an FDR log, there are records sized 16 and 8 bytes.
+ // There actually may be no records if no non-trivial functions are
+ // instrumented.
+ if (Data.size() % 8 != 0)
+ return make_error<StringError>(
+ "Invalid-sized XRay data.",
+ std::make_error_code(std::errc::invalid_argument));
+
+ if (auto E = readBinaryFormatHeader(Data, FileHeader))
+ return E;
+
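+  // FDR logs carry the per-thread buffer size in the free-form bytes of the
+  // file header.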
+ uint64_t BufferSize = 0;
+ {
+ StringRef ExtraDataRef(FileHeader.FreeFormData, 16);
+ DataExtractor ExtraDataExtractor(ExtraDataRef, true, 8);
+ uint32_t ExtraDataOffset = 0;
+ BufferSize = ExtraDataExtractor.getU64(&ExtraDataOffset);
+ }
+ FDRState State{0, 0, 0, FDRState::Token::NEW_BUFFER_RECORD_OR_EOF,
+ BufferSize, 0};
+ // RecordSize will tell the loop how far to seek ahead based on the record
+ // type that we have just read.
+ size_t RecordSize = 0;
+ for (auto S = Data.drop_front(32); !S.empty(); S = S.drop_front(RecordSize)) {
+ DataExtractor RecordExtractor(S, true, 8);
+ uint32_t OffsetPtr = 0;
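+    // After an EndOfBuffer record, the rest of the thread buffer is garbage;
+    // skip it in a single jump.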
+ if (State.Expects == FDRState::Token::SCAN_TO_END_OF_THREAD_BUF) {
+ RecordSize = State.CurrentBufferSize - State.CurrentBufferConsumed;
+ if (S.size() < State.CurrentBufferSize - State.CurrentBufferConsumed) {
+ return make_error<StringError>(
+ Twine("Incomplete thread buffer. Expected ") +
+ Twine(State.CurrentBufferSize - State.CurrentBufferConsumed) +
+ " remaining bytes but found " + Twine(S.size()),
+ make_error_code(std::errc::invalid_argument));
+ }
+ State.CurrentBufferConsumed = 0;
+ State.Expects = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF;
+ continue;
+ }
+ uint8_t BitField = RecordExtractor.getU8(&OffsetPtr);
+    bool IsMetadataRecord = BitField & 0x01uL;
+    if (IsMetadataRecord) {
+ RecordSize = 16;
+ if (auto E = processFDRMetadataRecord(State, BitField, RecordExtractor))
+ return E;
+ State.CurrentBufferConsumed += RecordSize;
+ } else { // Process Function Record
+ RecordSize = 8;
+ if (auto E = processFDRFunctionRecord(State, BitField, RecordExtractor,
+ Records))
+ return E;
+ State.CurrentBufferConsumed += RecordSize;
+ }
+ }
+  // Two terminal states are valid: expecting a new buffer (or EOF), or
+  // scanning a thread buffer whose declared size was exactly consumed.
+ if (State.Expects != FDRState::Token::NEW_BUFFER_RECORD_OR_EOF &&
+ !(State.Expects == FDRState::Token::SCAN_TO_END_OF_THREAD_BUF &&
+ State.CurrentBufferSize == State.CurrentBufferConsumed))
+ return make_error<StringError>(
+ Twine("Encountered EOF with unexpected state expectation ") +
+ fdrStateToTwine(State.Expects) +
+ ". Remaining expected bytes in thread buffer total " +
+ Twine(State.CurrentBufferSize - State.CurrentBufferConsumed),
+ std::make_error_code(std::errc::executable_format_error));
+
+ return Error::success();
+}
+
+Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader,
+ std::vector<XRayRecord> &Records) {
// Load the documents from the MappedFile.
YAMLXRayTrace Trace;
Input In(Data);
@@ -175,14 +501,21 @@ Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
uint16_t Version = HeaderExtractor.getU16(&OffsetPtr);
uint16_t Type = HeaderExtractor.getU16(&OffsetPtr);
+ enum BinaryFormatType { NAIVE_FORMAT = 0, FLIGHT_DATA_RECORDER_FORMAT = 1 };
+
Trace T;
- if (Version == 1 && (Type == 0 || Type == 1)) {
- if (auto E = NaiveLogLoader(StringRef(MappedFile.data(), MappedFile.size()),
- T.FileHeader, T.Records))
+ if (Version == 1 && Type == NAIVE_FORMAT) {
+ if (auto E =
+ loadNaiveFormatLog(StringRef(MappedFile.data(), MappedFile.size()),
+ T.FileHeader, T.Records))
+ return std::move(E);
+ } else if (Version == 1 && Type == FLIGHT_DATA_RECORDER_FORMAT) {
+ if (auto E = loadFDRLog(StringRef(MappedFile.data(), MappedFile.size()),
+ T.FileHeader, T.Records))
return std::move(E);
} else {
- if (auto E = YAMLLogLoader(StringRef(MappedFile.data(), MappedFile.size()),
- T.FileHeader, T.Records))
+ if (auto E = loadYAMLLog(StringRef(MappedFile.data(), MappedFile.size()),
+ T.FileHeader, T.Records))
return std::move(E);
}